From fb716f8e6786cd88b1bcff6926928a53792cedb7 Mon Sep 17 00:00:00 2001 From: Jack Koenig Date: Sat, 19 Nov 2022 14:22:02 -0800 Subject: [PATCH] Implement compressed Namespace The namespace disambiguates requests for the same name with _. Rather than storing every disambiguated name in the underlying HashMap, it now only stores the base along with the "next available" index. This makes the logic for checking if a name is already contained in the namespace slightly more sophisticated because users can name things in a way that will collide with disambiguated names from a common substring. For example, in naming the sequence "foo", "foo", "foo_1", the 2nd "foo" takes the name "foo_1" so the following "foo_1" gets disambiguated to "foo_1_1". But since we compressed that original "foo_1" into the same HashMap entry as just "foo", we have to do a form of "prefix checking" whenever naming something that ends in "_". In practice, the saved memory allocations more than make up for the more complicated logic to disambiguate names because the common case is still fast. --- .../main/scala/chisel3/internal/Builder.scala | 68 ++++++++++++++-- .../chisel3/internal/NamespaceSpec.scala | 81 +++++++++++++++++++ 2 files changed, 141 insertions(+), 8 deletions(-) create mode 100644 src/test/scala/chisel3/internal/NamespaceSpec.scala diff --git a/core/src/main/scala/chisel3/internal/Builder.scala b/core/src/main/scala/chisel3/internal/Builder.scala index dba5163a539..22e37fce28b 100644 --- a/core/src/main/scala/chisel3/internal/Builder.scala +++ b/core/src/main/scala/chisel3/internal/Builder.scala @@ -19,18 +19,29 @@ import logger.LazyLogging import scala.collection.mutable import chisel3.internal.sourceinfo.UnlocatableSourceInfo +import scala.annotation.tailrec private[chisel3] class Namespace(keywords: Set[String]) { + // This HashMap is compressed, not every name in the namespace is present here. + // If the same name is requested multiple times, it only takes 1 entry in the HashMap and the + // value is incremented for each time the name is requested. + // Names can be requested that collide with compressed sets of names, thus the algorithm for + // checking if a name is present in the Namespace is more complex than just checking the HashMap, + // see getIndex below. private val names = collection.mutable.HashMap[String, Long]() def copyTo(other: Namespace): Unit = names.foreach { case (s: String, l: Long) => other.names(s) = l } for (keyword <- keywords) names(keyword) = 1 - private def rename(n: String): String = { - val index = names(n) + @tailrec + private def rename(n: String, index: Long): String = { val tryName = s"${n}_${index}" - names(n) = index + 1 - if (this contains tryName) rename(n) else tryName + if (names.contains(tryName)) { + rename(n, index + 1) + } else { + names(n) = index + 1 + tryName + } } private def sanitize(s: String, leadingDigitOk: Boolean = false): String = { @@ -42,14 +53,55 @@ private[chisel3] class Namespace(keywords: Set[String]) { if (headOk) res else s"_$res" } - def contains(elem: String): Boolean = names.contains(elem) + /** Checks if `n` ends in `_\d+` and returns the substring before `_` if so, null otherwise */ + // TODO can and should this be folded in to sanitize? Same iteration as the forall? + private def prefix(n: String): Int = { + // This is micro-optimized because it runs on every single name + var i = n.size - 1 + while (i > 0 && n(i).isDigit) { + i -= 1 + } + // Will get i == 0 for all digits or _\d+ with empty prefix, those have no prefix so returning 0 is correct + if (i == n.size) 0 // no digits + else if (n(i) != '_') 0 // no _ + else i + } + + // TODO implement, also can this be useful below? + def contains(elem: String): Boolean = getIndex(elem).isDefined + + // Gets the current index for this name, None means it is not contained in the Namespace + def getIndex(elem: String): Option[Long] = + names.get(elem).orElse { + // This exact name isn't contained, but if we end in _, we need to check our prefix + val maybePrefix = prefix(elem) + if (maybePrefix == 0) None + else { + // If we get a prefix collision and our index is taken, we start disambiguating with __1 + names + .get(elem.take(maybePrefix)) + .filter { prefixIdx => + val ourIdx = elem.drop(maybePrefix + 1).toInt + // The namespace starts disambiguating at _1 so _0 is a false collision case + ourIdx != 0 && prefixIdx > ourIdx + } + .map(_ => 1) // If we have a prefix match and our index is take + } + } // leadingDigitOk is for use in fields of Records def name(elem: String, leadingDigitOk: Boolean = false): String = { val sanitized = sanitize(elem, leadingDigitOk) - val result = if (this.contains(sanitized)) rename(sanitized) else sanitized - names(result) = 1 - result + getIndex(sanitized) match { + case Some(idx) => rename(sanitized, idx) + case None => + names(sanitized) = 1 + sanitized + } + } + + def print(): Unit = { + names.foreach(println) } } diff --git a/src/test/scala/chisel3/internal/NamespaceSpec.scala b/src/test/scala/chisel3/internal/NamespaceSpec.scala new file mode 100644 index 00000000000..fd808ff05f4 --- /dev/null +++ b/src/test/scala/chisel3/internal/NamespaceSpec.scala @@ -0,0 +1,81 @@ +// SPDX-License-Identifier: Apache-2.0 + +package chisel3.internal + +import org.scalatest.flatspec.AnyFlatSpec +import org.scalatest.matchers.should.Matchers._ + +class NamespaceSpec extends AnyFlatSpec { + behavior.of("Namespace") + + they should "support basic disambiguation" in { + val namespace = Namespace.empty + val name = namespace.name(_, false) + name("x") should be("x") + name("x") should be("x_1") + name("x") should be("x_2") + } + + they should "support explicit _# names before names" in { + val namespace = Namespace.empty + val name = namespace.name(_, false) + name("x_1") should be("x_1") + name("x_2") should be("x_2") + name("x") should be("x") + name("x") should be("x_3") + } + + they should "support explicit _# names in the middle of names" in { + val namespace = Namespace.empty + val name = namespace.name(_, false) + name("x") should be("x") + name("x") should be("x_1") + name("x_1") should be("x_1_1") + name("x_2") should be("x_2") + name("x") should be("x_3") + } + + // For some reason, multi-character names tickled a different failure mode than single character + they should "support explicit _# names in the middle of longer names" in { + val namespace = Namespace.empty + val name = namespace.name(_, false) + name("foo") should be("foo") + name("foo") should be("foo_1") + name("foo_1") should be("foo_1_1") + name("foo_2") should be("foo_2") + name("foo") should be("foo_3") + } + + they should "support collisions in recursively growing names" in { + val namespace = Namespace.empty + val name = namespace.name(_, false) + name("x") should be("x") + name("x") should be("x_1") + name("x_1") should be("x_1_1") + name("x_1") should be("x_1_2") + name("x_1_1") should be("x_1_1_1") + name("x_1_1") should be("x_1_1_2") + } + + they should "support collisions in recursively shrinking names" in { + val namespace = Namespace.empty + val name = namespace.name(_, false) + name("x_1_1") should be("x_1_1") + name("x_1_1") should be("x_1_1_1") + name("x_1") should be("x_1") + name("x_1") should be("x_1_2") + name("x") should be("x") + name("x") should be("x_2") + } + + // The namespace never generates names with _0 so it's actually a false collision case + they should "properly handle false collisions with signals ending in _0" in { + val namespace = Namespace.empty + val name = namespace.name(_, false) + name("x") should be("x") + name("x") should be("x_1") + name("x_0") should be("x_0") + name("x") should be("x_2") + name("x_0") should be("x_0_1") + } +}