Skip to content

Commit

Permalink
Implement compressed Namespace
Browse files Browse the repository at this point in the history
The namespace disambiguates requests for the same name with _<idx>.
Rather than storing every disambiguated name in the underlying HashMap,
it now only stores the base along with the "next available" index. This
makes the logic for checking if a name is already contained in the
namespace slightly more sophisticated because users can name things in a
way that will collide with disambiguated names from a common substring.

For example, in naming the sequence "foo", "foo", "foo_1", the 2nd "foo"
takes the name "foo_1" so the following "foo_1" gets disambiguated to
"foo_1_1". But since we compressed that original "foo_1" into the same
HashMap entry as just "foo", we have to do a form of "prefix checking"
whenever naming something that ends in "_<idx>".

In practice, the saved memory allocations more than make up for the more
complicated logic to disambiguate names because the common case is still
fast.
  • Loading branch information
jackkoenig committed Nov 29, 2022
1 parent 269ce47 commit 36b81f7
Show file tree
Hide file tree
Showing 2 changed files with 136 additions and 8 deletions.
63 changes: 55 additions & 8 deletions core/src/main/scala/chisel3/internal/Builder.scala
Original file line number Diff line number Diff line change
Expand Up @@ -19,18 +19,29 @@ import logger.LazyLogging

import scala.collection.mutable
import chisel3.internal.sourceinfo.UnlocatableSourceInfo
import scala.annotation.tailrec

private[chisel3] class Namespace(keywords: Set[String]) {
// This HashMap is compressed, not every name in the namespace is present here.
// If the same name is requested multiple times, it only takes 1 entry in the HashMap and the
// value is incremented for each time the name is requested.
// Names can be requested that collide with compressed sets of names, thus the algorithm for
// checking if a name is present in the Namespace is more complex than just checking the HashMap,
// see getIndex below.
private val names = collection.mutable.HashMap[String, Long]()
def copyTo(other: Namespace): Unit = names.foreach { case (s: String, l: Long) => other.names(s) = l }
for (keyword <- keywords)
names(keyword) = 1

private def rename(n: String): String = {
val index = names(n)
@tailrec
private def rename(n: String, index: Long): String = {
val tryName = s"${n}_${index}"
names(n) = index + 1
if (this contains tryName) rename(n) else tryName
if (names.contains(tryName)) {
rename(n, index + 1)
} else {
names(n) = index + 1
tryName
}
}

private def sanitize(s: String, leadingDigitOk: Boolean = false): String = {
Expand All @@ -42,14 +53,50 @@ private[chisel3] class Namespace(keywords: Set[String]) {
if (headOk) res else s"_$res"
}

def contains(elem: String): Boolean = names.contains(elem)
/** Checks if `n` ends in `_\d+` and returns the substring before `_` if so, null otherwise */
// TODO can and should this be folded in to sanitize? Same iteration as the forall?
private def prefix(n: String): Int = {
// This is micro-optimized because it runs on every single name
var i = n.size - 1
while (i > 0 && n(i).isDigit) {
i -= 1
}
// Will get i == 0 for all digits or _\d+ with empty prefix, those have no prefix so returning 0 is correct
if (i == n.size) 0 // no digits
else if (n(i) != '_') 0 // no _
else i
}

def contains(elem: String): Boolean = getIndex(elem).isDefined

// Gets the current index for this name, None means it is not contained in the Namespace
def getIndex(elem: String): Option[Long] =
names.get(elem).orElse {
// This exact name isn't contained, but if we end in _<idx>, we need to check our prefix
val maybePrefix = prefix(elem)
if (maybePrefix == 0) None
else {
// If we get a prefix collision and our index is taken, we start disambiguating with _<idx>_1
names
.get(elem.take(maybePrefix))
.filter { prefixIdx =>
val ourIdx = elem.drop(maybePrefix + 1).toInt
// The namespace starts disambiguating at _1 so _0 is a false collision case
ourIdx != 0 && prefixIdx > ourIdx
}
.map(_ => 1)
}
}

// leadingDigitOk is for use in fields of Records
def name(elem: String, leadingDigitOk: Boolean = false): String = {
val sanitized = sanitize(elem, leadingDigitOk)
val result = if (this.contains(sanitized)) rename(sanitized) else sanitized
names(result) = 1
result
getIndex(sanitized) match {
case Some(idx) => rename(sanitized, idx)
case None =>
names(sanitized) = 1
sanitized
}
}
}

Expand Down
81 changes: 81 additions & 0 deletions src/test/scala/chisel3/internal/NamespaceSpec.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
// SPDX-License-Identifier: Apache-2.0

package chisel3.internal

import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers._

class NamespaceSpec extends AnyFlatSpec {
behavior.of("Namespace")

they should "support basic disambiguation" in {
val namespace = Namespace.empty
val name = namespace.name(_, false)
name("x") should be("x")
name("x") should be("x_1")
name("x") should be("x_2")
}

they should "support explicit <prefix>_# names before <prefix> names" in {
val namespace = Namespace.empty
val name = namespace.name(_, false)
name("x_1") should be("x_1")
name("x_2") should be("x_2")
name("x") should be("x")
name("x") should be("x_3")
}

they should "support explicit <prefix>_# names in the middle of <prefix> names" in {
val namespace = Namespace.empty
val name = namespace.name(_, false)
name("x") should be("x")
name("x") should be("x_1")
name("x_1") should be("x_1_1")
name("x_2") should be("x_2")
name("x") should be("x_3")
}

// For some reason, multi-character names tickled a different failure mode than single character
they should "support explicit <prefix>_# names in the middle of longer <prefix> names" in {
val namespace = Namespace.empty
val name = namespace.name(_, false)
name("foo") should be("foo")
name("foo") should be("foo_1")
name("foo_1") should be("foo_1_1")
name("foo_2") should be("foo_2")
name("foo") should be("foo_3")
}

they should "support collisions in recursively growing names" in {
val namespace = Namespace.empty
val name = namespace.name(_, false)
name("x") should be("x")
name("x") should be("x_1")
name("x_1") should be("x_1_1")
name("x_1") should be("x_1_2")
name("x_1_1") should be("x_1_1_1")
name("x_1_1") should be("x_1_1_2")
}

they should "support collisions in recursively shrinking names" in {
val namespace = Namespace.empty
val name = namespace.name(_, false)
name("x_1_1") should be("x_1_1")
name("x_1_1") should be("x_1_1_1")
name("x_1") should be("x_1")
name("x_1") should be("x_1_2")
name("x") should be("x")
name("x") should be("x_2")
}

// The namespace never generates names with _0 so it's actually a false collision case
they should "properly handle false collisions with signals ending in _0" in {
val namespace = Namespace.empty
val name = namespace.name(_, false)
name("x") should be("x")
name("x") should be("x_1")
name("x_0") should be("x_0")
name("x") should be("x_2")
name("x_0") should be("x_0_1")
}
}

0 comments on commit 36b81f7

Please sign in to comment.