Skip to content

Commit

Permalink
Colocate length filter calculations to avoid repeat enumeration
Browse files: browse the repository at this point in the history
  • Loading branch information
andrewjsaid committed Oct 20, 2023
1 parent 641929e commit 39a44df
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 35 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -161,10 +161,12 @@ private static FrozenDictionary<TKey, TValue> CreateFromDictionary<TKey, TValue>

// Calculate the minimum and maximum lengths of the strings in the dictionary (several of the analyses need these), and build the length filter bitmask in the same pass so the keys are not enumerated again later.
int minLength = int.MaxValue, maxLength = 0;
ulong lengthFilter = 0;
foreach (string key in keys)
{
if (key.Length < minLength) minLength = key.Length;
if (key.Length > maxLength) maxLength = key.Length;
lengthFilter |= (1UL << (key.Length % 64));
}
Debug.Assert(minLength >= 0 && maxLength >= minLength);

Expand All @@ -184,29 +186,29 @@ private static FrozenDictionary<TKey, TValue> CreateFromDictionary<TKey, TValue>
if (analysis.IgnoreCase)
{
frozenDictionary = analysis.AllAsciiIfIgnoreCase
? new OrdinalStringFrozenDictionary_RightJustifiedCaseInsensitiveAsciiSubstring<TValue>(keys, values, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff, analysis.LengthFilter, analysis.HashIndex, analysis.HashCount)
: new OrdinalStringFrozenDictionary_RightJustifiedCaseInsensitiveSubstring<TValue>(keys, values, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff, analysis.LengthFilter, analysis.HashIndex, analysis.HashCount);
? new OrdinalStringFrozenDictionary_RightJustifiedCaseInsensitiveAsciiSubstring<TValue>(keys, values, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff, lengthFilter, analysis.HashIndex, analysis.HashCount)
: new OrdinalStringFrozenDictionary_RightJustifiedCaseInsensitiveSubstring<TValue>(keys, values, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff, lengthFilter, analysis.HashIndex, analysis.HashCount);
}
else
{
frozenDictionary = analysis.HashCount == 1
? new OrdinalStringFrozenDictionary_RightJustifiedSingleChar<TValue>(keys, values, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff, analysis.LengthFilter, analysis.HashIndex)
: new OrdinalStringFrozenDictionary_RightJustifiedSubstring<TValue>(keys, values, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff, analysis.LengthFilter, analysis.HashIndex, analysis.HashCount);
? new OrdinalStringFrozenDictionary_RightJustifiedSingleChar<TValue>(keys, values, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff, lengthFilter, analysis.HashIndex)
: new OrdinalStringFrozenDictionary_RightJustifiedSubstring<TValue>(keys, values, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff, lengthFilter, analysis.HashIndex, analysis.HashCount);
}
}
else
{
if (analysis.IgnoreCase)
{
frozenDictionary = analysis.AllAsciiIfIgnoreCase
? new OrdinalStringFrozenDictionary_LeftJustifiedCaseInsensitiveAsciiSubstring<TValue>(keys, values, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff, analysis.LengthFilter, analysis.HashIndex, analysis.HashCount)
: new OrdinalStringFrozenDictionary_LeftJustifiedCaseInsensitiveSubstring<TValue>(keys, values, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff, analysis.LengthFilter, analysis.HashIndex, analysis.HashCount);
? new OrdinalStringFrozenDictionary_LeftJustifiedCaseInsensitiveAsciiSubstring<TValue>(keys, values, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff, lengthFilter, analysis.HashIndex, analysis.HashCount)
: new OrdinalStringFrozenDictionary_LeftJustifiedCaseInsensitiveSubstring<TValue>(keys, values, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff, lengthFilter, analysis.HashIndex, analysis.HashCount);
}
else
{
frozenDictionary = analysis.HashCount == 1
? new OrdinalStringFrozenDictionary_LeftJustifiedSingleChar<TValue>(keys, values, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff, analysis.LengthFilter, analysis.HashIndex)
: new OrdinalStringFrozenDictionary_LeftJustifiedSubstring<TValue>(keys, values, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff, analysis.LengthFilter, analysis.HashIndex, analysis.HashCount);
? new OrdinalStringFrozenDictionary_LeftJustifiedSingleChar<TValue>(keys, values, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff, lengthFilter, analysis.HashIndex)
: new OrdinalStringFrozenDictionary_LeftJustifiedSubstring<TValue>(keys, values, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff, lengthFilter, analysis.HashIndex, analysis.HashCount);
}
}
}
Expand All @@ -215,12 +217,12 @@ private static FrozenDictionary<TKey, TValue> CreateFromDictionary<TKey, TValue>
if (analysis.IgnoreCase)
{
frozenDictionary = analysis.AllAsciiIfIgnoreCase
? new OrdinalStringFrozenDictionary_FullCaseInsensitiveAscii<TValue>(keys, values, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff, analysis.LengthFilter)
: new OrdinalStringFrozenDictionary_FullCaseInsensitive<TValue>(keys, values, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff, analysis.LengthFilter);
? new OrdinalStringFrozenDictionary_FullCaseInsensitiveAscii<TValue>(keys, values, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff, lengthFilter)
: new OrdinalStringFrozenDictionary_FullCaseInsensitive<TValue>(keys, values, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff, lengthFilter);
}
else
{
frozenDictionary = new OrdinalStringFrozenDictionary_Full<TValue>(keys, values, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff, analysis.LengthFilter);
frozenDictionary = new OrdinalStringFrozenDictionary_Full<TValue>(keys, values, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff, lengthFilter);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -109,10 +109,12 @@ private static FrozenSet<T> CreateFromSet<T>(HashSet<T> source)

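// Calculate the minimum and maximum lengths of the strings in the set (several of the analyses need these), and build the length filter bitmask in the same pass so the entries are not enumerated again later.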
int minLength = int.MaxValue, maxLength = 0;
foreach (string s in entries)
ulong lengthFilter = 0;
foreach (string key in entries)
{
if (s.Length < minLength) minLength = s.Length;
if (s.Length > maxLength) maxLength = s.Length;
if (key.Length < minLength) minLength = key.Length;
if (key.Length > maxLength) maxLength = key.Length;
lengthFilter |= (1UL << (key.Length % 64));
}
Debug.Assert(minLength >= 0 && maxLength >= minLength);

Expand All @@ -132,29 +134,29 @@ private static FrozenSet<T> CreateFromSet<T>(HashSet<T> source)
if (analysis.IgnoreCase)
{
frozenSet = analysis.AllAsciiIfIgnoreCase
? new OrdinalStringFrozenSet_RightJustifiedCaseInsensitiveAsciiSubstring(entries, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff, analysis.LengthFilter, analysis.HashIndex, analysis.HashCount)
: new OrdinalStringFrozenSet_RightJustifiedCaseInsensitiveSubstring(entries, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff, analysis.LengthFilter, analysis.HashIndex, analysis.HashCount);
? new OrdinalStringFrozenSet_RightJustifiedCaseInsensitiveAsciiSubstring(entries, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff, lengthFilter, analysis.HashIndex, analysis.HashCount)
: new OrdinalStringFrozenSet_RightJustifiedCaseInsensitiveSubstring(entries, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff, lengthFilter, analysis.HashIndex, analysis.HashCount);
}
else
{
frozenSet = analysis.HashCount == 1
? new OrdinalStringFrozenSet_RightJustifiedSingleChar(entries, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff, analysis.LengthFilter, analysis.HashIndex)
: new OrdinalStringFrozenSet_RightJustifiedSubstring(entries, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff, analysis.LengthFilter, analysis.HashIndex, analysis.HashCount);
? new OrdinalStringFrozenSet_RightJustifiedSingleChar(entries, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff, lengthFilter, analysis.HashIndex)
: new OrdinalStringFrozenSet_RightJustifiedSubstring(entries, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff, lengthFilter, analysis.HashIndex, analysis.HashCount);
}
}
else
{
if (analysis.IgnoreCase)
{
frozenSet = analysis.AllAsciiIfIgnoreCase
? new OrdinalStringFrozenSet_LeftJustifiedCaseInsensitiveAsciiSubstring(entries, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff, analysis.LengthFilter, analysis.HashIndex, analysis.HashCount)
: new OrdinalStringFrozenSet_LeftJustifiedCaseInsensitiveSubstring(entries, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff, analysis.LengthFilter, analysis.HashIndex, analysis.HashCount);
? new OrdinalStringFrozenSet_LeftJustifiedCaseInsensitiveAsciiSubstring(entries, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff, lengthFilter, analysis.HashIndex, analysis.HashCount)
: new OrdinalStringFrozenSet_LeftJustifiedCaseInsensitiveSubstring(entries, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff, lengthFilter, analysis.HashIndex, analysis.HashCount);
}
else
{
frozenSet = analysis.HashCount == 1
? new OrdinalStringFrozenSet_LeftJustifiedSingleChar(entries, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff, analysis.LengthFilter, analysis.HashIndex)
: new OrdinalStringFrozenSet_LeftJustifiedSubstring(entries, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff, analysis.LengthFilter, analysis.HashIndex, analysis.HashCount);
? new OrdinalStringFrozenSet_LeftJustifiedSingleChar(entries, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff, lengthFilter, analysis.HashIndex)
: new OrdinalStringFrozenSet_LeftJustifiedSubstring(entries, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff, lengthFilter, analysis.HashIndex, analysis.HashCount);
}
}
}
Expand All @@ -163,12 +165,12 @@ private static FrozenSet<T> CreateFromSet<T>(HashSet<T> source)
if (analysis.IgnoreCase)
{
frozenSet = analysis.AllAsciiIfIgnoreCase
? new OrdinalStringFrozenSet_FullCaseInsensitiveAscii(entries, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff, analysis.LengthFilter)
: new OrdinalStringFrozenSet_FullCaseInsensitive(entries, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff, analysis.LengthFilter);
? new OrdinalStringFrozenSet_FullCaseInsensitiveAscii(entries, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff, lengthFilter)
: new OrdinalStringFrozenSet_FullCaseInsensitive(entries, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff, lengthFilter);
}
else
{
frozenSet = new OrdinalStringFrozenSet_Full(entries, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff, analysis.LengthFilter);
frozenSet = new OrdinalStringFrozenSet_Full(entries, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff, lengthFilter);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -159,14 +159,8 @@ private static AnalysisResults CreateAnalysisResults(
}
}

ulong lengthFilter = 0;
foreach (string s in uniqueStrings)
{
lengthFilter |= (1UL << (s.Length % 64));
}

// Return the analysis results.
return new AnalysisResults(ignoreCase, allAsciiIfIgnoreCase, index, count, minLength, maxLength, lengthFilter);
return new AnalysisResults(ignoreCase, allAsciiIfIgnoreCase, index, count, minLength, maxLength);
}

private delegate ReadOnlySpan<char> GetSpan(string s, int index, int count);
Expand Down Expand Up @@ -249,15 +243,14 @@ internal static bool HasSufficientUniquenessFactor(HashSet<string> set, ReadOnly

internal readonly struct AnalysisResults
{
public AnalysisResults(bool ignoreCase, bool allAsciiIfIgnoreCase, int hashIndex, int hashCount, int minLength, int maxLength, ulong lengthFilter)
public AnalysisResults(bool ignoreCase, bool allAsciiIfIgnoreCase, int hashIndex, int hashCount, int minLength, int maxLength)
{
IgnoreCase = ignoreCase;
AllAsciiIfIgnoreCase = allAsciiIfIgnoreCase;
HashIndex = hashIndex;
HashCount = hashCount;
MinimumLength = minLength;
MaximumLengthDiff = maxLength - minLength;
LengthFilter = lengthFilter;
}

public bool IgnoreCase { get; }
Expand All @@ -266,7 +259,6 @@ public AnalysisResults(bool ignoreCase, bool allAsciiIfIgnoreCase, int hashIndex
public int HashCount { get; }
public int MinimumLength { get; }
public int MaximumLengthDiff { get; }
public ulong LengthFilter { get; }

public bool SubstringHashing => HashCount != 0;
public bool RightJustifiedSubstring => HashIndex < 0;
Expand Down

0 comments on commit 39a44df

Please sign in to comment.