Skip to content

Commit

Permalink
Avoid RegexCode/RegexWriter for all engines other than RegexInterpret…
Browse files Browse the repository at this point in the history
…er (#65986)

* Avoid RegexCode/RegexWriter for all engines other than RegexInterpreter

* Address PR feedback
  • Loading branch information
stephentoub authored Mar 2, 2022
1 parent 2330b4b commit 093bdc4
Show file tree
Hide file tree
Showing 28 changed files with 433 additions and 480 deletions.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -178,10 +178,10 @@ private static bool IsSemanticTargetForGeneration(SemanticModel semanticModel, M
}

// Parse the input pattern
RegexCode code;
RegexTree tree;
try
{
code = RegexWriter.Write(RegexParser.Parse(pattern, regexOptions, culture), culture);
tree = RegexParser.Parse(pattern, regexOptions, culture);
}
catch (Exception e)
{
Expand All @@ -199,7 +199,7 @@ private static bool IsSemanticTargetForGeneration(SemanticModel semanticModel, M
pattern,
regexOptions,
matchTimeout ?? Timeout.Infinite,
code);
tree);

var regexType = new RegexType(
regexMethod,
Expand Down Expand Up @@ -233,7 +233,7 @@ static bool IsAllowedKind(SyntaxKind kind) =>
}

/// <summary>A regex method.</summary>
internal sealed record RegexMethod(MethodDeclarationSyntax MethodSyntax, string MethodName, string Modifiers, string Pattern, RegexOptions Options, int MatchTimeout, RegexCode Code);
internal sealed record RegexMethod(MethodDeclarationSyntax MethodSyntax, string MethodName, string Modifiers, string Pattern, RegexOptions Options, int MatchTimeout, RegexTree Tree);

/// <summary>A type holding a regex method.</summary>
internal sealed record RegexType(RegexMethod? Method, string Keyword, string Namespace, string Name)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@
<Compile Include="..\src\System\Threading\StackHelper.cs" Link="Production\StackHelper.cs" />
<Compile Include="..\src\System\Text\RegularExpressions\RegexCharClass.cs" Link="Production\RegexCharClass.cs" />
<Compile Include="..\src\System\Text\RegularExpressions\RegexCharClass.MappingTable.cs" Link="Production\RegexCharClass.MappingTable.cs" />
<Compile Include="..\src\System\Text\RegularExpressions\RegexCode.cs" Link="Production\RegexCode.cs" />
<Compile Include="..\src\System\Text\RegularExpressions\RegexFindOptimizations.cs" Link="Production\RegexFindOptimizations.cs" />
<Compile Include="..\src\System\Text\RegularExpressions\RegexNode.cs" Link="Production\RegexNode.cs" />
<Compile Include="..\src\System\Text\RegularExpressions\RegexNodeKind.cs" Link="Production\RegexNodeKind.cs" />
Expand All @@ -45,7 +44,6 @@
<Compile Include="..\src\System\Text\RegularExpressions\RegexPrefixAnalyzer.cs" Link="Production\RegexPrefixAnalyzer.cs" />
<Compile Include="..\src\System\Text\RegularExpressions\RegexTree.cs" Link="Production\RegexTree.cs" />
<Compile Include="..\src\System\Text\RegularExpressions\RegexTreeAnalyzer.cs" Link="Production\RegexTreeAnalyzer.cs" />
<Compile Include="..\src\System\Text\RegularExpressions\RegexWriter.cs" Link="Production\RegexWriter.cs" />
<Compile Include="..\src\System\Collections\HashtableExtensions.cs" Link="Production\HashtableExtensions.cs" />
</ItemGroup>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,11 @@
<Compile Include="System\Text\RegularExpressions\Regex.Timeout.cs" />
<Compile Include="System\Text\RegularExpressions\RegexCharClass.cs" />
<Compile Include="System\Text\RegularExpressions\RegexCharClass.MappingTable.cs" />
<Compile Include="System\Text\RegularExpressions\RegexCode.cs" />
<Compile Include="System\Text\RegularExpressions\RegexCompilationInfo.cs" />
<Compile Include="System\Text\RegularExpressions\RegexFindOptimizations.cs" />
<Compile Include="System\Text\RegularExpressions\RegexGeneratorAttribute.cs" />
<Compile Include="System\Text\RegularExpressions\RegexInterpreter.cs" />
<Compile Include="System\Text\RegularExpressions\RegexInterpreterCode.cs" />
<Compile Include="System\Text\RegularExpressions\RegexMatchTimeoutException.cs" />
<Compile Include="System\Text\RegularExpressions\RegexNode.cs" />
<Compile Include="System\Text\RegularExpressions\RegexNodeKind.cs" />
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ public bool IsMatch(string input)
ThrowHelper.ThrowArgumentNullException(ExceptionArgument.input);
}

return Run(quick: true, -1, input, 0, input.Length, UseOptionR() ? input.Length : 0) is null;
return Run(quick: true, -1, input, 0, input.Length, RightToLeft ? input.Length : 0) is null;
}

/// <summary>
Expand All @@ -87,7 +87,7 @@ public bool IsMatch(string input)
/// <returns><see langword="true"/> if the regular expression finds a match; otherwise, <see langword="false"/>.</returns>
/// <exception cref="RegexMatchTimeoutException">A time-out occurred.</exception>
public bool IsMatch(ReadOnlySpan<char> input) =>
Run(input, UseOptionR() ? input.Length : 0) is null;
Run(input, RightToLeft ? input.Length : 0) is null;

/// <summary>
/// Searches the input string for one or more matches using the previous pattern and options,
Expand Down Expand Up @@ -132,7 +132,7 @@ public Match Match(string input)
ThrowHelper.ThrowArgumentNullException(ExceptionArgument.input);
}

return Run(quick: false, -1, input, 0, input.Length, UseOptionR() ? input.Length : 0)!;
return Run(quick: false, -1, input, 0, input.Length, RightToLeft ? input.Length : 0)!;
}

/// <summary>
Expand All @@ -159,7 +159,7 @@ public Match Match(string input, int beginning, int length)
ThrowHelper.ThrowArgumentNullException(ExceptionArgument.input);
}

return Run(quick: false, -1, input, beginning, length, UseOptionR() ? beginning + length : beginning)!;
return Run(quick: false, -1, input, beginning, length, RightToLeft ? beginning + length : beginning)!;
}

/// <summary>
Expand Down Expand Up @@ -187,7 +187,7 @@ public MatchCollection Matches(string input)
ThrowHelper.ThrowArgumentNullException(ExceptionArgument.input);
}

return new MatchCollection(this, input, UseOptionR() ? input.Length : 0);
return new MatchCollection(this, input, RightToLeft ? input.Length : 0);
}

/// <summary>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ public string Replace(string input, string replacement)
ThrowHelper.ThrowArgumentNullException(ExceptionArgument.input);
}

return Replace(input, replacement, -1, UseOptionR() ? input.Length : 0);
return Replace(input, replacement, -1, RightToLeft ? input.Length : 0);
}

/// <summary>
Expand All @@ -57,7 +57,7 @@ public string Replace(string input, string replacement, int count)
ThrowHelper.ThrowArgumentNullException(ExceptionArgument.input);
}

return Replace(input, replacement, count, UseOptionR() ? input.Length : 0);
return Replace(input, replacement, count, RightToLeft ? input.Length : 0);
}

/// <summary>
Expand Down Expand Up @@ -111,7 +111,7 @@ public string Replace(string input, MatchEvaluator evaluator)
ThrowHelper.ThrowArgumentNullException(ExceptionArgument.input);
}

return Replace(evaluator, this, input, -1, UseOptionR() ? input.Length : 0);
return Replace(evaluator, this, input, -1, RightToLeft ? input.Length : 0);
}

/// <summary>
Expand All @@ -125,7 +125,7 @@ public string Replace(string input, MatchEvaluator evaluator, int count)
ThrowHelper.ThrowArgumentNullException(ExceptionArgument.input);
}

return Replace(evaluator, this, input, count, UseOptionR() ? input.Length : 0);
return Replace(evaluator, this, input, count, RightToLeft ? input.Length : 0);
}

/// <summary>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ public string[] Split(string input)
ThrowHelper.ThrowArgumentNullException(ExceptionArgument.input);
}

return Split(this, input, 0, UseOptionR() ? input.Length : 0);
return Split(this, input, 0, RightToLeft ? input.Length : 0);
}

/// <summary>
Expand All @@ -49,7 +49,7 @@ public string[] Split(string input, int count)
ThrowHelper.ThrowArgumentNullException(ExceptionArgument.input);
}

return Split(this, input, count, UseOptionR() ? input.Length : 0);
return Split(this, input, count, RightToLeft ? input.Length : 0);
}

/// <summary>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,6 @@ namespace System.Text.RegularExpressions
/// </summary>
public partial class Regex : ISerializable
{
internal const int MaxOptionShift = 11;

[StringSyntax(StringSyntaxAttribute.Regex)]
protected internal string? pattern; // The string pattern provided
protected internal RegexOptions roptions; // the top-level options from the options string
Expand All @@ -33,7 +31,6 @@ public partial class Regex : ISerializable

private WeakReference<RegexReplacement?>? _replref; // cached parsed replacement pattern
private volatile RegexRunner? _runner; // cached runner
private RegexCode? _code; // if interpreted, this is the code for RegexInterpreter

protected Regex()
{
Expand Down Expand Up @@ -63,64 +60,69 @@ public Regex([StringSyntax(StringSyntaxAttribute.Regex, "options")] string patte

internal Regex(string pattern, CultureInfo? culture)
{
// Call Init directly rather than delegating to a Regex ctor that takes
// options to enable linking / tree shaking to remove the Regex compiler
// and NonBacktracking implementation if it's not used.
Init(pattern, RegexOptions.None, s_defaultMatchTimeout, culture ?? CultureInfo.CurrentCulture);
// Validate arguments.
ValidatePattern(pattern);

// Parse and store the argument information.
RegexTree tree = Init(pattern, RegexOptions.None, s_defaultMatchTimeout, ref culture);

// Create the interpreter factory.
factory = new RegexInterpreterFactory(tree, culture);

// NOTE: This overload _does not_ delegate to the one that takes options, in order
// to avoid unnecessarily rooting the support for RegexOptions.NonBacktracking and the
// RegexOptions.Compiled compiler if no options are ever used.
}

internal Regex(string pattern, RegexOptions options, TimeSpan matchTimeout, CultureInfo? culture)
{
culture ??= RegexParser.GetTargetCulture(options);
Init(pattern, options, matchTimeout, culture);
// Validate arguments.
ValidatePattern(pattern);
ValidateOptions(options);
ValidateMatchTimeout(matchTimeout);

// Parse and store the argument information.
RegexTree tree = Init(pattern, options, matchTimeout, ref culture);

// Create the appropriate factory.
if ((options & RegexOptions.NonBacktracking) != 0)
{
// If we're in non-backtracking mode, create the appropriate factory.
factory = new SymbolicRegexRunnerFactory(_code, options, matchTimeout, culture);
_code = null;
factory = new SymbolicRegexRunnerFactory(tree, options, matchTimeout, culture);
}
else if (RuntimeFeature.IsDynamicCodeCompiled && UseOptionC())
else
{
// If the compile option is set and compilation is supported, then compile the code.
// If the compiler can't compile this regex, it'll return null, and we'll fall back
// to the interpreter.
factory = Compile(pattern, _code, options, matchTimeout != InfiniteMatchTimeout);
if (factory is not null)
if (RuntimeFeature.IsDynamicCodeCompiled && (options & RegexOptions.Compiled) != 0)
{
_code = null;
// If the compile option is set and compilation is supported, then compile the code.
// If the compiler can't compile this regex, it'll return null, and we'll fall back
// to the interpreter.
factory = Compile(pattern, tree, options, matchTimeout != InfiniteMatchTimeout);
}

// If no factory was created, fall back to creating one for the interpreter.
factory ??= new RegexInterpreterFactory(tree, culture);
}
}

/// <summary>Initializes the instance.</summary>
/// <remarks>
/// This is separated out of the constructor so that an app only using 'new Regex(pattern)'
/// rather than 'new Regex(pattern, options)' can avoid statically referencing the Regex
/// compiler, such that a tree shaker / linker can trim it away if it's not otherwise used.
/// </remarks>
[MemberNotNull(nameof(_code))]
private void Init(string pattern, RegexOptions options, TimeSpan matchTimeout, CultureInfo culture)
/// <summary>Stores the supplied arguments and capture information, returning the parsed expression.</summary>
private RegexTree Init(string pattern, RegexOptions options, TimeSpan matchTimeout, [NotNull] ref CultureInfo? culture)
{
ValidatePattern(pattern);
ValidateOptions(options);
ValidateMatchTimeout(matchTimeout);

this.pattern = pattern;
internalMatchTimeout = matchTimeout;
roptions = options;
internalMatchTimeout = matchTimeout;
culture ??= RegexParser.GetTargetCulture(options);

// Parse the input
RegexTree tree = RegexParser.Parse(pattern, roptions, culture);
// Parse the pattern.
RegexTree tree = RegexParser.Parse(pattern, options, culture);

// Generate the RegexCode from the node tree. This is required for interpreting,
// and is used as input into RegexOptions.Compiled and RegexOptions.NonBacktracking.
_code = RegexWriter.Write(tree, culture);
// Store the capture information from the parsed tree; the caller constructs the appropriate factory.
capnames = tree.CaptureNameToNumberMapping;
capslist = tree.CaptureNames;
caps = tree.CaptureNumberSparseMapping;
capsize = tree.CaptureCount;

capnames = tree.CapNames;
capslist = tree.CapsList;
caps = _code.Caps;
capsize = _code.CapSize;
return tree;
}

internal static void ValidatePattern(string pattern)
Expand All @@ -133,9 +135,9 @@ internal static void ValidatePattern(string pattern)

internal static void ValidateOptions(RegexOptions options)
{
const int MaxOptionShift = 11;
if (((((uint)options) >> MaxOptionShift) != 0) ||
((options & RegexOptions.ECMAScript) != 0 &&
(options & ~(RegexOptions.ECMAScript | RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.Compiled | RegexOptions.NonBacktracking | RegexOptions.CultureInvariant)) != 0))
((options & RegexOptions.ECMAScript) != 0 && (options & ~(RegexOptions.ECMAScript | RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.Compiled | RegexOptions.NonBacktracking | RegexOptions.CultureInvariant)) != 0))
{
ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.options);
}
Expand Down Expand Up @@ -199,8 +201,8 @@ protected IDictionary? CapNames
/// instantiating a non-compiled regex.
/// </summary>
[MethodImpl(MethodImplOptions.NoInlining)]
private static RegexRunnerFactory? Compile(string pattern, RegexCode code, RegexOptions options, bool hasTimeout) =>
RegexCompiler.Compile(pattern, code, options, hasTimeout);
private static RegexRunnerFactory? Compile(string pattern, RegexTree regexTree, RegexOptions options, bool hasTimeout) =>
RegexCompiler.Compile(pattern, regexTree, options, hasTimeout);

[Obsolete(Obsoletions.RegexCompileToAssemblyMessage, DiagnosticId = Obsoletions.RegexCompileToAssemblyDiagId, UrlFormat = Obsoletions.SharedUrlFormat)]
public static void CompileToAssembly(RegexCompilationInfo[] regexinfos, AssemblyName assemblyname) =>
Expand Down Expand Up @@ -254,7 +256,7 @@ public static string Unescape(string str)
/// <summary>
/// Indicates whether the regular expression matches from right to left.
/// </summary>
public bool RightToLeft => UseOptionR();
public bool RightToLeft => (roptions & RegexOptions.RightToLeft) != 0;

/// <summary>
/// Returns the regular expression pattern passed into the constructor
Expand Down Expand Up @@ -554,13 +556,14 @@ internal void Run<TState>(string input, int startat, ref TState state, MatchCall

/// <summary>Creates a new runner instance.</summary>
private RegexRunner CreateRunner() =>
factory?.CreateInstance() ??
new RegexInterpreter(_code!, RegexParser.GetTargetCulture(roptions));
// The factory needs to be set by the ctor. `factory` is a protected field, so it's possible a derived
// type nulls out the factory after we've set it, but that's the nature of the design.
factory!.CreateInstance();

/// <summary>True if the <see cref="RegexOptions.Compiled"/> option was set.</summary>
protected bool UseOptionC() => (roptions & RegexOptions.Compiled) != 0;

/// <summary>True if the <see cref="RegexOptions.RightToLeft"/> option was set.</summary>
protected internal bool UseOptionR() => (roptions & RegexOptions.RightToLeft) != 0;
protected internal bool UseOptionR() => RightToLeft;
}
}
Loading

0 comments on commit 093bdc4

Please sign in to comment.