Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding Regex.IsMatch(Span) and RegexRunner.Scan() methods #65473

Merged
merged 18 commits into from
Feb 26, 2022
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -215,14 +215,19 @@ private static ImmutableArray<Diagnostic> EmitRegexMethod(IndentedTextWriter wri
writer.WriteLine($" {{");

// Main implementation methods
writer.WriteLine($" protected override void InitTrackCount() => base.runtrackcount = {rm.Code.TrackCount};");
writer.WriteLine();

writer.WriteLine(" // Description:");
DescribeExpression(writer, rm.Code.Tree.Root.Child(0), " // ", rm.Code); // skip implicit root capture
writer.WriteLine();

writer.WriteLine($" protected override bool FindFirstChar()");
writer.WriteLine($" protected override void Scan(global::System.ReadOnlySpan<char> text)");
writer.WriteLine($" {{");
writer.Indent += 4;
EmitScan(writer, rm, id);
writer.Indent -= 4;
writer.WriteLine($" }}");
writer.WriteLine();

writer.WriteLine($" private bool FindFirstChar(global::System.ReadOnlySpan<char> inputSpan)");
writer.WriteLine($" {{");
writer.Indent += 4;
RequiredHelperFunctions requiredHelpers = EmitFindFirstChar(writer, rm, id);
Expand All @@ -233,7 +238,7 @@ private static ImmutableArray<Diagnostic> EmitRegexMethod(IndentedTextWriter wri
{
writer.WriteLine($" [global::System.Runtime.CompilerServices.SkipLocalsInit]");
}
writer.WriteLine($" protected override void Go()");
writer.WriteLine($" private bool Go(global::System.ReadOnlySpan<char> inputSpan)");
writer.WriteLine($" {{");
writer.Indent += 4;
requiredHelpers |= EmitGo(writer, rm, id);
Expand Down Expand Up @@ -271,6 +276,42 @@ private static ImmutableArray<Diagnostic> EmitRegexMethod(IndentedTextWriter wri
writer.WriteLine($" }}");
}

if ((requiredHelpers & RequiredHelperFunctions.IsBoundary) != 0)
{
writer.WriteLine();
writer.WriteLine($" /// <summary>Determines whether the character at the specified index is a boundary.</summary>");
writer.WriteLine($" [global::System.Runtime.CompilerServices.MethodImpl(global::System.Runtime.CompilerServices.MethodImplOptions.AggressiveInlining)]");
writer.WriteLine($" private bool IsBoundary(global::System.ReadOnlySpan<char>inputSpan, int index)");
joperezr marked this conversation as resolved.
Show resolved Hide resolved
writer.WriteLine($" {{");
writer.WriteLine($" const char ZeroWidthNonJoiner = '\\u200C', ZeroWidthJoiner = '\\u200D';");
writer.WriteLine();
writer.WriteLine($" return (index > base.runtextbeg && IsBoundaryWordChar(inputSpan![index - 1])) !=");
joperezr marked this conversation as resolved.
Show resolved Hide resolved
writer.WriteLine($" (index < inputSpan.Length && IsBoundaryWordChar(inputSpan![index]));");
joperezr marked this conversation as resolved.
Show resolved Hide resolved
joperezr marked this conversation as resolved.
Show resolved Hide resolved
writer.WriteLine();
writer.WriteLine($" bool IsBoundaryWordChar(char ch) =>");
joperezr marked this conversation as resolved.
Show resolved Hide resolved
writer.WriteLine($" IsWordChar(ch) ||");
writer.WriteLine($" (ch == ZeroWidthJoiner | ch == ZeroWidthNonJoiner);");
writer.WriteLine($" }}");
}

if ((requiredHelpers & RequiredHelperFunctions.IsECMABoundary) != 0)
{
writer.WriteLine();
writer.WriteLine($" /// <summary>Determines whether the character at the specified index is a boundary.</summary>");
writer.WriteLine($" [global::System.Runtime.CompilerServices.MethodImpl(global::System.Runtime.CompilerServices.MethodImplOptions.AggressiveInlining)]");
writer.WriteLine($" private bool IsECMABoundary(global::System.ReadOnlySpan<char>inputSpan, int index)");
joperezr marked this conversation as resolved.
Show resolved Hide resolved
writer.WriteLine($" {{");
writer.WriteLine($" return (index > base.runtextbeg && IsECMAWordChar(inputSpan![index - 1])) !=");
writer.WriteLine($" (index < inputSpan.Length && IsECMAWordChar(inputSpan![index]));");
joperezr marked this conversation as resolved.
Show resolved Hide resolved
writer.WriteLine();
writer.WriteLine($" bool IsECMAWordChar(char ch) =>");
writer.WriteLine($" ((((uint)ch - 'A') & ~0x20) < 26) || // ASCII letter");
writer.WriteLine($" (((uint)ch - '0') < 10) || // digit");
writer.WriteLine($" ch == '_' || // underscore");
writer.WriteLine($" ch == '\\u0130'; // latin capital letter I with dot above");
writer.WriteLine($" }}");
}

writer.WriteLine($" }}");
writer.WriteLine($" }}");
writer.WriteLine("}");
Expand Down Expand Up @@ -299,6 +340,37 @@ static void AppendHashtableContents(IndentedTextWriter writer, Hashtable ht)
}
}

/// <summary>Emits the body of the Scan override.</summary>
/// <param name="writer">The writer that receives the generated source lines.</param>
/// <param name="rm">The regex method being emitted; its MatchTimeout decides whether a timeout check is generated.</param>
/// <param name="id">Not referenced anywhere in this method body.</param>
/// <remarks>
/// The generated code is a <c>while (true)</c> loop that calls FindFirstChar(text) and, when that
/// succeeds, Go(text), returning as soon as Go reports success. Otherwise it returns when
/// base.runtextpos equals text.Length, or else increments base.runtextpos and tries again.
/// </remarks>
private static void EmitScan(IndentedTextWriter writer, RegexMethod rm, string id)
joperezr marked this conversation as resolved.
Show resolved Hide resolved
{
joperezr marked this conversation as resolved.
Show resolved Hide resolved
// Outer retry loop of the generated Scan body.
// NOTE(review): EmitBlock is defined outside this view; presumably it writes the given header plus
// an opening brace and closes the brace when disposed - confirm against its definition.
using (EmitBlock(writer, "while (true)"))
{
// Generated code first asks FindFirstChar whether there is a candidate position to try.
using (EmitBlock(writer, "if (FindFirstChar(text))"))
joperezr marked this conversation as resolved.
Show resolved Hide resolved
{
// The timeout check is only written into the generated code when the regex has a finite timeout.
if (rm.MatchTimeout != Timeout.Infinite)
{
writer.WriteLine("base.CheckTimeout();");
writer.WriteLine();
}

writer.WriteLine("// If we got a match, we're done.");
// Go returns a bool in the generated code, so success exits Scan immediately.
using (EmitBlock(writer, "if (Go(text))"))
{
writer.WriteLine("return;");
}
}
writer.WriteLine();
joperezr marked this conversation as resolved.
Show resolved Hide resolved

// Reached in the generated code when either FindFirstChar or Go returned false.
writer.WriteLine("// We failed to find a match. If we're at the end of the input, then we're done.");
using (EmitBlock(writer, "if (base.runtextpos == text.Length)"))
{
writer.WriteLine("return;");
}
writer.WriteLine();

// Not at the end of the input: bump the position by one and go around the loop again.
writer.WriteLine("base.runtextpos++;");
joperezr marked this conversation as resolved.
Show resolved Hide resolved
}
}

/// <summary>Emits the body of the FindFirstChar override.</summary>
private static RequiredHelperFunctions EmitFindFirstChar(IndentedTextWriter writer, RegexMethod rm, string id)
{
Expand Down Expand Up @@ -347,7 +419,6 @@ private static RequiredHelperFunctions EmitFindFirstChar(IndentedTextWriter writ
{
case FindNextStartingPositionMode.LeadingPrefix_LeftToRight_CaseSensitive:
Debug.Assert(!string.IsNullOrEmpty(code.FindOptimizations.LeadingCaseSensitivePrefix));
additionalDeclarations.Add("global::System.ReadOnlySpan<char> inputSpan = base.runtext;");
EmitIndexOf(code.FindOptimizations.LeadingCaseSensitivePrefix);
break;

Expand All @@ -356,13 +427,11 @@ private static RequiredHelperFunctions EmitFindFirstChar(IndentedTextWriter writ
case FindNextStartingPositionMode.LeadingSet_LeftToRight_CaseSensitive:
case FindNextStartingPositionMode.LeadingSet_LeftToRight_CaseInsensitive:
Debug.Assert(code.FindOptimizations.FixedDistanceSets is { Count: > 0 });
additionalDeclarations.Add("global::System.ReadOnlySpan<char> inputSpan = base.runtext;");
EmitFixedSet();
break;

case FindNextStartingPositionMode.LiteralAfterLoop_LeftToRight_CaseSensitive:
Debug.Assert(code.FindOptimizations.LiteralAfterLoop is not null);
additionalDeclarations.Add("global::System.ReadOnlySpan<char> inputSpan = base.runtext;");
EmitLiteralAfterAtomicLoop();
break;

Expand Down Expand Up @@ -463,7 +532,6 @@ bool EmitAnchors()
// the other anchors, which all skip all subsequent processing if found, with BOL we just use it
// to boost our position to the next line, and then continue normally with any searches.
writer.WriteLine("// Beginning-of-line anchor");
additionalDeclarations.Add("global::System.ReadOnlySpan<char> inputSpan = base.runtext;");
additionalDeclarations.Add("int beginning = base.runtextbeg;");
using (EmitBlock(writer, "if (pos > beginning && inputSpan[pos - 1] != '\\n')"))
{
Expand Down Expand Up @@ -763,13 +831,15 @@ private static RequiredHelperFunctions EmitGo(IndentedTextWriter writer, RegexMe
writer.WriteLine($"int end = start + {(node.Kind == RegexNodeKind.Multi ? node.Str!.Length : 1)};");
writer.WriteLine("base.Capture(0, start, end);");
writer.WriteLine("base.runtextpos = end;");
writer.WriteLine("return true;");
return requiredHelpers;

case RegexNodeKind.Empty:
// This case isn't common in production, but it's very common when first getting started with the
// source generator and seeing what happens as you add more to expressions. When approaching
// it from a learning perspective, this is very common, as it's the empty string you start with.
writer.WriteLine("base.Capture(0, base.runtextpos, base.runtextpos);");
writer.WriteLine("return true;");
return requiredHelpers;
}

Expand All @@ -781,7 +851,6 @@ private static RequiredHelperFunctions EmitGo(IndentedTextWriter writer, RegexMe

// Declare some locals.
string sliceSpan = "slice";
writer.WriteLine("global::System.ReadOnlySpan<char> inputSpan = base.runtext;");
writer.WriteLine("int pos = base.runtextpos, end = base.runtextend;");
writer.WriteLine($"int original_pos = pos;");
bool hasTimeout = EmitLoopTimeoutCounterIfNeeded(writer, rm);
Expand Down Expand Up @@ -826,7 +895,7 @@ private static RequiredHelperFunctions EmitGo(IndentedTextWriter writer, RegexMe
}
writer.WriteLine("base.runtextpos = pos;");
writer.WriteLine("base.Capture(0, original_pos, pos);");
writer.WriteLine("return;");
writer.WriteLine("return true;");
writer.WriteLine();

// We only get here in the code if the whole expression fails to match and jumps to
Expand All @@ -837,6 +906,7 @@ private static RequiredHelperFunctions EmitGo(IndentedTextWriter writer, RegexMe
{
EmitUncaptureUntil("0");
}
writer.WriteLine("return false;");

// We're done with the match.

Expand All @@ -846,8 +916,6 @@ private static RequiredHelperFunctions EmitGo(IndentedTextWriter writer, RegexMe
// And emit any required helpers.
if (additionalLocalFunctions.Count != 0)
{
writer.WriteLine("return;"); // not strictly necessary, just for readability

foreach (KeyValuePair<string, string[]> localFunctions in additionalLocalFunctions.OrderBy(k => k.Key))
{
writer.WriteLine();
Expand Down Expand Up @@ -2144,13 +2212,22 @@ void EmitBoundary(RegexNode node)

string call = node.Kind switch
{
RegexNodeKind.Boundary => "!base.IsBoundary",
RegexNodeKind.NonBoundary => "base.IsBoundary",
RegexNodeKind.ECMABoundary => "!base.IsECMABoundary",
_ => "base.IsECMABoundary",
RegexNodeKind.Boundary => "!IsBoundary",
RegexNodeKind.NonBoundary => "IsBoundary",
RegexNodeKind.ECMABoundary => "!IsECMABoundary",
_ => "IsECMABoundary",
};

var boundaryFunctionRequired = node.Kind switch
joperezr marked this conversation as resolved.
Show resolved Hide resolved
{
RegexNodeKind.Boundary or
RegexNodeKind.NonBoundary => RequiredHelperFunctions.IsBoundary | RequiredHelperFunctions.IsWordChar, // IsBoundary internally uses IsWordChar
_ => RequiredHelperFunctions.IsECMABoundary
};

using (EmitBlock(writer, $"if ({call}(pos{(sliceStaticPos > 0 ? $" + {sliceStaticPos}" : "")}, base.runtextbeg, end))"))
requiredHelpers |= boundaryFunctionRequired;

using (EmitBlock(writer, $"if ({call}(inputSpan, pos{(sliceStaticPos > 0 ? $" + {sliceStaticPos}" : "")}))"))
{
writer.WriteLine($"goto {doneLabel};");
}
Expand Down Expand Up @@ -3833,9 +3910,13 @@ public void Dispose()
private enum RequiredHelperFunctions
{
/// <summary>No additional functions are required.</summary>
None,
None = 0b0,
/// <summary>The IsWordChar helper is required.</summary>
IsWordChar
IsWordChar = 0b1,
/// <summary>The IsBoundary helper is required.</summary>
IsBoundary = 0b10,
/// <summary>The IsECMABoundary helper is required.</summary>
IsECMABoundary = 0b100
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,10 @@ public static void CompileToAssembly(System.Text.RegularExpressions.RegexCompila
public string GroupNameFromNumber(int i) { throw null; }
public int GroupNumberFromName(string name) { throw null; }
protected void InitializeReferences() { }
public bool IsMatch(System.ReadOnlySpan<char> input) { throw null; }
public static bool IsMatch(System.ReadOnlySpan<char> input, [System.Diagnostics.CodeAnalysis.StringSyntaxAttribute(System.Diagnostics.CodeAnalysis.StringSyntaxAttribute.Regex)] string pattern) { throw null; }
public static bool IsMatch(System.ReadOnlySpan<char> input, [System.Diagnostics.CodeAnalysis.StringSyntaxAttribute(System.Diagnostics.CodeAnalysis.StringSyntaxAttribute.Regex, "options")] string pattern, System.Text.RegularExpressions.RegexOptions options) { throw null; }
public static bool IsMatch(System.ReadOnlySpan<char> input, [System.Diagnostics.CodeAnalysis.StringSyntaxAttribute(System.Diagnostics.CodeAnalysis.StringSyntaxAttribute.Regex, "options")] string pattern, System.Text.RegularExpressions.RegexOptions options, System.TimeSpan matchTimeout) { throw null; }
public bool IsMatch(string input) { throw null; }
public bool IsMatch(string input, int startat) { throw null; }
public static bool IsMatch(string input, [System.Diagnostics.CodeAnalysis.StringSyntaxAttribute(System.Diagnostics.CodeAnalysis.StringSyntaxAttribute.Regex)] string pattern) { throw null; }
Expand Down Expand Up @@ -330,9 +334,9 @@ protected void DoubleCrawl() { }
protected void DoubleStack() { }
protected void DoubleTrack() { }
protected void EnsureStorage() { }
protected abstract bool FindFirstChar();
protected abstract void Go();
protected abstract void InitTrackCount();
protected virtual bool FindFirstChar() { throw null; }
protected virtual void Go() { throw null; }
protected virtual void InitTrackCount() { throw null; }
protected bool IsBoundary(int index, int startpos, int endpos) { throw null; }
protected bool IsECMABoundary(int index, int startpos, int endpos) { throw null; }
protected bool IsMatched(int cap) { throw null; }
Expand All @@ -341,6 +345,7 @@ protected void EnsureStorage() { }
protected int Popcrawl() { throw null; }
protected internal System.Text.RegularExpressions.Match? Scan(System.Text.RegularExpressions.Regex regex, string text, int textbeg, int textend, int textstart, int prevlen, bool quick) { throw null; }
protected internal System.Text.RegularExpressions.Match? Scan(System.Text.RegularExpressions.Regex regex, string text, int textbeg, int textend, int textstart, int prevlen, bool quick, System.TimeSpan timeout) { throw null; }
protected internal virtual void Scan(System.ReadOnlySpan<char> text) { throw null; }
protected void TransferCapture(int capnum, int uncapnum, int start, int end) { }
protected void Uncapture() { }
}
Expand Down
Loading