diff --git a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs index d0dd2d8dd2590..ad8db87de0084 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs +++ b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs @@ -222,7 +222,7 @@ private static ImmutableArray EmitRegexMethod(IndentedTextWriter wri DescribeExpression(writer, rm.Code.Tree.Root.Child(0), " // ", rm.Code); // skip implicit root capture writer.WriteLine(); - writer.WriteLine($" protected override void Scan(global::System.Text.RegularExpressions.Regex regex, global::System.ReadOnlySpan text, int textstart, int prevlen, bool quick, global::System.TimeSpan timeout)"); + writer.WriteLine($" protected override void Scan(global::System.Text.RegularExpressions.Regex regex, global::System.ReadOnlySpan text, int textstart, int prevlen, bool quick)"); writer.WriteLine($" {{"); writer.Indent += 4; EmitScan(writer, rm, id); @@ -2261,7 +2261,7 @@ void EmitBoundary(RegexNode node) _ => "base.IsECMABoundary", }; - using (EmitBlock(writer, $"if ({call}(pos{(sliceStaticPos > 0 ? $" + {sliceStaticPos}" : "")}, base.runtextbeg, end))")) + using (EmitBlock(writer, $"if ({call}(inputSpan, pos{(sliceStaticPos > 0 ? $" + {sliceStaticPos}" : "")}, base.runtextbeg, end))")) { writer.WriteLine($"goto {doneLabel};"); } diff --git a/src/libraries/System.Text.RegularExpressions/ref/System.Text.RegularExpressions.cs b/src/libraries/System.Text.RegularExpressions/ref/System.Text.RegularExpressions.cs index 8e688e348d0e2..5f5d4b5bf0302 100644 --- a/src/libraries/System.Text.RegularExpressions/ref/System.Text.RegularExpressions.cs +++ b/src/libraries/System.Text.RegularExpressions/ref/System.Text.RegularExpressions.cs @@ -338,14 +338,16 @@ protected void EnsureStorage() { } protected virtual void Go() { throw null; } protected virtual void InitTrackCount() { throw null; } protected bool IsBoundary(int index, int startpos, int endpos) { throw null; } + protected bool IsBoundary(System.ReadOnlySpan inputSpan, int index, int startpos, int endpos) { throw null; } protected bool IsECMABoundary(int index, int startpos, int endpos) { throw null; } + protected bool IsECMABoundary(System.ReadOnlySpan inputSpan, int index, int startpos, int endpos) { throw null; } protected bool IsMatched(int cap) { throw null; } protected int MatchIndex(int cap) { throw null; } protected int MatchLength(int cap) { throw null; } protected int Popcrawl() { throw null; } protected internal System.Text.RegularExpressions.Match? Scan(System.Text.RegularExpressions.Regex regex, string text, int textbeg, int textend, int textstart, int prevlen, bool quick) { throw null; } protected internal System.Text.RegularExpressions.Match? Scan(System.Text.RegularExpressions.Regex regex, string text, int textbeg, int textend, int textstart, int prevlen, bool quick, System.TimeSpan timeout) { throw null; } - protected internal virtual void Scan(System.Text.RegularExpressions.Regex regex, System.ReadOnlySpan text, int textstart, int prevlen, bool quick, System.TimeSpan timeout) { throw null; } + protected internal virtual void Scan(System.Text.RegularExpressions.Regex regex, System.ReadOnlySpan text, int textstart, int prevlen, bool quick) { throw null; } protected void TransferCapture(int capnum, int uncapnum, int start, int end) { } protected void Uncapture() { } } diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Capture.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Capture.cs index abdb182135d87..2eee81fadc66f 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Capture.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Capture.cs @@ -9,7 +9,7 @@ namespace System.Text.RegularExpressions /// public class Capture { - internal Capture(string text, int index, int length) + internal Capture(string? text, int index, int length) { Text = text; Index = index; @@ -34,23 +34,23 @@ internal void AddBeginningToIndex(int beginning) public int Length { get; private protected set; } /// The original string - internal string Text { get; set; } + internal string? Text { get; set; } /// Gets the captured substring from the input string. /// The substring that is captured by the match. - public string Value => Text.Substring(Index, Length); + public string Value => Text is string text ? text.Substring(Index, Length) : string.Empty; /// Gets the captured span from the input string. /// The span that is captured by the match. - public ReadOnlySpan ValueSpan => Text.AsSpan(Index, Length); + public ReadOnlySpan ValueSpan => Text is string text ? text.AsSpan(Index, Length) : ReadOnlySpan.Empty; /// Returns the substring that was matched. public override string ToString() => Value; /// The substring to the left of the capture - internal ReadOnlyMemory GetLeftSubstring() => Text.AsMemory(0, Index); + internal ReadOnlyMemory GetLeftSubstring() => Text is string text ? text.AsMemory(0, Index) : ReadOnlyMemory.Empty; /// The substring to the right of the capture - internal ReadOnlyMemory GetRightSubstring() => Text.AsMemory(Index + Length, Text.Length - Index - Length); + internal ReadOnlyMemory GetRightSubstring() => Text is string text ? text.AsMemory(Index + Length, Text.Length - Index - Length) : ReadOnlyMemory.Empty; } } diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/CompiledRegexRunner.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/CompiledRegexRunner.cs index 4711932555bc7..acd122f8a57e5 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/CompiledRegexRunner.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/CompiledRegexRunner.cs @@ -7,7 +7,7 @@ internal sealed class CompiledRegexRunner : RegexRunner { private readonly ScanDelegate _scanMethod; - internal delegate void ScanDelegate(RegexRunner runner, Regex regex, ReadOnlySpan text, int textstart, int prevlen, bool quick, TimeSpan timeout); + internal delegate void ScanDelegate(RegexRunner runner, Regex regex, ReadOnlySpan text, int textstart, int prevlen, bool quick); public CompiledRegexRunner(ScanDelegate scan, int trackCount) { @@ -15,8 +15,8 @@ public CompiledRegexRunner(ScanDelegate scan, int trackCount) runtrackcount = trackCount; } - protected internal override void Scan(Regex regex, ReadOnlySpan text, int textstart, int prevlen, bool quick, TimeSpan timeout) - => _scanMethod(this, regex, text, textstart, prevlen, quick, timeout); + protected internal override void Scan(Regex regex, ReadOnlySpan text, int textstart, int prevlen, bool quick) + => _scanMethod(this, regex, text, textstart, prevlen, quick); protected override void InitTrackCount() { } } diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Group.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Group.cs index f4b2a7fb2e980..2c34694f1ecaf 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Group.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Group.cs @@ -16,7 +16,7 @@ public class Group : Capture internal int _capcount; internal CaptureCollection? _capcoll; - internal Group(string text, int[] caps, int capcount, string name) + internal Group(string? text, int[] caps, int capcount, string name) : base(text, capcount == 0 ? 0 : caps[(capcount - 1) * 2], capcount == 0 ? 0 : caps[(capcount * 2) - 1]) { _caps = caps; diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Match.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Match.cs index 3c67526b40e18..19859fd2f0b2d 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Match.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Match.cs @@ -50,7 +50,7 @@ public class Match : Group internal bool _balancing; // whether we've done any balancing with this match. If we // have done balancing, we'll need to do extra work in Tidy(). - internal Match(Regex? regex, int capcount, string text, int begpos, int len, int startpos) : + internal Match(Regex? regex, int capcount, string? text, int begpos, int len, int startpos) : base(text, new int[2], 0, "0") { _regex = regex; @@ -66,7 +66,7 @@ internal Match(Regex? regex, int capcount, string text, int begpos, int len, int /// Returns an empty Match object. public static Match Empty { get; } = new Match(null, 1, string.Empty, 0, 0, 0); - internal void Reset(Regex regex, string text, int textbeg, int textend, int textstart) + internal void Reset(Regex regex, string? text, int textbeg, int textend, int textstart) { _regex = regex; Text = text; @@ -94,6 +94,7 @@ internal void Reset(Regex regex, string text, int textbeg, int textend, int text public Match NextMatch() { Regex? r = _regex; + Debug.Assert(Text != null); return r != null ? r.Run(false, Length, Text, _textbeg, _textend - _textbeg, _textpos)! : this; @@ -338,7 +339,7 @@ internal sealed class MatchSparse : Match { private new readonly Hashtable _caps; - internal MatchSparse(Regex regex, Hashtable caps, int capcount, string text, int begpos, int len, int startpos) : + internal MatchSparse(Regex regex, Hashtable caps, int capcount, string? text, int begpos, int len, int startpos) : base(regex, capcount, text, begpos, len, startpos) { _caps = caps; diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.cs index 52983d3ade42d..1e2f176033cfd 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.cs @@ -387,7 +387,7 @@ protected void InitializeReferences() ReadOnlySpan span = input.AsSpan(beginning, length); runner.InitializeForScan(this, span, 0, span.Length, startat - beginning, quick); runner.InitializeForGo(); - runner.Scan(this, span, startat - beginning, prevlen, quick, internalMatchTimeout); + runner.Scan(this, span, startat - beginning, prevlen, quick); Match? m = runner.runmatch; runner.runmatch = null; // Reset runmatch if (m is not null) @@ -433,7 +433,7 @@ protected void InitializeReferences() ReadOnlySpan span = input.Slice(beginning, length); runner.InitializeForScan(this, span, 0, span.Length, startat - beginning, quick); runner.InitializeForGo(); - runner.Scan(this, span, startat - beginning, prevlen, quick, internalMatchTimeout); + runner.Scan(this, span, startat - beginning, prevlen, quick); Match? m = runner.runmatch; runner.runmatch = null; // Reset runmatch return m; @@ -451,7 +451,83 @@ internal void Run(string input, int startat, ref TState state, MatchCall RegexRunner runner = Interlocked.Exchange(ref _runner, null) ?? CreateRunner(); try { - runner.ScanInternal(this, input, startat, ref state, callback, reuseMatchObject, internalMatchTimeout); + runner.InitializeTimeout(internalMatchTimeout); + while (true) + { + runner.InitializeForScan(this, input, 0, input.Length, startat, false); + runner.InitializeForGo(); + runner.Scan(this, input, startat, -1, false); + Match? m = runner.runmatch; + + if (m is not null) + { + if (m._matchcount[0] > 0) + { + if (m.Text != input) + m.Text = input; + if (!reuseMatchObject) + { + // We're not reusing match objects, so null out our field reference to the instance. + // It'll be recreated the next time one is needed. + runner.runmatch = null; + } + m.Tidy(runner.runtextpos); + if (!callback(ref state, m)) + { + // If the callback returns false, we're done. + // Drop reference to text to avoid keeping it alive in a cache. + runner.runtext = null!; + if (reuseMatchObject) + { + // We're reusing the single match instance, so clear out its text as well. + // We don't do this if we're not reusing instances, as in that case we're + // dropping the whole reference to the match, and we no longer own the instance + // having handed it out to the callback. + m.Text = null!; + } + return; + } + + // Now that we've matched successfully, update the starting position to reflect + // the current position, just as Match.NextMatch() would pass in _textpos as textstart. + runner.runtextstart = startat = runner.runtextpos; + + // Reset state for another iteration. + runner.runtrackpos = runner.runtrack!.Length; + runner.runstackpos = runner.runstack!.Length; + runner.runcrawlpos = runner.runcrawl!.Length; + if (m.Length == 0) + { + if (runner.runtextpos == input.Length) + { + // Drop reference to text to avoid keeping it alive in a cache. + runner.runtext = null!; + if (reuseMatchObject) + { + // See above comment. + m.Text = null!; + } + return; + } + + runner.runtextpos += ((this.Options & RegexOptions.RightToLeft) > 0) ? -1 : 1; + } + + // Loop around to perform next match from where we left off. + continue; + } + else + { + // if we are at the end of the input, just return. + if (startat == input.Length) + { + runner.runtext = null; + runner.runmatch = null; + return; + } + } + } + } } finally { diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs index cf2167e07c1c8..a015206bb23a8 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs @@ -3,6 +3,7 @@ using System.Collections.Generic; using System.Diagnostics; +using System.Diagnostics.CodeAnalysis; using System.Globalization; using System.Reflection; using System.Reflection.Emit; @@ -36,9 +37,9 @@ internal abstract class RegexCompiler private static readonly MethodInfo s_isMatchedMethod = RegexRunnerMethod("IsMatched"); private static readonly MethodInfo s_matchLengthMethod = RegexRunnerMethod("MatchLength"); private static readonly MethodInfo s_matchIndexMethod = RegexRunnerMethod("MatchIndex"); - private static readonly MethodInfo s_isBoundaryMethod = RegexRunnerMethod("IsBoundary"); + private static readonly MethodInfo s_isBoundaryMethod = typeof(RegexRunner).GetMethod("IsBoundary", BindingFlags.NonPublic | BindingFlags.Instance, new[] { typeof(ReadOnlySpan), typeof(int), typeof(int), typeof(int) })!; private static readonly MethodInfo s_isWordCharMethod = RegexRunnerMethod("IsWordChar"); - private static readonly MethodInfo s_isECMABoundaryMethod = RegexRunnerMethod("IsECMABoundary"); + private static readonly MethodInfo s_isECMABoundaryMethod = typeof(RegexRunner).GetMethod("IsECMABoundary", BindingFlags.NonPublic | BindingFlags.Instance, new[] { typeof(ReadOnlySpan), typeof(int), typeof(int), typeof(int) })!; private static readonly MethodInfo s_crawlposMethod = RegexRunnerMethod("Crawlpos"); private static readonly MethodInfo s_charInClassMethod = RegexRunnerMethod("CharInClass"); private static readonly MethodInfo s_checkTimeoutMethod = RegexRunnerMethod("CheckTimeout"); @@ -2311,8 +2312,9 @@ void EmitBoundary(RegexNode node) { Debug.Assert(node.Kind is RegexNodeKind.Boundary or RegexNodeKind.NonBoundary or RegexNodeKind.ECMABoundary or RegexNodeKind.NonECMABoundary, $"Unexpected type: {node.Kind}"); - // if (!IsBoundary(pos + sliceStaticPos, base.runtextbeg, end)) goto doneLabel; + // if (!IsBoundary(inputSpan, pos + sliceStaticPos, base.runtextbeg, end)) goto doneLabel; Ldthis(); + Ldloc(inputSpan); Ldloc(pos); if (sliceStaticPos > 0) { @@ -3970,33 +3972,33 @@ protected void EmitScan(DynamicMethod findFirstCharMethod, DynamicMethod goMetho // if (prevlen == 0) // { - _ilg!.Emit(OpCodes.Ldarga_S, 4); + Label prevelenIsNotZero = DefineLabel(); + _ilg!.Emit(OpCodes.Ldarg_S, 4); Ldc(0); Ceq(); - Label prevelenIsNotZero = DefineLabel(); BrfalseFar(prevelenIsNotZero); // if (textstart == stoppos) // { + Label textstartNotEqualToStoppos = DefineLabel(); _ilg!.Emit(OpCodes.Ldarg_3); Ldloc(stoppos); Ceq(); - Label textstartNotEqualToStoppos = DefineLabel(); BrfalseFar(textstartNotEqualToStoppos); // runmatch = Match.Empty; // return; + Label returnLabel = DefineLabel(); Ldthis(); Call(s_matchGetEmptyMethod); Stfld(s_runmatchField); - Label returnLabel = DefineLabel(); BrFar(returnLabel); MarkLabel(textstartNotEqualToStoppos); - // runtextpos++; + // runtextpos += bump; Ldthis(); Ldthisfld(s_runtextposField); - Ldc(1); + Ldloc(bump); Add(); Stfld(s_runtextposField); MarkLabel(prevelenIsNotZero); diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs index 712e8e512360b..fdec70cb0c202 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs @@ -324,7 +324,7 @@ private bool MatchRef(int index, int length, ReadOnlySpan inputSpan) private void Backwardnext() => runtextpos += _rightToLeft ? 1 : -1; - protected internal override void Scan(Regex regex, ReadOnlySpan text, int textstart, int prevlen, bool quick, TimeSpan timeout) + protected internal override void Scan(Regex regex, ReadOnlySpan text, int textstart, int prevlen, bool quick) { // Configure the additional value to "bump" the position along each time we loop around // to call FindFirstChar again, as well as the stopping position for the loop. We generally @@ -346,7 +346,7 @@ protected internal override void Scan(Regex regex, ReadOnlySpan text, int return; } - runtextpos++; + runtextpos += bump; } while (true) @@ -770,7 +770,7 @@ private void Go(ReadOnlySpan inputSpan) continue; case RegexOpcode.Boundary: - if (!IsBoundary(runtextpos, runtextbeg, runtextend)) + if (!IsBoundary(inputSpan, runtextpos, runtextbeg, runtextend)) { break; } @@ -778,7 +778,7 @@ private void Go(ReadOnlySpan inputSpan) continue; case RegexOpcode.NonBoundary: - if (IsBoundary(runtextpos, runtextbeg, runtextend)) + if (IsBoundary(inputSpan, runtextpos, runtextbeg, runtextend)) { break; } @@ -786,7 +786,7 @@ private void Go(ReadOnlySpan inputSpan) continue; case RegexOpcode.ECMABoundary: - if (!IsECMABoundary(runtextpos, runtextbeg, runtextend)) + if (!IsECMABoundary(inputSpan, runtextpos, runtextbeg, runtextend)) { break; } @@ -794,7 +794,7 @@ private void Go(ReadOnlySpan inputSpan) continue; case RegexOpcode.NonECMABoundary: - if (IsECMABoundary(runtextpos, runtextbeg, runtextend)) + if (IsECMABoundary(inputSpan, runtextpos, runtextbeg, runtextend)) { break; } diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexLWCGCompiler.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexLWCGCompiler.cs index 060750e23d58d..7c0767c02707e 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexLWCGCompiler.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexLWCGCompiler.cs @@ -58,7 +58,7 @@ internal sealed class RegexLWCGCompiler : RegexCompiler DynamicMethod goMethod = DefineDynamicMethod($"Regex{regexNum}_Go{description}", null, typeof(CompiledRegexRunner), s_paramTypes); EmitGo(); - DynamicMethod scanMethod = DefineDynamicMethod($"Regex{regexNum}_Scan{description}", null, typeof(CompiledRegexRunner), new[] { typeof(RegexRunner), typeof(Regex), typeof(ReadOnlySpan), typeof(int), typeof(int), typeof(bool), typeof(TimeSpan) }); + DynamicMethod scanMethod = DefineDynamicMethod($"Regex{regexNum}_Scan{description}", null, typeof(CompiledRegexRunner), new[] { typeof(RegexRunner), typeof(Regex), typeof(ReadOnlySpan), typeof(int), typeof(int), typeof(bool) }); EmitScan(findFirstCharMethod, goMethod); return new CompiledRegexRunnerFactory(scanMethod, code.TrackCount); diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexRunner.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexRunner.cs index e8ea102b0421f..8c6378ad2e6d7 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexRunner.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexRunner.cs @@ -88,15 +88,16 @@ protected RegexRunner() { } protected Match? Scan(Regex regex, string text, int textbeg, int textend, int textstart, int prevlen, bool quick) => Scan(regex, text, textbeg, textend, textstart, prevlen, quick, regex.MatchTimeout); - protected internal virtual void Scan(Regex regex, ReadOnlySpan text, int textstart, int prevlen, bool quick, TimeSpan timeout) + protected internal virtual void Scan(Regex regex, ReadOnlySpan text, int textstart, int prevlen, bool quick) { string? s = runtext; if (text != s) { s = text.ToString(); + runtext = s; } - Match? match = Scan(regex, s, 0, text.Length, textstart, prevlen, quick, timeout); + Match? match = Scan(regex, s, 0, text.Length, textstart, prevlen, quick, regex.internalMatchTimeout); runmatch = match; } @@ -378,7 +379,9 @@ private void DoCheckTimeout() if (0 > _timeoutOccursAt && 0 < currentMillis) return; - throw new RegexMatchTimeoutException(runtext!, runregex!.pattern!, TimeSpan.FromMilliseconds(_timeout)); + string input = runtext ?? string.Empty; + + throw new RegexMatchTimeoutException(input, runregex!.pattern!, TimeSpan.FromMilliseconds(_timeout)); } /// @@ -412,12 +415,12 @@ internal void InitializeForGo() { // Use a hashtabled Match object if the capture numbers are sparse runmatch = runregex!.caps is null ? - new Match(runregex, runregex.capsize, runtext!, runtextbeg, runtextend - runtextbeg, runtextstart) : - new MatchSparse(runregex, runregex.caps, runregex.capsize, runtext!, runtextbeg, runtextend - runtextbeg, runtextstart); + new Match(runregex, runregex.capsize, runtext ?? string.Empty, runtextbeg, runtextend - runtextbeg, runtextstart) : + new MatchSparse(runregex, runregex.caps, runregex.capsize, runtext, runtextbeg, runtextend - runtextbeg, runtextstart); } else { - runmatch.Reset(runregex!, runtext!, runtextbeg, runtextend, runtextstart); + runmatch.Reset(runregex!, runtext, runtextbeg, runtextend, runtextstart); } // Note we test runcrawl, because it is the last one to be allocated @@ -480,8 +483,15 @@ protected void EnsureStorage() /// protected bool IsBoundary(int index, int startpos, int endpos) { - return (index > startpos && RegexCharClass.IsBoundaryWordChar(runtext![index - 1])) != - (index < endpos && RegexCharClass.IsBoundaryWordChar(runtext![index])); + Debug.Assert(runtext != null, "runtext should not be null since this method is only callable by old codegen."); + return (index > startpos && RegexCharClass.IsBoundaryWordChar(runtext[index - 1])) != + (index < endpos && RegexCharClass.IsBoundaryWordChar(runtext[index])); + } + + protected bool IsBoundary(ReadOnlySpan inputSpan, int index, int startpos, int endpos) + { + return (index > startpos && RegexCharClass.IsBoundaryWordChar(inputSpan[index - 1])) != + (index < endpos && RegexCharClass.IsBoundaryWordChar(inputSpan[index])); } /// Called to determine a char's inclusion in the \w set. @@ -489,8 +499,15 @@ protected bool IsBoundary(int index, int startpos, int endpos) protected bool IsECMABoundary(int index, int startpos, int endpos) { - return (index > startpos && RegexCharClass.IsECMAWordChar(runtext![index - 1])) != - (index < endpos && RegexCharClass.IsECMAWordChar(runtext![index])); + Debug.Assert(runtext != null, "runtext should not be null since this method is only callable by old codegen."); + return (index > startpos && RegexCharClass.IsECMAWordChar(runtext[index - 1])) != + (index < endpos && RegexCharClass.IsECMAWordChar(runtext[index])); + } + + protected bool IsECMABoundary(ReadOnlySpan inputSpan, int index, int startpos, int endpos) + { + return (index > startpos && RegexCharClass.IsECMAWordChar(inputSpan[index - 1])) != + (index < endpos && RegexCharClass.IsECMAWordChar(inputSpan[index])); } protected static bool CharInSet(char ch, string set, string category) @@ -692,7 +709,10 @@ string DescribeTextPosition() if (runtextpos > runtextbeg) { - sb.Append(RegexCharClass.DescribeChar(runtext![runtextpos - 1])); + if (runtext != null) + { + sb.Append(RegexCharClass.DescribeChar(runtext[runtextpos - 1])); + } } else { @@ -703,7 +723,10 @@ string DescribeTextPosition() for (int i = runtextpos; i < runtextend; i++) { - sb.Append(RegexCharClass.DescribeChar(runtext![i])); + if (runtext != null) + { + sb.Append(RegexCharClass.DescribeChar(runtext[i])); + } } if (sb.Length >= 64) { diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexRunnerFactory.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexRunnerFactory.cs index dd2050848d692..6aff09f2ba897 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexRunnerFactory.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexRunnerFactory.cs @@ -83,9 +83,42 @@ private sealed class Runner : RegexRunner where TSetType : notnull internal Runner(SymbolicRegexMatcher matcher) => _matcher = matcher; - protected internal override void Scan(Regex regex, ReadOnlySpan text, int textstart, int prevlen, bool quick, TimeSpan timeout) + protected internal override void Scan(Regex regex, ReadOnlySpan text, int textstart, int prevlen, bool quick) { + // Configure the additional value to "bump" the position along each time we loop around + // to call FindFirstChar again, as well as the stopping position for the loop. We generally + // bump by 1 and stop at textend, but if we're examining right-to-left, we instead bump + // by -1 and stop at textbeg. + int stoppos = text.Length; + if (regex.RightToLeft) + { + stoppos = 0; + } + + // If previous match was empty or failed, advance by one before matching. + if (prevlen == 0) + { + if (textstart == stoppos) + { + runmatch = Match.Empty; + return; + } + + runtextpos += regex.RightToLeft ? -1 : 1; + } + Go(text); + + // If we got a match, we're done. + if (runmatch!._matchcount[0] > 0) + { + if (quick) + { + runmatch = null; + } + } + + return; } private void Go(ReadOnlySpan inputSpan)