Skip to content

Commit

Permalink
Remove timeout from Scan, as well as fix most of the test issues
Browse files Browse the repository at this point in the history
  • Loading branch information
joperezr committed Feb 4, 2022
1 parent 2d3aef3 commit 2960e99
Show file tree
Hide file tree
Showing 12 changed files with 185 additions and 48 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ private static ImmutableArray<Diagnostic> EmitRegexMethod(IndentedTextWriter wri
DescribeExpression(writer, rm.Code.Tree.Root.Child(0), " // ", rm.Code); // skip implicit root capture
writer.WriteLine();

writer.WriteLine($" protected override void Scan(global::System.Text.RegularExpressions.Regex regex, global::System.ReadOnlySpan<char> text, int textstart, int prevlen, bool quick, global::System.TimeSpan timeout)");
writer.WriteLine($" protected override void Scan(global::System.Text.RegularExpressions.Regex regex, global::System.ReadOnlySpan<char> text, int textstart, int prevlen, bool quick)");
writer.WriteLine($" {{");
writer.Indent += 4;
EmitScan(writer, rm, id);
Expand Down Expand Up @@ -2261,7 +2261,7 @@ void EmitBoundary(RegexNode node)
_ => "base.IsECMABoundary",
};

using (EmitBlock(writer, $"if ({call}(pos{(sliceStaticPos > 0 ? $" + {sliceStaticPos}" : "")}, base.runtextbeg, end))"))
using (EmitBlock(writer, $"if ({call}(inputSpan, pos{(sliceStaticPos > 0 ? $" + {sliceStaticPos}" : "")}, base.runtextbeg, end))"))
{
writer.WriteLine($"goto {doneLabel};");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -338,14 +338,16 @@ protected void EnsureStorage() { }
protected virtual void Go() { throw null; }
protected virtual void InitTrackCount() { throw null; }
protected bool IsBoundary(int index, int startpos, int endpos) { throw null; }
protected bool IsBoundary(System.ReadOnlySpan<char> inputSpan, int index, int startpos, int endpos) { throw null; }
protected bool IsECMABoundary(int index, int startpos, int endpos) { throw null; }
protected bool IsECMABoundary(System.ReadOnlySpan<char> inputSpan, int index, int startpos, int endpos) { throw null; }
protected bool IsMatched(int cap) { throw null; }
protected int MatchIndex(int cap) { throw null; }
protected int MatchLength(int cap) { throw null; }
protected int Popcrawl() { throw null; }
protected internal System.Text.RegularExpressions.Match? Scan(System.Text.RegularExpressions.Regex regex, string text, int textbeg, int textend, int textstart, int prevlen, bool quick) { throw null; }
protected internal System.Text.RegularExpressions.Match? Scan(System.Text.RegularExpressions.Regex regex, string text, int textbeg, int textend, int textstart, int prevlen, bool quick, System.TimeSpan timeout) { throw null; }
protected internal virtual void Scan(System.Text.RegularExpressions.Regex regex, System.ReadOnlySpan<char> text, int textstart, int prevlen, bool quick, System.TimeSpan timeout) { throw null; }
protected internal virtual void Scan(System.Text.RegularExpressions.Regex regex, System.ReadOnlySpan<char> text, int textstart, int prevlen, bool quick) { throw null; }
protected void TransferCapture(int capnum, int uncapnum, int start, int end) { }
protected void Uncapture() { }
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ namespace System.Text.RegularExpressions
/// </summary>
public class Capture
{
internal Capture(string text, int index, int length)
internal Capture(string? text, int index, int length)
{
Text = text;
Index = index;
Expand All @@ -34,23 +34,23 @@ internal void AddBeginningToIndex(int beginning)
public int Length { get; private protected set; }

/// <summary>The original string</summary>
internal string Text { get; set; }
internal string? Text { get; set; }

/// <summary>Gets the captured substring from the input string.</summary>
/// <value>The substring that is captured by the match.</value>
public string Value => Text.Substring(Index, Length);
// Text may be null; in that case there is nothing to slice, so the value is the empty string.
public string Value => Text is not string text ? string.Empty : text.Substring(Index, Length);

/// <summary>Gets the captured span from the input string.</summary>
/// <value>The span that is captured by the match.</value>
public ReadOnlySpan<char> ValueSpan => Text.AsSpan(Index, Length);
// Text may be null; in that case there is nothing to slice, so the span is empty.
public ReadOnlySpan<char> ValueSpan => Text is not string text ? ReadOnlySpan<char>.Empty : text.AsSpan(Index, Length);

/// <summary>Returns the substring that was matched.</summary>
public override string ToString() => Value;

/// <summary>The substring to the left of the capture</summary>
internal ReadOnlyMemory<char> GetLeftSubstring() => Text.AsMemory(0, Index);
// Text may be null; in that case there is no text to the left of the capture, so return empty memory.
internal ReadOnlyMemory<char> GetLeftSubstring() => Text is not string text ? ReadOnlyMemory<char>.Empty : text.AsMemory(0, Index);

/// <summary>The substring to the right of the capture</summary>
internal ReadOnlyMemory<char> GetRightSubstring() => Text.AsMemory(Index + Length, Text.Length - Index - Length);
// Text may be null; in that case there is no text to the right of the capture, so return empty memory.
// The pattern-bound local is used for both the slice and the length so the property is read only once.
internal ReadOnlyMemory<char> GetRightSubstring() => Text is string text ? text.AsMemory(Index + Length, text.Length - Index - Length) : ReadOnlyMemory<char>.Empty;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,16 @@ internal sealed class CompiledRegexRunner : RegexRunner
{
private readonly ScanDelegate _scanMethod;

internal delegate void ScanDelegate(RegexRunner runner, Regex regex, ReadOnlySpan<char> text, int textstart, int prevlen, bool quick, TimeSpan timeout);
internal delegate void ScanDelegate(RegexRunner runner, Regex regex, ReadOnlySpan<char> text, int textstart, int prevlen, bool quick);

public CompiledRegexRunner(ScanDelegate scan, int trackCount)
{
_scanMethod = scan;
runtrackcount = trackCount;
}

protected internal override void Scan(Regex regex, ReadOnlySpan<char> text, int textstart, int prevlen, bool quick, TimeSpan timeout)
=> _scanMethod(this, regex, text, textstart, prevlen, quick, timeout);
protected internal override void Scan(Regex regex, ReadOnlySpan<char> text, int textstart, int prevlen, bool quick)
=> _scanMethod(this, regex, text, textstart, prevlen, quick);

protected override void InitTrackCount() { }
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ public class Group : Capture
internal int _capcount;
internal CaptureCollection? _capcoll;

internal Group(string text, int[] caps, int capcount, string name)
internal Group(string? text, int[] caps, int capcount, string name)
: base(text, capcount == 0 ? 0 : caps[(capcount - 1) * 2], capcount == 0 ? 0 : caps[(capcount * 2) - 1])
{
_caps = caps;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ public class Match : Group
internal bool _balancing; // whether we've done any balancing with this match. If we
// have done balancing, we'll need to do extra work in Tidy().

internal Match(Regex? regex, int capcount, string text, int begpos, int len, int startpos) :
internal Match(Regex? regex, int capcount, string? text, int begpos, int len, int startpos) :
base(text, new int[2], 0, "0")
{
_regex = regex;
Expand All @@ -66,7 +66,7 @@ internal Match(Regex? regex, int capcount, string text, int begpos, int len, int
/// <summary>Returns an empty Match object.</summary>
public static Match Empty { get; } = new Match(null, 1, string.Empty, 0, 0, 0);

internal void Reset(Regex regex, string text, int textbeg, int textend, int textstart)
internal void Reset(Regex regex, string? text, int textbeg, int textend, int textstart)
{
_regex = regex;
Text = text;
Expand Down Expand Up @@ -94,6 +94,7 @@ internal void Reset(Regex regex, string text, int textbeg, int textend, int text
public Match NextMatch()
{
Regex? r = _regex;
Debug.Assert(Text != null);
return r != null ?
r.Run(false, Length, Text, _textbeg, _textend - _textbeg, _textpos)! :
this;
Expand Down Expand Up @@ -338,7 +339,7 @@ internal sealed class MatchSparse : Match
{
private new readonly Hashtable _caps;

internal MatchSparse(Regex regex, Hashtable caps, int capcount, string text, int begpos, int len, int startpos) :
internal MatchSparse(Regex regex, Hashtable caps, int capcount, string? text, int begpos, int len, int startpos) :
base(regex, capcount, text, begpos, len, startpos)
{
_caps = caps;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -387,7 +387,7 @@ protected void InitializeReferences()
ReadOnlySpan<char> span = input.AsSpan(beginning, length);
runner.InitializeForScan(this, span, 0, span.Length, startat - beginning, quick);
runner.InitializeForGo();
runner.Scan(this, span, startat - beginning, prevlen, quick, internalMatchTimeout);
runner.Scan(this, span, startat - beginning, prevlen, quick);
Match? m = runner.runmatch;
runner.runmatch = null; // Reset runmatch
if (m is not null)
Expand Down Expand Up @@ -433,7 +433,7 @@ protected void InitializeReferences()
ReadOnlySpan<char> span = input.Slice(beginning, length);
runner.InitializeForScan(this, span, 0, span.Length, startat - beginning, quick);
runner.InitializeForGo();
runner.Scan(this, span, startat - beginning, prevlen, quick, internalMatchTimeout);
runner.Scan(this, span, startat - beginning, prevlen, quick);
Match? m = runner.runmatch;
runner.runmatch = null; // Reset runmatch
return m;
Expand All @@ -451,7 +451,83 @@ internal void Run<TState>(string input, int startat, ref TState state, MatchCall
RegexRunner runner = Interlocked.Exchange(ref _runner, null) ?? CreateRunner();
try
{
runner.ScanInternal(this, input, startat, ref state, callback, reuseMatchObject, internalMatchTimeout);
runner.InitializeTimeout(internalMatchTimeout);
while (true)
{
runner.InitializeForScan(this, input, 0, input.Length, startat, false);
runner.InitializeForGo();
runner.Scan(this, input, startat, -1, false);
Match? m = runner.runmatch;

if (m is not null)
{
if (m._matchcount[0] > 0)
{
if (m.Text != input)
m.Text = input;
if (!reuseMatchObject)
{
// We're not reusing match objects, so null out our field reference to the instance.
// It'll be recreated the next time one is needed.
runner.runmatch = null;
}
m.Tidy(runner.runtextpos);
if (!callback(ref state, m))
{
// If the callback returns false, we're done.
// Drop reference to text to avoid keeping it alive in a cache.
runner.runtext = null!;
if (reuseMatchObject)
{
// We're reusing the single match instance, so clear out its text as well.
// We don't do this if we're not reusing instances, as in that case we're
// dropping the whole reference to the match, and we no longer own the instance
// having handed it out to the callback.
m.Text = null!;
}
return;
}

// Now that we've matched successfully, update the starting position to reflect
// the current position, just as Match.NextMatch() would pass in _textpos as textstart.
runner.runtextstart = startat = runner.runtextpos;

// Reset state for another iteration.
runner.runtrackpos = runner.runtrack!.Length;
runner.runstackpos = runner.runstack!.Length;
runner.runcrawlpos = runner.runcrawl!.Length;
if (m.Length == 0)
{
if (runner.runtextpos == input.Length)
{
// Drop reference to text to avoid keeping it alive in a cache.
runner.runtext = null!;
if (reuseMatchObject)
{
// See above comment.
m.Text = null!;
}
return;
}

runner.runtextpos += ((this.Options & RegexOptions.RightToLeft) > 0) ? -1 : 1;
}

// Loop around to perform next match from where we left off.
continue;
}
else
{
// No match was found; if we are at the end of the input, just return.
if (startat == input.Length)
{
runner.runtext = null;
runner.runmatch = null;
return;
}
}
}
}
}
finally
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

using System.Collections.Generic;
using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;
using System.Globalization;
using System.Reflection;
using System.Reflection.Emit;
Expand Down Expand Up @@ -36,9 +37,9 @@ internal abstract class RegexCompiler
private static readonly MethodInfo s_isMatchedMethod = RegexRunnerMethod("IsMatched");
private static readonly MethodInfo s_matchLengthMethod = RegexRunnerMethod("MatchLength");
private static readonly MethodInfo s_matchIndexMethod = RegexRunnerMethod("MatchIndex");
private static readonly MethodInfo s_isBoundaryMethod = RegexRunnerMethod("IsBoundary");
private static readonly MethodInfo s_isBoundaryMethod = typeof(RegexRunner).GetMethod("IsBoundary", BindingFlags.NonPublic | BindingFlags.Instance, new[] { typeof(ReadOnlySpan<char>), typeof(int), typeof(int), typeof(int) })!;
private static readonly MethodInfo s_isWordCharMethod = RegexRunnerMethod("IsWordChar");
private static readonly MethodInfo s_isECMABoundaryMethod = RegexRunnerMethod("IsECMABoundary");
private static readonly MethodInfo s_isECMABoundaryMethod = typeof(RegexRunner).GetMethod("IsECMABoundary", BindingFlags.NonPublic | BindingFlags.Instance, new[] { typeof(ReadOnlySpan<char>), typeof(int), typeof(int), typeof(int) })!;
private static readonly MethodInfo s_crawlposMethod = RegexRunnerMethod("Crawlpos");
private static readonly MethodInfo s_charInClassMethod = RegexRunnerMethod("CharInClass");
private static readonly MethodInfo s_checkTimeoutMethod = RegexRunnerMethod("CheckTimeout");
Expand Down Expand Up @@ -2311,8 +2312,9 @@ void EmitBoundary(RegexNode node)
{
Debug.Assert(node.Kind is RegexNodeKind.Boundary or RegexNodeKind.NonBoundary or RegexNodeKind.ECMABoundary or RegexNodeKind.NonECMABoundary, $"Unexpected type: {node.Kind}");

// if (!IsBoundary(pos + sliceStaticPos, base.runtextbeg, end)) goto doneLabel;
// if (!IsBoundary(inputSpan, pos + sliceStaticPos, base.runtextbeg, end)) goto doneLabel;
Ldthis();
Ldloc(inputSpan);
Ldloc(pos);
if (sliceStaticPos > 0)
{
Expand Down Expand Up @@ -3970,33 +3972,33 @@ protected void EmitScan(DynamicMethod findFirstCharMethod, DynamicMethod goMetho

// if (prevlen == 0)
// {
_ilg!.Emit(OpCodes.Ldarga_S, 4);
Label prevelenIsNotZero = DefineLabel();
_ilg!.Emit(OpCodes.Ldarg_S, 4);
Ldc(0);
Ceq();
Label prevelenIsNotZero = DefineLabel();
BrfalseFar(prevelenIsNotZero);

// if (textstart == stoppos)
// {
Label textstartNotEqualToStoppos = DefineLabel();
_ilg!.Emit(OpCodes.Ldarg_3);
Ldloc(stoppos);
Ceq();
Label textstartNotEqualToStoppos = DefineLabel();
BrfalseFar(textstartNotEqualToStoppos);

// runmatch = Match.Empty;
// return;
Label returnLabel = DefineLabel();
Ldthis();
Call(s_matchGetEmptyMethod);
Stfld(s_runmatchField);
Label returnLabel = DefineLabel();
BrFar(returnLabel);
MarkLabel(textstartNotEqualToStoppos);

// runtextpos++;
// runtextpos += bump;
Ldthis();
Ldthisfld(s_runtextposField);
Ldc(1);
Ldloc(bump);
Add();
Stfld(s_runtextposField);
MarkLabel(prevelenIsNotZero);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -324,7 +324,7 @@ private bool MatchRef(int index, int length, ReadOnlySpan<char> inputSpan)

private void Backwardnext() => runtextpos += _rightToLeft ? 1 : -1;

protected internal override void Scan(Regex regex, ReadOnlySpan<char> text, int textstart, int prevlen, bool quick, TimeSpan timeout)
protected internal override void Scan(Regex regex, ReadOnlySpan<char> text, int textstart, int prevlen, bool quick)
{
// Configure the additional value to "bump" the position along each time we loop around
// to call FindFirstChar again, as well as the stopping position for the loop. We generally
Expand All @@ -346,7 +346,7 @@ protected internal override void Scan(Regex regex, ReadOnlySpan<char> text, int
return;
}

runtextpos++;
runtextpos += bump;
}

while (true)
Expand Down Expand Up @@ -770,31 +770,31 @@ private void Go(ReadOnlySpan<char> inputSpan)
continue;

case RegexOpcode.Boundary:
if (!IsBoundary(runtextpos, runtextbeg, runtextend))
if (!IsBoundary(inputSpan, runtextpos, runtextbeg, runtextend))
{
break;
}
advance = 0;
continue;

case RegexOpcode.NonBoundary:
if (IsBoundary(runtextpos, runtextbeg, runtextend))
if (IsBoundary(inputSpan, runtextpos, runtextbeg, runtextend))
{
break;
}
advance = 0;
continue;

case RegexOpcode.ECMABoundary:
if (!IsECMABoundary(runtextpos, runtextbeg, runtextend))
if (!IsECMABoundary(inputSpan, runtextpos, runtextbeg, runtextend))
{
break;
}
advance = 0;
continue;

case RegexOpcode.NonECMABoundary:
if (IsECMABoundary(runtextpos, runtextbeg, runtextend))
if (IsECMABoundary(inputSpan, runtextpos, runtextbeg, runtextend))
{
break;
}
Expand Down
Loading

0 comments on commit 2960e99

Please sign in to comment.