From 90abcefa4cb0458444984eb781195e932193341b Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Fri, 20 Nov 2020 20:07:51 -0500 Subject: [PATCH] Fix handling of \G in Regex.Split/Replace (#44985) In our optimized Regex.Split loop, we failed to update runtextstart, which means the \G anchor (i.e. matching must start where the previous match ended) was not handled correctly. Co-authored-by: Stephen Toub --- .../Text/RegularExpressions/RegexRunner.cs | 4 +++ .../tests/Regex.KnownPattern.Tests.cs | 28 +++++++++++++++++++ .../tests/Regex.Replace.Tests.cs | 3 ++ .../tests/Regex.Split.Tests.cs | 5 +++- 4 files changed, 39 insertions(+), 1 deletion(-) diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexRunner.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexRunner.cs index e3f48e3dded14..3006cbc26cf8d 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexRunner.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexRunner.cs @@ -322,6 +322,10 @@ internal void Scan(Regex regex, string text, int textstart, ref TState s return; } + // Now that we've matched successfully, update the starting position to reflect + // the current position, just as Match.NextMatch() would pass in _textpos as textstart. + runtextstart = runtextpos; + // Reset state for another iteration. 
runtrackpos = runtrack!.Length; runstackpos = runstack!.Length; diff --git a/src/libraries/System.Text.RegularExpressions/tests/Regex.KnownPattern.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/Regex.KnownPattern.Tests.cs index 2012ecb81e5a9..8f7d7c26bcabe 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/Regex.KnownPattern.Tests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/Regex.KnownPattern.Tests.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. using System.Globalization; +using System.Linq; using Xunit; namespace System.Text.RegularExpressions.Tests @@ -873,6 +874,33 @@ public void Docs_EndOfLineComment(RegexOptions options) } + // https://docs.microsoft.com/en-us/dotnet/standard/base-types/anchors-in-regular-expressions#contiguous-matches-g + [Theory] + [InlineData(RegexOptions.None)] + [InlineData(RegexOptions.Compiled)] + public void Docs_Anchors_ContiguousMatches(RegexOptions options) + { + const string Input = "capybara,squirrel,chipmunk,porcupine"; + const string Pattern = @"\G(\w+\s?\w*),?"; + string[] expected = new[] { "capybara", "squirrel", "chipmunk", "porcupine" }; + + Match m = Regex.Match(Input, Pattern, options); + + string[] actual = new string[4]; + for (int i = 0; i < actual.Length; i++) + { + Assert.True(m.Success); + actual[i] = m.Groups[1].Value; + m = m.NextMatch(); + } + Assert.False(m.Success); + Assert.Equal(expected, actual); + + Assert.Equal( + ",arabypac,lerriuqs,knumpihcenipucrop", + Regex.Replace(Input, Pattern, m => string.Concat(m.Value.Reverse()))); + } + // // These patterns come from real-world customer usages diff --git a/src/libraries/System.Text.RegularExpressions/tests/Regex.Replace.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/Regex.Replace.Tests.cs index de74ea358e046..32fa6763a703f 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/Regex.Replace.Tests.cs +++ 
b/src/libraries/System.Text.RegularExpressions/tests/Regex.Replace.Tests.cs @@ -100,6 +100,9 @@ public static IEnumerable Replace_String_TestData() yield return new object[] { "([1-9])([1-9])([1-9])def", "abc123def!", "$+", RegexOptions.RightToLeft, -1, 10, "abc3!" }; yield return new object[] { "([1-9])([1-9])([1-9])def", "abc123def!", "$_", RegexOptions.RightToLeft, -1, 10, "abcabc123def!!" }; + + // Anchors + yield return new object[] { @"\Ga", "aaaaa", "b", RegexOptions.None, 5, 0, "bbbbb" }; } [Theory] diff --git a/src/libraries/System.Text.RegularExpressions/tests/Regex.Split.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/Regex.Split.Tests.cs index a2d54967d5811..dc539fe0da8fa 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/Regex.Split.Tests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/Regex.Split.Tests.cs @@ -52,6 +52,9 @@ public static IEnumerable Split_TestData() yield return new object[] { @"\d", "1a2b3c4d5e6f7g8h9i0k", RegexOptions.RightToLeft, 10, 20, new string[] { "1a", "b", "c", "d", "e", "f", "g", "h", "i", "k" } }; yield return new object[] { @"\d", "1a2b3c4d5e6f7g8h9i0k", RegexOptions.RightToLeft, 2, 20, new string[] { "1a2b3c4d5e6f7g8h9i", "k" } }; yield return new object[] { @"\d", "1a2b3c4d5e6f7g8h9i0k", RegexOptions.RightToLeft, 1, 20, new string[] { "1a2b3c4d5e6f7g8h9i0k" } }; + + // Anchors + yield return new object[] { @"(?<=\G..)(?=..)", "aabbccdd", RegexOptions.None, 8, 0, new string[] { "aa", "bb", "cc", "dd" } }; } [Theory] @@ -60,7 +63,7 @@ public static IEnumerable Split_TestData() public void Split(string pattern, string input, RegexOptions options, int count, int start, string[] expected) { bool isDefaultStart = RegexHelpers.IsDefaultStart(input, options, start); - bool isDefaultCount = RegexHelpers.IsDefaultStart(input, options, count); + bool isDefaultCount = RegexHelpers.IsDefaultCount(input, options, count); if (options == RegexOptions.None) { // Use Split(string), Split(string, 
string), Split(string, int) or Split(string, int, int)