Skip to content

Commit

Permalink
[release/8.0-staging] Fix regex lazy loop handling of backtracking st…
Browse files Browse the repository at this point in the history
…ate at max iteration limit (#97927)

* Fix regex lazy loop handling of backtracking state at max iteration limit

Upon entering a lazy loop, state is pushed onto the backtracking stack if the lazy loop might be backtracked into. That state is then dutifully popped off when unwinding the loop due to a failure to match. However, if the loop successfully matches its maximum number of iterations and the overall match still fails because of a failure after the lazy loop, that state still needs to be popped off the stack, but currently isn't. This change pops that state before jumping back to something prior to the lazy loop.

* Add a few more test variations

---------

Co-authored-by: Stephen Toub <stoub@microsoft.com>
  • Loading branch information
github-actions[bot] and stephentoub authored Feb 12, 2024
1 parent 9598872 commit 11ae42b
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3770,13 +3770,25 @@ void EmitLazy(RegexNode node)

using (clause)
{
// We're backtracking, which could either be to something prior to the lazy loop or to something
// inside of the lazy loop. If it's to something inside of the lazy loop, then either the loop
// will eventually succeed or we'll eventually end up unwinding back through the iterations all
// the way back to the loop not matching at all, in which case the state we first pushed on at the
// beginning of the !isAtomic section will get popped off. But if here we're instead going to jump
// to something prior to the lazy loop, then we need to pop off that state here.
if (doneLabel == originalDoneLabel)
{
EmitAdd(writer, "stackpos", -entriesPerIteration);
}

if (iterationMayBeEmpty)
{
// If we saw empty, it must have been in the most recent iteration, as we wouldn't have
// allowed additional iterations after one that was empty. Thus, we reset it back to
// false prior to backtracking / undoing that iteration.
writer.WriteLine($"{sawEmpty} = 0; // false");
}

Goto(doneLabel);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4075,6 +4075,8 @@ void EmitLazy(RegexNode node)
// Determine where to branch, either back to the lazy loop body to add an additional iteration,
// or to the last backtracking label.

Label jumpToDone = DefineLabel();

if (iterationMayBeEmpty)
{
// if (sawEmpty != 0)
Expand All @@ -4093,7 +4095,7 @@ void EmitLazy(RegexNode node)
Ldc(0);
Stloc(sawEmpty!);

BrFar(doneLabel);
Br(jumpToDone);
MarkLabel(sawEmptyZero);
}

Expand All @@ -4102,12 +4104,32 @@ void EmitLazy(RegexNode node)
// if (iterationCount >= maxIterations) goto jumpToDone;
Ldloc(iterationCount);
Ldc(maxIterations);
BgeFar(doneLabel);
Bge(jumpToDone);
}

// goto body;
BrFar(body);

MarkLabel(jumpToDone);

// We're backtracking, which could either be to something prior to the lazy loop or to something
// inside of the lazy loop. If it's to something inside of the lazy loop, then either the loop
// will eventually succeed or we'll eventually end up unwinding back through the iterations all
// the way back to the loop not matching at all, in which case the state we first pushed on at the
// beginning of the !isAtomic section will get popped off. But if here we're instead going to jump
// to something prior to the lazy loop, then we need to pop off that state here.
if (doneLabel == originalDoneLabel)
{
// stackpos -= entriesPerIteration;
Ldloc(stackpos);
Ldc(entriesPerIteration);
Sub();
Stloc(stackpos);
}

// goto done;
BrFar(doneLabel);

doneLabel = backtrack;
MarkLabel(skipBacktrack);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2374,6 +2374,11 @@ public static IEnumerable<object[]> AllMatches_TestData()
yield return new object[] { engine, "^(?:aaa|aa){1,5}?$", RegexOptions.None, "aaaaaaaa", new (int, int, string)[] { (0, 8, "aaaaaaaa") } };
yield return new object[] { engine, "^(?:aaa|aa){4}$", RegexOptions.None, "aaaaaaaa", new (int, int, string)[] { (0, 8, "aaaaaaaa") } };
yield return new object[] { engine, "^(?:aaa|aa){4}?$", RegexOptions.None, "aaaaaaaa", new (int, int, string)[] { (0, 8, "aaaaaaaa") } };
yield return new object[] { engine, "^((?:(xx?,xx?)|xx?,xx?>xx?,xx?)-?(x)??){1,2}$", RegexOptions.None, "xx,xx>xx,xx", new (int, int, string)[] { (0, 11, "xx,xx>xx,xx") } };
yield return new object[] { engine, "^((?:(xx?,xx?)|xx?,xx?>xx?,xx?)-?(x*)??){1,2}$", RegexOptions.None, "xx,xx>xx,xx", new (int, int, string)[] { (0, 11, "xx,xx>xx,xx") } };
yield return new object[] { engine, "^((?:(xx?,xx?)|xx?,xx?>xx?,xx?)-?(x+)??){1,2}$", RegexOptions.None, "xx,xx>xx,xx", new (int, int, string)[] { (0, 11, "xx,xx>xx,xx") } };
yield return new object[] { engine, "^((?:(x(x?),x(x?))|xx?,(x(x?)>x(x?)),(x((x))?))-?(x+?)??){1,2}$", RegexOptions.None, "xx,xx>xx,xx", new (int, int, string)[] { (0, 11, "xx,xx>xx,xx") } };
yield return new object[] { engine, "^(?:x|(x)??){2}$", RegexOptions.None, "x>", null };

// Mostly empty matches using unusual regexes consisting mostly of anchors only
yield return new object[] { engine, "^", RegexOptions.None, "", new (int, int, string)[] { (0, 0, "") } };
Expand Down

0 comments on commit 11ae42b

Please sign in to comment.