Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use IndexOf for .* in RegexInterpreter/Compiler #31930

Merged
merged 3 commits on Feb 8, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -183,9 +183,6 @@
<data name="UnexpectedOpcode" xml:space="preserve">
<value>Unexpected opcode in regular expression generation: {0}.</value>
</data>
<data name="UnimplementedState" xml:space="preserve">
<value>Unimplemented state.</value>
</data>
<data name="UnknownProperty" xml:space="preserve">
<value>Unknown property '{0}'.</value>
</data>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ internal sealed class RegexCode
public const int Bol = 14; // ^
public const int Eol = 15; // $
public const int Boundary = 16; // \b
public const int Nonboundary = 17; // \B
public const int NonBoundary = 17; // \B
public const int Beginning = 18; // \A
public const int Start = 19; // \G
public const int EndZ = 20; // \Z
Expand Down Expand Up @@ -170,7 +170,7 @@ public static int OpcodeSize(int opcode)
case Bol:
case Eol:
case Boundary:
case Nonboundary:
case NonBoundary:
case ECMABoundary:
case NonECMABoundary:
case Beginning:
Expand Down Expand Up @@ -245,7 +245,7 @@ private static string OperatorDescription(int Opcode)
Bol => nameof(Bol),
Eol => nameof(Eol),
Boundary => nameof(Boundary),
Nonboundary => nameof(Nonboundary),
NonBoundary => nameof(NonBoundary),
Beginning => nameof(Beginning),
Start => nameof(Start),
EndZ => nameof(EndZ),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1769,7 +1769,7 @@ static bool NodeSupportsNonBacktrackingImplementation(RegexNode node, int maxDep
case RegexNode.Multi:
// Boundaries are like set checks and don't involve repetition, either.
case RegexNode.Boundary:
case RegexNode.Nonboundary:
case RegexNode.NonBoundary:
case RegexNode.ECMABoundary:
case RegexNode.NonECMABoundary:
// Anchors are also trivial.
Expand Down Expand Up @@ -2259,7 +2259,7 @@ void EmitNode(RegexNode node)
break;

case RegexNode.Boundary:
case RegexNode.Nonboundary:
case RegexNode.NonBoundary:
case RegexNode.ECMABoundary:
case RegexNode.NonECMABoundary:
EmitBoundary(node);
Expand Down Expand Up @@ -2418,7 +2418,7 @@ void EmitBoundary(RegexNode node)
BrfalseFar(doneLabel);
break;

case RegexNode.Nonboundary:
case RegexNode.NonBoundary:
Callvirt(s_isBoundaryMethod);
BrtrueFar(doneLabel);
break;
Expand Down Expand Up @@ -3891,7 +3891,7 @@ private void GenerateOneCode()
}

case RegexCode.Boundary:
case RegexCode.Nonboundary:
case RegexCode.NonBoundary:
//: if (!IsBoundary(Textpos(), _textbeg, _textend))
//: break Backward;
Ldthis();
Expand Down Expand Up @@ -4355,27 +4355,26 @@ private void GenerateOneCode()
case RegexCode.Oneloopatomic | RegexCode.Ci | RegexCode.Rtl:
case RegexCode.Notoneloopatomic | RegexCode.Ci | RegexCode.Rtl:
case RegexCode.Setloopatomic | RegexCode.Ci | RegexCode.Rtl:
//: int c = Operand(1);
//: if (c > Rightchars())
//: c = Rightchars();
//: int len = Operand(1);
//: if (len > Rightchars())
//: len = Rightchars();
//: char ch = (char)Operand(0);
//: int i;
//: for (i = c; i > 0; i--)
//: for (i = len; i > 0; i--)
//: {
//: if (Rightcharnext() != ch)
//: {
//: Leftnext();
//: break;
//: }
//: }
//: if (c > i)
//: Track(c - i - 1, Textpos() - 1);
//: if (len > i)
//: Track(len - i - 1, Textpos() - 1);
{
LocalBuilder cLocal = _temp1Local!;
LocalBuilder lenLocal = _temp2Local!;
LocalBuilder iLocal = _temp1Local!;
charInClassLocal = _temp3Local!;
Label l1 = DefineLabel();
Label l2 = DefineLabel();
Label loopEnd = DefineLabel();

int c = Operand(1);
if (c == 0)
Expand Down Expand Up @@ -4404,78 +4403,137 @@ private void GenerateOneCode()
Ldc(c);
MarkLabel(l4);
}
Dup();
Stloc(lenLocal);
Ldc(1);
Add();
Stloc(cLocal);

MarkLabel(l1);
Ldloc(cLocal);
Ldc(1);
Sub();
Dup();
Stloc(cLocal);
Ldc(0);
if (Code() == RegexCode.Setloop || Code() == RegexCode.Setloopatomic)
{
BleFar(l2);
}
else
// If this is a notoneloop{atomic} and we're left-to-right and case-sensitive,
// we can use the vectorized IndexOf to search for the target character.
if ((Code() == RegexCode.Notoneloop || Code() == RegexCode.Notoneloopatomic) &&
!IsRightToLeft() &&
!IsCaseInsensitive())
{
Ble(l2);
}
Stloc(lenLocal);

if (IsRightToLeft())
{
Leftcharnext();
}
else
{
Rightcharnext();
}
// i = runtext.AsSpan(runtextpos, len).IndexOf(ch);
Ldloc(_runtextLocal!);
Ldloc(_runtextposLocal!);
Ldloc(lenLocal);
Call(s_stringAsSpanIntIntMethod);
Ldc(Operand(0));
Call(s_spanIndexOf);
Stloc(iLocal);

if (Code() == RegexCode.Setloop || Code() == RegexCode.Setloopatomic)
{
EmitTimeoutCheck();
EmitMatchCharacterClass(_strings![Operand(0)], IsCaseInsensitive(), charInClassLocal);
BrtrueFar(l1);
Label charFound = DefineLabel();

// if (i != -1) goto charFound;
Ldloc(iLocal);
Ldc(-1);
Bne(charFound);
stephentoub marked this conversation as resolved.
Show resolved Hide resolved

// runtextpos += len;
// i = 0;
// goto loopEnd;
Ldloc(_runtextposLocal!);
Ldloc(lenLocal);
Add();
Stloc(_runtextposLocal!);
Ldc(0);
Stloc(iLocal);
BrFar(loopEnd);
stephentoub marked this conversation as resolved.
Show resolved Hide resolved

// charFound:
// runtextpos += i;
// i = len - i;
// goto loopEnd;
MarkLabel(charFound);
Ldloc(_runtextposLocal!);
Ldloc(iLocal);
Add();
Stloc(_runtextposLocal!);
Ldloc(lenLocal);
Ldloc(iLocal);
Sub();
Stloc(iLocal);
BrFar(loopEnd);
}
else
{
if (IsCaseInsensitive())
// Otherwise, we emit the open-coded loop.

Dup();
Stloc(lenLocal);
Ldc(1);
Add();
Stloc(iLocal);

Label loopCondition = DefineLabel();
MarkLabel(loopCondition);
Ldloc(iLocal);
Ldc(1);
Sub();
Dup();
Stloc(iLocal);
Ldc(0);
if (Code() == RegexCode.Setloop || Code() == RegexCode.Setloopatomic)
{
CallToLower();
BleFar(loopEnd);
}
else
{
Ble(loopEnd);
stephentoub marked this conversation as resolved.
Show resolved Hide resolved
}

Ldc(Operand(0));
if (Code() == RegexCode.Oneloop || Code() == RegexCode.Oneloopatomic)
if (IsRightToLeft())
{
Beq(l1);
Leftcharnext();
}
else
{
Debug.Assert(Code() == RegexCode.Notoneloop || Code() == RegexCode.Notoneloopatomic);
Bne(l1);
Rightcharnext();
}
}

Ldloc(_runtextposLocal!);
Ldc(1);
Sub(IsRightToLeft());
Stloc(_runtextposLocal!);
if (Code() == RegexCode.Setloop || Code() == RegexCode.Setloopatomic)
{
EmitTimeoutCheck();
EmitMatchCharacterClass(_strings![Operand(0)], IsCaseInsensitive(), charInClassLocal);
BrtrueFar(loopCondition);
}
else
{
if (IsCaseInsensitive())
{
CallToLower();
}

MarkLabel(l2);
Ldc(Operand(0));
if (Code() == RegexCode.Oneloop || Code() == RegexCode.Oneloopatomic)
{
Beq(loopCondition);
}
else
{
Debug.Assert(Code() == RegexCode.Notoneloop || Code() == RegexCode.Notoneloopatomic);
Bne(loopCondition);
}
}

Ldloc(_runtextposLocal!);
Ldc(1);
Sub(IsRightToLeft());
Stloc(_runtextposLocal!);
}

// loopEnd:
MarkLabel(loopEnd);
if (Code() != RegexCode.Oneloopatomic && Code() != RegexCode.Notoneloopatomic && Code() != RegexCode.Setloopatomic)
{
// if (len <= i) goto advance;
Ldloc(lenLocal);
Ldloc(cLocal);
Ldloc(iLocal);
Ble(AdvanceLabel());

// TrackPush(len - i - 1, runtextpos - Bump())
ReadyPushTrack();
Ldloc(lenLocal);
Ldloc(cLocal);
Ldloc(iLocal);
Sub();
Ldc(1);
Sub();
Expand Down Expand Up @@ -4667,7 +4725,8 @@ private void GenerateOneCode()
break;

default:
throw new NotImplementedException(SR.UnimplementedState);
Debug.Fail($"Unimplemented state: {_regexopcode:X8}");
break;
}
}

Expand Down
Loading