Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Performance improvements of Find recipe #4758

Merged
merged 6 commits into from
Dec 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 40 additions & 8 deletions rewrite-core/src/main/java/org/openrewrite/text/Find.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,7 @@
import org.openrewrite.remote.Remote;
import org.openrewrite.table.TextMatches;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

Expand Down Expand Up @@ -91,6 +89,22 @@ public String getDescription() {
@Nullable
String filePattern;

/**
 * Collects the newline positions needed to locate line starts for text at or after {@code offset}.
 * <p>
 * The returned deque contains, in ascending order, the index of the last {@code '\n'} found strictly
 * before {@code offset} (when one exists), followed by the index of every {@code '\n'} found at or
 * after {@code offset}. No index appears more than once.
 *
 * @param input  the text to scan
 * @param offset the position from which newlines are collected going forward
 * @return the ascending newline indexes; empty when {@code input} holds no qualifying newline
 */
private static Deque<Integer> findAllNewLineIndexes(String input, int offset) {
    Deque<Integer> indexes = new ArrayDeque<>();

    // lastIndexOf is inclusive of its fromIndex, so start one character earlier: a newline located
    // exactly at offset must not shadow the line break that precedes it, and must not be recorded
    // twice (the forward scan below already picks it up).
    int index = input.lastIndexOf('\n', offset - 1);
    if (index != -1) {
        indexes.add(index);
    }

    // Every newline from offset (inclusive) to the end of the input.
    index = input.indexOf('\n', offset);
    while (index != -1) {
        indexes.add(index);
        index = input.indexOf('\n', index + 1);
    }

    return indexes;
}

@Override
public TreeVisitor<?, ExecutionContext> getVisitor() {

Expand Down Expand Up @@ -123,24 +137,42 @@ public Tree visit(@Nullable Tree tree, ExecutionContext ctx) {
return sourceFile;
}
matcher.reset();

String sourceFilePath = sourceFile.getSourcePath().toString();

List<PlainText.Snippet> snippets = new ArrayList<>();
int previousEnd = 0;

Deque<Integer> newlineIndexes = null;
int lastNewLineIndex = -1;

while (matcher.find()) {
if (newlineIndexes == null) {
newlineIndexes = findAllNewLineIndexes(rawText, matcher.start());
}

int matchStart = matcher.start();
snippets.add(snippet(rawText.substring(previousEnd, matchStart)));
snippets.add(SearchResult.found(snippet(rawText.substring(matchStart, matcher.end()))));
previousEnd = matcher.end();

int startLine = Math.max(0, rawText.substring(0, matchStart).lastIndexOf('\n') + 1);
while (!newlineIndexes.isEmpty() && newlineIndexes.peek() < matchStart) {
lastNewLineIndex = newlineIndexes.pop();
}
int startLine = Math.max(0, lastNewLineIndex + 1);

int endLine = rawText.indexOf('\n', matcher.end());
if (endLine == -1) {
endLine = rawText.length();
}

textMatches.insertRow(ctx, new TextMatches.Row(
sourceFile.getSourcePath().toString(),
rawText.substring(startLine, matcher.start()) + "~~>" +
rawText.substring(matcher.start(), endLine)
sourceFilePath,
new StringBuilder(endLine - startLine + 3)
.append(rawText, startLine, matcher.start())
.append("~~>")
.append(rawText, matcher.start(), endLine)
.toString()
));
}
snippets.add(snippet(rawText.substring(previousEnd)));
Expand All @@ -160,8 +192,8 @@ public Tree visit(@Nullable Tree tree, ExecutionContext ctx) {
return visitor;
}


/**
 * Wraps {@code text} in a new {@link PlainText.Snippet}, constructed with an id obtained from
 * {@code Tree.randomId()} and with {@code Markers.EMPTY}.
 *
 * @param text the snippet content
 * @return the newly constructed snippet
 */
private static PlainText.Snippet snippet(String text) {
    PlainText.Snippet fragment = new PlainText.Snippet(Tree.randomId(), Markers.EMPTY, text);
    return fragment;
}

}
117 changes: 117 additions & 0 deletions rewrite-core/src/test/java/org/openrewrite/text/FindTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -111,4 +111,121 @@ void caseInsensitive() {
)
);
}

/**
 * Runs the character-class regex {@code [T\s]} over two identical lines and expects a
 * {@code ~~>} marker in front of each match shown in the expected text.
 * NOTE(review): the boolean arguments are presumably regex / caseSensitive / multiline, with
 * dotAll left {@code null} — confirm against the {@code Find} constructor.
 */
@Test
void regexBasicMultiLine() {
    Find recipe = new Find("[T\\s]", true, true, true, null, null);
    String before = """
        This is\ttext.
        This is\ttext.
        """;
    String after = """
        ~~>This~~> is~~>\ttext.~~>
        ~~>This~~> is~~>\ttext.
        """;
    rewriteRun(
        spec -> spec.recipe(recipe),
        text(before, after)
    );
}

/**
 * Runs {@code ^This.*below\.$} with both trailing flags set to {@code false} and supplies only
 * the input text, i.e. no expected-after text is given.
 * NOTE(review): per the test name the two {@code false} flags are presumably multiline and
 * dotAll — confirm against the {@code Find} constructor.
 */
@Test
void regexWithoutMultilineAndDotall() {
    Find recipe = new Find("^This.*below\\.$", true, true, false, false, null);
    String before = """
        This is text.
        This is a line below.
        This is a line above.
        This is text.
        This is a line below.
        """;
    rewriteRun(
        spec -> spec.recipe(recipe),
        text(before)
    );
}

/**
 * Runs {@code One.Two$} with the trailing flags {@code false, true} and supplies only the
 * input text, i.e. no expected-after text is given.
 * NOTE(review): per the test name the flags are presumably multiline=false, dotAll=true —
 * confirm against the {@code Find} constructor.
 */
@Test
void regexMatchingWhitespaceWithoutMultilineWithDotall() {
    Find recipe = new Find("One.Two$", true, true, false, true, null);
    // the `.` in the pattern above matches the space character on the same line
    //language=csv
    String before = """
        Zero
        One Two
        Three
        """;
    rewriteRun(
        spec -> spec.recipe(recipe),
        text(before)
    );
}

/**
 * Runs {@code ^This.*below\.$} with the trailing flags {@code false, true} and expects a
 * single {@code ~~>} marker at the very start of the text.
 * NOTE(review): per the test name the flags are presumably multiline=false, dotAll=true —
 * confirm against the {@code Find} constructor.
 */
@Test
void regexWithoutMultilineAndWithDotAll() {
    Find recipe = new Find("^This.*below\\.$", true, true, false, true, null);
    String before = """
        This is text.
        This is a line below.
        This is a line above.
        This is text.
        This is a line below.
        """;
    String after = """
        ~~>This is text.
        This is a line below.
        This is a line above.
        This is text.
        This is a line below.
        """;
    rewriteRun(
        spec -> spec.recipe(recipe),
        text(before, after)
    );
}

/**
 * Runs {@code ^This.*below\.$} with the trailing flags {@code true, false} and expects a
 * {@code ~~>} marker in front of each of the two "This is a line below." lines.
 * NOTE(review): per the test name the flags are presumably multiline=true, dotAll=false —
 * confirm against the {@code Find} constructor.
 */
@Test
void regexWithMultilineAndWithoutDotall() {
    Find recipe = new Find("^This.*below\\.$", true, true, true, false, null);
    String before = """
        This is text.
        This is a line below.
        This is a line above.
        This is text.
        This is a line below.
        """;
    String after = """
        This is text.
        ~~>This is a line below.
        This is a line above.
        This is text.
        ~~>This is a line below.
        """;
    rewriteRun(
        spec -> spec.recipe(recipe),
        text(before, after)
    );
}

/**
 * Runs {@code ^This.*below\.$} with both trailing flags set to {@code true} and expects a
 * single {@code ~~>} marker in front of the second line of the text.
 * NOTE(review): per the test name the two {@code true} flags are presumably multiline and
 * dotAll — confirm against the {@code Find} constructor.
 */
@Test
void regexWithBothMultilineAndDotAll() {
    Find recipe = new Find("^This.*below\\.$", true, true, true, true, null);
    String before = """
        The first line.
        This is a line below.
        This is a line above.
        This is text.
        This is a line below.
        """;
    String after = """
        The first line.
        ~~>This is a line below.
        This is a line above.
        This is text.
        This is a line below.
        """;
    rewriteRun(
        spec -> spec.recipe(recipe),
        text(before, after)
    );
}
}
Loading