Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix RegEx search_all for zero length matches/lookahead #85783

Merged
merged 1 commit into from
Jan 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 7 additions & 5 deletions modules/regex/regex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -270,16 +270,18 @@ Ref<RegExMatch> RegEx::search(const String &p_subject, int p_offset, int p_end)
// Collects every successive match of this pattern in `p_subject`.
//
// The subject is scanned by calling search() repeatedly: the first call starts
// at `p_offset`, each later call starts where the previous match ended.
// `p_end` is forwarded unchanged to every search() call.
//
// A negative `p_offset` is rejected through ERR_FAIL_COND_V_MSG, which hands
// back an empty Array.
//
// Zero-length matches (e.g. from `o*` or a bare lookahead/lookbehind) are kept
// in the result; the scan simply resumes one character later so that it keeps
// making progress instead of stopping at the first empty match.
TypedArray<RegExMatch> RegEx::search_all(const String &p_subject, int p_offset, int p_end) const {
	ERR_FAIL_COND_V_MSG(p_offset < 0, Array(), "RegEx search offset must be >= 0");

	int last_end = 0;
	TypedArray<RegExMatch> result;
	Ref<RegExMatch> match = search(p_subject, p_offset, p_end);

	while (match.is_valid()) {
		last_end = match->get_end(0);
		if (match->get_start(0) == last_end) {
			// Empty match: restarting at the same position would find it
			// again, so step one character forward before searching on.
			last_end++;
		}

		result.push_back(match);
		match = search(p_subject, last_end, p_end);
	}
	return result;
}
Expand Down
139 changes: 138 additions & 1 deletion modules/regex/tests/test_regex.h
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ TEST_CASE("[RegEx] Uninitialized use") {
ERR_PRINT_ON
}

TEST_CASE("[RegEx] Empty Pattern") {
TEST_CASE("[RegEx] Empty pattern") {
const String s = "Godot";

RegEx re;
Expand Down Expand Up @@ -222,6 +222,143 @@ TEST_CASE("[RegEx] Match start and end positions") {
CHECK(match->get_start("vowel") == 2);
CHECK(match->get_end("vowel") == 3);
}

// `o*` matches at every position of the subject, mostly with zero length.
// "Godot Engine" has 12 characters, so there are 13 candidate positions
// (0..12, including end of string); the two 'o' characters at indices 1 and 3
// are consumed as one-character matches, every other result is empty.
TEST_CASE("[RegEx] Asterisk search all") {
	const String s = "Godot Engine";

	RegEx re("o*");
	REQUIRE(re.is_valid());
	Ref<RegExMatch> match;
	const Array all_results = re.search_all(s);
	// REQUIRE (fatal) instead of CHECK: everything below indexes up to [12],
	// which is only in bounds when the count is exactly as expected.
	REQUIRE(all_results.size() == 13);

	match = all_results[0];
	REQUIRE(match != nullptr);
	CHECK(match->get_string(0) == "");
	match = all_results[1];
	REQUIRE(match != nullptr);
	CHECK(match->get_string(0) == "o");
	match = all_results[2];
	REQUIRE(match != nullptr);
	CHECK(match->get_string(0) == "");
	match = all_results[3];
	REQUIRE(match != nullptr);
	CHECK(match->get_string(0) == "o");

	// No further 'o' in the subject: the remaining nine matches are all empty.
	for (int i = 4; i < 13; i++) {
		match = all_results[i];
		REQUIRE(match != nullptr);
		CHECK(match->get_string(0) == "");
	}
}

// A lookahead constrains the match without being part of it: only the 'o'
// directly followed by 't' (index 3 of "Godot Engine") qualifies, and the
// reported span covers that single 'o'.
TEST_CASE("[RegEx] Simple lookahead") {
	const String subject = "Godot Engine";

	RegEx pattern("o(?=t)");
	REQUIRE(pattern.is_valid());

	Ref<RegExMatch> found = pattern.search(subject);
	REQUIRE(found != nullptr);

	CHECK(found->get_start(0) == 3);
	CHECK(found->get_end(0) == 4);
}

// The whole pattern is a lookahead, so group 0 is always empty while group 1
// captures the digits seen ahead of the current position. On "12" that gives
// one match per digit position: "12" seen from index 0, "2" seen from index 1.
TEST_CASE("[RegEx] Lookahead groups empty matches") {
	const String s = "12";

	RegEx re("(?=(\\d+))");
	REQUIRE(re.is_valid());
	Ref<RegExMatch> match = re.search(s);
	// Guard the dereferences below, as the other cases in this file do.
	REQUIRE(match != nullptr);
	CHECK(match->get_string(0) == "");
	CHECK(match->get_string(1) == "12");

	const Array all_results = re.search_all(s);
	// REQUIRE (fatal) instead of CHECK: the indexing below is only in bounds
	// when both matches were actually returned.
	REQUIRE(all_results.size() == 2);

	match = all_results[0];
	REQUIRE(match != nullptr);
	CHECK(match->get_string(0) == String(""));
	CHECK(match->get_string(1) == String("12"));

	match = all_results[1];
	REQUIRE(match != nullptr);
	CHECK(match->get_string(0) == String(""));
	CHECK(match->get_string(1) == String("2"));
}

// A lookbehind constrains the match without being part of it: only the 'o'
// directly preceded by 'd' (index 3 of "Godot Engine") qualifies, and the
// reported span covers that single 'o'.
TEST_CASE("[RegEx] Simple lookbehind") {
	const String subject = "Godot Engine";

	RegEx pattern("(?<=d)o");
	REQUIRE(pattern.is_valid());

	Ref<RegExMatch> found = pattern.search(subject);
	REQUIRE(found != nullptr);

	CHECK(found->get_start(0) == 3);
	CHECK(found->get_end(0) == 4);
}

// Every 'b' that directly follows an 'a' must be reported. In "ababbaabab"
// those are the 'b' characters at indices 1, 3, 7 and 9; the 'b' at index 4
// follows another 'b' and is skipped.
TEST_CASE("[RegEx] Simple lookbehind search all") {
	const String s = "ababbaabab";

	RegEx re("(?<=a)b");
	REQUIRE(re.is_valid());
	const Array all_results = re.search_all(s);
	// REQUIRE (fatal) instead of CHECK: the indexing below is only in bounds
	// when all four matches were actually returned.
	REQUIRE(all_results.size() == 4);

	Ref<RegExMatch> match = all_results[0];
	REQUIRE(match != nullptr);
	CHECK(match->get_start(0) == 1);
	CHECK(match->get_end(0) == 2);

	match = all_results[1];
	REQUIRE(match != nullptr);
	CHECK(match->get_start(0) == 3);
	CHECK(match->get_end(0) == 4);

	match = all_results[2];
	REQUIRE(match != nullptr);
	CHECK(match->get_start(0) == 7);
	CHECK(match->get_end(0) == 8);

	match = all_results[3];
	REQUIRE(match != nullptr);
	CHECK(match->get_start(0) == 9);
	CHECK(match->get_end(0) == 10);
}

// The whole pattern is a lookbehind, so group 0 is an empty match located
// right after each 'b', while group 1 captures that preceding 'b'. In
// "abaaabab" the 'b' characters sit at indices 1, 5 and 7, giving empty
// matches at positions 2, 6 and 8 (the last one at end of string).
TEST_CASE("[RegEx] Lookbehind groups empty matches") {
	const String s = "abaaabab";

	RegEx re("(?<=(b))");
	REQUIRE(re.is_valid());
	Ref<RegExMatch> match;

	const Array all_results = re.search_all(s);
	// REQUIRE (fatal) instead of CHECK: the indexing below is only in bounds
	// when all three matches were actually returned.
	REQUIRE(all_results.size() == 3);

	match = all_results[0];
	REQUIRE(match != nullptr);
	CHECK(match->get_start(0) == 2);
	CHECK(match->get_end(0) == 2);
	CHECK(match->get_start(1) == 1);
	CHECK(match->get_end(1) == 2);
	CHECK(match->get_string(0) == String(""));
	CHECK(match->get_string(1) == String("b"));

	match = all_results[1];
	REQUIRE(match != nullptr);
	CHECK(match->get_start(0) == 6);
	CHECK(match->get_end(0) == 6);
	CHECK(match->get_start(1) == 5);
	CHECK(match->get_end(1) == 6);
	CHECK(match->get_string(0) == String(""));
	CHECK(match->get_string(1) == String("b"));

	match = all_results[2];
	REQUIRE(match != nullptr);
	CHECK(match->get_start(0) == 8);
	CHECK(match->get_end(0) == 8);
	CHECK(match->get_start(1) == 7);
	CHECK(match->get_end(1) == 8);
	CHECK(match->get_string(0) == String(""));
	CHECK(match->get_string(1) == String("b"));
}

} // namespace TestRegEx

#endif // TEST_REGEX_H