From 666c515c6e62bcc26396fc5211782907f4c8c2a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20M=C3=BCller?= Date: Thu, 13 Apr 2023 10:32:33 +0200 Subject: [PATCH 1/3] Fix infinite loop with MATCH_INVALID_UTF in PCRE2 <10.36 --- src/regex/pcre2.cr | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/regex/pcre2.cr b/src/regex/pcre2.cr index 4bfa074f4289..6c64fa1d8e41 100644 --- a/src/regex/pcre2.cr +++ b/src/regex/pcre2.cr @@ -23,7 +23,9 @@ module Regex::PCRE2 # :nodoc: def initialize(*, _source @source : String, _options @options) options = pcre2_compile_options(options) | LibPCRE2::UTF | LibPCRE2::DUPNAMES | LibPCRE2::UCP - if PCRE2.version_number >= {10, 34} + # MATCH_INVALID_UTF was introduced in 10.34 but a bug that can lead to an + # infinite loop is only fixed in 10.36 (https://github.com/PCRE2Project/pcre2/commit/e0c6029a62db9c2161941ecdf459205382d4d379). + if PCRE2.version_number >= {10, 36} options |= LibPCRE2::MATCH_INVALID_UTF end @re = PCRE2.compile(source, options) do |error_message| From bfe9128ded69a55d7f0a983633c3e29dc2677e24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20M=C3=BCller?= Date: Thu, 13 Apr 2023 11:25:10 +0200 Subject: [PATCH 2/3] Adjust specs --- spec/std/regex_spec.cr | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/spec/std/regex_spec.cr b/spec/std/regex_spec.cr index f71684ef66bb..25413b7db250 100644 --- a/spec/std/regex_spec.cr +++ b/spec/std/regex_spec.cr @@ -154,7 +154,7 @@ describe "Regex" do end it "multibyte index" do - if Regex::Engine.version_number < {10, 34} + if Regex::Engine.version_number < {10, 36} expect_raises(ArgumentError, "bad offset into UTF string") do /foo/.match_at_byte_index("öfoo", 1) end @@ -246,7 +246,7 @@ describe "Regex" do end it "invalid codepoint" do - if Regex::Engine.version_number < {10, 34} + if Regex::Engine.version_number < {10, 36} expect_raises(ArgumentError, "UTF-8 error") do /foo/.matches?("f\x96o") end @@ -310,7 +310,7 @@ describe "Regex" 
do end it "multibyte index" do - if Regex::Engine.version_number < {10, 34} + if Regex::Engine.version_number < {10, 36} expect_raises(ArgumentError, "bad offset into UTF string") do /foo/.matches_at_byte_index?("öfoo", 1) end From 2bf6c5cd9c2aea6aae5c65337f2a4c9e792617e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20M=C3=BCller?= Date: Thu, 13 Apr 2023 13:47:21 +0200 Subject: [PATCH 3/3] fixup --- spec/std/regex_spec.cr | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spec/std/regex_spec.cr b/spec/std/regex_spec.cr index 25413b7db250..698ca17c9a7d 100644 --- a/spec/std/regex_spec.cr +++ b/spec/std/regex_spec.cr @@ -115,8 +115,8 @@ describe "Regex" do /([\w_\.@#\/\*])+/.match("\xFF\xFE") end {% else %} - if Regex::PCRE2.version_number < {10, 35} - pending! "Error in libpcre2 < 10.35" + if Regex::PCRE2.version_number < {10, 36} + pending! "Error in libpcre2 < 10.36" else /([\w_\.@#\/\*])+/.match("\xFF\xFE").should be_nil end