Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix PCRE2 implementation and tests #13105

Merged
merged 5 commits into from
Feb 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/aarch64.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ jobs:
- name: Run stdlib specs
uses: docker://jhass/crystal:1.0.0-alpine-build
with:
args: make std_spec
args: make std_spec FLAGS=-Duse_pcre
aarch64-musl-test-compiler:
needs: aarch64-musl-build
runs-on: [linux, ARM64]
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/wasm32.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ jobs:
rm wasm32-wasi-libs.tar.gz

- name: Build spec/wasm32_std_spec.cr
run: bin/crystal build spec/wasm32_std_spec.cr -o wasm32_std_spec.wasm --target wasm32-wasi
run: bin/crystal build spec/wasm32_std_spec.cr -o wasm32_std_spec.wasm --target wasm32-wasi -Duse_pcre
env:
CRYSTAL_LIBRARY_PATH: ${{ github.workspace }}/wasm32-wasi-libs

Expand Down
1 change: 1 addition & 0 deletions .github/workflows/win.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ jobs:
with:
path: | # openssl and llvm take much longer to build so they are cached separately
libs/pcre.lib
libs/pcre2-8.lib
libs/iconv.lib
libs/gc.lib
libs/ffi.lib
Expand Down
4 changes: 3 additions & 1 deletion spec/std/regex_spec.cr
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,9 @@ describe "Regex" do
{% else %}
# Can't use regex literal because the *LIMIT_DEPTH verb is not supported in libpcre (only libpcre2)
# and thus the compiler doesn't recognize it.
str.matches?(Regex.new("(*LIMIT_DEPTH=8192)^(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?$"))
regex = Regex.new("(*LIMIT_DEPTH=8192)^(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?$")
pending! "PCRE2 JIT mode not available." unless regex.@jit
str.matches?(regex)
{% end %}
# We don't care whether this actually matches or not, it's just to make
# sure the engine does not stack overflow with a large string.
Expand Down
10 changes: 6 additions & 4 deletions src/regex/lib_pcre2.cr
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ lib LibPCRE2

fun get_error_message = pcre2_get_error_message_8(errorcode : Int, buffer : UInt8*, bufflen : LibC::SizeT) : Int

fun compile = pcre2_compile_8(pattern : UInt8*, length : LibC::SizeT, options : UInt32, errorcode : LibC::SizeT*, erroroffset : Int*, ccontext : CompileContext*) : Code*
fun compile = pcre2_compile_8(pattern : UInt8*, length : LibC::SizeT, options : UInt32, errorcode : Int*, erroroffset : LibC::SizeT*, ccontext : CompileContext*) : Code*
fun code_free = pcre2_code_free_8(code : Code*) : Void

type MatchContext = Void*
Expand Down Expand Up @@ -207,8 +207,10 @@ lib LibPCRE2
fun get_ovector_pointer = pcre2_get_ovector_pointer_8(match_data : MatchData*) : LibC::SizeT*
fun get_ovector_count = pcre2_get_ovector_count_8(match_data : MatchData*) : UInt32

# void *private_malloc(Int, void *);
# void private_free(void *, void *);
fun general_context_create = pcre2_general_context_create_8(private_malloc : Void*, private_free : Void*, memory_data : Void*) : GeneralContext
# Binding for:
#   pcre2_general_context *pcre2_general_context_create(
#     void *(*private_malloc)(PCRE2_SIZE, void *),
#     void (*private_free)(void *, void *),
#     void *memory_data);
#
# `private_malloc` receives the requested size and `memory_data`, and must
# return the allocated pointer to PCRE2, so its proc type returns `Void*`
# (not `Void`). `private_free` receives the pointer to release and
# `memory_data`, and returns nothing.
fun general_context_create = pcre2_general_context_create_8(
  private_malloc : LibC::SizeT, Void* -> Void*,
  private_free : Void*, Void* -> Void,
  memory_data : Void*
) : GeneralContext
fun config = pcre2_config_8(what : UInt32, where : Void*) : Int
end
13 changes: 8 additions & 5 deletions src/regex/pcre2.cr
Original file line number Diff line number Diff line change
Expand Up @@ -4,26 +4,29 @@ require "crystal/thread_local_value"
# :nodoc:
module Regex::PCRE2
@re : LibPCRE2::Code*
@jit : Bool

# :nodoc:
def initialize(*, _source @source : String, _options @options)
@re = PCRE2.compile(source, pcre2_options(options) | LibPCRE2::UTF | LibPCRE2::NO_UTF_CHECK | LibPCRE2::DUPNAMES | LibPCRE2::UCP) do |error_message|
raise ArgumentError.new(error_message)
end

jit_compile
@jit = jit_compile
end

private def jit_compile : Nil
private def jit_compile : Bool
ret = LibPCRE2.jit_compile(@re, LibPCRE2::JIT_COMPLETE)
if ret < 0
case error = LibPCRE2::Error.new(ret)
when .jit_badoption?
# okay
return false
else
raise ArgumentError.new("Regex JIT compile error: #{error}")
end
end
true
end

protected def self.compile(source, options, &)
Expand Down Expand Up @@ -135,7 +138,7 @@ module Regex::PCRE2
end

class_getter general_context do
LibPCRE2.general_context_create(->(size : LibC::Int, data : Void*) { GC.malloc(size) }.pointer, ->(pointer : Void*, data : Void*) { GC.free(pointer) }.pointer, nil)
LibPCRE2.general_context_create(->(size, _data) { GC.malloc(size) }, ->(pointer, _data) { GC.free(pointer) }, nil)
end

# Returns a JIT stack that's shared in the current thread.
Expand All @@ -153,7 +156,7 @@ module Regex::PCRE2
private def match_data(str, byte_index, options)
match_data = LibPCRE2.match_data_create_from_pattern(@re, Regex::PCRE2.general_context)
match_context = LibPCRE2.match_context_create(nil)
LibPCRE2.jit_stack_assign(match_context, nil, Regex::PCRE2.jit_stack.as(Void*))
LibPCRE2.jit_stack_assign(match_context, nil, Regex::PCRE2.jit_stack.as(Void*)) if @jit
match_count = LibPCRE2.match(@re, str, str.bytesize, byte_index, pcre2_options(options) | LibPCRE2::NO_UTF_CHECK, match_data, match_context)

if match_count < 0
Expand All @@ -176,7 +179,7 @@ module Regex::PCRE2

module MatchData
# :nodoc:
def initialize(@regex : Regex, @code : LibPCRE2::Code*, @string : String, @pos : Int32, @ovector : UInt64*, @group_size : Int32)
def initialize(@regex : Regex, @code : LibPCRE2::Code*, @string : String, @pos : Int32, @ovector : LibC::SizeT*, @group_size : Int32)
end

private def byte_range(n, &)
Expand Down