Skip to content

Commit

Permalink
Implement PCRE2 JIT compilation
Browse files Browse the repository at this point in the history
  • Loading branch information
straight-shoota committed Dec 22, 2022
1 parent 979f01c commit 626ad01
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 2 deletions.
16 changes: 15 additions & 1 deletion src/regex/lib_pcre2.cr
Original file line number Diff line number Diff line change
Expand Up @@ -182,9 +182,23 @@ lib LibPCRE2
# Binds `pcre2_compile_8`: compiles `pattern` (`length` bytes) with `options`
# and returns a `Code*`.
#
# `errorcode` and `erroroffset` are out-parameters. Their pointee types follow
# the C prototype (`int *errorcode`, `PCRE2_SIZE *erroroffset`); declaring them
# the other way round makes PCRE2 write a `size_t` through a pointer to an
# `Int`-sized slot on LP64 platforms.
fun compile = pcre2_compile_8(pattern : UInt8*, length : LibC::SizeT, options : UInt32, errorcode : Int*, erroroffset : LibC::SizeT*, ccontext : CompileContext*) : Code*
# Binds `pcre2_code_free_8`, which takes the `Code*` produced by `compile`.
fun code_free = pcre2_code_free_8(code : Code*) : Void

# Opaque handle for a PCRE2 match context (declared here as `Void*`).
type MatchContext = Void*
# Binds `pcre2_match_context_create_8`; takes a general context pointer and
# returns a `MatchContext`.
# NOTE(review): no matching "free" binding is visible in this hunk — confirm
# how contexts created here are released.
fun match_context_create = pcre2_match_context_create_8(gcontext : Void*) : MatchContext

# Option bits for the `options` argument of `jit_compile`.
# NOTE(review): values presumably mirror the `PCRE2_JIT_*` macros in pcre2.h —
# confirm against the header.
JIT_COMPLETE = 0x00000001_u32 # For full matching
JIT_PARTIAL_SOFT = 0x00000002_u32
JIT_PARTIAL_HARD = 0x00000004_u32
JIT_INVALID_UTF = 0x00000100_u32
# Binds `pcre2_jit_compile_8`. The caller in `Regex::PCRE2` treats a negative
# return value as an error code.
fun jit_compile = pcre2_jit_compile_8(code : Code*, options : UInt32) : Int

# Opaque handle for a PCRE2 JIT stack (declared here as `Void*`).
type JITStack = Void*

# Binds `pcre2_jit_stack_create_8`; takes a start size, a maximum size and a
# general context, and returns a `JITStack`. The caller checks the result for
# null to detect allocation failure.
fun jit_stack_create = pcre2_jit_stack_create_8(startsize : LibC::SizeT, maxsize : LibC::SizeT, gcontext : GeneralContext) : JITStack
# Binds `pcre2_jit_stack_assign_8`. The caller visible in this commit passes
# `nil` as `callable_function` and the JIT stack itself as `callable_data`.
fun jit_stack_assign = pcre2_jit_stack_assign_8(mcontext : MatchContext, callable_function : Void*, callable_data : Void*) : Void

# Binds `pcre2_pattern_info_8`.
# NOTE(review): presumably `what` selects the item to query and `where` points
# at the storage that receives it — confirm against the PCRE2 API docs.
fun pattern_info = pcre2_pattern_info_8(code : Code*, what : UInt32, where : Void*) : Int

# Binds `pcre2_match_8`. The caller treats a negative return value as an error
# code.
# NOTE(review): `match` is declared twice below, differing only in the type of
# `mcontext` (`Void*` vs `MatchContext`). This looks like the removed and added
# sides of the diff — confirm that only the `MatchContext` variant remains in
# the file.
fun match = pcre2_match_8(code : Code*, subject : UInt8*, length : LibC::SizeT, startoffset : LibC::SizeT, options : UInt32, match_data : MatchData*, mcontext : Void*) : Int
fun match = pcre2_match_8(code : Code*, subject : UInt8*, length : LibC::SizeT, startoffset : LibC::SizeT, options : UInt32, match_data : MatchData*, mcontext : MatchContext) : Int
# Binds `pcre2_match_data_create_from_pattern_8`; returns a `MatchData*` for
# the given compiled pattern and general context.
fun match_data_create_from_pattern = pcre2_match_data_create_from_pattern_8(code : Code*, gcontext : GeneralContext) : MatchData*
# Binds `pcre2_match_data_free_8`, which takes a `MatchData*`.
fun match_data_free = pcre2_match_data_free_8(match_data : MatchData*) : Void

Expand Down
31 changes: 30 additions & 1 deletion src/regex/pcre2.cr
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,20 @@ module Regex::PCRE2
@re = PCRE2.compile(source, pcre2_options(options) | LibPCRE2::UTF | LibPCRE2::NO_UTF_CHECK | LibPCRE2::DUPNAMES | LibPCRE2::UCP) do |error_message|
raise ArgumentError.new(error_message)
end

jit_compile
end

# Asks PCRE2 to JIT-compile `@re` for complete matching.
#
# A `jit_badoption` result is tolerated silently (per the PCRE2 documentation
# this code is returned when JIT support is unavailable). Any other negative
# result raises `ArgumentError`.
private def jit_compile : Nil
  status = LibPCRE2.jit_compile(@re, LibPCRE2::JIT_COMPLETE)
  # Non-negative means the call succeeded; nothing more to do.
  return unless status < 0

  failure = LibPCRE2::Error.new(status)
  # Tolerated: the pattern is simply left without a JIT-compiled program.
  return if failure.jit_badoption?

  raise ArgumentError.new("Regex JIT compile error: #{failure}")
end

protected def self.compile(source, options)
Expand Down Expand Up @@ -123,9 +137,24 @@ module Regex::PCRE2
LibPCRE2.general_context_create(->(size : LibC::Int, data : Void*) { GC.malloc(size) }.pointer, ->(pointer : Void*, data : Void*) { GC.free(pointer) }.pointer, nil)
end

# Lazily creates the JIT stack used for matching; the getter is annotated
# `@[ThreadLocal]`, so each thread gets its own stack.
#
# The stack is created with a start size of 32 KiB and a maximum size of
# 1 MiB, using the shared general context. Sharing one stack per thread relies
# on only one `match` call running per thread at a time, so the stack is never
# accessed concurrently.
#
# Raises if PCRE2 returns a null stack.
@[ThreadLocal]
class_getter jit_stack : LibPCRE2::JITStack do
  stack = LibPCRE2.jit_stack_create(32_768, 1_048_576, Regex::PCRE2.general_context)
  raise "Error allocating JIT stack" if stack.null?
  stack
end

private def match_data(str, byte_index, options)
match_data = LibPCRE2.match_data_create_from_pattern(@re, Regex::PCRE2.general_context)
match_count = LibPCRE2.match(@re, str, str.bytesize, byte_index, pcre2_options(options) | LibPCRE2::NO_UTF_CHECK, match_data, nil)
match_context = LibPCRE2.match_context_create(nil)
LibPCRE2.jit_stack_assign(match_context, nil, Regex::PCRE2.jit_stack.as(Void*))
match_count = LibPCRE2.match(@re, str, str.bytesize, byte_index, pcre2_options(options) | LibPCRE2::NO_UTF_CHECK, match_data, match_context)

if match_count < 0
case error = LibPCRE2::Error.new(match_count)
Expand Down

0 comments on commit 626ad01

Please sign in to comment.