From 626ad017da2ac6f5fa7712f5a8054744ee12cb8a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Johannes=20M=C3=BCller?=
Date: Thu, 1 Dec 2022 01:41:38 +0100
Subject: [PATCH] Implement PCRE2 JIT compilation

---
Review changes on top of the original submission:
- Bind pcre2_match_context_free_8 and release the per-call match context in
  `match_data`; it was previously created on every match and never freed.
- Fetch the thread-local JIT stack before creating the match context so a
  failed stack allocation cannot strand a context.
- Blob ids on the `index` lines are those of the original submission.

 src/regex/lib_pcre2.cr | 20 +++++++++++++++++++-
 src/regex/pcre2.cr     | 41 ++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 59 insertions(+), 2 deletions(-)

diff --git a/src/regex/lib_pcre2.cr b/src/regex/lib_pcre2.cr
index 32e3c9afa767..0df60a05499b 100644
--- a/src/regex/lib_pcre2.cr
+++ b/src/regex/lib_pcre2.cr
@@ -182,9 +182,27 @@ lib LibPCRE2
   fun compile = pcre2_compile_8(pattern : UInt8*, length : LibC::SizeT, options : UInt32, errorcode : LibC::SizeT*, erroroffset : Int*, ccontext : CompileContext*) : Code*
   fun code_free = pcre2_code_free_8(code : Code*) : Void
 
+  # Opaque handle to a match context; `jit_stack_assign` attaches a JIT stack to it.
+  type MatchContext = Void*
+  fun match_context_create = pcre2_match_context_create_8(gcontext : Void*) : MatchContext
+  fun match_context_free = pcre2_match_context_free_8(mcontext : MatchContext) : Void
+
+  # Option bits accepted by `jit_compile`.
+  JIT_COMPLETE     = 0x00000001_u32 # For full matching
+  JIT_PARTIAL_SOFT = 0x00000002_u32
+  JIT_PARTIAL_HARD = 0x00000004_u32
+  JIT_INVALID_UTF  = 0x00000100_u32
+  fun jit_compile = pcre2_jit_compile_8(code : Code*, options : UInt32) : Int
+
+  # Opaque handle to a JIT stack created by `jit_stack_create`.
+  type JITStack = Void*
+
+  fun jit_stack_create = pcre2_jit_stack_create_8(startsize : LibC::SizeT, maxsize : LibC::SizeT, gcontext : GeneralContext) : JITStack
+  fun jit_stack_assign = pcre2_jit_stack_assign_8(mcontext : MatchContext, callable_function : Void*, callable_data : Void*) : Void
+
   fun pattern_info = pcre2_pattern_info_8(code : Code*, what : UInt32, where : Void*) : Int
 
-  fun match = pcre2_match_8(code : Code*, subject : UInt8*, length : LibC::SizeT, startoffset : LibC::SizeT, options : UInt32, match_data : MatchData*, mcontext : Void*) : Int
+  fun match = pcre2_match_8(code : Code*, subject : UInt8*, length : LibC::SizeT, startoffset : LibC::SizeT, options : UInt32, match_data : MatchData*, mcontext : MatchContext) : Int
   fun match_data_create_from_pattern = pcre2_match_data_create_from_pattern_8(code : Code*, gcontext : GeneralContext) : MatchData*
   fun match_data_free = pcre2_match_data_free_8(match_data : MatchData*) : Void
 
diff --git a/src/regex/pcre2.cr b/src/regex/pcre2.cr
index dffd2369ba11..176a479bbb57 100644
--- a/src/regex/pcre2.cr
+++ b/src/regex/pcre2.cr
@@ -9,6 +9,24 @@ module Regex::PCRE2
     @re = PCRE2.compile(source, pcre2_options(options) | LibPCRE2::UTF | LibPCRE2::NO_UTF_CHECK | LibPCRE2::DUPNAMES | LibPCRE2::UCP) do |error_message|
       raise ArgumentError.new(error_message)
     end
+
+    jit_compile
+  end
+
+  # Requests JIT compilation of the compiled pattern `@re` with `JIT_COMPLETE`.
+  #
+  # A `jit_badoption` error is tolerated silently; any other negative return
+  # value raises `ArgumentError`.
+  private def jit_compile : Nil
+    ret = LibPCRE2.jit_compile(@re, LibPCRE2::JIT_COMPLETE)
+    if ret < 0
+      case error = LibPCRE2::Error.new(ret)
+      when .jit_badoption?
+        # okay
+      else
+        raise ArgumentError.new("Regex JIT compile error: #{error}")
+      end
+    end
   end
 
   protected def self.compile(source, options)
@@ -123,9 +141,30 @@ module Regex::PCRE2
     LibPCRE2.general_context_create(->(size : LibC::Int, data : Void*) { GC.malloc(size) }.pointer, ->(pointer : Void*, data : Void*) { GC.free(pointer) }.pointer, nil)
   end
 
+  # Returns a JIT stack that's shared in the current thread.
+  #
+  # Only a single `match` function can run per thread at any given time, so there
+  # can't be any concurrent access to the JIT stack.
+  @[ThreadLocal]
+  class_getter jit_stack : LibPCRE2::JITStack do
+    jit_stack = LibPCRE2.jit_stack_create(32_768, 1_048_576, Regex::PCRE2.general_context)
+    if jit_stack.null?
+      raise "Error allocating JIT stack"
+    end
+    jit_stack
+  end
+
   private def match_data(str, byte_index, options)
     match_data = LibPCRE2.match_data_create_from_pattern(@re, Regex::PCRE2.general_context)
-    match_count = LibPCRE2.match(@re, str, str.bytesize, byte_index, pcre2_options(options) | LibPCRE2::NO_UTF_CHECK, match_data, nil)
+    # Resolve the thread's JIT stack before creating the match context: the
+    # getter can raise, which would otherwise leave the context unreleased.
+    jit_stack = Regex::PCRE2.jit_stack
+    match_context = LibPCRE2.match_context_create(nil)
+    LibPCRE2.jit_stack_assign(match_context, nil, jit_stack.as(Void*))
+    match_count = LibPCRE2.match(@re, str, str.bytesize, byte_index, pcre2_options(options) | LibPCRE2::NO_UTF_CHECK, match_data, match_context)
+    # The context is only used for this call; release it so a fresh one is not
+    # leaked on every match.
+    LibPCRE2.match_context_free(match_context)
     if match_count < 0
       case error = LibPCRE2::Error.new(match_count)