Skip to content

Commit

Permalink
Use normal rust project with aho-corasick dependency instead of sourc… (
Browse files Browse the repository at this point in the history
  • Loading branch information
anuraaga authored Dec 22, 2022
1 parent 219bb09 commit 5da11c2
Show file tree
Hide file tree
Showing 7 changed files with 154 additions and 152 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
.vscode
build
target
.idea
.DS_Store
.vimrc
25 changes: 25 additions & 0 deletions buildtools/aho-corasick/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 13 additions & 0 deletions buildtools/aho-corasick/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# An empty [workspace] table marks this package as its own workspace root.
[workspace]

[package]
name = "aho-corasick-c"
version = "0.1.0"
description = "C wrapper for aho-corasick for loading from Wasm"

[lib]
# Built as a static archive; with the lib name below the artifact is
# libaho_corasick.a.
crate-type = ["staticlib"]
name = "aho_corasick"

[dependencies]
# The upstream matcher implementation wrapped by src/lib.rs.
aho-corasick = "0.7.20"
8 changes: 3 additions & 5 deletions buildtools/aho-corasick/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,11 @@

FROM rust:1-alpine

RUN apk add --no-cache curl patch && rustup target add wasm32-wasi
RUN rustup target add wasm32-wasi

RUN mkdir -p /aho-corasick && curl -L https://github.com/BurntSushi/aho-corasick/archive/refs/tags/0.7.19.tar.gz | tar -xz --strip-components 1 -C /aho-corasick
ADD . /aho-corasick
WORKDIR /aho-corasick
ADD aho-corasick.patch aho-corasick.patch
RUN patch -p1 < aho-corasick.patch
ENV RUSTFLAGS "-C target-feature=-crt-static"
RUN cargo build --release --target wasm32-wasi

CMD ["cp", "target/wasm32-wasi/release/libaho_corasick.a", "/out/libaho_corasick.a"]
CMD ["cp", "target/wasm32-wasi/release/libaho_corasick.a", "/out/libaho_corasick.a"]
147 changes: 0 additions & 147 deletions buildtools/aho-corasick/aho-corasick.patch

This file was deleted.

112 changes: 112 additions & 0 deletions buildtools/aho-corasick/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
// Copyright The OWASP Coraza contributors
// SPDX-License-Identifier: Apache-2.0

extern crate aho_corasick;

use std::mem::MaybeUninit;
use std::slice;
use std::str;
use aho_corasick::{AhoCorasick, AhoCorasickBuilder, MatchKind};

/// Registry of every matcher built so far.
///
/// `new_matcher` appends to this vector and hands the element's index back to
/// the caller as an opaque handle; `matches` looks the matcher up by that
/// index. Nothing in this file ever removes an entry.
// NOTE(review): `static mut` is only sound if the exports are never called
// concurrently — presumably true for the Wasm host; confirm.
static mut MATCHERS: Vec<AhoCorasick> = Vec::new();

/// Builds an Aho-Corasick matcher from a packed pattern list and returns its
/// handle.
///
/// The buffer at `patterns_ptr` (`patterns_len` bytes) holds zero or more
/// records laid out back to back, each one a 4-byte little-endian length
/// followed by that many pattern bytes. Patterns are handed to the automaton
/// as raw bytes, so they do not have to be valid UTF-8.
///
/// The matcher is ASCII case-insensitive, compiled to a DFA, and reports
/// leftmost-longest matches. The returned value is the matcher's index in
/// `MATCHERS` and is what `matches` expects as `matcher_ptr`.
///
/// # Panics
///
/// Panics if a record's length header or body runs past `patterns_len`.
#[no_mangle]
pub extern "C" fn new_matcher(patterns_ptr: *mut u8, patterns_len: usize) -> usize {
    let all_patterns: &[u8] = if patterns_len == 0 {
        // `from_raw_parts` requires a non-null pointer even for empty slices,
        // so never touch the caller's pointer when there is nothing to read.
        &[]
    } else {
        // SAFETY: the caller guarantees `patterns_ptr` points to
        // `patterns_len` readable bytes that stay untouched during this call.
        unsafe { slice::from_raw_parts(patterns_ptr, patterns_len) }
    };

    let mut patterns: Vec<&[u8]> = Vec::new();

    let mut off = 0;
    while off < patterns_len {
        // Length header: u32, little endian.
        let mut len_bytes = [0u8; 4];
        len_bytes.copy_from_slice(&all_patterns[off..off + 4]);
        let pattern_len = u32::from_le_bytes(len_bytes) as usize;
        off += 4;

        // The builder accepts anything `AsRef<[u8]>`, so the bytes are used
        // as-is instead of being reinterpreted as `str` without validation.
        patterns.push(&all_patterns[off..off + pattern_len]);
        off += pattern_len;
    }

    let ac = AhoCorasickBuilder::new()
        .ascii_case_insensitive(true)
        .dfa(true)
        .match_kind(MatchKind::LeftmostLongest)
        .build(patterns);

    // SAFETY: relies on the exports never being invoked concurrently, so no
    // other reference into `MATCHERS` is live while it is mutated.
    unsafe {
        MATCHERS.push(ac);
        MATCHERS.len() - 1
    }
}

#[no_mangle]
pub extern "C" fn matches(matcher_ptr: usize, value_ptr: usize, value_len: usize, n: usize, matches: *mut usize) -> usize {
let ac = unsafe {
let matcher = MATCHERS.get_unchecked(matcher_ptr);
matcher
};

let value = ptr_to_string(value_ptr, value_len);
std::mem::forget(&value);

let mut num = 0;
for value in ac.find_iter(value.as_bytes()) {
if num == n {
break;
}
unsafe {
*matches.offset(2*num as isize) = value.start();
*matches.offset((2*num+1) as isize) = value.end();
}
num += 1;
}

return num
}

/// Exported allocator entry point: reserves `size` bytes and returns the
/// pointer (linear memory offset) where they begin, e.g. for passing a string.
///
/// On `wasm32` targets the symbol is exported under the name `allocate`.
///
/// Ownership of the memory moves to the caller, who must hand the same
/// pointer and size back to [`deallocate`] once done with it.
#[cfg_attr(all(target_arch = "wasm32"), export_name = "allocate")]
#[no_mangle]
pub extern "C" fn _allocate(size: usize) -> *mut u8 {
    allocate(size)
}

/// Allocates `size` bytes and leaks them, returning the pointer where they
/// start.
///
/// The contents are uninitialized. The memory stays allocated until it is
/// released again with the same pointer and `size` (see `deallocate`). For
/// `size == 0` no allocation takes place and the returned pointer is a
/// non-null placeholder that must not be dereferenced.
fn allocate(size: usize) -> *mut u8 {
    // The vector must have *length* `size`, not merely capacity:
    // `into_boxed_slice` drops any capacity beyond the length, so a
    // zero-length vector would free its buffer and yield a dangling pointer.
    let buf: Vec<MaybeUninit<u8>> = vec![MaybeUninit::uninit(); size];

    // into_raw leaks the memory to the caller.
    Box::into_raw(buf.into_boxed_slice()) as *mut u8
}


/// Exported counterpart to [`allocate`]: releases the region described by
/// `ptr` (linear memory offset) and `size` (byte count).
///
/// On `wasm32` targets the symbol is exported under the name `deallocate`.
///
/// # Safety
///
/// `ptr` and `size` must describe a region previously obtained from
/// [`allocate`] that has not been released yet.
#[cfg_attr(all(target_arch = "wasm32"), export_name = "deallocate")]
#[no_mangle]
pub unsafe extern "C" fn _deallocate(ptr: usize, size: usize) {
    let raw = ptr as *mut u8;
    deallocate(raw, size);
}

/// Reclaims ownership of a leaked buffer so that its memory is freed.
///
/// # Safety
///
/// `ptr` must be the start of a live allocation of exactly `size` bytes made
/// by the global allocator, and it must not be used after this call.
unsafe fn deallocate(ptr: *mut u8, size: usize) {
    // Rebuild an empty vector (length 0, capacity `size`) over the buffer and
    // drop it right away; dropping the vector releases the allocation.
    drop(Vec::from_raw_parts(ptr, 0, size));
}

/// Copies `len` bytes starting at address `ptr` into a freshly allocated
/// `String`.
///
/// Both arguments are plain integers so they can cross the WebAssembly
/// boundary. The bytes are not validated: the caller must ensure they are
/// valid UTF-8 and that the region is readable.
fn ptr_to_string(ptr: usize, len: usize) -> String {
    // SAFETY: the caller guarantees `ptr` addresses `len` readable bytes.
    let bytes = unsafe { slice::from_raw_parts(ptr as *const u8, len) };
    // SAFETY: the caller guarantees the bytes are valid UTF-8.
    let text = unsafe { std::str::from_utf8_unchecked(bytes) };
    text.to_owned()
}
Binary file modified lib/libaho_corasick.a
Binary file not shown.

0 comments on commit 5da11c2

Please sign in to comment.