Skip to content

Commit

Permalink
Convert from inline assembly to intrinsics
Browse files Browse the repository at this point in the history
  • Loading branch information
shepmaster committed Mar 25, 2018
1 parent 7b3e62c commit cfef5b4
Showing 1 changed file with 40 additions and 75 deletions.
115 changes: 40 additions & 75 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#![feature(asm)]
#![feature(stdsimd)]
#![feature(const_fn)]
#![feature(pattern)]
#![cfg_attr(test, feature(test))]
Expand Down Expand Up @@ -331,47 +331,29 @@ impl fmt::Debug for AsciiChars {
// NOTE(review): this span comes from a rendered diff whose +/- markers were lost.
// Inside each method the inline-assembly statements (`let ...;`, `asm!(...)`, and the
// bare trailing variable) appear to be the removed side, and the `use ...` / intrinsic
// statements the added side (the commit is titled "Convert from inline assembly to
// intrinsics"). The two sides are interleaved; confirm against the repository before
// treating this text as compilable source.
#[cfg(all(feature = "unstable", target_arch = "x86_64"))]
impl PackedCompareOperation for AsciiChars {
/// Compares the 16 bytes loaded from `ptr` against the needle with `_mm_cmpestrm`
/// and returns lane 0 (the low 64 bits) of the resulting vector.
///
/// The haystack length handed to the instruction is `offset + len`.
///
/// NOTE(review): unlike `body`, `offset` is not added to the load address here;
/// presumably the caller passes an already adjusted pointer and ignores the mask
/// bits belonging to the first `offset` bytes -- confirm against the caller.
///
/// # Safety
///
/// The load always reads 16 bytes starting at `ptr`, independent of `len`;
/// presumably the caller guarantees that range is readable -- confirm.
unsafe fn initial(&self, ptr: *const u8, offset: usize, len: usize) -> u64 {
let matching_bytes;

asm!("movlhps $2, $1
pcmpestrm $$0, ($3), $1"
: // output operands
"={xmm0}"(matching_bytes)
: // input operands
"x"(self.needle),
"x"(self.needle_hi),
"r"(ptr),
"{rdx}"(offset + len), // saturates at 16
"{rax}"(self.count as u64)
: // clobbers
"cc"
: // options
);

matching_bytes
use std::arch::x86_64::{__m128i, _mm_loadu_si128};
use std::simd::{u64x2, IntoBits};
use std::arch::x86_64::{_mm_cmpestrm, _mm_set_epi64x};

// `_mm_set_epi64x(hi, lo)`: `needle_hi` becomes the upper 64 bits and `needle` the
// lower 64 bits, the same layout the `movlhps` in the assembly version produced.
let needle = _mm_set_epi64x(self.needle_hi as _, self.needle as _);

// Unaligned 128-bit load straight from `ptr` (no `offset` applied).
let haystack = _mm_loadu_si128(ptr as *const __m128i);
// The assembly version noted that the instruction saturates this length at 16.
let haystack_len = offset + len;
// Control byte 0 matches the `$$0` immediate of the assembly version. Per Intel's
// documentation this selects unsigned bytes, "equal any" and a bit-mask result.
let r = _mm_cmpestrm(needle, self.count as _, haystack, haystack_len as _, 0);

// Reinterpret the result as two u64 lanes and return the low one.
let r: u64x2 = r.into_bits();
r.extract(0)
}

/// Compares the 16 bytes loaded from `ptr + offset` against the needle with
/// `_mm_cmpestri` and returns the instruction's index result as a `u32`.
///
/// `len` is passed as the haystack length and `self.count` as the needle length.
///
/// # Safety
///
/// `ptr.offset(offset as isize)` must stay within the same allocation, and the load
/// reads 16 bytes from that address regardless of `len`; presumably the caller
/// guarantees both -- confirm.
unsafe fn body(&self, ptr: *const u8, offset: usize, len: usize) -> u32 {
let res;

asm!("# Move low word of $2 to high word of $1
movlhps $2, $1
pcmpestri $$0, ($3, $4), $1"
: // output operands
"={ecx}"(res)
: // input operands
"x"(self.needle),
"x"(self.needle_hi),
"r"(ptr),
"r"(offset)
"{rdx}"(len), // haystack length
"{rax}"(self.count as u64) // needle length
: // clobbers
"cc"
: // options
);
use std::arch::x86_64::{__m128i, _mm_loadu_si128};
use std::arch::x86_64::{_mm_cmpestri, _mm_set_epi64x};

res
// `needle_hi` in the upper 64 bits, `needle` in the lower 64 bits.
let needle = _mm_set_epi64x(self.needle_hi as _, self.needle as _);

// Unaligned 128-bit load from `ptr + offset`, mirroring the `($3, $4)` address
// operand of the assembly version.
let haystack = _mm_loadu_si128(ptr.offset(offset as isize) as *const __m128i);
let haystack_len = len;
// Control byte 0 again; the trailing `as _` converts the intrinsic's integer result
// to the `u32` return type.
_mm_cmpestri(needle, self.count as _, haystack, haystack_len as _, 0) as _
}
}

Expand Down Expand Up @@ -457,46 +439,29 @@ impl<'a> Substring<'a> {

// NOTE(review): this span comes from a rendered diff whose +/- markers were lost.
// Inside each method the inline-assembly statements (`let ...;`, `asm!(...)`, and the
// bare trailing variable) appear to be the removed side, and the `use ...` / intrinsic
// statements the added side (the commit is titled "Convert from inline assembly to
// intrinsics"). Confirm against the repository before treating this text as
// compilable source.
impl<'a> PackedCompareOperation for Substring<'a> {
/// Compares the 16 bytes loaded from `haystack` against the needle with
/// `_mm_cmpestrm` in `_SIDD_CMP_EQUAL_ORDERED` mode and returns lane 0 (the low
/// 64 bits) of the resulting vector.
///
/// The haystack length handed to the instruction is `offset + len`.
///
/// NOTE(review): unlike `body`, `offset` is not added to the load address here;
/// presumably the caller passes an already adjusted pointer -- confirm.
///
/// # Safety
///
/// The load always reads 16 bytes starting at `haystack`, independent of `len`;
/// presumably the caller guarantees that range is readable -- confirm.
unsafe fn initial(&self, haystack: *const u8, offset: usize, len: usize) -> u64 {
let matching_bytes;

asm!("movlhps $2, $1
pcmpestrm $$0b00001100, ($3), $1"
: // output operands
"={xmm0}"(matching_bytes)
: // input operands
"x"(self.needle_lo),
"x"(self.needle_hi),
"r"(haystack),
"{rax}"(self.needle_len as u64),
"{rdx}"(offset + len)
: // clobbers
"cc"
: // options
);

matching_bytes
use std::arch::x86_64::{__m128i, _mm_loadu_si128};
use std::simd::{u64x2, IntoBits};
use std::arch::x86_64::{_mm_cmpestrm, _mm_set_epi64x, _SIDD_CMP_EQUAL_ORDERED};

// `_mm_set_epi64x(hi, lo)`: `needle_hi` becomes the upper 64 bits and `needle_lo`
// the lower 64 bits.
let needle = _mm_set_epi64x(self.needle_hi as _, self.needle_lo as _);

// Shadows the `haystack` pointer parameter with the 16 bytes loaded from it
// (unaligned load, no `offset` applied).
let haystack = _mm_loadu_si128(haystack as *const __m128i);
let haystack_len = offset + len;
// `_SIDD_CMP_EQUAL_ORDERED` replaces the `$$0b00001100` immediate used by the
// assembly version.
let r = _mm_cmpestrm(needle, self.needle_len as _, haystack, haystack_len as _, _SIDD_CMP_EQUAL_ORDERED);

// Reinterpret the result as two u64 lanes and return the low one.
let r: u64x2 = r.into_bits();
r.extract(0)
}

/// Compares the 16 bytes loaded from `haystack + offset` against the needle with
/// `_mm_cmpestri` in `_SIDD_CMP_EQUAL_ORDERED` mode and returns the instruction's
/// index result as a `u32`.
///
/// `len` is passed as the haystack length and `self.needle_len` as the needle length.
///
/// # Safety
///
/// `haystack.offset(offset as isize)` must stay within the same allocation, and the
/// load reads 16 bytes from that address regardless of `len`; presumably the caller
/// guarantees both -- confirm.
unsafe fn body(&self, haystack: *const u8, offset: usize, len: usize) -> u32 {
let matching_idx;

asm!("movlhps $2, $1
pcmpestri $$0b00001100, ($3, $4), $1"
: // output operands
"={ecx}"(matching_idx)
: // input operands
"x"(self.needle_lo),
"x"(self.needle_hi),
"r"(haystack),
"r"(offset),
"{rax}"(self.needle_len as u64),
"{rdx}"(len)
: // clobbers
"cc"
: // options
);

matching_idx
use std::arch::x86_64::{__m128i, _mm_loadu_si128};
use std::arch::x86_64::{_mm_cmpestri, _mm_set_epi64x, _SIDD_CMP_EQUAL_ORDERED};

// `needle_hi` in the upper 64 bits, `needle_lo` in the lower 64 bits.
let needle = _mm_set_epi64x(self.needle_hi as _, self.needle_lo as _);

// Shadows the pointer parameter with the 16 bytes loaded from `haystack + offset`,
// mirroring the `($3, $4)` address operand of the assembly version.
let haystack = _mm_loadu_si128(haystack.offset(offset as isize) as *const __m128i);
let haystack_len = len;
// The trailing `as _` converts the intrinsic's integer result to the `u32` return type.
_mm_cmpestri(needle, self.needle_len as _, haystack, haystack_len as _, _SIDD_CMP_EQUAL_ORDERED) as _
}
}

Expand Down

0 comments on commit cfef5b4

Please sign in to comment.