Skip to content

Commit

Permalink
ARM NEON support (#32)
Browse files Browse the repository at this point in the history
* Put something in the readme so we can have a PR
* Add drone file
* update build status
* unguard for sse4.2 to allow rust to polyfill on older platforms
* Add more simd tests
* RFC: Neon support (pretty much working) (#35)
* feat: neon support
* feat: temp stub replacements for neon intrinsics (pending rust-lang/stdarch#792)
* fix: drone CI rustup nightly
* feat: fix guards, use rust stdlib for bit count operations
* fix: remove double semicolon
* feat: fancy generic generator functions, thanks @Licenser
* Update extq intrinsics
* Use simd-lite (#39)
* Use simd-lite
* Update badge
* Update badge
* Get rid of transmutes
* Use NeonInit trait
* vqsubq_u8 fix
* vqsubq_u8 fix pt. 2
* use reexported values from simd-lite
* add simd-lite real version
  • Loading branch information
Licenser authored and sunnygleason committed Sep 4, 2019
1 parent 84eee6d commit 1954f9b
Show file tree
Hide file tree
Showing 26 changed files with 1,399 additions and 102 deletions.
64 changes: 64 additions & 0 deletions .drone.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# Pipeline: build and test the whole workspace on amd64 with rustc targeting
# the CI runner's own CPU.
# NOTE(review): the name says AVX2, but `target-cpu=native` only enables AVX2
# when the runner's CPU actually has it — confirm the runner hardware.
kind: pipeline
name: test-on-avx2

platform:
arch: amd64

steps:
- name: test
image: rust:1
environment:
# Let rustc use every instruction-set extension the build host supports.
RUSTFLAGS: '-C target-cpu=native'
commands:
- cargo build --verbose --all
- cargo test --verbose --all
---

# Pipeline: build and test on amd64 with AVX2 explicitly switched off, so the
# non-AVX2 x86 code path is the one that gets compiled and tested.
kind: pipeline
name: test-on-sse42

platform:
arch: amd64

steps:
- name: test
image: rust:1
environment:
# Start from the host CPU's feature set, then remove AVX2 (`-avx2`).
RUSTFLAGS: '-C target-cpu=native -C target-feature=-avx2'
commands:
- cargo build --verbose --all
- cargo test --verbose --all

---

# Pipeline: build and test on amd64 with both AVX2 and SSE4.2 switched off, to
# check that the crate still builds and passes for x86 targets lacking both.
kind: pipeline
name: test-on-pre-sse42

platform:
arch: amd64

steps:
- name: test
image: rust:1
environment:
# Start from the host CPU's feature set, then remove AVX2 and SSE4.2.
RUSTFLAGS: '-C target-cpu=native -C target-feature=-avx2,-sse4.2'
commands:
- cargo build --verbose --all
- cargo test --verbose --all

---

# Pipeline: build and test on arm64 with the crate's `neon` feature enabled.
kind: pipeline
name: test-on-arm64

platform:
arch: arm64

steps:
- name: test
image: rust:1
commands:
# The NEON code path turns on unstable `#![feature(...)]` gates, so this
# pipeline switches the image's toolchain to nightly before building.
- rustup default nightly
- rustup update
# Clean first, then build and test with the optional `neon` feature.
- cargo clean && cargo +nightly build --verbose --all --features neon
- cargo +nightly test --verbose --all --features neon
3 changes: 3 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ jemallocator = { version = "0.3", optional = true }
perfcnt = { version = "0.4", optional = true }
getopts = { version = "0.2", optional = true }
colored = { version = "1.7", optional = true }
simd-lite = { version = "0.1.0", optional = true }



Expand All @@ -45,6 +46,8 @@ harness = false

[features]
default = ["swar-number-parsing", "serde_impl"]
# Support for ARM NEON SIMD
neon = ["simd-lite"]
# use 8 number at once parsing strategy
swar-number-parsing = []
# serde compatibility
Expand Down
11 changes: 7 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
# SIMD Json for Rust   [![Build Status]][circleci.com] [![Windows Build Status]][appveyor.com] [![Latest Version]][crates.io]
# SIMD Json for Rust   [![Build Status]][drone.io] [![Windows Build Status]][appveyor.com] [![Latest Version]][crates.io]

[Build Status]: https://circleci.com/gh/Licenser/simdjson-rs/tree/master.svg?style=svg
[circleci.com]: https://circleci.com/gh/Licenser/simdjson-rs/tree/master
[Windows Build Status]: https://ci.appveyor.com/api/projects/status/0kf0v6hj5v2gite9?svg=true

[Build Status]: https://cloud.drone.io/api/badges/simd-lite/simdjson-rs/status.svg
[drone.io]: https://cloud.drone.io/simd-lite/simdjson-rs
[Windows Build Status]: https://ci.appveyor.com/api/projects/status/ffi2ese7dxse6pb8?svg=true
[appveyor.com]: https://ci.appveyor.com/project/Licenser/simdjson-rs
[Latest Version]: https://img.shields.io/crates/v/simd-json.svg
[crates.io]: https://crates.io/crates/simd-json
Expand All @@ -17,6 +18,8 @@

To take advantage of simdjson, your system needs to be SIMD-capable. This means compiling with native CPU support and the given features. Look at [the cargo config in this repository](.cargo/config) for an example.

simd-json.rs supports AVX2, SSE4.2 and NEON.

### jemalloc

If you are writing performance-centric code, make sure to use jemalloc and not the system allocator (which has now become the default in Rust); it gives a very noticeable boost in performance.
Expand Down
1 change: 1 addition & 0 deletions data/fail/fail70.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
["string contains bad UTF-8 €"]
1 change: 1 addition & 0 deletions data/fail/fail71.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
["contains bad UTF-8 �"]
1 change: 1 addition & 0 deletions data/fail/fail72.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
["contains bad UTF-8 �"]
1 change: 1 addition & 0 deletions data/fail/fail73.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
["bad UTF-8 �"]
1 change: 1 addition & 0 deletions data/fail/fail74.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
["UTF-8 �"]
1 change: 1 addition & 0 deletions data/fail/fail75.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[""]
1 change: 1 addition & 0 deletions data/fail/fail76.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[""]
51 changes: 51 additions & 0 deletions src/avx2/generator.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
#[cfg(target_arch = "x86")]
use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;

use crate::value::generator::ESCAPED;
use std::io;

/// Writes the leading part of `string` to `writer`, escaping as it goes, by
/// scanning 32 bytes at a time with AVX2.
///
/// While at least 32 bytes lie between `*idx` and `*len`, the next 32 bytes
/// are loaded and checked for a `"`, a `\`, or any byte in `0x00..=0x1F`:
///
/// * no match: `*idx` advances by 32 and nothing is written yet;
/// * match: everything before the first matching byte is written verbatim,
///   followed by the escape selected through `ESCAPED` (`\u00XX` when the
///   table entry is `b'u'`, otherwise `\` plus the table entry). `*string`
///   is then re-sliced to start just past that byte, `*idx` is reset to 0
///   and `*len` to the length of the new slice.
///
/// After the loop the first `*idx` bytes (scanned, nothing to escape) are
/// flushed and `*string` is advanced past them. `*idx` and `*len` are not
/// updated by that final step, and the unscanned tail left in `*string` is
/// not written here.
///
/// # Errors
///
/// Write failures are handed to `stry!` — presumably an early return of the
/// `io::Error`; confirm against the macro definition.
///
/// # Safety
///
/// * The 32-byte load goes through a raw pointer with no bounds check, so
///   `*idx <= *len <= string.len()` must hold on entry.
/// * The `_mm256_*` integer intrinsics used here are AVX2 instructions; the
///   function must only run on a CPU that supports AVX2.
#[inline(always)]
pub unsafe fn write_str_simd<W>(
    writer: &mut W,
    string: &mut &[u8],
    len: &mut usize,
    idx: &mut usize,
) -> io::Result<()>
where
    W: std::io::Write,
{
    // Broadcast constants, built once outside the loop.
    let all_zero = _mm256_set1_epi8(0);
    let low_five_bits = _mm256_set1_epi8(0x1F as i8);
    let quote_char = _mm256_set1_epi8(b'"' as i8);
    let backslash_char = _mm256_set1_epi8(b'\\' as i8);

    while *len - *idx >= 32 {
        // `loadu` is the unaligned load, so the pointer cast needs no
        // particular alignment.
        #[allow(clippy::cast_ptr_alignment)]
        let chunk: __m256i = _mm256_loadu_si256(string.as_ptr().add(*idx) as *const __m256i);

        // Lanes that hold a backslash or a double quote.
        let is_backslash = _mm256_cmpeq_epi8(chunk, backslash_char);
        let is_quote = _mm256_cmpeq_epi8(chunk, quote_char);
        let is_bs_or_quote = _mm256_or_si256(is_backslash, is_quote);

        // Clearing the low five bits leaves zero exactly for bytes in
        // 0x00..=0x1F, i.e. the lanes that need escaping as control bytes.
        let low_part = _mm256_and_si256(chunk, low_five_bits);
        let high_part = _mm256_xor_si256(chunk, low_part);
        let is_control = _mm256_cmpeq_epi8(high_part, all_zero);

        // One bit per lane; the lowest set bit is the first byte to escape.
        let escape_mask = _mm256_movemask_epi8(_mm256_or_si256(is_bs_or_quote, is_control));
        if escape_mask == 0 {
            *idx += 32;
            continue;
        }

        let hit = *idx + escape_mask.trailing_zeros() as usize;
        // Flush everything that precedes the byte needing an escape.
        stry!(writer.write_all(&string[0..hit]));
        let ch = string[hit];
        match ESCAPED[ch as usize] {
            b'u' => stry!(write!(writer, "\\u{:04x}", ch)),

            escape => stry!(writer.write_all(&[b'\\', escape])),
        };
        // Restart the scan right after the escaped byte.
        *string = &string[hit + 1..];
        *idx = 0;
        *len = string.len();
    }

    // Flush the scanned-but-clean prefix; the short tail stays in `*string`.
    stry!(writer.write_all(&string[0..*idx]));
    *string = &string[*idx..];
    Ok(())
}
3 changes: 2 additions & 1 deletion src/avx2/mod.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
pub mod deser;
pub mod stage1;
pub mod utf8check;
pub mod utf8check;
pub mod generator;
35 changes: 31 additions & 4 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,19 @@
#![deny(warnings)]

#![cfg_attr(target_feature = "neon", feature(
asm,
stdsimd,
repr_simd,
custom_inner_attributes,
aarch64_target_feature,
platform_intrinsics,
stmt_expr_attributes,
simd_ffi,
link_llvm_intrinsics,
rustc_attrs,
)
)]

#![cfg_attr(feature = "hints", feature(core_intrinsics))]
//! simdjson-rs is a Rust port of the simdjson C++ library. It follows
//! most of the design closely with a few exceptions to make it better
Expand Down Expand Up @@ -89,17 +104,25 @@ pub use crate::avx2::deser::*;
#[cfg(target_feature = "avx2")]
use crate::avx2::stage1::SIMDJSON_PADDING;

#[cfg(all(target_feature = "sse4.2", not(target_feature = "avx2")))]
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), not(target_feature = "avx2")))]
mod sse42;
#[cfg(all(target_feature = "sse4.2", not(target_feature = "avx2")))]
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), not(target_feature = "avx2")))]
pub use crate::sse42::deser::*;
#[cfg(all(target_feature = "sse4.2", not(target_feature = "avx2")))]
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), not(target_feature = "avx2")))]
use crate::sse42::stage1::SIMDJSON_PADDING;

#[cfg(all(target_feature = "neon", feature = "neon"))]
mod neon;
#[cfg(all(target_feature = "neon", feature = "neon"))]
pub use crate::neon::deser::*;
#[cfg(all(target_feature = "neon", feature = "neon"))]
use crate::neon::stage1::SIMDJSON_PADDING;

mod stage2;
pub mod value;

use crate::numberparse::Number;
#[cfg(not(target_feature = "neon"))]
use std::mem;
use std::str;

Expand Down Expand Up @@ -163,7 +186,11 @@ impl<'de> Deserializer<'de> {

let counts = Deserializer::validate(input, &structural_indexes)?;

let strings = Vec::with_capacity(len + SIMDJSON_PADDING);
// Set length to allow slice access in ARM code
let mut strings = Vec::with_capacity(len + SIMDJSON_PADDING);
unsafe {
strings.set_len(len + SIMDJSON_PADDING);
}

Ok(Deserializer {
counts,
Expand Down
Loading

0 comments on commit 1954f9b

Please sign in to comment.