From b09b969044c8ee83ffa3f47ca012ba7628ef1f95 Mon Sep 17 00:00:00 2001 From: Amos Wenger Date: Thu, 12 Sep 2024 12:54:04 +0200 Subject: [PATCH] feat: Make num-bigint optional (#130) --- crates/jiter-python/Cargo.toml | 2 +- crates/jiter/Cargo.toml | 34 ++++---- crates/jiter/src/number_decoder.rs | 120 +++++++++++++++++------------ crates/jiter/src/value.rs | 5 ++ 4 files changed, 97 insertions(+), 64 deletions(-) diff --git a/crates/jiter-python/Cargo.toml b/crates/jiter-python/Cargo.toml index 86efab42..71ed7c98 100644 --- a/crates/jiter-python/Cargo.toml +++ b/crates/jiter-python/Cargo.toml @@ -11,7 +11,7 @@ repository = {workspace = true} [dependencies] pyo3 = { workspace = true, features = ["num-bigint"] } -jiter = { path = "../jiter", features = ["python"] } +jiter = { path = "../jiter", features = ["python", "num-bigint"] } [features] # must be enabled when building with `cargo build`, maturin enables this automatically diff --git a/crates/jiter/Cargo.toml b/crates/jiter/Cargo.toml index 782f8b0b..93c1dc2e 100644 --- a/crates/jiter/Cargo.toml +++ b/crates/jiter/Cargo.toml @@ -2,31 +2,37 @@ name = "jiter" description = "Fast Iterable JSON parser" readme = "../../README.md" -version = {workspace = true} -edition = {workspace = true} -authors = {workspace = true} -license = {workspace = true} -keywords = {workspace = true} -categories = {workspace = true} -homepage = {workspace = true} -repository = {workspace = true} +version = { workspace = true } +edition = { workspace = true } +authors = { workspace = true } +license = { workspace = true } +keywords = { workspace = true } +categories = { workspace = true } +homepage = { workspace = true } +repository = { workspace = true } [dependencies] -num-bigint = "0.4.4" +num-bigint = { version = "0.4.4", optional = true } num-traits = "0.2.16" ahash = "0.8.0" smallvec = "1.11.0" -pyo3 = { workspace = true, optional = true, features = ["num-bigint"] } -lexical-parse-float = { version = "0.8.5", features = ["format"] } 
+pyo3 = { workspace = true, optional = true } +lexical-parse-float = { version = "0.8.5", features = ["format"] } bitvec = "1.0.1" [features] +default = ["num-bigint"] python = ["dep:pyo3", "dep:pyo3-build-config"] +num-bigint = ["dep:num-bigint", "pyo3?/num-bigint"] [dev-dependencies] bencher = "0.1.5" paste = "1.0.7" -serde_json = {version = "1.0.87", features = ["preserve_order", "arbitrary_precision", "float_roundtrip"]} +serde_json = { version = "1.0.87", features = [ + "preserve_order", + "arbitrary_precision", + "float_roundtrip", +] } serde = "1.0.147" pyo3 = { workspace = true, features = ["auto-initialize"] } codspeed-bencher-compat = "2.7.1" @@ -71,5 +77,5 @@ doc_markdown = "allow" implicit_clone = "allow" iter_without_into_iter = "allow" return_self_not_must_use = "allow" -inline_always = "allow" # TODO remove? -match_same_arms = "allow" # TODO remove? +inline_always = "allow" # TODO remove? +match_same_arms = "allow" # TODO remove? diff --git a/crates/jiter/src/number_decoder.rs b/crates/jiter/src/number_decoder.rs index 33b0f5c0..e657510c 100644 --- a/crates/jiter/src/number_decoder.rs +++ b/crates/jiter/src/number_decoder.rs @@ -1,5 +1,8 @@ +#[cfg(feature = "num-bigint")] use num_bigint::BigInt; +#[cfg(feature = "num-bigint")] use num_traits::cast::ToPrimitive; + use std::ops::Range; use lexical_parse_float::{format as lexical_format, FromLexicalWithOptions, Options as ParseFloatOptions}; @@ -16,6 +19,7 @@ pub trait AbstractNumberDecoder { #[derive(Debug, Clone, PartialEq)] pub enum NumberInt { Int(i64), + #[cfg(feature = "num-bigint")] BigInt(BigInt), } @@ -23,6 +27,7 @@ impl From for f64 { fn from(num: NumberInt) -> Self { match num { NumberInt::Int(int) => int as f64, + #[cfg(feature = "num-bigint")] NumberInt::BigInt(big_int) => big_int.to_f64().unwrap_or(f64::NAN), } } @@ -118,6 +123,7 @@ impl pyo3::ToPyObject for NumberAny { fn to_object(&self, py: pyo3::Python<'_>) -> pyo3::PyObject { match self { Self::Int(NumberInt::Int(int)) => 
int.to_object(py), + #[cfg(feature = "num-bigint")] Self::Int(NumberInt::BigInt(big_int)) => big_int.to_object(py), Self::Float(float) => float.to_object(py), } @@ -220,8 +226,8 @@ impl IntParse { index += 1; let (chunk, new_index) = IntChunk::parse_small(data, index, first_value); - let mut big_value: BigInt = match chunk { - IntChunk::Ongoing(value) => value.into(), + let ongoing: u64 = match chunk { + IntChunk::Ongoing(value) => value, IntChunk::Done(value) => { let mut value_i64 = value as i64; if !positive { @@ -231,62 +237,76 @@ impl IntParse { } IntChunk::Float => return Ok((Self::Float, new_index)), }; - index = new_index; - // number is too big for i64, we need ot use a big int - loop { - let (chunk, new_index) = IntChunk::parse_big(data, index); - if (new_index - start) > 4300 { - return json_err!(NumberOutOfRange, start + 4301); - } - match chunk { - IntChunk::Ongoing(value) => { - big_value *= ONGOING_CHUNK_MULTIPLIER; - big_value += value; - index = new_index; + // number is too big for i64, we need to use a BigInt, + // or error out if num-bigint is not enabled + + #[cfg(not(feature = "num-bigint"))] + { + // silence unused variable warning + let _ = (ongoing, start); + return json_err!(NumberOutOfRange, index); + } + + #[cfg(feature = "num-bigint")] + { + #[cfg(target_arch = "aarch64")] + // in aarch64 we use a 128 bit registers - 16 bytes + const ONGOING_CHUNK_MULTIPLIER: u64 = 10u64.pow(16); + #[cfg(not(target_arch = "aarch64"))] + // decode_int_chunk_fallback - we parse 18 bytes when the number is ongoing + const ONGOING_CHUNK_MULTIPLIER: u64 = 10u64.pow(18); + + const POW_10: [u64; 18] = [ + 10u64.pow(0), + 10u64.pow(1), + 10u64.pow(2), + 10u64.pow(3), + 10u64.pow(4), + 10u64.pow(5), + 10u64.pow(6), + 10u64.pow(7), + 10u64.pow(8), + 10u64.pow(9), + 10u64.pow(10), + 10u64.pow(11), + 10u64.pow(12), + 10u64.pow(13), + 10u64.pow(14), + 10u64.pow(15), + 10u64.pow(16), + 10u64.pow(17), + ]; + + let mut big_value: BigInt = ongoing.into(); + index = 
new_index; + + loop { + let (chunk, new_index) = IntChunk::parse_big(data, index); + if (new_index - start) > 4300 { + return json_err!(NumberOutOfRange, start + 4301); } - IntChunk::Done(value) => { - big_value *= POW_10[new_index - index]; - big_value += value; - if !positive { - big_value = -big_value; + match chunk { + IntChunk::Ongoing(value) => { + big_value *= ONGOING_CHUNK_MULTIPLIER; + big_value += value; + index = new_index; + } + IntChunk::Done(value) => { + big_value *= POW_10[new_index - index]; + big_value += value; + if !positive { + big_value = -big_value; + } + return Ok((Self::Int(NumberInt::BigInt(big_value)), new_index)); } - return Ok((Self::Int(NumberInt::BigInt(big_value)), new_index)); + IntChunk::Float => return Ok((Self::Float, new_index)), } - IntChunk::Float => return Ok((Self::Float, new_index)), } } } } -static POW_10: [u64; 18] = [ - 10u64.pow(0), - 10u64.pow(1), - 10u64.pow(2), - 10u64.pow(3), - 10u64.pow(4), - 10u64.pow(5), - 10u64.pow(6), - 10u64.pow(7), - 10u64.pow(8), - 10u64.pow(9), - 10u64.pow(10), - 10u64.pow(11), - 10u64.pow(12), - 10u64.pow(13), - 10u64.pow(14), - 10u64.pow(15), - 10u64.pow(16), - 10u64.pow(17), -]; - -#[cfg(target_arch = "aarch64")] -// in aarch64 we use a 128 bit registers - 16 bytes -static ONGOING_CHUNK_MULTIPLIER: u64 = 10u64.pow(16); -#[cfg(not(target_arch = "aarch64"))] -// decode_int_chunk_fallback - we parse 18 bytes when the number is ongoing -static ONGOING_CHUNK_MULTIPLIER: u64 = 10u64.pow(18); - pub(crate) enum IntChunk { Ongoing(u64), Done(u64), @@ -362,6 +382,8 @@ pub(crate) static INT_CHAR_MAP: [bool; 256] = { pub struct NumberRange { pub range: Range, + // in some cfg configurations, this field is never read. 
+ #[allow(dead_code)] pub is_int: bool, } diff --git a/crates/jiter/src/value.rs b/crates/jiter/src/value.rs index 78da22a6..2a13bcaf 100644 --- a/crates/jiter/src/value.rs +++ b/crates/jiter/src/value.rs @@ -1,6 +1,7 @@ use std::borrow::Cow; use std::sync::Arc; +#[cfg(feature = "num-bigint")] use num_bigint::BigInt; use smallvec::SmallVec; @@ -16,6 +17,7 @@ pub enum JsonValue<'s> { Null, Bool(bool), Int(i64), + #[cfg(feature = "num-bigint")] BigInt(BigInt), Float(f64), Str(Cow<'s, str>), @@ -34,6 +36,7 @@ impl pyo3::ToPyObject for JsonValue<'_> { Self::Null => py.None().to_object(py), Self::Bool(b) => b.to_object(py), Self::Int(i) => i.to_object(py), + #[cfg(feature = "num-bigint")] Self::BigInt(b) => b.to_object(py), Self::Float(f) => f.to_object(py), Self::Str(s) => s.to_object(py), @@ -78,6 +81,7 @@ fn value_static(v: JsonValue<'_>) -> JsonValue<'static> { JsonValue::Null => JsonValue::Null, JsonValue::Bool(b) => JsonValue::Bool(b), JsonValue::Int(i) => JsonValue::Int(i), + #[cfg(feature = "num-bigint")] JsonValue::BigInt(b) => JsonValue::BigInt(b), JsonValue::Float(f) => JsonValue::Float(f), JsonValue::Str(s) => JsonValue::Str(s.into_owned().into()), @@ -200,6 +204,7 @@ fn take_value<'j, 's>( let n = parser.consume_number::(peek.into_inner(), allow_inf_nan); match n { Ok(NumberAny::Int(NumberInt::Int(int))) => Ok(JsonValue::Int(int)), + #[cfg(feature = "num-bigint")] Ok(NumberAny::Int(NumberInt::BigInt(big_int))) => Ok(JsonValue::BigInt(big_int)), Ok(NumberAny::Float(float)) => Ok(JsonValue::Float(float)), Err(e) => {