Skip to content

Commit

Permalink
perf: add CowStr type to be able to use Cow::Borrowed on keys
Browse files Browse the repository at this point in the history
By default, serde always creates Cow::Owned, even when it would be possible to borrow:
serde-rs/serde#1852 (comment)

We create our own wrapper around Cow in order to allow Borrowed keys.

This significantly improves performance:
parse
simple_json
serde_json                               Avg: 283.92 MB/s (-0.84%)     Median: 284.16 MB/s (-0.26%)     [279.00 MB/s .. 289.83 MB/s]
serde_json + access by key               Avg: 276.34 MB/s (-1.24%)     Median: 276.46 MB/s (-1.11%)     [267.63 MB/s .. 284.46 MB/s]
serde_json_borrow                        Avg: 534.96 MB/s (+19.37%)    Median: 533.52 MB/s (+18.98%)    [517.16 MB/s .. 561.15 MB/s]
serde_json_borrow + access by key        Avg: 530.26 MB/s (+18.52%)    Median: 529.16 MB/s (+18.28%)    [519.93 MB/s .. 544.36 MB/s]
SIMD_json_borrow                         Avg: 227.52 MB/s (-0.40%)     Median: 229.19 MB/s (+0.40%)     [184.71 MB/s .. 237.90 MB/s]
hdfs
serde_json                               Avg: 573.25 MB/s (-1.02%)     Median: 573.38 MB/s (-0.78%)     [549.59 MB/s .. 587.74 MB/s]
serde_json + access by key               Avg: 610.33 MB/s (-0.88%)     Median: 609.03 MB/s (-1.00%)     [587.33 MB/s .. 631.20 MB/s]
serde_json_borrow                        Avg: 1.0051 GB/s (+19.36%)    Median: 1.0037 GB/s (+18.06%)    [978.71 MB/s .. 1.0418 GB/s]
serde_json_borrow + access by key        Avg: 1.0453 GB/s (+20.75%)    Median: 1.0432 GB/s (+20.01%)    [1.0123 GB/s .. 1.0802 GB/s]
SIMD_json_borrow                         Avg: 551.42 MB/s (+0.26%)     Median: 554.60 MB/s (+0.91%)     [478.56 MB/s .. 566.50 MB/s]
hdfs_with_array
serde_json                               Avg: 470.84 MB/s (-0.24%)    Median: 470.70 MB/s (-0.20%)    [460.56 MB/s .. 480.44 MB/s]
serde_json + access by key               Avg: 471.89 MB/s (+0.09%)    Median: 471.92 MB/s (-0.03%)    [459.45 MB/s .. 484.84 MB/s]
serde_json_borrow                        Avg: 813.40 MB/s (+7.66%)    Median: 816.02 MB/s (+7.78%)    [798.52 MB/s .. 834.84 MB/s]
serde_json_borrow + access by key        Avg: 821.93 MB/s (+7.64%)    Median: 822.22 MB/s (+7.67%)    [786.16 MB/s .. 838.18 MB/s]
SIMD_json_borrow                         Avg: 458.26 MB/s (-0.15%)    Median: 457.11 MB/s (-0.39%)    [445.08 MB/s .. 473.40 MB/s]
wiki
serde_json                               Avg: 1.2361 GB/s (-0.93%)    Median: 1.2367 GB/s (-1.24%)    [1.2083 GB/s .. 1.2777 GB/s]
serde_json + access by key               Avg: 1.2717 GB/s (-0.63%)    Median: 1.2715 GB/s (-1.91%)    [1.1773 GB/s .. 1.3294 GB/s]
serde_json_borrow                        Avg: 1.4936 GB/s (+3.09%)    Median: 1.4950 GB/s (+3.02%)    [1.4339 GB/s .. 1.5327 GB/s]
serde_json_borrow + access by key        Avg: 1.5255 GB/s (+3.51%)    Median: 1.5266 GB/s (+3.13%)    [1.4733 GB/s .. 1.5849 GB/s]
SIMD_json_borrow                         Avg: 1.2579 GB/s (-1.86%)    Median: 1.2729 GB/s (-1.49%)    [990.58 MB/s .. 1.2995 GB/s]
gh-archive
serde_json                               Avg: 538.63 MB/s (-0.32%)     Median: 539.07 MB/s (-1.29%)     [525.76 MB/s .. 549.05 MB/s]
serde_json + access by key               Avg: 540.55 MB/s (-0.76%)     Median: 541.39 MB/s (-0.84%)     [521.75 MB/s .. 548.68 MB/s]
serde_json_borrow                        Avg: 1.0849 GB/s (+23.13%)    Median: 1.0873 GB/s (+22.41%)    [1.0266 GB/s .. 1.1046 GB/s]
serde_json_borrow + access by key        Avg: 1.0663 GB/s (+20.30%)    Median: 1.0825 GB/s (+22.08%)    [759.26 MB/s .. 1.1250 GB/s]
SIMD_json_borrow                         Avg: 1.0522 GB/s (-0.77%)     Median: 1.0539 GB/s (-0.71%)     [1.0142 GB/s .. 1.0696 GB/s]
  • Loading branch information
jszwec committed Jan 19, 2025
1 parent e4f4a6c commit 2d44376
Show file tree
Hide file tree
Showing 4 changed files with 56 additions and 2 deletions.
37 changes: 37 additions & 0 deletions src/cowstr.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
use std::{borrow::Cow, ops::Deref};

use serde::Deserialize;

/// A wrapper around `Cow<str>` that implements `Deserialize` and can deserialize
/// string keys into `Cow::Borrowed` when possible.
///
/// This wrapper exists because serde's own `Deserialize` impl for `Cow<str>`
/// always produces `Cow::Owned`. The `#[serde(borrow)]` attribute on the inner
/// field is what opts into borrowing from the input. Keys that cannot be
/// referenced verbatim in the input (for example keys containing escape
/// sequences) still end up as `Cow::Owned`.
///
/// See <https://github.com/serde-rs/serde/issues/1852#issuecomment-559517427>.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Deserialize)]
pub struct CowStr<'a>(#[serde(borrow)] pub Cow<'a, str>);

/// Lets a `CowStr` be used wherever a `&str` is expected.
impl Deref for CowStr<'_> {
    type Target = str;

    /// Returns the wrapped string as a plain `&str`, regardless of whether
    /// the inner `Cow` is borrowed or owned.
    fn deref(&self) -> &str {
        self.0.as_ref()
    }
}

/// Allows comparing a `CowStr` directly against a string slice, e.g. `key == "name"`.
impl PartialEq<&str> for CowStr<'_> {
    /// Compares the wrapped string with `other` by content.
    fn eq(&self, other: &&str) -> bool {
        let lhs: &str = &self.0;
        lhs == *other
    }
}

impl<'a> From<&'a str> for CowStr<'a> {
fn from(s: &'a str) -> Self {
Self(Cow::Borrowed(s))
}
}

impl<'a> From<CowStr<'a>> for Cow<'a, str> {
fn from(s: CowStr<'a>) -> Self {
s.0
}
}
16 changes: 16 additions & 0 deletions src/de.rs
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,22 @@ mod tests {

use crate::Value;

/// Checks that object keys borrow from the input whenever possible.
///
/// Gated on the `cowkeys` feature: only then is the key type `CowStr`, which
/// can hold either a borrowed or an owned string. The assertions below are
/// meaningless for the non-`cowkeys` key type.
#[cfg(feature = "cowkeys")]
#[test]
fn cowkeys() {
    let json_obj = r#"
{
"bool": true,
"escaped\"": true
}
"#;

    let val: Value = serde_json::from_str(json_obj).unwrap();

    let obj = val.as_object().unwrap();

    // A key without escape sequences can point straight into the input.
    let plain_key: Cow<'_, str> = obj.as_vec()[0].0.clone().into();
    assert!(matches!(plain_key, Cow::Borrowed(_)));

    // A key containing an escape sequence has to be unescaped into a new buffer.
    let escaped_key: Cow<'_, str> = obj.as_vec()[1].0.clone().into();
    assert!(matches!(escaped_key, Cow::Owned(_)));
}

#[test]
fn deserialize_json_test() {
let json_obj = r#"
Expand Down
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ mod object_vec;
mod owned;
mod ser;
mod value;
mod cowstr;

pub use object_vec::{KeyStrType, ObjectAsVec, ObjectAsVec as Map};
pub use owned::OwnedValue;
Expand Down
4 changes: 2 additions & 2 deletions src/object_vec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@

use std::borrow::Cow;

use crate::Value;
use crate::{cowstr::CowStr, Value};

#[cfg(feature = "cowkeys")]
/// The string type used. Can be toggled between `&str` and `CowStr` (a `Cow<str>` wrapper) via the `cowkeys` feature flag
pub type KeyStrType<'a> = Cow<'a, str>;
pub type KeyStrType<'a> = CowStr<'a>;

#[cfg(not(feature = "cowkeys"))]
/// The string type used. Can be toggled between `&str` and `Cow<str>` via the `cowkeys` feature flag
Expand Down

0 comments on commit 2d44376

Please sign in to comment.