From 2812f285334dc1c76d0bde1dfc4c9d8915b8b01e Mon Sep 17 00:00:00 2001 From: SimonSchneider Date: Mon, 16 May 2022 10:21:37 +0200 Subject: [PATCH 1/2] fix: encode float::NAN as null in json --- src/io/json/write/serialize.rs | 44 ++++++++++++++++++++++++++++++++-- tests/it/io/json/write.rs | 18 ++++++++++++++ 2 files changed, 60 insertions(+), 2 deletions(-) diff --git a/src/io/json/write/serialize.rs b/src/io/json/write/serialize.rs index 3b1f9e087b5..41c756586df 100644 --- a/src/io/json/write/serialize.rs +++ b/src/io/json/write/serialize.rs @@ -43,6 +43,46 @@ fn primitive_serializer<'a, T: NativeType + ToLexical>( )) } +fn f64_serializer<'a>( + array: &'a PrimitiveArray, +) -> Box + 'a + Send + Sync> { + Box::new(BufStreamingIterator::new( + array.iter(), + |x, buf| { + if let Some(x) = x { + if f64::is_nan(*x) { + buf.extend(b"null") + } else { + lexical_to_bytes_mut(*x, buf) + } + } else { + buf.extend(b"null") + } + }, + vec![], + )) +} + +fn f32_serializer<'a>( + array: &'a PrimitiveArray, +) -> Box + 'a + Send + Sync> { + Box::new(BufStreamingIterator::new( + array.iter(), + |x, buf| { + if let Some(x) = x { + if f32::is_nan(*x) { + buf.extend(b"null") + } else { + lexical_to_bytes_mut(*x, buf) + } + } else { + buf.extend(b"null") + } + }, + vec![], + )) +} + fn utf8_serializer<'a, O: Offset>( array: &'a Utf8Array, ) -> Box + 'a + Send + Sync> { @@ -196,8 +236,8 @@ pub(crate) fn new_serializer<'a>( DataType::UInt16 => primitive_serializer::(array.as_any().downcast_ref().unwrap()), DataType::UInt32 => primitive_serializer::(array.as_any().downcast_ref().unwrap()), DataType::UInt64 => primitive_serializer::(array.as_any().downcast_ref().unwrap()), - DataType::Float32 => primitive_serializer::(array.as_any().downcast_ref().unwrap()), - DataType::Float64 => primitive_serializer::(array.as_any().downcast_ref().unwrap()), + DataType::Float32 => f32_serializer(array.as_any().downcast_ref().unwrap()), + DataType::Float64 => f64_serializer(array.as_any().downcast_ref().unwrap()), DataType::Utf8 => utf8_serializer::(array.as_any().downcast_ref().unwrap()), DataType::LargeUtf8 => utf8_serializer::(array.as_any().downcast_ref().unwrap()), DataType::Struct(_) => struct_serializer(array.as_any().downcast_ref().unwrap()), diff --git a/tests/it/io/json/write.rs b/tests/it/io/json/write.rs index 69e4bfa9d94..d79a5310f06 100644 --- a/tests/it/io/json/write.rs +++ b/tests/it/io/json/write.rs @@ -28,6 +28,24 @@ fn int32() -> Result<()> { test!(array, expected) } +#[test] +fn f32() -> Result<()> { + let array = Float32Array::from([Some(1.5), Some(2.5), Some(f32::NAN), None, Some(5.5)]); + + let expected = r#"[1.5,2.5,null,null,5.5]"#; + + test!(array, expected) +} + +#[test] +fn f64() -> Result<()> { + let array = Float64Array::from([Some(1.5), Some(2.5), Some(f64::NAN), None, Some(5.5)]); + + let expected = r#"[1.5,2.5,null,null,5.5]"#; + + test!(array, expected) +} + #[test] fn utf8() -> Result<()> { let array = Utf8Array::::from(&vec![Some("a"), Some("b"), Some("c"), Some("d"), None]); From 19968a34d77df07af084bbe87c7c16da1b632b36 Mon Sep 17 00:00:00 2001 From: SimonSchneider Date: Mon, 16 May 2022 18:01:16 +0200 Subject: [PATCH 2/2] generic float serializer --- src/io/json/write/serialize.rs | 35 +++++++++------------------------- 1 file changed, 9 insertions(+), 26 deletions(-) diff --git a/src/io/json/write/serialize.rs b/src/io/json/write/serialize.rs index 41c756586df..fb90c17ad0f 100644 --- a/src/io/json/write/serialize.rs +++ b/src/io/json/write/serialize.rs @@ -43,34 +43,17 @@ fn primitive_serializer<'a, T: NativeType + ToLexical>( )) } -fn f64_serializer<'a>( - array: &'a PrimitiveArray, -) -> Box + 'a + Send + Sync> { - Box::new(BufStreamingIterator::new( - array.iter(), - |x, buf| { - if let Some(x) = x { - if f64::is_nan(*x) { - buf.extend(b"null") - } else { - lexical_to_bytes_mut(*x, buf) - } - } else { - buf.extend(b"null") - } - }, - vec![], - )) -} - -fn f32_serializer<'a>( - array: &'a PrimitiveArray, -) -> Box + 'a + Send + Sync> { +fn float_serializer<'a, T>( + array: &'a PrimitiveArray, +) -> Box + 'a + Send + Sync> +where + T: num_traits::Float + NativeType + ToLexical, +{ Box::new(BufStreamingIterator::new( array.iter(), |x, buf| { if let Some(x) = x { - if f32::is_nan(*x) { + if T::is_nan(*x) { buf.extend(b"null") } else { lexical_to_bytes_mut(*x, buf) @@ -236,8 +219,8 @@ pub(crate) fn new_serializer<'a>( DataType::UInt16 => primitive_serializer::(array.as_any().downcast_ref().unwrap()), DataType::UInt32 => primitive_serializer::(array.as_any().downcast_ref().unwrap()), DataType::UInt64 => primitive_serializer::(array.as_any().downcast_ref().unwrap()), - DataType::Float32 => f32_serializer(array.as_any().downcast_ref().unwrap()), - DataType::Float64 => f64_serializer(array.as_any().downcast_ref().unwrap()), + DataType::Float32 => float_serializer::(array.as_any().downcast_ref().unwrap()), + DataType::Float64 => float_serializer::(array.as_any().downcast_ref().unwrap()), DataType::Utf8 => utf8_serializer::(array.as_any().downcast_ref().unwrap()), DataType::LargeUtf8 => utf8_serializer::(array.as_any().downcast_ref().unwrap()), DataType::Struct(_) => struct_serializer(array.as_any().downcast_ref().unwrap()),