diff --git a/tests/it/io/json/mod.rs b/tests/it/io/json/mod.rs index 071e1b7d4fe..03b61c3b5ad 100644 --- a/tests/it/io/json/mod.rs +++ b/tests/it/io/json/mod.rs @@ -5,6 +5,8 @@ use std::io::Cursor; use std::sync::Arc; use arrow2::array::*; +use arrow2::bitmap::Bitmap; +use arrow2::buffer::Buffer; use arrow2::chunk::Chunk; use arrow2::datatypes::*; use arrow2::error::Result; @@ -65,14 +67,14 @@ fn round_trip_list() -> Result<()> { round_trip(data) } -fn case_list() -> (String, Schema, Vec>) { +fn case_list() -> (String, Vec, Vec>) { let data = r#"{"a":1, "b":[2.0, 1.3, -6.1], "c":[false, true], "d":"4"} {"a":-10, "b":null, "c":[true, true]} {"a":null, "b":[2.1, null, -6.2], "c":[false, null], "d":"text"} "# .to_string(); - let schema = Schema::from(vec![ + let fields = vec![ Field::new("a", DataType::Int64, true), Field::new( "b", @@ -85,9 +87,9 @@ fn case_list() -> (String, Schema, Vec>) { true, ), Field::new("d", DataType::Utf8, true), - ]); - let a = Int64Array::from(&[Some(1), Some(-10), None]); + ]; + let a = Int64Array::from(&[Some(1), Some(-10), None]); let mut b = MutableListArray::>::new(); b.try_extend(vec![ Some(vec![Some(2.0), Some(1.3), Some(-6.1)]), @@ -115,10 +117,10 @@ fn case_list() -> (String, Schema, Vec>) { Box::new(d), ]; - (data, schema, columns) + (data, fields, columns) } -fn case_dict() -> (String, Schema, Vec>) { +fn case_dict() -> (String, Vec, Vec>) { let data = r#"{"machine": "a", "events": [null, "Elect Leader", "Do Ballot"]} {"machine": "b", "events": ["Do Ballot", null, "Send Data", "Elect Leader"]} {"machine": "c", "events": ["Send Data"]} @@ -133,7 +135,7 @@ fn case_dict() -> (String, Schema, Vec>) { true, ))); - let schema = Schema::from(vec![Field::new("events", data_type, true)]); + let fields = vec![Field::new("events", data_type, true)]; type A = MutableDictionaryArray>; @@ -155,41 +157,41 @@ fn case_dict() -> (String, Schema, Vec>) { let array: ListArray = array.into(); - (data, schema, vec![Box::new(array) as Box]) + (data, fields, vec![Box::new(array) as Box]) } -fn case_basics() -> (String, Schema, Vec>) { +fn case_basics() -> (String, Vec, Vec>) { let data = r#"{"a":1, "b":2.0, "c":false, "d":"4"} {"a":-10, "b":-3.5, "c":true, "d":null} {"a":100000000, "b":0.6, "d":"text"}"# .to_string(); - let schema = Schema::from(vec![ + let fields = vec![ Field::new("a", DataType::Int64, true), Field::new("b", DataType::Float64, true), Field::new("c", DataType::Boolean, true), Field::new("d", DataType::Utf8, true), - ]); + ]; let columns = vec![ Box::new(Int64Array::from_slice(&[1, -10, 100000000])) as Box, Box::new(Float64Array::from_slice(&[2.0, -3.5, 0.6])), Box::new(BooleanArray::from(&[Some(false), Some(true), None])), Box::new(Utf8Array::::from(&[Some("4"), None, Some("text")])), ]; - (data, schema, columns) + (data, fields, columns) } -fn case_basics_schema() -> (String, Schema, Vec>) { +fn case_projection() -> (String, Vec, Vec>) { let data = r#"{"a":1, "b":2.0, "c":false, "d":"4", "e":"4"} {"a":10, "b":-3.5, "c":true, "d":null, "e":"text"} {"a":100000000, "b":0.6, "d":"text"}"# .to_string(); - let schema = Schema::from(vec![ + let fields = vec![ Field::new("a", DataType::UInt32, true), Field::new("b", DataType::Float32, true), Field::new("c", DataType::Boolean, true), // note how "d" is not here Field::new("e", DataType::Binary, true), - ]); + ]; let columns = vec![ Box::new(UInt32Array::from_slice(&[1, 10, 100000000])) as Box, Box::new(Float32Array::from_slice(&[2.0, -3.5, 0.6])), @@ -200,10 +202,10 @@ fn case_basics_schema() -> (String, Schema, Vec>) { None, ])), ]; - (data, schema, columns) + (data, fields, columns) } -fn case_struct() -> (String, Schema, Vec>) { +fn case_struct() -> (String, Vec, Vec>) { let data = r#"{"a": {"b": true, "c": {"d": "text"}}} {"a": {"b": false, "c": null}} {"a": {"b": true, "c": {"d": "text"}}} @@ -220,7 +222,7 @@ fn case_struct() -> (String, Schema, Vec>) { ]), true, ); - let schema = Schema::from(vec![a_field]); + let fields = vec![a_field]; // build expected output let d = Utf8Array::::from(&vec![Some("text"), None, Some("text"), None]); @@ -233,5 +235,76 @@ fn case_struct() -> (String, Schema, Vec>) { None, ); - (data, schema, vec![Box::new(expected) as Box]) + (data, fields, vec![Box::new(expected) as Box]) +} + +fn case_nested_list() -> (String, Vec, Vec>) { + let d_field = Field::new("d", DataType::Utf8, true); + let c_field = Field::new("c", DataType::Struct(vec![d_field.clone()]), true); + let b_field = Field::new("b", DataType::Boolean, true); + let a_struct_field = Field::new( + "a", + DataType::Struct(vec![b_field.clone(), c_field.clone()]), + true, + ); + let a_list_data_type = DataType::List(Box::new(a_struct_field)); + let a_field = Field::new("a", a_list_data_type.clone(), true); + + let data = r#" + {"a": [{"b": true, "c": {"d": "a_text"}}, {"b": false, "c": {"d": "b_text"}}]} + {"a": [{"b": false, "c": null}]} + {"a": [{"b": true, "c": {"d": "c_text"}}, {"b": null, "c": {"d": "d_text"}}, {"b": true, "c": {"d": null}}]} + {"a": null} + {"a": []} + "#.to_string(); + + // build expected output + let d = Utf8Array::::from(&vec![ + Some("a_text"), + Some("b_text"), + None, + Some("c_text"), + Some("d_text"), + None, + ]); + + let c = StructArray::from_data(DataType::Struct(vec![d_field]), vec![Arc::new(d)], None); + + let b = BooleanArray::from(vec![ + Some(true), + Some(false), + Some(false), + Some(true), + None, + Some(true), + ]); + let a_struct = StructArray::from_data( + DataType::Struct(vec![b_field, c_field]), + vec![Arc::new(b) as Arc, Arc::new(c) as Arc], + None, + ); + let expected = ListArray::from_data( + a_list_data_type, + Buffer::from_slice([0i32, 2, 3, 6, 6, 6]), + Arc::new(a_struct) as Arc, + Some(Bitmap::from_u8_slice([0b00010111], 5)), + ); + + ( + data, + vec![a_field], + vec![Box::new(expected) as Box], + ) +} + +fn case(case: &str) -> (String, Vec, Vec>) { + match case { + "basics" => case_basics(), + "projection" => case_projection(), + "list" => case_list(), + "dict" => case_dict(), + "struct" => case_struct(), + "nested_list" => case_nested_list(), + _ => todo!(), + } } diff --git a/tests/it/io/json/read.rs b/tests/it/io/json/read.rs index d8eccb931a9..db973ee0d53 100644 --- a/tests/it/io/json/read.rs +++ b/tests/it/io/json/read.rs @@ -1,17 +1,16 @@ -use std::{io::Cursor, sync::Arc}; +use std::io::Cursor; use arrow2::array::*; use arrow2::datatypes::*; +use arrow2::error::Result; use arrow2::io::json::read; -use arrow2::{bitmap::Bitmap, buffer::Buffer, error::Result}; use super::*; -#[test] -fn basic() -> Result<()> { - let (data, schema, columns) = case_basics(); +fn test_case(case_: &str) -> Result<()> { + let (data, fields, columns) = case(case_); - let batch = read_batch(data, &schema.fields)?; + let batch = read_batch(data, &fields)?; columns .iter() @@ -21,29 +20,33 @@ fn basic() -> Result<()> { } #[test] -fn basic_projection() -> Result<()> { - let (data, schema, columns) = case_basics_schema(); +fn basic() -> Result<()> { + test_case("basics") +} - let batch = read_batch(data, &schema.fields)?; +#[test] +fn projection() -> Result<()> { + test_case("projection") +} - columns - .iter() - .zip(batch.columns()) - .for_each(|(expected, result)| assert_eq!(expected.as_ref(), result.as_ref())); - Ok(()) +#[test] +fn dictionary() -> Result<()> { + test_case("dict") } #[test] -fn lists() -> Result<()> { - let (data, schema, columns) = case_list(); +fn list() -> Result<()> { + test_case("list") +} - let batch = read_batch(data, &schema.fields)?; +#[test] +fn nested_struct() -> Result<()> { + test_case("struct") +} - columns - .iter() - .zip(batch.columns()) - .for_each(|(expected, result)| assert_eq!(expected.as_ref(), result.as_ref())); - Ok(()) +#[test] +fn nested_list() -> Result<()> { + test_case("nested_list") } #[test] @@ -88,78 +91,6 @@ fn invalid_read_record() -> Result<()> { Ok(()) } -#[test] -fn nested_struct_arrays() -> Result<()> { - let (data, schema, columns) = case_struct(); - - let batch = read_batch(data, &schema.fields)?; - - columns - .iter() - .zip(batch.columns()) - .for_each(|(expected, result)| assert_eq!(expected.as_ref(), result.as_ref())); - Ok(()) -} - -#[test] -fn nested_list_arrays() -> Result<()> { - let d_field = Field::new("d", DataType::Utf8, true); - let c_field = Field::new("c", DataType::Struct(vec![d_field.clone()]), true); - let b_field = Field::new("b", DataType::Boolean, true); - let a_struct_field = Field::new( - "a", - DataType::Struct(vec![b_field.clone(), c_field.clone()]), - true, - ); - let a_list_data_type = DataType::List(Box::new(a_struct_field)); - let a_field = Field::new("a", a_list_data_type.clone(), true); - - let data = r#" - {"a": [{"b": true, "c": {"d": "a_text"}}, {"b": false, "c": {"d": "b_text"}}]} - {"a": [{"b": false, "c": null}]} - {"a": [{"b": true, "c": {"d": "c_text"}}, {"b": null, "c": {"d": "d_text"}}, {"b": true, "c": {"d": null}}]} - {"a": null} - {"a": []} - "#; - - let batch = read_batch(data.to_string(), &[a_field])?; - - // build expected output - let d = Utf8Array::::from(&vec![ - Some("a_text"), - Some("b_text"), - None, - Some("c_text"), - Some("d_text"), - None, - ]); - - let c = StructArray::from_data(DataType::Struct(vec![d_field]), vec![Arc::new(d)], None); - - let b = BooleanArray::from(vec![ - Some(true), - Some(false), - Some(false), - Some(true), - None, - Some(true), - ]); - let a_struct = StructArray::from_data( - DataType::Struct(vec![b_field, c_field]), - vec![Arc::new(b) as Arc, Arc::new(c) as Arc], - None, - ); - let expected = ListArray::from_data( - a_list_data_type, - Buffer::from_slice([0i32, 2, 3, 6, 6, 6]), - Arc::new(a_struct) as Arc, - Some(Bitmap::from_u8_slice([0b00010111], 5)), - ); - - assert_eq!(expected, batch.columns()[0].as_ref()); - Ok(()) -} - #[test] fn skip_empty_lines() { let data = " @@ -188,16 +119,6 @@ fn row_type_validation() { ); } -#[test] -fn list_of_string_dictionary_from_with_nulls() -> Result<()> { - let (data, schema, columns) = case_dict(); - - let batch = read_batch(data, &schema.fields)?; - - assert_eq!(columns[0].as_ref(), batch.columns()[0].as_ref()); - Ok(()) -} - #[test] fn infer_schema_mixed_list() -> Result<()> { let data = r#"{"a":1, "b":[2.0, 1.3, -6.1], "c":[false, true], "d":4.1}