From 81592947e8814327ebdbd1fbc3d4a090796e37a3 Mon Sep 17 00:00:00 2001 From: silence-coding <32766901+silence-coding@users.noreply.github.com> Date: Sat, 26 Mar 2022 01:01:09 +0800 Subject: [PATCH] fix issue#2058 file_format/json.rs attempt to subtract with overflow (#2066) * fix issue#2058 file_format/json.rs attempt to subtract with overflow * issue#2058 add infer_schema_with_limit test Co-authored-by: p00512853 --- datafusion/src/datasource/file_format/json.rs | 22 +++++++++++++++++-- .../tests/jsons/schema_infer_limit.json | 4 ++++ 2 files changed, 24 insertions(+), 2 deletions(-) create mode 100644 datafusion/tests/jsons/schema_infer_limit.json diff --git a/datafusion/src/datasource/file_format/json.rs b/datafusion/src/datasource/file_format/json.rs index 0a347fa364ba..77d489895394 100644 --- a/datafusion/src/datasource/file_format/json.rs +++ b/datafusion/src/datasource/file_format/json.rs @@ -68,13 +68,15 @@ impl FileFormat for JsonFormat { let iter = ValueIter::new(&mut reader, None); let schema = infer_json_schema_from_iterator(iter.take_while(|_| { let should_take = records_to_read > 0; - records_to_read -= 1; + if should_take { + records_to_read -= 1; + } should_take }))?; + schemas.push(schema); if records_to_read == 0 { break; } - schemas.push(schema); } let schema = Schema::try_merge(schemas)?; @@ -228,4 +230,20 @@ mod tests { .await?; Ok(exec) } + + #[tokio::test] + async fn infer_schema_with_limit() { + let filename = "tests/jsons/schema_infer_limit.json"; + let format = JsonFormat::default().with_schema_infer_max_rec(Some(3)); + let file_schema = format + .infer_schema(local_object_reader_stream(vec![filename.to_owned()])) + .await + .expect("Schema inference"); + let fields = file_schema + .fields() + .iter() + .map(|f| format!("{}: {:?}", f.name(), f.data_type())) + .collect::<Vec<_>>(); + assert_eq!(vec!["a: Int64", "b: Float64", "c: Boolean"], fields); + } } diff --git a/datafusion/tests/jsons/schema_infer_limit.json 
b/datafusion/tests/jsons/schema_infer_limit.json new file mode 100644 index 000000000000..bfacf2fa5618 --- /dev/null +++ b/datafusion/tests/jsons/schema_infer_limit.json @@ -0,0 +1,4 @@ +{"a":1} +{"a":-10, "b":-3.5} +{"a":2, "b":0.6, "c":false} +{"a":1, "b":2.0, "c":false, "d":"4"} \ No newline at end of file