Skip to content

Commit

Permalink
Using Borrow<Value> to avoid consuming the value if you need to keep …
Browse files Browse the repository at this point in the history
…the JSON and get the schema. This avoids unnecessarily cloning the entire JSON.
  • Loading branch information
rguerreiromsft committed Feb 16, 2023
1 parent 55c598d commit 34e4986
Showing 1 changed file with 6 additions and 4 deletions.
10 changes: 6 additions & 4 deletions arrow-json/src/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
//! let batch = json.next().unwrap().unwrap();
//! ```
use std::borrow::Borrow;
use std::io::{BufRead, BufReader, Read, Seek};
use std::sync::Arc;

Expand Down Expand Up @@ -526,16 +527,17 @@ fn collect_field_types_from_object(
/// We diverge here because we don't have utilities to deal with JSON data once it's
/// interpreted as Strings. We should match Spark's behavior once we add more JSON parsing
/// kernels in the future.
pub fn infer_json_schema_from_iterator<I>(value_iter: I) -> Result<Schema, ArrowError>
pub fn infer_json_schema_from_iterator<I, V>(value_iter: I) -> Result<Schema, ArrowError>
where
I: Iterator<Item = Result<Value, ArrowError>>,
I: Iterator<Item = Result<V, ArrowError>>,
V: Borrow<Value>,
{
let mut field_types: HashMap<String, InferredType> = HashMap::new();

for record in value_iter {
match record? {
match record?.borrow() {
Value::Object(map) => {
collect_field_types_from_object(&mut field_types, &map)?;
collect_field_types_from_object(&mut field_types, map)?;
}
value => {
return Err(ArrowError::JsonError(format!(
Expand Down

0 comments on commit 34e4986

Please sign in to comment.