Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Relax JSON schema inference generics #4063

Merged
merged 2 commits into from
Apr 14, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 13 additions & 14 deletions arrow-json/src/reader/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ use indexmap::map::IndexMap as HashMap;
use indexmap::set::IndexSet as HashSet;
use serde_json::Value;
use std::borrow::Borrow;
use std::io::{BufRead, BufReader, Read, Seek};
use std::io::{BufRead, Seek};
use std::sync::Arc;

#[derive(Debug, Clone)]
Expand Down Expand Up @@ -147,17 +147,17 @@ fn generate_schema(spec: HashMap<String, InferredType>) -> Result<Schema, ArrowE
/// }
/// ```
#[derive(Debug)]
pub struct ValueIter<'a, R: Read> {
reader: &'a mut BufReader<R>,
pub struct ValueIter<R: BufRead> {
reader: R,
max_read_records: Option<usize>,
record_count: usize,
// reuse line buffer to avoid allocation on each record
line_buf: String,
}

impl<'a, R: Read> ValueIter<'a, R> {
impl<R: BufRead> ValueIter<R> {
/// Creates a new `ValueIter`
pub fn new(reader: &'a mut BufReader<R>, max_read_records: Option<usize>) -> Self {
pub fn new(reader: R, max_read_records: Option<usize>) -> Self {
Self {
reader,
max_read_records,
Expand All @@ -167,7 +167,7 @@ impl<'a, R: Read> ValueIter<'a, R> {
}
}

impl<'a, R: Read> Iterator for ValueIter<'a, R> {
impl<R: BufRead> Iterator for ValueIter<R> {
type Item = Result<Value, ArrowError>;

fn next(&mut self) -> Option<Self::Item> {
Expand Down Expand Up @@ -228,11 +228,11 @@ impl<'a, R: Read> Iterator for ValueIter<'a, R> {
/// ```
///
/// [`Reader`]: super::Reader
pub fn infer_json_schema_from_seekable<R: Read + Seek>(
reader: &mut BufReader<R>,
pub fn infer_json_schema_from_seekable<R: BufRead + Seek>(
mut reader: R,
max_read_records: Option<usize>,
) -> Result<Schema, ArrowError> {
let schema = infer_json_schema(reader, max_read_records);
let schema = infer_json_schema(&mut reader, max_read_records);
// return the reader seek back to the start
reader.rewind()?;

Expand Down Expand Up @@ -265,8 +265,8 @@ pub fn infer_json_schema_from_seekable<R: Read + Seek>(
/// // seek back to start so that the original file is usable again
/// file.seek(SeekFrom::Start(0)).unwrap();
/// ```
pub fn infer_json_schema<R: Read>(
reader: &mut BufReader<R>,
pub fn infer_json_schema<R: BufRead>(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The idea here is that any existing user of this API will be fine because a &mut BufReader<R> also implements BufRead?

https://doc.rust-lang.org/stable/std/io/trait.BufRead.html

reader: R,
max_read_records: Option<usize>,
) -> Result<Schema, ArrowError> {
infer_json_schema_from_iterator(ValueIter::new(reader, max_read_records))
Expand Down Expand Up @@ -515,7 +515,7 @@ mod tests {
use super::*;
use flate2::read::GzDecoder;
use std::fs::File;
use std::io::Cursor;
use std::io::{BufReader, Cursor};

#[test]
fn test_json_infer_schema() {
Expand Down Expand Up @@ -700,8 +700,7 @@ mod tests {

#[test]
fn test_invalid_json_infer_schema() {
let re =
infer_json_schema_from_seekable(&mut BufReader::new(Cursor::new(b"}")), None);
let re = infer_json_schema_from_seekable(Cursor::new(b"}"), None);
assert_eq!(
re.err().unwrap().to_string(),
"Json error: Not valid JSON: expected value at line 1 column 1",
Expand Down