From 4873d61d54800290b1523f89b421e400a91c11d5 Mon Sep 17 00:00:00 2001 From: Mehmet Ozan Kabak Date: Thu, 9 May 2024 12:58:15 +0300 Subject: [PATCH] Review Part 1 --- datafusion-cli/src/helper.rs | 17 +++++--------- datafusion/common/src/config.rs | 23 ++++++++++--------- .../core/src/datasource/file_format/csv.rs | 12 +++++----- 3 files changed, 24 insertions(+), 28 deletions(-) diff --git a/datafusion-cli/src/helper.rs b/datafusion-cli/src/helper.rs index 28cba06723aa..f93aaec4218d 100644 --- a/datafusion-cli/src/helper.rs +++ b/datafusion-cli/src/helper.rs @@ -20,25 +20,20 @@ use std::borrow::Cow; +use crate::highlighter::{NoSyntaxHighlighter, SyntaxHighlighter}; + use datafusion::common::sql_datafusion_err; use datafusion::error::DataFusionError; use datafusion::sql::parser::{DFParser, Statement}; use datafusion::sql::sqlparser::dialect::dialect_from_str; use datafusion::sql::sqlparser::parser::ParserError; -use rustyline::completion::Completer; -use rustyline::completion::FilenameCompleter; -use rustyline::completion::Pair; + +use rustyline::completion::{Completer, FilenameCompleter, Pair}; use rustyline::error::ReadlineError; use rustyline::highlight::Highlighter; use rustyline::hint::Hinter; -use rustyline::validate::ValidationContext; -use rustyline::validate::ValidationResult; -use rustyline::validate::Validator; -use rustyline::Context; -use rustyline::Helper; -use rustyline::Result; - -use crate::highlighter::{NoSyntaxHighlighter, SyntaxHighlighter}; +use rustyline::validate::{ValidationContext, ValidationResult, Validator}; +use rustyline::{Context, Helper, Result}; pub struct CliHelper { completer: FilenameCompleter, diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index 49d31257c8ba..a1445dfb2f40 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -1564,20 +1564,21 @@ config_namespace_with_hashmap! { config_namespace! { /// Options controlling CSV format pub struct CsvOptions { - /// If the first line of the CSV is column names. - /// If not specified, uses default from `CREATE TABLE` command, if any. - pub has_header: Option, default =None + /// Specifies whether there is a CSV header (i.e. the first line + /// consists of is column names). If not specified, uses default from + /// the `CREATE TABLE` command, if any. + pub has_header: Option, default = None pub delimiter: u8, default = b',' pub quote: u8, default = b'"' pub escape: Option, default = None pub compression: CompressionTypeVariant, default = CompressionTypeVariant::UNCOMPRESSED pub schema_infer_max_rec: usize, default = 100 - pub date_format: Option, default = None - pub datetime_format: Option, default = None - pub timestamp_format: Option, default = None - pub timestamp_tz_format: Option, default = None - pub time_format: Option, default = None - pub null_value: Option, default = None + pub date_format: Option, default = None + pub datetime_format: Option, default = None + pub timestamp_format: Option, default = None + pub timestamp_tz_format: Option, default = None + pub time_format: Option, default = None + pub null_value: Option, default = None } } @@ -1606,8 +1607,8 @@ impl CsvOptions { self } - /// True if the first line is a header. If the condition of having header - /// is not set by the format options, session state decides it. + /// Returns true if the first line is a header. If format options does not + /// specify whether there is a header, consults the configuration. pub fn has_header(&self, config_opt: &ConfigOptions) -> bool { self.has_header.unwrap_or(config_opt.catalog.has_header) } diff --git a/datafusion/core/src/datasource/file_format/csv.rs b/datafusion/core/src/datasource/file_format/csv.rs index ab2582b4e820..3e5c7af1f82e 100644 --- a/datafusion/core/src/datasource/file_format/csv.rs +++ b/datafusion/core/src/datasource/file_format/csv.rs @@ -32,8 +32,9 @@ use crate::datasource::physical_plan::{ use crate::error::Result; use crate::execution::context::SessionState; use crate::physical_plan::insert::{DataSink, DataSinkExec}; -use crate::physical_plan::{DisplayAs, DisplayFormatType, Statistics}; -use crate::physical_plan::{ExecutionPlan, SendableRecordBatchStream}; +use crate::physical_plan::{ + DisplayAs, DisplayFormatType, ExecutionPlan, SendableRecordBatchStream, Statistics, +}; use arrow::array::RecordBatch; use arrow::csv::WriterBuilder; @@ -242,8 +243,8 @@ impl FileFormat for CsvFormat { ) -> Result> { let exec = CsvExec::new( conf, - // If the condition of having header is not set by - // the format options, session state decides it. + // If format options does not specify whether there is a header, + // we consult configuration options. self.options.has_header(state.config_options()), self.options.delimiter, self.options.quote, @@ -819,8 +820,7 @@ mod tests { has_header: bool, ) -> Result> { let root = format!("{}/csv", crate::test_util::arrow_test_data()); - let mut format = CsvFormat::default(); - format = format.with_has_header(has_header); + let format = CsvFormat::default().with_has_header(has_header); scan_format(state, &format, &root, file_name, projection, limit).await }