From 6edbcaa9cb4d428c7fd57408d96d15cbdd82e504 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Fri, 13 Jan 2023 08:44:27 +0100 Subject: [PATCH 1/3] Fix reading null booleans from CSV --- arrow-csv/src/reader/mod.rs | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/arrow-csv/src/reader/mod.rs b/arrow-csv/src/reader/mod.rs index bc6b016ec9cf..1859d1950ffb 100644 --- a/arrow-csv/src/reader/mod.rs +++ b/arrow-csv/src/reader/mod.rs @@ -866,6 +866,9 @@ fn build_boolean_array( .enumerate() .map(|(row_index, row)| { let s = row.get(col_idx); + if s.is_empty() { + return Ok(None); + } let parsed = parse_bool(s); match parsed { Some(e) => Ok(Some(e)), @@ -1122,6 +1125,7 @@ mod tests { use std::io::{Cursor, Write}; use tempfile::NamedTempFile; + use arrow_array::cast::as_boolean_array; use chrono::prelude::*; #[test] @@ -2067,4 +2071,30 @@ mod tests { assert_eq!(b.num_rows(), expected, "{}", idx); } } + + #[test] + fn test_null_boolean() { + let csv = "true,false\nFalse,True\n,True\n"; + let b = ReaderBuilder::new() + .build_buffered(Cursor::new(csv.as_bytes())) + .unwrap() + .next() + .unwrap() + .unwrap(); + + assert_eq!(b.num_rows(), 3); + assert_eq!(b.num_columns(), 2); + + let c = as_boolean_array(b.column(0)); + assert_eq!(c.null_count(), 1); + assert_eq!(c.value(0), true); + assert_eq!(c.value(1), false); + assert!(c.is_null(2)); + + let c = as_boolean_array(b.column(1)); + assert_eq!(c.null_count(), 0); + assert_eq!(c.value(0), false); + assert_eq!(c.value(1), true); + assert_eq!(c.value(2), true); + } } From 045b9210939a6162fb9bf7d8c3c9f234379b85e0 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Fri, 13 Jan 2023 08:49:22 +0100 Subject: [PATCH 2/3] Clippy --- arrow-csv/src/reader/mod.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arrow-csv/src/reader/mod.rs b/arrow-csv/src/reader/mod.rs index 1859d1950ffb..78f22caa5ff3 100644 --- a/arrow-csv/src/reader/mod.rs +++ b/arrow-csv/src/reader/mod.rs @@ -2087,14 +2087,14 @@ mod tests { let c = as_boolean_array(b.column(0)); assert_eq!(c.null_count(), 1); - assert_eq!(c.value(0), true); - assert_eq!(c.value(1), false); + assert!(c.value(0)); + assert!(!c.value(1)); assert!(c.is_null(2)); let c = as_boolean_array(b.column(1)); assert_eq!(c.null_count(), 0); - assert_eq!(c.value(0), false); - assert_eq!(c.value(1), true); - assert_eq!(c.value(2), true); + assert!(!c.value(0)); + assert!(c.value(1)); + assert!(c.value(2)); } } From 27782e8fb1c4eafb4e0ed9ce3356432e88ea59ca Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Fri, 13 Jan 2023 08:53:36 +0100 Subject: [PATCH 3/3] Review feedback --- arrow-csv/src/reader/mod.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arrow-csv/src/reader/mod.rs b/arrow-csv/src/reader/mod.rs index 78f22caa5ff3..0c7bfa897fd4 100644 --- a/arrow-csv/src/reader/mod.rs +++ b/arrow-csv/src/reader/mod.rs @@ -2074,7 +2074,7 @@ mod tests { #[test] fn test_null_boolean() { - let csv = "true,false\nFalse,True\n,True\n"; + let csv = "true,false\nFalse,True\n,True\nFalse,"; let b = ReaderBuilder::new() .build_buffered(Cursor::new(csv.as_bytes())) .unwrap() @@ -2082,7 +2082,7 @@ mod tests { .unwrap() .unwrap(); - assert_eq!(b.num_rows(), 3); + assert_eq!(b.num_rows(), 4); assert_eq!(b.num_columns(), 2); let c = as_boolean_array(b.column(0)); @@ -2090,11 +2090,13 @@ mod tests { assert!(c.value(0)); assert!(!c.value(1)); assert!(c.is_null(2)); + assert!(!c.value(3)); let c = as_boolean_array(b.column(1)); - assert_eq!(c.null_count(), 0); + assert_eq!(c.null_count(), 1); assert!(!c.value(0)); assert!(c.value(1)); assert!(c.value(2)); + assert!(c.is_null(3)); } }