From 988359e02544daa8ca14443959cac44021333beb Mon Sep 17 00:00:00 2001
From: Andrew Lamb
Date: Tue, 21 Jan 2025 11:34:51 -0500
Subject: [PATCH] Add tests for slicing larger arrays

---
 arrow-ipc/src/writer.rs | 86 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 85 insertions(+), 1 deletion(-)

diff --git a/arrow-ipc/src/writer.rs b/arrow-ipc/src/writer.rs
index ee5b9a54cc90..8e6a72821552 100644
--- a/arrow-ipc/src/writer.rs
+++ b/arrow-ipc/src/writer.rs
@@ -1793,7 +1793,7 @@ mod tests {
     use std::io::Cursor;
     use std::io::Seek;
 
-    use arrow_array::builder::GenericListBuilder;
+    use arrow_array::builder::{GenericListBuilder, ListBuilder, StringBuilder};
     use arrow_array::builder::MapBuilder;
     use arrow_array::builder::UnionBuilder;
     use arrow_array::builder::{PrimitiveRunBuilder, UInt32Builder};
@@ -2433,6 +2433,90 @@ mod tests {
         );
     }
 
+    #[test]
+    fn test_large_slice_uint32() {
+        ensure_roundtrip(Arc::new(UInt32Array::from_iter((0..8000).map(|i| {
+            if i % 2 == 0 {
+                Some(i)
+            } else {
+                None
+            }
+        }))));
+    }
+
+    #[test]
+    fn test_large_slice_string() {
+        let strings: Vec<_> = (0..8000)
+            .map(|i| {
+                if i % 2 == 0 {
+                    Some(format!("value{i}"))
+                } else {
+                    None
+                }
+            })
+            .collect();
+
+        ensure_roundtrip(Arc::new(StringArray::from(strings)));
+    }
+
+    #[test]
+    fn test_large_slice_string_list() {
+        use std::fmt::Write;
+
+        let mut ls = ListBuilder::new(StringBuilder::new());
+
+        // One scratch buffer is reused for every formatted value.
+        let mut s = String::new();
+        for row_number in 0..8000 {
+            if row_number % 2 == 0 {
+                for list_element in 0..1000 {
+                    s.clear();
+                    write!(&mut s, "value{row_number}-{list_element}").unwrap();
+                    ls.values().append_value(&s);
+                }
+                // Finish the row: the child values only become a list entry
+                // once `append` is called on the list builder itself.
+                ls.append(true);
+            } else {
+                // Odd rows are null lists.
+                ls.append_null();
+            }
+        }
+
+        ensure_roundtrip(Arc::new(ls.finish()));
+    }
+
+    /// Slices off the first row of `array`, writes the slice with a
+    /// [`StreamWriter`], reads it back with a [`StreamReader`], and asserts
+    /// that the batch read back equals the sliced batch.
+    ///
+    /// Panics if `array` is empty, since there is no first row to drop.
+    fn ensure_roundtrip(array: ArrayRef) {
+        let num_rows = array.len();
+        assert!(num_rows > 0, "ensure_roundtrip requires a non-empty array");
+        let orig_batch = RecordBatch::try_from_iter(vec![("a", array)]).unwrap();
+        // take off the first element
+        let sliced_batch = orig_batch.slice(1, num_rows - 1);
+
+        let schema = orig_batch.schema();
+        let stream_data = {
+            let mut writer = StreamWriter::try_new(vec![], &schema).unwrap();
+            writer.write(&sliced_batch).unwrap();
+            writer.into_inner().unwrap()
+        };
+        let read_batch = {
+            let projection = None;
+            let mut reader = StreamReader::try_new(Cursor::new(stream_data), projection).unwrap();
+            reader
+                .next()
+                .expect("expect batch")
+                .expect("expect no errors reading batch")
+        };
+        assert_eq!(sliced_batch, read_batch);
+
+        // TODO test file writer/reader
+    }
+
     #[test]
     fn encode_bools_slice() {
         // Test case for https://github.com/apache/arrow-rs/issues/3496