Skip to content

Commit

Permalink
Merge pull request #5 from relativityone/fix-list-offsets
Browse files Browse the repository at this point in the history
fix: Encoding of List offsets was incorrect when slice offsets begin with zero
  • Loading branch information
HawaiianSpork authored Nov 26, 2024
2 parents b380a00 + 64d8181 commit b666044
Showing 1 changed file with 46 additions and 1 deletion.
47 changes: 46 additions & 1 deletion arrow-ipc/src/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1309,7 +1309,7 @@ fn reencode_offsets<O: OffsetSizeTrait>(
let end_offset = offset_slice.last().unwrap();

let offsets = match start_offset.as_usize() {
0 => offsets.clone(),
0 => offset_slice.iter().copied().collect(),
_ => offset_slice.iter().map(|x| *x - *start_offset).collect(),
};

Expand Down Expand Up @@ -2396,6 +2396,38 @@ mod tests {
ls.finish()
}

/// Builds a two-level nested list array (lists of lists of `u32`) for the
/// IPC writer tests.
///
/// Layout of the outer rows, in order:
/// 1. 999 rows that each hold a single *empty* inner list,
/// 2. one row holding ten inner lists of four identical values (`0..10`),
/// 3. 9,000 rows that each hold ten inner lists of four identical values,
///    where the value for row `i`, inner list `j` is `i + j`.
///
/// Every outer and inner entry is appended as valid (non-null).
// NOTE(review): the leading run of empty inner lists presumably keeps the
// inner offsets at zero for the first slice of the array, which is the case
// the accompanying `reencode_offsets` fix targets — confirm against the writer.
fn generate_nested_list_data_starting_at_zero<O: OffsetSizeTrait>() -> GenericListArray<O> {
    let mut outer =
        GenericListBuilder::<O, _>::new(GenericListBuilder::<O, _>::new(UInt32Builder::new()));

    // Phase 1: outer rows containing exactly one empty inner list each.
    for _ in 0..999 {
        outer.values().append(true);
        outer.append(true);
    }

    // Phase 2: a single outer row with ten populated inner lists.
    for repeated in 0..10 {
        for value in [repeated; 4] {
            outer.values().values().append_value(value);
        }
        outer.values().append(true);
    }
    outer.append(true);

    // Phase 3: the bulk of the data; inner list `j` of row `row` repeats `row + j`.
    for row in 0..9_000 {
        for repeated in row..row + 10 {
            for value in [repeated; 4] {
                outer.values().values().append_value(value);
            }
            outer.values().append(true);
        }
        outer.append(true);
    }

    outer.finish()
}

fn generate_map_array_data() -> MapArray {
let keys_builder = UInt32Builder::new();
let values_builder = UInt32Builder::new();
Expand Down Expand Up @@ -2487,6 +2519,19 @@ mod tests {
roundtrip_ensure_sliced_smaller(in_batch, 1000);
}

/// Round-trips a `List<List<UInt32>>` column built by
/// `generate_nested_list_data_starting_at_zero` through
/// `roundtrip_ensure_sliced_smaller`.
// NOTE(review): `roundtrip_ensure_sliced_smaller` is defined elsewhere in this
// module; the meaning of its `1000` argument should be confirmed there.
#[test]
fn encode_nested_lists_starting_at_zero() {
    // Schema: a single nullable column `val` of type List<List<UInt32>>.
    let leaf_field = Field::new("item", DataType::UInt32, true);
    let inner_list_type = DataType::List(Arc::new(leaf_field));
    let outer_list_type = DataType::List(Arc::new(Field::new("item", inner_list_type, true)));
    let schema = Arc::new(Schema::new(vec![Field::new("val", outer_list_type, true)]));

    let values = Arc::new(generate_nested_list_data_starting_at_zero::<i32>());
    let in_batch = RecordBatch::try_new(schema, vec![values]).unwrap();

    roundtrip_ensure_sliced_smaller(in_batch, 1000);
}

#[test]
fn encode_map_array() {
let keys = Arc::new(Field::new("keys", DataType::UInt32, false));
Expand Down

0 comments on commit b666044

Please sign in to comment.