Skip to content

Commit

Permalink
address comments
Browse files Browse the repository at this point in the history
  • Loading branch information
sunchao committed Mar 8, 2022
1 parent b86a377 commit 7ed4d37
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 8 deletions.
2 changes: 1 addition & 1 deletion arrow-pyarrow-integration-testing/tests/test_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@ def test_dictionary_python():
"""
Python -> Rust -> Python
"""
a = pa.array(["a", "b", "a"], type=pa.dictionary(pa.int8(), pa.string()))
a = pa.array(["a", None, "b", None, "a"], type=pa.dictionary(pa.int8(), pa.string()))
b = rust.round_trip_array(a)
assert a == b
del a
Expand Down
15 changes: 13 additions & 2 deletions arrow/src/array/ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -131,8 +131,19 @@ mod tests {

#[test]
fn test_dictionary() -> Result<()> {
let values = StringArray::from(vec!["foo", "bar", "zoo"]);
let keys = Int32Array::from(vec![0, 1, 2, 2, 1, 0, 1, 2, 1, 2]);
let values = StringArray::from(vec![Some("foo"), Some("bar"), None]);
let keys = Int32Array::from(vec![
Some(0),
Some(1),
None,
Some(1),
Some(1),
None,
Some(1),
Some(2),
Some(1),
None,
]);
let array = DictionaryArray::try_new(&keys, &values)?;

let data = array.data();
Expand Down
12 changes: 7 additions & 5 deletions arrow/src/ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -552,6 +552,7 @@ pub trait ArrowArrayRef {
if let Some(d) = self.dictionary() {
// For dictionary type there should only be a single child, so we don't need to worry if
// there are other children added above.
assert!(child_data.is_empty());
child_data.push(d.to_data()?);
}

Expand Down Expand Up @@ -595,9 +596,10 @@ pub trait ArrowArrayRef {
// to fetch offset buffer's len to build the second buffer.
fn buffer_len(&self, i: usize) -> Result<usize> {
// Special handling for dictionary type as we only care about the key type in the case.
let data_type = match &self.data_type()? {
DataType::Dictionary(key_data_type, _) => key_data_type.as_ref().clone(),
dt => dt.clone(),
let t = self.data_type()?;
let data_type = match &t {
DataType::Dictionary(key_data_type, _) => key_data_type.as_ref(),
dt => dt,
};

// Inner type is not important for buffer length.
Expand All @@ -609,7 +611,7 @@ pub trait ArrowArrayRef {
| (DataType::List(_), 1)
| (DataType::LargeList(_), 1) => {
// the len of the offset buffer (buffer 1) equals length + 1
let bits = bit_width(&data_type, i)?;
let bits = bit_width(data_type, i)?;
debug_assert_eq!(bits % 8, 0);
(self.array().length as usize + 1) * (bits / 8)
}
Expand Down Expand Up @@ -641,7 +643,7 @@ pub trait ArrowArrayRef {
}
// buffer len of primitive types
_ => {
let bits = bit_width(&data_type, i)?;
let bits = bit_width(data_type, i)?;
bit_util::ceil(self.array().length as usize * bits, 8)
}
})
Expand Down

0 comments on commit 7ed4d37

Please sign in to comment.