Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Added support for exporting metadata via FFI. (#362)
Browse files Browse the repository at this point in the history
  • Loading branch information
jorgecarleitao authored Sep 2, 2021
1 parent 2e81a8b commit 98b4f17
Show file tree
Hide file tree
Showing 2 changed files with 76 additions and 8 deletions.
77 changes: 69 additions & 8 deletions src/ffi/schema.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use std::{ffi::CStr, ffi::CString, ptr};
use std::{collections::BTreeMap, convert::TryInto, ffi::CStr, ffi::CString, ptr};

use crate::{
datatypes::{DataType, Field, IntervalUnit, TimeUnit},
Expand All @@ -7,6 +7,9 @@ use crate::{

/// Rust-owned state stored behind the `private_data` pointer of an exported
/// `Ffi_ArrowSchema`.
///
/// When the schema is built, its `format`, `name`, `metadata` and `children`
/// pointers are taken from the fields of this struct, and `c_release_schema`
/// reclaims the struct with `Box::from_raw(schema.private_data ...)`.
// NOTE(review): `dead_code` is presumably allowed because some fields are only
// held to keep their allocations alive and are never read back - confirm.
#[allow(dead_code)]
struct SchemaPrivateData {
/// Owned C string whose pointer is exposed as the schema's `name`.
name: CString,
/// Owned C string whose pointer is exposed as the schema's `format`.
format: CString,
/// Encoded field metadata (produced by `metadata_to_bytes`), or `None` when
/// the field carries no metadata; the schema's `metadata` pointer refers to
/// this buffer and is null when this is `None`.
metadata: Option<Vec<u8>>,
/// Raw pointers to the child schemas; exposed as the schema's `children`, and
/// each one is turned back into a `Box` in `c_release_schema`.
children_ptr: Box<[*mut Ffi_ArrowSchema]>,
/// Raw pointer to the dictionary's schema, obtained through `Box::into_raw`,
/// when there is one.
// NOTE(review): where this pointer is released is not visible in this diff - confirm.
dictionary: Option<*mut Ffi_ArrowSchema>,
}
Expand Down Expand Up @@ -35,9 +38,6 @@ unsafe extern "C" fn c_release_schema(schema: *mut Ffi_ArrowSchema) {
}
let schema = &mut *schema;

// take ownership back to release it.
CString::from_raw(schema.format as *mut std::os::raw::c_char);
CString::from_raw(schema.name as *mut std::os::raw::c_char);
let private = Box::from_raw(schema.private_data as *mut SchemaPrivateData);
for child in private.children_ptr.iter() {
let _ = Box::from_raw(*child);
Expand Down Expand Up @@ -91,16 +91,28 @@ impl Ffi_ArrowSchema {
None
};

let metadata = field.metadata().as_ref().map(metadata_to_bytes);

let name = CString::new(name).unwrap();
let format = CString::new(format).unwrap();

let mut private = Box::new(SchemaPrivateData {
name,
format,
metadata,
children_ptr,
dictionary: dictionary.map(Box::into_raw),
});

// <https://arrow.apache.org/docs/format/CDataInterface.html#c.ArrowSchema>
Self {
format: CString::new(format).unwrap().into_raw(),
name: CString::new(name).unwrap().into_raw(),
metadata: std::ptr::null_mut(),
format: private.format.as_ptr(),
name: private.name.as_ptr(),
metadata: private
.metadata
.as_ref()
.map(|x| x.as_ptr())
.unwrap_or(std::ptr::null()) as *const ::std::os::raw::c_char,
flags,
n_children,
children: private.children_ptr.as_mut_ptr(),
Expand Down Expand Up @@ -180,7 +192,10 @@ pub fn to_field(schema: &Ffi_ArrowSchema) -> Result<Field> {
} else {
to_data_type(schema)?
};
Ok(Field::new(schema.name(), data_type, schema.nullable()))
let metadata = unsafe { metadata_from_bytes(schema.metadata) };
let mut field = Field::new(schema.name(), data_type, schema.nullable());
field.set_metadata(metadata);
Ok(field)
}

fn to_data_type(schema: &Ffi_ArrowSchema) -> Result<DataType> {
Expand Down Expand Up @@ -375,3 +390,49 @@ pub(super) fn get_field_child(field: &Field, index: usize) -> Result<Field> {
))),
}
}

/// Serializes field metadata into the byte layout parsed by `metadata_from_bytes`.
///
/// The buffer starts with the number of entries as a native-endian `i32`.
/// Each entry then contributes, in the map's iteration order:
/// its key length (native-endian `i32`), the key bytes, its value length
/// (native-endian `i32`) and the value bytes. Nothing is null-terminated.
///
/// The count and the lengths are converted with `as i32`, so a value above
/// `i32::MAX` would wrap rather than be reported.
fn metadata_to_bytes(metadata: &BTreeMap<String, String>) -> Vec<u8> {
    let mut bytes = Vec::new();

    // Header: how many key/value pairs follow.
    bytes.extend_from_slice(&(metadata.len() as i32).to_ne_bytes());

    for (key, value) in metadata {
        // Every string is written as a length prefix followed by its raw bytes.
        bytes.extend_from_slice(&(key.len() as i32).to_ne_bytes());
        bytes.extend_from_slice(key.as_bytes());
        bytes.extend_from_slice(&(value.len() as i32).to_ne_bytes());
        bytes.extend_from_slice(value.as_bytes());
    }

    bytes
}

/// Reads a native-endian `i32` from the four bytes starting at `ptr`.
///
/// # Safety
///
/// `ptr` must be non-null and valid for reading four bytes. No particular
/// alignment is needed, because the bytes are read as a `[u8; 4]`.
unsafe fn read_ne_i32(ptr: *const u8) -> i32 {
    // A byte array has alignment 1, so this read is fine at any address.
    let raw: [u8; 4] = std::ptr::read(ptr as *const [u8; 4]);
    i32::from_ne_bytes(raw)
}

/// Views the `len` bytes starting at `ptr` as UTF-8 text.
///
/// # Safety
///
/// `ptr` must be non-null and valid for reading `len` bytes. The `'static`
/// lifetime of the result is not checked by the compiler: the returned `str`
/// borrows the pointed-to buffer and must not be used once that buffer is
/// released.
///
/// # Panics
///
/// Panics if the bytes are not valid UTF-8.
unsafe fn read_bytes(ptr: *const u8, len: usize) -> &'static str {
    let utf8 = std::str::from_utf8(std::slice::from_raw_parts(ptr, len));
    utf8.unwrap()
}

/// Parses length-prefixed key/value metadata received over FFI into a map.
///
/// The layout read here mirrors what `metadata_to_bytes` writes: a
/// native-endian `i32` entry count, then for every entry a native-endian `i32`
/// key length, the key bytes, a native-endian `i32` value length and the value
/// bytes.
///
/// Returns `None` when `data` is null. A count of zero (or a negative count,
/// for which the loop does not run) yields `Some` of an empty map. When a key
/// occurs more than once, the last occurrence wins.
///
/// # Safety
///
/// `data` must either be null or point to a buffer that is readable for the
/// whole extent described by the count and lengths encoded in it.
///
/// # Panics
///
/// Panics if a key or value length is negative, or if a key or value is not
/// valid UTF-8.
unsafe fn metadata_from_bytes(
    data: *const ::std::os::raw::c_char,
) -> Option<BTreeMap<String, String>> {
    /// Reads one `i32`-length-prefixed string at `*cursor` and moves `*cursor`
    /// just past it.
    unsafe fn read_prefixed(cursor: &mut *const u8) -> &'static str {
        let len = read_ne_i32(*cursor);
        // A negative length cast with `as usize` becomes an enormous value;
        // building a slice of that size is undefined behaviour, so reject it
        // before any memory is touched.
        assert!(
            len >= 0,
            "negative length ({}) in FFI schema metadata",
            len
        );
        let len = len as usize;
        let text = read_bytes((*cursor).add(4), len);
        *cursor = (*cursor).add(4 + len);
        text
    }

    if data.is_null() {
        return None;
    }
    // `c_char` and `u8` have the same size, so the buffer can be walked bytewise.
    let mut cursor = data as *const u8;

    let n_entries = read_ne_i32(cursor);
    cursor = cursor.add(4);

    let mut result = BTreeMap::new();
    for _ in 0..n_entries {
        let key = read_prefixed(&mut cursor);
        let value = read_prefixed(&mut cursor);
        // Copy out of the foreign buffer so the map owns its contents.
        result.insert(key.to_string(), value.to_string());
    }
    Some(result)
}
7 changes: 7 additions & 0 deletions tests/it/ffi.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use arrow2::array::*;
use arrow2::datatypes::{DataType, Field, TimeUnit};
use arrow2::{error::Result, ffi};
use std::collections::BTreeMap;
use std::sync::Arc;

fn test_round_trip(expected: impl Array + Clone + 'static) -> Result<()> {
Expand Down Expand Up @@ -163,5 +164,11 @@ fn schema() -> Result<()> {
DataType::Dictionary(Box::new(DataType::UInt32), Box::new(DataType::Utf8)),
true,
);
test_round_trip_schema(field)?;

let field = Field::new("a", DataType::Int32, true);
let mut metadata = BTreeMap::new();
metadata.insert("some".to_string(), "stuff".to_string());
let field = field.with_metadata(metadata);
test_round_trip_schema(field)
}

0 comments on commit 98b4f17

Please sign in to comment.