Skip to content

Commit

Permalink
Added an `into_arrow_with_data_type` method which can be used to convert to a target Arrow data_type
Browse files Browse the repository at this point in the history
  • Loading branch information
joseph-isaacs committed Dec 20, 2024
1 parent b1c2b1f commit b0de02d
Show file tree
Hide file tree
Showing 6 changed files with 176 additions and 75 deletions.
2 changes: 1 addition & 1 deletion bench-vortex/benches/compress.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ use vortex::error::VortexResult;
use vortex::file::{LayoutContext, LayoutDeserializer, VortexFileWriter, VortexReadBuilder};
use vortex::sampling_compressor::compressors::fsst::FSSTCompressor;
use vortex::sampling_compressor::{SamplingCompressor, ALL_ENCODINGS_CONTEXT};
use vortex::{ArrayDType, ArrayData, IntoArrayData, IntoCanonical};
use vortex::{ArrayDType, ArrayData, IntoArrayData, IntoArrayVariant};

use crate::tokio_runtime::TOKIO_RUNTIME;

Expand Down
26 changes: 22 additions & 4 deletions vortex-array/src/array/varbin/canonical.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use arrow_array::ArrayRef;
use arrow_array::{Array, ArrayRef};
use arrow_schema::DataType;
use vortex_dtype::DType;
use vortex_error::VortexResult;
use vortex_error::{vortex_bail, VortexResult};

use crate::array::varbin::arrow::varbin_to_arrow;
use crate::array::varbin::VarBinArray;
Expand All @@ -23,10 +23,28 @@ impl IntoCanonical for VarBinArray {
VarBinViewArray::try_from(ArrayData::from_arrow(array, nullable)).map(Canonical::VarBinView)
}

fn into_arrow(self) -> VortexResult<ArrayRef> {
fn into_arrow(self) -> VortexResult<ArrayRef>
where
Self: Sized,
{
varbin_to_arrow(&self)
}

fn into_arrow_with_data_type(self, data_type: &DataType) -> VortexResult<ArrayRef> {
// Specialized implementation of `into_arrow` for VarBin since it has a direct
// Arrow representation.
varbin_to_arrow(&self)
let array_ref = varbin_to_arrow(&self)?;

// Note, arrow::cast clones the array, so don't use it if unnecessary.
Ok(match data_type {
DataType::Binary | DataType::LargeBinary | DataType::Utf8 | DataType::LargeUtf8 => {
array_ref
}
DataType::Utf8View | DataType::BinaryView => {
arrow_cast::cast(array_ref.as_ref(), data_type)?
}
_ => vortex_bail!("Unsupported data type: {:?}", data_type),
})
}
}

Expand Down
8 changes: 2 additions & 6 deletions vortex-array/src/arrow/datum.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@ use arrow_array::{Array, ArrayRef, Datum as ArrowDatum};
use vortex_error::VortexError;

use crate::compute::slice;
use crate::stats::{ArrayStatistics, Stat};
use crate::{ArrayData, IntoCanonical};

/// A wrapper around a generic Arrow array that can be used as a Datum in Arrow compute.
#[derive(Debug)]
pub struct Datum {
array: ArrayRef,
is_scalar: bool,
Expand All @@ -15,11 +15,7 @@ impl TryFrom<ArrayData> for Datum {
type Error = VortexError;

fn try_from(array: ArrayData) -> Result<Self, Self::Error> {
if array
.statistics()
.get_as::<bool>(Stat::IsConstant)
.unwrap_or_default()
{
if array.is_constant() {
Ok(Self {
array: slice(array, 0, 1)?.into_arrow()?,
is_scalar: true,
Expand Down
20 changes: 12 additions & 8 deletions vortex-array/src/arrow/record_batch.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use arrow_array::cast::as_struct_array;
use arrow_array::cast::AsArray;
use arrow_array::RecordBatch;
use arrow_schema::{DataType, Schema};
use itertools::Itertools;
use vortex_error::{vortex_err, VortexError, VortexResult};

Expand Down Expand Up @@ -41,16 +42,19 @@ impl TryFrom<ArrayData> for RecordBatch {
vortex_err!("RecordBatch can only be constructed from a Vortex StructArray: {err}")
})?;

RecordBatch::try_from(struct_arr)
struct_arr.into_record_batch()
}
}

impl TryFrom<StructArray> for RecordBatch {
type Error = VortexError;
impl StructArray {
pub fn into_record_batch(self) -> VortexResult<RecordBatch> {
let array_ref = self.into_array().into_arrow()?;
Ok(RecordBatch::try_from(array_ref.as_struct())?)
}

fn try_from(value: StructArray) -> VortexResult<Self> {
let array_ref = value.into_canonical()?.into_arrow()?;
let struct_array = as_struct_array(array_ref.as_ref());
Ok(Self::from(struct_array))
pub fn into_record_batch_with_schema(self, schema: &Schema) -> VortexResult<RecordBatch> {
let data_type = DataType::Struct(schema.fields.clone());
let array_ref = self.into_array().into_arrow_with_data_type(&data_type)?;
Ok(RecordBatch::try_from(array_ref.as_struct())?)
}
}
Loading

0 comments on commit b0de02d

Please sign in to comment.