Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: upgrade arrow to v0.52.1 #2589

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 12 additions & 12 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -59,17 +59,17 @@ lance-test-macros = { version = "=0.14.2", path = "./rust/lance-test-macros" }
lance-testing = { version = "=0.14.2", path = "./rust/lance-testing" }
approx = "0.5.1"
# Note that this one does not include pyarrow
arrow = { version = "51.0.0", optional = false, features = ["prettyprint"] }
arrow-arith = "51.0"
arrow-array = "51.0"
arrow-buffer = "51.0"
arrow-cast = "51.0"
arrow-data = "51.0"
arrow-ipc = { version = "51.0", features = ["zstd"] }
arrow-ord = "51.0"
arrow-row = "51.0"
arrow-schema = "51.0"
arrow-select = "51.0"
arrow = { version = "52.1.0", optional = false, features = ["prettyprint"] }
arrow-arith = "52.1"
arrow-array = "52.1"
arrow-buffer = "52.1"
arrow-cast = "52.1"
arrow-data = "52.1"
arrow-ipc = { version = "52.1", features = ["zstd"] }
arrow-ord = "52.1"
arrow-row = "52.1"
arrow-schema = "52.1"
arrow-select = "52.1"
async-recursion = "1.0"
async-trait = "0.1"
aws-config = "0.57"
Expand Down Expand Up @@ -120,7 +120,7 @@ moka = "0.11"
num-traits = "0.2"
num_cpus = "1.0"
object_store = { version = "0.9.0" }
parquet = "51.0"
parquet = "52.1"
pin-project = "1.0"
path_abs = "0.5"
pprof = { version = "0.13", features = ["flamegraph", "criterion"] }
Expand Down
66 changes: 60 additions & 6 deletions rust/lance-datagen/src/generator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

use std::{iter, marker::PhantomData, sync::Arc};

use arrow::datatypes::{IntervalDayTime, IntervalMonthDayNano};
use arrow::{
array::{ArrayData, AsArray},
buffer::{BooleanBuffer, Buffer, OffsetBuffer, ScalarBuffer},
Expand All @@ -14,7 +15,7 @@
Array, FixedSizeBinaryArray, FixedSizeListArray, ListArray, PrimitiveArray, RecordBatch,
RecordBatchOptions, RecordBatchReader, StringArray, StructArray,
};
use arrow_schema::{ArrowError, DataType, Field, Fields, Schema, SchemaRef};
use arrow_schema::{ArrowError, DataType, Field, Fields, IntervalUnit, Schema, SchemaRef};
use futures::{stream::BoxStream, StreamExt};
use rand::{distributions::Uniform, Rng, RngCore, SeedableRng};

Expand Down Expand Up @@ -596,6 +597,59 @@
}
}

/// Array generator that produces random interval values for one [`IntervalUnit`].
pub struct RandomIntervalGenerator {
    /// Interval layout to emit.
    unit: IntervalUnit,
    /// Arrow data type matching `unit`, kept so it can be returned by reference.
    data_type: DataType,
}

impl RandomIntervalGenerator {
    /// Creates a generator for intervals of the given `unit`.
    ///
    /// The matching `DataType::Interval(unit)` is computed once here and stored.
    pub fn new(unit: IntervalUnit) -> Self {
        let data_type = DataType::Interval(unit);
        Self { unit, data_type }
    }
}

impl ArrayGenerator for RandomIntervalGenerator {
    /// Builds an array of `length` random intervals in the layout selected by `self.unit`.
    ///
    /// Each component of each value is drawn from `rng` with `Rng::gen`, in the
    /// order the components are passed to the interval constructor.
    ///
    /// # Errors
    ///
    /// The signature allows an [`ArrowError`], but every branch here returns `Ok`.
    fn generate(
        &mut self,
        length: RowCount,
        rng: &mut rand_xoshiro::Xoshiro256PlusPlus,
    ) -> Result<Arc<dyn arrow_array::Array>, ArrowError> {
        match self.unit {
            IntervalUnit::YearMonth => {
                // A year-month interval is a single i32 count of months.
                let months = (0..length.0).map(|_| rng.gen::<i32>()).collect::<Vec<_>>();
                Ok(Arc::new(arrow_array::IntervalYearMonthArray::from(months)))
            }
            IntervalUnit::MonthDayNano => {
                let month_day_nanos = (0..length.0)
                    .map(|_| IntervalMonthDayNano::new(rng.gen(), rng.gen(), rng.gen()))
                    .collect::<Vec<_>>();
                Ok(Arc::new(arrow_array::IntervalMonthDayNanoArray::from(
                    month_day_nanos,
                )))
            }
            IntervalUnit::DayTime => {
                let day_times = (0..length.0)
                    .map(|_| IntervalDayTime::new(rng.gen(), rng.gen()))
                    .collect::<Vec<_>>();
                Ok(Arc::new(arrow_array::IntervalDayTimeArray::from(day_times)))
            }
        }
    }

    /// Returns the Arrow data type of the generated arrays (`DataType::Interval(unit)`).
    fn data_type(&self) -> &DataType {
        &self.data_type
    }

    /// Returns the fixed width in bytes of one generated value.
    ///
    /// The width depends on the unit, following the Arrow interval layouts:
    /// year-month is one i32, day-time is two i32s, and month-day-nano is
    /// two i32s plus one i64.
    fn element_size_bytes(&self) -> Option<ByteCount> {
        match self.unit {
            IntervalUnit::YearMonth => Some(ByteCount::from(4)),
            IntervalUnit::DayTime => Some(ByteCount::from(8)),
            IntervalUnit::MonthDayNano => Some(ByteCount::from(16)),
        }
    }
}

pub struct RandomBinaryGenerator {
bytes_per_element: ByteCount,
scale_to_utf8: bool,
Expand Down Expand Up @@ -1172,12 +1226,12 @@
pub mod array {

use arrow::datatypes::{
Int16Type, Int64Type, Int8Type, IntervalDayTimeType, IntervalMonthDayNanoType,

Check warning on line 1229 in rust/lance-datagen/src/generator.rs

View workflow job for this annotation

GitHub Actions / linux-arm

unused imports: `IntervalDayTimeType`, `IntervalMonthDayNanoType`

Check warning on line 1229 in rust/lance-datagen/src/generator.rs

View workflow job for this annotation

GitHub Actions / linux-build (stable)

unused imports: `IntervalDayTimeType`, `IntervalMonthDayNanoType`

Check warning on line 1229 in rust/lance-datagen/src/generator.rs

View workflow job for this annotation

GitHub Actions / linux-build (nightly)

unused imports: `IntervalDayTimeType` and `IntervalMonthDayNanoType`
};
use arrow_array::types::{
Decimal128Type, Decimal256Type, DurationMicrosecondType, DurationMillisecondType,
DurationNanosecondType, DurationSecondType, Float16Type, Float32Type, Float64Type,
IntervalYearMonthType, UInt16Type, UInt32Type, UInt64Type, UInt8Type,

Check warning on line 1234 in rust/lance-datagen/src/generator.rs

View workflow job for this annotation

GitHub Actions / linux-arm

unused import: `IntervalYearMonthType`

Check warning on line 1234 in rust/lance-datagen/src/generator.rs

View workflow job for this annotation

GitHub Actions / linux-build (stable)

unused import: `IntervalYearMonthType`

Check warning on line 1234 in rust/lance-datagen/src/generator.rs

View workflow job for this annotation

GitHub Actions / linux-build (nightly)

unused import: `IntervalYearMonthType`
};
use arrow_array::{
ArrowNativeTypeOp, Date32Array, Date64Array, Time32MillisecondArray, Time32SecondArray,
Expand Down Expand Up @@ -1461,6 +1515,10 @@
Box::new(RandomFixedSizeBinaryGenerator::new(size))
}

/// Create a generator of random interval values with the given unit
pub fn rand_interval(unit: IntervalUnit) -> Box<dyn ArrayGenerator> {
    let generator = RandomIntervalGenerator::new(unit);
    Box::new(generator)
}

/// Create a generator of randomly sampled date32 values
///
/// Instead of sampling the entire range, all values will be drawn from the last year as this
Expand Down Expand Up @@ -1663,11 +1721,7 @@
TimeUnit::Microsecond => rand::<DurationMicrosecondType>(),
TimeUnit::Nanosecond => rand::<DurationNanosecondType>(),
},
DataType::Interval(unit) => match unit {
IntervalUnit::DayTime => rand::<IntervalDayTimeType>(),
IntervalUnit::MonthDayNano => rand::<IntervalMonthDayNanoType>(),
IntervalUnit::YearMonth => rand::<IntervalYearMonthType>(),
},
DataType::Interval(unit) => rand_interval(*unit),
DataType::Date32 => rand_date32(),
DataType::Date64 => rand_date64(),
DataType::Time32(resolution) => rand_time32(resolution),
Expand Down
Loading