Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Docs and lint
Browse files Browse the repository at this point in the history
  • Loading branch information
jorgecarleitao committed Feb 23, 2022
1 parent f70116d commit 8c70d9a
Show file tree
Hide file tree
Showing 16 changed files with 72 additions and 152 deletions.
9 changes: 4 additions & 5 deletions benches/avro_read.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,11 @@ fn schema() -> AvroSchema {
fn write(size: usize, has_codec: bool) -> Result<Vec<u8>> {
let avro = schema();
// a writer needs a schema and something to write to
let mut writer: Writer<Vec<u8>>;
if has_codec {
writer = Writer::with_codec(&avro, Vec::new(), Codec::Deflate);
let mut writer = if has_codec {
Writer::with_codec(&avro, Vec::new(), Codec::Deflate)
} else {
writer = Writer::new(&avro, Vec::new());
}
Writer::new(&avro, Vec::new())
};

(0..size).for_each(|_| {
let mut record = Record::new(writer.schema()).unwrap();
Expand Down
3 changes: 1 addition & 2 deletions benches/write_parquet.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
use std::io::Cursor;
use std::sync::Arc;

use criterion::{criterion_group, criterion_main, Criterion};
Expand Down Expand Up @@ -29,7 +28,7 @@ fn write(array: &dyn Array, encoding: Encoding) -> Result<()> {
vec![encoding],
)?;

let mut writer = vec![];
let writer = vec![];

let mut writer = FileWriter::try_new(writer, schema, options)?;

Expand Down
19 changes: 5 additions & 14 deletions examples/ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ unsafe fn export(
array_ptr: *mut ffi::ArrowArray,
schema_ptr: *mut ffi::ArrowSchema,
) {
// exporting an array requires an associated field so that the consumer knows its datatype
let field = Field::new("a", array.data_type().clone(), true);
ffi::export_array_to_c(array, array_ptr);
ffi::export_field_to_c(&field, schema_ptr);
Expand All @@ -25,23 +26,13 @@ fn main() -> Result<()> {

// the goal is to export this array and import it back via FFI.
// to import, we initialize the structs that will receive the data
let array_ptr = Box::new(ffi::ArrowArray::empty());
let schema_ptr = Box::new(ffi::ArrowSchema::empty());

// since FFIs work in raw pointers, let's temporarily relinquish ownership so that producers
// can write into it in a thread-safe manner
let array_ptr = Box::into_raw(array_ptr);
let schema_ptr = Box::into_raw(schema_ptr);
let mut array_ptr = Box::new(ffi::ArrowArray::empty());
let mut schema_ptr = Box::new(ffi::ArrowSchema::empty());

// this is where a producer (in this case also us ^_^) writes to the pointers' location.
// `array` here could be anything or not even be available, if this was e.g. from Python.
// Safety: we just allocated the pointers correctly.
unsafe { export(array.clone(), array_ptr, schema_ptr) };

// we can now take ownership back, since we are responsible for deallocating this memory.
// Safety: we just into_raw them.
let array_ptr = unsafe { Box::from_raw(array_ptr) };
let schema_ptr = unsafe { Box::from_raw(schema_ptr) };
// Safety: we just allocated the pointers
unsafe { export(array.clone(), &mut *array_ptr, &mut *schema_ptr) };

// and finally interpret the written memory into a new array.
// Safety: we used `export`, which is a valid exporter to the C data interface
Expand Down
2 changes: 0 additions & 2 deletions examples/parquet_read_async.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@ use std::sync::Arc;
use std::time::SystemTime;

use futures::future::BoxFuture;
use futures::FutureExt;
use tokio;
use tokio::fs::File;
use tokio::io::BufReader;
use tokio_util::compat::*;
Expand Down
3 changes: 2 additions & 1 deletion src/bitmap/bitmap_ops.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use super::{
Bitmap,
};

/// Creates a [`Vec<u8>`] from an [`Iterator`] of [`BitChunk`].
/// # Safety
/// The iterator must be [`TrustedLen`].
pub unsafe fn from_chunk_iter_unchecked<T: BitChunk, I: Iterator<Item = T>>(
Expand Down Expand Up @@ -35,7 +36,7 @@ pub unsafe fn from_chunk_iter_unchecked<T: BitChunk, I: Iterator<Item = T>>(
buffer
}

/// Creates a Vec<u8> from a [`TrustedLen`] of [`BitChunk`],
/// Creates a [`Vec<u8>`] from a [`TrustedLen`] of [`BitChunk`].
pub fn chunk_iter_to_vec<T, I>(iter: I) -> Vec<u8>
where
    T: BitChunk,
    I: TrustedLen<Item = T>,
{
    // SAFETY: `from_chunk_iter_unchecked` requires its iterator to be
    // `TrustedLen`, which is exactly the bound placed on `I` here.
    unsafe { from_chunk_iter_unchecked(iter) }
}
Expand Down
4 changes: 2 additions & 2 deletions src/bitmap/mutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ impl MutableBitmap {
}
}

/// Initializes an a pre-allocated [`MutableBitmap`] with capacity for `capacity` bits.
/// Initializes a pre-allocated [`MutableBitmap`] with capacity for `capacity` bits.
#[inline]
pub fn with_capacity(capacity: usize) -> Self {
Self {
Expand All @@ -67,7 +67,7 @@ impl MutableBitmap {
}
}

/// Initializes an a pre-allocated [`MutableBitmap`] with capacity for `capacity` bits.
/// Reserves `additional` bits in the [`MutableBitmap`], potentially re-allocating its buffer.
#[inline(always)]
pub fn reserve(&mut self, additional: usize) {
self.buffer
Expand Down
16 changes: 9 additions & 7 deletions src/compute/like.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@ fn is_like_pattern(c: char) -> bool {
c == '%' || c == '_'
}

/// Translates a SQL `LIKE` pattern into the body of an equivalent regular expression.
///
/// * `%` becomes `.*` (any sequence of characters)
/// * `_` becomes `.` (any single character)
/// * every other character that is a regex metacharacter is escaped with a backslash,
///   so that it matches itself literally instead of being interpreted by the regex engine.
///
/// The returned string is not anchored; callers wrap it in `^…$` themselves.
fn replace_pattern(pattern: &str) -> String {
    // a small head-room avoids re-allocating for the common case of a few wildcards/escapes
    let mut regex = String::with_capacity(pattern.len() + 8);
    for c in pattern.chars() {
        match c {
            '%' => regex.push_str(".*"),
            '_' => regex.push('.'),
            // regex metacharacters: without escaping, a LIKE pattern such as `a.c` would
            // match `abc`, and one such as `a(` would fail to compile as a regex.
            '\\' | '.' | '+' | '*' | '?' | '(' | ')' | '|' | '[' | ']' | '{' | '}' | '^'
            | '$' | '#' | '&' | '-' | '~' => {
                regex.push('\\');
                regex.push(c);
            }
            _ => regex.push(c),
        }
    }
    regex
}

#[inline]
fn a_like_utf8<O: Offset, F: Fn(bool) -> bool>(
lhs: &Utf8Array<O>,
Expand All @@ -40,7 +44,7 @@ fn a_like_utf8<O: Offset, F: Fn(bool) -> bool>(
let pattern = if let Some(pattern) = map.get(pattern) {
pattern
} else {
let re_pattern = pattern.replace("%", ".*").replace("_", ".");
let re_pattern = replace_pattern(pattern);
let re = Regex::new(&format!("^{}$", re_pattern)).map_err(|e| {
ArrowError::InvalidArgumentError(format!(
"Unable to build regex from LIKE pattern: {}",
Expand Down Expand Up @@ -113,7 +117,7 @@ fn a_like_utf8_scalar<O: Offset, F: Fn(bool) -> bool>(
let ends_with = &rhs[1..];
Bitmap::from_trusted_len_iter(lhs.values_iter().map(|x| op(x.ends_with(ends_with))))
} else {
let re_pattern = rhs.replace("%", ".*").replace("_", ".");
let re_pattern = replace_pattern(rhs);
let re = Regex::new(&format!("^{}$", re_pattern)).map_err(|e| {
ArrowError::InvalidArgumentError(format!(
"Unable to build regex from LIKE pattern: {}",
Expand Down Expand Up @@ -187,10 +191,8 @@ fn a_like_binary<O: Offset, F: Fn(bool) -> bool>(
let pattern = if let Some(pattern) = map.get(pattern) {
pattern
} else {
let re_pattern = simdutf8::basic::from_utf8(pattern)
.unwrap()
.replace("%", ".*")
.replace("_", ".");
let re_pattern = simdutf8::basic::from_utf8(pattern).unwrap();
let re_pattern = replace_pattern(re_pattern);
let re = BytesRegex::new(&format!("^{}$", re_pattern)).map_err(|e| {
ArrowError::InvalidArgumentError(format!(
"Unable to build regex from LIKE pattern: {}",
Expand Down Expand Up @@ -270,7 +272,7 @@ fn a_like_binary_scalar<O: Offset, F: Fn(bool) -> bool>(
let ends_with = &rhs[1..];
Bitmap::from_trusted_len_iter(lhs.values_iter().map(|x| op(x.ends_with(ends_with))))
} else {
let re_pattern = pattern.replace("%", ".*").replace("_", ".");
let re_pattern = replace_pattern(pattern);
let re = BytesRegex::new(&format!("^{}$", re_pattern)).map_err(|e| {
ArrowError::InvalidArgumentError(format!(
"Unable to build regex from LIKE pattern: {}",
Expand Down
2 changes: 1 addition & 1 deletion src/compute/sort/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -355,7 +355,7 @@ where
values.chain(null_indices.into_iter()).collect::<Vec<I>>()
};

values.truncate(limit.unwrap_or_else(|| values.len()));
values.truncate(limit.unwrap_or(values.len()));

let data_type = I::PRIMITIVE.into();
PrimitiveArray::<I>::from_data(data_type, values.into(), None)
Expand Down
3 changes: 1 addition & 2 deletions src/io/ipc/read/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -121,8 +121,7 @@ pub fn read_record_batch<R: Read + Seek>(
Ok(None)
}
})
.map(|x| x.transpose())
.flatten()
.filter_map(|x| x.transpose())
.collect::<Result<Vec<_>>>()?
} else {
fields
Expand Down
3 changes: 1 addition & 2 deletions src/io/json/read/infer_schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -125,8 +125,7 @@ pub fn infer_rows(rows: &[Value]) -> Result<DataType> {
// discard None values and deduplicate entries
let types = types
.into_iter()
.map(|x| x.transpose())
.flatten()
.filter_map(|x| x.transpose())
.collect::<Result<HashSet<_>>>()?;

Ok(if !types.is_empty() {
Expand Down
9 changes: 3 additions & 6 deletions src/io/json_integration/read/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -170,8 +170,7 @@ fn to_binary<O: Offset>(json_col: &ArrowJsonColumn, data_type: DataType) -> Arc<
.as_ref()
.unwrap()
.iter()
.map(|value| value.as_str().map(|x| hex::decode(x).unwrap()).unwrap())
.flatten()
.flat_map(|value| value.as_str().map(|x| hex::decode(x).unwrap()).unwrap())
.collect();
Arc::new(BinaryArray::from_data(data_type, offsets, values, validity))
}
Expand All @@ -184,8 +183,7 @@ fn to_utf8<O: Offset>(json_col: &ArrowJsonColumn, data_type: DataType) -> Arc<dy
.as_ref()
.unwrap()
.iter()
.map(|value| value.as_str().unwrap().as_bytes().to_vec())
.flatten()
.flat_map(|value| value.as_str().unwrap().as_bytes().to_vec())
.collect();
Arc::new(Utf8Array::from_data(data_type, offsets, values, validity))
}
Expand Down Expand Up @@ -309,8 +307,7 @@ pub fn to_array(
.as_ref()
.unwrap()
.iter()
.map(|value| value.as_str().map(|x| hex::decode(x).unwrap()).unwrap())
.flatten()
.flat_map(|value| value.as_str().map(|x| hex::decode(x).unwrap()).unwrap())
.collect();
Ok(Arc::new(FixedSizeBinaryArray::from_data(
data_type, values, validity,
Expand Down
8 changes: 2 additions & 6 deletions src/io/parquet/read/schema/convert.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ use crate::datatypes::{DataType, Field, IntervalUnit, TimeUnit};
/// Converts [`ParquetType`]s to a [`Field`], ignoring parquet fields that do not contain
/// any physical column.
pub fn parquet_to_arrow_schema(fields: &[ParquetType]) -> Vec<Field> {
fields.iter().map(to_field).flatten().collect::<Vec<_>>()
fields.iter().filter_map(to_field).collect::<Vec<_>>()
}

fn from_int32(
Expand Down Expand Up @@ -224,11 +224,7 @@ fn non_repeated_group(
/// Converts a parquet group type to an arrow [`DataType::Struct`].
/// Returns [`None`] if all its fields are empty
fn to_struct(fields: &[ParquetType]) -> Option<DataType> {
let fields = fields
.iter()
.map(to_field)
.flatten()
.collect::<Vec<Field>>();
let fields = fields.iter().filter_map(to_field).collect::<Vec<Field>>();
if fields.is_empty() {
None
} else {
Expand Down
2 changes: 1 addition & 1 deletion src/io/parquet/read/statistics/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ fn get_fields(field: &Field) -> Vec<&Field> {
match field.data_type.to_logical_type() {
DataType::List(inner) => get_fields(inner),
DataType::LargeList(inner) => get_fields(inner),
DataType::Struct(fields) => fields.iter().map(get_fields).flatten().collect(),
DataType::Struct(fields) => fields.iter().flat_map(get_fields).collect(),
_ => vec![field],
}
}
Expand Down
22 changes: 9 additions & 13 deletions src/io/parquet/write/dictionary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,19 +60,15 @@ fn encode_keys<K: DictionaryKey>(
// encode indices
// compute the required number of bits
if let Some(validity) = validity {
let keys = array
.iter()
.flatten()
.map(|x| {
let index = x.to_usize().unwrap();
// discard indices whose values are null, since they are part of the def levels.
if validity.get_bit(index) {
Some(index as u32)
} else {
None
}
})
.flatten();
let keys = array.iter().flatten().filter_map(|x| {
let index = x.to_usize().unwrap();
// discard indices whose values are null, since they are part of the def levels.
if validity.get_bit(index) {
Some(index as u32)
} else {
None
}
});
let num_bits = utils::get_bit_width(keys.clone().max().unwrap_or(0) as u64) as u8;

let keys = utils::ExactSizedIter::new(keys, array.len() - null_count);
Expand Down
Loading

0 comments on commit 8c70d9a

Please sign in to comment.