From dc81b870055c0d6d2e8e92a40bdbd424a7be4ba9 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Wed, 7 Jun 2023 18:17:51 +0100 Subject: [PATCH 1/7] Reorder crate documentation --- arrow/src/lib.rs | 68 +++++++++++++++++++++++------------------------- 1 file changed, 33 insertions(+), 35 deletions(-) diff --git a/arrow/src/lib.rs b/arrow/src/lib.rs index 70e615e88c73..bf39bae530b9 100644 --- a/arrow/src/lib.rs +++ b/arrow/src/lib.rs @@ -21,38 +21,6 @@ //! Please see the [arrow crates.io](https://crates.io/crates/arrow) //! page for feature flags and tips to improve performance. //! -//! # Crate Topology -//! -//! The [`arrow`] project is implemented as multiple sub-crates, which are then re-exported by -//! this top-level crate. -//! -//! Crate authors can choose to depend on this top-level crate, or just -//! the sub-crates they need. -//! -//! The current list of sub-crates is: -//! -//! * [`arrow-arith`][arrow_arith] - arithmetic kernels -//! * [`arrow-array`][arrow_array] - type-safe arrow array abstractions -//! * [`arrow-buffer`][arrow_buffer] - buffer abstractions for arrow arrays -//! * [`arrow-cast`][arrow_cast] - cast kernels for arrow arrays -//! * [`arrow-csv`][arrow_csv] - read/write CSV to arrow format -//! * [`arrow-data`][arrow_data] - the underlying data of arrow arrays -//! * [`arrow-ipc`][arrow_ipc] - read/write IPC to arrow format -//! * [`arrow-json`][arrow_json] - read/write JSON to arrow format -//! * [`arrow-ord`][arrow_ord] - ordering kernels for arrow arrays -//! * [`arrow-row`][arrow_row] - comparable row format -//! * [`arrow-schema`][arrow_schema] - the logical types for arrow arrays -//! * [`arrow-select`][arrow_select] - selection kernels for arrow arrays -//! * [`arrow-string`][arrow_string] - string kernels for arrow arrays -//! -//! _This list is likely to grow as further functionality is split out from the top-level crate_ -//! -//! Some functionality is also distributed independently of this crate: -//! -//! 
* [`arrow-flight`] - support for [Arrow Flight RPC] -//! * [`arrow-integration-test`] - support for [Arrow JSON Test Format] -//! * [`parquet`](https://docs.rs/parquet/latest/parquet/) - support for [Apache Parquet] -//! //! # Columnar Format //! //! The [`array`] module provides statically typed implementations of all the array types as defined @@ -73,7 +41,7 @@ //! ``` //! //! It is also possible to write generic code. For example, the following is generic over -//! all primitively typed arrays: +//! all primitively typed arrays //! //! ```rust //! # use std::iter::Sum; @@ -92,7 +60,7 @@ //! assert_eq!(sum(&TimestampNanosecondArray::from(vec![1, 2, 3])), 6); //! ``` //! -//! And the following is generic over all arrays with comparable values: +//! And the following is generic over all arrays with comparable values //! //! ```rust //! # use arrow::array::{ArrayAccessor, ArrayIter, Int32Array, StringArray}; @@ -109,7 +77,7 @@ //! assert_eq!(min(&StringArray::from(vec!["b", "a", "c"])), Some("a")); //! ``` //! -//! For more examples, and details consult the [arrow_array] docs. +//! **For more examples and details, consult the [arrow_array] docs.** //! //! # Type Erasure / Trait Objects //! @@ -317,6 +285,36 @@ //! assert_eq!(string.value(1), "foo"); //! ``` //! +//! # Crate Topology +//! +//! The [`arrow`] project is implemented as multiple sub-crates, which are then re-exported by +//! this top-level crate. +//! +//! Crate authors can choose to depend on this top-level crate, or just +//! the sub-crates they need. +//! +//! The current list of sub-crates is: +//! +//! * [`arrow-arith`][arrow_arith] - arithmetic kernels +//! * [`arrow-array`][arrow_array] - type-safe arrow array abstractions +//! * [`arrow-buffer`][arrow_buffer] - buffer abstractions for arrow arrays +//! * [`arrow-cast`][arrow_cast] - cast kernels for arrow arrays +//! * [`arrow-csv`][arrow_csv] - read/write CSV to arrow format +//! 
* [`arrow-data`][arrow_data] - the underlying data of arrow arrays +//! * [`arrow-ipc`][arrow_ipc] - read/write IPC to arrow format +//! * [`arrow-json`][arrow_json] - read/write JSON to arrow format +//! * [`arrow-ord`][arrow_ord] - ordering kernels for arrow arrays +//! * [`arrow-row`][arrow_row] - comparable row format +//! * [`arrow-schema`][arrow_schema] - the logical types for arrow arrays +//! * [`arrow-select`][arrow_select] - selection kernels for arrow arrays +//! * [`arrow-string`][arrow_string] - string kernels for arrow arrays +//! +//! Some functionality is also distributed independently of this crate: +//! +//! * [`arrow-flight`] - support for [Arrow Flight RPC] +//! * [`arrow-integration-test`] - support for [Arrow JSON Test Format] +//! * [`parquet`](https://docs.rs/parquet/latest/parquet/) - support for [Apache Parquet] +//! //! # Safety and Security //! //! Like many crates, this crate makes use of unsafe where prudent. However, it endeavours to be From 46ca2be0c2e31ace4b07b919cd51a72b5d5d8d80 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Wed, 7 Jun 2023 18:23:37 +0100 Subject: [PATCH 2/7] Add backlinks --- arrow-buffer/src/lib.rs | 2 +- arrow-data/src/lib.rs | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arrow-buffer/src/lib.rs b/arrow-buffer/src/lib.rs index 90b801c4ae29..cbcdb979e693 100644 --- a/arrow-buffer/src/lib.rs +++ b/arrow-buffer/src/lib.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -//! Buffer abstractions for [Apache Arrow](https://docs.rs/arrow) +//! Low-level buffer abstractions for [Apache Arrow Rust](https://docs.rs/arrow) pub mod alloc; pub mod buffer; diff --git a/arrow-data/src/lib.rs b/arrow-data/src/lib.rs index b864b786051a..cfa0dba66c35 100644 --- a/arrow-data/src/lib.rs +++ b/arrow-data/src/lib.rs @@ -15,7 +15,9 @@ // specific language governing permissions and limitations // under the License. -//! 
Array data abstractions for [Apache Arrow](https://docs.rs/arrow) +//! Low-level array data abstractions for [Apache Arrow Rust](https://docs.rs/arrow) +//! +//! For a higher-level, strongly-typed interface see [arrow_array](https://docs.rs/arrow_array) mod data; pub use data::*; From 4fefb4786c1870587b1b730c416f8e9ec938330a Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Thu, 8 Jun 2023 11:38:12 +0100 Subject: [PATCH 3/7] More examples --- arrow-array/src/array/primitive_array.rs | 26 +++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/arrow-array/src/array/primitive_array.rs b/arrow-array/src/array/primitive_array.rs index 3fa011f8e127..1506befac32d 100644 --- a/arrow-array/src/array/primitive_array.rs +++ b/arrow-array/src/array/primitive_array.rs @@ -265,10 +265,20 @@ pub use crate::types::ArrowPrimitiveType; /// An array of [primitive values](https://arrow.apache.org/docs/format/Columnar.html#fixed-size-primitive-layout) /// +/// # Example: From a Vec +/// +/// ``` +/// # use arrow_array::{Array, PrimitiveArray, types::Int32Type}; +/// let arr: PrimitiveArray = PrimitiveArray::from(vec![1, 2, 3, 4]); +/// assert_eq!(4, arr.len()); +/// assert_eq!(0, arr.null_count()); +/// assert_eq!(arr.values(), &[1, 2, 3, 4]) +/// ``` +/// /// # Example: From an iterator of values /// /// ``` -/// use arrow_array::{Array, PrimitiveArray, types::Int32Type}; +/// # use arrow_array::{Array, PrimitiveArray, types::Int32Type}; /// let arr: PrimitiveArray = PrimitiveArray::from_iter_values((0..10).map(|x| x + 1)); /// assert_eq!(10, arr.len()); /// assert_eq!(0, arr.null_count()); @@ -276,6 +286,16 @@ pub use crate::types::ArrowPrimitiveType; /// assert_eq!(i + 1, arr.value(i as usize)); /// } /// ``` +/// +/// # Example: From an iterator of option +/// +/// ``` +/// # use arrow_array::{Array, PrimitiveArray, types::Int32Type}; +/// let arr: PrimitiveArray = (0..10).map(|x| (x % 2 == 0).then_some(x)).collect(); +/// assert_eq!(10, 
arr.len()); +/// assert_eq!(5, arr.null_count()); +/// assert_eq!(arr.values(), &[0, 0, 2, 0, 4, 0, 6, 0, 8, 0]) +/// ``` pub struct PrimitiveArray { data_type: DataType, /// Values data @@ -294,7 +314,7 @@ impl Clone for PrimitiveArray { } impl PrimitiveArray { - /// Create a new [`PrimitiveArray`] from the provided data_type, values, nulls + /// Create a new [`PrimitiveArray`] from the provided values and nulls /// /// # Panics /// @@ -303,7 +323,7 @@ impl PrimitiveArray { Self::try_new(values, nulls).unwrap() } - /// Create a new [`PrimitiveArray`] from the provided data_type, values, nulls + /// Create a new [`PrimitiveArray`] from the provided values and nulls /// /// # Errors /// From 5f801d4eca3013451512655fab4e7cb97cc343be Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Thu, 8 Jun 2023 11:46:24 +0100 Subject: [PATCH 4/7] Link from typedefs to generic types --- arrow-array/src/array/binary_array.rs | 80 +----- arrow-array/src/array/boolean_array.rs | 64 +++-- arrow-array/src/array/byte_array.rs | 46 ++++ arrow-array/src/array/dictionary_array.rs | 139 +++-------- arrow-array/src/array/list_array.rs | 54 +---- arrow-array/src/array/primitive_array.rs | 282 +++++----------------- arrow-array/src/array/string_array.rs | 50 +--- 7 files changed, 176 insertions(+), 539 deletions(-) diff --git a/arrow-array/src/array/binary_array.rs b/arrow-array/src/array/binary_array.rs index a4d64040ceff..871727e3ad44 100644 --- a/arrow-array/src/array/binary_array.rs +++ b/arrow-array/src/array/binary_array.rs @@ -23,7 +23,7 @@ use arrow_buffer::{bit_util, Buffer, MutableBuffer}; use arrow_data::ArrayData; use arrow_schema::DataType; -/// See [`BinaryArray`] and [`LargeBinaryArray`] for storing binary data +/// A [`GenericBinaryArray`] for storing `[u8]` pub type GenericBinaryArray = GenericByteArray>; impl GenericBinaryArray { @@ -217,84 +217,10 @@ where } } -/// An array of `[u8]` using `i32` offsets -/// -/// The byte length of each element is represented by an 
i32. -/// -/// # Examples -/// -/// Create a BinaryArray from a vector of byte slices. -/// -/// ``` -/// use arrow_array::{Array, BinaryArray}; -/// let values: Vec<&[u8]> = -/// vec![b"one", b"two", b"", b"three"]; -/// let array = BinaryArray::from_vec(values); -/// assert_eq!(4, array.len()); -/// assert_eq!(b"one", array.value(0)); -/// assert_eq!(b"two", array.value(1)); -/// assert_eq!(b"", array.value(2)); -/// assert_eq!(b"three", array.value(3)); -/// ``` -/// -/// Create a BinaryArray from a vector of Optional (null) byte slices. -/// -/// ``` -/// use arrow_array::{Array, BinaryArray}; -/// let values: Vec> = -/// vec![Some(b"one"), Some(b"two"), None, Some(b""), Some(b"three")]; -/// let array = BinaryArray::from_opt_vec(values); -/// assert_eq!(5, array.len()); -/// assert_eq!(b"one", array.value(0)); -/// assert_eq!(b"two", array.value(1)); -/// assert_eq!(b"", array.value(3)); -/// assert_eq!(b"three", array.value(4)); -/// assert!(!array.is_null(0)); -/// assert!(!array.is_null(1)); -/// assert!(array.is_null(2)); -/// assert!(!array.is_null(3)); -/// assert!(!array.is_null(4)); -/// ``` -/// +/// A [`GenericBinaryArray`] of `[u8]` using `i32` offsets pub type BinaryArray = GenericBinaryArray; -/// An array of `[u8]` using `i64` offsets -/// -/// # Examples -/// -/// Create a LargeBinaryArray from a vector of byte slices. -/// -/// ``` -/// use arrow_array::{Array, LargeBinaryArray}; -/// let values: Vec<&[u8]> = -/// vec![b"one", b"two", b"", b"three"]; -/// let array = LargeBinaryArray::from_vec(values); -/// assert_eq!(4, array.len()); -/// assert_eq!(b"one", array.value(0)); -/// assert_eq!(b"two", array.value(1)); -/// assert_eq!(b"", array.value(2)); -/// assert_eq!(b"three", array.value(3)); -/// ``` -/// -/// Create a LargeBinaryArray from a vector of Optional (null) byte slices. 
-/// -/// ``` -/// use arrow_array::{Array, LargeBinaryArray}; -/// let values: Vec> = -/// vec![Some(b"one"), Some(b"two"), None, Some(b""), Some(b"three")]; -/// let array = LargeBinaryArray::from_opt_vec(values); -/// assert_eq!(5, array.len()); -/// assert_eq!(b"one", array.value(0)); -/// assert_eq!(b"two", array.value(1)); -/// assert_eq!(b"", array.value(3)); -/// assert_eq!(b"three", array.value(4)); -/// assert!(!array.is_null(0)); -/// assert!(!array.is_null(1)); -/// assert!(array.is_null(2)); -/// assert!(!array.is_null(3)); -/// assert!(!array.is_null(4)); -/// ``` -/// +/// A [`GenericBinaryArray`] of `[u8]` using `i64` offsets pub type LargeBinaryArray = GenericBinaryArray; #[cfg(test)] diff --git a/arrow-array/src/array/boolean_array.rs b/arrow-array/src/array/boolean_array.rs index 6905baa806de..8c822996d622 100644 --- a/arrow-array/src/array/boolean_array.rs +++ b/arrow-array/src/array/boolean_array.rs @@ -27,51 +27,47 @@ use std::sync::Arc; /// An array of [boolean values](https://arrow.apache.org/docs/format/Columnar.html#fixed-size-primitive-layout) /// -/// # Examples +/// # Example: From a Vec /// -/// Construction +/// ``` +/// # use arrow_array::{Array, BooleanArray}; +/// let arr: BooleanArray = vec![true, true, false].into(); +/// let values: Vec<_> = arr.iter().collect(); +/// assert_eq!(&values, &[Some(true), Some(true), Some(false)]) +/// ``` +/// +/// # Example: From an optional Vec /// /// ``` -///# use arrow_array::{Array, BooleanArray}; -/// // Create from Vec> -/// let arr = BooleanArray::from(vec![Some(false), Some(true), None, Some(true)]); -/// // Create from Vec -/// let arr = BooleanArray::from(vec![false, true, true]); -/// // Create from iter/collect -/// let arr: BooleanArray = std::iter::repeat(Some(true)).take(10).collect(); +/// # use arrow_array::{Array, BooleanArray}; +/// let arr: BooleanArray = vec![Some(true), None, Some(false)].into(); +/// let values: Vec<_> = arr.iter().collect(); +/// assert_eq!(&values, 
&[Some(true), None, Some(false)]) /// ``` /// -/// Construction and Access +/// # Example: From an iterator /// /// ``` -/// use arrow_array::{Array, BooleanArray}; -/// let arr = BooleanArray::from(vec![Some(false), Some(true), None, Some(true)]); -/// assert_eq!(4, arr.len()); -/// assert_eq!(1, arr.null_count()); -/// assert!(arr.is_valid(0)); -/// assert!(!arr.is_null(0)); -/// assert_eq!(false, arr.value(0)); -/// assert!(!arr.is_valid(2)); -/// assert!(arr.is_null(2)); +/// # use arrow_array::{Array, BooleanArray}; +/// let arr: BooleanArray = (0..5).map(|x| (x % 2 == 0).then(|| x % 3 == 0)).collect(); +/// let values: Vec<_> = arr.iter().collect(); +/// assert_eq!(&values, &[Some(true), None, Some(false), None, Some(false)]) /// ``` /// -/// Using `collect` +/// # Example: Using Builder +/// /// ``` -/// use arrow_array::{Array, BooleanArray}; -/// let v = vec![Some(false), Some(true), Some(false), Some(true)]; -/// let arr = v.into_iter().collect::(); -/// assert_eq!(4, arr.len()); -/// assert_eq!(0, arr.offset()); -/// assert_eq!(0, arr.null_count()); -/// assert!(arr.is_valid(0)); -/// assert_eq!(false, arr.value(0)); -/// assert!(arr.is_valid(1)); -/// assert_eq!(true, arr.value(1)); -/// assert!(arr.is_valid(2)); -/// assert_eq!(false, arr.value(2)); -/// assert!(arr.is_valid(3)); -/// assert_eq!(true, arr.value(3)); +/// # use arrow_array::Array; +/// # use arrow_array::builder::BooleanBuilder; +/// let mut builder = BooleanBuilder::new(); +/// builder.append_value(true); +/// builder.append_null(); +/// builder.append_value(false); +/// let array = builder.finish(); +/// let values: Vec<_> = array.iter().collect(); +/// assert_eq!(&values, &[Some(true), None, Some(false)]) /// ``` +/// #[derive(Clone)] pub struct BooleanArray { values: BooleanBuffer, diff --git a/arrow-array/src/array/byte_array.rs b/arrow-array/src/array/byte_array.rs index 629ffd22cdc2..9e367d215418 100644 --- a/arrow-array/src/array/byte_array.rs +++ 
b/arrow-array/src/array/byte_array.rs @@ -34,6 +34,52 @@ use std::sync::Arc; /// /// See [`BinaryArray`] and [`LargeBinaryArray`] for storing arbitrary bytes /// +/// # Example: From a Vec +/// +/// ``` +/// # use arrow_array::{Array, GenericByteArray, types::Utf8Type}; +/// let arr: GenericByteArray = vec!["hello", "world", ""].into(); +/// assert_eq!(arr.value_data(), b"helloworld"); +/// assert_eq!(arr.value_offsets(), &[0, 5, 10, 10]); +/// let values: Vec<_> = arr.iter().collect(); +/// assert_eq!(values, &[Some("hello"), Some("world"), Some("")]); +/// ``` +/// +/// # Example: From an optional Vec +/// +/// ``` +/// # use arrow_array::{Array, GenericByteArray, types::Utf8Type}; +/// let arr: GenericByteArray = vec![Some("hello"), Some("world"), Some(""), None].into(); +/// assert_eq!(arr.value_data(), b"helloworld"); +/// assert_eq!(arr.value_offsets(), &[0, 5, 10, 10, 10]); +/// let values: Vec<_> = arr.iter().collect(); +/// assert_eq!(values, &[Some("hello"), Some("world"), Some(""), None]); +/// ``` +/// +/// # Example: From an iterator of option +/// +/// ``` +/// # use arrow_array::{Array, GenericByteArray, types::Utf8Type}; +/// let arr: GenericByteArray = (0..5).map(|x| (x % 2 == 0).then(|| x.to_string())).collect(); +/// let values: Vec<_> = arr.iter().collect(); +/// assert_eq!(values, &[Some("0"), None, Some("2"), None, Some("4")]); +/// ``` +/// +/// # Example: Using Builder +/// +/// ``` +/// # use arrow_array::Array; +/// # use arrow_array::builder::GenericByteBuilder; +/// # use arrow_array::types::Utf8Type; +/// let mut builder = GenericByteBuilder::::new(); +/// builder.append_value("hello"); +/// builder.append_null(); +/// builder.append_value("world"); +/// let array = builder.finish(); +/// let values: Vec<_> = array.iter().collect(); +/// assert_eq!(values, &[Some("hello"), None, Some("world")]); +/// ``` +/// /// [`StringArray`]: crate::StringArray /// [`LargeStringArray`]: crate::LargeStringArray /// [`BinaryArray`]: crate::BinaryArray 
diff --git a/arrow-array/src/array/dictionary_array.rs b/arrow-array/src/array/dictionary_array.rs index a319a836a955..a96e192617a2 100644 --- a/arrow-array/src/array/dictionary_array.rs +++ b/arrow-array/src/array/dictionary_array.rs @@ -30,116 +30,28 @@ use arrow_schema::{ArrowError, DataType}; use std::any::Any; use std::sync::Arc; -/// A dictionary array indexed by `i8` -/// -/// # Example: Using `collect` -/// ``` -/// # use arrow_array::{Array, Int8DictionaryArray, Int8Array, StringArray}; -/// # use std::sync::Arc; -/// -/// let array: Int8DictionaryArray = vec!["a", "a", "b", "c"].into_iter().collect(); -/// let values: Arc = Arc::new(StringArray::from(vec!["a", "b", "c"])); -/// assert_eq!(array.keys(), &Int8Array::from(vec![0, 0, 1, 2])); -/// assert_eq!(array.values(), &values); -/// ``` +/// A [`DictionaryArray`] indexed by `i8` pub type Int8DictionaryArray = DictionaryArray; -/// A dictionary array indexed by `i16` -/// -/// # Example: Using `collect` -/// ``` -/// # use arrow_array::{Array, Int16DictionaryArray, Int16Array, StringArray}; -/// # use std::sync::Arc; -/// -/// let array: Int16DictionaryArray = vec!["a", "a", "b", "c"].into_iter().collect(); -/// let values: Arc = Arc::new(StringArray::from(vec!["a", "b", "c"])); -/// assert_eq!(array.keys(), &Int16Array::from(vec![0, 0, 1, 2])); -/// assert_eq!(array.values(), &values); -/// ``` +/// A [`DictionaryArray`] indexed by `i16` pub type Int16DictionaryArray = DictionaryArray; -/// A dictionary array indexed by `i32` -/// -/// # Example: Using `collect` -/// ``` -/// # use arrow_array::{Array, Int32DictionaryArray, Int32Array, StringArray}; -/// # use std::sync::Arc; -/// -/// let array: Int32DictionaryArray = vec!["a", "a", "b", "c"].into_iter().collect(); -/// let values: Arc = Arc::new(StringArray::from(vec!["a", "b", "c"])); -/// assert_eq!(array.keys(), &Int32Array::from(vec![0, 0, 1, 2])); -/// assert_eq!(array.values(), &values); -/// ``` +/// A [`DictionaryArray`] indexed by `i32` pub 
type Int32DictionaryArray = DictionaryArray; -/// A dictionary array indexed by `i64` -/// -/// # Example: Using `collect` -/// ``` -/// # use arrow_array::{Array, Int64DictionaryArray, Int64Array, StringArray}; -/// # use std::sync::Arc; -/// -/// let array: Int64DictionaryArray = vec!["a", "a", "b", "c"].into_iter().collect(); -/// let values: Arc = Arc::new(StringArray::from(vec!["a", "b", "c"])); -/// assert_eq!(array.keys(), &Int64Array::from(vec![0, 0, 1, 2])); -/// assert_eq!(array.values(), &values); -/// ``` +/// A [`DictionaryArray`] indexed by `i64` pub type Int64DictionaryArray = DictionaryArray; -/// A dictionary array indexed by `u8` -/// -/// # Example: Using `collect` -/// ``` -/// # use arrow_array::{Array, UInt8DictionaryArray, UInt8Array, StringArray}; -/// # use std::sync::Arc; -/// -/// let array: UInt8DictionaryArray = vec!["a", "a", "b", "c"].into_iter().collect(); -/// let values: Arc = Arc::new(StringArray::from(vec!["a", "b", "c"])); -/// assert_eq!(array.keys(), &UInt8Array::from(vec![0, 0, 1, 2])); -/// assert_eq!(array.values(), &values); -/// ``` +/// A [`DictionaryArray`] indexed by `u8` pub type UInt8DictionaryArray = DictionaryArray; -/// A dictionary array indexed by `u16` -/// -/// # Example: Using `collect` -/// ``` -/// # use arrow_array::{Array, UInt16DictionaryArray, UInt16Array, StringArray}; -/// # use std::sync::Arc; -/// -/// let array: UInt16DictionaryArray = vec!["a", "a", "b", "c"].into_iter().collect(); -/// let values: Arc = Arc::new(StringArray::from(vec!["a", "b", "c"])); -/// assert_eq!(array.keys(), &UInt16Array::from(vec![0, 0, 1, 2])); -/// assert_eq!(array.values(), &values); -/// ``` +/// A [`DictionaryArray`] indexed by `u16` pub type UInt16DictionaryArray = DictionaryArray; -/// A dictionary array indexed by `u32` -/// -/// # Example: Using `collect` -/// ``` -/// # use arrow_array::{Array, UInt32DictionaryArray, UInt32Array, StringArray}; -/// # use std::sync::Arc; -/// -/// let array: UInt32DictionaryArray 
= vec!["a", "a", "b", "c"].into_iter().collect(); -/// let values: Arc = Arc::new(StringArray::from(vec!["a", "b", "c"])); -/// assert_eq!(array.keys(), &UInt32Array::from(vec![0, 0, 1, 2])); -/// assert_eq!(array.values(), &values); -/// ``` +/// A [`DictionaryArray`] indexed by `u32` pub type UInt32DictionaryArray = DictionaryArray; -/// A dictionary array indexed by `u64` -/// -/// # Example: Using `collect` -/// ``` -/// # use arrow_array::{Array, UInt64DictionaryArray, UInt64Array, StringArray}; -/// # use std::sync::Arc; -/// -/// let array: UInt64DictionaryArray = vec!["a", "a", "b", "c"].into_iter().collect(); -/// let values: Arc = Arc::new(StringArray::from(vec!["a", "b", "c"])); -/// assert_eq!(array.keys(), &UInt64Array::from(vec![0, 0, 1, 2])); -/// assert_eq!(array.values(), &values); -/// ``` +/// A [`DictionaryArray`] indexed by `u64` pub type UInt64DictionaryArray = DictionaryArray; /// An array of [dictionary encoded values](https://arrow.apache.org/docs/format/Columnar.html#dictionary-encoded-layout) @@ -175,39 +87,54 @@ pub type UInt64DictionaryArray = DictionaryArray; /// length = 6 /// ``` /// -/// Example **with nullable** data: +/// # Example: From Nullable Data /// /// ``` -/// use arrow_array::{DictionaryArray, Int8Array, types::Int8Type}; +/// # use arrow_array::{DictionaryArray, Int8Array, types::Int8Type}; /// let test = vec!["a", "a", "b", "c"]; /// let array : DictionaryArray = test.iter().map(|&x| if x == "b" {None} else {Some(x)}).collect(); /// assert_eq!(array.keys(), &Int8Array::from(vec![Some(0), Some(0), None, Some(1)])); /// ``` /// -/// Example **without nullable** data: +/// # Example: From Non-Nullable Data /// /// ``` -/// use arrow_array::{DictionaryArray, Int8Array, types::Int8Type}; +/// # use arrow_array::{DictionaryArray, Int8Array, types::Int8Type}; /// let test = vec!["a", "a", "b", "c"]; /// let array : DictionaryArray = test.into_iter().collect(); /// assert_eq!(array.keys(), &Int8Array::from(vec![0, 0, 1, 2])); 
/// ``` /// -/// Example from existing arrays: +/// # Example: From Existing Arrays /// /// ``` -/// use std::sync::Arc; -/// use arrow_array::{DictionaryArray, Int8Array, StringArray, types::Int8Type}; +/// # use std::sync::Arc; +/// # use arrow_array::{DictionaryArray, Int8Array, StringArray, types::Int8Type}; /// // You can form your own DictionaryArray by providing the /// // values (dictionary) and keys (indexes into the dictionary): /// let values = StringArray::from_iter_values(["a", "b", "c"]); /// let keys = Int8Array::from_iter_values([0, 0, 1, 2]); /// let array = DictionaryArray::::try_new(keys, Arc::new(values)).unwrap(); -/// let expected: DictionaryArray:: = vec!["a", "a", "b", "c"] -/// .into_iter() -/// .collect(); +/// let expected: DictionaryArray:: = vec!["a", "a", "b", "c"].into_iter().collect(); /// assert_eq!(&array, &expected); /// ``` +/// +/// # Example: Using Builder +/// +/// ``` +/// # use arrow_array::{Array, StringArray}; +/// # use arrow_array::builder::StringDictionaryBuilder; +/// # use arrow_array::types::Int32Type; +/// let mut builder = StringDictionaryBuilder::::new(); +/// builder.append_value("a"); +/// builder.append_null(); +/// builder.append_value("a"); +/// builder.append_value("b"); +/// let array = builder.finish(); +/// +/// let values: Vec<_> = array.downcast_dict::().unwrap().into_iter().collect(); +/// assert_eq!(&values, &[Some("a"), None, Some("a"), Some("b")]); +/// ``` pub struct DictionaryArray { data_type: DataType, diff --git a/arrow-array/src/array/list_array.rs b/arrow-array/src/array/list_array.rs index d016afccbfe5..98bdd8db5ea8 100644 --- a/arrow-array/src/array/list_array.rs +++ b/arrow-array/src/array/list_array.rs @@ -57,6 +57,8 @@ impl OffsetSizeTrait for i64 { /// An array of [variable length arrays](https://arrow.apache.org/docs/format/Columnar.html#variable-size-list-layout) /// /// See [`ListArray`] and [`LargeListArray`]` +/// +/// See [`GenericListBuilder`](crate::builder::GenericListBuilder) 
for how to construct a [`GenericListArray`] pub struct GenericListArray { data_type: DataType, nulls: Option, @@ -471,58 +473,10 @@ impl std::fmt::Debug for GenericListArray(data); -/// -/// assert_eq!(false, list_array.is_valid(1)); -/// -/// let list0 = list_array.value(0); -/// let list2 = list_array.value(2); -/// let list3 = list_array.value(3); -/// -/// assert_eq!(&[] as &[i32], list0.as_any().downcast_ref::().unwrap().values()); -/// assert_eq!(false, list2.as_any().downcast_ref::().unwrap().is_valid(1)); -/// assert_eq!(&[6, 7], list3.as_any().downcast_ref::().unwrap().values()); -/// ``` +/// A [`GenericListArray`] of variable size lists, storing offsets as `i32`. pub type ListArray = GenericListArray; -/// An array of variable size lists, storing offsets as `i64`. -/// -/// # Example -/// -/// ``` -/// # use arrow_array::{Array, LargeListArray, Int32Array, types::Int32Type}; -/// # use arrow_schema::DataType; -/// let data = vec![ -/// Some(vec![]), -/// None, -/// Some(vec![Some(3), None, Some(5), Some(19)]), -/// Some(vec![Some(6), Some(7)]), -/// ]; -/// let list_array = LargeListArray::from_iter_primitive::(data); -/// -/// assert_eq!(false, list_array.is_valid(1)); -/// -/// let list0 = list_array.value(0); -/// let list2 = list_array.value(2); -/// let list3 = list_array.value(3); -/// -/// assert_eq!(&[] as &[i32], list0.as_any().downcast_ref::().unwrap().values()); -/// assert_eq!(false, list2.as_any().downcast_ref::().unwrap().is_valid(1)); -/// assert_eq!(&[6, 7], list3.as_any().downcast_ref::().unwrap().values()); -/// ``` +/// A [`GenericListArray`] of variable size lists, storing offsets as `i64`. 
pub type LargeListArray = GenericListArray; #[cfg(test)] diff --git a/arrow-array/src/array/primitive_array.rs b/arrow-array/src/array/primitive_array.rs index 8243054bc279..f50e1bc4ae61 100644 --- a/arrow-array/src/array/primitive_array.rs +++ b/arrow-array/src/array/primitive_array.rs @@ -34,202 +34,40 @@ use half::f16; use std::any::Any; use std::sync::Arc; -/// An array of `i8` -/// -/// # Examples -/// -/// Construction -/// -/// ``` -/// # use arrow_array::Int8Array; -/// // Create from Vec> -/// let arr = Int8Array::from(vec![Some(1), None, Some(2)]); -/// // Create from Vec -/// let arr = Int8Array::from(vec![1, 2, 3]); -/// // Create iter/collect -/// let arr: Int8Array = std::iter::repeat(42).take(10).collect(); -/// ``` +/// A [`PrimitiveArray`] of `i8` pub type Int8Array = PrimitiveArray; -/// An array of `i16` -/// -/// # Examples -/// -/// Construction -/// -/// ``` -/// # use arrow_array::Int16Array; -/// // Create from Vec> -/// let arr = Int16Array::from(vec![Some(1), None, Some(2)]); -/// // Create from Vec -/// let arr = Int16Array::from(vec![1, 2, 3]); -/// // Create iter/collect -/// let arr: Int16Array = std::iter::repeat(42).take(10).collect(); -/// ``` +/// A [`PrimitiveArray`] of `i16` pub type Int16Array = PrimitiveArray; -/// An array of `i32` -/// -/// # Examples -/// -/// Construction -/// -/// ``` -/// # use arrow_array::Int32Array; -/// // Create from Vec> -/// let arr = Int32Array::from(vec![Some(1), None, Some(2)]); -/// // Create from Vec -/// let arr = Int32Array::from(vec![1, 2, 3]); -/// // Create iter/collect -/// let arr: Int32Array = std::iter::repeat(42).take(10).collect(); -/// ``` +/// A [`PrimitiveArray`] of `i32` pub type Int32Array = PrimitiveArray; -/// An array of `i64` -/// -/// # Examples -/// -/// Construction -/// -/// ``` -/// # use arrow_array::Int64Array; -/// // Create from Vec> -/// let arr = Int64Array::from(vec![Some(1), None, Some(2)]); -/// // Create from Vec -/// let arr = Int64Array::from(vec![1, 2, 
3]); -/// // Create iter/collect -/// let arr: Int64Array = std::iter::repeat(42).take(10).collect(); -/// ``` +/// A [`PrimitiveArray`] of `i64` pub type Int64Array = PrimitiveArray; -/// An array of `u8` -/// -/// # Examples -/// -/// Construction -/// -/// ``` -/// # use arrow_array::UInt8Array; -/// // Create from Vec> -/// let arr = UInt8Array::from(vec![Some(1), None, Some(2)]); -/// // Create from Vec -/// let arr = UInt8Array::from(vec![1, 2, 3]); -/// // Create iter/collect -/// let arr: UInt8Array = std::iter::repeat(42).take(10).collect(); -/// ``` +/// A [`PrimitiveArray`] of `u8` pub type UInt8Array = PrimitiveArray; -/// An array of `u16` -/// -/// # Examples -/// -/// Construction -/// -/// ``` -/// # use arrow_array::UInt16Array; -/// // Create from Vec> -/// let arr = UInt16Array::from(vec![Some(1), None, Some(2)]); -/// // Create from Vec -/// let arr = UInt16Array::from(vec![1, 2, 3]); -/// // Create iter/collect -/// let arr: UInt16Array = std::iter::repeat(42).take(10).collect(); -/// ``` +/// A [`PrimitiveArray`] of `u16` pub type UInt16Array = PrimitiveArray; -/// An array of `u32` -/// -/// # Examples -/// -/// Construction -/// -/// ``` -/// # use arrow_array::UInt32Array; -/// // Create from Vec> -/// let arr = UInt32Array::from(vec![Some(1), None, Some(2)]); -/// // Create from Vec -/// let arr = UInt32Array::from(vec![1, 2, 3]); -/// // Create iter/collect -/// let arr: UInt32Array = std::iter::repeat(42).take(10).collect(); -/// ``` +/// A [`PrimitiveArray`] of `u32` pub type UInt32Array = PrimitiveArray; -/// An array of `u64` -/// -/// # Examples -/// -/// Construction -/// -/// ``` -/// # use arrow_array::UInt64Array; -/// // Create from Vec> -/// let arr = UInt64Array::from(vec![Some(1), None, Some(2)]); -/// // Create from Vec -/// let arr = UInt64Array::from(vec![1, 2, 3]); -/// // Create iter/collect -/// let arr: UInt64Array = std::iter::repeat(42).take(10).collect(); -/// ``` +/// A [`PrimitiveArray`] of `u64` pub type 
UInt64Array = PrimitiveArray; -/// An array of `f16` -/// -/// # Examples -/// -/// Construction -/// -/// ``` -/// # use arrow_array::Float16Array; -/// use half::f16; -/// // Create from Vec> -/// let arr = Float16Array::from(vec![Some(f16::from_f64(1.0)), Some(f16::from_f64(2.0))]); -/// // Create from Vec -/// let arr = Float16Array::from(vec![f16::from_f64(1.0), f16::from_f64(2.0), f16::from_f64(3.0)]); -/// // Create iter/collect -/// let arr: Float16Array = std::iter::repeat(f16::from_f64(1.0)).take(10).collect(); -/// ``` -/// -/// # Example: Using `collect` -/// ``` -/// # use arrow_array::Float16Array; -/// use half::f16; -/// let arr : Float16Array = [Some(f16::from_f64(1.0)), Some(f16::from_f64(2.0))].into_iter().collect(); -/// ``` +/// A [`PrimitiveArray`] of `f16` pub type Float16Array = PrimitiveArray; -/// An array of `f32` -/// -/// # Examples -/// -/// Construction -/// -/// ``` -/// # use arrow_array::Float32Array; -/// // Create from Vec> -/// let arr = Float32Array::from(vec![Some(1.0), None, Some(2.0)]); -/// // Create from Vec -/// let arr = Float32Array::from(vec![1.0, 2.0, 3.0]); -/// // Create iter/collect -/// let arr: Float32Array = std::iter::repeat(42.0).take(10).collect(); -/// ``` +/// A [`PrimitiveArray`] of `f32` pub type Float32Array = PrimitiveArray; -/// An array of `f64` -/// -/// # Examples -/// -/// Construction -/// -/// ``` -/// # use arrow_array::Float32Array; -/// // Create from Vec> -/// let arr = Float32Array::from(vec![Some(1.0), None, Some(2.0)]); -/// // Create from Vec -/// let arr = Float32Array::from(vec![1.0, 2.0, 3.0]); -/// // Create iter/collect -/// let arr: Float32Array = std::iter::repeat(42.0).take(10).collect(); -/// ``` +/// A [`PrimitiveArray`] of `f64` pub type Float64Array = PrimitiveArray; -/// An array of seconds since UNIX epoch stored as `i64` +/// A [`PrimitiveArray`] of seconds since UNIX epoch stored as `i64` /// /// This type is similar to the [`chrono::DateTime`] type and can hold /// values 
such as `1970-05-09 14:25:11 +01:00` @@ -277,113 +115,82 @@ pub type Float64Array = PrimitiveArray; /// pub type TimestampSecondArray = PrimitiveArray; -/// An array of milliseconds since UNIX epoch stored as `i64` +/// A [`PrimitiveArray`] of milliseconds since UNIX epoch stored as `i64` /// /// See examples for [`TimestampSecondArray`] pub type TimestampMillisecondArray = PrimitiveArray; -/// An array of microseconds since UNIX epoch stored as `i64` +/// A [`PrimitiveArray`] of microseconds since UNIX epoch stored as `i64` /// /// See examples for [`TimestampSecondArray`] pub type TimestampMicrosecondArray = PrimitiveArray; -/// An array of nanoseconds since UNIX epoch stored as `i64` +/// A [`PrimitiveArray`] of nanoseconds since UNIX epoch stored as `i64` /// /// See examples for [`TimestampSecondArray`] pub type TimestampNanosecondArray = PrimitiveArray; -// TODO: give examples for the below types - -/// An array of days since UNIX epoch stored as `i32` +/// A [`PrimitiveArray`] of days since UNIX epoch stored as `i32` /// /// This type is similar to the [`chrono::NaiveDate`] type and can hold /// values such as `2018-11-13` pub type Date32Array = PrimitiveArray; -/// An array of milliseconds since UNIX epoch stored as `i64` +/// A [`PrimitiveArray`] of milliseconds since UNIX epoch stored as `i64` /// /// This type is similar to the [`chrono::NaiveDate`] type and can hold /// values such as `2018-11-13` pub type Date64Array = PrimitiveArray; -/// An array of seconds since midnight stored as `i32` +/// A [`PrimitiveArray`] of seconds since midnight stored as `i32` /// /// This type is similar to the [`chrono::NaiveTime`] type and can /// hold values such as `00:02:00` pub type Time32SecondArray = PrimitiveArray; -/// An array of milliseconds since midnight stored as `i32` +/// A [`PrimitiveArray`] of milliseconds since midnight stored as `i32` /// /// This type is similar to the [`chrono::NaiveTime`] type and can /// hold values such as `00:02:00.123` pub type 
Time32MillisecondArray = PrimitiveArray; -/// An array of microseconds since midnight stored as `i64` +/// A [`PrimitiveArray`] of microseconds since midnight stored as `i64` /// /// This type is similar to the [`chrono::NaiveTime`] type and can /// hold values such as `00:02:00.123456` pub type Time64MicrosecondArray = PrimitiveArray; -/// An array of nanoseconds since midnight stored as `i64` +/// A [`PrimitiveArray`] of nanoseconds since midnight stored as `i64` /// /// This type is similar to the [`chrono::NaiveTime`] type and can /// hold values such as `00:02:00.123456789` pub type Time64NanosecondArray = PrimitiveArray; -/// An array of “calendar” intervals in months +/// A [`PrimitiveArray`] of “calendar” intervals in months pub type IntervalYearMonthArray = PrimitiveArray; -/// An array of “calendar” intervals in days and milliseconds +/// A [`PrimitiveArray`] of “calendar” intervals in days and milliseconds pub type IntervalDayTimeArray = PrimitiveArray; -/// An array of “calendar” intervals in months, days, and nanoseconds +/// A [`PrimitiveArray`] of “calendar” intervals in months, days, and nanoseconds pub type IntervalMonthDayNanoArray = PrimitiveArray; -/// An array of elapsed durations in seconds +/// A [`PrimitiveArray`] of elapsed durations in seconds pub type DurationSecondArray = PrimitiveArray; -/// An array of elapsed durations in milliseconds +/// A [`PrimitiveArray`] of elapsed durations in milliseconds pub type DurationMillisecondArray = PrimitiveArray; -/// An array of elapsed durations in microseconds +/// A [`PrimitiveArray`] of elapsed durations in microseconds pub type DurationMicrosecondArray = PrimitiveArray; -/// An array of elapsed durations in nanoseconds +/// A [`PrimitiveArray`] of elapsed durations in nanoseconds pub type DurationNanosecondArray = PrimitiveArray; -/// An array of 128-bit fixed point decimals -/// -/// # Examples -/// -/// Construction -/// -/// ``` -/// # use arrow_array::Decimal128Array; -/// // Create from 
Vec> -/// let arr = Decimal128Array::from(vec![Some(1), None, Some(2)]); -/// // Create from Vec -/// let arr = Decimal128Array::from(vec![1, 2, 3]); -/// // Create iter/collect -/// let arr: Decimal128Array = std::iter::repeat(42).take(10).collect(); -/// ``` +/// A [`PrimitiveArray`] of 128-bit fixed point decimals pub type Decimal128Array = PrimitiveArray; -/// An array of 256-bit fixed point decimals -/// -/// # Examples -/// -/// Construction -/// -/// ``` -/// # use arrow_array::Decimal256Array; -/// use arrow_buffer::i256; -/// // Create from Vec> -/// let arr = Decimal256Array::from(vec![Some(i256::from(1)), None, Some(i256::from(2))]); -/// // Create from Vec -/// let arr = Decimal256Array::from(vec![i256::from(1), i256::from(2), i256::from(3)]); -/// // Create iter/collect -/// let arr: Decimal256Array = std::iter::repeat(i256::from(42)).take(10).collect(); -/// ``` +/// A [`PrimitiveArray`] of 256-bit fixed point decimals pub type Decimal256Array = PrimitiveArray; pub use crate::types::ArrowPrimitiveType; @@ -394,17 +201,27 @@ pub use crate::types::ArrowPrimitiveType; /// /// ``` /// # use arrow_array::{Array, PrimitiveArray, types::Int32Type}; -/// let arr: PrimitiveArray = PrimitiveArray::from(vec![1, 2, 3, 4]); +/// let arr: PrimitiveArray = vec![1, 2, 3, 4].into(); /// assert_eq!(4, arr.len()); /// assert_eq!(0, arr.null_count()); /// assert_eq!(arr.values(), &[1, 2, 3, 4]) /// ``` /// +/// # Example: From an optional Vec +/// +/// ``` +/// # use arrow_array::{Array, PrimitiveArray, types::Int32Type}; +/// let arr: PrimitiveArray = vec![Some(1), None, Some(3), None].into(); +/// assert_eq!(4, arr.len()); +/// assert_eq!(2, arr.null_count()); +/// assert_eq!(arr.values(), &[1, 0, 3, 0]) +/// ``` +/// /// # Example: From an iterator of values /// /// ``` /// # use arrow_array::{Array, PrimitiveArray, types::Int32Type}; -/// let arr: PrimitiveArray = PrimitiveArray::from_iter_values((0..10).map(|x| x + 1)); +/// let arr: PrimitiveArray = (0..10).map(|x| 
x + 1).collect(); /// assert_eq!(10, arr.len()); /// assert_eq!(0, arr.null_count()); /// for i in 0..10i32 { @@ -421,6 +238,21 @@ pub use crate::types::ArrowPrimitiveType; /// assert_eq!(5, arr.null_count()); /// assert_eq!(arr.values(), &[0, 0, 2, 0, 4, 0, 6, 0, 8, 0]) /// ``` +/// +/// # Example: Using Builder +/// +/// ``` +/// # use arrow_array::Array; +/// # use arrow_array::builder::PrimitiveBuilder; +/// # use arrow_array::types::Int32Type; +/// let mut builder = PrimitiveBuilder::::new(); +/// builder.append_value(1); +/// builder.append_null(); +/// builder.append_value(2); +/// let array = builder.finish(); +/// assert_eq!(array.values(), &[1, 0, 2]); +/// assert!(array.is_null(1)); +/// ``` pub struct PrimitiveArray { data_type: DataType, /// Values data diff --git a/arrow-array/src/array/string_array.rs b/arrow-array/src/array/string_array.rs index 8a1c0bd150d8..583485da1923 100644 --- a/arrow-array/src/array/string_array.rs +++ b/arrow-array/src/array/string_array.rs @@ -21,7 +21,7 @@ use arrow_buffer::{bit_util, MutableBuffer}; use arrow_data::ArrayData; use arrow_schema::{ArrowError, DataType}; -/// See [`StringArray`] and [`LargeStringArray`] for storing string data +/// A [`GenericByteArray`] for storing `str` pub type GenericStringArray = GenericByteArray>; impl GenericStringArray { @@ -208,54 +208,10 @@ impl From> for GenericStringArray> -/// let arr = StringArray::from(vec![Some("foo"), Some("bar"), None, Some("baz")]); -/// // Create from Vec<&str> -/// let arr = StringArray::from(vec!["foo", "bar", "baz"]); -/// // Create from iter/collect (requires Option<&str>) -/// let arr: StringArray = std::iter::repeat(Some("foo")).take(10).collect(); -/// ``` -/// -/// Construction and Access -/// -/// ``` -/// # use arrow_array::StringArray; -/// let array = StringArray::from(vec![Some("foo"), None, Some("bar")]); -/// assert_eq!(array.value(0), "foo"); -/// ``` +/// A [`GenericStringArray`] of `str` using `i32` offsets pub type StringArray = 
GenericStringArray; -/// An array of `str` using `i64` offsets -/// -/// # Examples -/// -/// Construction -/// -/// ``` -/// # use arrow_array::LargeStringArray; -/// // Create from Vec> -/// let arr = LargeStringArray::from(vec![Some("foo"), Some("bar"), None, Some("baz")]); -/// // Create from Vec<&str> -/// let arr = LargeStringArray::from(vec!["foo", "bar", "baz"]); -/// // Create from iter/collect (requires Option<&str>) -/// let arr: LargeStringArray = std::iter::repeat(Some("foo")).take(10).collect(); -/// ``` -/// -/// Constructon and Access -/// -/// ``` -/// use arrow_array::LargeStringArray; -/// let array = LargeStringArray::from(vec![Some("foo"), None, Some("bar")]); -/// assert_eq!(array.value(2), "bar"); -/// ``` +/// A [`GenericStringArray`] of `str` using `i64` offsets pub type LargeStringArray = GenericStringArray; #[cfg(test)] From 3631db028f771d3b299a5de0b547985b46d0925b Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Thu, 8 Jun 2023 17:29:39 +0100 Subject: [PATCH 5/7] Restore examples --- arrow-array/src/array/binary_array.rs | 76 ++++++++ arrow-array/src/array/dictionary_array.rs | 104 +++++++++++ arrow-array/src/array/primitive_array.rs | 218 ++++++++++++++++++++++ arrow-array/src/array/string_array.rs | 48 +++++ 4 files changed, 446 insertions(+) diff --git a/arrow-array/src/array/binary_array.rs b/arrow-array/src/array/binary_array.rs index 871727e3ad44..87b0b77719ff 100644 --- a/arrow-array/src/array/binary_array.rs +++ b/arrow-array/src/array/binary_array.rs @@ -218,9 +218,85 @@ where } /// A [`GenericBinaryArray`] of `[u8]` using `i32` offsets +/// +/// The byte length of each element is represented by an i32. +/// +/// # Examples +/// +/// Create a BinaryArray from a vector of byte slices. 
+/// +/// ``` +/// use arrow_array::{Array, BinaryArray}; +/// let values: Vec<&[u8]> = +/// vec![b"one", b"two", b"", b"three"]; +/// let array = BinaryArray::from_vec(values); +/// assert_eq!(4, array.len()); +/// assert_eq!(b"one", array.value(0)); +/// assert_eq!(b"two", array.value(1)); +/// assert_eq!(b"", array.value(2)); +/// assert_eq!(b"three", array.value(3)); +/// ``` +/// +/// Create a BinaryArray from a vector of Optional (null) byte slices. +/// +/// ``` +/// use arrow_array::{Array, BinaryArray}; +/// let values: Vec> = +/// vec![Some(b"one"), Some(b"two"), None, Some(b""), Some(b"three")]; +/// let array = BinaryArray::from_opt_vec(values); +/// assert_eq!(5, array.len()); +/// assert_eq!(b"one", array.value(0)); +/// assert_eq!(b"two", array.value(1)); +/// assert_eq!(b"", array.value(3)); +/// assert_eq!(b"three", array.value(4)); +/// assert!(!array.is_null(0)); +/// assert!(!array.is_null(1)); +/// assert!(array.is_null(2)); +/// assert!(!array.is_null(3)); +/// assert!(!array.is_null(4)); +/// ``` +/// +/// See [`GenericByteArray`] for more information and examples pub type BinaryArray = GenericBinaryArray; /// A [`GenericBinaryArray`] of `[u8]` using `i64` offsets +/// +/// # Examples +/// +/// Create a LargeBinaryArray from a vector of byte slices. +/// +/// ``` +/// use arrow_array::{Array, LargeBinaryArray}; +/// let values: Vec<&[u8]> = +/// vec![b"one", b"two", b"", b"three"]; +/// let array = LargeBinaryArray::from_vec(values); +/// assert_eq!(4, array.len()); +/// assert_eq!(b"one", array.value(0)); +/// assert_eq!(b"two", array.value(1)); +/// assert_eq!(b"", array.value(2)); +/// assert_eq!(b"three", array.value(3)); +/// ``` +/// +/// Create a LargeBinaryArray from a vector of Optional (null) byte slices. 
+/// +/// ``` +/// use arrow_array::{Array, LargeBinaryArray}; +/// let values: Vec> = +/// vec![Some(b"one"), Some(b"two"), None, Some(b""), Some(b"three")]; +/// let array = LargeBinaryArray::from_opt_vec(values); +/// assert_eq!(5, array.len()); +/// assert_eq!(b"one", array.value(0)); +/// assert_eq!(b"two", array.value(1)); +/// assert_eq!(b"", array.value(3)); +/// assert_eq!(b"three", array.value(4)); +/// assert!(!array.is_null(0)); +/// assert!(!array.is_null(1)); +/// assert!(array.is_null(2)); +/// assert!(!array.is_null(3)); +/// assert!(!array.is_null(4)); +/// ``` +/// +/// See [`GenericByteArray`] for more information and examples pub type LargeBinaryArray = GenericBinaryArray; #[cfg(test)] diff --git a/arrow-array/src/array/dictionary_array.rs b/arrow-array/src/array/dictionary_array.rs index a96e192617a2..b9112d103a89 100644 --- a/arrow-array/src/array/dictionary_array.rs +++ b/arrow-array/src/array/dictionary_array.rs @@ -31,27 +31,131 @@ use std::any::Any; use std::sync::Arc; /// A [`DictionaryArray`] indexed by `i8` +/// +/// # Example: Using `collect` +/// ``` +/// # use arrow_array::{Array, Int8DictionaryArray, Int8Array, StringArray}; +/// # use std::sync::Arc; +/// +/// let array: Int8DictionaryArray = vec!["a", "a", "b", "c"].into_iter().collect(); +/// let values: Arc = Arc::new(StringArray::from(vec!["a", "b", "c"])); +/// assert_eq!(array.keys(), &Int8Array::from(vec![0, 0, 1, 2])); +/// assert_eq!(array.values(), &values); +/// ``` +/// +/// See [`DictionaryArray`] for more information and examples pub type Int8DictionaryArray = DictionaryArray; /// A [`DictionaryArray`] indexed by `i16` +/// +/// # Example: Using `collect` +/// ``` +/// # use arrow_array::{Array, Int16DictionaryArray, Int16Array, StringArray}; +/// # use std::sync::Arc; +/// +/// let array: Int16DictionaryArray = vec!["a", "a", "b", "c"].into_iter().collect(); +/// let values: Arc = Arc::new(StringArray::from(vec!["a", "b", "c"])); +/// assert_eq!(array.keys(), 
&Int16Array::from(vec![0, 0, 1, 2])); +/// assert_eq!(array.values(), &values); +/// ``` +/// +/// See [`DictionaryArray`] for more information and examples pub type Int16DictionaryArray = DictionaryArray; /// A [`DictionaryArray`] indexed by `i32` +/// +/// # Example: Using `collect` +/// ``` +/// # use arrow_array::{Array, Int32DictionaryArray, Int32Array, StringArray}; +/// # use std::sync::Arc; +/// +/// let array: Int32DictionaryArray = vec!["a", "a", "b", "c"].into_iter().collect(); +/// let values: Arc = Arc::new(StringArray::from(vec!["a", "b", "c"])); +/// assert_eq!(array.keys(), &Int32Array::from(vec![0, 0, 1, 2])); +/// assert_eq!(array.values(), &values); +/// ``` +/// +/// See [`DictionaryArray`] for more information and examples pub type Int32DictionaryArray = DictionaryArray; /// A [`DictionaryArray`] indexed by `i64` +/// +/// # Example: Using `collect` +/// ``` +/// # use arrow_array::{Array, Int64DictionaryArray, Int64Array, StringArray}; +/// # use std::sync::Arc; +/// +/// let array: Int64DictionaryArray = vec!["a", "a", "b", "c"].into_iter().collect(); +/// let values: Arc = Arc::new(StringArray::from(vec!["a", "b", "c"])); +/// assert_eq!(array.keys(), &Int64Array::from(vec![0, 0, 1, 2])); +/// assert_eq!(array.values(), &values); +/// ``` +/// +/// See [`DictionaryArray`] for more information and examples pub type Int64DictionaryArray = DictionaryArray; /// A [`DictionaryArray`] indexed by `u8` +/// +/// # Example: Using `collect` +/// ``` +/// # use arrow_array::{Array, UInt8DictionaryArray, UInt8Array, StringArray}; +/// # use std::sync::Arc; +/// +/// let array: UInt8DictionaryArray = vec!["a", "a", "b", "c"].into_iter().collect(); +/// let values: Arc = Arc::new(StringArray::from(vec!["a", "b", "c"])); +/// assert_eq!(array.keys(), &UInt8Array::from(vec![0, 0, 1, 2])); +/// assert_eq!(array.values(), &values); +/// ``` +/// +/// See [`DictionaryArray`] for more information and examples pub type UInt8DictionaryArray = DictionaryArray; /// 
A [`DictionaryArray`] indexed by `u16` +/// +/// # Example: Using `collect` +/// ``` +/// # use arrow_array::{Array, UInt16DictionaryArray, UInt16Array, StringArray}; +/// # use std::sync::Arc; +/// +/// let array: UInt16DictionaryArray = vec!["a", "a", "b", "c"].into_iter().collect(); +/// let values: Arc = Arc::new(StringArray::from(vec!["a", "b", "c"])); +/// assert_eq!(array.keys(), &UInt16Array::from(vec![0, 0, 1, 2])); +/// assert_eq!(array.values(), &values); +/// ``` +/// +/// See [`DictionaryArray`] for more information and examples pub type UInt16DictionaryArray = DictionaryArray; /// A [`DictionaryArray`] indexed by `u32` +/// +/// # Example: Using `collect` +/// ``` +/// # use arrow_array::{Array, UInt32DictionaryArray, UInt32Array, StringArray}; +/// # use std::sync::Arc; +/// +/// let array: UInt32DictionaryArray = vec!["a", "a", "b", "c"].into_iter().collect(); +/// let values: Arc = Arc::new(StringArray::from(vec!["a", "b", "c"])); +/// assert_eq!(array.keys(), &UInt32Array::from(vec![0, 0, 1, 2])); +/// assert_eq!(array.values(), &values); +/// ``` +/// +/// See [`DictionaryArray`] for more information and examples pub type UInt32DictionaryArray = DictionaryArray; /// A [`DictionaryArray`] indexed by `u64` +/// +/// # Example: Using `collect` +/// ``` +/// # use arrow_array::{Array, UInt64DictionaryArray, UInt64Array, StringArray}; +/// # use std::sync::Arc; +/// +/// let array: UInt64DictionaryArray = vec!["a", "a", "b", "c"].into_iter().collect(); +/// let values: Arc = Arc::new(StringArray::from(vec!["a", "b", "c"])); +/// assert_eq!(array.keys(), &UInt64Array::from(vec![0, 0, 1, 2])); +/// assert_eq!(array.values(), &values); +/// ``` +/// +/// See [`DictionaryArray`] for more information and examples pub type UInt64DictionaryArray = DictionaryArray; /// An array of [dictionary encoded values](https://arrow.apache.org/docs/format/Columnar.html#dictionary-encoded-layout) diff --git a/arrow-array/src/array/primitive_array.rs 
b/arrow-array/src/array/primitive_array.rs index f50e1bc4ae61..c5c4ce5d3d29 100644 --- a/arrow-array/src/array/primitive_array.rs +++ b/arrow-array/src/array/primitive_array.rs @@ -35,36 +35,220 @@ use std::any::Any; use std::sync::Arc; /// A [`PrimitiveArray`] of `i8` +/// +/// # Examples +/// +/// Construction +/// +/// ``` +/// # use arrow_array::Int8Array; +/// // Create from Vec> +/// let arr = Int8Array::from(vec![Some(1), None, Some(2)]); +/// // Create from Vec +/// let arr = Int8Array::from(vec![1, 2, 3]); +/// // Create iter/collect +/// let arr: Int8Array = std::iter::repeat(42).take(10).collect(); +/// ``` +/// +/// See [`PrimitiveArray`] for more information and examples pub type Int8Array = PrimitiveArray; /// A [`PrimitiveArray`] of `i16` +/// +/// # Examples +/// +/// Construction +/// +/// ``` +/// # use arrow_array::Int16Array; +/// // Create from Vec> +/// let arr = Int16Array::from(vec![Some(1), None, Some(2)]); +/// // Create from Vec +/// let arr = Int16Array::from(vec![1, 2, 3]); +/// // Create iter/collect +/// let arr: Int16Array = std::iter::repeat(42).take(10).collect(); +/// ``` +/// +/// See [`PrimitiveArray`] for more information and examples pub type Int16Array = PrimitiveArray; /// A [`PrimitiveArray`] of `i32` +/// +/// # Examples +/// +/// Construction +/// +/// ``` +/// # use arrow_array::Int32Array; +/// // Create from Vec> +/// let arr = Int32Array::from(vec![Some(1), None, Some(2)]); +/// // Create from Vec +/// let arr = Int32Array::from(vec![1, 2, 3]); +/// // Create iter/collect +/// let arr: Int32Array = std::iter::repeat(42).take(10).collect(); +/// ``` +/// +/// See [`PrimitiveArray`] for more information and examples pub type Int32Array = PrimitiveArray; /// A [`PrimitiveArray`] of `i64` +/// +/// # Examples +/// +/// Construction +/// +/// ``` +/// # use arrow_array::Int64Array; +/// // Create from Vec> +/// let arr = Int64Array::from(vec![Some(1), None, Some(2)]); +/// // Create from Vec +/// let arr = 
Int64Array::from(vec![1, 2, 3]); +/// // Create iter/collect +/// let arr: Int64Array = std::iter::repeat(42).take(10).collect(); +/// ``` +/// +/// See [`PrimitiveArray`] for more information and examples pub type Int64Array = PrimitiveArray; /// A [`PrimitiveArray`] of `u8` +/// +/// # Examples +/// +/// Construction +/// +/// ``` +/// # use arrow_array::UInt8Array; +/// // Create from Vec> +/// let arr = UInt8Array::from(vec![Some(1), None, Some(2)]); +/// // Create from Vec +/// let arr = UInt8Array::from(vec![1, 2, 3]); +/// // Create iter/collect +/// let arr: UInt8Array = std::iter::repeat(42).take(10).collect(); +/// ``` +/// +/// See [`PrimitiveArray`] for more information and examples pub type UInt8Array = PrimitiveArray; /// A [`PrimitiveArray`] of `u16` +/// +/// # Examples +/// +/// Construction +/// +/// ``` +/// # use arrow_array::UInt16Array; +/// // Create from Vec> +/// let arr = UInt16Array::from(vec![Some(1), None, Some(2)]); +/// // Create from Vec +/// let arr = UInt16Array::from(vec![1, 2, 3]); +/// // Create iter/collect +/// let arr: UInt16Array = std::iter::repeat(42).take(10).collect(); +/// ``` +/// +/// See [`PrimitiveArray`] for more information and examples pub type UInt16Array = PrimitiveArray; /// A [`PrimitiveArray`] of `u32` +/// +/// # Examples +/// +/// Construction +/// +/// ``` +/// # use arrow_array::UInt32Array; +/// // Create from Vec> +/// let arr = UInt32Array::from(vec![Some(1), None, Some(2)]); +/// // Create from Vec +/// let arr = UInt32Array::from(vec![1, 2, 3]); +/// // Create iter/collect +/// let arr: UInt32Array = std::iter::repeat(42).take(10).collect(); +/// ``` +/// +/// See [`PrimitiveArray`] for more information and examples pub type UInt32Array = PrimitiveArray; /// A [`PrimitiveArray`] of `u64` +/// +/// # Examples +/// +/// Construction +/// +/// ``` +/// # use arrow_array::UInt64Array; +/// // Create from Vec> +/// let arr = UInt64Array::from(vec![Some(1), None, Some(2)]); +/// // Create from Vec +/// 
let arr = UInt64Array::from(vec![1, 2, 3]); +/// // Create iter/collect +/// let arr: UInt64Array = std::iter::repeat(42).take(10).collect(); +/// ``` +/// +/// See [`PrimitiveArray`] for more information and examples pub type UInt64Array = PrimitiveArray; /// A [`PrimitiveArray`] of `f16` +/// +/// # Examples +/// +/// Construction +/// +/// ``` +/// # use arrow_array::Float16Array; +/// use half::f16; +/// // Create from Vec> +/// let arr = Float16Array::from(vec![Some(f16::from_f64(1.0)), Some(f16::from_f64(2.0))]); +/// // Create from Vec +/// let arr = Float16Array::from(vec![f16::from_f64(1.0), f16::from_f64(2.0), f16::from_f64(3.0)]); +/// // Create iter/collect +/// let arr: Float16Array = std::iter::repeat(f16::from_f64(1.0)).take(10).collect(); +/// ``` +/// +/// # Example: Using `collect` +/// ``` +/// # use arrow_array::Float16Array; +/// use half::f16; +/// let arr : Float16Array = [Some(f16::from_f64(1.0)), Some(f16::from_f64(2.0))].into_iter().collect(); +/// ``` +/// +/// See [`PrimitiveArray`] for more information and examples pub type Float16Array = PrimitiveArray; /// A [`PrimitiveArray`] of `f32` +/// +/// # Examples +/// +/// Construction +/// +/// ``` +/// # use arrow_array::Float32Array; +/// // Create from Vec> +/// let arr = Float32Array::from(vec![Some(1.0), None, Some(2.0)]); +/// // Create from Vec +/// let arr = Float32Array::from(vec![1.0, 2.0, 3.0]); +/// // Create iter/collect +/// let arr: Float32Array = std::iter::repeat(42.0).take(10).collect(); +/// ``` +/// +/// See [`PrimitiveArray`] for more information and examples pub type Float32Array = PrimitiveArray; /// A [`PrimitiveArray`] of `f64` +/// +/// # Examples +/// +/// Construction +/// +/// ``` +/// # use arrow_array::Float64Array; +/// // Create from Vec> +/// let arr = Float64Array::from(vec![Some(1.0), None, Some(2.0)]); +/// // Create from Vec +/// let arr = Float64Array::from(vec![1.0, 2.0, 3.0]); +/// // Create iter/collect +/// let arr: Float64Array = 
std::iter::repeat(42.0).take(10).collect(); +/// ``` +/// +/// See [`PrimitiveArray`] for more information and examples pub type Float64Array = PrimitiveArray; /// A [`PrimitiveArray`] of seconds since UNIX epoch stored as `i64` @@ -113,6 +297,7 @@ pub type Float64Array = PrimitiveArray; /// assert_eq!(arr.value_as_datetime_with_tz(0, sydney_tz).map(|v| v.to_string()).unwrap(), "1970-05-10 00:25:11 +10:00") /// ``` /// +/// See [`PrimitiveArray`] for more information and examples pub type TimestampSecondArray = PrimitiveArray; /// A [`PrimitiveArray`] of milliseconds since UNIX epoch stored as `i64` @@ -188,9 +373,42 @@ pub type DurationMicrosecondArray = PrimitiveArray; pub type DurationNanosecondArray = PrimitiveArray; /// A [`PrimitiveArray`] of 128-bit fixed point decimals +/// +/// # Examples +/// +/// Construction +/// +/// ``` +/// # use arrow_array::Decimal128Array; +/// // Create from Vec> +/// let arr = Decimal128Array::from(vec![Some(1), None, Some(2)]); +/// // Create from Vec +/// let arr = Decimal128Array::from(vec![1, 2, 3]); +/// // Create iter/collect +/// let arr: Decimal128Array = std::iter::repeat(42).take(10).collect(); +/// ``` +/// +/// See [`PrimitiveArray`] for more information and examples pub type Decimal128Array = PrimitiveArray; /// A [`PrimitiveArray`] of 256-bit fixed point decimals +/// +/// # Examples +/// +/// Construction +/// +/// ``` +/// # use arrow_array::Decimal256Array; +/// use arrow_buffer::i256; +/// // Create from Vec> +/// let arr = Decimal256Array::from(vec![Some(i256::from(1)), None, Some(i256::from(2))]); +/// // Create from Vec +/// let arr = Decimal256Array::from(vec![i256::from(1), i256::from(2), i256::from(3)]); +/// // Create iter/collect +/// let arr: Decimal256Array = std::iter::repeat(i256::from(42)).take(10).collect(); +/// ``` +/// +/// See [`PrimitiveArray`] for more information and examples pub type Decimal256Array = PrimitiveArray; pub use crate::types::ArrowPrimitiveType; diff --git 
a/arrow-array/src/array/string_array.rs b/arrow-array/src/array/string_array.rs index 583485da1923..33af341a0159 100644 --- a/arrow-array/src/array/string_array.rs +++ b/arrow-array/src/array/string_array.rs @@ -209,9 +209,57 @@ impl From> for GenericStringArray> +/// let arr = StringArray::from(vec![Some("foo"), Some("bar"), None, Some("baz")]); +/// // Create from Vec<&str> +/// let arr = StringArray::from(vec!["foo", "bar", "baz"]); +/// // Create from iter/collect (requires Option<&str>) +/// let arr: StringArray = std::iter::repeat(Some("foo")).take(10).collect(); +/// ``` +/// +/// Construction and Access +/// +/// ``` +/// # use arrow_array::StringArray; +/// let array = StringArray::from(vec![Some("foo"), None, Some("bar")]); +/// assert_eq!(array.value(0), "foo"); +/// ``` +/// +/// See [`GenericByteArray`] for more information and examples pub type StringArray = GenericStringArray; /// A [`GenericStringArray`] of `str` using `i64` offsets +/// +/// # Examples +/// +/// Construction +/// +/// ``` +/// # use arrow_array::LargeStringArray; +/// // Create from Vec> +/// let arr = LargeStringArray::from(vec![Some("foo"), Some("bar"), None, Some("baz")]); +/// // Create from Vec<&str> +/// let arr = LargeStringArray::from(vec!["foo", "bar", "baz"]); +/// // Create from iter/collect (requires Option<&str>) +/// let arr: LargeStringArray = std::iter::repeat(Some("foo")).take(10).collect(); +/// ``` +/// +/// Construction and Access +/// +/// ``` +/// use arrow_array::LargeStringArray; +/// let array = LargeStringArray::from(vec![Some("foo"), None, Some("bar")]); +/// assert_eq!(array.value(2), "bar"); +/// ``` +/// +/// See [`GenericByteArray`] for more information and examples pub type LargeStringArray = GenericStringArray; #[cfg(test)] From 83d62df9869c05175bc310932688cb14210fccfc Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Thu, 8 Jun 2023 17:33:01 +0100 Subject: [PATCH 6/7] Example cleanup --- arrow-array/src/array/boolean_array.rs | 4 ---- 1 
file changed, 4 deletions(-) diff --git a/arrow-array/src/array/boolean_array.rs b/arrow-array/src/array/boolean_array.rs index 8c822996d622..e99b71b1846e 100644 --- a/arrow-array/src/array/boolean_array.rs +++ b/arrow-array/src/array/boolean_array.rs @@ -32,8 +32,6 @@ use std::sync::Arc; /// ``` /// # use arrow_array::{Array, BooleanArray}; /// let arr: BooleanArray = vec![true, true, false].into(); -/// let values: Vec<_> = arr.iter().collect(); -/// assert_eq!(&values, &[Some(true), Some(true), Some(false)]) /// ``` /// /// # Example: From an optional Vec @@ -41,8 +39,6 @@ use std::sync::Arc; /// ``` /// # use arrow_array::{Array, BooleanArray}; /// let arr: BooleanArray = vec![Some(true), None, Some(false)].into(); -/// let values: Vec<_> = arr.iter().collect(); -/// assert_eq!(&values, &[Some(true), None, Some(false)]) /// ``` /// /// # Example: From an iterator From d1ca801cd3c9ac91ee5a19882655485d862bfb8e Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Fri, 9 Jun 2023 15:34:48 +0100 Subject: [PATCH 7/7] Review feedback --- arrow-array/src/array/list_array.rs | 4 ++++ arrow-array/src/array/primitive_array.rs | 3 +++ 2 files changed, 7 insertions(+) diff --git a/arrow-array/src/array/list_array.rs b/arrow-array/src/array/list_array.rs index 98bdd8db5ea8..08791dd4f3e2 100644 --- a/arrow-array/src/array/list_array.rs +++ b/arrow-array/src/array/list_array.rs @@ -474,9 +474,13 @@ impl std::fmt::Debug for GenericListArray; /// A [`GenericListArray`] of variable size lists, storing offsets as `i64`. 
+/// +/// See [`LargeListBuilder`](crate::builder::LargeListBuilder) for how to construct a [`LargeListArray`] pub type LargeListArray = GenericListArray; #[cfg(test)] diff --git a/arrow-array/src/array/primitive_array.rs b/arrow-array/src/array/primitive_array.rs index c5c4ce5d3d29..23ca924efbfd 100644 --- a/arrow-array/src/array/primitive_array.rs +++ b/arrow-array/src/array/primitive_array.rs @@ -432,6 +432,7 @@ pub use crate::types::ArrowPrimitiveType; /// let arr: PrimitiveArray = vec![Some(1), None, Some(3), None].into(); /// assert_eq!(4, arr.len()); /// assert_eq!(2, arr.null_count()); +/// // Note: values for null indexes are arbitrary /// assert_eq!(arr.values(), &[1, 0, 3, 0]) /// ``` /// @@ -454,6 +455,7 @@ pub use crate::types::ArrowPrimitiveType; /// let arr: PrimitiveArray = (0..10).map(|x| (x % 2 == 0).then_some(x)).collect(); /// assert_eq!(10, arr.len()); /// assert_eq!(5, arr.null_count()); +/// // Note: values for null indexes are arbitrary /// assert_eq!(arr.values(), &[0, 0, 2, 0, 4, 0, 6, 0, 8, 0]) /// ``` /// @@ -468,6 +470,7 @@ pub use crate::types::ArrowPrimitiveType; /// builder.append_null(); /// builder.append_value(2); /// let array = builder.finish(); +/// // Note: values for null indexes are arbitrary /// assert_eq!(array.values(), &[1, 0, 2]); /// assert!(array.is_null(1)); /// ```