diff --git a/arrow-array/src/array/binary_array.rs b/arrow-array/src/array/binary_array.rs index e809d3a6d615..54839604d192 100644 --- a/arrow-array/src/array/binary_array.rs +++ b/arrow-array/src/array/binary_array.rs @@ -23,7 +23,7 @@ use arrow_buffer::MutableBuffer; use arrow_data::ArrayData; use arrow_schema::DataType; -/// See [`BinaryArray`] and [`LargeBinaryArray`] for storing binary data +/// A [`GenericByteArray`] for storing `[u8]` pub type GenericBinaryArray = GenericByteArray>; impl GenericBinaryArray { @@ -174,7 +174,7 @@ impl From> } } -/// An array of `[u8]` using `i32` offsets +/// A [`GenericBinaryArray`] of `[u8]` using `i32` offsets /// /// The byte length of each element is represented by an i32. /// @@ -213,9 +213,10 @@ impl From> /// assert!(!array.is_null(4)); /// ``` /// +/// See [`GenericByteArray`] for more information and examples pub type BinaryArray = GenericBinaryArray; -/// An array of `[u8]` using `i64` offsets +/// A [`GenericBinaryArray`] of `[u8]` using `i64` offsets /// /// # Examples /// @@ -252,6 +253,7 @@ pub type BinaryArray = GenericBinaryArray; /// assert!(!array.is_null(4)); /// ``` /// +/// See [`GenericByteArray`] for more information and examples pub type LargeBinaryArray = GenericBinaryArray; #[cfg(test)] diff --git a/arrow-array/src/array/boolean_array.rs b/arrow-array/src/array/boolean_array.rs index 6905baa806de..e99b71b1846e 100644 --- a/arrow-array/src/array/boolean_array.rs +++ b/arrow-array/src/array/boolean_array.rs @@ -27,51 +27,43 @@ use std::sync::Arc; /// An array of [boolean values](https://arrow.apache.org/docs/format/Columnar.html#fixed-size-primitive-layout) /// -/// # Examples +/// # Example: From a Vec /// -/// Construction +/// ``` +/// # use arrow_array::{Array, BooleanArray}; +/// let arr: BooleanArray = vec![true, true, false].into(); +/// ``` +/// +/// # Example: From an optional Vec /// /// ``` -///# use arrow_array::{Array, BooleanArray}; -/// // Create from Vec> -/// let arr = 
BooleanArray::from(vec![Some(false), Some(true), None, Some(true)]); -/// // Create from Vec -/// let arr = BooleanArray::from(vec![false, true, true]); -/// // Create from iter/collect -/// let arr: BooleanArray = std::iter::repeat(Some(true)).take(10).collect(); +/// # use arrow_array::{Array, BooleanArray}; +/// let arr: BooleanArray = vec![Some(true), None, Some(false)].into(); /// ``` /// -/// Construction and Access +/// # Example: From an iterator /// /// ``` -/// use arrow_array::{Array, BooleanArray}; -/// let arr = BooleanArray::from(vec![Some(false), Some(true), None, Some(true)]); -/// assert_eq!(4, arr.len()); -/// assert_eq!(1, arr.null_count()); -/// assert!(arr.is_valid(0)); -/// assert!(!arr.is_null(0)); -/// assert_eq!(false, arr.value(0)); -/// assert!(!arr.is_valid(2)); -/// assert!(arr.is_null(2)); +/// # use arrow_array::{Array, BooleanArray}; +/// let arr: BooleanArray = (0..5).map(|x| (x % 2 == 0).then(|| x % 3 == 0)).collect(); +/// let values: Vec<_> = arr.iter().collect(); +/// assert_eq!(&values, &[Some(true), None, Some(false), None, Some(false)]) /// ``` /// -/// Using `collect` +/// # Example: Using Builder +/// /// ``` -/// use arrow_array::{Array, BooleanArray}; -/// let v = vec![Some(false), Some(true), Some(false), Some(true)]; -/// let arr = v.into_iter().collect::(); -/// assert_eq!(4, arr.len()); -/// assert_eq!(0, arr.offset()); -/// assert_eq!(0, arr.null_count()); -/// assert!(arr.is_valid(0)); -/// assert_eq!(false, arr.value(0)); -/// assert!(arr.is_valid(1)); -/// assert_eq!(true, arr.value(1)); -/// assert!(arr.is_valid(2)); -/// assert_eq!(false, arr.value(2)); -/// assert!(arr.is_valid(3)); -/// assert_eq!(true, arr.value(3)); +/// # use arrow_array::Array; +/// # use arrow_array::builder::BooleanBuilder; +/// let mut builder = BooleanBuilder::new(); +/// builder.append_value(true); +/// builder.append_null(); +/// builder.append_value(false); +/// let array = builder.finish(); +/// let values: Vec<_> = 
array.iter().collect(); +/// assert_eq!(&values, &[Some(true), None, Some(false)]) /// ``` +/// #[derive(Clone)] pub struct BooleanArray { values: BooleanBuffer, diff --git a/arrow-array/src/array/byte_array.rs b/arrow-array/src/array/byte_array.rs index 563e965e5e45..0a18062d9ae1 100644 --- a/arrow-array/src/array/byte_array.rs +++ b/arrow-array/src/array/byte_array.rs @@ -34,6 +34,52 @@ use std::sync::Arc; /// /// See [`BinaryArray`] and [`LargeBinaryArray`] for storing arbitrary bytes /// +/// # Example: From a Vec +/// +/// ``` +/// # use arrow_array::{Array, GenericByteArray, types::Utf8Type}; +/// let arr: GenericByteArray = vec!["hello", "world", ""].into(); +/// assert_eq!(arr.value_data(), b"helloworld"); +/// assert_eq!(arr.value_offsets(), &[0, 5, 10, 10]); +/// let values: Vec<_> = arr.iter().collect(); +/// assert_eq!(values, &[Some("hello"), Some("world"), Some("")]); +/// ``` +/// +/// # Example: From an optional Vec +/// +/// ``` +/// # use arrow_array::{Array, GenericByteArray, types::Utf8Type}; +/// let arr: GenericByteArray = vec![Some("hello"), Some("world"), Some(""), None].into(); +/// assert_eq!(arr.value_data(), b"helloworld"); +/// assert_eq!(arr.value_offsets(), &[0, 5, 10, 10, 10]); +/// let values: Vec<_> = arr.iter().collect(); +/// assert_eq!(values, &[Some("hello"), Some("world"), Some(""), None]); +/// ``` +/// +/// # Example: From an iterator of option +/// +/// ``` +/// # use arrow_array::{Array, GenericByteArray, types::Utf8Type}; +/// let arr: GenericByteArray = (0..5).map(|x| (x % 2 == 0).then(|| x.to_string())).collect(); +/// let values: Vec<_> = arr.iter().collect(); +/// assert_eq!(values, &[Some("0"), None, Some("2"), None, Some("4")]); +/// ``` +/// +/// # Example: Using Builder +/// +/// ``` +/// # use arrow_array::Array; +/// # use arrow_array::builder::GenericByteBuilder; +/// # use arrow_array::types::Utf8Type; +/// let mut builder = GenericByteBuilder::::new(); +/// builder.append_value("hello"); +/// 
builder.append_null(); +/// builder.append_value("world"); +/// let array = builder.finish(); +/// let values: Vec<_> = array.iter().collect(); +/// assert_eq!(values, &[Some("hello"), None, Some("world")]); +/// ``` +/// /// [`StringArray`]: crate::StringArray /// [`LargeStringArray`]: crate::LargeStringArray /// [`BinaryArray`]: crate::BinaryArray diff --git a/arrow-array/src/array/dictionary_array.rs b/arrow-array/src/array/dictionary_array.rs index a319a836a955..b9112d103a89 100644 --- a/arrow-array/src/array/dictionary_array.rs +++ b/arrow-array/src/array/dictionary_array.rs @@ -30,7 +30,7 @@ use arrow_schema::{ArrowError, DataType}; use std::any::Any; use std::sync::Arc; -/// A dictionary array indexed by `i8` +/// A [`DictionaryArray`] indexed by `i8` /// /// # Example: Using `collect` /// ``` @@ -42,9 +42,11 @@ use std::sync::Arc; /// assert_eq!(array.keys(), &Int8Array::from(vec![0, 0, 1, 2])); /// assert_eq!(array.values(), &values); /// ``` +/// +/// See [`DictionaryArray`] for more information and examples pub type Int8DictionaryArray = DictionaryArray; -/// A dictionary array indexed by `i16` +/// A [`DictionaryArray`] indexed by `i16` /// /// # Example: Using `collect` /// ``` @@ -56,9 +58,11 @@ pub type Int8DictionaryArray = DictionaryArray; /// assert_eq!(array.keys(), &Int16Array::from(vec![0, 0, 1, 2])); /// assert_eq!(array.values(), &values); /// ``` +/// +/// See [`DictionaryArray`] for more information and examples pub type Int16DictionaryArray = DictionaryArray; -/// A dictionary array indexed by `i32` +/// A [`DictionaryArray`] indexed by `i32` /// /// # Example: Using `collect` /// ``` @@ -70,9 +74,11 @@ pub type Int16DictionaryArray = DictionaryArray; /// assert_eq!(array.keys(), &Int32Array::from(vec![0, 0, 1, 2])); /// assert_eq!(array.values(), &values); /// ``` +/// +/// See [`DictionaryArray`] for more information and examples pub type Int32DictionaryArray = DictionaryArray; -/// A dictionary array indexed by `i64` +/// A 
[`DictionaryArray`] indexed by `i64` /// /// # Example: Using `collect` /// ``` @@ -84,9 +90,11 @@ pub type Int32DictionaryArray = DictionaryArray; /// assert_eq!(array.keys(), &Int64Array::from(vec![0, 0, 1, 2])); /// assert_eq!(array.values(), &values); /// ``` +/// +/// See [`DictionaryArray`] for more information and examples pub type Int64DictionaryArray = DictionaryArray; -/// A dictionary array indexed by `u8` +/// A [`DictionaryArray`] indexed by `u8` /// /// # Example: Using `collect` /// ``` @@ -98,9 +106,11 @@ pub type Int64DictionaryArray = DictionaryArray; /// assert_eq!(array.keys(), &UInt8Array::from(vec![0, 0, 1, 2])); /// assert_eq!(array.values(), &values); /// ``` +/// +/// See [`DictionaryArray`] for more information and examples pub type UInt8DictionaryArray = DictionaryArray; -/// A dictionary array indexed by `u16` +/// A [`DictionaryArray`] indexed by `u16` /// /// # Example: Using `collect` /// ``` @@ -112,9 +122,11 @@ pub type UInt8DictionaryArray = DictionaryArray; /// assert_eq!(array.keys(), &UInt16Array::from(vec![0, 0, 1, 2])); /// assert_eq!(array.values(), &values); /// ``` +/// +/// See [`DictionaryArray`] for more information and examples pub type UInt16DictionaryArray = DictionaryArray; -/// A dictionary array indexed by `u32` +/// A [`DictionaryArray`] indexed by `u32` /// /// # Example: Using `collect` /// ``` @@ -126,9 +138,11 @@ pub type UInt16DictionaryArray = DictionaryArray; /// assert_eq!(array.keys(), &UInt32Array::from(vec![0, 0, 1, 2])); /// assert_eq!(array.values(), &values); /// ``` +/// +/// See [`DictionaryArray`] for more information and examples pub type UInt32DictionaryArray = DictionaryArray; -/// A dictionary array indexed by `u64` +/// A [`DictionaryArray`] indexed by `u64` /// /// # Example: Using `collect` /// ``` @@ -140,6 +154,8 @@ pub type UInt32DictionaryArray = DictionaryArray; /// assert_eq!(array.keys(), &UInt64Array::from(vec![0, 0, 1, 2])); /// assert_eq!(array.values(), &values); /// ``` +/// 
+/// See [`DictionaryArray`] for more information and examples pub type UInt64DictionaryArray = DictionaryArray; /// An array of [dictionary encoded values](https://arrow.apache.org/docs/format/Columnar.html#dictionary-encoded-layout) @@ -175,39 +191,54 @@ pub type UInt64DictionaryArray = DictionaryArray; /// length = 6 /// ``` /// -/// Example **with nullable** data: +/// # Example: From Nullable Data /// /// ``` -/// use arrow_array::{DictionaryArray, Int8Array, types::Int8Type}; +/// # use arrow_array::{DictionaryArray, Int8Array, types::Int8Type}; /// let test = vec!["a", "a", "b", "c"]; /// let array : DictionaryArray = test.iter().map(|&x| if x == "b" {None} else {Some(x)}).collect(); /// assert_eq!(array.keys(), &Int8Array::from(vec![Some(0), Some(0), None, Some(1)])); /// ``` /// -/// Example **without nullable** data: +/// # Example: From Non-Nullable Data /// /// ``` -/// use arrow_array::{DictionaryArray, Int8Array, types::Int8Type}; +/// # use arrow_array::{DictionaryArray, Int8Array, types::Int8Type}; /// let test = vec!["a", "a", "b", "c"]; /// let array : DictionaryArray = test.into_iter().collect(); /// assert_eq!(array.keys(), &Int8Array::from(vec![0, 0, 1, 2])); /// ``` /// -/// Example from existing arrays: +/// # Example: From Existing Arrays /// /// ``` -/// use std::sync::Arc; -/// use arrow_array::{DictionaryArray, Int8Array, StringArray, types::Int8Type}; +/// # use std::sync::Arc; +/// # use arrow_array::{DictionaryArray, Int8Array, StringArray, types::Int8Type}; /// // You can form your own DictionaryArray by providing the /// // values (dictionary) and keys (indexes into the dictionary): /// let values = StringArray::from_iter_values(["a", "b", "c"]); /// let keys = Int8Array::from_iter_values([0, 0, 1, 2]); /// let array = DictionaryArray::::try_new(keys, Arc::new(values)).unwrap(); -/// let expected: DictionaryArray:: = vec!["a", "a", "b", "c"] -/// .into_iter() -/// .collect(); +/// let expected: DictionaryArray:: = vec!["a", "a", "b", 
"c"].into_iter().collect(); /// assert_eq!(&array, &expected); /// ``` +/// +/// # Example: Using Builder +/// +/// ``` +/// # use arrow_array::{Array, StringArray}; +/// # use arrow_array::builder::StringDictionaryBuilder; +/// # use arrow_array::types::Int32Type; +/// let mut builder = StringDictionaryBuilder::::new(); +/// builder.append_value("a"); +/// builder.append_null(); +/// builder.append_value("a"); +/// builder.append_value("b"); +/// let array = builder.finish(); +/// +/// let values: Vec<_> = array.downcast_dict::().unwrap().into_iter().collect(); +/// assert_eq!(&values, &[Some("a"), None, Some("a"), Some("b")]); +/// ``` pub struct DictionaryArray { data_type: DataType, diff --git a/arrow-array/src/array/list_array.rs b/arrow-array/src/array/list_array.rs index abb5ba5e3c0b..2205d846ea34 100644 --- a/arrow-array/src/array/list_array.rs +++ b/arrow-array/src/array/list_array.rs @@ -57,6 +57,8 @@ impl OffsetSizeTrait for i64 { /// An array of [variable length arrays](https://arrow.apache.org/docs/format/Columnar.html#variable-size-list-layout) /// /// See [`ListArray`] and [`LargeListArray`]` +/// +/// See [`GenericListBuilder`](crate::builder::GenericListBuilder) for how to construct a [`GenericListArray`] pub struct GenericListArray { data_type: DataType, nulls: Option, @@ -472,58 +474,14 @@ impl std::fmt::Debug for GenericListArray(data); -/// -/// assert_eq!(false, list_array.is_valid(1)); -/// -/// let list0 = list_array.value(0); -/// let list2 = list_array.value(2); -/// let list3 = list_array.value(3); +/// A [`GenericListArray`] of variable size lists, storing offsets as `i32`. 
/// -/// assert_eq!(&[] as &[i32], list0.as_any().downcast_ref::().unwrap().values()); -/// assert_eq!(false, list2.as_any().downcast_ref::().unwrap().is_valid(1)); -/// assert_eq!(&[6, 7], list3.as_any().downcast_ref::().unwrap().values()); -/// ``` +/// See [`ListBuilder`](crate::builder::ListBuilder) for how to construct a [`ListArray`] pub type ListArray = GenericListArray; -/// An array of variable size lists, storing offsets as `i64`. -/// -/// # Example -/// -/// ``` -/// # use arrow_array::{Array, LargeListArray, Int32Array, types::Int32Type}; -/// # use arrow_schema::DataType; -/// let data = vec![ -/// Some(vec![]), -/// None, -/// Some(vec![Some(3), None, Some(5), Some(19)]), -/// Some(vec![Some(6), Some(7)]), -/// ]; -/// let list_array = LargeListArray::from_iter_primitive::(data); -/// -/// assert_eq!(false, list_array.is_valid(1)); -/// -/// let list0 = list_array.value(0); -/// let list2 = list_array.value(2); -/// let list3 = list_array.value(3); +/// A [`GenericListArray`] of variable size lists, storing offsets as `i64`. 
/// -/// assert_eq!(&[] as &[i32], list0.as_any().downcast_ref::().unwrap().values()); -/// assert_eq!(false, list2.as_any().downcast_ref::().unwrap().is_valid(1)); -/// assert_eq!(&[6, 7], list3.as_any().downcast_ref::().unwrap().values()); -/// ``` +/// See [`LargeListBuilder`](crate::builder::LargeListBuilder) for how to construct a [`LargeListArray`] pub type LargeListArray = GenericListArray; #[cfg(test)] diff --git a/arrow-array/src/array/primitive_array.rs b/arrow-array/src/array/primitive_array.rs index 7220aca8f44b..b821ad1b4422 100644 --- a/arrow-array/src/array/primitive_array.rs +++ b/arrow-array/src/array/primitive_array.rs @@ -34,7 +34,7 @@ use half::f16; use std::any::Any; use std::sync::Arc; -/// An array of `i8` +/// A [`PrimitiveArray`] of `i8` /// /// # Examples /// @@ -49,9 +49,11 @@ use std::sync::Arc; /// // Create iter/collect /// let arr: Int8Array = std::iter::repeat(42).take(10).collect(); /// ``` +/// +/// See [`PrimitiveArray`] for more information and examples pub type Int8Array = PrimitiveArray; -/// An array of `i16` +/// A [`PrimitiveArray`] of `i16` /// /// # Examples /// @@ -66,9 +68,11 @@ pub type Int8Array = PrimitiveArray; /// // Create iter/collect /// let arr: Int16Array = std::iter::repeat(42).take(10).collect(); /// ``` +/// +/// See [`PrimitiveArray`] for more information and examples pub type Int16Array = PrimitiveArray; -/// An array of `i32` +/// A [`PrimitiveArray`] of `i32` /// /// # Examples /// @@ -83,9 +87,11 @@ pub type Int16Array = PrimitiveArray; /// // Create iter/collect /// let arr: Int32Array = std::iter::repeat(42).take(10).collect(); /// ``` +/// +/// See [`PrimitiveArray`] for more information and examples pub type Int32Array = PrimitiveArray; -/// An array of `i64` +/// A [`PrimitiveArray`] of `i64` /// /// # Examples /// @@ -100,9 +106,11 @@ pub type Int32Array = PrimitiveArray; /// // Create iter/collect /// let arr: Int64Array = std::iter::repeat(42).take(10).collect(); /// ``` +/// +/// See 
[`PrimitiveArray`] for more information and examples pub type Int64Array = PrimitiveArray; -/// An array of `u8` +/// A [`PrimitiveArray`] of `u8` /// /// # Examples /// @@ -117,9 +125,11 @@ pub type Int64Array = PrimitiveArray; /// // Create iter/collect /// let arr: UInt8Array = std::iter::repeat(42).take(10).collect(); /// ``` +/// +/// See [`PrimitiveArray`] for more information and examples pub type UInt8Array = PrimitiveArray; -/// An array of `u16` +/// A [`PrimitiveArray`] of `u16` /// /// # Examples /// @@ -134,9 +144,11 @@ pub type UInt8Array = PrimitiveArray; /// // Create iter/collect /// let arr: UInt16Array = std::iter::repeat(42).take(10).collect(); /// ``` +/// +/// See [`PrimitiveArray`] for more information and examples pub type UInt16Array = PrimitiveArray; -/// An array of `u32` +/// A [`PrimitiveArray`] of `u32` /// /// # Examples /// @@ -151,9 +163,11 @@ pub type UInt16Array = PrimitiveArray; /// // Create iter/collect /// let arr: UInt32Array = std::iter::repeat(42).take(10).collect(); /// ``` +/// +/// See [`PrimitiveArray`] for more information and examples pub type UInt32Array = PrimitiveArray; -/// An array of `u64` +/// A [`PrimitiveArray`] of `u64` /// /// # Examples /// @@ -168,9 +182,11 @@ pub type UInt32Array = PrimitiveArray; /// // Create iter/collect /// let arr: UInt64Array = std::iter::repeat(42).take(10).collect(); /// ``` +/// +/// See [`PrimitiveArray`] for more information and examples pub type UInt64Array = PrimitiveArray; -/// An array of `f16` +/// A [`PrimitiveArray`] of `f16` /// /// # Examples /// @@ -193,9 +209,11 @@ pub type UInt64Array = PrimitiveArray; /// use half::f16; /// let arr : Float16Array = [Some(f16::from_f64(1.0)), Some(f16::from_f64(2.0))].into_iter().collect(); /// ``` +/// +/// See [`PrimitiveArray`] for more information and examples pub type Float16Array = PrimitiveArray; -/// An array of `f32` +/// A [`PrimitiveArray`] of `f32` /// /// # Examples /// @@ -210,9 +228,11 @@ pub type Float16Array = 
PrimitiveArray; /// // Create iter/collect /// let arr: Float32Array = std::iter::repeat(42.0).take(10).collect(); /// ``` +/// +/// See [`PrimitiveArray`] for more information and examples pub type Float32Array = PrimitiveArray; -/// An array of `f64` +/// A [`PrimitiveArray`] of `f64` /// /// # Examples /// @@ -227,9 +247,11 @@ pub type Float32Array = PrimitiveArray; /// // Create iter/collect /// let arr: Float32Array = std::iter::repeat(42.0).take(10).collect(); /// ``` +/// +/// See [`PrimitiveArray`] for more information and examples pub type Float64Array = PrimitiveArray; -/// An array of seconds since UNIX epoch stored as `i64` +/// A [`PrimitiveArray`] of seconds since UNIX epoch stored as `i64` /// /// This type is similar to the [`chrono::DateTime`] type and can hold /// values such as `1970-05-09 14:25:11 +01:00` @@ -275,83 +297,82 @@ pub type Float64Array = PrimitiveArray; /// assert_eq!(arr.value_as_datetime_with_tz(0, sydney_tz).map(|v| v.to_string()).unwrap(), "1970-05-10 00:25:11 +10:00") /// ``` /// +/// See [`PrimitiveArray`] for more information and examples pub type TimestampSecondArray = PrimitiveArray; -/// An array of milliseconds since UNIX epoch stored as `i64` +/// A [`PrimitiveArray`] of milliseconds since UNIX epoch stored as `i64` /// /// See examples for [`TimestampSecondArray`] pub type TimestampMillisecondArray = PrimitiveArray; -/// An array of microseconds since UNIX epoch stored as `i64` +/// A [`PrimitiveArray`] of microseconds since UNIX epoch stored as `i64` /// /// See examples for [`TimestampSecondArray`] pub type TimestampMicrosecondArray = PrimitiveArray; -/// An array of nanoseconds since UNIX epoch stored as `i64` +/// A [`PrimitiveArray`] of nanoseconds since UNIX epoch stored as `i64` /// /// See examples for [`TimestampSecondArray`] pub type TimestampNanosecondArray = PrimitiveArray; -// TODO: give examples for the below types - -/// An array of days since UNIX epoch stored as `i32` +/// A [`PrimitiveArray`] of days 
since UNIX epoch stored as `i32` /// /// This type is similar to the [`chrono::NaiveDate`] type and can hold /// values such as `2018-11-13` pub type Date32Array = PrimitiveArray; -/// An array of milliseconds since UNIX epoch stored as `i64` +/// A [`PrimitiveArray`] of milliseconds since UNIX epoch stored as `i64` /// /// This type is similar to the [`chrono::NaiveDate`] type and can hold /// values such as `2018-11-13` pub type Date64Array = PrimitiveArray; -/// An array of seconds since midnight stored as `i32` +/// A [`PrimitiveArray`] of seconds since midnight stored as `i32` /// /// This type is similar to the [`chrono::NaiveTime`] type and can /// hold values such as `00:02:00` pub type Time32SecondArray = PrimitiveArray; -/// An array of milliseconds since midnight stored as `i32` +/// A [`PrimitiveArray`] of milliseconds since midnight stored as `i32` /// /// This type is similar to the [`chrono::NaiveTime`] type and can /// hold values such as `00:02:00.123` pub type Time32MillisecondArray = PrimitiveArray; -/// An array of microseconds since midnight stored as `i64` +/// A [`PrimitiveArray`] of microseconds since midnight stored as `i64` /// /// This type is similar to the [`chrono::NaiveTime`] type and can /// hold values such as `00:02:00.123456` pub type Time64MicrosecondArray = PrimitiveArray; -/// An array of nanoseconds since midnight stored as `i64` +/// A [`PrimitiveArray`] of nanoseconds since midnight stored as `i64` /// /// This type is similar to the [`chrono::NaiveTime`] type and can /// hold values such as `00:02:00.123456789` pub type Time64NanosecondArray = PrimitiveArray; -/// An array of “calendar” intervals in months +/// A [`PrimitiveArray`] of “calendar” intervals in months pub type IntervalYearMonthArray = PrimitiveArray; -/// An array of “calendar” intervals in days and milliseconds +/// A [`PrimitiveArray`] of “calendar” intervals in days and milliseconds pub type IntervalDayTimeArray = PrimitiveArray; -/// An array of “calendar” 
intervals in months, days, and nanoseconds +/// A [`PrimitiveArray`] of “calendar” intervals in months, days, and nanoseconds pub type IntervalMonthDayNanoArray = PrimitiveArray; -/// An array of elapsed durations in seconds +/// A [`PrimitiveArray`] of elapsed durations in seconds pub type DurationSecondArray = PrimitiveArray; -/// An array of elapsed durations in milliseconds +/// A [`PrimitiveArray`] of elapsed durations in milliseconds pub type DurationMillisecondArray = PrimitiveArray; -/// An array of elapsed durations in microseconds +/// A [`PrimitiveArray`] of elapsed durations in microseconds pub type DurationMicrosecondArray = PrimitiveArray; -/// An array of elapsed durations in nanoseconds +/// A [`PrimitiveArray`] of elapsed durations in nanoseconds pub type DurationNanosecondArray = PrimitiveArray; -/// An array of 128-bit fixed point decimals +/// A [`PrimitiveArray`] of 128-bit fixed point decimals /// /// # Examples /// @@ -366,9 +387,11 @@ pub type DurationNanosecondArray = PrimitiveArray; /// // Create iter/collect /// let arr: Decimal128Array = std::iter::repeat(42).take(10).collect(); /// ``` +/// +/// See [`PrimitiveArray`] for more information and examples pub type Decimal128Array = PrimitiveArray; -/// An array of 256-bit fixed point decimals +/// A [`PrimitiveArray`] of 256-bit fixed point decimals /// /// # Examples /// @@ -384,23 +407,73 @@ pub type Decimal128Array = PrimitiveArray; /// // Create iter/collect /// let arr: Decimal256Array = std::iter::repeat(i256::from(42)).take(10).collect(); /// ``` +/// +/// See [`PrimitiveArray`] for more information and examples pub type Decimal256Array = PrimitiveArray; pub use crate::types::ArrowPrimitiveType; /// An array of [primitive values](https://arrow.apache.org/docs/format/Columnar.html#fixed-size-primitive-layout) /// +/// # Example: From a Vec +/// +/// ``` +/// # use arrow_array::{Array, PrimitiveArray, types::Int32Type}; +/// let arr: PrimitiveArray = vec![1, 2, 3, 4].into(); +/// 
assert_eq!(4, arr.len()); +/// assert_eq!(0, arr.null_count()); +/// assert_eq!(arr.values(), &[1, 2, 3, 4]) +/// ``` +/// +/// # Example: From an optional Vec +/// +/// ``` +/// # use arrow_array::{Array, PrimitiveArray, types::Int32Type}; +/// let arr: PrimitiveArray = vec![Some(1), None, Some(3), None].into(); +/// assert_eq!(4, arr.len()); +/// assert_eq!(2, arr.null_count()); +/// // Note: values for null indexes are arbitrary +/// assert_eq!(arr.values(), &[1, 0, 3, 0]) +/// ``` +/// /// # Example: From an iterator of values /// /// ``` -/// use arrow_array::{Array, PrimitiveArray, types::Int32Type}; -/// let arr: PrimitiveArray = PrimitiveArray::from_iter_values((0..10).map(|x| x + 1)); +/// # use arrow_array::{Array, PrimitiveArray, types::Int32Type}; +/// let arr: PrimitiveArray = (0..10).map(|x| x + 1).collect(); /// assert_eq!(10, arr.len()); /// assert_eq!(0, arr.null_count()); /// for i in 0..10i32 { /// assert_eq!(i + 1, arr.value(i as usize)); /// } /// ``` +/// +/// # Example: From an iterator of option +/// +/// ``` +/// # use arrow_array::{Array, PrimitiveArray, types::Int32Type}; +/// let arr: PrimitiveArray = (0..10).map(|x| (x % 2 == 0).then_some(x)).collect(); +/// assert_eq!(10, arr.len()); +/// assert_eq!(5, arr.null_count()); +/// // Note: values for null indexes are arbitrary +/// assert_eq!(arr.values(), &[0, 0, 2, 0, 4, 0, 6, 0, 8, 0]) +/// ``` +/// +/// # Example: Using Builder +/// +/// ``` +/// # use arrow_array::Array; +/// # use arrow_array::builder::PrimitiveBuilder; +/// # use arrow_array::types::Int32Type; +/// let mut builder = PrimitiveBuilder::::new(); +/// builder.append_value(1); +/// builder.append_null(); +/// builder.append_value(2); +/// let array = builder.finish(); +/// // Note: values for null indexes are arbitrary +/// assert_eq!(array.values(), &[1, 0, 2]); +/// assert!(array.is_null(1)); +/// ``` pub struct PrimitiveArray { data_type: DataType, /// Values data diff --git a/arrow-array/src/array/string_array.rs 
b/arrow-array/src/array/string_array.rs index ecc3e3eaba23..f9a3a5fbd095 100644 --- a/arrow-array/src/array/string_array.rs +++ b/arrow-array/src/array/string_array.rs @@ -21,7 +21,7 @@ use arrow_buffer::MutableBuffer; use arrow_data::ArrayData; use arrow_schema::{ArrowError, DataType}; -/// See [`StringArray`] and [`LargeStringArray`] for storing string data +/// A [`GenericByteArray`] for storing `str` pub type GenericStringArray = GenericByteArray>; impl GenericStringArray { @@ -149,7 +149,7 @@ impl From> for GenericStringArray From> for GenericStringArray; -/// An array of `str` using `i64` offsets +/// A [`GenericStringArray`] of `str` using `i64` offsets /// /// # Examples /// @@ -197,6 +199,8 @@ pub type StringArray = GenericStringArray; /// let array = LargeStringArray::from(vec![Some("foo"), None, Some("bar")]); /// assert_eq!(array.value(2), "bar"); /// ``` +/// +/// See [`GenericByteArray`] for more information and examples pub type LargeStringArray = GenericStringArray; #[cfg(test)] diff --git a/arrow-buffer/src/lib.rs b/arrow-buffer/src/lib.rs index 90b801c4ae29..cbcdb979e693 100644 --- a/arrow-buffer/src/lib.rs +++ b/arrow-buffer/src/lib.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -//! Buffer abstractions for [Apache Arrow](https://docs.rs/arrow) +//! Low-level buffer abstractions for [Apache Arrow Rust](https://docs.rs/arrow) pub mod alloc; pub mod buffer; diff --git a/arrow-data/src/lib.rs b/arrow-data/src/lib.rs index b864b786051a..cfa0dba66c35 100644 --- a/arrow-data/src/lib.rs +++ b/arrow-data/src/lib.rs @@ -15,7 +15,9 @@ // specific language governing permissions and limitations // under the License. -//! Array data abstractions for [Apache Arrow](https://docs.rs/arrow) +//! Low-level array data abstractions for [Apache Arrow Rust](https://docs.rs/arrow) +//! +//! 
For a higher-level, strongly-typed interface see [arrow_array](https://docs.rs/arrow_array) mod data; pub use data::*; diff --git a/arrow/src/lib.rs b/arrow/src/lib.rs index 70e615e88c73..bf39bae530b9 100644 --- a/arrow/src/lib.rs +++ b/arrow/src/lib.rs @@ -21,38 +21,6 @@ //! Please see the [arrow crates.io](https://crates.io/crates/arrow) //! page for feature flags and tips to improve performance. //! -//! # Crate Topology -//! -//! The [`arrow`] project is implemented as multiple sub-crates, which are then re-exported by -//! this top-level crate. -//! -//! Crate authors can choose to depend on this top-level crate, or just -//! the sub-crates they need. -//! -//! The current list of sub-crates is: -//! -//! * [`arrow-arith`][arrow_arith] - arithmetic kernels -//! * [`arrow-array`][arrow_array] - type-safe arrow array abstractions -//! * [`arrow-buffer`][arrow_buffer] - buffer abstractions for arrow arrays -//! * [`arrow-cast`][arrow_cast] - cast kernels for arrow arrays -//! * [`arrow-csv`][arrow_csv] - read/write CSV to arrow format -//! * [`arrow-data`][arrow_data] - the underlying data of arrow arrays -//! * [`arrow-ipc`][arrow_ipc] - read/write IPC to arrow format -//! * [`arrow-json`][arrow_json] - read/write JSON to arrow format -//! * [`arrow-ord`][arrow_ord] - ordering kernels for arrow arrays -//! * [`arrow-row`][arrow_row] - comparable row format -//! * [`arrow-schema`][arrow_schema] - the logical types for arrow arrays -//! * [`arrow-select`][arrow_select] - selection kernels for arrow arrays -//! * [`arrow-string`][arrow_string] - string kernels for arrow arrays -//! -//! _This list is likely to grow as further functionality is split out from the top-level crate_ -//! -//! Some functionality is also distributed independently of this crate: -//! -//! * [`arrow-flight`] - support for [Arrow Flight RPC] -//! * [`arrow-integration-test`] - support for [Arrow JSON Test Format] -//! 
* [`parquet`](https://docs.rs/parquet/latest/parquet/) - support for [Apache Parquet] -//! //! # Columnar Format //! //! The [`array`] module provides statically typed implementations of all the array types as defined @@ -73,7 +41,7 @@ //! ``` //! //! It is also possible to write generic code. For example, the following is generic over -//! all primitively typed arrays: +//! all primitively typed arrays //! //! ```rust //! # use std::iter::Sum; @@ -92,7 +60,7 @@ //! assert_eq!(sum(&TimestampNanosecondArray::from(vec![1, 2, 3])), 6); //! ``` //! -//! And the following is generic over all arrays with comparable values: +//! And the following is generic over all arrays with comparable values //! //! ```rust //! # use arrow::array::{ArrayAccessor, ArrayIter, Int32Array, StringArray}; @@ -109,7 +77,7 @@ //! assert_eq!(min(&StringArray::from(vec!["b", "a", "c"])), Some("a")); //! ``` //! -//! For more examples, and details consult the [arrow_array] docs. +//! **For more examples, and details consult the [arrow_array] docs.** //! //! # Type Erasure / Trait Objects //! @@ -317,6 +285,36 @@ //! assert_eq!(string.value(1), "foo"); //! ``` //! +//! # Crate Topology +//! +//! The [`arrow`] project is implemented as multiple sub-crates, which are then re-exported by +//! this top-level crate. +//! +//! Crate authors can choose to depend on this top-level crate, or just +//! the sub-crates they need. +//! +//! The current list of sub-crates is: +//! +//! * [`arrow-arith`][arrow_arith] - arithmetic kernels +//! * [`arrow-array`][arrow_array] - type-safe arrow array abstractions +//! * [`arrow-buffer`][arrow_buffer] - buffer abstractions for arrow arrays +//! * [`arrow-cast`][arrow_cast] - cast kernels for arrow arrays +//! * [`arrow-csv`][arrow_csv] - read/write CSV to arrow format +//! * [`arrow-data`][arrow_data] - the underlying data of arrow arrays +//! * [`arrow-ipc`][arrow_ipc] - read/write IPC to arrow format +//! 
* [`arrow-json`][arrow_json] - read/write JSON to arrow format +//! * [`arrow-ord`][arrow_ord] - ordering kernels for arrow arrays +//! * [`arrow-row`][arrow_row] - comparable row format +//! * [`arrow-schema`][arrow_schema] - the logical types for arrow arrays +//! * [`arrow-select`][arrow_select] - selection kernels for arrow arrays +//! * [`arrow-string`][arrow_string] - string kernels for arrow arrays +//! +//! Some functionality is also distributed independently of this crate: +//! +//! * [`arrow-flight`] - support for [Arrow Flight RPC] +//! * [`arrow-integration-test`] - support for [Arrow JSON Test Format] +//! * [`parquet`](https://docs.rs/parquet/latest/parquet/) - support for [Apache Parquet] +//! //! # Safety and Security //! //! Like many crates, this crate makes use of unsafe where prudent. However, it endeavours to be