From d39cf283e9a68b221a24b4132c27f34100439086 Mon Sep 17 00:00:00 2001 From: Yijun Zhao Date: Fri, 15 Mar 2024 01:05:04 +0800 Subject: [PATCH 01/11] feat: initial support string_view and binary_view, supports layout and basic construction + tests (#5481) * support string_view and binary_view * fix reviewer comments --- arrow-array/src/array/byte_array.rs | 6 +- arrow-array/src/array/byte_view_array.rs | 480 ++++++++++++++++++ arrow-array/src/array/mod.rs | 7 + .../src/builder/generic_bytes_view_builder.rs | 215 ++++++++ arrow-array/src/builder/mod.rs | 3 + arrow-array/src/record_batch.rs | 28 +- arrow-array/src/types.rs | 68 +++ arrow-buffer/src/native.rs | 1 + arrow-data/src/byte_view.rs | 123 +++++ arrow-data/src/data.rs | 85 ++-- arrow-data/src/equal/byte_view.rs | 74 +++ arrow-data/src/equal/mod.rs | 4 +- arrow-data/src/lib.rs | 3 + arrow-data/src/transform/mod.rs | 172 ++++--- arrow/tests/array_equal.rs | 48 +- arrow/tests/array_transform.rs | 39 ++ 16 files changed, 1244 insertions(+), 112 deletions(-) create mode 100644 arrow-array/src/array/byte_view_array.rs create mode 100644 arrow-array/src/builder/generic_bytes_view_builder.rs create mode 100644 arrow-data/src/byte_view.rs create mode 100644 arrow-data/src/equal/byte_view.rs diff --git a/arrow-array/src/array/byte_array.rs b/arrow-array/src/array/byte_array.rs index db825bbea97d..a57abc5b1e71 100644 --- a/arrow-array/src/array/byte_array.rs +++ b/arrow-array/src/array/byte_array.rs @@ -94,7 +94,7 @@ pub struct GenericByteArray { impl Clone for GenericByteArray { fn clone(&self) -> Self { Self { - data_type: self.data_type.clone(), + data_type: T::DATA_TYPE, value_offsets: self.value_offsets.clone(), value_data: self.value_data.clone(), nulls: self.nulls.clone(), @@ -323,7 +323,7 @@ impl GenericByteArray { /// Returns a zero-copy slice of this array with the indicated offset and length. pub fn slice(&self, offset: usize, length: usize) -> Self { Self { - data_type: self.data_type.clone(), + data_type: T::DATA_TYPE, value_offsets: self.value_offsets.slice(offset, length), value_data: self.value_data.clone(), nulls: self.nulls.as_ref().map(|n| n.slice(offset, length)), @@ -511,7 +511,7 @@ impl From for GenericByteArray { Self { value_offsets, value_data, - data_type: data.data_type().clone(), + data_type: T::DATA_TYPE, nulls: data.nulls().cloned(), } } diff --git a/arrow-array/src/array/byte_view_array.rs b/arrow-array/src/array/byte_view_array.rs new file mode 100644 index 000000000000..e22e9b1688bb --- /dev/null +++ b/arrow-array/src/array/byte_view_array.rs @@ -0,0 +1,480 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+
+use crate::array::print_long_array;
+use crate::builder::GenericByteViewBuilder;
+use crate::iterator::ArrayIter;
+use crate::types::bytes::ByteArrayNativeType;
+use crate::types::{BinaryViewType, ByteViewType, StringViewType};
+use crate::{Array, ArrayAccessor, ArrayRef};
+use arrow_buffer::{Buffer, NullBuffer, ScalarBuffer};
+use arrow_data::{ArrayData, ArrayDataBuilder, ByteView};
+use arrow_schema::{ArrowError, DataType};
+use std::any::Any;
+use std::fmt::Debug;
+use std::marker::PhantomData;
+use std::sync::Arc;
+
+/// [Variable-size Binary View Layout]: An array of variable length byte view arrays.
+///
+/// Different than [`crate::GenericByteArray`] as it stores both an offset and length
+/// meaning that take / filter operations can be implemented without copying the underlying data.
+///
+/// [Variable-size Binary View Layout]: https://arrow.apache.org/docs/format/Columnar.html#variable-size-binary-view-layout
+pub struct GenericByteViewArray<T: ByteViewType + ?Sized> {
+    data_type: DataType,
+    views: ScalarBuffer<u128>,
+    buffers: Vec<Buffer>,
+    phantom: PhantomData<T>,
+    nulls: Option<NullBuffer>,
+}
+
+impl<T: ByteViewType + ?Sized> Clone for GenericByteViewArray<T> {
+    fn clone(&self) -> Self {
+        Self {
+            data_type: T::DATA_TYPE,
+            views: self.views.clone(),
+            buffers: self.buffers.clone(),
+            nulls: self.nulls.clone(),
+            phantom: Default::default(),
+        }
+    }
+}
+
+impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
+    /// Create a new [`GenericByteViewArray`] from the provided parts, panicking on failure
+    ///
+    /// # Panics
+    ///
+    /// Panics if [`GenericByteViewArray::try_new`] returns an error
+    pub fn new(views: ScalarBuffer<u128>, buffers: Vec<Buffer>, nulls: Option<NullBuffer>) -> Self {
+        Self::try_new(views, buffers, nulls).unwrap()
+    }
+
+    /// Create a new [`GenericByteViewArray`] from the provided parts, returning an error on failure
+    ///
+    /// # Errors
+    ///
+    /// * `views.len() != nulls.len()`
+    /// * [ByteViewType::validate] fails
+    pub fn try_new(
+        views: ScalarBuffer<u128>,
+        buffers: Vec<Buffer>,
+        nulls: Option<NullBuffer>,
+    ) -> Result<Self, ArrowError> {
+        T::validate(&views, &buffers)?;
+
+        if let Some(n) = nulls.as_ref() {
+            if n.len() != views.len() {
+                return Err(ArrowError::InvalidArgumentError(format!(
+                    "Incorrect length of null buffer for {}ViewArray, expected {} got {}",
+                    T::PREFIX,
+                    views.len(),
+                    n.len(),
+                )));
+            }
+        }
+
+        Ok(Self {
+            data_type: T::DATA_TYPE,
+            views,
+            buffers,
+            nulls,
+            phantom: Default::default(),
+        })
+    }
+
+    /// Create a new [`GenericByteViewArray`] from the provided parts, without validation
+    ///
+    /// # Safety
+    ///
+    /// Safe if [`Self::try_new`] would not error
+    pub unsafe fn new_unchecked(
+        views: ScalarBuffer<u128>,
+        buffers: Vec<Buffer>,
+        nulls: Option<NullBuffer>,
+    ) -> Self {
+        Self {
+            data_type: T::DATA_TYPE,
+            phantom: Default::default(),
+            views,
+            buffers,
+            nulls,
+        }
+    }
+
+    /// Create a new [`GenericByteViewArray`] of length `len` where all values are null
+    pub fn new_null(len: usize) -> Self {
+        Self {
+            data_type: T::DATA_TYPE,
+            views: vec![0; len].into(),
+            buffers: vec![],
+            nulls: Some(NullBuffer::new_null(len)),
+            phantom: Default::default(),
+        }
+    }
+
+    /// Creates a [`GenericByteViewArray`] based on an iterator of values without nulls
+    pub fn from_iter_values<Ptr, I>(iter: I) -> Self
+    where
+        Ptr: AsRef<T::Native>,
+        I: IntoIterator<Item = Ptr>,
+    {
+        let iter = iter.into_iter();
+        let mut builder = GenericByteViewBuilder::<T>::with_capacity(iter.size_hint().0);
+        for v in iter {
+            builder.append_value(v);
+        }
+        builder.finish()
+    }
+
+    /// Deconstruct this array into its constituent parts
+    pub fn into_parts(self) -> (ScalarBuffer<u128>, Vec<Buffer>, Option<NullBuffer>) {
+        (self.views, self.buffers, self.nulls)
+    }
+
+    /// Returns the views buffer
+    #[inline]
+    pub fn views(&self) -> &ScalarBuffer<u128> {
+        &self.views
+    }
+
+    /// Returns the buffers storing string data
+    #[inline]
+    pub fn data_buffers(&self) -> &[Buffer] {
+        &self.buffers
+    }
+
+    /// Returns the element at index `i`
+    /// # Panics
+    /// Panics if index `i` is out of bounds.
+    pub fn value(&self, i: usize) -> &T::Native {
+        assert!(
+            i < self.len(),
+            "Trying to access an element at index {} from a {}ViewArray of length {}",
+            i,
+            T::PREFIX,
+            self.len()
+        );
+
+        unsafe { self.value_unchecked(i) }
+    }
+
+    /// Returns the element at index `i`
+    /// # Safety
+    /// Caller is responsible for ensuring that the index is within the bounds of the array
+    pub unsafe fn value_unchecked(&self, idx: usize) -> &T::Native {
+        let v = self.views.get_unchecked(idx);
+        let len = *v as u32;
+        let b = if len <= 12 {
+            let ptr = self.views.as_ptr() as *const u8;
+            std::slice::from_raw_parts(ptr.add(idx * 16 + 4), len as usize)
+        } else {
+            let view = ByteView::from(*v);
+            let data = self.buffers.get_unchecked(view.buffer_index as usize);
+            let offset = view.offset as usize;
+            data.get_unchecked(offset..offset + len as usize)
+        };
+        T::Native::from_bytes_unchecked(b)
+    }
+
+    /// constructs a new iterator
+    pub fn iter(&self) -> ArrayIter<&Self> {
+        ArrayIter::new(self)
+    }
+
+    /// Returns a zero-copy slice of this array with the indicated offset and length.
+    pub fn slice(&self, offset: usize, length: usize) -> Self {
+        Self {
+            data_type: T::DATA_TYPE,
+            views: self.views.slice(offset, length),
+            buffers: self.buffers.clone(),
+            nulls: self.nulls.as_ref().map(|n| n.slice(offset, length)),
+            phantom: Default::default(),
+        }
+    }
+}
+
+impl<T: ByteViewType + ?Sized> Debug for GenericByteViewArray<T> {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        write!(f, "{}ViewArray\n[\n", T::PREFIX)?;
+        print_long_array(self, f, |array, index, f| {
+            std::fmt::Debug::fmt(&array.value(index), f)
+        })?;
+        write!(f, "]")
+    }
+}
+
+impl<T: ByteViewType + ?Sized> Array for GenericByteViewArray<T> {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    fn to_data(&self) -> ArrayData {
+        self.clone().into()
+    }
+
+    fn into_data(self) -> ArrayData {
+        self.into()
+    }
+
+    fn data_type(&self) -> &DataType {
+        &self.data_type
+    }
+
+    fn slice(&self, offset: usize, length: usize) -> ArrayRef {
+        Arc::new(self.slice(offset, length))
+    }
+
+    fn len(&self) -> usize {
+        self.views.len()
+    }
+
+    fn is_empty(&self) -> bool {
+        self.views.is_empty()
+    }
+
+    fn offset(&self) -> usize {
+        0
+    }
+
+    fn nulls(&self) -> Option<&NullBuffer> {
+        self.nulls.as_ref()
+    }
+
+    fn get_buffer_memory_size(&self) -> usize {
+        let mut sum = self.buffers.iter().map(|b| b.capacity()).sum::<usize>();
+        sum += self.views.inner().capacity();
+        if let Some(x) = &self.nulls {
+            sum += x.buffer().capacity()
+        }
+        sum
+    }
+
+    fn get_array_memory_size(&self) -> usize {
+        std::mem::size_of::<Self>() + self.get_buffer_memory_size()
+    }
+}
+
+impl<'a, T: ByteViewType + ?Sized> ArrayAccessor for &'a GenericByteViewArray<T> {
+    type Item = &'a T::Native;
+
+    fn value(&self, index: usize) -> Self::Item {
+        GenericByteViewArray::value(self, index)
+    }
+
+    unsafe fn value_unchecked(&self, index: usize) -> Self::Item {
+        GenericByteViewArray::value_unchecked(self, index)
+    }
+}
+
+impl<'a, T: ByteViewType + ?Sized> IntoIterator for &'a GenericByteViewArray<T> {
+    type Item = Option<&'a T::Native>;
+    type IntoIter = ArrayIter<Self>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        ArrayIter::new(self)
+    }
+}
+
+impl<T: ByteViewType + ?Sized> From<ArrayData> for GenericByteViewArray<T> {
+    fn from(value: ArrayData) -> Self {
+        let views = value.buffers()[0].clone();
+        let views = ScalarBuffer::new(views, value.offset(), value.len());
+        let buffers = value.buffers()[1..].to_vec();
+        Self {
+            data_type: T::DATA_TYPE,
+            views,
+            buffers,
+            nulls: value.nulls().cloned(),
+            phantom: Default::default(),
+        }
+    }
+}
+
+impl<T: ByteViewType + ?Sized> From<GenericByteViewArray<T>> for ArrayData {
+    fn from(mut array: GenericByteViewArray<T>) -> Self {
+        let len = array.len();
+        array.buffers.insert(0, array.views.into_inner());
+        let builder = ArrayDataBuilder::new(T::DATA_TYPE)
+            .len(len)
+            .buffers(array.buffers)
+            .nulls(array.nulls);
+
+        unsafe { builder.build_unchecked() }
+    }
+}
+
+impl<Ptr, T: ByteViewType + ?Sized> FromIterator<Option<Ptr>> for GenericByteViewArray<T>
+where
+    Ptr: AsRef<T::Native>,
+{
+    fn from_iter<I: IntoIterator<Item = Option<Ptr>>>(iter: I) -> Self {
+        let iter = iter.into_iter();
+        let mut builder = GenericByteViewBuilder::<T>::with_capacity(iter.size_hint().0);
+        builder.extend(iter);
+        builder.finish()
+    }
+}
+
+/// A [`GenericByteViewArray`] of `[u8]`
+pub type BinaryViewArray = GenericByteViewArray<BinaryViewType>;
+
+/// A [`GenericByteViewArray`] of `str`
+///
+/// ```
+/// use arrow_array::StringViewArray;
+/// let array = StringViewArray::from_iter_values(vec!["hello", "world", "lulu", "large payload over 12 bytes"]);
+/// assert_eq!(array.value(0), "hello");
+/// assert_eq!(array.value(3), "large payload over 12 bytes");
+/// ```
+pub type StringViewArray = GenericByteViewArray<StringViewType>;
+
+impl From<Vec<&str>> for StringViewArray {
+    fn from(v: Vec<&str>) -> Self {
+        Self::from_iter_values(v)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::builder::StringViewBuilder;
+    use crate::{Array, BinaryViewArray, StringViewArray};
+    use arrow_buffer::{Buffer, ScalarBuffer};
+    use arrow_data::ByteView;
+
+    #[test]
+    fn try_new() {
+        let array = StringViewArray::from_iter_values(vec![
+            "hello",
+            "world",
+            "lulu",
+            "large payload over 12 bytes",
+        ]);
+        assert_eq!(array.value(0), "hello");
+        assert_eq!(array.value(3), "large payload over 12 bytes");
+
+        let array = BinaryViewArray::from_iter_values(vec![
+            b"hello".as_slice(),
+            b"world".as_slice(),
+            b"lulu".as_slice(),
+            b"large payload over 12 bytes".as_slice(),
+        ]);
+        assert_eq!(array.value(0), b"hello");
+        assert_eq!(array.value(3), b"large payload over 12 bytes");
+
+        // test empty array
+        let array = {
+            let mut builder = StringViewBuilder::new();
+            builder.finish()
+        };
+        assert!(array.is_empty());
+
+        // test builder append
+        let array = {
+            let mut builder = StringViewBuilder::new();
+            builder.append_value("hello");
+            builder.append_null();
+            builder.append_option(Some("large payload over 12 bytes"));
+            builder.finish()
+        };
+        assert_eq!(array.value(0), "hello");
+        assert!(array.is_null(1));
+        assert_eq!(array.value(2), "large payload over 12 bytes");
+
+        // test builder's in_progress re-created
+        let array = {
+            // make a builder with small block size.
+ let mut builder = StringViewBuilder::new().with_block_size(14); + builder.append_value("large payload over 12 bytes"); + builder.append_option(Some("another large payload over 12 bytes that double than the first one, so that we can trigger the in_progress in builder re-created")); + builder.finish() + }; + assert_eq!(array.value(0), "large payload over 12 bytes"); + assert_eq!(array.value(1), "another large payload over 12 bytes that double than the first one, so that we can trigger the in_progress in builder re-created"); + assert_eq!(2, array.buffers.len()); + } + + #[test] + #[should_panic(expected = "Invalid buffer index at 0: got index 3 but only has 1 buffers")] + fn new_with_invalid_view_data() { + let v = "large payload over 12 bytes"; + let view = ByteView { + length: 13, + prefix: u32::from_le_bytes(v.as_bytes()[0..4].try_into().unwrap()), + buffer_index: 3, + offset: 1, + }; + let views = ScalarBuffer::from(vec![view.into()]); + let buffers = vec![Buffer::from_slice_ref(v)]; + StringViewArray::new(views, buffers, None); + } + + #[test] + #[should_panic( + expected = "Encountered non-UTF-8 data at index 0: invalid utf-8 sequence of 1 bytes from index 0" + )] + fn new_with_invalid_utf8_data() { + let v: Vec = vec![0xf0, 0x80, 0x80, 0x80]; + let view = ByteView { + length: v.len() as u32, + prefix: u32::from_le_bytes(v[0..4].try_into().unwrap()), + buffer_index: 0, + offset: 0, + }; + let views = ScalarBuffer::from(vec![view.into()]); + let buffers = vec![Buffer::from_slice_ref(v)]; + StringViewArray::new(views, buffers, None); + } + + #[test] + #[should_panic(expected = "View at index 0 contained non-zero padding for string of length 1")] + fn new_with_invalid_zero_padding() { + let mut data = [0; 12]; + data[0] = b'H'; + data[11] = 1; // no zero padding + + let mut view_buffer = [0; 16]; + view_buffer[0..4].copy_from_slice(&1u32.to_le_bytes()); + view_buffer[4..].copy_from_slice(&data); + + let view = ByteView::from(u128::from_le_bytes(view_buffer)); + let views = ScalarBuffer::from(vec![view.into()]); + let buffers = vec![]; + StringViewArray::new(views, buffers, None); + } + + #[test] + #[should_panic(expected = "Mismatch between embedded prefix and data")] + fn test_mismatch_between_embedded_prefix_and_data() { + let input_str_1 = "Hello, Rustaceans!"; + let input_str_2 = "Hallo, Rustaceans!"; + let length = input_str_1.len() as u32; + assert!(input_str_1.len() > 12); + + let mut view_buffer = [0; 16]; + view_buffer[0..4].copy_from_slice(&length.to_le_bytes()); + view_buffer[4..8].copy_from_slice(&input_str_1.as_bytes()[0..4]); + view_buffer[8..12].copy_from_slice(&0u32.to_le_bytes()); + view_buffer[12..].copy_from_slice(&0u32.to_le_bytes()); + let view = ByteView::from(u128::from_le_bytes(view_buffer)); + let views = ScalarBuffer::from(vec![view.into()]); + let buffers = vec![Buffer::from_slice_ref(input_str_2.as_bytes())]; + + StringViewArray::new(views, buffers, None); + } +} diff --git a/arrow-array/src/array/mod.rs b/arrow-array/src/array/mod.rs index 7aa3f92bfbd2..b115ff9c14cc 100644 --- a/arrow-array/src/array/mod.rs +++ b/arrow-array/src/array/mod.rs @@ -65,8 +65,13 @@ mod union_array; pub use union_array::*; mod run_array; + pub use run_array::*; +mod byte_view_array; + +pub use byte_view_array::*; + /// An array in the [arrow columnar format](https://arrow.apache.org/docs/format/Columnar.html) pub trait Array: std::fmt::Debug + Send + Sync { /// Returns the array as [`Any`] so that it can be @@ -596,8 +601,10 @@ pub fn make_array(data: ArrayData) -> ArrayRef { 
         DataType::Binary => Arc::new(BinaryArray::from(data)) as ArrayRef,
         DataType::LargeBinary => Arc::new(LargeBinaryArray::from(data)) as ArrayRef,
         DataType::FixedSizeBinary(_) => Arc::new(FixedSizeBinaryArray::from(data)) as ArrayRef,
+        DataType::BinaryView => Arc::new(BinaryViewArray::from(data)) as ArrayRef,
         DataType::Utf8 => Arc::new(StringArray::from(data)) as ArrayRef,
         DataType::LargeUtf8 => Arc::new(LargeStringArray::from(data)) as ArrayRef,
+        DataType::Utf8View => Arc::new(StringViewArray::from(data)) as ArrayRef,
         DataType::List(_) => Arc::new(ListArray::from(data)) as ArrayRef,
         DataType::LargeList(_) => Arc::new(LargeListArray::from(data)) as ArrayRef,
         DataType::Struct(_) => Arc::new(StructArray::from(data)) as ArrayRef,
diff --git a/arrow-array/src/builder/generic_bytes_view_builder.rs b/arrow-array/src/builder/generic_bytes_view_builder.rs
new file mode 100644
index 000000000000..29de7feb0ec1
--- /dev/null
+++ b/arrow-array/src/builder/generic_bytes_view_builder.rs
@@ -0,0 +1,215 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::builder::ArrayBuilder;
+use crate::types::{BinaryViewType, ByteViewType, StringViewType};
+use crate::{ArrayRef, GenericByteViewArray};
+use arrow_buffer::{Buffer, BufferBuilder, NullBufferBuilder, ScalarBuffer};
+use arrow_data::ByteView;
+use std::any::Any;
+use std::marker::PhantomData;
+use std::sync::Arc;
+
+const DEFAULT_BLOCK_SIZE: u32 = 8 * 1024;
+
+/// A builder for [`GenericByteViewArray`]
+///
+/// See [`Self::append_value`] for the allocation strategy
+pub struct GenericByteViewBuilder<T: ByteViewType + ?Sized> {
+    views_builder: BufferBuilder<u128>,
+    null_buffer_builder: NullBufferBuilder,
+    completed: Vec<Buffer>,
+    in_progress: Vec<u8>,
+    block_size: u32,
+    phantom: PhantomData<T>,
+}
+
+impl<T: ByteViewType + ?Sized> GenericByteViewBuilder<T> {
+    /// Creates a new [`GenericByteViewBuilder`].
+    pub fn new() -> Self {
+        Self::with_capacity(1024)
+    }
+
+    /// Creates a new [`GenericByteViewBuilder`] with space for `capacity` string values.
+    pub fn with_capacity(capacity: usize) -> Self {
+        Self {
+            views_builder: BufferBuilder::new(capacity),
+            null_buffer_builder: NullBufferBuilder::new(capacity),
+            completed: vec![],
+            in_progress: vec![],
+            block_size: DEFAULT_BLOCK_SIZE,
+            phantom: Default::default(),
+        }
+    }
+
+    /// Override the size of buffers to allocate for holding string data
+    pub fn with_block_size(self, block_size: u32) -> Self {
+        Self { block_size, ..self }
+    }
+
+    /// Appends a value into the builder
+    ///
+    /// # Panics
+    ///
+    /// Panics if
+    /// - String buffer count exceeds `u32::MAX`
+    /// - String length exceeds `u32::MAX`
+    #[inline]
+    pub fn append_value(&mut self, value: impl AsRef<T::Native>) {
+        let v: &[u8] = value.as_ref().as_ref();
+        let length: u32 = v.len().try_into().unwrap();
+        if length <= 12 {
+            let mut view_buffer = [0; 16];
+            view_buffer[0..4].copy_from_slice(&length.to_le_bytes());
+            view_buffer[4..4 + v.len()].copy_from_slice(v);
+            self.views_builder.append(u128::from_le_bytes(view_buffer));
+            self.null_buffer_builder.append_non_null();
+            return;
+        }
+
+        let required_cap = self.in_progress.len() + v.len();
+        if self.in_progress.capacity() < required_cap {
+            let in_progress = Vec::with_capacity(v.len().max(self.block_size as usize));
+            let flushed = std::mem::replace(&mut self.in_progress, in_progress);
+            if !flushed.is_empty() {
+                assert!(self.completed.len() < u32::MAX as usize);
+                self.completed.push(flushed.into());
+            }
+        };
+        let offset = self.in_progress.len() as u32;
+        self.in_progress.extend_from_slice(v);
+
+        let view = ByteView {
+            length,
+            prefix: u32::from_le_bytes(v[0..4].try_into().unwrap()),
+            buffer_index: self.completed.len() as u32,
+            offset,
+        };
+        self.views_builder.append(view.into());
+        self.null_buffer_builder.append_non_null();
+    }
+
+    /// Append an `Option` value into the builder
+    #[inline]
+    pub fn append_option(&mut self, value: Option<impl AsRef<T::Native>>) {
+        match value {
+            None => self.append_null(),
+            Some(v) => self.append_value(v),
+        };
+    }
+
+    /// Append a null value into the builder
+    #[inline]
+    pub fn append_null(&mut self) {
+        self.null_buffer_builder.append_null();
+        self.views_builder.append(0);
+    }
+
+    /// Builds the [`GenericByteViewArray`] and reset this builder
+    pub fn finish(&mut self) -> GenericByteViewArray<T> {
+        let mut completed = std::mem::take(&mut self.completed);
+        if !self.in_progress.is_empty() {
+            completed.push(std::mem::take(&mut self.in_progress).into());
+        }
+        let len = self.views_builder.len();
+        let views = ScalarBuffer::new(self.views_builder.finish(), 0, len);
+        let nulls = self.null_buffer_builder.finish();
+        // SAFETY: valid by construction
+        unsafe { GenericByteViewArray::new_unchecked(views, completed, nulls) }
+    }
+
+    /// Builds the [`GenericByteViewArray`] without resetting the builder
+    pub fn finish_cloned(&self) -> GenericByteViewArray<T> {
+        let mut completed = self.completed.clone();
+        if !self.in_progress.is_empty() {
+            completed.push(Buffer::from_slice_ref(&self.in_progress));
+        }
+        let len = self.views_builder.len();
+        let views = Buffer::from_slice_ref(self.views_builder.as_slice());
+        let views = ScalarBuffer::new(views, 0, len);
+        let nulls = self.null_buffer_builder.finish_cloned();
+        // SAFETY: valid by construction
+        unsafe { GenericByteViewArray::new_unchecked(views, completed, nulls) }
+    }
+}
+
+impl<T: ByteViewType + ?Sized> Default for GenericByteViewBuilder<T> {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl<T: ByteViewType + ?Sized> std::fmt::Debug for GenericByteViewBuilder<T> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{}ViewBuilder",
T::PREFIX)?; + f.debug_struct("") + .field("views_builder", &self.views_builder) + .field("in_progress", &self.in_progress) + .field("completed", &self.completed) + .field("null_buffer_builder", &self.null_buffer_builder) + .finish() + } +} + +impl ArrayBuilder for GenericByteViewBuilder { + fn len(&self) -> usize { + self.null_buffer_builder.len() + } + + fn finish(&mut self) -> ArrayRef { + Arc::new(self.finish()) + } + + fn finish_cloned(&self) -> ArrayRef { + Arc::new(self.finish_cloned()) + } + + fn as_any(&self) -> &dyn Any { + self + } + + fn as_any_mut(&mut self) -> &mut dyn Any { + self + } + + fn into_box_any(self: Box) -> Box { + self + } +} + +impl> Extend> + for GenericByteViewBuilder +{ + #[inline] + fn extend>>(&mut self, iter: I) { + for v in iter { + self.append_option(v) + } + } +} + +/// Array builder for [`StringViewArray`][crate::StringViewArray] +/// +/// Values can be appended using [`GenericByteViewBuilder::append_value`], and nulls with +/// [`GenericByteViewBuilder::append_null`] as normal. +pub type StringViewBuilder = GenericByteViewBuilder; + +/// Array builder for [`BinaryViewArray`][crate::BinaryViewArray] +/// +/// Values can be appended using [`GenericByteViewBuilder::append_value`], and nulls with +/// [`GenericByteViewBuilder::append_null`] as normal. +pub type BinaryViewBuilder = GenericByteViewBuilder; diff --git a/arrow-array/src/builder/mod.rs b/arrow-array/src/builder/mod.rs index d33e565a868b..e4ab7ae4ba23 100644 --- a/arrow-array/src/builder/mod.rs +++ b/arrow-array/src/builder/mod.rs @@ -178,7 +178,10 @@ mod generic_bytes_dictionary_builder; pub use generic_bytes_dictionary_builder::*; mod generic_byte_run_builder; pub use generic_byte_run_builder::*; +mod generic_bytes_view_builder; +pub use generic_bytes_view_builder::*; mod union_builder; + pub use union_builder::*; use crate::ArrayRef; diff --git a/arrow-array/src/record_batch.rs b/arrow-array/src/record_batch.rs index 314445bba617..c56b1fd308cf 100644 --- a/arrow-array/src/record_batch.rs +++ b/arrow-array/src/record_batch.rs @@ -626,7 +626,9 @@ mod tests { use std::collections::HashMap; use super::*; - use crate::{BooleanArray, Int32Array, Int64Array, Int8Array, ListArray, StringArray}; + use crate::{ + BooleanArray, Int32Array, Int64Array, Int8Array, ListArray, StringArray, StringViewArray, + }; use arrow_buffer::{Buffer, ToByteSlice}; use arrow_data::{ArrayData, ArrayDataBuilder}; use arrow_schema::Fields; @@ -646,6 +648,30 @@ mod tests { check_batch(record_batch, 5) } + #[test] + fn create_string_view_record_batch() { + let schema = Schema::new(vec![ + Field::new("a", DataType::Int32, false), + Field::new("b", DataType::Utf8View, false), + ]); + + let a = Int32Array::from(vec![1, 2, 3, 4, 5]); + let b = StringViewArray::from(vec!["a", "b", "c", "d", "e"]); + + let record_batch = + RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a), Arc::new(b)]).unwrap(); + + assert_eq!(5, record_batch.num_rows()); + assert_eq!(2, record_batch.num_columns()); + assert_eq!(&DataType::Int32, record_batch.schema().field(0).data_type()); + assert_eq!( + &DataType::Utf8View, + record_batch.schema().field(1).data_type() + ); + assert_eq!(5, record_batch.column(0).len()); + assert_eq!(5, record_batch.column(1).len()); + } + #[test] fn byte_size_should_not_regress() { let schema = Schema::new(vec![ diff --git a/arrow-array/src/types.rs b/arrow-array/src/types.rs index 83a229c1da0d..e33f7bde7cba 100644 --- a/arrow-array/src/types.rs +++ b/arrow-array/src/types.rs @@ -25,12 +25,14 @@ use crate::timezone::Tz; 
 use crate::{ArrowNativeTypeOp, OffsetSizeTrait};
 use arrow_buffer::{i256, Buffer, OffsetBuffer};
 use arrow_data::decimal::{validate_decimal256_precision, validate_decimal_precision};
+use arrow_data::{validate_binary_view, validate_string_view};
 use arrow_schema::{
     ArrowError, DataType, IntervalUnit, TimeUnit, DECIMAL128_MAX_PRECISION, DECIMAL128_MAX_SCALE,
     DECIMAL256_MAX_PRECISION, DECIMAL256_MAX_SCALE, DECIMAL_DEFAULT_SCALE,
 };
 use chrono::{Duration, NaiveDate, NaiveDateTime};
 use half::f16;
+use std::fmt::Debug;
 use std::marker::PhantomData;
 use std::ops::{Add, Sub};
@@ -1544,6 +1546,72 @@ pub type BinaryType = GenericBinaryType<i32>;
 /// An arrow binary array with i64 offsets
 pub type LargeBinaryType = GenericBinaryType<i64>;
 
+mod byte_view {
+    use crate::types::{BinaryViewType, StringViewType};
+
+    pub trait Sealed: Send + Sync {}
+    impl Sealed for StringViewType {}
+    impl Sealed for BinaryViewType {}
+}
+
+/// A trait over the variable length byte view array types
+pub trait ByteViewType: byte_view::Sealed + 'static + PartialEq + Send + Sync {
+    /// True if the elements of the array are utf8 encoded strings.
+    const IS_UTF8: bool;
+
+    /// Datatype of array elements
+    const DATA_TYPE: DataType = if Self::IS_UTF8 {
+        DataType::Utf8View
+    } else {
+        DataType::BinaryView
+    };
+
+    /// "Binary" or "String", for use in display and error messages
+    const PREFIX: &'static str;
+
+    /// Type for representing its equivalent rust type, i.e.
+    /// Utf8Array will have native type as &str,
+    /// BinaryArray will have native type as [u8]
+    type Native: bytes::ByteArrayNativeType + AsRef<Self::Native> + AsRef<[u8]> + ?Sized;
+
+    /// The owned type corresponding to `Native`
+    type Owned: Debug + Clone + Sync + Send + AsRef<Self::Native>;
+
+    /// Verifies that the provided buffers are valid for this array type
+    fn validate(views: &[u128], buffers: &[Buffer]) -> Result<(), ArrowError>;
+}
+
+/// [`ByteViewType`] for string arrays
+#[derive(PartialEq)]
+pub struct StringViewType {}
+
+impl ByteViewType for StringViewType {
+    const IS_UTF8: bool = true;
+    const PREFIX: &'static str = "String";
+
+    type Native = str;
+    type Owned = String;
+
+    fn validate(views: &[u128], buffers: &[Buffer]) -> Result<(), ArrowError> {
+        validate_string_view(views, buffers)
+    }
+}
+
+/// [`ByteViewType`] for binary arrays
+#[derive(PartialEq)]
+pub struct BinaryViewType {}
+
+impl ByteViewType for BinaryViewType {
+    const IS_UTF8: bool = false;
+    const PREFIX: &'static str = "Binary";
+    type Native = [u8];
+    type Owned = Vec<u8>;
+
+    fn validate(views: &[u128], buffers: &[Buffer]) -> Result<(), ArrowError> {
+        validate_binary_view(views, buffers)
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
diff --git a/arrow-buffer/src/native.rs b/arrow-buffer/src/native.rs
index 38074a8dc26c..5184d60ac1fd 100644
--- a/arrow-buffer/src/native.rs
+++ b/arrow-buffer/src/native.rs
@@ -149,6 +149,7 @@ native_integer!(u8);
 native_integer!(u16);
 native_integer!(u32);
 native_integer!(u64);
+native_integer!(u128);
 
 macro_rules! native_float {
     ($t:ty, $s:ident, $as_usize: expr, $i:ident, $usize_as: expr) => {
diff --git a/arrow-data/src/byte_view.rs b/arrow-data/src/byte_view.rs
new file mode 100644
index 000000000000..b8b1731ac60b
--- /dev/null
+++ b/arrow-data/src/byte_view.rs
@@ -0,0 +1,123 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arrow_buffer::Buffer;
+use arrow_schema::ArrowError;
+
+#[derive(Debug, Copy, Clone, Default)]
+#[repr(C)]
+pub struct ByteView {
+    /// The length of the string/bytes.
+    pub length: u32,
+    /// First 4 bytes of string/bytes data.
+    pub prefix: u32,
+    /// The buffer index.
+    pub buffer_index: u32,
+    /// The offset into the buffer.
+    pub offset: u32,
+}
+
+impl ByteView {
+    #[inline(always)]
+    pub fn as_u128(self) -> u128 {
+        (self.length as u128)
+            | ((self.prefix as u128) << 32)
+            | ((self.buffer_index as u128) << 64)
+            | ((self.offset as u128) << 96)
+    }
+}
+
+impl From<u128> for ByteView {
+    #[inline]
+    fn from(value: u128) -> Self {
+        Self {
+            length: value as u32,
+            prefix: (value >> 32) as u32,
+            buffer_index: (value >> 64) as u32,
+            offset: (value >> 96) as u32,
+        }
+    }
+}
+
+impl From<ByteView> for u128 {
+    #[inline]
+    fn from(value: ByteView) -> Self {
+        value.as_u128()
+    }
+}
+
+/// Validates the combination of `views` and `buffers` is a valid BinaryView
+pub fn validate_binary_view(views: &[u128], buffers: &[Buffer]) -> Result<(), ArrowError> {
+    validate_view_impl(views, buffers, |_, _| Ok(()))
+}
+
+/// Validates the combination of `views` and `buffers` is a valid StringView
+pub fn validate_string_view(views: &[u128], buffers: &[Buffer]) -> Result<(), ArrowError> {
+    validate_view_impl(views, buffers, |idx, b| {
+        std::str::from_utf8(b).map_err(|e| {
+            ArrowError::InvalidArgumentError(format!(
+                "Encountered non-UTF-8 data at index {idx}: {e}"
+            ))
+        })?;
+        Ok(())
+    })
+}
+
+fn validate_view_impl<F>(views: &[u128], buffers: &[Buffer], f: F) -> Result<(), ArrowError>
+where
+    F: Fn(usize, &[u8]) -> Result<(), ArrowError>,
+{
+    for (idx, v) in views.iter().enumerate() {
+        let len = *v as u32;
+        if len <= 12 {
+            if len < 12 && (v >> (32 + len * 8)) != 0 {
+                return Err(ArrowError::InvalidArgumentError(format!(
+                    "View at index {idx} contained non-zero padding for string of length {len}",
+                )));
+            }
+            f(idx, &v.to_le_bytes()[4..4 + len as usize])?;
+        } else {
+            let view = ByteView::from(*v);
+            let data = buffers.get(view.buffer_index as usize).ok_or_else(|| {
+                ArrowError::InvalidArgumentError(format!(
+                    "Invalid buffer index at {idx}: got index {} but only has {} buffers",
+                    view.buffer_index,
+                    buffers.len()
+                ))
+            })?;
+
+            let start = view.offset as usize;
+            let end = start + len as usize;
+            let b = data.get(start..end).ok_or_else(|| {
+                ArrowError::InvalidArgumentError(format!(
+                    "Invalid buffer slice at {idx}: got {start}..{end} but buffer {} has length {}",
+                    view.buffer_index,
+                    data.len()
+                ))
+            })?;
+
+            if !b.starts_with(&view.prefix.to_le_bytes()) {
+                return Err(ArrowError::InvalidArgumentError(
+                    "Mismatch between embedded prefix and data".to_string(),
+                ));
+            }
+
+            f(idx, b)?;
+        }
+    }
+    Ok(())
+}
diff --git a/arrow-data/src/data.rs b/arrow-data/src/data.rs
index 16637570f520..e227b168eee5 100644
--- a/arrow-data/src/data.rs
+++ b/arrow-data/src/data.rs
@@ -26,7 +26,7 @@ use std::mem;
 use
std::ops::Range; use std::sync::Arc; -use crate::equal; +use crate::{equal, validate_binary_view, validate_string_view}; /// A collection of [`Buffer`] #[doc(hidden)] @@ -159,29 +159,6 @@ pub(crate) fn new_buffers(data_type: &DataType, capacity: usize) -> [MutableBuff } } -/// Maps 2 [`MutableBuffer`]s into a vector of [Buffer]s whose size depends on `data_type`. -#[inline] -pub(crate) fn into_buffers( - data_type: &DataType, - buffer1: MutableBuffer, - buffer2: MutableBuffer, -) -> Vec { - match data_type { - DataType::Null | DataType::Struct(_) | DataType::FixedSizeList(_, _) => vec![], - DataType::Utf8 | DataType::Binary | DataType::LargeUtf8 | DataType::LargeBinary => { - vec![buffer1.into(), buffer2.into()] - } - DataType::Union(_, mode) => { - match mode { - // Based on Union's DataTypeLayout - UnionMode::Sparse => vec![buffer1.into()], - UnionMode::Dense => vec![buffer1.into(), buffer2.into()], - } - } - _ => vec![buffer1.into()], - } -} - /// A generic representation of Arrow array data which encapsulates common attributes and /// operations for Arrow array. Specific operations for different arrays types (e.g., /// primitive, list, struct) are implemented in `Array`. @@ -745,7 +722,10 @@ impl ArrayData { ))); } - if self.buffers.len() != layout.buffers.len() { + // Check data buffers length for view types and other types + if self.buffers.len() < layout.buffers.len() + || (!layout.variadic && self.buffers.len() != layout.buffers.len()) + { return Err(ArrowError::InvalidArgumentError(format!( "Expected {} buffers in array of type {:?}, got {}", layout.buffers.len(), @@ -1240,6 +1220,14 @@ impl ArrayData { DataType::LargeUtf8 => self.validate_utf8::(), DataType::Binary => self.validate_offsets_full::(self.buffers[1].len()), DataType::LargeBinary => self.validate_offsets_full::(self.buffers[1].len()), + DataType::BinaryView => { + let views = self.typed_buffer::(0, self.len)?; + validate_binary_view(views, &self.buffers[1..]) + } + DataType::Utf8View => { + let views = self.typed_buffer::(0, self.len)?; + validate_string_view(views, &self.buffers[1..]) + } DataType::List(_) | DataType::Map(_, _) => { let child = &self.child_data[0]; self.validate_offsets_full::(child.len) @@ -1511,10 +1499,12 @@ pub fn layout(data_type: &DataType) -> DataTypeLayout { DataType::Null => DataTypeLayout { buffers: vec![], can_contain_null_mask: false, + variadic: false, }, DataType::Boolean => DataTypeLayout { buffers: vec![BufferSpec::BitMap], can_contain_null_mask: true, + variadic: false, }, DataType::Int8 => DataTypeLayout::new_fixed_width::(), DataType::Int16 => DataTypeLayout::new_fixed_width::(), @@ -1546,15 +1536,14 @@ pub fn layout(data_type: &DataType) -> DataTypeLayout { DataTypeLayout { buffers: vec![spec], can_contain_null_mask: true, + variadic: false, } } DataType::Binary => DataTypeLayout::new_binary::(), DataType::LargeBinary => DataTypeLayout::new_binary::(), DataType::Utf8 => DataTypeLayout::new_binary::(), DataType::LargeUtf8 => DataTypeLayout::new_binary::(), - DataType::BinaryView | DataType::Utf8View => { - unimplemented!("BinaryView/Utf8View not implemented") - } + DataType::BinaryView | DataType::Utf8View => DataTypeLayout::new_view(), DataType::FixedSizeList(_, _) => DataTypeLayout::new_empty(), // all in child data DataType::List(_) => DataTypeLayout::new_fixed_width::(), DataType::ListView(_) | DataType::LargeListView(_) => { @@ -1586,6 +1575,7 @@ pub fn layout(data_type: &DataType) -> DataTypeLayout { } }, can_contain_null_mask: false, + variadic: false, } } 
DataType::Dictionary(key_type, _value_type) => layout(key_type), @@ -1601,6 +1591,11 @@ pub struct DataTypeLayout { /// Can contain a null bitmask pub can_contain_null_mask: bool, + + /// This field only applies to the view type [`DataType::BinaryView`] and [`DataType::Utf8View`] + /// If `variadic` is true, the number of buffers expected is only lower-bounded by + /// buffers.len(). Buffers that exceed the lower bound are legal. + pub variadic: bool, } impl DataTypeLayout { @@ -1612,6 +1607,7 @@ impl DataTypeLayout { alignment: mem::align_of::(), }], can_contain_null_mask: true, + variadic: false, } } @@ -1622,6 +1618,7 @@ impl DataTypeLayout { Self { buffers: vec![], can_contain_null_mask: true, + variadic: false, } } @@ -1640,6 +1637,19 @@ impl DataTypeLayout { BufferSpec::VariableWidth, ], can_contain_null_mask: true, + variadic: false, + } + } + + /// Describes a view type + pub fn new_view() -> Self { + Self { + buffers: vec![BufferSpec::FixedWidth { + byte_width: mem::size_of::(), + alignment: mem::align_of::(), + }], + can_contain_null_mask: true, + variadic: true, } } } @@ -1845,7 +1855,7 @@ impl From for ArrayDataBuilder { #[cfg(test)] mod tests { use super::*; - use arrow_schema::{Field, UnionFields}; + use arrow_schema::Field; // See arrow/tests/array_data_validation.rs for test of array validation @@ -2093,23 +2103,6 @@ mod tests { assert!(!contains_nulls(Some(&buffer), 0, 0)); } - #[test] - fn test_into_buffers() { - let data_types = vec![ - DataType::Union(UnionFields::empty(), UnionMode::Dense), - DataType::Union(UnionFields::empty(), UnionMode::Sparse), - ]; - - for data_type in data_types { - let buffers = new_buffers(&data_type, 0); - let [buffer1, buffer2] = buffers; - let buffers = into_buffers(&data_type, buffer1, buffer2); - - let layout = layout(&data_type); - assert_eq!(buffers.len(), layout.buffers.len()); - } - } - #[test] fn test_alignment() { let buffer = Buffer::from_vec(vec![1_i32, 2_i32, 3_i32]); diff --git a/arrow-data/src/equal/byte_view.rs b/arrow-data/src/equal/byte_view.rs new file mode 100644 index 000000000000..def395125366 --- /dev/null +++ b/arrow-data/src/equal/byte_view.rs @@ -0,0 +1,74 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+
+use crate::{ArrayData, ByteView};
+
+pub(super) fn byte_view_equal(
+    lhs: &ArrayData,
+    rhs: &ArrayData,
+    lhs_start: usize,
+    rhs_start: usize,
+    len: usize,
+) -> bool {
+    let lhs_views = &lhs.buffer::<u128>(0)[lhs_start..lhs_start + len];
+    let lhs_buffers = &lhs.buffers()[1..];
+    let rhs_views = &rhs.buffer::<u128>(0)[rhs_start..rhs_start + len];
+    let rhs_buffers = &rhs.buffers()[1..];
+
+    for (idx, (l, r)) in lhs_views.iter().zip(rhs_views).enumerate() {
+        // Only checking one null mask here because by the time the control flow reaches
+        // this point, the equality of the two masks would have already been verified.
+        if lhs.is_null(idx) {
+            continue;
+        }
+
+        let l_len_prefix = *l as u64;
+        let r_len_prefix = *r as u64;
+        // short-circuit, check length and prefix
+        if l_len_prefix != r_len_prefix {
+            return false;
+        }
+
+        let len = l_len_prefix as u32;
+        // for inline storage, only need check view
+        if len <= 12 {
+            if l != r {
+                return false;
+            }
+            continue;
+        }
+
+        // check buffers
+        let l_view = ByteView::from(*l);
+        let r_view = ByteView::from(*r);
+
+        let l_buffer = &lhs_buffers[l_view.buffer_index as usize];
+        let r_buffer = &rhs_buffers[r_view.buffer_index as usize];
+
+        // prefixes are already known to be equal; skip checking them
+        let len = len as usize - 4;
+        let l_offset = l_view.offset as usize + 4;
+        let r_offset = r_view.offset as usize + 4;
+        if l_buffer[l_offset..l_offset + len] != r_buffer[r_offset..r_offset + len] {
+            return false;
+        }
+    }
+    true
+}
+
+#[cfg(test)]
+mod tests {}
diff --git a/arrow-data/src/equal/mod.rs b/arrow-data/src/equal/mod.rs
index 0987fd4c5637..dba6a0186a56 100644
--- a/arrow-data/src/equal/mod.rs
+++ b/arrow-data/src/equal/mod.rs
@@ -25,6 +25,7 @@ use arrow_schema::{DataType, IntervalUnit};
 use half::f16;
 
 mod boolean;
+mod byte_view;
 mod dictionary;
 mod fixed_binary;
 mod fixed_list;
@@ -41,6 +42,7 @@ mod variable_size;
 // For this reason, they are not exposed and are instead used
 // to build the generic functions below (`equal_range` and `equal`).
 use boolean::boolean_equal;
+use byte_view::byte_view_equal;
 use dictionary::dictionary_equal;
 use fixed_binary::fixed_binary_equal;
 use fixed_list::fixed_list_equal;
@@ -97,7 +99,7 @@ fn equal_values(
         }
         DataType::FixedSizeBinary(_) => fixed_binary_equal(lhs, rhs, lhs_start, rhs_start, len),
         DataType::BinaryView | DataType::Utf8View => {
-            unimplemented!("BinaryView/Utf8View not yet implemented")
+            byte_view_equal(lhs, rhs, lhs_start, rhs_start, len)
         }
         DataType::List(_) => list_equal::<i32>(lhs, rhs, lhs_start, rhs_start, len),
         DataType::ListView(_) | DataType::LargeListView(_) => {
diff --git a/arrow-data/src/lib.rs b/arrow-data/src/lib.rs
index cfa0dba66c35..59a049fe96cf 100644
--- a/arrow-data/src/lib.rs
+++ b/arrow-data/src/lib.rs
@@ -30,3 +30,6 @@ pub mod decimal;
 
 #[cfg(feature = "ffi")]
 pub mod ffi;
+
+mod byte_view;
+pub use byte_view::*;
diff --git a/arrow-data/src/transform/mod.rs b/arrow-data/src/transform/mod.rs
index b14f6e771033..b0d9475afcd6 100644
--- a/arrow-data/src/transform/mod.rs
+++ b/arrow-data/src/transform/mod.rs
@@ -15,13 +15,10 @@
 // specific language governing permissions and limitations
 // under the License.
-use super::{
-    data::{into_buffers, new_buffers},
-    ArrayData, ArrayDataBuilder,
-};
+use super::{data::new_buffers, ArrayData, ArrayDataBuilder, ByteView};
 use crate::bit_mask::set_bits;
 use arrow_buffer::buffer::{BooleanBuffer, NullBuffer};
-use arrow_buffer::{bit_util, i256, ArrowNativeType, MutableBuffer};
+use arrow_buffer::{bit_util, i256, ArrowNativeType, Buffer, MutableBuffer};
 use arrow_schema::{ArrowError, DataType, IntervalUnit, UnionMode};
 use half::f16;
 use num::Integer;
@@ -68,36 +65,6 @@ impl<'a> _MutableArrayData<'a> {
             .as_mut()
             .expect("MutableArrayData not nullable")
     }
-
-    fn freeze(self, dictionary: Option<ArrayData>) -> ArrayDataBuilder {
-        let buffers = into_buffers(&self.data_type, self.buffer1, self.buffer2);
-
-        let child_data = match self.data_type {
-            DataType::Dictionary(_, _) => vec![dictionary.unwrap()],
-            _ => {
-                let mut child_data = Vec::with_capacity(self.child_data.len());
-                for child in self.child_data {
-                    child_data.push(child.freeze());
-                }
-                child_data
-            }
-        };
-
-        let nulls = self
-            .null_buffer
-            .map(|nulls| {
-                let bools = BooleanBuffer::new(nulls.into(), 0, self.len);
-                unsafe { NullBuffer::new_unchecked(bools, self.null_count) }
-            })
-            .filter(|n| n.null_count() > 0);
-
-        ArrayDataBuilder::new(self.data_type)
-            .offset(0)
-            .len(self.len)
-            .nulls(nulls)
-            .buffers(buffers)
-            .child_data(child_data)
-    }
 }
 
 fn build_extend_null_bits(array: &ArrayData, use_nulls: bool) -> ExtendNullBits {
@@ -138,26 +105,32 @@ pub struct MutableArrayData<'a> {
     #[allow(dead_code)]
     arrays: Vec<&'a ArrayData>,
-    // The attributes in [_MutableArrayData] cannot be in [MutableArrayData] due to
-    // mutability invariants (interior mutability):
-    // [MutableArrayData] contains a function that can only mutate [_MutableArrayData], not
-    // [MutableArrayData] itself
+    /// The attributes in [_MutableArrayData] cannot be in [MutableArrayData] due to
+    /// mutability invariants (interior mutability):
+    /// [MutableArrayData] contains a function that can only mutate [_MutableArrayData], not
+    /// [MutableArrayData] itself
    data: _MutableArrayData<'a>,
-    // the child data of the `Array` in Dictionary arrays.
-    // This is not stored in `MutableArrayData` because these values constant and only needed
-    // at the end, when freezing [_MutableArrayData].
+    /// the child data of the `Array` in Dictionary arrays.
+    /// This is not stored in `MutableArrayData` because these values are constant and only needed
+    /// at the end, when freezing [_MutableArrayData].
     dictionary: Option<ArrayData>,
-    // function used to extend values from arrays. This function's lifetime is bound to the array
-    // because it reads values from it.
+    /// Variadic data buffers referenced by views
+    /// This is not stored in `MutableArrayData` because these values are constant and only needed
+    /// at the end, when freezing [_MutableArrayData]
+    variadic_data_buffers: Vec<Buffer>,
+
+    /// function used to extend values from arrays. This function's lifetime is bound to the array
+    /// because it reads values from it.
     extend_values: Vec<Extend<'a>>,
+
-    // function used to extend nulls from arrays. This function's lifetime is bound to the array
-    // because it reads nulls from it.
+    /// function used to extend nulls from arrays. This function's lifetime is bound to the array
+    /// because it reads nulls from it.
     extend_null_bits: Vec<ExtendNullBits<'a>>,
-    // function used to extend nulls.
-    // this is independent of the arrays and therefore has no lifetime.
+    /// function used to extend nulls.
+ /// this is independent of the arrays and therefore has no lifetime. extend_nulls: ExtendNulls, } @@ -197,6 +170,26 @@ fn build_extend_dictionary(array: &ArrayData, offset: usize, max: usize) -> Opti } } +/// Builds an extend that adds `buffer_offset` to any buffer indices encountered +fn build_extend_view(array: &ArrayData, buffer_offset: u32) -> Extend { + let views = array.buffer::(0); + Box::new( + move |mutable: &mut _MutableArrayData, _, start: usize, len: usize| { + mutable + .buffer1 + .extend(views[start..start + len].iter().map(|v| { + let len = *v as u32; + if len <= 12 { + return *v; // Stored inline + } + let mut view = ByteView::from(*v); + view.buffer_index += buffer_offset; + view.into() + })) + }, + ) +} + fn build_extend(array: &ArrayData) -> Extend { match array.data_type() { DataType::Null => null::build_extend(array), @@ -224,9 +217,7 @@ fn build_extend(array: &ArrayData) -> Extend { DataType::Decimal256(_, _) => primitive::build_extend::(array), DataType::Utf8 | DataType::Binary => variable_size::build_extend::(array), DataType::LargeUtf8 | DataType::LargeBinary => variable_size::build_extend::(array), - DataType::BinaryView | DataType::Utf8View => { - unimplemented!("BinaryView/Utf8View not implemented") - } + DataType::BinaryView | DataType::Utf8View => unreachable!("should use build_extend_view"), DataType::Map(_, _) | DataType::List(_) => list::build_extend::(array), DataType::ListView(_) | DataType::LargeListView(_) => { unimplemented!("ListView/LargeListView not implemented") @@ -272,9 +263,7 @@ fn build_extend_nulls(data_type: &DataType) -> ExtendNulls { DataType::Decimal256(_, _) => primitive::extend_nulls::, DataType::Utf8 | DataType::Binary => variable_size::extend_nulls::, DataType::LargeUtf8 | DataType::LargeBinary => variable_size::extend_nulls::, - DataType::BinaryView | DataType::Utf8View => { - unimplemented!("BinaryView/Utf8View not implemented") - } + DataType::BinaryView | DataType::Utf8View => primitive::extend_nulls::, DataType::Map(_, _) | DataType::List(_) => list::extend_nulls::, DataType::ListView(_) | DataType::LargeListView(_) => { unimplemented!("ListView/LargeListView not implemented") @@ -429,11 +418,10 @@ impl<'a> MutableArrayData<'a> { | DataType::Binary | DataType::LargeUtf8 | DataType::LargeBinary + | DataType::BinaryView + | DataType::Utf8View | DataType::Interval(_) | DataType::FixedSizeBinary(_) => vec![], - DataType::BinaryView | DataType::Utf8View => { - unimplemented!("BinaryView/Utf8View not implemented") - } DataType::ListView(_) | DataType::LargeListView(_) => { unimplemented!("ListView/LargeListView not implemented") } @@ -566,6 +554,15 @@ impl<'a> MutableArrayData<'a> { _ => (None, false), }; + let variadic_data_buffers = match &data_type { + DataType::BinaryView | DataType::Utf8View => arrays + .iter() + .flat_map(|x| x.buffers().iter().skip(1)) + .map(Buffer::clone) + .collect(), + _ => vec![], + }; + let extend_nulls = build_extend_nulls(data_type); let extend_null_bits = arrays @@ -598,6 +595,20 @@ impl<'a> MutableArrayData<'a> { extend_values.expect("MutableArrayData::new is infallible") } + DataType::BinaryView | DataType::Utf8View => { + let mut next_offset = 0u32; + arrays + .iter() + .map(|arr| { + let num_data_buffers = (arr.buffers().len() - 1) as u32; + let offset = next_offset; + next_offset = next_offset + .checked_add(num_data_buffers) + .expect("view buffer index overflow"); + build_extend_view(arr, offset) + }) + .collect() + } _ => arrays.iter().map(|array| build_extend(array)).collect(), }; @@ -614,6 
+625,7 @@ impl<'a> MutableArrayData<'a> { arrays, data, dictionary, + variadic_data_buffers, extend_values, extend_null_bits, extend_nulls, @@ -673,13 +685,55 @@ impl<'a> MutableArrayData<'a> { /// Creates a [ArrayData] from the pushed regions up to this point, consuming `self`. pub fn freeze(self) -> ArrayData { - unsafe { self.data.freeze(self.dictionary).build_unchecked() } + unsafe { self.into_builder().build_unchecked() } } /// Creates a [ArrayDataBuilder] from the pushed regions up to this point, consuming `self`. /// This is useful for extending the default behavior of MutableArrayData. pub fn into_builder(self) -> ArrayDataBuilder { - self.data.freeze(self.dictionary) + let data = self.data; + + let buffers = match data.data_type { + DataType::Null | DataType::Struct(_) | DataType::FixedSizeList(_, _) => { + vec![] + } + DataType::BinaryView | DataType::Utf8View => { + let mut b = self.variadic_data_buffers; + b.insert(0, data.buffer1.into()); + b + } + DataType::Utf8 | DataType::Binary | DataType::LargeUtf8 | DataType::LargeBinary => { + vec![data.buffer1.into(), data.buffer2.into()] + } + DataType::Union(_, mode) => { + match mode { + // Based on Union's DataTypeLayout + UnionMode::Sparse => vec![data.buffer1.into()], + UnionMode::Dense => vec![data.buffer1.into(), data.buffer2.into()], + } + } + _ => vec![data.buffer1.into()], + }; + + let child_data = match data.data_type { + DataType::Dictionary(_, _) => vec![self.dictionary.unwrap()], + _ => data.child_data.into_iter().map(|x| x.freeze()).collect(), + }; + + let nulls = data + .null_buffer + .map(|nulls| { + let bools = BooleanBuffer::new(nulls.into(), 0, data.len); + unsafe { NullBuffer::new_unchecked(bools, data.null_count) } + }) + .filter(|n| n.null_count() > 0); + + ArrayDataBuilder::new(data.data_type) + .offset(0) + .len(data.len) + .nulls(nulls) + .buffers(buffers) + .child_data(child_data) } } diff --git a/arrow/tests/array_equal.rs b/arrow/tests/array_equal.rs index 9bd276428880..15011c547284 100644 --- a/arrow/tests/array_equal.rs +++ b/arrow/tests/array_equal.rs @@ -22,8 +22,8 @@ use arrow::array::{ StringArray, StringDictionaryBuilder, StructArray, UnionBuilder, }; use arrow::datatypes::{Int16Type, Int32Type}; -use arrow_array::builder::{StringBuilder, StructBuilder}; -use arrow_array::{DictionaryArray, FixedSizeListArray}; +use arrow_array::builder::{StringBuilder, StringViewBuilder, StructBuilder}; +use arrow_array::{DictionaryArray, FixedSizeListArray, StringViewArray}; use arrow_buffer::{Buffer, ToByteSlice}; use arrow_data::{ArrayData, ArrayDataBuilder}; use arrow_schema::{DataType, Field, Fields}; @@ -307,6 +307,50 @@ fn test_fixed_size_binary_array() { test_equal(&a, &b, true); } +#[test] +fn test_string_view_equal() { + let a1 = StringViewArray::from(vec!["foo", "very long string over 12 bytes", "bar"]); + let a2 = StringViewArray::from(vec![ + "a very long string over 12 bytes", + "foo", + "very long string over 12 bytes", + "bar", + ]); + test_equal(&a1, &a2.slice(1, 3), true); + + let a1 = StringViewArray::from(vec!["foo", "very long string over 12 bytes", "bar"]); + let a2 = StringViewArray::from(vec!["foo", "very long string over 12 bytes", "bar"]); + test_equal(&a1, &a2, true); + + let a1_s = a1.slice(1, 1); + let a2_s = a2.slice(1, 1); + test_equal(&a1_s, &a2_s, true); + + let a1_s = a1.slice(2, 1); + let a2_s = a2.slice(0, 1); + test_equal(&a1_s, &a2_s, false); + + // test will null value. 
+ let a1 = StringViewArray::from(vec!["foo", "very long string over 12 bytes", "bar"]); + let a2 = { + let mut builder = StringViewBuilder::new(); + builder.append_value("foo"); + builder.append_null(); + builder.append_option(Some("very long string over 12 bytes")); + builder.append_value("bar"); + builder.finish() + }; + test_equal(&a1, &a2, false); + + let a1_s = a1.slice(1, 2); + let a2_s = a2.slice(1, 3); + test_equal(&a1_s, &a2_s, false); + + let a1_s = a1.slice(1, 2); + let a2_s = a2.slice(2, 2); + test_equal(&a1_s, &a2_s, true); +} + #[test] fn test_string_offset() { let a = StringArray::from(vec![Some("a"), None, Some("b")]); diff --git a/arrow/tests/array_transform.rs b/arrow/tests/array_transform.rs index 5a267c876d6a..83d3003a0586 100644 --- a/arrow/tests/array_transform.rs +++ b/arrow/tests/array_transform.rs @@ -22,6 +22,7 @@ use arrow::array::{ UnionArray, }; use arrow::datatypes::Int16Type; +use arrow_array::StringViewArray; use arrow_buffer::Buffer; use arrow_data::transform::MutableArrayData; use arrow_data::ArrayData; @@ -1027,6 +1028,44 @@ fn test_extend_nulls_panic() { mutable.extend_nulls(2); } +#[test] +fn test_string_view() { + let a1 = + StringViewArray::from(vec!["foo", "very long string over 12 bytes", "bar"]).into_data(); + let a2 = StringViewArray::from_iter(vec![ + Some("bar"), + None, + Some("long string also over 12 bytes"), + ]) + .into_data(); + + a1.validate_full().unwrap(); + a2.validate_full().unwrap(); + + let mut mutable = MutableArrayData::new(vec![&a1, &a2], false, 4); + mutable.extend(1, 0, 1); + mutable.extend(0, 1, 2); + mutable.extend(0, 0, 1); + mutable.extend(1, 2, 3); + + let array = StringViewArray::from(mutable.freeze()); + assert_eq!(array.data_buffers().len(), 2); + // Should have reused data buffers + assert_eq!(array.data_buffers()[0].as_ptr(), a1.buffers()[1].as_ptr()); + assert_eq!(array.data_buffers()[1].as_ptr(), a2.buffers()[1].as_ptr()); + + let v = array.iter().collect::>(); + assert_eq!( + v, + vec![ + Some("bar"), + Some("very long string over 12 bytes"), + Some("foo"), + Some("long string also over 12 bytes") + ] + ) +} + #[test] #[should_panic(expected = "Arrays with inconsistent types passed to MutableArrayData")] fn test_mixed_types() { From c3899cea0cda046c2c635d6a2f75baee8ee1ea99 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com> Date: Fri, 15 Mar 2024 06:17:09 +1300 Subject: [PATCH 02/11] Fix integer parsing of empty strings (#5504) (#5505) --- arrow-cast/src/parse.rs | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/arrow-cast/src/parse.rs b/arrow-cast/src/parse.rs index 7f23526142cc..afa00f176293 100644 --- a/arrow-cast/src/parse.rs +++ b/arrow-cast/src/parse.rs @@ -439,6 +439,9 @@ macro_rules! 
parser_primitive { ($t:ty) => { impl Parser for $t { fn parse(string: &str) -> Option { + if !string.as_bytes().last().is_some_and(|x| x.is_ascii_digit()) { + return None; + } match atoi::FromRadix10SignedChecked::from_radix_10_signed_checked( string.as_bytes(), ) { @@ -2303,4 +2306,22 @@ mod tests { assert_eq!(i, result.unwrap()); } } + + #[test] + fn test_parse_empty() { + assert_eq!(Int32Type::parse(""), None); + assert_eq!(Int64Type::parse(""), None); + assert_eq!(UInt32Type::parse(""), None); + assert_eq!(UInt64Type::parse(""), None); + assert_eq!(Float32Type::parse(""), None); + assert_eq!(Float64Type::parse(""), None); + assert_eq!(Int32Type::parse("+"), None); + assert_eq!(Int64Type::parse("+"), None); + assert_eq!(UInt32Type::parse("+"), None); + assert_eq!(UInt64Type::parse("+"), None); + assert_eq!(Float32Type::parse("+"), None); + assert_eq!(Float64Type::parse("+"), None); + assert_eq!(TimestampNanosecondType::parse(""), None); + assert_eq!(Date32Type::parse(""), None); + } } From 4d0316da100b20363fc68360b7fdb8bfd88f1ee7 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com> Date: Fri, 15 Mar 2024 15:44:09 +1300 Subject: [PATCH 03/11] Deprecate array_to_json_array (#5515) --- arrow-json/src/writer.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arrow-json/src/writer.rs b/arrow-json/src/writer.rs index d8045c330481..9f63b811d74e 100644 --- a/arrow-json/src/writer.rs +++ b/arrow-json/src/writer.rs @@ -161,6 +161,7 @@ fn struct_array_to_jsonmap_array( } /// Converts an arrow [`Array`] into a `Vec` of Serde JSON [`serde_json::Value`]'s +#[deprecated(note = "Use Writer")] pub fn array_to_json_array(array: &dyn Array) -> Result, ArrowError> { // For backwards compatibility, default to skip nulls array_to_json_array_internal(array, false) @@ -1837,6 +1838,7 @@ mod tests { } #[test] + #[allow(deprecated)] fn test_array_to_json_array_for_fixed_size_list_array() { let expected_json = vec![ json!([0, 1, 2]), @@ -1859,6 +1861,7 @@ mod tests { } #[test] + #[allow(deprecated)] fn test_array_to_json_array_for_map_array() { let expected_json = serde_json::from_value::>(json!([ [ From 5dd5418070bd6284e1ca8a5aed17f7323965b525 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 14 Mar 2024 22:45:31 -0400 Subject: [PATCH 04/11] Minor: Add doc comments to `GenericByteViewArray` (#5512) * Minor: Add doc comments to `GenericByteViewArray` * Improve docs --- arrow-array/src/array/byte_view_array.rs | 70 +++++++++++++++++++++++- 1 file changed, 69 insertions(+), 1 deletion(-) diff --git a/arrow-array/src/array/byte_view_array.rs b/arrow-array/src/array/byte_view_array.rs index e22e9b1688bb..a3b8a5dcb803 100644 --- a/arrow-array/src/array/byte_view_array.rs +++ b/arrow-array/src/array/byte_view_array.rs @@ -34,7 +34,66 @@ use std::sync::Arc; /// Different than [`crate::GenericByteArray`] as it stores both an offset and length /// meaning that take / filter operations can be implemented without copying the underlying data. /// +/// See [`StringViewArray`] for storing utf8 encoded string data and +/// [`BinaryViewArray`] for storing bytes. +/// /// [Variable-size Binary View Layout]: https://arrow.apache.org/docs/format/Columnar.html#variable-size-binary-view-layout +/// +/// A `GenericByteViewArray` stores variable length byte strings. An array of +/// `N` elements is stored as `N` fixed length "views" and a variable number +/// of variable length "buffers". 
+///
+/// Each view is a `u128` value whose layout is different depending on the
+/// length of the string stored at that location:
+///
+/// ```text
+///                        ┌──────┬────────────────────────┐
+///                        │length│      string value      │
+///   Strings (len <= 12)  │      │    (padded with 0)     │
+///                        └──────┴────────────────────────┘
+///                         0    31                      127
+///
+///                        ┌───────┬───────┬───────┬───────┐
+///                        │length │prefix │  buf  │offset │
+///   Strings (len > 12)   │       │       │ index │       │
+///                        └───────┴───────┴───────┴───────┘
+///                         0    31      63      95      127
+/// ```
+///
+/// * Strings with length <= 12 are stored directly in the view.
+///
+/// * Strings with length > 12: The first four bytes are stored inline in the
+///   view and the entire string is stored in one of the buffers.
+///
+/// Unlike [`GenericByteArray`], there are no constraints on the offsets other
+/// than they must point into a valid buffer. However, they can be out of order,
+/// non-contiguous and overlapping.
+///
+/// For example, in the following diagram, the strings "FishWasInTownTodayYay" and
+/// "CrumpleFacedFish" are both longer than 12 bytes and thus are stored in a
+/// separate buffer while the string "LavaMonster" is stored inlined in the
+/// view. In this case, the same bytes for "Fish" are used to store both strings.
+///
+/// ```text
+///                                                                ┌───┐
+///                          ┌──────┬──────┬──────┬──────┐ offset  │...│
+///  "FishWasInTownTodayYay" │  21  │ Fish │  0   │ 115  │─ ─ 103  │Mr.│
+///                          └──────┴──────┴──────┴──────┘    │ ┌ ─▶│Cru│
+///                          ┌──────┬──────┬──────┬──────┐    │ │   │mpl│
+///       "CrumpleFacedFish" │  16  │ Crum │  0   │ 103  │─ ─│─ ┘  │eFa│
+///                          └──────┴──────┴──────┴──────┘    │     │ced│
+///                          ┌──────┬────────────────────┐    └ ─ ─▶│Fis│
+///            "LavaMonster" │  11  │   LavaMonster\0    │          │hWa│
+///                          └──────┴────────────────────┘ offset   │sIn│
+///                                                          115    │Tow│
+///                                                                 │nTo│
+///                                                                 │day│
+///                               u128 "views"                      │Yay│
+///                                                      buffer 0   │...│
+///                                                                 └───┘
+/// ```
+/// [`GenericByteArray`]: crate::array::GenericByteArray
+
 pub struct GenericByteViewArray<T: ByteViewType + ?Sized> {
     data_type: DataType,
     views: ScalarBuffer<u128>,
     buffers: Vec<Buffer>,
     phantom: PhantomData<T>,
     nulls: Option<NullBuffer>,
 }
@@ -332,10 +391,19 @@ where
 }
 
 /// A [`GenericByteViewArray`] of `[u8]`
+///
+/// # Example
+/// ```
+/// use arrow_array::BinaryViewArray;
+/// let array = BinaryViewArray::from_iter_values(vec![b"hello" as &[u8], b"world", b"lulu", b"large payload over 12 bytes"]);
+/// assert_eq!(array.value(0), b"hello");
+/// assert_eq!(array.value(3), b"large payload over 12 bytes");
+/// ```
 pub type BinaryViewArray = GenericByteViewArray<BinaryViewType>;
 
-/// A [`GenericByteViewArray`] of `str`
+/// A [`GenericByteViewArray`] that stores utf8 data
 ///
+/// # Example
 /// ```
 /// use arrow_array::StringViewArray;
 /// let array = StringViewArray::from_iter_values(vec!["hello", "world", "lulu", "large payload over 12 bytes"]);
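To make the view layout described above concrete, here is a small standalone
sketch (an editorial illustration, not part of the patch series; it assumes
only the little-endian `u128` layout documented in the doc comment above):

```rust
/// Prints the parts of a single byte-view `u128`, following the layout
/// documented on `GenericByteViewArray`.
fn decode_view(view: u128) {
    let bytes: [u8; 16] = view.to_le_bytes();
    // the low 32 bits always hold the length
    let len = u32::from_le_bytes(bytes[0..4].try_into().unwrap()) as usize;
    if len <= 12 {
        // short strings: the value is stored inline after the length
        println!("inline: {:?}", &bytes[4..4 + len]);
    } else {
        // long strings: a 4-byte prefix, then a buffer index, then an offset
        let prefix = &bytes[4..8];
        let buffer_index = u32::from_le_bytes(bytes[8..12].try_into().unwrap());
        let offset = u32::from_le_bytes(bytes[12..16].try_into().unwrap());
        println!("prefix {prefix:?} -> buffer {buffer_index} @ offset {offset}");
    }
}

fn main() {
    // a 5-byte string "hello" stored inline: length 5, then the bytes
    let mut bytes = [0u8; 16];
    bytes[0..4].copy_from_slice(&5u32.to_le_bytes());
    bytes[4..9].copy_from_slice(b"hello");
    decode_view(u128::from_le_bytes(bytes));
}
```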
From 773cf18e2aeb8f299585496d4cf8a1a1f8262edb Mon Sep 17 00:00:00 2001
From: Istvan Fodor <586159+istvan-fodor@users.noreply.github.com>
Date: Thu, 14 Mar 2024 21:45:49 -0500
Subject: [PATCH 05/11] feat: clarifying comments in struct_builder.rs #5494
 (#5499)

* feat: clarifying comments in struct_builder.rs

Added clarifying comments to StructBuilder about creating collection columns

* fixed commented line, improved comments

* Removed redundant line in comment

* fixed slightly misleading comment

* moved example code to comment

* better comment

* fixed comment type
---
 arrow-array/src/builder/struct_builder.rs | 77 +++++++++++++++++++++++
 1 file changed, 77 insertions(+)

diff --git a/arrow-array/src/builder/struct_builder.rs b/arrow-array/src/builder/struct_builder.rs
index eeb37cd8e66d..1e2e402f745f 100644
--- a/arrow-array/src/builder/struct_builder.rs
+++ b/arrow-array/src/builder/struct_builder.rs
@@ -25,6 +25,81 @@ use std::sync::Arc;
 ///
 /// Note that callers should make sure that methods of all the child field builders are
 /// properly called to maintain the consistency of the data structure.
+///
+/// Handling arrays with complex layouts, such as `List<Struct<List<Struct>>>`, in Rust can be challenging due to its strong typing system.
+/// To construct a collection builder ([`ListBuilder`], [`LargeListBuilder`], or [`MapBuilder`]) using [`make_builder`], multiple calls are required. This complexity arises from the recursive approach utilized by [`StructBuilder::from_fields`].
+///
+/// Initially, [`StructBuilder::from_fields`] invokes [`make_builder`], which returns a `Box<dyn ArrayBuilder>`. To obtain the specific collection builder, one must first use [`StructBuilder::field_builder`] to get a `Collection<[Box<dyn ArrayBuilder>]>`. Subsequently, the `values()` result from this operation can be downcast to the desired builder type.
+///
+/// For example, when working with [`ListBuilder`], you would first call [`StructBuilder::field_builder::<ListBuilder<Box<dyn ArrayBuilder>>>`] and then downcast the [`Box<dyn ArrayBuilder>`] to the specific [`StructBuilder`] you need.
+///
+/// For a practical example see the code below:
+///
+/// ```rust
+/// use arrow_array::builder::{ArrayBuilder, ListBuilder, StringBuilder, StructBuilder};
+/// use arrow_schema::{DataType, Field, Fields};
+/// use std::sync::Arc;
+///
+/// // This is an example column that has a List<Struct<List<Struct>>> layout
+/// let mut example_col = ListBuilder::new(StructBuilder::from_fields(
+///     vec![Field::new(
+///         "value_list",
+///         DataType::List(Arc::new(Field::new(
+///             "item",
+///             DataType::Struct(Fields::from(vec![
+///                 Field::new("key", DataType::Utf8, true),
+///                 Field::new("value", DataType::Utf8, true),
+///             ])), // In this example we are trying to get to this builder and insert key/value pairs
+///             true,
+///         ))),
+///         true,
+///     )],
+///     0,
+/// ));
+///
+/// // We can obtain the StructBuilder without issues, because example_col was created with StructBuilder
+/// let col_struct_builder: &mut StructBuilder = example_col.values();
+///
+/// // We can't obtain the ListBuilder with the expected generic types, because under the hood
+/// // the StructBuilder was returned as a Box<dyn ArrayBuilder> and passed as such to the ListBuilder constructor
+///
+/// // This panics at runtime, even though we know that the builder is a ListBuilder.
+/// // let sb = col_struct_builder
+/// //     .field_builder::<ListBuilder<StringBuilder>>(0)
+/// //     .as_mut()
+/// //     .unwrap();
+///
+/// // To keep in line with Rust's strong typing, we fetch a ListBuilder<Box<dyn ArrayBuilder>> from the column StructBuilder first...
+/// let mut list_builder_option =
+///     col_struct_builder.field_builder::<ListBuilder<Box<dyn ArrayBuilder>>>(0);
+///
+/// let list_builder = list_builder_option.as_mut().unwrap();
+///
+/// // ... and then downcast the key/value pair values to a StructBuilder
+/// let struct_builder = list_builder
+///     .values()
+///     .as_any_mut()
+///     .downcast_mut::<StructBuilder>()
+///     .unwrap();
+///
+/// // We can now append values to the StructBuilder
+/// let key_builder = struct_builder.field_builder::<StringBuilder>(0).unwrap();
+/// key_builder.append_value("my key");
+///
+/// let value_builder = struct_builder.field_builder::<StringBuilder>(1).unwrap();
+/// value_builder.append_value("my value");
+///
+/// struct_builder.append(true);
+/// list_builder.append(true);
+/// col_struct_builder.append(true);
+/// example_col.append(true);
+///
+/// let array = example_col.finish();
+///
+/// println!("My array: {:?}", array);
+/// ```
+///
 pub struct StructBuilder {
     fields: Fields,
     field_builders: Vec<Box<dyn ArrayBuilder>>,
@@ -88,6 +163,8 @@ impl ArrayBuilder for StructBuilder {
 /// Returns a builder with capacity `capacity` that corresponds to the datatype `DataType`
 /// This function is useful to construct arrays from arbitrary vectors with known/expected
 /// schema.
+///
+/// See comments on [`StructBuilder`] on how to retrieve collection builders built by [`make_builder`].
 pub fn make_builder(datatype: &DataType, capacity: usize) -> Box<dyn ArrayBuilder> {
     use crate::builder::*;
     match datatype {

From 14bd53dc1240003f171c8655863eae188cd0880f Mon Sep 17 00:00:00 2001
From: Dan Harris <1327726+thinkharderdev@users.noreply.github.com>
Date: Thu, 14 Mar 2024 22:47:48 -0400
Subject: [PATCH 06/11] Support dictionary encoding in structures for
 `FlightDataEncoder`, add documentation for `arrow_flight::encode::Dictionary`
 (#5488)

* Add more detailed documentation for arrow_flight::encode::DictionaryHandling

* fix doc link

* Fix handling of nested dictionary arrays with DictionaryHandling::Hydrate

* clippy

* Handle large list and sparse unions

* use top-level fields

* PR comments
---
 arrow-flight/src/encode.rs | 478 +++++++++++++++++++++++++++++++++----
 1 file changed, 435 insertions(+), 43 deletions(-)

diff --git a/arrow-flight/src/encode.rs b/arrow-flight/src/encode.rs
index bb0436816209..efd688129485 100644
--- a/arrow-flight/src/encode.rs
+++ b/arrow-flight/src/encode.rs
@@ -18,9 +18,11 @@ use std::{collections::VecDeque, fmt::Debug, pin::Pin, sync::Arc, task::Poll};
 
 use crate::{error::Result, FlightData, FlightDescriptor, SchemaAsIpc};
-use arrow_array::{ArrayRef, RecordBatch, RecordBatchOptions};
+
+use arrow_array::{Array, ArrayRef, RecordBatch, RecordBatchOptions, UnionArray};
 use arrow_ipc::writer::{DictionaryTracker, IpcDataGenerator, IpcWriteOptions};
-use arrow_schema::{DataType, Field, Fields, Schema, SchemaRef};
+
+use arrow_schema::{DataType, Field, FieldRef, Fields, Schema, SchemaRef, UnionMode};
 use bytes::Bytes;
 use futures::{ready, stream::BoxStream, Stream, StreamExt};
@@ -323,9 +325,10 @@ impl FlightDataEncoder {
             None => self.encode_schema(batch.schema_ref()),
         };
 
-        // encode the batch
-        let send_dictionaries = self.dictionary_handling == DictionaryHandling::Resend;
-        let batch = prepare_batch_for_flight(&batch, schema, send_dictionaries)?;
+        let batch = match self.dictionary_handling {
+            DictionaryHandling::Resend => batch,
+            DictionaryHandling::Hydrate => hydrate_dictionaries(&batch, schema)?,
+        };
 
         for batch in split_batch_for_grpc_response(batch, self.max_flight_data_size) {
             let (flight_dictionaries, flight_batch) = self.encoder.encode_batch(&batch)?;
@@ -388,6 +391,31 @@ impl Stream for FlightDataEncoder {
 /// Defines how a [`FlightDataEncoder`] encodes [`DictionaryArray`]s
 ///
 /// [`DictionaryArray`]: arrow_array::DictionaryArray
+///
+/// In the arrow flight protocol dictionary values and keys are sent as two separate messages.
+/// When a sender is encoding a [`RecordBatch`] containing [`DictionaryArray`] columns, it will
+/// first send a dictionary batch (a batch with header `MessageHeader::DictionaryBatch`) containing
+/// the dictionary values. The receiver is responsible for reading this batch and maintaining state that associates
+/// those dictionary values with the corresponding array using the `dict_id` as a key.
+///
+/// After sending the dictionary batch the sender will send the array data in a batch with header `MessageHeader::RecordBatch`.
+/// For any dictionary array batches in this message, the encoded flight message will only contain the dictionary keys. The receiver
+/// is then responsible for rebuilding the `DictionaryArray` on the client side using the dictionary values from the DictionaryBatch message
+/// and the keys from the RecordBatch message.
+///
+/// For example, if we have a batch with a `TypedDictionaryArray<'_, UInt32Type, Utf8Type>` (a dictionary array where the keys are `u32` and the
+/// values are `String`), then the DictionaryBatch will contain a `StringArray` and the RecordBatch will contain a `UInt32Array`.
+///
+/// Note that since `dict_id` defined in the `Schema` is used as a key to associate dictionary values to their arrays it is required that each
+/// `DictionaryArray` in a `RecordBatch` have a unique `dict_id`.
+///
+/// The current implementation does not support "delta" dictionaries so a new dictionary batch will be sent each time the encoder sees a
+/// dictionary which is not pointer-equal to the previously observed dictionary for a given `dict_id`.
+///
+/// For clients which may not support `DictionaryEncoding`, the `DictionaryHandling::Hydrate` method will bypass the process defined above
+/// and "hydrate" any `DictionaryArray` in the batch to their underlying value type (e.g. `TypedDictionaryArray<'_, UInt32Type, Utf8Type>` will
+/// be sent as a `StringArray`). With this method all data will be sent in `MessageHeader::RecordBatch` messages and the batch schema
+/// will be adjusted so that all dictionary encoded fields are changed to fields of the dictionary value type.
 #[derive(Debug, PartialEq)]
 pub enum DictionaryHandling {
     /// Expands to the underlying type (default). This likely sends more data
@@ -395,13 +423,6 @@ pub enum DictionaryHandling {
     /// and is more compatible with other arrow flight client implementations
     /// that may not support `DictionaryEncoding`
     ///
-    /// An IPC response, streaming or otherwise, defines its schema up front
-    /// which defines the mapping from dictionary IDs. It then sends these
-    /// dictionaries over the wire.
-    ///
-    /// This requires identifying the different dictionaries in use, assigning
-    /// them IDs, and sending new dictionaries, delta or otherwise, when needed
-    ///
     /// See also:
     /// * [`Self::Resend`]
     Hydrate,
     /// Send dictionary FlightData with every RecordBatch that contains a
     /// [`DictionaryArray`]. See [`Self::Hydrate`] for more tradeoffs. No
     /// attempt is made to skip sending the same (logical) dictionary values
     /// twice.
     ///
     /// [`DictionaryArray`]: arrow_array::DictionaryArray
+    ///
+    /// This requires identifying the different dictionaries in use and assigning
+    /// them unique IDs
     Resend,
 }
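A minimal sketch of opting into this behavior when constructing an encoder (an
illustration, not part of the patch; it assumes the pre-existing
`FlightDataEncoderBuilder::with_dictionary_handling` method and a boxed stream
of input batches):

```rust
use arrow_array::RecordBatch;
use arrow_flight::encode::{DictionaryHandling, FlightDataEncoder, FlightDataEncoderBuilder};
use arrow_flight::error::FlightError;
use futures::stream::BoxStream;

// Keep dictionaries dictionary-encoded on the wire; the receiving client must
// support DictionaryEncoding to rebuild the DictionaryArrays from keys + values
fn dictionary_preserving_encoder(
    batches: BoxStream<'static, Result<RecordBatch, FlightError>>,
) -> FlightDataEncoder {
    FlightDataEncoderBuilder::new()
        .with_dictionary_handling(DictionaryHandling::Resend)
        .build(batches)
}
```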
+
+fn prepare_field_for_flight(field: &FieldRef, send_dictionaries: bool) -> Field {
+    match field.data_type() {
+        DataType::List(inner) => Field::new_list(
+            field.name(),
+            prepare_field_for_flight(inner, send_dictionaries),
+            field.is_nullable(),
+        )
+        .with_metadata(field.metadata().clone()),
+        DataType::LargeList(inner) => Field::new_list(
+            field.name(),
+            prepare_field_for_flight(inner, send_dictionaries),
+            field.is_nullable(),
+        )
+        .with_metadata(field.metadata().clone()),
+        DataType::Struct(fields) => {
+            let new_fields: Vec<Field> = fields
+                .iter()
+                .map(|f| prepare_field_for_flight(f, send_dictionaries))
+                .collect();
+            Field::new_struct(field.name(), new_fields, field.is_nullable())
+                .with_metadata(field.metadata().clone())
+        }
+        DataType::Union(fields, mode) => {
+            let (type_ids, new_fields): (Vec<i8>, Vec<Field>) = fields
+                .iter()
+                .map(|(type_id, f)| (type_id, prepare_field_for_flight(f, send_dictionaries)))
+                .unzip();
+
+            Field::new_union(field.name(), type_ids, new_fields, *mode)
+        }
+        DataType::Dictionary(_, value_type) if !send_dictionaries => Field::new(
+            field.name(),
+            value_type.as_ref().clone(),
+            field.is_nullable(),
+        )
+        .with_metadata(field.metadata().clone()),
+        _ => field.as_ref().clone(),
+    }
+}
+
 /// Prepare an arrow Schema for transport over the Arrow Flight protocol
 ///
 /// Convert dictionary types to underlying types
@@ -430,6 +494,7 @@ fn prepare_schema_for_flight(schema: &Schema, send_dictionaries: bool) -> Schema
                 field.is_nullable(),
             )
             .with_metadata(field.metadata().clone()),
+            tpe if tpe.is_nested() => prepare_field_for_flight(field, send_dictionaries),
             _ => field.as_ref().clone(),
         })
         .collect();
@@ -509,22 +574,14 @@ impl FlightIpcEncoder {
     }
 }
 
-/// Prepares a RecordBatch for transport over the Arrow Flight protocol
-///
-/// This means:
-///
-/// 1. Hydrates any dictionaries to its underlying type. See
+/// Hydrates any dictionary arrays in `batch` to their underlying type. See
 /// hydrate_dictionary for more information.
-///
-fn prepare_batch_for_flight(
-    batch: &RecordBatch,
-    schema: SchemaRef,
-    send_dictionaries: bool,
-) -> Result<RecordBatch> {
-    let columns = batch
-        .columns()
+fn hydrate_dictionaries(batch: &RecordBatch, schema: SchemaRef) -> Result<RecordBatch> {
+    let columns = schema
+        .fields()
         .iter()
-        .map(|c| hydrate_dictionary(c, send_dictionaries))
+        .zip(batch.columns())
+        .map(|(field, c)| hydrate_dictionary(c, field.data_type()))
         .collect::<Result<Vec<_>>>()?;
 
     let options = RecordBatchOptions::new().with_row_count(Some(batch.num_rows()));
@@ -534,22 +591,43 @@
     )?)
 }
 
-/// Hydrates a dictionary to its underlying type if send_dictionaries is false. If send_dictionaries
-/// is true, dictionaries are sent with every batch which is not as optimal as described in [DictionaryHandling::Hydrate] above,
-/// but does enable sending DictionaryArray's via Flight.
-fn hydrate_dictionary(array: &ArrayRef, send_dictionaries: bool) -> Result<ArrayRef> {
-    let arr = match array.data_type() {
-        DataType::Dictionary(_, value) if !send_dictionaries => arrow_cast::cast(array, value)?,
-        _ => Arc::clone(array),
+/// Hydrates a dictionary to its underlying type.
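+///
+/// For example (an illustrative sketch, not in the original patch):
+///
+/// ```ignore
+/// // A DictionaryArray<UInt16Type> of ["a", "a", "b"] hydrates to the
+/// // equivalent StringArray via a cast to the dictionary's value type
+/// let dict: DictionaryArray<UInt16Type> = vec!["a", "a", "b"].into_iter().collect();
+/// let hydrated = arrow_cast::cast(&dict, &DataType::Utf8).unwrap();
+/// ```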
+fn hydrate_dictionary(array: &ArrayRef, data_type: &DataType) -> Result<ArrayRef> {
+    let arr = match (array.data_type(), data_type) {
+        (DataType::Union(_, UnionMode::Sparse), DataType::Union(fields, UnionMode::Sparse)) => {
+            let union_arr = array.as_any().downcast_ref::<UnionArray>().unwrap();
+
+            let (type_ids, fields): (Vec<i8>, Vec<&FieldRef>) = fields.iter().unzip();
+
+            Arc::new(UnionArray::try_new(
+                &type_ids,
+                union_arr.type_ids().inner().clone(),
+                None,
+                fields
+                    .iter()
+                    .enumerate()
+                    .map(|(col, field)| {
+                        Ok((
+                            field.as_ref().clone(),
+                            arrow_cast::cast(union_arr.child(col as i8), field.data_type())?,
+                        ))
+                    })
+                    .collect::<Result<Vec<_>>>()?,
+            )?)
+        }
+        (_, data_type) => arrow_cast::cast(array, data_type)?,
     };
     Ok(arr)
 }
 
 #[cfg(test)]
 mod tests {
+    use arrow_array::builder::StringDictionaryBuilder;
     use arrow_array::*;
     use arrow_array::{cast::downcast_array, types::*};
+    use arrow_buffer::Buffer;
     use arrow_cast::pretty::pretty_format_batches;
+    use arrow_schema::UnionMode;
     use std::collections::HashMap;
 
     use crate::decode::{DecodedPayload, FlightDataDecoder};

     let (_, baseline_flight_batch) = make_flight_data(&batch, &options);
 
     let big_batch = batch.slice(0, batch.num_rows() - 1);
-    let optimized_big_batch = prepare_batch_for_flight(&big_batch, Arc::clone(schema), false)
-        .expect("failed to optimize");
+    let optimized_big_batch =
+        hydrate_dictionaries(&big_batch, Arc::clone(schema)).expect("failed to optimize");
     let (_, optimized_big_flight_batch) = make_flight_data(&optimized_big_batch, &options);
 
     assert_eq!(
 
     let small_batch = batch.slice(0, 1);
     let optimized_small_batch =
-        prepare_batch_for_flight(&small_batch, Arc::clone(schema), false)
-            .expect("failed to optimize");
+        hydrate_dictionaries(&small_batch, Arc::clone(schema)).expect("failed to optimize");
     let (_, optimized_small_flight_batch) = make_flight_data(&optimized_small_batch, &options);
 
     assert!(
 
 #[tokio::test]
 async fn test_dictionary_hydration() {
-    let arr: DictionaryArray<UInt16Type> = vec!["a", "a", "b"].into_iter().collect();
+    let arr1: DictionaryArray<UInt16Type> = vec!["a", "a", "b"].into_iter().collect();
+    let arr2: DictionaryArray<UInt16Type> = vec!["c", "c", "d"].into_iter().collect();
+
     let schema = Arc::new(Schema::new(vec![Field::new_dictionary(
         "dict",
         DataType::UInt16,
         DataType::Utf8,
         false,
     )]));
-    let batch = RecordBatch::try_new(schema, vec![Arc::new(arr)]).unwrap();
-    let encoder =
-        FlightDataEncoderBuilder::default().build(futures::stream::once(async { Ok(batch) }));
+    let batch1 = RecordBatch::try_new(schema.clone(), vec![Arc::new(arr1)]).unwrap();
+    let batch2 = RecordBatch::try_new(schema, vec![Arc::new(arr2)]).unwrap();
+
+    let stream = futures::stream::iter(vec![Ok(batch1), Ok(batch2)]);
+
+    let encoder = FlightDataEncoderBuilder::default().build(stream);
 
     let mut decoder = FlightDataDecoder::new(encoder);
     let expected_schema = Schema::new(vec![Field::new("dict", DataType::Utf8, false)]);
     let expected_schema = Arc::new(expected_schema);
+
+    let mut expected_arrays = vec![
+        StringArray::from(vec!["a", "a", "b"]),
+        StringArray::from(vec!["c", "c", "d"]),
+    ]
+    .into_iter();
+
     while let Some(decoded) = decoder.next().await {
         let decoded = decoded.unwrap();
         match decoded.payload {
             DecodedPayload::None => {}
             DecodedPayload::Schema(s) => assert_eq!(s, expected_schema),
             DecodedPayload::RecordBatch(b) => {
                 assert_eq!(b.schema(), expected_schema);
-                let expected_array = StringArray::from(vec!["a", "a", "b"]);
+                let expected_array = expected_arrays.next().unwrap();
let actual_array = b.column_by_name("dict").unwrap(); let actual_array = downcast_array::(actual_array); @@ -622,6 +709,311 @@ mod tests { } } + #[tokio::test] + async fn test_dictionary_list_hydration() { + let mut builder = builder::ListBuilder::new(StringDictionaryBuilder::::new()); + + builder.append_value(vec![Some("a"), None, Some("b")]); + + let arr1 = builder.finish(); + + builder.append_value(vec![Some("c"), None, Some("d")]); + + let arr2 = builder.finish(); + + let schema = Arc::new(Schema::new(vec![Field::new_list( + "dict_list", + Field::new_dictionary("item", DataType::UInt16, DataType::Utf8, true), + true, + )])); + + let batch1 = RecordBatch::try_new(schema.clone(), vec![Arc::new(arr1)]).unwrap(); + let batch2 = RecordBatch::try_new(schema.clone(), vec![Arc::new(arr2)]).unwrap(); + + let stream = futures::stream::iter(vec![Ok(batch1), Ok(batch2)]); + + let encoder = FlightDataEncoderBuilder::default().build(stream); + + let mut decoder = FlightDataDecoder::new(encoder); + let expected_schema = Schema::new(vec![Field::new_list( + "dict_list", + Field::new("item", DataType::Utf8, true), + true, + )]); + + let expected_schema = Arc::new(expected_schema); + + let mut expected_arrays = vec![ + StringArray::from_iter(vec![Some("a"), None, Some("b")]), + StringArray::from_iter(vec![Some("c"), None, Some("d")]), + ] + .into_iter(); + + while let Some(decoded) = decoder.next().await { + let decoded = decoded.unwrap(); + match decoded.payload { + DecodedPayload::None => {} + DecodedPayload::Schema(s) => assert_eq!(s, expected_schema), + DecodedPayload::RecordBatch(b) => { + assert_eq!(b.schema(), expected_schema); + let expected_array = expected_arrays.next().unwrap(); + let list_array = + downcast_array::(b.column_by_name("dict_list").unwrap()); + let elem_array = downcast_array::(list_array.value(0).as_ref()); + + assert_eq!(elem_array, expected_array); + } + } + } + } + + #[tokio::test] + async fn test_dictionary_struct_hydration() { + let struct_fields = vec![Field::new_list( + "dict_list", + Field::new_dictionary("item", DataType::UInt16, DataType::Utf8, true), + true, + )]; + + let mut builder = builder::ListBuilder::new(StringDictionaryBuilder::::new()); + + builder.append_value(vec![Some("a"), None, Some("b")]); + + let arr1 = Arc::new(builder.finish()); + let arr1 = StructArray::new(struct_fields.clone().into(), vec![arr1], None); + + builder.append_value(vec![Some("c"), None, Some("d")]); + + let arr2 = Arc::new(builder.finish()); + let arr2 = StructArray::new(struct_fields.clone().into(), vec![arr2], None); + + let schema = Arc::new(Schema::new(vec![Field::new_struct( + "struct", + struct_fields.clone(), + true, + )])); + + let batch1 = RecordBatch::try_new(schema.clone(), vec![Arc::new(arr1)]).unwrap(); + let batch2 = RecordBatch::try_new(schema.clone(), vec![Arc::new(arr2)]).unwrap(); + + let stream = futures::stream::iter(vec![Ok(batch1), Ok(batch2)]); + + let encoder = FlightDataEncoderBuilder::default().build(stream); + + let mut decoder = FlightDataDecoder::new(encoder); + let expected_schema = Schema::new(vec![Field::new_struct( + "struct", + vec![Field::new_list( + "dict_list", + Field::new("item", DataType::Utf8, true), + true, + )], + true, + )]); + + let expected_schema = Arc::new(expected_schema); + + let mut expected_arrays = vec![ + StringArray::from_iter(vec![Some("a"), None, Some("b")]), + StringArray::from_iter(vec![Some("c"), None, Some("d")]), + ] + .into_iter(); + + while let Some(decoded) = decoder.next().await { + let decoded = decoded.unwrap(); + 
match decoded.payload { + DecodedPayload::None => {} + DecodedPayload::Schema(s) => assert_eq!(s, expected_schema), + DecodedPayload::RecordBatch(b) => { + assert_eq!(b.schema(), expected_schema); + let expected_array = expected_arrays.next().unwrap(); + let struct_array = + downcast_array::(b.column_by_name("struct").unwrap()); + let list_array = downcast_array::(struct_array.column(0)); + + let elem_array = downcast_array::(list_array.value(0).as_ref()); + + assert_eq!(elem_array, expected_array); + } + } + } + } + + #[tokio::test] + async fn test_dictionary_union_hydration() { + let struct_fields = vec![Field::new_list( + "dict_list", + Field::new_dictionary("item", DataType::UInt16, DataType::Utf8, true), + true, + )]; + + let type_ids = vec![0, 1, 2]; + let union_fields = vec![ + Field::new_list( + "dict_list", + Field::new_dictionary("item", DataType::UInt16, DataType::Utf8, true), + true, + ), + Field::new_struct("struct", struct_fields.clone(), true), + Field::new("string", DataType::Utf8, true), + ]; + + let struct_fields = vec![Field::new_list( + "dict_list", + Field::new_dictionary("item", DataType::UInt16, DataType::Utf8, true), + true, + )]; + + let mut builder = builder::ListBuilder::new(StringDictionaryBuilder::::new()); + + builder.append_value(vec![Some("a"), None, Some("b")]); + + let arr1 = builder.finish(); + + let type_id_buffer = Buffer::from_slice_ref([0_i8]); + let arr1 = UnionArray::try_new( + &type_ids, + type_id_buffer, + None, + vec![ + (union_fields[0].clone(), Arc::new(arr1)), + ( + union_fields[1].clone(), + new_null_array(union_fields[1].data_type(), 1), + ), + ( + union_fields[2].clone(), + new_null_array(union_fields[2].data_type(), 1), + ), + ], + ) + .unwrap(); + + builder.append_value(vec![Some("c"), None, Some("d")]); + + let arr2 = Arc::new(builder.finish()); + let arr2 = StructArray::new(struct_fields.clone().into(), vec![arr2], None); + + let type_id_buffer = Buffer::from_slice_ref([1_i8]); + let arr2 = UnionArray::try_new( + &type_ids, + type_id_buffer, + None, + vec![ + ( + union_fields[0].clone(), + new_null_array(union_fields[0].data_type(), 1), + ), + (union_fields[1].clone(), Arc::new(arr2)), + ( + union_fields[2].clone(), + new_null_array(union_fields[2].data_type(), 1), + ), + ], + ) + .unwrap(); + + let type_id_buffer = Buffer::from_slice_ref([2_i8]); + let arr3 = UnionArray::try_new( + &type_ids, + type_id_buffer, + None, + vec![ + ( + union_fields[0].clone(), + new_null_array(union_fields[0].data_type(), 1), + ), + ( + union_fields[1].clone(), + new_null_array(union_fields[1].data_type(), 1), + ), + ( + union_fields[2].clone(), + Arc::new(StringArray::from(vec!["e"])), + ), + ], + ) + .unwrap(); + + let schema = Arc::new(Schema::new(vec![Field::new_union( + "union", + type_ids.clone(), + union_fields.clone(), + UnionMode::Sparse, + )])); + + let batch1 = RecordBatch::try_new(schema.clone(), vec![Arc::new(arr1)]).unwrap(); + let batch2 = RecordBatch::try_new(schema.clone(), vec![Arc::new(arr2)]).unwrap(); + let batch3 = RecordBatch::try_new(schema.clone(), vec![Arc::new(arr3)]).unwrap(); + + let stream = futures::stream::iter(vec![Ok(batch1), Ok(batch2), Ok(batch3)]); + + let encoder = FlightDataEncoderBuilder::default().build(stream); + + let mut decoder = FlightDataDecoder::new(encoder); + + let hydrated_struct_fields = vec![Field::new_list( + "dict_list", + Field::new("item", DataType::Utf8, true), + true, + )]; + + let hydrated_union_fields = vec![ + Field::new_list("dict_list", Field::new("item", DataType::Utf8, true), true), + 
Field::new_struct("struct", hydrated_struct_fields.clone(), true), + Field::new("string", DataType::Utf8, true), + ]; + + let expected_schema = Schema::new(vec![Field::new_union( + "union", + type_ids.clone(), + hydrated_union_fields, + UnionMode::Sparse, + )]); + + let expected_schema = Arc::new(expected_schema); + + let mut expected_arrays = vec![ + StringArray::from_iter(vec![Some("a"), None, Some("b")]), + StringArray::from_iter(vec![Some("c"), None, Some("d")]), + StringArray::from(vec!["e"]), + ] + .into_iter(); + + let mut batch = 0; + while let Some(decoded) = decoder.next().await { + let decoded = decoded.unwrap(); + match decoded.payload { + DecodedPayload::None => {} + DecodedPayload::Schema(s) => assert_eq!(s, expected_schema), + DecodedPayload::RecordBatch(b) => { + assert_eq!(b.schema(), expected_schema); + let expected_array = expected_arrays.next().unwrap(); + let union_arr = + downcast_array::(b.column_by_name("union").unwrap()); + + let elem_array = match batch { + 0 => { + let list_array = downcast_array::(union_arr.child(0)); + downcast_array::(list_array.value(0).as_ref()) + } + 1 => { + let struct_array = downcast_array::(union_arr.child(1)); + let list_array = downcast_array::(struct_array.column(0)); + + downcast_array::(list_array.value(0).as_ref()) + } + _ => downcast_array::(union_arr.child(2)), + }; + + batch += 1; + + assert_eq!(elem_array, expected_array); + } + } + } + } + #[tokio::test] async fn test_send_dictionaries() { let schema = Arc::new(Schema::new(vec![Field::new_dictionary( @@ -683,7 +1075,7 @@ mod tests { ) .expect("cannot create record batch"); - prepare_batch_for_flight(&batch, batch.schema(), false).expect("failed to optimize"); + hydrate_dictionaries(&batch, batch.schema()).expect("failed to optimize"); } pub fn make_flight_data( From 78aff9c401135e9e38b862a0fb9fba5947512da7 Mon Sep 17 00:00:00 2001 From: Yijun Zhao Date: Fri, 15 Mar 2024 13:34:50 +0800 Subject: [PATCH 07/11] update arrow-format (#5502) --- arrow-array/src/array/byte_view_array.rs | 1 - .../src/builder/generic_bytes_view_builder.rs | 1 + arrow-ipc/src/convert.rs | 2 +- arrow-ipc/src/gen/Message.rs | 51 ++ arrow-ipc/src/gen/Schema.rs | 475 +++++++++++++++++- format/Message.fbs | 18 +- format/Schema.fbs | 46 +- parquet/src/arrow/schema/mod.rs | 191 +++---- 8 files changed, 641 insertions(+), 144 deletions(-) diff --git a/arrow-array/src/array/byte_view_array.rs b/arrow-array/src/array/byte_view_array.rs index a3b8a5dcb803..9f3a6809d9d0 100644 --- a/arrow-array/src/array/byte_view_array.rs +++ b/arrow-array/src/array/byte_view_array.rs @@ -93,7 +93,6 @@ use std::sync::Arc; /// └───┘ /// ``` /// [`GenericByteArray`]: crate::array::GenericByteArray - pub struct GenericByteViewArray { data_type: DataType, views: ScalarBuffer, diff --git a/arrow-array/src/builder/generic_bytes_view_builder.rs b/arrow-array/src/builder/generic_bytes_view_builder.rs index 29de7feb0ec1..9accb932ae20 100644 --- a/arrow-array/src/builder/generic_bytes_view_builder.rs +++ b/arrow-array/src/builder/generic_bytes_view_builder.rs @@ -20,6 +20,7 @@ use crate::types::{BinaryViewType, ByteViewType, StringViewType}; use crate::{ArrayRef, GenericByteViewArray}; use arrow_buffer::{Buffer, BufferBuilder, NullBufferBuilder, ScalarBuffer}; use arrow_data::ByteView; + use std::any::Any; use std::marker::PhantomData; use std::sync::Arc; diff --git a/arrow-ipc/src/convert.rs b/arrow-ipc/src/convert.rs index a821008d89ab..b2e580241adc 100644 --- a/arrow-ipc/src/convert.rs +++ b/arrow-ipc/src/convert.rs @@ -543,7 +543,7 @@ 
pub(crate) fn get_fb_field_type<'a>( .as_union_value(), children: Some(fbb.create_vector(&empty_fields[..])), }, - BinaryView | Utf8View => unimplemented!("BinaryView/Utf8View not implemented"), + BinaryView | Utf8View => unimplemented!("unimplemented"), Utf8 => FBFieldType { type_type: crate::Type::Utf8, type_: crate::Utf8Builder::new(fbb).finish().as_union_value(), diff --git a/arrow-ipc/src/gen/Message.rs b/arrow-ipc/src/gen/Message.rs index a546b54d9170..1f49f1d9428b 100644 --- a/arrow-ipc/src/gen/Message.rs +++ b/arrow-ipc/src/gen/Message.rs @@ -25,6 +25,8 @@ use flatbuffers::EndianScalar; use std::{cmp::Ordering, mem}; // automatically generated by the FlatBuffers compiler, do not modify +// @generated + #[deprecated( since = "2.0.0", note = "Use associated constants instead. This will no longer be generated in 2021." @@ -636,6 +638,7 @@ impl<'a> RecordBatch<'a> { pub const VT_NODES: flatbuffers::VOffsetT = 6; pub const VT_BUFFERS: flatbuffers::VOffsetT = 8; pub const VT_COMPRESSION: flatbuffers::VOffsetT = 10; + pub const VT_VARIADICBUFFERCOUNTS: flatbuffers::VOffsetT = 12; #[inline] pub unsafe fn init_from_table(table: flatbuffers::Table<'a>) -> Self { @@ -648,6 +651,9 @@ impl<'a> RecordBatch<'a> { ) -> flatbuffers::WIPOffset> { let mut builder = RecordBatchBuilder::new(_fbb); builder.add_length(args.length); + if let Some(x) = args.variadicBufferCounts { + builder.add_variadicBufferCounts(x); + } if let Some(x) = args.compression { builder.add_compression(x); } @@ -720,6 +726,33 @@ impl<'a> RecordBatch<'a> { ) } } + /// Some types such as Utf8View are represented using a variable number of buffers. + /// For each such Field in the pre-ordered flattened logical schema, there will be + /// an entry in variadicBufferCounts to indicate the number of number of variadic + /// buffers which belong to that Field in the current RecordBatch. + /// + /// For example, the schema + /// col1: Struct + /// col2: Utf8View + /// contains two Fields with variadic buffers so variadicBufferCounts will have + /// two entries, the first counting the variadic buffers of `col1.beta` and the + /// second counting `col2`'s. + /// + /// This field may be omitted if and only if the schema contains no Fields with + /// a variable number of buffers, such as BinaryView and Utf8View. + #[inline] + pub fn variadicBufferCounts(&self) -> Option> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::>>( + RecordBatch::VT_VARIADICBUFFERCOUNTS, + None, + ) + } + } } impl flatbuffers::Verifiable for RecordBatch<'_> { @@ -746,6 +779,11 @@ impl flatbuffers::Verifiable for RecordBatch<'_> { Self::VT_COMPRESSION, false, )? + .visit_field::>>( + "variadicBufferCounts", + Self::VT_VARIADICBUFFERCOUNTS, + false, + )? 
.finish(); Ok(()) } @@ -755,6 +793,7 @@ pub struct RecordBatchArgs<'a> { pub nodes: Option>>, pub buffers: Option>>, pub compression: Option>>, + pub variadicBufferCounts: Option>>, } impl<'a> Default for RecordBatchArgs<'a> { #[inline] @@ -764,6 +803,7 @@ impl<'a> Default for RecordBatchArgs<'a> { nodes: None, buffers: None, compression: None, + variadicBufferCounts: None, } } } @@ -800,6 +840,16 @@ impl<'a: 'b, 'b> RecordBatchBuilder<'a, 'b> { ); } #[inline] + pub fn add_variadicBufferCounts( + &mut self, + variadicBufferCounts: flatbuffers::WIPOffset>, + ) { + self.fbb_.push_slot_always::>( + RecordBatch::VT_VARIADICBUFFERCOUNTS, + variadicBufferCounts, + ); + } + #[inline] pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>) -> RecordBatchBuilder<'a, 'b> { let start = _fbb.start_table(); RecordBatchBuilder { @@ -821,6 +871,7 @@ impl core::fmt::Debug for RecordBatch<'_> { ds.field("nodes", &self.nodes()); ds.field("buffers", &self.buffers()); ds.field("compression", &self.compression()); + ds.field("variadicBufferCounts", &self.variadicBufferCounts()); ds.finish() } } diff --git a/arrow-ipc/src/gen/Schema.rs b/arrow-ipc/src/gen/Schema.rs index 0dc5dccd39e7..ed9dbaa249f0 100644 --- a/arrow-ipc/src/gen/Schema.rs +++ b/arrow-ipc/src/gen/Schema.rs @@ -22,6 +22,8 @@ use flatbuffers::EndianScalar; use std::{cmp::Ordering, mem}; // automatically generated by the FlatBuffers compiler, do not modify +// @generated + #[deprecated( since = "2.0.0", note = "Use associated constants instead. This will no longer be generated in 2021." @@ -58,7 +60,7 @@ impl MetadataVersion { pub const V3: Self = Self(2); /// >= 0.8.0 (December 2017). Non-backwards compatible with V3. pub const V4: Self = Self(3); - /// >= 1.0.0 (July 2020. Backwards compatible with V4 (V5 readers can read V4 + /// >= 1.0.0 (July 2020). Backwards compatible with V4 (V5 readers can read V4 /// metadata and IPC messages). Implementations are recommended to provide a /// V4 compatibility mode with V5 format changes disabled. /// @@ -734,13 +736,13 @@ pub const ENUM_MIN_TYPE: u8 = 0; since = "2.0.0", note = "Use associated constants instead. This will no longer be generated in 2021." )] -pub const ENUM_MAX_TYPE: u8 = 22; +pub const ENUM_MAX_TYPE: u8 = 26; #[deprecated( since = "2.0.0", note = "Use associated constants instead. This will no longer be generated in 2021." )] #[allow(non_camel_case_types)] -pub const ENUM_VALUES_TYPE: [Type; 23] = [ +pub const ENUM_VALUES_TYPE: [Type; 27] = [ Type::NONE, Type::Null, Type::Int, @@ -764,6 +766,10 @@ pub const ENUM_VALUES_TYPE: [Type; 23] = [ Type::LargeUtf8, Type::LargeList, Type::RunEndEncoded, + Type::BinaryView, + Type::Utf8View, + Type::ListView, + Type::LargeListView, ]; /// ---------------------------------------------------------------------- @@ -797,9 +803,13 @@ impl Type { pub const LargeUtf8: Self = Self(20); pub const LargeList: Self = Self(21); pub const RunEndEncoded: Self = Self(22); + pub const BinaryView: Self = Self(23); + pub const Utf8View: Self = Self(24); + pub const ListView: Self = Self(25); + pub const LargeListView: Self = Self(26); pub const ENUM_MIN: u8 = 0; - pub const ENUM_MAX: u8 = 22; + pub const ENUM_MAX: u8 = 26; pub const ENUM_VALUES: &'static [Self] = &[ Self::NONE, Self::Null, @@ -824,6 +834,10 @@ impl Type { Self::LargeUtf8, Self::LargeList, Self::RunEndEncoded, + Self::BinaryView, + Self::Utf8View, + Self::ListView, + Self::LargeListView, ]; /// Returns the variant's name or "" if unknown. 
pub fn variant_name(self) -> Option<&'static str> { @@ -851,6 +865,10 @@ impl Type { Self::LargeUtf8 => Some("LargeUtf8"), Self::LargeList => Some("LargeList"), Self::RunEndEncoded => Some("RunEndEncoded"), + Self::BinaryView => Some("BinaryView"), + Self::Utf8View => Some("Utf8View"), + Self::ListView => Some("ListView"), + Self::LargeListView => Some("LargeListView"), _ => None, } } @@ -1545,6 +1563,165 @@ impl core::fmt::Debug for LargeList<'_> { ds.finish() } } +pub enum ListViewOffset {} +#[derive(Copy, Clone, PartialEq)] + +/// Represents the same logical types that List can, but contains offsets and +/// sizes allowing for writes in any order and sharing of child values among +/// list values. +pub struct ListView<'a> { + pub _tab: flatbuffers::Table<'a>, +} + +impl<'a> flatbuffers::Follow<'a> for ListView<'a> { + type Inner = ListView<'a>; + #[inline] + unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner { + Self { + _tab: flatbuffers::Table::new(buf, loc), + } + } +} + +impl<'a> ListView<'a> { + #[inline] + pub unsafe fn init_from_table(table: flatbuffers::Table<'a>) -> Self { + ListView { _tab: table } + } + #[allow(unused_mut)] + pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr>( + _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr>, + _args: &'args ListViewArgs, + ) -> flatbuffers::WIPOffset> { + let mut builder = ListViewBuilder::new(_fbb); + builder.finish() + } +} + +impl flatbuffers::Verifiable for ListView<'_> { + #[inline] + fn run_verifier( + v: &mut flatbuffers::Verifier, + pos: usize, + ) -> Result<(), flatbuffers::InvalidFlatbuffer> { + use flatbuffers::Verifiable; + v.visit_table(pos)?.finish(); + Ok(()) + } +} +pub struct ListViewArgs {} +impl<'a> Default for ListViewArgs { + #[inline] + fn default() -> Self { + ListViewArgs {} + } +} + +pub struct ListViewBuilder<'a: 'b, 'b> { + fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a>, + start_: flatbuffers::WIPOffset, +} +impl<'a: 'b, 'b> ListViewBuilder<'a, 'b> { + #[inline] + pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>) -> ListViewBuilder<'a, 'b> { + let start = _fbb.start_table(); + ListViewBuilder { + fbb_: _fbb, + start_: start, + } + } + #[inline] + pub fn finish(self) -> flatbuffers::WIPOffset> { + let o = self.fbb_.end_table(self.start_); + flatbuffers::WIPOffset::new(o.value()) + } +} + +impl core::fmt::Debug for ListView<'_> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + let mut ds = f.debug_struct("ListView"); + ds.finish() + } +} +pub enum LargeListViewOffset {} +#[derive(Copy, Clone, PartialEq)] + +/// Same as ListView, but with 64-bit offsets and sizes, allowing to represent +/// extremely large data values. 
+pub struct LargeListView<'a> { + pub _tab: flatbuffers::Table<'a>, +} + +impl<'a> flatbuffers::Follow<'a> for LargeListView<'a> { + type Inner = LargeListView<'a>; + #[inline] + unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner { + Self { + _tab: flatbuffers::Table::new(buf, loc), + } + } +} + +impl<'a> LargeListView<'a> { + #[inline] + pub unsafe fn init_from_table(table: flatbuffers::Table<'a>) -> Self { + LargeListView { _tab: table } + } + #[allow(unused_mut)] + pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr>( + _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr>, + _args: &'args LargeListViewArgs, + ) -> flatbuffers::WIPOffset> { + let mut builder = LargeListViewBuilder::new(_fbb); + builder.finish() + } +} + +impl flatbuffers::Verifiable for LargeListView<'_> { + #[inline] + fn run_verifier( + v: &mut flatbuffers::Verifier, + pos: usize, + ) -> Result<(), flatbuffers::InvalidFlatbuffer> { + use flatbuffers::Verifiable; + v.visit_table(pos)?.finish(); + Ok(()) + } +} +pub struct LargeListViewArgs {} +impl<'a> Default for LargeListViewArgs { + #[inline] + fn default() -> Self { + LargeListViewArgs {} + } +} + +pub struct LargeListViewBuilder<'a: 'b, 'b> { + fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a>, + start_: flatbuffers::WIPOffset, +} +impl<'a: 'b, 'b> LargeListViewBuilder<'a, 'b> { + #[inline] + pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>) -> LargeListViewBuilder<'a, 'b> { + let start = _fbb.start_table(); + LargeListViewBuilder { + fbb_: _fbb, + start_: start, + } + } + #[inline] + pub fn finish(self) -> flatbuffers::WIPOffset> { + let o = self.fbb_.end_table(self.start_); + flatbuffers::WIPOffset::new(o.value()) + } +} + +impl core::fmt::Debug for LargeListView<'_> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + let mut ds = f.debug_struct("LargeListView"); + ds.finish() + } +} pub enum FixedSizeListOffset {} #[derive(Copy, Clone, PartialEq)] @@ -2453,6 +2630,174 @@ impl core::fmt::Debug for LargeBinary<'_> { ds.finish() } } +pub enum Utf8ViewOffset {} +#[derive(Copy, Clone, PartialEq)] + +/// Logically the same as Utf8, but the internal representation uses a view +/// struct that contains the string length and either the string's entire data +/// inline (for small strings) or an inlined prefix, an index of another buffer, +/// and an offset pointing to a slice in that buffer (for non-small strings). +/// +/// Since it uses a variable number of data buffers, each Field with this type +/// must have a corresponding entry in `variadicBufferCounts`. 
+pub struct Utf8View<'a> { + pub _tab: flatbuffers::Table<'a>, +} + +impl<'a> flatbuffers::Follow<'a> for Utf8View<'a> { + type Inner = Utf8View<'a>; + #[inline] + unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner { + Self { + _tab: flatbuffers::Table::new(buf, loc), + } + } +} + +impl<'a> Utf8View<'a> { + #[inline] + pub unsafe fn init_from_table(table: flatbuffers::Table<'a>) -> Self { + Utf8View { _tab: table } + } + #[allow(unused_mut)] + pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr>( + _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr>, + _args: &'args Utf8ViewArgs, + ) -> flatbuffers::WIPOffset> { + let mut builder = Utf8ViewBuilder::new(_fbb); + builder.finish() + } +} + +impl flatbuffers::Verifiable for Utf8View<'_> { + #[inline] + fn run_verifier( + v: &mut flatbuffers::Verifier, + pos: usize, + ) -> Result<(), flatbuffers::InvalidFlatbuffer> { + use flatbuffers::Verifiable; + v.visit_table(pos)?.finish(); + Ok(()) + } +} +pub struct Utf8ViewArgs {} +impl<'a> Default for Utf8ViewArgs { + #[inline] + fn default() -> Self { + Utf8ViewArgs {} + } +} + +pub struct Utf8ViewBuilder<'a: 'b, 'b> { + fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a>, + start_: flatbuffers::WIPOffset, +} +impl<'a: 'b, 'b> Utf8ViewBuilder<'a, 'b> { + #[inline] + pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>) -> Utf8ViewBuilder<'a, 'b> { + let start = _fbb.start_table(); + Utf8ViewBuilder { + fbb_: _fbb, + start_: start, + } + } + #[inline] + pub fn finish(self) -> flatbuffers::WIPOffset> { + let o = self.fbb_.end_table(self.start_); + flatbuffers::WIPOffset::new(o.value()) + } +} + +impl core::fmt::Debug for Utf8View<'_> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + let mut ds = f.debug_struct("Utf8View"); + ds.finish() + } +} +pub enum BinaryViewOffset {} +#[derive(Copy, Clone, PartialEq)] + +/// Logically the same as Binary, but the internal representation uses a view +/// struct that contains the string length and either the string's entire data +/// inline (for small strings) or an inlined prefix, an index of another buffer, +/// and an offset pointing to a slice in that buffer (for non-small strings). +/// +/// Since it uses a variable number of data buffers, each Field with this type +/// must have a corresponding entry in `variadicBufferCounts`. 
+pub struct BinaryView<'a> { + pub _tab: flatbuffers::Table<'a>, +} + +impl<'a> flatbuffers::Follow<'a> for BinaryView<'a> { + type Inner = BinaryView<'a>; + #[inline] + unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner { + Self { + _tab: flatbuffers::Table::new(buf, loc), + } + } +} + +impl<'a> BinaryView<'a> { + #[inline] + pub unsafe fn init_from_table(table: flatbuffers::Table<'a>) -> Self { + BinaryView { _tab: table } + } + #[allow(unused_mut)] + pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr>( + _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr>, + _args: &'args BinaryViewArgs, + ) -> flatbuffers::WIPOffset> { + let mut builder = BinaryViewBuilder::new(_fbb); + builder.finish() + } +} + +impl flatbuffers::Verifiable for BinaryView<'_> { + #[inline] + fn run_verifier( + v: &mut flatbuffers::Verifier, + pos: usize, + ) -> Result<(), flatbuffers::InvalidFlatbuffer> { + use flatbuffers::Verifiable; + v.visit_table(pos)?.finish(); + Ok(()) + } +} +pub struct BinaryViewArgs {} +impl<'a> Default for BinaryViewArgs { + #[inline] + fn default() -> Self { + BinaryViewArgs {} + } +} + +pub struct BinaryViewBuilder<'a: 'b, 'b> { + fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a>, + start_: flatbuffers::WIPOffset, +} +impl<'a: 'b, 'b> BinaryViewBuilder<'a, 'b> { + #[inline] + pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>) -> BinaryViewBuilder<'a, 'b> { + let start = _fbb.start_table(); + BinaryViewBuilder { + fbb_: _fbb, + start_: start, + } + } + #[inline] + pub fn finish(self) -> flatbuffers::WIPOffset> { + let o = self.fbb_.end_table(self.start_); + flatbuffers::WIPOffset::new(o.value()) + } +} + +impl core::fmt::Debug for BinaryView<'_> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + let mut ds = f.debug_struct("BinaryView"); + ds.finish() + } +} pub enum FixedSizeBinaryOffset {} #[derive(Copy, Clone, PartialEq)] @@ -3213,7 +3558,7 @@ pub enum TimestampOffset {} /// no indication of how to map this information to a physical point in time. /// Naive date-times must be handled with care because of this missing /// information, and also because daylight saving time (DST) may make -/// some values ambiguous or non-existent. A naive date-time may be +/// some values ambiguous or nonexistent. A naive date-time may be /// stored as a struct with Date and Time fields. However, it may also be /// encoded into a Timestamp column with an empty timezone. 
The timestamp /// values should be computed "as if" the timezone of the date-time values @@ -4365,6 +4710,66 @@ impl<'a> Field<'a> { None } } + + #[inline] + #[allow(non_snake_case)] + pub fn type_as_binary_view(&self) -> Option> { + if self.type_type() == Type::BinaryView { + self.type_().map(|t| { + // Safety: + // Created from a valid Table for this object + // Which contains a valid union in this slot + unsafe { BinaryView::init_from_table(t) } + }) + } else { + None + } + } + + #[inline] + #[allow(non_snake_case)] + pub fn type_as_utf_8_view(&self) -> Option> { + if self.type_type() == Type::Utf8View { + self.type_().map(|t| { + // Safety: + // Created from a valid Table for this object + // Which contains a valid union in this slot + unsafe { Utf8View::init_from_table(t) } + }) + } else { + None + } + } + + #[inline] + #[allow(non_snake_case)] + pub fn type_as_list_view(&self) -> Option> { + if self.type_type() == Type::ListView { + self.type_().map(|t| { + // Safety: + // Created from a valid Table for this object + // Which contains a valid union in this slot + unsafe { ListView::init_from_table(t) } + }) + } else { + None + } + } + + #[inline] + #[allow(non_snake_case)] + pub fn type_as_large_list_view(&self) -> Option> { + if self.type_type() == Type::LargeListView { + self.type_().map(|t| { + // Safety: + // Created from a valid Table for this object + // Which contains a valid union in this slot + unsafe { LargeListView::init_from_table(t) } + }) + } else { + None + } + } } impl flatbuffers::Verifiable for Field<'_> { @@ -4484,6 +4889,26 @@ impl flatbuffers::Verifiable for Field<'_> { "Type::RunEndEncoded", pos, ), + Type::BinaryView => v + .verify_union_variant::>( + "Type::BinaryView", + pos, + ), + Type::Utf8View => v + .verify_union_variant::>( + "Type::Utf8View", + pos, + ), + Type::ListView => v + .verify_union_variant::>( + "Type::ListView", + pos, + ), + Type::LargeListView => v + .verify_union_variant::>( + "Type::LargeListView", + pos, + ), _ => Ok(()), }, )? @@ -4827,6 +5252,46 @@ impl core::fmt::Debug for Field<'_> { ) } } + Type::BinaryView => { + if let Some(x) = self.type_as_binary_view() { + ds.field("type_", &x) + } else { + ds.field( + "type_", + &"InvalidFlatbuffer: Union discriminant does not match value.", + ) + } + } + Type::Utf8View => { + if let Some(x) = self.type_as_utf_8_view() { + ds.field("type_", &x) + } else { + ds.field( + "type_", + &"InvalidFlatbuffer: Union discriminant does not match value.", + ) + } + } + Type::ListView => { + if let Some(x) = self.type_as_list_view() { + ds.field("type_", &x) + } else { + ds.field( + "type_", + &"InvalidFlatbuffer: Union discriminant does not match value.", + ) + } + } + Type::LargeListView => { + if let Some(x) = self.type_as_large_list_view() { + ds.field("type_", &x) + } else { + ds.field( + "type_", + &"InvalidFlatbuffer: Union discriminant does not match value.", + ) + } + } _ => { let x: Option<()> = None; ds.field("type_", &x) diff --git a/format/Message.fbs b/format/Message.fbs index 170ea8fbced8..c8c9b4b82cbf 100644 --- a/format/Message.fbs +++ b/format/Message.fbs @@ -99,6 +99,22 @@ table RecordBatch { /// Optional compression of the message body compression: BodyCompression; + + /// Some types such as Utf8View are represented using a variable number of buffers. + /// For each such Field in the pre-ordered flattened logical schema, there will be + /// an entry in variadicBufferCounts to indicate the number of number of variadic + /// buffers which belong to that Field in the current RecordBatch. 
+ /// + /// For example, the schema + /// col1: Struct + /// col2: Utf8View + /// contains two Fields with variadic buffers so variadicBufferCounts will have + /// two entries, the first counting the variadic buffers of `col1.beta` and the + /// second counting `col2`'s. + /// + /// This field may be omitted if and only if the schema contains no Fields with + /// a variable number of buffers, such as BinaryView and Utf8View. + variadicBufferCounts: [long]; } /// For sending dictionary encoding information. Any Field can be @@ -138,4 +154,4 @@ table Message { custom_metadata: [ KeyValue ]; } -root_type Message; +root_type Message; \ No newline at end of file diff --git a/format/Schema.fbs b/format/Schema.fbs index 6337f72ec9de..ab726903d19f 100644 --- a/format/Schema.fbs +++ b/format/Schema.fbs @@ -20,8 +20,10 @@ /// Format Version History. /// Version 1.0 - Forward and backwards compatibility guaranteed. /// Version 1.1 - Add Decimal256. -/// Version 1.2 - Add Interval MONTH_DAY_NANO +/// Version 1.2 - Add Interval MONTH_DAY_NANO. /// Version 1.3 - Add Run-End Encoded. +/// Version 1.4 - Add BinaryView, Utf8View, variadicBufferCounts, ListView, and +/// LargeListView. namespace org.apache.arrow.flatbuf; @@ -38,7 +40,7 @@ enum MetadataVersion:short { /// >= 0.8.0 (December 2017). Non-backwards compatible with V3. V4, - /// >= 1.0.0 (July 2020. Backwards compatible with V4 (V5 readers can read V4 + /// >= 1.0.0 (July 2020). Backwards compatible with V4 (V5 readers can read V4 /// metadata and IPC messages). Implementations are recommended to provide a /// V4 compatibility mode with V5 format changes disabled. /// @@ -96,6 +98,17 @@ table List { table LargeList { } +/// Represents the same logical types that List can, but contains offsets and +/// sizes allowing for writes in any order and sharing of child values among +/// list values. +table ListView { +} + +/// Same as ListView, but with 64-bit offsets and sizes, allowing to represent +/// extremely large data values. +table LargeListView { +} + table FixedSizeList { /// Number of list items per value listSize: int; @@ -171,6 +184,27 @@ table LargeUtf8 { table LargeBinary { } +/// Logically the same as Utf8, but the internal representation uses a view +/// struct that contains the string length and either the string's entire data +/// inline (for small strings) or an inlined prefix, an index of another buffer, +/// and an offset pointing to a slice in that buffer (for non-small strings). +/// +/// Since it uses a variable number of data buffers, each Field with this type +/// must have a corresponding entry in `variadicBufferCounts`. +table Utf8View { +} + +/// Logically the same as Binary, but the internal representation uses a view +/// struct that contains the string length and either the string's entire data +/// inline (for small strings) or an inlined prefix, an index of another buffer, +/// and an offset pointing to a slice in that buffer (for non-small strings). +/// +/// Since it uses a variable number of data buffers, each Field with this type +/// must have a corresponding entry in `variadicBufferCounts`. +table BinaryView { +} + + table FixedSizeBinary { /// Number of bytes per value byteWidth: int; @@ -338,7 +372,7 @@ table Time { /// no indication of how to map this information to a physical point in time. /// Naive date-times must be handled with care because of this missing /// information, and also because daylight saving time (DST) may make -/// some values ambiguous or non-existent. 
A naive date-time may be +/// some values ambiguous or nonexistent. A naive date-time may be /// stored as a struct with Date and Time fields. However, it may also be /// encoded into a Timestamp column with an empty timezone. The timestamp /// values should be computed "as if" the timezone of the date-time values @@ -427,6 +461,10 @@ union Type { LargeUtf8, LargeList, RunEndEncoded, + BinaryView, + Utf8View, + ListView, + LargeListView, } /// ---------------------------------------------------------------------- @@ -529,4 +567,4 @@ table Schema { features : [ Feature ]; } -root_type Schema; +root_type Schema; \ No newline at end of file diff --git a/parquet/src/arrow/schema/mod.rs b/parquet/src/arrow/schema/mod.rs index 4a78db05ed2d..300a21c4f133 100644 --- a/parquet/src/arrow/schema/mod.rs +++ b/parquet/src/arrow/schema/mod.rs @@ -32,8 +32,7 @@ use arrow_ipc::writer; use arrow_schema::{DataType, Field, Fields, Schema, TimeUnit}; use crate::basic::{ - ConvertedType, LogicalType, Repetition, TimeUnit as ParquetTimeUnit, - Type as PhysicalType, + ConvertedType, LogicalType, Repetition, TimeUnit as ParquetTimeUnit, Type as PhysicalType, }; use crate::errors::{ParquetError, Result}; use crate::file::{metadata::KeyValue, properties::WriterProperties}; @@ -55,11 +54,7 @@ pub fn parquet_to_arrow_schema( parquet_schema: &SchemaDescriptor, key_value_metadata: Option<&Vec>, ) -> Result { - parquet_to_arrow_schema_by_columns( - parquet_schema, - ProjectionMask::all(), - key_value_metadata, - ) + parquet_to_arrow_schema_by_columns(parquet_schema, ProjectionMask::all(), key_value_metadata) } /// Convert parquet schema to arrow schema including optional metadata, @@ -199,10 +194,7 @@ fn encode_arrow_schema(schema: &Schema) -> String { /// Mutates writer metadata by storing the encoded Arrow schema. /// If there is an existing Arrow schema metadata, it is replaced. -pub(crate) fn add_encoded_arrow_schema_to_metadata( - schema: &Schema, - props: &mut WriterProperties, -) { +pub(crate) fn add_encoded_arrow_schema_to_metadata(schema: &Schema, props: &mut WriterProperties) { let encoded = encode_arrow_schema(schema); let schema_kv = KeyValue { @@ -270,16 +262,15 @@ fn parse_key_value_metadata( /// Convert parquet column schema to arrow field. 
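+///
+/// A hypothetical usage sketch (illustrative only; it reuses the
+/// `parse_message_type` / `SchemaDescriptor` pattern from the tests below):
+///
+/// ```ignore
+/// let message = "message schema { REQUIRED INT32 id; }";
+/// let parquet_schema = SchemaDescriptor::new(Arc::new(parse_message_type(message)?));
+/// let field = parquet_to_arrow_field(parquet_schema.column(0).as_ref())?;
+/// assert_eq!(field.name(), "id");
+/// ```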
pub fn parquet_to_arrow_field(parquet_column: &ColumnDescriptor) -> Result { let field = complex::convert_type(&parquet_column.self_type_ptr())?; - let mut ret = Field::new( - parquet_column.name(), - field.arrow_type, - field.nullable, - ); + let mut ret = Field::new(parquet_column.name(), field.arrow_type, field.nullable); let basic_info = parquet_column.self_type().get_basic_info(); if basic_info.has_id() { let mut meta = HashMap::with_capacity(1); - meta.insert(PARQUET_FIELD_ID_META_KEY.to_string(), basic_info.id().to_string()); + meta.insert( + PARQUET_FIELD_ID_META_KEY.to_string(), + basic_info.id().to_string(), + ); ret.set_metadata(meta); } @@ -401,15 +392,9 @@ fn arrow_to_parquet_type(field: &Field) -> Result { is_adjusted_to_u_t_c: matches!(tz, Some(z) if !z.as_ref().is_empty()), unit: match time_unit { TimeUnit::Second => unreachable!(), - TimeUnit::Millisecond => { - ParquetTimeUnit::MILLIS(Default::default()) - } - TimeUnit::Microsecond => { - ParquetTimeUnit::MICROS(Default::default()) - } - TimeUnit::Nanosecond => { - ParquetTimeUnit::NANOS(Default::default()) - } + TimeUnit::Millisecond => ParquetTimeUnit::MILLIS(Default::default()), + TimeUnit::Microsecond => ParquetTimeUnit::MICROS(Default::default()), + TimeUnit::Nanosecond => ParquetTimeUnit::NANOS(Default::default()), }, })) .with_repetition(repetition) @@ -457,9 +442,7 @@ fn arrow_to_parquet_type(field: &Field) -> Result { .with_repetition(repetition) .with_id(id) .build(), - DataType::Duration(_) => { - Err(arrow_err!("Converting Duration to parquet not supported",)) - } + DataType::Duration(_) => Err(arrow_err!("Converting Duration to parquet not supported",)), DataType::Interval(_) => { Type::primitive_type_builder(name, PhysicalType::FIXED_LEN_BYTE_ARRAY) .with_converted_type(ConvertedType::INTERVAL) @@ -481,9 +464,10 @@ fn arrow_to_parquet_type(field: &Field) -> Result { .with_length(*length) .build() } - DataType::BinaryView | DataType::Utf8View => unimplemented!("BinaryView/Utf8View not implemented"), - DataType::Decimal128(precision, scale) - | DataType::Decimal256(precision, scale) => { + DataType::BinaryView | DataType::Utf8View => { + unimplemented!("BinaryView/Utf8View not implemented") + } + DataType::Decimal128(precision, scale) | DataType::Decimal256(precision, scale) => { // Decimal precision determines the Parquet physical type to use. 
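            // (Illustrative summary: small precisions are packed into INT32 or
            // INT64, and larger precisions into a FIXED_LEN_BYTE_ARRAY whose
            // length is derived from the precision, per the spec linked below.)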
// Following the: https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#decimal let (physical_type, length) = if *precision > 1 && *precision <= 9 { @@ -528,12 +512,12 @@ fn arrow_to_parquet_type(field: &Field) -> Result<Type> { .with_id(id) .build() } - DataType::ListView(_) | DataType::LargeListView(_) => unimplemented!("ListView/LargeListView not implemented"), + DataType::ListView(_) | DataType::LargeListView(_) => { + unimplemented!("ListView/LargeListView not implemented") + } DataType::Struct(fields) => { if fields.is_empty() { - return Err( - arrow_err!("Parquet does not support writing empty structs",), - ); + return Err(arrow_err!("Parquet does not support writing empty structs",)); } // recursively convert children to types/nodes let fields = fields @@ -623,8 +607,7 @@ mod tests { let parquet_group_type = parse_message_type(message_type).unwrap(); let parquet_schema = SchemaDescriptor::new(Arc::new(parquet_group_type)); - let converted_arrow_schema = - parquet_to_arrow_schema(&parquet_schema, None).unwrap(); + let converted_arrow_schema = parquet_to_arrow_schema(&parquet_schema, None).unwrap(); let arrow_fields = Fields::from(vec![ Field::new("boolean", DataType::Boolean, false), @@ -662,8 +645,7 @@ mod tests { let parquet_group_type = parse_message_type(message_type).unwrap(); let parquet_schema = SchemaDescriptor::new(Arc::new(parquet_group_type)); - let converted_arrow_schema = - parquet_to_arrow_schema(&parquet_schema, None).unwrap(); + let converted_arrow_schema = parquet_to_arrow_schema(&parquet_schema, None).unwrap(); let arrow_fields = Fields::from(vec![ Field::new("decimal1", DataType::Decimal128(4, 2), false), @@ -689,8 +671,7 @@ mod tests { let parquet_group_type = parse_message_type(message_type).unwrap(); let parquet_schema = SchemaDescriptor::new(Arc::new(parquet_group_type)); - let converted_arrow_schema = - parquet_to_arrow_schema(&parquet_schema, None).unwrap(); + let converted_arrow_schema = parquet_to_arrow_schema(&parquet_schema, None).unwrap(); let arrow_fields = Fields::from(vec![ Field::new("binary", DataType::Binary, false), @@ -711,8 +692,7 @@ mod tests { let parquet_group_type = parse_message_type(message_type).unwrap(); let parquet_schema = SchemaDescriptor::new(Arc::new(parquet_group_type)); - let converted_arrow_schema = - parquet_to_arrow_schema(&parquet_schema, None).unwrap(); + let converted_arrow_schema = parquet_to_arrow_schema(&parquet_schema, None).unwrap(); let arrow_fields = Fields::from(vec![ Field::new("boolean", DataType::Boolean, false), @@ -720,12 +700,9 @@ mod tests { ]); assert_eq!(&arrow_fields, converted_arrow_schema.fields()); - let converted_arrow_schema = parquet_to_arrow_schema_by_columns( - &parquet_schema, - ProjectionMask::all(), - None, - ) - .unwrap(); + let converted_arrow_schema = + parquet_to_arrow_schema_by_columns(&parquet_schema, ProjectionMask::all(), None) + .unwrap(); assert_eq!(&arrow_fields, converted_arrow_schema.fields()); } @@ -923,8 +900,7 @@ mod tests { let parquet_group_type = parse_message_type(message_type).unwrap(); let parquet_schema = SchemaDescriptor::new(Arc::new(parquet_group_type)); - let converted_arrow_schema = - parquet_to_arrow_schema(&parquet_schema, None).unwrap(); + let converted_arrow_schema = parquet_to_arrow_schema(&parquet_schema, None).unwrap(); let converted_fields = converted_arrow_schema.fields(); assert_eq!(arrow_fields.len(), converted_fields.len()); @@ -1002,8 +978,7 @@ mod tests { let parquet_group_type = parse_message_type(message_type).unwrap(); let
parquet_schema = SchemaDescriptor::new(Arc::new(parquet_group_type)); - let converted_arrow_schema = - parquet_to_arrow_schema(&parquet_schema, None).unwrap(); + let converted_arrow_schema = parquet_to_arrow_schema(&parquet_schema, None).unwrap(); let converted_fields = converted_arrow_schema.fields(); assert_eq!(arrow_fields.len(), converted_fields.len()); @@ -1097,8 +1072,7 @@ mod tests { let parquet_group_type = parse_message_type(message_type).unwrap(); let parquet_schema = SchemaDescriptor::new(Arc::new(parquet_group_type)); - let converted_arrow_schema = - parquet_to_arrow_schema(&parquet_schema, None).unwrap(); + let converted_arrow_schema = parquet_to_arrow_schema(&parquet_schema, None).unwrap(); let converted_fields = converted_arrow_schema.fields(); assert_eq!(arrow_fields.len(), converted_fields.len()); @@ -1115,8 +1089,7 @@ mod tests { Field::new("leaf1", DataType::Boolean, false), Field::new("leaf2", DataType::Int32, false), ]); - let group1_struct = - Field::new("group1", DataType::Struct(group1_fields), false); + let group1_struct = Field::new("group1", DataType::Struct(group1_fields), false); arrow_fields.push(group1_struct); let leaf3_field = Field::new("leaf3", DataType::Int64, false); @@ -1135,8 +1108,7 @@ mod tests { let parquet_group_type = parse_message_type(message_type).unwrap(); let parquet_schema = SchemaDescriptor::new(Arc::new(parquet_group_type)); - let converted_arrow_schema = - parquet_to_arrow_schema(&parquet_schema, None).unwrap(); + let converted_arrow_schema = parquet_to_arrow_schema(&parquet_schema, None).unwrap(); let converted_fields = converted_arrow_schema.fields(); assert_eq!(arrow_fields.len(), converted_fields.len()); @@ -1289,8 +1261,7 @@ mod tests { let parquet_group_type = parse_message_type(message_type).unwrap(); let parquet_schema = SchemaDescriptor::new(Arc::new(parquet_group_type)); - let converted_arrow_schema = - parquet_to_arrow_schema(&parquet_schema, None).unwrap(); + let converted_arrow_schema = parquet_to_arrow_schema(&parquet_schema, None).unwrap(); let converted_fields = converted_arrow_schema.fields(); assert_eq!(arrow_fields.len(), converted_fields.len()); @@ -1515,20 +1486,11 @@ mod tests { vec![ Field::new("bools", DataType::Boolean, false), Field::new("uint32", DataType::UInt32, false), - Field::new_list( - "int32", - Field::new("element", DataType::Int32, true), - false, - ), + Field::new_list("int32", Field::new("element", DataType::Int32, true), false), ], false, ), - Field::new_dictionary( - "dictionary_strings", - DataType::Int32, - DataType::Utf8, - false, - ), + Field::new_dictionary("dictionary_strings", DataType::Int32, DataType::Utf8, false), Field::new("decimal_int32", DataType::Decimal128(8, 2), false), Field::new("decimal_int64", DataType::Decimal128(16, 2), false), Field::new("decimal_fix_length", DataType::Decimal128(30, 2), false), @@ -1613,10 +1575,8 @@ mod tests { let schema = Schema::new_with_metadata( vec![ - Field::new("c1", DataType::Utf8, false).with_metadata(meta(&[ - ("Key", "Foo"), - (PARQUET_FIELD_ID_META_KEY, "2"), - ])), + Field::new("c1", DataType::Utf8, false) + .with_metadata(meta(&[("Key", "Foo"), (PARQUET_FIELD_ID_META_KEY, "2")])), Field::new("c2", DataType::Binary, false), Field::new("c3", DataType::FixedSizeBinary(3), false), Field::new("c4", DataType::Boolean, false), @@ -1634,10 +1594,7 @@ mod tests { ), Field::new( "c17", - DataType::Timestamp( - TimeUnit::Microsecond, - Some("Africa/Johannesburg".into()), - ), + DataType::Timestamp(TimeUnit::Microsecond, 
Some("Africa/Johannesburg".into())), false, ), Field::new( @@ -1649,10 +1606,8 @@ mod tests { Field::new("c20", DataType::Interval(IntervalUnit::YearMonth), false), Field::new_list( "c21", - Field::new("item", DataType::Boolean, true).with_metadata(meta(&[ - ("Key", "Bar"), - (PARQUET_FIELD_ID_META_KEY, "5"), - ])), + Field::new("item", DataType::Boolean, true) + .with_metadata(meta(&[("Key", "Bar"), (PARQUET_FIELD_ID_META_KEY, "5")])), false, ) .with_metadata(meta(&[(PARQUET_FIELD_ID_META_KEY, "4")])), @@ -1702,10 +1657,7 @@ mod tests { // Field::new("c30", DataType::Duration(TimeUnit::Nanosecond), false), Field::new_dict( "c31", - DataType::Dictionary( - Box::new(DataType::Int32), - Box::new(DataType::Utf8), - ), + DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)), true, 123, true, @@ -1740,11 +1692,7 @@ mod tests { "c39", "key_value", Field::new("key", DataType::Utf8, false), - Field::new_list( - "value", - Field::new("element", DataType::Utf8, true), - true, - ), + Field::new_list("value", Field::new("element", DataType::Utf8, true), true), false, // fails to roundtrip keys_sorted true, ), @@ -1783,11 +1731,8 @@ mod tests { // write to an empty parquet file so that schema is serialized let file = tempfile::tempfile().unwrap(); - let writer = ArrowWriter::try_new( - file.try_clone().unwrap(), - Arc::new(schema.clone()), - None, - )?; + let writer = + ArrowWriter::try_new(file.try_clone().unwrap(), Arc::new(schema.clone()), None)?; writer.close()?; // read file back @@ -1846,33 +1791,23 @@ mod tests { }; let schema = Schema::new_with_metadata( vec![ - Field::new("c1", DataType::Utf8, true).with_metadata(meta(&[ - (PARQUET_FIELD_ID_META_KEY, "1"), - ])), - Field::new("c2", DataType::Utf8, true).with_metadata(meta(&[ - (PARQUET_FIELD_ID_META_KEY, "2"), - ])), + Field::new("c1", DataType::Utf8, true) + .with_metadata(meta(&[(PARQUET_FIELD_ID_META_KEY, "1")])), + Field::new("c2", DataType::Utf8, true) + .with_metadata(meta(&[(PARQUET_FIELD_ID_META_KEY, "2")])), ], HashMap::new(), ); - let writer = ArrowWriter::try_new( - vec![], - Arc::new(schema.clone()), - None, - )?; + let writer = ArrowWriter::try_new(vec![], Arc::new(schema.clone()), None)?; let parquet_bytes = writer.into_inner()?; - let reader = crate::file::reader::SerializedFileReader::new( - bytes::Bytes::from(parquet_bytes), - )?; + let reader = + crate::file::reader::SerializedFileReader::new(bytes::Bytes::from(parquet_bytes))?; let schema_descriptor = reader.metadata().file_metadata().schema_descr_ptr(); // don't pass metadata so field ids are read from Parquet and not from serialized Arrow schema - let arrow_schema = crate::arrow::parquet_to_arrow_schema( - &schema_descriptor, - None, - )?; + let arrow_schema = crate::arrow::parquet_to_arrow_schema(&schema_descriptor, None)?; let parq_schema_descr = crate::arrow::arrow_to_parquet_schema(&arrow_schema)?; let parq_fields = parq_schema_descr.root_schema().get_fields(); @@ -1885,19 +1820,14 @@ mod tests { #[test] fn test_arrow_schema_roundtrip_lists() -> Result<()> { - let metadata: HashMap<String, String> = - [("Key".to_string(), "Value".to_string())] - .iter() - .cloned() - .collect(); + let metadata: HashMap<String, String> = [("Key".to_string(), "Value".to_string())] + .iter() + .cloned() + .collect(); let schema = Schema::new_with_metadata( vec![ - Field::new_list( - "c21", - Field::new("array", DataType::Boolean, true), - false, - ), + Field::new_list("c21", Field::new("array", DataType::Boolean, true), false), Field::new( "c22", DataType::FixedSizeList( @@ -1928,11 +1858,8 @@ mod
tests { // write to an empty parquet file so that schema is serialized let file = tempfile::tempfile().unwrap(); - let writer = ArrowWriter::try_new( - file.try_clone().unwrap(), - Arc::new(schema.clone()), - None, - )?; + let writer = + ArrowWriter::try_new(file.try_clone().unwrap(), Arc::new(schema.clone()), None)?; writer.close()?; // read file back From ada986c7ec8f8fe4f94235c8aaeba4995392ee72 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com> Date: Fri, 15 Mar 2024 20:30:08 +1300 Subject: [PATCH 08/11] Prepare arrow 51.0.0 (#5516) --- CHANGELOG-old.md | 141 ++++++++++++++++++ CHANGELOG.md | 248 +++++++++++++++---------- Cargo.toml | 32 ++-- dev/release/update_change_log.sh | 4 +- 4 files changed, 281 insertions(+), 144 deletions(-) diff --git a/CHANGELOG-old.md b/CHANGELOG-old.md index 5df29b350b85..b86431397139 100644 --- a/CHANGELOG-old.md +++ b/CHANGELOG-old.md @@ -19,6 +19,147 @@ # Historical Changelog +## [50.0.0](https://github.com/apache/arrow-rs/tree/50.0.0) (2024-01-08) + +[Full Changelog](https://github.com/apache/arrow-rs/compare/49.0.0...50.0.0) + +**Breaking changes:** + +- Make regexp\_match take scalar pattern and flag [\#5245](https://github.com/apache/arrow-rs/pull/5245) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- Use Vec in ColumnReader \(\#5177\) [\#5193](https://github.com/apache/arrow-rs/pull/5193) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) +- Remove SIMD Feature [\#5184](https://github.com/apache/arrow-rs/pull/5184) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Use Total Ordering for Aggregates and Refactor for Better Auto-Vectorization [\#5100](https://github.com/apache/arrow-rs/pull/5100) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jhorstmann](https://github.com/jhorstmann)) +- Allow the `zip` compute function to operate on `Scalar` values via `Datum` [\#5086](https://github.com/apache/arrow-rs/pull/5086) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Nathan-Fenner](https://github.com/Nathan-Fenner)) +- Improve C Data Interface and Add Integration Testing Entrypoints [\#5080](https://github.com/apache/arrow-rs/pull/5080) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([pitrou](https://github.com/pitrou)) +- Parquet: read/write f16 for Arrow
[\#5252](https://github.com/apache/arrow-rs/issues/5252) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Make regexp\_match take scalar pattern and flag [\#5246](https://github.com/apache/arrow-rs/issues/5246) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Cannot access pen state website on arrow-row [\#5238](https://github.com/apache/arrow-rs/issues/5238) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- RecordBatch with\_schema's error message is hard to read [\#5227](https://github.com/apache/arrow-rs/issues/5227) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Support cast between StructArray. [\#5219](https://github.com/apache/arrow-rs/issues/5219) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Remove nightly-only simd feature and related code in ArrowNumericType [\#5185](https://github.com/apache/arrow-rs/issues/5185) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Use Vec instead of Slice in ColumnReader [\#5177](https://github.com/apache/arrow-rs/issues/5177) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- Request to Memmap Arrow IPC files on disk [\#5153](https://github.com/apache/arrow-rs/issues/5153) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- GenericColumnReader::read\_records Yields Truncated Records [\#5150](https://github.com/apache/arrow-rs/issues/5150) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- Nested Schema Projection [\#5148](https://github.com/apache/arrow-rs/issues/5148) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Support specifying `quote` and `escape` in Csv `WriterBuilder` [\#5146](https://github.com/apache/arrow-rs/issues/5146) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Support casting of Float16 with other numeric types [\#5138](https://github.com/apache/arrow-rs/issues/5138) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Parquet: read parquet metadata with page index in async and with size hints [\#5129](https://github.com/apache/arrow-rs/issues/5129) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- Cast from floating/timestamp to timestamp/floating [\#5122](https://github.com/apache/arrow-rs/issues/5122) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Support Casting List To/From LargeList in Cast Kernel [\#5113](https://github.com/apache/arrow-rs/issues/5113) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Expose a path for converting `bytes::Bytes` into `arrow_buffer::Buffer` without copy [\#5104](https://github.com/apache/arrow-rs/issues/5104) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- API inconsistency of ListBuilder make it hard to use as nested builder [\#5098](https://github.com/apache/arrow-rs/issues/5098) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Parquet: don't truncate min/max statistics for float16 and decimal when writing file [\#5075](https://github.com/apache/arrow-rs/issues/5075) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- Parquet: derive boundary order when writing columns 
[\#5074](https://github.com/apache/arrow-rs/issues/5074) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- Support new Arrow PyCapsule Interface for Python FFI [\#5067](https://github.com/apache/arrow-rs/issues/5067) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- `48.0.1 ` arrow patch release [\#5050](https://github.com/apache/arrow-rs/issues/5050) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Binary columns do not receive truncated statistics [\#5037](https://github.com/apache/arrow-rs/issues/5037) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- Re-evaluate Explicit SIMD Aggregations [\#5032](https://github.com/apache/arrow-rs/issues/5032) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Min/Max Kernels Should Use Total Ordering [\#5031](https://github.com/apache/arrow-rs/issues/5031) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Allow `zip` compute kernel to take `Scalar` / `Datum` [\#5011](https://github.com/apache/arrow-rs/issues/5011) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Add Float16/Half-float logical type to Parquet [\#4986](https://github.com/apache/arrow-rs/issues/4986) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- feat: cast \(Large\)List to FixedSizeList [\#5081](https://github.com/apache/arrow-rs/pull/5081) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([wjones127](https://github.com/wjones127)) +- Update Parquet Encoding Documentation [\#5051](https://github.com/apache/arrow-rs/issues/5051) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] + +**Fixed bugs:** + +- json schema inference can't handle null field turned into object field in subsequent rows [\#5215](https://github.com/apache/arrow-rs/issues/5215) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Invalid trailing content after `Z` in timezone is ignored [\#5182](https://github.com/apache/arrow-rs/issues/5182) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Take panics on a fixed size list array when given null indices [\#5169](https://github.com/apache/arrow-rs/issues/5169) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- EnabledStatistics::Page does not take effect on ByteArrayEncoder [\#5162](https://github.com/apache/arrow-rs/issues/5162) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- Parquet: ColumnOrder not being written when writing parquet files [\#5152](https://github.com/apache/arrow-rs/issues/5152) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- Parquet: Interval columns shouldn't write min/max stats [\#5145](https://github.com/apache/arrow-rs/issues/5145) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- cast `Utf8` to decimal failure [\#5127](https://github.com/apache/arrow-rs/issues/5127) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- coerce\_primitive not honored when decoding from serde object [\#5095](https://github.com/apache/arrow-rs/issues/5095) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Unsound MutableArrayData Constructor 
[\#5091](https://github.com/apache/arrow-rs/issues/5091) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- RowGroupReader.get\_row\_iter\(\) fails with Path ColumnPath not found [\#5064](https://github.com/apache/arrow-rs/issues/5064) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- cast format 'yyyymmdd' to Date32 give a error [\#5044](https://github.com/apache/arrow-rs/issues/5044) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] + +**Performance improvements:** + +- ArrowArrayStreamReader imports FFI\_ArrowSchema on each iteration [\#5103](https://github.com/apache/arrow-rs/issues/5103) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] + +**Closed issues:** + +- Working example of list\_flights with ObjectStore [\#5116](https://github.com/apache/arrow-rs/issues/5116) +- \(object\_store\) Error broken pipe on S3 multipart upload [\#5106](https://github.com/apache/arrow-rs/issues/5106) + +**Merged pull requests:** + +- Update parquet object\_store dependency to 0.9.0 [\#5290](https://github.com/apache/arrow-rs/pull/5290) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) +- Update proc-macro2 requirement from =1.0.75 to =1.0.76 [\#5289](https://github.com/apache/arrow-rs/pull/5289) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([dependabot[bot]](https://github.com/apps/dependabot)) +- Enable JS tests again [\#5287](https://github.com/apache/arrow-rs/pull/5287) ([domoritz](https://github.com/domoritz)) +- Update proc-macro2 requirement from =1.0.74 to =1.0.75 [\#5279](https://github.com/apache/arrow-rs/pull/5279) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([dependabot[bot]](https://github.com/apps/dependabot)) +- Update proc-macro2 requirement from =1.0.73 to =1.0.74 [\#5271](https://github.com/apache/arrow-rs/pull/5271) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([dependabot[bot]](https://github.com/apps/dependabot)) +- Update proc-macro2 requirement from =1.0.71 to =1.0.73 [\#5265](https://github.com/apache/arrow-rs/pull/5265) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([dependabot[bot]](https://github.com/apps/dependabot)) +- Update docs for datatypes [\#5260](https://github.com/apache/arrow-rs/pull/5260) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Jefffrey](https://github.com/Jefffrey)) +- Don't suppress errors in ArrowArrayStreamReader [\#5256](https://github.com/apache/arrow-rs/pull/5256) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Add IPC FileDecoder [\#5249](https://github.com/apache/arrow-rs/pull/5249) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- optimize the next function of ArrowArrayStreamReader [\#5248](https://github.com/apache/arrow-rs/pull/5248) 
[[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([doki23](https://github.com/doki23)) +- ci: Fail Miri CI on first failure [\#5243](https://github.com/apache/arrow-rs/pull/5243) ([Jefffrey](https://github.com/Jefffrey)) +- Remove 'unwrap' from Result [\#5241](https://github.com/apache/arrow-rs/pull/5241) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([zeevm](https://github.com/zeevm)) +- Update arrow-row docs URL [\#5239](https://github.com/apache/arrow-rs/pull/5239) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([thomas-k-cameron](https://github.com/thomas-k-cameron)) +- Improve regexp kernels performance by avoiding cloning Regex [\#5235](https://github.com/apache/arrow-rs/pull/5235) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- Update proc-macro2 requirement from =1.0.70 to =1.0.71 [\#5231](https://github.com/apache/arrow-rs/pull/5231) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([dependabot[bot]](https://github.com/apps/dependabot)) +- Minor: Improve comments and errors for ArrowPredicate [\#5230](https://github.com/apache/arrow-rs/pull/5230) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb)) +- Bump actions/upload-pages-artifact from 2 to 3 [\#5229](https://github.com/apache/arrow-rs/pull/5229) ([dependabot[bot]](https://github.com/apps/dependabot)) +- make with\_schema's error more readable [\#5228](https://github.com/apache/arrow-rs/pull/5228) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([shuoli84](https://github.com/shuoli84)) +- Use `try_new` when casting between structs to propagate error [\#5226](https://github.com/apache/arrow-rs/pull/5226) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- feat\(cast\): support cast between struct [\#5221](https://github.com/apache/arrow-rs/pull/5221) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([my-vegetable-has-exploded](https://github.com/my-vegetable-has-exploded)) +- Add `entries` to `MapBuilder` to return both key and value array builders [\#5218](https://github.com/apache/arrow-rs/pull/5218) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- fix\(json\): fix inferring object after field was null [\#5216](https://github.com/apache/arrow-rs/pull/5216) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([kskalski](https://github.com/kskalski)) +- Support MapBuilder in make\_builder [\#5210](https://github.com/apache/arrow-rs/pull/5210) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- impl `From>` for `ScalarBuffer` [\#5203](https://github.com/apache/arrow-rs/pull/5203) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([mbrobbel](https://github.com/mbrobbel)) +- impl `From>` for `Buffer` [\#5202](https://github.com/apache/arrow-rs/pull/5202) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([mbrobbel](https://github.com/mbrobbel)) +- impl `From>` for 
`ScalarBuffer` [\#5201](https://github.com/apache/arrow-rs/pull/5201) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([mbrobbel](https://github.com/mbrobbel)) +- feat: Support quote and escape in Csv WriterBuilder [\#5196](https://github.com/apache/arrow-rs/pull/5196) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([my-vegetable-has-exploded](https://github.com/my-vegetable-has-exploded)) +- chore: simplify cast\_string\_to\_interval [\#5195](https://github.com/apache/arrow-rs/pull/5195) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jackwener](https://github.com/jackwener)) +- Clarify interval comparison behavior with documentation and tests [\#5192](https://github.com/apache/arrow-rs/pull/5192) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) +- Add `BooleanArray::into_parts` method [\#5191](https://github.com/apache/arrow-rs/pull/5191) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([mbrobbel](https://github.com/mbrobbel)) +- Fix deprecated note for `Buffer::from_raw_parts` [\#5190](https://github.com/apache/arrow-rs/pull/5190) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([mbrobbel](https://github.com/mbrobbel)) +- Fix: Ensure Timestamp Parsing Rejects Characters After 'Z [\#5189](https://github.com/apache/arrow-rs/pull/5189) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([razeghi71](https://github.com/razeghi71)) +- Simplify parquet statistics generation [\#5183](https://github.com/apache/arrow-rs/pull/5183) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) +- Parquet: Ensure page statistics are written only when configured from the Arrow Writer [\#5181](https://github.com/apache/arrow-rs/pull/5181) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([AdamGS](https://github.com/AdamGS)) +- Blockwise IO in IPC FileReader \(\#5153\) [\#5179](https://github.com/apache/arrow-rs/pull/5179) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Replace ScalarBuffer in Parquet with Vec \(\#1849\) \(\#5177\) [\#5178](https://github.com/apache/arrow-rs/pull/5178) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) +- Bump actions/setup-python from 4 to 5 [\#5175](https://github.com/apache/arrow-rs/pull/5175) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Add `LargeListBuilder` to `make_builder` [\#5171](https://github.com/apache/arrow-rs/pull/5171) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- fix: ensure take\_fixed\_size\_list can handle null indices [\#5170](https://github.com/apache/arrow-rs/pull/5170) ([westonpace](https://github.com/westonpace)) +- Removing redundant `as casts` in parquet [\#5168](https://github.com/apache/arrow-rs/pull/5168) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([psvri](https://github.com/psvri)) +- Bump actions/labeler from 4.3.0 to 5.0.0 [\#5167](https://github.com/apache/arrow-rs/pull/5167)
([dependabot[bot]](https://github.com/apps/dependabot)) +- improve: make RunArray displayable [\#5166](https://github.com/apache/arrow-rs/pull/5166) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([yukkit](https://github.com/yukkit)) +- ci: Add cargo audit CI action [\#5160](https://github.com/apache/arrow-rs/pull/5160) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Jefffrey](https://github.com/Jefffrey)) +- Parquet: write column\_orders in FileMetaData [\#5158](https://github.com/apache/arrow-rs/pull/5158) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([Jefffrey](https://github.com/Jefffrey)) +- Adding `is_null` datatype shortcut method [\#5157](https://github.com/apache/arrow-rs/pull/5157) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([comphead](https://github.com/comphead)) +- Parquet: don't truncate f16/decimal min/max stats [\#5154](https://github.com/apache/arrow-rs/pull/5154) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([Jefffrey](https://github.com/Jefffrey)) +- Support nested schema projection \(\#5148\) [\#5149](https://github.com/apache/arrow-rs/pull/5149) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Parquet: omit min/max for interval columns when writing stats [\#5147](https://github.com/apache/arrow-rs/pull/5147) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([Jefffrey](https://github.com/Jefffrey)) +- Deprecate Fields::remove and Schema::remove [\#5144](https://github.com/apache/arrow-rs/pull/5144) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Support casting of Float16 with other numeric types [\#5139](https://github.com/apache/arrow-rs/pull/5139) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- Parquet: Make `MetadataLoader` public [\#5137](https://github.com/apache/arrow-rs/pull/5137) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([AdamGS](https://github.com/AdamGS)) +- Add FileReaderBuilder for arrow-ipc to allow reading large no. 
of column files [\#5136](https://github.com/apache/arrow-rs/pull/5136) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Jefffrey](https://github.com/Jefffrey)) +- Parquet: clear metadata and project fields of ParquetRecordBatchStream::schema [\#5135](https://github.com/apache/arrow-rs/pull/5135) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([Jefffrey](https://github.com/Jefffrey)) +- JSON: write struct array nulls as null [\#5133](https://github.com/apache/arrow-rs/pull/5133) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Jefffrey](https://github.com/Jefffrey)) +- Update proc-macro2 requirement from =1.0.69 to =1.0.70 [\#5131](https://github.com/apache/arrow-rs/pull/5131) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([dependabot[bot]](https://github.com/apps/dependabot)) +- Fix negative decimal string [\#5128](https://github.com/apache/arrow-rs/pull/5128) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- Cleanup list casting and support nested lists \(\#5113\) [\#5124](https://github.com/apache/arrow-rs/pull/5124) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Cast from numeric/timestamp to timestamp/numeric [\#5123](https://github.com/apache/arrow-rs/pull/5123) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- Improve cast docs [\#5114](https://github.com/apache/arrow-rs/pull/5114) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Update prost-build requirement from =0.12.2 to =0.12.3 [\#5112](https://github.com/apache/arrow-rs/pull/5112) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([dependabot[bot]](https://github.com/apps/dependabot)) +- Parquet: derive boundary order when writing [\#5110](https://github.com/apache/arrow-rs/pull/5110) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([Jefffrey](https://github.com/Jefffrey)) +- Implementing `ArrayBuilder` for `Box<dyn ArrayBuilder>` [\#5109](https://github.com/apache/arrow-rs/pull/5109) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- Fix 'ColumnPath not found' error reading Parquet files with nested REPEATED fields [\#5102](https://github.com/apache/arrow-rs/pull/5102) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([mmaitre314](https://github.com/mmaitre314)) +- fix: coerce\_primitive for serde decoded data [\#5101](https://github.com/apache/arrow-rs/pull/5101) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([fansehep](https://github.com/fansehep)) +- Extend aggregation benchmarks [\#5096](https://github.com/apache/arrow-rs/pull/5096) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jhorstmann](https://github.com/jhorstmann)) +- Expand parquet crate overview doc [\#5093](https://github.com/apache/arrow-rs/pull/5093)
[[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([mmaitre314](https://github.com/mmaitre314)) +- Ensure arrays passed to MutableArrayData have same type \(\#5091\) [\#5092](https://github.com/apache/arrow-rs/pull/5092) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Update prost-build requirement from =0.12.1 to =0.12.2 [\#5088](https://github.com/apache/arrow-rs/pull/5088) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([dependabot[bot]](https://github.com/apps/dependabot)) +- Add FFI from\_raw [\#5082](https://github.com/apache/arrow-rs/pull/5082) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- \[fix \#5044\] Support converting 'yyyymmdd' format to date [\#5078](https://github.com/apache/arrow-rs/pull/5078) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Tangruilin](https://github.com/Tangruilin)) +- Enable truncation of binary statistics columns [\#5076](https://github.com/apache/arrow-rs/pull/5076) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([emcake](https://github.com/emcake)) ## [49.0.0](https://github.com/apache/arrow-rs/tree/49.0.0) (2023-11-07) [Full Changelog](https://github.com/apache/arrow-rs/compare/48.0.0...49.0.0) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4c173bfdeda0..2eac54afaf32 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,148 +19,144 @@ # Changelog -## [50.0.0](https://github.com/apache/arrow-rs/tree/50.0.0) (2024-01-08) +## [51.0.0](https://github.com/apache/arrow-rs/tree/51.0.0) (2024-03-15) -[Full Changelog](https://github.com/apache/arrow-rs/compare/49.0.0...50.0.0) +[Full Changelog](https://github.com/apache/arrow-rs/compare/50.0.0...51.0.0) **Breaking changes:** -- Make regexp\_match take scalar pattern and flag [\#5245](https://github.com/apache/arrow-rs/pull/5245) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) -- Use Vec in ColumnReader \(\#5177\) [\#5193](https://github.com/apache/arrow-rs/pull/5193) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) -- Remove SIMD Feature [\#5184](https://github.com/apache/arrow-rs/pull/5184) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) -- Use Total Ordering for Aggregates and Refactor for Better Auto-Vectorization [\#5100](https://github.com/apache/arrow-rs/pull/5100) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jhorstmann](https://github.com/jhorstmann)) -- Allow the `zip` compute function to operator on `Scalar` values via `Datum` [\#5086](https://github.com/apache/arrow-rs/pull/5086) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Nathan-Fenner](https://github.com/Nathan-Fenner)) -- Improve C Data Interface and Add Integration Testing Entrypoints [\#5080](https://github.com/apache/arrow-rs/pull/5080) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([pitrou](https://github.com/pitrou)) -- Parquet: read/write f16 for Arrow 
[\#5003](https://github.com/apache/arrow-rs/pull/5003) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([Jefffrey](https://github.com/Jefffrey)) +- Remove internal buffering from AsyncArrowWriter \(\#5484\) [\#5485](https://github.com/apache/arrow-rs/pull/5485) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) +- Make ArrayBuilder also Sync [\#5353](https://github.com/apache/arrow-rs/pull/5353) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([dvic](https://github.com/dvic)) +- Raw JSON writer \(~10x faster\) \(\#5314\) [\#5318](https://github.com/apache/arrow-rs/pull/5318) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) **Implemented enhancements:** -- Support get offsets or blocks info from arrow file. [\#5252](https://github.com/apache/arrow-rs/issues/5252) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Make regexp\_match take scalar pattern and flag [\#5246](https://github.com/apache/arrow-rs/issues/5246) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Cannot access pen state website on arrow-row [\#5238](https://github.com/apache/arrow-rs/issues/5238) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- RecordBatch with\_schema's error message is hard to read [\#5227](https://github.com/apache/arrow-rs/issues/5227) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Support cast between StructArray. [\#5219](https://github.com/apache/arrow-rs/issues/5219) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Remove nightly-only simd feature and related code in ArrowNumericType [\#5185](https://github.com/apache/arrow-rs/issues/5185) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Use Vec instead of Slice in ColumnReader [\#5177](https://github.com/apache/arrow-rs/issues/5177) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] -- Request to Memmap Arrow IPC files on disk [\#5153](https://github.com/apache/arrow-rs/issues/5153) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- GenericColumnReader::read\_records Yields Truncated Records [\#5150](https://github.com/apache/arrow-rs/issues/5150) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] -- Nested Schema Projection [\#5148](https://github.com/apache/arrow-rs/issues/5148) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Support specifying `quote` and `escape` in Csv `WriterBuilder` [\#5146](https://github.com/apache/arrow-rs/issues/5146) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Support casting of Float16 with other numeric types [\#5138](https://github.com/apache/arrow-rs/issues/5138) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Parquet: read parquet metadata with page index in async and with size hints [\#5129](https://github.com/apache/arrow-rs/issues/5129) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] -- Cast from floating/timestamp to timestamp/floating [\#5122](https://github.com/apache/arrow-rs/issues/5122) 
[[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Support Casting List To/From LargeList in Cast Kernel [\#5113](https://github.com/apache/arrow-rs/issues/5113) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Expose a path for converting `bytes::Bytes` into `arrow_buffer::Buffer` without copy [\#5104](https://github.com/apache/arrow-rs/issues/5104) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- API inconsistency of ListBuilder make it hard to use as nested builder [\#5098](https://github.com/apache/arrow-rs/issues/5098) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Parquet: don't truncate min/max statistics for float16 and decimal when writing file [\#5075](https://github.com/apache/arrow-rs/issues/5075) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] -- Parquet: derive boundary order when writing columns [\#5074](https://github.com/apache/arrow-rs/issues/5074) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] -- Support new Arrow PyCapsule Interface for Python FFI [\#5067](https://github.com/apache/arrow-rs/issues/5067) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- `48.0.1 ` arrow patch release [\#5050](https://github.com/apache/arrow-rs/issues/5050) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Binary columns do not receive truncated statistics [\#5037](https://github.com/apache/arrow-rs/issues/5037) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] -- Re-evaluate Explicit SIMD Aggregations [\#5032](https://github.com/apache/arrow-rs/issues/5032) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Min/Max Kernels Should Use Total Ordering [\#5031](https://github.com/apache/arrow-rs/issues/5031) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Allow `zip` compute kernel to take `Scalar` / `Datum` [\#5011](https://github.com/apache/arrow-rs/issues/5011) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Add Float16/Half-float logical type to Parquet [\#4986](https://github.com/apache/arrow-rs/issues/4986) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] -- feat: cast \(Large\)List to FixedSizeList [\#5081](https://github.com/apache/arrow-rs/pull/5081) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([wjones127](https://github.com/wjones127)) -- Update Parquet Encoding Documentation [\#5051](https://github.com/apache/arrow-rs/issues/5051) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- Prototype Arrow over HTTP in Rust [\#5496](https://github.com/apache/arrow-rs/issues/5496) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Add DataType::ListView and DataType::LargeListView [\#5492](https://github.com/apache/arrow-rs/issues/5492) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Improve documentation around handling of dictionary arrays in arrow flight [\#5487](https://github.com/apache/arrow-rs/issues/5487) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] 
[[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] +- Better memory limiting in parquet `ArrowWriter` [\#5484](https://github.com/apache/arrow-rs/issues/5484) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- Support Creating Non-Nullable Lists and Maps within a Struct [\#5482](https://github.com/apache/arrow-rs/issues/5482) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- \[DISCUSSION\] Better borrow propagation \(e.g. `RecordBatch::schema()` to return `&SchemaRef` vs `SchemaRef`\) [\#5463](https://github.com/apache/arrow-rs/issues/5463) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] +- Build Scalar with ArrayRef [\#5459](https://github.com/apache/arrow-rs/issues/5459) +- AsyncArrowWriter doesn't limit underlying ArrowWriter to respect buffer-size [\#5450](https://github.com/apache/arrow-rs/issues/5450) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- Refine `Display` implementation for `FlightError` [\#5438](https://github.com/apache/arrow-rs/issues/5438) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] +- Better ergonomics for `FixedSizeList` and `LargeList` [\#5372](https://github.com/apache/arrow-rs/issues/5372) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Update Flight proto [\#5367](https://github.com/apache/arrow-rs/issues/5367) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] +- Support check similar datatype but with different magnitudes [\#5358](https://github.com/apache/arrow-rs/issues/5358) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Buffer memory usage for custom allocations is reported as 0 [\#5346](https://github.com/apache/arrow-rs/issues/5346) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Can the ArrayBuilder trait be made Sync? 
[\#5344](https://github.com/apache/arrow-rs/issues/5344) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- support cast 'UTF8' to `FixedSizeList` [\#5339](https://github.com/apache/arrow-rs/issues/5339) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Support Creating Non-Nullable Lists with ListBuilder [\#5330](https://github.com/apache/arrow-rs/issues/5330) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- `ParquetRecordBatchStreamBuilder::new()` panics instead of erroring out when opening a corrupted file [\#5315](https://github.com/apache/arrow-rs/issues/5315) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- Raw JSON Writer [\#5314](https://github.com/apache/arrow-rs/issues/5314) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Add support for more fused boolean operations [\#5297](https://github.com/apache/arrow-rs/issues/5297) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- parquet: Allow disabling embed `ARROW_SCHEMA_META_KEY` added by the `ArrowWriter` [\#5296](https://github.com/apache/arrow-rs/issues/5296) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- Support casting strings like '2001-01-01 01:01:01' to Date32 [\#5280](https://github.com/apache/arrow-rs/issues/5280) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Temporal Extract/Date Part Kernel [\#5266](https://github.com/apache/arrow-rs/issues/5266) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Support for extracting hours/minutes/seconds/etc. from `Time32`/`Time64` type in temporal kernels [\#5261](https://github.com/apache/arrow-rs/issues/5261) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- parquet: add method to get both the inner writer and the file metadata when closing SerializedFileWriter [\#5253](https://github.com/apache/arrow-rs/issues/5253) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- Release arrow-rs version 50.0.0 [\#5234](https://github.com/apache/arrow-rs/issues/5234) **Fixed bugs:** -- json schema inference can't handle null field turned into object field in subsequent rows [\#5215](https://github.com/apache/arrow-rs/issues/5215) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Invalid trailing content after `Z` in timezone is ignored [\#5182](https://github.com/apache/arrow-rs/issues/5182) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Take panics on a fixed size list array when given null indices [\#5169](https://github.com/apache/arrow-rs/issues/5169) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- EnabledStatistics::Page does not take effect on ByteArrayEncoder [\#5162](https://github.com/apache/arrow-rs/issues/5162) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] -- Parquet: ColumnOrder not being written when writing parquet files [\#5152](https://github.com/apache/arrow-rs/issues/5152) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] -- Parquet: Interval columns shouldn't write min/max stats [\#5145](https://github.com/apache/arrow-rs/issues/5145) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] -- cast `Utf8` to decimal failure 
[\#5127](https://github.com/apache/arrow-rs/issues/5127) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- coerce\_primitive not honored when decoding from serde object [\#5095](https://github.com/apache/arrow-rs/issues/5095) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Unsound MutableArrayData Constructor [\#5091](https://github.com/apache/arrow-rs/issues/5091) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- RowGroupReader.get\_row\_iter\(\) fails with Path ColumnPath not found [\#5064](https://github.com/apache/arrow-rs/issues/5064) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] -- cast format 'yyyymmdd' to Date32 give a error [\#5044](https://github.com/apache/arrow-rs/issues/5044) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Empty String Parses as Zero in Unreleased Arrow [\#5504](https://github.com/apache/arrow-rs/issues/5504) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Unused import in nightly rust [\#5476](https://github.com/apache/arrow-rs/issues/5476) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] +- Error `The data type type List .. has no natural order` when using `arrow::compute::lexsort_to_indices` with list and more than one column [\#5454](https://github.com/apache/arrow-rs/issues/5454) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Wrong size assertion in arrow\_buffer::builder::NullBufferBuilder::new\_from\_buffer [\#5445](https://github.com/apache/arrow-rs/issues/5445) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Inconsistency between comments and code implementation [\#5430](https://github.com/apache/arrow-rs/issues/5430) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- OOB access in `Buffer::from_iter` [\#5412](https://github.com/apache/arrow-rs/issues/5412) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Cast kernel doesn't return null for string to integral cases when overflowing under safe option enabled [\#5397](https://github.com/apache/arrow-rs/issues/5397) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Make ffi consume variable layout arrays with empty offsets [\#5391](https://github.com/apache/arrow-rs/issues/5391) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- RecordBatch conversion from pyarrow loses Schema's metadata [\#5354](https://github.com/apache/arrow-rs/issues/5354) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Debug output of Time32/Time64 arrays with invalid values has confusing nulls [\#5336](https://github.com/apache/arrow-rs/issues/5336) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Removing a column from a `RecordBatch` drops schema metadata [\#5327](https://github.com/apache/arrow-rs/issues/5327) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Panic when read an empty parquet file [\#5304](https://github.com/apache/arrow-rs/issues/5304) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- How to enable statistics for string columns? 
[\#5270](https://github.com/apache/arrow-rs/issues/5270) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- `concat::tests::test_string_dictionary_merge failure` fails on Mac / has different results in different platforms [\#5255](https://github.com/apache/arrow-rs/issues/5255) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] + +**Documentation updates:** + +- Minor: Add doc comments to `GenericByteViewArray` [\#5512](https://github.com/apache/arrow-rs/pull/5512) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) +- Improve docs for logical and physical nulls even more [\#5434](https://github.com/apache/arrow-rs/pull/5434) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) +- Add example of converting RecordBatches to JSON objects [\#5364](https://github.com/apache/arrow-rs/pull/5364) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) **Performance improvements:** -- ArrowArrayStreamReader imports FFI\_ArrowSchema on each iteration [\#5103](https://github.com/apache/arrow-rs/issues/5103) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- improve float to string cast by ~20%-40% [\#5401](https://github.com/apache/arrow-rs/pull/5401) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([psvri](https://github.com/psvri)) **Closed issues:** -- Working example of list\_flights with ObjectStore [\#5116](https://github.com/apache/arrow-rs/issues/5116) -- \(object\_store\) Error broken pipe on S3 multipart upload [\#5106](https://github.com/apache/arrow-rs/issues/5106) +- Add `StringViewArray` implementation and layout and basic construction + tests [\#5469](https://github.com/apache/arrow-rs/issues/5469) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Add `DataType::Utf8View` and `DataType::BinaryView` [\#5468](https://github.com/apache/arrow-rs/issues/5468) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] **Merged pull requests:** -- Update parquet object\_store dependency to 0.9.0 [\#5290](https://github.com/apache/arrow-rs/pull/5290) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) -- Update proc-macro2 requirement from =1.0.75 to =1.0.76 [\#5289](https://github.com/apache/arrow-rs/pull/5289) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([dependabot[bot]](https://github.com/apps/dependabot)) -- Enable JS tests again [\#5287](https://github.com/apache/arrow-rs/pull/5287) ([domoritz](https://github.com/domoritz)) -- Update proc-macro2 requirement from =1.0.74 to =1.0.75 [\#5279](https://github.com/apache/arrow-rs/pull/5279) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([dependabot[bot]](https://github.com/apps/dependabot)) -- Update proc-macro2 requirement from =1.0.73 to =1.0.74 [\#5271](https://github.com/apache/arrow-rs/pull/5271) 
[[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([dependabot[bot]](https://github.com/apps/dependabot)) -- Update proc-macro2 requirement from =1.0.71 to =1.0.73 [\#5265](https://github.com/apache/arrow-rs/pull/5265) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([dependabot[bot]](https://github.com/apps/dependabot)) -- Update docs for datatypes [\#5260](https://github.com/apache/arrow-rs/pull/5260) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Jefffrey](https://github.com/Jefffrey)) -- Don't suppress errors in ArrowArrayStreamReader [\#5256](https://github.com/apache/arrow-rs/pull/5256) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) -- Add IPC FileDecoder [\#5249](https://github.com/apache/arrow-rs/pull/5249) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) -- optimize the next function of ArrowArrayStreamReader [\#5248](https://github.com/apache/arrow-rs/pull/5248) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([doki23](https://github.com/doki23)) -- ci: Fail Miri CI on first failure [\#5243](https://github.com/apache/arrow-rs/pull/5243) ([Jefffrey](https://github.com/Jefffrey)) -- Remove 'unwrap' from Result [\#5241](https://github.com/apache/arrow-rs/pull/5241) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([zeevm](https://github.com/zeevm)) -- Update arrow-row docs URL [\#5239](https://github.com/apache/arrow-rs/pull/5239) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([thomas-k-cameron](https://github.com/thomas-k-cameron)) -- Improve regexp kernels performance by avoiding cloning Regex [\#5235](https://github.com/apache/arrow-rs/pull/5235) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) -- Update proc-macro2 requirement from =1.0.70 to =1.0.71 [\#5231](https://github.com/apache/arrow-rs/pull/5231) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([dependabot[bot]](https://github.com/apps/dependabot)) -- Minor: Improve comments and errors for ArrowPredicate [\#5230](https://github.com/apache/arrow-rs/pull/5230) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb)) -- Bump actions/upload-pages-artifact from 2 to 3 [\#5229](https://github.com/apache/arrow-rs/pull/5229) ([dependabot[bot]](https://github.com/apps/dependabot)) -- make with\_schema's error more readable [\#5228](https://github.com/apache/arrow-rs/pull/5228) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([shuoli84](https://github.com/shuoli84)) -- Use `try_new` when casting between structs to propagate error [\#5226](https://github.com/apache/arrow-rs/pull/5226) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) -- feat\(cast\): support cast between struct 
[\#5221](https://github.com/apache/arrow-rs/pull/5221) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([my-vegetable-has-exploded](https://github.com/my-vegetable-has-exploded)) -- Add `entries` to `MapBuilder` to return both key and value array builders [\#5218](https://github.com/apache/arrow-rs/pull/5218) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) -- fix\(json\): fix inferring object after field was null [\#5216](https://github.com/apache/arrow-rs/pull/5216) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([kskalski](https://github.com/kskalski)) -- Support MapBuilder in make\_builder [\#5210](https://github.com/apache/arrow-rs/pull/5210) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) -- impl `From<Vec<T>>` for `ScalarBuffer<T>` [\#5203](https://github.com/apache/arrow-rs/pull/5203) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([mbrobbel](https://github.com/mbrobbel)) -- impl `From<Vec<T>>` for `Buffer` [\#5202](https://github.com/apache/arrow-rs/pull/5202) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([mbrobbel](https://github.com/mbrobbel)) -- impl `From<BufferBuilder<T>>` for `ScalarBuffer<T>` [\#5201](https://github.com/apache/arrow-rs/pull/5201) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([mbrobbel](https://github.com/mbrobbel)) -- feat: Support quote and escape in Csv WriterBuilder [\#5196](https://github.com/apache/arrow-rs/pull/5196) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([my-vegetable-has-exploded](https://github.com/my-vegetable-has-exploded)) -- chore: simplify cast\_string\_to\_interval [\#5195](https://github.com/apache/arrow-rs/pull/5195) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jackwener](https://github.com/jackwener)) -- Clarify interval comparison behavior with documentation and tests [\#5192](https://github.com/apache/arrow-rs/pull/5192) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) -- Add `BooleanArray::into_parts` method [\#5191](https://github.com/apache/arrow-rs/pull/5191) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([mbrobbel](https://github.com/mbrobbel)) -- Fix deprecated note for `Buffer::from_raw_parts` [\#5190](https://github.com/apache/arrow-rs/pull/5190) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([mbrobbel](https://github.com/mbrobbel)) -- Fix: Ensure Timestamp Parsing Rejects Characters After 'Z [\#5189](https://github.com/apache/arrow-rs/pull/5189) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([razeghi71](https://github.com/razeghi71)) -- Simplify parquet statistics generation [\#5183](https://github.com/apache/arrow-rs/pull/5183) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) -- Parquet: Ensure page statistics are written only when conifgured from the Arrow Writer [\#5181](https://github.com/apache/arrow-rs/pull/5181) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([AdamGS](https://github.com/AdamGS)) -- Blockwise IO in IPC FileReader \(\#5153\) 
[\#5179](https://github.com/apache/arrow-rs/pull/5179) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) -- Replace ScalarBuffer in Parquet with Vec \(\#1849\) \(\#5177\) [\#5178](https://github.com/apache/arrow-rs/pull/5178) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) -- Bump actions/setup-python from 4 to 5 [\#5175](https://github.com/apache/arrow-rs/pull/5175) ([dependabot[bot]](https://github.com/apps/dependabot)) -- Add `LargeListBuilder` to `make_builder` [\#5171](https://github.com/apache/arrow-rs/pull/5171) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) -- fix: ensure take\_fixed\_size\_list can handle null indices [\#5170](https://github.com/apache/arrow-rs/pull/5170) ([westonpace](https://github.com/westonpace)) -- Removing redundant `as casts` in parquet [\#5168](https://github.com/apache/arrow-rs/pull/5168) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([psvri](https://github.com/psvri)) -- Bump actions/labeler from 4.3.0 to 5.0.0 [\#5167](https://github.com/apache/arrow-rs/pull/5167) ([dependabot[bot]](https://github.com/apps/dependabot)) -- improve: make RunArray displayable [\#5166](https://github.com/apache/arrow-rs/pull/5166) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([yukkit](https://github.com/yukkit)) -- ci: Add cargo audit CI action [\#5160](https://github.com/apache/arrow-rs/pull/5160) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Jefffrey](https://github.com/Jefffrey)) -- Parquet: write column\_orders in FileMetaData [\#5158](https://github.com/apache/arrow-rs/pull/5158) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([Jefffrey](https://github.com/Jefffrey)) -- Adding `is_null` datatype shortcut method [\#5157](https://github.com/apache/arrow-rs/pull/5157) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([comphead](https://github.com/comphead)) -- Parquet: don't truncate f16/decimal min/max stats [\#5154](https://github.com/apache/arrow-rs/pull/5154) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([Jefffrey](https://github.com/Jefffrey)) -- Support nested schema projection \(\#5148\) [\#5149](https://github.com/apache/arrow-rs/pull/5149) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) -- Parquet: omit min/max for interval columns when writing stats [\#5147](https://github.com/apache/arrow-rs/pull/5147) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([Jefffrey](https://github.com/Jefffrey)) -- Deprecate Fields::remove and Schema::remove [\#5144](https://github.com/apache/arrow-rs/pull/5144) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) -- Support casting of Float16 with other numeric types [\#5139](https://github.com/apache/arrow-rs/pull/5139) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) -- Parquet: Make `MetadataLoader` public [\#5137](https://github.com/apache/arrow-rs/pull/5137) 
[[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([AdamGS](https://github.com/AdamGS)) -- Add FileReaderBuilder for arrow-ipc to allow reading large no. of column files [\#5136](https://github.com/apache/arrow-rs/pull/5136) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Jefffrey](https://github.com/Jefffrey)) -- Parquet: clear metadata and project fields of ParquetRecordBatchStream::schema [\#5135](https://github.com/apache/arrow-rs/pull/5135) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([Jefffrey](https://github.com/Jefffrey)) -- JSON: write struct array nulls as null [\#5133](https://github.com/apache/arrow-rs/pull/5133) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Jefffrey](https://github.com/Jefffrey)) -- Update proc-macro2 requirement from =1.0.69 to =1.0.70 [\#5131](https://github.com/apache/arrow-rs/pull/5131) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([dependabot[bot]](https://github.com/apps/dependabot)) -- Fix negative decimal string [\#5128](https://github.com/apache/arrow-rs/pull/5128) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) -- Cleanup list casting and support nested lists \(\#5113\) [\#5124](https://github.com/apache/arrow-rs/pull/5124) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) -- Cast from numeric/timestamp to timestamp/numeric [\#5123](https://github.com/apache/arrow-rs/pull/5123) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) -- Improve cast docs [\#5114](https://github.com/apache/arrow-rs/pull/5114) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) -- Update prost-build requirement from =0.12.2 to =0.12.3 [\#5112](https://github.com/apache/arrow-rs/pull/5112) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([dependabot[bot]](https://github.com/apps/dependabot)) -- Parquet: derive boundary order when writing [\#5110](https://github.com/apache/arrow-rs/pull/5110) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([Jefffrey](https://github.com/Jefffrey)) -- Implementing `ArrayBuilder` for `Box<dyn ArrayBuilder>` [\#5109](https://github.com/apache/arrow-rs/pull/5109) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) -- Fix 'ColumnPath not found' error reading Parquet files with nested REPEATED fields [\#5102](https://github.com/apache/arrow-rs/pull/5102) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([mmaitre314](https://github.com/mmaitre314)) -- fix: coerce\_primitive for serde decoded data [\#5101](https://github.com/apache/arrow-rs/pull/5101) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([fansehep](https://github.com/fansehep)) -- Extend aggregation benchmarks [\#5096](https://github.com/apache/arrow-rs/pull/5096) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] 
([jhorstmann](https://github.com/jhorstmann)) -- Expand parquet crate overview doc [\#5093](https://github.com/apache/arrow-rs/pull/5093) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([mmaitre314](https://github.com/mmaitre314)) -- Ensure arrays passed to MutableArrayData have same type \(\#5091\) [\#5092](https://github.com/apache/arrow-rs/pull/5092) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) -- Update prost-build requirement from =0.12.1 to =0.12.2 [\#5088](https://github.com/apache/arrow-rs/pull/5088) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([dependabot[bot]](https://github.com/apps/dependabot)) -- Add FFI from\_raw [\#5082](https://github.com/apache/arrow-rs/pull/5082) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) -- \[fix \#5044\] Support converting 'yyyymmdd' format to date [\#5078](https://github.com/apache/arrow-rs/pull/5078) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Tangruilin](https://github.com/Tangruilin)) -- Enable truncation of binary statistics columns [\#5076](https://github.com/apache/arrow-rs/pull/5076) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([emcake](https://github.com/emcake)) -- IPC writer truncated sliced list/map values [\#5071](https://github.com/apache/arrow-rs/pull/5071) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Jefffrey](https://github.com/Jefffrey)) -- Implement Arrow PyCapsule Interface [\#5070](https://github.com/apache/arrow-rs/pull/5070) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([kylebarron](https://github.com/kylebarron)) -- Remove ByteBufferPtr and replace with Bytes [\#5055](https://github.com/apache/arrow-rs/pull/5055) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([Jefffrey](https://github.com/Jefffrey)) -- Support multiple GZip members in parquet page [\#4951](https://github.com/apache/arrow-rs/pull/4951) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) +- Deprecate array\_to\_json\_array [\#5515](https://github.com/apache/arrow-rs/pull/5515) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Fix integer parsing of empty strings \(\#5504\) [\#5505](https://github.com/apache/arrow-rs/pull/5505) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- feat: clarifying comments in struct\_builder.rs \#5494 [\#5499](https://github.com/apache/arrow-rs/pull/5499) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([istvan-fodor](https://github.com/istvan-fodor)) +- Update proc-macro2 requirement from =1.0.78 to =1.0.79 [\#5498](https://github.com/apache/arrow-rs/pull/5498) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([dependabot[bot]](https://github.com/apps/dependabot)) +- Add DataType::ListView and DataType::LargeListView 
[\#5493](https://github.com/apache/arrow-rs/pull/5493) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Kikkon](https://github.com/Kikkon)) +- Better document parquet pushdown [\#5491](https://github.com/apache/arrow-rs/pull/5491) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) +- Fix NullBufferBuilder::new\_from\_buffer wrong size assertion [\#5489](https://github.com/apache/arrow-rs/pull/5489) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Kikkon](https://github.com/Kikkon)) +- Support dictionary encoding in structures for `FlightDataEncoder`, add documentation for `arrow_flight::encode::Dictionary` [\#5488](https://github.com/apache/arrow-rs/pull/5488) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([thinkharderdev](https://github.com/thinkharderdev)) +- Add MapBuilder::with\_values\_field to support non-nullable values \(\#5482\) [\#5483](https://github.com/apache/arrow-rs/pull/5483) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([lasantosr](https://github.com/lasantosr)) +- feat: initial support string\_view and binary\_view, supports layout and basic construction + tests [\#5481](https://github.com/apache/arrow-rs/pull/5481) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([ariesdevil](https://github.com/ariesdevil)) +- Add more comprehensive documentation on testing and benchmarking to CONTRIBUTING.md [\#5478](https://github.com/apache/arrow-rs/pull/5478) ([monkwire](https://github.com/monkwire)) +- Remove unused import detected by nightly rust [\#5477](https://github.com/apache/arrow-rs/pull/5477) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([XiangpengHao](https://github.com/XiangpengHao)) +- Add RecordBatch::schema\_ref [\#5474](https://github.com/apache/arrow-rs/pull/5474) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([monkwire](https://github.com/monkwire)) +- Provide access to inner Write for parquet writers [\#5471](https://github.com/apache/arrow-rs/pull/5471) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) +- Add DataType::Utf8View and DataType::BinaryView [\#5470](https://github.com/apache/arrow-rs/pull/5470) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([XiangpengHao](https://github.com/XiangpengHao)) +- Update base64 requirement from 0.21 to 0.22 [\#5467](https://github.com/apache/arrow-rs/pull/5467) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([dependabot[bot]](https://github.com/apps/dependabot)) +- 
Minor: Fix formatting typo in `Field::new_list_field` [\#5464](https://github.com/apache/arrow-rs/pull/5464) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) +- Fix test\_string\_dictionary\_merge \(\#5255\) [\#5461](https://github.com/apache/arrow-rs/pull/5461) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Use Vec::from\_iter in Buffer::from\_iter [\#5460](https://github.com/apache/arrow-rs/pull/5460) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Kikkon](https://github.com/Kikkon)) +- Document parquet writer memory limiting \(\#5450\) [\#5457](https://github.com/apache/arrow-rs/pull/5457) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) +- Document UnionArray Panics [\#5456](https://github.com/apache/arrow-rs/pull/5456) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Kikkon](https://github.com/Kikkon)) +- fix: lexsort\_to\_indices unsupported mixed types with list [\#5455](https://github.com/apache/arrow-rs/pull/5455) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) +- Refine `Display` and `Source` implementation for error types [\#5439](https://github.com/apache/arrow-rs/pull/5439) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([BugenZhao](https://github.com/BugenZhao)) +- Improve debug output of Time32/Time64 arrays [\#5428](https://github.com/apache/arrow-rs/pull/5428) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([monkwire](https://github.com/monkwire)) +- Miri fix: Rename invalid\_mut to without\_provenance\_mut [\#5418](https://github.com/apache/arrow-rs/pull/5418) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Jefffrey](https://github.com/Jefffrey)) +- Ensure addition/multiplications in when allocating buffers don't overflow [\#5417](https://github.com/apache/arrow-rs/pull/5417) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Jefffrey](https://github.com/Jefffrey)) +- Update Flight proto: PollFlightInfo & expiration time [\#5413](https://github.com/apache/arrow-rs/pull/5413) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([Jefffrey](https://github.com/Jefffrey)) +- Add tests for serializing lists of dictionary encoded values to json [\#5399](https://github.com/apache/arrow-rs/pull/5399) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jhorstmann](https://github.com/jhorstmann)) +- Return null for overflow when casting string to integer under safe option enabled [\#5398](https://github.com/apache/arrow-rs/pull/5398) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- Propagate error instead of panic for `take_bytes` [\#5395](https://github.com/apache/arrow-rs/pull/5395) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- Improve like kernel by ~2% 
[\#5390](https://github.com/apache/arrow-rs/pull/5390) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([psvri](https://github.com/psvri)) +- Enable running arrow-array and arrow-arith with miri and avoid strict provenance warning [\#5387](https://github.com/apache/arrow-rs/pull/5387) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jhorstmann](https://github.com/jhorstmann)) +- Update to chrono 0.4.34 [\#5385](https://github.com/apache/arrow-rs/pull/5385) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Return error instead of panic when reading invalid Parquet metadata [\#5382](https://github.com/apache/arrow-rs/pull/5382) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([mmaitre314](https://github.com/mmaitre314)) +- Update tonic requirement from 0.10.0 to 0.11.0 [\#5380](https://github.com/apache/arrow-rs/pull/5380) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([dependabot[bot]](https://github.com/apps/dependabot)) +- Update tonic-build requirement from =0.10.2 to =0.11.0 [\#5379](https://github.com/apache/arrow-rs/pull/5379) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([dependabot[bot]](https://github.com/apps/dependabot)) +- Fix latest clippy lints [\#5376](https://github.com/apache/arrow-rs/pull/5376) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- feat: utility functions for creating `FixedSizeList` and `LargeList` dtypes [\#5373](https://github.com/apache/arrow-rs/pull/5373) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([universalmind303](https://github.com/universalmind303)) +- Minor\(docs\): update master to main for DataFusion/Ballista [\#5363](https://github.com/apache/arrow-rs/pull/5363) ([caicancai](https://github.com/caicancai)) +- Return an error instead of a panic when reading a corrupted Parquet file with mismatched column counts [\#5362](https://github.com/apache/arrow-rs/pull/5362) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([mmaitre314](https://github.com/mmaitre314)) +- feat: support casting FixedSizeList with new child type [\#5360](https://github.com/apache/arrow-rs/pull/5360) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([wjones127](https://github.com/wjones127)) +- Add more debugging info to StructBuilder validate\_content [\#5357](https://github.com/apache/arrow-rs/pull/5357) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- pyarrow: Preserve RecordBatch's schema metadata [\#5355](https://github.com/apache/arrow-rs/pull/5355) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([atwam](https://github.com/atwam)) +- Mark Encoding::BIT\_PACKED as deprecated and document its compatibility issues [\#5348](https://github.com/apache/arrow-rs/pull/5348) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([jhorstmann](https://github.com/jhorstmann)) +- Track the size of custom allocations for use via 
Array::get\_buffer\_memory\_size [\#5347](https://github.com/apache/arrow-rs/pull/5347) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jhorstmann](https://github.com/jhorstmann)) +- fix: Return an error on type mismatch rather than panic \(\#4995\) [\#5341](https://github.com/apache/arrow-rs/pull/5341) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([carols10cents](https://github.com/carols10cents)) +- Minor: support cast values to fixedsizelist [\#5340](https://github.com/apache/arrow-rs/pull/5340) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Weijun-H](https://github.com/Weijun-H)) +- Enhance Time32/Time64 support in date\_part [\#5337](https://github.com/apache/arrow-rs/pull/5337) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Jefffrey](https://github.com/Jefffrey)) +- feat: add `take_record_batch`. [\#5333](https://github.com/apache/arrow-rs/pull/5333) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([RinChanNOWWW](https://github.com/RinChanNOWWW)) +- Add ListBuilder::with\_field to support non nullable list fields \(\#5330\) [\#5331](https://github.com/apache/arrow-rs/pull/5331) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Don't omit schema metadata when removing column [\#5328](https://github.com/apache/arrow-rs/pull/5328) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([kylebarron](https://github.com/kylebarron)) +- Update proc-macro2 requirement from =1.0.76 to =1.0.78 [\#5324](https://github.com/apache/arrow-rs/pull/5324) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([dependabot[bot]](https://github.com/apps/dependabot)) +- Enhance Date64 type documentation [\#5323](https://github.com/apache/arrow-rs/pull/5323) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Jefffrey](https://github.com/Jefffrey)) +- fix panic when decode a group with no child [\#5322](https://github.com/apache/arrow-rs/pull/5322) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([Liyixin95](https://github.com/Liyixin95)) +- Minor/Doc Expand FlightSqlServiceClient::handshake doc [\#5321](https://github.com/apache/arrow-rs/pull/5321) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([devinjdangelo](https://github.com/devinjdangelo)) +- Refactor temporal extract date part kernels [\#5319](https://github.com/apache/arrow-rs/pull/5319) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Jefffrey](https://github.com/Jefffrey)) +- Add JSON writer benchmarks \(\#5314\) [\#5317](https://github.com/apache/arrow-rs/pull/5317) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Bump actions/cache from 3 to 4 [\#5308](https://github.com/apache/arrow-rs/pull/5308) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Avro block decompression [\#5306](https://github.com/apache/arrow-rs/pull/5306) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] 
([tustvold](https://github.com/tustvold)) +- Result into error in case of endianness mismatches [\#5301](https://github.com/apache/arrow-rs/pull/5301) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([pangiole](https://github.com/pangiole)) +- parquet: Add ArrowWriterOptions to skip embedding the arrow metadata [\#5299](https://github.com/apache/arrow-rs/pull/5299) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([evenyag](https://github.com/evenyag)) +- Add support for more fused boolean operations [\#5298](https://github.com/apache/arrow-rs/pull/5298) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([RTEnzyme](https://github.com/RTEnzyme)) +- Support Parquet Byte Stream Split Encoding [\#5293](https://github.com/apache/arrow-rs/pull/5293) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([mwlon](https://github.com/mwlon)) +- Extend string parsing support for Date32 [\#5282](https://github.com/apache/arrow-rs/pull/5282) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([gruuya](https://github.com/gruuya)) +- Bring some methods over from ArrowWriter to the async version [\#5251](https://github.com/apache/arrow-rs/pull/5251) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([AdamGS](https://github.com/AdamGS)) + + + +\* *This Changelog was automatically generated by [github_changelog_generator](https://github.com/github-changelog-generator/github-changelog-generator)*
diff --git a/Cargo.toml b/Cargo.toml
index bbbf907ba7f0..e09660941d60 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -62,7 +62,7 @@ exclude = [
 ]
 
 [workspace.package]
-version = "50.0.0"
+version = "51.0.0"
 homepage = "https://github.com/apache/arrow-rs"
 repository = "https://github.com/apache/arrow-rs"
 authors = ["Apache Arrow <dev@arrow.apache.org>"]
@@ -77,20 +77,20 @@
 edition = "2021"
 rust-version = "1.62"
 
 [workspace.dependencies]
-arrow = { version = "50.0.0", path = "./arrow", default-features = false }
-arrow-arith = { version = "50.0.0", path = "./arrow-arith" }
-arrow-array = { version = "50.0.0", path = "./arrow-array" }
-arrow-buffer = { version = "50.0.0", path = "./arrow-buffer" }
-arrow-cast = { version = "50.0.0", path = "./arrow-cast" }
-arrow-csv = { version = "50.0.0", path = "./arrow-csv" }
-arrow-data = { version = "50.0.0", path = "./arrow-data" }
-arrow-ipc = { version = "50.0.0", path = "./arrow-ipc" }
-arrow-json = { version = "50.0.0", path = "./arrow-json" }
-arrow-ord = { version = "50.0.0", path = "./arrow-ord" }
-arrow-row = { version = "50.0.0", path = "./arrow-row" }
-arrow-schema = { version = "50.0.0", path = "./arrow-schema" }
-arrow-select = { version = "50.0.0", path = "./arrow-select" }
-arrow-string = { version = "50.0.0", path = "./arrow-string" }
-parquet = { version = "50.0.0", path = "./parquet", default-features = false }
+arrow = { version = "51.0.0", path = "./arrow", default-features = false }
+arrow-arith = { version = "51.0.0", path = "./arrow-arith" }
+arrow-array = { version = "51.0.0", path = "./arrow-array" }
+arrow-buffer = { version = "51.0.0", path = "./arrow-buffer" }
+arrow-cast = { version = "51.0.0", path = "./arrow-cast" }
+arrow-csv = { version = "51.0.0", path = "./arrow-csv" }
+arrow-data = { version = "51.0.0", path = "./arrow-data" }
+arrow-ipc = { version = "51.0.0", path = "./arrow-ipc" }
+arrow-json = { version = "51.0.0", path = "./arrow-json" }
"./arrow-json" } +arrow-ord = { version = "51.0.0", path = "./arrow-ord" } +arrow-row = { version = "51.0.0", path = "./arrow-row" } +arrow-schema = { version = "51.0.0", path = "./arrow-schema" } +arrow-select = { version = "51.0.0", path = "./arrow-select" } +arrow-string = { version = "51.0.0", path = "./arrow-string" } +parquet = { version = "51.0.0", path = "./parquet", default-features = false } chrono = { version = "0.4.34", default-features = false, features = ["clock"] } diff --git a/dev/release/update_change_log.sh b/dev/release/update_change_log.sh index e8590ddd6788..20d6d55615b1 100755 --- a/dev/release/update_change_log.sh +++ b/dev/release/update_change_log.sh @@ -29,8 +29,8 @@ set -e -SINCE_TAG="49.0.0" -FUTURE_RELEASE="50.0.0" +SINCE_TAG="50.0.0" +FUTURE_RELEASE="51.0.0" SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" SOURCE_TOP_DIR="$(cd "${SOURCE_DIR}/../../" && pwd)" From ae42b3b6883d79ff25537e4b1837a9d9bfb0b088 Mon Sep 17 00:00:00 2001 From: Val Lorentz Date: Sat, 16 Mar 2024 23:14:50 +0100 Subject: [PATCH 09/11] parquet: Use specific error variant when codec is disabled (#5521) * Use specific error variant when codec is disabled Instead of reporting it as 'not yet implemented' * Replace Disabled with General --- parquet/src/compression.rs | 47 ++++++++++++++++++++++++++------------ 1 file changed, 33 insertions(+), 14 deletions(-) diff --git a/parquet/src/compression.rs b/parquet/src/compression.rs index 89f4b64d48b5..10560210e4e8 100644 --- a/parquet/src/compression.rs +++ b/parquet/src/compression.rs @@ -145,21 +145,40 @@ pub(crate) trait CompressionLevel { /// bytes for the compression type. /// This returns `None` if the codec type is `UNCOMPRESSED`. pub fn create_codec(codec: CodecType, _options: &CodecOptions) -> Result>> { + #[allow(unreachable_code, unused_variables)] match codec { - #[cfg(any(feature = "brotli", test))] - CodecType::BROTLI(level) => Ok(Some(Box::new(BrotliCodec::new(level)))), - #[cfg(any(feature = "flate2", test))] - CodecType::GZIP(level) => Ok(Some(Box::new(GZipCodec::new(level)))), - #[cfg(any(feature = "snap", test))] - CodecType::SNAPPY => Ok(Some(Box::new(SnappyCodec::new()))), - #[cfg(any(feature = "lz4", test))] - CodecType::LZ4 => Ok(Some(Box::new(LZ4HadoopCodec::new( - _options.backward_compatible_lz4, - )))), - #[cfg(any(feature = "zstd", test))] - CodecType::ZSTD(level) => Ok(Some(Box::new(ZSTDCodec::new(level)))), - #[cfg(any(feature = "lz4", test))] - CodecType::LZ4_RAW => Ok(Some(Box::new(LZ4RawCodec::new()))), + CodecType::BROTLI(level) => { + #[cfg(any(feature = "brotli", test))] + return Ok(Some(Box::new(BrotliCodec::new(level)))); + Err(ParquetError::General("Disabled feature at compile time: brotli".into())) + }, + CodecType::GZIP(level) => { + #[cfg(any(feature = "flate2", test))] + return Ok(Some(Box::new(GZipCodec::new(level)))); + Err(ParquetError::General("Disabled feature at compile time: flate2".into())) + }, + CodecType::SNAPPY => { + #[cfg(any(feature = "snap", test))] + return Ok(Some(Box::new(SnappyCodec::new()))); + Err(ParquetError::General("Disabled feature at compile time: snap".into())) + }, + CodecType::LZ4 => { + #[cfg(any(feature = "lz4", test))] + return Ok(Some(Box::new(LZ4HadoopCodec::new( + _options.backward_compatible_lz4, + )))); + Err(ParquetError::General("Disabled feature at compile time: lz4".into())) + }, + CodecType::ZSTD(level) => { + #[cfg(any(feature = "zstd", test))] + return Ok(Some(Box::new(ZSTDCodec::new(level)))); + Err(ParquetError::General("Disabled feature at compile 
From 7e5f523a17444a3da01e87e9d1778315295065e9 Mon Sep 17 00:00:00 2001
From: Matthijs Brobbel
Date: Sat, 16 Mar 2024 23:15:13 +0100
Subject: [PATCH 10/11] impl `From<ScalarBuffer<T>>` for `Vec<T>` (#5518)

* impl `From<ScalarBuffer<T>>` for `Vec<T>`

* Remove layout test, prevented by `miri`

---
 arrow-buffer/src/buffer/scalar.rs | 52 +++++++++++++++++++++++++++++++
 1 file changed, 52 insertions(+)

diff --git a/arrow-buffer/src/buffer/scalar.rs b/arrow-buffer/src/buffer/scalar.rs
index 3826d74e43bd..2019cc79830d 100644
--- a/arrow-buffer/src/buffer/scalar.rs
+++ b/arrow-buffer/src/buffer/scalar.rs
@@ -160,6 +160,15 @@ impl<T: ArrowNativeType> From<Vec<T>> for ScalarBuffer<T> {
     }
 }
 
+impl<T: ArrowNativeType> From<ScalarBuffer<T>> for Vec<T> {
+    fn from(value: ScalarBuffer<T>) -> Self {
+        value
+            .buffer
+            .into_vec()
+            .unwrap_or_else(|buffer| buffer.typed_data::<T>().into())
+    }
+}
+
 impl<T: ArrowNativeType> From<BufferBuilder<T>> for ScalarBuffer<T> {
     fn from(mut value: BufferBuilder<T>) -> Self {
         let len = value.len();
@@ -208,6 +217,8 @@ impl<T: ArrowNativeType> PartialEq<ScalarBuffer<T>> for Vec<T> {
 
 #[cfg(test)]
 mod tests {
+    use std::{ptr::NonNull, sync::Arc};
+
     use super::*;
 
     #[test]
@@ -284,4 +295,45 @@ mod tests {
         let scalar_buffer = ScalarBuffer::from(buffer_builder);
         assert_eq!(scalar_buffer.as_ref(), input);
     }
+
+    #[test]
+    fn into_vec() {
+        let input = vec![1u8, 2, 3, 4];
+
+        // No copy
+        let input_buffer = Buffer::from_vec(input.clone());
+        let input_ptr = input_buffer.as_ptr();
+        let input_len = input_buffer.len();
+        let scalar_buffer = ScalarBuffer::<u8>::new(input_buffer, 0, input_len);
+        let vec = Vec::from(scalar_buffer);
+        assert_eq!(vec.as_slice(), input.as_slice());
+        assert_eq!(vec.as_ptr(), input_ptr);
+
+        // Custom allocation - makes a copy
+        let mut input_clone = input.clone();
+        let input_ptr = NonNull::new(input_clone.as_mut_ptr()).unwrap();
+        let dealloc = Arc::new(());
+        let buffer =
+            unsafe { Buffer::from_custom_allocation(input_ptr, input_clone.len(), dealloc as _) };
+        let scalar_buffer = ScalarBuffer::<u8>::new(buffer, 0, input.len());
+        let vec = Vec::from(scalar_buffer);
+        assert_eq!(vec, input.as_slice());
+        assert_ne!(vec.as_ptr(), input_ptr.as_ptr());
+
+        // Offset - makes a copy
+        let input_buffer = Buffer::from_vec(input.clone());
+        let input_ptr = input_buffer.as_ptr();
+        let input_len = input_buffer.len();
+        let scalar_buffer = ScalarBuffer::<u8>::new(input_buffer, 1, input_len - 1);
+        let vec = Vec::from(scalar_buffer);
+        assert_eq!(vec.as_slice(), &input[1..]);
+        assert_ne!(vec.as_ptr(), input_ptr);
+
+        // Inner buffer Arc ref count != 0 - makes a copy
+        let buffer = Buffer::from_slice_ref(input.as_slice());
+        let scalar_buffer = ScalarBuffer::<u8>::new(buffer, 0, input.len());
+        let vec = Vec::from(scalar_buffer);
+        assert_eq!(vec, input.as_slice());
+        assert_ne!(vec.as_ptr(), input.as_ptr());
+    }
 }
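This conversion completes the round trip between `Vec<T>` and `ScalarBuffer<T>`: `From<Vec<T>>` was already zero-copy, and the new impl reclaims the original allocation on the way back whenever the buffer is uniquely owned, vec-allocated, and not offset, falling back to a copy otherwise, which is exactly what the new tests exercise. A small usage sketch, for illustration only and not part of the patch:

    use arrow_buffer::ScalarBuffer;

    fn main() {
        // Vec -> ScalarBuffer is zero-copy; ScalarBuffer -> Vec (added here)
        // is zero-copy when the allocation can be reclaimed, else it copies.
        let values: Vec<i32> = vec![1, 2, 3, 4];
        let buffer: ScalarBuffer<i32> = values.into();
        let roundtrip: Vec<i32> = buffer.into();
        assert_eq!(roundtrip, vec![1, 2, 3, 4]);
    }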
From f41c2a4e5a33e482e12351051d77d0e059f28e33 Mon Sep 17 00:00:00 2001
From: Andrew Lamb
Date: Sat, 16 Mar 2024 18:41:14 -0400
Subject: [PATCH 11/11] Minor: add additional documentation about `BufWriter` (#5519)

* Minor: add additional documentation about BufWriter

* Update object_store/src/buffered.rs

* Apply suggestions from code review

Co-authored-by: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com>

* Format

---------

Co-authored-by: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com>
Co-authored-by: Raphael Taylor-Davies

---
 object_store/src/buffered.rs |  7 ++++++-
 object_store/src/lib.rs      | 13 +++++++++----
 2 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/object_store/src/buffered.rs b/object_store/src/buffered.rs
index fdefe599f79e..9299e1147bc1 100644
--- a/object_store/src/buffered.rs
+++ b/object_store/src/buffered.rs
@@ -207,6 +207,10 @@ impl AsyncBufRead for BufReader {
 
 /// An async buffered writer compatible with the tokio IO traits
 ///
+/// This writer adaptively uses [`ObjectStore::put`] or
+/// [`ObjectStore::put_multipart`] depending on the amount of data that has
+/// been written.
+///
 /// Up to `capacity` bytes will be buffered in memory, and flushed on shutdown
 /// using [`ObjectStore::put`]. If `capacity` is exceeded, data will instead be
 /// streamed using [`ObjectStore::put_multipart`]
@@ -255,7 +259,8 @@ impl BufWriter {
         }
     }
 
-    /// Returns the [`MultipartId`] if multipart upload
+    /// Returns the [`MultipartId`] of the multipart upload created by this
+    /// writer, if any.
     pub fn multipart_id(&self) -> Option<&MultipartId> {
         self.multipart_id.as_ref()
     }
diff --git a/object_store/src/lib.rs b/object_store/src/lib.rs
index 8132002b6e01..4960f3ba390a 100644
--- a/object_store/src/lib.rs
+++ b/object_store/src/lib.rs
@@ -88,11 +88,11 @@
 //!
 //! # Why not a Filesystem Interface?
 //!
-//! Whilst this crate does provide a [`BufReader`], the [`ObjectStore`] interface mirrors the APIs
-//! of object stores and not filesystems, opting to provide stateless APIs instead of the cursor
-//! based interfaces such as [`Read`] or [`Seek`] favoured by filesystems.
+//! The [`ObjectStore`] interface is designed to mirror the APIs
+//! of object stores and *not* filesystems, and thus has stateless APIs instead
+//! of cursor based interfaces such as [`Read`] or [`Seek`] available in filesystems.
 //!
-//! This provides some compelling advantages:
+//! This design provides the following advantages:
 //!
 //! * All operations are atomic, and readers cannot observe partial and/or failed writes
 //! * Methods map directly to object store APIs, providing both efficiency and predictability
@@ -100,7 +100,12 @@
 //! * Allows for functionality not native to filesystems, such as operation preconditions
 //!   and atomic multipart uploads
 //!
+//! This crate does provide [`BufReader`] and [`BufWriter`] adapters
+//! which provide a more filesystem-like API for working with the
+//! [`ObjectStore`] trait, however, they should be used with care
+//!
 //! [`BufReader`]: buffered::BufReader
+//! [`BufWriter`]: buffered::BufWriter
 //!
 //! # Adapters
 //!