Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/master' into scalar-experiments
Browse files Browse the repository at this point in the history
  • Loading branch information
tustvold committed Jun 29, 2023
2 parents e29f5c5 + 92a7894 commit 8d47965
Show file tree
Hide file tree
Showing 54 changed files with 1,339 additions and 1,034 deletions.
8 changes: 7 additions & 1 deletion .asf.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,10 @@ github:
# require branches to be up-to-date before merging
strict: true
# don't require any jobs to pass
contexts: []
contexts: []

# publishes the content of the `asf-site` branch to
# https://arrow.apache.org/rust/
publish:
whoami: asf-site
subdir: rust
41 changes: 40 additions & 1 deletion .github/workflows/docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ jobs:
container:
image: ${{ matrix.arch }}/rust
env:
RUSTDOCFLAGS: "-Dwarnings"
RUSTDOCFLAGS: "-Dwarnings --enable-index-page -Zunstable-options"
steps:
- uses: actions/checkout@v3
with:
Expand All @@ -56,3 +56,42 @@ jobs:
rust-version: ${{ matrix.rust }}
- name: Run cargo doc
run: cargo doc --document-private-items --no-deps --workspace --all-features
- name: Fix file permissions
shell: sh
run: |
chmod -c -R +rX "target/doc" |
while read line; do
echo "::warning title=Invalid file permissions automatically fixed::$line"
done
- name: Upload artifacts
uses: actions/upload-pages-artifact@v1
with:
name: crate-docs
path: target/doc

deploy:
# Only deploy on a push to master
if: github.ref_name == 'master' && github.event_name == 'push'
needs: docs
permissions:
contents: write
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Download crate docs
uses: actions/download-artifact@v3
with:
name: crate-docs
path: website/build
- name: Prepare website
run: |
tar -xf website/build/artifact.tar -C website/build
rm website/build/artifact.tar
cp .asf.yaml ./website/build/.asf.yaml
- name: Deploy to gh-pages
uses: peaceiris/[email protected]
if: github.event_name == 'push' && github.ref_name == 'master'
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
publish_dir: website/build
publish_branch: asf-site
85 changes: 85 additions & 0 deletions CHANGELOG-old.md

Large diffs are not rendered by default.

138 changes: 66 additions & 72 deletions CHANGELOG.md

Large diffs are not rendered by default.

32 changes: 16 additions & 16 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ exclude = [
]

[workspace.package]
version = "41.0.0"
version = "42.0.0"
homepage = "https://github.com/apache/arrow-rs"
repository = "https://github.com/apache/arrow-rs"
authors = ["Apache Arrow <[email protected]>"]
Expand All @@ -76,18 +76,18 @@ edition = "2021"
rust-version = "1.62"

[workspace.dependencies]
arrow = { version = "41.0.0", path = "./arrow", default-features = false }
arrow-arith = { version = "41.0.0", path = "./arrow-arith" }
arrow-array = { version = "41.0.0", path = "./arrow-array" }
arrow-buffer = { version = "41.0.0", path = "./arrow-buffer" }
arrow-cast = { version = "41.0.0", path = "./arrow-cast" }
arrow-csv = { version = "41.0.0", path = "./arrow-csv" }
arrow-data = { version = "41.0.0", path = "./arrow-data" }
arrow-ipc = { version = "41.0.0", path = "./arrow-ipc" }
arrow-json = { version = "41.0.0", path = "./arrow-json" }
arrow-ord = { version = "41.0.0", path = "./arrow-ord" }
arrow-row = { version = "41.0.0", path = "./arrow-row" }
arrow-schema = { version = "41.0.0", path = "./arrow-schema" }
arrow-select = { version = "41.0.0", path = "./arrow-select" }
arrow-string = { version = "41.0.0", path = "./arrow-string" }
parquet = { version = "41.0.0", path = "./parquet", default-features = false }
arrow = { version = "42.0.0", path = "./arrow", default-features = false }
arrow-arith = { version = "42.0.0", path = "./arrow-arith" }
arrow-array = { version = "42.0.0", path = "./arrow-array" }
arrow-buffer = { version = "42.0.0", path = "./arrow-buffer" }
arrow-cast = { version = "42.0.0", path = "./arrow-cast" }
arrow-csv = { version = "42.0.0", path = "./arrow-csv" }
arrow-data = { version = "42.0.0", path = "./arrow-data" }
arrow-ipc = { version = "42.0.0", path = "./arrow-ipc" }
arrow-json = { version = "42.0.0", path = "./arrow-json" }
arrow-ord = { version = "42.0.0", path = "./arrow-ord" }
arrow-row = { version = "42.0.0", path = "./arrow-row" }
arrow-schema = { version = "42.0.0", path = "./arrow-schema" }
arrow-select = { version = "42.0.0", path = "./arrow-select" }
arrow-string = { version = "42.0.0", path = "./arrow-string" }
parquet = { version = "42.0.0", path = "./parquet", default-features = false }
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ This repo contains the following main components:
| arrow-flight | Support for Arrow-Flight IPC protocol | [(README)][flight-readme] |
| object-store | Support for object store interactions (aws, azure, gcp, local, in-memory) | [(README)][objectstore-readme] |

See the list of all crates in this repo and their rustdocs [here](https://arrow.apache.org/rust).

There are two related crates in a different repository

| Crate | Description | Documentation |
Expand Down
8 changes: 6 additions & 2 deletions arrow-array/src/array/list_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -968,7 +968,9 @@ mod tests {
}

#[test]
#[should_panic(expected = "memory is not aligned")]
#[should_panic(
expected = "Memory pointer is not aligned with the specified scalar type"
)]
fn test_primitive_array_alignment() {
let buf = Buffer::from_slice_ref([0_u64]);
let buf2 = buf.slice(1);
Expand All @@ -980,7 +982,9 @@ mod tests {
}

#[test]
#[should_panic(expected = "memory is not aligned")]
#[should_panic(
expected = "Memory pointer is not aligned with the specified scalar type"
)]
// Different error messages, so skip for now
// https://github.com/apache/arrow-rs/issues/1545
#[cfg(not(feature = "force_validate"))]
Expand Down
6 changes: 6 additions & 0 deletions arrow-array/src/array/null_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

//! Contains the `NullArray` type.
use crate::builder::NullBuilder;
use crate::{Array, ArrayRef};
use arrow_buffer::buffer::NullBuffer;
use arrow_data::{ArrayData, ArrayDataBuilder};
Expand Down Expand Up @@ -62,6 +63,11 @@ impl NullArray {

Self { len }
}

/// Returns a new [`NullBuilder`], obtained by passing `capacity` straight
/// through to [`NullBuilder::with_capacity`].
pub fn builder(capacity: usize) -> NullBuilder {
NullBuilder::with_capacity(capacity)
}
}

impl Array for NullArray {
Expand Down
2 changes: 2 additions & 0 deletions arrow-array/src/builder/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,8 @@ mod generic_list_builder;
pub use generic_list_builder::*;
mod map_builder;
pub use map_builder::*;
mod null_builder;
pub use null_builder::*;
mod primitive_builder;
pub use primitive_builder::*;
mod primitive_dictionary_builder;
Expand Down
184 changes: 184 additions & 0 deletions arrow-array/src/builder/null_builder.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

use crate::builder::ArrayBuilder;
use crate::{ArrayRef, NullArray};
use arrow_data::ArrayData;
use arrow_schema::DataType;
use std::any::Any;
use std::sync::Arc;

/// Builder for [`NullArray`]
///
/// The builder holds no buffers: its entire state is a single slot counter.
///
/// # Example
///
/// Create a `NullArray` from a `NullBuilder`
///
/// ```
/// # use arrow_array::{Array, NullArray, builder::NullBuilder};
/// let mut b = NullBuilder::new();
/// b.append_empty_value();
/// b.append_null();
/// b.append_nulls(3);
/// b.append_empty_values(3);
/// let arr = b.finish();
///
/// assert_eq!(8, arr.len());
/// assert_eq!(8, arr.null_count());
/// ```
#[derive(Debug)]
pub struct NullBuilder {
    /// Number of array slots currently tracked by this builder.
    len: usize,
}

impl Default for NullBuilder {
fn default() -> Self {
Self::new()
}
}

impl NullBuilder {
    /// Creates a new, empty null builder
    pub fn new() -> Self {
        Self { len: 0 }
    }

    /// Creates a new, empty null builder.
    ///
    /// A null builder keeps nothing but a slot counter, so there is no
    /// storage to pre-allocate. `capacity` is accepted only so that this
    /// builder can be constructed like the other builders (for example from
    /// `make_builder`); it never adds slots to the builder.
    pub fn with_capacity(capacity: usize) -> Self {
        // The hint must not be stored in `len`: doing so would make the
        // finished array `capacity` slots longer than what was appended.
        let _ = capacity;
        Self::new()
    }

    /// Returns the number of slots this builder currently accounts for.
    ///
    /// No memory is reserved by a null builder, so this is the same value as
    /// the number of slots appended so far.
    pub fn capacity(&self) -> usize {
        self.len
    }

    /// Appends a null slot into the builder
    #[inline]
    pub fn append_null(&mut self) {
        self.len += 1;
    }

    /// Appends `n` `null`s into the builder.
    #[inline]
    pub fn append_nulls(&mut self, n: usize) {
        self.len += n;
    }

    /// Appends a single slot into the builder.
    ///
    /// This simply forwards to [`Self::append_null`].
    #[inline]
    pub fn append_empty_value(&mut self) {
        self.append_null();
    }

    /// Appends `n` slots into the builder.
    ///
    /// This simply forwards to [`Self::append_nulls`].
    #[inline]
    pub fn append_empty_values(&mut self, n: usize) {
        self.append_nulls(n);
    }

    /// Builds the [`NullArray`] and resets this builder.
    ///
    /// After this call the builder is empty again and can be reused.
    pub fn finish(&mut self) -> NullArray {
        // `take` hands back the current count and leaves `0` behind, which is
        // the reset promised by the documentation.
        Self::build_array(std::mem::take(&mut self.len))
    }

    /// Builds the [`NullArray`] without resetting the builder.
    pub fn finish_cloned(&self) -> NullArray {
        Self::build_array(self.len)
    }

    /// Creates a [`NullArray`] with `len` slots.
    fn build_array(len: usize) -> NullArray {
        let builder = ArrayData::new_null(&DataType::Null, len).into_builder();

        // SAFETY: the data comes directly from `ArrayData::new_null`, which is
        // relied upon to produce well-formed `Null` data of length `len`.
        // Nothing is altered before it is rebuilt, so validation is skipped.
        let array_data = unsafe { builder.build_unchecked() };
        NullArray::from(array_data)
    }
}

impl ArrayBuilder for NullBuilder {
    /// Returns the builder as a non-mutable `Any` reference.
    fn as_any(&self) -> &dyn Any {
        self
    }

    /// Returns the builder as a mutable `Any` reference.
    fn as_any_mut(&mut self) -> &mut dyn Any {
        self
    }

    /// Returns the boxed builder as a box of `Any`.
    fn into_box_any(self: Box<Self>) -> Box<dyn Any> {
        self
    }

    /// Returns the number of array slots in the builder
    fn len(&self) -> usize {
        self.len
    }

    /// Returns whether the number of array slots is zero
    fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Builds the array and resets this builder.
    fn finish(&mut self) -> ArrayRef {
        // Method resolution prefers the inherent `NullBuilder::finish`, so
        // this delegates to it rather than recursing.
        Arc::new(self.finish())
    }

    /// Builds the array without resetting the builder.
    fn finish_cloned(&self) -> ArrayRef {
        // Resolves to the inherent `NullBuilder::finish_cloned`.
        Arc::new(self.finish_cloned())
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::Array;

    #[test]
    fn test_null_array_builder() {
        let mut builder = NullArray::builder(10);
        // The capacity hint must not contribute any slots.
        assert_eq!(0, builder.len());
        assert!(builder.is_empty());

        builder.append_null();
        builder.append_nulls(4);
        builder.append_empty_value();
        builder.append_empty_values(4);

        let arr = builder.finish();
        assert_eq!(10, arr.len());
        assert_eq!(0, arr.offset());
        assert_eq!(10, arr.null_count());
    }

    #[test]
    fn test_null_array_builder_finish_cloned() {
        let mut builder = NullArray::builder(16);
        builder.append_null();
        builder.append_empty_value();
        builder.append_empty_values(3);
        let mut array = builder.finish_cloned();
        assert_eq!(5, array.null_count());
        // `finish_cloned` must leave the builder untouched.
        assert_eq!(5, builder.len());

        builder.append_empty_values(5);
        array = builder.finish();
        assert_eq!(10, array.null_count());

        // `finish` must reset the builder so that it can be reused.
        assert!(builder.is_empty());
        assert_eq!(0, builder.finish().len());
    }
}
2 changes: 1 addition & 1 deletion arrow-array/src/builder/struct_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ impl ArrayBuilder for StructBuilder {
pub fn make_builder(datatype: &DataType, capacity: usize) -> Box<dyn ArrayBuilder> {
use crate::builder::*;
match datatype {
DataType::Null => unimplemented!(),
DataType::Null => Box::new(NullBuilder::with_capacity(capacity)),
DataType::Boolean => Box::new(BooleanBuilder::with_capacity(capacity)),
DataType::Int8 => Box::new(Int8Builder::with_capacity(capacity)),
DataType::Int16 => Box::new(Int16Builder::with_capacity(capacity)),
Expand Down
17 changes: 17 additions & 0 deletions arrow-array/src/cast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -799,6 +799,15 @@ pub trait AsArray: private::Sealed {
self.as_list_opt().expect("list array")
}

/// Downcast this to a [`FixedSizeListArray`] returning `None` if not possible
fn as_fixed_size_list_opt(&self) -> Option<&FixedSizeListArray>;

/// Downcast this to a [`FixedSizeListArray`] panicking if not possible
///
/// # Panics
///
/// Panics with the message `fixed size list array` when
/// `as_fixed_size_list_opt` returns `None`.
fn as_fixed_size_list(&self) -> &FixedSizeListArray {
    let downcast = self.as_fixed_size_list_opt();
    downcast.expect("fixed size list array")
}

/// Downcast this to a [`MapArray`] returning `None` if not possible
fn as_map_opt(&self) -> Option<&MapArray>;

Expand Down Expand Up @@ -839,6 +848,10 @@ impl AsArray for dyn Array + '_ {
self.as_any().downcast_ref()
}

fn as_fixed_size_list_opt(&self) -> Option<&FixedSizeListArray> {
    // Target type spelled out instead of being inferred from the return type.
    self.as_any().downcast_ref::<FixedSizeListArray>()
}

fn as_map_opt(&self) -> Option<&MapArray> {
    // Target type spelled out instead of being inferred from the return type.
    self.as_any().downcast_ref::<MapArray>()
}
Expand Down Expand Up @@ -872,6 +885,10 @@ impl AsArray for ArrayRef {
self.as_ref().as_list_opt()
}

fn as_fixed_size_list_opt(&self) -> Option<&FixedSizeListArray> {
self.as_ref().as_fixed_size_list_opt()
}

fn as_map_opt(&self) -> Option<&MapArray> {
    // Target type spelled out instead of being inferred from the return type.
    self.as_any().downcast_ref::<MapArray>()
}
Expand Down
Loading

0 comments on commit 8d47965

Please sign in to comment.