Skip to content

Commit

Permalink
regenerate arrow-ipc/src/gen with patched flatbuffers (#6426)
Browse files Browse the repository at this point in the history
* regenerate arrow-ipc/src/gen with patched flatbuffers

* use git repo instead of local path

* add backticks

* expand allowed overage to accommodate more alignment padding

* re-enable nanoarrow integration test

* add assertions that struct alignment is correct

* remove struct alignment assertions

* apply a patch to generated code rather than requiring patched flatc

* point to google/flatbuffers with pub PushAlignment

* add license header to gen.patch

* use flatbuffers 24.12.23

* remove unnecessary gen.patch
  • Loading branch information
bkietz authored Jan 8, 2025
1 parent a47d996 commit 485dbb1
Show file tree
Hide file tree
Showing 10 changed files with 609 additions and 332 deletions.
3 changes: 1 addition & 2 deletions .github/workflows/integration.yml
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,7 @@ jobs:
ARROW_INTEGRATION_JAVA: ON
ARROW_INTEGRATION_JS: ON
ARCHERY_INTEGRATION_TARGET_IMPLEMENTATIONS: "rust"
# Disable nanoarrow integration, due to https://github.com/apache/arrow-rs/issues/5052
ARCHERY_INTEGRATION_WITH_NANOARROW: "0"
ARCHERY_INTEGRATION_WITH_NANOARROW: "1"
# https://github.com/apache/arrow/pull/38403/files#r1371281630
ARCHERY_INTEGRATION_WITH_RUST: "1"
# These are necessary because the github runner overrides $HOME
Expand Down
14 changes: 7 additions & 7 deletions arrow-flight/src/encode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1708,7 +1708,7 @@ mod tests {
])
.unwrap();

verify_encoded_split(batch, 112).await;
verify_encoded_split(batch, 120).await;
}

#[tokio::test]
Expand All @@ -1719,7 +1719,7 @@ mod tests {

// overage is much higher than ideal
// https://github.com/apache/arrow-rs/issues/3478
verify_encoded_split(batch, 4304).await;
verify_encoded_split(batch, 4312).await;
}

#[tokio::test]
Expand Down Expand Up @@ -1755,7 +1755,7 @@ mod tests {
// 5k over limit (which is 2x larger than limit of 5k)
// overage is much higher than ideal
// https://github.com/apache/arrow-rs/issues/3478
verify_encoded_split(batch, 5800).await;
verify_encoded_split(batch, 5808).await;
}

#[tokio::test]
Expand All @@ -1771,7 +1771,7 @@ mod tests {

let batch = RecordBatch::try_from_iter(vec![("a1", Arc::new(array) as _)]).unwrap();

verify_encoded_split(batch, 48).await;
verify_encoded_split(batch, 56).await;
}

#[tokio::test]
Expand All @@ -1785,7 +1785,7 @@ mod tests {

// overage is much higher than ideal
// https://github.com/apache/arrow-rs/issues/3478
verify_encoded_split(batch, 3328).await;
verify_encoded_split(batch, 3336).await;
}

#[tokio::test]
Expand All @@ -1799,7 +1799,7 @@ mod tests {

// overage is much higher than ideal
// https://github.com/apache/arrow-rs/issues/3478
verify_encoded_split(batch, 5280).await;
verify_encoded_split(batch, 5288).await;
}

#[tokio::test]
Expand All @@ -1824,7 +1824,7 @@ mod tests {

// overage is much higher than ideal
// https://github.com/apache/arrow-rs/issues/3478
verify_encoded_split(batch, 4128).await;
verify_encoded_split(batch, 4136).await;
}

/// Return size, in memory of flight data
Expand Down
2 changes: 1 addition & 1 deletion arrow-ipc/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ arrow-array = { workspace = true }
arrow-buffer = { workspace = true }
arrow-data = { workspace = true }
arrow-schema = { workspace = true }
flatbuffers = { version = "24.3.25", default-features = false }
flatbuffers = { version = "24.12.23", default-features = false }
lz4_flex = { version = "0.11", default-features = false, features = ["std", "frame"], optional = true }
zstd = { version = "0.13.0", default-features = false, optional = true }

Expand Down
90 changes: 47 additions & 43 deletions arrow-ipc/regen.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,33 +21,36 @@ DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
# Change to the toplevel `arrow-rs` directory
pushd $DIR/../

echo "Build flatc from source ..."

FB_URL="https://github.com/google/flatbuffers"
FB_DIR="arrow/.flatbuffers"
FLATC="$FB_DIR/bazel-bin/flatc"

if [ -z $(which bazel) ]; then
echo "bazel is required to build flatc"
exit 1
fi

echo "Bazel version: $(bazel version | head -1 | awk -F':' '{print $2}')"

if [ ! -e $FB_DIR ]; then
echo "git clone $FB_URL ..."
git clone -b master --no-tag --depth 1 $FB_URL $FB_DIR
if [ -z "$FLATC" ]; then
echo "Build flatc from source ..."

FB_URL="https://github.com/google/flatbuffers"
FB_DIR="arrow/.flatbuffers"
FLATC="$FB_DIR/bazel-bin/flatc"

if [ -z $(which bazel) ]; then
echo "bazel is required to build flatc"
exit 1
fi

echo "Bazel version: $(bazel version | head -1 | awk -F':' '{print $2}')"

if [ ! -e $FB_DIR ]; then
echo "git clone $FB_URL ..."
git clone -b master --no-tag --depth 1 $FB_URL $FB_DIR
else
echo "git pull $FB_URL ..."
git -C $FB_DIR pull
fi

pushd $FB_DIR
echo "run: bazel build :flatc ..."
bazel build :flatc
popd
else
echo "git pull $FB_URL ..."
git -C $FB_DIR pull
echo "Using flatc $FLATC ..."
fi

pushd $FB_DIR
echo "run: bazel build :flatc ..."
bazel build :flatc
popd


# Execute the code generation:
$FLATC --filename-suffix "" --rust -o arrow-ipc/src/gen/ format/*.fbs

Expand Down Expand Up @@ -99,37 +102,38 @@ for f in `ls *.rs`; do
fi

echo "Modifying: $f"
sed -i '' '/extern crate flatbuffers;/d' $f
sed -i '' '/use self::flatbuffers::EndianScalar;/d' $f
sed -i '' '/\#\[allow(unused_imports, dead_code)\]/d' $f
sed -i '' '/pub mod org {/d' $f
sed -i '' '/pub mod apache {/d' $f
sed -i '' '/pub mod arrow {/d' $f
sed -i '' '/pub mod flatbuf {/d' $f
sed -i '' '/} \/\/ pub mod flatbuf/d' $f
sed -i '' '/} \/\/ pub mod arrow/d' $f
sed -i '' '/} \/\/ pub mod apache/d' $f
sed -i '' '/} \/\/ pub mod org/d' $f
sed -i '' '/use core::mem;/d' $f
sed -i '' '/use core::cmp::Ordering;/d' $f
sed -i '' '/use self::flatbuffers::{EndianScalar, Follow};/d' $f
sed --in-place='' '/extern crate flatbuffers;/d' $f
sed --in-place='' '/use self::flatbuffers::EndianScalar;/d' $f
sed --in-place='' '/\#\[allow(unused_imports, dead_code)\]/d' $f
sed --in-place='' '/pub mod org {/d' $f
sed --in-place='' '/pub mod apache {/d' $f
sed --in-place='' '/pub mod arrow {/d' $f
sed --in-place='' '/pub mod flatbuf {/d' $f
sed --in-place='' '/} \/\/ pub mod flatbuf/d' $f
sed --in-place='' '/} \/\/ pub mod arrow/d' $f
sed --in-place='' '/} \/\/ pub mod apache/d' $f
sed --in-place='' '/} \/\/ pub mod org/d' $f
sed --in-place='' '/use core::mem;/d' $f
sed --in-place='' '/use core::cmp::Ordering;/d' $f
sed --in-place='' '/use self::flatbuffers::{EndianScalar, Follow};/d' $f

# required by flatc 1.12.0+
sed -i '' "/\#\!\[allow(unused_imports, dead_code)\]/d" $f
sed --in-place='' "/\#\!\[allow(unused_imports, dead_code)\]/d" $f
for name in ${names[@]}; do
sed -i '' "/use crate::${name}::\*;/d" $f
sed -i '' "s/use self::flatbuffers::Verifiable;/use flatbuffers::Verifiable;/g" $f
sed --in-place='' "/use crate::${name}::\*;/d" $f
sed --in-place='' "s/use self::flatbuffers::Verifiable;/use flatbuffers::Verifiable;/g" $f
done

# Replace all occurrences of "type__" with "type_", "TYPE__" with "TYPE_".
sed -i '' 's/type__/type_/g' $f
sed -i '' 's/TYPE__/TYPE_/g' $f
sed --in-place='' 's/type__/type_/g' $f
sed --in-place='' 's/TYPE__/TYPE_/g' $f

# Some files need prefixes
if [[ $f == "File.rs" ]]; then
# Now prefix the file with the static contents
echo -e "${PREFIX}" "${SCHEMA_IMPORT}" | cat - $f > temp && mv temp $f
elif [[ $f == "Message.rs" ]]; then
sed --in-place='' 's/List<Int16>/\`List<Int16>\`/g' $f
echo -e "${PREFIX}" "${SCHEMA_IMPORT}" "${SPARSE_TENSOR_IMPORT}" "${TENSOR_IMPORT}" | cat - $f > temp && mv temp $f
elif [[ $f == "SparseTensor.rs" ]]; then
echo -e "${PREFIX}" "${SCHEMA_IMPORT}" "${TENSOR_IMPORT}" | cat - $f > temp && mv temp $f
Expand Down
26 changes: 16 additions & 10 deletions arrow-ipc/src/gen/File.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ use flatbuffers::EndianScalar;
use std::{cmp::Ordering, mem};
// automatically generated by the FlatBuffers compiler, do not modify

// @generated

// struct Block, aligned to 8
#[repr(transparent)]
#[derive(Clone, Copy, PartialEq)]
Expand Down Expand Up @@ -64,6 +66,10 @@ impl<'b> flatbuffers::Push for Block {
let src = ::core::slice::from_raw_parts(self as *const Block as *const u8, Self::size());
dst.copy_from_slice(src);
}
#[inline]
fn alignment() -> flatbuffers::PushAlignment {
flatbuffers::PushAlignment::new(8)
}
}

impl<'a> flatbuffers::Verifiable for Block {
Expand Down Expand Up @@ -211,8 +217,8 @@ impl<'a> Footer<'a> {
Footer { _tab: table }
}
#[allow(unused_mut)]
pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr>(
_fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr>,
pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr, A: flatbuffers::Allocator + 'bldr>(
_fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr, A>,
args: &'args FooterArgs<'args>,
) -> flatbuffers::WIPOffset<Footer<'bldr>> {
let mut builder = FooterBuilder::new(_fbb);
Expand Down Expand Up @@ -344,11 +350,11 @@ impl<'a> Default for FooterArgs<'a> {
}
}

pub struct FooterBuilder<'a: 'b, 'b> {
fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a>,
pub struct FooterBuilder<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> {
fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a, A>,
start_: flatbuffers::WIPOffset<flatbuffers::TableUnfinishedWIPOffset>,
}
impl<'a: 'b, 'b> FooterBuilder<'a, 'b> {
impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> FooterBuilder<'a, 'b, A> {
#[inline]
pub fn add_version(&mut self, version: MetadataVersion) {
self.fbb_
Expand Down Expand Up @@ -388,7 +394,7 @@ impl<'a: 'b, 'b> FooterBuilder<'a, 'b> {
);
}
#[inline]
pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>) -> FooterBuilder<'a, 'b> {
pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>) -> FooterBuilder<'a, 'b, A> {
let start = _fbb.start_table();
FooterBuilder {
fbb_: _fbb,
Expand Down Expand Up @@ -474,16 +480,16 @@ pub unsafe fn size_prefixed_root_as_footer_unchecked(buf: &[u8]) -> Footer {
flatbuffers::size_prefixed_root_unchecked::<Footer>(buf)
}
#[inline]
pub fn finish_footer_buffer<'a, 'b>(
fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>,
pub fn finish_footer_buffer<'a, 'b, A: flatbuffers::Allocator + 'a>(
fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>,
root: flatbuffers::WIPOffset<Footer<'a>>,
) {
fbb.finish(root, None);
}

#[inline]
pub fn finish_size_prefixed_footer_buffer<'a, 'b>(
fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>,
pub fn finish_size_prefixed_footer_buffer<'a, 'b, A: flatbuffers::Allocator + 'a>(
fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>,
root: flatbuffers::WIPOffset<Footer<'a>>,
) {
fbb.finish_size_prefixed(root, None);
Expand Down
Loading

0 comments on commit 485dbb1

Please sign in to comment.