Skip to content

Commit

Permalink
Improve dealing with raw buffers for texture read/write (#1744)
Browse files Browse the repository at this point in the history
* Replace TextureRowDataInfo with the more versatile Texture2DBufferInfo

* comment & naming fixes
  • Loading branch information
Wumpf authored Apr 3, 2023
1 parent d5b68f2 commit 2728854
Show file tree
Hide file tree
Showing 6 changed files with 119 additions and 72 deletions.
10 changes: 4 additions & 6 deletions crates/re_renderer/src/allocator/cpu_write_gpu_read_belt.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use std::{num::NonZeroU32, sync::mpsc};

use crate::wgpu_resources::{BufferDesc, GpuBuffer, GpuBufferPool, TextureRowDataInfo};
use crate::wgpu_resources::{BufferDesc, GpuBuffer, GpuBufferPool, Texture2DBufferInfo};

/// A sub-allocated staging buffer that can be written to.
///
Expand Down Expand Up @@ -100,15 +100,13 @@ where
destination: wgpu::ImageCopyTexture<'_>,
copy_extent: glam::UVec2,
) {
let bytes_per_row = TextureRowDataInfo::new(destination.texture.format(), copy_extent.x)
.bytes_per_row_padded;
let buffer_info = Texture2DBufferInfo::new(destination.texture.format(), copy_extent);

// Validate that we stay within the written part of the slice (wgpu can't fully know our intention here, so we have to check).
// We go one step further and require the size to be exactly equal - it's too unlikely that you wrote more than is needed!
// (and if you did you probably have regrets anyways!)
let required_buffer_size = bytes_per_row * copy_extent.y;
debug_assert_eq!(
required_buffer_size as usize,
buffer_info.buffer_size_padded as usize,
self.num_written() * std::mem::size_of::<T>()
);

Expand All @@ -117,7 +115,7 @@ where
buffer: &self.chunk_buffer,
layout: wgpu::ImageDataLayout {
offset: self.byte_offset_in_chunk_buffer,
bytes_per_row: NonZeroU32::new(bytes_per_row),
bytes_per_row: NonZeroU32::new(buffer_info.bytes_per_row_padded),
rows_per_image: None,
},
},
Expand Down
10 changes: 4 additions & 6 deletions crates/re_renderer/src/allocator/gpu_readback_belt.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use std::{num::NonZeroU32, ops::Range, sync::mpsc};

use crate::wgpu_resources::{BufferDesc, GpuBuffer, GpuBufferPool, TextureRowDataInfo};
use crate::wgpu_resources::{BufferDesc, GpuBuffer, GpuBufferPool, Texture2DBufferInfo};

/// Identifier used to identify a buffer upon retrieval of the data.
///
Expand Down Expand Up @@ -61,13 +61,11 @@ impl GpuReadbackBuffer {
source.texture.format().describe().block_size as u64,
);

let bytes_per_row = TextureRowDataInfo::new(source.texture.format(), copy_extents.x)
.bytes_per_row_padded;
let num_bytes = bytes_per_row * copy_extents.y;
let buffer_info = Texture2DBufferInfo::new(source.texture.format(), *copy_extents);

// Validate that we stay within the slice (wgpu can't fully know our intention here, so we have to check).
debug_assert!(
(num_bytes as u64) <= self.range_in_chunk.end - start_offset,
buffer_info.buffer_size_padded <= self.range_in_chunk.end - start_offset,
"Texture data is too large to fit into the readback buffer!"
);

Expand All @@ -77,7 +75,7 @@ impl GpuReadbackBuffer {
buffer: &self.chunk_buffer,
layout: wgpu::ImageDataLayout {
offset: start_offset,
bytes_per_row: NonZeroU32::new(bytes_per_row),
bytes_per_row: NonZeroU32::new(buffer_info.bytes_per_row_padded),
rows_per_image: None,
},
},
Expand Down
44 changes: 18 additions & 26 deletions crates/re_renderer/src/draw_phases/picking_layer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ use crate::{
allocator::create_and_fill_uniform_buffer,
global_bindings::FrameUniformBuffer,
view_builder::ViewBuilder,
wgpu_resources::{GpuBindGroup, GpuTexture, TextureDesc, TextureRowDataInfo},
wgpu_resources::{GpuBindGroup, GpuTexture, Texture2DBufferInfo, TextureDesc},
DebugLabel, GpuReadbackBuffer, GpuReadbackIdentifier, IntRect, RenderContext,
};

Expand Down Expand Up @@ -122,12 +122,23 @@ impl PickingLayerProcessor {
readback_identifier: GpuReadbackIdentifier,
readback_user_data: T,
) -> Self {
let row_info = TextureRowDataInfo::new(Self::PICKING_LAYER_FORMAT, picking_rect.width());
let buffer_size = row_info.bytes_per_row_padded * picking_rect.height();
let row_info_id = Texture2DBufferInfo::new(Self::PICKING_LAYER_FORMAT, picking_rect.extent);
//let row_info_depth =
//Texture2DBufferInfo::new(Self::PICKING_LAYER_FORMAT, picking_rect.extent);

// Offset of the depth buffer in the readback buffer needs to be aligned to size of a depth pixel.
// This is "trivially true" if the size of the depth format is a multiple of the size of the id format.
debug_assert!(
Self::PICKING_LAYER_FORMAT.describe().block_size
% Self::PICKING_LAYER_DEPTH_FORMAT.describe().block_size
== 0
);
let buffer_size = row_info_id.buffer_size_padded; // + row_info_depth.buffer_size_padded;

let readback_buffer = ctx.gpu_readback_belt.lock().allocate(
&ctx.device,
&ctx.gpu_resources.buffers,
buffer_size as u64,
buffer_size,
readback_identifier,
Box::new(ReadbackBeltMetadata {
picking_rect,
Expand Down Expand Up @@ -279,31 +290,12 @@ impl PickingLayerProcessor {
ctx.gpu_readback_belt
.lock()
.readback_data::<ReadbackBeltMetadata<T>>(identifier, |data, metadata| {
// Due to https://github.com/gfx-rs/wgpu/issues/3508 the data might be completely unaligned,
// so much, that we can't interpret it just as `PickingLayerId`.
// Therefore, we have to do a copy of the data regardless.
let row_info = TextureRowDataInfo::new(
let buffer_info_id = Texture2DBufferInfo::new(
Self::PICKING_LAYER_FORMAT,
metadata.picking_rect.extent.x,
metadata.picking_rect.extent,
);

// Copies need to use [u8] because of aforementioned alignment issues.
let mut picking_data = vec![
PickingLayerId::default();
(metadata.picking_rect.extent.x * metadata.picking_rect.extent.y)
as usize
];
let picking_data_as_u8 = bytemuck::cast_slice_mut(&mut picking_data);
for row in 0..metadata.picking_rect.extent.y {
let offset_padded = (row_info.bytes_per_row_padded * row) as usize;
let offset_unpadded = (row_info.bytes_per_row_unpadded * row) as usize;
picking_data_as_u8[offset_unpadded
..(offset_unpadded + row_info.bytes_per_row_unpadded as usize)]
.copy_from_slice(
&data[offset_padded
..(offset_padded + row_info.bytes_per_row_unpadded as usize)],
);
}
let picking_data = buffer_info_id.remove_padding_and_convert(data);

result = Some(PickingResult {
picking_data,
Expand Down
13 changes: 6 additions & 7 deletions crates/re_renderer/src/draw_phases/screenshot.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
//! Or alternatively try to render the images in several tiles 🤔. In any case this would greatly improve quality!
use crate::{
wgpu_resources::{GpuTexture, TextureDesc, TextureRowDataInfo},
wgpu_resources::{GpuTexture, Texture2DBufferInfo, TextureDesc},
DebugLabel, GpuReadbackBuffer, GpuReadbackIdentifier, RenderContext,
};

Expand All @@ -37,12 +37,11 @@ impl ScreenshotProcessor {
readback_identifier: GpuReadbackIdentifier,
readback_user_data: T,
) -> Self {
let row_info = TextureRowDataInfo::new(Self::SCREENSHOT_COLOR_FORMAT, resolution.x);
let buffer_size = row_info.bytes_per_row_padded * resolution.y;
let buffer_info = Texture2DBufferInfo::new(Self::SCREENSHOT_COLOR_FORMAT, resolution);
let screenshot_readback_buffer = ctx.gpu_readback_belt.lock().allocate(
&ctx.device,
&ctx.gpu_resources.buffers,
buffer_size as u64,
buffer_info.buffer_size_padded,
readback_identifier,
Box::new(ReadbackBeltMetadata {
extent: resolution,
Expand Down Expand Up @@ -130,9 +129,9 @@ impl ScreenshotProcessor {
.lock()
.readback_data::<ReadbackBeltMetadata<T>>(identifier, |data: &[u8], metadata| {
screenshot_was_available = Some(());
let texture_row_info =
TextureRowDataInfo::new(Self::SCREENSHOT_COLOR_FORMAT, metadata.extent.x);
let texture_data = texture_row_info.remove_padding(data);
let buffer_info =
Texture2DBufferInfo::new(Self::SCREENSHOT_COLOR_FORMAT, metadata.extent);
let texture_data = buffer_info.remove_padding(data);
on_screenshot(&texture_data, metadata.extent, metadata.user_data);
});
screenshot_was_available
Expand Down
21 changes: 10 additions & 11 deletions crates/re_renderer/src/renderer/depth_cloud.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ use crate::{
view_builder::ViewBuilder,
wgpu_resources::{
BindGroupDesc, BindGroupEntry, BindGroupLayoutDesc, GpuBindGroup, GpuBindGroupLayoutHandle,
GpuRenderPipelineHandle, GpuTexture, PipelineLayoutDesc, RenderPipelineDesc, TextureDesc,
TextureRowDataInfo,
GpuRenderPipelineHandle, GpuTexture, PipelineLayoutDesc, RenderPipelineDesc,
Texture2DBufferInfo, TextureDesc,
},
ColorMap, OutlineMaskPreference, PickingLayerProcessor,
};
Expand Down Expand Up @@ -336,34 +336,33 @@ fn create_and_upload_texture<T: bytemuck::Pod>(
.textures
.alloc(&ctx.device, &depth_texture_desc);

let TextureRowDataInfo {
bytes_per_row_unpadded: bytes_per_row_unaligned,
bytes_per_row_padded,
} = TextureRowDataInfo::new(depth_texture_desc.format, depth_texture_desc.size.width);

// Not supporting compressed formats here.
debug_assert!(depth_texture_desc.format.describe().block_dimensions == (1, 1));

let buffer_info =
Texture2DBufferInfo::new(depth_texture_desc.format, depth_cloud.depth_dimensions);

// TODO(andreas): CpuGpuWriteBelt should make it easier to do this.
let bytes_padding_per_row = (bytes_per_row_padded - bytes_per_row_unaligned) as usize;
let bytes_padding_per_row =
(buffer_info.bytes_per_row_padded - buffer_info.bytes_per_row_unpadded) as usize;
// Sanity check the padding size. If this happens something is seriously wrong, as it would imply
// that we can't express the required alignment with the block size.
debug_assert!(
bytes_padding_per_row % std::mem::size_of::<T>() == 0,
"Padding is not a multiple of pixel size. Can't correctly pad the texture data"
);
let num_pixel_padding_per_row = bytes_padding_per_row / std::mem::size_of::<T>();

let mut depth_texture_staging = ctx.cpu_write_gpu_read_belt.lock().allocate::<T>(
&ctx.device,
&ctx.gpu_resources.buffers,
data.len() + num_pixel_padding_per_row * depth_texture_desc.size.height as usize,
buffer_info.buffer_size_padded as usize / std::mem::size_of::<T>(),
);

// Fill with a single copy if possible, otherwise do multiple, filling in padding.
if num_pixel_padding_per_row == 0 {
if bytes_padding_per_row == 0 {
depth_texture_staging.extend_from_slice(data);
} else {
let num_pixel_padding_per_row = bytes_padding_per_row / std::mem::size_of::<T>();
for row in data.chunks(depth_texture_desc.size.width as usize) {
depth_texture_staging.extend_from_slice(row);
depth_texture_staging
Expand Down
93 changes: 77 additions & 16 deletions crates/re_renderer/src/wgpu_resources/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -117,44 +117,67 @@ impl WgpuResourcePools {
}
}

/// Utility for dealing with rows of raw texture data.
#[derive(Clone, Copy)]
pub struct TextureRowDataInfo {
/// Utility for dealing with buffers containing raw 2D texture data.
///
/// Texture copies to/from buffers require each row to be padded to
/// `wgpu::COPY_BYTES_PER_ROW_ALIGNMENT` (see [`Texture2DBufferInfo::new`]);
/// this struct describes both the unpadded and the padded layout of such a buffer.
#[derive(Clone)]
pub struct Texture2DBufferInfo {
/// How many bytes per row contain actual data.
pub bytes_per_row_unpadded: u32,

/// How many bytes per row are required to be allocated in total.
///
/// Padding bytes are always at the end of a row.
pub bytes_per_row_padded: u32,

/// Size required for an unpadded buffer.
pub buffer_size_unpadded: wgpu::BufferAddress,

/// Size required for a padded buffer as it is read/written from/to the GPU.
pub buffer_size_padded: wgpu::BufferAddress,
}

impl TextureRowDataInfo {
pub fn new(format: wgpu::TextureFormat, width: u32) -> Self {
impl Texture2DBufferInfo {
#[inline]
pub fn new(format: wgpu::TextureFormat, extent: glam::UVec2) -> Self {
let format_info = format.describe();
let width_blocks = width / format_info.block_dimensions.0 as u32;
let bytes_per_row_unaligned = width_blocks * format_info.block_size as u32;

let width_blocks = extent.x / format_info.block_dimensions.0 as u32;
let height_blocks = extent.y / format_info.block_dimensions.1 as u32;

let bytes_per_row_unpadded = width_blocks * format_info.block_size as u32;
let bytes_per_row_padded =
wgpu::util::align_to(bytes_per_row_unpadded, wgpu::COPY_BYTES_PER_ROW_ALIGNMENT);

Self {
bytes_per_row_unpadded: bytes_per_row_unaligned,
bytes_per_row_padded: wgpu::util::align_to(
bytes_per_row_unaligned,
wgpu::COPY_BYTES_PER_ROW_ALIGNMENT,
),
bytes_per_row_unpadded,
bytes_per_row_padded,
buffer_size_unpadded: (bytes_per_row_unpadded * height_blocks) as wgpu::BufferAddress,
buffer_size_padded: (bytes_per_row_padded * height_blocks) as wgpu::BufferAddress,
}
}

#[inline]
pub fn num_rows(&self) -> u32 {
self.buffer_size_padded as u32 / self.bytes_per_row_padded
}

/// Removes the padding from a buffer containing gpu texture data.
///
/// The passed in buffer is expected to be exactly of size [`Texture2DBufferInfo::buffer_size_padded`].
///
/// Note that if you're passing in gpu data, there are no alignment guarantees on the returned slice,
/// so do NOT convert it using [`bytemuck`]. Use [`Texture2DBufferInfo::remove_padding_and_convert`] instead.
pub fn remove_padding<'a>(&self, buffer: &'a [u8]) -> Cow<'a, [u8]> {
crate::profile_function!();

assert!(buffer.len() as wgpu::BufferAddress == self.buffer_size_padded);

if self.bytes_per_row_padded == self.bytes_per_row_unpadded {
return Cow::Borrowed(buffer);
}

let height = (buffer.len() as u32) / self.bytes_per_row_padded;
let mut unpadded_buffer =
Vec::with_capacity((self.bytes_per_row_unpadded * height) as usize);
let mut unpadded_buffer = Vec::with_capacity(self.buffer_size_unpadded as _);

for row in 0..height {
for row in 0..self.num_rows() {
let offset = (self.bytes_per_row_padded * row) as usize;
unpadded_buffer.extend_from_slice(
&buffer[offset..(offset + self.bytes_per_row_unpadded as usize)],
Expand All @@ -163,4 +186,42 @@ impl TextureRowDataInfo {

unpadded_buffer.into()
}

/// Removes the padding from a buffer containing gpu texture data and converts it to a given type.
///
/// The passed in buffer is expected to be exactly of size [`Texture2DBufferInfo::buffer_size_padded`].
///
/// The unpadded row size is expected to be a multiple of the size of the target type.
/// (Which means that, while uncommon, it technically doesn't need to be as big as a block in the pixel - this can be useful for e.g. packing wide bitfields)
pub fn remove_padding_and_convert<T: bytemuck::Pod>(&self, buffer: &[u8]) -> Vec<T> {
    crate::profile_function!();

    assert_eq!(buffer.len() as wgpu::BufferAddress, self.buffer_size_padded);
    assert_eq!(self.bytes_per_row_unpadded % std::mem::size_of::<T>() as u32, 0);

    // Due to https://github.com/gfx-rs/wgpu/issues/3508 the data might be completely unaligned,
    // so much, that we can't even interpret it as e.g. a u32 slice.
    // Therefore, we have to do a copy of the data regardless of whether it's padded or not.

    // Derive the element count from the (64 bit) unpadded buffer size rather than
    // `num_rows() * bytes_per_row_unpadded`, which would overflow u32 for very large textures.
    let num_elements = self.buffer_size_unpadded as usize / std::mem::size_of::<T>();
    let mut unpadded_buffer: Vec<T> = vec![T::zeroed(); num_elements];
    // TODO(andreas): Consider using unsafe set_len() instead of vec![] to avoid zeroing the memory.

    // The copy has to happen on a u8 slice, because any other type would assume some alignment that we can't guarantee because of the above.
    let unpadded_buffer_u8_view = bytemuck::cast_slice_mut(&mut unpadded_buffer);

    for row in 0..self.num_rows() {
        let offset_padded = (self.bytes_per_row_padded * row) as usize;
        let offset_unpadded = (self.bytes_per_row_unpadded * row) as usize;
        unpadded_buffer_u8_view
            [offset_unpadded..(offset_unpadded + self.bytes_per_row_unpadded as usize)]
            .copy_from_slice(
                &buffer[offset_padded..(offset_padded + self.bytes_per_row_unpadded as usize)],
            );
    }

    unpadded_buffer
}
}

1 comment on commit 2728854

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Rust Benchmark

Benchmark suite Current: 2728854 Previous: d5b68f2 Ratio
arrow2_convert/serialize/primitive/instances=100000/arrow2_convert 414731 ns/iter (± 503) 415037 ns/iter (± 1735) 1.00
arrow2_convert/serialize/primitive/instances=100000/arrow2/from_values 19550 ns/iter (± 303) 19350 ns/iter (± 108) 1.01
arrow2_convert/serialize/primitive/instances=100000/arrow2/from_vec 19362 ns/iter (± 96) 19368 ns/iter (± 136) 1.00
arrow2_convert/deserialize/primitive/instances=100000/arrow2_convert 96793 ns/iter (± 138) 97294 ns/iter (± 527) 0.99
arrow2_convert/deserialize/primitive/instances=100000/arrow2/validity_checks 162039 ns/iter (± 285) 160533 ns/iter (± 463) 1.01
arrow2_convert/deserialize/primitive/instances=100000/arrow2/validity_bypass 58261 ns/iter (± 203) 58559 ns/iter (± 232) 0.99
datastore/num_rows=1000/num_instances=1000/packed=false/insert/default 10591012 ns/iter (± 415006) 10566765 ns/iter (± 316059) 1.00
datastore/num_rows=1000/num_instances=1000/packed=false/insert/bucketsz=0 12591752 ns/iter (± 492193) 12142196 ns/iter (± 348934) 1.04
datastore/num_rows=1000/num_instances=1000/packed=false/insert/bucketsz=2 12137762 ns/iter (± 531891) 11633520 ns/iter (± 374682) 1.04
datastore/num_rows=1000/num_instances=1000/packed=false/insert/bucketsz=32 10686316 ns/iter (± 401781) 10442528 ns/iter (± 284770) 1.02
datastore/num_rows=1000/num_instances=1000/packed=false/insert/bucketsz=2048 10562339 ns/iter (± 393734) 10227569 ns/iter (± 306931) 1.03
datastore/num_rows=1000/num_instances=1000/packed=true/insert/default 10405402 ns/iter (± 587856) 10019789 ns/iter (± 369782) 1.04
datastore/num_rows=1000/num_instances=1000/packed=true/insert/bucketsz=0 12719141 ns/iter (± 659290) 11631094 ns/iter (± 466069) 1.09
datastore/num_rows=1000/num_instances=1000/packed=true/insert/bucketsz=2 12152703 ns/iter (± 747051) 11424750 ns/iter (± 370588) 1.06
datastore/num_rows=1000/num_instances=1000/packed=true/insert/bucketsz=32 10314032 ns/iter (± 559231) 9937837 ns/iter (± 287115) 1.04
datastore/num_rows=1000/num_instances=1000/packed=true/insert/bucketsz=2048 10118792 ns/iter (± 387465) 9866332 ns/iter (± 518822) 1.03
datastore/num_rows=1000/num_instances=1000/packed=false/latest_at/default 1819 ns/iter (± 25) 1817 ns/iter (± 17) 1.00
datastore/num_rows=1000/num_instances=1000/packed=false/latest_at/bucketsz=0 1804 ns/iter (± 1) 1832 ns/iter (± 8) 0.98
datastore/num_rows=1000/num_instances=1000/packed=false/latest_at/bucketsz=2 1804 ns/iter (± 8) 1828 ns/iter (± 11) 0.99
datastore/num_rows=1000/num_instances=1000/packed=false/latest_at/bucketsz=32 1810 ns/iter (± 7) 1821 ns/iter (± 11) 0.99
datastore/num_rows=1000/num_instances=1000/packed=false/latest_at/bucketsz=2048 1801 ns/iter (± 2) 1798 ns/iter (± 19) 1.00
datastore/num_rows=1000/num_instances=1000/packed=true/latest_at/default 1857 ns/iter (± 21) 1828 ns/iter (± 28) 1.02
datastore/num_rows=1000/num_instances=1000/packed=true/latest_at/bucketsz=0 1825 ns/iter (± 2) 1844 ns/iter (± 5) 0.99
datastore/num_rows=1000/num_instances=1000/packed=true/latest_at/bucketsz=2 1840 ns/iter (± 3) 1835 ns/iter (± 4) 1.00
datastore/num_rows=1000/num_instances=1000/packed=true/latest_at/bucketsz=32 1802 ns/iter (± 1) 1830 ns/iter (± 5) 0.98
datastore/num_rows=1000/num_instances=1000/packed=true/latest_at/bucketsz=2048 1807 ns/iter (± 2) 1812 ns/iter (± 6) 1.00
datastore/num_rows=1000/num_instances=1000/packed=false/latest_at_missing/primary/default 279 ns/iter (± 0) 279 ns/iter (± 0) 1
datastore/num_rows=1000/num_instances=1000/packed=false/latest_at_missing/secondaries/default 434 ns/iter (± 0) 434 ns/iter (± 0) 1
datastore/num_rows=1000/num_instances=1000/packed=false/latest_at_missing/primary/bucketsz=0 279 ns/iter (± 0) 280 ns/iter (± 0) 1.00
datastore/num_rows=1000/num_instances=1000/packed=false/latest_at_missing/secondaries/bucketsz=0 443 ns/iter (± 0) 443 ns/iter (± 2) 1
datastore/num_rows=1000/num_instances=1000/packed=false/latest_at_missing/primary/bucketsz=2 279 ns/iter (± 0) 280 ns/iter (± 1) 1.00
datastore/num_rows=1000/num_instances=1000/packed=false/latest_at_missing/secondaries/bucketsz=2 443 ns/iter (± 0) 443 ns/iter (± 1) 1
datastore/num_rows=1000/num_instances=1000/packed=false/latest_at_missing/primary/bucketsz=32 279 ns/iter (± 0) 280 ns/iter (± 0) 1.00
datastore/num_rows=1000/num_instances=1000/packed=false/latest_at_missing/secondaries/bucketsz=32 440 ns/iter (± 1) 440 ns/iter (± 1) 1
datastore/num_rows=1000/num_instances=1000/packed=false/latest_at_missing/primary/bucketsz=2048 279 ns/iter (± 0) 281 ns/iter (± 0) 0.99
datastore/num_rows=1000/num_instances=1000/packed=false/latest_at_missing/secondaries/bucketsz=2048 435 ns/iter (± 0) 436 ns/iter (± 1) 1.00
datastore/num_rows=1000/num_instances=1000/packed=true/latest_at_missing/primary/default 281 ns/iter (± 0) 279 ns/iter (± 0) 1.01
datastore/num_rows=1000/num_instances=1000/packed=true/latest_at_missing/secondaries/default 434 ns/iter (± 0) 434 ns/iter (± 0) 1
datastore/num_rows=1000/num_instances=1000/packed=true/latest_at_missing/primary/bucketsz=0 281 ns/iter (± 0) 279 ns/iter (± 0) 1.01
datastore/num_rows=1000/num_instances=1000/packed=true/latest_at_missing/secondaries/bucketsz=0 442 ns/iter (± 0) 442 ns/iter (± 0) 1
datastore/num_rows=1000/num_instances=1000/packed=true/latest_at_missing/primary/bucketsz=2 282 ns/iter (± 0) 280 ns/iter (± 0) 1.01
datastore/num_rows=1000/num_instances=1000/packed=true/latest_at_missing/secondaries/bucketsz=2 442 ns/iter (± 1) 443 ns/iter (± 1) 1.00
datastore/num_rows=1000/num_instances=1000/packed=true/latest_at_missing/primary/bucketsz=32 281 ns/iter (± 0) 280 ns/iter (± 0) 1.00
datastore/num_rows=1000/num_instances=1000/packed=true/latest_at_missing/secondaries/bucketsz=32 443 ns/iter (± 1) 440 ns/iter (± 0) 1.01
datastore/num_rows=1000/num_instances=1000/packed=true/latest_at_missing/primary/bucketsz=2048 282 ns/iter (± 0) 280 ns/iter (± 0) 1.01
datastore/num_rows=1000/num_instances=1000/packed=true/latest_at_missing/secondaries/bucketsz=2048 436 ns/iter (± 1) 436 ns/iter (± 0) 1
datastore/num_rows=1000/num_instances=1000/packed=false/range/default 11518597 ns/iter (± 460972) 12068378 ns/iter (± 542906) 0.95
datastore/num_rows=1000/num_instances=1000/packed=false/range/bucketsz=0 2244076 ns/iter (± 9399) 2184001 ns/iter (± 7501) 1.03
datastore/num_rows=1000/num_instances=1000/packed=false/range/bucketsz=2 2168131 ns/iter (± 10911) 2168306 ns/iter (± 13133) 1.00
datastore/num_rows=1000/num_instances=1000/packed=false/range/bucketsz=32 1910733 ns/iter (± 20584) 1933176 ns/iter (± 6424) 0.99
datastore/num_rows=1000/num_instances=1000/packed=false/range/bucketsz=2048 1927469 ns/iter (± 7854) 1828391 ns/iter (± 16691) 1.05
datastore/num_rows=1000/num_instances=1000/packed=true/range/default 10583992 ns/iter (± 694319) 11445904 ns/iter (± 511766) 0.92
datastore/num_rows=1000/num_instances=1000/packed=true/range/bucketsz=0 2174207 ns/iter (± 5470) 2192463 ns/iter (± 25423) 0.99
datastore/num_rows=1000/num_instances=1000/packed=true/range/bucketsz=2 2142803 ns/iter (± 5916) 2114848 ns/iter (± 20404) 1.01
datastore/num_rows=1000/num_instances=1000/packed=true/range/bucketsz=32 1870006 ns/iter (± 7904) 1844397 ns/iter (± 9311) 1.01
datastore/num_rows=1000/num_instances=1000/packed=true/range/bucketsz=2048 1843141 ns/iter (± 17078) 1820030 ns/iter (± 8899) 1.01
vector_ops/sort/instances=10000/smallvec/n=4 12487 ns/iter (± 18) 12493 ns/iter (± 35) 1.00
vector_ops/sort/instances=10000/tinyvec/n=4 9646 ns/iter (± 20) 9646 ns/iter (± 37) 1
vector_ops/sort/instances=10000/vec 9641 ns/iter (± 57) 9660 ns/iter (± 45) 1.00
vector_ops/split_off/instances=10000/smallvec/n=4/manual 5566 ns/iter (± 10) 5613 ns/iter (± 51) 0.99
vector_ops/split_off/instances=10000/tinyvec/n=4 2733 ns/iter (± 14) 2734 ns/iter (± 20) 1.00
vector_ops/split_off/instances=10000/tinyvec/n=4/manual 2743 ns/iter (± 28) 2721 ns/iter (± 30) 1.01
vector_ops/split_off/instances=10000/vec 2730 ns/iter (± 20) 2731 ns/iter (± 26) 1.00
vector_ops/split_off/instances=10000/vec/manual 2724 ns/iter (± 21) 2728 ns/iter (± 22) 1.00
vector_ops/swap/instances=10000/smallvec/n=4 32776 ns/iter (± 22) 32828 ns/iter (± 130) 1.00
vector_ops/swap/instances=10000/tinyvec/n=4 18273 ns/iter (± 18) 18328 ns/iter (± 84) 1.00
vector_ops/swap/instances=10000/vec 12312 ns/iter (± 16) 12338 ns/iter (± 56) 1.00
vector_ops/swap_opt/instances=10000/smallvec/n=4 42720 ns/iter (± 61) 42772 ns/iter (± 130) 1.00
vector_ops/swap_opt/instances=10000/tinyvec/n=4 28804 ns/iter (± 335) 28838 ns/iter (± 87) 1.00
vector_ops/swap_opt/instances=10000/vec 19891 ns/iter (± 38) 19861 ns/iter (± 189) 1.00
mono_points_arrow/generate_message_bundles 44405764 ns/iter (± 817005) 41770543 ns/iter (± 1139998) 1.06
mono_points_arrow/generate_messages 180352535 ns/iter (± 1313741) 165160729 ns/iter (± 1358921) 1.09
mono_points_arrow/encode_log_msg 217191826 ns/iter (± 894658) 206805296 ns/iter (± 905202) 1.05
mono_points_arrow/encode_total 441239600 ns/iter (± 1647479) 415400764 ns/iter (± 2304647) 1.06
mono_points_arrow/decode_log_msg 267097867 ns/iter (± 1156351) 250293303 ns/iter (± 966752) 1.07
mono_points_arrow/decode_message_bundles 95806663 ns/iter (± 1445049) 83666056 ns/iter (± 892689) 1.15
mono_points_arrow/decode_total 359931106 ns/iter (± 1856965) 332996742 ns/iter (± 1808515) 1.08
mono_points_arrow_batched/generate_message_bundles 34034189 ns/iter (± 1924243) 31411624 ns/iter (± 1198870) 1.08
mono_points_arrow_batched/generate_messages 9943646 ns/iter (± 965471) 8463616 ns/iter (± 379740) 1.17
mono_points_arrow_batched/encode_log_msg 1807957 ns/iter (± 5542) 1811105 ns/iter (± 7352) 1.00
mono_points_arrow_batched/encode_total 46463829 ns/iter (± 2142837) 43747820 ns/iter (± 1328384) 1.06
mono_points_arrow_batched/decode_log_msg 981193 ns/iter (± 2548) 974147 ns/iter (± 3375) 1.01
mono_points_arrow_batched/decode_message_bundles 16642589 ns/iter (± 1016405) 16243651 ns/iter (± 681135) 1.02
mono_points_arrow_batched/decode_total 19838088 ns/iter (± 1234294) 17719584 ns/iter (± 811858) 1.12
batch_points_arrow/generate_message_bundles 284377 ns/iter (± 2006) 283388 ns/iter (± 2244) 1.00
batch_points_arrow/generate_messages 7606 ns/iter (± 29) 7609 ns/iter (± 29) 1.00
batch_points_arrow/encode_log_msg 379697 ns/iter (± 1190) 381696 ns/iter (± 1405) 0.99
batch_points_arrow/encode_total 697869 ns/iter (± 2541) 698278 ns/iter (± 4783) 1.00
batch_points_arrow/decode_log_msg 334821 ns/iter (± 553) 337620 ns/iter (± 1046) 0.99
batch_points_arrow/decode_message_bundles 2901 ns/iter (± 8) 2925 ns/iter (± 26) 0.99
batch_points_arrow/decode_total 345162 ns/iter (± 668) 349185 ns/iter (± 992) 0.99
arrow_mono_points/insert 6826519940 ns/iter (± 22461105) 6103329517 ns/iter (± 19970787) 1.12
arrow_mono_points/query 1816930 ns/iter (± 8102) 1757622 ns/iter (± 13180) 1.03
arrow_batch_points/insert 3003327 ns/iter (± 7321) 2966514 ns/iter (± 19990) 1.01
arrow_batch_points/query 16457 ns/iter (± 44) 16364 ns/iter (± 176) 1.01
arrow_batch_vecs/insert 43298 ns/iter (± 421) 42789 ns/iter (± 188) 1.01
arrow_batch_vecs/query 506528 ns/iter (± 875) 506207 ns/iter (± 4548) 1.00
tuid/Tuid::random 34 ns/iter (± 0) 34 ns/iter (± 0) 1

This comment was automatically generated by workflow using github-action-benchmark.

Please sign in to comment.