Coverage Report

Created: 2024-10-13 08:39

/Users/andrewlamb/Software/datafusion/datafusion/physical-plan/src/aggregates/group_values/group_column.rs
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
use arrow::array::make_view;
19
use arrow::array::BufferBuilder;
20
use arrow::array::ByteView;
21
use arrow::array::GenericBinaryArray;
22
use arrow::array::GenericStringArray;
23
use arrow::array::OffsetSizeTrait;
24
use arrow::array::PrimitiveArray;
25
use arrow::array::{Array, ArrayRef, ArrowPrimitiveType, AsArray};
26
use arrow::buffer::OffsetBuffer;
27
use arrow::buffer::ScalarBuffer;
28
use arrow::datatypes::ByteArrayType;
29
use arrow::datatypes::ByteViewType;
30
use arrow::datatypes::DataType;
31
use arrow::datatypes::GenericBinaryType;
32
use arrow_array::GenericByteViewArray;
33
use arrow_buffer::Buffer;
34
use datafusion_common::utils::proxy::VecAllocExt;
35
36
use crate::aggregates::group_values::null_builder::MaybeNullBufferBuilder;
37
use arrow_array::types::GenericStringType;
38
use datafusion_physical_expr_common::binary_map::{OutputType, INITIAL_BUFFER_CAPACITY};
39
use std::marker::PhantomData;
40
use std::mem;
41
use std::sync::Arc;
42
use std::vec;
43
44
const BYTE_VIEW_MAX_BLOCK_SIZE: usize = 2 * 1024 * 1024;
45
46
/// Trait for storing a single column of group values in [`GroupValuesColumn`]
47
///
48
/// Implementations of this trait store an in-progress collection of group values
49
/// (similar to various builders in Arrow-rs) that allow for quick comparison to
50
/// incoming rows.
51
///
52
/// [`GroupValuesColumn`]: crate::aggregates::group_values::GroupValuesColumn
53
pub trait GroupColumn: Send + Sync {
54
    /// Returns equal if the row stored in this builder at `lhs_row` is equal to
55
    /// the row in `array` at `rhs_row`
56
    ///
57
    /// Note that this comparison returns true if both elements are NULL
58
    fn equal_to(&self, lhs_row: usize, array: &ArrayRef, rhs_row: usize) -> bool;
59
    /// Appends the row at `row` in `array` to this builder
60
    fn append_val(&mut self, array: &ArrayRef, row: usize);
61
    /// Returns the number of rows stored in this builder
62
    fn len(&self) -> usize;
63
    /// Returns the number of bytes used by this [`GroupColumn`]
64
    fn size(&self) -> usize;
65
    /// Builds a new array from all of the stored rows
66
    fn build(self: Box<Self>) -> ArrayRef;
67
    /// Builds a new array from the first `n` stored rows, shifting the
68
    /// remaining rows to the start of the builder
69
    fn take_n(&mut self, n: usize) -> ArrayRef;
70
}
71
72
/// An implementation of [`GroupColumn`] for primitive values
73
///
74
/// Optimized to skip null buffer construction if the input is known to be non nullable
75
///
76
/// # Template parameters
77
///
78
/// `T`: the native Rust type that stores the data
79
/// `NULLABLE`: if the data can contain any nulls
80
#[derive(Debug)]
81
pub struct PrimitiveGroupValueBuilder<T: ArrowPrimitiveType, const NULLABLE: bool> {
82
    group_values: Vec<T::Native>,
83
    nulls: MaybeNullBufferBuilder,
84
}
85
86
impl<T, const NULLABLE: bool> PrimitiveGroupValueBuilder<T, NULLABLE>
87
where
88
    T: ArrowPrimitiveType,
89
{
90
    /// Create a new `PrimitiveGroupValueBuilder`
91
29
    pub fn new() -> Self {
92
29
        Self {
93
29
            group_values: vec![],
94
29
            nulls: MaybeNullBufferBuilder::new(),
95
29
        }
96
29
    }
97
}
98
99
impl<T: ArrowPrimitiveType, const NULLABLE: bool> GroupColumn
100
    for PrimitiveGroupValueBuilder<T, NULLABLE>
101
{
102
295k
    fn equal_to(&self, lhs_row: usize, array: &ArrayRef, rhs_row: usize) -> bool {
103
295k
        // Perf: skip null check (by short circuit) if input is not nullable
104
295k
        if NULLABLE {
105
295k
            let exist_null = self.nulls.is_null(lhs_row);
106
295k
            let input_null = array.is_null(rhs_row);
107
295k
            if let Some(
result196k
) = nulls_equal_to(exist_null, input_null) {
108
196k
                return result;
109
98.4k
            }
110
            // Otherwise, we need to check their values
111
2
        }
112
113
98.4k
        self.group_values[lhs_row] == array.as_primitive::<T>().value(rhs_row)
114
295k
    }
115
116
337
    fn append_val(&mut self, array: &ArrayRef, row: usize) {
117
337
        // Perf: skip null check if input can't have nulls
118
337
        if NULLABLE {
119
335
            if array.is_null(row) {
120
107
                self.nulls.append(true);
121
107
                self.group_values.push(T::default_value());
122
228
            } else {
123
228
                self.nulls.append(false);
124
228
                self.group_values.push(array.as_primitive::<T>().value(row));
125
228
            }
126
2
        } else {
127
2
            self.group_values.push(array.as_primitive::<T>().value(row));
128
2
        }
129
337
    }
130
131
664
    fn len(&self) -> usize {
132
664
        self.group_values.len()
133
664
    }
134
135
92
    fn size(&self) -> usize {
136
92
        self.group_values.allocated_size() + self.nulls.allocated_size()
137
92
    }
138
139
23
    fn build(self: Box<Self>) -> ArrayRef {
140
23
        let Self {
141
23
            group_values,
142
23
            nulls,
143
23
        } = *self;
144
23
145
23
        let nulls = nulls.build();
146
23
        if !NULLABLE {
147
0
            assert!(nulls.is_none(), "unexpected nulls in non nullable input");
148
23
        }
149
150
23
        Arc::new(PrimitiveArray::<T>::new(
151
23
            ScalarBuffer::from(group_values),
152
23
            nulls,
153
23
        ))
154
23
    }
155
156
8
    fn take_n(&mut self, n: usize) -> ArrayRef {
157
8
        let first_n = self.group_values.drain(0..n).collect::<Vec<_>>();
158
159
8
        let first_n_nulls = if NULLABLE { self.nulls.take_n(n) } else { 
None0
};
160
161
8
        Arc::new(PrimitiveArray::<T>::new(
162
8
            ScalarBuffer::from(first_n),
163
8
            first_n_nulls,
164
8
        ))
165
8
    }
166
}
167
168
/// An implementation of [`GroupColumn`] for binary and utf8 types.
169
///
170
/// Stores a collection of binary or utf8 group values in a single buffer
171
/// in a way that allows:
172
///
173
/// 1. Efficient comparison of incoming rows to existing rows
174
/// 2. Efficient construction of the final output array
175
pub struct ByteGroupValueBuilder<O>
176
where
177
    O: OffsetSizeTrait,
178
{
179
    output_type: OutputType,
180
    buffer: BufferBuilder<u8>,
181
    /// Offsets into `buffer` for each distinct value. These offsets as used
182
    /// directly to create the final `GenericBinaryArray`. The `i`th string is
183
    /// stored in the range `offsets[i]..offsets[i+1]` in `buffer`. Null values
184
    /// are stored as a zero length string.
185
    offsets: Vec<O>,
186
    /// Nulls
187
    nulls: MaybeNullBufferBuilder,
188
}
189
190
impl<O> ByteGroupValueBuilder<O>
191
where
192
    O: OffsetSizeTrait,
193
{
194
2
    pub fn new(output_type: OutputType) -> Self {
195
2
        Self {
196
2
            output_type,
197
2
            buffer: BufferBuilder::new(INITIAL_BUFFER_CAPACITY),
198
2
            offsets: vec![O::default()],
199
2
            nulls: MaybeNullBufferBuilder::new(),
200
2
        }
201
2
    }
202
203
15
    fn append_val_inner<B>(&mut self, array: &ArrayRef, row: usize)
204
15
    where
205
15
        B: ByteArrayType,
206
15
    {
207
15
        let arr = array.as_bytes::<B>();
208
15
        if arr.is_null(row) {
209
8
            self.nulls.append(true);
210
8
            // nulls need a zero length in the offset buffer
211
8
            let offset = self.buffer.len();
212
8
            self.offsets.push(O::usize_as(offset));
213
8
        } else {
214
7
            self.nulls.append(false);
215
7
            let value: &[u8] = arr.value(row).as_ref();
216
7
            self.buffer.append_slice(value);
217
7
            self.offsets.push(O::usize_as(self.buffer.len()));
218
7
        }
219
15
    }
220
221
6
    fn equal_to_inner<B>(&self, lhs_row: usize, array: &ArrayRef, rhs_row: usize) -> bool
222
6
    where
223
6
        B: ByteArrayType,
224
6
    {
225
6
        let array = array.as_bytes::<B>();
226
6
        let exist_null = self.nulls.is_null(lhs_row);
227
6
        let input_null = array.is_null(rhs_row);
228
6
        if let Some(
result4
) = nulls_equal_to(exist_null, input_null) {
229
4
            return result;
230
2
        }
231
2
        // Otherwise, we need to check their values
232
2
        self.value(lhs_row) == (array.value(rhs_row).as_ref() as &[u8])
233
6
    }
234
235
    /// return the current value of the specified row irrespective of null
236
2
    pub fn value(&self, row: usize) -> &[u8] {
237
2
        let l = self.offsets[row].as_usize();
238
2
        let r = self.offsets[row + 1].as_usize();
239
2
        // Safety: the offsets are constructed correctly and never decrease
240
2
        unsafe { self.buffer.as_slice().get_unchecked(l..r) }
241
2
    }
242
}
243
244
impl<O> GroupColumn for ByteGroupValueBuilder<O>
245
where
246
    O: OffsetSizeTrait,
247
{
248
6
    fn equal_to(&self, lhs_row: usize, column: &ArrayRef, rhs_row: usize) -> bool {
249
6
        // Sanity array type
250
6
        match self.output_type {
251
            OutputType::Binary => {
252
0
                debug_assert!(matches!(
253
0
                    column.data_type(),
254
                    DataType::Binary | DataType::LargeBinary
255
                ));
256
0
                self.equal_to_inner::<GenericBinaryType<O>>(lhs_row, column, rhs_row)
257
            }
258
            OutputType::Utf8 => {
259
6
                debug_assert!(
matches!0
(
260
6
                    column.data_type(),
261
                    DataType::Utf8 | DataType::LargeUtf8
262
                ));
263
6
                self.equal_to_inner::<GenericStringType<O>>(lhs_row, column, rhs_row)
264
            }
265
0
            _ => unreachable!("View types should use `ArrowBytesViewMap`"),
266
        }
267
6
    }
268
269
15
    fn append_val(&mut self, column: &ArrayRef, row: usize) {
270
15
        // Sanity array type
271
15
        match self.output_type {
272
            OutputType::Binary => {
273
0
                debug_assert!(matches!(
274
0
                    column.data_type(),
275
                    DataType::Binary | DataType::LargeBinary
276
                ));
277
0
                self.append_val_inner::<GenericBinaryType<O>>(column, row)
278
            }
279
            OutputType::Utf8 => {
280
15
                debug_assert!(
matches!0
(
281
15
                    column.data_type(),
282
                    DataType::Utf8 | DataType::LargeUtf8
283
                ));
284
15
                self.append_val_inner::<GenericStringType<O>>(column, row)
285
            }
286
0
            _ => unreachable!("View types should use `ArrowBytesViewMap`"),
287
        };
288
15
    }
289
290
3
    fn len(&self) -> usize {
291
3
        self.offsets.len() - 1
292
3
    }
293
294
0
    fn size(&self) -> usize {
295
0
        self.buffer.capacity() * std::mem::size_of::<u8>()
296
0
            + self.offsets.allocated_size()
297
0
            + self.nulls.allocated_size()
298
0
    }
299
300
0
    fn build(self: Box<Self>) -> ArrayRef {
301
0
        let Self {
302
0
            output_type,
303
0
            mut buffer,
304
0
            offsets,
305
0
            nulls,
306
0
        } = *self;
307
0
308
0
        let null_buffer = nulls.build();
309
0
310
0
        // SAFETY: the offsets were constructed correctly in `insert_if_new` --
311
0
        // monotonically increasing, overflows were checked.
312
0
        let offsets = unsafe { OffsetBuffer::new_unchecked(ScalarBuffer::from(offsets)) };
313
0
        let values = buffer.finish();
314
0
        match output_type {
315
            OutputType::Binary => {
316
                // SAFETY: the offsets were constructed correctly
317
0
                Arc::new(unsafe {
318
0
                    GenericBinaryArray::new_unchecked(offsets, values, null_buffer)
319
0
                })
320
            }
321
            OutputType::Utf8 => {
322
                // SAFETY:
323
                // 1. the offsets were constructed safely
324
                //
325
                // 2. the input arrays were all the correct type and thus since
326
                // all the values that went in were valid (e.g. utf8) so are all
327
                // the values that come out
328
0
                Arc::new(unsafe {
329
0
                    GenericStringArray::new_unchecked(offsets, values, null_buffer)
330
0
                })
331
            }
332
0
            _ => unreachable!("View types should use `ArrowBytesViewMap`"),
333
        }
334
0
    }
335
336
3
    fn take_n(&mut self, n: usize) -> ArrayRef {
337
3
        debug_assert!(self.len() >= n);
338
3
        let null_buffer = self.nulls.take_n(n);
339
3
        let first_remaining_offset = O::as_usize(self.offsets[n]);
340
3
341
3
        // Given offests like [0, 2, 4, 5] and n = 1, we expect to get
342
3
        // offsets [0, 2, 3]. We first create two offsets for first_n as [0, 2] and the remaining as [2, 4, 5].
343
3
        // And we shift the offset starting from 0 for the remaining one, [2, 4, 5] -> [0, 2, 3].
344
3
        let mut first_n_offsets = self.offsets.drain(0..n).collect::<Vec<_>>();
345
3
        let offset_n = *self.offsets.first().unwrap();
346
3
        self.offsets
347
3
            .iter_mut()
348
7
            .for_each(|offset| *offset = offset.sub(offset_n));
349
3
        first_n_offsets.push(offset_n);
350
3
351
3
        // SAFETY: the offsets were constructed correctly in `insert_if_new` --
352
3
        // monotonically increasing, overflows were checked.
353
3
        let offsets =
354
3
            unsafe { OffsetBuffer::new_unchecked(ScalarBuffer::from(first_n_offsets)) };
355
3
356
3
        let mut remaining_buffer =
357
3
            BufferBuilder::new(self.buffer.len() - first_remaining_offset);
358
3
        // TODO: Current approach copy the remaining and truncate the original one
359
3
        // Find out a way to avoid copying buffer but split the original one into two.
360
3
        remaining_buffer.append_slice(&self.buffer.as_slice()[first_remaining_offset..]);
361
3
        self.buffer.truncate(first_remaining_offset);
362
3
        let values = self.buffer.finish();
363
3
        self.buffer = remaining_buffer;
364
3
365
3
        match self.output_type {
366
            OutputType::Binary => {
367
                // SAFETY: the offsets were constructed correctly
368
0
                Arc::new(unsafe {
369
0
                    GenericBinaryArray::new_unchecked(offsets, values, null_buffer)
370
0
                })
371
            }
372
            OutputType::Utf8 => {
373
                // SAFETY:
374
                // 1. the offsets were constructed safely
375
                //
376
                // 2. we asserted the input arrays were all the correct type and
377
                // thus since all the values that went in were valid (e.g. utf8)
378
                // so are all the values that come out
379
3
                Arc::new(unsafe {
380
3
                    GenericStringArray::new_unchecked(offsets, values, null_buffer)
381
3
                })
382
            }
383
0
            _ => unreachable!("View types should use `ArrowBytesViewMap`"),
384
        }
385
3
    }
386
}
387
388
/// An implementation of [`GroupColumn`] for binary view and utf8 view types.
389
///
390
/// Stores a collection of binary view or utf8 view group values in a buffer
391
/// whose structure is similar to `GenericByteViewArray`, and we can get benefits:
392
///
393
/// 1. Efficient comparison of incoming rows to existing rows
394
/// 2. Efficient construction of the final output array
395
/// 3. Efficient to perform `take_n` comparing to use `GenericByteViewBuilder`
396
pub struct ByteViewGroupValueBuilder<B: ByteViewType> {
397
    /// The views of string values
398
    ///
399
    /// If string len <= 12, the view's format will be:
400
    ///   string(12B) | len(4B)
401
    ///
402
    /// If string len > 12, its format will be:
403
    ///     offset(4B) | buffer_index(4B) | prefix(4B) | len(4B)
404
    views: Vec<u128>,
405
406
    /// The progressing block
407
    ///
408
    /// New values will be inserted into it until its capacity
409
    /// is not enough(detail can see `max_block_size`).
410
    in_progress: Vec<u8>,
411
412
    /// The completed blocks
413
    completed: Vec<Buffer>,
414
415
    /// The max size of `in_progress`
416
    ///
417
    /// `in_progress` will be flushed into `completed`, and create new `in_progress`
418
    /// when found its remaining capacity(`max_block_size` - `len(in_progress)`),
419
    /// is no enough to store the appended value.
420
    ///
421
    /// Currently it is fixed at 2MB.
422
    max_block_size: usize,
423
424
    /// Nulls
425
    nulls: MaybeNullBufferBuilder,
426
427
    /// phantom data so the type requires `<B>`
428
    _phantom: PhantomData<B>,
429
}
430
431
impl<B: ByteViewType> ByteViewGroupValueBuilder<B> {
432
7
    pub fn new() -> Self {
433
7
        Self {
434
7
            views: Vec::new(),
435
7
            in_progress: Vec::new(),
436
7
            completed: Vec::new(),
437
7
            max_block_size: BYTE_VIEW_MAX_BLOCK_SIZE,
438
7
            nulls: MaybeNullBufferBuilder::new(),
439
7
            _phantom: PhantomData {},
440
7
        }
441
7
    }
442
443
    /// Set the max block size
444
6
    fn with_max_block_size(mut self, max_block_size: usize) -> Self {
445
6
        self.max_block_size = max_block_size;
446
6
        self
447
6
    }
448
449
53
    fn append_val_inner(&mut self, array: &ArrayRef, row: usize)
450
53
    where
451
53
        B: ByteViewType,
452
53
    {
453
53
        let arr = array.as_byte_view::<B>();
454
53
455
53
        // Null row case, set and return
456
53
        if arr.is_null(row) {
457
19
            self.nulls.append(true);
458
19
            self.views.push(0);
459
19
            return;
460
34
        }
461
34
462
34
        // Not null row case
463
34
        self.nulls.append(false);
464
34
        let value: &[u8] = arr.value(row).as_ref();
465
34
466
34
        let value_len = value.len();
467
34
        let view = if value_len <= 12 {
468
18
            make_view(value, 0, 0)
469
        } else {
470
            // Ensure big enough block to hold the value firstly
471
16
            self.ensure_in_progress_big_enough(value_len);
472
16
473
16
            // Append value
474
16
            let buffer_index = self.completed.len();
475
16
            let offset = self.in_progress.len();
476
16
            self.in_progress.extend_from_slice(value);
477
16
478
16
            make_view(value, buffer_index as u32, offset as u32)
479
        };
480
481
        // Append view
482
34
        self.views.push(view);
483
53
    }
484
485
16
    fn ensure_in_progress_big_enough(&mut self, value_len: usize) {
486
16
        debug_assert!(value_len > 12);
487
16
        let require_cap = self.in_progress.len() + value_len;
488
16
489
16
        // If current block isn't big enough, flush it and create a new in progress block
490
16
        if require_cap > self.max_block_size {
491
6
            let flushed_block = mem::replace(
492
6
                &mut self.in_progress,
493
6
                Vec::with_capacity(self.max_block_size),
494
6
            );
495
6
            let buffer = Buffer::from_vec(flushed_block);
496
6
            self.completed.push(buffer);
497
10
        }
498
16
    }
499
500
7
    fn equal_to_inner(&self, lhs_row: usize, array: &ArrayRef, rhs_row: usize) -> bool {
501
7
        let array = array.as_byte_view::<B>();
502
7
503
7
        // Check if nulls equal firstly
504
7
        let exist_null = self.nulls.is_null(lhs_row);
505
7
        let input_null = array.is_null(rhs_row);
506
7
        if let Some(
result5
) = nulls_equal_to(exist_null, input_null) {
507
5
            return result;
508
2
        }
509
2
510
2
        // Otherwise, we need to check their values
511
2
        let exist_view = self.views[lhs_row];
512
2
        let exist_view_len = exist_view as u32;
513
2
514
2
        let input_view = array.views()[rhs_row];
515
2
        let input_view_len = input_view as u32;
516
2
517
2
        // The check logic
518
2
        //   - Check len equality
519
2
        //   - If inlined, check inlined value
520
2
        //   - If non-inlined, check prefix and then check value in buffer
521
2
        //     when needed
522
2
        if exist_view_len != input_view_len {
523
1
            return false;
524
1
        }
525
1
526
1
        if exist_view_len <= 12 {
527
1
            let exist_inline = unsafe {
528
1
                GenericByteViewArray::<B>::inline_value(
529
1
                    &exist_view,
530
1
                    exist_view_len as usize,
531
1
                )
532
1
            };
533
1
            let input_inline = unsafe {
534
1
                GenericByteViewArray::<B>::inline_value(
535
1
                    &input_view,
536
1
                    input_view_len as usize,
537
1
                )
538
1
            };
539
1
            exist_inline == input_inline
540
        } else {
541
0
            let exist_prefix =
542
0
                unsafe { GenericByteViewArray::<B>::inline_value(&exist_view, 4) };
543
0
            let input_prefix =
544
0
                unsafe { GenericByteViewArray::<B>::inline_value(&input_view, 4) };
545
0
546
0
            if exist_prefix != input_prefix {
547
0
                return false;
548
0
            }
549
0
550
0
            let exist_full = {
551
0
                let byte_view = ByteView::from(exist_view);
552
0
                self.value(
553
0
                    byte_view.buffer_index as usize,
554
0
                    byte_view.offset as usize,
555
0
                    byte_view.length as usize,
556
0
                )
557
0
            };
558
0
            let input_full: &[u8] = unsafe { array.value_unchecked(rhs_row).as_ref() };
559
0
            exist_full == input_full
560
        }
561
7
    }
562
563
0
    fn value(&self, buffer_index: usize, offset: usize, length: usize) -> &[u8] {
564
0
        debug_assert!(buffer_index <= self.completed.len());
565
566
0
        if buffer_index < self.completed.len() {
567
0
            let block = &self.completed[buffer_index];
568
0
            &block[offset..offset + length]
569
        } else {
570
0
            &self.in_progress[offset..offset + length]
571
        }
572
0
    }
573
574
4
    fn build_inner(self) -> ArrayRef {
575
4
        let Self {
576
4
            views,
577
4
            in_progress,
578
4
            mut completed,
579
4
            nulls,
580
4
            ..
581
4
        } = self;
582
4
583
4
        // Build nulls
584
4
        let null_buffer = nulls.build();
585
4
586
4
        // Build values
587
4
        // Flush `in_process` firstly
588
4
        if !in_progress.is_empty() {
589
4
            let buffer = Buffer::from(in_progress);
590
4
            completed.push(buffer);
591
4
        }
0
592
593
4
        let views = ScalarBuffer::from(views);
594
4
595
4
        Arc::new(GenericByteViewArray::<B>::new(
596
4
            views,
597
4
            completed,
598
4
            null_buffer,
599
4
        ))
600
4
    }
601
602
9
    fn take_n_inner(&mut self, n: usize) -> ArrayRef {
603
9
        debug_assert!(self.len() >= n);
604
605
        // The `n == len` case, we need to take all
606
9
        if self.len() == n {
607
3
            let new_builder = Self::new().with_max_block_size(self.max_block_size);
608
3
            let cur_builder = std::mem::replace(self, new_builder);
609
3
            return cur_builder.build_inner();
610
6
        }
611
6
612
6
        // The `n < len` case
613
6
        // Take n for nulls
614
6
        let null_buffer = self.nulls.take_n(n);
615
6
616
6
        // Take n for values:
617
6
        //   - Take first n `view`s from `views`
618
6
        //
619
6
        //   - Find the last non-inlined `view`, if all inlined,
620
6
        //     we can build array and return happily, otherwise we
621
6
        //     we need to continue to process related buffers
622
6
        //
623
6
        //   - Get the last related `buffer index`(let's name it `buffer index n`)
624
6
        //     from last non-inlined `view`
625
6
        //
626
6
        //   - Take buffers, the key is that we need to know if we need to take
627
6
        //     the whole last related buffer. The logic is a bit complex, you can
628
6
        //     detail in `take_buffers_with_whole_last`, `take_buffers_with_partial_last`
629
6
        //     and other related steps in following
630
6
        //
631
6
        //   - Shift the `buffer index` of remaining non-inlined `views`
632
6
        //
633
6
        let first_n_views = self.views.drain(0..n).collect::<Vec<_>>();
634
6
635
6
        let last_non_inlined_view = first_n_views
636
6
            .iter()
637
6
            .rev()
638
9
            .find(|view| ((**view) as u32) > 12
)6
;
639
640
6
        if let Some(
view4
) = last_non_inlined_view {
641
4
            let view = ByteView::from(*view);
642
4
            let last_related_buffer_index = view.buffer_index as usize;
643
4
644
4
            // Check should we take the whole `last_related_buffer_index` buffer
645
4
            let take_whole_last_buffer = self.should_take_whole_buffer(
646
4
                last_related_buffer_index,
647
4
                (view.offset + view.length) as usize,
648
4
            );
649
650
            // Take related buffers
651
4
            let buffers = if take_whole_last_buffer {
652
2
                self.take_buffers_with_whole_last(last_related_buffer_index)
653
            } else {
654
2
                self.take_buffers_with_partial_last(
655
2
                    last_related_buffer_index,
656
2
                    (view.offset + view.length) as usize,
657
2
                )
658
            };
659
660
            // Shift `buffer index`s finally
661
4
            let shifts = if take_whole_last_buffer {
662
2
                last_related_buffer_index + 1
663
            } else {
664
2
                last_related_buffer_index
665
            };
666
667
20
            
self.views.iter_mut().for_each(4
|view| {
668
20
                if (*view as u32) > 12 {
669
10
                    let mut byte_view = ByteView::from(*view);
670
10
                    byte_view.buffer_index -= shifts as u32;
671
10
                    *view = byte_view.as_u128();
672
10
                }
673
20
            });
674
4
675
4
            // Build array and return
676
4
            let views = ScalarBuffer::from(first_n_views);
677
4
            Arc::new(GenericByteViewArray::<B>::new(views, buffers, null_buffer))
678
        } else {
679
2
            let views = ScalarBuffer::from(first_n_views);
680
2
            Arc::new(GenericByteViewArray::<B>::new(
681
2
                views,
682
2
                Vec::new(),
683
2
                null_buffer,
684
2
            ))
685
        }
686
9
    }
687
688
2
    fn take_buffers_with_whole_last(
689
2
        &mut self,
690
2
        last_related_buffer_index: usize,
691
2
    ) -> Vec<Buffer> {
692
2
        if last_related_buffer_index == self.completed.len() {
693
0
            self.flush_in_progress();
694
2
        }
695
2
        self.completed
696
2
            .drain(0..last_related_buffer_index + 1)
697
2
            .collect()
698
2
    }
699
700
2
    fn take_buffers_with_partial_last(
701
2
        &mut self,
702
2
        last_related_buffer_index: usize,
703
2
        take_len: usize,
704
2
    ) -> Vec<Buffer> {
705
2
        let mut take_buffers = Vec::with_capacity(last_related_buffer_index + 1);
706
2
707
2
        // Take `0 ~ last_related_buffer_index - 1` buffers
708
2
        if !self.completed.is_empty() || 
last_related_buffer_index == 01
{
709
2
            take_buffers.extend(self.completed.drain(0..last_related_buffer_index));
710
2
        }
0
711
712
        // Process the `last_related_buffer_index` buffers
713
2
        let last_buffer = if last_related_buffer_index < self.completed.len() {
714
            // If it is in `completed`, simply clone
715
1
            self.completed[last_related_buffer_index].clone()
716
        } else {
717
            // If it is `in_progress`, copied `0 ~ offset` part
718
1
            let taken_last_buffer = self.in_progress[0..take_len].to_vec();
719
1
            Buffer::from_vec(taken_last_buffer)
720
        };
721
2
        take_buffers.push(last_buffer);
722
2
723
2
        take_buffers
724
2
    }
725
726
    #[inline]
727
4
    fn should_take_whole_buffer(&self, buffer_index: usize, take_len: usize) -> bool {
728
4
        if buffer_index < self.completed.len() {
729
3
            take_len == self.completed[buffer_index].len()
730
        } else {
731
1
            take_len == self.in_progress.len()
732
        }
733
4
    }
734
735
0
    fn flush_in_progress(&mut self) {
736
0
        let flushed_block = mem::replace(
737
0
            &mut self.in_progress,
738
0
            Vec::with_capacity(self.max_block_size),
739
0
        );
740
0
        let buffer = Buffer::from_vec(flushed_block);
741
0
        self.completed.push(buffer);
742
0
    }
743
}
744
745
impl<B: ByteViewType> GroupColumn for ByteViewGroupValueBuilder<B> {
746
7
    fn equal_to(&self, lhs_row: usize, array: &ArrayRef, rhs_row: usize) -> bool {
747
7
        self.equal_to_inner(lhs_row, array, rhs_row)
748
7
    }
749
750
53
    fn append_val(&mut self, array: &ArrayRef, row: usize) {
751
53
        self.append_val_inner(array, row)
752
53
    }
753
754
18
    fn len(&self) -> usize {
755
18
        self.views.len()
756
18
    }
757
758
0
    fn size(&self) -> usize {
759
0
        let buffers_size = self
760
0
            .completed
761
0
            .iter()
762
0
            .map(|buf| buf.capacity() * std::mem::size_of::<u8>())
763
0
            .sum::<usize>();
764
0
765
0
        self.nulls.allocated_size()
766
0
            + self.views.capacity() * std::mem::size_of::<u128>()
767
0
            + self.in_progress.capacity() * std::mem::size_of::<u8>()
768
0
            + buffers_size
769
0
            + std::mem::size_of::<Self>()
770
0
    }
771
772
1
    fn build(self: Box<Self>) -> ArrayRef {
773
1
        Self::build_inner(*self)
774
1
    }
775
776
9
    fn take_n(&mut self, n: usize) -> ArrayRef {
777
9
        self.take_n_inner(n)
778
9
    }
779
}
780
781
/// Tries to decide the comparison of two values from their nullness alone.
///
/// Returns `Some(true)` when both values are null, `Some(false)` when exactly
/// one of them is null, and `None` when neither is null, in which case the
/// values themselves must be compared.
fn nulls_equal_to(lhs_null: bool, rhs_null: bool) -> Option<bool> {
    if !lhs_null && !rhs_null {
        // Nullness says nothing here; the caller has to look at the values
        return None;
    }

    // At least one side is null: they are equal only if both are
    Some(lhs_null && rhs_null)
}
794
795
#[cfg(test)]
796
mod tests {
797
    use std::sync::Arc;
798
799
    use arrow::{
800
        array::AsArray,
801
        datatypes::{Int64Type, StringViewType},
802
    };
803
    use arrow_array::{ArrayRef, Int64Array, StringArray, StringViewArray};
804
    use arrow_buffer::{BooleanBufferBuilder, NullBuffer};
805
    use datafusion_physical_expr::binary_map::OutputType;
806
807
    use crate::aggregates::group_values::group_column::{
808
        ByteViewGroupValueBuilder, PrimitiveGroupValueBuilder,
809
    };
810
811
    use super::{ByteGroupValueBuilder, GroupColumn};
812
813
    /// `take_n` on a `ByteGroupValueBuilder` must return the oldest rows first
    /// and keep the remaining rows available for later appends and takes.
    #[test]
    fn test_take_n() {
        let mut builder = ByteGroupValueBuilder::<i32>::new(OutputType::Utf8);
        let source: ArrayRef = Arc::new(StringArray::from(vec![Some("a"), None]));

        // Builder holds: a, null, null
        for row in [0, 1, 1] {
            builder.append_val(&source, row);
        }

        // Taking 2 yields (a, null), which is exactly `source`; one null remains
        assert_eq!(&builder.take_n(2), &source);

        // Builder holds: null, a, null, a
        for row in [0, 1, 0] {
            builder.append_val(&source, row);
        }

        // Taking 2 yields (null, a); (null, a) remains
        let expected: ArrayRef = Arc::new(StringArray::from(vec![None, Some("a")]));
        assert_eq!(&builder.take_n(2), &expected);

        let source: ArrayRef = Arc::new(StringArray::from(vec![
            Some("a"),
            None,
            Some("longstringfortest"),
        ]));

        // Builder holds: null, a, longstringfortest, null, null
        for row in [2, 1, 1] {
            builder.append_val(&source, row);
        }

        // Taking 4 yields (null, a, longstringfortest, null); one null remains
        let expected: ArrayRef = Arc::new(StringArray::from(vec![
            None,
            Some("a"),
            Some("longstringfortest"),
            None,
        ]));
        assert_eq!(&builder.take_n(4), &expected);
    }
857
858
    #[test]
    fn test_nullable_primitive_equal_to() {
        // One row per case:
        //   0. existing null,     input not null
        //   1. existing null,     input null (a value is still stored underneath)
        //   2. existing null,     input null
        //   3. existing not null, input null
        //   4. existing not null, input not null, values differ
        //   5. existing not null, input not null, values equal

        // Rows already stored in the nullable primitive builder
        let existing: ArrayRef = Arc::new(Int64Array::from(vec![
            None,
            None,
            None,
            Some(1),
            Some(2),
            Some(3),
        ]));
        let mut builder = PrimitiveGroupValueBuilder::<Int64Type, true>::new();
        for row in 0..6 {
            builder.append_val(&existing, row);
        }

        // Only the value buffer of this array is reused; its validity is
        // replaced below. `into_parts` yields (data type, values, nulls).
        let (_data_type, values, _nulls) =
            Int64Array::from(vec![Some(1), Some(2), None, None, Some(1), Some(3)])
                .into_parts();

        // Hand-built validity: row 1 is marked null even though the value
        // buffer still contains 2 at that position.
        let mut validity = BooleanBufferBuilder::new(6);
        for is_valid in [true, false, false, false, true, true] {
            validity.append(is_valid);
        }
        let nulls = NullBuffer::new(validity.finish());
        let input_array: ArrayRef = Arc::new(Int64Array::new(values, Some(nulls)));

        // Expected result of comparing builder row `i` with input row `i`
        let expected = [false, true, true, false, false, true];
        for (row, &want) in expected.iter().enumerate() {
            assert_eq!(builder.equal_to(row, &input_array, row), want, "row {row}");
        }
    }
909
910
    #[test]
    fn test_not_nullable_primitive_equal_to() {
        // One row per case:
        //   0. values equal
        //   1. values differ

        // Rows already stored in the non-nullable primitive builder
        let existing: ArrayRef = Arc::new(Int64Array::from(vec![Some(0), Some(1)]));
        let mut builder = PrimitiveGroupValueBuilder::<Int64Type, false>::new();
        for row in 0..2 {
            builder.append_val(&existing, row);
        }

        // Rows to compare against
        let input_array: ArrayRef = Arc::new(Int64Array::from(vec![Some(0), Some(2)]));

        let same = builder.equal_to(0, &input_array, 0);
        let different = builder.equal_to(1, &input_array, 1);
        assert!(same);
        assert!(!different);
    }
930
931
    #[test]
    fn test_byte_array_equal_to() {
        // One row per case:
        //   0. existing null,     input not null
        //   1. existing null,     input null (bytes are still stored underneath)
        //   2. existing null,     input null
        //   3. existing not null, input null
        //   4. existing not null, input not null, values differ
        //   5. existing not null, input not null, values equal

        // Rows already stored in the ByteGroupValueBuilder
        let existing: ArrayRef = Arc::new(StringArray::from(vec![
            None,
            None,
            None,
            Some("foo"),
            Some("bar"),
            Some("baz"),
        ]));
        let mut builder = ByteGroupValueBuilder::<i32>::new(OutputType::Utf8);
        for row in 0..6 {
            builder.append_val(&existing, row);
        }

        // Only offsets and value bytes of this array are reused; its validity
        // is replaced below.
        let (offsets, buffer, _nulls) = StringArray::from(vec![
            Some("foo"),
            Some("bar"),
            None,
            None,
            Some("foo"),
            Some("baz"),
        ])
        .into_parts();

        // Hand-built validity: row 1 is marked null even though the value
        // buffer still contains "bar" at that position.
        let mut validity = BooleanBufferBuilder::new(6);
        for is_valid in [true, false, false, false, true, true] {
            validity.append(is_valid);
        }
        let nulls = NullBuffer::new(validity.finish());
        let input_array: ArrayRef =
            Arc::new(StringArray::new(offsets, buffer, Some(nulls)));

        // Expected result of comparing builder row `i` with input row `i`
        let expected = [false, true, true, false, false, true];
        for (row, &want) in expected.iter().enumerate() {
            assert_eq!(builder.equal_to(row, &input_array, row), want, "row {row}");
        }
    }
989
990
    /// Appending every row of a string-view array and building must
    /// reproduce the array, spread over two data buffers when the block size
    /// is limited to 60 bytes.
    #[test]
    fn test_byte_view_append_val() {
        let values = vec![
            Some("this string is quite long"), // 25 bytes, goes to buffer 0
            Some("foo"),
            None,
            Some("bar"),
            Some("this string is also quite long"), // 30 bytes, still buffer 0
            Some("this string is quite long"),      // would exceed 60, buffer 1
            Some("bar"),
        ];
        let builder_array: ArrayRef = Arc::new(StringViewArray::from(values));

        let mut builder =
            ByteViewGroupValueBuilder::<StringViewType>::new().with_max_block_size(60);
        (0..builder_array.len()).for_each(|row| builder.append_val(&builder_array, row));

        let output = Box::new(builder).build();

        // All the data should be held by exactly 2 output buffers
        let data_buffer_count = output.as_string_view().data_buffers().len();
        assert_eq!(data_buffer_count, 2);
        assert_eq!(&output, &builder_array);
    }
1013
1014
    #[test]
    fn test_byte_view_equal_to() {
        // Will cover such cases (one per row):
        //   0. exist null, input not null
        //   1. exist null, input null (short string still stored underneath)
        //   2. exist null, input null (long string still stored underneath)
        //   3. exist not null, input null
        //   4. exist not null, input null
        //   5. exist not null, input not null; values not equal
        //   6. exist not null, input not null; values equal

        let mut builder = ByteViewGroupValueBuilder::<StringViewType>::new();
        let builder_array = Arc::new(StringViewArray::from(vec![
            None,
            None,
            None,
            Some("foo"),
            Some("bar"),
            Some("this string is quite long"),
            Some("baz"),
        ])) as ArrayRef;
        for row in 0..7 {
            builder.append_val(&builder_array, row);
        }

        // Define input array: only views and data buffers are reused, the
        // validity is replaced below
        let (views, buffer, _nulls) = StringViewArray::from(vec![
            Some("foo"),
            Some("bar"),                       // set to null
            Some("this string is quite long"), // set to null
            None,
            None,
            Some("foo"),
            Some("baz"),
        ])
        .into_parts();

        // Explicitly build a validity buffer where rows 1 and 2 are null
        // although their views still describe "bar" and
        // "this string is quite long".
        //
        // The builder is sized from the flag list so the capacity always
        // matches the 7 input rows (it used to be hard-coded to 6).
        let validity = [true, false, false, false, false, true, true];
        let mut boolean_buffer_builder = BooleanBufferBuilder::new(validity.len());
        for is_valid in validity {
            boolean_buffer_builder.append(is_valid);
        }
        let nulls = NullBuffer::new(boolean_buffer_builder.finish());
        let input_array =
            Arc::new(StringViewArray::new(views, buffer, Some(nulls))) as ArrayRef;

        // Check
        assert!(!builder.equal_to(0, &input_array, 0));
        assert!(builder.equal_to(1, &input_array, 1));
        assert!(builder.equal_to(2, &input_array, 2));
        assert!(!builder.equal_to(3, &input_array, 3));
        assert!(!builder.equal_to(4, &input_array, 4));
        assert!(!builder.equal_to(5, &input_array, 5));
        assert!(builder.equal_to(6, &input_array, 6));
    }
1076
1077
    #[test]
    fn test_byte_view_take_n() {
        // ####### Define cases and init #######

        // `take_n` is really complex, we should consider and test following situations:
        //   1. Take nulls
        //   2. Take all `inlined`s
        //   3. Take non-inlined + partial last buffer in `completed`
        //   4. Take non-inlined + whole last buffer in `completed`
        //   5. Take non-inlined + partial last `in_progress`
        //   6. Take non-inlined + whole last buffer in `in_progress`
        //   7. Take all views at once

        let mut builder =
            ByteViewGroupValueBuilder::<StringViewType>::new().with_max_block_size(60);
        let input_array = StringViewArray::from(vec![
            //  Test situation 1
            None,
            None,
            // Test situation 2 (also test take null together)
            None,
            Some("foo"),
            Some("bar"),
            // Test situation 3 (also test take null + inlined)
            None,
            Some("foo"),
            Some("this string is quite long"),
            Some("this string is also quite long"),
            // Test situation 4 (also test take null + inlined)
            None,
            Some("bar"),
            Some("this string is quite long"),
            // Test situation 5 (also test take null + inlined)
            None,
            Some("foo"),
            Some("another string that is is quite long"),
            Some("this string not so long"),
            // Test situation 6 (also test take null + inlined + insert again after taking)
            None,
            Some("bar"),
            Some("this string is quite long"),
            // Insert 4 and just take 3 to ensure it will go the path of situation 6
            None,
            // Finally, we create a new builder,  insert the whole array and then
            // take whole at once for testing situation 7
        ]);

        let input_array: ArrayRef = Arc::new(input_array);
        let first_ones_to_append = 16; // For testing situation 1~5
        // For testing situation 6: append 4 rows but take only 3. Taking every
        // appended row would be "take all views at once" (situation 7) and the
        // whole-`in_progress` case would stay untested.
        let second_ones_to_append = 4;
        let second_ones_to_take = 3;
        let final_ones_to_append = input_array.len(); // For testing situation 7

        // ####### Test situation 1~5 #######
        for row in 0..first_ones_to_append {
            builder.append_val(&input_array, row);
        }

        assert_eq!(builder.completed.len(), 2);
        assert_eq!(builder.in_progress.len(), 59);

        // Situation 1
        let taken_array = builder.take_n(2);
        assert_eq!(&taken_array, &input_array.slice(0, 2));

        // Situation 2
        let taken_array = builder.take_n(3);
        assert_eq!(&taken_array, &input_array.slice(2, 3));

        // Situation 3
        let taken_array = builder.take_n(3);
        assert_eq!(&taken_array, &input_array.slice(5, 3));

        let taken_array = builder.take_n(1);
        assert_eq!(&taken_array, &input_array.slice(8, 1));

        // Situation 4
        let taken_array = builder.take_n(3);
        assert_eq!(&taken_array, &input_array.slice(9, 3));

        // Situation 5
        let taken_array = builder.take_n(3);
        assert_eq!(&taken_array, &input_array.slice(12, 3));

        let taken_array = builder.take_n(1);
        assert_eq!(&taken_array, &input_array.slice(15, 1));

        // ####### Test situation 6 #######
        assert!(builder.completed.is_empty());
        assert!(builder.in_progress.is_empty());
        assert!(builder.views.is_empty());

        for row in first_ones_to_append..first_ones_to_append + second_ones_to_append {
            builder.append_val(&input_array, row);
        }

        // The only non-inlined value appended here is the 25 byte string
        assert!(builder.completed.is_empty());
        assert_eq!(builder.in_progress.len(), 25);

        // The last taken row owns all 25 bytes of `in_progress`, while the
        // trailing null stays behind in the builder
        let taken_array = builder.take_n(second_ones_to_take);
        assert_eq!(
            &taken_array,
            &input_array.slice(first_ones_to_append, second_ones_to_take)
        );

        // ####### Test situation 7 #######
        // Create a new builder
        let mut builder =
            ByteViewGroupValueBuilder::<StringViewType>::new().with_max_block_size(60);

        for row in 0..final_ones_to_append {
            builder.append_val(&input_array, row);
        }

        assert_eq!(builder.completed.len(), 3);
        assert_eq!(builder.in_progress.len(), 25);

        let taken_array = builder.take_n(final_ones_to_append);
        assert_eq!(&taken_array, &input_array);
    }
1193
}