Coverage Report

Created: 2024-10-13 08:39

/Users/andrewlamb/Software/datafusion/datafusion/common/src/scalar/mod.rs
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
//! [`ScalarValue`]: stores single values
19
20
mod consts;
21
mod struct_builder;
22
23
use std::borrow::Borrow;
24
use std::cmp::Ordering;
25
use std::collections::{HashSet, VecDeque};
26
use std::convert::Infallible;
27
use std::fmt;
28
use std::hash::Hash;
29
use std::hash::Hasher;
30
use std::iter::repeat;
31
use std::str::FromStr;
32
use std::sync::Arc;
33
34
use crate::arrow_datafusion_err;
35
use crate::cast::{
36
    as_decimal128_array, as_decimal256_array, as_dictionary_array,
37
    as_fixed_size_binary_array, as_fixed_size_list_array,
38
};
39
use crate::error::{DataFusionError, Result, _exec_err, _internal_err, _not_impl_err};
40
use crate::hash_utils::create_hashes;
41
use crate::utils::{
42
    array_into_fixed_size_list_array, array_into_large_list_array, array_into_list_array,
43
};
44
use arrow::compute::kernels::numeric::*;
45
use arrow::util::display::{array_value_to_string, ArrayFormatter, FormatOptions};
46
use arrow::{
47
    array::*,
48
    compute::kernels::cast::{cast_with_options, CastOptions},
49
    datatypes::{
50
        i256, ArrowDictionaryKeyType, ArrowNativeType, ArrowTimestampType, DataType,
51
        Date32Type, Date64Type, Field, Float32Type, Int16Type, Int32Type, Int64Type,
52
        Int8Type, IntervalDayTimeType, IntervalMonthDayNanoType, IntervalUnit,
53
        IntervalYearMonthType, TimeUnit, TimestampMicrosecondType,
54
        TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType,
55
        UInt16Type, UInt32Type, UInt64Type, UInt8Type, DECIMAL128_MAX_PRECISION,
56
    },
57
};
58
use arrow_buffer::{IntervalDayTime, IntervalMonthDayNano, ScalarBuffer};
59
use arrow_schema::{UnionFields, UnionMode};
60
61
use half::f16;
62
pub use struct_builder::ScalarStructBuilder;
63
64
/// A dynamically typed, nullable single value.
65
///
66
/// While an arrow [`Array`] stores one or more values of the same type, in a
67
/// single column, a `ScalarValue` stores a single value of a single type, the
68
/// equivalent of one row and one column.
69
///
70
/// ```text
71
///  ┌────────┐
72
///  │ value1 │
73
///  │ value2 │                  ┌────────┐
74
///  │ value3 │                  │ value2 │
75
///  │  ...   │                  └────────┘
76
///  │ valueN │
77
///  └────────┘
78
///
79
///    Array                     ScalarValue
80
///
81
/// stores multiple,             stores a single,
82
/// possibly null, values of     possibly null, value
83
/// the same type
84
/// ```
85
///
86
/// # Performance
87
///
88
/// In general, performance will be better using arrow [`Array`]s rather than
89
/// [`ScalarValue`], as it is far more efficient to process multiple values at
90
/// once (vectorized processing).
91
///
92
/// # Example
93
/// ```
94
/// # use datafusion_common::ScalarValue;
95
/// // Create single scalar value for an Int32 value
96
/// let s1 = ScalarValue::Int32(Some(10));
97
///
98
/// // You can also create values using the From impl:
99
/// let s2 = ScalarValue::from(10i32);
100
/// assert_eq!(s1, s2);
101
/// ```
102
///
103
/// # Null Handling
104
///
105
/// `ScalarValue` represents null values in the same way as Arrow. Nulls are
106
/// "typed" in the sense that a null value in an [`Int32Array`] is different
107
/// from a null value in a [`Float64Array`], and is different from the values in
108
/// a [`NullArray`].
109
///
110
/// ```
111
/// # fn main() -> datafusion_common::Result<()> {
112
/// # use std::collections::hash_set::Difference;
113
/// # use datafusion_common::ScalarValue;
114
/// # use arrow::datatypes::DataType;
115
/// // You can create a 'null' Int32 value directly:
116
/// let s1 = ScalarValue::Int32(None);
117
///
118
/// // You can also create a null value for a given datatype:
119
/// let s2 = ScalarValue::try_from(&DataType::Int32)?;
120
/// assert_eq!(s1, s2);
121
///
122
/// // Note that this is DIFFERENT than a `ScalarValue::Null`
123
/// let s3 = ScalarValue::Null;
124
/// assert_ne!(s1, s3);
125
/// # Ok(())
126
/// # }
127
/// ```
128
///
129
/// # Nested Types
130
///
131
/// `List` / `LargeList` / `FixedSizeList` / `Struct` / `Map` are represented as a
132
/// single element array of the corresponding type.
133
///
134
/// ## Example: Creating [`ScalarValue::Struct`] using [`ScalarStructBuilder`]
135
/// ```
136
/// # use std::sync::Arc;
137
/// # use arrow::datatypes::{DataType, Field};
138
/// # use datafusion_common::{ScalarValue, scalar::ScalarStructBuilder};
139
/// // Build a struct like: {a: 1, b: "foo"}
140
/// let field_a = Field::new("a", DataType::Int32, false);
141
/// let field_b = Field::new("b", DataType::Utf8, false);
142
///
143
/// let s1 = ScalarStructBuilder::new()
144
///    .with_scalar(field_a, ScalarValue::from(1i32))
145
///    .with_scalar(field_b, ScalarValue::from("foo"))
146
///    .build();
147
/// ```
148
///
149
/// ## Example: Creating a null [`ScalarValue::Struct`] using [`ScalarStructBuilder`]
150
/// ```
151
/// # use std::sync::Arc;
152
/// # use arrow::datatypes::{DataType, Field};
153
/// # use datafusion_common::{ScalarValue, scalar::ScalarStructBuilder};
154
/// // Build a struct representing a NULL value
155
/// let fields = vec![
156
///     Field::new("a", DataType::Int32, false),
157
///     Field::new("b", DataType::Utf8, false),
158
/// ];
159
///
160
/// let s1 = ScalarStructBuilder::new_null(fields);
161
/// ```
162
///
163
/// ## Example: Creating [`ScalarValue::Struct`] directly
164
/// ```
165
/// # use std::sync::Arc;
166
/// # use arrow::datatypes::{DataType, Field, Fields};
167
/// # use arrow_array::{ArrayRef, Int32Array, StructArray, StringArray};
168
/// # use datafusion_common::ScalarValue;
169
/// // Build a struct like: {a: 1, b: "foo"}
170
/// // Field description
171
/// let fields = Fields::from(vec![
172
///   Field::new("a", DataType::Int32, false),
173
///   Field::new("b", DataType::Utf8, false),
174
/// ]);
175
/// // one row arrays for each field
176
/// let arrays: Vec<ArrayRef> = vec![
177
///   Arc::new(Int32Array::from(vec![1])),
178
///   Arc::new(StringArray::from(vec!["foo"])),
179
/// ];
180
/// // no nulls for this array
181
/// let nulls = None;
182
/// let arr = StructArray::new(fields, arrays, nulls);
183
///
184
/// // Create a ScalarValue::Struct directly
185
/// let s1 = ScalarValue::Struct(Arc::new(arr));
186
/// ```
187
///
188
///
189
/// # Further Reading
190
/// See [datatypes](https://arrow.apache.org/docs/python/api/datatypes.html) for
191
/// details on datatypes and the [format](https://github.com/apache/arrow/blob/master/format/Schema.fbs#L354-L375)
192
/// for the definitive reference.
193
#[derive(Clone)]
194
pub enum ScalarValue {
195
    /// represents `DataType::Null` (castable to/from any other type)
196
    Null,
197
    /// true or false value
198
    Boolean(Option<bool>),
199
    /// 16bit float
200
    Float16(Option<f16>),
201
    /// 32bit float
202
    Float32(Option<f32>),
203
    /// 64bit float
204
    Float64(Option<f64>),
205
    /// 128bit decimal: the value stored as an `i128`, followed by its precision and scale
206
    Decimal128(Option<i128>, u8, i8),
207
    /// 256bit decimal: the value stored as an `i256`, followed by its precision and scale
208
    Decimal256(Option<i256>, u8, i8),
209
    /// signed 8bit int
210
    Int8(Option<i8>),
211
    /// signed 16bit int
212
    Int16(Option<i16>),
213
    /// signed 32bit int
214
    Int32(Option<i32>),
215
    /// signed 64bit int
216
    Int64(Option<i64>),
217
    /// unsigned 8bit int
218
    UInt8(Option<u8>),
219
    /// unsigned 16bit int
220
    UInt16(Option<u16>),
221
    /// unsigned 32bit int
222
    UInt32(Option<u32>),
223
    /// unsigned 64bit int
224
    UInt64(Option<u64>),
225
    /// utf-8 encoded string.
226
    Utf8(Option<String>),
227
    /// utf-8 encoded string but from view types.
228
    Utf8View(Option<String>),
229
    /// utf-8 encoded string representing a LargeString's arrow type.
230
    LargeUtf8(Option<String>),
231
    /// binary
232
    Binary(Option<Vec<u8>>),
233
    /// binary but from view types.
234
    BinaryView(Option<Vec<u8>>),
235
    /// fixed size binary
236
    FixedSizeBinary(i32, Option<Vec<u8>>),
237
    /// large binary
238
    LargeBinary(Option<Vec<u8>>),
239
    /// Fixed size list scalar.
240
    ///
241
    /// The array must be a FixedSizeListArray with length 1.
242
    FixedSizeList(Arc<FixedSizeListArray>),
243
    /// Represents a single element of a [`ListArray`] as an [`ArrayRef`]
244
    ///
245
    /// The array must be a ListArray with length 1.
246
    List(Arc<ListArray>),
247
    /// The array must be a LargeListArray with length 1.
248
    LargeList(Arc<LargeListArray>),
249
    /// Represents a single element [`StructArray`] as an [`ArrayRef`]. See
250
    /// [`ScalarValue`] for examples of how to create instances of this type.
251
    Struct(Arc<StructArray>),
252
    /// Represents a single element [`MapArray`] as an [`ArrayRef`].
253
    Map(Arc<MapArray>),
254
    /// Date stored as a signed 32bit int days since UNIX epoch 1970-01-01
255
    Date32(Option<i32>),
256
    /// Date stored as a signed 64bit int milliseconds since UNIX epoch 1970-01-01
257
    Date64(Option<i64>),
258
    /// Time stored as a signed 32bit int as seconds since midnight
259
    Time32Second(Option<i32>),
260
    /// Time stored as a signed 32bit int as milliseconds since midnight
261
    Time32Millisecond(Option<i32>),
262
    /// Time stored as a signed 64bit int as microseconds since midnight
263
    Time64Microsecond(Option<i64>),
264
    /// Time stored as a signed 64bit int as nanoseconds since midnight
265
    Time64Nanosecond(Option<i64>),
266
    /// Timestamp Second
267
    TimestampSecond(Option<i64>, Option<Arc<str>>),
268
    /// Timestamp Milliseconds
269
    TimestampMillisecond(Option<i64>, Option<Arc<str>>),
270
    /// Timestamp Microseconds
271
    TimestampMicrosecond(Option<i64>, Option<Arc<str>>),
272
    /// Timestamp Nanoseconds
273
    TimestampNanosecond(Option<i64>, Option<Arc<str>>),
274
    /// Number of elapsed whole months
275
    IntervalYearMonth(Option<i32>),
276
    /// Number of elapsed days and milliseconds (no leap seconds)
277
    /// stored as 2 contiguous 32-bit signed integers
278
    IntervalDayTime(Option<IntervalDayTime>),
279
    /// A triple of the number of elapsed months, days, and nanoseconds.
280
    /// Months and days are encoded as 32-bit signed integers.
281
    /// Nanoseconds is encoded as a 64-bit signed integer (no leap seconds).
282
    IntervalMonthDayNano(Option<IntervalMonthDayNano>),
283
    /// Duration in seconds
284
    DurationSecond(Option<i64>),
285
    /// Duration in milliseconds
286
    DurationMillisecond(Option<i64>),
287
    /// Duration in microseconds
288
    DurationMicrosecond(Option<i64>),
289
    /// Duration in nanoseconds
290
    DurationNanosecond(Option<i64>),
291
    /// A nested datatype that can represent slots of differing types. Components:
292
    /// `.0`: a tuple of union `type_id` and the single value held by this Scalar
293
    /// `.1`: the list of fields, zero-to-one of which will be set in `.0`
294
    /// `.2`: the physical storage of the source/destination UnionArray from which this Scalar came
295
    Union(Option<(i8, Box<ScalarValue>)>, UnionFields, UnionMode),
296
    /// Dictionary type: index type and value
297
    Dictionary(Box<DataType>, Box<ScalarValue>),
298
}
299
300
impl Hash for Fl<f16> {
301
0
    fn hash<H: Hasher>(&self, state: &mut H) {
302
0
        self.0.to_bits().hash(state);
303
0
    }
304
}
305
306
// manual implementation of `PartialEq`
307
impl PartialEq for ScalarValue {
308
103k
    fn eq(&self, other: &Self) -> bool {
309
        use ScalarValue::*;
310
        // This purposely doesn't have a catch-all "(_, _)" so that
311
        // any newly added enum variant will require editing this list
312
        // or else face a compile error
313
103k
        match (self, other) {
314
0
            (Decimal128(v1, p1, s1), Decimal128(v2, p2, s2)) => {
315
0
                v1.eq(v2) && p1.eq(p2) && s1.eq(s2)
316
            }
317
0
            (Decimal128(_, _, _), _) => false,
318
0
            (Decimal256(v1, p1, s1), Decimal256(v2, p2, s2)) => {
319
0
                v1.eq(v2) && p1.eq(p2) && s1.eq(s2)
320
            }
321
0
            (Decimal256(_, _, _), _) => false,
322
86.2k
            (Boolean(v1), Boolean(v2)) => v1.eq(v2),
323
0
            (Boolean(_), _) => false,
324
4
            (Float32(v1), Float32(v2)) => match (v1, v2) {
325
4
                (Some(f1), Some(f2)) => f1.to_bits() == f2.to_bits(),
326
0
                _ => v1.eq(v2),
327
            },
328
0
            (Float16(v1), Float16(v2)) => match (v1, v2) {
329
0
                (Some(f1), Some(f2)) => f1.to_bits() == f2.to_bits(),
330
0
                _ => v1.eq(v2),
331
            },
332
4
            (Float32(_), _) => false,
333
0
            (Float16(_), _) => false,
334
5.92k
            (Float64(v1), Float64(v2)) => match (v1, v2) {
335
4.96k
                (Some(f1), Some(f2)) => f1.to_bits() == f2.to_bits(),
336
960
                _ => v1.eq(v2),
337
            },
338
0
            (Float64(_), _) => false,
339
0
            (Int8(v1), Int8(v2)) => v1.eq(v2),
340
0
            (Int8(_), _) => false,
341
0
            (Int16(v1), Int16(v2)) => v1.eq(v2),
342
0
            (Int16(_), _) => false,
343
6.88k
            (Int32(v1), Int32(v2)) => v1.eq(v2),
344
0
            (Int32(_), _) => false,
345
380
            (Int64(v1), Int64(v2)) => v1.eq(v2),
346
0
            (Int64(_), _) => false,
347
0
            (UInt8(v1), UInt8(v2)) => v1.eq(v2),
348
0
            (UInt8(_), _) => false,
349
0
            (UInt16(v1), UInt16(v2)) => v1.eq(v2),
350
0
            (UInt16(_), _) => false,
351
1
            (UInt32(v1), UInt32(v2)) => v1.eq(v2),
352
0
            (UInt32(_), _) => false,
353
0
            (UInt64(v1), UInt64(v2)) => v1.eq(v2),
354
0
            (UInt64(_), _) => false,
355
8
            (Utf8(v1), Utf8(v2)) => v1.eq(v2),
356
0
            (Utf8(_), _) => false,
357
0
            (Utf8View(v1), Utf8View(v2)) => v1.eq(v2),
358
0
            (Utf8View(_), _) => false,
359
0
            (LargeUtf8(v1), LargeUtf8(v2)) => v1.eq(v2),
360
0
            (LargeUtf8(_), _) => false,
361
0
            (Binary(v1), Binary(v2)) => v1.eq(v2),
362
0
            (Binary(_), _) => false,
363
0
            (BinaryView(v1), BinaryView(v2)) => v1.eq(v2),
364
0
            (BinaryView(_), _) => false,
365
0
            (FixedSizeBinary(_, v1), FixedSizeBinary(_, v2)) => v1.eq(v2),
366
0
            (FixedSizeBinary(_, _), _) => false,
367
0
            (LargeBinary(v1), LargeBinary(v2)) => v1.eq(v2),
368
0
            (LargeBinary(_), _) => false,
369
0
            (FixedSizeList(v1), FixedSizeList(v2)) => v1.eq(v2),
370
0
            (FixedSizeList(_), _) => false,
371
0
            (List(v1), List(v2)) => v1.eq(v2),
372
0
            (List(_), _) => false,
373
0
            (LargeList(v1), LargeList(v2)) => v1.eq(v2),
374
0
            (LargeList(_), _) => false,
375
0
            (Struct(v1), Struct(v2)) => v1.eq(v2),
376
0
            (Struct(_), _) => false,
377
0
            (Map(v1), Map(v2)) => v1.eq(v2),
378
0
            (Map(_), _) => false,
379
0
            (Date32(v1), Date32(v2)) => v1.eq(v2),
380
0
            (Date32(_), _) => false,
381
0
            (Date64(v1), Date64(v2)) => v1.eq(v2),
382
0
            (Date64(_), _) => false,
383
0
            (Time32Second(v1), Time32Second(v2)) => v1.eq(v2),
384
0
            (Time32Second(_), _) => false,
385
0
            (Time32Millisecond(v1), Time32Millisecond(v2)) => v1.eq(v2),
386
0
            (Time32Millisecond(_), _) => false,
387
0
            (Time64Microsecond(v1), Time64Microsecond(v2)) => v1.eq(v2),
388
0
            (Time64Microsecond(_), _) => false,
389
0
            (Time64Nanosecond(v1), Time64Nanosecond(v2)) => v1.eq(v2),
390
0
            (Time64Nanosecond(_), _) => false,
391
0
            (TimestampSecond(v1, _), TimestampSecond(v2, _)) => v1.eq(v2),
392
0
            (TimestampSecond(_, _), _) => false,
393
2.11k
            (TimestampMillisecond(v1, _), TimestampMillisecond(v2, _)) => v1.eq(v2),
394
0
            (TimestampMillisecond(_, _), _) => false,
395
0
            (TimestampMicrosecond(v1, _), TimestampMicrosecond(v2, _)) => v1.eq(v2),
396
0
            (TimestampMicrosecond(_, _), _) => false,
397
0
            (TimestampNanosecond(v1, _), TimestampNanosecond(v2, _)) => v1.eq(v2),
398
0
            (TimestampNanosecond(_, _), _) => false,
399
0
            (DurationSecond(v1), DurationSecond(v2)) => v1.eq(v2),
400
0
            (DurationSecond(_), _) => false,
401
576
            (DurationMillisecond(v1), DurationMillisecond(v2)) => v1.eq(v2),
402
0
            (DurationMillisecond(_), _) => false,
403
0
            (DurationMicrosecond(v1), DurationMicrosecond(v2)) => v1.eq(v2),
404
0
            (DurationMicrosecond(_), _) => false,
405
0
            (DurationNanosecond(v1), DurationNanosecond(v2)) => v1.eq(v2),
406
0
            (DurationNanosecond(_), _) => false,
407
0
            (IntervalYearMonth(v1), IntervalYearMonth(v2)) => v1.eq(v2),
408
0
            (IntervalYearMonth(_), _) => false,
409
1.50k
            (IntervalDayTime(v1), IntervalDayTime(v2)) => v1.eq(v2),
410
0
            (IntervalDayTime(_), _) => false,
411
0
            (IntervalMonthDayNano(v1), IntervalMonthDayNano(v2)) => v1.eq(v2),
412
0
            (IntervalMonthDayNano(_), _) => false,
413
2
            (Union(val1, fields1, mode1), Union(val2, fields2, mode2)) => {
414
2
                val1.eq(val2) && fields1.eq(fields2) && mode1.eq(mode2)
415
            }
416
0
            (Union(_, _, _), _) => false,
417
0
            (Dictionary(k1, v1), Dictionary(k2, v2)) => k1.eq(k2) && v1.eq(v2),
418
0
            (Dictionary(_, _), _) => false,
419
0
            (Null, Null) => true,
420
0
            (Null, _) => false,
421
        }
422
103k
    }
423
}
424
425
// manual implementation of `PartialOrd`
426
impl PartialOrd for ScalarValue {
427
157k
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
428
        use ScalarValue::*;
429
        // This purposely doesn't have a catch-all "(_, _)" so that
430
        // any newly added enum variant will require editing this list
431
        // or else face a compile error
432
157k
        match (self, other) {
433
4
            (Decimal128(v1, p1, s1), Decimal128(v2, p2, s2)) => {
434
4
                if p1.eq(p2) && s1.eq(s2) {
435
4
                    v1.partial_cmp(v2)
436
                } else {
437
                    // Two decimal values can be compared if they have the same precision and scale.
438
0
                    None
439
                }
440
            }
441
0
            (Decimal128(_, _, _), _) => None,
442
0
            (Decimal256(v1, p1, s1), Decimal256(v2, p2, s2)) => {
443
0
                if p1.eq(p2) && s1.eq(s2) {
444
0
                    v1.partial_cmp(v2)
445
                } else {
446
                    // Two decimal values can be compared if they have the same precision and scale.
447
0
                    None
448
                }
449
            }
450
0
            (Decimal256(_, _, _), _) => None,
451
86.3k
            (Boolean(v1), Boolean(v2)) => v1.partial_cmp(v2),
452
0
            (Boolean(_), _) => None,
453
14
            (Float32(v1), Float32(v2)) => match (v1, v2) {
454
14
                (Some(f1), Some(f2)) => Some(f1.total_cmp(f2)),
455
0
                _ => v1.partial_cmp(v2),
456
            },
457
0
            (Float16(v1), Float16(v2)) => match (v1, v2) {
458
0
                (Some(f1), Some(f2)) => Some(f1.total_cmp(f2)),
459
0
                _ => v1.partial_cmp(v2),
460
            },
461
0
            (Float32(_), _) => None,
462
0
            (Float16(_), _) => None,
463
23.1k
            (Float64(v1), Float64(v2)) => match (v1, v2) {
464
23.1k
                (Some(f1), Some(f2)) => Some(f1.total_cmp(f2)),
465
0
                _ => v1.partial_cmp(v2),
466
            },
467
0
            (Float64(_), _) => None,
468
0
            (Int8(v1), Int8(v2)) => v1.partial_cmp(v2),
469
0
            (Int8(_), _) => None,
470
0
            (Int16(v1), Int16(v2)) => v1.partial_cmp(v2),
471
0
            (Int16(_), _) => None,
472
26.4k
            (Int32(v1), Int32(v2)) => v1.partial_cmp(v2),
473
0
            (Int32(_), _) => None,
474
2.58k
            (Int64(v1), Int64(v2)) => v1.partial_cmp(v2),
475
0
            (Int64(_), _) => None,
476
0
            (UInt8(v1), UInt8(v2)) => v1.partial_cmp(v2),
477
0
            (UInt8(_), _) => None,
478
0
            (UInt16(v1), UInt16(v2)) => v1.partial_cmp(v2),
479
0
            (UInt16(_), _) => None,
480
0
            (UInt32(v1), UInt32(v2)) => v1.partial_cmp(v2),
481
0
            (UInt32(_), _) => None,
482
50
            (UInt64(v1), UInt64(v2)) => v1.partial_cmp(v2),
483
0
            (UInt64(_), _) => None,
484
2
            (Utf8(v1), Utf8(v2)) => v1.partial_cmp(v2),
485
0
            (Utf8(_), _) => None,
486
0
            (LargeUtf8(v1), LargeUtf8(v2)) => v1.partial_cmp(v2),
487
0
            (LargeUtf8(_), _) => None,
488
0
            (Utf8View(v1), Utf8View(v2)) => v1.partial_cmp(v2),
489
0
            (Utf8View(_), _) => None,
490
0
            (Binary(v1), Binary(v2)) => v1.partial_cmp(v2),
491
0
            (Binary(_), _) => None,
492
0
            (BinaryView(v1), BinaryView(v2)) => v1.partial_cmp(v2),
493
0
            (BinaryView(_), _) => None,
494
0
            (FixedSizeBinary(_, v1), FixedSizeBinary(_, v2)) => v1.partial_cmp(v2),
495
0
            (FixedSizeBinary(_, _), _) => None,
496
0
            (LargeBinary(v1), LargeBinary(v2)) => v1.partial_cmp(v2),
497
0
            (LargeBinary(_), _) => None,
498
            // ScalarValue::List / ScalarValue::FixedSizeList / ScalarValue::LargeList are ensure to have length 1
499
0
            (List(arr1), List(arr2)) => partial_cmp_list(arr1.as_ref(), arr2.as_ref()),
500
0
            (FixedSizeList(arr1), FixedSizeList(arr2)) => {
501
0
                partial_cmp_list(arr1.as_ref(), arr2.as_ref())
502
            }
503
0
            (LargeList(arr1), LargeList(arr2)) => {
504
0
                partial_cmp_list(arr1.as_ref(), arr2.as_ref())
505
            }
506
0
            (List(_), _) | (LargeList(_), _) | (FixedSizeList(_), _) => None,
507
0
            (Struct(struct_arr1), Struct(struct_arr2)) => {
508
0
                partial_cmp_struct(struct_arr1, struct_arr2)
509
            }
510
0
            (Struct(_), _) => None,
511
0
            (Map(map_arr1), Map(map_arr2)) => partial_cmp_map(map_arr1, map_arr2),
512
0
            (Map(_), _) => None,
513
0
            (Date32(v1), Date32(v2)) => v1.partial_cmp(v2),
514
0
            (Date32(_), _) => None,
515
0
            (Date64(v1), Date64(v2)) => v1.partial_cmp(v2),
516
0
            (Date64(_), _) => None,
517
0
            (Time32Second(v1), Time32Second(v2)) => v1.partial_cmp(v2),
518
0
            (Time32Second(_), _) => None,
519
0
            (Time32Millisecond(v1), Time32Millisecond(v2)) => v1.partial_cmp(v2),
520
0
            (Time32Millisecond(_), _) => None,
521
0
            (Time64Microsecond(v1), Time64Microsecond(v2)) => v1.partial_cmp(v2),
522
0
            (Time64Microsecond(_), _) => None,
523
0
            (Time64Nanosecond(v1), Time64Nanosecond(v2)) => v1.partial_cmp(v2),
524
0
            (Time64Nanosecond(_), _) => None,
525
0
            (TimestampSecond(v1, _), TimestampSecond(v2, _)) => v1.partial_cmp(v2),
526
0
            (TimestampSecond(_, _), _) => None,
527
7.65k
            (TimestampMillisecond(v1, _), TimestampMillisecond(v2, _)) => {
528
7.65k
                v1.partial_cmp(v2)
529
            }
530
0
            (TimestampMillisecond(_, _), _) => None,
531
0
            (TimestampMicrosecond(v1, _), TimestampMicrosecond(v2, _)) => {
532
0
                v1.partial_cmp(v2)
533
            }
534
0
            (TimestampMicrosecond(_, _), _) => None,
535
0
            (TimestampNanosecond(v1, _), TimestampNanosecond(v2, _)) => {
536
0
                v1.partial_cmp(v2)
537
            }
538
0
            (TimestampNanosecond(_, _), _) => None,
539
0
            (IntervalYearMonth(v1), IntervalYearMonth(v2)) => v1.partial_cmp(v2),
540
0
            (IntervalYearMonth(_), _) => None,
541
5.76k
            (IntervalDayTime(v1), IntervalDayTime(v2)) => v1.partial_cmp(v2),
542
0
            (IntervalDayTime(_), _) => None,
543
0
            (IntervalMonthDayNano(v1), IntervalMonthDayNano(v2)) => v1.partial_cmp(v2),
544
0
            (IntervalMonthDayNano(_), _) => None,
545
0
            (DurationSecond(v1), DurationSecond(v2)) => v1.partial_cmp(v2),
546
0
            (DurationSecond(_), _) => None,
547
4.96k
            (DurationMillisecond(v1), DurationMillisecond(v2)) => v1.partial_cmp(v2),
548
0
            (DurationMillisecond(_), _) => None,
549
0
            (DurationMicrosecond(v1), DurationMicrosecond(v2)) => v1.partial_cmp(v2),
550
0
            (DurationMicrosecond(_), _) => None,
551
0
            (DurationNanosecond(v1), DurationNanosecond(v2)) => v1.partial_cmp(v2),
552
0
            (DurationNanosecond(_), _) => None,
553
0
            (Union(v1, t1, m1), Union(v2, t2, m2)) => {
554
0
                if t1.eq(t2) && m1.eq(m2) {
555
0
                    v1.partial_cmp(v2)
556
                } else {
557
0
                    None
558
                }
559
            }
560
0
            (Union(_, _, _), _) => None,
561
0
            (Dictionary(k1, v1), Dictionary(k2, v2)) => {
562
0
                // Don't compare if the key types don't match (it is effectively a different datatype)
563
0
                if k1 == k2 {
564
0
                    v1.partial_cmp(v2)
565
                } else {
566
0
                    None
567
                }
568
            }
569
0
            (Dictionary(_, _), _) => None,
570
0
            (Null, Null) => Some(Ordering::Equal),
571
0
            (Null, _) => None,
572
        }
573
157k
    }
574
}
575
576
/// List/LargeList/FixedSizeList scalars always have a single element
577
/// array. This function returns that array
578
0
fn first_array_for_list(arr: &dyn Array) -> ArrayRef {
579
0
    assert_eq!(arr.len(), 1);
580
0
    if let Some(arr) = arr.as_list_opt::<i32>() {
581
0
        arr.value(0)
582
0
    } else if let Some(arr) = arr.as_list_opt::<i64>() {
583
0
        arr.value(0)
584
0
    } else if let Some(arr) = arr.as_fixed_size_list_opt() {
585
0
        arr.value(0)
586
    } else {
587
0
        unreachable!("Since only List / LargeList / FixedSizeList are supported, this should never happen")
588
    }
589
0
}
590
591
/// Compares two List/LargeList/FixedSizeList scalars
592
0
fn partial_cmp_list(arr1: &dyn Array, arr2: &dyn Array) -> Option<Ordering> {
593
0
    if arr1.data_type() != arr2.data_type() {
594
0
        return None;
595
0
    }
596
0
    let arr1 = first_array_for_list(arr1);
597
0
    let arr2 = first_array_for_list(arr2);
598
599
0
    let lt_res = arrow::compute::kernels::cmp::lt(&arr1, &arr2).ok()?;
600
0
    let eq_res = arrow::compute::kernels::cmp::eq(&arr1, &arr2).ok()?;
601
602
0
    for j in 0..lt_res.len() {
603
0
        if lt_res.is_valid(j) && lt_res.value(j) {
604
0
            return Some(Ordering::Less);
605
0
        }
606
0
        if eq_res.is_valid(j) && !eq_res.value(j) {
607
0
            return Some(Ordering::Greater);
608
0
        }
609
    }
610
611
0
    Some(Ordering::Equal)
612
0
}
613
614
0
fn partial_cmp_struct(s1: &Arc<StructArray>, s2: &Arc<StructArray>) -> Option<Ordering> {
615
0
    if s1.len() != s2.len() {
616
0
        return None;
617
0
    }
618
0
619
0
    if s1.data_type() != s2.data_type() {
620
0
        return None;
621
0
    }
622
623
0
    for col_index in 0..s1.num_columns() {
624
0
        let arr1 = s1.column(col_index);
625
0
        let arr2 = s2.column(col_index);
626
627
0
        let lt_res = arrow::compute::kernels::cmp::lt(arr1, arr2).ok()?;
628
0
        let eq_res = arrow::compute::kernels::cmp::eq(arr1, arr2).ok()?;
629
630
0
        for j in 0..lt_res.len() {
631
0
            if lt_res.is_valid(j) && lt_res.value(j) {
632
0
                return Some(Ordering::Less);
633
0
            }
634
0
            if eq_res.is_valid(j) && !eq_res.value(j) {
635
0
                return Some(Ordering::Greater);
636
0
            }
637
        }
638
    }
639
0
    Some(Ordering::Equal)
640
0
}
641
642
0
fn partial_cmp_map(m1: &Arc<MapArray>, m2: &Arc<MapArray>) -> Option<Ordering> {
643
0
    if m1.len() != m2.len() {
644
0
        return None;
645
0
    }
646
0
647
0
    if m1.data_type() != m2.data_type() {
648
0
        return None;
649
0
    }
650
651
0
    for col_index in 0..m1.len() {
652
0
        let arr1 = m1.entries().column(col_index);
653
0
        let arr2 = m2.entries().column(col_index);
654
655
0
        let lt_res = arrow::compute::kernels::cmp::lt(arr1, arr2).ok()?;
656
0
        let eq_res = arrow::compute::kernels::cmp::eq(arr1, arr2).ok()?;
657
658
0
        for j in 0..lt_res.len() {
659
0
            if lt_res.is_valid(j) && lt_res.value(j) {
660
0
                return Some(Ordering::Less);
661
0
            }
662
0
            if eq_res.is_valid(j) && !eq_res.value(j) {
663
0
                return Some(Ordering::Greater);
664
0
            }
665
        }
666
    }
667
0
    Some(Ordering::Equal)
668
0
}
669
670
// Marker impl: equality is defined entirely by the manual `PartialEq` impl for
// `ScalarValue`, which compares floating point values by bit pattern.
impl Eq for ScalarValue {}
671
672
// Newtype wrapper around a float. `f32`/`f64` do not implement
// `std::hash::Hash`, so hashing is implemented on this wrapper type instead.
struct Fl<T>(T);

// Implements `std::hash::Hash` for `Fl<float>` for every `(float, bits)` pair
// given, where `bits` is the unsigned integer type with the same width as
// `float`. The float's native-endian bytes are reinterpreted as that integer
// and the integer's native-endian bytes are written to the hasher.
macro_rules! hash_float_value {
    ($(($float:ty, $bits:ty)),+) => {
        $(
            impl std::hash::Hash for Fl<$float> {
                #[inline]
                fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
                    let raw: $bits = <$bits>::from_ne_bytes(self.0.to_ne_bytes());
                    state.write(&raw.to_ne_bytes())
                }
            }
        )+
    };
}

hash_float_value!((f64, u64), (f32, u32));
687
688
// manual implementation of `Hash`
689
//
690
// # Panics
691
//
692
// Panics if there is an error when creating hash values for rows
693
impl std::hash::Hash for ScalarValue {
694
63
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
695
        use ScalarValue::*;
696
63
        match self {
697
0
            Decimal128(v, p, s) => {
698
0
                v.hash(state);
699
0
                p.hash(state);
700
0
                s.hash(state)
701
            }
702
0
            Decimal256(v, p, s) => {
703
0
                v.hash(state);
704
0
                p.hash(state);
705
0
                s.hash(state)
706
            }
707
0
            Boolean(v) => v.hash(state),
708
0
            Float16(v) => v.map(Fl).hash(state),
709
0
            Float32(v) => v.map(Fl).hash(state),
710
0
            Float64(v) => v.map(Fl).hash(state),
711
0
            Int8(v) => v.hash(state),
712
0
            Int16(v) => v.hash(state),
713
0
            Int32(v) => v.hash(state),
714
63
            Int64(v) => v.hash(state),
715
0
            UInt8(v) => v.hash(state),
716
0
            UInt16(v) => v.hash(state),
717
0
            UInt32(v) => v.hash(state),
718
0
            UInt64(v) => v.hash(state),
719
0
            Utf8(v) | LargeUtf8(v) | Utf8View(v) => v.hash(state),
720
0
            Binary(v) | FixedSizeBinary(_, v) | LargeBinary(v) | BinaryView(v) => {
721
0
                v.hash(state)
722
            }
723
0
            List(arr) => {
724
0
                hash_nested_array(arr.to_owned() as ArrayRef, state);
725
0
            }
726
0
            LargeList(arr) => {
727
0
                hash_nested_array(arr.to_owned() as ArrayRef, state);
728
0
            }
729
0
            FixedSizeList(arr) => {
730
0
                hash_nested_array(arr.to_owned() as ArrayRef, state);
731
0
            }
732
0
            Struct(arr) => {
733
0
                hash_nested_array(arr.to_owned() as ArrayRef, state);
734
0
            }
735
0
            Map(arr) => {
736
0
                hash_nested_array(arr.to_owned() as ArrayRef, state);
737
0
            }
738
0
            Date32(v) => v.hash(state),
739
0
            Date64(v) => v.hash(state),
740
0
            Time32Second(v) => v.hash(state),
741
0
            Time32Millisecond(v) => v.hash(state),
742
0
            Time64Microsecond(v) => v.hash(state),
743
0
            Time64Nanosecond(v) => v.hash(state),
744
0
            TimestampSecond(v, _) => v.hash(state),
745
0
            TimestampMillisecond(v, _) => v.hash(state),
746
0
            TimestampMicrosecond(v, _) => v.hash(state),
747
0
            TimestampNanosecond(v, _) => v.hash(state),
748
0
            DurationSecond(v) => v.hash(state),
749
0
            DurationMillisecond(v) => v.hash(state),
750
0
            DurationMicrosecond(v) => v.hash(state),
751
0
            DurationNanosecond(v) => v.hash(state),
752
0
            IntervalYearMonth(v) => v.hash(state),
753
0
            IntervalDayTime(v) => v.hash(state),
754
0
            IntervalMonthDayNano(v) => v.hash(state),
755
0
            Union(v, t, m) => {
756
0
                v.hash(state);
757
0
                t.hash(state);
758
0
                m.hash(state);
759
0
            }
760
0
            Dictionary(k, v) => {
761
0
                k.hash(state);
762
0
                v.hash(state);
763
0
            }
764
            // stable hash for Null value
765
0
            Null => 1.hash(state),
766
        }
767
63
    }
768
}
769
770
0
fn hash_nested_array<H: std::hash::Hasher>(arr: ArrayRef, state: &mut H) {
771
0
    let arrays = vec![arr.to_owned()];
772
0
    let hashes_buffer = &mut vec![0; arr.len()];
773
0
    let random_state = ahash::RandomState::with_seeds(0, 0, 0, 0);
774
0
    let hashes = create_hashes(&arrays, &random_state, hashes_buffer).unwrap();
775
0
    // Hash back to std::hash::Hasher
776
0
    hashes.hash(state);
777
0
}
778
779
/// Return a reference to the values array and the index into it for a
780
/// dictionary array
781
///
782
/// # Errors
783
///
784
/// Errors if the array cannot be downcasted to DictionaryArray
785
#[inline]
786
0
pub fn get_dict_value<K: ArrowDictionaryKeyType>(
787
0
    array: &dyn Array,
788
0
    index: usize,
789
0
) -> Result<(&ArrayRef, Option<usize>)> {
790
0
    let dict_array = as_dictionary_array::<K>(array)?;
791
0
    Ok((dict_array.values(), dict_array.key(index)))
792
0
}
793
794
/// Create a dictionary array representing `value` repeated `size`
795
/// times
796
0
fn dict_from_scalar<K: ArrowDictionaryKeyType>(
797
0
    value: &ScalarValue,
798
0
    size: usize,
799
0
) -> Result<ArrayRef> {
800
    // values array is one element long (the value)
801
0
    let values_array = value.to_array_of_size(1)?;
802
803
    // Create a key array with `size` elements, each of 0
804
0
    let key_array: PrimitiveArray<K> = std::iter::repeat(if value.is_null() {
805
0
        None
806
    } else {
807
0
        Some(K::default_value())
808
    })
809
0
    .take(size)
810
0
    .collect();
811
0
812
0
    // create a new DictionaryArray
813
0
    //
814
0
    // Note: this path could be made faster by using the ArrayData
815
0
    // APIs and skipping validation, if it every comes up in
816
0
    // performance traces.
817
0
    Ok(Arc::new(
818
0
        DictionaryArray::<K>::try_new(key_array, values_array)?, // should always be valid by construction above
819
    ))
820
0
}
821
822
/// Create a dictionary array representing all the values in values
823
0
fn dict_from_values<K: ArrowDictionaryKeyType>(
824
0
    values_array: ArrayRef,
825
0
) -> Result<ArrayRef> {
826
    // Create a key array with `size` elements of 0..array_len for all
827
    // non-null value elements
828
0
    let key_array: PrimitiveArray<K> = (0..values_array.len())
829
0
        .map(|index| {
830
0
            if values_array.is_valid(index) {
831
0
                let native_index = K::Native::from_usize(index).ok_or_else(|| {
832
0
                    DataFusionError::Internal(format!(
833
0
                        "Can not create index of type {} from value {}",
834
0
                        K::DATA_TYPE,
835
0
                        index
836
0
                    ))
837
0
                })?;
838
0
                Ok(Some(native_index))
839
            } else {
840
0
                Ok(None)
841
            }
842
0
        })
843
0
        .collect::<Result<Vec<_>>>()?
844
0
        .into_iter()
845
0
        .collect();
846
847
    // create a new DictionaryArray
848
    //
849
    // Note: this path could be made faster by using the ArrayData
850
    // APIs and skipping validation, if it every comes up in
851
    // performance traces.
852
0
    let dict_array = DictionaryArray::<K>::try_new(key_array, values_array)?;
853
0
    Ok(Arc::new(dict_array))
854
0
}
855
856
// Downcasts `$array` to `$ARRAYTYPE` and wraps the element at `$index`
// (`None` when that slot is null) together with a clone of `$TZ` in
// `ScalarValue::$SCALAR`. Propagates an internal error with `?` when the
// downcast fails.
macro_rules! typed_cast_tz {
    ($array:expr, $index:expr, $ARRAYTYPE:ident, $SCALAR:ident, $TZ:expr) => {{
        use std::any::type_name;
        let array = $array
            .as_any()
            .downcast_ref::<$ARRAYTYPE>()
            .ok_or_else(|| {
                DataFusionError::Internal(format!(
                    "could not cast value to {}",
                    type_name::<$ARRAYTYPE>()
                ))
            })?;
        let value = if array.is_null($index) {
            None
        } else {
            Some(array.value($index).into())
        };
        Ok::<ScalarValue, DataFusionError>(ScalarValue::$SCALAR(value, $TZ.clone()))
    }};
}
877
878
// Downcasts `$array` to `$ARRAYTYPE` and wraps the element at `$index`
// (`None` when that slot is null) in `ScalarValue::$SCALAR`. Propagates an
// internal error with `?` when the downcast fails.
macro_rules! typed_cast {
    ($array:expr, $index:expr, $ARRAYTYPE:ident, $SCALAR:ident) => {{
        use std::any::type_name;
        let array = $array
            .as_any()
            .downcast_ref::<$ARRAYTYPE>()
            .ok_or_else(|| {
                DataFusionError::Internal(format!(
                    "could not cast value to {}",
                    type_name::<$ARRAYTYPE>()
                ))
            })?;
        let value = if array.is_null($index) {
            None
        } else {
            Some(array.value($index).into())
        };
        Ok::<ScalarValue, DataFusionError>(ScalarValue::$SCALAR(value))
    }};
}
898
899
// Builds an array of `$SIZE` elements from an optional scalar: `$SIZE`
// copies of the value when present, otherwise a null array of the given
// data type. The second form takes an extra `$ENUM` argument for data types
// that carry a parameter (`DataType::$DATA_TYPE($ENUM)`).
macro_rules! build_array_from_option {
    ($DATA_TYPE:ident, $ARRAY_TYPE:ident, $EXPR:expr, $SIZE:expr) => {{
        match $EXPR {
            None => new_null_array(&DataType::$DATA_TYPE, $SIZE),
            Some(inner) => Arc::new($ARRAY_TYPE::from_value(*inner, $SIZE)),
        }
    }};
    ($DATA_TYPE:ident, $ENUM:expr, $ARRAY_TYPE:ident, $EXPR:expr, $SIZE:expr) => {{
        match $EXPR {
            None => new_null_array(&DataType::$DATA_TYPE($ENUM), $SIZE),
            Some(inner) => Arc::new($ARRAY_TYPE::from_value(*inner, $SIZE)),
        }
    }};
}
913
914
// Builds a timestamp array of `$SIZE` elements from an optional value:
// `$SIZE` copies of the value tagged with timezone `$TZ` when present,
// otherwise a null array of type `Timestamp($TIME_UNIT, $TZ)`.
macro_rules! build_timestamp_array_from_option {
    ($TIME_UNIT:expr, $TZ:expr, $ARRAY_TYPE:ident, $EXPR:expr, $SIZE:expr) => {
        match $EXPR {
            None => new_null_array(&DataType::Timestamp($TIME_UNIT, $TZ), $SIZE),
            Some(inner) => {
                Arc::new($ARRAY_TYPE::from_value(*inner, $SIZE).with_timezone_opt($TZ))
            }
        }
    };
}
924
925
// Tests whether the element of `$array` at `$index` equals the optional
// scalar `$VALUE`: a `None` scalar matches only a null slot, a `Some` scalar
// matches only a non-null slot holding an equal value. Propagates an
// internal error with `?` when `$array` is not a `$ARRAYTYPE`.
macro_rules! eq_array_primitive {
    ($array:expr, $index:expr, $ARRAYTYPE:ident, $VALUE:expr) => {{
        use std::any::type_name;
        let array = $array
            .as_any()
            .downcast_ref::<$ARRAYTYPE>()
            .ok_or_else(|| {
                DataFusionError::Internal(format!(
                    "could not cast value to {}",
                    type_name::<$ARRAYTYPE>()
                ))
            })?;
        let is_null = array.is_null($index);
        let is_match = match $VALUE {
            None => is_null,
            Some(expected) => !is_null && &array.value($index) == expected,
        };
        Ok::<bool, DataFusionError>(is_match)
    }};
}
944
945
impl ScalarValue {
946
    /// Create a [`Result<ScalarValue>`] with the provided value and datatype
947
    ///
948
    /// # Panics
949
    ///
950
    /// Panics if d is not compatible with T
951
0
    pub fn new_primitive<T: ArrowPrimitiveType>(
952
0
        a: Option<T::Native>,
953
0
        d: &DataType,
954
0
    ) -> Result<Self> {
955
0
        match a {
956
0
            None => d.try_into(),
957
0
            Some(v) => {
958
0
                let array = PrimitiveArray::<T>::new(vec![v].into(), None)
959
0
                    .with_data_type(d.clone());
960
0
                Self::try_from_array(&array, 0)
961
            }
962
        }
963
0
    }
964
965
    /// Create a decimal Scalar from value/precision and scale.
966
0
    pub fn try_new_decimal128(value: i128, precision: u8, scale: i8) -> Result<Self> {
967
0
        // make sure the precision and scale is valid
968
0
        if precision <= DECIMAL128_MAX_PRECISION && scale.unsigned_abs() <= precision {
969
0
            return Ok(ScalarValue::Decimal128(Some(value), precision, scale));
970
0
        }
971
0
        _internal_err!(
972
0
            "Can not new a decimal type ScalarValue for precision {precision} and scale {scale}"
973
0
        )
974
0
    }
975
976
    /// Returns a [`ScalarValue::Utf8`] representing `val`
977
0
    pub fn new_utf8(val: impl Into<String>) -> Self {
978
0
        ScalarValue::from(val.into())
979
0
    }
980
981
    /// Returns a [`ScalarValue::IntervalYearMonth`] representing
982
    /// `years` years and `months` months
983
0
    pub fn new_interval_ym(years: i32, months: i32) -> Self {
984
0
        let val = IntervalYearMonthType::make_value(years, months);
985
0
        ScalarValue::IntervalYearMonth(Some(val))
986
0
    }
987
988
    /// Returns a [`ScalarValue::IntervalDayTime`] representing
989
    /// `days` days and `millis` milliseconds
990
2.81k
    pub fn new_interval_dt(days: i32, millis: i32) -> Self {
991
2.81k
        let val = IntervalDayTimeType::make_value(days, millis);
992
2.81k
        Self::IntervalDayTime(Some(val))
993
2.81k
    }
994
995
    /// Returns a [`ScalarValue::IntervalMonthDayNano`] representing
996
    /// `months` months and `days` days, and `nanos` nanoseconds
997
0
    pub fn new_interval_mdn(months: i32, days: i32, nanos: i64) -> Self {
998
0
        let val = IntervalMonthDayNanoType::make_value(months, days, nanos);
999
0
        ScalarValue::IntervalMonthDayNano(Some(val))
1000
0
    }
1001
1002
    /// Returns a [`ScalarValue`] representing
1003
    /// `value` and `tz_opt` timezone
1004
0
    pub fn new_timestamp<T: ArrowTimestampType>(
1005
0
        value: Option<i64>,
1006
0
        tz_opt: Option<Arc<str>>,
1007
0
    ) -> Self {
1008
0
        match T::UNIT {
1009
0
            TimeUnit::Second => ScalarValue::TimestampSecond(value, tz_opt),
1010
0
            TimeUnit::Millisecond => ScalarValue::TimestampMillisecond(value, tz_opt),
1011
0
            TimeUnit::Microsecond => ScalarValue::TimestampMicrosecond(value, tz_opt),
1012
0
            TimeUnit::Nanosecond => ScalarValue::TimestampNanosecond(value, tz_opt),
1013
        }
1014
0
    }
1015
1016
    /// Returns a [`ScalarValue`] representing PI
1017
0
    pub fn new_pi(datatype: &DataType) -> Result<ScalarValue> {
1018
0
        match datatype {
1019
0
            DataType::Float32 => Ok(ScalarValue::from(std::f32::consts::PI)),
1020
0
            DataType::Float64 => Ok(ScalarValue::from(std::f64::consts::PI)),
1021
0
            _ => _internal_err!("PI is not supported for data type: {:?}", datatype),
1022
        }
1023
0
    }
1024
1025
    /// Returns a [`ScalarValue`] representing PI's upper bound
1026
0
    pub fn new_pi_upper(datatype: &DataType) -> Result<ScalarValue> {
1027
0
        // TODO: replace the constants with next_up/next_down when
1028
0
        // they are stabilized: https://doc.rust-lang.org/std/primitive.f64.html#method.next_up
1029
0
        match datatype {
1030
0
            DataType::Float32 => Ok(ScalarValue::from(consts::PI_UPPER_F32)),
1031
0
            DataType::Float64 => Ok(ScalarValue::from(consts::PI_UPPER_F64)),
1032
            _ => {
1033
0
                _internal_err!("PI_UPPER is not supported for data type: {:?}", datatype)
1034
            }
1035
        }
1036
0
    }
1037
1038
    /// Returns a [`ScalarValue`] representing -PI's lower bound
1039
0
    pub fn new_negative_pi_lower(datatype: &DataType) -> Result<ScalarValue> {
1040
0
        match datatype {
1041
0
            DataType::Float32 => Ok(ScalarValue::from(consts::NEGATIVE_PI_LOWER_F32)),
1042
0
            DataType::Float64 => Ok(ScalarValue::from(consts::NEGATIVE_PI_LOWER_F64)),
1043
            _ => {
1044
0
                _internal_err!("-PI_LOWER is not supported for data type: {:?}", datatype)
1045
            }
1046
        }
1047
0
    }
1048
1049
    /// Returns a [`ScalarValue`] representing FRAC_PI_2's upper bound
1050
0
    pub fn new_frac_pi_2_upper(datatype: &DataType) -> Result<ScalarValue> {
1051
0
        match datatype {
1052
0
            DataType::Float32 => Ok(ScalarValue::from(consts::FRAC_PI_2_UPPER_F32)),
1053
0
            DataType::Float64 => Ok(ScalarValue::from(consts::FRAC_PI_2_UPPER_F64)),
1054
            _ => {
1055
0
                _internal_err!(
1056
0
                    "PI_UPPER/2 is not supported for data type: {:?}",
1057
0
                    datatype
1058
0
                )
1059
            }
1060
        }
1061
0
    }
1062
1063
    // Returns a [`ScalarValue`] representing FRAC_PI_2's lower bound
1064
0
    pub fn new_neg_frac_pi_2_lower(datatype: &DataType) -> Result<ScalarValue> {
1065
0
        match datatype {
1066
            DataType::Float32 => {
1067
0
                Ok(ScalarValue::from(consts::NEGATIVE_FRAC_PI_2_LOWER_F32))
1068
            }
1069
            DataType::Float64 => {
1070
0
                Ok(ScalarValue::from(consts::NEGATIVE_FRAC_PI_2_LOWER_F64))
1071
            }
1072
            _ => {
1073
0
                _internal_err!(
1074
0
                    "-PI/2_LOWER is not supported for data type: {:?}",
1075
0
                    datatype
1076
0
                )
1077
            }
1078
        }
1079
0
    }
1080
1081
    /// Returns a [`ScalarValue`] representing -PI
1082
0
    pub fn new_negative_pi(datatype: &DataType) -> Result<ScalarValue> {
1083
0
        match datatype {
1084
0
            DataType::Float32 => Ok(ScalarValue::from(-std::f32::consts::PI)),
1085
0
            DataType::Float64 => Ok(ScalarValue::from(-std::f64::consts::PI)),
1086
0
            _ => _internal_err!("-PI is not supported for data type: {:?}", datatype),
1087
        }
1088
0
    }
1089
1090
    /// Returns a [`ScalarValue`] representing PI/2
1091
0
    pub fn new_frac_pi_2(datatype: &DataType) -> Result<ScalarValue> {
1092
0
        match datatype {
1093
0
            DataType::Float32 => Ok(ScalarValue::from(std::f32::consts::FRAC_PI_2)),
1094
0
            DataType::Float64 => Ok(ScalarValue::from(std::f64::consts::FRAC_PI_2)),
1095
0
            _ => _internal_err!("PI/2 is not supported for data type: {:?}", datatype),
1096
        }
1097
0
    }
1098
1099
    /// Returns a [`ScalarValue`] representing -PI/2
1100
0
    pub fn new_neg_frac_pi_2(datatype: &DataType) -> Result<ScalarValue> {
1101
0
        match datatype {
1102
0
            DataType::Float32 => Ok(ScalarValue::from(-std::f32::consts::FRAC_PI_2)),
1103
0
            DataType::Float64 => Ok(ScalarValue::from(-std::f64::consts::FRAC_PI_2)),
1104
0
            _ => _internal_err!("-PI/2 is not supported for data type: {:?}", datatype),
1105
        }
1106
0
    }
1107
1108
    /// Returns a [`ScalarValue`] representing infinity
1109
0
    pub fn new_infinity(datatype: &DataType) -> Result<ScalarValue> {
1110
0
        match datatype {
1111
0
            DataType::Float32 => Ok(ScalarValue::from(f32::INFINITY)),
1112
0
            DataType::Float64 => Ok(ScalarValue::from(f64::INFINITY)),
1113
            _ => {
1114
0
                _internal_err!("Infinity is not supported for data type: {:?}", datatype)
1115
            }
1116
        }
1117
0
    }
1118
1119
    /// Returns a [`ScalarValue`] representing negative infinity
1120
0
    pub fn new_neg_infinity(datatype: &DataType) -> Result<ScalarValue> {
1121
0
        match datatype {
1122
0
            DataType::Float32 => Ok(ScalarValue::from(f32::NEG_INFINITY)),
1123
0
            DataType::Float64 => Ok(ScalarValue::from(f64::NEG_INFINITY)),
1124
            _ => {
1125
0
                _internal_err!(
1126
0
                    "Negative Infinity is not supported for data type: {:?}",
1127
0
                    datatype
1128
0
                )
1129
            }
1130
        }
1131
0
    }
1132
1133
    /// Create a zero value in the given type.
1134
0
    pub fn new_zero(datatype: &DataType) -> Result<ScalarValue> {
1135
0
        Ok(match datatype {
1136
0
            DataType::Boolean => ScalarValue::Boolean(Some(false)),
1137
0
            DataType::Int8 => ScalarValue::Int8(Some(0)),
1138
0
            DataType::Int16 => ScalarValue::Int16(Some(0)),
1139
0
            DataType::Int32 => ScalarValue::Int32(Some(0)),
1140
0
            DataType::Int64 => ScalarValue::Int64(Some(0)),
1141
0
            DataType::UInt8 => ScalarValue::UInt8(Some(0)),
1142
0
            DataType::UInt16 => ScalarValue::UInt16(Some(0)),
1143
0
            DataType::UInt32 => ScalarValue::UInt32(Some(0)),
1144
0
            DataType::UInt64 => ScalarValue::UInt64(Some(0)),
1145
0
            DataType::Float16 => ScalarValue::Float16(Some(f16::from_f32(0.0))),
1146
0
            DataType::Float32 => ScalarValue::Float32(Some(0.0)),
1147
0
            DataType::Float64 => ScalarValue::Float64(Some(0.0)),
1148
0
            DataType::Timestamp(TimeUnit::Second, tz) => {
1149
0
                ScalarValue::TimestampSecond(Some(0), tz.clone())
1150
            }
1151
0
            DataType::Timestamp(TimeUnit::Millisecond, tz) => {
1152
0
                ScalarValue::TimestampMillisecond(Some(0), tz.clone())
1153
            }
1154
0
            DataType::Timestamp(TimeUnit::Microsecond, tz) => {
1155
0
                ScalarValue::TimestampMicrosecond(Some(0), tz.clone())
1156
            }
1157
0
            DataType::Timestamp(TimeUnit::Nanosecond, tz) => {
1158
0
                ScalarValue::TimestampNanosecond(Some(0), tz.clone())
1159
            }
1160
            DataType::Interval(IntervalUnit::YearMonth) => {
1161
0
                ScalarValue::IntervalYearMonth(Some(0))
1162
            }
1163
            DataType::Interval(IntervalUnit::DayTime) => {
1164
0
                ScalarValue::IntervalDayTime(Some(IntervalDayTime::ZERO))
1165
            }
1166
            DataType::Interval(IntervalUnit::MonthDayNano) => {
1167
0
                ScalarValue::IntervalMonthDayNano(Some(IntervalMonthDayNano::ZERO))
1168
            }
1169
0
            DataType::Duration(TimeUnit::Second) => ScalarValue::DurationSecond(Some(0)),
1170
            DataType::Duration(TimeUnit::Millisecond) => {
1171
0
                ScalarValue::DurationMillisecond(Some(0))
1172
            }
1173
            DataType::Duration(TimeUnit::Microsecond) => {
1174
0
                ScalarValue::DurationMicrosecond(Some(0))
1175
            }
1176
            DataType::Duration(TimeUnit::Nanosecond) => {
1177
0
                ScalarValue::DurationNanosecond(Some(0))
1178
            }
1179
            _ => {
1180
0
                return _not_impl_err!(
1181
0
                    "Can't create a zero scalar from data_type \"{datatype:?}\""
1182
0
                );
1183
            }
1184
        })
1185
0
    }
1186
1187
    /// Create an one value in the given type.
1188
0
    pub fn new_one(datatype: &DataType) -> Result<ScalarValue> {
1189
0
        Ok(match datatype {
1190
0
            DataType::Int8 => ScalarValue::Int8(Some(1)),
1191
0
            DataType::Int16 => ScalarValue::Int16(Some(1)),
1192
0
            DataType::Int32 => ScalarValue::Int32(Some(1)),
1193
0
            DataType::Int64 => ScalarValue::Int64(Some(1)),
1194
0
            DataType::UInt8 => ScalarValue::UInt8(Some(1)),
1195
0
            DataType::UInt16 => ScalarValue::UInt16(Some(1)),
1196
0
            DataType::UInt32 => ScalarValue::UInt32(Some(1)),
1197
0
            DataType::UInt64 => ScalarValue::UInt64(Some(1)),
1198
0
            DataType::Float16 => ScalarValue::Float16(Some(f16::from_f32(1.0))),
1199
0
            DataType::Float32 => ScalarValue::Float32(Some(1.0)),
1200
0
            DataType::Float64 => ScalarValue::Float64(Some(1.0)),
1201
            _ => {
1202
0
                return _not_impl_err!(
1203
0
                    "Can't create an one scalar from data_type \"{datatype:?}\""
1204
0
                );
1205
            }
1206
        })
1207
0
    }
1208
1209
    /// Create a negative one value in the given type.
1210
0
    pub fn new_negative_one(datatype: &DataType) -> Result<ScalarValue> {
1211
0
        Ok(match datatype {
1212
0
            DataType::Int8 | DataType::UInt8 => ScalarValue::Int8(Some(-1)),
1213
0
            DataType::Int16 | DataType::UInt16 => ScalarValue::Int16(Some(-1)),
1214
0
            DataType::Int32 | DataType::UInt32 => ScalarValue::Int32(Some(-1)),
1215
0
            DataType::Int64 | DataType::UInt64 => ScalarValue::Int64(Some(-1)),
1216
0
            DataType::Float16 => ScalarValue::Float16(Some(f16::from_f32(-1.0))),
1217
0
            DataType::Float32 => ScalarValue::Float32(Some(-1.0)),
1218
0
            DataType::Float64 => ScalarValue::Float64(Some(-1.0)),
1219
            _ => {
1220
0
                return _not_impl_err!(
1221
0
                    "Can't create a negative one scalar from data_type \"{datatype:?}\""
1222
0
                );
1223
            }
1224
        })
1225
0
    }
1226
1227
0
    pub fn new_ten(datatype: &DataType) -> Result<ScalarValue> {
1228
0
        Ok(match datatype {
1229
0
            DataType::Int8 => ScalarValue::Int8(Some(10)),
1230
0
            DataType::Int16 => ScalarValue::Int16(Some(10)),
1231
0
            DataType::Int32 => ScalarValue::Int32(Some(10)),
1232
0
            DataType::Int64 => ScalarValue::Int64(Some(10)),
1233
0
            DataType::UInt8 => ScalarValue::UInt8(Some(10)),
1234
0
            DataType::UInt16 => ScalarValue::UInt16(Some(10)),
1235
0
            DataType::UInt32 => ScalarValue::UInt32(Some(10)),
1236
0
            DataType::UInt64 => ScalarValue::UInt64(Some(10)),
1237
0
            DataType::Float16 => ScalarValue::Float16(Some(f16::from_f32(10.0))),
1238
0
            DataType::Float32 => ScalarValue::Float32(Some(10.0)),
1239
0
            DataType::Float64 => ScalarValue::Float64(Some(10.0)),
1240
            _ => {
1241
0
                return _not_impl_err!(
1242
0
                    "Can't create a ten scalar from data_type \"{datatype:?}\""
1243
0
                );
1244
            }
1245
        })
1246
0
    }
1247
1248
    /// return the [`DataType`] of this `ScalarValue`
1249
1.10M
    pub fn data_type(&self) -> DataType {
1250
1.10M
        match self {
1251
115k
            ScalarValue::Boolean(_) => DataType::Boolean,
1252
0
            ScalarValue::UInt8(_) => DataType::UInt8,
1253
0
            ScalarValue::UInt16(_) => DataType::UInt16,
1254
135
            ScalarValue::UInt32(_) => DataType::UInt32,
1255
5
            ScalarValue::UInt64(_) => DataType::UInt64,
1256
4
            ScalarValue::Int8(_) => DataType::Int8,
1257
0
            ScalarValue::Int16(_) => DataType::Int16,
1258
424k
            ScalarValue::Int32(_) => DataType::Int32,
1259
34.7k
            ScalarValue::Int64(_) => DataType::Int64,
1260
11
            ScalarValue::Decimal128(_, precision, scale) => {
1261
11
                DataType::Decimal128(*precision, *scale)
1262
            }
1263
0
            ScalarValue::Decimal256(_, precision, scale) => {
1264
0
                DataType::Decimal256(*precision, *scale)
1265
            }
1266
0
            ScalarValue::TimestampSecond(_, tz_opt) => {
1267
0
                DataType::Timestamp(TimeUnit::Second, tz_opt.clone())
1268
            }
1269
100k
            ScalarValue::TimestampMillisecond(_, tz_opt) => {
1270
100k
                DataType::Timestamp(TimeUnit::Millisecond, tz_opt.clone())
1271
            }
1272
0
            ScalarValue::TimestampMicrosecond(_, tz_opt) => {
1273
0
                DataType::Timestamp(TimeUnit::Microsecond, tz_opt.clone())
1274
            }
1275
0
            ScalarValue::TimestampNanosecond(_, tz_opt) => {
1276
0
                DataType::Timestamp(TimeUnit::Nanosecond, tz_opt.clone())
1277
            }
1278
0
            ScalarValue::Float16(_) => DataType::Float16,
1279
71
            ScalarValue::Float32(_) => DataType::Float32,
1280
310k
            ScalarValue::Float64(_) => DataType::Float64,
1281
0
            ScalarValue::Utf8(_) => DataType::Utf8,
1282
0
            ScalarValue::LargeUtf8(_) => DataType::LargeUtf8,
1283
0
            ScalarValue::Utf8View(_) => DataType::Utf8View,
1284
0
            ScalarValue::Binary(_) => DataType::Binary,
1285
0
            ScalarValue::BinaryView(_) => DataType::BinaryView,
1286
0
            ScalarValue::FixedSizeBinary(sz, _) => DataType::FixedSizeBinary(*sz),
1287
0
            ScalarValue::LargeBinary(_) => DataType::LargeBinary,
1288
0
            ScalarValue::List(arr) => arr.data_type().to_owned(),
1289
0
            ScalarValue::LargeList(arr) => arr.data_type().to_owned(),
1290
0
            ScalarValue::FixedSizeList(arr) => arr.data_type().to_owned(),
1291
3
            ScalarValue::Struct(arr) => arr.data_type().to_owned(),
1292
0
            ScalarValue::Map(arr) => arr.data_type().to_owned(),
1293
0
            ScalarValue::Date32(_) => DataType::Date32,
1294
0
            ScalarValue::Date64(_) => DataType::Date64,
1295
0
            ScalarValue::Time32Second(_) => DataType::Time32(TimeUnit::Second),
1296
0
            ScalarValue::Time32Millisecond(_) => DataType::Time32(TimeUnit::Millisecond),
1297
0
            ScalarValue::Time64Microsecond(_) => DataType::Time64(TimeUnit::Microsecond),
1298
0
            ScalarValue::Time64Nanosecond(_) => DataType::Time64(TimeUnit::Nanosecond),
1299
            ScalarValue::IntervalYearMonth(_) => {
1300
0
                DataType::Interval(IntervalUnit::YearMonth)
1301
            }
1302
66.5k
            ScalarValue::IntervalDayTime(_) => DataType::Interval(IntervalUnit::DayTime),
1303
            ScalarValue::IntervalMonthDayNano(_) => {
1304
0
                DataType::Interval(IntervalUnit::MonthDayNano)
1305
            }
1306
0
            ScalarValue::DurationSecond(_) => DataType::Duration(TimeUnit::Second),
1307
            ScalarValue::DurationMillisecond(_) => {
1308
47.4k
                DataType::Duration(TimeUnit::Millisecond)
1309
            }
1310
            ScalarValue::DurationMicrosecond(_) => {
1311
0
                DataType::Duration(TimeUnit::Microsecond)
1312
            }
1313
            ScalarValue::DurationNanosecond(_) => {
1314
0
                DataType::Duration(TimeUnit::Nanosecond)
1315
            }
1316
14
            ScalarValue::Union(_, fields, mode) => DataType::Union(fields.clone(), *mode),
1317
0
            ScalarValue::Dictionary(k, v) => {
1318
0
                DataType::Dictionary(k.clone(), Box::new(v.data_type()))
1319
            }
1320
934
            ScalarValue::Null => DataType::Null,
1321
        }
1322
1.10M
    }
1323
1324
    /// Calculate arithmetic negation for a scalar value
1325
0
    pub fn arithmetic_negate(&self) -> Result<Self> {
1326
0
        fn neg_checked_with_ctx<T: ArrowNativeTypeOp>(
1327
0
            v: T,
1328
0
            ctx: impl Fn() -> String,
1329
0
        ) -> Result<T> {
1330
0
            v.neg_checked()
1331
0
                .map_err(|e| arrow_datafusion_err!(e).context(ctx()))
1332
0
        }
1333
0
        match self {
1334
            ScalarValue::Int8(None)
1335
            | ScalarValue::Int16(None)
1336
            | ScalarValue::Int32(None)
1337
            | ScalarValue::Int64(None)
1338
            | ScalarValue::Float16(None)
1339
            | ScalarValue::Float32(None)
1340
0
            | ScalarValue::Float64(None) => Ok(self.clone()),
1341
0
            ScalarValue::Float16(Some(v)) => {
1342
0
                Ok(ScalarValue::Float16(Some(f16::from_f32(-v.to_f32()))))
1343
            }
1344
0
            ScalarValue::Float64(Some(v)) => Ok(ScalarValue::Float64(Some(-v))),
1345
0
            ScalarValue::Float32(Some(v)) => Ok(ScalarValue::Float32(Some(-v))),
1346
0
            ScalarValue::Int8(Some(v)) => Ok(ScalarValue::Int8(Some(v.neg_checked()?))),
1347
0
            ScalarValue::Int16(Some(v)) => Ok(ScalarValue::Int16(Some(v.neg_checked()?))),
1348
0
            ScalarValue::Int32(Some(v)) => Ok(ScalarValue::Int32(Some(v.neg_checked()?))),
1349
0
            ScalarValue::Int64(Some(v)) => Ok(ScalarValue::Int64(Some(v.neg_checked()?))),
1350
0
            ScalarValue::IntervalYearMonth(Some(v)) => Ok(
1351
0
                ScalarValue::IntervalYearMonth(Some(neg_checked_with_ctx(*v, || {
1352
0
                    format!("In negation of IntervalYearMonth({v})")
1353
0
                })?)),
1354
            ),
1355
0
            ScalarValue::IntervalDayTime(Some(v)) => {
1356
0
                let (days, ms) = IntervalDayTimeType::to_parts(*v);
1357
0
                let val = IntervalDayTimeType::make_value(
1358
0
                    neg_checked_with_ctx(days, || {
1359
0
                        format!("In negation of days {days} in IntervalDayTime")
1360
0
                    })?,
1361
0
                    neg_checked_with_ctx(ms, || {
1362
0
                        format!("In negation of milliseconds {ms} in IntervalDayTime")
1363
0
                    })?,
1364
                );
1365
0
                Ok(ScalarValue::IntervalDayTime(Some(val)))
1366
            }
1367
0
            ScalarValue::IntervalMonthDayNano(Some(v)) => {
1368
0
                let (months, days, nanos) = IntervalMonthDayNanoType::to_parts(*v);
1369
0
                let val = IntervalMonthDayNanoType::make_value(
1370
0
                    neg_checked_with_ctx(months, || {
1371
0
                        format!("In negation of months {months} of IntervalMonthDayNano")
1372
0
                    })?,
1373
0
                    neg_checked_with_ctx(days, || {
1374
0
                        format!("In negation of days {days} of IntervalMonthDayNano")
1375
0
                    })?,
1376
0
                    neg_checked_with_ctx(nanos, || {
1377
0
                        format!("In negation of nanos {nanos} of IntervalMonthDayNano")
1378
0
                    })?,
1379
                );
1380
0
                Ok(ScalarValue::IntervalMonthDayNano(Some(val)))
1381
            }
1382
0
            ScalarValue::Decimal128(Some(v), precision, scale) => {
1383
0
                Ok(ScalarValue::Decimal128(
1384
0
                    Some(neg_checked_with_ctx(*v, || {
1385
0
                        format!("In negation of Decimal128({v}, {precision}, {scale})")
1386
0
                    })?),
1387
0
                    *precision,
1388
0
                    *scale,
1389
                ))
1390
            }
1391
0
            ScalarValue::Decimal256(Some(v), precision, scale) => {
1392
0
                Ok(ScalarValue::Decimal256(
1393
0
                    Some(neg_checked_with_ctx(*v, || {
1394
0
                        format!("In negation of Decimal256({v}, {precision}, {scale})")
1395
0
                    })?),
1396
0
                    *precision,
1397
0
                    *scale,
1398
                ))
1399
            }
1400
0
            ScalarValue::TimestampSecond(Some(v), tz) => {
1401
0
                Ok(ScalarValue::TimestampSecond(
1402
0
                    Some(neg_checked_with_ctx(*v, || {
1403
0
                        format!("In negation of TimestampSecond({v})")
1404
0
                    })?),
1405
0
                    tz.clone(),
1406
                ))
1407
            }
1408
0
            ScalarValue::TimestampNanosecond(Some(v), tz) => {
1409
0
                Ok(ScalarValue::TimestampNanosecond(
1410
0
                    Some(neg_checked_with_ctx(*v, || {
1411
0
                        format!("In negation of TimestampNanoSecond({v})")
1412
0
                    })?),
1413
0
                    tz.clone(),
1414
                ))
1415
            }
1416
0
            ScalarValue::TimestampMicrosecond(Some(v), tz) => {
1417
0
                Ok(ScalarValue::TimestampMicrosecond(
1418
0
                    Some(neg_checked_with_ctx(*v, || {
1419
0
                        format!("In negation of TimestampMicroSecond({v})")
1420
0
                    })?),
1421
0
                    tz.clone(),
1422
                ))
1423
            }
1424
0
            ScalarValue::TimestampMillisecond(Some(v), tz) => {
1425
0
                Ok(ScalarValue::TimestampMillisecond(
1426
0
                    Some(neg_checked_with_ctx(*v, || {
1427
0
                        format!("In negation of TimestampMilliSecond({v})")
1428
0
                    })?),
1429
0
                    tz.clone(),
1430
                ))
1431
            }
1432
0
            value => _internal_err!(
1433
0
                "Can not run arithmetic negative on scalar value {value:?}"
1434
0
            ),
1435
        }
1436
0
    }
1437
1438
    /// Wrapping addition of `ScalarValue`
1439
    ///
1440
    /// NB: operating on `ScalarValue` directly is not efficient, performance sensitive code
1441
    /// should operate on Arrays directly, using vectorized array kernels
1442
22
    pub fn add<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
1443
22
        let r = add_wrapping(&self.to_scalar()
?0
, &other.borrow().to_scalar()
?0
)
?0
;
1444
22
        Self::try_from_array(r.as_ref(), 0)
1445
22
    }
1446
    /// Checked addition of `ScalarValue`
1447
    ///
1448
    /// NB: operating on `ScalarValue` directly is not efficient, performance sensitive code
1449
    /// should operate on Arrays directly, using vectorized array kernels
1450
22.0k
    pub fn add_checked<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
1451
22.0k
        let r = add(&self.to_scalar()
?0
, &other.borrow().to_scalar()
?0
)
?0
;
1452
22.0k
        Self::try_from_array(r.as_ref(), 0)
1453
22.0k
    }
1454
1455
    /// Wrapping subtraction of `ScalarValue`
1456
    ///
1457
    /// NB: operating on `ScalarValue` directly is not efficient, performance sensitive code
1458
    /// should operate on Arrays directly, using vectorized array kernels
1459
0
    pub fn sub<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
1460
0
        let r = sub_wrapping(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
1461
0
        Self::try_from_array(r.as_ref(), 0)
1462
0
    }
1463
1464
    /// Checked subtraction of `ScalarValue`
1465
    ///
1466
    /// NB: operating on `ScalarValue` directly is not efficient, performance sensitive code
1467
    /// should operate on Arrays directly, using vectorized array kernels
1468
22.0k
    pub fn sub_checked<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
1469
22.0k
        let r = sub(&self.to_scalar()
?0
, &other.borrow().to_scalar()
?0
)
?0
;
1470
22.0k
        Self::try_from_array(r.as_ref(), 0)
1471
22.0k
    }
1472
1473
    /// Wrapping multiplication of `ScalarValue`
1474
    ///
1475
    /// NB: operating on `ScalarValue` directly is not efficient, performance sensitive code
1476
    /// should operate on Arrays directly, using vectorized array kernels.
1477
0
    pub fn mul<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
1478
0
        let r = mul_wrapping(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
1479
0
        Self::try_from_array(r.as_ref(), 0)
1480
0
    }
1481
1482
    /// Checked multiplication of `ScalarValue`
1483
    ///
1484
    /// NB: operating on `ScalarValue` directly is not efficient, performance sensitive code
1485
    /// should operate on Arrays directly, using vectorized array kernels.
1486
0
    pub fn mul_checked<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
1487
0
        let r = mul(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
1488
0
        Self::try_from_array(r.as_ref(), 0)
1489
0
    }
1490
1491
    /// Performs `lhs / rhs`
1492
    ///
1493
    /// Overflow or division by zero will result in an error, with exception to
1494
    /// floating point numbers, which instead follow the IEEE 754 rules.
1495
    ///
1496
    /// NB: operating on `ScalarValue` directly is not efficient, performance sensitive code
1497
    /// should operate on Arrays directly, using vectorized array kernels.
1498
0
    pub fn div<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
1499
0
        let r = div(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
1500
0
        Self::try_from_array(r.as_ref(), 0)
1501
0
    }
1502
1503
    /// Performs `lhs % rhs`
1504
    ///
1505
    /// Overflow or division by zero will result in an error, with exception to
1506
    /// floating point numbers, which instead follow the IEEE 754 rules.
1507
    ///
1508
    /// NB: operating on `ScalarValue` directly is not efficient, performance sensitive code
1509
    /// should operate on Arrays directly, using vectorized array kernels.
1510
0
    pub fn rem<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
1511
0
        let r = rem(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
1512
0
        Self::try_from_array(r.as_ref(), 0)
1513
0
    }
1514
1515
0
    pub fn is_unsigned(&self) -> bool {
1516
0
        matches!(
1517
0
            self,
1518
            ScalarValue::UInt8(_)
1519
                | ScalarValue::UInt16(_)
1520
                | ScalarValue::UInt32(_)
1521
                | ScalarValue::UInt64(_)
1522
        )
1523
0
    }
1524
1525
    /// whether this value is null or not.
1526
799k
    pub fn is_null(&self) -> bool {
1527
799k
        match self {
1528
172k
            ScalarValue::Boolean(v) => v.is_none(),
1529
0
            ScalarValue::Null => true,
1530
0
            ScalarValue::Float16(v) => v.is_none(),
1531
24
            ScalarValue::Float32(v) => v.is_none(),
1532
223k
            ScalarValue::Float64(v) => v.is_none(),
1533
4
            ScalarValue::Decimal128(v, _, _) => v.is_none(),
1534
0
            ScalarValue::Decimal256(v, _, _) => v.is_none(),
1535
0
            ScalarValue::Int8(v) => v.is_none(),
1536
0
            ScalarValue::Int16(v) => v.is_none(),
1537
223k
            ScalarValue::Int32(v) => v.is_none(),
1538
24.3k
            ScalarValue::Int64(v) => v.is_none(),
1539
0
            ScalarValue::UInt8(v) => v.is_none(),
1540
0
            ScalarValue::UInt16(v) => v.is_none(),
1541
133
            ScalarValue::UInt32(v) => v.is_none(),
1542
126
            ScalarValue::UInt64(v) => v.is_none(),
1543
0
            ScalarValue::Utf8(v)
1544
0
            | ScalarValue::Utf8View(v)
1545
0
            | ScalarValue::LargeUtf8(v) => v.is_none(),
1546
0
            ScalarValue::Binary(v)
1547
0
            | ScalarValue::BinaryView(v)
1548
0
            | ScalarValue::FixedSizeBinary(_, v)
1549
0
            | ScalarValue::LargeBinary(v) => v.is_none(),
1550
            // arr.len() should be 1 for a list scalar, but we don't seem to
1551
            // enforce that anywhere, so we still check against array length.
1552
0
            ScalarValue::List(arr) => arr.len() == arr.null_count(),
1553
0
            ScalarValue::LargeList(arr) => arr.len() == arr.null_count(),
1554
0
            ScalarValue::FixedSizeList(arr) => arr.len() == arr.null_count(),
1555
0
            ScalarValue::Struct(arr) => arr.len() == arr.null_count(),
1556
0
            ScalarValue::Map(arr) => arr.len() == arr.null_count(),
1557
0
            ScalarValue::Date32(v) => v.is_none(),
1558
0
            ScalarValue::Date64(v) => v.is_none(),
1559
0
            ScalarValue::Time32Second(v) => v.is_none(),
1560
0
            ScalarValue::Time32Millisecond(v) => v.is_none(),
1561
0
            ScalarValue::Time64Microsecond(v) => v.is_none(),
1562
0
            ScalarValue::Time64Nanosecond(v) => v.is_none(),
1563
0
            ScalarValue::TimestampSecond(v, _) => v.is_none(),
1564
81.1k
            ScalarValue::TimestampMillisecond(v, _) => v.is_none(),
1565
0
            ScalarValue::TimestampMicrosecond(v, _) => v.is_none(),
1566
0
            ScalarValue::TimestampNanosecond(v, _) => v.is_none(),
1567
0
            ScalarValue::IntervalYearMonth(v) => v.is_none(),
1568
42.4k
            ScalarValue::IntervalDayTime(v) => v.is_none(),
1569
0
            ScalarValue::IntervalMonthDayNano(v) => v.is_none(),
1570
0
            ScalarValue::DurationSecond(v) => v.is_none(),
1571
31.4k
            ScalarValue::DurationMillisecond(v) => v.is_none(),
1572
0
            ScalarValue::DurationMicrosecond(v) => v.is_none(),
1573
0
            ScalarValue::DurationNanosecond(v) => v.is_none(),
1574
2
            ScalarValue::Union(v, _, _) => match v {
1575
0
                Some((_, s)) => s.is_null(),
1576
2
                None => true,
1577
            },
1578
0
            ScalarValue::Dictionary(_, v) => v.is_null(),
1579
        }
1580
799k
    }
1581
1582
    /// Absolute distance between two numeric values (of the same type). This method will return
1583
    /// None if either one of the arguments are null. It might also return None if the resulting
1584
    /// distance is greater than [`usize::MAX`]. If the type is a float, then the distance will be
1585
    /// rounded to the nearest integer.
1586
    ///
1587
    ///
1588
    /// Note: the datatype itself must support subtraction.
1589
118
    pub fn distance(&self, other: &ScalarValue) -> Option<usize> {
1590
118
        match (self, other) {
1591
0
            (Self::Int8(Some(l)), Self::Int8(Some(r))) => Some(l.abs_diff(*r) as _),
1592
0
            (Self::Int16(Some(l)), Self::Int16(Some(r))) => Some(l.abs_diff(*r) as _),
1593
73
            (Self::Int32(Some(l)), Self::Int32(Some(r))) => Some(l.abs_diff(*r) as _),
1594
34
            (Self::Int64(Some(l)), Self::Int64(Some(r))) => Some(l.abs_diff(*r) as _),
1595
0
            (Self::UInt8(Some(l)), Self::UInt8(Some(r))) => Some(l.abs_diff(*r) as _),
1596
0
            (Self::UInt16(Some(l)), Self::UInt16(Some(r))) => Some(l.abs_diff(*r) as _),
1597
0
            (Self::UInt32(Some(l)), Self::UInt32(Some(r))) => Some(l.abs_diff(*r) as _),
1598
0
            (Self::UInt64(Some(l)), Self::UInt64(Some(r))) => Some(l.abs_diff(*r) as _),
1599
            // TODO: we might want to look into supporting ceil/floor here for floats.
1600
0
            (Self::Float16(Some(l)), Self::Float16(Some(r))) => {
1601
0
                Some((f16::to_f32(*l) - f16::to_f32(*r)).abs().round() as _)
1602
            }
1603
0
            (Self::Float32(Some(l)), Self::Float32(Some(r))) => {
1604
0
                Some((l - r).abs().round() as _)
1605
            }
1606
0
            (Self::Float64(Some(l)), Self::Float64(Some(r))) => {
1607
0
                Some((l - r).abs().round() as _)
1608
            }
1609
11
            _ => None,
1610
        }
1611
118
    }
1612
1613
    /// Converts a scalar value into a 1-row array.
    ///
    /// This is shorthand for `self.to_array_of_size(1)`.
    ///
    /// # Errors
    ///
    /// Errors if the ScalarValue cannot be converted into a 1-row array
    pub fn to_array(&self) -> Result<ArrayRef> {
        self.to_array_of_size(1)
    }
1621
1622
    /// Converts a scalar into an arrow [`Scalar`] (which implements
1623
    /// the [`Datum`] interface).
1624
    ///
1625
    /// This can be used to call arrow compute kernels such as `lt`
1626
    ///
1627
    /// # Errors
1628
    ///
1629
    /// Errors if the ScalarValue cannot be converted into a 1-row array
1630
    ///
1631
    /// # Example
1632
    /// ```
1633
    /// use datafusion_common::ScalarValue;
1634
    /// use arrow::array::{BooleanArray, Int32Array};
1635
    ///
1636
    /// let arr = Int32Array::from(vec![Some(1), None, Some(10)]);
1637
    /// let five = ScalarValue::Int32(Some(5));
1638
    ///
1639
    /// let result = arrow::compute::kernels::cmp::lt(
1640
    ///   &arr,
1641
    ///   &five.to_scalar().unwrap(),
1642
    /// ).unwrap();
1643
    ///
1644
    /// let expected = BooleanArray::from(vec![
1645
    ///     Some(true),
1646
    ///     None,
1647
    ///     Some(false)
1648
    ///   ]
1649
    /// );
1650
    ///
1651
    /// assert_eq!(&result, &expected);
1652
    /// ```
1653
    /// [`Datum`]: arrow_array::Datum
1654
176k
    pub fn to_scalar(&self) -> Result<Scalar<ArrayRef>> {
1655
176k
        Ok(Scalar::new(self.to_array_of_size(1)
?0
))
1656
176k
    }
1657
1658
    /// Converts an iterator of references [`ScalarValue`] into an [`ArrayRef`]
1659
    /// corresponding to those values. For example, an iterator of
1660
    /// [`ScalarValue::Int32`] would be converted to an [`Int32Array`].
1661
    ///
1662
    /// Returns an error if the iterator is empty or if the
1663
    /// [`ScalarValue`]s are not all the same type
1664
    ///
1665
    /// # Panics
1666
    ///
1667
    /// Panics if `self` is a dictionary with invalid key type
1668
    ///
1669
    /// # Example
1670
    /// ```
1671
    /// use datafusion_common::ScalarValue;
1672
    /// use arrow::array::{ArrayRef, BooleanArray};
1673
    ///
1674
    /// let scalars = vec![
1675
    ///   ScalarValue::Boolean(Some(true)),
1676
    ///   ScalarValue::Boolean(None),
1677
    ///   ScalarValue::Boolean(Some(false)),
1678
    /// ];
1679
    ///
1680
    /// // Build an Array from the list of ScalarValues
1681
    /// let array = ScalarValue::iter_to_array(scalars.into_iter())
1682
    ///   .unwrap();
1683
    ///
1684
    /// let expected: ArrayRef = std::sync::Arc::new(
1685
    ///   BooleanArray::from(vec![
1686
    ///     Some(true),
1687
    ///     None,
1688
    ///     Some(false)
1689
    ///   ]
1690
    /// ));
1691
    ///
1692
    /// assert_eq!(&array, &expected);
1693
    /// ```
1694
183
    pub fn iter_to_array(
1695
183
        scalars: impl IntoIterator<Item = ScalarValue>,
1696
183
    ) -> Result<ArrayRef> {
1697
183
        let mut scalars = scalars.into_iter().peekable();
1698
1699
        // figure out the type based on the first element
1700
183
        let data_type = match scalars.peek() {
1701
            None => {
1702
0
                return _exec_err!("Empty iterator passed to ScalarValue::iter_to_array");
1703
            }
1704
183
            Some(sv) => sv.data_type(),
1705
        };
1706
1707
        /// Creates an array of $ARRAY_TY by unpacking values of
1708
        /// SCALAR_TY for primitive types
1709
        macro_rules! build_array_primitive {
1710
            ($ARRAY_TY:ident, $SCALAR_TY:ident) => {{
1711
                {
1712
379
                    let array = scalars.map(|sv| {
1713
379
                        if let ScalarValue::$SCALAR_TY(v) = sv {
1714
379
                            Ok(v)
1715
                        } else {
1716
0
                            _exec_err!(
1717
0
                                "Inconsistent types in ScalarValue::iter_to_array. \
1718
0
                                    Expected {:?}, got {:?}",
1719
0
                                data_type, sv
1720
0
                            )
1721
                        }
1722
379
                    })
1723
                    .collect::<Result<$ARRAY_TY>>()?;
1724
                    Arc::new(array)
1725
                }
1726
            }};
1727
        }
1728
1729
        macro_rules! build_array_primitive_tz {
1730
            ($ARRAY_TY:ident, $SCALAR_TY:ident, $TZ:expr) => {{
1731
                {
1732
0
                    let array = scalars.map(|sv| {
1733
0
                        if let ScalarValue::$SCALAR_TY(v, _) = sv {
1734
0
                            Ok(v)
1735
                        } else {
1736
0
                            _exec_err!(
1737
0
                                "Inconsistent types in ScalarValue::iter_to_array. \
1738
0
                                    Expected {:?}, got {:?}",
1739
0
                                data_type, sv
1740
0
                            )
1741
                        }
1742
0
                    })
1743
                    .collect::<Result<$ARRAY_TY>>()?;
1744
                    Arc::new(array.with_timezone_opt($TZ.clone()))
1745
                }
1746
            }};
1747
        }
1748
1749
        /// Creates an array of $ARRAY_TY by unpacking values of
1750
        /// SCALAR_TY for "string-like" types.
1751
        macro_rules! build_array_string {
1752
            ($ARRAY_TY:ident, $SCALAR_TY:ident) => {{
1753
                {
1754
0
                    let array = scalars.map(|sv| {
1755
0
                        if let ScalarValue::$SCALAR_TY(v) = sv {
1756
0
                            Ok(v)
1757
                        } else {
1758
0
                            _exec_err!(
1759
0
                                "Inconsistent types in ScalarValue::iter_to_array. \
1760
0
                                    Expected {:?}, got {:?}",
1761
0
                                data_type, sv
1762
0
                            )
1763
                        }
1764
0
                    })
1765
                    .collect::<Result<$ARRAY_TY>>()?;
1766
                    Arc::new(array)
1767
                }
1768
            }};
1769
        }
1770
1771
183
        let array: ArrayRef = match 
&data_type0
{
1772
0
            DataType::Decimal128(precision, scale) => {
1773
0
                let decimal_array =
1774
0
                    ScalarValue::iter_to_decimal_array(scalars, *precision, *scale)?;
1775
0
                Arc::new(decimal_array)
1776
            }
1777
0
            DataType::Decimal256(precision, scale) => {
1778
0
                let decimal_array =
1779
0
                    ScalarValue::iter_to_decimal256_array(scalars, *precision, *scale)?;
1780
0
                Arc::new(decimal_array)
1781
            }
1782
0
            DataType::Null => ScalarValue::iter_to_null_array(scalars)?,
1783
52
            DataType::Boolean => build_array_primitive!(BooleanArray, Boolean),
1784
0
            DataType::Float16 => build_array_primitive!(Float16Array, Float16),
1785
0
            DataType::Float32 => build_array_primitive!(Float32Array, Float32),
1786
116
            DataType::Float64 => build_array_primitive!(Float64Array, Float64),
1787
0
            DataType::Int8 => build_array_primitive!(Int8Array, Int8),
1788
0
            DataType::Int16 => build_array_primitive!(Int16Array, Int16),
1789
9
            DataType::Int32 => build_array_primitive!(Int32Array, Int32),
1790
4
            DataType::Int64 => build_array_primitive!(Int64Array, Int64),
1791
0
            DataType::UInt8 => build_array_primitive!(UInt8Array, UInt8),
1792
0
            DataType::UInt16 => build_array_primitive!(UInt16Array, UInt16),
1793
2
            DataType::UInt32 => build_array_primitive!(UInt32Array, UInt32),
1794
0
            DataType::UInt64 => build_array_primitive!(UInt64Array, UInt64),
1795
0
            DataType::Utf8View => build_array_string!(StringViewArray, Utf8View),
1796
0
            DataType::Utf8 => build_array_string!(StringArray, Utf8),
1797
0
            DataType::LargeUtf8 => build_array_string!(LargeStringArray, LargeUtf8),
1798
0
            DataType::BinaryView => build_array_string!(BinaryViewArray, BinaryView),
1799
0
            DataType::Binary => build_array_string!(BinaryArray, Binary),
1800
0
            DataType::LargeBinary => build_array_string!(LargeBinaryArray, LargeBinary),
1801
0
            DataType::Date32 => build_array_primitive!(Date32Array, Date32),
1802
0
            DataType::Date64 => build_array_primitive!(Date64Array, Date64),
1803
            DataType::Time32(TimeUnit::Second) => {
1804
0
                build_array_primitive!(Time32SecondArray, Time32Second)
1805
            }
1806
            DataType::Time32(TimeUnit::Millisecond) => {
1807
0
                build_array_primitive!(Time32MillisecondArray, Time32Millisecond)
1808
            }
1809
            DataType::Time64(TimeUnit::Microsecond) => {
1810
0
                build_array_primitive!(Time64MicrosecondArray, Time64Microsecond)
1811
            }
1812
            DataType::Time64(TimeUnit::Nanosecond) => {
1813
0
                build_array_primitive!(Time64NanosecondArray, Time64Nanosecond)
1814
            }
1815
0
            DataType::Timestamp(TimeUnit::Second, tz) => {
1816
0
                build_array_primitive_tz!(TimestampSecondArray, TimestampSecond, tz)
1817
            }
1818
0
            DataType::Timestamp(TimeUnit::Millisecond, tz) => {
1819
0
                build_array_primitive_tz!(
1820
                    TimestampMillisecondArray,
1821
                    TimestampMillisecond,
1822
0
                    tz
1823
                )
1824
            }
1825
0
            DataType::Timestamp(TimeUnit::Microsecond, tz) => {
1826
0
                build_array_primitive_tz!(
1827
                    TimestampMicrosecondArray,
1828
                    TimestampMicrosecond,
1829
0
                    tz
1830
                )
1831
            }
1832
0
            DataType::Timestamp(TimeUnit::Nanosecond, tz) => {
1833
0
                build_array_primitive_tz!(
1834
                    TimestampNanosecondArray,
1835
                    TimestampNanosecond,
1836
0
                    tz
1837
                )
1838
            }
1839
            DataType::Duration(TimeUnit::Second) => {
1840
0
                build_array_primitive!(DurationSecondArray, DurationSecond)
1841
            }
1842
            DataType::Duration(TimeUnit::Millisecond) => {
1843
0
                build_array_primitive!(DurationMillisecondArray, DurationMillisecond)
1844
            }
1845
            DataType::Duration(TimeUnit::Microsecond) => {
1846
0
                build_array_primitive!(DurationMicrosecondArray, DurationMicrosecond)
1847
            }
1848
            DataType::Duration(TimeUnit::Nanosecond) => {
1849
0
                build_array_primitive!(DurationNanosecondArray, DurationNanosecond)
1850
            }
1851
            DataType::Interval(IntervalUnit::DayTime) => {
1852
0
                build_array_primitive!(IntervalDayTimeArray, IntervalDayTime)
1853
            }
1854
            DataType::Interval(IntervalUnit::YearMonth) => {
1855
0
                build_array_primitive!(IntervalYearMonthArray, IntervalYearMonth)
1856
            }
1857
            DataType::Interval(IntervalUnit::MonthDayNano) => {
1858
0
                build_array_primitive!(IntervalMonthDayNanoArray, IntervalMonthDayNano)
1859
            }
1860
            DataType::FixedSizeList(_, _) => {
1861
                // arrow::compute::concat does not allow inconsistent types including the size of FixedSizeList.
1862
                // The length of nulls here we got is 1, so we need to resize the length of nulls to
1863
                // the length of non-nulls.
1864
0
                let mut arrays =
1865
0
                    scalars.map(|s| s.to_array()).collect::<Result<Vec<_>>>()?;
1866
0
                let first_non_null_data_type = arrays
1867
0
                    .iter()
1868
0
                    .find(|sv| !sv.is_null(0))
1869
0
                    .map(|sv| sv.data_type().to_owned());
1870
0
                if let Some(DataType::FixedSizeList(f, l)) = first_non_null_data_type {
1871
0
                    for array in arrays.iter_mut() {
1872
0
                        if array.is_null(0) {
1873
0
                            *array = Arc::new(FixedSizeListArray::new_null(
1874
0
                                Arc::clone(&f),
1875
0
                                l,
1876
0
                                1,
1877
0
                            ));
1878
0
                        }
1879
                    }
1880
0
                }
1881
0
                let arrays = arrays.iter().map(|a| a.as_ref()).collect::<Vec<_>>();
1882
0
                arrow::compute::concat(arrays.as_slice())?
1883
            }
1884
            DataType::List(_)
1885
            | DataType::LargeList(_)
1886
            | DataType::Map(_, _)
1887
            | DataType::Struct(_)
1888
            | DataType::Union(_, _) => {
1889
0
                let arrays = scalars.map(|s| s.to_array()).collect::<Result<Vec<_>>>()?;
1890
0
                let arrays = arrays.iter().map(|a| a.as_ref()).collect::<Vec<_>>();
1891
0
                arrow::compute::concat(arrays.as_slice())?
1892
            }
1893
0
            DataType::Dictionary(key_type, value_type) => {
1894
                // create the values array
1895
0
                let value_scalars = scalars
1896
0
                    .map(|scalar| match scalar {
1897
0
                        ScalarValue::Dictionary(inner_key_type, scalar) => {
1898
0
                            if &inner_key_type == key_type {
1899
0
                                Ok(*scalar)
1900
                            } else {
1901
0
                                _exec_err!("Expected inner key type of {key_type} but found: {inner_key_type}, value was ({scalar:?})")
1902
                            }
1903
                        }
1904
                        _ => {
1905
0
                            _exec_err!(
1906
0
                                "Expected scalar of type {value_type} but found: {scalar} {scalar:?}"
1907
0
                            )
1908
                        }
1909
0
                    })
1910
0
                    .collect::<Result<Vec<_>>>()?;
1911
1912
0
                let values = Self::iter_to_array(value_scalars)?;
1913
0
                assert_eq!(values.data_type(), value_type.as_ref());
1914
1915
0
                match key_type.as_ref() {
1916
0
                    DataType::Int8 => dict_from_values::<Int8Type>(values)?,
1917
0
                    DataType::Int16 => dict_from_values::<Int16Type>(values)?,
1918
0
                    DataType::Int32 => dict_from_values::<Int32Type>(values)?,
1919
0
                    DataType::Int64 => dict_from_values::<Int64Type>(values)?,
1920
0
                    DataType::UInt8 => dict_from_values::<UInt8Type>(values)?,
1921
0
                    DataType::UInt16 => dict_from_values::<UInt16Type>(values)?,
1922
0
                    DataType::UInt32 => dict_from_values::<UInt32Type>(values)?,
1923
0
                    DataType::UInt64 => dict_from_values::<UInt64Type>(values)?,
1924
0
                    _ => unreachable!("Invalid dictionary keys type: {:?}", key_type),
1925
                }
1926
            }
1927
0
            DataType::FixedSizeBinary(size) => {
1928
0
                let array = scalars
1929
0
                    .map(|sv| {
1930
0
                        if let ScalarValue::FixedSizeBinary(_, v) = sv {
1931
0
                            Ok(v)
1932
                        } else {
1933
0
                            _exec_err!(
1934
0
                                "Inconsistent types in ScalarValue::iter_to_array. \
1935
0
                                Expected {data_type:?}, got {sv:?}"
1936
0
                            )
1937
                        }
1938
0
                    })
1939
0
                    .collect::<Result<Vec<_>>>()?;
1940
0
                let array = FixedSizeBinaryArray::try_from_sparse_iter_with_size(
1941
0
                    array.into_iter(),
1942
0
                    *size,
1943
0
                )?;
1944
0
                Arc::new(array)
1945
            }
1946
            // explicitly enumerate unsupported types so newly added
1947
            // types must be aknowledged, Time32 and Time64 types are
1948
            // not supported if the TimeUnit is not valid (Time32 can
1949
            // only be used with Second and Millisecond, Time64 only
1950
            // with Microsecond and Nanosecond)
1951
            DataType::Time32(TimeUnit::Microsecond)
1952
            | DataType::Time32(TimeUnit::Nanosecond)
1953
            | DataType::Time64(TimeUnit::Second)
1954
            | DataType::Time64(TimeUnit::Millisecond)
1955
            | DataType::RunEndEncoded(_, _)
1956
            | DataType::ListView(_)
1957
            | DataType::LargeListView(_) => {
1958
0
                return _not_impl_err!(
1959
0
                    "Unsupported creation of {:?} array from ScalarValue {:?}",
1960
0
                    data_type,
1961
0
                    scalars.peek()
1962
0
                );
1963
            }
1964
        };
1965
183
        Ok(array)
1966
183
    }
1967
1968
0
    fn iter_to_null_array(
1969
0
        scalars: impl IntoIterator<Item = ScalarValue>,
1970
0
    ) -> Result<ArrayRef> {
1971
0
        let length = scalars.into_iter().try_fold(
1972
0
            0usize,
1973
0
            |r, element: ScalarValue| match element {
1974
0
                ScalarValue::Null => Ok::<usize, DataFusionError>(r + 1),
1975
0
                s => {
1976
0
                    _internal_err!("Expected ScalarValue::Null element. Received {s:?}")
1977
                }
1978
0
            },
1979
0
        )?;
1980
0
        Ok(new_null_array(&DataType::Null, length))
1981
0
    }
1982
1983
0
    fn iter_to_decimal_array(
1984
0
        scalars: impl IntoIterator<Item = ScalarValue>,
1985
0
        precision: u8,
1986
0
        scale: i8,
1987
0
    ) -> Result<Decimal128Array> {
1988
0
        let array = scalars
1989
0
            .into_iter()
1990
0
            .map(|element: ScalarValue| match element {
1991
0
                ScalarValue::Decimal128(v1, _, _) => Ok(v1),
1992
0
                s => {
1993
0
                    _internal_err!("Expected ScalarValue::Null element. Received {s:?}")
1994
                }
1995
0
            })
1996
0
            .collect::<Result<Decimal128Array>>()?
1997
0
            .with_precision_and_scale(precision, scale)?;
1998
0
        Ok(array)
1999
0
    }
2000
2001
0
    fn iter_to_decimal256_array(
2002
0
        scalars: impl IntoIterator<Item = ScalarValue>,
2003
0
        precision: u8,
2004
0
        scale: i8,
2005
0
    ) -> Result<Decimal256Array> {
2006
0
        let array = scalars
2007
0
            .into_iter()
2008
0
            .map(|element: ScalarValue| match element {
2009
0
                ScalarValue::Decimal256(v1, _, _) => Ok(v1),
2010
0
                s => {
2011
0
                    _internal_err!(
2012
0
                        "Expected ScalarValue::Decimal256 element. Received {s:?}"
2013
0
                    )
2014
                }
2015
0
            })
2016
0
            .collect::<Result<Decimal256Array>>()?
2017
0
            .with_precision_and_scale(precision, scale)?;
2018
0
        Ok(array)
2019
0
    }
2020
2021
0
    fn build_decimal_array(
2022
0
        value: Option<i128>,
2023
0
        precision: u8,
2024
0
        scale: i8,
2025
0
        size: usize,
2026
0
    ) -> Result<Decimal128Array> {
2027
0
        Ok(match value {
2028
0
            Some(val) => Decimal128Array::from(vec![val; size])
2029
0
                .with_precision_and_scale(precision, scale)?,
2030
            None => {
2031
0
                let mut builder = Decimal128Array::builder(size)
2032
0
                    .with_precision_and_scale(precision, scale)?;
2033
0
                builder.append_nulls(size);
2034
0
                builder.finish()
2035
            }
2036
        })
2037
0
    }
2038
2039
0
    fn build_decimal256_array(
2040
0
        value: Option<i256>,
2041
0
        precision: u8,
2042
0
        scale: i8,
2043
0
        size: usize,
2044
0
    ) -> Result<Decimal256Array> {
2045
0
        Ok(std::iter::repeat(value)
2046
0
            .take(size)
2047
0
            .collect::<Decimal256Array>()
2048
0
            .with_precision_and_scale(precision, scale)?)
2049
0
    }
2050
2051
    /// Converts `Vec<ScalarValue>` where each element has type corresponding to
2052
    /// `data_type`, to a single element [`ListArray`].
2053
    ///
2054
    /// Example
2055
    /// ```
2056
    /// use datafusion_common::ScalarValue;
2057
    /// use arrow::array::{ListArray, Int32Array};
2058
    /// use arrow::datatypes::{DataType, Int32Type};
2059
    /// use datafusion_common::cast::as_list_array;
2060
    ///
2061
    /// let scalars = vec![
2062
    ///    ScalarValue::Int32(Some(1)),
2063
    ///    ScalarValue::Int32(None),
2064
    ///    ScalarValue::Int32(Some(2))
2065
    /// ];
2066
    ///
2067
    /// let result = ScalarValue::new_list(&scalars, &DataType::Int32, true);
2068
    ///
2069
    /// let expected = ListArray::from_iter_primitive::<Int32Type, _, _>(
2070
    ///     vec![
2071
    ///        Some(vec![Some(1), None, Some(2)])
2072
    ///     ]);
2073
    ///
2074
    /// assert_eq!(*result, expected);
2075
    /// ```
2076
0
    pub fn new_list(
2077
0
        values: &[ScalarValue],
2078
0
        data_type: &DataType,
2079
0
        nullable: bool,
2080
0
    ) -> Arc<ListArray> {
2081
0
        let values = if values.is_empty() {
2082
0
            new_empty_array(data_type)
2083
        } else {
2084
0
            Self::iter_to_array(values.iter().cloned()).unwrap()
2085
        };
2086
0
        Arc::new(array_into_list_array(values, nullable))
2087
0
    }
2088
2089
    /// Same as [`ScalarValue::new_list`] but with nullable set to true.
2090
0
    pub fn new_list_nullable(
2091
0
        values: &[ScalarValue],
2092
0
        data_type: &DataType,
2093
0
    ) -> Arc<ListArray> {
2094
0
        Self::new_list(values, data_type, true)
2095
0
    }
2096
2097
    /// Create ListArray with Null with specific data type
2098
    ///
2099
    /// - new_null_list(i32, nullable, 1): `ListArray[NULL]`
2100
0
    pub fn new_null_list(data_type: DataType, nullable: bool, null_len: usize) -> Self {
2101
0
        let data_type = DataType::List(Field::new_list_field(data_type, nullable).into());
2102
0
        Self::List(Arc::new(ListArray::from(ArrayData::new_null(
2103
0
            &data_type, null_len,
2104
0
        ))))
2105
0
    }
2106
2107
    /// Converts `IntoIterator<Item = ScalarValue>` where each element has type corresponding to
2108
    /// `data_type`, to a [`ListArray`].
2109
    ///
2110
    /// Example
2111
    /// ```
2112
    /// use datafusion_common::ScalarValue;
2113
    /// use arrow::array::{ListArray, Int32Array};
2114
    /// use arrow::datatypes::{DataType, Int32Type};
2115
    /// use datafusion_common::cast::as_list_array;
2116
    ///
2117
    /// let scalars = vec![
2118
    ///    ScalarValue::Int32(Some(1)),
2119
    ///    ScalarValue::Int32(None),
2120
    ///    ScalarValue::Int32(Some(2))
2121
    /// ];
2122
    ///
2123
    /// let result = ScalarValue::new_list_from_iter(scalars.into_iter(), &DataType::Int32, true);
2124
    ///
2125
    /// let expected = ListArray::from_iter_primitive::<Int32Type, _, _>(
2126
    ///     vec![
2127
    ///        Some(vec![Some(1), None, Some(2)])
2128
    ///     ]);
2129
    ///
2130
    /// assert_eq!(*result, expected);
2131
    /// ```
2132
0
    pub fn new_list_from_iter(
2133
0
        values: impl IntoIterator<Item = ScalarValue> + ExactSizeIterator,
2134
0
        data_type: &DataType,
2135
0
        nullable: bool,
2136
0
    ) -> Arc<ListArray> {
2137
0
        let values = if values.len() == 0 {
2138
0
            new_empty_array(data_type)
2139
        } else {
2140
0
            Self::iter_to_array(values).unwrap()
2141
        };
2142
0
        Arc::new(array_into_list_array(values, nullable))
2143
0
    }
2144
2145
    /// Converts `Vec<ScalarValue>` where each element has type corresponding to
2146
    /// `data_type`, to a [`LargeListArray`].
2147
    ///
2148
    /// Example
2149
    /// ```
2150
    /// use datafusion_common::ScalarValue;
2151
    /// use arrow::array::{LargeListArray, Int32Array};
2152
    /// use arrow::datatypes::{DataType, Int32Type};
2153
    /// use datafusion_common::cast::as_large_list_array;
2154
    ///
2155
    /// let scalars = vec![
2156
    ///    ScalarValue::Int32(Some(1)),
2157
    ///    ScalarValue::Int32(None),
2158
    ///    ScalarValue::Int32(Some(2))
2159
    /// ];
2160
    ///
2161
    /// let result = ScalarValue::new_large_list(&scalars, &DataType::Int32);
2162
    ///
2163
    /// let expected = LargeListArray::from_iter_primitive::<Int32Type, _, _>(
2164
    ///     vec![
2165
    ///        Some(vec![Some(1), None, Some(2)])
2166
    ///     ]);
2167
    ///
2168
    /// assert_eq!(*result, expected);
2169
    /// ```
2170
0
    pub fn new_large_list(
2171
0
        values: &[ScalarValue],
2172
0
        data_type: &DataType,
2173
0
    ) -> Arc<LargeListArray> {
2174
0
        let values = if values.is_empty() {
2175
0
            new_empty_array(data_type)
2176
        } else {
2177
0
            Self::iter_to_array(values.iter().cloned()).unwrap()
2178
        };
2179
0
        Arc::new(array_into_large_list_array(values))
2180
0
    }
2181
2182
    /// Converts a scalar value into an array of `size` rows.
2183
    ///
2184
    /// # Errors
2185
    ///
2186
    /// Errors if `self` is
2187
    /// - a decimal that fails be converted to a decimal array of size
2188
    /// - a `Fixedsizelist` that fails to be concatenated into an array of size
2189
    /// - a `List` that fails to be concatenated into an array of size
2190
    /// - a `Dictionary` that fails be converted to a dictionary array of size
2191
179k
    pub fn to_array_of_size(&self, size: usize) -> Result<ArrayRef> {
2192
179k
        Ok(match self {
2193
0
            ScalarValue::Decimal128(e, precision, scale) => Arc::new(
2194
0
                ScalarValue::build_decimal_array(*e, *precision, *scale, size)?,
2195
            ),
2196
0
            ScalarValue::Decimal256(e, precision, scale) => Arc::new(
2197
0
                ScalarValue::build_decimal256_array(*e, *precision, *scale, size)?,
2198
            ),
2199
0
            ScalarValue::Boolean(e) => {
2200
0
                Arc::new(BooleanArray::from(vec![*e; size])) as ArrayRef
2201
            }
2202
44.2k
            ScalarValue::Float64(e) => {
2203
44.2k
                
build_array_from_option!44.2k
(Float64, Float64Array, e,
size16
)
2204
            }
2205
8
            ScalarValue::Float32(e) => {
2206
8
                
build_array_from_option!0
(Float32, Float32Array, e, size)
2207
            }
2208
0
            ScalarValue::Float16(e) => {
2209
0
                build_array_from_option!(Float16, Float16Array, e, size)
2210
            }
2211
16
            ScalarValue::Int8(e) => build_array_from_option!(Int8, Int8Array, e, 
size0
),
2212
0
            ScalarValue::Int16(e) => build_array_from_option!(Int16, Int16Array, e, size),
2213
100k
            ScalarValue::Int32(e) => 
build_array_from_option!99.5k
(Int32, Int32Array, e,
size811
),
2214
5.85k
            ScalarValue::Int64(e) => 
build_array_from_option!5.07k
(Int64, Int64Array, e,
size779
),
2215
0
            ScalarValue::UInt8(e) => build_array_from_option!(UInt8, UInt8Array, e, size),
2216
0
            ScalarValue::UInt16(e) => {
2217
0
                build_array_from_option!(UInt16, UInt16Array, e, size)
2218
            }
2219
16
            ScalarValue::UInt32(e) => {
2220
16
                
build_array_from_option!0
(UInt32, UInt32Array, e, size)
2221
            }
2222
44
            ScalarValue::UInt64(e) => {
2223
44
                build_array_from_option!(UInt64, UInt64Array, e, 
size0
)
2224
            }
2225
0
            ScalarValue::TimestampSecond(e, tz_opt) => {
2226
0
                build_timestamp_array_from_option!(
2227
0
                    TimeUnit::Second,
2228
0
                    tz_opt.clone(),
2229
                    TimestampSecondArray,
2230
0
                    e,
2231
0
                    size
2232
                )
2233
            }
2234
11.2k
            ScalarValue::TimestampMillisecond(e, tz_opt) => {
2235
11.2k
                build_timestamp_array_from_option!(
2236
0
                    TimeUnit::Millisecond,
2237
0
                    tz_opt.clone(),
2238
                    TimestampMillisecondArray,
2239
11.2k
                    e,
2240
0
                    size
2241
                )
2242
            }
2243
2244
0
            ScalarValue::TimestampMicrosecond(e, tz_opt) => {
2245
0
                build_timestamp_array_from_option!(
2246
0
                    TimeUnit::Microsecond,
2247
0
                    tz_opt.clone(),
2248
                    TimestampMicrosecondArray,
2249
0
                    e,
2250
0
                    size
2251
                )
2252
            }
2253
0
            ScalarValue::TimestampNanosecond(e, tz_opt) => {
2254
0
                build_timestamp_array_from_option!(
2255
0
                    TimeUnit::Nanosecond,
2256
0
                    tz_opt.clone(),
2257
                    TimestampNanosecondArray,
2258
0
                    e,
2259
0
                    size
2260
                )
2261
            }
2262
0
            ScalarValue::Utf8(e) => match e {
2263
0
                Some(value) => {
2264
0
                    Arc::new(StringArray::from_iter_values(repeat(value).take(size)))
2265
                }
2266
0
                None => new_null_array(&DataType::Utf8, size),
2267
            },
2268
0
            ScalarValue::Utf8View(e) => match e {
2269
0
                Some(value) => {
2270
0
                    Arc::new(StringViewArray::from_iter_values(repeat(value).take(size)))
2271
                }
2272
0
                None => new_null_array(&DataType::Utf8View, size),
2273
            },
2274
0
            ScalarValue::LargeUtf8(e) => match e {
2275
0
                Some(value) => {
2276
0
                    Arc::new(LargeStringArray::from_iter_values(repeat(value).take(size)))
2277
                }
2278
0
                None => new_null_array(&DataType::LargeUtf8, size),
2279
            },
2280
0
            ScalarValue::Binary(e) => match e {
2281
0
                Some(value) => Arc::new(
2282
0
                    repeat(Some(value.as_slice()))
2283
0
                        .take(size)
2284
0
                        .collect::<BinaryArray>(),
2285
0
                ),
2286
                None => {
2287
0
                    Arc::new(repeat(None::<&str>).take(size).collect::<BinaryArray>())
2288
                }
2289
            },
2290
0
            ScalarValue::BinaryView(e) => match e {
2291
0
                Some(value) => Arc::new(
2292
0
                    repeat(Some(value.as_slice()))
2293
0
                        .take(size)
2294
0
                        .collect::<BinaryViewArray>(),
2295
0
                ),
2296
                None => {
2297
0
                    Arc::new(repeat(None::<&str>).take(size).collect::<BinaryViewArray>())
2298
                }
2299
            },
2300
0
            ScalarValue::FixedSizeBinary(s, e) => match e {
2301
0
                Some(value) => Arc::new(
2302
0
                    FixedSizeBinaryArray::try_from_sparse_iter_with_size(
2303
0
                        repeat(Some(value.as_slice())).take(size),
2304
0
                        *s,
2305
0
                    )
2306
0
                    .unwrap(),
2307
0
                ),
2308
0
                None => Arc::new(
2309
0
                    FixedSizeBinaryArray::try_from_sparse_iter_with_size(
2310
0
                        repeat(None::<&[u8]>).take(size),
2311
0
                        *s,
2312
0
                    )
2313
0
                    .unwrap(),
2314
0
                ),
2315
            },
2316
0
            ScalarValue::LargeBinary(e) => match e {
2317
0
                Some(value) => Arc::new(
2318
0
                    repeat(Some(value.as_slice()))
2319
0
                        .take(size)
2320
0
                        .collect::<LargeBinaryArray>(),
2321
0
                ),
2322
0
                None => Arc::new(
2323
0
                    repeat(None::<&str>)
2324
0
                        .take(size)
2325
0
                        .collect::<LargeBinaryArray>(),
2326
0
                ),
2327
            },
2328
0
            ScalarValue::List(arr) => {
2329
0
                Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)?
2330
            }
2331
0
            ScalarValue::LargeList(arr) => {
2332
0
                Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)?
2333
            }
2334
0
            ScalarValue::FixedSizeList(arr) => {
2335
0
                Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)?
2336
            }
2337
0
            ScalarValue::Struct(arr) => {
2338
0
                Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)?
2339
            }
2340
0
            ScalarValue::Map(arr) => {
2341
0
                Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)?
2342
            }
2343
0
            ScalarValue::Date32(e) => {
2344
0
                build_array_from_option!(Date32, Date32Array, e, size)
2345
            }
2346
0
            ScalarValue::Date64(e) => {
2347
0
                build_array_from_option!(Date64, Date64Array, e, size)
2348
            }
2349
0
            ScalarValue::Time32Second(e) => {
2350
0
                build_array_from_option!(
2351
                    Time32,
2352
                    TimeUnit::Second,
2353
                    Time32SecondArray,
2354
0
                    e,
2355
0
                    size
2356
                )
2357
            }
2358
0
            ScalarValue::Time32Millisecond(e) => {
2359
0
                build_array_from_option!(
2360
                    Time32,
2361
                    TimeUnit::Millisecond,
2362
                    Time32MillisecondArray,
2363
0
                    e,
2364
0
                    size
2365
                )
2366
            }
2367
0
            ScalarValue::Time64Microsecond(e) => {
2368
0
                build_array_from_option!(
2369
                    Time64,
2370
                    TimeUnit::Microsecond,
2371
                    Time64MicrosecondArray,
2372
0
                    e,
2373
0
                    size
2374
                )
2375
            }
2376
0
            ScalarValue::Time64Nanosecond(e) => {
2377
0
                build_array_from_option!(
2378
                    Time64,
2379
                    TimeUnit::Nanosecond,
2380
                    Time64NanosecondArray,
2381
0
                    e,
2382
0
                    size
2383
                )
2384
            }
2385
10.6k
            ScalarValue::IntervalDayTime(e) => build_array_from_option!(
2386
                Interval,
2387
                IntervalUnit::DayTime,
2388
                IntervalDayTimeArray,
2389
10.6k
                e,
2390
0
                size
2391
            ),
2392
0
            ScalarValue::IntervalYearMonth(e) => build_array_from_option!(
2393
                Interval,
2394
                IntervalUnit::YearMonth,
2395
                IntervalYearMonthArray,
2396
0
                e,
2397
0
                size
2398
            ),
2399
0
            ScalarValue::IntervalMonthDayNano(e) => build_array_from_option!(
2400
                Interval,
2401
                IntervalUnit::MonthDayNano,
2402
                IntervalMonthDayNanoArray,
2403
0
                e,
2404
0
                size
2405
            ),
2406
0
            ScalarValue::DurationSecond(e) => build_array_from_option!(
2407
                Duration,
2408
                TimeUnit::Second,
2409
                DurationSecondArray,
2410
0
                e,
2411
0
                size
2412
            ),
2413
7.39k
            ScalarValue::DurationMillisecond(e) => build_array_from_option!(
2414
                Duration,
2415
                TimeUnit::Millisecond,
2416
                DurationMillisecondArray,
2417
7.39k
                e,
2418
0
                size
2419
            ),
2420
0
            ScalarValue::DurationMicrosecond(e) => build_array_from_option!(
2421
                Duration,
2422
                TimeUnit::Microsecond,
2423
                DurationMicrosecondArray,
2424
0
                e,
2425
0
                size
2426
            ),
2427
0
            ScalarValue::DurationNanosecond(e) => build_array_from_option!(
2428
                Duration,
2429
                TimeUnit::Nanosecond,
2430
                DurationNanosecondArray,
2431
0
                e,
2432
0
                size
2433
            ),
2434
0
            ScalarValue::Union(value, fields, _mode) => match value {
2435
0
                Some((v_id, value)) => {
2436
0
                    let mut new_fields = Vec::with_capacity(fields.len());
2437
0
                    let mut child_arrays = Vec::<ArrayRef>::with_capacity(fields.len());
2438
0
                    for (f_id, field) in fields.iter() {
2439
0
                        let ar = if f_id == *v_id {
2440
0
                            value.to_array_of_size(size)?
2441
                        } else {
2442
0
                            let dt = field.data_type();
2443
0
                            new_null_array(dt, size)
2444
                        };
2445
0
                        let field = (**field).clone();
2446
0
                        child_arrays.push(ar);
2447
0
                        new_fields.push(field.clone());
2448
                    }
2449
0
                    let type_ids = repeat(*v_id).take(size);
2450
0
                    let type_ids = ScalarBuffer::<i8>::from_iter(type_ids);
2451
0
                    let value_offsets: Option<ScalarBuffer<i32>> = None;
2452
0
                    let ar = UnionArray::try_new(
2453
0
                        fields.clone(),
2454
0
                        type_ids,
2455
0
                        value_offsets,
2456
0
                        child_arrays,
2457
0
                    )
2458
0
                    .map_err(|e| DataFusionError::ArrowError(e, None))?;
2459
0
                    Arc::new(ar)
2460
                }
2461
                None => {
2462
0
                    let dt = self.data_type();
2463
0
                    new_null_array(&dt, size)
2464
                }
2465
            },
2466
0
            ScalarValue::Dictionary(key_type, v) => {
2467
0
                // values array is one element long (the value)
2468
0
                match key_type.as_ref() {
2469
0
                    DataType::Int8 => dict_from_scalar::<Int8Type>(v, size)?,
2470
0
                    DataType::Int16 => dict_from_scalar::<Int16Type>(v, size)?,
2471
0
                    DataType::Int32 => dict_from_scalar::<Int32Type>(v, size)?,
2472
0
                    DataType::Int64 => dict_from_scalar::<Int64Type>(v, size)?,
2473
0
                    DataType::UInt8 => dict_from_scalar::<UInt8Type>(v, size)?,
2474
0
                    DataType::UInt16 => dict_from_scalar::<UInt16Type>(v, size)?,
2475
0
                    DataType::UInt32 => dict_from_scalar::<UInt32Type>(v, size)?,
2476
0
                    DataType::UInt64 => dict_from_scalar::<UInt64Type>(v, size)?,
2477
0
                    _ => unreachable!("Invalid dictionary keys type: {:?}", key_type),
2478
                }
2479
            }
2480
0
            ScalarValue::Null => new_null_array(&DataType::Null, size),
2481
        })
2482
179k
    }
2483
2484
0
    fn get_decimal_value_from_array(
2485
0
        array: &dyn Array,
2486
0
        index: usize,
2487
0
        precision: u8,
2488
0
        scale: i8,
2489
0
    ) -> Result<ScalarValue> {
2490
0
        match array.data_type() {
2491
            DataType::Decimal128(_, _) => {
2492
0
                let array = as_decimal128_array(array)?;
2493
0
                if array.is_null(index) {
2494
0
                    Ok(ScalarValue::Decimal128(None, precision, scale))
2495
                } else {
2496
0
                    let value = array.value(index);
2497
0
                    Ok(ScalarValue::Decimal128(Some(value), precision, scale))
2498
                }
2499
            }
2500
            DataType::Decimal256(_, _) => {
2501
0
                let array = as_decimal256_array(array)?;
2502
0
                if array.is_null(index) {
2503
0
                    Ok(ScalarValue::Decimal256(None, precision, scale))
2504
                } else {
2505
0
                    let value = array.value(index);
2506
0
                    Ok(ScalarValue::Decimal256(Some(value), precision, scale))
2507
                }
2508
            }
2509
0
            _ => _internal_err!("Unsupported decimal type"),
2510
        }
2511
0
    }
2512
2513
0
    fn list_to_array_of_size(arr: &dyn Array, size: usize) -> Result<ArrayRef> {
2514
0
        let arrays = std::iter::repeat(arr).take(size).collect::<Vec<_>>();
2515
0
        let ret = match !arrays.is_empty() {
2516
0
            true => arrow::compute::concat(arrays.as_slice())?,
2517
0
            false => arr.slice(0, 0),
2518
        };
2519
0
        Ok(ret)
2520
0
    }
2521
2522
    /// Retrieve ScalarValue for each row in `array`
2523
    ///
2524
    /// Example 1: Array (ScalarValue::Int32)
2525
    /// ```
2526
    /// use datafusion_common::ScalarValue;
2527
    /// use arrow::array::ListArray;
2528
    /// use arrow::datatypes::{DataType, Int32Type};
2529
    ///
2530
    /// // Equivalent to [[1,2,3], [4,5]]
2531
    /// let list_arr = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
2532
    ///    Some(vec![Some(1), Some(2), Some(3)]),
2533
    ///    Some(vec![Some(4), Some(5)])
2534
    /// ]);
2535
    ///
2536
    /// // Convert the array into Scalar Values for each row
2537
    /// let scalar_vec = ScalarValue::convert_array_to_scalar_vec(&list_arr).unwrap();
2538
    ///
2539
    /// let expected = vec![
2540
    /// vec![
2541
    ///     ScalarValue::Int32(Some(1)),
2542
    ///     ScalarValue::Int32(Some(2)),
2543
    ///     ScalarValue::Int32(Some(3)),
2544
    /// ],
2545
    /// vec![
2546
    ///    ScalarValue::Int32(Some(4)),
2547
    ///    ScalarValue::Int32(Some(5)),
2548
    /// ],
2549
    /// ];
2550
    ///
2551
    /// assert_eq!(scalar_vec, expected);
2552
    /// ```
2553
    ///
2554
    /// Example 2: Nested array (ScalarValue::List)
2555
    /// ```
2556
    /// use datafusion_common::ScalarValue;
2557
    /// use arrow::array::ListArray;
2558
    /// use arrow::datatypes::{DataType, Int32Type};
2559
    /// use datafusion_common::utils::array_into_list_array_nullable;
2560
    /// use std::sync::Arc;
2561
    ///
2562
    /// let list_arr = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
2563
    ///    Some(vec![Some(1), Some(2), Some(3)]),
2564
    ///    Some(vec![Some(4), Some(5)])
2565
    /// ]);
2566
    ///
2567
    /// // Wrap into another layer of list, we got nested array as [ [[1,2,3], [4,5]] ]
2568
    /// let list_arr = array_into_list_array_nullable(Arc::new(list_arr));
2569
    ///
2570
    /// // Convert the array into Scalar Values for each row, we got 1D arrays in this example
2571
    /// let scalar_vec = ScalarValue::convert_array_to_scalar_vec(&list_arr).unwrap();
2572
    ///
2573
    /// let l1 = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
2574
    ///     Some(vec![Some(1), Some(2), Some(3)]),
2575
    /// ]);
2576
    /// let l2 = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
2577
    ///     Some(vec![Some(4), Some(5)]),
2578
    /// ]);
2579
    ///
2580
    /// let expected = vec![
2581
    ///   vec![
2582
    ///     ScalarValue::List(Arc::new(l1)),
2583
    ///     ScalarValue::List(Arc::new(l2)),
2584
    ///   ],
2585
    /// ];
2586
    ///
2587
    /// assert_eq!(scalar_vec, expected);
2588
    /// ```
2589
0
    pub fn convert_array_to_scalar_vec(array: &dyn Array) -> Result<Vec<Vec<Self>>> {
2590
0
        let mut scalars = Vec::with_capacity(array.len());
2591
2592
0
        for index in 0..array.len() {
2593
0
            let nested_array = array.as_list::<i32>().value(index);
2594
0
            let scalar_values = (0..nested_array.len())
2595
0
                .map(|i| ScalarValue::try_from_array(&nested_array, i))
2596
0
                .collect::<Result<Vec<_>>>()?;
2597
0
            scalars.push(scalar_values);
2598
        }
2599
2600
0
        Ok(scalars)
2601
0
    }
2602
2603
    // TODO: Support more types after other ScalarValue is wrapped with ArrayRef
2604
    /// Get raw data (inner array) inside ScalarValue
2605
0
    pub fn raw_data(&self) -> Result<ArrayRef> {
2606
0
        match self {
2607
0
            ScalarValue::List(arr) => Ok(arr.to_owned()),
2608
0
            _ => _internal_err!("ScalarValue is not a list"),
2609
        }
2610
0
    }
2611
2612
    /// Converts a value in `array` at `index` into a ScalarValue
2613
65.1k
    pub fn try_from_array(array: &dyn Array, index: usize) -> Result<Self> {
2614
65.1k
        // handle NULL value
2615
65.1k
        if !array.is_valid(index) {
2616
1.64k
            return array.data_type().try_into();
2617
63.5k
        }
2618
63.5k
2619
63.5k
        Ok(match array.data_type() {
2620
0
            DataType::Null => ScalarValue::Null,
2621
0
            DataType::Decimal128(precision, scale) => {
2622
0
                ScalarValue::get_decimal_value_from_array(
2623
0
                    array, index, *precision, *scale,
2624
0
                )?
2625
            }
2626
0
            DataType::Decimal256(precision, scale) => {
2627
0
                ScalarValue::get_decimal_value_from_array(
2628
0
                    array, index, *precision, *scale,
2629
0
                )?
2630
            }
2631
66
            DataType::Boolean => typed_cast!(array, index, BooleanArray, Boolean)
?0
,
2632
21.9k
            DataType::Float64 => typed_cast!(array, index, Float64Array, Float64)
?0
,
2633
0
            DataType::Float32 => typed_cast!(array, index, Float32Array, Float32)?,
2634
0
            DataType::Float16 => typed_cast!(array, index, Float16Array, Float16)?,
2635
107
            DataType::UInt64 => typed_cast!(array, index, UInt64Array, UInt64)
?0
,
2636
0
            DataType::UInt32 => typed_cast!(array, index, UInt32Array, UInt32)?,
2637
0
            DataType::UInt16 => typed_cast!(array, index, UInt16Array, UInt16)?,
2638
0
            DataType::UInt8 => typed_cast!(array, index, UInt8Array, UInt8)?,
2639
2.32k
            DataType::Int64 => typed_cast!(array, index, Int64Array, Int64)
?0
,
2640
24.3k
            DataType::Int32 => typed_cast!(array, index, Int32Array, Int32)
?0
,
2641
0
            DataType::Int16 => typed_cast!(array, index, Int16Array, Int16)?,
2642
0
            DataType::Int8 => typed_cast!(array, index, Int8Array, Int8)?,
2643
0
            DataType::Binary => typed_cast!(array, index, BinaryArray, Binary)?,
2644
            DataType::LargeBinary => {
2645
0
                typed_cast!(array, index, LargeBinaryArray, LargeBinary)?
2646
            }
2647
            DataType::BinaryView => {
2648
0
                typed_cast!(array, index, BinaryViewArray, BinaryView)?
2649
            }
2650
0
            DataType::Utf8 => typed_cast!(array, index, StringArray, Utf8)?,
2651
            DataType::LargeUtf8 => {
2652
0
                typed_cast!(array, index, LargeStringArray, LargeUtf8)?
2653
            }
2654
0
            DataType::Utf8View => typed_cast!(array, index, StringViewArray, Utf8View)?,
2655
0
            DataType::List(field) => {
2656
0
                let list_array = array.as_list::<i32>();
2657
0
                let nested_array = list_array.value(index);
2658
0
                // Produces a single element `ListArray` with the value at `index`.
2659
0
                let arr =
2660
0
                    Arc::new(array_into_list_array(nested_array, field.is_nullable()));
2661
0
2662
0
                ScalarValue::List(arr)
2663
            }
2664
            DataType::LargeList(_) => {
2665
0
                let list_array = as_large_list_array(array);
2666
0
                let nested_array = list_array.value(index);
2667
0
                // Produces a single element `LargeListArray` with the value at `index`.
2668
0
                let arr = Arc::new(array_into_large_list_array(nested_array));
2669
0
2670
0
                ScalarValue::LargeList(arr)
2671
            }
2672
            // TODO: There is no test for FixedSizeList now, add it later
2673
            DataType::FixedSizeList(_, _) => {
2674
0
                let list_array = as_fixed_size_list_array(array)?;
2675
0
                let nested_array = list_array.value(index);
2676
0
                // Produces a single element `ListArray` with the value at `index`.
2677
0
                let list_size = nested_array.len();
2678
0
                let arr =
2679
0
                    Arc::new(array_into_fixed_size_list_array(nested_array, list_size));
2680
0
2681
0
                ScalarValue::FixedSizeList(arr)
2682
            }
2683
0
            DataType::Date32 => typed_cast!(array, index, Date32Array, Date32)?,
2684
0
            DataType::Date64 => typed_cast!(array, index, Date64Array, Date64)?,
2685
            DataType::Time32(TimeUnit::Second) => {
2686
0
                typed_cast!(array, index, Time32SecondArray, Time32Second)?
2687
            }
2688
            DataType::Time32(TimeUnit::Millisecond) => {
2689
0
                typed_cast!(array, index, Time32MillisecondArray, Time32Millisecond)?
2690
            }
2691
            DataType::Time64(TimeUnit::Microsecond) => {
2692
0
                typed_cast!(array, index, Time64MicrosecondArray, Time64Microsecond)?
2693
            }
2694
            DataType::Time64(TimeUnit::Nanosecond) => {
2695
0
                typed_cast!(array, index, Time64NanosecondArray, Time64Nanosecond)?
2696
            }
2697
0
            DataType::Timestamp(TimeUnit::Second, tz_opt) => typed_cast_tz!(
2698
0
                array,
2699
0
                index,
2700
                TimestampSecondArray,
2701
                TimestampSecond,
2702
0
                tz_opt
2703
0
            )?,
2704
9.41k
            DataType::Timestamp(TimeUnit::Millisecond, tz_opt) => typed_cast_tz!(
2705
9.41k
                array,
2706
9.41k
                index,
2707
                TimestampMillisecondArray,
2708
                TimestampMillisecond,
2709
9.41k
                tz_opt
2710
0
            )?,
2711
0
            DataType::Timestamp(TimeUnit::Microsecond, tz_opt) => typed_cast_tz!(
2712
0
                array,
2713
0
                index,
2714
                TimestampMicrosecondArray,
2715
                TimestampMicrosecond,
2716
0
                tz_opt
2717
0
            )?,
2718
0
            DataType::Timestamp(TimeUnit::Nanosecond, tz_opt) => typed_cast_tz!(
2719
0
                array,
2720
0
                index,
2721
                TimestampNanosecondArray,
2722
                TimestampNanosecond,
2723
0
                tz_opt
2724
0
            )?,
2725
0
            DataType::Dictionary(key_type, _) => {
2726
0
                let (values_array, values_index) = match key_type.as_ref() {
2727
0
                    DataType::Int8 => get_dict_value::<Int8Type>(array, index)?,
2728
0
                    DataType::Int16 => get_dict_value::<Int16Type>(array, index)?,
2729
0
                    DataType::Int32 => get_dict_value::<Int32Type>(array, index)?,
2730
0
                    DataType::Int64 => get_dict_value::<Int64Type>(array, index)?,
2731
0
                    DataType::UInt8 => get_dict_value::<UInt8Type>(array, index)?,
2732
0
                    DataType::UInt16 => get_dict_value::<UInt16Type>(array, index)?,
2733
0
                    DataType::UInt32 => get_dict_value::<UInt32Type>(array, index)?,
2734
0
                    DataType::UInt64 => get_dict_value::<UInt64Type>(array, index)?,
2735
0
                    _ => unreachable!("Invalid dictionary keys type: {:?}", key_type),
2736
                };
2737
                // look up the index in the values dictionary
2738
0
                let value = match values_index {
2739
0
                    Some(values_index) => {
2740
0
                        ScalarValue::try_from_array(values_array, values_index)
2741
                    }
2742
                    // else entry was null, so return null
2743
0
                    None => values_array.data_type().try_into(),
2744
0
                }?;
2745
2746
0
                Self::Dictionary(key_type.clone(), Box::new(value))
2747
            }
2748
            DataType::Struct(_) => {
2749
0
                let a = array.slice(index, 1);
2750
0
                Self::Struct(Arc::new(a.as_struct().to_owned()))
2751
            }
2752
            DataType::FixedSizeBinary(_) => {
2753
0
                let array = as_fixed_size_binary_array(array)?;
2754
0
                let size = match array.data_type() {
2755
0
                    DataType::FixedSizeBinary(size) => *size,
2756
0
                    _ => unreachable!(),
2757
                };
2758
                ScalarValue::FixedSizeBinary(
2759
0
                    size,
2760
0
                    match array.is_null(index) {
2761
0
                        true => None,
2762
0
                        false => Some(array.value(index).into()),
2763
                    },
2764
                )
2765
            }
2766
            DataType::Interval(IntervalUnit::DayTime) => {
2767
3.91k
                typed_cast!(array, index, IntervalDayTimeArray, IntervalDayTime)
?0
2768
            }
2769
            DataType::Interval(IntervalUnit::YearMonth) => {
2770
0
                typed_cast!(array, index, IntervalYearMonthArray, IntervalYearMonth)?
2771
            }
2772
0
            DataType::Interval(IntervalUnit::MonthDayNano) => typed_cast!(
2773
0
                array,
2774
0
                index,
2775
                IntervalMonthDayNanoArray,
2776
                IntervalMonthDayNano
2777
0
            )?,
2778
2779
            DataType::Duration(TimeUnit::Second) => {
2780
0
                typed_cast!(array, index, DurationSecondArray, DurationSecond)?
2781
            }
2782
            DataType::Duration(TimeUnit::Millisecond) => {
2783
1.34k
                typed_cast!(array, index, DurationMillisecondArray, DurationMillisecond)
?0
2784
            }
2785
            DataType::Duration(TimeUnit::Microsecond) => {
2786
0
                typed_cast!(array, index, DurationMicrosecondArray, DurationMicrosecond)?
2787
            }
2788
            DataType::Duration(TimeUnit::Nanosecond) => {
2789
0
                typed_cast!(array, index, DurationNanosecondArray, DurationNanosecond)?
2790
            }
2791
            DataType::Map(_, _) => {
2792
0
                let a = array.slice(index, 1);
2793
0
                Self::Map(Arc::new(a.as_map().to_owned()))
2794
            }
2795
0
            DataType::Union(fields, mode) => {
2796
0
                let array = as_union_array(array);
2797
0
                let ti = array.type_id(index);
2798
0
                let index = array.value_offset(index);
2799
0
                let value = ScalarValue::try_from_array(array.child(ti), index)?;
2800
0
                ScalarValue::Union(Some((ti, Box::new(value))), fields.clone(), *mode)
2801
            }
2802
0
            other => {
2803
0
                return _not_impl_err!(
2804
0
                    "Can't create a scalar from array of type \"{other:?}\""
2805
0
                );
2806
            }
2807
        })
2808
65.1k
    }
2809
2810
    /// Try to parse `value` into a ScalarValue of type `target_type`
2811
0
    pub fn try_from_string(value: String, target_type: &DataType) -> Result<Self> {
2812
0
        let value = ScalarValue::from(value);
2813
0
        let cast_options = CastOptions {
2814
0
            safe: false,
2815
0
            format_options: Default::default(),
2816
0
        };
2817
0
        let cast_arr = cast_with_options(&value.to_array()?, target_type, &cast_options)?;
2818
0
        ScalarValue::try_from_array(&cast_arr, 0)
2819
0
    }
2820
2821
    /// Try to cast this value to a ScalarValue of type `data_type`
2822
0
    pub fn cast_to(&self, data_type: &DataType) -> Result<Self> {
2823
0
        let cast_options = CastOptions {
2824
0
            safe: false,
2825
0
            format_options: Default::default(),
2826
0
        };
2827
0
        let cast_arr = cast_with_options(&self.to_array()?, data_type, &cast_options)?;
2828
0
        ScalarValue::try_from_array(&cast_arr, 0)
2829
0
    }
2830
2831
0
    fn eq_array_decimal(
2832
0
        array: &ArrayRef,
2833
0
        index: usize,
2834
0
        value: Option<&i128>,
2835
0
        precision: u8,
2836
0
        scale: i8,
2837
0
    ) -> Result<bool> {
2838
0
        let array = as_decimal128_array(array)?;
2839
0
        if array.precision() != precision || array.scale() != scale {
2840
0
            return Ok(false);
2841
0
        }
2842
0
        let is_null = array.is_null(index);
2843
0
        if let Some(v) = value {
2844
0
            Ok(!array.is_null(index) && array.value(index) == *v)
2845
        } else {
2846
0
            Ok(is_null)
2847
        }
2848
0
    }
2849
2850
0
    fn eq_array_decimal256(
2851
0
        array: &ArrayRef,
2852
0
        index: usize,
2853
0
        value: Option<&i256>,
2854
0
        precision: u8,
2855
0
        scale: i8,
2856
0
    ) -> Result<bool> {
2857
0
        let array = as_decimal256_array(array)?;
2858
0
        if array.precision() != precision || array.scale() != scale {
2859
0
            return Ok(false);
2860
0
        }
2861
0
        let is_null = array.is_null(index);
2862
0
        if let Some(v) = value {
2863
0
            Ok(!array.is_null(index) && array.value(index) == *v)
2864
        } else {
2865
0
            Ok(is_null)
2866
        }
2867
0
    }
2868
2869
    /// Compares a single row of array @ index for equality with self,
2870
    /// in an optimized fashion.
2871
    ///
2872
    /// This method implements an optimized version of:
2873
    ///
2874
    /// ```text
2875
    ///     let arr_scalar = Self::try_from_array(array, index).unwrap();
2876
    ///     arr_scalar.eq(self)
2877
    /// ```
2878
    ///
2879
    /// *Performance note*: the arrow compute kernels should be
2880
    /// preferred over this function if at all possible as they can be
2881
    /// vectorized and are generally much faster.
2882
    ///
2883
    /// This function has a few narrow usescases such as hash table key
2884
    /// comparisons where comparing a single row at a time is necessary.
2885
    ///
2886
    /// # Errors
2887
    ///
2888
    /// Errors if
2889
    /// - it fails to downcast `array` to the data type of `self`
2890
    /// - `self` is a `Struct`
2891
    ///
2892
    /// # Panics
2893
    ///
2894
    /// Panics if `self` is a dictionary with invalid key type
2895
    #[inline]
2896
0
    pub fn eq_array(&self, array: &ArrayRef, index: usize) -> Result<bool> {
2897
0
        Ok(match self {
2898
0
            ScalarValue::Decimal128(v, precision, scale) => {
2899
0
                ScalarValue::eq_array_decimal(
2900
0
                    array,
2901
0
                    index,
2902
0
                    v.as_ref(),
2903
0
                    *precision,
2904
0
                    *scale,
2905
0
                )?
2906
            }
2907
0
            ScalarValue::Decimal256(v, precision, scale) => {
2908
0
                ScalarValue::eq_array_decimal256(
2909
0
                    array,
2910
0
                    index,
2911
0
                    v.as_ref(),
2912
0
                    *precision,
2913
0
                    *scale,
2914
0
                )?
2915
            }
2916
0
            ScalarValue::Boolean(val) => {
2917
0
                eq_array_primitive!(array, index, BooleanArray, val)?
2918
            }
2919
0
            ScalarValue::Float16(val) => {
2920
0
                eq_array_primitive!(array, index, Float16Array, val)?
2921
            }
2922
0
            ScalarValue::Float32(val) => {
2923
0
                eq_array_primitive!(array, index, Float32Array, val)?
2924
            }
2925
0
            ScalarValue::Float64(val) => {
2926
0
                eq_array_primitive!(array, index, Float64Array, val)?
2927
            }
2928
0
            ScalarValue::Int8(val) => eq_array_primitive!(array, index, Int8Array, val)?,
2929
0
            ScalarValue::Int16(val) => {
2930
0
                eq_array_primitive!(array, index, Int16Array, val)?
2931
            }
2932
0
            ScalarValue::Int32(val) => {
2933
0
                eq_array_primitive!(array, index, Int32Array, val)?
2934
            }
2935
0
            ScalarValue::Int64(val) => {
2936
0
                eq_array_primitive!(array, index, Int64Array, val)?
2937
            }
2938
0
            ScalarValue::UInt8(val) => {
2939
0
                eq_array_primitive!(array, index, UInt8Array, val)?
2940
            }
2941
0
            ScalarValue::UInt16(val) => {
2942
0
                eq_array_primitive!(array, index, UInt16Array, val)?
2943
            }
2944
0
            ScalarValue::UInt32(val) => {
2945
0
                eq_array_primitive!(array, index, UInt32Array, val)?
2946
            }
2947
0
            ScalarValue::UInt64(val) => {
2948
0
                eq_array_primitive!(array, index, UInt64Array, val)?
2949
            }
2950
0
            ScalarValue::Utf8(val) => {
2951
0
                eq_array_primitive!(array, index, StringArray, val)?
2952
            }
2953
0
            ScalarValue::Utf8View(val) => {
2954
0
                eq_array_primitive!(array, index, StringViewArray, val)?
2955
            }
2956
0
            ScalarValue::LargeUtf8(val) => {
2957
0
                eq_array_primitive!(array, index, LargeStringArray, val)?
2958
            }
2959
0
            ScalarValue::Binary(val) => {
2960
0
                eq_array_primitive!(array, index, BinaryArray, val)?
2961
            }
2962
0
            ScalarValue::BinaryView(val) => {
2963
0
                eq_array_primitive!(array, index, BinaryViewArray, val)?
2964
            }
2965
0
            ScalarValue::FixedSizeBinary(_, val) => {
2966
0
                eq_array_primitive!(array, index, FixedSizeBinaryArray, val)?
2967
            }
2968
0
            ScalarValue::LargeBinary(val) => {
2969
0
                eq_array_primitive!(array, index, LargeBinaryArray, val)?
2970
            }
2971
0
            ScalarValue::List(arr) => {
2972
0
                Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index)
2973
            }
2974
0
            ScalarValue::LargeList(arr) => {
2975
0
                Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index)
2976
            }
2977
0
            ScalarValue::FixedSizeList(arr) => {
2978
0
                Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index)
2979
            }
2980
0
            ScalarValue::Struct(arr) => {
2981
0
                Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index)
2982
            }
2983
0
            ScalarValue::Map(arr) => {
2984
0
                Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index)
2985
            }
2986
0
            ScalarValue::Date32(val) => {
2987
0
                eq_array_primitive!(array, index, Date32Array, val)?
2988
            }
2989
0
            ScalarValue::Date64(val) => {
2990
0
                eq_array_primitive!(array, index, Date64Array, val)?
2991
            }
2992
0
            ScalarValue::Time32Second(val) => {
2993
0
                eq_array_primitive!(array, index, Time32SecondArray, val)?
2994
            }
2995
0
            ScalarValue::Time32Millisecond(val) => {
2996
0
                eq_array_primitive!(array, index, Time32MillisecondArray, val)?
2997
            }
2998
0
            ScalarValue::Time64Microsecond(val) => {
2999
0
                eq_array_primitive!(array, index, Time64MicrosecondArray, val)?
3000
            }
3001
0
            ScalarValue::Time64Nanosecond(val) => {
3002
0
                eq_array_primitive!(array, index, Time64NanosecondArray, val)?
3003
            }
3004
0
            ScalarValue::TimestampSecond(val, _) => {
3005
0
                eq_array_primitive!(array, index, TimestampSecondArray, val)?
3006
            }
3007
0
            ScalarValue::TimestampMillisecond(val, _) => {
3008
0
                eq_array_primitive!(array, index, TimestampMillisecondArray, val)?
3009
            }
3010
0
            ScalarValue::TimestampMicrosecond(val, _) => {
3011
0
                eq_array_primitive!(array, index, TimestampMicrosecondArray, val)?
3012
            }
3013
0
            ScalarValue::TimestampNanosecond(val, _) => {
3014
0
                eq_array_primitive!(array, index, TimestampNanosecondArray, val)?
3015
            }
3016
0
            ScalarValue::IntervalYearMonth(val) => {
3017
0
                eq_array_primitive!(array, index, IntervalYearMonthArray, val)?
3018
            }
3019
0
            ScalarValue::IntervalDayTime(val) => {
3020
0
                eq_array_primitive!(array, index, IntervalDayTimeArray, val)?
3021
            }
3022
0
            ScalarValue::IntervalMonthDayNano(val) => {
3023
0
                eq_array_primitive!(array, index, IntervalMonthDayNanoArray, val)?
3024
            }
3025
0
            ScalarValue::DurationSecond(val) => {
3026
0
                eq_array_primitive!(array, index, DurationSecondArray, val)?
3027
            }
3028
0
            ScalarValue::DurationMillisecond(val) => {
3029
0
                eq_array_primitive!(array, index, DurationMillisecondArray, val)?
3030
            }
3031
0
            ScalarValue::DurationMicrosecond(val) => {
3032
0
                eq_array_primitive!(array, index, DurationMicrosecondArray, val)?
3033
            }
3034
0
            ScalarValue::DurationNanosecond(val) => {
3035
0
                eq_array_primitive!(array, index, DurationNanosecondArray, val)?
3036
            }
3037
0
            ScalarValue::Union(value, _, _) => {
3038
0
                let array = as_union_array(array);
3039
0
                let ti = array.type_id(index);
3040
0
                let index = array.value_offset(index);
3041
0
                if let Some((ti_v, value)) = value {
3042
0
                    ti_v == &ti && value.eq_array(array.child(ti), index)?
3043
                } else {
3044
0
                    array.child(ti).is_null(index)
3045
                }
3046
            }
3047
0
            ScalarValue::Dictionary(key_type, v) => {
3048
0
                let (values_array, values_index) = match key_type.as_ref() {
3049
0
                    DataType::Int8 => get_dict_value::<Int8Type>(array, index)?,
3050
0
                    DataType::Int16 => get_dict_value::<Int16Type>(array, index)?,
3051
0
                    DataType::Int32 => get_dict_value::<Int32Type>(array, index)?,
3052
0
                    DataType::Int64 => get_dict_value::<Int64Type>(array, index)?,
3053
0
                    DataType::UInt8 => get_dict_value::<UInt8Type>(array, index)?,
3054
0
                    DataType::UInt16 => get_dict_value::<UInt16Type>(array, index)?,
3055
0
                    DataType::UInt32 => get_dict_value::<UInt32Type>(array, index)?,
3056
0
                    DataType::UInt64 => get_dict_value::<UInt64Type>(array, index)?,
3057
0
                    _ => unreachable!("Invalid dictionary keys type: {:?}", key_type),
3058
                };
3059
                // was the value in the array non null?
3060
0
                match values_index {
3061
0
                    Some(values_index) => v.eq_array(values_array, values_index)?,
3062
0
                    None => v.is_null(),
3063
                }
3064
            }
3065
0
            ScalarValue::Null => array.is_null(index),
3066
        })
3067
0
    }
3068
3069
0
    fn eq_array_list(arr1: &ArrayRef, arr2: &ArrayRef, index: usize) -> bool {
3070
0
        let right = arr2.slice(index, 1);
3071
0
        arr1 == &right
3072
0
    }
3073
3074
    /// Estimate size if bytes including `Self`. For values with internal containers such as `String`
3075
    /// includes the allocated size (`capacity`) rather than the current length (`len`)
3076
1.16k
    pub fn size(&self) -> usize {
3077
1.16k
        std::mem::size_of_val(self)
3078
1.16k
            + match self {
3079
                ScalarValue::Null
3080
                | ScalarValue::Boolean(_)
3081
                | ScalarValue::Float16(_)
3082
                | ScalarValue::Float32(_)
3083
                | ScalarValue::Float64(_)
3084
                | ScalarValue::Decimal128(_, _, _)
3085
                | ScalarValue::Decimal256(_, _, _)
3086
                | ScalarValue::Int8(_)
3087
                | ScalarValue::Int16(_)
3088
                | ScalarValue::Int32(_)
3089
                | ScalarValue::Int64(_)
3090
                | ScalarValue::UInt8(_)
3091
                | ScalarValue::UInt16(_)
3092
                | ScalarValue::UInt32(_)
3093
                | ScalarValue::UInt64(_)
3094
                | ScalarValue::Date32(_)
3095
                | ScalarValue::Date64(_)
3096
                | ScalarValue::Time32Second(_)
3097
                | ScalarValue::Time32Millisecond(_)
3098
                | ScalarValue::Time64Microsecond(_)
3099
                | ScalarValue::Time64Nanosecond(_)
3100
                | ScalarValue::IntervalYearMonth(_)
3101
                | ScalarValue::IntervalDayTime(_)
3102
                | ScalarValue::IntervalMonthDayNano(_)
3103
                | ScalarValue::DurationSecond(_)
3104
                | ScalarValue::DurationMillisecond(_)
3105
                | ScalarValue::DurationMicrosecond(_)
3106
1.16k
                | ScalarValue::DurationNanosecond(_) => 0,
3107
0
                ScalarValue::Utf8(s)
3108
0
                | ScalarValue::LargeUtf8(s)
3109
0
                | ScalarValue::Utf8View(s) => {
3110
0
                    s.as_ref().map(|s| s.capacity()).unwrap_or_default()
3111
                }
3112
0
                ScalarValue::TimestampSecond(_, s)
3113
0
                | ScalarValue::TimestampMillisecond(_, s)
3114
0
                | ScalarValue::TimestampMicrosecond(_, s)
3115
0
                | ScalarValue::TimestampNanosecond(_, s) => {
3116
0
                    s.as_ref().map(|s| s.len()).unwrap_or_default()
3117
                }
3118
0
                ScalarValue::Binary(b)
3119
0
                | ScalarValue::FixedSizeBinary(_, b)
3120
0
                | ScalarValue::LargeBinary(b)
3121
0
                | ScalarValue::BinaryView(b) => {
3122
0
                    b.as_ref().map(|b| b.capacity()).unwrap_or_default()
3123
                }
3124
0
                ScalarValue::List(arr) => arr.get_array_memory_size(),
3125
0
                ScalarValue::LargeList(arr) => arr.get_array_memory_size(),
3126
0
                ScalarValue::FixedSizeList(arr) => arr.get_array_memory_size(),
3127
0
                ScalarValue::Struct(arr) => arr.get_array_memory_size(),
3128
0
                ScalarValue::Map(arr) => arr.get_array_memory_size(),
3129
0
                ScalarValue::Union(vals, fields, _mode) => {
3130
0
                    vals.as_ref()
3131
0
                        .map(|(_id, sv)| sv.size() - std::mem::size_of_val(sv))
3132
0
                        .unwrap_or_default()
3133
0
                        // `fields` is boxed, so it is NOT already included in `self`
3134
0
                        + std::mem::size_of_val(fields)
3135
0
                        + (std::mem::size_of::<Field>() * fields.len())
3136
0
                        + fields.iter().map(|(_idx, field)| field.size() - std::mem::size_of_val(field)).sum::<usize>()
3137
                }
3138
0
                ScalarValue::Dictionary(dt, sv) => {
3139
0
                    // `dt` and `sv` are boxed, so they are NOT already included in `self`
3140
0
                    dt.size() + sv.size()
3141
                }
3142
            }
3143
1.16k
    }
3144
3145
    /// Estimates [size](Self::size) of [`Vec`] in bytes.
3146
    ///
3147
    /// Includes the size of the [`Vec`] container itself.
3148
584
    pub fn size_of_vec(vec: &Vec<Self>) -> usize {
3149
584
        std::mem::size_of_val(vec)
3150
584
            + (std::mem::size_of::<ScalarValue>() * vec.capacity())
3151
584
            + vec
3152
584
                .iter()
3153
584
                .map(|sv| sv.size() - std::mem::size_of_val(sv))
3154
584
                .sum::<usize>()
3155
584
    }
3156
3157
    /// Estimates [size](Self::size) of [`VecDeque`] in bytes.
3158
    ///
3159
    /// Includes the size of the [`VecDeque`] container itself.
3160
0
    pub fn size_of_vec_deque(vec_deque: &VecDeque<Self>) -> usize {
3161
0
        std::mem::size_of_val(vec_deque)
3162
0
            + (std::mem::size_of::<ScalarValue>() * vec_deque.capacity())
3163
0
            + vec_deque
3164
0
                .iter()
3165
0
                .map(|sv| sv.size() - std::mem::size_of_val(sv))
3166
0
                .sum::<usize>()
3167
0
    }
3168
3169
    /// Estimates [size](Self::size) of [`HashSet`] in bytes.
3170
    ///
3171
    /// Includes the size of the [`HashSet`] container itself.
3172
0
    pub fn size_of_hashset<S>(set: &HashSet<Self, S>) -> usize {
3173
0
        std::mem::size_of_val(set)
3174
0
            + (std::mem::size_of::<ScalarValue>() * set.capacity())
3175
0
            + set
3176
0
                .iter()
3177
0
                .map(|sv| sv.size() - std::mem::size_of_val(sv))
3178
0
                .sum::<usize>()
3179
0
    }
3180
}
3181
3182
macro_rules! impl_scalar {
3183
    ($ty:ty, $scalar:tt) => {
3184
        impl From<$ty> for ScalarValue {
3185
132
            fn from(value: $ty) -> Self {
3186
132
                ScalarValue::$scalar(Some(value))
3187
132
            }
3188
        }
3189
3190
        impl From<Option<$ty>> for ScalarValue {
3191
0
            fn from(value: Option<$ty>) -> Self {
3192
0
                ScalarValue::$scalar(value)
3193
0
            }
3194
        }
3195
    };
3196
}
3197
3198
impl_scalar!(f64, Float64);
3199
impl_scalar!(f32, Float32);
3200
impl_scalar!(i8, Int8);
3201
impl_scalar!(i16, Int16);
3202
impl_scalar!(i32, Int32);
3203
impl_scalar!(i64, Int64);
3204
impl_scalar!(bool, Boolean);
3205
impl_scalar!(u8, UInt8);
3206
impl_scalar!(u16, UInt16);
3207
impl_scalar!(u32, UInt32);
3208
impl_scalar!(u64, UInt64);
3209
3210
impl From<&str> for ScalarValue {
3211
20
    fn from(value: &str) -> Self {
3212
20
        Some(value).into()
3213
20
    }
3214
}
3215
3216
impl From<Option<&str>> for ScalarValue {
3217
20
    fn from(value: Option<&str>) -> Self {
3218
20
        let value = value.map(|s| s.to_string());
3219
20
        ScalarValue::Utf8(value)
3220
20
    }
3221
}
3222
3223
/// Wrapper to create ScalarValue::Struct for convenience
3224
impl From<Vec<(&str, ScalarValue)>> for ScalarValue {
3225
0
    fn from(value: Vec<(&str, ScalarValue)>) -> Self {
3226
0
        value
3227
0
            .into_iter()
3228
0
            .fold(ScalarStructBuilder::new(), |builder, (name, value)| {
3229
0
                builder.with_name_and_scalar(name, value)
3230
0
            })
3231
0
            .build()
3232
0
            .unwrap()
3233
0
    }
3234
}
3235
3236
impl FromStr for ScalarValue {
3237
    type Err = Infallible;
3238
3239
0
    fn from_str(s: &str) -> Result<Self, Self::Err> {
3240
0
        Ok(s.into())
3241
0
    }
3242
}
3243
3244
impl From<String> for ScalarValue {
3245
0
    fn from(value: String) -> Self {
3246
0
        ScalarValue::Utf8(Some(value))
3247
0
    }
3248
}
3249
3250
macro_rules! impl_try_from {
3251
    ($SCALAR:ident, $NATIVE:ident) => {
3252
        impl TryFrom<ScalarValue> for $NATIVE {
3253
            type Error = DataFusionError;
3254
3255
0
            fn try_from(value: ScalarValue) -> Result<Self> {
3256
0
                match value {
3257
0
                    ScalarValue::$SCALAR(Some(inner_value)) => Ok(inner_value),
3258
0
                    _ => _internal_err!(
3259
0
                        "Cannot convert {:?} to {}",
3260
0
                        value,
3261
0
                        std::any::type_name::<Self>()
3262
0
                    ),
3263
                }
3264
0
            }
3265
        }
3266
    };
3267
}
3268
3269
impl_try_from!(Int8, i8);
3270
impl_try_from!(Int16, i16);
3271
3272
// special implementation for i32 because of Date32 and Time32
3273
impl TryFrom<ScalarValue> for i32 {
3274
    type Error = DataFusionError;
3275
3276
0
    fn try_from(value: ScalarValue) -> Result<Self> {
3277
0
        match value {
3278
0
            ScalarValue::Int32(Some(inner_value))
3279
0
            | ScalarValue::Date32(Some(inner_value))
3280
0
            | ScalarValue::Time32Second(Some(inner_value))
3281
0
            | ScalarValue::Time32Millisecond(Some(inner_value)) => Ok(inner_value),
3282
0
            _ => _internal_err!(
3283
0
                "Cannot convert {:?} to {}",
3284
0
                value,
3285
0
                std::any::type_name::<Self>()
3286
0
            ),
3287
        }
3288
0
    }
3289
}
3290
3291
// special implementation for i64 because of Date64, Time64 and Timestamp
3292
impl TryFrom<ScalarValue> for i64 {
3293
    type Error = DataFusionError;
3294
3295
0
    fn try_from(value: ScalarValue) -> Result<Self> {
3296
0
        match value {
3297
0
            ScalarValue::Int64(Some(inner_value))
3298
0
            | ScalarValue::Date64(Some(inner_value))
3299
0
            | ScalarValue::Time64Microsecond(Some(inner_value))
3300
0
            | ScalarValue::Time64Nanosecond(Some(inner_value))
3301
0
            | ScalarValue::TimestampNanosecond(Some(inner_value), _)
3302
0
            | ScalarValue::TimestampMicrosecond(Some(inner_value), _)
3303
0
            | ScalarValue::TimestampMillisecond(Some(inner_value), _)
3304
0
            | ScalarValue::TimestampSecond(Some(inner_value), _) => Ok(inner_value),
3305
0
            _ => _internal_err!(
3306
0
                "Cannot convert {:?} to {}",
3307
0
                value,
3308
0
                std::any::type_name::<Self>()
3309
0
            ),
3310
        }
3311
0
    }
3312
}
3313
3314
// special implementation for i128 because of Decimal128
3315
impl TryFrom<ScalarValue> for i128 {
3316
    type Error = DataFusionError;
3317
3318
0
    fn try_from(value: ScalarValue) -> Result<Self> {
3319
0
        match value {
3320
0
            ScalarValue::Decimal128(Some(inner_value), _, _) => Ok(inner_value),
3321
0
            _ => _internal_err!(
3322
0
                "Cannot convert {:?} to {}",
3323
0
                value,
3324
0
                std::any::type_name::<Self>()
3325
0
            ),
3326
        }
3327
0
    }
3328
}
3329
3330
// special implementation for i256 because of Decimal128
3331
impl TryFrom<ScalarValue> for i256 {
3332
    type Error = DataFusionError;
3333
3334
0
    fn try_from(value: ScalarValue) -> Result<Self> {
3335
0
        match value {
3336
0
            ScalarValue::Decimal256(Some(inner_value), _, _) => Ok(inner_value),
3337
0
            _ => _internal_err!(
3338
0
                "Cannot convert {:?} to {}",
3339
0
                value,
3340
0
                std::any::type_name::<Self>()
3341
0
            ),
3342
        }
3343
0
    }
3344
}
3345
3346
// Native types whose value comes from exactly one `ScalarValue` variant use
// the generic `impl_try_from!` conversion.
impl_try_from!(UInt8, u8);
3347
impl_try_from!(UInt16, u16);
3348
impl_try_from!(UInt32, u32);
3349
impl_try_from!(UInt64, u64);
3350
impl_try_from!(Float32, f32);
3351
impl_try_from!(Float64, f64);
3352
impl_try_from!(Boolean, bool);
3353
3354
impl TryFrom<DataType> for ScalarValue {
3355
    type Error = DataFusionError;
3356
3357
    /// Create a Null instance of ScalarValue for this datatype
3358
11.4k
    fn try_from(datatype: DataType) -> Result<Self> {
3359
11.4k
        (&datatype).try_into()
3360
11.4k
    }
3361
}
3362
3363
impl TryFrom<&DataType> for ScalarValue {
3364
    type Error = DataFusionError;
3365
3366
    /// Create a Null instance of ScalarValue for this datatype
3367
116k
    fn try_from(data_type: &DataType) -> Result<Self> {
3368
12.7k
        Ok(match data_type {
3369
3.38k
            DataType::Boolean => ScalarValue::Boolean(None),
3370
0
            DataType::Float16 => ScalarValue::Float16(None),
3371
39.6k
            DataType::Float64 => ScalarValue::Float64(None),
3372
22
            DataType::Float32 => ScalarValue::Float32(None),
3373
0
            DataType::Int8 => ScalarValue::Int8(None),
3374
0
            DataType::Int16 => ScalarValue::Int16(None),
3375
41.5k
            DataType::Int32 => ScalarValue::Int32(None),
3376
4.20k
            DataType::Int64 => ScalarValue::Int64(None),
3377
0
            DataType::UInt8 => ScalarValue::UInt8(None),
3378
0
            DataType::UInt16 => ScalarValue::UInt16(None),
3379
133
            DataType::UInt32 => ScalarValue::UInt32(None),
3380
5
            DataType::UInt64 => ScalarValue::UInt64(None),
3381
0
            DataType::Decimal128(precision, scale) => {
3382
0
                ScalarValue::Decimal128(None, *precision, *scale)
3383
            }
3384
0
            DataType::Decimal256(precision, scale) => {
3385
0
                ScalarValue::Decimal256(None, *precision, *scale)
3386
            }
3387
0
            DataType::Utf8 => ScalarValue::Utf8(None),
3388
0
            DataType::LargeUtf8 => ScalarValue::LargeUtf8(None),
3389
0
            DataType::Utf8View => ScalarValue::Utf8View(None),
3390
0
            DataType::Binary => ScalarValue::Binary(None),
3391
0
            DataType::BinaryView => ScalarValue::BinaryView(None),
3392
0
            DataType::FixedSizeBinary(len) => ScalarValue::FixedSizeBinary(*len, None),
3393
0
            DataType::LargeBinary => ScalarValue::LargeBinary(None),
3394
0
            DataType::Date32 => ScalarValue::Date32(None),
3395
0
            DataType::Date64 => ScalarValue::Date64(None),
3396
0
            DataType::Time32(TimeUnit::Second) => ScalarValue::Time32Second(None),
3397
            DataType::Time32(TimeUnit::Millisecond) => {
3398
0
                ScalarValue::Time32Millisecond(None)
3399
            }
3400
            DataType::Time64(TimeUnit::Microsecond) => {
3401
0
                ScalarValue::Time64Microsecond(None)
3402
            }
3403
0
            DataType::Time64(TimeUnit::Nanosecond) => ScalarValue::Time64Nanosecond(None),
3404
0
            DataType::Timestamp(TimeUnit::Second, tz_opt) => {
3405
0
                ScalarValue::TimestampSecond(None, tz_opt.clone())
3406
            }
3407
12.7k
            DataType::Timestamp(TimeUnit::Millisecond, tz_opt) => {
3408
12.7k
                ScalarValue::TimestampMillisecond(None, tz_opt.clone())
3409
            }
3410
0
            DataType::Timestamp(TimeUnit::Microsecond, tz_opt) => {
3411
0
                ScalarValue::TimestampMicrosecond(None, tz_opt.clone())
3412
            }
3413
0
            DataType::Timestamp(TimeUnit::Nanosecond, tz_opt) => {
3414
0
                ScalarValue::TimestampNanosecond(None, tz_opt.clone())
3415
            }
3416
            DataType::Interval(IntervalUnit::YearMonth) => {
3417
0
                ScalarValue::IntervalYearMonth(None)
3418
            }
3419
            DataType::Interval(IntervalUnit::DayTime) => {
3420
6.56k
                ScalarValue::IntervalDayTime(None)
3421
            }
3422
            DataType::Interval(IntervalUnit::MonthDayNano) => {
3423
0
                ScalarValue::IntervalMonthDayNano(None)
3424
            }
3425
0
            DataType::Duration(TimeUnit::Second) => ScalarValue::DurationSecond(None),
3426
            DataType::Duration(TimeUnit::Millisecond) => {
3427
6.97k
                ScalarValue::DurationMillisecond(None)
3428
            }
3429
            DataType::Duration(TimeUnit::Microsecond) => {
3430
0
                ScalarValue::DurationMicrosecond(None)
3431
            }
3432
            DataType::Duration(TimeUnit::Nanosecond) => {
3433
0
                ScalarValue::DurationNanosecond(None)
3434
            }
3435
0
            DataType::Dictionary(index_type, value_type) => ScalarValue::Dictionary(
3436
0
                index_type.clone(),
3437
0
                Box::new(value_type.as_ref().try_into()?),
3438
            ),
3439
            // `ScalaValue::List` contains single element `ListArray`.
3440
0
            DataType::List(field_ref) => ScalarValue::List(Arc::new(
3441
0
                GenericListArray::new_null(Arc::clone(field_ref), 1),
3442
0
            )),
3443
            // `ScalarValue::LargeList` contains single element `LargeListArray`.
3444
0
            DataType::LargeList(field_ref) => ScalarValue::LargeList(Arc::new(
3445
0
                GenericListArray::new_null(Arc::clone(field_ref), 1),
3446
0
            )),
3447
            // `ScalaValue::FixedSizeList` contains single element `FixedSizeList`.
3448
0
            DataType::FixedSizeList(field_ref, fixed_length) => {
3449
0
                ScalarValue::FixedSizeList(Arc::new(FixedSizeListArray::new_null(
3450
0
                    Arc::clone(field_ref),
3451
0
                    *fixed_length,
3452
0
                    1,
3453
0
                )))
3454
            }
3455
3
            DataType::Struct(fields) => ScalarValue::Struct(
3456
3
                new_null_array(&DataType::Struct(fields.to_owned()), 1)
3457
3
                    .as_struct()
3458
3
                    .to_owned()
3459
3
                    .into(),
3460
3
            ),
3461
0
            DataType::Map(fields, sorted) => ScalarValue::Map(
3462
0
                new_null_array(&DataType::Map(fields.to_owned(), sorted.to_owned()), 1)
3463
0
                    .as_map()
3464
0
                    .to_owned()
3465
0
                    .into(),
3466
0
            ),
3467
2
            DataType::Union(fields, mode) => {
3468
2
                ScalarValue::Union(None, fields.clone(), *mode)
3469
            }
3470
934
            DataType::Null => ScalarValue::Null,
3471
            _ => {
3472
0
                return _not_impl_err!(
3473
0
                    "Can't create a scalar from data_type \"{data_type:?}\""
3474
0
                );
3475
            }
3476
        })
3477
116k
    }
3478
}
3479
3480
macro_rules! format_option {
3481
    ($F:expr, $EXPR:expr) => {{
3482
        match $EXPR {
3483
            Some(e) => write!($F, "{e}"),
3484
            None => write!($F, "NULL"),
3485
        }
3486
    }};
3487
}
3488
3489
// Implement Display trait for ScalarValue
3490
//
3491
// # Panics
3492
//
3493
// Panics if there is an error when creating a visual representation of columns via `arrow::util::pretty`
3494
impl fmt::Display for ScalarValue {
3495
8
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
3496
8
        match self {
3497
0
            ScalarValue::Decimal128(v, p, s) => {
3498
0
                write!(f, "{v:?},{p:?},{s:?}")?;
3499
            }
3500
0
            ScalarValue::Decimal256(v, p, s) => {
3501
0
                write!(f, "{v:?},{p:?},{s:?}")?;
3502
            }
3503
0
            ScalarValue::Boolean(e) => format_option!(f, e)?,
3504
0
            ScalarValue::Float16(e) => format_option!(f, e)?,
3505
0
            ScalarValue::Float32(e) => format_option!(f, e)?,
3506
2
            ScalarValue::Float64(e) => format_option!(
f0
, e)
?0
,
3507
0
            ScalarValue::Int8(e) => format_option!(f, e)?,
3508
0
            ScalarValue::Int16(e) => format_option!(f, e)?,
3509
0
            ScalarValue::Int32(e) => format_option!(f, e)?,
3510
2
            ScalarValue::Int64(e) => format_option!(
f0
, e)
?0
,
3511
0
            ScalarValue::UInt8(e) => format_option!(f, e)?,
3512
0
            ScalarValue::UInt16(e) => format_option!(f, e)?,
3513
0
            ScalarValue::UInt32(e) => format_option!(f, e)?,
3514
4
            ScalarValue::UInt64(e) => 
format_option!1
(
f3
, e)
?0
,
3515
0
            ScalarValue::TimestampSecond(e, _) => format_option!(f, e)?,
3516
0
            ScalarValue::TimestampMillisecond(e, _) => format_option!(f, e)?,
3517
0
            ScalarValue::TimestampMicrosecond(e, _) => format_option!(f, e)?,
3518
0
            ScalarValue::TimestampNanosecond(e, _) => format_option!(f, e)?,
3519
0
            ScalarValue::Utf8(e)
3520
0
            | ScalarValue::LargeUtf8(e)
3521
0
            | ScalarValue::Utf8View(e) => format_option!(f, e)?,
3522
0
            ScalarValue::Binary(e)
3523
0
            | ScalarValue::FixedSizeBinary(_, e)
3524
0
            | ScalarValue::LargeBinary(e)
3525
0
            | ScalarValue::BinaryView(e) => match e {
3526
0
                Some(bytes) => {
3527
                    // print up to first 10 bytes, with trailing ... if needed
3528
0
                    for b in bytes.iter().take(10) {
3529
0
                        write!(f, "{b:02X}")?;
3530
                    }
3531
0
                    if bytes.len() > 10 {
3532
0
                        write!(f, "...")?;
3533
0
                    }
3534
                }
3535
0
                None => write!(f, "NULL")?,
3536
            },
3537
0
            ScalarValue::List(arr) => fmt_list(arr.to_owned() as ArrayRef, f)?,
3538
0
            ScalarValue::LargeList(arr) => fmt_list(arr.to_owned() as ArrayRef, f)?,
3539
0
            ScalarValue::FixedSizeList(arr) => fmt_list(arr.to_owned() as ArrayRef, f)?,
3540
0
            ScalarValue::Date32(e) => {
3541
0
                format_option!(f, e.map(|v| Date32Type::to_naive_date(v).to_string()))?
3542
            }
3543
0
            ScalarValue::Date64(e) => {
3544
0
                format_option!(f, e.map(|v| Date64Type::to_naive_date(v).to_string()))?
3545
            }
3546
0
            ScalarValue::Time32Second(e) => format_option!(f, e)?,
3547
0
            ScalarValue::Time32Millisecond(e) => format_option!(f, e)?,
3548
0
            ScalarValue::Time64Microsecond(e) => format_option!(f, e)?,
3549
0
            ScalarValue::Time64Nanosecond(e) => format_option!(f, e)?,
3550
0
            ScalarValue::IntervalYearMonth(e) => format_option!(f, e)?,
3551
0
            ScalarValue::IntervalMonthDayNano(e) => {
3552
0
                format_option!(f, e.map(|v| format!("{v:?}")))?
3553
            }
3554
0
            ScalarValue::IntervalDayTime(e) => {
3555
0
                format_option!(f, e.map(|v| format!("{v:?}")))?;
3556
            }
3557
0
            ScalarValue::DurationSecond(e) => format_option!(f, e)?,
3558
0
            ScalarValue::DurationMillisecond(e) => format_option!(f, e)?,
3559
0
            ScalarValue::DurationMicrosecond(e) => format_option!(f, e)?,
3560
0
            ScalarValue::DurationNanosecond(e) => format_option!(f, e)?,
3561
0
            ScalarValue::Struct(struct_arr) => {
3562
0
                // ScalarValue Struct should always have a single element
3563
0
                assert_eq!(struct_arr.len(), 1);
3564
3565
0
                if struct_arr.null_count() == struct_arr.len() {
3566
0
                    write!(f, "NULL")?;
3567
0
                    return Ok(());
3568
0
                }
3569
0
3570
0
                let columns = struct_arr.columns();
3571
0
                let fields = struct_arr.fields();
3572
0
                let nulls = struct_arr.nulls();
3573
0
3574
0
                write!(
3575
0
                    f,
3576
0
                    "{{{}}}",
3577
0
                    columns
3578
0
                        .iter()
3579
0
                        .zip(fields.iter())
3580
0
                        .enumerate()
3581
0
                        .map(|(index, (column, field))| {
3582
0
                            if nulls.is_some_and(|b| b.is_null(index)) {
3583
0
                                format!("{}:NULL", field.name())
3584
0
                            } else if let DataType::Struct(_) = field.data_type() {
3585
0
                                let sv = ScalarValue::Struct(Arc::new(
3586
0
                                    column.as_struct().to_owned(),
3587
0
                                ));
3588
0
                                format!("{}:{sv}", field.name())
3589
                            } else {
3590
0
                                let sv = array_value_to_string(column, 0).unwrap();
3591
0
                                format!("{}:{sv}", field.name())
3592
                            }
3593
0
                        })
3594
0
                        .collect::<Vec<_>>()
3595
0
                        .join(",")
3596
0
                )?
3597
            }
3598
0
            ScalarValue::Map(map_arr) => {
3599
0
                if map_arr.null_count() == map_arr.len() {
3600
0
                    write!(f, "NULL")?;
3601
0
                    return Ok(());
3602
0
                }
3603
0
3604
0
                write!(
3605
0
                    f,
3606
0
                    "[{}]",
3607
0
                    map_arr
3608
0
                        .iter()
3609
0
                        .map(|struct_array| {
3610
0
                            if let Some(arr) = struct_array {
3611
0
                                let mut buffer = VecDeque::new();
3612
0
                                for i in 0..arr.len() {
3613
0
                                    let key =
3614
0
                                        array_value_to_string(arr.column(0), i).unwrap();
3615
0
                                    let value =
3616
0
                                        array_value_to_string(arr.column(1), i).unwrap();
3617
0
                                    buffer.push_back(format!("{}:{}", key, value));
3618
0
                                }
3619
0
                                format!(
3620
0
                                    "{{{}}}",
3621
0
                                    buffer
3622
0
                                        .into_iter()
3623
0
                                        .collect::<Vec<_>>()
3624
0
                                        .join(",")
3625
0
                                        .as_str()
3626
0
                                )
3627
                            } else {
3628
0
                                "NULL".to_string()
3629
                            }
3630
0
                        })
3631
0
                        .collect::<Vec<_>>()
3632
0
                        .join(",")
3633
0
                )?
3634
            }
3635
0
            ScalarValue::Union(val, _fields, _mode) => match val {
3636
0
                Some((id, val)) => write!(f, "{}:{}", id, val)?,
3637
0
                None => write!(f, "NULL")?,
3638
            },
3639
0
            ScalarValue::Dictionary(_k, v) => write!(f, "{v}")?,
3640
0
            ScalarValue::Null => write!(f, "NULL")?,
3641
        };
3642
8
        Ok(())
3643
8
    }
3644
}
3645
3646
0
fn fmt_list(arr: ArrayRef, f: &mut fmt::Formatter) -> fmt::Result {
3647
0
    // ScalarValue List, LargeList, FixedSizeList should always have a single element
3648
0
    assert_eq!(arr.len(), 1);
3649
0
    let options = FormatOptions::default().with_display_error(true);
3650
0
    let formatter =
3651
0
        ArrayFormatter::try_new(arr.as_ref() as &dyn Array, &options).unwrap();
3652
0
    let value_formatter = formatter.value(0);
3653
0
    write!(f, "{value_formatter}")
3654
0
}
3655
3656
/// writes a byte array to formatter. `[1, 2, 3]` ==> `"1,2,3"`
3657
0
fn fmt_binary(data: &[u8], f: &mut fmt::Formatter) -> fmt::Result {
3658
0
    let mut iter = data.iter();
3659
0
    if let Some(b) = iter.next() {
3660
0
        write!(f, "{b}")?;
3661
0
    }
3662
0
    for b in iter {
3663
0
        write!(f, ",{b}")?;
3664
    }
3665
0
    Ok(())
3666
0
}
3667
3668
impl fmt::Debug for ScalarValue {
3669
4
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
3670
0
        match self {
3671
0
            ScalarValue::Decimal128(_, _, _) => write!(f, "Decimal128({self})"),
3672
0
            ScalarValue::Decimal256(_, _, _) => write!(f, "Decimal256({self})"),
3673
0
            ScalarValue::Boolean(_) => write!(f, "Boolean({self})"),
3674
0
            ScalarValue::Float16(_) => write!(f, "Float16({self})"),
3675
0
            ScalarValue::Float32(_) => write!(f, "Float32({self})"),
3676
0
            ScalarValue::Float64(_) => write!(f, "Float64({self})"),
3677
0
            ScalarValue::Int8(_) => write!(f, "Int8({self})"),
3678
0
            ScalarValue::Int16(_) => write!(f, "Int16({self})"),
3679
0
            ScalarValue::Int32(_) => write!(f, "Int32({self})"),
3680
0
            ScalarValue::Int64(_) => write!(f, "Int64({self})"),
3681
0
            ScalarValue::UInt8(_) => write!(f, "UInt8({self})"),
3682
0
            ScalarValue::UInt16(_) => write!(f, "UInt16({self})"),
3683
0
            ScalarValue::UInt32(_) => write!(f, "UInt32({self})"),
3684
4
            ScalarValue::UInt64(_) => write!(f, "UInt64({self})"),
3685
0
            ScalarValue::TimestampSecond(_, tz_opt) => {
3686
0
                write!(f, "TimestampSecond({self}, {tz_opt:?})")
3687
            }
3688
0
            ScalarValue::TimestampMillisecond(_, tz_opt) => {
3689
0
                write!(f, "TimestampMillisecond({self}, {tz_opt:?})")
3690
            }
3691
0
            ScalarValue::TimestampMicrosecond(_, tz_opt) => {
3692
0
                write!(f, "TimestampMicrosecond({self}, {tz_opt:?})")
3693
            }
3694
0
            ScalarValue::TimestampNanosecond(_, tz_opt) => {
3695
0
                write!(f, "TimestampNanosecond({self}, {tz_opt:?})")
3696
            }
3697
0
            ScalarValue::Utf8(None) => write!(f, "Utf8({self})"),
3698
0
            ScalarValue::Utf8(Some(_)) => write!(f, "Utf8(\"{self}\")"),
3699
0
            ScalarValue::Utf8View(None) => write!(f, "Utf8View({self})"),
3700
0
            ScalarValue::Utf8View(Some(_)) => write!(f, "Utf8View(\"{self}\")"),
3701
0
            ScalarValue::LargeUtf8(None) => write!(f, "LargeUtf8({self})"),
3702
0
            ScalarValue::LargeUtf8(Some(_)) => write!(f, "LargeUtf8(\"{self}\")"),
3703
0
            ScalarValue::Binary(None) => write!(f, "Binary({self})"),
3704
0
            ScalarValue::Binary(Some(b)) => {
3705
0
                write!(f, "Binary(\"")?;
3706
0
                fmt_binary(b.as_slice(), f)?;
3707
0
                write!(f, "\")")
3708
            }
3709
0
            ScalarValue::BinaryView(None) => write!(f, "BinaryView({self})"),
3710
0
            ScalarValue::BinaryView(Some(b)) => {
3711
0
                write!(f, "BinaryView(\"")?;
3712
0
                fmt_binary(b.as_slice(), f)?;
3713
0
                write!(f, "\")")
3714
            }
3715
0
            ScalarValue::FixedSizeBinary(size, None) => {
3716
0
                write!(f, "FixedSizeBinary({size}, {self})")
3717
            }
3718
0
            ScalarValue::FixedSizeBinary(size, Some(b)) => {
3719
0
                write!(f, "FixedSizeBinary({size}, \"")?;
3720
0
                fmt_binary(b.as_slice(), f)?;
3721
0
                write!(f, "\")")
3722
            }
3723
0
            ScalarValue::LargeBinary(None) => write!(f, "LargeBinary({self})"),
3724
0
            ScalarValue::LargeBinary(Some(b)) => {
3725
0
                write!(f, "LargeBinary(\"")?;
3726
0
                fmt_binary(b.as_slice(), f)?;
3727
0
                write!(f, "\")")
3728
            }
3729
0
            ScalarValue::FixedSizeList(_) => write!(f, "FixedSizeList({self})"),
3730
0
            ScalarValue::List(_) => write!(f, "List({self})"),
3731
0
            ScalarValue::LargeList(_) => write!(f, "LargeList({self})"),
3732
0
            ScalarValue::Struct(struct_arr) => {
3733
0
                // ScalarValue Struct should always have a single element
3734
0
                assert_eq!(struct_arr.len(), 1);
3735
3736
0
                let columns = struct_arr.columns();
3737
0
                let fields = struct_arr.fields();
3738
0
3739
0
                write!(
3740
0
                    f,
3741
0
                    "Struct({{{}}})",
3742
0
                    columns
3743
0
                        .iter()
3744
0
                        .zip(fields.iter())
3745
0
                        .map(|(column, field)| {
3746
0
                            let sv = array_value_to_string(column, 0).unwrap();
3747
0
                            let name = field.name();
3748
0
                            format!("{name}:{sv}")
3749
0
                        })
3750
0
                        .collect::<Vec<_>>()
3751
0
                        .join(",")
3752
0
                )
3753
            }
3754
0
            ScalarValue::Map(map_arr) => {
3755
0
                write!(
3756
0
                    f,
3757
0
                    "Map([{}])",
3758
0
                    map_arr
3759
0
                        .iter()
3760
0
                        .map(|struct_array| {
3761
0
                            if let Some(arr) = struct_array {
3762
0
                                let buffer: Vec<String> = (0..arr.len())
3763
0
                                    .map(|i| {
3764
0
                                        let key = array_value_to_string(arr.column(0), i)
3765
0
                                            .unwrap();
3766
0
                                        let value =
3767
0
                                            array_value_to_string(arr.column(1), i)
3768
0
                                                .unwrap();
3769
0
                                        format!("{key:?}:{value:?}")
3770
0
                                    })
3771
0
                                    .collect();
3772
0
                                format!("{{{}}}", buffer.join(","))
3773
                            } else {
3774
0
                                "NULL".to_string()
3775
                            }
3776
0
                        })
3777
0
                        .collect::<Vec<_>>()
3778
0
                        .join(",")
3779
0
                )
3780
            }
3781
0
            ScalarValue::Date32(_) => write!(f, "Date32(\"{self}\")"),
3782
0
            ScalarValue::Date64(_) => write!(f, "Date64(\"{self}\")"),
3783
0
            ScalarValue::Time32Second(_) => write!(f, "Time32Second(\"{self}\")"),
3784
            ScalarValue::Time32Millisecond(_) => {
3785
0
                write!(f, "Time32Millisecond(\"{self}\")")
3786
            }
3787
            ScalarValue::Time64Microsecond(_) => {
3788
0
                write!(f, "Time64Microsecond(\"{self}\")")
3789
            }
3790
            ScalarValue::Time64Nanosecond(_) => {
3791
0
                write!(f, "Time64Nanosecond(\"{self}\")")
3792
            }
3793
            ScalarValue::IntervalDayTime(_) => {
3794
0
                write!(f, "IntervalDayTime(\"{self}\")")
3795
            }
3796
            ScalarValue::IntervalYearMonth(_) => {
3797
0
                write!(f, "IntervalYearMonth(\"{self}\")")
3798
            }
3799
            ScalarValue::IntervalMonthDayNano(_) => {
3800
0
                write!(f, "IntervalMonthDayNano(\"{self}\")")
3801
            }
3802
0
            ScalarValue::DurationSecond(_) => write!(f, "DurationSecond(\"{self}\")"),
3803
            ScalarValue::DurationMillisecond(_) => {
3804
0
                write!(f, "DurationMillisecond(\"{self}\")")
3805
            }
3806
            ScalarValue::DurationMicrosecond(_) => {
3807
0
                write!(f, "DurationMicrosecond(\"{self}\")")
3808
            }
3809
            ScalarValue::DurationNanosecond(_) => {
3810
0
                write!(f, "DurationNanosecond(\"{self}\")")
3811
            }
3812
0
            ScalarValue::Union(val, _fields, _mode) => match val {
3813
0
                Some((id, val)) => write!(f, "Union {}:{}", id, val),
3814
0
                None => write!(f, "Union(NULL)"),
3815
            },
3816
0
            ScalarValue::Dictionary(k, v) => write!(f, "Dictionary({k:?}, {v:?})"),
3817
0
            ScalarValue::Null => write!(f, "NULL"),
3818
        }
3819
4
    }
3820
}
3821
3822
/// Trait used to map a NativeType to a ScalarValue
3823
pub trait ScalarType<T: ArrowNativeType> {
3824
    /// returns a scalar from an optional T
3825
    fn scalar(r: Option<T>) -> ScalarValue;
3826
}
3827
3828
impl ScalarType<f32> for Float32Type {
3829
0
    fn scalar(r: Option<f32>) -> ScalarValue {
3830
0
        ScalarValue::Float32(r)
3831
0
    }
3832
}
3833
3834
impl ScalarType<i64> for TimestampSecondType {
3835
0
    fn scalar(r: Option<i64>) -> ScalarValue {
3836
0
        ScalarValue::TimestampSecond(r, None)
3837
0
    }
3838
}
3839
3840
impl ScalarType<i64> for TimestampMillisecondType {
3841
0
    fn scalar(r: Option<i64>) -> ScalarValue {
3842
0
        ScalarValue::TimestampMillisecond(r, None)
3843
0
    }
3844
}
3845
3846
impl ScalarType<i64> for TimestampMicrosecondType {
3847
0
    fn scalar(r: Option<i64>) -> ScalarValue {
3848
0
        ScalarValue::TimestampMicrosecond(r, None)
3849
0
    }
3850
}
3851
3852
impl ScalarType<i64> for TimestampNanosecondType {
3853
0
    fn scalar(r: Option<i64>) -> ScalarValue {
3854
0
        ScalarValue::TimestampNanosecond(r, None)
3855
0
    }
3856
}
3857
3858
/// Maps an optional `i32` to [`ScalarValue::Date32`].
impl ScalarType<i32> for Date32Type {
3859
0
    fn scalar(r: Option<i32>) -> ScalarValue {
3860
0
        // `r` is forwarded as-is; `None` stays `None` inside the variant.
        ScalarValue::Date32(r)
3861
0
    }
3862
}
3863
3864
#[cfg(test)]
3865
mod tests {
3866
3867
    use super::*;
3868
    use crate::cast::{
3869
        as_map_array, as_string_array, as_struct_array, as_uint32_array, as_uint64_array,
3870
    };
3871
3872
    use crate::assert_batches_eq;
3873
    use crate::utils::array_into_list_array_nullable;
3874
    use arrow::buffer::OffsetBuffer;
3875
    use arrow::compute::{is_null, kernels};
3876
    use arrow::error::ArrowError;
3877
    use arrow::util::pretty::pretty_format_columns;
3878
    use arrow_buffer::Buffer;
3879
    use arrow_schema::Fields;
3880
    use chrono::NaiveDate;
3881
    use rand::Rng;
3882
3883
    // Round-trips a map array through `ScalarValue::Map` and `to_array`,
    // expecting the resulting array to equal the one it was built from.
    #[test]
3884
    fn test_scalar_value_from_for_map() {
3885
        let string_builder = StringBuilder::new();
3886
        let int_builder = Int32Builder::with_capacity(4);
3887
        let mut builder = MapBuilder::new(None, string_builder, int_builder);
3888
        // first map entry: one key/value pair
        builder.keys().append_value("joe");
3889
        builder.values().append_value(1);
3890
        builder.append(true).unwrap();
3891
3892
        // second map entry: two key/value pairs
        builder.keys().append_value("blogs");
3893
        builder.values().append_value(2);
3894
        builder.keys().append_value("foo");
3895
        builder.values().append_value(4);
3896
        builder.append(true).unwrap();
        // two further entries with no key/value pairs appended; the flag
        // passed to `append` differs (`true` then `false`)
3897
        builder.append(true).unwrap();
3898
        builder.append(false).unwrap();
3899
3900
        let expected = builder.finish();
3901
3902
        // wrap the whole array in a scalar and convert it back
        let sv = ScalarValue::Map(Arc::new(expected.clone()));
3903
        let map_arr = sv.to_array().unwrap();
3904
        let actual = as_map_array(&map_arr).unwrap();
3905
        assert_eq!(actual, &expected);
3906
    }
3907
3908
    // Builds a single-row struct scalar with `ScalarStructBuilder` and checks
    // that `to_array` yields the same `StructArray` as constructing it directly.
    #[test]
3909
    fn test_scalar_value_from_for_struct() {
3910
        let boolean = Arc::new(BooleanArray::from(vec![false]));
3911
        let int = Arc::new(Int32Array::from(vec![42]));
3912
3913
        // reference array built directly from the same two one-element columns
        let expected = StructArray::from(vec![
3914
            (
3915
                Arc::new(Field::new("b", DataType::Boolean, false)),
3916
                Arc::clone(&boolean) as ArrayRef,
3917
            ),
3918
            (
3919
                Arc::new(Field::new("c", DataType::Int32, false)),
3920
                Arc::clone(&int) as ArrayRef,
3921
            ),
3922
        ]);
3923
3924
        // same fields and columns, this time through the builder
        let sv = ScalarStructBuilder::new()
3925
            .with_array(Field::new("b", DataType::Boolean, false), boolean)
3926
            .with_array(Field::new("c", DataType::Int32, false), int)
3927
            .build()
3928
            .unwrap();
3929
3930
        let struct_arr = sv.to_array().unwrap();
3931
        let actual = as_struct_array(&struct_arr).unwrap();
3932
        assert_eq!(actual, &expected);
3933
    }
3934
3935
    // Feeds four-element columns to `ScalarStructBuilder` and expects a panic
    // carrying the "exactly one element" message.
    // NOTE(review): presumably `build()` returns the error and the trailing
    // `unwrap()` turns it into the panic -- confirm against the builder.
    #[test]
3936
    #[should_panic(
3937
        expected = "Error building ScalarValue::Struct. Expected array with exactly one element, found array with 4 elements"
3938
    )]
3939
    fn test_scalar_value_from_for_struct_should_panic() {
3940
        let _ = ScalarStructBuilder::new()
3941
            .with_array(
3942
                Field::new("bool", DataType::Boolean, false),
3943
                Arc::new(BooleanArray::from(vec![false, true, false, false])),
3944
            )
3945
            .with_array(
3946
                Field::new("i32", DataType::Int32, false),
3947
                Arc::new(Int32Array::from(vec![42, 28, 19, 31])),
3948
            )
3949
            .build()
3950
            .unwrap();
3951
    }
3952
3953
    // Checks `to_array_of_size(2)` for nested scalars: the expected output is
    // the wrapped array's rows repeated twice, for a struct and for a list.
    #[test]
3954
    fn test_to_array_of_size_for_nested() {
3955
        // Struct
3956
        let boolean = Arc::new(BooleanArray::from(vec![false, false, true, true]));
3957
        let int = Arc::new(Int32Array::from(vec![42, 28, 19, 31]));
3958
3959
        let struct_array = StructArray::from(vec![
3960
            (
3961
                Arc::new(Field::new("b", DataType::Boolean, false)),
3962
                Arc::clone(&boolean) as ArrayRef,
3963
            ),
3964
            (
3965
                Arc::new(Field::new("c", DataType::Int32, false)),
3966
                Arc::clone(&int) as ArrayRef,
3967
            ),
3968
        ]);
3969
        // note: the wrapped struct array has four rows here, not one
        let sv = ScalarValue::Struct(Arc::new(struct_array));
3970
        let actual_arr = sv.to_array_of_size(2).unwrap();
3971
3972
        // expected columns: the four input rows followed by the same four again
        let boolean = Arc::new(BooleanArray::from(vec![
3973
            false, false, true, true, false, false, true, true,
3974
        ]));
3975
        let int = Arc::new(Int32Array::from(vec![42, 28, 19, 31, 42, 28, 19, 31]));
3976
3977
        let struct_array = StructArray::from(vec![
3978
            (
3979
                Arc::new(Field::new("b", DataType::Boolean, false)),
3980
                Arc::clone(&boolean) as ArrayRef,
3981
            ),
3982
            (
3983
                Arc::new(Field::new("c", DataType::Int32, false)),
3984
                Arc::clone(&int) as ArrayRef,
3985
            ),
3986
        ]);
3987
3988
        let actual = as_struct_array(&actual_arr).unwrap();
3989
        assert_eq!(actual, &struct_array);
3990
3991
        // List
3992
        let arr = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(vec![
3993
            Some(1),
3994
            None,
3995
            Some(2),
3996
        ])]);
3997
3998
        let sv = ScalarValue::List(Arc::new(arr));
3999
        let actual_arr = sv
4000
            .to_array_of_size(2)
4001
            .expect("Failed to convert to array of size");
4002
        let actual_list_arr = actual_arr.as_list::<i32>();
4003
4004
        // expected: the single list row `[1, null, 2]` repeated twice
        let arr = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
4005
            Some(vec![Some(1), None, Some(2)]),
4006
            Some(vec![Some(1), None, Some(2)]),
4007
        ]);
4008
4009
        assert_eq!(&arr, actual_list_arr);
4010
    }
4011
4012
    // Checks `to_array_of_size` for a fixed-size-list scalar: size 2 should
    // repeat the single row twice, and size 0 should give an empty array.
    #[test]
4013
    fn test_to_array_of_size_for_fsl() {
4014
        let values = Int32Array::from_iter([Some(1), None, Some(2)]);
4015
        let field = Arc::new(Field::new("item", DataType::Int32, true));
4016
        // one row holding a list of three values
        let arr = FixedSizeListArray::new(Arc::clone(&field), 3, Arc::new(values), None);
4017
        let sv = ScalarValue::FixedSizeList(Arc::new(arr));
4018
        let actual_arr = sv
4019
            .to_array_of_size(2)
4020
            .expect("Failed to convert to array of size");
4021
4022
        // expected child values: the three input values twice over
        let expected_values =
4023
            Int32Array::from_iter([Some(1), None, Some(2), Some(1), None, Some(2)]);
4024
        let expected_arr =
4025
            FixedSizeListArray::new(field, 3, Arc::new(expected_values), None);
4026
4027
        assert_eq!(
4028
            &expected_arr,
4029
            as_fixed_size_list_array(actual_arr.as_ref()).unwrap()
4030
        );
4031
4032
        // size 0 must succeed and produce no rows
        let empty_array = sv
4033
            .to_array_of_size(0)
4034
            .expect("Failed to convert to empty array");
4035
4036
        assert_eq!(empty_array.len(), 0);
4037
    }
4038
4039
    // Checks that `ScalarValue::new_list_nullable` over three string scalars
    // equals `array_into_list_array_nullable` applied to the same strings.
    #[test]
4040
    fn test_list_to_array_string() {
4041
        let scalars = vec![
4042
            ScalarValue::from("rust"),
4043
            ScalarValue::from("arrow"),
4044
            ScalarValue::from("data-fusion"),
4045
        ];
4046
4047
        let result = ScalarValue::new_list_nullable(scalars.as_slice(), &DataType::Utf8);
4048
4049
        let expected = array_into_list_array_nullable(Arc::new(StringArray::from(vec![
4050
            "rust",
4051
            "arrow",
4052
            "data-fusion",
4053
        ])));
4054
        // `result` is dereferenced before comparing with the expected array
        assert_eq!(*result, expected);
4055
    }
4056
4057
    // Test helper: turns each element of `values` into a one-row list scalar.
    //
    // `Some(items)` becomes a one-row list array of `Int64` items; `None`
    // becomes a one-row null array of the list type. `O::IS_LARGE` selects
    // `ScalarValue::LargeList` over `ScalarValue::List`.
    fn build_list<O: OffsetSizeTrait>(
4058
        values: Vec<Option<Vec<Option<i64>>>>,
4059
    ) -> Vec<ScalarValue> {
4060
        values
4061
            .into_iter()
4062
            .map(|v| {
4063
                let arr = if v.is_some() {
4064
                    Arc::new(
4065
                        GenericListArray::<O>::from_iter_primitive::<Int64Type, _, _>(
4066
                            vec![v],
4067
                        ),
4068
                    )
4069
                } else if O::IS_LARGE {
4070
                    // null row typed as LargeList<Int64>
                    new_null_array(
4071
                        &DataType::LargeList(Arc::new(Field::new(
4072
                            "item",
4073
                            DataType::Int64,
4074
                            true,
4075
                        ))),
4076
                        1,
4077
                    )
4078
                } else {
4079
                    // null row typed as List<Int64>
                    new_null_array(
4080
                        &DataType::List(Arc::new(Field::new(
4081
                            "item",
4082
                            DataType::Int64,
4083
                            true,
4084
                        ))),
4085
                        1,
4086
                    )
4087
                };
4088
4089
                // wrap in the scalar variant that matches the offset width
                if O::IS_LARGE {
4090
                    ScalarValue::LargeList(arr.as_list::<i64>().to_owned().into())
4091
                } else {
4092
                    ScalarValue::List(arr.as_list::<i32>().to_owned().into())
4093
                }
4094
            })
4095
            .collect()
4096
    }
4097
4098
    // Combines null and non-null fixed-size-list scalars with `iter_to_array`
    // and expects a four-row array: null, [1, 2, 3], [4, 5, 6], null.
    #[test]
4099
    fn test_iter_to_array_fixed_size_list() {
4100
        let field = Arc::new(Field::new("item", DataType::Int32, true));
4101
        let f1 = Arc::new(FixedSizeListArray::new(
4102
            Arc::clone(&field),
4103
            3,
4104
            Arc::new(Int32Array::from(vec![1, 2, 3])),
4105
            None,
4106
        ));
4107
        let f2 = Arc::new(FixedSizeListArray::new(
4108
            Arc::clone(&field),
4109
            3,
4110
            Arc::new(Int32Array::from(vec![4, 5, 6])),
4111
            None,
4112
        ));
4113
        // NOTE(review): the null input is created with list size 1 while
        // `f1`/`f2` and the expected array use size 3 -- presumably
        // `iter_to_array` reconciles this for null entries; confirm.
        let f_nulls = Arc::new(FixedSizeListArray::new_null(field, 1, 1));
4114
4115
        let scalars = vec![
4116
            ScalarValue::FixedSizeList(Arc::clone(&f_nulls)),
4117
            ScalarValue::FixedSizeList(f1),
4118
            ScalarValue::FixedSizeList(f2),
4119
            ScalarValue::FixedSizeList(f_nulls),
4120
        ];
4121
4122
        let array = ScalarValue::iter_to_array(scalars).unwrap();
4123
4124
        let expected = FixedSizeListArray::from_iter_primitive::<Int32Type, _, _>(
4125
            vec![
4126
                None,
4127
                Some(vec![Some(1), Some(2), Some(3)]),
4128
                Some(vec![Some(4), Some(5), Some(6)]),
4129
                None,
4130
            ],
4131
            3,
4132
        );
4133
        assert_eq!(array.as_ref(), &expected);
4134
    }
4135
4136
    // Combines two identical single-row struct scalars with `iter_to_array`
    // and expects one two-row `StructArray` with the same fields.
    #[test]
4137
    fn test_iter_to_array_struct() {
4138
        let s1 = StructArray::from(vec![
4139
            (
4140
                Arc::new(Field::new("A", DataType::Boolean, false)),
4141
                Arc::new(BooleanArray::from(vec![false])) as ArrayRef,
4142
            ),
4143
            (
4144
                Arc::new(Field::new("B", DataType::Int32, false)),
4145
                Arc::new(Int32Array::from(vec![42])) as ArrayRef,
4146
            ),
4147
        ]);
4148
4149
        // `s2` is built with exactly the same fields and values as `s1`
        let s2 = StructArray::from(vec![
4150
            (
4151
                Arc::new(Field::new("A", DataType::Boolean, false)),
4152
                Arc::new(BooleanArray::from(vec![false])) as ArrayRef,
4153
            ),
4154
            (
4155
                Arc::new(Field::new("B", DataType::Int32, false)),
4156
                Arc::new(Int32Array::from(vec![42])) as ArrayRef,
4157
            ),
4158
        ]);
4159
4160
        let scalars = vec![
4161
            ScalarValue::Struct(Arc::new(s1)),
4162
            ScalarValue::Struct(Arc::new(s2)),
4163
        ];
4164
4165
        let array = ScalarValue::iter_to_array(scalars).unwrap();
4166
4167
        // expected: both rows stacked column by column
        let expected = StructArray::from(vec![
4168
            (
4169
                Arc::new(Field::new("A", DataType::Boolean, false)),
4170
                Arc::new(BooleanArray::from(vec![false, false])) as ArrayRef,
4171
            ),
4172
            (
4173
                Arc::new(Field::new("B", DataType::Int32, false)),
4174
                Arc::new(Int32Array::from(vec![42, 42])) as ArrayRef,
4175
            ),
4176
        ]);
4177
        assert_eq!(array.as_ref(), &expected);
4178
    }
4179
4180
    // Combines one valid and one null struct scalar with `iter_to_array` and
    // checks that validity is preserved per row in the resulting struct array.
    #[test]
4181
    fn test_iter_to_array_struct_with_nulls() {
4182
        // non-null
4183
        let s1 = StructArray::from((
4184
            vec![
4185
                (
4186
                    Arc::new(Field::new("A", DataType::Int32, false)),
4187
                    Arc::new(Int32Array::from(vec![1])) as ArrayRef,
4188
                ),
4189
                (
4190
                    Arc::new(Field::new("B", DataType::Int64, false)),
4191
                    Arc::new(Int64Array::from(vec![2])) as ArrayRef,
4192
                ),
4193
            ],
4194
            // Present the null mask, 1 is non-null, 0 is null
4195
            Buffer::from(&[1]),
4196
        ));
4197
4198
        // null
4199
        let s2 = StructArray::from((
4200
            vec![
4201
                (
4202
                    Arc::new(Field::new("A", DataType::Int32, false)),
4203
                    Arc::new(Int32Array::from(vec![3])) as ArrayRef,
4204
                ),
4205
                (
4206
                    Arc::new(Field::new("B", DataType::Int64, false)),
4207
                    Arc::new(Int64Array::from(vec![4])) as ArrayRef,
4208
                ),
4209
            ],
            // mask bit 0: the single row of `s2` is null
4210
            Buffer::from(&[0]),
4211
        ));
4212
4213
        let scalars = vec![
4214
            ScalarValue::Struct(Arc::new(s1)),
4215
            ScalarValue::Struct(Arc::new(s2)),
4216
        ];
4217
4218
        let array = ScalarValue::iter_to_array(scalars).unwrap();
4219
        let struct_array = array.as_struct();
4220
        // row 0 came from `s1` (valid), row 1 from `s2` (null)
        assert!(struct_array.is_valid(0));
4221
        assert!(struct_array.is_null(1));
4222
    }
4223
4224
    // Checks `iter_to_array` over list scalars produced by `build_list`, for
    // both 32-bit (`ListArray`) and 64-bit (`LargeListArray`) offsets,
    // including a null row in the middle.
    #[test]
4225
    fn iter_to_array_primitive_test() {
4226
        // List[[1,2,3]], List[null], List[[4,5]]
4227
        let scalars = build_list::<i32>(vec![
4228
            Some(vec![Some(1), Some(2), Some(3)]),
4229
            None,
4230
            Some(vec![Some(4), Some(5)]),
4231
        ]);
4232
4233
        let array = ScalarValue::iter_to_array(scalars).unwrap();
4234
        let list_array = as_list_array(&array);
4235
        // List[[1,2,3], null, [4,5]]
4236
        let expected = ListArray::from_iter_primitive::<Int64Type, _, _>(vec![
4237
            Some(vec![Some(1), Some(2), Some(3)]),
4238
            None,
4239
            Some(vec![Some(4), Some(5)]),
4240
        ]);
4241
        assert_eq!(list_array, &expected);
4242
4243
        // same input again, this time with large-list scalars
        let scalars = build_list::<i64>(vec![
4244
            Some(vec![Some(1), Some(2), Some(3)]),
4245
            None,
4246
            Some(vec![Some(4), Some(5)]),
4247
        ]);
4248
4249
        let array = ScalarValue::iter_to_array(scalars).unwrap();
4250
        let list_array = as_large_list_array(&array);
4251
        let expected = LargeListArray::from_iter_primitive::<Int64Type, _, _>(vec![
4252
            Some(vec![Some(1), Some(2), Some(3)]),
4253
            None,
4254
            Some(vec![Some(4), Some(5)]),
4255
        ]);
4256
        assert_eq!(list_array, &expected);
4257
    }
4258
4259
    // Combines two string-list scalars with `iter_to_array` and compares the
    // result with a two-row list array assembled by hand with `ListBuilder`.
    #[test]
4260
    fn iter_to_array_string_test() {
4261
        let arr1 = array_into_list_array_nullable(Arc::new(StringArray::from(vec![
4262
            "foo", "bar", "baz",
4263
        ])));
4264
        let arr2 = array_into_list_array_nullable(Arc::new(StringArray::from(vec![
4265
            "rust", "world",
4266
        ])));
4267
4268
        let scalars = vec![
4269
            ScalarValue::List(Arc::new(arr1)),
4270
            ScalarValue::List(Arc::new(arr2)),
4271
        ];
4272
4273
        let array = ScalarValue::iter_to_array(scalars).unwrap();
4274
        let result = array.as_list::<i32>();
4275
4276
        // build expected array
4277
        let string_builder = StringBuilder::with_capacity(5, 25);
4278
        let mut list_of_string_builder = ListBuilder::new(string_builder);
4279
4280
        // first row: ["foo", "bar", "baz"]
        list_of_string_builder.values().append_value("foo");
4281
        list_of_string_builder.values().append_value("bar");
4282
        list_of_string_builder.values().append_value("baz");
4283
        list_of_string_builder.append(true);
4284
4285
        // second row: ["rust", "world"]
        list_of_string_builder.values().append_value("rust");
4286
        list_of_string_builder.values().append_value("world");
4287
        list_of_string_builder.append(true);
4288
        let expected = list_of_string_builder.finish();
4289
4290
        assert_eq!(result, &expected);
4291
    }
4292
4293
    // For every row of two list arrays, wraps the one-row slice in a
    // `ScalarValue::List` and checks `eq_array` reports equality at that index.
    #[test]
4294
    fn test_list_scalar_eq_to_array() {
4295
        let list_array: ArrayRef =
4296
            Arc::new(ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
4297
                Some(vec![Some(0), Some(1), Some(2)]),
4298
                None,
4299
                Some(vec![None, Some(5)]),
4300
            ]));
4301
4302
        // NOTE(review): despite its name, `fsl_array` is also built as a
        // variable-size `ListArray` (every non-null row has three elements),
        // not a fixed-size list array.
        let fsl_array: ArrayRef =
4303
            Arc::new(ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
4304
                Some(vec![Some(0), Some(1), Some(2)]),
4305
                None,
4306
                Some(vec![Some(3), None, Some(5)]),
4307
            ]));
4308
4309
        for arr in [list_array, fsl_array] {
4310
            for i in 0..arr.len() {
4311
                // scalar holding just row `i` of `arr`
                let scalar =
4312
                    ScalarValue::List(arr.slice(i, 1).as_list::<i32>().to_owned().into());
4313
                assert!(scalar.eq_array(&arr, i).unwrap());
4314
            }
4315
        }
4316
    }
4317
4318
    // Checks `ScalarValue::add` on two `Float64` scalars, passing the right
    // operand first by reference and then by value.
    #[test]
4319
    fn scalar_add_trait_test() -> Result<()> {
4320
        let float_value = ScalarValue::Float64(Some(123.));
4321
        let float_value_2 = ScalarValue::Float64(Some(123.));
4322
        // borrowed right-hand side
        assert_eq!(
4323
            (float_value.add(&float_value_2))?,
4324
            ScalarValue::Float64(Some(246.))
4325
        );
4326
        // owned right-hand side
        assert_eq!(
4327
            (float_value.add(float_value_2))?,
4328
            ScalarValue::Float64(Some(246.))
4329
        );
4330
        Ok(())
4331
    }
4332
4333
    // Checks `ScalarValue::sub` on two equal `Float64` scalars, passing the
    // right operand first by reference and then by value.
    #[test]
4334
    fn scalar_sub_trait_test() -> Result<()> {
4335
        let float_value = ScalarValue::Float64(Some(123.));
4336
        let float_value_2 = ScalarValue::Float64(Some(123.));
4337
        // borrowed right-hand side
        assert_eq!(
4338
            float_value.sub(&float_value_2)?,
4339
            ScalarValue::Float64(Some(0.))
4340
        );
4341
        // owned right-hand side
        assert_eq!(
4342
            float_value.sub(float_value_2)?,
4343
            ScalarValue::Float64(Some(0.))
4344
        );
4345
        Ok(())
4346
    }
4347
4348
    // Checks `ScalarValue::sub` on `Int32` scalars in both operand orders
    // (42 - 100 and 100 - 42).
    #[test]
4349
    fn scalar_sub_trait_int32_test() -> Result<()> {
4350
        let int_value = ScalarValue::Int32(Some(42));
4351
        let int_value_2 = ScalarValue::Int32(Some(100));
4352
        assert_eq!(int_value.sub(&int_value_2)?, ScalarValue::Int32(Some(-58)));
4353
        assert_eq!(int_value_2.sub(int_value)?, ScalarValue::Int32(Some(58)));
4354
        Ok(())
4355
    }
4356
4357
    // Checks that `sub_checked` on `i32::MAX - i32::MIN` returns an error and
    // pins the exact message, with any backtrace stripped.
    #[test]
4358
    fn scalar_sub_trait_int32_overflow_test() {
4359
        let int_value = ScalarValue::Int32(Some(i32::MAX));
4360
        let int_value_2 = ScalarValue::Int32(Some(i32::MIN));
4361
        let err = int_value
4362
            .sub_checked(&int_value_2)
4363
            .unwrap_err()
4364
            .strip_backtrace();
4365
        assert_eq!(
4366
            err,
4367
            "Arrow error: Arithmetic overflow: Overflow happened on: 2147483647 - -2147483648"
4368
        )
4369
    }
4370
4371
    // Checks `ScalarValue::sub` on `Int64` scalars in both operand orders
    // (42 - 100 and 100 - 42).
    #[test]
4372
    fn scalar_sub_trait_int64_test() -> Result<()> {
4373
        let int_value = ScalarValue::Int64(Some(42));
4374
        let int_value_2 = ScalarValue::Int64(Some(100));
4375
        assert_eq!(int_value.sub(&int_value_2)?, ScalarValue::Int64(Some(-58)));
4376
        assert_eq!(int_value_2.sub(int_value)?, ScalarValue::Int64(Some(58)));
4377
        Ok(())
4378
    }
4379
4380
    // Checks that `sub_checked` on `i64::MAX - i64::MIN` returns an error and
    // pins the exact message, with any backtrace stripped.
    #[test]
4381
    fn scalar_sub_trait_int64_overflow_test() {
4382
        let int_value = ScalarValue::Int64(Some(i64::MAX));
4383
        let int_value_2 = ScalarValue::Int64(Some(i64::MIN));
4384
        let err = int_value
4385
            .sub_checked(&int_value_2)
4386
            .unwrap_err()
4387
            .strip_backtrace();
4388
        assert_eq!(err, "Arrow error: Arithmetic overflow: Overflow happened on: 9223372036854775807 - -9223372036854775808")
4389
    }
4390
4391
    // Runs `check_scalar_add_overflow` with `MAX + MAX` for every signed and
    // unsigned integer width from 8 to 64 bits.
    #[test]
4392
    fn scalar_add_overflow_test() -> Result<()> {
4393
        check_scalar_add_overflow::<Int8Type>(
4394
            ScalarValue::Int8(Some(i8::MAX)),
4395
            ScalarValue::Int8(Some(i8::MAX)),
4396
        );
4397
        check_scalar_add_overflow::<UInt8Type>(
4398
            ScalarValue::UInt8(Some(u8::MAX)),
4399
            ScalarValue::UInt8(Some(u8::MAX)),
4400
        );
4401
        check_scalar_add_overflow::<Int16Type>(
4402
            ScalarValue::Int16(Some(i16::MAX)),
4403
            ScalarValue::Int16(Some(i16::MAX)),
4404
        );
4405
        check_scalar_add_overflow::<UInt16Type>(
4406
            ScalarValue::UInt16(Some(u16::MAX)),
4407
            ScalarValue::UInt16(Some(u16::MAX)),
4408
        );
4409
        check_scalar_add_overflow::<Int32Type>(
4410
            ScalarValue::Int32(Some(i32::MAX)),
4411
            ScalarValue::Int32(Some(i32::MAX)),
4412
        );
4413
        check_scalar_add_overflow::<UInt32Type>(
4414
            ScalarValue::UInt32(Some(u32::MAX)),
4415
            ScalarValue::UInt32(Some(u32::MAX)),
4416
        );
4417
        check_scalar_add_overflow::<Int64Type>(
4418
            ScalarValue::Int64(Some(i64::MAX)),
4419
            ScalarValue::Int64(Some(i64::MAX)),
4420
        );
4421
        check_scalar_add_overflow::<UInt64Type>(
4422
            ScalarValue::UInt64(Some(u64::MAX)),
4423
            ScalarValue::UInt64(Some(u64::MAX)),
4424
        );
4425
4426
        // nothing above uses `?`; failures surface as panics inside the helper
        Ok(())
4427
    }
4428
4429
    // Verifies that ScalarValue has the same behavior as the compute kernel when it overflows.
    //
    // Adds `left` and `right` once with `ScalarValue::add_checked` and once
    // with `kernels::numeric::add` on their array forms, then asserts that
    // both calls agree on success versus failure. Result values are not compared.
4430
    fn check_scalar_add_overflow<T>(left: ScalarValue, right: ScalarValue)
4431
    where
4432
        T: ArrowNumericType,
4433
    {
4434
        let scalar_result = left.add_checked(&right);
4435
4436
        let left_array = left.to_array().expect("Failed to convert to array");
4437
        let right_array = right.to_array().expect("Failed to convert to array");
4438
        // `T` must match the scalars' variant for these downcasts to be valid
        let arrow_left_array = left_array.as_primitive::<T>();
4439
        let arrow_right_array = right_array.as_primitive::<T>();
4440
        let arrow_result = kernels::numeric::add(arrow_left_array, arrow_right_array);
4441
4442
        assert_eq!(scalar_result.is_ok(), arrow_result.is_ok());
4443
    }
4444
4445
    // Checks that adding an interval to a timestamp gives the same result in
    // either operand order, for each of the three interval variants.
    #[test]
4446
    fn test_interval_add_timestamp() -> Result<()> {
4447
        // month-day-nano interval
        let interval = ScalarValue::IntervalMonthDayNano(Some(IntervalMonthDayNano {
4448
            months: 1,
4449
            days: 2,
4450
            nanoseconds: 3,
4451
        }));
4452
        let timestamp = ScalarValue::TimestampNanosecond(Some(123), None);
4453
        let result = interval.add(&timestamp)?;
4454
        let expect = timestamp.add(&interval)?;
4455
        assert_eq!(result, expect);
4456
4457
        // year-month interval
        let interval = ScalarValue::IntervalYearMonth(Some(123));
4458
        let timestamp = ScalarValue::TimestampNanosecond(Some(123), None);
4459
        let result = interval.add(&timestamp)?;
4460
        let expect = timestamp.add(&interval)?;
4461
        assert_eq!(result, expect);
4462
4463
        // day-time interval
        let interval = ScalarValue::IntervalDayTime(Some(IntervalDayTime {
4464
            days: 1,
4465
            milliseconds: 23,
4466
        }));
4467
        let timestamp = ScalarValue::TimestampNanosecond(Some(123), None);
4468
        let result = interval.add(&timestamp)?;
4469
        let expect = timestamp.add(&interval)?;
4470
        assert_eq!(result, expect);
4471
        Ok(())
4472
    }
4473
4474
    // Exercises `ScalarValue::Decimal128` end to end: data type, conversion to
    // `i128`, negation, conversion to arrays, `eq_array`, `try_from_array`,
    // ordering, and `iter_to_array` with and without a null entry.
    #[test]
4475
    fn scalar_decimal_test() -> Result<()> {
4476
        let decimal_value = ScalarValue::Decimal128(Some(123), 10, 1);
4477
        assert_eq!(DataType::Decimal128(10, 1), decimal_value.data_type());
4478
        let try_into_value: i128 = decimal_value.clone().try_into().unwrap();
4479
        assert_eq!(123_i128, try_into_value);
4480
        assert!(!decimal_value.is_null());
4481
        let neg_decimal_value = decimal_value.arithmetic_negate()?;
4482
        match neg_decimal_value {
4483
            ScalarValue::Decimal128(v, _, _) => {
4484
                assert_eq!(-123, v.unwrap());
4485
            }
4486
            _ => {
4487
                unreachable!();
4488
            }
4489
        }
4490
4491
        // decimal scalar to array
4492
        let array = decimal_value
4493
            .to_array()
4494
            .expect("Failed to convert to array");
4495
        let array = as_decimal128_array(&array)?;
4496
        assert_eq!(1, array.len());
4497
        assert_eq!(DataType::Decimal128(10, 1), array.data_type().clone());
4498
        assert_eq!(123i128, array.value(0));
4499
4500
        // decimal scalar to array with size
4501
        let array = decimal_value
4502
            .to_array_of_size(10)
4503
            .expect("Failed to convert to array of size");
4504
        let array_decimal = as_decimal128_array(&array)?;
4505
        assert_eq!(10, array.len());
4506
        assert_eq!(DataType::Decimal128(10, 1), array.data_type().clone());
4507
        assert_eq!(123i128, array_decimal.value(0));
4508
        assert_eq!(123i128, array_decimal.value(9));
4509
        // test eq array
4510
        assert!(decimal_value
4511
            .eq_array(&array, 1)
4512
            .expect("Failed to compare arrays"));
4513
        assert!(decimal_value
4514
            .eq_array(&array, 5)
4515
            .expect("Failed to compare arrays"));
4516
        // test try from array
4517
        assert_eq!(
4518
            decimal_value,
4519
            ScalarValue::try_from_array(&array, 5).unwrap()
4520
        );
4521
4522
        // the fallible constructor must produce the same scalar
        assert_eq!(
4523
            decimal_value,
4524
            ScalarValue::try_new_decimal128(123, 10, 1).unwrap()
4525
        );
4526
4527
        // test compare
4528
        let left = ScalarValue::Decimal128(Some(123), 10, 2);
4529
        let right = ScalarValue::Decimal128(Some(124), 10, 2);
4530
        assert!(!left.eq(&right));
4531
        let result = left < right;
4532
        assert!(result);
4533
        let result = left <= right;
4534
        assert!(result);
4535
        let right = ScalarValue::Decimal128(Some(124), 10, 3);
4536
        // make sure that two decimals with diff datatype can't be compared.
4537
        let result = left.partial_cmp(&right);
4538
        assert_eq!(None, result);
4539
4540
        let decimal_vec = vec![
4541
            ScalarValue::Decimal128(Some(1), 10, 2),
4542
            ScalarValue::Decimal128(Some(2), 10, 2),
4543
            ScalarValue::Decimal128(Some(3), 10, 2),
4544
        ];
4545
        // convert the vec to decimal array and check the result
4546
        let array = ScalarValue::iter_to_array(decimal_vec).unwrap();
4547
        assert_eq!(3, array.len());
4548
        assert_eq!(DataType::Decimal128(10, 2), array.data_type().clone());
4549
4550
        // same values plus a trailing null entry
        let decimal_vec = vec![
4551
            ScalarValue::Decimal128(Some(1), 10, 2),
4552
            ScalarValue::Decimal128(Some(2), 10, 2),
4553
            ScalarValue::Decimal128(Some(3), 10, 2),
4554
            ScalarValue::Decimal128(None, 10, 2),
4555
        ];
4556
        let array = ScalarValue::iter_to_array(decimal_vec).unwrap();
4557
        assert_eq!(4, array.len());
4558
        assert_eq!(DataType::Decimal128(10, 2), array.data_type().clone());
4559
4560
        // each non-null row must compare equal to a freshly built scalar
        assert!(ScalarValue::try_new_decimal128(1, 10, 2)
4561
            .unwrap()
4562
            .eq_array(&array, 0)
4563
            .expect("Failed to compare arrays"));
4564
        assert!(ScalarValue::try_new_decimal128(2, 10, 2)
4565
            .unwrap()
4566
            .eq_array(&array, 1)
4567
            .expect("Failed to compare arrays"));
4568
        assert!(ScalarValue::try_new_decimal128(3, 10, 2)
4569
            .unwrap()
4570
            .eq_array(&array, 2)
4571
            .expect("Failed to compare arrays"));
        // the null row must read back as a null decimal with the same precision/scale
4572
        assert_eq!(
4573
            ScalarValue::Decimal128(None, 10, 2),
4574
            ScalarValue::try_from_array(&array, 3).unwrap()
4575
        );
4576
4577
        Ok(())
4578
    }
4579
4580
    // Checks `partial_cmp` between single-row `Int64` list scalars for the
    // Equal, Greater and Less outcomes.
    #[test]
4581
    fn test_list_partial_cmp() {
4582
        // identical lists compare Equal
        let a =
4583
            ScalarValue::List(Arc::new(
4584
                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
4585
                    Some(1),
4586
                    Some(2),
4587
                    Some(3),
4588
                ])]),
4589
            ));
4590
        let b =
4591
            ScalarValue::List(Arc::new(
4592
                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
4593
                    Some(1),
4594
                    Some(2),
4595
                    Some(3),
4596
                ])]),
4597
            ));
4598
        assert_eq!(a.partial_cmp(&b), Some(Ordering::Equal));
4599
4600
        // `a` has the larger first element but the smaller last element;
        // the expected result is Greater
        let a =
4601
            ScalarValue::List(Arc::new(
4602
                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
4603
                    Some(10),
4604
                    Some(2),
4605
                    Some(3),
4606
                ])]),
4607
            ));
4608
        let b =
4609
            ScalarValue::List(Arc::new(
4610
                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
4611
                    Some(1),
4612
                    Some(2),
4613
                    Some(30),
4614
                ])]),
4615
            ));
4616
        assert_eq!(a.partial_cmp(&b), Some(Ordering::Greater));
4617
4618
        // the lists differ only in the last element (3 vs 30); expected Less
        let a =
4619
            ScalarValue::List(Arc::new(
4620
                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
4621
                    Some(10),
4622
                    Some(2),
4623
                    Some(3),
4624
                ])]),
4625
            ));
4626
        let b =
4627
            ScalarValue::List(Arc::new(
4628
                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
4629
                    Some(10),
4630
                    Some(2),
4631
                    Some(30),
4632
                ])]),
4633
            ));
4634
        assert_eq!(a.partial_cmp(&b), Some(Ordering::Less));
4635
    }
4636
4637
    // Checks `to_array` for `UInt64` scalars: a present value gives one
    // non-null element, and `None` gives one null element.
    #[test]
4638
    fn scalar_value_to_array_u64() -> Result<()> {
4639
        let value = ScalarValue::UInt64(Some(13u64));
4640
        let array = value.to_array().expect("Failed to convert to array");
4641
        let array = as_uint64_array(&array)?;
4642
        assert_eq!(array.len(), 1);
4643
        assert!(!array.is_null(0));
4644
        assert_eq!(array.value(0), 13);
4645
4646
        // null scalar
        let value = ScalarValue::UInt64(None);
4647
        let array = value.to_array().expect("Failed to convert to array");
4648
        let array = as_uint64_array(&array)?;
4649
        assert_eq!(array.len(), 1);
4650
        assert!(array.is_null(0));
4651
        Ok(())
4652
    }
4653
4654
    // Checks `to_array` for `UInt32` scalars: a present value gives one
    // non-null element, and `None` gives one null element.
    #[test]
4655
    fn scalar_value_to_array_u32() -> Result<()> {
4656
        let value = ScalarValue::UInt32(Some(13u32));
4657
        let array = value.to_array().expect("Failed to convert to array");
4658
        let array = as_uint32_array(&array)?;
4659
        assert_eq!(array.len(), 1);
4660
        assert!(!array.is_null(0));
4661
        assert_eq!(array.value(0), 13);
4662
4663
        // null scalar
        let value = ScalarValue::UInt32(None);
4664
        let array = value.to_array().expect("Failed to convert to array");
4665
        let array = as_uint32_array(&array)?;
4666
        assert_eq!(array.len(), 1);
4667
        assert!(array.is_null(0));
4668
        Ok(())
4669
    }
4670
4671
    /// Building a nullable list from no scalars yields a single list row
    /// whose child values array is empty.
    #[test]
    fn scalar_list_null_to_array() {
        let no_elements: &[ScalarValue] = &[];
        let list = ScalarValue::new_list_nullable(no_elements, &DataType::UInt64);

        assert_eq!(list.len(), 1);
        assert!(list.values().is_empty());
    }
4678
4679
    /// Building a large list from no scalars yields a single list row whose
    /// child values array is empty.
    #[test]
    fn scalar_large_list_null_to_array() {
        let no_elements: &[ScalarValue] = &[];
        let list = ScalarValue::new_large_list(no_elements, &DataType::UInt64);

        assert_eq!(list.len(), 1);
        assert!(list.values().is_empty());
    }
4686
4687
    /// Three `UInt64` scalars (the middle one null) become a single list row
    /// holding those three elements, with the null preserved.
    #[test]
    fn scalar_list_to_array() -> Result<()> {
        let elements = [Some(100), None, Some(101)].map(ScalarValue::UInt64);
        let list = ScalarValue::new_list_nullable(&elements, &DataType::UInt64);
        assert_eq!(list.len(), 1);
        assert_eq!(list.values().len(), 3);

        // Inspect the only row of the list.
        let row = list.value(0);
        let row = as_uint64_array(&row)?;
        assert_eq!(row.len(), 3);
        assert_eq!(row.value(0), 100);
        assert!(row.is_null(1));
        assert_eq!(row.value(2), 101);
        Ok(())
    }
4706
4707
    /// Three `UInt64` scalars (the middle one null) become a single large-list
    /// row holding those three elements, with the null preserved.
    #[test]
    fn scalar_large_list_to_array() -> Result<()> {
        let elements = [Some(100), None, Some(101)].map(ScalarValue::UInt64);
        let list = ScalarValue::new_large_list(&elements, &DataType::UInt64);
        assert_eq!(list.len(), 1);
        assert_eq!(list.values().len(), 3);

        // Inspect the only row of the list.
        let row = list.value(0);
        let row = as_uint64_array(&row)?;
        assert_eq!(row.len(), 3);
        assert_eq!(row.value(0), 100);
        assert!(row.is_null(1));
        assert_eq!(row.value(2), 101);
        Ok(())
    }
4726
4727
    /// Builds the same array two ways -- directly with `$ARRAYTYPE::from` and
    /// through `ScalarValue::iter_to_array` over `ScalarValue::$SCALAR_T`
    /// values -- and asserts that both results are equal.
    ///
    /// `$INPUT` is expanded twice, once per construction path.
    macro_rules! check_scalar_iter {
        ($SCALAR_T:ident, $ARRAYTYPE:ident, $INPUT:expr) => {{
            // Reference result: the array built straight from the input.
            let expected: ArrayRef = Arc::new($ARRAYTYPE::from($INPUT));

            // Result under test: wrap each element in a scalar, then convert.
            let array = ScalarValue::iter_to_array(
                $INPUT.iter().map(|item| ScalarValue::$SCALAR_T(*item)),
            )
            .unwrap();

            assert_eq!(&array, &expected);
        }};
    }
4740
4741
    /// Same check as `check_scalar_iter`, for scalar variants that carry a
    /// second (timezone) field; the timezone is always `None` here.
    ///
    /// `$INPUT` is expanded twice, once per construction path.
    macro_rules! check_scalar_iter_tz {
        ($SCALAR_T:ident, $ARRAYTYPE:ident, $INPUT:expr) => {{
            // Reference result: the array built straight from the input.
            let expected: ArrayRef = Arc::new($ARRAYTYPE::from($INPUT));

            // Result under test: wrap each element in a scalar, then convert.
            let array = ScalarValue::iter_to_array(
                $INPUT
                    .iter()
                    .map(|item| ScalarValue::$SCALAR_T(*item, None)),
            )
            .unwrap();

            assert_eq!(&array, &expected);
        }};
    }
4757
4758
    /// Builds a string array directly with `$ARRAYTYPE::from` and through
    /// `ScalarValue::iter_to_array`, and asserts that both results are equal.
    ///
    /// Each `&str` element is converted to an owned `String` before being
    /// wrapped in `ScalarValue::$SCALAR_T`. `$INPUT` is expanded twice.
    macro_rules! check_scalar_iter_string {
        ($SCALAR_T:ident, $ARRAYTYPE:ident, $INPUT:expr) => {{
            // Reference result: the array built straight from the input.
            let expected: ArrayRef = Arc::new($ARRAYTYPE::from($INPUT));

            // Result under test: wrap each element in a scalar, then convert.
            let array = ScalarValue::iter_to_array(
                $INPUT
                    .iter()
                    .map(|item| ScalarValue::$SCALAR_T(item.map(String::from))),
            )
            .unwrap();

            assert_eq!(&array, &expected);
        }};
    }
4774
4775
    /// Builds a binary array by collecting owned byte vectors into
    /// `$ARRAYTYPE` and through `ScalarValue::iter_to_array`, and asserts
    /// that both results are equal.
    ///
    /// `$INPUT` is expanded twice, once per construction path.
    macro_rules! check_scalar_iter_binary {
        ($SCALAR_T:ident, $ARRAYTYPE:ident, $INPUT:expr) => {{
            // Reference result: collect the raw bytes into the array type.
            let collected: $ARRAYTYPE = $INPUT
                .iter()
                .map(|item| item.map(|bytes| bytes.to_vec()))
                .collect();
            let expected: ArrayRef = Arc::new(collected);

            // Result under test: wrap each element in a scalar, then convert.
            let array = ScalarValue::iter_to_array($INPUT.iter().map(|item| {
                ScalarValue::$SCALAR_T(item.map(|bytes| bytes.to_vec()))
            }))
            .unwrap();

            assert_eq!(&array, &expected);
        }};
    }
4794
4795
    /// Round-trips a small nullable input through `ScalarValue::iter_to_array`
    /// for boolean, numeric, timestamp, string and binary scalar variants.
    #[test]
    // Clippy reports these `vec!`s as useless, but the code does not compile
    // without them.
    #[allow(clippy::useless_vec)]
    fn scalar_iter_to_array_boolean() {
        // Boolean
        check_scalar_iter!(Boolean, BooleanArray, vec![Some(true), None, Some(false)]);

        // Signed integers
        check_scalar_iter!(Int8, Int8Array, vec![Some(1), None, Some(3)]);
        check_scalar_iter!(Int16, Int16Array, vec![Some(1), None, Some(3)]);
        check_scalar_iter!(Int32, Int32Array, vec![Some(1), None, Some(3)]);
        check_scalar_iter!(Int64, Int64Array, vec![Some(1), None, Some(3)]);

        // Unsigned integers
        check_scalar_iter!(UInt8, UInt8Array, vec![Some(1), None, Some(3)]);
        check_scalar_iter!(UInt16, UInt16Array, vec![Some(1), None, Some(3)]);
        check_scalar_iter!(UInt32, UInt32Array, vec![Some(1), None, Some(3)]);
        check_scalar_iter!(UInt64, UInt64Array, vec![Some(1), None, Some(3)]);

        // Floating point
        check_scalar_iter!(Float32, Float32Array, vec![Some(1.9), None, Some(-2.1)]);
        check_scalar_iter!(Float64, Float64Array, vec![Some(1.9), None, Some(-2.1)]);

        // Timestamps at every supported unit (no timezone)
        check_scalar_iter_tz!(
            TimestampSecond,
            TimestampSecondArray,
            vec![Some(1), None, Some(3)]
        );
        check_scalar_iter_tz!(
            TimestampMillisecond,
            TimestampMillisecondArray,
            vec![Some(1), None, Some(3)]
        );
        check_scalar_iter_tz!(
            TimestampMicrosecond,
            TimestampMicrosecondArray,
            vec![Some(1), None, Some(3)]
        );
        check_scalar_iter_tz!(
            TimestampNanosecond,
            TimestampNanosecondArray,
            vec![Some(1), None, Some(3)]
        );

        // UTF-8 strings
        check_scalar_iter_string!(
            Utf8,
            StringArray,
            vec![Some("foo"), None, Some("bar")]
        );
        check_scalar_iter_string!(
            LargeUtf8,
            LargeStringArray,
            vec![Some("foo"), None, Some("bar")]
        );

        // Raw bytes
        check_scalar_iter_binary!(
            Binary,
            BinaryArray,
            vec![Some(b"foo"), None, Some(b"bar")]
        );
        check_scalar_iter_binary!(
            LargeBinary,
            LargeBinaryArray,
            vec![Some(b"foo"), None, Some(b"bar")]
        );
    }
4855
4856
    /// `ScalarValue::iter_to_array` rejects an empty input with a
    /// descriptive error.
    #[test]
    fn scalar_iter_to_array_empty() {
        let err = ScalarValue::iter_to_array(Vec::<ScalarValue>::new()).unwrap_err();

        let message = err.to_string();
        assert!(
            message.contains("Empty iterator passed to ScalarValue::iter_to_array"),
            "{}",
            err
        );
    }
4869
4870
    /// Dictionary scalars (Int32 keys, Utf8 values) convert to a dictionary
    /// array whose keys decode back to the original strings, nulls included.
    #[test]
    fn scalar_iter_to_dictionary() {
        /// Wraps an optional string in an Int32-keyed dictionary scalar.
        fn dict_utf8(text: Option<&str>) -> ScalarValue {
            ScalarValue::Dictionary(
                Box::new(DataType::Int32),
                Box::new(ScalarValue::Utf8(text.map(String::from))),
            )
        }

        let expected = vec![Some("Foo"), None, Some("Bar")];

        let array_ref =
            ScalarValue::iter_to_array(expected.iter().copied().map(dict_utf8))
                .unwrap();
        let dict = as_dictionary_array::<Int32Type>(&array_ref).unwrap();
        let dict_values = as_string_array(dict.values()).unwrap();

        // Resolve every key through the dictionary values; a null key stays
        // `None`, and a non-null key must point at a valid value.
        let mut decoded = Vec::with_capacity(dict.len());
        for key in dict.keys_iter() {
            decoded.push(key.map(|idx| {
                assert!(dict_values.is_valid(idx));
                dict_values.value(idx)
            }));
        }

        assert_eq!(decoded, expected);
    }
4901
4902
    /// `ScalarValue::iter_to_array` reports an error when the scalars do not
    /// all share the type of the first one.
    #[test]
    fn scalar_iter_to_array_mismatched_types() {
        // The first element fixes the expected type (Boolean); the Int32 that
        // follows must be rejected.
        let mixed = [
            ScalarValue::Boolean(Some(true)),
            ScalarValue::Int32(Some(5)),
        ];

        let err = ScalarValue::iter_to_array(mixed).unwrap_err();
        let message = err.to_string();
        assert!(
            message.contains("Inconsistent types in ScalarValue::iter_to_array. Expected Boolean, got Int32(5)"),
            "{}",
            err
        );
    }
4912
4913
    /// `ScalarValue::try_from_array` reproduces both a present and a null
    /// element of an `Int64Array`.
    #[test]
    fn scalar_try_from_array_null() {
        let input: [Option<i64>; 2] = [Some(33), None];
        let array: ArrayRef = Arc::new(input.iter().copied().collect::<Int64Array>());

        // Each slot must come back as the matching `Int64` scalar.
        for (index, value) in input.into_iter().enumerate() {
            assert_eq!(
                ScalarValue::Int64(value),
                ScalarValue::try_from_array(&array, index).unwrap()
            );
        }
    }
4927
4928
    /// Scalars extracted from a `ListArray` report the list data type
    /// whether the extracted row is populated or null.
    #[test]
    fn scalar_try_from_array_list_array_null() {
        // Row 0 is a populated list, row 1 is a null list.
        let rows = vec![Some(vec![Some(1), Some(2)]), None];
        let list = ListArray::from_iter_primitive::<Int32Type, _, _>(rows);

        let expected_type =
            DataType::List(Arc::new(Field::new("item", DataType::Int32, true)));

        for row in [0, 1] {
            let scalar = ScalarValue::try_from_array(&list, row).unwrap();
            assert_eq!(scalar.data_type(), expected_type);
        }
    }
4944
4945
    /// Converting a `List`, `LargeList` or `FixedSizeList` data type into a
    /// `ScalarValue` yields the null scalar of that list type.
    #[test]
    fn scalar_try_from_list_datatypes() {
        let item = Arc::new(Field::new("item", DataType::Int32, true));

        // List
        let list_type = DataType::List(Arc::clone(&item));
        let null_list = ScalarValue::List(Arc::new(
            new_null_array(&list_type, 1).as_list::<i32>().clone(),
        ));
        assert_eq!(null_list, ScalarValue::try_from(&list_type).unwrap());
        assert!(null_list.is_null());

        // LargeList
        let large_list_type = DataType::LargeList(Arc::clone(&item));
        let null_large_list = ScalarValue::LargeList(Arc::new(
            new_null_array(&large_list_type, 1)
                .as_list::<i64>()
                .clone(),
        ));
        assert_eq!(
            null_large_list,
            ScalarValue::try_from(&large_list_type).unwrap()
        );
        assert!(null_large_list.is_null());

        // FixedSizeList(5)
        let fixed_list_type = DataType::FixedSizeList(Arc::clone(&item), 5);
        let null_fixed_list = ScalarValue::FixedSizeList(Arc::new(
            new_null_array(&fixed_list_type, 1)
                .as_fixed_size_list()
                .clone(),
        ));
        assert_eq!(
            null_fixed_list,
            ScalarValue::try_from(&fixed_list_type).unwrap()
        );
        assert!(null_fixed_list.is_null());
    }
4985
4986
    /// Converting a list-of-list data type into a `ScalarValue` yields the
    /// null `List` scalar of that nested type.
    #[test]
    fn scalar_try_from_list_of_list() {
        let inner_list = DataType::List(Arc::new(Field::new("item", DataType::Int32, true)));
        let nested_list = DataType::List(Arc::new(Field::new("item", inner_list, true)));

        let scalar = ScalarValue::try_from(&nested_list).unwrap();

        // Reference value: a one-row null array of the same nested type.
        let expected = ScalarValue::List(Arc::new(
            new_null_array(&nested_list, 1).as_list::<i32>().clone(),
        ));

        assert_eq!(expected, scalar)
    }
5012
5013
    /// The null scalar of a list type is not equal to the null scalar of a
    /// list-of-list type.
    #[test]
    fn scalar_try_from_not_equal_list_nested_list() {
        let flat_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, true)));
        let flat_scalar = ScalarValue::try_from(&flat_type).unwrap();

        let nested_type = DataType::List(Arc::new(Field::new(
            "item",
            DataType::List(Arc::new(Field::new("item", DataType::Int32, true))),
            true,
        )));
        let nested_scalar = ScalarValue::try_from(&nested_type).unwrap();

        assert_ne!(flat_scalar, nested_scalar);
    }
5030
5031
    /// Converting a dictionary data type (Int8 keys, Utf8 values) into a
    /// `ScalarValue` yields a dictionary scalar wrapping a null `Utf8`.
    #[test]
    fn scalar_try_from_dict_datatype() {
        let key_type = DataType::Int8;
        let dict_type =
            DataType::Dictionary(Box::new(key_type.clone()), Box::new(DataType::Utf8));

        let expected =
            ScalarValue::Dictionary(Box::new(key_type), Box::new(ScalarValue::Utf8(None)));

        assert_eq!(expected, ScalarValue::try_from(&dict_type).unwrap())
    }
5042
5043
    /// Pins the in-memory size of `ScalarValue`.
    #[test]
    fn size_of_scalar() {
        // `ScalarValue` is used in a non-trivial number of places, so growing
        // it means significantly more memory per distinct value. This test
        // makes sure no code change enlarges it unnoticed.
        //
        // Alignment requirements differ across architectures, and the size
        // of the enum appears to differ with them. The value may also change
        // depending on the Rust version.
        let actual_size = std::mem::size_of::<ScalarValue>();
        assert_eq!(actual_size, 64);
    }
5057
5058
    /// Checks the sizes reported by `ScalarValue::size`,
    /// `ScalarValue::size_of_vec` and `ScalarValue::size_of_hashset`.
    #[test]
    fn memory_size() {
        let scalar_width = std::mem::size_of::<ScalarValue>();

        // A binary scalar owning a buffer with capacity 10 must report its
        // own width plus that capacity.
        let sv = ScalarValue::Binary(Some(Vec::with_capacity(10)));
        let sv_size = sv.size();
        assert_eq!(sv_size, scalar_width + 10);

        let mut scalars = Vec::with_capacity(10);
        // do NOT clone `sv` here because this may shrink the vector capacity
        scalars.push(sv);
        assert_eq!(scalars.capacity(), 10);
        // Nine unused slots at plain scalar width, plus the one real scalar.
        let expected_vec_size =
            std::mem::size_of::<Vec<ScalarValue>>() + 9 * scalar_width + sv_size;
        assert_eq!(ScalarValue::size_of_vec(&scalars), expected_vec_size);

        let mut set = HashSet::with_capacity(0);
        // do NOT clone `sv` here because this may shrink the vector capacity
        set.insert(scalars.pop().unwrap());
        // hashsets may easily grow during insert, so capacity is dynamic
        let set_capacity = set.capacity();
        let expected_set_size = std::mem::size_of::<HashSet<ScalarValue>>()
            + (set_capacity - 1) * scalar_width
            + sv_size;
        assert_eq!(ScalarValue::size_of_hashset(&set), expected_set_size);
    }
5087
5088
    /// Validates that `ScalarValue::eq_array` has the same semantics as
    /// `ScalarValue::eq`, across primitive, string, binary, temporal,
    /// interval and dictionary arrays.
    #[test]
    fn scalar_eq_array() {
        /// Casts every present element of `$INPUT` to `$TYPE` and collects
        /// the result into a `Vec`.
        macro_rules! make_typed_vec {
            ($INPUT:expr, $TYPE:ident) => {{
                $INPUT
                    .iter()
                    .map(|slot| slot.map(|raw| raw as $TYPE))
                    .collect::<Vec<_>>()
            }};
        }

        let bool_vals = [Some(true), None, Some(false)];
        let f32_vals = [Some(-1.0), None, Some(1.0)];
        let f64_vals = make_typed_vec!(f32_vals, f64);

        let i8_vals = [Some(-1), None, Some(1)];
        let i16_vals = make_typed_vec!(i8_vals, i16);
        let i32_vals = make_typed_vec!(i8_vals, i32);
        let i64_vals = make_typed_vec!(i8_vals, i64);

        let u8_vals = [Some(0), None, Some(1)];
        let u16_vals = make_typed_vec!(u8_vals, u16);
        let u32_vals = make_typed_vec!(u8_vals, u32);
        let u64_vals = make_typed_vec!(u8_vals, u64);

        let str_vals = [Some("foo"), None, Some("bar")];

        let interval_dt_vals = [
            Some(IntervalDayTime::MINUS_ONE),
            None,
            Some(IntervalDayTime::ONE),
        ];
        let interval_mdn_vals = [
            Some(IntervalMonthDayNano::MINUS_ONE),
            None,
            Some(IntervalMonthDayNano::ONE),
        ];

        /// Pairs an array with one scalar per array slot. Each scalar is
        /// expected to equal the slot at its own index and no other slot,
        /// which assumes every element of the array is unique.
        #[derive(Debug)]
        struct TestCase {
            array: ArrayRef,
            scalars: Vec<ScalarValue>,
        }

        /// Create a test case for casting the input to the specified array
        /// type. The four-argument form also passes `$TZ` as the second
        /// field of every scalar.
        macro_rules! make_test_case {
            ($INPUT:expr, $ARRAY_TY:ident, $SCALAR_TY:ident) => {{
                TestCase {
                    array: Arc::new($INPUT.iter().collect::<$ARRAY_TY>()),
                    scalars: $INPUT
                        .iter()
                        .map(|slot| ScalarValue::$SCALAR_TY(*slot))
                        .collect(),
                }
            }};

            ($INPUT:expr, $ARRAY_TY:ident, $SCALAR_TY:ident, $TZ:expr) => {{
                let timezone = $TZ;
                TestCase {
                    array: Arc::new($INPUT.iter().collect::<$ARRAY_TY>()),
                    scalars: $INPUT
                        .iter()
                        .map(|slot| ScalarValue::$SCALAR_TY(*slot, timezone.clone()))
                        .collect(),
                }
            }};
        }

        /// Create a test case for string arrays; scalars own their text.
        macro_rules! make_str_test_case {
            ($INPUT:expr, $ARRAY_TY:ident, $SCALAR_TY:ident) => {{
                TestCase {
                    array: Arc::new($INPUT.iter().cloned().collect::<$ARRAY_TY>()),
                    scalars: $INPUT
                        .iter()
                        .map(|slot| {
                            ScalarValue::$SCALAR_TY(slot.map(|text| text.to_string()))
                        })
                        .collect(),
                }
            }};
        }

        /// Create a test case for binary arrays; scalars own the bytes of
        /// each input string.
        macro_rules! make_binary_test_case {
            ($INPUT:expr, $ARRAY_TY:ident, $SCALAR_TY:ident) => {{
                TestCase {
                    array: Arc::new($INPUT.iter().cloned().collect::<$ARRAY_TY>()),
                    scalars: $INPUT
                        .iter()
                        .map(|slot| {
                            ScalarValue::$SCALAR_TY(
                                slot.map(|text| text.as_bytes().to_vec()),
                            )
                        })
                        .collect(),
                }
            }};
        }

        /// create a test case for DictionaryArray<$INDEX_TY>
        macro_rules! make_str_dict_test_case {
            ($INPUT:expr, $INDEX_TY:ident) => {{
                TestCase {
                    array: Arc::new(
                        $INPUT
                            .iter()
                            .cloned()
                            .collect::<DictionaryArray<$INDEX_TY>>(),
                    ),
                    scalars: $INPUT
                        .iter()
                        .map(|slot| {
                            let value =
                                ScalarValue::Utf8(slot.map(|text| text.to_string()));
                            ScalarValue::Dictionary(
                                Box::new($INDEX_TY::DATA_TYPE),
                                Box::new(value),
                            )
                        })
                        .collect(),
                }
            }};
        }

        let cases = vec![
            make_test_case!(bool_vals, BooleanArray, Boolean),
            make_test_case!(f32_vals, Float32Array, Float32),
            make_test_case!(f64_vals, Float64Array, Float64),
            make_test_case!(i8_vals, Int8Array, Int8),
            make_test_case!(i16_vals, Int16Array, Int16),
            make_test_case!(i32_vals, Int32Array, Int32),
            make_test_case!(i64_vals, Int64Array, Int64),
            make_test_case!(u8_vals, UInt8Array, UInt8),
            make_test_case!(u16_vals, UInt16Array, UInt16),
            make_test_case!(u32_vals, UInt32Array, UInt32),
            make_test_case!(u64_vals, UInt64Array, UInt64),
            make_str_test_case!(str_vals, StringArray, Utf8),
            make_str_test_case!(str_vals, LargeStringArray, LargeUtf8),
            make_binary_test_case!(str_vals, BinaryArray, Binary),
            make_binary_test_case!(str_vals, LargeBinaryArray, LargeBinary),
            make_test_case!(i32_vals, Date32Array, Date32),
            make_test_case!(i64_vals, Date64Array, Date64),
            make_test_case!(i32_vals, Time32SecondArray, Time32Second),
            make_test_case!(i32_vals, Time32MillisecondArray, Time32Millisecond),
            make_test_case!(i64_vals, Time64MicrosecondArray, Time64Microsecond),
            make_test_case!(i64_vals, Time64NanosecondArray, Time64Nanosecond),
            make_test_case!(i64_vals, TimestampSecondArray, TimestampSecond, None),
            make_test_case!(
                i64_vals,
                TimestampSecondArray,
                TimestampSecond,
                Some("UTC".into())
            ),
            make_test_case!(
                i64_vals,
                TimestampMillisecondArray,
                TimestampMillisecond,
                None
            ),
            make_test_case!(
                i64_vals,
                TimestampMillisecondArray,
                TimestampMillisecond,
                Some("UTC".into())
            ),
            make_test_case!(
                i64_vals,
                TimestampMicrosecondArray,
                TimestampMicrosecond,
                None
            ),
            make_test_case!(
                i64_vals,
                TimestampMicrosecondArray,
                TimestampMicrosecond,
                Some("UTC".into())
            ),
            make_test_case!(
                i64_vals,
                TimestampNanosecondArray,
                TimestampNanosecond,
                None
            ),
            make_test_case!(
                i64_vals,
                TimestampNanosecondArray,
                TimestampNanosecond,
                Some("UTC".into())
            ),
            make_test_case!(i32_vals, IntervalYearMonthArray, IntervalYearMonth),
            make_test_case!(interval_dt_vals, IntervalDayTimeArray, IntervalDayTime),
            make_test_case!(
                interval_mdn_vals,
                IntervalMonthDayNanoArray,
                IntervalMonthDayNano
            ),
            make_str_dict_test_case!(str_vals, Int8Type),
            make_str_dict_test_case!(str_vals, Int16Type),
            make_str_dict_test_case!(str_vals, Int32Type),
            make_str_dict_test_case!(str_vals, Int64Type),
            make_str_dict_test_case!(str_vals, UInt8Type),
            make_str_dict_test_case!(str_vals, UInt16Type),
            make_str_dict_test_case!(str_vals, UInt32Type),
            make_str_dict_test_case!(str_vals, UInt64Type),
        ];

        for TestCase { array, scalars } in cases {
            println!("**** Test Case *****");
            println!("Input array type: {}", array.data_type());
            println!("Input scalars: {scalars:#?}");
            assert_eq!(array.len(), scalars.len());

            for (index, scalar) in scalars.into_iter().enumerate() {
                // The scalar must equal the slot it was built from ...
                let equals_own_slot = scalar
                    .eq_array(&array, index)
                    .expect("Failed to compare arrays");
                assert!(
                    equals_own_slot,
                    "Expected {scalar:?} to be equal to {array:?} at index {index}"
                );

                // ... and must *not* equal any other slot.
                for other_index in (0..array.len()).filter(|&slot| slot != index) {
                    let equals_other_slot = scalar
                        .eq_array(&array, other_index)
                        .expect("Failed to compare arrays");
                    assert!(
                        !equals_other_slot,
                        "Expected {scalar:?} to be NOT equal to {array:?} at index {other_index}"
                    );
                }
            }
        }
    }
5315
5316
    /// Exercises `ScalarValue::partial_cmp` for same-typed values,
    /// differently typed values, and struct scalars.
    #[test]
    fn scalar_partial_ordering() {
        // Same-typed integers order by value. For different data types,
        // `partial_cmp` returns None.
        let primitive_cases = [
            (
                ScalarValue::Int64(Some(33)),
                ScalarValue::Int64(Some(0)),
                Some(Ordering::Greater),
            ),
            (
                ScalarValue::Int64(Some(0)),
                ScalarValue::Int64(Some(33)),
                Some(Ordering::Less),
            ),
            (
                ScalarValue::Int64(Some(33)),
                ScalarValue::Int64(Some(33)),
                Some(Ordering::Equal),
            ),
            (
                ScalarValue::Int64(Some(33)),
                ScalarValue::Int32(Some(33)),
                None,
            ),
            (
                ScalarValue::Int32(Some(33)),
                ScalarValue::Int64(Some(33)),
                None,
            ),
        ];
        for (lhs, rhs, expected) in primitive_cases {
            assert_eq!(lhs.partial_cmp(&rhs), expected);
        }

        // Structs with identical field names compare as `Less` here.
        let base_struct = ScalarValue::from(vec![
            ("A", ScalarValue::from(1.0)),
            ("B", ScalarValue::from("Z")),
        ]);
        let same_fields = ScalarValue::from(vec![
            ("A", ScalarValue::from(2.0)),
            ("B", ScalarValue::from("A")),
        ]);
        assert_eq!(base_struct.partial_cmp(&same_fields), Some(Ordering::Less));

        // For different struct fields, `partial_cmp` returns None.
        let other_fields = ScalarValue::from(vec![
            ("a", ScalarValue::from(2.0)),
            ("b", ScalarValue::from("A")),
        ]);
        assert_eq!(base_struct.partial_cmp(&other_fields), None);
    }
5361
5362
    /// A `&str`, an owned `String` and `FromStr` parsing all produce the
    /// same `Utf8` scalar.
    #[test]
    fn test_scalar_value_from_string() {
        let expected = ScalarValue::Utf8(Some("foo".to_string()));

        let from_str_slice = ScalarValue::from("foo");
        assert_eq!(from_str_slice, expected);

        let from_owned = ScalarValue::from("foo".to_string());
        assert_eq!(from_owned, expected);

        let parsed = ScalarValue::from_str("foo").unwrap();
        assert_eq!(parsed, expected);
    }
5371
5372
    #[test]
5373
    fn test_scalar_struct() {
5374
        let field_a = Arc::new(Field::new("A", DataType::Int32, false));
5375
        let field_b = Arc::new(Field::new("B", DataType::Boolean, false));
5376
        let field_c = Arc::new(Field::new("C", DataType::Utf8, false));
5377
5378
        let field_e = Arc::new(Field::new("e", DataType::Int16, false));
5379
        let field_f = Arc::new(Field::new("f", DataType::Int64, false));
5380
        let field_d = Arc::new(Field::new(
5381
            "D",
5382
            DataType::Struct(vec![Arc::clone(&field_e), Arc::clone(&field_f)].into()),
5383
            false,
5384
        ));
5385
5386
        let struct_array = StructArray::from(vec![
5387
            (
5388
                Arc::clone(&field_e),
5389
                Arc::new(Int16Array::from(vec![2])) as ArrayRef,
5390
            ),
5391
            (
5392
                Arc::clone(&field_f),
5393
                Arc::new(Int64Array::from(vec![3])) as ArrayRef,
5394
            ),
5395
        ]);
5396
5397
        let struct_array = StructArray::from(vec![
5398
            (
5399
                Arc::clone(&field_a),
5400
                Arc::new(Int32Array::from(vec![23])) as ArrayRef,
5401
            ),
5402
            (
5403
                Arc::clone(&field_b),
5404
                Arc::new(BooleanArray::from(vec![false])) as ArrayRef,
5405
            ),
5406
            (
5407
                Arc::clone(&field_c),
5408
                Arc::new(StringArray::from(vec!["Hello"])) as ArrayRef,
5409
            ),
5410
            (Arc::clone(&field_d), Arc::new(struct_array) as ArrayRef),
5411
        ]);
5412
        let scalar = ScalarValue::Struct(Arc::new(struct_array));
5413
5414
        let array = scalar
5415
            .to_array_of_size(2)
5416
            .expect("Failed to convert to array of size");
5417
5418
        let expected = Arc::new(StructArray::from(vec![
5419
            (
5420
                Arc::clone(&field_a),
5421
                Arc::new(Int32Array::from(vec![23, 23])) as ArrayRef,
5422
            ),
5423
            (
5424
                Arc::clone(&field_b),
5425
                Arc::new(BooleanArray::from(vec![false, false])) as ArrayRef,
5426
            ),
5427
            (
5428
                Arc::clone(&field_c),
5429
                Arc::new(StringArray::from(vec!["Hello", "Hello"])) as ArrayRef,
5430
            ),
5431
            (
5432
                Arc::clone(&field_d),
5433
                Arc::new(StructArray::from(vec![
5434
                    (
5435
                        Arc::clone(&field_e),
5436
                        Arc::new(Int16Array::from(vec![2, 2])) as ArrayRef,
5437
                    ),
5438
                    (
5439
                        Arc::clone(&field_f),
5440
                        Arc::new(Int64Array::from(vec![3, 3])) as ArrayRef,
5441
                    ),
5442
                ])) as ArrayRef,
5443
            ),
5444
        ])) as ArrayRef;
5445
5446
        assert_eq!(&array, &expected);
5447
5448
        // Construct from second element of ArrayRef
5449
        let constructed = ScalarValue::try_from_array(&expected, 1).unwrap();
5450
        assert_eq!(constructed, scalar);
5451
5452
        // None version
5453
        let none_scalar = ScalarValue::try_from(array.data_type()).unwrap();
5454
        assert!(none_scalar.is_null());
5455
        assert_eq!(
5456
            format!("{none_scalar:?}"),
5457
            String::from("Struct({A:,B:,C:,D:})")
5458
        );
5459
5460
        // Construct with convenience From<Vec<(&str, ScalarValue)>>
5461
        let constructed = ScalarValue::from(vec![
5462
            ("A", ScalarValue::from(23)),
5463
            ("B", ScalarValue::from(false)),
5464
            ("C", ScalarValue::from("Hello")),
5465
            (
5466
                "D",
5467
                ScalarValue::from(vec![
5468
                    ("e", ScalarValue::from(2i16)),
5469
                    ("f", ScalarValue::from(3i64)),
5470
                ]),
5471
            ),
5472
        ]);
5473
        assert_eq!(constructed, scalar);
5474
5475
        // Build Array from Vec of structs
5476
        let scalars = vec![
5477
            ScalarValue::from(vec![
5478
                ("A", ScalarValue::from(23)),
5479
                ("B", ScalarValue::from(false)),
5480
                ("C", ScalarValue::from("Hello")),
5481
                (
5482
                    "D",
5483
                    ScalarValue::from(vec![
5484
                        ("e", ScalarValue::from(2i16)),
5485
                        ("f", ScalarValue::from(3i64)),
5486
                    ]),
5487
                ),
5488
            ]),
5489
            ScalarValue::from(vec![
5490
                ("A", ScalarValue::from(7)),
5491
                ("B", ScalarValue::from(true)),
5492
                ("C", ScalarValue::from("World")),
5493
                (
5494
                    "D",
5495
                    ScalarValue::from(vec![
5496
                        ("e", ScalarValue::from(4i16)),
5497
                        ("f", ScalarValue::from(5i64)),
5498
                    ]),
5499
                ),
5500
            ]),
5501
            ScalarValue::from(vec![
5502
                ("A", ScalarValue::from(-1000)),
5503
                ("B", ScalarValue::from(true)),
5504
                ("C", ScalarValue::from("!!!!!")),
5505
                (
5506
                    "D",
5507
                    ScalarValue::from(vec![
5508
                        ("e", ScalarValue::from(6i16)),
5509
                        ("f", ScalarValue::from(7i64)),
5510
                    ]),
5511
                ),
5512
            ]),
5513
        ];
5514
        let array = ScalarValue::iter_to_array(scalars).unwrap();
5515
5516
        let expected = Arc::new(StructArray::from(vec![
5517
            (
5518
                Arc::clone(&field_a),
5519
                Arc::new(Int32Array::from(vec![23, 7, -1000])) as ArrayRef,
5520
            ),
5521
            (
5522
                Arc::clone(&field_b),
5523
                Arc::new(BooleanArray::from(vec![false, true, true])) as ArrayRef,
5524
            ),
5525
            (
5526
                Arc::clone(&field_c),
5527
                Arc::new(StringArray::from(vec!["Hello", "World", "!!!!!"])) as ArrayRef,
5528
            ),
5529
            (
5530
                Arc::clone(&field_d),
5531
                Arc::new(StructArray::from(vec![
5532
                    (
5533
                        Arc::clone(&field_e),
5534
                        Arc::new(Int16Array::from(vec![2, 4, 6])) as ArrayRef,
5535
                    ),
5536
                    (
5537
                        Arc::clone(&field_f),
5538
                        Arc::new(Int64Array::from(vec![3, 5, 7])) as ArrayRef,
5539
                    ),
5540
                ])) as ArrayRef,
5541
            ),
5542
        ])) as ArrayRef;
5543
5544
        assert_eq!(&array, &expected);
5545
    }
5546
5547
    // Checks that every slot of a sparse `UnionArray` converts to the matching
    // `ScalarValue::Union` and that the scalar compares equal to its source slot.
    #[test]
    fn test_scalar_union_sparse() {
        let fields = UnionFields::from_iter([
            (0, Arc::new(Field::new("A", DataType::Int32, true))),
            (1, Arc::new(Field::new("B", DataType::Boolean, true))),
            (2, Arc::new(Field::new("C", DataType::Utf8, true))),
        ]);

        // Sparse layout: every child spans all six slots. Only slots 0, 1 and 2
        // carry a value (one per child); the remaining entries are null.
        let children: Vec<ArrayRef> = vec![
            Arc::new(Int32Array::from(vec![
                Some(42),
                None,
                None,
                None,
                None,
                None,
            ])),
            Arc::new(BooleanArray::from(vec![
                None,
                Some(true),
                None,
                None,
                None,
                None,
            ])),
            Arc::new(StringArray::from(vec![
                None,
                None,
                Some("foo"),
                None,
                None,
                None,
            ])),
        ];

        let type_ids = ScalarBuffer::from(vec![0, 1, 2, 0, 1, 2]);
        let union_array =
            UnionArray::try_new(fields.clone(), type_ids, None, children)
                .expect("UnionArray");
        let array: ArrayRef = Arc::new(union_array);

        // (type id, inner scalar) expected at each slot, in slot order
        let per_slot = [
            (0, ScalarValue::from(42)),
            (1, ScalarValue::from(true)),
            (2, ScalarValue::from("foo")),
            (0, ScalarValue::Int32(None)),
            (1, ScalarValue::Boolean(None)),
            (2, ScalarValue::Utf8(None)),
        ];

        for (i, (ti, inner)) in per_slot.into_iter().enumerate() {
            let is_null = inner.is_null();
            let expected = ScalarValue::Union(
                Some((ti, Box::new(inner))),
                fields.clone(),
                UnionMode::Sparse,
            );
            let actual = ScalarValue::try_from_array(&array, i).expect("try_from_array");

            assert_eq!(
                actual, expected,
                "[{i}] {actual} was not equal to {expected}"
            );

            let same_as_slot = expected.eq_array(&array, i).expect("eq_array");
            assert!(same_as_slot, "[{i}] {expected}.eq_array was false");

            // A null inner value must make the whole union scalar null.
            if is_null {
                assert!(actual.is_null(), "[{i}] {actual} was not null")
            }
        }
    }
5602
5603
    // Checks that every slot of a dense `UnionArray` converts to the matching
    // `ScalarValue::Union` and that the scalar compares equal to its source slot.
    #[test]
    fn test_scalar_union_dense() {
        let fields = UnionFields::from_iter([
            (0, Arc::new(Field::new("A", DataType::Int32, true))),
            (1, Arc::new(Field::new("B", DataType::Boolean, true))),
            (2, Arc::new(Field::new("C", DataType::Utf8, true))),
        ]);

        // Dense layout: each child holds two entries (a value, then a null) and
        // `offsets` selects which entry of the chosen child a slot refers to.
        let int_child = Int32Array::from(vec![Some(42), None]);
        let bool_child = BooleanArray::from(vec![Some(true), None]);
        let str_child = StringArray::from(vec![Some("foo"), None]);
        let children: Vec<ArrayRef> = vec![
            Arc::new(int_child),
            Arc::new(bool_child),
            Arc::new(str_child),
        ];

        let type_ids = ScalarBuffer::from(vec![0, 1, 2, 0, 1, 2]);
        let offsets = ScalarBuffer::from(vec![0, 0, 0, 1, 1, 1]);
        let union_array =
            UnionArray::try_new(fields.clone(), type_ids, Some(offsets), children)
                .expect("UnionArray");
        let array: ArrayRef = Arc::new(union_array);

        // (type id, inner scalar) expected at each slot, in slot order
        let per_slot = [
            (0, ScalarValue::from(42)),
            (1, ScalarValue::from(true)),
            (2, ScalarValue::from("foo")),
            (0, ScalarValue::Int32(None)),
            (1, ScalarValue::Boolean(None)),
            (2, ScalarValue::Utf8(None)),
        ];

        for (i, (ti, inner)) in per_slot.into_iter().enumerate() {
            let is_null = inner.is_null();
            let expected = ScalarValue::Union(
                Some((ti, Box::new(inner))),
                fields.clone(),
                UnionMode::Dense,
            );
            let actual = ScalarValue::try_from_array(&array, i).expect("try_from_array");

            assert_eq!(
                actual, expected,
                "[{i}] {actual} was not equal to {expected}"
            );

            let same_as_slot = expected.eq_array(&array, i).expect("eq_array");
            assert!(same_as_slot, "[{i}] {expected}.eq_array was false");

            // A null inner value must make the whole union scalar null.
            if is_null {
                assert!(actual.is_null(), "[{i}] {actual} was not null")
            }
        }
    }
5652
5653
    // Exercises `ScalarValue::iter_to_array` for structs that contain a list
    // field, and for lists whose elements are such structs.
    #[test]
    fn test_lists_in_struct() {
        // Appends one `{A, primitive_list}` struct element to the values of
        // `builder`. The enclosing list entry is NOT closed here; the caller
        // decides when a list row ends.
        fn append_struct_element(
            builder: &mut ListBuilder<StructBuilder>,
            name: &str,
            items: &[i32],
        ) {
            let element = builder.values();
            element
                .field_builder::<StringBuilder>(0)
                .unwrap()
                .append_value(name);
            let list_field = element
                .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
                .unwrap();
            for item in items {
                list_field.values().append_value(*item);
            }
            list_field.append(true);
            element.append(true);
        }

        let field_a = Arc::new(Field::new("A", DataType::Utf8, false));
        let field_primitive_list = Arc::new(Field::new(
            "primitive_list",
            DataType::List(Arc::new(Field::new("item", DataType::Int32, true))),
            false,
        ));

        // Define primitive list scalars (each is a one-row list array)
        let int_list_scalar = |values: Vec<Option<i32>>| {
            ScalarValue::List(Arc::new(
                ListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(values)]),
            ))
        };
        let l0 = int_list_scalar(vec![Some(1), Some(2), Some(3)]);
        let l1 = int_list_scalar(vec![Some(4), Some(5)]);
        let l2 = int_list_scalar(vec![Some(6)]);

        // Define struct scalars
        let s0 = ScalarValue::from(vec![
            ("A", ScalarValue::from("First")),
            ("primitive_list", l0),
        ]);
        let s1 = ScalarValue::from(vec![
            ("A", ScalarValue::from("Second")),
            ("primitive_list", l1),
        ]);
        let s2 = ScalarValue::from(vec![
            ("A", ScalarValue::from("Third")),
            ("primitive_list", l2),
        ]);

        // iter_to_array for struct scalars
        let struct_scalars = vec![s0.clone(), s1.clone(), s2.clone()];
        let array = ScalarValue::iter_to_array(struct_scalars).unwrap();
        let array = as_struct_array(&array).unwrap();

        let names = StringArray::from(vec!["First", "Second", "Third"]);
        let lists = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
            Some(vec![Some(1), Some(2), Some(3)]),
            Some(vec![Some(4), Some(5)]),
            Some(vec![Some(6)]),
        ]);
        let expected = StructArray::from(vec![
            (Arc::clone(&field_a), Arc::new(names) as ArrayRef),
            (Arc::clone(&field_primitive_list), Arc::new(lists)),
        ]);

        assert_eq!(array, &expected);

        // Define list-of-structs scalars
        let nl0_array = ScalarValue::iter_to_array(vec![s0, s1.clone()]).unwrap();
        let nl0 = ScalarValue::List(Arc::new(array_into_list_array_nullable(nl0_array)));

        let nl1_array = ScalarValue::iter_to_array(vec![s2]).unwrap();
        let nl1 = ScalarValue::List(Arc::new(array_into_list_array_nullable(nl1_array)));

        let nl2_array = ScalarValue::iter_to_array(vec![s1]).unwrap();
        let nl2 = ScalarValue::List(Arc::new(array_into_list_array_nullable(nl2_array)));

        // iter_to_array for list-of-struct
        let array = ScalarValue::iter_to_array(vec![nl0, nl1, nl2]).unwrap();
        let array = array.as_list::<i32>();

        // Construct expected array with array builders
        let field_a_builder = StringBuilder::with_capacity(4, 1024);
        let primitive_value_builder = Int32Array::builder(8);
        let field_primitive_list_builder = ListBuilder::new(primitive_value_builder);

        let element_builder = StructBuilder::new(
            vec![field_a, field_primitive_list],
            vec![
                Box::new(field_a_builder),
                Box::new(field_primitive_list_builder),
            ],
        );

        let mut list_builder = ListBuilder::new(element_builder);

        // Row 0: [First, Second]
        append_struct_element(&mut list_builder, "First", &[1, 2, 3]);
        append_struct_element(&mut list_builder, "Second", &[4, 5]);
        list_builder.append(true);

        // Row 1: [Third]
        append_struct_element(&mut list_builder, "Third", &[6]);
        list_builder.append(true);

        // Row 2: [Second]
        append_struct_element(&mut list_builder, "Second", &[4, 5]);
        list_builder.append(true);

        let expected = list_builder.finish();

        assert_eq!(array, &expected);
    }
5855
5856
    fn build_2d_list(data: Vec<Option<i32>>) -> ListArray {
5857
        let a1 = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(data)]);
5858
        ListArray::new(
5859
            Arc::new(Field::new(
5860
                "item",
5861
                DataType::List(Arc::new(Field::new("item", DataType::Int32, true))),
5862
                true,
5863
            )),
5864
            OffsetBuffer::<i32>::from_lengths([1]),
5865
            Arc::new(a1),
5866
            None,
5867
        )
5868
    }
5869
5870
    // Checks that `ScalarValue::iter_to_array` stacks list-of-list scalars into
    // the same array that nested `ListBuilder`s produce.
    #[test]
    fn test_nested_lists() {
        // Innermost values of each scalar, in output row order
        let rows: [&[i32]; 3] = [&[1, 2, 3], &[4, 5], &[6]];

        // Define inner list scalars
        let scalars: Vec<ScalarValue> = rows
            .iter()
            .map(|row| {
                let data: Vec<Option<i32>> = row.iter().copied().map(Some).collect();
                ScalarValue::List(Arc::new(build_2d_list(data)))
            })
            .collect();

        let array = ScalarValue::iter_to_array(scalars).unwrap();
        let array = array.as_list::<i32>();

        // Construct expected array with array builders
        let inner_builder = Int32Array::builder(6);
        let middle_builder = ListBuilder::new(inner_builder);
        let mut outer_builder = ListBuilder::new(middle_builder);

        for row in rows {
            for value in row {
                outer_builder.values().values().append_value(*value);
            }
            // close the middle list, then the outer row that wraps it
            outer_builder.values().append(true);
            outer_builder.append(true);
        }

        let expected = outer_builder.finish();

        assert_eq!(array, &expected);
    }
5909
5910
    // The "UTC" timezone of a nanosecond timestamp scalar must be kept when it is
    // converted to an array and read back as a scalar.
    #[test]
    fn scalar_timestamp_ns_utc_timezone() {
        let utc_nanos = DataType::Timestamp(TimeUnit::Nanosecond, Some("UTC".into()));

        let scalar = ScalarValue::TimestampNanosecond(
            Some(1599566400000000000),
            Some("UTC".into()),
        );
        assert_eq!(scalar.data_type(), utc_nanos);

        // scalar --> array
        let array = scalar.to_array().expect("Failed to convert to array");
        assert_eq!(array.len(), 1);
        assert_eq!(array.data_type(), &utc_nanos);

        // array --> scalar
        let newscalar = ScalarValue::try_from_array(&array, 0).unwrap();
        assert_eq!(newscalar.data_type(), utc_nanos);
    }
5935
5936
    // Runs `check_scalar_cast` over a set of (scalar, target type) pairs,
    // including null scalars, dictionary targets and `Utf8View` targets.
    #[test]
    fn cast_round_trip() {
        let dict_of_utf8 = || {
            DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8))
        };

        let cases = [
            (ScalarValue::Int8(Some(5)), DataType::Int16),
            (ScalarValue::Int8(None), DataType::Int16),
            (ScalarValue::Float64(Some(5.5)), DataType::Int16),
            (ScalarValue::Float64(None), DataType::Int16),
            (ScalarValue::from("foo"), dict_of_utf8()),
            (ScalarValue::Utf8(None), dict_of_utf8()),
            (ScalarValue::Utf8(None), DataType::Utf8View),
            (ScalarValue::from("foo"), DataType::Utf8View),
            (
                ScalarValue::from("larger than 12 bytes string"),
                DataType::Utf8View,
            ),
        ];

        for (scalar, desired_type) in cases {
            check_scalar_cast(scalar, desired_type);
        }
    }
5962
5963
    // mimics how casting work on scalar values by `casting` `scalar` to `desired_type`
5964
    fn check_scalar_cast(scalar: ScalarValue, desired_type: DataType) {
5965
        // convert from scalar --> Array to call cast
5966
        let scalar_array = scalar.to_array().expect("Failed to convert to array");
5967
        // cast the actual value
5968
        let cast_array = kernels::cast::cast(&scalar_array, &desired_type).unwrap();
5969
5970
        // turn it back to a scalar
5971
        let cast_scalar = ScalarValue::try_from_array(&cast_array, 0).unwrap();
5972
        assert_eq!(cast_scalar.data_type(), desired_type);
5973
5974
        // Some time later the "cast" scalar is turned back into an array:
5975
        let array = cast_scalar
5976
            .to_array_of_size(10)
5977
            .expect("Failed to convert to array of size");
5978
5979
        // The datatype should be "Dictionary" but is actually Utf8!!!
5980
        assert_eq!(array.data_type(), &desired_type)
5981
    }
5982
5983
    // `arithmetic_negate` flips the sign of a signed integer, keeps a null as
    // null, and returns an error for unsigned and boolean scalars.
    #[test]
    fn test_scalar_negative() -> Result<()> {
        // positive test
        let negatable = [
            (ScalarValue::Int32(Some(12)), ScalarValue::Int32(Some(-12))),
            (ScalarValue::Int32(None), ScalarValue::Int32(None)),
        ];
        for (value, negated) in negatable {
            assert_eq!(negated, value.arithmetic_negate()?);
        }

        // negative test
        let rejected = [ScalarValue::UInt8(Some(12)), ScalarValue::Boolean(None)];
        for value in rejected {
            assert!(value.arithmetic_negate().is_err());
        }

        Ok(())
    }
5998
5999
    // Negating the minimum value of a signed integer-backed scalar must fail
    // with an Arrow `ArithmeticOverflow` error, while negating the extreme
    // float values must succeed and yield the opposite extreme.
    //
    // Returns the unexpected error if a negation fails with anything other than
    // an arithmetic overflow.
    #[test]
    #[allow(arithmetic_overflow)] // we want to test them
    fn test_scalar_negative_overflows() -> Result<()> {
        macro_rules! test_overflow_on_value {
            ($($val:expr),* $(,)?) => {$(
                {
                    let value: ScalarValue = $val;
                    // `expect_err` only takes a plain `&str`, so a `{value:?}`
                    // placeholder in its message is never filled in. Format the
                    // panic message explicitly to report the offending scalar.
                    let err = match value.arithmetic_negate() {
                        Ok(negated) => panic!(
                            "Should receive overflow error on negating {:?}, got {:?}",
                            value, negated
                        ),
                        Err(err) => err,
                    };
                    let root_err = err.find_root();
                    match root_err {
                        DataFusionError::ArrowError(
                            ArrowError::ArithmeticOverflow(_),
                            _,
                        ) => {}
                        // any other failure is surfaced as the test's error
                        _ => return Err(err),
                    };
                }
            )*};
        }
        test_overflow_on_value!(
            // the integers
            i8::MIN.into(),
            i16::MIN.into(),
            i32::MIN.into(),
            i64::MIN.into(),
            // for decimals, only value needs to be tested
            ScalarValue::try_new_decimal128(i128::MIN, 10, 5)?,
            ScalarValue::Decimal256(Some(i256::MIN), 20, 5),
            // interval, check all possible values
            ScalarValue::IntervalYearMonth(Some(i32::MIN)),
            ScalarValue::new_interval_dt(i32::MIN, 999),
            ScalarValue::new_interval_dt(1, i32::MIN),
            ScalarValue::new_interval_mdn(i32::MIN, 15, 123_456),
            ScalarValue::new_interval_mdn(12, i32::MIN, 123_456),
            ScalarValue::new_interval_mdn(12, 15, i64::MIN),
            // tz doesn't matter when negating
            ScalarValue::TimestampSecond(Some(i64::MIN), None),
            ScalarValue::TimestampMillisecond(Some(i64::MIN), None),
            ScalarValue::TimestampMicrosecond(Some(i64::MIN), None),
            ScalarValue::TimestampNanosecond(Some(i64::MIN), None),
        );

        // Floats have symmetric extremes, so negation cannot overflow.
        let float_cases = [
            (
                ScalarValue::Float16(Some(f16::MIN)),
                ScalarValue::Float16(Some(f16::MAX)),
            ),
            (
                ScalarValue::Float16(Some(f16::MAX)),
                ScalarValue::Float16(Some(f16::MIN)),
            ),
            (f32::MIN.into(), f32::MAX.into()),
            (f32::MAX.into(), f32::MIN.into()),
            (f64::MIN.into(), f64::MAX.into()),
            (f64::MAX.into(), f64::MIN.into()),
        ];
        // skip float 16 because they aren't supported
        // NOTE(review): `f16_test_overflow` exercises the same two Float16 cases
        // and expects them to succeed; confirm whether this skip is still needed.
        for (test, expected) in float_cases.into_iter().skip(2) {
            assert_eq!(test.arithmetic_negate()?, expected);
        }
        Ok(())
    }
6061
6062
    // Negating the smallest / largest `f16` must yield the opposite extreme.
    #[test]
    fn f16_test_overflow() {
        // TODO: if negate supports f16, add these cases to `test_scalar_negative_overflows` test case
        let (lowest, highest) = (f16::MIN, f16::MAX);

        for (input, negated) in [(lowest, highest), (highest, lowest)] {
            let test = ScalarValue::Float16(Some(input));
            let expected = ScalarValue::Float16(Some(negated));
            assert_eq!(test.arithmetic_negate().unwrap(), expected);
        }
    }
6080
6081
    // Generates a test named `$TEST_NAME` that applies `$FUNCTION` to a `UInt64`
    // left operand and an `Int32` right operand, and requires the call to fail
    // with an error whose message contains `$EXPECTED_ERROR`.
    macro_rules! expect_operation_error {
        ($TEST_NAME:ident, $FUNCTION:ident, $EXPECTED_ERROR:expr) => {
            #[test]
            fn $TEST_NAME() {
                let lhs = ScalarValue::UInt64(Some(12));
                let rhs = ScalarValue::Int32(Some(-3));

                // A successful result means the mismatched types were accepted.
                let error_message = match lhs.$FUNCTION(&rhs) {
                    Err(e) => e.to_string(),
                    Ok(_result) => panic!(
                        "Expected binary operation error between lhs: '{:?}', rhs: {:?}",
                        lhs, rhs
                    ),
                };

                assert!(
                    error_message.contains($EXPECTED_ERROR),
                    "Expected error '{}' not found in actual error '{}'",
                    $EXPECTED_ERROR,
                    error_message
                );
            }
        };
    }

    // Mixed UInt64 / Int32 addition must be rejected.
    expect_operation_error!(
        expect_add_error,
        add,
        "Invalid arithmetic operation: UInt64 + Int32"
    );
    // Mixed UInt64 / Int32 subtraction must be rejected.
    expect_operation_error!(
        expect_sub_error,
        sub,
        "Invalid arithmetic operation: UInt64 - Int32"
    );
6118
6119
    // For every `[lhs value, precision, scale, rhs value, precision, scale,
    // expected value, precision, scale]` case, applies `$OPERATION` to the two
    // `Decimal128` scalars and asserts the result equals the expected one.
    macro_rules! decimal_op_test_cases {
    ($OPERATION:ident, [$([$L_VALUE:expr, $L_PRECISION:expr, $L_SCALE:expr, $R_VALUE:expr, $R_PRECISION:expr, $R_SCALE:expr, $O_VALUE:expr, $O_PRECISION:expr, $O_SCALE:expr]),+]) => {
            $({
                let lhs = ScalarValue::Decimal128($L_VALUE, $L_PRECISION, $L_SCALE);
                let rhs = ScalarValue::Decimal128($R_VALUE, $R_PRECISION, $R_SCALE);
                let actual = lhs.$OPERATION(&rhs).unwrap();
                assert_eq!(ScalarValue::Decimal128($O_VALUE, $O_PRECISION, $O_SCALE), actual);
            })+
        };
    }
6131
6132
    // Adds `Decimal128` scalars with equal and with differing precision/scale
    // and checks the value, precision and scale of the sum.
    #[test]
    fn decimal_operations() {
        // Expected sums, expressed at the scale of the result.
        let same_scale_sum: Option<i128> = Some(123 + 124);
        let rhs_rescaled_sum: Option<i128> = Some(123 + 124 * 10_i128.pow(1));
        let lhs_rescaled_sum: Option<i128> = Some(123 * 10_i128.pow(3 - 2) + 124);

        decimal_op_test_cases!(
            add,
            [
                [Some(123), 10, 2, Some(124), 10, 2, same_scale_sum, 11, 2],
                // test sum decimal with diff scale
                [Some(123), 10, 3, Some(124), 10, 2, rhs_rescaled_sum, 12, 3],
                // diff precision and scale for decimal data type
                [Some(123), 10, 2, Some(124), 11, 3, lhs_rescaled_sum, 12, 3]
            ]
        );
    }
6165
6166
    // Adding a null `Decimal128` to a non-null one (on either side) must give a
    // null result that still carries the expected precision and scale.
    #[test]
    fn decimal_operations_with_nulls() {
        let value: Option<i128> = Some(123);
        let null: Option<i128> = None;

        decimal_op_test_cases!(
            add,
            [
                // Case: (None, Some, 0)
                [null, 10, 2, value, 10, 2, null, 11, 2],
                // Case: (Some, None, 0)
                [value, 10, 2, null, 10, 2, null, 11, 2],
                // Case: (Some, None, _) + Side=False
                [value, 8, 2, null, 10, 3, null, 11, 3],
                // Case: (None, Some, _) + Side=False
                [null, 8, 2, value, 10, 3, null, 11, 3],
                // Case: (Some, None, _) + Side=True
                [value, 8, 4, null, 10, 3, null, 12, 4],
                // Case: (None, Some, _) + Side=True
                [null, 10, 3, value, 8, 4, null, 12, 4]
            ]
        );
    }
6186
6187
    #[test]
6188
    fn test_scalar_distance() {
6189
        let cases = [
6190
            // scalar (lhs), scalar (rhs), expected distance
6191
            // ---------------------------------------------
6192
            (ScalarValue::Int8(Some(1)), ScalarValue::Int8(Some(2)), 1),
6193
            (ScalarValue::Int8(Some(2)), ScalarValue::Int8(Some(1)), 1),
6194
            (
6195
                ScalarValue::Int16(Some(-5)),
6196
                ScalarValue::Int16(Some(5)),
6197
                10,
6198
            ),
6199
            (
6200
                ScalarValue::Int16(Some(5)),
6201
                ScalarValue::Int16(Some(-5)),
6202
                10,
6203
            ),
6204
            (ScalarValue::Int32(Some(0)), ScalarValue::Int32(Some(0)), 0),
6205
            (
6206
                ScalarValue::Int32(Some(-5)),
6207
                ScalarValue::Int32(Some(-10)),
6208
                5,
6209
            ),
6210
            (
6211
                ScalarValue::Int64(Some(-10)),
6212
                ScalarValue::Int64(Some(-5)),
6213
                5,
6214
            ),
6215
            (ScalarValue::UInt8(Some(1)), ScalarValue::UInt8(Some(2)), 1),
6216
            (ScalarValue::UInt8(Some(0)), ScalarValue::UInt8(Some(0)), 0),
6217
            (
6218
                ScalarValue::UInt16(Some(5)),
6219
                ScalarValue::UInt16(Some(10)),
6220
                5,
6221
            ),
6222
            (
6223
                ScalarValue::UInt32(Some(10)),
6224
                ScalarValue::UInt32(Some(5)),
6225
                5,
6226
            ),
6227
            (
6228
                ScalarValue::UInt64(Some(5)),
6229
                ScalarValue::UInt64(Some(10)),
6230
                5,
6231
            ),
6232
            (
6233
                ScalarValue::Float16(Some(f16::from_f32(1.1))),
6234
                ScalarValue::Float16(Some(f16::from_f32(1.9))),
6235
                1,
6236
            ),
6237
            (
6238
                ScalarValue::Float16(Some(f16::from_f32(-5.3))),
6239
                ScalarValue::Float16(Some(f16::from_f32(-9.2))),
6240
                4,
6241
            ),
6242
            (
6243
                ScalarValue::Float16(Some(f16::from_f32(-5.3))),
6244
                ScalarValue::Float16(Some(f16::from_f32(-9.7))),
6245
                4,
6246
            ),
6247
            (
6248
                ScalarValue::Float32(Some(1.0)),
6249
                ScalarValue::Float32(Some(2.0)),
6250
                1,
6251
            ),
6252
            (
6253
                ScalarValue::Float32(Some(2.0)),
6254
                ScalarValue::Float32(Some(1.0)),
6255
                1,
6256
            ),
6257
            (
6258
                ScalarValue::Float64(Some(0.0)),
6259
                ScalarValue::Float64(Some(0.0)),
6260
                0,
6261
            ),
6262
            (
6263
                ScalarValue::Float64(Some(-5.0)),
6264
                ScalarValue::Float64(Some(-10.0)),
6265
                5,
6266
            ),
6267
            (
6268
                ScalarValue::Float64(Some(-10.0)),
6269
                ScalarValue::Float64(Some(-5.0)),
6270
                5,
6271
            ),
6272
            // Floats are currently special cased to f64/f32 and the result is rounded
6273
            // rather than ceiled/floored. In the future we might want to take a mode
6274
            // which specified the rounding behavior.
6275
            (
6276
                ScalarValue::Float32(Some(1.2)),
6277
                ScalarValue::Float32(Some(1.3)),
6278
                0,
6279
            ),
6280
            (
6281
                ScalarValue::Float32(Some(1.1)),
6282
                ScalarValue::Float32(Some(1.9)),
6283
                1,
6284
            ),
6285
            (
6286
                ScalarValue::Float64(Some(-5.3)),
6287
                ScalarValue::Float64(Some(-9.2)),
6288
                4,
6289
            ),
6290
            (
6291
                ScalarValue::Float64(Some(-5.3)),
6292
                ScalarValue::Float64(Some(-9.7)),
6293
                4,
6294
            ),
6295
            (
6296
                ScalarValue::Float64(Some(-5.3)),
6297
                ScalarValue::Float64(Some(-9.9)),
6298
                5,
6299
            ),
6300
        ];
6301
        for (lhs, rhs, expected) in cases.iter() {
6302
            let distance = lhs.distance(rhs).unwrap();
6303
            assert_eq!(distance, *expected);
6304
        }
6305
    }
6306
6307
    /// `ScalarValue::distance` must yield `None` for every pair listed here:
    /// same-type pairs containing nulls, pairs of different types (with and
    /// without nulls), and pairs of types the test treats as unsupported.
    #[test]
    fn test_scalar_distance_invalid() {
        let cases = [
            // scalar (lhs), scalar (rhs)
            // --------------------------
            // Same type but with nulls
            (ScalarValue::Int8(None), ScalarValue::Int8(None)),
            (ScalarValue::Int8(None), ScalarValue::Int8(Some(1))),
            (ScalarValue::Int8(Some(1)), ScalarValue::Int8(None)),
            // Different type
            (ScalarValue::Int8(Some(1)), ScalarValue::Int16(Some(1))),
            (ScalarValue::Int8(Some(1)), ScalarValue::Float32(Some(1.0))),
            (
                ScalarValue::Float16(Some(f16::from_f32(1.0))),
                ScalarValue::Float32(Some(1.0)),
            ),
            (
                ScalarValue::Float16(Some(f16::from_f32(1.0))),
                ScalarValue::Int32(Some(1)),
            ),
            (
                ScalarValue::Float64(Some(1.1)),
                ScalarValue::Float32(Some(2.2)),
            ),
            (
                ScalarValue::UInt64(Some(777)),
                ScalarValue::Int32(Some(111)),
            ),
            // Different types with nulls
            (ScalarValue::Int8(None), ScalarValue::Int16(Some(1))),
            (ScalarValue::Int8(Some(1)), ScalarValue::Int16(None)),
            // Unsupported types
            (ScalarValue::from("foo"), ScalarValue::from("bar")),
            (
                ScalarValue::Boolean(Some(true)),
                ScalarValue::Boolean(Some(false)),
            ),
            (ScalarValue::Date32(Some(0)), ScalarValue::Date32(Some(1))),
            (ScalarValue::Date64(Some(0)), ScalarValue::Date64(Some(1))),
            (
                ScalarValue::Decimal128(Some(123), 5, 5),
                ScalarValue::Decimal128(Some(120), 5, 5),
            ),
        ];
        for (lhs, rhs) in cases {
            let distance = lhs.distance(&rhs);
            // Name the offending pair so a failure in this table is actionable.
            assert!(
                distance.is_none(),
                "expected no distance between {lhs:?} and {rhs:?}, got {distance:?}"
            );
        }
    }
6356
6357
    /// `arithmetic_negate` on an interval scalar flips the sign of each
    /// component, for the year-month, day-time and month-day-nano flavours.
    #[test]
    fn test_scalar_interval_negate() {
        // (input, expected negation)
        let year_month = (
            ScalarValue::new_interval_ym(1, 12),
            ScalarValue::new_interval_ym(-1, -12),
        );
        let day_time = (
            ScalarValue::new_interval_dt(1, 999),
            ScalarValue::new_interval_dt(-1, -999),
        );
        let month_day_nano = (
            ScalarValue::new_interval_mdn(12, 15, 123_456),
            ScalarValue::new_interval_mdn(-12, -15, -123_456),
        );

        for (expr, expected) in &[year_month, day_time, month_day_nano] {
            let negated = expr.arithmetic_negate().unwrap();
            assert_eq!(*expected, negated, "-expr:{expr:?}");
        }
    }
6378
6379
    /// Adding an interval scalar to an identical one doubles each component,
    /// and swapping the operands gives the same result.
    #[test]
    fn test_scalar_interval_add() {
        // (lhs, rhs, lhs + rhs)
        let year_month = (
            ScalarValue::new_interval_ym(1, 12),
            ScalarValue::new_interval_ym(1, 12),
            ScalarValue::new_interval_ym(2, 24),
        );
        let day_time = (
            ScalarValue::new_interval_dt(1, 999),
            ScalarValue::new_interval_dt(1, 999),
            ScalarValue::new_interval_dt(2, 1998),
        );
        let month_day_nano = (
            ScalarValue::new_interval_mdn(12, 15, 123_456),
            ScalarValue::new_interval_mdn(12, 15, 123_456),
            ScalarValue::new_interval_mdn(24, 30, 246_912),
        );

        for (lhs, rhs, expected) in &[year_month, day_time, month_day_nano] {
            let sum = lhs.add(rhs).unwrap();
            let sum_commuted = rhs.add(lhs).unwrap();
            assert_eq!(*expected, sum, "lhs:{lhs:?} + rhs:{rhs:?}");
            assert_eq!(*expected, sum_commuted, "lhs:{rhs:?} + rhs:{lhs:?}");
        }
    }
6405
6406
    /// Subtracting an interval scalar from an identical one yields the zero
    /// interval of the same flavour.
    #[test]
    fn test_scalar_interval_sub() {
        // (lhs, rhs, lhs - rhs)
        let year_month = (
            ScalarValue::new_interval_ym(1, 12),
            ScalarValue::new_interval_ym(1, 12),
            ScalarValue::new_interval_ym(0, 0),
        );
        let day_time = (
            ScalarValue::new_interval_dt(1, 999),
            ScalarValue::new_interval_dt(1, 999),
            ScalarValue::new_interval_dt(0, 0),
        );
        let month_day_nano = (
            ScalarValue::new_interval_mdn(12, 15, 123_456),
            ScalarValue::new_interval_mdn(12, 15, 123_456),
            ScalarValue::new_interval_mdn(0, 0, 0),
        );

        for (lhs, rhs, expected) in &[year_month, day_time, month_day_nano] {
            let difference = lhs.sub(rhs).unwrap();
            assert_eq!(*expected, difference, "lhs:{lhs:?} - rhs:{rhs:?}");
        }
    }
6430
6431
    /// Round-trips random timestamps through interval arithmetic: adding an
    /// interval and subtracting it again (or the reverse) must give back the
    /// original timestamp.
    #[test]
    fn timestamp_op_random_tests() {
        // timestamp1 + (or -) interval = timestamp2
        // timestamp2 - timestamp1 (or timestamp1 - timestamp2) = interval ?
        let sample_size = 1000;
        let timestamps1 = get_random_timestamps(sample_size);
        let intervals = get_random_intervals(sample_size);
        // `zip` would silently truncate, so insist the samples line up.
        assert_eq!(timestamps1.len(), intervals.len());
        // ts(sec) + interval(ns) = ts(sec); however,
        // ts(sec) - ts(sec) cannot be = interval(ns). Therefore,
        // timestamps are more precise than intervals in tests.
        for (idx, (ts1, interval)) in timestamps1.iter().zip(&intervals).enumerate() {
            // Even samples go forward then back, odd samples back then forward.
            let back = if idx % 2 == 0 {
                let timestamp2 = ts1.add(interval).unwrap();
                timestamp2.sub(interval).unwrap()
            } else {
                let timestamp2 = ts1.sub(interval).unwrap();
                timestamp2.add(interval).unwrap()
            };
            // Inputs are random, so print them: without this a failure
            // cannot be reproduced.
            assert_eq!(
                ts1, &back,
                "round trip failed at idx:{idx} ts:{ts1:?} interval:{interval:?}"
            );
        }
    }
6453
6454
    /// Builds three single-row struct scalars (a null struct, a struct whose
    /// nested field `b` is null, and a fully valid struct) and checks that
    /// both `ScalarValue::iter_to_array` and per-scalar `to_array` followed by
    /// `concat` produce the expected null mask and rendering.
    #[test]
    fn test_struct_nulls() {
        let fields_b = Fields::from(vec![
            Field::new("ba", DataType::UInt64, true),
            Field::new("bb", DataType::UInt64, true),
        ]);
        let fields = Fields::from(vec![
            Field::new("a", DataType::UInt64, true),
            Field::new("b", DataType::Struct(fields_b.clone()), true),
        ]);

        // One-row UInt64 column holding `v`.
        let u64_column = |v: u64| Arc::new(UInt64Array::from(vec![Some(v)])) as ArrayRef;

        // Children of the nested struct `b`: {ba: 2, bb: 3}.
        let nested_children = vec![
            (Arc::clone(&fields_b[0]), u64_column(2)),
            (Arc::clone(&fields_b[1]), u64_column(3)),
        ];

        // {a: 1, b: {ba: 2, bb: 3}} with `b` valid.
        let struct_value = vec![
            (Arc::clone(&fields[0]), u64_column(1)),
            (
                Arc::clone(&fields[1]),
                Arc::new(StructArray::from(nested_children.clone())) as ArrayRef,
            ),
        ];

        // Same columns, but the nested struct carries a zeroed validity buffer.
        let struct_value_with_nulls = vec![
            (Arc::clone(&fields[0]), u64_column(1)),
            (
                Arc::clone(&fields[1]),
                Arc::new(StructArray::from((nested_children, Buffer::from(&[0]))))
                    as ArrayRef,
            ),
        ];

        // Wraps columns plus a validity buffer into a struct scalar.
        let to_scalar = |columns: Vec<(Arc<Field>, ArrayRef)>, validity: Buffer| {
            ScalarValue::Struct(Arc::new(StructArray::from((columns, validity))))
        };

        let scalars = vec![
            // all null
            to_scalar(struct_value.clone(), Buffer::from(&[0])),
            // field 1 valid, field 2 null
            to_scalar(struct_value_with_nulls.clone(), Buffer::from(&[1])),
            // all valid
            to_scalar(struct_value.clone(), Buffer::from(&[1])),
        ];

        let check_array = |array: ArrayRef| {
            let null_mask = is_null(&array).unwrap();
            assert_eq!(null_mask, BooleanArray::from(vec![true, false, false]));

            let table = pretty_format_columns("col", &[array]).unwrap().to_string();
            let formatted = table.split('\n').collect::<Vec<_>>();
            let expected = vec![
                "+---------------------------+",
                "| col                       |",
                "+---------------------------+",
                "|                           |",
                "| {a: 1, b: }               |",
                "| {a: 1, b: {ba: 2, bb: 3}} |",
                "+---------------------------+",
            ];
            assert_eq!(
                formatted, expected,
                "Actual:\n{formatted:#?}\n\nExpected:\n{expected:#?}"
            );
        };

        // test `ScalarValue::iter_to_array`
        check_array(ScalarValue::iter_to_array(scalars.clone()).unwrap());

        // test `ScalarValue::to_array` / `ScalarValue::to_array_of_size`
        let per_scalar_arrays = scalars
            .iter()
            .map(ScalarValue::to_array)
            .collect::<Result<Vec<_>>>()
            .expect("Failed to convert to array");
        let array_refs = per_scalar_arrays
            .iter()
            .map(|a| a.as_ref())
            .collect::<Vec<_>>();
        check_array(arrow::compute::concat(&array_refs).unwrap());
    }
6561
6562
    /// Checks the `Display` and `Debug` output of a struct scalar that has
    /// one null field, and compares it with arrow's table rendering.
    #[test]
    fn test_struct_display() {
        let s = ScalarStructBuilder::new()
            .with_scalar(
                Field::new("a", DataType::Int32, true),
                ScalarValue::from(1i32),
            )
            .with_scalar(
                Field::new("b", DataType::Utf8, true),
                ScalarValue::Utf8(None),
            )
            .build()
            .unwrap();

        assert_eq!(s.to_string(), "{a:1,b:}");
        assert_eq!(format!("{s:?}"), r#"Struct({a:1,b:})"#);

        let arr = match s {
            ScalarValue::Struct(arr) => arr,
            _ => panic!("Expected struct"),
        };

        //verify compared to arrow display
        let batch = RecordBatch::try_from_iter(vec![("s", arr as _)]).unwrap();
        let expected = [
            "+-------------+",
            "| s           |",
            "+-------------+",
            "| {a: 1, b: } |",
            "+-------------+",
        ];
        assert_batches_eq!(&expected, &[batch]);
    }
6591
6592
    /// A null struct scalar displays as `NULL`, while arrow renders the
    /// corresponding array cell as blank.
    #[test]
    fn test_struct_display_null() {
        let s =
            ScalarStructBuilder::new_null(vec![Field::new("a", DataType::Int32, false)]);
        assert_eq!(s.to_string(), "NULL");

        let arr = match s {
            ScalarValue::Struct(arr) => arr,
            _ => panic!("Expected struct"),
        };

        //verify compared to arrow display
        let batch = RecordBatch::try_from_iter(vec![("s", arr as _)]).unwrap();

        #[rustfmt::skip]
        let expected = [
            "+---+",
            "| s |",
            "+---+",
            "|   |",
            "+---+",
        ];
        assert_batches_eq!(&expected, &[batch]);
    }
6615
6616
    /// Builds a four-row map array (one entry, two entries, empty, null) and
    /// checks the `Display` and `Debug` output of the wrapping map scalar as
    /// well as arrow's table rendering of the same array.
    #[test]
    fn test_map_display_and_debug() {
        let mut builder =
            MapBuilder::new(None, StringBuilder::new(), Int32Builder::with_capacity(4));

        // row 0: {joe: 1}
        builder.keys().append_value("joe");
        builder.values().append_value(1);
        builder.append(true).unwrap();

        // row 1: {blogs: 2, foo: 4}
        builder.keys().append_value("blogs");
        builder.values().append_value(2);
        builder.keys().append_value("foo");
        builder.values().append_value(4);
        builder.append(true).unwrap();

        // row 2: {} (valid, no entries)
        builder.append(true).unwrap();

        // row 3: null
        builder.append(false).unwrap();

        let map_value = ScalarValue::Map(Arc::new(builder.finish()));

        assert_eq!(map_value.to_string(), "[{joe:1},{blogs:2,foo:4},{},NULL]");
        assert_eq!(
            format!("{map_value:?}"),
            r#"Map([{"joe":"1"},{"blogs":"2","foo":"4"},{},NULL])"#
        );

        let arr = match map_value {
            ScalarValue::Map(arr) => arr,
            _ => panic!("Expected map"),
        };

        //verify compared to arrow display
        let batch = RecordBatch::try_from_iter(vec![("m", arr as _)]).unwrap();
        let expected = [
            "+--------------------+",
            "| m                  |",
            "+--------------------+",
            "| {joe: 1}           |",
            "| {blogs: 2, foo: 4} |",
            "| {}                 |",
            "|                    |",
            "+--------------------+",
        ];
        assert_batches_eq!(&expected, &[batch]);
    }
6659
6660
    /// `Display` for the binary scalar variants prints `NULL` for a missing
    /// value and upper-case hex otherwise; the 11-byte inputs here come out
    /// as the first ten bytes followed by `...`.
    #[test]
    fn test_binary_display() {
        let three_bytes = || Some(vec![1u8, 2, 3]);
        let eleven_bytes = || Some(vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]);

        // (scalar, expected `Display` output)
        let cases = [
            (ScalarValue::Binary(None), "NULL"),
            (ScalarValue::Binary(Some(vec![42u8])), "2A"),
            (ScalarValue::Binary(three_bytes()), "010203"),
            (ScalarValue::Binary(eleven_bytes()), "0102030405060708090A..."),
            (ScalarValue::BinaryView(None), "NULL"),
            (ScalarValue::BinaryView(three_bytes()), "010203"),
            (
                ScalarValue::BinaryView(eleven_bytes()),
                "0102030405060708090A...",
            ),
            (ScalarValue::LargeBinary(None), "NULL"),
            (ScalarValue::LargeBinary(three_bytes()), "010203"),
            (
                ScalarValue::LargeBinary(eleven_bytes()),
                "0102030405060708090A...",
            ),
            (ScalarValue::FixedSizeBinary(3, None), "NULL"),
            (ScalarValue::FixedSizeBinary(3, three_bytes()), "010203"),
            (
                ScalarValue::FixedSizeBinary(11, eleven_bytes()),
                "0102030405060708090A...",
            ),
        ];

        for (value, expected) in cases {
            assert_eq!(format!("{value}"), expected);
        }
    }
6698
6699
    /// `Debug` for the binary scalar variants prints the variant name (plus
    /// the width for `FixedSizeBinary`) around either `NULL` or the quoted,
    /// comma-separated decimal bytes; the 11-byte inputs are printed in full.
    #[test]
    fn test_binary_debug() {
        let three_bytes = || Some(vec![1u8, 2, 3]);
        let eleven_bytes = || Some(vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]);

        // (scalar, expected `Debug` output)
        let cases = [
            (ScalarValue::Binary(None), "Binary(NULL)"),
            (ScalarValue::Binary(Some(vec![42u8])), "Binary(\"42\")"),
            (ScalarValue::Binary(three_bytes()), "Binary(\"1,2,3\")"),
            (
                ScalarValue::Binary(eleven_bytes()),
                "Binary(\"1,2,3,4,5,6,7,8,9,10,11\")",
            ),
            (ScalarValue::BinaryView(None), "BinaryView(NULL)"),
            (
                ScalarValue::BinaryView(three_bytes()),
                "BinaryView(\"1,2,3\")",
            ),
            (
                ScalarValue::BinaryView(eleven_bytes()),
                "BinaryView(\"1,2,3,4,5,6,7,8,9,10,11\")",
            ),
            (ScalarValue::LargeBinary(None), "LargeBinary(NULL)"),
            (
                ScalarValue::LargeBinary(three_bytes()),
                "LargeBinary(\"1,2,3\")",
            ),
            (
                ScalarValue::LargeBinary(eleven_bytes()),
                "LargeBinary(\"1,2,3,4,5,6,7,8,9,10,11\")",
            ),
            (
                ScalarValue::FixedSizeBinary(3, None),
                "FixedSizeBinary(3, NULL)",
            ),
            (
                ScalarValue::FixedSizeBinary(3, three_bytes()),
                "FixedSizeBinary(3, \"1,2,3\")",
            ),
            (
                ScalarValue::FixedSizeBinary(11, eleven_bytes()),
                "FixedSizeBinary(11, \"1,2,3,4,5,6,7,8,9,10,11\")",
            ),
        ];

        for (value, expected) in cases {
            assert_eq!(format!("{value:?}"), expected);
        }
    }
6752
6753
    /// A one-element list built from a millisecond timestamp scalar (no
    /// timezone) has length 1.
    #[test]
    fn test_build_timestamp_millisecond_list() {
        let element_type = DataType::Timestamp(TimeUnit::Millisecond, None);
        let elements = vec![ScalarValue::TimestampMillisecond(Some(1), None)];

        let list = ScalarValue::new_list_nullable(&elements, &element_type);

        assert_eq!(1, list.len());
    }
6762
6763
    /// A list built from a timezone-aware millisecond timestamp keeps the
    /// timezone in the list's nullable `item` field type.
    #[test]
    fn test_newlist_timestamp_zone() {
        let tz: &'static str = "UTC";
        let element_type = DataType::Timestamp(TimeUnit::Millisecond, Some(tz.into()));
        let elements = vec![ScalarValue::TimestampMillisecond(
            Some(1),
            Some(tz.into()),
        )];

        let list = ScalarValue::new_list_nullable(&elements, &element_type);
        assert_eq!(1, list.len());

        let expected_type =
            DataType::List(Arc::new(Field::new("item", element_type, true)));
        assert_eq!(list.data_type(), &expected_type);
    }
6781
6782
    fn get_random_timestamps(sample_size: u64) -> Vec<ScalarValue> {
6783
        let vector_size = sample_size;
6784
        let mut timestamp = vec![];
6785
        let mut rng = rand::thread_rng();
6786
        for i in 0..vector_size {
6787
            let year = rng.gen_range(1995..=2050);
6788
            let month = rng.gen_range(1..=12);
6789
            let day = rng.gen_range(1..=28); // to exclude invalid dates
6790
            let hour = rng.gen_range(0..=23);
6791
            let minute = rng.gen_range(0..=59);
6792
            let second = rng.gen_range(0..=59);
6793
            if i % 4 == 0 {
6794
                timestamp.push(ScalarValue::TimestampSecond(
6795
                    Some(
6796
                        NaiveDate::from_ymd_opt(year, month, day)
6797
                            .unwrap()
6798
                            .and_hms_opt(hour, minute, second)
6799
                            .unwrap()
6800
                            .and_utc()
6801
                            .timestamp(),
6802
                    ),
6803
                    None,
6804
                ))
6805
            } else if i % 4 == 1 {
6806
                let millisec = rng.gen_range(0..=999);
6807
                timestamp.push(ScalarValue::TimestampMillisecond(
6808
                    Some(
6809
                        NaiveDate::from_ymd_opt(year, month, day)
6810
                            .unwrap()
6811
                            .and_hms_milli_opt(hour, minute, second, millisec)
6812
                            .unwrap()
6813
                            .and_utc()
6814
                            .timestamp_millis(),
6815
                    ),
6816
                    None,
6817
                ))
6818
            } else if i % 4 == 2 {
6819
                let microsec = rng.gen_range(0..=999_999);
6820
                timestamp.push(ScalarValue::TimestampMicrosecond(
6821
                    Some(
6822
                        NaiveDate::from_ymd_opt(year, month, day)
6823
                            .unwrap()
6824
                            .and_hms_micro_opt(hour, minute, second, microsec)
6825
                            .unwrap()
6826
                            .and_utc()
6827
                            .timestamp_micros(),
6828
                    ),
6829
                    None,
6830
                ))
6831
            } else if i % 4 == 3 {
6832
                let nanosec = rng.gen_range(0..=999_999_999);
6833
                timestamp.push(ScalarValue::TimestampNanosecond(
6834
                    Some(
6835
                        NaiveDate::from_ymd_opt(year, month, day)
6836
                            .unwrap()
6837
                            .and_hms_nano_opt(hour, minute, second, nanosec)
6838
                            .unwrap()
6839
                            .and_utc()
6840
                            .timestamp_nanos_opt()
6841
                            .unwrap(),
6842
                    ),
6843
                    None,
6844
                ))
6845
            }
6846
        }
6847
        timestamp
6848
    }
6849
6850
    fn get_random_intervals(sample_size: u64) -> Vec<ScalarValue> {
6851
        const MILLISECS_IN_ONE_DAY: i64 = 86_400_000;
6852
        const NANOSECS_IN_ONE_DAY: i64 = 86_400_000_000_000;
6853
6854
        let vector_size = sample_size;
6855
        let mut intervals = vec![];
6856
        let mut rng = rand::thread_rng();
6857
        const SECS_IN_ONE_DAY: i32 = 86_400;
6858
        const MICROSECS_IN_ONE_DAY: i64 = 86_400_000_000;
6859
        for i in 0..vector_size {
6860
            if i % 4 == 0 {
6861
                let days = rng.gen_range(0..5000);
6862
                // to not break second precision
6863
                let millis = rng.gen_range(0..SECS_IN_ONE_DAY) * 1000;
6864
                intervals.push(ScalarValue::new_interval_dt(days, millis));
6865
            } else if i % 4 == 1 {
6866
                let days = rng.gen_range(0..5000);
6867
                let millisec = rng.gen_range(0..(MILLISECS_IN_ONE_DAY as i32));
6868
                intervals.push(ScalarValue::new_interval_dt(days, millisec));
6869
            } else if i % 4 == 2 {
6870
                let days = rng.gen_range(0..5000);
6871
                // to not break microsec precision
6872
                let nanosec = rng.gen_range(0..MICROSECS_IN_ONE_DAY) * 1000;
6873
                intervals.push(ScalarValue::new_interval_mdn(0, days, nanosec));
6874
            } else {
6875
                let days = rng.gen_range(0..5000);
6876
                let nanosec = rng.gen_range(0..NANOSECS_IN_ONE_DAY);
6877
                intervals.push(ScalarValue::new_interval_mdn(0, days, nanosec));
6878
            }
6879
        }
6880
        intervals
6881
    }
6882
6883
    fn union_fields() -> UnionFields {
6884
        [
6885
            (0, Arc::new(Field::new("A", DataType::Int32, true))),
6886
            (1, Arc::new(Field::new("B", DataType::Float64, true))),
6887
        ]
6888
        .into_iter()
6889
        .collect()
6890
    }
6891
6892
    /// A sparse union scalar whose selected variant holds a null value
    /// reports itself as null.
    #[test]
    fn sparse_scalar_union_is_null() {
        let null_variant = Some((0_i8, Box::new(ScalarValue::Int32(None))));
        let sparse_scalar =
            ScalarValue::Union(null_variant, union_fields(), UnionMode::Sparse);
        assert!(sparse_scalar.is_null());
    }
6901
6902
    /// A dense union scalar whose selected variant holds a null value
    /// reports itself as null.
    #[test]
    fn dense_scalar_union_is_null() {
        let null_variant = Some((0_i8, Box::new(ScalarValue::Int32(None))));
        let dense_scalar =
            ScalarValue::Union(null_variant, union_fields(), UnionMode::Dense);
        assert!(dense_scalar.is_null());
    }
6911
6912
    /// A dictionary scalar wrapping `ScalarValue::Null` is itself null, and
    /// converting it to an array yields a null entry at index 0.
    #[test]
    fn null_dictionary_scalar_produces_null_dictionary_array() {
        let key_type = Box::new(DataType::Int32);
        let null_value = Box::new(ScalarValue::Null);
        let dictionary_scalar = ScalarValue::Dictionary(key_type, null_value);
        assert!(dictionary_scalar.is_null());

        let dictionary_array = dictionary_scalar.to_array().unwrap();
        assert!(dictionary_array.is_null(0));
    }
6922
}