/Users/andrewlamb/Software/datafusion/datafusion/common/src/scalar/mod.rs
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | //! [`ScalarValue`]: stores single values |
19 | | |
20 | | mod consts; |
21 | | mod struct_builder; |
22 | | |
23 | | use std::borrow::Borrow; |
24 | | use std::cmp::Ordering; |
25 | | use std::collections::{HashSet, VecDeque}; |
26 | | use std::convert::Infallible; |
27 | | use std::fmt; |
28 | | use std::hash::Hash; |
29 | | use std::hash::Hasher; |
30 | | use std::iter::repeat; |
31 | | use std::str::FromStr; |
32 | | use std::sync::Arc; |
33 | | |
34 | | use crate::arrow_datafusion_err; |
35 | | use crate::cast::{ |
36 | | as_decimal128_array, as_decimal256_array, as_dictionary_array, |
37 | | as_fixed_size_binary_array, as_fixed_size_list_array, |
38 | | }; |
39 | | use crate::error::{DataFusionError, Result, _exec_err, _internal_err, _not_impl_err}; |
40 | | use crate::hash_utils::create_hashes; |
41 | | use crate::utils::{ |
42 | | array_into_fixed_size_list_array, array_into_large_list_array, array_into_list_array, |
43 | | }; |
44 | | use arrow::compute::kernels::numeric::*; |
45 | | use arrow::util::display::{array_value_to_string, ArrayFormatter, FormatOptions}; |
46 | | use arrow::{ |
47 | | array::*, |
48 | | compute::kernels::cast::{cast_with_options, CastOptions}, |
49 | | datatypes::{ |
50 | | i256, ArrowDictionaryKeyType, ArrowNativeType, ArrowTimestampType, DataType, |
51 | | Date32Type, Date64Type, Field, Float32Type, Int16Type, Int32Type, Int64Type, |
52 | | Int8Type, IntervalDayTimeType, IntervalMonthDayNanoType, IntervalUnit, |
53 | | IntervalYearMonthType, TimeUnit, TimestampMicrosecondType, |
54 | | TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType, |
55 | | UInt16Type, UInt32Type, UInt64Type, UInt8Type, DECIMAL128_MAX_PRECISION, |
56 | | }, |
57 | | }; |
58 | | use arrow_buffer::{IntervalDayTime, IntervalMonthDayNano, ScalarBuffer}; |
59 | | use arrow_schema::{UnionFields, UnionMode}; |
60 | | |
61 | | use half::f16; |
62 | | pub use struct_builder::ScalarStructBuilder; |
63 | | |
64 | | /// A dynamically typed, nullable single value. |
65 | | /// |
66 | | /// While an arrow [`Array`]) stores one or more values of the same type, in a |
67 | | /// single column, a `ScalarValue` stores a single value of a single type, the |
68 | | /// equivalent of 1 row and one column. |
69 | | /// |
70 | | /// ```text |
71 | | /// ┌────────┐ |
72 | | /// │ value1 │ |
73 | | /// │ value2 │ ┌────────┐ |
74 | | /// │ value3 │ │ value2 │ |
75 | | /// │ ... │ └────────┘ |
76 | | /// │ valueN │ |
77 | | /// └────────┘ |
78 | | /// |
79 | | /// Array ScalarValue |
80 | | /// |
81 | | /// stores multiple, stores a single, |
82 | | /// possibly null, values of possible null, value |
83 | | /// the same type |
84 | | /// ``` |
85 | | /// |
86 | | /// # Performance |
87 | | /// |
88 | | /// In general, performance will be better using arrow [`Array`]s rather than |
89 | | /// [`ScalarValue`], as it is far more efficient to process multiple values at |
90 | | /// once (vectorized processing). |
91 | | /// |
92 | | /// # Example |
93 | | /// ``` |
94 | | /// # use datafusion_common::ScalarValue; |
95 | | /// // Create single scalar value for an Int32 value |
96 | | /// let s1 = ScalarValue::Int32(Some(10)); |
97 | | /// |
98 | | /// // You can also create values using the From impl: |
99 | | /// let s2 = ScalarValue::from(10i32); |
100 | | /// assert_eq!(s1, s2); |
101 | | /// ``` |
102 | | /// |
103 | | /// # Null Handling |
104 | | /// |
105 | | /// `ScalarValue` represents null values in the same way as Arrow. Nulls are |
106 | | /// "typed" in the sense that a null value in an [`Int32Array`] is different |
107 | | /// from a null value in a [`Float64Array`], and is different from the values in |
108 | | /// a [`NullArray`]. |
109 | | /// |
110 | | /// ``` |
111 | | /// # fn main() -> datafusion_common::Result<()> { |
112 | | /// # use std::collections::hash_set::Difference; |
113 | | /// # use datafusion_common::ScalarValue; |
114 | | /// # use arrow::datatypes::DataType; |
115 | | /// // You can create a 'null' Int32 value directly: |
116 | | /// let s1 = ScalarValue::Int32(None); |
117 | | /// |
118 | | /// // You can also create a null value for a given datatype: |
119 | | /// let s2 = ScalarValue::try_from(&DataType::Int32)?; |
120 | | /// assert_eq!(s1, s2); |
121 | | /// |
122 | | /// // Note that this is DIFFERENT than a `ScalarValue::Null` |
123 | | /// let s3 = ScalarValue::Null; |
124 | | /// assert_ne!(s1, s3); |
125 | | /// # Ok(()) |
126 | | /// # } |
127 | | /// ``` |
128 | | /// |
129 | | /// # Nested Types |
130 | | /// |
131 | | /// `List` / `LargeList` / `FixedSizeList` / `Struct` / `Map` are represented as a |
132 | | /// single element array of the corresponding type. |
133 | | /// |
134 | | /// ## Example: Creating [`ScalarValue::Struct`] using [`ScalarStructBuilder`] |
135 | | /// ``` |
136 | | /// # use std::sync::Arc; |
137 | | /// # use arrow::datatypes::{DataType, Field}; |
138 | | /// # use datafusion_common::{ScalarValue, scalar::ScalarStructBuilder}; |
139 | | /// // Build a struct like: {a: 1, b: "foo"} |
140 | | /// let field_a = Field::new("a", DataType::Int32, false); |
141 | | /// let field_b = Field::new("b", DataType::Utf8, false); |
142 | | /// |
143 | | /// let s1 = ScalarStructBuilder::new() |
144 | | /// .with_scalar(field_a, ScalarValue::from(1i32)) |
145 | | /// .with_scalar(field_b, ScalarValue::from("foo")) |
146 | | /// .build(); |
147 | | /// ``` |
148 | | /// |
149 | | /// ## Example: Creating a null [`ScalarValue::Struct`] using [`ScalarStructBuilder`] |
150 | | /// ``` |
151 | | /// # use std::sync::Arc; |
152 | | /// # use arrow::datatypes::{DataType, Field}; |
153 | | /// # use datafusion_common::{ScalarValue, scalar::ScalarStructBuilder}; |
154 | | /// // Build a struct representing a NULL value |
155 | | /// let fields = vec![ |
156 | | /// Field::new("a", DataType::Int32, false), |
157 | | /// Field::new("b", DataType::Utf8, false), |
158 | | /// ]; |
159 | | /// |
160 | | /// let s1 = ScalarStructBuilder::new_null(fields); |
161 | | /// ``` |
162 | | /// |
163 | | /// ## Example: Creating [`ScalarValue::Struct`] directly |
164 | | /// ``` |
165 | | /// # use std::sync::Arc; |
166 | | /// # use arrow::datatypes::{DataType, Field, Fields}; |
167 | | /// # use arrow_array::{ArrayRef, Int32Array, StructArray, StringArray}; |
168 | | /// # use datafusion_common::ScalarValue; |
169 | | /// // Build a struct like: {a: 1, b: "foo"} |
170 | | /// // Field description |
171 | | /// let fields = Fields::from(vec![ |
172 | | /// Field::new("a", DataType::Int32, false), |
173 | | /// Field::new("b", DataType::Utf8, false), |
174 | | /// ]); |
175 | | /// // one row arrays for each field |
176 | | /// let arrays: Vec<ArrayRef> = vec![ |
177 | | /// Arc::new(Int32Array::from(vec![1])), |
178 | | /// Arc::new(StringArray::from(vec!["foo"])), |
179 | | /// ]; |
180 | | /// // no nulls for this array |
181 | | /// let nulls = None; |
182 | | /// let arr = StructArray::new(fields, arrays, nulls); |
183 | | /// |
184 | | /// // Create a ScalarValue::Struct directly |
185 | | /// let s1 = ScalarValue::Struct(Arc::new(arr)); |
186 | | /// ``` |
187 | | /// |
188 | | /// |
189 | | /// # Further Reading |
190 | | /// See [datatypes](https://arrow.apache.org/docs/python/api/datatypes.html) for |
191 | | /// details on datatypes and the [format](https://github.com/apache/arrow/blob/master/format/Schema.fbs#L354-L375) |
192 | | /// for the definitive reference. |
193 | | #[derive(Clone)] |
194 | | pub enum ScalarValue { |
195 | | /// represents `DataType::Null` (castable to/from any other type) |
196 | | Null, |
197 | | /// true or false value |
198 | | Boolean(Option<bool>), |
199 | | /// 16bit float |
200 | | Float16(Option<f16>), |
201 | | /// 32bit float |
202 | | Float32(Option<f32>), |
203 | | /// 64bit float |
204 | | Float64(Option<f64>), |
205 | | /// 128bit decimal, using the i128 to represent the decimal, precision scale |
206 | | Decimal128(Option<i128>, u8, i8), |
207 | | /// 256bit decimal, using the i256 to represent the decimal, precision scale |
208 | | Decimal256(Option<i256>, u8, i8), |
209 | | /// signed 8bit int |
210 | | Int8(Option<i8>), |
211 | | /// signed 16bit int |
212 | | Int16(Option<i16>), |
213 | | /// signed 32bit int |
214 | | Int32(Option<i32>), |
215 | | /// signed 64bit int |
216 | | Int64(Option<i64>), |
217 | | /// unsigned 8bit int |
218 | | UInt8(Option<u8>), |
219 | | /// unsigned 16bit int |
220 | | UInt16(Option<u16>), |
221 | | /// unsigned 32bit int |
222 | | UInt32(Option<u32>), |
223 | | /// unsigned 64bit int |
224 | | UInt64(Option<u64>), |
225 | | /// utf-8 encoded string. |
226 | | Utf8(Option<String>), |
227 | | /// utf-8 encoded string but from view types. |
228 | | Utf8View(Option<String>), |
229 | | /// utf-8 encoded string representing a LargeString's arrow type. |
230 | | LargeUtf8(Option<String>), |
231 | | /// binary |
232 | | Binary(Option<Vec<u8>>), |
233 | | /// binary but from view types. |
234 | | BinaryView(Option<Vec<u8>>), |
235 | | /// fixed size binary |
236 | | FixedSizeBinary(i32, Option<Vec<u8>>), |
237 | | /// large binary |
238 | | LargeBinary(Option<Vec<u8>>), |
239 | | /// Fixed size list scalar. |
240 | | /// |
241 | | /// The array must be a FixedSizeListArray with length 1. |
242 | | FixedSizeList(Arc<FixedSizeListArray>), |
243 | | /// Represents a single element of a [`ListArray`] as an [`ArrayRef`] |
244 | | /// |
245 | | /// The array must be a ListArray with length 1. |
246 | | List(Arc<ListArray>), |
247 | | /// The array must be a LargeListArray with length 1. |
248 | | LargeList(Arc<LargeListArray>), |
249 | | /// Represents a single element [`StructArray`] as an [`ArrayRef`]. See |
250 | | /// [`ScalarValue`] for examples of how to create instances of this type. |
251 | | Struct(Arc<StructArray>), |
252 | | /// Represents a single element [`MapArray`] as an [`ArrayRef`]. |
253 | | Map(Arc<MapArray>), |
254 | | /// Date stored as a signed 32bit int days since UNIX epoch 1970-01-01 |
255 | | Date32(Option<i32>), |
256 | | /// Date stored as a signed 64bit int milliseconds since UNIX epoch 1970-01-01 |
257 | | Date64(Option<i64>), |
258 | | /// Time stored as a signed 32bit int as seconds since midnight |
259 | | Time32Second(Option<i32>), |
260 | | /// Time stored as a signed 32bit int as milliseconds since midnight |
261 | | Time32Millisecond(Option<i32>), |
262 | | /// Time stored as a signed 64bit int as microseconds since midnight |
263 | | Time64Microsecond(Option<i64>), |
264 | | /// Time stored as a signed 64bit int as nanoseconds since midnight |
265 | | Time64Nanosecond(Option<i64>), |
266 | | /// Timestamp Second |
267 | | TimestampSecond(Option<i64>, Option<Arc<str>>), |
268 | | /// Timestamp Milliseconds |
269 | | TimestampMillisecond(Option<i64>, Option<Arc<str>>), |
270 | | /// Timestamp Microseconds |
271 | | TimestampMicrosecond(Option<i64>, Option<Arc<str>>), |
272 | | /// Timestamp Nanoseconds |
273 | | TimestampNanosecond(Option<i64>, Option<Arc<str>>), |
274 | | /// Number of elapsed whole months |
275 | | IntervalYearMonth(Option<i32>), |
276 | | /// Number of elapsed days and milliseconds (no leap seconds) |
277 | | /// stored as 2 contiguous 32-bit signed integers |
278 | | IntervalDayTime(Option<IntervalDayTime>), |
279 | | /// A triple of the number of elapsed months, days, and nanoseconds. |
280 | | /// Months and days are encoded as 32-bit signed integers. |
281 | | /// Nanoseconds is encoded as a 64-bit signed integer (no leap seconds). |
282 | | IntervalMonthDayNano(Option<IntervalMonthDayNano>), |
283 | | /// Duration in seconds |
284 | | DurationSecond(Option<i64>), |
285 | | /// Duration in milliseconds |
286 | | DurationMillisecond(Option<i64>), |
287 | | /// Duration in microseconds |
288 | | DurationMicrosecond(Option<i64>), |
289 | | /// Duration in nanoseconds |
290 | | DurationNanosecond(Option<i64>), |
291 | | /// A nested datatype that can represent slots of differing types. Components: |
292 | | /// `.0`: a tuple of union `type_id` and the single value held by this Scalar |
293 | | /// `.1`: the list of fields, zero-to-one of which will by set in `.0` |
294 | | /// `.2`: the physical storage of the source/destination UnionArray from which this Scalar came |
295 | | Union(Option<(i8, Box<ScalarValue>)>, UnionFields, UnionMode), |
296 | | /// Dictionary type: index type and value |
297 | | Dictionary(Box<DataType>, Box<ScalarValue>), |
298 | | } |
299 | | |
300 | | impl Hash for Fl<f16> { |
301 | 0 | fn hash<H: Hasher>(&self, state: &mut H) { |
302 | 0 | self.0.to_bits().hash(state); |
303 | 0 | } |
304 | | } |
305 | | |
306 | | // manual implementation of `PartialEq` |
307 | | impl PartialEq for ScalarValue { |
308 | 103k | fn eq(&self, other: &Self) -> bool { |
309 | | use ScalarValue::*; |
310 | | // This purposely doesn't have a catch-all "(_, _)" so that |
311 | | // any newly added enum variant will require editing this list |
312 | | // or else face a compile error |
313 | 103k | match (self, other) { |
314 | 0 | (Decimal128(v1, p1, s1), Decimal128(v2, p2, s2)) => { |
315 | 0 | v1.eq(v2) && p1.eq(p2) && s1.eq(s2) |
316 | | } |
317 | 0 | (Decimal128(_, _, _), _) => false, |
318 | 0 | (Decimal256(v1, p1, s1), Decimal256(v2, p2, s2)) => { |
319 | 0 | v1.eq(v2) && p1.eq(p2) && s1.eq(s2) |
320 | | } |
321 | 0 | (Decimal256(_, _, _), _) => false, |
322 | 86.2k | (Boolean(v1), Boolean(v2)) => v1.eq(v2), |
323 | 0 | (Boolean(_), _) => false, |
324 | 4 | (Float32(v1), Float32(v2)) => match (v1, v2) { |
325 | 4 | (Some(f1), Some(f2)) => f1.to_bits() == f2.to_bits(), |
326 | 0 | _ => v1.eq(v2), |
327 | | }, |
328 | 0 | (Float16(v1), Float16(v2)) => match (v1, v2) { |
329 | 0 | (Some(f1), Some(f2)) => f1.to_bits() == f2.to_bits(), |
330 | 0 | _ => v1.eq(v2), |
331 | | }, |
332 | 4 | (Float32(_), _) => false, |
333 | 0 | (Float16(_), _) => false, |
334 | 5.92k | (Float64(v1), Float64(v2)) => match (v1, v2) { |
335 | 4.96k | (Some(f1), Some(f2)) => f1.to_bits() == f2.to_bits(), |
336 | 960 | _ => v1.eq(v2), |
337 | | }, |
338 | 0 | (Float64(_), _) => false, |
339 | 0 | (Int8(v1), Int8(v2)) => v1.eq(v2), |
340 | 0 | (Int8(_), _) => false, |
341 | 0 | (Int16(v1), Int16(v2)) => v1.eq(v2), |
342 | 0 | (Int16(_), _) => false, |
343 | 6.88k | (Int32(v1), Int32(v2)) => v1.eq(v2), |
344 | 0 | (Int32(_), _) => false, |
345 | 380 | (Int64(v1), Int64(v2)) => v1.eq(v2), |
346 | 0 | (Int64(_), _) => false, |
347 | 0 | (UInt8(v1), UInt8(v2)) => v1.eq(v2), |
348 | 0 | (UInt8(_), _) => false, |
349 | 0 | (UInt16(v1), UInt16(v2)) => v1.eq(v2), |
350 | 0 | (UInt16(_), _) => false, |
351 | 1 | (UInt32(v1), UInt32(v2)) => v1.eq(v2), |
352 | 0 | (UInt32(_), _) => false, |
353 | 0 | (UInt64(v1), UInt64(v2)) => v1.eq(v2), |
354 | 0 | (UInt64(_), _) => false, |
355 | 8 | (Utf8(v1), Utf8(v2)) => v1.eq(v2), |
356 | 0 | (Utf8(_), _) => false, |
357 | 0 | (Utf8View(v1), Utf8View(v2)) => v1.eq(v2), |
358 | 0 | (Utf8View(_), _) => false, |
359 | 0 | (LargeUtf8(v1), LargeUtf8(v2)) => v1.eq(v2), |
360 | 0 | (LargeUtf8(_), _) => false, |
361 | 0 | (Binary(v1), Binary(v2)) => v1.eq(v2), |
362 | 0 | (Binary(_), _) => false, |
363 | 0 | (BinaryView(v1), BinaryView(v2)) => v1.eq(v2), |
364 | 0 | (BinaryView(_), _) => false, |
365 | 0 | (FixedSizeBinary(_, v1), FixedSizeBinary(_, v2)) => v1.eq(v2), |
366 | 0 | (FixedSizeBinary(_, _), _) => false, |
367 | 0 | (LargeBinary(v1), LargeBinary(v2)) => v1.eq(v2), |
368 | 0 | (LargeBinary(_), _) => false, |
369 | 0 | (FixedSizeList(v1), FixedSizeList(v2)) => v1.eq(v2), |
370 | 0 | (FixedSizeList(_), _) => false, |
371 | 0 | (List(v1), List(v2)) => v1.eq(v2), |
372 | 0 | (List(_), _) => false, |
373 | 0 | (LargeList(v1), LargeList(v2)) => v1.eq(v2), |
374 | 0 | (LargeList(_), _) => false, |
375 | 0 | (Struct(v1), Struct(v2)) => v1.eq(v2), |
376 | 0 | (Struct(_), _) => false, |
377 | 0 | (Map(v1), Map(v2)) => v1.eq(v2), |
378 | 0 | (Map(_), _) => false, |
379 | 0 | (Date32(v1), Date32(v2)) => v1.eq(v2), |
380 | 0 | (Date32(_), _) => false, |
381 | 0 | (Date64(v1), Date64(v2)) => v1.eq(v2), |
382 | 0 | (Date64(_), _) => false, |
383 | 0 | (Time32Second(v1), Time32Second(v2)) => v1.eq(v2), |
384 | 0 | (Time32Second(_), _) => false, |
385 | 0 | (Time32Millisecond(v1), Time32Millisecond(v2)) => v1.eq(v2), |
386 | 0 | (Time32Millisecond(_), _) => false, |
387 | 0 | (Time64Microsecond(v1), Time64Microsecond(v2)) => v1.eq(v2), |
388 | 0 | (Time64Microsecond(_), _) => false, |
389 | 0 | (Time64Nanosecond(v1), Time64Nanosecond(v2)) => v1.eq(v2), |
390 | 0 | (Time64Nanosecond(_), _) => false, |
391 | 0 | (TimestampSecond(v1, _), TimestampSecond(v2, _)) => v1.eq(v2), |
392 | 0 | (TimestampSecond(_, _), _) => false, |
393 | 2.11k | (TimestampMillisecond(v1, _), TimestampMillisecond(v2, _)) => v1.eq(v2), |
394 | 0 | (TimestampMillisecond(_, _), _) => false, |
395 | 0 | (TimestampMicrosecond(v1, _), TimestampMicrosecond(v2, _)) => v1.eq(v2), |
396 | 0 | (TimestampMicrosecond(_, _), _) => false, |
397 | 0 | (TimestampNanosecond(v1, _), TimestampNanosecond(v2, _)) => v1.eq(v2), |
398 | 0 | (TimestampNanosecond(_, _), _) => false, |
399 | 0 | (DurationSecond(v1), DurationSecond(v2)) => v1.eq(v2), |
400 | 0 | (DurationSecond(_), _) => false, |
401 | 576 | (DurationMillisecond(v1), DurationMillisecond(v2)) => v1.eq(v2), |
402 | 0 | (DurationMillisecond(_), _) => false, |
403 | 0 | (DurationMicrosecond(v1), DurationMicrosecond(v2)) => v1.eq(v2), |
404 | 0 | (DurationMicrosecond(_), _) => false, |
405 | 0 | (DurationNanosecond(v1), DurationNanosecond(v2)) => v1.eq(v2), |
406 | 0 | (DurationNanosecond(_), _) => false, |
407 | 0 | (IntervalYearMonth(v1), IntervalYearMonth(v2)) => v1.eq(v2), |
408 | 0 | (IntervalYearMonth(_), _) => false, |
409 | 1.50k | (IntervalDayTime(v1), IntervalDayTime(v2)) => v1.eq(v2), |
410 | 0 | (IntervalDayTime(_), _) => false, |
411 | 0 | (IntervalMonthDayNano(v1), IntervalMonthDayNano(v2)) => v1.eq(v2), |
412 | 0 | (IntervalMonthDayNano(_), _) => false, |
413 | 2 | (Union(val1, fields1, mode1), Union(val2, fields2, mode2)) => { |
414 | 2 | val1.eq(val2) && fields1.eq(fields2) && mode1.eq(mode2) |
415 | | } |
416 | 0 | (Union(_, _, _), _) => false, |
417 | 0 | (Dictionary(k1, v1), Dictionary(k2, v2)) => k1.eq(k2) && v1.eq(v2), |
418 | 0 | (Dictionary(_, _), _) => false, |
419 | 0 | (Null, Null) => true, |
420 | 0 | (Null, _) => false, |
421 | | } |
422 | 103k | } |
423 | | } |
424 | | |
425 | | // manual implementation of `PartialOrd` |
426 | | impl PartialOrd for ScalarValue { |
427 | 157k | fn partial_cmp(&self, other: &Self) -> Option<Ordering> { |
428 | | use ScalarValue::*; |
429 | | // This purposely doesn't have a catch-all "(_, _)" so that |
430 | | // any newly added enum variant will require editing this list |
431 | | // or else face a compile error |
432 | 157k | match (self, other) { |
433 | 4 | (Decimal128(v1, p1, s1), Decimal128(v2, p2, s2)) => { |
434 | 4 | if p1.eq(p2) && s1.eq(s2) { |
435 | 4 | v1.partial_cmp(v2) |
436 | | } else { |
437 | | // Two decimal values can be compared if they have the same precision and scale. |
438 | 0 | None |
439 | | } |
440 | | } |
441 | 0 | (Decimal128(_, _, _), _) => None, |
442 | 0 | (Decimal256(v1, p1, s1), Decimal256(v2, p2, s2)) => { |
443 | 0 | if p1.eq(p2) && s1.eq(s2) { |
444 | 0 | v1.partial_cmp(v2) |
445 | | } else { |
446 | | // Two decimal values can be compared if they have the same precision and scale. |
447 | 0 | None |
448 | | } |
449 | | } |
450 | 0 | (Decimal256(_, _, _), _) => None, |
451 | 86.3k | (Boolean(v1), Boolean(v2)) => v1.partial_cmp(v2), |
452 | 0 | (Boolean(_), _) => None, |
453 | 14 | (Float32(v1), Float32(v2)) => match (v1, v2) { |
454 | 14 | (Some(f1), Some(f2)) => Some(f1.total_cmp(f2)), |
455 | 0 | _ => v1.partial_cmp(v2), |
456 | | }, |
457 | 0 | (Float16(v1), Float16(v2)) => match (v1, v2) { |
458 | 0 | (Some(f1), Some(f2)) => Some(f1.total_cmp(f2)), |
459 | 0 | _ => v1.partial_cmp(v2), |
460 | | }, |
461 | 0 | (Float32(_), _) => None, |
462 | 0 | (Float16(_), _) => None, |
463 | 23.1k | (Float64(v1), Float64(v2)) => match (v1, v2) { |
464 | 23.1k | (Some(f1), Some(f2)) => Some(f1.total_cmp(f2)), |
465 | 0 | _ => v1.partial_cmp(v2), |
466 | | }, |
467 | 0 | (Float64(_), _) => None, |
468 | 0 | (Int8(v1), Int8(v2)) => v1.partial_cmp(v2), |
469 | 0 | (Int8(_), _) => None, |
470 | 0 | (Int16(v1), Int16(v2)) => v1.partial_cmp(v2), |
471 | 0 | (Int16(_), _) => None, |
472 | 26.4k | (Int32(v1), Int32(v2)) => v1.partial_cmp(v2), |
473 | 0 | (Int32(_), _) => None, |
474 | 2.58k | (Int64(v1), Int64(v2)) => v1.partial_cmp(v2), |
475 | 0 | (Int64(_), _) => None, |
476 | 0 | (UInt8(v1), UInt8(v2)) => v1.partial_cmp(v2), |
477 | 0 | (UInt8(_), _) => None, |
478 | 0 | (UInt16(v1), UInt16(v2)) => v1.partial_cmp(v2), |
479 | 0 | (UInt16(_), _) => None, |
480 | 0 | (UInt32(v1), UInt32(v2)) => v1.partial_cmp(v2), |
481 | 0 | (UInt32(_), _) => None, |
482 | 50 | (UInt64(v1), UInt64(v2)) => v1.partial_cmp(v2), |
483 | 0 | (UInt64(_), _) => None, |
484 | 2 | (Utf8(v1), Utf8(v2)) => v1.partial_cmp(v2), |
485 | 0 | (Utf8(_), _) => None, |
486 | 0 | (LargeUtf8(v1), LargeUtf8(v2)) => v1.partial_cmp(v2), |
487 | 0 | (LargeUtf8(_), _) => None, |
488 | 0 | (Utf8View(v1), Utf8View(v2)) => v1.partial_cmp(v2), |
489 | 0 | (Utf8View(_), _) => None, |
490 | 0 | (Binary(v1), Binary(v2)) => v1.partial_cmp(v2), |
491 | 0 | (Binary(_), _) => None, |
492 | 0 | (BinaryView(v1), BinaryView(v2)) => v1.partial_cmp(v2), |
493 | 0 | (BinaryView(_), _) => None, |
494 | 0 | (FixedSizeBinary(_, v1), FixedSizeBinary(_, v2)) => v1.partial_cmp(v2), |
495 | 0 | (FixedSizeBinary(_, _), _) => None, |
496 | 0 | (LargeBinary(v1), LargeBinary(v2)) => v1.partial_cmp(v2), |
497 | 0 | (LargeBinary(_), _) => None, |
498 | | // ScalarValue::List / ScalarValue::FixedSizeList / ScalarValue::LargeList are ensure to have length 1 |
499 | 0 | (List(arr1), List(arr2)) => partial_cmp_list(arr1.as_ref(), arr2.as_ref()), |
500 | 0 | (FixedSizeList(arr1), FixedSizeList(arr2)) => { |
501 | 0 | partial_cmp_list(arr1.as_ref(), arr2.as_ref()) |
502 | | } |
503 | 0 | (LargeList(arr1), LargeList(arr2)) => { |
504 | 0 | partial_cmp_list(arr1.as_ref(), arr2.as_ref()) |
505 | | } |
506 | 0 | (List(_), _) | (LargeList(_), _) | (FixedSizeList(_), _) => None, |
507 | 0 | (Struct(struct_arr1), Struct(struct_arr2)) => { |
508 | 0 | partial_cmp_struct(struct_arr1, struct_arr2) |
509 | | } |
510 | 0 | (Struct(_), _) => None, |
511 | 0 | (Map(map_arr1), Map(map_arr2)) => partial_cmp_map(map_arr1, map_arr2), |
512 | 0 | (Map(_), _) => None, |
513 | 0 | (Date32(v1), Date32(v2)) => v1.partial_cmp(v2), |
514 | 0 | (Date32(_), _) => None, |
515 | 0 | (Date64(v1), Date64(v2)) => v1.partial_cmp(v2), |
516 | 0 | (Date64(_), _) => None, |
517 | 0 | (Time32Second(v1), Time32Second(v2)) => v1.partial_cmp(v2), |
518 | 0 | (Time32Second(_), _) => None, |
519 | 0 | (Time32Millisecond(v1), Time32Millisecond(v2)) => v1.partial_cmp(v2), |
520 | 0 | (Time32Millisecond(_), _) => None, |
521 | 0 | (Time64Microsecond(v1), Time64Microsecond(v2)) => v1.partial_cmp(v2), |
522 | 0 | (Time64Microsecond(_), _) => None, |
523 | 0 | (Time64Nanosecond(v1), Time64Nanosecond(v2)) => v1.partial_cmp(v2), |
524 | 0 | (Time64Nanosecond(_), _) => None, |
525 | 0 | (TimestampSecond(v1, _), TimestampSecond(v2, _)) => v1.partial_cmp(v2), |
526 | 0 | (TimestampSecond(_, _), _) => None, |
527 | 7.65k | (TimestampMillisecond(v1, _), TimestampMillisecond(v2, _)) => { |
528 | 7.65k | v1.partial_cmp(v2) |
529 | | } |
530 | 0 | (TimestampMillisecond(_, _), _) => None, |
531 | 0 | (TimestampMicrosecond(v1, _), TimestampMicrosecond(v2, _)) => { |
532 | 0 | v1.partial_cmp(v2) |
533 | | } |
534 | 0 | (TimestampMicrosecond(_, _), _) => None, |
535 | 0 | (TimestampNanosecond(v1, _), TimestampNanosecond(v2, _)) => { |
536 | 0 | v1.partial_cmp(v2) |
537 | | } |
538 | 0 | (TimestampNanosecond(_, _), _) => None, |
539 | 0 | (IntervalYearMonth(v1), IntervalYearMonth(v2)) => v1.partial_cmp(v2), |
540 | 0 | (IntervalYearMonth(_), _) => None, |
541 | 5.76k | (IntervalDayTime(v1), IntervalDayTime(v2)) => v1.partial_cmp(v2), |
542 | 0 | (IntervalDayTime(_), _) => None, |
543 | 0 | (IntervalMonthDayNano(v1), IntervalMonthDayNano(v2)) => v1.partial_cmp(v2), |
544 | 0 | (IntervalMonthDayNano(_), _) => None, |
545 | 0 | (DurationSecond(v1), DurationSecond(v2)) => v1.partial_cmp(v2), |
546 | 0 | (DurationSecond(_), _) => None, |
547 | 4.96k | (DurationMillisecond(v1), DurationMillisecond(v2)) => v1.partial_cmp(v2), |
548 | 0 | (DurationMillisecond(_), _) => None, |
549 | 0 | (DurationMicrosecond(v1), DurationMicrosecond(v2)) => v1.partial_cmp(v2), |
550 | 0 | (DurationMicrosecond(_), _) => None, |
551 | 0 | (DurationNanosecond(v1), DurationNanosecond(v2)) => v1.partial_cmp(v2), |
552 | 0 | (DurationNanosecond(_), _) => None, |
553 | 0 | (Union(v1, t1, m1), Union(v2, t2, m2)) => { |
554 | 0 | if t1.eq(t2) && m1.eq(m2) { |
555 | 0 | v1.partial_cmp(v2) |
556 | | } else { |
557 | 0 | None |
558 | | } |
559 | | } |
560 | 0 | (Union(_, _, _), _) => None, |
561 | 0 | (Dictionary(k1, v1), Dictionary(k2, v2)) => { |
562 | 0 | // Don't compare if the key types don't match (it is effectively a different datatype) |
563 | 0 | if k1 == k2 { |
564 | 0 | v1.partial_cmp(v2) |
565 | | } else { |
566 | 0 | None |
567 | | } |
568 | | } |
569 | 0 | (Dictionary(_, _), _) => None, |
570 | 0 | (Null, Null) => Some(Ordering::Equal), |
571 | 0 | (Null, _) => None, |
572 | | } |
573 | 157k | } |
574 | | } |
575 | | |
576 | | /// List/LargeList/FixedSizeList scalars always have a single element |
577 | | /// array. This function returns that array |
578 | 0 | fn first_array_for_list(arr: &dyn Array) -> ArrayRef { |
579 | 0 | assert_eq!(arr.len(), 1); |
580 | 0 | if let Some(arr) = arr.as_list_opt::<i32>() { |
581 | 0 | arr.value(0) |
582 | 0 | } else if let Some(arr) = arr.as_list_opt::<i64>() { |
583 | 0 | arr.value(0) |
584 | 0 | } else if let Some(arr) = arr.as_fixed_size_list_opt() { |
585 | 0 | arr.value(0) |
586 | | } else { |
587 | 0 | unreachable!("Since only List / LargeList / FixedSizeList are supported, this should never happen") |
588 | | } |
589 | 0 | } |
590 | | |
591 | | /// Compares two List/LargeList/FixedSizeList scalars |
592 | 0 | fn partial_cmp_list(arr1: &dyn Array, arr2: &dyn Array) -> Option<Ordering> { |
593 | 0 | if arr1.data_type() != arr2.data_type() { |
594 | 0 | return None; |
595 | 0 | } |
596 | 0 | let arr1 = first_array_for_list(arr1); |
597 | 0 | let arr2 = first_array_for_list(arr2); |
598 | | |
599 | 0 | let lt_res = arrow::compute::kernels::cmp::lt(&arr1, &arr2).ok()?; |
600 | 0 | let eq_res = arrow::compute::kernels::cmp::eq(&arr1, &arr2).ok()?; |
601 | | |
602 | 0 | for j in 0..lt_res.len() { |
603 | 0 | if lt_res.is_valid(j) && lt_res.value(j) { |
604 | 0 | return Some(Ordering::Less); |
605 | 0 | } |
606 | 0 | if eq_res.is_valid(j) && !eq_res.value(j) { |
607 | 0 | return Some(Ordering::Greater); |
608 | 0 | } |
609 | | } |
610 | | |
611 | 0 | Some(Ordering::Equal) |
612 | 0 | } |
613 | | |
614 | 0 | fn partial_cmp_struct(s1: &Arc<StructArray>, s2: &Arc<StructArray>) -> Option<Ordering> { |
615 | 0 | if s1.len() != s2.len() { |
616 | 0 | return None; |
617 | 0 | } |
618 | 0 |
|
619 | 0 | if s1.data_type() != s2.data_type() { |
620 | 0 | return None; |
621 | 0 | } |
622 | | |
623 | 0 | for col_index in 0..s1.num_columns() { |
624 | 0 | let arr1 = s1.column(col_index); |
625 | 0 | let arr2 = s2.column(col_index); |
626 | | |
627 | 0 | let lt_res = arrow::compute::kernels::cmp::lt(arr1, arr2).ok()?; |
628 | 0 | let eq_res = arrow::compute::kernels::cmp::eq(arr1, arr2).ok()?; |
629 | | |
630 | 0 | for j in 0..lt_res.len() { |
631 | 0 | if lt_res.is_valid(j) && lt_res.value(j) { |
632 | 0 | return Some(Ordering::Less); |
633 | 0 | } |
634 | 0 | if eq_res.is_valid(j) && !eq_res.value(j) { |
635 | 0 | return Some(Ordering::Greater); |
636 | 0 | } |
637 | | } |
638 | | } |
639 | 0 | Some(Ordering::Equal) |
640 | 0 | } |
641 | | |
642 | 0 | fn partial_cmp_map(m1: &Arc<MapArray>, m2: &Arc<MapArray>) -> Option<Ordering> { |
643 | 0 | if m1.len() != m2.len() { |
644 | 0 | return None; |
645 | 0 | } |
646 | 0 |
|
647 | 0 | if m1.data_type() != m2.data_type() { |
648 | 0 | return None; |
649 | 0 | } |
650 | | |
651 | 0 | for col_index in 0..m1.len() { |
652 | 0 | let arr1 = m1.entries().column(col_index); |
653 | 0 | let arr2 = m2.entries().column(col_index); |
654 | | |
655 | 0 | let lt_res = arrow::compute::kernels::cmp::lt(arr1, arr2).ok()?; |
656 | 0 | let eq_res = arrow::compute::kernels::cmp::eq(arr1, arr2).ok()?; |
657 | | |
658 | 0 | for j in 0..lt_res.len() { |
659 | 0 | if lt_res.is_valid(j) && lt_res.value(j) { |
660 | 0 | return Some(Ordering::Less); |
661 | 0 | } |
662 | 0 | if eq_res.is_valid(j) && !eq_res.value(j) { |
663 | 0 | return Some(Ordering::Greater); |
664 | 0 | } |
665 | | } |
666 | | } |
667 | 0 | Some(Ordering::Equal) |
668 | 0 | } |
669 | | |
670 | | impl Eq for ScalarValue {} |
671 | | |
// Newtype wrapper around a float. `std::hash::Hash` cannot be implemented for
// the float types directly, so hashing goes through this wrapper instead.
struct Fl<T>(T);

// Implements `Hash` for `Fl<$float>`: the float is reinterpreted as the
// same-width unsigned integer `$bits` and its native-endian bytes are fed to
// the hasher.
macro_rules! hash_float_value {
    ($(($float:ty, $bits:ty)),+) => {
        $(impl std::hash::Hash for Fl<$float> {
            #[inline]
            fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
                let raw: $bits = self.0.to_bits();
                state.write(&raw.to_ne_bytes())
            }
        })+
    };
}

hash_float_value!((f64, u64), (f32, u32));
687 | | |
688 | | // manual implementation of `Hash` |
689 | | // |
690 | | // # Panics |
691 | | // |
692 | | // Panics if there is an error when creating hash values for rows |
693 | | impl std::hash::Hash for ScalarValue { |
694 | 63 | fn hash<H: std::hash::Hasher>(&self, state: &mut H) { |
695 | | use ScalarValue::*; |
696 | 63 | match self { |
697 | 0 | Decimal128(v, p, s) => { |
698 | 0 | v.hash(state); |
699 | 0 | p.hash(state); |
700 | 0 | s.hash(state) |
701 | | } |
702 | 0 | Decimal256(v, p, s) => { |
703 | 0 | v.hash(state); |
704 | 0 | p.hash(state); |
705 | 0 | s.hash(state) |
706 | | } |
707 | 0 | Boolean(v) => v.hash(state), |
708 | 0 | Float16(v) => v.map(Fl).hash(state), |
709 | 0 | Float32(v) => v.map(Fl).hash(state), |
710 | 0 | Float64(v) => v.map(Fl).hash(state), |
711 | 0 | Int8(v) => v.hash(state), |
712 | 0 | Int16(v) => v.hash(state), |
713 | 0 | Int32(v) => v.hash(state), |
714 | 63 | Int64(v) => v.hash(state), |
715 | 0 | UInt8(v) => v.hash(state), |
716 | 0 | UInt16(v) => v.hash(state), |
717 | 0 | UInt32(v) => v.hash(state), |
718 | 0 | UInt64(v) => v.hash(state), |
719 | 0 | Utf8(v) | LargeUtf8(v) | Utf8View(v) => v.hash(state), |
720 | 0 | Binary(v) | FixedSizeBinary(_, v) | LargeBinary(v) | BinaryView(v) => { |
721 | 0 | v.hash(state) |
722 | | } |
723 | 0 | List(arr) => { |
724 | 0 | hash_nested_array(arr.to_owned() as ArrayRef, state); |
725 | 0 | } |
726 | 0 | LargeList(arr) => { |
727 | 0 | hash_nested_array(arr.to_owned() as ArrayRef, state); |
728 | 0 | } |
729 | 0 | FixedSizeList(arr) => { |
730 | 0 | hash_nested_array(arr.to_owned() as ArrayRef, state); |
731 | 0 | } |
732 | 0 | Struct(arr) => { |
733 | 0 | hash_nested_array(arr.to_owned() as ArrayRef, state); |
734 | 0 | } |
735 | 0 | Map(arr) => { |
736 | 0 | hash_nested_array(arr.to_owned() as ArrayRef, state); |
737 | 0 | } |
738 | 0 | Date32(v) => v.hash(state), |
739 | 0 | Date64(v) => v.hash(state), |
740 | 0 | Time32Second(v) => v.hash(state), |
741 | 0 | Time32Millisecond(v) => v.hash(state), |
742 | 0 | Time64Microsecond(v) => v.hash(state), |
743 | 0 | Time64Nanosecond(v) => v.hash(state), |
744 | 0 | TimestampSecond(v, _) => v.hash(state), |
745 | 0 | TimestampMillisecond(v, _) => v.hash(state), |
746 | 0 | TimestampMicrosecond(v, _) => v.hash(state), |
747 | 0 | TimestampNanosecond(v, _) => v.hash(state), |
748 | 0 | DurationSecond(v) => v.hash(state), |
749 | 0 | DurationMillisecond(v) => v.hash(state), |
750 | 0 | DurationMicrosecond(v) => v.hash(state), |
751 | 0 | DurationNanosecond(v) => v.hash(state), |
752 | 0 | IntervalYearMonth(v) => v.hash(state), |
753 | 0 | IntervalDayTime(v) => v.hash(state), |
754 | 0 | IntervalMonthDayNano(v) => v.hash(state), |
755 | 0 | Union(v, t, m) => { |
756 | 0 | v.hash(state); |
757 | 0 | t.hash(state); |
758 | 0 | m.hash(state); |
759 | 0 | } |
760 | 0 | Dictionary(k, v) => { |
761 | 0 | k.hash(state); |
762 | 0 | v.hash(state); |
763 | 0 | } |
764 | | // stable hash for Null value |
765 | 0 | Null => 1.hash(state), |
766 | | } |
767 | 63 | } |
768 | | } |
769 | | |
770 | 0 | fn hash_nested_array<H: std::hash::Hasher>(arr: ArrayRef, state: &mut H) { |
771 | 0 | let arrays = vec![arr.to_owned()]; |
772 | 0 | let hashes_buffer = &mut vec![0; arr.len()]; |
773 | 0 | let random_state = ahash::RandomState::with_seeds(0, 0, 0, 0); |
774 | 0 | let hashes = create_hashes(&arrays, &random_state, hashes_buffer).unwrap(); |
775 | 0 | // Hash back to std::hash::Hasher |
776 | 0 | hashes.hash(state); |
777 | 0 | } |
778 | | |
779 | | /// Return a reference to the values array and the index into it for a |
780 | | /// dictionary array |
781 | | /// |
782 | | /// # Errors |
783 | | /// |
784 | | /// Errors if the array cannot be downcasted to DictionaryArray |
785 | | #[inline] |
786 | 0 | pub fn get_dict_value<K: ArrowDictionaryKeyType>( |
787 | 0 | array: &dyn Array, |
788 | 0 | index: usize, |
789 | 0 | ) -> Result<(&ArrayRef, Option<usize>)> { |
790 | 0 | let dict_array = as_dictionary_array::<K>(array)?; |
791 | 0 | Ok((dict_array.values(), dict_array.key(index))) |
792 | 0 | } |
793 | | |
794 | | /// Create a dictionary array representing `value` repeated `size` |
795 | | /// times |
796 | 0 | fn dict_from_scalar<K: ArrowDictionaryKeyType>( |
797 | 0 | value: &ScalarValue, |
798 | 0 | size: usize, |
799 | 0 | ) -> Result<ArrayRef> { |
800 | | // values array is one element long (the value) |
801 | 0 | let values_array = value.to_array_of_size(1)?; |
802 | | |
803 | | // Create a key array with `size` elements, each of 0 |
804 | 0 | let key_array: PrimitiveArray<K> = std::iter::repeat(if value.is_null() { |
805 | 0 | None |
806 | | } else { |
807 | 0 | Some(K::default_value()) |
808 | | }) |
809 | 0 | .take(size) |
810 | 0 | .collect(); |
811 | 0 |
|
812 | 0 | // create a new DictionaryArray |
813 | 0 | // |
814 | 0 | // Note: this path could be made faster by using the ArrayData |
815 | 0 | // APIs and skipping validation, if it every comes up in |
816 | 0 | // performance traces. |
817 | 0 | Ok(Arc::new( |
818 | 0 | DictionaryArray::<K>::try_new(key_array, values_array)?, // should always be valid by construction above |
819 | | )) |
820 | 0 | } |
821 | | |
822 | | /// Create a dictionary array representing all the values in values |
823 | 0 | fn dict_from_values<K: ArrowDictionaryKeyType>( |
824 | 0 | values_array: ArrayRef, |
825 | 0 | ) -> Result<ArrayRef> { |
826 | | // Create a key array with `size` elements of 0..array_len for all |
827 | | // non-null value elements |
828 | 0 | let key_array: PrimitiveArray<K> = (0..values_array.len()) |
829 | 0 | .map(|index| { |
830 | 0 | if values_array.is_valid(index) { |
831 | 0 | let native_index = K::Native::from_usize(index).ok_or_else(|| { |
832 | 0 | DataFusionError::Internal(format!( |
833 | 0 | "Can not create index of type {} from value {}", |
834 | 0 | K::DATA_TYPE, |
835 | 0 | index |
836 | 0 | )) |
837 | 0 | })?; |
838 | 0 | Ok(Some(native_index)) |
839 | | } else { |
840 | 0 | Ok(None) |
841 | | } |
842 | 0 | }) |
843 | 0 | .collect::<Result<Vec<_>>>()? |
844 | 0 | .into_iter() |
845 | 0 | .collect(); |
846 | | |
847 | | // create a new DictionaryArray |
848 | | // |
849 | | // Note: this path could be made faster by using the ArrayData |
850 | | // APIs and skipping validation, if it every comes up in |
851 | | // performance traces. |
852 | 0 | let dict_array = DictionaryArray::<K>::try_new(key_array, values_array)?; |
853 | 0 | Ok(Arc::new(dict_array)) |
854 | 0 | } |
855 | | |
/// Downcasts `$array` to `$ARRAYTYPE` and wraps the element at `$index`
/// (or `None` when that element is null) in `ScalarValue::$SCALAR`, together
/// with a clone of the timezone `$TZ`.
///
/// Produces an internal error (propagated with `?`) if the downcast fails.
macro_rules! typed_cast_tz {
    ($array:expr, $index:expr, $ARRAYTYPE:ident, $SCALAR:ident, $TZ:expr) => {{
        let array = $array
            .as_any()
            .downcast_ref::<$ARRAYTYPE>()
            .ok_or_else(|| {
                DataFusionError::Internal(format!(
                    "could not cast value to {}",
                    std::any::type_name::<$ARRAYTYPE>()
                ))
            })?;
        let value = if array.is_null($index) {
            None
        } else {
            Some(array.value($index).into())
        };
        Ok::<ScalarValue, DataFusionError>(ScalarValue::$SCALAR(value, $TZ.clone()))
    }};
}
877 | | |
/// Downcasts `$array` to `$ARRAYTYPE` and wraps the element at `$index`
/// (or `None` when that element is null) in `ScalarValue::$SCALAR`.
///
/// Produces an internal error (propagated with `?`) if the downcast fails.
macro_rules! typed_cast {
    ($array:expr, $index:expr, $ARRAYTYPE:ident, $SCALAR:ident) => {{
        let array = $array
            .as_any()
            .downcast_ref::<$ARRAYTYPE>()
            .ok_or_else(|| {
                DataFusionError::Internal(format!(
                    "could not cast value to {}",
                    std::any::type_name::<$ARRAYTYPE>()
                ))
            })?;
        let value = if array.is_null($index) {
            None
        } else {
            Some(array.value($index).into())
        };
        Ok::<ScalarValue, DataFusionError>(ScalarValue::$SCALAR(value))
    }};
}
898 | | |
/// Builds an array of `$SIZE` elements from an optional scalar `$EXPR`.
///
/// `Some(value)` yields `$ARRAY_TYPE::from_value(*value, $SIZE)`; `None`
/// yields an all-null array of the given data type. The second form takes an
/// extra `$ENUM` argument for data types that carry a parameter, i.e.
/// `DataType::$DATA_TYPE($ENUM)`.
macro_rules! build_array_from_option {
    ($DATA_TYPE:ident, $ARRAY_TYPE:ident, $EXPR:expr, $SIZE:expr) => {{
        match $EXPR {
            Some(value) => {
                let repeated = $ARRAY_TYPE::from_value(*value, $SIZE);
                Arc::new(repeated)
            }
            None => new_null_array(&DataType::$DATA_TYPE, $SIZE),
        }
    }};
    ($DATA_TYPE:ident, $ENUM:expr, $ARRAY_TYPE:ident, $EXPR:expr, $SIZE:expr) => {{
        match $EXPR {
            Some(value) => {
                let repeated = $ARRAY_TYPE::from_value(*value, $SIZE);
                Arc::new(repeated)
            }
            None => new_null_array(&DataType::$DATA_TYPE($ENUM), $SIZE),
        }
    }};
}
913 | | |
/// Builds a timestamp array of `$SIZE` elements from an optional scalar
/// `$EXPR`.
///
/// `Some(value)` yields `$ARRAY_TYPE::from_value(*value, $SIZE)` with the
/// timezone `$TZ` attached; `None` yields an all-null array of type
/// `DataType::Timestamp($TIME_UNIT, $TZ)`.
macro_rules! build_timestamp_array_from_option {
    ($TIME_UNIT:expr, $TZ:expr, $ARRAY_TYPE:ident, $EXPR:expr, $SIZE:expr) => {
        match $EXPR {
            Some(value) => {
                let repeated = $ARRAY_TYPE::from_value(*value, $SIZE);
                Arc::new(repeated.with_timezone_opt($TZ))
            }
            None => new_null_array(&DataType::Timestamp($TIME_UNIT, $TZ), $SIZE),
        }
    };
}
924 | | |
/// Compares the optional scalar `$VALUE` with element `$index` of `$array`
/// after downcasting the array to `$ARRAYTYPE`.
///
/// A `Some` scalar is equal when the element is valid and has the same value;
/// a `None` scalar is equal when the element is null. Produces an internal
/// error (propagated with `?`) if the downcast fails.
macro_rules! eq_array_primitive {
    ($array:expr, $index:expr, $ARRAYTYPE:ident, $VALUE:expr) => {{
        let array = $array
            .as_any()
            .downcast_ref::<$ARRAYTYPE>()
            .ok_or_else(|| {
                DataFusionError::Internal(format!(
                    "could not cast value to {}",
                    std::any::type_name::<$ARRAYTYPE>()
                ))
            })?;
        let is_valid = array.is_valid($index);
        let equal = match $VALUE {
            None => !is_valid,
            Some(val) => is_valid && &array.value($index) == val,
        };
        Ok::<bool, DataFusionError>(equal)
    }};
}
944 | | |
945 | | impl ScalarValue { |
946 | | /// Create a [`Result<ScalarValue>`] with the provided value and datatype |
947 | | /// |
948 | | /// # Panics |
949 | | /// |
950 | | /// Panics if d is not compatible with T |
951 | 0 | pub fn new_primitive<T: ArrowPrimitiveType>( |
952 | 0 | a: Option<T::Native>, |
953 | 0 | d: &DataType, |
954 | 0 | ) -> Result<Self> { |
955 | 0 | match a { |
956 | 0 | None => d.try_into(), |
957 | 0 | Some(v) => { |
958 | 0 | let array = PrimitiveArray::<T>::new(vec![v].into(), None) |
959 | 0 | .with_data_type(d.clone()); |
960 | 0 | Self::try_from_array(&array, 0) |
961 | | } |
962 | | } |
963 | 0 | } |
964 | | |
965 | | /// Create a decimal Scalar from value/precision and scale. |
966 | 0 | pub fn try_new_decimal128(value: i128, precision: u8, scale: i8) -> Result<Self> { |
967 | 0 | // make sure the precision and scale is valid |
968 | 0 | if precision <= DECIMAL128_MAX_PRECISION && scale.unsigned_abs() <= precision { |
969 | 0 | return Ok(ScalarValue::Decimal128(Some(value), precision, scale)); |
970 | 0 | } |
971 | 0 | _internal_err!( |
972 | 0 | "Can not new a decimal type ScalarValue for precision {precision} and scale {scale}" |
973 | 0 | ) |
974 | 0 | } |
975 | | |
976 | | /// Returns a [`ScalarValue::Utf8`] representing `val` |
977 | 0 | pub fn new_utf8(val: impl Into<String>) -> Self { |
978 | 0 | ScalarValue::from(val.into()) |
979 | 0 | } |
980 | | |
981 | | /// Returns a [`ScalarValue::IntervalYearMonth`] representing |
982 | | /// `years` years and `months` months |
983 | 0 | pub fn new_interval_ym(years: i32, months: i32) -> Self { |
984 | 0 | let val = IntervalYearMonthType::make_value(years, months); |
985 | 0 | ScalarValue::IntervalYearMonth(Some(val)) |
986 | 0 | } |
987 | | |
988 | | /// Returns a [`ScalarValue::IntervalDayTime`] representing |
989 | | /// `days` days and `millis` milliseconds |
990 | 2.81k | pub fn new_interval_dt(days: i32, millis: i32) -> Self { |
991 | 2.81k | let val = IntervalDayTimeType::make_value(days, millis); |
992 | 2.81k | Self::IntervalDayTime(Some(val)) |
993 | 2.81k | } |
994 | | |
995 | | /// Returns a [`ScalarValue::IntervalMonthDayNano`] representing |
996 | | /// `months` months and `days` days, and `nanos` nanoseconds |
997 | 0 | pub fn new_interval_mdn(months: i32, days: i32, nanos: i64) -> Self { |
998 | 0 | let val = IntervalMonthDayNanoType::make_value(months, days, nanos); |
999 | 0 | ScalarValue::IntervalMonthDayNano(Some(val)) |
1000 | 0 | } |
1001 | | |
1002 | | /// Returns a [`ScalarValue`] representing |
1003 | | /// `value` and `tz_opt` timezone |
1004 | 0 | pub fn new_timestamp<T: ArrowTimestampType>( |
1005 | 0 | value: Option<i64>, |
1006 | 0 | tz_opt: Option<Arc<str>>, |
1007 | 0 | ) -> Self { |
1008 | 0 | match T::UNIT { |
1009 | 0 | TimeUnit::Second => ScalarValue::TimestampSecond(value, tz_opt), |
1010 | 0 | TimeUnit::Millisecond => ScalarValue::TimestampMillisecond(value, tz_opt), |
1011 | 0 | TimeUnit::Microsecond => ScalarValue::TimestampMicrosecond(value, tz_opt), |
1012 | 0 | TimeUnit::Nanosecond => ScalarValue::TimestampNanosecond(value, tz_opt), |
1013 | | } |
1014 | 0 | } |
1015 | | |
1016 | | /// Returns a [`ScalarValue`] representing PI |
1017 | 0 | pub fn new_pi(datatype: &DataType) -> Result<ScalarValue> { |
1018 | 0 | match datatype { |
1019 | 0 | DataType::Float32 => Ok(ScalarValue::from(std::f32::consts::PI)), |
1020 | 0 | DataType::Float64 => Ok(ScalarValue::from(std::f64::consts::PI)), |
1021 | 0 | _ => _internal_err!("PI is not supported for data type: {:?}", datatype), |
1022 | | } |
1023 | 0 | } |
1024 | | |
1025 | | /// Returns a [`ScalarValue`] representing PI's upper bound |
1026 | 0 | pub fn new_pi_upper(datatype: &DataType) -> Result<ScalarValue> { |
1027 | 0 | // TODO: replace the constants with next_up/next_down when |
1028 | 0 | // they are stabilized: https://doc.rust-lang.org/std/primitive.f64.html#method.next_up |
1029 | 0 | match datatype { |
1030 | 0 | DataType::Float32 => Ok(ScalarValue::from(consts::PI_UPPER_F32)), |
1031 | 0 | DataType::Float64 => Ok(ScalarValue::from(consts::PI_UPPER_F64)), |
1032 | | _ => { |
1033 | 0 | _internal_err!("PI_UPPER is not supported for data type: {:?}", datatype) |
1034 | | } |
1035 | | } |
1036 | 0 | } |
1037 | | |
1038 | | /// Returns a [`ScalarValue`] representing -PI's lower bound |
1039 | 0 | pub fn new_negative_pi_lower(datatype: &DataType) -> Result<ScalarValue> { |
1040 | 0 | match datatype { |
1041 | 0 | DataType::Float32 => Ok(ScalarValue::from(consts::NEGATIVE_PI_LOWER_F32)), |
1042 | 0 | DataType::Float64 => Ok(ScalarValue::from(consts::NEGATIVE_PI_LOWER_F64)), |
1043 | | _ => { |
1044 | 0 | _internal_err!("-PI_LOWER is not supported for data type: {:?}", datatype) |
1045 | | } |
1046 | | } |
1047 | 0 | } |
1048 | | |
1049 | | /// Returns a [`ScalarValue`] representing FRAC_PI_2's upper bound |
1050 | 0 | pub fn new_frac_pi_2_upper(datatype: &DataType) -> Result<ScalarValue> { |
1051 | 0 | match datatype { |
1052 | 0 | DataType::Float32 => Ok(ScalarValue::from(consts::FRAC_PI_2_UPPER_F32)), |
1053 | 0 | DataType::Float64 => Ok(ScalarValue::from(consts::FRAC_PI_2_UPPER_F64)), |
1054 | | _ => { |
1055 | 0 | _internal_err!( |
1056 | 0 | "PI_UPPER/2 is not supported for data type: {:?}", |
1057 | 0 | datatype |
1058 | 0 | ) |
1059 | | } |
1060 | | } |
1061 | 0 | } |
1062 | | |
1063 | | // Returns a [`ScalarValue`] representing FRAC_PI_2's lower bound |
1064 | 0 | pub fn new_neg_frac_pi_2_lower(datatype: &DataType) -> Result<ScalarValue> { |
1065 | 0 | match datatype { |
1066 | | DataType::Float32 => { |
1067 | 0 | Ok(ScalarValue::from(consts::NEGATIVE_FRAC_PI_2_LOWER_F32)) |
1068 | | } |
1069 | | DataType::Float64 => { |
1070 | 0 | Ok(ScalarValue::from(consts::NEGATIVE_FRAC_PI_2_LOWER_F64)) |
1071 | | } |
1072 | | _ => { |
1073 | 0 | _internal_err!( |
1074 | 0 | "-PI/2_LOWER is not supported for data type: {:?}", |
1075 | 0 | datatype |
1076 | 0 | ) |
1077 | | } |
1078 | | } |
1079 | 0 | } |
1080 | | |
1081 | | /// Returns a [`ScalarValue`] representing -PI |
1082 | 0 | pub fn new_negative_pi(datatype: &DataType) -> Result<ScalarValue> { |
1083 | 0 | match datatype { |
1084 | 0 | DataType::Float32 => Ok(ScalarValue::from(-std::f32::consts::PI)), |
1085 | 0 | DataType::Float64 => Ok(ScalarValue::from(-std::f64::consts::PI)), |
1086 | 0 | _ => _internal_err!("-PI is not supported for data type: {:?}", datatype), |
1087 | | } |
1088 | 0 | } |
1089 | | |
1090 | | /// Returns a [`ScalarValue`] representing PI/2 |
1091 | 0 | pub fn new_frac_pi_2(datatype: &DataType) -> Result<ScalarValue> { |
1092 | 0 | match datatype { |
1093 | 0 | DataType::Float32 => Ok(ScalarValue::from(std::f32::consts::FRAC_PI_2)), |
1094 | 0 | DataType::Float64 => Ok(ScalarValue::from(std::f64::consts::FRAC_PI_2)), |
1095 | 0 | _ => _internal_err!("PI/2 is not supported for data type: {:?}", datatype), |
1096 | | } |
1097 | 0 | } |
1098 | | |
1099 | | /// Returns a [`ScalarValue`] representing -PI/2 |
1100 | 0 | pub fn new_neg_frac_pi_2(datatype: &DataType) -> Result<ScalarValue> { |
1101 | 0 | match datatype { |
1102 | 0 | DataType::Float32 => Ok(ScalarValue::from(-std::f32::consts::FRAC_PI_2)), |
1103 | 0 | DataType::Float64 => Ok(ScalarValue::from(-std::f64::consts::FRAC_PI_2)), |
1104 | 0 | _ => _internal_err!("-PI/2 is not supported for data type: {:?}", datatype), |
1105 | | } |
1106 | 0 | } |
1107 | | |
1108 | | /// Returns a [`ScalarValue`] representing infinity |
1109 | 0 | pub fn new_infinity(datatype: &DataType) -> Result<ScalarValue> { |
1110 | 0 | match datatype { |
1111 | 0 | DataType::Float32 => Ok(ScalarValue::from(f32::INFINITY)), |
1112 | 0 | DataType::Float64 => Ok(ScalarValue::from(f64::INFINITY)), |
1113 | | _ => { |
1114 | 0 | _internal_err!("Infinity is not supported for data type: {:?}", datatype) |
1115 | | } |
1116 | | } |
1117 | 0 | } |
1118 | | |
1119 | | /// Returns a [`ScalarValue`] representing negative infinity |
1120 | 0 | pub fn new_neg_infinity(datatype: &DataType) -> Result<ScalarValue> { |
1121 | 0 | match datatype { |
1122 | 0 | DataType::Float32 => Ok(ScalarValue::from(f32::NEG_INFINITY)), |
1123 | 0 | DataType::Float64 => Ok(ScalarValue::from(f64::NEG_INFINITY)), |
1124 | | _ => { |
1125 | 0 | _internal_err!( |
1126 | 0 | "Negative Infinity is not supported for data type: {:?}", |
1127 | 0 | datatype |
1128 | 0 | ) |
1129 | | } |
1130 | | } |
1131 | 0 | } |
1132 | | |
1133 | | /// Create a zero value in the given type. |
1134 | 0 | pub fn new_zero(datatype: &DataType) -> Result<ScalarValue> { |
1135 | 0 | Ok(match datatype { |
1136 | 0 | DataType::Boolean => ScalarValue::Boolean(Some(false)), |
1137 | 0 | DataType::Int8 => ScalarValue::Int8(Some(0)), |
1138 | 0 | DataType::Int16 => ScalarValue::Int16(Some(0)), |
1139 | 0 | DataType::Int32 => ScalarValue::Int32(Some(0)), |
1140 | 0 | DataType::Int64 => ScalarValue::Int64(Some(0)), |
1141 | 0 | DataType::UInt8 => ScalarValue::UInt8(Some(0)), |
1142 | 0 | DataType::UInt16 => ScalarValue::UInt16(Some(0)), |
1143 | 0 | DataType::UInt32 => ScalarValue::UInt32(Some(0)), |
1144 | 0 | DataType::UInt64 => ScalarValue::UInt64(Some(0)), |
1145 | 0 | DataType::Float16 => ScalarValue::Float16(Some(f16::from_f32(0.0))), |
1146 | 0 | DataType::Float32 => ScalarValue::Float32(Some(0.0)), |
1147 | 0 | DataType::Float64 => ScalarValue::Float64(Some(0.0)), |
1148 | 0 | DataType::Timestamp(TimeUnit::Second, tz) => { |
1149 | 0 | ScalarValue::TimestampSecond(Some(0), tz.clone()) |
1150 | | } |
1151 | 0 | DataType::Timestamp(TimeUnit::Millisecond, tz) => { |
1152 | 0 | ScalarValue::TimestampMillisecond(Some(0), tz.clone()) |
1153 | | } |
1154 | 0 | DataType::Timestamp(TimeUnit::Microsecond, tz) => { |
1155 | 0 | ScalarValue::TimestampMicrosecond(Some(0), tz.clone()) |
1156 | | } |
1157 | 0 | DataType::Timestamp(TimeUnit::Nanosecond, tz) => { |
1158 | 0 | ScalarValue::TimestampNanosecond(Some(0), tz.clone()) |
1159 | | } |
1160 | | DataType::Interval(IntervalUnit::YearMonth) => { |
1161 | 0 | ScalarValue::IntervalYearMonth(Some(0)) |
1162 | | } |
1163 | | DataType::Interval(IntervalUnit::DayTime) => { |
1164 | 0 | ScalarValue::IntervalDayTime(Some(IntervalDayTime::ZERO)) |
1165 | | } |
1166 | | DataType::Interval(IntervalUnit::MonthDayNano) => { |
1167 | 0 | ScalarValue::IntervalMonthDayNano(Some(IntervalMonthDayNano::ZERO)) |
1168 | | } |
1169 | 0 | DataType::Duration(TimeUnit::Second) => ScalarValue::DurationSecond(Some(0)), |
1170 | | DataType::Duration(TimeUnit::Millisecond) => { |
1171 | 0 | ScalarValue::DurationMillisecond(Some(0)) |
1172 | | } |
1173 | | DataType::Duration(TimeUnit::Microsecond) => { |
1174 | 0 | ScalarValue::DurationMicrosecond(Some(0)) |
1175 | | } |
1176 | | DataType::Duration(TimeUnit::Nanosecond) => { |
1177 | 0 | ScalarValue::DurationNanosecond(Some(0)) |
1178 | | } |
1179 | | _ => { |
1180 | 0 | return _not_impl_err!( |
1181 | 0 | "Can't create a zero scalar from data_type \"{datatype:?}\"" |
1182 | 0 | ); |
1183 | | } |
1184 | | }) |
1185 | 0 | } |
1186 | | |
1187 | | /// Create an one value in the given type. |
1188 | 0 | pub fn new_one(datatype: &DataType) -> Result<ScalarValue> { |
1189 | 0 | Ok(match datatype { |
1190 | 0 | DataType::Int8 => ScalarValue::Int8(Some(1)), |
1191 | 0 | DataType::Int16 => ScalarValue::Int16(Some(1)), |
1192 | 0 | DataType::Int32 => ScalarValue::Int32(Some(1)), |
1193 | 0 | DataType::Int64 => ScalarValue::Int64(Some(1)), |
1194 | 0 | DataType::UInt8 => ScalarValue::UInt8(Some(1)), |
1195 | 0 | DataType::UInt16 => ScalarValue::UInt16(Some(1)), |
1196 | 0 | DataType::UInt32 => ScalarValue::UInt32(Some(1)), |
1197 | 0 | DataType::UInt64 => ScalarValue::UInt64(Some(1)), |
1198 | 0 | DataType::Float16 => ScalarValue::Float16(Some(f16::from_f32(1.0))), |
1199 | 0 | DataType::Float32 => ScalarValue::Float32(Some(1.0)), |
1200 | 0 | DataType::Float64 => ScalarValue::Float64(Some(1.0)), |
1201 | | _ => { |
1202 | 0 | return _not_impl_err!( |
1203 | 0 | "Can't create an one scalar from data_type \"{datatype:?}\"" |
1204 | 0 | ); |
1205 | | } |
1206 | | }) |
1207 | 0 | } |
1208 | | |
1209 | | /// Create a negative one value in the given type. |
1210 | 0 | pub fn new_negative_one(datatype: &DataType) -> Result<ScalarValue> { |
1211 | 0 | Ok(match datatype { |
1212 | 0 | DataType::Int8 | DataType::UInt8 => ScalarValue::Int8(Some(-1)), |
1213 | 0 | DataType::Int16 | DataType::UInt16 => ScalarValue::Int16(Some(-1)), |
1214 | 0 | DataType::Int32 | DataType::UInt32 => ScalarValue::Int32(Some(-1)), |
1215 | 0 | DataType::Int64 | DataType::UInt64 => ScalarValue::Int64(Some(-1)), |
1216 | 0 | DataType::Float16 => ScalarValue::Float16(Some(f16::from_f32(-1.0))), |
1217 | 0 | DataType::Float32 => ScalarValue::Float32(Some(-1.0)), |
1218 | 0 | DataType::Float64 => ScalarValue::Float64(Some(-1.0)), |
1219 | | _ => { |
1220 | 0 | return _not_impl_err!( |
1221 | 0 | "Can't create a negative one scalar from data_type \"{datatype:?}\"" |
1222 | 0 | ); |
1223 | | } |
1224 | | }) |
1225 | 0 | } |
1226 | | |
1227 | 0 | pub fn new_ten(datatype: &DataType) -> Result<ScalarValue> { |
1228 | 0 | Ok(match datatype { |
1229 | 0 | DataType::Int8 => ScalarValue::Int8(Some(10)), |
1230 | 0 | DataType::Int16 => ScalarValue::Int16(Some(10)), |
1231 | 0 | DataType::Int32 => ScalarValue::Int32(Some(10)), |
1232 | 0 | DataType::Int64 => ScalarValue::Int64(Some(10)), |
1233 | 0 | DataType::UInt8 => ScalarValue::UInt8(Some(10)), |
1234 | 0 | DataType::UInt16 => ScalarValue::UInt16(Some(10)), |
1235 | 0 | DataType::UInt32 => ScalarValue::UInt32(Some(10)), |
1236 | 0 | DataType::UInt64 => ScalarValue::UInt64(Some(10)), |
1237 | 0 | DataType::Float16 => ScalarValue::Float16(Some(f16::from_f32(10.0))), |
1238 | 0 | DataType::Float32 => ScalarValue::Float32(Some(10.0)), |
1239 | 0 | DataType::Float64 => ScalarValue::Float64(Some(10.0)), |
1240 | | _ => { |
1241 | 0 | return _not_impl_err!( |
1242 | 0 | "Can't create a ten scalar from data_type \"{datatype:?}\"" |
1243 | 0 | ); |
1244 | | } |
1245 | | }) |
1246 | 0 | } |
1247 | | |
1248 | | /// return the [`DataType`] of this `ScalarValue` |
1249 | 1.10M | pub fn data_type(&self) -> DataType { |
1250 | 1.10M | match self { |
1251 | 115k | ScalarValue::Boolean(_) => DataType::Boolean, |
1252 | 0 | ScalarValue::UInt8(_) => DataType::UInt8, |
1253 | 0 | ScalarValue::UInt16(_) => DataType::UInt16, |
1254 | 135 | ScalarValue::UInt32(_) => DataType::UInt32, |
1255 | 5 | ScalarValue::UInt64(_) => DataType::UInt64, |
1256 | 4 | ScalarValue::Int8(_) => DataType::Int8, |
1257 | 0 | ScalarValue::Int16(_) => DataType::Int16, |
1258 | 424k | ScalarValue::Int32(_) => DataType::Int32, |
1259 | 34.7k | ScalarValue::Int64(_) => DataType::Int64, |
1260 | 11 | ScalarValue::Decimal128(_, precision, scale) => { |
1261 | 11 | DataType::Decimal128(*precision, *scale) |
1262 | | } |
1263 | 0 | ScalarValue::Decimal256(_, precision, scale) => { |
1264 | 0 | DataType::Decimal256(*precision, *scale) |
1265 | | } |
1266 | 0 | ScalarValue::TimestampSecond(_, tz_opt) => { |
1267 | 0 | DataType::Timestamp(TimeUnit::Second, tz_opt.clone()) |
1268 | | } |
1269 | 100k | ScalarValue::TimestampMillisecond(_, tz_opt) => { |
1270 | 100k | DataType::Timestamp(TimeUnit::Millisecond, tz_opt.clone()) |
1271 | | } |
1272 | 0 | ScalarValue::TimestampMicrosecond(_, tz_opt) => { |
1273 | 0 | DataType::Timestamp(TimeUnit::Microsecond, tz_opt.clone()) |
1274 | | } |
1275 | 0 | ScalarValue::TimestampNanosecond(_, tz_opt) => { |
1276 | 0 | DataType::Timestamp(TimeUnit::Nanosecond, tz_opt.clone()) |
1277 | | } |
1278 | 0 | ScalarValue::Float16(_) => DataType::Float16, |
1279 | 71 | ScalarValue::Float32(_) => DataType::Float32, |
1280 | 310k | ScalarValue::Float64(_) => DataType::Float64, |
1281 | 0 | ScalarValue::Utf8(_) => DataType::Utf8, |
1282 | 0 | ScalarValue::LargeUtf8(_) => DataType::LargeUtf8, |
1283 | 0 | ScalarValue::Utf8View(_) => DataType::Utf8View, |
1284 | 0 | ScalarValue::Binary(_) => DataType::Binary, |
1285 | 0 | ScalarValue::BinaryView(_) => DataType::BinaryView, |
1286 | 0 | ScalarValue::FixedSizeBinary(sz, _) => DataType::FixedSizeBinary(*sz), |
1287 | 0 | ScalarValue::LargeBinary(_) => DataType::LargeBinary, |
1288 | 0 | ScalarValue::List(arr) => arr.data_type().to_owned(), |
1289 | 0 | ScalarValue::LargeList(arr) => arr.data_type().to_owned(), |
1290 | 0 | ScalarValue::FixedSizeList(arr) => arr.data_type().to_owned(), |
1291 | 3 | ScalarValue::Struct(arr) => arr.data_type().to_owned(), |
1292 | 0 | ScalarValue::Map(arr) => arr.data_type().to_owned(), |
1293 | 0 | ScalarValue::Date32(_) => DataType::Date32, |
1294 | 0 | ScalarValue::Date64(_) => DataType::Date64, |
1295 | 0 | ScalarValue::Time32Second(_) => DataType::Time32(TimeUnit::Second), |
1296 | 0 | ScalarValue::Time32Millisecond(_) => DataType::Time32(TimeUnit::Millisecond), |
1297 | 0 | ScalarValue::Time64Microsecond(_) => DataType::Time64(TimeUnit::Microsecond), |
1298 | 0 | ScalarValue::Time64Nanosecond(_) => DataType::Time64(TimeUnit::Nanosecond), |
1299 | | ScalarValue::IntervalYearMonth(_) => { |
1300 | 0 | DataType::Interval(IntervalUnit::YearMonth) |
1301 | | } |
1302 | 66.5k | ScalarValue::IntervalDayTime(_) => DataType::Interval(IntervalUnit::DayTime), |
1303 | | ScalarValue::IntervalMonthDayNano(_) => { |
1304 | 0 | DataType::Interval(IntervalUnit::MonthDayNano) |
1305 | | } |
1306 | 0 | ScalarValue::DurationSecond(_) => DataType::Duration(TimeUnit::Second), |
1307 | | ScalarValue::DurationMillisecond(_) => { |
1308 | 47.4k | DataType::Duration(TimeUnit::Millisecond) |
1309 | | } |
1310 | | ScalarValue::DurationMicrosecond(_) => { |
1311 | 0 | DataType::Duration(TimeUnit::Microsecond) |
1312 | | } |
1313 | | ScalarValue::DurationNanosecond(_) => { |
1314 | 0 | DataType::Duration(TimeUnit::Nanosecond) |
1315 | | } |
1316 | 14 | ScalarValue::Union(_, fields, mode) => DataType::Union(fields.clone(), *mode), |
1317 | 0 | ScalarValue::Dictionary(k, v) => { |
1318 | 0 | DataType::Dictionary(k.clone(), Box::new(v.data_type())) |
1319 | | } |
1320 | 934 | ScalarValue::Null => DataType::Null, |
1321 | | } |
1322 | 1.10M | } |
1323 | | |
1324 | | /// Calculate arithmetic negation for a scalar value |
1325 | 0 | pub fn arithmetic_negate(&self) -> Result<Self> { |
1326 | 0 | fn neg_checked_with_ctx<T: ArrowNativeTypeOp>( |
1327 | 0 | v: T, |
1328 | 0 | ctx: impl Fn() -> String, |
1329 | 0 | ) -> Result<T> { |
1330 | 0 | v.neg_checked() |
1331 | 0 | .map_err(|e| arrow_datafusion_err!(e).context(ctx())) |
1332 | 0 | } |
1333 | 0 | match self { |
1334 | | ScalarValue::Int8(None) |
1335 | | | ScalarValue::Int16(None) |
1336 | | | ScalarValue::Int32(None) |
1337 | | | ScalarValue::Int64(None) |
1338 | | | ScalarValue::Float16(None) |
1339 | | | ScalarValue::Float32(None) |
1340 | 0 | | ScalarValue::Float64(None) => Ok(self.clone()), |
1341 | 0 | ScalarValue::Float16(Some(v)) => { |
1342 | 0 | Ok(ScalarValue::Float16(Some(f16::from_f32(-v.to_f32())))) |
1343 | | } |
1344 | 0 | ScalarValue::Float64(Some(v)) => Ok(ScalarValue::Float64(Some(-v))), |
1345 | 0 | ScalarValue::Float32(Some(v)) => Ok(ScalarValue::Float32(Some(-v))), |
1346 | 0 | ScalarValue::Int8(Some(v)) => Ok(ScalarValue::Int8(Some(v.neg_checked()?))), |
1347 | 0 | ScalarValue::Int16(Some(v)) => Ok(ScalarValue::Int16(Some(v.neg_checked()?))), |
1348 | 0 | ScalarValue::Int32(Some(v)) => Ok(ScalarValue::Int32(Some(v.neg_checked()?))), |
1349 | 0 | ScalarValue::Int64(Some(v)) => Ok(ScalarValue::Int64(Some(v.neg_checked()?))), |
1350 | 0 | ScalarValue::IntervalYearMonth(Some(v)) => Ok( |
1351 | 0 | ScalarValue::IntervalYearMonth(Some(neg_checked_with_ctx(*v, || { |
1352 | 0 | format!("In negation of IntervalYearMonth({v})") |
1353 | 0 | })?)), |
1354 | | ), |
1355 | 0 | ScalarValue::IntervalDayTime(Some(v)) => { |
1356 | 0 | let (days, ms) = IntervalDayTimeType::to_parts(*v); |
1357 | 0 | let val = IntervalDayTimeType::make_value( |
1358 | 0 | neg_checked_with_ctx(days, || { |
1359 | 0 | format!("In negation of days {days} in IntervalDayTime") |
1360 | 0 | })?, |
1361 | 0 | neg_checked_with_ctx(ms, || { |
1362 | 0 | format!("In negation of milliseconds {ms} in IntervalDayTime") |
1363 | 0 | })?, |
1364 | | ); |
1365 | 0 | Ok(ScalarValue::IntervalDayTime(Some(val))) |
1366 | | } |
1367 | 0 | ScalarValue::IntervalMonthDayNano(Some(v)) => { |
1368 | 0 | let (months, days, nanos) = IntervalMonthDayNanoType::to_parts(*v); |
1369 | 0 | let val = IntervalMonthDayNanoType::make_value( |
1370 | 0 | neg_checked_with_ctx(months, || { |
1371 | 0 | format!("In negation of months {months} of IntervalMonthDayNano") |
1372 | 0 | })?, |
1373 | 0 | neg_checked_with_ctx(days, || { |
1374 | 0 | format!("In negation of days {days} of IntervalMonthDayNano") |
1375 | 0 | })?, |
1376 | 0 | neg_checked_with_ctx(nanos, || { |
1377 | 0 | format!("In negation of nanos {nanos} of IntervalMonthDayNano") |
1378 | 0 | })?, |
1379 | | ); |
1380 | 0 | Ok(ScalarValue::IntervalMonthDayNano(Some(val))) |
1381 | | } |
1382 | 0 | ScalarValue::Decimal128(Some(v), precision, scale) => { |
1383 | 0 | Ok(ScalarValue::Decimal128( |
1384 | 0 | Some(neg_checked_with_ctx(*v, || { |
1385 | 0 | format!("In negation of Decimal128({v}, {precision}, {scale})") |
1386 | 0 | })?), |
1387 | 0 | *precision, |
1388 | 0 | *scale, |
1389 | | )) |
1390 | | } |
1391 | 0 | ScalarValue::Decimal256(Some(v), precision, scale) => { |
1392 | 0 | Ok(ScalarValue::Decimal256( |
1393 | 0 | Some(neg_checked_with_ctx(*v, || { |
1394 | 0 | format!("In negation of Decimal256({v}, {precision}, {scale})") |
1395 | 0 | })?), |
1396 | 0 | *precision, |
1397 | 0 | *scale, |
1398 | | )) |
1399 | | } |
1400 | 0 | ScalarValue::TimestampSecond(Some(v), tz) => { |
1401 | 0 | Ok(ScalarValue::TimestampSecond( |
1402 | 0 | Some(neg_checked_with_ctx(*v, || { |
1403 | 0 | format!("In negation of TimestampSecond({v})") |
1404 | 0 | })?), |
1405 | 0 | tz.clone(), |
1406 | | )) |
1407 | | } |
1408 | 0 | ScalarValue::TimestampNanosecond(Some(v), tz) => { |
1409 | 0 | Ok(ScalarValue::TimestampNanosecond( |
1410 | 0 | Some(neg_checked_with_ctx(*v, || { |
1411 | 0 | format!("In negation of TimestampNanoSecond({v})") |
1412 | 0 | })?), |
1413 | 0 | tz.clone(), |
1414 | | )) |
1415 | | } |
1416 | 0 | ScalarValue::TimestampMicrosecond(Some(v), tz) => { |
1417 | 0 | Ok(ScalarValue::TimestampMicrosecond( |
1418 | 0 | Some(neg_checked_with_ctx(*v, || { |
1419 | 0 | format!("In negation of TimestampMicroSecond({v})") |
1420 | 0 | })?), |
1421 | 0 | tz.clone(), |
1422 | | )) |
1423 | | } |
1424 | 0 | ScalarValue::TimestampMillisecond(Some(v), tz) => { |
1425 | 0 | Ok(ScalarValue::TimestampMillisecond( |
1426 | 0 | Some(neg_checked_with_ctx(*v, || { |
1427 | 0 | format!("In negation of TimestampMilliSecond({v})") |
1428 | 0 | })?), |
1429 | 0 | tz.clone(), |
1430 | | )) |
1431 | | } |
1432 | 0 | value => _internal_err!( |
1433 | 0 | "Can not run arithmetic negative on scalar value {value:?}" |
1434 | 0 | ), |
1435 | | } |
1436 | 0 | } |
1437 | | |
1438 | | /// Wrapping addition of `ScalarValue` |
1439 | | /// |
1440 | | /// NB: operating on `ScalarValue` directly is not efficient, performance sensitive code |
1441 | | /// should operate on Arrays directly, using vectorized array kernels |
1442 | 22 | pub fn add<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> { |
1443 | 22 | let r = add_wrapping(&self.to_scalar()?0 , &other.borrow().to_scalar()?0 )?0 ; |
1444 | 22 | Self::try_from_array(r.as_ref(), 0) |
1445 | 22 | } |
1446 | | /// Checked addition of `ScalarValue` |
1447 | | /// |
1448 | | /// NB: operating on `ScalarValue` directly is not efficient, performance sensitive code |
1449 | | /// should operate on Arrays directly, using vectorized array kernels |
1450 | 22.0k | pub fn add_checked<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> { |
1451 | 22.0k | let r = add(&self.to_scalar()?0 , &other.borrow().to_scalar()?0 )?0 ; |
1452 | 22.0k | Self::try_from_array(r.as_ref(), 0) |
1453 | 22.0k | } |
1454 | | |
1455 | | /// Wrapping subtraction of `ScalarValue` |
1456 | | /// |
1457 | | /// NB: operating on `ScalarValue` directly is not efficient, performance sensitive code |
1458 | | /// should operate on Arrays directly, using vectorized array kernels |
1459 | 0 | pub fn sub<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> { |
1460 | 0 | let r = sub_wrapping(&self.to_scalar()?, &other.borrow().to_scalar()?)?; |
1461 | 0 | Self::try_from_array(r.as_ref(), 0) |
1462 | 0 | } |
1463 | | |
1464 | | /// Checked subtraction of `ScalarValue` |
1465 | | /// |
1466 | | /// NB: operating on `ScalarValue` directly is not efficient, performance sensitive code |
1467 | | /// should operate on Arrays directly, using vectorized array kernels |
1468 | 22.0k | pub fn sub_checked<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> { |
1469 | 22.0k | let r = sub(&self.to_scalar()?0 , &other.borrow().to_scalar()?0 )?0 ; |
1470 | 22.0k | Self::try_from_array(r.as_ref(), 0) |
1471 | 22.0k | } |
1472 | | |
1473 | | /// Wrapping multiplication of `ScalarValue` |
1474 | | /// |
1475 | | /// NB: operating on `ScalarValue` directly is not efficient, performance sensitive code |
1476 | | /// should operate on Arrays directly, using vectorized array kernels. |
1477 | 0 | pub fn mul<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> { |
1478 | 0 | let r = mul_wrapping(&self.to_scalar()?, &other.borrow().to_scalar()?)?; |
1479 | 0 | Self::try_from_array(r.as_ref(), 0) |
1480 | 0 | } |
1481 | | |
1482 | | /// Checked multiplication of `ScalarValue` |
1483 | | /// |
1484 | | /// NB: operating on `ScalarValue` directly is not efficient, performance sensitive code |
1485 | | /// should operate on Arrays directly, using vectorized array kernels. |
1486 | 0 | pub fn mul_checked<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> { |
1487 | 0 | let r = mul(&self.to_scalar()?, &other.borrow().to_scalar()?)?; |
1488 | 0 | Self::try_from_array(r.as_ref(), 0) |
1489 | 0 | } |
1490 | | |
1491 | | /// Performs `lhs / rhs` |
1492 | | /// |
1493 | | /// Overflow or division by zero will result in an error, with the exception of |
1494 | | /// floating point numbers, which instead follow the IEEE 754 rules. |
1495 | | /// |
1496 | | /// NB: operating on `ScalarValue` directly is not efficient, performance sensitive code |
1497 | | /// should operate on Arrays directly, using vectorized array kernels. |
1498 | 0 | pub fn div<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> { |
1499 | 0 | let r = div(&self.to_scalar()?, &other.borrow().to_scalar()?)?; |
1500 | 0 | Self::try_from_array(r.as_ref(), 0) |
1501 | 0 | } |
1502 | | |
1503 | | /// Performs `lhs % rhs` |
1504 | | /// |
1505 | | /// Overflow or division by zero will result in an error, with the exception of |
1506 | | /// floating point numbers, which instead follow the IEEE 754 rules. |
1507 | | /// |
1508 | | /// NB: operating on `ScalarValue` directly is not efficient, performance sensitive code |
1509 | | /// should operate on Arrays directly, using vectorized array kernels. |
1510 | 0 | pub fn rem<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> { |
1511 | 0 | let r = rem(&self.to_scalar()?, &other.borrow().to_scalar()?)?; |
1512 | 0 | Self::try_from_array(r.as_ref(), 0) |
1513 | 0 | } |
1514 | | |
1515 | 0 | pub fn is_unsigned(&self) -> bool { |
1516 | 0 | matches!( |
1517 | 0 | self, |
1518 | | ScalarValue::UInt8(_) |
1519 | | | ScalarValue::UInt16(_) |
1520 | | | ScalarValue::UInt32(_) |
1521 | | | ScalarValue::UInt64(_) |
1522 | | ) |
1523 | 0 | } |
1524 | | |
1525 | | /// Returns whether this value is null. |
1526 | 799k | pub fn is_null(&self) -> bool { |
1527 | 799k | match self { |
1528 | 172k | ScalarValue::Boolean(v) => v.is_none(), |
1529 | 0 | ScalarValue::Null => true, |
1530 | 0 | ScalarValue::Float16(v) => v.is_none(), |
1531 | 24 | ScalarValue::Float32(v) => v.is_none(), |
1532 | 223k | ScalarValue::Float64(v) => v.is_none(), |
1533 | 4 | ScalarValue::Decimal128(v, _, _) => v.is_none(), |
1534 | 0 | ScalarValue::Decimal256(v, _, _) => v.is_none(), |
1535 | 0 | ScalarValue::Int8(v) => v.is_none(), |
1536 | 0 | ScalarValue::Int16(v) => v.is_none(), |
1537 | 223k | ScalarValue::Int32(v) => v.is_none(), |
1538 | 24.3k | ScalarValue::Int64(v) => v.is_none(), |
1539 | 0 | ScalarValue::UInt8(v) => v.is_none(), |
1540 | 0 | ScalarValue::UInt16(v) => v.is_none(), |
1541 | 133 | ScalarValue::UInt32(v) => v.is_none(), |
1542 | 126 | ScalarValue::UInt64(v) => v.is_none(), |
1543 | 0 | ScalarValue::Utf8(v) |
1544 | 0 | | ScalarValue::Utf8View(v) |
1545 | 0 | | ScalarValue::LargeUtf8(v) => v.is_none(), |
1546 | 0 | ScalarValue::Binary(v) |
1547 | 0 | | ScalarValue::BinaryView(v) |
1548 | 0 | | ScalarValue::FixedSizeBinary(_, v) |
1549 | 0 | | ScalarValue::LargeBinary(v) => v.is_none(), |
1550 | | // arr.len() should be 1 for a list scalar, but we don't seem to |
1551 | | // enforce that anywhere, so we still check against array length. |
1552 | 0 | ScalarValue::List(arr) => arr.len() == arr.null_count(), |
1553 | 0 | ScalarValue::LargeList(arr) => arr.len() == arr.null_count(), |
1554 | 0 | ScalarValue::FixedSizeList(arr) => arr.len() == arr.null_count(), |
1555 | 0 | ScalarValue::Struct(arr) => arr.len() == arr.null_count(), |
1556 | 0 | ScalarValue::Map(arr) => arr.len() == arr.null_count(), |
1557 | 0 | ScalarValue::Date32(v) => v.is_none(), |
1558 | 0 | ScalarValue::Date64(v) => v.is_none(), |
1559 | 0 | ScalarValue::Time32Second(v) => v.is_none(), |
1560 | 0 | ScalarValue::Time32Millisecond(v) => v.is_none(), |
1561 | 0 | ScalarValue::Time64Microsecond(v) => v.is_none(), |
1562 | 0 | ScalarValue::Time64Nanosecond(v) => v.is_none(), |
1563 | 0 | ScalarValue::TimestampSecond(v, _) => v.is_none(), |
1564 | 81.1k | ScalarValue::TimestampMillisecond(v, _) => v.is_none(), |
1565 | 0 | ScalarValue::TimestampMicrosecond(v, _) => v.is_none(), |
1566 | 0 | ScalarValue::TimestampNanosecond(v, _) => v.is_none(), |
1567 | 0 | ScalarValue::IntervalYearMonth(v) => v.is_none(), |
1568 | 42.4k | ScalarValue::IntervalDayTime(v) => v.is_none(), |
1569 | 0 | ScalarValue::IntervalMonthDayNano(v) => v.is_none(), |
1570 | 0 | ScalarValue::DurationSecond(v) => v.is_none(), |
1571 | 31.4k | ScalarValue::DurationMillisecond(v) => v.is_none(), |
1572 | 0 | ScalarValue::DurationMicrosecond(v) => v.is_none(), |
1573 | 0 | ScalarValue::DurationNanosecond(v) => v.is_none(), |
1574 | 2 | ScalarValue::Union(v, _, _) => match v { |
1575 | 0 | Some((_, s)) => s.is_null(), |
1576 | 2 | None => true, |
1577 | | }, |
1578 | 0 | ScalarValue::Dictionary(_, v) => v.is_null(), |
1579 | | } |
1580 | 799k | } |
1581 | | |
1582 | | /// Absolute distance between two numeric values (of the same type). This method will return |
1583 | | /// None if either one of the arguments is null. It might also return None if the resulting |
1584 | | /// distance is greater than [`usize::MAX`]. If the type is a float, then the distance will be |
1585 | | /// rounded to the nearest integer. |
1586 | | /// |
1587 | | /// |
1588 | | /// Note: the datatype itself must support subtraction. |
1589 | 118 | pub fn distance(&self, other: &ScalarValue) -> Option<usize> { |
1590 | 118 | match (self, other) { |
1591 | 0 | (Self::Int8(Some(l)), Self::Int8(Some(r))) => Some(l.abs_diff(*r) as _), |
1592 | 0 | (Self::Int16(Some(l)), Self::Int16(Some(r))) => Some(l.abs_diff(*r) as _), |
1593 | 73 | (Self::Int32(Some(l)), Self::Int32(Some(r))) => Some(l.abs_diff(*r) as _), |
1594 | 34 | (Self::Int64(Some(l)), Self::Int64(Some(r))) => Some(l.abs_diff(*r) as _), |
1595 | 0 | (Self::UInt8(Some(l)), Self::UInt8(Some(r))) => Some(l.abs_diff(*r) as _), |
1596 | 0 | (Self::UInt16(Some(l)), Self::UInt16(Some(r))) => Some(l.abs_diff(*r) as _), |
1597 | 0 | (Self::UInt32(Some(l)), Self::UInt32(Some(r))) => Some(l.abs_diff(*r) as _), |
1598 | 0 | (Self::UInt64(Some(l)), Self::UInt64(Some(r))) => Some(l.abs_diff(*r) as _), |
1599 | | // TODO: we might want to look into supporting ceil/floor here for floats. |
1600 | 0 | (Self::Float16(Some(l)), Self::Float16(Some(r))) => { |
1601 | 0 | Some((f16::to_f32(*l) - f16::to_f32(*r)).abs().round() as _) |
1602 | | } |
1603 | 0 | (Self::Float32(Some(l)), Self::Float32(Some(r))) => { |
1604 | 0 | Some((l - r).abs().round() as _) |
1605 | | } |
1606 | 0 | (Self::Float64(Some(l)), Self::Float64(Some(r))) => { |
1607 | 0 | Some((l - r).abs().round() as _) |
1608 | | } |
1609 | 11 | _ => None, |
1610 | | } |
1611 | 118 | } |
1612 | | |
1613 | | /// Converts a scalar value into a 1-row array. |
1614 | | /// |
1615 | | /// # Errors |
1616 | | /// |
1617 | | /// Errors if the ScalarValue cannot be converted into a 1-row array |
1618 | 3.10k | pub fn to_array(&self) -> Result<ArrayRef> { |
1619 | 3.10k | self.to_array_of_size(1) |
1620 | 3.10k | } |
1621 | | |
1622 | | /// Converts a scalar into an arrow [`Scalar`] (which implements |
1623 | | /// the [`Datum`] interface). |
1624 | | /// |
1625 | | /// This can be used to call arrow compute kernels such as `lt` |
1626 | | /// |
1627 | | /// # Errors |
1628 | | /// |
1629 | | /// Errors if the ScalarValue cannot be converted into a 1-row array |
1630 | | /// |
1631 | | /// # Example |
1632 | | /// ``` |
1633 | | /// use datafusion_common::ScalarValue; |
1634 | | /// use arrow::array::{BooleanArray, Int32Array}; |
1635 | | /// |
1636 | | /// let arr = Int32Array::from(vec![Some(1), None, Some(10)]); |
1637 | | /// let five = ScalarValue::Int32(Some(5)); |
1638 | | /// |
1639 | | /// let result = arrow::compute::kernels::cmp::lt( |
1640 | | /// &arr, |
1641 | | /// &five.to_scalar().unwrap(), |
1642 | | /// ).unwrap(); |
1643 | | /// |
1644 | | /// let expected = BooleanArray::from(vec![ |
1645 | | /// Some(true), |
1646 | | /// None, |
1647 | | /// Some(false) |
1648 | | /// ] |
1649 | | /// ); |
1650 | | /// |
1651 | | /// assert_eq!(&result, &expected); |
1652 | | /// ``` |
1653 | | /// [`Datum`]: arrow_array::Datum |
1654 | 176k | pub fn to_scalar(&self) -> Result<Scalar<ArrayRef>> { |
1655 | 176k | Ok(Scalar::new(self.to_array_of_size(1)?0 )) |
1656 | 176k | } |
1657 | | |
1658 | | /// Converts an iterator of [`ScalarValue`]s into an [`ArrayRef`] |
1659 | | /// corresponding to those values. For example, an iterator of |
1660 | | /// [`ScalarValue::Int32`] would be converted to an [`Int32Array`]. |
1661 | | /// |
1662 | | /// Returns an error if the iterator is empty or if the |
1663 | | /// [`ScalarValue`]s are not all the same type |
1664 | | /// |
1665 | | /// # Panics |
1666 | | /// |
1667 | | /// Panics if the scalars are dictionaries with an invalid key type |
1668 | | /// |
1669 | | /// # Example |
1670 | | /// ``` |
1671 | | /// use datafusion_common::ScalarValue; |
1672 | | /// use arrow::array::{ArrayRef, BooleanArray}; |
1673 | | /// |
1674 | | /// let scalars = vec![ |
1675 | | /// ScalarValue::Boolean(Some(true)), |
1676 | | /// ScalarValue::Boolean(None), |
1677 | | /// ScalarValue::Boolean(Some(false)), |
1678 | | /// ]; |
1679 | | /// |
1680 | | /// // Build an Array from the list of ScalarValues |
1681 | | /// let array = ScalarValue::iter_to_array(scalars.into_iter()) |
1682 | | /// .unwrap(); |
1683 | | /// |
1684 | | /// let expected: ArrayRef = std::sync::Arc::new( |
1685 | | /// BooleanArray::from(vec![ |
1686 | | /// Some(true), |
1687 | | /// None, |
1688 | | /// Some(false) |
1689 | | /// ] |
1690 | | /// )); |
1691 | | /// |
1692 | | /// assert_eq!(&array, &expected); |
1693 | | /// ``` |
1694 | 183 | pub fn iter_to_array( |
1695 | 183 | scalars: impl IntoIterator<Item = ScalarValue>, |
1696 | 183 | ) -> Result<ArrayRef> { |
1697 | 183 | let mut scalars = scalars.into_iter().peekable(); |
1698 | | |
1699 | | // figure out the type based on the first element |
1700 | 183 | let data_type = match scalars.peek() { |
1701 | | None => { |
1702 | 0 | return _exec_err!("Empty iterator passed to ScalarValue::iter_to_array"); |
1703 | | } |
1704 | 183 | Some(sv) => sv.data_type(), |
1705 | | }; |
1706 | | |
1707 | | /// Creates an array of $ARRAY_TY by unpacking values of |
1708 | | /// $SCALAR_TY for primitive types |
1709 | | macro_rules! build_array_primitive { |
1710 | | ($ARRAY_TY:ident, $SCALAR_TY:ident) => {{ |
1711 | | { |
1712 | 379 | let array = scalars.map(|sv| { |
1713 | 379 | if let ScalarValue::$SCALAR_TY(v) = sv { |
1714 | 379 | Ok(v) |
1715 | | } else { |
1716 | 0 | _exec_err!( |
1717 | 0 | "Inconsistent types in ScalarValue::iter_to_array. \ |
1718 | 0 | Expected {:?}, got {:?}", |
1719 | 0 | data_type, sv |
1720 | 0 | ) |
1721 | | } |
1722 | 379 | }) |
1723 | | .collect::<Result<$ARRAY_TY>>()?; |
1724 | | Arc::new(array) |
1725 | | } |
1726 | | }}; |
1727 | | } |
1728 | | |
1729 | | macro_rules! build_array_primitive_tz { |
1730 | | ($ARRAY_TY:ident, $SCALAR_TY:ident, $TZ:expr) => {{ |
1731 | | { |
1732 | 0 | let array = scalars.map(|sv| { |
1733 | 0 | if let ScalarValue::$SCALAR_TY(v, _) = sv { |
1734 | 0 | Ok(v) |
1735 | | } else { |
1736 | 0 | _exec_err!( |
1737 | 0 | "Inconsistent types in ScalarValue::iter_to_array. \ |
1738 | 0 | Expected {:?}, got {:?}", |
1739 | 0 | data_type, sv |
1740 | 0 | ) |
1741 | | } |
1742 | 0 | }) |
1743 | | .collect::<Result<$ARRAY_TY>>()?; |
1744 | | Arc::new(array.with_timezone_opt($TZ.clone())) |
1745 | | } |
1746 | | }}; |
1747 | | } |
1748 | | |
1749 | | /// Creates an array of $ARRAY_TY by unpacking values of |
1750 | | /// $SCALAR_TY for "string-like" types. |
1751 | | macro_rules! build_array_string { |
1752 | | ($ARRAY_TY:ident, $SCALAR_TY:ident) => {{ |
1753 | | { |
1754 | 0 | let array = scalars.map(|sv| { |
1755 | 0 | if let ScalarValue::$SCALAR_TY(v) = sv { |
1756 | 0 | Ok(v) |
1757 | | } else { |
1758 | 0 | _exec_err!( |
1759 | 0 | "Inconsistent types in ScalarValue::iter_to_array. \ |
1760 | 0 | Expected {:?}, got {:?}", |
1761 | 0 | data_type, sv |
1762 | 0 | ) |
1763 | | } |
1764 | 0 | }) |
1765 | | .collect::<Result<$ARRAY_TY>>()?; |
1766 | | Arc::new(array) |
1767 | | } |
1768 | | }}; |
1769 | | } |
1770 | | |
1771 | 183 | let array: ArrayRef = match &data_type0 { |
1772 | 0 | DataType::Decimal128(precision, scale) => { |
1773 | 0 | let decimal_array = |
1774 | 0 | ScalarValue::iter_to_decimal_array(scalars, *precision, *scale)?; |
1775 | 0 | Arc::new(decimal_array) |
1776 | | } |
1777 | 0 | DataType::Decimal256(precision, scale) => { |
1778 | 0 | let decimal_array = |
1779 | 0 | ScalarValue::iter_to_decimal256_array(scalars, *precision, *scale)?; |
1780 | 0 | Arc::new(decimal_array) |
1781 | | } |
1782 | 0 | DataType::Null => ScalarValue::iter_to_null_array(scalars)?, |
1783 | 52 | DataType::Boolean => build_array_primitive!(BooleanArray, Boolean), |
1784 | 0 | DataType::Float16 => build_array_primitive!(Float16Array, Float16), |
1785 | 0 | DataType::Float32 => build_array_primitive!(Float32Array, Float32), |
1786 | 116 | DataType::Float64 => build_array_primitive!(Float64Array, Float64), |
1787 | 0 | DataType::Int8 => build_array_primitive!(Int8Array, Int8), |
1788 | 0 | DataType::Int16 => build_array_primitive!(Int16Array, Int16), |
1789 | 9 | DataType::Int32 => build_array_primitive!(Int32Array, Int32), |
1790 | 4 | DataType::Int64 => build_array_primitive!(Int64Array, Int64), |
1791 | 0 | DataType::UInt8 => build_array_primitive!(UInt8Array, UInt8), |
1792 | 0 | DataType::UInt16 => build_array_primitive!(UInt16Array, UInt16), |
1793 | 2 | DataType::UInt32 => build_array_primitive!(UInt32Array, UInt32), |
1794 | 0 | DataType::UInt64 => build_array_primitive!(UInt64Array, UInt64), |
1795 | 0 | DataType::Utf8View => build_array_string!(StringViewArray, Utf8View), |
1796 | 0 | DataType::Utf8 => build_array_string!(StringArray, Utf8), |
1797 | 0 | DataType::LargeUtf8 => build_array_string!(LargeStringArray, LargeUtf8), |
1798 | 0 | DataType::BinaryView => build_array_string!(BinaryViewArray, BinaryView), |
1799 | 0 | DataType::Binary => build_array_string!(BinaryArray, Binary), |
1800 | 0 | DataType::LargeBinary => build_array_string!(LargeBinaryArray, LargeBinary), |
1801 | 0 | DataType::Date32 => build_array_primitive!(Date32Array, Date32), |
1802 | 0 | DataType::Date64 => build_array_primitive!(Date64Array, Date64), |
1803 | | DataType::Time32(TimeUnit::Second) => { |
1804 | 0 | build_array_primitive!(Time32SecondArray, Time32Second) |
1805 | | } |
1806 | | DataType::Time32(TimeUnit::Millisecond) => { |
1807 | 0 | build_array_primitive!(Time32MillisecondArray, Time32Millisecond) |
1808 | | } |
1809 | | DataType::Time64(TimeUnit::Microsecond) => { |
1810 | 0 | build_array_primitive!(Time64MicrosecondArray, Time64Microsecond) |
1811 | | } |
1812 | | DataType::Time64(TimeUnit::Nanosecond) => { |
1813 | 0 | build_array_primitive!(Time64NanosecondArray, Time64Nanosecond) |
1814 | | } |
1815 | 0 | DataType::Timestamp(TimeUnit::Second, tz) => { |
1816 | 0 | build_array_primitive_tz!(TimestampSecondArray, TimestampSecond, tz) |
1817 | | } |
1818 | 0 | DataType::Timestamp(TimeUnit::Millisecond, tz) => { |
1819 | 0 | build_array_primitive_tz!( |
1820 | | TimestampMillisecondArray, |
1821 | | TimestampMillisecond, |
1822 | 0 | tz |
1823 | | ) |
1824 | | } |
1825 | 0 | DataType::Timestamp(TimeUnit::Microsecond, tz) => { |
1826 | 0 | build_array_primitive_tz!( |
1827 | | TimestampMicrosecondArray, |
1828 | | TimestampMicrosecond, |
1829 | 0 | tz |
1830 | | ) |
1831 | | } |
1832 | 0 | DataType::Timestamp(TimeUnit::Nanosecond, tz) => { |
1833 | 0 | build_array_primitive_tz!( |
1834 | | TimestampNanosecondArray, |
1835 | | TimestampNanosecond, |
1836 | 0 | tz |
1837 | | ) |
1838 | | } |
1839 | | DataType::Duration(TimeUnit::Second) => { |
1840 | 0 | build_array_primitive!(DurationSecondArray, DurationSecond) |
1841 | | } |
1842 | | DataType::Duration(TimeUnit::Millisecond) => { |
1843 | 0 | build_array_primitive!(DurationMillisecondArray, DurationMillisecond) |
1844 | | } |
1845 | | DataType::Duration(TimeUnit::Microsecond) => { |
1846 | 0 | build_array_primitive!(DurationMicrosecondArray, DurationMicrosecond) |
1847 | | } |
1848 | | DataType::Duration(TimeUnit::Nanosecond) => { |
1849 | 0 | build_array_primitive!(DurationNanosecondArray, DurationNanosecond) |
1850 | | } |
1851 | | DataType::Interval(IntervalUnit::DayTime) => { |
1852 | 0 | build_array_primitive!(IntervalDayTimeArray, IntervalDayTime) |
1853 | | } |
1854 | | DataType::Interval(IntervalUnit::YearMonth) => { |
1855 | 0 | build_array_primitive!(IntervalYearMonthArray, IntervalYearMonth) |
1856 | | } |
1857 | | DataType::Interval(IntervalUnit::MonthDayNano) => { |
1858 | 0 | build_array_primitive!(IntervalMonthDayNanoArray, IntervalMonthDayNano) |
1859 | | } |
1860 | | DataType::FixedSizeList(_, _) => { |
1861 | | // arrow::compute::concat does not allow inconsistent types, including the size of a FixedSizeList. |
1862 | | // The nulls we get here have a length of 1, so we need to resize them to |
1863 | | // the length of the non-nulls. |
1864 | 0 | let mut arrays = |
1865 | 0 | scalars.map(|s| s.to_array()).collect::<Result<Vec<_>>>()?; |
1866 | 0 | let first_non_null_data_type = arrays |
1867 | 0 | .iter() |
1868 | 0 | .find(|sv| !sv.is_null(0)) |
1869 | 0 | .map(|sv| sv.data_type().to_owned()); |
1870 | 0 | if let Some(DataType::FixedSizeList(f, l)) = first_non_null_data_type { |
1871 | 0 | for array in arrays.iter_mut() { |
1872 | 0 | if array.is_null(0) { |
1873 | 0 | *array = Arc::new(FixedSizeListArray::new_null( |
1874 | 0 | Arc::clone(&f), |
1875 | 0 | l, |
1876 | 0 | 1, |
1877 | 0 | )); |
1878 | 0 | } |
1879 | | } |
1880 | 0 | } |
1881 | 0 | let arrays = arrays.iter().map(|a| a.as_ref()).collect::<Vec<_>>(); |
1882 | 0 | arrow::compute::concat(arrays.as_slice())? |
1883 | | } |
1884 | | DataType::List(_) |
1885 | | | DataType::LargeList(_) |
1886 | | | DataType::Map(_, _) |
1887 | | | DataType::Struct(_) |
1888 | | | DataType::Union(_, _) => { |
1889 | 0 | let arrays = scalars.map(|s| s.to_array()).collect::<Result<Vec<_>>>()?; |
1890 | 0 | let arrays = arrays.iter().map(|a| a.as_ref()).collect::<Vec<_>>(); |
1891 | 0 | arrow::compute::concat(arrays.as_slice())? |
1892 | | } |
1893 | 0 | DataType::Dictionary(key_type, value_type) => { |
1894 | | // create the values array |
1895 | 0 | let value_scalars = scalars |
1896 | 0 | .map(|scalar| match scalar { |
1897 | 0 | ScalarValue::Dictionary(inner_key_type, scalar) => { |
1898 | 0 | if &inner_key_type == key_type { |
1899 | 0 | Ok(*scalar) |
1900 | | } else { |
1901 | 0 | _exec_err!("Expected inner key type of {key_type} but found: {inner_key_type}, value was ({scalar:?})") |
1902 | | } |
1903 | | } |
1904 | | _ => { |
1905 | 0 | _exec_err!( |
1906 | 0 | "Expected scalar of type {value_type} but found: {scalar} {scalar:?}" |
1907 | 0 | ) |
1908 | | } |
1909 | 0 | }) |
1910 | 0 | .collect::<Result<Vec<_>>>()?; |
1911 | | |
1912 | 0 | let values = Self::iter_to_array(value_scalars)?; |
1913 | 0 | assert_eq!(values.data_type(), value_type.as_ref()); |
1914 | | |
1915 | 0 | match key_type.as_ref() { |
1916 | 0 | DataType::Int8 => dict_from_values::<Int8Type>(values)?, |
1917 | 0 | DataType::Int16 => dict_from_values::<Int16Type>(values)?, |
1918 | 0 | DataType::Int32 => dict_from_values::<Int32Type>(values)?, |
1919 | 0 | DataType::Int64 => dict_from_values::<Int64Type>(values)?, |
1920 | 0 | DataType::UInt8 => dict_from_values::<UInt8Type>(values)?, |
1921 | 0 | DataType::UInt16 => dict_from_values::<UInt16Type>(values)?, |
1922 | 0 | DataType::UInt32 => dict_from_values::<UInt32Type>(values)?, |
1923 | 0 | DataType::UInt64 => dict_from_values::<UInt64Type>(values)?, |
1924 | 0 | _ => unreachable!("Invalid dictionary keys type: {:?}", key_type), |
1925 | | } |
1926 | | } |
1927 | 0 | DataType::FixedSizeBinary(size) => { |
1928 | 0 | let array = scalars |
1929 | 0 | .map(|sv| { |
1930 | 0 | if let ScalarValue::FixedSizeBinary(_, v) = sv { |
1931 | 0 | Ok(v) |
1932 | | } else { |
1933 | 0 | _exec_err!( |
1934 | 0 | "Inconsistent types in ScalarValue::iter_to_array. \ |
1935 | 0 | Expected {data_type:?}, got {sv:?}" |
1936 | 0 | ) |
1937 | | } |
1938 | 0 | }) |
1939 | 0 | .collect::<Result<Vec<_>>>()?; |
1940 | 0 | let array = FixedSizeBinaryArray::try_from_sparse_iter_with_size( |
1941 | 0 | array.into_iter(), |
1942 | 0 | *size, |
1943 | 0 | )?; |
1944 | 0 | Arc::new(array) |
1945 | | } |
1946 | | // explicitly enumerate unsupported types so newly added |
1947 | | // types must be acknowledged. Time32 and Time64 types are |
1948 | | // not supported if the TimeUnit is not valid (Time32 can |
1949 | | // only be used with Second and Millisecond, Time64 only |
1950 | | // with Microsecond and Nanosecond) |
1951 | | DataType::Time32(TimeUnit::Microsecond) |
1952 | | | DataType::Time32(TimeUnit::Nanosecond) |
1953 | | | DataType::Time64(TimeUnit::Second) |
1954 | | | DataType::Time64(TimeUnit::Millisecond) |
1955 | | | DataType::RunEndEncoded(_, _) |
1956 | | | DataType::ListView(_) |
1957 | | | DataType::LargeListView(_) => { |
1958 | 0 | return _not_impl_err!( |
1959 | 0 | "Unsupported creation of {:?} array from ScalarValue {:?}", |
1960 | 0 | data_type, |
1961 | 0 | scalars.peek() |
1962 | 0 | ); |
1963 | | } |
1964 | | }; |
1965 | 183 | Ok(array) |
1966 | 183 | } |
1967 | | |
1968 | 0 | fn iter_to_null_array( |
1969 | 0 | scalars: impl IntoIterator<Item = ScalarValue>, |
1970 | 0 | ) -> Result<ArrayRef> { |
1971 | 0 | let length = scalars.into_iter().try_fold( |
1972 | 0 | 0usize, |
1973 | 0 | |r, element: ScalarValue| match element { |
1974 | 0 | ScalarValue::Null => Ok::<usize, DataFusionError>(r + 1), |
1975 | 0 | s => { |
1976 | 0 | _internal_err!("Expected ScalarValue::Null element. Received {s:?}") |
1977 | | } |
1978 | 0 | }, |
1979 | 0 | )?; |
1980 | 0 | Ok(new_null_array(&DataType::Null, length)) |
1981 | 0 | } |
1982 | | |
1983 | 0 | fn iter_to_decimal_array( |
1984 | 0 | scalars: impl IntoIterator<Item = ScalarValue>, |
1985 | 0 | precision: u8, |
1986 | 0 | scale: i8, |
1987 | 0 | ) -> Result<Decimal128Array> { |
1988 | 0 | let array = scalars |
1989 | 0 | .into_iter() |
1990 | 0 | .map(|element: ScalarValue| match element { |
1991 | 0 | ScalarValue::Decimal128(v1, _, _) => Ok(v1), |
1992 | 0 | s => { |
1993 | 0 | _internal_err!("Expected ScalarValue::Null element. Received {s:?}") |
1994 | | } |
1995 | 0 | }) |
1996 | 0 | .collect::<Result<Decimal128Array>>()? |
1997 | 0 | .with_precision_and_scale(precision, scale)?; |
1998 | 0 | Ok(array) |
1999 | 0 | } |
2000 | | |
2001 | 0 | fn iter_to_decimal256_array( |
2002 | 0 | scalars: impl IntoIterator<Item = ScalarValue>, |
2003 | 0 | precision: u8, |
2004 | 0 | scale: i8, |
2005 | 0 | ) -> Result<Decimal256Array> { |
2006 | 0 | let array = scalars |
2007 | 0 | .into_iter() |
2008 | 0 | .map(|element: ScalarValue| match element { |
2009 | 0 | ScalarValue::Decimal256(v1, _, _) => Ok(v1), |
2010 | 0 | s => { |
2011 | 0 | _internal_err!( |
2012 | 0 | "Expected ScalarValue::Decimal256 element. Received {s:?}" |
2013 | 0 | ) |
2014 | | } |
2015 | 0 | }) |
2016 | 0 | .collect::<Result<Decimal256Array>>()? |
2017 | 0 | .with_precision_and_scale(precision, scale)?; |
2018 | 0 | Ok(array) |
2019 | 0 | } |
2020 | | |
2021 | 0 | fn build_decimal_array( |
2022 | 0 | value: Option<i128>, |
2023 | 0 | precision: u8, |
2024 | 0 | scale: i8, |
2025 | 0 | size: usize, |
2026 | 0 | ) -> Result<Decimal128Array> { |
2027 | 0 | Ok(match value { |
2028 | 0 | Some(val) => Decimal128Array::from(vec![val; size]) |
2029 | 0 | .with_precision_and_scale(precision, scale)?, |
2030 | | None => { |
2031 | 0 | let mut builder = Decimal128Array::builder(size) |
2032 | 0 | .with_precision_and_scale(precision, scale)?; |
2033 | 0 | builder.append_nulls(size); |
2034 | 0 | builder.finish() |
2035 | | } |
2036 | | }) |
2037 | 0 | } |
2038 | | |
2039 | 0 | fn build_decimal256_array( |
2040 | 0 | value: Option<i256>, |
2041 | 0 | precision: u8, |
2042 | 0 | scale: i8, |
2043 | 0 | size: usize, |
2044 | 0 | ) -> Result<Decimal256Array> { |
2045 | 0 | Ok(std::iter::repeat(value) |
2046 | 0 | .take(size) |
2047 | 0 | .collect::<Decimal256Array>() |
2048 | 0 | .with_precision_and_scale(precision, scale)?) |
2049 | 0 | } |
2050 | | |
2051 | | /// Converts `Vec<ScalarValue>` where each element has type corresponding to |
2052 | | /// `data_type`, to a single element [`ListArray`]. |
2053 | | /// |
2054 | | /// Example |
2055 | | /// ``` |
2056 | | /// use datafusion_common::ScalarValue; |
2057 | | /// use arrow::array::{ListArray, Int32Array}; |
2058 | | /// use arrow::datatypes::{DataType, Int32Type}; |
2059 | | /// use datafusion_common::cast::as_list_array; |
2060 | | /// |
2061 | | /// let scalars = vec![ |
2062 | | /// ScalarValue::Int32(Some(1)), |
2063 | | /// ScalarValue::Int32(None), |
2064 | | /// ScalarValue::Int32(Some(2)) |
2065 | | /// ]; |
2066 | | /// |
2067 | | /// let result = ScalarValue::new_list(&scalars, &DataType::Int32, true); |
2068 | | /// |
2069 | | /// let expected = ListArray::from_iter_primitive::<Int32Type, _, _>( |
2070 | | /// vec![ |
2071 | | /// Some(vec![Some(1), None, Some(2)]) |
2072 | | /// ]); |
2073 | | /// |
2074 | | /// assert_eq!(*result, expected); |
2075 | | /// ``` |
2076 | 0 | pub fn new_list( |
2077 | 0 | values: &[ScalarValue], |
2078 | 0 | data_type: &DataType, |
2079 | 0 | nullable: bool, |
2080 | 0 | ) -> Arc<ListArray> { |
2081 | 0 | let values = if values.is_empty() { |
2082 | 0 | new_empty_array(data_type) |
2083 | | } else { |
2084 | 0 | Self::iter_to_array(values.iter().cloned()).unwrap() |
2085 | | }; |
2086 | 0 | Arc::new(array_into_list_array(values, nullable)) |
2087 | 0 | } |
2088 | | |
2089 | | /// Same as [`ScalarValue::new_list`] but with nullable set to true. |
2090 | 0 | pub fn new_list_nullable( |
2091 | 0 | values: &[ScalarValue], |
2092 | 0 | data_type: &DataType, |
2093 | 0 | ) -> Arc<ListArray> { |
2094 | 0 | Self::new_list(values, data_type, true) |
2095 | 0 | } |
2096 | | |
2097 | | /// Creates a [`ScalarValue::List`] of `null_len` null entries whose element type is `data_type` |
2098 | | /// |
2099 | | /// - new_null_list(i32, nullable, 1): `ListArray[NULL]` |
2100 | 0 | pub fn new_null_list(data_type: DataType, nullable: bool, null_len: usize) -> Self { |
2101 | 0 | let data_type = DataType::List(Field::new_list_field(data_type, nullable).into()); |
2102 | 0 | Self::List(Arc::new(ListArray::from(ArrayData::new_null( |
2103 | 0 | &data_type, null_len, |
2104 | 0 | )))) |
2105 | 0 | } |
2106 | | |
2107 | | /// Converts `IntoIterator<Item = ScalarValue>` where each element has type corresponding to |
2108 | | /// `data_type`, to a [`ListArray`]. |
2109 | | /// |
2110 | | /// Example |
2111 | | /// ``` |
2112 | | /// use datafusion_common::ScalarValue; |
2113 | | /// use arrow::array::{ListArray, Int32Array}; |
2114 | | /// use arrow::datatypes::{DataType, Int32Type}; |
2115 | | /// use datafusion_common::cast::as_list_array; |
2116 | | /// |
2117 | | /// let scalars = vec![ |
2118 | | /// ScalarValue::Int32(Some(1)), |
2119 | | /// ScalarValue::Int32(None), |
2120 | | /// ScalarValue::Int32(Some(2)) |
2121 | | /// ]; |
2122 | | /// |
2123 | | /// let result = ScalarValue::new_list_from_iter(scalars.into_iter(), &DataType::Int32, true); |
2124 | | /// |
2125 | | /// let expected = ListArray::from_iter_primitive::<Int32Type, _, _>( |
2126 | | /// vec![ |
2127 | | /// Some(vec![Some(1), None, Some(2)]) |
2128 | | /// ]); |
2129 | | /// |
2130 | | /// assert_eq!(*result, expected); |
2131 | | /// ``` |
2132 | 0 | pub fn new_list_from_iter( |
2133 | 0 | values: impl IntoIterator<Item = ScalarValue> + ExactSizeIterator, |
2134 | 0 | data_type: &DataType, |
2135 | 0 | nullable: bool, |
2136 | 0 | ) -> Arc<ListArray> { |
2137 | 0 | let values = if values.len() == 0 { |
2138 | 0 | new_empty_array(data_type) |
2139 | | } else { |
2140 | 0 | Self::iter_to_array(values).unwrap() |
2141 | | }; |
2142 | 0 | Arc::new(array_into_list_array(values, nullable)) |
2143 | 0 | } |
2144 | | |
2145 | | /// Converts `Vec<ScalarValue>` where each element has type corresponding to |
2146 | | /// `data_type`, to a [`LargeListArray`]. |
2147 | | /// |
2148 | | /// Example |
2149 | | /// ``` |
2150 | | /// use datafusion_common::ScalarValue; |
2151 | | /// use arrow::array::{LargeListArray, Int32Array}; |
2152 | | /// use arrow::datatypes::{DataType, Int32Type}; |
2153 | | /// use datafusion_common::cast::as_large_list_array; |
2154 | | /// |
2155 | | /// let scalars = vec![ |
2156 | | /// ScalarValue::Int32(Some(1)), |
2157 | | /// ScalarValue::Int32(None), |
2158 | | /// ScalarValue::Int32(Some(2)) |
2159 | | /// ]; |
2160 | | /// |
2161 | | /// let result = ScalarValue::new_large_list(&scalars, &DataType::Int32); |
2162 | | /// |
2163 | | /// let expected = LargeListArray::from_iter_primitive::<Int32Type, _, _>( |
2164 | | /// vec![ |
2165 | | /// Some(vec![Some(1), None, Some(2)]) |
2166 | | /// ]); |
2167 | | /// |
2168 | | /// assert_eq!(*result, expected); |
2169 | | /// ``` |
2170 | 0 | pub fn new_large_list( |
2171 | 0 | values: &[ScalarValue], |
2172 | 0 | data_type: &DataType, |
2173 | 0 | ) -> Arc<LargeListArray> { |
2174 | 0 | let values = if values.is_empty() { |
2175 | 0 | new_empty_array(data_type) |
2176 | | } else { |
2177 | 0 | Self::iter_to_array(values.iter().cloned()).unwrap() |
2178 | | }; |
2179 | 0 | Arc::new(array_into_large_list_array(values)) |
2180 | 0 | } |
2181 | | |
2182 | | /// Converts a scalar value into an array of `size` rows. |
2183 | | /// |
2184 | | /// # Errors |
2185 | | /// |
2186 | | /// Errors if `self` is |
2187 | | /// - a decimal that fails to be converted to a decimal array of the requested size |
2188 | | /// - a `FixedSizeList` that fails to be concatenated into an array of the requested size |
2189 | | /// - a `List` that fails to be concatenated into an array of the requested size |
2190 | | /// - a `Dictionary` that fails to be converted to a dictionary array of the requested size |
2191 | 179k | pub fn to_array_of_size(&self, size: usize) -> Result<ArrayRef> { |
2192 | 179k | Ok(match self { |
2193 | 0 | ScalarValue::Decimal128(e, precision, scale) => Arc::new( |
2194 | 0 | ScalarValue::build_decimal_array(*e, *precision, *scale, size)?, |
2195 | | ), |
2196 | 0 | ScalarValue::Decimal256(e, precision, scale) => Arc::new( |
2197 | 0 | ScalarValue::build_decimal256_array(*e, *precision, *scale, size)?, |
2198 | | ), |
2199 | 0 | ScalarValue::Boolean(e) => { |
2200 | 0 | Arc::new(BooleanArray::from(vec![*e; size])) as ArrayRef |
2201 | | } |
2202 | 44.2k | ScalarValue::Float64(e) => { |
2203 | 44.2k | build_array_from_option!44.2k (Float64, Float64Array, e, size16 ) |
2204 | | } |
2205 | 8 | ScalarValue::Float32(e) => { |
2206 | 8 | build_array_from_option!0 (Float32, Float32Array, e, size) |
2207 | | } |
2208 | 0 | ScalarValue::Float16(e) => { |
2209 | 0 | build_array_from_option!(Float16, Float16Array, e, size) |
2210 | | } |
2211 | 16 | ScalarValue::Int8(e) => build_array_from_option!(Int8, Int8Array, e, size0 ), |
2212 | 0 | ScalarValue::Int16(e) => build_array_from_option!(Int16, Int16Array, e, size), |
2213 | 100k | ScalarValue::Int32(e) => build_array_from_option!99.5k (Int32, Int32Array, e, size811 ), |
2214 | 5.85k | ScalarValue::Int64(e) => build_array_from_option!5.07k (Int64, Int64Array, e, size779 ), |
2215 | 0 | ScalarValue::UInt8(e) => build_array_from_option!(UInt8, UInt8Array, e, size), |
2216 | 0 | ScalarValue::UInt16(e) => { |
2217 | 0 | build_array_from_option!(UInt16, UInt16Array, e, size) |
2218 | | } |
2219 | 16 | ScalarValue::UInt32(e) => { |
2220 | 16 | build_array_from_option!0 (UInt32, UInt32Array, e, size) |
2221 | | } |
2222 | 44 | ScalarValue::UInt64(e) => { |
2223 | 44 | build_array_from_option!(UInt64, UInt64Array, e, size0 ) |
2224 | | } |
2225 | 0 | ScalarValue::TimestampSecond(e, tz_opt) => { |
2226 | 0 | build_timestamp_array_from_option!( |
2227 | 0 | TimeUnit::Second, |
2228 | 0 | tz_opt.clone(), |
2229 | | TimestampSecondArray, |
2230 | 0 | e, |
2231 | 0 | size |
2232 | | ) |
2233 | | } |
2234 | 11.2k | ScalarValue::TimestampMillisecond(e, tz_opt) => { |
2235 | 11.2k | build_timestamp_array_from_option!( |
2236 | 0 | TimeUnit::Millisecond, |
2237 | 0 | tz_opt.clone(), |
2238 | | TimestampMillisecondArray, |
2239 | 11.2k | e, |
2240 | 0 | size |
2241 | | ) |
2242 | | } |
2243 | | |
2244 | 0 | ScalarValue::TimestampMicrosecond(e, tz_opt) => { |
2245 | 0 | build_timestamp_array_from_option!( |
2246 | 0 | TimeUnit::Microsecond, |
2247 | 0 | tz_opt.clone(), |
2248 | | TimestampMicrosecondArray, |
2249 | 0 | e, |
2250 | 0 | size |
2251 | | ) |
2252 | | } |
2253 | 0 | ScalarValue::TimestampNanosecond(e, tz_opt) => { |
2254 | 0 | build_timestamp_array_from_option!( |
2255 | 0 | TimeUnit::Nanosecond, |
2256 | 0 | tz_opt.clone(), |
2257 | | TimestampNanosecondArray, |
2258 | 0 | e, |
2259 | 0 | size |
2260 | | ) |
2261 | | } |
2262 | 0 | ScalarValue::Utf8(e) => match e { |
2263 | 0 | Some(value) => { |
2264 | 0 | Arc::new(StringArray::from_iter_values(repeat(value).take(size))) |
2265 | | } |
2266 | 0 | None => new_null_array(&DataType::Utf8, size), |
2267 | | }, |
2268 | 0 | ScalarValue::Utf8View(e) => match e { |
2269 | 0 | Some(value) => { |
2270 | 0 | Arc::new(StringViewArray::from_iter_values(repeat(value).take(size))) |
2271 | | } |
2272 | 0 | None => new_null_array(&DataType::Utf8View, size), |
2273 | | }, |
2274 | 0 | ScalarValue::LargeUtf8(e) => match e { |
2275 | 0 | Some(value) => { |
2276 | 0 | Arc::new(LargeStringArray::from_iter_values(repeat(value).take(size))) |
2277 | | } |
2278 | 0 | None => new_null_array(&DataType::LargeUtf8, size), |
2279 | | }, |
2280 | 0 | ScalarValue::Binary(e) => match e { |
2281 | 0 | Some(value) => Arc::new( |
2282 | 0 | repeat(Some(value.as_slice())) |
2283 | 0 | .take(size) |
2284 | 0 | .collect::<BinaryArray>(), |
2285 | 0 | ), |
2286 | | None => { |
2287 | 0 | Arc::new(repeat(None::<&str>).take(size).collect::<BinaryArray>()) |
2288 | | } |
2289 | | }, |
2290 | 0 | ScalarValue::BinaryView(e) => match e { |
2291 | 0 | Some(value) => Arc::new( |
2292 | 0 | repeat(Some(value.as_slice())) |
2293 | 0 | .take(size) |
2294 | 0 | .collect::<BinaryViewArray>(), |
2295 | 0 | ), |
2296 | | None => { |
2297 | 0 | Arc::new(repeat(None::<&str>).take(size).collect::<BinaryViewArray>()) |
2298 | | } |
2299 | | }, |
2300 | 0 | ScalarValue::FixedSizeBinary(s, e) => match e { |
2301 | 0 | Some(value) => Arc::new( |
2302 | 0 | FixedSizeBinaryArray::try_from_sparse_iter_with_size( |
2303 | 0 | repeat(Some(value.as_slice())).take(size), |
2304 | 0 | *s, |
2305 | 0 | ) |
2306 | 0 | .unwrap(), |
2307 | 0 | ), |
2308 | 0 | None => Arc::new( |
2309 | 0 | FixedSizeBinaryArray::try_from_sparse_iter_with_size( |
2310 | 0 | repeat(None::<&[u8]>).take(size), |
2311 | 0 | *s, |
2312 | 0 | ) |
2313 | 0 | .unwrap(), |
2314 | 0 | ), |
2315 | | }, |
2316 | 0 | ScalarValue::LargeBinary(e) => match e { |
2317 | 0 | Some(value) => Arc::new( |
2318 | 0 | repeat(Some(value.as_slice())) |
2319 | 0 | .take(size) |
2320 | 0 | .collect::<LargeBinaryArray>(), |
2321 | 0 | ), |
2322 | 0 | None => Arc::new( |
2323 | 0 | repeat(None::<&str>) |
2324 | 0 | .take(size) |
2325 | 0 | .collect::<LargeBinaryArray>(), |
2326 | 0 | ), |
2327 | | }, |
2328 | 0 | ScalarValue::List(arr) => { |
2329 | 0 | Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)? |
2330 | | } |
2331 | 0 | ScalarValue::LargeList(arr) => { |
2332 | 0 | Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)? |
2333 | | } |
2334 | 0 | ScalarValue::FixedSizeList(arr) => { |
2335 | 0 | Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)? |
2336 | | } |
2337 | 0 | ScalarValue::Struct(arr) => { |
2338 | 0 | Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)? |
2339 | | } |
2340 | 0 | ScalarValue::Map(arr) => { |
2341 | 0 | Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)? |
2342 | | } |
2343 | 0 | ScalarValue::Date32(e) => { |
2344 | 0 | build_array_from_option!(Date32, Date32Array, e, size) |
2345 | | } |
2346 | 0 | ScalarValue::Date64(e) => { |
2347 | 0 | build_array_from_option!(Date64, Date64Array, e, size) |
2348 | | } |
2349 | 0 | ScalarValue::Time32Second(e) => { |
2350 | 0 | build_array_from_option!( |
2351 | | Time32, |
2352 | | TimeUnit::Second, |
2353 | | Time32SecondArray, |
2354 | 0 | e, |
2355 | 0 | size |
2356 | | ) |
2357 | | } |
2358 | 0 | ScalarValue::Time32Millisecond(e) => { |
2359 | 0 | build_array_from_option!( |
2360 | | Time32, |
2361 | | TimeUnit::Millisecond, |
2362 | | Time32MillisecondArray, |
2363 | 0 | e, |
2364 | 0 | size |
2365 | | ) |
2366 | | } |
2367 | 0 | ScalarValue::Time64Microsecond(e) => { |
2368 | 0 | build_array_from_option!( |
2369 | | Time64, |
2370 | | TimeUnit::Microsecond, |
2371 | | Time64MicrosecondArray, |
2372 | 0 | e, |
2373 | 0 | size |
2374 | | ) |
2375 | | } |
2376 | 0 | ScalarValue::Time64Nanosecond(e) => { |
2377 | 0 | build_array_from_option!( |
2378 | | Time64, |
2379 | | TimeUnit::Nanosecond, |
2380 | | Time64NanosecondArray, |
2381 | 0 | e, |
2382 | 0 | size |
2383 | | ) |
2384 | | } |
2385 | 10.6k | ScalarValue::IntervalDayTime(e) => build_array_from_option!( |
2386 | | Interval, |
2387 | | IntervalUnit::DayTime, |
2388 | | IntervalDayTimeArray, |
2389 | 10.6k | e, |
2390 | 0 | size |
2391 | | ), |
2392 | 0 | ScalarValue::IntervalYearMonth(e) => build_array_from_option!( |
2393 | | Interval, |
2394 | | IntervalUnit::YearMonth, |
2395 | | IntervalYearMonthArray, |
2396 | 0 | e, |
2397 | 0 | size |
2398 | | ), |
2399 | 0 | ScalarValue::IntervalMonthDayNano(e) => build_array_from_option!( |
2400 | | Interval, |
2401 | | IntervalUnit::MonthDayNano, |
2402 | | IntervalMonthDayNanoArray, |
2403 | 0 | e, |
2404 | 0 | size |
2405 | | ), |
2406 | 0 | ScalarValue::DurationSecond(e) => build_array_from_option!( |
2407 | | Duration, |
2408 | | TimeUnit::Second, |
2409 | | DurationSecondArray, |
2410 | 0 | e, |
2411 | 0 | size |
2412 | | ), |
2413 | 7.39k | ScalarValue::DurationMillisecond(e) => build_array_from_option!( |
2414 | | Duration, |
2415 | | TimeUnit::Millisecond, |
2416 | | DurationMillisecondArray, |
2417 | 7.39k | e, |
2418 | 0 | size |
2419 | | ), |
2420 | 0 | ScalarValue::DurationMicrosecond(e) => build_array_from_option!( |
2421 | | Duration, |
2422 | | TimeUnit::Microsecond, |
2423 | | DurationMicrosecondArray, |
2424 | 0 | e, |
2425 | 0 | size |
2426 | | ), |
2427 | 0 | ScalarValue::DurationNanosecond(e) => build_array_from_option!( |
2428 | | Duration, |
2429 | | TimeUnit::Nanosecond, |
2430 | | DurationNanosecondArray, |
2431 | 0 | e, |
2432 | 0 | size |
2433 | | ), |
2434 | 0 | ScalarValue::Union(value, fields, _mode) => match value { |
2435 | 0 | Some((v_id, value)) => { |
2436 | 0 | let mut new_fields = Vec::with_capacity(fields.len()); |
2437 | 0 | let mut child_arrays = Vec::<ArrayRef>::with_capacity(fields.len()); |
2438 | 0 | for (f_id, field) in fields.iter() { |
2439 | 0 | let ar = if f_id == *v_id { |
2440 | 0 | value.to_array_of_size(size)? |
2441 | | } else { |
2442 | 0 | let dt = field.data_type(); |
2443 | 0 | new_null_array(dt, size) |
2444 | | }; |
2445 | 0 | let field = (**field).clone(); |
2446 | 0 | child_arrays.push(ar); |
2447 | 0 | new_fields.push(field.clone()); |
2448 | | } |
2449 | 0 | let type_ids = repeat(*v_id).take(size); |
2450 | 0 | let type_ids = ScalarBuffer::<i8>::from_iter(type_ids); |
2451 | 0 | let value_offsets: Option<ScalarBuffer<i32>> = None; |
2452 | 0 | let ar = UnionArray::try_new( |
2453 | 0 | fields.clone(), |
2454 | 0 | type_ids, |
2455 | 0 | value_offsets, |
2456 | 0 | child_arrays, |
2457 | 0 | ) |
2458 | 0 | .map_err(|e| DataFusionError::ArrowError(e, None))?; |
2459 | 0 | Arc::new(ar) |
2460 | | } |
2461 | | None => { |
2462 | 0 | let dt = self.data_type(); |
2463 | 0 | new_null_array(&dt, size) |
2464 | | } |
2465 | | }, |
2466 | 0 | ScalarValue::Dictionary(key_type, v) => { |
2467 | 0 | // values array is one element long (the value) |
2468 | 0 | match key_type.as_ref() { |
2469 | 0 | DataType::Int8 => dict_from_scalar::<Int8Type>(v, size)?, |
2470 | 0 | DataType::Int16 => dict_from_scalar::<Int16Type>(v, size)?, |
2471 | 0 | DataType::Int32 => dict_from_scalar::<Int32Type>(v, size)?, |
2472 | 0 | DataType::Int64 => dict_from_scalar::<Int64Type>(v, size)?, |
2473 | 0 | DataType::UInt8 => dict_from_scalar::<UInt8Type>(v, size)?, |
2474 | 0 | DataType::UInt16 => dict_from_scalar::<UInt16Type>(v, size)?, |
2475 | 0 | DataType::UInt32 => dict_from_scalar::<UInt32Type>(v, size)?, |
2476 | 0 | DataType::UInt64 => dict_from_scalar::<UInt64Type>(v, size)?, |
2477 | 0 | _ => unreachable!("Invalid dictionary keys type: {:?}", key_type), |
2478 | | } |
2479 | | } |
2480 | 0 | ScalarValue::Null => new_null_array(&DataType::Null, size), |
2481 | | }) |
2482 | 179k | } |
2483 | | |
2484 | 0 | fn get_decimal_value_from_array( |
2485 | 0 | array: &dyn Array, |
2486 | 0 | index: usize, |
2487 | 0 | precision: u8, |
2488 | 0 | scale: i8, |
2489 | 0 | ) -> Result<ScalarValue> { |
2490 | 0 | match array.data_type() { |
2491 | | DataType::Decimal128(_, _) => { |
2492 | 0 | let array = as_decimal128_array(array)?; |
2493 | 0 | if array.is_null(index) { |
2494 | 0 | Ok(ScalarValue::Decimal128(None, precision, scale)) |
2495 | | } else { |
2496 | 0 | let value = array.value(index); |
2497 | 0 | Ok(ScalarValue::Decimal128(Some(value), precision, scale)) |
2498 | | } |
2499 | | } |
2500 | | DataType::Decimal256(_, _) => { |
2501 | 0 | let array = as_decimal256_array(array)?; |
2502 | 0 | if array.is_null(index) { |
2503 | 0 | Ok(ScalarValue::Decimal256(None, precision, scale)) |
2504 | | } else { |
2505 | 0 | let value = array.value(index); |
2506 | 0 | Ok(ScalarValue::Decimal256(Some(value), precision, scale)) |
2507 | | } |
2508 | | } |
2509 | 0 | _ => _internal_err!("Unsupported decimal type"), |
2510 | | } |
2511 | 0 | } |
2512 | | |
2513 | 0 | fn list_to_array_of_size(arr: &dyn Array, size: usize) -> Result<ArrayRef> { |
2514 | 0 | let arrays = std::iter::repeat(arr).take(size).collect::<Vec<_>>(); |
2515 | 0 | let ret = match !arrays.is_empty() { |
2516 | 0 | true => arrow::compute::concat(arrays.as_slice())?, |
2517 | 0 | false => arr.slice(0, 0), |
2518 | | }; |
2519 | 0 | Ok(ret) |
2520 | 0 | } |
2521 | | |
2522 | | /// Retrieve ScalarValue for each row in `array` |
2523 | | /// |
2524 | | /// Example 1: Array (ScalarValue::Int32) |
2525 | | /// ``` |
2526 | | /// use datafusion_common::ScalarValue; |
2527 | | /// use arrow::array::ListArray; |
2528 | | /// use arrow::datatypes::{DataType, Int32Type}; |
2529 | | /// |
2530 | | /// // Equivalent to [[1,2,3], [4,5]] |
2531 | | /// let list_arr = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![ |
2532 | | /// Some(vec![Some(1), Some(2), Some(3)]), |
2533 | | /// Some(vec![Some(4), Some(5)]) |
2534 | | /// ]); |
2535 | | /// |
2536 | | /// // Convert the array into Scalar Values for each row |
2537 | | /// let scalar_vec = ScalarValue::convert_array_to_scalar_vec(&list_arr).unwrap(); |
2538 | | /// |
2539 | | /// let expected = vec![ |
2540 | | /// vec![ |
2541 | | /// ScalarValue::Int32(Some(1)), |
2542 | | /// ScalarValue::Int32(Some(2)), |
2543 | | /// ScalarValue::Int32(Some(3)), |
2544 | | /// ], |
2545 | | /// vec![ |
2546 | | /// ScalarValue::Int32(Some(4)), |
2547 | | /// ScalarValue::Int32(Some(5)), |
2548 | | /// ], |
2549 | | /// ]; |
2550 | | /// |
2551 | | /// assert_eq!(scalar_vec, expected); |
2552 | | /// ``` |
2553 | | /// |
2554 | | /// Example 2: Nested array (ScalarValue::List) |
2555 | | /// ``` |
2556 | | /// use datafusion_common::ScalarValue; |
2557 | | /// use arrow::array::ListArray; |
2558 | | /// use arrow::datatypes::{DataType, Int32Type}; |
2559 | | /// use datafusion_common::utils::array_into_list_array_nullable; |
2560 | | /// use std::sync::Arc; |
2561 | | /// |
2562 | | /// let list_arr = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![ |
2563 | | /// Some(vec![Some(1), Some(2), Some(3)]), |
2564 | | /// Some(vec![Some(4), Some(5)]) |
2565 | | /// ]); |
2566 | | /// |
2567 | | /// // Wrap into another layer of list, we got nested array as [ [[1,2,3], [4,5]] ] |
2568 | | /// let list_arr = array_into_list_array_nullable(Arc::new(list_arr)); |
2569 | | /// |
2570 | | /// // Convert the array into Scalar Values for each row, we got 1D arrays in this example |
2571 | | /// let scalar_vec = ScalarValue::convert_array_to_scalar_vec(&list_arr).unwrap(); |
2572 | | /// |
2573 | | /// let l1 = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![ |
2574 | | /// Some(vec![Some(1), Some(2), Some(3)]), |
2575 | | /// ]); |
2576 | | /// let l2 = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![ |
2577 | | /// Some(vec![Some(4), Some(5)]), |
2578 | | /// ]); |
2579 | | /// |
2580 | | /// let expected = vec![ |
2581 | | /// vec![ |
2582 | | /// ScalarValue::List(Arc::new(l1)), |
2583 | | /// ScalarValue::List(Arc::new(l2)), |
2584 | | /// ], |
2585 | | /// ]; |
2586 | | /// |
2587 | | /// assert_eq!(scalar_vec, expected); |
2588 | | /// ``` |
2589 | 0 | pub fn convert_array_to_scalar_vec(array: &dyn Array) -> Result<Vec<Vec<Self>>> { |
2590 | 0 | let mut scalars = Vec::with_capacity(array.len()); |
2591 | | |
2592 | 0 | for index in 0..array.len() { |
2593 | 0 | let nested_array = array.as_list::<i32>().value(index); |
2594 | 0 | let scalar_values = (0..nested_array.len()) |
2595 | 0 | .map(|i| ScalarValue::try_from_array(&nested_array, i)) |
2596 | 0 | .collect::<Result<Vec<_>>>()?; |
2597 | 0 | scalars.push(scalar_values); |
2598 | | } |
2599 | | |
2600 | 0 | Ok(scalars) |
2601 | 0 | } |
2602 | | |
2603 | | // TODO: Support more types after other ScalarValue is wrapped with ArrayRef |
2604 | | /// Get raw data (inner array) inside ScalarValue |
2605 | 0 | pub fn raw_data(&self) -> Result<ArrayRef> { |
2606 | 0 | match self { |
2607 | 0 | ScalarValue::List(arr) => Ok(arr.to_owned()), |
2608 | 0 | _ => _internal_err!("ScalarValue is not a list"), |
2609 | | } |
2610 | 0 | } |
2611 | | |
2612 | | /// Converts a value in `array` at `index` into a ScalarValue |
2613 | 65.1k | pub fn try_from_array(array: &dyn Array, index: usize) -> Result<Self> { |
2614 | 65.1k | // handle NULL value |
2615 | 65.1k | if !array.is_valid(index) { |
2616 | 1.64k | return array.data_type().try_into(); |
2617 | 63.5k | } |
2618 | 63.5k | |
2619 | 63.5k | Ok(match array.data_type() { |
2620 | 0 | DataType::Null => ScalarValue::Null, |
2621 | 0 | DataType::Decimal128(precision, scale) => { |
2622 | 0 | ScalarValue::get_decimal_value_from_array( |
2623 | 0 | array, index, *precision, *scale, |
2624 | 0 | )? |
2625 | | } |
2626 | 0 | DataType::Decimal256(precision, scale) => { |
2627 | 0 | ScalarValue::get_decimal_value_from_array( |
2628 | 0 | array, index, *precision, *scale, |
2629 | 0 | )? |
2630 | | } |
2631 | 66 | DataType::Boolean => typed_cast!(array, index, BooleanArray, Boolean)?0 , |
2632 | 21.9k | DataType::Float64 => typed_cast!(array, index, Float64Array, Float64)?0 , |
2633 | 0 | DataType::Float32 => typed_cast!(array, index, Float32Array, Float32)?, |
2634 | 0 | DataType::Float16 => typed_cast!(array, index, Float16Array, Float16)?, |
2635 | 107 | DataType::UInt64 => typed_cast!(array, index, UInt64Array, UInt64)?0 , |
2636 | 0 | DataType::UInt32 => typed_cast!(array, index, UInt32Array, UInt32)?, |
2637 | 0 | DataType::UInt16 => typed_cast!(array, index, UInt16Array, UInt16)?, |
2638 | 0 | DataType::UInt8 => typed_cast!(array, index, UInt8Array, UInt8)?, |
2639 | 2.32k | DataType::Int64 => typed_cast!(array, index, Int64Array, Int64)?0 , |
2640 | 24.3k | DataType::Int32 => typed_cast!(array, index, Int32Array, Int32)?0 , |
2641 | 0 | DataType::Int16 => typed_cast!(array, index, Int16Array, Int16)?, |
2642 | 0 | DataType::Int8 => typed_cast!(array, index, Int8Array, Int8)?, |
2643 | 0 | DataType::Binary => typed_cast!(array, index, BinaryArray, Binary)?, |
2644 | | DataType::LargeBinary => { |
2645 | 0 | typed_cast!(array, index, LargeBinaryArray, LargeBinary)? |
2646 | | } |
2647 | | DataType::BinaryView => { |
2648 | 0 | typed_cast!(array, index, BinaryViewArray, BinaryView)? |
2649 | | } |
2650 | 0 | DataType::Utf8 => typed_cast!(array, index, StringArray, Utf8)?, |
2651 | | DataType::LargeUtf8 => { |
2652 | 0 | typed_cast!(array, index, LargeStringArray, LargeUtf8)? |
2653 | | } |
2654 | 0 | DataType::Utf8View => typed_cast!(array, index, StringViewArray, Utf8View)?, |
2655 | 0 | DataType::List(field) => { |
2656 | 0 | let list_array = array.as_list::<i32>(); |
2657 | 0 | let nested_array = list_array.value(index); |
2658 | 0 | // Produces a single element `ListArray` with the value at `index`. |
2659 | 0 | let arr = |
2660 | 0 | Arc::new(array_into_list_array(nested_array, field.is_nullable())); |
2661 | 0 |
|
2662 | 0 | ScalarValue::List(arr) |
2663 | | } |
2664 | | DataType::LargeList(_) => { |
2665 | 0 | let list_array = as_large_list_array(array); |
2666 | 0 | let nested_array = list_array.value(index); |
2667 | 0 | // Produces a single element `LargeListArray` with the value at `index`. |
2668 | 0 | let arr = Arc::new(array_into_large_list_array(nested_array)); |
2669 | 0 |
|
2670 | 0 | ScalarValue::LargeList(arr) |
2671 | | } |
2672 | | // TODO: There is no test for FixedSizeList now, add it later |
2673 | | DataType::FixedSizeList(_, _) => { |
2674 | 0 | let list_array = as_fixed_size_list_array(array)?; |
2675 | 0 | let nested_array = list_array.value(index); |
2676 | 0 | // Produces a single element `ListArray` with the value at `index`. |
2677 | 0 | let list_size = nested_array.len(); |
2678 | 0 | let arr = |
2679 | 0 | Arc::new(array_into_fixed_size_list_array(nested_array, list_size)); |
2680 | 0 |
|
2681 | 0 | ScalarValue::FixedSizeList(arr) |
2682 | | } |
2683 | 0 | DataType::Date32 => typed_cast!(array, index, Date32Array, Date32)?, |
2684 | 0 | DataType::Date64 => typed_cast!(array, index, Date64Array, Date64)?, |
2685 | | DataType::Time32(TimeUnit::Second) => { |
2686 | 0 | typed_cast!(array, index, Time32SecondArray, Time32Second)? |
2687 | | } |
2688 | | DataType::Time32(TimeUnit::Millisecond) => { |
2689 | 0 | typed_cast!(array, index, Time32MillisecondArray, Time32Millisecond)? |
2690 | | } |
2691 | | DataType::Time64(TimeUnit::Microsecond) => { |
2692 | 0 | typed_cast!(array, index, Time64MicrosecondArray, Time64Microsecond)? |
2693 | | } |
2694 | | DataType::Time64(TimeUnit::Nanosecond) => { |
2695 | 0 | typed_cast!(array, index, Time64NanosecondArray, Time64Nanosecond)? |
2696 | | } |
2697 | 0 | DataType::Timestamp(TimeUnit::Second, tz_opt) => typed_cast_tz!( |
2698 | 0 | array, |
2699 | 0 | index, |
2700 | | TimestampSecondArray, |
2701 | | TimestampSecond, |
2702 | 0 | tz_opt |
2703 | 0 | )?, |
2704 | 9.41k | DataType::Timestamp(TimeUnit::Millisecond, tz_opt) => typed_cast_tz!( |
2705 | 9.41k | array, |
2706 | 9.41k | index, |
2707 | | TimestampMillisecondArray, |
2708 | | TimestampMillisecond, |
2709 | 9.41k | tz_opt |
2710 | 0 | )?, |
2711 | 0 | DataType::Timestamp(TimeUnit::Microsecond, tz_opt) => typed_cast_tz!( |
2712 | 0 | array, |
2713 | 0 | index, |
2714 | | TimestampMicrosecondArray, |
2715 | | TimestampMicrosecond, |
2716 | 0 | tz_opt |
2717 | 0 | )?, |
2718 | 0 | DataType::Timestamp(TimeUnit::Nanosecond, tz_opt) => typed_cast_tz!( |
2719 | 0 | array, |
2720 | 0 | index, |
2721 | | TimestampNanosecondArray, |
2722 | | TimestampNanosecond, |
2723 | 0 | tz_opt |
2724 | 0 | )?, |
2725 | 0 | DataType::Dictionary(key_type, _) => { |
2726 | 0 | let (values_array, values_index) = match key_type.as_ref() { |
2727 | 0 | DataType::Int8 => get_dict_value::<Int8Type>(array, index)?, |
2728 | 0 | DataType::Int16 => get_dict_value::<Int16Type>(array, index)?, |
2729 | 0 | DataType::Int32 => get_dict_value::<Int32Type>(array, index)?, |
2730 | 0 | DataType::Int64 => get_dict_value::<Int64Type>(array, index)?, |
2731 | 0 | DataType::UInt8 => get_dict_value::<UInt8Type>(array, index)?, |
2732 | 0 | DataType::UInt16 => get_dict_value::<UInt16Type>(array, index)?, |
2733 | 0 | DataType::UInt32 => get_dict_value::<UInt32Type>(array, index)?, |
2734 | 0 | DataType::UInt64 => get_dict_value::<UInt64Type>(array, index)?, |
2735 | 0 | _ => unreachable!("Invalid dictionary keys type: {:?}", key_type), |
2736 | | }; |
2737 | | // look up the index in the values dictionary |
2738 | 0 | let value = match values_index { |
2739 | 0 | Some(values_index) => { |
2740 | 0 | ScalarValue::try_from_array(values_array, values_index) |
2741 | | } |
2742 | | // else entry was null, so return null |
2743 | 0 | None => values_array.data_type().try_into(), |
2744 | 0 | }?; |
2745 | | |
2746 | 0 | Self::Dictionary(key_type.clone(), Box::new(value)) |
2747 | | } |
2748 | | DataType::Struct(_) => { |
2749 | 0 | let a = array.slice(index, 1); |
2750 | 0 | Self::Struct(Arc::new(a.as_struct().to_owned())) |
2751 | | } |
2752 | | DataType::FixedSizeBinary(_) => { |
2753 | 0 | let array = as_fixed_size_binary_array(array)?; |
2754 | 0 | let size = match array.data_type() { |
2755 | 0 | DataType::FixedSizeBinary(size) => *size, |
2756 | 0 | _ => unreachable!(), |
2757 | | }; |
2758 | | ScalarValue::FixedSizeBinary( |
2759 | 0 | size, |
2760 | 0 | match array.is_null(index) { |
2761 | 0 | true => None, |
2762 | 0 | false => Some(array.value(index).into()), |
2763 | | }, |
2764 | | ) |
2765 | | } |
2766 | | DataType::Interval(IntervalUnit::DayTime) => { |
2767 | 3.91k | typed_cast!(array, index, IntervalDayTimeArray, IntervalDayTime)?0 |
2768 | | } |
2769 | | DataType::Interval(IntervalUnit::YearMonth) => { |
2770 | 0 | typed_cast!(array, index, IntervalYearMonthArray, IntervalYearMonth)? |
2771 | | } |
2772 | 0 | DataType::Interval(IntervalUnit::MonthDayNano) => typed_cast!( |
2773 | 0 | array, |
2774 | 0 | index, |
2775 | | IntervalMonthDayNanoArray, |
2776 | | IntervalMonthDayNano |
2777 | 0 | )?, |
2778 | | |
2779 | | DataType::Duration(TimeUnit::Second) => { |
2780 | 0 | typed_cast!(array, index, DurationSecondArray, DurationSecond)? |
2781 | | } |
2782 | | DataType::Duration(TimeUnit::Millisecond) => { |
2783 | 1.34k | typed_cast!(array, index, DurationMillisecondArray, DurationMillisecond)?0 |
2784 | | } |
2785 | | DataType::Duration(TimeUnit::Microsecond) => { |
2786 | 0 | typed_cast!(array, index, DurationMicrosecondArray, DurationMicrosecond)? |
2787 | | } |
2788 | | DataType::Duration(TimeUnit::Nanosecond) => { |
2789 | 0 | typed_cast!(array, index, DurationNanosecondArray, DurationNanosecond)? |
2790 | | } |
2791 | | DataType::Map(_, _) => { |
2792 | 0 | let a = array.slice(index, 1); |
2793 | 0 | Self::Map(Arc::new(a.as_map().to_owned())) |
2794 | | } |
2795 | 0 | DataType::Union(fields, mode) => { |
2796 | 0 | let array = as_union_array(array); |
2797 | 0 | let ti = array.type_id(index); |
2798 | 0 | let index = array.value_offset(index); |
2799 | 0 | let value = ScalarValue::try_from_array(array.child(ti), index)?; |
2800 | 0 | ScalarValue::Union(Some((ti, Box::new(value))), fields.clone(), *mode) |
2801 | | } |
2802 | 0 | other => { |
2803 | 0 | return _not_impl_err!( |
2804 | 0 | "Can't create a scalar from array of type \"{other:?}\"" |
2805 | 0 | ); |
2806 | | } |
2807 | | }) |
2808 | 65.1k | } |
2809 | | |
2810 | | /// Try to parse `value` into a ScalarValue of type `target_type` |
2811 | 0 | pub fn try_from_string(value: String, target_type: &DataType) -> Result<Self> { |
2812 | 0 | let value = ScalarValue::from(value); |
2813 | 0 | let cast_options = CastOptions { |
2814 | 0 | safe: false, |
2815 | 0 | format_options: Default::default(), |
2816 | 0 | }; |
2817 | 0 | let cast_arr = cast_with_options(&value.to_array()?, target_type, &cast_options)?; |
2818 | 0 | ScalarValue::try_from_array(&cast_arr, 0) |
2819 | 0 | } |
2820 | | |
2821 | | /// Try to cast this value to a ScalarValue of type `data_type` |
2822 | 0 | pub fn cast_to(&self, data_type: &DataType) -> Result<Self> { |
2823 | 0 | let cast_options = CastOptions { |
2824 | 0 | safe: false, |
2825 | 0 | format_options: Default::default(), |
2826 | 0 | }; |
2827 | 0 | let cast_arr = cast_with_options(&self.to_array()?, data_type, &cast_options)?; |
2828 | 0 | ScalarValue::try_from_array(&cast_arr, 0) |
2829 | 0 | } |
2830 | | |
2831 | 0 | fn eq_array_decimal( |
2832 | 0 | array: &ArrayRef, |
2833 | 0 | index: usize, |
2834 | 0 | value: Option<&i128>, |
2835 | 0 | precision: u8, |
2836 | 0 | scale: i8, |
2837 | 0 | ) -> Result<bool> { |
2838 | 0 | let array = as_decimal128_array(array)?; |
2839 | 0 | if array.precision() != precision || array.scale() != scale { |
2840 | 0 | return Ok(false); |
2841 | 0 | } |
2842 | 0 | let is_null = array.is_null(index); |
2843 | 0 | if let Some(v) = value { |
2844 | 0 | Ok(!array.is_null(index) && array.value(index) == *v) |
2845 | | } else { |
2846 | 0 | Ok(is_null) |
2847 | | } |
2848 | 0 | } |
2849 | | |
2850 | 0 | fn eq_array_decimal256( |
2851 | 0 | array: &ArrayRef, |
2852 | 0 | index: usize, |
2853 | 0 | value: Option<&i256>, |
2854 | 0 | precision: u8, |
2855 | 0 | scale: i8, |
2856 | 0 | ) -> Result<bool> { |
2857 | 0 | let array = as_decimal256_array(array)?; |
2858 | 0 | if array.precision() != precision || array.scale() != scale { |
2859 | 0 | return Ok(false); |
2860 | 0 | } |
2861 | 0 | let is_null = array.is_null(index); |
2862 | 0 | if let Some(v) = value { |
2863 | 0 | Ok(!array.is_null(index) && array.value(index) == *v) |
2864 | | } else { |
2865 | 0 | Ok(is_null) |
2866 | | } |
2867 | 0 | } |
2868 | | |
2869 | | /// Compares a single row of array @ index for equality with self, |
2870 | | /// in an optimized fashion. |
2871 | | /// |
2872 | | /// This method implements an optimized version of: |
2873 | | /// |
2874 | | /// ```text |
2875 | | /// let arr_scalar = Self::try_from_array(array, index).unwrap(); |
2876 | | /// arr_scalar.eq(self) |
2877 | | /// ``` |
2878 | | /// |
2879 | | /// *Performance note*: the arrow compute kernels should be |
2880 | | /// preferred over this function if at all possible as they can be |
2881 | | /// vectorized and are generally much faster. |
2882 | | /// |
2883 | | /// This function has a few narrow use cases such as hash table key |
2884 | | /// comparisons where comparing a single row at a time is necessary. |
2885 | | /// |
2886 | | /// # Errors |
2887 | | /// |
2888 | | /// Errors if |
2889 | | /// - it fails to downcast `array` to the data type of `self` |
2890 | | /// - `self` is a `Struct` |
2891 | | /// |
2892 | | /// # Panics |
2893 | | /// |
2894 | | /// Panics if `self` is a dictionary with invalid key type |
2895 | | #[inline] |
2896 | 0 | pub fn eq_array(&self, array: &ArrayRef, index: usize) -> Result<bool> { |
2897 | 0 | Ok(match self { |
2898 | 0 | ScalarValue::Decimal128(v, precision, scale) => { |
2899 | 0 | ScalarValue::eq_array_decimal( |
2900 | 0 | array, |
2901 | 0 | index, |
2902 | 0 | v.as_ref(), |
2903 | 0 | *precision, |
2904 | 0 | *scale, |
2905 | 0 | )? |
2906 | | } |
2907 | 0 | ScalarValue::Decimal256(v, precision, scale) => { |
2908 | 0 | ScalarValue::eq_array_decimal256( |
2909 | 0 | array, |
2910 | 0 | index, |
2911 | 0 | v.as_ref(), |
2912 | 0 | *precision, |
2913 | 0 | *scale, |
2914 | 0 | )? |
2915 | | } |
2916 | 0 | ScalarValue::Boolean(val) => { |
2917 | 0 | eq_array_primitive!(array, index, BooleanArray, val)? |
2918 | | } |
2919 | 0 | ScalarValue::Float16(val) => { |
2920 | 0 | eq_array_primitive!(array, index, Float16Array, val)? |
2921 | | } |
2922 | 0 | ScalarValue::Float32(val) => { |
2923 | 0 | eq_array_primitive!(array, index, Float32Array, val)? |
2924 | | } |
2925 | 0 | ScalarValue::Float64(val) => { |
2926 | 0 | eq_array_primitive!(array, index, Float64Array, val)? |
2927 | | } |
2928 | 0 | ScalarValue::Int8(val) => eq_array_primitive!(array, index, Int8Array, val)?, |
2929 | 0 | ScalarValue::Int16(val) => { |
2930 | 0 | eq_array_primitive!(array, index, Int16Array, val)? |
2931 | | } |
2932 | 0 | ScalarValue::Int32(val) => { |
2933 | 0 | eq_array_primitive!(array, index, Int32Array, val)? |
2934 | | } |
2935 | 0 | ScalarValue::Int64(val) => { |
2936 | 0 | eq_array_primitive!(array, index, Int64Array, val)? |
2937 | | } |
2938 | 0 | ScalarValue::UInt8(val) => { |
2939 | 0 | eq_array_primitive!(array, index, UInt8Array, val)? |
2940 | | } |
2941 | 0 | ScalarValue::UInt16(val) => { |
2942 | 0 | eq_array_primitive!(array, index, UInt16Array, val)? |
2943 | | } |
2944 | 0 | ScalarValue::UInt32(val) => { |
2945 | 0 | eq_array_primitive!(array, index, UInt32Array, val)? |
2946 | | } |
2947 | 0 | ScalarValue::UInt64(val) => { |
2948 | 0 | eq_array_primitive!(array, index, UInt64Array, val)? |
2949 | | } |
2950 | 0 | ScalarValue::Utf8(val) => { |
2951 | 0 | eq_array_primitive!(array, index, StringArray, val)? |
2952 | | } |
2953 | 0 | ScalarValue::Utf8View(val) => { |
2954 | 0 | eq_array_primitive!(array, index, StringViewArray, val)? |
2955 | | } |
2956 | 0 | ScalarValue::LargeUtf8(val) => { |
2957 | 0 | eq_array_primitive!(array, index, LargeStringArray, val)? |
2958 | | } |
2959 | 0 | ScalarValue::Binary(val) => { |
2960 | 0 | eq_array_primitive!(array, index, BinaryArray, val)? |
2961 | | } |
2962 | 0 | ScalarValue::BinaryView(val) => { |
2963 | 0 | eq_array_primitive!(array, index, BinaryViewArray, val)? |
2964 | | } |
2965 | 0 | ScalarValue::FixedSizeBinary(_, val) => { |
2966 | 0 | eq_array_primitive!(array, index, FixedSizeBinaryArray, val)? |
2967 | | } |
2968 | 0 | ScalarValue::LargeBinary(val) => { |
2969 | 0 | eq_array_primitive!(array, index, LargeBinaryArray, val)? |
2970 | | } |
2971 | 0 | ScalarValue::List(arr) => { |
2972 | 0 | Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index) |
2973 | | } |
2974 | 0 | ScalarValue::LargeList(arr) => { |
2975 | 0 | Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index) |
2976 | | } |
2977 | 0 | ScalarValue::FixedSizeList(arr) => { |
2978 | 0 | Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index) |
2979 | | } |
2980 | 0 | ScalarValue::Struct(arr) => { |
2981 | 0 | Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index) |
2982 | | } |
2983 | 0 | ScalarValue::Map(arr) => { |
2984 | 0 | Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index) |
2985 | | } |
2986 | 0 | ScalarValue::Date32(val) => { |
2987 | 0 | eq_array_primitive!(array, index, Date32Array, val)? |
2988 | | } |
2989 | 0 | ScalarValue::Date64(val) => { |
2990 | 0 | eq_array_primitive!(array, index, Date64Array, val)? |
2991 | | } |
2992 | 0 | ScalarValue::Time32Second(val) => { |
2993 | 0 | eq_array_primitive!(array, index, Time32SecondArray, val)? |
2994 | | } |
2995 | 0 | ScalarValue::Time32Millisecond(val) => { |
2996 | 0 | eq_array_primitive!(array, index, Time32MillisecondArray, val)? |
2997 | | } |
2998 | 0 | ScalarValue::Time64Microsecond(val) => { |
2999 | 0 | eq_array_primitive!(array, index, Time64MicrosecondArray, val)? |
3000 | | } |
3001 | 0 | ScalarValue::Time64Nanosecond(val) => { |
3002 | 0 | eq_array_primitive!(array, index, Time64NanosecondArray, val)? |
3003 | | } |
3004 | 0 | ScalarValue::TimestampSecond(val, _) => { |
3005 | 0 | eq_array_primitive!(array, index, TimestampSecondArray, val)? |
3006 | | } |
3007 | 0 | ScalarValue::TimestampMillisecond(val, _) => { |
3008 | 0 | eq_array_primitive!(array, index, TimestampMillisecondArray, val)? |
3009 | | } |
3010 | 0 | ScalarValue::TimestampMicrosecond(val, _) => { |
3011 | 0 | eq_array_primitive!(array, index, TimestampMicrosecondArray, val)? |
3012 | | } |
3013 | 0 | ScalarValue::TimestampNanosecond(val, _) => { |
3014 | 0 | eq_array_primitive!(array, index, TimestampNanosecondArray, val)? |
3015 | | } |
3016 | 0 | ScalarValue::IntervalYearMonth(val) => { |
3017 | 0 | eq_array_primitive!(array, index, IntervalYearMonthArray, val)? |
3018 | | } |
3019 | 0 | ScalarValue::IntervalDayTime(val) => { |
3020 | 0 | eq_array_primitive!(array, index, IntervalDayTimeArray, val)? |
3021 | | } |
3022 | 0 | ScalarValue::IntervalMonthDayNano(val) => { |
3023 | 0 | eq_array_primitive!(array, index, IntervalMonthDayNanoArray, val)? |
3024 | | } |
3025 | 0 | ScalarValue::DurationSecond(val) => { |
3026 | 0 | eq_array_primitive!(array, index, DurationSecondArray, val)? |
3027 | | } |
3028 | 0 | ScalarValue::DurationMillisecond(val) => { |
3029 | 0 | eq_array_primitive!(array, index, DurationMillisecondArray, val)? |
3030 | | } |
3031 | 0 | ScalarValue::DurationMicrosecond(val) => { |
3032 | 0 | eq_array_primitive!(array, index, DurationMicrosecondArray, val)? |
3033 | | } |
3034 | 0 | ScalarValue::DurationNanosecond(val) => { |
3035 | 0 | eq_array_primitive!(array, index, DurationNanosecondArray, val)? |
3036 | | } |
3037 | 0 | ScalarValue::Union(value, _, _) => { |
3038 | 0 | let array = as_union_array(array); |
3039 | 0 | let ti = array.type_id(index); |
3040 | 0 | let index = array.value_offset(index); |
3041 | 0 | if let Some((ti_v, value)) = value { |
3042 | 0 | ti_v == &ti && value.eq_array(array.child(ti), index)? |
3043 | | } else { |
3044 | 0 | array.child(ti).is_null(index) |
3045 | | } |
3046 | | } |
3047 | 0 | ScalarValue::Dictionary(key_type, v) => { |
3048 | 0 | let (values_array, values_index) = match key_type.as_ref() { |
3049 | 0 | DataType::Int8 => get_dict_value::<Int8Type>(array, index)?, |
3050 | 0 | DataType::Int16 => get_dict_value::<Int16Type>(array, index)?, |
3051 | 0 | DataType::Int32 => get_dict_value::<Int32Type>(array, index)?, |
3052 | 0 | DataType::Int64 => get_dict_value::<Int64Type>(array, index)?, |
3053 | 0 | DataType::UInt8 => get_dict_value::<UInt8Type>(array, index)?, |
3054 | 0 | DataType::UInt16 => get_dict_value::<UInt16Type>(array, index)?, |
3055 | 0 | DataType::UInt32 => get_dict_value::<UInt32Type>(array, index)?, |
3056 | 0 | DataType::UInt64 => get_dict_value::<UInt64Type>(array, index)?, |
3057 | 0 | _ => unreachable!("Invalid dictionary keys type: {:?}", key_type), |
3058 | | }; |
3059 | | // was the value in the array non null? |
3060 | 0 | match values_index { |
3061 | 0 | Some(values_index) => v.eq_array(values_array, values_index)?, |
3062 | 0 | None => v.is_null(), |
3063 | | } |
3064 | | } |
3065 | 0 | ScalarValue::Null => array.is_null(index), |
3066 | | }) |
3067 | 0 | } |
3068 | | |
3069 | 0 | fn eq_array_list(arr1: &ArrayRef, arr2: &ArrayRef, index: usize) -> bool { |
3070 | 0 | let right = arr2.slice(index, 1); |
3071 | 0 | arr1 == &right |
3072 | 0 | } |
3073 | | |
3074 | | /// Estimate size in bytes including `Self`. For values with internal containers such as `String` |
3075 | | /// includes the allocated size (`capacity`) rather than the current length (`len`) |
3076 | 1.16k | pub fn size(&self) -> usize { |
3077 | 1.16k | std::mem::size_of_val(self) |
3078 | 1.16k | + match self { |
3079 | | ScalarValue::Null |
3080 | | | ScalarValue::Boolean(_) |
3081 | | | ScalarValue::Float16(_) |
3082 | | | ScalarValue::Float32(_) |
3083 | | | ScalarValue::Float64(_) |
3084 | | | ScalarValue::Decimal128(_, _, _) |
3085 | | | ScalarValue::Decimal256(_, _, _) |
3086 | | | ScalarValue::Int8(_) |
3087 | | | ScalarValue::Int16(_) |
3088 | | | ScalarValue::Int32(_) |
3089 | | | ScalarValue::Int64(_) |
3090 | | | ScalarValue::UInt8(_) |
3091 | | | ScalarValue::UInt16(_) |
3092 | | | ScalarValue::UInt32(_) |
3093 | | | ScalarValue::UInt64(_) |
3094 | | | ScalarValue::Date32(_) |
3095 | | | ScalarValue::Date64(_) |
3096 | | | ScalarValue::Time32Second(_) |
3097 | | | ScalarValue::Time32Millisecond(_) |
3098 | | | ScalarValue::Time64Microsecond(_) |
3099 | | | ScalarValue::Time64Nanosecond(_) |
3100 | | | ScalarValue::IntervalYearMonth(_) |
3101 | | | ScalarValue::IntervalDayTime(_) |
3102 | | | ScalarValue::IntervalMonthDayNano(_) |
3103 | | | ScalarValue::DurationSecond(_) |
3104 | | | ScalarValue::DurationMillisecond(_) |
3105 | | | ScalarValue::DurationMicrosecond(_) |
3106 | 1.16k | | ScalarValue::DurationNanosecond(_) => 0, |
3107 | 0 | ScalarValue::Utf8(s) |
3108 | 0 | | ScalarValue::LargeUtf8(s) |
3109 | 0 | | ScalarValue::Utf8View(s) => { |
3110 | 0 | s.as_ref().map(|s| s.capacity()).unwrap_or_default() |
3111 | | } |
3112 | 0 | ScalarValue::TimestampSecond(_, s) |
3113 | 0 | | ScalarValue::TimestampMillisecond(_, s) |
3114 | 0 | | ScalarValue::TimestampMicrosecond(_, s) |
3115 | 0 | | ScalarValue::TimestampNanosecond(_, s) => { |
3116 | 0 | s.as_ref().map(|s| s.len()).unwrap_or_default() |
3117 | | } |
3118 | 0 | ScalarValue::Binary(b) |
3119 | 0 | | ScalarValue::FixedSizeBinary(_, b) |
3120 | 0 | | ScalarValue::LargeBinary(b) |
3121 | 0 | | ScalarValue::BinaryView(b) => { |
3122 | 0 | b.as_ref().map(|b| b.capacity()).unwrap_or_default() |
3123 | | } |
3124 | 0 | ScalarValue::List(arr) => arr.get_array_memory_size(), |
3125 | 0 | ScalarValue::LargeList(arr) => arr.get_array_memory_size(), |
3126 | 0 | ScalarValue::FixedSizeList(arr) => arr.get_array_memory_size(), |
3127 | 0 | ScalarValue::Struct(arr) => arr.get_array_memory_size(), |
3128 | 0 | ScalarValue::Map(arr) => arr.get_array_memory_size(), |
3129 | 0 | ScalarValue::Union(vals, fields, _mode) => { |
3130 | 0 | vals.as_ref() |
3131 | 0 | .map(|(_id, sv)| sv.size() - std::mem::size_of_val(sv)) |
3132 | 0 | .unwrap_or_default() |
3133 | 0 | // `fields` is boxed, so it is NOT already included in `self` |
3134 | 0 | + std::mem::size_of_val(fields) |
3135 | 0 | + (std::mem::size_of::<Field>() * fields.len()) |
3136 | 0 | + fields.iter().map(|(_idx, field)| field.size() - std::mem::size_of_val(field)).sum::<usize>() |
3137 | | } |
3138 | 0 | ScalarValue::Dictionary(dt, sv) => { |
3139 | 0 | // `dt` and `sv` are boxed, so they are NOT already included in `self` |
3140 | 0 | dt.size() + sv.size() |
3141 | | } |
3142 | | } |
3143 | 1.16k | } |
3144 | | |
3145 | | /// Estimates [size](Self::size) of [`Vec`] in bytes. |
3146 | | /// |
3147 | | /// Includes the size of the [`Vec`] container itself. |
3148 | 584 | pub fn size_of_vec(vec: &Vec<Self>) -> usize { |
3149 | 584 | std::mem::size_of_val(vec) |
3150 | 584 | + (std::mem::size_of::<ScalarValue>() * vec.capacity()) |
3151 | 584 | + vec |
3152 | 584 | .iter() |
3153 | 584 | .map(|sv| sv.size() - std::mem::size_of_val(sv)) |
3154 | 584 | .sum::<usize>() |
3155 | 584 | } |
3156 | | |
3157 | | /// Estimates [size](Self::size) of [`VecDeque`] in bytes. |
3158 | | /// |
3159 | | /// Includes the size of the [`VecDeque`] container itself. |
3160 | 0 | pub fn size_of_vec_deque(vec_deque: &VecDeque<Self>) -> usize { |
3161 | 0 | std::mem::size_of_val(vec_deque) |
3162 | 0 | + (std::mem::size_of::<ScalarValue>() * vec_deque.capacity()) |
3163 | 0 | + vec_deque |
3164 | 0 | .iter() |
3165 | 0 | .map(|sv| sv.size() - std::mem::size_of_val(sv)) |
3166 | 0 | .sum::<usize>() |
3167 | 0 | } |
3168 | | |
3169 | | /// Estimates [size](Self::size) of [`HashSet`] in bytes. |
3170 | | /// |
3171 | | /// Includes the size of the [`HashSet`] container itself. |
3172 | 0 | pub fn size_of_hashset<S>(set: &HashSet<Self, S>) -> usize { |
3173 | 0 | std::mem::size_of_val(set) |
3174 | 0 | + (std::mem::size_of::<ScalarValue>() * set.capacity()) |
3175 | 0 | + set |
3176 | 0 | .iter() |
3177 | 0 | .map(|sv| sv.size() - std::mem::size_of_val(sv)) |
3178 | 0 | .sum::<usize>() |
3179 | 0 | } |
3180 | | } |
3181 | | |
3182 | | macro_rules! impl_scalar { |
3183 | | ($ty:ty, $scalar:tt) => { |
3184 | | impl From<$ty> for ScalarValue { |
3185 | 132 | fn from(value: $ty) -> Self { |
3186 | 132 | ScalarValue::$scalar(Some(value)) |
3187 | 132 | } |
3188 | | } |
3189 | | |
3190 | | impl From<Option<$ty>> for ScalarValue { |
3191 | 0 | fn from(value: Option<$ty>) -> Self { |
3192 | 0 | ScalarValue::$scalar(value) |
3193 | 0 | } |
3194 | | } |
3195 | | }; |
3196 | | } |
3197 | | |
3198 | | impl_scalar!(f64, Float64); |
3199 | | impl_scalar!(f32, Float32); |
3200 | | impl_scalar!(i8, Int8); |
3201 | | impl_scalar!(i16, Int16); |
3202 | | impl_scalar!(i32, Int32); |
3203 | | impl_scalar!(i64, Int64); |
3204 | | impl_scalar!(bool, Boolean); |
3205 | | impl_scalar!(u8, UInt8); |
3206 | | impl_scalar!(u16, UInt16); |
3207 | | impl_scalar!(u32, UInt32); |
3208 | | impl_scalar!(u64, UInt64); |
3209 | | |
3210 | | impl From<&str> for ScalarValue { |
3211 | 20 | fn from(value: &str) -> Self { |
3212 | 20 | Some(value).into() |
3213 | 20 | } |
3214 | | } |
3215 | | |
3216 | | impl From<Option<&str>> for ScalarValue { |
3217 | 20 | fn from(value: Option<&str>) -> Self { |
3218 | 20 | let value = value.map(|s| s.to_string()); |
3219 | 20 | ScalarValue::Utf8(value) |
3220 | 20 | } |
3221 | | } |
3222 | | |
3223 | | /// Wrapper to create ScalarValue::Struct for convenience |
3224 | | impl From<Vec<(&str, ScalarValue)>> for ScalarValue { |
3225 | 0 | fn from(value: Vec<(&str, ScalarValue)>) -> Self { |
3226 | 0 | value |
3227 | 0 | .into_iter() |
3228 | 0 | .fold(ScalarStructBuilder::new(), |builder, (name, value)| { |
3229 | 0 | builder.with_name_and_scalar(name, value) |
3230 | 0 | }) |
3231 | 0 | .build() |
3232 | 0 | .unwrap() |
3233 | 0 | } |
3234 | | } |
3235 | | |
3236 | | impl FromStr for ScalarValue { |
3237 | | type Err = Infallible; |
3238 | | |
3239 | 0 | fn from_str(s: &str) -> Result<Self, Self::Err> { |
3240 | 0 | Ok(s.into()) |
3241 | 0 | } |
3242 | | } |
3243 | | |
3244 | | impl From<String> for ScalarValue { |
3245 | 0 | fn from(value: String) -> Self { |
3246 | 0 | ScalarValue::Utf8(Some(value)) |
3247 | 0 | } |
3248 | | } |
3249 | | |
3250 | | macro_rules! impl_try_from { |
3251 | | ($SCALAR:ident, $NATIVE:ident) => { |
3252 | | impl TryFrom<ScalarValue> for $NATIVE { |
3253 | | type Error = DataFusionError; |
3254 | | |
3255 | 0 | fn try_from(value: ScalarValue) -> Result<Self> { |
3256 | 0 | match value { |
3257 | 0 | ScalarValue::$SCALAR(Some(inner_value)) => Ok(inner_value), |
3258 | 0 | _ => _internal_err!( |
3259 | 0 | "Cannot convert {:?} to {}", |
3260 | 0 | value, |
3261 | 0 | std::any::type_name::<Self>() |
3262 | 0 | ), |
3263 | | } |
3264 | 0 | } |
3265 | | } |
3266 | | }; |
3267 | | } |
3268 | | |
3269 | | impl_try_from!(Int8, i8); |
3270 | | impl_try_from!(Int16, i16); |
3271 | | |
3272 | | // special implementation for i32 because of Date32 and Time32 |
3273 | | impl TryFrom<ScalarValue> for i32 { |
3274 | | type Error = DataFusionError; |
3275 | | |
3276 | 0 | fn try_from(value: ScalarValue) -> Result<Self> { |
3277 | 0 | match value { |
3278 | 0 | ScalarValue::Int32(Some(inner_value)) |
3279 | 0 | | ScalarValue::Date32(Some(inner_value)) |
3280 | 0 | | ScalarValue::Time32Second(Some(inner_value)) |
3281 | 0 | | ScalarValue::Time32Millisecond(Some(inner_value)) => Ok(inner_value), |
3282 | 0 | _ => _internal_err!( |
3283 | 0 | "Cannot convert {:?} to {}", |
3284 | 0 | value, |
3285 | 0 | std::any::type_name::<Self>() |
3286 | 0 | ), |
3287 | | } |
3288 | 0 | } |
3289 | | } |
3290 | | |
3291 | | // special implementation for i64 because of Date64, Time64 and Timestamp |
3292 | | impl TryFrom<ScalarValue> for i64 { |
3293 | | type Error = DataFusionError; |
3294 | | |
3295 | 0 | fn try_from(value: ScalarValue) -> Result<Self> { |
3296 | 0 | match value { |
3297 | 0 | ScalarValue::Int64(Some(inner_value)) |
3298 | 0 | | ScalarValue::Date64(Some(inner_value)) |
3299 | 0 | | ScalarValue::Time64Microsecond(Some(inner_value)) |
3300 | 0 | | ScalarValue::Time64Nanosecond(Some(inner_value)) |
3301 | 0 | | ScalarValue::TimestampNanosecond(Some(inner_value), _) |
3302 | 0 | | ScalarValue::TimestampMicrosecond(Some(inner_value), _) |
3303 | 0 | | ScalarValue::TimestampMillisecond(Some(inner_value), _) |
3304 | 0 | | ScalarValue::TimestampSecond(Some(inner_value), _) => Ok(inner_value), |
3305 | 0 | _ => _internal_err!( |
3306 | 0 | "Cannot convert {:?} to {}", |
3307 | 0 | value, |
3308 | 0 | std::any::type_name::<Self>() |
3309 | 0 | ), |
3310 | | } |
3311 | 0 | } |
3312 | | } |
3313 | | |
3314 | | // special implementation for i128 because of Decimal128 |
3315 | | impl TryFrom<ScalarValue> for i128 { |
3316 | | type Error = DataFusionError; |
3317 | | |
3318 | 0 | fn try_from(value: ScalarValue) -> Result<Self> { |
3319 | 0 | match value { |
3320 | 0 | ScalarValue::Decimal128(Some(inner_value), _, _) => Ok(inner_value), |
3321 | 0 | _ => _internal_err!( |
3322 | 0 | "Cannot convert {:?} to {}", |
3323 | 0 | value, |
3324 | 0 | std::any::type_name::<Self>() |
3325 | 0 | ), |
3326 | | } |
3327 | 0 | } |
3328 | | } |
3329 | | |
3330 | | // special implementation for i256 because of Decimal256 |
3331 | | impl TryFrom<ScalarValue> for i256 { |
3332 | | type Error = DataFusionError; |
3333 | | |
3334 | 0 | fn try_from(value: ScalarValue) -> Result<Self> { |
3335 | 0 | match value { |
3336 | 0 | ScalarValue::Decimal256(Some(inner_value), _, _) => Ok(inner_value), |
3337 | 0 | _ => _internal_err!( |
3338 | 0 | "Cannot convert {:?} to {}", |
3339 | 0 | value, |
3340 | 0 | std::any::type_name::<Self>() |
3341 | 0 | ), |
3342 | | } |
3343 | 0 | } |
3344 | | } |
3345 | | |
3346 | | impl_try_from!(UInt8, u8); |
3347 | | impl_try_from!(UInt16, u16); |
3348 | | impl_try_from!(UInt32, u32); |
3349 | | impl_try_from!(UInt64, u64); |
3350 | | impl_try_from!(Float32, f32); |
3351 | | impl_try_from!(Float64, f64); |
3352 | | impl_try_from!(Boolean, bool); |
3353 | | |
3354 | | impl TryFrom<DataType> for ScalarValue { |
3355 | | type Error = DataFusionError; |
3356 | | |
3357 | | /// Create a Null instance of ScalarValue for this datatype |
3358 | 11.4k | fn try_from(datatype: DataType) -> Result<Self> { |
3359 | 11.4k | (&datatype).try_into() |
3360 | 11.4k | } |
3361 | | } |
3362 | | |
3363 | | impl TryFrom<&DataType> for ScalarValue { |
3364 | | type Error = DataFusionError; |
3365 | | |
3366 | | /// Create a Null instance of ScalarValue for this datatype |
3367 | 116k | fn try_from(data_type: &DataType) -> Result<Self> { |
3368 | 12.7k | Ok(match data_type { |
3369 | 3.38k | DataType::Boolean => ScalarValue::Boolean(None), |
3370 | 0 | DataType::Float16 => ScalarValue::Float16(None), |
3371 | 39.6k | DataType::Float64 => ScalarValue::Float64(None), |
3372 | 22 | DataType::Float32 => ScalarValue::Float32(None), |
3373 | 0 | DataType::Int8 => ScalarValue::Int8(None), |
3374 | 0 | DataType::Int16 => ScalarValue::Int16(None), |
3375 | 41.5k | DataType::Int32 => ScalarValue::Int32(None), |
3376 | 4.20k | DataType::Int64 => ScalarValue::Int64(None), |
3377 | 0 | DataType::UInt8 => ScalarValue::UInt8(None), |
3378 | 0 | DataType::UInt16 => ScalarValue::UInt16(None), |
3379 | 133 | DataType::UInt32 => ScalarValue::UInt32(None), |
3380 | 5 | DataType::UInt64 => ScalarValue::UInt64(None), |
3381 | 0 | DataType::Decimal128(precision, scale) => { |
3382 | 0 | ScalarValue::Decimal128(None, *precision, *scale) |
3383 | | } |
3384 | 0 | DataType::Decimal256(precision, scale) => { |
3385 | 0 | ScalarValue::Decimal256(None, *precision, *scale) |
3386 | | } |
3387 | 0 | DataType::Utf8 => ScalarValue::Utf8(None), |
3388 | 0 | DataType::LargeUtf8 => ScalarValue::LargeUtf8(None), |
3389 | 0 | DataType::Utf8View => ScalarValue::Utf8View(None), |
3390 | 0 | DataType::Binary => ScalarValue::Binary(None), |
3391 | 0 | DataType::BinaryView => ScalarValue::BinaryView(None), |
3392 | 0 | DataType::FixedSizeBinary(len) => ScalarValue::FixedSizeBinary(*len, None), |
3393 | 0 | DataType::LargeBinary => ScalarValue::LargeBinary(None), |
3394 | 0 | DataType::Date32 => ScalarValue::Date32(None), |
3395 | 0 | DataType::Date64 => ScalarValue::Date64(None), |
3396 | 0 | DataType::Time32(TimeUnit::Second) => ScalarValue::Time32Second(None), |
3397 | | DataType::Time32(TimeUnit::Millisecond) => { |
3398 | 0 | ScalarValue::Time32Millisecond(None) |
3399 | | } |
3400 | | DataType::Time64(TimeUnit::Microsecond) => { |
3401 | 0 | ScalarValue::Time64Microsecond(None) |
3402 | | } |
3403 | 0 | DataType::Time64(TimeUnit::Nanosecond) => ScalarValue::Time64Nanosecond(None), |
3404 | 0 | DataType::Timestamp(TimeUnit::Second, tz_opt) => { |
3405 | 0 | ScalarValue::TimestampSecond(None, tz_opt.clone()) |
3406 | | } |
3407 | 12.7k | DataType::Timestamp(TimeUnit::Millisecond, tz_opt) => { |
3408 | 12.7k | ScalarValue::TimestampMillisecond(None, tz_opt.clone()) |
3409 | | } |
3410 | 0 | DataType::Timestamp(TimeUnit::Microsecond, tz_opt) => { |
3411 | 0 | ScalarValue::TimestampMicrosecond(None, tz_opt.clone()) |
3412 | | } |
3413 | 0 | DataType::Timestamp(TimeUnit::Nanosecond, tz_opt) => { |
3414 | 0 | ScalarValue::TimestampNanosecond(None, tz_opt.clone()) |
3415 | | } |
3416 | | DataType::Interval(IntervalUnit::YearMonth) => { |
3417 | 0 | ScalarValue::IntervalYearMonth(None) |
3418 | | } |
3419 | | DataType::Interval(IntervalUnit::DayTime) => { |
3420 | 6.56k | ScalarValue::IntervalDayTime(None) |
3421 | | } |
3422 | | DataType::Interval(IntervalUnit::MonthDayNano) => { |
3423 | 0 | ScalarValue::IntervalMonthDayNano(None) |
3424 | | } |
3425 | 0 | DataType::Duration(TimeUnit::Second) => ScalarValue::DurationSecond(None), |
3426 | | DataType::Duration(TimeUnit::Millisecond) => { |
3427 | 6.97k | ScalarValue::DurationMillisecond(None) |
3428 | | } |
3429 | | DataType::Duration(TimeUnit::Microsecond) => { |
3430 | 0 | ScalarValue::DurationMicrosecond(None) |
3431 | | } |
3432 | | DataType::Duration(TimeUnit::Nanosecond) => { |
3433 | 0 | ScalarValue::DurationNanosecond(None) |
3434 | | } |
3435 | 0 | DataType::Dictionary(index_type, value_type) => ScalarValue::Dictionary( |
3436 | 0 | index_type.clone(), |
3437 | 0 | Box::new(value_type.as_ref().try_into()?), |
3438 | | ), |
3439 | | // `ScalarValue::List` contains single element `ListArray`. |
3440 | 0 | DataType::List(field_ref) => ScalarValue::List(Arc::new( |
3441 | 0 | GenericListArray::new_null(Arc::clone(field_ref), 1), |
3442 | 0 | )), |
3443 | | // `ScalarValue::LargeList` contains single element `LargeListArray`. |
3444 | 0 | DataType::LargeList(field_ref) => ScalarValue::LargeList(Arc::new( |
3445 | 0 | GenericListArray::new_null(Arc::clone(field_ref), 1), |
3446 | 0 | )), |
3447 | | // `ScalarValue::FixedSizeList` contains single element `FixedSizeListArray`. |
3448 | 0 | DataType::FixedSizeList(field_ref, fixed_length) => { |
3449 | 0 | ScalarValue::FixedSizeList(Arc::new(FixedSizeListArray::new_null( |
3450 | 0 | Arc::clone(field_ref), |
3451 | 0 | *fixed_length, |
3452 | 0 | 1, |
3453 | 0 | ))) |
3454 | | } |
3455 | 3 | DataType::Struct(fields) => ScalarValue::Struct( |
3456 | 3 | new_null_array(&DataType::Struct(fields.to_owned()), 1) |
3457 | 3 | .as_struct() |
3458 | 3 | .to_owned() |
3459 | 3 | .into(), |
3460 | 3 | ), |
3461 | 0 | DataType::Map(fields, sorted) => ScalarValue::Map( |
3462 | 0 | new_null_array(&DataType::Map(fields.to_owned(), sorted.to_owned()), 1) |
3463 | 0 | .as_map() |
3464 | 0 | .to_owned() |
3465 | 0 | .into(), |
3466 | 0 | ), |
3467 | 2 | DataType::Union(fields, mode) => { |
3468 | 2 | ScalarValue::Union(None, fields.clone(), *mode) |
3469 | | } |
3470 | 934 | DataType::Null => ScalarValue::Null, |
3471 | | _ => { |
3472 | 0 | return _not_impl_err!( |
3473 | 0 | "Can't create a scalar from data_type \"{data_type:?}\"" |
3474 | 0 | ); |
3475 | | } |
3476 | | }) |
3477 | 116k | } |
3478 | | } |
3479 | | |
3480 | | macro_rules! format_option { |
3481 | | ($F:expr, $EXPR:expr) => {{ |
3482 | | match $EXPR { |
3483 | | Some(e) => write!($F, "{e}"), |
3484 | | None => write!($F, "NULL"), |
3485 | | } |
3486 | | }}; |
3487 | | } |
3488 | | |
3489 | | // Implement Display trait for ScalarValue |
3490 | | // |
3491 | | // # Panics |
3492 | | // |
3493 | | // Panics if there is an error when creating a visual representation of columns via `arrow::util::pretty` |
3494 | | impl fmt::Display for ScalarValue { |
3495 | 8 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
3496 | 8 | match self { |
3497 | 0 | ScalarValue::Decimal128(v, p, s) => { |
3498 | 0 | write!(f, "{v:?},{p:?},{s:?}")?; |
3499 | | } |
3500 | 0 | ScalarValue::Decimal256(v, p, s) => { |
3501 | 0 | write!(f, "{v:?},{p:?},{s:?}")?; |
3502 | | } |
3503 | 0 | ScalarValue::Boolean(e) => format_option!(f, e)?, |
3504 | 0 | ScalarValue::Float16(e) => format_option!(f, e)?, |
3505 | 0 | ScalarValue::Float32(e) => format_option!(f, e)?, |
3506 | 2 | ScalarValue::Float64(e) => format_option!(f0 , e)?0 , |
3507 | 0 | ScalarValue::Int8(e) => format_option!(f, e)?, |
3508 | 0 | ScalarValue::Int16(e) => format_option!(f, e)?, |
3509 | 0 | ScalarValue::Int32(e) => format_option!(f, e)?, |
3510 | 2 | ScalarValue::Int64(e) => format_option!(f0 , e)?0 , |
3511 | 0 | ScalarValue::UInt8(e) => format_option!(f, e)?, |
3512 | 0 | ScalarValue::UInt16(e) => format_option!(f, e)?, |
3513 | 0 | ScalarValue::UInt32(e) => format_option!(f, e)?, |
3514 | 4 | ScalarValue::UInt64(e) => format_option!1 (f3 , e)?0 , |
3515 | 0 | ScalarValue::TimestampSecond(e, _) => format_option!(f, e)?, |
3516 | 0 | ScalarValue::TimestampMillisecond(e, _) => format_option!(f, e)?, |
3517 | 0 | ScalarValue::TimestampMicrosecond(e, _) => format_option!(f, e)?, |
3518 | 0 | ScalarValue::TimestampNanosecond(e, _) => format_option!(f, e)?, |
3519 | 0 | ScalarValue::Utf8(e) |
3520 | 0 | | ScalarValue::LargeUtf8(e) |
3521 | 0 | | ScalarValue::Utf8View(e) => format_option!(f, e)?, |
3522 | 0 | ScalarValue::Binary(e) |
3523 | 0 | | ScalarValue::FixedSizeBinary(_, e) |
3524 | 0 | | ScalarValue::LargeBinary(e) |
3525 | 0 | | ScalarValue::BinaryView(e) => match e { |
3526 | 0 | Some(bytes) => { |
3527 | | // print up to first 10 bytes, with trailing ... if needed |
3528 | 0 | for b in bytes.iter().take(10) { |
3529 | 0 | write!(f, "{b:02X}")?; |
3530 | | } |
3531 | 0 | if bytes.len() > 10 { |
3532 | 0 | write!(f, "...")?; |
3533 | 0 | } |
3534 | | } |
3535 | 0 | None => write!(f, "NULL")?, |
3536 | | }, |
3537 | 0 | ScalarValue::List(arr) => fmt_list(arr.to_owned() as ArrayRef, f)?, |
3538 | 0 | ScalarValue::LargeList(arr) => fmt_list(arr.to_owned() as ArrayRef, f)?, |
3539 | 0 | ScalarValue::FixedSizeList(arr) => fmt_list(arr.to_owned() as ArrayRef, f)?, |
3540 | 0 | ScalarValue::Date32(e) => { |
3541 | 0 | format_option!(f, e.map(|v| Date32Type::to_naive_date(v).to_string()))? |
3542 | | } |
3543 | 0 | ScalarValue::Date64(e) => { |
3544 | 0 | format_option!(f, e.map(|v| Date64Type::to_naive_date(v).to_string()))? |
3545 | | } |
3546 | 0 | ScalarValue::Time32Second(e) => format_option!(f, e)?, |
3547 | 0 | ScalarValue::Time32Millisecond(e) => format_option!(f, e)?, |
3548 | 0 | ScalarValue::Time64Microsecond(e) => format_option!(f, e)?, |
3549 | 0 | ScalarValue::Time64Nanosecond(e) => format_option!(f, e)?, |
3550 | 0 | ScalarValue::IntervalYearMonth(e) => format_option!(f, e)?, |
3551 | 0 | ScalarValue::IntervalMonthDayNano(e) => { |
3552 | 0 | format_option!(f, e.map(|v| format!("{v:?}")))? |
3553 | | } |
3554 | 0 | ScalarValue::IntervalDayTime(e) => { |
3555 | 0 | format_option!(f, e.map(|v| format!("{v:?}")))?; |
3556 | | } |
3557 | 0 | ScalarValue::DurationSecond(e) => format_option!(f, e)?, |
3558 | 0 | ScalarValue::DurationMillisecond(e) => format_option!(f, e)?, |
3559 | 0 | ScalarValue::DurationMicrosecond(e) => format_option!(f, e)?, |
3560 | 0 | ScalarValue::DurationNanosecond(e) => format_option!(f, e)?, |
3561 | 0 | ScalarValue::Struct(struct_arr) => { |
3562 | 0 | // ScalarValue Struct should always have a single element |
3563 | 0 | assert_eq!(struct_arr.len(), 1); |
3564 | | |
3565 | 0 | if struct_arr.null_count() == struct_arr.len() { |
3566 | 0 | write!(f, "NULL")?; |
3567 | 0 | return Ok(()); |
3568 | 0 | } |
3569 | 0 |
|
3570 | 0 | let columns = struct_arr.columns(); |
3571 | 0 | let fields = struct_arr.fields(); |
3572 | 0 | let nulls = struct_arr.nulls(); |
3573 | 0 |
|
3574 | 0 | write!( |
3575 | 0 | f, |
3576 | 0 | "{{{}}}", |
3577 | 0 | columns |
3578 | 0 | .iter() |
3579 | 0 | .zip(fields.iter()) |
3580 | 0 | .enumerate() |
3581 | 0 | .map(|(index, (column, field))| { |
3582 | 0 | if nulls.is_some_and(|b| b.is_null(index)) { |
3583 | 0 | format!("{}:NULL", field.name()) |
3584 | 0 | } else if let DataType::Struct(_) = field.data_type() { |
3585 | 0 | let sv = ScalarValue::Struct(Arc::new( |
3586 | 0 | column.as_struct().to_owned(), |
3587 | 0 | )); |
3588 | 0 | format!("{}:{sv}", field.name()) |
3589 | | } else { |
3590 | 0 | let sv = array_value_to_string(column, 0).unwrap(); |
3591 | 0 | format!("{}:{sv}", field.name()) |
3592 | | } |
3593 | 0 | }) |
3594 | 0 | .collect::<Vec<_>>() |
3595 | 0 | .join(",") |
3596 | 0 | )? |
3597 | | } |
3598 | 0 | ScalarValue::Map(map_arr) => { |
3599 | 0 | if map_arr.null_count() == map_arr.len() { |
3600 | 0 | write!(f, "NULL")?; |
3601 | 0 | return Ok(()); |
3602 | 0 | } |
3603 | 0 |
|
3604 | 0 | write!( |
3605 | 0 | f, |
3606 | 0 | "[{}]", |
3607 | 0 | map_arr |
3608 | 0 | .iter() |
3609 | 0 | .map(|struct_array| { |
3610 | 0 | if let Some(arr) = struct_array { |
3611 | 0 | let mut buffer = VecDeque::new(); |
3612 | 0 | for i in 0..arr.len() { |
3613 | 0 | let key = |
3614 | 0 | array_value_to_string(arr.column(0), i).unwrap(); |
3615 | 0 | let value = |
3616 | 0 | array_value_to_string(arr.column(1), i).unwrap(); |
3617 | 0 | buffer.push_back(format!("{}:{}", key, value)); |
3618 | 0 | } |
3619 | 0 | format!( |
3620 | 0 | "{{{}}}", |
3621 | 0 | buffer |
3622 | 0 | .into_iter() |
3623 | 0 | .collect::<Vec<_>>() |
3624 | 0 | .join(",") |
3625 | 0 | .as_str() |
3626 | 0 | ) |
3627 | | } else { |
3628 | 0 | "NULL".to_string() |
3629 | | } |
3630 | 0 | }) |
3631 | 0 | .collect::<Vec<_>>() |
3632 | 0 | .join(",") |
3633 | 0 | )? |
3634 | | } |
3635 | 0 | ScalarValue::Union(val, _fields, _mode) => match val { |
3636 | 0 | Some((id, val)) => write!(f, "{}:{}", id, val)?, |
3637 | 0 | None => write!(f, "NULL")?, |
3638 | | }, |
3639 | 0 | ScalarValue::Dictionary(_k, v) => write!(f, "{v}")?, |
3640 | 0 | ScalarValue::Null => write!(f, "NULL")?, |
3641 | | }; |
3642 | 8 | Ok(()) |
3643 | 8 | } |
3644 | | } |
3645 | | |
3646 | 0 | fn fmt_list(arr: ArrayRef, f: &mut fmt::Formatter) -> fmt::Result { |
3647 | 0 | // ScalarValue List, LargeList, FixedSizeList should always have a single element |
3648 | 0 | assert_eq!(arr.len(), 1); |
3649 | 0 | let options = FormatOptions::default().with_display_error(true); |
3650 | 0 | let formatter = |
3651 | 0 | ArrayFormatter::try_new(arr.as_ref() as &dyn Array, &options).unwrap(); |
3652 | 0 | let value_formatter = formatter.value(0); |
3653 | 0 | write!(f, "{value_formatter}") |
3654 | 0 | } |
3655 | | |
3656 | | /// writes a byte array to formatter. `[1, 2, 3]` ==> `"1,2,3"` |
3657 | 0 | fn fmt_binary(data: &[u8], f: &mut fmt::Formatter) -> fmt::Result { |
3658 | 0 | let mut iter = data.iter(); |
3659 | 0 | if let Some(b) = iter.next() { |
3660 | 0 | write!(f, "{b}")?; |
3661 | 0 | } |
3662 | 0 | for b in iter { |
3663 | 0 | write!(f, ",{b}")?; |
3664 | | } |
3665 | 0 | Ok(()) |
3666 | 0 | } |
3667 | | |
3668 | | impl fmt::Debug for ScalarValue { |
3669 | 4 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
3670 | 0 | match self { |
3671 | 0 | ScalarValue::Decimal128(_, _, _) => write!(f, "Decimal128({self})"), |
3672 | 0 | ScalarValue::Decimal256(_, _, _) => write!(f, "Decimal256({self})"), |
3673 | 0 | ScalarValue::Boolean(_) => write!(f, "Boolean({self})"), |
3674 | 0 | ScalarValue::Float16(_) => write!(f, "Float16({self})"), |
3675 | 0 | ScalarValue::Float32(_) => write!(f, "Float32({self})"), |
3676 | 0 | ScalarValue::Float64(_) => write!(f, "Float64({self})"), |
3677 | 0 | ScalarValue::Int8(_) => write!(f, "Int8({self})"), |
3678 | 0 | ScalarValue::Int16(_) => write!(f, "Int16({self})"), |
3679 | 0 | ScalarValue::Int32(_) => write!(f, "Int32({self})"), |
3680 | 0 | ScalarValue::Int64(_) => write!(f, "Int64({self})"), |
3681 | 0 | ScalarValue::UInt8(_) => write!(f, "UInt8({self})"), |
3682 | 0 | ScalarValue::UInt16(_) => write!(f, "UInt16({self})"), |
3683 | 0 | ScalarValue::UInt32(_) => write!(f, "UInt32({self})"), |
3684 | 4 | ScalarValue::UInt64(_) => write!(f, "UInt64({self})"), |
3685 | 0 | ScalarValue::TimestampSecond(_, tz_opt) => { |
3686 | 0 | write!(f, "TimestampSecond({self}, {tz_opt:?})") |
3687 | | } |
3688 | 0 | ScalarValue::TimestampMillisecond(_, tz_opt) => { |
3689 | 0 | write!(f, "TimestampMillisecond({self}, {tz_opt:?})") |
3690 | | } |
3691 | 0 | ScalarValue::TimestampMicrosecond(_, tz_opt) => { |
3692 | 0 | write!(f, "TimestampMicrosecond({self}, {tz_opt:?})") |
3693 | | } |
3694 | 0 | ScalarValue::TimestampNanosecond(_, tz_opt) => { |
3695 | 0 | write!(f, "TimestampNanosecond({self}, {tz_opt:?})") |
3696 | | } |
3697 | 0 | ScalarValue::Utf8(None) => write!(f, "Utf8({self})"), |
3698 | 0 | ScalarValue::Utf8(Some(_)) => write!(f, "Utf8(\"{self}\")"), |
3699 | 0 | ScalarValue::Utf8View(None) => write!(f, "Utf8View({self})"), |
3700 | 0 | ScalarValue::Utf8View(Some(_)) => write!(f, "Utf8View(\"{self}\")"), |
3701 | 0 | ScalarValue::LargeUtf8(None) => write!(f, "LargeUtf8({self})"), |
3702 | 0 | ScalarValue::LargeUtf8(Some(_)) => write!(f, "LargeUtf8(\"{self}\")"), |
3703 | 0 | ScalarValue::Binary(None) => write!(f, "Binary({self})"), |
3704 | 0 | ScalarValue::Binary(Some(b)) => { |
3705 | 0 | write!(f, "Binary(\"")?; |
3706 | 0 | fmt_binary(b.as_slice(), f)?; |
3707 | 0 | write!(f, "\")") |
3708 | | } |
3709 | 0 | ScalarValue::BinaryView(None) => write!(f, "BinaryView({self})"), |
3710 | 0 | ScalarValue::BinaryView(Some(b)) => { |
3711 | 0 | write!(f, "BinaryView(\"")?; |
3712 | 0 | fmt_binary(b.as_slice(), f)?; |
3713 | 0 | write!(f, "\")") |
3714 | | } |
3715 | 0 | ScalarValue::FixedSizeBinary(size, None) => { |
3716 | 0 | write!(f, "FixedSizeBinary({size}, {self})") |
3717 | | } |
3718 | 0 | ScalarValue::FixedSizeBinary(size, Some(b)) => { |
3719 | 0 | write!(f, "FixedSizeBinary({size}, \"")?; |
3720 | 0 | fmt_binary(b.as_slice(), f)?; |
3721 | 0 | write!(f, "\")") |
3722 | | } |
3723 | 0 | ScalarValue::LargeBinary(None) => write!(f, "LargeBinary({self})"), |
3724 | 0 | ScalarValue::LargeBinary(Some(b)) => { |
3725 | 0 | write!(f, "LargeBinary(\"")?; |
3726 | 0 | fmt_binary(b.as_slice(), f)?; |
3727 | 0 | write!(f, "\")") |
3728 | | } |
3729 | 0 | ScalarValue::FixedSizeList(_) => write!(f, "FixedSizeList({self})"), |
3730 | 0 | ScalarValue::List(_) => write!(f, "List({self})"), |
3731 | 0 | ScalarValue::LargeList(_) => write!(f, "LargeList({self})"), |
3732 | 0 | ScalarValue::Struct(struct_arr) => { |
3733 | 0 | // ScalarValue Struct should always have a single element |
3734 | 0 | assert_eq!(struct_arr.len(), 1); |
3735 | | |
3736 | 0 | let columns = struct_arr.columns(); |
3737 | 0 | let fields = struct_arr.fields(); |
3738 | 0 |
|
3739 | 0 | write!( |
3740 | 0 | f, |
3741 | 0 | "Struct({{{}}})", |
3742 | 0 | columns |
3743 | 0 | .iter() |
3744 | 0 | .zip(fields.iter()) |
3745 | 0 | .map(|(column, field)| { |
3746 | 0 | let sv = array_value_to_string(column, 0).unwrap(); |
3747 | 0 | let name = field.name(); |
3748 | 0 | format!("{name}:{sv}") |
3749 | 0 | }) |
3750 | 0 | .collect::<Vec<_>>() |
3751 | 0 | .join(",") |
3752 | 0 | ) |
3753 | | } |
3754 | 0 | ScalarValue::Map(map_arr) => { |
3755 | 0 | write!( |
3756 | 0 | f, |
3757 | 0 | "Map([{}])", |
3758 | 0 | map_arr |
3759 | 0 | .iter() |
3760 | 0 | .map(|struct_array| { |
3761 | 0 | if let Some(arr) = struct_array { |
3762 | 0 | let buffer: Vec<String> = (0..arr.len()) |
3763 | 0 | .map(|i| { |
3764 | 0 | let key = array_value_to_string(arr.column(0), i) |
3765 | 0 | .unwrap(); |
3766 | 0 | let value = |
3767 | 0 | array_value_to_string(arr.column(1), i) |
3768 | 0 | .unwrap(); |
3769 | 0 | format!("{key:?}:{value:?}") |
3770 | 0 | }) |
3771 | 0 | .collect(); |
3772 | 0 | format!("{{{}}}", buffer.join(",")) |
3773 | | } else { |
3774 | 0 | "NULL".to_string() |
3775 | | } |
3776 | 0 | }) |
3777 | 0 | .collect::<Vec<_>>() |
3778 | 0 | .join(",") |
3779 | 0 | ) |
3780 | | } |
3781 | 0 | ScalarValue::Date32(_) => write!(f, "Date32(\"{self}\")"), |
3782 | 0 | ScalarValue::Date64(_) => write!(f, "Date64(\"{self}\")"), |
3783 | 0 | ScalarValue::Time32Second(_) => write!(f, "Time32Second(\"{self}\")"), |
3784 | | ScalarValue::Time32Millisecond(_) => { |
3785 | 0 | write!(f, "Time32Millisecond(\"{self}\")") |
3786 | | } |
3787 | | ScalarValue::Time64Microsecond(_) => { |
3788 | 0 | write!(f, "Time64Microsecond(\"{self}\")") |
3789 | | } |
3790 | | ScalarValue::Time64Nanosecond(_) => { |
3791 | 0 | write!(f, "Time64Nanosecond(\"{self}\")") |
3792 | | } |
3793 | | ScalarValue::IntervalDayTime(_) => { |
3794 | 0 | write!(f, "IntervalDayTime(\"{self}\")") |
3795 | | } |
3796 | | ScalarValue::IntervalYearMonth(_) => { |
3797 | 0 | write!(f, "IntervalYearMonth(\"{self}\")") |
3798 | | } |
3799 | | ScalarValue::IntervalMonthDayNano(_) => { |
3800 | 0 | write!(f, "IntervalMonthDayNano(\"{self}\")") |
3801 | | } |
3802 | 0 | ScalarValue::DurationSecond(_) => write!(f, "DurationSecond(\"{self}\")"), |
3803 | | ScalarValue::DurationMillisecond(_) => { |
3804 | 0 | write!(f, "DurationMillisecond(\"{self}\")") |
3805 | | } |
3806 | | ScalarValue::DurationMicrosecond(_) => { |
3807 | 0 | write!(f, "DurationMicrosecond(\"{self}\")") |
3808 | | } |
3809 | | ScalarValue::DurationNanosecond(_) => { |
3810 | 0 | write!(f, "DurationNanosecond(\"{self}\")") |
3811 | | } |
3812 | 0 | ScalarValue::Union(val, _fields, _mode) => match val { |
3813 | 0 | Some((id, val)) => write!(f, "Union {}:{}", id, val), |
3814 | 0 | None => write!(f, "Union(NULL)"), |
3815 | | }, |
3816 | 0 | ScalarValue::Dictionary(k, v) => write!(f, "Dictionary({k:?}, {v:?})"), |
3817 | 0 | ScalarValue::Null => write!(f, "NULL"), |
3818 | | } |
3819 | 4 | } |
3820 | | } |
3821 | | |
3822 | | /// Trait used to map a NativeType to a ScalarValue |
3823 | | pub trait ScalarType<T: ArrowNativeType> { |
3824 | | /// returns a scalar from an optional T |
3825 | | fn scalar(r: Option<T>) -> ScalarValue; |
3826 | | } |
3827 | | |
3828 | | impl ScalarType<f32> for Float32Type { |
3829 | 0 | fn scalar(r: Option<f32>) -> ScalarValue { |
3830 | 0 | ScalarValue::Float32(r) |
3831 | 0 | } |
3832 | | } |
3833 | | |
3834 | | impl ScalarType<i64> for TimestampSecondType { |
3835 | 0 | fn scalar(r: Option<i64>) -> ScalarValue { |
3836 | 0 | ScalarValue::TimestampSecond(r, None) |
3837 | 0 | } |
3838 | | } |
3839 | | |
3840 | | impl ScalarType<i64> for TimestampMillisecondType { |
3841 | 0 | fn scalar(r: Option<i64>) -> ScalarValue { |
3842 | 0 | ScalarValue::TimestampMillisecond(r, None) |
3843 | 0 | } |
3844 | | } |
3845 | | |
3846 | | impl ScalarType<i64> for TimestampMicrosecondType { |
3847 | 0 | fn scalar(r: Option<i64>) -> ScalarValue { |
3848 | 0 | ScalarValue::TimestampMicrosecond(r, None) |
3849 | 0 | } |
3850 | | } |
3851 | | |
3852 | | impl ScalarType<i64> for TimestampNanosecondType { |
3853 | 0 | fn scalar(r: Option<i64>) -> ScalarValue { |
3854 | 0 | ScalarValue::TimestampNanosecond(r, None) |
3855 | 0 | } |
3856 | | } |
3857 | | |
3858 | | impl ScalarType<i32> for Date32Type { |
3859 | 0 | fn scalar(r: Option<i32>) -> ScalarValue { |
3860 | 0 | ScalarValue::Date32(r) |
3861 | 0 | } |
3862 | | } |
3863 | | |
3864 | | #[cfg(test)] |
3865 | | mod tests { |
3866 | | |
3867 | | use super::*; |
3868 | | use crate::cast::{ |
3869 | | as_map_array, as_string_array, as_struct_array, as_uint32_array, as_uint64_array, |
3870 | | }; |
3871 | | |
3872 | | use crate::assert_batches_eq; |
3873 | | use crate::utils::array_into_list_array_nullable; |
3874 | | use arrow::buffer::OffsetBuffer; |
3875 | | use arrow::compute::{is_null, kernels}; |
3876 | | use arrow::error::ArrowError; |
3877 | | use arrow::util::pretty::pretty_format_columns; |
3878 | | use arrow_buffer::Buffer; |
3879 | | use arrow_schema::Fields; |
3880 | | use chrono::NaiveDate; |
3881 | | use rand::Rng; |
3882 | | |
/// A map array wrapped in `ScalarValue::Map` must come back unchanged from
/// `to_array`.
#[test]
fn test_scalar_value_from_for_map() {
    let mut map_builder =
        MapBuilder::new(None, StringBuilder::new(), Int32Builder::with_capacity(4));

    // First map: a single entry.
    map_builder.keys().append_value("joe");
    map_builder.values().append_value(1);
    map_builder.append(true).unwrap();

    // Second map: two entries.
    for (key, value) in [("blogs", 2), ("foo", 4)] {
        map_builder.keys().append_value(key);
        map_builder.values().append_value(value);
    }
    map_builder.append(true).unwrap();

    // Third map has no entries; the fourth is appended with `false`.
    map_builder.append(true).unwrap();
    map_builder.append(false).unwrap();

    let expected = map_builder.finish();

    let scalar = ScalarValue::Map(Arc::new(expected.clone()));
    let round_tripped = scalar.to_array().unwrap();
    assert_eq!(as_map_array(&round_tripped).unwrap(), &expected);
}
3907 | | |
/// A struct scalar assembled with `ScalarStructBuilder` must convert to the
/// same array as a `StructArray` built directly from the same columns.
#[test]
fn test_scalar_value_from_for_struct() {
    let bool_field = Field::new("b", DataType::Boolean, false);
    let int_field = Field::new("c", DataType::Int32, false);
    let bool_col = Arc::new(BooleanArray::from(vec![false]));
    let int_col = Arc::new(Int32Array::from(vec![42]));

    let expected = StructArray::from(vec![
        (
            Arc::new(bool_field.clone()),
            Arc::clone(&bool_col) as ArrayRef,
        ),
        (
            Arc::new(int_field.clone()),
            Arc::clone(&int_col) as ArrayRef,
        ),
    ]);

    let scalar = ScalarStructBuilder::new()
        .with_array(bool_field, bool_col)
        .with_array(int_field, int_col)
        .build()
        .unwrap();

    let array = scalar.to_array().unwrap();
    assert_eq!(as_struct_array(&array).unwrap(), &expected);
}
3934 | | |
/// Building a struct scalar from four-element columns must fail; unwrapping
/// that failure produces the panic message expected below.
#[test]
#[should_panic(
    expected = "Error building ScalarValue::Struct. Expected array with exactly one element, found array with 4 elements"
)]
fn test_scalar_value_from_for_struct_should_panic() {
    let four_bools = Arc::new(BooleanArray::from(vec![false, true, false, false]));
    let four_ints = Arc::new(Int32Array::from(vec![42, 28, 19, 31]));

    let _ = ScalarStructBuilder::new()
        .with_array(Field::new("bool", DataType::Boolean, false), four_bools)
        .with_array(Field::new("i32", DataType::Int32, false), four_ints)
        .build()
        .unwrap();
}
3952 | | |
/// `to_array_of_size(2)` on struct and list scalars must yield the wrapped
/// array's contents twice in a row.
#[test]
fn test_to_array_of_size_for_nested() {
    // Builds a two-column struct array ("b": Boolean, "c": Int32).
    let struct_of = |bools: Vec<bool>, ints: Vec<i32>| {
        StructArray::from(vec![
            (
                Arc::new(Field::new("b", DataType::Boolean, false)),
                Arc::new(BooleanArray::from(bools)) as ArrayRef,
            ),
            (
                Arc::new(Field::new("c", DataType::Int32, false)),
                Arc::new(Int32Array::from(ints)) as ArrayRef,
            ),
        ])
    };

    // Struct
    let scalar = ScalarValue::Struct(Arc::new(struct_of(
        vec![false, false, true, true],
        vec![42, 28, 19, 31],
    )));
    let repeated = scalar.to_array_of_size(2).unwrap();

    let expected = struct_of(
        vec![false, false, true, true, false, false, true, true],
        vec![42, 28, 19, 31, 42, 28, 19, 31],
    );
    assert_eq!(as_struct_array(&repeated).unwrap(), &expected);

    // List
    let row = vec![Some(1), None, Some(2)];
    let single =
        ListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(row.clone())]);

    let scalar = ScalarValue::List(Arc::new(single));
    let repeated = scalar
        .to_array_of_size(2)
        .expect("Failed to convert to array of size");

    let expected = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
        Some(row.clone()),
        Some(row),
    ]);
    assert_eq!(&expected, repeated.as_list::<i32>());
}
4011 | | |
/// `to_array_of_size` on a fixed-size-list scalar: size 2 repeats the list
/// values twice, size 0 yields an empty array.
#[test]
fn test_to_array_of_size_for_fsl() {
    let item_field = Arc::new(Field::new("item", DataType::Int32, true));
    let single = FixedSizeListArray::new(
        Arc::clone(&item_field),
        3,
        Arc::new(Int32Array::from_iter([Some(1), None, Some(2)])),
        None,
    );
    let scalar = ScalarValue::FixedSizeList(Arc::new(single));

    let doubled = scalar
        .to_array_of_size(2)
        .expect("Failed to convert to array of size");

    let expected = FixedSizeListArray::new(
        item_field,
        3,
        Arc::new(Int32Array::from_iter([
            Some(1),
            None,
            Some(2),
            Some(1),
            None,
            Some(2),
        ])),
        None,
    );
    assert_eq!(
        &expected,
        as_fixed_size_list_array(doubled.as_ref()).unwrap()
    );

    let empty = scalar
        .to_array_of_size(0)
        .expect("Failed to convert to empty array");
    assert_eq!(empty.len(), 0);
}
4038 | | |
/// `new_list_nullable` over string scalars must equal the list array built
/// directly from the same strings.
#[test]
fn test_list_to_array_string() {
    let words = vec!["rust", "arrow", "data-fusion"];
    let scalars: Vec<ScalarValue> =
        words.iter().map(|word| ScalarValue::from(*word)).collect();

    let result = ScalarValue::new_list_nullable(&scalars, &DataType::Utf8);

    let expected = array_into_list_array_nullable(Arc::new(StringArray::from(words)));
    assert_eq!(*result, expected);
}
4056 | | |
4057 | | fn build_list<O: OffsetSizeTrait>( |
4058 | | values: Vec<Option<Vec<Option<i64>>>>, |
4059 | | ) -> Vec<ScalarValue> { |
4060 | | values |
4061 | | .into_iter() |
4062 | | .map(|v| { |
4063 | | let arr = if v.is_some() { |
4064 | | Arc::new( |
4065 | | GenericListArray::<O>::from_iter_primitive::<Int64Type, _, _>( |
4066 | | vec![v], |
4067 | | ), |
4068 | | ) |
4069 | | } else if O::IS_LARGE { |
4070 | | new_null_array( |
4071 | | &DataType::LargeList(Arc::new(Field::new( |
4072 | | "item", |
4073 | | DataType::Int64, |
4074 | | true, |
4075 | | ))), |
4076 | | 1, |
4077 | | ) |
4078 | | } else { |
4079 | | new_null_array( |
4080 | | &DataType::List(Arc::new(Field::new( |
4081 | | "item", |
4082 | | DataType::Int64, |
4083 | | true, |
4084 | | ))), |
4085 | | 1, |
4086 | | ) |
4087 | | }; |
4088 | | |
4089 | | if O::IS_LARGE { |
4090 | | ScalarValue::LargeList(arr.as_list::<i64>().to_owned().into()) |
4091 | | } else { |
4092 | | ScalarValue::List(arr.as_list::<i32>().to_owned().into()) |
4093 | | } |
4094 | | }) |
4095 | | .collect() |
4096 | | } |
4097 | | |
/// `iter_to_array` over fixed-size-list scalars (null, two populated, null)
/// must produce one four-row fixed-size-list array.
#[test]
fn test_iter_to_array_fixed_size_list() {
    let field = Arc::new(Field::new("item", DataType::Int32, true));
    // Builds a one-row fixed-size list of width 3 from three values.
    let fsl_of = |values: Vec<i32>| {
        Arc::new(FixedSizeListArray::new(
            Arc::clone(&field),
            3,
            Arc::new(Int32Array::from(values)),
            None,
        ))
    };

    let first = fsl_of(vec![1, 2, 3]);
    let second = fsl_of(vec![4, 5, 6]);
    let null_row = Arc::new(FixedSizeListArray::new_null(Arc::clone(&field), 1, 1));

    let scalars = vec![
        ScalarValue::FixedSizeList(Arc::clone(&null_row)),
        ScalarValue::FixedSizeList(first),
        ScalarValue::FixedSizeList(second),
        ScalarValue::FixedSizeList(null_row),
    ];

    let array = ScalarValue::iter_to_array(scalars).unwrap();

    let expected = FixedSizeListArray::from_iter_primitive::<Int32Type, _, _>(
        vec![
            None,
            Some(vec![Some(1), Some(2), Some(3)]),
            Some(vec![Some(4), Some(5), Some(6)]),
            None,
        ],
        3,
    );
    assert_eq!(array.as_ref(), &expected);
}
4135 | | |
/// `iter_to_array` over two identical one-row struct scalars must produce the
/// equivalent two-row struct array.
#[test]
fn test_iter_to_array_struct() {
    // Builds a struct array whose every row is {A: false, B: 42}.
    let struct_with_rows = |rows: usize| {
        StructArray::from(vec![
            (
                Arc::new(Field::new("A", DataType::Boolean, false)),
                Arc::new(BooleanArray::from(vec![false; rows])) as ArrayRef,
            ),
            (
                Arc::new(Field::new("B", DataType::Int32, false)),
                Arc::new(Int32Array::from(vec![42; rows])) as ArrayRef,
            ),
        ])
    };

    let scalars = vec![
        ScalarValue::Struct(Arc::new(struct_with_rows(1))),
        ScalarValue::Struct(Arc::new(struct_with_rows(1))),
    ];

    let array = ScalarValue::iter_to_array(scalars).unwrap();

    let expected = struct_with_rows(2);
    assert_eq!(array.as_ref(), &expected);
}
4179 | | |
/// `iter_to_array` must carry each struct scalar's validity into the result:
/// the first input row is valid, the second is null.
#[test]
fn test_iter_to_array_struct_with_nulls() {
    // Builds a one-row struct {A: a, B: b}; `validity` is the null-mask byte,
    // where 1 marks the row non-null and 0 marks it null.
    let one_row = |a: i32, b: i64, validity: u8| {
        StructArray::from((
            vec![
                (
                    Arc::new(Field::new("A", DataType::Int32, false)),
                    Arc::new(Int32Array::from(vec![a])) as ArrayRef,
                ),
                (
                    Arc::new(Field::new("B", DataType::Int64, false)),
                    Arc::new(Int64Array::from(vec![b])) as ArrayRef,
                ),
            ],
            Buffer::from(&[validity]),
        ))
    };

    let non_null = one_row(1, 2, 1);
    let null = one_row(3, 4, 0);

    let scalars = vec![
        ScalarValue::Struct(Arc::new(non_null)),
        ScalarValue::Struct(Arc::new(null)),
    ];

    let array = ScalarValue::iter_to_array(scalars).unwrap();
    let combined = array.as_struct();
    assert!(combined.is_valid(0));
    assert!(combined.is_null(1));
}
4223 | | |
/// `iter_to_array` over list scalars `[1,2,3]`, null, `[4,5]` must produce the
/// matching three-row array, for both `List` and `LargeList`.
#[test]
fn iter_to_array_primitive_test() {
    // Rows shared by the inputs and the expected arrays: [1,2,3], null, [4,5]
    let rows = || -> Vec<Option<Vec<Option<i64>>>> {
        vec![
            Some(vec![Some(1), Some(2), Some(3)]),
            None,
            Some(vec![Some(4), Some(5)]),
        ]
    };

    // 32-bit offsets
    let array = ScalarValue::iter_to_array(build_list::<i32>(rows())).unwrap();
    let actual = as_list_array(&array);
    let expected = ListArray::from_iter_primitive::<Int64Type, _, _>(rows());
    assert_eq!(actual, &expected);

    // 64-bit offsets
    let array = ScalarValue::iter_to_array(build_list::<i64>(rows())).unwrap();
    let actual = as_large_list_array(&array);
    let expected = LargeListArray::from_iter_primitive::<Int64Type, _, _>(rows());
    assert_eq!(actual, &expected);
}
4258 | | |
/// `iter_to_array` over two string-list scalars must produce a two-row list
/// array holding the same strings.
#[test]
fn iter_to_array_string_test() {
    let first = vec!["foo", "bar", "baz"];
    let second = vec!["rust", "world"];

    let scalars = vec![
        ScalarValue::List(Arc::new(array_into_list_array_nullable(Arc::new(
            StringArray::from(first.clone()),
        )))),
        ScalarValue::List(Arc::new(array_into_list_array_nullable(Arc::new(
            StringArray::from(second.clone()),
        )))),
    ];

    let array = ScalarValue::iter_to_array(scalars).unwrap();
    let result = array.as_list::<i32>();

    // build expected array
    let mut expected_builder = ListBuilder::new(StringBuilder::with_capacity(5, 25));
    for group in [first, second] {
        for text in group {
            expected_builder.values().append_value(text);
        }
        expected_builder.append(true);
    }
    let expected = expected_builder.finish();

    assert_eq!(result, &expected);
}
4292 | | |
/// Every row of a list array, lifted into a `ScalarValue::List`, must compare
/// equal to that same row through `eq_array`.
#[test]
fn test_list_scalar_eq_to_array() {
    let ragged_lists: ArrayRef =
        Arc::new(ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
            Some(vec![Some(0), Some(1), Some(2)]),
            None,
            Some(vec![None, Some(5)]),
        ]));

    let uniform_lists: ArrayRef =
        Arc::new(ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
            Some(vec![Some(0), Some(1), Some(2)]),
            None,
            Some(vec![Some(3), None, Some(5)]),
        ]));

    for arr in [ragged_lists, uniform_lists] {
        for row in 0..arr.len() {
            let single = arr.slice(row, 1);
            let scalar = ScalarValue::List(single.as_list::<i32>().to_owned().into());
            assert!(scalar.eq_array(&arr, row).unwrap());
        }
    }
}
4317 | | |
/// `add` accepts its right-hand side both by reference and by value.
#[test]
fn scalar_add_trait_test() -> Result<()> {
    let lhs = ScalarValue::Float64(Some(123.));
    let rhs = ScalarValue::Float64(Some(123.));
    let expected = ScalarValue::Float64(Some(246.));

    let by_ref = lhs.add(&rhs)?;
    assert_eq!(by_ref, expected);

    let by_value = lhs.add(rhs)?;
    assert_eq!(by_value, expected);
    Ok(())
}
4332 | | |
/// `sub` accepts its right-hand side both by reference and by value.
#[test]
fn scalar_sub_trait_test() -> Result<()> {
    let lhs = ScalarValue::Float64(Some(123.));
    let rhs = ScalarValue::Float64(Some(123.));
    let expected = ScalarValue::Float64(Some(0.));

    let by_ref = lhs.sub(&rhs)?;
    assert_eq!(by_ref, expected);

    let by_value = lhs.sub(rhs)?;
    assert_eq!(by_value, expected);
    Ok(())
}
4347 | | |
/// Int32 subtraction in both directions: 42 - 100 and 100 - 42.
#[test]
fn scalar_sub_trait_int32_test() -> Result<()> {
    let small = ScalarValue::Int32(Some(42));
    let large = ScalarValue::Int32(Some(100));

    let negative = small.sub(&large)?;
    assert_eq!(negative, ScalarValue::Int32(Some(-58)));

    let positive = large.sub(small)?;
    assert_eq!(positive, ScalarValue::Int32(Some(58)));
    Ok(())
}
4356 | | |
/// `sub_checked` must report an overflow error for `i32::MAX - i32::MIN`.
#[test]
fn scalar_sub_trait_int32_overflow_test() {
    let max = ScalarValue::Int32(Some(i32::MAX));
    let min = ScalarValue::Int32(Some(i32::MIN));

    let message = max.sub_checked(&min).unwrap_err().strip_backtrace();
    assert_eq!(
        message,
        "Arrow error: Arithmetic overflow: Overflow happened on: 2147483647 - -2147483648"
    );
}
4370 | | |
/// Int64 subtraction in both directions: 42 - 100 and 100 - 42.
#[test]
fn scalar_sub_trait_int64_test() -> Result<()> {
    let small = ScalarValue::Int64(Some(42));
    let large = ScalarValue::Int64(Some(100));

    let negative = small.sub(&large)?;
    assert_eq!(negative, ScalarValue::Int64(Some(-58)));

    let positive = large.sub(small)?;
    assert_eq!(positive, ScalarValue::Int64(Some(58)));
    Ok(())
}
4379 | | |
/// `sub_checked` must report an overflow error for `i64::MAX - i64::MIN`.
#[test]
fn scalar_sub_trait_int64_overflow_test() {
    let max = ScalarValue::Int64(Some(i64::MAX));
    let min = ScalarValue::Int64(Some(i64::MIN));

    let message = max.sub_checked(&min).unwrap_err().strip_backtrace();
    assert_eq!(message, "Arrow error: Arithmetic overflow: Overflow happened on: 9223372036854775807 - -9223372036854775808");
}
4390 | | |
/// Runs `check_scalar_add_overflow` with `MAX + MAX` for every signed and
/// unsigned integer width from 8 to 64 bits.
#[test]
fn scalar_add_overflow_test() -> Result<()> {
    // Expands to one `MAX + MAX` check for the given arrow type, `ScalarValue`
    // variant and native integer type.
    macro_rules! check_max_plus_max {
        ($arrow_type:ty, $variant:ident, $native:ty) => {
            check_scalar_add_overflow::<$arrow_type>(
                ScalarValue::$variant(Some(<$native>::MAX)),
                ScalarValue::$variant(Some(<$native>::MAX)),
            )
        };
    }

    check_max_plus_max!(Int8Type, Int8, i8);
    check_max_plus_max!(UInt8Type, UInt8, u8);
    check_max_plus_max!(Int16Type, Int16, i16);
    check_max_plus_max!(UInt16Type, UInt16, u16);
    check_max_plus_max!(Int32Type, Int32, i32);
    check_max_plus_max!(UInt32Type, UInt32, u32);
    check_max_plus_max!(Int64Type, Int64, i64);
    check_max_plus_max!(UInt64Type, UInt64, u64);

    Ok(())
}
4428 | | |
4429 | | // Verifies that ScalarValue has the same behavior with compute kernal when it overflows. |
4430 | | fn check_scalar_add_overflow<T>(left: ScalarValue, right: ScalarValue) |
4431 | | where |
4432 | | T: ArrowNumericType, |
4433 | | { |
4434 | | let scalar_result = left.add_checked(&right); |
4435 | | |
4436 | | let left_array = left.to_array().expect("Failed to convert to array"); |
4437 | | let right_array = right.to_array().expect("Failed to convert to array"); |
4438 | | let arrow_left_array = left_array.as_primitive::<T>(); |
4439 | | let arrow_right_array = right_array.as_primitive::<T>(); |
4440 | | let arrow_result = kernels::numeric::add(arrow_left_array, arrow_right_array); |
4441 | | |
4442 | | assert_eq!(scalar_result.is_ok(), arrow_result.is_ok()); |
4443 | | } |
4444 | | |
/// Adding an interval to a timestamp must give the same result regardless of
/// operand order, for each of the three interval variants.
///
/// The `&timestamp` borrows below had been corrupted to `×tamp` by HTML
/// entity decoding (`&times` rendered as `×`), which is not valid Rust; they
/// are restored here.
#[test]
fn test_interval_add_timestamp() -> Result<()> {
    let timestamp = ScalarValue::TimestampNanosecond(Some(123), None);

    let intervals = [
        ScalarValue::IntervalMonthDayNano(Some(IntervalMonthDayNano {
            months: 1,
            days: 2,
            nanoseconds: 3,
        })),
        ScalarValue::IntervalYearMonth(Some(123)),
        ScalarValue::IntervalDayTime(Some(IntervalDayTime {
            days: 1,
            milliseconds: 23,
        })),
    ];

    for interval in &intervals {
        let result = interval.add(&timestamp)?;
        let expect = timestamp.add(interval)?;
        assert_eq!(result, expect);
    }
    Ok(())
}
4473 | | |
/// End-to-end checks for `ScalarValue::Decimal128`: data type, conversion to
/// `i128`, negation, array conversion, equality against arrays, ordering, and
/// `iter_to_array` with and without a `None` entry.
#[test]
fn scalar_decimal_test() -> Result<()> {
    let decimal_type = DataType::Decimal128(10, 1);
    let decimal_value = ScalarValue::Decimal128(Some(123), 10, 1);
    assert_eq!(decimal_type, decimal_value.data_type());
    let raw: i128 = decimal_value.clone().try_into().unwrap();
    assert_eq!(123_i128, raw);
    assert!(!decimal_value.is_null());

    if let ScalarValue::Decimal128(negated, _, _) = decimal_value.arithmetic_negate()? {
        assert_eq!(-123, negated.unwrap());
    } else {
        unreachable!();
    }

    // decimal scalar to array
    let single = decimal_value
        .to_array()
        .expect("Failed to convert to array");
    let single = as_decimal128_array(&single)?;
    assert_eq!(1, single.len());
    assert_eq!(decimal_type, single.data_type().clone());
    assert_eq!(123i128, single.value(0));

    // decimal scalar to array with size
    let repeated = decimal_value
        .to_array_of_size(10)
        .expect("Failed to convert to array of size");
    let repeated_decimal = as_decimal128_array(&repeated)?;
    assert_eq!(10, repeated.len());
    assert_eq!(decimal_type, repeated.data_type().clone());
    assert_eq!(123i128, repeated_decimal.value(0));
    assert_eq!(123i128, repeated_decimal.value(9));

    // test eq array
    for row in [1, 5] {
        assert!(decimal_value
            .eq_array(&repeated, row)
            .expect("Failed to compare arrays"));
    }

    // test try from array
    assert_eq!(
        decimal_value,
        ScalarValue::try_from_array(&repeated, 5).unwrap()
    );

    assert_eq!(
        decimal_value,
        ScalarValue::try_new_decimal128(123, 10, 1).unwrap()
    );

    // test compare
    let left = ScalarValue::Decimal128(Some(123), 10, 2);
    let right = ScalarValue::Decimal128(Some(124), 10, 2);
    assert!(!left.eq(&right));
    assert!(left < right);
    assert!(left <= right);

    // make sure that two decimals with diff datatype can't be compared.
    let other_scale = ScalarValue::Decimal128(Some(124), 10, 3);
    assert_eq!(None, left.partial_cmp(&other_scale));

    // convert a vec of decimals to a decimal array and check the result
    let scale_two = |value: Option<i128>| ScalarValue::Decimal128(value, 10, 2);
    let mut decimals: Vec<ScalarValue> = (1..=3).map(|v| scale_two(Some(v))).collect();

    let array = ScalarValue::iter_to_array(decimals.clone()).unwrap();
    assert_eq!(3, array.len());
    assert_eq!(DataType::Decimal128(10, 2), array.data_type().clone());

    // same values followed by a `None` entry
    decimals.push(scale_two(None));
    let array = ScalarValue::iter_to_array(decimals).unwrap();
    assert_eq!(4, array.len());
    assert_eq!(DataType::Decimal128(10, 2), array.data_type().clone());

    for (row, value) in (1i128..=3).enumerate() {
        assert!(ScalarValue::try_new_decimal128(value, 10, 2)
            .unwrap()
            .eq_array(&array, row)
            .expect("Failed to compare arrays"));
    }
    assert_eq!(
        ScalarValue::Decimal128(None, 10, 2),
        ScalarValue::try_from_array(&array, 3).unwrap()
    );

    Ok(())
}
4579 | | |
/// `partial_cmp` on list scalars: equal lists, a difference in the first
/// element, and a difference in the last element.
#[test]
fn test_list_partial_cmp() {
    // Builds a single-row Int64 list scalar holding the three given values.
    let list_of = |values: [i64; 3]| {
        let row: Vec<Option<i64>> = values.iter().copied().map(Some).collect();
        ScalarValue::List(Arc::new(
            ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(row)]),
        ))
    };

    let a = list_of([1, 2, 3]);
    let b = list_of([1, 2, 3]);
    assert_eq!(a.partial_cmp(&b), Some(Ordering::Equal));

    let a = list_of([10, 2, 3]);
    let b = list_of([1, 2, 30]);
    assert_eq!(a.partial_cmp(&b), Some(Ordering::Greater));

    let a = list_of([10, 2, 3]);
    let b = list_of([10, 2, 30]);
    assert_eq!(a.partial_cmp(&b), Some(Ordering::Less));
}
4636 | | |
4637 | | #[test] |
4638 | | fn scalar_value_to_array_u64() -> Result<()> { |
4639 | | let value = ScalarValue::UInt64(Some(13u64)); |
4640 | | let array = value.to_array().expect("Failed to convert to array"); |
4641 | | let array = as_uint64_array(&array)?; |
4642 | | assert_eq!(array.len(), 1); |
4643 | | assert!(!array.is_null(0)); |
4644 | | assert_eq!(array.value(0), 13); |
4645 | | |
4646 | | let value = ScalarValue::UInt64(None); |
4647 | | let array = value.to_array().expect("Failed to convert to array"); |
4648 | | let array = as_uint64_array(&array)?; |
4649 | | assert_eq!(array.len(), 1); |
4650 | | assert!(array.is_null(0)); |
4651 | | Ok(()) |
4652 | | } |
4653 | | |
4654 | | #[test] |
4655 | | fn scalar_value_to_array_u32() -> Result<()> { |
4656 | | let value = ScalarValue::UInt32(Some(13u32)); |
4657 | | let array = value.to_array().expect("Failed to convert to array"); |
4658 | | let array = as_uint32_array(&array)?; |
4659 | | assert_eq!(array.len(), 1); |
4660 | | assert!(!array.is_null(0)); |
4661 | | assert_eq!(array.value(0), 13); |
4662 | | |
4663 | | let value = ScalarValue::UInt32(None); |
4664 | | let array = value.to_array().expect("Failed to convert to array"); |
4665 | | let array = as_uint32_array(&array)?; |
4666 | | assert_eq!(array.len(), 1); |
4667 | | assert!(array.is_null(0)); |
4668 | | Ok(()) |
4669 | | } |
4670 | | |
4671 | | #[test] |
4672 | | fn scalar_list_null_to_array() { |
4673 | | let list_array = ScalarValue::new_list_nullable(&[], &DataType::UInt64); |
4674 | | |
4675 | | assert_eq!(list_array.len(), 1); |
4676 | | assert_eq!(list_array.values().len(), 0); |
4677 | | } |
4678 | | |
4679 | | #[test] |
4680 | | fn scalar_large_list_null_to_array() { |
4681 | | let list_array = ScalarValue::new_large_list(&[], &DataType::UInt64); |
4682 | | |
4683 | | assert_eq!(list_array.len(), 1); |
4684 | | assert_eq!(list_array.values().len(), 0); |
4685 | | } |
4686 | | |
4687 | | #[test] |
4688 | | fn scalar_list_to_array() -> Result<()> { |
4689 | | let values = vec![ |
4690 | | ScalarValue::UInt64(Some(100)), |
4691 | | ScalarValue::UInt64(None), |
4692 | | ScalarValue::UInt64(Some(101)), |
4693 | | ]; |
4694 | | let list_array = ScalarValue::new_list_nullable(&values, &DataType::UInt64); |
4695 | | assert_eq!(list_array.len(), 1); |
4696 | | assert_eq!(list_array.values().len(), 3); |
4697 | | |
4698 | | let prim_array_ref = list_array.value(0); |
4699 | | let prim_array = as_uint64_array(&prim_array_ref)?; |
4700 | | assert_eq!(prim_array.len(), 3); |
4701 | | assert_eq!(prim_array.value(0), 100); |
4702 | | assert!(prim_array.is_null(1)); |
4703 | | assert_eq!(prim_array.value(2), 101); |
4704 | | Ok(()) |
4705 | | } |
4706 | | |
4707 | | #[test] |
4708 | | fn scalar_large_list_to_array() -> Result<()> { |
4709 | | let values = vec![ |
4710 | | ScalarValue::UInt64(Some(100)), |
4711 | | ScalarValue::UInt64(None), |
4712 | | ScalarValue::UInt64(Some(101)), |
4713 | | ]; |
4714 | | let list_array = ScalarValue::new_large_list(&values, &DataType::UInt64); |
4715 | | assert_eq!(list_array.len(), 1); |
4716 | | assert_eq!(list_array.values().len(), 3); |
4717 | | |
4718 | | let prim_array_ref = list_array.value(0); |
4719 | | let prim_array = as_uint64_array(&prim_array_ref)?; |
4720 | | assert_eq!(prim_array.len(), 3); |
4721 | | assert_eq!(prim_array.value(0), 100); |
4722 | | assert!(prim_array.is_null(1)); |
4723 | | assert_eq!(prim_array.value(2), 101); |
4724 | | Ok(()) |
4725 | | } |
4726 | | |
4727 | | /// Creates array directly and via ScalarValue and ensures they are the same |
4728 | | macro_rules! check_scalar_iter { |
4729 | | ($SCALAR_T:ident, $ARRAYTYPE:ident, $INPUT:expr) => {{ |
4730 | | let scalars: Vec<_> = |
4731 | | $INPUT.iter().map(|v| ScalarValue::$SCALAR_T(*v)).collect(); |
4732 | | |
4733 | | let array = ScalarValue::iter_to_array(scalars.into_iter()).unwrap(); |
4734 | | |
4735 | | let expected: ArrayRef = Arc::new($ARRAYTYPE::from($INPUT)); |
4736 | | |
4737 | | assert_eq!(&array, &expected); |
4738 | | }}; |
4739 | | } |
4740 | | |
4741 | | /// Creates array directly and via ScalarValue and ensures they are the same |
4742 | | /// but for variants that carry a timezone field. |
4743 | | macro_rules! check_scalar_iter_tz { |
4744 | | ($SCALAR_T:ident, $ARRAYTYPE:ident, $INPUT:expr) => {{ |
4745 | | let scalars: Vec<_> = $INPUT |
4746 | | .iter() |
4747 | | .map(|v| ScalarValue::$SCALAR_T(*v, None)) |
4748 | | .collect(); |
4749 | | |
4750 | | let array = ScalarValue::iter_to_array(scalars.into_iter()).unwrap(); |
4751 | | |
4752 | | let expected: ArrayRef = Arc::new($ARRAYTYPE::from($INPUT)); |
4753 | | |
4754 | | assert_eq!(&array, &expected); |
4755 | | }}; |
4756 | | } |
4757 | | |
4758 | | /// Creates array directly and via ScalarValue and ensures they |
4759 | | /// are the same, for string arrays |
4760 | | macro_rules! check_scalar_iter_string { |
4761 | | ($SCALAR_T:ident, $ARRAYTYPE:ident, $INPUT:expr) => {{ |
4762 | | let scalars: Vec<_> = $INPUT |
4763 | | .iter() |
4764 | | .map(|v| ScalarValue::$SCALAR_T(v.map(|v| v.to_string()))) |
4765 | | .collect(); |
4766 | | |
4767 | | let array = ScalarValue::iter_to_array(scalars.into_iter()).unwrap(); |
4768 | | |
4769 | | let expected: ArrayRef = Arc::new($ARRAYTYPE::from($INPUT)); |
4770 | | |
4771 | | assert_eq!(&array, &expected); |
4772 | | }}; |
4773 | | } |
4774 | | |
4775 | | /// Creates array directly and via ScalarValue and ensures they |
4776 | | /// are the same, for binary arrays |
4777 | | macro_rules! check_scalar_iter_binary { |
4778 | | ($SCALAR_T:ident, $ARRAYTYPE:ident, $INPUT:expr) => {{ |
4779 | | let scalars: Vec<_> = $INPUT |
4780 | | .iter() |
4781 | | .map(|v| ScalarValue::$SCALAR_T(v.map(|v| v.to_vec()))) |
4782 | | .collect(); |
4783 | | |
4784 | | let array = ScalarValue::iter_to_array(scalars.into_iter()).unwrap(); |
4785 | | |
4786 | | let expected: $ARRAYTYPE = |
4787 | | $INPUT.iter().map(|v| v.map(|v| v.to_vec())).collect(); |
4788 | | |
4789 | | let expected: ArrayRef = Arc::new(expected); |
4790 | | |
4791 | | assert_eq!(&array, &expected); |
4792 | | }}; |
4793 | | } |
4794 | | |
4795 | | #[test] |
4796 | | // despite clippy claiming they are useless, the code doesn't compile otherwise. |
4797 | | #[allow(clippy::useless_vec)] |
4798 | | fn scalar_iter_to_array_boolean() { |
4799 | | check_scalar_iter!(Boolean, BooleanArray, vec![Some(true), None, Some(false)]); |
4800 | | check_scalar_iter!(Float32, Float32Array, vec![Some(1.9), None, Some(-2.1)]); |
4801 | | check_scalar_iter!(Float64, Float64Array, vec![Some(1.9), None, Some(-2.1)]); |
4802 | | |
4803 | | check_scalar_iter!(Int8, Int8Array, vec![Some(1), None, Some(3)]); |
4804 | | check_scalar_iter!(Int16, Int16Array, vec![Some(1), None, Some(3)]); |
4805 | | check_scalar_iter!(Int32, Int32Array, vec![Some(1), None, Some(3)]); |
4806 | | check_scalar_iter!(Int64, Int64Array, vec![Some(1), None, Some(3)]); |
4807 | | |
4808 | | check_scalar_iter!(UInt8, UInt8Array, vec![Some(1), None, Some(3)]); |
4809 | | check_scalar_iter!(UInt16, UInt16Array, vec![Some(1), None, Some(3)]); |
4810 | | check_scalar_iter!(UInt32, UInt32Array, vec![Some(1), None, Some(3)]); |
4811 | | check_scalar_iter!(UInt64, UInt64Array, vec![Some(1), None, Some(3)]); |
4812 | | |
4813 | | check_scalar_iter_tz!( |
4814 | | TimestampSecond, |
4815 | | TimestampSecondArray, |
4816 | | vec![Some(1), None, Some(3)] |
4817 | | ); |
4818 | | check_scalar_iter_tz!( |
4819 | | TimestampMillisecond, |
4820 | | TimestampMillisecondArray, |
4821 | | vec![Some(1), None, Some(3)] |
4822 | | ); |
4823 | | check_scalar_iter_tz!( |
4824 | | TimestampMicrosecond, |
4825 | | TimestampMicrosecondArray, |
4826 | | vec![Some(1), None, Some(3)] |
4827 | | ); |
4828 | | check_scalar_iter_tz!( |
4829 | | TimestampNanosecond, |
4830 | | TimestampNanosecondArray, |
4831 | | vec![Some(1), None, Some(3)] |
4832 | | ); |
4833 | | |
4834 | | check_scalar_iter_string!( |
4835 | | Utf8, |
4836 | | StringArray, |
4837 | | vec![Some("foo"), None, Some("bar")] |
4838 | | ); |
4839 | | check_scalar_iter_string!( |
4840 | | LargeUtf8, |
4841 | | LargeStringArray, |
4842 | | vec![Some("foo"), None, Some("bar")] |
4843 | | ); |
4844 | | check_scalar_iter_binary!( |
4845 | | Binary, |
4846 | | BinaryArray, |
4847 | | vec![Some(b"foo"), None, Some(b"bar")] |
4848 | | ); |
4849 | | check_scalar_iter_binary!( |
4850 | | LargeBinary, |
4851 | | LargeBinaryArray, |
4852 | | vec![Some(b"foo"), None, Some(b"bar")] |
4853 | | ); |
4854 | | } |
4855 | | |
4856 | | #[test] |
4857 | | fn scalar_iter_to_array_empty() { |
4858 | | let scalars = vec![] as Vec<ScalarValue>; |
4859 | | |
4860 | | let result = ScalarValue::iter_to_array(scalars).unwrap_err(); |
4861 | | assert!( |
4862 | | result |
4863 | | .to_string() |
4864 | | .contains("Empty iterator passed to ScalarValue::iter_to_array"), |
4865 | | "{}", |
4866 | | result |
4867 | | ); |
4868 | | } |
4869 | | |
4870 | | #[test] |
4871 | | fn scalar_iter_to_dictionary() { |
4872 | | fn make_val(v: Option<String>) -> ScalarValue { |
4873 | | let key_type = DataType::Int32; |
4874 | | let value = ScalarValue::Utf8(v); |
4875 | | ScalarValue::Dictionary(Box::new(key_type), Box::new(value)) |
4876 | | } |
4877 | | |
4878 | | let scalars = [ |
4879 | | make_val(Some("Foo".into())), |
4880 | | make_val(None), |
4881 | | make_val(Some("Bar".into())), |
4882 | | ]; |
4883 | | |
4884 | | let array = ScalarValue::iter_to_array(scalars).unwrap(); |
4885 | | let array = as_dictionary_array::<Int32Type>(&array).unwrap(); |
4886 | | let values_array = as_string_array(array.values()).unwrap(); |
4887 | | |
4888 | | let values = array |
4889 | | .keys_iter() |
4890 | | .map(|k| { |
4891 | | k.map(|k| { |
4892 | | assert!(values_array.is_valid(k)); |
4893 | | values_array.value(k) |
4894 | | }) |
4895 | | }) |
4896 | | .collect::<Vec<_>>(); |
4897 | | |
4898 | | let expected = vec![Some("Foo"), None, Some("Bar")]; |
4899 | | assert_eq!(values, expected); |
4900 | | } |
4901 | | |
4902 | | #[test] |
4903 | | fn scalar_iter_to_array_mismatched_types() { |
4904 | | use ScalarValue::*; |
4905 | | // If the scalar values are not all the correct type, error here |
4906 | | let scalars = [Boolean(Some(true)), Int32(Some(5))]; |
4907 | | |
4908 | | let result = ScalarValue::iter_to_array(scalars).unwrap_err(); |
4909 | | assert!(result.to_string().contains("Inconsistent types in ScalarValue::iter_to_array. Expected Boolean, got Int32(5)"), |
4910 | | "{}", result); |
4911 | | } |
4912 | | |
4913 | | #[test] |
4914 | | fn scalar_try_from_array_null() { |
4915 | | let array = vec![Some(33), None].into_iter().collect::<Int64Array>(); |
4916 | | let array: ArrayRef = Arc::new(array); |
4917 | | |
4918 | | assert_eq!( |
4919 | | ScalarValue::Int64(Some(33)), |
4920 | | ScalarValue::try_from_array(&array, 0).unwrap() |
4921 | | ); |
4922 | | assert_eq!( |
4923 | | ScalarValue::Int64(None), |
4924 | | ScalarValue::try_from_array(&array, 1).unwrap() |
4925 | | ); |
4926 | | } |
4927 | | |
4928 | | #[test] |
4929 | | fn scalar_try_from_array_list_array_null() { |
4930 | | let list = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![ |
4931 | | Some(vec![Some(1), Some(2)]), |
4932 | | None, |
4933 | | ]); |
4934 | | |
4935 | | let non_null_list_scalar = ScalarValue::try_from_array(&list, 0).unwrap(); |
4936 | | let null_list_scalar = ScalarValue::try_from_array(&list, 1).unwrap(); |
4937 | | |
4938 | | let data_type = |
4939 | | DataType::List(Arc::new(Field::new("item", DataType::Int32, true))); |
4940 | | |
4941 | | assert_eq!(non_null_list_scalar.data_type(), data_type); |
4942 | | assert_eq!(null_list_scalar.data_type(), data_type); |
4943 | | } |
4944 | | |
4945 | | #[test] |
4946 | | fn scalar_try_from_list_datatypes() { |
4947 | | let inner_field = Arc::new(Field::new("item", DataType::Int32, true)); |
4948 | | |
4949 | | // Test for List |
4950 | | let data_type = &DataType::List(Arc::clone(&inner_field)); |
4951 | | let scalar: ScalarValue = data_type.try_into().unwrap(); |
4952 | | let expected = ScalarValue::List( |
4953 | | new_null_array(data_type, 1) |
4954 | | .as_list::<i32>() |
4955 | | .to_owned() |
4956 | | .into(), |
4957 | | ); |
4958 | | assert_eq!(expected, scalar); |
4959 | | assert!(expected.is_null()); |
4960 | | |
4961 | | // Test for LargeList |
4962 | | let data_type = &DataType::LargeList(Arc::clone(&inner_field)); |
4963 | | let scalar: ScalarValue = data_type.try_into().unwrap(); |
4964 | | let expected = ScalarValue::LargeList( |
4965 | | new_null_array(data_type, 1) |
4966 | | .as_list::<i64>() |
4967 | | .to_owned() |
4968 | | .into(), |
4969 | | ); |
4970 | | assert_eq!(expected, scalar); |
4971 | | assert!(expected.is_null()); |
4972 | | |
4973 | | // Test for FixedSizeList(5) |
4974 | | let data_type = &DataType::FixedSizeList(Arc::clone(&inner_field), 5); |
4975 | | let scalar: ScalarValue = data_type.try_into().unwrap(); |
4976 | | let expected = ScalarValue::FixedSizeList( |
4977 | | new_null_array(data_type, 1) |
4978 | | .as_fixed_size_list() |
4979 | | .to_owned() |
4980 | | .into(), |
4981 | | ); |
4982 | | assert_eq!(expected, scalar); |
4983 | | assert!(expected.is_null()); |
4984 | | } |
4985 | | |
4986 | | #[test] |
4987 | | fn scalar_try_from_list_of_list() { |
4988 | | let data_type = DataType::List(Arc::new(Field::new( |
4989 | | "item", |
4990 | | DataType::List(Arc::new(Field::new("item", DataType::Int32, true))), |
4991 | | true, |
4992 | | ))); |
4993 | | let data_type = &data_type; |
4994 | | let scalar: ScalarValue = data_type.try_into().unwrap(); |
4995 | | |
4996 | | let expected = ScalarValue::List( |
4997 | | new_null_array( |
4998 | | &DataType::List(Arc::new(Field::new( |
4999 | | "item", |
5000 | | DataType::List(Arc::new(Field::new("item", DataType::Int32, true))), |
5001 | | true, |
5002 | | ))), |
5003 | | 1, |
5004 | | ) |
5005 | | .as_list::<i32>() |
5006 | | .to_owned() |
5007 | | .into(), |
5008 | | ); |
5009 | | |
5010 | | assert_eq!(expected, scalar) |
5011 | | } |
5012 | | |
5013 | | #[test] |
5014 | | fn scalar_try_from_not_equal_list_nested_list() { |
5015 | | let list_data_type = |
5016 | | DataType::List(Arc::new(Field::new("item", DataType::Int32, true))); |
5017 | | let data_type = &list_data_type; |
5018 | | let list_scalar: ScalarValue = data_type.try_into().unwrap(); |
5019 | | |
5020 | | let nested_list_data_type = DataType::List(Arc::new(Field::new( |
5021 | | "item", |
5022 | | DataType::List(Arc::new(Field::new("item", DataType::Int32, true))), |
5023 | | true, |
5024 | | ))); |
5025 | | let data_type = &nested_list_data_type; |
5026 | | let nested_list_scalar: ScalarValue = data_type.try_into().unwrap(); |
5027 | | |
5028 | | assert_ne!(list_scalar, nested_list_scalar); |
5029 | | } |
5030 | | |
5031 | | #[test] |
5032 | | fn scalar_try_from_dict_datatype() { |
5033 | | let data_type = |
5034 | | DataType::Dictionary(Box::new(DataType::Int8), Box::new(DataType::Utf8)); |
5035 | | let data_type = &data_type; |
5036 | | let expected = ScalarValue::Dictionary( |
5037 | | Box::new(DataType::Int8), |
5038 | | Box::new(ScalarValue::Utf8(None)), |
5039 | | ); |
5040 | | assert_eq!(expected, data_type.try_into().unwrap()) |
5041 | | } |
5042 | | |
5043 | | #[test] |
5044 | | fn size_of_scalar() { |
5045 | | // Since ScalarValues are used in a non trivial number of places, |
5046 | | // making it larger means significant more memory consumption |
5047 | | // per distinct value. |
5048 | | // |
5049 | | // Thus this test ensures that no code change makes ScalarValue larger |
5050 | | // |
5051 | | // The alignment requirements differ across architectures and |
5052 | | // thus the size of the enum appears to as well |
5053 | | |
5054 | | // The value may also change depending on rust version |
5055 | | assert_eq!(std::mem::size_of::<ScalarValue>(), 64); |
5056 | | } |
5057 | | |
5058 | | #[test] |
5059 | | fn memory_size() { |
5060 | | let sv = ScalarValue::Binary(Some(Vec::with_capacity(10))); |
5061 | | assert_eq!(sv.size(), std::mem::size_of::<ScalarValue>() + 10,); |
5062 | | let sv_size = sv.size(); |
5063 | | |
5064 | | let mut v = Vec::with_capacity(10); |
5065 | | // do NOT clone `sv` here because this may shrink the vector capacity |
5066 | | v.push(sv); |
5067 | | assert_eq!(v.capacity(), 10); |
5068 | | assert_eq!( |
5069 | | ScalarValue::size_of_vec(&v), |
5070 | | std::mem::size_of::<Vec<ScalarValue>>() |
5071 | | + (9 * std::mem::size_of::<ScalarValue>()) |
5072 | | + sv_size, |
5073 | | ); |
5074 | | |
5075 | | let mut s = HashSet::with_capacity(0); |
5076 | | // do NOT clone `sv` here because this may shrink the vector capacity |
5077 | | s.insert(v.pop().unwrap()); |
5078 | | // hashsets may easily grow during insert, so capacity is dynamic |
5079 | | let s_capacity = s.capacity(); |
5080 | | assert_eq!( |
5081 | | ScalarValue::size_of_hashset(&s), |
5082 | | std::mem::size_of::<HashSet<ScalarValue>>() |
5083 | | + ((s_capacity - 1) * std::mem::size_of::<ScalarValue>()) |
5084 | | + sv_size, |
5085 | | ); |
5086 | | } |
5087 | | |
5088 | | #[test] |
5089 | | fn scalar_eq_array() { |
5090 | | // Validate that eq_array has the same semantics as ScalarValue::eq |
5091 | | macro_rules! make_typed_vec { |
5092 | | ($INPUT:expr, $TYPE:ident) => {{ |
5093 | | $INPUT |
5094 | | .iter() |
5095 | | .map(|v| v.map(|v| v as $TYPE)) |
5096 | | .collect::<Vec<_>>() |
5097 | | }}; |
5098 | | } |
5099 | | |
5100 | | let bool_vals = [Some(true), None, Some(false)]; |
5101 | | let f32_vals = [Some(-1.0), None, Some(1.0)]; |
5102 | | let f64_vals = make_typed_vec!(f32_vals, f64); |
5103 | | |
5104 | | let i8_vals = [Some(-1), None, Some(1)]; |
5105 | | let i16_vals = make_typed_vec!(i8_vals, i16); |
5106 | | let i32_vals = make_typed_vec!(i8_vals, i32); |
5107 | | let i64_vals = make_typed_vec!(i8_vals, i64); |
5108 | | |
5109 | | let u8_vals = [Some(0), None, Some(1)]; |
5110 | | let u16_vals = make_typed_vec!(u8_vals, u16); |
5111 | | let u32_vals = make_typed_vec!(u8_vals, u32); |
5112 | | let u64_vals = make_typed_vec!(u8_vals, u64); |
5113 | | |
5114 | | let str_vals = [Some("foo"), None, Some("bar")]; |
5115 | | |
5116 | | let interval_dt_vals = [ |
5117 | | Some(IntervalDayTime::MINUS_ONE), |
5118 | | None, |
5119 | | Some(IntervalDayTime::ONE), |
5120 | | ]; |
5121 | | let interval_mdn_vals = [ |
5122 | | Some(IntervalMonthDayNano::MINUS_ONE), |
5123 | | None, |
5124 | | Some(IntervalMonthDayNano::ONE), |
5125 | | ]; |
5126 | | |
5127 | | /// Test each value in `scalar` with the corresponding element |
5128 | | /// at `array`. Assumes each element is unique (aka not equal |
5129 | | /// with all other indexes) |
5130 | | #[derive(Debug)] |
5131 | | struct TestCase { |
5132 | | array: ArrayRef, |
5133 | | scalars: Vec<ScalarValue>, |
5134 | | } |
5135 | | |
5136 | | /// Create a test case for casing the input to the specified array type |
5137 | | macro_rules! make_test_case { |
5138 | | ($INPUT:expr, $ARRAY_TY:ident, $SCALAR_TY:ident) => {{ |
5139 | | TestCase { |
5140 | | array: Arc::new($INPUT.iter().collect::<$ARRAY_TY>()), |
5141 | | scalars: $INPUT.iter().map(|v| ScalarValue::$SCALAR_TY(*v)).collect(), |
5142 | | } |
5143 | | }}; |
5144 | | |
5145 | | ($INPUT:expr, $ARRAY_TY:ident, $SCALAR_TY:ident, $TZ:expr) => {{ |
5146 | | let tz = $TZ; |
5147 | | TestCase { |
5148 | | array: Arc::new($INPUT.iter().collect::<$ARRAY_TY>()), |
5149 | | scalars: $INPUT |
5150 | | .iter() |
5151 | | .map(|v| ScalarValue::$SCALAR_TY(*v, tz.clone())) |
5152 | | .collect(), |
5153 | | } |
5154 | | }}; |
5155 | | } |
5156 | | |
5157 | | macro_rules! make_str_test_case { |
5158 | | ($INPUT:expr, $ARRAY_TY:ident, $SCALAR_TY:ident) => {{ |
5159 | | TestCase { |
5160 | | array: Arc::new($INPUT.iter().cloned().collect::<$ARRAY_TY>()), |
5161 | | scalars: $INPUT |
5162 | | .iter() |
5163 | | .map(|v| ScalarValue::$SCALAR_TY(v.map(|v| v.to_string()))) |
5164 | | .collect(), |
5165 | | } |
5166 | | }}; |
5167 | | } |
5168 | | |
5169 | | macro_rules! make_binary_test_case { |
5170 | | ($INPUT:expr, $ARRAY_TY:ident, $SCALAR_TY:ident) => {{ |
5171 | | TestCase { |
5172 | | array: Arc::new($INPUT.iter().cloned().collect::<$ARRAY_TY>()), |
5173 | | scalars: $INPUT |
5174 | | .iter() |
5175 | | .map(|v| { |
5176 | | ScalarValue::$SCALAR_TY(v.map(|v| v.as_bytes().to_vec())) |
5177 | | }) |
5178 | | .collect(), |
5179 | | } |
5180 | | }}; |
5181 | | } |
5182 | | |
5183 | | /// create a test case for DictionaryArray<$INDEX_TY> |
5184 | | macro_rules! make_str_dict_test_case { |
5185 | | ($INPUT:expr, $INDEX_TY:ident) => {{ |
5186 | | TestCase { |
5187 | | array: Arc::new( |
5188 | | $INPUT |
5189 | | .iter() |
5190 | | .cloned() |
5191 | | .collect::<DictionaryArray<$INDEX_TY>>(), |
5192 | | ), |
5193 | | scalars: $INPUT |
5194 | | .iter() |
5195 | | .map(|v| { |
5196 | | ScalarValue::Dictionary( |
5197 | | Box::new($INDEX_TY::DATA_TYPE), |
5198 | | Box::new(ScalarValue::Utf8(v.map(|v| v.to_string()))), |
5199 | | ) |
5200 | | }) |
5201 | | .collect(), |
5202 | | } |
5203 | | }}; |
5204 | | } |
5205 | | |
5206 | | let cases = vec![ |
5207 | | make_test_case!(bool_vals, BooleanArray, Boolean), |
5208 | | make_test_case!(f32_vals, Float32Array, Float32), |
5209 | | make_test_case!(f64_vals, Float64Array, Float64), |
5210 | | make_test_case!(i8_vals, Int8Array, Int8), |
5211 | | make_test_case!(i16_vals, Int16Array, Int16), |
5212 | | make_test_case!(i32_vals, Int32Array, Int32), |
5213 | | make_test_case!(i64_vals, Int64Array, Int64), |
5214 | | make_test_case!(u8_vals, UInt8Array, UInt8), |
5215 | | make_test_case!(u16_vals, UInt16Array, UInt16), |
5216 | | make_test_case!(u32_vals, UInt32Array, UInt32), |
5217 | | make_test_case!(u64_vals, UInt64Array, UInt64), |
5218 | | make_str_test_case!(str_vals, StringArray, Utf8), |
5219 | | make_str_test_case!(str_vals, LargeStringArray, LargeUtf8), |
5220 | | make_binary_test_case!(str_vals, BinaryArray, Binary), |
5221 | | make_binary_test_case!(str_vals, LargeBinaryArray, LargeBinary), |
5222 | | make_test_case!(i32_vals, Date32Array, Date32), |
5223 | | make_test_case!(i64_vals, Date64Array, Date64), |
5224 | | make_test_case!(i32_vals, Time32SecondArray, Time32Second), |
5225 | | make_test_case!(i32_vals, Time32MillisecondArray, Time32Millisecond), |
5226 | | make_test_case!(i64_vals, Time64MicrosecondArray, Time64Microsecond), |
5227 | | make_test_case!(i64_vals, Time64NanosecondArray, Time64Nanosecond), |
5228 | | make_test_case!(i64_vals, TimestampSecondArray, TimestampSecond, None), |
5229 | | make_test_case!( |
5230 | | i64_vals, |
5231 | | TimestampSecondArray, |
5232 | | TimestampSecond, |
5233 | | Some("UTC".into()) |
5234 | | ), |
5235 | | make_test_case!( |
5236 | | i64_vals, |
5237 | | TimestampMillisecondArray, |
5238 | | TimestampMillisecond, |
5239 | | None |
5240 | | ), |
5241 | | make_test_case!( |
5242 | | i64_vals, |
5243 | | TimestampMillisecondArray, |
5244 | | TimestampMillisecond, |
5245 | | Some("UTC".into()) |
5246 | | ), |
5247 | | make_test_case!( |
5248 | | i64_vals, |
5249 | | TimestampMicrosecondArray, |
5250 | | TimestampMicrosecond, |
5251 | | None |
5252 | | ), |
5253 | | make_test_case!( |
5254 | | i64_vals, |
5255 | | TimestampMicrosecondArray, |
5256 | | TimestampMicrosecond, |
5257 | | Some("UTC".into()) |
5258 | | ), |
5259 | | make_test_case!( |
5260 | | i64_vals, |
5261 | | TimestampNanosecondArray, |
5262 | | TimestampNanosecond, |
5263 | | None |
5264 | | ), |
5265 | | make_test_case!( |
5266 | | i64_vals, |
5267 | | TimestampNanosecondArray, |
5268 | | TimestampNanosecond, |
5269 | | Some("UTC".into()) |
5270 | | ), |
5271 | | make_test_case!(i32_vals, IntervalYearMonthArray, IntervalYearMonth), |
5272 | | make_test_case!(interval_dt_vals, IntervalDayTimeArray, IntervalDayTime), |
5273 | | make_test_case!( |
5274 | | interval_mdn_vals, |
5275 | | IntervalMonthDayNanoArray, |
5276 | | IntervalMonthDayNano |
5277 | | ), |
5278 | | make_str_dict_test_case!(str_vals, Int8Type), |
5279 | | make_str_dict_test_case!(str_vals, Int16Type), |
5280 | | make_str_dict_test_case!(str_vals, Int32Type), |
5281 | | make_str_dict_test_case!(str_vals, Int64Type), |
5282 | | make_str_dict_test_case!(str_vals, UInt8Type), |
5283 | | make_str_dict_test_case!(str_vals, UInt16Type), |
5284 | | make_str_dict_test_case!(str_vals, UInt32Type), |
5285 | | make_str_dict_test_case!(str_vals, UInt64Type), |
5286 | | ]; |
5287 | | |
5288 | | for case in cases { |
5289 | | println!("**** Test Case *****"); |
5290 | | let TestCase { array, scalars } = case; |
5291 | | println!("Input array type: {}", array.data_type()); |
5292 | | println!("Input scalars: {scalars:#?}"); |
5293 | | assert_eq!(array.len(), scalars.len()); |
5294 | | |
5295 | | for (index, scalar) in scalars.into_iter().enumerate() { |
5296 | | assert!( |
5297 | | scalar |
5298 | | .eq_array(&array, index) |
5299 | | .expect("Failed to compare arrays"), |
5300 | | "Expected {scalar:?} to be equal to {array:?} at index {index}" |
5301 | | ); |
5302 | | |
5303 | | // test that all other elements are *not* equal |
5304 | | for other_index in 0..array.len() { |
5305 | | if index != other_index { |
5306 | | assert!( |
5307 | | !scalar.eq_array(&array, other_index).expect("Failed to compare arrays"), |
5308 | | "Expected {scalar:?} to be NOT equal to {array:?} at index {other_index}" |
5309 | | ); |
5310 | | } |
5311 | | } |
5312 | | } |
5313 | | } |
5314 | | } |
5315 | | |
5316 | | #[test] |
5317 | | fn scalar_partial_ordering() { |
5318 | | use ScalarValue::*; |
5319 | | |
5320 | | assert_eq!( |
5321 | | Int64(Some(33)).partial_cmp(&Int64(Some(0))), |
5322 | | Some(Ordering::Greater) |
5323 | | ); |
5324 | | assert_eq!( |
5325 | | Int64(Some(0)).partial_cmp(&Int64(Some(33))), |
5326 | | Some(Ordering::Less) |
5327 | | ); |
5328 | | assert_eq!( |
5329 | | Int64(Some(33)).partial_cmp(&Int64(Some(33))), |
5330 | | Some(Ordering::Equal) |
5331 | | ); |
5332 | | // For different data type, `partial_cmp` returns None. |
5333 | | assert_eq!(Int64(Some(33)).partial_cmp(&Int32(Some(33))), None); |
5334 | | assert_eq!(Int32(Some(33)).partial_cmp(&Int64(Some(33))), None); |
5335 | | |
5336 | | assert_eq!( |
5337 | | ScalarValue::from(vec![ |
5338 | | ("A", ScalarValue::from(1.0)), |
5339 | | ("B", ScalarValue::from("Z")), |
5340 | | ]) |
5341 | | .partial_cmp(&ScalarValue::from(vec![ |
5342 | | ("A", ScalarValue::from(2.0)), |
5343 | | ("B", ScalarValue::from("A")), |
5344 | | ])), |
5345 | | Some(Ordering::Less) |
5346 | | ); |
5347 | | |
5348 | | // For different struct fields, `partial_cmp` returns None. |
5349 | | assert_eq!( |
5350 | | ScalarValue::from(vec![ |
5351 | | ("A", ScalarValue::from(1.0)), |
5352 | | ("B", ScalarValue::from("Z")), |
5353 | | ]) |
5354 | | .partial_cmp(&ScalarValue::from(vec![ |
5355 | | ("a", ScalarValue::from(2.0)), |
5356 | | ("b", ScalarValue::from("A")), |
5357 | | ])), |
5358 | | None |
5359 | | ); |
5360 | | } |
5361 | | |
5362 | | #[test] |
5363 | | fn test_scalar_value_from_string() { |
5364 | | let scalar = ScalarValue::from("foo"); |
5365 | | assert_eq!(scalar, ScalarValue::Utf8(Some("foo".to_string()))); |
5366 | | let scalar = ScalarValue::from("foo".to_string()); |
5367 | | assert_eq!(scalar, ScalarValue::Utf8(Some("foo".to_string()))); |
5368 | | let scalar = ScalarValue::from_str("foo").unwrap(); |
5369 | | assert_eq!(scalar, ScalarValue::Utf8(Some("foo".to_string()))); |
5370 | | } |
5371 | | |
5372 | | #[test] |
5373 | | fn test_scalar_struct() { |
5374 | | let field_a = Arc::new(Field::new("A", DataType::Int32, false)); |
5375 | | let field_b = Arc::new(Field::new("B", DataType::Boolean, false)); |
5376 | | let field_c = Arc::new(Field::new("C", DataType::Utf8, false)); |
5377 | | |
5378 | | let field_e = Arc::new(Field::new("e", DataType::Int16, false)); |
5379 | | let field_f = Arc::new(Field::new("f", DataType::Int64, false)); |
5380 | | let field_d = Arc::new(Field::new( |
5381 | | "D", |
5382 | | DataType::Struct(vec![Arc::clone(&field_e), Arc::clone(&field_f)].into()), |
5383 | | false, |
5384 | | )); |
5385 | | |
5386 | | let struct_array = StructArray::from(vec![ |
5387 | | ( |
5388 | | Arc::clone(&field_e), |
5389 | | Arc::new(Int16Array::from(vec![2])) as ArrayRef, |
5390 | | ), |
5391 | | ( |
5392 | | Arc::clone(&field_f), |
5393 | | Arc::new(Int64Array::from(vec![3])) as ArrayRef, |
5394 | | ), |
5395 | | ]); |
5396 | | |
5397 | | let struct_array = StructArray::from(vec![ |
5398 | | ( |
5399 | | Arc::clone(&field_a), |
5400 | | Arc::new(Int32Array::from(vec![23])) as ArrayRef, |
5401 | | ), |
5402 | | ( |
5403 | | Arc::clone(&field_b), |
5404 | | Arc::new(BooleanArray::from(vec![false])) as ArrayRef, |
5405 | | ), |
5406 | | ( |
5407 | | Arc::clone(&field_c), |
5408 | | Arc::new(StringArray::from(vec!["Hello"])) as ArrayRef, |
5409 | | ), |
5410 | | (Arc::clone(&field_d), Arc::new(struct_array) as ArrayRef), |
5411 | | ]); |
5412 | | let scalar = ScalarValue::Struct(Arc::new(struct_array)); |
5413 | | |
5414 | | let array = scalar |
5415 | | .to_array_of_size(2) |
5416 | | .expect("Failed to convert to array of size"); |
5417 | | |
5418 | | let expected = Arc::new(StructArray::from(vec![ |
5419 | | ( |
5420 | | Arc::clone(&field_a), |
5421 | | Arc::new(Int32Array::from(vec![23, 23])) as ArrayRef, |
5422 | | ), |
5423 | | ( |
5424 | | Arc::clone(&field_b), |
5425 | | Arc::new(BooleanArray::from(vec![false, false])) as ArrayRef, |
5426 | | ), |
5427 | | ( |
5428 | | Arc::clone(&field_c), |
5429 | | Arc::new(StringArray::from(vec!["Hello", "Hello"])) as ArrayRef, |
5430 | | ), |
5431 | | ( |
5432 | | Arc::clone(&field_d), |
5433 | | Arc::new(StructArray::from(vec![ |
5434 | | ( |
5435 | | Arc::clone(&field_e), |
5436 | | Arc::new(Int16Array::from(vec![2, 2])) as ArrayRef, |
5437 | | ), |
5438 | | ( |
5439 | | Arc::clone(&field_f), |
5440 | | Arc::new(Int64Array::from(vec![3, 3])) as ArrayRef, |
5441 | | ), |
5442 | | ])) as ArrayRef, |
5443 | | ), |
5444 | | ])) as ArrayRef; |
5445 | | |
5446 | | assert_eq!(&array, &expected); |
5447 | | |
5448 | | // Construct from second element of ArrayRef |
5449 | | let constructed = ScalarValue::try_from_array(&expected, 1).unwrap(); |
5450 | | assert_eq!(constructed, scalar); |
5451 | | |
5452 | | // None version |
5453 | | let none_scalar = ScalarValue::try_from(array.data_type()).unwrap(); |
5454 | | assert!(none_scalar.is_null()); |
5455 | | assert_eq!( |
5456 | | format!("{none_scalar:?}"), |
5457 | | String::from("Struct({A:,B:,C:,D:})") |
5458 | | ); |
5459 | | |
5460 | | // Construct with convenience From<Vec<(&str, ScalarValue)>> |
5461 | | let constructed = ScalarValue::from(vec![ |
5462 | | ("A", ScalarValue::from(23)), |
5463 | | ("B", ScalarValue::from(false)), |
5464 | | ("C", ScalarValue::from("Hello")), |
5465 | | ( |
5466 | | "D", |
5467 | | ScalarValue::from(vec![ |
5468 | | ("e", ScalarValue::from(2i16)), |
5469 | | ("f", ScalarValue::from(3i64)), |
5470 | | ]), |
5471 | | ), |
5472 | | ]); |
5473 | | assert_eq!(constructed, scalar); |
5474 | | |
5475 | | // Build Array from Vec of structs |
5476 | | let scalars = vec![ |
5477 | | ScalarValue::from(vec![ |
5478 | | ("A", ScalarValue::from(23)), |
5479 | | ("B", ScalarValue::from(false)), |
5480 | | ("C", ScalarValue::from("Hello")), |
5481 | | ( |
5482 | | "D", |
5483 | | ScalarValue::from(vec![ |
5484 | | ("e", ScalarValue::from(2i16)), |
5485 | | ("f", ScalarValue::from(3i64)), |
5486 | | ]), |
5487 | | ), |
5488 | | ]), |
5489 | | ScalarValue::from(vec![ |
5490 | | ("A", ScalarValue::from(7)), |
5491 | | ("B", ScalarValue::from(true)), |
5492 | | ("C", ScalarValue::from("World")), |
5493 | | ( |
5494 | | "D", |
5495 | | ScalarValue::from(vec![ |
5496 | | ("e", ScalarValue::from(4i16)), |
5497 | | ("f", ScalarValue::from(5i64)), |
5498 | | ]), |
5499 | | ), |
5500 | | ]), |
5501 | | ScalarValue::from(vec![ |
5502 | | ("A", ScalarValue::from(-1000)), |
5503 | | ("B", ScalarValue::from(true)), |
5504 | | ("C", ScalarValue::from("!!!!!")), |
5505 | | ( |
5506 | | "D", |
5507 | | ScalarValue::from(vec![ |
5508 | | ("e", ScalarValue::from(6i16)), |
5509 | | ("f", ScalarValue::from(7i64)), |
5510 | | ]), |
5511 | | ), |
5512 | | ]), |
5513 | | ]; |
5514 | | let array = ScalarValue::iter_to_array(scalars).unwrap(); |
5515 | | |
5516 | | let expected = Arc::new(StructArray::from(vec![ |
5517 | | ( |
5518 | | Arc::clone(&field_a), |
5519 | | Arc::new(Int32Array::from(vec![23, 7, -1000])) as ArrayRef, |
5520 | | ), |
5521 | | ( |
5522 | | Arc::clone(&field_b), |
5523 | | Arc::new(BooleanArray::from(vec![false, true, true])) as ArrayRef, |
5524 | | ), |
5525 | | ( |
5526 | | Arc::clone(&field_c), |
5527 | | Arc::new(StringArray::from(vec!["Hello", "World", "!!!!!"])) as ArrayRef, |
5528 | | ), |
5529 | | ( |
5530 | | Arc::clone(&field_d), |
5531 | | Arc::new(StructArray::from(vec![ |
5532 | | ( |
5533 | | Arc::clone(&field_e), |
5534 | | Arc::new(Int16Array::from(vec![2, 4, 6])) as ArrayRef, |
5535 | | ), |
5536 | | ( |
5537 | | Arc::clone(&field_f), |
5538 | | Arc::new(Int64Array::from(vec![3, 5, 7])) as ArrayRef, |
5539 | | ), |
5540 | | ])) as ArrayRef, |
5541 | | ), |
5542 | | ])) as ArrayRef; |
5543 | | |
5544 | | assert_eq!(&array, &expected); |
5545 | | } |
5546 | | |
/// Reads every slot of a sparse `UnionArray` back as a `ScalarValue::Union`
/// and checks equality, `eq_array` and null-ness for each slot.
#[test]
fn test_scalar_union_sparse() {
    let fields = UnionFields::from_iter([
        (0, Arc::new(Field::new("A", DataType::Int32, true))),
        (1, Arc::new(Field::new("B", DataType::Boolean, true))),
        (2, Arc::new(Field::new("C", DataType::Utf8, true))),
    ]);

    // Each child here has six entries (one per type id below); only a single
    // entry per child is populated.
    let children: Vec<ArrayRef> = vec![
        Arc::new(Int32Array::from(vec![
            Some(42),
            None,
            None,
            None,
            None,
            None,
        ])),
        Arc::new(BooleanArray::from(vec![
            None,
            Some(true),
            None,
            None,
            None,
            None,
        ])),
        Arc::new(StringArray::from(vec![
            None,
            None,
            Some("foo"),
            None,
            None,
            None,
        ])),
    ];

    let type_ids = ScalarBuffer::from(vec![0, 1, 2, 0, 1, 2]);
    let union = UnionArray::try_new(fields.clone(), type_ids, None, children)
        .expect("UnionArray");
    let array: ArrayRef = Arc::new(union);

    // (type id, inner scalar) expected at each slot, in slot order
    let cases = [
        (0, ScalarValue::from(42)),
        (1, ScalarValue::from(true)),
        (2, ScalarValue::from("foo")),
        (0, ScalarValue::Int32(None)),
        (1, ScalarValue::Boolean(None)),
        (2, ScalarValue::Utf8(None)),
    ];

    for (i, (type_id, inner)) in cases.into_iter().enumerate() {
        let is_null = inner.is_null();
        let expected = ScalarValue::Union(
            Some((type_id, Box::new(inner))),
            fields.clone(),
            UnionMode::Sparse,
        );
        let actual = ScalarValue::try_from_array(&array, i).expect("try_from_array");

        assert_eq!(
            actual, expected,
            "[{i}] {actual} was not equal to {expected}"
        );
        assert!(
            expected.eq_array(&array, i).expect("eq_array"),
            "[{i}] {expected}.eq_array was false"
        );
        // A null inner value must make the whole union scalar null
        assert!(!is_null || actual.is_null(), "[{i}] {actual} was not null");
    }
}
5602 | | |
/// Reads every slot of a dense `UnionArray` back as a `ScalarValue::Union`
/// and checks equality, `eq_array` and null-ness for each slot.
#[test]
fn test_scalar_union_dense() {
    let fields = UnionFields::from_iter([
        (0, Arc::new(Field::new("A", DataType::Int32, true))),
        (1, Arc::new(Field::new("B", DataType::Boolean, true))),
        (2, Arc::new(Field::new("C", DataType::Utf8, true))),
    ]);

    // Each child holds one value followed by one null; the offsets below
    // select which of the two a given slot refers to.
    let ints = Int32Array::from(vec![Some(42), None]);
    let bools = BooleanArray::from(vec![Some(true), None]);
    let strings = StringArray::from(vec![Some("foo"), None]);
    let children: Vec<ArrayRef> =
        vec![Arc::new(ints), Arc::new(bools), Arc::new(strings)];

    let type_ids = ScalarBuffer::from(vec![0, 1, 2, 0, 1, 2]);
    let offsets = ScalarBuffer::from(vec![0, 0, 0, 1, 1, 1]);
    let union = UnionArray::try_new(fields.clone(), type_ids, Some(offsets), children)
        .expect("UnionArray");
    let array: ArrayRef = Arc::new(union);

    // (type id, inner scalar) expected at each slot, in slot order
    let cases = [
        (0, ScalarValue::from(42)),
        (1, ScalarValue::from(true)),
        (2, ScalarValue::from("foo")),
        (0, ScalarValue::Int32(None)),
        (1, ScalarValue::Boolean(None)),
        (2, ScalarValue::Utf8(None)),
    ];

    for (i, (type_id, inner)) in cases.into_iter().enumerate() {
        let is_null = inner.is_null();
        let expected = ScalarValue::Union(
            Some((type_id, Box::new(inner))),
            fields.clone(),
            UnionMode::Dense,
        );
        let actual = ScalarValue::try_from_array(&array, i).expect("try_from_array");

        assert_eq!(
            actual, expected,
            "[{i}] {actual} was not equal to {expected}"
        );
        assert!(
            expected.eq_array(&array, i).expect("eq_array"),
            "[{i}] {expected}.eq_array was false"
        );
        // A null inner value must make the whole union scalar null
        assert!(!is_null || actual.is_null(), "[{i}] {actual} was not null");
    }
}
5652 | | |
/// Checks `ScalarValue::iter_to_array` for structs that contain a list field,
/// and for lists whose elements are such structs.
#[test]
fn test_lists_in_struct() {
    /// Appends one `{A: name, primitive_list: items}` struct to the values
    /// builder of `list_builder`, without closing the outer list entry.
    fn push_struct(
        list_builder: &mut ListBuilder<StructBuilder>,
        name: &str,
        items: &[i32],
    ) {
        let struct_builder = list_builder.values();
        struct_builder
            .field_builder::<StringBuilder>(0)
            .unwrap()
            .append_value(name);
        let inner_list = struct_builder
            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
            .unwrap();
        for &item in items {
            inner_list.values().append_value(item);
        }
        inner_list.append(true);
        struct_builder.append(true);
    }

    let field_a = Arc::new(Field::new("A", DataType::Utf8, false));
    let item_field = Arc::new(Field::new("item", DataType::Int32, true));
    let field_primitive_list = Arc::new(Field::new(
        "primitive_list",
        DataType::List(item_field),
        false,
    ));

    // Builds a one-row Int32 list scalar from plain values
    let int_list = |values: Vec<i32>| {
        let row: Vec<Option<i32>> = values.into_iter().map(Some).collect();
        ScalarValue::List(Arc::new(
            ListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(row)]),
        ))
    };
    let l0 = int_list(vec![1, 2, 3]);
    let l1 = int_list(vec![4, 5]);
    let l2 = int_list(vec![6]);

    // Struct scalars pairing a name with one of the lists above
    let s0 = ScalarValue::from(vec![
        ("A", ScalarValue::from("First")),
        ("primitive_list", l0),
    ]);
    let s1 = ScalarValue::from(vec![
        ("A", ScalarValue::from("Second")),
        ("primitive_list", l1),
    ]);
    let s2 = ScalarValue::from(vec![
        ("A", ScalarValue::from("Third")),
        ("primitive_list", l2),
    ]);

    // iter_to_array for struct scalars
    let struct_rows =
        ScalarValue::iter_to_array(vec![s0.clone(), s1.clone(), s2.clone()]).unwrap();
    let struct_rows = as_struct_array(&struct_rows).unwrap();

    let names = StringArray::from(vec!["First", "Second", "Third"]);
    let lists = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
        Some(vec![Some(1), Some(2), Some(3)]),
        Some(vec![Some(4), Some(5)]),
        Some(vec![Some(6)]),
    ]);
    let expected_structs = StructArray::from(vec![
        (Arc::clone(&field_a), Arc::new(names) as ArrayRef),
        (Arc::clone(&field_primitive_list), Arc::new(lists) as ArrayRef),
    ]);

    assert_eq!(struct_rows, &expected_structs);

    // List-of-structs scalars: [s0, s1], [s2], [s1]
    let list_of = |structs: Vec<ScalarValue>| {
        let values = ScalarValue::iter_to_array(structs).unwrap();
        ScalarValue::List(Arc::new(array_into_list_array_nullable(values)))
    };
    let nl0 = list_of(vec![s0, s1.clone()]);
    let nl1 = list_of(vec![s2]);
    let nl2 = list_of(vec![s1]);

    // iter_to_array for list-of-struct
    let nested = ScalarValue::iter_to_array(vec![nl0, nl1, nl2]).unwrap();
    let nested = nested.as_list::<i32>();

    // Construct the expected array with array builders
    let field_a_builder = StringBuilder::with_capacity(4, 1024);
    let primitive_value_builder = Int32Array::builder(8);
    let field_primitive_list_builder = ListBuilder::new(primitive_value_builder);
    let element_builder = StructBuilder::new(
        vec![field_a, field_primitive_list],
        vec![
            Box::new(field_a_builder),
            Box::new(field_primitive_list_builder),
        ],
    );
    let mut list_builder = ListBuilder::new(element_builder);

    // Row 0: [First, Second]
    push_struct(&mut list_builder, "First", &[1, 2, 3]);
    push_struct(&mut list_builder, "Second", &[4, 5]);
    list_builder.append(true);

    // Row 1: [Third]
    push_struct(&mut list_builder, "Third", &[6]);
    list_builder.append(true);

    // Row 2: [Second]
    push_struct(&mut list_builder, "Second", &[4, 5]);
    list_builder.append(true);

    let expected = list_builder.finish();

    assert_eq!(nested, &expected);
}
5855 | | |
5856 | | fn build_2d_list(data: Vec<Option<i32>>) -> ListArray { |
5857 | | let a1 = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(data)]); |
5858 | | ListArray::new( |
5859 | | Arc::new(Field::new( |
5860 | | "item", |
5861 | | DataType::List(Arc::new(Field::new("item", DataType::Int32, true))), |
5862 | | true, |
5863 | | )), |
5864 | | OffsetBuffer::<i32>::from_lengths([1]), |
5865 | | Arc::new(a1), |
5866 | | None, |
5867 | | ) |
5868 | | } |
5869 | | |
/// Checks that `ScalarValue::iter_to_array` stacks list-of-list scalars into
/// one array with a row per scalar.
#[test]
fn test_nested_lists() {
    // One nested-list scalar per row of expected data
    let scalars = vec![
        ScalarValue::List(Arc::new(build_2d_list(vec![Some(1), Some(2), Some(3)]))),
        ScalarValue::List(Arc::new(build_2d_list(vec![Some(4), Some(5)]))),
        ScalarValue::List(Arc::new(build_2d_list(vec![Some(6)]))),
    ];

    let array = ScalarValue::iter_to_array(scalars).unwrap();
    let array = array.as_list::<i32>();

    // Construct the expected array with array builders
    let mut outer_builder = ListBuilder::new(ListBuilder::new(Int32Array::builder(6)));

    let rows: [&[i32]; 3] = [&[1, 2, 3], &[4, 5], &[6]];
    for row in rows {
        for &value in row {
            outer_builder.values().values().append_value(value);
        }
        // close the inner list, then the outer row that contains it
        outer_builder.values().append(true);
        outer_builder.append(true);
    }

    let expected = outer_builder.finish();

    assert_eq!(array, &expected);
}
5909 | | |
/// A nanosecond timestamp scalar tagged "UTC" must report the same data type
/// as a scalar, as an array, and after being read back from that array.
#[test]
fn scalar_timestamp_ns_utc_timezone() {
    let utc_nanos = DataType::Timestamp(TimeUnit::Nanosecond, Some("UTC".into()));

    let scalar =
        ScalarValue::TimestampNanosecond(Some(1599566400000000000), Some("UTC".into()));
    assert_eq!(scalar.data_type(), utc_nanos);

    let array = scalar.to_array().expect("Failed to convert to array");
    assert_eq!(array.len(), 1);
    assert_eq!(array.data_type(), &utc_nanos);

    let newscalar = ScalarValue::try_from_array(&array, 0).unwrap();
    assert_eq!(newscalar.data_type(), utc_nanos);
}
5935 | | |
/// Runs `check_scalar_cast` over a table of (scalar, target type) pairs.
#[test]
fn cast_round_trip() {
    let dictionary =
        || DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8));

    let cases = [
        // integer widening, with and without a value
        (ScalarValue::Int8(Some(5)), DataType::Int16),
        (ScalarValue::Int8(None), DataType::Int16),
        // float to integer
        (ScalarValue::Float64(Some(5.5)), DataType::Int16),
        (ScalarValue::Float64(None), DataType::Int16),
        // string to dictionary-encoded string
        (ScalarValue::from("foo"), dictionary()),
        (ScalarValue::Utf8(None), dictionary()),
        // string to string view
        (ScalarValue::Utf8(None), DataType::Utf8View),
        (ScalarValue::from("foo"), DataType::Utf8View),
        (
            ScalarValue::from("larger than 12 bytes string"),
            DataType::Utf8View,
        ),
    ];

    for (scalar, desired_type) in cases {
        check_scalar_cast(scalar, desired_type);
    }
}
5962 | | |
5963 | | // mimics how casting work on scalar values by `casting` `scalar` to `desired_type` |
5964 | | fn check_scalar_cast(scalar: ScalarValue, desired_type: DataType) { |
5965 | | // convert from scalar --> Array to call cast |
5966 | | let scalar_array = scalar.to_array().expect("Failed to convert to array"); |
5967 | | // cast the actual value |
5968 | | let cast_array = kernels::cast::cast(&scalar_array, &desired_type).unwrap(); |
5969 | | |
5970 | | // turn it back to a scalar |
5971 | | let cast_scalar = ScalarValue::try_from_array(&cast_array, 0).unwrap(); |
5972 | | assert_eq!(cast_scalar.data_type(), desired_type); |
5973 | | |
5974 | | // Some time later the "cast" scalar is turned back into an array: |
5975 | | let array = cast_scalar |
5976 | | .to_array_of_size(10) |
5977 | | .expect("Failed to convert to array of size"); |
5978 | | |
5979 | | // The datatype should be "Dictionary" but is actually Utf8!!! |
5980 | | assert_eq!(array.data_type(), &desired_type) |
5981 | | } |
5982 | | |
/// `arithmetic_negate` flips signed integers, keeps nulls null, and returns
/// an error for unsigned and boolean scalars.
#[test]
fn test_scalar_negative() -> Result<()> {
    // positive test
    assert_eq!(
        ScalarValue::Int32(Some(-12)),
        ScalarValue::Int32(Some(12)).arithmetic_negate()?
    );
    assert_eq!(
        ScalarValue::Int32(None),
        ScalarValue::Int32(None).arithmetic_negate()?
    );

    // negative test
    for value in [ScalarValue::UInt8(Some(12)), ScalarValue::Boolean(None)] {
        assert!(value.arithmetic_negate().is_err());
    }
    Ok(())
}
5998 | | |
/// Negating the minimum value of each signed / decimal / interval / timestamp
/// scalar must fail with an Arrow `ArithmeticOverflow` error, while negating
/// the extreme f32 / f64 values must succeed and yield the opposite extreme.
#[test]
#[allow(arithmetic_overflow)] // we want to test them
fn test_scalar_negative_overflows() -> Result<()> {
    // For every value: negate, require an error, and require that the root
    // cause is an arithmetic overflow. Any other error is returned from the
    // test so that it is reported as the failure.
    macro_rules! test_overflow_on_value {
        ($($val:expr),* $(,)?) => {$(
            {
                let value: ScalarValue = $val;
                // `expect_err` takes a plain `&str`, so a `{value:?}` placeholder
                // in its message would be printed literally; panic explicitly to
                // get the offending input into the failure message.
                let err = match value.arithmetic_negate() {
                    Ok(negated) => panic!(
                        "Should receive overflow error on negating {:?}, got {:?}",
                        value, negated
                    ),
                    Err(err) => err,
                };
                let root_err = err.find_root();
                match root_err {
                    DataFusionError::ArrowError(
                        ArrowError::ArithmeticOverflow(_),
                        _,
                    ) => {}
                    _ => return Err(err),
                };
            }
        )*};
    }
    test_overflow_on_value!(
        // the integers
        i8::MIN.into(),
        i16::MIN.into(),
        i32::MIN.into(),
        i64::MIN.into(),
        // for decimals, only value needs to be tested
        ScalarValue::try_new_decimal128(i128::MIN, 10, 5)?,
        ScalarValue::Decimal256(Some(i256::MIN), 20, 5),
        // interval, check all possible values
        ScalarValue::IntervalYearMonth(Some(i32::MIN)),
        ScalarValue::new_interval_dt(i32::MIN, 999),
        ScalarValue::new_interval_dt(1, i32::MIN),
        ScalarValue::new_interval_mdn(i32::MIN, 15, 123_456),
        ScalarValue::new_interval_mdn(12, i32::MIN, 123_456),
        ScalarValue::new_interval_mdn(12, 15, i64::MIN),
        // tz doesn't matter when negating
        ScalarValue::TimestampSecond(Some(i64::MIN), None),
        ScalarValue::TimestampMillisecond(Some(i64::MIN), None),
        ScalarValue::TimestampMicrosecond(Some(i64::MIN), None),
        ScalarValue::TimestampNanosecond(Some(i64::MIN), None),
    );

    // Floats do not overflow on negation: MIN and MAX map onto each other
    let float_cases = [
        (
            ScalarValue::Float16(Some(f16::MIN)),
            ScalarValue::Float16(Some(f16::MAX)),
        ),
        (
            ScalarValue::Float16(Some(f16::MAX)),
            ScalarValue::Float16(Some(f16::MIN)),
        ),
        (f32::MIN.into(), f32::MAX.into()),
        (f32::MAX.into(), f32::MIN.into()),
        (f64::MIN.into(), f64::MAX.into()),
        (f64::MAX.into(), f64::MIN.into()),
    ];
    // The two Float16 entries are skipped here; the same pairs are exercised
    // by the separate `f16_test_overflow` test.
    for (test, expected) in float_cases.into_iter().skip(2) {
        assert_eq!(test.arithmetic_negate()?, expected);
    }
    Ok(())
}
6061 | | |
/// Negating the extreme Float16 values must succeed and give the opposite
/// extreme.
#[test]
fn f16_test_overflow() {
    // TODO: fold these cases into `test_scalar_negative_overflows`, which
    // currently skips its Float16 entries
    let cases = [(f16::MIN, f16::MAX), (f16::MAX, f16::MIN)];

    for (input, negated) in cases {
        let test = ScalarValue::Float16(Some(input));
        let expected = ScalarValue::Float16(Some(negated));
        assert_eq!(test.arithmetic_negate().unwrap(), expected);
    }
}
6080 | | |
/// Generates a `#[test]` named `$TEST_NAME` that calls
/// `UInt64(12).$FUNCTION(&Int32(-3))`, requires the call to fail, and requires
/// the error text to contain `$EXPECTED_ERROR`.
macro_rules! expect_operation_error {
    ($TEST_NAME:ident, $FUNCTION:ident, $EXPECTED_ERROR:expr) => {
        #[test]
        fn $TEST_NAME() {
            let lhs = ScalarValue::UInt64(Some(12));
            let rhs = ScalarValue::Int32(Some(-3));

            // Mixed operand types are expected to be rejected
            let error_message = match lhs.$FUNCTION(&rhs) {
                Ok(_) => panic!(
                    "Expected binary operation error between lhs: '{:?}', rhs: {:?}",
                    lhs, rhs
                ),
                Err(e) => e.to_string(),
            };

            assert!(
                error_message.contains($EXPECTED_ERROR),
                "Expected error '{}' not found in actual error '{}'",
                $EXPECTED_ERROR,
                error_message
            );
        }
    };
}

// UInt64 + Int32 is rejected
expect_operation_error!(
    expect_add_error,
    add,
    "Invalid arithmetic operation: UInt64 + Int32"
);
// UInt64 - Int32 is rejected
expect_operation_error!(
    expect_sub_error,
    sub,
    "Invalid arithmetic operation: UInt64 - Int32"
);
6118 | | |
/// Expands each `[lhs value, precision, scale, rhs value, precision, scale,
/// expected value, precision, scale]` row into a check that applying
/// `$OPERATION` to the two `Decimal128` scalars yields the expected
/// `Decimal128` scalar.
macro_rules! decimal_op_test_cases {
    ($OPERATION:ident, [$([$L_VALUE:expr, $L_PRECISION:expr, $L_SCALE:expr, $R_VALUE:expr, $R_PRECISION:expr, $R_SCALE:expr, $O_VALUE:expr, $O_PRECISION:expr, $O_SCALE:expr]),+]) => {
        $(
            {
                let lhs = ScalarValue::Decimal128($L_VALUE, $L_PRECISION, $L_SCALE);
                let rhs = ScalarValue::Decimal128($R_VALUE, $R_PRECISION, $R_SCALE);
                let expected = ScalarValue::Decimal128($O_VALUE, $O_PRECISION, $O_SCALE);
                let actual = lhs.$OPERATION(&rhs).unwrap();
                assert_eq!(expected, actual);
            }
        )+
    };
}
6131 | | |
/// Adds `Decimal128` scalars with equal and differing precision / scale and
/// checks the resulting value, precision and scale.
#[test]
fn decimal_operations() {
    // Raw (unscaled) operands shared by every case
    let (a, b) = (123_i128, 124_i128);

    decimal_op_test_cases!(
        add,
        [
            // same precision and scale
            [Some(a), 10, 2, Some(b), 10, 2, Some(a + b), 11, 2],
            // test sum decimal with diff scale
            [Some(a), 10, 3, Some(b), 10, 2, Some(a + b * 10_i128.pow(1)), 12, 3],
            // diff precision and scale for decimal data type
            [Some(a), 10, 2, Some(b), 11, 3, Some(a * 10_i128.pow(3 - 2) + b), 12, 3]
        ]
    );
}
6165 | | |
/// Adding a null `Decimal128` to a non-null one (in either order) must give a
/// null result that still carries the expected output precision and scale.
#[test]
fn decimal_operations_with_nulls() {
    // The single non-null operand used throughout
    let value = Some(123_i128);

    decimal_op_test_cases!(
        add,
        [
            // Case: (None, Some, 0)
            [None, 10, 2, value, 10, 2, None, 11, 2],
            // Case: (Some, None, 0)
            [value, 10, 2, None, 10, 2, None, 11, 2],
            // Case: (Some, None, _) + Side=False
            [value, 8, 2, None, 10, 3, None, 11, 3],
            // Case: (None, Some, _) + Side=False
            [None, 8, 2, value, 10, 3, None, 11, 3],
            // Case: (Some, None, _) + Side=True
            [value, 8, 4, None, 10, 3, None, 12, 4],
            // Case: (None, Some, _) + Side=True
            [None, 10, 3, value, 8, 4, None, 12, 4]
        ]
    );
}
6186 | | |
/// Checks `ScalarValue::distance` for pairs of same-typed, non-null numeric
/// scalars.
#[test]
fn test_scalar_distance() {
    // Row builders for the float cases: (lhs, rhs, expected distance)
    let f16_case = |lhs: f32, rhs: f32, dist| {
        (
            ScalarValue::Float16(Some(f16::from_f32(lhs))),
            ScalarValue::Float16(Some(f16::from_f32(rhs))),
            dist,
        )
    };
    let f32_case = |lhs: f32, rhs: f32, dist| {
        (
            ScalarValue::Float32(Some(lhs)),
            ScalarValue::Float32(Some(rhs)),
            dist,
        )
    };
    let f64_case = |lhs: f64, rhs: f64, dist| {
        (
            ScalarValue::Float64(Some(lhs)),
            ScalarValue::Float64(Some(rhs)),
            dist,
        )
    };

    let cases = [
        // scalar (lhs), scalar (rhs), expected distance
        // ---------------------------------------------
        // signed integers
        (ScalarValue::Int8(Some(1)), ScalarValue::Int8(Some(2)), 1),
        (ScalarValue::Int8(Some(2)), ScalarValue::Int8(Some(1)), 1),
        (ScalarValue::Int16(Some(-5)), ScalarValue::Int16(Some(5)), 10),
        (ScalarValue::Int16(Some(5)), ScalarValue::Int16(Some(-5)), 10),
        (ScalarValue::Int32(Some(0)), ScalarValue::Int32(Some(0)), 0),
        (ScalarValue::Int32(Some(-5)), ScalarValue::Int32(Some(-10)), 5),
        (ScalarValue::Int64(Some(-10)), ScalarValue::Int64(Some(-5)), 5),
        // unsigned integers
        (ScalarValue::UInt8(Some(1)), ScalarValue::UInt8(Some(2)), 1),
        (ScalarValue::UInt8(Some(0)), ScalarValue::UInt8(Some(0)), 0),
        (ScalarValue::UInt16(Some(5)), ScalarValue::UInt16(Some(10)), 5),
        (ScalarValue::UInt32(Some(10)), ScalarValue::UInt32(Some(5)), 5),
        (ScalarValue::UInt64(Some(5)), ScalarValue::UInt64(Some(10)), 5),
        // half-precision floats
        f16_case(1.1, 1.9, 1),
        f16_case(-5.3, -9.2, 4),
        f16_case(-5.3, -9.7, 4),
        // floats with whole-number differences
        f32_case(1.0, 2.0, 1),
        f32_case(2.0, 1.0, 1),
        f64_case(0.0, 0.0, 0),
        f64_case(-5.0, -10.0, 5),
        f64_case(-10.0, -5.0, 5),
        // Floats are currently special cased to f64/f32 and the result is rounded
        // rather than ceiled/floored. In the future we might want to take a mode
        // which specified the rounding behavior.
        f32_case(1.2, 1.3, 0),
        f32_case(1.1, 1.9, 1),
        f64_case(-5.3, -9.2, 4),
        f64_case(-5.3, -9.7, 4),
        f64_case(-5.3, -9.9, 5),
    ];

    for (lhs, rhs, expected) in &cases {
        let distance = lhs.distance(rhs).unwrap();
        assert_eq!(distance, *expected);
    }
}
6306 | | |
/// `ScalarValue::distance` must return `None` when either side is null, when
/// the two types differ, or when the type is one of those listed below as
/// unsupported.
#[test]
fn test_scalar_distance_invalid() {
    let half_one = || ScalarValue::Float16(Some(f16::from_f32(1.0)));

    let cases = [
        // scalar (lhs), scalar (rhs)
        // --------------------------
        // Same type but with nulls
        (ScalarValue::Int8(None), ScalarValue::Int8(None)),
        (ScalarValue::Int8(None), ScalarValue::Int8(Some(1))),
        (ScalarValue::Int8(Some(1)), ScalarValue::Int8(None)),
        // Different type
        (ScalarValue::Int8(Some(1)), ScalarValue::Int16(Some(1))),
        (ScalarValue::Int8(Some(1)), ScalarValue::Float32(Some(1.0))),
        (half_one(), ScalarValue::Float32(Some(1.0))),
        (half_one(), ScalarValue::Int32(Some(1))),
        (ScalarValue::Float64(Some(1.1)), ScalarValue::Float32(Some(2.2))),
        (ScalarValue::UInt64(Some(777)), ScalarValue::Int32(Some(111))),
        // Different types with nulls
        (ScalarValue::Int8(None), ScalarValue::Int16(Some(1))),
        (ScalarValue::Int8(Some(1)), ScalarValue::Int16(None)),
        // Unsupported types
        (ScalarValue::from("foo"), ScalarValue::from("bar")),
        (
            ScalarValue::Boolean(Some(true)),
            ScalarValue::Boolean(Some(false)),
        ),
        (ScalarValue::Date32(Some(0)), ScalarValue::Date32(Some(1))),
        (ScalarValue::Date64(Some(0)), ScalarValue::Date64(Some(1))),
        (
            ScalarValue::Decimal128(Some(123), 5, 5),
            ScalarValue::Decimal128(Some(120), 5, 5),
        ),
    ];

    for (lhs, rhs) in &cases {
        let distance = lhs.distance(rhs);
        assert!(distance.is_none());
    }
}
6356 | | |
/// Negating an interval scalar must negate every one of its components.
#[test]
fn test_scalar_interval_negate() {
    // (input, expected negation) for each interval flavour
    let year_month = (
        ScalarValue::new_interval_ym(1, 12),
        ScalarValue::new_interval_ym(-1, -12),
    );
    let day_time = (
        ScalarValue::new_interval_dt(1, 999),
        ScalarValue::new_interval_dt(-1, -999),
    );
    let month_day_nano = (
        ScalarValue::new_interval_mdn(12, 15, 123_456),
        ScalarValue::new_interval_mdn(-12, -15, -123_456),
    );

    for (expr, expected) in &[year_month, day_time, month_day_nano] {
        let result = expr.arithmetic_negate().unwrap();
        assert_eq!(*expected, result, "-expr:{expr:?}");
    }
}
6378 | | |
/// Adding an interval scalar to an equal one must double every component,
/// whichever operand is on the left.
#[test]
fn test_scalar_interval_add() {
    let ym = ScalarValue::new_interval_ym(1, 12);
    let dt = ScalarValue::new_interval_dt(1, 999);
    let mdn = ScalarValue::new_interval_mdn(12, 15, 123_456);

    // (lhs, rhs, expected sum)
    let cases = [
        (ym.clone(), ym, ScalarValue::new_interval_ym(2, 24)),
        (dt.clone(), dt, ScalarValue::new_interval_dt(2, 1998)),
        (
            mdn.clone(),
            mdn,
            ScalarValue::new_interval_mdn(24, 30, 246_912),
        ),
    ];

    for (lhs, rhs, expected) in &cases {
        let result = lhs.add(rhs).unwrap();
        let result_commute = rhs.add(lhs).unwrap();
        assert_eq!(*expected, result, "lhs:{lhs:?} + rhs:{rhs:?}");
        assert_eq!(*expected, result_commute, "lhs:{rhs:?} + rhs:{lhs:?}");
    }
}
6405 | | |
/// Subtracting an interval from an identical interval yields the zero
/// interval of the same unit.
#[test]
fn test_scalar_interval_sub() {
    // (lhs, rhs, expected difference)
    let cases = [
        (
            ScalarValue::new_interval_ym(1, 12),
            ScalarValue::new_interval_ym(1, 12),
            ScalarValue::new_interval_ym(0, 0),
        ),
        (
            ScalarValue::new_interval_dt(1, 999),
            ScalarValue::new_interval_dt(1, 999),
            ScalarValue::new_interval_dt(0, 0),
        ),
        (
            ScalarValue::new_interval_mdn(12, 15, 123_456),
            ScalarValue::new_interval_mdn(12, 15, 123_456),
            ScalarValue::new_interval_mdn(0, 0, 0),
        ),
    ];
    for (lhs, rhs, expected) in cases {
        let difference = lhs.sub(&rhs).unwrap();
        assert_eq!(expected, difference, "lhs:{lhs:?} - rhs:{rhs:?}");
    }
}
6430 | | |
/// Round-trip check on random data: applying an interval to a timestamp and
/// then undoing it must give back the original timestamp.
///
/// Even indices test `(ts + interval) - interval`, odd indices test
/// `(ts - interval) + interval`.
#[test]
fn timestamp_op_random_tests() {
    let sample_size = 1000;
    let timestamps = get_random_timestamps(sample_size);
    let intervals = get_random_intervals(sample_size);

    // Both generators cycle through their units by index, so the pair at a
    // given index is a timestamp together with an interval that is never
    // finer-grained than the timestamp's unit (e.g. a second-precision
    // timestamp only meets whole-second intervals). That keeps the round
    // trip lossless.
    for (idx, (ts, interval)) in timestamps.iter().zip(&intervals).enumerate() {
        let round_trip = if idx % 2 == 0 {
            ts.add(interval).unwrap().sub(interval).unwrap()
        } else {
            ts.sub(interval).unwrap().add(interval).unwrap()
        };
        assert_eq!(ts, &round_trip);
    }
}
6453 | | |
/// Struct scalars keep their null information when converted to arrays,
/// both for a null struct and for a null nested struct field.
///
/// The same three scalars are converted through `ScalarValue::iter_to_array`
/// and through per-scalar `ScalarValue::to_array` + `concat`; both results
/// must have the same null mask and the same pretty-printed rendering.
#[test]
fn test_struct_nulls() {
    let fields_b = Fields::from(vec![
        Field::new("ba", DataType::UInt64, true),
        Field::new("bb", DataType::UInt64, true),
    ]);
    let fields = Fields::from(vec![
        Field::new("a", DataType::UInt64, true),
        Field::new("b", DataType::Struct(fields_b.clone()), true),
    ]);

    // Columns for `{a: 1, b: {ba: 2, bb: 3}}` where the nested struct is valid.
    let struct_value = vec![
        (
            Arc::clone(&fields[0]),
            Arc::new(UInt64Array::from(vec![Some(1)])) as ArrayRef,
        ),
        (
            Arc::clone(&fields[1]),
            Arc::new(StructArray::from(vec![
                (
                    Arc::clone(&fields_b[0]),
                    Arc::new(UInt64Array::from(vec![Some(2)])) as ArrayRef,
                ),
                (
                    Arc::clone(&fields_b[1]),
                    Arc::new(UInt64Array::from(vec![Some(3)])) as ArrayRef,
                ),
            ])) as ArrayRef,
        ),
    ];

    // Same columns, but the nested struct `b` carries a validity buffer of
    // `[0]`, i.e. its single row is null.
    let struct_value_with_nulls = vec![
        (
            Arc::clone(&fields[0]),
            Arc::new(UInt64Array::from(vec![Some(1)])) as ArrayRef,
        ),
        (
            Arc::clone(&fields[1]),
            Arc::new(StructArray::from((
                vec![
                    (
                        Arc::clone(&fields_b[0]),
                        Arc::new(UInt64Array::from(vec![Some(2)])) as ArrayRef,
                    ),
                    (
                        Arc::clone(&fields_b[1]),
                        Arc::new(UInt64Array::from(vec![Some(3)])) as ArrayRef,
                    ),
                ],
                Buffer::from(&[0]),
            ))) as ArrayRef,
        ),
    ];

    let scalars = vec![
        // all null
        ScalarValue::Struct(Arc::new(StructArray::from((
            struct_value.clone(),
            Buffer::from(&[0]),
        )))),
        // field 1 valid, field 2 null
        ScalarValue::Struct(Arc::new(StructArray::from((
            struct_value_with_nulls,
            Buffer::from(&[1]),
        )))),
        // all valid
        ScalarValue::Struct(Arc::new(StructArray::from((
            struct_value,
            Buffer::from(&[1]),
        )))),
    ];

    let check_array = |array| {
        // Only the first row (the null struct) is null at the top level.
        let is_null = is_null(&array).unwrap();
        assert_eq!(is_null, BooleanArray::from(vec![true, false, false]));

        let formatted = pretty_format_columns("col", &[array]).unwrap().to_string();
        let formatted = formatted.split('\n').collect::<Vec<_>>();
        // Every row is padded to the width of the border.
        let expected = vec![
            "+---------------------------+",
            "| col                       |",
            "+---------------------------+",
            "|                           |",
            "| {a: 1, b: }               |",
            "| {a: 1, b: {ba: 2, bb: 3}} |",
            "+---------------------------+",
        ];
        assert_eq!(
            formatted, expected,
            "Actual:\n{formatted:#?}\n\nExpected:\n{expected:#?}"
        );
    };

    // test `ScalarValue::iter_to_array`
    let array = ScalarValue::iter_to_array(scalars.clone()).unwrap();
    check_array(array);

    // test `ScalarValue::to_array` / `ScalarValue::to_array_of_size`
    let arrays = scalars
        .iter()
        .map(ScalarValue::to_array)
        .collect::<Result<Vec<_>>>()
        .expect("Failed to convert to array");
    let arrays = arrays.iter().map(|a| a.as_ref()).collect::<Vec<_>>();
    let array = arrow::compute::concat(&arrays).unwrap();
    check_array(array);
}
6561 | | |
/// `Display` and `Debug` of a struct scalar with one set and one null field,
/// cross-checked against arrow's own rendering of the underlying array.
#[test]
fn test_struct_display() {
    let field_a = Field::new("a", DataType::Int32, true);
    let field_b = Field::new("b", DataType::Utf8, true);

    let s = ScalarStructBuilder::new()
        .with_scalar(field_a, ScalarValue::from(1i32))
        .with_scalar(field_b, ScalarValue::Utf8(None))
        .build()
        .unwrap();

    // The null field `b` renders as an empty value.
    assert_eq!(s.to_string(), "{a:1,b:}");
    assert_eq!(format!("{s:?}"), r#"Struct({a:1,b:})"#);

    let ScalarValue::Struct(arr) = s else {
        panic!("Expected struct");
    };

    // verify compared to arrow display; every row is padded to the width of
    // the border
    let batch = RecordBatch::try_from_iter(vec![("s", arr as _)]).unwrap();
    let expected = [
        "+-------------+",
        "| s           |",
        "+-------------+",
        "| {a: 1, b: } |",
        "+-------------+",
    ];
    assert_batches_eq!(&expected, &[batch]);
}
6591 | | |
/// A null struct scalar displays as `NULL`, while arrow renders the
/// underlying null row as an empty cell.
#[test]
fn test_struct_display_null() {
    let fields = vec![Field::new("a", DataType::Int32, false)];
    let s = ScalarStructBuilder::new_null(fields);
    assert_eq!(s.to_string(), "NULL");

    let ScalarValue::Struct(arr) = s else {
        panic!("Expected struct");
    };

    // verify compared to arrow display
    let batch = RecordBatch::try_from_iter(vec![("s", arr as _)]).unwrap();

    // The null row is an empty cell padded to the column width.
    #[rustfmt::skip]
    let expected = [
        "+---+",
        "| s |",
        "+---+",
        "|   |",
        "+---+",
    ];
    assert_batches_eq!(&expected, &[batch]);
}
6615 | | |
/// `Display` and `Debug` of a map scalar holding four entries: a one-pair
/// map, a two-pair map, an empty map and a null. The result is cross-checked
/// against arrow's own rendering of the underlying array.
#[test]
fn test_map_display_and_debug() {
    let string_builder = StringBuilder::new();
    let int_builder = Int32Builder::with_capacity(4);
    let mut builder = MapBuilder::new(None, string_builder, int_builder);

    // {joe: 1}
    builder.keys().append_value("joe");
    builder.values().append_value(1);
    builder.append(true).unwrap();

    // {blogs: 2, foo: 4}
    builder.keys().append_value("blogs");
    builder.values().append_value(2);
    builder.keys().append_value("foo");
    builder.values().append_value(4);
    builder.append(true).unwrap();

    // {} (valid entry with no pairs), then a null entry
    builder.append(true).unwrap();
    builder.append(false).unwrap();

    let map_value = ScalarValue::Map(Arc::new(builder.finish()));

    assert_eq!(map_value.to_string(), "[{joe:1},{blogs:2,foo:4},{},NULL]");
    assert_eq!(
        format!("{map_value:?}"),
        r#"Map([{"joe":"1"},{"blogs":"2","foo":"4"},{},NULL])"#
    );

    let ScalarValue::Map(arr) = map_value else {
        panic!("Expected map");
    };

    // verify compared to arrow display; every row is padded to the width of
    // the border
    let batch = RecordBatch::try_from_iter(vec![("m", arr as _)]).unwrap();
    let expected = [
        "+--------------------+",
        "| m                  |",
        "+--------------------+",
        "| {joe: 1}           |",
        "| {blogs: 2, foo: 4} |",
        "| {}                 |",
        "|                    |",
        "+--------------------+",
    ];
    assert_batches_eq!(&expected, &[batch]);
}
6659 | | |
/// `Display` of the binary scalar variants: null prints `NULL`, bytes print
/// as upper-case hex, and an 11-byte value prints its first ten bytes
/// followed by `...`.
#[test]
fn test_binary_display() {
    let short = || vec![1u8, 2, 3];
    let long = || vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11];

    // (scalar, expected `Display` output)
    let cases = [
        (ScalarValue::Binary(None), "NULL"),
        (ScalarValue::Binary(Some(vec![42u8])), "2A"),
        (ScalarValue::Binary(Some(short())), "010203"),
        (ScalarValue::Binary(Some(long())), "0102030405060708090A..."),
        (ScalarValue::BinaryView(None), "NULL"),
        (ScalarValue::BinaryView(Some(short())), "010203"),
        (
            ScalarValue::BinaryView(Some(long())),
            "0102030405060708090A...",
        ),
        (ScalarValue::LargeBinary(None), "NULL"),
        (ScalarValue::LargeBinary(Some(short())), "010203"),
        (
            ScalarValue::LargeBinary(Some(long())),
            "0102030405060708090A...",
        ),
        (ScalarValue::FixedSizeBinary(3, None), "NULL"),
        (ScalarValue::FixedSizeBinary(3, Some(short())), "010203"),
        (
            ScalarValue::FixedSizeBinary(11, Some(long())),
            "0102030405060708090A...",
        ),
    ];

    for (scalar, expected) in cases {
        assert_eq!(format!("{scalar}"), expected);
    }
}
6698 | | |
/// `Debug` of the binary scalar variants: the variant name wraps either
/// `NULL` or the quoted, comma-separated decimal byte values, with no
/// truncation for the 11-byte value.
#[test]
fn test_binary_debug() {
    let short = || vec![1u8, 2, 3];
    let long = || vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11];

    // (scalar, expected `Debug` output)
    let cases = [
        (ScalarValue::Binary(None), "Binary(NULL)"),
        (ScalarValue::Binary(Some(vec![42u8])), "Binary(\"42\")"),
        (ScalarValue::Binary(Some(short())), "Binary(\"1,2,3\")"),
        (
            ScalarValue::Binary(Some(long())),
            "Binary(\"1,2,3,4,5,6,7,8,9,10,11\")",
        ),
        (ScalarValue::BinaryView(None), "BinaryView(NULL)"),
        (
            ScalarValue::BinaryView(Some(short())),
            "BinaryView(\"1,2,3\")",
        ),
        (
            ScalarValue::BinaryView(Some(long())),
            "BinaryView(\"1,2,3,4,5,6,7,8,9,10,11\")",
        ),
        (ScalarValue::LargeBinary(None), "LargeBinary(NULL)"),
        (
            ScalarValue::LargeBinary(Some(short())),
            "LargeBinary(\"1,2,3\")",
        ),
        (
            ScalarValue::LargeBinary(Some(long())),
            "LargeBinary(\"1,2,3,4,5,6,7,8,9,10,11\")",
        ),
        (
            ScalarValue::FixedSizeBinary(3, None),
            "FixedSizeBinary(3, NULL)",
        ),
        (
            ScalarValue::FixedSizeBinary(3, Some(short())),
            "FixedSizeBinary(3, \"1,2,3\")",
        ),
        (
            ScalarValue::FixedSizeBinary(11, Some(long())),
            "FixedSizeBinary(11, \"1,2,3,4,5,6,7,8,9,10,11\")",
        ),
    ];

    for (scalar, expected) in cases {
        assert_eq!(format!("{scalar:?}"), expected);
    }
}
6752 | | |
/// Building a nullable list from one millisecond timestamp (no timezone)
/// yields a list array of length 1.
#[test]
fn test_build_timestamp_millisecond_list() {
    let element_type = DataType::Timestamp(TimeUnit::Millisecond, None);
    let elements = [ScalarValue::TimestampMillisecond(Some(1), None)];

    let list = ScalarValue::new_list_nullable(&elements, &element_type);

    assert_eq!(1, list.len());
}
6762 | | |
/// Building a nullable list from a timestamp with a timezone keeps that
/// timezone in the list's element type.
#[test]
fn test_newlist_timestamp_zone() {
    let tz: &'static str = "UTC";
    let element_type = DataType::Timestamp(TimeUnit::Millisecond, Some(tz.into()));
    let elements = [ScalarValue::TimestampMillisecond(
        Some(1),
        Some(tz.into()),
    )];

    let list = ScalarValue::new_list_nullable(&elements, &element_type);
    assert_eq!(1, list.len());

    // The element field is named "item", is nullable, and carries the timezone.
    let expected_type = DataType::List(Arc::new(Field::new("item", element_type, true)));
    assert_eq!(list.data_type(), &expected_type);
}
6781 | | |
6782 | | fn get_random_timestamps(sample_size: u64) -> Vec<ScalarValue> { |
6783 | | let vector_size = sample_size; |
6784 | | let mut timestamp = vec![]; |
6785 | | let mut rng = rand::thread_rng(); |
6786 | | for i in 0..vector_size { |
6787 | | let year = rng.gen_range(1995..=2050); |
6788 | | let month = rng.gen_range(1..=12); |
6789 | | let day = rng.gen_range(1..=28); // to exclude invalid dates |
6790 | | let hour = rng.gen_range(0..=23); |
6791 | | let minute = rng.gen_range(0..=59); |
6792 | | let second = rng.gen_range(0..=59); |
6793 | | if i % 4 == 0 { |
6794 | | timestamp.push(ScalarValue::TimestampSecond( |
6795 | | Some( |
6796 | | NaiveDate::from_ymd_opt(year, month, day) |
6797 | | .unwrap() |
6798 | | .and_hms_opt(hour, minute, second) |
6799 | | .unwrap() |
6800 | | .and_utc() |
6801 | | .timestamp(), |
6802 | | ), |
6803 | | None, |
6804 | | )) |
6805 | | } else if i % 4 == 1 { |
6806 | | let millisec = rng.gen_range(0..=999); |
6807 | | timestamp.push(ScalarValue::TimestampMillisecond( |
6808 | | Some( |
6809 | | NaiveDate::from_ymd_opt(year, month, day) |
6810 | | .unwrap() |
6811 | | .and_hms_milli_opt(hour, minute, second, millisec) |
6812 | | .unwrap() |
6813 | | .and_utc() |
6814 | | .timestamp_millis(), |
6815 | | ), |
6816 | | None, |
6817 | | )) |
6818 | | } else if i % 4 == 2 { |
6819 | | let microsec = rng.gen_range(0..=999_999); |
6820 | | timestamp.push(ScalarValue::TimestampMicrosecond( |
6821 | | Some( |
6822 | | NaiveDate::from_ymd_opt(year, month, day) |
6823 | | .unwrap() |
6824 | | .and_hms_micro_opt(hour, minute, second, microsec) |
6825 | | .unwrap() |
6826 | | .and_utc() |
6827 | | .timestamp_micros(), |
6828 | | ), |
6829 | | None, |
6830 | | )) |
6831 | | } else if i % 4 == 3 { |
6832 | | let nanosec = rng.gen_range(0..=999_999_999); |
6833 | | timestamp.push(ScalarValue::TimestampNanosecond( |
6834 | | Some( |
6835 | | NaiveDate::from_ymd_opt(year, month, day) |
6836 | | .unwrap() |
6837 | | .and_hms_nano_opt(hour, minute, second, nanosec) |
6838 | | .unwrap() |
6839 | | .and_utc() |
6840 | | .timestamp_nanos_opt() |
6841 | | .unwrap(), |
6842 | | ), |
6843 | | None, |
6844 | | )) |
6845 | | } |
6846 | | } |
6847 | | timestamp |
6848 | | } |
6849 | | |
6850 | | fn get_random_intervals(sample_size: u64) -> Vec<ScalarValue> { |
6851 | | const MILLISECS_IN_ONE_DAY: i64 = 86_400_000; |
6852 | | const NANOSECS_IN_ONE_DAY: i64 = 86_400_000_000_000; |
6853 | | |
6854 | | let vector_size = sample_size; |
6855 | | let mut intervals = vec![]; |
6856 | | let mut rng = rand::thread_rng(); |
6857 | | const SECS_IN_ONE_DAY: i32 = 86_400; |
6858 | | const MICROSECS_IN_ONE_DAY: i64 = 86_400_000_000; |
6859 | | for i in 0..vector_size { |
6860 | | if i % 4 == 0 { |
6861 | | let days = rng.gen_range(0..5000); |
6862 | | // to not break second precision |
6863 | | let millis = rng.gen_range(0..SECS_IN_ONE_DAY) * 1000; |
6864 | | intervals.push(ScalarValue::new_interval_dt(days, millis)); |
6865 | | } else if i % 4 == 1 { |
6866 | | let days = rng.gen_range(0..5000); |
6867 | | let millisec = rng.gen_range(0..(MILLISECS_IN_ONE_DAY as i32)); |
6868 | | intervals.push(ScalarValue::new_interval_dt(days, millisec)); |
6869 | | } else if i % 4 == 2 { |
6870 | | let days = rng.gen_range(0..5000); |
6871 | | // to not break microsec precision |
6872 | | let nanosec = rng.gen_range(0..MICROSECS_IN_ONE_DAY) * 1000; |
6873 | | intervals.push(ScalarValue::new_interval_mdn(0, days, nanosec)); |
6874 | | } else { |
6875 | | let days = rng.gen_range(0..5000); |
6876 | | let nanosec = rng.gen_range(0..NANOSECS_IN_ONE_DAY); |
6877 | | intervals.push(ScalarValue::new_interval_mdn(0, days, nanosec)); |
6878 | | } |
6879 | | } |
6880 | | intervals |
6881 | | } |
6882 | | |
6883 | | fn union_fields() -> UnionFields { |
6884 | | [ |
6885 | | (0, Arc::new(Field::new("A", DataType::Int32, true))), |
6886 | | (1, Arc::new(Field::new("B", DataType::Float64, true))), |
6887 | | ] |
6888 | | .into_iter() |
6889 | | .collect() |
6890 | | } |
6891 | | |
/// A sparse union scalar whose selected child value is null reports itself
/// as null.
#[test]
fn sparse_scalar_union_is_null() {
    let null_child = Box::new(ScalarValue::Int32(None));
    let scalar = ScalarValue::Union(
        Some((0_i8, null_child)),
        union_fields(),
        UnionMode::Sparse,
    );
    assert!(scalar.is_null());
}
6901 | | |
/// A dense union scalar whose selected child value is null reports itself
/// as null.
#[test]
fn dense_scalar_union_is_null() {
    let null_child = Box::new(ScalarValue::Int32(None));
    let scalar = ScalarValue::Union(
        Some((0_i8, null_child)),
        union_fields(),
        UnionMode::Dense,
    );
    assert!(scalar.is_null());
}
6911 | | |
/// A dictionary scalar wrapping `ScalarValue::Null` is itself null, and
/// converting it to an array gives an array whose first element is null.
#[test]
fn null_dictionary_scalar_produces_null_dictionary_array() {
    let key_type = Box::new(DataType::Int32);
    let null_value = Box::new(ScalarValue::Null);
    let scalar = ScalarValue::Dictionary(key_type, null_value);
    assert!(scalar.is_null());

    let array = scalar.to_array().unwrap();
    assert!(array.is_null(0));
}
6922 | | } |