// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

//! Test utilities for hash joins
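//!
//! These helpers build deterministic two-sided test data, wrap it in
//! partitioned in-memory sources, and run the same join through both
//! [`SymmetricHashJoinExec`] and [`HashJoinExec`] so the results can be
//! compared irrespective of row order.
//!
//! A minimal usage sketch (not compiled as a doctest; the column names, join
//! keys, and batch sizes are illustrative assumptions, and the calls are
//! assumed to run inside an async test):
//!
//! ```ignore
//! let (left_batch, right_batch) = build_sides_record_batches(30, (4, 4))?;
//! let (left, right) = create_memory_table(
//!     split_record_batches(&left_batch, 13)?,
//!     split_record_batches(&right_batch, 13)?,
//!     vec![],
//!     vec![],
//! )?;
//! let on: JoinOn = vec![(col("lc1", &left.schema())?, col("rc1", &right.schema())?)];
//! let ctx = Arc::new(TaskContext::default());
//! let first = partitioned_sym_join_with_filter(
//!     Arc::clone(&left),
//!     Arc::clone(&right),
//!     on.clone(),
//!     None,
//!     &JoinType::Inner,
//!     false,
//!     Arc::clone(&ctx),
//! )
//! .await?;
//! let second = partitioned_hash_join_with_filter(
//!     left,
//!     right,
//!     on,
//!     None,
//!     &JoinType::Inner,
//!     false,
//!     ctx,
//! )
//! .await?;
//! compare_batches(&first, &second);
//! ```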

use std::sync::Arc;

use crate::joins::utils::{JoinFilter, JoinOn};
use crate::joins::{
    HashJoinExec, PartitionMode, StreamJoinPartitionMode, SymmetricHashJoinExec,
};
use crate::memory::MemoryExec;
use crate::repartition::RepartitionExec;
use crate::{common, ExecutionPlan, ExecutionPlanProperties, Partitioning};

use arrow::util::pretty::pretty_format_batches;
use arrow_array::{
    ArrayRef, Float64Array, Int32Array, IntervalDayTimeArray, RecordBatch,
    TimestampMillisecondArray,
};
use arrow_buffer::IntervalDayTime;
use arrow_schema::{DataType, Schema};
use datafusion_common::{Result, ScalarValue};
use datafusion_execution::TaskContext;
use datafusion_expr::{JoinType, Operator};
use datafusion_physical_expr::expressions::{binary, cast, col, lit};
use datafusion_physical_expr::intervals::test_utils::{
    gen_conjunctive_numerical_expr, gen_conjunctive_temporal_expr,
};
use datafusion_physical_expr::{LexOrdering, PhysicalExpr};

use rand::prelude::StdRng;
use rand::{Rng, SeedableRng};

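/// Asserts that two collections of record batches are equivalent regardless of
/// row order: both sides are pretty-printed, the resulting lines are sorted,
/// and then compared pairwise.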
pub fn compare_batches(collected_1: &[RecordBatch], collected_2: &[RecordBatch]) {
    // Pretty-print both inputs, sort the formatted lines, and compare them
    // pairwise so the check is insensitive to row order.
    let first_formatted = pretty_format_batches(collected_1).unwrap().to_string();
    let second_formatted = pretty_format_batches(collected_2).unwrap().to_string();

    let mut first_formatted_sorted: Vec<&str> = first_formatted.trim().lines().collect();
    first_formatted_sorted.sort_unstable();

    let mut second_formatted_sorted: Vec<&str> =
        second_formatted.trim().lines().collect();
    second_formatted_sorted.sort_unstable();

    for (i, (first_line, second_line)) in first_formatted_sorted
        .iter()
        .zip(&second_formatted_sorted)
        .enumerate()
    {
        assert_eq!((i, first_line), (i, second_line));
    }
}

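/// Runs a [`SymmetricHashJoinExec`] over hash-repartitioned inputs (4
/// partitions), forwarding each side's output ordering as sort information,
/// and returns all non-empty result batches.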
pub async fn partitioned_sym_join_with_filter(
    left: Arc<dyn ExecutionPlan>,
    right: Arc<dyn ExecutionPlan>,
    on: JoinOn,
    filter: Option<JoinFilter>,
    join_type: &JoinType,
    null_equals_null: bool,
    context: Arc<TaskContext>,
) -> Result<Vec<RecordBatch>> {
    let partition_count = 4;

    let left_expr = on
        .iter()
        .map(|(l, _)| Arc::clone(l) as _)
        .collect::<Vec<_>>();

    let right_expr = on
        .iter()
        .map(|(_, r)| Arc::clone(r) as _)
        .collect::<Vec<_>>();

    let join = SymmetricHashJoinExec::try_new(
        Arc::new(RepartitionExec::try_new(
            Arc::clone(&left),
            Partitioning::Hash(left_expr, partition_count),
        )?),
        Arc::new(RepartitionExec::try_new(
            Arc::clone(&right),
            Partitioning::Hash(right_expr, partition_count),
        )?),
        on,
        filter,
        join_type,
        null_equals_null,
        left.output_ordering().map(|p| p.to_vec()),
        right.output_ordering().map(|p| p.to_vec()),
        StreamJoinPartitionMode::Partitioned,
    )?;

    let mut batches = vec![];
    for i in 0..partition_count {
        let stream = join.execute(i, Arc::clone(&context))?;
        let more_batches = common::collect(stream).await?;
        batches.extend(
            more_batches
                .into_iter()
                .filter(|b| b.num_rows() > 0)
                .collect::<Vec<_>>(),
        );
    }

    Ok(batches)
}

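/// Runs a [`HashJoinExec`] in [`PartitionMode::Partitioned`] over
/// hash-repartitioned inputs (4 partitions) and returns all non-empty result
/// batches.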
pub async fn partitioned_hash_join_with_filter(
    left: Arc<dyn ExecutionPlan>,
    right: Arc<dyn ExecutionPlan>,
    on: JoinOn,
    filter: Option<JoinFilter>,
    join_type: &JoinType,
    null_equals_null: bool,
    context: Arc<TaskContext>,
) -> Result<Vec<RecordBatch>> {
    let partition_count = 4;
    let (left_expr, right_expr) = on
        .iter()
        .map(|(l, r)| (Arc::clone(l) as _, Arc::clone(r) as _))
        .unzip();

    let join = Arc::new(HashJoinExec::try_new(
        Arc::new(RepartitionExec::try_new(
            left,
            Partitioning::Hash(left_expr, partition_count),
        )?),
        Arc::new(RepartitionExec::try_new(
            right,
            Partitioning::Hash(right_expr, partition_count),
        )?),
        on,
        filter,
        join_type,
        None,
        PartitionMode::Partitioned,
        null_equals_null,
    )?);

    let mut batches = vec![];
    for i in 0..partition_count {
        let stream = join.execute(i, Arc::clone(&context))?;
        let more_batches = common::collect(stream).await?;
        batches.extend(
            more_batches
                .into_iter()
                .filter(|b| b.num_rows() > 0)
                .collect::<Vec<_>>(),
        );
    }

    Ok(batches)
}

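/// Splits `batch` into consecutive slices of at most `batch_size` rows. The
/// last slice holds the remainder and may be empty when the row count is an
/// exact multiple of `batch_size`.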
pub fn split_record_batches(
    batch: &RecordBatch,
    batch_size: usize,
) -> Result<Vec<RecordBatch>> {
    let row_num = batch.num_rows();
    let number_of_batch = row_num / batch_size;
    let mut sizes = vec![batch_size; number_of_batch];
    sizes.push(row_num - (batch_size * number_of_batch));
    let mut result = vec![];
    for (i, size) in sizes.iter().enumerate() {
        result.push(batch.slice(i * batch_size, *size));
    }
    Ok(result)
}

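/// Infinite iterator yielding a non-decreasing sequence of random `f64` values
/// below `max`, seeded deterministically so generated test data is
/// reproducible.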
struct AscendingRandomFloatIterator {
    prev: f64,
    max: f64,
    rng: StdRng,
}

impl AscendingRandomFloatIterator {
    fn new(min: f64, max: f64) -> Self {
        let mut rng = StdRng::seed_from_u64(42);
        let initial = rng.gen_range(min..max);
        AscendingRandomFloatIterator {
            prev: initial,
            max,
            rng,
        }
    }
}

impl Iterator for AscendingRandomFloatIterator {
    type Item = f64;

    fn next(&mut self) -> Option<Self::Item> {
        let value = self.rng.gen_range(self.prev..self.max);
        self.prev = value;
        Some(value)
    }
}

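/// Returns one of three conjunctive temporal join-filter expressions over
/// `left_col` and `right_col`, selected by `expr_id`: interval arithmetic (0),
/// timestamp arithmetic (1), or duration arithmetic (2). Any other `expr_id`
/// is unreachable.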
pub fn join_expr_tests_fixture_temporal(
    expr_id: usize,
    left_col: Arc<dyn PhysicalExpr>,
    right_col: Arc<dyn PhysicalExpr>,
    schema: &Schema,
) -> Result<Arc<dyn PhysicalExpr>> {
    match expr_id {
        // constructs ((left_col - INTERVAL '100ms') > (right_col - INTERVAL '200ms')) AND ((left_col - INTERVAL '450ms') < (right_col - INTERVAL '300ms'))
        0 => gen_conjunctive_temporal_expr(
            left_col,
            right_col,
            Operator::Minus,
            Operator::Minus,
            Operator::Minus,
            Operator::Minus,
            ScalarValue::new_interval_dt(0, 100), // 100 ms
            ScalarValue::new_interval_dt(0, 200), // 200 ms
            ScalarValue::new_interval_dt(0, 450), // 450 ms
            ScalarValue::new_interval_dt(0, 300), // 300 ms
            schema,
        ),
        // constructs ((left_col - TIMESTAMP '2023-01-01:12.00.03') > (right_col - TIMESTAMP '2023-01-01:12.00.01')) AND ((left_col - TIMESTAMP '2023-01-01:12.00.00') < (right_col - TIMESTAMP '2023-01-01:12.00.02'))
        1 => gen_conjunctive_temporal_expr(
            left_col,
            right_col,
            Operator::Minus,
            Operator::Minus,
            Operator::Minus,
            Operator::Minus,
            ScalarValue::TimestampMillisecond(Some(1672574403000), None), // 2023-01-01:12.00.03
            ScalarValue::TimestampMillisecond(Some(1672574401000), None), // 2023-01-01:12.00.01
            ScalarValue::TimestampMillisecond(Some(1672574400000), None), // 2023-01-01:12.00.00
            ScalarValue::TimestampMillisecond(Some(1672574402000), None), // 2023-01-01:12.00.02
            schema,
        ),
        // constructs ((left_col - DURATION '3 secs') > (right_col - DURATION '2 secs')) AND ((left_col - DURATION '5 secs') < (right_col - DURATION '4 secs'))
        2 => gen_conjunctive_temporal_expr(
            left_col,
            right_col,
            Operator::Minus,
            Operator::Minus,
            Operator::Minus,
            Operator::Minus,
            ScalarValue::DurationMillisecond(Some(3000)), // 3 secs
            ScalarValue::DurationMillisecond(Some(2000)), // 2 secs
            ScalarValue::DurationMillisecond(Some(5000)), // 5 secs
            ScalarValue::DurationMillisecond(Some(4000)), // 4 secs
            schema,
        ),
        _ => unreachable!(),
    }
}

// Creates join-filter fixture functions for different numeric field types.
macro_rules! join_expr_tests {
    ($func_name:ident, $type:ty, $SCALAR:ident) => {
        pub fn $func_name(
            expr_id: usize,
            left_col: Arc<dyn PhysicalExpr>,
            right_col: Arc<dyn PhysicalExpr>,
        ) -> Arc<dyn PhysicalExpr> {
            match expr_id {
                // left_col + 1 > right_col + 5 AND left_col + 3 < right_col + 10
                0 => gen_conjunctive_numerical_expr(
                    left_col,
                    right_col,
                    (
                        Operator::Plus,
                        Operator::Plus,
                        Operator::Plus,
                        Operator::Plus,
                    ),
                    ScalarValue::$SCALAR(Some(1 as $type)),
                    ScalarValue::$SCALAR(Some(5 as $type)),
                    ScalarValue::$SCALAR(Some(3 as $type)),
                    ScalarValue::$SCALAR(Some(10 as $type)),
                    (Operator::Gt, Operator::Lt),
                ),
                // left_col - 1 > right_col + 5 AND left_col + 3 < right_col + 10
                1 => gen_conjunctive_numerical_expr(
                    left_col,
                    right_col,
                    (
                        Operator::Minus,
                        Operator::Plus,
                        Operator::Plus,
                        Operator::Plus,
                    ),
                    ScalarValue::$SCALAR(Some(1 as $type)),
                    ScalarValue::$SCALAR(Some(5 as $type)),
                    ScalarValue::$SCALAR(Some(3 as $type)),
                    ScalarValue::$SCALAR(Some(10 as $type)),
                    (Operator::Gt, Operator::Lt),
                ),
                // left_col - 1 > right_col + 5 AND left_col - 3 < right_col + 10
                2 => gen_conjunctive_numerical_expr(
                    left_col,
                    right_col,
                    (
                        Operator::Minus,
                        Operator::Plus,
                        Operator::Minus,
                        Operator::Plus,
                    ),
                    ScalarValue::$SCALAR(Some(1 as $type)),
                    ScalarValue::$SCALAR(Some(5 as $type)),
                    ScalarValue::$SCALAR(Some(3 as $type)),
                    ScalarValue::$SCALAR(Some(10 as $type)),
                    (Operator::Gt, Operator::Lt),
                ),
                // left_col - 10 > right_col - 5 AND left_col - 3 < right_col + 10
                3 => gen_conjunctive_numerical_expr(
                    left_col,
                    right_col,
                    (
                        Operator::Minus,
                        Operator::Minus,
                        Operator::Minus,
                        Operator::Plus,
                    ),
                    ScalarValue::$SCALAR(Some(10 as $type)),
                    ScalarValue::$SCALAR(Some(5 as $type)),
                    ScalarValue::$SCALAR(Some(3 as $type)),
                    ScalarValue::$SCALAR(Some(10 as $type)),
                    (Operator::Gt, Operator::Lt),
                ),
                // left_col - 10 > right_col - 5 AND left_col - 30 < right_col - 3
                4 => gen_conjunctive_numerical_expr(
                    left_col,
                    right_col,
                    (
                        Operator::Minus,
                        Operator::Minus,
                        Operator::Minus,
                        Operator::Minus,
                    ),
                    ScalarValue::$SCALAR(Some(10 as $type)),
                    ScalarValue::$SCALAR(Some(5 as $type)),
                    ScalarValue::$SCALAR(Some(30 as $type)),
                    ScalarValue::$SCALAR(Some(3 as $type)),
                    (Operator::Gt, Operator::Lt),
                ),
                // left_col - 2 >= right_col + 5 AND left_col + 7 <= right_col - 3
                5 => gen_conjunctive_numerical_expr(
                    left_col,
                    right_col,
                    (
                        Operator::Minus,
                        Operator::Plus,
                        Operator::Plus,
                        Operator::Minus,
                    ),
                    ScalarValue::$SCALAR(Some(2 as $type)),
                    ScalarValue::$SCALAR(Some(5 as $type)),
                    ScalarValue::$SCALAR(Some(7 as $type)),
                    ScalarValue::$SCALAR(Some(3 as $type)),
                    (Operator::GtEq, Operator::LtEq),
                ),
                // left_col + 28 > right_col - 11 AND left_col + 21 <= right_col + 39
                6 => gen_conjunctive_numerical_expr(
                    left_col,
                    right_col,
                    (
                        Operator::Plus,
                        Operator::Minus,
                        Operator::Plus,
                        Operator::Plus,
                    ),
                    ScalarValue::$SCALAR(Some(28 as $type)),
                    ScalarValue::$SCALAR(Some(11 as $type)),
                    ScalarValue::$SCALAR(Some(21 as $type)),
                    ScalarValue::$SCALAR(Some(39 as $type)),
                    (Operator::Gt, Operator::LtEq),
                ),
                // left_col + 28 >= right_col - 11 AND left_col - 21 < right_col + 39
                7 => gen_conjunctive_numerical_expr(
                    left_col,
                    right_col,
                    (
                        Operator::Plus,
                        Operator::Minus,
                        Operator::Minus,
                        Operator::Plus,
                    ),
                    ScalarValue::$SCALAR(Some(28 as $type)),
                    ScalarValue::$SCALAR(Some(11 as $type)),
                    ScalarValue::$SCALAR(Some(21 as $type)),
                    ScalarValue::$SCALAR(Some(39 as $type)),
                    (Operator::GtEq, Operator::Lt),
                ),
                _ => panic!("No case"),
            }
        }
    };
}

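// Concrete fixture generators for the numeric column types exercised by the
// join tests.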
join_expr_tests!(join_expr_tests_fixture_i32, i32, Int32);
join_expr_tests!(join_expr_tests_fixture_f64, f64, Float64);

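/// Builds a `(left, right)` pair of record batches with `table_size` rows
/// each: ordered ascending/descending columns, timestamps, intervals,
/// ascending floats, and columns whose first or last ~40% of values are NULL,
/// prefixed `l`/`r` respectively. Both sides reuse the same arrays except for
/// the key columns `lc1`/`rc1`, which take values modulo `key_cardinality.0` /
/// `key_cardinality.1`.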
pub fn build_sides_record_batches(
    table_size: i32,
    key_cardinality: (i32, i32),
) -> Result<(RecordBatch, RecordBatch)> {
    let null_ratio: f64 = 0.4;
    let initial_range = 0..table_size;
    let index = (table_size as f64 * null_ratio).round() as i32;
    let rest_of = index..table_size;
    let ordered: ArrayRef = Arc::new(Int32Array::from_iter(
        initial_range.clone().collect::<Vec<i32>>(),
    ));
    let ordered_des = Arc::new(Int32Array::from_iter(
        initial_range.clone().rev().collect::<Vec<i32>>(),
    ));
    let cardinality = Arc::new(Int32Array::from_iter(
        initial_range.clone().map(|x| x % 4).collect::<Vec<i32>>(),
    ));
    let cardinality_key_left = Arc::new(Int32Array::from_iter(
        initial_range
            .clone()
            .map(|x| x % key_cardinality.0)
            .collect::<Vec<i32>>(),
    ));
    let cardinality_key_right = Arc::new(Int32Array::from_iter(
        initial_range
            .clone()
            .map(|x| x % key_cardinality.1)
            .collect::<Vec<i32>>(),
    ));
    let ordered_asc_null_first = Arc::new(Int32Array::from_iter({
        std::iter::repeat(None)
            .take(index as usize)
            .chain(rest_of.clone().map(Some))
            .collect::<Vec<Option<i32>>>()
    }));
    let ordered_asc_null_last = Arc::new(Int32Array::from_iter({
        rest_of
            .clone()
            .map(Some)
            .chain(std::iter::repeat(None).take(index as usize))
            .collect::<Vec<Option<i32>>>()
    }));

    let ordered_desc_null_first = Arc::new(Int32Array::from_iter({
        std::iter::repeat(None)
            .take(index as usize)
            .chain(rest_of.rev().map(Some))
            .collect::<Vec<Option<i32>>>()
    }));

    let time = Arc::new(TimestampMillisecondArray::from(
        initial_range
            .clone()
            .map(|x| x as i64 + 1672531200000) // x + 2023-01-01:00.00.00
            .collect::<Vec<i64>>(),
    ));
    let interval_time: ArrayRef = Arc::new(IntervalDayTimeArray::from(
        initial_range
            .map(|x| IntervalDayTime {
                days: 0,
                milliseconds: x * 100,
            }) // x * 100ms
            .collect::<Vec<_>>(),
    ));

    let float_asc = Arc::new(Float64Array::from_iter_values(
        AscendingRandomFloatIterator::new(0., table_size as f64)
            .take(table_size as usize),
    ));

    let left = RecordBatch::try_from_iter(vec![
        ("la1", Arc::clone(&ordered)),
        ("lb1", Arc::clone(&cardinality) as ArrayRef),
        ("lc1", cardinality_key_left),
        ("lt1", Arc::clone(&time) as ArrayRef),
        ("la2", Arc::clone(&ordered)),
        ("la1_des", Arc::clone(&ordered_des) as ArrayRef),
        (
            "l_asc_null_first",
            Arc::clone(&ordered_asc_null_first) as ArrayRef,
        ),
        (
            "l_asc_null_last",
            Arc::clone(&ordered_asc_null_last) as ArrayRef,
        ),
        (
            "l_desc_null_first",
            Arc::clone(&ordered_desc_null_first) as ArrayRef,
        ),
        ("li1", Arc::clone(&interval_time)),
        ("l_float", Arc::clone(&float_asc) as ArrayRef),
    ])?;
    let right = RecordBatch::try_from_iter(vec![
        ("ra1", Arc::clone(&ordered)),
        ("rb1", cardinality),
        ("rc1", cardinality_key_right),
        ("rt1", time),
        ("ra2", ordered),
        ("ra1_des", ordered_des),
        ("r_asc_null_first", ordered_asc_null_first),
        ("r_asc_null_last", ordered_asc_null_last),
        ("r_desc_null_first", ordered_desc_null_first),
        ("ri1", interval_time),
        ("r_float", float_asc),
    ])?;
    Ok((left, right))
}

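/// Wraps the left and right partitions in [`MemoryExec`] nodes carrying the
/// provided sort information, producing the execution plans fed to the join
/// operators above.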
pub fn create_memory_table(
    left_partition: Vec<RecordBatch>,
    right_partition: Vec<RecordBatch>,
    left_sorted: Vec<LexOrdering>,
    right_sorted: Vec<LexOrdering>,
) -> Result<(Arc<dyn ExecutionPlan>, Arc<dyn ExecutionPlan>)> {
    let left_schema = left_partition[0].schema();
    let left = MemoryExec::try_new(&[left_partition], left_schema, None)?
        .with_sort_information(left_sorted);
    let right_schema = right_partition[0].schema();
    let right = MemoryExec::try_new(&[right_partition], right_schema, None)?
        .with_sort_information(right_sorted);
    Ok((Arc::new(left), Arc::new(right)))
}

/// Filter expr for a + b > c + 10 AND a + b < c + 100, where a, b and c are
/// the filter schema's columns "0", "1" and "2".
pub(crate) fn complicated_filter(
    filter_schema: &Schema,
) -> Result<Arc<dyn PhysicalExpr>> {
    let left_expr = binary(
        cast(
            binary(
                col("0", filter_schema)?,
                Operator::Plus,
                col("1", filter_schema)?,
                filter_schema,
            )?,
            filter_schema,
            DataType::Int64,
        )?,
        Operator::Gt,
        binary(
            cast(col("2", filter_schema)?, filter_schema, DataType::Int64)?,
            Operator::Plus,
            lit(ScalarValue::Int64(Some(10))),
            filter_schema,
        )?,
        filter_schema,
    )?;

    let right_expr = binary(
        cast(
            binary(
                col("0", filter_schema)?,
                Operator::Plus,
                col("1", filter_schema)?,
                filter_schema,
            )?,
            filter_schema,
            DataType::Int64,
        )?,
        Operator::Lt,
        binary(
            cast(col("2", filter_schema)?, filter_schema, DataType::Int64)?,
            Operator::Plus,
            lit(ScalarValue::Int64(Some(100))),
            filter_schema,
        )?,
        filter_schema,
    )?;
    binary(left_expr, Operator::And, right_expr, filter_schema)
}