/Users/andrewlamb/Software/datafusion/datafusion/physical-plan/src/test.rs
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | //! Utilities for testing datafusion-physical-plan |
19 | | |
20 | | use std::collections::HashMap; |
21 | | use std::pin::Pin; |
22 | | use std::sync::Arc; |
23 | | |
24 | | use arrow_array::{ArrayRef, Int32Array, RecordBatch}; |
25 | | use arrow_schema::{DataType, Field, Schema, SchemaRef}; |
26 | | use datafusion_execution::{SendableRecordBatchStream, TaskContext}; |
27 | | use futures::{Future, FutureExt}; |
28 | | |
29 | | use crate::memory::MemoryExec; |
30 | | use crate::stream::RecordBatchStreamAdapter; |
31 | | use crate::streaming::PartitionStream; |
32 | | use crate::ExecutionPlan; |
33 | | |
34 | | pub mod exec; |
35 | | |
36 | | /// Asserts that given future is pending. |
37 | 9 | pub fn assert_is_pending<'a, T>(fut: &mut Pin<Box<dyn Future<Output = T> + Send + 'a>>) { |
38 | 9 | let waker = futures::task::noop_waker(); |
39 | 9 | let mut cx = futures::task::Context::from_waker(&waker); |
40 | 9 | let poll = fut.poll_unpin(&mut cx); |
41 | 9 | |
42 | 9 | assert!(poll.is_pending()); |
43 | 9 | } |
44 | | |
45 | | /// Get the schema for the aggregate_test_* csv files |
46 | 9 | pub fn aggr_test_schema() -> SchemaRef { |
47 | 9 | let mut f1 = Field::new("c1", DataType::Utf8, false); |
48 | 9 | f1.set_metadata(HashMap::from_iter(vec![("testing".into(), "test".into())])); |
49 | 9 | let schema = Schema::new(vec![ |
50 | 9 | f1, |
51 | 9 | Field::new("c2", DataType::UInt32, false), |
52 | 9 | Field::new("c3", DataType::Int8, false), |
53 | 9 | Field::new("c4", DataType::Int16, false), |
54 | 9 | Field::new("c5", DataType::Int32, false), |
55 | 9 | Field::new("c6", DataType::Int64, false), |
56 | 9 | Field::new("c7", DataType::UInt8, false), |
57 | 9 | Field::new("c8", DataType::UInt16, false), |
58 | 9 | Field::new("c9", DataType::UInt32, false), |
59 | 9 | Field::new("c10", DataType::UInt64, false), |
60 | 9 | Field::new("c11", DataType::Float32, false), |
61 | 9 | Field::new("c12", DataType::Float64, false), |
62 | 9 | Field::new("c13", DataType::Utf8, false), |
63 | 9 | ]); |
64 | 9 | |
65 | 9 | Arc::new(schema) |
66 | 9 | } |
67 | | |
68 | | /// returns record batch with 3 columns of i32 in memory |
69 | 453 | pub fn build_table_i32( |
70 | 453 | a: (&str, &Vec<i32>), |
71 | 453 | b: (&str, &Vec<i32>), |
72 | 453 | c: (&str, &Vec<i32>), |
73 | 453 | ) -> RecordBatch { |
74 | 453 | let schema = Schema::new(vec![ |
75 | 453 | Field::new(a.0, DataType::Int32, false), |
76 | 453 | Field::new(b.0, DataType::Int32, false), |
77 | 453 | Field::new(c.0, DataType::Int32, false), |
78 | 453 | ]); |
79 | 453 | |
80 | 453 | RecordBatch::try_new( |
81 | 453 | Arc::new(schema), |
82 | 453 | vec![ |
83 | 453 | Arc::new(Int32Array::from(a.1.clone())), |
84 | 453 | Arc::new(Int32Array::from(b.1.clone())), |
85 | 453 | Arc::new(Int32Array::from(c.1.clone())), |
86 | 453 | ], |
87 | 453 | ) |
88 | 453 | .unwrap() |
89 | 453 | } |
90 | | |
91 | | /// returns memory table scan wrapped around record batch with 3 columns of i32 |
92 | 8 | pub fn build_table_scan_i32( |
93 | 8 | a: (&str, &Vec<i32>), |
94 | 8 | b: (&str, &Vec<i32>), |
95 | 8 | c: (&str, &Vec<i32>), |
96 | 8 | ) -> Arc<dyn ExecutionPlan> { |
97 | 8 | let batch = build_table_i32(a, b, c); |
98 | 8 | let schema = batch.schema(); |
99 | 8 | Arc::new(MemoryExec::try_new(&[vec![batch]], schema, None).unwrap()) |
100 | 8 | } |
101 | | |
102 | | /// Return a RecordBatch with a single Int32 array with values (0..sz) in a field named "i" |
103 | 260 | pub fn make_partition(sz: i32) -> RecordBatch { |
104 | 260 | let seq_start = 0; |
105 | 260 | let seq_end = sz; |
106 | 260 | let values = (seq_start..seq_end).collect::<Vec<_>>(); |
107 | 260 | let schema = Arc::new(Schema::new(vec![Field::new("i", DataType::Int32, true)])); |
108 | 260 | let arr = Arc::new(Int32Array::from(values)); |
109 | 260 | let arr = arr as ArrayRef; |
110 | 260 | |
111 | 260 | RecordBatch::try_new(schema, vec![arr]).unwrap() |
112 | 260 | } |
113 | | |
114 | | /// Returns a `MemoryExec` that scans `partitions` of 100 batches each |
115 | 37 | pub fn scan_partitioned(partitions: usize) -> Arc<dyn ExecutionPlan> { |
116 | 37 | Arc::new(mem_exec(partitions)) |
117 | 37 | } |
118 | | |
119 | | /// Returns a `MemoryExec` that scans `partitions` of 100 batches each |
120 | 37 | pub fn mem_exec(partitions: usize) -> MemoryExec { |
121 | 242 | let data: Vec<Vec<_>> = (0..partitions).map(|_| vec![make_partition(100)]).collect(); |
122 | 37 | |
123 | 37 | let schema = data[0][0].schema(); |
124 | 37 | let projection = None; |
125 | 37 | MemoryExec::try_new(&data, schema, projection).unwrap() |
126 | 37 | } |
127 | | |
/// A stream partition for test purposes: replays a fixed list of
/// in-memory record batches.
#[derive(Debug)]
pub struct TestPartitionStream {
    /// Schema reported for this partition
    pub schema: SchemaRef,
    /// Batches yielded, in order, when the partition is executed
    pub batches: Vec<RecordBatch>,
}
134 | | |
135 | | impl TestPartitionStream { |
136 | | /// Create a new stream partition with the provided batches |
137 | 2 | pub fn new_with_batches(batches: Vec<RecordBatch>) -> Self { |
138 | 2 | let schema = batches[0].schema(); |
139 | 2 | Self { schema, batches } |
140 | 2 | } |
141 | | } |
142 | | impl PartitionStream for TestPartitionStream { |
143 | 4 | fn schema(&self) -> &SchemaRef { |
144 | 4 | &self.schema |
145 | 4 | } |
146 | 2 | fn execute(&self, _ctx: Arc<TaskContext>) -> SendableRecordBatchStream { |
147 | 2 | let stream = futures::stream::iter(self.batches.clone().into_iter().map(Ok)); |
148 | 2 | Box::pin(RecordBatchStreamAdapter::new( |
149 | 2 | Arc::clone(&self.schema), |
150 | 2 | stream, |
151 | 2 | )) |
152 | 2 | } |
153 | | } |