Coverage Report

Created: 2024-10-13 08:39

/Users/andrewlamb/Software/datafusion/datafusion/physical-plan/src/test.rs
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
//! Utilities for testing datafusion-physical-plan
19
20
use std::collections::HashMap;
21
use std::pin::Pin;
22
use std::sync::Arc;
23
24
use arrow_array::{ArrayRef, Int32Array, RecordBatch};
25
use arrow_schema::{DataType, Field, Schema, SchemaRef};
26
use datafusion_execution::{SendableRecordBatchStream, TaskContext};
27
use futures::{Future, FutureExt};
28
29
use crate::memory::MemoryExec;
30
use crate::stream::RecordBatchStreamAdapter;
31
use crate::streaming::PartitionStream;
32
use crate::ExecutionPlan;
33
34
pub mod exec;
35
36
/// Asserts that given future is pending.
37
9
pub fn assert_is_pending<'a, T>(fut: &mut Pin<Box<dyn Future<Output = T> + Send + 'a>>) {
38
9
    let waker = futures::task::noop_waker();
39
9
    let mut cx = futures::task::Context::from_waker(&waker);
40
9
    let poll = fut.poll_unpin(&mut cx);
41
9
42
9
    assert!(poll.is_pending());
43
9
}
44
45
/// Get the schema for the aggregate_test_* csv files
46
9
pub fn aggr_test_schema() -> SchemaRef {
47
9
    let mut f1 = Field::new("c1", DataType::Utf8, false);
48
9
    f1.set_metadata(HashMap::from_iter(vec![("testing".into(), "test".into())]));
49
9
    let schema = Schema::new(vec![
50
9
        f1,
51
9
        Field::new("c2", DataType::UInt32, false),
52
9
        Field::new("c3", DataType::Int8, false),
53
9
        Field::new("c4", DataType::Int16, false),
54
9
        Field::new("c5", DataType::Int32, false),
55
9
        Field::new("c6", DataType::Int64, false),
56
9
        Field::new("c7", DataType::UInt8, false),
57
9
        Field::new("c8", DataType::UInt16, false),
58
9
        Field::new("c9", DataType::UInt32, false),
59
9
        Field::new("c10", DataType::UInt64, false),
60
9
        Field::new("c11", DataType::Float32, false),
61
9
        Field::new("c12", DataType::Float64, false),
62
9
        Field::new("c13", DataType::Utf8, false),
63
9
    ]);
64
9
65
9
    Arc::new(schema)
66
9
}
67
68
/// returns record batch with 3 columns of i32 in memory
69
453
pub fn build_table_i32(
70
453
    a: (&str, &Vec<i32>),
71
453
    b: (&str, &Vec<i32>),
72
453
    c: (&str, &Vec<i32>),
73
453
) -> RecordBatch {
74
453
    let schema = Schema::new(vec![
75
453
        Field::new(a.0, DataType::Int32, false),
76
453
        Field::new(b.0, DataType::Int32, false),
77
453
        Field::new(c.0, DataType::Int32, false),
78
453
    ]);
79
453
80
453
    RecordBatch::try_new(
81
453
        Arc::new(schema),
82
453
        vec![
83
453
            Arc::new(Int32Array::from(a.1.clone())),
84
453
            Arc::new(Int32Array::from(b.1.clone())),
85
453
            Arc::new(Int32Array::from(c.1.clone())),
86
453
        ],
87
453
    )
88
453
    .unwrap()
89
453
}
90
91
/// returns memory table scan wrapped around record batch with 3 columns of i32
92
8
pub fn build_table_scan_i32(
93
8
    a: (&str, &Vec<i32>),
94
8
    b: (&str, &Vec<i32>),
95
8
    c: (&str, &Vec<i32>),
96
8
) -> Arc<dyn ExecutionPlan> {
97
8
    let batch = build_table_i32(a, b, c);
98
8
    let schema = batch.schema();
99
8
    Arc::new(MemoryExec::try_new(&[vec![batch]], schema, None).unwrap())
100
8
}
101
102
/// Return a RecordBatch with a single Int32 array with values (0..sz) in a field named "i"
103
260
pub fn make_partition(sz: i32) -> RecordBatch {
104
260
    let seq_start = 0;
105
260
    let seq_end = sz;
106
260
    let values = (seq_start..seq_end).collect::<Vec<_>>();
107
260
    let schema = Arc::new(Schema::new(vec![Field::new("i", DataType::Int32, true)]));
108
260
    let arr = Arc::new(Int32Array::from(values));
109
260
    let arr = arr as ArrayRef;
110
260
111
260
    RecordBatch::try_new(schema, vec![arr]).unwrap()
112
260
}
113
114
/// Returns a `MemoryExec` that scans `partitions` of 100 batches each
115
37
pub fn scan_partitioned(partitions: usize) -> Arc<dyn ExecutionPlan> {
116
37
    Arc::new(mem_exec(partitions))
117
37
}
118
119
/// Returns a `MemoryExec` that scans `partitions` of 100 batches each
120
37
pub fn mem_exec(partitions: usize) -> MemoryExec {
121
242
    let data: Vec<Vec<_>> = (0..partitions).map(|_| vec![make_partition(100)]).collect();
122
37
123
37
    let schema = data[0][0].schema();
124
37
    let projection = None;
125
37
    MemoryExec::try_new(&data, schema, projection).unwrap()
126
37
}
127
128
// construct a stream partition for test purposes
129
#[derive(Debug)]
130
pub struct TestPartitionStream {
131
    pub schema: SchemaRef,
132
    pub batches: Vec<RecordBatch>,
133
}
134
135
impl TestPartitionStream {
136
    /// Create a new stream partition with the provided batches
137
2
    pub fn new_with_batches(batches: Vec<RecordBatch>) -> Self {
138
2
        let schema = batches[0].schema();
139
2
        Self { schema, batches }
140
2
    }
141
}
142
impl PartitionStream for TestPartitionStream {
143
4
    fn schema(&self) -> &SchemaRef {
144
4
        &self.schema
145
4
    }
146
2
    fn execute(&self, _ctx: Arc<TaskContext>) -> SendableRecordBatchStream {
147
2
        let stream = futures::stream::iter(self.batches.clone().into_iter().map(Ok));
148
2
        Box::pin(RecordBatchStreamAdapter::new(
149
2
            Arc::clone(&self.schema),
150
2
            stream,
151
2
        ))
152
2
    }
153
}