Coverage Report

Created: 2024-10-13 08:39

/Users/andrewlamb/Software/datafusion/datafusion/physical-plan/src/empty.rs
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
//! EmptyRelation with produce_one_row=false execution plan
19
20
use std::any::Any;
21
use std::sync::Arc;
22
23
use super::{
24
    common, DisplayAs, ExecutionMode, PlanProperties, SendableRecordBatchStream,
25
    Statistics,
26
};
27
use crate::{memory::MemoryStream, DisplayFormatType, ExecutionPlan, Partitioning};
28
29
use arrow::datatypes::SchemaRef;
30
use arrow::record_batch::RecordBatch;
31
use datafusion_common::{internal_err, Result};
32
use datafusion_execution::TaskContext;
33
use datafusion_physical_expr::EquivalenceProperties;
34
35
use log::trace;
36
37
/// Execution plan for empty relation with produce_one_row=false
38
#[derive(Debug)]
39
pub struct EmptyExec {
40
    /// The schema for the produced row
41
    schema: SchemaRef,
42
    /// Number of partitions
43
    partitions: usize,
44
    cache: PlanProperties,
45
}
46
47
impl EmptyExec {
48
    /// Create a new EmptyExec
49
4
    pub fn new(schema: SchemaRef) -> Self {
50
4
        let cache = Self::compute_properties(Arc::clone(&schema), 1);
51
4
        EmptyExec {
52
4
            schema,
53
4
            partitions: 1,
54
4
            cache,
55
4
        }
56
4
    }
57
58
    /// Create a new EmptyExec with specified partition number
59
0
    pub fn with_partitions(mut self, partitions: usize) -> Self {
60
0
        self.partitions = partitions;
61
0
        // Changing partitions may invalidate output partitioning, so update it:
62
0
        let output_partitioning = Self::output_partitioning_helper(self.partitions);
63
0
        self.cache = self.cache.with_partitioning(output_partitioning);
64
0
        self
65
0
    }
66
67
3
    fn data(&self) -> Result<Vec<RecordBatch>> {
68
3
        Ok(vec![])
69
3
    }
70
71
4
    fn output_partitioning_helper(n_partitions: usize) -> Partitioning {
72
4
        Partitioning::UnknownPartitioning(n_partitions)
73
4
    }
74
75
    /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc.
76
4
    fn compute_properties(schema: SchemaRef, n_partitions: usize) -> PlanProperties {
77
4
        let eq_properties = EquivalenceProperties::new(schema);
78
4
        let output_partitioning = Self::output_partitioning_helper(n_partitions);
79
4
        PlanProperties::new(
80
4
            eq_properties,
81
4
            // Output Partitioning
82
4
            output_partitioning,
83
4
            // Execution Mode
84
4
            ExecutionMode::Bounded,
85
4
        )
86
4
    }
87
}
88
89
impl DisplayAs for EmptyExec {
90
0
    fn fmt_as(
91
0
        &self,
92
0
        t: DisplayFormatType,
93
0
        f: &mut std::fmt::Formatter,
94
0
    ) -> std::fmt::Result {
95
0
        match t {
96
            DisplayFormatType::Default | DisplayFormatType::Verbose => {
97
0
                write!(f, "EmptyExec")
98
0
            }
99
0
        }
100
0
    }
101
}
102
103
impl ExecutionPlan for EmptyExec {
104
0
    fn name(&self) -> &'static str {
105
0
        "EmptyExec"
106
0
    }
107
108
    /// Return a reference to Any that can be used for downcasting
109
0
    fn as_any(&self) -> &dyn Any {
110
0
        self
111
0
    }
112
113
12
    fn properties(&self) -> &PlanProperties {
114
12
        &self.cache
115
12
    }
116
117
2
    fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
118
2
        vec![]
119
2
    }
120
121
1
    fn with_new_children(
122
1
        self: Arc<Self>,
123
1
        _: Vec<Arc<dyn ExecutionPlan>>,
124
1
    ) -> Result<Arc<dyn ExecutionPlan>> {
125
1
        Ok(self)
126
1
    }
127
128
3
    fn execute(
129
3
        &self,
130
3
        partition: usize,
131
3
        context: Arc<TaskContext>,
132
3
    ) -> Result<SendableRecordBatchStream> {
133
3
        trace!(
"Start EmptyExec::execute for partition {} of context session_id {} and task_id {:?}", partition, context.session_id(), context.task_id()0
);
134
135
3
        if partition >= self.partitions {
136
2
            return internal_err!(
137
2
                "EmptyExec invalid partition {} (expected less than {})",
138
2
                partition,
139
2
                self.partitions
140
2
            );
141
1
        }
142
1
143
1
        Ok(Box::pin(MemoryStream::try_new(
144
1
            self.data()
?0
,
145
1
            Arc::clone(&self.schema),
146
1
            None,
147
0
        )?))
148
3
    }
149
150
2
    fn statistics(&self) -> Result<Statistics> {
151
2
        let batch = self
152
2
            .data()
153
2
            .expect("Create empty RecordBatch should not fail");
154
2
        Ok(common::compute_record_batch_statistics(
155
2
            &[batch],
156
2
            &self.schema,
157
2
            None,
158
2
        ))
159
2
    }
160
}
161
162
#[cfg(test)]
mod tests {
    use super::*;
    use crate::test;
    use crate::with_new_children_if_necessary;

    /// Executing the only valid partition yields no batches.
    #[tokio::test]
    async fn empty() -> Result<()> {
        let schema = test::aggr_test_schema();
        let exec = EmptyExec::new(Arc::clone(&schema));
        assert_eq!(exec.schema(), schema);

        // we should have no results
        let stream = exec.execute(0, Arc::new(TaskContext::default()))?;
        let batches = common::collect(stream).await?;
        assert!(batches.is_empty());

        Ok(())
    }

    /// An empty child list is accepted; a non-empty one is rejected.
    #[test]
    fn with_new_children() -> Result<()> {
        let original = Arc::new(EmptyExec::new(test::aggr_test_schema()));

        let unchanged = with_new_children_if_necessary(
            Arc::clone(&original) as Arc<dyn ExecutionPlan>,
            vec![],
        )?;
        assert_eq!(original.schema(), unchanged.schema());

        let outcome = with_new_children_if_necessary(original, vec![unchanged]);
        assert!(
            outcome.is_err(),
            "expected error when providing list of kids"
        );
        Ok(())
    }

    /// Partition indexes beyond the single configured partition are errors.
    #[tokio::test]
    async fn invalid_execute() -> Result<()> {
        let exec = EmptyExec::new(test::aggr_test_schema());
        let task_ctx = Arc::new(TaskContext::default());

        // ask for the wrong partition
        for partition in [1, 20] {
            assert!(exec.execute(partition, Arc::clone(&task_ctx)).is_err());
        }
        Ok(())
    }
}