Coverage Report

Created: 2024-10-13 08:39

/Users/andrewlamb/Software/datafusion/datafusion/physical-plan/src/placeholder_row.rs
Line | Count | Source
   1 |       | // Licensed to the Apache Software Foundation (ASF) under one
   2 |       | // or more contributor license agreements.  See the NOTICE file
   3 |       | // distributed with this work for additional information
   4 |       | // regarding copyright ownership.  The ASF licenses this file
   5 |       | // to you under the Apache License, Version 2.0 (the
   6 |       | // "License"); you may not use this file except in compliance
   7 |       | // with the License.  You may obtain a copy of the License at
   8 |       | //
   9 |       | //   http://www.apache.org/licenses/LICENSE-2.0
  10 |       | //
  11 |       | // Unless required by applicable law or agreed to in writing,
  12 |       | // software distributed under the License is distributed on an
  13 |       | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  14 |       | // KIND, either express or implied.  See the License for the
  15 |       | // specific language governing permissions and limitations
  16 |       | // under the License.
  17 |       |
  18 |       | //! EmptyRelation produce_one_row=true execution plan
  19 |       |
  20 |       | use std::any::Any;
  21 |       | use std::sync::Arc;
  22 |       |
  23 |       | use super::{
  24 |       |     common, DisplayAs, ExecutionMode, PlanProperties, SendableRecordBatchStream,
  25 |       |     Statistics,
  26 |       | };
  27 |       | use crate::{memory::MemoryStream, DisplayFormatType, ExecutionPlan, Partitioning};
  28 |       |
  29 |       | use arrow::array::{ArrayRef, NullArray};
  30 |       | use arrow::datatypes::{DataType, Field, Fields, Schema, SchemaRef};
  31 |       | use arrow::record_batch::RecordBatch;
  32 |       | use arrow_array::RecordBatchOptions;
  33 |       | use datafusion_common::{internal_err, Result};
  34 |       | use datafusion_execution::TaskContext;
  35 |       | use datafusion_physical_expr::EquivalenceProperties;
  36 |       |
  37 |       | use log::trace;
  38 |       |
  39 |       | /// Execution plan for empty relation with produce_one_row=true
  40 |       | #[derive(Debug)]
  41 |       | pub struct PlaceholderRowExec {
  42 |       |     /// The schema for the produced row
  43 |       |     schema: SchemaRef,
  44 |       |     /// Number of partitions
  45 |       |     partitions: usize,
  46 |       |     cache: PlanProperties,
  47 |       | }
  48 |       |
  49 |       | impl PlaceholderRowExec {
  50 |       |     /// Create a new PlaceholderRowExec
  51 |     4 |     pub fn new(schema: SchemaRef) -> Self {
  52 |     4 |         let partitions = 1;
  53 |     4 |         let cache = Self::compute_properties(Arc::clone(&schema), partitions);
  54 |     4 |         PlaceholderRowExec {
  55 |     4 |             schema,
  56 |     4 |             partitions,
  57 |     4 |             cache,
  58 |     4 |         }
  59 |     4 |     }
  60 |       |
  61 |       |     /// Create a new PlaceholderRowExec with specified partition number
  62 |     1 |     pub fn with_partitions(mut self, partitions: usize) -> Self {
  63 |     1 |         self.partitions = partitions;
  64 |     1 |         // Update output partitioning when updating partitions:
  65 |     1 |         let output_partitioning = Self::output_partitioning_helper(self.partitions);
  66 |     1 |         self.cache = self.cache.with_partitioning(output_partitioning);
  67 |     1 |         self
  68 |     1 |     }
  69 |       |
  70 |     4 |     fn data(&self) -> Result<Vec<RecordBatch>> {
  71 |     4 |         Ok({
  72 |     4 |             let n_field = self.schema.fields.len();
  73 |     4 |             vec![RecordBatch::try_new_with_options(
  74 |     4 |                 Arc::new(Schema::new(
  75 |     4 |                     (0..n_field)
  76 |    52 |                         .map(|i| {
  77 |    52 |                             Field::new(format!("placeholder_{i}"), DataType::Null, true)
  78 |    52 |                         })
  79 |     4 |                         .collect::<Fields>(),
  80 |     4 |                 )),
  81 |     4 |                 (0..n_field)
  82 |    52 |                     .map(|_i| {
  83 |    52 |                         let ret: ArrayRef = Arc::new(NullArray::new(1));
  84 |    52 |                         ret
  85 |    52 |                     })
  86 |     4 |                     .collect(),
  87 |     4 |                 // Even if column number is empty we can generate single row.
  88 |     4 |                 &RecordBatchOptions::new().with_row_count(Some(1)),
  89 |     4 |             )?]
  90 |       |         })
  91 |     4 |     }
  92 |       |
  93 |     5 |     fn output_partitioning_helper(n_partitions: usize) -> Partitioning {
  94 |     5 |         Partitioning::UnknownPartitioning(n_partitions)
  95 |     5 |     }
  96 |       |
  97 |       |     /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc.
  98 |     4 |     fn compute_properties(schema: SchemaRef, n_partitions: usize) -> PlanProperties {
  99 |     4 |         let eq_properties = EquivalenceProperties::new(schema);
 100 |     4 |         // Get output partitioning:
 101 |     4 |         let output_partitioning = Self::output_partitioning_helper(n_partitions);
 102 |     4 |
 103 |     4 |         PlanProperties::new(eq_properties, output_partitioning, ExecutionMode::Bounded)
 104 |     4 |     }
 105 |       | }
 106 |       |
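For reference, the one-row batch that data() (lines 70-91 above) assembles can be reproduced standalone with the same arrow APIs this file already imports. A minimal sketch; the function name placeholder_batch and its n_field parameter are illustrative only, not part of the file under report:

    use std::sync::Arc;

    use arrow::array::{ArrayRef, NullArray};
    use arrow::datatypes::{DataType, Field, Schema};
    use arrow::error::ArrowError;
    use arrow::record_batch::RecordBatch;
    use arrow_array::RecordBatchOptions;

    fn placeholder_batch(n_field: usize) -> Result<RecordBatch, ArrowError> {
        // One nullable Null-typed field per column, named the way data() names them
        let schema = Arc::new(Schema::new(
            (0..n_field)
                .map(|i| Field::new(format!("placeholder_{i}"), DataType::Null, true))
                .collect::<Vec<_>>(),
        ));
        // One single-element NullArray per column
        let columns: Vec<ArrayRef> = (0..n_field)
            .map(|_| Arc::new(NullArray::new(1)) as ArrayRef)
            .collect();
        // with_row_count(Some(1)) keeps the zero-column case at exactly one row
        RecordBatch::try_new_with_options(
            schema,
            columns,
            &RecordBatchOptions::new().with_row_count(Some(1)),
        )
    }

Note that execute() (line 161 below) hands MemoryStream the plan's own schema, while the batch itself carries the all-Null placeholder schema built here.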
 107 |       | impl DisplayAs for PlaceholderRowExec {
 108 |     0 |     fn fmt_as(
 109 |     0 |         &self,
 110 |     0 |         t: DisplayFormatType,
 111 |     0 |         f: &mut std::fmt::Formatter,
 112 |     0 |     ) -> std::fmt::Result {
 113 |     0 |         match t {
 114 |       |             DisplayFormatType::Default | DisplayFormatType::Verbose => {
 115 |     0 |                 write!(f, "PlaceholderRowExec")
 116 |     0 |             }
 117 |     0 |         }
 118 |     0 |     }
 119 |       | }
 120 |       |
 121 |       | impl ExecutionPlan for PlaceholderRowExec {
 122 |     0 |     fn name(&self) -> &'static str {
 123 |     0 |         "PlaceholderRowExec"
 124 |     0 |     }
 125 |       |
 126 |       |     /// Return a reference to Any that can be used for downcasting
 127 |     0 |     fn as_any(&self) -> &dyn Any {
 128 |     0 |         self
 129 |     0 |     }
 130 |       |
 131 |     2 |     fn properties(&self) -> &PlanProperties {
 132 |     2 |         &self.cache
 133 |     2 |     }
 134 |       |
 135 |     2 |     fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
 136 |     2 |         vec![]
 137 |     2 |     }
 138 |       |
 139 |     1 |     fn with_new_children(
 140 |     1 |         self: Arc<Self>,
 141 |     1 |         _: Vec<Arc<dyn ExecutionPlan>>,
 142 |     1 |     ) -> Result<Arc<dyn ExecutionPlan>> {
 143 |     1 |         Ok(self)
 144 |     1 |     }
 145 |       |
 146 |     6 |     fn execute(
 147 |     6 |         &self,
 148 |     6 |         partition: usize,
 149 |     6 |         context: Arc<TaskContext>,
 150 |     6 |     ) -> Result<SendableRecordBatchStream> {
 151 |     6 |         trace!("Start PlaceholderRowExec::execute for partition {} of context session_id {} and task_id {:?}", partition, context.session_id(), context.task_id());
 152 |       |
 153 |     6 |         if partition >= self.partitions {
 154 |     2 |             return internal_err!(
 155 |     2 |                 "PlaceholderRowExec invalid partition {} (expected less than {})",
 156 |     2 |                 partition,
 157 |     2 |                 self.partitions
 158 |     2 |             );
 159 |     4 |         }
 160 |     4 |
 161 |     4 |         Ok(Box::pin(MemoryStream::try_new(
 162 |     4 |             self.data()?,
 163 |     4 |             Arc::clone(&self.schema),
 164 |     4 |             None,
 165 |     0 |         )?))
 166 |     6 |     }
 167 |       |
 168 |     0 |     fn statistics(&self) -> Result<Statistics> {
 169 |     0 |         let batch = self
 170 |     0 |             .data()
 171 |     0 |             .expect("Create single row placeholder RecordBatch should not fail");
 172 |     0 |         Ok(common::compute_record_batch_statistics(
 173 |     0 |             &[batch],
 174 |     0 |             &self.schema,
 175 |     0 |             None,
 176 |     0 |         ))
 177 |     0 |     }
 178 |       | }
 179 |       |
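statistics() (lines 168-177) shows zero hits in this run. A minimal sketch of a test that would exercise it, assuming the same test::aggr_test_schema() helper the existing tests use; the expectation that compute_record_batch_statistics yields one ColumnStatistics entry per schema column is an assumption, not something stated in this file:

    #[test]
    fn placeholder_row_statistics() -> Result<()> {
        let schema = test::aggr_test_schema();
        let placeholder = PlaceholderRowExec::new(Arc::clone(&schema));

        // Drives statistics(), which builds the single placeholder row internally
        let stats = placeholder.statistics()?;

        // Assumed: one ColumnStatistics entry per column of the plan schema
        assert_eq!(stats.column_statistics.len(), schema.fields().len());
        Ok(())
    }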
 180 |       | #[cfg(test)]
 181 |       | mod tests {
 182 |       |     use super::*;
 183 |       |     use crate::{test, with_new_children_if_necessary};
 184 |       |
 185 |       |     #[test]
 186 |     1 |     fn with_new_children() -> Result<()> {
 187 |     1 |         let schema = test::aggr_test_schema();
 188 |     1 |
 189 |     1 |         let placeholder = Arc::new(PlaceholderRowExec::new(schema));
 190 |       |
 191 |     1 |         let placeholder_2 = with_new_children_if_necessary(
 192 |     1 |             Arc::clone(&placeholder) as Arc<dyn ExecutionPlan>,
 193 |     1 |             vec![],
 194 |     1 |         )?;
 195 |     1 |         assert_eq!(placeholder.schema(), placeholder_2.schema());
 196 |       |
 197 |     1 |         let too_many_kids = vec![placeholder_2];
 198 |     1 |         assert!(
 199 |     1 |             with_new_children_if_necessary(placeholder, too_many_kids).is_err(),
 200 |     0 |             "expected error when providing list of kids"
 201 |       |         );
 202 |     1 |         Ok(())
 203 |     1 |     }
 204 |       |
 205 |       |     #[tokio::test]
 206 |     1 |     async fn invalid_execute() -> Result<()> {
 207 |     1 |         let task_ctx = Arc::new(TaskContext::default());
 208 |     1 |         let schema = test::aggr_test_schema();
 209 |     1 |         let placeholder = PlaceholderRowExec::new(schema);
 210 |     1 |
 211 |     1 |         // ask for the wrong partition
 212 |     1 |         assert!(placeholder.execute(1, Arc::clone(&task_ctx)).is_err());
 213 |     1 |         assert!(placeholder.execute(20, task_ctx).is_err());
 214 |     1 |         Ok(())
 215 |     1 |     }
 216 |       |
 217 |       |     #[tokio::test]
 218 |     1 |     async fn produce_one_row() -> Result<()> {
 219 |     1 |         let task_ctx = Arc::new(TaskContext::default());
 220 |     1 |         let schema = test::aggr_test_schema();
 221 |     1 |         let placeholder = PlaceholderRowExec::new(schema);
 222 |     1 |
 223 |     1 |         let iter = placeholder.execute(0, task_ctx)?;
 224 |     1 |         let batches = common::collect(iter).await?;
 225 |     1 |
 226 |     1 |         // should have one item
 227 |     1 |         assert_eq!(batches.len(), 1);
 228 |     1 |
 229 |     1 |         Ok(())
 230 |     1 |     }
 231 |       |
 232 |       |     #[tokio::test]
 233 |     1 |     async fn produce_one_row_multiple_partition() -> Result<()> {
 234 |     1 |         let task_ctx = Arc::new(TaskContext::default());
 235 |     1 |         let schema = test::aggr_test_schema();
 236 |     1 |         let partitions = 3;
 237 |     1 |         let placeholder = PlaceholderRowExec::new(schema).with_partitions(partitions);
 238 |     1 |
 239 |     3 |         for n in 0..partitions {
 240 |     3 |             let iter = placeholder.execute(n, Arc::clone(&task_ctx))?;
 241 |     3 |             let batches = common::collect(iter).await?;
 242 |     1 |
 243 |     1 |             // should have one item
 244 |     3 |             assert_eq!(batches.len(), 1);
 245 |     1 |         }
 246 |     1 |
 247 |     1 |         Ok(())
 248 |     1 |     }
 249 |       | }
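The DisplayAs::fmt_as, name, and as_any paths (lines 108-129) are likewise unexecuted in this run. A sketch of a test that would cover the first two, assuming this crate's displayable() helper and its indent() formatter; the test name is illustrative:

    #[test]
    fn placeholder_row_display() {
        let schema = test::aggr_test_schema();
        let placeholder = PlaceholderRowExec::new(schema);

        // name() returns the static plan name shown at line 123
        assert_eq!(placeholder.name(), "PlaceholderRowExec");

        // Formatting via displayable() routes through DisplayAs::fmt_as
        let rendered = crate::displayable(&placeholder).indent(true).to_string();
        assert!(rendered.contains("PlaceholderRowExec"));
    }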