Coverage Report

Created: 2024-10-13 08:39

/Users/andrewlamb/Software/datafusion/datafusion/physical-plan/src/values.rs
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
//! Values execution plan
19
20
use std::any::Any;
21
use std::sync::Arc;
22
23
use super::{
24
    common, DisplayAs, ExecutionMode, PlanProperties, SendableRecordBatchStream,
25
    Statistics,
26
};
27
use crate::{
28
    memory::MemoryStream, ColumnarValue, DisplayFormatType, ExecutionPlan, Partitioning,
29
    PhysicalExpr,
30
};
31
32
use arrow::datatypes::{Schema, SchemaRef};
33
use arrow::record_batch::{RecordBatch, RecordBatchOptions};
34
use datafusion_common::{internal_err, plan_err, Result, ScalarValue};
35
use datafusion_execution::TaskContext;
36
use datafusion_physical_expr::EquivalenceProperties;
37
38
/// Execution plan for values list based relation (produces constant rows)
39
#[derive(Debug)]
40
pub struct ValuesExec {
41
    /// The schema
42
    schema: SchemaRef,
43
    /// The data
44
    data: Vec<RecordBatch>,
45
    /// Cache holding plan properties like equivalences, output partitioning etc.
46
    cache: PlanProperties,
47
}
48
49
impl ValuesExec {
50
    /// create a new values exec from data as expr
51
3
    pub fn try_new(
52
3
        schema: SchemaRef,
53
3
        data: Vec<Vec<Arc<dyn PhysicalExpr>>>,
54
3
    ) -> Result<Self> {
55
3
        if data.is_empty() {
56
1
            return plan_err!("Values list cannot be empty");
57
2
        }
58
2
        let n_row = data.len();
59
2
        let n_col = schema.fields().len();
60
        // we have this single row batch as a placeholder to satisfy evaluation argument
61
        // and generate a single output row
62
2
        let batch = RecordBatch::try_new_with_options(
63
2
            Arc::new(Schema::empty()),
64
2
            vec![],
65
2
            &RecordBatchOptions::new().with_row_count(Some(1)),
66
2
        )
?0
;
67
68
2
        let arr = (0..n_col)
69
2
            .map(|j| {
70
2
                (0..n_row)
71
2
                    .map(|i| {
72
2
                        let r = data[i][j].evaluate(&batch);
73
74
0
                        match r {
75
2
                            Ok(ColumnarValue::Scalar(scalar)) => Ok(scalar),
76
0
                            Ok(ColumnarValue::Array(a)) if a.len() == 1 => {
77
0
                                ScalarValue::try_from_array(&a, 0)
78
                            }
79
0
                            Ok(ColumnarValue::Array(a)) => {
80
0
                                plan_err!(
81
0
                                    "Cannot have array values {a:?} in a values list"
82
0
                                )
83
                            }
84
0
                            Err(err) => Err(err),
85
                        }
86
2
                    })
87
2
                    .collect::<Result<Vec<_>>>()
88
2
                    .and_then(ScalarValue::iter_to_array)
89
2
            })
90
2
            .collect::<Result<Vec<_>>>()
?0
;
91
2
        let batch
1
 = RecordBatch::try_new_with_options(
92
2
            Arc::clone(&schema),
93
2
            arr,
94
2
            &RecordBatchOptions::new().with_row_count(Some(n_row)),
95
2
        )
?1
;
96
1
        let data: Vec<RecordBatch> = vec![batch];
97
1
        Self::try_new_from_batches(schema, data)
98
3
    }
99
100
    /// Create a new plan using the provided schema and batches.
101
    ///
102
    /// Errors if any of the batches don't match the provided schema, or if no
103
    /// batches are provided.
104
4
    pub fn try_new_from_batches(
105
4
        schema: SchemaRef,
106
4
        batches: Vec<RecordBatch>,
107
4
    ) -> Result<Self> {
108
4
        if batches.is_empty() {
109
1
            return plan_err!("Values list cannot be empty");
110
3
        }
111
112
6
        for batch
4
 in &batches {
113
4
            let batch_schema = batch.schema();
114
4
            if batch_schema != schema {
115
1
                return plan_err!(
116
1
                    "Batch has invalid schema. Expected: {schema}, got: {batch_schema}"
117
1
                );
118
3
            }
119
        }
120
121
2
        let cache = Self::compute_properties(Arc::clone(&schema));
122
2
        Ok(ValuesExec {
123
2
            schema,
124
2
            data: batches,
125
2
            cache,
126
2
        })
127
4
    }
128
129
    /// provides the data
130
0
    pub fn data(&self) -> Vec<RecordBatch> {
131
0
        self.data.clone()
132
0
    }
133
134
    /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc.
135
2
    fn compute_properties(schema: SchemaRef) -> PlanProperties {
136
2
        let eq_properties = EquivalenceProperties::new(schema);
137
2
138
2
        PlanProperties::new(
139
2
            eq_properties,
140
2
            Partitioning::UnknownPartitioning(1),
141
2
            ExecutionMode::Bounded,
142
2
        )
143
2
    }
144
}
145
146
impl DisplayAs for ValuesExec {
147
0
    fn fmt_as(
148
0
        &self,
149
0
        t: DisplayFormatType,
150
0
        f: &mut std::fmt::Formatter,
151
0
    ) -> std::fmt::Result {
152
0
        match t {
153
            DisplayFormatType::Default | DisplayFormatType::Verbose => {
154
0
                write!(f, "ValuesExec")
155
0
            }
156
0
        }
157
0
    }
158
}
159
160
impl ExecutionPlan for ValuesExec {
161
0
    fn name(&self) -> &'static str {
162
0
        "ValuesExec"
163
0
    }
164
165
    /// Return a reference to Any that can be used for downcasting
166
0
    fn as_any(&self) -> &dyn Any {
167
0
        self
168
0
    }
169
170
0
    fn properties(&self) -> &PlanProperties {
171
0
        &self.cache
172
0
    }
173
174
0
    fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
175
0
        vec![]
176
0
    }
177
178
0
    fn with_new_children(
179
0
        self: Arc<Self>,
180
0
        _: Vec<Arc<dyn ExecutionPlan>>,
181
0
    ) -> Result<Arc<dyn ExecutionPlan>> {
182
0
        ValuesExec::try_new_from_batches(Arc::clone(&self.schema), self.data.clone())
183
0
            .map(|e| Arc::new(e) as _)
184
0
    }
185
186
0
    fn execute(
187
0
        &self,
188
0
        partition: usize,
189
0
        _context: Arc<TaskContext>,
190
0
    ) -> Result<SendableRecordBatchStream> {
191
0
        // ValuesExec has a single output partition
192
0
        if 0 != partition {
193
0
            return internal_err!(
194
0
                "ValuesExec invalid partition {partition} (expected 0)"
195
0
            );
196
0
        }
197
0
198
0
        Ok(Box::pin(MemoryStream::try_new(
199
0
            self.data(),
200
0
            Arc::clone(&self.schema),
201
0
            None,
202
0
        )?))
203
0
    }
204
205
0
    fn statistics(&self) -> Result<Statistics> {
206
0
        let batch = self.data();
207
0
        Ok(common::compute_record_batch_statistics(
208
0
            &[batch],
209
0
            &self.schema,
210
0
            None,
211
0
        ))
212
0
    }
213
}
214
215
#[cfg(test)]
216
mod tests {
217
    use super::*;
218
    use crate::expressions::lit;
219
    use crate::test::{self, make_partition};
220
221
    use arrow_schema::{DataType, Field};
222
223
    #[tokio::test]
224
1
    async fn values_empty_case() -> Result<()> {
225
1
        let schema = test::aggr_test_schema();
226
1
        let empty = ValuesExec::try_new(schema, vec![]);
227
1
        assert!(empty.is_err());
228
1
        Ok(())
229
1
    }
230
231
    #[test]
232
1
    fn new_exec_with_batches() {
233
1
        let batch = make_partition(7);
234
1
        let schema = batch.schema();
235
1
        let batches = vec![batch.clone(), batch];
236
1
237
1
        let _exec = ValuesExec::try_new_from_batches(schema, batches).unwrap();
238
1
    }
239
240
    #[test]
241
1
    fn new_exec_with_batches_empty() {
242
1
        let batch = make_partition(7);
243
1
        let schema = batch.schema();
244
1
        let _ = ValuesExec::try_new_from_batches(schema, Vec::new()).unwrap_err();
245
1
    }
246
247
    #[test]
248
1
    fn new_exec_with_batches_invalid_schema() {
249
1
        let batch = make_partition(7);
250
1
        let batches = vec![batch.clone(), batch];
251
1
252
1
        let invalid_schema = Arc::new(Schema::new(vec![
253
1
            Field::new("col0", DataType::UInt32, false),
254
1
            Field::new("col1", DataType::Utf8, false),
255
1
        ]));
256
1
        let _ = ValuesExec::try_new_from_batches(invalid_schema, batches).unwrap_err();
257
1
    }
258
259
    // Test issue: https://github.com/apache/datafusion/issues/8763
260
    #[test]
261
1
    fn new_exec_with_non_nullable_schema() {
262
1
        let schema = Arc::new(Schema::new(vec![Field::new(
263
1
            "col0",
264
1
            DataType::UInt32,
265
1
            false,
266
1
        )]));
267
1
        let _ = ValuesExec::try_new(Arc::clone(&schema), vec![vec![lit(1u32)]]).unwrap();
268
1
        // Test that a null value is rejected
269
1
        let _ = ValuesExec::try_new(schema, vec![vec![lit(ScalarValue::UInt32(None))]])
270
1
            .unwrap_err();
271
1
    }
272
}