Coverage Report

Created: 2024-10-13 08:39

/Users/andrewlamb/Software/datafusion/datafusion/physical-plan/src/streaming.rs
 Line|  Count|Source
    1|       |// Licensed to the Apache Software Foundation (ASF) under one
    2|       |// or more contributor license agreements.  See the NOTICE file
    3|       |// distributed with this work for additional information
    4|       |// regarding copyright ownership.  The ASF licenses this file
    5|       |// to you under the Apache License, Version 2.0 (the
    6|       |// "License"); you may not use this file except in compliance
    7|       |// with the License.  You may obtain a copy of the License at
    8|       |//
    9|       |//   http://www.apache.org/licenses/LICENSE-2.0
   10|       |//
   11|       |// Unless required by applicable law or agreed to in writing,
   12|       |// software distributed under the License is distributed on an
   13|       |// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
   14|       |// KIND, either express or implied.  See the License for the
   15|       |// specific language governing permissions and limitations
   16|       |// under the License.
   17|       |
   18|       |//! Generic plans for deferred execution: [`StreamingTableExec`] and [`PartitionStream`]
   19|       |
   20|       |use std::any::Any;
   21|       |use std::fmt::Debug;
   22|       |use std::sync::Arc;
   23|       |
   24|       |use super::{DisplayAs, DisplayFormatType, ExecutionMode, PlanProperties};
   25|       |use crate::display::{display_orderings, ProjectSchemaDisplay};
   26|       |use crate::stream::RecordBatchStreamAdapter;
   27|       |use crate::{ExecutionPlan, Partitioning, SendableRecordBatchStream};
   28|       |
   29|       |use arrow::datatypes::SchemaRef;
   30|       |use arrow_schema::Schema;
   31|       |use datafusion_common::{internal_err, plan_err, Result};
   32|       |use datafusion_execution::TaskContext;
   33|       |use datafusion_physical_expr::{EquivalenceProperties, LexOrdering};
   34|       |
   35|       |use crate::limit::LimitStream;
   36|       |use crate::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet};
   37|       |use async_trait::async_trait;
   38|       |use futures::stream::StreamExt;
   39|       |use log::debug;
   40|       |
   41|       |/// A partition that can be converted into a [`SendableRecordBatchStream`]
   42|       |///
   43|       |/// Combined with [`StreamingTableExec`], you can use this trait to implement
   44|       |/// [`ExecutionPlan`] for a custom source with less boilerplate than
   45|       |/// implementing `ExecutionPlan` directly for many use cases.
   46|       |pub trait PartitionStream: Debug + Send + Sync {
   47|       |    /// Returns the schema of this partition
   48|       |    fn schema(&self) -> &SchemaRef;
   49|       |
   50|       |    /// Returns a stream yielding this partition's values
   51|       |    fn execute(&self, ctx: Arc<TaskContext>) -> SendableRecordBatchStream;
   52|       |}
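Implementing the trait takes very little code. Here is a minimal sketch, assuming an in-memory source that replays a fixed list of batches; the type FixedBatchStream and its fields are illustrative, not part of DataFusion:

    use std::sync::Arc;

    use arrow::datatypes::SchemaRef;
    use arrow::record_batch::RecordBatch;
    use datafusion_execution::TaskContext;
    use futures::stream;

    #[derive(Debug)]
    struct FixedBatchStream {
        schema: SchemaRef,
        batches: Vec<RecordBatch>,
    }

    impl PartitionStream for FixedBatchStream {
        fn schema(&self) -> &SchemaRef {
            &self.schema
        }

        fn execute(&self, _ctx: Arc<TaskContext>) -> SendableRecordBatchStream {
            // Adapt a plain iterator of Ok(batch) values into the pinned,
            // schema-aware stream type DataFusion expects.
            Box::pin(RecordBatchStreamAdapter::new(
                Arc::clone(&self.schema),
                stream::iter(self.batches.clone().into_iter().map(Ok)),
            ))
        }
    }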
   53|       |
   54|       |/// An [`ExecutionPlan`] for one or more [`PartitionStream`]s.
   55|       |///
   56|       |/// If your source can be represented as one or more [`PartitionStream`]s, you can
   57|       |/// use this struct to implement [`ExecutionPlan`].
   58|       |pub struct StreamingTableExec {
   59|       |    partitions: Vec<Arc<dyn PartitionStream>>,
   60|       |    projection: Option<Arc<[usize]>>,
   61|       |    projected_schema: SchemaRef,
   62|       |    projected_output_ordering: Vec<LexOrdering>,
   63|       |    infinite: bool,
   64|       |    limit: Option<usize>,
   65|       |    cache: PlanProperties,
   66|       |    metrics: ExecutionPlanMetricsSet,
   67|       |}
   68|       |
   69|       |impl StreamingTableExec {
   70|       |    /// Try to create a new [`StreamingTableExec`] returning an error if the schema is incorrect
   71|      5|    pub fn try_new(
   72|      5|        schema: SchemaRef,
   73|      5|        partitions: Vec<Arc<dyn PartitionStream>>,
   74|      5|        projection: Option<&Vec<usize>>,
   75|      5|        projected_output_ordering: impl IntoIterator<Item = LexOrdering>,
   76|      5|        infinite: bool,
   77|      5|        limit: Option<usize>,
   78|      5|    ) -> Result<Self> {
   79|      5|        for x in partitions.iter() {
   80|      3|            let partition_schema = x.schema();
   81|      3|            if !schema.eq(partition_schema) {
   82|      0|                debug!(
   83|      0|                    "Target schema does not match with partition schema. \
   84|      0|                        Target_schema: {schema:?}. Partition Schema: {partition_schema:?}"
   85|       |                );
   86|      0|                return plan_err!("Mismatch between schema and batches");
   87|      3|            }
   88|       |        }
   89|       |
   90|      5|        let projected_schema = match projection {
   91|      0|            Some(p) => Arc::new(schema.project(p)?),
   92|      5|            None => schema,
   93|       |        };
   94|      5|        let projected_output_ordering =
   95|      5|            projected_output_ordering.into_iter().collect::<Vec<_>>();
   96|      5|        let cache = Self::compute_properties(
   97|      5|            Arc::clone(&projected_schema),
   98|      5|            &projected_output_ordering,
   99|      5|            &partitions,
  100|      5|            infinite,
  101|      5|        );
  102|      5|        Ok(Self {
  103|      5|            partitions,
  104|      5|            projected_schema,
  105|      5|            projection: projection.cloned().map(Into::into),
  106|      5|            projected_output_ordering,
  107|      5|            infinite,
  108|      5|            limit,
  109|      5|            cache,
  110|      5|            metrics: ExecutionPlanMetricsSet::new(),
  111|      5|        })
  112|      5|    }
  113|       |
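For reference, a hedged construction sketch using try_new; FixedBatchStream is the illustrative type from the earlier sketch, and schema/batches are assumed to be in scope:

    let partition: Arc<dyn PartitionStream> =
        Arc::new(FixedBatchStream { schema: Arc::clone(&schema), batches });
    let exec = StreamingTableExec::try_new(
        schema,                    // must match every partition's schema
        vec![partition],           // one PartitionStream per output partition
        None,                      // no column projection
        Vec::<LexOrdering>::new(), // no declared output orderings
        false,                     // bounded source
        Some(100),                 // optional row limit (fetch)
    )?;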
  114|      0|    pub fn partitions(&self) -> &Vec<Arc<dyn PartitionStream>> {
  115|      0|        &self.partitions
  116|      0|    }
  117|       |
  118|      0|    pub fn partition_schema(&self) -> &SchemaRef {
  119|      0|        self.partitions[0].schema()
  120|      0|    }
  121|       |
  122|      0|    pub fn projection(&self) -> &Option<Arc<[usize]>> {
  123|      0|        &self.projection
  124|      0|    }
  125|       |
  126|      0|    pub fn projected_schema(&self) -> &Schema {
  127|      0|        &self.projected_schema
  128|      0|    }
  129|       |
  130|      0|    pub fn projected_output_ordering(&self) -> impl IntoIterator<Item = LexOrdering> {
  131|      0|        self.projected_output_ordering.clone()
  132|      0|    }
  133|       |
  134|      0|    pub fn is_infinite(&self) -> bool {
  135|      0|        self.infinite
  136|      0|    }
  137|       |
  138|      0|    pub fn limit(&self) -> Option<usize> {
  139|      0|        self.limit
  140|      0|    }
  141|       |
  142|       |    /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc.
  143|      5|    fn compute_properties(
  144|      5|        schema: SchemaRef,
  145|      5|        orderings: &[LexOrdering],
  146|      5|        partitions: &[Arc<dyn PartitionStream>],
  147|      5|        is_infinite: bool,
  148|      5|    ) -> PlanProperties {
  149|      5|        // Calculate equivalence properties:
  150|      5|        let eq_properties = EquivalenceProperties::new_with_orderings(schema, orderings);
  151|      5|
  152|      5|        // Get output partitioning:
  153|      5|        let output_partitioning = Partitioning::UnknownPartitioning(partitions.len());
  154|       |
  155|       |        // Determine execution mode:
  156|      5|        let mode = if is_infinite {
  157|      3|            ExecutionMode::Unbounded
  158|       |        } else {
  159|      2|            ExecutionMode::Bounded
  160|       |        };
  161|       |
  162|      5|        PlanProperties::new(eq_properties, output_partitioning, mode)
  163|      5|    }
  164|       |}
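The mapping is direct: the number of PartitionStreams becomes Partitioning::UnknownPartitioning(n), and the infinite flag picks the execution mode. A small sketch against the exec built above (one bounded partition; assumed, not from the report):

    let props = exec.properties();
    assert_eq!(props.output_partitioning().partition_count(), 1);
    // An infinite source would report ExecutionMode::Unbounded here instead
    // of ExecutionMode::Bounded.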
  165|       |
  166|       |impl std::fmt::Debug for StreamingTableExec {
  167|      0|    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
  168|      0|        f.debug_struct("LazyMemTableExec").finish_non_exhaustive()
  169|      0|    }
  170|       |}
  171|       |
  172|       |impl DisplayAs for StreamingTableExec {
  173|      1|    fn fmt_as(
  174|      1|        &self,
  175|      1|        t: DisplayFormatType,
  176|      1|        f: &mut std::fmt::Formatter,
  177|      1|    ) -> std::fmt::Result {
  178|      1|        match t {
  179|       |            DisplayFormatType::Default | DisplayFormatType::Verbose => {
  180|      1|                write!(
  181|      1|                    f,
  182|      1|                    "StreamingTableExec: partition_sizes={:?}",
  183|      1|                    self.partitions.len(),
  184|      1|                )?;
  185|      1|                if !self.projected_schema.fields().is_empty() {
  186|      1|                    write!(
  187|      1|                        f,
  188|      1|                        ", projection={}",
  189|      1|                        ProjectSchemaDisplay(&self.projected_schema)
  190|      1|                    )?;
  191|      0|                }
  192|      1|                if self.infinite {
  193|      1|                    write!(f, ", infinite_source=true")?;
  194|      0|                }
  195|      1|                if let Some(fetch) = self.limit {
  196|      0|                    write!(f, ", fetch={fetch}")?;
  197|      1|                }
  198|       |
  199|      1|                display_orderings(f, &self.projected_output_ordering)?;
  200|       |
  201|      1|                Ok(())
  202|       |            }
  203|       |        }
  204|      1|    }
  205|       |}
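Put together, a limited, projected, infinite instance would render along these lines (illustrative output with hypothetical columns c1 and c2, not captured from a real run):

    StreamingTableExec: partition_sizes=1, projection=[c1, c2], infinite_source=true, fetch=10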
  206|       |
  207|       |#[async_trait]
  208|       |impl ExecutionPlan for StreamingTableExec {
  209|      0|    fn name(&self) -> &'static str {
  210|      0|        "StreamingTableExec"
  211|      0|    }
  212|       |
  213|      0|    fn as_any(&self) -> &dyn Any {
  214|      0|        self
  215|      0|    }
  216|       |
  217|     85|    fn properties(&self) -> &PlanProperties {
  218|     85|        &self.cache
  219|     85|    }
  220|       |
  221|      0|    fn fetch(&self) -> Option<usize> {
  222|      0|        self.limit
  223|      0|    }
  224|       |
  225|      1|    fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
  226|      1|        vec![]
  227|      1|    }
  228|       |
  229|      0|    fn with_new_children(
  230|      0|        self: Arc<Self>,
  231|      0|        children: Vec<Arc<dyn ExecutionPlan>>,
  232|      0|    ) -> Result<Arc<dyn ExecutionPlan>> {
  233|      0|        if children.is_empty() {
  234|      0|            Ok(self)
  235|       |        } else {
  236|      0|            internal_err!("Children cannot be replaced in {self:?}")
  237|       |        }
  238|      0|    }
  239|       |
  240|      3|    fn execute(
  241|      3|        &self,
  242|      3|        partition: usize,
  243|      3|        ctx: Arc<TaskContext>,
  244|      3|    ) -> Result<SendableRecordBatchStream> {
  245|      3|        let stream = self.partitions[partition].execute(ctx);
  246|      3|        let projected_stream = match self.projection.clone() {
  247|      0|            Some(projection) => Box::pin(RecordBatchStreamAdapter::new(
  248|      0|                Arc::clone(&self.projected_schema),
  249|      0|                stream.map(move |x| {
  250|      0|                    x.and_then(|b| b.project(projection.as_ref()).map_err(Into::into))
  251|      0|                }),
  252|      0|            )),
  253|      3|            None => stream,
  254|       |        };
  255|      3|        Ok(match self.limit {
  256|      2|            None => projected_stream,
  257|      1|            Some(fetch) => {
  258|      1|                let baseline_metrics = BaselineMetrics::new(&self.metrics, partition);
  259|      1|                Box::pin(LimitStream::new(
  260|      1|                    projected_stream,
  261|      1|                    0,
  262|      1|                    Some(fetch),
  263|      1|                    baseline_metrics,
  264|      1|                ))
  265|       |            }
  266|       |        })
  267|      3|    }
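execute is synchronous and hands back one stream per partition; a caller drains it with StreamExt. A minimal sketch, assuming the bounded exec from the construction example is in scope:

    let ctx = Arc::new(TaskContext::default());
    let mut stream = exec.execute(0, ctx)?; // partition 0
    while let Some(batch) = stream.next().await {
        // Each item is a Result<RecordBatch>; the projection and limit
        // wrappers above have already been applied.
        println!("got {} rows", batch?.num_rows());
    }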
  268|       |
  269|      0|    fn metrics(&self) -> Option<MetricsSet> {
  270|      0|        Some(self.metrics.clone_inner())
  271|      0|    }
  272|       |
  273|      0|    fn with_fetch(&self, limit: Option<usize>) -> Option<Arc<dyn ExecutionPlan>> {
  274|      0|        Some(Arc::new(StreamingTableExec {
  275|      0|            partitions: self.partitions.clone(),
  276|      0|            projection: self.projection.clone(),
  277|      0|            projected_schema: Arc::clone(&self.projected_schema),
  278|      0|            projected_output_ordering: self.projected_output_ordering.clone(),
  279|      0|            infinite: self.infinite,
  280|      0|            limit,
  281|      0|            cache: self.cache.clone(),
  282|      0|            metrics: self.metrics.clone(),
  283|      0|        }))
  284|      0|    }
  285|       |}
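with_fetch is what allows a LIMIT to be pushed down into this source: it returns a re-limited copy instead of mutating the plan. A hedged sketch of the contract, reusing exec from above:

    if let Some(limited) = exec.with_fetch(Some(10)) {
        // The copy reports the new limit through fetch(); partitions,
        // projection, and cached properties are cloned unchanged.
        assert_eq!(limited.fetch(), Some(10));
    }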
  286|       |
  287|       |#[cfg(test)]
  288|       |mod test {
  289|       |    use super::*;
  290|       |    use crate::collect_partitioned;
  291|       |    use crate::streaming::PartitionStream;
  292|       |    use crate::test::{make_partition, TestPartitionStream};
  293|       |    use arrow::record_batch::RecordBatch;
  294|       |
  295|       |    #[tokio::test]
  296|      1|    async fn test_no_limit() {
  297|      1|        let exec = TestBuilder::new()
  298|      1|            // make 2 batches, each with 100 rows
  299|      1|            .with_batches(vec![make_partition(100), make_partition(100)])
  300|      1|            .build();
  301|      1|
  302|      1|        let counts = collect_num_rows(Arc::new(exec)).await;
  303|      1|        assert_eq!(counts, vec![200]);
  304|      1|    }
  305|       |
  306|       |    #[tokio::test]
  307|      1|    async fn test_limit() {
  308|      1|        let exec = TestBuilder::new()
  309|      1|            // make 2 batches, each with 100 rows
  310|      1|            .with_batches(vec![make_partition(100), make_partition(100)])
  311|      1|            // limit to only the first 75 rows back
  312|      1|            .with_limit(Some(75))
  313|      1|            .build();
  314|      1|
  315|      1|        let counts = collect_num_rows(Arc::new(exec)).await;
  316|      1|        assert_eq!(counts, vec![75]);
  317|      1|    }
  318|       |
  319|       |    /// Runs the provided execution plan and returns a vector of the number of
  320|       |    /// rows in each partition
  321|      2|    async fn collect_num_rows(exec: Arc<dyn ExecutionPlan>) -> Vec<usize> {
  322|      2|        let ctx = Arc::new(TaskContext::default());
  323|      2|        let partition_batches = collect_partitioned(exec, ctx).await.unwrap();
  324|      2|        partition_batches
  325|      2|            .into_iter()
  326|      3|            .map(|batches| batches.iter().map(|b| b.num_rows()).sum::<usize>())
  327|      2|            .collect()
  328|      2|    }
  329|       |
  330|       |    #[derive(Default)]
  331|       |    struct TestBuilder {
  332|       |        schema: Option<SchemaRef>,
  333|       |        partitions: Vec<Arc<dyn PartitionStream>>,
  334|       |        projection: Option<Vec<usize>>,
  335|       |        projected_output_ordering: Vec<LexOrdering>,
  336|       |        infinite: bool,
  337|       |        limit: Option<usize>,
  338|       |    }
  339|       |
  340|       |    impl TestBuilder {
  341|      2|        fn new() -> Self {
  342|      2|            Self::default()
  343|      2|        }
  344|       |
  345|       |        /// Set the batches for the stream
  346|      2|        fn with_batches(mut self, batches: Vec<RecordBatch>) -> Self {
  347|      2|            let stream = TestPartitionStream::new_with_batches(batches);
  348|      2|            self.schema = Some(Arc::clone(stream.schema()));
  349|      2|            self.partitions = vec![Arc::new(stream)];
  350|      2|            self
  351|      2|        }
  352|       |
  353|       |        /// Set the limit for the stream
  354|      1|        fn with_limit(mut self, limit: Option<usize>) -> Self {
  355|      1|            self.limit = limit;
  356|      1|            self
  357|      1|        }
  358|       |
  359|      2|        fn build(self) -> StreamingTableExec {
  360|      2|            StreamingTableExec::try_new(
  361|      2|                self.schema.unwrap(),
  362|      2|                self.partitions,
  363|      2|                self.projection.as_ref(),
  364|      2|                self.projected_output_ordering,
  365|      2|                self.infinite,
  366|      2|                self.limit,
  367|      2|            )
  368|      2|            .unwrap()
  369|      2|        }
  370|       |    }
  371|       |}
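The tests lean on make_partition and TestPartitionStream from crate::test, which are outside this report. For orientation, an equivalent batch helper could look like the following; this is a sketch, not the actual crate::test code, and the single nullable Int32 column named "i" is an assumption:

    use arrow::array::Int32Array;
    use arrow::datatypes::{DataType, Field, Schema};

    /// Build a one-column RecordBatch with rows 0, 1, ..., sz - 1.
    fn make_partition(sz: i32) -> RecordBatch {
        let schema = Arc::new(Schema::new(vec![Field::new("i", DataType::Int32, true)]));
        let values = Int32Array::from_iter_values(0..sz);
        RecordBatch::try_new(schema, vec![Arc::new(values)]).unwrap()
    }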