/Users/andrewlamb/Software/datafusion/datafusion/physical-plan/src/streaming.rs
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

//! Generic plans for deferred execution: [`StreamingTableExec`] and [`PartitionStream`]

use std::any::Any;
use std::fmt::Debug;
use std::sync::Arc;

use super::{DisplayAs, DisplayFormatType, ExecutionMode, PlanProperties};
use crate::display::{display_orderings, ProjectSchemaDisplay};
use crate::limit::LimitStream;
use crate::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet};
use crate::stream::RecordBatchStreamAdapter;
use crate::{ExecutionPlan, Partitioning, SendableRecordBatchStream};

use arrow::datatypes::SchemaRef;
use arrow_schema::Schema;
use async_trait::async_trait;
use datafusion_common::{internal_err, plan_err, Result};
use datafusion_execution::TaskContext;
use datafusion_physical_expr::{EquivalenceProperties, LexOrdering};
use futures::stream::StreamExt;
use log::debug;

/// A partition that can be converted into a [`SendableRecordBatchStream`]
///
/// Combined with [`StreamingTableExec`], you can use this trait to implement
/// [`ExecutionPlan`] for a custom source with less boilerplate than
/// implementing [`ExecutionPlan`] directly.
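///
/// # Example
///
/// A minimal sketch of a custom source. The `OneBatchPartition` type and its
/// single pre-computed batch are hypothetical, shown only to illustrate the
/// shape of an implementation:
///
/// ```ignore
/// use std::sync::Arc;
///
/// use arrow::datatypes::SchemaRef;
/// use arrow::record_batch::RecordBatch;
/// use datafusion_execution::TaskContext;
/// use datafusion_physical_plan::stream::RecordBatchStreamAdapter;
/// use datafusion_physical_plan::streaming::PartitionStream;
/// use datafusion_physical_plan::SendableRecordBatchStream;
///
/// /// A partition backed by one pre-computed `RecordBatch` (hypothetical).
/// #[derive(Debug)]
/// struct OneBatchPartition {
///     schema: SchemaRef,
///     batch: RecordBatch,
/// }
///
/// impl PartitionStream for OneBatchPartition {
///     fn schema(&self) -> &SchemaRef {
///         &self.schema
///     }
///
///     fn execute(&self, _ctx: Arc<TaskContext>) -> SendableRecordBatchStream {
///         // Wrap a one-shot stream of `Ok(batch)` in an adapter that
///         // carries the schema along with the stream.
///         let stream = futures::stream::iter(vec![Ok(self.batch.clone())]);
///         Box::pin(RecordBatchStreamAdapter::new(
///             Arc::clone(&self.schema),
///             stream,
///         ))
///     }
/// }
/// ```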
pub trait PartitionStream: Debug + Send + Sync {
    /// Returns the schema of this partition
    fn schema(&self) -> &SchemaRef;

    /// Returns a stream yielding this partition's values
    fn execute(&self, ctx: Arc<TaskContext>) -> SendableRecordBatchStream;
}

/// An [`ExecutionPlan`] for one or more [`PartitionStream`]s.
///
/// If your source can be represented as one or more [`PartitionStream`]s, you can
/// use this struct to implement [`ExecutionPlan`].
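///
/// # Example
///
/// A minimal sketch, assuming `partition` is an `Arc<dyn PartitionStream>`
/// (for instance the hypothetical `OneBatchPartition` from the
/// [`PartitionStream`] example above) and `schema` is its schema:
///
/// ```ignore
/// let exec = StreamingTableExec::try_new(
///     schema,          // SchemaRef shared by every partition
///     vec![partition], // one Arc<dyn PartitionStream> per output partition
///     None,            // no projection: emit all columns
///     vec![],          // no known output orderings
///     false,           // bounded: the source is finite
///     None,            // no limit: emit all rows
/// )?;
/// ```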
pub struct StreamingTableExec {
    partitions: Vec<Arc<dyn PartitionStream>>,
    projection: Option<Arc<[usize]>>,
    projected_schema: SchemaRef,
    projected_output_ordering: Vec<LexOrdering>,
    infinite: bool,
    limit: Option<usize>,
    cache: PlanProperties,
    metrics: ExecutionPlanMetricsSet,
}

impl StreamingTableExec {
    /// Try to create a new [`StreamingTableExec`], returning an error if any
    /// partition's schema does not match the provided `schema`
    pub fn try_new(
        schema: SchemaRef,
        partitions: Vec<Arc<dyn PartitionStream>>,
        projection: Option<&Vec<usize>>,
        projected_output_ordering: impl IntoIterator<Item = LexOrdering>,
        infinite: bool,
        limit: Option<usize>,
    ) -> Result<Self> {
        for x in partitions.iter() {
            let partition_schema = x.schema();
            if !schema.eq(partition_schema) {
                debug!(
                    "Target schema does not match partition schema. \
                     Target schema: {schema:?}. Partition schema: {partition_schema:?}"
                );
                return plan_err!("Mismatch between schema and batches");
            }
        }

        let projected_schema = match projection {
            Some(p) => Arc::new(schema.project(p)?),
            None => schema,
        };
        let projected_output_ordering =
            projected_output_ordering.into_iter().collect::<Vec<_>>();
        let cache = Self::compute_properties(
            Arc::clone(&projected_schema),
            &projected_output_ordering,
            &partitions,
            infinite,
        );
        Ok(Self {
            partitions,
            projected_schema,
            projection: projection.cloned().map(Into::into),
            projected_output_ordering,
            infinite,
            limit,
            cache,
            metrics: ExecutionPlanMetricsSet::new(),
        })
    }

    pub fn partitions(&self) -> &Vec<Arc<dyn PartitionStream>> {
        &self.partitions
    }

    pub fn partition_schema(&self) -> &SchemaRef {
        self.partitions[0].schema()
    }

    pub fn projection(&self) -> &Option<Arc<[usize]>> {
        &self.projection
    }

    pub fn projected_schema(&self) -> &Schema {
        &self.projected_schema
    }

    pub fn projected_output_ordering(&self) -> impl IntoIterator<Item = LexOrdering> {
        self.projected_output_ordering.clone()
    }

    pub fn is_infinite(&self) -> bool {
        self.infinite
    }

    pub fn limit(&self) -> Option<usize> {
        self.limit
    }

    /// Creates the cache object that stores the plan properties (schema,
    /// equivalence properties, ordering, partitioning, etc.).
    fn compute_properties(
        schema: SchemaRef,
        orderings: &[LexOrdering],
        partitions: &[Arc<dyn PartitionStream>],
        is_infinite: bool,
    ) -> PlanProperties {
        // Calculate equivalence properties:
        let eq_properties = EquivalenceProperties::new_with_orderings(schema, orderings);

        // Get output partitioning:
        let output_partitioning = Partitioning::UnknownPartitioning(partitions.len());

        // Determine execution mode:
        let mode = if is_infinite {
            ExecutionMode::Unbounded
        } else {
            ExecutionMode::Bounded
        };

        PlanProperties::new(eq_properties, output_partitioning, mode)
    }
}

impl std::fmt::Debug for StreamingTableExec {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("StreamingTableExec").finish_non_exhaustive()
    }
}

impl DisplayAs for StreamingTableExec {
    fn fmt_as(
        &self,
        t: DisplayFormatType,
        f: &mut std::fmt::Formatter,
    ) -> std::fmt::Result {
        match t {
            DisplayFormatType::Default | DisplayFormatType::Verbose => {
                write!(
                    f,
                    "StreamingTableExec: partition_sizes={:?}",
                    self.partitions.len(),
                )?;
                if !self.projected_schema.fields().is_empty() {
                    write!(
                        f,
                        ", projection={}",
                        ProjectSchemaDisplay(&self.projected_schema)
                    )?;
                }
                if self.infinite {
                    write!(f, ", infinite_source=true")?;
                }
                if let Some(fetch) = self.limit {
                    write!(f, ", fetch={fetch}")?;
                }

                display_orderings(f, &self.projected_output_ordering)?;

                Ok(())
            }
        }
    }
}

#[async_trait]
impl ExecutionPlan for StreamingTableExec {
    fn name(&self) -> &'static str {
        "StreamingTableExec"
    }

    fn as_any(&self) -> &dyn Any {
        self
    }

    fn properties(&self) -> &PlanProperties {
        &self.cache
    }

    fn fetch(&self) -> Option<usize> {
        self.limit
    }

    fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
        vec![]
    }

    fn with_new_children(
        self: Arc<Self>,
        children: Vec<Arc<dyn ExecutionPlan>>,
    ) -> Result<Arc<dyn ExecutionPlan>> {
        if children.is_empty() {
            Ok(self)
        } else {
            internal_err!("Children cannot be replaced in {self:?}")
        }
    }

    fn execute(
        &self,
        partition: usize,
        ctx: Arc<TaskContext>,
    ) -> Result<SendableRecordBatchStream> {
        let stream = self.partitions[partition].execute(ctx);
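        // Apply the projection (if any) to each batch as it streams through,
        // so downstream operators only see the projected columns.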
        let projected_stream = match self.projection.clone() {
            Some(projection) => Box::pin(RecordBatchStreamAdapter::new(
                Arc::clone(&self.projected_schema),
                stream.map(move |x| {
                    x.and_then(|b| b.project(projection.as_ref()).map_err(Into::into))
                }),
            )),
            None => stream,
        };
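        // Enforce the limit (if any) with a `LimitStream`, which stops
        // polling the input once `fetch` rows have been produced.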
        Ok(match self.limit {
            None => projected_stream,
            Some(fetch) => {
                let baseline_metrics = BaselineMetrics::new(&self.metrics, partition);
                Box::pin(LimitStream::new(
                    projected_stream,
                    0,
                    Some(fetch),
                    baseline_metrics,
                ))
            }
        })
    }

    fn metrics(&self) -> Option<MetricsSet> {
        Some(self.metrics.clone_inner())
    }

    fn with_fetch(&self, limit: Option<usize>) -> Option<Arc<dyn ExecutionPlan>> {
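        // The cached `PlanProperties` do not depend on the limit (see
        // `compute_properties`), so they can be reused unchanged.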
        Some(Arc::new(StreamingTableExec {
            partitions: self.partitions.clone(),
            projection: self.projection.clone(),
            projected_schema: Arc::clone(&self.projected_schema),
            projected_output_ordering: self.projected_output_ordering.clone(),
            infinite: self.infinite,
            limit,
            cache: self.cache.clone(),
            metrics: self.metrics.clone(),
        }))
    }
}

#[cfg(test)]
mod test {
    use super::*;
    use crate::collect_partitioned;
    use crate::streaming::PartitionStream;
    use crate::test::{make_partition, TestPartitionStream};
    use arrow::record_batch::RecordBatch;

    #[tokio::test]
    async fn test_no_limit() {
        let exec = TestBuilder::new()
            // make 2 batches, each with 100 rows
            .with_batches(vec![make_partition(100), make_partition(100)])
            .build();

        let counts = collect_num_rows(Arc::new(exec)).await;
        assert_eq!(counts, vec![200]);
    }

    #[tokio::test]
    async fn test_limit() {
        let exec = TestBuilder::new()
            // make 2 batches, each with 100 rows
            .with_batches(vec![make_partition(100), make_partition(100)])
            // return only the first 75 rows
            .with_limit(Some(75))
            .build();

        let counts = collect_num_rows(Arc::new(exec)).await;
        assert_eq!(counts, vec![75]);
    }

    /// Runs the provided execution plan and returns a vector of the number of
    /// rows in each partition
    async fn collect_num_rows(exec: Arc<dyn ExecutionPlan>) -> Vec<usize> {
        let ctx = Arc::new(TaskContext::default());
        let partition_batches = collect_partitioned(exec, ctx).await.unwrap();
        partition_batches
            .into_iter()
            .map(|batches| batches.iter().map(|b| b.num_rows()).sum::<usize>())
            .collect()
    }

    #[derive(Default)]
    struct TestBuilder {
        schema: Option<SchemaRef>,
        partitions: Vec<Arc<dyn PartitionStream>>,
        projection: Option<Vec<usize>>,
        projected_output_ordering: Vec<LexOrdering>,
        infinite: bool,
        limit: Option<usize>,
    }

    impl TestBuilder {
        fn new() -> Self {
            Self::default()
        }

        /// Set the batches for the stream
        fn with_batches(mut self, batches: Vec<RecordBatch>) -> Self {
            let stream = TestPartitionStream::new_with_batches(batches);
            self.schema = Some(Arc::clone(stream.schema()));
            self.partitions = vec![Arc::new(stream)];
            self
        }

        /// Set the limit for the stream
        fn with_limit(mut self, limit: Option<usize>) -> Self {
            self.limit = limit;
            self
        }

        fn build(self) -> StreamingTableExec {
            StreamingTableExec::try_new(
                self.schema.unwrap(),
                self.partitions,
                self.projection.as_ref(),
                self.projected_output_ordering,
                self.infinite,
                self.limit,
            )
            .unwrap()
        }
    }
}