/Users/andrewlamb/Software/datafusion/datafusion/physical-plan/src/memory.rs
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

//! Execution plan for reading in-memory batches of data
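//!
//! A minimal end-to-end sketch (illustrative only; the column name, the sample
//! data, and the use of `futures::executor::block_on` are assumptions, not part
//! of the original module docs):
//!
//! ```
//! # use std::sync::Arc;
//! # use arrow::array::Int64Array;
//! # use arrow::datatypes::{DataType, Field, Schema};
//! # use arrow::record_batch::RecordBatch;
//! # use datafusion_execution::TaskContext;
//! # use datafusion_physical_plan::memory::MemoryExec;
//! # use datafusion_physical_plan::{common, ExecutionPlan};
//! let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int64, false)]));
//! let batch = RecordBatch::try_new(
//!     Arc::clone(&schema),
//!     vec![Arc::new(Int64Array::from(vec![1, 2, 3]))],
//! )
//! .unwrap();
//! // One partition holding a single batch.
//! let exec = MemoryExec::try_new(&[vec![batch]], schema, None).unwrap();
//! let stream = exec.execute(0, Arc::new(TaskContext::default())).unwrap();
//! let batches = futures::executor::block_on(common::collect(stream)).unwrap();
//! assert_eq!(batches[0].num_rows(), 3);
//! ```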

use std::any::Any;
use std::fmt;
use std::sync::Arc;
use std::task::{Context, Poll};

use super::expressions::PhysicalSortExpr;
use super::{
    common, DisplayAs, DisplayFormatType, ExecutionMode, ExecutionPlan, Partitioning,
    PlanProperties, RecordBatchStream, SendableRecordBatchStream, Statistics,
};

use arrow::datatypes::SchemaRef;
use arrow::record_batch::RecordBatch;
use datafusion_common::{internal_err, project_schema, Result};
use datafusion_execution::memory_pool::MemoryReservation;
use datafusion_execution::TaskContext;
use datafusion_physical_expr::{EquivalenceProperties, LexOrdering};

use futures::Stream;
/// Execution plan for reading in-memory batches of data
pub struct MemoryExec {
    /// The partitions to query
    partitions: Vec<Vec<RecordBatch>>,
    /// Schema representing the data before projection
    schema: SchemaRef,
    /// Schema representing the data after the optional projection is applied
    projected_schema: SchemaRef,
    /// Optional projection
    projection: Option<Vec<usize>>,
    /// Sort information: one or more equivalent orderings
    sort_information: Vec<LexOrdering>,
    /// Cached plan properties (equivalence properties, partitioning, execution mode)
    cache: PlanProperties,
    /// If partition sizes should be displayed
    show_sizes: bool,
}

impl fmt::Debug for MemoryExec {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.debug_struct("MemoryExec")
            .field("partitions", &"[...]")
            .field("schema", &self.schema)
            .field("projection", &self.projection)
            .field("sort_information", &self.sort_information)
            .finish()
    }
}

impl DisplayAs for MemoryExec {
    fn fmt_as(
        &self,
        t: DisplayFormatType,
        f: &mut std::fmt::Formatter,
    ) -> std::fmt::Result {
        match t {
            DisplayFormatType::Default | DisplayFormatType::Verbose => {
                let partition_sizes: Vec<_> =
                    self.partitions.iter().map(|b| b.len()).collect();

                let output_ordering = self
                    .sort_information
                    .first()
                    .map(|output_ordering| {
                        format!(
                            ", output_ordering={}",
                            PhysicalSortExpr::format_list(output_ordering)
                        )
                    })
                    .unwrap_or_default();

                if self.show_sizes {
                    write!(
                        f,
                        "MemoryExec: partitions={}, partition_sizes={partition_sizes:?}{output_ordering}",
                        partition_sizes.len(),
                    )
                } else {
                    write!(f, "MemoryExec: partitions={}", partition_sizes.len(),)
                }
            }
        }
    }
}

impl ExecutionPlan for MemoryExec {
    fn name(&self) -> &'static str {
        "MemoryExec"
    }

    /// Return a reference to Any that can be used for downcasting
    fn as_any(&self) -> &dyn Any {
        self
    }

    fn properties(&self) -> &PlanProperties {
        &self.cache
    }

    fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
        // this is a leaf node and has no children
        vec![]
    }

    fn with_new_children(
        self: Arc<Self>,
        children: Vec<Arc<dyn ExecutionPlan>>,
    ) -> Result<Arc<dyn ExecutionPlan>> {
        // MemoryExec has no children
        if children.is_empty() {
            Ok(self)
        } else {
            internal_err!("Children cannot be replaced in {self:?}")
        }
    }

    fn execute(
        &self,
        partition: usize,
        _context: Arc<TaskContext>,
    ) -> Result<SendableRecordBatchStream> {
        Ok(Box::pin(MemoryStream::try_new(
            self.partitions[partition].clone(),
            Arc::clone(&self.projected_schema),
            self.projection.clone(),
        )?))
    }

    /// We recompute the statistics dynamically from the arrow metadata as it is pretty cheap to do so
    fn statistics(&self) -> Result<Statistics> {
        Ok(common::compute_record_batch_statistics(
            &self.partitions,
            &self.schema,
            self.projection.clone(),
        ))
    }
}

impl MemoryExec {
    /// Create a new execution plan for reading in-memory record batches.
    /// The provided `schema` should not have the projection applied.
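    ///
    /// A minimal usage sketch (illustrative only; the column names and the
    /// projection picking column `b` are assumptions, not taken from the
    /// original file):
    ///
    /// ```
    /// # use std::sync::Arc;
    /// # use arrow_schema::{DataType, Field, Schema};
    /// # use datafusion_physical_plan::memory::MemoryExec;
    /// # use datafusion_physical_plan::ExecutionPlan;
    /// // The *unprojected* schema is passed in; the projection selects column `b`.
    /// let schema = Arc::new(Schema::new(vec![
    ///     Field::new("a", DataType::Int64, false),
    ///     Field::new("b", DataType::Int64, false),
    /// ]));
    /// let exec = MemoryExec::try_new(&[vec![]], schema, Some(vec![1])).unwrap();
    /// assert_eq!(exec.schema().field(0).name(), "b");
    /// ```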
    pub fn try_new(
        partitions: &[Vec<RecordBatch>],
        schema: SchemaRef,
        projection: Option<Vec<usize>>,
    ) -> Result<Self> {
        let projected_schema = project_schema(&schema, projection.as_ref())?;
        let cache =
            Self::compute_properties(Arc::clone(&projected_schema), &[], partitions);
        Ok(Self {
            partitions: partitions.to_vec(),
            schema,
            projected_schema,
            projection,
            sort_information: vec![],
            cache,
            show_sizes: true,
        })
    }

    /// Set `show_sizes` to determine whether to display partition sizes
    pub fn with_show_sizes(mut self, show_sizes: bool) -> Self {
        self.show_sizes = show_sizes;
        self
    }

    pub fn partitions(&self) -> &[Vec<RecordBatch>] {
        &self.partitions
    }

    pub fn projection(&self) -> &Option<Vec<usize>> {
        &self.projection
    }

    /// A memory table can be ordered by multiple expressions simultaneously.
    /// [`EquivalenceProperties`] keeps track of expressions that describe the
    /// global ordering of the schema. These columns are not necessarily the same; e.g.
    /// ```text
    /// ┌---┬---┐
    /// | a | b |
    /// |---|---|
    /// | 1 | 9 |
    /// | 2 | 8 |
    /// | 3 | 7 |
    /// | 5 | 5 |
    /// └---┴---┘
    /// ```
    /// where both `a ASC` and `b DESC` can describe the table ordering. With
    /// [`EquivalenceProperties`], we can keep track of these equivalences
    /// and treat `a ASC` and `b DESC` as the same ordering requirement.
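    ///
    /// A minimal sketch of declaring such equivalent orderings (illustrative
    /// only; the column names and sort options are assumptions):
    ///
    /// ```
    /// # use std::sync::Arc;
    /// # use arrow_schema::{DataType, Field, Schema, SortOptions};
    /// # use datafusion_physical_expr::{expressions::col, PhysicalSortExpr};
    /// # use datafusion_physical_plan::memory::MemoryExec;
    /// let schema = Arc::new(Schema::new(vec![
    ///     Field::new("a", DataType::Int64, false),
    ///     Field::new("b", DataType::Int64, false),
    /// ]));
    /// // `a ASC` and `b DESC` are each a lexicographic ordering (`LexOrdering`).
    /// let a_asc = vec![PhysicalSortExpr {
    ///     expr: col("a", &schema).unwrap(),
    ///     options: SortOptions::default(),
    /// }];
    /// let b_desc = vec![PhysicalSortExpr {
    ///     expr: col("b", &schema).unwrap(),
    ///     options: SortOptions {
    ///         descending: true,
    ///         nulls_first: false,
    ///     },
    /// }];
    /// let exec = MemoryExec::try_new(&[vec![]], schema, None)
    ///     .unwrap()
    ///     .with_sort_information(vec![a_asc, b_desc]);
    /// ```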
    pub fn with_sort_information(mut self, sort_information: Vec<LexOrdering>) -> Self {
        self.sort_information = sort_information;

        // We need to update equivalence properties when updating sort information.
        let eq_properties = EquivalenceProperties::new_with_orderings(
            self.schema(),
            &self.sort_information,
        );
        self.cache = self.cache.with_eq_properties(eq_properties);
        self
    }

    pub fn original_schema(&self) -> SchemaRef {
        Arc::clone(&self.schema)
    }

    /// This function creates the cache object that stores the plan properties
    /// such as schema, equivalence properties, ordering, partitioning, etc.
    fn compute_properties(
        schema: SchemaRef,
        orderings: &[LexOrdering],
        partitions: &[Vec<RecordBatch>],
    ) -> PlanProperties {
        let eq_properties = EquivalenceProperties::new_with_orderings(schema, orderings);
        PlanProperties::new(
            eq_properties,                                       // Equivalence Properties
            Partitioning::UnknownPartitioning(partitions.len()), // Output Partitioning
            ExecutionMode::Bounded,                              // Execution Mode
        )
    }
}

/// Iterator over batches
pub struct MemoryStream {
    /// Vector of record batches
    data: Vec<RecordBatch>,
    /// Optional memory reservation bound to the data, freed on drop
    reservation: Option<MemoryReservation>,
    /// Schema representing the data
    schema: SchemaRef,
    /// Optional projection for which columns to load
    projection: Option<Vec<usize>>,
    /// Index into the data
    index: usize,
}

impl MemoryStream {
    /// Create an iterator for a vector of record batches
    pub fn try_new(
        data: Vec<RecordBatch>,
        schema: SchemaRef,
        projection: Option<Vec<usize>>,
    ) -> Result<Self> {
        Ok(Self {
            data,
            reservation: None,
            schema,
            projection,
            index: 0,
        })
    }

    /// Set the memory reservation for the data
    pub(super) fn with_reservation(mut self, reservation: MemoryReservation) -> Self {
        self.reservation = Some(reservation);
        self
    }
}

impl Stream for MemoryStream {
    type Item = Result<RecordBatch>;

    fn poll_next(
        mut self: std::pin::Pin<&mut Self>,
        _: &mut Context<'_>,
    ) -> Poll<Option<Self::Item>> {
        Poll::Ready(if self.index < self.data.len() {
            self.index += 1;
            let batch = &self.data[self.index - 1];

            // return just the columns requested
            let batch = match self.projection.as_ref() {
                Some(columns) => batch.project(columns)?,
                None => batch.clone(),
            };

            Some(Ok(batch))
        } else {
            None
        })
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        (self.data.len(), Some(self.data.len()))
    }
}

impl RecordBatchStream for MemoryStream {
    /// Get the schema
    fn schema(&self) -> SchemaRef {
        Arc::clone(&self.schema)
    }
}

#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use crate::memory::MemoryExec;
    use crate::ExecutionPlan;

    use arrow_schema::{DataType, Field, Schema, SortOptions};
    use datafusion_physical_expr::expressions::col;
    use datafusion_physical_expr::PhysicalSortExpr;

    #[test]
    fn test_memory_order_eq() -> datafusion_common::Result<()> {
        let schema = Arc::new(Schema::new(vec![
            Field::new("a", DataType::Int64, false),
            Field::new("b", DataType::Int64, false),
            Field::new("c", DataType::Int64, false),
        ]));
        let sort1 = vec![
            PhysicalSortExpr {
                expr: col("a", &schema)?,
                options: SortOptions::default(),
            },
            PhysicalSortExpr {
                expr: col("b", &schema)?,
                options: SortOptions::default(),
            },
        ];
        let sort2 = vec![PhysicalSortExpr {
            expr: col("c", &schema)?,
            options: SortOptions::default(),
        }];
        let mut expected_output_order = vec![];
        expected_output_order.extend(sort1.clone());
        expected_output_order.extend(sort2.clone());

        let sort_information = vec![sort1.clone(), sort2.clone()];
        let mem_exec = MemoryExec::try_new(&[vec![]], schema, None)?
            .with_sort_information(sort_information);

        assert_eq!(
            mem_exec.properties().output_ordering().unwrap(),
            expected_output_order
        );
        let eq_properties = mem_exec.properties().equivalence_properties();
        assert!(eq_properties.oeq_class().contains(&sort1));
        assert!(eq_properties.oeq_class().contains(&sort2));
        Ok(())
    }
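
    // An additional minimal sketch (not part of the original test suite):
    // checks that an optional projection is reflected in the reported output
    // schema while the schema passed to `try_new` stays unprojected.
    #[test]
    fn test_memory_projection_schema() -> datafusion_common::Result<()> {
        let schema = Arc::new(Schema::new(vec![
            Field::new("a", DataType::Int64, false),
            Field::new("b", DataType::Int64, false),
        ]));
        let mem_exec =
            MemoryExec::try_new(&[vec![]], Arc::clone(&schema), Some(vec![1]))?;

        // The plan exposes the projected schema (only column `b`)...
        assert_eq!(mem_exec.schema().fields().len(), 1);
        assert_eq!(mem_exec.schema().field(0).name(), "b");
        // ...while the original, unprojected schema is still available.
        assert_eq!(mem_exec.original_schema().fields().len(), 2);
        Ok(())
    }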
}