/Users/andrewlamb/Software/datafusion/datafusion/physical-plan/src/work_table.rs
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | //! Defines the work table query plan |
19 | | |
20 | | use std::any::Any; |
21 | | use std::sync::{Arc, Mutex}; |
22 | | |
23 | | use super::{ |
24 | | metrics::{ExecutionPlanMetricsSet, MetricsSet}, |
25 | | SendableRecordBatchStream, Statistics, |
26 | | }; |
27 | | use crate::memory::MemoryStream; |
28 | | use crate::{DisplayAs, DisplayFormatType, ExecutionMode, ExecutionPlan, PlanProperties}; |
29 | | |
30 | | use arrow::datatypes::SchemaRef; |
31 | | use arrow::record_batch::RecordBatch; |
32 | | use datafusion_common::{internal_datafusion_err, internal_err, Result}; |
33 | | use datafusion_execution::memory_pool::MemoryReservation; |
34 | | use datafusion_execution::TaskContext; |
35 | | use datafusion_physical_expr::{EquivalenceProperties, Partitioning}; |
36 | | |
37 | | /// A vector of record batches with a memory reservation. |
#[derive(Debug)]
pub(super) struct ReservedBatches {
    /// The batches buffered for the next iteration of the recursive query.
    batches: Vec<RecordBatch>,
    /// Memory reservation accounting for `batches` in the pool.
    /// Never read — held only so the reservation stays alive (and the pool
    /// stays charged) for as long as the batches do; freed on drop.
    #[allow(dead_code)]
    reservation: MemoryReservation,
}
44 | | |
45 | | impl ReservedBatches { |
46 | 1 | pub(super) fn new(batches: Vec<RecordBatch>, reservation: MemoryReservation) -> Self { |
47 | 1 | ReservedBatches { |
48 | 1 | batches, |
49 | 1 | reservation, |
50 | 1 | } |
51 | 1 | } |
52 | | } |
53 | | |
54 | | /// The name is from PostgreSQL's terminology. |
55 | | /// See <https://wiki.postgresql.org/wiki/CTEReadme#How_Recursion_Works> |
56 | | /// This table serves as a mirror or buffer between each iteration of a recursive query. |
#[derive(Debug)]
pub(super) struct WorkTable {
    /// Batches written by the previous recursive iteration.
    /// `None` until `update` is called; `take` moves the contents out,
    /// leaving `None` again. Guarded by a mutex for shared access.
    batches: Mutex<Option<ReservedBatches>>,
}
61 | | |
62 | | impl WorkTable { |
63 | | /// Create a new work table. |
64 | 1 | pub(super) fn new() -> Self { |
65 | 1 | Self { |
66 | 1 | batches: Mutex::new(None), |
67 | 1 | } |
68 | 1 | } |
69 | | |
70 | | /// Take the previously written batches from the work table. |
71 | | /// This will be called by the [`WorkTableExec`] when it is executed. |
72 | 2 | fn take(&self) -> Result<ReservedBatches> { |
73 | 2 | self.batches |
74 | 2 | .lock() |
75 | 2 | .unwrap() |
76 | 2 | .take() |
77 | 2 | .ok_or_else(|| internal_datafusion_err!("Unexpected empty work table")1 ) |
78 | 2 | } |
79 | | |
80 | | /// Update the results of a recursive query iteration to the work table. |
81 | 1 | pub(super) fn update(&self, batches: ReservedBatches) { |
82 | 1 | self.batches.lock().unwrap().replace(batches); |
83 | 1 | } |
84 | | } |
85 | | |
86 | | /// A temporary "working table" operation where the input data will be |
87 | | /// taken from the named handle during the execution and will be re-published |
88 | | /// as is (kind of like a mirror). |
89 | | /// |
90 | | /// Most notably used in the implementation of recursive queries where the |
91 | | /// underlying relation does not exist yet but the data will come as the previous |
92 | | /// term is evaluated. This table will be used such that the recursive plan |
93 | | /// will register a receiver in the task context and this plan will use that |
94 | | /// receiver to get the data and stream it back up so that the batches are available |
95 | | /// in the next iteration. |
#[derive(Clone, Debug)]
pub struct WorkTableExec {
    /// Name of the relation handler (used in `EXPLAIN` output)
    name: String,
    /// The schema of the stream this node emits
    schema: SchemaRef,
    /// The work table supplying the batches; shared with the recursive
    /// plan that writes into it
    work_table: Arc<WorkTable>,
    /// Execution metrics
    metrics: ExecutionPlanMetricsSet,
    /// Cache holding plan properties like equivalences, output partitioning etc.
    cache: PlanProperties,
}
109 | | |
110 | | impl WorkTableExec { |
111 | | /// Create a new execution plan for a worktable exec. |
112 | 0 | pub fn new(name: String, schema: SchemaRef) -> Self { |
113 | 0 | let cache = Self::compute_properties(Arc::clone(&schema)); |
114 | 0 | Self { |
115 | 0 | name, |
116 | 0 | schema, |
117 | 0 | metrics: ExecutionPlanMetricsSet::new(), |
118 | 0 | work_table: Arc::new(WorkTable::new()), |
119 | 0 | cache, |
120 | 0 | } |
121 | 0 | } |
122 | | |
123 | 0 | pub(super) fn with_work_table(&self, work_table: Arc<WorkTable>) -> Self { |
124 | 0 | Self { |
125 | 0 | name: self.name.clone(), |
126 | 0 | schema: Arc::clone(&self.schema), |
127 | 0 | metrics: ExecutionPlanMetricsSet::new(), |
128 | 0 | work_table, |
129 | 0 | cache: self.cache.clone(), |
130 | 0 | } |
131 | 0 | } |
132 | | |
133 | | /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. |
134 | 0 | fn compute_properties(schema: SchemaRef) -> PlanProperties { |
135 | 0 | let eq_properties = EquivalenceProperties::new(schema); |
136 | 0 |
|
137 | 0 | PlanProperties::new( |
138 | 0 | eq_properties, |
139 | 0 | Partitioning::UnknownPartitioning(1), |
140 | 0 | ExecutionMode::Bounded, |
141 | 0 | ) |
142 | 0 | } |
143 | | } |
144 | | |
145 | | impl DisplayAs for WorkTableExec { |
146 | 0 | fn fmt_as( |
147 | 0 | &self, |
148 | 0 | t: DisplayFormatType, |
149 | 0 | f: &mut std::fmt::Formatter, |
150 | 0 | ) -> std::fmt::Result { |
151 | 0 | match t { |
152 | | DisplayFormatType::Default | DisplayFormatType::Verbose => { |
153 | 0 | write!(f, "WorkTableExec: name={}", self.name) |
154 | 0 | } |
155 | 0 | } |
156 | 0 | } |
157 | | } |
158 | | |
159 | | impl ExecutionPlan for WorkTableExec { |
160 | 0 | fn name(&self) -> &'static str { |
161 | 0 | "WorkTableExec" |
162 | 0 | } |
163 | | |
164 | 0 | fn as_any(&self) -> &dyn Any { |
165 | 0 | self |
166 | 0 | } |
167 | | |
168 | 0 | fn properties(&self) -> &PlanProperties { |
169 | 0 | &self.cache |
170 | 0 | } |
171 | | |
172 | 0 | fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> { |
173 | 0 | vec![] |
174 | 0 | } |
175 | | |
176 | 0 | fn maintains_input_order(&self) -> Vec<bool> { |
177 | 0 | vec![false] |
178 | 0 | } |
179 | | |
180 | 0 | fn benefits_from_input_partitioning(&self) -> Vec<bool> { |
181 | 0 | vec![false] |
182 | 0 | } |
183 | | |
184 | 0 | fn with_new_children( |
185 | 0 | self: Arc<Self>, |
186 | 0 | _: Vec<Arc<dyn ExecutionPlan>>, |
187 | 0 | ) -> Result<Arc<dyn ExecutionPlan>> { |
188 | 0 | Ok(Arc::clone(&self) as Arc<dyn ExecutionPlan>) |
189 | 0 | } |
190 | | |
191 | | /// Stream the batches that were written to the work table. |
192 | 0 | fn execute( |
193 | 0 | &self, |
194 | 0 | partition: usize, |
195 | 0 | _context: Arc<TaskContext>, |
196 | 0 | ) -> Result<SendableRecordBatchStream> { |
197 | 0 | // WorkTable streams must be the plan base. |
198 | 0 | if partition != 0 { |
199 | 0 | return internal_err!( |
200 | 0 | "WorkTableExec got an invalid partition {partition} (expected 0)" |
201 | 0 | ); |
202 | 0 | } |
203 | 0 | let batch = self.work_table.take()?; |
204 | | Ok(Box::pin( |
205 | 0 | MemoryStream::try_new(batch.batches, Arc::clone(&self.schema), None)? |
206 | 0 | .with_reservation(batch.reservation), |
207 | | )) |
208 | 0 | } |
209 | | |
210 | 0 | fn metrics(&self) -> Option<MetricsSet> { |
211 | 0 | Some(self.metrics.clone_inner()) |
212 | 0 | } |
213 | | |
214 | 0 | fn statistics(&self) -> Result<Statistics> { |
215 | 0 | Ok(Statistics::new_unknown(&self.schema())) |
216 | 0 | } |
217 | | } |
218 | | |
#[cfg(test)]
mod tests {
    use super::*;
    use arrow_array::{ArrayRef, Int32Array};
    use datafusion_execution::memory_pool::{MemoryConsumer, UnboundedMemoryPool};

    #[test]
    fn test_work_table() {
        let work_table = WorkTable::new();
        // Taking from a freshly created (empty) work table is an error.
        assert!(work_table.take().is_err());

        // Reserve some memory to associate with the batches.
        let pool = Arc::new(UnboundedMemoryPool::default()) as _;
        let mut reservation = MemoryConsumer::new("test_work_table").register(&pool);
        reservation.try_grow(100).unwrap();

        // Write a batch (and its reservation) into the work table.
        let col: ArrayRef = Arc::new((0..5).collect::<Int32Array>());
        let batch = RecordBatch::try_from_iter(vec![("col", col)]).unwrap();
        work_table.update(ReservedBatches::new(vec![batch.clone()], reservation));

        // Reading it back yields the same batch.
        let reserved_batches = work_table.take().unwrap();
        assert_eq!(reserved_batches.batches, vec![batch.clone()]);

        // Hand batches + reservation to a MemoryStream; the pool stays
        // charged while the stream is alive...
        let memory_stream =
            MemoryStream::try_new(reserved_batches.batches, batch.schema(), None)
                .unwrap()
                .with_reservation(reserved_batches.reservation);
        assert_eq!(pool.reserved(), 100);

        // ...and the reservation is freed once the stream is dropped.
        drop(memory_stream);
        assert_eq!(pool.reserved(), 0);
    }
}