Coverage Report

Created: 2024-10-13 08:39

/Users/andrewlamb/Software/datafusion/datafusion/physical-plan/src/work_table.rs
Line | Count | Source
   1 |       | // Licensed to the Apache Software Foundation (ASF) under one
   2 |       | // or more contributor license agreements.  See the NOTICE file
   3 |       | // distributed with this work for additional information
   4 |       | // regarding copyright ownership.  The ASF licenses this file
   5 |       | // to you under the Apache License, Version 2.0 (the
   6 |       | // "License"); you may not use this file except in compliance
   7 |       | // with the License.  You may obtain a copy of the License at
   8 |       | //
   9 |       | //   http://www.apache.org/licenses/LICENSE-2.0
  10 |       | //
  11 |       | // Unless required by applicable law or agreed to in writing,
  12 |       | // software distributed under the License is distributed on an
  13 |       | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  14 |       | // KIND, either express or implied.  See the License for the
  15 |       | // specific language governing permissions and limitations
  16 |       | // under the License.
  17 |       |
  18 |       | //! Defines the work table query plan
  19 |       |
  20 |       | use std::any::Any;
  21 |       | use std::sync::{Arc, Mutex};
  22 |       |
  23 |       | use super::{
  24 |       |     metrics::{ExecutionPlanMetricsSet, MetricsSet},
  25 |       |     SendableRecordBatchStream, Statistics,
  26 |       | };
  27 |       | use crate::memory::MemoryStream;
  28 |       | use crate::{DisplayAs, DisplayFormatType, ExecutionMode, ExecutionPlan, PlanProperties};
  29 |       |
  30 |       | use arrow::datatypes::SchemaRef;
  31 |       | use arrow::record_batch::RecordBatch;
  32 |       | use datafusion_common::{internal_datafusion_err, internal_err, Result};
  33 |       | use datafusion_execution::memory_pool::MemoryReservation;
  34 |       | use datafusion_execution::TaskContext;
  35 |       | use datafusion_physical_expr::{EquivalenceProperties, Partitioning};
  36 |       |
  37 |       | /// A vector of record batches with a memory reservation.
  38 |       | #[derive(Debug)]
  39 |       | pub(super) struct ReservedBatches {
  40 |       |     batches: Vec<RecordBatch>,
  41 |       |     #[allow(dead_code)]
  42 |       |     reservation: MemoryReservation,
  43 |       | }
  44 |       |
  45 |       | impl ReservedBatches {
  46 |     1 |     pub(super) fn new(batches: Vec<RecordBatch>, reservation: MemoryReservation) -> Self {
  47 |     1 |         ReservedBatches {
  48 |     1 |             batches,
  49 |     1 |             reservation,
  50 |     1 |         }
  51 |     1 |     }
  52 |       | }
  53 |       |
  54 |       | /// The name is from PostgreSQL's terminology.
  55 |       | /// See <https://wiki.postgresql.org/wiki/CTEReadme#How_Recursion_Works>
  56 |       | /// This table serves as a mirror or buffer between each iteration of a recursive query.
  57 |       | #[derive(Debug)]
  58 |       | pub(super) struct WorkTable {
  59 |       |     batches: Mutex<Option<ReservedBatches>>,
  60 |       | }
  61 |       |
  62 |       | impl WorkTable {
  63 |       |     /// Create a new work table.
  64 |     1 |     pub(super) fn new() -> Self {
  65 |     1 |         Self {
  66 |     1 |             batches: Mutex::new(None),
  67 |     1 |         }
  68 |     1 |     }
  69 |       |
  70 |       |     /// Take the previously written batches from the work table.
  71 |       |     /// This will be called by the [`WorkTableExec`] when it is executed.
  72 |     2 |     fn take(&self) -> Result<ReservedBatches> {
  73 |     2 |         self.batches
  74 |     2 |             .lock()
  75 |     2 |             .unwrap()
  76 |     2 |             .take()
  77 |     2 |             .ok_or_else(|| internal_datafusion_err!("Unexpected empty work table"))
  78 |     2 |     }
  79 |       |
  80 |       |     /// Update the results of a recursive query iteration to the work table.
  81 |     1 |     pub(super) fn update(&self, batches: ReservedBatches) {
  82 |     1 |         self.batches.lock().unwrap().replace(batches);
  83 |     1 |     }
  84 |       | }
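
The WorkTable above is the hand-off point between iterations of a recursive query: the producing side publishes one iteration's output with update(), and the next execution of WorkTableExec drains it with take(), which leaves the table empty again. A minimal sketch of that hand-off, assuming it lives inside this module (WorkTable, ReservedBatches, and their fields are pub(super) or private); the consumer name and reservation size are illustrative only.

    use arrow_array::{ArrayRef, Int32Array};
    use datafusion_execution::memory_pool::{MemoryConsumer, MemoryPool, UnboundedMemoryPool};

    fn work_table_handoff() -> Result<()> {
        let work_table = WorkTable::new();

        // Producer side: reserve memory for the iteration's output, then publish it.
        let pool: Arc<dyn MemoryPool> = Arc::new(UnboundedMemoryPool::default());
        let mut reservation = MemoryConsumer::new("iteration_output").register(&pool);
        let col: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3]));
        let batch = RecordBatch::try_from_iter(vec![("col", col)])?;
        reservation.try_grow(64)?; // illustrative size, not a real estimate
        work_table.update(ReservedBatches::new(vec![batch], reservation));

        // Consumer side: take() empties the table, so a second take() fails
        // with "Unexpected empty work table".
        let _reserved = work_table.take()?;
        assert!(work_table.take().is_err());
        Ok(())
    }
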
  85 |       |
  86 |       | /// A temporary "working table" operation where the input data will be
  87 |       | /// taken from the named handle during the execution and will be re-published
  88 |       | /// as is (kind of like a mirror).
  89 |       | ///
  90 |       | /// Most notably used in the implementation of recursive queries where the
  91 |       | /// underlying relation does not exist yet but the data will come as the previous
  92 |       | /// term is evaluated. This table will be used such that the recursive plan
  93 |       | /// will register a receiver in the task context and this plan will use that
  94 |       | /// receiver to get the data and stream it back up so that the batches are available
  95 |       | /// in the next iteration.
  96 |       | #[derive(Clone, Debug)]
  97 |       | pub struct WorkTableExec {
  98 |       |     /// Name of the relation handler
  99 |       |     name: String,
 100 |       |     /// The schema of the stream
 101 |       |     schema: SchemaRef,
 102 |       |     /// The work table
 103 |       |     work_table: Arc<WorkTable>,
 104 |       |     /// Execution metrics
 105 |       |     metrics: ExecutionPlanMetricsSet,
 106 |       |     /// Cache holding plan properties like equivalences, output partitioning etc.
 107 |       |     cache: PlanProperties,
 108 |       | }
 109 |       |
 110 |       | impl WorkTableExec {
 111 |       |     /// Create a new execution plan for a worktable exec.
 112 |     0 |     pub fn new(name: String, schema: SchemaRef) -> Self {
 113 |     0 |         let cache = Self::compute_properties(Arc::clone(&schema));
 114 |     0 |         Self {
 115 |     0 |             name,
 116 |     0 |             schema,
 117 |     0 |             metrics: ExecutionPlanMetricsSet::new(),
 118 |     0 |             work_table: Arc::new(WorkTable::new()),
 119 |     0 |             cache,
 120 |     0 |         }
 121 |     0 |     }
 122 |       |
 123 |     0 |     pub(super) fn with_work_table(&self, work_table: Arc<WorkTable>) -> Self {
 124 |     0 |         Self {
 125 |     0 |             name: self.name.clone(),
 126 |     0 |             schema: Arc::clone(&self.schema),
 127 |     0 |             metrics: ExecutionPlanMetricsSet::new(),
 128 |     0 |             work_table,
 129 |     0 |             cache: self.cache.clone(),
 130 |     0 |         }
 131 |     0 |     }
 132 |       |
 133 |       |     /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc.
 134 |     0 |     fn compute_properties(schema: SchemaRef) -> PlanProperties {
 135 |     0 |         let eq_properties = EquivalenceProperties::new(schema);
 136 |     0 |
 137 |     0 |         PlanProperties::new(
 138 |     0 |             eq_properties,
 139 |     0 |             Partitioning::UnknownPartitioning(1),
 140 |     0 |             ExecutionMode::Bounded,
 141 |     0 |         )
 142 |     0 |     }
 143 |       | }
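
When the physical planner builds a recursive query, the WorkTableExec that reads the named relation must share its WorkTable with the operator that produces each iteration; that is what with_work_table above is for. A rough sketch of that wiring follows; it is not the actual planner code, it assumes crate-internal access (with_work_table and WorkTable are pub(super)), and the relation name and single-column schema are placeholders.

    use arrow::datatypes::{DataType, Field, Schema};

    fn wire_work_table(shared_table: Arc<WorkTable>) -> WorkTableExec {
        // Placeholder schema for the recursive relation's output.
        let schema = Arc::new(Schema::new(vec![Field::new("n", DataType::Int32, false)]));

        // `new` starts the operator with its own empty, private work table ...
        let placeholder = WorkTableExec::new("recursive_cte".to_string(), schema);

        // ... and `with_work_table` rewires it to read from the shared table that
        // the recursive side of the plan will write into via WorkTable::update.
        placeholder.with_work_table(shared_table)
    }

The same Arc<WorkTable> is handed to whichever operator produces each iteration's batches, so an update() there is exactly what the next execute() of this plan will observe.
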
 144 |       |
 145 |       | impl DisplayAs for WorkTableExec {
 146 |     0 |     fn fmt_as(
 147 |     0 |         &self,
 148 |     0 |         t: DisplayFormatType,
 149 |     0 |         f: &mut std::fmt::Formatter,
 150 |     0 |     ) -> std::fmt::Result {
 151 |     0 |         match t {
 152 |       |             DisplayFormatType::Default | DisplayFormatType::Verbose => {
 153 |     0 |                 write!(f, "WorkTableExec: name={}", self.name)
 154 |     0 |             }
 155 |     0 |         }
 156 |     0 |     }
 157 |       | }
 158 |       |
 159 |       | impl ExecutionPlan for WorkTableExec {
 160 |     0 |     fn name(&self) -> &'static str {
 161 |     0 |         "WorkTableExec"
 162 |     0 |     }
 163 |       |
 164 |     0 |     fn as_any(&self) -> &dyn Any {
 165 |     0 |         self
 166 |     0 |     }
 167 |       |
 168 |     0 |     fn properties(&self) -> &PlanProperties {
 169 |     0 |         &self.cache
 170 |     0 |     }
 171 |       |
 172 |     0 |     fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
 173 |     0 |         vec![]
 174 |     0 |     }
 175 |       |
 176 |     0 |     fn maintains_input_order(&self) -> Vec<bool> {
 177 |     0 |         vec![false]
 178 |     0 |     }
 179 |       |
 180 |     0 |     fn benefits_from_input_partitioning(&self) -> Vec<bool> {
 181 |     0 |         vec![false]
 182 |     0 |     }
 183 |       |
 184 |     0 |     fn with_new_children(
 185 |     0 |         self: Arc<Self>,
 186 |     0 |         _: Vec<Arc<dyn ExecutionPlan>>,
 187 |     0 |     ) -> Result<Arc<dyn ExecutionPlan>> {
 188 |     0 |         Ok(Arc::clone(&self) as Arc<dyn ExecutionPlan>)
 189 |     0 |     }
 190 |       |
 191 |       |     /// Stream the batches that were written to the work table.
 192 |     0 |     fn execute(
 193 |     0 |         &self,
 194 |     0 |         partition: usize,
 195 |     0 |         _context: Arc<TaskContext>,
 196 |     0 |     ) -> Result<SendableRecordBatchStream> {
 197 |     0 |         // WorkTable streams must be the plan base.
 198 |     0 |         if partition != 0 {
 199 |     0 |             return internal_err!(
 200 |     0 |                 "WorkTableExec got an invalid partition {partition} (expected 0)"
 201 |     0 |             );
 202 |     0 |         }
 203 |     0 |         let batch = self.work_table.take()?;
 204 |       |         Ok(Box::pin(
 205 |     0 |             MemoryStream::try_new(batch.batches, Arc::clone(&self.schema), None)?
 206 |     0 |                 .with_reservation(batch.reservation),
 207 |       |         ))
 208 |     0 |     }
 209 |       |
 210 |     0 |     fn metrics(&self) -> Option<MetricsSet> {
 211 |     0 |         Some(self.metrics.clone_inner())
 212 |     0 |     }
 213 |       |
 214 |     0 |     fn statistics(&self) -> Result<Statistics> {
 215 |     0 |         Ok(Statistics::new_unknown(&self.schema()))
 216 |     0 |     }
 217 |       | }
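
Tying the pieces together, execute() above only serves partition 0 and simply drains whatever the work table currently holds into a MemoryStream, reservation included. Below is a hedged sketch of one driver iteration; the async draining with futures::StreamExt and the use of TaskContext::default() are assumptions about the surrounding harness rather than anything this file prescribes, and like the earlier sketches it assumes crate-internal access.

    use futures::StreamExt;

    // One iteration of a hypothetical driver loop: publish `reserved` (built as in
    // the earlier sketches), then read it back through the operator.
    async fn drain_one_iteration(
        exec: &WorkTableExec,
        shared_table: &WorkTable,
        reserved: ReservedBatches,
    ) -> Result<Vec<RecordBatch>> {
        shared_table.update(reserved);

        // Any partition other than 0 is rejected with an internal error.
        let mut stream = exec.execute(0, Arc::new(TaskContext::default()))?;

        let mut batches = Vec::new();
        while let Some(batch) = stream.next().await {
            batches.push(batch?);
        }
        Ok(batches)
    }
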
 218 |       |
 219 |       | #[cfg(test)]
 220 |       | mod tests {
 221 |       |     use super::*;
 222 |       |     use arrow_array::{ArrayRef, Int32Array};
 223 |       |     use datafusion_execution::memory_pool::{MemoryConsumer, UnboundedMemoryPool};
 224 |       |
 225 |       |     #[test]
 226 |     1 |     fn test_work_table() {
 227 |     1 |         let work_table = WorkTable::new();
 228 |     1 |         // can't take from empty work_table
 229 |     1 |         assert!(work_table.take().is_err());
 230 |       |
 231 |     1 |         let pool = Arc::new(UnboundedMemoryPool::default()) as _;
 232 |     1 |         let mut reservation = MemoryConsumer::new("test_work_table").register(&pool);
 233 |     1 |
 234 |     1 |         // update batch to work_table
 235 |     1 |         let array: ArrayRef = Arc::new((0..5).collect::<Int32Array>());
 236 |     1 |         let batch = RecordBatch::try_from_iter(vec![("col", array)]).unwrap();
 237 |     1 |         reservation.try_grow(100).unwrap();
 238 |     1 |         work_table.update(ReservedBatches::new(vec![batch.clone()], reservation));
 239 |     1 |         // take from work_table
 240 |     1 |         let reserved_batches = work_table.take().unwrap();
 241 |     1 |         assert_eq!(reserved_batches.batches, vec![batch.clone()]);
 242 |       |
 243 |       |         // consume the batch by the MemoryStream
 244 |     1 |         let memory_stream =
 245 |     1 |             MemoryStream::try_new(reserved_batches.batches, batch.schema(), None)
 246 |     1 |                 .unwrap()
 247 |     1 |                 .with_reservation(reserved_batches.reservation);
 248 |     1 |
 249 |     1 |         // should still be reserved
 250 |     1 |         assert_eq!(pool.reserved(), 100);
 251 |       |
 252 |       |         // the reservation should be freed after drop the memory_stream
 253 |     1 |         drop(memory_stream);
 254 |     1 |         assert_eq!(pool.reserved(), 0);
 255 |     1 |     }
 256 |       | }