/Users/andrewlamb/Software/datafusion/datafusion/physical-plan/src/common.rs
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | //! Defines common code used in execution plans |
19 | | |
20 | | use std::fs; |
21 | | use std::fs::{metadata, File}; |
22 | | use std::path::{Path, PathBuf}; |
23 | | use std::sync::Arc; |
24 | | |
25 | | use super::SendableRecordBatchStream; |
26 | | use crate::stream::RecordBatchReceiverStream; |
27 | | use crate::{ColumnStatistics, Statistics}; |
28 | | |
29 | | use arrow::datatypes::Schema; |
30 | | use arrow::ipc::writer::{FileWriter, IpcWriteOptions}; |
31 | | use arrow::record_batch::RecordBatch; |
32 | | use arrow_array::Array; |
33 | | use datafusion_common::stats::Precision; |
34 | | use datafusion_common::{plan_err, DataFusionError, Result}; |
35 | | use datafusion_execution::memory_pool::MemoryReservation; |
36 | | |
37 | | use futures::{StreamExt, TryStreamExt}; |
38 | | use parking_lot::Mutex; |
39 | | |
40 | | /// [`MemoryReservation`] used across query execution streams |
41 | | pub(crate) type SharedMemoryReservation = Arc<Mutex<MemoryReservation>>; |
42 | | |
43 | | /// Create a vector of record batches from a stream |
44 | 3.32k | pub async fn collect(stream: SendableRecordBatchStream) -> Result<Vec<RecordBatch>> { |
45 | 3.53k | stream.try_collect::<Vec<_>>().await |
46 | 3.31k | } |
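A minimal usage sketch for `collect` (not part of this file): it assumes the `datafusion` facade crate and an ad-hoc SQL query purely for illustration; any `SendableRecordBatchStream` would work.

use datafusion::physical_plan::common::collect;
use datafusion::prelude::*;

#[tokio::main]
async fn main() -> datafusion::error::Result<()> {
    let ctx = SessionContext::new();
    // hypothetical one-row query; any plan that yields a stream would do
    let stream = ctx.sql("SELECT 1 AS a").await?.execute_stream().await?;
    // drain the SendableRecordBatchStream into a Vec<RecordBatch>
    let batches = collect(stream).await?;
    assert_eq!(batches[0].num_rows(), 1);
    Ok(())
}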
47 | | |
48 | | /// Recursively builds a list of files in a directory with a given extension, erroring if none are found |
49 | 0 | pub fn build_checked_file_list(dir: &str, ext: &str) -> Result<Vec<String>> { |
50 | 0 | let mut filenames: Vec<String> = Vec::new(); |
51 | 0 | build_file_list_recurse(dir, &mut filenames, ext)?; |
52 | 0 | if filenames.is_empty() { |
53 | 0 | return plan_err!("No files found at {dir} with file extension {ext}"); |
54 | 0 | } |
55 | 0 | Ok(filenames) |
56 | 0 | } |
57 | | |
58 | | /// Recursively builds a list of files in a directory with a given extension |
59 | 0 | pub fn build_file_list(dir: &str, ext: &str) -> Result<Vec<String>> { |
60 | 0 | let mut filenames: Vec<String> = Vec::new(); |
61 | 0 | build_file_list_recurse(dir, &mut filenames, ext)?; |
62 | 0 | Ok(filenames) |
63 | 0 | } |
64 | | |
65 | | /// Recursively build a list of files with a given extension, appending matches to the `filenames` accumulator |
66 | 0 | fn build_file_list_recurse( |
67 | 0 | dir: &str, |
68 | 0 | filenames: &mut Vec<String>, |
69 | 0 | ext: &str, |
70 | 0 | ) -> Result<()> { |
71 | 0 | let metadata = metadata(dir)?; |
72 | 0 | if metadata.is_file() { |
73 | 0 | if dir.ends_with(ext) { |
74 | 0 | filenames.push(dir.to_string()); |
75 | 0 | } |
76 | | } else { |
77 | 0 | for entry in fs::read_dir(dir)? { |
78 | 0 | let entry = entry?; |
79 | 0 | let path = entry.path(); |
80 | 0 | if let Some(path_name) = path.to_str() { |
81 | 0 | if path.is_dir() { |
82 | 0 | build_file_list_recurse(path_name, filenames, ext)?; |
83 | 0 | } else if path_name.ends_with(ext) { |
84 | 0 | filenames.push(path_name.to_string()); |
85 | 0 | } |
86 | | } else { |
87 | 0 | return plan_err!("Invalid path"); |
88 | | } |
89 | | } |
90 | | } |
91 | 0 | Ok(()) |
92 | 0 | } |
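A hedged sketch of calling the two public helpers above; the directory and extension are made-up placeholders, and the re-export path through the `datafusion` facade crate is an assumption.

use datafusion::physical_plan::common::{build_checked_file_list, build_file_list};

fn main() -> datafusion::error::Result<()> {
    // plain variant: an empty Vec simply means nothing matched
    let files = build_file_list("/tmp/data", ".csv")?;
    println!("found {} files", files.len());

    // checked variant: turns "nothing matched" into a plan error
    match build_checked_file_list("/tmp/data", ".csv") {
        Ok(files) => println!("found {} files", files.len()),
        Err(e) => eprintln!("planning error: {e}"),
    }
    Ok(())
}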
93 | | |
94 | | /// If running in a multi-threaded Tokio context, spawns the execution of `input` to a separate task, |
95 | | /// allowing it to run in parallel with an intermediate buffer of size `buffer` |
96 | 40 | pub(crate) fn spawn_buffered( |
97 | 40 | mut input: SendableRecordBatchStream, |
98 | 40 | buffer: usize, |
99 | 40 | ) -> SendableRecordBatchStream { |
100 | | // Use tokio only if running from a multi-threaded tokio context |
101 | 40 | match tokio::runtime::Handle::try_current() { |
102 | 0 | Ok(handle) |
103 | 40 | if handle.runtime_flavor() == tokio::runtime::RuntimeFlavor::MultiThread => |
104 | 0 | { |
105 | 0 | let mut builder = RecordBatchReceiverStream::builder(input.schema(), buffer); |
106 | 0 |
107 | 0 | let sender = builder.tx(); |
108 | 0 |
109 | 0 | builder.spawn(async move { |
110 | 0 | while let Some(item) = input.next().await { |
111 | 0 | if sender.send(item).await.is_err() { |
112 | | // receiver dropped when the query shuts down early (e.g., due to a LIMIT) or errors; |
113 | | // no need to propagate the send error. |
114 | 0 | return Ok(()); |
115 | 0 | } |
116 | | } |
117 | | |
118 | 0 | Ok(()) |
119 | 0 | }); |
120 | 0 |
121 | 0 | builder.build() |
122 | | } |
123 | 40 | _ => input, |
124 | | } |
125 | 40 | } |
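The buffering pattern above is easiest to see in isolation. This stand-alone sketch (plain integers instead of `RecordBatch`es, and a hypothetical buffer size) mirrors it: a bounded channel decouples producer and consumer, and a dropped receiver ends the producer quietly rather than raising an error.

use tokio::sync::mpsc;

#[tokio::main(flavor = "multi_thread")]
async fn main() {
    let (tx, mut rx) = mpsc::channel::<i32>(8); // intermediate buffer of size 8

    tokio::spawn(async move {
        for item in 0..100 {
            if tx.send(item).await.is_err() {
                return; // receiver dropped (early shutdown): stop, don't error
            }
        }
    });

    // consume only a few items, then drop `rx`, ending the producer task
    for _ in 0..3 {
        if let Some(v) = rx.recv().await {
            println!("got {v}");
        }
    }
}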
126 | | |
127 | | /// Computes the statistics for an in-memory RecordBatch |
128 | | /// |
129 | | /// Only computes statistics available in Arrow metadata (row count, byte size, and null counts) |
130 | | /// and does not apply any compute kernels to the actual data. |
131 | 30 | pub fn compute_record_batch_statistics( |
132 | 30 | batches: &[Vec<RecordBatch>], |
133 | 30 | schema: &Schema, |
134 | 30 | projection: Option<Vec<usize>>, |
135 | 30 | ) -> Statistics { |
136 | 30 | let nb_rows = batches.iter().flatten().map(RecordBatch::num_rows).sum(); |
137 | | |
138 | 30 | let projection = match projection { |
139 | 2 | Some(p) => p, |
140 | 28 | None => (0..schema.fields().len()).collect(), |
141 | | }; |
142 | | |
143 | 30 | let total_byte_size = batches |
144 | 30 | .iter() |
145 | 30 | .flatten() |
146 | 103 | .map(|b| { |
147 | 103 | projection |
148 | 103 | .iter() |
149 | 104 | .map(|index| b.column(*index).get_array_memory_size()) |
150 | 103 | .sum::<usize>() |
151 | 103 | }) |
152 | 30 | .sum(); |
153 | 30 | |
154 | 30 | let mut null_counts = vec![0; projection.len()]; |
155 | | |
156 | 105 | for partition in batches.iter() {
157 | 208 | for batch in partition {
158 | 104 | for (stat_index, col_index) in projection.iter().enumerate() {
159 | 104 | null_counts[stat_index] += batch.column(*col_index).null_count(); |
160 | 104 | } |
161 | | } |
162 | | } |
163 | 30 | let column_statistics = null_counts |
164 | 30 | .into_iter() |
165 | 34 | .map(|null_count| { |
166 | 34 | let mut s = ColumnStatistics::new_unknown(); |
167 | 34 | s.null_count = Precision::Exact(null_count); |
168 | 34 | s |
169 | 34 | }) |
170 | 30 | .collect(); |
171 | 30 | |
172 | 30 | Statistics { |
173 | 30 | num_rows: Precision::Exact(nb_rows), |
174 | 30 | total_byte_size: Precision::Exact(total_byte_size), |
175 | 30 | column_statistics, |
176 | 30 | } |
177 | 30 | } |
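A small sketch of `compute_record_batch_statistics` on a single partition; the column name and values are invented, and the facade re-export paths are assumptions.

use std::sync::Arc;

use arrow::array::Int64Array;
use arrow::datatypes::{DataType, Field, Schema};
use arrow::record_batch::RecordBatch;
use datafusion::common::stats::Precision;
use datafusion::physical_plan::common::compute_record_batch_statistics;

fn main() -> datafusion::error::Result<()> {
    let schema = Schema::new(vec![Field::new("v", DataType::Int64, true)]);
    let batch = RecordBatch::try_new(
        Arc::new(schema.clone()),
        vec![Arc::new(Int64Array::from(vec![Some(1), None]))],
    )?;

    // one partition holding one batch; `None` selects every column
    let stats = compute_record_batch_statistics(&[vec![batch]], &schema, None);
    assert_eq!(stats.num_rows, Precision::Exact(2));
    assert_eq!(stats.column_statistics[0].null_count, Precision::Exact(1));
    Ok(())
}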
178 | | |
179 | | /// Writes record batches to a file in Arrow IPC format. |
180 | | pub struct IPCWriter { |
181 | | /// Path of the file being written |
182 | | pub path: PathBuf, |
183 | | /// Inner Arrow IPC file writer |
184 | | pub writer: FileWriter<File>, |
185 | | /// Number of batches written |
186 | | pub num_batches: usize, |
187 | | /// Number of rows written |
188 | | pub num_rows: usize, |
189 | | /// Number of in-memory bytes of the batches written |
190 | | pub num_bytes: usize, |
191 | | } |
192 | | |
193 | | impl IPCWriter { |
194 | | /// Create new writer |
195 | 47 | pub fn new(path: &Path, schema: &Schema) -> Result<Self> { |
196 | 47 | let file = File::create(path).map_err(|e| { |
197 | 0 | DataFusionError::Execution(format!( |
198 | 0 | "Failed to create partition file at {path:?}: {e:?}" |
199 | 0 | )) |
200 | 47 | })?;
201 | | Ok(Self { |
202 | | num_batches: 0, |
203 | | num_rows: 0, |
204 | | num_bytes: 0, |
205 | 47 | path: path.into(), |
206 | 47 | writer: FileWriter::try_new(file, schema)?,
207 | | }) |
208 | 47 | } |
209 | | |
210 | | /// Create new writer with IPC write options |
211 | 0 | pub fn new_with_options( |
212 | 0 | path: &Path, |
213 | 0 | schema: &Schema, |
214 | 0 | write_options: IpcWriteOptions, |
215 | 0 | ) -> Result<Self> { |
216 | 0 | let file = File::create(path).map_err(|e| { |
217 | 0 | DataFusionError::Execution(format!( |
218 | 0 | "Failed to create partition file at {path:?}: {e:?}" |
219 | 0 | )) |
220 | 0 | })?; |
221 | | Ok(Self { |
222 | | num_batches: 0, |
223 | | num_rows: 0, |
224 | | num_bytes: 0, |
225 | 0 | path: path.into(), |
226 | 0 | writer: FileWriter::try_new_with_options(file, schema, write_options)?, |
227 | | }) |
228 | 0 | } |
229 | | /// Write a single batch |
230 | 51 | pub fn write(&mut self, batch: &RecordBatch) -> Result<()> { |
231 | 51 | self.writer.write(batch)?;
232 | 51 | self.num_batches += 1; |
233 | 51 | self.num_rows += batch.num_rows(); |
234 | 51 | let num_bytes: usize = batch.get_array_memory_size(); |
235 | 51 | self.num_bytes += num_bytes; |
236 | 51 | Ok(()) |
237 | 51 | } |
238 | | |
239 | | /// Finish the writer |
240 | 47 | pub fn finish(&mut self) -> Result<()> { |
241 | 47 | self.writer.finish().map_err(Into::into) |
242 | 47 | } |
243 | | |
244 | | /// Path of the file written to |
245 | 0 | pub fn path(&self) -> &Path { |
246 | 0 | &self.path |
247 | 0 | } |
248 | | } |
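A usage sketch for `IPCWriter`; the temp-file name is illustrative, and the re-export path is assumed as above.

use std::sync::Arc;

use arrow::array::Int32Array;
use arrow::datatypes::{DataType, Field, Schema};
use arrow::record_batch::RecordBatch;
use datafusion::physical_plan::common::IPCWriter;

fn main() -> datafusion::error::Result<()> {
    let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]);
    let batch = RecordBatch::try_new(
        Arc::new(schema.clone()),
        vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
    )?;

    let path = std::env::temp_dir().join("ipc_writer_example.arrow");
    let mut writer = IPCWriter::new(&path, &schema)?;
    writer.write(&batch)?;
    writer.finish()?; // writes the IPC footer; required before reading back
    assert_eq!((writer.num_batches, writer.num_rows), (1, 3));
    Ok(())
}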
249 | | |
250 | | /// Checks if the given projection is valid for the given schema. |
251 | 683 | pub fn can_project( |
252 | 683 | schema: &arrow_schema::SchemaRef, |
253 | 683 | projection: Option<&Vec<usize>>, |
254 | 683 | ) -> Result<()> { |
255 | 683 | match projection { |
256 | 0 | Some(columns) => { |
257 | 0 | if columns |
258 | 0 | .iter() |
259 | 0 | .max() |
260 | 0 | .map_or(false, |&i| i >= schema.fields().len()) |
261 | | { |
262 | 0 | Err(arrow_schema::ArrowError::SchemaError(format!( |
263 | 0 | "project index {} out of bounds, max field {}", |
264 | 0 | columns.iter().max().unwrap(), |
265 | 0 | schema.fields().len() |
266 | 0 | )) |
267 | 0 | .into()) |
268 | | } else { |
269 | 0 | Ok(()) |
270 | | } |
271 | | } |
272 | 683 | None => Ok(()), |
273 | | } |
274 | 683 | } |
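A quick sketch exercising `can_project` on a two-column schema (facade re-export path assumed as above).

use std::sync::Arc;

use arrow::datatypes::{DataType, Field, Schema};
use datafusion::physical_plan::common::can_project;

fn main() {
    let schema = Arc::new(Schema::new(vec![
        Field::new("a", DataType::Int32, false),
        Field::new("b", DataType::Utf8, false),
    ]));

    assert!(can_project(&schema, None).is_ok());               // no projection
    assert!(can_project(&schema, Some(&vec![0, 1])).is_ok());  // all in bounds
    assert!(can_project(&schema, Some(&vec![2])).is_err());    // out of bounds
}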
275 | | |
276 | | #[cfg(test)] |
277 | | mod tests { |
278 | | use super::*; |
279 | | |
280 | | use arrow::{ |
281 | | array::{Float32Array, Float64Array, UInt64Array}, |
282 | | datatypes::{DataType, Field}, |
283 | | }; |
284 | | |
285 | | #[test] |
286 | 1 | fn test_compute_record_batch_statistics_empty() -> Result<()> { |
287 | 1 | let schema = Arc::new(Schema::new(vec![ |
288 | 1 | Field::new("f32", DataType::Float32, false), |
289 | 1 | Field::new("f64", DataType::Float64, false), |
290 | 1 | ])); |
291 | 1 | let stats = compute_record_batch_statistics(&[], &schema, Some(vec![0, 1])); |
292 | 1 | |
293 | 1 | assert_eq!(stats.num_rows, Precision::Exact(0)); |
294 | 1 | assert_eq!(stats.total_byte_size, Precision::Exact(0)); |
295 | 1 | Ok(()) |
296 | 1 | } |
297 | | |
298 | | #[test] |
299 | 1 | fn test_compute_record_batch_statistics() -> Result<()> { |
300 | 1 | let schema = Arc::new(Schema::new(vec![ |
301 | 1 | Field::new("f32", DataType::Float32, false), |
302 | 1 | Field::new("f64", DataType::Float64, false), |
303 | 1 | Field::new("u64", DataType::UInt64, false), |
304 | 1 | ])); |
305 | 1 | let batch = RecordBatch::try_new( |
306 | 1 | Arc::clone(&schema), |
307 | 1 | vec![ |
308 | 1 | Arc::new(Float32Array::from(vec![1., 2., 3.])), |
309 | 1 | Arc::new(Float64Array::from(vec![9., 8., 7.])), |
310 | 1 | Arc::new(UInt64Array::from(vec![4, 5, 6])), |
311 | 1 | ], |
312 | 1 | )?;
313 | | |
314 | | // just select f32,f64 |
315 | 1 | let select_projection = Some(vec![0, 1]); |
316 | 1 | let byte_size = batch |
317 | 1 | .project(&select_projection.clone().unwrap()) |
318 | 1 | .unwrap() |
319 | 1 | .get_array_memory_size(); |
320 | 1 | |
321 | 1 | let actual = |
322 | 1 | compute_record_batch_statistics(&[vec![batch]], &schema, select_projection); |
323 | 1 | |
324 | 1 | let expected = Statistics { |
325 | 1 | num_rows: Precision::Exact(3), |
326 | 1 | total_byte_size: Precision::Exact(byte_size), |
327 | 1 | column_statistics: vec![ |
328 | 1 | ColumnStatistics { |
329 | 1 | distinct_count: Precision::Absent, |
330 | 1 | max_value: Precision::Absent, |
331 | 1 | min_value: Precision::Absent, |
332 | 1 | null_count: Precision::Exact(0), |
333 | 1 | }, |
334 | 1 | ColumnStatistics { |
335 | 1 | distinct_count: Precision::Absent, |
336 | 1 | max_value: Precision::Absent, |
337 | 1 | min_value: Precision::Absent, |
338 | 1 | null_count: Precision::Exact(0), |
339 | 1 | }, |
340 | 1 | ], |
341 | 1 | }; |
342 | 1 | |
343 | 1 | assert_eq!(actual, expected); |
344 | 1 | Ok(()) |
345 | 1 | } |
346 | | |
347 | | #[test] |
348 | 1 | fn test_compute_record_batch_statistics_null() -> Result<()> { |
349 | 1 | let schema = |
350 | 1 | Arc::new(Schema::new(vec![Field::new("u64", DataType::UInt64, true)])); |
351 | 1 | let batch1 = RecordBatch::try_new( |
352 | 1 | Arc::clone(&schema), |
353 | 1 | vec![Arc::new(UInt64Array::from(vec![Some(1), None, None]))], |
354 | 1 | )?;
355 | 1 | let batch2 = RecordBatch::try_new( |
356 | 1 | Arc::clone(&schema), |
357 | 1 | vec![Arc::new(UInt64Array::from(vec![Some(1), Some(2), None]))], |
358 | 1 | )?;
359 | 1 | let byte_size = batch1.get_array_memory_size() + batch2.get_array_memory_size(); |
360 | 1 | let actual = |
361 | 1 | compute_record_batch_statistics(&[vec![batch1], vec![batch2]], &schema, None); |
362 | 1 | |
363 | 1 | let expected = Statistics { |
364 | 1 | num_rows: Precision::Exact(6), |
365 | 1 | total_byte_size: Precision::Exact(byte_size), |
366 | 1 | column_statistics: vec![ColumnStatistics { |
367 | 1 | distinct_count: Precision::Absent, |
368 | 1 | max_value: Precision::Absent, |
369 | 1 | min_value: Precision::Absent, |
370 | 1 | null_count: Precision::Exact(3), |
371 | 1 | }], |
372 | 1 | }; |
373 | 1 | |
374 | 1 | assert_eq!(actual, expected); |
375 | 1 | Ok(()) |
376 | 1 | } |
377 | | } |