Coverage Report

Created: 2024-10-13 08:39

/Users/andrewlamb/Software/datafusion/datafusion/physical-plan/src/sorts/stream.rs
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
use crate::sorts::cursor::{ArrayValues, CursorArray, RowValues};
19
use crate::SendableRecordBatchStream;
20
use crate::{PhysicalExpr, PhysicalSortExpr};
21
use arrow::array::Array;
22
use arrow::datatypes::Schema;
23
use arrow::record_batch::RecordBatch;
24
use arrow::row::{RowConverter, SortField};
25
use datafusion_common::Result;
26
use datafusion_execution::memory_pool::MemoryReservation;
27
use futures::stream::{Fuse, StreamExt};
28
use std::marker::PhantomData;
29
use std::sync::Arc;
30
use std::task::{ready, Context, Poll};
31
32
/// A [`Stream`](futures::Stream)-like source made up of several partitions,
/// each of which can be polled on its own, though never concurrently
///
/// Sort preserving merge uses this to keep the cursor merging logic
/// independent of where the cursors come from, the aim being to allow any
/// row encoding performed for intermediate sorts to be preserved
pub trait PartitionedStream: Send + std::fmt::Debug {
    /// The item type produced when a partition is polled
    type Output;

    /// Returns the number of partitions
    fn partitions(&self) -> usize;

    /// Polls the partition identified by `stream_idx` for its next item
    ///
    /// The return type mirrors [`Stream::poll_next`](futures::Stream::poll_next),
    /// with the addition of the index selecting which partition to poll.
    fn poll_next(
        &mut self,
        cx: &mut Context<'_>,
        stream_idx: usize,
    ) -> Poll<Option<Self::Output>>;
}
50
51
/// A newtype wrapper around a set of fused [`SendableRecordBatchStream`]
52
/// that implements debug, and skips over empty [`RecordBatch`]
53
struct FusedStreams(Vec<Fuse<SendableRecordBatchStream>>);
54
55
impl std::fmt::Debug for FusedStreams {
56
0
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
57
0
        f.debug_struct("FusedStreams")
58
0
            .field("num_streams", &self.0.len())
59
0
            .finish()
60
0
    }
61
}
62
63
impl FusedStreams {
64
927
    fn poll_next(
65
927
        &mut self,
66
927
        cx: &mut Context<'_>,
67
927
        stream_idx: usize,
68
927
    ) -> Poll<Option<Result<RecordBatch>>> {
69
        loop {
70
927
            match 
ready!197
(self.0[stream_idx].poll_next_unpin(cx)) {
71
676
                Some(Ok(
b0
)) if b.num_rows() ==
0 => continue0
,
72
730
                r => return Poll::Ready(r),
73
            }
74
        }
75
927
    }
76
}
77
78
/// A [`PartitionedStream`] that wraps a set of [`SendableRecordBatchStream`]
79
/// and computes [`RowValues`] based on the provided [`PhysicalSortExpr`]
80
#[derive(Debug)]
81
pub struct RowCursorStream {
82
    /// Converter to convert output of physical expressions
83
    converter: RowConverter,
84
    /// The physical expressions to sort by
85
    column_expressions: Vec<Arc<dyn PhysicalExpr>>,
86
    /// Input streams
87
    streams: FusedStreams,
88
    /// Tracks the memory used by `converter`
89
    reservation: MemoryReservation,
90
}
91
92
impl RowCursorStream {
93
5
    pub fn try_new(
94
5
        schema: &Schema,
95
5
        expressions: &[PhysicalSortExpr],
96
5
        streams: Vec<SendableRecordBatchStream>,
97
5
        reservation: MemoryReservation,
98
5
    ) -> Result<Self> {
99
5
        let sort_fields = expressions
100
5
            .iter()
101
10
            .map(|expr| {
102
10
                let data_type = expr.expr.data_type(schema)
?0
;
103
10
                Ok(SortField::new_with_options(data_type, expr.options))
104
10
            })
105
5
            .collect::<Result<Vec<_>>>()
?0
;
106
107
11
        let 
streams = streams.into_iter().map(5
|s| s.fuse()
).collect()5
;
108
5
        let converter = RowConverter::new(sort_fields)
?0
;
109
5
        Ok(Self {
110
5
            converter,
111
5
            reservation,
112
10
            column_expressions: expressions.iter().map(|x| Arc::clone(&x.expr)).collect(),
113
5
            streams: FusedStreams(streams),
114
5
        })
115
5
    }
116
117
11
    fn convert_batch(&mut self, batch: &RecordBatch) -> Result<RowValues> {
118
11
        let cols = self
119
11
            .column_expressions
120
11
            .iter()
121
22
            .map(|expr| expr.evaluate(batch)
?0
.into_array(batch.num_rows()))
122
11
            .collect::<Result<Vec<_>>>()
?0
;
123
124
11
        let rows = self.converter.convert_columns(&cols)
?0
;
125
11
        self.reservation.try_resize(self.converter.size())
?0
;
126
127
        // track the memory in the newly created Rows.
128
11
        let mut rows_reservation = self.reservation.new_empty();
129
11
        rows_reservation.try_grow(rows.size())
?0
;
130
11
        Ok(RowValues::new(rows, rows_reservation))
131
11
    }
132
}
133
134
impl PartitionedStream for RowCursorStream {
135
    type Output = Result<(RowValues, RecordBatch)>;
136
137
5
    fn partitions(&self) -> usize {
138
5
        self.streams.0.len()
139
5
    }
140
141
22
    fn poll_next(
142
22
        &mut self,
143
22
        cx: &mut Context<'_>,
144
22
        stream_idx: usize,
145
22
    ) -> Poll<Option<Self::Output>> {
146
22
        Poll::Ready(
ready!0
(self.streams.poll_next(cx, stream_idx)).map(|r| {
147
11
            r.and_then(|batch| {
148
11
                let cursor = self.convert_batch(&batch)
?0
;
149
11
                Ok((cursor, batch))
150
11
            })
151
22
        }))
152
22
    }
153
}
154
155
/// Specialized stream for sorts on single primitive columns
156
pub struct FieldCursorStream<T: CursorArray> {
157
    /// The physical expressions to sort by
158
    sort: PhysicalSortExpr,
159
    /// Input streams
160
    streams: FusedStreams,
161
    phantom: PhantomData<fn(T) -> T>,
162
}
163
164
impl<T: CursorArray> std::fmt::Debug for FieldCursorStream<T> {
165
0
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
166
0
        f.debug_struct("PrimitiveCursorStream")
167
0
            .field("num_streams", &self.streams)
168
0
            .finish()
169
0
    }
170
}
171
172
impl<T: CursorArray> FieldCursorStream<T> {
173
14
    pub fn new(sort: PhysicalSortExpr, streams: Vec<SendableRecordBatchStream>) -> Self {
174
45
        let streams = streams.into_iter().map(|s| s.fuse()).collect();
175
14
        Self {
176
14
            sort,
177
14
            streams: FusedStreams(streams),
178
14
            phantom: Default::default(),
179
14
        }
180
14
    }
181
182
665
    fn convert_batch(&mut self, batch: &RecordBatch) -> Result<ArrayValues<T::Values>> {
183
665
        let value = self.sort.expr.evaluate(batch)
?0
;
184
665
        let array = value.into_array(batch.num_rows())
?0
;
185
665
        let array = array.as_any().downcast_ref::<T>().expect("field values");
186
665
        Ok(ArrayValues::new(self.sort.options, array))
187
665
    }
188
}
189
190
impl<T: CursorArray> PartitionedStream for FieldCursorStream<T> {
191
    type Output = Result<(ArrayValues<T::Values>, RecordBatch)>;
192
193
14
    fn partitions(&self) -> usize {
194
14
        self.streams.0.len()
195
14
    }
196
197
905
    fn poll_next(
198
905
        &mut self,
199
905
        cx: &mut Context<'_>,
200
905
        stream_idx: usize,
201
905
    ) -> Poll<Option<Self::Output>> {
202
905
        Poll::Ready(
ready!197
(self.streams.poll_next(cx, stream_idx)).
map(708
|r| {
203
665
            r.and_then(|batch| {
204
665
                let cursor = self.convert_batch(&batch)
?0
;
205
665
                Ok((cursor, batch))
206
665
            })
207
708
        }))
208
905
    }
209
}