Coverage Report

Created: 2024-10-13 08:39

/Users/andrewlamb/Software/datafusion/datafusion/physical-plan/src/aggregates/order/full.rs
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
use datafusion_expr::EmitTo;
19
20
/// Tracks grouping state when the data is ordered entirely by its
21
/// group keys
22
///
23
/// When the group values are sorted, as soon as we see group `n+1` we
24
/// know we will never see any rows for group `n` again and thus they
25
/// can be emitted.
26
///
27
/// For example, given `SUM(amt) GROUP BY id` if the input is sorted
28
/// by `id` as soon as a new `id` value is seen all previous values
29
/// can be emitted.
30
///
31
/// The state is tracked like this:
32
///
33
/// ```text
34
///      ┌─────┐   ┌──────────────────┐
35
///      │┌───┐│   │ ┌──────────────┐ │         ┏━━━━━━━━━━━━━━┓
36
///      ││ 0 ││   │ │     123      │ │   ┌─────┃      13      ┃
37
///      │└───┘│   │ └──────────────┘ │   │     ┗━━━━━━━━━━━━━━┛
38
///      │ ... │   │    ...           │   │
39
///      │┌───┐│   │ ┌──────────────┐ │   │         current
40
///      ││12 ││   │ │     234      │ │   │
41
///      │├───┤│   │ ├──────────────┤ │   │
42
///      ││12 ││   │ │     234      │ │   │
43
///      │├───┤│   │ ├──────────────┤ │   │
44
///      ││13 ││   │ │     456      │◀┼───┘
45
///      │└───┘│   │ └──────────────┘ │
46
///      └─────┘   └──────────────────┘
47
///
48
///  group indices    group_values        current tracks the most
49
/// (in group value                          recent group index
50
///      order)
51
/// ```
52
///
53
/// In this diagram, the current group is `13`, and thus groups
54
/// `0..=12` can be emitted. Note that `13` can not yet be emitted as
55
/// there may be more values in the next batch with the same group_id.
56
#[derive(Debug)]
57
pub struct GroupOrderingFull {
58
    /// Progress through the ordered input: not started, in progress, or complete
    state: State,
59
}
60
61
/// Progress of a [`GroupOrderingFull`] through its input
#[derive(Debug)]
62
enum State {
63
    /// Seen no input yet
64
    Start,
65
66
    /// Data is in progress. `current` is the index of the group for which
67
    /// values are being generated. Only groups before `current` can be emitted
68
    InProgress { current: usize },
69
70
    /// Seen end of input: all groups can be emitted
71
    Complete,
72
}
73
74
impl GroupOrderingFull {
75
4
    /// Creates a new tracker in the `Start` state (no input seen yet)
    pub fn new() -> Self {
76
4
        Self {
77
4
            state: State::Start,
78
4
        }
79
4
    }
80
81
    /// Returns how many groups can be emitted, or `None` if no data can be emitted yet
82
12
    pub fn emit_to(&self) -> Option<EmitTo> {
83
12
        match &self.state {
84
0
            State::Start => None,
85
12
            State::InProgress { current, .. } => {
86
12
                if *current == 0 {
87
                    // Can not emit anything while still on the first group
88
4
                    None
89
                } else {
90
                    // otherwise everything prior to the current group can be emitted
91
8
                    Some(EmitTo::First(*current))
92
                }
93
            }
94
0
            State::Complete { .. } => Some(EmitTo::All),
95
        }
96
12
    }
97
98
    /// Removes the first `n` groups from the internal state, shifting
99
    /// all existing indexes down by `n`
    ///
    /// # Panics
    ///
    /// Panics if no input has been seen yet, if the input is already
    /// complete, or if `n` is greater than the current group index.
100
8
    pub fn remove_groups(&mut self, n: usize) {
101
8
        match &mut self.state {
102
0
            State::Start => panic!("invalid state: start"),
103
8
            State::InProgress { current } => {
104
8
                // shift down by n
105
8
                assert!(*current >= n);
106
8
                *current -= n;
107
            }
108
0
            State::Complete { .. } => panic!("invalid state: complete"),
109
        }
110
8
    }
111
112
    /// Marks the input as complete, so that all outstanding groups can be emitted
113
4
    pub fn input_done(&mut self) {
114
4
        self.state = State::Complete;
115
4
    }
116
117
    /// Called when new groups are added in a batch. See documentation
118
    /// on [`super::GroupOrdering::new_groups`]
    ///
    /// # Panics
    ///
    /// Panics if `total_num_groups` is 0, if the resulting maximum group
    /// index is smaller than the current group index, or if called after
    /// the input was marked complete.
119
12
    pub fn new_groups(&mut self, total_num_groups: usize) {
120
12
        assert_ne!(total_num_groups, 0);
121
122
        // The highest group index seen so far becomes the new `current`
123
12
        let max_group_index = total_num_groups - 1;
124
12
        self.state = match self.state {
125
4
            State::Start => State::InProgress {
126
4
                current: max_group_index,
127
4
            },
128
8
            State::InProgress { current } => {
129
8
                // the current group index must never decrease between calls
130
8
                assert!(current <= max_group_index, 
"{current} <= {max_group_index}"0
);
131
8
                State::InProgress {
132
8
                    current: max_group_index,
133
8
                }
134
            }
135
            State::Complete { .. } => {
136
0
                panic!("Saw new group after input was complete");
137
            }
138
        };
139
12
    }
140
141
28
    /// Returns the size of this struct in bytes, as given by `size_of::<Self>()`
    pub(crate) fn size(&self) -> usize {
142
28
        std::mem::size_of::<Self>()
143
28
    }
144
}
145
146
impl Default for GroupOrderingFull {
147
0
    /// Same as [`GroupOrderingFull::new`]
    fn default() -> Self {
148
0
        Self::new()
149
0
    }
150
}