Coverage Report

Created: 2024-10-13 08:39

/Users/andrewlamb/Software/datafusion/datafusion/physical-plan/src/aggregates/order/mod.rs
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
use arrow_array::ArrayRef;
19
use arrow_schema::Schema;
20
use datafusion_common::Result;
21
use datafusion_expr::EmitTo;
22
use datafusion_physical_expr::PhysicalSortExpr;
23
24
mod full;
25
mod partial;
26
27
use crate::InputOrderMode;
28
pub use full::GroupOrderingFull;
29
pub use partial::GroupOrderingPartial;
30
31
/// Ordering information for each group in the hash table
32
#[derive(Debug)]
33
pub enum GroupOrdering {
34
    /// Groups are not ordered
35
    None,
36
    /// Groups are ordered by a subset of the group keys
37
    Partial(GroupOrderingPartial),
38
    /// Groups are entirely contiguous.
39
    Full(GroupOrderingFull),
40
}
41
42
impl GroupOrdering {
43
    /// Create a `GroupOrdering` for the specified ordering
44
70
    pub fn try_new(
45
70
        input_schema: &Schema,
46
70
        mode: &InputOrderMode,
47
70
        ordering: &[PhysicalSortExpr],
48
70
    ) -> Result<Self> {
49
70
        match mode {
50
70
            InputOrderMode::Linear => Ok(GroupOrdering::None),
51
0
            InputOrderMode::PartiallySorted(order_indices) => {
52
0
                GroupOrderingPartial::try_new(input_schema, order_indices, ordering)
53
0
                    .map(GroupOrdering::Partial)
54
            }
55
0
            InputOrderMode::Sorted => Ok(GroupOrdering::Full(GroupOrderingFull::new())),
56
        }
57
70
    }
58
59
    /// Returns how many groups can be emitted, or `None` if no data can be emitted
60
128
    pub fn emit_to(&self) -> Option<EmitTo> {
61
128
        match self {
62
116
            GroupOrdering::None => None,
63
0
            GroupOrdering::Partial(partial) => partial.emit_to(),
64
12
            GroupOrdering::Full(full) => full.emit_to(),
65
        }
66
128
    }
67
68
    /// Updates the state to reflect that the input is done
69
70
    pub fn input_done(&mut self) {
70
70
        match self {
71
66
            GroupOrdering::None => {}
72
0
            GroupOrdering::Partial(partial) => partial.input_done(),
73
4
            GroupOrdering::Full(full) => full.input_done(),
74
        }
75
70
    }
76
77
    /// Removes the first `n` groups from the internal state, shifting
78
    /// all existing indexes down by `n`
79
36
    pub fn remove_groups(&mut self, n: usize) {
80
36
        match self {
81
28
            GroupOrdering::None => {}
82
0
            GroupOrdering::Partial(partial) => partial.remove_groups(n),
83
8
            GroupOrdering::Full(full) => full.remove_groups(n),
84
        }
85
36
    }
86
87
    /// Called when new groups are added in a batch
88
    ///
89
    /// * `total_num_groups`: total number of groups (so max
90
    ///   group_index is total_num_groups - 1).
91
    ///
92
    /// * `batch_group_values`: group key values for *each row* in the batch
93
    ///
94
    /// * `group_indices`: indices for each row in the batch
95
    ///
96
    /// Returns an error only if updating the `Partial` ordering state fails.
97
128
    pub fn new_groups(
98
128
        &mut self,
99
128
        batch_group_values: &[ArrayRef],
100
128
        group_indices: &[usize],
101
128
        total_num_groups: usize,
102
128
    ) -> Result<()> {
103
128
        match self {
104
116
            GroupOrdering::None => {}
105
0
            GroupOrdering::Partial(partial) => {
106
0
                partial.new_groups(
107
0
                    batch_group_values,
108
0
                    group_indices,
109
0
                    total_num_groups,
110
0
                )?;
111
            }
112
12
            GroupOrdering::Full(full) => {
113
12
                full.new_groups(total_num_groups);
114
12
            }
115
        };
116
128
        Ok(())
117
128
    }
118
119
    /// Return the size of memory used by the ordering state, in bytes
120
371
    pub fn size(&self) -> usize {
121
371
        std::mem::size_of::<Self>()
122
371
            + match self {
123
343
                GroupOrdering::None => 0,
124
0
                GroupOrdering::Partial(partial) => partial.size(),
125
28
                GroupOrdering::Full(full) => full.size(),
126
            }
127
371
    }
128
}