Coverage Report

Created: 2024-10-13 08:39

/Users/andrewlamb/Software/datafusion/datafusion/physical-expr/src/window/cume_dist.rs
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
//! Defines physical expression for `cume_dist` that can be evaluated
19
//! at runtime during query execution
20
21
use crate::window::BuiltInWindowFunctionExpr;
22
use crate::PhysicalExpr;
23
use arrow::array::ArrayRef;
24
use arrow::array::Float64Array;
25
use arrow::datatypes::{DataType, Field};
26
use datafusion_common::Result;
27
use datafusion_expr::PartitionEvaluator;
28
use std::any::Any;
29
use std::iter;
30
use std::ops::Range;
31
use std::sync::Arc;
32
33
/// Window expression for `cume_dist`: stores the output column name and data type, and creates the evaluator that computes the values
34
#[derive(Debug)]
35
pub struct CumeDist {
36
    name: String, // returned by `name()` and used as the output field name in `field()`
37
    /// Output data type reported by `field()`
38
    data_type: DataType,
39
}
40
41
/// Create a `cume_dist` window function expression called `name` whose output field reports `data_type`
42
0
pub fn cume_dist(name: String, data_type: &DataType) -> CumeDist {
43
0
    CumeDist {
44
0
        name, // takes ownership of the caller's string
45
0
        data_type: data_type.clone(), // the struct owns its own copy; the caller keeps the borrowed original
46
0
    }
47
0
}
48
49
impl BuiltInWindowFunctionExpr for CumeDist { // metadata accessors plus the evaluator factory
50
    /// Return a reference to Any that can be used for downcasting
51
0
    fn as_any(&self) -> &dyn Any {
52
0
        self
53
0
    }
54
55
0
    fn field(&self) -> Result<Field> { // describes the output column
56
0
        let nullable = false; // the output field is always declared non-nullable
57
0
        Ok(Field::new(self.name(), self.data_type.clone(), nullable)) // name and type come straight from the struct
58
0
    }
59
60
0
    fn expressions(&self) -> Vec<Arc<dyn PhysicalExpr>> { // input expressions of this function
61
0
        vec![] // none: the evaluator works only from the row count and rank ranges
62
0
    }
63
64
0
    fn name(&self) -> &str { // borrowed view of the stored name
65
0
        &self.name
66
0
    }
67
68
0
    fn create_evaluator(&self) -> Result<Box<dyn PartitionEvaluator>> { // never returns Err in this implementation
69
0
        Ok(Box::new(CumeDistEvaluator {})) // the evaluator has no fields, so nothing is copied from `self`
70
0
    }
71
}
72
73
#[derive(Debug)] // Debug is the only derived trait
74
pub(crate) struct CumeDistEvaluator; // field-less evaluator for `cume_dist`; all inputs arrive as method arguments
75
76
impl PartitionEvaluator for CumeDistEvaluator { // every result is derived from the call arguments; `self` holds no state
77
0
    fn evaluate_all_with_rank( // returns a Float64Array holding one value per row covered by `ranks_in_partition`
78
0
        &self,
79
0
        num_rows: usize, // used only as the divisor for each running count
80
0
        ranks_in_partition: &[Range<usize>], // presumably consecutive peer-group row ranges, in order -- confirm against caller
81
0
    ) -> Result<ArrayRef> {
82
0
        let scalar = num_rows as f64; // divisor, converted once outside the closure
83
0
        let result = Float64Array::from_iter_values(
84
0
            ranks_in_partition
85
0
                .iter()
86
0
                .scan(0_u64, |acc, range| { // `acc` is the running count of rows in the ranges seen so far
87
0
                    let len = range.end - range.start; // number of rows in this range
88
0
                    *acc += len as u64; // add this range's own rows before dividing
89
0
                    let value: f64 = (*acc as f64) / scalar; // NOTE(review): nothing checks that the range lengths sum to `num_rows`; a non-empty range with `num_rows == 0` would divide by 0.0
90
0
                    let result = iter::repeat(value).take(len); // every row of the range gets the same value
91
0
                    Some(result) // always `Some`, so `scan` never stops early
92
0
                })
93
0
                .flatten(), // concatenate the per-range runs into one flat sequence
94
0
        );
95
0
        Ok(Arc::new(result)) // this implementation has no error path
96
0
    }
97
98
0
    fn include_rank(&self) -> bool { // presumably signals the caller to use `evaluate_all_with_rank` -- confirm against the trait docs
99
0
        true
100
0
    }
101
}
102
103
#[cfg(test)] // compiled only for test builds
104
mod tests {
105
    use super::*;
106
    use datafusion_common::cast::as_float64_array;
107
108
    fn test_i32_result( // helper: despite the `i32` in its name, it compares f64 values
109
        expr: &CumeDist,
110
        num_rows: usize, // forwarded as `num_rows` to `evaluate_all_with_rank`
111
        ranks: Vec<Range<usize>>, // forwarded as the rank ranges
112
        expected: Vec<f64>, // values the evaluator output must equal exactly
113
    ) -> Result<()> {
114
        let result = expr
115
            .create_evaluator()?
116
            .evaluate_all_with_rank(num_rows, &ranks)?;
117
        let result = as_float64_array(&result)?; // downcast the ArrayRef; a non-Float64 array surfaces as Err
118
        let result = result.values(); // raw values of the array
119
        assert_eq!(expected, *result); // exact floating-point equality, no tolerance
120
        Ok(())
121
    }
122
123
    #[test]
124
    #[allow(clippy::single_range_in_vec_init)] // the single-range cases below write `vec![0..1]` and `vec![0..2]` on purpose
125
    fn test_cume_dist() -> Result<()> {
126
        let r = cume_dist("arr".into(), &DataType::Float64);
127
128
        let expected = vec![0.0; 0]; // empty vector: zero rows and no ranges give an empty result
129
        test_i32_result(&r, 0, vec![], expected)?;
130
131
        let expected = vec![1.0; 1]; // a single row gets 1.0
132
        test_i32_result(&r, 1, vec![0..1], expected)?;
133
134
        let expected = vec![1.0; 2]; // one range covering both rows: each gets 2/2
135
        test_i32_result(&r, 2, vec![0..2], expected)?;
136
137
        let expected = vec![0.5, 0.5, 1.0, 1.0]; // two ranges of two rows: 2/4, then 4/4
138
        test_i32_result(&r, 4, vec![0..2, 2..4], expected)?;
139
140
        let expected = vec![0.25, 0.5, 0.75, 1.0]; // four single-row ranges: running count / 4
141
        test_i32_result(&r, 4, vec![0..1, 1..2, 2..3, 3..4], expected)?;
142
143
        Ok(())
144
    }
145
}