Coverage Report

Created: 2024-10-13 08:39

/Users/andrewlamb/Software/datafusion/datafusion/physical-expr/src/expressions/like.rs
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
use std::hash::{Hash, Hasher};
19
use std::{any::Any, sync::Arc};
20
21
use crate::{physical_expr::down_cast_any_ref, PhysicalExpr};
22
23
use arrow::record_batch::RecordBatch;
24
use arrow_schema::{DataType, Schema};
25
use datafusion_common::{internal_err, Result};
26
use datafusion_expr::ColumnarValue;
27
use datafusion_physical_expr_common::datum::apply_cmp;
28
29
// Like expression
30
#[derive(Debug, Hash)]
31
pub struct LikeExpr {
32
    negated: bool,
33
    case_insensitive: bool,
34
    expr: Arc<dyn PhysicalExpr>,
35
    pattern: Arc<dyn PhysicalExpr>,
36
}
37
38
impl LikeExpr {
39
0
    pub fn new(
40
0
        negated: bool,
41
0
        case_insensitive: bool,
42
0
        expr: Arc<dyn PhysicalExpr>,
43
0
        pattern: Arc<dyn PhysicalExpr>,
44
0
    ) -> Self {
45
0
        Self {
46
0
            negated,
47
0
            case_insensitive,
48
0
            expr,
49
0
            pattern,
50
0
        }
51
0
    }
52
53
    /// Is negated
54
0
    pub fn negated(&self) -> bool {
55
0
        self.negated
56
0
    }
57
58
    /// Is case insensitive
59
0
    pub fn case_insensitive(&self) -> bool {
60
0
        self.case_insensitive
61
0
    }
62
63
    /// Input expression
64
0
    pub fn expr(&self) -> &Arc<dyn PhysicalExpr> {
65
0
        &self.expr
66
0
    }
67
68
    /// Pattern expression
69
0
    pub fn pattern(&self) -> &Arc<dyn PhysicalExpr> {
70
0
        &self.pattern
71
0
    }
72
73
    /// Operator name
74
0
    fn op_name(&self) -> &str {
75
0
        match (self.negated, self.case_insensitive) {
76
0
            (false, false) => "LIKE",
77
0
            (true, false) => "NOT LIKE",
78
0
            (false, true) => "ILIKE",
79
0
            (true, true) => "NOT ILIKE",
80
        }
81
0
    }
82
}
83
84
impl std::fmt::Display for LikeExpr {
85
0
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
86
0
        write!(f, "{} {} {}", self.expr, self.op_name(), self.pattern)
87
0
    }
88
}
89
90
impl PhysicalExpr for LikeExpr {
91
0
    fn as_any(&self) -> &dyn Any {
92
0
        self
93
0
    }
94
95
0
    fn data_type(&self, _input_schema: &Schema) -> Result<DataType> {
96
0
        Ok(DataType::Boolean)
97
0
    }
98
99
0
    fn nullable(&self, input_schema: &Schema) -> Result<bool> {
100
0
        Ok(self.expr.nullable(input_schema)? || self.pattern.nullable(input_schema)?)
101
0
    }
102
103
0
    fn evaluate(&self, batch: &RecordBatch) -> Result<ColumnarValue> {
104
        use arrow::compute::*;
105
0
        let lhs = self.expr.evaluate(batch)?;
106
0
        let rhs = self.pattern.evaluate(batch)?;
107
0
        match (self.negated, self.case_insensitive) {
108
0
            (false, false) => apply_cmp(&lhs, &rhs, like),
109
0
            (false, true) => apply_cmp(&lhs, &rhs, ilike),
110
0
            (true, false) => apply_cmp(&lhs, &rhs, nlike),
111
0
            (true, true) => apply_cmp(&lhs, &rhs, nilike),
112
        }
113
0
    }
114
115
0
    fn children(&self) -> Vec<&Arc<dyn PhysicalExpr>> {
116
0
        vec![&self.expr, &self.pattern]
117
0
    }
118
119
0
    fn with_new_children(
120
0
        self: Arc<Self>,
121
0
        children: Vec<Arc<dyn PhysicalExpr>>,
122
0
    ) -> Result<Arc<dyn PhysicalExpr>> {
123
0
        Ok(Arc::new(LikeExpr::new(
124
0
            self.negated,
125
0
            self.case_insensitive,
126
0
            Arc::clone(&children[0]),
127
0
            Arc::clone(&children[1]),
128
0
        )))
129
0
    }
130
131
0
    fn dyn_hash(&self, state: &mut dyn Hasher) {
132
0
        let mut s = state;
133
0
        self.hash(&mut s);
134
0
    }
135
}
136
137
impl PartialEq<dyn Any> for LikeExpr {
138
0
    fn eq(&self, other: &dyn Any) -> bool {
139
0
        down_cast_any_ref(other)
140
0
            .downcast_ref::<Self>()
141
0
            .map(|x| {
142
0
                self.negated == x.negated
143
0
                    && self.case_insensitive == x.case_insensitive
144
0
                    && self.expr.eq(&x.expr)
145
0
                    && self.pattern.eq(&x.pattern)
146
0
            })
147
0
            .unwrap_or(false)
148
0
    }
149
}
150
151
/// used for optimize Dictionary like
152
0
fn can_like_type(from_type: &DataType) -> bool {
153
0
    match from_type {
154
0
        DataType::Dictionary(_, inner_type_from) => **inner_type_from == DataType::Utf8,
155
0
        _ => false,
156
    }
157
0
}
158
159
/// Create a like expression, erroring if the argument types are not compatible.
160
0
pub fn like(
161
0
    negated: bool,
162
0
    case_insensitive: bool,
163
0
    expr: Arc<dyn PhysicalExpr>,
164
0
    pattern: Arc<dyn PhysicalExpr>,
165
0
    input_schema: &Schema,
166
0
) -> Result<Arc<dyn PhysicalExpr>> {
167
0
    let expr_type = &expr.data_type(input_schema)?;
168
0
    let pattern_type = &pattern.data_type(input_schema)?;
169
0
    if !expr_type.eq(pattern_type) && !can_like_type(expr_type) {
170
0
        return internal_err!(
171
0
            "The type of {expr_type} AND {pattern_type} of like physical should be same"
172
0
        );
173
0
    }
174
0
    Ok(Arc::new(LikeExpr::new(
175
0
        negated,
176
0
        case_insensitive,
177
0
        expr,
178
0
        pattern,
179
0
    )))
180
0
}
181
182
#[cfg(test)]
mod test {
    use super::*;
    use crate::expressions::col;
    use arrow::array::*;
    use arrow_schema::Field;
    use datafusion_common::cast::as_boolean_array;

    /// Evaluates a like expression over two `Utf8` columns `a` (input) and
    /// `b` (pattern) and asserts that the boolean result equals `$VEC`.
    ///
    /// Must be invoked inside a function returning `Result`, because the
    /// expansion uses `?`.
    macro_rules! test_like {
        ($A_VEC:expr, $B_VEC:expr, $VEC:expr, $NULLABLE: expr, $NEGATED:expr, $CASE_INSENSITIVE:expr,) => {{
            let input = StringArray::from($A_VEC);
            let patterns = StringArray::from($B_VEC);
            let schema = Schema::new(vec![
                Field::new("a", DataType::Utf8, $NULLABLE),
                Field::new("b", DataType::Utf8, $NULLABLE),
            ]);

            let like_expr = like(
                $NEGATED,
                $CASE_INSENSITIVE,
                col("a", &schema)?,
                col("b", &schema)?,
                &schema,
            )?;
            let batch = RecordBatch::try_new(
                Arc::new(schema.clone()),
                vec![Arc::new(input), Arc::new(patterns)],
            )?;

            // Evaluate and materialise the result as a boolean array.
            let evaluated = like_expr
                .evaluate(&batch)?
                .into_array(batch.num_rows())
                .expect("Failed to convert to array");
            let actual =
                as_boolean_array(&evaluated).expect("failed to downcast to BooleanArray");

            let expected = &BooleanArray::from($VEC);
            assert_eq!(expected, actual);
        }};
    }

    #[test]
    fn like_op() -> Result<()> {
        // LIKE
        test_like!(
            vec!["hello world", "world"],
            vec!["%hello%", "%hello%"],
            vec![true, false],
            false,
            false,
            false,
        );
        // NOT LIKE (nullable columns)
        test_like!(
            vec![Some("hello world"), None, Some("world")],
            vec![Some("%hello%"), None, Some("%hello%")],
            vec![Some(false), None, Some(true)],
            true,
            true,
            false,
        );
        // ILIKE
        test_like!(
            vec!["hello world", "world"],
            vec!["%helLo%", "%helLo%"],
            vec![true, false],
            false,
            false,
            true,
        );
        // NOT ILIKE (nullable columns)
        test_like!(
            vec![Some("hello world"), None, Some("world")],
            vec![Some("%helLo%"), None, Some("%helLo%")],
            vec![Some(false), None, Some(true)],
            true,
            true,
            true,
        );

        Ok(())
    }
}