/Users/andrewlamb/Software/datafusion/datafusion/physical-expr/src/expressions/like.rs
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | use std::hash::{Hash, Hasher}; |
19 | | use std::{any::Any, sync::Arc}; |
20 | | |
21 | | use crate::{physical_expr::down_cast_any_ref, PhysicalExpr}; |
22 | | |
23 | | use arrow::record_batch::RecordBatch; |
24 | | use arrow_schema::{DataType, Schema}; |
25 | | use datafusion_common::{internal_err, Result}; |
26 | | use datafusion_expr::ColumnarValue; |
27 | | use datafusion_physical_expr_common::datum::apply_cmp; |
28 | | |
29 | | // Like expression |
30 | | #[derive(Debug, Hash)] |
31 | | pub struct LikeExpr { |
32 | | negated: bool, |
33 | | case_insensitive: bool, |
34 | | expr: Arc<dyn PhysicalExpr>, |
35 | | pattern: Arc<dyn PhysicalExpr>, |
36 | | } |
37 | | |
38 | | impl LikeExpr { |
39 | 0 | pub fn new( |
40 | 0 | negated: bool, |
41 | 0 | case_insensitive: bool, |
42 | 0 | expr: Arc<dyn PhysicalExpr>, |
43 | 0 | pattern: Arc<dyn PhysicalExpr>, |
44 | 0 | ) -> Self { |
45 | 0 | Self { |
46 | 0 | negated, |
47 | 0 | case_insensitive, |
48 | 0 | expr, |
49 | 0 | pattern, |
50 | 0 | } |
51 | 0 | } |
52 | | |
53 | | /// Is negated |
54 | 0 | pub fn negated(&self) -> bool { |
55 | 0 | self.negated |
56 | 0 | } |
57 | | |
58 | | /// Is case insensitive |
59 | 0 | pub fn case_insensitive(&self) -> bool { |
60 | 0 | self.case_insensitive |
61 | 0 | } |
62 | | |
63 | | /// Input expression |
64 | 0 | pub fn expr(&self) -> &Arc<dyn PhysicalExpr> { |
65 | 0 | &self.expr |
66 | 0 | } |
67 | | |
68 | | /// Pattern expression |
69 | 0 | pub fn pattern(&self) -> &Arc<dyn PhysicalExpr> { |
70 | 0 | &self.pattern |
71 | 0 | } |
72 | | |
73 | | /// Operator name |
74 | 0 | fn op_name(&self) -> &str { |
75 | 0 | match (self.negated, self.case_insensitive) { |
76 | 0 | (false, false) => "LIKE", |
77 | 0 | (true, false) => "NOT LIKE", |
78 | 0 | (false, true) => "ILIKE", |
79 | 0 | (true, true) => "NOT ILIKE", |
80 | | } |
81 | 0 | } |
82 | | } |
83 | | |
84 | | impl std::fmt::Display for LikeExpr { |
85 | 0 | fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { |
86 | 0 | write!(f, "{} {} {}", self.expr, self.op_name(), self.pattern) |
87 | 0 | } |
88 | | } |
89 | | |
90 | | impl PhysicalExpr for LikeExpr { |
91 | 0 | fn as_any(&self) -> &dyn Any { |
92 | 0 | self |
93 | 0 | } |
94 | | |
95 | 0 | fn data_type(&self, _input_schema: &Schema) -> Result<DataType> { |
96 | 0 | Ok(DataType::Boolean) |
97 | 0 | } |
98 | | |
99 | 0 | fn nullable(&self, input_schema: &Schema) -> Result<bool> { |
100 | 0 | Ok(self.expr.nullable(input_schema)? || self.pattern.nullable(input_schema)?) |
101 | 0 | } |
102 | | |
103 | 0 | fn evaluate(&self, batch: &RecordBatch) -> Result<ColumnarValue> { |
104 | | use arrow::compute::*; |
105 | 0 | let lhs = self.expr.evaluate(batch)?; |
106 | 0 | let rhs = self.pattern.evaluate(batch)?; |
107 | 0 | match (self.negated, self.case_insensitive) { |
108 | 0 | (false, false) => apply_cmp(&lhs, &rhs, like), |
109 | 0 | (false, true) => apply_cmp(&lhs, &rhs, ilike), |
110 | 0 | (true, false) => apply_cmp(&lhs, &rhs, nlike), |
111 | 0 | (true, true) => apply_cmp(&lhs, &rhs, nilike), |
112 | | } |
113 | 0 | } |
114 | | |
115 | 0 | fn children(&self) -> Vec<&Arc<dyn PhysicalExpr>> { |
116 | 0 | vec![&self.expr, &self.pattern] |
117 | 0 | } |
118 | | |
119 | 0 | fn with_new_children( |
120 | 0 | self: Arc<Self>, |
121 | 0 | children: Vec<Arc<dyn PhysicalExpr>>, |
122 | 0 | ) -> Result<Arc<dyn PhysicalExpr>> { |
123 | 0 | Ok(Arc::new(LikeExpr::new( |
124 | 0 | self.negated, |
125 | 0 | self.case_insensitive, |
126 | 0 | Arc::clone(&children[0]), |
127 | 0 | Arc::clone(&children[1]), |
128 | 0 | ))) |
129 | 0 | } |
130 | | |
131 | 0 | fn dyn_hash(&self, state: &mut dyn Hasher) { |
132 | 0 | let mut s = state; |
133 | 0 | self.hash(&mut s); |
134 | 0 | } |
135 | | } |
136 | | |
137 | | impl PartialEq<dyn Any> for LikeExpr { |
138 | 0 | fn eq(&self, other: &dyn Any) -> bool { |
139 | 0 | down_cast_any_ref(other) |
140 | 0 | .downcast_ref::<Self>() |
141 | 0 | .map(|x| { |
142 | 0 | self.negated == x.negated |
143 | 0 | && self.case_insensitive == x.case_insensitive |
144 | 0 | && self.expr.eq(&x.expr) |
145 | 0 | && self.pattern.eq(&x.pattern) |
146 | 0 | }) |
147 | 0 | .unwrap_or(false) |
148 | 0 | } |
149 | | } |
150 | | |
151 | | /// used for optimize Dictionary like |
152 | 0 | fn can_like_type(from_type: &DataType) -> bool { |
153 | 0 | match from_type { |
154 | 0 | DataType::Dictionary(_, inner_type_from) => **inner_type_from == DataType::Utf8, |
155 | 0 | _ => false, |
156 | | } |
157 | 0 | } |
158 | | |
159 | | /// Create a like expression, erroring if the argument types are not compatible. |
160 | 0 | pub fn like( |
161 | 0 | negated: bool, |
162 | 0 | case_insensitive: bool, |
163 | 0 | expr: Arc<dyn PhysicalExpr>, |
164 | 0 | pattern: Arc<dyn PhysicalExpr>, |
165 | 0 | input_schema: &Schema, |
166 | 0 | ) -> Result<Arc<dyn PhysicalExpr>> { |
167 | 0 | let expr_type = &expr.data_type(input_schema)?; |
168 | 0 | let pattern_type = &pattern.data_type(input_schema)?; |
169 | 0 | if !expr_type.eq(pattern_type) && !can_like_type(expr_type) { |
170 | 0 | return internal_err!( |
171 | 0 | "The type of {expr_type} AND {pattern_type} of like physical should be same" |
172 | 0 | ); |
173 | 0 | } |
174 | 0 | Ok(Arc::new(LikeExpr::new( |
175 | 0 | negated, |
176 | 0 | case_insensitive, |
177 | 0 | expr, |
178 | 0 | pattern, |
179 | 0 | ))) |
180 | 0 | } |
181 | | |
#[cfg(test)]
mod test {
    use super::*;
    use crate::expressions::col;
    use arrow::array::*;
    use arrow_schema::Field;
    use datafusion_common::cast::as_boolean_array;

    /// Evaluates a like expression over two `Utf8` columns, `a` (values) and
    /// `b` (patterns), and asserts that the boolean result equals `$EXPECTED`.
    ///
    /// Uses `?`, so it must be invoked inside a function returning `Result`.
    macro_rules! test_like {
        ($VALUES:expr, $PATTERNS:expr, $EXPECTED:expr, $NULLABLE:expr, $NEGATED:expr, $CASE_INSENSITIVE:expr,) => {{
            let schema = Arc::new(Schema::new(vec![
                Field::new("a", DataType::Utf8, $NULLABLE),
                Field::new("b", DataType::Utf8, $NULLABLE),
            ]));
            let values = StringArray::from($VALUES);
            let patterns = StringArray::from($PATTERNS);

            let like_expr = like(
                $NEGATED,
                $CASE_INSENSITIVE,
                col("a", &schema)?,
                col("b", &schema)?,
                &schema,
            )?;

            let columns: Vec<ArrayRef> = vec![Arc::new(values), Arc::new(patterns)];
            let batch = RecordBatch::try_new(Arc::clone(&schema), columns)?;

            // Evaluate, then materialize the result as an array with one entry per row.
            let evaluated = like_expr
                .evaluate(&batch)?
                .into_array(batch.num_rows())
                .expect("Failed to convert to array");
            let actual = as_boolean_array(&evaluated)
                .expect("failed to downcast to BooleanArray");

            let expected = &BooleanArray::from($EXPECTED);
            assert_eq!(expected, actual);
        }};
    }

    #[test]
    fn like_op() -> Result<()> {
        // like
        test_like!(
            vec!["hello world", "world"],
            vec!["%hello%", "%hello%"],
            vec![true, false],
            false,
            false,
            false,
        );

        // not like
        test_like!(
            vec![Some("hello world"), None, Some("world")],
            vec![Some("%hello%"), None, Some("%hello%")],
            vec![Some(false), None, Some(true)],
            true,
            true,
            false,
        );

        // ilike
        test_like!(
            vec!["hello world", "world"],
            vec!["%helLo%", "%helLo%"],
            vec![true, false],
            false,
            false,
            true,
        );

        // not ilike
        test_like!(
            vec![Some("hello world"), None, Some("world")],
            vec![Some("%helLo%"), None, Some("%helLo%")],
            vec![Some(false), None, Some(true)],
            true,
            true,
            true,
        );

        Ok(())
    }
}