/Users/andrewlamb/Software/datafusion/datafusion/physical-expr/src/expressions/is_null.rs
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | //! IS NULL expression |
19 | | |
20 | | use std::hash::{Hash, Hasher}; |
21 | | use std::{any::Any, sync::Arc}; |
22 | | |
23 | | use arrow::{ |
24 | | datatypes::{DataType, Schema}, |
25 | | record_batch::RecordBatch, |
26 | | }; |
27 | | |
28 | | use crate::physical_expr::down_cast_any_ref; |
29 | | use crate::PhysicalExpr; |
30 | | use datafusion_common::Result; |
31 | | use datafusion_common::ScalarValue; |
32 | | use datafusion_expr::ColumnarValue; |
33 | | |
34 | | /// IS NULL expression |
35 | | #[derive(Debug, Hash)] |
36 | | pub struct IsNullExpr { |
37 | | /// Input expression |
38 | | arg: Arc<dyn PhysicalExpr>, |
39 | | } |
40 | | |
41 | | impl IsNullExpr { |
42 | | /// Create new IS NULL expression |
43 | 0 | pub fn new(arg: Arc<dyn PhysicalExpr>) -> Self { |
44 | 0 | Self { arg } |
45 | 0 | } |
46 | | |
47 | | /// Get the input expression |
48 | 0 | pub fn arg(&self) -> &Arc<dyn PhysicalExpr> { |
49 | 0 | &self.arg |
50 | 0 | } |
51 | | } |
52 | | |
53 | | impl std::fmt::Display for IsNullExpr { |
54 | 0 | fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { |
55 | 0 | write!(f, "{} IS NULL", self.arg) |
56 | 0 | } |
57 | | } |
58 | | |
59 | | impl PhysicalExpr for IsNullExpr { |
60 | | /// Return a reference to Any that can be used for downcasting |
61 | 0 | fn as_any(&self) -> &dyn Any { |
62 | 0 | self |
63 | 0 | } |
64 | | |
65 | 0 | fn data_type(&self, _input_schema: &Schema) -> Result<DataType> { |
66 | 0 | Ok(DataType::Boolean) |
67 | 0 | } |
68 | | |
69 | 0 | fn nullable(&self, _input_schema: &Schema) -> Result<bool> { |
70 | 0 | Ok(false) |
71 | 0 | } |
72 | | |
73 | 0 | fn evaluate(&self, batch: &RecordBatch) -> Result<ColumnarValue> { |
74 | 0 | let arg = self.arg.evaluate(batch)?; |
75 | 0 | match arg { |
76 | 0 | ColumnarValue::Array(array) => Ok(ColumnarValue::Array(Arc::new( |
77 | 0 | arrow::compute::is_null(&array)?, |
78 | | ))), |
79 | 0 | ColumnarValue::Scalar(scalar) => Ok(ColumnarValue::Scalar( |
80 | 0 | ScalarValue::Boolean(Some(scalar.is_null())), |
81 | 0 | )), |
82 | | } |
83 | 0 | } |
84 | | |
85 | 0 | fn children(&self) -> Vec<&Arc<dyn PhysicalExpr>> { |
86 | 0 | vec![&self.arg] |
87 | 0 | } |
88 | | |
89 | 0 | fn with_new_children( |
90 | 0 | self: Arc<Self>, |
91 | 0 | children: Vec<Arc<dyn PhysicalExpr>>, |
92 | 0 | ) -> Result<Arc<dyn PhysicalExpr>> { |
93 | 0 | Ok(Arc::new(IsNullExpr::new(Arc::clone(&children[0])))) |
94 | 0 | } |
95 | | |
96 | 0 | fn dyn_hash(&self, state: &mut dyn Hasher) { |
97 | 0 | let mut s = state; |
98 | 0 | self.hash(&mut s); |
99 | 0 | } |
100 | | } |
101 | | |
102 | | impl PartialEq<dyn Any> for IsNullExpr { |
103 | 0 | fn eq(&self, other: &dyn Any) -> bool { |
104 | 0 | down_cast_any_ref(other) |
105 | 0 | .downcast_ref::<Self>() |
106 | 0 | .map(|x| self.arg.eq(&x.arg)) |
107 | 0 | .unwrap_or(false) |
108 | 0 | } |
109 | | } |
110 | | |
111 | | /// Create an IS NULL expression |
112 | 0 | pub fn is_null(arg: Arc<dyn PhysicalExpr>) -> Result<Arc<dyn PhysicalExpr>> { |
113 | 0 | Ok(Arc::new(IsNullExpr::new(arg))) |
114 | 0 | } |
115 | | |
116 | | #[cfg(test)] |
117 | | mod tests { |
118 | | use super::*; |
119 | | use crate::expressions::col; |
120 | | use arrow::{ |
121 | | array::{BooleanArray, StringArray}, |
122 | | datatypes::*, |
123 | | }; |
124 | | use arrow_array::{Array, Float64Array, Int32Array, UnionArray}; |
125 | | use arrow_buffer::ScalarBuffer; |
126 | | use datafusion_common::cast::as_boolean_array; |
127 | | |
128 | | #[test] |
129 | | fn is_null_op() -> Result<()> { |
130 | | let schema = Schema::new(vec![Field::new("a", DataType::Utf8, true)]); |
131 | | let a = StringArray::from(vec![Some("foo"), None]); |
132 | | |
133 | | // expression: "a is null" |
134 | | let expr = is_null(col("a", &schema)?).unwrap(); |
135 | | let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a)])?; |
136 | | |
137 | | let result = expr |
138 | | .evaluate(&batch)? |
139 | | .into_array(batch.num_rows()) |
140 | | .expect("Failed to convert to array"); |
141 | | let result = |
142 | | as_boolean_array(&result).expect("failed to downcast to BooleanArray"); |
143 | | |
144 | | let expected = &BooleanArray::from(vec![false, true]); |
145 | | |
146 | | assert_eq!(expected, result); |
147 | | |
148 | | Ok(()) |
149 | | } |
150 | | |
151 | | fn union_fields() -> UnionFields { |
152 | | [ |
153 | | (0, Arc::new(Field::new("A", DataType::Int32, true))), |
154 | | (1, Arc::new(Field::new("B", DataType::Float64, true))), |
155 | | (2, Arc::new(Field::new("C", DataType::Utf8, true))), |
156 | | ] |
157 | | .into_iter() |
158 | | .collect() |
159 | | } |
160 | | |
161 | | #[test] |
162 | | fn sparse_union_is_null() { |
163 | | // union of [{A=1}, {A=}, {B=1.1}, {B=1.2}, {B=}, {C=}, {C="a"}] |
164 | | let int_array = |
165 | | Int32Array::from(vec![Some(1), None, None, None, None, None, None]); |
166 | | let float_array = |
167 | | Float64Array::from(vec![None, None, Some(1.1), Some(1.2), None, None, None]); |
168 | | let str_array = |
169 | | StringArray::from(vec![None, None, None, None, None, None, Some("a")]); |
170 | | let type_ids = [0, 0, 1, 1, 1, 2, 2] |
171 | | .into_iter() |
172 | | .collect::<ScalarBuffer<i8>>(); |
173 | | |
174 | | let children = vec![ |
175 | | Arc::new(int_array) as Arc<dyn Array>, |
176 | | Arc::new(float_array), |
177 | | Arc::new(str_array), |
178 | | ]; |
179 | | |
180 | | let array = |
181 | | UnionArray::try_new(union_fields(), type_ids, None, children).unwrap(); |
182 | | |
183 | | let result = arrow::compute::is_null(&array).unwrap(); |
184 | | |
185 | | let expected = |
186 | | &BooleanArray::from(vec![false, true, false, false, true, true, false]); |
187 | | assert_eq!(expected, &result); |
188 | | } |
189 | | |
190 | | #[test] |
191 | | fn dense_union_is_null() { |
192 | | // union of [{A=1}, {A=}, {B=3.2}, {B=}, {C="a"}, {C=}] |
193 | | let int_array = Int32Array::from(vec![Some(1), None]); |
194 | | let float_array = Float64Array::from(vec![Some(3.2), None]); |
195 | | let str_array = StringArray::from(vec![Some("a"), None]); |
196 | | let type_ids = [0, 0, 1, 1, 2, 2].into_iter().collect::<ScalarBuffer<i8>>(); |
197 | | let offsets = [0, 1, 0, 1, 0, 1] |
198 | | .into_iter() |
199 | | .collect::<ScalarBuffer<i32>>(); |
200 | | |
201 | | let children = vec![ |
202 | | Arc::new(int_array) as Arc<dyn Array>, |
203 | | Arc::new(float_array), |
204 | | Arc::new(str_array), |
205 | | ]; |
206 | | |
207 | | let array = |
208 | | UnionArray::try_new(union_fields(), type_ids, Some(offsets), children) |
209 | | .unwrap(); |
210 | | |
211 | | let result = arrow::compute::is_null(&array).unwrap(); |
212 | | |
213 | | let expected = &BooleanArray::from(vec![false, true, false, true, false, true]); |
214 | | assert_eq!(expected, &result); |
215 | | } |
216 | | } |