Coverage Report

Created: 2024-10-13 08:39

/Users/andrewlamb/Software/datafusion/datafusion/common/src/test_util.rs
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
//! Utility functions to make testing DataFusion based crates easier
19
20
use std::{error::Error, path::PathBuf};
21
22
/// Compares the pretty-formatted output of a slice of record batches
/// with an expected list of lines. This is a macro so that assertion
/// failures are reported on the line of the calling test.
///
/// Designed so that failure output can be directly copy/pasted
/// into the test code as expected results.
///
/// Expects to be called about like this:
///
/// `assert_batches_eq!(expected_lines: &[&str], batches: &[RecordBatch])`
///
/// # Panics
///
/// Panics if the batches can not be formatted, or if the formatted
/// lines differ from `expected_lines`.
///
/// # Example
/// ```
/// # use std::sync::Arc;
/// # use arrow::record_batch::RecordBatch;
/// # use arrow_array::{ArrayRef, Int32Array};
/// # use datafusion_common::assert_batches_eq;
/// let col: ArrayRef = Arc::new(Int32Array::from(vec![1, 2]));
/// let batch = RecordBatch::try_from_iter([("column", col)]).unwrap();
/// // Expected output is a vec of strings
/// let expected = vec![
///     "+--------+",
///     "| column |",
///     "+--------+",
///     "| 1      |",
///     "| 2      |",
///     "+--------+",
/// ];
/// // compare the formatted output of the record batch with the expected output
/// assert_batches_eq!(expected, &[batch]);
/// ```
#[macro_export]
macro_rules! assert_batches_eq {
    ($EXPECTED_LINES: expr, $CHUNKS: expr) => {
        // Own the expected lines so they can be compared against the
        // borrowed lines of the formatted output below.
        let expected_lines: Vec<String> =
            $EXPECTED_LINES.iter().map(|&s| s.into()).collect();

        let formatted = $crate::arrow::util::pretty::pretty_format_batches_with_options(
            $CHUNKS,
            &$crate::format::DEFAULT_FORMAT_OPTIONS,
        )
        .unwrap()
        .to_string();

        // Surrounding whitespace is trimmed before the output is split into lines.
        let actual_lines: Vec<&str> = formatted.trim().lines().collect();

        // Both sides are printed with `{:#?}` so a failure can be pasted
        // straight back into the test as the new expected value.
        assert_eq!(
            expected_lines, actual_lines,
            "\n\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n",
            expected_lines, actual_lines
        );
    };
}
75
76
/// Compares the pretty-formatted output of a slice of record batches
/// with an expected list of lines in a way that row order does not matter.
/// This is a macro so that assertion failures are reported on the line
/// of the calling test.
///
/// See [`assert_batches_eq`] for more details and example.
///
/// Expects to be called about like this:
///
/// `assert_batches_sorted_eq!(expected_lines: &[&str], batches: &[RecordBatch])`
///
/// # Panics
///
/// Panics if the batches can not be formatted, or if the sorted
/// formatted lines differ from the sorted `expected_lines`.
#[macro_export]
macro_rules! assert_batches_sorted_eq {
    ($EXPECTED_LINES: expr, $CHUNKS: expr) => {
        let mut expected_lines: Vec<String> =
            $EXPECTED_LINES.iter().map(|&s| s.into()).collect();

        // Sort everything between the two header lines and the footer line.
        // The sorted range starts at index 2, i.e. it includes the separator
        // that follows the column names; that separator starts with `+`,
        // which orders before the `|` that starts every data row, so it is
        // expected to stay in place.
        let num_lines = expected_lines.len();
        if num_lines > 3 {
            expected_lines.as_mut_slice()[2..num_lines - 1].sort_unstable();
        }

        let formatted = $crate::arrow::util::pretty::pretty_format_batches_with_options(
            $CHUNKS,
            &$crate::format::DEFAULT_FORMAT_OPTIONS,
        )
        .unwrap()
        .to_string();

        // `str::lines` treats both `\n` and `\r\n` as line endings, so no
        // explicit normalization of Windows line endings is needed here.
        let mut actual_lines: Vec<&str> = formatted.trim().lines().collect();

        // Apply exactly the same partial sort to the actual output.
        let num_lines = actual_lines.len();
        if num_lines > 3 {
            actual_lines.as_mut_slice()[2..num_lines - 1].sort_unstable();
        }

        assert_eq!(
            expected_lines, actual_lines,
            "\n\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n",
            expected_lines, actual_lines
        );
    };
}
120
121
/// Asserts that the second string occurs somewhere inside the first,
/// failing with a readable message that shows both values otherwise.
///
/// Usage: `assert_contains!(actual, expected)`
///
/// This is a macro (rather than a function) so that the failure is
/// reported on the line of the calling test.
///
/// Both arguments must be convertible into Strings ([`Into`]<[`String`]>)
#[macro_export]
macro_rules! assert_contains {
    ($ACTUAL: expr, $EXPECTED: expr) => {
        // Convert both sides up front so each argument is evaluated exactly once.
        let haystack: String = $ACTUAL.into();
        let needle: String = $EXPECTED.into();
        assert!(
            haystack.contains(needle.as_str()),
            "Can not find expected in actual.\n\nExpected:\n{}\n\nActual:\n{}",
            needle,
            haystack
        );
    };
}
143
144
/// Asserts that the second string does NOT occur anywhere inside the
/// first, failing with a readable message that shows both values if it does.
///
/// Usage: `assert_not_contains!(actual, unexpected)`
///
/// This is a macro (rather than a function) so that the failure is
/// reported on the line of the calling test.
///
/// Both arguments must be convertible into Strings ([`Into`]<[`String`]>)
#[macro_export]
macro_rules! assert_not_contains {
    ($ACTUAL: expr, $UNEXPECTED: expr) => {
        // Convert both sides up front so each argument is evaluated exactly once.
        let haystack: String = $ACTUAL.into();
        let forbidden: String = $UNEXPECTED.into();
        assert!(
            !haystack.contains(forbidden.as_str()),
            "Found unexpected in actual.\n\nUnexpected:\n{}\n\nActual:\n{}",
            forbidden,
            haystack
        );
    };
}
166
167
/// Returns the datafusion test data directory, which is by default rooted at `datafusion/core/tests/data`.
168
///
169
/// The default can be overridden by the optional environment
170
/// variable `DATAFUSION_TEST_DATA`
171
///
172
/// panics when the directory can not be found.
173
///
174
/// Example:
175
/// ```
176
/// let testdata = datafusion_common::test_util::datafusion_test_data();
177
/// let csvdata = format!("{}/window_1.csv", testdata);
178
/// assert!(std::path::PathBuf::from(csvdata).exists());
179
/// ```
180
pub fn datafusion_test_data() -> String {
181
    match get_data_dir("DATAFUSION_TEST_DATA", "../../datafusion/core/tests/data") {
182
        Ok(pb) => pb.display().to_string(),
183
        Err(err) => panic!("failed to get arrow data dir: {err}"),
184
    }
185
}
186
187
/// Returns the arrow test data directory, which is by default stored
188
/// in a git submodule rooted at `testing/data`.
189
///
190
/// The default can be overridden by the optional environment
191
/// variable `ARROW_TEST_DATA`
192
///
193
/// panics when the directory can not be found.
194
///
195
/// Example:
196
/// ```
197
/// let testdata = datafusion_common::test_util::arrow_test_data();
198
/// let csvdata = format!("{}/csv/aggregate_test_100.csv", testdata);
199
/// assert!(std::path::PathBuf::from(csvdata).exists());
200
/// ```
201
pub fn arrow_test_data() -> String {
202
    match get_data_dir("ARROW_TEST_DATA", "../../testing/data") {
203
        Ok(pb) => pb.display().to_string(),
204
        Err(err) => panic!("failed to get arrow data dir: {err}"),
205
    }
206
}
207
208
/// Returns the parquet test data directory, which by default lives in
/// a git submodule rooted at `parquet-testing/data`.
///
/// Set the optional environment variable `PARQUET_TEST_DATA` to point
/// at a different location.
///
/// # Panics
///
/// Panics when the directory can not be found.
///
/// # Example
/// ```
/// let testdata = datafusion_common::test_util::parquet_test_data();
/// let filename = format!("{}/binary.parquet", testdata);
/// assert!(std::path::PathBuf::from(filename).exists());
/// ```
#[cfg(feature = "parquet")]
pub fn parquet_test_data() -> String {
    get_data_dir("PARQUET_TEST_DATA", "../../parquet-testing/data")
        .map(|dir| dir.display().to_string())
        .unwrap_or_else(|err| panic!("failed to get parquet data dir: {err}"))
}
230
231
/// Returns a directory path for finding test data.
232
///
233
/// udf_env: name of an environment variable
234
///
235
/// submodule_dir: fallback path (relative to CARGO_MANIFEST_DIR)
236
///
237
///  Returns either:
238
/// The path referred to in `udf_env` if that variable is set and refers to a directory
239
/// The submodule_data directory relative to CARGO_MANIFEST_PATH
240
pub fn get_data_dir(
241
    udf_env: &str,
242
    submodule_data: &str,
243
) -> Result<PathBuf, Box<dyn Error>> {
244
    // Try user defined env.
245
    if let Ok(dir) = std::env::var(udf_env) {
246
        let trimmed = dir.trim().to_string();
247
        if !trimmed.is_empty() {
248
            let pb = PathBuf::from(trimmed);
249
            if pb.is_dir() {
250
                return Ok(pb);
251
            } else {
252
                return Err(format!(
253
                    "the data dir `{}` defined by env {} not found",
254
                    pb.display(),
255
                    udf_env
256
                )
257
                .into());
258
            }
259
        }
260
    }
261
262
    // The env is undefined or its value is trimmed to empty, let's try default dir.
263
264
    // env "CARGO_MANIFEST_DIR" is "the directory containing the manifest of your package",
265
    // set by `cargo run` or `cargo test`, see:
266
    // https://doc.rust-lang.org/cargo/reference/environment-variables.html
267
    let dir = env!("CARGO_MANIFEST_DIR");
268
269
    let pb = PathBuf::from(dir).join(submodule_data);
270
    if pb.is_dir() {
271
        Ok(pb)
272
    } else {
273
        Err(format!(
274
            "env `{}` is undefined or has empty value, and the pre-defined data dir `{}` not found\n\
275
             HINT: try running `git submodule update --init`",
276
            udf_env,
277
            pb.display(),
278
        ).into())
279
    }
280
}
281
282
#[cfg(test)]
mod tests {
    use super::*;
    use std::env;

    /// Walks `get_data_dir` through every combination of env value
    /// (missing dir, empty, blank, existing dir, unset) and fallback.
    #[test]
    fn test_data_dir() {
        let udf_env = "get_data_dir";
        let cwd = env::current_dir().unwrap();

        // The parent of the working directory is used as the directory that exists.
        let existing_pb = cwd.join("..");
        let existing = existing_pb.display().to_string();
        let missing = cwd.join("non-existing-dir").display().to_string();

        // env names a missing directory: an error, even though the fallback exists
        env::set_var(udf_env, &missing);
        assert!(get_data_dir(udf_env, &existing).is_err());

        // empty or blank env falls back to the default dir; an env naming
        // an existing directory is returned as-is
        for value in ["", " ", existing.as_str()] {
            env::set_var(udf_env, value);
            let res = get_data_dir(udf_env, &existing);
            assert!(res.is_ok());
            assert_eq!(res.unwrap(), existing_pb);
        }

        // env unset: the outcome depends only on the fallback directory
        env::remove_var(udf_env);
        assert!(get_data_dir(udf_env, &missing).is_err());

        let res = get_data_dir(udf_env, &existing);
        assert!(res.is_ok());
        assert_eq!(res.unwrap(), existing_pb);
    }

    /// The default arrow and parquet data directories resolve to real directories.
    #[test]
    #[cfg(feature = "parquet")]
    fn test_happy() {
        let arrow_dir = arrow_test_data();
        assert!(PathBuf::from(arrow_dir).is_dir());

        let parquet_dir = parquet_test_data();
        assert!(PathBuf::from(parquet_dir).is_dir());
    }
}