Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Split parquet test into multiple files #14663

Merged
merged 37 commits into from
Jan 9, 2024
Merged
Show file tree
Hide file tree
Changes from 34 commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
0250e57
initial checkin
etseidl Dec 20, 2023
b3180ba
move random_values to common
etseidl Dec 20, 2023
ba481b2
move ascending/descending/unordered to parquet_common
etseidl Dec 20, 2023
2b708c7
Merge remote-tracking branch 'origin/branch-24.02' into test_parquet_…
etseidl Dec 20, 2023
f2135f3
move delta data gen to common
etseidl Dec 20, 2023
abafa82
split out parquet reader tests
etseidl Dec 20, 2023
955a41a
split out parquet writer tests
etseidl Dec 20, 2023
b1a2322
split out v2 tests
etseidl Dec 20, 2023
16eae3c
split out some misc stuff
etseidl Dec 20, 2023
200cd92
split out some misc stuff
etseidl Dec 20, 2023
39487c9
clean up includes
etseidl Dec 20, 2023
c043465
formatting
etseidl Dec 20, 2023
f4ecc6c
split out chunked writer
etseidl Dec 20, 2023
7e6d323
clean up includes
etseidl Dec 20, 2023
dde94e6
more includes
etseidl Dec 20, 2023
44cb383
Merge remote-tracking branch 'origin/branch-24.02' into test_parquet_…
etseidl Dec 20, 2023
cab0810
move comparable tests to misc
etseidl Dec 20, 2023
7b7eea3
fix copyright and trim includes
etseidl Dec 20, 2023
a297efa
move some more stuff around
etseidl Dec 21, 2023
a4acda6
move predicate test to reader
etseidl Dec 21, 2023
06a509d
Merge branch 'branch-24.02' into test_parquet_refactor
vuule Dec 26, 2023
0267c31
remove some unused test fixtures
etseidl Jan 2, 2024
32af582
more fixture cleanups
etseidl Jan 2, 2024
4a42a7a
more cleanup
etseidl Jan 2, 2024
5d22173
Merge branch 'branch-24.02' into test_parquet_refactor
etseidl Jan 2, 2024
0e49191
split out main into separate header
etseidl Jan 2, 2024
bd3bab1
split out random generators from base_fixture
etseidl Jan 2, 2024
28907da
Merge branch 'branch-24.02' into test_parquet_refactor
etseidl Jan 2, 2024
03a21d2
include error.hpp
etseidl Jan 2, 2024
9c0995e
empty parquet_test
etseidl Jan 3, 2024
133c652
update includes
etseidl Jan 3, 2024
a12a141
Merge branch 'branch-24.02' into test_parquet_refactor
vuule Jan 4, 2024
168cdaa
fix includes
etseidl Jan 4, 2024
1e75caa
Merge branch 'branch-24.02' into test_parquet_refactor
etseidl Jan 5, 2024
3d218e8
move temp_env to parquet_common.cpp, add note explaining the continued
etseidl Jan 9, 2024
b9dc324
Merge branch 'branch-24.02' into test_parquet_refactor
etseidl Jan 9, 2024
c9a2190
Merge branch 'branch-24.02' into test_parquet_refactor
etseidl Jan 9, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
308 changes: 5 additions & 303 deletions cpp/include/cudf_test/base_fixture.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -16,29 +16,17 @@

#pragma once

#include <random>

#include <cudf/utilities/default_stream.hpp>
#include <cudf/utilities/error.hpp>
#include <cudf/utilities/traits.hpp>
#include <cudf_test/cudf_gtest.hpp>
#include <cudf_test/cxxopts.hpp>
#include <cudf_test/default_stream.hpp>
#include <cudf_test/file_utilities.hpp>
#include <cudf_test/stream_checking_resource_adaptor.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/mr/device/arena_memory_resource.hpp>
#include <rmm/mr/device/binning_memory_resource.hpp>
#include <rmm/mr/device/cuda_async_memory_resource.hpp>
#include <rmm/mr/device/cuda_memory_resource.hpp>
#include <rmm/mr/device/managed_memory_resource.hpp>
#include <rmm/mr/device/owning_wrapper.hpp>
#include <cudf/utilities/traits.hpp>

#include <rmm/mr/device/device_memory_resource.hpp>
#include <rmm/mr/device/per_device_resource.hpp>
#include <rmm/mr/device/pool_memory_resource.hpp>

namespace cudf {
namespace test {

/**
* @brief Base test fixture class from which all libcudf tests should inherit.
*
Expand Down Expand Up @@ -80,152 +68,6 @@ class BaseFixtureWithParam : public ::testing::TestWithParam<T> {
rmm::mr::device_memory_resource* mr() const { return _mr; }
};

/**
* @brief Maps a value type `T` to the standard-library distribution type used to
* generate random values for it.
*
* The primary template is empty: a `T` that matches none of the specializations
* below has no nested `type` member.
*/
template <typename T, typename Enable = void>
struct uniform_distribution_impl {};
/// Integral types are drawn from `std::uniform_int_distribution<T>`.
template <typename T>
struct uniform_distribution_impl<T, std::enable_if_t<std::is_integral_v<T>>> {
using type = std::uniform_int_distribution<T>;
};

/// `bool` is handled by this explicit specialization (which is preferred over the
/// integral partial specialization) and uses `std::bernoulli_distribution`.
template <>
struct uniform_distribution_impl<bool> {
using type = std::bernoulli_distribution;
};

/// Floating-point types are drawn from `std::uniform_real_distribution<T>`.
template <typename T>
struct uniform_distribution_impl<T, std::enable_if_t<std::is_floating_point_v<T>>> {
using type = std::uniform_real_distribution<T>;
};

/// Chrono and fixed-point types are generated through an integer distribution over
/// their representation type `T::rep`.
template <typename T>
struct uniform_distribution_impl<
T,
std::enable_if_t<cudf::is_chrono<T>() or cudf::is_fixed_point<T>()>> {
using type = std::uniform_int_distribution<typename T::rep>;
};

/// Convenience alias for the distribution type selected for `T`.
template <typename T>
using uniform_distribution_t = typename uniform_distribution_impl<T>::type;

namespace detail {

/**
* @brief Returns an incrementing seed value for use with UniformRandomGenerator.
*
* The intent behind this is to handle the following case:
*
* auto lhs = make_random_wrapped_column<TypeLhs>(10000);
* auto rhs = make_random_wrapped_column<TypeRhs>(10000);
*
* Previously, the binops test framework had a persistent UniformRandomGenerator
* that would produce unique values across two calls to make_random_wrapped_column()
* like this. However, that code has been changed and each call to
* make_random_wrapped_column() now uses a local UniformRandomGenerator object. If we
* didn't generate an incrementing seed for each one, every call to
* make_random_wrapped_column() would return the same values. This fixes that case and
* also leaves results across multiple test runs deterministic.
*
* Only the declaration is visible here; the definition lives elsewhere.
*
* @return The next seed value in the incrementing sequence
*/
uint64_t random_generator_incrementing_seed();

} // namespace detail

/**
* @brief Provides uniform random number generation.
*
* It is often useful in testing to have a convenient source of random numbers.
* This class is intended to serve as a base class for test fixtures to provide
* random number generation. `UniformRandomGenerator::generate()` will generate
* the next random number in the sequence.
*
* Example:
* ```c++
* UniformRandomGenerator g(0,100);
* g.generate(); // Returns a random number in the range [0,100]
* ```
*
* @tparam T The type of values that will be generated.
* @tparam Engine The random number engine type that drives the distribution.
*/
template <typename T = cudf::size_type, typename Engine = std::default_random_engine>
class UniformRandomGenerator {
public:
using uniform_distribution = uniform_distribution_t<T>; ///< The uniform distribution type for T.

/**
* @brief Construct a new Uniform Random Generator with a default-constructed
* distribution.
*
* The engine is seeded with one value drawn from a `std::mt19937_64` that is itself
* seeded by `detail::random_generator_incrementing_seed()`.
*/
UniformRandomGenerator() : rng{std::mt19937_64{detail::random_generator_incrementing_seed()}()} {}

/**
* @brief Construct a new Uniform Random Generator to generate uniformly
* random numbers in the range `[lower,upper]`
*
* Enabled for numeric, non-boolean `T`. Whether `upper` itself can be produced
* follows the semantics of the underlying standard distribution for `T`.
*
* @param lower Lower bound of the range
* @param upper Upper bound of the desired range
* @param seed seed to initialize generator with
*/
template <typename TL = T,
std::enable_if_t<cudf::is_numeric<TL>() && !cudf::is_boolean<TL>()>* = nullptr>
UniformRandomGenerator(T lower,
T upper,
uint64_t seed = detail::random_generator_incrementing_seed())
: dist{lower, upper}, rng{std::mt19937_64{seed}()}
{
}

/**
* @brief Construct a new Uniform Random Generator to generate uniformly random booleans
*
* The distribution is constructed with probability 0.5, so both bounds are unused.
*
* @param lower ignored
* @param upper ignored
* @param seed seed to initialize generator with
*/
template <typename TL = T, std::enable_if_t<cudf::is_boolean<TL>()>* = nullptr>
UniformRandomGenerator(T lower,
T upper,
uint64_t seed = detail::random_generator_incrementing_seed())
: dist{0.5}, rng{std::mt19937_64{seed}()}
{
}

/**
* @brief Construct a new Uniform Random Generator to generate uniformly
* random numbers in the range `[lower,upper]`
*
* Enabled for chrono and fixed-point `T`; the bounds are given in terms of the
* representation type `T::rep`.
*
* @param lower Lower bound of the range
* @param upper Upper bound of the desired range
* @param seed seed to initialize generator with
*/
template <typename TL = T,
std::enable_if_t<cudf::is_chrono<TL>() or cudf::is_fixed_point<TL>()>* = nullptr>
UniformRandomGenerator(typename TL::rep lower,
typename TL::rep upper,
uint64_t seed = detail::random_generator_incrementing_seed())
: dist{lower, upper}, rng{std::mt19937_64{seed}()}
{
}

/**
* @brief Returns the next random number.
*
* Overload for every `T` that is not a timestamp: the drawn value is used to
* construct `T` directly.
*
* @return generated random number
*/
template <typename TL = T, std::enable_if_t<!cudf::is_timestamp<TL>()>* = nullptr>
T generate()
{
return T{dist(rng)};
}

/**
* @brief Returns the next random number.
*
* Overload for timestamp types: the drawn value is first wrapped in `T::duration`,
* and the timestamp is constructed from that duration.
*
* @return generated random number
*/
template <typename TL = T, std::enable_if_t<cudf::is_timestamp<TL>()>* = nullptr>
T generate()
{
return T{typename T::duration{dist(rng)}};
}

private:
uniform_distribution dist{}; ///< Distribution
Engine rng; ///< Random generator
};

/**
* @brief Provides temporary directory for temporary test files.
*
Expand Down Expand Up @@ -255,145 +97,5 @@ class TempDirTestEnvironment : public ::testing::Environment {
std::string get_temp_filepath(std::string filename) { return tmpdir.path() + filename; }
};

/// MR factory functions
/// Creates a `rmm::mr::cuda_memory_resource` held by a `std::shared_ptr`.
inline std::shared_ptr<rmm::mr::cuda_memory_resource> make_cuda()
{
  return std::make_shared<rmm::mr::cuda_memory_resource>();
}

/// Creates a `rmm::mr::cuda_async_memory_resource` held by a `std::shared_ptr`.
inline std::shared_ptr<rmm::mr::cuda_async_memory_resource> make_async()
{
  return std::make_shared<rmm::mr::cuda_async_memory_resource>();
}

/// Creates a `rmm::mr::managed_memory_resource` held by a `std::shared_ptr`.
inline std::shared_ptr<rmm::mr::managed_memory_resource> make_managed()
{
  return std::make_shared<rmm::mr::managed_memory_resource>();
}

/// Creates a pool resource layered on a CUDA resource. The size handed to the pool is the
/// smaller of the free device memory and one tenth of the total device memory, aligned
/// down to `rmm::detail::CUDA_ALLOCATION_ALIGNMENT`.
inline auto make_pool()
{
  auto const [free_bytes, total_bytes] = rmm::detail::available_device_memory();
  auto const tenth_of_total            = total_bytes / 10;
  auto const unaligned_size            = std::min(free_bytes, tenth_of_total);
  auto aligned_size =
    rmm::detail::align_down(unaligned_size, rmm::detail::CUDA_ALLOCATION_ALIGNMENT);
  return rmm::mr::make_owning_wrapper<rmm::mr::pool_memory_resource>(make_cuda(), aligned_size);
}

/// Creates an arena resource wrapped together with its upstream CUDA resource.
inline auto make_arena()
{
  auto upstream = make_cuda();
  return rmm::mr::make_owning_wrapper<rmm::mr::arena_memory_resource>(upstream);
}

/// Creates a binning resource layered on top of the pool resource from `make_pool()`.
inline auto make_binning()
{
  // The binning_memory_resource gets fixed-size bins of sizes 256, 512, 1024, 2048 and
  // 4096KiB (size exponents 18 through 22). Larger allocations will use the pool resource.
  auto upstream_pool = make_pool();
  return rmm::mr::make_owning_wrapper<rmm::mr::binning_memory_resource>(upstream_pool, 18, 22);
}

/**
 * @brief Builds the memory resource used by the unit test environment for the
 * requested allocation mode.
 *
 * The caller must keep the returned resource alive for as long as the tests run.
 * It should not be attached to a TestEnvironment: environment objects are not
 * destroyed until after the runtime is shutdown, which causes issues.
 *
 * @throw cudf::logic_error if the `allocation_mode` is unsupported.
 *
 * @param allocation_mode Name of the resource type to create. Recognized names are
 * "arena", "async", "binning", "cuda", "managed", and "pool".
 * @return Memory resource instance
 */
inline std::shared_ptr<rmm::mr::device_memory_resource> create_memory_resource(
  std::string const& allocation_mode)
{
  if (allocation_mode == "arena") {
    return make_arena();
  } else if (allocation_mode == "async") {
    return make_async();
  } else if (allocation_mode == "binning") {
    return make_binning();
  } else if (allocation_mode == "cuda") {
    return make_cuda();
  } else if (allocation_mode == "managed") {
    return make_managed();
  } else if (allocation_mode == "pool") {
    return make_pool();
  }
  // No known mode matched.
  CUDF_FAIL("Invalid RMM allocation mode: " + allocation_mode);
}

} // namespace test
} // namespace cudf

/**
 * @brief Parses the cuDF test command line options.
 *
 * Three string options are registered; unrecognised options are allowed:
 * - `rmm_mode`: the RMM allocation mode. Defaults to the value of the
 *   `GTEST_CUDF_RMM_MODE` environment variable if set, otherwise "pool".
 * - `stream_mode`: whether to use a non-default stream. Defaults to the value of the
 *   `GTEST_CUDF_STREAM_MODE` environment variable if set, otherwise "default".
 * - `stream_error_mode`: whether to error or print to stdout when a non-default stream
 *   is observed. Defaults to the value of the `GTEST_CUDF_STREAM_ERROR_MODE`
 *   environment variable if set, otherwise "error".
 *
 * The environment variables only supply default values, so an option given on the
 * command line takes precedence over the corresponding environment variable.
 *
 * @throw cudf::logic_error if option parsing fails; the message includes the text
 * reported by the underlying cxxopts exception.
 *
 * @param argc Number of command line arguments
 * @param argv Command line arguments; `argv[0]` is used as the program name
 * @return The result of `cxxopts::Options::parse`, indexable by option name
 */
inline auto parse_cudf_test_opts(int argc, char** argv)
{
  try {
    cxxopts::Options options(argv[0], " - cuDF tests command line options");

    // Environment values are only defaults; they are overridden by CLI options.
    char const* env_rmm_mode          = std::getenv("GTEST_CUDF_RMM_MODE");
    char const* env_stream_mode       = std::getenv("GTEST_CUDF_STREAM_MODE");
    char const* env_stream_error_mode = std::getenv("GTEST_CUDF_STREAM_ERROR_MODE");

    auto default_rmm_mode          = env_rmm_mode ? env_rmm_mode : "pool";
    auto default_stream_mode       = env_stream_mode ? env_stream_mode : "default";
    auto default_stream_error_mode = env_stream_error_mode ? env_stream_error_mode : "error";

    // `new_cudf_default` means that cudf::get_default_stream has been patched,
    // so we raise errors anywhere that a CUDA default stream is observed
    // instead of cudf::get_default_stream(). This corresponds to compiling
    // identify_stream_usage with STREAM_MODE_TESTING=OFF (must do both at the
    // same time).
    // `new_testing_default` means that cudf::test::get_default_stream has been
    // patched, so we raise errors anywhere that _any_ other stream is
    // observed. This corresponds to compiling identify_stream_usage with
    // STREAM_MODE_TESTING=ON (must do both at the same time).
    options.allow_unrecognised_options().add_options()(
      "rmm_mode",
      "RMM allocation mode",
      cxxopts::value<std::string>()->default_value(default_rmm_mode))(
      "stream_mode",
      "Whether to use a non-default stream",
      cxxopts::value<std::string>()->default_value(default_stream_mode))(
      "stream_error_mode",
      "Whether to error or print to stdout when a non-default stream is observed and stream_mode "
      "is not \"default\"",
      cxxopts::value<std::string>()->default_value(default_stream_error_mode));

    return options.parse(argc, argv);
  } catch (cxxopts::OptionException const& e) {
    // Surface the parser's own diagnostic instead of discarding it.
    CUDF_FAIL("Error parsing command line options: " + std::string{e.what()});
  }
}

/**
* @brief Macro that defines main function for gtest programs that use rmm
*
* Should be included in every test program that uses rmm allocators since
* it maintains the lifespan of the rmm default memory resource.
* This `main` function is a wrapper around the google test generated `main`,
* maintaining the original functionality. In addition, this custom `main`
* function parses the command line to customize test behavior, like the
* allocation mode used for creating the default memory resource.
*
* The generated `main` creates the resource selected by the `rmm_mode` option and
* installs it as the current device resource. When `stream_mode` is
* "new_cudf_default" or "new_testing_default", that resource is additionally wrapped
* in a stream checking resource adaptor, which is installed instead; whether the
* adaptor errors on an invalid stream is controlled by `stream_error_mode` being
* "error". The adaptor is a local of that branch, so the tests are run from inside
* the branch while the adaptor is still in scope.
*/
#define CUDF_TEST_PROGRAM_MAIN() \
int main(int argc, char** argv) \
{ \
::testing::InitGoogleTest(&argc, argv); \
auto const cmd_opts = parse_cudf_test_opts(argc, argv); \
auto const rmm_mode = cmd_opts["rmm_mode"].as<std::string>(); \
auto resource = cudf::test::create_memory_resource(rmm_mode); \
rmm::mr::set_current_device_resource(resource.get()); \
\
auto const stream_mode = cmd_opts["stream_mode"].as<std::string>(); \
if ((stream_mode == "new_cudf_default") || (stream_mode == "new_testing_default")) { \
auto const stream_error_mode = cmd_opts["stream_error_mode"].as<std::string>(); \
auto const error_on_invalid_stream = (stream_error_mode == "error"); \
auto const check_default_stream = (stream_mode == "new_cudf_default"); \
auto adaptor = make_stream_checking_resource_adaptor( \
resource.get(), error_on_invalid_stream, check_default_stream); \
rmm::mr::set_current_device_resource(&adaptor); \
return RUN_ALL_TESTS(); \
} \
\
return RUN_ALL_TESTS(); \
}
Loading