From 1626debfbf7d6ed47db97612b08450ed53e4b903 Mon Sep 17 00:00:00 2001 From: Karthikeyan Natarajan Date: Wed, 7 Jul 2021 00:43:42 +0530 Subject: [PATCH 1/4] fix spelling mistakes in comments and documentations --- cpp/benchmarks/common/generate_benchmark_input.cpp | 3 ++- cpp/benchmarks/common/generate_benchmark_input.hpp | 4 ++-- cpp/benchmarks/join/generate_input_tables.cuh | 6 +++--- cpp/docs/DEVELOPER_GUIDE.md | 2 +- cpp/docs/TESTING.md | 2 +- cpp/include/cudf/aggregation.hpp | 4 ++-- cpp/include/cudf/column/column_device_view.cuh | 4 ++-- cpp/include/cudf/column/column_factories.hpp | 6 +++--- cpp/include/cudf/column/column_view.hpp | 2 +- cpp/include/cudf/detail/aggregation/aggregation.cuh | 2 +- cpp/include/cudf/detail/indexalator.cuh | 6 +++--- cpp/include/cudf/detail/iterator.cuh | 4 ++-- cpp/include/cudf/detail/nvtx/nvtx3.hpp | 8 ++++---- cpp/include/cudf/detail/unary.hpp | 2 +- cpp/include/cudf/detail/utilities/device_atomics.cuh | 2 +- cpp/include/cudf/detail/valid_if.cuh | 2 +- cpp/include/cudf/scalar/scalar.hpp | 2 +- cpp/include/cudf/scalar/scalar_device_view.cuh | 2 +- cpp/include/cudf/sorting.hpp | 4 ++-- cpp/include/cudf/strings/combine.hpp | 2 +- cpp/include/cudf/strings/repeat_strings.hpp | 6 +++--- cpp/include/cudf/strings/replace.hpp | 2 +- cpp/include/cudf/strings/split/split.hpp | 4 ++-- cpp/include/cudf/strings/string_view.hpp | 2 +- cpp/include/cudf/table/table_device_view.cuh | 2 +- cpp/include/cudf/transform.hpp | 6 +++--- cpp/include/cudf_test/base_fixture.hpp | 2 +- cpp/include/cudf_test/type_list_utilities.hpp | 4 ++-- cpp/include/cudf_test/type_lists.hpp | 2 +- cpp/src/copying/contiguous_split.cu | 6 +++--- cpp/src/io/json/json_gpu.cu | 4 ++-- cpp/src/io/json/json_gpu.h | 4 ++-- cpp/src/io/json/reader_impl.cu | 2 +- cpp/src/io/orc/orc.cpp | 2 +- cpp/src/io/orc/stripe_enc.cu | 2 +- cpp/src/io/orc/writer_impl.hpp | 2 +- cpp/src/io/parquet/parquet.cpp | 2 +- cpp/src/io/parquet/parquet.hpp | 2 +- 
cpp/src/io/statistics/conversion_type_select.cuh | 2 +- .../io/statistics/statistics_type_identification.cuh | 4 ++-- cpp/src/join/hash_join.cu | 2 +- cpp/src/reshape/byte_cast.cu | 4 ++-- cpp/src/rolling/rolling_detail.cuh | 8 ++++---- cpp/src/strings/convert/convert_fixed_point.cu | 2 +- cpp/src/strings/json/json_path.cu | 2 +- cpp/src/text/generate_ngrams.cu | 2 +- cpp/src/transform/row_bit_count.cu | 2 +- cpp/tests/io/orc_test.cpp | 4 ++-- cpp/tests/io/parquet_test.cpp | 6 +++--- cpp/tests/join/join_tests.cpp | 10 +++++----- cpp/tests/sort/segmented_sort_tests.cpp | 2 +- cpp/tests/strings/repeat_strings_tests.cpp | 2 +- cpp/tests/table/table_view_tests.cu | 2 +- 53 files changed, 90 insertions(+), 89 deletions(-) diff --git a/cpp/benchmarks/common/generate_benchmark_input.cpp b/cpp/benchmarks/common/generate_benchmark_input.cpp index 591e42ceddf..02e901be62e 100644 --- a/cpp/benchmarks/common/generate_benchmark_input.cpp +++ b/cpp/benchmarks/common/generate_benchmark_input.cpp @@ -108,7 +108,8 @@ size_t avg_element_bytes(data_profile const& profile, cudf::type_id tid) /** * @brief Functor that computes a random column element with the given data profile. * - * The implementation is SFINAEd for diffent type groups. Currently only used for fixed-width types. + * The implementation is SFINAEd for different type groups. Currently only used for fixed-width + * types. */ template struct random_value_fn; diff --git a/cpp/benchmarks/common/generate_benchmark_input.hpp b/cpp/benchmarks/common/generate_benchmark_input.hpp index acb8adc98e9..6c2a43a34e2 100644 --- a/cpp/benchmarks/common/generate_benchmark_input.hpp +++ b/cpp/benchmarks/common/generate_benchmark_input.hpp @@ -137,7 +137,7 @@ struct distribution_params< }; /** - * @brief Boolens are parameterized with the probability of getting `true` value. + * @brief Booleans are parameterized with the probability of getting `true` value. 
*/ template struct distribution_params::value>> { @@ -195,7 +195,7 @@ std::vector get_type_or_group(int32_t id); * * If an element of the input vector is a `cudf::type_id` enumerator, function return value simply * includes this type. If an element of the input vector is a `type_group_id` enumerator, function - * return value includes all types coresponding to the group enumerator. + * return value includes all types corresponding to the group enumerator. * * @param ids Vector of integers equal to either a `cudf::type_id` enumerator or a `type_group_id` * enumerator. diff --git a/cpp/benchmarks/join/generate_input_tables.cuh b/cpp/benchmarks/join/generate_input_tables.cuh index 285a9241a26..edd9e5d8ce2 100644 --- a/cpp/benchmarks/join/generate_input_tables.cuh +++ b/cpp/benchmarks/join/generate_input_tables.cuh @@ -141,7 +141,7 @@ __global__ void init_probe_tbl(key_type* const probe_tbl, * (e.g. device memory, zero copy memory or unified memory). Each value in the build table * will be from [0,rand_max] and if uniq_build_tbl_keys is true it is ensured that each value * will be uniq in the build table. Each value in the probe table will be also in the build - * table with a propability of selectivity and a random number from + * table with a probability of selectivity and a random number from * [0,rand_max] \setminus \{build_tbl\} otherwise. * * @param[out] build_tbl The build table to generate. Usually the smaller table used to @@ -150,7 +150,7 @@ __global__ void init_probe_tbl(key_type* const probe_tbl, * @param[out] probe_tbl The probe table to generate. Usually the larger table used to * probe into the hash table created from the build table. * @param[in] build_tbl_size number of keys in the build table - * @param[in] selectivity propability with which an element of the probe table is + * @param[in] selectivity probability with which an element of the probe table is * present in the build table. * @param[in] rand_max maximum random number to generate. I.e. 
random numbers are * integers from [0,rand_max]. @@ -169,7 +169,7 @@ void generate_input_tables(key_type* const build_tbl, // expense of not being that accurate with applying the selectivity an especially more memory // efficient implementations would be to partition the random numbers into two intervals and then // let one table choose random numbers from only one interval and the other only select with - // selectivity propability from the same interval and from the other in the other cases. + // selectivity probability from the same interval and from the other in the other cases. static_assert(std::is_signed::value, "key_type needs to be signed for lottery to work"); diff --git a/cpp/docs/DEVELOPER_GUIDE.md b/cpp/docs/DEVELOPER_GUIDE.md index 8ec111acdb2..9ec64060847 100644 --- a/cpp/docs/DEVELOPER_GUIDE.md +++ b/cpp/docs/DEVELOPER_GUIDE.md @@ -470,7 +470,7 @@ libcudf, and you should not use it in new code in libcudf without careful consid use `rmm::device_uvector` along with the utility factories in `device_factories.hpp`. These utilities enable creation of `uvector`s from host-side vectors, or creating zero-initialized `uvector`s, so that they are as convenient to use as `device_vector`. Avoiding `device_vector` has -a number of benefits, as described in the folling section on `rmm::device_uvector`. +a number of benefits, as described in the following section on `rmm::device_uvector`. #### `rmm::device_uvector` diff --git a/cpp/docs/TESTING.md b/cpp/docs/TESTING.md index 2c7b62b8b6d..3c741b5d4e7 100644 --- a/cpp/docs/TESTING.md +++ b/cpp/docs/TESTING.md @@ -67,7 +67,7 @@ not necessary for your test fixtures to inherit from it. 
Example: ```c++ -class MyTestFiture : public cudf::test::BaseFixture {...}; +class MyTestFixture : public cudf::test::BaseFixture {...}; ``` ## Typed Tests diff --git a/cpp/include/cudf/aggregation.hpp b/cpp/include/cudf/aggregation.hpp index 5fab284d506..9ca6d773626 100644 --- a/cpp/include/cudf/aggregation.hpp +++ b/cpp/include/cudf/aggregation.hpp @@ -275,7 +275,7 @@ std::unique_ptr make_collect_set_aggregation(null_policy null_handling = n * @brief Factory to create a MERGE_LISTS aggregation. * * Given a lists column, this aggregation merges all the lists corresponding to the same key value - * into one list. It is designed specificly to merge the partial results of multiple (distributed) + * into one list. It is designed specifically to merge the partial results of multiple (distributed) * groupby `COLLECT_LIST` aggregations into a final `COLLECT_LIST` result. As such, it requires the * input lists column to be non-nullable (the child column containing list entries is not subjected * to this requirement). @@ -290,7 +290,7 @@ std::unique_ptr make_merge_lists_aggregation(); * value into one list, then it drops all the duplicate entries in each lists, producing a lists * column containing non-repeated entries. * - * This aggregation is designed specificly to merge the partial results of multiple (distributed) + * This aggregation is designed specifically to merge the partial results of multiple (distributed) * groupby `COLLECT_LIST` or `COLLECT_SET` aggregations into a final `COLLECT_SET` result. As such, * it requires the input lists column to be non-nullable (the child column containing list entries * is not subjected to this requirement). 
diff --git a/cpp/include/cudf/column/column_device_view.cuh b/cpp/include/cudf/column/column_device_view.cuh index 8cb05ca0bad..02e3eee6b43 100644 --- a/cpp/include/cudf/column/column_device_view.cuh +++ b/cpp/include/cudf/column/column_device_view.cuh @@ -37,7 +37,7 @@ /** * @file column_device_view.cuh - * @brief Column device view class definitons + * @brief Column device view class definitions */ namespace cudf { @@ -541,7 +541,7 @@ class alignas(16) column_device_view : public detail::column_device_view_base { * * optional_begin with mode `DYNAMIC` defers the assumption of nullability to * runtime, with the user stating on construction of the iterator if column has nulls. - * `DYNAMIC` mode is nice when an algorithm is going to execute on mutliple + * `DYNAMIC` mode is nice when an algorithm is going to execute on multiple * iterators and you don't want to compile all the combinations of iterator types * * Example: diff --git a/cpp/include/cudf/column/column_factories.hpp b/cpp/include/cudf/column/column_factories.hpp index e5424f0fc44..bdb7fd48e60 100644 --- a/cpp/include/cudf/column/column_factories.hpp +++ b/cpp/include/cudf/column/column_factories.hpp @@ -399,7 +399,7 @@ std::unique_ptr make_strings_column( * one more than the total number of strings so the `offsets.back()` is the total number of bytes * in the strings array. `offsets.front()` must always be 0 to point to the beginning of `strings`. * @param[in] null_mask Device span containing the null element indicator bitmask. Arrow format for - * nulls is used for interpeting this bitmask. + * nulls is used for interpreting this bitmask. * @param[in] null_count The number of null string entries. If equal to `UNKNOWN_NULL_COUNT`, the * null count will be computed dynamically on the first invocation of `column::null_count()` * @param[in] stream CUDA stream used for device memory operations and kernel launches. 
@@ -428,7 +428,7 @@ std::unique_ptr make_strings_column( * strings are identified by the offsets and the nullmask. * @param[in] null_count The number of null string entries. * @param[in] null_mask The bits specifying the null strings in device memory. Arrow format for - * nulls is used for interpeting this bitmask. + * nulls is used for interpreting this bitmask. * @param[in] stream CUDA stream used for device memory operations and kernel launches. * @param[in] mr Device memory resource used for allocation of the column's `null_mask` and children * columns' device memory. @@ -491,7 +491,7 @@ std::unique_ptr make_strings_column( * further nested. * @param[in] null_count The number of null list entries. * @param[in] null_mask The bits specifying the null lists in device memory. - * Arrow format for nulls is used for interpeting this bitmask. + * Arrow format for nulls is used for interpreting this bitmask. * @param[in] stream Optional stream for use with all memory allocation * and device kernels * @param[in] mr Optional resource to use for device memory diff --git a/cpp/include/cudf/column/column_view.hpp b/cpp/include/cudf/column/column_view.hpp index 82326a21d7d..7ab8cc0f6b1 100644 --- a/cpp/include/cudf/column/column_view.hpp +++ b/cpp/include/cudf/column/column_view.hpp @@ -22,7 +22,7 @@ /** * @file column_view.hpp - * @brief column view class definitons + * @brief column view class definitions */ namespace cudf { diff --git a/cpp/include/cudf/detail/aggregation/aggregation.cuh b/cpp/include/cudf/detail/aggregation/aggregation.cuh index 09763d66403..53c1f47c201 100644 --- a/cpp/include/cudf/detail/aggregation/aggregation.cuh +++ b/cpp/include/cudf/detail/aggregation/aggregation.cuh @@ -643,7 +643,7 @@ struct identity_initializer { * The `i`th column will be initialized with the identity value of the `i`th * aggregation operation in `aggs`. 
* - * @throw cudf::logic_error if column type and corresponging agg are incompatible + * @throw cudf::logic_error if column type and corresponding agg are incompatible * @throw cudf::logic_error if column type is not fixed-width * * @param table The table of columns to initialize. diff --git a/cpp/include/cudf/detail/indexalator.cuh b/cpp/include/cudf/detail/indexalator.cuh index 8bbd0d1aada..4a2b40e8be7 100644 --- a/cpp/include/cudf/detail/indexalator.cuh +++ b/cpp/include/cudf/detail/indexalator.cuh @@ -29,7 +29,7 @@ namespace detail { /** * @brief The base class for the input or output index normalizing iterator. * - * This implementation uses CTRP to define the `input_indexalator` and the + * This implementation uses CRTP to define the `input_indexalator` and the * `output_indexalator` classes. This is so this class can manipulate the * uniquely typed subclass member variable `p_` directly without requiring * virtual functions since iterator instances will be copied to device memory. 
@@ -241,7 +241,7 @@ struct base_indexalator { */ struct input_indexalator : base_indexalator { friend struct indexalator_factory; - friend struct base_indexalator; // for CTRP + friend struct base_indexalator; // for CRTP using reference = size_type const; // this keeps STL and thrust happy @@ -326,7 +326,7 @@ struct input_indexalator : base_indexalator { */ struct output_indexalator : base_indexalator { friend struct indexalator_factory; - friend struct base_indexalator; // for CTRP + friend struct base_indexalator; // for CRTP using reference = output_indexalator const&; // required for output iterators diff --git a/cpp/include/cudf/detail/iterator.cuh b/cpp/include/cudf/detail/iterator.cuh index 4cb0c6e1877..deb161fd9c2 100644 --- a/cpp/include/cudf/detail/iterator.cuh +++ b/cpp/include/cudf/detail/iterator.cuh @@ -177,7 +177,7 @@ auto make_null_replacement_iterator(column_device_view const& column, * * make_optional_iterator with mode `DYNAMIC` defers the assumption of nullability to * runtime, with the user stating on construction of the iterator if column has nulls. - * `DYNAMIC` mode is nice when an algorithm is going to execute on mutliple + * `DYNAMIC` mode is nice when an algorithm is going to execute on multiple * iterators and you don't want to compile all the combinations of iterator types * * Example: @@ -819,7 +819,7 @@ auto inline make_pair_iterator(scalar const& scalar_value) * * Else, if the scalar is null, then the value of `p.first` is undefined and `p.second == false`. * - * The behaviour is undefined if the scalar is destroyed before iterator dereferencing. + * The behavior is undefined if the scalar is destroyed before iterator dereferencing. * * @throws cudf::logic_error if scalar datatype and Element type mismatch. 
* @throws cudf::logic_error if the returned iterator is dereferenced in host diff --git a/cpp/include/cudf/detail/nvtx/nvtx3.hpp b/cpp/include/cudf/detail/nvtx/nvtx3.hpp index add5699e34a..0e1a82a0657 100644 --- a/cpp/include/cudf/detail/nvtx/nvtx3.hpp +++ b/cpp/include/cudf/detail/nvtx/nvtx3.hpp @@ -54,7 +54,7 @@ * \code{.cpp} * #include "nvtx3.hpp" * void some_function(){ - * // Begins a NVTX range with the messsage "some_function" + * // Begins a NVTX range with the message "some_function" * // The range ends when some_function() returns and `r` is destroyed * nvtx3::thread_range r{"some_function"}; * @@ -322,7 +322,7 @@ * Example: * \code{.cpp} * // Create an `event_attributes` with the custom message "my message" - * nvtx3::event_attributes attr{nvtx3::Mesage{"my message"}}; + * nvtx3::event_attributes attr{nvtx3::message{"my message"}}; * * // strings and string literals implicitly assumed to be a `nvtx3::message` * nvtx3::event_attributes attr{"my message"}; @@ -1267,7 +1267,7 @@ class registered_message { * nvtx3::thread_range range1{attr1}; * * // `range2` contains message "message 2" - * nvtx3::thread_range range2{nvtx3::Mesage{"message 2"}}; + * nvtx3::thread_range range2{nvtx3::message{"message 2"}}; * * // `std::string` and string literals are implicitly assumed to be * // the contents of an `nvtx3::message` @@ -1525,7 +1525,7 @@ class payload { * * // For convenience, the arguments that can be passed to the * `event_attributes` - * // constructor may be passed to the `domain_thread_range` contructor where + * // constructor may be passed to the `domain_thread_range` constructor where * // they will be forwarded to the `EventAttribute`s constructor * nvtx3::thread_range r{nvtx3::payload{42}, nvtx3::category{1}, "message"}; * \endcode diff --git a/cpp/include/cudf/detail/unary.hpp b/cpp/include/cudf/detail/unary.hpp index 0615e502c60..e672cf01488 100644 --- a/cpp/include/cudf/detail/unary.hpp +++ b/cpp/include/cudf/detail/unary.hpp @@ -31,7 +31,7 @@ 
namespace detail { * doesn't. * * @tparam InputIterator Iterator type for `begin` and `end` - * @tparam Predicate A predicator type which will be evaludated + * @tparam Predicate A predicate type which will be evaluated * @param begin Beginning of the sequence of elements * @param end End of the sequence of elements * @param p Predicate to be applied to each element in `[begin,end)` diff --git a/cpp/include/cudf/detail/utilities/device_atomics.cuh b/cpp/include/cudf/detail/utilities/device_atomics.cuh index 16b7da0a083..2124e08d605 100644 --- a/cpp/include/cudf/detail/utilities/device_atomics.cuh +++ b/cpp/include/cudf/detail/utilities/device_atomics.cuh @@ -161,7 +161,7 @@ struct genericAtomicOperationImpl { // ----------------------------------------------------------------------- // specialized functions for operators -// `atomicAdd` supports int32, float, double (signed int64 is not supproted.) +// `atomicAdd` supports int32, float, double (signed int64 is not supported.) // `atomicMin`, `atomicMax` support int32_t, int64_t // `atomicAnd`, `atomicOr`, `atomicXor` support int32_t, int64_t template <> diff --git a/cpp/include/cudf/detail/valid_if.cuh b/cpp/include/cudf/detail/valid_if.cuh index 11ce9199c2d..4a7e9b89c80 100644 --- a/cpp/include/cudf/detail/valid_if.cuh +++ b/cpp/include/cudf/detail/valid_if.cuh @@ -117,7 +117,7 @@ std::pair valid_if( * input ranges. * Given a set of bitmasks, `masks`, the state of bit `j` in mask `i` is - * determined by `p( *(begin1 + i), *(begin2 + j))`. If the predivate evaluates + * determined by `p( *(begin1 + i), *(begin2 + j))`. If the predicate evaluates * to true, the the bit is set to `1`. If false, set to `0`. 
* * Example Arguments: diff --git a/cpp/include/cudf/scalar/scalar.hpp b/cpp/include/cudf/scalar/scalar.hpp index 2e57e56255d..717cf8ea7b0 100644 --- a/cpp/include/cudf/scalar/scalar.hpp +++ b/cpp/include/cudf/scalar/scalar.hpp @@ -78,7 +78,7 @@ class scalar { /** * @brief Indicates whether the scalar contains a valid value. * - * @note Using the value when `is_valid() == false` is undefined behaviour. In addition, this + * @note Using the value when `is_valid() == false` is undefined behavior. In addition, this * function does a stream synchronization. * * @param stream CUDA stream used for device memory operations. diff --git a/cpp/include/cudf/scalar/scalar_device_view.cuh b/cpp/include/cudf/scalar/scalar_device_view.cuh index d56d5d5eb0d..884b412d3e2 100644 --- a/cpp/include/cudf/scalar/scalar_device_view.cuh +++ b/cpp/include/cudf/scalar/scalar_device_view.cuh @@ -21,7 +21,7 @@ /** * @file scalar_device_view.cuh - * @brief Scalar device view class definitons + * @brief Scalar device view class definitions */ namespace cudf { diff --git a/cpp/include/cudf/sorting.hpp b/cpp/include/cudf/sorting.hpp index 2454cfe7c7b..36a8131a78e 100644 --- a/cpp/include/cudf/sorting.hpp +++ b/cpp/include/cudf/sorting.hpp @@ -187,7 +187,7 @@ std::unique_ptr rank( /** * @brief Returns sorted order after sorting each segment in the table. * - * If segment_offsets contains values larger than number of rows, behaviour is undefined. + * If segment_offsets contains values larger than number of rows, behavior is undefined. * @throws cudf::logic_error if `segment_offsets` is not `size_type` column. * * @param keys The table that determines the ordering of elements in each segment @@ -214,7 +214,7 @@ std::unique_ptr segmented_sorted_order( /** * @brief Performs a lexicographic segmented sort of a table * - * If segment_offsets contains values larger than number of rows, behaviour is undefined. + * If segment_offsets contains values larger than number of rows, behavior is undefined. 
* @throws cudf::logic_error if `values.num_rows() != keys.num_rows()`. * @throws cudf::logic_error if `segment_offsets` is not `size_type` column. * diff --git a/cpp/include/cudf/strings/combine.hpp b/cpp/include/cudf/strings/combine.hpp index 3e069de2f0f..32f8d482a34 100644 --- a/cpp/include/cudf/strings/combine.hpp +++ b/cpp/include/cudf/strings/combine.hpp @@ -272,7 +272,7 @@ std::unique_ptr join_list_elements( * delimited by the @p separator provided. * * A null list row will always result in a null string in the output row. Any non-null list row - * having a null elenent will result in the corresponding output row to be null unless a + * having a null element will result in the corresponding output row to be null unless a * @p narep string is specified to be used in its place. * * If @p separate_nulls is set to `NO` and @p narep is valid then separators are not added to the diff --git a/cpp/include/cudf/strings/repeat_strings.hpp b/cpp/include/cudf/strings/repeat_strings.hpp index 4023dbc6c84..2b39662456b 100644 --- a/cpp/include/cudf/strings/repeat_strings.hpp +++ b/cpp/include/cudf/strings/repeat_strings.hpp @@ -31,7 +31,7 @@ namespace strings { * @brief Repeat the given string scalar by a given number of times. * * For a given string scalar, an output string scalar is generated by repeating the input string by - * a number of times given by the @p `repeat_times` parameter. If `repeat_times` is not a positve + * a number of times given by the @p `repeat_times` parameter. If `repeat_times` is not a positive * value, an empty (valid) string scalar will be returned. An invalid input scalar will always * result in an invalid output scalar regardless of the value of `repeat_times` parameter. 
* @@ -42,7 +42,7 @@ namespace strings { * out is '123XYZ-123XYZ-123XYZ-' * @endcode * - * @throw cudf::logic_error if the size of the ouput string scalar exceeds the maximum value that + * @throw cudf::logic_error if the size of the output string scalar exceeds the maximum value that * can be stored by the index type * (i.e., `input.size() * repeat_times > numeric_limits::max()`). * @@ -61,7 +61,7 @@ std::unique_ptr repeat_strings( * * For a given strings column, an output strings column is generated by repeating each string from * the input by a number of times given by the @p `repeat_times` parameter. If `repeat_times` is not - * a positve value, all the rows of the output strings column will be an empty string. Any null row + * a positive value, all the rows of the output strings column will be an empty string. Any null row * will result in a null row regardless of the value of `repeat_times` parameter. * * Note that this function cannot handle the cases when the size of the output column exceeds the diff --git a/cpp/include/cudf/strings/replace.hpp b/cpp/include/cudf/strings/replace.hpp index e9091b88b08..40eb796eba7 100644 --- a/cpp/include/cudf/strings/replace.hpp +++ b/cpp/include/cudf/strings/replace.hpp @@ -36,7 +36,7 @@ namespace strings { * input string. If not found, the output entry is just a copy of the * corresponding input string. * - * Specifing an empty string for repl will essentially remove the target + * Specifying an empty string for repl will essentially remove the target * string if found in each string. * * Null string entries will return null output string entries. diff --git a/cpp/include/cudf/strings/split/split.hpp b/cpp/include/cudf/strings/split/split.hpp index 82b191a8e1b..4978bad3bb3 100644 --- a/cpp/include/cudf/strings/split/split.hpp +++ b/cpp/include/cudf/strings/split/split.hpp @@ -139,7 +139,7 @@ std::unique_ptr rsplit( * * @throw cudf:logic_error if `delimiter` is invalid. 
* - * @param strings A column of string elements to be splitted. + * @param strings A column of string elements to be split. * @param delimiter The string to identify split points in each string. * Default of empty string indicates split on whitespace. * @param maxsplit Maximum number of splits to perform. @@ -216,7 +216,7 @@ std::unique_ptr split_record( * * @throw cudf:logic_error if `delimiter` is invalid. * - * @param strings A column of string elements to be splitted. + * @param strings A column of string elements to be split. * @param delimiter The string to identify split points in each string. * Default of empty string indicates split on whitespace. * @param maxsplit Maximum number of splits to perform. diff --git a/cpp/include/cudf/strings/string_view.hpp b/cpp/include/cudf/strings/string_view.hpp index 5a3dbd5c1bc..be182cb0e9d 100644 --- a/cpp/include/cudf/strings/string_view.hpp +++ b/cpp/include/cudf/strings/string_view.hpp @@ -410,7 +410,7 @@ CUDA_HOST_DEVICE_CALLABLE size_type to_char_utf8(const char* str, char_utf8& cha * @brief Place a char_utf8 value into a char array. * * @param character Single character - * @param[out] str Allocated char array with enough space to hold the encoded characer. + * @param[out] str Allocated char array with enough space to hold the encoded character. 
* @return The number of bytes in the character */ CUDA_HOST_DEVICE_CALLABLE size_type from_char_utf8(char_utf8 character, char* str) diff --git a/cpp/include/cudf/table/table_device_view.cuh b/cpp/include/cudf/table/table_device_view.cuh index 7c80c958f92..71e48370ccf 100644 --- a/cpp/include/cudf/table/table_device_view.cuh +++ b/cpp/include/cudf/table/table_device_view.cuh @@ -27,7 +27,7 @@ /** * @file table_device_view.cuh - * @brief Table device view class definitons + * @brief Table device view class definitions */ namespace cudf { diff --git a/cpp/include/cudf/transform.hpp b/cpp/include/cudf/transform.hpp index e99e0db21c5..460c62e3598 100644 --- a/cpp/include/cudf/transform.hpp +++ b/cpp/include/cudf/transform.hpp @@ -40,7 +40,7 @@ namespace cudf { * * @param input An immutable view of the input column to transform * @param unary_udf The PTX/CUDA string of the unary function to apply - * @param outout_type The output type that is compatible with the output type in the UDF + * @param output_type The output type that is compatible with the output type in the UDF * @param is_ptx true: the UDF is treated as PTX code; false: the UDF is treated as CUDA code * @param mr Device memory resource used to allocate the returned column's device memory * @return The column resulting from applying the unary function to @@ -133,7 +133,7 @@ std::pair, std::unique_ptr> encode( * @param bitmask A device pointer to the bitmask which needs to be converted * @param begin_bit position of the bit from which the conversion should start * @param end_bit position of the bit before which the conversion should stop - * @param mr Device memory resource used to allocate the returned columns's device memory + * @param mr Device memory resource used to allocate the returned column's device memory * @return A boolean column representing the given mask from [begin_bit, end_bit). 
*/ std::unique_ptr mask_to_bools( @@ -164,7 +164,7 @@ std::unique_ptr mask_to_bools( * row_bit_count(column(x)) >= row_bit_count(gather(column(x))) * * @param t The table view to perform the computation on. - * @param mr Device memory resource used to allocate the returned columns's device memory + * @param mr Device memory resource used to allocate the returned column's device memory * @return A 32-bit integer column containing the per-row bit counts. */ std::unique_ptr row_bit_count( diff --git a/cpp/include/cudf_test/base_fixture.hpp b/cpp/include/cudf_test/base_fixture.hpp index 9fa67dccb52..766dda19243 100644 --- a/cpp/include/cudf_test/base_fixture.hpp +++ b/cpp/include/cudf_test/base_fixture.hpp @@ -252,7 +252,7 @@ inline std::shared_ptr create_memory_resource( /** * @brief Parses the cuDF test command line options. * - * Currently only supports 'rmm_mode' string paramater, which set the rmm + * Currently only supports 'rmm_mode' string parameter, which sets the rmm * allocation mode. The default value of the parameter is 'pool'. * * @return Parsing results in the form of unordered map diff --git a/cpp/include/cudf_test/type_list_utilities.hpp b/cpp/include/cudf_test/type_list_utilities.hpp index a3f771c2f72..1588e3c9be9 100644 --- a/cpp/include/cudf_test/type_list_utilities.hpp +++ b/cpp/include/cudf_test/type_list_utilities.hpp @@ -32,7 +32,7 @@ * template * class TestFixture : ::testing::Test { }; * - * TYPED_TEST_CASE(TestFixure, TestTypes); + * TYPED_TEST_CASE(TestFixture, TestTypes); * * TYPED_TEST(TestFixture, mytest){ * using Type0 = GetType; // the first type element @@ -169,7 +169,7 @@ struct ConcatImpl<> { }; /** - * @brief Concantenates compile-time lists of types into a single type list. + * @brief Concatenates compile-time lists of types into a single type list. 
* * Example: * ``` diff --git a/cpp/include/cudf_test/type_lists.hpp b/cpp/include/cudf_test/type_lists.hpp index aeddafae253..5c1b0c6c458 100644 --- a/cpp/include/cudf_test/type_lists.hpp +++ b/cpp/include/cudf_test/type_lists.hpp @@ -59,7 +59,7 @@ constexpr std::array types_to_ids_impl( * array == {type_id::INT32, type_id::FLOAT}; * ``` * - * @tparam TYPES List of types to conver to `type_id`s + * @tparam TYPES List of types to convert to `type_id`s * @return `std::array` of `type_id`s corresponding to each type in `TYPES` */ template diff --git a/cpp/src/copying/contiguous_split.cu b/cpp/src/copying/contiguous_split.cu index 4b11382a3f2..d4d54a3f94f 100644 --- a/cpp/src/copying/contiguous_split.cu +++ b/cpp/src/copying/contiguous_split.cu @@ -53,7 +53,7 @@ inline __device__ std::size_t _round_up_safe(std::size_t number_to_round, std::s * The definition of "buffer" used throughout this module is a component piece of a * cudf column. So for example, a fixed-width column with validity would have 2 associated * buffers : the data itself and the validity buffer. contiguous_split operates by breaking - * each column up into it's individual components and copying each one as a seperate kernel + * each column up into its individual components and copying each one as a separate kernel * block. */ struct src_buf_info { @@ -188,7 +188,7 @@ __device__ void copy_buffer(uint8_t* __restrict__ dst, } // if we're performing a value shift (offsets), or a bit shift (validity) the # of bytes and - // alignment must be a multiple of 4. value shifting and bit shifting are mututally exclusive + // alignment must be a multiple of 4. value shifting and bit shifting are mutually exclusive // and will never both be true at the same time. 
if (value_shift || bit_shift) { std::size_t idx = (num_bytes - remainder) / 4; @@ -249,7 +249,7 @@ __device__ void copy_buffer(uint8_t* __restrict__ dst, * * @param num_src_bufs Total number of source buffers (N) * @param src_bufs Input source buffers (N) - * @param dst_bufs Desination buffers (N*M) + * @param dst_bufs Destination buffers (N*M) * @param buf_info Information on the range of values to be copied for each destination buffer. */ template diff --git a/cpp/src/io/json/json_gpu.cu b/cpp/src/io/json/json_gpu.cu index 2bc7969d5e5..771eb64d24e 100644 --- a/cpp/src/io/json/json_gpu.cu +++ b/cpp/src/io/json/json_gpu.cu @@ -416,7 +416,7 @@ struct field_descriptor { * @param[in] end pointer to the first character after the parsing range * @param[in] opts The global parsing behavior options * @param[in] field_idx Index of the current field in the input row - * @param[in] col_map Pointer to the (column name hash -> solumn index) map in device memory. + * @param[in] col_map Pointer to the (column name hash -> column index) map in device memory. * nullptr is passed when the input file does not consist of objects. * @return Descriptor of the parsed field */ @@ -481,7 +481,7 @@ __device__ std::pair get_row_data_range( * @param[in] data The entire data to read * @param[in] row_offsets The offset of each row in the input * @param[in] column_types The data type of each column - * @param[in] col_map Pointer to the (column name hash -> solumn index) map in device memory. + * @param[in] col_map Pointer to the (column name hash -> column index) map in device memory. * nullptr is passed when the input file does not consist of objects. 
* @param[out] output_columns The output column data * @param[out] valid_fields The bitmaps indicating whether column fields are valid diff --git a/cpp/src/io/json/json_gpu.h b/cpp/src/io/json/json_gpu.h index 4a68ce48f20..f91bfc70882 100644 --- a/cpp/src/io/json/json_gpu.h +++ b/cpp/src/io/json/json_gpu.h @@ -44,7 +44,7 @@ using col_map_type = concurrent_unordered_map; * @param[in] data The entire data to read * @param[in] row_offsets The start of each data record * @param[in] dtypes The data type of each column - * @param[in] col_map Pointer to the (column name hash -> solumn index) map in device memory. + * @param[in] col_map Pointer to the (column name hash -> column index) map in device memory. * nullptr is passed when the input file does not consist of objects. * @param[out] output_columns The output column data * @param[out] valid_fields The bitmaps indicating whether column fields are valid @@ -68,7 +68,7 @@ void convert_json_to_columns(parse_options_view const &options, * @param[in] data Input data buffer * @param[in] row_offsets The offset of each row in the input * @param[in] num_columns The number of columns of input data - * @param[in] col_map Pointer to the (column name hash -> solumn index) map in device memory. + * @param[in] col_map Pointer to the (column name hash -> column index) map in device memory. * nullptr is passed when the input file does not consist of objects. * @param[in] stream CUDA stream used for device memory operations and kernel launches. * diff --git a/cpp/src/io/json/reader_impl.cu b/cpp/src/io/json/reader_impl.cu index 4d5eee6cac7..124e1a5e083 100644 --- a/cpp/src/io/json/reader_impl.cu +++ b/cpp/src/io/json/reader_impl.cu @@ -134,7 +134,7 @@ col_map_ptr_type create_col_names_hash_map(column_view column_name_hashes, * @param[in] row_offsets Device array of row start locations in the input buffer * @param[in] stream CUDA stream used for device memory operations and kernel launches * - * @return std::unique_ptr
cudf table with three columns (offsets, lenghts, hashes) + * @return std::unique_ptr
cudf table with three columns (offsets, lengths, hashes) */ std::unique_ptr
create_json_keys_info_table(const parse_options_view &options, device_span const data, diff --git a/cpp/src/io/orc/orc.cpp b/cpp/src/io/orc/orc.cpp index ea6d6b6ac85..18a1779d7a6 100644 --- a/cpp/src/io/orc/orc.cpp +++ b/cpp/src/io/orc/orc.cpp @@ -461,7 +461,7 @@ metadata::metadata(datasource *const src) : source(src) auto md_data = decompressor->Decompress(buffer->data(), ps.metadataLength, &md_length); orc::ProtobufReader(md_data, md_length).read(md); - // Initilize the column names + // Initialize the column names init_column_names(); } diff --git a/cpp/src/io/orc/stripe_enc.cu b/cpp/src/io/orc/stripe_enc.cu index b469d7215b4..80f6f4b2fde 100644 --- a/cpp/src/io/orc/stripe_enc.cu +++ b/cpp/src/io/orc/stripe_enc.cu @@ -619,7 +619,7 @@ static const __device__ __constant__ int32_t kTimeScale[10] = { * @brief Encode column data * * @param[in] chunks encoder chunks device array [column][rowgroup] - * @param[in, out] chunks cunk streams device array [column][rowgroup] + * @param[in, out] chunks chunk streams device array [column][rowgroup] */ // blockDim {512,1,1} template diff --git a/cpp/src/io/orc/writer_impl.hpp b/cpp/src/io/orc/writer_impl.hpp index 155c83a88d9..6c7a10dca3e 100644 --- a/cpp/src/io/orc/writer_impl.hpp +++ b/cpp/src/io/orc/writer_impl.hpp @@ -74,7 +74,7 @@ struct encoder_decimal_info { }; /** - * @brief Returns the total number of rowgroups in the list of contigious stripes. + * @brief Returns the total number of rowgroups in the list of contiguous stripes. */ inline auto stripes_size(host_span stripes) { diff --git a/cpp/src/io/parquet/parquet.cpp b/cpp/src/io/parquet/parquet.cpp index 2a1bd0d5a18..bfc85e57ae9 100644 --- a/cpp/src/io/parquet/parquet.cpp +++ b/cpp/src/io/parquet/parquet.cpp @@ -289,7 +289,7 @@ bool CompactProtocolReader::InitSchema(FileMetaData *md) /* Inside FileMetaData, there is a std::vector of RowGroups and each RowGroup contains a * a std::vector of ColumnChunks. 
Each ColumnChunk has a member ColumnMetaData, which contains * a std::vector of std::strings representing paths. The purpose of the code below is to set the - * schema_idx of each column of each row to it corresonding row_group. This is effectively + * schema_idx of each column of each row to it corresponding row_group. This is effectively * mapping the columns to the schema. */ for (auto &row_group : md->row_groups) { diff --git a/cpp/src/io/parquet/parquet.hpp b/cpp/src/io/parquet/parquet.hpp index eefff518a9a..0eb1c2036b0 100644 --- a/cpp/src/io/parquet/parquet.hpp +++ b/cpp/src/io/parquet/parquet.hpp @@ -232,7 +232,7 @@ struct ColumnChunkMetaData { * column * * Each column chunk lives in a particular row group and are guaranteed to be - * contiguous in the file. Any mssing or corrupted chunks can be skipped during + * contiguous in the file. Any missing or corrupted chunks can be skipped during * reading. */ struct ColumnChunk { diff --git a/cpp/src/io/statistics/conversion_type_select.cuh b/cpp/src/io/statistics/conversion_type_select.cuh index 225377bfc4b..b76a5fcf3cd 100644 --- a/cpp/src/io/statistics/conversion_type_select.cuh +++ b/cpp/src/io/statistics/conversion_type_select.cuh @@ -70,7 +70,7 @@ template class Detect; /** - * @brief Utility class to detect multiple occurences of a type in the first element of pairs in a + * @brief Utility class to detect multiple occurrences of a type in the first element of pairs in a * tuple For eg. with the following tuple : * * using conversion_types = diff --git a/cpp/src/io/statistics/statistics_type_identification.cuh b/cpp/src/io/statistics/statistics_type_identification.cuh index 84399a307a5..869e2833285 100644 --- a/cpp/src/io/statistics/statistics_type_identification.cuh +++ b/cpp/src/io/statistics/statistics_type_identification.cuh @@ -55,8 +55,8 @@ struct conversion_map { std::pair>; }; -// In Parquet timestamps and durations with second resoluion are converted to -// milliseconds. 
Timestamps and durations with nanosecond resoluion are +// In Parquet timestamps and durations with second resolution are converted to +// milliseconds. Timestamps and durations with nanosecond resolution are // converted to microseconds. template <> struct conversion_map { diff --git a/cpp/src/join/hash_join.cu b/cpp/src/join/hash_join.cu index dfe3231e897..7904d54fdba 100644 --- a/cpp/src/join/hash_join.cu +++ b/cpp/src/join/hash_join.cu @@ -342,7 +342,7 @@ std::size_t get_full_join_size(cudf::table_device_view build_table, right_indices->data(), write_index.data(), join_size); - // Rlease intermediate memory alloation + // Release intermediate memory allocation left_indices->resize(0, stream); auto const left_table_row_count = probe_table.num_rows(); diff --git a/cpp/src/reshape/byte_cast.cu b/cpp/src/reshape/byte_cast.cu index 5bbdb5988e7..98156224cfe 100644 --- a/cpp/src/reshape/byte_cast.cu +++ b/cpp/src/reshape/byte_cast.cu @@ -108,7 +108,7 @@ std::unique_ptr byte_list_conversion::operator()( } // namespace /** - * @copydoc cudf::byte_cast(input_column,flip_endianess,rmm::mr::device_memory_resource) + * @copydoc cudf::byte_cast(input_column,flip_endianness,rmm::mr::device_memory_resource) * * @param stream CUDA stream used for device memory operations and kernel launches. 
*/ @@ -124,7 +124,7 @@ std::unique_ptr byte_cast(column_view const& input_column, } // namespace detail /** - * @copydoc cudf::byte_cast(input_column,flip_endianess,rmm::mr::device_memory_resource) + * @copydoc cudf::byte_cast(input_column,flip_endianness,rmm::mr::device_memory_resource) */ std::unique_ptr byte_cast(column_view const& input_column, flip_endianness endian_configuration, diff --git a/cpp/src/rolling/rolling_detail.cuh b/cpp/src/rolling/rolling_detail.cuh index d7114608787..862e44a0d2b 100644 --- a/cpp/src/rolling/rolling_detail.cuh +++ b/cpp/src/rolling/rolling_detail.cuh @@ -339,8 +339,8 @@ std::unique_ptr empty_output_for_rolling_aggregation(column_view const& // TODO: // Ideally, for UDF aggregations, the returned column would match // the agg's return type. It currently returns empty_like(input), because: - // 1. This preserves prior behaviour for empty input columns. - // 2. There is insufficient information to construct nested return colums. + // 1. This preserves prior behavior for empty input columns. + // 2. There is insufficient information to construct nested return columns. // `cudf::make_udf_aggregation()` expresses the return type as a `data_type` // which cannot express recursively nested types (e.g. `STRUCT>`.) // 3. In any case, UDFs that return nested types are not currently supported. @@ -616,7 +616,7 @@ class rolling_aggregation_preprocessor final : public cudf::detail::simple_aggre return aggs; } - // COLLECT_LIST aggregations do not peform a rolling operation at all. They get processed + // COLLECT_LIST aggregations do not perform a rolling operation at all. They get processed // entirely in the finalize() step. std::vector> visit( data_type, cudf::detail::collect_list_aggregation const&) override @@ -624,7 +624,7 @@ class rolling_aggregation_preprocessor final : public cudf::detail::simple_aggre return {}; } - // COLLECT_SET aggregations do not peform a rolling operation at all. 
They get processed + // COLLECT_SET aggregations do not perform a rolling operation at all. They get processed // entirely in the finalize() step. std::vector> visit( data_type, cudf::detail::collect_set_aggregation const&) override diff --git a/cpp/src/strings/convert/convert_fixed_point.cu b/cpp/src/strings/convert/convert_fixed_point.cu index 94c34f92c66..2f57b38249f 100644 --- a/cpp/src/strings/convert/convert_fixed_point.cu +++ b/cpp/src/strings/convert/convert_fixed_point.cu @@ -192,7 +192,7 @@ namespace { * @brief Calculate the size of the each string required for * converting each value in base-10 format. * - * ouput format is [-]integer.fraction + * output format is [-]integer.fraction */ template struct decimal_to_string_size_fn { diff --git a/cpp/src/strings/json/json_path.cu b/cpp/src/strings/json/json_path.cu index 0cf08892adc..409e1892c91 100644 --- a/cpp/src/strings/json/json_path.cu +++ b/cpp/src/strings/json/json_path.cu @@ -669,7 +669,7 @@ std::pair>, int> build_comma if (op.type == path_operator_type::ROOT) { CUDF_EXPECTS(h_operators.size() == 0, "Root operator ($) can only exist at the root"); } - // if we havent' gotten a root operator to start, and we're not empty, quietly push a + // if we have not gotten a root operator to start, and we're not empty, quietly push a // root operator now. if (h_operators.size() == 0 && op.type != path_operator_type::ROOT && op.type != path_operator_type::END) { diff --git a/cpp/src/text/generate_ngrams.cu b/cpp/src/text/generate_ngrams.cu index cab5a54a57d..f9b2355b2ff 100644 --- a/cpp/src/text/generate_ngrams.cu +++ b/cpp/src/text/generate_ngrams.cu @@ -41,7 +41,7 @@ namespace { /** * @brief Generate ngrams from strings column. * - * Adjacent strings are concatented with the provided separator. + * Adjacent strings are concatenated with the provided separator. * The number of adjacent strings join depends on the specified ngrams value. * For example: for bigrams (ngrams=2), pairs of strings are concatenated. 
*/ diff --git a/cpp/src/transform/row_bit_count.cu b/cpp/src/transform/row_bit_count.cu index f99c831e745..e20c7120571 100644 --- a/cpp/src/transform/row_bit_count.cu +++ b/cpp/src/transform/row_bit_count.cu @@ -380,7 +380,7 @@ __device__ size_type row_size_functor::operator()(column_device_vie /** * @brief Kernel for computing per-row sizes in bits. * - * @param cols An span of column_device_views represeting a column hierarcy + * @param cols An span of column_device_views representing a column hierarchy * @param info An span of column_info structs corresponding the elements in `cols` * @param output Output span of size (# rows) where per-row bit sizes are stored * @param max_branch_depth Maximum depth of the span stack needed per-thread diff --git a/cpp/tests/io/orc_test.cpp b/cpp/tests/io/orc_test.cpp index 4eed81298a2..56573ddab40 100644 --- a/cpp/tests/io/orc_test.cpp +++ b/cpp/tests/io/orc_test.cpp @@ -526,7 +526,7 @@ TEST_F(OrcWriterTest, Strings) TEST_F(OrcWriterTest, SlicedTable) { - // This test checks for writing zero copy, offseted views into existing cudf tables + // This test checks for writing zero copy, offsetted views into existing cudf tables std::vector strings{ "Monday", "Monday", "Friday", "Monday", "Friday", "Friday", "Friday", "Funday"}; @@ -609,7 +609,7 @@ TEST_F(OrcWriterTest, HostBuffer) TEST_F(OrcWriterTest, negTimestampsNano) { // This is a separate test because ORC format has a bug where writing a timestamp between -1 and 0 - // seconds from UNIX epoch is read as that timestamp + 1 second. We mimic that behaviour and so + // seconds from UNIX epoch is read as that timestamp + 1 second. We mimic that behavior and so // this test has to hardcode test values which are < -1 second. 
// Details: https://github.com/rapidsai/cudf/pull/5529#issuecomment-648768925 using namespace cudf::test; diff --git a/cpp/tests/io/parquet_test.cpp b/cpp/tests/io/parquet_test.cpp index e59a4accf66..5e521291a0c 100644 --- a/cpp/tests/io/parquet_test.cpp +++ b/cpp/tests/io/parquet_test.cpp @@ -526,7 +526,7 @@ TEST_F(ParquetWriterTest, MultiColumnWithNulls) CUDF_TEST_EXPECT_TABLES_EQUAL(expected->view(), result.tbl->view()); // TODO: Need to be able to return metadata in tree form from reader so they can be compared. - // Unfortunately the closest thing to a heirarchical schema is column_name_info which does not + // Unfortunately the closest thing to a hierarchical schema is column_name_info which does not // have any tests for it c++ or python. compare_metadata_equality(expected_metadata, result.metadata); } @@ -1569,7 +1569,7 @@ TEST_F(ParquetChunkedWriterTest, ReadingUnclosedFile) srand(31337); auto table = create_random_fixed_table(4, 4, true); - auto filepath = temp_env->get_temp_filepath("ReadingUnlosedFile.parquet"); + auto filepath = temp_env->get_temp_filepath("ReadingUnclosedFile.parquet"); cudf_io::chunked_parquet_writer_options args = cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath}); cudf_io::parquet_chunked_writer writer(args); @@ -2286,7 +2286,7 @@ TEST_F(ParquetReaderTest, UserBoundsWithNulls) // skip_rows / num_rows // clang-format off - std::vector> params{ {-1, -1}, {1, 3}, {3, -1}, + std::vector> params{ {-1, -1}, {1, 3}, {3, -1}, {31, -1}, {32, -1}, {33, -1}, {31, 5}, {32, 5}, {33, 5}, {-1, 7}, {-1, 31}, {-1, 32}, {-1, 33}, diff --git a/cpp/tests/join/join_tests.cpp b/cpp/tests/join/join_tests.cpp index 212458d5118..e468368842a 100644 --- a/cpp/tests/join/join_tests.cpp +++ b/cpp/tests/join/join_tests.cpp @@ -265,7 +265,7 @@ TEST_F(JoinTest, FullJoinOnNulls) cols_gold.push_back(col_gold_3.release()); cols_gold.push_back(col_gold_4.release()); cols_gold.push_back(col_gold_5.release()); - + Table 
gold(std::move(cols_gold)); auto gold_sort_order = cudf::sorted_order(gold.view()); @@ -549,7 +549,7 @@ TEST_F(JoinTest, LeftJoinOnNulls) { 1, 1, 0}); column_wrapper col_gold_5{{ 2, 8, -1}, { 1, 1, 0}}; - + CVector cols_gold; cols_gold.push_back(col_gold_0.release()); cols_gold.push_back(col_gold_1.release()); @@ -579,7 +579,7 @@ TEST_F(JoinTest, LeftJoinOnNulls) result_sort_order = cudf::sorted_order(result->view()); sorted_result = cudf::gather(result->view(), *result_sort_order); - + col_gold_0 = {{ 3, -1, 2}, { 1, 0, 1}}; col_gold_1 = {{ "s0", "s1", "s2"}, @@ -782,7 +782,7 @@ TEST_F(JoinTest, InnerJoinWithStructsAndNulls) CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*sorted_gold, *sorted_result); } -// // Test to check join behaviour when join keys are null. +// // Test to check join behavior when join keys are null. TEST_F(JoinTest, InnerJoinOnNulls) { // clang-format off @@ -826,7 +826,7 @@ TEST_F(JoinTest, InnerJoinOnNulls) cols_gold.push_back(col_gold_3.release()); cols_gold.push_back(col_gold_4.release()); cols_gold.push_back(col_gold_5.release()); - + Table gold(std::move(cols_gold)); auto gold_sort_order = cudf::sorted_order(gold.view()); diff --git a/cpp/tests/sort/segmented_sort_tests.cpp b/cpp/tests/sort/segmented_sort_tests.cpp index e907212c9e8..1e5cb941392 100644 --- a/cpp/tests/sort/segmented_sort_tests.cpp +++ b/cpp/tests/sort/segmented_sort_tests.cpp @@ -264,7 +264,7 @@ TEST_F(SegmentedSortInt, ErrorsMismatchArgSizes) {order::ASCENDING, order::ASCENDING}, {null_order::AFTER, null_order::AFTER}), logic_error); - // segmented_offsets beyond num_rows - undefined behaviour, no throw. + // segmented_offsets beyond num_rows - undefined behavior, no throw. 
CUDF_EXPECT_NO_THROW(cudf::segmented_sort_by_key(input1, input1, col2)); } diff --git a/cpp/tests/strings/repeat_strings_tests.cpp b/cpp/tests/strings/repeat_strings_tests.cpp index a229e1b468a..feca4b25c4d 100644 --- a/cpp/tests/strings/repeat_strings_tests.cpp +++ b/cpp/tests/strings/repeat_strings_tests.cpp @@ -71,7 +71,7 @@ TEST_F(RepeatJoinStringTest, ValidStringScalar) EXPECT_EQ(result->size(), 0); } - // Negatitve repeat times. + // Negative repeat times. { auto const result = cudf::strings::repeat_strings(str, -10); EXPECT_EQ(result->is_valid(), true); diff --git a/cpp/tests/table/table_view_tests.cu b/cpp/tests/table/table_view_tests.cu index 1fb4b88c79e..3a792573108 100644 --- a/cpp/tests/table/table_view_tests.cu +++ b/cpp/tests/table/table_view_tests.cu @@ -31,7 +31,7 @@ #include // Compares two tables row by row, if table1 row is less than table2, then corresponding row value -// in `ouput` would be `true`/1 else `false`/0. +// in `output` would be `true`/1 else `false`/0. struct TableViewTest : public cudf::test::BaseFixture { }; void row_comparison(cudf::table_view input1, From 76f463cf0854e6aba833ba06650a22ed42bae1aa Mon Sep 17 00:00:00 2001 From: Karthikeyan Natarajan Date: Wed, 7 Jul 2021 01:13:06 +0530 Subject: [PATCH 2/4] more spell fix in comments --- cpp/include/cudf/column/column.hpp | 2 +- cpp/include/cudf/groupby.hpp | 4 ++-- cpp/include/cudf/io/parquet.hpp | 2 +- cpp/include/cudf_test/column_wrapper.hpp | 4 ++-- cpp/src/bitmask/null_mask.cu | 2 +- cpp/src/dictionary/replace.cu | 2 +- cpp/src/hash/concurrent_unordered_multimap.cuh | 8 ++++---- cpp/src/interop/from_arrow.cu | 2 +- cpp/src/interop/to_arrow.cu | 2 +- cpp/src/io/parquet/page_data.cu | 2 +- cpp/src/jit/parser.hpp | 6 +++--- cpp/src/strings/convert/convert_floats.cu | 2 +- cpp/src/strings/replace/multi_re.cu | 2 +- cpp/tests/io/csv_test.cpp | 2 +- cpp/tests/io/parquet_test.cpp | 2 +- cpp/tests/utilities/column_utilities.cu | 2 +- 16 files changed, 23 insertions(+), 23 
deletions(-) diff --git a/cpp/include/cudf/column/column.hpp b/cpp/include/cudf/column/column.hpp index ee367840644..8decce7f260 100644 --- a/cpp/include/cudf/column/column.hpp +++ b/cpp/include/cudf/column/column.hpp @@ -293,7 +293,7 @@ class column { /** * @brief Implicit conversion operator to a `mutable_column_view`. * - * This allows pasing a `column` object into a function that accepts a + * This allows passing a `column` object into a function that accepts a *`mutable_column_view`. The conversion is automatic. * @note Creating a mutable view of a `column` invalidates the `column`'s diff --git a/cpp/include/cudf/groupby.hpp b/cpp/include/cudf/groupby.hpp index 85c469f58f8..5656b38a0ef 100644 --- a/cpp/include/cudf/groupby.hpp +++ b/cpp/include/cudf/groupby.hpp @@ -116,7 +116,7 @@ class groupby { /** * @brief Performs grouped aggregations on the specified values. * - * The values to aggregate and the aggregations to perform are specifed in an + * The values to aggregate and the aggregations to perform are specified in an * `aggregation_request`. Each request contains a `column_view` of values to * aggregate and a set of `aggregation`s to perform on those elements. * @@ -173,7 +173,7 @@ class groupby { /** * @brief Performs grouped scans on the specified values. * - * The values to aggregate and the aggregations to perform are specifed in an + * The values to aggregate and the aggregations to perform are specified in an * `aggregation_request`. Each request contains a `column_view` of values to * aggregate and a set of `aggregation`s to perform on those elements. * diff --git a/cpp/include/cudf/io/parquet.hpp b/cpp/include/cudf/io/parquet.hpp index 178e46a0c5c..6c67c62fb28 100644 --- a/cpp/include/cudf/io/parquet.hpp +++ b/cpp/include/cudf/io/parquet.hpp @@ -973,7 +973,7 @@ class chunked_parquet_writer_options_builder { * @brief Set to true if timestamps should be written as * int96 types instead of int64 types. 
Even though int96 is deprecated and is * not an internal type for cudf, it needs to be written for backwards - * compatability reasons. + * compatibility reasons. * * @param enabled Boolean value to enable/disable int96 timestamps. * @return this for chaining. diff --git a/cpp/include/cudf_test/column_wrapper.hpp b/cpp/include/cudf_test/column_wrapper.hpp index 74d22085b26..a4857552831 100644 --- a/cpp/include/cudf_test/column_wrapper.hpp +++ b/cpp/include/cudf_test/column_wrapper.hpp @@ -1239,7 +1239,7 @@ class lists_column_wrapper : public detail::column_wrapper { /** * @brief Construct a lists column containing a single list of fixed-width - * type from an interator range. + * type from an iterator range. * * Example: * @code{.cpp} @@ -1621,7 +1621,7 @@ class lists_column_wrapper : public detail::column_wrapper { std::back_inserter(cols), [&](lists_column_wrapper const& l) -> column_view { // depth mismatch. attempt to normalize the short column. - // this function will also catch if this is a legitmately broken + // this function will also catch if this is a legitimately broken // set of input if (l.depth < expected_depth) { if (l.root) { diff --git a/cpp/src/bitmask/null_mask.cu b/cpp/src/bitmask/null_mask.cu index 28d1411c30d..534fe051895 100644 --- a/cpp/src/bitmask/null_mask.cu +++ b/cpp/src/bitmask/null_mask.cu @@ -517,7 +517,7 @@ std::vector segmented_count_set_bits(bitmask_type const *bitmask, // first_word_indices and last_word_indices to have the same type. 
to_word_index(false, d_last_indices.data())); - // first allocate temporary memroy + // first allocate temporary memory size_t temp_storage_bytes{0}; CUDA_TRY(cub::DeviceSegmentedReduce::Sum(nullptr, diff --git a/cpp/src/dictionary/replace.cu b/cpp/src/dictionary/replace.cu index 1dbb844a606..37118779248 100644 --- a/cpp/src/dictionary/replace.cu +++ b/cpp/src/dictionary/replace.cu @@ -123,7 +123,7 @@ std::unique_ptr replace_nulls(dictionary_column_view const& input, } CUDF_EXPECTS(input.keys().type() == replacement.type(), "keys must match scalar type"); - // first add the replacment to the keys so only the indices need to be processed + // first add the replacement to the keys so only the indices need to be processed auto input_matched = dictionary::detail::add_keys( input, make_column_from_scalar(replacement, 1, stream)->view(), stream, mr); auto const input_view = dictionary_column_view(input_matched->view()); diff --git a/cpp/src/hash/concurrent_unordered_multimap.cuh b/cpp/src/hash/concurrent_unordered_multimap.cuh index 071214e80b0..2b92c9142ca 100644 --- a/cpp/src/hash/concurrent_unordered_multimap.cuh +++ b/cpp/src/hash/concurrent_unordered_multimap.cuh @@ -239,7 +239,7 @@ class concurrent_unordered_multimap { * @param[in] precomputed_hash A flag indicating whether or not a precomputed * hash value is passed in * @param[in] precomputed_hash_value A precomputed hash value to use for - * determing the write location of the key into the hash map instead of + * determining the write location of the key into the hash map instead of * computing the the hash value directly from the key * @tparam hash_value_type The datatype of the hash value * @@ -284,7 +284,7 @@ class concurrent_unordered_multimap { * @param[in] precomputed_hash A flag indicating whether or not a precomputed * hash value is passed in * @param[in] precomputed_hash_value A precomputed hash value to use for - * determing the write location of the key into the hash map instead of + * determining the 
write location of the key into the hash map instead of * computing the the hash value directly from the key * @param[in] keys_are_equal An optional functor for comparing if two keys are * equal @@ -375,7 +375,7 @@ class concurrent_unordered_multimap { * @param[in] precomputed_hash A flag indicating whether or not a precomputed * hash value is passed in * @param[in] precomputed_hash_value A precomputed hash value to use for - * determing the write location of the key into the hash map instead of + * determining the write location of the key into the hash map instead of * computing the the hash value directly from the key * @param[in] keys_are_equal An optional functor for comparing if two keys are * equal @@ -423,7 +423,7 @@ class concurrent_unordered_multimap { * @param[in] precomputed_hash A flag indicating whether or not a precomputed * hash value is passed in * @param[in] precomputed_hash_value A precomputed hash value to use for - * determing the write location of the key into the hash map instead of + * determining the write location of the key into the hash map instead of * computing the the hash value directly from the key * @param[in] keys_are_equal An optional functor for comparing if two keys are * equal diff --git a/cpp/src/interop/from_arrow.cu b/cpp/src/interop/from_arrow.cu index 28fc2ae9d4f..917a5b1ac9c 100644 --- a/cpp/src/interop/from_arrow.cu +++ b/cpp/src/interop/from_arrow.cu @@ -94,7 +94,7 @@ namespace { */ struct dispatch_to_cudf_column { /** - * @brief Returns mask from an array withut any offsets. + * @brief Returns mask from an array without any offsets. 
*/ std::unique_ptr get_mask_buffer(arrow::Array const& array, rmm::cuda_stream_view stream, diff --git a/cpp/src/interop/to_arrow.cu b/cpp/src/interop/to_arrow.cu index f8fcf03a77e..3cd515e9981 100644 --- a/cpp/src/interop/to_arrow.cu +++ b/cpp/src/interop/to_arrow.cu @@ -96,7 +96,7 @@ std::shared_ptr fetch_mask_buffer(column_view input_view, */ struct dispatch_to_arrow { /** - * @brief Creates vector Arrays from given cudf column childrens + * @brief Creates vector Arrays from given cudf column children */ std::vector> fetch_child_array( column_view input_view, diff --git a/cpp/src/io/parquet/page_data.cu b/cpp/src/io/parquet/page_data.cu index dfd9c1384c5..cebc01af173 100644 --- a/cpp/src/io/parquet/page_data.cu +++ b/cpp/src/io/parquet/page_data.cu @@ -1467,7 +1467,7 @@ __device__ void gpuDecodeLevels(page_state_s *s, int32_t target_leaf_count, int gpuDecodeStream(s->def, s, cur_leaf_count, t, level_type::DEFINITION); __syncwarp(); - // because the rep and def streams are encoded seperately, we cannot request an exact + // because the rep and def streams are encoded separately, we cannot request an exact // # of values to be decoded at once. we can only process the lowest # of decoded rep/def // levels we get. int actual_leaf_count = has_repetition ? min(s->lvl_count[level_type::REPETITION], diff --git a/cpp/src/jit/parser.hpp b/cpp/src/jit/parser.hpp index 61228d7ffce..0b752d77d1f 100644 --- a/cpp/src/jit/parser.hpp +++ b/cpp/src/jit/parser.hpp @@ -106,7 +106,7 @@ class ptx_parser { std::vector parse_function_body(const std::string& src); /** - * @brief Remove leading white chractors and call `parse_instruction`. + * @brief Remove leading white characters and call `parse_instruction`. * * @param src The statement to be parsed. * @return The resulting CUDA statement. 
@@ -124,8 +124,8 @@ class ptx_parser { * * ---> asm volatile (" fma.rn.f32 _f4, _f3, _f1, _f2;"); * - * If a regiter from the input parameters list is used in an instruction - * its type is inferred from the intruction and saved in the `input_arg_list` + * If a register from the input parameters list is used in an instruction + * its type is inferred from the instruction and saved in the `input_arg_list` * to be used in when parsing the function header. * * See the document at https://github.com/hummingtree/cudf/wiki/PTX-parser diff --git a/cpp/src/strings/convert/convert_floats.cu b/cpp/src/strings/convert/convert_floats.cu index d4d6974cef5..10376f8c5df 100644 --- a/cpp/src/strings/convert/convert_floats.cu +++ b/cpp/src/strings/convert/convert_floats.cu @@ -234,7 +234,7 @@ struct ftos_converter { static constexpr double upper_limit = 1000000000; // max is 1x10^9 static constexpr double lower_limit = 0.0001; // printf uses scientific notation below this // Tables for doing normalization: converting to exponent form - // IEEE double float has maximum exponent of 305 so these should cover everthing + // IEEE double float has maximum exponent of 305 so these should cover everything const double upper10[9] = {10, 100, 10000, 1e8, 1e16, 1e32, 1e64, 1e128, 1e256}; const double lower10[9] = {.1, .01, .0001, 1e-8, 1e-16, 1e-32, 1e-64, 1e-128, 1e-256}; const double blower10[9] = {1.0, .1, .001, 1e-7, 1e-15, 1e-31, 1e-63, 1e-127, 1e-255}; diff --git a/cpp/src/strings/replace/multi_re.cu b/cpp/src/strings/replace/multi_re.cu index 2d9d40e2d68..08fa523e794 100644 --- a/cpp/src/strings/replace/multi_re.cu +++ b/cpp/src/strings/replace/multi_re.cu @@ -56,7 +56,7 @@ struct replace_multi_regex_fn { reprog_device* progs; // array of regex progs size_type number_of_patterns; found_range* d_found_ranges; // working array matched (begin,end) values - column_device_view const d_repls; // replacment strings + column_device_view const d_repls; // replacement strings int32_t* 
d_offsets{}; // these are null when char* d_chars{}; // only computing size diff --git a/cpp/tests/io/csv_test.cpp b/cpp/tests/io/csv_test.cpp index 8996dd95e06..6ccb2301ee5 100644 --- a/cpp/tests/io/csv_test.cpp +++ b/cpp/tests/io/csv_test.cpp @@ -171,7 +171,7 @@ void check_float_column(cudf::column_view const& col_lhs, } // timestamp column checker within tolerance -// given by `tol_ms` (miliseconds) +// given by `tol_ms` (milliseconds) void check_timestamp_column(cudf::column_view const& col_lhs, cudf::column_view const& col_rhs, long tol_ms = 1000l) diff --git a/cpp/tests/io/parquet_test.cpp b/cpp/tests/io/parquet_test.cpp index 5e521291a0c..85fb33fd97b 100644 --- a/cpp/tests/io/parquet_test.cpp +++ b/cpp/tests/io/parquet_test.cpp @@ -573,7 +573,7 @@ TEST_F(ParquetWriterTest, Strings) TEST_F(ParquetWriterTest, SlicedTable) { - // This test checks for writing zero copy, offseted views into existing cudf tables + // This test checks for writing zero copy, offsetted views into existing cudf tables std::vector strings{ "Monday", "Wȅdnȅsday", "Friday", "Monday", "Friday", "Friday", "Friday", "Funday"}; diff --git a/cpp/tests/utilities/column_utilities.cu b/cpp/tests/utilities/column_utilities.cu index 2ff06436853..7177f78e652 100644 --- a/cpp/tests/utilities/column_utilities.cu +++ b/cpp/tests/utilities/column_utilities.cu @@ -683,7 +683,7 @@ struct column_view_printer { { lists_column_view lcv(col); - // propage slicing to the child if necessary + // propagate slicing to the child if necessary column_view child = lcv.get_sliced_child(rmm::cuda_stream_default); bool const is_sliced = lcv.offset() > 0 || child.offset() > 0; From 012da1445147f7fc6b0bfb29ec5eb92eaac3d30e Mon Sep 17 00:00:00 2001 From: Karthikeyan Natarajan Date: Wed, 7 Jul 2021 01:27:40 +0530 Subject: [PATCH 3/4] fix param to tparam --- cpp/src/io/orc/stripe_enc.cu | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/cpp/src/io/orc/stripe_enc.cu 
b/cpp/src/io/orc/stripe_enc.cu index 80f6f4b2fde..b6f51ab19f3 100644 --- a/cpp/src/io/orc/stripe_enc.cu +++ b/cpp/src/io/orc/stripe_enc.cu @@ -115,9 +115,9 @@ static inline __device__ uint32_t CountLeadingBytes64(uint64_t v) { return __clz /** * @brief Raw data output * - * @param[in] cid stream type (strm_pos[cid] will be updated and output stored at - *streams[cid]+strm_pos[cid]) - * @param[in] inmask input buffer position mask for circular buffers + * @tparam cid stream type (strm_pos[cid] will be updated and output stored at + * streams[cid]+strm_pos[cid]) + * @tparam inmask input buffer position mask for circular buffers * @param[in] s encoder state * @param[in] inbuf base input buffer * @param[in] inpos position in input buffer @@ -143,12 +143,12 @@ static __device__ void StoreBytes( /** * @brief ByteRLE encoder * - * @param[in] cid stream type (strm_pos[cid] will be updated and output stored at - *streams[cid]+strm_pos[cid]) + * @tparam cid stream type (strm_pos[cid] will be updated and output stored at + * streams[cid]+strm_pos[cid]) + * @tparam inmask input buffer position mask for circular buffers * @param[in] s encoder state * @param[in] inbuf base input buffer * @param[in] inpos position in input buffer - * @param[in] inmask input buffer position mask for circular buffers * @param[in] numvals max number of values to encode * @param[in] flush encode all remaining values if nonzero * @param[in] t thread id @@ -324,12 +324,12 @@ static inline __device__ void StoreBitsBigEndian( /** * @brief Integer RLEv2 encoder * - * @param[in] cid stream type (strm_pos[cid] will be updated and output stored at - *streams[cid]+strm_pos[cid]) + * @tparam cid stream type (strm_pos[cid] will be updated and output stored at + * streams[cid]+strm_pos[cid]) + * @tparam inmask input buffer position mask for circular buffers * @param[in] s encoder state * @param[in] inbuf base input buffer * @param[in] inpos position in input buffer - * @param[in] inmask input buffer position 
mask for circular buffers * @param[in] numvals max number of values to encode * @param[in] flush encode all remaining values if nonzero * @param[in] t thread id @@ -619,7 +619,7 @@ static const __device__ __constant__ int32_t kTimeScale[10] = { * @brief Encode column data * * @param[in] chunks encoder chunks device array [column][rowgroup] - * @param[in, out] chunks chunk streams device array [column][rowgroup] + * @param[in, out] streams chunk streams device array [column][rowgroup] */ // blockDim {512,1,1} template From 1107b5ff4b017761670230c266c88d425511f28d Mon Sep 17 00:00:00 2001 From: Karthikeyan <6488848+karthikeyann@users.noreply.github.com> Date: Wed, 7 Jul 2021 18:55:55 +0530 Subject: [PATCH 4/4] Update cpp/benchmarks/join/generate_input_tables.cuh Co-authored-by: David Wendt <45795991+davidwendt@users.noreply.github.com> --- cpp/benchmarks/join/generate_input_tables.cuh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/benchmarks/join/generate_input_tables.cuh b/cpp/benchmarks/join/generate_input_tables.cuh index edd9e5d8ce2..d7f64716e58 100644 --- a/cpp/benchmarks/join/generate_input_tables.cuh +++ b/cpp/benchmarks/join/generate_input_tables.cuh @@ -169,7 +169,7 @@ void generate_input_tables(key_type* const build_tbl, // expense of not being that accurate with applying the selectivity an especially more memory // efficient implementations would be to partition the random numbers into two intervals and then // let one table choose random numbers from only one interval and the other only select with - // selectivity probability from the same interval and from the other in the other cases. + // selective probability from the same interval and from the other in the other cases. static_assert(std::is_signed::value, "key_type needs to be signed for lottery to work");