From 1b163cc6a6c2e26e29087ff977ed8048cd788300 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Fri, 15 Mar 2024 18:17:24 -0400 Subject: [PATCH 1/5] Fix gtests/ERROR_TEST errors when run in Debug (#15317) Fixes errors reported by `gtests/ERROR_TEST` when run with a Debug build. Both errors occur due to invalid stream usage. ``` [ RUN ] DebugAssert.cudf_assert_true libcudf was not built with stacktrace support. unknown file: Failure C++ exception with description "cudf_identify_stream_usage found unexpected stream!" thrown in the test body. [ RUN ] DebugAssertDeathTest.cudf_assert_false libcudf was not built with stacktrace support. /cudf/cpp/tests/error/error_handling_test.cu:112: Failure Death test: call_kernel() Result: threw an exception. Error msg: [ DEATH ] [ DEATH ] /cudf/cpp/tests/error/error_handling_test.cu:112:: Caught std::exception-derived exception escaping the death test statement. Exception message: cudf_identify_stream_usage found unexpected stream! ``` Fixes the test logic to use the correct stream. These tests are only built/run with a Debug build. 
Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Bradley Dice (https://github.com/bdice) - Muhammad Haseeb (https://github.com/mhaseeb123) URL: https://github.com/rapidsai/cudf/pull/15317 --- cpp/tests/error/error_handling_test.cu | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/cpp/tests/error/error_handling_test.cu b/cpp/tests/error/error_handling_test.cu index 5cb2d729f3d..674d2e0a6ea 100644 --- a/cpp/tests/error/error_handling_test.cu +++ b/cpp/tests/error/error_handling_test.cu @@ -97,7 +97,8 @@ TEST(DebugAssertDeathTest, cudf_assert_false) testing::FLAGS_gtest_death_test_style = "threadsafe"; auto call_kernel = []() { - assert_false_kernel<<<1, 1>>>(); + auto const stream = cudf::get_default_stream().value(); + assert_false_kernel<<<1, 1, 0, stream>>>(); // Kernel should fail with `cudaErrorAssert` // This error invalidates the current device context, so we need to kill @@ -114,7 +115,8 @@ TEST(DebugAssertDeathTest, cudf_assert_false) TEST(DebugAssert, cudf_assert_true) { - assert_true_kernel<<<1, 1>>>(); + auto const stream = cudf::get_default_stream().value(); + assert_true_kernel<<<1, 1, 0, stream>>>(); ASSERT_EQ(cudaSuccess, cudaDeviceSynchronize()); } @@ -136,6 +138,7 @@ int main(int argc, char** argv) auto adaptor = make_stream_checking_resource_adaptor( resource, error_on_invalid_stream, check_default_stream); rmm::mr::set_current_device_resource(&adaptor); + return RUN_ALL_TESTS(); } return RUN_ALL_TESTS(); } From 13f6cd37900a2c4031b8861a38ef2baef4a3fbf7 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 15 Mar 2024 12:45:43 -1000 Subject: [PATCH 2/5] Replace black with ruff-format (#15312) xref https://github.com/rapidsai/cudf/issues/14882 This PR replaces `black` with `ruff-format` using its default configuration. The ruff configuration had a line length of 88 while black was configured with a line length of 79, so both are now aligned to 79. 
The next step would be to consider replacing `isort` too Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - GALI PREM SAGAR (https://github.com/galipremsagar) - Lawrence Mitchell (https://github.com/wence-) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/15312 --- .pre-commit-config.yaml | 12 ++----- pyproject.toml | 24 +++---------- python/cudf/cudf/core/_internals/timezones.py | 5 +-- python/cudf/cudf/core/column/column.py | 3 +- python/cudf/cudf/core/dataframe.py | 33 ++++++++++------- python/cudf/cudf/core/groupby/groupby.py | 13 ++++--- python/cudf/cudf/core/indexed_frame.py | 35 ++++++------------- python/cudf/cudf/core/tools/datetimes.py | 6 ++-- python/cudf/cudf/core/tools/numeric.py | 6 ++-- python/cudf/cudf/core/udf/strings_lowering.py | 4 +-- python/cudf/cudf/core/udf/utils.py | 4 +-- python/cudf/cudf/pandas/fast_slow_proxy.py | 6 ++-- python/cudf/cudf/pandas/profiler.py | 18 +++++----- python/cudf/cudf/tests/test_dataframe.py | 2 +- python/cudf/cudf/tests/test_index.py | 3 +- python/cudf/cudf/tests/test_orc.py | 12 +++---- python/cudf/cudf/tests/test_parquet.py | 5 +-- python/cudf/cudf/utils/ioutils.py | 8 ++--- 18 files changed, 83 insertions(+), 116 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 9235c80bdc9..67a71021a63 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -23,13 +23,6 @@ repos: args: ["--config-root=python/", "--resolve-all-configs"] files: python/.* types_or: [python, cython, pyi] - - repo: https://github.com/psf/black - rev: 23.12.1 - hooks: - - id: black - files: python/.* - # Explicitly specify the pyproject.toml at the repo root, not per-project. - args: ["--config", "pyproject.toml"] - repo: https://github.com/MarcoGorelli/cython-lint rev: v0.16.0 hooks: @@ -64,9 +57,6 @@ repos: # Use the cudf_kafka isort orderings in notebooks so that dask # and RAPIDS packages have their own sections. 
args: ["--settings-file=python/cudf_kafka/pyproject.toml"] - - id: nbqa-black - # Explicitly specify the pyproject.toml at the repo root, not per-project. - args: ["--config=pyproject.toml"] - repo: https://github.com/pre-commit/mirrors-clang-format rev: v16.0.6 hooks: @@ -155,6 +145,8 @@ repos: hooks: - id: ruff files: python/.*$ + - id: ruff-format + files: python/.*$ - repo: https://github.com/rapidsai/pre-commit-hooks rev: v0.0.1 hooks: diff --git a/pyproject.toml b/pyproject.toml index 4048eb9452c..c71394058df 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,22 +1,4 @@ -[tool.black] -line-length = 79 -target-version = ["py39"] -include = '\.py?$' -force-exclude = ''' -/( - thirdparty | - \.eggs | - \.git | - \.hg | - \.mypy_cache | - \.tox | - \.venv | - _build | - buck-out | - build | - dist -)/ -''' +# Copyright (c) 2019-2024, NVIDIA CORPORATION. [tool.pydocstyle] # Due to https://github.com/PyCQA/pydocstyle/issues/363, we must exclude rather @@ -60,13 +42,15 @@ select = ["E", "F", "W"] ignore = [ # whitespace before : "E203", + # line-too-long (due to Copyright header) + "E501", ] fixable = ["ALL"] exclude = [ # TODO: Remove this in a follow-up where we fix __all__. 
"__init__.py", ] -line-length = 88 +line-length = 79 [tool.ruff.per-file-ignores] # Lots of pytest implicitly injected attributes in conftest-patch.py diff --git a/python/cudf/cudf/core/_internals/timezones.py b/python/cudf/cudf/core/_internals/timezones.py index 053425fff8d..4e2fad08d56 100644 --- a/python/cudf/cudf/core/_internals/timezones.py +++ b/python/cudf/cudf/core/_internals/timezones.py @@ -85,8 +85,9 @@ def _read_tzfile_as_frame(tzdir, zone_name): if not transition_times_and_offsets: # this happens for UTC-like zones min_date = np.int64(np.iinfo("int64").min + 1).astype("M8[s]") - transition_times_and_offsets = as_column([min_date]), as_column( - [np.timedelta64(0, "s")] + transition_times_and_offsets = ( + as_column([min_date]), + as_column([np.timedelta64(0, "s")]), ) return DataFrame._from_data( diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 3e0ec4b5cd7..f13d8cf12f7 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -1731,7 +1731,8 @@ def as_column( If None (default), treats NaN values in arbitrary as null if there is no mask passed along with it. If True, combines the mask and NaNs to form a new validity mask. If False, leaves NaN values as is. - Only applies when arbitrary is not a cudf object (Index, Series, Column). + Only applies when arbitrary is not a cudf object + (Index, Series, Column). dtype : optional Optionally typecast the constructed Column to the given dtype. 
diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 0440512c467..35588725655 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -470,9 +470,12 @@ class _DataFrameIlocIndexer(_DataFrameIndexer): _frame: DataFrame def __getitem__(self, arg): - row_key, ( - col_is_scalar, - column_names, + ( + row_key, + ( + col_is_scalar, + column_names, + ), ) = indexing_utils.destructure_dataframe_iloc_indexer(arg, self._frame) row_spec = indexing_utils.parse_row_iloc_indexer( row_key, len(self._frame) @@ -6901,16 +6904,18 @@ def stack(self, level=-1, dropna=no_default, future_stack=False): if future_stack: if dropna is not no_default: raise ValueError( - "dropna must be unspecified with future_stack=True as the new " - "implementation does not introduce rows of NA values. This " - "argument will be removed in a future version of cudf." + "dropna must be unspecified with future_stack=True as " + "the new implementation does not introduce rows of NA " + "values. This argument will be removed in a future " + "version of cudf." ) else: if dropna is not no_default or self._data.nlevels > 1: warnings.warn( - "The previous implementation of stack is deprecated and will be " - "removed in a future version of cudf. Specify future_stack=True " - "to adopt the new implementation and silence this warning.", + "The previous implementation of stack is deprecated and " + "will be removed in a future version of cudf. 
Specify " + "future_stack=True to adopt the new implementation and " + "silence this warning.", FutureWarning, ) if dropna is no_default: @@ -7028,9 +7033,13 @@ def unnamed_group_generator(): unique_named_levels, axis=0, fill_value=-1 ).values else: - yield grpdf.reindex( - unique_named_levels, axis=0, fill_value=-1 - ).sort_index().values + yield ( + grpdf.reindex( + unique_named_levels, axis=0, fill_value=-1 + ) + .sort_index() + .values + ) else: if future_stack: yield column_idx_df.values diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py index e5030eb634b..d995964057b 100644 --- a/python/cudf/cudf/core/groupby/groupby.py +++ b/python/cudf/cudf/core/groupby/groupby.py @@ -282,9 +282,12 @@ def __iter__(self): if isinstance(group_names, cudf.BaseIndex): group_names = group_names.to_pandas() for i, name in enumerate(group_names): - yield (name,) if isinstance(self._by, list) and len( - self._by - ) == 1 else name, grouped_values[offsets[i] : offsets[i + 1]] + yield ( + (name,) + if isinstance(self._by, list) and len(self._by) == 1 + else name, + grouped_values[offsets[i] : offsets[i + 1]], + ) @property def dtypes(self): @@ -2269,8 +2272,8 @@ def fillna( """ warnings.warn( "groupby fillna is deprecated and " - "will be removed in a future version. Use groupby ffill or groupby bfill " - "for forward or backward filling instead.", + "will be removed in a future version. 
Use groupby ffill " + "or groupby bfill for forward or backward filling instead.", FutureWarning, ) if inplace: diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index 94d862d52b4..ca9d5590044 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -211,8 +211,8 @@ def _get_label_range_or_mask(index, start, stop, step): return slice(start_loc, stop_loc) else: raise KeyError( - "Value based partial slicing on non-monotonic DatetimeIndexes " - "with non-existing keys is not allowed.", + "Value based partial slicing on non-monotonic " + "DatetimeIndexes with non-existing keys is not allowed.", ) elif start is not None: boolean_mask = index >= start @@ -2449,7 +2449,8 @@ def squeeze(self, axis: Literal["index", "columns", 0, 1, None] = None): ---------- axis : {0 or 'index', 1 or 'columns', None}, default None A specific axis to squeeze. By default, all length-1 axes are - squeezed. For `Series` this parameter is unused and defaults to `None`. + squeezed. For `Series` this parameter is unused and defaults + to `None`. 
Returns ------- @@ -5835,9 +5836,7 @@ def floordiv(self, other, axis, level=None, fill_value=None): # noqa: D102 ), ) ) - def rfloordiv( - self, other, axis, level=None, fill_value=None - ): # noqa: D102 + def rfloordiv(self, other, axis, level=None, fill_value=None): # noqa: D102 if level is not None: raise NotImplementedError("level parameter is not supported yet.") @@ -5967,9 +5966,7 @@ def rtruediv(self, other, axis, level=None, fill_value=None): # noqa: D102 ), ) ) - def eq( - self, other, axis="columns", level=None, fill_value=None - ): # noqa: D102 + def eq(self, other, axis="columns", level=None, fill_value=None): # noqa: D102 return self._binaryop( other=other, op="__eq__", fill_value=fill_value, can_reindex=True ) @@ -6009,9 +6006,7 @@ def eq( ), ) ) - def ne( - self, other, axis="columns", level=None, fill_value=None - ): # noqa: D102 + def ne(self, other, axis="columns", level=None, fill_value=None): # noqa: D102 return self._binaryop( other=other, op="__ne__", fill_value=fill_value, can_reindex=True ) @@ -6051,9 +6046,7 @@ def ne( ), ) ) - def lt( - self, other, axis="columns", level=None, fill_value=None - ): # noqa: D102 + def lt(self, other, axis="columns", level=None, fill_value=None): # noqa: D102 return self._binaryop( other=other, op="__lt__", fill_value=fill_value, can_reindex=True ) @@ -6093,9 +6086,7 @@ def lt( ), ) ) - def le( - self, other, axis="columns", level=None, fill_value=None - ): # noqa: D102 + def le(self, other, axis="columns", level=None, fill_value=None): # noqa: D102 return self._binaryop( other=other, op="__le__", fill_value=fill_value, can_reindex=True ) @@ -6135,9 +6126,7 @@ def le( ), ) ) - def gt( - self, other, axis="columns", level=None, fill_value=None - ): # noqa: D102 + def gt(self, other, axis="columns", level=None, fill_value=None): # noqa: D102 return self._binaryop( other=other, op="__gt__", fill_value=fill_value, can_reindex=True ) @@ -6177,9 +6166,7 @@ def gt( ), ) ) - def ge( - self, other, axis="columns", 
level=None, fill_value=None - ): # noqa: D102 + def ge(self, other, axis="columns", level=None, fill_value=None): # noqa: D102 return self._binaryop( other=other, op="__ge__", fill_value=fill_value, can_reindex=True ) diff --git a/python/cudf/cudf/core/tools/datetimes.py b/python/cudf/cudf/core/tools/datetimes.py index d182b7b4a7c..65f97c99934 100644 --- a/python/cudf/cudf/core/tools/datetimes.py +++ b/python/cudf/cudf/core/tools/datetimes.py @@ -164,9 +164,9 @@ def to_datetime( if errors == "ignore": warnings.warn( - "errors='ignore' is deprecated and will raise in a future version. " - "Use to_datetime without passing `errors` and catch exceptions " - "explicitly instead", + "errors='ignore' is deprecated and will raise in a " + "future version. Use to_datetime without passing `errors` " + "and catch exceptions explicitly instead", FutureWarning, ) diff --git a/python/cudf/cudf/core/tools/numeric.py b/python/cudf/cudf/core/tools/numeric.py index e1424459c8f..68b23f1e059 100644 --- a/python/cudf/cudf/core/tools/numeric.py +++ b/python/cudf/cudf/core/tools/numeric.py @@ -97,9 +97,9 @@ def to_numeric(arg, errors="raise", downcast=None): raise ValueError("invalid error value specified") elif errors == "ignore": warnings.warn( - "errors='ignore' is deprecated and will raise in a future version. " - "Use to_numeric without passing `errors` and catch exceptions " - "explicitly instead", + "errors='ignore' is deprecated and will raise in " + "a future version. Use to_numeric without passing `errors` " + "and catch exceptions explicitly instead", FutureWarning, ) diff --git a/python/cudf/cudf/core/udf/strings_lowering.py b/python/cudf/cudf/core/udf/strings_lowering.py index fdce404d887..3c02ee52b25 100644 --- a/python/cudf/cudf/core/udf/strings_lowering.py +++ b/python/cudf/cudf/core/udf/strings_lowering.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. 
import operator from functools import partial @@ -249,7 +249,7 @@ def replace_impl(context, builder, sig, args): replacement_ptr = builder.alloca(args[2].type) builder.store(args[0], src_ptr) - builder.store(args[1], to_replace_ptr), + builder.store(args[1], to_replace_ptr) builder.store(args[2], replacement_ptr) udf_str_ptr = builder.alloca(default_manager[udf_string].get_value_type()) diff --git a/python/cudf/cudf/core/udf/utils.py b/python/cudf/cudf/core/udf/utils.py index 12baf1ea6d1..bc1f4f2557e 100644 --- a/python/cudf/cudf/core/udf/utils.py +++ b/python/cudf/cudf/core/udf/utils.py @@ -41,9 +41,7 @@ from cudf.utils.utils import initfunc # Maximum size of a string column is 2 GiB -_STRINGS_UDF_DEFAULT_HEAP_SIZE = os.environ.get( - "STRINGS_UDF_HEAP_SIZE", 2**31 -) +_STRINGS_UDF_DEFAULT_HEAP_SIZE = os.environ.get("STRINGS_UDF_HEAP_SIZE", 2**31) _heap_size = 0 _cudf_str_dtype = dtype(str) diff --git a/python/cudf/cudf/pandas/fast_slow_proxy.py b/python/cudf/cudf/pandas/fast_slow_proxy.py index 3f5df18eae1..e811ba1351a 100644 --- a/python/cudf/cudf/pandas/fast_slow_proxy.py +++ b/python/cudf/cudf/pandas/fast_slow_proxy.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION & AFFILIATES. # noqa: E501 # All rights reserved. # SPDX-License-Identifier: Apache-2.0 @@ -437,9 +437,7 @@ def __get__(self, obj, owner=None) -> Any: # methods because dir for the method won't be the same as for # the pure unbound function, but the alternative is # materializing the slow object when we don't really want to. 
- result._fsproxy_slow_dir = dir( - slow_result_type - ) # type: ignore + result._fsproxy_slow_dir = dir(slow_result_type) # type: ignore return result diff --git a/python/cudf/cudf/pandas/profiler.py b/python/cudf/cudf/pandas/profiler.py index c5662d06e09..0124d411e3b 100644 --- a/python/cudf/cudf/pandas/profiler.py +++ b/python/cudf/cudf/pandas/profiler.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION & AFFILIATES. # All rights reserved. # SPDX-License-Identifier: Apache-2.0 @@ -124,7 +124,7 @@ def get_namespaced_function_name( _MethodProxy, type[_FinalProxy], type[_IntermediateProxy], - ] + ], ): if isinstance(func_obj, _MethodProxy): # Extract classname from method object @@ -177,17 +177,15 @@ def _tracefunc(self, frame, event, arg): if self._currkey is not None and arg is not None: if arg[1]: # fast run_time = time.perf_counter() - self._timer[self._currkey] - self._results[self._currkey][ - "gpu_time" - ] = run_time + self._results[self._currkey].get( - "gpu_time", 0 + self._results[self._currkey]["gpu_time"] = ( + run_time + + self._results[self._currkey].get("gpu_time", 0) ) else: run_time = time.perf_counter() - self._timer[self._currkey] - self._results[self._currkey][ - "cpu_time" - ] = run_time + self._results[self._currkey].get( - "cpu_time", 0 + self._results[self._currkey]["cpu_time"] = ( + run_time + + self._results[self._currkey].get("cpu_time", 0) ) frame_locals = inspect.getargvalues(frame).locals diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index e034a3f5e10..ead1ab2da6c 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -2351,7 +2351,7 @@ def test_dataframe_reductions(data, axis, func, skipna): for kwargs in all_kwargs: if expected_exception is not None: with pytest.raises(expected_exception): - getattr(gdf, 
func)(axis=axis, skipna=skipna, **kwargs), + (getattr(gdf, func)(axis=axis, skipna=skipna, **kwargs),) else: expect = getattr(pdf, func)(axis=axis, skipna=skipna, **kwargs) with expect_warning_if( diff --git a/python/cudf/cudf/tests/test_index.py b/python/cudf/cudf/tests/test_index.py index 51e9a3022f4..05213d7601c 100644 --- a/python/cudf/cudf/tests/test_index.py +++ b/python/cudf/cudf/tests/test_index.py @@ -1721,8 +1721,7 @@ def test_get_indexer_single_unique_numeric(idx, key, method): if ( # `method` only applicable to monotonic index - not pi.is_monotonic_increasing - and method is not None + not pi.is_monotonic_increasing and method is not None ): assert_exceptions_equal( lfunc=pi.get_loc, diff --git a/python/cudf/cudf/tests/test_orc.py b/python/cudf/cudf/tests/test_orc.py index 69ddd936eee..a9bca7d8b98 100644 --- a/python/cudf/cudf/tests/test_orc.py +++ b/python/cudf/cudf/tests/test_orc.py @@ -608,7 +608,8 @@ def test_orc_write_statistics(tmpdir, datadir, nrows, stats_freq): from pyarrow import orc supported_stat_types = supported_numpy_dtypes + ["str"] - # Writing bool columns to multiple row groups is disabled until #6763 is fixed + # Writing bool columns to multiple row groups is disabled + # until #6763 is fixed if nrows == 100000: supported_stat_types.remove("bool") @@ -683,7 +684,8 @@ def test_orc_chunked_write_statistics(tmpdir, datadir, nrows, stats_freq): np.random.seed(0) supported_stat_types = supported_numpy_dtypes + ["str"] - # Writing bool columns to multiple row groups is disabled until #6763 is fixed + # Writing bool columns to multiple row groups is disabled + # until #6763 is fixed if nrows == 200000: supported_stat_types.remove("bool") @@ -697,8 +699,7 @@ def test_orc_chunked_write_statistics(tmpdir, datadir, nrows, stats_freq): # Make a dataframe gdf = cudf.DataFrame( { - "col_" - + str(dtype): gen_rand_series( + "col_" + str(dtype): gen_rand_series( dtype, nrows // 2, has_nulls=True, @@ -716,8 +717,7 @@ def 
test_orc_chunked_write_statistics(tmpdir, datadir, nrows, stats_freq): # write and no pointers are saved into the original table gdf = cudf.DataFrame( { - "col_" - + str(dtype): gen_rand_series( + "col_" + str(dtype): gen_rand_series( dtype, nrows // 2, has_nulls=True, diff --git a/python/cudf/cudf/tests/test_parquet.py b/python/cudf/cudf/tests/test_parquet.py index 18efd4417a1..8b72fe84359 100644 --- a/python/cudf/cudf/tests/test_parquet.py +++ b/python/cudf/cudf/tests/test_parquet.py @@ -1087,8 +1087,9 @@ def struct_gen(gen, skip_rows, num_rows, include_validity=False): def R(first_val, num_fields): return { - "col" - + str(f): (gen[f](first_val, first_val) if f % 4 != 0 else None) + "col" + str(f): ( + gen[f](first_val, first_val) if f % 4 != 0 else None + ) if include_validity else (gen[f](first_val, first_val)) for f in range(len(gen)) diff --git a/python/cudf/cudf/utils/ioutils.py b/python/cudf/cudf/utils/ioutils.py index 925fd24e6c8..85abf438efb 100644 --- a/python/cudf/cudf/utils/ioutils.py +++ b/python/cudf/cudf/utils/ioutils.py @@ -85,9 +85,7 @@ 0 10 hello 1 20 rapids 2 30 ai -""".format( - remote_data_sources=_docstring_remote_sources -) +""".format(remote_data_sources=_docstring_remote_sources) doc_read_avro = docfmt_partial(docstring=_docstring_read_avro) _docstring_read_parquet_metadata = """ @@ -1416,9 +1414,7 @@ list of Filepath strings or in-memory buffers of data. 
compression : str Type of compression algorithm for the content - """.format( - bytes_per_thread=_BYTES_PER_THREAD_DEFAULT -) + """.format(bytes_per_thread=_BYTES_PER_THREAD_DEFAULT) doc_get_reader_filepath_or_buffer = docfmt_partial( From f697b3eaac829b366c55b0224d558bbb28ffa06f Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Fri, 15 Mar 2024 17:36:50 -0700 Subject: [PATCH 3/5] Work around a cuFile error when running CSV tests with memcheck (#15293) Closes https://github.com/rapidsai/cudf/issues/14140 Added a no-op CUDA call before creating a `kvikio::FileHandle` to avoid the error in `cuFileDriverOpen`. Authors: - Vukasin Milovanovic (https://github.com/vuule) Approvers: - David Wendt (https://github.com/davidwendt) - Yunsong Wang (https://github.com/PointKernel) URL: https://github.com/rapidsai/cudf/pull/15293 --- cpp/src/io/utilities/datasource.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/cpp/src/io/utilities/datasource.cpp b/cpp/src/io/utilities/datasource.cpp index d2026473b6c..54e7c6bf1d6 100644 --- a/cpp/src/io/utilities/datasource.cpp +++ b/cpp/src/io/utilities/datasource.cpp @@ -44,6 +44,11 @@ class file_source : public datasource { explicit file_source(char const* filepath) : _file(filepath, O_RDONLY) { if (detail::cufile_integration::is_kvikio_enabled()) { + // Workaround for https://github.com/rapidsai/cudf/issues/14140, where cuFileDriverOpen errors + // out if no CUDA calls have been made before it. This is a no-op if the CUDA context is + // already initialized + cudaFree(0); + _kvikio_file = kvikio::FileHandle(filepath); CUDF_LOG_INFO("Reading a file using kvikIO, with compatibility mode {}.", _kvikio_file.is_compat_mode_on() ? "on" : "off"); From 2a8540408d55e0296a8621b87dbb3b67716f1e1f Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Mon, 18 Mar 2024 11:07:03 -0500 Subject: [PATCH 4/5] Enable branch testing for `cudf.pandas` (#15316) This PR enables branch testing for `cudf.pandas` pandas pytest suite. 
This is the first half of the changes I'd like to make. Once this PR is merged and a JSON results file has been generated in S3 for `branch-24.04`, a follow-up PR will enable diff comparison and reporting. Authors: - GALI PREM SAGAR (https://github.com/galipremsagar) Approvers: - Bradley Dice (https://github.com/bdice) - Jake Awe (https://github.com/AyodeAwe) URL: https://github.com/rapidsai/cudf/pull/15316 --- .github/workflows/test.yaml | 3 +-- ci/cudf_pandas_scripts/pandas-tests/run.sh | 1 + 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 4cb6baf2d63..d8f8c6f1e16 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -125,5 +125,4 @@ jobs: branch: ${{ inputs.branch }} date: ${{ inputs.date }} sha: ${{ inputs.sha }} - # pr mode uses the HEAD of the branch, which is also correct for nightlies - script: ci/cudf_pandas_scripts/pandas-tests/run.sh pr + script: ci/cudf_pandas_scripts/pandas-tests/run.sh main diff --git a/ci/cudf_pandas_scripts/pandas-tests/run.sh b/ci/cudf_pandas_scripts/pandas-tests/run.sh index 1de20e7fb25..f3c37ecde26 100755 --- a/ci/cudf_pandas_scripts/pandas-tests/run.sh +++ b/ci/cudf_pandas_scripts/pandas-tests/run.sh @@ -45,3 +45,4 @@ python python/cudf/cudf/pandas/scripts/summarize-test-results.py --output json p RAPIDS_ARTIFACTS_DIR=${RAPIDS_ARTIFACTS_DIR:-"${PWD}/artifacts"} mkdir -p "${RAPIDS_ARTIFACTS_DIR}" mv pandas-testing/${PANDAS_TESTS_BRANCH}-results.json ${RAPIDS_ARTIFACTS_DIR}/ +rapids-upload-to-s3 ${RAPIDS_ARTIFACTS_DIR}/${PANDAS_TESTS_BRANCH}-results.json "${RAPIDS_ARTIFACTS_DIR}" From fa6130f805ad8b6b9fa44722791a9aabb40a7ce2 Mon Sep 17 00:00:00 2001 From: Jake Awe <50372925+AyodeAwe@users.noreply.github.com> Date: Mon, 18 Mar 2024 13:55:15 -0500 Subject: [PATCH 5/5] Update script input name (#15301) This PR updates the script inputs in the relevant workflows from `build_script` and `test_script` to `script`. 
Depends on https://github.com/rapidsai/shared-workflows/pull/191 --- .github/workflows/pr.yaml | 4 ++-- .github/workflows/test.yaml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index e4aed2b2ef8..5bf9025d68d 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -69,7 +69,7 @@ jobs: uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.04 with: build_type: pull-request - test_script: "ci/test_python_cudf.sh" + script: "ci/test_python_cudf.sh" conda-python-other-tests: # Tests for dask_cudf, custreamz, cudf_kafka are separated for CI parallelism needs: conda-python-build @@ -77,7 +77,7 @@ jobs: uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.04 with: build_type: pull-request - test_script: "ci/test_python_other.sh" + script: "ci/test_python_other.sh" conda-java-tests: needs: conda-cpp-build secrets: inherit diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index d8f8c6f1e16..aeb092111a7 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -51,7 +51,7 @@ jobs: branch: ${{ inputs.branch }} date: ${{ inputs.date }} sha: ${{ inputs.sha }} - test_script: "ci/test_python_cudf.sh" + script: "ci/test_python_cudf.sh" conda-python-other-tests: # Tests for dask_cudf, custreamz, cudf_kafka are separated for CI parallelism secrets: inherit @@ -61,7 +61,7 @@ jobs: branch: ${{ inputs.branch }} date: ${{ inputs.date }} sha: ${{ inputs.sha }} - test_script: "ci/test_python_other.sh" + script: "ci/test_python_other.sh" conda-java-tests: secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.04