Skip to content

Commit

Permalink
Merge branch 'branch-25.02' into wence/chore/pre-commit-upgrade
Browse files Browse the repository at this point in the history
  • Loading branch information
vyasr authored Dec 2, 2024
2 parents 89a41aa + 5190b44 commit f81306f
Show file tree
Hide file tree
Showing 7 changed files with 118 additions and 104 deletions.
5 changes: 5 additions & 0 deletions .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ jobs:
files_yaml: |
test_cpp:
- '**'
- '!.devcontainer/**'
- '!CONTRIBUTING.md'
- '!README.md'
- '!ci/cudf_pandas_scripts/**'
Expand All @@ -71,6 +72,7 @@ jobs:
- '!python/**'
test_cudf_pandas:
- '**'
- '!.devcontainer/**'
- '!CONTRIBUTING.md'
- '!README.md'
- '!docs/**'
Expand All @@ -79,6 +81,7 @@ jobs:
- '!notebooks/**'
test_java:
- '**'
- '!.devcontainer/**'
- '!CONTRIBUTING.md'
- '!README.md'
- '!ci/cudf_pandas_scripts/**'
Expand All @@ -88,12 +91,14 @@ jobs:
- '!python/**'
test_notebooks:
- '**'
- '!.devcontainer/**'
- '!CONTRIBUTING.md'
- '!README.md'
- '!ci/cudf_pandas_scripts/**'
- '!java/**'
test_python:
- '**'
- '!.devcontainer/**'
- '!CONTRIBUTING.md'
- '!README.md'
- '!ci/cudf_pandas_scripts/**'
Expand Down
3 changes: 1 addition & 2 deletions cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -360,8 +360,6 @@ ConfigureNVBench(

# ##################################################################################################
# * strings benchmark -------------------------------------------------------------------
ConfigureBench(STRINGS_BENCH string/factory.cu)

ConfigureNVBench(
STRINGS_NVBENCH
string/case.cpp
Expand All @@ -377,6 +375,7 @@ ConfigureNVBench(
string/copy_range.cpp
string/count.cpp
string/extract.cpp
string/factory.cpp
string/filter.cpp
string/find.cpp
string/find_multiple.cpp
Expand Down
60 changes: 60 additions & 0 deletions cpp/benchmarks/string/factory.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
/*
* Copyright (c) 2021-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <benchmarks/common/generate_input.hpp>

#include <cudf/column/column_factories.hpp>
#include <cudf/strings/detail/utilities.hpp>
#include <cudf/strings/string_view.cuh>
#include <cudf/strings/strings_column_view.hpp>
#include <cudf/utilities/default_stream.hpp>

#include <rmm/device_uvector.hpp>

#include <nvbench/nvbench.cuh>

#include <limits>

/**
 * @brief Benchmarks cudf::make_strings_column when given a device vector of string_view.
 *
 * A random strings column is generated from the "num_rows", "min_width" and "max_width"
 * axes, converted once (outside the timed region) into a device vector of string_view
 * objects, and that vector is then passed to cudf::make_strings_column inside the
 * measured callable.
 *
 * @param state nvbench state supplying the axis values and receiving the stream and
 *              memory-traffic settings for this run
 */
static void bench_factory(nvbench::state& state)
{
  // Axis values are stored as int64 by nvbench; narrow each one to cudf::size_type.
  auto const axis_value = [&state](char const* axis_name) {
    return static_cast<cudf::size_type>(state.get_int64(axis_name));
  };
  auto const num_rows  = axis_value("num_rows");
  auto const min_width = axis_value("min_width");
  auto const max_width = axis_value("max_width");

  // Random input: string widths follow a NORMAL distribution over [min_width, max_width].
  data_profile const width_profile = data_profile_builder().distribution(
    cudf::type_id::STRING, distribution_id::NORMAL, min_width, max_width);
  auto const input_column =
    create_random_column(cudf::type_id::STRING, row_count{num_rows}, width_profile);
  auto const input_view = cudf::strings_column_view(input_column->view());

  // Build the string_view vector up front so only the factory call is measured.
  auto stream = cudf::get_default_stream();
  auto mr     = cudf::get_current_device_resource_ref();
  auto d_string_views =
    cudf::strings::detail::create_string_vector_from_column(input_view, stream, mr);

  state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value()));

  // The column's character byte count is reported as both the bytes read and the bytes written.
  auto total_chars = input_view.chars_size(stream);
  state.add_global_memory_reads<nvbench::int8_t>(total_chars);
  state.add_global_memory_writes<nvbench::int8_t>(total_chars);

  // The second argument is the placeholder string_view that marks null entries.
  auto const null_placeholder = cudf::string_view{nullptr, 0};
  state.exec(nvbench::exec_tag::sync, [&d_string_views, &null_placeholder](nvbench::launch&) {
    cudf::make_strings_column(d_string_views, null_placeholder);
  });
}

// Registers bench_factory with nvbench under the name "factory".
// The int64 axes declared here supply the "min_width", "max_width" and "num_rows"
// values that bench_factory reads back from the benchmark state.
NVBENCH_BENCH(bench_factory)
.set_name("factory")
.add_int64_axis("min_width", {0})
.add_int64_axis("max_width", {32, 64, 128, 256})
.add_int64_axis("num_rows", {32768, 262144, 2097152});
92 changes: 0 additions & 92 deletions cpp/benchmarks/string/factory.cu

This file was deleted.

2 changes: 1 addition & 1 deletion cpp/src/replace/nans.cu
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,7 @@ std::unique_ptr<column> normalize_nans_and_zeros(column_view const& input,
void normalize_nans_and_zeros(mutable_column_view& in_out, rmm::cuda_stream_view stream)
{
CUDF_FUNC_RANGE();
detail::normalize_nans_and_zeros(in_out, cudf::get_default_stream());
detail::normalize_nans_and_zeros(in_out, stream);
}

} // namespace cudf
4 changes: 2 additions & 2 deletions cpp/tests/streams/replace_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,6 @@ TEST_F(ReplaceTest, NormalizeNansAndZerosMutable)
auto nan = std::numeric_limits<double>::quiet_NaN();
auto input_column = cudf::test::make_type_param_vector<double>({-0.0, 0.0, -nan, nan, nan});
cudf::test::fixed_width_column_wrapper<double> input(input_column.begin(), input_column.end());
cudf::normalize_nans_and_zeros(static_cast<cudf::mutable_column_view>(input),
cudf::test::get_default_stream());
cudf::mutable_column_view mutable_view = cudf::column(input, cudf::test::get_default_stream());
cudf::normalize_nans_and_zeros(mutable_view, cudf::test::get_default_stream());
}
56 changes: 49 additions & 7 deletions python/custreamz/custreamz/tests/test_dataframes.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,13 @@ def test_set_index():
assert_eq(b[0], df.set_index(df.y + 1))


def test_binary_stream_operators(stream):
def test_binary_stream_operators(request, stream):
request.applymarker(
pytest.mark.xfail(
isinstance(stream, DaskStream),
reason="https://github.com/dask/distributed/issues/8953",
)
)
df = cudf.DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]})

expected = df.x + df.y
Expand All @@ -242,7 +248,13 @@ def test_index(stream):
assert_eq(L[1], df.index + 5)


def test_pair_arithmetic(stream):
def test_pair_arithmetic(request, stream):
request.applymarker(
pytest.mark.xfail(
isinstance(stream, DaskStream),
reason="https://github.com/dask/distributed/issues/8953",
)
)
df = cudf.DataFrame({"x": list(range(10)), "y": [1] * 10})

a = DataFrame(example=df.iloc[:0], stream=stream)
Expand All @@ -255,7 +267,13 @@ def test_pair_arithmetic(stream):
assert_eq(cudf.concat(L), (df.x + df.y) * 2)


def test_getitem(stream):
def test_getitem(request, stream):
request.applymarker(
pytest.mark.xfail(
isinstance(stream, DaskStream),
reason="https://github.com/dask/distributed/issues/8953",
)
)
df = cudf.DataFrame({"x": list(range(10)), "y": [1] * 10})

a = DataFrame(example=df.iloc[:0], stream=stream)
Expand Down Expand Up @@ -332,7 +350,13 @@ def test_repr_html(stream):
assert "1" in html


def test_setitem(stream):
def test_setitem(request, stream):
request.applymarker(
pytest.mark.xfail(
isinstance(stream, DaskStream),
reason="https://github.com/dask/distributed/issues/8953",
)
)
df = cudf.DataFrame({"x": list(range(10)), "y": [1] * 10})

sdf = DataFrame(example=df.iloc[:0], stream=stream)
Expand All @@ -356,7 +380,13 @@ def test_setitem(stream):
assert_eq(L[-1], df.mean())


def test_setitem_overwrites(stream):
def test_setitem_overwrites(request, stream):
request.applymarker(
pytest.mark.xfail(
isinstance(stream, DaskStream),
reason="https://github.com/dask/distributed/issues/8953",
)
)
df = cudf.DataFrame({"x": list(range(10))})
sdf = DataFrame(example=df.iloc[:0], stream=stream)
stream = sdf.stream
Expand Down Expand Up @@ -413,8 +443,14 @@ def test_setitem_overwrites(stream):
],
)
def test_rolling_count_aggregations(
op, window, m, pre_get, post_get, kwargs, stream
request, op, window, m, pre_get, post_get, kwargs, stream
):
request.applymarker(
pytest.mark.xfail(
isinstance(stream, DaskStream) and len(kwargs) == 0,
reason="https://github.com/dask/distributed/issues/8953",
)
)
index = pd.DatetimeIndex(
pd.date_range("2000-01-01", "2000-01-03", freq="1h")
)
Expand Down Expand Up @@ -808,7 +844,13 @@ def test_reductions_with_start_state(stream):
assert output2[0] == 360


def test_rolling_aggs_with_start_state(stream):
def test_rolling_aggs_with_start_state(request, stream):
request.applymarker(
pytest.mark.xfail(
isinstance(stream, DaskStream),
reason="https://github.com/dask/distributed/issues/8953",
)
)
example = cudf.DataFrame({"name": [], "amount": []}, dtype="float64")
sdf = DataFrame(stream, example=example)
output0 = (
Expand Down

0 comments on commit f81306f

Please sign in to comment.