Commit
Merge branch 'branch-24.02' into test-notebooks-deps
bdice authored Jan 8, 2024
2 parents a5e2658 + 79d5070 commit a2c7499
Showing 10 changed files with 27 additions and 30 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/pr.yaml
@@ -116,7 +116,7 @@ jobs:
build_type: pull-request
script: ci/test_wheel_cudf.sh
wheel-build-dask-cudf:
needs: wheel-tests-cudf
needs: wheel-build-cudf
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
with:
6 changes: 3 additions & 3 deletions ci/build_wheel.sh
@@ -1,5 +1,5 @@
#!/bin/bash
# Copyright (c) 2023, NVIDIA CORPORATION.
# Copyright (c) 2023-2024, NVIDIA CORPORATION.

set -euo pipefail

@@ -23,7 +23,7 @@ pyproject_file="${package_dir}/pyproject.toml"

sed -i "s/^name = \"${package_name}\"/name = \"${package_name}${PACKAGE_CUDA_SUFFIX}\"/g" ${pyproject_file}
echo "${version}" > VERSION
sed -i "/^__git_commit__/ s/= .*/= \"${commit}\"/g" "${package_dir}/${package_name}/_version.py"
sed -i "/^__git_commit__/ s/= .*/= \"${commit}\"/g" "${package_dir}/${package_name//-/_}/_version.py"

# For nightlies we want to ensure that we're pulling in alphas as well. The
# easiest way to do so is to augment the spec with a constraint containing a
@@ -34,7 +34,7 @@ if ! rapids-is-release-build; then
alpha_spec=',>=0.0.0a0'
fi

if [[ ${package_name} == "dask_cudf" ]]; then
if [[ ${package_name} == "dask-cudf" ]]; then
sed -r -i "s/cudf==(.*)\"/cudf${PACKAGE_CUDA_SUFFIX}==\1${alpha_spec}\"/g" ${pyproject_file}
sed -r -i "s/dask-cuda==(.*)\"/dask-cuda==\1${alpha_spec}\"/g" ${pyproject_file}
sed -r -i "s/rapids-dask-dependency==(.*)\"/rapids-dask-dependency==\1${alpha_spec}\"/g" ${pyproject_file}
4 changes: 2 additions & 2 deletions ci/build_wheel_dask_cudf.sh
@@ -1,11 +1,11 @@
#!/bin/bash
# Copyright (c) 2023, NVIDIA CORPORATION.
# Copyright (c) 2023-2024, NVIDIA CORPORATION.

set -euo pipefail

package_dir="python/dask_cudf"

./ci/build_wheel.sh dask_cudf ${package_dir}
./ci/build_wheel.sh dask-cudf ${package_dir}

RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
RAPIDS_PY_WHEEL_NAME="dask_cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 ${package_dir}/dist
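
A note on the dash/underscore handling above: the distribution name is now passed to ci/build_wheel.sh with a dash (dask-cudf), while the source tree keeps the importable directory python/dask_cudf. The script bridges the two with the bash substitution ${package_name//-/_} when locating _version.py. The Python lines below are only an illustrative sketch of that mapping; the paths are taken from the scripts above, not from any new code in this commit:

# Sketch of the path logic in ci/build_wheel.sh, for illustration only.
package_name = "dask-cudf"        # distribution name passed on the command line
package_dir = "python/dask_cudf"  # source directory from build_wheel_dask_cudf.sh

# Mirrors the bash parameter expansion ${package_name//-/_}
module_name = package_name.replace("-", "_")
version_file = f"{package_dir}/{module_name}/_version.py"
print(version_file)  # python/dask_cudf/dask_cudf/_version.py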
1 change: 0 additions & 1 deletion conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -51,7 +51,6 @@ dependencies:
- librdkafka>=1.9.0,<1.10.0a0
- librmm==24.2.*
- make
- mimesis>=4.1.0
- moto>=4.0.8
- msgpack-python
- myst-nb
1 change: 0 additions & 1 deletion conda/environments/all_cuda-120_arch-x86_64.yaml
@@ -50,7 +50,6 @@ dependencies:
- librdkafka>=1.9.0,<1.10.0a0
- librmm==24.2.*
- make
- mimesis>=4.1.0
- moto>=4.0.8
- msgpack-python
- myst-nb
1 change: 0 additions & 1 deletion dependencies.yaml
@@ -603,7 +603,6 @@ dependencies:
- cramjam
- fastavro>=0.22.9
- hypothesis
- mimesis>=4.1.0
- pytest-benchmark
- pytest-cases
- python-snappy>=0.6.0
25 changes: 9 additions & 16 deletions python/cudf/cudf/testing/dataset_generator.py
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2023, NVIDIA CORPORATION.
# Copyright (c) 2020-2024, NVIDIA CORPORATION.

# This module is for generating "synthetic" datasets. It was originally
# designed for testing filtered reading. Generally, it should be useful
@@ -11,11 +11,9 @@
import uuid
from multiprocessing import Pool

import mimesis
import numpy as np
import pandas as pd
import pyarrow as pa
from mimesis import Generic
from pyarrow import parquet as pq

import cudf
@@ -35,8 +33,7 @@ class ColumnParameters:
null_frequency : 0.1
Probability of a generated value being null
generator : Callable
Function for generating random data. It is passed a Mimesis Generic
provider and returns an Iterable that generates data.
Function for generating random data.
is_sorted : bool
Sort this column. Columns are sorted in same order as ColumnParameters
instances stored in column_params of Parameters. If there are one or
@@ -51,7 +48,10 @@ def __init__(
self,
cardinality=100,
null_frequency=0.1,
generator=lambda g: [g.address.country for _ in range(100)],
generator=lambda: [
_generate_string(string.ascii_letters, random.randint(4, 8))
for _ in range(100)
],
is_sorted=True,
dtype=None,
):
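
The new default generator above builds short random strings with a _generate_string helper and the stdlib random/string modules instead of a Mimesis provider. The helper's body is not part of this diff, so the sketch below is one plausible stdlib implementation, shown only to illustrate how the zero-argument default is expected to behave:

import random
import string


def _generate_string(str_seq: str, length: int) -> str:
    # One plausible body for the helper referenced by the new default;
    # the actual implementation is not shown in this diff.
    return "".join(random.choices(str_seq, k=length))


def default_generator():
    # The default is now a zero-argument callable returning a list of short
    # random strings, so no Mimesis Generic provider needs to be passed in.
    return [
        _generate_string(string.ascii_letters, random.randint(4, 8))
        for _ in range(100)
    ]


values = default_generator()  # e.g. ["qTmd", "XyGkLwpa", ...]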
@@ -235,15 +235,9 @@ def get_dataframe(parameters, use_threads):
if parameters.seed is not None:
np.random.seed(parameters.seed)

# For each column, use a generic Mimesis producer to create an Iterable
# for generating data
for i, column_params in enumerate(parameters.column_parameters):
if column_params.dtype is None:
column_params.generator = column_params.generator(
Generic("en", seed=parameters.seed)
)
else:
column_params.generator = column_params.generator()
# For each column, invoke the data generator
for column_params in parameters.column_parameters:
column_params.generator = column_params.generator()

# Get schema for each column
table_fields = []
@@ -343,7 +337,6 @@ def rand_dataframe(
# Apply seed
random.seed(seed)
np.random.seed(seed)
mimesis.random.random.seed(seed)

column_params = []
for meta in dtypes_meta:
14 changes: 11 additions & 3 deletions python/cudf/cudf/tests/test_parquet.py
@@ -1,11 +1,12 @@
# Copyright (c) 2019-2023, NVIDIA CORPORATION.
# Copyright (c) 2019-2024, NVIDIA CORPORATION.

import datetime
import glob
import math
import os
import pathlib
import random
import string
from contextlib import contextmanager
from io import BytesIO
from string import ascii_letters
@@ -432,13 +433,20 @@ def test_parquet_read_filtered(tmpdir, rdg_seed):
dg.ColumnParameters(
cardinality=40,
null_frequency=0.05,
generator=lambda g: [g.address.city() for _ in range(40)],
generator=lambda: [
"".join(
random.sample(
string.ascii_letters, random.randint(4, 8)
)
)
for _ in range(40)
],
is_sorted=False,
),
dg.ColumnParameters(
40,
0.2,
lambda g: [g.person.age() for _ in range(40)],
lambda: np.random.default_rng().integers(0, 100, size=40),
True,
),
],
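
For reference, the two replacement generators in this test can be run standalone as sketched below; random.sample draws letters without replacement, so the requested length must not exceed len(string.ascii_letters) == 52, which the 4-8 range easily satisfies. This is an illustrative snippet, not part of the test file:

import random
import string

import numpy as np

# City-like column: short random strings of 4-8 distinct ASCII letters.
city_like = [
    "".join(random.sample(string.ascii_letters, random.randint(4, 8)))
    for _ in range(40)
]

# Age-like column: 40 random integers in [0, 100).
age_like = np.random.default_rng().integers(0, 100, size=40)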
1 change: 0 additions & 1 deletion python/cudf/pyproject.toml
@@ -56,7 +56,6 @@ test = [
"cramjam",
"fastavro>=0.22.9",
"hypothesis",
"mimesis>=4.1.0",
"msgpack",
"pytest",
"pytest-benchmark",
2 changes: 1 addition & 1 deletion python/dask_cudf/pyproject.toml
@@ -8,7 +8,7 @@ requires = [
] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.

[project]
name = "dask_cudf"
name = "dask-cudf"
dynamic = ["version"]
description = "Utilities for Dask and cuDF interactions"
readme = { file = "README.md", content-type = "text/markdown" }
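
The rename from dask_cudf to dask-cudf should be transparent to installers, since PEP 503 name normalization maps both spellings to the same canonical project name. A quick check with the packaging library (which pip vendors for name normalization) illustrates the point:

from packaging.utils import canonicalize_name

# Both spellings normalize to the same canonical project name, so
# "pip install dask_cudf" and "pip install dask-cudf" resolve identically.
assert canonicalize_name("dask_cudf") == canonicalize_name("dask-cudf") == "dask-cudf"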
