add notebook tests, build.sh args
jameslamb committed Nov 1, 2024
1 parent f7ab898 commit ced22e2
Showing 7 changed files with 182 additions and 3 deletions.
14 changes: 14 additions & 0 deletions .github/workflows/pr.yaml
@@ -18,6 +18,7 @@ jobs:
       - conda-cpp-tests
       - conda-python-build
       - conda-python-tests
+      - conda-notebook-tests
       - wheel-build-pylibwholegraph
       - wheel-tests-pylibwholegraph
       - wheel-build-cugraph-dgl
@@ -51,6 +52,7 @@ jobs:
             - '!CONTRIBUTING.md'
             - '!README.md'
             - '!docs/**'
+            - '!readme_pages/**'
           test_python:
             - '**'
             - '!.devcontainers/**'
@@ -59,6 +61,7 @@
             - '!docs/**'
             - '!img/**'
             - '!notebooks/**'
+            - '!readme_pages/**'
   checks:
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/[email protected]
@@ -83,6 +86,17 @@ jobs:
     uses: rapidsai/shared-workflows/.github/workflows/[email protected]
     with:
       build_type: pull-request
+  conda-notebook-tests:
+    needs: [conda-python-build, changed-files]
+    secrets: inherit
+    uses: rapidsai/shared-workflows/.github/workflows/[email protected]
+    if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_notebooks
+    with:
+      build_type: pull-request
+      node_type: "gpu-v100-latest-1"
+      arch: "amd64"
+      container_image: "rapidsai/ci-conda:cuda11.8.0-ubuntu22.04-py3.10"
+      run_script: "ci/test_notebooks.sh"
   conda-python-tests:
     needs: [conda-python-build, changed-files]
     secrets: inherit
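
The if: condition above keys off the changed-files job: that job emits changed_file_groups, a JSON map from file-group name to a boolean, and conda-notebook-tests only runs when the test_notebooks group matched at least one changed file. A rough bash equivalent of that gate, with an assumed example payload:

    # hypothetical changed-files output (illustrative values only)
    changed_file_groups='{"test_cpp": true, "test_python": true, "test_notebooks": false}'
    # equivalent of: if fromJSON(changed_file_groups).test_notebooks
    if [ "$(echo "${changed_file_groups}" | jq -r '.test_notebooks')" = "true" ]; then
        echo "notebook-related files changed; run conda-notebook-tests"
    fi
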
10 changes: 10 additions & 0 deletions .github/workflows/test.yaml
@@ -22,6 +22,16 @@ jobs:
       branch: ${{ inputs.branch }}
       date: ${{ inputs.date }}
       sha: ${{ inputs.sha }}
+  conda-notebook-tests:
+    secrets: inherit
+    uses: rapidsai/shared-workflows/.github/workflows/[email protected]
+    if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_notebooks
+    with:
+      build_type: pull-request
+      node_type: "gpu-v100-latest-1"
+      arch: "amd64"
+      container_image: "rapidsai/ci-conda:cuda11.8.0-ubuntu22.04-py3.12"
+      run_script: "ci/test_notebooks.sh"
   conda-python-tests:
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/[email protected]
18 changes: 17 additions & 1 deletion build.sh
@@ -29,12 +29,14 @@ VALIDARGS="
    pylibwholegraph
    libwholegraph
    tests
+   benchmarks
    all
    -v
    -g
    -n
    --pydevelop
    --allgpuarch
+   --compile-cmd
    --clean
    -h
    --help
@@ -49,13 +51,16 @@ HELP="$0 [<target> ...] [<flag> ...]
    pylibwholegraph - build the pylibwholegraph Python package
    libwholegraph - build the libwholegraph library
    tests - build the C++ tests
+   benchmarks - build benchmarks
    all - build everything
  and <flag> is:
    -v - verbose build mode
    -g - build for debug
    -n - do not install after a successful build (does not affect Python packages)
    --pydevelop - install the Python packages in editable mode
    --allgpuarch - build for all supported GPU architectures
+   --enable-nvshmem - build with nvshmem support (beta).
+   --compile-cmd - only output compile commands (invoke CMake without build)
    --clean - clean an individual target (note: to do a complete rebuild, use the clean target described above)
    -h - print this text
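
Taken together, the new target and flags can be exercised as follows (a usage sketch assembled from the help text above):

    ./build.sh libwholegraph tests benchmarks   # C++ library plus tests and benchmarks
    ./build.sh pylibwholegraph --pydevelop      # editable install of the Python package
    ./build.sh libwholegraph --compile-cmd      # configure only; emit compile commands
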
@@ -140,11 +145,22 @@ if hasArg --pydevelop; then
     PYTHON_ARGS_FOR_INSTALL="${PYTHON_ARGS_FOR_INSTALL} -e"
 fi
 
+if hasArg --enable-nvshmem; then
+    BUILD_WITH_NVSHMEM=ON
+else
+    BUILD_WITH_NVSHMEM=OFF
+fi
 if hasArg tests; then
     BUILD_TESTS=ON
 else
     BUILD_TESTS=OFF
 fi
+if hasArg benchmarks; then
+    BUILD_BENCHMARKS=ON
+else
+    BUILD_BENCHMARKS=OFF
+fi
+
 
 # If clean or uninstall targets given, run them prior to any other steps
 if hasArg uninstall; then
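
The ON/OFF variables set above are forwarded to CMake further down in build.sh. The exact invocation is outside this diff, but it is roughly of this shape (a sketch; the -D option names are assumed from the variable names, not taken from the source):

    cmake -S cpp -B cpp/build \
        -DBUILD_TESTS=${BUILD_TESTS} \
        -DBUILD_BENCHMARKS=${BUILD_BENCHMARKS} \
        -DBUILD_WITH_NVSHMEM=${BUILD_WITH_NVSHMEM}
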
@@ -250,7 +266,7 @@ if hasArg cugraph-pyg || buildDefault || hasArg all; then
     fi
 fi
 
-# Install the cugraph-dgl extensions for DGL
+# Build and install the cugraph-dgl Python package
 if hasArg cugraph-dgl || buildDefault || hasArg all; then
     if hasArg --clean; then
         cleanPythonDir ${REPODIR}/python/cugraph-dgl
6 changes: 5 additions & 1 deletion ci/test_notebooks.sh
@@ -5,6 +5,8 @@ set -Eeuo pipefail
 
 . /opt/conda/etc/profile.d/conda.sh
 
+RAPIDS_VERSION="$(rapids-version)"
+
 rapids-logger "Generate notebook testing dependencies"
 rapids-dependency-file-generator \
   --output conda \
@@ -27,7 +29,9 @@ PYTHON_CHANNEL=$(rapids-download-conda-from-s3 python)
 rapids-mamba-retry install \
   --channel "${CPP_CHANNEL}" \
   --channel "${PYTHON_CHANNEL}" \
-  libcugraph pylibcugraph cugraph
+  "libcugraph=${RAPIDS_VERSION}" \
+  "pylibcugraph=${RAPIDS_VERSION}" \
+  "cugraph=${RAPIDS_VERSION}"

NBTEST="$(realpath "$(dirname "$0")/utils/nbtest.sh")"
NOTEBOOK_LIST="$(realpath "$(dirname "$0")/notebook_list.py")"
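
A note on the pinning introduced here: rapids-version prints the RAPIDS CalVer of the current checkout, so the install line resolves to exact-series conda match specs rather than whatever the solver prefers. Illustratively, assuming it prints 24.12:

    RAPIDS_VERSION="$(rapids-version)"   # e.g. "24.12" (assumed example value)
    # "cugraph=24.12" is a conda match spec: any 24.12.* build satisfies it,
    # which keeps the solver on packages built from this branch
    rapids-mamba-retry install "cugraph=${RAPIDS_VERSION}"
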
7 changes: 7 additions & 0 deletions ci/test_python.sh
@@ -27,6 +27,13 @@ RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${PWD}/test-results"}
 RAPIDS_COVERAGE_DIR=${RAPIDS_COVERAGE_DIR:-"${PWD}/coverage-results"}
 mkdir -p "${RAPIDS_TESTS_DIR}" "${RAPIDS_COVERAGE_DIR}"
 
+# RAPIDS_DATASET_ROOT_DIR is used by test scripts
+export RAPIDS_DATASET_ROOT_DIR="$(realpath datasets)"
+mkdir -p "${RAPIDS_DATASET_ROOT_DIR}"
+pushd "${RAPIDS_DATASET_ROOT_DIR}"
+./get_test_data.sh --benchmark
+popd
+
 EXITCODE=0
 trap "EXITCODE=1" ERR
 set +e
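
Tests resolve their inputs relative to RAPIDS_DATASET_ROOT_DIR, and per get_test_data.sh below, the --benchmark bundle untars into a csv/ subdirectory, so after this block the data can be checked with, for example:

    ls "${RAPIDS_DATASET_ROOT_DIR}/csv"   # benchmark CSV datasets land here
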
2 changes: 1 addition & 1 deletion ci/utils/nbtest.sh
@@ -60,7 +60,7 @@ for nb in $*; do
     echo --------------------------------------------------------------------------------
     echo STARTING: ${NBNAME}
     echo --------------------------------------------------------------------------------
-    jupyter nbconvert --to script ${NBFILENAME} --output ${NBTMPDIR}/${NBNAME}-test
+    jupyter nbconvert --to python ${NBFILENAME} --output ${NBTMPDIR}/${NBNAME}-test
     echo "${MAGIC_OVERRIDE_CODE}" > ${NBTMPDIR}/tmpfile
     cat ${NBTESTSCRIPT} >> ${NBTMPDIR}/tmpfile
     mv ${NBTMPDIR}/tmpfile ${NBTESTSCRIPT}
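
The change pins nbconvert's exporter: --to script picks an exporter based on the notebook's kernel metadata, while --to python always emits a Python script (the output is the same for a Python notebook). A standalone sketch with a hypothetical notebook name:

    jupyter nbconvert --to python demo.ipynb --output /tmp/nbtest/demo-test
    # writes /tmp/nbtest/demo-test.py; nbtest.sh then prepends MAGIC_OVERRIDE_CODE
    # to the converted script before running it
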
128 changes: 128 additions & 0 deletions datasets/get_test_data.sh
@@ -0,0 +1,128 @@
#!/bin/bash
# Copyright (c) 2021-2024, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

set -e
set -o pipefail

# Ensure we're in the cugraph/datasets dir
cd "$( cd "$( dirname "$(realpath -m "${BASH_SOURCE[0]}")" )" && pwd )";

# Update this to add/remove/change a dataset, using the following format:
#
# comment about the dataset
# dataset download URL
# destination dir to untar to
# blank line separator
#
# FIXME: some test data needs to be extracted to "benchmarks", which is
# confusing now that there's dedicated datasets for benchmarks.
CPP_CI_DATASET_DATA="
# ~10s download
https://data.rapids.ai/cugraph/test/cpp_ci_datasets.tgz
test
"

BASE_DATASET_DATA="
# ~22s download
https://data.rapids.ai/cugraph/test/datasets.tgz
test

# ~14s download
https://data.rapids.ai/cugraph/test/ref/pagerank.tgz
test/ref

# ~1s download
https://data.rapids.ai/cugraph/test/ref/sssp.tgz
test/ref

# ~15s download
https://data.rapids.ai/cugraph/benchmark/hibench/hibench_1_large.tgz
benchmark

# ~1s download
https://data.rapids.ai/cugraph/benchmark/hibench/hibench_1_small.tgz
benchmark

# ~0.6s download
https://data.rapids.ai/cugraph/test/tsplib/datasets.tar.gz
tsplib
"

EXTENDED_DATASET_DATA="
# ~42s download - tests using this dataset are currently not run in test.sh with --quick
https://data.rapids.ai/cugraph/benchmark/hibench/hibench_1_huge.tgz
benchmark
"

BENCHMARK_DATASET_DATA="
# ~90s download - these are used for benchmarks runs (code in <cugraph root>/benchmarks)
https://data.rapids.ai/cugraph/benchmark/benchmark_csv_data.tgz
csv
"

SELF_LOOPS_DATASET_DATA="
# ~1s download
https://data.rapids.ai/cugraph/benchmark/benchmark_csv_data_self_loops.tgz
self_loops
"
################################################################################
# Do not change the script below this line if only adding/updating a dataset

NUMARGS=$#
ARGS=$*
function hasArg {
    (( ${NUMARGS} != 0 )) && (echo " ${ARGS} " | grep -q " $1 ")
}
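# Example: 'hasArg --benchmark' succeeds only if --benchmark was passed to the
# script; the spaces padded around ${ARGS} and $1 force whole-word matches.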

if hasArg -h || hasArg --help; then
echo "$0 [--subset | --benchmark | --self_loops]"
exit 0
fi

# Select the datasets to install
if hasArg "--benchmark"; then
DATASET_DATA="${BENCHMARK_DATASET_DATA}"
elif hasArg "--subset"; then
DATASET_DATA="${BASE_DATASET_DATA}"
elif hasArg "--cpp_ci_subset"; then
DATASET_DATA="${CPP_CI_DATASET_DATA}"
elif hasArg "--self_loops"; then
DATASET_DATA="${SELF_LOOPS_DATASET_DATA}"
# Do not include benchmark datasets by default - too big
else
DATASET_DATA="${BASE_DATASET_DATA} ${EXTENDED_DATASET_DATA}"
fi

URLS=($(echo "$DATASET_DATA"|awk '{if (NR%4 == 3) print $0}'))      # extract the URL (3rd line of each record) into a bash array
DESTDIRS=($(echo "$DATASET_DATA"|awk '{if (NR%4 == 0) print $0}'))  # extract the dest dir (4th line of each record) into a bash array
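# Worked example of the NR%4 selection: each dataset variable above expands to
# 4-line records, starting with the blank line after the opening quote:
#   NR 1: <blank separator>   NR%4 == 1
#   NR 2: # comment           NR%4 == 2
#   NR 3: <download URL>      NR%4 == 3  -> URLS
#   NR 4: <destination dir>   NR%4 == 0  -> DESTDIRS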

echo Downloading ...

# Download all tarfiles to a tmp dir
mkdir -p tmp
cd tmp
for url in ${URLS[*]}; do
    time wget -N --progress=dot:giga ${url}
done
cd ..

# create the destination dirs
mkdir -p "${DESTDIRS[@]}"

# Iterate over the arrays and untar the nth tarfile to the nth dest directory.
# The tarfile name is derived from the download url.
echo Decompressing ...
for index in ${!DESTDIRS[*]}; do
    echo "tmp/$(basename "${URLS[$index]}") -C ${DESTDIRS[$index]}" | tr '\n' '\0'
done | xargs -0 -t -r -n1 -P$(nproc --all) sh -c 'tar -xzvf $0 --overwrite'
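
For reference, typical invocations of the new script from a checkout (flags as defined above; the loop at the end untars the n-th tarball into the n-th destination dir, one tar process per CPU):

    cd datasets
    ./get_test_data.sh --subset      # base test datasets only
    ./get_test_data.sh --benchmark   # benchmark CSVs, as used by ci/test_python.sh
    ./get_test_data.sh               # default: base plus extended datasets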
