From 0b48428850505c07dfc7727c1fa3fb32b04d2a23 Mon Sep 17 00:00:00 2001 From: Nick Becker Date: Mon, 19 Apr 2021 12:39:28 -0700 Subject: [PATCH 1/5] bash benchmark runner --- gpu_bdb/benchmark_runner.sh | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 gpu_bdb/benchmark_runner.sh diff --git a/gpu_bdb/benchmark_runner.sh b/gpu_bdb/benchmark_runner.sh new file mode 100644 index 00000000..de181f25 --- /dev/null +++ b/gpu_bdb/benchmark_runner.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +USERNAME=$(whoami) +GPU_BDB_HOME=/raid/$USERNAME/prod/gpu-bdb + +INCLUDE_DASK=True +INCLUDE_BLAZING=True +N_REPEATS=1 + +# Dask queries +if [ $INCLUDE_DASK = "True" ]; then + for qnum in {01..30} + do + cd $GPU_BDB_HOME/gpu_bdb/queries/q$qnum/ + for j in $(seq 1 $N_REPEATS) + do + python gpu_bdb_query_$qnum.py --config_file ../../benchmark_runner/benchmark_config.yaml + sleep 3 + done + sleep 3 + done +fi + +# BlazingSQL Queries +if [ $INCLUDE_BLAZING = "True" ]; then + for qnum in {01..30} + do + cd $GPU_BDB_HOME/gpu_bdb/queries/q$qnum/ + for j in $(seq 1 $N_REPEATS) + do + python gpu_bdb_query_$qnum\_sql.py --config_file ../../benchmark_runner/benchmark_config.yaml + sleep 3 + done + sleep 3 + done +fi \ No newline at end of file From 289bf9eeaad4f4004fddc6756c26ae92c0ee6c0f Mon Sep 17 00:00:00 2001 From: Nick Becker Date: Mon, 19 Apr 2021 12:43:55 -0700 Subject: [PATCH 2/5] remove old runner --- gpu_bdb/benchmark_runner.py | 96 ------------------------------------- gpu_bdb/benchmark_runner.sh | 2 +- 2 files changed, 1 insertion(+), 97 deletions(-) delete mode 100755 gpu_bdb/benchmark_runner.py diff --git a/gpu_bdb/benchmark_runner.py b/gpu_bdb/benchmark_runner.py deleted file mode 100755 index c26c75dc..00000000 --- a/gpu_bdb/benchmark_runner.py +++ /dev/null @@ -1,96 +0,0 @@ -import glob -import re -import os -import gc -import time -import uuid - -N_REPEATS = 1 - - -def get_qnum_from_filename(name): - m = re.search("[0-9]{2}", name).group() - return m - - -def load_query(qnum, fn): - import importlib, types - loader = importlib.machinery.SourceFileLoader(qnum, fn) - mod = types.ModuleType(loader.name) - loader.exec_module(mod) - return mod.main - - -dask_qnums = [str(i).zfill(2) for i in range(1, 31)] -bsql_qnums = [str(i).zfill(2) for i in range(1, 31)] - - -if __name__ == "__main__": - from bdb_tools.cluster_startup import attach_to_cluster, import_query_libs - from bdb_tools.utils import run_query, gpubdb_argparser - - import_query_libs() - dask_queries = { - qnum: load_query(qnum, f"queries/q{qnum}/gpu_bdb_query_{qnum}.py") - for qnum in dask_qnums - } - - bsql_queries = { - qnum: load_query(qnum, f"queries/q{qnum}/gpu_bdb_query_{qnum}_sql.py") - for qnum in bsql_qnums - } - - config = gpubdb_argparser() - config["run_id"] = uuid.uuid4().hex - - include_blazing = config.get("benchmark_runner_include_bsql") - client, bc = attach_to_cluster(config, create_blazing_context=include_blazing) - # Preload required libraries for queries on all workers - client.run(import_query_libs) - - base_path = os.getcwd() - - # Run BSQL Queries - if include_blazing and len(bsql_qnums) > 0: - print("Blazing Queries") - for qnum, q_func in bsql_queries.items(): - print(qnum) - - qpath = f"{base_path}/queries/q{qnum}/" - os.chdir(qpath) - if os.path.exists("current_query_num.txt"): - os.remove("current_query_num.txt") - with open("current_query_num.txt", "w") as fp: - fp.write(qnum) - - for r in range(N_REPEATS): - run_query( - config=config, - client=client, - query_func=q_func, - blazing_context=bc, - ) - client.run(gc.collect) - client.run_on_scheduler(gc.collect) - gc.collect() - time.sleep(3) - - # Run Pure Dask Queries - if len(dask_qnums) > 0: - print("Pure Dask Queries") - for qnum, q_func in dask_queries.items(): - print(qnum) - - qpath = f"{base_path}/queries/q{qnum}/" - os.chdir(qpath) - if os.path.exists("current_query_num.txt"): - os.remove("current_query_num.txt") - with open("current_query_num.txt", "w") as fp: - fp.write(qnum) - - for r in range(N_REPEATS): - run_query(config=config, client=client, query_func=q_func) - client.run(gc.collect) - client.run_on_scheduler(gc.collect) - gc.collect() - time.sleep(3) diff --git a/gpu_bdb/benchmark_runner.sh b/gpu_bdb/benchmark_runner.sh index de181f25..cb3cc033 100644 --- a/gpu_bdb/benchmark_runner.sh +++ b/gpu_bdb/benchmark_runner.sh @@ -33,4 +33,4 @@ if [ $INCLUDE_BLAZING = "True" ]; then done sleep 3 done -fi \ No newline at end of file +fi From d07509eefaf41808c65487923acf3e76baeeca0a Mon Sep 17 00:00:00 2001 From: Nick Becker Date: Wed, 21 Apr 2021 08:41:44 -0700 Subject: [PATCH 3/5] simplify bash conditionals --- gpu_bdb/benchmark_runner.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/gpu_bdb/benchmark_runner.sh b/gpu_bdb/benchmark_runner.sh index cb3cc033..400a41ee 100644 --- a/gpu_bdb/benchmark_runner.sh +++ b/gpu_bdb/benchmark_runner.sh @@ -3,12 +3,12 @@ USERNAME=$(whoami) GPU_BDB_HOME=/raid/$USERNAME/prod/gpu-bdb -INCLUDE_DASK=True -INCLUDE_BLAZING=True +INCLUDE_DASK=true +INCLUDE_BLAZING=false N_REPEATS=1 # Dask queries -if [ $INCLUDE_DASK = "True" ]; then +if $INCLUDE_DASK; then for qnum in {01..30} do cd $GPU_BDB_HOME/gpu_bdb/queries/q$qnum/ @@ -22,7 +22,7 @@ if [ $INCLUDE_DASK = "True" ]; then fi # BlazingSQL Queries -if [ $INCLUDE_BLAZING = "True" ]; then +if $INCLUDE_BLAZING; then for qnum in {01..30} do cd $GPU_BDB_HOME/gpu_bdb/queries/q$qnum/ From ee65e816cb62cf5507f963466b9773e1632e16ed Mon Sep 17 00:00:00 2001 From: Nick Becker Date: Thu, 22 Apr 2021 12:45:07 -0700 Subject: [PATCH 4/5] additional slurm updates --- gpu_bdb/benchmark_runner.sh | 8 +++++++- gpu_bdb/benchmark_runner/slurm/run_bench.sh | 6 +++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/gpu_bdb/benchmark_runner.sh b/gpu_bdb/benchmark_runner.sh index 400a41ee..17d74998 100644 --- a/gpu_bdb/benchmark_runner.sh +++ b/gpu_bdb/benchmark_runner.sh @@ -1,7 +1,13 @@ #!/bin/bash USERNAME=$(whoami) -GPU_BDB_HOME=/raid/$USERNAME/prod/gpu-bdb + +if [ -z "$GPU_BDB_HOME" ] +then + GPU_BDB_HOME=/raid/$USERNAME/prod/gpu-bdb +else + GPU_BDB_HOME=$GPU_BDB_HOME +fi INCLUDE_DASK=true INCLUDE_BLAZING=false diff --git a/gpu_bdb/benchmark_runner/slurm/run_bench.sh b/gpu_bdb/benchmark_runner/slurm/run_bench.sh index 56271f2e..be22d04a 100755 --- a/gpu_bdb/benchmark_runner/slurm/run_bench.sh +++ b/gpu_bdb/benchmark_runner/slurm/run_bench.sh @@ -1,7 +1,6 @@ set -e pipefail USERNAME=$(whoami) -GPU_BDB_HOME=$HOME/gpu-bdb LOGDIR=$HOME/dask-local-directory/logs STATUS_FILE=${LOGDIR}/status.txt @@ -16,6 +15,8 @@ CONDA_ENV_PATH="/opt/conda/etc/profile.d/conda.sh" source $CONDA_ENV_PATH conda activate $CONDA_ENV_NAME +export GPU_BDB_HOME=$HOME/gpu-bdb + if [[ "$SLURM_NODEID" -eq 0 ]]; then bash $GPU_BDB_HOME/gpu_bdb/cluster_configuration/cluster-startup-slurm.sh SCHEDULER & echo "STARTED SCHEDULER" @@ -29,8 +30,7 @@ if [[ "$SLURM_NODEID" -eq 0 ]]; then # echo "Starting load test.." # python queries/load_test/gpu_bdb_load_test.py --config_file benchmark_runner/benchmark_config.yaml > $LOGDIR/load_test.log echo "Starting E2E run.." - python benchmark_runner.py --config_file benchmark_runner/benchmark_config.yaml > $LOGDIR/benchmark_runner.log - + bash benchmark_runner.sh echo "FINISHED" > ${STATUS_FILE} else sleep 15 # Sleep and wait for the scheduler to spin up From c8ba3e705f3183396b09da667842dbc5a7a4e54e Mon Sep 17 00:00:00 2001 From: Nick Becker Date: Thu, 22 Apr 2021 13:05:33 -0700 Subject: [PATCH 5/5] update README --- README.md | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index a02eeb10..d140c3ee 100755 --- a/README.md +++ b/README.md @@ -108,15 +108,17 @@ Then configure the `--sheet` and `--tab` arguments in benchmark_config.yaml. ### Running all of the Queries -The included `benchmark_runner.py` script will run all queries sequentially. Configuration for this type of end-to-end run is specified in `benchmark_runner/benchmark_config.yaml`. +The included `benchmark_runner.sh` script will run all queries sequentially. Configuration for this type of end-to-end run is specified in `benchmark_runner/benchmark_config.yaml`. + +First, set `GPU_BDB_HOME` in the bash script to the location of this repository. This is the same environment variable mentioned in the configuration above. To run all queries, cd to `gpu_bdb/` and: ```python -python benchmark_runner.py --config_file benchmark_runner/benchmark_config.yaml +bash benchmark_runner.sh ``` -By default, this will run each Dask query once, and, if BlazingSQL queries are enabled in `benchmark_config.yaml`, each BlazingSQL query once. You can control the number of repeats by changing the `N_REPEATS` variable in the script. +By default, this will run each Dask query once. If BlazingSQL queries are enabled with `INCLUDE_BLAZING` in `benchmark_runner.sh` and in `benchmark_config.yaml`, this will run each BlazingSQL query once. You can control the number of repeats by changing the `N_REPEATS` variable in the script. ## BlazingSQL