From 0b48428850505c07dfc7727c1fa3fb32b04d2a23 Mon Sep 17 00:00:00 2001
From: Nick Becker <nickb500@gmail.com>
Date: Mon, 19 Apr 2021 12:39:28 -0700
Subject: [PATCH 1/5] bash benchmark runner

---
 gpu_bdb/benchmark_runner.sh | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)
 create mode 100644 gpu_bdb/benchmark_runner.sh

diff --git a/gpu_bdb/benchmark_runner.sh b/gpu_bdb/benchmark_runner.sh
new file mode 100644
index 00000000..de181f25
--- /dev/null
+++ b/gpu_bdb/benchmark_runner.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+
+USERNAME=$(whoami)
+GPU_BDB_HOME=/raid/$USERNAME/prod/gpu-bdb
+
+INCLUDE_DASK=True
+INCLUDE_BLAZING=True
+N_REPEATS=1
+
+# Dask queries
+if [ $INCLUDE_DASK = "True" ]; then
+    for qnum in {01..30}
+    do
+        cd $GPU_BDB_HOME/gpu_bdb/queries/q$qnum/
+        for j in $(seq 1 $N_REPEATS)
+        do
+            python gpu_bdb_query_$qnum.py --config_file ../../benchmark_runner/benchmark_config.yaml
+            sleep 3
+        done
+        sleep 3
+    done
+fi
+
+# BlazingSQL Queries
+if [ $INCLUDE_BLAZING = "True" ]; then
+    for qnum in {01..30}
+    do
+        cd $GPU_BDB_HOME/gpu_bdb/queries/q$qnum/
+        for j in $(seq 1 $N_REPEATS)
+        do
+            python gpu_bdb_query_$qnum\_sql.py --config_file ../../benchmark_runner/benchmark_config.yaml
+            sleep 3
+        done
+        sleep 3
+    done
+fi
\ No newline at end of file

From 289bf9eeaad4f4004fddc6756c26ae92c0ee6c0f Mon Sep 17 00:00:00 2001
From: Nick Becker <nickb500@gmail.com>
Date: Mon, 19 Apr 2021 12:43:55 -0700
Subject: [PATCH 2/5] remove old runner

---
 gpu_bdb/benchmark_runner.py | 96 -------------------------------------
 gpu_bdb/benchmark_runner.sh |  2 +-
 2 files changed, 1 insertion(+), 97 deletions(-)
 delete mode 100755 gpu_bdb/benchmark_runner.py

diff --git a/gpu_bdb/benchmark_runner.py b/gpu_bdb/benchmark_runner.py
deleted file mode 100755
index c26c75dc..00000000
--- a/gpu_bdb/benchmark_runner.py
+++ /dev/null
@@ -1,96 +0,0 @@
-import glob
-import re
-import os
-import gc
-import time
-import uuid
-
-N_REPEATS = 1
-
-
-def get_qnum_from_filename(name):
-    m = re.search("[0-9]{2}", name).group()
-    return m
-
-
-def load_query(qnum, fn):
-    import importlib, types
-    loader = importlib.machinery.SourceFileLoader(qnum, fn)
-    mod = types.ModuleType(loader.name)
-    loader.exec_module(mod)
-    return mod.main
-
-
-dask_qnums = [str(i).zfill(2) for i in range(1, 31)]
-bsql_qnums = [str(i).zfill(2) for i in range(1, 31)]
-
-
-if __name__ == "__main__":
-    from bdb_tools.cluster_startup import attach_to_cluster, import_query_libs
-    from bdb_tools.utils import run_query, gpubdb_argparser
-
-    import_query_libs()
-    dask_queries = {
-        qnum: load_query(qnum, f"queries/q{qnum}/gpu_bdb_query_{qnum}.py")
-        for qnum in dask_qnums
-    }
-
-    bsql_queries = {
-        qnum: load_query(qnum, f"queries/q{qnum}/gpu_bdb_query_{qnum}_sql.py")
-        for qnum in bsql_qnums
-    }
-
-    config = gpubdb_argparser()
-    config["run_id"] = uuid.uuid4().hex
-
-    include_blazing = config.get("benchmark_runner_include_bsql")
-    client, bc = attach_to_cluster(config, create_blazing_context=include_blazing)
-    # Preload required libraries for queries on all workers
-    client.run(import_query_libs)
-
-    base_path = os.getcwd()
-
-    # Run BSQL Queries
-    if include_blazing and len(bsql_qnums) > 0:
-        print("Blazing Queries")
-        for qnum, q_func in bsql_queries.items():
-            print(qnum)
-
-            qpath = f"{base_path}/queries/q{qnum}/"
-            os.chdir(qpath)
-            if os.path.exists("current_query_num.txt"):
-                os.remove("current_query_num.txt")
-            with open("current_query_num.txt", "w") as fp:
-                fp.write(qnum)
-
-            for r in range(N_REPEATS):
-                run_query(
-                    config=config,
-                    client=client,
-                    query_func=q_func,
-                    blazing_context=bc,
-                )
-                client.run(gc.collect)
-                client.run_on_scheduler(gc.collect)
-                gc.collect()
-                time.sleep(3)
-
-    # Run Pure Dask Queries
-    if len(dask_qnums) > 0:
-        print("Pure Dask Queries")
-        for qnum, q_func in dask_queries.items():
-            print(qnum)
-
-            qpath = f"{base_path}/queries/q{qnum}/"
-            os.chdir(qpath)
-            if os.path.exists("current_query_num.txt"):
-                os.remove("current_query_num.txt")
-            with open("current_query_num.txt", "w") as fp:
-                fp.write(qnum)
-
-            for r in range(N_REPEATS):
-                run_query(config=config, client=client, query_func=q_func)
-                client.run(gc.collect)
-                client.run_on_scheduler(gc.collect)
-                gc.collect()
-                time.sleep(3)
diff --git a/gpu_bdb/benchmark_runner.sh b/gpu_bdb/benchmark_runner.sh
index de181f25..cb3cc033 100644
--- a/gpu_bdb/benchmark_runner.sh
+++ b/gpu_bdb/benchmark_runner.sh
@@ -33,4 +33,4 @@ if [ $INCLUDE_BLAZING = "True" ]; then
         done
         sleep 3
     done
-fi
\ No newline at end of file
+fi

From d07509eefaf41808c65487923acf3e76baeeca0a Mon Sep 17 00:00:00 2001
From: Nick Becker <nickb500@gmail.com>
Date: Wed, 21 Apr 2021 08:41:44 -0700
Subject: [PATCH 3/5] simplify bash conditionals

---
 gpu_bdb/benchmark_runner.sh | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/gpu_bdb/benchmark_runner.sh b/gpu_bdb/benchmark_runner.sh
index cb3cc033..400a41ee 100644
--- a/gpu_bdb/benchmark_runner.sh
+++ b/gpu_bdb/benchmark_runner.sh
@@ -3,12 +3,12 @@
 USERNAME=$(whoami)
 GPU_BDB_HOME=/raid/$USERNAME/prod/gpu-bdb
 
-INCLUDE_DASK=True
-INCLUDE_BLAZING=True
+INCLUDE_DASK=true
+INCLUDE_BLAZING=false
 N_REPEATS=1
 
 # Dask queries
-if [ $INCLUDE_DASK = "True" ]; then
+if $INCLUDE_DASK; then
     for qnum in {01..30}
     do
         cd $GPU_BDB_HOME/gpu_bdb/queries/q$qnum/
@@ -22,7 +22,7 @@ if [ $INCLUDE_DASK = "True" ]; then
 fi
 
 # BlazingSQL Queries
-if [ $INCLUDE_BLAZING = "True" ]; then
+if $INCLUDE_BLAZING; then
     for qnum in {01..30}
     do
         cd $GPU_BDB_HOME/gpu_bdb/queries/q$qnum/

From ee65e816cb62cf5507f963466b9773e1632e16ed Mon Sep 17 00:00:00 2001
From: Nick Becker <nickb500@gmail.com>
Date: Thu, 22 Apr 2021 12:45:07 -0700
Subject: [PATCH 4/5] additional slurm updates

---
 gpu_bdb/benchmark_runner.sh                 | 8 +++++++-
 gpu_bdb/benchmark_runner/slurm/run_bench.sh | 6 +++---
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/gpu_bdb/benchmark_runner.sh b/gpu_bdb/benchmark_runner.sh
index 400a41ee..17d74998 100644
--- a/gpu_bdb/benchmark_runner.sh
+++ b/gpu_bdb/benchmark_runner.sh
@@ -1,7 +1,13 @@
 #!/bin/bash
 
 USERNAME=$(whoami)
-GPU_BDB_HOME=/raid/$USERNAME/prod/gpu-bdb
+
+# Honor a GPU_BDB_HOME already set by the caller (e.g. exported by
+# slurm/run_bench.sh); otherwise fall back to the default per-user path.
+if [ -z "$GPU_BDB_HOME" ]
+then
+    GPU_BDB_HOME=/raid/$USERNAME/prod/gpu-bdb
+fi
 
 INCLUDE_DASK=true
 INCLUDE_BLAZING=false
diff --git a/gpu_bdb/benchmark_runner/slurm/run_bench.sh b/gpu_bdb/benchmark_runner/slurm/run_bench.sh
index 56271f2e..be22d04a 100755
--- a/gpu_bdb/benchmark_runner/slurm/run_bench.sh
+++ b/gpu_bdb/benchmark_runner/slurm/run_bench.sh
@@ -1,7 +1,6 @@
 set -e pipefail
 
 USERNAME=$(whoami)
-GPU_BDB_HOME=$HOME/gpu-bdb
 LOGDIR=$HOME/dask-local-directory/logs
 STATUS_FILE=${LOGDIR}/status.txt
 
@@ -16,6 +15,8 @@ CONDA_ENV_PATH="/opt/conda/etc/profile.d/conda.sh"
 source $CONDA_ENV_PATH
 conda activate $CONDA_ENV_NAME
 
+export GPU_BDB_HOME=$HOME/gpu-bdb
+
 if [[ "$SLURM_NODEID" -eq 0 ]]; then
     bash $GPU_BDB_HOME/gpu_bdb/cluster_configuration/cluster-startup-slurm.sh SCHEDULER &
     echo "STARTED SCHEDULER"
@@ -29,8 +30,7 @@ if [[ "$SLURM_NODEID" -eq 0 ]]; then
     # echo "Starting load test.."
     # python queries/load_test/gpu_bdb_load_test.py --config_file benchmark_runner/benchmark_config.yaml > $LOGDIR/load_test.log
     echo "Starting E2E run.."
-    python benchmark_runner.py --config_file benchmark_runner/benchmark_config.yaml > $LOGDIR/benchmark_runner.log
-
+    bash benchmark_runner.sh
     echo "FINISHED" > ${STATUS_FILE}
 else
     sleep 15 # Sleep and wait for the scheduler to spin up

From c8ba3e705f3183396b09da667842dbc5a7a4e54e Mon Sep 17 00:00:00 2001
From: Nick Becker <nickb500@gmail.com>
Date: Thu, 22 Apr 2021 13:05:33 -0700
Subject: [PATCH 5/5] update README

---
 README.md | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index a02eeb10..d140c3ee 100755
--- a/README.md
+++ b/README.md
@@ -108,15 +108,17 @@ Then configure the `--sheet` and `--tab` arguments in benchmark_config.yaml.
 
 ### Running all of the Queries
 
-The included `benchmark_runner.py` script will run all queries sequentially. Configuration for this type of end-to-end run is specified in `benchmark_runner/benchmark_config.yaml`.
+The included `benchmark_runner.sh` script will run all queries sequentially. Configuration for this type of end-to-end run is specified in `benchmark_runner/benchmark_config.yaml`.
+
+First, set the `GPU_BDB_HOME` environment variable to the location of this repository, or edit the default path near the top of the bash script. This is the same environment variable mentioned in the configuration above.
 
 To run all queries, cd to `gpu_bdb/` and:
 
 ```python
-python benchmark_runner.py --config_file benchmark_runner/benchmark_config.yaml
+bash benchmark_runner.sh
 ```
 
-By default, this will run each Dask query once, and, if BlazingSQL queries are enabled in `benchmark_config.yaml`, each BlazingSQL query once. You can control the number of repeats by changing the `N_REPEATS` variable in the script.
+By default, this will run each Dask query once. If BlazingSQL queries are enabled both with `INCLUDE_BLAZING` in `benchmark_runner.sh` and in `benchmark_config.yaml`, it will also run each BlazingSQL query once. You can control the number of repeats by changing the `N_REPEATS` variable in the script.
 
 
 ## BlazingSQL