From 15f345eb6ea2d79bec54cedc7ce53dd67b94c667 Mon Sep 17 00:00:00 2001 From: Shreya-gaur Date: Wed, 28 Jun 2023 13:29:48 -0400 Subject: [PATCH 1/5] cutlass: Added cutlass 3.0 to gpu-app-collection This commit contains the changes in the make file and the define-app-apps file. --- util/job_launching/apps/define-all-apps.yml | 76 ++++++++++++--------- util/tracer_nvbit/run_cutlass_trace.sh | 16 +++++ 2 files changed, 58 insertions(+), 34 deletions(-) create mode 100644 util/tracer_nvbit/run_cutlass_trace.sh diff --git a/util/job_launching/apps/define-all-apps.yml b/util/job_launching/apps/define-all-apps.yml index ae8174e47..c57b796f5 100644 --- a/util/job_launching/apps/define-all-apps.yml +++ b/util/job_launching/apps/define-all-apps.yml @@ -511,51 +511,59 @@ cutlass_5_trace: exec_dir: "$GPUAPPS_ROOT/bin/$CUDA_VERSION/release/" data_dirs: "$GPUAPPS_ROOT/data_dirs/" execs: - - cutlass_perf_test: - - args: --seed=2020 --dist=0 --m=2560 --n=16 --k=2560 --kernels=wmma_gemm_nn --iterations=5 --providers=cutlass - accel-sim-mem: 3G - - args: --seed=2020 --dist=0 --m=2560 --n=32 --k=2560 --kernels=wmma_gemm_nn --iterations=5 --providers=cutlass - accel-sim-mem: 3G - - args: --seed=2020 --dist=0 --m=2560 --n=64 --k=2560 --kernels=wmma_gemm_nn --iterations=5 --providers=cutlass - accel-sim-mem: 3G - - args: --seed=2020 --dist=0 --m=2560 --n=128 --k=2560 --kernels=wmma_gemm_nn --iterations=5 --providers=cutlass - accel-sim-mem: 3G - - args: --seed=2020 --dist=0 --m=2560 --n=7000 --k=2560 --kernels=wmma_gemm_nn --iterations=5 --providers=cutlass - accel-sim-mem: 3G - - args: --seed=2020 --dist=0 --m=4096 --n=16 --k=4096 --kernels=wmma_gemm_nn --iterations=5 --providers=cutlass - accel-sim-mem: 5G - - args: --seed=2020 --dist=0 --m=4096 --n=32 --k=4096 --kernels=wmma_gemm_nn --iterations=5 --providers=cutlass - accel-sim-mem: 5G - - args: --seed=2020 --dist=0 --m=4096 --n=64 --k=4096 --kernels=wmma_gemm_nn --iterations=5 --providers=cutlass - accel-sim-mem: 5G - - args: --seed=2020 --dist=0 --m=4096 --n=128 --k=4096 --kernels=wmma_gemm_nn --iterations=5 --providers=cutlass - accel-sim-mem: 5G - - args: --seed=2020 --dist=0 --m=4096 --n=7000 --k=4096 --kernels=wmma_gemm_nn --iterations=5 --providers=cutlass - accel-sim-mem: 5G - - args: --seed=2020 --dist=0 --m=2560 --n=16 --k=2560 --kernels=sgemm_nn --iterations=5 --providers=cutlass + - cutlass_profiler: + #sparse gemm kernels + - args: --seed=2020 --dist=0 --m=2560 --n=16 --k=2560 --kernels=sgemm --iterations=5 --providers=cutlass accel-sim-mem: 13G - - args: --seed=2020 --dist=0 --m=2560 --n=32 --k=2560 --kernels=sgemm_nn --iterations=5 --providers=cutlass + - args: --seed=2020 --dist=0 --m=2560 --n=32 --k=2560 --kernels=sgemm --iterations=5 --providers=cutlass accel-sim-mem: 13G - - args: --seed=2020 --dist=0 --m=2560 --n=64 --k=2560 --kernels=sgemm_nn --iterations=5 --providers=cutlass + - args: --seed=2020 --dist=0 --m=2560 --n=64 --k=2560 --kernels=sgemm --iterations=5 --providers=cutlass accel-sim-mem: 13G - - args: --seed=2020 --dist=0 --m=2560 --n=128 --k=2560 --kernels=sgemm_nn --iterations=5 --providers=cutlass + - args: --seed=2020 --dist=0 --m=2560 --n=128 --k=2560 --kernels=sgemm --iterations=5 --providers=cutlass accel-sim-mem: 13G - - args: --seed=2020 --dist=0 --m=2560 --n=512 --k=2560 --kernels=sgemm_nn --iterations=5 --providers=cutlass + - args: --seed=2020 --dist=0 --m=2560 --n=512 --k=2560 --kernels=sgemm --iterations=5 --providers=cutlass accel-sim-mem: 13G - - args: --seed=2020 --dist=0 --m=2560 --n=1024 --k=2560 --kernels=sgemm_nn --iterations=5 --providers=cutlass + - args: --seed=2020 --dist=0 --m=2560 --n=1024 --k=2560 --kernels=sgemm --iterations=5 --providers=cutlass accel-sim-mem: 13G - - args: --seed=2020 --dist=0 --m=2560 --n=2560 --k=2560 --kernels=sgemm_nn --iterations=5 --providers=cutlass + - args: --seed=2020 --dist=0 --m=2560 --n=2560 --k=2560 --kernels=sgemm --iterations=5 --providers=cutlass accel-sim-mem: 13G - - args: --seed=2020 --dist=0 --m=4096 --n=16 --k=4096 --kernels=sgemm_nn --iterations=5 --providers=cutlass + - args: --seed=2020 --dist=0 --m=4096 --n=16 --k=4096 --kernels=sgemm --iterations=5 --providers=cutlass accel-sim-mem: 16G - - args: --seed=2020 --dist=0 --m=4096 --n=32 --k=4096 --kernels=sgemm_nn --iterations=5 --providers=cutlass + - args: --seed=2020 --dist=0 --m=4096 --n=32 --k=4096 --kernels=sgemm --iterations=5 --providers=cutlass accel-sim-mem: 16G - - args: --seed=2020 --dist=0 --m=4096 --n=64 --k=4096 --kernels=sgemm_nn --iterations=5 --providers=cutlass + - args: --seed=2020 --dist=0 --m=4096 --n=64 --k=4096 --kernels=sgemm --iterations=5 --providers=cutlass accel-sim-mem: 16G - - args: --seed=2020 --dist=0 --m=4096 --n=128 --k=4096 --kernels=sgemm_nn --iterations=5 --providers=cutlass + - args: --seed=2020 --dist=0 --m=4096 --n=128 --k=4096 --kernels=sgemm --iterations=5 --providers=cutlass accel-sim-mem: 16G - - args: --seed=2020 --dist=0 --m=4096 --n=4096 --k=4096 --kernels=sgemm_nn --iterations=5 --providers=cutlass + - args: --seed=2020 --dist=0 --m=4096 --n=4096 --k=4096 --kernels=sgemm --iterations=5 --providers=cutlass accel-sim-mem: 20G + #gemm kernels on tensor cores + - args: --seed=2020 --dist=0 --operation=gemm --m=2560 --n=16 --k=2560 --op_class=tensorop --iterations=5 --provider=cutlass + accel-sim-mem: 13G + - args: --seed=2020 --dist=0 --operation=gemm --m=2560 --n=32 --k=2560 --op_class=tensorop --iterations=5 --provider=cutlass + accel-sim-mem: 13G + - args: --seed=2020 --dist=0 --operation=gemm --m=2560 --n=64 --k=2560 --op_class=tensorop --iterations=5 --provider=cutlass + accel-sim-mem: 13G + - args: --seed=2020 --dist=0 --operation=gemm --m=2560 --n=128 --k=2560 --op_class=tensorop --iterations=5 --provider=cutlass + accel-sim-mem: 13G + - args: --seed=2020 --dist=0 --operation=gemm --m=2560 --n=512 --k=2560 --op_class=tensorop --iterations=5 --provider=cutlass + accel-sim-mem: 13G + - args: --seed=2020 --dist=0 --operation=gemm --m=2560 --n=1024 --k=2560 --op_class=tensorop --iterations=5 --provider=cutlass + accel-sim-mem: 13G + - args: --seed=2020 --dist=0 --operation=gemm --m=2560 --n=2056 --k=2560 --op_class=tensorop --iterations=5 --provider=cutlass + accel-sim-mem: 13G + - args: --seed=2020 --dist=0 --operation=gemm --m=4096 --n=16 --k=4096 --op_class=tensorop --iterations=5 --provider=cutlass + accel-sim-mem: 13G + - args: --seed=2020 --dist=0 --operation=gemm --m=4096 --n=32 --k=4096 --op_class=tensorop --iterations=5 --provider=cutlass + accel-sim-mem: 13G + - args: --seed=2020 --dist=0 --operation=gemm --m=4096 --n=64 --k=4096 --op_class=tensorop --iterations=5 --provider=cutlass + accel-sim-mem: 13G + - args: --seed=2020 --dist=0 --operation=gemm --m=4096 --n=128 --k=4096 --op_class=tensorop --iterations=5 --provider=cutlass + accel-sim-mem: 13G + - args: --seed=2020 --dist=0 --operation=gemm --m=4096 --n=512 --k=4096 --op_class=tensorop --iterations=5 --provider=cutlass + accel-sim-mem: 13G + - args: --seed=2020 --dist=0 --operation=gemm --m=4096 --n=4096 --k=4096 --op_class=tensorop --iterations=5 --provider=cutlass + accel-sim-mem: 13G ## Not sure how much memory the following apps take - just letting them go with the default @@ -774,4 +782,4 @@ mlperf_inference_no_external_datasets: accel-sim-mem: 60G - inference_mlperf_ssd: - args: - accel-sim-mem: 60G \ No newline at end of file + accel-sim-mem: 60G diff --git a/util/tracer_nvbit/run_cutlass_trace.sh b/util/tracer_nvbit/run_cutlass_trace.sh new file mode 100644 index 000000000..1112f0450 --- /dev/null +++ b/util/tracer_nvbit/run_cutlass_trace.sh @@ -0,0 +1,16 @@ +LD_PRELOAD=./tracer_tool/tracer_tool.so ~/accel-sim-updated/accel-sim-framework/gpu-app-collection/bin/11.0/release/cutlass_profiler + +export CUDA_VERSION="11.0"; + +export CUDA_VISIBLE_DEVICES="7" + +#LD_PRELOAD=/scratch/tgrogers-disk01/a/tgrogers/accel-sim-framework/util/tracer_nvbit/tracer_tool/tracer_tool.so /home/tgrogers-raid/a/tgrogers/github/accel-sim/accel-sim-framework/gpu-app-collection/src/..//bin/11.0/release/cutlass_perf_test --seed=2020 --dist=0 --m=2560 --n=1024 --k=2560 --kernels=sgemm_nn --iterations=5 --providers=cutlass + + + +/scratch/tgrogers-disk01/a/tgrogers/accel-sim-framework/util/tracer_nvbit/tracer_tool/traces-processing/post-traces-processing /scratch/tgrogers-disk01/a/tgrogers/accel-sim-framework/hw_run/traces/device-7/11.0/cutlass_perf_test/__seed_2020___dist_0____m_2560___n_1024___k_2560___kernels_sgemm_nn____iterations_5___providers_cutlass/traces/kernelslist + +rm -f /scratch/tgrogers-disk01/a/tgrogers/accel-sim-framework/hw_run/traces/device-7/11.0/cutlass_perf_test/__seed_2020___dist_0____m_2560___n_1024___k_2560___kernels_sgemm_nn____iterations_5___providers_cutlass/traces/*.trace + +rm -f /scratch/tgrogers-disk01/a/tgrogers/accel-sim-framework/hw_run/traces/device-7/11.0/cutlass_perf_test/__seed_2020___dist_0____m_2560___n_1024___k_2560___kernels_sgemm_nn____iterations_5___providers_cutlass/traces/kernelslist + From fb6af97d6c54dd4f25c797b6375f65a6a12f543d Mon Sep 17 00:00:00 2001 From: Shreya-gaur Date: Mon, 10 Jul 2023 15:49:07 -0400 Subject: [PATCH 2/5] Commit changes to cutlass app --- util/job_launching/apps/define-all-apps.yml | 94 ++++++++++----------- util/tracer_nvbit/run_cutlass_trace.sh | 16 ---- 2 files changed, 47 insertions(+), 63 deletions(-) delete mode 100644 util/tracer_nvbit/run_cutlass_trace.sh diff --git a/util/job_launching/apps/define-all-apps.yml b/util/job_launching/apps/define-all-apps.yml index c57b796f5..2dbc56699 100644 --- a/util/job_launching/apps/define-all-apps.yml +++ b/util/job_launching/apps/define-all-apps.yml @@ -511,59 +511,59 @@ cutlass_5_trace: exec_dir: "$GPUAPPS_ROOT/bin/$CUDA_VERSION/release/" data_dirs: "$GPUAPPS_ROOT/data_dirs/" execs: - - cutlass_profiler: + - cutlass_perf_test: #sparse gemm kernels - args: --seed=2020 --dist=0 --m=2560 --n=16 --k=2560 --kernels=sgemm --iterations=5 --providers=cutlass accel-sim-mem: 13G - - args: --seed=2020 --dist=0 --m=2560 --n=32 --k=2560 --kernels=sgemm --iterations=5 --providers=cutlass - accel-sim-mem: 13G - - args: --seed=2020 --dist=0 --m=2560 --n=64 --k=2560 --kernels=sgemm --iterations=5 --providers=cutlass - accel-sim-mem: 13G - - args: --seed=2020 --dist=0 --m=2560 --n=128 --k=2560 --kernels=sgemm --iterations=5 --providers=cutlass - accel-sim-mem: 13G - - args: --seed=2020 --dist=0 --m=2560 --n=512 --k=2560 --kernels=sgemm --iterations=5 --providers=cutlass - accel-sim-mem: 13G - - args: --seed=2020 --dist=0 --m=2560 --n=1024 --k=2560 --kernels=sgemm --iterations=5 --providers=cutlass - accel-sim-mem: 13G - - args: --seed=2020 --dist=0 --m=2560 --n=2560 --k=2560 --kernels=sgemm --iterations=5 --providers=cutlass - accel-sim-mem: 13G - - args: --seed=2020 --dist=0 --m=4096 --n=16 --k=4096 --kernels=sgemm --iterations=5 --providers=cutlass - accel-sim-mem: 16G - - args: --seed=2020 --dist=0 --m=4096 --n=32 --k=4096 --kernels=sgemm --iterations=5 --providers=cutlass - accel-sim-mem: 16G - - args: --seed=2020 --dist=0 --m=4096 --n=64 --k=4096 --kernels=sgemm --iterations=5 --providers=cutlass - accel-sim-mem: 16G - - args: --seed=2020 --dist=0 --m=4096 --n=128 --k=4096 --kernels=sgemm --iterations=5 --providers=cutlass - accel-sim-mem: 16G - - args: --seed=2020 --dist=0 --m=4096 --n=4096 --k=4096 --kernels=sgemm --iterations=5 --providers=cutlass - accel-sim-mem: 20G + # - args: --seed=2020 --dist=0 --m=2560 --n=32 --k=2560 --kernels=sgemm --iterations=5 --providers=cutlass + # accel-sim-mem: 13G + # - args: --seed=2020 --dist=0 --m=2560 --n=64 --k=2560 --kernels=sgemm --iterations=5 --providers=cutlass + # accel-sim-mem: 13G + # - args: --seed=2020 --dist=0 --m=2560 --n=128 --k=2560 --kernels=sgemm --iterations=5 --providers=cutlass + # accel-sim-mem: 13G + # - args: --seed=2020 --dist=0 --m=2560 --n=512 --k=2560 --kernels=sgemm --iterations=5 --providers=cutlass + # accel-sim-mem: 13G + # - args: --seed=2020 --dist=0 --m=2560 --n=1024 --k=2560 --kernels=sgemm --iterations=5 --providers=cutlass + # accel-sim-mem: 13G + # - args: --seed=2020 --dist=0 --m=2560 --n=2560 --k=2560 --kernels=sgemm --iterations=5 --providers=cutlass + # accel-sim-mem: 13G + # - args: --seed=2020 --dist=0 --m=4096 --n=16 --k=4096 --kernels=sgemm --iterations=5 --providers=cutlass + # accel-sim-mem: 16G + # - args: --seed=2020 --dist=0 --m=4096 --n=32 --k=4096 --kernels=sgemm --iterations=5 --providers=cutlass + # accel-sim-mem: 16G + # - args: --seed=2020 --dist=0 --m=4096 --n=64 --k=4096 --kernels=sgemm --iterations=5 --providers=cutlass + # accel-sim-mem: 16G + # - args: --seed=2020 --dist=0 --m=4096 --n=128 --k=4096 --kernels=sgemm --iterations=5 --providers=cutlass + # accel-sim-mem: 16G + # - args: --seed=2020 --dist=0 --m=4096 --n=4096 --k=4096 --kernels=sgemm --iterations=5 --providers=cutlass + # accel-sim-mem: 20G #gemm kernels on tensor cores - args: --seed=2020 --dist=0 --operation=gemm --m=2560 --n=16 --k=2560 --op_class=tensorop --iterations=5 --provider=cutlass accel-sim-mem: 13G - - args: --seed=2020 --dist=0 --operation=gemm --m=2560 --n=32 --k=2560 --op_class=tensorop --iterations=5 --provider=cutlass - accel-sim-mem: 13G - - args: --seed=2020 --dist=0 --operation=gemm --m=2560 --n=64 --k=2560 --op_class=tensorop --iterations=5 --provider=cutlass - accel-sim-mem: 13G - - args: --seed=2020 --dist=0 --operation=gemm --m=2560 --n=128 --k=2560 --op_class=tensorop --iterations=5 --provider=cutlass - accel-sim-mem: 13G - - args: --seed=2020 --dist=0 --operation=gemm --m=2560 --n=512 --k=2560 --op_class=tensorop --iterations=5 --provider=cutlass - accel-sim-mem: 13G - - args: --seed=2020 --dist=0 --operation=gemm --m=2560 --n=1024 --k=2560 --op_class=tensorop --iterations=5 --provider=cutlass - accel-sim-mem: 13G - - args: --seed=2020 --dist=0 --operation=gemm --m=2560 --n=2056 --k=2560 --op_class=tensorop --iterations=5 --provider=cutlass - accel-sim-mem: 13G - - args: --seed=2020 --dist=0 --operation=gemm --m=4096 --n=16 --k=4096 --op_class=tensorop --iterations=5 --provider=cutlass - accel-sim-mem: 13G - - args: --seed=2020 --dist=0 --operation=gemm --m=4096 --n=32 --k=4096 --op_class=tensorop --iterations=5 --provider=cutlass - accel-sim-mem: 13G - - args: --seed=2020 --dist=0 --operation=gemm --m=4096 --n=64 --k=4096 --op_class=tensorop --iterations=5 --provider=cutlass - accel-sim-mem: 13G - - args: --seed=2020 --dist=0 --operation=gemm --m=4096 --n=128 --k=4096 --op_class=tensorop --iterations=5 --provider=cutlass - accel-sim-mem: 13G - - args: --seed=2020 --dist=0 --operation=gemm --m=4096 --n=512 --k=4096 --op_class=tensorop --iterations=5 --provider=cutlass - accel-sim-mem: 13G - - args: --seed=2020 --dist=0 --operation=gemm --m=4096 --n=4096 --k=4096 --op_class=tensorop --iterations=5 --provider=cutlass - accel-sim-mem: 13G + # - args: --seed=2020 --dist=0 --operation=gemm --m=2560 --n=32 --k=2560 --op_class=tensorop --iterations=5 --provider=cutlass + # accel-sim-mem: 13G + # - args: --seed=2020 --dist=0 --operation=gemm --m=2560 --n=64 --k=2560 --op_class=tensorop --iterations=5 --provider=cutlass + # accel-sim-mem: 13G + # - args: --seed=2020 --dist=0 --operation=gemm --m=2560 --n=128 --k=2560 --op_class=tensorop --iterations=5 --provider=cutlass + # accel-sim-mem: 13G + # - args: --seed=2020 --dist=0 --operation=gemm --m=2560 --n=512 --k=2560 --op_class=tensorop --iterations=5 --provider=cutlass + # accel-sim-mem: 13G + # - args: --seed=2020 --dist=0 --operation=gemm --m=2560 --n=1024 --k=2560 --op_class=tensorop --iterations=5 --provider=cutlass + # accel-sim-mem: 13G + # - args: --seed=2020 --dist=0 --operation=gemm --m=2560 --n=2056 --k=2560 --op_class=tensorop --iterations=5 --provider=cutlass + # accel-sim-mem: 13G + # - args: --seed=2020 --dist=0 --operation=gemm --m=4096 --n=16 --k=4096 --op_class=tensorop --iterations=5 --provider=cutlass + # accel-sim-mem: 13G + # - args: --seed=2020 --dist=0 --operation=gemm --m=4096 --n=32 --k=4096 --op_class=tensorop --iterations=5 --provider=cutlass + # accel-sim-mem: 13G + # - args: --seed=2020 --dist=0 --operation=gemm --m=4096 --n=64 --k=4096 --op_class=tensorop --iterations=5 --provider=cutlass + # accel-sim-mem: 13G + # - args: --seed=2020 --dist=0 --operation=gemm --m=4096 --n=128 --k=4096 --op_class=tensorop --iterations=5 --provider=cutlass + # accel-sim-mem: 13G + # - args: --seed=2020 --dist=0 --operation=gemm --m=4096 --n=512 --k=4096 --op_class=tensorop --iterations=5 --provider=cutlass + # accel-sim-mem: 13G + # - args: --seed=2020 --dist=0 --operation=gemm --m=4096 --n=4096 --k=4096 --op_class=tensorop --iterations=5 --provider=cutlass + # accel-sim-mem: 13G ## Not sure how much memory the following apps take - just letting them go with the default diff --git a/util/tracer_nvbit/run_cutlass_trace.sh b/util/tracer_nvbit/run_cutlass_trace.sh deleted file mode 100644 index 1112f0450..000000000 --- a/util/tracer_nvbit/run_cutlass_trace.sh +++ /dev/null @@ -1,16 +0,0 @@ -LD_PRELOAD=./tracer_tool/tracer_tool.so ~/accel-sim-updated/accel-sim-framework/gpu-app-collection/bin/11.0/release/cutlass_profiler - -export CUDA_VERSION="11.0"; - -export CUDA_VISIBLE_DEVICES="7" - -#LD_PRELOAD=/scratch/tgrogers-disk01/a/tgrogers/accel-sim-framework/util/tracer_nvbit/tracer_tool/tracer_tool.so /home/tgrogers-raid/a/tgrogers/github/accel-sim/accel-sim-framework/gpu-app-collection/src/..//bin/11.0/release/cutlass_perf_test --seed=2020 --dist=0 --m=2560 --n=1024 --k=2560 --kernels=sgemm_nn --iterations=5 --providers=cutlass - - - -/scratch/tgrogers-disk01/a/tgrogers/accel-sim-framework/util/tracer_nvbit/tracer_tool/traces-processing/post-traces-processing /scratch/tgrogers-disk01/a/tgrogers/accel-sim-framework/hw_run/traces/device-7/11.0/cutlass_perf_test/__seed_2020___dist_0____m_2560___n_1024___k_2560___kernels_sgemm_nn____iterations_5___providers_cutlass/traces/kernelslist - -rm -f /scratch/tgrogers-disk01/a/tgrogers/accel-sim-framework/hw_run/traces/device-7/11.0/cutlass_perf_test/__seed_2020___dist_0____m_2560___n_1024___k_2560___kernels_sgemm_nn____iterations_5___providers_cutlass/traces/*.trace - -rm -f /scratch/tgrogers-disk01/a/tgrogers/accel-sim-framework/hw_run/traces/device-7/11.0/cutlass_perf_test/__seed_2020___dist_0____m_2560___n_1024___k_2560___kernels_sgemm_nn____iterations_5___providers_cutlass/traces/kernelslist - From 5384c3d01230419f1b95ef60c038f945eb7deefe Mon Sep 17 00:00:00 2001 From: Shreya-gaur Date: Mon, 10 Jul 2023 18:08:19 -0400 Subject: [PATCH 3/5] Changes for cutlass in define-all-apps --- util/job_launching/apps/define-all-apps.yml | 6 +++--- util/tracer_nvbit/run_hw_trace.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/util/job_launching/apps/define-all-apps.yml b/util/job_launching/apps/define-all-apps.yml index 2dbc56699..fe9c015aa 100644 --- a/util/job_launching/apps/define-all-apps.yml +++ b/util/job_launching/apps/define-all-apps.yml @@ -511,12 +511,12 @@ cutlass_5_trace: exec_dir: "$GPUAPPS_ROOT/bin/$CUDA_VERSION/release/" data_dirs: "$GPUAPPS_ROOT/data_dirs/" execs: - - cutlass_perf_test: + - cutlass_profiler: #sparse gemm kernels - args: --seed=2020 --dist=0 --m=2560 --n=16 --k=2560 --kernels=sgemm --iterations=5 --providers=cutlass accel-sim-mem: 13G - # - args: --seed=2020 --dist=0 --m=2560 --n=32 --k=2560 --kernels=sgemm --iterations=5 --providers=cutlass - # accel-sim-mem: 13G + - args: --seed=2020 --dist=0 --m=2560 --n=32 --k=2560 --kernels=sgemm --iterations=5 --providers=cutlass + accel-sim-mem: 13G # - args: --seed=2020 --dist=0 --m=2560 --n=64 --k=2560 --kernels=sgemm --iterations=5 --providers=cutlass # accel-sim-mem: 13G # - args: --seed=2020 --dist=0 --m=2560 --n=128 --k=2560 --kernels=sgemm --iterations=5 --providers=cutlass diff --git a/util/tracer_nvbit/run_hw_trace.py b/util/tracer_nvbit/run_hw_trace.py index 77d889565..ab9e3574e 100755 --- a/util/tracer_nvbit/run_hw_trace.py +++ b/util/tracer_nvbit/run_hw_trace.py @@ -54,7 +54,7 @@ args = argpair["args"] run_name = os.path.join( exe, common.get_argfoldername( args ) ) this_run_dir = os.path.abspath(os.path.expandvars( - os.path.join(this_directory, "..", "..", "hw_run","traces","device-" + options.device_num, cuda_version, run_name))) + os.path.join(scratch_dir, "hw_run","traces","device-" + options.device_num, cuda_version, run_name))) this_trace_folder = os.path.join(this_run_dir, "traces") if not os.path.exists(this_run_dir): os.makedirs(this_run_dir) From 884e9581dc7f09a42bf9a87d446cffce01f45139 Mon Sep 17 00:00:00 2001 From: Shreya-gaur Date: Tue, 11 Jul 2023 11:11:11 -0400 Subject: [PATCH 4/5] Changes to define-power.yml for cutlass 3 --- util/job_launching/apps/define-power.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/util/job_launching/apps/define-power.yml b/util/job_launching/apps/define-power.yml index 8da36657a..681599360 100644 --- a/util/job_launching/apps/define-power.yml +++ b/util/job_launching/apps/define-power.yml @@ -206,13 +206,13 @@ cutlass_5_trace_validation: data_dirs: "$ACCELSIM_ROOT/../util/accelwattch/accelwattch_benchmarks/data_dirs/" execs: - cutlass_perf_test_k1: - - args: --seed=2020 --dist=0 --m=2560 --n=16 --k=2560 --kernels=wmma_gemm_nn --iterations=5 --providers=cutlass + - args: --seed=2020 --dist=0 --m=2560 --n=16 --k=2560 --operation=gemm --op_class=tensorop --iterations=5 --providers=cutlass accel-sim-mem: 5G - cutlass_perf_test_k2: - - args: --seed=2020 --dist=0 --m=4096 --n=128 --k=4096 --kernels=wmma_gemm_nn --iterations=5 --providers=cutlass + - args: --seed=2020 --dist=0 --m=4096 --n=128 --k=4096 --operation=gemm --op_class=tensorop --iterations=5 --providers=cutlass accel-sim-mem: 5G - cutlass_perf_test_k3: - - args: --seed=2020 --dist=0 --m=2560 --n=512 --k=2560 --kernels=wmma_gemm_nn --iterations=5 --providers=cutlass + - args: --seed=2020 --dist=0 --m=2560 --n=512 --k=2560 --operation=gemm --op_class=tensorop --iterations=5 --providers=cutlass accel-sim-mem: 5G Deepbench_validation: @@ -454,4 +454,4 @@ power_ubench: - SHRD_TEX_SFU: - args: 100 - TENSOR: - - args: 10 \ No newline at end of file + - args: 10 From 764b72befad5566de120ae4cae343b5fb65ceae3 Mon Sep 17 00:00:00 2001 From: Shreya-gaur Date: Tue, 18 Jul 2023 13:59:45 -0400 Subject: [PATCH 5/5] Comment change of cutlass --- util/job_launching/apps/define-all-apps.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util/job_launching/apps/define-all-apps.yml b/util/job_launching/apps/define-all-apps.yml index fe9c015aa..eb8016082 100644 --- a/util/job_launching/apps/define-all-apps.yml +++ b/util/job_launching/apps/define-all-apps.yml @@ -512,7 +512,7 @@ cutlass_5_trace: data_dirs: "$GPUAPPS_ROOT/data_dirs/" execs: - cutlass_profiler: - #sparse gemm kernels + #single precision gemm kernels - args: --seed=2020 --dist=0 --m=2560 --n=16 --k=2560 --kernels=sgemm --iterations=5 --providers=cutlass accel-sim-mem: 13G - args: --seed=2020 --dist=0 --m=2560 --n=32 --k=2560 --kernels=sgemm --iterations=5 --providers=cutlass