From a3c88292ec1bb61f711eb1ddafa835024a2c1ef8 Mon Sep 17 00:00:00 2001
From: Filipe Marques
Date: Sun, 24 Nov 2024 19:08:02 +0000
Subject: [PATCH] Artifact fixes

---
 bench/README.md             | 22 +++++++++++++++++-----
 bench/eq1.sh                |  2 +-
 bench/eq2.sh                |  2 +-
 bench/eq3.sh                |  6 ++++--
 bench/run_benchmarks.py     |  8 ++++----
 bench/runner/multi_query.ml |  6 +++---
 6 files changed, 30 insertions(+), 16 deletions(-)

diff --git a/bench/README.md b/bench/README.md
index cac16ab9..ae53dfa2 100644
--- a/bench/README.md
+++ b/bench/README.md
@@ -10,7 +10,7 @@ solver usage, with notable improvements for batched solver interactions.
 
 ## Getting the Artifact
 
-You can download the artifact through [Zenodo](https://zenodo.org/records/13959671/files/smtml_0.3.tar.gz?download=1).
+You can download the artifact through [Zenodo](https://zenodo.org/records/14104822).
 
 ## Tested platforms
 
@@ -29,7 +29,7 @@ modification, and distribution.
 
 ## MD5 Hash of the Artifact
 
-8edce799971395168bdf728c9a136399
+46c6e43adf9bd5d9a922b3b7db307d12
 
 ## Building and Running the Docker Image
 
@@ -191,10 +191,14 @@ $ git submodule update --remote -- smt-comp/
 ```
 
 2. Run the `eq1.sh` script. Note that, this script can take up multiple
-hours to complete.
+hours to complete. Therefore, to avoid running the entire dataset, specify
+the number of benchmarks to run with the `-n` flag. This runs the benchmarks
+more quickly, but only provides an approximation of the paper's results.
 
 ```sh
 $ ./eq1.sh
+# Or, to just run 20 benchmarks to finish quickly
+$ ./eq1.sh -n 20
 ```
 
 The script will generate the results in directory `eq1`, which
@@ -237,10 +241,14 @@ $ git submodule update --remote -- smt-comp/
 ```
 
 2. Run the `eq2.sh` script. Note that, this script can take up multiple
-hours to complete.
+hours to complete. Therefore, to avoid running the entire dataset, specify
+the number of benchmarks to run with the `-n` flag. This runs the benchmarks
+more quickly, but only provides an approximation of the paper's results.
 
 ```sh
 $ ./eq2.sh
+# Or, to just run 20 benchmarks to finish quickly
+$ ./eq2.sh -n 20
 ```
 
 The script will generate the following files:
@@ -315,10 +323,14 @@ $ git submodule update --remote -- smt-testcomp23/
 ```
 
 2. Run the `testcomp.sh` script. Note that, this script can take up multiple
-hours to complete.
+hours to complete. Therefore, to avoid running the entire dataset, specify
+the number of benchmarks to run with the `-n` flag. This runs the benchmarks
+more quickly, but only provides an approximation of the paper's results.
 
 ```sh
 $ ./testcomp.sh
+# Or, to just run 20 benchmarks to finish quickly
+$ ./testcomp.sh -n 20
 ```
 
 The script will generate the following files:
diff --git a/bench/eq1.sh b/bench/eq1.sh
index 1bb7af3f..c2543e80 100755
--- a/bench/eq1.sh
+++ b/bench/eq1.sh
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
 
 TEMPLATE=./tmp.XXXXXXXXXX
 
diff --git a/bench/eq2.sh b/bench/eq2.sh
index abadb706..988b5db9 100755
--- a/bench/eq2.sh
+++ b/bench/eq2.sh
@@ -100,7 +100,7 @@ python3 run_benchmarks.py --single --dir $QF_LIA_DIR --output-dir csvs_single --
 python3 run_benchmarks.py --single --dir $QF_BV_DIR --output-dir csvs_single --output-filename QF_BV_z3 --prover smtml-z3
 python3 run_benchmarks.py --single --dir $QF_BV_DIR --output-dir csvs_single --output-filename QF_BV_bitwuzla --prover smtml-bitwuzla
 python3 run_benchmarks.py --single --dir $QF_BV_DIR --output-dir csvs_single --output-filename QF_BV_z3_solver --prover z3
-python3 run_benchmarks.py --single --dir $QF_BV_DIR --output-dir csvs_single --output-filename QF_BV_bitwuzla --prover smtml-bitwuzla
+python3 run_benchmarks.py --single --dir $QF_BV_DIR --output-dir csvs_single --output-filename QF_BV_bitwuzla_solver --prover bitwuzla
 #### QF_S ####
 python3 run_benchmarks.py --single --dir $QF_S_DIR --output-dir csvs_single --output-filename QF_S_z3 --prover smtml-z3
 python3 run_benchmarks.py --single --dir $QF_S_DIR --output-dir csvs_single --output-filename QF_S_z3_solver --prover z3
diff --git a/bench/eq3.sh b/bench/eq3.sh
index 02f2a944..d6d73f03 100755
--- a/bench/eq3.sh
+++ b/bench/eq3.sh
@@ -127,6 +127,8 @@ fi
 
 $dry_value && exit 0
 
+mkdir csvs_multi
+
 opam sw z3-bitwuzla
 
 eval $(opam env)
@@ -147,7 +149,7 @@ else
     python3 run_benchmarks.py --single --dir $QF_FP_DIR --output-dir csvs_single --output-filename QF_FP_bitwuzla_solver --prover bitwuzla
 fi
 
-python3 run_benchmarks.py --multi -F $QF_FP_LIST --output-dir csvs_multi --output-filename QF_FP_bitwuzla_solver --prover bitwuzla
+python3 run_benchmarks.py --multi -F $QF_FP_LIST --output-dir csvs_multi --output-filename QF_FP_bitwuzla --prover smtml-bitwuzla
 
 #### QF_LIA ####
 ## Z3 ##
@@ -176,7 +178,7 @@ else
     python3 run_benchmarks.py --single --dir $QF_BV_DIR --output-dir csvs_single --output-filename QF_BV_bitwuzla_solver --prover bitwuzla
 fi
 
-python3 run_benchmarks.py --multi -F $QF_BV_LIST --output-dir csvs_multi --output-filename QF_BV_bitwuzla_solver --prover bitwuzla
+python3 run_benchmarks.py --multi -F $QF_BV_LIST --output-dir csvs_multi --output-filename QF_BV_bitwuzla --prover smtml-bitwuzla
 
 #### QF_S ####
 ## Z3 ##
diff --git a/bench/run_benchmarks.py b/bench/run_benchmarks.py
index 93869bdf..4550d369 100644
--- a/bench/run_benchmarks.py
+++ b/bench/run_benchmarks.py
@@ -32,7 +32,7 @@ def get_latest_sqlite_file(benchpress_data_dir):
     files = [f for f in os.listdir(benchpress_data_dir) if f.endswith('.sqlite')]
     if not files:
         raise FileNotFoundError("No .sqlite files found in benchpress data directory.")
-    
+
     latest_file = max(files, key=lambda x: os.path.getmtime(os.path.join(benchpress_data_dir, x)))
     return os.path.join(benchpress_data_dir, latest_file)
 
@@ -65,13 +65,13 @@ def automate_benchpress_single(directory_to_benchmark, output_csv_dir, output_cs
         print_error(f"Error during SQLite to CSV process: {e}")
 
 def get_latest_n_csvs(n):
-    result = subprocess.run(['ls', '-d', os.path.join('/home/smtml/smtml/bench', 'res-multi-query-*')], 
+    result = subprocess.run(['ls', '-d', os.path.join('/home/smtml/smtml/bench', 'res-multi-query-*')],
                             stdout=subprocess.PIPE, text=True, shell=True)
     query_dirs = result.stdout.splitlines()
    query_dirs = query_dirs[-n:]
     csv_files = []
     for query_dir in query_dirs:
-        csv_file = glob(os.path.join(query_dir, '*.csv'))
+        csv_file = glob.glob(os.path.join(query_dir, '*.csv'))
         if csv_file:
             csv_files.append(csv_file[0])
     return csv_files
@@ -135,4 +135,4 @@ def automate_multi_query(list_file, output_csv_dir, output_csv_name, prover):
 elif args.multi:
     automate_multi_query(args.F, args.output_dir, args.output_filename, args.prover)
 else:
-    print_error("Please specify --single or --multi.")
\ No newline at end of file
+    print_error("Please specify --single or --multi.")
diff --git a/bench/runner/multi_query.ml b/bench/runner/multi_query.ml
index 3292f768..c43af715 100644
--- a/bench/runner/multi_query.ml
+++ b/bench/runner/multi_query.ml
@@ -75,11 +75,11 @@ let main _hook provers from_file dirs =
   let results =
     List.map
      (fun p ->
-        let ((status, stdout, _stderr, _) as result) =
+        let ((status, stdout, stderr, _) as result) =
          Tool.fork_and_run ?from_file p dirs
        in
-        Fmt.pr "@[Run %a@;Exited: %a@;Stdout:@; %a@]@." Tool.pp_prover p
-          pp_exit_status status Fmt.string (String.escaped stdout);
+        Fmt.pr "@[Run %a@;Exited: %a@;Stdout:@; %a@;Stderr:@; %a@]@." Tool.pp_prover p
+          pp_exit_status status Fmt.string (String.escaped stdout) Fmt.string (String.escaped stderr);
        (p, result) )
      provers
  in
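
Note (not part of the patch): the `get_latest_n_csvs` hunk above still shells out to `ls` to locate the `res-multi-query-*` result directories, which is fragile when combined with `shell=True`. A minimal pure-Python sketch of the same lookup, assuming the hard-coded `/home/smtml/smtml/bench` layout used in the patch, could look like this:

```python
import glob
import os

# Assumed result layout from the patch; adjust if the artifact is mounted elsewhere.
BENCH_DIR = '/home/smtml/smtml/bench'

def get_latest_n_csvs(n):
    # Find the res-multi-query-* run directories and keep the n most recently modified.
    query_dirs = sorted(
        glob.glob(os.path.join(BENCH_DIR, 'res-multi-query-*')),
        key=os.path.getmtime,
    )[-n:]
    csv_files = []
    for query_dir in query_dirs:
        # Each run directory is expected to contain one results CSV.
        csv_file = glob.glob(os.path.join(query_dir, '*.csv'))
        if csv_file:
            csv_files.append(csv_file[0])
    return csv_files
```

Sorting by modification time rather than relying on `ls` output order makes the "latest n" selection explicit.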