Skip to content

Commit

Permalink
feat: improved proof replay evaluation scripts (#28)
Browse files Browse the repository at this point in the history
* feat: script on selected 10 egg tests

* feat: script to build egg binary with stop-on-success

* fix: align `gp4` test

* feat: constant expressions have term size 1

* fix: revert size calculation

* style: remove random check

* feat: another Python script for data extraction

* fix: directory in Lean-pre-DCP evaluation script

* feat: save results in CSV file
  • Loading branch information
ramonfmir authored Apr 8, 2024
1 parent c11be2c commit f301364
Show file tree
Hide file tree
Showing 7 changed files with 149 additions and 11 deletions.
2 changes: 0 additions & 2 deletions CvxLean/Test/PreDCP/MainExample.lean
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,6 @@ def p :=
time_cmd equivalence eq/q : p := by
pre_dcp

#check congrArg

#print q
-- optimization (x : ℝ)
-- minimize x
Expand Down
6 changes: 3 additions & 3 deletions egg-pre-dcp/tests/test_dgp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,12 @@ fn test_gp3() {
#[test]
fn test_gp4() {
pre_dcp_check_and_print(
"(div 1 (div (exp (var u)) (exp (var v))))",
"(div (exp (var u)) (exp (var v)))",
vec![
"(le 2 (exp (var u)))",
"(le (exp (var u)) 3)",
"(le (add (pow (exp (var u)) 2) (div (mul 3 (exp (var v))) (exp (var w)))) (sqrt (exp (var u))))",
"(eq (div (exp (var u)) (exp (var v))) (pow (exp (var w)) 2))"
"(le (add (pow (exp (var u)) 2) (div (mul 6 (exp (var v))) (exp (var w)))) (sqrt (exp (var u))))",
"(eq (mul (exp (var u)) (exp (var v))) (exp (var w)))"
]);
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#!/bin/bash

RUSTFLAGS="--cfg stop_on_success" cargo build --release --manifest-path egg-pre-dcp/Cargo.toml
mkdir -p egg-pre-dcp/utils
cp egg-pre-dcp/target/release/egg-pre-dcp egg-pre-dcp/utils/egg-pre-dcp
12 changes: 12 additions & 0 deletions scripts/evaluation/lean-pre-dcp/extract_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import matplotlib.pyplot as plt
import numpy as np
import heapq
import csv

def extract_stats_from_file(file_path):
with open(file_path, 'r') as file:
Expand Down Expand Up @@ -167,6 +168,17 @@ def plot_stats1(fn, xs, xs_label, ys, ys_label):
command_times_per_step = np.array([0 if numbers_of_steps[i] == 0 else command_times[i] / numbers_of_steps[i] for i in range(len(command_times))])
print('Average command time per step: ', np.average(command_times_per_step))

results = [(problem_names[i], egg_times[i], command_times[i]) for i in range(len(problem_names))]

output_file = eval_dir + "/pre_dcp_eval_results.csv"

with open(output_file, 'w', newline='') as file:
writer = csv.writer(file)
writer.writerow(["problem_name", "egg_time", "command_time"])
writer.writerows(results)

print("Results saved to:", output_file)

# Remove some outliers.
to_remove = 8
idxs_to_remove = heapq.nlargest(to_remove, range(len(command_times)), key=command_times.__getitem__)
Expand Down
109 changes: 109 additions & 0 deletions scripts/evaluation/lean-pre-dcp/extract_stats_selected_10.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
import sys
import re

def extract_test_results(file_path):
test_results = {}

with open(file_path, 'r') as file:
data = file.read()

# Define regex patterns.
test_name_pattern = r'test_(\w+)::(\w+)'
total_time_pattern = r'Total time: (\d+) ms\.'
e_graph_time_pattern = r'E-graph building time: (\d+) ms\.'
step_extraction_time_pattern = r'Step extraction time: (\d+) ms\.'
num_of_nodes_pattern = r'Succeeded with node limit (\d+) \(using (\d+) nodes\).'
num_steps_pattern = r'Number of steps: (\d+)\.'
best_term_size_pattern = r'Best term size: (\d+)\.'
best_num_variables_pattern = r'Best number of variables: (\d+)\.'
num_of_iterations = r'Number of iterations: (\d+)\.'
num_of_rules_applied = r'Number of rules applied: (\d+)\.'

failure_pattern = r'failures:(.*?)test_(\w+)'

# Extract test names and corresponding total times or failure status.
test_names = re.findall(test_name_pattern, data)
test_names = [x[1] for x in test_names]
total_times = re.findall(total_time_pattern, data)
e_graph_times = re.findall(e_graph_time_pattern, data)
step_extraction_times = re.findall(step_extraction_time_pattern, data)
num_of_nodes = re.findall(num_of_nodes_pattern, data)
num_of_nodes = [int(x[1]) for x in num_of_nodes]
num_steps = re.findall(num_steps_pattern, data)
best_term_sizes = re.findall(best_term_size_pattern, data)
best_num_variables = re.findall(best_num_variables_pattern, data)
num_of_iterations = re.findall(num_of_iterations, data)
num_of_rules_applied = re.findall(num_of_rules_applied, data)

failures = re.findall(failure_pattern, data, re.DOTALL)
failed_tests = ["test_" + test[1] for test in failures]

print(num_of_nodes)
print(len(test_names), len(total_times))
assert len(test_names) == len(total_times) + len(failed_tests)
assert len(total_times) == len(e_graph_times)
assert len(total_times) == len(step_extraction_times)
assert len(total_times) == len(num_of_nodes)
assert len(total_times) == len(num_steps)
assert len(total_times) == len(best_term_sizes)
assert len(total_times) == len(best_num_variables)
assert len(total_times) == len(num_of_iterations)
assert len(total_times) == len(num_of_rules_applied)

# Match test names with their total times or failure status.
i = 0
for test_name in test_names:
if test_name in failed_tests:
test_results[test_name] = {
'total_time': None,
'e_graph_time': None,
'step_extraction_time': None,
'num_of_nodes': None,
'steps': None,
'term_size': None,
'num_variables': None,
'num_of_iterations': None,
'num_of_rules_applied': None
}
else:
test_results[test_name] = {
'total_time': int(total_times[i]),
'e_graph_time': int(e_graph_times[i]),
'step_extraction_time': int(step_extraction_times[i]),
'num_of_nodes': num_of_nodes[i],
'steps': int(num_steps[i]),
'term_size': int(best_term_sizes[i]),
'num_variables': int(best_num_variables[i]),
'num_of_iterations': int(num_of_iterations[i]),
'num_of_rules_applied': int(num_of_rules_applied[i])
}
i += 1

return test_results

file_path = 'scripts/evaluation/data/' + sys.argv[1]
results = extract_test_results(file_path)

benchmark_path = 'scripts/evaluation/egg-pre-dcp-options/benchmark.txt'
benchmark = []
with open(benchmark_path, 'r') as file:
benchmark = file.readlines()

with open('scripts/evaluation/data/summary.csv', 'w') as file:
file.write("name,term_size_after,num_nodes,num_steps,num_of_iterations,num_of_rules_applied\n")
for prob in benchmark:
prob = "test_" + prob.strip()
if prob not in results:
continue
prob_results = results[prob]
values = [prob[5:]]
values += ["te", "tl", "sb"]
values += ["-" if prob_results["term_size"] is None else str(prob_results["term_size"])]
values += ["-" if prob_results["num_of_nodes"] is None else str(prob_results["num_of_nodes"])]
values += ["-" if prob_results["steps"] is None else str(prob_results["steps"])]
values += ["-" if prob_results["num_of_iterations"] is None else str(prob_results["num_of_iterations"])]
values += ["-" if prob_results["num_of_rules_applied"] is None else str(prob_results["num_of_rules_applied"])]
file.write(','.join(values) + '\n')
# To copy-paste into LaTeX.
print(' & '.join(values) + ' \\\\')
print('\\hline')
12 changes: 6 additions & 6 deletions scripts/evaluation/lean-pre-dcp/run_all.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@

OUT=$1

mkdir -p ./evaluation/data/$OUT
mkdir -p ./scripts/evaluation/data/$OUT

./scripts/evaluation/lean-pre-dcp/run.sh ./evaluation/data/$OUT/lean_pre_dcp_test_out_1.txt;
./scripts/evaluation/lean-pre-dcp/run.sh ./evaluation/data/$OUT/lean_pre_dcp_test_out_2.txt;
./scripts/evaluation/lean-pre-dcp/run.sh ./evaluation/data/$OUT/lean_pre_dcp_test_out_3.txt;
./scripts/evaluation/lean-pre-dcp/run.sh ./evaluation/data/$OUT/lean_pre_dcp_test_out_4.txt;
./scripts/evaluation/lean-pre-dcp/run.sh ./evaluation/data/$OUT/lean_pre_dcp_test_out_5.txt
./scripts/evaluation/lean-pre-dcp/run.sh ./scripts/evaluation/data/$OUT/lean_pre_dcp_test_out_1.txt;
./scripts/evaluation/lean-pre-dcp/run.sh ./scripts/evaluation/data/$OUT/lean_pre_dcp_test_out_2.txt;
./scripts/evaluation/lean-pre-dcp/run.sh ./scripts/evaluation/data/$OUT/lean_pre_dcp_test_out_3.txt;
./scripts/evaluation/lean-pre-dcp/run.sh ./scripts/evaluation/data/$OUT/lean_pre_dcp_test_out_4.txt;
./scripts/evaluation/lean-pre-dcp/run.sh ./scripts/evaluation/data/$OUT/lean_pre_dcp_test_out_5.txt
14 changes: 14 additions & 0 deletions scripts/evaluation/lean-pre-dcp/run_selected_10_stop_on_success.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/bin/bash

cd egg-pre-dcp

RUSTFLAGS="--cfg stop_on_success" cargo test test_dgp::test_gp4 -- --test-threads=1 --nocapture
RUSTFLAGS="--cfg stop_on_success" cargo test test_dgp::test_gp5 -- --test-threads=1 --nocapture
RUSTFLAGS="--cfg stop_on_success" cargo test test_dgp::test_gp8 -- --test-threads=1 --nocapture
RUSTFLAGS="--cfg stop_on_success" cargo test test_dgp::test_gp9 -- --test-threads=1 --nocapture
RUSTFLAGS="--cfg stop_on_success" cargo test test_almost_dgp::test_agp3 -- --test-threads=1 --nocapture
RUSTFLAGS="--cfg stop_on_success" cargo test test_dqcp::test_qcp4 -- --test-threads=1 --nocapture
RUSTFLAGS="--cfg stop_on_success" cargo test test_stanford::test_stan3 -- --test-threads=1 --nocapture
RUSTFLAGS="--cfg stop_on_success" cargo test test_stanford::test_stan4 -- --test-threads=1 --nocapture
RUSTFLAGS="--cfg stop_on_success" cargo test test_quiz::test_quiz9 -- --test-threads=1 --nocapture
RUSTFLAGS="--cfg stop_on_success" cargo test test_main_example::test_main_example -- --test-threads=1 --nocapture

0 comments on commit f301364

Please sign in to comment.