diff --git a/.github/imgs/runtimes.png b/.github/imgs/runtimes.png new file mode 100644 index 0000000..b675f8f Binary files /dev/null and b/.github/imgs/runtimes.png differ diff --git a/.gitignore b/.gitignore index da94d98..21b797f 100644 --- a/.gitignore +++ b/.gitignore @@ -384,3 +384,7 @@ datasets/AVGFP/Recomb_Double_Split/Recomb_Double_Split4.fasta datasets/AVGFP/Recomb_Double_Split/Recomb_Double_Split5.fasta datasets/AVGFP/Recomb_Double_Split/Recomb_Double_Split6.fasta datasets/AVGFP/Recomb_Double_Split/Recomb_Double_Split7.fasta +avGFP_shortened_dca_encoded.csv +datasets/AVGFP/avGFP_shortened.csv +avGFP_dca_encoded.csv +scripts/Runtime_tests/runtimes.png diff --git a/scripts/Runtime_tests/README.md b/scripts/Runtime_tests/README.md index cb1a43c..90439a2 100644 --- a/scripts/Runtime_tests/README.md +++ b/scripts/Runtime_tests/README.md @@ -1,4 +1,7 @@ -TODO ## Benchmarking runtimes -CPU runtimes using multiple threads. \ No newline at end of file +Wall clock runtimes for PLMC-DCA CPU-based sequence encoding (with PLMC parameter file) using multiple cores/threads. + +

+![drawing](../../.github/imgs/runtimes.png)
+

\ No newline at end of file
diff --git a/scripts/Runtime_tests/os_system_runtimes.py b/scripts/Runtime_tests/os_system_runtimes.py
new file mode 100644
index 0000000..2b7c034
--- /dev/null
+++ b/scripts/Runtime_tests/os_system_runtimes.py
@@ -0,0 +1,87 @@
+"""Benchmark wall-clock runtimes of PyPEF's DCA-based sequence encoding.
+
+Runs the 'encode -e dca' command of pypef/main.py on the avGFP dataset once
+for every thread count from 1 up to the CPU count reported by the operating
+system, then saves a plot of runtime versus thread count as 'runtimes.png'
+next to this script.
+"""
+
+import os
+import shutil
+import subprocess
+import sys
+import time
+
+import matplotlib.pyplot as plt
+
+pypef_path = os.path.abspath(
+    os.path.join(
+        os.path.dirname(__file__),
+        '../..'
+    )
+)
+
+sys.path.append(pypef_path)
+
+avgfp_path = os.path.abspath(
+    os.path.join(
+        os.path.dirname(__file__),
+        '../../datasets/AVGFP'
+    )
+)
+
+
+def build_command(conda, n_threads):
+    """Return the encoding command for ``n_threads`` threads as an argument list.
+
+    ``conda`` is the path of the Conda executable. Assumes that the Conda
+    environment 'pypef' exists and contains all necessary Python packages.
+    Using 'avGFP_shortened.csv' instead of 'avGFP.csv' takes much less
+    computing time.
+    """
+    return [
+        conda, 'run', '-n', 'pypef',
+        'python', os.path.join(pypef_path, 'pypef', 'main.py'),
+        'encode', '-i', os.path.join(avgfp_path, 'avGFP.csv'),
+        '-e', 'dca', '-w', os.path.join(avgfp_path, 'P42212_F64L.fasta'),
+        '--params', os.path.join(avgfp_path, 'uref100_avgfp_jhmmer_119_plmc_42.6.params'),
+        '--threads', str(n_threads),
+    ]
+
+
+def main():
+    """Time one encoding run per thread count and plot the runtimes."""
+    # Resolve the executable up front: without a shell, a bare 'conda' is not
+    # found on platforms where it is installed as a script (e.g. conda.bat)
+    conda = shutil.which('conda')
+    if conda is None:
+        sys.exit("Could not find the 'conda' executable on PATH.")
+    # os.cpu_count() returns None when the CPU count cannot be determined
+    n_cpus = os.cpu_count() or 1
+    print(n_cpus)
+    thread_counts = range(1, n_cpus + 1)
+    all_run_times = []
+    for n_threads in thread_counts:
+        command = build_command(conda, n_threads)
+        print(f"Running command:\n============\n{' '.join(command)}")
+        # perf_counter() is monotonic, so system clock adjustments during a
+        # run cannot distort the measured interval
+        start = time.perf_counter()
+        # An argument list (no shell) keeps paths containing spaces intact
+        completed = subprocess.run(command, check=False)
+        all_run_times.append(time.perf_counter() - start)
+        if completed.returncode != 0:
+            print(
+                f"Warning: run with {n_threads} thread(s) exited with status "
+                f"{completed.returncode}; its measured runtime is not meaningful.",
+                file=sys.stderr
+            )
+    plt.plot(thread_counts, all_run_times, 'o--')
+    plt.grid()
+    plt.xlabel('# Cores/Threads')
+    plt.ylabel('Runtime (s)')
+    plt.savefig(os.path.join(os.path.dirname(__file__), 'runtimes.png'), dpi=300)
+
+
+if __name__ == '__main__':
+    main()