From f157b96a7defa609dbe15667540361745449f453 Mon Sep 17 00:00:00 2001 From: baileythegreen Date: Mon, 18 Oct 2021 20:05:19 +0100 Subject: [PATCH 1/6] Add `multiprocessing` support to `pyani plot` --- pyani/scripts/parsers/plot_parser.py | 9 +++++++ pyani/scripts/subcommands/subcmd_plot.py | 34 +++++++++++++++++++----- tests/test_subcmd_06_plot.py | 8 ++++++ 3 files changed, 45 insertions(+), 6 deletions(-) diff --git a/pyani/scripts/parsers/plot_parser.py b/pyani/scripts/parsers/plot_parser.py index edc12d17..c120eb54 100644 --- a/pyani/scripts/parsers/plot_parser.py +++ b/pyani/scripts/parsers/plot_parser.py @@ -92,4 +92,13 @@ def build( help="graphics method to use for plotting", choices=["seaborn", "mpl", "plotly"], ) + parser.add_argument( + "--workers", + dest="workers", + action="store", + default=None, + type=int, + help="Number of worker processes for multiprocessing " + "(default: use all available cores)", + ) parser.set_defaults(func=subcommands.subcmd_plot) diff --git a/pyani/scripts/subcommands/subcmd_plot.py b/pyani/scripts/subcommands/subcmd_plot.py index 86ded7b5..7aeb3f2c 100644 --- a/pyani/scripts/subcommands/subcmd_plot.py +++ b/pyani/scripts/subcommands/subcmd_plot.py @@ -41,6 +41,7 @@ import logging import os +import multiprocessing from argparse import Namespace from pathlib import Path @@ -114,9 +115,17 @@ def write_run_heatmaps( ) result_label_dict = pyani_orm.get_matrix_labels_for_run(session, args.run_id) result_class_dict = pyani_orm.get_matrix_classes_for_run(session, args.run_id) - logger.debug(f"Have {len(result_label_dict)} labels and {len(result_class_dict)} classes") + logger.debug( + f"Have {len(result_label_dict)} labels and {len(result_class_dict)} classes" + ) + + # Write heatmap and distribution plot for each results matrix + + # Create worker pool and empty command list + pool = multiprocessing.Pool(processes=args.workers) + plotting_commands = [] - # Write heatmap for each results matrix + # Build and collect
the plotting commands for matdata in [ MatrixData(*_) for _ in [ @@ -127,14 +136,27 @@ def write_run_heatmaps( ("hadamard", pd.read_json(results.df_hadamard), {}), ] ]: - write_heatmap( - run_id, matdata, result_label_dict, result_class_dict, outfmts, args + plotting_commands.append( + ( + write_heatmap, + [run_id, matdata, result_label_dict, result_class_dict, outfmts, args], + ) ) - write_distribution(run_id, matdata, outfmts, args) + plotting_commands.append((write_distribution, [run_id, matdata, outfmts, args])) + + # Run the plotting commands + [pool.apply_async(func, args, {}) for func, args in plotting_commands] + + # Close worker pool + pool.close() + pool.join() def write_distribution( - run_id: int, matdata: MatrixData, outfmts: List[str], args: Namespace, + run_id: int, + matdata: MatrixData, + outfmts: List[str], + args: Namespace, ) -> None: """Write distribution plots for each matrix type. diff --git a/tests/test_subcmd_06_plot.py b/tests/test_subcmd_06_plot.py index 16b96bbe..989a1e7f 100644 --- a/tests/test_subcmd_06_plot.py +++ b/tests/test_subcmd_06_plot.py @@ -80,6 +80,7 @@ def setUp(self): dbpath=self.dbpath, formats="pdf", method="mpl", + workers=None, ), "mpl_png": Namespace( outdir=self.outdir / "mpl", @@ -87,6 +88,7 @@ def setUp(self): dbpath=self.dbpath, formats="png", method="mpl", + workers=None, ), "mpl_svg": Namespace( outdir=self.outdir / "mpl", @@ -94,6 +96,7 @@ def setUp(self): dbpath=self.dbpath, formats="svg", method="mpl", + workers=None, ), "mpl_jpg": Namespace( outdir=self.outdir / "mpl", @@ -101,6 +104,7 @@ def setUp(self): dbpath=self.dbpath, formats="jpg", method="mpl", + workers=None, ), "seaborn_pdf": Namespace( outdir=self.outdir / "seaborn", @@ -108,6 +112,7 @@ def setUp(self): dbpath=self.dbpath, formats="pdf", method="seaborn", + workers=None, ), "seaborn_png": Namespace( outdir=self.outdir / "seaborn", @@ -115,6 +120,7 @@ def setUp(self): dbpath=self.dbpath, formats="png", method="seaborn", + workers=None, ), 
"seaborn_svg": Namespace( outdir=self.outdir / "seaborn", @@ -122,6 +128,7 @@ def setUp(self): dbpath=self.dbpath, formats="svg", method="seaborn", + workers=None, ), "seaborn_jpg": Namespace( outdir=self.outdir / "seaborn", @@ -129,6 +136,7 @@ def setUp(self): dbpath=self.dbpath, formats="jpg", method="seaborn", + workers=None, ), } From 2b09fb0b99bb198c31d51cbf7ce7af00171d5262 Mon Sep 17 00:00:00 2001 From: baileythegreen Date: Tue, 19 Oct 2021 13:54:14 +0100 Subject: [PATCH 2/6] Change list comprehension to `for` loop for readability --- pyani/scripts/subcommands/subcmd_plot.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pyani/scripts/subcommands/subcmd_plot.py b/pyani/scripts/subcommands/subcmd_plot.py index 398d0934..dd39ca78 100644 --- a/pyani/scripts/subcommands/subcmd_plot.py +++ b/pyani/scripts/subcommands/subcmd_plot.py @@ -42,6 +42,7 @@ import logging import os import multiprocessing +import time from argparse import Namespace from pathlib import Path @@ -162,7 +163,8 @@ def write_run_heatmaps( ) # Run the plotting commands - [pool.apply_async(func, args, {}) for func, args in plotting_commands] + for func, args in plotting_commands: + pool.apply_async(func, args, {}) # Close worker pool pool.close() From 2846bdf5e52d0db8adbd534217192c83133fd82f Mon Sep 17 00:00:00 2001 From: baileythegreen Date: Tue, 19 Oct 2021 13:57:01 +0100 Subject: [PATCH 3/6] Rename `write_run_heatmaps()` The new name will be less confusing because this function also runs the code that generates distribution and the scatter plots. 
--- pyani/scripts/subcommands/subcmd_plot.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pyani/scripts/subcommands/subcmd_plot.py b/pyani/scripts/subcommands/subcmd_plot.py index dd39ca78..9f9eb6a7 100644 --- a/pyani/scripts/subcommands/subcmd_plot.py +++ b/pyani/scripts/subcommands/subcmd_plot.py @@ -92,14 +92,12 @@ def subcmd_plot(args: Namespace) -> int: run_ids = [int(run) for run in args.run_id.split(",")] logger.debug("Generating graphics for runs: %s", run_ids) for run_id in run_ids: - write_run_heatmaps(run_id, session, outfmts, args) + write_run_plots(run_id, session, outfmts, args) return 0 -def write_run_heatmaps( - run_id: int, session, outfmts: List[str], args: Namespace -) -> None: +def write_run_plots(run_id: int, session, outfmts: List[str], args: Namespace) -> None: """Write all heatmaps for a specified run to file. :param run_id: int, run identifier in database session From a23d1424589f4d5d44556858367b0c9c8cc53282 Mon Sep 17 00:00:00 2001 From: baileythegreen Date: Thu, 21 Oct 2021 15:22:20 +0100 Subject: [PATCH 4/6] Add new dependencies For generating trees --- requirements.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/requirements.txt b/requirements.txt index 79f24435..15d97e59 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ biopython +ete3 matplotlib namedlist networkx @@ -6,6 +7,7 @@ numpy openpyxl pandas Pillow +PyQt5 scipy seaborn sqlalchemy==1.3.10 From 451e2d372faea24e114740d812c62de648adefe5 Mon Sep 17 00:00:00 2001 From: baileythegreen Date: Mon, 29 Nov 2021 17:51:03 +0000 Subject: [PATCH 5/6] Change local variable name to avoid conflicts --- pyani/scripts/subcommands/subcmd_plot.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyani/scripts/subcommands/subcmd_plot.py b/pyani/scripts/subcommands/subcmd_plot.py index 9f9eb6a7..09281932 100644 --- a/pyani/scripts/subcommands/subcmd_plot.py +++ b/pyani/scripts/subcommands/subcmd_plot.py @@ -161,8 +161,8 @@ 
def write_run_plots(run_id: int, session, outfmts: List[str], args: Namespace) - ) # Run the plotting commands - for func, args in plotting_commands: - pool.apply_async(func, args, {}) + for func, options in plotting_commands: + pool.apply_async(func, options, {}) # Close worker pool pool.close() From 122f6d4a92162efb64d780c24e9414b715fdf3ad Mon Sep 17 00:00:00 2001 From: baileythegreen Date: Mon, 29 Nov 2021 18:18:39 +0000 Subject: [PATCH 6/6] Remove `import time` --- pyani/scripts/subcommands/subcmd_plot.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pyani/scripts/subcommands/subcmd_plot.py b/pyani/scripts/subcommands/subcmd_plot.py index 09281932..dbd156cd 100644 --- a/pyani/scripts/subcommands/subcmd_plot.py +++ b/pyani/scripts/subcommands/subcmd_plot.py @@ -42,7 +42,6 @@ import logging import os import multiprocessing -import time from argparse import Namespace from pathlib import Path