Skip to content

Commit

Permalink
Merge pull request #361 from widdowquinn/pr_358
Browse files: browse the repository at this point in the history
Issue #357: Fix genome labels in matrix output
  • Loading branch information
baileythegreen authored Nov 29, 2021
2 parents 233fa62 + 8330c2d commit 9ae767f
Show file tree
Hide file tree
Showing 6 changed files with 60 additions and 14 deletions.
4 changes: 2 additions & 2 deletions .flake8
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[flake8]
ignore = E203, E231, E266, E501, W503, F403, F401
ignore = E203, E231, E266, E501, W503, F403, F401, E731
max-line-length = 88
max-complexity = 18
select = B,C,E,F,W,T4,B9
select = B,C,E,F,W,T4,B9
10 changes: 4 additions & 6 deletions pyani/pyani_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,27 +189,25 @@ def write_dbtable(
dfm: pd.DataFrame,
path: Path,
formats: Sequence[str] = ("tab",),
index: bool = False,
show_index: bool = False,
show_index: bool = True,
colour_num: bool = False,
) -> None:
"""Write database result table to output file in named format.
:param dfm: pd.Dataframe
:param path: Path to output file
:param formats: tuple of str, output file formats
:param index: Boolean
:param show_index: Boolean
:param show_index: output row and column labels
:param colour_num: use colours for values in HTML output
colours are used for identity/coverage tables
"""
formatdict = {
"tab": (dfm.to_csv, {"sep": "\t", "index": False}, ".tab"),
"tab": (dfm.to_csv, {"sep": "\t", "index": show_index}, ".tab"),
"excel": (dfm.to_excel, {"index": show_index}, ".xlsx"),
"html": (
write_styled_html,
{"dfm": dfm, "index": index, "colour_num": colour_num},
{"dfm": dfm, "index": show_index, "colour_num": colour_num},
".html",
),
"stdout": (write_to_stdout, {"dfm": dfm, "show_index": show_index}, ""),
Expand Down
9 changes: 7 additions & 2 deletions pyani/pyani_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -303,8 +303,13 @@ def label_results_matrix(matrix: pd.DataFrame, labels: Dict) -> pd.DataFrame:
Applies the labels from the dictionary to the dataframe in
matrix, and returns the result.
"""
matrix.columns = [f"{labels.get(_, _)}:{_}" for _ in matrix.columns]
matrix.index = [f"{labels.get(_, _)}:{_}" for _ in matrix.index]
# The dictionary uses string keys!
# Create a label function that produces <label>:<genome_id>
# when a label is available; and just Genome_id:<genome_id> when no
# label exists
label = lambda gen_id: f"{labels.get(str(gen_id), 'Genome_id')}:{gen_id}"
matrix.columns = [label(_) for _ in matrix.columns]
matrix.index = [label(_) for _ in matrix.index]
return matrix


Expand Down
8 changes: 8 additions & 0 deletions pyani/scripts/parsers/report_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,11 +120,19 @@ def build(
default=None,
help="Report matrices of results for comma separated list of runs",
)
parser.add_argument(
"--no_matrix_labels",
action="store_true",
dest="no_matrix_labels",
default=False,
help="Turn off row/column labels in output matrix files",
)
parser.add_argument(
"--formats",
dest="formats",
action="store",
default=None,
choices=["html", "excel", "stdout"],
help="Output formats (in addition to .tab)",
)
parser.set_defaults(func=subcommands.subcmd_report)
20 changes: 16 additions & 4 deletions pyani/scripts/subcommands/subcmd_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,10 @@ def subcmd_report(args: Namespace) -> int:
"genome class",
]
report(
args, session, formats, ReportParams("runs_genomes", statement, headers),
args,
session,
formats,
ReportParams("runs_genomes", statement, headers),
)

# Report table of all runs in which a genome is involved
Expand Down Expand Up @@ -194,7 +197,10 @@ def subcmd_report(args: Namespace) -> int:
"date run",
]
report(
args, session, formats, ReportParams("genomes_runs", statement, headers),
args,
session,
formats,
ReportParams("genomes_runs", statement, headers),
)

# Report table of comparison results for the indicated runs
Expand Down Expand Up @@ -259,6 +265,7 @@ def subcmd_report(args: Namespace) -> int:
# JSON, we don't bother with a helper function like report(), and write out
# our matrices directly, here
if args.run_matrices:
show_index = not args.no_matrix_labels
for run_id in [run_id.strip() for run_id in args.run_matrices.split(",")]:
logger.debug("Extracting matrices for run %s", run_id)
run = session.query(Run).filter(Run.run_id == run_id).first()
Expand Down Expand Up @@ -286,14 +293,19 @@ def subcmd_report(args: Namespace) -> int:
)
),
formats,
show_index=True,
show_index=show_index,
**matdata.graphic_args,
)

return 0


def report(args: Namespace, session, formats: List[str], params: ReportParams,) -> None:
def report(
args: Namespace,
session,
formats: List[str],
params: ReportParams,
) -> None:
"""Write tabular report of pyani runs from database.
:param args: Namespace of command-line arguments
Expand Down
23 changes: 23 additions & 0 deletions tests/test_subcmd_05_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ def setUp(self):
show_genomes_runs=False,
run_results=False,
run_matrices=False,
no_matrix_labels=False,
force=True,
formats="html,excel,stdout",
),
Expand All @@ -93,6 +94,7 @@ def setUp(self):
show_genomes_runs=False,
run_results=False,
run_matrices=False,
no_matrix_labels=False,
force=True,
formats="html,excel,stdout",
),
Expand All @@ -105,6 +107,7 @@ def setUp(self):
show_genomes_runs=False,
run_results=False,
run_matrices=False,
no_matrix_labels=False,
force=True,
formats="html,excel,stdout",
),
Expand All @@ -117,6 +120,7 @@ def setUp(self):
show_genomes_runs=True,
run_results=False,
run_matrices=False,
no_matrix_labels=False,
force=True,
formats="html,excel,stdout",
),
Expand All @@ -129,6 +133,7 @@ def setUp(self):
show_genomes_runs=False,
run_results="1",
run_matrices=False,
no_matrix_labels=False,
force=True,
formats="html,excel,stdout",
),
Expand All @@ -141,6 +146,20 @@ def setUp(self):
show_genomes_runs=False,
run_results=False,
run_matrices="1",
no_matrix_labels=False,
force=True,
formats="html,excel",
),
"no_matrix_labels": Namespace(
outdir=self.outdir,
dbpath=self.dbpath,
show_runs=False,
show_genomes=True,
show_runs_genomes=False,
show_genomes_runs=False,
run_results=False,
run_matrices="1",
no_matrix_labels=True,
force=True,
formats="html,excel",
),
Expand Down Expand Up @@ -169,3 +188,7 @@ def test_results(self):
def test_matrices(self):
    """Run the report subcommand with the "run_matrices" argument set.

    The test passes when subcmd_report() completes without raising;
    nothing about the generated output is asserted here.
    """
    matrix_args = self.argsdict["run_matrices"]
    subcommands.subcmd_report(matrix_args)

def test_no_matrix_labels(self):
    """Run matrix reporting with row/column labels switched off.

    Uses the "no_matrix_labels" argument set. The test passes when
    subcmd_report() completes without raising; nothing about the
    generated output is asserted here.
    """
    unlabelled_args = self.argsdict["no_matrix_labels"]
    subcommands.subcmd_report(unlabelled_args)

0 comments on commit 9ae767f

Please sign in to comment.