Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding CGE-Predicted Phenotypes to staramr's Resfinder Output #167

Merged
merged 10 commits into from
Mar 14, 2023
4 changes: 4 additions & 0 deletions staramr/blast/resfinder/ResfinderBlastDatabase.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import logging
import os

from staramr.blast.AbstractBlastDatabase import AbstractBlastDatabase

Expand All @@ -20,3 +21,6 @@ def __init__(self, database_dir):

def get_name(self):
    """
    Gets the name of this database.
    :return: The fixed name 'resfinder'.
    """
    database_name = 'resfinder'
    return database_name

def get_phenotypes_file(self):
    """
    Gets the path to the phenotypes file belonging to this database.
    :return: The file name 'phenotypes.txt' joined onto this database's directory (self.database_dir).
    """
    phenotypes_filename = 'phenotypes.txt'
    return os.path.join(self.database_dir, phenotypes_filename)
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ class BlastResultsParserResfinderResistance(BlastResultsParserResfinder):
Isolate ID
Gene
Predicted Phenotype
CGE Predicted Phenotype
%Identity
%Overlap
HSP Length/Total Length
Expand All @@ -20,12 +21,13 @@ class BlastResultsParserResfinderResistance(BlastResultsParserResfinder):
Sequence
'''.strip().split('\n')]

def __init__(self, file_blast_map, arg_drug_table, blast_database, pid_threshold, plength_threshold,
report_all=False, output_dir=None, genes_to_exclude=[]):
def __init__(self, file_blast_map, arg_drug_table, cge_drug_table, blast_database, pid_threshold,
plength_threshold, report_all=False, output_dir=None, genes_to_exclude=[]):
"""
Creates a new BlastResultsParserResfinderResistance.
:param file_blast_map: A map/dictionary linking input files to BLAST results files.
:param arg_drug_table: A table mapping the resistance gene to a specific drug resistance.
:param cge_drug_table: A table mapping the CGE-predicted resistance gene to a specific drug resistance.
:param blast_database: The particular staramr.blast.AbstractBlastDatabase to use.
:param pid_threshold: A percent identity threshold for BLAST results.
:param plength_threshold: A percent length threshold for results.
Expand All @@ -36,17 +38,25 @@ def __init__(self, file_blast_map, arg_drug_table, blast_database, pid_threshold
super().__init__(file_blast_map, blast_database, pid_threshold, plength_threshold, report_all,
output_dir=output_dir, genes_to_exclude=genes_to_exclude)
self._arg_drug_table = arg_drug_table
self._cge_drug_table = cge_drug_table

def _get_result_rows(self, hit, database_name):
drug = self._arg_drug_table.get_drug(database_name, hit.get_amr_gene_name_with_variant(),
hit.get_amr_gene_accession())
arg_drug = self._arg_drug_table.get_drug(database_name, hit.get_amr_gene_name_with_variant(),
hit.get_amr_gene_accession())

if drug is None:
drug = 'unknown[' + hit.get_amr_gene_variant_accession() + ']'
cge_drug = self._cge_drug_table.get_drug(database_name, hit.get_amr_gene_name_with_variant(),
hit.get_amr_gene_accession())

if arg_drug is None:
arg_drug = 'unknown[' + hit.get_amr_gene_variant_accession() + ']'

if cge_drug is None:
cge_drug = 'unknown[' + hit.get_amr_gene_variant_accession() + ']'

return [[hit.get_genome_id(),
hit.get_amr_gene_name(),
drug,
arg_drug,
cge_drug,
hit.get_pid(),
hit.get_plength(),
str(hit.get_hsp_length()) + "/" + str(hit.get_amr_gene_length()),
Expand Down
45 changes: 45 additions & 0 deletions staramr/databases/resistance/cge/CGEDrugTableResfinder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import logging
import os
from pathlib import Path

from staramr.databases.resistance.ARGDrugTable import ARGDrugTable

logger = logging.getLogger("CGEDrugTableResfinder")

"""
A Class used to load up and search a file containing gene/drug mappings for CGE ResFinder results.
"""


class CGEDrugTableResfinder(ARGDrugTable):
    """
    Gene/drug lookup table for CGE ResFinder phenotype predictions.

    Lookups match on the 'Class' and 'Gene_accession no.' columns and return the
    corresponding 'Phenotype' value.
    """

    # Column name -> type for the phenotypes table; every listed column is typed as str.
    # NOTE(review): presumably consumed by ARGDrugTable when reading the file -- confirm.
    DTYPES = {
        'Gene_accession no.': str,
        'Class': str,
        'Phenotype': str,
        'PMID': str,
        'Mechanism of resistance': str,
        "Notes": str,
        "Required_gene": str,
    }

    def __init__(self, file):
        """
        Builds a new CGEDrugTableResfinder from the given file.
        :param file: The file containing the gene/drug mappings.
        """
        super().__init__(file=file)

        # Store the class names lower-cased; get_drug compares them as-is against
        # the drug class it is given.
        lowered_classes = self._data['Class'].str.lower()
        self._data['Class'] = lowered_classes

    def get_drug(self, drug_class, gene_plus_variant, accession):
        """
        Looks up the phenotype (drug) for a gene within a drug class.
        :param drug_class: The drug class, compared against the lower-cased 'Class' column.
        :param gene_plus_variant: The gene plus variant (e.g., {gene}_{variant} = {blaIMP-58}_{1}).
        :param accession: The accession in the resfinder database (e.g., KU647281).
        :return: The 'Phenotype' of the first matching row, or None if no row matches.
        """
        rows = self._data

        # The table keys each entry as "<gene_plus_variant>_<accession>".
        lookup_key = "_".join((str(gene_plus_variant), str(accession)))

        class_matches = rows['Class'] == drug_class
        gene_matches = rows['Gene_accession no.'] == lookup_key
        phenotypes = rows[class_matches & gene_matches]['Phenotype']

        if not phenotypes.empty:
            # Several rows may match; only the first is reported.
            return phenotypes.iloc[0]

        logger.warning("No drug found for drug_class=%s, gene=%s, accession=%s", drug_class, gene_plus_variant,
                       accession)
        return None
Empty file.
1 change: 1 addition & 0 deletions staramr/databases/resistance/data/info.ini
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
[Versions]
pointfinder_gene_drug_version = 072621.1
resfinder_gene_drug_version = 072621
cge_gene_drug_version = 24ee5a0
apetkau marked this conversation as resolved.
Show resolved Hide resolved
10 changes: 7 additions & 3 deletions staramr/detection/AMRDetectionFactory.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from staramr.databases.resistance.pointfinder.ARGDrugTablePointfinder import ARGDrugTablePointfinder
from staramr.databases.resistance.resfinder.ARGDrugTableResfinder import ARGDrugTableResfinder
from staramr.databases.resistance.cge.CGEDrugTableResfinder import CGEDrugTableResfinder
from staramr.detection.AMRDetection import AMRDetection
from staramr.detection.AMRDetectionResistance import AMRDetectionResistance

Expand Down Expand Up @@ -29,9 +30,12 @@ def build(self, plasmidfinder_database, resfinder_database, blast_handler, point
"""

if include_resistances:
return AMRDetectionResistance(resfinder_database, ARGDrugTableResfinder(), blast_handler,
ARGDrugTablePointfinder(), pointfinder_database, include_negatives,
output_dir=output_dir, genes_to_exclude=genes_to_exclude,
phenotypes_file = resfinder_database.get_phenotypes_file()

return AMRDetectionResistance(resfinder_database, ARGDrugTableResfinder(),
CGEDrugTableResfinder(phenotypes_file), blast_handler,
ARGDrugTablePointfinder(), pointfinder_database,
include_negatives, output_dir=output_dir, genes_to_exclude=genes_to_exclude,
plasmidfinder_database=plasmidfinder_database)
else:
return AMRDetection(resfinder_database, blast_handler, pointfinder_database, include_negatives,
Expand Down
5 changes: 4 additions & 1 deletion staramr/detection/AMRDetectionResistance.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,14 @@

class AMRDetectionResistance(AMRDetection):

def __init__(self, resfinder_database, arg_drug_table_resfinder, amr_detection_handler, arg_drug_table_pointfinder,
def __init__(self, resfinder_database, arg_drug_table_resfinder, cge_drug_table_resfinder, amr_detection_handler, arg_drug_table_pointfinder,
pointfinder_database=None, include_negative_results=False, output_dir=None, genes_to_exclude=[],
plasmidfinder_database=None):
"""
Builds a new AMRDetectionResistance.
:param resfinder_database: The staramr.blast.resfinder.ResfinderBlastDatabase for the particular ResFinder database.
:param arg_drug_table_resfinder: The staramr.databases.resistance.ARGDrugTable for searching for resfinder resistances.
:param cge_drug_table_resfinder: The staramr.databases.resistance.CGEDrugTable for searching for resfinder resistances.
:param amr_detection_handler: The staramr.blast.JobHandler to use for scheduling BLAST jobs.
:param arg_drug_table_pointfinder: The staramr.databases.resistance.ARGDrugTable for searching for pointfinder resistances.
:param pointfinder_database: The staramr.blast.pointfinder.PointfinderBlastDatabase to use for the particular PointFinder database.
Expand All @@ -36,10 +37,12 @@ def __init__(self, resfinder_database, arg_drug_table_resfinder, amr_detection_h
output_dir=output_dir, genes_to_exclude=genes_to_exclude,
plasmidfinder_database=plasmidfinder_database)
self._arg_drug_table_resfinder = arg_drug_table_resfinder
self._cge_drug_table_resfinder = cge_drug_table_resfinder
self._arg_drug_table_pointfinder = arg_drug_table_pointfinder

def _create_resfinder_dataframe(self, resfinder_blast_map, pid_threshold, plength_threshold, report_all):
resfinder_parser = BlastResultsParserResfinderResistance(resfinder_blast_map, self._arg_drug_table_resfinder,
self._cge_drug_table_resfinder,
self._resfinder_database, pid_threshold,
plength_threshold, report_all,
output_dir=self._output_dir,
Expand Down
Loading