Skip to content

Commit

Permalink
outfactored mechanistic inference to its own module
Browse files Browse the repository at this point in the history
  • Loading branch information
weiju committed Aug 30, 2024
1 parent e06b8d6 commit f99a291
Show file tree
Hide file tree
Showing 2 changed files with 115 additions and 95 deletions.
101 changes: 6 additions & 95 deletions bin/miner3-mechinf
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,13 @@ from pkg_resources import Requirement, resource_filename

from miner import miner, util, GIT_SHA
from miner import __version__ as MINER_VERSION
from miner import mechinf


DESCRIPTION = """miner3-mechinf - MINER compute mechanistic inference
MINER Version %s (Git SHA %s)""" % (str(MINER_VERSION).replace('miner3 ', ''),
GIT_SHA.replace('$Id: ', '').replace(' $', ''))

NUM_CORES = 5
MIN_REGULON_GENES = 5

if __name__ == '__main__':
logging.getLogger('matplotlib.font_manager').disabled = True
LOG_FORMAT = '%(asctime)s %(message)s'
Expand Down Expand Up @@ -67,8 +65,6 @@ if __name__ == '__main__':
with open(args.coexprdict) as infile:
revised_clusters = json.load(infile)

# get first principal component axes of clusters
t1 = time.time()
if args.tfs2genes is not None:
database_path = args.tfs2genes
else:
Expand All @@ -79,93 +75,8 @@ if __name__ == '__main__':
# running from source
database_path = os.path.join('miner', 'data', 'network_dictionaries', 'tfbsdb_tf_to_genes.pkl')

axes = miner.principal_df(revised_clusters, exp_data,
subkey=None, minNumberGenes=1)

# analyze revised clusters for enrichment in relational database
# (default: transcription factor binding site database)
mechanistic_output = miner.mechanisticInference(axes, revised_clusters, exp_data,
correlationThreshold=args.mincorr,
numCores=NUM_CORES,
database_path=database_path)

# write mechanistic output to .json file
with open(os.path.join(args.outdir, "mechanisticOutput.json"), 'w') as outfile:
json.dump(mechanistic_output, outfile)

# order mechanisticOutput as {tf:{coexpressionModule:genes}}
coregulation_modules = miner.getCoregulationModules(mechanistic_output)

# write coregulation modules to .json file
with open(os.path.join(args.outdir, "coregulationModules.json"), 'w') as outfile:
json.dump(coregulation_modules, outfile)

# get final regulons by keeping genes that requently appear coexpressed and associated
# to a common regulator
regulons = miner.getRegulons(coregulation_modules,
minNumberGenes=MIN_REGULON_GENES,
freqThreshold=0.333)

# reformat regulon dictionary for consistency with revisedClusters and coexpressionModules
regulon_modules, regulon_df = miner.regulonDictionary(regulons)

# FIRM export: note that we do not check whether we have RefSeq or Entrez, maybe this should be
# checked in the glue
entrez_map = miner.make_entrez_map(args.mapfile)
with open(os.path.join(args.outdir, args.firmout), 'w') as outfile:
outfile.write('Gene\tGroup\n')
for regulon, genes in regulon_modules.items():
for gene in genes:
if gene in entrez_map:
outfile.write('%s\t%s\n' % (entrez_map[gene], regulon))

# OpenTargets export
with open(os.path.join(args.outdir, args.genelist), 'w') as outfile:
all_genes = set()
for regulon, genes in regulon_modules.items():
all_genes.update(genes)
for gene in sorted(all_genes):
outfile.write('%s\n' % gene)

# write regulons to json file
with open(os.path.join(args.outdir, "regulons.json"), 'w') as outfile:
json.dump(regulon_modules, outfile)
regulon_df.to_csv(os.path.join(args.outdir, "regulonDf.csv"))

# define coexpression modules as composite of coexpressed regulons
coexpression_modules = miner.getCoexpressionModules(mechanistic_output)

# write coexpression modules to .json file
with open(os.path.join(args.outdir, "coexpressionModules.json"), 'w') as outfile:
json.dump(coexpression_modules, outfile)

# write annotated coexpression clusters to .json file
with open(os.path.join(args.outdir, "coexpressionDictionary_annotated.json"), 'w') as outfile:
json.dump(revised_clusters, outfile)

"""
# write annotated regulons to .json file
with open(os.path.join(args.outdir, "regulons_annotated.json"), 'w') as outfile:
json.dump(regulons, outfile)
# reconvert coexpression modules
annotated_coexpression_modules = mechanistic_inference.convert_dictionary(coexpression_modules, conv_table)
# write annotated coexpression modules to .json file
with open(os.path.join(args.outdir, "coexpressionModules_annotated.json"), 'w') as outfile:
json.dump(annotated_coexpression_modules, outfile)"""


# Get eigengenes for all modules
eigengenes = miner.getEigengenes(regulon_modules, exp_data, regulon_dict=None,
saveFolder=None)
eigen_scale = np.percentile(exp_data, 95) / np.percentile(eigengenes, 95)
eigengenes = eigen_scale * eigengenes
eigengenes.index = np.array(eigengenes.index).astype(str)

# write eigengenes to .csv
eigengenes.to_csv(os.path.join(args.outdir, "eigengenes.csv"))

t2 = time.time()
logging.info("Completed mechanistic inference in {:.2f} minutes".format((t2 - t1) / 60.))
logging.info("Inferred network with {:d} regulons, {:d} regulators, and {:d} co-regulated genes".format(len(regulon_df.Regulon_ID.unique()), len(regulon_df.Regulator.unique()),len(regulon_df.Gene.unique())))
mechinf.mechanistic_inference(exp_data, args.mapfile, revised_clusters,
database_path,
args.outdir,
args.mincorr, args.skip_tpm, args.firmout,
args.genelist)
109 changes: 109 additions & 0 deletions miner/mechinf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
"""mechinf.py - Mechanistic inference module"""

import os
import json
import logging
import time
import numpy as np

from miner import miner, util

NUM_CORES = 5
MIN_REGULON_GENES = 5


def mechanistic_inference(exp_data, mapfile, revised_clusters, database_path,
outdir, mincorr, skip_tpm, firmout, genelist):
# get first principal component axes of clusters
t1 = time.time()

axes = miner.principal_df(revised_clusters, exp_data,
subkey=None, minNumberGenes=1)

# analyze revised clusters for enrichment in relational database
# (default: transcription factor binding site database)
mechanistic_output = miner.mechanisticInference(axes, revised_clusters, exp_data,
correlationThreshold=mincorr,
numCores=NUM_CORES,
database_path=database_path)

# write mechanistic output to .json file
with open(os.path.join(outdir, "mechanisticOutput.json"), 'w') as outfile:
json.dump(mechanistic_output, outfile)

# order mechanisticOutput as {tf:{coexpressionModule:genes}}
coregulation_modules = miner.getCoregulationModules(mechanistic_output)

# write coregulation modules to .json file
with open(os.path.join(outdir, "coregulationModules.json"), 'w') as outfile:
json.dump(coregulation_modules, outfile)

# get final regulons by keeping genes that requently appear coexpressed and associated
# to a common regulator
regulons = miner.getRegulons(coregulation_modules,
minNumberGenes=MIN_REGULON_GENES,
freqThreshold=0.333)

# reformat regulon dictionary for consistency with revisedClusters and coexpressionModules
regulon_modules, regulon_df = miner.regulonDictionary(regulons)

# FIRM export: note that we do not check whether we have RefSeq or Entrez, maybe this should be
# checked in the glue
entrez_map = miner.make_entrez_map(mapfile)
with open(os.path.join(outdir, firmout), 'w') as outfile:
outfile.write('Gene\tGroup\n')
for regulon, genes in regulon_modules.items():
for gene in genes:
if gene in entrez_map:
outfile.write('%s\t%s\n' % (entrez_map[gene], regulon))

# OpenTargets export
with open(os.path.join(outdir, genelist), 'w') as outfile:
all_genes = set()
for regulon, genes in regulon_modules.items():
all_genes.update(genes)
for gene in sorted(all_genes):
outfile.write('%s\n' % gene)

# write regulons to json file
with open(os.path.join(outdir, "regulons.json"), 'w') as outfile:
json.dump(regulon_modules, outfile)
regulon_df.to_csv(os.path.join(outdir, "regulonDf.csv"))

# define coexpression modules as composite of coexpressed regulons
coexpression_modules = miner.getCoexpressionModules(mechanistic_output)

# write coexpression modules to .json file
with open(os.path.join(outdir, "coexpressionModules.json"), 'w') as outfile:
json.dump(coexpression_modules, outfile)

# write annotated coexpression clusters to .json file
with open(os.path.join(outdir, "coexpressionDictionary_annotated.json"), 'w') as outfile:
json.dump(revised_clusters, outfile)

"""
# write annotated regulons to .json file
with open(os.path.join(outdir, "regulons_annotated.json"), 'w') as outfile:
json.dump(regulons, outfile)
# reconvert coexpression modules
annotated_coexpression_modules = mechanistic_inference.convert_dictionary(coexpression_modules, conv_table)
# write annotated coexpression modules to .json file
with open(os.path.join(outdir, "coexpressionModules_annotated.json"), 'w') as outfile:
json.dump(annotated_coexpression_modules, outfile)"""


# Get eigengenes for all modules
eigengenes = miner.getEigengenes(regulon_modules, exp_data, regulon_dict=None,
saveFolder=None)
eigen_scale = np.percentile(exp_data, 95) / np.percentile(eigengenes, 95)
eigengenes = eigen_scale * eigengenes
eigengenes.index = np.array(eigengenes.index).astype(str)

# write eigengenes to .csv
eigengenes.to_csv(os.path.join(outdir, "eigengenes.csv"))

t2 = time.time()
logging.info("Completed mechanistic inference in {:.2f} minutes".format((t2 - t1) / 60.))
logging.info("Inferred network with {:d} regulons, {:d} regulators, and {:d} co-regulated genes".format(len(regulon_df.Regulon_ID.unique()), len(regulon_df.Regulator.unique()),len(regulon_df.Gene.unique())))

0 comments on commit f99a291

Please sign in to comment.