From c4260d5b0652addccb8f8adb30c215299855be51 Mon Sep 17 00:00:00 2001
From: weiju
Date: Fri, 30 Aug 2024 11:06:25 -0700
Subject: [PATCH] outfactored bcmembers workflow

* bcmembers.py
* fixed reference to background_df()
---
NOTE(review): summary of what the hunks below do.
  - bin/miner3-bcmembers: imports miner.bcmembers and replaces the inline
    membership workflow with one call to
    bcmembers.bcmembers(exp_data, regulon_modules, args.outdir).
  - miner/bcmembers.py (new): holds that workflow as bcmembers(); it writes
    overExpressedMembers.json, underExpressedMembers.json and four
    *Members.csv files into outdir. It calls miner.background_df(), whereas
    the removed script code called miner.backgroundDf().
  - miner/miner.py: drops three print("done!") calls from
    biclusterMembershipDictionary().
NOTE(review): this copy of the patch was recovered from text whose newlines
and indentation had been collapsed. Line breaks, blank context lines and
leading whitespace were reconstructed to satisfy the hunk headers; they are
assumptions. Verify against commit c4260d5 before applying.

 bin/miner3-bcmembers | 37 ++-----------------------------------
 miner/bcmembers.py   | 42 ++++++++++++++++++++++++++++++++++++++++++
 miner/miner.py       |  3 ---
 3 files changed, 44 insertions(+), 38 deletions(-)
 create mode 100644 miner/bcmembers.py

diff --git a/bin/miner3-bcmembers b/bin/miner3-bcmembers
index 3f64786..9453001 100755
--- a/bin/miner3-bcmembers
+++ b/bin/miner3-bcmembers
@@ -11,6 +11,7 @@ import matplotlib
 matplotlib.use('Agg')
 from miner import miner, util
 from miner import GIT_SHA, __version__ as pkg_version
+from miner import bcmembers
 
 DESCRIPTION = """miner3-bcmembers - MINER compute bicluster membership inference.
 
@@ -45,39 +46,5 @@ if __name__ == '__main__':
 
     with open(args.regulons) as infile:
         regulon_modules = json.load(infile)
+    bcmembers.bcmembers(exp_data, regulon_modules, args.outdir)
 
-    bkgd = miner.backgroundDf(exp_data)
-    overexpressed_members = miner.biclusterMembershipDictionary(regulon_modules,
-                                                                bkgd, label=2, p=0.05)
-    underexpressed_members = miner.biclusterMembershipDictionary(regulon_modules,
-                                                                 bkgd, label=0, p=0.05)
-    dysregulated_members = miner.biclusterMembershipDictionary(regulon_modules,
-                                                               bkgd, label="excluded")
-    coherent_members = miner.biclusterMembershipDictionary(regulon_modules,
-                                                           bkgd, label="included")
-
-    # write the overexpressed/underexpressed members as JSON, tools later in the pipeline can
-    # easier access them
-    with open(os.path.join(args.outdir, 'overExpressedMembers.json'), 'w') as out:
-        json.dump(overexpressed_members, out)
-    with open(os.path.join(args.outdir, 'underExpressedMembers.json'), 'w') as out:
-        json.dump(underexpressed_members, out)
-
-    overexpressed_members_matrix = miner.membershipToIncidence(overexpressed_members,
-                                                               exp_data)
-    overexpressed_members_matrix.to_csv(os.path.join(args.outdir,
-                                                     "overExpressedMembers.csv"))
-
-    underexpressed_members_matrix = miner.membershipToIncidence(underexpressed_members,
-                                                                exp_data)
-    underexpressed_members_matrix.to_csv(os.path.join(args.outdir,
-                                                      "underExpressedMembers.csv"))
-
-    dysregulated_members_matrix = miner.membershipToIncidence(dysregulated_members,
-                                                              exp_data)
-    dysregulated_members_matrix.to_csv(os.path.join(args.outdir, "dysregulatedMembers.csv"))
-
-    coherent_members_matrix = miner.membershipToIncidence(coherent_members,
-                                                          exp_data)
-    coherent_members_matrix.to_csv(os.path.join(args.outdir,
-                                                 "coherentMembers.csv"))
diff --git a/miner/bcmembers.py b/miner/bcmembers.py
new file mode 100644
index 0000000..8563d0e
--- /dev/null
+++ b/miner/bcmembers.py
@@ -0,0 +1,42 @@
+"""bcmembers.py - Module to compute bicluster memberships"""
+
+import os
+import json
+from miner import miner
+
+def bcmembers(exp_data, regulon_modules, outdir):
+    bkgd = miner.background_df(exp_data)
+    overexpressed_members = miner.biclusterMembershipDictionary(regulon_modules,
+                                                                bkgd, label=2, p=0.05)
+    underexpressed_members = miner.biclusterMembershipDictionary(regulon_modules,
+                                                                 bkgd, label=0, p=0.05)
+    dysregulated_members = miner.biclusterMembershipDictionary(regulon_modules,
+                                                               bkgd, label="excluded")
+    coherent_members = miner.biclusterMembershipDictionary(regulon_modules,
+                                                           bkgd, label="included")
+
+    # write the overexpressed/underexpressed members as JSON, tools later in the pipeline can
+    # easier access them
+    with open(os.path.join(outdir, 'overExpressedMembers.json'), 'w') as out:
+        json.dump(overexpressed_members, out)
+    with open(os.path.join(outdir, 'underExpressedMembers.json'), 'w') as out:
+        json.dump(underexpressed_members, out)
+
+    overexpressed_members_matrix = miner.membershipToIncidence(overexpressed_members,
+                                                               exp_data)
+    overexpressed_members_matrix.to_csv(os.path.join(outdir,
+                                                     "overExpressedMembers.csv"))
+
+    underexpressed_members_matrix = miner.membershipToIncidence(underexpressed_members,
+                                                                exp_data)
+    underexpressed_members_matrix.to_csv(os.path.join(outdir,
+                                                      "underExpressedMembers.csv"))
+
+    dysregulated_members_matrix = miner.membershipToIncidence(dysregulated_members,
+                                                              exp_data)
+    dysregulated_members_matrix.to_csv(os.path.join(outdir, "dysregulatedMembers.csv"))
+
+    coherent_members_matrix = miner.membershipToIncidence(coherent_members,
+                                                          exp_data)
+    coherent_members_matrix.to_csv(os.path.join(outdir,
+                                                 "coherentMembers.csv"))
diff --git a/miner/miner.py b/miner/miner.py
index 90d4a06..e4fc8c5 100644
--- a/miner/miner.py
+++ b/miner/miner.py
@@ -1142,7 +1142,6 @@ def biclusterMembershipDictionary(revisedClusters,background,label=2,p=0.05):
                 members[key] = []
                 continue
             members[key] = list(background.columns[nonMembers])
-        print("done!")
         return members
 
     if label == "included":
@@ -1158,7 +1157,6 @@ def biclusterMembershipDictionary(revisedClusters,background,label=2,p=0.05):
                 members[key] = []
                 continue
             members[key] = list(background.columns[included])
-        print("done!")
         return members
 
     members = {}
@@ -1174,7 +1172,6 @@ def biclusterMembershipDictionary(revisedClusters,background,label=2,p=0.05):
             members[key] = []
             continue
         members[key] = list(background.columns[overExpMembers])
-    print("done!")
     return members
 
 def membershipToIncidence(membershipDictionary,expressionData):