Merge branch 'main' of https://github.com/freiburgermsu/ModelSEEDpy

freiburgermsu · Apr 3, 2024 · 6aba742 · 6aba742
2 parents 404773e + 1aaa4c9
commit 6aba742
Show file tree

Hide file tree

Showing 5 changed files with 96 additions and 78 deletions.
diff --git a/modelseedpy/community/commhelper.py b/modelseedpy/community/commhelper.py
@@ -16,6 +16,10 @@
 from numpy import mean
 import re
 
+import logging
+logging.basicConfig(filename='example.log', encoding='utf-8', level=logging.ERROR)
+logger = logging.getLogger(__name__)
+
 
 def strip_comp(ID):
     ID = ID.replace("-", "~")
@@ -88,12 +92,14 @@ def build_from_species_models(org_models, model_id=None, name=None, abundances=N
                     met.compartment = compartment + str(index)
                     met.id = name + "_" + met.compartment
             new_metabolites.add(met)
-            if "cpd11416_c" in met.id or "biomass" in met.id:  member_biomasses[org_model.id] = met
+            if "cpd11416_c" in met.id or "biomass" in met.id:
+                met.name = f"{met.id}_{model_util.model.id}"
+                member_biomasses[org_model.id] = met
         # Rename reactions
         for rxn in model_util.model.reactions:  # !!! all reactions should have a non-zero compartment index
             if rxn.id[0:3] != "EX_":
                 ## biomass reactions
-                if re.search('^(bio)(\d+)$', rxn.id):
+                if re.search('^(bio)(\d+)$', rxn.id) or "biomass" in rxn.id:
                     index = int(re.sub(r"(^bio)", "", rxn.id))
                     if biomass_index == 2:
                         while f"bio{biomass_index}" in model_reaction_ids:  biomass_index += 1
@@ -135,7 +141,8 @@ def build_from_species_models(org_models, model_id=None, name=None, abundances=N
                     for index, let in enumerate(finalID):
                         if index >= len(initialID) or index < len(initialID) and let != initialID[index]: string_diff += let
                     # if "compartment" not in locals():  print(f"the {rxn.id} with a {output} output is not defined with a compartment.")
-                    if string_diff != f"_{compartment}{model_index}" and printing:  print(f"The ID {initialID} is changed with {string_diff} to create the final ID {finalID}")
+                    if string_diff != f"_{compartment}{model_index}" and printing:
+                        logger.debug(f"The ID {initialID} is changed with {string_diff} to create the final ID {finalID}")
             new_reactions.add(rxn)
         # else:
         #     # TODO develop a method for compartmentalizing models without editing all reaction IDs or assuming their syntax
@@ -150,8 +157,14 @@ def build_from_species_models(org_models, model_id=None, name=None, abundances=N
     comm_biomass = Metabolite("cpd11416_c0", None, "Community biomass", 0, "c0")
     metabolites = {comm_biomass: 1}
     ## constrain the community abundances
-    if abundances:  abundances = {met: -abundances[memberID] for memberID, met in member_biomasses.items() if memberID in abundances}
-    else:   abundances = {met: -1 / len(member_biomasses) for met in member_biomasses.values()}
+    if abundances:
+        if isinstance(abundances[list(abundances.keys())[0]], dict):
+            abundances = {met: -abundances[memberID]["abundance"] for memberID, met in member_biomasses.items() if memberID in abundances}
+        else:   abundances = {met: -abundances[memberID]["abundance"] for memberID, met in member_biomasses.items() if memberID in abundances}
+    else:  abundances = {met: -1 / len(member_biomasses) for met in member_biomasses.values()}
+
+    # TODO - add the biomass reactions instead of thje biomass metabolites for the commKinetics
+
     ## define community biomass components
     metabolites.update(abundances)
     comm_biorxn = Reaction(id="bio1", name="bio1", lower_bound=0, upper_bound=1000)
@@ -164,23 +177,12 @@ def build_from_species_models(org_models, model_id=None, name=None, abundances=N
     newutl.model.add_boundary(comm_biomass, "sink") # Is a sink reaction for reversible cpd11416_c0 consumption necessary?
     ## proportionally limit the fluxes to their abundances 
     # print(abundances)
-    if commkinetics:  add_commkinetics(newutl, models, member_biomasses, abundances)
     # add the metadata of community composition
     if hasattr(newutl.model, "_context"):  newutl.model._contents.append(member_biomasses)
     elif hasattr(newutl.model, "notes"):  newutl.model.notes.update(member_biomasses)
     # print([cons.name for cons in newutl.model.constraints])
     return newutl.model
 
-def add_commkinetics(util, models, member_biomasses, abundances):
-    # TODO this creates an error with the member biomass reactions not being identified in the model
-    coef = {}
-    for model in models:
-        if member_biomasses[model.id] not in abundances:  continue
-        coef[member_biomasses[model.id]] = -abundances[member_biomasses[model.id]]
-        for rxn in model.reactions:
-            if rxn.id[:3] == "rxn":   coef[rxn.forward_variable] = coef[rxn.reverse_variable] = 1
-    util.create_constraint(Constraint(Zero, name="member_flux_limit"), coef=coef, printing=True)
-
 
 def phenotypes(community_members, phenotype_flux_threshold=.1, solver:str="glpk"):
     # log information of each respective model

diff --git a/modelseedpy/community/mscommunity.py b/modelseedpy/community/mscommunity.py
@@ -12,6 +12,7 @@
 from cobra.core.dictlist import DictList
 from optlang.symbolics import Zero
 from optlang import Constraint
+from os import makedirs, path
 from pandas import DataFrame
 from pprint import pprint
 from cobra import Reaction
@@ -21,38 +22,36 @@
 
 
 class CommunityMember:
-    def __init__(self, community, biomass_cpd, name=None, index=None, abundance=0):
+    def __init__(self, community, biomass_cpd, ID=None, index=None, abundance=0):
+        print(biomass_cpd)
         self.community, self.biomass_cpd = community, biomass_cpd
-        self.index = index or int(self.biomass_cpd.compartment[1:])
+        try:     self.index = int(self.biomass_cpd.compartment[1:])
+        except:  self.index = index
         self.abundance = abundance
         if self.biomass_cpd in self.community.primary_biomass.metabolites:
             self.abundance = abs(self.community.primary_biomass.metabolites[self.biomass_cpd])
-        if name:  self.id = name
+        if ID is not None:  self.id = ID
         elif "species_name" in self.biomass_cpd.annotation:
             self.id = self.biomass_cpd.annotation["species_name"]
         else:  self.id = "Species"+str(self.index)
 
         logger.info("Making atp hydrolysis reaction for species: "+self.id)
         atp_rxn = self.community.util.add_atp_hydrolysis("c"+str(self.index))
         self.atp_hydrolysis = atp_rxn["reaction"]
-        self.biomass_drain = None
-        self.biomasses, self.reactions = [], []
-        self.primary_biomass = None
+        self.biomass_drain = self.primary_biomass = None
+        self.reactions = []
         for rxn in self.community.util.model.reactions:
+            # print(rxn.id, rxn.reaction, "\t\t\t", end="\r")
             rxnComp = FBAHelper.rxn_compartment(rxn)
-            if not rxnComp:
-                print(f"The reaction {rxn.id} strangely lacks a compartment.")
-            elif int(rxnComp[1:]) == self.index and 'bio' not in rxn.name:
-                self.reactions.append(rxn)
+            if rxnComp is None:  print(f"The reaction {rxn.id} compartment is undefined.")
+            if rxnComp[1:] == '': print(rxn, rxnComp)
+            elif int(rxnComp[1:]) == self.index and 'bio' not in rxn.name:  self.reactions.append(rxn)
             if self.biomass_cpd in rxn.metabolites:
-                if rxn.metabolites[self.biomass_cpd] == 1 and len(rxn.metabolites) > 1:
-                    self.biomasses.append(rxn)
-                    if len(self.biomasses) == 1:  # TODO make this condition more reflective of primary biomass
-                        self.primary_biomass = rxn
-                elif len(rxn.metabolites) == 1 and rxn.metabolites[self.biomass_cpd] < 0:
-                    self.biomass_drain = rxn
+                # print(rxn.reaction)
+                if rxn.metabolites[self.biomass_cpd] == 1 and len(rxn.metabolites) > 1:  self.primary_biomass = rxn  ;  break
+                elif len(rxn.metabolites) == 1 and rxn.metabolites[self.biomass_cpd] < 0:  self.biomass_drain = rxn
 
-        if self.biomasses == []:  logger.critical("No biomass reaction found for species " + self.id)
+        if self.primary_biomass is None:  logger.critical("No biomass reaction found for species " + self.id)
         if not self.biomass_drain:
             logger.info("Making biomass drain reaction for species: "+self.id)
             self.biomass_drain = Reaction(
@@ -67,7 +66,7 @@ def disable_species(self):
             if int(reaction_index) == self.index:  reaction.upper_bound = reaction.lower_bound = 0
 
     def compute_max_biomass(self):
-        if len(self.biomasses) == 0:  logger.critical("No biomass reaction found for species "+self.id)
+        if self.primary_biomass is None:  logger.critical("No biomass reaction found for species "+self.id)
         self.community.util.add_objective(self.primary_biomass.flux_expression)
         if self.community.lp_filename:  self.community.print_lp(f"{self.community.lp_filename}_{self.id}_Biomass")
         return self.community.model.optimize()
@@ -80,8 +79,7 @@ def compute_max_atp(self):
 
 
 class MSCommunity:
-    def __init__(self, model=None, member_models: list = None, ids=None, abundances=None, kinetic_coeff=2000,
-                 flux_limit=300, lp_filename=None, printing=False):
+    def __init__(self, model=None, member_models: list = None, abundances=None, kinetic_coeff=2000, lp_filename=None, printing=False):
         self.lp_filename = lp_filename
         self.gapfillings = {}
 
@@ -91,14 +89,12 @@ def __init__(self, model=None, member_models: list = None, ids=None, abundances=
         # defining the models
         if member_models is not None and model is None:
             model = build_from_species_models(member_models, abundances=abundances, printing=printing)
-        if ids is None and member_models is not None:  ids = [mem.id for mem in member_models]
         self.id = model.id
         self.util = MSModelUtil(model, True)
         self.pkgmgr = MSPackageManager.get_pkg_mgr(self.util.model)
-        msid_cobraid_hash = self.util.msid_hash()
         # print(msid_cobraid_hash)
-        write_sbml_model(model, "test_comm.xml")
-
+        # write_sbml_model(model, "test_comm.xml")
+        msid_cobraid_hash = self.util.msid_hash()
         if "cpd11416" not in msid_cobraid_hash:  raise KeyError("Could not find biomass compound for the model.")
         other_biomass_cpds = []
         for self.biomass_cpd in msid_cobraid_hash["cpd11416"]:
@@ -113,38 +109,28 @@ def __init__(self, model=None, member_models: list = None, ids=None, abundances=
                         if printing:  print('primary biomass defined', rxn.id)
                         self.primary_biomass = rxn
                     elif rxn.metabolites[self.biomass_cpd] < 0 and len(rxn.metabolites) == 1:  self.biomass_drain = rxn
-            elif 'c' in self.biomass_cpd.compartment:
-                other_biomass_cpds.append(self.biomass_cpd)
-        # assign community members and their abundances
+            elif 'c' in self.biomass_cpd.compartment:   other_biomass_cpds.append(self.biomass_cpd)
+        abundances = abundances or {f"Species{memIndex}": {"biomass_compound": bioCpd, "abundance": 1/len(other_biomass_cpds)}
+                                    for memIndex, bioCpd in enumerate(other_biomass_cpds)}
         print()   # this returns the carriage after the tab-ends in the biomass compound printing 
-        abundances = abundances or [1/len(other_biomass_cpds)]*len(other_biomass_cpds)
-        self.members = DictList(
-            CommunityMember(community=self, biomass_cpd=biomass_cpd, name=ids[memIndex], abundance=abundances[memIndex])
-            for memIndex, biomass_cpd in enumerate(other_biomass_cpds))
+        self.members = DictList(CommunityMember(self, info["biomass_compound"], ID, index, info["abundance"])
+                                for index, (ID, info) in enumerate(abundances.items()))
         # assign the MSCommunity constraints and objective
         self.abundances_set = False
         if isinstance(abundances, dict):  self.set_abundance(abundances)
-        self.pkgmgr.getpkg("CommKineticPkg").build_package(kinetic_coeff, self)
-        for member in self.members:
-            vars_coef = {}
-            for rxn in self.util.model.reactions:
-                if "EX_" not in rxn.id and member.index == FBAHelper.rxn_compartment(rxn)[1:]:
-                    vars_coef[rxn.forward_variable] = vars_coef[rxn.reverse_variable] = 1
-            print(member.id, flux_limit, member.abundance)
-            self.util.create_constraint(Constraint(Zero, lb=0, ub=flux_limit*member.abundance,
-                                                   name=f"{member.id}_resource_balance"), coef=vars_coef)
+        # self.pkgmgr.getpkg("CommKineticPkg").build_package(kinetic_coeff, self)
 
     #Manipulation functions
     def set_abundance(self, abundances):
         #calculate the normalized biomass
-        total_abundance = sum(list(abundances.values()))
+        total_abundance = sum(list([content["abundance"] for content in abundances.values()]))
         # map abundances to all species
-        for species, abundance in abundances.items():
-            if species in self.members:  self.members.get_by_id(species).abundance = abundance/total_abundance
+        for modelID, content in abundances.items():
+            if modelID in self.members:  self.members.get_by_id(modelID).abundance = content["abundance"]/total_abundance
         #remake the primary biomass reaction based on abundances
         if self.primary_biomass is None:  logger.critical("Primary biomass reaction not found in community model")
         all_metabolites = {self.primary_biomass.products[0]: 1}
-        all_metabolites.update({mem.biomass_cpd: -abundances[mem.id]/total_abundance for mem in self.members})
+        all_metabolites.update({mem.biomass_cpd: -abundances[mem.id]["abundance"]/total_abundance for mem in self.members})
         self.primary_biomass.add_metabolites(all_metabolites, combine=False)
         self.abundances_set = True
 
@@ -169,10 +155,35 @@ def interactions(self, solution=None, media=None, msdb=None, msdb_path=None, fil
         return MSSteadyCom.interactions(self, solution or self.solution, media, flux_threshold, msdb, msdb_path,
                                         visualize, filename, figure_format, node_metabolites, True, ignore_mets)
 
+    def add_commkinetics(self, member_biomasses, abundances):
+        # TODO this creates an error with the member biomass reactions not being identified in the model
+        coef = {}
+        for rxn in self.util.model.reactions:
+            if rxn.id[:3] == "rxn":   coef[rxn.forward_variable] = coef[rxn.reverse_variable] = 1
+        for member in self.members:
+            if member_biomasses[member.id] not in abundances:  continue
+            coef[member_biomasses[member.id]] = -abundances[member_biomasses[member.id]]
+        self.util.create_constraint(Constraint(Zero, name="member_flux_limit"), coef=coef, printing=True)
+
+    def add_resource_balance(self, flux_limit=300):
+        for member in self.members:
+            vars_coef = {}
+            for rxn in self.util.model.reactions:
+                if "EX_" not in rxn.id and member.index == FBAHelper.rxn_compartment(rxn)[1:]:
+                    vars_coef[rxn.forward_variable] = vars_coef[rxn.reverse_variable] = 1
+            print(member.id, flux_limit, member.abundance)
+            self.util.create_constraint(Constraint(Zero, lb=0, ub=flux_limit*member.abundance,
+                                                   name=f"{member.id}_resource_balance"), coef=vars_coef)
+
     #Utility functions
     def print_lp(self, filename=None):
         filename = filename or self.lp_filename
-        with open(filename+".lp", 'w') as out:  out.write(str(self.util.model.solver))  ;  out.close()
+        makedirs(path.dirname(filename), exist_ok=True)
+        with open(filename, 'w') as out:  out.write(str(self.util.model.solver))  ;  out.close()
+
+    def to_sbml(self, export_name):
+        makedirs(path.dirname(export_name), exist_ok=True)
+        write_sbml_model(self.util.model, export_name)
 
     #Analysis functions
     def gapfill(self, media = None, target = None, minimize = False, default_gapfill_templates=None, default_gapfill_models=None,

diff --git a/modelseedpy/core/fbahelper.py b/modelseedpy/core/fbahelper.py
@@ -1,17 +1,18 @@
 # -*- coding: utf-8 -*-
 from __future__ import absolute_import
-import logging
+
+
+from cobra.core import Gene, Metabolite, Model, Reaction
+from cobra.util import solver as sutil
 from chemicals import periodic_table
-import re
-from cobra.core import Gene, Metabolite, Model, Reaction   # !!! Gene and Model are never used
-from cobra.util import solver as sutil  # !!! sutil is never used
-from scipy.odr import Output  # !!! Output is never used
+from collections import Counter
+from scipy.odr import Output
 from typing import Iterable
-import time
-from chemw import ChemMW
 from warnings import warn
+from chemw import ChemMW
+import logging, time, re
+
 
-# from Carbon.Aliases import false
 
 logger = logging.getLogger(__name__)
 
@@ -159,11 +160,17 @@ def sum_dict(d1,d2):
     @staticmethod
     def rxn_compartment(reaction):
         compartments = list(reaction.compartments)
+        if len(compartments) == 0:
+            if re.search("(?<=\_)(\w\d+)", reaction.id):   return reaction.id.split("_")[-1]
+            agora_comp = re.compile("(?<=\[)(.+)(?=\])")
+            if agora_comp.search([met for met in reaction.metabolites][0].id) is not None:
+                compartments = [agora_comp.search(met.id).groups() for met in reaction.metabolites]
+                print(Counter(compartments))
+                return list(Counter(compartments).keys())[0]
         if len(compartments) == 1:  return compartments[0]
         for comp in compartments:
             if comp[0:1] != "e":  return comp
-            elif comp[0:1] == "e":  extracellular = comp
-        return extracellular
+            elif comp[0:1] == "e":  extracellular = comp  ;  return extracellular
 
     @staticmethod
     def remove_compartment(objID):

diff --git a/modelseedpy/core/msmodelutl.py b/modelseedpy/core/msmodelutl.py
@@ -433,13 +433,11 @@ def add_objective(self, objective, direction="max", coef=None):
             self.model.objective.set_linear_coefficients(coef)
             self.model.solver.update()
 
-    def set_objective_from_target_reaction(self, target_reaction, minimize=False):
-        target_reaction = self.model.reactions.get_by_id(target_reaction)
+    def set_objective_from_target_reaction(self, target_rxn, minimize=False):
+        target_rxn = target_rxn if not isinstance(target_rxn, str) else self.model.reactions.get_by_id(target_rxn)
         sense = "max" if not minimize else "min"
-        self.model.objective = self.model.problem.Objective(
-            target_reaction.flux_expression, direction=sense
-        )
-        return target_reaction
+        self.model.objective = self.model.problem.Objective(target_rxn.flux_expression, direction=sense)
+        return target_rxn
 
     def biomass_expression(self):
         for met in self.model.metabolites:

diff --git a/modelseedpy/core/msprobability.py b/modelseedpy/core/msprobability.py
@@ -17,7 +17,7 @@ def add_biomass_objective(megaModel, captured_rxnIDs):
     else:
         # select the most conserved biomass composition
         for rxn in megaModel.reactions:
-            if "biomass" and not "EX_" in rxn.id:
+            if "biomass" and not "EX_" in rxn.id:   # randomly the first biomass reaction is pulled from the ASV set
                 megaModel.objective = Objective(rxn.flux_expression, direction="max")
                 break
     megaModel.solver.update()