Source code for cobrame.core.model

from __future__ import print_function, division, absolute_import

import re
from six import iteritems
from warnings import warn

from cobra import Model, DictList
import numpy as np
from scipy.sparse import dok_matrix

from cobrame.core.reaction import (SummaryVariable, MetabolicReaction,
                                   TranscriptionReaction, TranslationReaction)
from cobrame.core.component import (Constraint, ProcessedProtein, Complex,
                                    TranslatedGene, TranscribedGene)
from cobrame.core import processdata
from cobrame.util import mu


class MEModel(Model):
    """cobra ``Model`` subclass that additionally carries process data.

    On construction a ``biomass`` constraint and a ``biomass_dilution``
    summary variable (both bounds fixed to ``mu``) are added to the model.

    Attributes
    ----------
    global_info : dict
        Created empty; not otherwise used inside this class.
    process_data : DictList
        Process data objects, looked up by id throughout this class.
    """

    def __init__(self, *args):
        Model.__init__(self, *args)
        self.global_info = {}
        self.process_data = DictList()

        # create the biomass/dilution constraint
        self._biomass = Constraint("biomass")
        self._biomass_dilution = SummaryVariable("biomass_dilution")
        self._biomass_dilution.add_metabolites({self._biomass: -1})
        self.add_reactions([self._biomass_dilution])
        self._biomass_dilution.upper_bound = mu
        self._biomass_dilution.lower_bound = mu

        # maintenance energy
        self._gam = 0.
        self._ngam = 0.

        # Unmodeled protein is handled by converting protein_biomass to
        # biomass, and requiring production of the appropriate amount of dummy
        # protein
        self._unmodeled_protein_fraction = None

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------
    def _optimal_solution(self, solution):
        """Return `solution` (or ``self.solution`` if None); require optimal.

        Raises
        ------
        ValueError
            If the solution status is not ``"optimal"``.
        """
        if solution is None:
            solution = self.solution
        if solution.status != "optimal":
            raise ValueError("solution status '%s' is not 'optimal'" %
                             solution.status)
        return solution

    def _process_data_of_type(self, data_type):
        """Lazily yield every entry of ``process_data`` of `data_type`."""
        for data in self.process_data:
            if isinstance(data, data_type):
                yield data

    @staticmethod
    def _is_consumed(met, ignored_prefix=None):
        """Return True if any reaction of `met` has a negative coefficient.

        Reactions whose id starts with `ignored_prefix` (when given) are not
        counted as consumers.
        """
        for rxn in met.reactions:
            if rxn.metabolites[met] < 0 and not (
                    ignored_prefix and rxn.id.startswith(ignored_prefix)):
                return True
        return False

    # ------------------------------------------------------------------
    # biomass / maintenance bookkeeping
    # ------------------------------------------------------------------
    def add_biomass_constraints_to_model(self, biomass_types):
        """Add a constraint and ``<type>_to_biomass`` variable per type.

        Each summary variable consumes one unit of the new constraint and
        produces one unit of the model's ``biomass`` constraint.

        Parameters
        ----------
        biomass_types : iterable of str
            Ids for the new constraints; each must contain ``"_biomass"``.

        Raises
        ------
        ValueError
            If an id does not contain ``"_biomass"``.
        """
        for biomass_type in biomass_types:
            if '_biomass' not in biomass_type:
                raise ValueError('Biomass types should be suffixed with '
                                 '"_biomass"')
            constraint_obj = Constraint(biomass_type)
            summary_variable_obj = SummaryVariable(
                "%s_to_biomass" % biomass_type)
            summary_variable_obj.add_metabolites({constraint_obj: -1,
                                                  self._biomass: 1})
            self.add_reactions([summary_variable_obj])

    @property
    def unmodeled_protein(self):
        """The metabolite with id ``protein_dummy``."""
        return self.metabolites.get_by_id("protein_dummy")

    @property
    def unmodeled_protein_fraction(self):
        """The value last assigned through the setter (None initially)."""
        return self._unmodeled_protein_fraction

    @property
    def unmodeled_protein_biomass(self):
        """The metabolite with id ``unmodeled_protein_biomass``."""
        return self.metabolites.get_by_id('unmodeled_protein_biomass')

    @unmodeled_protein_fraction.setter
    def unmodeled_protein_fraction(self, value):
        if 'protein_biomass_to_biomass' not in self.reactions:
            # Fixed: the original message was missing the space between
            # "protein" and "biomass".
            raise UserWarning("Must add SummaryVariable handling the protein "
                              "biomass constraint (via "
                              ":meth:`add_biomass_constraints_to_model`) "
                              "before defining the unmodeled protein fraction")

        # see the Biomass_formulations for an explanation
        amount = value / (1 - value)
        protein_to_biomass = self.reactions.protein_biomass_to_biomass
        # combine=False overwrites any coefficient already present
        protein_to_biomass.add_metabolites(
            {self.unmodeled_protein_biomass: -amount}, combine=False)
        protein_to_biomass.add_metabolites(
            {self._biomass: 1 + amount}, combine=False)
        self._unmodeled_protein_fraction = value

    @property
    def gam(self):
        """The value last assigned through the setter (0. initially)."""
        return self._gam

    @gam.setter
    def gam(self, value):
        if 'GAM' not in self.reactions:
            warn('Adding GAM reaction to model')
            self.add_reactions([SummaryVariable("GAM")])
            self.reactions.GAM.lower_bound = mu
        atp_hydrolysis = {'atp_c': -1, 'h2o_c': -1, 'adp_c': 1, 'h_c': 1,
                          'pi_c': 1}
        # Scale the ATP hydrolysis stoichiometry by `value`, overwriting
        # whatever coefficients the GAM reaction held before.
        for met, coeff in iteritems(atp_hydrolysis):
            self.reactions.GAM.add_metabolites({met: value * coeff},
                                               combine=False)
        self._gam = value

    @property
    def ngam(self):
        """The value last assigned through the setter (0. initially)."""
        return self._ngam

    @ngam.setter
    def ngam(self, value):
        if 'ATPM' not in self.reactions:
            warn('Adding ATPM reaction to model')
            atp_hydrolysis = {'atp_c': -1, 'h2o_c': -1, 'adp_c': 1, 'h_c': 1,
                              'pi_c': 1}
            self.add_reactions([SummaryVariable("ATPM")])
            self.reactions.ATPM.add_metabolites(atp_hydrolysis)
        self.reactions.ATPM.lower_bound = value
        self._ngam = value

    # ------------------------------------------------------------------
    # typed views onto process_data (each returns a lazy generator)
    # ------------------------------------------------------------------
    @property
    def stoichiometric_data(self):
        return self._process_data_of_type(processdata.StoichiometricData)

    @property
    def complex_data(self):
        return self._process_data_of_type(processdata.ComplexData)

    @property
    def translation_data(self):
        return self._process_data_of_type(processdata.TranslationData)

    @property
    def transcription_data(self):
        return self._process_data_of_type(processdata.TranscriptionData)

    @property
    def generic_data(self):
        return self._process_data_of_type(processdata.GenericData)

    @property
    def tRNA_data(self):
        return self._process_data_of_type(processdata.tRNAData)

    @property
    def translocation_data(self):
        return self._process_data_of_type(processdata.TranslocationData)

    @property
    def posttranslation_data(self):
        return self._process_data_of_type(processdata.PostTranslationData)

    @property
    def subreaction_data(self):
        return self._process_data_of_type(processdata.SubreactionData)

    # ------------------------------------------------------------------
    # flux extraction
    # ------------------------------------------------------------------
    def get_metabolic_flux(self, solution=None):
        """extract the flux state for metabolic reactions

        Returns a dict keyed by stoichiometric data id. Flux through each
        ``MetabolicReaction`` is added to (or, when ``reaction.reverse`` is
        set, subtracted from) the entry of its stoichiometric data. Other
        reactions whose id starts with ``"EX_"`` or ``"DM"`` are reported
        under their own id.

        Raises ValueError if the solution is not optimal.
        """
        solution = self._optimal_solution(solution)
        flux_dict = {r.id: 0 for r in self.stoichiometric_data}
        for reaction in self.reactions:
            if isinstance(reaction, MetabolicReaction):
                m_reaction_id = reaction.stoichiometric_data.id
                flux = solution.x_dict[reaction.id]
                if reaction.reverse:
                    flux_dict[m_reaction_id] -= flux
                else:
                    flux_dict[m_reaction_id] += flux
            elif reaction.id.startswith(("EX_", "DM")):
                flux_dict[reaction.id] = solution.x_dict[reaction.id]
        return flux_dict

    def get_transcription_flux(self, solution=None):
        """extract the transcription flux state

        Returns a dict keyed by RNA product id with the leading ``"RNA_"``
        removed; the flux of every ``TranscriptionReaction`` is summed into
        the entry of each of its RNA products.

        Raises ValueError if the solution is not optimal.
        """
        solution = self._optimal_solution(solution)
        flux_dict = {}
        for reaction in self.reactions:
            if not isinstance(reaction, TranscriptionReaction):
                continue
            for rna_id in reaction.transcription_data.RNA_products:
                locus_id = rna_id.replace("RNA_", "", 1)
                flux_dict[locus_id] = \
                    flux_dict.get(locus_id, 0) + solution.x_dict[reaction.id]
        return flux_dict

    def get_translation_flux(self, solution=None):
        """extract the translation flux state

        Returns a dict keyed by translation data id holding the summed flux
        of the ``TranslationReaction`` objects referencing that data.

        Raises ValueError if the solution is not optimal.
        """
        solution = self._optimal_solution(solution)
        flux_dict = {r.id: 0 for r in self.translation_data}
        for reaction in self.reactions:
            if isinstance(reaction, TranslationReaction):
                protein_id = reaction.translation_data.id
                flux_dict[protein_id] += solution.x_dict[reaction.id]
        return flux_dict

    # ------------------------------------------------------------------
    # numerical views of the model at a fixed growth rate
    # ------------------------------------------------------------------
    def construct_s_matrix(self, growth_rate):
        """build the stoichiometric matrix at a specific growth rate

        Returns a ``dok_matrix`` of shape (metabolites, reactions).
        Coefficients exposing ``subs`` have ``mu`` substituted by
        `growth_rate` before conversion to float.
        """
        # initialize to 0
        s = dok_matrix((len(self.metabolites), len(self.reactions)))
        # populate with stoichiometry
        for i, r in enumerate(self.reactions):
            for met, value in iteritems(r._metabolites):
                met_index = self.metabolites.index(met)
                if hasattr(value, "subs"):
                    s[met_index, i] = float(value.subs(mu, growth_rate))
                else:
                    s[met_index, i] = float(value)
        return s

    def construct_attribute_vector(self, attr_name, growth_rate):
        """build a vector of a reaction attribute at a specific growth rate

        Mainly used for upper and lower bounds"""
        return np.array([
            float(value.subs(mu, growth_rate))
            if hasattr(value, "subs") else float(value)
            for value in self.reactions.list_attr(attr_name)])

    def compute_solution_error(self, solution=None):
        """Quantify how far a solution is from satisfying the model.

        The stoichiometric matrix and bounds are evaluated at ``solution.f``.

        Returns
        -------
        dict
            ``max_error`` / ``sum_error``: maximum and sum of ``abs(S * x)``;
            ``upper_bound_error`` / ``lower_bound_error``: size of the worst
            bound violation, or 0 when no bound is violated.
        """
        errors = {}
        if solution is None:
            solution = self.solution
        s = self.construct_s_matrix(solution.f)
        lb = self.construct_attribute_vector("lower_bound", solution.f)
        ub = self.construct_attribute_vector("upper_bound", solution.f)
        x = np.array(solution.x)
        err = abs(s * x)
        errors["max_error"] = err.max()
        errors["sum_error"] = err.sum()
        ub_err = min(ub - x)
        errors["upper_bound_error"] = abs(ub_err) if ub_err < 0 else 0
        lb_err = min(x - lb)
        errors["lower_bound_error"] = abs(lb_err) if lb_err < 0 else 0
        return errors

    def update(self):
        """updates all component reactions"""
        for r in self.reactions:
            if hasattr(r, "update"):
                r.update()

    # ------------------------------------------------------------------
    # model reduction
    # ------------------------------------------------------------------
    def prune(self, skip=None):
        """remove all unused metabolites and reactions

        This should be run after the model is fully built. It will be
        difficult to add new content to the model once this has been run.

        skip: list
            List of complexes/proteins/mRNAs/TUs to remain unpruned from model.
        """
        if not skip:
            skip = []

        # Complexes that take part in exactly one reaction
        complex_data_list = [i.id for i in self.complex_data
                             if i.id not in skip]
        for c_d in complex_data_list:
            c = self.process_data.get_by_id(c_d)
            cplx = c.complex
            if len(cplx.reactions) == 1:
                list(cplx.reactions)[0].delete(remove_orphans=True)
                self.process_data.remove(self.process_data.get_by_id(c_d))

        # Folded proteins that no reaction consumes
        for p in self.metabolites.query('_folded'):
            if 'partially' in p.id or p.id in skip:
                continue
            if self._is_consumed(p):
                continue
            while len(p.reactions) > 0:
                list(p.reactions)[0].delete(remove_orphans=True)
                # NOTE(review): nesting reconstructed from a collapsed
                # source; the process data removal is assumed to sit inside
                # the while loop.
                for data in self.process_data.query(p.id):
                    self.process_data.remove(data.id)

        # Processed proteins that no reaction consumes
        for p in self.metabolites.query(re.compile('^protein_')):
            if not isinstance(p, ProcessedProtein) or p.id in skip:
                continue
            if self._is_consumed(p):
                continue
            for rxn in list(p.reactions):
                self.process_data.remove(rxn.posttranslation_data.id)
                rxn.delete(remove_orphans=True)

        # Translated genes consumed only by degradation reactions (or not
        # consumed at all)
        for p in self.metabolites.query(re.compile('^protein_')):
            if not isinstance(p, TranslatedGene) or p.id in skip:
                continue
            if self._is_consumed(p, ignored_prefix='degradation'):
                continue
            p_id = p.id.replace('protein_', '')
            for rxn in list(p.reactions):
                # Fixed: the data entry used to be looked up and removed once
                # per reaction, raising KeyError as soon as the protein had a
                # second reaction. Remove it only while it is still present.
                if p_id in self.process_data:
                    self.process_data.remove(p_id)
                rxn.delete(remove_orphans=True)

        # RNAs consumed only by their DM_ reaction (or not consumed at all)
        removed_rna = set()
        for m in list(self.metabolites.query(re.compile("^RNA_"))):
            if m.id in skip or self._is_consumed(m, ignored_prefix='DM_'):
                continue
            try:
                self.reactions.get_by_id('DM_' + m.id).remove_from_model(
                    remove_orphans=True)
                if m in self.metabolites:
                    # Defaults to subtractive when removing reaction
                    m.remove_from_model()
            except KeyError:
                # No DM_ reaction for this RNA: leave it in place
                pass
            else:
                removed_rna.add(m.id)

        # Transcription reactions that no longer yield a TranscribedGene
        for t in self.reactions.query('transcription_TU'):
            delete = t.id not in skip
            for product in t.products:
                if isinstance(product, TranscribedGene):
                    delete = False

            t_process_id = t.id.replace('transcription_', '')
            if delete:
                t.remove_from_model(remove_orphans=True)
                self.process_data.remove(t_process_id)
            else:
                # gets rid of the removed RNA from the products
                self.process_data.get_by_id(
                    t_process_id).RNA_products.difference_update(removed_rna)
                # update to update the TranscriptionReaction mRNA biomass
                # stoichiometry with new RNA_products
                t.update()

    def remove_genes_from_model(self, gene_list):
        """Remove each gene's RNA, protein and dependent complex reactions.

        For every id in `gene_list` the ``RNA_<id>`` metabolite is removed,
        the metabolic reactions of every complex containing ``protein_<id>``
        are removed (announcing each complex on stdout), and the protein is
        removed destructively. Afterwards, transcription reactions with no
        ``TranscribedGene`` product are removed together with their process
        data.
        """
        for gene in gene_list:
            # defaults to subtractive when removing model
            self.metabolites.get_by_id('RNA_' + gene).remove_from_model()
            protein = self.metabolites.get_by_id('protein_' + gene)
            for cplx in protein.complexes:
                print('Complex (%s) removed from model' % cplx.id)
                for rxn in cplx.metabolic_reactions:
                    try:
                        self.process_data.remove(rxn.id.split('_')[0])
                    except ValueError:
                        # no process data stored under that id
                        pass
                    rxn.remove_from_model()

            # If cannot import SymbolicParameter, assume using cobrapy
            # versions <= 0.5.11
            try:
                from optlang.interface import SymbolicParameter  # noqa: F401
            except ImportError:
                protein.remove_from_model(method='destructive')
            else:
                protein.remove_from_model(destructive=True)

        # Remove all transcription reactions that now do not form a used
        # transcript
        # NOTE(review): nesting reconstructed from a collapsed source; this
        # cleanup is assumed to run once, after the gene loop.
        for t in self.reactions.query('transcription_TU'):
            delete = True
            for product in t.products:
                if isinstance(product, TranscribedGene):
                    delete = False
            if delete:
                t.remove_from_model(remove_orphans=True)
                t_process_id = t.id.replace('transcription_', '')
                self.process_data.remove(t_process_id)

    def set_sasa_keffs(self, median_keff):
        """Rescale every keff by ``formula_weight ** (3/4)``.

        The scaling quantity ("SASA" in the code) of each ``Complex``
        metabolite is ``formula_weight ** (3/4)``. Each keff is set to
        ``sasa * median_keff / median_sasa`` where ``median_sasa`` is the
        median over all complexes in the model. ``TranslocationData`` entries
        are left untouched. Calls :meth:`update` when done.

        Raises
        ------
        UserWarning
            If the SASA computed for a reaction or process data entry is 0.
        """
        # Get median SASA value considering all complexes in model
        sasa_list = [met.formula_weight ** (3. / 4)
                     for met in self.metabolites if isinstance(met, Complex)]
        median_sasa = np.median(np.array(sasa_list))

        # redo scaling average SASA to 65.
        for rxn in self.reactions:
            if hasattr(rxn, 'keff') and rxn.complex_data is not None:
                sasa = rxn.complex_data.complex.formula_weight ** (3. / 4.)
                if sasa == 0:
                    raise UserWarning('No SASA for %s' % rxn)
                rxn.keff = sasa * median_keff / median_sasa

        for data in self.process_data:
            if isinstance(data, processdata.TranslocationData):
                continue
            if hasattr(data, 'keff') and data.enzyme is not None:
                # `enzyme` is either a single complex id or a collection
                cplxs = [data.enzyme] if isinstance(data.enzyme, str) \
                    else data.enzyme
                sasa = 0.
                for cplx in cplxs:
                    sasa += \
                        self.metabolites.get_by_id(cplx).formula_weight ** \
                        (3. / 4)
                if sasa == 0:
                    # Fixed: previously reported the stale `rxn` left over
                    # from the reaction loop instead of this data entry.
                    raise UserWarning('No SASA for %s' % data.id)
                data.keff = sasa * median_keff / median_sasa

        self.update()