Source code for cobrame.util.building

from __future__ import print_function, division, absolute_import

from warnings import warn
from six import iteritems

import pandas
from Bio import SeqIO

import cobrame
from cobrame.util import dogma


[docs]def add_transcription_reaction(me_model, tu_name, locus_ids, sequence, update=True): """ Create TranscriptionReaction object and add it to ME-Model. This includes the necessary transcription data. Parameters ---------- me_model : :class:`cobrame.core.model.MEModel` The MEModel object to which the reaction will be added tu_name : str ID of TU being transcribed. The TranscriptionReaction will be added as "transcription_+TU_name" The TranscriptionData will be added as just "TU_name" locus_ids : set Set of locus IDs that the TU transcribes sequence : str Nucleotide sequence of the TU. update : bool If True, use TranscriptionReaction's update function to update and add reaction stoichiometry Returns ------- :class:`cobrame.core.reaction.TranscriptionReaction` TranscriptionReaction for the TU """ transcription = cobrame.TranscriptionReaction("transcription_" + tu_name) transcription.transcription_data = \ cobrame.TranscriptionData(tu_name, me_model) transcription.transcription_data.nucleotide_sequence = sequence transcription.transcription_data.RNA_products = {"RNA_" + i for i in locus_ids} me_model.add_reaction(transcription) if update: transcription.update() return transcription
[docs]def create_transcribed_gene(me_model, locus_id, rna_type, seq, left_pos=None, right_pos=None, strand=None): """ Creates a `TranscribedGene` metabolite object and adds it to the ME-model Parameters ---------- me_model : :class:`cobrame.core.model.MEModel` The MEModel object to which the reaction will be added locus_id : str Locus ID of RNA product. The TranscribedGene will be added as "RNA + _ + locus_id" left_pos : int or None Left position of gene on the sequence of the (+) strain right_pos : int or None Right position of gene on the sequence of the (+) strain seq : str Nucleotide sequence of RNA product. Amino acid sequence, codon counts, etc. will be calculated based on this string. strand : str or None - (+) if the RNA product is on the leading strand - (-) if the RNA product is on the complementary strand rna_type : str Type of RNA of the product. tRNA, rRNA, or mRNA Used for determining how RNA product will be processed. Returns ------- :class:`cobrame.core.component.TranscribedGene` Metabolite object for the RNA product """ gene = cobrame.TranscribedGene('RNA_' + locus_id, rna_type, seq) gene.left_pos = left_pos gene.right_pos = right_pos gene.strand = strand me_model.add_metabolites([gene]) return gene
[docs]def add_translation_reaction(me_model, locus_id, dna_sequence, update=False): """ Creates and adds a TranslationReaction to the ME-model as well as the associated TranslationData A dna_sequence is required in order to add a TranslationReaction to the ME-model Parameters ---------- me_model : :class:`cobra.core.model.MEModel` The MEModel object to which the reaction will be added locus_id : str Locus ID of RNA product. The TranslationReaction will be added as "translation + _ + locus_id" The TranslationData will be added as "locus_id" dna_sequence : str DNA sequence of the RNA product. This string should be reverse transcribed if it originates on the complement strand. update : bool If True, use TranslationReaction's update function to update and add reaction stoichiometry """ # Create TranslationData translation_data = \ cobrame.TranslationData(locus_id, me_model, "RNA_" + locus_id, "protein_" + locus_id) translation_data.nucleotide_sequence = dna_sequence # Add RNA to model if it doesn't exist if "RNA_" + locus_id not in me_model.metabolites: warn('RNA_%s not present in model. Adding it now.') rna = cobrame.TranscribedGene('RNA_' + locus_id, 'mRNA', dna_sequence) me_model.add_metabolites(rna) # Create and add TranslationReaction with TranslationData translation_reaction = \ cobrame.TranslationReaction("translation_" + locus_id) me_model.add_reaction(translation_reaction) translation_reaction.translation_data = translation_data if update: translation_reaction.update()
[docs]def convert_aa_codes_and_add_charging(me_model, trna_aa, trna_to_codon, verbose=True): """ Adds tRNA charging reactions for all tRNAs in ME-model Parameters ---------- me_model : :class:`cobra.core.model.MEModel` The MEModel object to which the reaction will be added trna_aa : dict Dictionary of tRNA locus ID to 3 letter codes of the amino acid that the tRNA contributes {tRNA identifier (locus_id): amino_acid_3_letter_code} trna_to_codon : dict Dictionary of tRNA identifier to the codon which it associates {tRNA identifier (locus_id): codon_sequence} verbose : bool If True, display metabolites that were not previously added to the model and were thus added when creating charging reactions """ # convert amino acid 3 letter codes to metabolites for tRNA, aa in list(iteritems(trna_aa)): if aa == "OTHER": trna_aa.pop(tRNA) elif aa == "Sec": # Charge with precursor to selenocysteine trna_aa[tRNA] = me_model.metabolites.get_by_id('cys__L_c') elif aa == "Gly": trna_aa[tRNA] = me_model.metabolites.get_by_id("gly_c") else: trna_aa[tRNA] = \ me_model.metabolites.get_by_id(aa.lower() + "__L_c") # add in all the tRNA charging reactions for tRNA, aa in iteritems(trna_aa): for codon in trna_to_codon[tRNA]: trna_data = cobrame.tRNAData("tRNA_" + tRNA + "_" + codon, me_model, aa.id, "RNA_" + tRNA, codon) charging_reaction = \ cobrame.tRNAChargingReaction("charging_tRNA_%s_%s" % (tRNA, codon)) charging_reaction.tRNA_data = trna_data me_model.add_reaction(charging_reaction) charging_reaction.update(verbose=verbose)
[docs]def build_reactions_from_genbank(me_model, gb_filename, tu_frame=None, element_types={'CDS', 'rRNA', 'tRNA', 'ncRNA'}, verbose=True, frameshift_dict=None, trna_to_codon=None, update=True): # TODO handle special RNAse without type ('b3123') """Creates and adds transcription and translation reactions using genomic information from the organism's genbank file. Adds in the basic requirements for these reactions. Organism specific components are added ... Parameters ---------- me_model : :class:`cobrame.core.model.MEModel` The MEModel object to which the reaction will be added gb_filename : str Local name of the genbank file that will be used for ME-model construction tu_frame : :class:`pandas.DataFrame` DataFrame with indexes of the transcription unit name and columns containing the transcription unit starting and stopping location on the genome and whether the transcription unit is found on the main (+) strand or complementary (-) strand. If no transcription unit DataFrame is passed into the function, transcription units are added corresponding to each transcribed gene in the genbank file. element_types : set Transcription reactions will be added to the ME-model for all RNA feature.types in this set. This uses the nomenclature of the genbank file (gb_filename) verbose : bool If True, display metabolites that were not previously added to the model and were thus added when creating charging reactions frameshift_dict : dict {locus_id: genome_position_of_TU} If a locus_id is in the frameshift_dict, update it's nucleotide sequence to account of the frameshift """ if not frameshift_dict: frameshift_dict = {} if not trna_to_codon: trna_to_codon = {} metabolites = me_model.metabolites # Load genbank file and extract DNA sequence gb_file = SeqIO.read(gb_filename, 'gb') full_seq = str(gb_file.seq) # Dictionary of tRNA locus ID to the 3 letter code for the amino acid it # contributes trna_aa = {} # If no tu_frame is provided generate a new TU frame where each mRNA gets # its own TU using_tus = tu_frame is not None if not using_tus: tu_frame = pandas.DataFrame.from_dict( {"TU_" + i.qualifiers["locus_tag"][0]: {"start": int(i.location.start), "stop": int(i.location.end), "strand": "+" if i.strand == 1 else "-"} for i in gb_file.features if i.type in element_types}, orient="index") # Create transcription reactions for each TU and DNA sequence. # RNA_products will be added so no need to update now for tu_id in tu_frame.index: # subtract 1 from TU start site to account for 0 indexing sequence = dogma.extract_sequence(full_seq, tu_frame.start[tu_id]-1, tu_frame.stop[tu_id], tu_frame.strand[tu_id]) add_transcription_reaction(me_model, tu_id, set(), sequence, update=False) # Associate each feature (RNA_product) with a TU and add translation # reactions and demands for feature in gb_file.features: # Skip if not a gene used in ME construction if feature.type not in element_types or 'pseudo' in feature.qualifiers: continue # ---- Assign values for all important gene attributes ---- bnum = feature.qualifiers["locus_tag"][0] left_pos = int(feature.location.start) right_pos = int(feature.location.end) rna_type = 'mRNA' if feature.type == 'CDS' else feature.type strand = '+' if feature.strand == 1 else '-' seq = dogma.extract_sequence(full_seq, left_pos, right_pos, strand) # ---- Add gene metabolites and apply frameshift mutations---- frameshift_string = frameshift_dict.get(bnum) if len(seq) % 3 != 0 and frameshift_string: print('Applying frameshift on %s' % bnum) seq = dogma.return_frameshift_sequence(full_seq, frameshift_string) if strand == '-': seq = dogma.reverse_transcribe(seq) # Add TranscribedGene metabolite gene = create_transcribed_gene(me_model, bnum, rna_type, seq, left_pos, right_pos, strand) # ---- Add translation reaction for mRNA ---- if rna_type == "mRNA": add_translation_reaction(me_model, bnum, dna_sequence=seq) # ---- Create dict to use for adding tRNAChargingReactions ---- # tRNA_aa = {'amino_acid':'tRNA'} elif rna_type == "tRNA": trna_aa[bnum] = feature.qualifiers["product"][0].split("-")[1] # ---- Associate TranscribedGene to a TU ---- parent_tu = tu_frame[ (tu_frame.start - 1 <= left_pos) & (tu_frame.stop >= right_pos) & ( tu_frame.strand == strand)].index if len(parent_tu) == 0: if verbose: warn('No TU found for %s %s' % (rna_type, bnum)) tu_id = "TU_" + bnum parent_tu = [tu_id] add_transcription_reaction(me_model, tu_id, set(), seq, update=False) for TU_id in parent_tu: me_model.process_data.get_by_id(TU_id).RNA_products.add( "RNA_" + bnum) convert_aa_codes_and_add_charging(me_model, trna_aa, trna_to_codon, verbose=verbose) if update: # Update all newly added reactions for r in me_model.reactions: if isinstance(r, (cobrame.TranscriptionReaction, cobrame.TranslationReaction)): r.update()
[docs]def add_m_model_content(me_model, m_model, complex_metabolite_ids=None): """ Add metabolite and reaction attributes to me_model from m_model. Also creates StoichiometricData objects for each reaction in m_model, and adds reactions directly to me_model if they are exchanges or demands. Parameters ---------- me_model : :class:`cobrame.core.model.MEModel` The MEModel object to which the content will be added m_model : :class:`cobra.core.model.Model` The m_model which will act as the source of metabolic content for MEModel complex_metabolite_ids : list List of complexes which are 'metabolites' in the m-model reaction matrix, but should be treated as complexes """ if not complex_metabolite_ids: complex_metabolite_ids = [] for met in m_model.metabolites: if met.id in complex_metabolite_ids: new_met = cobrame.Complex(met.id) elif met.id.startswith("RNA"): raise ValueError('Processed M-model should not contain RNAs (%s)' % met.id) else: new_met = cobrame.Metabolite(met.id) new_met.name = met.name new_met.formula = met.formula new_met.compartment = met.compartment new_met.charge = met.charge new_met.annotation = met.annotation new_met.notes = met.notes me_model.add_metabolites(new_met) for reaction in m_model.reactions: if reaction.id.startswith("EX_") or reaction.id.startswith("DM_"): new_reaction = cobrame.MEReaction(reaction.id) me_model.add_reaction(new_reaction) new_reaction.lower_bound = reaction.lower_bound new_reaction.upper_bound = reaction.upper_bound for met, stoichiometry in iteritems(reaction.metabolites): new_reaction.add_metabolites( {me_model.metabolites.get_by_id(met.id): stoichiometry}) else: reaction_data = cobrame.StoichiometricData(reaction.id, me_model) reaction_data.lower_bound = reaction.lower_bound reaction_data.upper_bound = reaction.upper_bound reaction_data._stoichiometry = {k.id: v for k, v in iteritems(reaction.metabolites)}
[docs]def add_dummy_reactions(me_model, dna_seq, update=True): """ Add all reactions necessary to produce a dummy reaction catalyzed by "CPLX_dummy". Parameters ---------- me_model : :class:`cobrame.core.model.MEModel` The MEModel object to which the content will be added dna_seq : str DNA sequence of dummy gene. Should be representative of the average codon composition, amino acid composition, length of a gene in the organism being modeled update : bool If True, run update functions on all transcription, translation, complex formation, and metabolic reactions added when constructing dummy reactions. """ dummy = cobrame.StoichiometricData("dummy_reaction", me_model) dummy.lower_bound = 0 dummy.upper_bound = 1000 dummy._stoichiometry = {'CPLX_dummy': -1} create_transcribed_gene(me_model, 'dummy', 'mRNA', dna_seq) add_transcription_reaction(me_model, "RNA_dummy", {"dummy"}, dna_seq) me_model.add_metabolites(cobrame.TranslatedGene("protein_" + "dummy")) add_translation_reaction(me_model, "dummy", dna_sequence=dna_seq, update=update) try: complex_data = cobrame.ComplexData("CPLX_dummy", me_model) except ValueError: warn('CPLX_dummy already in model') complex_data = me_model.process_data.get_by_id('CPLX_dummy') complex_data.stoichiometry = {"protein_dummy": 1} if update: complex_data.create_complex_formation()
[docs]def add_complex_to_model(me_model, complex_id, complex_stoichiometry, complex_modifications=None): """ Adds ComplexData to the model for a given complex. Parameters ---------- me_model : :class:`cobrame.core.model.MEModel` complex_id : str ID of the complex and thus the model ComplexData complex_stoichiometry : dict {complex_id: {protein_<locus_tag>: stoichiometry}} complex_modifications : dict {subreaction_id: stoichiometry} """ if not complex_modifications: complex_modifications = {} complex_data = cobrame.ComplexData(complex_id, me_model) # must add update stoichiometry one by one since it is a defaultdict for metabolite, value in iteritems(complex_stoichiometry): complex_data.stoichiometry[metabolite] += value for modification, value in iteritems(complex_modifications): complex_data.subreactions[modification] = value
[docs]def add_subreaction_data(me_model, modification_id, modification_stoichiometry, modification_enzyme=None, verbose=True): """ Creates a SubreactionData object for each modification defined by the function inputs. It's assumed every complex modification occurs spontaneously, unless a modification_enzyme argument is passed. If a modification uses an enzyme this can be updated after the SubreactionData object is already created Parameters ---------- me_model : :class:`cobrame.core.model.MEModel` """ if modification_id in me_model.process_data: if verbose: warn('Subreaction (%s) already in model' % modification_id) else: pass else: modification_data = cobrame.SubreactionData(modification_id, me_model) modification_data.stoichiometry = modification_stoichiometry modification_data.enzyme = modification_enzyme
[docs]def add_model_complexes(me_model, complex_stoichiometry_dict, complex_modification_dict, verbose=True): """ Construct ComplexData for complexes into MEModel from its subunit stoichiometry, and a dictionary of its modification metabolites. It is assumed that each modification adds one equivalent of the modification metabolite. Multiple Intended to be used as a function for large-scale complex addition. For adding individual ComplexData objects, use add_complex_to_model Parameters ---------- me_model : :class:`cobrame.core.model.MEModel` complex_stoichiometry_dict : dict {unmodified_complex_id: {protein_<locus_tag>: stoichiometry}} complex_modification_dict : dict {modified_complex_id:{core_enzyme: unmodified_complex_id, 'modifications: {mod_metabolite: stoichiometry}}} """ for complex_id, stoichiometry in iteritems(complex_stoichiometry_dict): add_complex_to_model(me_model, complex_id, stoichiometry, {}) for modified_complex_id, info in iteritems(complex_modification_dict): modification_dict = {} for metabolite, number in iteritems(info['modifications']): modification_id = 'mod_' + metabolite # add modification as subreaction add_subreaction_data(me_model, modification_id, {metabolite: -1}, verbose=verbose) # stoichiometry of modification determined in # modification_data.stoichiometry modification_dict[modification_id] = abs(number) core_enzyme = \ complex_modification_dict[modified_complex_id]['core_enzyme'] stoichiometry = complex_stoichiometry_dict[core_enzyme] add_complex_to_model(me_model, modified_complex_id, stoichiometry, complex_modifications=modification_dict)
[docs]def add_metabolic_reaction_to_model(me_model, stoichiometric_data_id, directionality, complex_id=None, spontaneous=False, update=False, keff=65): """ Creates and add a MetabolicReaction to a MEModel. Parameters ---------- me_model : :class:`cobrame.core.model.MEModel` MEModel that the MetabolicReaction will be added to stoichiometric_data_id : str ID of the StoichiometricData for the reaction being added directionality : str - Forward: Add reaction that occurs in the forward direction - Reverse: Add reaction that occurs in the reverse direction complex_id : str or None ID of the ComplexData for the enzyme that catalyze the reaction being added. spontaneous : bool - If True and complex_id='' add reaction as spontaneous reaction - If False and complex_id='' add reaction as orphan (CPLX_dummy catalyzed) """ # Get stoichiometric data for reaction being added try: stoichiometric_data = \ me_model.process_data.get_by_id(stoichiometric_data_id) except KeyError: raise Exception("Stoichiometric data for %s has not been added to" " model" % stoichiometric_data_id) # Get complex data and id based on arguments passed into function if type(complex_id) == str: complex_data = me_model.process_data.get_by_id(complex_id) elif complex_id is None and spontaneous is True: complex_id = "SPONT" complex_data = None elif complex_id is None and spontaneous is False: complex_id = "CPLX_dummy" try: complex_data = me_model.process_data.get_by_id(complex_id) except KeyError: raise Exception("CPLX_dummy must be added to complex data to add" "orphan reactions") else: raise ValueError("Complex id (%s) must be a string or None" % str(complex_id)) if directionality.lower() == 'forward': direction = "_FWD_" reverse_flag = False elif directionality.lower() == 'reverse': direction = "_REV_" reverse_flag = True else: raise NameError("Reaction direction must be 'forward' or 'reverse'") r = cobrame.MetabolicReaction(''.join([stoichiometric_data_id, direction, complex_id])) me_model.add_reaction(r) r.keff = keff r.stoichiometric_data = stoichiometric_data r.reverse = reverse_flag if complex_data is not None: r.complex_data = complex_data if update: r.update(verbose=True)
[docs]def add_reactions_from_stoichiometric_data(me_model, rxn_to_cplx_dict, rxn_info_frame, update=False, keff=65): """ Creates and adds MetabolicReaction for all StoichiometricData in model. Intended for use when adding all reactions from stoichiometric data for the first time. For adding an individual reaction use add_metabolic_reaction_to_model() Parameters ---------- me_model : :class:`cobrame.core.model.MEModel` MEModel that the MetabolicReaction will be added to rxn_to_cplx_dict : dict {StoichiometricData.id: catalytic_enzyme_id} rxn_info_frame: :class:`pandas.Dataframe` Contains the ids, names and reversibility for each reaction in the metabolic reaction matrix as well as whether the reaction is spontaneous """ for reaction_data in me_model.stoichiometric_data: try: spontaneous_flag = rxn_info_frame.is_spontaneous[reaction_data.id] except KeyError: spontaneous_flag = 0 warn("(%s) not in rxn_info_frame assumed nonspontaneous" % reaction_data.id) if spontaneous_flag == 1: spontaneous = True elif spontaneous_flag == 0: spontaneous = False else: raise Exception("is_spontaneous must be '1' or '0'") # Reactions can be catalyzed by multiple isozymes so retrieve list of # complexes that catalyze the reaction complexes_list = rxn_to_cplx_dict.get(reaction_data.id, [None]) # Add metabolic reactions for each isozyme for complex_id in complexes_list: directionality_list = [] if reaction_data.lower_bound < 0: directionality_list.append('reverse') if reaction_data.upper_bound > 0: directionality_list.append('forward') elif reaction_data.upper_bound == 0 and \ reaction_data.lower_bound == 0: directionality_list.append('forward') warn('Reaction (%s) cannot carry flux' % reaction_data.id) for directionality in directionality_list: add_metabolic_reaction_to_model(me_model, reaction_data.id, directionality, complex_id=complex_id, spontaneous=spontaneous, update=update, keff=keff)