# cobrame.util.mass
# Residue mass tables and helpers for computing protein and RNA masses.

from __future__ import division, absolute_import, print_function

from six import iteritems

from .dogma import transcription_table

# Mass (Daltons) of each amino acid with H and OH removed from either end,
# keyed by amino acid identifier.
# These residue masses are used to compute protein masses because a water is
# removed during formation of each amide bond; compute_protein_mass sums
# them and then adds back the one water that is not removed.
amino_acid_no_h2o = {
    'ala__L_c': 71.079,
    'arg__L_c': 157.197,
    'asn__L_c': 114.104,
    'asp__L_c': 114.08,
    'cys__L_c': 103.145,
    'gln__L_c': 128.131,
    'glu__L_c': 128.107,
    'gly_c': 57.052,
    'his__L_c': 137.142,
    'ile__L_c': 113.16,
    'leu__L_c': 113.16,
    'lys__L_c': 129.183,
    'met__L_c': 131.199,
    'phe__L_c': 147.177,
    'pro__L_c': 97.117,
    'ser__L_c': 87.078,
    'thr__L_c': 101.105,
    'trp__L_c': 186.214,
    'tyr__L_c': 163.176,
    'val__L_c': 99.133}

# Mass of the RNA nucleotides with the diphosphate removed, keyed by the
# nucleoside triphosphate identifier.
# compute_rna_mass sums these per-nucleotide masses and divides by 1000 to
# report kDa, so the values are presumably in Daltons like
# amino_acid_no_h2o -- TODO confirm.
rna_no_ppi = {
    'gtp_c': 344.2,
    'utp_c': 305.159,
    'ctp_c': 304.175,
    'atp_c': 328.201}

# Mass of the DNA nucleotides with the diphosphate removed, keyed by the
# deoxynucleoside triphosphate name.
# NOTE(review): unlike amino_acid_no_h2o and rna_no_ppi, these keys carry no
# '_c' suffix, and neither compute_protein_mass nor compute_rna_mass reads
# this table -- confirm the key format against whatever consumes it.
dna_mw_no_ppi = {'datp': 312.202,
                 'dctp': 286.16,
                 'dgtp': 328.201,
                 'dttp': 303.187}


def compute_protein_mass(amino_acid_count):
    """Compute the mass of a protein in kDa from its amino acid composition.

    Parameters
    ----------
    amino_acid_count : dict
        Maps amino acid identifiers (the keys of ``amino_acid_no_h2o``,
        e.g. ``'ala__L_c'``) to the number of times each one occurs in
        the protein.

    Returns
    -------
    float
        Mass of the protein in kDa.

    Raises
    ------
    KeyError
        If an identifier is not present in ``amino_acid_no_h2o``.

    """
    protein_mass = sum(amino_acid_no_h2o[aa] * count
                       for aa, count in iteritems(amino_acid_count))
    # Every entry in amino_acid_no_h2o has a water subtracted, but one water
    # per chain is not removed, so it is added back here.
    protein_mass += 18.015
    # The table is in Daltons; report kDa.
    return protein_mass / 1000
def compute_rna_mass(dna_sequence, excised_bases=None):
    """Compute the mass of an RNA in kDa from the DNA sequence encoding it.

    Parameters
    ----------
    dna_sequence : str
        DNA sequence (or any sequence supporting ``count``). Each distinct
        character is mapped to an RNA nucleotide identifier through
        ``transcription_table`` and counted.
    excised_bases : dict, optional
        Maps nucleoside monophosphate identifiers (``'gmp_c'``,
        ``'ump_c'``, ``'cmp_c'``, ``'amp_c'``) to the number of each base
        to subtract from the transcript. ``None`` or an empty dict means
        nothing is subtracted.

    Returns
    -------
    float
        Mass of the RNA in kDa.

    Raises
    ------
    KeyError
        If ``transcription_table`` yields an identifier that is not
        present in ``rna_no_ppi``.

    """
    if not excised_bases:
        excised_bases = {}

    # Seed every nucleotide with a zero count so that a sequence lacking one
    # of the four bases (or an empty sequence) does not raise KeyError when
    # the excised bases are subtracted below.
    nuc_count = dict.fromkeys(rna_no_ppi, 0)
    for base in set(dna_sequence):
        nuc_count[transcription_table[base]] += dna_sequence.count(base)

    # Excised bases are given as monophosphate ids; subtract each from the
    # count of the matching triphosphate id.
    excised_ids = (("gtp_c", "gmp_c"),
                   ("utp_c", "ump_c"),
                   ("ctp_c", "cmp_c"),
                   ("atp_c", "amp_c"))
    for ntp_id, nmp_id in excised_ids:
        nuc_count[ntp_id] -= excised_bases.get(nmp_id, 0)

    rna_mass = sum(rna_no_ppi[nuc] * count
                   for nuc, count in iteritems(nuc_count))

    # NOTE(review): the triphosphate correction is applied only when bases
    # were excised; this condition is kept exactly as found -- confirm it is
    # the intended one.
    if sum(excised_bases.values()) > 0:
        rna_mass += 174.951262  # 5' has 3 phosphates

    # Report kDa rather than the per-nucleotide units of rna_no_ppi.
    return rna_mass / 1000