// Team:TU-Munich/AnnotatorCode.js
// From 2013.igem.org
// Revision as of 10:20, 21 July 2013 by ChristopherW (Talk | contribs)

/**

*  DATA VALUES
*/

// Key set of the 20 standard amino acids (one-letter codes); every value is 0.
// The object is iterated with for...in elsewhere, so the key order is kept as is.
var amino_acids = {
  A: 0, R: 0, N: 0, D: 0, C: 0,
  Q: 0, E: 0, G: 0, I: 0, H: 0,
  K: 0, L: 0, M: 0, F: 0, P: 0,
  S: 0, T: 0, W: 0, Y: 0, V: 0
};

// Amino acid weights, keyed by one-letter code.
// Source: http://web.expasy.org/findmod/findmod_masses.html#AA
var amino_weights = {
  "A": 71.0788, "C": 103.1388, "D": 115.0886, "E": 129.1155,
  "F": 147.1766, "G": 57.0519, "H": 137.1411, "I": 113.1594,
  "K": 128.1741, "L": 113.1594, "M": 131.1926, "N": 114.1038,
  "P": 97.1167, "Q": 128.1307, "R": 156.1875, "S": 87.0782,
  "T": 101.1051, "V": 99.1326, "W": 186.2132, "Y": 163.1760
};

// Average molecular weight of one molecule of water.
var water_weight = 18.01528;

// Extinction coefficients for Y (Tyr) and W (Trp), as well as Cystine
// (formed by the disulfide bond of two Cysteine).
var extinction = { Y: 1490, W: 5500, Cystine: 125 };

// Kyte & Doolittle index of hydrophobicity, keyed by one-letter amino acid code.
var kd = {
  'A': 1.8, 'R': -4.5, 'N': -3.5, 'D': -3.5, 'C': 2.5,
  'Q': -3.5, 'E': -3.5, 'G': -0.4, 'H': -3.2, 'I': 4.5,
  'L': 3.8, 'K': -3.9, 'M': 1.9, 'F': 2.8, 'P': -1.6,
  'S': -0.8, 'T': -0.7, 'W': -0.9, 'Y': -1.3, 'V': 4.2
};

// Flexibility
// Normalized flexibility parameters (B-values), average (Vihinen et al., 1994),
// keyed by one-letter amino acid code.
var Flex = {
  'A': 0.984, 'C': 0.906, 'E': 1.094, 'D': 1.068,
  'G': 1.031, 'F': 0.915, 'I': 0.927, 'H': 0.950,
  'K': 1.102, 'M': 0.952, 'L': 0.935, 'N': 1.048,
  'Q': 1.037, 'P': 1.049, 'S': 1.046, 'R': 1.008,
  'T': 0.997, 'W': 0.904, 'V': 0.931, 'Y': 0.929
};

// Hydrophilicity
// 1 Hopp & Wood
// Proc. Natl. Acad. Sci. U.S.A. 78:3824-3828(1981).
var hw = {
  'A': -0.5, 'R': 3.0, 'N': 0.2, 'D': 3.0, 'C': -1.0,
  'Q': 0.2, 'E': 3.0, 'G': 0.0, 'H': -0.5, 'I': -1.8,
  'L': -1.8, 'K': 3.0, 'M': -1.3, 'F': -2.5, 'P': 0.0,
  'S': 0.3, 'T': -0.4, 'W': -3.4, 'Y': -2.3, 'V': -1.5
};

// Surface accessibility
// 1 Emini Surface fractional probability
var em = {
  'A': 0.815, 'R': 1.475, 'N': 1.296, 'D': 1.283, 'C': 0.394,
  'Q': 1.348, 'E': 1.445, 'G': 0.714, 'H': 1.180, 'I': 0.603,
  'L': 0.603, 'K': 1.545, 'M': 0.714, 'F': 0.695, 'P': 1.236,
  'S': 1.115, 'T': 1.184, 'W': 0.808, 'Y': 1.089, 'V': 0.606
};

// 2 Janin Interior to surface transfer energy scale
var ja = {
  'A': 0.28, 'R': -1.14, 'N': -0.55, 'D': -0.52, 'C': 0.97,
  'Q': -0.69, 'E': -1.01, 'G': 0.43, 'H': -0.31, 'I': 0.60,
  'L': 0.60, 'K': -1.62, 'M': 0.43, 'F': 0.46, 'P': -0.42,
  'S': -0.19, 'T': -0.32, 'W': 0.29, 'Y': -0.15, 'V': 0.60
};

// A two-dimensional dictionary for calculating the instability index.
// Guruprasad K., Reddy B.V.B., Pandit M.W. Protein Engineering 4:155-161(1990).
// It is based on dipeptide values, therefore the value for the dipeptide DG
// is DIWV['D']['G'].
var DIWV = {
  'A': {'A': 1.0, 'C': 44.94, 'E': 1.0, 'D': -7.49,
        'G': 1.0, 'F': 1.0, 'I': 1.0, 'H': -7.49,
        'K': 1.0, 'M': 1.0, 'L': 1.0, 'N': 1.0,
        'Q': 1.0, 'P': 20.26, 'S': 1.0, 'R': 1.0,
        'T': 1.0, 'W': 1.0, 'V': 1.0, 'Y': 1.0},
  'C': {'A': 1.0, 'C': 1.0, 'E': 1.0, 'D': 20.26,
        'G': 1.0, 'F': 1.0, 'I': 1.0, 'H': 33.60,
        'K': 1.0, 'M': 33.60, 'L': 20.26, 'N': 1.0,
        'Q': -6.54, 'P': 20.26, 'S': 1.0, 'R': 1.0,
        'T': 33.60, 'W': 24.68, 'V': -6.54, 'Y': 1.0},
  'E': {'A': 1.0, 'C': 44.94, 'E': 33.60, 'D': 20.26,
        'G': 1.0, 'F': 1.0, 'I': 20.26, 'H': -6.54,
        'K': 1.0, 'M': 1.0, 'L': 1.0, 'N': 1.0,
        'Q': 20.26, 'P': 20.26, 'S': 20.26, 'R': 1.0,
        'T': 1.0, 'W': -14.03, 'V': 1.0, 'Y': 1.0},
  'D': {'A': 1.0, 'C': 1.0, 'E': 1.0, 'D': 1.0,
        'G': 1.0, 'F': -6.54, 'I': 1.0, 'H': 1.0,
        'K': -7.49, 'M': 1.0, 'L': 1.0, 'N': 1.0,
        'Q': 1.0, 'P': 1.0, 'S': 20.26, 'R': -6.54,
        'T': -14.03, 'W': 1.0, 'V': 1.0, 'Y': 1.0},
  'G': {'A': -7.49, 'C': 1.0, 'E': -6.54, 'D': 1.0,
        'G': 13.34, 'F': 1.0, 'I': -7.49, 'H': 1.0,
        'K': -7.49, 'M': 1.0, 'L': 1.0, 'N': -7.49,
        'Q': 1.0, 'P': 1.0, 'S': 1.0, 'R': 1.0,
        'T': -7.49, 'W': 13.34, 'V': 1.0, 'Y': -7.49},
  'F': {'A': 1.0, 'C': 1.0, 'E': 1.0, 'D': 13.34,
        'G': 1.0, 'F': 1.0, 'I': 1.0, 'H': 1.0,
        'K': -14.03, 'M': 1.0, 'L': 1.0, 'N': 1.0,
        'Q': 1.0, 'P': 20.26, 'S': 1.0, 'R': 1.0,
        'T': 1.0, 'W': 1.0, 'V': 1.0, 'Y': 33.601},
  'I': {'A': 1.0, 'C': 1.0, 'E': 44.94, 'D': 1.0,
        'G': 1.0, 'F': 1.0, 'I': 1.0, 'H': 13.34,
        'K': -7.49, 'M': 1.0, 'L': 20.26, 'N': 1.0,
        'Q': 1.0, 'P': -1.88, 'S': 1.0, 'R': 1.0,
        'T': 1.0, 'W': 1.0, 'V': -7.49, 'Y': 1.0},
  'H': {'A': 1.0, 'C': 1.0, 'E': 1.0, 'D': 1.0,
        'G': -9.37, 'F': -9.37, 'I': 44.94, 'H': 1.0,
        'K': 24.68, 'M': 1.0, 'L': 1.0, 'N': 24.68,
        'Q': 1.0, 'P': -1.88, 'S': 1.0, 'R': 1.0,
        'T': -6.54, 'W': -1.88, 'V': 1.0, 'Y': 44.94},
  'K': {'A': 1.0, 'C': 1.0, 'E': 1.0, 'D': 1.0,
        'G': -7.49, 'F': 1.0, 'I': -7.49, 'H': 1.0,
        'K': 1.0, 'M': 33.60, 'L': -7.49, 'N': 1.0,
        'Q': 24.64, 'P': -6.54, 'S': 1.0, 'R': 33.60,
        'T': 1.0, 'W': 1.0, 'V': -7.49, 'Y': 1.0},
  'M': {'A': 13.34, 'C': 1.0, 'E': 1.0, 'D': 1.0,
        'G': 1.0, 'F': 1.0, 'I': 1.0, 'H': 58.28,
        'K': 1.0, 'M': -1.88, 'L': 1.0, 'N': 1.0,
        'Q': -6.54, 'P': 44.94, 'S': 44.94, 'R': -6.54,
        'T': -1.88, 'W': 1.0, 'V': 1.0, 'Y': 24.68},
  'L': {'A': 1.0, 'C': 1.0, 'E': 1.0, 'D': 1.0,
        'G': 1.0, 'F': 1.0, 'I': 1.0, 'H': 1.0,
        'K': -7.49, 'M': 1.0, 'L': 1.0, 'N': 1.0,
        'Q': 33.60, 'P': 20.26, 'S': 1.0, 'R': 20.26,
        'T': 1.0, 'W': 24.68, 'V': 1.0, 'Y': 1.0},
  'N': {'A': 1.0, 'C': -1.88, 'E': 1.0, 'D': 1.0,
        'G': -14.03, 'F': -14.03, 'I': 44.94, 'H': 1.0,
        'K': 24.68, 'M': 1.0, 'L': 1.0, 'N': 1.0,
        'Q': -6.54, 'P': -1.88, 'S': 1.0, 'R': 1.0,
        'T': -7.49, 'W': -9.37, 'V': 1.0, 'Y': 1.0},
  'Q': {'A': 1.0, 'C': -6.54, 'E': 20.26, 'D': 20.26,
        'G': 1.0, 'F': -6.54, 'I': 1.0, 'H': 1.0,
        'K': 1.0, 'M': 1.0, 'L': 1.0, 'N': 1.0,
        'Q': 20.26, 'P': 20.26, 'S': 44.94, 'R': 1.0,
        'T': 1.0, 'W': 1.0, 'V': -6.54, 'Y': -6.54},
  'P': {'A': 20.26, 'C': -6.54, 'E': 18.38, 'D': -6.54,
        'G': 1.0, 'F': 20.26, 'I': 1.0, 'H': 1.0,
        'K': 1.0, 'M': -6.54, 'L': 1.0, 'N': 1.0,
        'Q': 20.26, 'P': 20.26, 'S': 20.26, 'R': -6.54,
        'T': 1.0, 'W': -1.88, 'V': 20.26, 'Y': 1.0},
  'S': {'A': 1.0, 'C': 33.60, 'E': 20.26, 'D': 1.0,
        'G': 1.0, 'F': 1.0, 'I': 1.0, 'H': 1.0,
        'K': 1.0, 'M': 1.0, 'L': 1.0, 'N': 1.0,
        'Q': 20.26, 'P': 44.94, 'S': 20.26, 'R': 20.26,
        'T': 1.0, 'W': 1.0, 'V': 1.0, 'Y': 1.0},
  'R': {'A': 1.0, 'C': 1.0, 'E': 1.0, 'D': 1.0,
        'G': -7.49, 'F': 1.0, 'I': 1.0, 'H': 20.26,
        'K': 1.0, 'M': 1.0, 'L': 1.0, 'N': 13.34,
        'Q': 20.26, 'P': 20.26, 'S': 44.94, 'R': 58.28,
        'T': 1.0, 'W': 58.28, 'V': 1.0, 'Y': -6.54},
  'T': {'A': 1.0, 'C': 1.0, 'E': 20.26, 'D': 1.0,
        'G': -7.49, 'F': 13.34, 'I': 1.0, 'H': 1.0,
        'K': 1.0, 'M': 1.0, 'L': 1.0, 'N': -14.03,
        'Q': -6.54, 'P': 1.0, 'S': 1.0, 'R': 1.0,
        'T': 1.0, 'W': -14.03, 'V': 1.0, 'Y': 1.0},
  'W': {'A': -14.03, 'C': 1.0, 'E': 1.0, 'D': 1.0,
        'G': -9.37, 'F': 1.0, 'I': 1.0, 'H': 24.68,
        'K': 1.0, 'M': 24.68, 'L': 13.34, 'N': 13.34,
        'Q': 1.0, 'P': 1.0, 'S': 1.0, 'R': 1.0,
        'T': -14.03, 'W': 1.0, 'V': -7.49, 'Y': 1.0},
  'V': {'A': 1.0, 'C': 1.0, 'E': 1.0, 'D': -14.03,
        'G': -7.49, 'F': 1.0, 'I': 1.0, 'H': 1.0,
        'K': -1.88, 'M': 1.0, 'L': 1.0, 'N': 1.0,
        'Q': 1.0, 'P': 20.26, 'S': 1.0, 'R': 1.0,
        'T': -7.49, 'W': 1.0, 'V': 1.0, 'Y': -6.54},
  'Y': {'A': 24.68, 'C': 1.0, 'E': -6.54, 'D': 24.68,
        'G': -7.49, 'F': 1.0, 'I': 1.0, 'H': 13.34,
        'K': 1.0, 'M': 44.94, 'L': 1.0, 'N': 1.0,
        'Q': 1.0, 'P': 13.34, 'S': 1.0, 'R': -15.91,
        'T': -7.49, 'W': -9.37, 'V': 1.0, 'Y': 13.34}
};

// Data for the calculation of the Isoelectric Point
/* pK values are from:
 *  - Bjellqvist, B., Hughes, G.J., Pasquali, Ch., Paquet, N., Ravier, F., Sanchez,
 *    J.-Ch., Frutiger, S. & Hochstrasser, D.F. The focusing positions of polypeptides
 *    in immobilized pH gradients can be predicted from their amino acid sequences.
 *    Electrophoresis 1993, 14, 1023-1031.
 *  - Bjellqvist, B., Basse, B., Olsen, E. and Celis, J.E. Reference points for
 *    comparisons of two-dimensional maps of proteins from different human cell types
 *    defined in a pH scale where isoelectric points correlate with polypeptide
 *    compositions. Electrophoresis 1994, 15, 529-539.
 */
var positive_pKs = {'Nterm': 7.5, 'K': 10.0, 'R': 12.0, 'H': 5.98};
var negative_pKs = {'Cterm': 3.55, 'D': 4.05, 'E': 4.45, 'C': 9.0, 'Y': 10.0};

// For some amino acids at the N-terminus the pK value is changed:
// if one of these is N-terminal, its value replaces the usual 7.5 for Nterm.
var pKnterminal = {'A': 7.59, 'M': 7.0, 'S': 6.93, 'P': 8.36, 'T': 6.82, 'V': 7.44, 'E': 7.7};

// One-letter codes of the amino acids treated as charged. // or ProMost?
var charged_aas = [
  'K', 'R', 'H',      // same letters as the keys of positive_pKs (without Nterm)
  'D', 'E', 'C', 'Y'  // same letters as the keys of negative_pKs (without Cterm)
];

// Half-life text shown in the "Mammals:" cell of the result table,
// looked up by the one-letter code of the first (N-terminal) amino acid.
var halflife_table_Mammalian = {
  A: "4.4 hour",
  R: "1 hour",
  N: "1.4 hour",
  D: "1.1 hour",
  C: "1.2 hour",
  Q: "0.8 hour",
  E: "1 hour",
  G: "30 hour",
  H: "3.5 hour",
  I: "20 hour",
  L: "5.5 hour",
  K: "1.3 hour",
  M: "30 hour",
  F: "1.1 hour",
  P: ">20 hour",
  S: "1.9 hour",
  T: "7.2 hour",
  W: "2.8 hour",
  Y: "2.8 hour",
  V: "100 hour"
};

// Half-life text shown in the "Yeast:" cell of the result table,
// looked up by the one-letter code of the first (N-terminal) amino acid.
var halflife_table_Yeast = {
  A: ">20 hour",
  R: "2 min",
  N: "3 min",
  D: "3 min",
  C: ">20 hour",
  Q: "10 min",
  E: "30 min",
  G: ">20 hour",
  H: "10 min",
  I: "30 min",
  L: "3 min",
  K: "3 min",
  M: ">20 hour",
  F: "3 min",
  P: ">20 hour",
  S: ">20 hour",
  T: ">20 hour",
  W: "3 min",
  Y: "10 min",
  V: ">20 hour"
};

// Half-life text shown in the "E. coli:" cell of the result table,
// looked up by the one-letter code of the first (N-terminal) amino acid.
var halflife_table_E_coli = {
  A: ">10 hour",
  R: "2 min",
  N: ">10 hour",
  D: ">10 hour",
  C: ">10 hour",
  Q: ">10 hour",
  E: ">10 hour",
  G: ">10 hour",
  H: ">10 hour",
  I: ">10 hour",
  L: "2 min",
  K: "2 min",
  M: ">10 hour",
  F: "2 min",
  P: "?",
  S: ">10 hour",
  T: ">10 hour",
  W: "2 min",
  Y: "2 min",
  V: ">10 hour"
};

// Codon (DNA alphabet, upper case) -> one-letter amino acid code.
var translation_table = {
  'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L',
  'TCT': 'S', 'TCC': 'S', 'TCA': 'S', 'TCG': 'S',
  'TAT': 'Y', 'TAC': 'Y', 'TGT': 'C', 'TGC': 'C',
  'TGG': 'W', 'CTT': 'L', 'CTC': 'L', 'CTA': 'L',
  'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 'CCA': 'P',
  'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q',
  'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R',
  'CGG': 'R', 'ATT': 'I', 'ATC': 'I', 'ATA': 'I',
  'ATG': 'M', 'ACT': 'T', 'ACC': 'T', 'ACA': 'T',
  'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 'AAA': 'K',
  'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R',
  'AGG': 'R', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V',
  'GTG': 'V', 'GCT': 'A', 'GCC': 'A', 'GCA': 'A',
  'GCG': 'A', 'GAT': 'D', 'GAC': 'D', 'GAA': 'E',
  'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 'GGA': 'G',
  'GGG': 'G',
  // STOP CODONS, shouldn't appear and already be removed
  'TAA': '*', 'TAG': '*', 'TGA': '*'
};

// Set of stop codons; membership is tested with the `in` operator, the values are unused.
var stop_codons = {
  TAA: 0,
  TAG: 0,
  TGA: 0
};

// Weights for the calculation of the CAI (codon adaptation index),
// see e.g. http://www.ihes.fr/~carbone/papers/Bioinformatics.pdf
// Method based on: Sharp, P.M. and Li, W-H. (1987) The codon adaptation index - a measure
// of directional synonymous codon usage bias, and its potential applications.
//
// E. coli: from http://www.kazusa.or.jp/codon/cgi-bin/showcodon.cgi?species=37762&aa=1&style=N
// & computing the weights for CAI calculation.
var E_coli_codon_weights = {
  'TTT': 1, 'TTC': 36/64, 'TTA': 18/38, 'TTG': 13/38,
  'TCT': 18/20, 'TCC': 14/20, 'TCA': 18/20, 'TCG': 11/20,
  'TAT': 1, 'TAC': 35/65, 'TGT': 1, 'TGC': 48/52,
  'TGG': 1, 'CTT': 15/38, 'CTC': 10/38, 'CTA': 6/38,
  'CTG': 1, 'CCT': 24/37, 'CCC': 16/37, 'CCA': 23/37,
  'CCG': 1, 'CAT': 1, 'CAC': 37/63, 'CAA': 35/65,
  'CAG': 1, 'CGT': 1, 'CGC': 26/30, 'CGA': 9/30,
  'CGG': 15/30, 'ATT': 1, 'ATC': 31/47, 'ATA': 21/47,
  'ATG': 1, 'ACT': 22/31, 'ACC': 1, 'ACA': 25/31,
  'ACG': 22/31, 'AAT': 1, 'AAC': 41/59, 'AAA': 1,
  'AAG': 29/71, 'AGT': 18/20, 'AGC': 1, 'AGA': 13/30,
  'AGG': 7/30, 'GTT': 1, 'GTC': 19/32, 'GTA': 19/32,
  'GTG': 29/32, 'GCT': 22/27, 'GCC': 26/27, 'GCA': 1,
  'GCG': 25/27, 'GAT': 1, 'GAC': 35/65, 'GAA': 1,
  'GAG': 36/64, 'GGT': 1, 'GGC': 29/34, 'GGA': 19/34,
  'GGG': 18/34,
  // STOP CODONS, shouldn't appear and already be removed
  'TAA': 1, 'TAG': 9/58, 'TGA': 33/58
};

// CAI codon weights for mus musculus:
// from http://www.kazusa.or.jp/codon/cgi-bin/showcodon.cgi?species=10090&aa=1&style=N
// & computing the weights for CAI calculation.
var Mouse_codon_weights = {
  'TTT': 44/56, 'TTC': 1, 'TTA': 7/39, 'TTG': 13/39,
  'TCT': 20/24, 'TCC': 22/24, 'TCA': 14/24, 'TCG': 5/24,
  'TAT': 43/57, 'TAC': 1, 'TGT': 48/52, 'TGC': 1,
  'TGG': 1, 'CTT': 13/39, 'CTC': 20/39, 'CTA': 8/39,
  'CTG': 1, 'CCT': 1, 'CCC': 30/31, 'CCA': 29/31,
  'CCG': 10/31, 'CAT': 41/59, 'CAC': 1, 'CAA': 26/74,
  'CAG': 1, 'CGT': 8/22, 'CGC': 17/22, 'CGA': 12/22,
  'CGG': 19/22, 'ATT': 34/50, 'ATC': 1, 'ATA': 16/50,
  'ATG': 1, 'ACT': 25/35, 'ACC': 1, 'ACA': 29/35,
  'ACG': 10/35, 'AAT': 43/57, 'AAC': 1, 'AAA': 39/61,
  'AAG': 1, 'AGT': 15/24, 'AGC': 1, 'AGA': 1,
  'AGG': 1, 'GTT': 17/46, 'GTC': 25/46, 'GTA': 12/46,
  'GTG': 1, 'GCT': 29/38, 'GCC': 1, 'GCA': 23/38,
  'GCG': 9/38, 'GAT': 45/55, 'GAC': 1, 'GAA': 41/59,
  'GAG': 1, 'GGT': 18/33, 'GGC': 1, 'GGA': 26/33,
  'GGG': 23/33,
  // STOP CODONS, shouldn't appear and already be removed
  'TAA': 28/49, 'TAG': 23/49, 'TGA': 1
};

// CAI codon weights for Saccharomyces cerevisiae:
// from http://www.kazusa.or.jp/codon/cgi-bin/showcodon.cgi?species=4932&aa=1&style=N
// & computing the weights for CAI calculation.
var Yeast_codon_weights = {
  'TTT': 1, 'TTC': 41/59, 'TTA': 28/29, 'TTG': 1,
  'TCT': 1, 'TCC': 16/26, 'TCA': 21/26, 'TCG': 10/26,
  'TAT': 1, 'TAC': 44/56, 'TGT': 1, 'TGC': 37/63,
  'TGG': 1, 'CTT': 13/29, 'CTC': 6/29, 'CTA': 14/29,
  'CTG': 11/29, 'CCT': 31/42, 'CCC': 15/42, 'CCA': 1,
  'CCG': 12/42, 'CAT': 1, 'CAC': 36/64, 'CAA': 1,
  'CAG': 31/69, 'CGT': 14/48, 'CGC': 6/48, 'CGA': 7/48,
  'CGG': 4/48, 'ATT': 1, 'ATC': 26/46, 'ATA': 27/46,
  'ATG': 1, 'ACT': 1, 'ACC': 22/35, 'ACA': 30/35,
  'ACG': 14/35, 'AAT': 1, 'AAC': 41/59, 'AAA': 1,
  'AAG': 42/58, 'AGT': 16/26, 'AGC': 11/26, 'AGA': 1,
  'AGG': 21/48, 'GTT': 1, 'GTC': 21/39, 'GTA': 21/39,
  'GTG': 19/39, 'GCT': 1, 'GCC': 22/38, 'GCA': 29/38,
  'GCG': 11/38, 'GAT': 1, 'GAC': 35/65, 'GAA': 1,
  'GAG': 30/70, 'GGT': 1, 'GGC': 19/47, 'GGA': 22/47,
  'GGG': 12/47,
  // STOP CODONS, shouldn't appear and already be removed
  'TAA': 1, 'TAG': 23/47, 'TGA': 30/47
};

/**

*  FUNCTIONS
*/

/**
 * Entry point of the annotator: reads the entered nucleotide sequence, determines the
 * reading frame, translates it, computes the protein parameters and writes two outputs
 * into the page: a rendered HTML table (#htmlTable) and the wiki markup producing the
 * same table (#wikiTable), each with a short explanation line.
 *
 * Both outputs are generated from one shared row model so they cannot drift apart.
 *
 * Any error is reported with alert(); nothing is returned.
 *
 * NOTE(review): the HTML table markup (<table>/<tr>/<td colspan>) is a reconstruction that
 * mirrors the wiki table layout cell by cell, because the markup inside the original
 * string literals was not recoverable. Confirm against the deployed page.
 * NOTE(review): the wiki code is written with .text() plus "white-space: pre-wrap" so that
 * its line breaks and the literal <partinfo> tag stay visible; confirm this suits the
 * #wikiTable element.
 */
function main_table_calc() {
  try {
    var entered_nuc_sequence = read_sequence();
    var reading_frame = find_reading_frame(entered_nuc_sequence);
    var reading_frame_start = reading_frame[0]; // the A in the ATG start codon
    var reading_frame_end = reading_frame[1];   // last nucleotide to be translated
    // recall internal indexing starts at 0, external indexing at 1

    var entered_nuc_length = entered_nuc_sequence.length;
    var frame_runs_past_part = reading_frame_end > entered_nuc_length - 1;
    var coding_nuc_sequence;
    var RFC_standard;
    if (reading_frame_start === -9 && frame_runs_past_part) {
      // so RFC25: add prefix and suffix
      coding_nuc_sequence = "ATGGCCGGC" + entered_nuc_sequence + "ACCGGT";
      RFC_standard = "This is a RFC 25 BioBrick, thus ATGGCCGGC and ACCGGT were added to the 5' and 3' ends.";
    } else if (frame_runs_past_part) {
      // stop codon in RFC10 suffix
      coding_nuc_sequence = entered_nuc_sequence.slice(reading_frame_start) + "TAC";
      RFC_standard = "This is a RFC 10 BioBrick using the stop codon in the suffix, so TAC was added to the 3' end.";
    } else {
      coding_nuc_sequence = entered_nuc_sequence.slice(reading_frame_start, reading_frame_end + 1);
      RFC_standard = "This is a RFC 10 BioBrick, nothing was added";
    }

    var trans_result = translate_to_aa_and_codon_count(coding_nuc_sequence);
    var amino_sequence = trans_result[0];
    var codon_count = trans_result[1];

    var codon_usage = analyze_codons(codon_count); // 0 -> E_coli, 1 -> Yeast, 2 -> Mammalian

    var counting_result = count_amino_acids(amino_sequence);
    var amino_content = counting_result[0];
    var amino_freq = counting_result[1];
    var total_aminos = amino_sequence.length;
    var molecular_weight = compute_molecular_weight(amino_content);
    var pI = compute_pI(amino_sequence, amino_content);
    var extinction_coeffs = compute_extinction_coeff(amino_content, molecular_weight);
    var nterm = amino_sequence[0];

    // --- Build the table once as rows of cells ---
    // A cell carries its colspan, its wiki text and (when different) its HTML text.
    var make_cell = function (span, wiki_text, html_text) {
      return {
        span: span,
        wiki: String(wiki_text),
        html: String(html_text === undefined ? wiki_text : html_text)
      };
    };
    // Rows of the form: title | label, value | label, value | label, value
    var make_summary_row = function (title, label_a, value_a, label_b, value_b, label_c, value_c) {
      return [
        make_cell(2, title),
        make_cell(4, label_a), make_cell(2, value_a),
        make_cell(4, label_b), make_cell(2, value_b),
        make_cell(4, label_c), make_cell(2, value_c)
      ];
    };

    var rows = [];
    rows.push([
      make_cell(20,
        "Automatically determined parameters using the [http://www.mediawiki.org/wiki/Table BioBrick-AutoAnnotator]",
        "Automatically determined parameters using the <a href=\"https://2013.igem.org/Team:TU-Munich/Results/Software\">BioBrick-AutoAnnotator</a>")
    ]);
    rows.push([
      make_cell(10, "BioBrick: <partinfo>BBa_K1159000</partinfo>"),
      // EXCLUDING STOP CODON
      make_cell(10, "Used open reading frame from position " + (reading_frame_start + 1) +
        " to " + (reading_frame_end + 1) +
        " (excluding stop-codon; if appropriate prefix/suffix were added).")
    ]);

    // Amino acid composition: five amino acids (ten cells) per table row.
    var aa_labels = [
      ["A", "Ala"], ["R", "Arg"], ["N", "Asn"], ["D", "Asp"], ["C", "Cys"],
      ["Q", "Gln"], ["E", "Glu"], ["G", "Gly"], ["H", "His"], ["I", "Ile"],
      ["L", "Leu"], ["K", "Lys"], ["M", "Met"], ["F", "Phe"], ["P", "Pro"],
      ["S", "Ser"], ["T", "Thr"], ["W", "Trp"], ["Y", "Tyr"], ["V", "Val"]
    ];
    for (var first = 0; first < aa_labels.length; first += 5) {
      var aa_row = [];
      for (var n = first; n < first + 5; n++) {
        var letter = aa_labels[n][0];
        aa_row.push(make_cell(2, letter + " (" + aa_labels[n][1] + ")"));
        aa_row.push(make_cell(2, amino_content[letter] + " (" + amino_freq[letter].toFixed(2) + "%)"));
      }
      rows.push(aa_row);
    }

    rows.push(make_summary_row("Amino acid counting:",
      "Total number of amino acids (aa):", total_aminos,
      "Number of positively charged aa (Arg + Lys):", amino_content.R + amino_content.K,
      "Number of negatively charged aa (Asp + Glu):", amino_content.D + amino_content.E));
    rows.push(make_summary_row("Biochemical parameters:",
      "Molecular weight [Da]:", molecular_weight.toFixed(2),
      "Theoretical pI:", pI.toFixed(2),
      "Extinction coefficient:", extinction_coeffs[0] + " (all Cys as cystine), " +
        extinction_coeffs[1] + " (no Cys as cystine)"));
    rows.push(make_summary_row("Estimated half-life:",
      "Mammals:", halflife_table_Mammalian[nterm],
      "Yeast:", halflife_table_Yeast[nterm],
      "E. coli:", halflife_table_E_coli[nterm]));
    rows.push(make_summary_row("Codon usage: (CAI)",
      "Mammals:", codon_usage[2].toFixed(2),
      "Yeast:", codon_usage[1].toFixed(2),
      "E. coli:", codon_usage[0].toFixed(2)));
    rows.push([
      make_cell(3, "RFC standard:"),
      make_cell(17, RFC_standard)
    ]);
    rows.push([
      make_cell(20,
        " The BioBrick-AutoAnnotator was created by TU-Munich 2013 iGEM team. For information please read the description.",
        "The BioBrick-AutoAnnotator was created by <a href=\"https://2013.igem.org/Team:TU-Munich\">TU-Munich 2013</a> iGEM team. For information please read the <a href=\"https://2013.igem.org/Team:TU-Munich/Results/Software\">description</a>.")
    ]);

    // --- Create the output ---
    // wiki_lines: code to be copied into the wiki to create the table
    // html_parts: code to display the actual table
    var wiki_lines = ["{|cellspacing=\"0\" border=\"1\" "];
    var html_parts = ["<table cellspacing=\"0\" border=\"1\">"];
    for (var r = 0; r < rows.length; r++) {
      if (r > 0) {
        wiki_lines.push("|-"); // wiki row separator
      }
      html_parts.push("<tr>");
      for (var c = 0; c < rows[r].length; c++) {
        var current = rows[r][c];
        wiki_lines.push("|colspan=\"" + current.span + "\"|" + current.wiki);
        html_parts.push("<td colspan=\"" + current.span + "\">" + current.html + "</td>");
      }
      html_parts.push("</tr>");
    }
    wiki_lines.push("|}");
    html_parts.push("</table>");

    var wikiCode = wiki_lines.join("\n") + "\n\n";
    var htmlCode = html_parts.join("");

    $("#htmlTable").html(htmlCode);
    $("#htmlExplanation").html("The generated table giving various computed parameters: (The wiki-code producing this table is below)");

    // Plain-text sink: keeps "<partinfo>" literal; pre-wrap keeps the line breaks visible.
    $("#wikiTable").text(wikiCode).css("white-space", "pre-wrap");
    $("#wikiExplanation").html("Copy the following into the wiki to get the protein-data-table:");
  } catch (err) {
    var txt = "There was an error on this page in main_table_calc().\n\n";
    txt = txt + "Error description: \n" + (err instanceof Error ? err.message : err) + "\n\n";
    txt = txt + "Click OK to continue.\n\n";
    alert(txt);
  }
}

/**
 * Reads the nucleotide sequence from the form field with id "EnteredSequence" and
 * normalizes it: all whitespace (spaces, tabs, line breaks) is removed and the letters
 * are converted to upper case.
 *
 * The original validated the fully cleaned string but returned the intermediate one that
 * still contained line breaks; the validated string is now the one returned.
 *
 * @returns {string|undefined} the cleaned sequence consisting only of A, T, G, C;
 *          undefined after an error has been reported with alert().
 */
function read_sequence() {
  try {
    var raw_input = document.getElementById("EnteredSequence").value;
    var clean_sequence = raw_input.replace(/\s+/g, "").toUpperCase();
    // check for wrong nucleotides
    if (/[^ATGC]/.test(clean_sequence)) {
      throw new Error("Unknown nucleotide in the entered sequence. Only use A, T, G, C!");
    }
    return clean_sequence;
  } catch (err) {
    var txt = "There was an error on this page in read_sequence().\n\n";
    txt = txt + "Error description: \n" + (err instanceof Error ? err.message : err) + "\n\n";
    txt = txt + "Click OK to continue. \n\n";
    alert(txt);
  }
}

/**
 * Counts the standard amino acids in a protein sequence.
 *
 * Symbols other than the 20 standard one-letter codes (e.g. '*') are not counted, but
 * they still contribute to the sequence length used for the percentages. An empty
 * sequence yields all-zero frequencies instead of NaN.
 *
 * @param {string} sequence protein sequence in one-letter code
 * @returns {Array|undefined} [content, freq]: content maps each amino acid to its count,
 *          freq maps it to its percentage of the sequence length; undefined after an
 *          error has been reported with alert().
 */
function count_amino_acids(sequence) {
  try {
    var residues = "ARNDCQEGHILKMFPSTWYV";
    var amino_acids_content = {};
    var amino_acids_freq = {};
    var k;
    for (k = 0; k < residues.length; k++) {
      amino_acids_content[residues.charAt(k)] = 0;
      amino_acids_freq[residues.charAt(k)] = 0;
    }

    var total = sequence.length;
    for (k = 0; k < total; k++) {
      var symbol = sequence.charAt(k);
      if (Object.prototype.hasOwnProperty.call(amino_acids_content, symbol)) {
        amino_acids_content[symbol]++;
      }
    }

    if (total > 0) { // avoid 0 * (100 / 0) = NaN for an empty sequence
      for (k = 0; k < residues.length; k++) {
        var aa = residues.charAt(k);
        amino_acids_freq[aa] = amino_acids_content[aa] * (100 / total);
      }
    }
    return [amino_acids_content, amino_acids_freq];
  } catch (err) {
    var txt = "There was an error on this page in count_amino_acids(sequence).\n\n";
    txt = txt + "Error description: \n" + (err instanceof Error ? err.message : err) + "\n\n";
    txt = txt + "Click OK to continue. \n\n";
    alert(txt);
  }
}

/**
 * Computes the molecular weight of a protein from its amino acid composition:
 * water_weight plus, for every key of amino_acids, count * amino_weights[key].
 *
 * @param {Object} amino_acids_content count per one-letter amino acid code
 * @returns {number|undefined} the molecular weight; undefined after an error has been
 *          reported with alert().
 */
function compute_molecular_weight(amino_acids_content) {
  try {
    var molec_weight = water_weight;
    // `aa` is declared locally; the original leaked it as an implicit global.
    for (var aa in amino_acids) {
      if (Object.prototype.hasOwnProperty.call(amino_acids, aa)) {
        molec_weight = molec_weight + amino_acids_content[aa] * amino_weights[aa];
      }
    }
    return molec_weight;
  } catch (err) {
    var txt = "There was an error on this page in compute_molecular_weight(amino_acids_content).\n\n";
    txt = txt + "Error description: \n" + (err instanceof Error ? err.message : err) + "\n\n";
    txt = txt + "Click OK to continue. \n\n";
    alert(txt);
  }
}

/**
 * Estimates the isoelectric point: the pH at which charge_at_pH() is (close to) zero.
 *
 * The search starts at pH 7 with a step of 3.5, moves up while the charge is positive
 * and down otherwise, and halves the step after every move. It stops when the last move
 * was at most 0.0001 or the charge is exactly 0.
 *
 * @param {string} sequence protein sequence; its first letter selects the N-terminal pK
 * @param {Object} amino_acids_content count per one-letter amino acid code
 * @returns {number|undefined} the estimated pI; undefined after an error has been
 *          reported with alert().
 */
function compute_pI(sequence, amino_acids_content) {
  try {
    var nterm = sequence[0]; // the first aa
    var composition = {
      Nterm: 1,
      Cterm: 1,
      K: amino_acids_content.K,
      R: amino_acids_content.R,
      H: amino_acids_content.H,
      D: amino_acids_content.D,
      E: amino_acids_content.E,
      C: amino_acids_content.C,
      Y: amino_acids_content.Y
    };

    // Copy positive_pKs so the N-terminal override below does not modify the shared
    // table. The table is flat, so a plain key-by-key copy is enough (no jQuery needed).
    var pos_pKs = {};
    for (var key in positive_pKs) {
      if (Object.prototype.hasOwnProperty.call(positive_pKs, key)) {
        pos_pKs[key] = positive_pKs[key];
      }
    }
    if (Object.prototype.hasOwnProperty.call(pKnterminal, nterm)) {
      pos_pKs.Nterm = pKnterminal[nterm];
    }

    var pHOld = 0.0; // just to set off the while loop
    var pHNew = 7.0;
    var step = 3.5;
    var charge = charge_at_pH(pHNew, composition, pos_pKs);
    while (Math.abs(pHOld - pHNew) > 0.0001 && charge !== 0) {
      pHOld = pHNew; // store the now old pH
      if (charge > 0) {
        pHNew = pHNew + step;
      } else { // so charge < 0
        pHNew = pHNew - step;
      }
      step = step / 2;
      charge = charge_at_pH(pHNew, composition, pos_pKs);
    }
    return pHNew;
  } catch (err) {
    var txt = "There was an error on this page in compute_pI(sequence,amino_acids_content).\n\n";
    txt = txt + "Error description: \n" + (err instanceof Error ? err.message : err) + "\n\n";
    txt = txt + "Click OK to continue. \n\n";
    alert(txt);
  }
}

/**
 * Computes the net charge at a given pH, used in calculating the pI.
 *
 * Each group in pos_pKs adds compo[group] / (1 + 10^(pH - pK)); each group in
 * negative_pKs subtracts compo[group] / (1 + 10^(pK - pH)).
 *
 * @param {number} pH pH value to evaluate
 * @param {Object} compo count per charged group (keys of pos_pKs and negative_pKs)
 * @param {Object} pos_pKs pK values of the positively charged groups
 * @returns {number|undefined} the net charge; undefined after an error has been
 *          reported with alert().
 */
function charge_at_pH(pH, compo, pos_pKs) {
  try {
    var charge = 0;
    var group;
    // Iterate the table that was passed in, so keys and values come from the same object.
    for (group in pos_pKs) {
      if (Object.prototype.hasOwnProperty.call(pos_pKs, group)) {
        charge = charge + compo[group] * (1 / (1 + Math.pow(10, pH - pos_pKs[group])));
      }
    }
    for (group in negative_pKs) {
      if (Object.prototype.hasOwnProperty.call(negative_pKs, group)) {
        charge = charge - compo[group] * (1 / (1 + Math.pow(10, negative_pKs[group] - pH)));
      }
    }
    return charge;
  } catch (err) {
    var txt = "There was an error on this page in charge_at_pH(), which is used in calculating the pI.\n\n";
    txt = txt + "Error description: \n" + (err instanceof Error ? err.message : err) + "\n\n";
    txt = txt + "Click OK to continue. \n\n";
    alert(txt);
  }
}

/**
 * Computes the extinction coefficient from the Tyr, Trp and Cys counts, once assuming
 * all cysteines form cystines and once assuming none do.
 *
 * A cystine needs two cysteines, so the number of cystines is floor(C / 2); the original
 * used C / 2 and produced half a cystine for an odd number of cysteines.
 *
 * @param {Object} amino_acid_content count per one-letter amino acid code (Y, W, C are read)
 * @param {number} molecular_weight unused; kept so existing callers need no change
 * @returns {Array|undefined} [coefficient with all Cys as cystine, coefficient with no
 *          cystine]; undefined after an error has been reported with alert().
 */
function compute_extinction_coeff(amino_acid_content, molecular_weight) {
  try {
    var tyr_trp_part = amino_acid_content.Y * extinction.Y + amino_acid_content.W * extinction.W;
    var cystine_count = Math.floor(amino_acid_content.C / 2);
    var E_allCystine = tyr_trp_part + cystine_count * extinction.Cystine;
    var E_noCystine = tyr_trp_part;
    return [E_allCystine, E_noCystine];
  } catch (err) {
    var txt = "There was an error on this page in compute_extinction_coeff().\n\n";
    txt = txt + "Error description: \n" + (err instanceof Error ? err.message : err) + "\n\n";
    txt = txt + "Click OK to continue. \n\n";
    alert(txt);
  }
}

/**
 * Determines the reading frame of the entered nucleotide sequence.
 *
 * - If the length is a multiple of three and the sequence does not end in a stop codon,
 *   the user is asked whether the part is RFC25; if so the frame is [-9, length + 5].
 * - Otherwise the frame starts at the first "ATG" and ends just before the first in-frame
 *   stop codon. If there is none but the remaining length is a multiple of three, the
 *   stop codon of the RFC10 suffix is used (stop position = length + 3).
 * - If the frame is shorter than 60% of the sequence, the user is asked for the start
 *   position (1-based) and the stop codon is searched again from there. Cancelling the
 *   prompt keeps the automatically detected frame.
 *
 * Fixes over the original: the second stop-codon search compared each codon with
 * ("TAA"|"TAG"|"TGA"), which evaluates to 0 and never matched; it now uses stop_codons
 * like the first search. Loop counters and intermediate values are local variables.
 *
 * @param {string} nuc_sequence cleaned nucleotide sequence (A, T, G, C)
 * @returns {Array|undefined} [index of the A of the start codon, index of the last
 *          nucleotide to be translated] (0-based); undefined after an error has been
 *          reported with alert().
 */
function find_reading_frame(nuc_sequence) {
  try {
    var seq_length = nuc_sequence.length;

    var is_stop_codon = function (codon) {
      return Object.prototype.hasOwnProperty.call(stop_codons, codon);
    };

    // Position of the first nucleotide of the stop codon belonging to the frame that
    // starts at `start`, falling back to the stop codon in the RFC10 suffix.
    var stop_for_start = function (start) {
      for (var pos = start + 3; pos < seq_length; pos += 3) {
        if (is_stop_codon(nuc_sequence.slice(pos, pos + 3))) {
          return pos;
        }
      }
      if ((seq_length - start) % 3 === 0) {
        // so RFC10 using stop codon in suffix
        alert("Using stop codon in suffix of RFC10");
        return seq_length + 3;
      }
      throw new Error("No stop codon found in frame & can't use stop codon in suffix of RFC10");
    };

    // check for RFC 25
    var isRFC25 = false;
    if (seq_length % 3 === 0 && !is_stop_codon(nuc_sequence.slice(seq_length - 3, seq_length))) {
      isRFC25 = confirm("Is this part in RFC25, i.e. the start codon is part of the prefix?\n If it is RFC25, press 'OK', otherwise 'Cancel'");
    }
    if (isRFC25) {
      // start codon lies in the prefix, stop codon in the suffix
      return [-9, seq_length + 6 - 1];
    }

    // atg_position is the first nucleotide in the ATG (start codon)
    // stop_position is the first nucleotide in the stop codon
    var atg_position = nuc_sequence.indexOf("ATG");
    if (atg_position === -1) {
      throw new Error("No ATG found, something is wrong! Might be RFC25?");
    }
    var stop_position = stop_for_start(atg_position);

    var reading_length = stop_position - atg_position + 3;
    if (reading_length / seq_length < 0.6) {
      var answer = prompt("The reading length is small compared to the length of the sequence, please enter the position of the start codon (start of sequence is 1)", atg_position + 1);
      if (answer !== null) { // null: prompt cancelled, keep the detected frame
        var entered_position = parseInt(answer, 10);
        if (isNaN(entered_position) || entered_position < 1 || entered_position > seq_length) {
          throw new Error("The entered start codon position is not a position within the sequence");
        }
        atg_position = entered_position - 1;
        stop_position = stop_for_start(atg_position);
      }
    }

    var reading_frame_start = atg_position;
    var reading_frame_end = stop_position - 1; // the last nucleotide to be translated
    return [reading_frame_start, reading_frame_end];
  } catch (err) {
    var txt = "There was an error on this page in find_reading_frame().\n\n";
    txt = txt + "Error description: \n" + (err instanceof Error ? err.message : err) + "\n\n";
    txt = txt + "Click OK to continue. \n\n";
    alert(txt);
  }
}

/**
 * Translates a nucleotide sequence codon by codon (starting at index 0) using
 * translation_table and counts how often each codon occurs.
 *
 * The counter object has one entry (initially 0) per key of translation_table, in the
 * table's key order. A trailing incomplete codon is ignored; a complete codon that is
 * not in translation_table is reported as an error.
 *
 * @param {string} nuc_sequence coding nucleotide sequence
 * @returns {Array|undefined} [amino acid sequence, codon counts]; undefined after an
 *          error has been reported with alert().
 */
function translate_to_aa_and_codon_count(nuc_sequence) {
  try {
    var codon_count = {};
    for (var known_codon in translation_table) {
      if (Object.prototype.hasOwnProperty.call(translation_table, known_codon)) {
        codon_count[known_codon] = 0;
      }
    }

    var amino_sequence = "";
    // Only complete codons are translated.
    var translated_length = nuc_sequence.length - (nuc_sequence.length % 3);
    for (var pos = 0; pos < translated_length; pos += 3) {
      var codon = nuc_sequence.slice(pos, pos + 3);
      if (!Object.prototype.hasOwnProperty.call(translation_table, codon)) {
        throw new Error("Unknown codon '" + codon + "' at position " + (pos + 1));
      }
      codon_count[codon]++; // count the number of each codon appearing
      amino_sequence = amino_sequence + translation_table[codon];
    }
    return [amino_sequence, codon_count];
  } catch (err) {
    var txt = "There was an error on this page in translate_to_aa_and_codon_count().\n\n";
    txt = txt + "Error description: \n" + (err instanceof Error ? err.message : err) + "\n\n";
    txt = txt + "Click OK to continue. \n\n";
    alert(txt);
  }
}

/**
 * Computes the codon adaptation index (CAI) of a codon count table for E. coli, yeast
 * and mammals (mouse weights): the geometric mean of the per-codon weights, weighted by
 * how often each codon occurs.
 *
 * The mean is accumulated as a sum of logarithms. The original multiplied the weights
 * directly, which underflows to 0 for long sequences. Codons with a count of 0 do not
 * contribute. The original also built an error message without ever showing it; errors
 * are now reported with alert().
 *
 * @param {Object} codon_count number of occurrences per codon
 * @returns {Array|undefined} [E. coli CAI, yeast CAI, mammalian CAI] (NaN entries when
 *          there are no codons at all); undefined after an error has been reported.
 */
function analyze_codons(codon_count) {
  try {
    var total_codons = 0;
    var E_coli_log_sum = 0;
    var Yeast_log_sum = 0;
    var Mammalian_log_sum = 0;

    for (var codon in codon_count) {
      if (!Object.prototype.hasOwnProperty.call(codon_count, codon)) {
        continue;
      }
      var occurrences = codon_count[codon];
      total_codons = total_codons + occurrences;
      if (occurrences === 0) {
        continue; // weight^0 = 1 contributes nothing
      }
      E_coli_log_sum = E_coli_log_sum + occurrences * Math.log(E_coli_codon_weights[codon]);
      Yeast_log_sum = Yeast_log_sum + occurrences * Math.log(Yeast_codon_weights[codon]);
      Mammalian_log_sum = Mammalian_log_sum + occurrences * Math.log(Mouse_codon_weights[codon]);
    }

    var E_coli_CAI = Math.exp(E_coli_log_sum / total_codons);
    var Yeast_CAI = Math.exp(Yeast_log_sum / total_codons);
    var Mammalian_CAI = Math.exp(Mammalian_log_sum / total_codons);

    return [E_coli_CAI, Yeast_CAI, Mammalian_CAI];
  } catch (err) {
    var txt = "There was an error on this page in analyze_codons().\n\n";
    txt = txt + "Error description: \n" + (err instanceof Error ? err.message : err) + "\n\n";
    txt = txt + "Click OK to continue. \n\n";
    alert(txt);
  }
}