Team:TU-Munich/AnnotatorCode.js

From 2013.igem.org

(Difference between revisions)
Line 202: Line 202:
    'TAG': 23/47,  
    'TAG': 23/47,  
    'TGA': 30/47 };
    'TGA': 30/47 };
 +
 +
// Per-codon weights for Arabidopsis (Kazusa species id 3702).
// Each value is looked up by codon when the CAI product is accumulated.
// from http://www.kazusa.or.jp/codon/cgi-bin/showcodon.cgi?species=3702&aa=1&style=N
// NOTE(review): weights appear to be expressed relative to the most used
// synonymous codon (which gets weight 1) - confirm against the Kazusa table.
var Arabidopsis_codon_weights = {
    'TTT': 1, 'TTC': 49/51, 'TTA': 14/26, 'TTG': 22/26, 'TCT': 1,
    'TCC': 13/28, 'TCA': 20/28, 'TCG': 10/28, 'TAT': 1, 'TAC': 48/52,
    'TGT': 1, 'TGC': 40/60, 'TGG': 1, 'CTT': 1, 'CTC': 17/26,
    'CTA': 11/26, 'CTG': 11/26, 'CCT': 1, 'CCC': 11/38, 'CCA': 33/38,
    'CCG': 18/38, 'CAT': 1, 'CAC': 39/61, 'CAA': 1, 'CAG': 44/56,
    'CGT': 17/35, 'CGC': 7/35, 'CGA': 12/35, 'CGG': 9/35, 'ATT': 1,
    'ATC': 35/41, 'ATA': 24/41, 'ATG': 1, 'ACT': 1, 'ACC': 20/34,
    'ACA': 31/34, 'ACG': 15/34, 'AAT': 1, 'AAC': 48/52, 'AAA': 49/51,
    'AAG': 1, 'AGT': 16/28, 'AGC': 13/28, 'AGA': 1, 'AGG': 20/35,
    'GTT': 1, 'GTC': 19/40, 'GTA': 15/40, 'GTG': 26/40, 'GCT': 1,
    'GCC': 16/43, 'GCA': 27/43, 'GCG': 14/43, 'GAT': 1, 'GAC': 32/68,
    'GAA': 1, 'GAG': 48/52, 'GGT': 34/37, 'GGC': 14/37, 'GGA': 1,
    'GGG': 16/37,
    // Stop codons: should not appear here, they are expected to be removed already.
    'TAA': 36/44,
    'TAG': 20/44,
    'TGA': 1 };
 +
 +
// Per-codon weights for B. subtilis (Kazusa species id 1423).
// Each value is looked up by codon when the CAI product is accumulated.
// from http://www.kazusa.or.jp/codon/cgi-bin/showcodon.cgi?species=1423&aa=1&style=N
// NOTE(review): weights appear to be expressed relative to the most used
// synonymous codon (which gets weight 1) - confirm against the Kazusa table.
var Subtilis_codon_weights = {
    'TTT': 1, 'TTC': 32/68, 'TTA': 21/24, 'TTG': 16/24, 'TCT': 20/23,
    'TCC': 13/23, 'TCA': 1, 'TCG': 10/23, 'TAT': 1, 'TAC': 35/65,
    'TGT': 46/54, 'TGC': 1, 'TGG': 1, 'CTT': 23/24, 'CTC': 11/24,
    'CTA': 5/24, 'CTG': 1, 'CCT': 28/44, 'CCC': 9/44, 'CCA': 19/44,
    'CCG': 1, 'CAT': 1, 'CAC': 32/68, 'CAA': 1, 'CAG': 48/52,
    'CGT': 18/25, 'CGC': 20/25, 'CGA': 10/25, 'CGG': 17/25, 'ATT': 1,
    'ATC': 37/49, 'ATA': 13/49, 'ATG': 1, 'ACT': 16/40, 'ACC': 17/40,
    'ACA': 1, 'ACG': 27/40, 'AAT': 1, 'AAC': 44/56, 'AAA': 1,
    'AAG': 30/70, 'AGT': 11/23, 'AGC': 1, 'AGA': 1, 'AGG': 10/25,
    'GTT': 1, 'GTC': 26/28, 'GTA': 20/28, 'GTG': 26/28, 'GCT': 24/28,
    'GCC': 22/28, 'GCA': 1, 'GCG': 26/28, 'GAT': 1, 'GAC': 36/64,
    'GAA': 1, 'GAG': 32/68, 'GGT': 19/34, 'GGC': 1, 'GGA': 31/34,
    'GGG': 16/34,
    // Stop codons: should not appear here, they are expected to be removed already.
    'TAA': 1,
    'TAG': 15/61,
    'TGA': 24/61 };
 +
 +
// Known sequence features: amino acid motif -> label shown in the feature table.
// The key "TG" is special: it marks an RFC25 scar, which is detected on the
// nucleotide level (in-frame ACCGGC) and not by the amino acid search below.
// Order matters: a longer motif must be listed before a shorter motif it
// contains at the same start position (e.g. HHHHHH before HHHHH), because the
// search pattern tries the alternatives in this order.
var list_of_features = {
    "TG": "RFC25 scar (in-frame ACCGGC, show in bold)",
    "RRRRR": "Arg5-tag",
    "AWRHPQFGG": "Strep-tag I",
    "WSHPQFEK": "Strep-tag II",
    "DYKDHDGDYKDHDIDYKDDDDK": "3xFlag-tag",
    "DYKDDDDK": "Flag-tag",
    "YPYDVPDYA": "HA-tag",
    "HHHHHH": "His6-tag",
    "HHHHH": "His5-tag",
    "EQKLISEEDL": "c-Myc-tag",
    "KETAAAKFERQHMDS": "S-tag",
    "KDHLIHNVHKEFHAHAHNK": "HAT-tag",
    "KRRWKKNFIAVSAANRFKKISSSGAL": "Calmodulin-binding-peptide",
    "TDKDMTITFTNKKDAE": "Isopep-tag",
    "AHIVMVDAYKPTK": "Spy-tag",
    "TNPGVSAWQVNTAYTAGQLVTYNGKTYKCLQPHTSLAGWEPSNVPALWQLQ": "Chitin-binding domain",
    "MDEKTTGWRGGHVVEGLAGELEQLRARLEHHPQGQREP": "SBP-tag",
    "GLNDIFEAQKIEWHE": "Avitag",
    "DDDDK": "Enterokinase cleavage site",
    "IDGR": "Factor Xa cleavage site",
    "IEGR": "Factor Xa cleavage site",
    "LVPRGS": "Thrombin cleavage site",
    "LEVLFQGP": "PreScission cleavage site",
    "ENLYFQG": "TEV cleavage site",
    "ETVRFQGS": "TVMV cleavage site"
};

// Global pattern matching any amino acid motif of list_of_features.
// It is built from the table, so adding a feature there is sufficient; the
// "TG" scar key is left out because scars are found via the nucleotide sequence.
// The motifs are inserted verbatim: they must not contain regular expression
// metacharacters (all current motifs are plain upper-case letters).
var search_regexp = (function () {
    var motifs = [];
    for (var motif in list_of_features) {
        if (Object.prototype.hasOwnProperty.call(list_of_features, motif) && motif !== "TG") {
            motifs.push(motif);
        }
    }
    return new RegExp("(" + motifs.join("|") + ")", "g");
})();
/**
/**
Line 234: Line 303:
function(res) {
function(res) {
try{
try{
-
 
var a = res.responseText;
var a = res.responseText;
var b=a.indexOf("<body>");
var b=a.indexOf("<body>");
Line 258: Line 326:
}
}
main_table_calc(sequence,bb_number);
main_table_calc(sequence,bb_number);
-
 
}
}
});
});
Line 309: Line 376:
var codon_count = trans_result[1];
var codon_count = trans_result[1];
-
var amino_output = "<span style=\"font-family:'Courier New', Arial;\">&nbsp;&nbsp;";
+
/* //for test purposes
-
for ( i=0 ; i < amino_sequence.length /100 ; i++){
+
amino_sequence = "RRRRRAWRHPQFGGWSHPQFEKDYKDHDGDYKDHDIDYKDDDDKDYKDDDDKYPYDVPDYAHHHHHHEQKLISEEDLHHHHHKETAAAKFERQHMDSKDHLIHNVHKEFHAHAHNKKRRWKKNFIAVSAANRFKKISSSGAL" +
-
amino_output = amino_output + (100*(i) + 1) + "&nbsp;" + amino_sequence.substr(i*100, 100) + "<br>";
+
"TDKDMTITFTNKKDAEAHIVMVDAYKPTKTNPGVSAWQVNTAYTAGQLVTYNGKTYKCLQPHTSLAGWEPSNVPALWQLQMDEKTTGWRGGHVVEGLAGELEQLRARLEHHPQGQREPGLNDIFEAQKIEWHEDDDDK" +
-
}
+
"IDGRIEGRLVPRGSLEVLFQGPENLYFQGETVRFQGS";  
-
amino_output = amino_output + "</span>";
+
*/
 +
var SequenceFeatureTable = "<table border=\"0\" cellspacing=\"0\" width=\"60%\" !important>";
SequenceFeatures = find_sequence_features(coding_nuc_sequence,amino_sequence);
SequenceFeatures = find_sequence_features(coding_nuc_sequence,amino_sequence);
-
var SequenceFeatureTable = "";
+
var FeatureStarts = [];
 +
var FeatureEnds = [];
 +
for (f in SequenceFeatures){ //f is in the start of the feature in the amino acid sequence starting from 1
 +
feat_start_num = parseInt(f);
 +
feat_seq = SequenceFeatures[f]; // the aa sequence of the feature
 +
feat_length = feat_seq.length;
 +
feat_end_num = feat_start_num + feat_length - 1; //the last aa of the feature (starting from 1)
 +
 +
// Check for unwanted overlap information
 +
if ((feat_seq == "HHHHH") && (amino_sequence.substr(feat_start_num - 2, 6) == "HHHHHH")){
 +
continue; //so is a His5-Tag and we already have the His6-Tag, so we skip it
 +
}
 +
if ((feat_seq == "HHHHHH") && (amino_sequence.substr(feat_start_num - 2, 6) == "HHHHHH")){
 +
continue; //so is a His6-Tag overlapping with a previous His6-Tag, so we skip it
 +
}
 +
if ((feat_seq == "RRRRR") && (amino_sequence.substr(feat_start_num - 2, 5) == "RRRRR")){
 +
continue; //so is a Arg5-Tag overlapping with a previous Arg5-Tag, so we skip it
 +
}
 +
if ((feat_seq == "DYKDDDDK") && (amino_sequence.substr(feat_start_num - 15, 22) == "DYKDHDGDYKDHDIDYKDDDDK")){
 +
continue; //so is a Flag-Tag in a 3xFlag-Tag, so we skip it
 +
}
 +
 +
FeatureStarts.push(feat_start_num); //all the feature starts and features ends in amino acid sequence starting from 1
 +
FeatureEnds.push(feat_end_num); //are collected in these arrays
 +
 
 +
// beautify start of the feature
 +
if (feat_start_num<10){
 +
feat_start = "&nbsp;&nbsp;&nbsp;" + feat_start_num.toString();
 +
}
 +
else if (feat_start_num<100){
 +
feat_start = "&nbsp;&nbsp;" + feat_start_num.toString();
 +
}
 +
else if (feat_start_num<1000){
 +
feat_start = "&nbsp;" + feat_start_num.toString();
 +
}
 +
else{
 +
feat_start = feat_start_num;
 +
}
 +
// now end of the feature
 +
if (feat_end_num<10){
 +
feat_end = "&nbsp;&nbsp;&nbsp;" + feat_end_num.toString();
 +
}
 +
else if (feat_end_num<100){
 +
feat_end = "&nbsp;&nbsp;" + feat_end_num.toString();
 +
}
 +
else if (feat_end_num<1000){
 +
feat_end = "&nbsp;" + feat_end_num.toString();
 +
}
 +
else{
 +
feat_end = feat_end_num.toString();
 +
}
 +
SequenceFeatureTable = SequenceFeatureTable + "<tr><td width=\"5%\"></td><td width=\"25%\"><span style=\"font-family:'Courier New', Arial;\">" + feat_start + " -" + feat_end + ":</span></td><td width=\"70%\">" + list_of_features[SequenceFeatures[f]] + "</td></tr>";
 +
}
 +
SequenceFeatureTable = SequenceFeatureTable + "</table>";
 +
 +
//now add underline/bold to amino_output
 +
var Feature_Markups = FeatureStarts.concat(FeatureEnds);
 +
 +
//sort function needed for the sort below to work properly
 +
function sortNumber(a,b) {
 +
    return a - b;
 +
}
 +
Feature_Markups.sort(sortNumber); //sort it
 +
//determine in which lines there is some markup to be done
 +
var Feature_Markups_lines = [];
 +
for ( i=0 ; i < Feature_Markups.length ; i++){
 +
Feature_Markups_lines.push(Math.floor((Feature_Markups[i] - 1)/100));
 +
}
 +
 +
// -- Prepare the output for the amino acid sequence --
 +
output_amino_sequence = amino_sequence + "*";
 +
// first break the aa sequence into lines of 100 AAs
 +
var amino_lines = [];
 +
for ( i=0 ; i < output_amino_sequence.length /100 ; i++){
 +
amino_lines.push(output_amino_sequence.substr(i*100,100));
 +
}
 +
// next add the mark-up to the lines
 +
var amino_output_lines = [];
 +
if (Feature_Markups.length == 0){
 +
amino_output_lines = amino_lines; //so no markup needed
 +
}
 +
else{ // so there is markup to be added
 +
var current_feature_element = 0;
 +
var open_close_index = 0;
 +
var open_close_index_bold = 0;
 +
for ( var line = 0; line<amino_lines.length ; line++ ){
 +
amino_output_lines[line] = "" ;
 +
var last_element_pos = 0;
 +
if (open_close_index>0){ // so have open underlines at the end of previous line, so must reopen them
 +
for ( j = 0 ; j < open_close_index ; j++){
 +
amino_output_lines[line]=amino_output_lines[line] + "<u>";
 +
}
 +
}
 +
if (open_close_index_bold>0){ // so have open bolds at the end of previous line, so must reopen them
 +
for ( j = 0 ; j < open_close_index_bold ; j++){
 +
amino_output_lines[line]=amino_output_lines[line] + "<b>";
 +
}
 +
}
 +
while (Feature_Markups_lines[current_feature_element] == line){ //loop over all markups in this line
 +
var curr_element_pos = Feature_Markups[current_feature_element];
 +
var curr_element_pos_inline = curr_element_pos - 100*line - 1;
 +
if ( jQuery.inArray( curr_element_pos , FeatureStarts ) > -1 ){
 +
if (SequenceFeatures[curr_element_pos] == "TG"){ //so scar, so bold
 +
amino_output_lines[line] = amino_output_lines[line] + amino_lines[line].substring(last_element_pos,curr_element_pos_inline) + "<b>";
 +
open_close_index_bold ++;
 +
}
 +
else{ //so underline
 +
amino_output_lines[line] = amino_output_lines[line] + amino_lines[line].substring(last_element_pos,curr_element_pos_inline) + "<u>";
 +
open_close_index ++;
 +
}
 +
last_element_pos = curr_element_pos_inline;
 +
FeatureStarts.shift(); //removes the element which was just marked
 +
}
 +
else if ( jQuery.inArray( curr_element_pos , FeatureEnds ) > -1 ){
 +
if (SequenceFeatures[curr_element_pos-1] == "TG"){ //so scar, so bold
 +
amino_output_lines[line] = amino_output_lines[line] + amino_lines[line].substring(last_element_pos,curr_element_pos_inline+1) + "</b>";
 +
open_close_index_bold = open_close_index_bold - 1;
 +
}
 +
else{ //so underline
 +
amino_output_lines[line] = amino_output_lines[line] + amino_lines[line].substring(last_element_pos,curr_element_pos_inline+1) + "</u>";
 +
open_close_index = open_close_index - 1;
 +
}
 +
last_element_pos = curr_element_pos_inline+1;
 +
FeatureEnds.shift(); //removes the element which was just marked
 +
}
 +
current_feature_element++; //update so we get next markup
 +
}
 +
//add the remainder of the line to the output
 +
amino_output_lines[line] = amino_output_lines[line] + amino_lines[line].substr(last_element_pos);
 +
if (open_close_index>0){ //so have open underlines at the end of this line, so must close them for now
 +
for ( j = 0 ; j < open_close_index ; j++){
 +
amino_output_lines[line]=amino_output_lines[line] + "</u>";
 +
}
 +
}
 +
if (open_close_index_bold>0){ //so have open underlines at the end of this line, so must close them for now
 +
for ( j = 0 ; j < open_close_index_bold ; j++){
 +
amino_output_lines[line]=amino_output_lines[line] + "</b>";
 +
}
 +
}
 +
}
 +
/* //replace underline of RFC25 scar by boldness
 +
for ( f in SequenceFeatures ){
 +
if ( SequenceFeatures[f] == "TG"){
 +
for ( var line = 0 ; line < amino_lines.length ; line++){
 +
amino_output_lines[line].match
 +
}
 +
 +
break; // end for loop
 +
}
 +
}
 +
*/
 +
/*
 +
*  TO DO LIST:
 +
* - check for RFC25 scar and replace by BOLD (use that in next line must have end markup at position 1)
 +
 +
*/
 +
}
 +
 +
 +
//put everything together
 +
var amino_output = "<span style=\"font-family:'Courier New', Arial;\"><table border=\"0\" cellspacing=\"0\" width=\"100%\" !important>";
 +
for ( j=0 ; j<amino_lines.length ; j++ ){
 +
amino_output = amino_output + "<tr><td align=\"right\" width=\"2%\">" + (j*100 + 1).toString() + "&nbsp;</td><td width=\"98%\">" + amino_output_lines[j] + "</td></tr>";
 +
}
 +
amino_output = amino_output + "</table></span>";
-
var codon_usage = analyze_codons(codon_count); // 0 -> E_coli, 1 -> Yeast, 2 -> Mammalian
+
var codon_usage = analyze_codons(codon_count); // 0 -> E_coli, 1 -> Yeast, 2 -> Mammalian, 3 -> Subtilis, 4 -> Arabidopsis
//translate numerical CAI value into categories: 1.00-0.80 -> excellent
//translate numerical CAI value into categories: 1.00-0.80 -> excellent
// 0.79-0.60 -> good
// 0.79-0.60 -> good
Line 324: Line 556:
// 0.39-0.20 -> bad
// 0.39-0.20 -> bad
// 0.19-0.00 -> very bad
// 0.19-0.00 -> very bad
-
for (i=0;i<3;i++){
+
for (i=0;i<5;i++){
if (codon_usage[i] >= 0.80){
if (codon_usage[i] >= 0.80){
codon_usage[i] = "excellent (" + codon_usage[i].toFixed(2) + ")";
codon_usage[i] = "excellent (" + codon_usage[i].toFixed(2) + ")";
Line 350: Line 582:
var extinction_coeffs = compute_extinction_coeff(amino_content,molecular_weight);
var extinction_coeffs = compute_extinction_coeff(amino_content,molecular_weight);
var nterm = amino_sequence[0];
var nterm = amino_sequence[0];
 +
 +
var creation_time = new Date();
// --- Create the output ---  
// --- Create the output ---  
Line 357: Line 591:
htmlCode = htmlCode  
htmlCode = htmlCode  
+ "<table border=\"1\" cellspacing=\"0\" width=\"100%\" !important>"
+ "<table border=\"1\" cellspacing=\"0\" width=\"100%\" !important>"
-
+ "<tr>"
+
+ "<tr><!-- Time stamp in ms since 1/1/1970 " + creation_time.getTime() + " -->"
-
+ "<td width=\"100%\" colspan=\"2\"><strong>Parameters for BioBrick <a href=\"http://parts.igem.org/wiki/index.php?title=Part:BBa_" + bb_number + "\">BBa_" + bb_number + "</a> automatically created by the <a href=\"https://2013.igem.org/Team:TU-Munich/Results/AutoAnnotator\">BioBrick-AutoAnnotator</a> version 1.0</strong>" + "</td>"
+
+ "<td width=\"100%\" colspan=\"2\" style=\"background-color: rgb(221, 221, 221);\"><strong>Parameters for BioBrick <a href=\"http://parts.igem.org/wiki/index.php?title=Part:BBa_" + bb_number + "\">BBa_" + bb_number + "</a> automatically created by the <a href=\"https://2013.igem.org/Team:TU-Munich/Results/AutoAnnotator\">BioBrick-AutoAnnotator</a> version 1.0</strong>" + "</td>"
+ "</tr><tr>"
+ "</tr><tr>"
+ "<td width=\"100%\" colspan=\"2\">" + "<strong>Nucleotide sequence</strong> in " + RFC_standard + "<br><span style=\"font-family:'Courier New', Arial;\">" + nuc_sequence_to_display_html + "</span>" + "<br>" + "&nbsp;<strong>ORF</strong> from " + (reading_frame_start + 1) + " to " + (reading_frame_end + 1) + " (excluding stop-codon)" + "</td>"
+ "<td width=\"100%\" colspan=\"2\">" + "<strong>Nucleotide sequence</strong> in " + RFC_standard + "<br><span style=\"font-family:'Courier New', Arial;\">" + nuc_sequence_to_display_html + "</span>" + "<br>" + "&nbsp;<strong>ORF</strong> from " + (reading_frame_start + 1) + " to " + (reading_frame_end + 1) + " (excluding stop-codon)" + "</td>"
+ "</tr><tr>"
+ "</tr><tr>"
-
+ "<td width=\"100%\" colspan=\"2\"><strong>Amino acid sequence:</strong><br>" + amino_output + "</td>"
+
+ "<td width=\"100%\" colspan=\"2\"><strong>Amino acid sequence:</strong> (RFC25 scars in shown in bold, other sequence features underlined; both given below)<br>" + amino_output + "</td>"
 +
+ "</tr><tr>"
 +
+ "<td colspan=\"2\" width=\"100%\"><strong>Sequence features:</strong> (with their position in the amino acid sequence, also underlined or shown bold above)"
 +
+ SequenceFeatureTable //created above
 +
+ "</td>"
+ "</tr><tr>"
+ "</tr><tr>"
+ "<td width=\"100%\" colspan=\"2\"><strong>Amino acid composition:</strong>"
+ "<td width=\"100%\" colspan=\"2\"><strong>Amino acid composition:</strong>"
Line 457: Line 695:
+ "</td>" // close the cell of Amino acid composition
+ "</td>" // close the cell of Amino acid composition
+ "</tr><tr>"
+ "</tr><tr>"
-
+ "<td width=\"40%\">" + "<strong>Amino acid counting:</strong>&nbsp;&nbsp;"
+
+ "<td width=\"40%\">" + "<strong>Amino acid counting</strong>"
+ "<table border=\"0\" cellspacing=\"0\" width=\"100%\">"
+ "<table border=\"0\" cellspacing=\"0\" width=\"100%\">"
+ "<tr>"
+ "<tr>"
-
+ "<td width=\" 5%\"></td>"
+
+ "<td width=\" 7%\"></td>"
-
+ "<td width=\"55%\">Total number:" + "</td>"
+
+ "<td width=\"53%\">Total number:" + "</td>"
+ "<td width=\"40%\">" + total_aminos + "</td>"
+ "<td width=\"40%\">" + total_aminos + "</td>"
+ "</tr><tr>"
+ "</tr><tr>"
-
+ "<td width=\" 5%\"></td>"
+
+ "<td width=\" 7%\"></td>"
-
+ "<td width=\"55%\">Positively charged (Arg+Lys):" + "</td>"
+
+ "<td width=\"53%\">Positively charged (Arg+Lys):" + "</td>"
+ "<td width=\"40%\">" + (amino_content.R + amino_content.K) + " (" + ((amino_content.R + amino_content.K)*100/total_aminos).toFixed(1) + "%)" + "</td>"
+ "<td width=\"40%\">" + (amino_content.R + amino_content.K) + " (" + ((amino_content.R + amino_content.K)*100/total_aminos).toFixed(1) + "%)" + "</td>"
+ "</tr><tr>"
+ "</tr><tr>"
-
+ "<td width=\" 5%\"></td>"
+
+ "<td width=\" 7%\"></td>"
-
+ "<td width=\"55%\">Negatively charged (Asp+Glu):" + "</td>"
+
+ "<td width=\"53%\">Negatively charged (Asp+Glu):" + "</td>"
+ "<td width=\"40%\">" + (amino_content.D + amino_content.E) + " (" + ((amino_content.D + amino_content.E)*100/total_aminos).toFixed(1) + "%)" + "</td>"
+ "<td width=\"40%\">" + (amino_content.D + amino_content.E) + " (" + ((amino_content.D + amino_content.E)*100/total_aminos).toFixed(1) + "%)" + "</td>"
+ "</tr>"
+ "</tr>"
+ "</table>"
+ "</table>"
+ "</td>" //close Amino acid counting cell
+ "</td>" //close Amino acid counting cell
-
+ "<td width=\"60%\">" + "<strong>Biochemical parameters:</strong>"
+
+ "<td width=\"60%\">" + "<strong>Biochemical parameters</strong>"
+ "<table border=\"0\" cellspacing=\"0\" width=\"100%\">"
+ "<table border=\"0\" cellspacing=\"0\" width=\"100%\">"
+ "<tr>"
+ "<tr>"
Line 492: Line 730:
+ "</td>" //close Biochemical parameters cell
+ "</td>" //close Biochemical parameters cell
+ "</tr><tr>"
+ "</tr><tr>"
-
+ "<td colspan=\"2\" width=\"100%\"><strong>Organism specific parameters</strong>"
+
+ "<td colspan=\"2\" width=\"100%\"><strong>Codon usage</strong>"
-
+ "<table border=\"1\" cellspacing=\"0\" width=\"100%\">"
+
+ "<table border=\"0\" cellspacing=\"0\" width=\"75%\">"
+ "<tr>"
+ "<tr>"
-
+ "<td width=\"40%\">"  
+
+ "<td width=\" 4%\"></td>"
-
+ "<table border=\"0\" cellspacing=\"0\" width=\"100%\">"
+
+ "<td width=\"21%\">Organism:</td>"
-
+ "<tr>"
+
+ "<td width=\"15%\"><i>E. coli</i></td>"
-
+ "<td><strong>Organism</strong></td>"
+
+ "<td width=\"15%\"><i>B. subtilis</i></td>"
-
+ "</tr><tr>"
+
+ "<td width=\"15%\"><i>S. cerevisiae</i></td>"
-
+ "<td><i>E. coli</i></td>"
+
+ "<td width=\"15%\"><i>A. thaliana</i></td>"
-
+ "</tr><tr>"
+
+ "<td width=\"15%\">Mammals</td>"
-
+ "<td><i>B. subtilis</i></td>"
+
-
+ "</tr><tr>"
+
-
+ "<td><i>S. cervisiae</i></td>"
+
-
+ "</tr><tr>"
+
-
+ "<td><i>A. thaliana</i></td>"
+
-
+ "</tr><tr>"
+
-
+ "<td>Mammals</td>"
+
-
+ "</tr>"
+
-
+ "</table>"
+
-
+ "</td>"
+
-
+ "<td width=\"30%\">"
+
-
+ "<table border=\"0\" cellspacing=\"0\" width=\"100%\">"
+
-
+ "<tr>"
+
-
+ "<td><strong>Estimated half-life [h]</strong></td>"
+
-
+ "</tr><tr>"
+
-
+ "<td>" + halflife_table_E_coli[nterm] + "</td>"
+
-
+ "</tr><tr>"
+
-
+ "<td>" + "Platzhalter " + "</td>"
+
-
+ "</tr><tr>"
+
-
+ "<td>" + halflife_table_Yeast[nterm] + "</td>"
+
-
+ "</tr><tr>"
+
-
+ "<td>" + "Platzhalter" + "</td>"
+
-
+ "</tr><tr>"
+
-
+ "<td>" + halflife_table_Mammalian[nterm] + "</td>"
+
-
+ "</tr>"
+
-
+ "</table>"
+
-
+ "</td>"
+
-
+ "<td width=\"30%\">"
+
-
+ "<table border=\"0\" cellspacing=\"0\" width=\"100%\">"
+
-
+ "<tr>"
+
-
+ "<td><strong>Codon usage (CAI)</strong></td>"
+
-
+ "</tr><tr>"
+
-
+ "<td>" + codon_usage[0] + "</td>"
+
-
+ "</tr><tr>"
+
-
+ "<td><i>B. subtilis</i></td>"
+
-
+ "</tr><tr>"
+
-
+ "<td>" + codon_usage[1] + "</td>"
+
-
+ "</tr><tr>"
+
-
+ "<td><i>A. thaliana</i></td>"
+
-
+ "</tr><tr>"
+
-
+ "<td>" + codon_usage[2] + "</td>"
+
-
+ "</tr>"
+
-
+ "</table>"
+
-
+ "</td>"
+
-
+ "</tr>"
+
-
+ "</table>"
+
-
+ "</td>" //close Organism specific cell
+
-
+ "</tr><tr>"
+
-
// Alternativ-Vorschlag
+
-
+ "<td colspan=\"2\" width=\"100%\"><strong>Organism specific parameters</strong>"
+
-
+ "<table border=\"1\" cellspacing=\"0\" width=\"100%\">"
+
-
+ "<tr>"
+
-
+ "<td><strong>Organism:</strong></td>"
+
-
+ "<td><i>E. coli</i></td>"
+
-
+ "<td><i>B. subtilis</i></td>"
+
-
+ "<td><i>S. cervisiae</i></td>"
+
-
+ "<td><i>A. thaliana</i></td>"
+
-
+ "<td>Mammals</td>"
+
+ "</tr><tr>"
+ "</tr><tr>"
-
+ "<td><strong>Estimated half-life [h]</strong></td>"
+
+ "<td width=\" 4%\"></td>"
-
+ "<td>" + halflife_table_E_coli[nterm] + "</td>"
+
+ "<td width=\"21%\">Codon quality (CAI):</td>"
-
+ "<td>" + "Platzhalter" + "</td>"
+
+ "<td width=\"15%\">" + codon_usage[0] + "</td>"
-
+ "<td>" + halflife_table_Yeast[nterm] + "</td>"
+
+ "<td width=\"15%\">" + codon_usage[3] + "</td>"
-
+ "<td>" + "Platzhalter" + "</td>"
+
+ "<td width=\"15%\">" + codon_usage[1] + "</td>"
-
+ "<td>" + halflife_table_Mammalian[nterm] + "</td>"
+
+ "<td width=\"15%\">" + codon_usage[4] + "</td>"
-
+ "</tr><tr>"
+
+ "<td width=\"15%\">" + codon_usage[2] + "</td>"
-
+ "<td><strong>Codon usage (CAI)</strong></td>"
+
-
+ "<td>" + codon_usage[0] + "</td>"
+
-
+ "<td><i>B. subtilis</i></td>"
+
-
+ "<td>" + codon_usage[1] + "</td>"
+
-
+ "<td><i>A. thaliana</i></td>"
+
-
+ "<td>" + codon_usage[2] + "</td>"
+
+ "</tr>"
+ "</tr>"
+ "</table>"
+ "</table>"
-
+ "</td>" //close Organism specific cell
+
+ "</td>" //close Codon usage cell
-
+ "</tr><tr>"
+
-
+ "<td colspan=\"2\" width=\"100%\"><strong>Sequence features:</strong>"
+
-
+ SequenceFeatureTable //created in find_sequence_features(a,b) below
+
-
+ "</td>"
+
+ "</tr><tr>"
+ "</tr><tr>"
+ "<td colspan=\"2\" width=\"100%\"> The BioBrick-AutoAnnotator was created by <a href=\"https://2013.igem.org/Team:TU-Munich\">TU-Munich 2013</a> iGEM team. For more information please see the <a href=\"https://2013.igem.org/Team:TU-Munich/Results/Software\">documentation</a>.<br>If you have any questions, comments or suggestions, please email us at <a href=\"mailto:igem@wzw.tum.de?Subject=AutoAnnotator\" target=\"_top\">igem@wzw.tum.de</a>." + "</td>"
+ "<td colspan=\"2\" width=\"100%\"> The BioBrick-AutoAnnotator was created by <a href=\"https://2013.igem.org/Team:TU-Munich\">TU-Munich 2013</a> iGEM team. For more information please see the <a href=\"https://2013.igem.org/Team:TU-Munich/Results/Software\">documentation</a>.<br>If you have any questions, comments or suggestions, please email us at <a href=\"mailto:igem@wzw.tum.de?Subject=AutoAnnotator\" target=\"_top\">igem@wzw.tum.de</a>." + "</td>"
Line 607: Line 777:
};
};
-
function find_sequence_features(coding_nuc_sequence,amino_sequence){
+
function find_sequence_features(coding_nuc_sequence,amino_sequence){ //returns the object Features with EXTERNAL index as property and feature as value
try{
try{
 +
var Features = {};
 +
//first look for RFC25 scars
//first look for RFC25 scars
-
var RFC25_scars = [];
 
for ( i = 0 ; i < coding_nuc_sequence.length ; i = i + 3){
for ( i = 0 ; i < coding_nuc_sequence.length ; i = i + 3){
var codonpair = coding_nuc_sequence.substr(i,6);
var codonpair = coding_nuc_sequence.substr(i,6);
if (codonpair == "ACCGGC"){
if (codonpair == "ACCGGC"){
-
RFC25_scars = [RFC25_scars,i];
+
Features[(i/3) + 1] = "TG";
}
}
}
}
 +
//find forbidden restriction sites?
-
//find forbidden restriction sites
+
//var motives_found = amino_sequence.match(search_regexp);
 +
 +
while ((match = search_regexp.exec(amino_sequence)) != null) {
 +
Features[match.index + 1] = match[0]; // match.index gives the location of the first aa in the feature BUT starting from 0
 +
search_regexp.lastIndex = match.index + 1; // to look for overlapping features
 +
}
-
//find His5
 
-
var Features = [RFC25_scars,];
 
return Features;
return Features;
}
}
Line 921: Line 1,096:
var Yeast_prod = 1;
var Yeast_prod = 1;
var Mammalian_prod = 1;
var Mammalian_prod = 1;
 +
var Arabidopsis_prod = 1;
 +
var Subtilis_prod = 1;
for (codon in codon_count){
for (codon in codon_count){
Line 928: Line 1,105:
total_synon_codons = total_synon_codons + codon_count[codon]; //should give the total number of codons (excluding stop codons)
total_synon_codons = total_synon_codons + codon_count[codon]; //should give the total number of codons (excluding stop codons)
-
E_coli_prod   =   E_coli_prod * Math.pow( E_coli_codon_weights[codon], codon_count[codon]);
+
E_coli_prod     =       E_coli_prod * Math.pow(     E_coli_codon_weights[codon], codon_count[codon]);
-
Yeast_prod     =     Yeast_prod * Math.pow( Yeast_codon_weights[codon], codon_count[codon]);
+
Yeast_prod       =       Yeast_prod * Math.pow(     Yeast_codon_weights[codon], codon_count[codon]);
-
Mammalian_prod = Mammalian_prod * Math.pow( Mouse_codon_weights[codon], codon_count[codon]);
+
Mammalian_prod   =   Mammalian_prod * Math.pow(     Mouse_codon_weights[codon], codon_count[codon]);
 +
Subtilis_prod    =    Subtilis_prod * Math.pow(  Subtilis_codon_weights[codon], codon_count[codon]);
 +
Arabidopsis_prod =  Arabidopsis_prod * Math.pow( Arabidopsis_codon_weights[codon], codon_count[codon]);
//could count weak codons
//could count weak codons
};
};
-
var E_coli_CAI   = Math.pow(   E_coli_prod, 1/total_synon_codons);
+
var E_coli_CAI     = Math.pow(     E_coli_prod, 1/total_synon_codons);
-
var Yeast_CAI     = Math.pow(     Yeast_prod, 1/total_synon_codons);
+
var Yeast_CAI       = Math.pow(       Yeast_prod, 1/total_synon_codons);
-
var Mammalian_CAI = Math.pow( Mammalian_prod, 1/total_synon_codons);
+
var Mammalian_CAI   = Math.pow(   Mammalian_prod, 1/total_synon_codons);
 +
var Subtilis_CAI    = Math.pow(    Subtilis_prod, 1/total_synon_codons);
 +
var Arabidopsis_CAI = Math.pow( Arabidopsis_prod, 1/total_synon_codons);
 +
-
var usageOutput = [ E_coli_CAI , Yeast_CAI , Mammalian_CAI ];
+
var usageOutput = [ E_coli_CAI , Yeast_CAI , Mammalian_CAI , Subtilis_CAI , Arabidopsis_CAI ];
return usageOutput;
return usageOutput;
}
}

Revision as of 19:09, 5 August 2013

/**

*  DATA VALUES
*/

// One entry per amino acid one-letter code, every entry starting at 0.
// The keys are created in the order of the letters below.
var amino_acids = {};
(function () {
    var letters = "ARNDCQEGIHKLMFPSTWYV";
    for (var pos = 0; pos < letters.length; pos++) {
        amino_acids[letters.charAt(pos)] = 0;
    }
})();

// amino acid weights - http://web.expasy.org/findmod/findmod_masses.html#AA
// Keyed by one-letter amino acid code.
var amino_weights = {
    "A": 71.0788,
    "C": 103.1388,
    "D": 115.0886,
    "E": 129.1155,
    "F": 147.1766,
    "G": 57.0519,
    "H": 137.1411,
    "I": 113.1594,
    "K": 128.1741,
    "L": 113.1594,
    "M": 131.1926,
    "N": 114.1038,
    "P": 97.1167,
    "Q": 128.1307,
    "R": 156.1875,
    "S": 87.0782,
    "T": 101.1051,
    "V": 99.1326,
    "W": 186.2132,
    "Y": 163.1760
};
var water_weight = 18.01528; //average molecular weight of one molecule of water

// Extinction coefficients for Y(Tyr) and W(Trp), as well as Cystine
// (formed by disulfide bond of two Cysteine)
var extinction = { Y:1490 , W:5500 , Cystine:125 };

// Data for the calculation of the Isoelectric Point
/* pK values are from:
 * Bjellqvist, B., Hughes, G.J., Pasquali, Ch., Paquet, N., Ravier, F., Sanchez,
   J.-Ch., Frutiger, S. & Hochstrasser, D.F. The focusing positions of polypeptides
   in immobilized pH gradients can be predicted from their amino acid sequences.
   Electrophoresis 1993, 14, 1023-1031.
 * Bjellqvist, B., Basse, B., Olsen, E. and Celis, J.E. Reference points for
   comparisons of two-dimensional maps of proteins from different human cell types
   defined in a pH scale where isoelectric points correlate with polypeptide
   compositions. Electrophoresis 1994, 15, 529-539.
 */
var positive_pKs = {'Nterm': 7.5 , 'K': 10.0, 'R': 12.0, 'H': 5.98 };
var negative_pKs = {'Cterm': 3.55, 'D': 4.05, 'E': 4.45, 'C': 9.0 , 'Y': 10.0};
// For some amino acids at the n-terminus the pK value is changed:
// if one of these is N-terminal, this replaces the usual 7.5 for Nterm
var pKnterminal = {'A': 7.59, 'M': 7.0, 'S': 6.93, 'P': 8.36, 'T': 6.82, 'V': 7.44, 'E': 7.7};

// One-letter codes of the residues listed in the pK tables (K, R, H, D, E, C, Y).
var charged_aas = "KRHDECY".split("");

/*
 * Half-life tables, keyed by one-letter amino acid code; values are display strings.
 * From:
 *   - Varshavsky (1997). The N-end rule pathway of protein degradation.
 *   - Varshavsky et al. (1989). Universality and structure of the N-end rule.
 * NOTE(review): the page looks these up by the first residue and labelled the
 * values "Estimated half-life [h]" - confirm the unit against the references.
 */
var halflife_table_Mammalian = {
    "A": "4.4", "R": "1",   "N": "1.4", "D": "1.1", "C": "1.2",
    "Q": "0.8", "E": "1",   "G": "30",  "H": "3.5", "I": "20",
    "L": "5.5", "K": "1.3", "M": "30",  "F": "1.1", "P": ">20",
    "S": "1.9", "T": "7.2", "W": "2.8", "Y": "2.8", "V": "100"
};

// Yeast half-life table, keyed by one-letter amino acid code; values are display strings.
var halflife_table_Yeast = {
    "A": ">20",  "R": "0.03", "N": "0.05", "D": "0.05", "C": ">20",
    "Q": "0.17", "E": "0.5",  "G": ">20",  "H": "0.17", "I": "0.5",
    "L": "0.05", "K": "0.05", "M": ">20",  "F": "0.05", "P": ">20",
    "S": ">20",  "T": ">20",  "W": "0.05", "Y": "0.17", "V": ">20"
};

// E. coli half-life table, keyed by one-letter amino acid code; values are display
// strings ("unknown" where the table has no value, i.e. for P).
var halflife_table_E_coli = {
    "A": ">10",  "R": "0.03", "N": ">10",  "D": ">10",  "C": ">10",
    "Q": ">10",  "E": ">10",  "G": ">10",  "H": ">10",  "I": ">10",
    "L": "0.03", "K": "0.03", "M": ">10",  "F": "0.03", "P": "unknown",
    "S": ">10",  "T": ">10",  "W": "0.03", "Y": "0.03", "V": ">10"
};

// Codon (DNA triplet) -> one-letter amino acid code; stop codons map to '*'.
var translation_table = {
    'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S',
    'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y',
    'TGT': 'C', 'TGC': 'C', 'TGG': 'W', 'CTT': 'L', 'CTC': 'L',
    'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 'CCA': 'P',
    'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q',
    'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 'ATT': 'I',
    'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 'ACC': 'T',
    'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 'AAA': 'K',
    'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R',
    'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 'GCT': 'A',
    'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 'GAC': 'D',
    'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 'GGA': 'G',
    'GGG': 'G',
    // Stop codons: should not appear here, they are expected to be removed already.
    'TAA': '*',
    'TAG': '*',
    'TGA': '*' };

// The three stop codons, each mapped to 0.
var stop_codons = {
    "TAA": 0,
    "TAG": 0,
    "TGA": 0
};

// Codons without a synonymous alternative; these are excluded from the
// calculation of the CAI.
var non_synonymous_codons = {
    'ATG':0,
    'TGG':0 };

// weights for the calculation of the CAI (codon adaptation index), see e.g. http://www.ihes.fr/~carbone/papers/Bioinformatics.pdf
// method based on: Sharp,P.M. and Li,W-H. (1987) The codon adaptation index—a measure of directional synonymous codon usage bias, and its potential applications
var E_coli_codon_weights = {
    //from http://www.kazusa.or.jp/codon/cgi-bin/showcodon.cgi?species=37762&aa=1&style=N for E.coli & computing the weights for CAI calculation
    'TTT': 1, 'TTC': 36/64, 'TTA': 18/38, 'TTG': 13/38, 'TCT': 18/20,
    'TCC': 14/20, 'TCA': 18/20, 'TCG': 11/20, 'TAT': 1, 'TAC': 35/65,
    'TGT': 1, 'TGC': 48/52, 'TGG': 1, 'CTT': 15/38, 'CTC': 10/38,
    'CTA': 6/38, 'CTG': 1, 'CCT': 24/37, 'CCC': 16/37, 'CCA': 23/37,
    'CCG': 1, 'CAT': 1, 'CAC': 37/63, 'CAA': 35/65, 'CAG': 1,
    'CGT': 1, 'CGC': 26/30, 'CGA': 9/30, 'CGG': 15/30, 'ATT': 1,
    'ATC': 31/47, 'ATA': 21/47, 'ATG': 1, 'ACT': 22/31, 'ACC': 1,
    'ACA': 25/31, 'ACG': 22/31, 'AAT': 1, 'AAC': 41/59, 'AAA': 1,
    'AAG': 29/71, 'AGT': 18/20, 'AGC': 1, 'AGA': 13/30, 'AGG': 7/30,
    'GTT': 1, 'GTC': 19/32, 'GTA': 19/32, 'GTG': 29/32, 'GCT': 22/27,
    'GCC': 26/27, 'GCA': 1, 'GCG': 25/27, 'GAT': 1, 'GAC': 35/65,
    'GAA': 1, 'GAG': 36/64, 'GGT': 1, 'GGC': 29/34, 'GGA': 19/34,
    'GGG': 18/34,
    // Stop codons: should not appear here, they are expected to be removed already.
    'TAA': 1,
    'TAG': 9/58,
    'TGA': 33/58 };

// CAI codon weights for mouse; analyze_codons() uses this table for its "Mammalian" score.
// Same layout as the other *_codon_weights tables: codon -> weight written as a fraction.
var Mouse_codon_weights = {
//from http://www.kazusa.or.jp/codon/cgi-bin/showcodon.cgi?species=10090&aa=1&style=N for mus musculus & computing the weights for CAI calculation
    'TTT': 44/56, 'TTC': 1, 'TTA': 7/39, 'TTG': 13/39, 'TCT': 20/24,
    'TCC': 22/24, 'TCA': 14/24, 'TCG': 5/24, 'TAT': 43/57, 'TAC': 1,
    'TGT': 48/52, 'TGC': 1, 'TGG': 1, 'CTT': 13/39, 'CTC': 20/39,
    'CTA': 8/39, 'CTG': 1, 'CCT': 1, 'CCC': 30/31, 'CCA': 29/31,
    'CCG': 10/31, 'CAT': 41/59, 'CAC': 1, 'CAA': 26/74, 'CAG': 1,
    'CGT': 8/22, 'CGC': 17/22, 'CGA': 12/22, 'CGG': 19/22, 'ATT': 34/50,
    'ATC': 1, 'ATA': 16/50, 'ATG': 1, 'ACT': 25/35, 'ACC': 1,
    'ACA': 29/35, 'ACG': 10/35, 'AAT': 43/57, 'AAC': 1, 'AAA': 39/61,
    'AAG': 1, 'AGT': 15/24, 'AGC': 1, 'AGA': 1, 'AGG': 1,
    'GTT': 17/46, 'GTC': 25/46, 'GTA': 12/46, 'GTG': 1, 'GCT': 29/38,
    'GCC': 1, 'GCA': 23/38, 'GCG': 9/38, 'GAT': 45/55, 'GAC': 1,
    'GAA': 41/59, 'GAG': 1, 'GGT': 18/33, 'GGC': 1, 'GGA': 26/33,
    'GGG': 23/33,
    'TAA': 28/49,  //STOP CODONS, shouldn't appear and already be removed
    'TAG': 23/49,
    'TGA': 1 };

// CAI codon weights for Saccharomyces cerevisiae.
// Same layout as the other *_codon_weights tables: codon -> weight written as a fraction.
var Yeast_codon_weights = {
//from http://www.kazusa.or.jp/codon/cgi-bin/showcodon.cgi?species=4932&aa=1&style=N for Saccharomyces cerevisiae & computing the weights for CAI calculation
    'TTT': 1, 'TTC': 41/59, 'TTA': 28/29, 'TTG': 1, 'TCT': 1,
    'TCC': 16/26, 'TCA': 21/26, 'TCG': 10/26, 'TAT': 1, 'TAC': 44/56,
    'TGT': 1, 'TGC': 37/63, 'TGG': 1, 'CTT': 13/29, 'CTC': 6/29,
    'CTA': 14/29, 'CTG': 11/29, 'CCT': 31/42, 'CCC': 15/42, 'CCA': 1,
    'CCG': 12/42, 'CAT': 1, 'CAC': 36/64, 'CAA': 1, 'CAG': 31/69,
    'CGT': 14/48, 'CGC': 6/48, 'CGA': 7/48, 'CGG': 4/48, 'ATT': 1,
    'ATC': 26/46, 'ATA': 27/46, 'ATG': 1, 'ACT': 1, 'ACC': 22/35,
    'ACA': 30/35, 'ACG': 14/35, 'AAT': 1, 'AAC': 41/59, 'AAA': 1,
    'AAG': 42/58, 'AGT': 16/26, 'AGC': 11/26, 'AGA': 1, 'AGG': 21/48,
    'GTT': 1, 'GTC': 21/39, 'GTA': 21/39, 'GTG': 19/39, 'GCT': 1,
    'GCC': 22/38, 'GCA': 29/38, 'GCG': 11/38, 'GAT': 1, 'GAC': 35/65,
    'GAA': 1, 'GAG': 30/70, 'GGT': 1, 'GGC': 19/47, 'GGA': 22/47,
    'GGG': 12/47,
    'TAA': 1,  //STOP CODONS, shouldn't appear and already be removed
    'TAG': 23/47,
    'TGA': 30/47 };

// CAI codon weights for Arabidopsis (Kazusa species id 3702, see URL below).
// Same layout as the other *_codon_weights tables: codon -> weight written as a fraction.
var Arabidopsis_codon_weights = {
// from http://www.kazusa.or.jp/codon/cgi-bin/showcodon.cgi?species=3702&aa=1&style=N
    'TTT': 1, 'TTC': 49/51, 'TTA': 14/26, 'TTG': 22/26, 'TCT': 1,
    'TCC': 13/28, 'TCA': 20/28, 'TCG': 10/28, 'TAT': 1, 'TAC': 48/52,
    'TGT': 1, 'TGC': 40/60, 'TGG': 1, 'CTT': 1, 'CTC': 17/26,
    'CTA': 11/26, 'CTG': 11/26, 'CCT': 1, 'CCC': 11/38, 'CCA': 33/38,
    'CCG': 18/38, 'CAT': 1, 'CAC': 39/61, 'CAA': 1, 'CAG': 44/56,
    'CGT': 17/35, 'CGC': 7/35, 'CGA': 12/35, 'CGG': 9/35, 'ATT': 1,
    'ATC': 35/41, 'ATA': 24/41, 'ATG': 1, 'ACT': 1, 'ACC': 20/34,
    'ACA': 31/34, 'ACG': 15/34, 'AAT': 1, 'AAC': 48/52, 'AAA': 49/51,
    'AAG': 1, 'AGT': 16/28, 'AGC': 13/28, 'AGA': 1, 'AGG': 20/35,
    'GTT': 1, 'GTC': 19/40, 'GTA': 15/40, 'GTG': 26/40, 'GCT': 1,
    'GCC': 16/43, 'GCA': 27/43, 'GCG': 14/43, 'GAT': 1, 'GAC': 32/68,
    'GAA': 1, 'GAG': 48/52, 'GGT': 34/37, 'GGC': 14/37, 'GGA': 1,
    'GGG': 16/37,
    'TAA': 36/44,  //STOP CODONS, shouldn't appear and already be removed
    'TAG': 20/44,
    'TGA': 1 };

// CAI codon weights used for the B. subtilis score (Kazusa species id 1423, see URL below).
// Same layout as the other *_codon_weights tables: codon -> weight written as a fraction.
var Subtilis_codon_weights = {
//http://www.kazusa.or.jp/codon/cgi-bin/showcodon.cgi?species=1423&aa=1&style=N
    'TTT': 1, 'TTC': 32/68, 'TTA': 21/24, 'TTG': 16/24, 'TCT': 20/23,
    'TCC': 13/23, 'TCA': 1, 'TCG': 10/23, 'TAT': 1, 'TAC': 35/65,
    'TGT': 46/54, 'TGC': 1, 'TGG': 1, 'CTT': 23/24, 'CTC': 11/24,
    'CTA': 5/24, 'CTG': 1, 'CCT': 28/44, 'CCC': 9/44, 'CCA': 19/44,
    'CCG': 1, 'CAT': 1, 'CAC': 32/68, 'CAA': 1, 'CAG': 48/52,
    'CGT': 18/25, 'CGC': 20/25, 'CGA': 10/25, 'CGG': 17/25, 'ATT': 1,
    'ATC': 37/49, 'ATA': 13/49, 'ATG': 1, 'ACT': 16/40, 'ACC': 17/40,
    'ACA': 1, 'ACG': 27/40, 'AAT': 1, 'AAC': 44/56, 'AAA': 1,
    'AAG': 30/70, 'AGT': 11/23, 'AGC': 1, 'AGA': 1, 'AGG': 10/25,
    'GTT': 1, 'GTC': 26/28, 'GTA': 20/28, 'GTG': 26/28, 'GCT': 24/28,
    'GCC': 22/28, 'GCA': 1, 'GCG': 26/28, 'GAT': 1, 'GAC': 36/64,
    'GAA': 1, 'GAG': 32/68, 'GGT': 19/34, 'GGC': 1, 'GGA': 31/34,
    'GGG': 16/34,
    'TAA': 1,  //STOP CODONS, shouldn't appear and already be removed
    'TAG': 15/61,
    'TGA': 24/61 };

// Maps a feature's amino acid sequence (as stored by find_sequence_features) to the
// human-readable name shown in the "Sequence features" part of the output table.
// The key "TG" is the marker find_sequence_features() uses for an RFC25 scar.
var list_of_features = {
    "TG":"RFC25 scar (in-frame ACCGGC, show in bold)",
    "RRRRR":"Arg5-tag",
    "AWRHPQFGG":"Strep-tag I",
    "WSHPQFEK":"Strep-tag II",
    "DYKDHDGDYKDHDIDYKDDDDK":"3xFlag-tag",
    "DYKDDDDK":"Flag-tag",
    "YPYDVPDYA":"HA-tag",
    "HHHHHH":"His6-tag",
    "HHHHH":"His5-tag",
    "EQKLISEEDL":"c-Myc-tag",
    "KETAAAKFERQHMDS":"S-tag",
    "KDHLIHNVHKEFHAHAHNK":"HAT-tag",
    "KRRWKKNFIAVSAANRFKKISSSGAL":"Calmodulin-binding-peptide",
    "TDKDMTITFTNKKDAE":"Isopep-tag",
    "AHIVMVDAYKPTK":"Spy-tag",
    "TNPGVSAWQVNTAYTAGQLVTYNGKTYKCLQPHTSLAGWEPSNVPALWQLQ":"Chitin-binding domain",
    "MDEKTTGWRGGHVVEGLAGELEQLRARLEHHPQGQREP":"SBP-tag",
    "GLNDIFEAQKIEWHE":"Avitag",
    "DDDDK":"Enterokinase cleavage site",
    "IDGR":"Factor Xa cleavage site",
    "IEGR":"Factor Xa cleavage site",
    "LVPRGS":"Thrombin cleavage site",
    "LEVLFQGP":"PreScission cleavage site",
    "ENLYFQG":"TEV cleavage site",
    "ETVRFQGS":"TVMV cleavage site"
    //Note: to extend this list also need to extend search_regexp below!!!
};

// Global regular expression matching any of the tag / cleavage-site sequences listed in
// list_of_features (all keys except the "TG" scar marker). Alternatives are tried in the
// order given, so longer motifs precede the shorter ones they contain
// (3xFlag before Flag, His6 before His5).
var search_regexp = new RegExp("(" + [
    "RRRRR",
    "AWRHPQFGG",
    "WSHPQFEK",
    "DYKDHDGDYKDHDIDYKDDDDK",
    "DYKDDDDK",
    "YPYDVPDYA",
    "HHHHHH",
    "HHHHH",
    "EQKLISEEDL",
    "KETAAAKFERQHMDS",
    "KDHLIHNVHKEFHAHAHNK",
    "KRRWKKNFIAVSAANRFKKISSSGAL",
    "TDKDMTITFTNKKDAE",
    "AHIVMVDAYKPTK",
    "TNPGVSAWQVNTAYTAGQLVTYNGKTYKCLQPHTSLAGWEPSNVPALWQLQ",
    "MDEKTTGWRGGHVVEGLAGELEQLRARLEHHPQGQREP",
    "GLNDIFEAQKIEWHE",
    "DDDDK",
    "IDGR",
    "IEGR",
    "LVPRGS",
    "LEVLFQGP",
    "ENLYFQG",
    "ETVRFQGS"
].join("|") + ")", "g");

/**

*  FUNCTIONS
*/

//The function called initially. This handles interaction with the registry server to obtain the sequence
//
// Reads the text of the "EnteredBioBrick" input element. If it contains no digit it is taken
// to be a nucleotide sequence and passed straight to main_table_calc() with an empty BioBrick
// number. Otherwise the part from one character before the first digit onwards is used as the
// BioBrick number and its sequence is requested from the Registry; on success
// main_table_calc(sequence, bb_number) is called from the ajax callback.
// All problems are reported with alert(); nothing is returned or thrown to the caller.
function get_sequence(){
    try{
        //---put together the url from which to get the sequence
        var entered_bb_number = document.getElementById("EnteredBioBrick").value;
        var numeric_start = entered_bb_number.search(/[0-9]/);
        if (numeric_start == 0){
            // thrown as a plain string on purpose: the handlers in this file print the error as text
            throw "Not a valid BioBrick name nor a nucleotide sequence. The BioBrick name must contain one letter followed by digits";
        }
        if (numeric_start == -1){
            //so entered a sequence: interpret the entered bb number as sequence
            main_table_calc(entered_bb_number, "");
            return;
        }

        //entered a BioBrick number, so try to get the sequence from the Registry
        var bb_number = entered_bb_number.substr(numeric_start - 1).toUpperCase();
        var bb_url = "http://parts.igem.org/das/parts/dna/?segment=BBa_" + bb_number;

        try{
            jQuery.ajax({
                url: bb_url,
                type: 'GET',
                success: function(res) {
                    var sequence;
                    try{
                        var a = res.responseText;
                        var b = a.indexOf("<body>");
                        // NOTE(review): the two marker strings were lost in the copy this was restored
                        // from; "<p>" / "</p>" are inferred from the 3-character offset used below -
                        // confirm against the actual response of the Registry.
                        var c = a.indexOf("<p>", b);
                        var d = a.indexOf("</p>", c);
                        if (c == -1 || d == -1){
                            // without both markers substr() would return an arbitrary slice of the page
                            throw "No sequence found in the response";
                        }
                        sequence = a.substr(c + 3, d - c - 3);
                    } catch(err){
                        var failure = "Couldn't get the sequence from the registry. The BioBrick does not exist in the data base.";
                        failure = failure + "\n";
                        failure = failure + "\nPlease restart and enter the nucleotide sequence manually instead of the BioBrick number!";
                        alert(failure);
                        return; //to end program
                    }

                    //---call the main function with the sequence information---
                    if (sequence == ""){
                        alert("The sequence obtained from the registry is empty.\nPlease restart and enter the sequence manually instead of the BioBrick number!");
                        return; //to end the program
                    }
                    main_table_calc(sequence, bb_number);
                }
            });
        } catch(error){
            alert(error);
        }
    } catch(err){
        var txt = "There was an error on this page in get_sequence().\n\n";
        txt = txt + "Error description: \n" + err + "\n\n";
        txt = txt + "Click OK to continue. \n\n";
        alert(txt);
    }
}


// main_table_calc(sequence, bb_number): builds the annotation table for one part and writes it into the page.
//   sequence  - nucleotide sequence as entered / downloaded; it is passed through clean_sequence() first
//   bb_number - BioBrick number used for the "BBa_" link in the table header (get_sequence passes "" when
//               a raw sequence was entered)
// Steps, in order: clean_sequence -> find_reading_frame -> build the coding sequence (adding
// "ATGGCCGGC"/"ACCGGT" for RFC25, or "TAC" when the stop codon of the RFC10 suffix is used) ->
// translate_to_aa_and_codon_count -> find_sequence_features -> mark-up of the amino acid output
// (lines of 100 residues) -> analyze_codons and translation of the CAI values into category labels ->
// count_amino_acids, compute_molecular_weight, compute_pI, compute_extinction_coeff -> output into
// #htmlTable / #wikiTable.
// Errors are caught at the end of the function and shown with alert(); nothing is returned.
// NOTE(review): in this copy the HTML tags inside several string literals appear to be missing and the
// two block-comment terminators show up as "• /", so the function does not parse as shown - restore it
// from the original script before changing any logic.
//the main function, which calls the necessary functions to compute the table entries and puts the table together function main_table_calc(sequence,bb_number) { try{ var entered_nuc_sequence = clean_sequence(sequence); var reading_frame = find_reading_frame(entered_nuc_sequence); var reading_frame_start = reading_frame[0]; //the A in the ATG start codon var reading_frame_end = reading_frame[1]; //last nucleotide to be translated //recall internal indexing starts at 0, external indexing at 1

var entered_nuc_length = entered_nuc_sequence.length; if (reading_frame_start == -9 && (reading_frame_end > entered_nuc_length - 1) ){ //so RFC25 var coding_nuc_sequence = "ATGGCCGGC" + entered_nuc_sequence + "ACCGGT"; //add prefix and suffix var RFC_standard = "RFC 25, so ATGGCCGGC and ACCGGT were added (in italics) to the 5' and 3' ends: (underlined part encodes the protein)"; var nuc_sequence_to_display = "<u>ATGGCCGGC" + entered_nuc_sequence.substr(0,9) + "..." + entered_nuc_sequence.substr(entered_nuc_length-9) +"ACCGGT</u>" ; var nuc_sequence_to_display_html = " " + "ATGGCCGGC" + entered_nuc_sequence.substr(0,9) + " ... " + entered_nuc_sequence.substr(entered_nuc_length-9) +"ACCGGT" ; } else if ( reading_frame_end > entered_nuc_length - 1 ){ //stop codon in RFC10 suffix var coding_nuc_sequence = entered_nuc_sequence.substr(reading_frame_start) + "TAC"; var RFC_standard = "RFC 10 using the stop codon in the suffix, so TAC was added (in italics) to the 3' end: (underlined part encodes the protein)"; var nuc_sequence_to_display = entered_nuc_sequence.substr(0,reading_frame_start) + "<u>" + entered_nuc_sequence.substr(reading_frame_start,9) + "..." + entered_nuc_sequence.substr(entered_nuc_length-9) +"TAC</u>" ; var nuc_sequence_to_display_html = " " + entered_nuc_sequence.substr(0,reading_frame_start) + "" + entered_nuc_sequence.substr(reading_frame_start,9) + " ... " + entered_nuc_sequence.substr(entered_nuc_length-9) +"TAC" ; } else { var coding_nuc_sequence = entered_nuc_sequence.substr( reading_frame_start , reading_frame_end - reading_frame_start + 1); var RFC_standard = "RFC 10: (underlined part encodes the protein)"; var nuc_sequence_to_display = entered_nuc_sequence.substr(0,reading_frame_start) + "<u>" + entered_nuc_sequence.substr(reading_frame_start,9) + "..." 
+ entered_nuc_sequence.substr(reading_frame_end-8,9) +"</u>" + entered_nuc_sequence.substr(reading_frame_end+1) ; var nuc_sequence_to_display_html = " " + entered_nuc_sequence.substr(0,reading_frame_start) + "" + entered_nuc_sequence.substr(reading_frame_start,9) + " ... " + entered_nuc_sequence.substr(reading_frame_end-8,9) +"" + entered_nuc_sequence.substr(reading_frame_end+1) ; }

var trans_result = translate_to_aa_and_codon_count(coding_nuc_sequence); var amino_sequence = trans_result[0]; var codon_count = trans_result[1];

/* //for test purposes amino_sequence = "RRRRRAWRHPQFGGWSHPQFEKDYKDHDGDYKDHDIDYKDDDDKDYKDDDDKYPYDVPDYAHHHHHHEQKLISEEDLHHHHHKETAAAKFERQHMDSKDHLIHNVHKEFHAHAHNKKRRWKKNFIAVSAANRFKKISSSGAL" + "TDKDMTITFTNKKDAEAHIVMVDAYKPTKTNPGVSAWQVNTAYTAGQLVTYNGKTYKCLQPHTSLAGWEPSNVPALWQLQMDEKTTGWRGGHVVEGLAGELEQLRARLEHHPQGQREPGLNDIFEAQKIEWHEDDDDK" + "IDGRIEGRLVPRGSLEVLFQGPENLYFQGETVRFQGS";

  • /
var SequenceFeatureTable = ""; SequenceFeatures = find_sequence_features(coding_nuc_sequence,amino_sequence); var FeatureStarts = []; var FeatureEnds = []; for (f in SequenceFeatures){ //f is in the start of the feature in the amino acid sequence starting from 1 feat_start_num = parseInt(f); feat_seq = SequenceFeatures[f]; // the aa sequence of the feature feat_length = feat_seq.length; feat_end_num = feat_start_num + feat_length - 1; //the last aa of the feature (starting from 1) // Check for unwanted overlap information if ((feat_seq == "HHHHH") && (amino_sequence.substr(feat_start_num - 2, 6) == "HHHHHH")){ continue; //so is a His5-Tag and we already have the His6-Tag, so we skip it } if ((feat_seq == "HHHHHH") && (amino_sequence.substr(feat_start_num - 2, 6) == "HHHHHH")){ continue; //so is a His6-Tag overlapping with a previous His6-Tag, so we skip it } if ((feat_seq == "RRRRR") && (amino_sequence.substr(feat_start_num - 2, 5) == "RRRRR")){ continue; //so is a Arg5-Tag overlapping with a previous Arg5-Tag, so we skip it } if ((feat_seq == "DYKDDDDK") && (amino_sequence.substr(feat_start_num - 15, 22) == "DYKDHDGDYKDHDIDYKDDDDK")){ continue; //so is a Flag-Tag in a 3xFlag-Tag, so we skip it } FeatureStarts.push(feat_start_num); //all the feature starts and features ends in amino acid sequence starting from 1 FeatureEnds.push(feat_end_num); //are collected in these arrays // beautify start of the feature if (feat_start_num<10){ feat_start = "   " + feat_start_num.toString(); } else if (feat_start_num<100){ feat_start = "  " + feat_start_num.toString(); } else if (feat_start_num<1000){ feat_start = " " + feat_start_num.toString(); } else{ feat_start = feat_start_num; } // now end of the feature if (feat_end_num<10){ feat_end = "   " + feat_end_num.toString(); } else if (feat_end_num<100){ feat_end = "  " + feat_end_num.toString(); } else if (feat_end_num<1000){ feat_end = " " + feat_end_num.toString(); } else{ feat_end = feat_end_num.toString(); } 
SequenceFeatureTable = SequenceFeatureTable + "";

}

SequenceFeatureTable = SequenceFeatureTable + "
" + feat_start + " -" + feat_end + ":" + list_of_features[SequenceFeatures[f]] + "
";

//now add underline/bold to amino_output var Feature_Markups = FeatureStarts.concat(FeatureEnds);

//sort function needed for the sort below to work properly function sortNumber(a,b) { return a - b; } Feature_Markups.sort(sortNumber); //sort it //determine in which lines there is some markup to be done var Feature_Markups_lines = []; for ( i=0 ; i < Feature_Markups.length ; i++){ Feature_Markups_lines.push(Math.floor((Feature_Markups[i] - 1)/100)); }

// -- Prepare the output for the amino acid sequence -- output_amino_sequence = amino_sequence + "*"; // first break the aa sequence into lines of 100 AAs var amino_lines = []; for ( i=0 ; i < output_amino_sequence.length /100 ; i++){ amino_lines.push(output_amino_sequence.substr(i*100,100)); } // next add the mark-up to the lines var amino_output_lines = []; if (Feature_Markups.length == 0){ amino_output_lines = amino_lines; //so no markup needed } else{ // so there is markup to be added var current_feature_element = 0; var open_close_index = 0; var open_close_index_bold = 0; for ( var line = 0; line<amino_lines.length ; line++ ){ amino_output_lines[line] = "" ; var last_element_pos = 0; if (open_close_index>0){ // so have open underlines at the end of previous line, so must reopen them for ( j = 0 ; j < open_close_index ; j++){ amino_output_lines[line]=amino_output_lines[line] + ""; } } if (open_close_index_bold>0){ // so have open bolds at the end of previous line, so must reopen them for ( j = 0 ; j < open_close_index_bold ; j++){ amino_output_lines[line]=amino_output_lines[line] + ""; } } while (Feature_Markups_lines[current_feature_element] == line){ //loop over all markups in this line var curr_element_pos = Feature_Markups[current_feature_element]; var curr_element_pos_inline = curr_element_pos - 100*line - 1; if ( jQuery.inArray( curr_element_pos , FeatureStarts ) > -1 ){ if (SequenceFeatures[curr_element_pos] == "TG"){ //so scar, so bold amino_output_lines[line] = amino_output_lines[line] + amino_lines[line].substring(last_element_pos,curr_element_pos_inline) + "<b>"; open_close_index_bold ++; } else{ //so underline amino_output_lines[line] = amino_output_lines[line] + amino_lines[line].substring(last_element_pos,curr_element_pos_inline) + "<u>"; open_close_index ++; } last_element_pos = curr_element_pos_inline; FeatureStarts.shift(); //removes the element which was just marked } else if ( jQuery.inArray( curr_element_pos , FeatureEnds ) > -1 ){ if 
(SequenceFeatures[curr_element_pos-1] == "TG"){ //so scar, so bold amino_output_lines[line] = amino_output_lines[line] + amino_lines[line].substring(last_element_pos,curr_element_pos_inline+1) + ""; open_close_index_bold = open_close_index_bold - 1; } else{ //so underline amino_output_lines[line] = amino_output_lines[line] + amino_lines[line].substring(last_element_pos,curr_element_pos_inline+1) + ""; open_close_index = open_close_index - 1; } last_element_pos = curr_element_pos_inline+1; FeatureEnds.shift(); //removes the element which was just marked } current_feature_element++; //update so we get next markup } //add the remainder of the line to the output amino_output_lines[line] = amino_output_lines[line] + amino_lines[line].substr(last_element_pos); if (open_close_index>0){ //so have open underlines at the end of this line, so must close them for now for ( j = 0 ; j < open_close_index ; j++){ amino_output_lines[line]=amino_output_lines[line] + "</u>"; } } if (open_close_index_bold>0){ //so have open underlines at the end of this line, so must close them for now for ( j = 0 ; j < open_close_index_bold ; j++){ amino_output_lines[line]=amino_output_lines[line] + "</b>"; } } } /* //replace underline of RFC25 scar by boldness for ( f in SequenceFeatures ){ if ( SequenceFeatures[f] == "TG"){ for ( var line = 0 ; line < amino_lines.length ; line++){ amino_output_lines[line].match }

break; // end for loop } }

  • /

/* * TO DO LIST: * - check for RFC25 scar and replace by BOLD (use that in next line must have end markup at position 1) * */ }


//put everything together

var amino_output = ""; for ( j=0 ; j<amino_lines.length ; j++ ){ amino_output = amino_output + "";

}

amino_output = amino_output + "
" + (j*100 + 1).toString() + " " + amino_output_lines[j] + "
";

var codon_usage = analyze_codons(codon_count); // 0 -> E_coli, 1 -> Yeast, 2 -> Mammalian, 3 -> Subtilis, 4 -> Arabidopsis //translate numerical CAI value into categories: 1.00-0.80 -> excellent // 0.79-0.60 -> good // 0.59-0.40 -> acceptable // 0.39-0.20 -> bad // 0.19-0.00 -> very bad for (i=0;i<5;i++){ if (codon_usage[i] >= 0.80){ codon_usage[i] = "excellent (" + codon_usage[i].toFixed(2) + ")"; } else if (codon_usage[i] < 0.8 && codon_usage[i] >= 0.6){ codon_usage[i] = "good (" + codon_usage[i].toFixed(2) + ")"; } else if (codon_usage[i] < 0.6 && codon_usage[i] >= 0.4){ codon_usage[i] = "acceptable (" + codon_usage[i].toFixed(2) + ")"; } else if (codon_usage[i] < 0.4 && codon_usage[i] >= 0.2){ codon_usage[i] = "bad (" + codon_usage[i].toFixed(2) + ")"; } else{ codon_usage[i] = "very bad (" + codon_usage[i].toFixed(2) + ")"; } }

var counting_result = count_amino_acids(amino_sequence); var amino_content = counting_result[0]; var amino_freq = counting_result[1]; var total_aminos = amino_sequence.length; var molecular_weight = compute_molecular_weight(amino_content); var pI = compute_pI(amino_sequence,amino_content); var extinction_coeffs = compute_extinction_coeff(amino_content,molecular_weight); var nterm = amino_sequence[0];

var creation_time = new Date();

// --- Create the output --- var htmlCode = "";

// Code to display the table htmlCode = htmlCode

+ "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" // close the cell of Amino acid composition + "" + "" //close Amino acid counting cell + "" //close Biochemical parameters cell + "" + "" //close Codon usage cell + "" + "" + "
Parameters for BioBrick <a href=\"http://parts.igem.org/wiki/index.php?title=Part:BBa_" + bb_number + "\">BBa_" + bb_number + "</a> automatically created by the <a href=\"https://2013.igem.org/Team:TU-Munich/Results/AutoAnnotator\">BioBrick-AutoAnnotator</a> version 1.0" + "
" + "Nucleotide sequence in " + RFC_standard + "
" + nuc_sequence_to_display_html + "" + "
" + " ORF from " + (reading_frame_start + 1) + " to " + (reading_frame_end + 1) + " (excluding stop-codon)" + "
Amino acid sequence: (RFC25 scars in shown in bold, other sequence features underlined; both given below)
" + amino_output + "
Sequence features: (with their position in the amino acid sequence, also underlined or shown bold above)"

+ SequenceFeatureTable //created above

+ "
Amino acid composition:"

//TABLE IN TABLE

+ "" + "" + "" + "" + "" + "" + "" + "" + "
" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "
A (Ala)" + "" + amino_content.A + " (" + amino_freq.A.toFixed(1) + "%)" + "
R (Arg)" + "" + amino_content.R + " (" + amino_freq.R.toFixed(1) + "%)" + "
N (Asn)" + "" + amino_content.N + " (" + amino_freq.N.toFixed(1) + "%)" + "
D (Asp)" + "" + amino_content.D + " (" + amino_freq.D.toFixed(1) + "%)" + "
" + "
" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "
C (Cys)" + "" + amino_content.C + " (" + amino_freq.C.toFixed(1) + "%)" + "
Q (Gln)" + "" + amino_content.Q + " (" + amino_freq.Q.toFixed(1) + "%)" + "
E (Glu)" + "" + amino_content.E + " (" + amino_freq.E.toFixed(1) + "%)" + "
G (Gly)" + "" + amino_content.G + " (" + amino_freq.G.toFixed(1) + "%)" + "
" + "
" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "
H (His)" + "" + amino_content.H + " (" + amino_freq.H.toFixed(1) + "%)" + "
I (Ile)" + "" + amino_content.I + " (" + amino_freq.I.toFixed(1) + "%)" + "
L (Leu)" + "" + amino_content.L + " (" + amino_freq.L.toFixed(1) + "%)" + "
K (Lys)" + "" + amino_content.K + " (" + amino_freq.K.toFixed(1) + "%)" + "
" + "
" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "
M (Met)" + "" + amino_content.M + " (" + amino_freq.M.toFixed(1) + "%)" + "
F (Phe)" + "" + amino_content.F + " (" + amino_freq.F.toFixed(1) + "%)" + "
P (Pro)" + "" + amino_content.P + " (" + amino_freq.P.toFixed(1) + "%)" + "
S (Ser)" + "" + amino_content.S + " (" + amino_freq.S.toFixed(1) + "%)" + "
" + "
" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "
T (Thr)" + "" + amino_content.T + " (" + amino_freq.T.toFixed(1) + "%)" + "
W (Trp)" + "" + amino_content.W + " (" + amino_freq.W.toFixed(1) + "%)" + "
Y (Tyr)" + "" + amino_content.Y + " (" + amino_freq.Y.toFixed(1) + "%)" + "
V (Val)" + "" + amino_content.V + " (" + amino_freq.V.toFixed(1) + "%)" + "
" + "
" + "
" + "Amino acid counting" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "
Total number:" + "" + total_aminos + "
Positively charged (Arg+Lys):" + "" + (amino_content.R + amino_content.K) + " (" + ((amino_content.R + amino_content.K)*100/total_aminos).toFixed(1) + "%)" + "
Negatively charged (Asp+Glu):" + "" + (amino_content.D + amino_content.E) + " (" + ((amino_content.D + amino_content.E)*100/total_aminos).toFixed(1) + "%)" + "
" + "
" + "Biochemical parameters" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "
Molecular mass [Da]:" + "" + molecular_weight.toFixed(1) + "
Theoretical pI:" + "" + pI.toFixed(2) + "
Extinction coefficient at 280 nm [M-1 cm-1]:" + "" + extinction_coeffs[1].toFixed(0) + " / " + extinction_coeffs[0].toFixed(0) + " (all Cys red/ox)" + "
" + "
Codon usage" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "
Organism:E. coliB. subtilisS. cerevisiaeA. thalianaMammals
Codon quality (CAI):" + codon_usage[0] + "" + codon_usage[3] + "" + codon_usage[1] + "" + codon_usage[4] + "" + codon_usage[2] + "
" + "
The BioBrick-AutoAnnotator was created by <a href=\"https://2013.igem.org/Team:TU-Munich\">TU-Munich 2013</a> iGEM team. For more information please see the <a href=\"https://2013.igem.org/Team:TU-Munich/Results/Software\">documentation</a>.
If you have any questions, comments or suggestions, please email us at <a href=\"mailto:igem@wzw.tum.de?Subject=AutoAnnotator\" target=\"_top\">igem@wzw.tum.de</a>." + "
"

+ "
";

$("#htmlTable").html(htmlCode); $("#htmlExplanation").html("The generated table giving various computed parameters: (The wiki-code producing this table is below)");

$("#wikiTable").text("" + htmlCode + ""); $("#wikiExplanation").html("Copy the following into the wiki to get the protein-data-table:");

} catch(err){ txt="There was an error on this page.\n\n"; if ((err.toString()).substr(0,16) == "An error occured"){ txt = txt + "This error originated at a lower level: \n\n" + err.toString(); } else{ txt=txt + "The originating error is: \n" + err + "\n\n"; } alert(txt); return; //to end execution }

};

// Finds the annotated sequence features of a protein.
//   coding_nuc_sequence - coding nucleotide sequence, scanned codon-wise for RFC25 scars
//   amino_sequence      - its translation, scanned with search_regexp for tags / cleavage sites
// Returns the object Features with the EXTERNAL index (position of the first amino acid of the
// feature, counting from 1) as property and the feature as value: "TG" for an RFC25 scar,
// otherwise the matched amino acid sequence. A regexp match starting at the same position as a
// scar replaces the scar entry.
// Throws a string starting with "An error occured" on failure (callers test for that prefix).
function find_sequence_features(coding_nuc_sequence,amino_sequence){
    try{
        var Features = {};

        //first look for RFC25 scars (the in-frame codon pair ACC GGC)
        for (var i = 0 ; i < coding_nuc_sequence.length ; i = i + 3){
            var codonpair = coding_nuc_sequence.substr(i,6);
            if (codonpair == "ACCGGC"){
                Features[(i/3) + 1] = "TG";
            }
        }

        // search_regexp is a shared global regexp whose lastIndex survives between calls;
        // start from the beginning so a previous, aborted search cannot hide matches.
        search_regexp.lastIndex = 0;
        var match;
        while ((match = search_regexp.exec(amino_sequence)) != null) {
            // match.index gives the location of the first aa in the feature BUT starting from 0
            Features[match.index + 1] = match[0];
            // continue one position after the start of this match to look for overlapping features
            search_regexp.lastIndex = match.index + 1;
        }

        return Features;
    } catch(err){
        var txt = "An error occured while finding sequence features and preparing the output.\n";
        if ((err.toString()).substr(0,16) == "An error occured"){
            txt = txt + "This error originated at a lower level: \n\n" + err.toString();
        } else{
            txt = txt + "The originating error is: \n" + err + "\n\n";
        }
        throw txt;
    }
}

// Normalises an entered nucleotide sequence: removes all whitespace (spaces, tabs, line breaks),
// converts to upper case and checks that only A, T, G and C remain.
//   sequence - the sequence as entered by the user or obtained from the registry
// Returns the cleaned sequence.
// Throws a string starting with "An error occured" if another character is present
// (callers test for that prefix, so a plain string is thrown rather than an Error object).
function clean_sequence(sequence){
    try{
        // \s also covers tabs, which would otherwise be reported as unknown nucleotides
        var cleaned = sequence.replace(/\s/g, "").toUpperCase();
        if (cleaned.search(/[^ATGC]/) > -1){
            throw "Unknown nucleotide in the entered sequence. Only use A, T, G, C!";
        }
        return cleaned;
    } catch(err){
        var txt = "An error occured while checking and cleaning up the provided sequence.\n";
        if ((err.toString()).substr(0,16) == "An error occured"){
            txt = txt + "This error originated at a lower level: \n\n" + err.toString();
        } else{
            txt = txt + "The originating error is: \n" + err + "\n\n";
        }
        throw txt;
    }
}

// Counts the standard amino acids of a protein sequence.
//   sequence - amino acid sequence in one-letter code
// Returns [content, freq]: content maps each of the 20 one-letter codes to its number of
// occurrences, freq to its percentage of sequence.length.
// Characters that are not one of the 20 codes are not counted (they still contribute to the
// length the percentages refer to). For an empty sequence all percentages are 0.
// Throws a string starting with "An error occured" on failure (callers test for that prefix).
function count_amino_acids(sequence){
    try{
        var amino_acids_content = {A:0,R:0,N:0,D:0,C:0,Q:0,E:0,G:0,H:0,I:0,L:0,K:0,M:0,F:0,P:0,S:0,T:0,W:0,Y:0,V:0};
        var amino_acids_freq = {A:0,R:0,N:0,D:0,C:0,Q:0,E:0,G:0,H:0,I:0,L:0,K:0,M:0,F:0,P:0,S:0,T:0,W:0,Y:0,V:0};
        var total = sequence.length;
        for (var i = 0; i < total; i++){
            var residue = sequence.charAt(i);
            // incrementing an unknown key would create a NaN counter, so only count known residues
            if (amino_acids_content.hasOwnProperty(residue)){
                amino_acids_content[residue]++;
            }
        }
        // amino_acids is defined elsewhere in this file; its keys decide which entries get a frequency
        for (var aa in amino_acids){
            amino_acids_freq[aa] = (total > 0) ? amino_acids_content[aa] * (100 / total) : 0;
        }
        return [amino_acids_content,amino_acids_freq];
    } catch(err){
        var txt = "An error occured while counting the amino acids.\n";
        if ((err.toString()).substr(0,16) == "An error occured"){
            txt = txt + "This error originated at a lower level: \n\n" + err.toString();
        } else{
            txt = txt + "The originating error is: \n" + err + "\n\n";
        }
        throw txt;
    }
}

// Computes the molecular weight of a protein from its amino acid composition.
//   amino_acids_content - object mapping one-letter amino acid code to its count
// Returns water_weight plus, for every key of amino_acids, count * amino_weights[key]
// (water_weight, amino_acids and amino_weights are defined elsewhere in this file).
// Throws a string starting with "An error occured" on failure (callers test for that prefix).
function compute_molecular_weight(amino_acids_content){
    try{
        var molec_weight = water_weight;
        for (var aa in amino_acids){
            molec_weight = molec_weight + amino_acids_content[aa] * amino_weights[aa];
        }
        return molec_weight;
    } catch(err){
        var txt = "An error occured while computing the molecular weight of the protein.\n";
        if ((err.toString()).substr(0,16) == "An error occured"){
            txt = txt + "This error originated at a lower level: \n\n" + err.toString();
        } else{
            txt = txt + "The originating error is: \n" + err + "\n\n";
        }
        throw txt;
    }
}

// Computes the theoretical pI, i.e. the pH at which charge_at_pH() reports no net charge.
//   sequence            - amino acid sequence; only its first residue (the N-terminus) is used
//   amino_acids_content - object mapping one-letter amino acid code to its count
// Starting at pH 7 the pH is moved up (positive charge) or down (negative charge) by a step
// that is halved in every round, until the pH changes by no more than 0.0001 or the charge is
// exactly 0.
// Throws a string starting with "An error occured" on failure (callers test for that prefix).
function compute_pI(sequence,amino_acids_content){
    try{
        var nterm = sequence[0]; //the first aa
        var composition = {
            Nterm: 1,
            Cterm: 1,
            K: amino_acids_content.K,
            R: amino_acids_content.R,
            H: amino_acids_content.H,
            D: amino_acids_content.D,
            E: amino_acids_content.E,
            C: amino_acids_content.C,
            Y: amino_acids_content.Y
        };
        //clone positive_pKs, otherwise the N-terminal override below would change the shared table
        var pos_pKs = jQuery.extend(true, {}, positive_pKs);
        if ( nterm in pKnterminal ){
            pos_pKs.Nterm = pKnterminal[nterm];
        }

        var pHOld = 0.0; //just to set off the while loop
        var pHNew = 7.0;
        var step = 3.5;
        var charge = charge_at_pH(pHNew,composition,pos_pKs);
        while ( Math.abs(pHOld - pHNew) > 0.0001 && Math.abs(charge) != 0 ){
            pHOld = pHNew; //store the now old pH
            if (charge > 0){
                pHNew = pHNew + step;
            } else { //so charge < 0
                pHNew = pHNew - step;
            }
            step = step/2;
            charge = charge_at_pH(pHNew,composition,pos_pKs);
        }
        return pHNew;
    } catch(err){
        var txt = "An error occured while computing the theoretical pI.\n";
        if ((err.toString()).substr(0,16) == "An error occured"){
            txt = txt + "This error originated at a lower level: \n\n" + err.toString();
        } else{
            txt = txt + "The originating error is: \n" + err + "\n\n";
        }
        throw txt;
    }
}

// Computes the net charge of a protein at a given pH.
//   pH      - the pH value
//   compo   - object mapping each charged group to its count
//   pos_pKs - pK values of the positively charged groups
// The keys of the global positive_pKs decide which positive groups are summed (their pK is
// read from pos_pKs); the negative groups and their pKs come from the global negative_pKs.
// Throws a string starting with "An error occured" on failure (callers test for that prefix).
function charge_at_pH(pH, compo, pos_pKs){
    try{
        var charge = 0;
        var aa;
        for (aa in positive_pKs){
            charge = charge + compo[aa] * (1/(1+Math.pow(10, pH - pos_pKs[aa])));
        }
        for (aa in negative_pKs){
            charge = charge - compo[aa] * (1/(1+Math.pow(10, negative_pKs[aa] - pH)));
        }
        return charge;
    } catch(err){
        var txt = "An error occured while computing the charge of the protein at a certain pH.\n";
        if ((err.toString()).substr(0,16) == "An error occured"){
            txt = txt + "This error originated at a lower level: \n\n" + err.toString();
        } else{
            txt = txt + "The originating error is: \n" + err + "\n\n";
        }
        throw txt;
    }
}

// Computes the extinction coefficient of the protein from its Tyr, Trp and Cys content.
//   amino_acid_content - object mapping one-letter amino acid code to its count
//   molecular_weight   - not used by the calculation; kept for the existing callers
// Returns [E_allCystine, E_noCystine]: the coefficient with all Cys pairs forming cystines
// and the coefficient without any cystine.
// Only complete pairs of Cys can form a cystine, so an odd Cys count contributes
// floor(C/2) cystines rather than a fractional one.
// Throws a string starting with "An error occured" on failure (callers test for that prefix).
function compute_extinction_coeff(amino_acid_content,molecular_weight){
    try{
        var E_noCystine = amino_acid_content.Y * extinction.Y + amino_acid_content.W * extinction.W;
        var cystines = Math.floor(amino_acid_content.C / 2);
        var E_allCystine = E_noCystine + cystines * extinction.Cystine;
        return [E_allCystine,E_noCystine];
    } catch(err){
        var txt = "An error occured while computing the extinction coefficient of the protein.\n";
        if ((err.toString()).substr(0,16) == "An error occured"){
            txt = txt + "This error originated at a lower level: \n\n" + err.toString();
        } else{
            txt = txt + "The originating error is: \n" + err + "\n\n";
        }
        throw txt;
    }
}

// Helper of find_reading_frame(): scans codon by codon downstream of the start codon at
// atg_position and returns the index of the first nucleotide of the first in-frame stop codon.
// If there is none and the sequence ends in frame, the stop codon of the RFC10 suffix is used:
// the user is informed and nuc_sequence.length + 3 is returned. Otherwise a string is thrown.
function find_in_frame_stop(nuc_sequence, atg_position){
    for (var i = atg_position + 3 ; i < nuc_sequence.length ; i = i + 3){
        if (nuc_sequence.substr(i,3) in stop_codons){
            return i;
        }
    }
    if ((nuc_sequence.length - atg_position)%3 == 0){
        alert("Using stop codon in suffix of RFC10");
        return nuc_sequence.length + 3; //so RFC10 using stop codon in suffix
    }
    throw "No stop codon found in frame & can't use stop codon in suffix of RFC10";
}

// Determines the open reading frame of a cleaned nucleotide sequence.
//   nuc_sequence - upper-case sequence consisting of A, T, G, C
// Returns [reading_frame_start, reading_frame_end] (internal indexing, starting at 0):
//   - start is the A of the ATG start codon, end the last nucleotide to be translated;
//   - for RFC25 (confirmed by the user) start is -9 and end is nuc_sequence.length + 5;
//   - when the stop codon of the RFC10 suffix is used, end is nuc_sequence.length + 2.
// The user is asked whether the part is RFC25 when the length is a multiple of 3 and the
// sequence does not end with a stop codon. If the ORF found from the first ATG covers less
// than 60% of the sequence, the user is asked for the position of the start codon; cancelling
// that dialog keeps the automatically determined ORF.
// Throws a string starting with "An error occured" on failure (callers test for that prefix).
function find_reading_frame(nuc_sequence){
    try{
        // atg_position is the first nucleotide in the ATG (start codon)
        // stop_position is the first nucleotide in the stop codon
        var atg_position;
        var stop_position;
        var isRFC25 = false;

        // check for RFC 25
        var ideal_stop = nuc_sequence.length - 3;
        if (nuc_sequence.length%3 == 0 && !(nuc_sequence.substr(ideal_stop,3) in stop_codons)){
            isRFC25 = confirm("Is this part in RFC25, i.e. the start codon is part of the prefix?\n If it is RFC25, press 'OK', otherwise 'Cancel'");
        }

        if ( !isRFC25 ){ //so not RFC25
            atg_position = nuc_sequence.indexOf("ATG");
            if (atg_position == -1){
                throw "No ATG found, something is wrong! Might be RFC25?";
            }
            stop_position = find_in_frame_stop(nuc_sequence, atg_position);

            var reading_length = stop_position - atg_position + 3;
            if ( (reading_length / nuc_sequence.length) < 0.6){
                var answer = prompt("The reading length is small compared to the length of the sequence, please enter the position of the start codon (start of sequence is 1)",atg_position + 1);
                if (answer !== null){ // null means the dialog was cancelled: keep the ORF found above
                    var entered_position = parseInt(answer, 10);
                    if (isNaN(entered_position) || entered_position < 1 || entered_position > nuc_sequence.length){
                        throw "The entered position of the start codon is not a position within the sequence";
                    }
                    atg_position = entered_position - 1;
                    // the stop codon has to be looked up in stop_codons; comparing the codon with
                    // ("TAA"|"TAG"|"TGA") compares it with the number 0 and never matches
                    stop_position = find_in_frame_stop(nuc_sequence, atg_position);
                }
            }
        } else { // so RFC25
            atg_position = -9;
            stop_position = nuc_sequence.length + 6;
        }

        var reading_frame_start = atg_position;
        var reading_frame_end = stop_position - 1; //the last nucleotide to be translated

        return [reading_frame_start,reading_frame_end];
    } catch(err){
        var txt = "An error occured while determining the appropriate open reading frame (ORF) of the provided sequence.\n";
        if ((err.toString()).substr(0,16) == "An error occured"){
            txt = txt + "This error originated at a lower level: \n\n" + err.toString();
        } else{
            txt = txt + "The originating error is: \n" + err + "\n\n";
        }
        throw txt;
    }
}

/**
 * Translates a nucleotide sequence into an amino acid sequence and counts
 * how often each codon occurs.
 *
 * The sequence is read in consecutive triplets starting at index 0, so the
 * caller must pass a sequence that is already in frame. Each triplet is
 * looked up in the file-level `translation_table`.
 *
 * @param {string} nuc_sequence - In-frame nucleotide sequence made of the
 *     upper-case letters A, C, G and T.
 * @returns {Array} Two-element array `[amino_sequence, codon_count]`:
 *     the translated string and an object mapping each of the 64 codons to
 *     its number of occurrences.
 * @throws {string} A message starting with "An error occured while
 *     translating ..." when translation fails, e.g. because a triplet is not
 *     one of the 64 known codons (ambiguous base, lower case, or a trailing
 *     fragment shorter than three nucleotides).
 */
function translate_to_aa_and_codon_count(nuc_sequence){
    try{
        var amino_sequence = "";
        var codon_count = {
            'TTT': 0, 'TTC': 0, 'TTA': 0, 'TTG': 0, 'TCT': 0,
            'TCC': 0, 'TCA': 0, 'TCG': 0, 'TAT': 0, 'TAC': 0,
            'TGT': 0, 'TGC': 0, 'TGG': 0, 'CTT': 0, 'CTC': 0,
            'CTA': 0, 'CTG': 0, 'CCT': 0, 'CCC': 0, 'CCA': 0,
            'CCG': 0, 'CAT': 0, 'CAC': 0, 'CAA': 0, 'CAG': 0,
            'CGT': 0, 'CGC': 0, 'CGA': 0, 'CGG': 0, 'ATT': 0,
            'ATC': 0, 'ATA': 0, 'ATG': 0, 'ACT': 0, 'ACC': 0,
            'ACA': 0, 'ACG': 0, 'AAT': 0, 'AAC': 0, 'AAA': 0,
            'AAG': 0, 'AGT': 0, 'AGC': 0, 'AGA': 0, 'AGG': 0,
            'GTT': 0, 'GTC': 0, 'GTA': 0, 'GTG': 0, 'GCT': 0,
            'GCC': 0, 'GCA': 0, 'GCG': 0, 'GAT': 0, 'GAC': 0,
            'GAA': 0, 'GAG': 0, 'GGT': 0, 'GGC': 0, 'GGA': 0,
            'GGG': 0,
            'TAA': 0,  //STOP CODONS, shouldn't appear and already be removed
            'TAG': 0,
            'TGA': 0
        };

        // Declared locally: the loop index used to leak into the global scope.
        var i;
        var codon;
        for (i = 0; i < nuc_sequence.length; i = i + 3){
            codon = nuc_sequence.substring(i, i + 3);

            // Reject anything that is not one of the 64 codons. Without this
            // check an unknown triplet would create a NaN counter and append
            // the text "undefined" to the amino acid sequence.
            if (!Object.prototype.hasOwnProperty.call(codon_count, codon)){
                throw "Invalid codon \"" + codon + "\" at nucleotide position " + (i + 1) + ".";
            }

            codon_count[codon]++; //count the number of each codon appearing
            amino_sequence = amino_sequence + translation_table[codon];
        }
        return [amino_sequence, codon_count];
    } catch(err){
        // Errors are passed upwards as plain strings; callers recognise an
        // already-wrapped message by its first 16 characters.
        var txt = "An error occured while translating the nucleotide sequence into the amino acid sequence.\n";
        if ((err.toString()).substr(0,16) == "An error occured"){
            txt = txt + "This error originated at a lower level: \n\n" + err.toString();
        } else {
            txt = txt + "The originating error is: \n" + err + "\n\n";
        }
        throw txt;
    }
}

/**
 * Computes a codon usage score (CAI) per host organism from codon counts.
 *
 * For every organism the score is the geometric mean of the per-codon
 * weights over all counted codons, i.e. exp( sum(count * ln(weight)) / N ),
 * where N is the total number of counted codons. Codons listed in the
 * file-level `non_synonymous_codons` are ignored.
 *
 * The mean is accumulated in log space: multiplying the raw weights
 * underflows to 0 for long sequences and would report a score of 0.
 *
 * @param {Object} codon_count - Maps codon strings to their number of
 *     occurrences (as produced by translate_to_aa_and_codon_count).
 * @returns {number[]} Scores in the fixed order
 *     [E. coli, yeast, mammalian (mouse table), B. subtilis, Arabidopsis].
 *     All entries are NaN when no codon was counted.
 * @throws {string} A message starting with "An error occured while
 *     analyzing ..." when the analysis fails.
 */
function analyze_codons(codon_count){
    try{
        // Order defines the order of the returned scores.
        var weight_tables = [
            E_coli_codon_weights,
            Yeast_codon_weights,
            Mouse_codon_weights,
            Subtilis_codon_weights,
            Arabidopsis_codon_weights
        ];
        var log_sums = [0, 0, 0, 0, 0];
        var total_synon_codons = 0;

        // Declared locally: these used to leak into the global scope.
        var codon;
        var count;
        var t;

        for (codon in codon_count){
            if (codon in non_synonymous_codons){ //ignore the non-synonimous codons
                continue;
            }

            count = codon_count[codon];
            total_synon_codons = total_synon_codons + count; //should give the total number of codons (excluding stop codons)

            // A codon that never occurs contributes a factor of 1; skipping
            // it also avoids 0 * ln(0) = NaN for zero-weight entries.
            if (count === 0){
                continue;
            }

            for (t = 0; t < weight_tables.length; t = t + 1){
                log_sums[t] = log_sums[t] + count * Math.log(weight_tables[t][codon]);
            }

            //could count weak codons
        }

        var usageOutput = [];
        for (t = 0; t < log_sums.length; t = t + 1){
            usageOutput.push(Math.exp(log_sums[t] / total_synon_codons));
        }
        return usageOutput;
    } catch(err){
        // Errors are passed upwards as plain strings; callers recognise an
        // already-wrapped message by its first 16 characters.
        var txt = "An error occured while analyzing the codon usage of the provided sequence.\n";
        if ((err.toString()).substr(0,16) == "An error occured"){
            txt = txt + "This error originated at a lower level: \n\n" + err.toString();
        } else {
            txt = txt + "The originating error is: \n" + err + "\n\n";
        }
        throw txt;
    }
}