Team:TU-Delft/NovelPeptides

From 2013.igem.org

(Difference between revisions)
Line 75: Line 75:
<p class="mycss">
<p class="mycss">
Rules generated :
Rules generated :
-
class 0 IF : amino_acids in {8,9,11,12,13,14,15,16,18,19,20,21,23,24,25,26,27,28,29,30,31,32,33,34,36,
+
class 0 IF : amino_acids in {8,9,11,12,13,14,15,16,18,19,20,21,40,41,42,46,62,66,72,118} ^ charge in {-2,0,1,2,3,4,5,6,7,8,9,10,11,12,13} ^ proline in {0,1,2,3,4,5,8,20} ^ glycine in {0,1,2,3,4,5,6,7,8,9,15} ^ I in {0,1,2,3,4,5,6,7,8}
-
37,38,39,40,41,42,46,62,66,72,118} ^ charge in {-2,0,1,2,3,4,5,6,7,8,9,10,11,12,13}  
+
^ V in {0,1,2,3,4,5,6} ^ L in {0,1,2,3,4,5,6,7} ^ F in {0,1,2,3,4,5,6,7} ^ C in {0,1,2,3,4,5,6,8,15} ^ M in {0,1,2}  
-
^ proline in {0,1,2,3,4,5,8,20} ^ glycine in {0,1,2,3,4,5,6,7,8,9,15} ^ I in {0,1,2,3,4,5,6,7,8} ^ V in {0,1,2,3,4,5,6} ^ L in {0,1,2,3,4,5,6,7} ^ F in {0,1,2,3,4,5,6,7} ^ C in {0,1,2,3,4,5,6,8,15}  
+
^ A in {0,1,2,3,4,5,6,7,10,12} ^ W in {0,1,2,3,4} ^ hydropathy in {-2.29,-1.45,-1.38,-1.29,-1.15,-1.01,-0.99,-0.95,-0.91,-0.89,-0.85,-0.81,-0.8,-0.78,-0.69,-0.68,
-
^ M in {0,1,2} ^ A in {0,1,2,3,4,5,6,7,10,12} ^ W in {0,1,2,3,4} ^ hydropathy in {-2.29,-1.45,-1.38,-1.29,-1.15,-1.01,-0.99,-0.95,-0.91,-0.89
+
-0.67,-0.64,-0.54,-0.49,-0.47,-0.46,-0.45,-0.44,-0.43,-0.42,-0.37,-0.34,-0.28,-0.26,-0.25,-0.24,-0.23,-0.21,-0.19,-0.18,-0.17,-0.1,
-
,-0.85,-0.81,-0.8,-0.78,-0.69,
+
-0.09,-0.08,-0.07,-0.05,-0.03,0.01,0.03,0.04,0.06,0.08,0.12,0.13,0.14,0.15,0.16,0.17,0.19,0.2,0.21,0.23,0.24,0.25,0.27,0.29,0.3,0.33,0.34,
-
-0.68,-0.67,-0.64,-0.54,-0.49,-0.47,-0.46,-0.45,-0.44,-0.43,-0.42,-0.37,
+
0.36,0.43,0.44,0.45,0.48,0.49,0.51,0.52,0.53,0.54,0.55,0.57,0.58,0.59,0.6,0.62,0.63,0.64,0.66,0.7,0.73,0.75,0.76,0.78,0.81,0.82,0.84,
-
-0.34,-0.28,-0.26,-0.25,
+
0.87,0.89,0.91,0.95,1,1.02,1.04,1.05,1.06,1.08,1.09,1.11,1.12,1.14,1.16,1.22,1.23,1.26,1.29,1.3,1.31,1.34,1.35,1.36,1.39,
-
-0.24,-0.23,-0.21,-0.19,-0.18,-0.17,-0.1,-0.09,-0.08,-0.07,-0.05,-0.03,0.01,
+
1.41,1.42,1.53,1.54,1.59,1.62,1.67,1.81,1.82,1.85} ^ N1 in {8} ^ N2 in {11} ^ N3 in {8} ^ C1 in {11} ^ C2 in {12} ^ c3 in {12}
-
0.03,0.04,0.06,0.08,
+
class 1 IF : amino_acids in {13,14,23,24,33,37,40,44} ^ charge in {1,2,3,4,10} ^ proline in {0,1,2} ^ glycine in {0,1,2,3,4,7} ^ I in {0,1,2,3,4} ^ V in {0,1,4,6} ^ L in {1,2,3,4,5,6,7} ^ F in {0,1,2,3} ^ C in {0,2,8} ^ M in {0,1} ^ A in {0,1,2,3,4,5,6} ^ W in {0,1,3}  
-
0.12,0.13,0.14,0.15,0.16,0.17,0.19,0.2,0.21,0.23,0.24,0.25,0.27,0.29,0.3,
+
^ hydropathy in {-0.37,-0.27,0.26,0.56,0.81,0.9,0.97,0.98,1.25,1.26,1.34,1.47,1.73,1.8,1.82} ^ N1 in {8} ^ N2 in {14} ^ N3 in {8} ^ C1 in {11} ^ C2 in {11} ^ c3 in {8}   
-
0.33,0.34,0.36,0.43,
+
class 3 IF : amino_acids in {11,13,14,17,19,21,22,23,28,33} ^ charge in {-1,0,1,2,3} ^ proline in {0,1,2,8} ^ glycine in {0,1,2,3,5,7} ^ I in {0,1,2,3} ^ V in {0,1,2,4} ^ L in {0,1,2,3,4,5,6} ^ F in {0,1,2,4} ^ C in {0,2} ^ M in {0} ^ A in {0,1,2,3} ^ W in {0} ^ hydropathy in {-1.1,0.07,0.4,0.81,0.98,1.07,1.19,1.23,1.35,1.46,1.47,1.81,2.06} ^ N1 in {14} ^ N2 in {11} ^ N3 in {15} ^ C1 in {5} ^ C2 in {2} ^ c3 in {8}  (18)
-
0.44,0.45,0.48,0.49,0.51,0.52,0.53,0.54,0.55,0.57,0.58,0.59,0.6,0.62,0.63,0.64,
+
class 2 IF : amino_acids in {15,17,21,25,27,31,32,33,37,38,39,44} ^ charge in {1,2,3,4,5,6,7,10} ^ proline in {0,1,2,4,19} ^ glycine in {1,2,3,4,5,6} ^ I in {0,1,2,3,4,5} ^ V in {0,1,2,3,5,7} ^ L in {2,3,4,5,6,7} ^ F in {0,1,2,3,5} ^ C in {0,1,2,6} ^ M in {0,1,2} ^
-
0.66,0.7,
+
A in {0,1,2,3,4,5,6,8} ^ W in {0,1,3} ^ hydropathy in {-1.31,-1.14,-0.91,-0.82,-0.72,-0.68,-0.58,-0.52,-0.46,0,0.06,0.09,0.18,0.26,0.36,0.38,0.43,0.44,0.45,0.6,0.64,0.65,1.16,1.21,1.24,1.9} ^ N1 in {8} ^ N2 in {11} ^ N3 in {11} ^ C1 in {6} ^ C2 in {15} ^ c3 in {7}   
-
\0.73,0.75,0.76,0.78,0.81,0.82,0.84,0.87,0.89,0.91,0.95,1,1.02,1.04,1.05,
+
 
-
1.06,1.08,1.09,
+
-
1.11,1.12,1.14,1.16,1.22,1.23,1.26,1.29,1.3,1.31,1.34,1.35,1.36,1.39,1.41,
+
-
1.42,1.53,1.54,
+
-
1.59,1.62,1.67,1.81,1.82,1.85} ^ N1 in {8} ^ N2 in {11} ^ N3 in {8} ^ C1 in {11}  
+
-
^ C2 in {12} ^ c3 in {12}  
+
-
class 1 IF : amino_acids in {13,14,23,24,33,37,40,44} ^ charge in {1,2,3,4,10}  
+
-
^ proline in {0,1,2} ^ glycine in {0,1,2,3,4,7} ^ I in {0,1,2,3,4} ^ V in {0,1,4,6}  
+
-
^ L in {1,2,3,4,5,6,7} ^ F in {0,1,2,3} ^ C in {0,2,8} ^ M in {0,1} ^ A in {0,1,2,3,4,5,6}  
+
-
^ W in {0,1,3}  
+
-
^ hydropathy in {-0.37,-0.27,0.26,0.56,0.81,0.9,0.97,0.98,1.25,1.26,1.34,1.47,1.73,1.8,1.82} ^  
+
-
N1 in {8} ^ N2 in {14} ^ N3 in {8} ^ C1 in {11} ^ C2 in {11} ^ c3 in {8}  (16)
+
-
class 3 IF : amino_acids in {11,13,14,17,19,21,22,23,28,33} ^ charge in {-1,0,1,2,3}  
+
-
^ proline in {0,1,2,8} ^ glycine in {0,1,2,3,5,7} ^ I in {0,1,2,3} ^ V in {0,1,2,4}  
+
-
^ L in {0,1,2,3,4,5,6} ^ F in {0,1,2,4} ^  
+
-
C in {0,2} ^ M in {0} ^ A in {0,1,2,3} ^ W in {0} ^ hydropathy in {-1.1,0.07,0.4,0.81,0.98,1.07,1.19,1.23,1.35,1.46,1.47,1.81,2.06} ^ N1 in {14} ^ N2 in {11}  
+
-
^ N3 in {15} ^ C1 in {5} ^ C2 in {2} ^ c3 in {8}  (18)
+
-
class 2 IF : amino_acids in {15,17,21,25,27,31,32,33,37,38,39,44}  
+
-
^ charge in {1,2,3,4,5,6,7,10} ^ proline in {0,1,2,4,19} ^ glycine in {1,2,3,4,5,6}  
+
-
^ I in {0,1,2,3,4,5} ^ V in {0,1,2,3,5,7} ^ L in {2,3,4,5,6,7} ^ F in {0,1,2,3,5}  
+
-
^ C in {0,1,2,6} ^ M in {0,1,2} ^ A in {0,1,2,3,4,5,6,8} ^ W in {0,1,3} ^ hydropathy in {-1.31,-1.14,-0.91,-0.82,-0.72,-0.68,-0.58,-0.52,-0.46,0,0.06,0.09,
+
-
0.18,0.26,0.36,0.38,0.43,0.44,0.45,0.6,0.64,0.65,1.16,1.21,1.24,1.9} ^ N1 in {8} ^ N2 in {11} ^ N3 in {11} ^ C1 in {6} ^ C2 in {15} ^ c3 in {7}  (54)
+

Revision as of 08:33, 2 October 2013

Novel Peptides

The antimicrobial peptide (AMP) field is growing rapidly in response to the demand for novel antimicrobial agents. In particular, AMPs are promising candidates in the fight against antibiotic-resistant pathogens due to their low toxicity and broad range of activity. Antimicrobial peptides are generally between 12 and 50 amino acids long. These peptides include two or more positively charged residues provided by arginine, lysine or, in acidic environments, histidine, and a large proportion of hydrophobic residues.

Because AMPs are an active research area, both the knowledge and the experimentally validated data are rapidly increasing. It was decided to use these data in order to create novel peptides that are highly toxic to S.aureus but have low toxicity to E.coli. The method that was developed is described in the following sections.


Data and Feature extraction

The necessary data were acquired from the CAMP: Collection of Anti-Microbial Peptides Database. The database contains 3789 records with MIC values, but only the records that target both E.coli and S.aureus were taken into account. The acquired records were separated into 4 classes based on the MIC values:


  • 1st class: Toxic for both S.aureus and E.coli
  • 2nd class: Toxic for S.aureus but not for E.coli
  • 3rd class: Toxic for E.coli but not for S.aureus
  • 4th class: Non Toxic for both E.coli and S.aureus

The next step is feature extraction for each of the collected peptides. The resulting number of features per sequence is 21[1][2][3]. In particular, the attributes for each peptide are either general, such as the length of the sequence, or specific, based on AMP properties. A list of them is presented below:
  • length
  • charge
  • prolines' frequency
  • glycines' frequency
  • hydrophobic residues appearance
  • hydropathy
  • C terminus
  • N terminus
  • polarity

The N and C terminus were examined only for 3 positions due to the different size of each peptide.

Rule Learning

After creating the final data set, the machine learning toolkit WEKA was used. WEKA contains a collection of machine learning algorithms for data mining tasks. In our case, it was decided to use the NNge algorithm in order to perform association rule mining[4].

The term association rule mining describes a method for discovering interesting relations between variables in data sets. In that way, it is possible to discover rules that represent the class of interest and create our novel peptides!

Rules generated : class 0 IF : amino_acids in {8,9,11,12,13,14,15,16,18,19,20,21,40,41,42,46,62,66,72,118} ^ charge in {-2,0,1,2,3,4,5,6,7,8,9,10,11,12,13} ^ proline in {0,1,2,3,4,5,8,20} ^ glycine in {0,1,2,3,4,5,6,7,8,9,15} ^ I in {0,1,2,3,4,5,6,7,8} ^ V in {0,1,2,3,4,5,6} ^ L in {0,1,2,3,4,5,6,7} ^ F in {0,1,2,3,4,5,6,7} ^ C in {0,1,2,3,4,5,6,8,15} ^ M in {0,1,2} ^ A in {0,1,2,3,4,5,6,7,10,12} ^ W in {0,1,2,3,4} ^ hydropathy in {-2.29,-1.45,-1.38,-1.29,-1.15,-1.01,-0.99,-0.95,-0.91,-0.89,-0.85,-0.81,-0.8,-0.78,-0.69,-0.68, -0.67,-0.64,-0.54,-0.49,-0.47,-0.46,-0.45,-0.44,-0.43,-0.42,-0.37,-0.34,-0.28,-0.26,-0.25,-0.24,-0.23,-0.21,-0.19,-0.18,-0.17,-0.1, -0.09,-0.08,-0.07,-0.05,-0.03,0.01,0.03,0.04,0.06,0.08,0.12,0.13,0.14,0.15,0.16,0.17,0.19,0.2,0.21,0.23,0.24,0.25,0.27,0.29,0.3,0.33,0.34, 0.36,0.43,0.44,0.45,0.48,0.49,0.51,0.52,0.53,0.54,0.55,0.57,0.58,0.59,0.6,0.62,0.63,0.64,0.66,0.7,0.73,0.75,0.76,0.78,0.81,0.82,0.84, 0.87,0.89,0.91,0.95,1,1.02,1.04,1.05,1.06,1.08,1.09,1.11,1.12,1.14,1.16,1.22,1.23,1.26,1.29,1.3,1.31,1.34,1.35,1.36,1.39, 1.41,1.42,1.53,1.54,1.59,1.62,1.67,1.81,1.82,1.85} ^ N1 in {8} ^ N2 in {11} ^ N3 in {8} ^ C1 in {11} ^ C2 in {12} ^ c3 in {12} class 1 IF : amino_acids in {13,14,23,24,33,37,40,44} ^ charge in {1,2,3,4,10} ^ proline in {0,1,2} ^ glycine in {0,1,2,3,4,7} ^ I in {0,1,2,3,4} ^ V in {0,1,4,6} ^ L in {1,2,3,4,5,6,7} ^ F in {0,1,2,3} ^ C in {0,2,8} ^ M in {0,1} ^ A in {0,1,2,3,4,5,6} ^ W in {0,1,3} ^ hydropathy in {-0.37,-0.27,0.26,0.56,0.81,0.9,0.97,0.98,1.25,1.26,1.34,1.47,1.73,1.8,1.82} ^ N1 in {8} ^ N2 in {14} ^ N3 in {8} ^ C1 in {11} ^ C2 in {11} ^ c3 in {8} class 3 IF : amino_acids in {11,13,14,17,19,21,22,23,28,33} ^ charge in {-1,0,1,2,3} ^ proline in {0,1,2,8} ^ glycine in {0,1,2,3,5,7} ^ I in {0,1,2,3} ^ V in {0,1,2,4} ^ L in {0,1,2,3,4,5,6} ^ F in {0,1,2,4} ^ C in {0,2} ^ M in {0} ^ A in {0,1,2,3} ^ W in {0} ^ hydropathy in {-1.1,0.07,0.4,0.81,0.98,1.07,1.19,1.23,1.35,1.46,1.47,1.81,2.06} ^ N1 in {14} ^ N2 in {11} ^ N3 in 
{15} ^ C1 in {5} ^ C2 in {2} ^ c3 in {8} (18) class 2 IF : amino_acids in {15,17,21,25,27,31,32,33,37,38,39,44} ^ charge in {1,2,3,4,5,6,7,10} ^ proline in {0,1,2,4,19} ^ glycine in {1,2,3,4,5,6} ^ I in {0,1,2,3,4,5} ^ V in {0,1,2,3,5,7} ^ L in {2,3,4,5,6,7} ^ F in {0,1,2,3,5} ^ C in {0,1,2,6} ^ M in {0,1,2} ^ A in {0,1,2,3,4,5,6,8} ^ W in {0,1,3} ^ hydropathy in {-1.31,-1.14,-0.91,-0.82,-0.72,-0.68,-0.58,-0.52,-0.46,0,0.06,0.09,0.18,0.26,0.36,0.38,0.43,0.44,0.45,0.6,0.64,0.65,1.16,1.21,1.24,1.9} ^ N1 in {8} ^ N2 in {11} ^ N3 in {11} ^ C1 in {6} ^ C2 in {15} ^ c3 in {7}

Model Evaluation

In order to evaluate the performance of our model, we are interested in investigating the ability of the model to correctly predict or separate the classes. For that reason, the measurements accuracy, precision, recall and F-measure are computed. A brief explanation of each measurement is presented below.

  • Accuracy: the overall correctness of the model
  • Precision: percent of positive predictions which are correct
  • Recall: true positive rate (percent of positive cases that you can catch)
  • F-measure: a measure that combines precision and recall

In our case, we achieved the following results:
Accuracy: 94.4149 %
Detailed Accuracy by class
Class Precision Recall F-measure
1 0.955 0.986 0.97
2 0.917 0.611 0.733
3 0.963 0.867 0.912
4 0.737 0.875 0.8
Weighted Avg. 0.945 0.944 0.942

Final Created Peptides

The rules that were generated are taken into consideration in order to create our final peptides. First of all, it was decided to create peptides which are 13 amino acids long in order to avoid post-translational modification. The next step was to set the amino acids for the N and C terminus, as these were proven to be of great importance for the toxicity and selectivity of the peptides. Finally, we chose the rest of the amino acids so as to satisfy the remaining rules.

Finally, it was also important to ensure that the synthesized peptide would have a high probability of working. For that reason, after synthesizing the peptides we also checked the aforementioned criteria.

The amino acid sequences for each peptide and their properties are depicted underneath.

  1. joepini-mat : GFGLCKNKAFGLL
     

    Figure 1: joepini - mat properties     Figure 2: joepini - mat amino acid composition



    The joepini-mat peptide was also proven to have similarity with the MIRJA antimicrobial peptide (E-value: 6.5). This peptide does not target E.coli, but it targets Gram-positive bacteria.

    We also ran the SVM classifier in the CAMP database to predict the antimicrobial nature of the peptide.
    Sequence Id Class Probability
    Unknown AMP 0.961

  2. derkini-bharatini: FLPILGVARKGLL
     

    Figure 3: derkini-bharatini properties     Figure 4: derkini-bharatini amino acid composition



    The derkini-bharatini peptide was proven to have similarity with both Vespid chemotactic peptide 5h and Temporin-1CSb(E-value: 3.6). Temporin is an AMP which has MIC = 128 μM for E.coli and MIC = 8 μM for S.aureus. The other AMP is inactive against E.coli but active against S.aureus.

    After running the SVM classifier in CAMP, the peptide was predicted to be antimicrobial.
    Sequence Id Class Probability
    Unknown AMP 0.955

  3. sebastini-dim: FLPLLASLFSRLL
     

    Figure 5: sebastini-dim properties     Figure 6: sebastini-dim amino acid composition

    Sebastini-dim was proven to have similarity with Temporin-1CSb(E-value: 0.011).

    Temporin has MIC = 70 μM for E.coli and MIC = 2 μM for S.aureus.
    Sequence Id Class Probability
    Unknown AMP 0.862


Our lab team tested our synthesized peptides in the lab! Unfortunately, the joepini-mat peptide didn't work. However, the sebastini-dim peptide worked as expected, whereas derkini-bharatini had an inhibitory effect on the growth of S.aureus. For more information, check our lab pages!