[Biojava-dev] Protein Alignment Exception
John Hawkins
john.hawkins at biotec.tu-dresden.de
Tue Dec 7 16:26:52 UTC 2010
Hi All,
I have searched the archives of this list to try and solve my problem
and I can not find an answer.
I am attempting to use BioJava to perform some pairwise alignments of
protein sequences.
I have written a simple test class to check the functionality, and I am
getting an exception which does not make sense to me.
Using the code below I receive the Exception
org.biojava.bio.BioRuntimeException: Alphabet missmatch occured:
sequences with different alphabet cannot be aligned.
at
org.biojava.bio.alignment.NeedlemanWunsch.pairwiseAlignment(NeedlemanWunsch.java:635)
at utilities.PairWiseAlignment.alignProteins(PairWiseAlignment.java:68)
at utilities.PairWiseAlignment.main(PairWiseAlignment.java:184)
This is occurring even when I input two identical protein sequences (as
Strings).
It appears that the ProteinTools.createProteinSequence method generates
a unique alphabet for each sequence it is given,
and I cannot see any way to force the resulting sequence to have the
same underlying alphabet.
Please forgive me if I have overlooked something obvious.
Regards
John Hawkins
----------------------------------------------------------------------------------------------------------------------------
package utilities;
import org.biojava.bio.alignment.NeedlemanWunsch;
import org.biojava.bio.alignment.SequenceAlignment;
import org.biojava.bio.alignment.SmithWaterman;
import org.biojava.bio.alignment.SubstitutionMatrix;
import org.biojava.bio.seq.ProteinTools;
import org.biojava.bio.seq.Sequence;
import org.biojava.bio.symbol.AlphabetManager;
import org.biojava.bio.symbol.FiniteAlphabet;
public class PairWiseAlignment {
private int score;
private int score2;
private String aln1;
private String aln2;
public PairWiseAlignment(int score, int score2, String aln1,
String aln2) {
super();
this.score = score;
this.score2 = score2;
this.aln1 = aln1;
this.aln2 = aln2;
}
/** This performs an alignment of two given sequences and
* returns the result in an object.
*
* @param seq1 a query sequence
* @param seq2 a target sequence
*/
public static PairWiseAlignment alignProteins(String seq1, String
seq2) {
int score = 0;
int score2 = 0;
String aln1= "";
String aln2= "";
try {
// The alphabet of the sequences. For this example DNA is choosen.
FiniteAlphabet alphabet = (FiniteAlphabet)
AlphabetManager.alphabetForName("PROTEIN");
// Read the substitution matrix file.
// For this example the matrix NUC.4.4 is good.
SubstitutionMatrix matrix = new SubstitutionMatrix(alphabet,
PairWiseAlignment.getSubstitutionMatrix(), "BLOSUM62");
// Define the default costs for sequence manipulation for the
global alignment.
SequenceAlignment aligner = new NeedlemanWunsch(
(short) 0, // match
(short) 3, // replace
(short) 2, // insert
(short) 2, // delete
(short) 1, // gapExtend
matrix // SubstitutionMatrix
);
Sequence query = ProteinTools.createProteinSequence(seq1,
"query");
Sequence target = ProteinTools.createProteinSequence(seq2,
"target");
System.err.println("ALPHA 1: " + query.getAlphabet() );
System.err.println("ALPHA 2: " + target.getAlphabet() );
// Perform an alignment and save the results.
score = aligner.pairwiseAlignment(
query, // first sequence
target // second one
);
// Print the alignment to the screen
aln1 = "Global alignment with Needleman-Wunsch:\n" +
aligner.getAlignmentString();
// Perform a local alginment from the sequences with
Smith-Waterman.
// Firstly, define the expenses (penalties) for every single
operation.
aligner = new SmithWaterman(
(short) -1, // match
(short) 3, // replace
(short) 2, // insert
(short) 2, // delete
(short) 1, // gapExtend
matrix // SubstitutionMatrix
);
// Perform the local alignment.
score2 = aligner.pairwiseAlignment(query, target);
aln2 = "\nlocal alignment with SmithWaterman:\n" +
aligner.getAlignmentString();
} catch (Exception exc) {
exc.printStackTrace();
}
return new PairWiseAlignment(score, score2, aln1, aln2);
}
public static String getSubstitutionMatrix() {
//TODO: Load this as a resource
String result = "# Matrix made by matblas from
blosum62.iij \n" +
"# * column uses minimum score\n" +
"# BLOSUM Clustered Scoring Matrix in 1/2 Bit Units\n" +
"# Blocks Database = /data/blocks_5.0/blocks.dat\n" +
"# Cluster Percentage: >= 62\n" +
"# Entropy = 0.6979, Expected = -0.5209\n" +
" A R N D C Q E G H I L K M F P S T W Y
V B Z X \n" +
"A 4 -1 -2 -2 0 -1 -1 0 -2 -1 -1 -1 -1 -2 -1 1 0 -3 -2
0 -2 -1 0 \n" +
"R -1 5 0 -2 -3 1 0 -2 0 -3 -2 2 -1 -3 -2 -1 -1 -3 -2
-3 -1 0 -1 \n" +
"N -2 0 6 1 -3 0 0 0 1 -3 -3 0 -2 -3 -2 1 0 -4 -2
-3 3 0 -1 \n" +
"D -2 -2 1 6 -3 0 2 -1 -1 -3 -4 -1 -3 -3 -1 0 -1 -4 -3
-3 4 1 -1 \n" +
"C 0 -3 -3 -3 9 -3 -4 -3 -3 -1 -1 -3 -1 -2 -3 -1 -1 -2 -2
-1 -3 -3 -2 \n" +
"Q -1 1 0 0 -3 5 2 -2 0 -3 -2 1 0 -3 -1 0 -1 -2 -1
-2 0 3 -1 \n" +
"E -1 0 0 2 -4 2 5 -2 0 -3 -3 1 -2 -3 -1 0 -1 -3 -2
-2 1 4 -1 \n" +
"G 0 -2 0 -1 -3 -2 -2 6 -2 -4 -4 -2 -3 -3 -2 0 -2 -2 -3
-3 -1 -2 -1 \n" +
"H -2 0 1 -1 -3 0 0 -2 8 -3 -3 -1 -2 -1 -2 -1 -2 -2 2
-3 0 0 -1 \n" +
"I -1 -3 -3 -3 -1 -3 -3 -4 -3 4 2 -3 1 0 -3 -2 -1 -3 -1
3 -3 -3 -1 \n" +
"L -1 -2 -3 -4 -1 -2 -3 -4 -3 2 4 -2 2 0 -3 -2 -1 -2 -1
1 -4 -3 -1 \n" +
"K -1 2 0 -1 -3 1 1 -2 -1 -3 -2 5 -1 -3 -1 0 -1 -3 -2
-2 0 1 -1 \n" +
"M -1 -1 -2 -3 -1 0 -2 -3 -2 1 2 -1 5 0 -2 -1 -1 -1 -1
1 -3 -1 -1 \n" +
"F -2 -3 -3 -3 -2 -3 -3 -3 -1 0 0 -3 0 6 -4 -2 -2 1 3
-1 -3 -3 -1 \n" +
"P -1 -2 -2 -1 -3 -1 -1 -2 -2 -3 -3 -1 -2 -4 7 -1 -1 -4 -3
-2 -2 -1 -2 \n" +
"S 1 -1 1 0 -1 0 0 0 -1 -2 -2 0 -1 -2 -1 4 1 -3 -2
-2 0 0 0 \n" +
"T 0 -1 0 -1 -1 -1 -1 -2 -2 -1 -1 -1 -1 -2 -1 1 5 -2 -2
0 -1 -1 0 \n" +
"W -3 -3 -4 -4 -2 -2 -3 -2 -2 -3 -2 -3 -1 1 -4 -3 -2 11 2
-3 -4 -3 -2 \n" +
"Y -2 -2 -2 -3 -2 -1 -2 -3 2 -1 -1 -2 -1 3 -3 -2 -2 2 7
-1 -3 -2 -1 \n" +
"V 0 -3 -3 -3 -1 -2 -2 -3 -3 3 1 -2 1 -1 -2 -2 0 -3 -1
4 -3 -2 -1 \n" +
"B -2 -1 3 4 -3 0 1 -1 0 -3 -4 0 -3 -3 -2 0 -1 -4 -3
-3 4 1 -1 \n" +
"Z -1 0 0 1 -3 3 4 -2 0 -3 -3 1 -1 -3 -1 0 -1 -3 -2
-2 1 4 -1 \n" +
"X 0 -1 -1 -1 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 0 0 -2 -1
-1 -1 -1 -1 ";
return result;
}
public static void main (String args[]) {
String seq1=
"RRRVTVRKADAGGLGISIKGGRENKMPILISKIFKGLAADQTEALFVGDAILSVNGEDLSSATHDEAVQALKKTGKEVVLEVKYMK";
String seq2=
"LGEEDIPREPRRIVIHRGSTGLGFNIVGGEDGEGIFISFILAGGPADLSGELRKGDQILSVNGVDLRNASHEQAAIALKNAGQTVTIIAQYKPEEYSRFEAN";
PairWiseAlignment test = PairWiseAlignment.alignProteins(
seq2, seq2 ) ;
System.err.println("SCORE 1: " + test.score + "\n" +
test.getAln1() + "\n");
System.err.println("SCORE 2: " + test.score2 + "\n" +
test.getAln2() + "\n");
}
public int getScore() {
return score;
}
public void setScore(int score) {
this.score = score;
}
public int getScore2() {
return score2;
}
public void setScore2(int score2) {
this.score2 = score2;
}
public String getAln1() {
return aln1;
}
public void setAln1(String aln1) {
this.aln1 = aln1;
}
public String getAln2() {
return aln2;
}
public void setAln2(String aln2) {
this.aln2 = aln2;
}
}
-------------------------------------------------------------------------------------------------------
--
Dr John Hawkins
Post-Doctoral Researcher
Technische Universität Dresden
Biotechnology Center
Tatzberg 47/49
01307 Dresden
Tel.: +49 (0) 351 463-40083
Fax: +49 (0) 351 463-40087
E-Mail: john.hawkins at biotec.tu-dresden.de
Webpage: www.biotec.tu-dresden.de
More information about the biojava-dev
mailing list