[Biojava-dev] Protein Alignment Exception

John Hawkins john.hawkins at biotec.tu-dresden.de
Tue Dec 7 16:26:52 UTC 2010


Hi All,

I have searched the archives of this list to try and solve my problem
and I can not find an answer.

I am attempting to use BioJava to perform some pairwise alignments of
protein sequences.

I have written a simple test class to check the functionality, and I am
getting an exception which does not make sense to me.

Using the code below I receive the Exception

org.biojava.bio.BioRuntimeException: Alphabet missmatch occured:
sequences with different alphabet cannot be aligned.
    at
org.biojava.bio.alignment.NeedlemanWunsch.pairwiseAlignment(NeedlemanWunsch.java:635)
    at utilities.PairWiseAlignment.alignProteins(PairWiseAlignment.java:68)
    at utilities.PairWiseAlignment.main(PairWiseAlignment.java:184)

This is occuring even when I input two identical protein sequences (as
Strings).
It appears that the ProteinTools.createProteinSequence method generates
a unique alphabet for each sequence it is given,
and I cannot see any way to force the resulting sequence to have the
same underlying alphabet.

Please forgive me I have overlooked something obvious.

Regards
John Hawkins
----------------------------------------------------------------------------------------------------------------------------
package utilities;

import org.biojava.bio.alignment.NeedlemanWunsch;
import org.biojava.bio.alignment.SequenceAlignment;
import org.biojava.bio.alignment.SmithWaterman;
import org.biojava.bio.alignment.SubstitutionMatrix;
import org.biojava.bio.seq.ProteinTools;
import org.biojava.bio.seq.Sequence;
import org.biojava.bio.symbol.AlphabetManager;
import org.biojava.bio.symbol.FiniteAlphabet;


public class PairWiseAlignment {
     
      private int score;
      private int score2;
      private String aln1;
      private String aln2;
     
      public PairWiseAlignment(int score, int score2, String aln1,
String aln2) {
        super();
        this.score = score;
        this.score2 = score2;
        this.aln1 = aln1;
        this.aln2 = aln2;
    }


    /** This performs an alignment of two given sequences and
        * returns the result in an object.
        *
        * @param seq1 a query sequence
        * @param seq2 a target sequence
        */
      public static PairWiseAlignment alignProteins(String seq1, String
seq2) {
          int score = 0;
          int score2 = 0;
          String aln1= "";
          String aln2= "";
        try {
          // The alphabet of the sequences. For this example DNA is choosen.
          FiniteAlphabet alphabet = (FiniteAlphabet)
AlphabetManager.alphabetForName("PROTEIN");
          // Read the substitution matrix file.
          // For this example the matrix NUC.4.4 is good.
          SubstitutionMatrix matrix = new SubstitutionMatrix(alphabet,
PairWiseAlignment.getSubstitutionMatrix(), "BLOSUM62");
          // Define the default costs for sequence manipulation for the
global alignment.
          SequenceAlignment aligner = new NeedlemanWunsch(
            (short) 0,     // match
            (short) 3,    // replace
            (short) 2,  // insert
            (short) 2,    // delete
            (short) 1,  // gapExtend
            matrix     // SubstitutionMatrix
          );
          Sequence query  = ProteinTools.createProteinSequence(seq1,
"query");
          Sequence target  = ProteinTools.createProteinSequence(seq2,
"target");
         
          System.err.println("ALPHA 1: " + query.getAlphabet() );
          System.err.println("ALPHA 2: " + target.getAlphabet() );
                 
          // Perform an alignment and save the results.
          score = aligner.pairwiseAlignment(
            query, // first sequence
            target // second one
          );
          // Print the alignment to the screen
          aln1 = "Global alignment with Needleman-Wunsch:\n" +
aligner.getAlignmentString();     
     
          // Perform a local alginment from the sequences with
Smith-Waterman.
          // Firstly, define the expenses (penalties) for every single
operation.
          aligner = new SmithWaterman(
            (short) -1,     // match
            (short) 3,      // replace
            (short) 2,      // insert
            (short) 2,      // delete
            (short) 1,      // gapExtend
            matrix  // SubstitutionMatrix
          );
          // Perform the local alignment.
          score2 = aligner.pairwiseAlignment(query, target);

          aln2 = "\nlocal alignment with SmithWaterman:\n" +
aligner.getAlignmentString();
        } catch (Exception exc) {
          exc.printStackTrace();
        }
       
        return new PairWiseAlignment(score, score2,  aln1, aln2);
       
      }
     
      public static String getSubstitutionMatrix() {
           //TODO: Load this as a resource
          String result =     "#  Matrix made by matblas from
blosum62.iij \n" +
            "#  * column uses minimum score\n" +
            "#  BLOSUM Clustered Scoring Matrix in 1/2 Bit Units\n" +
            "#  Blocks Database = /data/blocks_5.0/blocks.dat\n" +
            "#  Cluster Percentage: >= 62\n" +
            "#  Entropy =   0.6979, Expected =  -0.5209\n" +
            "   A  R  N  D  C  Q  E  G  H  I  L  K  M  F  P  S  T  W  Y 
V  B  Z  X \n" +
            "A  4 -1 -2 -2  0 -1 -1  0 -2 -1 -1 -1 -1 -2 -1  1  0 -3 -2 
0 -2 -1  0 \n" +
            "R -1  5  0 -2 -3  1  0 -2  0 -3 -2  2 -1 -3 -2 -1 -1 -3 -2
-3 -1  0 -1 \n" +
            "N -2  0  6  1 -3  0  0  0  1 -3 -3  0 -2 -3 -2  1  0 -4 -2
-3  3  0 -1 \n" +
            "D -2 -2  1  6 -3  0  2 -1 -1 -3 -4 -1 -3 -3 -1  0 -1 -4 -3
-3  4  1 -1 \n" +
            "C  0 -3 -3 -3  9 -3 -4 -3 -3 -1 -1 -3 -1 -2 -3 -1 -1 -2 -2
-1 -3 -3 -2 \n" +
            "Q -1  1  0  0 -3  5  2 -2  0 -3 -2  1  0 -3 -1  0 -1 -2 -1
-2  0  3 -1 \n" +
            "E -1  0  0  2 -4  2  5 -2  0 -3 -3  1 -2 -3 -1  0 -1 -3 -2
-2  1  4 -1 \n" +
            "G  0 -2  0 -1 -3 -2 -2  6 -2 -4 -4 -2 -3 -3 -2  0 -2 -2 -3
-3 -1 -2 -1 \n" +
            "H -2  0  1 -1 -3  0  0 -2  8 -3 -3 -1 -2 -1 -2 -1 -2 -2  2
-3  0  0 -1 \n" +
            "I -1 -3 -3 -3 -1 -3 -3 -4 -3  4  2 -3  1  0 -3 -2 -1 -3 -1 
3 -3 -3 -1 \n" +
            "L -1 -2 -3 -4 -1 -2 -3 -4 -3  2  4 -2  2  0 -3 -2 -1 -2 -1 
1 -4 -3 -1 \n" +
            "K -1  2  0 -1 -3  1  1 -2 -1 -3 -2  5 -1 -3 -1  0 -1 -3 -2
-2  0  1 -1 \n" +
            "M -1 -1 -2 -3 -1  0 -2 -3 -2  1  2 -1  5  0 -2 -1 -1 -1 -1 
1 -3 -1 -1 \n" +
            "F -2 -3 -3 -3 -2 -3 -3 -3 -1  0  0 -3  0  6 -4 -2 -2  1  3
-1 -3 -3 -1 \n" +
            "P -1 -2 -2 -1 -3 -1 -1 -2 -2 -3 -3 -1 -2 -4  7 -1 -1 -4 -3
-2 -2 -1 -2 \n" +
            "S  1 -1  1  0 -1  0  0  0 -1 -2 -2  0 -1 -2 -1  4  1 -3 -2
-2  0  0  0 \n" +
            "T  0 -1  0 -1 -1 -1 -1 -2 -2 -1 -1 -1 -1 -2 -1  1  5 -2 -2 
0 -1 -1  0 \n" +
            "W -3 -3 -4 -4 -2 -2 -3 -2 -2 -3 -2 -3 -1  1 -4 -3 -2 11  2
-3 -4 -3 -2 \n" +
            "Y -2 -2 -2 -3 -2 -1 -2 -3  2 -1 -1 -2 -1  3 -3 -2 -2  2  7
-1 -3 -2 -1 \n" +
            "V  0 -3 -3 -3 -1 -2 -2 -3 -3  3  1 -2  1 -1 -2 -2  0 -3 -1 
4 -3 -2 -1 \n" +
            "B -2 -1  3  4 -3  0  1 -1  0 -3 -4  0 -3 -3 -2  0 -1 -4 -3
-3  4  1 -1 \n" +
            "Z -1  0  0  1 -3  3  4 -2  0 -3 -3  1 -1 -3 -1  0 -1 -3 -2
-2  1  4 -1 \n" +
            "X  0 -1 -1 -1 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2  0  0 -2 -1
-1 -1 -1 -1 ";
         
          return result;
      }
         
     
      public static void main (String args[]) {
          String seq1=
"RRRVTVRKADAGGLGISIKGGRENKMPILISKIFKGLAADQTEALFVGDAILSVNGEDLSSATHDEAVQALKKTGKEVVLEVKYMK";
          String seq2=
"LGEEDIPREPRRIVIHRGSTGLGFNIVGGEDGEGIFISFILAGGPADLSGELRKGDQILSVNGVDLRNASHEQAAIALKNAGQTVTIIAQYKPEEYSRFEAN";
          PairWiseAlignment test = PairWiseAlignment.alignProteins(
seq2, seq2 ) ;
          System.err.println("SCORE 1: " + test.score + "\n" +
test.getAln1() + "\n");
          System.err.println("SCORE 2: " + test.score2 + "\n" +
test.getAln2() + "\n");
      }


    public int getScore() {
        return score;
    }


    public void setScore(int score) {
        this.score = score;
    }


    public int getScore2() {
        return score2;
    }


    public void setScore2(int score2) {
        this.score2 = score2;
    }


    public String getAln1() {
        return aln1;
    }


    public void setAln1(String aln1) {
        this.aln1 = aln1;
    }


    public String getAln2() {
        return aln2;
    }


    public void setAln2(String aln2) {
        this.aln2 = aln2;
    }
}

-------------------------------------------------------------------------------------------------------


-- 

Dr John Hawkins
Post-Doctoral Researcher

Technische Universität Dresden
Biotechnology Center
Tatzberg 47/49
01307 Dresden

Tel.: +49 (0) 351 463-40083
Fax: +49 (0) 351 463-40087
E-Mail: john.hawkins at biotec.tu-dresden
Webpage: www.biotec.tu-dresden.de




More information about the biojava-dev mailing list