[Biojava-l] Java implementation of Smith-Waterman algorithm

Ryan Golhar rgolhar@telocity.com
Tue, 20 Aug 2002 09:13:39 -0400


This is a multi-part message in MIME format.

------=_NextPart_000_0000_01C24829.DF5BA760
Content-Type: text/plain;
	charset="iso-8859-1"
Content-Transfer-Encoding: 7bit

Here you go.  Its attached.  If you have any questions, let me know...



-----Original Message-----
From: biojava-l-admin@biojava.org [mailto:biojava-l-admin@biojava.org]On
Behalf Of borg collective
Sent: Tuesday, August 20, 2002 4:14 AM
To: Biojava-l@biojava.org
Subject: Re: [Biojava-l] Java implementation of Smith-Waterman algorithm


Ryan:  If you are willing to share it, can you please email it to me? Thx.

Thomas:  Thx for the info. I'll look through the example you've pointed out
if I have to reinvent the wheel (hopefully no because the amount
documentation for biojava is quite daunting for a newbie..)


_________________________________________________________________
Chat with friends online, try MSN Messenger: http://messenger.msn.com

_______________________________________________
Biojava-l mailing list  -  Biojava-l@biojava.org
http://biojava.org/mailman/listinfo/biojava-l

------=_NextPart_000_0000_01C24829.DF5BA760
Content-Type: application/octet-stream;
	name="SmithWaterman.java"
Content-Transfer-Encoding: quoted-printable
Content-Disposition: attachment;
	filename="SmithWaterman.java"

package org.umdnj.JDNA;

import java.util.*;
import java.lang.Math;
/*  Implementation of the local alignment algorithm developed by Smith & =
Waterman
 *
 *  Details taken from J. Mol. Biol. (1981), Volume 147, pages 195-197
 *
 *  Author: Ryan Golhar (golharam@umdnj.edu)
 */

public class SmithWaterman {

    private static double s(char a, char b) {
        if (a=3D=3Db)
            return 1;
        else
            return (-0.25);
    }

    private static double W(int k) {
        return 1.0 + (0.25)*k;
    }

    public static void align(String A, String B, Alignment al) {
        int n =3D A.length();
        int m =3D B.length();
        double[][] H =3D new double[1+n][1+m];
        int k, l;
        int i, j;
        double c1, c2, c3, c4; // case 1, case 2, case 3, case 4

        // start algorithm
        for (k =3D 0; k <=3D n; k++)
            H[k][0] =3D 0;

        for (l =3D 0; l <=3D m; l++)
            H[0][l] =3D 0;

        for (i =3D 1; i <=3D n; i++) {
            for (j =3D 1; j <=3D m; j++) {
                c1 =3D c2 =3D c3 =3D c4 =3D 0;

                // case 1
                c1 =3D H[i-1][j-1] + s(A.charAt(i-1), B.charAt(j-1));

                // case 2 (max of H[i-k][j] - W[k]), k >=3D 1

                // v1:
                c2 =3D Math.max(c2, H[i-1][j] - W(1));

                // v1:
                c3 =3D Math.max(c3, H[i][j-1] - W(1));

                // v2:
/*                k =3D i;
                c2 =3D 0;
                while (k >=3D 1) {
                    c2 =3D Math.max(c2, H[i-k][j] - W(k));
                    k--;
                }

                // case 3 (max of H[i][j-l] - W(l)), l >=3D 1

                // v2:
                l =3D j;
                c3 =3D 0;
                while (l >=3D 1) {
                    c3 =3D Math.max(c3, H[i][j-l] - W(l));
                    l--;
                }
*/

                // case 4
                //c4 =3D 0;

                H[i][j] =3D Math.max(Math.max(c1, c2), Math.max(c3, =
c4));
            }
        }

        // output
        makealignment(A, B, H, al);//, aTraceback, bTraceback);
    }

    private static void makealignment(String A, String B, double[][] H, =
Alignment al) { //, int[][] aTraceback, int[][] bTraceback) {
        int maxrow, maxcol;
        int row, col;
        int rowlen =3D A.length();
        int collen =3D B.length();
        StringBuffer outA, outB, midline;
        boolean skipA, skipB;
        double diag, left, top;

//        System.out.println("SIDE: " + A);
//        System.out.println("TOP: " + B);
         maxrow =3D maxcol =3D 0;

        // print matrix
        // find maximum value cell
/*        System.out.print("\t");
        for (col =3D 1; col <=3D collen; col++) {
                System.out.print("\t " + B.charAt(col-1));
        }
        System.out.println();
*/

        for (row =3D 0; row <=3D rowlen; row++) {
 //           if (row > 0)
 //               System.out.print(A.charAt(row-1));

            for (col =3D 0; col <=3D collen; col++) {
 //               System.out.print("\t" + Double.toString(H[row][col]));
                if (H[row][col] > H[maxrow][maxcol]) {
                    maxrow =3D row;
                    maxcol =3D col;
                }
            }
//            System.out.println();
        }

        // print alignments
        // maxrow, maxcol gives us the end of the path,
        // so we need to trace back to the beginning of the path
        // as we do that, add the base to the beginning of the string as
        // we are "tracing back".  Once done, concat the beginning and =
end.
        outA =3D new StringBuffer();
        outB =3D new StringBuffer();
        skipA =3D skipB =3D false;

        row =3D maxrow;
        col =3D maxcol;
        while (H[row][col] > 0) {
            diag =3D H[row-1][col-1];
            top =3D H[row-1][col];
            left =3D H[row][col-1];

            if ((!skipA) && (!skipB)) {
                outA.insert(0, A.charAt(row-1));
                outB.insert(0, B.charAt(col-1));
            } else if (skipA) {
                outA.insert(0, '-');
                outB.insert(0, B.charAt(col-1));
            } else {
                outA.insert(0, A.charAt(row-1));
                outB.insert(0, '-');
            }

            if ((diag >=3D top) && (diag >=3D left)) {
                row--;
                col--;
                skipA =3D skipB =3D false;
            } else
            if ((top >=3D diag) && (top >=3D left)) {
                row--;
                skipA =3D false;
                skipB =3D true;
            } else {
                col--;
                skipA =3D true;
                skipB =3D false;
            }
        }

        // insert the beginning of the sequences
/*        if (row >=3D 0) {
            outA.insert(0, A.substring(0, row+1));
        }

        if (col >=3D 0) {
            outB.insert(0, B.substring(0, col+1));
        }

        if (maxrow < rowlen) {
            outA.append(A.substring(maxrow+1));
        }

        if (maxcol < collen) {
            outB.append(B.substring(maxcol+1));
        }
    */
        int i;
	midline =3D new StringBuffer();

	for (i =3D 0; i < outA.length(); i++) {
            if (outA.charAt(i) =3D=3D outB.charAt(i))
                midline.append("|");
	    else
	        midline.append(" ");
	}

        //return new Alignment(outA.toString(), outB.toString(), =
midline.toString());
        al.setAlignment(outA.toString(), outB.toString(), =
midline.toString());
//      System.out.println("Alignments:");
//	System.out.println(outA);
//	System.out.println(midline);
//	System.out.println(outB);
//	System.out.println();
    }

/*
    public static void main(String args[]) {
        SmithWaterman smwm =3D new SmithWaterman();
        // sw (side, top)
        //smwm.sw("AAUGCCAUUGACGG", "CAGCCUCGCUUAG");
        //smwm.sw("ACCCCAGGCTTTACACAT", "TTGACACCCTCCCAATTGTA");
        smwm.sw(args[0], args[1]);
    }
*/
}

------=_NextPart_000_0000_01C24829.DF5BA760--