[Biojava-l] Java implementation of Smith-Waterman algorithm
Ryan Golhar
rgolhar@telocity.com
Tue, 20 Aug 2002 09:13:39 -0400
This is a multi-part message in MIME format.
------=_NextPart_000_0000_01C24829.DF5BA760
Content-Type: text/plain;
charset="iso-8859-1"
Content-Transfer-Encoding: 7bit
Here you go. Its attached. If you have any questions, let me know...
-----Original Message-----
From: biojava-l-admin@biojava.org [mailto:biojava-l-admin@biojava.org]On
Behalf Of borg collective
Sent: Tuesday, August 20, 2002 4:14 AM
To: Biojava-l@biojava.org
Subject: Re: [Biojava-l] Java implementation of Smith-Waterman algorithm
Ryan: If you are willing to share it, can you please email it to me? Thx.
Thomas: Thx for the info. I'll look through the example you've pointed out
if I have to reinvent the wheel (hopefully no because the amount
documentation for biojava is quite daunting for a newbie..)
_________________________________________________________________
Chat with friends online, try MSN Messenger: http://messenger.msn.com
_______________________________________________
Biojava-l mailing list - Biojava-l@biojava.org
http://biojava.org/mailman/listinfo/biojava-l
------=_NextPart_000_0000_01C24829.DF5BA760
Content-Type: application/octet-stream;
name="SmithWaterman.java"
Content-Transfer-Encoding: quoted-printable
Content-Disposition: attachment;
filename="SmithWaterman.java"
package org.umdnj.JDNA;
import java.util.*;
import java.lang.Math;
/* Implementation of the local alignment algorithm developed by Smith & =
Waterman
*
* Details taken from J. Mol. Biol. (1981), Volume 147, pages 195-197
*
* Author: Ryan Golhar (golharam@umdnj.edu)
*/
public class SmithWaterman {
private static double s(char a, char b) {
if (a=3D=3Db)
return 1;
else
return (-0.25);
}
private static double W(int k) {
return 1.0 + (0.25)*k;
}
public static void align(String A, String B, Alignment al) {
int n =3D A.length();
int m =3D B.length();
double[][] H =3D new double[1+n][1+m];
int k, l;
int i, j;
double c1, c2, c3, c4; // case 1, case 2, case 3, case 4
// start algorithm
for (k =3D 0; k <=3D n; k++)
H[k][0] =3D 0;
for (l =3D 0; l <=3D m; l++)
H[0][l] =3D 0;
for (i =3D 1; i <=3D n; i++) {
for (j =3D 1; j <=3D m; j++) {
c1 =3D c2 =3D c3 =3D c4 =3D 0;
// case 1
c1 =3D H[i-1][j-1] + s(A.charAt(i-1), B.charAt(j-1));
// case 2 (max of H[i-k][j] - W[k]), k >=3D 1
// v1:
c2 =3D Math.max(c2, H[i-1][j] - W(1));
// v1:
c3 =3D Math.max(c3, H[i][j-1] - W(1));
// v2:
/* k =3D i;
c2 =3D 0;
while (k >=3D 1) {
c2 =3D Math.max(c2, H[i-k][j] - W(k));
k--;
}
// case 3 (max of H[i][j-l] - W(l)), l >=3D 1
// v2:
l =3D j;
c3 =3D 0;
while (l >=3D 1) {
c3 =3D Math.max(c3, H[i][j-l] - W(l));
l--;
}
*/
// case 4
//c4 =3D 0;
H[i][j] =3D Math.max(Math.max(c1, c2), Math.max(c3, =
c4));
}
}
// output
makealignment(A, B, H, al);//, aTraceback, bTraceback);
}
private static void makealignment(String A, String B, double[][] H, =
Alignment al) { //, int[][] aTraceback, int[][] bTraceback) {
int maxrow, maxcol;
int row, col;
int rowlen =3D A.length();
int collen =3D B.length();
StringBuffer outA, outB, midline;
boolean skipA, skipB;
double diag, left, top;
// System.out.println("SIDE: " + A);
// System.out.println("TOP: " + B);
maxrow =3D maxcol =3D 0;
// print matrix
// find maximum value cell
/* System.out.print("\t");
for (col =3D 1; col <=3D collen; col++) {
System.out.print("\t " + B.charAt(col-1));
}
System.out.println();
*/
for (row =3D 0; row <=3D rowlen; row++) {
// if (row > 0)
// System.out.print(A.charAt(row-1));
for (col =3D 0; col <=3D collen; col++) {
// System.out.print("\t" + Double.toString(H[row][col]));
if (H[row][col] > H[maxrow][maxcol]) {
maxrow =3D row;
maxcol =3D col;
}
}
// System.out.println();
}
// print alignments
// maxrow, maxcol gives us the end of the path,
// so we need to trace back to the beginning of the path
// as we do that, add the base to the beginning of the string as
// we are "tracing back". Once done, concat the beginning and =
end.
outA =3D new StringBuffer();
outB =3D new StringBuffer();
skipA =3D skipB =3D false;
row =3D maxrow;
col =3D maxcol;
while (H[row][col] > 0) {
diag =3D H[row-1][col-1];
top =3D H[row-1][col];
left =3D H[row][col-1];
if ((!skipA) && (!skipB)) {
outA.insert(0, A.charAt(row-1));
outB.insert(0, B.charAt(col-1));
} else if (skipA) {
outA.insert(0, '-');
outB.insert(0, B.charAt(col-1));
} else {
outA.insert(0, A.charAt(row-1));
outB.insert(0, '-');
}
if ((diag >=3D top) && (diag >=3D left)) {
row--;
col--;
skipA =3D skipB =3D false;
} else
if ((top >=3D diag) && (top >=3D left)) {
row--;
skipA =3D false;
skipB =3D true;
} else {
col--;
skipA =3D true;
skipB =3D false;
}
}
// insert the beginning of the sequences
/* if (row >=3D 0) {
outA.insert(0, A.substring(0, row+1));
}
if (col >=3D 0) {
outB.insert(0, B.substring(0, col+1));
}
if (maxrow < rowlen) {
outA.append(A.substring(maxrow+1));
}
if (maxcol < collen) {
outB.append(B.substring(maxcol+1));
}
*/
int i;
midline =3D new StringBuffer();
for (i =3D 0; i < outA.length(); i++) {
if (outA.charAt(i) =3D=3D outB.charAt(i))
midline.append("|");
else
midline.append(" ");
}
//return new Alignment(outA.toString(), outB.toString(), =
midline.toString());
al.setAlignment(outA.toString(), outB.toString(), =
midline.toString());
// System.out.println("Alignments:");
// System.out.println(outA);
// System.out.println(midline);
// System.out.println(outB);
// System.out.println();
}
/*
public static void main(String args[]) {
SmithWaterman smwm =3D new SmithWaterman();
// sw (side, top)
//smwm.sw("AAUGCCAUUGACGG", "CAGCCUCGCUUAG");
//smwm.sw("ACCCCAGGCTTTACACAT", "TTGACACCCTCCCAATTGTA");
smwm.sw(args[0], args[1]);
}
*/
}
------=_NextPart_000_0000_01C24829.DF5BA760--