[Biojava-l] Help on NCBIQBlastService and BlastXMLQuery
Shakuntala Baichoo
shakunb at uom.ac.mu
Thu Jun 30 19:20:41 UTC 2011
Hi!
I would be grateful if anybody could help me with NCBIQBlastService.
I need to BLAST a set (in this case only 2) of nucleotide sequences and I am
using BioJava3's NCBIQBlastService. I write the results to XML files and
then try to parse each XML file so as to get all the results, in terms of %
match, e-value, etc. But I am only getting the identifiers of the sequences
that have matched, as follows:
...........
trying to get BLAST results for RID 0TJFFD5E01S
Jun 30, 2011 11:10:03 PM org.biojava3.genome.query.BlastXMLQuery <init>
INFO: Start read of 0TJFFD5E01SResults_XML.xml
Jun 30, 2011 11:10:03 PM org.biojava3.genome.query.BlastXMLQuery <init>
INFO: Read finished
Jun 30, 2011 11:10:03 PM org.biojava3.genome.query.BlastXMLQuery
getHitsQueryDef
INFO: Query for hits
Jun 30, 2011 11:10:03 PM org.biojava3.genome.query.BlastXMLQuery
getHitsQueryDef
INFO: 1 hits
[CP002614, CP002487, FQ312003, CP001363, FN424405, CP000857, AE006468,
AE017220, CP001138, CP001127, AM933172, AM933173, CP001144, FM200053,
CP001120, CP001113, CP000886, CP000026, FR775193, AE014613, AL627266]
***********************************************
trying to get BLAST results for RID 0TJFHZV201S
Jun 30, 2011 11:10:27 PM org.biojava3.genome.query.BlastXMLQuery <init>
INFO: Start read of 0TJFHZV201SResults_XML.xml
Jun 30, 2011 11:10:27 PM org.biojava3.genome.query.BlastXMLQuery <init>
INFO: Read finished
Jun 30, 2011 11:10:27 PM org.biojava3.genome.query.BlastXMLQuery
getHitsQueryDef
INFO: Query for hits
Jun 30, 2011 11:10:27 PM org.biojava3.genome.query.BlastXMLQuery
getHitsQueryDef
INFO: 1 hits
[CP002614, CP002487, AP011957, FQ312003, CP001363, FN424405, AE006468,
L19338, CP001113, CP000857, CP001138, AE017220, CP001120, CP000886,
FR775195, AM933172, FM200053, AM933173, CP000026, CP001144, CP001127,
AE014613, AL627267, M90677, CP000822]
BUILD SUCCESSFUL (total time: 54 seconds)
Note that when I open the generated XML file, it does contain all the
results. Any idea how to extract all the info, please?
Here's the sample program:
--------------------------------
/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package BlastPackage;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map.Entry;
import java.util.Set;
import org.biojava3.core.sequence.DNASequence;
import org.biojava3.genome.query.BlastXMLQuery;
import org.biojava3.core.sequence.ProteinSequence;
import org.biojava3.core.sequence.compound.AmbiguityDNACompoundSet;
import org.biojava3.core.sequence.compound.NucleotideCompound;
import org.biojava3.core.sequence.io.DNASequenceCreator;
import org.biojava3.core.sequence.io.FastaReader;
import org.biojava3.core.sequence.io.FastaReaderHelper;
import org.biojava3.core.sequence.io.GenericFastaHeaderParser;
import org.biojava3.ws.alignment.qblast.NCBIQBlastService;
import org.biojava3.ws.alignment.qblast.NCBIQBlastAlignmentProperties;
import org.biojava3.ws.alignment.qblast.NCBIQBlastOutputProperties;
import org.biojava3.ws.alignment.qblast.NCBIQBlastOutputFormat;
import org.biojava.bio.program.sax.*;
import org.biojava.bio.program.ssbind.*;
import org.biojava.bio.search.*;
import org.biojava.bio.seq.db.*;
import org.xml.sax.*;
import org.biojava.bio.*;
public class NCBIQBlastServiceTest {
/**
* The program take only a string with a path toward a sequence file
*
* For this example, I keep it simple with a single FASTA formatted file
*
*/
public static void main(String[] args) {
NCBIQBlastService rbw;
NCBIQBlastAlignmentProperties rqb;
NCBIQBlastOutputProperties rof;
InputStream is = null;
ArrayList<String> rid = new ArrayList<String>();
try {
// Let's capture the sequences in a file...
//LinkedHashMap<String, DNASequence> a =
FastaReaderHelper.readFastaDNASequence(new File("TestBlast.fas"));
FileInputStream inStream = new FileInputStream(
"TestBlast.fas" );
FastaReader<DNASequence,NucleotideCompound>
fastaReader =
new FastaReader<DNASequence,NucleotideCompound>(
inStream,
new
GenericFastaHeaderParser<DNASequence,NucleotideCompound>(),
new
DNASequenceCreator(AmbiguityDNACompoundSet.getDNACompoundSet()));
LinkedHashMap<String, DNASequence> b =
fastaReader.process();
/*
* You would imagine that one would blast a
bunch of sequences of
* identical nature with identical
parameters...
*/
rbw = new NCBIQBlastService();
rqb = new NCBIQBlastAlignmentProperties();
rqb.NCBIQBlastAlignmentProperties();
rqb.setBlastProgram("blastn");
rqb.setBlastDatabase("nr");
/*
* First, let's send all the sequences to the QBlast
service and
* keep the RID for fetching the results at some
later moments
* (actually, in a few seconds :-))
*
* Using a data structure to keep track of all
request IDs is a good
* practice.
*
*/
for (Entry<String, DNASequence> entry :
b.entrySet()) {
System.out.println(
entry.getValue().getOriginalHeader() + "\n");
String s = entry.getValue().toString();
//System.out.println("Query Sequence:");
System.out.println(s);
String request =
rbw.sendAlignmentRequest(s,rqb);
//request=rbw.
rid.add(request);
}
/*
* Let's check that our requests have been
processed. If completed,
* let's look at the alignments with my own
selection of output and
* alignment formats.
*/
for (String aRid : rid) {
System.out.println("***********************************************");
System.out.println("trying to get BLAST
results for RID "
+ aRid);
boolean wasBlasted = false;
while (!wasBlasted) {
wasBlasted = rbw.isReady(aRid,
System.currentTimeMillis());
}
rof = new NCBIQBlastOutputProperties();
rof.setOutputFormat(NCBIQBlastOutputFormat.XML);
rof.setAlignmentOutputFormat(NCBIQBlastOutputFormat.TABULAR);
rof.setDescriptionNumber(20);
rof.setAlignmentNumber(20);
//System.out.println("Output
Options:"+"\n"+rof.getOutputOptions());
is = rbw.getAlignmentResults(aRid, rof);
BufferedReader br = new BufferedReader(
new InputStreamReader(is));
String line = null;
String
OutputFilename1=aRid+"Results_XML.xml";
FileOutputStream fp1=null;
fp1 = new FileOutputStream(OutputFilename1);
while ((line = br.readLine()) != null) {
//System.out.println(line);
new PrintStream(fp1).println(line);
}
fp1.close();
BlastHomologyHits BL=new
BlastHomologyHits();
BlastXMLQuery B=new
BlastXMLQuery(OutputFilename1);
LinkedHashMap<String, ArrayList<String>>
hits=B.getHitsQueryDef(1E-100);
//System.out.println(hits);
//LinkedHashMap<String, ArrayList<String>>
Homologyhits=BL.getMatches(new File(OutputFilename1), 1E-100);
Collection c=hits.values();
Iterator i=c.iterator();
while(i.hasNext())
System.out.println(i.next());
}
is.close();
}
/*
* What happens if the file can't be read
*/
catch (IOException ioe) {
ioe.printStackTrace();
}
/*
* What happens if FastaReaderHelper hits a snag
*/
catch (Exception bio) {
bio.printStackTrace();
}
}
}
------------------------
Thanks
Shakuntala
Email Disclaimer:
This email and all its contents are subject to the disclaimer at http://www.uom.ac.mu/emaildisclaimer
More information about the Biojava-l
mailing list