[Bioperl-l] Regarding blast in Bioperl

Roopa Raghuveer rtbio.2009 at gmail.com
Mon Jan 25 22:35:32 UTC 2010


Hello Russell,

Thank you very much for your reply. My problem is that Remote blast is
getting well executed with my code and I am getting the .out file with
sequences producing significant alignments. But, when I am trying to
retrieve the sequences into an array @seqs, I am able to retrieve all the
sequences except for the first hit. If the number of hits that I get in the
.out file to be 3, I am able to retrieve only 2 hits i.e., I am able to get
only 2 sequences. If there is only one significant hit for my sequence, then
the name and description of the sequence appears in the .out file, but I am
unable to get it into the array,the array count shows 0 and there would not
be any sequence in the array.

I hope that you have got me now.

Here comes my code,

use Bio::SearchIO;
use Bio::Search::Result::BlastResult;
use Bio::Perl;
use Bio::Tools::Run::RemoteBlast;
use Bio::Seq;
use Bio::SeqIO;
use Bio::DB::GenBank;

$serverpath = "/srv/www/htdocs/rain/RNAi";
$serverurl = "http://141.84.66.66/rain/RNAi";
$outfile = $serverpath."/rnairesult_".time().".html";
$nuc = $serverpath."/nuc".time().".txt";
$debugfile = $serverpath."/debug_".time().".txt";
$blastdebugfile = $serverpath."/blastdebug_".time().".txt";

my $outstring ="";

&parse_form;

print "Content-type: text/html\n\n";
print "<HTML>\n";
print "<head><title>RNAi Result</title>";
print "<META HTTP-EQUIV=\"Refresh\" CONTENT=\"30;
URL=$serverurl/rnairesult_".time().".html\"> \n";
print "</head>\n";
print "<body>\n";
print " Your results will appear <a
href=$serverurl/rnairesult_".time().".html>here</a><br>";
print " Please be patient, runtime can be up to 5 minutes<br>";
print " This page will automatically reload in 30 seconds.";
print "</BODY>\n";
print "</HTML>\n";

defined(my $pid = fork) or die "Can't fork: $!";
exit if $pid;
open STDIN, '/dev/null' or die "Can't read /dev/null: $!";
open STDOUT, '>/dev/null' or die "Can't write to /dev/null: $!";
open STDERR, '>&STDOUT' or die "Can't dup stdout: $!";



open(OUTFILE, '>',$outfile);

print OUTFILE "<HTML>\n
 <head><title>RNAi Result</title>
 <META HTTP-EQUIV=\"Refresh\" CONTENT=\"30;
URL=$serverurl//rnairesult_".time().".html\"> \n
 <meta http-equiv=\"expires\" content=\"0\">
 </head>\n
 <body>\n
  Your results will appear <a
href=$serverurl/rnairesult_".time().".html>here</a><br>
  Please be patient, runtime can be up to 5 minutes <br>
 This page will automatically reload in 30 seconds  <br>
 </BODY>\n
 </HTML>\n";

close(OUTFILE);


@compseqs = blastcode($in{'Inputseq'},$in{'Organism'});

$in{'Inputseq'} =~ s/>.*$//m;
$in{'Inputseq'} =~ s/[^TAGC]//gim;
$in{'Inputseq'} =~ tr/actg/ACTG/;

@out = similar($in{'Inputseq'}, \@compseqs, $in{'Windowsize'},
$in{'Threshold'});


sub blastcode
{

$inpu1= $_[0];

$organ= $_[1];

open(NUC,'>',$nuc);
print NUC $inpu1,"\n";
close(NUC);

 my $prog = 'blastn';
 my $db   = 'refseq_rna';
 my $e_val= '1e-10';
 my $organism= $organ;

$gb = new Bio::DB::GenBank;

 my @params = ( '-prog' => $prog,
         '-data' => $db,
         '-expect' => $e_val,
         '-readmethod' => 'SearchIO',
         '-Organism'   => $organism );

            # open(OUTFILE,'>',$debugfile);
             #  print OUTFILE @params;
             # close(OUTFILE);


my $factory = Bio::Tools::Run::RemoteBlast->new(@params, -ENTREZ_QUERY =>
"$organ\[ORGN]");

 #my $factory = Bio::Tools::Run::RemoteBlast->new(@params);

  #change a paramter

 #$Bio::Tools::Run::RemoteBlast::HEADER{'ENTREZ_QUERY'} = 'Trypanosoma
Brucei[ORGN]';

#change a paramter
# $Bio::Tools::Run::RemoteBlast::HEADER{'ENTREZ_QUERY'} = '$input2[ORGN]';

  my $v = 1;
  #$v is just to turn on and off the messages

 my $str = Bio::SeqIO->new('-file' => $nuc , '-format' => 'fasta' ,
'-organism' => "$organ\[ORGN]");

while (my $input = $str->next_seq())
{
   #Blast a sequence against a database:
    #Alternatively, you could  pass in a file with many
    #sequences rather than loop through sequence one at a time
    #Remove the loop starting 'while (my $input = $str->next_seq())'
    #and swap the two lines below for an example of that.

             open(OUTFILE,'>',$debugfile);
               print OUTFILE $input;
              close(OUTFILE);
 my $r = $factory->submit_blast($input);

                open(OUTFILE,'>',$debugfile);
             #   print OUTFILE $r;
                close(OUTFILE);


   print STDERR "waiting...." if($v>0);

  while ( my @rids = $factory->each_rid ) {

     foreach my $rid ( @rids ) {


        my $rc = $factory->retrieve_blast($rid);

        if( !ref($rc) )
        {
        if( $rc < 0 )
        {
        $factory->remove_rid($rid);
        }
       print STDERR "." if ( $v > 0 );
         sleep 5;
        }
       else {
          my $result = $rc->next_result();
         #save the output

      $blastdebugfile = $serverpath."/blastdebug_".time().".txt";

          open(BLASTDEBUGFILE,'>',$blastdebugfile);
          print BLASTDEBUGFILE $result->next_hit();
          close(BLASTDEBUGFILE);

        my $filename = $serverpath."/blastdata_".time()."\.out";

         # open(DEBUGFILE,'>',$debugfile);
         # open(new,'>',$filename);
         # @arra=<new>;
         # print DEBUGFILE @arra;
         # close(DEBUGFILE);
         # close(new);

         $factory->save_output($filename);

       # open(BLASTDEBUGFILE,'>',$debugfile);
       # print BLASTDEBUGFILE  "Hello $rid";
       # close(BLASTDEBUGFILE);

       $factory->remove_rid($rid);

   while ( my $hit = $result->next_hit ) {

            next unless ( $v >= 0);


       my $sequ = $gb->get_Seq_by_version($hit->name);
           my $dna = $sequ->seq(); # get the sequence as a string
        $dummy++;
             open(OUTFILE,'>',$debugfile);
             open(OUTFILE,'>',$debugfile);
          #     print OUTFILE $dna;
              close(OUTFILE);
          push(@seqs,$dna);
         }
        }
      }
    }
  }

$warum=scalar(@seqs);
              open(OUTFILE,'>',$debugfile);
               print OUTFILE $warum;
             #  print OUTFILE @seqs;
              close(OUTFILE);
      return(@seqs);
}

open(OUTFILE, '>',$outfile) || die ;

print OUTFILE "<HTML>\n
<head><title>RNAi Result</title>
<meta http-equiv=\"expires\" content=\"0\"></head>\n
<body>\n
<p><font face=\"Courier, monospace font set\">
Inputsequence: <br>";

for ($i=0; $i<length ($in{'Inputseq'}); $i++) {

        print OUTFILE substr ($in{'Inputseq'}, $i, 1);

        if ( ($i+1)%10==0){
                print OUTFILE " ";
        }
        if ( ($i+1)%60==0){
                print OUTFILE "<br>\n";
        }
}



print OUTFILE "</font> <p>";

$z=@compseqs;

for($k=0;$k<$z;$k++) {
        print OUTFILE "<font face=\"Courier, monospace font set\"><p>Compare
Sequence: <br>";

        for ($i=0; $i<length ($compseqs[$k]); $i++) {

                print OUTFILE substr ($compseqs[$k], $i, 1);

                if ( ($i+1)%10==0){
                        print OUTFILE " ";
                }
                if ( ($i+1)%60==0){
                        print OUTFILE "<br>\n";
                }
        }
        print OUTFILE "<p></font>";
}

print OUTFILE "<p>
Window: <br>$in{'Windowsize'}
<p>
<p>
Threshold: <br>$in{'Threshold'}
<p>";
my $j=0;

for ($i=0; $i<length ($in{'Inputseq'}); $i++) {

        if ($i<=length ($in{'Inputseq'})-$in{'Windowsize'}){
                if ($out[$i]->{similar}<=$in{'Threshold'}){
                        $j=$in{'Windowsize'};
                }
                $height=$out[$i]->{similar}*5;
        }

        if ($j>0) {
                print OUTFILE "<img src=\"$serverurl/blue.gif\" width=\"1\"
height=\"5\">";
                $outstring .= "<font color=\"green\">".substr
($in{'Inputseq'}, $i, 1)."</font>";
                $j--;
        }
        else {
                print OUTFILE "<img src=\"$serverurl/red.gif\" width=\"1\"
height=\"5\">";
                $outstring .= "<font color=\"red\">".substr
($in{'Inputseq'}, $i, 1)."</font>";
        }

        if ( ($i+1)%10==0){
                $outstring .= " ";
        }
        if ( ($i+1)%60==0){
                $outstring .= "<br>\n";

        }
        if ( ($i+1)%800==0){
                print OUTFILE "<br><br>\n";

        }
}

print OUTFILE "<br><br><font face=\"Courier, monospace font
set\">$outstring</font>";

#foreach (@out) {
#print OUTFILE "<p>Sequence: $_->{sequence}: $_->{similar} matchs<p>";
#if ($_->{similar}<=$in{'Threshold'}){

#       }
#}

print OUTFILE "</BODY>\n</HTML>\n";

close OUTFILE;

#nameprint();

sub parse_form {
    local ($buffer, @pairs, $pair, $name, $value);
    # Read in text
    $ENV{'REQUEST_METHOD'} =~ tr/a-z/A-Z/;
    if ($ENV{'REQUEST_METHOD'} eq "POST")
    {
      read(STDIN, $buffer, $ENV{'CONTENT_LENGTH'});
    }
    else
    {
        $buffer = $ENV{'QUERY_STRING'};
    }
    @pairs = split(/&/, $buffer);
    foreach $pair (@pairs)
    {
        ($name, $value) = split(/=/, $pair);
        $value =~ tr/+/ /;
        $value =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg;
        $in{$name} = $value;
    }
}

Regards,
Roopa.





On Mon, Jan 25, 2010 at 10:14 PM, Smithies, Russell <
Russell.Smithies at agresearch.co.nz> wrote:

> That's a fair mix of incomplete code you've supplied!!
> Did you read the documentation for RemoteBlast? The example there will do
> 99% of what you want.
> http://search.cpan.org/~cjfields/BioPerl-1.6.1/Bio/Tools/Run/RemoteBlast.pm<http://search.cpan.org/%7Ecjfields/BioPerl-1.6.1/Bio/Tools/Run/RemoteBlast.pm>
>
> I'm not entirely sure what you're trying to do (as you've left out a bit of
> your code) but I assume you're trying to retrieve and print the sequence for
> each hit.
>
> Here's something that works, not sure exactly what/why you want to print
> but it should get you a bit further.
>
> --Russell
>
>
> ================================
> #!perl -w
>
> use Bio::Tools::Run::RemoteBlast;
> use Bio::DB::GenBank;
>
> use CGI ':standard';
>
> use strict;
>
> my $q = new CGI;
>
> my @params = (
>               -prog         => 'blastn',
>               -data         => 'nr',
>               -expect       => '1e-30',
>               -entrez_query => 'Homo sapiens [ORGN]',
>               -readmethod   => 'SearchIO'
> );
>
> my $gb = Bio::DB::GenBank->new;
>
> my $factory = Bio::Tools::Run::RemoteBlast->new(@params);
>
> #$v is just to turn on and off the messages
> my $v = 1;
>
> my $str = Bio::SeqIO->new( -file => 'test.faa', -format => "fasta" );
>
> while ( my $input = $str->next_seq() ) {
>
>   my $r = $factory->submit_blast($input);
>
>   print STDERR "waiting..." if ( $v > 0 );
>  while ( my @rids = $factory->each_rid ) {
>     foreach my $rid (@rids) {
>      my @seqs = ();
>       my $rc   = $factory->retrieve_blast($rid);
>      if ( !ref($rc) ) {
>        if ( $rc < 0 ) {
>          $factory->remove_rid($rid);
>        }
>         print STDERR "." if ( $v > 0 );
>        sleep 5;
>      }
>      else {
>         my $result = $rc->next_result();
>
>         #save the blast output
>        my $filename = $result->query_accession . '.out';
>        $factory->save_output($filename);
>        $factory->remove_rid($rid);
>        print "\nQuery Name: ", $result->query_name(), "\n";
>         while ( my $hit = $result->next_hit ) {
>
>           # store the hit sequences
>          push @seqs, $gb->get_Seq_by_version( $hit->name );
>
>          next unless ( $v > 0 );
>          print "\thit name is ", $hit->name, "\n";
>          while ( my $hsp = $hit->next_hsp ) {
>            print "\t\tscore is ", $hsp->score, "\n";
>          }
>        }
>
>        ## print the seqs you've retrieved??
>        open( OUTFILE, '>', $result->query_accession . '.htm' );
>        print OUTFILE $q->start_html('RNAi Result'),
>          $q->h1('RNAi Result'),
>          $q->h2('Input'),
>          $q->pre( toString($input) ),
>          $q->h2('Output');
>
>        foreach (@seqs) {
>
>          #there's probably a better way of printing the seq
>          print OUTFILE $q->pre( toString($_) );
>        }
>        print OUTFILE $q->end_html;
>        close OUTFILE;
>      }
>    }
>  }
> }
>
> sub toString {
>  my $s = shift;
>  return '>' . $s->display_id . " " . $s->desc . "\n" . $s->seq;
> }
>
>
> =======================================================================
> Attention: The information contained in this message and/or attachments
> from AgResearch Limited is intended only for the persons or entities
> to which it is addressed and may contain confidential and/or privileged
> material. Any review, retransmission, dissemination or other use of, or
> taking of any action in reliance upon, this information by persons or
> entities other than the intended recipients is prohibited by AgResearch
> Limited. If you have received this message in error, please notify the
> sender immediately.
> =======================================================================
>



More information about the Bioperl-l mailing list