[Bioperl-l] help converting seq objects.

Barry Moore barry.moore at genetics.utah.edu
Wed Aug 18 10:45:11 EDT 2004


Pedro,

If I understand you correctly you just want the entire script to run 
without the redundancy of writing the GB sequences to a file only to 
promptly reopen the file and read the sequences back in.  If so, you 
just need to delete a few lines of code and adjust your loops like this:

#!/usr/sbin/perl -w
# How to retrieve GenBank entries over the Web
# by Jason Stajich

use Bio::DB::GenBank;
use Bio::SeqIO;
use Bio::Seq;
use strict;

my $in = shift;
my @AC;

###### acc numbers from file to an array ##########
open (F, "$in") || die;
while(<F>){
  next unless /(NM_\d+)\s/;
  my $acc = $1;
  chomp($acc);
  push @AC, $acc;
}
close(F);
my $list = join " ", @AC;
print "$list\n";

##### get genbank records  
#######################################################
my $gb = new Bio::DB::GenBank;
my $seqio = $gb->get_Stream_by_acc(\@AC);

###########get translation ############################################
my $temp = "$in.tfa";
my $out = new Bio::SeqIO(-file => ">$temp", -format => 'fasta');
while (my $seq = $seqio->next_seq ) {
  foreach my $f ( grep { $_->primary_tag eq 'CDS' } 
$seq->top_SeqFeatures ) {
    my ($gname);
    if ( $f->has_tag('gene') ) {
      ($gname) = $f->each_tag_value('gene');
    } elsif ( $f->has_tag('product') ) {
      ($gname) = $f->each_tag_value('product');
    }
    ($gname) =~ s/\s+/_/g;
    my ($ref) = $f->has_tag('protein_id') && 
$f->each_tag_value('protein_id');
    my ($gi) = $f->has_tag('db_xref') && $f->each_tag_value('db_xref');
    my ($translation) = $f->has_tag('translation') && 
$f->each_tag_value('translation');
    unless( $gi && $ref && $gname && $translation ) {
      print STDERR "not fully annotated CDS ($gi,$ref,$gname), 
skipping...\n";
      next;
    }
    my $tfa = Bio::PrimarySeq->new (-seq => $translation, -display_id =>
    sprintf("%s|%s|%s",$gi,$ref,$gname));
    $out->write_seq($tfa);
  }
}



More information about the Bioperl-l mailing list