[Bioperl-l] help converting seq objects.
Barry Moore
barry.moore at genetics.utah.edu
Wed Aug 18 10:45:11 EDT 2004
Pedro,
If I understand you correctly you just want the entire script to run
without the redundancy of writing the GB sequences to a file only to
promptly reopen the file and read the sequences back in. If so, you
just need to delete a few lines of code and adjust your loops like this:
#!/usr/sbin/perl -w
# How to retrieve GenBank entries over the Web
# by Jason Stajich
use Bio::DB::GenBank;
use Bio::SeqIO;
use Bio::Seq;
use strict;
my $in = shift;
my @AC;
###### acc numbers from file to an array ##########
open (F, "$in") || die;
while(<F>){
next unless /(NM_\d+)\s/;
my $acc = $1;
chomp($acc);
push @AC, $acc;
}
close(F);
my $list = join " ", @AC;
print "$list\n";
##### get genbank records
#######################################################
my $gb = new Bio::DB::GenBank;
my $seqio = $gb->get_Stream_by_acc(\@AC);
###########get translation ############################################
my $temp = "$in.tfa";
my $out = new Bio::SeqIO(-file => ">$temp", -format => 'fasta');
while (my $seq = $seqio->next_seq ) {
foreach my $f ( grep { $_->primary_tag eq 'CDS' }
$seq->top_SeqFeatures ) {
my ($gname);
if ( $f->has_tag('gene') ) {
($gname) = $f->each_tag_value('gene');
} elsif ( $f->has_tag('product') ) {
($gname) = $f->each_tag_value('product');
}
($gname) =~ s/\s+/_/g;
my ($ref) = $f->has_tag('protein_id') &&
$f->each_tag_value('protein_id');
my ($gi) = $f->has_tag('db_xref') && $f->each_tag_value('db_xref');
my ($translation) = $f->has_tag('translation') &&
$f->each_tag_value('translation');
unless( $gi && $ref && $gname && $translation ) {
print STDERR "not fully annotated CDS ($gi,$ref,$gname),
skipping...\n";
next;
}
my $tfa = Bio::PrimarySeq->new (-seq => $translation, -display_id =>
sprintf("%s|%s|%s",$gi,$ref,$gname));
$out->write_seq($tfa);
}
}
More information about the Bioperl-l
mailing list