[Bioperl-l] retrieving top_SeqFeatures for RefSeq proteins fails
e.rapsomaniki at mail.cryst.bbk.ac.uk
e.rapsomaniki at mail.cryst.bbk.ac.uk
Sat Apr 8 13:08:00 UTC 2006
Hi
I am trying to retrieve the coding sequences associated with RefSeq proteins. My
code (below) works for non-RefSeq proteins (e.g. BAB26271) but not for RefSeq
proteins: no sequence features are retrieved, although I checked the web page
and a coded_by feature should be there. Any suggestions? I am using BioPerl 1.4.
Here's my code:
use strict;
use warnings;

use Bio::Seq;
use Bio::DB::GenPept;
use Bio::DB::GenBank;
use Bio::DB::RefSeq;
# Both classes below are instantiated later in the script, so they must be
# loaded explicitly rather than relying on another module pulling them in.
use Bio::Factory::FTLocationFactory;
use Bio::SeqFeature::Generic;

# Purpose: fetch a protein record, locate its CDS feature, and use that
# feature's 'coded_by' tag (of the form "<nucleotide accession>:<location>")
# to fetch the nucleotide record and extract the coding sequence.

my $gb = Bio::DB::GenBank->new;

# NOTE(review): the poster reports that records fetched through
# Bio::DB::RefSeq come back without features. Bio::DB::RefSeq is presumably
# aimed at nucleotide RefSeq entries; if so, Bio::DB::GenPept (already
# imported above) may be the right source for NP_ accessions -- confirm
# against the documentation of the installed BioPerl version.
my $gp = Bio::DB::RefSeq->new;

my $prot_obj = $gp->get_Seq_by_acc('NP_001008293');

# 'return' is not legal at file scope, so fail with an explicit message.
die "Could not retrieve protein record NP_001008293\n"
    unless defined $prot_obj;

# factory to turn strings into Bio::Location objects
my $loc_factory = Bio::Factory::FTLocationFactory->new;

my $orf;

my @f = $prot_obj->top_SeqFeatures();
print "@f\n";    # diagnostic: empty when the record carries no features

FEATURE:
for my $feat (@f) {
    print $feat->primary_tag, "\n";
    next FEATURE unless $feat->primary_tag eq 'CDS';

    # Skip CDS features that do not say which nucleotide record encodes them.
    next FEATURE unless $feat->has_tag('coded_by');

    my @coded_by = $feat->each_tag_value('coded_by');
    print @coded_by, "\n";

    # Limit the split to two fields so that any further ':' stays inside
    # the location string instead of being silently dropped.
    my ($nuc_acc, $loc_str) = split /:/, $coded_by[0], 2;
    unless (defined $nuc_acc && defined $loc_str) {
        warn "Unexpected coded_by value '$coded_by[0]'; skipping\n";
        next FEATURE;
    }

    #$nuc_acc =~ s/\..*//;
    my $nuc_obj = $gb->get_Seq_by_acc($nuc_acc);
    die "Could not retrieve nucleotide record $nuc_acc\n"
        unless defined $nuc_obj;

    my $loc_object = $loc_factory->from_string($loc_str);

    # create a Feature object by using a Location
    my $feat_obj = Bio::SeqFeature::Generic->new(-location => $loc_object);

    # associate the Feature object with the nucleotide Seq object
    $nuc_obj->add_SeqFeature($feat_obj);

    my $cds_obj = $feat_obj->spliced_seq;
    $orf = $cds_obj->seq;
}

# $orf stays undefined when no usable CDS feature was found; report that
# instead of printing an undefined value.
if (defined $orf) {
    print "$orf\n";
}
else {
    warn "No CDS feature with a coded_by tag was found for NP_001008293\n";
}
----------------------------------------------------------------
This message was sent using IMP, the Internet Messaging Program.
More information about the Bioperl-l
mailing list