[Bioperl-l] from protein gi number to CDS seqs.
Sun, Jian
jsun at utdallas.edu
Tue Sep 27 13:09:28 EDT 2005
Dear all, I got some reference code from this board as attached below to get the CDS sequence from protein gi number. But for most of my protein gi #, I got the erroe message. Could you please help me to point out what's the problem is?
Thanks in advance.
Jian Sun
The codes I got from the bBioperl Board are :
#!C:\Perl\bin\perl.exe
use lib "C:\Perl\lib";
use strict;
use Bio::DB::GenBank;
use Bio::DB::GenPept;
use Bio::DB::FileCache;
use Bio::Factory::FTLocationFactory;
use Bio::SeqFeature::Generic;
my $ntdb = new Bio::DB::GenBank;
my $pepdb= new Bio::DB::GenPept;
# do some caching in the event you're pulling up the same
# chromosome and/or you are debugging
my $cachent = new Bio::DB::FileCache(-kept => 1,
-file => './tmp/nt.idx',
-seqdb => $ntdb);
my $cachepep = new Bio::DB::FileCache(-kept => 1,
-file => './tmp/pep.idx',
-seqdb => $pepdb);
# obj to turn strings into Bio::Location object
my $locfactory = new Bio::Factory::FTLocationFactory;
# you might get these from a file (and they can be accessions too)
# protein gi number (10956263)
my @protgis = (23172508);
foreach my $gi ( @protgis ) { #id
my $protseq = $cachepep->get_Seq_by_id($gi);
if( ! $protseq ) { print STDERR "could not find a seq for gi:$gi\n";
next;
}
foreach my $cds ( grep { $_->primary_tag eq 'CDS' }
$protseq->get_SeqFeatures() )
{
next unless( $cds->has_tag('coded_by') ); # skip CDSes with no coded_by
my ($codedby) = $cds->each_tag_value('coded_by');
my ($ntacc,$loc) = split(/\:/, $codedby);
$ntacc =~ s/(\.\d+)//; # genbank wants an accession not a versioned one
my $cdslocation = $locfactory->from_string($loc);
my $cdsfeature = new Bio::SeqFeature::Generic(-location => $cdslocation);
my $ntseq = $cachent->get_Seq_by_acc($ntacc);
next unless $ntseq;
$ntseq->add_SeqFeature($cdsfeature); # locate the feature on a seq
my $cdsseq = $cdsfeature->spliced_seq();
print "cds seq is : \n", $cdsseq->seq(), "\n";
my $startP = (109-1) * 3 + 1;
my $endP = (116-1) * 3 + 1 ;
my $mypiece = $cdsseq->subseq($startP,$endP);
print "\n my seq. piece is: ", $mypiece;
}
}
******************************************************************
And the error message I got is:
----------EXCEPTION-------------------------------
MSG: acc does not exist
STACK BIO::DB::WebDBSeqI::get_Seq_by acc C:/perl/site/Lib/Bio/DB/WebDBSeqI.pm :176.
STACK BIO::DB::WebDBSeqI::get_Seq_by acc C:/perl/site/Lib/Bio/DB/WebDBSeqI.pm :170.
--------------------------------------
More information about the Bioperl-l
mailing list