[Bioperl-l] Module: Bio::Structure::IO
Saskia Priesel
priesel at caesar.de
Wed Jun 15 02:53:30 EDT 2005
Hello to all,
I have a problem processing a very large number of files from the PDB (Protein Data Bank).
I want to analyse 30000 PDB files. For this I use the BioPerl module
Bio::Structure::IO to read each whole entry. Below I give you the
source code. My problem is that too many entries end up held open in
memory at once. Is there a method in the module which can handle this?
# Scan a list of PDB files and report short, cysteine-poor chain sequences.
#
# For every file the structure id is printed once. Then, for each chain, the
# SEQRES sequence is fetched and reported (structure id, chain id, sequence,
# length) when BOTH of the following hold:
#   * its length lies within [$min_length, $max_length] (0 .. 100), and
#   * it does NOT contain three or more cysteines ("C", case-insensitive).
#
# Arguments:
#   $pdb_files_ref - reference to an array of PDB file paths.
# Returns:
#   nothing useful; all results are printed to the currently selected
#   output handle.
# Errors:
#   exceptions raised by Bio::Structure::IO (e.g. for an unreadable file)
#   propagate to the caller. A file that yields no structure is skipped
#   with a warning instead of aborting the whole run.
sub filter_data {
    my $pdb_files_ref = shift;

    # initialize variables
    my $min_length = 0;
    my $max_length = 100;

    # Three cysteines anywhere in the sequence. A leading ".*" is not needed
    # because the pattern is unanchored.
    my $three_cys_re = qr/C.*C.*C/i;

    FILE:
    for my $pdb_file ( @{$pdb_files_ref} ) {

        # The parser and the parsed entry are lexicals of this iteration, so
        # this sub holds no reference to them once the next file is started.
        # NOTE(review): if memory still grows over thousands of files, the
        # entry object may keep itself alive through internal reference
        # cycles -- check the Bio::Structure::Entry documentation for the
        # supported way to release an entry.
        my $structio = Bio::Structure::IO->new(
            -file     => $pdb_file,
            '-format' => 'pdb',
        );
        my $structure = $structio->next_structure();
        if ( !$structure ) {
            warn "filter_data: no structure read from '$pdb_file', skipping\n";
            next FILE;
        }
        print "Structure", $structure->id, "\n";

        CHAIN:
        for my $chain ( $structure->get_chains() ) {
            my $chainid = $chain->id;

            # A chain whose id contains "default" is looked up without an
            # explicit chain id; every other chain is looked up by its id.
            my $pseq
                = $chainid =~ m/default/
                ? $structure->seqres()
                : $structure->seqres($chainid);

            # No SEQRES object: give up on the remaining chains of this file.
            # NOTE(review): the original code used "last" here (for named
            # chains); confirm whether skipping only this chain ("next") was
            # actually intended.
            last CHAIN if !$pseq;

            my $sequence = $pseq->seq();
            my $seq_len  = length $sequence;

            next CHAIN if $seq_len < $min_length || $seq_len > $max_length;
            next CHAIN if $sequence =~ $three_cys_re;

            print "Structure", $structure->id, "\n";
            print "Chain: $chainid\n";
            print "$sequence\n";
            print "Laenge: ", $seq_len, "\n";
        }
    }

    return;
}
More information about the Bioperl-l
mailing list