[Bioperl-l] Bug in Bio::FeatureIO::gff
Steffen Grossmann
grossman at molgen.mpg.de
Mon Oct 25 08:17:34 EDT 2004
Dear all,
there is a small problem in Bio::FeatureIO::gff, which prevents it from
writing out gff3 lines previously read in by itself.
The reason is that the 'source' annotation is treated differently in
reading/writing. When reading in gff3 lines, the second column (the
'source') is stored via the Bio::SeqFeature::Annotated::source method.
However, Bio::FeatureIO::gff::_write_feature_3 tries to retrieve it from
the annotation via
($feature->annotation('source'))[0]->value
which is then not present...
I attached an example gff3 (example1.gff) file and an example script (Bio_FeatureIO_test.pl) which together
show the problem.
I also attached a patch (gff_patch.diff) for the bug which I also can check in directly
as soon as I get an OK and a developer account...
Steffen
example1.gff
---8<---8<---8<---8<---8<---8<---8<---8<---cut here
##gff-version 3
ctg123 . gene 1000 9000 . + . ID=gene00001;Name=EDEN
ctg123 scan TF_binding_site 1000 1012 . + . ID=tfbs00001;Parent=gene00001
---8<---8<---8<---8<---8<---8<---8<---8<---cut here
Bio_FeatureIO_test.pl
---8<---8<---8<---8<---8<---8<---8<---8<---cut here
#!/bin/perl
#
use lib '/home/grossman/bioperl/bioperl-live'; # or whatever
use strict;
use Bio::FeatureIO;
# Reading in a gff file, collect read in features in an array
my $infile = 'example1.gff';
my $gff_in = Bio::FeatureIO->new(-file => $infile,
-format => 'GFF',
-version => 3);
my @features;
my $feature;
while($feature = $gff_in->next_feature()) {
push(@features, $feature);
}
# Writing out again
my $outfile = 'example1_out.gff';
my $gff_out = Bio::FeatureIO->new(-file => ">$outfile",
-format => 'GFF',
-version => 3);
foreach $feature (@features) {
$gff_out->write_feature($feature);
}
---8<---8<---8<---8<---8<---8<---8<---8<---cut here
gff_patch.diff
---8<---8<---8<---8<---8<---8<---8<---8<---cut here
*** bioperl-live/Bio/FeatureIO/gff.pm Mon Oct 25 11:08:18 2004
--- modified_bioperl-live/Bio/FeatureIO/gff.pm Mon Oct 25 13:06:38 2004
***************
*** 134,140 ****
}
my $seq = $feature->seq_id || '.';
! my $source = ($feature->annotation->get_Annotations('source'))[0]->value;
my $type = ($feature->annotation->get_Annotations('feature_type'))[0]->name;
$type = 'EXON' if $type eq 'exon'; #a GTF peculiarity, incosistent with the sequence ontology.
my $min = $feature->start || '.';
--- 134,145 ----
}
my $seq = $feature->seq_id || '.';
! my $source;
! if (my ($sa) = $feature->annotation->get_Annotations('source')) {
! $source = $sa->value;
! } else {
! $source = '.';
! }
my $type = ($feature->annotation->get_Annotations('feature_type'))[0]->name;
$type = 'EXON' if $type eq 'exon'; #a GTF peculiarity, incosistent with the sequence ontology.
my $min = $feature->start || '.';
***************
*** 160,167 ****
sub _write_feature_3 {
my($self,$feature) = @_;
! my $seq = $feature->seq_id || '.';
! my $source = ($feature->annotation->get_Annotations('source'))[0]->value;
my $type = ($feature->annotation->get_Annotations('feature_type'))[0]->name;
my $min = $feature->start || '.';
my $max = $feature->end || '.';
--- 165,177 ----
sub _write_feature_3 {
my($self,$feature) = @_;
! my $seq = $feature->seq_id || 'SEQ';
! my $source;
! if (my ($sa) = $feature->annotation->get_Annotations('source')) {
! $source = $sa->value;
! } else {
! $source = '.';
! }
my $type = ($feature->annotation->get_Annotations('feature_type'))[0]->name;
my $min = $feature->start || '.';
my $max = $feature->end || '.';
***************
*** 273,279 ****
my($seq,$source,$type,$start,$end,$score,$strand,$phase,$attribute_string) = split /\t/, $feature_string;
$feat->seq_id( uri_unescape($seq) );
! $feat->source( uri_unescape($source) );
$feat->start($start) unless $start eq '.';
$feat->end($end) unless $end eq '.';
$feat->strand($strand eq '+' ? 1 : $strand eq '-' ? -1 : 0);
--- 283,291 ----
my($seq,$source,$type,$start,$end,$score,$strand,$phase,$attribute_string) = split /\t/, $feature_string;
$feat->seq_id( uri_unescape($seq) );
! $ac->add_Annotation(Bio::Annotation::SimpleValue->new(-value => uri_unescape($source),
! -tagname => 'source')
! ) unless $source eq '.';
$feat->start($start) unless $start eq '.';
$feat->end($end) unless $end eq '.';
$feat->strand($strand eq '+' ? 1 : $strand eq '-' ? -1 : 0);
***************
*** 297,302 ****
--- 309,315 ----
$fta->term($feature_type);
my %attr = ();
+ chomp $attribute_string;
my @attributes = split ';', $attribute_string;
foreach my $attribute (@attributes){
my($key,$values) = split '=', $attribute;
---8<---8<---8<---8<---8<---8<---8<---8<---cut here
--
%---------------------------------------------%
% Steffen Grossmann %
% %
% Max Planck Institute for Molecular Genetics %
% Computational Molecular Biology %
%---------------------------------------------%
% Ihnestrasse 73 %
% 14195 Berlin %
% Germany %
%---------------------------------------------%
% Tel: (++49 +30) 8413-1167 %
% Fax: (++49 +30) 8413-1152 %
%---------------------------------------------%
More information about the Bioperl-l
mailing list