[BioRuby-cvs] bioruby/lib/bio/db/embl embl.rb,1.21,1.22
Mitsuteru C. Nakao
nakao at pub.open-bio.org
Sun Oct 23 05:59:50 EDT 2005
Update of /home/repository/bioruby/bioruby/lib/bio/db/embl
In directory pub.open-bio.org:/tmp/cvs-serv18980/bio/db/embl
Modified Files:
embl.rb
Log Message:
* Changed to RDoc format.
* Bio::EMBL::Common changed to Bio::EMBLDB::Common.
Index: embl.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/db/embl/embl.rb,v
retrieving revision 1.21
retrieving revision 1.22
diff -C2 -d -r1.21 -r1.22
*** embl.rb 24 Sep 2005 01:15:56 -0000 1.21
--- embl.rb 23 Oct 2005 09:59:48 -0000 1.22
***************
*** 1,6 ****
#
! # bio/db/embl/embl.rb - EMBL database class
#
! # Copyright (C) 2001, 2002 Mitsuteru C. Nakao <n at bioruby.org>
#
# This library is free software; you can redistribute it and/or
--- 1,26 ----
#
! # = bio/db/embl/embl.rb - EMBL database class
#
! #
! # Author:: Mitsuteru C. Nakao <n at bioruby.org>
! # Copyright:: Copyright (C) 2001-2005 BioRuby Project
! # License:: LGPL
! #
! # $Id$
! #
! # == EMBL database entry
! #
! #
! #
! # == Example
! #
! # emb = Bio::EMBL.new($<.read)
! # emb.entry_id
! # emb.each_cds do |cds|
! # cds
! # end
! # emb.seq
! #
! #--
#
# This library is free software; you can redistribute it and/or
***************
*** 18,22 ****
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
! # $Id$
#
--- 38,42 ----
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
! #++
#
***************
*** 25,34 ****
module Bio
! class EMBL
! include Bio::EMBL::Common
! ##
# ID Line
! # "ID ENTRY_NAME DATA_CLASS; MOLECULE_TYPE; DIVISION; SEQUENCE_LENGTH BP."
#
# DATA_CLASS = ['standard']
--- 45,59 ----
module Bio
! class EMBL < EMBLDB
! include Bio::EMBLDB::Common
! # returns contents in the ID line.
! # * Bio::EMBL#id_line -> <ID Hash>
! # where <ID Hash> is:
! # {'ENTRY_NAME' => String, 'DATA_CLASS' => String, 'MOLECULE_TYPE' => String,
! # 'DIVISION' => String, 'SEQUENCE_LENGTH' => Int}
! #
# ID Line
! # "ID ENTRY_NAME DATA_CLASS; MOLECULE_TYPE; DIVISION; SEQUENCE_LENGTH BP."
#
# DATA_CLASS = ['standard']
***************
*** 59,75 ****
unless @data['ID']
tmp = Hash.new
! idline = @orig['ID'].split(/ +/)
! tmp['ENTRY_NAME'] = idline[1]
! tmp['DATA_CLASS'] = idline[2].sub(/;/,'')
! tmp['MOLECULE_TYPE'] = idline[3].sub(/;/,'') # "cyclic DNA"
! tmp['DVISION'] = idline[4].sub(/;/,'')
! tmp['SEQUENCE_LENGTH'] = idline[5].to_i
@data['ID'] = tmp
end
! if block_given?
! @data['ID'].each do |k,v|
! yield(k,v)
! end
! elsif key
@data['ID'][key]
else
--- 84,97 ----
unless @data['ID']
tmp = Hash.new
! idline = fetch('ID').split(/; +/)
! tmp['ENTRY_NAME'], tmp['DATA_CLASS'] = idline[0].split(/ +/)
! tmp['MOLECULE_TYPE'] = idline[1]
! tmp['DIVISION'] = idline[2]
! tmp['SEQUENCE_LENGTH'] = idline[3].strip.split(' ').first.to_i
!
@data['ID'] = tmp
end
!
! if key
@data['ID'][key]
else
***************
*** 78,84 ****
end
! ##
! # Bio::EMBL#entry -> String
! # #entry_name -> String
def entry
id_line('ENTRY_NAME')
--- 100,105 ----
end
! # returns ENTRY_NAME in the ID line.
! # * Bio::EMBL#entry -> String
def entry
id_line('ENTRY_NAME')
***************
*** 87,93 ****
alias entry_id entry
! ##
! # Bio::EMBL#molecule -> String
! #
def molecule
id_line('MOLECULE_TYPE')
--- 108,113 ----
alias entry_id entry
! # returns MOLECULE_TYPE in the ID line.
! # * Bio::EMBL#molecule -> String
def molecule
id_line('MOLECULE_TYPE')
***************
*** 95,108 ****
alias molecule_type molecule
! ##
! # Bio::EMBL#division -> String
! #
def division
id_line('DIVISION')
end
! ##
! # Bio::EMBL#sequencelength -> String
! #
def sequence_length
id_line('SEQUENCE_LENGTH')
--- 115,126 ----
alias molecule_type molecule
! # returns DIVISION in the ID line.
! # * Bio::EMBL#division -> String
def division
id_line('DIVISION')
end
! # returns SEQUENCE_LENGTH in the ID line.
! # * Bio::EMBL#sequence_length -> Int
def sequence_length
id_line('SEQUENCE_LENGTH')
***************
*** 110,128 ****
alias seqlen sequence_length
! ##
# AC Line
# "AC A12345; B23456;"
- #
- # Bio::EMBLDB#ac -> Array
- # #accessions -> Array
! ##
! # SV Line; sequence version (1/entry)
! # "SV Accession.Version"
! #
! # Bio::EMBL#sv -> String
! # Bio::EMBL#version -> Int
#
def sv
field_fetch('SV').sub(/;/,'')
--- 128,142 ----
alias seqlen sequence_length
!
# AC Line
# "AC A12345; B23456;"
! # returns the version information in the sequence version (SV) line.
! # * Bio::EMBL#sv -> Accession.Version in String
! # * Bio::EMBL#version -> version number in Int
#
+ # SV Line; sequence version (1/entry)
+ # SV Accession.Version
def sv
field_fetch('SV').sub(/;/,'')
***************
*** 131,141 ****
sv.split(".")[1].to_i
end
! ##
! # DT Line; date (2/entry)
! # Bio::EMBL#dt -> Hash
! # Bio::EMBL#dt(key) -> String
! # key = (created|updated)
#
def dt(key=nil)
unless @data['DT']
--- 145,158 ----
sv.split(".")[1].to_i
end
+
! # returns contents in the date (DT) line.
! # * Bio::EMBL#dt -> <DT Hash>
! # where <DT Hash> is:
! # {}
! # * Bio::EMBL#dt(key) -> String
! # keys: 'created' and 'updated'
#
+ # DT Line; date (2/entry)
def dt(key=nil)
unless @data['DT']
***************
*** 146,154 ****
@data['DT'] = tmp
end
! if block_given?
! @data['DT'].each do |k,v|
! yield(k,v)
! end
! elsif key
@data['DT'][key]
else
--- 163,167 ----
@data['DT'] = tmp
end
! if key
@data['DT'][key]
else
***************
*** 208,223 ****
! ##
! # FH Line; feature table header (0 or 2)
! # FT Line; feature table data (>=0)
! #
! # Bio::EMBL#ft -> Array
! # Bio::EMBL#ft {} -> {|Hash| }
! # Bio::EMBL#ft(Int) -> Hash
#
def fh
! get('FH')
end
# same as features method in bio/db/genbank.rb
def ft(num = nil)
unless @data['FT']
--- 221,239 ----
! # returns feature table header (String) in the feature header (FH) line.
#
+ # FH Line; feature table header (0 or 2)
def fh
! fetch('FH')
end
+
+ # returns contents in the feature table (FT) lines.
+ # * Bio::EMBL#ft -> [ <FT Hash>* ]
+ # * Bio::EMBL#ft {} -> {|<FT Hash>| }
+ # * Bio::EMBL#ft(Int) -> Hash
+ #
# same as features method in bio/db/genbank.rb
+ #
+ # FT Line; feature table data (>=0)
def ft(num = nil)
unless @data['FT']
***************
*** 255,260 ****
end
if block_given?
! @data['FT'].each do |f|
! yield f
end
else
--- 271,276 ----
end
if block_given?
! @data['FT'].each do |feature_table|
! yield feature_table
end
else
***************
*** 264,279 ****
alias features ft
def each_cds
! ft.each do |feature|
! if feature.feature == 'CDS'
! yield feature
end
end
end
def each_gene
! ft.each do |feature|
! if feature.feature == 'gene'
! yield feature
end
end
--- 280,297 ----
alias features ft
+ # iterates on CDS features in the FT lines.
def each_cds
! ft.each do |cds_feature|
! if cds_feature.feature == 'CDS'
! yield cds_feature
end
end
end
+ # iterates on gene features in the FT lines.
def each_gene
! ft.each do |gene_feature|
! if gene_feature.feature == 'gene'
! yield gene_feature
end
end
***************
*** 281,287 ****
! ##
! # CC Line; comments of notes (>=0)
#
def cc
get('CC')
--- 299,305 ----
! # returns comment text in the comments (CC) line.
#
+ # CC Line; comments or notes (>=0)
def cc
get('CC')
***************
*** 295,319 ****
! ##
! # SQ Line; sequence header (1/entry)
! # "SQ Sequence 1859 BP; 609 A; 314 C; 355 G; 581 T; 0 other;"
! # Bio::EMBL#sq -> Hash
! # Bio::EMBL#sq(base) -> Int
! # #sq[base] -> Int
#
def sq(base = nil)
unless @data['SQ']
fetch('SQ') =~ \
/(\d+) BP\; (\d+) A; (\d+) C; (\d+) G; (\d+) T; (\d+) other;/
! @data['SQ']={'ntlen'=>$1.to_i, 'other'=>$6.to_i,
! 'a'=>$2.to_i,'c'=>$3.to_i,'g'=>$4.to_i,'t'=>$5.to_i}
else
@data['SQ']
end
! if block_given?
! @data['SQ'].each do |k,v|
! yield(k,v)
! end
! elsif base
@data['SQ'][base.downcase]
else
--- 313,337 ----
! # returns sequence header information in the sequence header (SQ) line.
! # * Bio::EMBL#sq -> <SQ Hash>
! # where <SQ Hash> is:
! # {'ntlen' => Int, 'other' => Int,
! # 'a' => Int, 'c' => Int, 'g' => Int, 't' => Int}
! # * Bio::EMBL#sq(base) -> <base content in Int>
! # * Bio::EMBL#sq[base] -> <base content in Int>
#
+ # SQ Line; sequence header (1/entry)
+ # SQ Sequence 1859 BP; 609 A; 314 C; 355 G; 581 T; 0 other;
def sq(base = nil)
unless @data['SQ']
fetch('SQ') =~ \
/(\d+) BP\; (\d+) A; (\d+) C; (\d+) G; (\d+) T; (\d+) other;/
! @data['SQ'] = {'ntlen' => $1.to_i, 'other' => $6.to_i,
! 'a' => $2.to_i, 'c' => $3.to_i , 'g' => $4.to_i, 't' => $5.to_i}
else
@data['SQ']
end
!
! if base
@data['SQ'][base.downcase]
else
***************
*** 321,335 ****
end
end
- # Bio::EMBL#gc -> Float
- def gc
- ( sq('g') + sq('c') ) / sq('ntlen').to_f * 100
- end
! ##
# @orig[''] as sequence
# bb Line; (blanks) sequence data (>=1)
- # Bio::EMBL#seq -> Bio::Sequence::NA
- #
def seq
Sequence::NA.new( fetch('').gsub(/ /,'').gsub(/\d+/,'') )
--- 339,349 ----
end
end
! # returns the nucleotide sequence in this entry.
! # * Bio::EMBL#seq -> Bio::Sequence::NA
! #
# @orig[''] as sequence
# bb Line; (blanks) sequence data (>=1)
def seq
Sequence::NA.new( fetch('').gsub(/ /,'').gsub(/\d+/,'') )
***************
*** 341,345 ****
-
### private methods
--- 355,358 ----
***************
*** 434,517 ****
- =begin
-
- = Bio::EMBL
-
- === Initialize
-
- --- Bio::EMBL#new(an_embl_entry)
-
- === ID line (Identification)
-
- --- Bio::EMBL#id_line -> Hash
- --- Bio::EMBL#id_line(key) -> String
-
- key = (entryname|molecule|division|sequencelength)
-
- --- Bio::EMBL#entry -> String
- --- Bio::EMBL#entryname -> String
- --- Bio::EMBL#molecule -> String
- --- Bio::EMBL#division -> String
- --- Bio::EMBL#sequencelength -> Int
-
- === AC lines (Accession number)
-
- --- Bio::EMBL#ac -> Array
-
- === SV line (Sequence version)
-
- --- Bio::EMBL#sv -> String
-
- === DT lines (Date)
-
- --- Bio::EMBL#dt -> Hash
- --- Bio::EMBL#dt(key) -> String
-
- key = (created|updated)
-
- === DE lines (Description)
-
- --- Bio::EMBL#de -> String
-
- === KW lines (Keyword)
-
- --- Bio::EMBL#kw -> Array
-
- === OS lines (Organism species)
-
- --- Bio::EMBL#os -> Hash
-
- === OC lines (organism classification)
-
- --- Bio::EMBL#oc -> Array
-
- === OG line (Organella)
-
- --- Bio::EMBL#og -> String
-
- === RN RC RP RX RA RT RL lines (Reference)
-
- --- Bio::EMBL#ref -> String
-
- === DR lines (Database cross-reference)
-
- --- Bio::EMBL#dr -> Array
-
- === FH FT lines (Feature table header and data)
-
- --- Bio::EMBL#ft -> Bio::Features
- --- Bio::EMBL#each_cds -> Array
- --- Bio::EMBL#each_gene -> Array
-
-
- === SQ Lines (Sequence header and data)
-
- --- Bio::EMBL#sq -> Hash
- --- Bio::EMBL#sq(base) -> Int
-
- base = (a|c|g|t|u|other)
-
- --- Bio::EMBL#gc -> Float
- --- Bio::EMBL#seq -> Bio::Sequece::NA
-
- =end
--- 447,448 ----
More information about the bioruby-cvs
mailing list