[BioRuby-cvs] bioruby/lib/bio/db fasta.rb,1.21,1.22

Sat Jan 28 10:49:01 UTC 2006

Update of /home/repository/bioruby/bioruby/lib/bio/db
In directory pub.open-bio.org:/tmp/cvs-serv5883/lib/bio/db

Modified Files:
	fasta.rb 
Log Message:
* Added RDoc.


Index: fasta.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/db/fasta.rb,v
retrieving revision 1.21
retrieving revision 1.22
diff -C2 -d -r1.21 -r1.22
*** fasta.rb	26 Sep 2005 13:00:06 -0000	1.21
--- fasta.rb	28 Jan 2006 10:48:59 -0000	1.22
***************
*** 1,7 ****
  #
! # bio/db/fasta.rb - FASTA format class
  #
! #   Copyright (C) 2001 GOTO Naohisa <ngoto at gen-info.osaka-u.ac.jp>
! #   Copyright (C) 2001, 2002 KATAYAMA Toshiaki <k at bioruby.org>
  #
  #  This library is free software; you can redistribute it and/or
--- 1,67 ----
  #
! # = bio/db/fasta.rb - FASTA format class
  #
! # Copyright::  Copyright (C) 2001, 2002
! #              GOTO Naohisa <ngoto at gen-info.osaka-u.ac.jp>,
! #              KATAYAMA Toshiaki <k at bioruby.org>
! # License::    LGPL
! #
! # $Id$
! # 
! # == Description
! # 
! # FASTA format class.
! #
! # == Examples
! #
! #       rub = Bio::FastaDefline.new('>gi|671595|emb|CAA85678.1| rubisco large subunit [Perovskia abrotanoides]')
! #       rub.entry_id       ==> 'gi|671595'
! #       rub.get('emb')     ==> 'CAA85678.1'
! #       rub.emb            ==> 'CAA85678.1'
! #       rub.gi             ==> '671595'
! #       rub.accession      ==> 'CAA85678'
! #       rub.accessions     ==> [ 'CAA85678' ]
! #       rub.acc_version    ==> 'CAA85678.1'
! #       rub.locus          ==> nil
! #       rub.list_ids       ==> [["gi", "671595"],
! #                               ["emb", "CAA85678.1", nil],
! #                               ["Perovskia abrotanoides"]]
! #
! #       ckr = Bio::FastaDefline.new(">gi|2495000|sp|Q63931|CCKR_CAVPO CHOLECYSTOKININ TYPE A RECEPTOR (CCK-A RECEPTOR) (CCK-AR)\001gi|2147182|pir||I51898 cholecystokinin A receptor - guinea pig\001gi|544724|gb|AAB29504.1| cholecystokinin A receptor; CCK-A receptor [Cavia]")
! #       ckr.entry_id      ==> "gi|2495000"
! #       ckr.sp            ==> "CCKR_CAVPO"
! #       ckr.pir           ==> "I51898"
! #       ckr.gb            ==> "AAB29504.1"
! #       ckr.gi            ==> "2495000"
! #       ckr.accession     ==> "AAB29504"
! #       ckr.accessions    ==> ["Q63931", "AAB29504"]
! #       ckr.acc_version   ==> "AAB29504.1"
! #       ckr.locus         ==> nil
! #       ckr.description   ==>
! #         "CHOLECYSTOKININ TYPE A RECEPTOR (CCK-A RECEPTOR) (CCK-AR)"
! #       ckr.descriptions  ==>
! #         ["CHOLECYSTOKININ TYPE A RECEPTOR (CCK-A RECEPTOR) (CCK-AR)",
! #          "cholecystokinin A receptor - guinea pig",
! #          "cholecystokinin A receptor; CCK-A receptor [Cavia]"]
! #       ckr.words         ==> 
! #         ["cavia", "cck-a", "cck-ar", "cholecystokinin", "guinea", "pig",
! #          "receptor", "type"]
! #       ckr.id_strings    ==>
! #         ["2495000", "Q63931", "CCKR_CAVPO", "2147182", "I51898",
! #          "544724", "AAB29504.1", "Cavia"]
! #       ckr.list_ids      ==>
! #         [["gi", "2495000"], ["sp", "Q63931", "CCKR_CAVPO"],
! #          ["gi", "2147182"], ["pir", nil, "I51898"], ["gi", "544724"],
! #          ["gb", "AAB29504.1", nil], ["Cavia"]]
! #
! # == References
! #
! # * FASTA format (WikiPedia)
! #   http://en.wikipedia.org/wiki/FASTA_format
! #   
! # * Fasta format description (NCBI)
! #   http://www.ncbi.nlm.nih.gov/BLAST/fasta.shtml
! #
! #--
  #
  #  This library is free software; you can redistribute it and/or
***************
*** 19,23 ****
  #  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
  #
! #  $Id$
  #
  
--- 79,83 ----
  #  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
  #
! #++
  #
  
***************
*** 27,34 ****
--- 87,171 ----
  module Bio
  
+ 
+   # Treats a FASTA formatted entry, such as:
+   #
+   #   >id and/or some comments                    <== comment line
+   #   ATGCATGCATGCATGCATGCATGCATGCATGCATGC        <== sequence lines
+   #   ATGCATGCATGCATGCATGCATGCATGCATGCATGC
+   #   ATGCATGCATGC
+   # 
+   # The precedent '>' can be omitted and the trailing '>' will be removed
+   # automatically.
+   #
+   # === Examples
+   #
+   #   f_str = <<END
+   #   >sce:YBR160W  CDC28, SRM5; cyclin-dependent protein kinase catalytic subunit [EC:2.7.1.-] [SP:CC28_YEAST]
+   #   MSGELANYKRLEKVGEGTYGVVYKALDLRPGQGQRVVALKKIRLESEDEG
+   #   VPSTAIREISLLKELKDDNIVRLYDIVHSDAHKLYLVFEFLDLDLKRYME
+   #   GIPKDQPLGADIVKKFMMQLCKGIAYCHSHRILHRDLKPQNLLINKDGNL
+   #   KLGDFGLARAFGVPLRAYTHEIVTLWYRAPEVLLGGKQYSTGVDTWSIGC
+   #   IFAEMCNRKPIFSGDSEIDQIFKIFRVLGTPNEAIWPDIVYLPDFKPSFP
+   #   QWRRKDLSQVVPSLDPRGIDLLDKLLAYDPINRISARRAAIHPYFQES
+   #   >sce:YBR274W  CHK1; probable serine/threonine-protein kinase [EC:2.7.1.-] [SP:KB9S_YEAST]
+   #   MSLSQVSPLPHIKDVVLGDTVGQGAFACVKNAHLQMDPSIILAVKFIHVP
+   #   TCKKMGLSDKDITKEVVLQSKCSKHPNVLRLIDCNVSKEYMWIILEMADG
+   #   GDLFDKIEPDVGVDSDVAQFYFQQLVSAINYLHVECGVAHRDIKPENILL
+   #   DKNGNLKLADFGLASQFRRKDGTLRVSMDQRGSPPYMAPEVLYSEEGYYA
+   #   DRTDIWSIGILLFVLLTGQTPWELPSLENEDFVFFIENDGNLNWGPWSKI
+   #   EFTHLNLLRKILQPDPNKRVTLKALKLHPWVLRRASFSGDDGLCNDPELL
+   #   AKKLFSHLKVSLSNENYLKFTQDTNSNNRYISTQPIGNELAELEHDSMHF
+   #   QTVSNTQRAFTSYDSNTNYNSGTGMTQEAKWTQFISYDIAALQFHSDEND
+   #   CNELVKRHLQFNPNKLTKFYTLQPMDVLLPILEKALNLSQIRVKPDLFAN
+   #   FERLCELLGYDNVFPLIINIKTKSNGGYQLCGSISIIKIEEELKSVGFER
+   #   KTGDPLEWRRLFKKISTICRDIILIPN
+   #   END
+   #
+   #   f = Bio::FastaFormat.new(f_str)
+   #   puts "### FastaFormat"
+   #   puts "# entry"
+   #   puts f.entry
+   #   puts "# entry_id"
+   #   p f.entry_id
+   #   puts "# definition"
+   #   p f.definition
+   #   puts "# data"
+   #   p f.data
+   #   puts "# seq"
+   #   p f.seq
+   #   puts "# seq.type"
+   #   p f.seq.type
+   #   puts "# length"
+   #   p f.length
+   #   puts "# aaseq"
+   #   p f.aaseq
+   #   puts "# aaseq.type"
+   #   p f.aaseq.type
+   #   puts "# aaseq.composition"
+   #   p f.aaseq.composition
+   #   puts "# aalen"
+   #   p f.aalen
+   #
+   # === References
+   #
+   # * FASTA format (WikiPedia) 
+   #   http://en.wikipedia.org/wiki/FASTA_format
+   #
    class FastaFormat < DB
  
+     # Entry delimiter in flatfile text.
      DELIMITER	= RS = "\n>"
  
+     # The comment line of the FASTA formatted data.
+     attr_accessor :definition
+ 
+     # The sequence lines in text.
+     attr_accessor :data
+ 
+     attr_reader :entry_overrun
+ 
+     # Stores the comment and sequence information from one entry of the
+     # FASTA format string.  If the argument contains more than one
+     # entry, only the first entry is used.
      def initialize(str)
        @definition = str[/.*/].sub(/^>/, '').strip	# 1st line
***************
*** 37,43 ****
        @entry_overrun = $&
      end
-     attr_accessor :definition, :data
-     attr_reader :entry_overrun
  
      def entry
        @entry = ">#{@definition}\n#{@data.strip}\n"
--- 174,179 ----
        @entry_overrun = $&
      end
  
+     # Returns the stored one entry as a FASTA format. (same as to_s)
      def entry
        @entry = ">#{@definition}\n#{@data.strip}\n"
***************
*** 45,48 ****
--- 181,202 ----
      alias to_s entry
  
+ 
+     # Executes FASTA/BLAST search by using a Bio::Fasta or a Bio::Blast
+     # factory object.
+     #
+     #   #!/usr/bin/env ruby
+     #   require 'bio'
+     #   
+     #   factory = Bio::Fasta.local('fasta34', 'db/swissprot.f')
+     #   flatfile = Bio::FlatFile.open(Bio::FastaFormat, 'queries.f')
+     #   flatfile.each do |entry|
+     #     p entry.definition
+     #     result = entry.fasta(factory)
+     #     result.each do |hit|
+     #       print "#{hit.query_id} : #{hit.evalue}\t#{hit.target_id} at "
+     #       p hit.lap_at
+     #     end
+     #   end
+     #
      def query(factory)
        factory.query(@entry)
***************
*** 51,54 ****
--- 205,209 ----
      alias blast query
  
+     # Returns a joined sequence line as a String.
      def seq
        unless defined?(@seq)
***************
*** 76,79 ****
--- 231,235 ----
      end
  
+     # Returns comments.
      def comment
        seq
***************
*** 81,104 ****
--- 237,269 ----
      end
  
+     # Returns sequence length.
      def length
        seq.length
      end
  
+     # Returns the Bio::Sequence::NA.
      def naseq
        Sequence::NA.new(seq)
      end
  
+     # Returns the length of Bio::Sequence::NA.
      def nalen
        self.naseq.length
      end
  
+     # Returns the Bio::Sequence::AA.
      def aaseq
        Sequence::AA.new(seq)
      end
  
+     # Returns the length of Bio::Sequence::AA.
      def aalen
        self.aaseq.length
      end
  
+     # Parsing FASTA Defline, and extract IDs.
+     # IDs are NSIDs (NCBI standard FASTA sequence identifiers)
+     # or ":"-separated IDs.
+     # It returns a Bio::FastaDefline instance.
      def identifiers
        unless defined?(@ids) then
***************
*** 108,131 ****
--- 273,310 ----
      end
  
+     # Parsing FASTA Defline (using #identifiers method), and
+     # shows a possibly unique identifier.
+     # It returns a string.
      def entry_id
        identifiers.entry_id
      end
  
+     # Parsing FASTA Defline (using #identifiers method), and
+     # shows GI/locus/accession/accession with version number.
+     # If an entry has more than one such ID,
+     # only the first ID is shown.
+     # It returns a string or nil.
      def gi
        identifiers.gi
      end
  
+     # Returns an accession number.
      def accession
        identifiers.accession
      end
  
+     # Parsing FASTA Defline (using #identifiers method), and
+     # shows accession numbers.
+     # It returns an array of strings.
      def accessions
        identifiers.accessions
      end
  
+     # Returns accession number with version.
      def acc_version
        identifiers.acc_version
      end
  
+     # Returns locus.
      def locus
        identifiers.locus
***************
*** 134,139 ****
--- 313,339 ----
    end #class FastaFormat
  
+   # Treats a FASTA formatted numerical entry, such as:
+   # 
+   #   >id and/or some comments                    <== comment line
+   #   24 15 23 29 20 13 20 21 21 23 22 25 13      <== numerical data
+   #   22 17 15 25 27 32 26 32 29 29 25
+   # 
+   # The precedent '>' can be omitted and the trailing '>' will be removed
+   # automatically.
+   #
+   # --- Bio::FastaNumericFormat.new(entry)
+   # 
+   # Stores the comment and the list of the numerical data.
+   # 
+   # --- Bio::FastaNumericFormat#definition
+   #
+   # The comment line of the FASTA formatted data.
+   #
+   # * FASTA format (Wikipedia)
+   #   http://en.wikipedia.org/wiki/FASTA_format
    class FastaNumericFormat < FastaFormat
  
+     # Returns the list of the numerical data (typically the quality score
+     # of its corresponding sequence) as an Array.
      def data
        unless @list
***************
*** 143,150 ****
--- 343,352 ----
      end
  
+     # Returns the number of elements in the numerical data.
      def length
        data.length
      end
  
+     # Yields each element of the numerical data.
      def each
        data.each do |x|
***************
*** 153,156 ****
--- 355,359 ----
      end
  
+     # Returns the n-th element.
      def [](n)
        data[n]
***************
*** 161,169 ****
    end #class FastaNumericFormat
  
-   class FastaDefline
  
!     # specs are described in:
!     # ftp://ftp.ncbi.nih.gov/blast/documents/README.formatdb
!     # http://blast.wustl.edu/doc/FAQ-Indexing.html#Identifiers
  
      NSIDs = {
--- 364,430 ----
    end #class FastaNumericFormat
  
  
!   # Parses a FASTA defline and extracts IDs and other information.
!   # IDs are NSIDs (NCBI standard FASTA sequence identifiers)
!   # or ":"-separated IDs.
!   # 
!   # specs are described in:
!   # ftp://ftp.ncbi.nih.gov/blast/documents/README.formatdb
!   # http://blast.wustl.edu/doc/FAQ-Indexing.html#Identifiers
!   #
!   # === Examples
!   #
!   #   rub = Bio::FastaDefline.new('>gi|671595|emb|CAA85678.1| rubisco large subunit [Perovskia abrotanoides]')
!   #   rub.entry_id       ==> 'gi|671595'
!   #   rub.get('emb')     ==> 'CAA85678.1'
!   #   rub.emb            ==> 'CAA85678.1'
!   #   rub.gi             ==> '671595'
!   #   rub.accession      ==> 'CAA85678'
!   #   rub.accessions     ==> [ 'CAA85678' ]
!   #   rub.acc_version    ==> 'CAA85678.1'
!   #   rub.locus          ==> nil
!   #   rub.list_ids       ==> [["gi", "671595"],
!   #                           ["emb", "CAA85678.1", nil],
!   #                           ["Perovskia abrotanoides"]]
!   #
!   #   ckr = Bio::FastaDefline.new(">gi|2495000|sp|Q63931|CCKR_CAVPO CHOLECYSTOKININ TYPE A RECEPTOR (CCK-A RECEPTOR) (CCK-AR)\001gi|2147182|pir||I51898 cholecystokinin A receptor - guinea pig\001gi|544724|gb|AAB29504.1| cholecystokinin A receptor; CCK-A receptor [Cavia]")
!   #   ckr.entry_id      ==> "gi|2495000"
!   #   ckr.sp            ==> "CCKR_CAVPO"
!   #   ckr.pir           ==> "I51898"
!   #   ckr.gb            ==> "AAB29504.1"
!   #   ckr.gi            ==> "2495000"
!   #   ckr.accession     ==> "AAB29504"
!   #   ckr.accessions    ==> ["Q63931", "AAB29504"]
!   #   ckr.acc_version   ==> "AAB29504.1"
!   #   ckr.locus         ==> nil
!   #   ckr.description   ==>
!   #     "CHOLECYSTOKININ TYPE A RECEPTOR (CCK-A RECEPTOR) (CCK-AR)"
!   #   ckr.descriptions  ==>
!   #     ["CHOLECYSTOKININ TYPE A RECEPTOR (CCK-A RECEPTOR) (CCK-AR)",
!   #      "cholecystokinin A receptor - guinea pig",
!   #      "cholecystokinin A receptor; CCK-A receptor [Cavia]"]
!   #   ckr.words         ==> 
!   #     ["cavia", "cck-a", "cck-ar", "cholecystokinin", "guinea", "pig",
!   #      "receptor", "type"]
!   #   ckr.id_strings    ==>
!   #     ["2495000", "Q63931", "CCKR_CAVPO", "2147182", "I51898",
!   #      "544724", "AAB29504.1", "Cavia"]
!   #   ckr.list_ids      ==>
!   #     [["gi", "2495000"], ["sp", "Q63931", "CCKR_CAVPO"],
!   #      ["gi", "2147182"], ["pir", nil, "I51898"], ["gi", "544724"],
!   #      ["gb", "AAB29504.1", nil], ["Cavia"]]
!   #
!   # === References
!   #
!   # * Fasta format description (NCBI)
!   #   http://www.ncbi.nlm.nih.gov/BLAST/fasta.shtml
!   #
!   # * Frequently Asked Questions:  Indexing of Sequence Identifiers (by Warren R. Gish.)
!   #   http://blast.wustl.edu/doc/FAQ-Indexing.html#Identifiers
!   #
!   # * README.formatdb
!   #   ftp://ftp.ncbi.nih.gov/blast/documents/README.formatdb
!   # 
!   class FastaDefline
  
      NSIDs = {
***************
*** 198,201 ****
--- 459,471 ----
      }
  
+     # Shows array that contains IDs (or ID-like strings).
+     # Returns an array of arrays of strings.
+     attr_reader :list_ids
+ 
+     # Shows a possibly unique identifier.
+     # Returns a string.
+     attr_reader :entry_id
+ 
+     # Parses given string.
      def initialize(str)
        @deflines = []
***************
*** 211,217 ****
      end #def initialize
  
!     attr_reader :list_ids
!     attr_reader :entry_id
! 
      def add_defline(str)
        case str
--- 481,485 ----
      end #def initialize
  
!     # Parses given string and adds parsed data.
      def add_defline(str)
        case str
***************
*** 344,347 ****
--- 612,619 ----
      private :parse_NSIDs
  
+ 
+     # Shows original string.
+     # Note that the result of this method may be different from
+     # original string which is given in FastaDefline.new method.
      def to_s
        @deflines.collect { |a|
***************
*** 351,358 ****
--- 623,632 ----
      end
  
+     # Shows description.
      def description
        @deflines[0].to_a[-1]
      end
  
+     # Returns descriptions.
      def descriptions
        @deflines.collect do |a|
***************
*** 361,364 ****
--- 635,640 ----
      end
  
+     # Shows ID-like strings.
+     # Returns an array of strings.
      def id_strings
        r = []
***************
*** 402,405 ****
--- 678,682 ----
      ]
  
+     # Shows words used in the defline. Returns an Array.
      def words(case_sensitive = nil, kill_regexp = self.class::KillRegexpArray,
                kwhash = self.class::KillWordsHash)
***************
*** 427,432 ****
      end
  
!     def get(db)
!       db =db.to_s
        r = nil
        unless r = @info[db] then
--- 704,710 ----
      end
  
!     # Returns identifiers by a database name.
!     def get(dbname)
!       db = dbname.to_s
        r = nil
        unless r = @info[db] then
***************
*** 450,457 ****
      end
  
!     def get_by_type(tstr)
        @list_ids.each do |x|
          if labels = self.class::NSIDs[x[0]] then
!           if i = labels.index(tstr) then
              return x[i+1]
            end
--- 728,736 ----
      end
  
!     # Returns an identifier by given type.
!     def get_by_type(type_str)
        @list_ids.each do |x|
          if labels = self.class::NSIDs[x[0]] then
!           if i = labels.index(type_str) then
              return x[i+1]
            end
***************
*** 461,469 ****
      end
  
!     def get_all_by_type(*tstrarg)
        d = []
        @list_ids.each do |x|
          if labels = self.class::NSIDs[x[0]] then
!           tstrarg.each do |y|
              if i = labels.index(y) then
                d << x[i+1] if x[i+1]
--- 740,749 ----
      end
  
!     # Returns identifiers by given type.
!     def get_all_by_type(*type_strarg)
        d = []
        @list_ids.each do |x|
          if labels = self.class::NSIDs[x[0]] then
!           type_strarg.each do |y|
              if i = labels.index(y) then
                d << x[i+1] if x[i+1]
***************
*** 475,478 ****
--- 755,762 ----
      end
  
+     # Shows locus.
+     # If the entry has more than two of such IDs,
+     # only the first ID are shown.
+     # Returns a string or nil.
      def locus
        unless defined?(@locus)
***************
*** 482,485 ****
--- 766,773 ----
      end
  
+     # Shows GI.
+     # If the entry has more than two of such IDs,
+     # only the first ID are shown.
+     # Returns a string or nil.
      def gi
        unless defined?(@gi) then
***************
*** 489,492 ****
--- 777,784 ----
      end
  
+     # Shows accession with version number.
+     # If the entry has more than two of such IDs,
+     # only the first ID are shown.
+     # Returns a string or nil.
      def acc_version
        unless defined?(@acc_version) then
***************
*** 496,499 ****
--- 788,793 ----
      end
  
+     # Shows accession numbers.
+     # Returns an array of strings.
      def accessions
        unless defined?(@accessions) then
***************
*** 504,507 ****
--- 798,802 ----
      end
  
+     # Shows an accession number.
      def accession
        unless defined?(@accession) then
***************
*** 524,527 ****
--- 819,823 ----
        r
      end
+     
  
    end #class FastaDefline
***************
*** 610,869 ****
  
  end
- 
- =begin
- 
- = Bio::FastaFormat
- 
- Treats a FASTA formatted entry, such as:
- 
-   >id and/or some comments                    <== comment line
-   ATGCATGCATGCATGCATGCATGCATGCATGCATGC        <== sequence lines
-   ATGCATGCATGCATGCATGCATGCATGCATGCATGC
-   ATGCATGCATGC
- 
- The precedent '>' can be omitted and the trailing '>' will be removed
- automatically.
- 
- --- Bio::FastaFormat.new(entry)
- 
-       Stores the comment and sequence information from one entry of the
-       FASTA format string.  If the argument contains more than one
-       entry, only the first entry is used.
- 
- --- Bio::FastaFormat#entry
- 
-       Returns the stored one entry as a FASTA format. (same as to_s)
- 
- --- Bio::FastaFormat#definition
- 
-       Returns the comment line of the FASTA formatted data.
- 
- --- Bio::FastaFormat#seq
- 
-       Returns a joined sequence line as a String.
- 
- --- Bio::FastaFormat#query(factory)
- --- Bio::FastaFormat#fasta(factory)
- --- Bio::FastaFormat#blast(factory)
- 
-       Executes FASTA/BLAST search by using a Bio::Fasta or a Bio::Blast
-       factory object.
- 
-         #!/usr/bin/env ruby
- 
-         require 'bio'
- 
-         factory = Bio::Fasta.local('fasta34', 'db/swissprot.f')
-         flatfile = Bio::FlatFile.open(Bio::FastaFormat, 'queries.f')
-         flatfile.each do |entry|
-           p entry.definition
-           result = entry.fasta(factory)
-           result.each do |hit|
-             print "#{hit.query_id} : #{hit.evalue}\t#{hit.target_id} at "
-             p hit.lap_at
-           end
-         end
- 
- --- Bio::FastaFormat#length
- 
-       Returns sequence length.
- 
- --- Bio::FastaFormat#naseq
- --- Bio::FastaFormat#nalen
- --- Bio::FastaFormat#aaseq
- --- Bio::FastaFormat#aalen
- 
-       If you know whether the sequence is NA or AA, use these methods.
-       'naseq' and 'aaseq' methods returen the Bio::Sequence::NA or
-       Bio::Sequence::AA object respectively. 'nalen' and 'aalen' methods
-       return the length of them.
- 
- --- Bio::FastaFormat#identifiers
- 
-       Parsing FASTA Defline, and extract IDs.
-       IDs are NSIDs (NCBI standard FASTA sequence identifiers)
-       or ":"-separated IDs.
-       It returns a Bio::FastaDefline instance.
- 
- --- Bio::FastaFormat#entry_id
- 
-       Parsing FASTA Defline (using #identifiers method), and
-       shows a possibly unique identifier.
-       It returns a string.
- 
- --- Bio::FastaFormat#gi
- --- Bio::FastaFormat#locus
- --- Bio::FastaFormat#accession
- --- Bio::FastaFormat#acc_version
- 
-       Parsing FASTA Defline (using #identifiers method), and
-       shows GI/locus/accession/accession with version number.
-       If a entry has more than two of such IDs,
-       only the first ID are shown.
-       It returns a string or nil.
- 
- --- Bio::FastaFormat#accessions
- 
-       Parsing FASTA Defline (using #identifiers method), and
-       shows accession numbers.
-       It returns an array of strings.
- 
- --- Bio::FastaFormat
- 
- = Bio::FastaNumericFormat
- 
- Treats a FASTA formatted numerical entry, such as:
- 
-   >id and/or some comments                    <== comment line
-   24 15 23 29 20 13 20 21 21 23 22 25 13      <== numerical data
-   22 17 15 25 27 32 26 32 29 29 25
- 
- The precedent '>' can be omitted and the trailing '>' will be removed
- automatically.
- 
- --- Bio::FastaNumericFormat.new(entry)
- 
-       Stores the comment and the list of the numerical data.
- 
- --- Bio::FastaNumericFormat#definition
- 
-       The comment line of the FASTA formatted data.
- 
- --- Bio::FastaNumericFormat#data
- 
-       Returns the list of the numerical data (typically the quality score
-       of its corresponding sequence) as an Array.
- 
- --- Bio::FastaNumericFormat#length
- 
-       Returns the number of elements in the numerical data.
- 
- --- Bio::FastaNumericFormat#each
- 
-       Yields on each elements of the numerical data.
- 
- --- Bio::FastaNumericFormat#[](n)
- 
-       Returns the n-th element.
- 
- --- Bio::FastaNumericFormat#identifiers
- --- Bio::FastaNumericFormat#entry_id
- --- Bio::FastaNumericFormat#gi
- --- Bio::FastaNumericFormat#locus
- --- Bio::FastaNumericFormat#accession
- --- Bio::FastaNumericFormat#acc_version
- --- Bio::FastaNumericFormat#accessions
- 
-       Same as Bio::FastaFormat.
- 
- 
- = Bio::FastaDefline
- 
-       Parsing FASTA Defline, and extract IDs and other informations.
-       IDs are NSIDs (NCBI standard FASTA sequence identifiers)
-       or ":"-separated IDs.
-       
- --- see also:
-       ftp://ftp.ncbi.nih.gov/blast/documents/README.formatdb
-       http://blast.wustl.edu/doc/FAQ-Indexing.html#Identifiers
- 
- --- Bio::FastaDefline.new(str)
- 
-       Parses given string.
- 
- --- Bio::FastaFormat#entry_id
- 
-       Shows a possibly unique identifier.
-       Returns a string.
- 
- --- Bio::FastaDefline#gi
- --- Bio::FastaDefline#locus
- --- Bio::FastaDefline#accession
- --- Bio::FastaDefline#acc_version
- 
-       Shows GI/locus/accession/accession with version number.
-       If the entry has more than two of such IDs,
-       only the first ID are shown.
-       Returns a string or nil.
- 
- --- Bio::FastaFormat#accessions
- 
-       Shows accession numbers.
-       Returns an array of strings.
- 
- --- Bio::FastaDefline#add_defline(str)
- 
-       Parses given string and adds parsed data.
- 
- --- Bio::FastaDefline#to_s
- 
-       Shows original string.
-       Note that the result of this method may be different from
-       original string which is given in FastaDefline.new method.
- 
- --- Bio::FastaDefline#id_strings
- 
-       Shows ID-like strings.
-       Returns an array of strings.
- 
- --- Bio::FastaDefline#list_ids
- 
-       Shows array that contains IDs (or ID-like strings).
-       Returns an array of arrays of strings.
- 
- --- Bio::FastaDefline#description
- --- Bio::FastaDefline#descriptions
- 
- --- Bio::FastaDefline#words(case_sensitive = nil,
-                             kill_words_regexp_array, kill_words_hash)
- 
- --- Bio::FastaDefline#get(tag_of_id)
- 
- --- Bio::FastaDefline#get_by_type(type_of_id)
- 
- --- Bio::FastaDefline#get_all_by_type(type_of_id)
- 
- --- examples:
-       rub = Bio::FastaDefline.new('>gi|671595|emb|CAA85678.1| rubisco large subunit [Perovskia abrotanoides]')
-       rub.entry_id       ==> 'gi|671595'
-       rub.get('emb')     ==> 'CAA85678.1'
-       rub.emb            ==> 'CAA85678.1'
-       rub.gi             ==> '671595'
-       rub.accession      ==> 'CAA85678'
-       rub.accessions     ==> [ 'CAA85678' ]
-       rub.acc_version    ==> 'CAA85678.1'
-       rub.locus          ==> nil
-       rub.list_ids       ==> [["gi", "671595"],
-                               ["emb", "CAA85678.1", nil],
-                               ["Perovskia abrotanoides"]]
- 
-       ckr = Bio::FastaDefline.new(">gi|2495000|sp|Q63931|CCKR_CAVPO CHOLECYSTOKININ TYPE A RECEPTOR (CCK-A RECEPTOR) (CCK-AR)\001gi|2147182|pir||I51898 cholecystokinin A receptor - guinea pig\001gi|544724|gb|AAB29504.1| cholecystokinin A receptor; CCK-A receptor [Cavia]")
-       ckr.entry_id      ==> "gi|2495000"
-       ckr.sp            ==> "CCKR_CAVPO"
-       ckr.pir           ==> "I51898"
-       ckr.gb            ==> "AAB29504.1"
-       ckr.gi            ==> "2495000"
-       ckr.accession     ==> "AAB29504"
-       ckr.accessions    ==> ["Q63931", "AAB29504"]
-       ckr.acc_version   ==> "AAB29504.1"
-       ckr.locus         ==> nil
-       ckr.description   ==>
-         "CHOLECYSTOKININ TYPE A RECEPTOR (CCK-A RECEPTOR) (CCK-AR)"
-       ckr.descriptions  ==>
-         ["CHOLECYSTOKININ TYPE A RECEPTOR (CCK-A RECEPTOR) (CCK-AR)",
-          "cholecystokinin A receptor - guinea pig",
-          "cholecystokinin A receptor; CCK-A receptor [Cavia]"]
-       ckr.words         ==> 
-         ["cavia", "cck-a", "cck-ar", "cholecystokinin", "guinea", "pig",
-          "receptor", "type"]
-       ckr.id_strings    ==>
-         ["2495000", "Q63931", "CCKR_CAVPO", "2147182", "I51898",
-          "544724", "AAB29504.1", "Cavia"]
-       ckr.list_ids      ==>
-         [["gi", "2495000"], ["sp", "Q63931", "CCKR_CAVPO"],
-          ["gi", "2147182"], ["pir", nil, "I51898"], ["gi", "544724"],
-          ["gb", "AAB29504.1", nil], ["Cavia"]]
- 
- =end
- 
  
--- 906,908 ----