[BioRuby-cvs] bioruby/lib/bio/io fastacmd.rb,1.10,1.11

Tue Mar 21 12:18:16 UTC 2006

Update of /home/repository/bioruby/bioruby/lib/bio/io
In directory pub.open-bio.org:/tmp/cvs-serv1637

Modified Files:
	fastacmd.rb 
Log Message:
Added/reformatted documentation.


Index: fastacmd.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/io/fastacmd.rb,v
retrieving revision 1.10
retrieving revision 1.11
diff -C2 -d -r1.10 -r1.11
*** fastacmd.rb	28 Jan 2006 08:12:21 -0000	1.10
--- fastacmd.rb	21 Mar 2006 12:18:14 -0000	1.11
***************
*** 5,45 ****
  #              Shuji SHIGENOBU <shige at nibb.ac.jp>,
  #              Toshiaki Katayama <k at bioruby.org>,
! #              Mitsuteru C. Nakao <n at bioruby.org>
  # Lisence::    LGPL
  #
  # $Id$
  #
- # == Description
- #
- # Retrives FASTA formatted sequences from a blast database using 
- # NCBI fastacmd command.
- # 
- # This class requires 'fastacmd' command and a blast database  
- # (formatted using the '-o' option of 'formatdb').
- #
- # == Examples
- #
- #    database = ARGV.shift || "/db/myblastdb"
- #    entry_id = ARGV.shift || "sp:128U_DROME"
- #    ent_list = ["sp:1433_SPIOL", "sp:1432_MAIZE"]
- #
- #    fastacmd = Bio::Blast::Fastacmd.new(database)
- #
- #    entry = fastacmd.get_by_id(entry_id)
- #    fastacmd.fetch(entry_id)
- #    fastacmd.fetch(ent_list)
- #
- #    fastacmd.fetch(ent_list).each do |fasta|
- #      puts fasta
- #    end
- #
- # == References
- #
- # * NCBI tool
- #   ftp://ftp.ncbi.nih.gov/blast/executables/LATEST/ncbi.tar.gz
- #
- # * fastacmd.html
- #   http://biowulf.nih.gov/apps/blast/doc/fastacmd.html
- #
  #--
  #
--- 5,14 ----
  #              Shuji SHIGENOBU <shige at nibb.ac.jp>,
  #              Toshiaki Katayama <k at bioruby.org>,
! #              Mitsuteru C. Nakao <n at bioruby.org>,
! #              Jan Aerts <jan.aerts at bbsrc.ac.uk>
  # Lisence::    LGPL
  #
  # $Id$
  #
  #--
  #
***************
*** 68,72 ****
  class Blast
  
! # NCBI fastacmd wrapper class
  #
  class Fastacmd
--- 37,68 ----
  class Blast
  
! # = DESCRIPTION
! #
! # Retrieves FASTA formatted sequences from a blast database using 
! # NCBI fastacmd command.
! # 
! # This class requires 'fastacmd' command and a blast database  
! # (formatted using the '-o' option of 'formatdb').
! #
! # = USAGE
! #  require 'bio'
! #  
! #  fastacmd = Bio::Blast::Fastacmd.new("/db/myblastdb")
! #
! #  entry = fastacmd.get_by_id("sp:128U_DROME")
! #  fastacmd.fetch("sp:128U_DROME")
! #  fastacmd.fetch(["sp:1433_SPIOL", "sp:1432_MAIZE"])
! #
! #  fastacmd.fetch(["sp:1433_SPIOL", "sp:1432_MAIZE"]).each do |fasta|
! #    puts fasta
! #  end
! #
! # = REFERENCES
! #
! # * NCBI tool
! #   ftp://ftp.ncbi.nih.gov/blast/executables/LATEST/ncbi.tar.gz
! #
! # * fastacmd.html
! #   http://biowulf.nih.gov/apps/blast/doc/fastacmd.html
  #
  class Fastacmd
***************
*** 78,90 ****
    attr_accessor :database
  
!   # fastcmd command file path.
    attr_accessor :fastacmd
  
-   # 
    attr_accessor :errorlog
  
!   # Initalize a fastacmd object.
!   #    
!   #    fastacmd = Bio::Blast::Fastacmd.new("/db/myblastdb")
    def initialize(blast_database_file_path)
      @database = blast_database_file_path
--- 74,103 ----
    attr_accessor :database
  
!   # fastacmd command file path.
    attr_accessor :fastacmd
  
    attr_accessor :errorlog
  
!   # This method provides a handle to a BLASTable database, which you can then
!   # use to retrieve sequences.
!   # 
!   # Prerequisites:
!   # * You have created a BLASTable database with the '-o T' option.
!   # * You have the NCBI fastacmd tool installed.
!   #
!   # For example, suppose the original input file looks like:
!   #  >my_seq_1
!   #  ACCGACCTCCGGAACGGATAGCCCGACCTACG
!   #  >my_seq_2
!   #  TCCGACCTTTCCTACCGCACACCTACGCCATCAC
!   #  ...
!   # and you've created a BLASTable database from that with the command
!   #  cd /my_dir/
!   #  formatdb -i my_input_file -t Test -n Test -o T
!   # then you can get a handle to this database with the command
!   #  fastacmd = Bio::Blast::Fastacmd.new("/my_dir/Test")
!   # ---
!   # *Arguments*:
!   # * _blast_database_file_path_:: path and name of BLASTable database
    def initialize(blast_database_file_path)
      @database = blast_database_file_path
***************
*** 93,117 ****
  
  
!   # get an entry_id and returns a Bio::FastaFormat object.
!   #
!   #   entry_id = "sp:128U_DROME"
!   #   entry = fastacmd.get_by_id(entry_id)
    def get_by_id(entry_id)
      fetch(entry_id).shift
    end
  
!   # get one or more entry_id and returns an Array of Bio::FastaFormat objects.
!   #
!   # Fastacmd#fetch(entry_id) returns an Array of a Bio::FastaFormat
!   # object even when the result is a single entry.
!   #
!   #    p fastacmd.fetch(entry_id)
    #
!   # Fastacmd#fetch method also accepts a list of entry_id and returns
!   # an Array of Bio::FastaFormat objects.
!   #    
!   #    ent_list = ["sp:1433_SPIOL", "sp:1432_MAIZE"]
!   #    p fastacmd.fetch(ent_list)
    #
    def fetch(list)
      if list.respond_to?(:join)
--- 106,131 ----
  
  
!   # Get the sequence of a specific entry in the BLASTable database.
!   # For example:
!   #  entry = fastacmd.get_by_id("sp:128U_DROME")
!   # ---
!   # *Arguments*:
!   # * _entry_id_:: id of an entry in the BLAST database
!   # *Returns*:: a Bio::FastaFormat object
    def get_by_id(entry_id)
      fetch(entry_id).shift
    end
  
!   # Get the sequence for a _list_ of IDs in the database.
    #
!   # For example:
!   #  p fastacmd.fetch(["sp:1433_SPIOL", "sp:1432_MAIZE"])
    #
+   # This method always returns an array of Bio::FastaFormat objects, even when 
+   # the result is a single entry.
+   # ---
+   # *Arguments*:
+   # * _list_:: a single ID or a list of IDs to retrieve from the database
+   # *Returns*:: array of Bio::FastaFormat objects
    def fetch(list)
      if list.respond_to?(:join)
***************
*** 128,138 ****
    end
  
!   # Iterates each entry.
!   #
!   # You can also iterate on all sequences in the database!
!   #    fastacmd.each do |fasta|
!   #      p [ fasta.definition[0..30], fasta.seq.size ]
!   #    end
    #
    def each_entry
      cmd = [ @fastacmd, '-d', @database, '-D', 'T' ]
--- 142,152 ----
    end
  
!   # Iterates over _all_ sequences in the database.
    #
+   #  fastacmd.each_entry do |fasta|
+   #    p [ fasta.definition[0..30], fasta.seq.size ]
+   #  end
+   # ---
+   # *Returns*:: a Bio::FastaFormat object for each iteration
    def each_entry
      cmd = [ @fastacmd, '-d', @database, '-D', 'T' ]
***************
*** 154,156 ****
--- 168,184 ----
  end # module Bio
  
+ if $0 == __FILE__
+   fastacmd = Bio::Blast::Fastacmd.new("/path_to_my_db/db_name")
+   seq = fastacmd.get_by_id('id_of_entry1')
+   puts seq.class
+   puts seq
+   
+   seqs = fastacmd.fetch(['id_of_entry1','id_of_entry2'])
+   seqs.each do |seq|
+     puts seq
+   end
  
+   fastacmd.each_entry do |fasta|
+     puts fasta.seq.size.to_s + "\t" + fasta.definition
+   end
+ end