[BioRuby-cvs] bioruby/lib/bio/io fastacmd.rb,1.10,1.11
Jan Aerts
aerts at pub.open-bio.org
Tue Mar 21 12:18:16 UTC 2006
Update of /home/repository/bioruby/bioruby/lib/bio/io
In directory pub.open-bio.org:/tmp/cvs-serv1637
Modified Files:
fastacmd.rb
Log Message:
Added/reformatted documentation.
Index: fastacmd.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/io/fastacmd.rb,v
retrieving revision 1.10
retrieving revision 1.11
diff -C2 -d -r1.10 -r1.11
*** fastacmd.rb 28 Jan 2006 08:12:21 -0000 1.10
--- fastacmd.rb 21 Mar 2006 12:18:14 -0000 1.11
***************
*** 5,45 ****
# Shuji SHIGENOBU <shige at nibb.ac.jp>,
# Toshiaki Katayama <k at bioruby.org>,
! # Mitsuteru C. Nakao <n at bioruby.org>
# Lisence:: LGPL
#
# $Id$
#
- # == Description
- #
- # Retrives FASTA formatted sequences from a blast database using
- # NCBI fastacmd command.
- #
- # This class requires 'fastacmd' command and a blast database
- # (formatted using the '-o' option of 'formatdb').
- #
- # == Examples
- #
- # database = ARGV.shift || "/db/myblastdb"
- # entry_id = ARGV.shift || "sp:128U_DROME"
- # ent_list = ["sp:1433_SPIOL", "sp:1432_MAIZE"]
- #
- # fastacmd = Bio::Blast::Fastacmd.new(database)
- #
- # entry = fastacmd.get_by_id(entry_id)
- # fastacmd.fetch(entry_id)
- # fastacmd.fetch(ent_list)
- #
- # fastacmd.fetch(ent_list).each do |fasta|
- # puts fasta
- # end
- #
- # == References
- #
- # * NCBI tool
- # ftp://ftp.ncbi.nih.gov/blast/executables/LATEST/ncbi.tar.gz
- #
- # * fastacmd.html
- # http://biowulf.nih.gov/apps/blast/doc/fastacmd.html
- #
#--
#
--- 5,14 ----
# Shuji SHIGENOBU <shige at nibb.ac.jp>,
# Toshiaki Katayama <k at bioruby.org>,
! # Mitsuteru C. Nakao <n at bioruby.org>,
! # Jan Aerts <jan.aerts at bbsrc.ac.uk>
# Lisence:: LGPL
#
# $Id$
#
#--
#
***************
*** 68,72 ****
class Blast
! # NCBI fastacmd wrapper class
#
class Fastacmd
--- 37,68 ----
class Blast
! # = DESCRIPTION
! #
! # Retrieves FASTA formatted sequences from a blast database using
! # NCBI fastacmd command.
! #
! # This class requires 'fastacmd' command and a blast database
! # (formatted using the '-o' option of 'formatdb').
! #
! # = USAGE
! # require 'bio'
! #
! # fastacmd = Bio::Blast::Fastacmd.new("/db/myblastdb")
! #
! # entry = fastacmd.get_by_id("sp:128U_DROME")
! # fastacmd.fetch("sp:128U_DROME")
! # fastacmd.fetch(["sp:1433_SPIOL", "sp:1432_MAIZE"])
! #
! # fastacmd.fetch(["sp:1433_SPIOL", "sp:1432_MAIZE"]).each do |fasta|
! # puts fasta
! # end
! #
! # = REFERENCES
! #
! # * NCBI tool
! # ftp://ftp.ncbi.nih.gov/blast/executables/LATEST/ncbi.tar.gz
! #
! # * fastacmd.html
! # http://biowulf.nih.gov/apps/blast/doc/fastacmd.html
#
class Fastacmd
***************
*** 78,90 ****
attr_accessor :database
! # fastcmd command file path.
attr_accessor :fastacmd
- #
attr_accessor :errorlog
! # Initalize a fastacmd object.
! #
! # fastacmd = Bio::Blast::Fastacmd.new("/db/myblastdb")
def initialize(blast_database_file_path)
@database = blast_database_file_path
--- 74,103 ----
attr_accessor :database
! # fastacmd command file path.
attr_accessor :fastacmd
attr_accessor :errorlog
! # This method provides a handle to a BLASTable database, which you can then
! # use to retrieve sequences.
! #
! # Prerequisites:
! # * You have created a BLASTable database with the '-o T' option.
! # * You have the NCBI fastacmd tool installed.
! #
! # For example, suppose the original input file looks like:
! # >my_seq_1
! # ACCGACCTCCGGAACGGATAGCCCGACCTACG
! # >my_seq_2
! # TCCGACCTTTCCTACCGCACACCTACGCCATCAC
! # ...
! # and you've created a BLASTable database from that with the command
! # cd /my_dir/
! # formatdb -i my_input_file -t Test -n Test -o T
! # then you can get a handle to this database with the command
! # fastacmd = Bio::Blast::Fastacmd.new("/my_dir/Test")
! # ---
! # *Arguments*:
! # * _blast_database_file_path_: path and name of BLASTable database
def initialize(blast_database_file_path)
@database = blast_database_file_path
***************
*** 93,117 ****
! # get an entry_id and returns a Bio::FastaFormat object.
! #
! # entry_id = "sp:128U_DROME"
! # entry = fastacmd.get_by_id(entry_id)
def get_by_id(entry_id)
fetch(entry_id).shift
end
! # get one or more entry_id and returns an Array of Bio::FastaFormat objects.
! #
! # Fastacmd#fetch(entry_id) returns an Array of a Bio::FastaFormat
! # object even when the result is a single entry.
! #
! # p fastacmd.fetch(entry_id)
#
! # Fastacmd#fetch method also accepts a list of entry_id and returns
! # an Array of Bio::FastaFormat objects.
! #
! # ent_list = ["sp:1433_SPIOL", "sp:1432_MAIZE"]
! # p fastacmd.fetch(ent_list)
#
def fetch(list)
if list.respond_to?(:join)
--- 106,131 ----
! # Get the sequence of a specific entry in the BLASTable database.
! # For example:
! # entry = fastacmd.get_by_id("sp:128U_DROME")
! # ---
! # *Arguments*:
! # * _entry_id_: id of an entry in the BLAST database
! # *Returns*:: a Bio::FastaFormat object
def get_by_id(entry_id)
fetch(entry_id).shift
end
! # Get the sequence for a _list_ of IDs in the database.
#
! # For example:
! # p fastacmd.fetch(["sp:1433_SPIOL", "sp:1432_MAIZE"])
#
+ # This method always returns an array of Bio::FastaFormat objects, even when
+ # the result is a single entry.
+ # ---
+ # *Arguments*:
+ # * _list_: a single ID or an Array of IDs to retrieve from the database
+ # *Returns*:: array of Bio::FastaFormat objects
def fetch(list)
if list.respond_to?(:join)
***************
*** 128,138 ****
end
! # Iterates each entry.
! #
! # You can also iterate on all sequences in the database!
! # fastacmd.each do |fasta|
! # p [ fasta.definition[0..30], fasta.seq.size ]
! # end
#
def each_entry
cmd = [ @fastacmd, '-d', @database, '-D', 'T' ]
--- 142,152 ----
end
! # Iterates over _all_ sequences in the database.
#
+ # fastacmd.each_entry do |fasta|
+ # p [ fasta.definition[0..30], fasta.seq.size ]
+ # end
+ # ---
+ # *Yields*:: a Bio::FastaFormat object to the block for each entry in the database
def each_entry
cmd = [ @fastacmd, '-d', @database, '-D', 'T' ]
***************
*** 154,156 ****
--- 168,184 ----
end # module Bio
+ if $0 == __FILE__
+ fastacmd = Bio::Blast::Fastacmd.new("/path_to_my_db/db_name")
+ seq = fastacmd.get_by_id('id_of_entry1')
+ puts seq.class
+ puts seq
+
+ seqs = fastacmd.fetch(['id_of_entry1','id_of_entry2'])
+ seqs.each do |seq|
+ puts seq
+ end
+ fastacmd.each_entry do |fasta|
+ puts fasta.seq.size.to_s + "\t" + fasta.definition
+ end
+ end
More information about the bioruby-cvs
mailing list