[BioRuby-cvs] bioruby/lib/bio/db fasta.rb,1.21,1.22
Mitsuteru C. Nakao
nakao at pub.open-bio.org
Sat Jan 28 10:49:01 UTC 2006
Update of /home/repository/bioruby/bioruby/lib/bio/db
In directory pub.open-bio.org:/tmp/cvs-serv5883/lib/bio/db
Modified Files:
fasta.rb
Log Message:
* Added RDoc.
Index: fasta.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/db/fasta.rb,v
retrieving revision 1.21
retrieving revision 1.22
diff -C2 -d -r1.21 -r1.22
*** fasta.rb 26 Sep 2005 13:00:06 -0000 1.21
--- fasta.rb 28 Jan 2006 10:48:59 -0000 1.22
***************
*** 1,7 ****
#
! # bio/db/fasta.rb - FASTA format class
#
! # Copyright (C) 2001 GOTO Naohisa <ngoto at gen-info.osaka-u.ac.jp>
! # Copyright (C) 2001, 2002 KATAYAMA Toshiaki <k at bioruby.org>
#
# This library is free software; you can redistribute it and/or
--- 1,67 ----
#
! # = bio/db/fasta.rb - FASTA format class
#
! # Copyright:: Copyright (C) 2001, 2002
! # GOTO Naohisa <ngoto at gen-info.osaka-u.ac.jp>,
! # KATAYAMA Toshiaki <k at bioruby.org>
! # License:: LGPL
! #
! # $Id$
! #
! # == Description
! #
! # FASTA format class.
! #
! # == Examples
! #
! # rub = Bio::FastaDefline.new('>gi|671595|emb|CAA85678.1| rubisco large subunit [Perovskia abrotanoides]')
! # rub.entry_id ==> 'gi|671595'
! # rub.get('emb') ==> 'CAA85678.1'
! # rub.emb ==> 'CAA85678.1'
! # rub.gi ==> '671595'
! # rub.accession ==> 'CAA85678'
! # rub.accessions ==> [ 'CAA85678' ]
! # rub.acc_version ==> 'CAA85678.1'
! # rub.locus ==> nil
! # rub.list_ids ==> [["gi", "671595"],
! # ["emb", "CAA85678.1", nil],
! # ["Perovskia abrotanoides"]]
! #
! # ckr = Bio::FastaDefline.new(">gi|2495000|sp|Q63931|CCKR_CAVPO CHOLECYSTOKININ TYPE A RECEPTOR (CCK-A RECEPTOR) (CCK-AR)\001gi|2147182|pir||I51898 cholecystokinin A receptor - guinea pig\001gi|544724|gb|AAB29504.1| cholecystokinin A receptor; CCK-A receptor [Cavia]")
! # ckr.entry_id ==> "gi|2495000"
! # ckr.sp ==> "CCKR_CAVPO"
! # ckr.pir ==> "I51898"
! # ckr.gb ==> "AAB29504.1"
! # ckr.gi ==> "2495000"
! # ckr.accession ==> "AAB29504"
! # ckr.accessions ==> ["Q63931", "AAB29504"]
! # ckr.acc_version ==> "AAB29504.1"
! # ckr.locus ==> nil
! # ckr.description ==>
! # "CHOLECYSTOKININ TYPE A RECEPTOR (CCK-A RECEPTOR) (CCK-AR)"
! # ckr.descriptions ==>
! # ["CHOLECYSTOKININ TYPE A RECEPTOR (CCK-A RECEPTOR) (CCK-AR)",
! # "cholecystokinin A receptor - guinea pig",
! # "cholecystokinin A receptor; CCK-A receptor [Cavia]"]
! # ckr.words ==>
! # ["cavia", "cck-a", "cck-ar", "cholecystokinin", "guinea", "pig",
! # "receptor", "type"]
! # ckr.id_strings ==>
! # ["2495000", "Q63931", "CCKR_CAVPO", "2147182", "I51898",
! # "544724", "AAB29504.1", "Cavia"]
! # ckr.list_ids ==>
! # [["gi", "2495000"], ["sp", "Q63931", "CCKR_CAVPO"],
! # ["gi", "2147182"], ["pir", nil, "I51898"], ["gi", "544724"],
! # ["gb", "AAB29504.1", nil], ["Cavia"]]
! #
! # == References
! #
! # * FASTA format (Wikipedia)
! # http://en.wikipedia.org/wiki/FASTA_format
! #
! # * Fasta format description (NCBI)
! # http://www.ncbi.nlm.nih.gov/BLAST/fasta.shtml
! #
! #--
#
# This library is free software; you can redistribute it and/or
***************
*** 19,23 ****
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
! # $Id$
#
--- 79,83 ----
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
! #++
#
***************
*** 27,34 ****
--- 87,171 ----
module Bio
+
+ # Treats a FASTA formatted entry, such as:
+ #
+ # >id and/or some comments <== comment line
+ # ATGCATGCATGCATGCATGCATGCATGCATGCATGC <== sequence lines
+ # ATGCATGCATGCATGCATGCATGCATGCATGCATGC
+ # ATGCATGCATGC
+ #
+ # The precedent '>' can be omitted and the trailing '>' will be removed
+ # automatically.
+ #
+ # === Examples
+ #
+ # f_str = <<END
+ # >sce:YBR160W CDC28, SRM5; cyclin-dependent protein kinase catalytic subunit [EC:2.7.1.-] [SP:CC28_YEAST]
+ # MSGELANYKRLEKVGEGTYGVVYKALDLRPGQGQRVVALKKIRLESEDEG
+ # VPSTAIREISLLKELKDDNIVRLYDIVHSDAHKLYLVFEFLDLDLKRYME
+ # GIPKDQPLGADIVKKFMMQLCKGIAYCHSHRILHRDLKPQNLLINKDGNL
+ # KLGDFGLARAFGVPLRAYTHEIVTLWYRAPEVLLGGKQYSTGVDTWSIGC
+ # IFAEMCNRKPIFSGDSEIDQIFKIFRVLGTPNEAIWPDIVYLPDFKPSFP
+ # QWRRKDLSQVVPSLDPRGIDLLDKLLAYDPINRISARRAAIHPYFQES
+ # >sce:YBR274W CHK1; probable serine/threonine-protein kinase [EC:2.7.1.-] [SP:KB9S_YEAST]
+ # MSLSQVSPLPHIKDVVLGDTVGQGAFACVKNAHLQMDPSIILAVKFIHVP
+ # TCKKMGLSDKDITKEVVLQSKCSKHPNVLRLIDCNVSKEYMWIILEMADG
+ # GDLFDKIEPDVGVDSDVAQFYFQQLVSAINYLHVECGVAHRDIKPENILL
+ # DKNGNLKLADFGLASQFRRKDGTLRVSMDQRGSPPYMAPEVLYSEEGYYA
+ # DRTDIWSIGILLFVLLTGQTPWELPSLENEDFVFFIENDGNLNWGPWSKI
+ # EFTHLNLLRKILQPDPNKRVTLKALKLHPWVLRRASFSGDDGLCNDPELL
+ # AKKLFSHLKVSLSNENYLKFTQDTNSNNRYISTQPIGNELAELEHDSMHF
+ # QTVSNTQRAFTSYDSNTNYNSGTGMTQEAKWTQFISYDIAALQFHSDEND
+ # CNELVKRHLQFNPNKLTKFYTLQPMDVLLPILEKALNLSQIRVKPDLFAN
+ # FERLCELLGYDNVFPLIINIKTKSNGGYQLCGSISIIKIEEELKSVGFER
+ # KTGDPLEWRRLFKKISTICRDIILIPN
+ # END
+ #
+ # f = Bio::FastaFormat.new(f_str)
+ # puts "### FastaFormat"
+ # puts "# entry"
+ # puts f.entry
+ # puts "# entry_id"
+ # p f.entry_id
+ # puts "# definition"
+ # p f.definition
+ # puts "# data"
+ # p f.data
+ # puts "# seq"
+ # p f.seq
+ # puts "# seq.type"
+ # p f.seq.type
+ # puts "# length"
+ # p f.length
+ # puts "# aaseq"
+ # p f.aaseq
+ # puts "# aaseq.type"
+ # p f.aaseq.type
+ # puts "# aaseq.composition"
+ # p f.aaseq.composition
+ # puts "# aalen"
+ # p f.aalen
+ #
+ # === References
+ #
+ # * FASTA format (Wikipedia)
+ # http://en.wikipedia.org/wiki/FASTA_format
+ #
class FastaFormat < DB
+ # Entry delimiter in flatfile text.
DELIMITER = RS = "\n>"
+ # The comment line of the FASTA formatted data.
+ attr_accessor :definition
+
+ # The sequence lines in text.
+ attr_accessor :data
+
+ attr_reader :entry_overrun
+
+ # Stores the comment and sequence information from one entry of the
+ # FASTA format string. If the argument contains more than one
+ # entry, only the first entry is used.
def initialize(str)
@definition = str[/.*/].sub(/^>/, '').strip # 1st line
***************
*** 37,43 ****
@entry_overrun = $&
end
- attr_accessor :definition, :data
- attr_reader :entry_overrun
def entry
@entry = ">#{@definition}\n#{@data.strip}\n"
--- 174,179 ----
@entry_overrun = $&
end
+ # Returns the stored entry as a FASTA formatted string. (same as to_s)
def entry
@entry = ">#{@definition}\n#{@data.strip}\n"
***************
*** 45,48 ****
--- 181,202 ----
alias to_s entry
+
+ # Executes FASTA/BLAST search by using a Bio::Fasta or a Bio::Blast
+ # factory object.
+ #
+ # #!/usr/bin/env ruby
+ # require 'bio'
+ #
+ # factory = Bio::Fasta.local('fasta34', 'db/swissprot.f')
+ # flatfile = Bio::FlatFile.open(Bio::FastaFormat, 'queries.f')
+ # flatfile.each do |entry|
+ # p entry.definition
+ # result = entry.fasta(factory)
+ # result.each do |hit|
+ # print "#{hit.query_id} : #{hit.evalue}\t#{hit.target_id} at "
+ # p hit.lap_at
+ # end
+ # end
+ #
def query(factory)
factory.query(@entry)
***************
*** 51,54 ****
--- 205,209 ----
alias blast query
+ # Returns a joined sequence line as a String.
def seq
unless defined?(@seq)
***************
*** 76,79 ****
--- 231,235 ----
end
+ # Returns comments.
def comment
seq
***************
*** 81,104 ****
--- 237,269 ----
end
+ # Returns sequence length.
def length
seq.length
end
+ # Returns the Bio::Sequence::NA.
def naseq
Sequence::NA.new(seq)
end
+ # Returns the length of Bio::Sequence::NA.
def nalen
self.naseq.length
end
+ # Returns the Bio::Sequence::AA.
def aaseq
Sequence::AA.new(seq)
end
+ # Returns the length of Bio::Sequence::AA.
def aalen
self.aaseq.length
end
+ # Parsing FASTA Defline, and extract IDs.
+ # IDs are NSIDs (NCBI standard FASTA sequence identifiers)
+ # or ":"-separated IDs.
+ # It returns a Bio::FastaDefline instance.
def identifiers
unless defined?(@ids) then
***************
*** 108,131 ****
--- 273,310 ----
end
+ # Parsing FASTA Defline (using #identifiers method), and
+ # shows a possibly unique identifier.
+ # It returns a string.
def entry_id
identifiers.entry_id
end
+ # Parsing FASTA Defline (using #identifiers method), and
+ # shows GI/locus/accession/accession with version number.
+ # If an entry has more than two of such IDs,
+ # only the first ID is shown.
+ # It returns a string or nil.
def gi
identifiers.gi
end
+ # Returns an accession number.
def accession
identifiers.accession
end
+ # Parsing FASTA Defline (using #identifiers method), and
+ # shows accession numbers.
+ # It returns an array of strings.
def accessions
identifiers.accessions
end
+ # Returns accession number with version.
def acc_version
identifiers.acc_version
end
+ # Returns locus.
def locus
identifiers.locus
***************
*** 134,139 ****
--- 313,339 ----
end #class FastaFormat
+ # Treats a FASTA formatted numerical entry, such as:
+ #
+ # >id and/or some comments <== comment line
+ # 24 15 23 29 20 13 20 21 21 23 22 25 13 <== numerical data
+ # 22 17 15 25 27 32 26 32 29 29 25
+ #
+ # The precedent '>' can be omitted and the trailing '>' will be removed
+ # automatically.
+ #
+ # --- Bio::FastaNumericFormat.new(entry)
+ #
+ # Stores the comment and the list of the numerical data.
+ #
+ # --- Bio::FastaNumericFormat#definition
+ #
+ # The comment line of the FASTA formatted data.
+ #
+ # * FASTA format (Wikipedia)
+ # http://en.wikipedia.org/wiki/FASTA_format
class FastaNumericFormat < FastaFormat
+ # Returns the list of the numerical data (typically the quality score
+ # of its corresponding sequence) as an Array.
def data
unless @list
***************
*** 143,150 ****
--- 343,352 ----
end
+ # Returns the number of elements in the numerical data.
def length
data.length
end
+ # Yields each element of the numerical data.
def each
data.each do |x|
***************
*** 153,156 ****
--- 355,359 ----
end
+ # Returns the n-th element.
def [](n)
data[n]
***************
*** 161,169 ****
end #class FastaNumericFormat
- class FastaDefline
! # specs are described in:
! # ftp://ftp.ncbi.nih.gov/blast/documents/README.formatdb
! # http://blast.wustl.edu/doc/FAQ-Indexing.html#Identifiers
NSIDs = {
--- 364,430 ----
end #class FastaNumericFormat
! # Parses a FASTA Defline and extracts IDs and other information.
! # IDs are NSIDs (NCBI standard FASTA sequence identifiers)
! # or ":"-separated IDs.
! #
! # specs are described in:
! # ftp://ftp.ncbi.nih.gov/blast/documents/README.formatdb
! # http://blast.wustl.edu/doc/FAQ-Indexing.html#Identifiers
! #
! # === Examples
! #
! # rub = Bio::FastaDefline.new('>gi|671595|emb|CAA85678.1| rubisco large subunit [Perovskia abrotanoides]')
! # rub.entry_id ==> 'gi|671595'
! # rub.get('emb') ==> 'CAA85678.1'
! # rub.emb ==> 'CAA85678.1'
! # rub.gi ==> '671595'
! # rub.accession ==> 'CAA85678'
! # rub.accessions ==> [ 'CAA85678' ]
! # rub.acc_version ==> 'CAA85678.1'
! # rub.locus ==> nil
! # rub.list_ids ==> [["gi", "671595"],
! # ["emb", "CAA85678.1", nil],
! # ["Perovskia abrotanoides"]]
! #
! # ckr = Bio::FastaDefline.new(">gi|2495000|sp|Q63931|CCKR_CAVPO CHOLECYSTOKININ TYPE A RECEPTOR (CCK-A RECEPTOR) (CCK-AR)\001gi|2147182|pir||I51898 cholecystokinin A receptor - guinea pig\001gi|544724|gb|AAB29504.1| cholecystokinin A receptor; CCK-A receptor [Cavia]")
! # ckr.entry_id ==> "gi|2495000"
! # ckr.sp ==> "CCKR_CAVPO"
! # ckr.pir ==> "I51898"
! # ckr.gb ==> "AAB29504.1"
! # ckr.gi ==> "2495000"
! # ckr.accession ==> "AAB29504"
! # ckr.accessions ==> ["Q63931", "AAB29504"]
! # ckr.acc_version ==> "AAB29504.1"
! # ckr.locus ==> nil
! # ckr.description ==>
! # "CHOLECYSTOKININ TYPE A RECEPTOR (CCK-A RECEPTOR) (CCK-AR)"
! # ckr.descriptions ==>
! # ["CHOLECYSTOKININ TYPE A RECEPTOR (CCK-A RECEPTOR) (CCK-AR)",
! # "cholecystokinin A receptor - guinea pig",
! # "cholecystokinin A receptor; CCK-A receptor [Cavia]"]
! # ckr.words ==>
! # ["cavia", "cck-a", "cck-ar", "cholecystokinin", "guinea", "pig",
! # "receptor", "type"]
! # ckr.id_strings ==>
! # ["2495000", "Q63931", "CCKR_CAVPO", "2147182", "I51898",
! # "544724", "AAB29504.1", "Cavia"]
! # ckr.list_ids ==>
! # [["gi", "2495000"], ["sp", "Q63931", "CCKR_CAVPO"],
! # ["gi", "2147182"], ["pir", nil, "I51898"], ["gi", "544724"],
! # ["gb", "AAB29504.1", nil], ["Cavia"]]
! #
! # === References
! #
! # * Fasta format description (NCBI)
! # http://www.ncbi.nlm.nih.gov/BLAST/fasta.shtml
! #
! # * Frequently Asked Questions: Indexing of Sequence Identifiers (by Warren R. Gish.)
! # http://blast.wustl.edu/doc/FAQ-Indexing.html#Identifiers
! #
! # * README.formatdb
! # ftp://ftp.ncbi.nih.gov/blast/documents/README.formatdb
! #
! class FastaDefline
NSIDs = {
***************
*** 198,201 ****
--- 459,471 ----
}
+ # Shows array that contains IDs (or ID-like strings).
+ # Returns an array of arrays of strings.
+ attr_reader :list_ids
+
+ # Shows a possibly unique identifier.
+ # Returns a string.
+ attr_reader :entry_id
+
+ # Parses given string.
def initialize(str)
@deflines = []
***************
*** 211,217 ****
end #def initialize
! attr_reader :list_ids
! attr_reader :entry_id
!
def add_defline(str)
case str
--- 481,485 ----
end #def initialize
! # Parses given string and adds parsed data.
def add_defline(str)
case str
***************
*** 344,347 ****
--- 612,619 ----
private :parse_NSIDs
+
+ # Shows original string.
+ # Note that the result of this method may be different from
+ # original string which is given in FastaDefline.new method.
def to_s
@deflines.collect { |a|
***************
*** 351,358 ****
--- 623,632 ----
end
+ # Shows description.
def description
@deflines[0].to_a[-1]
end
+ # Returns descriptions.
def descriptions
@deflines.collect do |a|
***************
*** 361,364 ****
--- 635,640 ----
end
+ # Shows ID-like strings.
+ # Returns an array of strings.
def id_strings
r = []
***************
*** 402,405 ****
--- 678,682 ----
]
+ # Shows words used in the defline. Returns an Array.
def words(case_sensitive = nil, kill_regexp = self.class::KillRegexpArray,
kwhash = self.class::KillWordsHash)
***************
*** 427,432 ****
end
! def get(db)
! db =db.to_s
r = nil
unless r = @info[db] then
--- 704,710 ----
end
! # Returns identifiers by a database name.
! def get(dbname)
! db = dbname.to_s
r = nil
unless r = @info[db] then
***************
*** 450,457 ****
end
! def get_by_type(tstr)
@list_ids.each do |x|
if labels = self.class::NSIDs[x[0]] then
! if i = labels.index(tstr) then
return x[i+1]
end
--- 728,736 ----
end
! # Returns an identifier by given type.
! def get_by_type(type_str)
@list_ids.each do |x|
if labels = self.class::NSIDs[x[0]] then
! if i = labels.index(type_str) then
return x[i+1]
end
***************
*** 461,469 ****
end
! def get_all_by_type(*tstrarg)
d = []
@list_ids.each do |x|
if labels = self.class::NSIDs[x[0]] then
! tstrarg.each do |y|
if i = labels.index(y) then
d << x[i+1] if x[i+1]
--- 740,749 ----
end
! # Returns identifiers by given type.
! def get_all_by_type(*type_strarg)
d = []
@list_ids.each do |x|
if labels = self.class::NSIDs[x[0]] then
! type_strarg.each do |y|
if i = labels.index(y) then
d << x[i+1] if x[i+1]
***************
*** 475,478 ****
--- 755,762 ----
end
+ # Shows locus.
+ # If the entry has more than two of such IDs,
+ # only the first ID is shown.
+ # Returns a string or nil.
def locus
unless defined?(@locus)
***************
*** 482,485 ****
--- 766,773 ----
end
+ # Shows GI.
+ # If the entry has more than two of such IDs,
+ # only the first ID is shown.
+ # Returns a string or nil.
def gi
unless defined?(@gi) then
***************
*** 489,492 ****
--- 777,784 ----
end
+ # Shows accession with version number.
+ # If the entry has more than two of such IDs,
+ # only the first ID is shown.
+ # Returns a string or nil.
def acc_version
unless defined?(@acc_version) then
***************
*** 496,499 ****
--- 788,793 ----
end
+ # Shows accession numbers.
+ # Returns an array of strings.
def accessions
unless defined?(@accessions) then
***************
*** 504,507 ****
--- 798,802 ----
end
+ # Shows an accession number.
def accession
unless defined?(@accession) then
***************
*** 524,527 ****
--- 819,823 ----
r
end
+
end #class FastaDefline
***************
*** 610,869 ****
end
-
- =begin
-
- = Bio::FastaFormat
-
- Treats a FASTA formatted entry, such as:
-
- >id and/or some comments <== comment line
- ATGCATGCATGCATGCATGCATGCATGCATGCATGC <== sequence lines
- ATGCATGCATGCATGCATGCATGCATGCATGCATGC
- ATGCATGCATGC
-
- The precedent '>' can be omitted and the trailing '>' will be removed
- automatically.
-
- --- Bio::FastaFormat.new(entry)
-
- Stores the comment and sequence information from one entry of the
- FASTA format string. If the argument contains more than one
- entry, only the first entry is used.
-
- --- Bio::FastaFormat#entry
-
- Returns the stored one entry as a FASTA format. (same as to_s)
-
- --- Bio::FastaFormat#definition
-
- Returns the comment line of the FASTA formatted data.
-
- --- Bio::FastaFormat#seq
-
- Returns a joined sequence line as a String.
-
- --- Bio::FastaFormat#query(factory)
- --- Bio::FastaFormat#fasta(factory)
- --- Bio::FastaFormat#blast(factory)
-
- Executes FASTA/BLAST search by using a Bio::Fasta or a Bio::Blast
- factory object.
-
- #!/usr/bin/env ruby
-
- require 'bio'
-
- factory = Bio::Fasta.local('fasta34', 'db/swissprot.f')
- flatfile = Bio::FlatFile.open(Bio::FastaFormat, 'queries.f')
- flatfile.each do |entry|
- p entry.definition
- result = entry.fasta(factory)
- result.each do |hit|
- print "#{hit.query_id} : #{hit.evalue}\t#{hit.target_id} at "
- p hit.lap_at
- end
- end
-
- --- Bio::FastaFormat#length
-
- Returns sequence length.
-
- --- Bio::FastaFormat#naseq
- --- Bio::FastaFormat#nalen
- --- Bio::FastaFormat#aaseq
- --- Bio::FastaFormat#aalen
-
- If you know whether the sequence is NA or AA, use these methods.
- 'naseq' and 'aaseq' methods returen the Bio::Sequence::NA or
- Bio::Sequence::AA object respectively. 'nalen' and 'aalen' methods
- return the length of them.
-
- --- Bio::FastaFormat#identifiers
-
- Parsing FASTA Defline, and extract IDs.
- IDs are NSIDs (NCBI standard FASTA sequence identifiers)
- or ":"-separated IDs.
- It returns a Bio::FastaDefline instance.
-
- --- Bio::FastaFormat#entry_id
-
- Parsing FASTA Defline (using #identifiers method), and
- shows a possibly unique identifier.
- It returns a string.
-
- --- Bio::FastaFormat#gi
- --- Bio::FastaFormat#locus
- --- Bio::FastaFormat#accession
- --- Bio::FastaFormat#acc_version
-
- Parsing FASTA Defline (using #identifiers method), and
- shows GI/locus/accession/accession with version number.
- If a entry has more than two of such IDs,
- only the first ID are shown.
- It returns a string or nil.
-
- --- Bio::FastaFormat#accessions
-
- Parsing FASTA Defline (using #identifiers method), and
- shows accession numbers.
- It returns an array of strings.
-
- --- Bio::FastaFormat
-
- = Bio::FastaNumericFormat
-
- Treats a FASTA formatted numerical entry, such as:
-
- >id and/or some comments <== comment line
- 24 15 23 29 20 13 20 21 21 23 22 25 13 <== numerical data
- 22 17 15 25 27 32 26 32 29 29 25
-
- The precedent '>' can be omitted and the trailing '>' will be removed
- automatically.
-
- --- Bio::FastaNumericFormat.new(entry)
-
- Stores the comment and the list of the numerical data.
-
- --- Bio::FastaNumericFormat#definition
-
- The comment line of the FASTA formatted data.
-
- --- Bio::FastaNumericFormat#data
-
- Returns the list of the numerical data (typically the quality score
- of its corresponding sequence) as an Array.
-
- --- Bio::FastaNumericFormat#length
-
- Returns the number of elements in the numerical data.
-
- --- Bio::FastaNumericFormat#each
-
- Yields on each elements of the numerical data.
-
- --- Bio::FastaNumericFormat#[](n)
-
- Returns the n-th element.
-
- --- Bio::FastaNumericFormat#identifiers
- --- Bio::FastaNumericFormat#entry_id
- --- Bio::FastaNumericFormat#gi
- --- Bio::FastaNumericFormat#locus
- --- Bio::FastaNumericFormat#accession
- --- Bio::FastaNumericFormat#acc_version
- --- Bio::FastaNumericFormat#accessions
-
- Same as Bio::FastaFormat.
-
-
- = Bio::FastaDefline
-
- Parsing FASTA Defline, and extract IDs and other informations.
- IDs are NSIDs (NCBI standard FASTA sequence identifiers)
- or ":"-separated IDs.
-
- --- see also:
- ftp://ftp.ncbi.nih.gov/blast/documents/README.formatdb
- http://blast.wustl.edu/doc/FAQ-Indexing.html#Identifiers
-
- --- Bio::FastaDefline.new(str)
-
- Parses given string.
-
- --- Bio::FastaFormat#entry_id
-
- Shows a possibly unique identifier.
- Returns a string.
-
- --- Bio::FastaDefline#gi
- --- Bio::FastaDefline#locus
- --- Bio::FastaDefline#accession
- --- Bio::FastaDefline#acc_version
-
- Shows GI/locus/accession/accession with version number.
- If the entry has more than two of such IDs,
- only the first ID are shown.
- Returns a string or nil.
-
- --- Bio::FastaFormat#accessions
-
- Shows accession numbers.
- Returns an array of strings.
-
- --- Bio::FastaDefline#add_defline(str)
-
- Parses given string and adds parsed data.
-
- --- Bio::FastaDefline#to_s
-
- Shows original string.
- Note that the result of this method may be different from
- original string which is given in FastaDefline.new method.
-
- --- Bio::FastaDefline#id_strings
-
- Shows ID-like strings.
- Returns an array of strings.
-
- --- Bio::FastaDefline#list_ids
-
- Shows array that contains IDs (or ID-like strings).
- Returns an array of arrays of strings.
-
- --- Bio::FastaDefline#description
- --- Bio::FastaDefline#descriptions
-
- --- Bio::FastaDefline#words(case_sensitive = nil,
- kill_words_regexp_array, kill_words_hash)
-
- --- Bio::FastaDefline#get(tag_of_id)
-
- --- Bio::FastaDefline#get_by_type(type_of_id)
-
- --- Bio::FastaDefline#get_all_by_type(type_of_id)
-
- --- examples:
- rub = Bio::FastaDefline.new('>gi|671595|emb|CAA85678.1| rubisco large subunit [Perovskia abrotanoides]')
- rub.entry_id ==> 'gi|671595'
- rub.get('emb') ==> 'CAA85678.1'
- rub.emb ==> 'CAA85678.1'
- rub.gi ==> '671595'
- rub.accession ==> 'CAA85678'
- rub.accessions ==> [ 'CAA85678' ]
- rub.acc_version ==> 'CAA85678.1'
- rub.locus ==> nil
- rub.list_ids ==> [["gi", "671595"],
- ["emb", "CAA85678.1", nil],
- ["Perovskia abrotanoides"]]
-
- ckr = Bio::FastaDefline.new(">gi|2495000|sp|Q63931|CCKR_CAVPO CHOLECYSTOKININ TYPE A RECEPTOR (CCK-A RECEPTOR) (CCK-AR)\001gi|2147182|pir||I51898 cholecystokinin A receptor - guinea pig\001gi|544724|gb|AAB29504.1| cholecystokinin A receptor; CCK-A receptor [Cavia]")
- ckr.entry_id ==> "gi|2495000"
- ckr.sp ==> "CCKR_CAVPO"
- ckr.pir ==> "I51898"
- ckr.gb ==> "AAB29504.1"
- ckr.gi ==> "2495000"
- ckr.accession ==> "AAB29504"
- ckr.accessions ==> ["Q63931", "AAB29504"]
- ckr.acc_version ==> "AAB29504.1"
- ckr.locus ==> nil
- ckr.description ==>
- "CHOLECYSTOKININ TYPE A RECEPTOR (CCK-A RECEPTOR) (CCK-AR)"
- ckr.descriptions ==>
- ["CHOLECYSTOKININ TYPE A RECEPTOR (CCK-A RECEPTOR) (CCK-AR)",
- "cholecystokinin A receptor - guinea pig",
- "cholecystokinin A receptor; CCK-A receptor [Cavia]"]
- ckr.words ==>
- ["cavia", "cck-a", "cck-ar", "cholecystokinin", "guinea", "pig",
- "receptor", "type"]
- ckr.id_strings ==>
- ["2495000", "Q63931", "CCKR_CAVPO", "2147182", "I51898",
- "544724", "AAB29504.1", "Cavia"]
- ckr.list_ids ==>
- [["gi", "2495000"], ["sp", "Q63931", "CCKR_CAVPO"],
- ["gi", "2147182"], ["pir", nil, "I51898"], ["gi", "544724"],
- ["gb", "AAB29504.1", nil], ["Cavia"]]
-
- =end
-
--- 906,908 ----
More information about the bioruby-cvs
mailing list