[BioRuby-cvs] bioruby/sample fastagrep.rb, NONE, 1.1 fastasort.rb, 1.1, 1.2
Pjotr Prins
pjotr at dev.open-bio.org
Mon May 19 12:22:07 UTC 2008
Update of /home/repository/bioruby/bioruby/sample
In directory dev.open-bio.org:/tmp/cvs-serv829/sample
Modified Files:
fastasort.rb
Added Files:
fastagrep.rb
Log Message:
Piping FASTA files (examples and doc)
--- NEW FILE: fastagrep.rb ---
#!/usr/bin/env ruby
#
# fastagrep: Greps a FASTA file (in fact it can use any flat file input supported
# by BioRuby) and outputs the matching records as FASTA
#
# Copyright (C) 2008 KATAYAMA Toshiaki <k at bioruby.org> & Pjotr Prins
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# $Id: fastagrep.rb,v 1.1 2008/05/19 12:22:05 pjotr Exp $
#
require 'bio'
include Bio
# Help text. It is printed when the script is started without arguments and
# when the first non-option argument is not a usable /regex/.
# The synopsis names the option the script really understands (-v) and shows
# the regex as mandatory, because the script refuses to run without one.
usage = <<USAGE
Usage: fastagrep.rb [-v] regex infiles
-v Invert the sense of matching, to select non-matching lines.
Examples:
Output all sequence descriptors containing 'Arabidopsis' or 'Drosophila'
regardless of case
fastagrep.rb "/Arabidopsis|Drosophila/i" *.seq > reduced.fasta
As the result is a FASTA stream you could pipe it for sorting:
fastagrep.rb "/Arabidopsis|Drosophila/i" *.seq | fastasort.rb
USAGE

# Nothing to do without arguments: show the help and signal failure.
if ARGV.empty?
  print usage
  exit 1
end
# -v inverts the selection: records whose definition matches the pattern are
# dropped instead of kept.
skip = (ARGV[0] == '-v')
ARGV.shift if skip

# ---- Valid regular expression - if it is not a file
# The pattern is given in /source/flags notation (flags: i, m, x). It is
# parsed into a Regexp object once, up front, instead of being spliced into
# eval for every record: the text comes straight from the command line, and
# eval would execute any Ruby code embedded in it.
regex = nil
literal = ARGV[0]
if literal && !File.exist?(literal) &&
   (parts = %r{\A/(.*)/([imx]*)\z}m.match(literal))
  flags = parts[2]
  options = 0
  options |= Regexp::IGNORECASE if flags.include?('i')
  options |= Regexp::MULTILINE if flags.include?('m')
  options |= Regexp::EXTENDED if flags.include?('x')
  begin
    regex = Regexp.new(parts[1], options)
  rescue RegexpError => e
    # A malformed pattern is a user error, not a crash with a backtrace.
    $stderr.puts "fastagrep.rb: invalid regular expression: #{e.message}"
    exit 1
  end
end

if regex
  # The pattern has been consumed; everything left in ARGV is an input file.
  ARGV.shift
else
  print usage
  exit 1
end

ARGV.each do |fn|
  Bio::FlatFile.auto(fn).each do |item|
    matched = !(regex =~ item.definition).nil?
    # Normally keep the matching records; with -v keep the non-matching ones.
    next if matched == skip

    # Re-wrap the record as FASTA so that the output is a FASTA stream
    # whatever the input flat file format was.
    rec = Bio::FastaFormat.new('> ' + item.definition.strip + "\n" + item.data)
    print rec
  end
end
Index: fastasort.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/sample/fastasort.rb,v
retrieving revision 1.1
retrieving revision 1.2
diff -C2 -d -r1.1 -r1.2
*** fastasort.rb 19 May 2008 11:23:56 -0000 1.1
--- fastasort.rb 19 May 2008 12:22:05 -0000 1.2
***************
*** 3,7 ****
# fastasort: Sorts a FASTA file (in fact it can use any flat file input supported
# by BIORUBY) while modifying the definition of each record in the
! # process.
#
# Copyright (C) 2008 KATAYAMA Toshiaki <k at bioruby.org> & Pjotr Prins
--- 3,8 ----
# fastasort: Sorts a FASTA file (in fact it can use any flat file input supported
# by BIORUBY) while modifying the definition of each record in the
! # process so it is suitable for processing with (for example) pal2nal
! # and PAML.
#
# Copyright (C) 2008 KATAYAMA Toshiaki <k at bioruby.org> & Pjotr Prins
***************
*** 27,35 ****
ARGV.each do | fn |
Bio::FlatFile.auto(fn).each do | item |
# strip JALView extension from definition e.g. .../1-212
if item.definition =~ /\/\d+-\d+$/
item.definition = $`
end
! table[item.definition] = item.data
end
end
--- 28,47 ----
ARGV.each do | fn |
Bio::FlatFile.auto(fn).each do | item |
+ # Some processing of the definition for external programs (just
+ # an example):
+
# strip JALView extension from definition e.g. .../1-212
if item.definition =~ /\/\d+-\d+$/
item.definition = $`
end
! # substitute slashes:
! definition = item.definition.gsub(/\//,'-')
! # substitute quotes and ampersands (continuing from the result above):
! definition = definition.gsub(/['"&]/,'x')
! # prefix letters if the first position is a number:
! definition = 'seq'+definition if definition =~ /^\d/
!
! # Now add the data to the sort table
! table[definition] = item.data
end
end
More information about the bioruby-cvs
mailing list