[BioRuby-cvs] bioruby/sample fastagrep.rb, NONE, 1.1 fastasort.rb, 1.1, 1.2

Mon May 19 12:22:07 UTC 2008

Update of /home/repository/bioruby/bioruby/sample
In directory dev.open-bio.org:/tmp/cvs-serv829/sample

Modified Files:
	fastasort.rb 
Added Files:
	fastagrep.rb 
Log Message:
Piping FASTA files (examples and doc)

--- NEW FILE: fastagrep.rb ---
#!/usr/bin/env ruby
#
# fastagrep: Greps a FASTA file (in fact it can use any flat file input supported
#            by BIORUBY) and outputs the matching records as FASTA
#
#   Copyright (C) 2008 KATAYAMA Toshiaki <k at bioruby.org> & Pjotr Prins
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  $Id: fastagrep.rb,v 1.1 2008/05/19 12:22:05 pjotr Exp $
#

require 'bio'

include Bio

# Help text shown when the command line cannot be interpreted.
# The synopsis lists the options the script actually parses: an optional
# leading -v, then a mandatory /regex/, then the input files.
usage = <<USAGE

Usage: fastagrep.rb [-v] regex infiles

    -v            Invert the sense of matching, to select non-matching records.

  Examples:

    Output all sequence descriptors containing 'Arabidopsis' or 'Drosophila'
    regardless of case
		
	    fastagrep.rb "/Arabidopsis|Drosophila/i" *.seq > reduced.fasta

    As the result is a FASTA stream you could pipe it for sorting:
		
	    fastagrep.rb "/Arabidopsis|Drosophila/i" *.seq | fastasort.rb
USAGE

# ---- Command-line handling
#
# Diagnostics are written to stderr: stdout carries the FASTA stream and is
# typically piped into another tool, so it must not contain the usage text.
if ARGV.empty?
  $stderr.print usage
  exit 1
end

# An optional leading -v inverts the match (non-matching records are printed).
skip = (ARGV[0] == '-v')
ARGV.shift if skip

# ---- Valid regular expression - if it is not a file
# The next argument is accepted as the regex only when it starts with a slash
# and does not name an existing file. At least one input file must follow it,
# otherwise the script would silently produce no output.
regex = ARGV[0]
if regex =~ /^\// && !File.exist?(regex) && ARGV.size > 1
  ARGV.shift
else
  $stderr.print usage
  exit 1
end

# ---- Filter the records of every input file and print the survivors as FASTA
#
# NOTE(review): the pattern is taken verbatim from the command line and turned
# into a Regexp with eval, so it can execute arbitrary Ruby code. That is
# tolerable for a sample script run by its own user, but it should be parsed
# with Regexp.new before this is ever fed untrusted input.
# The pattern is evaluated once here instead of once per record.
pattern = eval(regex)

ARGV.each do |fn|
  # Block form so the underlying file is closed once its records are read.
  Bio::FlatFile.auto(fn) do |flatfile|
    flatfile.each do |item|
      matched = (item.definition =~ pattern)
      # With -v (skip) keep only non-matching records, otherwise only matches.
      next if skip ? matched : !matched

      rec = Bio::FastaFormat.new('> ' + item.definition.strip + "\n" + item.data)
      print rec
    end
  end
end


Index: fastasort.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/sample/fastasort.rb,v
retrieving revision 1.1
retrieving revision 1.2
diff -C2 -d -r1.1 -r1.2
*** fastasort.rb	19 May 2008 11:23:56 -0000	1.1
--- fastasort.rb	19 May 2008 12:22:05 -0000	1.2
***************
*** 3,7 ****
  # fastasort: Sorts a FASTA file (in fact it can use any flat file input supported
  #            by BIORUBY) while modifying the definition of each record in the
! #            process.
  #
  #   Copyright (C) 2008 KATAYAMA Toshiaki <k at bioruby.org> & Pjotr Prins
--- 3,8 ----
  # fastasort: Sorts a FASTA file (in fact it can use any flat file input supported
  #            by BIORUBY) while modifying the definition of each record in the
! #            process so it is suitable for processing with (for example) pal2nal
! #            and PAML.
  #
  #   Copyright (C) 2008 KATAYAMA Toshiaki <k at bioruby.org> & Pjotr Prins
***************
*** 27,35 ****
  ARGV.each do | fn |
    Bio::FlatFile.auto(fn).each do | item |
      # strip JALView extension from definition e.g. .../1-212
      if item.definition =~ /\/\d+-\d+$/
        item.definition = $`
      end
!     table[item.definition] = item.data
    end
  end
--- 28,47 ----
  ARGV.each do | fn |
    Bio::FlatFile.auto(fn).each do | item |
+     # Some processing of the definition for external programs (just
+     # an example):
+ 
      # strip JALView extension from definition e.g. .../1-212
      if item.definition =~ /\/\d+-\d+$/
        item.definition = $`
      end
!     # substitute slashes:
!     definition = item.definition.gsub(/\//,'-')
!     # substitute quotes and ampersands (continue from the slash-free result):
!     definition = definition.gsub(/['"&]/,'x')
!     # prefix letters if the first position is a number:
!     definition = 'seq'+definition if definition =~ /^\d/
! 
!     # Now add the data to the sort table
!     table[definition] = item.data
    end
  end