[BioRuby-cvs] bioruby/lib/bio/appl/iprscan report.rb,NONE,1.1

Mitsuteru C. Nakao nakao at dev.open-bio.org
Thu Dec 14 16:22:14 UTC 2006


Update of /home/repository/bioruby/bioruby/lib/bio/appl/iprscan
In directory dev.open-bio.org:/tmp/cvs-serv14577/lib/bio/appl/iprscan

Added Files:
	report.rb 
Log Message:
* Newly added files for InterProScan. 


--- NEW FILE: report.rb ---
#
# = bio/appl/iprscan/report.rb - a class for iprscan output.
#
# Copyright::   Copyright (C) 2006
#               Mitsuteru C. Nakao <mn at kazusa.or.jp>
# License::     Ruby's
#
#  $Id: report.rb,v 1.1 2006/12/14 16:22:12 nakao Exp $
#
# == Report classes for the iprscan program.
# 


module Bio

  class Iprscan

    # = DESCRIPTION
    # Class for InterProScan reports. It parses results in the raw and txt
    # formats into Report objects holding a list of Match objects.
    #
    # Parsing of the xml format and all of the re-formatting methods
    # (to_html, to_xml, to_ebihtml, to_txt, to_raw, to_gff3) are declared
    # but not implemented yet; calling them raises NotImplementedError.
    #
    # See ftp://ftp.ebi.ac.uk/pub/software/unix/iprscan/README.html
    #
    # == USAGE
    #  # Read a merged.txt and split it into entries.
    #  Bio::Iprscan::Report.reports_in_txt(File.open("merged.txt")) do |report|
    #    report.query_id
    #    report.matches.size
    #    report.matches.each do |match|
    #      match.ipr_id #=> 'IPR...'
    #      match.ipr_description
    #      match.method
    #      match.accession
    #      match.description
    #      match.match_start
    #      match.match_end
    #      match.evalue
    #    end
    #  end
    #
    #  Bio::Iprscan::Report.reports_in_raw(File.open("merged.raw")) do |report|
    #    report.class #=> Bio::Iprscan::Report
    #  end
    #
    class Report
      # Entry delimiter pattern of the txt format (a line holding only "//").
      RS = DELIMITER = "\n//\n".freeze

      # Query sequence name (entry_id).
      attr_accessor :query_id
      alias :entry_id :query_id

      # Query sequence length.
      attr_accessor :query_length

      # Matches of this query as an Array of Bio::Iprscan::Report::Match
      # objects, in the order they appear in the parsed entry.
      attr_accessor :matches

      # Splits a raw formatted stream into entries (consecutive lines sharing
      # the same first column) and yields one Bio::Iprscan::Report per entry.
      #
      # _io_ may be any object responding to +each_line+ (IO, StringIO or
      # String). Blank lines are ignored.
      #
      # == USAGE
      #  Bio::Iprscan::Report.reports_in_raw(File.open("merged.raw")) do |report|
      #    report
      #  end
      #
      def self.reports_in_raw(io)
        entry = String.new
        current_id = nil
        io.each_line do |line|
          next if line.strip.empty?
          id = line.split("\t").first
          # A change of the first column closes the entry collected so far.
          if current_id && id != current_id
            yield parse_in_raw(entry)
            entry = String.new
          end
          current_id = id
          entry << line
        end
        # The last entry has no following line to close it; flush it here.
        yield parse_in_raw(entry) unless entry.empty?
        nil
      end

      # Parser method for a raw formatted entry. Returns a
      # Bio::Iprscan::Report object.
      #
      # Each non-blank line is split on tabs; columns 0..10 become the
      # query_id, crc64, query_length, method, accession, description,
      # match_start, match_end, evalue, status and date of a Match.
      # Columns 11/12 (InterPro id / description) and 13 (GO terms,
      # separated by ", ") are stored only when present.
      #
      # The report's query_id and query_length are taken from the first
      # match; they stay nil when _str_ contains no match lines.
      def self.parse_in_raw(str)
        report = new
        str.split(/\n/).each do |line|
          fields = line.split("\t")
          next if fields.empty?
          attrs = {
            :query_id     => fields[0],
            :crc64        => fields[1],
            :query_length => fields[2].to_i,
            :method       => fields[3],
            :accession    => fields[4],
            :description  => fields[5],
            :match_start  => fields[6].to_i,
            :match_end    => fields[7].to_i,
            :evalue       => fields[8],
            :status       => fields[9],
            :date         => fields[10]
          }
          if fields[11]
            attrs[:ipr_id] = fields[11]
            attrs[:ipr_description] = fields[12]
          end
          attrs[:go_terms] = fields[13].split(', ') if fields[13]
          report.matches << Match.new(attrs)
        end
        first_match = report.matches.first
        if first_match
          report.query_id = first_match.query_id
          report.query_length = first_match.query_length
        end
        report
      end

      # Parser method for a xml formatted entry. Not implemented yet.
      #
      # Raises NotImplementedError.
      def self.parse_in_xml(str)
        raise NotImplementedError, "Bio::Iprscan::Report.parse_in_xml is not implemented"
      end

      # Splits a txt formatted stream at the entry delimiter (RS) and yields
      # one Bio::Iprscan::Report per entry.
      #
      # _io_ may be any object responding to +each_line+ with a separator
      # argument (IO, StringIO or String). Whitespace-only chunks, such as
      # trailing newlines after the last delimiter, are skipped.
      #
      # == USAGE
      #  Bio::Iprscan::Report.reports_in_txt(File.open("merged.txt")) do |report|
      #    report
      #  end
      def self.reports_in_txt(io)
        # The separator must be a String; a Regexp is rejected by each_line.
        io.each_line(RS) do |entry|
          next if entry.strip.empty?
          yield parse_in_txt(entry)
        end
        nil
      end

      # Parser method for a txt formatted entry. Returns a
      # Bio::Iprscan::Report object.
      #
      # Lines are split on tabs and interpreted as follows:
      # * blank lines and the "//" delimiter line are ignored,
      # * a line whose first column is "InterPro" sets the InterPro id
      #   (column 1) and description (column 2) applied to following matches,
      # * any other two-column line sets query_id and query_length,
      # * any other line with a fifth column ("start-end") becomes a Match
      #   built from method, accession, description, evalue and location.
      # Lines matching none of the above are skipped.
      #
      # == USAGE
      #
      #  File.read("merged.txt").each_line(Bio::Iprscan::Report::RS) do |e|
      #    report = Bio::Iprscan::Report.parse_in_txt(e)
      #  end
      #
      def self.parse_in_txt(str)
        report = new
        ipr_fields = []
        str.split(/\n/).each do |line|
          fields = line.split("\t")
          next if fields.empty? || fields.first == '//'
          if fields.first == 'InterPro'
            # Checked before the size test so that a two-column InterPro
            # line is not mistaken for the query header.
            ipr_fields = fields
          elsif fields.size == 2
            report.query_id = fields[0]
            report.query_length = fields[1].to_i
          elsif fields[4]
            startp, endp = fields[4].split("-")
            report.matches << Match.new(:ipr_id => ipr_fields[1],
                                        :ipr_description => ipr_fields[2],
                                        :method => fields[0],
                                        :accession => fields[1],
                                        :description => fields[2],
                                        :evalue => fields[3],
                                        :match_start => startp.to_i,
                                        :match_end => endp.to_i)
          end
        end
        report
      end

      # Creates an empty report: no query id, no query length, no matches.
      def initialize
        @query_id = nil
        @query_length = nil
        @matches = []
      end

      # Output in the html format. Not implemented yet; raises
      # NotImplementedError.
      def to_html
        raise NotImplementedError, "#{self.class}#to_html is not implemented"
      end

      # Output in the xml format. Not implemented yet; raises
      # NotImplementedError.
      def to_xml
        raise NotImplementedError, "#{self.class}#to_xml is not implemented"
      end

      # Output in the ebihtml format. Not implemented yet; raises
      # NotImplementedError.
      def to_ebihtml
        raise NotImplementedError, "#{self.class}#to_ebihtml is not implemented"
      end

      # Output in the txt format. Not implemented yet; raises
      # NotImplementedError.
      def to_txt
        raise NotImplementedError, "#{self.class}#to_txt is not implemented"
      end

      # Output in the raw format. Not implemented yet; raises
      # NotImplementedError.
      def to_raw
        raise NotImplementedError, "#{self.class}#to_raw is not implemented"
      end

      # Output in the gff3 format. Not implemented yet; raises
      # NotImplementedError.
      def to_gff3
        raise NotImplementedError, "#{self.class}#to_gff3 is not implemented"
      end

      # == DESCRIPTION
      # Container class for InterProScan matches. Attributes are kept in an
      # internal Hash keyed by Symbol; the common ones have explicit readers
      # and every attribute can be written (or any extra attribute read)
      # through method_missing.
      #
      # == USAGE
      #  match = Match.new(:query_id => ...)
      #
      #  match.ipr_id = 'IPR001234'
      #  match.ipr_id #=> 'IPR001234'
      #
      class Match
        # Builds a match from _hash_; keys are converted to Symbols.
        def initialize(hash)
          @data = {}
          hash.each do |key, value|
            @data[key.to_sym] = value
          end
        end

        # Date for computation.
        def date;            @data[:date];            end
        # CRC64 checksum of query sequence.
        def crc64;           @data[:crc64];           end
        # E-value of the match
        def evalue;          @data[:evalue];          end
        # Status of the match (T for true / M for marginal).
        def status;          @data[:status];          end
        # the corresponding InterPro entry (if any).
        def ipr_id;          @data[:ipr_id];          end
        # the length of the sequence in AA.
        def length;          @data[:length];          end
        # the analysis method launched.
        def method;          @data[:method];          end  # Object#method is overridden by Match#method
        # the Gene Ontology description for the InterPro entry, in "Aspect:term (ID)" format.
        def go_terms;        @data[:go_terms];        end
        # Id of the input sequence.
        def query_id;        @data[:query_id];        end
        # the end of the domain match.
        def match_end;       @data[:match_end];       end
        # the database members entry for this match.
        def accession;       @data[:accession];       end
        # the database members description for this match.
        def description;     @data[:description];     end
        # the start of the domain match.
        def match_start;     @data[:match_start];     end
        # Length of the input sequence as stored under :query_length.
        def query_length;    @data[:query_length];    end
        # the description of the InterPro entry.
        def ipr_description; @data[:ipr_description]; end

        # Generic attribute access.
        #
        # * <tt>match.foo = value</tt> stores _value_ under :foo, including
        #   nil and false.
        # * <tt>match.foo(value)</tt> with a non-nil, non-false _value_ also
        #   stores it under :foo (kept for backward compatibility).
        # * <tt>match.foo</tt> returns the value stored under :foo, or nil.
        def method_missing(name, arg = nil)
          key = name.to_s
          if key =~ /=\z/ || arg
            @data[key.sub(/=\z/, '').to_sym] = arg
          else
            @data[name.to_sym]
          end
        end

        # Reports the dynamic setters (any <tt>name=</tt>) and readers of
        # attributes currently stored, so that respond_to? agrees with
        # method_missing for them.
        def respond_to_missing?(name, include_private = false)
          key = name.to_s
          return true if key =~ /\A[A-Za-z_]\w*=\z/
          return true if @data && @data.key?(name.to_sym)
          super
        end

      end # class Match

    end # class Report

  end # class Iprscan

end # module Bio




More information about the bioruby-cvs mailing list