class Bio::Iprscan::Report
DESCRIPTION¶ ↑
Class for InterProScan report. It is used to parse results and reformat results from (raw|xml|txt) into (html, xml, ebihtml, txt, gff3) format.
See ftp.ebi.ac.uk/pub/software/unix/iprscan/README.html
USAGE¶ ↑
# Read a merged.txt and split each entry.
Bio::Iprscan::Report.parse_txt(File.open("merged.txt")) do |report|
  report.query_id
  report.matches.size
  report.matches.each do |match|
    match.ipr_id #=> 'IPR...'
    match.ipr_description
    match.method_name
    match.accession
    match.description
    match.match_start
    match.match_end
    match.evalue
  end
  # report.to_gff3
  # report.to_html
end

Bio::Iprscan::Report.parse_raw(File.open("merged.raw")) do |report|
  report.class #=> Bio::Iprscan::Report
end
Constants
- RS
Entry delimiter pattern.
Attributes
CRC64 checksum of query sequence.
Query sequence name (entry_id).
Matched InterPro motifs in Hash. Each InterPro motif has :name, :definition, :accession and :motifs keys. The :motifs key contains the motifs in an Array. Each motif has :method, :accession, :definition, :score, :location_from and :location_to keys.
Query sequence name (entry_id).
Query sequence length.
Public Class Methods
# File lib/bio/appl/iprscan/report.rb, line 236
#
# Builds an empty report: an empty match list, with the query id, query
# length and CRC64 checksum all left as nil.
def initialize
  @matches = []
  @query_id = @query_length = @crc64 = nil
end
Parser method for a pseudo-txt formatted entry. Returns a Bio::Iprscan::Report object.
Usage¶ ↑
File.read("merged.txt").each_line(Bio::Iprscan::Report::RS) do |e|
  report = Bio::Iprscan::Report.parse_ptxt_entry(e)
end
# File lib/bio/appl/iprscan/report.rb, line 209
#
# Parses one pseudo-txt formatted entry into a report object.
#
# The entry is read line by line; each line is split on tabs and handled
# according to its shape:
#
# * exactly two fields  -> query id and query length (converted with to_i)
# * first field '//'    -> ignored
# * first field 'InterPro' -> remembered; its 2nd and 3rd fields become the
#   ipr_id / ipr_description of the match rows that follow
# * anything else       -> a match row: method, accession, description,
#   evalue and a "start-end" location in the 5th field
#
# Blank or whitespace-only lines are skipped; without that guard they would
# fall into the match-row branch and fail on the missing location field.
#
# str:: the text of one entry
#
# Returns the populated report (an instance of the receiver).
def self.parse_ptxt_entry(str)
  report = self.new
  ipr_line = ''
  str.split(/\n/).each do |raw_line|
    next if raw_line.strip.empty?

    fields = raw_line.split("\t")
    if fields.size == 2
      report.query_id = fields[0]
      report.query_length = fields[1].to_i
    elsif fields.first == '//'
      # separator line, nothing to record
    elsif fields.first == 'InterPro'
      ipr_line = fields
    else
      startp, endp = fields[4].split("-")
      report.matches << Match.new(:ipr_id => ipr_line[1],
                                  :ipr_description => ipr_line[2],
                                  :method => fields[0],
                                  :accession => fields[1],
                                  :description => fields[2],
                                  :evalue => fields[3],
                                  :match_start => startp.to_i,
                                  :match_end => endp.to_i)
    end
  end
  report
end
USAGE¶ ↑
Bio::Iprscan::Report.parse_raw(File.open("merged.raw")) do |report| report end
# File lib/bio/appl/iprscan/report.rb, line 72
#
# Splits raw-format output into entries and yields one parsed report per
# entry.
#
# Consecutive lines whose first tab-separated field is the same are
# collected into one entry; each completed entry is passed to
# parse_raw_entry and the result is yielded to the block.
#
# io:: any object responding to each_line, e.g. an IO, a StringIO or a
#      String holding the whole file content
def self.parse_raw(io)
  entry = nil
  entry_id = nil
  io.each_line do |line|
    # Compare against the remembered id rather than re-splitting the whole
    # accumulated entry for every line.
    line_id = line.split("\t").first
    if entry && line_id == entry_id
      entry << line
    else
      yield self.parse_raw_entry(entry) if entry
      entry = line.dup
      entry_id = line_id
    end
  end
  # Flush the last entry, which no following line terminates.
  yield self.parse_raw_entry(entry) if entry
end
Parser method for a raw formatted entry. Returns a Bio::Iprscan::Report object.
# File lib/bio/appl/iprscan/report.rb, line 89
#
# Parses one raw-format entry (tab-separated, one match per line) into a
# report object.
#
# Columns, in order: query id, crc64, query length, method, accession,
# description, match start, match end, evalue, status, date, and optionally
# InterPro id, InterPro description and GO annotation. GO annotation is
# stored on the match as an Array of Strings, one per
# "Category name:term (GO:nnnn)" occurrence.
#
# The report's query_id and query_length are taken from the first match, and
# crc64 from the second column of the first parsed line. Blank lines are
# skipped, and an entry without any match line returns a report whose
# query_id, query_length and crc64 are left unset.
#
# str:: the text of one entry
#
# Returns the populated report (an instance of the receiver).
def self.parse_raw_entry(str)
  report = self.new
  str.split(/\n/).each do |raw_line|
    next if raw_line.strip.empty?

    fields = raw_line.split("\t")
    match = Match.new(:query_id => fields[0],
                      :crc64 => fields[1],
                      :query_length => fields[2].to_i,
                      :method => fields[3],
                      :accession => fields[4],
                      :description => fields[5],
                      :match_start => fields[6].to_i,
                      :match_end => fields[7].to_i,
                      :evalue => fields[8],
                      :status => fields[9],
                      :date => fields[10])
    if fields[11]
      match.ipr_id = fields[11]
      match.ipr_description = fields[12]
    end
    if fields[13]
      match.go_terms = fields[13].scan(/(\w+ \w+\:.+? \(GO:\d+\))/).flatten
    end
    report.matches << match
    # The checksum is the same for every line of an entry; keep the first.
    report.crc64 ||= fields[1]
  end

  first_match = report.matches.first
  if first_match
    report.query_id = first_match.query_id
    report.query_length = first_match.query_length
  end
  report
end
Splits the entry stream.
Usage¶ ↑
Bio::Iprscan::Report.parse_txt(File.open("merged.txt")) do |report|
  report.class #=> Bio::Iprscan::Report
end
# File lib/bio/appl/iprscan/report.rb, line 130
#
# Splits txt-format output into entries and yields one parsed report per
# entry.
#
# The input is read in chunks terminated by "\n\nSequence". That separator
# belongs to the *next* entry, so the trailing "Sequence" is removed from
# each chunk and put back in front of every chunk that lost it. Each
# restored entry is passed to parse_txt_entry.
#
# Only the very end and the very start of a chunk are inspected, so a line
# inside an entry that merely ends or starts with the word "Sequence" is
# left untouched.
#
# io:: any object responding to each_line(separator), e.g. an IO, a StringIO
#      or a String holding the whole file content
def self.parse_txt(io)
  separator = "\n\nSequence"
  io.each_line(separator) do |chunk|
    entry = chunk.end_with?(separator) ? chunk.chomp('Sequence') : chunk
    entry = 'Sequence' + entry unless entry.start_with?('Sequence')
    yield self.parse_txt_entry(entry)
  end
end
Parser method for a txt formatted entry. Returns a Bio::Iprscan::Report object.
# File lib/bio/appl/iprscan/report.rb, line 147
#
# Parses one txt-format entry into a report object.
#
# The entry is split on blank lines: the first part is the header, from
# which the quoted sequence name, the length and the crc64 checksum are
# extracted; every following part is a group of lines handled by first
# column:
#
# * 'InterPro'  -> remembered; its 2nd and 3rd columns become the ipr_id /
#   ipr_description of the matches that follow. Also clears the GO
#   annotation so terms never carry over from a previous group.
# * a GO line ("Molecular Function:", "Cellular Component:" or
#   "Biological Process:") -> scanned into [category, term, GO id] triples
#   attached to the matches that follow
# * 'method'    -> column heading, ignored
# * anything else -> a match row; its 4th column holds one or more
#   "S[from-to] score" items and one Match is created per item
#
# Columns are separated by runs of two or more spaces, because GO lines and
# the location column contain single spaces themselves. Rows without a 4th
# column are skipped.
#
# str:: the text of one entry; must contain a line starting with "Sequence "
#
# Returns the populated report (an instance of the receiver).
# Raises ArgumentError when no line starts with "Sequence ".
def self.parse_txt_entry(str)
  unless str =~ /^Sequence /
    raise ArgumentError, "Invalid format: \n\n#{str}"
  end

  header, *groups = str.split(/\n\n/)
  report = self.new
  report.query_id = header[/Sequence \"(.+)\" /, 1] || ''
  length = header[/length: (\d+) aa./, 1]
  report.query_length = length && length.to_i
  report.crc64 = header[/crc64 checksum: (\S+) /, 1]

  ipr_line = ''
  go_annotation = []
  groups.each do |group|
    group.split(/\n/).each do |line|
      columns = line.split(/ {2,}/)
      case columns[0]
      when nil, 'method'
        next
      when /(Molecular Function|Cellular Component|Biological Process):/
        go_annotation = columns[0].scan(/([MCB]\w+ \w+): (\S.+?\S) \((GO:\d+)\),*/)
      when 'InterPro'
        ipr_line = columns
        go_annotation = []
      else
        next unless columns[3]

        columns[3].scan(/(\S)\[(\d+)-(\d+)\] (\S+) */).each do |status, from, to, evalue|
          report.matches << Match.new(:ipr_id => ipr_line[1],
                                      :ipr_description => ipr_line[2],
                                      :method => columns[0],
                                      :accession => columns[1],
                                      :description => columns[2],
                                      :evalue => evalue,
                                      :status => status,
                                      :match_start => from.to_i,
                                      :match_end => to.to_i,
                                      :go_terms => go_annotation)
        end
      end
    end
  end
  report
end
Public Instance Methods
def format_txt end
# File lib/bio/appl/iprscan/report.rb, line 266
#
# Renders the report in raw format: one tab-separated line per match,
# lines joined with "\n".
#
# Each line holds the report's query_id, crc64 and query_length followed by
# the match's method_name, accession, description, match_start, match_end,
# evalue, status, date, ipr_id, ipr_description and its GO terms joined
# with ", ".
#
# GO terms may be missing (nil), may be [category, term, GO id] triples
# (rendered as "category: term (GO id)"), or may already be plain Strings
# (emitted unchanged).
#
# Returns the formatted String (empty when there are no matches).
def format_raw
  @matches.map { |match|
    go_terms = Array(match.go_terms).map { |term|
      if term.respond_to?(:to_ary)
        "#{term[0]}: #{term[1]} (#{term[2]})"
      else
        term.to_s
      end
    }.join(', ')

    [self.query_id,
     self.crc64,
     self.query_length,
     match.method_name,
     match.accession,
     match.description,
     match.match_start,
     match.match_end,
     match.evalue,
     match.status,
     match.date,
     match.ipr_id,
     match.ipr_description,
     go_terms].join("\t")
  }.join("\n")
end
Output interpro matches in the format_type.
# File lib/bio/appl/iprscan/report.rb, line 245
#
# Renders the matches in the requested format.
#
# format_type:: 'raw' or :raw, the only accepted values
#
# Returns the result of format_raw.
# Raises NameError for any other format_type.
def output(format_type)
  return format_raw if ['raw', :raw].include?(format_type)

  raise NameError, "Invalid format_type."
end
Returns a Hash (key as an InterPro ID and value as an Array of the Match objects with that ID).
report.to_hash.each do |ipr_id, matches|
  matches.each do |match|
    match.ipr_id == ipr_id #=> true
  end
end
# File lib/bio/appl/iprscan/report.rb, line 298
#
# Groups the matches by InterPro id.
#
# Returns a Hash mapping each ipr_id to the Array of matches carrying it, in
# their original order. The Hash is built on the first call and cached in
# @ipr_ids, so later calls return the same object and do not pick up
# matches added afterwards.
def to_hash
  @ipr_ids ||= @matches.group_by { |match| match.ipr_id }
end