class Bio::Genscan::Report

Bio::Genscan::Report - Class for Genscan report output.

Parser for the Genscan report output.

Attributes

date_run[R]

Returns the run date parsed from the second tab-separated field of the GENSCAN header line.

gccontent[R]

Returns C+G content of the query sequence.

genes[R]

Returns Array of Bio::Genscan::Report::Gene.

genscan_version[R]

Returns Genscan version.

isochore[R]

Returns the isochore description parsed from the Sequence line of the report.

length[R]

Returns Length of the query sequence.

matrix[R]

Returns the parameter matrix name parsed from the "Parameter matrix:" line of the report.

name[R]

Returns Name of query sequence.

prediction[R]

Returns Array of Bio::Genscan::Report::Gene.

predictions[R]

Returns Array of Bio::Genscan::Report::Gene.

query_name[R]

Returns Name of query sequence.

sequence_name[R]

Returns Name of query sequence.

time[R]

Returns the time value parsed from the third tab-separated field of the GENSCAN header line.

Public Class Methods

new(report) click to toggle source

::new

Parse a Genscan report output string.

# File lib/bio/appl/genscan/report.rb, line 67
# Parses a Genscan report output string and fills in the report attributes.
#
# The header lines (GENSCAN, Sequence, Parameter) are handed to the matching
# parse_* method. The gene/exon table and the predicted peptide/CDS
# sequences are then read from their own sections of the report. A section
# that is absent from the report is skipped.
#
# * _report_ - String holding the whole Genscan report.
def initialize(report)
  @predictions = []
  @genscan_version = nil
  @date_run   = nil
  @time       = nil
  @query_name = nil
  @length     = nil
  @gccontent  = nil
  @isochore   = nil
  @matrix     = nil

  # header: stop scanning as soon as the gene/exon table starts
  report.each_line("\n") do |line|
    case line
    when /^GENSCAN/
      parse_headline(line)
    when /^Sequence/
      parse_sequence(line)
    when /^Parameter/
      parse_parameter(line)
    when /^Predicted genes/
      break
    end
  end

  # offsets of the remaining sections (nil when a section is missing)
  genes_start = report.index(/^Predicted gene/)
  seqs_start  = report.index(/^Predicted peptide sequence/)

  # genes/exons
  if genes_start
    genes_region = report[genes_start...(seqs_start || report.size)]
    genes_region.each_line("\n") do |line|
      next unless /Init|Intr|Term|PlyA|Prom|Sngl/ =~ line
      # The first column is "<gene>.<exon>". Split on runs of spaces with a
      # Regexp; a String argument would split on the literal text " +".
      gn, en = line.strip.split(/ +/)[0].split(/\./).map {|num| num.to_i }
      add_exon(gn, en, line)
    end
  end

  # sequences (peptide|CDS)
  if seqs_start
    sequence_region = report[seqs_start...report.size]
    sequence_region.gsub!(/^Predicted .+?:/, '')
    sequence_region.gsub!(/^\s*$/, '')
    sequence_region.split(Bio::FastaFormat::RS).each do |ff|
      add_seq(Bio::FastaFormat.new(ff))
    end
  end
end

Private Instance Methods

add_exon(gn, en, line) click to toggle source

#add_exon

# File lib/bio/appl/genscan/report.rb, line 157
# Parses one line of the gene/exon table with Exon.parser and stores the
# result in gene number _gn_, creating that gene first when it is not yet
# present in @predictions.
#
# * _gn_   - gene number (1-based; stored at index gn - 1).
# * _en_   - exon number (1-based; not used for Prom and PlyA lines).
# * _line_ - the table line being parsed.
def add_exon(gn, en, line)
  exon = Exon.parser(line)

  # Create the gene up front instead of rescuing NameError afterwards.
  # The rescue also caught real errors raised inside the setters and then
  # overwrote the existing gene, and the PlyA branch had no fallback at all.
  add_gene(gn) if @predictions[gn - 1].nil?
  gene = @predictions[gn - 1]

  case line
  when /Prom/
    gene.set_promoter(exon)
  when /PlyA/
    gene.set_polyA(exon)
  else
    gene.exons[en - 1] = exon
  end
end
add_gene(gn) click to toggle source

#add_gene

# File lib/bio/appl/genscan/report.rb, line 150
# Creates a new Gene for gene number _gn_ and stores it in @predictions
# at index gn - 1. Returns the new Gene.
def add_gene(gn)
  gene = Gene.new(gn)
  @predictions[gn - 1] = gene
end
add_seq(seq) click to toggle source

#add_seq

# File lib/bio/appl/genscan/report.rb, line 182
# Attaches a predicted sequence to its gene. The gene number is read from
# the entry's definition: "peptide_<n>" entries go to set_aaseq and
# "CDS_<n>" entries go to set_naseq. Any other entry is ignored.
#
# * _seq_ - object responding to +definition+ (built with
#   Bio::FastaFormat.new by the caller).
def add_seq(seq)
  case seq.definition
  when /peptide_(\d+)/
    @predictions[Regexp.last_match(1).to_i - 1].set_aaseq(seq)
  when /CDS_(\d+)/
    @predictions[Regexp.last_match(1).to_i - 1].set_naseq(seq)
  end
end
parse_headline(line) click to toggle source

#parse_headline

# File lib/bio/appl/genscan/report.rb, line 115
# Parses the GENSCAN header line. The line is split on tabs; the version is
# the second space-separated word of the first field, and the run date and
# time are the text after ": " in the second and third fields.
#
# Raises RuntimeError when the line has fewer than three tab-separated
# fields, in the same "Error: [...]" style as the other header parsers.
#
# * _line_ - the header line (a trailing newline is removed).
def parse_headline(line)
  fields = line.chomp.split(/\t/)
  raise "Error: [#{line.inspect}]" if fields.size < 3

  @genscan_version = fields[0].split(' ')[1]
  @date_run        = fields[1].split(': ')[1]
  @time            = fields[2].split(': ')[1]
end
parse_parameter(line) click to toggle source

#parse_parameter

# File lib/bio/appl/genscan/report.rb, line 139
# Parses the "Parameter matrix: <name>" line and stores the name in @matrix.
# Raises RuntimeError when the line does not have that form.
#
# * _line_ - the parameter line (a trailing newline is removed before
#   matching).
def parse_parameter(line)
  matched = /^Parameter matrix: (\w.+)$/.match(line.chomp)
  raise "Error: [#{line}]" unless matched

  @matrix = matched[1]
end
parse_sequence(line) click to toggle source

#parse_sequence

# File lib/bio/appl/genscan/report.rb, line 125
# Parses the "Sequence <name> : <n> bp : <x>% C+G : Isochore <...>" line and
# stores the query name, the length (Integer), the C+G content (Float) and
# the isochore text. Raises RuntimeError when the line does not match.
#
# * _line_ - the sequence line; the line terminator is removed before
#   matching.
def parse_sequence(line)
  # chomp first so that a "\r\n" ending does not leave "\r" in the
  # isochore capture
  unless /^Sequence (\S+) : (\d+) bp : (\d+[\.\d]+)% C\+G : Isochore (\d+.+?)$/ =~ line.chomp
    raise "Error: [#{line.inspect}]"
  end

  @query_name = $1
  @length     = $2.to_i
  @gccontent  = $3.to_f
  @isochore   = $4
end