class Bio::Phylip::PhylipFormat

This is a parser for the PHYLIP multiple alignment format. The two variants of the format, interleaved and non-interleaved, are detected automatically.

Attributes

alignment_length[R]

alignment length

number_of_sequences[R]

number of sequences

Public Class Methods

new(str) click to toggle source

Creates a new object from a string.

# File lib/bio/appl/phylip/alignment.rb, line 26
# Builds the parser state from the raw text of a PHYLIP alignment.
#
# The text is stripped and split into lines on CRLF, CR or LF. The first
# line is kept in @first_line and supplies, as whitespace-separated
# integers, the number of sequences and the alignment length. The
# remaining lines are kept in @data for later parsing.
def initialize(str)
  lines = str.strip.split(/(?:\r\n|\r|\n)/)
  @first_line = lines.shift
  @data = lines
  counts = @first_line.to_s.strip.split(/\s+/).map(&:to_i)
  @number_of_sequences, @alignment_length = counts
end

Public Instance Methods

alignment() click to toggle source

Gets the alignment. Returns a Bio::Alignment object.

# File lib/bio/appl/phylip/alignment.rb, line 54
# Returns the parsed alignment, building it on first use.
#
# On the first call the raw data is parsed via do_parse, and each
# sequence is added with its name to a new Bio::Alignment. The result is
# cached in @alignment and returned directly on later calls.
def alignment
  return @alignment if defined?(@alignment)

  do_parse
  aln = Bio::Alignment.new
  (0...@number_of_sequences).each do |idx|
    aln.add_seq(@sequences[idx], @sequence_names[idx])
  end
  @alignment = aln
end
interleaved?() click to toggle source

Returns true if the alignment format is “interleaved”, and false otherwise. The detection may be wrong when the alignment is very short.

# File lib/bio/appl/phylip/alignment.rb, line 42
# Reports whether the data is treated as interleaved.
#
# The decision looks only at the second data line (@data[1]): if it
# begins with a space character the format is taken to be
# non-interleaved, otherwise interleaved. A missing second line counts
# as interleaved. The answer is cached in @interleaved_flag.
def interleaved?
  return @interleaved_flag if defined?(@interleaved_flag)

  second_line = @data[1].to_s
  @interleaved_flag = !second_line.start_with?(' ')
end

Private Instance Methods

do_parse() click to toggle source
# File lib/bio/appl/phylip/alignment.rb, line 68
# Dispatches to the parser matching the detected layout: the interleaved
# parser when interleaved? is true, the non-interleaved parser otherwise.
def do_parse
  return do_parse_interleaved if interleaved?

  do_parse_noninterleaved
end
do_parse_interleaved() click to toggle source
# File lib/bio/appl/phylip/alignment.rb, line 76
# Parses @data as interleaved PHYLIP and fills @sequence_names and
# @sequences.
#
# The first @number_of_sequences lines each hold a name followed by the
# start of that sequence. Every later non-blank line is appended, with
# all whitespace removed, to the sequences in round-robin order; a blank
# line restarts the round at the first sequence. @data is emptied once
# parsing is done. Always returns true.
def do_parse_interleaved
  count = @number_of_sequences
  named_rows = @data[0, count]
  # Swap the consumed rows for a single empty string; the loop below
  # treats it like a blank separator line.
  @data[0, count] = ''
  @sequence_names = Array.new(count) { '' }
  @sequences = Array.new(count) { ' ' * @alignment_length }

  named_rows.each_with_index do |row, idx|
    name, residues = row.split(/ +/, 2)
    @sequence_names[idx] = name
    @sequences[idx].replace(residues.gsub(/\s+/, ''))
  end

  slot = 0
  @data.each do |row|
    if row.strip.empty?
      slot = 0
      next
    end
    @sequences[slot] << row.gsub(/\s+/, '')
    slot = (slot + 1) % count
  end

  @data.clear
  true
end
do_parse_noninterleaved() click to toggle source
# File lib/bio/appl/phylip/alignment.rb, line 101
# Parses @data as non-interleaved (sequential) PHYLIP and fills
# @sequence_names and @sequences.
#
# Blank lines are skipped. A line starts a new sequence when no sequence
# has been started yet, when the current sequence is already longer than
# @alignment_length, or when the line does not begin with whitespace.
# Such a line is split into a name and the first part of the sequence.
# Any other line is a continuation and is appended, with all whitespace
# removed, to the current sequence. @data is emptied once parsing is
# done. Always returns true.
#
# This method writes nothing to standard output; a leftover debugging
# print of the sequence index has been removed.
def do_parse_noninterleaved
  @sequence_names = Array.new(@number_of_sequences) { '' }
  @sequences = Array.new(@number_of_sequences) do
    ' ' * @alignment_length
  end
  curseq = nil
  i = 0
  @data.each do |x|
    next if x.strip.length <= 0
    if !curseq ||
        curseq.length > @alignment_length || /^\s/ !~ x then
      n, s = x.strip.split(/ +/, 2)
      @sequence_names[i] = n
      # Work on the pre-allocated string in place so @sequences[i] and
      # curseq stay the same object while continuation lines are appended.
      curseq = @sequences[i]
      curseq.replace(s.gsub(/\s+/, ''))
      i += 1
    else
      curseq << x.gsub(/\s+/, '')
    end
  end
  @data.clear
  true
end