class Bio::GCG::Msf
MSF is a multiple sequence alignment format used by the Wisconsin Package (GCG). Bio::GCG::Msf is a parser for the MSF format.
Constants
- DELIMITER
delimiter used by Bio::FlatFile
Attributes
checksum[R]
checksum
date[R]
date
description[R]
description
entry_id[R]
ID of the alignment
heading[R]
heading line (for example '!!NA_MULTIPLE_ALIGNMENT 1.0')
length[R]
alignment length
seq_type[R]
sequence type ('N' for DNA/RNA or 'P' for protein)
Public Class Methods
new(str)
click to toggle source
Creates a new Msf object.
# File lib/bio/appl/gcg/msf.rb, line 31
#
# Creates a new Msf object from the text of a single MSF entry.
#
# The text is split at the first line that consists only of "//".
# Everything before that line is the preamble (heading, free-form
# description, the "MSF:" header line and the per-sequence "Name:" lines)
# and is parsed here. Everything after it is stored unparsed in @data.
#
# ---
# *Arguments*:
# * (required) _str_: String holding the MSF entry
def initialize(str)
  str = str.sub(/\A[\r\n]+/, '')
  preamble, @data = str.split(/^\/\/$/, 2)
  # String#split returns [] for an empty string, which would leave the
  # preamble nil; use an empty, mutable string so the in-place edits
  # below cannot fail.
  preamble = preamble.to_s.dup

  # '!!NA_MULTIPLE_ALIGNMENT 1.0' or similar; nil when there is no heading.
  @heading = preamble.slice!(/\A\!\![A-Z]+\_MULTIPLE\_ALIGNMENT.*/)

  # Cut out everything up to and including the last line ending in "..".
  header = preamble.slice!(/.*\.\.\s*$/m).to_s

  # Within that part, the first line ending in ".." is the "MSF:" line;
  # whatever remains after removing it is the description.
  msf_line_re = /^.*\.\.\s*$/
  msf_line = header[msf_line_re].to_s
  @description = header.sub(msf_line_re, '')

  if (m = /^(?:(.+)\s+)?MSF\:\s+(\d+)\s+Type\:\s+(\w)\s+(.+)\s+(Comp)?Check\:\s+(\d+)/.match(msf_line))
    @entry_id = m[1].to_s.strip
    @length   = m[2].to_i
    @seq_type = m[3]
    @date     = m[4].to_s.strip
    @checksum = m[6].to_i
  end

  # One hash per "Name:" line, mapping each "Key: value" pair on it.
  @seq_info = []
  preamble.each_line do |line|
    next unless /Name\: / =~ line
    info = {}
    line.scan(/(\S+)\: +(\S*)/) { |key, value| info[key] = value }
    @seq_info << info
  end

  @description.sub!(/\A(\r\n|\r|\n)/, '')
  @align = nil # alignment is built lazily by do_parse
end
Public Instance Methods
alignment()
click to toggle source
returns Bio::Alignment object.
# File lib/bio/appl/gcg/msf.rb, line 176
#
# Returns the parsed alignment (the object that do_parse stores in
# @align). do_parse is invoked first so the alignment exists before it is
# handed back.
def alignment
  do_parse
  @align
end
compcheck()
click to toggle source
CompCheck field
# File lib/bio/appl/gcg/msf.rb, line 118
#
# CompCheck field: the digits following "CompCheck:" in the description,
# as an Integer, or nil when the description has no such field. The
# description is searched once; the result (including nil) is cached.
def compcheck
  return @compcheck if defined?(@compcheck)

  found = /CompCheck\: +(\d+)/.match(@description)
  @compcheck = found ? found[1].to_i : nil
end
gap_length_weight()
click to toggle source
gap length weight
# File lib/bio/appl/gcg/msf.rb, line 109
#
# Gap length weight: the token following "GapLengthWeight:" in the
# description, as a String, or nil when the description has no such
# field. The description is searched once and the result is cached.
def gap_length_weight
  return @gap_length_weight if defined?(@gap_length_weight)

  found = /GapLengthWeight\: +(\S+)/.match(@description)
  @gap_length_weight = found && found[1]
end
gap_weight()
click to toggle source
gap weight
# File lib/bio/appl/gcg/msf.rb, line 100
#
# Gap weight: the token following "GapWeight:" in the description, as a
# String, or nil when the description has no such field. The description
# is searched once and the result is cached.
def gap_weight
  return @gap_weight if defined?(@gap_weight)

  found = /GapWeight\: +(\S+)/.match(@description)
  @gap_weight = found && found[1]
end
seq_data()
click to toggle source
gets seq data (used internally) (will be obsoleted)
# File lib/bio/appl/gcg/msf.rb, line 182
#
# Returns the per-sequence data that do_parse stores in @seq_data.
# Intended for internal use; slated to become obsolete.
def seq_data
  do_parse
  @seq_data
end
symbol_comparison_table()
click to toggle source
symbol comparison table
# File lib/bio/appl/gcg/msf.rb, line 91
#
# Symbol comparison table: the token following "Symbol comparison table:"
# in the description, as a String, or nil when the description has no
# such field. The description is searched once and the result is cached.
def symbol_comparison_table
  return @symbol_comparison_table if defined?(@symbol_comparison_table)

  found = /Symbol comparison table\: +(\S+)/.match(@description)
  @symbol_comparison_table = found && found[1]
end
validate_checksum()
click to toggle source
validates checksum
# File lib/bio/appl/gcg/msf.rb, line 188
#
# Validates the checksums of the entry.
#
# Each parsed sequence is checksummed with Bio::GCG::Seq.calc_checksum and
# compared with the 'Check' value recorded for it in @seq_info; the first
# mismatch makes the result false. If all sequences agree, the sum of the
# per-sequence checksums modulo 10000 must equal the entry-level checksum,
# unless that checksum is 0, in which case the total is not checked.
#
# Returns true or false.
def validate_checksum
  do_parse
  total = 0
  @seq_data.each_with_index do |seq, idx|
    sum = Bio::GCG::Seq.calc_checksum(seq)
    return false unless sum == @seq_info[idx]['Check'].to_i
    total += sum
  end

  # "Check:" field of BioPerl is always 0
  return true if @checksum == 0

  (total % 10000) == @checksum
end
Private Instance Methods
do_parse()
click to toggle source
parsing
# File lib/bio/appl/gcg/msf.rb, line 130
#
# Parses the alignment body held in @data and fills @seq_data and @align.
# Does nothing when @align has already been built.
#
# The body is processed in blocks separated by blank lines. Lines of a
# block are assigned to sequences by position: the i-th line belongs to
# the i-th entry of @seq_info. When a block has more lines than there are
# sequences, its first line is dropped; if that line starts with spaces,
# the number of leading spaces is taken as the width of the name column
# and that many characters are cut from the front of every remaining line.
# Otherwise each line is stripped and everything after the first run of
# spaces is used.
#
# The pieces collected for each sequence are joined, stripped of
# whitespace and digits, wrapped in a sequence class chosen from seq_type,
# and stored in a new Bio::Alignment under the sequence's 'Name'.
def do_parse
  return if @align

  @seq_data = Array.new(@seq_info.size) { [] }

  # @data is nil when the entry had no "//" separator; treat it as empty.
  @data.to_s.split(/\r?\n\r?\n/).each do |chunk|
    next if chunk.strip.empty?
    lines = chunk.sub(/\A[\r\n]+/, '').split(/[\r\n]+/)

    name_width = 0
    if lines.size > @seq_info.size
      # Extra leading line: discard it, using its indentation as the
      # width of the name column.
      ruler = lines.shift.to_s
      name_width = ruler[/^ +/].to_s.length
    end

    lines.each_with_index do |line, i|
      if name_width > 0
        line[0, name_width] = ''
        @seq_data[i] << line
      else
        @seq_data[i] << line.strip.split(/ +/, 2)[1].to_s
      end
    end
  end

  klass = case seq_type
          when 'P', 'p' then Bio::Sequence::AA
          when 'N', 'n' then Bio::Sequence::NA
          else Bio::Sequence::Generic
          end

  @seq_data.collect! do |pieces|
    klass.new(pieces.join('').gsub(/[\s\d]+/, ''))
  end

  aln = Bio::Alignment.new
  @seq_data.each_with_index do |seq, i|
    aln.store(@seq_info[i]['Name'], seq)
  end
  @align = aln
end