class Bio::Scf
Description¶ ↑
This class inherits from the SangerChromatogram superclass. It captures the information contained within an scf format chromatogram file generated by DNA sequencing. See the SangerChromatogram class for usage
Attributes
The quality of each base at each position along the length of the sequence is captured by the nqual attributes where n is one of a, c, g or t. Generally the quality will be high for the base that is called at a particular position and low for all the other bases. However at positions of poor sequence quality, more than one base may have similar top scores. By analysing the nqual attributes it may be possible to determine if the base calling was correct or not. The quality of the A base at each sequence position
A hash of extra information extracted from the chromatogram file
The quality of the C base at each sequence position
The quality of the G base at each sequence position
The quality of the T base at each sequence position
Public Class Methods
see SangerChromatogram class for how to create an Scf object and its usage
# File lib/bio/db/sanger_chromatogram/scf.rb, line 37 def initialize(string) header = string.slice(0,128) # read in header info @chromatogram_type, @samples, @sample_offset, @bases, @bases_left_clip, @bases_right_clip, @bases_offset, @comment_size, @comments_offset, @version, @sample_size, @code_set, @header_spare = header.unpack("a4 NNNNNNNN a4 NN N20") get_traces(string) get_bases_peakIndices_and_qualities(string) get_comments(string) if @comments["DYEP"] @dye_mobility = @comments["DYEP"] else @dye_mobility = "Unnown" end end
Private Instance Methods
# File lib/bio/db/sanger_chromatogram/scf.rb, line 181 def convert_accuracies_to_qualities qualities = Array.new for base_pos in (0..@sequence.length-1) case sequence.slice(base_pos,1) when "a" qualities << @aqual[base_pos] when "c" qualities << @cqual[base_pos] when "g" qualities << @gqual[base_pos] when "t" qualities << @tqual[base_pos] else qualities << 0 end end return qualities end
# File lib/bio/db/sanger_chromatogram/scf.rb, line 168 def convert_deltas_to_values(trace_read) p_sample = 0; for sample_num in (0..trace_read.size-1) trace_read[sample_num] = trace_read[sample_num] + p_sample p_sample = trace_read[sample_num]; end p_sample = 0; for sample_num in (0..trace_read.size-1) trace_read[sample_num] = trace_read[sample_num] + p_sample p_sample = trace_read[sample_num]; end return trace_read end
# File lib/bio/db/sanger_chromatogram/scf.rb, line 98 def get_bases_peakIndices_and_qualities(string) if @version == "3.00" # now go and get the peak index information offset = @bases_offset length = @bases * 4 get_v3_peak_indices(string,offset,length) # now go and get the accuracy information offset += length; get_v3_accuracies(string,offset,length) # OK, now go and get the base information. offset += length; length = @bases; get_v3_sequence(string,offset,length) #combine accuracies to get quality scores @qualities= convert_accuracies_to_qualities elsif @version == "2.00" @peak_indices = [] @aqual = [] @cqual = [] @gqual = [] @tqual = [] @qualities = [] @sequence = "" # now go and get the base information offset = @bases_offset length = @bases * 12 all_bases_info = string.slice(offset,length) (0..length-1).step(12) do |offset2| base_info = all_bases_info.slice(offset2,12).unpack("N C C C C a C3") @peak_indices << base_info[0] @aqual << base_info[1] @cqual << base_info[2] @gqual << base_info[3] @tqual << base_info[4] @sequence += base_info[5].downcase case base_info[5].downcase when "a" @qualities << base_info[1] when "c" @qualities << base_info[2] when "g" @qualities << base_info[3] when "t" @qualities << base_info[4] else @qualities << 0 end end end end
# File lib/bio/db/sanger_chromatogram/scf.rb, line 199 def get_comments(string) @comments = Hash.new comment_string = string.slice(@comments_offset,@comment_size) comment_string.gsub!(/\0/, "") comment_array = comment_string.split("\n") comment_array.each do |comment| comment =~ /(\w+)=(.*)/ @comments[$1] = $2 end end
# File lib/bio/db/sanger_chromatogram/scf.rb, line 53 def get_traces(string) if @version == "3.00" # read in trace info offset = @sample_offset length = @samples * @sample_size # determine whether the data is stored in 1 byte as an unsigned byte or 2 bytes as an unsigned short @sample_size == 2 ? byte = "n" : byte = "c" for base in ["a" , "c" , "g" , "t"] trace_read = string.slice(offset,length).unpack("#{byte}#{@samples}") # convert offsets for sample_num in (0..trace_read.size-1) if trace_read[sample_num] > 30000 trace_read[sample_num] = trace_read[sample_num] - 65536 end end # For 8-bit data we need to emulate a signed/unsigned # cast that is implicit in the C implementations..... if @sample_size == 1 for sample_num in (0..trace_read.size-1) trace_read[sample_num] += 256 if trace_read[sample_num] < 0 end end trace_read = convert_deltas_to_values(trace_read) self.instance_variable_set("@#{base}trace", trace_read) offset += length end elsif @version == "2.00" @atrace = [] @ctrace = [] @gtrace = [] @ttrace = [] # read in trace info offset = @sample_offset length = @samples * @sample_size * 4 # determine whether the data is stored in 1 byte as an unsigned byte or 2 bytes as an unsigned short @sample_size == 2 ? byte = "n" : byte = "c" trace_read = string.slice(offset,length).unpack("#{byte}#{@samples*4}") (0..(@samples-1)*4).step(4) do |offset2| @atrace << trace_read[offset2] @ctrace << trace_read[offset2+1] @gtrace << trace_read[offset2+2] @ttrace << trace_read[offset2+3] end end end
# File lib/bio/db/sanger_chromatogram/scf.rb, line 155 def get_v3_accuracies(string,offset,length) qualities = string.slice(offset,length) qual_length = length/4; qual_offset = 0; for base in ["a" , "c" , "g" , "t"] self.instance_variable_set("@#{base}qual",qualities.slice(qual_offset,qual_length).unpack("C#{qual_length}")) qual_offset += qual_length end end
# File lib/bio/db/sanger_chromatogram/scf.rb, line 152 def get_v3_peak_indices(string,offset,length) @peak_indices = string.slice(offset,length).unpack("N#{length/4}") end
# File lib/bio/db/sanger_chromatogram/scf.rb, line 164 def get_v3_sequence(string,offset,length) @sequence = string.slice(offset,length).unpack("a#{length}").join('').downcase end