class Bio::GFF::GFF3
DESCRIPTION
Represents version 3 of the GFF specification. For more information on GFF3, see song.sourceforge.net/gff3.shtml
Constants
Attributes
GFF3 version string (String or nil). nil means “3”.
Metadata (except “##sequence-region”, “##gff-version”, “###”). Must be an array of Bio::GFF::GFF3::MetaData objects.
Metadata of “##sequence-region”. Must be an array of Bio::GFF::GFF3::SequenceRegion objects.
Sequences bundled within GFF3. Must be an array of Bio::Sequence objects.
Public Class Methods
Creates a Bio::GFF::GFF3 object by building a collection of Bio::GFF::GFF3::Record (and metadata) objects.
Arguments:
-
str: string in GFF format
- Returns
-
Bio::GFF::GFF3 object
# File lib/bio/db/gff.rb, line 875
# Sets up an empty GFF3 container (no version string, no records, no
# sequence regions, no metadata, no bundled sequences, not in FASTA mode)
# and, when +str+ is supplied, passes it to #parse.
#
# *Arguments*:
# * (optional) _str_: GFF3 data to parse immediately; skipped when nil
def initialize(str = nil)
  @in_fasta         = false
  @sequences        = []
  @metadata         = []
  @sequence_regions = []
  @records          = []
  @gff_version      = nil
  return unless str
  parse(str)
end
Public Instance Methods
Parses GFF3 entries and appends the parsed data to this object.
Note that after “##FASTA” line is given, only fasta-formatted text is accepted.
Arguments:
-
str: string in GFF format
- Returns
-
self
# File lib/bio/db/gff.rb, line 909
# Parses GFF3 data and appends the results to this object.
#
# Once a "##FASTA" directive or a ">" header line has been seen, @in_fasta
# is set; this call and every later call then treat their input as FASTA
# text only.
#
# *Arguments*:
# * _str_: a String in GFF3 format, or an IO-like object (anything that
#   responds to +gets+)
# *Returns*:: self
def parse(str)
  # Already past the ##FASTA line: everything is sequence data.
  if @in_fasta
    parse_fasta(str)
    return self
  end

  fasta_text = nil
  unless str.respond_to?(:gets)
    # String input: cut the FASTA section off up front so the line loop
    # below only ever sees GFF lines.
    gff_text, separator, fasta_text = str.split(/^(\>|##FASTA.*)/n, 2)
    # A bare ">" separator is the start of the first FASTA header; put it
    # back. A "##FASTA" separator line is simply dropped.
    fasta_text = separator + fasta_text if separator == '>' && fasta_text
    # String#split returns [] for an empty string, which leaves gff_text
    # nil; fall back to '' so the each_line call below does not raise.
    str = gff_text || ''
  end

  # Parse GFF lines. For IO-like input the FASTA section is discovered
  # while iterating, and parse_fasta consumes the rest of the stream.
  str.each_line do |line|
    if /^\#\#([^\s]+)/ =~ line
      parse_metadata($1, line)
      parse_fasta(str) if @in_fasta
    elsif /^\>/ =~ line
      @in_fasta = true
      parse_fasta(str, line)
    else
      @records << GFF3::Record.new(line)
    end
  end

  # String input that carried a FASTA section: parse it now.
  if fasta_text
    @in_fasta = true
    parse_fasta(fasta_text)
  end

  self
end
string representation of whole entry.
# File lib/bio/db/gff.rb, line 964
# Renders the whole entry as one String: the "##gff-version" header,
# then metadata, sequence-region lines and records (each via its own
# to_s), followed by a "##FASTA" section when sequences are present.
#
# *Returns*:: String
def to_s
  # Falls back to the VERSION constant when no version string was set.
  version = @gff_version || VERSION.to_s

  fasta_section =
    if @sequences.empty?
      ''
    else
      fasta_entries = @sequences.map { |seq| seq.to_fasta(seq.entry_id, 70) }
      "##FASTA\n" + fasta_entries.join('')
    end

  lines = [ "##gff-version #{escape(version)}\n" ]
  lines.concat(@metadata.map { |entry| entry.to_s })
  lines.concat(@sequence_regions.map { |region| region.to_s })
  lines.concat(@records.map { |record| record.to_s })

  lines.join('') + fasta_section
end
Private Instance Methods
parses fasta formatted data
# File lib/bio/db/gff.rb, line 949
# Reads FASTA-formatted text from +str+ in chunks separated by "\n>" and
# appends one sequence object per non-blank chunk to @sequences.
#
# *Arguments*:
# * _str_: object responding to each_line that yields FASTA text
# * (optional) _line_: text to prepend to the first chunk only (the
#   caller passes a ">" header line it has already read)
def parse_fasta(str, line = nil)
  str.each_line("\n>") do |chunk|
    # Re-attach the already-consumed line exactly once.
    if line
      chunk = line + chunk
      line = nil
    end

    stripped = chunk.strip
    # Skip chunks that hold nothing but whitespace or a lone ">".
    next if stripped.empty? || stripped == '>'

    fasta = Bio::FastaFormat.new(chunk)
    sequence = fasta.to_seq
    # The entry id is the first whitespace-delimited word of the
    # definition line, unescaped.
    first_word = fasta.definition.strip.split(/\s/, 2)[0]
    sequence.entry_id = unescape(first_word.to_s)
    @sequences.push(sequence)
  end
end
parses metadata
# File lib/bio/db/gff.rb, line 1836
# Handles a single "##" directive line.
#
# *Arguments*:
# * _directive_: the directive name, i.e. the text right after "##"
# * _line_: the complete directive line
# *Returns*:: true
def parse_metadata(directive, line)
  case directive
  when 'gff-version'
    # Only the first version directive seen is kept.
    @gff_version ||= line.split(/\s+/)[1]
  when 'FASTA' then @in_fasta = true
  when 'sequence-region'
    @sequence_regions.push(SequenceRegion.parse(line))
  when '#'
    # "###" directive
    @records.push(RecordBoundary.new)
  else
    @metadata.push(MetaData.parse(line))
  end
  true
end