Object
Print some simple statistics on the extracted results, such as the number of instances extracted by each pattern.
# File lib/scrubyt/output/result_dumper.rb, line 79
#
# Write the extraction statistics for +pattern+ to standard output.
#
# Two newlines are emitted first, then the report itself is delegated to
# print_statistics_recursive (called with the pattern and a starting value
# of 0), and finally one closing newline is written.
def self.print_statistics(pattern)
  leading_gap = "\n" * 2
  puts leading_gap
  print_statistics_recursive(pattern, 0)
  puts
end
# File lib/scrubyt/output/result_dumper.rb, line 24
#
# Detach +node+ from its parent when it has neither child elements nor text,
# then apply the same check to each of its child elements.
#
# The check on +node+ happens before its children are visited, so an element
# that only becomes childless because its children were removed is kept.
def self.remove_empty_leaves(node)
  childless_and_textless = node.elements.empty? && node.text.nil?
  node.remove if childless_and_textless

  node.elements.each do |child|
    remove_empty_leaves(child)
  end
end
# File lib/scrubyt/output/result_dumper.rb, line 44
#
# Output the results as CSV text.
#
# Each child element of the XML root produced by to_xml becomes one line. The
# line holds every non-empty element text found in that child and its
# descendants, in document order, separated by commas.
#
# A field containing a comma, a double quote or a line break is wrapped in
# double quotes with embedded quotes doubled, so such values no longer spill
# into neighbouring columns or rows.
#
# Returns the CSV as a String; returns '' when the document has no root.
def self.to_csv(pattern)
  quote_field = lambda { |field|
    field =~ /[",\r\n]/ ? "\"#{field.gsub('"', '""')}\"" : field
  }

  collect_texts = lambda { |element, fields|
    text = element.text || ''
    fields << text unless text == ''
    # each_element skips text/comment nodes, which do not respond to #text.
    element.each_element { |child| collect_texts.call(child, fields) }
    fields
  }

  # to_xml runs remove_empty_leaves, which detaches a childless, textless
  # <root>; in that case the document has no root and there is nothing to dump.
  root = to_xml(pattern).root
  return '' unless root

  rows = []
  root.each_element { |record| rows << collect_texts.call(record, []) }
  rows.map { |fields| fields.map(&quote_field).join(',') }.join("\n")
end
# File lib/scrubyt/output/result_dumper.rb, line 56
#
# Output the results as an Array of Hashes.
#
# Each child element of the XML root produced by to_xml becomes one Hash. Its
# keys are the local names of the elements that carry non-empty text (the
# child itself and its descendants); its values are those texts passed through
# REXML::Text.unnormalize. When the same element name occurs more than once
# within a record, the texts are concatenated with a comma.
#
# Returns [] when the document has no root.
def self.to_hash(pattern)
  collect_fields = lambda { |element, fields|
    text = element.text ? REXML::Text.unnormalize(element.text) : ''
    unless text == ''
      key = element.local_name
      fields[key] = fields[key] ? fields[key] + ',' + text : text
    end
    # each_element skips text/comment nodes, which do not respond to #text.
    element.each_element { |child| collect_fields.call(child, fields) }
    fields
  }

  # to_xml runs remove_empty_leaves, which detaches a childless, textless
  # <root>; in that case the document has no root and there are no records.
  root = to_xml(pattern).root
  return [] unless root

  records = []
  root.each_element { |record| records << collect_fields.call(record, {}) }
  records
end
Output the text of the pattern. If this pattern is a tree, collect the text from its result instance node; otherwise rely on last_result. TODO: throw this away!
# File lib/scrubyt/output/result_dumper.rb, line 33
#
# Return the text of +pattern+.
#
# For a pattern whose type is :tree, every text node yielded by
# last_result.traverse_text is converted with to_s and concatenated into one
# String. For any other type, last_result itself is returned unchanged.
def self.to_text(pattern)
  extracted = pattern.last_result
  return extracted unless pattern.type == :tree

  collected = ""
  extracted.traverse_text do |text_node|
    collected += text_node.to_s
  end
  collected
end
Output the results as XML
# File lib/scrubyt/output/result_dumper.rb, line 11
#
# Output the results as XML.
#
# Builds a REXML::Document with a single <root> element. pattern.last_result
# is flattened into a list, and for each entry the pattern's last_result is
# set to that entry before to_xml_recursive is called with the pattern and
# the root. Empty leaves are then pruned with remove_empty_leaves.
#
# Side effects: pattern.last_result is left pointing at the final entry of the
# flattened list, and the document is stored in @@last_doc.
#
# Returns the REXML::Document.
def self.to_xml(pattern)
  xml_doc = REXML::Document.new
  root_element = REXML::Element.new('root')
  xml_doc.add_element(root_element)

  extracted = [pattern.last_result].flatten
  extracted.each { |single_result|
    pattern.last_result = single_result
    to_xml_recursive(pattern, root_element)
  }

  remove_empty_leaves(xml_doc)
  @@last_doc = xml_doc
end
Generated with the Darkfish Rdoc Generator 2.