module ClassifierReborn::Summarizer

Public Instance Methods

paragraph_summary( str, count=1, separator=" [...] " ) click to toggle source
# File lib/classifier-reborn/lsi/summarizer.rb, line 13
# Builds a summary of +str+ at paragraph granularity.
#
# The text is broken up with +split_paragraphs+ and the resulting pieces are
# passed to +perform_lsi+ together with +count+ and +separator+.
#
# str       - the text to summarize.
# count     - forwarded to +perform_lsi+ as the number of chunks requested
#             (defaults to 1).
# separator - string placed between the selected chunks in the result
#             (defaults to " [...] ").
#
# Returns whatever +perform_lsi+ returns.
def paragraph_summary(str, count = 1, separator = " [...] ")
  paragraphs = split_paragraphs(str)
  perform_lsi(paragraphs, count, separator)
end
perform_lsi(chunks, count, separator) click to toggle source
# File lib/classifier-reborn/lsi/summarizer.rb, line 25
# Feeds +chunks+ into a fresh ClassifierReborn::LSI index and joins the
# entries it reports via +highest_relative_content+.
#
# chunks    - enumerable of strings; chunks that are blank after stripping,
#             or that consist of a single whitespace-delimited word, are not
#             added to the index.
# count     - passed straight to +LSI#highest_relative_content+.
# separator - string inserted between the selected chunks.
#
# Returns a String: each selected chunk stripped of surrounding whitespace,
# joined with +separator+.
def perform_lsi(chunks, count, separator)
  # auto_rebuild is disabled so the index is built once, explicitly, below
  # instead of on every insertion.
  lsi = ClassifierReborn::LSI.new(auto_rebuild: false)

  chunks.each do |chunk|
    text = chunk.strip
    # Skip separators/whitespace-only pieces and one-word fragments.
    next if text.empty? || text.split.size == 1

    lsi << chunk
  end

  lsi.build_index

  # The previous implementation additionally filtered this list against
  # itself (reject unless included in the same list), which never removed
  # anything; that dead step is dropped here without changing the output.
  lsi.highest_relative_content(count).map(&:strip).join(separator)
end
split_paragraphs(str) click to toggle source
# File lib/classifier-reborn/lsi/summarizer.rb, line 21
# Splits +str+ on blank-line paragraph breaks ("\n\n", "\r\r" or
# "\r\n\r\n").
#
# Because the pattern uses a capturing group, String#split also places each
# matched delimiter into the returned array, between the pieces of text it
# separated.
#
# Returns an Array of Strings.
def split_paragraphs(str)
  # TODO: make this less primitive
  paragraph_break = /(\n\n|\r\r|\r\n\r\n)/
  str.split(paragraph_break)
end
split_sentences(str) click to toggle source
# File lib/classifier-reborn/lsi/summarizer.rb, line 17
# Splits +str+ at every ".", "!" or "?" character.
#
# Because the pattern uses a capturing group, String#split also places each
# matched punctuation mark into the returned array as its own element,
# following the text that preceded it.
#
# Returns an Array of Strings.
def split_sentences(str)
  # TODO: make this less primitive
  sentence_end = /(\.|\!|\?)/
  str.split(sentence_end)
end
summary( str, count=10, separator=" [...] " ) click to toggle source
# File lib/classifier-reborn/lsi/summarizer.rb, line 9
# Builds a summary of +str+ at sentence granularity.
#
# The text is broken up with +split_sentences+ and the resulting pieces are
# passed to +perform_lsi+ together with +count+ and +separator+.
#
# str       - the text to summarize.
# count     - forwarded to +perform_lsi+ as the number of chunks requested
#             (defaults to 10).
# separator - string placed between the selected chunks in the result
#             (defaults to " [...] ").
#
# Returns whatever +perform_lsi+ returns.
def summary(str, count = 10, separator = " [...] ")
  sentences = split_sentences(str)
  perform_lsi(sentences, count, separator)
end