class Licensee::Matchers::Dice

Public Class Methods

new(file) click to toggle source
# File lib/licensee/matchers/dice_matcher.rb, line 4
# Build a matcher for the given file.
#
# file - the object to compare against known licenses; the other methods
#        of this matcher read its word set through file.wordset.
def initialize(file)
  @file = file
end

Public Instance Methods

confidence() click to toggle source

Confidence that the matched license is a match

# File lib/licensee/matchers/dice_matcher.rb, line 54
# Similarity score of the matched license, memoised after the first call.
#
# Returns the value of similarity(match) when a license matched, or 0 when
# nothing matched.
def confidence
  @confidence ||= begin
    matched = match
    if matched
      similarity(matched)
    else
      0
    end
  end
end
length_delta(license) click to toggle source

Calculate the difference between the file length and a given license's length

# File lib/licensee/matchers/dice_matcher.rb, line 41
# Absolute difference between the number of words in the file's word set
# and the number of words in the given license's word set.
#
# license - an object responding to wordset.
#
# Returns a non-negative number.
def length_delta(license)
  file_size = @file.wordset.size
  license_size = license.wordset.size
  (file_size - license_size).abs
end
match() click to toggle source

Return the potential license with the highest similarity among those whose similarity meets or exceeds the confidence threshold, or nil if none does

# File lib/licensee/matchers/dice_matcher.rb, line 10
# Pick the best-scoring license among the potential licenses.
#
# Every potential license is scored with similarity; only those scoring at
# or above Licensee.confidence_threshold are kept, and the one with the
# highest score wins.
#
# Returns the winning license, or nil when no license reaches the threshold.
def match
  # defined? (rather than ||=) so that a memoised nil is not recomputed.
  return @match if defined? @match

  scored = potential_licenses.each_with_object([]) do |license, hits|
    score = similarity(license)
    hits << [license, score] if score >= Licensee.confidence_threshold
  end

  # max_by yields nil for an empty list, which maps to "no match".
  winner = scored.max_by { |pair| pair.last }
  @match = winner ? winner.first : nil
end
max_delta() click to toggle source

Maximum allowed difference between the file's length and a license's length for that license to be considered a potential match

# File lib/licensee/matchers/dice_matcher.rb, line 47
# Largest length difference a license may have from the file and still be
# treated as a candidate, memoised after the first call.
#
# Computed as the file's word-set size multiplied by
# Licensee.confidence_threshold / 100.0.
#
# Returns a Float.
def max_delta
  @max_delta ||= begin
    word_count = @file.wordset.size
    ratio = Licensee.confidence_threshold / 100.0
    word_count * ratio
  end
end
potential_licenses() click to toggle source

Sort all licenses, in ascending order, by their difference in length to the file. Only licenses with a word set whose difference in length does not exceed the file's length * the confidence threshold / 100 are included.

# File lib/licensee/matchers/dice_matcher.rb, line 29
# Candidate licenses worth scoring, memoised after the first call.
#
# Starts from Licensee.licenses(hidden: true), keeps only licenses that
# have a word set and whose length_delta is no greater than max_delta, and
# orders the survivors by length_delta, smallest first.
#
# Returns an Array of licenses.
def potential_licenses
  @potential_licenses ||=
    Licensee.licenses(hidden: true)
            .select { |lic| lic.wordset && length_delta(lic) <= max_delta }
            .sort_by { |lic| length_delta(lic) }
end

Private Instance Methods

similarity(license) click to toggle source

Calculate the percent similarity (Sørensen–Dice coefficient) between the file's word set and a potential license's word set

# File lib/licensee/matchers/dice_matcher.rb, line 61
# Percent similarity between the file and a license, computed as the
# Sørensen–Dice coefficient of their word sets:
#
#   100 * (2 * |file ∩ license|) / (|file| + |license|)
#
# license - an object responding to wordset.
#
# Returns a Float between 0.0 and 100.0. When both word sets are empty the
# result is 0.0 rather than NaN.
def similarity(license)
  file_words = @file.wordset
  license_words = license.wordset
  total = file_words.size + license_words.size

  # With two empty word sets the division below would be 0.0 / 0 => NaN,
  # which is not a meaningful percentage; treat it as "no similarity".
  return 0.0 if total.zero?

  overlap = (file_words & license_words).size
  100.0 * (overlap * 2.0 / total)
end