class Bundler::SimilarityDetector

def initialize(corpus)

# Initialize with an array of words to be matched against.
# Build a detector over a fixed vocabulary.
#
# corpus - the collection of candidate words; it is stored as-is (not copied)
#          and later mapped over when looking for similar words.
def initialize(corpus)
  @corpus = corpus
end

def levenshtein_distance(this, that, ins=2, del=2, sub=1)

# See http://www.informit.com/articles/article.aspx?p=683059&seqNum=36
# Compute the weighted Levenshtein (edit) distance between two strings.
#
# this, that - the strings to compare; if either is nil the result is nil.
# ins        - cost charged for each step taken along `this` alone.
# del        - cost charged for each step taken along `that` alone.
# sub        - cost charged when the aligned characters differ.
#
# Returns the minimal total cost as a number, or nil when an argument is nil.
# Empty strings are supported: the distance to an empty string is the other
# string's length multiplied by the corresponding ins/del cost.
def levenshtein_distance(this, that, ins=2, del=2, sub=1)
  return nil if this.nil? || that.nil?

  # Only the previous row of the distance matrix is needed to compute the
  # next one, so we keep two rows instead of the whole matrix.
  # Row 0: cost of reaching each prefix of `this` from an empty `that`.
  previous_row = (0..this.length).map { |j| j * ins }

  (1..that.length).each do |i|
    # Column 0: cost of reaching an empty `this` from the first i chars of `that`.
    current_row = [i * del]

    (1..this.length).each do |j|
      substitution = previous_row[j - 1] + (this[j - 1] == that[i - 1] ? 0 : sub)
      insertion    = current_row[j - 1] + ins
      deletion     = previous_row[j] + del
      current_row << [substitution, insertion, deletion].min
    end

    previous_row = current_row
  end

  # The last cell of the final row is the distance between the full strings.
  previous_row.last
end

def similar_word_list(word, limit=3)

# (e.g. "a, b or c")
# Return the result of 'similar_words', concatenated into a list.
# Render the words returned by similar_words(word, limit) as one
# human-readable string.
#
# Returns nil when there are no matches, the word itself when there is exactly
# one, and otherwise the words joined with ", " with the final word attached
# by " or " (e.g. "a or b", "a, b or c").
def similar_word_list(word, limit=3)
  candidates = similar_words(word, limit)

  case candidates.length
  when 0 then nil
  when 1 then candidates.first
  else
    leading = candidates[0..-2].join(', ')
    [leading, candidates.last].join(' or ')
  end
end

def similar_words(word, limit=3)

# Return an array of words similar to 'word' from the corpus.
# Find the corpus words that are close to `word`.
#
# Every word in @corpus is scored with levenshtein_distance(word, candidate);
# words whose distance is at most `limit` are kept and returned as an array
# of strings ordered by ascending distance.
def similar_words(word, limit=3)
  scored = @corpus.map do |candidate|
    SimilarityScore.new(candidate, levenshtein_distance(word, candidate))
  end

  close_enough = scored.select { |score| score.distance <= limit }
  close_enough.sort_by { |score| score.distance }.map { |score| score.string }
end