module Eco::Data::FuzzyMatch::StringHelpers

def get_words(str, normalized: false)

# Splits a string into its words.
#
# @param str [String, nil] the text to scan.
# @param normalized [Boolean] pass `true` when `str` has already been
#   normalized; otherwise it is run through `normalize_string` first.
# @return [Array<String>] every run of ASCII letters, apostrophes and
#   hyphens found in the text; an empty array when `str` is `nil`/`false`.
def get_words(str, normalized: false)
  return [] unless str
  text = normalized ? str : normalize_string(str)
  # Anything outside letters, `'` and `-` (digits, spaces, punctuation)
  # acts as a word separator.
  text.scan(/[a-zA-Z'-]+/)
end

def no_blanks(str)

# Removes every space character from a string.
#
# @param str [String, Object] the value to clean up.
# @return [String, nil] a copy of `str` without spaces, or `nil` when `str`
#   is not a String.
def no_blanks(str)
  return unless str.is_a?(String)
  # Only the plain space character is removed; tabs and newlines stay.
  str.delete(' ')
end

def normalize_string(value)

Downcases the value and strips its leading and trailing whitespace.
# Downcases a value and strips its leading and trailing whitespace.
#
# @param value [String, Symbol, Array, Object] the value to normalize.
#   Arrays are normalized element by element; symbols are converted to
#   strings first.
# @return [String, Array, nil] the normalized string, an array with each
#   element normalized, or `nil` when `value` is none of the supported types.
def normalize_string(value)
  case value
  when Array
    value.map { |val| normalize_string(val) }
  when Symbol
    # Convert to a String before recursing: `to_sym` on a Symbol returns the
    # very same Symbol, which would recurse until the stack overflows.
    normalize_string(value.to_s)
  when String
    value.downcase.strip
  end
end

def remove_matching_words(str1, str2, normalized: false)

Returns:
  • (Array) - pair of strings: the words of `str1` not found in `str2`, and the words of `str2` not found in `str1`.
# Drops from each string the words that also appear in the other one.
#
# @param str1 [String, nil] first string.
# @param str2 [String, nil] second string.
# @param normalized [Boolean] pass `true` when both strings have already
#   been normalized; otherwise each goes through `normalize_string` first.
# @return [Array] a pair `[rest1, rest2]` with the remaining words of each
#   string joined by a single space. When either string is missing or empty
#   the (normalized) inputs are returned untouched.
def remove_matching_words(str1, str2, normalized: false)
  str1, str2 = [str1, str2].map { |text| normalize_string(text) } unless normalized

  # Nothing to compare when either side is missing or empty.
  nothing_to_compare = !(str1 && str2) || str1.empty? || str2.empty?
  return [str1, str2] if nothing_to_compare

  words1 = get_words(str1)
  words2 = get_words(str2)
  [words1 - words2, words2 - words1].map { |rest| rest.join(" ") }
end

def string_combinations(str, range=2..3, normalized: false)

Returns:
  • (Array) - combinations of `range` length of `words`

Parameters:
  • range (Integer, Range) -- determines the length of the generated values.
  • str (String) -- the input string with the words.
# Builds the word combinations of a string.
#
# @param str [String] the input string with the words.
# @param range [Integer, Range] forwarded to `combinations` to determine the
#   length of the generated values.
# @param normalized [Boolean] forwarded to `get_words`; pass `true` when
#   `str` has already been normalized.
# @return [Array<String>] each combination produced by `combinations`, with
#   its words joined by a single space.
def string_combinations(str, range=2..3, normalized: false)
  words = get_words(str, normalized: normalized)
  combinations(words, range).map { |group| group.join(' ') }
end

def string_ngrams(str, range=2..3, normalized: false)

Returns:
  • (Array) - n-grams of `range` length of `words`.

Parameters:
  • range (Integer, Range) -- determines the length of the generated values.
  • str (String) -- the input string with the words.
# Builds the word n-grams of a string.
#
# @param str [String] the input string with the words.
# @param range [Integer, Range] forwarded to `ngrams` to determine the
#   length of the generated values.
# @param normalized [Boolean] forwarded to `get_words`; pass `true` when
#   `str` has already been normalized.
# @return [Array] whatever `ngrams` produces for the words of `str`.
def string_ngrams(str, range=2..3, normalized: false)
  words = get_words(str, normalized: normalized)
  ngrams(words, range)
end

def string_permutations(str, range=2..3, normalized: false)

Returns:
  • (Array) - permutations of `range` length of `words`

Parameters:
  • range (Integer, Range) -- determines the length of the generated values.
  • str (String) -- the input string with the words.
# Builds the word permutations of a string.
#
# @param str [String] the input string with the words.
# @param range [Integer, Range] forwarded to `permutations` to determine the
#   length of the generated values.
# @param normalized [Boolean] forwarded to `get_words`; pass `true` when
#   `str` has already been normalized.
# @return [Array<String>] each permutation produced by `permutations`, with
#   its words joined by a single space.
def string_permutations(str, range=2..3, normalized: false)
  words = get_words(str, normalized: normalized)
  permutations(words, range).map { |group| group.join(' ') }
end

def word_ngrams(str, range=2..3, normalized: false)

Returns:
  • (Array) - n-grams of `range` length built from the characters of `word`.

Parameters:
  • range (Integer, Range) -- determines the length of the generated values.
  • str (String) -- the input `word` string.
# Builds the character n-grams of a single word.
#
# @param str [String] the input `word` string.
# @param range [Integer, Range] forwarded to `ngrams` to determine the
#   length of the generated values.
# @param normalized [Boolean] pass `true` when `str` has already been
#   normalized; otherwise it is run through `normalize_string` first.
# @return [Array] each value produced by `ngrams` for the characters of the
#   word, passed through `no_blanks`.
def word_ngrams(str, range=2..3, normalized: false)
  word = normalized ? str : normalize_string(str)
  # `to_s` turns a `nil` word into an empty list of characters.
  letters = word.to_s.chars
  ngrams(letters, range).map { |gram| no_blanks(gram) }
end