# lib/eco/data/fuzzy_match/string_helpers.rb



module Eco
  module Data
    module FuzzyMatch
      # String-level helpers for fuzzy matching: normalization, word extraction
      # and generation of word/character groupings.
      #
      # @note `ngrams`, `combinations` and `permutations` are called here but not
      #   defined in this module; they must be available wherever it is mixed in.
      module StringHelpers
        # Downcases and trims.
        # @param value [String, Symbol, Array] the value to normalize. An `Array`
        #   is normalized element by element.
        # @return [String, Array, nil] the normalized value, or `nil` when `value`
        #   is of any other type (`nil` included).
        def normalize_string(value)
          case value
          when Array
            value.map { |val| normalize_string(val) }
          when Symbol
            # Convert to String first: `Symbol#to_sym` returns the receiver itself,
            # which would make this branch recurse endlessly.
            normalize_string(value.to_s)
          when String
            value.downcase.strip
          end
        end

        # Extracts the words of `str`.
        # A word is a run of ASCII letters, apostrophes and hyphens; any other
        # character (digits, punctuation, blanks) works as a separator.
        # @param str [String, nil] the input string.
        # @param normalized [Boolean] `true` if `str` is already normalized
        #   (skips the call to `normalize_string`).
        # @return [Array<String>] the words in order of appearance; empty when
        #   there is nothing to scan.
        def get_words(str, normalized: false)
          str = normalize_string(str) unless normalized
          # `normalize_string` gives `nil` for `nil` and for unsupported types.
          return [] unless str

          str.scan(/[a-zA-Z'-]+/)
        end

        # Keeps the start order of the `words` and consecutive `words` together/consecutive.
        # @param str [String] the input string with the words.
        # @param range [Integer, Range] determine the length of the generated values.
        # @param normalized [Boolean] `true` if `str` is already normalized.
        # @return [Array<String>] combinations of `range` length of `words`.
        def string_ngrams(str, range = 2..3, normalized: false)
          words = get_words(str, normalized: normalized)
          ngrams(words, range)
        end

        # Keeps the start order of the `words` of the input `Array` `words`.
        # It does **not** keep consecutive `words` together (it can jump/skip items).
        # @param str [String] the input string with the words.
        # @param range [Integer, Range] determine the length of the generated values.
        # @param normalized [Boolean] `true` if `str` is already normalized.
        # @return [Array<String>] combinations of `range` length of `words`,
        #   each one joined by a single space.
        def string_combinations(str, range = 2..3, normalized: false)
          words = get_words(str, normalized: normalized)
          combinations(words, range).map { |comb| comb.join(' ') }
        end

        # It includes `combinations` that break the initial order of the `Array`.
        # It does **not** keep consecutive `words` together (it can jump/skip items).
        # @param str [String] the input string with the words.
        # @param range [Integer, Range] determine the length of the generated values.
        # @param normalized [Boolean] `true` if `str` is already normalized.
        # @return [Array<String>] permutations of `range` length of `words`,
        #   each one joined by a single space.
        def string_permutations(str, range = 2..3, normalized: false)
          words = get_words(str, normalized: normalized)
          permutations(words, range).map { |perm| perm.join(' ') }
        end

        # Keeps the start order of the `chars` and consecutive `chars` together/consecutive.
        # @param str [String] the input `word` string.
        # @param range [Integer, Range] determine the length of the generated values.
        # @param normalized [Boolean] `true` if `str` is already normalized.
        # @return [Array<String>] combinations of `range` length of `chars`,
        #   each one passed through `no_blanks`.
        def word_ngrams(str, range = 2..3, normalized: false)
          str = normalize_string(str) unless normalized
          # `to_s` turns a `nil` (nothing to normalize) into an empty list of chars.
          ngrams(str.to_s.chars, range).map { |comb| no_blanks(comb) }
        end

        # Removes every space character from `str`.
        # @param str [String] the input string.
        # @return [String, nil] a copy of `str` without spaces, or `nil` when `str`
        #   is not a `String`.
        def no_blanks(str)
          return nil unless str.is_a?(String)

          str.delete(' ')
        end

        # Deletes the words of `str1` and `str2` that match.
        # @param str1 [String, nil] the first string.
        # @param str2 [String, nil] the second string.
        # @param normalized [Boolean] `true` if both strings are already normalized.
        # @return [Array<String>] pair with the words left on each side, joined by
        #   a single space. When either string is missing or empty, the pair of
        #   (normalized) inputs is returned as is.
        def remove_matching_words(str1, str2, normalized: false)
          unless normalized
            str1 = normalize_string(str1)
            str2 = normalize_string(str2)
          end
          return [str1, str2] if !str1 || !str2 || str1.empty? || str2.empty?

          # Both strings are normalized by now (or declared so by the caller),
          # so there is no need to normalize them once more.
          ws1 = get_words(str1, normalized: true)
          ws2 = get_words(str2, normalized: true)
          [(ws1 - ws2).join(' '), (ws2 - ws1).join(' ')]
        end
      end
    end
  end
end