class RuboCop::StringUtil::Jaro
# This class computes Jaro distance, which is a measure of similarity
# between two strings.
# Convenience entry point: builds an instance from the given arguments
# and returns the result of its #distance method.
#
# @param args arguments forwarded unchanged to .new
# @return whatever #distance returns for the new instance
def self.distance(*args)
  instance = new(*args)
  instance.distance
end
# Calculates the distance value from the matched characters of both strings.
#
# Returns 0.0 when no characters matched. Otherwise the result is the mean
# of three ratios: matches over the shorter size, matches over the longer
# size, and (matches minus half the transposition count) over matches.
#
# @return [Float]
def compute_distance
  matched_a, matched_b = find_common_characters
  matched_count = matched_a.size
  return 0.0 if matched_count.zero?

  # Integer division: an odd transposition count is rounded down.
  half_transpositions = count_transpositions(matched_a, matched_b) / 2
  matches = matched_count.to_f

  shorter_ratio = matches / shorter.size
  longer_ratio = matches / longer.size
  order_ratio = (matched_count - half_transpositions) / matches

  (shorter_ratio + longer_ratio + order_ratio) / 3.0
end
# Counts the positions at which the two matched-character lists disagree.
#
# @param common_chars_a [Array] matched characters of one string
# @param common_chars_b [Array] matched characters of the other string
# @return [Integer] number of indexes of common_chars_a whose element
#   differs from the element at the same index in common_chars_b
def count_transpositions(common_chars_a, common_chars_b)
  common_chars_a.zip(common_chars_b).count { |left, right| left != right }
end
# Returns the distance, computing it on first use and caching the result
# in @distance for subsequent calls.
#
# @return the cached result of #compute_distance
def distance
  return @distance if @distance

  @distance = compute_distance
end
# Collects the characters that the shorter and longer strings have in
# common, where a character of the shorter string only matches a character
# of the longer string located inside the index range returned by
# #matching_index_range for its position.
#
# @return [Array(Array, Array)] the matched characters ordered by their
#   position in the shorter string, and the matched characters ordered by
#   their position in the longer string
def find_common_characters
  matched_in_shorter = Array.new(shorter.size)
  matched_in_longer = Array.new(longer.size)

  # In Ruby 1.9 String#chars returns Enumerator rather than Array.
  available = longer.each_char.to_a

  shorter.each_char.with_index do |char, position|
    # First still-available character of the longer string, within the
    # allowed range, that equals the current character.
    hit = matching_index_range(position).find do |candidate|
      char == available[candidate]
    end
    next if hit.nil?

    matched_in_shorter[position] = char
    matched_in_longer[hit] = available[hit]

    # Mark the matching character as already used
    available[hit] = nil
  end

  [matched_in_shorter.compact, matched_in_longer.compact]
end
# Stores the two inputs ordered by size.
#
# @param a first input; must respond to #size
# @param b second input; must respond to #size
#
# When both have the same size, b is stored as the shorter one and a as
# the longer one.
def initialize(a, b)
  a_is_shorter = a.size < b.size

  @shorter = a_is_shorter ? a : b
  @longer = a_is_shorter ? b : a
end
# Builds the range of indexes that lie within #matching_window of the
# given index.
#
# @param origin [Integer] index the range is centred on
# @return [Range] from origin - matching_window (never below 0) up to
#   origin + matching_window; the upper bound is not limited here
def matching_index_range(origin)
  window = matching_window
  lower = [origin - window, 0].max
  upper = origin + window

  lower..upper
end
# Returns how far apart (in index positions) two equal characters may be
# while still counting as a match: half the size of the longer string,
# minus one. The value is cached in @matching_window.
#
# The result is never negative. Without the lower bound, a longer string
# of 0 or 1 characters would give -1, which produces an empty search range
# and makes identical one-character strings count as having nothing in
# common.
#
# @return [Integer] non-negative window size
def matching_window
  @matching_window ||= [(longer.size / 2) - 1, 0].max
end