module ActiveSupport::Multibyte::Unicode
def normalize(string, form=nil)
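:c, :kc, :d, or :kd. Default is the configured default normalization form (@default_normalization_form), used when no form is given.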
* form - The form you want to normalize in. Should be one of the following:
* string - The string to perform normalization on.
passing strings to databases and validations.
Returns the KC normalization of the string by default. NFKC is considered the best normalization form for
# Normalizes +string+ to a Unicode normalization form and returns the result
# as a new string packed from the resulting codepoints.
#
# * <tt>string</tt> - The string to normalize; it is unpacked into codepoints
#   with +u_unpack+ before the form is checked.
# * <tt>form</tt> - One of <tt>:d</tt>, <tt>:c</tt>, <tt>:kd</tt> or <tt>:kc</tt>.
#   When +nil+ (or +false+), <tt>@default_normalization_form</tt> is used.
#
# Raises ArgumentError when the form is not one of the four variants above.
def normalize(string, form=nil)
  form = @default_normalization_form unless form
  # See http://www.unicode.org/reports/tr15, Table 1
  points = u_unpack(string)

  # :d/:c use canonical decomposition, :kd/:kc use compatibility decomposition.
  # The symbol spelling (:compatability) is kept exactly as the helper is
  # called in the original code -- presumably what decompose_codepoints expects.
  decomposition =
    case form
    when :d, :c then :canonical
    when :kd, :kc then :compatability
    else
      raise ArgumentError, "#{form} is not a valid normalization variant", caller
    end

  ordered = reorder_characters(decompose_codepoints(decomposition, points))
  # Only the composed forms (:c and :kc) run the composition pass.
  ordered = compose_codepoints(ordered) if form == :c || form == :kc
  ordered.pack('U*')
end