module ActiveSupport::Multibyte::Unicode
def unpack_graphemes(string)
Unicode.unpack_graphemes('क्षि') # => [[2325, 2381], [2359], [2367]]
Unpack the string at grapheme boundaries. Returns a list of character lists.
# Splits +string+ into grapheme clusters and returns them as an array of
# codepoint arrays, for example:
#
#   Unicode.unpack_graphemes('क्षि') # => [[2325, 2381], [2359], [2367]]
#
# Every position after a codepoint is examined, including the position after
# the last codepoint, where the right-hand neighbour is nil. The rule chain
# below is evaluated top to bottom and the first matching rule decides whether
# the cluster ends at that position. In the rule comments "X" means "do not
# break here" and "÷" means "break here".
# NOTE(review): the GBn labels presumably follow the grapheme cluster boundary
# rule numbering of Unicode Standard Annex #29 - confirm.
def unpack_graphemes(string)
  points = string.codepoints.to_a
  clusters = []
  cluster_start = 0

  (1..points.length).each do |pos|
    left = points[pos - 1]
    right = points[pos] # nil once pos is past the last codepoint

    split_here =
      # GB3. CR X LF
      if left == database.boundary[:cr] && right == database.boundary[:lf]
        false
      # GB4. (Control|CR|LF) ÷
      elsif left && in_char_class?(left, [:control,:cr,:lf])
        true
      # GB5. ÷ (Control|CR|LF)
      elsif in_char_class?(right, [:control,:cr,:lf])
        true
      # GB6. L X (L|V|LV|LVT)
      elsif database.boundary[:l] === left && in_char_class?(right, [:l,:v,:lv,:lvt])
        false
      # GB7. (LV|V) X (V|T)
      elsif in_char_class?(left, [:lv,:v]) && in_char_class?(right, [:v,:t])
        false
      # GB8. (LVT|T) X (T)
      elsif in_char_class?(left, [:lvt,:t]) && database.boundary[:t] === right
        false
      # GB8a. Regional_Indicator X Regional_Indicator
      elsif database.boundary[:regional_indicator] === left && database.boundary[:regional_indicator] === right
        false
      # GB9. X Extend
      elsif database.boundary[:extend] === right
        false
      # GB9a. X SpacingMark
      elsif database.boundary[:spacingmark] === right
        false
      # GB9b. Prepend X
      elsif database.boundary[:prepend] === left
        false
      # GB10. Any ÷ Any
      else
        true
      end

    # No boundary here: keep extending the current cluster.
    next unless split_here

    # Close the cluster covering cluster_start up to (not including) pos.
    clusters << points[cluster_start...pos]
    cluster_start = pos
  end

  clusters
end