module LHC::FixInvalidEncodingConcern::ClassMethods
def fix_invalid_encoding(string)
Fixes strings that contain non-UTF8 encoding in a forceful way.
Should none of the fix attempts be successful, an empty string is returned instead.
# Forcefully repairs a string that is not valid UTF-8.
#
# Anything that is not a String is handed back untouched. For strings, a copy
# is made (the argument itself is never mutated) and, as long as the copy is
# either invalid in its tagged encoding or not tagged as UTF-8, it is
# re-encoded to UTF-8 while *assuming* a source encoding: ISO-8859-1 first,
# then BINARY. Bytes that cannot be converted are dropped (replaced with '').
# Should the copy still be broken after both attempts, an empty string is
# returned instead.
#
# Note that the tagged encoding of the input is ignored when re-encoding:
# a non-UTF-8 string is always reinterpreted as ISO-8859-1 first.
#
# @param string [Object] the value to repair; only Strings are processed
# @return [Object] the argument itself for non-Strings, otherwise a UTF-8
#   copy (or an empty string if every attempt failed)
def fix_invalid_encoding(string)
  return string unless string.is_a?(String)

  result = string.dup

  # Candidate source encodings, tried in order: we assume ISO-8859-1 first,
  # and if that is still an issue, try with BINARY.
  %w[ISO-8859-1 BINARY].each do |assumed_encoding|
    break if result.valid_encoding? && utf8?(result)

    result.encode!('UTF-8', assumed_encoding, invalid: :replace, undef: :replace, replace: '')
  end

  # If it is STILL an issue, give up and return an empty string.
  return '' unless result.valid_encoding? && utf8?(result)

  result
end
def utf8?(string)
# Tells whether the given string is tagged with the UTF-8 encoding.
#
# Only the encoding label is compared; the bytes themselves are not checked
# for validity here.
#
# @param string [String] the string whose encoding is inspected
# @return [Boolean] true when the string's encoding is Encoding::UTF_8
def utf8?(string)
  encoding = string.encoding
  encoding == Encoding::UTF_8
end