class RSpec::Support::EncodedString
# @private
# Chooses an encoding for working with two sources together.
#
# NOTE(review): a second definition with the same name appears in this file;
# presumably the two live in alternative branches of a feature check that is
# not visible here -- confirm.
#
# @param source_a [Object] first object handed to Encoding.compatible?
# @param source_b [Object] second object handed to Encoding.compatible?
# @return [Encoding] the result of Encoding.compatible? when it is truthy,
#   otherwise Encoding.default_external
def self.pick_encoding(source_a, source_b)
  Encoding.compatible?(source_a, source_b) || Encoding.default_external
end
def self.pick_encoding(_source_a, _source_b)
def self.pick_encoding(_source_a, _source_b) end
# Appends +string+ to the wrapped string after passing it through
# matching_encoding.
#
# @param string [String] text to append
# @return [String] the wrapped string (the result of String#<<)
def <<(string)
  @string << matching_encoding(string)
end
# Reports the encoding the given string is currently tagged with.
#
# @param string [String] string to inspect
# @return [Encoding] the value of string.encoding
def detect_source_encoding(string)
  string.encoding
end
def detect_source_encoding(_string)
def detect_source_encoding(_string) US_ASCII end
# Wraps +string+, recording the requested target encoding and the encoding
# detected on the source.
#
# The assignment order matters: @encoding is set first because
# matching_encoding reads it when producing the stored string.
#
# @param string [String] the source string to wrap
# @param encoding [Object, nil] target encoding stored in @encoding
def initialize(string, encoding=nil)
  @encoding = encoding
  @source_encoding = detect_source_encoding(string)
  @string = matching_encoding(string)
end
# Returns +string+ transcoded to @encoding, substituting REPLACE for anything
# that cannot be represented instead of raising.
#
# Encoding Exceptions:
#
# Raised by Encoding and String methods:
#   Encoding::UndefinedConversionError:
#     when a transcoding operation fails
#     if the String contains characters invalid for the target encoding
#     e.g. "\x80".encode('UTF-8','ASCII-8BIT')
#     vs "\x80".encode('UTF-8','ASCII-8BIT', undef: :replace, replace: '<undef>')
#     # => '<undef>'
#   Encoding::CompatibilityError
#     when Encoding.compatible?(str1, str2) is nil
#     e.g. utf_16le_emoji_string.split("\n")
#     e.g. valid_unicode_string.encode(utf8_encoding) << ascii_string
#   Encoding::InvalidByteSequenceError:
#     when the string being transcoded contains a byte invalid for
#     either the source or target encoding
#     e.g. "\x80".encode('UTF-8','US-ASCII')
#     vs "\x80".encode('UTF-8','US-ASCII', invalid: :replace, replace: '<byte>')
#     # => '<byte>'
#   ArgumentError
#     when operating on a string with invalid bytes
#     e.g."\x80".split("\n")
#   TypeError
#     when a symbol is passed as an encoding
#     Encoding.find(:"UTF-8")
#     when calling force_encoding on an object
#     that doesn't respond to #to_str
#
# Raised by transcoding methods:
#   Encoding::ConverterNotFoundError:
#     when a named encoding does not correspond with a known converter
#     e.g. 'abc'.force_encoding('UTF-8').encode('foo')
#     or a converter path cannot be found
#     e.g. "\x80".force_encoding('ASCII-8BIT').encode('Emacs-Mule')
#
# RangeError: out of char range
#
# @param string [String] the string to transcode
# @return [String] a string produced by String#encode
def matching_encoding(string)
  string = remove_invalid_bytes(string)
  string.encode(@encoding)
rescue Encoding::UndefinedConversionError, Encoding::InvalidByteSequenceError
  # Originally defined as a constant to avoid unneeded allocations, this hash must
  # be defined inline (without {}) to avoid warnings on Ruby 2.7
  #
  # In MRI 2.1 'invalid: :replace' changed to also replace an invalid byte sequence
  # see https://github.com/ruby/ruby/blob/v2_1_0/NEWS#L176
  # https://www.ruby-forum.com/topic/6861247
  # https://twitter.com/nalsh/status/553413844685438976
  #
  # For example, given:
  #   "\x80".force_encoding("Emacs-Mule").encode(:invalid => :replace).bytes.to_a
  #
  # On MRI 2.1 or above: 63  # '?'
  # else               : 128 # "\x80"
  #
  string.encode(@encoding, :invalid => :replace, :undef => :replace, :replace => REPLACE)
rescue Encoding::ConverterNotFoundError
  # Originally defined as a constant to avoid unneeded allocations, this hash must
  # be defined inline (without {}) to avoid warnings on Ruby 2.7
  string.dup.force_encoding(@encoding).encode(:invalid => :replace, :replace => REPLACE)
end
def matching_encoding(string)
def matching_encoding(string) string end
# Replaces invalid byte sequences in +string+ with REPLACE using
# String#scrub.
#
# https://github.com/ruby/ruby/blob/eeb05e8c11/doc/NEWS-2.1.0#L120-L123
# https://github.com/ruby/ruby/blob/v2_1_0/string.c#L8242
# https://github.com/hsbt/string-scrub
#
# @param string [String] possibly malformed string
# @return [String] the result of string.scrub(REPLACE)
def remove_invalid_bytes(string)
  string.scrub(REPLACE)
end
# http://stackoverflow.com/a/8711118/879854
# Loop over chars in a string replacing chars
# with invalid encoding, which is a pretty good proxy
# for invalid byte sequences.
#
# NOTE(review): another definition with the same name appears in this file;
# presumably this one is the fallback for Rubies without String#scrub --
# confirm.
#
# @param string [String] possibly malformed string
# @return [String] a new string with each invalid char swapped for REPLACE
def remove_invalid_bytes(string)
  string.chars.map do |char|
    char.valid_encoding? ? char : REPLACE
  end.join
end
# Splits the wrapped string on +regex_or_string+, which is first passed
# through matching_encoding.
#
# NOTE(review): another definition with the same name appears in this file;
# going by the comment in the rescue branch this one is presumably the JRuby
# variant -- confirm.
#
# @param regex_or_string [Regexp, String] the separator
# @return [Array<String>] the pieces produced by String#split
def split(regex_or_string)
  @string.split(matching_encoding(regex_or_string))
rescue ArgumentError
  # JRuby raises an ArgumentError when splitting a source string that
  # contains invalid bytes.
  remove_invalid_bytes(@string).split regex_or_string
end
# Splits the wrapped string on +regex_or_string+, which is first passed
# through matching_encoding.
#
# @param regex_or_string [Regexp, String] the separator
# @return [Array<String>] the pieces produced by String#split
def split(regex_or_string)
  @string.split(matching_encoding(regex_or_string))
end
# Exposes the wrapped string.
#
# @return [String] the object held in @string (not a copy)
def to_s
  @string
end