module Patron::ResponseDecoding
# Pulls the charset name out of the Content-Type response header.
#
# @return [String, nil] the first capture group of CHARSET_CONTENT_TYPE_RE
#   when the header value matches it, nil otherwise
def charset_from_content_type
  # to_s lets an absent header (nil) be matched like an empty string
  content_type = @headers["Content-Type"].to_s
  found = CHARSET_CONTENT_TYPE_RE.match(content_type)
  found && found[1]
end
# Transcodes the given string into the internal encoding, refusing to lose data.
#
# @param str [String] the string to transcode
# @return [String] the result of encoding +str+ into the internal encoding
# @raise [NonRepresentableBody] when the conversion hits a character that is
#   undefined in the target encoding
def convert_encoding_and_raise(str)
  target = internal_encoding
  str.encode(target)
rescue Encoding::UndefinedConversionError
  # Report the binary encoding under a short, human-friendly label
  source_label =
    if str.encoding == Encoding::BINARY
      'binary'
    else
      str.encoding.to_s
    end
  details = { source_encoding: source_label, target_encoding: target }
  raise NonRepresentableBody, INTERNAL_CHARSET_MISMATCH_ERROR % details
end
# Decodes @body into the internal encoding, using the charset declared in the
# response headers (or binary when none is declared).
#
# @param strict [Boolean] when truthy, characters that cannot be represented
#   in the internal encoding raise; otherwise they are replaced with '?'
# @return [String] the body transcoded to the internal encoding
# @raise [HeaderCharsetInvalid] when the body bytes are not valid in the
#   encoding the headers declare
def decode_body(strict)
  declared = encoding_from_headers_or_binary

  # force_encoding retags @body in place; then verify the bytes really are
  # valid in the encoding the server claimed.
  unless @body.force_encoding(declared).valid_encoding?
    raise HeaderCharsetInvalid, MISREPORTED_ENCODING_ERROR % { declared: declared }
  end

  return convert_encoding_and_raise(@body) if strict

  # Lenient mode: substitute '?' for anything the target encoding lacks
  @body.encode(internal_encoding, undefined: :replace, replace: '?')
end
# Best-effort transcoding of raw header data.
#
# Header data is tricky. Strictly speaking, it _must_ be ISO-encoded. However,
# Content-Disposition sometimes gets sent as raw UTF8 - and most browsers
# (except for localized IE versions on Windows) treat it as such. So a fallback
# chain of 8859-1 -> UTF8 -> binary seems the most sane.
#
# @param str [String] the header data to transcode
# @return [String] +str+ encoded into the first candidate encoding that
#   accepts it, or +str+ itself when none of them does
def decode_header_data(str)
  candidates = [Encoding::ISO8859_1, Encoding::UTF_8, Encoding::BINARY]
  candidates.each do |candidate|
    begin
      return str.encode(candidate)
    rescue Encoding::UndefinedConversionError, Encoding::InvalidByteSequenceError
      # UndefinedConversionError: a character has no mapping in the candidate.
      # InvalidByteSequenceError: the bytes are not valid in the string's own
      # tagged encoding. Either way this is a best-effort chain, so move on to
      # the next candidate instead of letting the error escape.
      next
    end
  end
  str # if it doesn't encode, just give back what we got
end
# Resolves the encoding to assume for the body from the Content-Type charset.
#
# @return [Encoding] the encoding found for the declared charset name, or
#   Encoding::BINARY when no charset is declared
# @raise [HeaderCharsetInvalid] when an ArgumentError is raised while
#   resolving the charset (e.g. the charset name is not a known encoding)
def encoding_from_headers_or_binary
  charset_name = charset_from_content_type
  if charset_name
    Encoding.find(charset_name)
  else
    Encoding::BINARY
  end
rescue ArgumentError
  # invalid charset name
  details = { content_type: @headers['Content-Type'].inspect }
  raise HeaderCharsetInvalid, INVALID_CHARSET_NAME_ERROR % details
end
# Determines the encoding that decoded strings should end up in.
#
# `Encoding.default_internal` may be nil, so fall back to the encoding of a
# fresh empty string literal. For example, this holds true on 2.1+ on OSX:
#
#   Encoding.default_internal #=> nil
#   ''.encoding #=> #<Encoding:UTF-8>
#
# @return [Encoding] Encoding.default_internal when set, otherwise the
#   encoding of an empty string literal
def internal_encoding
  configured = Encoding.default_internal
  return configured if configured

  ''.encoding
end