# encoding: utf-8

module Mail
  # String, encoding and header-escaping helpers for Ruby 1.9+ strings.
  # Everything is exposed as class-level methods on Ruby19.
  class Ruby19
    # Tags a string with the encoding Ruby19.pick_encoding selects for the
    # given charset, without any extra aliasing.
    class StrictCharsetEncoder
      # Relabels +string+ in place (String#force_encoding) and returns it.
      def encode(string, charset)
        string.force_encoding(Mail::Ruby19.pick_encoding(charset))
      end
    end

    # Like StrictCharsetEncoder, but first substitutes a couple of charsets
    # with close relatives before delegating to Ruby19.pick_encoding.
    class BestEffortCharsetEncoder
      # Relabels +string+ in place (String#force_encoding) and returns it.
      def encode(string, charset)
        string.force_encoding(pick_encoding(charset))
      end

      private

      # Applies the best-effort substitutions, then defers to
      # Mail::Ruby19.pick_encoding for the general aliasing rules.
      def pick_encoding(charset)
        charset = case charset
                  when /ansi_x3.110-1983/
                    'ISO-8859-1'
                  when /Windows-?1258/i # Windows-1258 is similar to 1252
                    "Windows-1252"
                  else
                    charset
                  end
        Mail::Ruby19.pick_encoding(charset)
      end
    end

    class << self
      # Object responding to encode(string, charset); used by the
      # *_value_decode and param_decode helpers below.
      attr_accessor :charset_encoder
    end
    self.charset_encoder = StrictCharsetEncoder.new

    # Escapes any parenthesis in a string that are unescaped this uses
    # a Ruby 1.9.1 regexp feature of negative look behind
    def Ruby19.escape_paren(str)
      re = /(?<!\\)([\(\)])/ # Only match unescaped parens
      str.gsub(re) { |s| '\\' + s }
    end

    # Wraps +str+ in parentheses. One existing outer pair is stripped first,
    # and any unescaped parentheses left inside are backslash-escaped.
    def Ruby19.paren(str)
      str = $1 if str =~ /^\((.*)?\)$/
      str = escape_paren(str)
      '(' + str + ')'
    end

    # Backslash-escapes every "<" or ">" that is not already escaped.
    def Ruby19.escape_bracket(str)
      re = /(?<!\\)([\<\>])/ # Only match unescaped brackets
      str.gsub(re) { |s| '\\' + s }
    end

    # Wraps +str+ in angle brackets. One existing outer pair is stripped
    # first, and any unescaped brackets left inside are backslash-escaped.
    def Ruby19.bracket(str)
      str = $1 if str =~ /^\<(.*)?\>$/
      str = escape_bracket(str)
      '<' + str + '>'
    end

    # Decodes a Base64 string using String#unpack('m').
    def Ruby19.decode_base64(str)
      str.unpack('m').first
    end

    # Encodes a string as Base64 using Array#pack('m').
    def Ruby19.encode_base64(str)
      [str].pack('m')
    end

    # True when +klass+ itself defines the constant +string+; the +false+
    # argument disables lookup through ancestors.
    def Ruby19.has_constant?(klass, string)
      klass.const_defined?(string, false)
    end

    # Returns the constant named +string+ looked up on +klass+.
    def Ruby19.get_constant(klass, string)
      klass.const_get(string)
    end

    # Returns [base64_text, encoding_name] for +str+.
    #
    # NOTE: the +encoding+ argument is accepted but ignored; the name
    # returned is always the string's own encoding.
    def Ruby19.b_value_encode(str, encoding = nil)
      encoding = str.encoding.to_s
      [Ruby19.encode_base64(str), encoding]
    end

    # Decodes a "=?charset?B?payload?=" encoded word to UTF-8. Input that
    # does not match that shape skips the Base64 step and is only transcoded.
    #
    # Invalid byte sequences are dropped. If transcoding raises one of the
    # rescued errors, a warning is printed and a copy of the (possibly
    # already Base64-decoded) string relabelled as UTF-8 is returned.
    def Ruby19.b_value_decode(str)
      match = str.match(/\=\?(.+)?\?[Bb]\?(.*)\?\=/m)
      if match
        charset = match[1]
        str = Ruby19.decode_base64(match[2])
        str = charset_encoder.encode(str, charset)
      end
      decoded = str.encode(Encoding::UTF_8, :invalid => :replace, :replace => "")
      # If the result is still not valid UTF-8, round-trip through UTF-16LE
      # so the remaining invalid sequences are dropped as well.
      decoded.valid_encoding? ? decoded : decoded.encode(Encoding::UTF_16LE, :invalid => :replace, :replace => "").encode(Encoding::UTF_8)
    rescue Encoding::UndefinedConversionError, ArgumentError, Encoding::ConverterNotFoundError
      warn "Encoding conversion failed #{$!}"
      str.dup.force_encoding(Encoding::UTF_8)
    end

    # Returns [quoted_printable_text, encoding_name] for +str+.
    #
    # NOTE: the +encoding+ argument is accepted but ignored; the name
    # returned is always the string's own encoding.
    def Ruby19.q_value_encode(str, encoding = nil)
      encoding = str.encoding.to_s
      [Encodings::QuotedPrintable.encode(str), encoding]
    end

    # Decodes a "=?charset?Q?payload?=" encoded word to UTF-8. Input that
    # does not match that shape skips the quoted-printable step and is only
    # transcoded. Error handling mirrors b_value_decode.
    def Ruby19.q_value_decode(str)
      match = str.match(/\=\?(.+)?\?[Qq]\?(.*)\?\=/m)
      if match
        charset = match[1]
        # In Q encoding an underscore stands for a space.
        string = match[2].gsub(/_/, '=20')
        # Remove trailing = if it exists in a Q encoding
        string = string.sub(/\=$/, '')
        str = Encodings::QuotedPrintable.decode(string)
        str = charset_encoder.encode(str, charset)
        # We assume that binary strings hold utf-8 directly to work around
        # jruby/jruby#829 which subtly changes String#encode semantics.
        str.force_encoding(Encoding::UTF_8) if str.encoding == Encoding::ASCII_8BIT
      end
      decoded = str.encode(Encoding::UTF_8, :invalid => :replace, :replace => "")
      # If the result is still not valid UTF-8, round-trip through UTF-16LE
      # so the remaining invalid sequences are dropped as well.
      decoded.valid_encoding? ? decoded : decoded.encode(Encoding::UTF_16LE, :invalid => :replace, :replace => "").encode(Encoding::UTF_8)
    rescue Encoding::UndefinedConversionError, ArgumentError, Encoding::ConverterNotFoundError
      warn "Encoding conversion failed #{$!}"
      str.dup.force_encoding(Encoding::UTF_8)
    end

    # URI-unescapes a parameter value and, when +encoding+ is given, tags
    # the result with that charset via the configured charset_encoder.
    def Ruby19.param_decode(str, encoding)
      str = uri_parser.unescape(str)
      str = charset_encoder.encode(str, encoding) if encoding
      str
    end

    # Builds an "encoding'language'escaped-value" parameter string, taking
    # the language from Configuration.instance.param_encode_language.
    def Ruby19.param_encode(str)
      encoding = str.encoding.to_s.downcase
      language = Configuration.instance.param_encode_language
      "#{encoding}'#{language}'#{uri_parser.escape(str)}"
    end

    # Memoized URI::Parser used for parameter escaping and unescaping.
    def Ruby19.uri_parser
      @uri_parser ||= URI::Parser.new
    end

    # Pick a Ruby encoding corresponding to the message charset. Most
    # charsets have a Ruby encoding, but some need manual aliasing here.
    #
    # Returns either an encoding name (String), an Encoding constant, or the
    # charset unchanged when no rule applies.
    #
    # TODO: add this as a test somewhere:
    #   Encoding.list.map { |e| [e.to_s.upcase == pick_encoding(e.to_s.downcase.gsub("-", "")), e.to_s] }.select {|a,b| !b}
    #   Encoding.list.map { |e| [e.to_s == pick_encoding(e.to_s), e.to_s] }.select {|a,b| !b}
    def Ruby19.pick_encoding(charset)
      case charset

      # ISO-8859-8-I etc. http://en.wikipedia.org/wiki/ISO-8859-8-I
      when /^iso-?8859-(\d+)(-i)?$/i
        "ISO-8859-#{$1}"

      # "ISO-2022-JP-KDDI" and alike.
      # This has to be tested before the two-part rule below, whose
      # unanchored pattern also matches these names and would cut off the
      # suffix. The full name is only returned when Ruby knows an encoding
      # by that name; anything else keeps the two-part result.
      when /\Aiso-?(\d{4})-?(\w{1,2})-(\w+)\z/i
        base = "ISO-#{$1}-#{$2}"
        full = "#{base}-#{$3}"
        Encoding.name_list.any? { |name| name.casecmp(full) == 0 } ? full : base

      # ISO-8859-15, ISO-2022-JP and alike
      when /iso-?(\d{4})-?(\w{1,2})/i
        "ISO-#{$1}-#{$2}"

      # UTF-8, UTF-32BE and alike
      when /utf[\-_]?(\d{1,2})?(\w{1,2})/i
        "UTF-#{$1}#{$2}".gsub(/\A(UTF-(?:16|32))\z/, '\\1BE')

      # Windows-1252 and alike
      when /Windows-?(.*)/i
        "Windows-#{$1}"

      when /^8bit$/
        Encoding::ASCII_8BIT

      # alternatives/misspellings of us-ascii seen in the wild
      when /^iso-?646(-us)?$/i, /us=ascii/i
        Encoding::ASCII

      # Microsoft-specific alias for MACROMAN
      when /^macintosh$/i
        Encoding::MACROMAN

      # Microsoft-specific alias for CP949 (Korean)
      when 'ks_c_5601-1987'
        Encoding::CP949

      # Wrongly written Shift_JIS (Japanese)
      when 'shift-jis'
        Encoding::Shift_JIS

      # GB2312 (Chinese charset) is a subset of GB18030 (its replacement)
      when /gb2312/i
        Encoding::GB18030

      else
        charset
      end
    end
  end
end