class Mail::UnstructuredField
:nodoc:
Provides access to an unstructured header field
===Per RFC 2822:
2.2.1. Unstructured Header Field Bodies
Some field bodies in this standard are defined simply as
“unstructured” (which is specified below as any US-ASCII characters,
except for CR and LF) with no further restrictions. These are
referred to as unstructured field bodies. Semantically, unstructured
field bodies are simply to be treated as a single line of characters
with no further processing (except for header “folding” and
“unfolding” as described in section 2.2.3).
def do_decode
# Returns the decoded form of the field value.
#
# Yields nil when Utilities.blank?(value) reports the raw value as blank;
# otherwise the value is passed through Encodings.decode_encode in :decode
# mode and that result is returned.
def do_decode
  return nil if Utilities.blank?(value)

  Encodings.decode_encode(value, :decode)
end
def do_encode
# Returns the encoded header line for this field.
#
# A nil (or otherwise falsy) or empty value produces an empty string.
# Otherwise the result is wrapped_value terminated with CRLF.
def do_encode
  return '' if !value || value.empty?

  "#{wrapped_value}\r\n"
end
def encode(value)
# Encodes +value+ for use inside a "Q" encoded-word.
#
# The value is first packed with the Constants::CAPITAL_M directive and
# every Constants::EQUAL_LF sequence in the packed output is replaced with
# Constants::EMPTY (presumably quoted-printable packing followed by removal
# of its soft line breaks -- confirm against Constants). Characters that are
# significant in an encoded-word are then escaped:
#
#   "  => =22    (  => =28    )  => =29
#   ?  => =3F    _  => =5F    space => _
#
# None of the replacement texts contains a character that is itself escaped,
# so a single pass gives the same result as escaping each character in turn.
#
# Returns a new String; +value+ itself is not modified.
def encode(value)
  escapes = {
    '"' => '=22',
    '(' => '=28',
    ')' => '=29',
    '?' => '=3F',
    '_' => '=5F',
    ' ' => '_'
  }
  packed = [value].pack(Constants::CAPITAL_M).gsub(Constants::EQUAL_LF, Constants::EMPTY)
  packed.gsub(/["()?_ ]/, escapes)
end
def encode_crlf(value)
# Replaces every Constants::CR in +value+ with Constants::CR_ENCODED and
# every Constants::LF with Constants::LF_ENCODED.
#
# The replacement is done in place: +value+ itself is modified and the same
# object is returned, so callers must pass a string they are free to mutate.
def encode_crlf(value)
  value.gsub!(Constants::CR, Constants::CR_ENCODED)
  value.gsub!(Constants::LF, Constants::LF_ENCODED)
  # gsub! returns nil when nothing matched, so return the receiver explicitly.
  value
end
def fold(prepend = 0) # :nodoc:
# Splits the decoded field value into the lines that make up the folded
# header body.
#
# prepend - number of characters already used on the first line (the caller
#           in this file passes the length of "Name: "). It only applies to
#           the first line and is reset to 0 afterwards.
#
# The decoded value is split into words on single space/tab characters and
# words are packed greedily into lines of at most 78 characters. When the
# decoded value is not ASCII-only, every line is emitted as a "Q"
# encoded-word (=?ENCODING?Q?...?=) and the limit is reduced by the size of
# that wrapper.
#
# Returns an Array of line Strings, without the field name and without line
# terminators.
def fold(prepend = 0) # :nodoc:
  encoding = normalized_encoding
  decoded_string = decoded.to_s
  should_encode = !decoded_string.ascii_only?

  words = decoded_string.split(/[ \t]/)
  if should_encode
    # Inside encoded-words the separating space has to travel with the word
    # itself, so every word after the first gets it prepended. ASCII-only
    # words are additionally cut into chunks of at most 7 characters;
    # words containing non-ASCII characters are kept whole.
    words = words.each_with_index.flat_map do |word, index|
      word = " #{word}" unless index.zero?
      word.ascii_only? ? word.scan(/.{7}|.+$/) : word
    end
  end

  folded_lines = []
  until words.empty?
    limit = 78 - prepend
    # "=?" + "?Q?" + "?=" is 7 characters of wrapper, plus the charset name.
    limit = limit - 7 - encoding.length if should_encode
    line = String.new
    first_word = true

    until words.empty?
      word = words.first.dup

      # Convert on 1.9+ only since we aren't sure of the current
      # charset encoding on 1.8. We'd need to track internal/external
      # charset on each field.
      if charset && word.respond_to?(:encoding)
        word = Encodings.transcode_charset(word, word.encoding, charset)
      end

      word = encode(word) if should_encode
      word = encode_crlf(word)

      # Skip to next line if we're going to go past the limit
      # Unless this is the first word, in which case we're going to add it anyway
      # Note: This means that a word that's longer than 998 characters is going to break the spec. Please fix if this is a problem for you.
      # (The fix, it seems, would be to use encoded-word encoding on it, because that way you can break it across multiple lines and
      # the linebreak will be ignored)
      break if !line.empty? && (line.length + word.length + 1 > limit)

      # Remove the word from the queue ...
      words.shift

      # Add word separator (encoded words already carry their own space)
      if first_word
        first_word = false
      else
        line << " " unless should_encode
      end

      # ... add it in encoded form to the current line
      line << word
    end

    # Encode the line if necessary
    line = "=?#{encoding}?Q?#{line}?=" if should_encode

    # Add the line to the output and reset the prepend
    folded_lines << line
    prepend = 0
  end
  folded_lines
end
def initialize(name, value, charset = nil)
Provides access to an unstructured header field
===Per RFC 2822:
2.2.1. Unstructured Header Field Bodies
Some field bodies in this standard are defined simply as
"unstructured" (which is specified below as any US-ASCII characters,
except for CR and LF) with no further restrictions. These are
referred to as unstructured field bodies. Semantically, unstructured
field bodies are simply to be treated as a single line of characters
with no further processing (except for header "folding" and
"unfolding" as described in section 2.2.3).
# Builds the field from a name, a raw value and an optional charset.
#
# name    - passed through to the parent initializer unchanged.
# value   - an Array is joined with ", "; a String tagged ASCII-8BIT is
#           re-tagged as UTF-8 when its bytes form valid UTF-8. The parent
#           initializer always receives value.to_s.
# charset - when nil, defaults to the encoding of the (possibly re-tagged)
#           value if the value responds to #encoding.
def initialize(name, value, charset = nil)
  if value.is_a?(Array)
    # Probably has arrived here from a failed parse of an AddressList Field
    value = value.join(', ')
  elsif value.respond_to?(:force_encoding) && value.encoding == Encoding::ASCII_8BIT
    # Mark UTF-8 strings parsed from ASCII-8BIT
    utf8 = value.dup.force_encoding(Encoding::UTF_8)
    value = utf8 if utf8.valid_encoding?
  end

  charset ||= value.encoding if value.respond_to?(:encoding)

  super name, value.to_s, charset
end
def normalized_encoding
# Returns the charset as an upper-case String with every underscore
# replaced by a hyphen (e.g. "utf_8" becomes "UTF-8"). A nil charset
# yields an empty string because of the to_s conversion.
def normalized_encoding
  charset.to_s.upcase.tr('_', '-')
end
def parse
# Performs no parsing work for this field type and simply returns the
# receiver.
def parse
  self
end
def wrap_lines(name, folded_lines)
6.2. Display of 'encoded-word's
When displaying a particular header field that contains multiple
'encoded-word's, any 'linear-white-space' that separates a pair of
adjacent 'encoded-word's is ignored. (This is to allow the use of
multiple 'encoded-word's to represent long strings of unencoded text,
without having to separate 'encoded-word's where spaces occur in the
unencoded text.)
# Joins folded lines into a single header string.
#
# name         - field name, emitted as "name: " in front of the first line.
# folded_lines - Array of line Strings. It is left untouched: the first
#                line is read by destructuring instead of being shifted
#                off the caller's array.
#
# Lines are joined with CRLF followed by a single space, so every line
# after the first is a folded continuation line. An empty array yields
# just "name: ".
def wrap_lines(name, folded_lines)
  first_line, *continuation_lines = folded_lines
  ["#{name}: #{first_line}", *continuation_lines].join("\r\n\s")
end
def wrapped_value # :nodoc:
2.2.3. Long Header Fields
Each header field is logically a single line of characters comprising
the field name, the colon, and the field body. For convenience
however, and to deal with the 998/78 character limitations per line,
the field body portion of a header field can be split into a multiple
line representation; this is called "folding". The general rule is
that wherever this standard allows for folding white space (not
simply WSP characters), a CRLF may be inserted before any WSP. For
example, the header field:
    Subject: This is a test
can be represented as:
    Subject: This
     is a test
Note: Though structured field bodies are defined in such a way that
folding can take place between many of the lexical tokens (and even
within some of the lexical tokens), folding SHOULD be limited to
placing the CRLF at higher-level syntactic breaks. For instance, if
a field body is defined as comma-separated values, it is recommended
that folding occur after the comma separating the structured items in
preference to other places where the field could be folded, even if
it is allowed elsewhere.
# Returns the complete folded header: the value is folded with the width
# of the "name: " prefix reserved on the first line, and the resulting
# lines are handed to wrap_lines together with the field name.
def wrapped_value # :nodoc:
  prefix_width = "#{name}: ".length
  wrap_lines(name, fold(prefix_width))
end