# frozen_string_literal: true
require_relative "errors"
require_relative "response_parser/parser_utils"
module Net
class IMAP < Protocol
# Parses an \IMAP server response.
class ResponseParser
# Mixes the ParserUtils helpers into every parser instance.
include ParserUtils
# Makes the ParserUtils::Generator methods available at class level
# (presumably the def_*_matchers macros used further down -- confirm in
# response_parser/parser_utils).
extend ParserUtils::Generator
# The config object assigned by #initialize.
attr_reader :config
# Creates a new ResponseParser with no input loaded; see #parse.
#
# +config+ is resolved with <tt>Config[]</tt>.  When the resolved config is
# the global config or is frozen, the parser #config is a new config created
# from it (so it inherits from it).  Otherwise the resolved config is used
# directly.
def initialize(config: Config.global)
  @str = nil
  @pos = nil
  @lex_state = nil
  @token = nil
  resolved = Config[config]
  needs_child = resolved == Config.global || resolved.frozen?
  @config = needs_child ? resolved.new : resolved
end
# :call-seq:
# parse(str) -> ContinuationRequest
# parse(str) -> UntaggedResponse
# parse(str) -> TaggedResponse
#
# Resets the lexer (position zero, default lex state, no buffered token)
# for +str+ and returns the result of parsing a single response from it.
#
# Raises ResponseParseError for unparsable strings.
def parse(str)
  @str, @pos = str, 0
  @lex_state = EXPR_BEG
  @token = nil
  response
end
private
# :stopdoc:
# Lexer states, stored in @lex_state.
EXPR_BEG = :EXPR_BEG # the default, used in most places
EXPR_DATA = :EXPR_DATA # envelope, body(structure), namespaces
# Token types, compared against Token#symbol.  The trailing notes relate each
# token to the character classes of the IMAP grammar.
T_SPACE = :SPACE # atom special
T_ATOM = :ATOM # atom (subset of astring chars)
T_NIL = :NIL # subset of atom and label
T_NUMBER = :NUMBER # subset of atom
T_LBRA = :LBRA # subset of atom
T_PLUS = :PLUS # subset of atom; tag special
T_RBRA = :RBRA # atom special; resp_special; valid astring char
T_QUOTED = :QUOTED # starts/end with atom special
T_BSLASH = :BSLASH # atom special; quoted special
T_LPAR = :LPAR # atom special; paren list delimiter
T_RPAR = :RPAR # atom special; paren list delimiter
T_STAR = :STAR # atom special; list wildcard
T_PERCENT = :PERCENT # atom special; list wildcard
T_LITERAL = :LITERAL # starts with atom special
T_LITERAL8 = :LITERAL8 # starts with atom char "~"
T_CRLF = :CRLF # atom special; text special; quoted special
T_TEXT = :TEXT # any char except CRLF
T_EOF = :EOF # end of response string
# Names of the response conditions, plus frozen lists of the names that are
# accepted together (e.g. AUTH_CONDS is checked by #resp_cond_auth__name and
# RESP_COND_STATES by #resp_cond_state__name).
module ResponseConditions
  OK      = "OK"
  NO      = "NO"
  BAD     = "BAD"
  BYE     = "BYE"
  PREAUTH = "PREAUTH"

  RESP_COND_STATES = [OK, NO, BAD].freeze
  RESP_DATA_CONDS  = [OK, NO, BAD, BYE].freeze
  AUTH_CONDS       = [OK, PREAUTH].freeze
  GREETING_CONDS   = [OK, BYE, PREAUTH].freeze
  RESP_CONDS       = [OK, NO, BAD, BYE, PREAUTH].freeze
end
include ResponseConditions
module Patterns
# Refinement that lets one character-class Regexp be "subtracted" from
# another, e.g. <tt>CHAR - ATOM_SPECIALS</tt>.  Only active in code that
# calls <tt>using CharClassSubtraction</tt>.
module CharClassSubtraction
  refine Regexp do
    # Returns a frozen binary (/n) Regexp for the character class matching
    # whatever the receiver matches, except what +rhs+ matches.  Both sides
    # are embedded by #source, so they are expected to be character classes.
    def -(rhs)
      /[#{source}&&[^#{rhs.source}]]/n.freeze
    end
  end
end
using CharClassSubtraction
# From RFC5234, "Augmented BNF for Syntax Specifications: ABNF"
# >>>
# ALPHA = %x41-5A / %x61-7A ; A-Z / a-z
# CHAR = %x01-7F
# CRLF = CR LF
# ; Internet standard newline
# CTL = %x00-1F / %x7F
# ; controls
# DIGIT = %x30-39
# ; 0-9
# DQUOTE = %x22
# ; " (Double Quote)
# HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F"
# OCTET = %x00-FF
# SP = %x20
#
# Each constant below matches one occurrence of the rule with the same name.
# n.b. HEXDIG is written with \h, which also accepts lowercase "a"-"f", and
# it is the only pattern here without the /n flag.
module RFC5234
ALPHA = /[A-Za-z]/n
CHAR = /[\x01-\x7f]/n
CRLF = /\r\n/n
CTL = /[\x00-\x1F\x7F]/n
DIGIT = /\d/n
DQUOTE = /"/n
HEXDIG = /\h/
OCTET = /[\x00-\xFF]/n # not using /./m for embedding purposes
SP = / /n
end
# UTF-8, a transformation format of ISO 10646
# >>>
# UTF8-1 = %x00-7F
# UTF8-tail = %x80-BF
# UTF8-2 = %xC2-DF UTF8-tail
# UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
# %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
# UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
# %xF4 %x80-8F 2( UTF8-tail )
# UTF8-char = UTF8-1 / UTF8-2 / UTF8-3 / UTF8-4
# UTF8-octets = *( UTF8-char )
#
# n.b. String * Integer is used for repetition, rather than /x{3}/,
# because ruby 3.2's linear-time cache-based optimization doesn't work
# with "bounded or fixed times repetition nesting in another repetition
# (e.g. /(a{2,3})*/). It is an implementation issue entirely, but we
# believe it is hard to support this case correctly."
# See https://bugs.ruby-lang.org/issues/19104
#
# The UTF8_3 and UTF8_4 alternatives are listed in a different order than in
# the ABNF above, but they are the same alternatives.
module RFC3629
UTF8_1 = /[\x00-\x7f]/n # aka ASCII 7bit
UTF8_TAIL = /[\x80-\xBF]/n
UTF8_2 = /[\xC2-\xDF]#{UTF8_TAIL}/n
UTF8_3 = Regexp.union(/\xE0[\xA0-\xBF]#{UTF8_TAIL}/n,
/\xED[\x80-\x9F]#{UTF8_TAIL}/n,
/[\xE1-\xEC]#{ UTF8_TAIL.source * 2}/n,
/[\xEE-\xEF]#{ UTF8_TAIL.source * 2}/n)
UTF8_4 = Regexp.union(/[\xF1-\xF3]#{ UTF8_TAIL.source * 3}/n,
/\xF0[\x90-\xBF]#{UTF8_TAIL.source * 2}/n,
/\xF4[\x80-\x8F]#{UTF8_TAIL.source * 2}/n)
UTF8_CHAR = Regexp.union(UTF8_1, UTF8_2, UTF8_3, UTF8_4)
UTF8_OCTETS = /#{UTF8_CHAR}*/n
end
# Bring the RFC5234 and RFC3629 constants (CHAR, SP, UTF8_2, ...) into this
# module so that the patterns below can refer to them unqualified.
include RFC5234
include RFC3629
# CHAR8 = %x01-ff
# ; any OCTET except NUL, %x00
CHAR8 = /[\x01-\xff]/n
# list-wildcards = "%" / "*"
LIST_WILDCARDS = /[%*]/n
# quoted-specials = DQUOTE / "\"
QUOTED_SPECIALS = /["\\]/n
# resp-specials = "]"
RESP_SPECIALS = /[\]]/n
# atomish = 1*<any ATOM-CHAR except "[">
# ; We use "atomish" for msg-att and section, in order
# ; to simplify "BODY[HEADER.FIELDS (foo bar)]".
#
# atom-specials = "(" / ")" / "{" / SP / CTL / list-wildcards /
# quoted-specials / resp-specials
# ATOM-CHAR = <any CHAR except atom-specials>
# atom = 1*ATOM-CHAR
# ASTRING-CHAR = ATOM-CHAR / resp-specials
# tag = 1*<any ASTRING-CHAR except "+">
#
# ASTRING_SPECIALS is ATOM_SPECIALS without "]".  The "-" operator used
# below is the character class subtraction from CharClassSubtraction.
ATOM_SPECIALS = /[(){ \x00-\x1f\x7f%*"\\\]]/n
ASTRING_SPECIALS = /[(){ \x00-\x1f\x7f%*"\\]/n
ASTRING_CHAR = CHAR - ASTRING_SPECIALS
ATOM_CHAR = CHAR - ATOM_SPECIALS
ATOM = /#{ATOM_CHAR}+/n
ASTRING_CHARS = /#{ASTRING_CHAR}+/n
ATOMISH = /#{ATOM_CHAR - /[\[]/ }+/
TAG = /#{ASTRING_CHAR - /[+]/ }+/
# TEXT-CHAR = <any CHAR except CR and LF>
TEXT_CHAR = CHAR - /[\r\n]/
# resp-text-code = ... / atom [SP 1*<any TEXT-CHAR except "]">]
CODE_TEXT_CHAR = TEXT_CHAR - RESP_SPECIALS
CODE_TEXT = /#{CODE_TEXT_CHAR}+/n
# flag = "\Answered" / "\Flagged" / "\Deleted" /
# "\Seen" / "\Draft" / flag-keyword / flag-extension
# ; Does not include "\Recent"
# flag-extension = "\" atom
# ; Future expansion. Client implementations
# ; MUST accept flag-extension flags. Server
# ; implementations MUST NOT generate
# ; flag-extension flags except as defined by
# ; a future Standard or Standards Track
# ; revisions of this specification.
# flag-keyword = "$MDNSent" / "$Forwarded" / "$Junk" /
# "$NotJunk" / "$Phishing" / atom
#
# flag-perm = flag / "\*"
#
# Not checking for max one mbx-list-sflag in the parser.
# >>>
# mbx-list-oflag = "\Noinferiors" / child-mbox-flag /
# "\Subscribed" / "\Remote" / flag-extension
# ; Other flags; multiple from this list are
# ; possible per LIST response, but each flag
# ; can only appear once per LIST response
# mbx-list-sflag = "\NonExistent" / "\Noselect" / "\Marked" /
# "\Unmarked"
# ; Selectability flags; only one per LIST response
# child-mbox-flag = "\HasChildren" / "\HasNoChildren"
# ; attributes for the CHILDREN return option, at most
# ; one possible per LIST response
#
# FLAG is an atom with an optional leading backslash, so one pattern covers
# the system flags, flag-extension and flag-keyword.
FLAG = /\\?#{ATOM}/n
FLAG_EXTENSION = /\\#{ATOM}/n
FLAG_KEYWORD = ATOM
FLAG_PERM = Regexp.union(FLAG, "\\*")
MBX_FLAG = FLAG_EXTENSION
# flag-list = "(" [flag *(SP flag)] ")"
# resp-text-code =/ "PERMANENTFLAGS" SP
# "(" [flag-perm *(SP flag-perm)] ")"
# mbx-list-flags = *(mbx-list-oflag SP) mbx-list-sflag
# *(SP mbx-list-oflag) /
# mbx-list-oflag *(SP mbx-list-oflag)
# (Not checking for max one mbx-list-sflag in the parser.)
#
# The list patterns are anchored with \G and capture the flags (without the
# parentheses) in group 1.  MBX_LIST_FLAGS uses /x, so the literal spaces in
# its source are not part of the match, and it does not match parentheses.
FLAG_LIST = /\G\((#{FLAG }(?:#{SP}#{FLAG })*|)\)/ni
FLAG_PERM_LIST = /\G\((#{FLAG_PERM}(?:#{SP}#{FLAG_PERM})*|)\)/ni
MBX_LIST_FLAGS = /\G (#{MBX_FLAG }(?:#{SP}#{MBX_FLAG })*) /nix
# Gmail allows SP and "]" in flags.......
# QUIRKY_FLAGS_LIST therefore captures everything up to the first ")".
QUIRKY_FLAG = Regexp.union(/\\?#{ASTRING_CHARS}/n, "\\*")
QUIRKY_FLAGS_LIST = /\G\(( [^)]* )\)/nx
# RFC3501:
# QUOTED-CHAR = <any TEXT-CHAR except quoted-specials> /
# "\" quoted-specials
# RFC9051:
# QUOTED-CHAR = <any TEXT-CHAR except quoted-specials> /
# "\" quoted-specials / UTF8-2 / UTF8-3 / UTF8-4
# RFC3501 & RFC9051:
# quoted = DQUOTE *QUOTED-CHAR DQUOTE
#
# The "rev1" patterns follow the RFC3501 rules and the "rev2" patterns the
# RFC9051 rules.  QUOTED_rev1 and QUOTED_rev2 capture the text between the
# double quotes, with any escapes still in place.
QUOTED_CHAR_safe = TEXT_CHAR - QUOTED_SPECIALS
QUOTED_CHAR_esc = /\\#{QUOTED_SPECIALS}/n
QUOTED_CHAR_rev1 = Regexp.union(QUOTED_CHAR_safe, QUOTED_CHAR_esc)
QUOTED_CHAR_rev2 = Regexp.union(QUOTED_CHAR_rev1,
UTF8_2, UTF8_3, UTF8_4)
QUOTED_rev1 = /"(#{QUOTED_CHAR_rev1}*)"/n
QUOTED_rev2 = /"(#{QUOTED_CHAR_rev2}*)"/n
# RFC3501:
# text = 1*TEXT-CHAR
# RFC9051:
# text = 1*(TEXT-CHAR / UTF8-2 / UTF8-3 / UTF8-4)
# ; Non-ASCII text can only be returned
# ; after ENABLE IMAP4rev2 command
TEXT_rev1 = /#{TEXT_CHAR}+/
TEXT_rev2 = /#{Regexp.union TEXT_CHAR, UTF8_2, UTF8_3, UTF8_4}+/
# tagged-label-fchar = ALPHA / "-" / "_" / "."
TAGGED_LABEL_FCHAR = /[a-zA-Z\-_.]/n
# tagged-label-char = tagged-label-fchar / DIGIT / ":"
#
# n.b. despite the singular name, this pattern already includes the "*"
# repetition: it matches a (possibly empty) run of tagged-label-char.
TAGGED_LABEL_CHAR = /[a-zA-Z\-_.0-9:]*/n
# tagged-ext-label = tagged-label-fchar *tagged-label-char
# ; Is a valid RFC 3501 "atom".
#
# TAGGED_LABEL_CHAR is already repeated, so it must not be quantified a
# second time here.  A nested quantifier such as /(?:x*)*/ matches exactly
# the same strings, but it can backtrack badly when this pattern is embedded
# in a larger regexp that fails to match after the label.
TAGGED_EXT_LABEL = /#{TAGGED_LABEL_FCHAR}#{TAGGED_LABEL_CHAR}/n
# nz-number = digit-nz *DIGIT
# ; Non-zero unsigned 32-bit integer
# ; (0 < n < 4,294,967,296)
#
# n.b. only the leading non-zero digit is checked; the 32-bit range is not.
NZ_NUMBER = /[1-9]\d*/n
# seq-number = nz-number / "*"
# ; message sequence number (COPY, FETCH, STORE
# ; commands) or unique identifier (UID COPY,
# ; UID FETCH, UID STORE commands).
# ; * represents the largest number in use. In
# ; the case of message sequence numbers, it is
# ; the number of messages in a non-empty mailbox.
# ; In the case of unique identifiers, it is the
# ; unique identifier of the last message in the
# ; mailbox or, if the mailbox is empty, the
# ; mailbox's current UIDNEXT value.
# ; The server should respond with a tagged BAD
# ; response to a command that uses a message
# ; sequence number greater than the number of
# ; messages in the selected mailbox. This
# ; includes "*" if the selected mailbox is empty.
SEQ_NUMBER = /#{NZ_NUMBER}|\*/n
# seq-range = seq-number ":" seq-number
# ; two seq-number values and all values between
# ; these two regardless of order.
# ; Example: 2:4 and 4:2 are equivalent and
# ; indicate values 2, 3, and 4.
# ; Example: a unique identifier sequence range of
# ; 3291:* includes the UID of the last message in
# ; the mailbox, even if that value is less than
# ; 3291.
SEQ_RANGE = /#{SEQ_NUMBER}:#{SEQ_NUMBER}/n
# sequence-set = (seq-number / seq-range) ["," sequence-set]
# ; set of seq-number values, regardless of order.
# ; Servers MAY coalesce overlaps and/or execute
# ; the sequence in any order.
# ; Example: a message sequence number set of
# ; 2,4:7,9,12:* for a mailbox with 15 messages is
# ; equivalent to 2,4,5,6,7,9,12,13,14,15
# ; Example: a message sequence number set of
# ; *:4,5:7 for a mailbox with 10 messages is
# ; equivalent to 10,9,8,7,6,5,4,5,6,7 and MAY
# ; be reordered and overlap coalesced to be
# ; 4,5,6,7,8,9,10.
#
# SEQUENCE_SET_STR is the anchored form, for validating a whole string.
SEQUENCE_SET_ITEM = /#{SEQ_NUMBER}|#{SEQ_RANGE}/n
SEQUENCE_SET = /#{SEQUENCE_SET_ITEM}(?:,#{SEQUENCE_SET_ITEM})*/n
SEQUENCE_SET_STR = /\A#{SEQUENCE_SET}\z/n
# partial-range-first = nz-number ":" nz-number
# ;; Request to search from oldest (lowest UIDs) to
# ;; more recent messages.
# ;; A range 500:400 is the same as 400:500.
# ;; This is similar to <seq-range> from [RFC3501]
# ;; but cannot contain "*".
#
# Anchored to the whole string; captures the two numbers.
PARTIAL_RANGE_FIRST = /\A(#{NZ_NUMBER}):(#{NZ_NUMBER})\z/n
# partial-range-last = MINUS nz-number ":" MINUS nz-number
# ;; Request to search from newest (highest UIDs) to
# ;; oldest messages.
# ;; A range -500:-400 is the same as -400:-500.
#
# Anchored to the whole string; each capture includes its leading "-".
PARTIAL_RANGE_LAST = /\A(-#{NZ_NUMBER}):(-#{NZ_NUMBER})\z/n
# partial-range = partial-range-first / partial-range-last
PARTIAL_RANGE = Regexp.union(PARTIAL_RANGE_FIRST,
PARTIAL_RANGE_LAST)
# RFC3501:
# literal = "{" number "}" CRLF *CHAR8
# ; Number represents the number of CHAR8s
# RFC9051:
# literal = "{" number64 ["+"] "}" CRLF *CHAR8
# ; <number64> represents the number of CHAR8s.
# ; A non-synchronizing literal is distinguished
# ; from a synchronizing literal by the presence of
# ; "+" before the closing "}".
# ; Non-synchronizing literals are not allowed when
# ; sent from server to the client.
#
# Matches only the "{n}" CRLF prefix and captures the byte count; the
# optional "+" is deliberately not matched (see the last note above).
LITERAL = /\{(\d+)\}\r\n/n
# RFC3516 (BINARY):
# literal8 = "~{" number "}" CRLF *OCTET
# ; <number> represents the number of OCTETs
# ; in the response string.
# RFC9051:
# literal8 = "~{" number64 "}" CRLF *OCTET
# ; <number64> represents the number of OCTETs
# ; in the response string.
#
# Matches only the "~{n}" CRLF prefix and captures the byte count.
LITERAL8 = /~\{(\d+)\}\r\n/n
module_function
# Removes the backslash from every escaped quoted-special (<tt>\"</tt> and
# <tt>\\</tt>) in +quoted+, in place, and tags the string as UTF-8.
#
# Returns +quoted+ itself, or +nil+ when +quoted+ is +nil+.
#
# n.b. String#gsub! returns +nil+ when it makes no substitution, so its
# result must not be chained: doing so skipped #force_encoding (and returned
# +nil+) for every string that contained no escapes.
def unescape_quoted!(quoted)
  return if quoted.nil?
  quoted.gsub!(/\\(#{QUOTED_SPECIALS})/n, "\\1")
  quoted.force_encoding("UTF-8")
end
# Returns a UTF-8 tagged copy of +quoted+ in which the backslash has been
# removed from every escaped quoted-special (<tt>\"</tt> and <tt>\\</tt>).
# +quoted+ itself is left untouched.  Returns +nil+ when +quoted+ is +nil+.
def unescape_quoted(quoted)
  return if quoted.nil?
  unescaped = quoted.gsub(/\\(#{QUOTED_SPECIALS})/n, "\\1")
  unescaped.force_encoding("UTF-8")
end
end
# the default, used in most places
#
# One \G-anchored alternation with a capture group per token type; the
# (?# n: NAME) comments give the group number that identifies each token.
# Matching is case-insensitive (/i), so "nil" is recognized as well as "NIL".
BEG_REGEXP = /\G(?:\
(?# 1: SPACE )( )|\
(?# 2: LITERAL8)#{Patterns::LITERAL8}|\
(?# 3: ATOM prefixed with a compatible subtype)\
((?:\
(?# 4: NIL )(NIL)|\
(?# 5: NUMBER )(\d+)|\
(?# 6: PLUS )(\+))\
(?# 7: ATOM remaining after prefix )(#{Patterns::ATOMISH})?\
(?# This enables greedy alternation without lookahead, in linear time.)\
)|\
(?# Also need to check for ATOM without a subtype prefix.)\
(?# 8: ATOM )(#{Patterns::ATOMISH})|\
(?# 9: QUOTED )#{Patterns::QUOTED_rev2}|\
(?# 10: LPAR )(\()|\
(?# 11: RPAR )(\))|\
(?# 12: BSLASH )(\\)|\
(?# 13: STAR )(\*)|\
(?# 14: LBRA )(\[)|\
(?# 15: RBRA )(\])|\
(?# 16: LITERAL )#{Patterns::LITERAL}|\
(?# 17: PERCENT )(%)|\
(?# 18: CRLF )(\r\n)|\
(?# 19: EOF )(\z))/ni
# envelope, body(structure), namespaces
#
# A smaller \G-anchored alternation than BEG_REGEXP: it has no ATOM
# alternative, only the tokens listed in the (?# n: NAME) comments.
DATA_REGEXP = /\G(?:\
(?# 1: SPACE )( )|\
(?# 2: NIL )(NIL)|\
(?# 3: NUMBER )(\d+)|\
(?# 4: QUOTED )#{Patterns::QUOTED_rev2}|\
(?# 5: LITERAL )#{Patterns::LITERAL}|\
(?# 6: LPAR )(\()|\
(?# 7: RPAR )(\)))/ni
# text, after 'resp-text-code "]"'
# (uses the RFC9051 text rule, which also allows non-ASCII UTF-8)
TEXT_REGEXP = /\G(#{Patterns::TEXT_rev2})/n
# resp-text-code, after 'atom SP'
# (one or more TEXT-CHAR other than "]")
CTEXT_REGEXP = /\G(#{Patterns::CODE_TEXT})/n
# A lexed token: +symbol+ is one of the T_* token types and +value+ is the
# value that goes with it.
Token = Struct.new(:symbol, :value)
# Matchers for single-character tokens: name, the character, and the name of
# its token type.  The generator is defined outside this file; presumably it
# defines the methods used throughout this class such as SP? / SP!, lpar /
# lpar? / rpar, peek_lbra? / peek_rpar? and lookahead_rpar? -- confirm in
# ParserUtils::Generator.
def_char_matchers :SP, " ", :T_SPACE
def_char_matchers :PLUS, "+", :T_PLUS
def_char_matchers :STAR, "*", :T_STAR
def_char_matchers :lpar, "(", :T_LPAR
def_char_matchers :rpar, ")", :T_RPAR
def_char_matchers :lbra, "[", :T_LBRA
def_char_matchers :rbra, "]", :T_RBRA
# Matchers that accept one of several token types.  The generator is defined
# outside this file; judging by their use in this class, each call provides
# at least a +name+ and a <tt>name?</tt> method (e.g. #number and #number?).
#
# valid number ranges are not enforced by parser
# number = 1*DIGIT
# ; Unsigned 32-bit integer
# ; (0 <= n < 4,294,967,296)
def_token_matchers :number, T_NUMBER, coerce: Integer
def_token_matchers :quoted, T_QUOTED
# string = quoted / literal
def_token_matchers :string, T_QUOTED, T_LITERAL
# used by nstring8 = nstring / literal8
def_token_matchers :string8, T_QUOTED, T_LITERAL, T_LITERAL8
# use where string represents "LABEL" values
def_token_matchers :case_insensitive__string,
T_QUOTED, T_LITERAL,
send: :upcase
# n.b: NIL? and NIL! return the "NIL" atom string (truthy) on success.
# NIL? returns nil when it does *not* match
def_token_matchers :NIL, T_NIL
# In addition to explicit uses of +tagged-ext-label+, use this to match
# keywords when the grammar has not provided any extension syntax.
#
# Do *not* use this for labels where the grammar specifies extensions
# can be +atom+, even if all currently defined labels would match. For
# example response codes in +resp-text-code+.
#
# tagged-ext-label = tagged-label-fchar *tagged-label-char
# ; Is a valid RFC 3501 "atom".
# tagged-label-fchar = ALPHA / "-" / "_" / "."
# tagged-label-char = tagged-label-fchar / DIGIT / ":"
#
# TODO: add to lexer and only match tagged-ext-label
# (for now, any T_ATOM or T_NIL token is accepted)
def_token_matchers :tagged_ext_label, T_ATOM, T_NIL, send: :upcase
def_token_matchers :CRLF, T_CRLF
def_token_matchers :EOF, T_EOF
# atom = 1*ATOM-CHAR
# ATOM-CHAR = <any CHAR except atom-specials>
#
# Token types that are combined into a single atom.  Frozen, like the other
# token lists in this class, so that the shared list cannot be modified by
# accident.
ATOM_TOKENS = [T_ATOM, T_NUMBER, T_NIL, T_LBRA, T_PLUS].freeze
# Token types that are combined by #sequence_set.
SEQUENCE_SET_TOKENS = [T_ATOM, T_NUMBER, T_STAR].freeze
# sequence-set = (seq-number / seq-range) ["," sequence-set]
# sequence-set =/ seq-last-command
# ; Allow for "result of the last command"
# ; indicator.
# seq-last-command = "$"
#
# Combines the adjacent SEQUENCE_SET_TOKENS into one string and returns
# <tt>SequenceSet[str]</tt> when that string is a valid sequence-set;
# otherwise reports a parse error.
#
# *note*: doesn't match seq-last-command
def sequence_set
  str = combine_adjacent(*SEQUENCE_SET_TOKENS)
  unless Patterns::SEQUENCE_SET_STR.match?(str)
    return parse_error("unexpected atom %p, expected sequence-set", str)
  end
  SequenceSet[str]
end
# ASTRING-CHAR = ATOM-CHAR / resp-specials
# resp-specials = "]"
#
# ASTRING_CHARS_TOKENS: the tokens that can make up an unquoted astring.
# ASTRING_TOKENS: every token that can start an astring, i.e. the above plus
# the two string forms (quoted and literal).
ASTRING_CHARS_TOKENS = [*ATOM_TOKENS, T_RBRA].freeze
ASTRING_TOKENS = [T_QUOTED, *ASTRING_CHARS_TOKENS, T_LITERAL].freeze
# tag = 1*<any ASTRING-CHAR except "+">
TAG_TOKENS = (ASTRING_CHARS_TOKENS - [T_PLUS]).freeze
# TODO: handle atom, astring_chars, and tag entirely inside the lexer

# atom: the adjacent ATOM_TOKENS, combined.
def atom
  combine_adjacent(*ATOM_TOKENS)
end

# 1*ASTRING-CHAR: the adjacent ASTRING_CHARS_TOKENS, combined.
def astring_chars
  combine_adjacent(*ASTRING_CHARS_TOKENS)
end

# tag: the adjacent TAG_TOKENS, combined.
def tag
  combine_adjacent(*TAG_TOKENS)
end
# the #accept version of #atom: returns +nil+, without combining anything,
# unless the lookahead token is one of ATOM_TOKENS.
def atom?
  return unless lookahead?(*ATOM_TOKENS)
  -combine_adjacent(*ATOM_TOKENS)
end

# Returns <tt>atom.upcase</tt>
def case_insensitive__atom
  upcased = combine_adjacent(*ATOM_TOKENS).upcase
  -upcased
end

# Returns <tt>atom?&.upcase</tt>
def case_insensitive__atom?
  return unless lookahead?(*ATOM_TOKENS)
  upcased = combine_adjacent(*ATOM_TOKENS).upcase
  -upcased
end
# astring = 1*ASTRING-CHAR / string
#
# Uses #astring_chars when the lookahead token is one of
# ASTRING_CHARS_TOKENS, and #string otherwise.
def astring
  if lookahead?(*ASTRING_CHARS_TOKENS)
    astring_chars
  else
    string
  end
end

# Like #astring, but falls back to <tt>string?</tt> instead of #string.
def astring?
  if lookahead?(*ASTRING_CHARS_TOKENS)
    astring_chars
  else
    string?
  end
end
# Use #label or #label_in to assert specific known labels
# (+tagged-ext-label+ only, not +atom+).
#
# Reads a tagged-ext-label and returns it when it equals +word+; otherwise
# reports a parse error naming both.
def label(word)
  val = tagged_ext_label
  return val if val == word
  parse_error("unexpected atom %p, expected %p instead", val, word)
end
# Use #label or #label_in to assert specific known labels
# (+tagged-ext-label+ only, not +atom+).
#
# Reads a tagged-ext-label and returns it when it is one of +labels+;
# otherwise reports a parse error listing the accepted labels.
def label_in(*labels)
  lbl = tagged_ext_label
  return lbl if lbl && labels.include?(lbl)
  parse_error("unexpected atom %p, expected one of %s instead",
              lbl, labels.join(" or "))
end
# expects "OK" or "PREAUTH" and raises InvalidResponseError on failure
def resp_cond_auth__name
  lbl = tagged_ext_label
  return lbl if lbl && AUTH_CONDS.include?(lbl)
  expected = AUTH_CONDS.join(" or ")
  raise InvalidResponseError,
        format("bad response type %p, expected %s", lbl, expected)
end
# expects "OK" or "NO" or "BAD" and raises InvalidResponseError on failure
def resp_cond_state__name
  lbl = tagged_ext_label
  return lbl if lbl && RESP_COND_STATES.include?(lbl)
  expected = RESP_COND_STATES.join(" or ")
  raise InvalidResponseError,
        format("bad response type %p, expected %s", lbl, expected)
end
# nstring = string / nil
#
# Returns +nil+ when NIL is matched, and the string otherwise.
def nstring
  return nil if NIL?
  string
end

# nstring8 = nstring / literal8
def nstring8
  return nil if NIL?
  string8
end

# Either NIL (returned as +nil+) or a quoted string.
def nquoted
  return nil if NIL?
  quoted
end

# use where nstring represents "LABEL" values
def case_insensitive__nstring
  return nil if NIL?
  case_insensitive__string
end
# tagged-ext-comp = astring /
# tagged-ext-comp *(SP tagged-ext-comp) /
# "(" tagged-ext-comp ")"
# ; Extensions that follow this general
# ; syntax should use nstring instead of
# ; astring when appropriate in the context
# ; of the extension.
# ; Note that a message set or a "number"
# ; can always be represented as an "atom".
# ; A URL should be represented as
# ; a "quoted" string.
#
# Returns an array with one entry per SP-separated component: a nested
# array for a parenthesized component, the #number result for a number
# token, and the #astring result for anything else.
def tagged_ext_comp
  vals = []
  while true
    upcoming = lookahead!(*ASTRING_TOKENS, T_LPAR)
    val =
      case upcoming.symbol
      when T_LPAR
        lpar
        nested = tagged_ext_comp
        rpar
        nested
      when T_NUMBER
        number
      else
        astring
      end
    vals << val
    break unless SP?
  end
  vals
end
# tagged-ext-simple is a subset of atom
# TODO: recognize sequence-set in the lexer
#
# tagged-ext-simple = sequence-set / number / number64
#
# A plain number is tried first; anything else must be a sequence-set.
def tagged_ext_simple
  num = number?
  num || sequence_set
end
# tagged-ext-val = tagged-ext-simple /
# "(" [tagged-ext-comp] ")"
#
# Returns the #tagged_ext_simple value, or, for the parenthesized form, the
# #tagged_ext_comp array (an empty array for "()").
def tagged_ext_val
  return tagged_ext_simple unless lpar?
  comp = peek_rpar? ? [] : tagged_ext_comp
  rpar
  comp
end
# mailbox = "INBOX" / astring
# ; INBOX is case-insensitive. All case variants of
# ; INBOX (e.g., "iNbOx") MUST be interpreted as INBOX
# ; not as an astring. An astring which consists of
# ; the case-insensitive sequence "I" "N" "B" "O" "X"
# ; is considered to be INBOX and not an astring.
# ; Refer to section 5.1 for further
# ; semantic details of mailbox names.
#
# n.b. parsed exactly like astring; "INBOX" is not special-cased here.
alias mailbox astring
# valid number ranges are not enforced by parser
# number64 = 1*DIGIT
# ; Unsigned 63-bit integer
# ; (0 <= n <= 9,223,372,036,854,775,807)
alias number64 number
alias number64? number?
# valid number ranges are not enforced by parser
# nz-number = digit-nz *DIGIT
# ; Non-zero unsigned 32-bit integer
# ; (0 < n < 4,294,967,296)
#
# n.b. as an alias of #number, this accepts zero as well.
alias nz_number number
alias nz_number? number?
# valid number ranges are not enforced by parser
# nz-number64 = digit-nz *DIGIT
# ; Unsigned 63-bit integer
# ; (0 < n <= 9,223,372,036,854,775,807)
alias nz_number64 nz_number
# valid number ranges are not enforced by parser
# uniqueid = nz-number
# ; Strictly ascending
alias uniqueid nz_number
# valid number ranges are not enforced by parser
#
# a 64-bit unsigned integer and is the decimal equivalent for the ID hex
# string used in the web interface and the Gmail API.
alias x_gm_id number
# [RFC3501 & RFC9051:]
# response = *(continue-req / response-data) response-done
#
# For simplicity, response isn't interpreted as the combination of the
# three response types, but instead represents any individual server
# response. Our simplified interpretation is defined as:
# response = continue-req | response_data | response-tagged
#
# The first token selects the response type: "+" for a continuation
# request, "*" for untagged data, and a tag otherwise.  The response must be
# followed by CRLF and then the end of the string.
#
# n.b: our "response-tagged" definition parses "greeting" too.
def response
  first = lookahead!(T_PLUS, T_STAR, *TAG_TOKENS)
  resp =
    case first.symbol
    when T_PLUS then continue_req
    when T_STAR then response_data
    else response_tagged
    end
  accept_spaces # QUIRKY: Ignore trailing space (MS Exchange Server?)
  CRLF!
  EOF!
  resp
end
# RFC3501 & RFC9051:
# continue-req = "+" SP (resp-text / base64) CRLF
#
# n.b: base64 is valid resp-text. And in the spirit of RFC9051 Appx E 23
# (and to workaround existing servers), we use the following grammar:
#
# continue-req = "+" (SP (resp-text)) CRLF
#
# When nothing follows the "+", the text is ResponseText::EMPTY.
def continue_req
  PLUS!
  text = SP? ? resp_text : ResponseText::EMPTY
  ContinuationRequest.new(text, @str)
end
# Peeks at the type label of an untagged response, skipping the optional
# leading number (as in "* 12 FETCH ...").
RE_RESPONSE_TYPE = /\G(?:\d+ )?(?<type>#{Patterns::TAGGED_EXT_LABEL})/n
# [RFC3501:]
# response-data = "*" SP (resp-cond-state / resp-cond-bye /
# mailbox-data / message-data / capability-data) CRLF
# [RFC4466:]
# response-data = "*" SP response-payload CRLF
# response-payload = resp-cond-state / resp-cond-bye /
# mailbox-data / message-data / capability-data
# RFC5161 (ENABLE capability):
# response-data =/ "*" SP enable-data CRLF
# RFC5255 (LANGUAGE capability)
# response-payload =/ language-data
# RFC5255 (I18NLEVEL=1 and I18NLEVEL=2 capabilities)
# response-payload =/ comparator-data
# [RFC9051:]
# response-data = "*" SP (resp-cond-state / resp-cond-bye /
# mailbox-data / message-data / capability-data /
# enable-data) CRLF
#
# [merging in greeting and response-fatal:]
# greeting = "*" SP (resp-cond-auth / resp-cond-bye) CRLF
# response-fatal = "*" SP resp-cond-bye CRLF
# response-data =/ "*" SP (resp-cond-auth / resp-cond-bye) CRLF
# [removing duplicates, this is simply]
# response-payload =/ resp-cond-auth
#
# Consumes "*" SP, peeks at the response type without consuming it, and
# dispatches on the upcased type to the method that parses the rest.
# Unknown types are handled by #response_data__unhandled.
#
# NOTE(review): the order of the +when+ clauses looks deliberate (presumably
# most frequent first) -- confirm before reordering.
#
# TODO: remove resp-cond-auth and handle greeting separately
def response_data
STAR!; SP!
m = peek_re(RE_RESPONSE_TYPE) or parse_error("unparsable response")
case m["type"].upcase
when "OK" then resp_cond_state__untagged # RFC3501, RFC9051
when "FETCH" then message_data__fetch # RFC3501, RFC9051
when "EXPUNGE" then message_data__expunge # RFC3501, RFC9051
when "EXISTS" then mailbox_data__exists # RFC3501, RFC9051
when "ESEARCH" then esearch_response # RFC4731, RFC9051, etc
when "VANISHED" then expunged_resp # RFC7162
when "UIDFETCH" then uidfetch_resp # RFC9586
when "SEARCH" then mailbox_data__search # RFC3501 (obsolete)
when "CAPABILITY" then capability_data__untagged # RFC3501, RFC9051
when "FLAGS" then mailbox_data__flags # RFC3501, RFC9051
when "LIST" then mailbox_data__list # RFC3501, RFC9051
when "STATUS" then mailbox_data__status # RFC3501, RFC9051
when "NAMESPACE" then namespace_response # RFC2342, RFC9051
when "ENABLED" then enable_data # RFC5161, RFC9051
when "BAD" then resp_cond_state__untagged # RFC3501, RFC9051
when "NO" then resp_cond_state__untagged # RFC3501, RFC9051
when "PREAUTH" then resp_cond_auth # RFC3501, RFC9051
when "BYE" then resp_cond_bye # RFC3501, RFC9051
when "RECENT" then mailbox_data__recent # RFC3501 (obsolete)
when "SORT" then sort_data # RFC5256, RFC7162
when "THREAD" then thread_data # RFC5256
when "QUOTA" then quota_response # RFC2087, RFC9208
when "QUOTAROOT" then quotaroot_response # RFC2087, RFC9208
when "ID" then id_response # RFC2971
when "ACL" then acl_data # RFC4314
when "LISTRIGHTS" then listrights_data # RFC4314
when "MYRIGHTS" then myrights_data # RFC4314
when "METADATA" then metadata_resp # RFC5464
when "LANGUAGE" then language_data # RFC5255
when "COMPARATOR" then comparator_data # RFC5255
when "CONVERTED" then message_data__converted # RFC5259
when "LSUB" then mailbox_data__lsub # RFC3501 (obsolete)
when "XLIST" then mailbox_data__xlist # deprecated
when "NOOP" then response_data__noop
else response_data__unhandled
end
end
# Generic fallback for untagged responses without a dedicated parser:
#
#   [number SP] label [SP unparsed-text]
#
# The response data is an UnparsedNumericResponseData when both a number and
# text are present, an UnparsedData when there is only text, and otherwise
# the number (or +nil+).  +klass+ is the response class to instantiate.
def response_data__unhandled(klass = UntaggedResponse)
  num = number?
  SP?
  type = tagged_ext_label
  SP?
  text = remaining_unparsed
  data =
    if !text then num
    elsif num then UnparsedNumericResponseData.new(num, text)
    else UnparsedData.new(text)
    end
  klass.new(type, data, @str)
end
# reads all the way up until CRLF
#
# More precisely: takes everything from the current position up to, but not
# including, the last two bytes of the response (expected to be the CRLF)
# and advances the position past it.  Returns +nil+ when nothing is left.
def remaining_unparsed
  rest = @str[@pos...-2]
  return nil unless rest
  @pos += rest.bytesize
  rest.empty? ? nil : rest
end
# Same as #response_data__unhandled, but wraps the result in IgnoredResponse.
def response_data__ignored; response_data__unhandled(IgnoredResponse) end
alias response_data__noop response_data__ignored
# These response types have no dedicated parser: their data is returned
# unparsed by #response_data__unhandled.
alias listrights_data response_data__unhandled
alias myrights_data response_data__unhandled
alias metadata_resp response_data__unhandled
alias language_data response_data__unhandled
alias comparator_data response_data__unhandled
alias message_data__converted response_data__unhandled
# RFC3501 & RFC9051:
# response-tagged = tag SP resp-cond-state CRLF
#
# Builds a TaggedResponse from the tag, the resp-cond-state name and text,
# and the raw response string.
def response_tagged
  tag_str = tag
  SP!
  state = resp_cond_state
  TaggedResponse.new(tag_str, *state, @str)
end
# RFC3501 & RFC9051:
# resp-cond-state = ("OK" / "NO" / "BAD") SP resp-text
#
# NOTE: In the spirit of RFC9051 Appx E 23 (and to workaround existing
# servers), we don't require a final SP and instead parse this as:
#
# resp-cond-state = ("OK" / "NO" / "BAD") [SP resp-text]
#
# Returns <tt>[name, text]</tt>; +text+ is ResponseText::EMPTY when the
# resp-text is missing.
def resp_cond_state
  name = resp_cond_state__name
  text = SP? ? resp_text : ResponseText::EMPTY
  [name, text]
end

# An untagged resp-cond-state, wrapped in an UntaggedResponse.
def resp_cond_state__untagged
  state = resp_cond_state
  UntaggedResponse.new(*state, @str)
end
# resp-cond-auth = ("OK" / "PREAUTH") SP resp-text
#
# NOTE: In the spirit of RFC9051 Appx E 23 (and to workaround existing
# servers), we don't require a final SP and instead parse this as:
#
# resp-cond-auth = ("OK" / "PREAUTH") [SP resp-text]
#
# The text is ResponseText::EMPTY when the resp-text is missing.
def resp_cond_auth
  name = resp_cond_auth__name
  text = SP? ? resp_text : ResponseText::EMPTY
  UntaggedResponse.new(name, text, @str)
end
# resp-cond-bye = "BYE" SP resp-text
#
# NOTE: In the spirit of RFC9051 Appx E 23 (and to workaround existing
# servers), we don't require a final SP and instead parse this as:
#
# resp-cond-bye = "BYE" [SP resp-text]
#
# The text is ResponseText::EMPTY when the resp-text is missing.
def resp_cond_bye
  name = label(BYE)
  text = SP? ? resp_text : ResponseText::EMPTY
  UntaggedResponse.new(name, text, @str)
end
# message-data = nz-number SP ("EXPUNGE" / ("FETCH" SP msg-att))
#
# Parses the FETCH form only.  The sequence number is stored in the
# FetchData and is also passed to #msg_att.
def message_data__fetch
  seq = nz_number
  SP!
  name = label("FETCH")
  SP!
  UntaggedResponse.new(name, FetchData.new(seq, msg_att(seq)), @str)
end
# uidfetch-resp = uniqueid SP "UIDFETCH" SP msg-att
#
# The UID is stored in the UIDFetchData and is also passed to #msg_att.
def uidfetch_resp
  uid = uniqueid
  SP!
  name = label("UIDFETCH")
  SP!
  UntaggedResponse.new(name, UIDFetchData.new(uid, msg_att(uid)), @str)
end
# Untagged responses of the form <tt>nz-number SP label</tt>: the label is
# the response name and the number is its data.
def response_data__simple_numeric
  num = nz_number
  SP!
  UntaggedResponse.new(tagged_ext_label, num, @str)
end
alias message_data__expunge response_data__simple_numeric
alias mailbox_data__exists response_data__simple_numeric
alias mailbox_data__recent response_data__simple_numeric
# The name for this is confusing, because it *replaces* EXPUNGE
# >>>
# expunged-resp = "VANISHED" [SP "(EARLIER)"] SP known-uids
#
# The data is <tt>VanishedData[uids, earlier]</tt>, where +earlier+ is true
# only when "(EARLIER)" was present.
def expunged_resp
  name = label("VANISHED")
  SP!
  earlier = false
  if lpar?
    label("EARLIER")
    rpar
    SP!
    earlier = true
  end
  data = VanishedData[known_uids, earlier]
  UntaggedResponse.new(name, data, @str)
end
# known-uids is currently parsed exactly like a sequence-set.
# TODO: replace with uid_set
alias known_uids sequence_set
# RFC3501 & RFC9051:
# msg-att = "(" (msg-att-dynamic / msg-att-static)
# *(SP (msg-att-dynamic / msg-att-static)) ")"
#
# msg-att-dynamic = "FLAGS" SP "(" [flag-fetch *(SP flag-fetch)] ")"
# RFC5257 (ANNOTATE extension):
# msg-att-dynamic =/ "ANNOTATION" SP
# ( "(" entry-att *(SP entry-att) ")" /
# "(" entry *(SP entry) ")" )
# RFC7162 (CONDSTORE extension):
# msg-att-dynamic =/ fetch-mod-resp
# fetch-mod-resp = "MODSEQ" SP "(" permsg-modsequence ")"
# RFC8970 (PREVIEW extension):
# msg-att-dynamic =/ "PREVIEW" SP nstring
#
# RFC3501:
# msg-att-static = "ENVELOPE" SP envelope /
# "INTERNALDATE" SP date-time /
# "RFC822" [".HEADER" / ".TEXT"] SP nstring /
# "RFC822.SIZE" SP number /
# "BODY" ["STRUCTURE"] SP body /
# "BODY" section ["<" number ">"] SP nstring /
# "UID" SP uniqueid
# RFC3516 (BINARY extension):
# msg-att-static =/ "BINARY" section-binary SP (nstring / literal8)
# / "BINARY.SIZE" section-binary SP number
# RFC8514 (SAVEDATE extension):
# msg-att-static =/ "SAVEDATE" SP (date-time / nil)
# RFC8474 (OBJECTID extension):
# msg-att-static =/ fetch-emailid-resp / fetch-threadid-resp
# fetch-emailid-resp = "EMAILID" SP "(" objectid ")"
# fetch-threadid-resp = "THREADID" SP ( "(" objectid ")" / nil )
# RFC9051:
# msg-att-static = "ENVELOPE" SP envelope /
# "INTERNALDATE" SP date-time /
# "RFC822.SIZE" SP number64 /
# "BODY" ["STRUCTURE"] SP body /
# "BODY" section ["<" number ">"] SP nstring /
# "BINARY" section-binary SP (nstring / literal8) /
# "BINARY.SIZE" section-binary SP number /
# "UID" SP uniqueid
#
# Re https://www.rfc-editor.org/errata/eid7246, I'm adding "offset" to the
# official "BINARY" ABNF, like so:
#
# msg-att-static =/ "BINARY" section-binary ["<" number ">"] SP
# (nstring / literal8)
#
# Returns a Hash that maps each attribute label (including any "[section]"
# and "<partial>" suffix, see #msg_att__label) to its parsed value.  +n+ is
# only used in the error message for an unknown attribute.
#
# n.b. ANNOTATION, PREVIEW and SAVEDATE from the ABNF above have no +when+
# clause here, so they are reported as unknown attributes.
def msg_att(n)
lpar
attr = {}
while true
name = msg_att__label; SP!
val =
# The order matters: exact names such as "BODY" must be tested before the
# regexps that match the same prefix followed by "[".
case name
when "UID" then uniqueid
when "FLAGS" then flag_list
when "BODY" then body
when /\ABODY\[/ni then nstring
when "BODYSTRUCTURE" then body
when "ENVELOPE" then envelope
when "INTERNALDATE" then date_time
when "RFC822.SIZE" then number64
when /\ABINARY\[/ni then nstring8 # BINARY, IMAP4rev2
when /\ABINARY\.SIZE\[/ni then number # BINARY, IMAP4rev2
when "RFC822" then nstring # not in rev2
when "RFC822.HEADER" then nstring # not in rev2
when "RFC822.TEXT" then nstring # not in rev2
when "MODSEQ" then parens__modseq # CONDSTORE
when "EMAILID" then parens__objectid # OBJECTID
when "THREADID" then nparens__objectid # OBJECTID
when "X-GM-MSGID" then x_gm_id # GMail
when "X-GM-THRID" then x_gm_id # GMail
when "X-GM-LABELS" then x_gm_labels # GMail
else parse_error("unknown attribute `%s' for {%d}", name, n)
end
attr[name] = val
# No SP: this was the last attribute.
break unless SP?
# An SP directly before ")" is tolerated as a trailing space.
break if lookahead_rpar?
end
rpar
attr
end
# appends "[section]" and "<partial>" to the base label
#
# Returns the upcased attribute label.  For BODY, BINARY and BINARY.SIZE the
# section (and the partial, when present) is appended to the label itself.
# For the RFC822 labels, a "[]" that follows the label is consumed but not
# appended.
def msg_att__label
  name = tagged_ext_label
  case name
  when /\A(?:RFC822(?:\.HEADER|\.TEXT)?)\z/ni
    # ignoring "[]" fixes https://bugs.ruby-lang.org/issues/5620
    rbra if lbra?
  when "BODY"
    if peek_lbra?
      name << section
      name << gt__number__lt if peek_str?("<") # partial
    end
  when "BINARY", "BINARY.SIZE"
    name << section_binary
    # see https://www.rfc-editor.org/errata/eid7246 and the errata note in
    # the comments for #msg_att
    name << gt__number__lt if peek_str?("<") # partial
  end
  name
end
# this represents the partial size for BODY or BINARY
# (the "<" number ">" suffix, which is simply read as an atom)
alias gt__number__lt atom
# RFC3501 & RFC9051:
# envelope = "(" env-date SP env-subject SP env-from SP
# env-sender SP env-reply-to SP env-to SP env-cc SP
# env-bcc SP env-in-reply-to SP env-message-id ")"
#
# Parsed with the lexer in EXPR_DATA state; the state is always set back to
# EXPR_BEG afterwards, even when parsing fails.
def envelope
  @lex_state = EXPR_DATA
  lpar
  date = env_date
  SP!
  subject = env_subject
  SP!
  from = env_from
  SP!
  sender = env_sender
  SP!
  reply_to = env_reply_to
  SP!
  to = env_to
  SP!
  cc = env_cc
  SP!
  bcc = env_bcc
  SP!
  in_reply_to = env_in_reply_to
  SP!
  message_id = env_message_id
  rpar
  Envelope.new(date, subject, from, sender, reply_to,
               to, cc, bcc, in_reply_to, message_id)
ensure
  @lex_state = EXPR_BEG
end
# env-date = nstring
# env-subject = nstring
# env-in-reply-to = nstring
# env-message-id = nstring
#
# All four are parsed by #nstring, so each is either a string or nil.
alias env_date nstring
alias env_subject nstring
alias env_in_reply_to nstring
alias env_message_id nstring
# env-from = "(" 1*address ")" / nil
# env-sender = "(" 1*address ")" / nil
# env-reply-to = "(" 1*address ")" / nil
# env-to = "(" 1*address ")" / nil
# env-cc = "(" 1*address ")" / nil
# env-bcc = "(" 1*address ")" / nil
#
# Returns +nil+ for NIL, and otherwise an array with at least one address.
# An SP before any further address, or before the closing ")", is skipped.
def nlist__address
  return if NIL?
  lpar
  list = [address]
  while true
    quirky_SP?
    break if rpar?
    list << address
  end
  list
end
# The six address-list fields of an envelope share one implementation.
alias env_from nlist__address
alias env_sender nlist__address
alias env_reply_to nlist__address
alias env_to nlist__address
alias env_cc nlist__address
alias env_bcc nlist__address
# Used when servers erroneously send an extra SP.
#
# As of 2023-11-28, Outlook.com (still) sends SP
# between +address+ in <tt>env-*</tt> lists.
alias quirky_SP? SP?
# date-time = DQUOTE date-day-fixed "-" date-month "-" date-year
# SP time SP zone DQUOTE
#
# n.b. read as a plain quoted string; the date format itself is not checked
# by these aliases.
alias date_time quoted
alias ndatetime nquoted
# RFC-3501 & RFC-9051:
# body = "(" (body-type-1part / body-type-mpart) ")"
# Parses one parenthesized body. A nested "(" right after the opening paren
# means a multipart body; anything else is a single-part body. The lexer is
# in EXPR_DATA during the parse and is always reset to EXPR_BEG afterwards.
def body
  @lex_state = EXPR_DATA
  lpar
  parsed =
    if peek_lpar?
      body_type_mpart
    else
      body_type_1part
    end
  rpar
  parsed
ensure
  @lex_state = EXPR_BEG
end
# A body always begins with "(", so the body lookahead is lookahead_lpar?.
alias lookahead_body? lookahead_lpar?
# RFC-3501 & RFC9051:
# body-type-1part = (body-type-basic / body-type-msg / body-type-text)
# [SP body-ext-1part]
# Chooses which single-part parser to run, based on the media type and
# subtype at the current position, and returns that parser's result.
def body_type_1part
# This regexp peek is a performance optimization.
# The lookahead fallback would work fine too.
#
# Each alternative is a named group, so the name of the group that matched
# identifies the branch. The selected parser reads the media type itself.
m = peek_re(/\G(?:
(?<TEXT> "TEXT" \s "[^"]+" )
|(?<MESSAGE> "MESSAGE" \s "(?:RFC822|GLOBAL)" )
|(?<BASIC> "[^"]+" \s "[^"]+" )
|(?<MIXED> "MIXED" )
)/nix)
# Drop the groups that did not participate; the first remaining key is the
# matched group's name (nil when the regexp did not match at all).
choice = m&.named_captures&.compact&.keys&.first
# In practice, the following line should never be used. But the ABNF
# *does* allow literals, and this will handle them.
choice ||= lookahead_case_insensitive__string!
case choice
when "BASIC" then body_type_basic # => BodyTypeBasic
when "MESSAGE" then body_type_msg # => BodyTypeMessage | BodyTypeBasic
when "TEXT" then body_type_text # => BodyTypeText
when "MIXED" then body_type_mixed # => BodyTypeMultipart (server bug)
else body_type_basic # might be a bug; server's or ours?
end
end
# RFC-3501 & RFC9051:
# body-type-basic = media-basic SP body-fields
# Parses a basic single-part body and returns a BodyTypeBasic.
#
# If the closing paren follows the media type immediately (invalid, but
# seen in practice), the result carries only the media type. Extension
# fields are parsed only when another SP follows body-fields.
def body_type_basic
  media = media_basic # n.b. "basic" type isn't enforced here
  return BodyTypeBasic.new(*media) if lookahead_rpar? # invalid
  SP!
  fields = body_fields
  extensions = SP? ? body_ext_1part : nil
  BodyTypeBasic.new(*media, *fields, *extensions)
end
# RFC-3501 & RFC-9051:
# body-type-text = media-text SP body-fields SP body-fld-lines
# Parses a text single-part body and returns a BodyTypeText. Extension
# fields are parsed only when another SP follows the line count.
def body_type_text
  media = media_text
  SP!
  fields = body_fields
  SP!
  line_count = body_fld_lines
  extensions = SP? ? body_ext_1part : nil
  BodyTypeText.new(*media, *fields, line_count, *extensions)
end
# RFC-3501 & RFC-9051:
# body-type-msg = media-message SP body-fields SP envelope
# SP body SP body-fld-lines
# Parses a message single-part body. Returns a BodyTypeMessage when an
# envelope follows body-fields, and a BodyTypeBasic otherwise.
def body_type_msg
  # n.b. "message/rfc822" type isn't enforced here
  media = media_message
  SP!
  fields = body_fields

  # Some servers send body-type-basic where body-type-msg is expected, e.g.
  # for a message/rfc822 part with "Content-Disposition: attachment". What
  # comes after body-fields tells the two apart:
  #
  # * SP "("     --> SP envelope       --> continue as body-type-msg
  # * ")"        --> no body-ext-1part --> completed body-type-basic
  # * SP nstring --> SP body-fld-md5
  #              --> SP body-ext-1part --> continue as body-type-basic
  #
  # Returning BodyTypeBasic, even for "message/rfc822", is probably better
  # than returning a BodyTypeMessage with invalid fields.
  if peek_str?(" (")
    SP!
    env = envelope
    SP!
    nested = body
    SP!
    line_count = body_fld_lines
    extensions = SP? ? body_ext_1part : nil
    BodyTypeMessage.new(*media, *fields, env, nested, line_count, *extensions)
  else
    extensions = SP? ? body_ext_1part : nil
    BodyTypeBasic.new(*media, *fields, *extensions)
  end
end
# This is a malformed body-type-mpart with no subparts.
# Handles a malformed multipart body that has a subtype but no parts.
# Returns a BodyTypeMultipart whose parts are nil.
def body_type_mixed
  # warn "malformed body-type-mpart: multipart/mixed with no parts."
  subtype = media_subtype # => "MIXED"
  extensions = SP? ? body_ext_mpart : nil
  BodyTypeMultipart.new("MULTIPART", subtype, nil, *extensions)
end
# RFC-3501 & RFC-9051:
# body-type-mpart = 1*body SP media-subtype
# [SP body-ext-mpart]
# Parses a multipart body: one or more nested bodies, an SP, the subtype,
# and optional extension fields. Returns a BodyTypeMultipart.
def body_type_mpart
  parts = [body]
  # Nested bodies are adjacent; the first SP ends the list of parts.
  loop do
    break if SP?
    parts << body
  end
  subtype = media_subtype
  extensions = SP? ? body_ext_mpart : nil
  BodyTypeMultipart.new("MULTIPART", subtype, parts, *extensions)
end
# n.b. this handles both type and subtype
#
# RFC-3501 vs RFC-9051:
# media-basic = ((DQUOTE ("APPLICATION" / "AUDIO" / "IMAGE" /
# "MESSAGE" /
# "VIDEO") DQUOTE) / string) SP media-subtype
# media-basic = ((DQUOTE ("APPLICATION" / "AUDIO" / "IMAGE" /
# "FONT" / "MESSAGE" / "MODEL" /
# "VIDEO") DQUOTE) / string) SP media-subtype
#
# media-message = DQUOTE "MESSAGE" DQUOTE SP
# DQUOTE "RFC822" DQUOTE
# media-message = DQUOTE "MESSAGE" DQUOTE SP
# DQUOTE ("RFC822" / "GLOBAL") DQUOTE
#
# RFC-3501 & RFC-9051:
# media-text = DQUOTE "TEXT" DQUOTE SP media-subtype
# media-subtype = string
# Reads the media type and, when an SP follows it, the media subtype.
# Returns a two-element array; the subtype is nil when no SP follows the
# type (a server quirk).
def media_type
  type = case_insensitive__string
  return [type, nil] unless SP? # ??? quirky!
  [type, media_subtype]
end
# TODO: check types
#
# All three media-* rules currently share #media_type, which does not check
# the type it reads; media-subtype is read as a case-insensitive string.
alias media_basic media_type # */* --- catchall
alias media_message media_type # message/rfc822, message/global
alias media_text media_type # text/*
alias media_subtype case_insensitive__string
# RFC-3501 & RFC-9051:
# body-fields = body-fld-param SP body-fld-id SP body-fld-desc SP
# body-fld-enc SP body-fld-octets
# Parses the five SP-separated body fields and returns them as an array:
# [param, id, desc, enc, octets].
def body_fields
  param = body_fld_param
  SP!
  id = body_fld_id
  SP!
  desc = body_fld_desc
  SP!
  enc = body_fld_enc
  SP!
  octets = body_fld_octets
  [param, id, desc, enc, octets]
end
# RFC3501, RFC9051:
# body-fld-param = "(" string SP string *(SP string SP string) ")" / nil
# Parses body-fld-param: NIL (returns nil) or a parenthesized list of
# name/value string pairs (returns a Hash keyed by the parameter names).
def body_fld_param
  quirky_SP? # See comments on test_bodystructure_extra_space
  return nil if NIL?
  lpar
  params = {}
  # At least one pair is required; each further pair is introduced by SP.
  loop do
    key = case_insensitive__string
    SP!
    params[key] = string
    break unless SP?
  end
  rpar
  params
end
# RFC2060
# body_ext_1part ::= body_fld_md5 [SPACE body_fld_dsp
# [SPACE body_fld_lang
# [SPACE 1#body_extension]]]
# ;; MUST NOT be returned on non-extensible
# ;; "BODY" fetch
# RFC3501 & RFC9051
# body-ext-1part = body-fld-md5 [SP body-fld-dsp [SP body-fld-lang
# [SP body-fld-loc *(SP body-extension)]]]
# ; MUST NOT be returned on non-extensible
# ; "BODY" fetch
# Parses the single-part extension fields. Returns an array that starts with
# body-fld-md5 and gains one further field for each SP that follows, in this
# order: disposition, language, location, remaining extensions.
def body_ext_1part
  fields = [body_fld_md5]
  %i[body_fld_dsp body_fld_lang body_fld_loc body_extensions].each do |parser|
    break unless SP?
    fields << send(parser)
  end
  fields
end
# RFC-2060:
# body_ext_mpart = body_fld_param [SP body_fld_dsp SP body_fld_lang
# [SP 1#body_extension]]
# ;; MUST NOT be returned on non-extensible
# ;; "BODY" fetch
# RFC-3501 & RFC-9051:
# body-ext-mpart = body-fld-param [SP body-fld-dsp [SP body-fld-lang
# [SP body-fld-loc *(SP body-extension)]]]
# ; MUST NOT be returned on non-extensible
# ; "BODY" fetch
# Parses the multipart extension fields. Returns an array that starts with
# body-fld-param and gains one further field for each SP that follows, in
# this order: disposition, language, location, remaining extensions.
def body_ext_mpart
  fields = [body_fld_param]
  %i[body_fld_dsp body_fld_lang body_fld_loc body_extensions].each do |parser|
    break unless SP?
    fields << send(parser)
  end
  fields
end
# Simple body fields: each rule name is an alias of the generic parser that
# reads its value.
alias body_fld_desc nstring
alias body_fld_id nstring
alias body_fld_loc nstring
alias body_fld_lines number64 # number in 3501, number64 in 9051
alias body_fld_md5 nstring
alias body_fld_octets number
# RFC-3501 & RFC-9051:
# body-fld-enc = (DQUOTE ("7BIT" / "8BIT" / "BINARY" / "BASE64"/
# "QUOTED-PRINTABLE") DQUOTE) / string
#
# n.b. read as any case-insensitive string; the listed names are not enforced.
alias body_fld_enc case_insensitive__string
# body-fld-dsp = "(" string SP body-fld-param ")" / nil
# Parses body-fld-dsp: NIL (returns nil) or a parenthesized disposition type
# followed by its parameters (returns a ContentDisposition).
def body_fld_dsp
  return nil if NIL?
  lpar
  disposition = case_insensitive__string
  SP!
  params = body_fld_param
  rpar
  ContentDisposition.new(disposition, params)
end
# body-fld-lang = nstring / "(" string *(SP string) ")"
# Parses body-fld-lang. A parenthesized list yields an Array of strings;
# otherwise the value is whatever case_insensitive__nstring returns.
def body_fld_lang
  return case_insensitive__nstring unless lpar?
  languages = [case_insensitive__string]
  while SP?
    languages << case_insensitive__string
  end
  rpar
  languages
end
# body-extension *(SP body-extension)
# Parses one or more SP-separated body-extension values into an Array.
def body_extensions
  extensions = [body_extension]
  extensions << body_extension while SP?
  extensions
end
# body-extension = nstring / number / number64 /
# "(" body-extension *(SP body-extension) ")"
# ; Future expansion. Client implementations
# ; MUST accept body-extension fields. Server
# ; implementations MUST NOT generate
# ; body-extension fields except as defined by
# ; future Standard or Standards Track
# ; revisions of this specification.
# Parses a single body-extension: a number, a parenthesized (nested) list of
# extensions, or an nstring, tried in that order.
def body_extension
  uint = number64?
  return uint if uint
  return nstring unless lpar?
  nested = body_extensions
  rpar
  nested
end
# section = "[" [section-spec] "]"
# Rebuilds the bracketed section text as a String: "[" + optional
# section-spec + "]".
def section
  text = +lbra
  text << section_spec unless peek_rbra?
  text << rbra
  text
end
# section-binary = "[" [section-part] "]"
# Rebuilds the bracketed binary section text as a String: "[" + optional
# section-part + "]".
def section_binary
  text = +lbra
  text << section_part unless peek_rbra?
  text << rbra
  text
end
# section-spec = section-msgtext / (section-part ["." section-text])
# section-msgtext = "HEADER" /
# "HEADER.FIELDS" [".NOT"] SP header-list /
# "TEXT"
# ; top-level or MESSAGE/RFC822 or
# ; MESSAGE/GLOBAL part
# section-part = nz-number *("." nz-number)
# ; body part reference.
# ; Allows for accessing nested body parts.
# section-text = section-msgtext / "MIME"
# ; text other than actual body part (headers,
# ; etc.)
#
# n.b: we could "cheat" here and just grab all text inside the brackets,
# but literals would need special treatment.
# Rebuilds the section-spec text: an atom, plus " " and the header list when
# an SP follows the atom.
def section_spec
  spec = "".b << atom # grabs everything up to "SP header-list" or "]"
  if SP?
    spec << " "
    spec << header_list
  end
  spec
end
# header-list = "(" header-fld-name *(SP header-fld-name) ")"
# Rebuilds the header list as a String: the parens returned by lpar/rpar
# around the header field names, joined by single spaces.
def header_list
  list = +""
  list << lpar
  list << header_fld_name
  while SP?
    list << " "
    list << header_fld_name
  end
  list << rpar
  list
end
# section-part = nz-number *("." nz-number)
# ; body part reference.
# ; Allows for accessing nested body parts.
#
# n.b. read as a single #atom; the dotted-number structure is not checked
# by this alias.
alias section_part atom
# RFC3501 & RFC9051:
# header-fld-name = astring
#
# NOTE: Previously, Net::IMAP recreated the raw original source string.
# Now, it returns the decoded astring value. Although this is technically
# incompatible, it should almost never make a difference: all standard
# header field names are valid atoms:
#
# https://www.iana.org/assignments/message-headers/message-headers.xhtml
#
# See also RFC5322:
# optional-field = field-name ":" unstructured CRLF
# field-name = 1*ftext
# ftext = %d33-57 / ; Printable US-ASCII
# %d59-126 ; characters not including
# ; ":".
# header-fld-name is read with #astring, so its value is whatever #astring
# returns for the field name.
alias header_fld_name astring
# mailbox-data = "FLAGS" SP flag-list / "LIST" SP mailbox-list /
# "LSUB" SP mailbox-list / "SEARCH" *(SP nz-number) /
# "STATUS" SP mailbox SP "(" [status-att-list] ")" /
# number SP "EXISTS" / number SP "RECENT"
# Parses a "FLAGS" response into an UntaggedResponse whose data is the
# parsed flag list.
def mailbox_data__flags
  name = label("FLAGS")
  SP!
  flags = flag_list
  UntaggedResponse.new(name, flags, @str)
end
# Parses a "LIST", "LSUB", or "XLIST" response into an UntaggedResponse
# whose data is the parsed mailbox list entry.
def mailbox_data__list
  name = label_in("LIST", "LSUB", "XLIST")
  SP!
  entry = mailbox_list
  UntaggedResponse.new(name, entry, @str)
end
# LSUB and XLIST responses are parsed by the very same method.
alias mailbox_data__lsub mailbox_data__list
alias mailbox_data__xlist mailbox_data__list
# mailbox-list = "(" [mbx-list-flags] ")" SP
# (DQUOTE QUOTED-CHAR DQUOTE / nil) SP mailbox
# [SP mbox-list-extended]
# ; This is the list information pointed to by the ABNF
# ; item "mailbox-data", which is defined above
# Parses one mailbox-list entry and returns a MailboxList built from the
# flags, the hierarchy delimiter, and the mailbox name. An empty "()" yields
# an empty flags array.
def mailbox_list
  lpar
  flags = peek_rpar? ? [] : mbx_list_flags
  rpar
  SP!
  delimiter = nquoted
  SP!
  mailbox_name = mailbox
  # TODO: mbox-list-extended
  MailboxList.new(flags, delimiter, mailbox_name)
end
# Parses a quota response into an UntaggedResponse holding a MailboxQuota.
#
# If the quota was never established, the server answers with
# `NO Quota root does not exist' instead. If the quota was removed, the
# parenthesized list after the folder spec is empty (no `STORAGE'), and
# usage and quota are both nil.
def quota_response
  name = match(T_ATOM).value.upcase
  match(T_SPACE)
  mailbox = astring
  match(T_SPACE)
  match(T_LPAR)
  token = lookahead
  case token.symbol
  when T_RPAR
    shift_token
    usage = quota = nil
  when T_ATOM
    shift_token
    match(T_SPACE)
    usage = match(T_NUMBER).value
    match(T_SPACE)
    quota = match(T_NUMBER).value
    match(T_RPAR)
  else
    return parse_error("unexpected token %s", token.symbol)
  end
  UntaggedResponse.new(name, MailboxQuota.new(mailbox, usage, quota), @str)
end
# Parses a quotaroot response into an UntaggedResponse holding a
# MailboxQuotaRoot: the mailbox followed by zero or more SP-separated roots.
# Similar to getquota, but only admin can use getquota.
def quotaroot_response
  name = match(T_ATOM).value.upcase
  match(T_SPACE)
  mailbox = astring
  quotaroots = []
  while lookahead.symbol == T_SPACE
    shift_token
    quotaroots << astring
  end
  UntaggedResponse.new(name, MailboxQuotaRoot.new(mailbox, quotaroots), @str)
end
# acl-data = "ACL" SP mailbox *(SP identifier SP rights)
# Parses an ACL response into an UntaggedResponse whose data is an Array of
# MailboxACLItem, one per identifier/rights pair that follows the mailbox.
def acl_data
  name = match(T_ATOM).value.upcase
  match(T_SPACE)
  mailbox = astring
  entries = []
  if lookahead.symbol == T_SPACE
    shift_token
    loop do
      upcoming = lookahead.symbol
      break if upcoming == T_CRLF
      # One more SP is skipped here before each identifier, if present.
      shift_token if upcoming == T_SPACE
      user = astring
      match(T_SPACE)
      rights = astring
      entries << MailboxACLItem.new(user, rights, mailbox)
    end
  end
  UntaggedResponse.new(name, entries, @str)
end
# RFC3501:
# mailbox-data = "SEARCH" *(SP nz-number) / ...
# RFC5256: SORT
# sort-data = "SORT" *(SP nz-number)
# RFC7162: CONDSTORE, QRESYNC
# mailbox-data =/ "SEARCH" [1*(SP nz-number) SP
# search-sort-mod-seq]
# sort-data = "SORT" [1*(SP nz-number) SP
# search-sort-mod-seq]
# ; Updates the SORT response from RFC 5256.
# search-sort-mod-seq = "(" "MODSEQ" SP mod-sequence-value ")"
# RFC9051:
# mailbox-data = obsolete-search-response / ...
# obsolete-search-response = "SEARCH" *(SP nz-number)
# Parses a "SEARCH" or "SORT" response: any number of SP-separated numbers,
# optionally followed by a parenthesized MODSEQ value. Returns an
# UntaggedResponse whose data is a SearchResult.
def mailbox_data__search
  name = label_in("SEARCH", "SORT")
  numbers = []
  loop do
    num = SP? && nz_number?
    break unless num
    numbers << num
  end
  modseq = nil
  if lpar?
    label("MODSEQ")
    SP!
    modseq = mod_sequence_value
    rpar
  end
  result = SearchResult.new(numbers, modseq: modseq)
  UntaggedResponse.new(name, result, @str)
end
# SORT responses are parsed by the same method.
alias sort_data mailbox_data__search
# esearch-response = "ESEARCH" [search-correlator] [SP "UID"]
# *(SP search-return-data)
# ;; Note that SEARCH and ESEARCH responses
# ;; SHOULD be mutually exclusive,
# ;; i.e., only one of the response types
# ;; should be
# ;; returned as a result of a command.
# esearch-response = "ESEARCH" [search-correlator] [SP "UID"]
# *(SP search-return-data)
# ; ESEARCH response replaces SEARCH response
# ; from IMAP4rev1.
# search-correlator = SP "(" "TAG" SP tag-string ")"
# Parses an "ESEARCH" response: optional search correlator, optional " UID"
# marker, then any number of SP-prefixed return data items. Returns an
# UntaggedResponse whose data is an ESearchResult.
def esearch_response
  name = label("ESEARCH")
  tag = peek_str?(" (") ? search_correlator : nil
  uid = peek_re?(/\G UID\b/i)
  if uid
    SP!
    label("UID")
    uid = true
  end
  items = []
  while SP?
    items << search_return_data
  end
  UntaggedResponse.new(name, ESearchResult.new(tag, uid, items), @str)
end
# From RFC4731 (ESEARCH):
# search-return-data = "MIN" SP nz-number /
# "MAX" SP nz-number /
# "ALL" SP sequence-set /
# "COUNT" SP number /
# search-ret-data-ext
# ; All return data items conform to
# ; search-ret-data-ext syntax.
# search-ret-data-ext = search-modifier-name SP search-return-value
# search-modifier-name = tagged-ext-label
# search-return-value = tagged-ext-val
#
# From RFC4731 (ESEARCH):
# search-return-data =/ "MODSEQ" SP mod-sequence-value
#
# From RFC9394 (PARTIAL):
# search-return-data =/ ret-data-partial
#
# Parses one search-return-data item and returns it as a [name, value] pair.
# Known modifier names get a dedicated value parser; anything else falls
# back to the generic search-return-value.
def search_return_data
  modifier = search_modifier_name
  SP!
  value =
    case modifier
    when "MIN", "MAX"
      nz_number
    when "ALL"
      sequence_set
    when "COUNT"
      number
    when "MODSEQ" # RFC7162: CONDSTORE
      mod_sequence_value
    when "PARTIAL" # RFC9394: PARTIAL
      ret_data_partial__value
    else
      search_return_value
    end
  [modifier, value]
end
# From RFC5267 (CONTEXT=SEARCH, CONTEXT=SORT) and RFC9394 (PARTIAL):
# ret-data-partial = "PARTIAL"
# SP "(" partial-range SP partial-results ")"
# Parses the parenthesized value of a PARTIAL return item and returns an
# ESearchResult::PartialResult built from the range and its results.
def ret_data_partial__value
  lpar
  range = partial_range
  SP!
  results = partial_results
  rpar
  ESearchResult::PartialResult.new(range, results)
end
# partial-range = partial-range-first / partial-range-last
# tagged-ext-simple =/ partial-range-last
# Reads an atom and converts it into a Range ordered from the smaller to the
# larger of its two captured integers. Raises a parse error when the atom
# matches neither partial-range pattern.
def partial_range
  range_atom = atom
  case range_atom
  when Patterns::PARTIAL_RANGE_FIRST, Patterns::PARTIAL_RANGE_LAST
    bounds = [Integer($1), Integer($2)]
    bounds.min..bounds.max
  else
    parse_error("unexpected atom %p, expected partial-range", range_atom)
  end
end
# partial-results = sequence-set / "NIL"
# ;; <sequence-set> from [RFC3501].
# ;; NIL indicates that no results correspond to
# ;; the requested range.
# Returns nil for NIL (no results in the requested range), otherwise the
# parsed sequence set.
def partial_results
  return nil if NIL?
  sequence_set
end
# search-modifier-name = tagged-ext-label
#
# The grammar rule is a straight rename, and so is the method.
alias search_modifier_name tagged_ext_label
# search-return-value = tagged-ext-val
# ; Data for the returned search option.
# ; A single "nz-number"/"number"/"number64" value
# ; can be returned as an atom (i.e., without
# ; quoting). A sequence-set can be returned
# ; as an atom as well.
# Wraps the generic tagged-ext-val in an ExtensionData.
def search_return_value
  ExtensionData.new(tagged_ext_val)
end
# search-correlator = SP "(" "TAG" SP tag-string ")"
# Parses the search correlator, including its leading SP, and returns the
# tag string it carries.
def search_correlator
  SP!
  lpar
  label("TAG")
  SP!
  correlator_tag = tag_string
  rpar
  correlator_tag
end
# tag-string = astring
# ; <tag> represented as <astring>
#
# Parsed by #astring directly.
alias tag_string astring
# RFC5256: THREAD
# thread-data = "THREAD" [SP 1*thread-list]
# Parses a "THREAD" response into an UntaggedResponse whose data is an Array
# of threads. The array is empty when nothing follows the label.
def thread_data
  name = label("THREAD")
  threads = []
  if SP?
    while lookahead_thread_list?
      threads.push(thread_list)
    end
  end
  UntaggedResponse.new(name, threads, @str)
end
# Both thread-list and thread-nested start with "(", so both lookaheads are
# lookahead_lpar?.
alias lookahead_thread_list? lookahead_lpar?
alias lookahead_thread_nested? lookahead_thread_list?
# RFC5256: THREAD
# thread-list = "(" (thread-members / thread-nested) ")"
# Parses one parenthesized thread-list. When it opens with nested lists, the
# result is a ThreadMember with a nil seqno holding them; otherwise it is the
# chain returned by thread_members.
def thread_list
  lpar
  thread = lookahead_thread_nested? ? ThreadMember.new(nil, thread_nested) : thread_members
  rpar
  thread
end
# RFC5256: THREAD
# thread-members = nz-number *(SP nz-number) [SP thread-nested]
# Parses thread-members and returns the root ThreadMember. Each number
# becomes the single child of the number before it; the last number's
# children are the trailing nested thread lists, if any, else an empty array.
def thread_members
  members = [nz_number] # thread root
  nested = nil
  while SP?
    if lookahead!(T_NUMBER, T_LPAR).symbol == T_NUMBER
      members << nz_number
    else
      nested = thread_nested
      break
    end
  end
  # Build the chain from the innermost member outwards.
  children = nested || []
  members.reverse_each do |seqno|
    children = [ThreadMember.new(seqno, children)]
  end
  children.first
end
# RFC5256: THREAD
# thread-nested = 2*thread-list
# Parses two mandatory thread lists plus any further ones that follow, and
# returns them as an Array.
def thread_nested
  lists = [thread_list, thread_list]
  lists << thread_list while lookahead_thread_list?
  lists
end
# mailbox-data =/ "STATUS" SP mailbox SP "(" [status-att-list] ")"
# Parses a "STATUS" response into an UntaggedResponse whose data is a
# StatusData built from the mailbox name and the parsed attribute list.
def mailbox_data__status
  response_name = label("STATUS")
  SP!
  mailbox_name = mailbox
  SP!
  lpar
  attributes = status_att_list
  rpar
  status = StatusData.new(mailbox_name, attributes)
  UntaggedResponse.new(response_name, status, @str)
end
# RFC3501
# status-att-list = status-att SP number *(SP status-att SP number)
# RFC4466, RFC9051, and RFC3501 Errata
# status-att-list = status-att-val *(SP status-att-val)
# Parses one or more SP-separated status attribute pairs and returns them
# as a Hash.
def status_att_list
  pairs = [status_att_val]
  pairs << status_att_val while SP?
  pairs.to_h
end
# RFC3501 Errata:
# status-att-val = ("MESSAGES" SP number) / ("RECENT" SP number) /
# ("UIDNEXT" SP nz-number) / ("UIDVALIDITY" SP nz-number) /
# ("UNSEEN" SP number)
# RFC4466:
# status-att-val = ("MESSAGES" SP number) /
# ("RECENT" SP number) /
# ("UIDNEXT" SP nz-number) /
# ("UIDVALIDITY" SP nz-number) /
# ("UNSEEN" SP number)
# ;; Extensions to the STATUS responses
# ;; should extend this production.
# ;; Extensions should use the generic
# ;; syntax defined by tagged-ext.
# RFC9051:
# status-att-val = ("MESSAGES" SP number) /
# ("UIDNEXT" SP nz-number) /
# ("UIDVALIDITY" SP nz-number) /
# ("UNSEEN" SP number) /
# ("DELETED" SP number) /
# ("SIZE" SP number64)
# ; Extensions to the STATUS responses
# ; should extend this production.
# ; Extensions should use the generic
# ; syntax defined by tagged-ext.
# RFC7162:
# status-att-val =/ "HIGHESTMODSEQ" SP mod-sequence-valzer
# ;; Extends non-terminal defined in [RFC4466].
# ;; Value 0 denotes that the mailbox doesn't
# ;; support persistent mod-sequences
# ;; as described in Section 3.1.2.2.
# RFC7889:
# status-att-val =/ "APPENDLIMIT" SP (number / nil)
# ;; status-att-val is defined in RFC 4466
# RFC8438:
# status-att-val =/ "SIZE" SP number64
# RFC8474:
# status-att-val =/ "MAILBOXID" SP "(" objectid ")"
# ; follows tagged-ext production from [RFC4466]
# Parses one status attribute and returns it as a [key, value] pair. Known
# keys use a dedicated value parser; any other key yields a number when one
# is present, else the generic tagged-ext-val wrapped in ExtensionData.
def status_att_val
  key = tagged_ext_label
  SP!
  val =
    case key
    when "MESSAGES", "UNSEEN" # RFC3501, RFC9051
      number
    when "DELETED" # RFC9051
      number
    when "RECENT" # RFC3501 (obsolete)
      number
    when "UIDNEXT", "UIDVALIDITY" # RFC3501, RFC9051
      nz_number
    when "SIZE" # RFC8438, RFC9051
      number64
    when "HIGHESTMODSEQ" # RFC7162
      mod_sequence_valzer
    when "MAILBOXID" # RFC8474
      parens__objectid
    else
      number? || ExtensionData.new(tagged_ext_val)
    end
  [key, val]
end
# The presence of "IMAP4rev1" or "IMAP4rev2" is unenforced here.
# The grammar rule is used by both response-data and resp-text-code.
# But this method only returns UntaggedResponse (response-data).
#
# RFC3501:
# capability-data = "CAPABILITY" *(SP capability) SP "IMAP4rev1"
# *(SP capability)
# RFC9051:
# capability-data = "CAPABILITY" *(SP capability) SP "IMAP4rev2"
# *(SP capability)
# Parses an untagged "CAPABILITY" response into an UntaggedResponse whose
# data is the list of capabilities.
def capability_data__untagged
  name = label("CAPABILITY")
  capabilities = capability__list
  UntaggedResponse.new(name, capabilities, @str)
end
# enable-data = "ENABLED" *(SP capability)
# Parses an "ENABLED" response into an UntaggedResponse whose data is the
# list of capabilities.
def enable_data
  name = label("ENABLED")
  capabilities = capability__list
  UntaggedResponse.new(name, capabilities, @str)
end
# As a workaround for buggy servers, allow a trailing SP:
# *(SP capability) [SP]
# Collects SP-prefixed capabilities into an Array. The loop ends at the
# first missing SP, or at an SP that is not followed by a capability; the
# latter tolerates a trailing SP from buggy servers.
def capability__list
  capabilities = []
  loop do
    break unless SP?
    capa = capability?
    break unless capa
    capabilities << capa
  end
  capabilities
end
# The CAPABILITY response code shares the same list syntax.
alias resp_code__capability capability__list
# capability = ("AUTH=" auth-type) / atom
# ; New capabilities MUST begin with "X" or be
# ; registered with IANA as standard or
# ; standards-track
#
# n.b. both forms are read as a single case-insensitive atom; "AUTH=" gets
# no special handling in these aliases.
alias capability case_insensitive__atom
alias capability? case_insensitive__atom?
# Parses an ID response into an UntaggedResponse. The data is nil for NIL,
# otherwise a Hash built from the parenthesized key/value pairs.
def id_response
  name = match(T_ATOM).value.upcase
  match(T_SPACE)
  opener = match(T_LPAR, T_NIL)
  return UntaggedResponse.new(name, nil, @str) if opener.symbol == T_NIL

  params = {}
  loop do
    case lookahead.symbol
    when T_RPAR
      shift_token
      break
    when T_SPACE
      # Separators between pairs are simply skipped.
      shift_token
    else
      key = string
      match(T_SPACE)
      params[key] = nstring
    end
  end
  UntaggedResponse.new(name, params, @str)
end
# namespace-response = "NAMESPACE" SP namespace
# SP namespace SP namespace
# ; The first Namespace is the Personal Namespace(s).
# ; The second Namespace is the Other Users'
# ; Namespace(s).
# ; The third Namespace is the Shared Namespace(s).
# Parses a "NAMESPACE" response into an UntaggedResponse whose data is a
# Namespaces built from three SP-prefixed namespace lists (personal, other
# users', shared). The lexer is in EXPR_DATA while they are parsed and is
# always reset to EXPR_BEG afterwards.
def namespace_response
  name = label("NAMESPACE")
  @lex_state = EXPR_DATA
  SP!
  personal = namespace
  SP!
  other = namespace
  SP!
  shared = namespace
  UntaggedResponse.new(name, Namespaces.new(personal, other, shared), @str)
ensure
  @lex_state = EXPR_BEG
end
# namespace = nil / "(" 1*namespace-descr ")"
# Parses one namespace: NIL yields an empty Array; otherwise a parenthesized
# run of one or more namespace descriptions yields an Array of them.
def namespace
  return [] if NIL?
  lpar
  descriptions = [namespace_descr]
  loop do
    break if rpar?
    descriptions << namespace_descr
  end
  descriptions
end
# namespace-descr = "(" string SP
# (DQUOTE QUOTED-CHAR DQUOTE / nil)
# [namespace-response-extensions] ")"
# Parses one parenthesized namespace description and returns a Namespace
# built from its prefix, delimiter, and extension data.
def namespace_descr
  lpar
  prefix = string
  SP!
  delimiter = nquoted # n.b: should only accept single char
  extensions = namespace_response_extensions
  rpar
  Namespace.new(prefix, delimiter, extensions)
end
# namespace-response-extensions = *namespace-response-extension
# namespace-response-extension = SP string SP
# "(" string *(SP string) ")"
# Parses zero or more SP-prefixed namespace extensions into a Hash that maps
# each extension name to an Array of its string values. Values for a name
# that appears more than once are accumulated in one array.
def namespace_response_extensions
  extensions = {}
  while SP?
    key = string
    SP!
    lpar
    values = (extensions[key] ||= [])
    # At least one value is required; each further value follows an SP.
    loop do
      values << string
      break unless SP?
    end
    rpar
  end
  extensions
end
# TEXT-CHAR = <any CHAR except CR and LF>
# RFC3501:
# text = 1*TEXT-CHAR
# RFC9051:
# text = 1*(TEXT-CHAR / UTF8-2 / UTF8-3 / UTF8-4)
# ; Non-ASCII text can only be returned
# ; after ENABLE IMAP4rev2 command
# Matches TEXT_REGEXP (mandatory) and returns the matched text, tagged as
# UTF-8.
def text
  matched = match_re(TEXT_REGEXP, "text")
  matched[0].force_encoding("UTF-8")
end
# an "accept" version of #text
# Optional form of #text: returns the matched text tagged as UTF-8, or nil
# when accept_re finds no match.
def text?
  matched = accept_re(TEXT_REGEXP)
  str = matched&.[](0)
  str&.force_encoding("UTF-8")
end
# RFC3501:
# resp-text = ["[" resp-text-code "]" SP] text
# RFC9051:
# resp-text = ["[" resp-text-code "]" SP] [text]
#
# We leniently re-interpret this as
# resp-text = ["[" resp-text-code "]" [SP [text]] / [text]
# Parses resp-text leniently and returns a ResponseText. The code is nil
# when no bracketed response code is present, and the text defaults to ""
# whenever no text can be read.
def resp_text
  code = nil
  if lbra?
    code = resp_text_code
    rbra
    message = (SP? && text?) || ""
  else
    message = text? || ""
  end
  ResponseText.new(code, message)
end
# RFC3501 (See https://www.rfc-editor.org/errata/rfc3501):
# resp-text-code = "ALERT" /
# "BADCHARSET" [SP "(" charset *(SP charset) ")" ] /
# capability-data / "PARSE" /
# "PERMANENTFLAGS" SP "(" [flag-perm *(SP flag-perm)] ")" /
# "READ-ONLY" / "READ-WRITE" / "TRYCREATE" /
# "UIDNEXT" SP nz-number / "UIDVALIDITY" SP nz-number /
# "UNSEEN" SP nz-number /
# atom [SP 1*<any TEXT-CHAR except "]">]
# capability-data = "CAPABILITY" *(SP capability) SP "IMAP4rev1"
# *(SP capability)
#
# RFC5530:
# resp-text-code =/ "UNAVAILABLE" / "AUTHENTICATIONFAILED" /
# "AUTHORIZATIONFAILED" / "EXPIRED" /
# "PRIVACYREQUIRED" / "CONTACTADMIN" / "NOPERM" /
# "INUSE" / "EXPUNGEISSUED" / "CORRUPTION" /
# "SERVERBUG" / "CLIENTBUG" / "CANNOT" /
# "LIMIT" / "OVERQUOTA" / "ALREADYEXISTS" /
# "NONEXISTENT"
# RFC9051:
# resp-text-code = "ALERT" /
# "BADCHARSET" [SP "(" charset *(SP charset) ")" ] /
# capability-data / "PARSE" /
# "PERMANENTFLAGS" SP "(" [flag-perm *(SP flag-perm)] ")" /
# "READ-ONLY" / "READ-WRITE" / "TRYCREATE" /
# "UIDNEXT" SP nz-number / "UIDVALIDITY" SP nz-number /
# resp-code-apnd / resp-code-copy / "UIDNOTSTICKY" /
# "UNAVAILABLE" / "AUTHENTICATIONFAILED" /
# "AUTHORIZATIONFAILED" / "EXPIRED" /
# "PRIVACYREQUIRED" / "CONTACTADMIN" / "NOPERM" /
# "INUSE" / "EXPUNGEISSUED" / "CORRUPTION" /
# "SERVERBUG" / "CLIENTBUG" / "CANNOT" /
# "LIMIT" / "OVERQUOTA" / "ALREADYEXISTS" /
# "NONEXISTENT" / "NOTSAVED" / "HASCHILDREN" /
# "CLOSED" /
# "UNKNOWN-CTE" /
# atom [SP 1*<any TEXT-CHAR except "]">]
# capability-data = "CAPABILITY" *(SP capability) SP "IMAP4rev2"
# *(SP capability)
#
# RFC4315 (UIDPLUS), RFC9051 (IMAP4rev2):
# resp-code-apnd = "APPENDUID" SP nz-number SP append-uid
# resp-code-copy = "COPYUID" SP nz-number SP uid-set SP uid-set
# resp-text-code =/ resp-code-apnd / resp-code-copy / "UIDNOTSTICKY"
#
# RFC7162 (CONDSTORE):
# resp-text-code =/ "HIGHESTMODSEQ" SP mod-sequence-value /
# "NOMODSEQ" /
# "MODIFIED" SP sequence-set
# RFC7162 (QRESYNC):
# resp-text-code =/ "CLOSED"
#
# RFC8474: OBJECTID
# resp-text-code =/ "MAILBOXID" SP "(" objectid ")"
#
# RFC9586: UIDONLY
# resp-text-code =/ "UIDREQUIRED"
# Parses a response code (the part between "[" and "]") and returns a
# ResponseCode holding its name and data. Codes without arguments carry nil
# data; unknown codes keep any trailing text as an unparsed string.
def resp_text_code
  name = resp_text_code__name
  data =
    case name
    when "CAPABILITY"
      resp_code__capability
    when "PERMANENTFLAGS"
      SP? ? flag_perm__list : []
    when "BADCHARSET"
      SP? ? charset__list : []
    when "UIDNEXT", "UIDVALIDITY"
      SP!
      nz_number
    when "UNSEEN" # rev1 only
      SP!
      nz_number
    when "APPENDUID" # rev2, UIDPLUS
      SP!
      resp_code_apnd__data
    when "COPYUID" # rev2, UIDPLUS
      SP!
      resp_code_copy__data
    when "HIGHESTMODSEQ" # CONDSTORE
      SP!
      mod_sequence_value
    when "MODIFIED" # CONDSTORE
      SP!
      sequence_set
    when "MAILBOXID" # RFC8474: OBJECTID
      SP!
      parens__objectid
    when "ALERT", "PARSE", "READ-ONLY", "READ-WRITE", "TRYCREATE",
         "UNAVAILABLE", "AUTHENTICATIONFAILED", "AUTHORIZATIONFAILED",
         "EXPIRED", "PRIVACYREQUIRED", "CONTACTADMIN", "NOPERM", "INUSE",
         "EXPUNGEISSUED", "CORRUPTION", "SERVERBUG", "CLIENTBUG", "CANNOT",
         "LIMIT", "OVERQUOTA", "ALREADYEXISTS", "NONEXISTENT", "CLOSED",
         "NOTSAVED", "UIDNOTSTICKY", "UNKNOWN-CTE", "HASCHILDREN",
         "NOMODSEQ",    # CONDSTORE
         "UIDREQUIRED"  # RFC9586: UIDONLY
      nil # these codes take no arguments
    else
      SP? && text_chars_except_rbra
    end
  ResponseCode.new(name, data)
end
# The response code name is read as a case-insensitive atom.
alias resp_text_code__name case_insensitive__atom
# 1*<any TEXT-CHAR except "]">
# Matches CTEXT_REGEXP (mandatory) and returns the matched text.
def text_chars_except_rbra
  matched = match_re(CTEXT_REGEXP, '1*<any TEXT-CHAR except "]">')
  matched[0]
end
# "(" charset *(SP charset) ")"
# Parses a parenthesized, SP-separated list of one or more charsets into an
# Array.
def charset__list
  lpar
  charsets = [charset]
  charsets << charset while SP?
  rpar
  charsets
end
# already matched: "APPENDUID"
#
# +UIDPLUS+ ABNF:: https://www.rfc-editor.org/rfc/rfc4315.html#section-4
# resp-code-apnd = "APPENDUID" SP nz-number SP append-uid
# append-uid = uniqueid
# append-uid =/ uid-set
# ; only permitted if client uses [MULTIAPPEND]
# ; to append multiple messages.
#
# n.b, uniqueid ⊂ uid-set. To avoid inconsistent return types, we always
# match uid_set even if that returns a single-member array.
# Parses the arguments of an APPENDUID code (the name is already consumed):
# the UIDVALIDITY number, an SP, then the assigned UIDs. The UIDs are always
# read as a uid-set, so the return type does not depend on their count.
def resp_code_apnd__data
  validity = number
  SP!
  assigned_uids = uid_set # uniqueid ⊂ uid-set
  AppendUID(validity, assigned_uids)
end
# already matched: "COPYUID"
#
# resp-code-copy = "COPYUID" SP nz-number SP uid-set SP uid-set
# Parses the arguments of a COPYUID code (the name is already consumed):
# the UIDVALIDITY number, then the source and destination uid-sets, each
# separated by an SP.
def resp_code_copy__data
  validity = number
  SP!
  source_uids = uid_set
  SP!
  destination_uids = uid_set
  CopyUID(validity, source_uids, destination_uids)
end
# Builds the APPENDUID data object: the deprecated UIDPlusData when
# DeprecatedUIDPlus returns one, otherwise an AppendUIDData.
def AppendUID(...)
  DeprecatedUIDPlus(...) || AppendUIDData.new(...)
end

# Builds the COPYUID data object: the deprecated UIDPlusData when
# DeprecatedUIDPlus returns one, otherwise a CopyUIDData.
def CopyUID(...)
  DeprecatedUIDPlus(...) || CopyUIDData.new(...)
end
# TODO: remove this code in the v0.6.0 release
# Returns a UIDPlusData with the uid sets expanded to arrays, or nil when
# the deprecated format is not used.
#
# * nil when config.parser_use_deprecated_uidplus_data is falsy.
# * A UIDPlusData when the largest set fits within
#   config.parser_max_deprecated_uidplus_data_size.
# * Otherwise nil in :up_to_max_size mode, and a parse error in any other
#   mode.
def DeprecatedUIDPlus(validity, src_uids = nil, dst_uids)
  return nil unless config.parser_use_deprecated_uidplus_data
  largest = [src_uids, dst_uids].compact.map { |set| set.count_with_duplicates }.max
  limit = config.parser_max_deprecated_uidplus_data_size
  if largest <= limit
    src_list = src_uids ? src_uids.each_ordered_number.to_a : src_uids
    dst_list = dst_uids.each_ordered_number.to_a
    return UIDPlusData.new(validity, src_list, dst_list)
  end
  return nil if config.parser_use_deprecated_uidplus_data == :up_to_max_size
  parse_error("uid-set is too large: %d > %d", largest, limit)
end
# Matches a whole address whose four fields are each NIL or a quoted string.
# Used by #address as a fast path; when it does not match, #address parses
# the fields one by one instead.
ADDRESS_REGEXP = /\G
\( (?: NIL | #{Patterns::QUOTED_rev2} ) # 1: NAME
\s (?: NIL | #{Patterns::QUOTED_rev2} ) # 2: ROUTE
\s (?: NIL | #{Patterns::QUOTED_rev2} ) # 3: MAILBOX
\s (?: NIL | #{Patterns::QUOTED_rev2} ) # 4: HOST
\)
/nix
# address = "(" addr-name SP addr-adl SP addr-mailbox SP
# addr-host ")"
# addr-adl = nstring
# addr-host = nstring
# addr-mailbox = nstring
# addr-name = nstring
# Parses one address and returns an Address built from its name, route,
# mailbox, and host. A single regexp match handles the common case; the
# field-by-field path handles addresses that include literals.
def address
  fast = accept_re(ADDRESS_REGEXP)
  if fast
    # note that "NIL" isn't captured by the regexp
    name, route, mailbox, host =
      fast.captures.map { |quoted| Patterns.unescape_quoted(quoted) }
  else
    lpar
    name = addr_name
    SP!
    route = addr_adl
    SP!
    mailbox = addr_mailbox
    SP!
    host = addr_host
    rpar
  end
  Address.new(name, route, mailbox, host)
end
# All four address fields are nstrings, so each name aliases #nstring.
alias addr_adl nstring
alias addr_host nstring
alias addr_mailbox nstring
alias addr_name nstring
# flag-list = "(" [flag *(SP flag)] ")"
# Parses a flag list into an Array. Flags written with a leading backslash
# become capitalized Symbols; all other flags stay Strings. Lists that do
# not match the strict pattern are handed to the quirks-mode parser.
def flag_list
  matched = accept_re(Patterns::FLAG_LIST)
  return quirky__flag_list("flags-list") unless matched
  matched[1].split(nil).map! do |flag|
    flag.delete_prefix!("\\") ? flag.capitalize.to_sym : flag
  end
end
# "(" [flag-perm *(SP flag-perm)] ")"
# Parses a PERMANENTFLAGS list into an Array. Flags written with a leading
# backslash become capitalized Symbols; all other flags stay Strings. Lists
# that do not match the strict pattern are handed to the quirks-mode parser.
def flag_perm__list
  matched = accept_re(Patterns::FLAG_PERM_LIST)
  return quirky__flag_list("PERMANENTFLAGS flag-perm list") unless matched
  matched[1].split(nil).map! do |flag|
    flag.delete_prefix!("\\") ? flag.capitalize.to_sym : flag
  end
end
# This allows illegal "]" in flag names (Gmail),
# or "\*" in a FLAGS response (greenmail).
# Quirks-mode flag list parser, e.g. for an illegal "]" in a flag name or
# "\*" in a FLAGS response. +name+ only labels the parse error. Flags with a
# leading backslash become capitalized Symbols; the rest stay Strings.
def quirky__flag_list(name)
  matched = match_re(Patterns::QUIRKY_FLAGS_LIST, "quirks mode #{name}")
  matched[1].scan(Patterns::QUIRKY_FLAG).map! do |flag|
    flag.delete_prefix!("\\") ? flag.capitalize.to_sym : flag
  end
end
# See Patterns::MBX_LIST_FLAGS
# Parses the mailbox list flags into an Array of capitalized Symbols. A
# leading backslash, when present, is dropped. See Patterns::MBX_LIST_FLAGS.
def mbx_list_flags
  matched = match_re(Patterns::MBX_LIST_FLAGS, "mbx-list-flags")
  matched[1].split(nil).map! do |flag|
    flag.delete_prefix!("\\")
    flag.capitalize.to_sym
  end
end
# See https://developers.google.com/gmail/imap/imap-extensions
# Parses one Gmail label: a backslash-prefixed atom becomes a capitalized
# Symbol; anything else is read as an astring.
def x_gm_label
  return astring unless accept(T_BSLASH)
  atom.capitalize.to_sym
end
# See https://developers.google.com/gmail/imap/imap-extensions
# Parses a parenthesized, SP-separated list of Gmail labels into an Array.
# An empty "()" yields an empty Array.
def x_gm_labels
  lpar
  return [] if rpar?
  labels = [x_gm_label]
  while SP?
    labels << x_gm_label
  end
  rpar
  labels
end
# See https://www.rfc-editor.org/errata/rfc3501
#
# charset = atom / quoted
# Parses a charset: a quoted string when one is present, otherwise an atom.
def charset
  quoted? || atom
end
# RFC7162:
# mod-sequence-value = 1*DIGIT
# ;; Positive unsigned 63-bit integer
# ;; (mod-sequence)
# ;; (1 <= n <= 9,223,372,036,854,775,807).
#
# Parsed by #nz_number64.
alias mod_sequence_value nz_number64
# RFC7162:
# permsg-modsequence = mod-sequence-value
# ;; Per-message mod-sequence.
alias permsg_modsequence mod_sequence_value
# RFC7162:
# mod-sequence-valzer = "0" / mod-sequence-value
#
# Parsed by #number64, since zero is allowed here.
alias mod_sequence_valzer number64
# Parses a parenthesized per-message mod-sequence and returns its value.
def parens__modseq
  lpar
  modseq = permsg_modsequence
  rpar
  modseq
end
# RFC8474:
# objectid = 1*255(ALPHA / DIGIT / "_" / "-")
# ; characters in object identifiers are case
# ; significant
#
# n.b. read as a plain #atom; the length and character limits above are not
# checked by this alias.
alias objectid atom
# Parses a parenthesized objectid and returns the identifier.
def parens__objectid
  lpar
  id = objectid
  rpar
  id
end

# Like parens__objectid, but NIL is accepted and yields nil.
def nparens__objectid
  return nil if NIL?
  parens__objectid
end
# RFC-4315 (UIDPLUS) or RFC9051 (IMAP4rev2):
# uid-set = (uniqueid / uid-range) *("," uid-set)
# uid-range = (uniqueid ":" uniqueid)
# ; two uniqueid values and all values
# ; between these two regardless of order.
# ; Example: 2:4 and 4:2 are equivalent.
# uniqueid = nz-number
# ; Strictly ascending
# Parses a sequence set and returns it, raising a parse error when the set
# contains "*", which is not valid in a uid-set.
def uid_set
  sequence_set.tap do |set|
    parse_error("uid-set cannot contain '*'") if set.include_star?
  end
end
# Consumes a mandatory NIL token and returns nil.
def nil_atom
  match(T_NIL)
  nil
end
# Zero or more spaces starting exactly at the search position; used by
# #accept_spaces.
SPACES_REGEXP = /\G */n
# The RFC is very strict about this and usually we should be too.
# But skipping spaces is usually a safe workaround for buggy servers.
#
# This advances @pos directly so it's safe before changing @lex_state.
# Returns false when SP? does not match. Otherwise advances @pos past any
# run of spaces found at the current position and returns true.
def accept_spaces
  return false unless SP?
  @pos = $~.end(0) if @str.index(SPACES_REGEXP, @pos)
  true
end
# Scans one token from @str starting at @pos and returns it as a Token,
# advancing @pos past the consumed text. Which regexp is used, and so which
# token types can be produced, depends on @lex_state.
#
# Raises a parse error for input that matches no token, and for an
# unrecognized @lex_state.
def next_token
case @lex_state
when EXPR_BEG
if @str.index(BEG_REGEXP, @pos)
@pos = $~.end(0)
# Exactly which capture group participated decides the token type. $+ is
# the last group that matched, i.e. the token's text.
if $1
return Token.new(T_SPACE, $+)
elsif $2
# literal8: the capture is the length; the value is the next +len+
# characters of @str, which are skipped as well.
len = $+.to_i
val = @str[@pos, len]
@pos += len
return Token.new(T_LITERAL8, val)
elsif $3 && $7
# greedily match ATOM, prefixed with NUMBER, NIL, or PLUS.
return Token.new(T_ATOM, $3)
elsif $4
return Token.new(T_NIL, $+)
elsif $5
return Token.new(T_NUMBER, $+)
elsif $6
return Token.new(T_PLUS, $+)
elsif $8
# match ATOM, without a NUMBER, NIL, or PLUS prefix
return Token.new(T_ATOM, $+)
elsif $9
return Token.new(T_QUOTED, Patterns.unescape_quoted($+))
elsif $10
return Token.new(T_LPAR, $+)
elsif $11
return Token.new(T_RPAR, $+)
elsif $12
return Token.new(T_BSLASH, $+)
elsif $13
return Token.new(T_STAR, $+)
elsif $14
return Token.new(T_LBRA, $+)
elsif $15
return Token.new(T_RBRA, $+)
elsif $16
# literal: same length-prefixed handling as literal8 above.
len = $+.to_i
val = @str[@pos, len]
@pos += len
return Token.new(T_LITERAL, val)
elsif $17
return Token.new(T_PERCENT, $+)
elsif $18
return Token.new(T_CRLF, $+)
elsif $19
return Token.new(T_EOF, $+)
else
# The regexp matched but no known group did.
parse_error("[Net::IMAP BUG] BEG_REGEXP is invalid")
end
else
# No token matched: report the run of non-space characters found from
# @pos onwards.
@str.index(/\S*/n, @pos)
parse_error("unknown token - %s", $&.dump)
end
when EXPR_DATA
if @str.index(DATA_REGEXP, @pos)
@pos = $~.end(0)
# Same dispatch as above, with the smaller token set of DATA_REGEXP.
if $1
return Token.new(T_SPACE, $+)
elsif $2
return Token.new(T_NIL, $+)
elsif $3
return Token.new(T_NUMBER, $+)
elsif $4
return Token.new(T_QUOTED, Patterns.unescape_quoted($+))
elsif $5
# literal: the capture is the length; the value is the next +len+
# characters of @str, which are skipped as well.
len = $+.to_i
val = @str[@pos, len]
@pos += len
return Token.new(T_LITERAL, val)
elsif $6
return Token.new(T_LPAR, $+)
elsif $7
return Token.new(T_RPAR, $+)
else
# The regexp matched but no known group did.
parse_error("[Net::IMAP BUG] DATA_REGEXP is invalid")
end
else
# No token matched: report the run of non-space characters found from
# @pos onwards.
@str.index(/\S*/n, @pos)
parse_error("unknown token - %s", $&.dump)
end
else
parse_error("invalid @lex_state - %s", @lex_state.inspect)
end
end
end
end
end