class Rack::Multipart::Parser
# Parse a multipart request body read from +io+ and return the parser's
# result, or EMPTY when there is nothing to parse (a zero content length,
# or a content type from which no boundary can be extracted).
#
# Raises BoundaryTooLongError when the boundary is longer than 70
# characters: RFC 1521 Section 7.2.1 imposes a 70 character maximum for
# the boundary, and most clients use no more than 55 characters.
def self.parse(io, content_length, content_type, tmpfile, bufsize, qp)
  return EMPTY if 0 == content_length

  delimiter = parse_boundary(content_type)
  return EMPTY unless delimiter

  delimiter_size = delimiter.length
  if delimiter_size > 70
    raise BoundaryTooLongError, "multipart boundary size too large (#{delimiter_size} characters)"
  end

  # Only wrap the input when a content length was supplied.
  source = content_length ? BoundedIO.new(io, content_length) : io

  multipart = new(delimiter, tmpfile, bufsize, qp)
  multipart.parse(source)
  multipart.result
end
# Extract the boundary from a Content-Type header value: the first capture
# group of MULTIPART. Returns nil when +content_type+ is nil or does not
# match.
def self.parse_boundary(content_type)
  return unless content_type

  matched = content_type.match(MULTIPART)
  matched && matched[1]
end
# Scan until we find the start or end of the boundary.
# If we find it, return the appropriate symbol for the start or
# end of the boundary (:BOUNDARY when the matched text ends with EOL,
# :END_BOUNDARY otherwise). If we don't find the start or end of the
# boundary, move the scan pointer to the end of the buffer and return nil.
def consume_boundary
  scanned = @sbuf.scan_until(@body_regex)

  unless scanned
    # Nothing usable in the buffer: skip everything that is left.
    @sbuf.terminate
    return
  end

  scanned.end_with?(EOL) ? :BOUNDARY : :END_BOUNDARY
end
# Strip one pair of surrounding double quotes from +str+ (if present) and
# resolve backslash escapes, so that <tt>\x</tt> becomes <tt>x</tt>.
# Always returns a new string; +str+ itself is not modified.
#
# From WEBrick::HTTPUtils
def dequote(str)
  quoted = /\A"(.*)"\Z/.match(str)
  unquoted = quoted ? quoted[1] : str.dup
  # gsub! returns nil when nothing was replaced, so return the string itself.
  unquoted.gsub!(/\\(.)/, "\\1")
  unquoted
end
# Return the related Encoding object. However, because
# enc is submitted by the user, it may be invalid (an unknown
# name) or missing entirely (nil), so fall back to the binary
# encoding in those cases.
def find_encoding(enc)
  Encoding.find(enc)
rescue ArgumentError, TypeError
  # ArgumentError: unknown encoding name.
  # TypeError: enc is not a String (e.g. nil).
  Encoding::BINARY
end
# Consume the boundary that follows a part body and pick the next state:
# :DONE after the closing boundary, or when the buffer is exhausted without
# an opening boundary having been read; :MIME_HEAD otherwise.
def handle_consume_token
  token = consume_boundary

  # Stop if we're at the end of a buffer, but not if it is the end of a field.
  finished = token == :END_BOUNDARY || (@sbuf.eos? && token != :BOUNDARY)
  @state = finished ? :DONE : :MIME_HEAD
end
# Raise EmptyContentError when +content+ is nil or empty; otherwise do
# nothing and return nil.
def handle_empty_content!(content)
  raise EmptyContentError if content.nil? || content.empty?
end
# This handles the initial parser state. We read until we find the starting
# boundary, then we can transition to the next state. If we find the ending
# boundary, this is an invalid multipart upload, but keep scanning for opening
# boundary in that case. If no boundary found, we need to keep reading data
# and retry. It's highly unlikely the initial read will not consume the
# boundary. The client would have to deliberately craft a response
# with the opening boundary beyond the buffer size for that to happen.
#
# Returns :want_read when more input is needed, nil otherwise.
def handle_fast_forward
  loop do
    case consume_boundary
    when :BOUNDARY
      # found opening boundary, transition to next state
      @state = :MIME_HEAD
      return
    when :END_BOUNDARY
      # invalid multipart upload
      if @sbuf.pos == @end_boundary_size && @sbuf.rest == EOL
        # stop parsing a buffer if a buffer is only an end boundary.
        @state = :DONE
        return
      end

      # retry for opening boundary
    else
      # no boundary found, keep reading data
      return :want_read
    end
  end
end
# Hand the body of the current part to the collector.
#
# When the buffer contains the next boundary, everything before it is the
# remainder of the body: emit it, move past it and switch to
# :CONSUME_TOKEN. Otherwise emit as much of the buffer as possible while
# holding back the last @rx_max_size bytes, and return :want_read.
def handle_mime_body
  # check but do not advance the pointer yet
  chunk = @sbuf.check_until(@body_regex)

  unless chunk
    # Save what we have so far
    pending = @sbuf.rest_size
    if @rx_max_size < pending
      flushable = pending - @rx_max_size
      @collector.on_mime_body(@mime_index, @sbuf.peek(flushable))
      @sbuf.pos += flushable
      @sbuf.string = @sbuf.rest
    end
    return :want_read
  end

  # remove the boundary from the string
  payload = chunk.sub(@body_regex_at_end, '')
  @collector.on_mime_body(@mime_index, payload)
  # skip \r\n after the content
  @sbuf.pos += payload.length + 2
  @state = :CONSUME_TOKEN
  @mime_index += 1
end
# Parse the header section of the current part.
#
# Returns :want_read when the buffer does not yet contain a complete header
# section. Otherwise extracts the content type and the +name+, +filename+
# and <tt>filename*</tt> Content-Disposition parameters, reports them to the
# collector and switches to the :MIME_BODY state.
#
# A Content-Disposition value that is missing, or larger than
# CONTENT_DISPOSITION_MAX_BYTES, is not parsed; the part name is then taken
# from the Content-ID header. A value without any ';' is treated as having
# no parameters.
def handle_mime_head
  return :want_read unless @sbuf.scan_until(@head_regex)

  head = @sbuf[1]
  content_type = head[MULTIPART_CONTENT_TYPE, 1]
  name = filename = filename_star = nil

  if (disposition = head[MULTIPART_CONTENT_DISPOSITION, 1]) &&
      disposition.bytesize <= CONTENT_DISPOSITION_MAX_BYTES

    # ignore actual content-disposition value (should always be form-data)
    if (i = disposition.index(';'))
      disposition.slice!(0, i + 1)
    else
      # No semicolon means there is no parameter list to parse.
      disposition = ''
    end
    param = nil
    num_params = 0

    # Parse parameter list
    while i = disposition.index('=')
      # Only parse up to max parameters, to avoid potential denial of service
      num_params += 1
      break if num_params > CONTENT_DISPOSITION_MAX_PARAMS

      # Found end of parameter name, ensure forward progress in loop
      param = disposition.slice!(0, i + 1)

      # Remove ending equals and preceding whitespace from parameter name
      param.chomp!('=')
      param.lstrip!

      if disposition[0] == '"'
        # Parameter value is quoted, parse it, handling backslash escapes
        disposition.slice!(0, 1)
        value = String.new

        while i = disposition.index(/(["\\])/)
          c = $1

          # Append all content until ending quote or escape
          value << disposition.slice!(0, i)

          # Remove either backslash or ending quote,
          # ensures forward progress in loop
          disposition.slice!(0, 1)

          # stop parsing parameter value if found ending quote
          break if c == '"'

          escaped_char = disposition.slice!(0, 1)
          if param == 'filename' && escaped_char != '"'
            # Possible IE uploaded filename, append both escape backslash and value
            value << c << escaped_char
          else
            # Other only append escaped value
            value << escaped_char
          end
        end
      else
        if i = disposition.index(';')
          # Parameter value unquoted (which may be invalid), value ends at semicolon
          value = disposition.slice!(0, i)
        else
          # If no ending semicolon, assume remainder of line is value and stop
          # parsing
          disposition.strip!
          value = disposition
          disposition = ''
        end
      end

      case param
      when 'name'
        name = value
      when 'filename'
        filename = value
      when 'filename*'
        filename_star = value
      # else
      # ignore other parameters
      end

      # skip trailing semicolon, to proceed to next parameter
      if i = disposition.index(';')
        disposition.slice!(0, i + 1)
      end
    end
  else
    name = head[MULTIPART_CONTENT_ID, 1]
  end

  if filename_star
    encoding, _, filename = filename_star.split("'", 3)
    filename = normalize_filename(filename || '')
    # An empty filename* value yields no encoding part at all (nil); to_s
    # turns that into an unknown encoding name instead of a TypeError.
    filename.force_encoding(find_encoding(encoding.to_s))
  elsif filename
    filename = normalize_filename(filename)
  end

  if name.nil? || name.empty?
    name = filename || "#{content_type || TEXT_PLAIN}[]".dup
  end

  @collector.on_mime_head @mime_index, head, filename, content_type, name
  @state = :MIME_BODY
end
# Set up a parser for a body delimited by +boundary+.
#
# +tempfile+ is passed to the Collector, +bufsize+ is the read size used
# when pulling data from the input, and +query_parser+ supplies the params
# object and is used again when building the result.
def initialize(boundary, tempfile, bufsize, query_parser)
  @query_parser = query_parser
  @params = query_parser.make_params
  @bufsize = bufsize

  @state = :FAST_FORWARD
  @mime_index = 0
  @collector = Collector.new(tempfile)
  @sbuf = StringScanner.new("".dup)

  boundary_bytes = boundary.bytesize
  # (-- at start, -- at finish)
  @end_boundary_size = boundary_bytes + 4
  # (\r\n-- at start, either \r\n or -- at finish)
  @rx_max_size = boundary_bytes + 6

  @body_regex = /(?:#{EOL}|\A)--#{Regexp.quote(boundary)}(?:#{EOL}|--)/m
  @body_regex_at_end = /#{@body_regex}\z/m
  @head_regex = /(.*?#{EOL})#{EOL}/m
end
# Reduce a client-supplied filename to its last path component.
#
# The name is passed through Utils.unescape_path only when every '%' in it
# starts a two-digit hex escape. It is then scrubbed in place and split on
# '/' and '\'; an empty String is returned when no component is left.
def normalize_filename(filename)
  escapes_valid = filename.scan(/%.?.?/).all? { |seq| seq.match?(/%[0-9a-fA-F]{2}/) }
  filename = Utils.unescape_path(filename) if escapes_valid

  filename.scrub!

  basename = filename.split(/[\/\\]/).last
  basename || String.new
end
# Run the parser state machine over +io+ until the :DONE state is reached.
# Each pass dispatches on @state; whenever a handler returns :want_read
# another chunk is read from +io+ before continuing.
def parse(io)
  scratch = String.new
  read_data(io, scratch)

  loop do
    outcome = case @state
              when :FAST_FORWARD then handle_fast_forward
              when :CONSUME_TOKEN then handle_consume_token
              when :MIME_HEAD then handle_mime_head
              when :MIME_BODY then handle_mime_body
              else return # when :DONE
              end

    next unless outcome == :want_read

    read_data(io, scratch)
  end
end
# Read up to @bufsize bytes from +io+ (reusing +outbuf+ as the read buffer)
# and append them to the scan buffer. An empty or nil read is passed to
# handle_empty_content!, which raises.
def read_data(io, outbuf)
  chunk = io.read(@bufsize, outbuf)
  handle_empty_content!(chunk)
  @sbuf << chunk
end
# Build the MultipartInfo for everything collected so far: each part's data
# is encoding-tagged and merged into the params under the part's name, and
# the bodies of the file parts are passed along as the second argument.
def result
  @collector.each do |part|
    part.get_data do |data|
      tag_multipart_encoding(part.filename, part.content_type, part.name, data)
      @query_parser.normalize_params(@params, part.name, data)
    end
  end

  params_hash = @params.to_params_hash
  file_bodies = @collector.find_all(&:file?).map(&:body)
  MultipartInfo.new(params_hash, file_bodies)
end
# Tag +name+ and +body+ of a non-file part with an encoding, in place.
#
# +name+ is always tagged as UTF-8 first. Parts with a +filename+ are then
# left alone (returns nil). For other parts the encoding is UTF-8 unless
# +content_type+ is text/plain with a charset parameter, in which case the
# charset (via find_encoding) is applied to both +name+ and +body+.
#
# Malformed Content-Type values (empty, or with parameters lacking a name
# or a value) are tolerated: such parameters are skipped.
def tag_multipart_encoding(filename, content_type, name, body)
  name = name.to_s
  encoding = Encoding::UTF_8

  name.force_encoding(encoding)

  return if filename

  if content_type
    type_subtype, *params = content_type.split(';')

    # type_subtype is nil when content_type is empty or only semicolons.
    if type_subtype && TEXT_PLAIN == type_subtype.strip
      params.each do |param|
        k, v = param.split('=', 2)
        # Skip empty parameters and parameters without '='.
        next unless k && v
        next unless k.strip == "charset"

        v = v.strip
        v = v[1..-2] if v.start_with?('"') && v.end_with?('"')
        encoding = find_encoding(v)
      end
    end
  end

  name.force_encoding(encoding)
  body.force_encoding(encoding)
end