class Rack::Multipart::Parser
# Parses a multipart body read from +io+ and returns the parser's result.
#
# Returns EMPTY when +content_length+ is zero or when no boundary can be
# extracted from +content_type+.
#
# Raises EOFError when +content_length+ is larger than
# PARSER_BYTESIZE_LIMIT (only checked when both are set).
def self.parse(io, content_length, content_type, tmpfile, bufsize, qp)
  return EMPTY if 0 == content_length

  boundary = parse_boundary content_type
  return EMPTY unless boundary

  over_limit = PARSER_BYTESIZE_LIMIT && content_length && content_length > PARSER_BYTESIZE_LIMIT
  if over_limit
    raise EOFError, "multipart Content-Length #{content_length} exceeds limit of #{PARSER_BYTESIZE_LIMIT} bytes"
  end

  # When the length is known, wrap the input in a BoundedIO built from it.
  io = BoundedIO.new(io, content_length) if content_length
  outbuf = String.new
  parser = new(boundary, tmpfile, bufsize, qp)

  # Always feed at least one chunk, then keep feeding until the parser
  # reports that it is done.
  loop do
    parser.on_read io.read(bufsize, outbuf)
    break if parser.state == :DONE
  end

  io.rewind
  parser.result
end
# Extracts the boundary parameter from a multipart content type.
#
# Returns nil when +content_type+ is nil or does not match MULTIPART;
# otherwise returns the second capture of that match.
#
# Raises EOFError when the first capture is non-empty (whitespace before
# the equal sign) or when another boundary parameter follows the match.
def self.parse_boundary(content_type)
  return unless content_type

  data = content_type.match(MULTIPART)
  return unless data

  whitespace = data[1]
  raise EOFError, "whitespace between boundary parameter name and equal sign" unless whitespace.empty?

  if /boundary\s*=/i.match?(data.post_match)
    raise EOFError, "multiple boundary parameters found in multipart content type"
  end

  data[2]
end
# Scans forward until the start or end boundary is found.
#
# Returns :BOUNDARY when a scanned chunk (stripped of surrounding
# whitespace) equals the part boundary, and :END_BOUNDARY when it equals
# the closing boundary. Returns nil when nothing more can be scanned or
# the buffer is exhausted without finding either.
def consume_boundary
  loop do
    chunk = @sbuf.scan_until(BOUNDARY_REGEX)
    return unless chunk

    candidate = chunk.strip
    return :BOUNDARY if full_boundary == candidate
    return :END_BOUNDARY if @end_boundary == candidate

    # Nothing left to scan: report that no boundary was found.
    return if @sbuf.eos?
  end
end
# Reader for the boundary string stored in @full_boundary.
def full_boundary
  @full_boundary
end
# Extracts the uploaded file name from a MIME part header.
#
# When +head+ matches RFC2183, the name is taken from the `filename`
# parameter (surrounding double quotes removed) or, failing that, from the
# `filename*` parameter, whose value is split on single quotes into a
# charset label, a middle field that is ignored, and the name itself.
# When +head+ matches BROKEN instead, the first capture of that match is
# used.
#
# The name is percent-decoded when every `%` in it starts a valid escape,
# scrubbed of invalid bytes, and has backslash escapes removed unless it
# contains a backslash followed by something other than `\` or `"`.
#
# Returns the file name, or nil when the header carries none.
def get_filename(head)
  filename = nil
  encoding = nil

  case head
  when RFC2183
    params = Hash[*head.scan(DISPPARM).flat_map(&:compact)]

    if filename = params['filename']
      filename = $1 if filename =~ /^"(.*)"$/
    elsif filename = params['filename*']
      encoding, _, filename = filename.split("'", 3)
    end
  when BROKEN
    filename = $1
    filename = $1 if filename =~ /^"(.*)"$/
  end

  return unless filename

  if filename.scan(/%.?.?/).all? { |s| /%[0-9a-fA-F]{2}/.match?(s) }
    filename = Utils.unescape_path(filename)
  end

  filename.scrub!

  if filename !~ /\\[^\\"]/
    filename = filename.gsub(/\\(.)/, '\1')
  end

  if encoding
    # The charset label is client-supplied and may be unknown or empty.
    # Fall back to binary instead of letting the lookup error escape.
    # Encoding.find may also return nil for the special name "internal".
    charset =
      begin
        ::Encoding.find(encoding) || ::Encoding::BINARY
      rescue ArgumentError
        ::Encoding::BINARY
      end
    filename.force_encoding charset
  end

  filename
end
# Consumes the boundary that follows a part body and selects the next
# parser state, which is also the return value.
#
# The state becomes :DONE on the closing boundary, or when the buffer is
# exhausted without a part boundary having been found; otherwise it
# becomes :MIME_HEAD.
def handle_consume_token
  tok = consume_boundary

  # Stop at the end of the buffer, but not if it is the end of a field.
  finished = tok == :END_BOUNDARY || (@sbuf.eos? && tok != :BOUNDARY)
  @state = finished ? :DONE : :MIME_HEAD
end
# Raises EOFError when +content+ is nil or empty; otherwise returns nil.
def handle_empty_content!(content)
  raise EOFError if content.nil? || content.empty?
end
# Skips forward to the first boundary.
#
# Sets the state to :DONE when the whole buffer is just a closing boundary,
# or to :MIME_HEAD when any boundary was found; in both cases the new state
# is returned. Returns :want_read when no boundary was found yet.
#
# Raises EOFError when no boundary was found and the unread data has
# reached the buffer size, or the buffered string is larger than
# BOUNDARY_START_LIMIT.
def handle_fast_forward
  tok = consume_boundary

  # Stop parsing if the buffer consists of nothing but an end boundary.
  only_end_boundary = tok == :END_BOUNDARY && @sbuf.pos == @end_boundary_size && @sbuf.eos?
  return @state = :DONE if only_end_boundary
  return @state = :MIME_HEAD if tok

  raise EOFError, "bad content body" if @sbuf.rest_size >= @bufsize

  # Raise when the multipart boundary is not found, to avoid unbounded
  # memory buffering.
  raise EOFError, "multipart boundary not found within limit" if @sbuf.string.bytesize > BOUNDARY_START_LIMIT

  # No boundary found yet: ask for more data.
  :want_read
end
# Reads the body of the current MIME part out of the scan buffer.
#
# When the boundary is found, everything before it is handed to the
# collector, the scan pointer is moved past the body, the part index is
# incremented and the state becomes :CONSUME_TOKEN.
#
# When the boundary is not found, all buffered data except the last
# @rx_max_size bytes is handed to the collector and dropped from the
# buffer, and :want_read is returned.
#
# Body bytes are counted through update_retained_size only while
# @body_retained is set.
def handle_mime_body
  # Check for the boundary without advancing the pointer yet.
  body_with_boundary = @sbuf.check_until(@body_regex)

  if body_with_boundary
    # Remove the boundary from the end of the string.
    body = body_with_boundary.sub(/#{@body_regex}\z/m, '')
    update_retained_size(body.bytesize) if @body_retained
    @collector.on_mime_body @mime_index, body

    # The scan pointer is a byte offset, so advance by the byte size of the
    # body (not its character count), plus 2 to skip the \r\n after it.
    @sbuf.pos += body.bytesize + 2
    @state = :CONSUME_TOKEN
    @mime_index += 1
  else
    # Save what we have so far, keeping back enough bytes that the tail
    # can still be matched against the boundary after the next read.
    if @rx_max_size < @sbuf.rest_size
      delta = @sbuf.rest_size - @rx_max_size
      body = @sbuf.peek(delta)
      update_retained_size(body.bytesize) if @body_retained
      @collector.on_mime_body @mime_index, body
      @sbuf.pos += delta
      @sbuf.string = @sbuf.rest
    end

    :want_read
  end
end
# Reads the header of the next MIME part from the scan buffer.
#
# When a complete header is buffered, the content type, field name and
# file name are extracted from it and passed to the collector, and the
# state becomes :MIME_BODY. Otherwise :want_read is returned.
#
# Raises EOFError when no complete header is buffered and the unread data
# is larger than MIME_HEADER_BYTESIZE_LIMIT.
def handle_mime_head
  unless @sbuf.scan_until(@head_regex)
    # Raise if the mime part header is too large, to avoid unbounded
    # memory buffering. Note that the actual limit is the higher of 64KB
    # and the buffer size (1MB by default).
    raise EOFError, "multipart mime part header too large" if @sbuf.rest.bytesize > MIME_HEADER_BYTESIZE_LIMIT

    return :want_read
  end

  head = @sbuf[1]
  content_type = head[MULTIPART_CONTENT_TYPE, 1]

  # Prefer the name from the content disposition; fall back to the
  # content id.
  disposition_name = head[MULTIPART_CONTENT_DISPOSITION, 1]
  name =
    if disposition_name
      Rack::Auth::Digest::Params::dequote(disposition_name)
    else
      head[MULTIPART_CONTENT_ID, 1]
    end

  filename = get_filename(head)

  if name.nil? || name.empty?
    name = filename || "#{content_type || TEXT_PLAIN}[]".dup
  end

  # Mime part head data is retained for both TempfilePart and BufferPart
  # for the entirety of the parse, even though it isn't used for BufferPart.
  update_retained_size(head.bytesize)

  # If a filename is given, a TempfilePart will be used, so the body will
  # not be buffered in memory. However, if a filename is not given, a
  # BufferPart will be used, and the body will be buffered in memory.
  @body_retained = !filename

  @collector.on_mime_head @mime_index, head, filename, content_type, name
  @state = :MIME_BODY
end
# Sets up parser state for a body delimited by +boundary+.
#
# +tempfile+ is passed to the Collector, +bufsize+ is stored for later
# size checks, and +query_parser+ supplies the params container.
def initialize(boundary, tempfile, bufsize, query_parser)
  @query_parser = query_parser
  @params = query_parser.make_params
  @bufsize = bufsize

  # Delimiters: each part starts with "--<boundary>" and the body ends
  # with "--<boundary>--".
  @boundary = "--#{boundary}"
  @full_boundary = @boundary
  @end_boundary = "#{@boundary}--"

  # Parse progress.
  @state = :FAST_FORWARD
  @mime_index = 0
  @body_retained = nil
  @retained_size = 0
  # Bytes read are only tracked when a parser byte limit is configured.
  @total_bytes_read = PARSER_BYTESIZE_LIMIT ? 0 : nil

  @collector = Collector.new tempfile
  @sbuf = StringScanner.new("".dup)

  @body_regex = /(?:#{EOL})?#{Regexp.quote(@boundary)}(?:#{EOL}|--)/m
  @end_boundary_size = boundary.bytesize + 6 # (-- at start, -- at finish, EOL at end)
  @rx_max_size = EOL.size + @boundary.bytesize + [EOL.size, '--'.size].max
  @head_regex = /(.*?#{EOL})#{EOL}/m
end
# Appends a freshly read chunk to the scan buffer and runs the parser on
# it, returning whatever run_parser returns.
#
# Raises EOFError when +content+ is nil or empty, or when byte tracking is
# enabled and the running total exceeds PARSER_BYTESIZE_LIMIT.
def on_read(content)
  handle_empty_content!(content)

  if @total_bytes_read
    @total_bytes_read += content.bytesize
    over_limit = @total_bytes_read > PARSER_BYTESIZE_LIMIT
    raise EOFError, "multipart upload exceeds limit of #{PARSER_BYTESIZE_LIMIT} bytes" if over_limit
  end

  @sbuf.concat(content)
  run_parser
end
# Builds the final parse result.
#
# Every piece of data yielded by each collected part is tagged with an
# encoding and merged into the params under the part's name. Returns a
# MultipartInfo built from the params hash and the bodies of all parts
# that report themselves as files.
def result
  @collector.each do |part|
    part.get_data do |data|
      tag_multipart_encoding(part.filename, part.content_type, part.name, data)
      @query_parser.normalize_params(@params, part.name, data, @query_parser.param_depth_limit)
    end
  end

  params_hash = @params.to_params_hash
  file_bodies = @collector.find_all(&:file?).map(&:body)
  MultipartInfo.new(params_hash, file_bodies)
end
# Drives the state machine: repeatedly runs the handler for the current
# state until the state is :DONE or a handler returns :want_read.
# Returns nil.
def run_parser
  until @state == :DONE
    outcome =
      case @state
      when :FAST_FORWARD  then handle_fast_forward
      when :CONSUME_TOKEN then handle_consume_token
      when :MIME_HEAD     then handle_mime_head
      when :MIME_BODY     then handle_mime_body
      end

    # The handler needs more input than is currently buffered.
    break if outcome == :want_read
  end
end
# Tags a part's name and body with a string encoding.
#
# The name is always tagged as UTF-8 first. For parts with a +filename+
# nothing else happens and nil is returned. Otherwise, when +content_type+
# is TEXT_PLAIN with a charset parameter, that charset is applied to both
# +name+ and +body+; without one they are tagged UTF-8. Returns +body+.
#
# The content type is client-supplied, so malformed input is tolerated:
# parameters without an `=` are skipped, and an unknown charset falls back
# to binary.
def tag_multipart_encoding(filename, content_type, name, body)
  name = name.to_s
  encoding = Encoding::UTF_8

  name.force_encoding(encoding)

  return if filename

  if content_type
    type_subtype, *params = content_type.split(';')

    # type_subtype is nil when the content type is empty.
    if type_subtype && TEXT_PLAIN == type_subtype.strip
      params.each do |param|
        key, value = param.split('=', 2)
        # Skip empty parameters and parameters that carry no value.
        next unless key && value
        next unless key.strip == CHARSET

        value = value.strip
        value = value[1..-2] if value.start_with?('"') && value.end_with?('"')

        # Encoding.find raises for unknown names and may return nil for
        # the special name "internal"; use binary in both cases.
        encoding =
          begin
            Encoding.find(value) || Encoding::BINARY
          rescue ArgumentError
            Encoding::BINARY
          end
      end
    end
  end

  name.force_encoding(encoding)
  body.force_encoding(encoding)
end
# Adds +size+ to the running count of retained bytes.
#
# Raises EOFError once the count exceeds BUFFERED_UPLOAD_BYTESIZE_LIMIT;
# otherwise returns nil.
def update_retained_size(size)
  @retained_size += size
  return unless @retained_size > BUFFERED_UPLOAD_BYTESIZE_LIMIT

  raise EOFError, "multipart data over retained size limit"
end