class Rack::Multipart::Parser
# Parses a multipart body read from +io+ and returns the parser's result.
#
# Returns EMPTY when +content_length+ is zero or when no boundary can be
# extracted from +content_type+. When +content_length+ is given, reads are
# limited to that many bytes by wrapping +io+ in a BoundedIO.
#
# Raises Error when the boundary is longer than 70 characters.
def self.parse(io, content_length, content_type, tmpfile, bufsize, qp)
  return EMPTY if content_length == 0

  boundary = parse_boundary(content_type)
  return EMPTY unless boundary

  # RFC 1521 Section 7.2.1 imposes a 70 character maximum for the boundary.
  # Most clients use no more than 55 characters.
  boundary_length = boundary.length
  if boundary_length > 70
    raise Error, "multipart boundary size too large (#{boundary_length} characters)"
  end

  source = content_length ? BoundedIO.new(io, content_length) : io

  parser = new(boundary, tmpfile, bufsize, qp)
  parser.parse(source)
  parser.result
end
# Returns the first capture of MULTIPART matched against +content_type+,
# or nil when +content_type+ is nil or does not match.
def self.parse_boundary(content_type)
  return unless content_type

  matched = content_type.match(MULTIPART)
  matched && matched[1]
end
# Scan until we find the start or end of the boundary.
# If we find it, return the appropriate symbol for the start or
# end of the boundary (:BOUNDARY or :END_BOUNDARY). If we don't find
# either, move the scanner to the end of its buffer and return nil.
def consume_boundary
  scanned = @sbuf.scan_until(@body_regex)

  unless scanned
    @sbuf.terminate
    return nil
  end

  # A match ending in EOL is an opening/separating boundary; otherwise the
  # match ended with "--", which marks the closing boundary.
  scanned.end_with?(EOL) ? :BOUNDARY : :END_BOUNDARY
end
# From WEBrick::HTTPUtils
#
# Returns a copy of +str+ with one pair of surrounding double quotes removed
# (if present) and every backslash escape replaced by the escaped character.
# +str+ itself is not modified.
def dequote(str)
  quoted = /\A"(.*)"\Z/.match(str)
  inner = quoted ? quoted[1] : str.dup
  inner.gsub!(/\\(.)/, "\\1")
  inner
end
# Extracts the uploaded file name from the part header text +head+.
#
# Headers matching RFC2183 have their parameters parsed with DISPPARM; the
# extended `filename*` parameter (charset'language'value) takes precedence
# over plain `filename`. Headers matching BROKEN use that pattern's first
# capture instead. Returns nil when no file name is found.
#
# The name is percent-decoded only when every `%` in it starts a valid
# two-digit hex escape, and backslash escapes are removed only when every
# backslash escapes a backslash or a double quote.
#
# A charset supplied through `filename*` comes straight from the client, so
# an unknown or empty charset name falls back to Encoding::BINARY instead of
# letting Encoding.find raise ArgumentError.
def get_filename(head)
  filename = nil
  encoding = nil

  case head
  when RFC2183
    params = Hash[*head.scan(DISPPARM).flat_map(&:compact)]

    if (filename = params['filename*'])
      # charset'language'value -- the language tag is ignored.
      encoding, _, filename = filename.split("'", 3)
    elsif (filename = params['filename'])
      filename = filename[/^"(.*)"$/, 1] || filename
    end
  when BROKEN
    filename = Regexp.last_match(1)
    filename = filename[/^"(.*)"$/, 1] || filename
  end

  return unless filename

  if filename.scan(/%.?.?/).all? { |s| /%[0-9a-fA-F]{2}/.match?(s) }
    filename = Utils.unescape_path(filename)
  end

  filename.scrub!

  # NOTE(review): names containing other backslash sequences are left
  # untouched -- presumably to keep Windows-style paths intact; confirm.
  if filename !~ /\\[^\\"]/
    filename = filename.gsub(/\\(.)/, '\1')
  end

  if encoding
    declared =
      begin
        ::Encoding.find(encoding)
      rescue ArgumentError
        ::Encoding::BINARY
      end
    filename.force_encoding(declared)
  end

  filename
end
# Consumes the boundary that follows a part body and selects the next state:
# :DONE after the closing boundary, or when the buffer is exhausted without
# an opening boundary; :MIME_HEAD otherwise. Returns the new state.
def handle_consume_token
  token = consume_boundary

  # break if we're at the end of a buffer, but not if it is the end of a field
  finished = token == :END_BOUNDARY || (@sbuf.eos? && token != :BOUNDARY)

  @state = finished ? :DONE : :MIME_HEAD
end
# Raises EmptyContentError when +content+ is nil or empty; otherwise does
# nothing and returns nil.
def handle_empty_content!(content)
  raise EmptyContentError if content.nil? || content.empty?
end
# This handles the initial parser state. We read until we find the starting
# boundary, then we can transition to the next state. If we find the ending
# boundary, this is an invalid multipart upload, but keep scanning for the
# opening boundary in that case. If no boundary is found, we need to keep
# reading data and retry. It's highly unlikely the initial read will not
# consume the boundary; the client would have to deliberately craft such a
# request.
#
# Returns :want_read when more data is needed, nil after a state change.
def handle_fast_forward
  loop do
    token = consume_boundary

    if token == :BOUNDARY
      # found opening boundary, transition to next state
      @state = :MIME_HEAD
      return
    elsif token != :END_BOUNDARY
      # no boundary found, keep reading data
      return :want_read
    end

    # invalid multipart upload
    # stop parsing a buffer if a buffer is only an end boundary.
    only_end_boundary = @sbuf.pos == @end_boundary_size && @sbuf.rest == EOL
    next unless only_end_boundary # retry for opening boundary

    @state = :DONE
    return
  end
end
# Handles the :MIME_BODY state.
#
# When the buffer contains a boundary, everything before it is handed to the
# collector as the final chunk of the current part, the scanner is advanced
# past that chunk, and the state becomes :CONSUME_TOKEN.
#
# Otherwise returns :want_read. Before doing so, any buffered bytes beyond
# the last @rx_max_size (which might still be the start of a boundary) are
# flushed to the collector and dropped from the buffer.
def handle_mime_body
  if (body_with_boundary = @sbuf.check_until(@body_regex)) # check but do not advance the pointer yet
    # Built once and reused instead of recompiling the regexp on every call.
    @body_regex_at_end ||= /#{@body_regex}\z/m
    body = body_with_boundary.sub(@body_regex_at_end, '') # remove the boundary from the string
    @collector.on_mime_body @mime_index, body
    # StringScanner#pos is a byte offset, so advance by the byte size of the
    # body (not its character count), plus 2 to skip \r\n after the content.
    @sbuf.pos += body.bytesize + 2
    @state = :CONSUME_TOKEN
    @mime_index += 1
  else
    # Save what we have so far
    if @rx_max_size < @sbuf.rest_size
      delta = @sbuf.rest_size - @rx_max_size
      @collector.on_mime_body @mime_index, @sbuf.peek(delta)
      @sbuf.pos += delta
      @sbuf.string = @sbuf.rest
    end
    :want_read
  end
end
# Handles the :MIME_HEAD state.
#
# Returns :want_read until a complete header block (matched by @head_regex)
# is buffered. Once it is, extracts the part's content type, name and file
# name, reports them to the collector, and moves to :MIME_BODY.
#
# The part name comes from the Content-Disposition `name` parameter, else
# from Content-ID. When neither yields a non-empty name, the file name is
# used, and failing that "<content type or TEXT_PLAIN>[]".
def handle_mime_head
  return :want_read unless @sbuf.scan_until(@head_regex)

  head = @sbuf[1]
  content_type = head[MULTIPART_CONTENT_TYPE, 1]

  disposition_name = head[MULTIPART_CONTENT_DISPOSITION, 1]
  name = disposition_name ? dequote(disposition_name) : head[MULTIPART_CONTENT_ID, 1]

  filename = get_filename(head)

  if name.nil? || name.empty?
    name = filename || "#{content_type || TEXT_PLAIN}[]".dup
  end

  @collector.on_mime_head @mime_index, head, filename, content_type, name
  @state = :MIME_BODY
end
# Sets up a parser for a multipart body delimited by +boundary+.
#
# +tempfile+ is handed to the Collector, +bufsize+ is the number of bytes
# requested per read, and +query_parser+ supplies the params container and
# is kept for building the result.
def initialize(boundary, tempfile, bufsize, query_parser)
  # Collaborators and configuration.
  @query_parser = query_parser
  @params = query_parser.make_params
  @bufsize = bufsize
  @collector = Collector.new(tempfile)

  # Parser state.
  @state = :FAST_FORWARD
  @mime_index = 0
  @sbuf = StringScanner.new("".dup)

  # Boundary-derived patterns and sizes.
  quoted_boundary = Regexp.quote(boundary)
  boundary_bytes = boundary.bytesize

  @body_regex = /(?:#{EOL}|\A)--#{quoted_boundary}(?:#{EOL}|--)/m
  @end_boundary_size = boundary_bytes + 4 # (-- at start, -- at finish)
  @rx_max_size = boundary_bytes + 6 # (\r\n-- at start, either \r\n or -- at finish)
  @head_regex = /(.*?#{EOL})#{EOL}/m
end
# Drives the parser over +io+: performs an initial read, then repeatedly
# runs the handler for the current state, reading more data whenever a
# handler returns :want_read. Returns (nil) once the state is no longer one
# of the four handled states, i.e. when it is :DONE.
def parse(io)
  outbuf = String.new
  read_data(io, outbuf)

  handlers = {
    FAST_FORWARD: :handle_fast_forward,
    CONSUME_TOKEN: :handle_consume_token,
    MIME_HEAD: :handle_mime_head,
    MIME_BODY: :handle_mime_body
  }

  loop do
    handler = handlers[@state]
    return unless handler # :DONE

    status = send(handler)
    read_data(io, outbuf) if status == :want_read
  end
end
# Reads up to @bufsize bytes from +io+ (reusing +outbuf+ as the read buffer)
# and appends them to the scanner buffer. An empty or nil read is passed to
# handle_empty_content!, which raises.
def read_data(io, outbuf)
  chunk = io.read(@bufsize, outbuf)
  handle_empty_content!(chunk)

  @sbuf.concat(chunk)
end
# Builds the final MultipartInfo from the collected parts.
#
# For every chunk of data a part yields, the data is tagged with an encoding
# and merged into @params under the part's name. The returned MultipartInfo
# holds the params hash and the bodies of all parts that are files.
def result
  @collector.each do |part|
    part.get_data do |data|
      tag_multipart_encoding(part.filename, part.content_type, part.name, data)
      @query_parser.normalize_params(@params, part.name, data)
    end
  end

  params_hash = @params.to_params_hash
  file_bodies = @collector.find_all(&:file?).map(&:body)

  MultipartInfo.new(params_hash, file_bodies)
end
# Tags +name+ and +body+ (in place) with the encoding of a non-file part.
#
# +name+ is always tagged UTF-8 first. For file parts (+filename+ given)
# nothing else happens and nil is returned. Otherwise the encoding is UTF-8
# unless +content_type+ is TEXT_PLAIN with a `charset` parameter, in which
# case that charset is used; an unknown charset yields Encoding::BINARY.
# Returns +body+ for non-file parts.
#
# The content type comes from the client, so malformed values -- an empty
# type, empty parameters, or parameters without "=" -- are skipped instead
# of raising NoMethodError.
def tag_multipart_encoding(filename, content_type, name, body)
  name = name.to_s
  encoding = Encoding::UTF_8

  name.force_encoding(encoding)

  return if filename

  if content_type
    list = content_type.split(';')
    type_subtype = list.first
    type_subtype&.strip!

    if TEXT_PLAIN == type_subtype
      list.drop(1).each do |param|
        k, v = param.split('=', 2)
        next unless k && v # parameter without a name or without "="

        k.strip!
        v.strip!
        v = v[1..-2] if v.start_with?('"') && v.end_with?('"')
        next unless k == "charset"

        encoding =
          begin
            Encoding.find v
          rescue ArgumentError
            Encoding::BINARY
          end
      end
    end
  end

  name.force_encoding(encoding)
  body.force_encoding(encoding)
end