# HTTPClient - HTTP client library.
# Copyright (C) 2000-2009 NAKAMURA, Hiroshi <nahi@ruby-lang.org>.
#
# This program is copyrighted free software by NAKAMURA, Hiroshi. You can
# redistribute it and/or modify it under the same terms of Ruby's license;
# either the dual license version in 2003, or any later version.
require 'time'
# A namespace module for HTTP Message definitions used by HTTPClient.
module HTTP
# Represents HTTP response status code. Defines constants for HTTP response
# and some conditional methods.
module Status
OK = 200
CREATED = 201
ACCEPTED = 202
NON_AUTHORITATIVE_INFORMATION = 203
NO_CONTENT = 204
RESET_CONTENT = 205
PARTIAL_CONTENT = 206
MOVED_PERMANENTLY = 301
FOUND = 302
SEE_OTHER = 303
TEMPORARY_REDIRECT = MOVED_TEMPORARILY = 307
BAD_REQUEST = 400
UNAUTHORIZED = 401
PROXY_AUTHENTICATE_REQUIRED = 407
INTERNAL = 500
# Status codes for successful HTTP response.
SUCCESSFUL_STATUS = [
OK, CREATED, ACCEPTED,
NON_AUTHORITATIVE_INFORMATION, NO_CONTENT,
RESET_CONTENT, PARTIAL_CONTENT
]
# Status codes which is a redirect.
REDIRECT_STATUS = [
MOVED_PERMANENTLY, FOUND, SEE_OTHER,
TEMPORARY_REDIRECT, MOVED_TEMPORARILY
]
# Returns true if the given status represents successful HTTP response.
# See also SUCCESSFUL_STATUS.
def self.successful?(status)
SUCCESSFUL_STATUS.include?(status)
end
# Returns true if the given status is thought to be redirect.
# See also REDIRECT_STATUS.
def self.redirect?(status)
REDIRECT_STATUS.include?(status)
end
end
# Represents a HTTP message. A message is for a request or a response.
#
# Request message is generated from given parameters internally so users
# don't need to care about it. Response message is the instance that
# methods of HTTPClient returns so users need to know how to extract
# HTTP response data from Message.
#
# Some attributes are only for a request or a response, not both.
#
# == How to use HTTP response message
#
# 1. Gets response message body.
#
# res = clnt.get(url)
# p res.content #=> String
#
# 2. Gets response status code.
#
# res = clnt.get(url)
# p res.status #=> 200, 501, etc. (Integer)
#
# 3. Gets response header.
#
# res = clnt.get(url)
# res.header['set-cookie'].each do |value|
# p value
# end
# assert_equal(1, res.header['last-modified'].size)
# p res.header['last-modified'].first
#
class Message
CRLF = "\r\n"
# Represents HTTP message header.
class Headers
# HTTP version in a HTTP header. Float.
attr_accessor :http_version
# Size of body. nil when size is unknown (e.g. chunked response).
attr_reader :body_size
# Request/Response is chunked or not.
attr_accessor :chunked
# Request only. Requested method.
attr_reader :request_method
# Request only. Requested URI.
attr_accessor :request_uri
# Request only. Requested query.
attr_accessor :request_query
# Request only. Requested via proxy or not.
attr_accessor :request_via_proxy
# Response only. HTTP status
attr_reader :status_code
# Response only. HTTP status reason phrase.
attr_accessor :reason_phrase
# Used for dumping response.
attr_accessor :body_type # :nodoc:
# Used for dumping response.
attr_accessor :body_charset # :nodoc:
# Used for dumping response.
attr_accessor :body_date # :nodoc:
# HTTP response status code to reason phrase mapping definition.
STATUS_CODE_MAP = {
Status::OK => 'OK',
Status::CREATED => "Created",
Status::NON_AUTHORITATIVE_INFORMATION => "Non-Authoritative Information",
Status::NO_CONTENT => "No Content",
Status::RESET_CONTENT => "Reset Content",
Status::PARTIAL_CONTENT => "Partial Content",
Status::MOVED_PERMANENTLY => 'Moved Permanently',
Status::FOUND => 'Found',
Status::SEE_OTHER => 'See Other',
Status::TEMPORARY_REDIRECT => 'Temporary Redirect',
Status::MOVED_TEMPORARILY => 'Temporary Redirect',
Status::BAD_REQUEST => 'Bad Request',
Status::INTERNAL => 'Internal Server Error',
}
# $KCODE to charset mapping definition.
CHARSET_MAP = {
'NONE' => 'us-ascii',
'EUC' => 'euc-jp',
'SJIS' => 'shift_jis',
'UTF8' => 'utf-8',
}
# Creates a Message::Headers. Use init_request, init_response, or
# init_connect_request for acutual initialize.
def initialize
@http_version = 1.1
@body_size = nil
@chunked = false
@request_method = nil
@request_uri = nil
@request_query = nil
@request_via_proxy = nil
@status_code = nil
@reason_phrase = nil
@body_type = nil
@body_charset = nil
@body_date = nil
@is_request = nil
@header_item = []
@dumped = false
end
# Initialize this instance as a CONNECT request.
def init_connect_request(uri)
@is_request = true
@request_method = 'CONNECT'
@request_uri = uri
@request_query = nil
@http_version = 1.0
end
# Placeholder URI object for nil uri.
NIL_URI = URI.parse('http://nil-uri-given/')
# Initialize this instance as a general request.
def init_request(method, uri, query = nil)
@is_request = true
@request_method = method
@request_uri = uri || NIL_URI
@request_query = query
@request_via_proxy = false
end
# Initialize this instance as a response.
def init_response(status_code)
@is_request = false
self.status_code = status_code
end
# Sets status code and reason phrase.
def status_code=(status_code)
@status_code = status_code
@reason_phrase = STATUS_CODE_MAP[@status_code]
end
# Returns 'Content-Type' header value.
def contenttype
self['Content-Type'][0]
end
# Sets 'Content-Type' header value. Overrides if already exists.
def contenttype=(contenttype)
delete('Content-Type')
self['Content-Type'] = contenttype
end
# Sets byte size of message body.
# body_size == nil means that the body is_a? IO
def body_size=(body_size)
@body_size = body_size
end
# Dumps message header part and returns a dumped String.
def dump
set_header
str = nil
if @is_request
str = request_line
else
str = response_status_line
end
str + @header_item.collect { |key, value|
"#{ key }: #{ value }#{ CRLF }"
}.join
end
# Adds a header. Addition order is preserved.
def add(key, value)
if value.is_a?(Array)
value.each do |v|
@header_item.push([key, v])
end
else
@header_item.push([key, value])
end
end
# Sets a header.
def set(key, value)
delete(key)
add(key, value)
end
# Returns an Array of headers for the given key. Each element is a pair
# of key and value. It returns an single element Array even if the only
# one header exists. If nil key given, it returns all headers.
def get(key = nil)
if key.nil?
all
else
key = key.upcase
@header_item.find_all { |k, v| k.upcase == key }
end
end
# Returns an Array of all headers.
def all
@header_item
end
# Deletes headers of the given key.
def delete(key)
key = key.upcase
@header_item.delete_if { |k, v| k.upcase == key }
end
# Adds a header. See set.
def []=(key, value)
set(key, value)
end
# Returns an Array of header values for the given key.
def [](key)
get(key).collect { |item| item[1] }
end
private
def request_line
path = create_query_uri(@request_uri, @request_query)
if @request_via_proxy
path = "#{ @request_uri.scheme }://#{ @request_uri.host }:#{ @request_uri.port }#{ path }"
end
"#{ @request_method } #{ path } HTTP/#{ @http_version }#{ CRLF }"
end
def response_status_line
if defined?(Apache)
"HTTP/#{ @http_version } #{ @status_code } #{ @reason_phrase }#{ CRLF }"
else
"Status: #{ @status_code } #{ @reason_phrase }#{ CRLF }"
end
end
def set_header
if @is_request
set_request_header
else
set_response_header
end
end
def set_request_header
return if @dumped
@dumped = true
keep_alive = Message.keep_alive_enabled?(@http_version)
if !keep_alive and @request_method != 'CONNECT'
set('Connection', 'close')
end
if @chunked
set('Transfer-Encoding', 'chunked')
elsif @body_size and (keep_alive or @body_size != 0)
set('Content-Length', @body_size.to_s)
end
if @http_version >= 1.1
if @request_uri.port == @request_uri.default_port
# GFE/1.3 dislikes default port number (returns 404)
set('Host', "#{@request_uri.host}")
else
set('Host', "#{@request_uri.host}:#{@request_uri.port}")
end
end
end
def set_response_header
return if @dumped
@dumped = true
if defined?(Apache) && self['Date'].empty?
set('Date', Time.now.httpdate)
end
keep_alive = Message.keep_alive_enabled?(@http_version)
if @chunked
set('Transfer-Encoding', 'chunked')
else
if keep_alive or @body_size != 0
set('Content-Length', @body_size.to_s)
end
end
if @body_date
set('Last-Modified', @body_date.httpdate)
end
if self['Content-Type'].empty?
set('Content-Type', "#{ @body_type || 'text/html' }; charset=#{ charset_label(@body_charset || $KCODE) }")
end
end
def charset_label(charset)
CHARSET_MAP[charset] || 'us-ascii'
end
def create_query_uri(uri, query)
if @request_method == 'CONNECT'
return "#{uri.host}:#{uri.port}"
end
path = uri.path
path = '/' if path.nil? or path.empty?
query_str = nil
if uri.query
query_str = uri.query
end
if query
if query_str
query_str += "&#{Message.create_query_part_str(query)}"
else
query_str = Message.create_query_part_str(query)
end
end
if query_str
path += "?#{query_str}"
end
path
end
end
# Represents HTTP message body.
class Body
# Size of body. nil when size is unknown (e.g. chunked response).
attr_reader :size
# maxbytes of IO#read for streaming request. See DEFAULT_CHUNK_SIZE.
attr_accessor :chunk_size
# Default value for chunk_size
DEFAULT_CHUNK_SIZE = 1024 * 16
# Creates a Message::Body. Use init_request or init_response
# for acutual initialize.
def initialize
@body = nil
@size = nil
@positions = nil
@chunk_size = nil
end
# Initialize this instance as a request.
def init_request(body = nil, boundary = nil)
@boundary = boundary
@positions = {}
set_content(body, boundary)
@chunk_size = DEFAULT_CHUNK_SIZE
end
# Initialize this instance as a response.
def init_response(body = nil)
@body = body
if @body.respond_to?(:size)
@size = @body.size
else
@size = nil
end
end
# Dumps message body to given dev.
# dev needs to respond to <<.
#
# Message header must be given as the first argument for performance
# reason. (header is dumped to dev, too)
# If no dev (the second argument) given, this method returns a dumped
# String.
def dump(header = '', dev = '')
if @body.is_a?(Parts)
dev << header
buf = ''
@body.parts.each do |part|
if Message.file?(part)
reset_pos(part)
while !part.read(@chunk_size, buf).nil?
dev << buf
end
else
dev << part
end
end
elsif @body
dev << header + @body
else
dev << header
end
dev
end
# Dumps message body with chunked encoding to given dev.
# dev needs to respond to <<.
#
# Message header must be given as the first argument for performance
# reason. (header is dumped to dev, too)
# If no dev (the second argument) given, this method returns a dumped
# String.
def dump_chunked(header = '', dev = '')
dev << header
if @body.is_a?(Parts)
@body.parts.each do |part|
if Message.file?(part)
reset_pos(part)
dump_chunks(part, dev)
else
dev << dump_chunk(part)
end
end
dev << (dump_last_chunk + CRLF)
elsif @body
reset_pos(@body)
dump_chunks(@body, dev)
dev << (dump_last_chunk + CRLF)
end
dev
end
# Returns a message body itself.
def content
@body
end
private
def set_content(body, boundary = nil)
if body.respond_to?(:read)
# uses Transfer-Encoding: chunked. bear in mind that server may not
# support it. at least ruby's CGI doesn't.
@body = body
remember_pos(@body)
@size = nil
elsif boundary and Message.multiparam_query?(body)
@body = build_query_multipart_str(body, boundary)
@size = @body.size
else
@body = Message.create_query_part_str(body)
@size = @body.size
end
end
def remember_pos(io)
# IO may not support it (ex. IO.pipe)
@positions[io] = io.pos rescue nil
end
def reset_pos(io)
io.pos = @positions[io] if @positions.key?(io)
end
def dump_chunks(io, dev)
buf = ''
while !io.read(@chunk_size, buf).nil?
dev << dump_chunk(buf)
end
end
def dump_chunk(str)
dump_chunk_size(str.size) + (str + CRLF)
end
def dump_last_chunk
dump_chunk_size(0)
end
def dump_chunk_size(size)
sprintf("%x", size) + CRLF
end
class Parts
attr_reader :size
def initialize
@body = []
@size = 0
@as_stream = false
end
def add(part)
if Message.file?(part)
@as_stream = true
@body << part
if part.respond_to?(:size)
if sz = part.size
@size += sz
else
@size = nil
end
elsif part.respond_to?(:lstat)
@size += part.lstat.size
else
# use chunked upload
@size = nil
end
elsif @body[-1].is_a?(String)
@body[-1] += part.to_s
@size += part.to_s.size if @size
else
@body << part.to_s
@size += part.to_s.size if @size
end
end
def parts
if @as_stream
@body
else
[@body.join]
end
end
end
def build_query_multipart_str(query, boundary)
parts = Parts.new
query.each do |attr, value|
value ||= ''
headers = ["--#{boundary}"]
if Message.file?(value)
remember_pos(value)
param_str = params_from_file(value).collect { |k, v|
"#{k}=\"#{v}\""
}.join("; ")
if value.respond_to?(:mime_type)
content_type = value.mime_type
else
content_type = Message.mime_type(value.path)
end
headers << %{Content-Disposition: form-data; name="#{attr}"; #{param_str}}
headers << %{Content-Type: #{content_type}}
else
headers << %{Content-Disposition: form-data; name="#{attr}"}
end
parts.add(headers.join(CRLF) + CRLF + CRLF)
parts.add(value)
parts.add(CRLF)
end
parts.add("--#{boundary}--" + CRLF + CRLF) # empty epilogue
parts
end
def params_from_file(value)
params = {}
params['filename'] = File.basename(value.path || '')
# Creation time is not available from File::Stat
if value.respond_to?(:mtime)
params['modification-date'] = value.mtime.rfc822
end
if value.respond_to?(:atime)
params['read-date'] = value.atime.rfc822
end
params
end
end
class << self
private :new
# Creates a Message instance of 'CONNECT' request.
# 'CONNECT' request does not have Body.
# uri:: an URI that need to connect. Only uri.host and uri.port are used.
def new_connect_request(uri)
m = new
m.header.init_connect_request(uri)
m.header.body_size = nil
m
end
# Creates a Message instance of general request.
# method:: HTTP method String.
# uri:: an URI object which represents an URL of web resource.
# query:: a Hash or an Array of query part of URL.
# e.g. { "a" => "b" } => 'http://host/part?a=b'
# Give an array to pass multiple value like
# [["a", "b"], ["a", "c"]] => 'http://host/part?a=b&a=c'
# body:: a Hash or an Array of body part.
# e.g. { "a" => "b" } => 'a=b'.
# Give an array to pass multiple value like
# [["a", "b"], ["a", "c"]] => 'a=b&a=c'.
# boundary:: When the boundary given, it is sent as
# a multipart/form-data using this boundary String.
def new_request(method, uri, query = nil, body = nil, boundary = nil)
m = new
m.header.init_request(method, uri, query)
m.body = Body.new
m.body.init_request(body || '', boundary)
if body
m.header.body_size = m.body.size
m.header.chunked = true if m.body.size.nil?
else
m.header.body_size = nil
end
m
end
# Creates a Message instance of response.
# body:: a String or an IO of response message body.
def new_response(body)
m = new
m.header.init_response(Status::OK)
m.body = Body.new
m.body.init_response(body)
m.header.body_size = m.body.size || 0
m
end
@@mime_type_handler = nil
# Sets MIME type handler.
#
# handler must respond to :call with a single argument :path and returns
# a MIME type String e.g. 'text/html'.
# When the handler returns nil or an empty String,
# 'application/octet-stream' is used.
#
# When you set nil to the handler, internal_mime_type is used instead.
# The handler is nil by default.
def mime_type_handler=(handler)
@@mime_type_handler = handler
end
# Returns MIME type handler.
def mime_type_handler
@@mime_type_handler
end
# For backward compatibility.
alias set_mime_type_func mime_type_handler=
alias get_mime_type_func mime_type_handler
def mime_type(path) # :nodoc:
if @@mime_type_handler
res = @@mime_type_handler.call(path)
if !res || res.to_s == ''
return 'application/octet-stream'
else
return res
end
else
internal_mime_type(path)
end
end
# Default MIME type handler.
# See mime_type_handler=.
def internal_mime_type(path)
case path
when /\.txt$/i
'text/plain'
when /\.(htm|html)$/i
'text/html'
when /\.doc$/i
'application/msword'
when /\.png$/i
'image/png'
when /\.gif$/i
'image/gif'
when /\.(jpg|jpeg)$/i
'image/jpeg'
else
'application/octet-stream'
end
end
# Returns true if the given HTTP version allows keep alive connection.
# version:: Float
def keep_alive_enabled?(version)
version >= 1.1
end
# Returns true if the given query (or body) has a multiple parameter.
def multiparam_query?(query)
query.is_a?(Array) or query.is_a?(Hash)
end
# Returns true if the given object is a File. In HTTPClient, a file is;
# * must respond to :read for retrieving String chunks.
# * must respond to :path and returns a path for Content-Disposition.
# * must respond to :pos and :pos= to rewind for reading.
# Rewinding is only needed for following HTTP redirect. Some IO impl
# defines :pos= but raises an Exception for pos= such as StringIO
# but there's no problem as far as using it for non-following methods
# (get/post/etc.)
def file?(obj)
obj.respond_to?(:read) and obj.respond_to?(:path) and
obj.respond_to?(:pos) and obj.respond_to?(:pos=)
end
def create_query_part_str(query) # :nodoc:
if multiparam_query?(query)
escape_query(query)
elsif query.respond_to?(:read)
query = query.read
else
query.to_s
end
end
def escape_query(query) # :nodoc:
query.collect { |attr, value|
if value.respond_to?(:read)
value = value.read
end
escape(attr.to_s) << '=' << escape(value.to_s)
}.join('&')
end
# from CGI.escape
def escape(str) # :nodoc:
str.gsub(/([^ a-zA-Z0-9_.-]+)/n) {
'%' + $1.unpack('H2' * $1.size).join('%').upcase
}.tr(' ', '+')
end
end
# HTTP::Message::Headers:: message header.
attr_accessor :header
# HTTP::Message::Body:: message body.
attr_reader :body
# OpenSSL::X509::Certificate:: response only. server certificate which is
# used for retrieving the response.
attr_accessor :peer_cert
# Creates a Message. This method should be used internally.
# Use Message.new_connect_request, Message.new_request or
# Message.new_response instead.
def initialize # :nodoc:
@header = Headers.new
@body = @peer_cert = nil
end
# Dumps message (header and body) to given dev.
# dev needs to respond to <<.
def dump(dev = '')
str = header.dump + CRLF
if header.chunked
dev = body.dump_chunked(str, dev)
elsif body
dev = body.dump(str, dev)
else
dev << str
end
dev
end
# Sets a new body. header.body_size is updated with new body.size.
def body=(body)
@body = body
@header.body_size = @body.size if @header
end
# Returns HTTP version in a HTTP header. Float.
def version
@header.http_version
end
# Sets HTTP version in a HTTP header. Float.
def version=(version)
@header.http_version = version
end
# Returns HTTP status code in response. Integer.
def status
@header.status_code
end
alias code status
alias status_code status
# Sets HTTP status code of response. Integer.
# Reason phrase is updated, too.
def status=(status)
@header.status_code = status
end
# Returns HTTP status reason phrase in response. String.
def reason
@header.reason_phrase
end
# Sets HTTP status reason phrase of response. String.
def reason=(reason)
@header.reason_phrase = reason
end
# Sets 'Content-Type' header value. Overrides if already exists.
def contenttype
@header.contenttype
end
# Returns 'Content-Type' header value.
def contenttype=(contenttype)
@header.contenttype = contenttype
end
# Returns a content of message body. A String or an IO.
def content
@body.content
end
end
end