class WWW::Mechanize
# Performs the HTTP request described by +params+ and returns the resulting
# page, transparently following refreshes, redirects and a single
# authentication challenge per host.
#
# +params+ is a Hash merged over these defaults:
#
# :request::    nil (filled in by the pre-connect chain)
# :response::   nil
# :connection:: nil (filled in by the pre-connect chain)
# :referer::    current_page()
# :uri::        nil
# :verb::       :get
# :agent::      self
# :redirects::  0, the number of redirects already followed
# :params::     [], extra arguments splatted into the connection's #request
# :headers::    {}
#
# Returns the page left in options[:page] by the post-connect chain, the
# page reached after following a refresh/redirect, or, for a 304 response,
# the page returned by visited_page (falling back to the new page).
#
# Raises RedirectLimitReachedError when following a refresh or redirect would
# exceed redirection_limit, StandardError when a Refresh header cannot be
# parsed, and ResponseCodeError for a 401 that cannot be retried or for any
# response class that is not handled here. EOFError, Errno::ECONNRESET and
# Errno::EPIPE are re-raised once the request has failed three times.
def fetch_page(params)
  options = {
    :request    => nil,
    :response   => nil,
    :connection => nil,
    :referer    => current_page(),
    :uri        => nil,
    :verb       => :get,
    :agent      => self,
    :redirects  => 0,
    :params     => [],
    :headers    => {},
  }.merge(params)

  before_connect = Chain.new([
    Chain::URIResolver.new(@scheme_handlers),
    Chain::ParameterResolver.new,
    Chain::RequestResolver.new,
    Chain::ConnectionResolver.new(
      @connection_cache,
      @keep_alive,
      @proxy_addr,
      @proxy_port,
      @proxy_user,
      @proxy_pass
    ),
    Chain::SSLResolver.new(@ca_file, @verify_callback, @cert, @key, @pass),
    Chain::AuthHeaders.new(@auth_hash, @user, @password, @digest),
    Chain::HeaderResolver.new(
      @keep_alive,
      @keep_alive_time,
      @cookie_jar,
      @user_agent
    ),
    Chain::CustomHeaders.new,
    @pre_connect_hook,
  ])
  before_connect.handle(options)

  # The chain communicates its results through the options hash.
  uri          = options[:uri]
  request      = options[:request]
  cur_page     = options[:referer]
  request_data = options[:params]
  redirects    = options[:redirects]
  http_obj     = options[:connection]

  # Add If-Modified-Since if page is in history
  if @conditional_requests
    cached = visited_page(uri)
    if cached && cached.response['Last-Modified']
      request['If-Modified-Since'] = cached.response['Last-Modified']
    end
  end

  # Specify timeouts if given
  http_obj.open_timeout = @open_timeout if @open_timeout
  http_obj.read_timeout = @read_timeout if @read_timeout
  http_obj.start unless http_obj.started?

  # Log specified headers for the request
  log.info("#{ request.class }: #{ request.path }") if log
  request.each_header do |k, v|
    log.debug("request-header: #{ k } => #{ v }")
  end if log

  # Send the request, restarting the connection and retrying at most twice
  # when the connection is dropped underneath us.
  attempts = 0
  begin
    response = http_obj.request(request, *request_data) { |r|
      connection_chain = Chain.new([
        Chain::ResponseReader.new(r),
        Chain::BodyDecodingHandler.new,
      ])
      connection_chain.handle(options)
    }
  rescue EOFError, Errno::ECONNRESET, Errno::EPIPE => x
    log.error("Rescuing EOF error") if log
    http_obj.finish
    raise x if attempts >= 2
    # Clear the body before the request is re-sent with request_data.
    request.body = nil
    http_obj.start
    attempts += 1
    retry
  end

  after_connect = Chain.new([
    @post_connect_hook,
    Chain::ResponseBodyParser.new(@pluggable_parser, @watch_for_set),
    Chain::ResponseHeaderHandler.new(@cookie_jar, @connection_cache),
  ])
  after_connect.handle(options)

  res_klass = options[:res_klass]
  page      = options[:page]

  log.info("status: #{ page.code }") if log

  if follow_meta_refresh
    redirect_uri = nil
    delay        = nil

    if page.respond_to?(:meta) && (redirect = page.meta.first)
      redirect_uri = redirect.uri.to_s
    elsif (refresh = response['refresh'])
      parsed_refresh = refresh.match(/\A\s*(\d+\.?\d*);\s*url=(\S*)\s*\z/i)
      raise StandardError, "Invalid refresh http header" unless parsed_refresh
      # The pattern accepts fractional delays, so keep the fraction.
      delay    = parsed_refresh[1].to_f
      location = parsed_refresh[2]
      # Resolve relative targets against the requested URI so the scheme,
      # port and base path are preserved; absolute targets pass through.
      unless location =~ %r{\A[a-z][a-z0-9+.\-]*://}i
        location = uri.merge(location).to_s
      end
      redirect_uri = location
    end

    if redirect_uri
      # Enforce the limit for <meta> refreshes as well as Refresh headers;
      # otherwise a page that refreshes to itself recurses without bound.
      if redirects + 1 > redirection_limit
        raise RedirectLimitReachedError.new(page, redirects)
      end
      sleep delay if delay
      @history.push(page, page.uri)
      return fetch_page(
        :uri       => redirect_uri,
        :referer   => page,
        :params    => [],
        :verb      => :get,
        :redirects => redirects + 1
      )
    end
  end

  return page if res_klass <= Net::HTTPSuccess

  if res_klass == Net::HTTPNotModified
    log.debug("Got cached page") if log
    return visited_page(uri) || page
  elsif res_klass <= Net::HTTPRedirection
    return page unless follow_redirect?
    log.info("follow redirect to: #{ response['Location'] }") if log
    from_uri = page.uri
    raise RedirectLimitReachedError.new(page, redirects) if redirects + 1 > redirection_limit
    # A redirected HEAD stays a HEAD; every other verb is re-issued as GET.
    redirect_verb = options[:verb] == :head ? :head : :get
    page = fetch_page(
      :uri       => response['Location'].to_s,
      :referer   => page,
      :params    => [],
      :verb      => redirect_verb,
      :redirects => redirects + 1
    )
    @history.push(page, from_uri)
    return page
  elsif res_klass <= Net::HTTPUnauthorized
    raise ResponseCodeError.new(page) unless @user || @password
    # A scheme already recorded for this host means a challenge was already
    # answered once; do not retry again.
    raise ResponseCodeError.new(page) if @auth_hash.has_key?(uri.host)
    if response['www-authenticate'] =~ /Digest/i
      @auth_hash[uri.host] = :digest
      if response['server'] =~ /Microsoft-IIS/
        @auth_hash[uri.host] = :iis_digest
      end
      @digest = response['www-authenticate']
    else
      @auth_hash[uri.host] = :basic
    end
    # Repeat the same request now that an auth scheme is recorded, keeping
    # the redirect count so the limit still applies across the retry.
    return fetch_page(
      :uri       => uri,
      :referer   => cur_page,
      :verb      => request.method.downcase.to_sym,
      :params    => request_data,
      :headers   => options[:headers],
      :redirects => redirects
    )
  end

  raise ResponseCodeError.new(page), "Unhandled response", caller
end