lib/ferrum/proxy.rb
# frozen_string_literal: true require "tempfile" require "webrick" require "webrick/httpproxy" module Ferrum class Proxy def self.start(**args) new(**args).tap(&:start) end attr_reader :host, :port, :user, :password def initialize(host: "127.0.0.1", port: 0, user: nil, password: nil) @file = nil @host = host @port = port @user = user @password = password end def start options = { ProxyURI: nil, ServerType: Thread, Logger: Logger.new(IO::NULL), AccessLog: [], BindAddress: host, Port: port } if user && password @file = Tempfile.new("htpasswd") htpasswd = WEBrick::HTTPAuth::Htpasswd.new(@file.path) htpasswd.set_passwd "Proxy Realm", user, password htpasswd.flush authenticator = WEBrick::HTTPAuth::ProxyBasicAuth.new(Realm: "Proxy Realm", UserDB: htpasswd, Logger: Logger.new(IO::NULL)) options.merge!(ProxyAuthProc: authenticator.method(:authenticate).to_proc) end @server = HTTPProxyServer.new(**options) @server.start at_exit { stop } @port = @server.config[:Port] end def rotate(host:, port:, user: nil, password: nil) credentials = "#{user}:#{password}@" if user && password proxy_uri = "schema://#{credentials}#{host}:#{port}" @server.config[:ProxyURI] = URI.parse(proxy_uri) end def stop @file&.close(true) @server.shutdown end # Fix hanging proxy at exit class HTTPProxyServer < WEBrick::HTTPProxyServer # rubocop:disable all def do_CONNECT(req, res) # Proxy Authentication proxy_auth(req, res) ua = Thread.current[:WEBrickSocket] # User-Agent raise WEBrick::HTTPStatus::InternalServerError, "[BUG] cannot get socket" unless ua host, port = req.unparsed_uri.split(":", 2) # Proxy authentication for upstream proxy server if proxy = proxy_uri(req, res) proxy_request_line = "CONNECT #{host}:#{port} HTTP/1.0" if proxy.userinfo credentials = "Basic " + [proxy.userinfo].pack("m0") end host, port = proxy.host, proxy.port end begin @logger.debug("CONNECT: upstream proxy is `#{host}:#{port}'.") os = TCPSocket.new(host, port) # origin server if proxy @logger.debug("CONNECT: sending a Request-Line") os << proxy_request_line << CRLF @logger.debug("CONNECT: > #{proxy_request_line}") if credentials @logger.debug("CONNECT: sending credentials") os << "Proxy-Authorization: " << credentials << CRLF end os << CRLF proxy_status_line = os.gets(LF) @logger.debug("CONNECT: read Status-Line from the upstream server") @logger.debug("CONNECT: < #{proxy_status_line}") if %r{^HTTP/\d+\.\d+\s+200\s*} =~ proxy_status_line while line = os.gets(LF) break if /\A(#{CRLF}|#{LF})\z/om =~ line end else raise WEBrick::HTTPStatus::BadGateway end end @logger.debug("CONNECT #{host}:#{port}: succeeded") res.status = WEBrick::HTTPStatus::RC_OK rescue => ex @logger.debug("CONNECT #{host}:#{port}: failed `#{ex.message}'") res.set_error(ex) raise WEBrick::HTTPStatus::EOFError ensure # At exit os variable sometimes can be nil which results in hanging forever raise WEBrick::HTTPStatus::EOFError unless os if handler = @config[:ProxyContentHandler] handler.call(req, res) end res.send_response(ua) access_log(@config, req, res) # Should clear request-line not to send the response twice. # see: HTTPServer#run req.parse(NullReader) rescue nil end begin while fds = IO::select([ua, os]) if fds[0].member?(ua) buf = ua.readpartial(1024); @logger.debug("CONNECT: #{buf.bytesize} byte from User-Agent") os.write(buf) elsif fds[0].member?(os) buf = os.readpartial(1024); @logger.debug("CONNECT: #{buf.bytesize} byte from #{host}:#{port}") ua.write(buf) end end rescue os.close @logger.debug("CONNECT #{host}:#{port}: closed") end raise WEBrick::HTTPStatus::EOFError end # rubocop:enable all end end end