# lib/http/uri/parsing.rb



# frozen_string_literal: true

module HTTP
  # Class methods and private helpers for URI parsing and host processing
  class URI
    # Parse the given URI string, returning an HTTP::URI object
    #
    # @example
    #   HTTP::URI.parse("http://example.com/path")
    #
    # @param [HTTP::URI, String, #to_str] uri to parse
    #
    # @api public
    # @return [HTTP::URI] new URI instance
    def self.parse(uri)
      # Already an instance of this class: hand the same object back.
      return uri if uri.is_a?(self)
      # String(nil) would yield "", so nil is rejected explicitly.
      raise InvalidError, "invalid URI: nil" if uri.nil?

      string =
        begin
          String(uri)
        rescue TypeError, NoMethodError
          raise InvalidError, "invalid URI: #{uri.inspect}"
        end

      components = parse_components(string)
      new(**components)
    end

    # Encodes key/value pairs as application/x-www-form-urlencoded
    #
    # @example
    #   HTTP::URI.form_encode(foo: "bar")
    #
    # @param [#to_hash, #to_ary] form_values to encode
    # @param [TrueClass, FalseClass] sort should key/value pairs be sorted first?
    #
    # @api public
    # @return [String] encoded value
    def self.form_encode(form_values, sort: false)
      # When sorting, pairs are ordered by the String() form of each key.
      pairs = sort ? form_values.sort_by { |key, _value| String(key) } : form_values
      ::URI.encode_www_form(pairs)
    end

    # Percent-encode matching characters in a string
    #
    # @param [String] string raw string
    #
    # @api private
    # @return [String] encoded value
    def self.percent_encode(string)
      return nil if string.nil?

      # Each matched run is replaced by the %XX escape of every byte in it.
      string.gsub(PERCENT_ENCODE) do |run|
        run.each_byte.map { |byte| format("%%%02X", byte) }.join
      end
    end

    # Loads the addressable gem on first use
    #
    # @api private
    # @return [void]
    # @raise [LoadError] if addressable gem is not installed
    def self.require_addressable
      # The ivar is only set after a successful require, so its mere
      # existence means there is nothing left to load.
      return if instance_variable_defined?(:@addressable_loaded)

      require "addressable/uri"
      @addressable_loaded = true
    end

    # Convert a hostname to ASCII via IDNA (requires addressable)
    #
    # @param [String] host hostname to encode
    # @api private
    # @return [String] ASCII-encoded hostname
    def self.idna_to_ascii(host)
      if host.ascii_only?
        # Nothing to convert; the same string object is returned.
        host
      else
        require_addressable
        Addressable::IDNA.to_ascii(host) # steep:ignore
      end
    end

    private

    # Serialize the authority section of a URI (userinfo + host + port)
    #
    # @api private
    # @return [String] authority component
    def authority_string
      user = @user
      (+"//").tap do |authority|
        if user
          # userinfo is "user" or "user:password", always followed by "@"
          authority << user
          authority << ":#{@password}" if @password
          authority << "@"
        end
        authority << @raw_host # steep:ignore
        authority << ":#{@port}" if @port
      end
    end

    # Adds or removes IPv6 brackets from a host
    #
    # @param [String] raw_host
    # @param [Boolean] brackets
    # @api private
    # @return [String] Host with IPv6 address brackets added or removed
    def process_ipv6_brackets(raw_host, brackets: false)
      return nil unless raw_host

      address = IPAddr.new(raw_host.delete_prefix("[").delete_suffix("]"))
      # Anything that parses but is not IPv6 (i.e. IPv4) is passed through.
      return raw_host unless address.ipv6?

      literal = address.to_s
      brackets ? "[#{literal}]" : literal
    rescue IPAddr::Error
      # Not an IP literal at all (e.g. a hostname): leave it untouched.
      raw_host
    end

    # Normalize a host for comparison and lookup
    #
    # Percent-decodes, strips trailing dot, lowercases, and IDN-encodes
    # non-ASCII hostnames.
    #
    # @param [String, nil] host the host to normalize
    # @api private
    # @return [String, nil] normalized host
    def normalize_host(host)
      return nil unless host

      # Decode on a binary copy: Integer#chr yields a binary string for
      # bytes >= 0x80, and splicing that into a host that already contains
      # non-ASCII characters raises Encoding::CompatibilityError.
      decoded = host.b.gsub(/%\h{2}/) { |escape| escape.delete_prefix("%").hex.chr }

      # Restore the caller's encoding so decoded multibyte sequences become
      # real characters again (and are lowercased below). Byte sequences
      # that are not valid in that encoding are kept as a binary string.
      decoded.force_encoding(host.encoding)
      decoded.force_encoding(Encoding::BINARY) unless decoded.valid_encoding?

      normalized = decoded.delete_suffix(".").downcase
      self.class.idna_to_ascii(normalized)
    end

    # Parse a URI string into component parts
    #
    # Uses stdlib for printable-ASCII URIs (faster), falling back to
    # Addressable for non-ASCII or when stdlib rejects the input.
    #
    # @param [String] uri_string the URI to parse
    # @api private
    # @return [Hash] URI components
    private_class_method def self.parse_components(uri_string)
      unless uri_string.match?(NEEDS_ADDRESSABLE)
        # stdlib returns nil when it rejects the input; fall through then.
        components = parse_with_stdlib(uri_string)
        return components if components
      end

      parse_with_addressable(uri_string)
    end

    # Parse an ASCII URI using stdlib
    #
    # @param [String] uri_string the URI to parse
    # @api private
    # @return [Hash, nil] URI components, or nil if stdlib rejects the input
    private_class_method def self.parse_with_stdlib(uri_string)
      parsed = ::URI.parse(uri_string)
      # stdlib always returns a port (defaulting to scheme's default);
      # only store it when it differs from that default
      port = parsed.port
      port = nil if port.eql?(parsed.default_port)
      { scheme: parsed.scheme, user: parsed.user, password: parsed.password,
        host: parsed.host, port: port, path: parsed.path,
        query: parsed.query, fragment: parsed.fragment }
    rescue ::URI::Error
      # Covers InvalidURIError (bad syntax) as well as InvalidComponentError,
      # which scheme-specific classes raise during ::URI.parse (for example
      # a malformed "mailto:" address). Either way stdlib rejected the
      # input, so signal the caller to try the fallback parser.
      nil
    end

    # Parse a non-ASCII URI using Addressable
    #
    # @param [String] uri_string the URI to parse
    # @api private
    # @return [Hash] URI components
    private_class_method def self.parse_with_addressable(uri_string)
      require_addressable
      addressable = Addressable::URI.parse(uri_string)
      {
        scheme: addressable.scheme,
        user: addressable.user,
        password: addressable.password,
        host: addressable.host,
        port: addressable.port,
        path: addressable.path,
        query: addressable.query,
        fragment: addressable.fragment
      }
    rescue Addressable::URI::InvalidURIError
      # Translate the gem's error into this library's own error type.
      raise InvalidError, "invalid URI: #{uri_string.inspect}"
    end
  end
end