class Bundler::URI::RFC2396_Parser


Class that parses Strings into Bundler::URIs.
It contains a Hash set of patterns and Regexps that match and validate.

def convert_to_uri(uri)

# Coerces +uri+ into a Bundler::URI object.
#
# A Bundler::URI::Generic instance is returned untouched. Anything that
# String.try_convert accepts (a String, or an object responding to +to_str+)
# is converted and handed to #parse. Every other argument raises ArgumentError.
def convert_to_uri(uri)
  return uri if uri.is_a?(Bundler::URI::Generic)

  string = String.try_convert(uri)
  return parse(string) if string

  raise ArgumentError,
    "bad argument (expected Bundler::URI object or Bundler::URI string)"
end

def escape(str, unsafe = @regexp[:UNSAFE])


Constructs a safe String from +str+, removing unsafe characters,
replacing them with codes.

== Description

+str+::
String to make safe
+unsafe+::
Regexp to apply. Defaults to self.regexp[:UNSAFE]

== Args

:call-seq:
escape( str )
escape( str, unsafe )
# Percent-encodes every part of +str+ matched by +unsafe+.
#
# +unsafe+ defaults to @regexp[:UNSAFE]. When it is not a Regexp it is
# treated as a set of literal characters and wrapped in a character class.
# Each byte of every match is replaced by "%XX" (two uppercase hex digits).
# The returned String is tagged as US-ASCII.
def escape(str, unsafe = @regexp[:UNSAFE])
  # perhaps unsafe is a String object: quote it and build a character class
  matcher = unsafe.kind_of?(Regexp) ? unsafe : Regexp.new("[#{Regexp.quote(unsafe)}]", false)

  encoded = str.gsub(matcher) do |chunk|
    chunk.each_byte.map { |byte| format('%%%02X', byte) }.join
  end
  encoded.force_encoding(Encoding::US_ASCII)
end

def extract(str, schemes = nil)


See also Bundler::URI::Parser.make_regexp.

Extracts the URIs found in +str+.
If no +block+ is given, returns the result as an Array;
otherwise calls +block+ for each element in the result and returns nil.

== Description

+str+::
String to search
+schemes+::
Patterns to apply to +str+

== Args

:call-seq:
extract( str )
extract( str, schemes )
extract( str, schemes ) {|item| block }
# Scans +str+ with the Regexp returned by make_regexp(schemes).
#
# Without a block, returns an Array holding the full text of every match.
# With a block, yields the full text of each match and returns nil.
def extract(str, schemes = nil)
  matcher = make_regexp(schemes)

  unless block_given?
    found = []
    # The whole match is collected explicitly because scan would otherwise
    # hand over only the capture groups.
    str.scan(matcher) { found << Regexp.last_match(0) }
    return found
  end

  str.scan(matcher) { yield Regexp.last_match(0) }
  nil
end

def initialize(opts = {})


p = Bundler::URI::Parser.new(:ESCAPED => "(?:%[a-fA-F0-9]{2}|%u[a-fA-F0-9]{4})")
u = p.parse("http://example.jp/%uABCD") #=> #<Bundler::URI::HTTP http://example.jp/%uABCD>
Bundler::URI.parse(u.to_s) #=> raises Bundler::URI::InvalidURIError

s = "http://example.com/ABCD"
u1 = p.parse(s) #=> #<Bundler::URI::HTTP http://example.com/ABCD>
u2 = Bundler::URI.parse(s) #=> #<Bundler::URI::HTTP http://example.com/ABCD>
u1 == u2 #=> true
u1.eql?(u2) #=> false

== Examples

* :HOSTNAME (Bundler::URI::PATTERN::HOSTNAME in default)
* :TOPLABEL (Bundler::URI::PATTERN::TOPLABEL in default)
* :DOMLABEL (Bundler::URI::PATTERN::DOMLABEL in default)
* :UNRESERVED (Bundler::URI::PATTERN::UNRESERVED in default)
* :ESCAPED (Bundler::URI::PATTERN::ESCAPED in default)

You can use the following keys:

The constructor accepts a hash as options for the parser.
Keys of the options are pattern names of Bundler::URI components
and values of the options are pattern strings.
The constructor generates a set of regexps for parsing URIs.

== Args

Bundler::URI::Parser.new([opts])

== Synopsis
# Sets up the parser's two lookup tables.
#
# @pattern receives the Hash returned by initialize_pattern(opts) and
# @regexp the Hash that initialize_regexp derives from @pattern. Every value
# of both Hashes, and each Hash itself, is frozen.
def initialize(opts = {})
  @pattern = initialize_pattern(opts).each_value(&:freeze).freeze
  @regexp = initialize_regexp(@pattern).each_value(&:freeze).freeze
end

def initialize_pattern(opts = {})

Constructs the default Hash of patterns.
# Constructs the default Hash of pattern source strings.
#
# +opts+ may override the building blocks :ESCAPED, :UNRESERVED, :RESERVED,
# :DOMLABEL, :TOPLABEL and :HOSTNAME; any key that is absent (or nil) falls
# back to the matching PATTERN constant, or, for :HOSTNAME, to the relaxed
# host pattern defined below.
#
# +opts+ is only read, never modified, so the same (possibly frozen) options
# Hash can be reused to build several parsers with identical results.
#
# Returns a Hash mapping pattern names (Symbols) to pattern Strings. The
# :X_ABS_URI and :X_REL_URI entries are written for Regexp::EXTENDED.
def initialize_pattern(opts = {})
  ret = {}
  ret[:ESCAPED] = escaped = (opts[:ESCAPED] || PATTERN::ESCAPED)
  ret[:UNRESERVED] = unreserved = opts[:UNRESERVED] || PATTERN::UNRESERVED
  ret[:RESERVED] = reserved = opts[:RESERVED] || PATTERN::RESERVED
  ret[:DOMLABEL] = opts[:DOMLABEL] || PATTERN::DOMLABEL
  ret[:TOPLABEL] = opts[:TOPLABEL] || PATTERN::TOPLABEL
  # May be nil here; the default is filled in further down.
  ret[:HOSTNAME] = hostname = opts[:HOSTNAME]
  # RFC 2396 (Bundler::URI Generic Syntax)
  # RFC 2732 (IPv6 Literal Addresses in URL's)
  # RFC 2373 (IPv6 Addressing Architecture)
  # uric          = reserved | unreserved | escaped
  ret[:URIC] = uric = "(?:[#{unreserved}#{reserved}]|#{escaped})"
  # uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
  #                 "&" | "=" | "+" | "$" | ","
  ret[:URIC_NO_SLASH] = uric_no_slash = "(?:[#{unreserved};?:@&=+$,]|#{escaped})"
  # query         = *uric
  ret[:QUERY] = query = "#{uric}*"
  # fragment      = *uric
  ret[:FRAGMENT] = fragment = "#{uric}*"
  # hostname      = *( domainlabel "." ) toplabel [ "." ]
  # reg-name      = *( unreserved / pct-encoded / sub-delims ) # RFC3986
  unless hostname
    ret[:HOSTNAME] = hostname = "(?:[a-zA-Z0-9\\-.]|%\\h\\h)+"
  end
  # RFC 2373, APPENDIX B:
  # IPv6address = hexpart [ ":" IPv4address ]
  # IPv4address   = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT
  # hexpart = hexseq | hexseq "::" [ hexseq ] | "::" [ hexseq ]
  # hexseq  = hex4 *( ":" hex4)
  # hex4    = 1*4HEXDIG
  #
  # XXX: This definition has a flaw. "::" + IPv4address must be
  # allowed too.  Here is a replacement.
  #
  # IPv4address = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT
  ret[:IPV4ADDR] = ipv4addr = "\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}"
  # hex4     = 1*4HEXDIG
  hex4 = "[#{PATTERN::HEX}]{1,4}"
  # lastpart = hex4 | IPv4address
  lastpart = "(?:#{hex4}|#{ipv4addr})"
  # hexseq1  = *( hex4 ":" ) hex4
  hexseq1 = "(?:#{hex4}:)*#{hex4}"
  # hexseq2  = *( hex4 ":" ) lastpart
  hexseq2 = "(?:#{hex4}:)*#{lastpart}"
  # IPv6address = hexseq2 | [ hexseq1 ] "::" [ hexseq2 ]
  ret[:IPV6ADDR] = ipv6addr = "(?:#{hexseq2}|(?:#{hexseq1})?::(?:#{hexseq2})?)"
  # IPv6prefix  = ( hexseq1 | [ hexseq1 ] "::" [ hexseq1 ] ) "/" 1*2DIGIT
  # unused
  # ipv6reference = "[" IPv6address "]" (RFC 2732)
  ret[:IPV6REF] = ipv6ref = "\\[#{ipv6addr}\\]"
  # host          = hostname | IPv4address
  # host          = hostname | IPv4address | IPv6reference (RFC 2732)
  ret[:HOST] = host = "(?:#{hostname}|#{ipv4addr}|#{ipv6ref})"
  # port          = *digit
  ret[:PORT] = port = '\d*'
  # hostport      = host [ ":" port ]
  ret[:HOSTPORT] = hostport = "#{host}(?::#{port})?"
  # userinfo      = *( unreserved | escaped |
  #                    ";" | ":" | "&" | "=" | "+" | "$" | "," )
  ret[:USERINFO] = userinfo = "(?:[#{unreserved};:&=+$,]|#{escaped})*"
  # pchar         = unreserved | escaped |
  #                 ":" | "@" | "&" | "=" | "+" | "$" | ","
  pchar = "(?:[#{unreserved}:@&=+$,]|#{escaped})"
  # param         = *pchar
  param = "#{pchar}*"
  # segment       = *pchar *( ";" param )
  segment = "#{pchar}*(?:;#{param})*"
  # path_segments = segment *( "/" segment )
  ret[:PATH_SEGMENTS] = path_segments = "#{segment}(?:/#{segment})*"
  # server        = [ [ userinfo "@" ] hostport ]
  server = "(?:#{userinfo}@)?#{hostport}"
  # reg_name      = 1*( unreserved | escaped | "$" | "," |
  #                     ";" | ":" | "@" | "&" | "=" | "+" )
  ret[:REG_NAME] = reg_name = "(?:[#{unreserved}$,;:@&=+]|#{escaped})+"
  # authority     = server | reg_name
  authority = "(?:#{server}|#{reg_name})"
  # rel_segment   = 1*( unreserved | escaped |
  #                     ";" | "@" | "&" | "=" | "+" | "$" | "," )
  ret[:REL_SEGMENT] = rel_segment = "(?:[#{unreserved};@&=+$,]|#{escaped})+"
  # scheme        = alpha *( alpha | digit | "+" | "-" | "." )
  ret[:SCHEME] = scheme = "[#{PATTERN::ALPHA}][\\-+.#{PATTERN::ALPHA}\\d]*"
  # abs_path      = "/"  path_segments
  ret[:ABS_PATH] = abs_path = "/#{path_segments}"
  # rel_path      = rel_segment [ abs_path ]
  ret[:REL_PATH] = rel_path = "#{rel_segment}(?:#{abs_path})?"
  # net_path      = "//" authority [ abs_path ]
  ret[:NET_PATH] = net_path = "//#{authority}(?:#{abs_path})?"
  # hier_part     = ( net_path | abs_path ) [ "?" query ]
  ret[:HIER_PART] = hier_part = "(?:#{net_path}|#{abs_path})(?:\\?(?:#{query}))?"
  # opaque_part   = uric_no_slash *uric
  ret[:OPAQUE_PART] = opaque_part = "#{uric_no_slash}#{uric}*"
  # absoluteURI   = scheme ":" ( hier_part | opaque_part )
  ret[:ABS_URI] = abs_uri = "#{scheme}:(?:#{hier_part}|#{opaque_part})"
  # relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
  ret[:REL_URI] = rel_uri = "(?:#{net_path}|#{abs_path}|#{rel_path})(?:\\?#{query})?"
  # Bundler::URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
  ret[:URI_REF] = "(?:#{abs_uri}|#{rel_uri})?(?:##{fragment})?"
  # Capturing variants (extended syntax); the inline (?# n: ...) notes give
  # the capture-group number of each component.
  ret[:X_ABS_URI] = "
    (#{scheme}):                           (?# 1: scheme)
    (?:
       (#{opaque_part})                    (?# 2: opaque)
    |
       (?:(?:
         //(?:
             (?:(?:(#{userinfo})@)?        (?# 3: userinfo)
               (?:(#{host})(?::(\\d*))?))? (?# 4: host, 5: port)
           |
             (#{reg_name})                 (?# 6: registry)
           )
         |
         (?!//))                           (?# XXX: '//' is the mark for hostport)
         (#{abs_path})?                    (?# 7: path)
       )(?:\\?(#{query}))?                 (?# 8: query)
    )
    (?:\\#(#{fragment}))?                  (?# 9: fragment)
  "
  ret[:X_REL_URI] = "
    (?:
      (?:
        //
        (?:
          (?:(#{userinfo})@)?       (?# 1: userinfo)
            (#{host})?(?::(\\d*))?  (?# 2: host, 3: port)
        |
          (#{reg_name})             (?# 4: registry)
        )
      )
    |
      (#{rel_segment})              (?# 5: rel_segment)
    )?
    (#{abs_path})?                  (?# 6: abs_path)
    (?:\\?(#{query}))?              (?# 7: query)
    (?:\\#(#{fragment}))?           (?# 8: fragment)
  "
  ret
end

def initialize_regexp(pattern)

Constructs the default Hash of Regexp's.
# Constructs the default Hash of Regexps from the +pattern+ Hash of
# pattern source strings.
#
# The :ABS_URI, :REL_URI, :ABS_URI_REF and :REL_URI_REF entries are compiled
# with Regexp::EXTENDED from the :X_ABS_URI / :X_REL_URI sources. The
# component validators are anchored with \A and \z so they only accept a
# whole String.
def initialize_regexp(pattern)
  extended = Regexp::EXTENDED

  regexps = {
    # for Bundler::URI::split (tolerates surrounding whitespace)
    ABS_URI: Regexp.new('\A\s*' + pattern[:X_ABS_URI] + '\s*\z', extended),
    REL_URI: Regexp.new('\A\s*' + pattern[:X_REL_URI] + '\s*\z', extended),
    # for Bundler::URI::extract
    URI_REF: Regexp.new(pattern[:URI_REF]),
    ABS_URI_REF: Regexp.new(pattern[:X_ABS_URI], extended),
    REL_URI_REF: Regexp.new(pattern[:X_REL_URI], extended),
    # for Bundler::URI::escape/unescape
    ESCAPED: Regexp.new(pattern[:ESCAPED]),
    UNSAFE: Regexp.new("[^#{pattern[:UNRESERVED]}#{pattern[:RESERVED]}]")
  }

  # for Generic#initialize: result key => name of the pattern it anchors
  {
    SCHEME: :SCHEME,
    USERINFO: :USERINFO,
    HOST: :HOST,
    PORT: :PORT,
    OPAQUE: :OPAQUE_PART,
    REGISTRY: :REG_NAME,
    ABS_PATH: :ABS_PATH,
    REL_PATH: :REL_PATH,
    QUERY: :QUERY,
    FRAGMENT: :FRAGMENT
  }.each do |name, source_key|
    regexps[name] = Regexp.new("\\A#{pattern[source_key]}\\z")
  end

  regexps
end

def inspect

# Returns the result of calling the method object held in the @@to_s class
# variable with this parser as receiver.
# NOTE(review): @@to_s is defined outside this view; presumably it is an
# UnboundMethod for a default #to_s. Confirm against the class body.
def inspect
  unbound_to_s = @@to_s
  unbound_to_s.bind_call(self)
end

def join(*uris)


Attempts to parse and merge a set of URIs.

== Description

+uris+::
an Array of Strings

== Args
# Merges +uris+ from left to right.
#
# Only the first element is passed through convert_to_uri; each following
# element is handed as-is to #merge on the running result.
def join(*uris)
  head, *rest = uris
  rest.inject(convert_to_uri(head), :merge)
end

def make_regexp(schemes = nil)

Returns the Regexp self.regexp[:ABS_URI_REF] by default.
When +schemes+ is provided, returns a Regexp built from self.pattern[:X_ABS_URI] that only matches where one of the Regexp.union of +schemes+, followed by a colon, starts the URI.
# Returns the Regexp used to find absolute URIs.
#
# Without +schemes+ this is @regexp[:ABS_URI_REF]. With +schemes+, a new
# extended-mode Regexp is built from @pattern[:X_ABS_URI], prefixed by a
# lookahead requiring one of the Regexp.union of +schemes+ followed by ":".
def make_regexp(schemes = nil)
  return @regexp[:ABS_URI_REF] unless schemes

  allowed_schemes = Regexp.union(*schemes)
  /(?=#{allowed_schemes}:)#{@pattern[:X_ABS_URI]}/x
end

def parse(uri)


p = Bundler::URI::Parser.new
p.parse("ldap://ldap.example.com/dc=example?user=john")
#=> #<Bundler::URI::LDAP ldap://ldap.example.com/dc=example?user=john>

== Usage

Parses +uri+ and constructs either matching Bundler::URI scheme object
(File, FTP, HTTP, HTTPS, LDAP, LDAPS, or MailTo) or Bundler::URI::Generic.

== Description

+uri+::
String

== Args
# Splits +uri+ with #split and passes the resulting components, followed by
# this parser itself, to Bundler::URI.for, returning whatever that returns.
def parse(uri)
  components = split(uri)
  Bundler::URI.for(*components, self)
end

def split(uri)

Returns a split Bundler::URI against regexp[:ABS_URI].
# Splits +uri+ into its components, trying @regexp[:ABS_URI] first and then
# @regexp[:REL_URI].
#
# Returns the nine-element Array
#   [scheme, userinfo, host, port, registry, path, opaque, query, fragment]
# where components that are not present are nil. The path becomes '' when
# there is neither a path nor an opaque part.
#
# Raises InvalidURIError when +uri+ matches neither Regexp, or when it
# matches as absolute but has no scheme, or has none of opaque part, path,
# host and registry.
def split(uri)
  scheme = userinfo = host = port = registry = nil
  path = opaque = query = fragment = nil

  case uri
  when ''
    # null uri: every component stays nil
  when @regexp[:ABS_URI]
    # Bundler::URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
    # absoluteURI   = scheme ":" ( hier_part | opaque_part )
    # hier_part     = ( net_path | abs_path ) [ "?" query ]
    # opaque_part   = uric_no_slash *uric
    # abs_path      = "/"  path_segments
    # net_path      = "//" authority [ abs_path ]
    # authority     = server | reg_name
    # server        = [ [ userinfo "@" ] hostport ]
    scheme, opaque, userinfo, host, port,
      registry, path, query, fragment = Regexp.last_match.captures

    unless scheme
      raise InvalidURIError,
        "bad Bundler::URI(absolute but no scheme): #{uri}"
    end
    unless opaque || path || host || registry
      raise InvalidURIError,
        "bad Bundler::URI(absolute but no path): #{uri}"
    end
  when @regexp[:REL_URI]
    # Bundler::URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
    # relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
    # net_path      = "//" authority [ abs_path ]
    # abs_path      = "/"  path_segments
    # rel_path      = rel_segment [ abs_path ]
    # authority     = server | reg_name
    # server        = [ [ userinfo "@" ] hostport ]
    userinfo, host, port, registry,
      rel_segment, abs_path, query, fragment = Regexp.last_match.captures

    # rel_segment followed by abs_path, whichever of the two are present
    path = [rel_segment, abs_path].compact.reduce(:+)
  else
    raise InvalidURIError, "bad Bundler::URI(is not Bundler::URI?): #{uri}"
  end

  path ||= '' unless opaque # (see RFC2396 Section 5.2)

  [
    scheme,
    userinfo, host, port,         # X
    registry,                     # X
    path,                         # Y
    opaque,                       # Y
    query,
    fragment
  ]
end

def unescape(str, escaped = @regexp[:ESCAPED])


Removes escapes from +str+.

== Description

+str+::
String to remove escapes from
+escaped+::
Regexp to apply. Defaults to self.regexp[:ESCAPED]

== Args

:call-seq:
unescape( str )
unescape( str, escaped )
# Replaces every part of +str+ matched by +escaped+ (default
# @regexp[:ESCAPED]) with the single byte named by the two characters that
# follow the first character of the match, read as hex digits.
#
# Each decoded byte is tagged with the encoding of +str+, except that UTF-8
# is used when +str+ is US-ASCII.
def unescape(str, escaped = @regexp[:ESCAPED])
  target_encoding = str.encoding == Encoding::US_ASCII ? Encoding::UTF_8 : str.encoding

  str.gsub(escaped) do |sequence|
    [sequence[1, 2]].pack('H2').force_encoding(target_encoding)
  end
end