module Addressable::IDNA
def self.lookup_unicode_lowercase(codepoint)
# Looks up the lowercase mapping for +codepoint+ in UNICODE_DATA.
#
# Returns the UNICODE_DATA_LOWERCASE entry of the codepoint's record when
# the record exists and that entry is set; otherwise returns +codepoint+
# unchanged.
def self.lookup_unicode_lowercase(codepoint)
  record = UNICODE_DATA[codepoint]
  return codepoint unless record

  record[UNICODE_DATA_LOWERCASE] || codepoint
end
def self.punycode_adapt(delta, numpoints, firsttime)
# Computes the next bias value from the delta that was just processed.
#
# delta     - the delta that was just encoded/decoded.
# numpoints - the number of code points handled so far (used as a divisor).
# firsttime - truthy on the first adaptation, which divides by PUNYCODE_DAMP
#             instead of halving.
#
# Returns the new bias as an Integer.
def self.punycode_adapt(delta, numpoints, firsttime)
  # `delta >> 1` is a faster way of doing `delta / 2`.
  delta = if firsttime
    delta / PUNYCODE_DAMP
  else
    delta >> 1
  end
  delta += delta / numpoints

  span = PUNYCODE_BASE - PUNYCODE_TMIN
  threshold = (span * PUNYCODE_TMAX) / 2

  k = 0
  until delta <= threshold
    delta /= span
    k += PUNYCODE_BASE
  end

  k + (span + 1) * delta / (delta + PUNYCODE_SKEW)
end
def self.punycode_basic?(codepoint)
# Reports whether +codepoint+ is a basic code point, i.e. lies below 0x80
# (128).
def self.punycode_basic?(codepoint)
  codepoint < 128
end
def self.punycode_decode(value)
# Decodes a Punycode string by delegating to IDN::Punycode.decode.
# +value+ is coerced with #to_s before it is handed to the library.
def self.punycode_decode(value)
  encoded = value.to_s
  IDN::Punycode.decode(encoded)
end
def self.punycode_decode(punycode)
# Decodes a Punycode string (without the ACE prefix) into a string packed
# with "U*".
#
# Raises PunycodeBigOutput when +punycode+ is longer than
# ACE_MAX_LENGTH * 2 or the output would exceed UNICODE_MAX_LENGTH code
# points, PunycodeBadInput when the input holds a byte above 127, a
# non-basic code point before the last delimiter, an invalid digit, or ends
# in the middle of an integer, and PunycodeOverflow when an intermediate
# value would exceed PUNYCODE_MAXINT.
def self.punycode_decode(punycode)
  if ACE_MAX_LENGTH * 2 < punycode.size
    raise PunycodeBigOutput, "Output would exceed the space provided."
  end

  input = []
  punycode.each_byte do |byte|
    unless byte >= 0 && byte <= 127
      raise PunycodeBadInput, "Input is invalid."
    end
    input.push(byte)
  end

  input_length = input.length
  max_out = UNICODE_MAX_LENGTH

  # Initialize the state
  n = PUNYCODE_INITIAL_N
  i = 0
  out = 0
  bias = PUNYCODE_INITIAL_BIAS

  # Handle the basic code points: let b be the number of input code
  # points before the last delimiter, or 0 if there is none, then
  # copy the first b code points to the output.
  b = 0
  input.each_with_index do |code, index|
    b = index if punycode_delimiter?(code)
  end
  if b > max_out
    raise PunycodeBigOutput, "Output would exceed the space provided."
  end

  output = []
  input.first(b).each do |code|
    unless punycode_basic?(code)
      raise PunycodeBadInput, "Input is invalid."
    end
    output << code
    out += 1
  end

  # Main decoding loop: start just after the last delimiter if any
  # basic code points were copied; start at the beginning otherwise.
  in_ = b > 0 ? b + 1 : 0
  while in_ < input_length
    # in_ is the index of the next character to be consumed, and
    # out is the number of code points in the output array.

    # Decode a generalized variable-length integer into delta,
    # which gets added to i. The overflow checking is easier
    # if we increase i as we go, then subtract off its starting
    # value at the end to obtain delta.
    oldi = i
    w = 1
    k = PUNYCODE_BASE
    loop do
      if in_ >= input_length
        raise PunycodeBadInput, "Input is invalid."
      end
      digit = punycode_decode_digit(input[in_])
      in_ += 1
      if digit >= PUNYCODE_BASE
        raise PunycodeBadInput, "Input is invalid."
      end
      if digit > (PUNYCODE_MAXINT - i) / w
        raise PunycodeOverflow, "Input needs wider integers to process."
      end
      i += digit * w

      t = if k <= bias
        PUNYCODE_TMIN
      elsif k >= bias + PUNYCODE_TMAX
        PUNYCODE_TMAX
      else
        k - bias
      end
      break if digit < t

      if w > PUNYCODE_MAXINT / (PUNYCODE_BASE - t)
        raise PunycodeOverflow, "Input needs wider integers to process."
      end
      w *= PUNYCODE_BASE - t
      k += PUNYCODE_BASE
    end

    bias = punycode_adapt(i - oldi, out + 1, oldi == 0)

    # i was supposed to wrap around from out + 1 to 0,
    # incrementing n each time, so we'll fix that now:
    if i / (out + 1) > PUNYCODE_MAXINT - n
      raise PunycodeOverflow, "Input needs wider integers to process."
    end
    n += i / (out + 1)
    i %= out + 1

    # Insert n at position i of the output:
    if out >= max_out
      raise PunycodeBigOutput, "Output would exceed the space provided."
    end

    # Shift the tail one slot to the right (the memmove of the C original),
    # then drop n into the gap.
    output[i + 1, out - i] = output[i, out - i]
    output[i] = n
    i += 1
    out += 1
  end

  output.pack("U*")
end
def self.punycode_decode_digit(codepoint)
Returns the numeric value of a basic codepoint (for use in representing
integers) in the range 0 to 35, or PUNYCODE_BASE if the codepoint does not represent a value.
# Returns the numeric value of a basic codepoint (for use in representing
# integers), or PUNYCODE_BASE if the codepoint does not represent a digit.
#
# Digit alphabet:
#   "a".."z" (97..122) and "A".."Z" (65..90) => 0..25
#   "0".."9" (48..57)                        => 26..35
#
# Explicit ranges are used on purpose. Ruby integers are signed, so a test
# such as `codepoint - 48 < 10` is also true for every codepoint below 48
# and would turn characters like "-" or ":" into bogus (even negative)
# digits instead of rejecting them.
def self.punycode_decode_digit(codepoint)
  case codepoint
  when 48..57 then codepoint - 22
  when 65..90 then codepoint - 65
  when 97..122 then codepoint - 97
  else PUNYCODE_BASE
  end
end
def self.punycode_delimiter?(codepoint)
# Reports whether +codepoint+ equals PUNYCODE_DELIMITER, the code point that
# separates the basic code points from the encoded remainder.
def self.punycode_delimiter?(codepoint)
  is_delimiter = (codepoint == PUNYCODE_DELIMITER)
  is_delimiter
end
def self.punycode_encode(value)
# Encodes a string as Punycode by delegating to IDN::Punycode.encode.
# +value+ is coerced with #to_s before it is handed to the library.
def self.punycode_encode(value)
  text = value.to_s
  IDN::Punycode.encode(text)
end
def self.punycode_encode(unicode)
# Encodes a string as Punycode (without the ACE prefix).
#
# +unicode+ is coerced with #to_s when it is not already a String and is
# unpacked with "U*" into code points.
#
# Raises PunycodeBigOutput when the encoded form would not fit in
# ACE_MAX_LENGTH characters, PunycodeOverflow when an intermediate value
# would exceed PUNYCODE_MAXINT, PunycodeBadInput when an output character
# is not listed in PUNYCODE_PRINT_ASCII, and StandardError when an output
# character falls outside 0..127.
def self.punycode_encode(unicode)
  unicode = unicode.to_s unless unicode.is_a?(String)
  input = unicode.unpack("U*")
  input_length = input.size
  max_out = ACE_MAX_LENGTH
  # Zero-filled buffer with one slot more than the maximum output length.
  output = Array.new(ACE_MAX_LENGTH + 1, 0)

  # Initialize the state
  n = PUNYCODE_INITIAL_N
  delta = 0
  out = 0
  bias = PUNYCODE_INITIAL_BIAS

  # Handle the basic code points:
  input.each do |code|
    next unless punycode_basic?(code)

    if max_out - out < 2
      raise PunycodeBigOutput, "Output would exceed the space provided."
    end
    output[out] = code
    out += 1
  end

  # h is the number of code points that have been handled, b is the
  # number of basic code points, and out is the number of characters
  # that have been output.
  b = out
  h = out

  if b > 0
    output[out] = PUNYCODE_DELIMITER
    out += 1
  end

  # Main encoding loop:
  while h < input_length
    # All non-basic code points < n have been handled already.
    # Find the next larger one:
    m = PUNYCODE_MAXINT
    input.each do |code|
      m = code if (n...m) === code
    end

    # Increase delta enough to advance the decoder's
    # <n,i> state to <m,0>, but guard against overflow:
    if m - n > (PUNYCODE_MAXINT - delta) / (h + 1)
      raise PunycodeOverflow, "Input needs wider integers to process."
    end
    delta += (m - n) * (h + 1)
    n = m

    input.each do |code|
      # Punycode does not need to check whether the code point is basic:
      if code < n
        delta += 1
        if delta == 0
          raise PunycodeOverflow, "Input needs wider integers to process."
        end
      end

      next unless code == n

      # Represent delta as a generalized variable-length integer:
      q = delta
      k = PUNYCODE_BASE
      loop do
        if out >= max_out
          raise PunycodeBigOutput, "Output would exceed the space provided."
        end

        t = if k <= bias
          PUNYCODE_TMIN
        elsif k >= bias + PUNYCODE_TMAX
          PUNYCODE_TMAX
        else
          k - bias
        end
        break if q < t

        output[out] = punycode_encode_digit(t + (q - t) % (PUNYCODE_BASE - t))
        out += 1
        q = (q - t) / (PUNYCODE_BASE - t)
        k += PUNYCODE_BASE
      end

      output[out] = punycode_encode_digit(q)
      out += 1
      bias = punycode_adapt(delta, h + 1, h == b)
      delta = 0
      h += 1
    end

    delta += 1
    n += 1
  end

  # Validate every character that was produced.
  out.times do |index|
    char_code = output[index]
    unless char_code >= 0 && char_code <= 127
      raise StandardError, "Invalid output char."
    end
    unless PUNYCODE_PRINT_ASCII[char_code]
      raise PunycodeBadInput, "Input is invalid."
    end
  end

  # The slice reaches one slot past the written output, so it can pick up
  # zero padding from the buffer; the trailing sub strips those NULs.
  output[0..out].map { |code| code.chr }.join("").sub(/\0+\z/, "")
end
def self.punycode_encode_digit(d)
# Returns the code point used to represent digit +d+.
#
# Digits below 26 are offset by 97 (0 => 97, "a"); all other digits are
# offset by 22 (26 => 48, "0").
def self.punycode_encode_digit(d)
  offset = d < 26 ? 97 : 22
  d + offset
end
def self.to_ascii(value)
# Converts each dot-separated segment of +value+ with IDN::Idna.toASCII
# (ALLOW_UNASSIGNED flag set) and joins the results with dots again.
#
# The split keeps trailing empty segments. Empty segments stay empty and
# segments of 64 or more characters are passed through unchanged.
def self.to_ascii(value)
  segments = value.to_s.split('.', -1)
  converted = segments.map do |segment|
    length = segment.size
    if length.zero?
      ''
    elsif length < 64
      IDN::Idna.toASCII(segment, IDN::Idna::ALLOW_UNASSIGNED)
    else
      segment
    end
  end
  converted.join('.')
end
def self.to_ascii(input)
Converts from a Unicode internationalized domain name to an ASCII domain name.
# Converts from a Unicode internationalized domain name to an ASCII
# domain name.
#
# The input is coerced to a String, NFKC-normalized as UTF-8 and then
# treated as binary. When it matches both UTF8_REGEX and
# UTF8_REGEX_MULTIBYTE it is downcased and split on dots, and every label
# that itself matches both patterns is replaced by ACE_PREFIX plus its
# Punycode encoding. Otherwise the normalized input is returned as is.
def self.to_ascii(input)
  input = input.to_s unless input.is_a?(String)
  input = input.dup.force_encoding(Encoding::UTF_8).unicode_normalize(:nfkc)
  input.force_encoding(Encoding::ASCII_8BIT) if input.respond_to?(:force_encoding)

  return input unless input =~ UTF8_REGEX && input =~ UTF8_REGEX_MULTIBYTE

  labels = unicode_downcase(input).split('.').map do |label|
    label.force_encoding(Encoding::ASCII_8BIT) if label.respond_to?(:force_encoding)
    if label =~ UTF8_REGEX && label =~ UTF8_REGEX_MULTIBYTE
      ACE_PREFIX + punycode_encode(label)
    else
      label
    end
  end
  labels.join('.')
end
def self.to_unicode(value)
# Converts each dot-separated segment of +value+ with IDN::Idna.toUnicode
# (ALLOW_UNASSIGNED flag set) and joins the results with dots again.
#
# The split keeps trailing empty segments. Empty segments stay empty and
# segments of 64 or more characters are passed through unchanged.
def self.to_unicode(value)
  segments = value.to_s.split('.', -1)
  converted = segments.map do |segment|
    length = segment.size
    if length.zero?
      ''
    elsif length < 64
      IDN::Idna.toUnicode(segment, IDN::Idna::ALLOW_UNASSIGNED)
    else
      segment
    end
  end
  converted.join('.')
end
def self.to_unicode(input)
Converts from an ASCII domain name to a Unicode internationalized domain name.
# Converts from an ASCII domain name to a Unicode internationalized
# domain name.
#
# The input is coerced to a String and split on dots. Every label that
# starts with ACE_PREFIX followed by at least one character is decoded with
# punycode_decode; all other labels are kept as they are. The joined result
# is tagged as UTF-8.
#
# toUnicode is explicitly defined as never-fails by the spec, so a label
# whose decoding raises any of the Punycode errors (bad input, output too
# big, integer overflow) is returned unchanged instead of propagating the
# exception.
def self.to_unicode(input)
  input = input.to_s unless input.is_a?(String)
  # Build the pattern once per call instead of twice per label.
  ace_pattern = /^#{ACE_PREFIX}(.+)/

  parts = input.split('.').map do |part|
    encoded = part[ace_pattern, 1]
    next part unless encoded

    begin
      punycode_decode(encoded)
    rescue Addressable::IDNA::PunycodeBadInput,
           Addressable::IDNA::PunycodeBigOutput,
           Addressable::IDNA::PunycodeOverflow
      # Fall back to the original label: the conversion must not fail.
      part
    end
  end

  output = parts.join('.')
  output.force_encoding(Encoding::UTF_8) if output.respond_to?(:force_encoding)
  output
end
def self.unicode_downcase(input)
Returns:
- (String) - The downcased result.
Parameters:
- input (String) -- The string to downcase.
Other tags:
- Api: private
# Unicode-aware downcase.
#
# +input+ is coerced with #to_s when it is not already a String, unpacked
# into code points with "U*", each code point is mapped through
# lookup_unicode_lowercase, and the result is packed back with "U*".
def self.unicode_downcase(input)
  input = input.to_s unless input.is_a?(String)
  lowered = input.unpack("U*").map do |codepoint|
    lookup_unicode_lowercase(codepoint)
  end
  lowered.pack("U*")
end
def unicode_normalize_kc(value)
- Use {String#unicode_normalize(:nfkc)} instead
# Returns +value+ (coerced with #to_s) normalized to Unicode form NFKC.
# This is a thin wrapper; String#unicode_normalize(:nfkc) can be called
# directly instead.
def unicode_normalize_kc(value)
  text = value.to_s
  text.unicode_normalize(:nfkc)
end
def unicode_normalize_kc(value)
- Use {String#unicode_normalize(:nfkc)} instead
# Returns +value+ (coerced with #to_s) normalized to Unicode form NFKC.
# This is a thin wrapper; String#unicode_normalize(:nfkc) can be called
# directly instead.
def unicode_normalize_kc(value)
  text = value.to_s
  text.unicode_normalize(:nfkc)
end