class Addressable::URI
def self.normalize_component(component, character_class=
Returns:
- (String) -- The normalized component.
Parameters:
- leave_encoded (String) --
- character_class (String, Regexp) --
- component (String, #to_str) -- The URI component to encode.
##
# Normalizes the percent-encoding of a URI component by unencoding it,
# applying Unicode NFC normalization, and re-encoding it.
#
# @param [String, #to_str] component
#   The URI component to encode.
# @param [String, Regexp] character_class
#   When a String, it is interpolated into a negated character class
#   (<code>/[^...]/</code>) that is handed to <code>encode_component</code>.
#   A Regexp is passed through unchanged. String and Regexp subclasses
#   are accepted.
# @param [String] leave_encoded
#   Only consulted here when <code>character_class</code> is a String: if
#   non-empty, <code>'%'</code> is added to the character class and the
#   pattern additionally matches a <code>'%'</code> that is not followed by
#   the table entry (lower- or upper-case) of one of these bytes.
#
# @return [String, nil]
#   The normalized component, tagged as UTF-8, or <code>nil</code> when
#   <code>component</code> is <code>nil</code>.
#
# @raise [TypeError]
#   If <code>component</code> cannot be converted with <code>to_str</code>,
#   or if <code>character_class</code> is neither a String nor a Regexp.
def self.normalize_component(component,
                             character_class = CharacterClasses::RESERVED + CharacterClasses::UNRESERVED,
                             leave_encoded = '')
  return nil if component.nil?

  unless component.is_a?(String)
    begin
      component = component.to_str
    rescue NoMethodError, TypeError
      raise TypeError, "Can't convert #{component.class} into String."
    end
  end

  # Use is_a? rather than an exact class comparison so that subclasses of
  # String/Regexp are accepted, consistent with the String check below.
  unless character_class.is_a?(String) || character_class.is_a?(Regexp)
    raise TypeError,
          "Expected String or Regexp, got #{character_class.inspect}"
  end

  if character_class.is_a?(String)
    leave_re = nil
    unless leave_encoded.empty?
      # '%' must be part of the class so that it is only matched through
      # the look-ahead alternative built below.
      character_class = "#{character_class}%" unless character_class.include?('%')

      # NOTE(review): SEQUENCE_ENCODING_TABLE is defined elsewhere;
      # presumably it maps a byte to its hex sequence -- confirm.
      leave_encoded_pattern =
        leave_encoded.bytes.map { |b| SEQUENCE_ENCODING_TABLE[b] }.join('|')
      leave_re =
        "|%(?!#{leave_encoded_pattern}|#{leave_encoded_pattern.upcase})"
    end

    character_class =
      if leave_re
        /[^#{character_class}]#{leave_re}/
      else
        /[^#{character_class}]/
      end
  end

  # We can't perform regexps on invalid UTF sequences, but
  # here we need to, so switch to ASCII.
  component = component.dup
  component.force_encoding(Encoding::ASCII_8BIT)
  unencoded = unencode_component(component, String, leave_encoded)

  encoded =
    begin
      encode_component(
        unencoded.unicode_normalize(:nfc),
        character_class,
        leave_encoded
      )
    rescue ArgumentError
      # Presumably raised when the unencoded bytes are not valid for
      # normalization. NOTE: this fallback uses encode_component's own
      # defaults, not the caller's character_class / leave_encoded.
      encode_component(unencoded)
    end

  encoded.force_encoding(Encoding::UTF_8)
  encoded
end