module CGI::Escape
def escape(string)
url_encoded_string = CGI.escape("'Stop!' said Fred")
Space characters (+" "+) are encoded with plus signs (+"+"+)
URL-encode a string into application/x-www-form-urlencoded.
# URL-encode +string+ as application/x-www-form-urlencoded.
#
# Works on a binary copy of the input: every byte outside the set
# [a-zA-Z0-9_.-~] and the space character becomes an upper-case %XX
# sequence, and each space then becomes "+". The returned string is tagged
# with the encoding of the original +string+.
def escape(string)
  source_encoding = string.encoding
  bytes = string.b
  bytes.gsub!(/([^ a-zA-Z0-9_.\-~]+)/) do |run|
    # One %XX triplet per byte of the matched run.
    run.each_byte.map { |byte| format('%%%02X', byte) }.join
  end
  bytes.tr!(' ', '+')
  bytes.force_encoding(source_encoding)
end
def escapeElement(string, *elements)
# "<BR>&lt;A HREF=&quot;url&quot;&gt;&lt;/A&gt;"
print CGI.escapeElement('<BR><A HREF="url"></A>', ["A", "IMG"])
# "<BR>&lt;A HREF=&quot;url&quot;&gt;&lt;/A&gt;"
print CGI.escapeElement('<BR><A HREF="url"></A>', "A", "IMG")
instance, the double-quotes surrounding attribute values).
The attribute list of the open tag will also be escaped (for
This matches both the start and the end tag of that element.
is specified by the name of the element, without angle brackets.
Takes an element or elements or array of elements. Each element
Escape only the tags of certain HTML elements in +string+.
# Escape only the tags of the named HTML elements in +string+.
#
# +elements+ is either a list of element names or a single Array of names
# (names are given without angle brackets). Every start or end tag of one
# of those elements, including its attribute list, is passed through
# CGI.escapeHTML; the rest of +string+ is left alone. When no element names
# are given, +string+ itself is returned.
def escapeElement(string, *elements)
  elements = elements.first if elements.first.kind_of?(Array)
  return string if elements.empty?

  # The names are interpolated into the pattern as-is (case-insensitive).
  tag_pattern = /<\/?(?:#{elements.join("|")})\b[^<>]*+>?/im
  string.gsub(tag_pattern) { |tag| CGI.escapeHTML(tag) }
end
def escapeHTML(string)
CGI.escapeHTML('Usage: foo "bar" <baz>')
Escape special characters in HTML, namely '&\"<>
# Escape the HTML special characters ' & " < > in +string+.
#
# ASCII-compatible input is processed on a binary copy using
# TABLE_FOR_ESCAPE_HTML__ directly and then re-tagged with its original
# encoding. For other encodings the table and the character class are
# transcoded to the working encoding first; a dummy encoding is converted
# to its ASCII-compatible counterpart for the substitution and the result
# is converted back afterwards.
def escapeHTML(string)
  enc = string.encoding

  if enc.ascii_compatible?
    escaped = string.b
    escaped.gsub!(/['&\"<>]/, TABLE_FOR_ESCAPE_HTML__)
    return escaped.force_encoding(enc)
  end

  origenc = nil
  if enc.dummy?
    # Remember the dummy encoding so the result can be converted back.
    origenc = enc
    enc = Encoding::Converter.asciicompat_encoding(enc)
    string = enc ? string.encode(enc) : string.b
  end

  # Transcode both keys and values of the escape table to the working encoding.
  pairs = TABLE_FOR_ESCAPE_HTML__.map { |pair| pair.map { |s| s.encode(enc) } }
  table = Hash[pairs]
  special = /#{"['&\"<>]".encode(enc)}/
  string = string.gsub(special, table)
  string.encode!(origenc) if origenc
  string
end
def escapeURIComponent(string)
url_encoded_string = CGI.escapeURIComponent("'Stop!' said Fred")
Space characters (+" "+) are encoded with (+"%20"+)
URL-encode a string following RFC 3986
# URL-encode +string+ following RFC 3986.
#
# Works on a binary copy of the input: every byte outside the set
# [a-zA-Z0-9_.-~] becomes an upper-case %XX sequence, so a space is
# encoded as "%20". The returned string is tagged with the encoding of the
# original +string+.
def escapeURIComponent(string)
  source_encoding = string.encoding
  bytes = string.b
  bytes.gsub!(/([^a-zA-Z0-9_.\-~]+)/) do |run|
    # One %XX triplet per byte of the matched run.
    run.each_byte.map { |byte| format('%%%02X', byte) }.join
  end
  bytes.force_encoding(source_encoding)
end
def unescape(string, encoding = @@accept_charset)
string = CGI.unescape("%27Stop%21%27+said+Fred")
URL-decode an application/x-www-form-urlencoded string, optionally specifying the encoding of the result.
# URL-decode an application/x-www-form-urlencoded +string+.
#
# "+" is turned into a space and every run of %XX sequences is replaced by
# the bytes it denotes. The result is tagged with +encoding+; if the decoded
# bytes are not valid in that encoding, the encoding of the original
# +string+ is used instead.
def unescape(string, encoding = @@accept_charset)
  decoded = string.tr('+', ' ').b
  decoded.gsub!(/((?:%[0-9a-fA-F]{2})+)/) do |run|
    # Strip the percent signs and pack the remaining hex digits into bytes.
    [run.delete('%')].pack('H*')
  end
  decoded.force_encoding(encoding)
  return decoded if decoded.valid_encoding?

  decoded.force_encoding(string.encoding)
end
def unescapeElement(string, *elements)
CGI.escapeHTML('<BR><A HREF="url"></A>'), ["A", "IMG"])
print CGI.unescapeElement(
# "&lt;BR&gt;<A HREF="url"></A>"
CGI.escapeHTML('<BR><A HREF="url"></A>'), "A", "IMG")
print CGI.unescapeElement(
Undo escaping such as that done by CGI.escapeElement()
# Undo escaping such as that done by CGI.escapeElement().
#
# +elements+ is either a list of element names or a single Array of names
# (names are given without angle brackets). Every HTML-escaped start or end
# tag of one of those elements -- text beginning with "&lt;" or "&lt;/"
# followed by the element name -- is passed through unescapeHTML, together
# with its escaped attribute list and the closing "&gt;" when present.
# Everything else in +string+ is left alone. When no element names are
# given, +string+ itself is returned.
def unescapeElement(string, *elements)
  elements = elements[0] if elements[0].kind_of?(Array)
  unless elements.empty?
    # The tag delimiters must be the escaped forms "&lt;" / "&gt;": the input
    # is text produced by escaping, so literal "<" and ">" never delimit the
    # tags to restore. Inside the tag, any entity other than &lt;/&gt; is
    # consumed, so the match stops at the first "&gt;" (or next "&lt;").
    string.gsub(/&lt;\/?(?:#{elements.join("|")})\b(?>[^&]+|&(?![gl]t;)\w+;)*(?:&gt;)?/im) do
      unescapeHTML($&)
    end
  else
    string
  end
end
def unescapeHTML(string)
CGI.unescapeHTML("Usage: foo &quot;bar&quot; &lt;baz&gt;")
Unescape a string that has been HTML-escaped
# Unescape a string that has been HTML-escaped.
#
# Replaces the named references &apos; &amp; &quot; &gt; &lt; and decimal
# (&#NNN;) or hexadecimal (&#xHH;) numeric character references with the
# characters they denote. The result has the same encoding as +string+.
#
# For ASCII-compatible encodings a numeric reference is only decoded when
# its code point is below a per-encoding limit (0x10ffff for UTF-8, 256 for
# ISO-8859-1, 128 otherwise) and is a valid character in that encoding;
# any other reference is left in the output exactly as it was written.
def unescapeHTML(string)
  enc = string.encoding
  unless enc.ascii_compatible?
    if enc.dummy?
      # Work in the ASCII-compatible counterpart and convert back at the end.
      origenc = enc
      enc = Encoding::Converter.asciicompat_encoding(enc)
      string = enc ? string.encode(enc) : string.b
    end
    string = string.gsub(Regexp.new('&(apos|amp|quot|gt|lt|#[0-9]+|#x[0-9A-Fa-f]+);'.encode(enc))) do
      case $1.encode(Encoding::US_ASCII)
      when 'apos'                then "'".encode(enc)
      when 'amp'                 then '&'.encode(enc)
      when 'quot'                then '"'.encode(enc)
      when 'gt'                  then '>'.encode(enc)
      when 'lt'                  then '<'.encode(enc)
      when /\A#0*(\d+)\z/        then $1.to_i.chr(enc)
      when /\A#x([0-9a-f]+)\z/i  then $1.hex.chr(enc)
      end
    end
    string.encode!(origenc) if origenc
    return string
  end

  # Fast path: nothing to unescape.
  return string unless string.include? '&'

  charlimit = case enc
              when Encoding::UTF_8      then 0x10ffff
              when Encoding::ISO_8859_1 then 256
              else 128
              end

  # Substitute on a binary copy; the original encoding is restored at the end.
  string = string.b
  string.gsub!(/&(apos|amp|quot|gt|lt|\#[0-9]+|\#[xX][0-9A-Fa-f]+);/) do |entity|
    name = $1
    case name
    when 'apos' then "'"
    when 'amp'  then '&'
    when 'quot' then '"'
    when 'gt'   then '>'
    when 'lt'   then '<'
    else
      # Numeric reference: "#NNN" (decimal) or "#xHH" / "#XHH" (hexadecimal).
      digits = name[1..-1]
      codepoint = digits.start_with?('x', 'X') ? digits[1..-1].hex : digits.to_i
      if codepoint < charlimit
        begin
          # Return the character as raw bytes so it can always be appended
          # to the binary buffer, even when the input already contains
          # non-ASCII bytes.
          codepoint.chr(enc).b
        rescue RangeError
          # Code point is not a valid character in +enc+ (e.g. a UTF-16
          # surrogate in UTF-8): keep the reference untouched.
          entity
        end
      else
        entity
      end
    end
  end
  string.force_encoding enc
end
def unescapeURIComponent(string, encoding = @@accept_charset)
string = CGI.unescapeURIComponent("%27Stop%21%27+said%20Fred")
URL-decode a string following RFC 3986, optionally specifying the encoding of the result.
# URL-decode +string+ following RFC 3986.
#
# Every run of %XX sequences is replaced by the bytes it denotes; "+" is
# not treated specially. The result is tagged with +encoding+; if the
# decoded bytes are not valid in that encoding, the encoding of the
# original +string+ is used instead.
def unescapeURIComponent(string, encoding = @@accept_charset)
  decoded = string.b
  decoded.gsub!(/((?:%[0-9a-fA-F]{2})+)/) do |run|
    # Strip the percent signs and pack the remaining hex digits into bytes.
    [run.delete('%')].pack('H*')
  end
  decoded.force_encoding(encoding)
  return decoded if decoded.valid_encoding?

  decoded.force_encoding(string.encoding)
end