# frozen_string_literal: true
require "erb"
require "active_support/core_ext/module/redefine_method"
require "active_support/multibyte/unicode"
class ERB
module Util
# Replacement table for HTML escaping: every HTML-significant character maps to
# its entity. Used as the hash argument of +gsub+ by +html_escape_once+.
HTML_ESCAPE = { "&" => "&amp;", ">" => "&gt;", "<" => "&lt;", '"' => "&quot;", "'" => "&#39;" }
# Replacement table for +json_escape+. The values are single-quoted on purpose:
# they are the literal six-character text \uXXXX, not the characters themselves.
JSON_ESCAPE = { "&" => '\u0026', ">" => '\u003e', "<" => '\u003c', "\u2028" => '\u2028', "\u2029" => '\u2029' }
# Matches quotes and angle brackets, plus any "&" that is NOT already the start
# of a named (&name;), decimal (&#123;) or hexadecimal (&#x1F;) entity.
HTML_ESCAPE_ONCE_REGEXP = /["><']|&(?!([a-zA-Z]+|(#\d+)|(#[xX][\dA-Fa-f]+));)/
# Matches every character that has an entry in JSON_ESCAPE.
JSON_ESCAPE_REGEXP = /[\u2028\u2029&><]/u
# Following XML requirements: https://www.w3.org/TR/REC-xml/#NT-Name
#
# Body of a regexp character class (without the surrounding brackets) listing
# the characters accepted as the first character of a name. It is interpolated
# into the two negated classes below. Note that "@" is listed here in addition
# to the letter/underscore/colon ranges.
TAG_NAME_START_REGEXP_SET = "@:A-Z_a-z\u{C0}-\u{D6}\u{D8}-\u{F6}\u{F8}-\u{2FF}\u{370}-\u{37D}\u{37F}-\u{1FFF}" \
"\u{200C}-\u{200D}\u{2070}-\u{218F}\u{2C00}-\u{2FEF}\u{3001}-\u{D7FF}\u{F900}-\u{FDCF}" \
"\u{FDF0}-\u{FFFD}\u{10000}-\u{EFFFF}"
# Matches any character that is NOT allowed at the start of a name.
TAG_NAME_START_REGEXP = /[^#{TAG_NAME_START_REGEXP_SET}]/
# Matches any character that is NOT allowed after the first position. On top of
# the start set this also accepts "-", ".", ASCII digits, U+00B7,
# U+0300..U+036F and U+203F..U+2040.
TAG_NAME_FOLLOWING_REGEXP = /[^#{TAG_NAME_START_REGEXP_SET}\-.0-9\u{B7}\u{0300}-\u{036F}\u{203F}-\u{2040}]/
# Substituted by +xml_name_escape+ for every character the regexps above match.
TAG_NAME_REPLACEMENT_CHAR = "_"
# A utility method for escaping HTML tag characters.
# This method is also aliased as <tt>h</tt>.
#
# puts html_escape('is a > 0 & a < 10?')
# # => is a &gt; 0 &amp; a &lt; 10?
def html_escape(s)
  # Escape first, then flag the escaped text as safe so it is not escaped a
  # second time when it is written into a template.
  escaped = unwrapped_html_escape(s)
  escaped.html_safe
end
# Publish the escaper under the short name +h+ too. The +silence_...+ helper
# comes from the redefine_method core extension required at the top of this
# file; presumably it suppresses Ruby's "method redefined" warning, since this
# reopens the ERB::Util module that `require "erb"` has already loaded.
silence_redefinition_of_method :h
alias h html_escape
# module_function also makes +h+ callable on the module itself (ERB::Util.h).
module_function :h
# Same treatment for the module-level copy of +html_escape+: silence the
# existing singleton method first, then let module_function replace it.
singleton_class.silence_redefinition_of_method :html_escape
module_function :html_escape
# HTML escapes strings but doesn't wrap them with an ActiveSupport::SafeBuffer.
# This method is not for public consumption! Seriously!
def unwrapped_html_escape(s) # :nodoc:
  string = s.to_s
  # Text already flagged as safe is handed back untouched.
  return string if string.html_safe?

  # Clean up the bytes first, then escape; the result is a plain String,
  # not a SafeBuffer.
  tidied = ActiveSupport::Multibyte::Unicode.tidy_bytes(string)
  CGI.escapeHTML(tidied)
end
module_function :unwrapped_html_escape
# A utility method for escaping HTML without affecting existing escaped entities.
#
# html_escape_once('1 < 2 &amp; 3')
# # => "1 &lt; 2 &amp; 3"
#
# html_escape_once('&lt;&lt; Accept & Checkout')
# # => "&lt;&lt; Accept &amp; Checkout"
def html_escape_once(s)
  tidied = ActiveSupport::Multibyte::Unicode.tidy_bytes(s.to_s)
  # The regexp skips any "&" that already begins an entity, so text that was
  # escaped earlier is not escaped again.
  result = tidied.gsub(HTML_ESCAPE_ONCE_REGEXP, HTML_ESCAPE)

  # Keep the safety flag of the input: safe in, safe out.
  if s.html_safe?
    result.html_safe
  else
    result
  end
end
module_function :html_escape_once
# A utility method for escaping HTML entities in JSON strings. Specifically, the
# &, > and < characters are replaced with their equivalent unicode escaped form -
# \u0026, \u003e, and \u003c. The Unicode sequences \u2028 and \u2029 are also
# escaped as they are treated as newline characters in some JavaScript engines.
# These sequences have identical meaning as the original characters inside the
# context of a JSON string, so assuming the input is a valid and well-formed
# JSON value, the output will have equivalent meaning when parsed:
#
# json = JSON.generate({ name: "</script><script>alert('PWNED!!!')</script>"})
# # => "{\"name\":\"</script><script>alert('PWNED!!!')</script>\"}"
#
# json_escape(json)
# # => "{\"name\":\"\\u003c/script\\u003e\\u003cscript\\u003ealert('PWNED!!!')\\u003c/script\\u003e\"}"
#
# JSON.parse(json) == JSON.parse(json_escape(json))
# # => true
#
# The intended use case for this method is to escape JSON strings before including
# them inside a script tag to avoid XSS vulnerability:
#
# <script>
# var currentUser = <%= raw json_escape(current_user.to_json) %>;
# </script>
#
# It is necessary to +raw+ the result of +json_escape+, so that quotation marks
# don't get converted to <tt>&quot;</tt> entities. +json_escape+ doesn't
# automatically flag the result as HTML safe, since the raw value is unsafe to
# use inside HTML attributes.
#
# If your JSON is being used downstream for insertion into the DOM, be aware of
# whether or not it is being inserted via <tt>html()</tt>. Most jQuery plugins do this.
# If that is the case, be sure to +html_escape+ or +sanitize+ any user-generated
# content returned by your JSON.
#
# If you need to output JSON elsewhere in your HTML, you can just do something
# like this, as any unsafe characters (including quotation marks) will be
# automatically escaped for you:
#
# <div data-user-info="<%= current_user.to_json %>">...</div>
#
# WARNING: this helper only works with valid JSON. Using this on non-JSON values
# will open up serious XSS vulnerabilities. For example, if you replace the
# +current_user.to_json+ in the example above with user input instead, the browser
# will happily <tt>eval()</tt> that string as JavaScript.
#
# The escaping performed in this method is identical to those performed in the
# Active Support JSON encoder when +ActiveSupport.escape_html_entities_in_json+ is
# set to true. Because this transformation is idempotent, this helper can be
# applied even if +ActiveSupport.escape_html_entities_in_json+ is already true.
#
# Therefore, when you are unsure if +ActiveSupport.escape_html_entities_in_json+
# is enabled, or if you are unsure where your JSON string originated from, it
# is recommended that you always apply this helper (other libraries, such as the
# JSON gem, do not provide this kind of protection by default; also some gems
# might override +to_json+ to bypass Active Support's encoder).
def json_escape(s)
  # Swap each matched character for its \uXXXX spelling from JSON_ESCAPE.
  escaped = s.to_s.gsub(JSON_ESCAPE_REGEXP, JSON_ESCAPE)

  # The result is only flagged safe when the input already was.
  if s.html_safe?
    escaped.html_safe
  else
    escaped
  end
end
module_function :json_escape
# A utility method for escaping XML names of tags and names of attributes.
#
# xml_name_escape('1 < 2 & 3')
# # => "____2___3"
#
# It follows the requirements of the specification: https://www.w3.org/TR/REC-xml/#NT-Name
def xml_name_escape(name)
  raw = name.to_s
  return "" if raw.blank?

  # The first character is checked against the stricter "name start" set.
  head = raw[0].gsub(TAG_NAME_START_REGEXP, TAG_NAME_REPLACEMENT_CHAR)
  return head unless raw.size > 1

  # Every later character may also be a digit, "-", "." and a few others.
  tail = raw[1..-1].gsub(TAG_NAME_FOLLOWING_REGEXP, TAG_NAME_REPLACEMENT_CHAR)
  head + tail
end
module_function :xml_name_escape
end
end
class Object
  # Default answer for every object: not HTML safe. Classes that can vouch
  # for their content override this.
  def html_safe?
    false
  end
end
class Numeric
  # Numbers always report themselves as HTML safe.
  def html_safe?
    true
  end
end
module ActiveSupport # :nodoc:
  # A String subclass that carries an "HTML safe" flag. While the flag is set,
  # anything written into the buffer that does not itself answer true to
  # +html_safe?+ is HTML-escaped first. Once the flag is cleared, values are
  # stored as given.
  class SafeBuffer < String
    # String methods whose result can no longer be trusted. The non-bang form
    # returns a plain String; the bang form clears the flag on the receiver.
    UNSAFE_STRING_METHODS = %w(
      capitalize chomp chop delete delete_prefix delete_suffix
      downcase lstrip next reverse rstrip scrub slice squeeze strip
      succ swapcase tr tr_s unicode_normalize upcase
    )
    # Same idea, but these also have to keep $~ / $1 working inside a
    # caller-supplied block.
    UNSAFE_STRING_METHODS_WITH_BACKREF = %w(gsub sub)

    # Keep String's own concat reachable before it is overridden below.
    alias_method :original_concat, :concat
    private :original_concat

    # Raised when ActiveSupport::SafeBuffer#safe_concat is called on unsafe buffers.
    class SafeConcatError < StandardError
      def initialize
        super("Could not concatenate to the buffer because it is not html safe.")
      end
    end

    # Slicing a safe buffer gives a safe buffer (or nil when out of range);
    # slicing an unsafe buffer gives a plain String.
    def [](*args)
      return to_str[*args] unless html_safe?

      fragment = super
      return unless fragment

      fragment = SafeBuffer.new(fragment) unless fragment.is_a?(SafeBuffer)
      fragment.instance_variable_set(:@html_safe, true)
      fragment
    end

    # Appends +value+ without escaping it. Only permitted while the buffer is
    # still safe; raises SafeConcatError otherwise.
    def safe_concat(value)
      if html_safe?
        original_concat(value)
      else
        raise SafeConcatError
      end
    end

    # A freshly built buffer starts out safe.
    def initialize(str = "")
      @html_safe = true
      super(str)
    end

    # dup/clone carry over the source's safety flag.
    def initialize_copy(other)
      super(other)
      @html_safe = other.html_safe?
    end

    # An empty buffer of the same kind. Uses +[]+ rather than +slice+, because
    # +slice+ is one of the generated unsafe methods and returns a plain String.
    def clone_empty
      self[0, 0]
    end

    # Appends +value+, escaping it when needed. nil is ignored. Returns self.
    def concat(value)
      return self if value.nil?

      super(implicit_html_escape_interpolated_argument(value))
      self
    end
    alias << concat

    def bytesplice(*args, value)
      escaped = implicit_html_escape_interpolated_argument(value)
      super(*args, escaped)
    end

    def insert(index, value)
      escaped = implicit_html_escape_interpolated_argument(value)
      super(index, escaped)
    end

    def prepend(value)
      escaped = implicit_html_escape_interpolated_argument(value)
      super(escaped)
    end

    def replace(value)
      escaped = implicit_html_escape_interpolated_argument(value)
      super(escaped)
    end

    # Handles both buf[index] = value and buf[start, length] = value; the
    # assigned value is always the last argument.
    def []=(*args)
      if args.length == 3
        start, length, replacement = args
        super(start, length, implicit_html_escape_interpolated_argument(replacement))
      else
        position, replacement = args
        super(position, implicit_html_escape_interpolated_argument(replacement))
      end
    end

    # Non-destructive concat: works on a copy, so the receiver is unchanged.
    def +(other)
      copy = dup
      copy.concat(other)
    end

    # Repetition keeps whatever safety flag the receiver currently has.
    def *(*)
      repeated = super
      repeated = SafeBuffer.new(repeated) unless repeated.is_a?(SafeBuffer)
      repeated.instance_variable_set(:@html_safe, @html_safe)
      repeated
    end

    # Format-string interpolation. Each argument is escaped when needed and the
    # result is wrapped in a new buffer of the receiver's class.
    def %(args)
      escaped_args =
        case args
        when Hash
          args.transform_values { |value| explicit_html_escape_interpolated_argument(value) }
        else
          Array(args).map { |value| explicit_html_escape_interpolated_argument(value) }
        end

      self.class.new(super(escaped_args))
    end

    # Falsy when the flag was never set (the ivar is undefined) or was cleared.
    def html_safe?
      defined?(@html_safe) && @html_safe
    end

    def to_s
      self
    end

    def to_param
      to_str
    end

    def encode_with(coder)
      coder.represent_object(nil, to_str)
    end

    UNSAFE_STRING_METHODS.each do |unsafe_method|
      # Skip names that String does not respond to.
      next unless unsafe_method.respond_to?(unsafe_method)

      class_eval <<-EOT, __FILE__, __LINE__ + 1
        def #{unsafe_method}(*args, &block)       # def capitalize(*args, &block)
          to_str.#{unsafe_method}(*args, &block)  #   to_str.capitalize(*args, &block)
        end                                       # end
        def #{unsafe_method}!(*args)              # def capitalize!(*args)
          @html_safe = false                      #   @html_safe = false
          super                                   #   super
        end                                       # end
      EOT
    end

    UNSAFE_STRING_METHODS_WITH_BACKREF.each do |unsafe_method|
      class_eval <<-EOT, __FILE__, __LINE__ + 1
        def #{unsafe_method}(*args, &block)             # def gsub(*args, &block)
          if block                                      #   if block
            to_str.#{unsafe_method}(*args) { |*params|  #     to_str.gsub(*args) { |*params|
              set_block_back_references(block, $~)      #       set_block_back_references(block, $~)
              block.call(*params)                       #       block.call(*params)
            }                                           #     }
          else                                          #   else
            to_str.#{unsafe_method}(*args)              #     to_str.gsub(*args)
          end                                           #   end
        end                                             # end
        def #{unsafe_method}!(*args, &block)            # def gsub!(*args, &block)
          @html_safe = false                            #   @html_safe = false
          if block                                      #   if block
            super(*args) { |*params|                    #     super(*args) { |*params|
              set_block_back_references(block, $~)      #       set_block_back_references(block, $~)
              block.call(*params)                       #       block.call(*params)
            }                                           #     }
          else                                          #   else
            super                                       #     super
          end                                           #   end
        end                                             # end
      EOT
    end

    private
      # Escapes +arg+ via +to_s+ when this buffer is safe and +arg+ is not;
      # otherwise returns +arg+ unchanged.
      def explicit_html_escape_interpolated_argument(arg)
        if html_safe? && !arg.html_safe?
          CGI.escapeHTML(arg.to_s)
        else
          arg
        end
      end

      # Like the explicit variant, but converts with +to_str+. Objects without
      # +to_str+ still go through +to_s+ after a deprecation warning; any other
      # NoMethodError is re-raised.
      def implicit_html_escape_interpolated_argument(arg)
        return arg if !html_safe? || arg.html_safe?

        arg_string =
          begin
            arg.to_str
          rescue NoMethodError => error
            raise unless error.name == :to_str

            converted = arg.to_s
            ActiveSupport::Deprecation.warn <<~MSG.squish
              Implicit conversion of #{arg.class} into String by ActiveSupport::SafeBuffer
              is deprecated and will be removed in Rails 7.1.
              You must explicitly cast it to a String.
            MSG
            converted
          end

        CGI.escapeHTML(arg_string)
      end

      # Assigns +match_data+ to $~ inside the binding of the caller's block, so
      # $~ and $1 read there refer to the current match.
      def set_block_back_references(block, match_data)
        setter = block.binding.eval("proc { |m| $~ = m }")
        setter.call(match_data)
      rescue ArgumentError
        # Can't create binding from C level Proc
      end
  end
end
class String
  # Declares this string trusted: it is wrapped in an
  # ActiveSupport::SafeBuffer, so it will be written into HTML exactly as it
  # is, with no further escaping. Making sure the content is harmless is
  # entirely the caller's job. This is the same thing the +raw+ view helper
  # does. Prefer +sanitize+ where you can, and never call this on user input.
  def html_safe
    ActiveSupport::SafeBuffer.new(self)
  end
end