class Sanitize
def self.clean(html, config = {})
Returns a sanitized copy of _html_, using the settings in _config_ if specified.
# Class-level convenience wrapper: builds a Sanitize instance from _config_
# and returns whatever that instance's #clean returns for _html_.
def self.clean(html, config = {})
  sanitizer = Sanitize.new(config)
  sanitizer.clean(html)
end
def self.clean!(html, config = {})
Performs Sanitize#clean in place, returning _html_, or +nil+ if no changes were made.
# Class-level convenience wrapper: builds a Sanitize instance from _config_
# and returns whatever that instance's #clean! returns for _html_.
def self.clean!(html, config = {})
  sanitizer = Sanitize.new(config)
  sanitizer.clean!(html)
end
def self.clean_node!(node, config = {})
# Class-level convenience wrapper: builds a Sanitize instance from _config_
# and returns whatever that instance's #clean_node! returns for _node_.
def self.clean_node!(node, config = {})
  sanitizer = Sanitize.new(config)
  sanitizer.clean_node!(node)
end
def clean(html)
# Returns a sanitized copy of _html_, leaving the argument itself untouched.
#
# Returns +nil+ when _html_ is +nil+ or +false+. Otherwise the input is
# duplicated and the duplicate is sanitized in place with clean!.
def clean(html)
  return unless html

  dupe = html.dup
  # clean! returns nil when nothing changed; in that case the untouched
  # duplicate is already the right answer.
  clean!(dupe) || dupe
end
def clean!(html)
Performs clean in place, returning _html_, or +nil+ if no changes were made.
# Sanitizes _html_ in place.
#
# Parses _html_ with Nokogiri::HTML::DocumentFragment.parse, runs clean_node!
# on the resulting fragment, and serializes it according to @config[:output]
# (:xhtml or :html), passing @config[:output_encoding] as the encoding and an
# indent of 0.
#
# Returns +nil+ when the serialized output equals _html_. Otherwise the
# contents of _html_ are overwritten with the output and the output string is
# returned.
#
# Raises Error when @config[:output] is neither :xhtml nor :html.
def clean!(html)
  fragment = Nokogiri::HTML::DocumentFragment.parse(html)
  clean_node!(fragment)

  output_method_params = {:encoding => @config[:output_encoding], :indent => 0}

  result =
    case @config[:output]
    when :xhtml
      output_method_params[:save_with] = Nokogiri::XML::Node::SaveOptions::AS_XHTML
      fragment.to_xhtml(output_method_params)
    when :html
      fragment.to_html(output_method_params)
    else
      raise Error, "unsupported output format: #{@config[:output]}"
    end

  return nil if result == html

  # Overwrite the caller's string in place. The value of this assignment
  # expression (and therefore of the method) is the serialized result.
  html[0, html.length] = result
end
def clean_node!(node)
# Sanitizes the given Nokogiri node in place and returns it.
#
# Every node yielded by node.traverse is passed to transform_node! together
# with one Set that is shared across the whole traversal, so whitelist entries
# added while handling one node are visible when later nodes are handled.
#
# Raises ArgumentError unless _node_ is a Nokogiri::XML::Node.
def clean_node!(node)
  unless node.is_a?(Nokogiri::XML::Node)
    raise ArgumentError, "node must be a Nokogiri::XML::Node"
  end

  node_whitelist = Set.new
  node.traverse { |child| transform_node!(child, node_whitelist) }
  node
end
def initialize(config = {})
# Builds a sanitizer whose settings are Config::DEFAULT overridden by _config_.
#
# The transformer chain is a private copy of @config[:transformers], which may
# be +nil+, a single transformer, or a list of them. The caller's list is
# never mutated.
def initialize(config = {})
  @config = Config::DEFAULT.merge(config)

  # Coerce first, then copy: Array() turns nil into [] and wraps a lone
  # transformer, and dup keeps the appends below off the configured array.
  @transformers = Array(@config[:transformers]).dup

  # Default transformers. These always run at the end of the transformer
  # chain, after any custom transformers.
  @transformers <<
    Transformers::CleanComment <<
    Transformers::CleanCDATA <<
    Transformers::CleanElement.new(@config)
end
def transform_node!(node, node_whitelist)
# Runs each transformer in @transformers against _node_, in order, and
# returns _node_.
#
# Each transformer is called with a Hash containing :config, :is_whitelisted,
# :node, :node_name (the node's name, downcased) and :node_whitelist. The Hash
# is rebuilt for every transformer, so changes made by an earlier transformer
# to the node or to the whitelist are seen by later ones.
#
# When a transformer returns a Hash whose :node_whitelist responds to #each,
# those entries are merged into _node_whitelist_.
def transform_node!(node, node_whitelist)
  @transformers.each do |transformer|
    result = transformer.call({
      :config         => @config,
      :is_whitelisted => node_whitelist.include?(node),
      :node           => node,
      :node_name      => node.name.downcase,
      :node_whitelist => node_whitelist
    })

    # If the node has been unlinked, there's no point running subsequent
    # transformers.
    break if node.parent.nil? && !node.fragment?

    if result.is_a?(Hash) && result[:node_whitelist].respond_to?(:each)
      node_whitelist.merge(result[:node_whitelist])
    end
  end

  node
end