class Sanitize

def self.clean(html, config = {})

Returns a sanitized copy of _html_, using the settings in _config_ if
specified.
# Convenience wrapper: builds a Sanitize instance from _config_ and returns
# whatever that instance's #clean returns for _html_.
def self.clean(html, config = {})
  Sanitize.new(config).clean(html)
end

def self.clean!(html, config = {})

Performs Sanitize#clean in place, returning _html_, or +nil+ if no changes
were made.
# Convenience wrapper: builds a Sanitize instance from _config_ and returns
# whatever that instance's #clean! returns for _html_.
def self.clean!(html, config = {})
  Sanitize.new(config).clean!(html)
end

def self.clean_node!(node, config = {})

Sanitizes the specified Nokogiri::XML::Node and all its children.
# Convenience wrapper: builds a Sanitize instance from _config_ and returns
# whatever that instance's #clean_node! returns for _node_.
def self.clean_node!(node, config = {})
  Sanitize.new(config).clean_node!(node)
end

def clean(html)

Returns a sanitized copy of _html_.
# Returns a sanitized copy of _html_, leaving the argument itself untouched.
# Returns +nil+ when _html_ is +nil+ or +false+.
def clean(html)
  return unless html

  copy    = html.dup
  changed = clean!(copy)

  # A falsy result from clean! means the copy is handed back as-is.
  changed ? changed : copy
end

def clean!(html)

Performs clean in place, returning _html_, or +nil+ if no changes were
made.
# Sanitizes _html_ in place. The string is parsed into a document fragment,
# cleaned with clean_node!, and serialized according to @config[:output]
# (+:xhtml+ or +:html+) using @config[:output_encoding].
#
# Returns +nil+ when the serialized markup equals _html_; otherwise the
# contents of _html_ are overwritten and the new markup is returned. Raises
# Error for any other output format (after the fragment has been cleaned).
def clean!(html)
  fragment = Nokogiri::HTML::DocumentFragment.parse(html)
  clean_node!(fragment)

  format  = @config[:output]
  options = {:encoding => @config[:output_encoding], :indent => 0}

  serialized =
    case format
    when :xhtml
      options[:save_with] = Nokogiri::XML::Node::SaveOptions::AS_XHTML
      fragment.to_xhtml(options)
    when :html
      fragment.to_html(options)
    else
      raise Error, "unsupported output format: #{format}"
    end

  return nil if serialized == html

  # Overwrite the caller's string in place; the assignment expression
  # evaluates to the new markup, which becomes the return value.
  html[0, html.length] = serialized
end

def clean_element!(node)

# Sanitizes a single node. The node is first passed through
# transform_element!. Nodes listed in @whitelist_nodes are then left alone.
# A node that is neither whitelisted by a transformer nor present in
# @allowed_elements is unlinked; its children are first re-inserted in front
# of it unless content removal is configured for that element (or globally).
# For surviving nodes, attributes are filtered against the attribute
# whitelist and the configured protocols, and the configured :add_attributes
# are set.
#
# Returns the Hash produced by transform_element!, or +nil+ when the node was
# dynamically whitelisted or removed.
def clean_element!(node)
  # Transformers run before any whitelist decision is made.
  transform = transform_element!(node)

  # Dynamically whitelisted nodes keep all of their attributes.
  return if @whitelist_nodes.include?(node)

  name    = node.name.to_s.downcase
  allowed = transform[:whitelist] || @allowed_elements[name]

  unless allowed
    drop_contents = @remove_all_contents || @remove_element_contents[name]
    node.children.each { |child| node.add_previous_sibling(child) } unless drop_contents
    node.unlink
    return
  end

  # Whitelist = transformer-supplied names + per-element names + :all names.
  configured = @config[:attributes]
  permitted  = transform[:attr_whitelist] + (configured[name] || []) + (configured[:all] || [])
  permitted  = permitted.uniq

  if permitted.empty?
    # Nothing is whitelisted for this element: strip every attribute.
    node.attribute_nodes.each { |attribute| attribute.remove }
  else
    # Strip attributes whose (lowercased) name is not whitelisted.
    node.attribute_nodes.each do |attribute|
      attribute.unlink unless permitted.include?(attribute.name.downcase)
    end

    # Strip remaining attributes whose value uses a protocol that is not
    # accepted for that attribute.
    protocols = @config[:protocols]
    if protocols.has_key?(name)
      rules = protocols[name]
      node.attribute_nodes.each do |attribute|
        key = attribute.name.downcase
        next unless rules.has_key?(key)

        # Values without a recognizable protocol count as :relative.
        token =
          if attribute.value.to_s.downcase =~ REGEX_PROTOCOL
            $1.downcase
          else
            :relative
          end
        attribute.unlink unless rules[key].include?(token)
      end
    end
  end

  # Set the attributes that the configuration requires on this element.
  additions = @config[:add_attributes]
  if additions.has_key?(name)
    additions[name].each { |key, value| node[key] = value }
  end

  transform
end

def clean_node!(node)

Sanitizes the specified Nokogiri::XML::Node and all its children.
# Sanitizes _node_ and every node yielded by its #traverse, then returns
# _node_. Raises ArgumentError unless _node_ is a Nokogiri::XML::Node.
def clean_node!(node)
  raise ArgumentError unless node.is_a?(Nokogiri::XML::Node)

  # Start this run with an empty dynamic whitelist.
  @whitelist_nodes = []

  node.traverse do |current|
    # Elements always go through clean_element!; text nodes only when
    # :process_text_nodes is set.
    if current.element? || (current.text? && @config[:process_text_nodes])
      clean_element!(current)
      next
    end

    if current.comment?
      # Comments are dropped unless :allow_comments is set.
      current.unlink unless @config[:allow_comments]
      next
    end

    next unless current.cdata?

    # CDATA sections are swapped for plain text nodes carrying the same text.
    plain_text = Nokogiri::XML::Text.new(current.text, current.document)
    current.replace(plain_text)
  end

  # Discard whatever was whitelisted while cleaning.
  @whitelist_nodes = []
  node
end

def initialize(config = {})

Returns a new Sanitize object initialized with the settings in _config_.
# Builds a sanitizer whose settings are Config::DEFAULT overlaid with
# _config_, and precomputes the lookup tables used while cleaning.
def initialize(config = {})
  settings = Config::DEFAULT.merge(config)
  # Work on a duplicate, coerced to an Array, so that the transformer
  # appended below is not pushed onto the object the caller supplied.
  settings[:transformers] = Array(settings[:transformers].dup)
  @config = settings

  # Allowed element names become Hash keys for faster lookup.
  allowed = {}
  @config[:elements].each { |element| allowed[element] = true }
  @allowed_elements = allowed

  # :remove_contents is either an Array of element names (per-element
  # removal) or a truthy/falsy flag (all-or-nothing removal).
  remove_contents = @config[:remove_contents]
  @remove_element_contents = {}
  if remove_contents.is_a?(Array)
    @remove_all_contents = false
    remove_contents.each { |element| @remove_element_contents[element] = true }
  else
    @remove_all_contents = remove_contents ? true : false
  end

  # Nodes whitelisted at runtime by transformers (along with all of their
  # attributes); clean_node! resets this list before and after each run.
  @whitelist_nodes = []

  # Workaround for a fragment parsing bug in Nokogiri >= 1.4.2.
  # NOTE(review): this is a String comparison, so versions are ordered
  # lexicographically (e.g. '1.10.0' sorts before '1.4.1') — confirm that is
  # acceptable before relying on it for newer Nokogiri releases.
  @config[:transformers] << Transformers::FIX_FRAGMENT_CDATA if Nokogiri::VERSION > '1.4.1'
end

def transform_element!(node)

# Runs _node_ through every transformer in @config[:transformers], in order,
# and merges their responses.
#
# Each transformer is called with a Hash containing :allowed_elements,
# :config, :node, :node_name (lowercased) and :whitelist_nodes, and must
# return +nil+ (no opinion) or a Hash; anything else raises Error. From a
# Hash response, Array values under :whitelist_nodes and :attr_whitelist are
# accumulated, a truthy :whitelist marks the node as whitelisted, and a
# Nokogiri::XML::Node under :node becomes the node handed to the following
# transformers.
#
# If the final node differs from _node_, _node_ is replaced by it. Returns a
# Hash with the keys :attr_whitelist, :node and :whitelist.
def transform_element!(node)
  result = {
    :attr_whitelist => [],
    :node           => node,
    :whitelist      => false
  }

  @config[:transformers].each do |transformer|
    # The most recently accepted replacement (or the original node) is what
    # each transformer gets to see.
    current  = result[:node]
    response = transformer.call({
      :allowed_elements => @allowed_elements,
      :config           => @config,
      :node             => current,
      :node_name        => current.name.downcase,
      :whitelist_nodes  => @whitelist_nodes
    })

    next if response.nil?

    unless response.is_a?(Hash)
      raise Error, "transformer output must be a Hash or nil"
    end

    extra_nodes = response[:whitelist_nodes]
    if extra_nodes.is_a?(Array)
      @whitelist_nodes += extra_nodes
      @whitelist_nodes.uniq!
    end

    extra_attrs = response[:attr_whitelist]
    result[:attr_whitelist] += extra_attrs if extra_attrs.is_a?(Array)

    result[:whitelist] = true if response[:whitelist]

    replacement = response[:node]
    result[:node] = replacement if replacement.is_a?(Nokogiri::XML::Node)
  end

  node.replace(result[:node]) if node != result[:node]
  result
end