class Sanitize

def clean(html)

Returns a sanitized copy of _html_.
# Returns a sanitized copy of _html_. The string passed in is not modified;
# all work happens on a duplicate.
def clean(html)
  copy = html.dup
  # clean! hands back nil when it changed nothing, in which case the
  # untouched duplicate is the result.
  cleaned = clean!(copy)
  cleaned || copy
end

def clean(html, config = {})

Returns a sanitized copy of _html_, using the settings in _config_ if
specified.
# Returns a sanitized copy of _html_, using the settings in _config_ if
# specified. A fresh Sanitize instance is built for every call.
def clean(html, config = {})
  Sanitize.new(config).clean(html)
end

def clean!(html)

Performs clean in place, returning _html_, or +nil+ if no changes were
made.
# Performs clean in place, returning _html_, or +nil+ if no changes were made.
#
# _html_ is parsed with Hpricot and walked twice. The first pass removes
# disallowed nodes and filters attributes according to @config; the second
# pass entity-encodes every text node. When the serialized result differs
# from the input, the contents of _html_ are replaced with it.
def clean!(html)
  fragment = Hpricot(html)

  fragment.search('*') do |node|
    # These node types are always removed, regardless of configuration.
    if node.bogusetag? || node.doctype? || node.procins? || node.xmldecl?
      node.parent.replace_child(node, '')
      next
    end

    if node.comment?
      node.parent.replace_child(node, '') unless @config[:allow_comments]
    elsif node.elem?
      name = node.name.to_s.downcase

      # Delete any element that isn't in the whitelist, leaving its children
      # (if any) in its place.
      unless @config[:elements].include?(name)
        node.parent.replace_child(node, node.children || '')
        next
      end

      node.raw_attributes ||= {}

      attr_whitelist = ((@config[:attributes][name] || []) +
          (@config[:attributes][:all] || [])).uniq

      if attr_whitelist.empty?
        # Delete all attributes from elements with no whitelisted
        # attributes.
        node.raw_attributes = {}
      else
        # Delete any attribute that isn't in the whitelist for this element.
        node.raw_attributes.delete_if do |key, _value|
          !attr_whitelist.include?(key.to_s.downcase)
        end

        # Delete remaining attributes that use unacceptable protocols.
        if @config[:protocols].key?(name)
          protocol = @config[:protocols][name]

          node.raw_attributes.delete_if do |key, value|
            # Normalize the attribute name exactly as the whitelist check
            # above does; otherwise an attribute written as e.g. "HREF"
            # would pass the whitelist yet skip protocol filtering.
            attr = key.to_s.downcase

            next false unless protocol.key?(attr)
            next true if value.nil?

            if value.to_s.downcase =~ REGEX_PROTOCOL
              !protocol[attr].include?(Regexp.last_match(1).downcase)
            else
              # No protocol prefix matched: only allowed when :relative is
              # listed for this attribute.
              !protocol[attr].include?(:relative)
            end
          end
        end
      end

      # Add required attributes.
      if @config[:add_attributes].key?(name)
        node.raw_attributes.merge!(@config[:add_attributes][name])
      end

      # Escape special chars in attribute values. Values can be nil (the
      # protocol filter above already allows for that), and there is nothing
      # to encode in that case.
      node.raw_attributes.each do |key, value|
        next if value.nil?
        node.raw_attributes[key] = Sanitize.encode_html(value)
      end
    end
  end

  # Make one last pass through the fragment and encode all special HTML chars
  # as entities. This eliminates certain types of maliciously-malformed nested
  # tags.
  fragment.search('*') do |node|
    node.swap(Sanitize.encode_html(node.to_original_html)) if node.text?
  end

  result = fragment.to_s
  return nil if result == html

  # String#replace swaps the contents in place and returns the receiver, so
  # the caller gets back the very object it passed in.
  html.replace(result)
end

def clean!(html, config = {})

Performs Sanitize#clean in place, returning _html_, or +nil+ if no changes
were made.
# Performs Sanitize#clean in place, returning _html_, or +nil+ if no changes
# were made. A fresh Sanitize instance built from _config_ does the work.
def clean!(html, config = {})
  Sanitize.new(config).clean!(html)
end

def encode_html(html)

Encodes special HTML characters (<, >, ", ', and &) in _html_ as entity
references and returns the encoded string.
# Encodes special HTML characters in _html_ as entity references and returns
# the encoded string. _html_ itself is not modified.
def encode_html(html)
  # Apply every ENTITY_MAP substitution in turn, starting from a copy.
  escaped = ENTITY_MAP.inject(html.dup) do |text, (special, reference)|
    text.gsub(special, reference)
  end

  # Whatever REGEX_AMPERSAND still matches is turned into an entity reference.
  escaped.gsub(REGEX_AMPERSAND, '&amp;')
end

def initialize(config = {})

Returns a new Sanitize object initialized with the settings in _config_.
# Returns a new Sanitize object initialized with the settings in _config_.
# The stored configuration is Config::DEFAULT merged with _config_.
def initialize(config = {})
  defaults = Config::DEFAULT
  @config = defaults.merge(config)
end