# encoding: utf-8
#--
# Copyright (c) 2010 Ryan Grove <ryan@wonko.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the 'Software'), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#++

require 'set'

require 'nokogiri'
require 'sanitize/version'
require 'sanitize/config'
require 'sanitize/config/restricted'
require 'sanitize/config/basic'
require 'sanitize/config/relaxed'
require 'sanitize/transformers/clean_cdata'
require 'sanitize/transformers/clean_comment'
require 'sanitize/transformers/clean_element'

# Sanitizes HTML by parsing it into a Nokogiri fragment and running every node
# through a chain of transformers: any custom transformers supplied in the
# config, followed by the built-in comment, CDATA and element cleaners.
class Sanitize
  # The effective configuration: Config::DEFAULT merged with the settings
  # passed to Sanitize.new.
  attr_reader :config

  # Matches an attribute value that could be treated by a browser as a URL
  # with a protocol prefix, such as "http:" or "javascript:". Any string of zero
  # or more characters followed by a colon is considered a match, even if the
  # colon is encoded as an entity and even if it's an incomplete entity (which
  # IE6 and Opera will still parse).
  #
  # The colon may appear literally, as the decimal reference <tt>&#58</tt> or
  # as the hexadecimal reference <tt>&#x3a</tt>; any number of leading zeros
  # is accepted and the terminating semicolon is not required. Capture group 1
  # holds the text that precedes the colon.
  REGEX_PROTOCOL = /^([A-Za-z0-9\+\-\.\&\;\#\s]*?)(?:\:|&#0*58|&#x0*3a)/i

  #--
  # Class Methods
  #++

  # Returns a sanitized copy of _html_, using the settings in _config_ if
  # specified. Convenience wrapper around Sanitize.new(config).clean(html).
  def self.clean(html, config = {})
    Sanitize.new(config).clean(html)
  end

  # Performs Sanitize#clean in place, returning _html_, or +nil+ if no changes
  # were made. Convenience wrapper around Sanitize.new(config).clean!(html).
  def self.clean!(html, config = {})
    Sanitize.new(config).clean!(html)
  end

  # Sanitizes the specified Nokogiri::XML::Node and all its children.
  # Convenience wrapper around Sanitize.new(config).clean_node!(node).
  def self.clean_node!(node, config = {})
    Sanitize.new(config).clean_node!(node)
  end

  #--
  # Instance Methods
  #++

  # Returns a new Sanitize object initialized with the settings in _config_.
  # Settings in _config_ override the corresponding Config::DEFAULT entries.
  def initialize(config = {})
    @config = Config::DEFAULT.merge(config)

    # Work on a copy so that appending the default transformers below never
    # mutates the collection stored in the config.
    @transformers = Array(@config[:transformers].dup)

    # Default transformers. These always run at the end of the transformer
    # chain, after any custom transformers.
    @transformers << Transformers::CleanComment unless @config[:allow_comments]

    @transformers <<
      Transformers::CleanCDATA <<
      Transformers::CleanElement.new(@config)
  end

  # Returns a sanitized copy of _html_, leaving the original string untouched.
  # Returns +nil+ when _html_ is +nil+ or +false+.
  def clean(html)
    if html
      dupe = html.dup

      # clean! returns nil when sanitizing changed nothing; in that case the
      # untouched duplicate is the result.
      clean!(dupe) || dupe
    end
  end

  # Performs clean in place, returning the sanitized markup (the new contents
  # of _html_), or +nil+ if no changes were made.
  #
  # Raises Sanitize::Error if the configured <tt>:output</tt> format is neither
  # <tt>:xhtml</tt> nor <tt>:html</tt>.
  def clean!(html)
    fragment = Nokogiri::HTML::DocumentFragment.parse(html)
    clean_node!(fragment)

    output_method_params = {:encoding => @config[:output_encoding], :indent => 0}

    if @config[:output] == :xhtml
      output_method = fragment.method(:to_xhtml)
      output_method_params[:save_with] = Nokogiri::XML::Node::SaveOptions::AS_XHTML
    elsif @config[:output] == :html
      output_method = fragment.method(:to_html)
    else
      raise Error, "unsupported output format: #{@config[:output]}"
    end

    result = output_method.call(output_method_params)

    # Overwrite the entire contents of the caller's string with the serialized
    # result, but only when serialization actually produced something
    # different.
    return result == html ? nil : html[0, html.length] = result
  end

  # Sanitizes the specified Nokogiri::XML::Node and all its children by
  # running each traversed node through the transformer chain. Returns _node_.
  #
  # Raises ArgumentError if _node_ is not a Nokogiri::XML::Node.
  def clean_node!(node)
    raise ArgumentError unless node.is_a?(Nokogiri::XML::Node)

    # Shared across the whole traversal so that a node whitelisted by a
    # transformer is still recognised when that node is visited later.
    node_whitelist = Set.new

    node.traverse { |child| transform_node!(child, node_whitelist) }

    node
  end

  private

  # Runs _node_ through every transformer in order, passing each one a hash
  # describing the node and the current whitelist. If a transformer returns a
  # Hash whose <tt>:node_whitelist</tt> value responds to +each+, its members
  # are merged into _node_whitelist_. Returns _node_.
  def transform_node!(node, node_whitelist)
    @transformers.each do |transformer|
      result = transformer.call({
        :config         => @config,
        :is_whitelisted => node_whitelist.include?(node),
        :node           => node,
        :node_name      => node.name.downcase,
        :node_whitelist => node_whitelist
      })

      if result.is_a?(Hash) && result[:node_whitelist].respond_to?(:each)
        node_whitelist.merge(result[:node_whitelist])
      end

      # If the node has been unlinked or replaced, there's no point running
      # subsequent transformers.
      break if node.parent.nil? && !node.fragment?
    end

    node
  end

  # Raised when the configuration requests something Sanitize cannot do, such
  # as an unsupported output format.
  class Error < StandardError; end
end