# lib/rails/html/scrubbers.rb



# frozen_string_literal: true

module Rails
  module HTML
    # === Rails::HTML::PermitScrubber
    #
    # +Rails::HTML::PermitScrubber+ allows you to permit only your own tags and/or attributes.
    #
    # +Rails::HTML::PermitScrubber+ can be subclassed to determine:
    # - When a node should be skipped via +skip_node?+.
    # - When a node is allowed via +allowed_node?+.
    # - When an attribute should be scrubbed via +scrub_attribute?+.
    #
    # Subclasses don't need to worry if tags or attributes are set or not.
    # If tags or attributes are not set, Loofah's behavior will be used.
    # If you override +allowed_node?+ and no tags are set, it will not be called.
    # Instead Loofah's behavior will be used.
    # Likewise for +scrub_attribute?+ and attributes respectively.
    #
    # Text and CDATA nodes are skipped by default.
    # Unallowed elements will be stripped, i.e. element is removed but its subtree kept.
    # Supplied tags and attributes should be Enumerables.
    #
    # +tags=+
    # If set, elements excluded will be stripped.
    # If not, elements are stripped based on Loofah's +HTML5::Scrub.allowed_element?+.
    #
    # +attributes=+
    # If set, attributes excluded will be removed.
    # If not, attributes are removed based on Loofah's +HTML5::Scrub.scrub_attributes+.
    #
    #  class CommentScrubber < Rails::HTML::PermitScrubber
    #    def initialize
    #      super
    #      self.tags = %w(form script comment blockquote)
    #    end
    #
    #    def skip_node?(node)
    #      node.text?
    #    end
    #
    #    def scrub_attribute?(name)
    #      name == "style"
    #    end
    #  end
    #
    # See the documentation for +Nokogiri::XML::Node+ to understand what's possible
    # with nodes: https://nokogiri.org/rdoc/Nokogiri/XML/Node.html
    class PermitScrubber < Loofah::Scrubber
      attr_reader :tags, :attributes, :prune

      # Builds a scrubber with no tag or attribute list set.
      #
      # +prune+:: when truthy, a disallowed node is removed together with its
      #           subtree and the traversal direction is +:top_down+; otherwise
      #           (the default) only the node itself is stripped, its children
      #           are kept, and the direction is +:bottom_up+.
      def initialize(prune: false)
        @prune = prune
        @direction = @prune ? :top_down : :bottom_up
        @tags, @attributes = nil, nil
      end

      # Sets the tag list consulted by +allowed_node?+.
      # Accepts +nil+/+false+ (unset) or any Enumerable; anything else raises
      # ArgumentError.
      def tags=(tags)
        @tags = validate!(tags, :tags)
      end

      # Sets the attribute list consulted by +scrub_attribute?+.
      # Accepts +nil+/+false+ (unset) or any Enumerable; anything else raises
      # ArgumentError.
      def attributes=(attributes)
        @attributes = validate!(attributes, :attributes)
      end

      # Scrubs a single node and tells Loofah how to proceed.
      #
      # Returns +CONTINUE+ when the node stays in the tree, and +STOP+ when the
      # node was taken out of the tree (or when +scrub_node+ itself asked to
      # stop). Per Loofah's scrubber contract, +STOP+ keeps a top-down walk
      # from descending into the node's children.
      def scrub(node)
        # CDATA that Loofah flags as needing escaping is swapped for the
        # escaped replacement Loofah produces; nothing else is done with it.
        if Loofah::HTML5::Scrub.cdata_needs_escaping?(node)
          replacement = Loofah::HTML5::Scrub.cdata_escape(node)
          node.replace(replacement)
          return CONTINUE
        end
        return CONTINUE if skip_node?(node)

        unless (node.element? || node.comment?) && keep_node?(node)
          return STOP if scrub_node(node) == STOP

          # The default +scrub_node+ returns the removed node rather than STOP.
          # Once the node is detached there is nothing left to sanitize, and a
          # top-down (prune) walk must not wander into the discarded subtree.
          return STOP if node.parent.nil?
        end

        scrub_attributes(node)
        CONTINUE
      end

      protected
        # True when the node's name is in the configured tag list.
        # Only called when +tags+ is set (see +keep_node?+).
        def allowed_node?(node)
          @tags.include?(node.name)
        end

        # True for nodes that +scrub+ should leave untouched; text nodes by default.
        def skip_node?(node)
          node.text?
        end

        # True when the named attribute should be removed, i.e. it is absent
        # from the configured attribute list.
        # Only called when +attributes+ is set (see +scrub_attributes+).
        def scrub_attribute?(name)
          !@attributes.include?(name)
        end

        # Decides whether the node is kept: via +allowed_node?+ when tags are
        # set, otherwise via Loofah's +allowed_element?+.
        def keep_node?(node)
          if @tags
            allowed_node?(node)
          else
            Loofah::HTML5::Scrub.allowed_element?(node.name)
          end
        end

        # Takes a disallowed node out of the tree. Unless pruning, its children
        # are first moved in front of it so that they survive the removal.
        # Returns the result of +node.remove+.
        def scrub_node(node)
          node.before(node.children) unless prune # strip
          node.remove
        end

        # Removes or sanitizes the node's attributes: with the configured list
        # when +attributes+ is set, otherwise by delegating to Loofah.
        def scrub_attributes(node)
          if @attributes
            node.attribute_nodes.each do |attr|
              if scrub_attribute?(attr.name)
                attr.remove
                # A removed attribute needs no value sanitizing.
                next
              end

              scrub_attribute(node, attr)
            end

            scrub_css_attribute(node)
          else
            Loofah::HTML5::Scrub.scrub_attributes(node)
          end
        end

        # Sanitizes the node's +style+ attribute, using Loofah's
        # +scrub_css_attribute+ when this Loofah provides it and falling back
        # to +scrub_css+ on the attribute value otherwise.
        def scrub_css_attribute(node)
          if Loofah::HTML5::Scrub.respond_to?(:scrub_css_attribute)
            Loofah::HTML5::Scrub.scrub_css_attribute(node)
          else
            style = node.attributes["style"]
            style.value = Loofah::HTML5::Scrub.scrub_css(style.value) if style
          end
        end

        # Returns +var+ unchanged when it is falsy or an Enumerable.
        # Raises ArgumentError (mentioning +name+) for any other value.
        def validate!(var, name)
          if var && !var.is_a?(Enumerable)
            raise ArgumentError, "You should pass :#{name} as an Enumerable"
          end
          var
        end

        # Sanitizes the value of one attribute that passed the allow check.
        def scrub_attribute(node, attr_node)
          # Safe-list lookups use the prefixed name ("xlink:href") when the
          # attribute is namespaced.
          attr_name = if attr_node.namespace
            "#{attr_node.namespace.prefix}:#{attr_node.node_name}"
          else
            attr_node.node_name
          end

          if Loofah::HTML5::SafeList::ATTR_VAL_IS_URI.include?(attr_name)
            # A truthy result ends processing here; presumably it means Loofah
            # already removed the attribute -- confirm against Loofah's docs.
            return if Loofah::HTML5::Scrub.scrub_uri_attribute(attr_node)
          end

          if Loofah::HTML5::SafeList::SVG_ATTR_VAL_ALLOWS_REF.include?(attr_name)
            Loofah::HTML5::Scrub.scrub_attribute_that_allows_local_ref(attr_node)
          end

          # On these SVG elements xlink:href is dropped when any line of its
          # value starts (after optional whitespace) with a character other
          # than "#", i.e. it is not purely a local "#fragment" reference.
          if Loofah::HTML5::SafeList::SVG_ALLOW_LOCAL_HREF.include?(node.name) && attr_name == "xlink:href" && attr_node.value =~ /^\s*[^#\s].*/m
            attr_node.remove
          end

          # A src made up only of whitespace (or empty) is dropped.
          node.remove_attribute(attr_node.name) if attr_name == "src" && attr_node.value !~ /[^[:space:]]/

          Loofah::HTML5::Scrub.force_correct_attribute_escaping! node
        end
    end

    # === Rails::HTML::TargetScrubber
    #
    # Where +Rails::HTML::PermitScrubber+ picks out tags and attributes to permit in
    # sanitization, +Rails::HTML::TargetScrubber+ targets them for removal.
    #
    # +tags=+
    # If set, elements included will be stripped.
    #
    # +attributes=+
    # If set, attributes included will be removed.
    class TargetScrubber < PermitScrubber
      # Negates the parent's answer: a node counts as allowed exactly when the
      # parent implementation says it is not.
      def allowed_node?(node)
        listed = super(node)
        !listed
      end

      # Negates the parent's answer: an attribute is scrubbed exactly when the
      # parent implementation would have kept it.
      def scrub_attribute?(name)
        unlisted = super(name)
        !unlisted
      end
    end

    # === Rails::HTML::TextOnlyScrubber
    #
    # +Rails::HTML::TextOnlyScrubber+ allows you to permit text nodes.
    #
    # Unallowed elements will be stripped, i.e. element is removed but its subtree kept.
    class TextOnlyScrubber < Loofah::Scrubber
      # Nodes are always visited bottom-up.
      def initialize
        @direction = :bottom_up
      end

      # Leaves text nodes alone (returning +CONTINUE+). Any other node is
      # replaced by its own children: they are moved in front of the node,
      # which is then removed, and the result of +node.remove+ is returned.
      def scrub(node)
        return CONTINUE if node.text?

        node.before(node.children)
        node.remove
      end
    end
  end
end