lib/nokogiri/html5/document_fragment.rb



# coding: utf-8
# frozen_string_literal: true

#
#  Copyright 2013-2021 Sam Ruby, Stephen Checkoway
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#

require_relative "../html4/document_fragment"

module Nokogiri
  module HTML5
    # Since v1.12.0
    #
    # 💡 HTML5 functionality is not available when running JRuby.
    class DocumentFragment < Nokogiri::HTML4::DocumentFragment
      class << self
        # :call-seq:
        #   parse(input, **options) → HTML5::DocumentFragment
        #
        # Parse \HTML5 fragment input from a String or IO, and return a new HTML5::DocumentFragment.
        # This method creates a new, empty HTML5::Document to contain the fragment.
        #
        # [Parameters]
        # - +input+ (String | IO) The HTML5 document fragment to parse.
        #
        # [Optional Keyword Arguments]
        # - +encoding:+ (String | Encoding) The encoding, or name of the encoding, that should be
        #   used when processing the document. When not provided, the encoding will be determined
        #   based on the document content. Also see Nokogiri::HTML5 for a longer explanation of how
        #   encoding is handled by the parser.
        #
        # - +context:+ (String | Nokogiri::XML::Node) The node, or the name of an HTML5 element, "in
        #   context" of which to parse the document fragment. See below for more
        #   information. (default +"body"+)
        #
        # - +max_errors:+ (Integer) The maximum number of parse errors to record. (default
        #   +Nokogiri::Gumbo::DEFAULT_MAX_ERRORS+ which is currently 0)
        #
        # - +max_tree_depth:+ (Integer) The maximum depth of the parse tree. (default
        #   +Nokogiri::Gumbo::DEFAULT_MAX_TREE_DEPTH+)
        #
        # - +max_attributes:+ (Integer) The maximum number of attributes allowed on an
        #   element. (default +Nokogiri::Gumbo::DEFAULT_MAX_ATTRIBUTES+)
        #
        # - +parse_noscript_content_as_text:+ (Boolean) Whether to parse the content of +noscript+
        #   elements as text. (default +false+)
        #
        # See rdoc-ref:HTML5@Parsing+options for a complete description of these parsing options.
        #
        # [Returns] Nokogiri::HTML5::DocumentFragment
        #
        # === Context \Node
        #
        # If a context node is specified using +context:+, then the parser will behave as if that
        # Node, or a hypothetical tag named as specified, is the parent of the fragment subtree.
        #
        def parse(
          input,
          encoding_ = nil, positional_options_hash = nil,
          encoding: encoding_, **options
        )
          # Legacy call style: options may arrive as a trailing positional Hash. Fold them into the
          # keyword options so both call styles are handled the same way below.
          legacy_options = positional_options_hash
          options.merge!(legacy_options) unless legacy_options.nil? || legacy_options.empty?

          # The fragment is attached to a brand-new document whose encoding is set to UTF-8.
          owner = HTML5::Document.new
          owner.encoding = "UTF-8"

          # +context+ is handed to .new positionally, so take it out of the options Hash.
          fragment_context = options.delete(:context)

          # Let HTML5.read_and_encode prepare the input with the requested encoding (nil when the
          # caller did not specify one).
          prepared_input = HTML5.read_and_encode(input, encoding)

          new(owner, prepared_input, fragment_context, options)
        end
      end

      # The document this fragment is associated with. Assigned from the first argument to #new.
      attr_accessor :document
      # Errors recorded for this fragment. Set to an empty Array by #new before any parsing
      # happens; presumably populated by the parser when input is given (confirm against
      # Nokogiri::Gumbo.fragment).
      attr_accessor :errors

      # Get the parser's quirks mode value. See HTML5::QuirksMode.
      #
      # This method returns `nil` if the parser was not invoked (e.g.,
      # `Nokogiri::HTML5::DocumentFragment.new(doc)`).
      #
      # Since v1.14.0
      attr_reader :quirks_mode

      #
      # :call-seq:
      #   new(document, input, **options) → HTML5::DocumentFragment
      #
      # Parse \HTML5 fragment input from a String, and return a new HTML5::DocumentFragment.
      #
      # 💡 It's recommended to use either HTML5::DocumentFragment.parse or HTML5::Node#fragment
      # rather than call this method directly.
      #
      # [Required Parameters]
      # - +document+ (HTML5::Document) The parent document to associate the returned fragment with.
      #
      # [Optional Parameters]
      # - +input+ (String) The content to be parsed.
      #
      # [Optional Keyword Arguments]
      # - +encoding:+ (String | Encoding) The encoding, or name of the encoding, that should be
      #   used when processing the document. When not provided, the encoding will be determined
      #   based on the document content. Also see Nokogiri::HTML5 for a longer explanation of how
      #   encoding is handled by the parser.
      #
      # - +context:+ (String | Nokogiri::XML::Node) The node, or the name of an HTML5 element, in
      #   which to parse the document fragment. (default +"body"+)
      #
      # - +max_errors:+ (Integer) The maximum number of parse errors to record. (default
      #   +Nokogiri::Gumbo::DEFAULT_MAX_ERRORS+ which is currently 0)
      #
      # - +max_tree_depth:+ (Integer) The maximum depth of the parse tree. (default
      #   +Nokogiri::Gumbo::DEFAULT_MAX_TREE_DEPTH+)
      #
      # - +max_attributes:+ (Integer) The maximum number of attributes allowed on an
      #   element. (default +Nokogiri::Gumbo::DEFAULT_MAX_ATTRIBUTES+)
      #
      # - +parse_noscript_content_as_text:+ (Boolean) Whether to parse the content of +noscript+
      #   elements as text. (default +false+)
      #
      # See rdoc-ref:HTML5@Parsing+options for a complete description of these parsing options.
      #
      # [Returns] HTML5::DocumentFragment
      #
      # === Context \Node
      #
      # If a context node is specified using +context:+, then the parser will behave as if that
      # Node, or a hypothetical tag named as specified, is the parent of the fragment subtree.
      #
      def initialize(
        doc, input = nil,
        context_ = nil, positional_options_hash = nil,
        context: context_,
        **options
      ) # rubocop:disable Lint/MissingSuper
        # Legacy call style: options may arrive as a trailing positional Hash. Fold them into the
        # keyword options so both call styles are handled the same way below.
        unless positional_options_hash.nil? || positional_options_hash.empty?
          options.merge!(positional_options_hash)
        end

        @document = doc
        @errors = []

        # Without input there is nothing to parse; the fragment is left empty and the parser is
        # never invoked.
        return self unless input

        # Honor the documented +encoding:+ option when preparing the input, and take it out of
        # the options so it is not forwarded to the parser as if it were a parsing option.
        encoding = options.delete(:encoding)
        input = Nokogiri::HTML5.read_and_encode(input, encoding)

        # A :context entry that arrived through the positional options Hash takes precedence over
        # the positional/keyword +context+ argument.
        context = options.delete(:context) if options.key?(:context)

        # :max_parse_errors is accepted as an alternate spelling of :max_errors. Always remove it,
        # even when :max_errors is also present, so only the canonical key reaches the parser.
        alternate_max_errors = options.delete(:max_parse_errors)

        options[:max_attributes] ||= Nokogiri::Gumbo::DEFAULT_MAX_ATTRIBUTES
        options[:max_errors] ||= alternate_max_errors || Nokogiri::Gumbo::DEFAULT_MAX_ERRORS
        options[:max_tree_depth] ||= Nokogiri::Gumbo::DEFAULT_MAX_TREE_DEPTH

        Nokogiri::Gumbo.fragment(self, input, context, **options)
      end

      def serialize(options = {}, &block) # :nodoc:
        # Fetch XML::Node's own #serialize and invoke it on this fragment directly, so the options
        # (and block) reach an implementation that supports them instead of an intermediate
        # override that, per the original note, does not accept options.
        node_serialize = XML::Node.instance_method(:serialize)
        node_serialize.bind_call(self, options, &block)
      end

      # Split search arguments into their parts.
      #
      # +params+ is a list of path Strings/Symbols, optionally followed by a namespace Hash (or
      # +nil+), a variable-bindings Hash (or +nil+), and a custom handler object (anything that is
      # not a Hash, String, Symbol, or +nil+).
      #
      # Returns <tt>[paths, handler, namespaces, bindings]</tt>. When no namespace Hash is given,
      # the namespaces of all of this fragment's children are merged and used instead. The
      # +params+ Array passed in is left unmodified.
      def extract_params(params) # :nodoc:
        # +nil+ is a placeholder for "no namespaces"/"no bindings", never a handler. Skipping it
        # here keeps a nil placeholder from hiding a handler that follows it.
        handler = params.find do |param|
          !param.nil? && ![Hash, String, Symbol].include?(param.class)
        end

        # Always work on a copy: the trailing entries are popped off below, and the caller's
        # Array must not change underneath it.
        params = handler ? params - [handler] : params.dup

        # Peel the trailing Hash/nil entries off the end; what remains are the paths.
        hashes = []
        while Hash === params.last || params.last.nil?
          hashes << params.pop
          break if params.empty?
        end
        ns, binds = hashes.reverse

        # No explicit namespaces: collect them from the fragment's children (later children win
        # on duplicate keys).
        if ns.nil?
          ns = {}
          children.each { |child| ns.merge!(child.namespaces) }
        end

        [params, handler, ns, binds]
      end
    end
  end
end
# vim: set shiftwidth=2 softtabstop=2 tabstop=8 expandtab: