class Nokogiri::HTML5::Document

💡 HTML5 functionality is not available when running JRuby.
Since v1.12.0

def do_parse(string_or_io, url, encoding, **options)

# Shared implementation behind the parsing entry points: reads and encodes the
# input, fills in any parser limits the caller left unset, and hands everything
# to Nokogiri::Gumbo.parse. The returned document's encoding is set to "UTF-8".
def do_parse(string_or_io, url, encoding, **options)
  html = HTML5.read_and_encode(string_or_io, encoding)

  unless options[:max_attributes]
    options[:max_attributes] = Nokogiri::Gumbo::DEFAULT_MAX_ATTRIBUTES
  end

  # :max_parse_errors is only consulted (and removed from the options) when
  # :max_errors has not already been given.
  unless options[:max_errors]
    alternate_limit = options.delete(:max_parse_errors)
    options[:max_errors] = alternate_limit || Nokogiri::Gumbo::DEFAULT_MAX_ERRORS
  end

  unless options[:max_tree_depth]
    options[:max_tree_depth] = Nokogiri::Gumbo::DEFAULT_MAX_TREE_DEPTH
  end

  Nokogiri::Gumbo.parse(html, url, self, **options).tap do |document|
    document.encoding = "UTF-8"
  end
end

def fragment(markup = nil)


passed, is empty, or is +nil+.
Nokogiri::HTML5::DocumentFragment. This object's children will be empty if +markup+ is not
[Returns]

- +markup+ (String) The HTML5 markup fragment to be parsed
[Properties]

Parse an HTML5 document fragment from +markup+, returning a Nokogiri::HTML5::DocumentFragment.

fragment(markup) → Nokogiri::HTML5::DocumentFragment
fragment() → Nokogiri::HTML5::DocumentFragment
:call-seq:
# Builds a DocumentFragment from this document and +markup+.
#
# +markup+ defaults to +nil+ and is passed to DocumentFragment.new unchanged,
# together with +self+ as the owning document.
def fragment(markup = nil)
  DocumentFragment.new(self, markup)
end

def initialize(*args) # :nodoc:

:nodoc:
def initialize(*args) # :nodoc:
  super
  # Both attributes start out unset (nil) on a freshly initialized document.
  @url, @quirks_mode = nil, nil
end

def parse(


Nokogiri::HTML5::Document.parse(input) { |c| c[:parse_noscript_content_as_text] = true }

configuration block parameter.
*Example:* Parse a string setting the +:parse_noscript_content_as_text+ option using the

Nokogiri::HTML5::Document.parse(socket, encoding: "ISO-8859-1", max_errors: 10)

*Example:* Parse a string with a specific encoding and custom max errors limit.

[Returns] Nokogiri::HTML5::Document

⚠ Note that +url:+ and +encoding:+ cannot be set by the configuration block.

input is parsed. See rdoc-ref:HTML5@Parsing+options for a list of available options.
If present, the block will be passed a Hash object to modify with parse options before the
[Yields]

See rdoc-ref:HTML5@Parsing+options for a complete description of these parsing options.

elements as text. (default +false+)
- +parse_noscript_content_as_text:+ (Boolean) Whether to parse the content of +noscript+

element. (default +Nokogiri::Gumbo::DEFAULT_MAX_ATTRIBUTES+)
- +max_attributes:+ (Integer) The maximum number of attributes allowed on an

+Nokogiri::Gumbo::DEFAULT_MAX_TREE_DEPTH+)
- +max_tree_depth:+ (Integer) The maximum depth of the parse tree. (default

+Nokogiri::Gumbo::DEFAULT_MAX_ERRORS+ which is currently 0)
- +max_errors:+ (Integer) The maximum number of parse errors to record. (default

content.
document. When not provided, the encoding will be determined based on the document
- +encoding:+ (Encoding) The name of the encoding that should be used when processing the
[Optional Keyword Arguments]

- +url:+ (String) the base URI of the document.
[Optional Parameters]

- +input+ (String | IO) the \HTML content to be parsed.
[Required Parameters]

parameter.
encoding of +input+ if it can be determined, or else falls back to the +encoding:+
Parse \HTML input with a parser compliant with the HTML5 spec. This method uses the

parse(input, **options) → HTML5::Document
parse(input, url:, encoding:) { |options| ... } → HTML5::Document
parse(input) { |options| ... } → HTML5::Document
:call-seq:
# Normalizes the arguments and delegates to do_parse.
#
# +url+ and +encoding+ may be given positionally or as keywords. Any remaining
# keywords are collected into +options+, which is yielded to the block (when
# one is given) before anything else happens.
#
# Raises ArgumentError when the input responds to neither +read+ nor +to_str+.
def parse(
  string_or_io,
  url_ = nil, encoding_ = nil,
  url: url_, encoding: encoding_,
  **options, &block
)
  yield(options) if block_given?

  # A nil (or false) input is treated as an empty document.
  string_or_io ||= ""

  # Use the input's own encoding unless it is binary or the caller supplied one.
  if string_or_io.respond_to?(:encoding)
    source_encoding = string_or_io.encoding
    encoding ||= source_encoding.name unless source_encoding == Encoding::ASCII_8BIT
  end

  # Readable inputs that also expose a path supply the default URL.
  readable = string_or_io.respond_to?(:read)
  url ||= string_or_io.path if readable && string_or_io.respond_to?(:path)

  if !readable && !string_or_io.respond_to?(:to_str)
    raise ArgumentError, "not a string or IO object"
  end

  do_parse(string_or_io, url, encoding, **options)
end

def read_io(io, url_ = nil, encoding_ = nil, url: url_, encoding: encoding_, **options)

💡 Most users should prefer Document.parse to this method.

Create a new document from an IO object.
# Parses a document from +io+ by delegating to do_parse.
#
# +url+ and +encoding+ may be given positionally or as keywords; remaining
# keywords are forwarded as parse options.
#
# Raises ArgumentError when +io+ does not respond to +read+.
def read_io(io, url_ = nil, encoding_ = nil, url: url_, encoding: encoding_, **options)
  unless io.respond_to?(:read)
    raise ArgumentError, "io object doesn't respond to :read"
  end

  do_parse(io, url, encoding, **options)
end

def read_memory(string, url_ = nil, encoding_ = nil, url: url_, encoding: encoding_, **options)

💡 Most users should prefer Document.parse to this method.

Create a new document from a String.
# Parses a document from +string+ by delegating to do_parse.
#
# +url+ and +encoding+ may be given positionally or as keywords; remaining
# keywords are forwarded as parse options.
#
# Raises ArgumentError when +string+ does not respond to +to_str+.
def read_memory(string, url_ = nil, encoding_ = nil, url: url_, encoding: encoding_, **options)
  unless string.respond_to?(:to_str)
    raise ArgumentError, "string object doesn't respond to :to_str"
  end

  do_parse(string, url, encoding, **options)
end

def to_xml(options = {}, &block) # :nodoc:

:nodoc:
def to_xml(options = {}, &block) # :nodoc:
  # XML::Document#to_xml doesn't add XML::Node::SaveOptions::AS_XML the way
  # XML::Node#to_xml does, so skip it and invoke the Node implementation
  # directly on this object.
  node_to_xml = XML::Node.instance_method(:to_xml)
  node_to_xml.bind_call(self, options, &block)
end

def xpath_doctype

See CSS::XPathVisitor for more information.

[Returns] The document type which determines CSS-to-XPath translation.

xpath_doctype() → Nokogiri::CSS::XPathVisitor::DoctypeConfig
:call-seq:
# Returns the Nokogiri::CSS::XPathVisitor::DoctypeConfig::HTML5 constant.
# The value is fixed; it does not depend on any state of this document.
def xpath_doctype
  Nokogiri::CSS::XPathVisitor::DoctypeConfig::HTML5
end