class Nokogiri::HTML5::Document
💡 HTML5 functionality is not available when running JRuby.
Since v1.12.0
def do_parse(string_or_io, url, encoding, options)
# Shared parsing backend: reads the input through HTML5.read_and_encode,
# resolves the parser limits, and hands everything to Nokogiri::Gumbo.parse.
#
# [Parameters]
# - +string_or_io+ the input, passed as-is to HTML5.read_and_encode
# - +url+ forwarded unchanged to the Gumbo parser
# - +encoding+ passed to HTML5.read_and_encode
# - +options+ Hash consulted for +:max_attributes+, +:max_errors+ (falling
#   back to +:max_parse_errors+) and +:max_tree_depth+; any value that is
#   missing or nil is replaced by the matching Nokogiri::Gumbo default.
#
# [Returns] the object produced by Nokogiri::Gumbo.parse, with its encoding
# set to "UTF-8".
def do_parse(string_or_io, url, encoding, options)
  markup = HTML5.read_and_encode(string_or_io, encoding)
  gumbo = Nokogiri::Gumbo

  # Order matters: Gumbo.parse takes (attributes, errors, depth) positionally.
  limits = [
    options[:max_attributes] || gumbo::DEFAULT_MAX_ATTRIBUTES,
    options[:max_errors] || options[:max_parse_errors] || gumbo::DEFAULT_MAX_ERRORS,
    options[:max_tree_depth] || gumbo::DEFAULT_MAX_TREE_DEPTH,
  ]

  gumbo.parse(markup, url, *limits, self).tap do |document|
    document.encoding = "UTF-8"
  end
end
def fragment(markup = nil)
Nokogiri::HTML5::DocumentFragment. This object's children will be empty if `markup` is not passed, is empty, or is `nil`.
[Returns]
- +markup+ (String) The HTML5 markup fragment to be parsed
[Properties]
Parse an HTML5 document fragment from +markup+, returning a Nokogiri::HTML5::DocumentFragment.
fragment(markup) → Nokogiri::HTML5::DocumentFragment
fragment() → Nokogiri::HTML5::DocumentFragment
:call-seq:
# :call-seq:
#   fragment() → Nokogiri::HTML5::DocumentFragment
#   fragment(markup) → Nokogiri::HTML5::DocumentFragment
#
# Build a DocumentFragment from +markup+ with this object as its first
# constructor argument.
#
# [Parameters]
# - +markup+ (optional, defaults to +nil+) the HTML5 markup handed to
#   DocumentFragment.new
#
# [Returns] the newly constructed DocumentFragment
def fragment(markup = nil)
  DocumentFragment.new(self, markup)
end
def initialize(*args) # :nodoc:
# Runs the superclass initializer with the same arguments, then resets the
# +@url+ and +@quirks_mode+ instance variables to +nil+.
def initialize(*args) # :nodoc:
  super
  @url = @quirks_mode = nil
end
def parse(string_or_io, url = nil, encoding = nil, **options, &block)
[Returns] Nokogiri::HTML5::Document
Nokogiri::HTML5 for more information and usage.
- +block+ (optional) is passed a configuration Hash on which parse options may be set. See
Nokogiri::XML::Document and Nokogiri::HTML4::Document.
⚠ Note that these options are different from those made available by
+:max_tree_depth+ and +:max_attributes+, described at Nokogiri::HTML5.
during parsing. The three currently supported options are +:max_errors+,
- +options+ (optional) is a configuration Hash (or keyword arguments) to set options
the document.
- +encoding+ (optional) is the encoding that should be used when processing
- +url+ (optional) is a String indicating the canonical URI where this document is located.
IO, or StringIO.
- +input+ may be a String, or any object that responds to _read_ and _close_ such as an
[Parameters]
Parse HTML5 input.
parse(input, url=nil, encoding=nil) { |options| ... }
parse(input, url=nil, encoding=nil, **options)
parse(input)
:call-seq:
# :call-seq:
#   parse(input)
#   parse(input, url=nil, encoding=nil, **options)
#   parse(input, url=nil, encoding=nil) { |options| ... }
#
# Normalize the arguments and delegate to do_parse.
#
# [Parameters]
# - +string_or_io+ the input; +nil+ (or +false+) is treated as an empty
#   String. It must respond to +read+ or +to_str+.
# - +url+ (optional) when omitted and the input responds to both +read+ and
#   +path+, the input's +path+ is used.
# - +encoding+ (optional) when omitted and the input reports an encoding
#   other than ASCII-8BIT, that encoding's name is used.
# - +options+ (optional) keyword arguments collected into a Hash and passed
#   on to do_parse.
# - +block+ (optional) receives the options Hash and may modify it before
#   parsing.
#
# [Raises] ArgumentError if the input responds to neither +read+ nor +to_str+.
#
# [Returns] whatever do_parse returns.
def parse(string_or_io, url = nil, encoding = nil, **options, &block)
  block&.call(options)
  string_or_io ||= ""

  # Binary input carries no usable encoding hint, so it is skipped here.
  if string_or_io.respond_to?(:encoding)
    source_encoding = string_or_io.encoding
    encoding ||= source_encoding.name unless source_encoding == Encoding::ASCII_8BIT
  end

  readable = string_or_io.respond_to?(:read)
  url ||= string_or_io.path if readable && string_or_io.respond_to?(:path)

  raise ArgumentError, "not a string or IO object" unless readable || string_or_io.respond_to?(:to_str)

  do_parse(string_or_io, url, encoding, options)
end
def read_io(io, url = nil, encoding = nil, **options)
Create a new document from an IO object.
# Create a new document from an IO-like object.
#
# [Parameters]
# - +io+ any object that responds to +read+
# - +url+, +encoding+ (optional) forwarded unchanged to do_parse
# - +options+ (optional) keyword arguments forwarded to do_parse as a Hash
#
# [Raises] ArgumentError if +io+ does not respond to +read+.
#
# [Returns] whatever do_parse returns.
def read_io(io, url = nil, encoding = nil, **options)
  unless io.respond_to?(:read)
    raise ArgumentError, "io object doesn't respond to :read"
  end

  do_parse(io, url, encoding, options)
end
def read_memory(string, url = nil, encoding = nil, **options)
Create a new document from a String.
# Create a new document from a String-like object.
#
# [Parameters]
# - +string+ any object that responds to +to_str+
# - +url+, +encoding+ (optional) forwarded unchanged to do_parse
# - +options+ (optional) keyword arguments forwarded to do_parse as a Hash
#
# [Raises] ArgumentError if +string+ does not respond to +to_str+.
#
# [Returns] whatever do_parse returns.
def read_memory(string, url = nil, encoding = nil, **options)
  unless string.respond_to?(:to_str)
    raise ArgumentError, "string object doesn't respond to :to_str"
  end

  do_parse(string, url, encoding, options)
end
def to_xml(options = {}, &block) # :nodoc:
# Serialize via XML::Node's implementation of +to_xml+, invoked directly on
# this object with +options+ and +block+ passed through unchanged.
def to_xml(options = {}, &block) # :nodoc:
  # XML::Document#to_xml is deliberately skipped: unlike XML::Node#to_xml it
  # doesn't add XML::Node::SaveOptions::AS_XML.
  node_serializer = XML::Node.instance_method(:to_xml)
  node_serializer.bind_call(self, options, &block)
end
def xpath_doctype
[Returns] The document type which determines CSS-to-XPath translation.
xpath_doctype() → Nokogiri::CSS::XPathVisitor::DoctypeConfig
:call-seq:
# :call-seq:
#   xpath_doctype() → Nokogiri::CSS::XPathVisitor::DoctypeConfig
#
# [Returns] The document type which determines CSS-to-XPath translation;
# always the HTML5 configuration constant.
def xpath_doctype
  Nokogiri::CSS::XPathVisitor::DoctypeConfig::HTML5
end