class Nokogiri::HTML4::DocumentFragment
def self.parse(
end
options.huge.pedantic
HTML4::DocumentFragment.parse("<div>Hello World</div>") do |options|
*Example:* Setting parse options dynamically
fragment = HTML4::DocumentFragment.parse(input, encoding: "EUC-JP")
*Example:* Specifying encoding
end
HTML4::DocumentFragment.parse(file)
fragment = File.open("fragment.html") do |file|
*Example:* Parsing an IO
fragment = HTML4::DocumentFragment.parse("<div>Hello World</div>")
*Example:* Parsing a string
[Returns] HTML4::DocumentFragment
can be configured before parsing. See ParseOptions for more information.
If a block is given, a Nokogiri::XML::ParseOptions object is yielded to the block which
[Yields]
+ParseOptions::DEFAULT_HTML+.
behaviors during parsing. See ParseOptions for more information. The default value is
- +options:+ (Nokogiri::XML::ParseOptions) Configuration object that determines some
content.
document. When not provided, the encoding will be determined based on the document
- +encoding:+ (String) The name of the encoding that should be used when processing the
[Optional Keyword Arguments]
- +input+ (String | IO) The content to be parsed.
[Required Parameters]
method creates a new, empty HTML4::Document to contain the fragment.
Parse \HTML4 fragment input from a String or IO, and return a new HTML4::DocumentFragment. This
parse(input, encoding:, options:) { |options| ... } → HTML4::DocumentFragment
parse(input) { |options| ... } → HTML4::DocumentFragment
:call-seq:
*Example:* Setting parse options dynamically
fragment = HTML4::DocumentFragment.parse(input, encoding: "EUC-JP")
*Example:* Specifying encoding
end
HTML4::DocumentFragment.parse(file)
fragment = File.open("fragment.html") do |file|
*Example:* Parsing an IO
fragment = HTML4::DocumentFragment.parse("<div>Hello World</div>")
*Example:* Parsing a string
[Returns] HTML4::DocumentFragment
can be configured before parsing. See ParseOptions for more information.
If a block is given, a Nokogiri::XML::ParseOptions object is yielded to the block which
[Yields]
+ParseOptions::DEFAULT_HTML+.
behaviors during parsing. See ParseOptions for more information. The default value is
- +options:+ (Nokogiri::XML::ParseOptions) Configuration object that determines some
content.
document. When not provided, the encoding will be determined based on the document
- +encoding:+ (String) The name of the encoding that should be used when processing the
[Optional Keyword Arguments]
- +input+ (String | IO) The content to be parsed.
[Required Parameters]
method creates a new, empty HTML4::Document to contain the fragment.
Parse \HTML4 fragment input from a String or IO, and return a new HTML4::DocumentFragment. This
parse(input, encoding:, options:) { |options| ... } → HTML4::DocumentFragment
parse(input) { |options| ... } → HTML4::DocumentFragment
:call-seq:
# Parse HTML4 fragment +input+ and return a new HTML4::DocumentFragment that
# belongs to a freshly created, empty HTML4::Document.
#
# +input+ may be a String or any object responding to +read+ (IO, File,
# StringIO, ...). +encoding+ and +options+ can be given positionally or as
# keywords; when both forms are supplied the keyword value is used. +options+
# and any block are forwarded unchanged to +new+.
def self.parse(
  input,
  encoding_ = nil, options_ = XML::ParseOptions::DEFAULT_HTML,
  encoding: encoding_, options: options_,
  &block
)
  # TODO: this method should take a context node.
  doc = HTML4::Document.new

  if input.respond_to?(:read)
    # IO-like input: +read+ takes no +encoding+ argument, so the encoding has
    # to be applied to the object itself before reading.
    #
    # - When +encoding+ was supplied and the object supports +set_encoding+,
    #   apply it first.
    # - Then slurp the content.
    #
    # The resulting string carries the encoding given to +set_encoding+ if it
    # was called, and the IO object's default encoding otherwise. For StringIO
    # specifically, +set_encoding+ affects only the internal string, not how
    # the data is read out.
    if encoding && input.respond_to?(:set_encoding)
      input.set_encoding(encoding)
    end
    input = input.read
  end

  unless encoding
    # No explicit encoding: derive it from the content. Binary (ASCII-8BIT)
    # content and objects that do not report an encoding fall back to UTF-8.
    encoding =
      if input.respond_to?(:encoding)
        source_encoding = input.encoding
        source_encoding == ::Encoding::ASCII_8BIT ? "UTF-8" : source_encoding.name
      else
        "UTF-8"
      end
  end

  doc.encoding = encoding

  new(doc, input, options: options, &block)
end
def initialize(
parent of the fragment subtree.
calling XML::Node#parse on that node, so the parser will behave as if that Node is the
If a context node is specified using +context:+, then the fragment will be created by
=== Context \Node
[Returns] HTML4::DocumentFragment
can be configured before parsing. See ParseOptions for more information.
If a block is given, a Nokogiri::XML::ParseOptions object is yielded to the block which
[Yields]
+ParseOptions::DEFAULT_HTML+.
behaviors during parsing. See ParseOptions for more information. The default value is
- +options:+ (Nokogiri::XML::ParseOptions) Configuration object that determines some
below for more information.
- +context:+ (Nokogiri::XML::Node) The context node for the subtree created. See
[Optional Keyword Arguments]
- +input+ (String) The content to be parsed.
[Optional Parameters]
- +document+ (HTML4::Document) The parent document to associate the returned fragment with.
[Required Parameters]
than call this method directly.
💡 It's recommended to use either HTML4::DocumentFragment.parse or XML::Node#parse rather
Parse \HTML4 fragment input from a String, and return a new HTML4::DocumentFragment.
new(document, input, context:, options:) { |options| ... } → HTML4::DocumentFragment
new(document, input) { |options| ... } → HTML4::DocumentFragment
new(document) { |options| ... } → HTML4::DocumentFragment
:call-seq:
# Populate this fragment by parsing HTML4 +input+.
#
# +document+ is the document the fragment is associated with. When +input+ is
# nil or false nothing is parsed. +context+ and +options+ can be given
# positionally or as keywords; when both forms are supplied the keyword value
# is used. An Integer +options+ is wrapped in a Nokogiri::XML::ParseOptions.
# If a block is given, the options object is yielded to it before parsing.
#
# With a +context+ node the input is parsed via +context.parse+; otherwise it
# is parsed as part of a temporary HTML4 document and the resulting nodes are
# re-parented onto this fragment. Parse errors are stored in +errors+.
def initialize(
  document,
  input = nil,
  context_ = nil, options_ = XML::ParseOptions::DEFAULT_HTML,
  context: context_, options: options_
) # rubocop:disable Lint/MissingSuper
  return self unless input

  case options
  when Integer
    options = Nokogiri::XML::ParseOptions.new(options)
  end
  @parse_options = options

  yield options if block_given?

  if context
    # Snapshot the document's errors so that only the errors produced by this
    # parse end up on the fragment.
    errors_before = document.errors.dup

    parsed = context.parse("<div>#{input}</div>", options)
    unless parsed.empty?
      # The input was wrapped in a <div>; adopt that wrapper's children.
      parsed.first.children.each { |child| child.parent = self }
    end

    self.errors = document.errors - errors_before
  else
    # Hack: parse the input inside a throwaway <html><body> skeleton and pull
    # the resulting nodes out with XPath. If the input itself opens with
    # <body, select the body element; otherwise select the body's child nodes.
    #
    # NOTE(review): +^+ anchors at the start of any line, not only the start
    # of the string, so a later line beginning with <body also matches.
    input_opens_body = /^\s*?<body/i.match?(input)
    path = input_opens_body ? "/html/body" : "/html/body/node()"

    temp_doc = HTML4::Document.parse("<html><body>#{input}", nil, document.encoding, options)
    temp_doc.xpath(path).each { |child| child.parent = self }

    self.errors = temp_doc.errors
  end

  # NOTE(review): the reason for this trailing call is not evident from this
  # method alone — confirm before removing.
  children
end