class Nokogiri::HTML::Document
def fragment tags = nil
###
###
# Build a DocumentFragment that belongs to this document.
#
# +tags+ is forwarded unchanged to DocumentFragment.new (nil when
# omitted), along with this document and its root node.
def fragment(tags = nil)
  DocumentFragment.new(
    self,
    tags,
    self.root
  )
end
def meta_content_type
# Find the first <meta> element that carries both an http-equiv and a
# content attribute and whose http-equiv value is "Content-Type"
# (compared case-insensitively).  The result of the lookup is returned
# as is, so nothing is returned when no element qualifies.
def meta_content_type
  candidates = xpath('//meta[@http-equiv and boolean(@content)]')
  candidates.find do |meta|
    meta['http-equiv'] =~ /\AContent-Type\z/i
  end
end
def meta_encoding
Get the meta tag encoding for this document. If there is no meta tag, then nil is returned.
##
##
# Read the encoding declared by this document's meta tags.
#
# A <meta charset="..."> element is consulted first; failing that, the
# charset parameter is extracted from the content attribute of the
# Content-Type meta element.  Returns nil when neither element exists.
def meta_encoding
  if (charset_node = at('//meta[@charset]'))
    charset_node[:charset]
  elsif (content_node = meta_content_type)
    content_node['content'][/charset\s*=\s*([\w-]+)/i, 1]
  end
end
def meta_encoding= encoding
Beware in CRuby, that libxml2 automatically inserts a meta tag
from the document encoding is undefined.
The result when trying to set an encoding that is different
content element (typically <body>) if any.
is inside a head element if any, and before any text node or
place supplying head and/or html elements as necessary, which
Otherwise, this method tries to create one at an appropriate
replaced with the given text.
If a meta encoding tag is already present, its content is
Set the meta tag encoding for this document.
##
##
# Set the encoding declared by this document's meta tags.
#
# An existing Content-Type meta element has its content attribute
# rewritten; otherwise an existing <meta charset> element has its
# charset attribute replaced.  When neither exists a new meta element is
# built (charset form if the internal subset reports an HTML5 DTD,
# http-equiv form otherwise) and placed at the front of <head>, or
# handed to set_metadata_element when there is no <head>.
def meta_encoding=(encoding)
  if (existing = meta_content_type)
    existing['content'] = 'text/html; charset=%s' % encoding
    encoding
  elsif (existing = at('//meta[@charset]'))
    existing['charset'] = encoding
  else
    fresh = XML::Node.new('meta', self)
    dtd = internal_subset
    if dtd && dtd.html5_dtd?
      fresh['charset'] = encoding
    else
      fresh['http-equiv'] = 'Content-Type'
      fresh['content'] = 'text/html; charset=%s' % encoding
    end

    if (head = at('//head'))
      head.prepend_child(fresh)
    else
      set_metadata_element(fresh)
    end
    encoding
  end
end
def parse string_or_io, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML
Nokogiri::XML::ParseOptions::RECOVER. See the constants in Nokogiri::XML::ParseOptions.
is a number that sets options in the parser, such as
encoding that should be used when processing the document. +options+
+url+ is the resource where this document is located. +encoding+ is the
responds to _read_ and _close_ such as an IO, or StringIO.
Parse HTML. +string_or_io+ may be a String, or any object that
##
###
# Parse HTML.
#
# +string_or_io+ may be a String, or any object that responds to _read_
# (treated as a stream).  +url+ is the resource location; for streams
# that respond to _path_ it defaults to that path.  +encoding+ is the
# encoding to use; when nil it is taken from the string's own encoding
# (unless that is ASCII-8BIT) or detected via EncodingReader.  +options+
# is either an Integer bit mask or a parse-options object; an Integer is
# wrapped in Nokogiri::XML::ParseOptions and the resulting object is
# yielded to the block, if one is given, before parsing.
#
# Returns whatever read_io / read_memory return, or +new+ for a nil or
# empty non-stream input.
def parse(string_or_io, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML)
  # Integer (not Fixnum): Fixnum was deprecated in Ruby 2.4 and removed in
  # 3.2, where referencing it raises NameError.  Integer matches every
  # value Fixnum used to match.
  options = Nokogiri::XML::ParseOptions.new(options) if Integer === options

  # Give the options to the user
  yield options if block_given?

  if string_or_io.respond_to?(:encoding)
    unless string_or_io.encoding.name == "ASCII-8BIT"
      encoding ||= string_or_io.encoding.name
    end
  end

  if string_or_io.respond_to?(:read)
    url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
    unless encoding
      # Libxml2's parser has poor support for encoding
      # detection.  First, it does not recognize the HTML5
      # style meta charset declaration.  Secondly, even if it
      # successfully detects an encoding hint, it does not
      # re-decode or re-parse the preceding part which may be
      # garbled.
      #
      # EncodingReader aims to perform advanced encoding
      # detection beyond what Libxml2 does, and to emulate
      # rewinding of a stream and make Libxml2 redo parsing
      # from the start when an encoding hint is found.
      string_or_io = EncodingReader.new(string_or_io)
      begin
        return read_io(string_or_io, url, encoding, options.to_i)
      rescue EncodingFound => e
        # Fall through and parse again with the encoding that was found.
        encoding = e.found_encoding
      end
    end
    return read_io(string_or_io, url, encoding, options.to_i)
  end

  # read_memory pukes on empty docs
  return new if string_or_io.nil? || string_or_io.empty?

  encoding ||= EncodingReader.detect_encoding(string_or_io)

  read_memory(string_or_io, url, encoding, options.to_i)
end
def serialize options = {}
end
config.format.as_xml
node.serialize(:encoding => 'UTF-8') do |config|
or
node.serialize(:encoding => 'UTF-8', :save_with => FORMAT | AS_XML)
These two statements are equivalent:
block. See SaveOptions.
Serialize Node using +options+. Save options can also be set using a
###
####
# Serialize this document using +options+.
#
# When the caller has not supplied a truthy :save_with entry, it is set
# to XML::Node::SaveOptions::DEFAULT_HTML on the given hash before the
# call is passed up to the superclass implementation with the same
# arguments and block.
def serialize(options = {})
  unless options[:save_with]
    options[:save_with] = XML::Node::SaveOptions::DEFAULT_HTML
  end
  super
end
def set_metadata_element(element)
# Insert +element+ at a suitable place for document metadata.
#
# Tried in order: append to an existing <head>; create a <head> at the
# front of an existing <html> and put the element in it; insert before
# the first child that is an element or text node; otherwise create
# <html> and <head> and put the element inside the new <head>.
def set_metadata_element(element)
  if (head = at('//head'))
    head << element
  elsif (html = at('//html'))
    new_head = html.prepend_child(XML::Node.new('head', self))
    new_head.prepend_child(element)
  elsif (anchor = children.find { |child| XML::Element === child || XML::Text === child })
    # We reach here only if the underlying document model
    # allows <html>/<head> elements to be omitted and does not
    # automatically supply them.
    anchor.add_previous_sibling(element)
  else
    html_node = add_child(XML::Node.new('html', self))
    head_node = html_node.add_child(XML::Node.new('head', self))
    head_node.prepend_child(element)
  end
end
def title
Get the title string of this document. Return nil if there is no title tag.
##
##
# Return the inner text of the document's <title> element, or nil when
# the document has no <title>.
def title
  node = at('//title')
  node && node.inner_text
end
def title=(text)
encoding/charset tag if any, and before any text node or content element (typically <body>) if any.
is inside a head element if any, right after a meta
place supplying head and/or html elements as necessary, which
Otherwise, this method tries to create one at an appropriate
with the given text.
If a title element is already present, its content is replaced
Set the title string of this document.
##
##
# Set the document title to +text+ and return +text+.
#
# An existing <title> has its children replaced by a single text node.
# Otherwise a new <title> is created and appended to <head> if there is
# one, placed right after a charset / Content-Type meta element if there
# is one, or handed to set_metadata_element as a last resort.
def title=(text)
  content = XML::Text.new(text, self)

  existing = at('//title')
  if existing
    existing.children = content
    return text
  end

  fresh = XML::Node.new('title', self) << content
  if (head = at('//head'))
    head << fresh
  elsif (meta = at('//meta[@charset]') || meta_content_type)
    # better put after charset declaration
    meta.add_next_sibling(fresh)
  else
    set_metadata_element(fresh)
  end
  text
end