# coding: utf-8
# frozen_string_literal: true

require "pathname"

module Nokogiri
  module HTML4
    # An HTML4 document. Adds helpers for the <meta> encoding declaration and
    # the <title> element on top of Nokogiri::XML::Document, plus HTML-flavoured
    # defaults for serializing and parsing.
    class Document < Nokogiri::XML::Document
      ###
      # Get the meta tag encoding for this document. If there is no meta tag,
      # then nil is returned.
      #
      # A <meta charset="..."> element is consulted first; otherwise the
      # charset is extracted from the +content+ attribute of a
      # <meta http-equiv="Content-Type"> element.
      def meta_encoding
        if (meta = at_xpath("//meta[@charset]"))
          meta[:charset]
        elsif (meta = meta_content_type)
          meta["content"][/charset\s*=\s*([\w-]+)/i, 1]
        end
      end

      ###
      # Set the meta tag encoding for this document.
      #
      # If a meta encoding tag is already present, its content is
      # replaced with the given text.
      #
      # Otherwise, this method tries to create one at an appropriate
      # place supplying head and/or html elements as necessary, which
      # is inside a head element if any, and before any text node or
      # content element (typically <body>) if any.
      #
      # The result when trying to set an encoding that is different
      # from the document encoding is undefined.
      #
      # Beware in CRuby, that libxml2 automatically inserts a meta tag
      # into a head element.
      def meta_encoding=(encoding)
        if (meta = meta_content_type)
          meta["content"] = format("text/html; charset=%s", encoding)
          encoding
        elsif (meta = at_xpath("//meta[@charset]"))
          meta["charset"] = encoding
        else
          meta = XML::Node.new("meta", self)
          # The short <meta charset> form is used only when the internal
          # subset reports an HTML5 DTD; otherwise the http-equiv form is used.
          if (dtd = internal_subset) && dtd.html5_dtd?
            meta["charset"] = encoding
          else
            meta["http-equiv"] = "Content-Type"
            meta["content"] = format("text/html; charset=%s", encoding)
          end

          if (head = at_xpath("//head"))
            head.prepend_child(meta)
          else
            set_metadata_element(meta)
          end
          encoding
        end
      end

      # Find the first <meta> element that has a +content+ attribute and whose
      # +http-equiv+ attribute equals "Content-Type" (case-insensitively).
      # Returns nil when there is no such element.
      def meta_content_type
        xpath("//meta[@http-equiv and boolean(@content)]").find do |node|
          node["http-equiv"] =~ /\AContent-Type\z/i
        end
      end
      private :meta_content_type

      ###
      # Get the title string of this document. Return nil if there is
      # no title tag.
      def title
        (title = at_xpath("//title")) && title.inner_text
      end

      ###
      # Set the title string of this document.
      #
      # If a title element is already present, its content is replaced
      # with the given text.
      #
      # Otherwise, this method tries to create one at an appropriate
      # place supplying head and/or html elements as necessary, which
      # is inside a head element if any, right after a meta
      # encoding/charset tag if any, and before any text node or
      # content element (typically <body>) if any.
      def title=(text)
        tnode = XML::Text.new(text, self)
        if (title = at_xpath("//title"))
          title.children = tnode
          return text
        end

        title = XML::Node.new("title", self) << tnode
        if (head = at_xpath("//head"))
          head << title
        elsif (meta = (at_xpath("//meta[@charset]") || meta_content_type))
          # better put after charset declaration
          meta.add_next_sibling(title)
        else
          set_metadata_element(title)
        end
      end

      # Insert +element+ into the document's metadata section: append it to an
      # existing <head>; otherwise create a <head> under an existing <html>;
      # otherwise place it before the first element or text child of the
      # document; otherwise create both <html> and <head> to hold it.
      def set_metadata_element(element) # rubocop:disable Naming/AccessorMethodName
        if (head = at_xpath("//head"))
          head << element
        elsif (html = at_xpath("//html"))
          head = html.prepend_child(XML::Node.new("head", self))
          head.prepend_child(element)
        elsif (first = children.find do |node|
                 case node
                 when XML::Element, XML::Text
                   true
                 end
               end)
          # We reach here only if the underlying document model
          # allows <html>/<head> elements to be omitted and does not
          # automatically supply them.
          first.add_previous_sibling(element)
        else
          html = add_child(XML::Node.new("html", self))
          head = html.add_child(XML::Node.new("head", self))
          head.prepend_child(element)
        end
      end
      private :set_metadata_element

      ####
      # Serialize Node using +options+. Save options can also be set using a block.
      #
      # See also Nokogiri::XML::Node::SaveOptions and Node@Serialization+and+Generating+Output.
      #
      # These two statements are equivalent:
      #
      #   node.serialize(:encoding => 'UTF-8', :save_with => FORMAT | AS_XML)
      #
      # or
      #
      #   node.serialize(:encoding => 'UTF-8') do |config|
      #     config.format.as_xml
      #   end
      #
      # When +options+ has no truthy +:save_with+ entry,
      # XML::Node::SaveOptions::DEFAULT_HTML is stored into the given hash
      # before delegating to the superclass implementation.
      def serialize(options = {})
        options[:save_with] ||= XML::Node::SaveOptions::DEFAULT_HTML
        super
      end

      ####
      # Create a Nokogiri::XML::DocumentFragment from +tags+
      def fragment(tags = nil)
        DocumentFragment.new(self, tags, root)
      end

      # :call-seq:
      #   xpath_doctype() → Nokogiri::CSS::XPathVisitor::DoctypeConfig
      #
      # [Returns] The document type which determines CSS-to-XPath translation.
      #
      # See XPathVisitor for more information.
      def xpath_doctype
        Nokogiri::CSS::XPathVisitor::DoctypeConfig::HTML4
      end

      class << self
        ###
        # Parse HTML. +string_or_io+ may be a String, or any object that
        # responds to _read_ and _close_ such as an IO, or StringIO.
        # +url+ is resource where this document is located. +encoding+ is the
        # encoding that should be used when processing the document. +options+
        # is a number that sets options in the parser, such as
        # Nokogiri::XML::ParseOptions::RECOVER. See the constants in
        # Nokogiri::XML::ParseOptions.
        #
        # A Pathname is also accepted: it is expanded and opened here, and the
        # file handle opened by this method is closed again before returning.
        # IO objects supplied by the caller are never closed by this method.
        #
        # If a block is given, it is yielded +options+ (wrapped in a
        # Nokogiri::XML::ParseOptions when an Integer was passed) so that the
        # caller can adjust them before parsing.
        #
        # When +encoding+ is nil it is taken from +string_or_io+ if that
        # responds to _encoding_ and is not binary (ASCII-8BIT); failing that,
        # it is detected from the content via EncodingReader.
        def parse(string_or_io, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML)
          options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
          yield options if block_given?

          url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil

          if string_or_io.respond_to?(:encoding)
            unless string_or_io.encoding == Encoding::ASCII_8BIT
              encoding ||= string_or_io.encoding.name
            end
          end

          if string_or_io.respond_to?(:read)
            # Tracks a file handle opened by this method (Pathname input only)
            # so that it can be released once parsing is done.
            opened_file = nil
            begin
              if string_or_io.is_a?(Pathname)
                # resolve the Pathname to the file and open it as an IO object, see #2110
                opened_file = string_or_io.expand_path.open
                string_or_io = opened_file
                url ||= opened_file.path
              end

              unless encoding
                string_or_io = EncodingReader.new(string_or_io)
                begin
                  return read_io(string_or_io, url, encoding, options.to_i)
                rescue EncodingReader::EncodingFound => e
                  # The reader found an encoding declaration in the content;
                  # parse again below using that encoding.
                  encoding = e.found_encoding
                end
              end
              return read_io(string_or_io, url, encoding, options.to_i)
            ensure
              # NOTE(review): relies on read_io having fully consumed the
              # stream by the time it returns — confirm for all backends.
              opened_file&.close
            end
          end

          # read_memory pukes on empty docs
          if string_or_io.nil? || string_or_io.empty?
            return encoding ? new.tap { |i| i.encoding = encoding } : new
          end

          encoding ||= EncodingReader.detect_encoding(string_or_io)

          read_memory(string_or_io, url, encoding, options.to_i)
        end
      end
    end
  end
end