# coding: utf-8
# frozen_string_literal: true

require "pathname"

module Nokogiri
  module HTML4
    class Document < Nokogiri::XML::Document
      ###
      # Get the meta tag encoding for this document.  If there is no meta tag,
      # then nil is returned.
      #
      # A <meta charset="..."> tag takes precedence; otherwise the charset is
      # extracted from the +content+ attribute of a
      # <meta http-equiv="Content-Type"> tag.
      def meta_encoding
        if (meta = at_xpath("//meta[@charset]"))
          meta[:charset]
        elsif (meta = meta_content_type)
          meta["content"][/charset\s*=\s*([\w-]+)/i, 1]
        end
      end

      ###
      # Set the meta tag encoding for this document.
      #
      # If a meta encoding tag is already present, its content is
      # replaced with the given text.
      #
      # Otherwise, this method tries to create one at an appropriate
      # place supplying head and/or html elements as necessary, which
      # is inside a head element if any, and before any text node or
      # content element (typically <body>) if any.
      #
      # The result when trying to set an encoding that is different
      # from the document encoding is undefined.
      #
      # Beware in CRuby, that libxml2 automatically inserts a meta tag
      # into a head element.
      def meta_encoding=(encoding)
        if (meta = meta_content_type)
          meta["content"] = format("text/html; charset=%s", encoding)
          encoding
        elsif (meta = at_xpath("//meta[@charset]"))
          meta["charset"] = encoding
        else
          meta = XML::Node.new("meta", self)
          # An HTML5 doctype gets the short <meta charset> form; anything
          # else gets the http-equiv/content pair.
          if (dtd = internal_subset) && dtd.html5_dtd?
            meta["charset"] = encoding
          else
            meta["http-equiv"] = "Content-Type"
            meta["content"] = format("text/html; charset=%s", encoding)
          end

          if (head = at_xpath("//head"))
            head.prepend_child(meta)
          else
            set_metadata_element(meta)
          end
          encoding
        end
      end

      # Find the first <meta> element that has a +content+ attribute and
      # whose +http-equiv+ attribute equals "Content-Type" (case-insensitive).
      # Returns nil when there is none.
      def meta_content_type
        xpath("//meta[@http-equiv and boolean(@content)]").find do |node|
          node["http-equiv"].match?(/\AContent-Type\z/i)
        end
      end
      private :meta_content_type

      ###
      # Get the title string of this document.  Return nil if there is
      # no title tag.
      def title
        at_xpath("//title")&.inner_text
      end

      ###
      # Set the title string of this document.
      #
      # If a title element is already present, its content is replaced
      # with the given text.
      #
      # Otherwise, this method tries to create one at an appropriate
      # place supplying head and/or html elements as necessary, which
      # is inside a head element if any, right after a meta
      # encoding/charset tag if any, and before any text node or
      # content element (typically <body>) if any.
      def title=(text)
        tnode = XML::Text.new(text, self)
        if (title = at_xpath("//title"))
          title.children = tnode
          return text
        end

        title = XML::Node.new("title", self) << tnode
        if (head = at_xpath("//head"))
          head << title
        elsif (meta = at_xpath("//meta[@charset]") || meta_content_type)
          # better put after charset declaration
          meta.add_next_sibling(title)
        else
          set_metadata_element(title)
        end
      end

      # Insert +element+ into the document's metadata section, creating the
      # <head> and/or <html> elements when they are missing.
      def set_metadata_element(element) # rubocop:disable Naming/AccessorMethodName
        if (head = at_xpath("//head"))
          head << element
        elsif (html = at_xpath("//html"))
          head = html.prepend_child(XML::Node.new("head", self))
          head.prepend_child(element)
        elsif (first = children.find { |node| node.is_a?(XML::Element) || node.is_a?(XML::Text) })
          # We reach here only if the underlying document model
          # allows <html>/<head> elements to be omitted and does not
          # automatically supply them.
          first.add_previous_sibling(element)
        else
          html = add_child(XML::Node.new("html", self))
          head = html.add_child(XML::Node.new("head", self))
          head.prepend_child(element)
        end
      end
      private :set_metadata_element

      ####
      # Serialize Node using +options+. Save options can also be set using a block.
      #
      # See also Nokogiri::XML::Node::SaveOptions and Node@Serialization+and+Generating+Output.
      #
      # These two statements are equivalent:
      #
      #   node.serialize(:encoding => 'UTF-8', :save_with => FORMAT | AS_XML)
      #
      # or
      #
      #   node.serialize(:encoding => 'UTF-8') do |config|
      #     config.format.as_xml
      #   end
      #
      # When +options+ has no +:save_with+ entry, SaveOptions::DEFAULT_HTML is
      # used. The caller's hash is not modified.
      def serialize(options = {})
        # Work on a copy so the default is not written into the caller's hash;
        # the bare `super` below forwards the reassigned local.
        options = options.dup
        options[:save_with] ||= XML::Node::SaveOptions::DEFAULT_HTML
        super
      end

      ####
      # Create a Nokogiri::XML::DocumentFragment from +tags+
      def fragment(tags = nil)
        DocumentFragment.new(self, tags, root)
      end

      # :call-seq:
      #   xpath_doctype() → Nokogiri::CSS::XPathVisitor::DoctypeConfig
      #
      # [Returns] The document type which determines CSS-to-XPath translation.
      #
      # See XPathVisitor for more information.
      def xpath_doctype
        Nokogiri::CSS::XPathVisitor::DoctypeConfig::HTML4
      end

      class << self
        # :call-seq:
        #   parse(input) { |options| ... } => Nokogiri::HTML4::Document
        #   parse(input, url:, encoding:, options:) => Nokogiri::HTML4::Document
        #
        # Parse \HTML4 input from a String or IO object, and return a new HTML4::Document.
        #
        # [Required Parameters]
        # - +input+ (String | IO) The content to be parsed.
        #
        # [Optional Keyword Arguments]
        # - +url:+ (String) The base URI for this document.
        #
        # - +encoding:+ (String) The name of the encoding that should be used when processing the
        #   document. When not provided, the encoding will be determined based on the document
        #   content.
        #
        # - +options:+ (Nokogiri::XML::ParseOptions) Configuration object that determines some
        #   behaviors during parsing. See ParseOptions for more information. The default value is
        #   +ParseOptions::DEFAULT_HTML+.
        #
        # [Yields]
        #   If a block is given, a Nokogiri::XML::ParseOptions object is yielded to the block which
        #   can be configured before parsing. See Nokogiri::XML::ParseOptions for more information.
        #
        # [Returns] Nokogiri::HTML4::Document
        #
        # The positional +url_+, +encoding_+ and +options_+ parameters only supply the defaults
        # for the keyword arguments of the same name, so both calling styles are accepted.
        def parse(
          input,
          url_ = nil, encoding_ = nil, options_ = XML::ParseOptions::DEFAULT_HTML,
          url: url_, encoding: encoding_, options: options_
        )
          options = Nokogiri::XML::ParseOptions.new(options) if options.is_a?(Integer)
          yield options if block_given?

          url ||= input.respond_to?(:path) ? input.path : nil

          # Trust the input's own encoding unless it is binary (ASCII-8BIT).
          if input.respond_to?(:encoding) && input.encoding != Encoding::ASCII_8BIT
            encoding ||= input.encoding.name
          end

          if input.respond_to?(:read)
            opened_file = nil
            if input.is_a?(Pathname)
              # resolve the Pathname to the file and open it as an IO object, see #2110
              input = opened_file = input.expand_path.open
              url ||= input.path
            end

            begin
              unless encoding
                # EncodingReader raises EncodingFound once it has sniffed the
                # encoding; the parse is then retried with that encoding.
                input = EncodingReader.new(input)
                begin
                  return read_io(input, url, encoding, options.to_i)
                rescue EncodingReader::EncodingFound => e
                  encoding = e.found_encoding
                end
              end
              return read_io(input, url, encoding, options.to_i)
            ensure
              # Only close the handle opened here for a Pathname; IO objects
              # supplied by the caller stay open.
              # NOTE(review): assumes read_io has finished consuming the IO by
              # the time it returns -- confirm for every backend.
              opened_file&.close
            end
          end

          # read_memory pukes on empty docs
          if input.nil? || input.empty?
            return encoding ? new.tap { |i| i.encoding = encoding } : new
          end

          encoding ||= EncodingReader.detect_encoding(input)

          read_memory(input, url, encoding, options.to_i)
        end
      end
    end
  end
end