class Roadie::MarkupImprover

    * `<meta>` declaring charset and content-type (text/html)
    * `<body>`
    * `<head>`
    * `<html>`
    * Basic HTML elements will be added if missing.
    * An HTML5 doctype will be added if missing, other doctypes will be left as-is.
    This class will improve the following aspects of the DOM:
    Class that improves the markup of an HTML DOM tree
    @api private

def content_type_meta_element_missing?

# Checks whether the document lacks a Content-Type declaration.
#
# Looks at every node matched by +html/head/meta+ and compares its
# +http-equiv+ attribute (case-insensitively) with "content-type".
#
# @return [Boolean] true when no such meta element exists
def content_type_meta_element_missing?
  declared = dom.xpath("html/head/meta").any? do |element|
    # A missing attribute yields nil; #to_s turns it into "" so it never matches.
    element["http-equiv"].to_s.downcase == "content-type"
  end
  !declared
end

def create_head_element(parent)

# Builds a new +head+ node and places it first among +parent+'s children.
#
# @param parent the node that should receive the new +head+ element
# @return the newly created +head+ node
def create_head_element(parent)
  Nokogiri::XML::Node.new("head", dom).tap do |new_head|
    existing = parent.children
    if existing.empty?
      # Calling #before on an empty child set crashes, so append instead.
      parent << new_head
    else
      existing.before new_head
    end
  end
end

def ensure_declared_charset(parent)

# Adds a Content-Type meta element to +parent+ unless the document
# already declares one.
#
# @param parent the node that receives the meta element
# @return the result of +parent.add_child+, or nil when nothing was added
def ensure_declared_charset(parent)
  return unless content_type_meta_element_missing?

  parent.add_child(make_content_type_element)
end

def ensure_doctype_present

# Gives the document an HTML5 doctype when the original markup had none.
#
# The decision is made from the raw HTML in +@html+. The doctype keyword is
# matched case-insensitively and may be followed by any whitespace, so that
# markup such as +<!doctype html>+ or a doctype split across lines is
# recognised and left untouched.
#
# @return [nil] when the original HTML already declares a doctype;
#   otherwise the result of +dom.create_internal_subset+
def ensure_doctype_present
  return if @html.match?(/<!DOCTYPE\s/i)

  # Nokogiri adds a "default" doctype to the DOM, which we will remove
  dom.internal_subset&.remove
  dom.create_internal_subset "html", nil, nil
end

def ensure_head_element_present

# Finds the document's +head+ element, creating one under +html+ when absent.
#
# @return the existing node at +html/head+, or the value returned by
#   +create_head_element+ when there was none
def ensure_head_element_present
  dom.at_xpath("html/head") || create_head_element(dom.at_xpath("html"))
end

def ensure_html_element_present

# Appends a new +html+ node to the document when +dom.at_xpath("html")+
# finds nothing.
#
# @return [nil] when an +html+ element already exists; otherwise the result
#   of +dom <<+
def ensure_html_element_present
  dom << Nokogiri::XML::Node.new("html", dom) unless dom.at_xpath("html")
end

def improve

Returns:
  • (nil) - passed DOM will be mutated
# Runs every improvement step against the DOM, in order: doctype, +html+
# element, +head+ element, and finally the charset declaration.
#
# The DOM is mutated in place, so nil is returned explicitly instead of
# leaking whatever the last step happened to return.
#
# @return [nil]
def improve
  ensure_doctype_present
  ensure_html_element_present
  head = ensure_head_element_present
  ensure_declared_charset head
  nil
end

def initialize(dom, original_html)

determine if we want to add an HTML5 doctype or not.
the original source had it or not. Reading the raw HTML is the only way to
since a +Nokogiri::HTML::Document+ will always have a doctype, no matter if
The original HTML must also be passed in in order to handle the doctypes
# Stores the DOM to work on together with the raw HTML it came from.
#
# @param dom the document that will be mutated; kept in +@dom+
# @param original_html the unparsed source markup; kept in +@html+
def initialize(dom, original_html)
  @dom, @html = dom, original_html
end

def make_content_type_element

# Builds a +meta+ node declaring an HTML content type with UTF-8 charset.
#
# The node is created for +dom+ but is not inserted anywhere here.
#
# @return the new +meta+ node
def make_content_type_element
  Nokogiri::XML::Node.new("meta", dom).tap do |element|
    element["http-equiv"] = "Content-Type"
    element["content"] = "text/html; charset=UTF-8"
  end
end