class PrawnHtml::HtmlParser

def init_element(node)

# Derives the tag symbol of an element node and updates the parser state
# that depends on it.
#
# - a +body+ tag switches processing on;
# - while processing, a tag listed in +@ignore_content_tags+ switches
#   content ignoring on;
# - a +style+ tag has its text handed to +process_styles+ (regardless of
#   the processing state).
#
# @param node [#name, #text] element node being opened
#
# @return [Symbol] the downcased tag name
def init_element(node)
  tag = node.name.downcase.to_sym
  @processing = true if tag == :body
  @ignore = true if @processing && @ignore_content_tags.include?(tag)
  process_styles(node.text) if tag == :style
  tag
end

def initialize(renderer, ignore_content_tags: %i[script style])

Parameters:
  • ignore_content_tags (Array) -- array of tag names (symbols) whose contents are skipped while preparing the PDF document
  • renderer (DocumentRenderer) -- document renderer
# Sets up the parser state: nothing is being processed or ignored yet and
# no raw styles have been loaded.
#
# @param renderer [DocumentRenderer] document renderer
# @param ignore_content_tags [Array<Symbol>] tags whose contents are
#   skipped while preparing the PDF document
def initialize(renderer, ignore_content_tags: %i[script style])
  # Collaborators and configuration
  @renderer = renderer
  @ignore_content_tags = ignore_content_tags

  # Traversal flags
  @processing = false
  @ignore = false

  # Styles loaded so far (filled by parse_styles)
  @raw_styles = {}
end

def node_close(element)

# Handles the closing of a previously opened element.
#
# While processing: an element that is not being ignored is forwarded to
# the renderer; an ignored element whose tag is listed in
# +@ignore_content_tags+ switches content ignoring off again.
# Closing a +body+ tag always switches processing off.
#
# @param element [#tag] the object returned by +node_open+ for the node
def node_close(element)
  if processing
    if ignore
      # Leaving the tag that started the ignored section
      @ignore = false if @ignore_content_tags.include?(element.tag)
    else
      renderer.on_tag_close(element)
    end
  end
  @processing = false if element.tag == :body
end

def node_open(node)

# Handles the opening of a node.
#
# Element nodes are first passed through +init_element+, which may change
# the processing/ignore state. Afterwards:
# - nothing is returned while the parser is not processing;
# - an +IgnoredTag+ is returned while content is being ignored;
# - element nodes are forwarded to the renderer as an opened tag;
# - any other node is forwarded to the renderer as a text node.
#
# @param node [Object] the node to open
#
# @return [Object, nil] nil when not processing, an +IgnoredTag+ when
#   ignoring, otherwise whatever the renderer call returns
def node_open(node)
  # false (not nil) for non-element nodes
  tag = node.is_a?(Oga::XML::Element) ? init_element(node) : false
  return unless processing

  if ignore
    IgnoredTag.new(tag)
  elsif tag
    renderer.on_tag_open(tag, attributes: prepare_attributes(node), element_styles: styles[node])
  else
    renderer.on_text_node(node.text)
  end
end

def parse_styles(text_styles)

Parameters:
  • text_styles (String) -- The CSS styles to evaluate
# Extracts style rules from CSS text and stores them in +@raw_styles+,
# replacing whatever was stored before.
#
# The text is scanned with +REGEXP_STYLES+ and the matches are turned into
# a hash (later consumed by +process_styles+ as selector => rule pairs).
#
# @param text_styles [String] The CSS styles to evaluate
#
# @return [Hash] the freshly stored rules
def parse_styles(text_styles)
  selector_rule_pairs = text_styles.scan(REGEXP_STYLES)
  @raw_styles = selector_rule_pairs.to_h
end

def prepare_attributes(node)

# Collects the attributes of a node into a plain hash.
#
# @param node [#attributes] node whose attributes each respond to
#   +name+ and +value+
#
# @return [Hash] attribute names mapped to attribute values; when a name
#   occurs more than once the last value is kept
def prepare_attributes(node)
  name_value_pairs = node.attributes.map { |attribute| [attribute.name, attribute.value] }
  name_value_pairs.to_h
end

def process(html)

Parameters:
  • html (String) -- The HTML content to process
# Parses an HTML string and feeds its nodes to the renderer.
#
# The per-document styles are reset, the document is parsed with Oga, the
# previously loaded raw styles are applied to it, every node is traversed
# and finally the renderer is flushed.
#
# @param html [String] The HTML content to process
#
# @return [Object] whatever +renderer.flush+ returns
def process(html)
  @styles = {}
  # When the document has a body tag, processing only starts once that tag
  # is opened. The check is case-insensitive so that `<BODY>` is detected
  # just like `<body>`, in line with init_element, which downcases tag names.
  @processing = !html.match?(/<body/i)
  @document = Oga.parse_html(html)
  process_styles # apply previously loaded styles
  traverse_nodes(document.children)
  renderer.flush
end

def process_styles(text_styles = nil)

# Applies the loaded style rules to the nodes of the current document.
#
# When CSS text is given it is parsed first (replacing the raw styles).
# Each selector is then looked up in the document and its rule is stored
# in +styles+ for every matching node.
#
# @param text_styles [String, nil] optional CSS text to load before applying
#
# @return [Hash] the raw styles that were applied
def process_styles(text_styles = nil)
  parse_styles(text_styles) if text_styles
  @raw_styles.each_pair do |selector, rule|
    matching_nodes = document.css(selector)
    matching_nodes.each { |node| styles[node] = rule }
  end
end

def traverse_nodes(nodes)

# Walks a list of nodes depth-first.
#
# Comment nodes are skipped together with their children. Every other node
# is opened, its children (if any) are traversed recursively, and it is
# closed again when opening it produced an element.
#
# @param nodes [#each] the nodes to traverse
def traverse_nodes(nodes)
  nodes.each do |node|
    unless node.is_a?(Oga::XML::Comment)
      opened = node_open(node)
      children = node.children
      traverse_nodes(children) if children.any?
      # node_open returns nothing for nodes that must not be closed
      node_close(opened) if opened
    end
  end
end