class PrawnHtml::HtmlParser
def init_element(node)
# Updates the parser state for an element node and returns its tag name.
#
# Entering <body> switches processing on; entering one of the ignored content
# tags while processing switches ignoring on; a <style> element has its text
# handed to process_styles.
#
# @param node [Object] element node responding to #name and #text
#
# @return [Symbol] the downcased tag name
def init_element(node)
  tag_name = node.name.downcase.to_sym

  @processing = true if tag_name == :body
  @ignore = true if @processing && @ignore_content_tags.include?(tag_name)
  process_styles(node.text) if tag_name == :style

  tag_name
end
def initialize(renderer, ignore_content_tags: %i[script style])
- ignore_content_tags (Array) -- array of tags (symbols) whose contents are skipped while preparing the PDF document
- renderer (DocumentRenderer) -- document renderer
# Sets up a parser that drives the given renderer.
#
# @param renderer [DocumentRenderer] document renderer
# @param ignore_content_tags [Array<Symbol, String>] tags whose contents are
#   skipped while preparing the PDF document
def initialize(renderer, ignore_content_tags: %i[script style])
  @processing = false
  @ignore = false
  # Tag names are compared as symbols during traversal, so normalize here;
  # this also accepts strings, a single tag or nil instead of silently
  # never matching (or failing on #include?).
  @ignore_content_tags = Array(ignore_content_tags).map(&:to_sym)
  @renderer = renderer
  @raw_styles = {}
end
def node_close(element)
# Handles the end of an element previously returned by node_open.
#
# While processing, a non-ignored element is forwarded to the renderer's
# on_tag_close; an ignored one only ends the ignoring state when its tag is
# one of the ignored content tags. Leaving <body> switches processing off.
#
# @param element [Object] object responding to #tag
def node_close(element)
  if processing
    if ignore
      @ignore = false if @ignore_content_tags.include?(element.tag)
    else
      renderer.on_tag_close(element)
    end
  end

  @processing = false if element.tag == :body
end
def node_open(node)
# Handles a node when the traversal reaches it.
#
# Element nodes first go through init_element (which may toggle the
# processing / ignoring state). Nothing is returned while not processing;
# while ignoring, an IgnoredTag placeholder is returned; otherwise elements
# are sent to the renderer's on_tag_open and any other node to on_text_node.
#
# @param node [Object] node taken from the parsed document
#
# @return [Object, nil] whatever the renderer call returns, an IgnoredTag, or
#   nil when not processing
def node_open(node)
  tag = node.is_a?(Oga::XML::Element) ? init_element(node) : false
  return unless processing

  if ignore
    IgnoredTag.new(tag)
  elsif tag
    renderer.on_tag_open(tag, attributes: prepare_attributes(node), element_styles: styles[node])
  else
    renderer.on_text_node(node.text)
  end
end
def parse_styles(text_styles)
- text_styles (String) -- The CSS styles to evaluate
# Parses CSS text and stores the result as the current raw styles,
# replacing whatever was stored before.
#
# @param text_styles [String] The CSS styles to evaluate
#
# @return [Hash] the pairs captured by REGEXP_STYLES, converted to a hash
def parse_styles(text_styles)
  captured_pairs = text_styles.scan(REGEXP_STYLES)
  @raw_styles = captured_pairs.to_h
end
def prepare_attributes(node)
# Collects the attributes of a node into a plain hash.
#
# @param node [Object] node responding to #attributes, each attribute
#   responding to #name and #value
#
# @return [Hash] attribute names mapped to their values
def prepare_attributes(node)
  name_value_pairs = node.attributes.map { |attribute| [attribute.name, attribute.value] }
  name_value_pairs.to_h
end
def process(html)
- html (String) -- The HTML content to process
# Processes an HTML string and renders it through the document renderer.
#
# @param html [String] The HTML content to process
#
# @return [Object] whatever the renderer's #flush returns
def process(html)
  @styles = {}
  # Without a <body> tag the whole input is content, so processing starts
  # right away; otherwise it is switched on/off when <body> is entered/left.
  # Tag names are downcased during traversal, so the detection here must be
  # case-insensitive as well (e.g. "<BODY>"), or head content would be rendered.
  @processing = !html.match?(/<body/i)
  @document = Oga.parse_html(html)
  process_styles # apply previously loaded styles
  traverse_nodes(document.children)
  renderer.flush
end
def process_styles(text_styles = nil)
# Associates the stored style rules with the document nodes they select.
#
# When CSS text is given it is parsed first (replacing the stored raw
# styles); then every selector is looked up in the document and each matching
# node gets the selector's rule recorded in the styles map.
#
# @param text_styles [String, nil] optional CSS text to parse before applying
def process_styles(text_styles = nil)
  parse_styles(text_styles) if text_styles

  @raw_styles.each_pair do |selector, rule|
    matching_nodes = document.css(selector)
    matching_nodes.each { |node| styles[node] = rule }
  end
end
def traverse_nodes(nodes)
# Walks the given nodes depth-first: each node is opened, its children are
# traversed, and it is closed again when node_open returned something for it.
# Comment nodes are skipped together with their subtree.
#
# @param nodes [Enumerable] nodes to visit
def traverse_nodes(nodes)
  nodes.each do |node|
    unless node.is_a?(Oga::XML::Comment)
      opened = node_open(node)
      traverse_nodes(node.children) if node.children.any?
      node_close(opened) if opened
    end
  end
end