class Kramdown::Parser::Html::ElementConverter
Converts HTML elements to native elements if possible.
def self.convert(root, el = root)
# Convenience entry point: creates a converter for +root+ and runs #process
# on +el+ (which defaults to +root+ itself). Returns whatever #process returns.
def self.convert(root, el = root)
  converter = new(root)
  converter.process(el)
end
def convert_a(el)
# Converts an +a+ element. Only elements that carry an +href+ attribute are
# turned into native :a elements; all others are kept as HTML elements whose
# children are processed without conversion.
def convert_a(el)
  return process_html_element(el, false) unless el.attr['href']

  set_basics(el, :a)
  process_children(el)
end
def convert_code(el)
# Converts a code-like element into a native :codespan (when the tag name is
# 'code') or :codeblock (any other tag routed here) if its whole text content
# can be collapsed into a single string. Otherwise the element is kept as an
# HTML element with its text preserved.
def convert_code(el)
  raw = +''
  extract_text(el, raw)
  result = process_text(raw, true)

  begin
    plain = result.each_with_object(+'') do |part, buffer|
      case part.type
      when :text
        buffer << part.value
      when :entity
        ent = part.value
        # 60, 62, 34 and 38 are the code points of <, >, " and &.
        buffer << ([60, 62, 34, 38].include?(ent.code_point) ? ent.code_point.chr : ent.char)
      when :smart_quote, :typographic_sym
        buffer << entity(part.value.to_s).char
      else
        raise "Bug - please report"
      end
    end
    result.clear
    result << Element.new(:text, plain)
  rescue StandardError
    # Deliberately swallowed: when collapsing raises, +result+ is left as it
    # was at that point and the check below decides how to proceed.
  end

  return process_html_element(el, false, true) if result.length > 1 || result.first.type != :text

  # The tag name must be inspected before set_basics, which resets el.value.
  if el.value == 'code'
    set_basics(el, :codespan)
    el.attr['class']&.gsub!(/\s+\bhighlighter-\w+\b|\bhighlighter-\w+\b\s*/, '')
  else
    set_basics(el, :codeblock)
    only_child = el.children.first
    if el.children.size == 1 && only_child.value == 'code'
      # Copy a language-* class of the single nested code element to the front
      # of this element's own class attribute.
      language = (only_child.attr['class'] || '').scan(/\blanguage-\S+/).first
      el.attr['class'] = "#{language} #{el.attr['class']}".rstrip if language
    end
  end
  el.value = result.first.value
  el.children.clear
end
def convert_em(el)
# Converts an emphasis element into the native type looked up in
# EMPHASIS_TYPE_MAP, unless its text content starts or ends with whitespace;
# in that case it is kept as an HTML element without conversion.
def convert_em(el)
  content = +''
  extract_text(el, content)
  return process_html_element(el, false) if content.match?(/\A\s/) || content.match?(/\s\z/)

  set_basics(el, EMPHASIS_TYPE_MAP[el.value])
  process_children(el)
end
def convert_h1(el)
# Converts a heading element into a native :header element. The level is the
# integer value of the second character of the tag name, and the concatenated
# text content is stored in the :raw_text option.
def convert_h1(el)
  # Read the level first: set_basics resets el.value.
  level = el.value[1, 1].to_i
  set_basics(el, :header, level: level)

  raw_text = +''
  el.options[:raw_text] = raw_text
  extract_text(el, raw_text)
  process_children(el)
end
def convert_script(el)
# Converts a +script+ element: math scripts (see #is_math_tag?) become math
# elements, everything else is processed as a regular HTML element.
def convert_script(el)
  return handle_math_tag(el) if is_math_tag?(el)

  process_html_element(el)
end
def convert_table(el)
# Converts a +table+ element into a native :table element when
# #is_simple_table? accepts it; otherwise the table is kept as an HTML element
# without conversion.
#
# For a converted table the column alignment is taken from the text-align
# styles of the cells, leftover :html_element children are dropped, :th cells
# become :td cells, and bare rows are wrapped in a :tbody element.
def convert_table(el)
  unless is_simple_table?(el)
    process_html_element(el, false)
    return
  end

  remove_text_children(el)
  process_children(el)
  set_basics(el, :table)

  # Walks the tree and stores, for every :tr found, the alignment of its
  # cells in el.options[:alignment] (a later row overwrites an earlier one).
  collect_alignment = lambda do |node|
    unless node.type == :tr
      node.children.each { |child| collect_alignment.call(child) }
      next
    end

    el.options[:alignment] = node.children.map do |cell|
      style = cell.attr['style']
      next :default unless style

      # slice! removes the declaration from the style string in place.
      style.slice!(/(?:;\s*)?text-align:\s+(center|left|right)/)
      direction = Regexp.last_match(1)
      cell.attr.delete('style') if style.strip.empty?
      direction ? direction.to_sym : :default
    end
  end
  collect_alignment.call(el)

  el.children.delete_if { |child| child.type == :html_element }

  # Turns every :th element into a :td element; does not descend into cells
  # that were :th.
  demote_header_cells = lambda do |node|
    if node.type == :th
      node.type = :td
    else
      node.children.each { |child| demote_header_cells.call(child) }
    end
  end
  demote_header_cells.call(el)

  if el.children.first.type == :tr
    body = Element.new(:tbody)
    body.children = el.children
    el.children = [body]
  end
end
def convert_textarea(el)
# Keeps a +textarea+ as an HTML element, processing it with conversion enabled
# and with its text preserved.
def convert_textarea(el)
  do_conversion = true
  preserve_text = true
  process_html_element(el, do_conversion, preserve_text)
end
def extract_text(el, raw)
# Appends the values of all :text elements found in +el+ and its descendants
# to +raw+, in document (pre-order) order. Returns +el.children+.
def extract_text(el, raw)
  pending = [el]
  until pending.empty?
    node = pending.pop
    raw << node.value.to_s if node.type == :text
    # Push children reversed so that the first child is popped first.
    pending.concat(node.children.reverse)
  end
  el.children
end
def handle_math_tag(el)
# Turns the math script element +el+ into a native :math element.
#
# The category is :block when the +type+ attribute contains "mode=display" and
# :span otherwise. The element's value becomes the value of its first child
# with an optional (possibly %-commented) CDATA wrapper stripped, and the
# +type+ attribute is removed. Returns the removed +type+ attribute value.
#
# An element without any child (Array#shift returns +nil+) yields an empty
# math source instead of raising NoMethodError, and a missing +type+
# attribute is treated as an empty string.
def handle_math_tag(el)
  display_mode = el.attr['type'].to_s.include?("mode=display")
  set_basics(el, :math, category: (display_mode ? :block : :span))

  source = el.children.shift
  el.value =
    if source
      source.value.sub(/\A(?:%\s*)?<!\[CDATA\[\n?(.*?)(?:\s%)?\]\]>\z/m, '\1')
    else
      +''
    end
  el.attr.delete('type')
end
def initialize(root)
# Creates a converter and remembers +root+ in @root.
def initialize(root)
  @root = root
end
def is_math_tag?(el)
# Tells whether the +type+ attribute of +el+ mentions math/tex. Returns the
# match position (truthy) or +nil+; a missing attribute counts as no match.
def is_math_tag?(el)
  declared_type = el.attr['type'].to_s
  declared_type =~ /\bmath\/tex\b/
end
def is_simple_table?(el)
# Decides whether the HTML table +el+ can be represented as a native table.
#
# All of the following must hold:
# * no th/td cell contains an element listed in HTML_BLOCK_ELEMENTS,
# * every tr has the same, non-zero number of th/td cells,
# * no cell uses text-align justify/inherit and all rows share one alignment,
# * the table consists either of plain td rows only, or of thead (th rows) /
#   tbody / tfoot (td rows) sections with at least one tbody.
def is_simple_table?(el)
  cell_tag = ->(node) { node.value == 'th' || node.value == 'td' }

  only_phrasing_content = lambda do |node|
    node.children.all? do |child|
      (child.type == :text || !HTML_BLOCK_ELEMENTS.include?(child.value)) &&
        only_phrasing_content.call(child)
    end
  end
  # A proc (not a lambda), so +return+ leaves is_simple_table? itself.
  check_cells = proc do |node|
    if cell_tag.call(node)
      return false unless only_phrasing_content.call(node)
    else
      node.children.each { |child| check_cells.call(child) }
    end
  end
  check_cells.call(el)

  # 0 means "no cells seen yet", -1 means "rows disagree".
  cells_per_row = 0
  count_cells = lambda do |node|
    if node.value == 'tr'
      found = node.children.count(&cell_tag)
      cells_per_row = (cells_per_row == 0 ? found : -1) if found != cells_per_row
    else
      node.children.each { |child| count_cells.call(child) }
    end
  end
  count_cells.call(el)
  return false if cells_per_row == -1 || cells_per_row == 0

  expected_alignment = nil
  # Again a proc, so that +return+ exits the method.
  check_alignment = proc do |node|
    if node.value == 'tr'
      row_alignment = node.children.select(&cell_tag).map do |cell|
        md = /text-align:\s+(center|left|right|justify|inherit)/.match(cell.attr['style'].to_s)
        return false if md && %w[justify inherit].include?(md[1])
        md ? md[1] : :default
      end
      expected_alignment ||= row_alignment
      return false if expected_alignment != row_alignment
    else
      node.children.each { |child| check_alignment.call(child) }
    end
  end
  check_alignment.call(el)

  rows_of = lambda do |section, cell_name|
    section.children.all? do |row|
      (row.value == 'tr' || row.type == :text) &&
        row.children.all? { |cell| cell.value == cell_name || cell.type == :text }
    end
  end
  return true if rows_of.call(el, 'td')

  sections_valid = el.children.all? do |section|
    section.type == :text ||
      (section.value == 'thead' && rows_of.call(section, 'th')) ||
      ((section.value == 'tfoot' || section.value == 'tbody') && rows_of.call(section, 'td'))
  end
  sections_valid && el.children.any? { |section| section.value == 'tbody' }
end
def process(el, do_conversion = true, preserve_text = false, parent = nil)
# Processes the element +el+ and its children.
#
# * :xml_comment / :xml_pi elements only get a :category option, derived from
#   the content model of the parent's tag name ('div' without a parent).
# * A :root element has its children processed and surrounding whitespace
#   children removed.
# * An :html_element is converted by a matching convert_<tag> method when
#   +do_conversion+ is set and such a method exists; otherwise it is handled
#   generically according to the element lists of this class.
# * Every other element type is left untouched.
def process(el, do_conversion = true, preserve_text = false, parent = nil)
  case el.type
  when :xml_comment, :xml_pi
    ptype =
      if parent.nil?
        'div'
      else
        case parent.type
        when :html_element then parent.value
        when :code_span then 'code'
        when :code_block then 'pre'
        when :header then 'h1'
        else parent.type.to_s
        end
      end
    el.options.replace(category: (HTML_CONTENT_MODEL[ptype] == :span ? :span : :block))
    return
  when :root
    el.children.map! do |child|
      if child.type == :text
        process_text(child.value, !do_conversion)
      else
        process(child)
        child
      end
    end.flatten!
    remove_whitespace_children(el)
    return
  when :html_element
    # handled below
  else
    return
  end

  unless do_conversion
    process_html_element(el, do_conversion, preserve_text)
    return
  end

  mname = "convert_#{el.value}"
  return send(mname, el) if self.class.method_defined?(mname)

  type = el.value
  remove_text_children(el) if REMOVE_TEXT_CHILDREN.include?(type)

  if SIMPLE_ELEMENTS.include?(type)
    set_basics(el, type.to_sym)
    process_children(el, do_conversion, preserve_text)
  else
    process_html_element(el, do_conversion, preserve_text)
  end

  strip_whitespace(el) if STRIP_WHITESPACE.include?(type)
  remove_whitespace_children(el) if REMOVE_WHITESPACE_CHILDREN.include?(type)
  wrap_text_children(el) if WRAP_TEXT_CHILDREN.include?(type)
end
def process_children(el, do_conversion = true, preserve_text = false)
# Processes all children of +el+ in place: text children are replaced by the
# elements returned from #process_text, all other children are passed to
# #process with +el+ as their parent. Returns the result of Array#flatten!.
def process_children(el, do_conversion = true, preserve_text = false)
  keep_whitespace = preserve_text || !do_conversion
  el.children.map! do |child|
    next process_text(child.value, keep_whitespace) if child.type == :text

    process(child, do_conversion, preserve_text, el)
    child
  end.flatten!
end
def process_html_element(el, do_conversion = true, preserve_text = false)
# Keeps +el+ as an HTML element: replaces its options with a :category
# (:span for tags in HTML_SPAN_ELEMENTS, :block otherwise) and a
# :content_model (:raw when +do_conversion+ is false, else the entry of
# HTML_CONTENT_MODEL for the tag), then processes the children.
def process_html_element(el, do_conversion = true, preserve_text = false)
  category = HTML_SPAN_ELEMENTS.include?(el.value) ? :span : :block
  content_model = do_conversion ? HTML_CONTENT_MODEL[el.value] : :raw
  el.options.replace(category: category, content_model: content_model)
  process_children(el, do_conversion, preserve_text)
end
def process_text(raw, preserve = false)
Process the HTML text +raw+: compress whitespace (if +preserve+ is +false+) and convert entities into entity, smart quote and typographic symbol elements.
# Splits the HTML text +raw+ into an array of elements.
#
# Unless +preserve+ is set, every run of whitespace in +raw+ is first replaced
# by a single space (this modifies +raw+ in place). Text between entities
# becomes :text elements; entities become :smart_quote, :typographic_sym or
# :entity elements. An entity that cannot be resolved is emitted as an
# ampersand entity and scanning resumes right after its '&'.
def process_text(raw, preserve = false)
  raw.gsub!(/\s+/, ' ') unless preserve
  scanner = Kramdown::Utils::StringScanner.new(raw)
  parts = []

  until scanner.eos?
    leading = scanner.scan_until(/(?=#{HTML_ENTITY_RE})/o)
    unless leading
      # No further entity: the remainder is plain text.
      parts << Element.new(:text, scanner.rest)
      scanner.terminate
      next
    end

    parts << Element.new(:text, leading)
    scanner.scan(HTML_ENTITY_RE)
    # Group 1 is used as is, group 2 is read as a decimal number and
    # group 3 as a hexadecimal number.
    ref = scanner[1] || scanner[2]&.to_i || scanner[3].hex

    converted =
      case ref
      when 'lsquo', 'rsquo', 'ldquo', 'rdquo'
        Element.new(:smart_quote, ref.intern)
      when 'mdash', 'ndash', 'hellip', 'laquo', 'raquo'
        Element.new(:typographic_sym, ref.intern)
      else
        begin
          Element.new(:entity, entity(ref), nil, original: scanner.matched)
        rescue ::Kramdown::Error
          # Rewind to just behind the first matched character.
          scanner.pos -= scanner.matched_size - 1
          Element.new(:entity, ::Kramdown::Utils::Entities.entity('amp'))
        end
      end
    parts << converted
  end

  parts
end
def remove_text_children(el)
# Deletes all direct :text children of +el+ in place and returns the
# children array.
def remove_text_children(el)
  kids = el.children
  kids.delete_if { |child| child.type == :text }
end
def remove_whitespace_children(el)
# Replaces the children of +el+ with a copy that lacks whitespace-only :text
# children which are the first child, the last child, or sit directly between
# two block-level siblings. Returns the new children array.
def remove_whitespace_children(el)
  kids = el.children
  last_index = kids.length - 1
  el.children = kids.reject.with_index do |child, index|
    child.type == :text && child.value.strip.empty? &&
      (index == 0 || index == last_index ||
        (kids[index - 1].block? && kids[index + 1].block?))
  end
end
def set_basics(el, type, opts = {})
# Re-types +el+: sets its type to +type+, replaces the contents of its
# options hash with +opts+ and resets its value to +nil+.
def set_basics(el, type, opts = {})
  el.type = type
  el.options.replace(opts)
  el.value = nil
end
def strip_whitespace(el)
# Strips leading whitespace from the first child and trailing whitespace from
# the last child of +el+, in place, whenever that child is a :text element.
# Does nothing for an element without children.
def strip_whitespace(el)
  kids = el.children
  return if kids.empty?

  head = kids.first
  tail = kids.last
  head.value.lstrip! if head.type == :text
  tail.value.rstrip! if tail.type == :text
end
def wrap_text_children(el)
# Rebuilds the children of +el+ so that every run of consecutive children
# that are text or not block-level is collected into one transparent :p
# element; the remaining block-level children are kept as they are.
def wrap_text_children(el)
  wrapped = []
  open_paragraph = nil
  el.children.each do |child|
    if child.block? && child.type != :text
      wrapped << child
      open_paragraph = nil
    else
      unless open_paragraph
        open_paragraph = Element.new(:p, nil, nil, transparent: true)
        wrapped << open_paragraph
      end
      open_paragraph.children << child
    end
  end
  el.children = wrapped
end