class Kramdown::Parser::Html::ElementConverter
Converts HTML elements to native elements if possible.
def convert_a(el)
# Converts an <a> element.
#
# When the element carries an +href+ attribute it is turned into a native
# :a span element and its children are processed. Otherwise it is passed to
# process_html_element with conversion switched off.
def convert_a(el)
  unless el.options[:attr].has_key?('href')
    return process_html_element(el, false)
  end

  set_basics(el, :a, :span)
  process_children(el)
end
def convert_code(el)
# Converts a code-like HTML element into a native :codespan element (when
# the tag is 'code') or a :codeblock element (any other tag).
#
# The text below +el+ is collected with extract_text and run through
# process_text with whitespace preserved. The resulting pieces are then
# joined into one string: text is appended as is, entities, smart quotes and
# typographic symbols are appended as characters encoded in
# @doc.parse_infos[:encoding]. If joining raises a StandardError the error is
# swallowed on purpose and the piece list is left as it was.
#
# When the list does not end up as exactly one :text element, the element is
# kept as an HTML element (process_html_element, no conversion, text
# preserved) instead of being converted.
def convert_code(el)
  raw = ''
  extract_text(el, raw)
  pieces = process_text(raw, true)

  begin
    joined = pieces.inject('') do |buffer, piece|
      case piece.type
      when :text
        buffer << piece.value
      when :entity
        if RUBY_VERSION >= '1.9'
          buffer << piece.value.char.encode(@doc.parse_infos[:encoding])
        elsif [60, 62, 34, 38].include?(piece.value.code_point)
          # 60, 62, 34 and 38 are the code points of <, >, " and &.
          buffer << piece.value.code_point.chr
        end
      when :smart_quote, :typographic_sym
        buffer << entity(piece.value.to_s).char.encode(@doc.parse_infos[:encoding])
      else
        raise "Bug - please report"
      end
    end
    pieces.clear
    pieces << Element.new(:text, joined)
  rescue
    # Deliberately ignored: the check below decides how to proceed.
  end

  if pieces.length <= 1 && pieces.first.type == :text
    type, category = el.value == 'code' ? [:codespan, :span] : [:codeblock, :block]
    set_basics(el, type, category)
    el.value = pieces.first.value
  else
    process_html_element(el, false, true)
  end
end
def convert_div(el)
# Converts a <div> element: math containers (as decided by is_math_tag?) are
# handled by handle_math_tag, everything else by process_html_element.
def convert_div(el)
  if is_math_tag?(el)
    handle_math_tag(el)
  else
    process_html_element(el)
  end
end
def convert_h1(el)
# Converts a heading element into a native :header block element.
#
# The header level is the digit at index 1 of the tag name (for example 3
# for 'h3'). The text below the element is collected into the :raw_text
# option before the children are processed.
def convert_h1(el)
  level = el.value[1..1].to_i
  set_basics(el, :header, :block, :level => level)

  raw_text = ''
  el.options[:raw_text] = raw_text
  extract_text(el, raw_text)

  process_children(el)
end
def convert_table(el)
# Converts a <table> element into a native :table block element.
#
# Tables rejected by is_simple_table? are kept as HTML elements
# (process_html_element with conversion switched off). For accepted tables
# the children are processed, the :alignment option is set to one :default
# entry per child of the first non-empty :tr found in a depth-first walk, and
# rows that sit directly below the table are wrapped in a :tbody element.
#
# A table without any children is handled as well: it simply gets an empty
# alignment list and no :tbody wrapper.
def convert_table(el)
  unless is_simple_table?(el)
    process_html_element(el, false)
    return
  end

  process_children(el)
  set_basics(el, :table, :block)

  el.options[:alignment] = []
  calc_alignment = lambda do |node|
    if node.type == :tr && el.options[:alignment].empty?
      el.options[:alignment] = [:default] * node.children.length
    else
      node.children.each { |child| calc_alignment.call(child) }
    end
  end
  calc_alignment.call(el)

  # Guard against an empty table, where there is no first child to inspect.
  first_child = el.children.first
  if first_child && first_child.type == :tr
    tbody = Element.new(:tbody, nil, :category => :block)
    tbody.children = el.children
    el.children = [tbody]
  end
end
def extract_text(el, raw)
# Appends the value of every :text element at or below +el+ to +raw+,
# visiting the tree depth first in document order.
def extract_text(el, raw)
  if el.type == :text
    raw << el.value.to_s
  end

  el.children.each do |child|
    extract_text(child, raw)
  end
end
def handle_math_tag(el)
# Converts a math container into a native :math element.
#
# The element becomes a block element when its tag is 'div' and a span
# element otherwise. Its value is taken from its first child, which is
# removed from the children. The 'math' marker is removed from the class
# attribute (the attribute is deleted when nothing else is left), and the
# entities &amp;amp;, &amp;quot;, &amp;gt; and &amp;lt; in the value are decoded
# in a single pass.
#
# Expects el.options[:attr]['class'] to be present, as is_math_tag? checks.
def handle_math_tag(el)
  set_basics(el, :math, (el.value == 'div' ? :block : :span))
  el.value = el.children.shift.value

  attr = el.options[:attr]
  if attr['class'] =~ /\A\s*math\s*\z/
    attr.delete('class')
  else
    # Only strip the stand-alone word, not "math" inside a longer class name.
    attr['class'].sub!(/\s?\bmath\b/, '')
  end

  # The block receives the whole match (e.g. "&amp;amp;"), so the replacement
  # is looked up by the captured entity name.
  decoded = {'amp' => '&', 'quot' => '"', 'gt' => '>', 'lt' => '<'}
  el.value.gsub!(/&(amp|quot|gt|lt);/) { decoded[Regexp.last_match(1)] }
end
def initialize(doc)
# Creates a converter for the document +doc+. The document is stored in
# @doc; convert_code reads its parse_infos[:encoding] entry.
def initialize(doc)
  @doc = doc
end
def is_math_tag?(el)
# Tells whether +el+ is a math container: it has attributes, its class
# attribute contains the stand-alone word "math", and its only child is a
# :text element.
#
# Returns +true+ for a math container and a falsy value (+nil+ or +false+)
# otherwise.
def is_math_tag?(el)
  attributes = el.options[:attr]
  return attributes unless attributes
  return nil unless /\bmath\b/ =~ attributes['class'].to_s

  kids = el.children
  kids.size == 1 && kids.first.type == :text
end
def is_simple_table?(el)
# Tells whether the table +el+ is simple enough to be converted into a
# native table.
#
# Two conditions must hold:
#
# * No 'th' or 'td' cell contains, at any depth, a non-text element whose
#   tag is listed in HTML_BLOCK_ELEMENTS.
# * The table consists either of 'tr' rows with 'td' cells only, or of
#   'thead' (rows of 'th'), 'tfoot' and 'tbody' (rows of 'td') sections with
#   at least one 'tbody'. Text elements are tolerated at every level.
def is_simple_table?(el)
  phrasing_only = lambda do |node|
    node.children.all? do |child|
      (child.type == :text || !HTML_BLOCK_ELEMENTS.include?(child.value)) &&
        phrasing_only.call(child)
    end
  end

  # Walks down to the cells; cells themselves are checked but not descended
  # into by this walker.
  cells_ok = lambda do |node|
    if node.value == 'th' || node.value == 'td'
      phrasing_only.call(node)
    else
      node.children.all? { |child| cells_ok.call(child) }
    end
  end
  return false unless cells_ok.call(el)

  rows_of = lambda do |section, cell_tag|
    section.children.all? do |row|
      (row.value == 'tr' || row.type == :text) &&
        row.children.all? { |cell| cell.value == cell_tag || cell.type == :text }
    end
  end
  return true if rows_of.call(el, 'td')

  sections_ok = el.children.all? do |section|
    section.type == :text ||
      (section.value == 'thead' && rows_of.call(section, 'th')) ||
      ((section.value == 'tfoot' || section.value == 'tbody') && rows_of.call(section, 'td'))
  end
  sections_ok && el.children.any? { |section| section.value == 'tbody' }
end
def process(el, do_conversion = true, preserve_text = false, parent = nil)
# Processes the element +el+ in place.
#
# * :xml_comment, :xml_pi and :html_doctype elements only get their options
#   replaced by a :category entry. The category is :span when the tag name
#   derived from +parent+ is listed in HTML_PARSE_AS_SPAN and :block
#   otherwise; without a parent the tag name 'div' is assumed.
# * :html_element elements are converted as described below.
# * Every other element type is left untouched.
#
# For HTML elements, a public +convert_<tag>+ method is called when one
# exists, tags listed in SIMPLE_ELEMENTS are mapped directly onto a native
# element of the same name, and everything else goes through
# process_html_element. With +do_conversion+ set to +false+ the element is
# always handled by process_html_element. +preserve_text+ is passed on to
# the text processing of the children.
#
# Text and whitespace clean-up steps selected by REMOVE_TEXT_CHILDREN,
# STRIP_WHITESPACE, REMOVE_WHITESPACE_CHILDREN and WRAP_TEXT_CHILDREN are
# applied based on the original tag name.
def process(el, do_conversion = true, preserve_text = false, parent = nil)
  case el.type
  when :html_element
    # Converted below.
  when :xml_comment, :xml_pi, :html_doctype
    parent_tag = 'div'
    unless parent.nil?
      native_tags = {:code_span => 'code', :code_block => 'pre', :header => 'h1'}
      kind = parent.type
      parent_tag = kind == :html_element ? parent.value : (native_tags[kind] || kind.to_s)
    end
    el.options = {:category => HTML_PARSE_AS_SPAN.include?(parent_tag) ? :span : :block}
    return
  else
    return
  end

  # Remember the tag name now: converters may reset el.value.
  tag = el.value
  remove_text_children(el) if REMOVE_TEXT_CHILDREN.include?(tag)

  converter = "convert_#{tag}"
  if !do_conversion
    process_html_element(el, do_conversion, preserve_text)
  elsif self.class.method_defined?(converter)
    send(converter, el)
  elsif SIMPLE_ELEMENTS.include?(tag)
    set_basics(el, tag.intern, HTML_SPAN_ELEMENTS.include?(tag) ? :span : :block)
    process_children(el, do_conversion, preserve_text)
  else
    process_html_element(el, do_conversion, preserve_text)
  end

  strip_whitespace(el) if STRIP_WHITESPACE.include?(tag)
  remove_whitespace_children(el) if REMOVE_WHITESPACE_CHILDREN.include?(tag)
  wrap_text_children(el) if WRAP_TEXT_CHILDREN.include?(tag)
end
def process_children(el, do_conversion = true, preserve_text = false)
# Processes the children of +el+ in place: every :text child is replaced by
# the elements process_text returns for its value, every other child is run
# through process with +el+ as parent and kept.
def process_children(el, do_conversion = true, preserve_text = false)
  kids = el.children
  kids.map! do |child|
    next process_text(child.value, preserve_text) if child.type == :text

    process(child, do_conversion, preserve_text, el)
    child
  end
  # process_text returns arrays, so splice them into the child list.
  kids.flatten!
end
def process_html_element(el, do_conversion = true, preserve_text = false)
# Keeps +el+ as an HTML element: its options are replaced by the category
# (:span for tags in HTML_SPAN_ELEMENTS, :block otherwise), the parse type
# looked up in HTML_PARSE_AS and the existing attributes. Afterwards the
# children are processed with the given flags.
def process_html_element(el, do_conversion = true, preserve_text = false)
  tag = el.value
  category = HTML_SPAN_ELEMENTS.include?(tag) ? :span : :block
  el.options = {
    :category => category,
    :parse_type => HTML_PARSE_AS[tag],
    :attr => el.options[:attr]
  }
  process_children(el, do_conversion, preserve_text)
end
def process_text(raw, preserve = false)
Process the HTML text +raw+: compress whitespace (if +preserve+ is +false+) and convert HTML entities into entity, smart quote or typographic symbol elements.
# Splits the HTML text +raw+ into an array of elements.
#
# Unless +preserve+ is true, every run of whitespace in +raw+ is first
# collapsed into a single space. Note that this modifies +raw+ in place.
#
# The text before each HTML entity becomes a :text element (possibly with an
# empty value when two entities are adjacent). The entity itself becomes
#
# * a :smart_quote element for lsquo, rsquo, ldquo and rdquo,
# * a :typographic_sym element for mdash, ndash, hellip, laquo and raquo,
# * an :entity element otherwise, built via +entity+ from the entity name or
#   its decimal/hexadecimal number, with the matched source text stored in
#   the :original option.
#
# Any text after the last entity becomes a final :text element.
def process_text(raw, preserve = false)
  raw.gsub!(/\s+/, ' ') unless preserve
  src = StringScanner.new(raw)
  # Built once instead of on every loop iteration.
  before_entity = /(?=#{HTML_ENTITY_RE})/
  result = []

  until src.eos?
    if (text = src.scan_until(before_entity))
      result << Element.new(:text, text)
      src.scan(HTML_ENTITY_RE)
      # Group 1 is an entity name, group 2 a decimal and group 3 a
      # hexadecimal character reference.
      val = src[1] || (src[2] && src[2].to_i) || src[3].hex
      result << if %w{lsquo rsquo ldquo rdquo}.include?(val)
                  Element.new(:smart_quote, val.intern)
                elsif %w{mdash ndash hellip laquo raquo}.include?(val)
                  Element.new(:typographic_sym, val.intern)
                else
                  Element.new(:entity, entity(val), :original => src.matched)
                end
    else
      result << Element.new(:text, src.scan(/.*/m))
    end
  end

  result
end
def remove_text_children(el)
# Removes every :text element from the children of +el+, modifying the
# child list in place.
def remove_text_children(el)
  el.children.delete_if do |child|
    child.type == :text
  end
end
def remove_whitespace_children(el)
# Removes whitespace-only :text children of +el+ that are the first child,
# the last child, or sit directly between two block-level siblings. The
# child list is modified in place.
#
# Positions and neighbours always refer to the child list as it was before
# any removal.
def remove_whitespace_children(el)
  # Decide on a snapshot so the neighbour look-ups never read an array that
  # is being modified at the same time.
  siblings = el.children.dup
  last_index = siblings.length - 1

  kept = []
  siblings.each_with_index do |child, i|
    removable = child.type == :text && child.value.strip.empty? &&
      (i == 0 || i == last_index ||
        (siblings[i - 1].options[:category] == :block &&
         siblings[i + 1].options[:category] == :block))
    kept << child unless removable
  end

  el.children.replace(kept)
end
def set_basics(el, type, category, opts = {})
# Turns +el+ into an element of the given +type+: its options are replaced
# by the +category+ and the existing attributes, merged with +opts+ (entries
# in +opts+ win), and its value is reset to +nil+.
def set_basics(el, type, category, opts = {})
  el.type = type

  base_options = {:category => category, :attr => el.options[:attr]}
  el.options = base_options.merge(opts)

  el.value = nil
end
def strip_whitespace(el)
# Strips leading whitespace from the first child and trailing whitespace
# from the last child of +el+, in place, when the respective child is a
# :text element. Does nothing for an element without children.
def strip_whitespace(el)
  kids = el.children
  return if kids.empty?

  head = kids.first
  head.value.lstrip! if head.type == :text

  tail = kids.last
  tail.value.rstrip! if tail.type == :text
end
def wrap_text_children(el)
# Groups the children of +el+ so that only block-level elements remain as
# direct children: every run of consecutive children that are text elements
# or not of category :block is moved into a new :p element created with the
# option :transparent => true. Block-level children are kept as they are.
def wrap_text_children(el)
  wrapped = []
  open_paragraph = nil

  el.children.each do |child|
    if child.options[:category] == :block && child.type != :text
      wrapped << child
      open_paragraph = nil
    else
      unless open_paragraph
        open_paragraph = Element.new(:p, nil, :transparent => true)
        wrapped << open_paragraph
      end
      open_paragraph.children << child
    end
  end

  el.children = wrapped
end