# -*- coding: utf-8 -*-
#
#--
# Copyright (C) 2009-2010 Thomas Leitner <t_leitner@gmx.at>
#
# This file is part of kramdown.
#
# kramdown is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#++
#
require 'kramdown/compatibility'
require 'kramdown/version'
require 'kramdown/error'
require 'kramdown/parser'
require 'kramdown/converter'
require 'kramdown/options'
require 'kramdown/utils'
module Kramdown
# Return the data directory for kramdown.
def self.data_dir
unless defined?(@@data_dir)
require 'rbconfig'
@@data_dir = File.expand_path(File.join(File.dirname(__FILE__), '..', '..', 'data', 'kramdown'))
@@data_dir = File.expand_path(File.join(Config::CONFIG["datadir"], "kramdown")) if !File.exists?(@@data_dir)
raise "kramdown data directory not found! This is a bug, please report it!" unless File.directory?(@@data_dir)
end
@@data_dir
end
# The main interface to kramdown.
#
# This class provides a one-stop-shop for using kramdown to convert text into various output
# formats. Use it like this:
#
# require 'kramdown'
# doc = Kramdown::Document.new('This *is* some kramdown text')
# puts doc.to_html
#
# The #to_html method is a shortcut for using the Converter::Html class. See #method_missing for
# more information.
#
# The second argument to the ::new method is an options hash for customizing the behaviour of the
# used parser and the converter. See ::new for more information!
class Document
# The root Element of the element tree. It is immediately available after the ::new method has
# been called.
attr_accessor :root
# The options hash which holds the options for parsing/converting the Kramdown document.
attr_reader :options
# An array of warning messages. It is filled with warnings during the parsing phase (i.e. in
# ::new) and the conversion phase.
attr_reader :warnings
# Create a new Kramdown document from the string +source+ and use the provided +options+. The
# options that can be used are defined in the Options module.
#
# The special options key <tt>:input</tt> can be used to select the parser that should parse the
# +source+. It has to be the name of a class in the Kramdown::Parser module. For example, to
# select the kramdown parser, one would set the <tt>:input</tt> key to +Kramdown+. If this key
# is not set, it defaults to +Kramdown+.
#
# The +source+ is immediately parsed by the selected parser so that the root element is
# immediately available and the output can be generated.
def initialize(source, options = {})
@options = Options.merge(options).freeze
parser = (options[:input] || 'kramdown').to_s
parser = parser[0..0].upcase + parser[1..-1]
if Parser.const_defined?(parser)
@root, @warnings = Parser.const_get(parser).parse(source, @options)
else
raise Kramdown::Error.new("kramdown has no parser to handle the specified input format: #{options[:input]}")
end
end
# Check if a method is invoked that begins with +to_+ and if so, try to instantiate a converter
# class (i.e. a class in the Kramdown::Converter module) and use it for converting the document.
#
# For example, +to_html+ would instantiate the Kramdown::Converter::Html class.
def method_missing(id, *attr, &block)
if id.to_s =~ /^to_(\w+)$/ && (name = $1[0..0].upcase + $1[1..-1]) && Converter.const_defined?(name)
output, warnings = Converter.const_get(name).convert(@root, @options)
@warnings.concat(warnings)
output
else
super
end
end
def inspect #:nodoc:
"<KD:Document: options=#{@options.inspect} root=#{@root.inspect} warnings=#{@warnings.inspect}>"
end
end
# Represents all elements in the element tree.
#
# kramdown only uses this one class for representing all available elements in an element tree
# (paragraphs, headers, emphasis, ...). The type of element can be set via the #type accessor.
#
# Following is a description of all supported element types.
#
# == Structural Elements
#
# === :root
#
# [Category] None
# [Usage context] As the root element of a document
# [Content model] Block-level elements
#
# Represents the root of a kramdown document.
#
# The root element contains the following option keys:
#
# <tt>:encoding</tt>:: When running on Ruby 1.9 this key has to be set to the encoding used for
# the text parts of the kramdown document.
#
# <tt>:abbrev_defs</tt>:: This key may be used to store the mapping of abbreviation to
# abbreviation definition.
#
#
# === :blank
#
# [Category] Block-level element
# [Usage context] Where block-level elements are expected
# [Content model] Empty
#
# Represents one or more blank lines. It is not allowed to have two or more consecutive blank
# elements.
#
# The +value+ field may contain the original content of the blank lines.
#
#
# === :p
#
# [Category] Block-level element
# [Usage context] Where block-level elements are expected
# [Content model] Span-level elements
#
# Represents a paragraph.
#
# If the option <tt>:transparent</tt> is +true+, this element just represents a block of text.
# I.e. this element just functions as a container for span-level elements.
#
#
# === :header
#
# [Category] Block-level element
# [Usage context] Where block-level elements are expected
# [Content model] Span-level elements
#
# Represents a header.
#
# The option <tt>:level</tt> specifies the header level and has to contain a number between 1 and
# \6. The option <tt>:raw_text</tt> has to contain the raw header text.
#
#
# === :blockquote
#
# [Category] Block-level element
# [Usage context] Where block-level elements are expected
# [Content model] Block-level elements
#
# Represents a blockquote.
#
#
# === :codeblock
#
# [Category] Block-level element
# [Usage context] Where block-level elements are expected
# [Content model] Empty
#
# Represents a code block, i.e. a block of text that should be used as-is.
#
# The +value+ field has to contain the content of the code block.
#
#
# === :ul
#
# [Category] Block-level element
# [Usage context] Where block-level elements are expected
# [Content model] One or more :li elements
#
# Represents an unordered list.
#
#
# === :ol
#
# [Category] Block-level element
# [Usage context] Where block-level elements are expected
# [Content model] One or more :li elements
#
# Represents an ordered list.
#
#
# === :li
#
# [Category] None
# [Usage context] Inside :ol and :ul elements
# [Content model] Block-level elements
#
# Represents a list item of an ordered or unordered list.
#
#
# === :dl
#
# [Category] Block-level element
# [Usage context] Where block-level elements are expected
# [Content model] One or more groups each consisting of one or more :dt elements followed by one
# or more :dd elements.
#
# Represents a definition list which contains groups consisting of terms and definitions for them.
#
#
# === :dt
#
# [Category] None
# [Usage context] Before :dt or :dd elements inside a :dl elment
# [Content model] Span-level elements
#
# Represents the term part of a term-definition group in a definition list.
#
#
# === :dd
#
# [Category] None
# [Usage context] After :dt or :dd elements inside a :dl elment
# [Content model] Block-level elements
#
# Represents the definition part of a term-definition group in a definition list.
#
#
# === :hr
#
# [Category] Block-level element
# [Usage context] Where block-level elements are expected
# [Content model] None
#
# Represents a horizontal line.
#
#
# === :table
#
# [Category] Block-level element
# [Usage context] Where block-level elements are expected
# [Content model] Zero or one :thead elements, one or more :tbody elements, zero or one :tfoot
# elements
#
# Represents a table. Each table row (i.e. :tr element) of the table has to contain the same
# number of :td elements.
#
# The option <tt>:alignment</tt> has to be an array containing the alignment values, exactly one
# for each column of the table. The possible alignment values are <tt>:left</tt>,
# <tt>:center</tt>, <tt>:right</tt> and <tt>:default</tt>.
#
#
# === :thead
#
# [Category] None
# [Usage context] As first element inside a :table element
# [Content model] One or more :tr elements
#
# Represents the table header.
#
#
# === :tbody
#
# [Category] None
# [Usage context] After a :thead element but before a :tfoot element inside a :table element
# [Content model] One or more :tr elements
#
# Represents a table body.
#
#
# === :tfoot
#
# [Category] None
# [Usage context] As last element inside a :table element
# [Content model] One or more :tr elements
#
# Represents the table footer.
#
#
# === :tr
#
# [Category] None
# [Usage context] Inside :thead, :tbody and :tfoot elements
# [Content model] One or more :td elements
#
# Represents a table row.
#
#
# === :td
#
# [Category] None
# [Usage context] Inside :tr elements
# [Content model] As child of :thead/:tr span-level elements, as child of :tbody/:tr and
# :tfoot/:tr block-level elements
#
# Represents a table cell.
#
#
# === :math
#
# [Category] Block/span-level element
# [Usage context] Where block/span-level elements are expected
# [Content model] None
#
# Represents mathematical text that is written in LaTeX.
#
# The +value+ field has to contain the actual mathematical text.
#
# The option <tt>:category</tt> has to be set to either <tt>:span</tt> or <tt>:block</tt>
# depending on the context where the element is used.
#
#
# == Text Markup Elements
#
# === :text
#
# [Category] Span-level element
# [Usage context] Where span-level elements are expected
# [Content model] None
#
# Represents text.
#
# The +value+ field has to contain the text itself.
#
#
# === :br
#
# [Category] Span-level element
# [Usage context] Where span-level elements are expected
# [Content model] None
#
# Represents a hard line break.
#
#
# === :a
#
# [Category] Span-level element
# [Usage context] Where span-level elements are expected
# [Content model] Span-level elements
#
# Represents a link to an URL.
#
# The attribute +href+ has to be set to the URL to which the link points. The attribute +title+
# optionally contains the title of the link.
#
#
# === :img
#
# [Category] Span-level element
# [Usage context] Where span-level elements are expected
# [Content model] None
#
# Represents an image.
#
# The attribute +src+ has to be set to the URL of the image. The attribute +alt+ has to contain a
# text description of the image. The attribute +title+ optionally contains the title of the image.
#
#
# === :codespan
#
# [Category] Span-level element
# [Usage context] Where span-level elements are expected
# [Content model] None
#
# Represents verbatim text.
#
# The +value+ field has to contain the content of the code span.
#
#
# === :footnote
#
# [Category] Span-level element
# [Usage context] Where span-level elements are expected
# [Content model] None
#
# Represents a footnote marker.
#
# The +value+ field has to contain an element whose children are the content of the footnote. The
# option <tt>:name</tt> has to contain a valid and unique footnote name. A valid footnote name
# consists of a word character or a digit and then optionally followed by other word characters,
# digits or dashes.
#
#
# === :em
#
# [Category] Span-level element
# [Usage context] Where span-level elements are expected
# [Content model] Span-level elements
#
# Represents emphasis of its contents.
#
#
# === :strong
#
# [Category] Span-level element
# [Usage context] Where span-level elements are expected
# [Content model] Span-level elements
#
# Represents strong importance for its contents.
#
#
# === :entity
#
# [Category] Span-level element
# [Usage context] Where span-level elements are expected
# [Content model] None
#
# Represents an HTML entity.
#
# The +value+ field has to contain an instance of Kramdown::Utils::Entities::Entity. The option
# <tt>:original</tt> can be used to store the original representation of the entity.
#
#
# === :typographic_sym
#
# [Category] Span-level element
# [Usage context] Where span-level elements are expected
# [Content model] None
#
# Represents a typographic symbol.
#
# The +value+ field needs to contain a Symbol representing the specific typographic symbol from
# the following list:
#
# <tt>:mdash</tt>:: An mdash character (---)
# <tt>:ndash</tt>:: An ndash character (--)
# <tt>:hellip</tt>:: An ellipsis (...)
# <tt>:laquo</tt>:: A left guillemet (<<)
# <tt>:raquo</tt>:: A right guillemet (>>)
# <tt>:laquo_space</tt>:: A left guillemet with a space (<< )
# <tt>:raquo_space</tt>:: A right guillemet with a space ( >>)
#
#
# === :smart_quote
#
# [Category] Span-level element
# [Usage context] Where span-level elements are expected
# [Content model] None
#
# Represents a quotation character.
#
# The +value+ field needs to contain a Symbol representing the specific quotation character:
#
# <tt>:lsquo</tt>:: Left single quote
# <tt>:rsquo</tt>:: Right single quote
# <tt>:ldquo</tt>:: Left double quote
# <tt>:rdquo</tt>:: Right double quote
#
#
# === :abbreviation
#
# [Category] Span-level element
# [Usage context] Where span-level elements are expected
# [Content model] None
#
# Represents a text part that is an abbreviation.
#
# The +value+ field has to contain the text part that is the abbreviation. The definition of the
# abbreviation is stored in the <tt>:root</tt> element of the document.
#
#
# == Other Elements
#
# === :html_element
#
# [Category] Block/span-level element
# [Usage context] Where block/span-level elements or raw HTML elements are expected
# [Content model] Depends on the element
#
# Represents an HTML element.
#
# The +value+ field has to contain the name of the HTML element the element is representing.
#
# The option <tt>:category</tt> has to be set to either <tt>:span</tt> or <tt>:block</tt>
# depending on the whether the element is a block-level or a span-level element. The option
# <tt>:content_model</tt> has to be set to the content model for the element (either
# <tt>:block</tt> if it contains block-level elements, <tt>:span</tt> if it contains span-level
# elements or <tt>:raw</tt> if it contains raw content).
#
#
# === :xml_comment
#
# [Category] Block/span-level element
# [Usage context] Where block/span-level elements are expected or in raw HTML elements
# [Content model] None
#
# Represents an XML/HTML comment.
#
# The +value+ field has to contain the whole XML/HTML comment including the delimiters.
#
# The option <tt>:category</tt> has to be set to either <tt>:span</tt> or <tt>:block</tt>
# depending on the context where the element is used.
#
#
# === :xml_pi
#
# [Category] Block/span-level element
# [Usage context] Where block/span-level elements are expected or in raw HTML elements
# [Content model] None
#
# Represents an XML/HTML processing instruction.
#
# The +value+ field has to contain the whole XML/HTML processing instruction including the
# delimiters.
#
# The option <tt>:category</tt> has to be set to either <tt>:span</tt> or <tt>:block</tt>
# depending on the context where the element is used.
#
#
# === :comment
#
# [Category] Block/span-level element
# [Usage context] Where block/span-level elements are expected
# [Content model] None
#
# Represents a comment.
#
# The +value+ field has to contain the comment.
#
# The option <tt>:category</tt> has to be set to either <tt>:span</tt> or <tt>:block</tt>
# depending on the context where the element is used.
#
#
# === :raw
#
# [Category] Block/span-level element
# [Usage context] Where block/span-level elements are expected
# [Content model] None
#
# Represents a raw string that should not be modified. For example, the element could contain some
# HTML code that should be output as-is without modification and escaping.
#
# The +value+ field has to contain the actual raw text.
#
# The option <tt>:category</tt> has to be set to either <tt>:span</tt> or <tt>:block</tt>
# depending on the context where the element is used. The option <tt>:type</tt> can be set to an
# array of strings to define for which converters the raw string is valid.
class Element
# A symbol representing the element type. For example, <tt>:p</tt> or <tt>:blockquote</tt>.
attr_accessor :type
# The value of the element. The interpretation of this field depends on the type of the element.
# Many elements don't use this field.
attr_accessor :value
# The child elements of this element.
attr_accessor :children
# Create a new Element object of type +type+. The optional parameters +value+, +attr+ and
# +options+ can also be set in this constructor for convenience.
def initialize(type, value = nil, attr = nil, options = nil)
@type, @value, @attr, @options = type, value, (Utils::OrderedHash.new.merge!(attr) if attr), options
@children = []
end
# The attributes of the element. Uses an Utils::OrderedHash to retain the insertion order.
def attr
@attr ||= Utils::OrderedHash.new
end
# The options hash for the element. It is used for storing arbitray options.
def options
@options ||= {}
end
def inspect #:nodoc:
"<kd:#{@type}#{@value.nil? ? '' : ' ' + @value.inspect} #{@attr.inspect}#{options.empty? ? '' : ' ' + @options.inspect}#{@children.empty? ? '' : ' ' + @children.inspect}>"
end
CATEGORY = {} # :nodoc:
[:blank, :p, :header, :blockquote, :codeblock, :ul, :ol, :dl, :table, :hr].each {|b| CATEGORY[b] = :block}
[:text, :a, :br, :img, :codespan, :footnote, :em, :strong, :entity, :typographic_sym,
:smart_quote, :abbreviation].each {|b| CATEGORY[b] = :span}
# Return the category of +el+ which can be <tt>:block</tt>, <tt>:span</tt> or +nil+.
#
# Most elements have a fixed category, however, some elements can either appear in a block-level
# or a span-level context. These elements need to have the option <tt>:category</tt> correctly
# set.
def self.category(el)
CATEGORY[el.type] || el.options[:category]
end
end
end