# lib/rouge/lexers/markdown.rb



# -*- coding: utf-8 -*- #

module Rouge
  module Lexers
    # Regex-based lexer for Markdown documents.
    #
    # Block constructs (headings, fenced and indented code, horizontal rules,
    # lists, block quotes, link reference definitions) and inline constructs
    # (code spans, emphasis, links, images, automatic links) are tokenized
    # directly by the rules below. YAML front matter, fenced code bodies and
    # inline HTML are delegated to other lexers.
    class Markdown < RegexLexer
      desc "Markdown, a light-weight markup language for authors"

      tag 'markdown'
      aliases 'md', 'mkd'
      filenames '*.markdown', '*.md', '*.mkd'
      mimetypes 'text/x-markdown'

      # Memoized HTML lexer, built with this lexer's options, that inline
      # HTML tags and entities are delegated to.
      def html
        @html ||= HTML.new(options)
      end

      # Reset the memoized HTML sub-lexer from the `start` hook.
      start { html.reset! }

      # One "escapable" character: either a backslash followed by any
      # character, or a single character that is neither a backslash nor a
      # newline. Interpolated into the inline rules so delimiters can be
      # backslash-escaped and inline spans never cross a line break.
      edot = /\\.|[^\\\n]/

      # NOTE: regex arguments are written as %r/.../ (or wrapped in
      # parentheses) rather than bare /.../ so that Ruby does not emit
      # "ambiguous first argument" warnings for `rule /.../`.
      state :root do
        # YAML frontmatter
        rule(/\A(---\s*\n.*?\n?)^(---\s*$\n?)/m) { delegate YAML }

        rule %r/\\./, Str::Escape

        # Setext headings: "=" underlines mark a first-level heading and "-"
        # underlines a second-level one, matching the "#" / "##" rules below.
        rule %r/^[\S ]+\n(?:===*)\n/, Generic::Heading
        rule %r/^[\S ]+\n(?:---*)\n/, Generic::Subheading

        # ATX headings: a single "#" is a heading, two or more a subheading
        rule %r/^#(?=[^#]).*?$/, Generic::Heading
        rule %r/^##*.*?$/, Generic::Subheading

        # Fenced code blocks (``` or ~~~). The text after the opening fence
        # is used to look up a lexer for the body; when none is found the
        # body is emitted as plain text tagged Str::Backtick.
        rule %r/(\n[ \t]*)(```|~~~)(.*?)(\n.*?)(\2)/m do |m|
          sublexer = Lexer.find_fancy(m[3].strip, m[4])
          sublexer ||= PlainText.new(:token => Str::Backtick)

          token Text, m[1]
          token Punctuation, m[2]
          token Name::Label, m[3]
          delegate sublexer, m[4]
          token Punctuation, m[5]
        end

        # indented code blocks: a blank line followed by lines indented by
        # four spaces or a tab (blank lines inside the block are allowed)
        rule %r/\n\n((    |\t).*?\n|\n)+/, Str::Backtick

        # inline code spans, closed by the same number of backticks
        rule %r/(`+)#{edot}*\1/, Str::Backtick

        # various uses of * are in order of precedence

        # horizontal rules
        rule %r/^(\s*[*]){3,}\s*$/, Punctuation
        rule %r/^(\s*[-]){3,}\s*$/, Punctuation

        # bulleted lists
        rule %r/^\s*[*+-](?=\s)/, Punctuation

        # numbered lists
        rule %r/^\s*\d+\./, Punctuation

        # blockquotes
        rule %r/^\s*>.*?$/, Generic::Traceback

        # link references
        # [foo]: bar "baz"
        rule %r(^
          (\s*) # leading whitespace
          (\[) (#{edot}+?) (\]) # the reference
          (\s*) (:) # colon
        )x do
          groups Text, Punctuation, Str::Symbol, Punctuation, Text, Punctuation

          # :url is pushed last so it is lexed first, followed by :title
          push :title
          push :url
        end

        # links and images
        rule %r/(!?\[)(#{edot}+?)(\])/ do
          groups Punctuation, Name::Variable, Punctuation
          push :link
        end

        rule %r/[*][*]#{edot}*?[*][*]/, Generic::Strong
        rule %r/__#{edot}*?__/, Generic::Strong

        rule %r/[*]#{edot}*?[*]/, Generic::Emph
        rule %r/_#{edot}*?_/, Generic::Emph

        # Automatic links
        # E-mail autolinks may not contain whitespace or angle brackets;
        # otherwise a line such as "<b>x</b> me@example.com <i>y</i>" would
        # be consumed as one token from the first "<" to the last ">".
        rule %r/<[^\s<>@]+@[^\s<>@]+[.][^\s<>@]+>/, Name::Variable
        rule %r[<(https?|mailto|ftp)://#{edot}*?>], Name::Variable


        # plain text up to the next character that may start another rule
        rule %r/[^\\`\[*\n&<]+/, Text

        # inline html
        rule(/&\S*;/) { delegate html }
        rule(/<#{edot}*?>/) { delegate html }
        rule %r/[&<]/, Text

        rule %r/\n/, Text
      end

      # Entered right after "[text]" (or "![text]"): lexes an optional
      # "[reference]" or "(url title)" target, then returns to the caller.
      state :link do
        rule %r/(\[)(#{edot}*?)(\])/ do
          groups Punctuation, Str::Symbol, Punctuation
          pop!
        end

        rule %r/[(]/ do
          token Punctuation
          # :inline_url is pushed last so it is lexed first
          push :inline_title
          push :inline_url
        end

        rule %r/[ \t]+/, Text

        # nothing link-like follows: leave the state without consuming input
        rule(//) { pop! }
      end

      # The destination of a link reference definition.
      state :url do
        rule %r/[ \t]+/, Text

        # the url
        rule %r/(<)(#{edot}*?)(>)/ do
          groups Name::Tag, Str::Other, Name::Tag
          pop!
        end

        rule %r/\S+/, Str::Other, :pop!
      end

      # An optional link title delimited by "...", '...' or (...).
      state :title do
        rule %r/"#{edot}*?"/, Name::Namespace
        rule %r/'#{edot}*?'/, Name::Namespace
        rule %r/[(]#{edot}*?[)]/, Name::Namespace
        rule %r/\s*(?=["'()])/, Text
        # no (further) title: leave the state without consuming input
        rule(//) { pop! }
      end

      # Title portion of an inline "(url title)" link; the closing
      # parenthesis ends the link.
      state :inline_title do
        rule %r/[)]/, Punctuation, :pop!
        mixin :title
      end

      # URL portion of an inline "(url title)" link.
      state :inline_url do
        rule %r/[^<\s)]+/, Str::Other, :pop!
        rule %r/\s+/m, Text
        mixin :url
      end
    end
  end
end