lib/rouge/lexers/html.rb



# -*- coding: utf-8 -*- #

module Rouge
  module Lexers
    class HTML < RegexLexer
      desc "HTML, the markup language of the web"
      tag 'html'
      filenames '*.htm', '*.html', '*.xhtml'
      mimetypes 'text/html', 'application/xhtml+xml'

      def self.analyze_text(text)
        return 1 if text.doctype?(/\bhtml\b/i)
        return 1 if text =~ /<\s*html\b/
      end

      state :root do
        rule /[^<&]+/m, Text
        rule /&\S*?;/, Name::Entity
        rule /<!DOCTYPE .*?>/im, Comment::Preproc
        rule /<!\[CDATA\[.*?\]\]>/m, Comment::Preproc
        rule /<!--/, Comment, :comment
        rule /<\?.*?\?>/m, Comment::Preproc # php? really?

        rule /<\s*script\s*/m do
          token Name::Tag
          push :script_content
          push :tag
        end

        rule /<\s*style\s*/m do
          token Name::Tag
          push :style_content
          push :tag
        end

        rule %r(<\s*[a-zA-Z0-9:-]+), Name::Tag, :tag # opening tags
        rule %r(<\s*/\s*[a-zA-Z0-9:-]+\s*>), Name::Tag # closing tags
      end

      state :comment do
        rule /[^-]+/, Comment
        rule /-->/, Comment, :pop!
        rule /-/, Comment
      end

      state :tag do
        rule /\s+/m, Text
        rule /[a-zA-Z0-9_:-]+\s*=/m, Name::Attribute, :attr
        rule /[a-zA-Z0-9_:-]+/, Name::Attribute
        rule %r(/?\s*>)m, Name::Tag, :pop!
      end

      state :attr do
        # TODO: are backslash escapes valid here?
        rule /"/ do
          token Str
          goto :dq
        end

        rule /'/ do
          token Str
          goto :sq
        end

        rule /[^\s>]+/, Str, :pop!
      end

      state :dq do
        rule /"/, Str, :pop!
        rule /[^"]+/, Str
      end

      state :sq do
        rule /'/, Str, :pop!
        rule /[^']+/, Str
      end

      state :script_content do
        rule %r(<\s*/\s*script\s*>)m, Name::Tag, :pop!
        rule %r(.*?(?=<\s*/\s*script\s*>))m do
          delegate Javascript
        end
      end

      state :style_content do
        rule %r(<\s*/\s*style\s*>)m, Name::Tag, :pop!
        rule %r(.*(?=<\s*/\s*style\s*>))m do
          delegate CSS
        end
      end
    end
  end
end