lib/rouge/lexers/html.rb
# -*- coding: utf-8 -*- #
# frozen_string_literal: true
module Rouge
  module Lexers
    # Lexer for HTML documents.
    #
    # Markup is tokenized by the states below; the bodies of <script> and
    # <style> elements are delegated to the Javascript and CSS lexers.
    # Rules within a state are tried in order, so more specific patterns
    # must be listed before more general ones.
    class HTML < RegexLexer
      title "HTML"
      desc "HTML, the markup language of the web"
      tag 'html'
      filenames '*.htm', '*.html', '*.xhtml'
      mimetypes 'text/html', 'application/xhtml+xml'

      # Content-based detection.
      #
      # Returns true when `text.doctype?` reports an html doctype or when the
      # text contains an opening `<html` tag; returns nil otherwise.
      def self.detect?(text)
        return true if text.doctype?(/\bhtml\b/i)
        return true if text =~ /<\s*html\b/
      end

      # Build the sub-lexers used for embedded script and style content.
      start do
        @javascript = Javascript.new(options)
        @css = CSS.new(options)
      end

      state :root do
        rule %r/[^<&]+/m, Text
        rule %r/&\S*?;/, Name::Entity
        rule %r/<!DOCTYPE .*?>/im, Comment::Preproc
        rule %r/<!\[CDATA\[.*?\]\]>/m, Comment::Preproc
        rule %r/<!--/, Comment, :comment
        rule %r/<\?.*?\?>/m, Comment::Preproc # php? really?

        # The negative lookahead stops the tag name at `script`, so that
        # elements whose name merely starts with it (e.g. <script-foo>) are
        # lexed as ordinary tags instead of switching to Javascript.
        rule %r/<\s*script(?![a-zA-Z0-9:-])\s*/m do
          token Name::Tag
          @javascript.reset!
          # :tag is pushed last so the attributes are lexed first; once the
          # tag closes, the lexer is left in :script_content.
          push :script_content
          push :tag
        end

        # Same name-boundary guard as for <script> above.
        rule %r/<\s*style(?![a-zA-Z0-9:-])\s*/m do
          token Name::Tag
          @css.reset!
          @lang = @css
          push :style_content
          push :tag
        end

        # Every remaining `<` is handled by one of these two rules. The
        # former combined "opening tag" / "closing tag" patterns that used to
        # follow them could never match and have been removed.
        rule %r(</), Name::Tag, :tag_end
        rule %r/</, Name::Tag, :tag_start
      end

      # After `</`: optional whitespace, the tag name, then `>`.
      state :tag_end do
        mixin :tag_end_end
        rule %r/[a-zA-Z0-9:-]+/ do
          token Name::Tag
          goto :tag_end_end
        end
      end

      # After the closing tag's name: only whitespace and `>` are expected.
      state :tag_end_end do
        rule %r/\s+/, Text
        rule %r/>/, Name::Tag, :pop!
      end

      # After `<`: optional whitespace, then the tag name.
      state :tag_start do
        rule %r/\s+/, Text
        rule %r/[a-zA-Z0-9:-]+/ do
          token Name::Tag
          goto :tag
        end
        # The empty pattern always matches: when no tag name follows, move
        # on to :tag without consuming any input.
        rule(//) { goto :tag }
      end

      state :comment do
        rule %r/[^-]+/, Comment
        rule %r/-->/, Comment, :pop!
        rule %r/-/, Comment
      end

      # Inside a tag, after its name: attributes until `>` or `/>`.
      state :tag do
        rule %r/\s+/m, Text
        rule %r/[a-zA-Z0-9_:-]+\s*=\s*/m, Name::Attribute, :attr
        rule %r/[a-zA-Z0-9_:-]+/, Name::Attribute
        rule %r(/?\s*>)m, Name::Tag, :pop!
      end

      # An attribute value: double-quoted, single-quoted, or bare.
      state :attr do
        # TODO: are backslash escapes valid here?
        rule %r/"/ do
          token Str
          goto :dq
        end

        rule %r/'/ do
          token Str
          goto :sq
        end

        rule %r/[^\s>]+/, Str, :pop!
      end

      state :dq do
        rule %r/"/, Str, :pop!
        rule %r/[^"]+/, Str
      end

      state :sq do
        rule %r/'/, Str, :pop!
        rule %r/[^']+/, Str
      end

      # Body of a <script> element: everything up to the closing tag is
      # handed to the Javascript lexer.
      state :script_content do
        rule %r([^<]+) do
          delegate @javascript
        end

        rule %r(<\s*/\s*script\s*>)m, Name::Tag, :pop!

        # A `<` that does not start the closing tag is still script content.
        rule %r(<) do
          delegate @javascript
        end
      end

      # Body of a <style> element: everything up to the closing tag is
      # handed to the lexer stored in @lang (set to the CSS lexer in :root).
      state :style_content do
        rule %r/[^<]+/ do
          delegate @lang
        end

        rule %r(<\s*/\s*style\s*>)m, Name::Tag, :pop!

        # A `<` that does not start the closing tag is still style content.
        rule %r/</ do
          delegate @lang
        end
      end
    end
  end
end