# -*- coding: utf-8 -*- #
# frozen_string_literal: true
# C minus minus (Cmm) is a pun on the name C++. It's an intermediate language
# of the Glasgow Haskell Compiler (GHC) that is very similar to C, but with
# many features missing and some special constructs.
#
# Cmm is a dialect of C--. The goal of this lexer is to use what GHC produces
# and parses (Cmm); C-- itself is not supported.
#
# https://gitlab.haskell.org/ghc/ghc/wikis/commentary/compiler/cmm-syntax
#
module Rouge
module Lexers
class GHCCmm < RegexLexer
# Registration metadata: human-readable title and description, the primary
# tag with its alias, and the filename globs associated with this lexer.
title 'GHC Cmm (C--)'
desc 'GHC Cmm is the intermediate representation of the GHC Haskell compiler'
tag 'ghc-cmm'
aliases 'cmm'
filenames '*.cmm', '*.dump-cmm', '*.dump-cmm-*'
# Building blocks interpolated into the rules of the states below.

# "Whitespace" between tokens: a blank character, a `// ...` comment up to
# and including its newline, or a `/* ... */` block comment.
ws = %r(\s|//.*?\n|/[*](?:[^*]|(?:[*][^/]))*[*]+/)mx

# Make sure that this is not a preprocessor macro, e.g. `#if` or `#define`.
id = %r((?!\#[a-zA-Z])[\w#\$%_']+)

# Identifier that may additionally embed the sequences `()`, `(,)` and `[]`,
# and that always ends in at least one plain identifier character.
#
# Digits are already covered by `\w`, so no separate digit alternative is
# listed: a second alternative matching the same characters doubles the
# number of backtracking paths per digit and makes failed lookaheads on
# digit-heavy names exponentially slow without changing what is matched.
complex_id = %r(
  (?:[\w#$%_']|\(\)|\(,\)|\[\])*
  (?:[\w#$%_']+)
)mx
# Entry state: whitespace and dump-file decorations first, then the shared
# rule groups, and finally catch-all rules so every character gets a token.
state :root do
  rule %r/\s+/m, Text

  # Lines that start and end with `=====` are section headings.
  rule %r/^=====.*=====$/, Generic::Heading

  # A line consisting solely of a `YYYY-MM-DD hh:mm:ss.fraction UTC` timestamp.
  rule %r/^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d+ UTC$/, Comment::Single

  # The shared groups are tried in exactly this order.
  %i[
    detect_section
    preprocessor_macros
    info_tbls
    comments
    literals
    keywords
    types
    infos
    names
    operators
  ].each { |shared| mixin shared }

  # A backslash directly followed by a newline (escaped newline).
  rule %r/\\\n/, Text

  # Anything still unmatched is emitted one character at a time as Text.
  rule %r/./, Text
end
# Recognises the `section` keyword plus its trailing whitespace and hands
# over to the :section state for the remainder of the header.
state :detect_section do
  rule %r/(section)(\s+)/ do
    groups Keyword, Text
    push :section
  end
end
# Lexes the header of a `section` declaration; entered from :detect_section
# and left again at the opening brace.
state :section do
# The quoted section kinds listed here are highlighted as builtins.
rule %r/"(data|cstring|text|rodata|relrodata|bss)"/, Name::Builtin
# The opening brace ends the header: emit it and return to the previous state.
rule %r/{/, Punctuation, :pop!
# Everything else in the header falls back to the shared rule groups,
# tried in this order.
mixin :names
mixin :operators
mixin :keywords
rule %r/\s+/, Text
end
# Highlights C preprocessor directives.
state :preprocessor_macros do
  # Directive names without special handling. The first alternative that
  # matches wins, so longer names are listed before their prefixes
  # (`ifndef`/`ifdef` before `if`); otherwise `#ifdef` would be split into
  # `#if` followed by a stray `def` identifier.
  rule %r/#(include|ifndef|ifdef|endif|undef|elif|else|if)/, Comment::Preproc

  # `#define NAME`: the directive, the whitespace/comments after it (lexed
  # recursively so embedded comments are highlighted), and the macro name.
  rule %r{
    (\#define)
    (#{ws}*)
    (#{id})
  }mx do |m|
    token Comment::Preproc, m[1]
    recurse m[2]
    token Name::Label, m[3]
  end
end
# Detects the opening `{ info_tbls:` of an info-table block and switches to
# :info_tbls_body for its contents.
state :info_tbls do
  rule %r/({ )(info_tbls)(:)/ do
    groups Punctuation, Name::Entity, Punctuation
    push :info_tbls_body
  end
end
# Contents of an info-table block. Nested braces push this state again, so
# every closing brace pops exactly one level.
state :info_tbls_body do
  rule %r/}/, Punctuation, :pop!
  rule %r/{/, Punctuation, :info_tbls_body

  # `label:` is only looked at here; the :label state consumes it together
  # with the rest of the line.
  rule(%r/(?=label:)/) { push :label }

  # `(name,` -- the name directly after an opening parenthesis is a label.
  rule %r{(\()(#{complex_id})(,)}mx do
    groups Punctuation, Name::Label, Punctuation
  end

  # Shared rule groups, tried in this order.
  %i[literals infos keywords operators].each { |shared| mixin shared }

  rule %r/#{id}/, Text
  rule %r/\s+/, Text
end
# Lexes the remainder of a `label:` line; left again at the end of the line.
state :label do
  # Shared rule groups, tried in this order.
  %i[infos names keywords operators].each { |shared| mixin shared }

  # Tab, space, etc. but not newline!
  rule %r/[^\S\n]+/, Text

  # The newline ends the label line and returns to the previous state.
  rule %r/\n/, Text, :pop!
end
# Comment-like constructs.
state :comments do
# `// ...` up to (not including) the end of the line.
rule %r/\/{2}.*/, Comment::Single
# Parenthesised text starting with `(likely` up to the first `)` is
# highlighted as a comment.
rule %r/\(likely.*?\)/, Comment
# `/* ... */`, possibly spanning several lines (non-greedy, so it stops at
# the first `*/`).
rule %r/\/\*.*?\*\//m, Comment::Multiline
end
# Numeric and string literals.
state :literals do
# The float rule comes first so that `1.5` is not split at the integer part.
rule %r/-?[0-9]+\.[0-9]+/, Literal::Number::Float
rule %r/-?[0-9]+/, Literal::Number::Integer
# An opening double quote starts a string; its contents are lexed in
# :literal_string.
rule %r/"/, Literal::String::Delimiter, :literal_string
end
# Inside a double-quoted string; entered from :literals.
state :literal_string do
# A backslash followed by any character is an escape sequence; this rule is
# tried before the closing-quote rule, so `\"` does not end the string.
rule %r/\\./, Literal::String::Escape
# A percent sign followed by any character is highlighted as a symbol.
rule %r/%./, Literal::String::Symbol
# The closing quote ends the string and returns to the previous state.
rule %r/"/, Literal::String::Delimiter, :pop!
# Any other character (except a newline, which `.` does not match here) is
# plain string content.
rule %r/./, Literal::String
end
# Operators and punctuation. `..` is listed first so that it is not split
# into two `.` punctuation tokens.
state :operators do
  [
    [%r/\.\./, Operator],
    [%r/[+\-*\/<>=!&|~]/, Operator],
    [%r/[\[\].{}:;,()]/, Punctuation]
  ].each { |pattern, kind| rule pattern, kind }
end
# Keywords and keyword-like constructs. Whitespace captured between the
# parts of a construct is lexed recursively, so comments inside it are
# highlighted as well.
state :keywords do
  # Callback for `<keyword> <whitespace> <punctuation>` shaped matches.
  keyword_then_punctuation = proc do |scan|
    token Keyword, scan[1]
    recurse scan[2]
    token Punctuation, scan[3]
  end

  # Callback for `<keyword> <whitespace> <keyword>` shaped matches.
  keyword_pair = proc do |scan|
    token Keyword, scan[1]
    recurse scan[2]
    token Keyword, scan[3]
  end

  rule %r/(const)(\s+)/ do
    groups Keyword::Constant, Text
  end

  # A double quote reaching this rule is emitted as a string token by itself.
  rule %r/"/, Literal::String::Double

  # `switch <anything up to the brace> {`
  rule %r/(switch)([^{]*)({)/, &keyword_then_punctuation

  # `arg hints:` / `result hints:`
  rule %r/(arg|result)(#{ws}+)(hints)(:)/ do |scan|
    token Name::Property, scan[1]
    recurse scan[2]
    token Name::Property, scan[3]
    token Punctuation, scan[4]
  end

  rule %r/(returns)(#{ws}*)(to)/, &keyword_pair
  rule %r/(never)(#{ws}*)(returns)/, &keyword_pair

  # `return (`
  rule %r{(return)(#{ws}*)(\()}, &keyword_then_punctuation

  # Plain keywords; they only count when followed by whitespace or a comment.
  rule %r{(if|else|goto|call|offset|import|jump|ccall|foreign|prim|case|unwind|export|reserve|push)(#{ws})} do |scan|
    token Keyword, scan[1]
    recurse scan[2]
  end

  # `default:`
  rule %r{(default)(#{ws}*)(:)}, &keyword_then_punctuation
end
# Type names, recognised from the context they appear in.
state :types do
  # Memory access: `type[42]`
  # Only the type is consumed; the bracket is merely looked at.
  rule %r/(#{id})(?=\[[^\]])/ do
    groups Keyword::Type
  end

  # Array type: `type[]`
  rule %r/(#{id}\[\])/ do
    groups Keyword::Type
  end

  # Macro substitution in front of a function, tried before the typed
  # declaration rule below: in `PREPROCESSOR_MACRO_VARIABLE someFun()` the
  # first word is not a type.
  rule %r{
    (^#{id})
    (#{ws}+)
    (#{id})
    (#{ws}*)
    (\()
  }mx do |scan|
    token Name::Label, scan[1]
    recurse scan[2]
    token Name::Function, scan[3]
    recurse scan[4]
    token Punctuation, scan[5]
  end

  # Typed variable or parameter declaration:
  #   `type /* optional whitespace */ var_name /* optional whitespace */;`
  #   `type /* optional whitespace */ var_name /* optional whitespace */, var_name2`
  #   `(type /* optional whitespace */ var_name /* optional whitespace */)`
  # The first identifier is the type, the second one the declared name.
  rule %r{
    (^#{id})
    (#{ws}+)
    (#{id})
  }mx do |scan|
    token Keyword::Type, scan[1]
    recurse scan[2]
    token Name::Label, scan[3]
  end
end
# `key:` fields: the listed keys are properties, `stack_info` is an entity;
# the colon is punctuation in both cases.
state :infos do
  rule %r/(args|res|upd|label|rep|srt|arity|fun_type|arg_space|updfr_space)(:)/ do
    groups Name::Property, Punctuation
  end

  rule %r/(stack_info)(:)/ do
    groups Name::Entity, Punctuation
  end
end
# Names: type annotations, builtins, global registers, namespaces,
# functions and, as the fallback, labels.
state :names do
  # `:: Type` -- a capitalised word after a double colon is a type.
  rule %r/(::)(#{ws}*)([A-Z]\w+)/ do |scan|
    token Operator, scan[1]
    recurse scan[2]
    token Keyword::Type, scan[3]
  end

  # `<name>` in angle brackets.
  rule %r/<(#{id})>/, Name::Builtin

  # Global registers; the lookahead rejects longer identifiers that merely
  # start with one of these names.
  rule %r/(Sp|SpLim|Hp|HpLim|HpAlloc|BaseReg|CurrentNursery|CurrentTSO|R\d{1,2}|gcptr)(?!#{id})/, Name::Variable::Global

  # `Namespace.` -- what follows the dot is lexed in :namespace_name.
  rule %r/([A-Z]#{id})(\.)/ do
    groups Name::Namespace, Punctuation
    push :namespace_name
  end

  # Inline function calls:
  # ```
  # arg1 `lt` arg2
  # ```
  rule %r/(`)(#{id})(`)/ do
    groups Punctuation, Name::Function, Punctuation
  end

  # Function: `name /* optional whitespace */ (`
  # Function (arguments via explicit stack handling): `name /* optional whitespace */ {`
  # Nothing is consumed here; the :function state lexes the name.
  rule %r{(?=
    #{complex_id}
    #{ws}*
    [\{\(]
  )}mx do
    push :function
  end

  rule %r/CLOSURE/, Keyword::Type

  # Any remaining identifier is a label.
  rule %r/#{complex_id}/, Name::Label
end
# Lexes what follows a `Namespace.` prefix; entered from :names.
state :namespace_name do
  # Further namespace segments keep the lexer in this state.
  rule %r/([A-Z]#{id})(\.)/ do
    groups Name::Namespace, Punctuation
  end

  # A name followed by `(` or `{` is a function; this ends the qualified name.
  rule %r{(#{complex_id})(#{ws}*)([\{\(])}mx do |scan|
    token Name::Function, scan[1]
    recurse scan[2]
    token Punctuation, scan[3]
    pop!
  end

  # Any other name is a label and likewise ends the qualified name.
  rule %r/#{complex_id}/, Name::Label, :pop!

  # Anything else: leave the state without consuming input.
  rule(%r/(?=.)/m) { pop! }
end
# Lexes a function name up to and including its opening `(` or `{`.
# Pushed by the lookahead rule in :names, which consumes nothing itself.
state :function do
# Info-table macros; the longer names are listed before the bare
# `INFO_TABLE` so that they are matched in full.
rule %r/INFO_TABLE_FUN|INFO_TABLE_CONSTR|INFO_TABLE_SELECTOR|INFO_TABLE_RET|INFO_TABLE/, Name::Builtin
# Names prefixed with `%` are highlighted as builtins.
rule %r/%#{id}/, Name::Builtin
rule %r/#{complex_id}/, Name::Function
rule %r/\s+/, Text
# The opening parenthesis or brace ends the function name and returns to
# the previous state.
rule %r/[({]/, Punctuation, :pop!
# Comments between the name and the opening bracket.
mixin :comments
end
end
end
end