lib/rouge/lexers/ghc_cmm.rb



# -*- coding: utf-8 -*- #
# frozen_string_literal: true

# C minus minus (Cmm) is a pun on the name C++. It's an intermediate language
# of the Glasgow Haskell Compiler (GHC) that is very similar to C, but with
# many features missing and some special constructs.
#
# Cmm is a dialect of C--. The goal of this lexer is to use what GHC produces
# and parses (Cmm); C-- itself is not supported.
#
# https://gitlab.haskell.org/ghc/ghc/wikis/commentary/compiler/cmm-syntax
#
module Rouge
  module Lexers
    # Regex-based lexer for GHC Cmm, registered under the tag `ghc-cmm`
    # (alias `cmm`) for files matching the `filenames` globs below.
    #
    # The lexer is organised as a set of small states that are combined via
    # `mixin`. Rules are tried in the order in which they are declared, so
    # the order of both the rules and the mixins below is significant.
    class GHCCmm < RegexLexer
      title "GHC Cmm (C--)"
      desc "GHC Cmm is the intermediate representation of the GHC Haskell compiler"
      tag 'ghc-cmm'
      filenames '*.cmm', '*.dump-cmm', '*.dump-cmm-*'
      aliases 'cmm'

      # One unit of "whitespace": a single whitespace character, a `//` line
      # comment (including its terminating newline) or a `/* ... */` block
      # comment. Callers append `*` or `+` to allow runs of these units.
      ws = %r(\s|//.*?\n|/[*](?:[^*]|(?:[*][^/]))*[*]+/)mx

      # Make sure that this is not a preprocessor macro, e.g. `#if` or `#define`.
      id = %r((?!\#[a-zA-Z])[\w#\$%_']+)

      # An identifier that may additionally contain `()`, `(,)` and `[]`
      # fragments, but that must end in at least one plain identifier
      # character. Unlike `id`, it carries no guard against a leading `#`.
      complex_id = %r(
        (?:[\w#$%_']|\(\)|\(,\)|\[\]|[0-9])*
        (?:[\w#$%_']+)
      )mx

      # Entry state. Handles whitespace and the line-anchored heading and
      # timestamp formats first, then delegates to the mixins in priority
      # order, and finally falls back to emitting single characters as Text.
      state :root do
        rule %r/\s+/m, Text

        # sections markers
        rule %r/^=====.*=====$/, Generic::Heading

        # timestamps
        rule %r/^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d+ UTC$/, Comment::Single

        mixin :detect_section
        mixin :preprocessor_macros

        mixin :info_tbls
        mixin :comments
        mixin :literals
        mixin :keywords
        mixin :types
        mixin :infos
        mixin :names
        mixin :operators

        # escaped newline
        rule %r/\\\n/, Text

        # rest is Text
        rule %r/./, Text
      end

      # Recognizes the `section` keyword followed by whitespace and hands the
      # rest of the declaration over to the `:section` state.
      state :detect_section do
        rule %r/(section)(\s+)/ do |m|
          token Keyword, m[1]
          token Text, m[2]
          push :section
        end
      end

      # Remainder of a `section` declaration. The listed quoted section names
      # are highlighted as builtins; the opening `{` ends the state.
      state :section do
        rule %r/"(data|cstring|text|rodata|relrodata|bss)"/, Name::Builtin

        rule %r/{/, Punctuation, :pop!

        mixin :names
        mixin :operators
        mixin :keywords

        rule %r/\s+/, Text
      end

      # C preprocessor directives.
      state :preprocessor_macros do
        # Longer directive names are listed before their prefixes (`ifndef`
        # and `ifdef` before `if`): the alternation takes the first branch
        # that matches, so with `if` first `#ifdef` would be split into
        # `#if` plus a stray `def` that is then lexed as a name.
        rule %r/#(include|ifndef|ifdef|endif|undef|elif|else|if)/, Comment::Preproc

        # `#define NAME`: the run between the directive and the name is
        # passed to `recurse` because `ws` also matches comments, which
        # should get their own tokens.
        rule %r{
            (\#define)
            (#{ws}*)
            (#{id})
          }mx do |m|
          token Comment::Preproc, m[1]
          recurse m[2]
          token Name::Label, m[3]
        end
      end

      # Opener of an info-table block: the literal text `{ info_tbls:`.
      state :info_tbls do
        rule %r/({ )(info_tbls)(:)/ do |m|
          token Punctuation, m[1]
          token Name::Entity, m[2]
          token Punctuation, m[3]

          push :info_tbls_body
        end
      end

      # Contents of an info-table block. Every `{` pushes another level of
      # this state and every `}` pops one, so nested braces stay balanced.
      state :info_tbls_body do
        rule %r/}/, Punctuation, :pop!
        rule %r/{/, Punctuation, :info_tbls_body

        # Zero-width match: switch to `:label` without consuming `label:`,
        # which is then tokenized by the `:infos` mixin inside that state.
        rule %r/(?=label:)/ do
          push :label
        end

        # `(name,` -- the first component of a parenthesized entry.
        rule %r{(\()(#{complex_id})(,)}mx do |m|
          token Punctuation, m[1]
          token Name::Label, m[2]
          token Punctuation, m[3]
        end

        mixin :literals
        mixin :infos
        mixin :keywords
        mixin :operators

        rule %r/#{id}/, Text
        rule %r/\s+/, Text
      end

      # Rest of a `label:` line inside an info table; the newline pops.
      state :label do
        mixin :infos
        mixin :names
        mixin :keywords
        mixin :operators

        rule %r/[^\S\n]+/, Text # Tab, space, etc. but not newline!
        rule %r/\n/, Text, :pop!
      end

      # `//` line comments, `(likely ...)` annotations (emitted as comments)
      # and `/* ... */` block comments.
      state :comments do
        rule %r/\/{2}.*/, Comment::Single
        rule %r/\(likely.*?\)/, Comment
        rule %r/\/\*.*?\*\//m, Comment::Multiline
      end

      # Numeric literals and the opening quote of a string literal. The float
      # rule comes first so that `1.5` is not split at the integer part.
      state :literals do
        rule %r/-?[0-9]+\.[0-9]+/, Literal::Number::Float
        rule %r/-?[0-9]+/, Literal::Number::Integer
        rule %r/"/, Literal::String::Delimiter, :literal_string
      end

      # Inside a double-quoted string: backslash escapes, `%x` sequences,
      # the closing quote (which pops) and plain characters.
      state :literal_string do
        # quotes
        rule %r/\\./, Literal::String::Escape
        rule %r/%./, Literal::String::Symbol
        rule %r/"/, Literal::String::Delimiter, :pop!
        rule %r/./, Literal::String
      end

      # Operators and punctuation. `..` is matched before the single-character
      # classes so that it is emitted as one operator token.
      state :operators do
        rule %r/\.\./, Operator
        rule %r/[+\-*\/<>=!&|~]/, Operator
        rule %r/[\[\].{}:;,()]/, Punctuation
      end

      # Keywords and keyword-like phrases.
      state :keywords do
        rule %r/(const)(\s+)/ do |m|
          token Keyword::Constant, m[1]
          token Text, m[2]
        end

        # Only reached from states that mix in `:keywords` without a
        # preceding `:literals` (`:section`, `:label`); where `:literals`
        # comes first, it claims the quote and opens `:literal_string`.
        rule %r/"/, Literal::String::Double

        # `switch <scrutinee> {` -- the scrutinee is lexed via `recurse`.
        rule %r/(switch)([^{]*)({)/ do |m|
          token Keyword, m[1]
          recurse m[2]
          token Punctuation, m[3]
        end

        # `arg hints:` / `result hints:`
        rule %r/(arg|result)(#{ws}+)(hints)(:)/ do |m|
          token Name::Property, m[1]
          recurse m[2]
          token Name::Property, m[3]
          token Punctuation, m[4]
        end

        rule %r/(returns)(#{ws}*)(to)/ do |m|
          token Keyword, m[1]
          recurse m[2]
          token Keyword, m[3]
        end

        rule %r/(never)(#{ws}*)(returns)/ do |m|
          token Keyword, m[1]
          recurse m[2]
          token Keyword, m[3]
        end

        rule %r{(return)(#{ws}*)(\()} do |m|
          token Keyword, m[1]
          recurse m[2]
          token Punctuation, m[3]
        end

        # A keyword must be followed by exactly one `ws` unit here, so an
        # identifier that merely starts with a keyword does not match.
        rule %r{(if|else|goto|call|offset|import|jump|ccall|foreign|prim|case|unwind|export|reserve|push)(#{ws})} do |m|
          token Keyword, m[1]
          recurse m[2]
        end

        rule %r{(default)(#{ws}*)(:)} do |m|
          token Keyword, m[1]
          recurse m[2]
          token Punctuation, m[3]
        end
      end

      # Type names, recognized from their syntactic position.
      state :types do
        # Memory access: `type[42]`
        # Note: Only a token for type is produced.
        rule %r/(#{id})(?=\[[^\]])/ do |m|
          token Keyword::Type, m[1]
        end

        # Array type: `type[]`
        rule %r/(#{id}\[\])/ do |m|
          token Keyword::Type, m[1]
        end

        # Capture macro substitutions before lexing typed declarations
        # I.e. there is no type in `PREPROCESSOR_MACRO_VARIABLE someFun()`
        # Both this rule and the next are anchored at the start of a line.
        rule %r{
                (^#{id})
                (#{ws}+)
                (#{id})
                (#{ws}*)
                (\()
              }mx do |m|
          token Name::Label, m[1]
          recurse m[2]
          token Name::Function, m[3]
          recurse m[4]
          token Punctuation, m[5]
        end

        # Type in variable or parameter declaration:
        #   `type /* optional whitespace */ var_name /* optional whitespace */;`
        #   `type /* optional whitespace */ var_name /* optional whitespace */, var_name2`
        #   `(type /* optional whitespace */ var_name /* optional whitespace */)`
        # Note: This rule emits the type, the whitespace between the two
        # identifiers and the first variable name; whatever follows is left
        # to later rules.
        rule %r{
                (^#{id})
                (#{ws}+)
                (#{id})
              }mx do |m|
          token Keyword::Type, m[1]
          recurse m[2]
          token Name::Label, m[3]
        end
      end

      # `key:` fields; `stack_info` is emitted as an entity, the other listed
      # keys as properties.
      state :infos do
        rule %r/(args|res|upd|label|rep|srt|arity|fun_type|arg_space|updfr_space)(:)/ do |m|
          token Name::Property, m[1]
          token Punctuation, m[2]
        end

        rule %r/(stack_info)(:)/ do |m|
          token Name::Entity, m[1]
          token Punctuation, m[2]
        end
      end

      # Names: type annotations, global registers, namespaces, functions and
      # plain labels.
      state :names do
        # `:: Type` -- the type must start with an upper-case letter.
        rule %r/(::)(#{ws}*)([A-Z]\w+)/ do |m|
          token Operator, m[1]
          recurse m[2]
          token Keyword::Type, m[3]
        end

        rule %r/<(#{id})>/, Name::Builtin

        # The negative lookahead keeps longer identifiers that merely start
        # with one of these names from being split.
        rule %r/(Sp|SpLim|Hp|HpLim|HpAlloc|BaseReg|CurrentNursery|CurrentTSO|R\d{1,2}|gcptr)(?!#{id})/, Name::Variable::Global
        # `Namespace.` -- further components are handled in `:namespace_name`.
        rule %r/([A-Z]#{id})(\.)/ do |m|
          token Name::Namespace, m[1]
          token Punctuation, m[2]
          push :namespace_name
        end

        # Inline function calls:
        # ```
        #  arg1 `lt` arg2
        # ```
        rule %r/(`)(#{id})(`)/ do |m|
          token Punctuation, m[1]
          token Name::Function, m[2]
          token Punctuation, m[3]
        end

        # Function: `name /* optional whitespace */ (`
        # Function (arguments via explicit stack handling): `name /* optional whitespace */ {`
        # The match is zero-width; the tokens are produced by `:function`.
        rule %r{(?=
                  #{complex_id}
             #{ws}*
                  [\{\(]
                )}mx do
          push :function
        end

        rule %r/CLOSURE/, Keyword::Type
        rule %r/#{complex_id}/, Name::Label
      end

      # Follows a `Namespace.` prefix: consumes further namespace components,
      # then a function name (when followed by `(` or `{`) or a plain label.
      state :namespace_name do
        rule %r/([A-Z]#{id})(\.)/ do |m|
          token Name::Namespace, m[1]
          token Punctuation, m[2]
        end

        rule %r{(#{complex_id})(#{ws}*)([\{\(])}mx do |m|
          token Name::Function, m[1]
          recurse m[2]
          token Punctuation, m[3]
          pop!
        end

        rule %r/#{complex_id}/, Name::Label, :pop!

        # Anything else: leave the state without consuming input.
        rule %r/(?=.)/m do
          pop!
        end
      end

      # Function name up to and including its opening `(` or `{`, which pops.
      # `INFO_TABLE*` macros and `%`-prefixed names are emitted as builtins.
      state :function do
        rule %r/INFO_TABLE_FUN|INFO_TABLE_CONSTR|INFO_TABLE_SELECTOR|INFO_TABLE_RET|INFO_TABLE/, Name::Builtin
        rule %r/%#{id}/, Name::Builtin
        rule %r/#{complex_id}/, Name::Function
        rule %r/\s+/, Text
        rule %r/[({]/, Punctuation, :pop!
        mixin :comments
      end
    end
  end
end