lib/rouge/lexers/crystal.rb



# -*- coding: utf-8 -*- #
# frozen_string_literal: true

module Rouge
  module Lexers
    class Crystal < RegexLexer
      title "Crystal"
      desc "Crystal The Programming Language (crystal-lang.org)"
      tag 'crystal'
      aliases 'cr'
      filenames '*.cr'

      mimetypes 'text/x-crystal', 'application/x-crystal'

      def self.detect?(text)
        return true if text.shebang? 'crystal'
      end

      state :symbols do
        # symbols
        rule %r(
          :  # initial :
          @{0,2} # optional ivar, for :@foo and :@@foo
          [a-z_]\w*[!?]? # the symbol
        )xi, Str::Symbol

        # special symbols
        rule %r(:(?:\*\*|[-+]@|[/\%&\|^`~]|\[\]=?|<<|>>|<=?>|<=?|===?)),
          Str::Symbol

        rule %r/:'(\\\\|\\'|[^'])*'/, Str::Symbol
        rule %r/:"/, Str::Symbol, :simple_sym
      end

      state :sigil_strings do
        # %-sigiled strings
        # %(abc), %[abc], %<abc>, %.abc., %r.abc., etc
        delimiter_map = { '{' => '}', '[' => ']', '(' => ')', '<' => '>' }
        rule %r/%([rqswQWxiI])?([^\w\s])/ do |m|
          open = Regexp.escape(m[2])
          close = Regexp.escape(delimiter_map[m[2]] || m[2])
          interp = /[rQWxI]/ === m[1]
          toktype = Str::Other

          puts "    open: #{open.inspect}" if @debug
          puts "    close: #{close.inspect}" if @debug

          # regexes
          if m[1] == 'r'
            toktype = Str::Regex
            push :regex_flags
          end

          token toktype

          push do
            rule %r/\\[##{open}#{close}\\]/, Str::Escape
            # nesting rules only with asymmetric delimiters
            if open != close
              rule %r/#{open}/ do
                token toktype
                push
              end
            end
            rule %r/#{close}/, toktype, :pop!

            if interp
              mixin :string_intp_escaped
              rule %r/#/, toktype
            else
              rule %r/[\\#]/, toktype
            end

            rule %r/[^##{open}#{close}\\]+/m, toktype
          end
        end
      end

      state :strings do
        mixin :symbols
        rule %r/\b[a-z_]\w*?[?!]?:\s+/, Str::Symbol, :expr_start
        rule %r/'(\\\\|\\'|[^'])*'/, Str::Single
        rule %r/"/, Str::Double, :simple_string
        rule %r/(?<!\.)`/, Str::Backtick, :simple_backtick
      end

      state :regex_flags do
        rule %r/[mixounse]*/, Str::Regex, :pop!
      end

      # double-quoted string and symbol
      [[:string, Str::Double, '"'],
       [:sym, Str::Symbol, '"'],
       [:backtick, Str::Backtick, '`']].each do |name, tok, fin|
        state :"simple_#{name}" do
          mixin :string_intp_escaped
          rule %r/[^\\#{fin}#]+/m, tok
          rule %r/[\\#]/, tok
          rule %r/#{fin}/, tok, :pop!
        end
      end

      keywords = %w(
        BEGIN END alias begin break case defined\? do else elsif end
        ensure for if ifdef in next redo rescue raise retry return super then
        undef unless until when while yield lib fun type of as
      )

      keywords_pseudo = %w(
        initialize new loop include extend raise attr_reader attr_writer
        attr_accessor alias_method attr catch throw private module_function
        public protected true false nil __FILE__ __LINE__
        getter getter? getter! property property? property! struct record
      )

      builtins_g = %w(
        abort ancestors at_exit
        caller catch chomp chop clone constants
        display dup eval exec exit extend fail fork format freeze
        getc gets global_variables gsub hash id included_modules
        inspect instance_eval instance_method instance_methods
        lambda loop method method_missing
        methods module_eval name object_id open p print printf
        proc putc puts raise rand
        readline readlines require scan select self send
        sleep split sprintf srand sub syscall system
        test throw to_a to_s trap warn
      )

      builtins_q = %w(
        eql equal include is_a iterator kind_of nil
      )

      builtins_b = %w(chomp chop exit gsub sub)

      start do
        push :expr_start
        @heredoc_queue = []
      end

      state :whitespace do
        mixin :inline_whitespace
        rule %r/\n\s*/m, Text, :expr_start
        rule %r/#.*$/, Comment::Single

        rule %r(=begin\b.*?\n=end\b)m, Comment::Multiline
      end

      state :inline_whitespace do
        rule %r/[ \t\r]+/, Text
      end

      state :root do
        mixin :whitespace
        rule %r/__END__/, Comment::Preproc, :end_part

        rule %r/0_?[0-7]+(?:_[0-7]+)*/, Num::Oct
        rule %r/0x[0-9A-Fa-f]+(?:_[0-9A-Fa-f]+)*/, Num::Hex
        rule %r/0b[01]+(?:_[01]+)*/, Num::Bin
        rule %r/\d+\.\d+(e[\+\-]?\d+)?/, Num::Float
        rule %r/[\d]+(?:_\d+)*/, Num::Integer

        rule %r/@\[([^\]]+)\]/, Name::Decorator

        # names
        rule %r/@@[a-z_]\w*/i, Name::Variable::Class
        rule %r/@[a-z_]\w*/i, Name::Variable::Instance
        rule %r/\$\w+/, Name::Variable::Global
        rule %r(\$[!@&`'+~=/\\,;.<>_*\$?:"]), Name::Variable::Global
        rule %r/\$-[0adFiIlpvw]/, Name::Variable::Global
        rule %r/::/, Operator

        mixin :strings

        rule %r/(?:#{keywords.join('|')})\b/, Keyword, :expr_start
        rule %r/(?:#{keywords_pseudo.join('|')})\b/, Keyword::Pseudo, :expr_start

        rule %r(
          (module)
          (\s+)
          ([a-zA-Z_][a-zA-Z0-9_]*(::[a-zA-Z_][a-zA-Z0-9_]*)*)
        )x do
          groups Keyword, Text, Name::Namespace
        end

        rule %r/(def\b)(\s*)/ do
          groups Keyword, Text
          push :funcname
        end

        rule %r/(class\b)(\s*)/ do
          groups Keyword, Text
          push :classname
        end

        rule %r/(?:#{builtins_q.join('|')})[?]/, Name::Builtin, :expr_start
        rule %r/(?:#{builtins_b.join('|')})!/,  Name::Builtin, :expr_start
        rule %r/(?<!\.)(?:#{builtins_g.join('|')})\b/,
          Name::Builtin, :method_call

        mixin :has_heredocs

        # `..` and `...` for ranges must have higher priority than `.`
        # Otherwise, they will be parsed as :method_call
        rule %r/\.{2,3}/, Operator, :expr_start

        rule %r/[A-Z][a-zA-Z0-9_]*/, Name::Constant, :method_call
        rule %r/(\.|::)(\s*)([a-z_]\w*[!?]?|[*%&^`~+-\/\[<>=])/ do
          groups Punctuation, Text, Name::Function
          push :method_call
        end

        rule %r/[a-zA-Z_]\w*[?!]/, Name, :expr_start
        rule %r/[a-zA-Z_]\w*/, Name, :method_call
        rule %r/\*\*|<<?|>>?|>=|<=|<=>|=~|={3}|!~|&&?|\|\||\./,
          Operator, :expr_start
        rule %r/[-+\/*%=<>&!^|~]=?/, Operator, :expr_start
        rule(/[?]/) { token Punctuation; push :ternary; push :expr_start }
        rule %r<[\[({,:\\;/]>, Punctuation, :expr_start
        rule %r<[\])}]>, Punctuation
      end

      state :has_heredocs do
        rule %r/(?<!\w)(<<[-~]?)(["`']?)([a-zA-Z_]\w*)(\2)/ do |m|
          token Operator, m[1]
          token Name::Constant, "#{m[2]}#{m[3]}#{m[4]}"
          @heredoc_queue << [['<<-', '<<~'].include?(m[1]), m[3]]
          push :heredoc_queue unless state? :heredoc_queue
        end

        rule %r/(<<[-~]?)(["'])(\2)/ do |m|
          token Operator, m[1]
          token Name::Constant, "#{m[2]}#{m[3]}#{m[4]}"
          @heredoc_queue << [['<<-', '<<~'].include?(m[1]), '']
          push :heredoc_queue unless state? :heredoc_queue
        end
      end

      state :heredoc_queue do
        rule %r/(?=\n)/ do
          goto :resolve_heredocs
        end

        mixin :root
      end

      state :resolve_heredocs do
        mixin :string_intp_escaped

        rule %r/\n/, Str::Heredoc, :test_heredoc
        rule %r/[#\\\n]/, Str::Heredoc
        rule %r/[^#\\\n]+/, Str::Heredoc
      end

      state :test_heredoc do
        rule %r/[^#\\\n]*$/ do |m|
          tolerant, heredoc_name = @heredoc_queue.first
          check = tolerant ? m[0].strip : m[0].rstrip

          # check if we found the end of the heredoc
          puts "    end heredoc check #{check.inspect} = #{heredoc_name.inspect}" if @debug
          if check == heredoc_name
            @heredoc_queue.shift
            # if there's no more, we're done looking.
            pop! if @heredoc_queue.empty?
            token Name::Constant
          else
            token Str::Heredoc
          end

          pop!
        end

        rule(//) { pop! }
      end

      state :funcname do
        rule %r/\s+/, Text
        rule %r/\(/, Punctuation, :defexpr
        rule %r(
          (?:([a-zA-Z_]\w*)(\.))?
          (
            [a-zA-Z_]\w*[!?]? |
            \*\*? | [-+]@? | [/%&\|^`~] | \[\]=? |
            <<? | >>? | <=>? | >= | ===?
          )
        )x do |m|
          puts "matches: #{[m[0], m[1], m[2], m[3]].inspect}" if @debug
          groups Name::Class, Operator, Name::Function
          pop!
        end

        rule(//) { pop! }
      end

      state :classname do
        rule %r/\s+/, Text
        rule %r/\(/ do
          token Punctuation
          push :defexpr
          push :expr_start
        end

        # class << expr
        rule %r/<</ do
          token Operator
          goto :expr_start
        end

        rule %r/[A-Z_]\w*/, Name::Class, :pop!

        rule(//) { pop! }
      end

      state :ternary do
        rule(/:(?!:)/) { token Punctuation; goto :expr_start }

        mixin :root
      end

      state :defexpr do
        rule %r/(\))(\.|::)?/ do
          groups Punctuation, Operator
          pop!
        end
        rule %r/\(/ do
          token Punctuation
          push :defexpr
          push :expr_start
        end

        mixin :root
      end

      state :in_interp do
        rule %r/}/, Str::Interpol, :pop!
        mixin :root
      end

      state :string_intp do
        rule %r/[#][{]/, Str::Interpol, :in_interp
        rule %r/#(@@?|\$)[a-z_]\w*/i, Str::Interpol
      end

      state :string_intp_escaped do
        mixin :string_intp
        rule %r/\\([\\abefnrstv#"']|x[a-fA-F0-9]{1,2}|[0-7]{1,3})/,
          Str::Escape
        rule %r/\\./, Str::Escape
      end

      state :method_call do
        rule %r(/) do
          token Operator
          goto :expr_start
        end

        rule(/(?=\n)/) { pop! }

        rule(//) { goto :method_call_spaced }
      end

      state :method_call_spaced do
        mixin :whitespace

        rule %r([%/]=) do
          token Operator
          goto :expr_start
        end

        rule %r((/)(?=\S|\s*/)) do
          token Str::Regex
          goto :slash_regex
        end

        mixin :sigil_strings

        rule(%r((?=\s*/))) { pop! }

        rule(/\s+/) { token Text; goto :expr_start }
        rule(//) { pop! }
      end

      state :expr_start do
        mixin :inline_whitespace

        rule %r(/) do
          token Str::Regex
          goto :slash_regex
        end

        # char operator.  ?x evaulates to "x", unless there's a digit
        # beforehand like x>=0?n[x]:""
        rule %r(
          [?](\\[MC]-)*     # modifiers
          (\\([\\abefnrstv\#"']|x[a-fA-F0-9]{1,2}|[0-7]{1,3})|\S)
          (?!\w)
        )x, Str::Char, :pop!

        # special case for using a single space.  Ruby demands that
        # these be in a single line, otherwise it would make no sense.
        rule %r/(\s*)(%[rqswQWxiI]? \S* )/ do
          groups Text, Str::Other
          pop!
        end

        mixin :sigil_strings

        rule(//) { pop! }
      end

      state :slash_regex do
        mixin :string_intp
        rule %r(\\\\), Str::Regex
        rule %r(\\/), Str::Regex
        rule %r([\\#]), Str::Regex
        rule %r([^\\/#]+)m, Str::Regex
        rule %r(/) do
          token Str::Regex
          goto :regex_flags
        end
      end

      state :end_part do
        # eat up the rest of the stream as Comment::Preproc
        rule %r/.+/m, Comment::Preproc, :pop!
      end
    end
  end
end