lib/rouge/lexers/ruby.rb



# -*- coding: utf-8 -*- #
# frozen_string_literal: true

module Rouge
  module Lexers
    class Ruby < RegexLexer
      title "Ruby"
      desc "The Ruby programming language (ruby-lang.org)"
      tag 'ruby'
      aliases 'rb'
      filenames '*.rb', '*.ruby', '*.rbw', '*.rake', '*.gemspec', '*.podspec',
                'Rakefile', 'Guardfile', 'Gemfile', 'Capfile', 'Podfile',
                'Vagrantfile', '*.ru', '*.prawn', 'Berksfile', '*.arb',
                'Dangerfile', 'Fastfile', 'Deliverfile', 'Appfile'

      mimetypes 'text/x-ruby', 'application/x-ruby'

      def self.detect?(text)
        return true if text.shebang? 'ruby'
      end

      state :symbols do
        # symbols
        rule %r(
          :  # initial :
          @{0,2} # optional ivar, for :@foo and :@@foo
          [a-z_]\w*[!?]? # the symbol
        )xi, Str::Symbol

        # special symbols
        rule %r(:(?:\*\*|[-+]@|[/\%&\|^`~]|\[\]=?|<<|>>|<=?>|<=?|===?)),
          Str::Symbol

        rule %r/:'(\\\\|\\'|[^'])*'/, Str::Symbol
        rule %r/:"/, Str::Symbol, :simple_sym
      end

      state :sigil_strings do
        # %-sigiled strings
        # %(abc), %[abc], %<abc>, %.abc., %r.abc., etc
        delimiter_map = { '{' => '}', '[' => ']', '(' => ')', '<' => '>' }
        rule %r/%([rqswQWxiI])?([^\w\s])/ do |m|
          open = Regexp.escape(m[2])
          close = Regexp.escape(delimiter_map[m[2]] || m[2])
          interp = /[rQWxI]/ === m[1]
          toktype = Str::Other

          puts "    open: #{open.inspect}" if @debug
          puts "    close: #{close.inspect}" if @debug

          # regexes
          if m[1] == 'r'
            toktype = Str::Regex
            push :regex_flags
          end

          token toktype

          push do
            uniq_chars = "#{open}#{close}".squeeze
            uniq_chars = '' if open == close && open == "\\#"
            rule %r/\\[##{uniq_chars}\\]/, Str::Escape
            # nesting rules only with asymmetric delimiters
            if open != close
              rule %r/#{open}/ do
                token toktype
                push
              end
            end
            rule %r/#{close}/, toktype, :pop!

            if interp
              mixin :string_intp_escaped
              rule %r/#/, toktype
            else
              rule %r/[\\#]/, toktype
            end

            rule %r/[^##{uniq_chars}\\]+/m, toktype
          end
        end
      end

      state :strings do
        mixin :symbols
        rule %r/\b[a-z_]\w*?[?!]?:\s+/, Str::Symbol, :expr_start
        rule %r/'(\\\\|\\'|[^'])*'/, Str::Single
        rule %r/"/, Str::Double, :simple_string
        rule %r/(?<!\.)`/, Str::Backtick, :simple_backtick
      end

      state :regex_flags do
        rule %r/[mixounse]*/, Str::Regex, :pop!
      end

      # double-quoted string and symbol
      [[:string, Str::Double, '"'],
       [:sym, Str::Symbol, '"'],
       [:backtick, Str::Backtick, '`']].each do |name, tok, fin|
        state :"simple_#{name}" do
          mixin :string_intp_escaped
          rule %r/[^\\#{fin}#]+/m, tok
          rule %r/[\\#]/, tok
          rule %r/#{fin}/, tok, :pop!
        end
      end

      keywords = %w(
        BEGIN END alias begin break case defined\? do else elsif end
        ensure for if in next redo rescue raise retry return super then
        undef unless until when while yield
      )

      keywords_pseudo = %w(
        loop include extend raise
        alias_method attr catch throw private module_function
        public protected true false nil __FILE__ __LINE__
      )

      builtins_g = %w(
        attr_reader attr_writer attr_accessor

        __id__ __send__ abort ancestors at_exit autoload binding callcc
        caller catch chomp chop class_eval class_variables clone
        const_defined\? const_get const_missing const_set constants
        display dup eval exec exit extend fail fork format freeze
        getc gets global_variables gsub hash id included_modules
        inspect instance_eval instance_method instance_methods
        instance_variable_get instance_variable_set instance_variables
        lambda load local_variables loop method method_missing
        methods module_eval name object_id open p print printf
        private_class_method private_instance_methods private_methods proc
        protected_instance_methods protected_methods public_class_method
        public_instance_methods public_methods putc puts raise rand
        readline readlines require require_relative scan select self send set_trace_func
        singleton_methods sleep split sprintf srand sub syscall system
        taint test throw to_a to_s trace_var trap untaint untrace_var warn
      )

      builtins_q = %w(
        autoload block_given const_defined eql equal frozen
        include instance_of is_a iterator kind_of method_defined
        nil private_method_defined protected_method_defined
        public_method_defined respond_to tainted
      )

      builtins_b = %w(chomp chop exit gsub sub)

      start do
        push :expr_start
        @heredoc_queue = []
      end

      state :whitespace do
        mixin :inline_whitespace
        rule %r/\n\s*/m, Text, :expr_start
        rule %r/#.*$/, Comment::Single

        rule %r(=begin\b.*?\n=end\b)m, Comment::Multiline
      end

      state :inline_whitespace do
        rule %r/[ \t\r]+/, Text
      end

      state :root do
        mixin :whitespace
        rule %r/__END__/, Comment::Preproc, :end_part

        rule %r/0_?[0-7]+(?:_[0-7]+)*/, Num::Oct
        rule %r/0x[0-9A-Fa-f]+(?:_[0-9A-Fa-f]+)*/, Num::Hex
        rule %r/0b[01]+(?:_[01]+)*/, Num::Bin
        rule %r/\d+\.\d+(e[\+\-]?\d+)?/, Num::Float
        rule %r/[\d]+(?:_\d+)*/, Num::Integer

        # names
        rule %r/@@[a-z_]\w*/i, Name::Variable::Class
        rule %r/@[a-z_]\w*/i, Name::Variable::Instance
        rule %r/\$\w+/, Name::Variable::Global
        rule %r(\$[!@&`'+~=/\\,;.<>_*\$?:"]), Name::Variable::Global
        rule %r/\$-[0adFiIlpvw]/, Name::Variable::Global
        rule %r/::/, Operator

        mixin :strings

        rule %r/(?:#{keywords.join('|')})(?=\W|$)/, Keyword, :expr_start
        rule %r/(?:#{keywords_pseudo.join('|')})\b/, Keyword::Pseudo, :expr_start

        rule %r(
          (module)
          (\s+)
          ([a-zA-Z_][a-zA-Z0-9_]*(::[a-zA-Z_][a-zA-Z0-9_]*)*)
        )x do
          groups Keyword, Text, Name::Namespace
        end

        rule %r/(def\b)(\s*)/ do
          groups Keyword, Text
          push :funcname
        end

        rule %r/(class\b)(\s*)/ do
          groups Keyword, Text
          push :classname
        end

        rule %r/(?:#{builtins_q.join('|')})[?]/, Name::Builtin, :expr_start
        rule %r/(?:#{builtins_b.join('|')})!/,  Name::Builtin, :expr_start
        rule %r/(?<!\.)(?:#{builtins_g.join('|')})\b/,
          Name::Builtin, :method_call

        mixin :has_heredocs

        # `..` and `...` for ranges must have higher priority than `.`
        # Otherwise, they will be parsed as :method_call
        rule %r/\.{2,3}/, Operator, :expr_start

        rule %r/[A-Z][a-zA-Z0-9_]*/, Name::Constant, :method_call
        rule %r/(\.|::)(\s*)([a-z_]\w*[!?]?|[*%&^`~+-\/\[<>=])/ do
          groups Punctuation, Text, Name::Function
          push :method_call
        end

        rule %r/[a-zA-Z_]\w*[?!]/, Name, :expr_start
        rule %r/[a-zA-Z_]\w*/, Name, :method_call
        rule %r/\*\*|<<?|>>?|>=|<=|<=>|=~|={3}|!~|&&?|\|\||\./,
          Operator, :expr_start
        rule %r/[-+\/*%=<>&!^|~]=?/, Operator, :expr_start
        rule(/[?]/) { token Punctuation; push :ternary; push :expr_start }
        rule %r<[\[({,:\\;/]>, Punctuation, :expr_start
        rule %r<[\])}]>, Punctuation
      end

      state :has_heredocs do
        rule %r/(?<!\w)(<<[-~]?)(["`']?)([a-zA-Z_]\w*)(\2)/ do |m|
          token Operator, m[1]
          token Name::Constant, "#{m[2]}#{m[3]}#{m[4]}"
          @heredoc_queue << [['<<-', '<<~'].include?(m[1]), m[3]]
          push :heredoc_queue unless state? :heredoc_queue
        end

        rule %r/(<<[-~]?)(["'])(\2)/ do |m|
          token Operator, m[1]
          token Name::Constant, "#{m[2]}#{m[3]}#{m[4]}"
          @heredoc_queue << [['<<-', '<<~'].include?(m[1]), '']
          push :heredoc_queue unless state? :heredoc_queue
        end
      end

      state :heredoc_queue do
        rule %r/(?=\n)/ do
          goto :resolve_heredocs
        end

        mixin :root
      end

      state :resolve_heredocs do
        mixin :string_intp_escaped

        rule %r/\n/, Str::Heredoc, :test_heredoc
        rule %r/[#\\\n]/, Str::Heredoc
        rule %r/[^#\\\n]+/, Str::Heredoc
      end

      state :test_heredoc do
        rule %r/[^#\\\n]*$/ do |m|
          tolerant, heredoc_name = @heredoc_queue.first
          check = tolerant ? m[0].strip : m[0].rstrip

          # check if we found the end of the heredoc
          puts "    end heredoc check #{check.inspect} = #{heredoc_name.inspect}" if @debug
          if check == heredoc_name
            @heredoc_queue.shift
            # if there's no more, we're done looking.
            pop! if @heredoc_queue.empty?
            token Name::Constant
          else
            token Str::Heredoc
          end

          pop!
        end

        rule(//) { pop! }
      end

      state :funcname do
        rule %r/\s+/, Text
        rule %r/\(/, Punctuation, :defexpr
        rule %r(
          (?:([a-zA-Z_]\w*)(\.))?
          (
            [a-zA-Z_]\w*[!?]? |
            \*\*? | [-+]@? | [/%&\|^`~] | \[\]=? |
            <<? | >>? | <=>? | >= | ===?
          )
        )x do |m|
          puts "matches: #{[m[0], m[1], m[2], m[3]].inspect}" if @debug
          groups Name::Class, Operator, Name::Function
          pop!
        end

        rule(//) { pop! }
      end

      state :classname do
        rule %r/\s+/, Text
        rule %r/\(/ do
          token Punctuation
          push :defexpr
          push :expr_start
        end

        # class << expr
        rule %r/<</ do
          token Operator
          goto :expr_start
        end

        rule %r/[A-Z_]\w*/, Name::Class, :pop!

        rule(//) { pop! }
      end

      state :ternary do
        rule(/:(?!:)/) { token Punctuation; goto :expr_start }

        mixin :root
      end

      state :defexpr do
        rule %r/(\))(\.|::)?/ do
          groups Punctuation, Operator
          pop!
        end
        rule %r/\(/ do
          token Punctuation
          push :defexpr
          push :expr_start
        end

        mixin :root
      end

      state :in_interp do
        rule %r/}/, Str::Interpol, :pop!
        mixin :root
      end

      state :string_intp do
        rule %r/[#][{]/, Str::Interpol, :in_interp
        rule %r/#(@@?|\$)[a-z_]\w*/i, Str::Interpol
      end

      state :string_intp_escaped do
        mixin :string_intp
        rule %r/\\([\\abefnrstv#"']|x[a-fA-F0-9]{1,2}|[0-7]{1,3})/,
          Str::Escape
        rule %r/\\./, Str::Escape
      end

      state :method_call do
        rule %r(/) do
          token Operator
          goto :expr_start
        end

        rule(/(?=\n)/) { pop! }

        rule(//) { goto :method_call_spaced }
      end

      state :method_call_spaced do
        mixin :whitespace

        rule %r([%/]=) do
          token Operator
          goto :expr_start
        end

        rule %r((/)(?=\S|\s*/)) do
          token Str::Regex
          goto :slash_regex
        end

        mixin :sigil_strings

        rule(%r((?=\s*/))) { pop! }

        rule(/\s+/) { token Text; goto :expr_start }
        rule(//) { pop! }
      end

      state :expr_start do
        mixin :inline_whitespace

        rule %r(/) do
          token Str::Regex
          goto :slash_regex
        end

        # char operator.  ?x evaulates to "x", unless there's a digit
        # beforehand like x>=0?n[x]:""
        rule %r(
          [?](\\[MC]-)*     # modifiers
          (\\([\\abefnrstv\#"']|x[a-fA-F0-9]{1,2}|[0-7]{1,3})|\S)
          (?!\w)
        )x, Str::Char, :pop!

        # special case for using a single space.  Ruby demands that
        # these be in a single line, otherwise it would make no sense.
        rule %r/(\s*)(%[rqswQWxiI]? \S* )/ do
          groups Text, Str::Other
          pop!
        end

        mixin :sigil_strings

        rule(//) { pop! }
      end

      state :slash_regex do
        mixin :string_intp
        rule %r(\\\\), Str::Regex
        rule %r(\\/), Str::Regex
        rule %r([\\#]), Str::Regex
        rule %r([^\\/#]+)m, Str::Regex
        rule %r(/) do
          token Str::Regex
          goto :regex_flags
        end
      end

      state :end_part do
        # eat up the rest of the stream as Comment::Preproc
        rule %r/.+/m, Comment::Preproc, :pop!
      end
    end
  end
end