# lib/rouge/lexers/perl.rb



# -*- coding: utf-8 -*- #
# frozen_string_literal: true

module Rouge
  module Lexers
    class Perl < RegexLexer
      # Lexer metadata, declared through the class-level DSL: the display
      # title and a one-line description.
      title "Perl"
      desc "The Perl scripting language (perl.org)"

      # Name of this lexer (`perl`) plus the additional alias `pl`.
      tag 'perl'
      aliases 'pl'

      # Filename globs and MIME types associated with this lexer.
      filenames '*.pl', '*.pm', '*.t'
      mimetypes 'text/x-perl', 'application/x-perl'

      # Content-based detection hook.
      #
      # @param text [#shebang?] the source text; only `shebang?` is called on it
      # @return [true, nil] true when `text.shebang?('perl')` is truthy,
      #   nil otherwise
      def self.detect?(text)
        true if text.shebang?('perl')
      end

      # Words the :root state emits as Keyword. The list is joined into a
      # single alternation there, so the order below is kept as-is.
      keywords = %w[
        case continue do else elsif
        for foreach if last my next our
        redo reset then unless until while
        use print new
        BEGIN CHECK INIT END
        return
      ]

      # Built-in function names the :root state emits as Name::Builtin.
      # One group per initial letter; overall order is unchanged because the
      # list is joined into a regex alternation in :root.
      builtins = %w[
        abs accept alarm atan2
        bind binmode bless
        caller chdir chmod chomp chop chown chr chroot close closedir
        connect continue cos crypt
        dbmclose dbmopen defined delete die dump
        each endgrent endhostent endnetent endprotoent endpwent endservent
        eof eval exec exists exit exp
        fcntl fileno flock fork format formline
        getc getgrent getgrgid getgrnam
        gethostbyaddr gethostbyname gethostent getlogin
        getnetbyaddr getnetbyname getnetent getpeername
        getpgrp getppid getpriority
        getprotobyname getprotobynumber getprotoent
        getpwent getpwnam getpwuid
        getservbyname getservbyport getservent
        getsockname getsockopt glob gmtime goto grep
        hex
        import index int ioctl
        join
        keys kill
        last lc lcfirst length link listen local localtime log lstat
        map mkdir msgctl msgget msgrcv msgsnd my
        next no
        oct open opendir ord our
        pack package pipe pop pos printf prototype push
        quotemeta
        rand read readdir readline readlink readpipe recv redo ref rename
        require reverse rewinddir rindex rmdir
        scalar seek seekdir select semctl semget semop send
        setgrent sethostent setnetent setpgrp setpriority
        setprotoent setpwent setservent setsockopt
        shift shmctl shmget shmread shmwrite shutdown sin sleep
        socket socketpair sort splice split sprintf sqrt srand stat study
        substr symlink syscall sysopen sysread sysseek system syswrite
        tell telldir tie tied time times tr truncate
        uc ucfirst umask undef unlink unpack unshift untie utime
        values vec
        wait waitpid wantarray warn write
      ]

      # Shorthand for the token type given to regex-like literals below.
      re_tok = Str::Regex

      # Matches ONE complete delimited section -- opening delimiter, body,
      # closing delimiter and any trailing flags from `egimosx` -- emits it
      # as a regex token and pops. :root pushes this state, for example,
      # after the first half of a bracket-delimited s/tr/y and after a bare
      # `m` that is followed by one of the delimiters handled here.
      #
      # In every body a backslash followed by a backslash or by the closing
      # delimiter is consumed as a unit, so an escaped delimiter does not end
      # the section.
      state :balanced_regex do
        # Same character opens and closes: `/`, `!`, and backslash.
        rule %r(/(\\[\\/]|[^/])*/[egimosx]*)m, re_tok, :pop!
        rule %r(!(\\[\\!]|[^!])*![egimosx]*)m, re_tok, :pop!
        rule %r(\\(\\\\|[^\\])*\\[egimosx]*)m, re_tok, :pop!
        # Bracket pairs: {}, <>, [], ().
        rule %r({(\\[\\}]|[^}])*}[egimosx]*), re_tok, :pop!
        rule %r(<(\\[\\>]|[^>])*>[egimosx]*), re_tok, :pop!
        rule %r(\[(\\[\\\]]|[^\]])*\][egimosx]*), re_tok, :pop!
        rule %r[\((\\[\\\)]|[^\)])*\)[egimosx]*], re_tok, :pop!
        # Sigil characters used as delimiters: @, %, $.
        rule %r(@(\\[\\@]|[^@])*@[egimosx]*), re_tok, :pop!
        rule %r(%(\\[\\%]|[^%])*%[egimosx]*), re_tok, :pop!
        rule %r(\$(\\[\\\$]|[^\$])*\$[egimosx]*), re_tok, :pop!
      end

      # Top-level state. Rule order is significant: the lexer tries the rules
      # top to bottom, so earlier rules take precedence over later ones that
      # could match at the same position.
      state :root do
        rule %r/#.*/, Comment::Single
        # POD block: a line starting with `=word ...` through the next `=cut`.
        rule %r/^=[a-zA-Z0-9]+\s+.*?\n=cut/m, Comment::Multiline
        rule %r/(?:#{keywords.join('|')})\b/, Keyword

        # `format NAME =` header line; the picture/body lines are handled
        # by the :format state.
        rule %r/(format)(\s+)([a-zA-Z0-9_]+)(\s*)(=)(\s*\n)/ do
          groups Keyword, Text, Name, Text, Punctuation, Text

          push :format
        end

        rule %r/(?:eq|lt|gt|le|ge|ne|not|and|or|cmp)\b/, Operator::Word

        # substitution/transliteration: balanced delimiters
        # Only the first bracketed half is consumed here; :balanced_regex
        # consumes the second half and the flags.
        rule %r((?:s|tr|y){(\\\\|\\}|[^}])*}\s*), re_tok, :balanced_regex
        rule %r((?:s|tr|y)<(\\\\|\\>|[^>])*>\s*), re_tok, :balanced_regex
        rule %r((?:s|tr|y)\[(\\\\|\\\]|[^\]])*\]\s*), re_tok, :balanced_regex
        rule %r[(?:s|tr|y)\((\\\\|\\\)|[^\)])*\)\s*], re_tok, :balanced_regex

        # substitution/transliteration: arbitrary non-whitespace delimiters
        # Both halves and the flags are matched in one go; \1 is the
        # delimiter captured right after the operator.
        # NOTE(review): inside a character class `\1` is presumably not a
        # back-reference, so `[^\1]` likely accepts (almost) any character and
        # the lazy `*?` is what stops at the delimiter -- confirm.
        rule %r((?:s|tr|y)\s*([^\w\s])((\\\\|\\\1)|[^\1])*?\1((\\\\|\\\1)|[^\1])*?\1[msixpodualngcr]*)m, re_tok
        rule %r((?:s|tr|y)\s+(\w)((\\\\|\\\1)|[^\1])*?\1((\\\\|\\\1)|[^\1])*?\1[msixpodualngcr]*)m, re_tok

        # matches: common case, m-optional
        rule %r(m?/(\\\\|\\/|[^/\n])*/[msixpodualngc]*), re_tok
        # Bare `m` followed by a delimiter: the delimited section itself is
        # consumed by :balanced_regex.
        rule %r(m(?=[/!\\{<\[\(@%\$])), re_tok, :balanced_regex

        # arbitrary non-whitespace delimiters
        rule %r(m\s*([^\w\s])((\\\\|\\\1)|[^\1])*?\1[msixpodualngc]*)m, re_tok
        rule %r(m\s+(\w)((\\\\|\\\1)|[^\1])*?\1[msixpodualngc]*)m, re_tok

        # A /.../ literal directly after `=~` or `(`. The pattern already
        # consumes the closing delimiter and the flags, so no further state is
        # pushed: :balanced_regex would wait for a second delimited section
        # that a plain match never has.
        # NOTE(review): this depends on the look-behind seeing text before
        # the current scan position -- confirm the scanner supports that.
        rule %r(((?<==~)|(?<=\())\s*/(\\\\|\\/|[^/])*/[msixpodualngc]*),
          re_tok

        rule %r/\s+/, Text

        # A bareword followed (possibly after comment lines) by `=>` is
        # handed to :fat_comma; the look-ahead itself consumes nothing.
        rule(/(?=[a-z_]\w*(\s*#.*\n)*\s*=>)/i) { push :fat_comma }

        rule %r/(?:#{builtins.join('|')})\b/, Name::Builtin
        rule %r/((__(DIE|WARN)__)|(DATA|STD(IN|OUT|ERR)))\b/,
          Name::Builtin::Pseudo

        # Heredoc: `<<TAG` (optionally quoted, optional `;`) up to a line
        # consisting of TAG, emitted as a single string token.
        rule %r/<<([\'"]?)([a-zA-Z_][a-zA-Z0-9_]*)\1;?\n.*?\n\2\n/m, Str

        # __END__ / __DATA__: the rest of the input is handled by :end_part.
        rule %r/(__(END|DATA)__)\b/, Comment::Preproc, :end_part
        rule %r/\$\^[ADEFHILMOPSTWX]/, Name::Variable::Global
        rule %r/\$[\\"'\[\]&`+*.,;=%~?@$!<>(^\|\/_-](?!\w)/, Name::Variable::Global
        # Sigil run before a letter, `{`, `[` or `;`; the name that follows
        # is read by :varname.
        rule %r/[$@%&*][$@%&*#_]*(?=[a-z{\[;])/i, Name::Variable, :varname

        rule %r/[-+\/*%=<>&^\|!\\~]=?/, Operator

        rule %r/0_?[0-7]+(_[0-7]+)*/, Num::Oct
        rule %r/0x[0-9A-Fa-f]+(_[0-9A-Fa-f]+)*/, Num::Hex
        rule %r/0b[01]+(_[01]+)*/, Num::Bin
        rule %r/(\d*(_\d*)*\.\d+(_\d*)*|\d+(_\d*)*\.\d+(_\d*)*)(e[+-]?\d+)?/i,
          Num::Float
        rule %r/\d+(_\d*)*e[+-]?\d+(_\d*)*/i, Num::Float
        rule %r/\d+(_\d+)*/, Num::Integer

        # Quoted strings: the quote is punctuation, the body is lexed by the
        # matching string state.
        rule %r/'/, Punctuation, :sq
        rule %r/"/, Punctuation, :dq
        rule %r/`/, Punctuation, :bq
        rule %r/<([^\s>]+)>/, re_tok
        # q/qq/qw/qr/qx with bracketing delimiters go to the nesting-aware
        # *_string states; any other non-word delimiter is matched whole.
        rule %r/(q|qq|qw|qr|qx)\{/, Str::Other, :cb_string
        rule %r/(q|qq|qw|qr|qx)\(/, Str::Other, :rb_string
        rule %r/(q|qq|qw|qr|qx)\[/, Str::Other, :sb_string
        rule %r/(q|qq|qw|qr|qx)</, Str::Other, :lt_string
        rule %r/(q|qq|qw|qr|qx)(\W)(.|\n)*?\2/, Str::Other

        rule %r/package\s+/, Keyword, :modulename
        rule %r/sub\s+/, Keyword, :funcname
        # NOTE(review): several alternatives here begin with a character the
        # single-character Operator rule above already accepts, and `<=>` is
        # listed after `<=`; they look unreachable as written -- confirm.
        rule %r/\[\]|\*\*|::|<<|>>|>=|<=|<=>|={3}|!=|=~|!~|&&?|\|\||\.{1,3}/,
          Operator
        rule %r/[()\[\]:;,<>\/?{}]/, Punctuation
        # Anything else starting with a word character is read by :name.
        rule(/(?=\w)/) { push :name }
      end

      # Body of a `format NAME =` declaration (pushed from :root). Every line
      # is emitted as Str::Interpol; a line consisting of a single `.` ends
      # the body and pops.
      state :format do
        rule %r/\.\n/, Str::Interpol, :pop!
        rule %r/.*?\n/, Str::Interpol
      end

      # Left-hand side of `=>` (pushed from :root via a look-ahead): the
      # bareword is emitted as a string, intervening comments and whitespace
      # are passed through, and the `=>` operator itself pops the state.
      state :fat_comma do
        rule %r/#.*/, Comment::Single
        rule %r/\w+/, Str
        rule %r/\s+/, Text
        rule %r/=>/, Operator, :pop!
      end

      # Rules shared (via mixin) by :varname and :name. A `word::` prefix is
      # emitted as a namespace and keeps the state; the final run of word
      # characters and colons is emitted as a variable name and pops.
      state :name_common do
        rule %r/\w+::/, Name::Namespace
        rule %r/[\w:]+/, Name::Variable, :pop!
      end

      # What follows a sigil run (pushed from :root). Whitespace is skipped;
      # an opening brace/bracket, `)`, `,` or `;` ends the variable without a
      # name; otherwise the shared name rules read the (possibly qualified)
      # name and pop.
      state :varname do
        rule %r/\s+/, Text
        rule %r/[{\[]/, Punctuation, :pop! # hash syntax
        rule %r/[),]/, Punctuation, :pop! # arg specifier
        rule %r/[;]/, Punctuation, :pop! # postfix
        mixin :name_common
      end

      # A bare identifier (pushed from :root when the next character is a
      # word character). The shared name rules are tried first.
      state :name do
        mixin :name_common
        # NOTE(review): every character `[A-Z_]+` can match is also matched by
        # the `[\w:]+` rule mixed in above, which comes first and pops; this
        # rule therefore looks unreachable -- confirm before relying on
        # Name::Constant output.
        rule %r/[A-Z_]+(?=[^a-zA-Z0-9_])/, Name::Constant, :pop!
        # Nothing name-like left (e.g. right after a `Pkg::` prefix): pop
        # without consuming input.
        rule(/(?=\W)/) { pop! }
      end

      # Identifier right after `package ` (pushed from :root): one
      # identifier is emitted as a namespace, then the state pops.
      state :modulename do
        rule %r/[a-z_]\w*/i, Name::Namespace, :pop!
      end

      # Header of a `sub` declaration (pushed from :root after `sub `). The
      # name and an optional prototype do not pop; the state ends at the
      # opening `{` of the body or at a `;`.
      state :funcname do
        rule %r/[a-zA-Z_]\w*[!?]?/, Name::Function
        rule %r/\s+/, Text

        # argument declaration
        # A parenthesised run of `$`, `@`, `%` characters, plus any
        # whitespace after it.
        rule %r/(\([$@%]*\))(\s*)/ do
          groups Punctuation, Text
        end

        # Everything else up to and including the next `{` is emitted as one
        # punctuation token (`.` does not cross a newline here).
        rule %r/.*?{/, Punctuation, :pop!
        rule %r/;/, Punctuation, :pop!
      end

      # Body of a single-quoted string (pushed from :root after the opening
      # quote). Only `\\` and `\'` are escapes; any other backslash is plain
      # string content. The closing quote pops.
      state :sq do
        rule %r/\\[\\']/, Str::Escape
        rule %r/[^\\']+/, Str::Single
        rule %r/'/, Punctuation, :pop!
        rule %r/\\/, Str::Single
      end

      # Body of a double-quoted string (pushed from :root after the opening
      # quote). Interpolation rules are tried first, then the escape forms;
      # the closing quote pops.
      state :dq do
        mixin :string_intp
        # Single-character escapes.
        rule %r/\\[\\tnrabefluLUE"$@]/, Str::Escape
        # Backslash, `0` and two digits.
        rule %r/\\0\d{2}/, Str::Escape
        # \o{digits}
        rule %r/\\o\{\d+\}/, Str::Escape
        # \x with two hex digits, or \x{hex digits}.
        rule %r/\\x\h{2}/, Str::Escape
        rule %r/\\x\{\h+\}/, Str::Escape
        # \c followed by any one character.
        rule %r/\\c./, Str::Escape
        # \N{...}
        rule %r/\\N\{[^\}]+\}/, Str::Escape
        # The lazy `+?` has nothing after it, so each match takes a single
        # character; the rules above are retried at every position.
        rule %r/[^\\"]+?/, Str::Double
        rule %r/"/, Punctuation, :pop!
        # Any backslash not covered by the escape rules above.
        rule %r/\\/, Str::Escape
      end

      # Body of a backtick string (pushed from :root after the opening
      # backtick). Interpolation rules are tried first; the closing backtick
      # pops.
      state :bq do
        mixin :string_intp
        rule %r/\\[\\tnr`]/, Str::Escape
        # Lazy `+?` with nothing after it: one character per match.
        rule %r/[^\\`]+?/, Str::Backtick
        rule %r/`/, Punctuation, :pop!
      end

      # Generates the :cb_string, :rb_string, :sb_string and :lt_string
      # states that :root pushes after q/qq/qw/qr/qx followed by `{`, `(`,
      # `[` or `<`. Each value holds the regex-escaped opening and closing
      # delimiter for its state.
      bracket_pairs = {
        cb: ['\{', '\}'],
        rb: ['\(', '\)'],
        sb: ['\[', '\]'],
        lt: ['<', '>']
      }

      bracket_pairs.each do |prefix, (opener, closer)|
        body_token = Str::Other

        state(:"#{prefix}_string") do
          # Escaped delimiter or escaped backslash, then any other backslash.
          rule %r/\\[#{opener}#{closer}\\]/, body_token
          rule %r/\\/, body_token
          # Nested opening delimiter: bare `push` -- presumably re-enters the
          # current state so that pairs balance; confirm against the DSL.
          rule(/#{opener}/) do
            token body_token
            push
          end
          rule %r/#{closer}/, body_token, :pop!
          # Plain content: anything that is neither delimiter nor backslash.
          rule %r/[^#{opener}#{closer}\\]+/, body_token
        end
      end

      # Inside `${ ... }` / `@{ ... }` within an interpolating string (pushed
      # from :string_intp). Identifiers are emitted as Str::Interpol,
      # whitespace as Text, and the closing `}` pops.
      # NOTE(review): no rule here covers other characters (sigils,
      # brackets, operators) -- confirm how unmatched input is handled.
      state :in_interp do
        rule %r/}/, Str::Interpol, :pop!
        rule %r/\s+/, Text
        rule %r/[a-z_]\w*/i, Str::Interpol
      end

      # Interpolation rules mixed into :dq and :bq: `${` or `@{` opens a
      # braced interpolation handled by :in_interp; `$name` or `@name` is
      # emitted as a single Str::Interpol token.
      state :string_intp do
        rule %r/[$@][{]/, Str::Interpol, :in_interp
        rule %r/[$@][a-z_]\w*/i, Str::Interpol
      end

      # Everything after `__END__` / `__DATA__` (pushed from :root).
      state :end_part do
        # eat the rest of the stream
        # (`.+` with the /m flag also matches newlines, so a single match
        # takes all remaining input before popping)
        rule %r/.+/m, Comment::Preproc, :pop!
      end
    end
  end
end