lib/yard/parser/ruby/legacy/ruby_lex.rb



require_relative 'irb/slex'

module YARD
  module Parser::Ruby::Legacy
    # Legacy lexical tokenizer module.
    module RubyToken
      # Lexer states. The lexer stores one of these in @lex_state after every
      # token and consults it to decide how to read the next one.
      EXPR_BEG   = :EXPR_BEG   # start of an expression (after operators, "(", "[", "{", ",", ";")
      EXPR_MID   = :EXPR_MID   # after "rescue" / "return"
      EXPR_END   = :EXPR_END   # after a complete term (literal, closing bracket, "end", ...)
      EXPR_ARG   = :EXPR_ARG   # after a plain identifier read in EXPR_BEG or EXPR_DOT state
      EXPR_FNAME = :EXPR_FNAME # a method name is expected (after "def", "undef", "alias" or a symbol ":")
      EXPR_DOT   = :EXPR_DOT   # right after "." or a scoping "::"
      EXPR_CLASS = :EXPR_CLASS # right after "class"

      # A single token produced by the Ruby lexer: a source position, the
      # text that was read and the lexer state recorded for it.
      class Token
        # @private
        NO_TEXT = "??".freeze

        # @return [Integer] the line number in the file/stream the token is
        #   located.
        attr_reader :line_no

        # @return [Integer] the character number in the file/stream the token
        #   is located.
        attr_reader :char_no

        # @return [String] the token text value
        attr_reader :text

        # @return [Symbol] the lexical state at the token
        attr_accessor :lex_state

        # Creates a new Token object whose text is the NO_TEXT placeholder.
        #
        # @param [Integer] line_no the line number to initialize the token to
        # @param [Integer] char_no the char number to initialize the token to
        def initialize(line_no, char_no)
          @line_no, @char_no = line_no, char_no
          @text = NO_TEXT
        end

        # Chainable way to set the text attribute.
        #
        # @param [String] text the new text
        # @return [Token] this token object
        def set_text(text)
          tap { @text = text }
        end
      end

      # Represents a block; its text is always the "..." placeholder.
      class TkBlockContents < Token
        # @return [String] the fixed placeholder text
        def text
          '...'
        end
      end

      # Represents an end statement; its text is always empty.
      class TkStatementEnd < Token
        # @return [String] always the empty string
        def text
          ''
        end
      end

      # Token carrying a +node+ attribute. TokenDefinitions uses it as the
      # superclass of TkDSTRING, TkDXSTRING and TkDREGEXP.
      class TkNode < Token
        attr_reader :node
      end

      # Represents whitespace. TokenDefinitions derives TkSPACE, TkNL and
      # TkEND_OF_SCRIPT from this class.
      class TkWhitespace < Token
      end

      # Represents a Ruby identifier
      class TkId < Token
        # The name passed to the constructor.
        attr_reader :name

        # @param [Integer] line_no the line number of the token
        # @param [Integer] char_no the char number of the token
        # @param name the identifier name to remember
        def initialize(line_no, char_no, name)
          super(line_no, char_no)
          @name = name
        end
      end

      # Represents a Ruby keyword. Every keyword row in TokenDefinitions
      # ("class", "def", "if", ...) is defined as a subclass of this class.
      class TkKW < TkId
      end

      # Represents a Ruby value
      class TkVal < Token
        # @param [Integer] line_no the line number of the token
        # @param [Integer] char_no the char number of the token
        # @param value stored as the token text; when omitted the text
        #   becomes nil rather than Token::NO_TEXT
        def initialize(line_no, char_no, value = nil)
          super(line_no, char_no)
          set_text(value)
        end
      end

      # Represents an operator token.
      class TkOp < Token
        # @return the value of the class-level +op_name+, which
        #   RubyToken.def_token defines on every generated TkOp subclass
        def name
          self.class.op_name
        end
      end

      # Represents an operator-assignment token such as "+=" or "||=".
      class TkOPASGN < TkOp
        # The operator part: the Symbol that was passed in, or the
        # TkReading2Token entry looked up for a non-Symbol operator.
        attr_reader :op

        # @param [Integer] line_no the line number of the token
        # @param [Integer] char_no the char number of the token
        # @param [Symbol, String] op the operator without the trailing "="
        def initialize(line_no, char_no, op)
          super(line_no, char_no)
          @op = op.is_a?(Symbol) ? op : TkReading2Token[op]
        end
      end

      # Token for a character the lexer has no specific class for
      # (TokenDefinitions uses it for "\\", "@" and "$").
      class TkUnknownChar < Token
        # @param [Integer] line_no the line number of the token
        # @param [Integer] char_no the char number of the token
        # @param _id ignored
        def initialize(line_no, char_no, _id)
          super(line_no, char_no)
          # NOTE(review): the name is derived from the column number, not from
          # the character that was read (_id is unused) -- confirm whether
          # this is intentional before relying on #name.
          @name = char_no > 255 ? '?' : char_no.chr
        end
        attr :name
      end

      # Token returned by RubyLex#token in place of a real token when a
      # SyntaxError is rescued and exceptions on syntax errors are disabled.
      class TkError < Token
      end

      # Remembers the position that the next token built by {#Token} will be
      # created with.
      #
      # @private
      # @param line the line number to record
      # @param char the character (column) number to record
      def set_token_position(line, char)
        @prev_line_no = line
        @prev_char_no = char
      end

      # Builds a token object positioned at the location last recorded by
      # {#set_token_position}.
      #
      # +token+ may be:
      # * a String reading (looked up in TkReading2Token, whose entries are
      #   arrays starting with the token class),
      # * a Symbol class name (looked up in TkSymbol2Token, whose entries are
      #   the token classes themselves),
      # * a token class, or
      # * nil/false, in which case nil is returned.
      #
      # Classes descending from TkId, TkVal, TkOPASGN or TkUnknownChar take
      # +value+ as a third constructor argument; all others are built from the
      # position alone.
      #
      # @private
      # @param [String, Symbol, Class, nil] token what to build
      # @param value extra constructor argument for value-carrying classes
      # @return [Token, nil] the new token
      # @raise [RuntimeError] if a String or Symbol key is not registered
      def Token(token, value = nil) # rubocop:disable Style/MethodName
        case token
        when String
          entry = TkReading2Token[token]
          raise "no key #{token}" if entry.nil?
          Token(entry[0], value)
        when Symbol
          # TkSymbol2Token stores bare classes, so there is nothing to index.
          klass = TkSymbol2Token[token]
          raise "no key #{token}" if klass.nil?
          Token(klass, value)
        when nil, false
          nil
        else
          if (token.ancestors & [TkId, TkVal, TkOPASGN, TkUnknownChar]).empty?
            token.new(@prev_line_no, @prev_char_no)
          else
            token.new(@prev_line_no, @prev_char_no, value)
          end
        end
      end

      # Table of every token class generated by RubyToken.def_token.
      #
      # Each row is splatted into def_token and has the form
      #   [name, superclass = Token, reading = nil, *opts]
      # For keyword rows the first extra value is the lexer state entered
      # after the keyword and the optional second one names the token class
      # used when the keyword appears as a modifier (e.g. :TkIF_MOD).
      #
      # @private
      TokenDefinitions = [
        [:TkCLASS,      TkKW,  "class",  EXPR_CLASS],
        [:TkMODULE,     TkKW,  "module", EXPR_BEG],
        [:TkDEF,        TkKW,  "def",    EXPR_FNAME],
        [:TkUNDEF,      TkKW,  "undef",  EXPR_FNAME],
        [:TkBEGIN,      TkKW,  "begin",  EXPR_BEG],
        [:TkRESCUE,     TkKW,  "rescue", EXPR_MID],
        [:TkENSURE,     TkKW,  "ensure", EXPR_BEG],
        [:TkEND,        TkKW,  "end",    EXPR_END],
        [:TkIF,         TkKW,  "if",     EXPR_BEG, :TkIF_MOD],
        [:TkUNLESS,     TkKW,  "unless", EXPR_BEG, :TkUNLESS_MOD],
        [:TkTHEN,       TkKW,  "then",   EXPR_BEG],
        [:TkELSIF,      TkKW,  "elsif",  EXPR_BEG],
        [:TkELSE,       TkKW,  "else",   EXPR_BEG],
        [:TkCASE,       TkKW,  "case",   EXPR_BEG],
        [:TkWHEN,       TkKW,  "when",   EXPR_BEG],
        [:TkWHILE,      TkKW,  "while",  EXPR_BEG, :TkWHILE_MOD],
        [:TkUNTIL,      TkKW,  "until",  EXPR_BEG, :TkUNTIL_MOD],
        [:TkFOR,        TkKW,  "for",    EXPR_BEG],
        [:TkBREAK,      TkKW,  "break",  EXPR_END],
        [:TkNEXT,       TkKW,  "next",   EXPR_END],
        [:TkREDO,       TkKW,  "redo",   EXPR_END],
        [:TkRETRY,      TkKW,  "retry",  EXPR_END],
        [:TkIN,         TkKW,  "in",     EXPR_BEG],
        [:TkDO,         TkKW,  "do",     EXPR_BEG],
        [:TkRETURN,     TkKW,  "return", EXPR_MID],
        [:TkYIELD,      TkKW,  "yield",  EXPR_END],
        [:TkSUPER,      TkKW,  "super",  EXPR_END],
        [:TkSELF,       TkKW,  "self",   EXPR_END],
        [:TkNIL,        TkKW,  "nil",    EXPR_END],
        [:TkTRUE,       TkKW,  "true",   EXPR_END],
        [:TkFALSE,      TkKW,  "false",  EXPR_END],
        [:TkAND,        TkKW,  "and",    EXPR_BEG],
        [:TkOR,         TkKW,  "or",     EXPR_BEG],
        [:TkNOT,        TkKW,  "not",    EXPR_BEG],
        [:TkIF_MOD,     TkKW],
        [:TkUNLESS_MOD, TkKW],
        [:TkWHILE_MOD,  TkKW],
        [:TkUNTIL_MOD,  TkKW],
        [:TkALIAS,      TkKW,  "alias",    EXPR_FNAME],
        [:TkDEFINED,    TkKW,  "defined?", EXPR_END],
        [:TklBEGIN,     TkKW,  "BEGIN",    EXPR_END],
        [:TklEND,       TkKW,  "END",      EXPR_END],
        [:Tk__LINE__,   TkKW,  "__LINE__", EXPR_END],
        [:Tk__FILE__,   TkKW,  "__FILE__", EXPR_END],
        [:TkIDENTIFIER, TkId],
        [:TkFID,        TkId],
        [:TkGVAR,       TkId],
        [:TkIVAR,       TkId],
        [:TkCONSTANT,   TkId],
        [:TkINTEGER,    TkVal],
        [:TkFLOAT,      TkVal],
        [:TkSYMBOL,     TkVal],
        [:TkLABEL,      TkVal],
        [:TkSTRING,     TkVal],
        [:TkXSTRING,    TkVal],
        [:TkREGEXP,     TkVal],
        [:TkCOMMENT,    TkVal],
        [:TkDSTRING,    TkNode],
        [:TkDXSTRING,   TkNode],
        [:TkDREGEXP,    TkNode],
        [:TkNTH_REF,    TkId],
        [:TkBACK_REF,   TkId],
        [:TkUPLUS,      TkOp,   "+@"],
        [:TkUMINUS,     TkOp,   "-@"],
        [:TkPOW,        TkOp,   "**"],
        [:TkCMP,        TkOp,   "<=>"],
        [:TkEQ,         TkOp,   "=="],
        [:TkEQQ,        TkOp,   "==="],
        [:TkNEQ,        TkOp,   "!="],
        [:TkGEQ,        TkOp,   ">="],
        [:TkLEQ,        TkOp,   "<="],
        [:TkANDOP,      TkOp,   "&&"],
        [:TkOROP,       TkOp,   "||"],
        [:TkMATCH,      TkOp,   "=~"],
        [:TkNMATCH,     TkOp,   "!~"],
        [:TkDOT2,       TkOp,   ".."],
        [:TkDOT3,       TkOp,   "..."],
        [:TkAREF,       TkOp,   "[]"],
        [:TkASET,       TkOp,   "[]="],
        [:TkLSHFT,      TkOp,   "<<"],
        [:TkRSHFT,      TkOp,   ">>"],
        [:TkCOLON2,     TkOp],
        [:TkCOLON3,     TkOp],
        [:OPASGN,       TkOp], # +=, -=  etc. #
        [:TkASSOC,      TkOp,   "=>"],
        [:TkQUESTION,   TkOp,   "?"], #?
        [:TkCOLON,      TkOp,   ":"], #:
        [:TkSTAR],            # *arg
        [:TkAMPER],           # &arg #
        [:TkSYMBEG,     TkId],
        [:TkGT,         TkOp,   ">"],
        [:TkLT,         TkOp,   "<"],
        [:TkPLUS,       TkOp,   "+"],
        [:TkMINUS,      TkOp,   "-"],
        [:TkMULT,       TkOp,   "*"],
        [:TkDIV,        TkOp,   "/"],
        [:TkMOD,        TkOp,   "%"],
        [:TkBITOR,      TkOp,   "|"],
        [:TkBITXOR,     TkOp,   "^"],
        [:TkBITAND,     TkOp,   "&"],
        [:TkBITNOT,     TkOp,   "~"],
        [:TkNOTOP,      TkOp,   "!"],
        [:TkBACKQUOTE,  TkOp,   "`"],
        [:TkASSIGN,     Token,  "="],
        [:TkDOT,        Token,  "."],
        [:TkLPAREN,     Token,  "("],  # (exp)
        [:TkLBRACK,     Token,  "["],  # [arry]
        [:TkLBRACE,     Token,  "{"],  # {hash}
        [:TkRPAREN,     Token,  ")"],
        [:TkRBRACK,     Token,  "]"],
        [:TkRBRACE,     Token,  "}"],
        [:TkCOMMA,      Token,  ","],
        [:TkSEMICOLON,  Token,  ";"],
        [:TkSPACE,          TkWhitespace],
        [:TkNL,             TkWhitespace],
        [:TkEND_OF_SCRIPT,  TkWhitespace],
        [:TkBACKSLASH,  TkUnknownChar,  "\\"],
        [:TkAT,         TkUnknownChar,  "@"],
        [:TkDOLLAR,     TkUnknownChar,  "\$"]
      ]

      # Lookup tables filled in by RubyToken.def_token:
      #   TkReading2Token: { reading => [token_class, *opts] }
      #   TkSymbol2Token:  { token_name_symbol => token_class }
      TkReading2Token = {}
      TkSymbol2Token = {}

      # Creates a token class named +token_n+ under RubyToken and registers
      # it in TkSymbol2Token and, when a reading is given, in TkReading2Token.
      # Subclasses of TkOp additionally get a class-level +op_name+ returning
      # the reading as a String ("" when there is no reading).
      #
      # @private
      # @param [Symbol, String] token_n the constant name of the new class
      # @param [Class] super_token the superclass of the new class
      # @param [String, nil] reading the source text that maps to this token
      # @param opts extra values stored after the class in
      #   TkReading2Token[reading]
      # @raise [RuntimeError] if +reading+ is already registered
      def self.def_token(token_n, super_token = Token, reading = nil, *opts)
        token_n = token_n.id2name unless token_n.is_a?(String)

        token_c = Class.new(super_token)
        RubyToken.const_set(token_n, token_c)

        if reading
          raise "duplicate #{token_n} #{reading}" if TkReading2Token[reading]
          TkReading2Token[reading] = [token_c, *opts]
        end
        TkSymbol2Token[token_n.intern] = token_c

        return unless token_c <= TkOp

        # A closure instead of string eval: readings containing a quote or
        # "#{" no longer produce broken or injected source code. A fresh
        # String is returned on every call, as the evaluated literal did.
        token_c.define_singleton_method(:op_name) { reading.to_s.dup }
      end

      # Generate one token class per row of the table.
      TokenDefinitions.each { |defs| def_token(*defs) }

      # Shared newline token positioned at line 0, column 0.
      NEWLINE_TOKEN = TkNL.new(0, 0).set_text("\n")
    end

    # Lexical analyzer for Ruby source
    # @private
    class RubyLex
      # Read an input stream character by character. We allow for unlimited
      # ungetting of characters just read.
      #
      # We simplify the implementation greatly by reading the entire input
      # into a buffer initially, and then simply traversing it using
      # pointers.
      #
      # We also have to allow for the <i>here document diversion</i>. This
      # little gem comes about when the lexer encounters a here
      # document. At this point we effectively need to split the input
      # stream into two parts: one to read the body of the here document,
      # the other to read the rest of the input line where the here
      # document was initially encountered. For example, we might have
      #
      #   do_something(<<-A, <<-B)
      #     stuff
      #     for
      #   A
      #     stuff
      #     for
      #   B
      #
      # When the lexer encounters the <<-A, it reads until the end of the
      # line, and keeps it around for later. It then reads the body of the
      # here document. Once complete, it needs to read the rest of the
      # original line, but then skip the here document body.
      #
      # @private
      class BufferedReader
        # @return [Integer] number of the line being read, starting at 1
        attr_reader :line_num

        # Copies +content+ into an internal buffer, replacing tabs with
        # spaces and making sure the buffer ends with a newline.
        #
        # @param [String] content the complete source text
        def initialize(content)
          if /\t/ =~ content
            tab_width = 2
            expanded = content.split(/\n/).map do |line|
              # Each run of tabs becomes spaces; the width depends on the
              # run length and on the column (in the original line) at which
              # the run starts.
              line.gsub(/\t+/) do
                run = Regexp.last_match
                ' ' * (tab_width * run[0].length - run.pre_match.length % tab_width)
              end
            end
            content = expanded.join("\n")
          end

          @content = String.new(content)
          @content << "\n" unless @content.end_with?("\n")

          @size             = @content.size
          @offset           = 0
          @hwm              = 0
          @read_back_offset = 0
          @line_num         = 1
          @last_newline     = 0
          @newline_pending  = false
        end

        # @return [Integer] distance of the read position from the offset
        #   recorded at the last line change
        def column
          @offset - @last_newline
        end

        # Reads the next character, updating line bookkeeping. The line
        # number only advances when the first character after a newline is
        # read.
        #
        # @return [String, nil] the character, or nil at the end of input
        def getc
          return nil if @offset >= @size

          ch = @content[@offset, 1]
          @offset += 1
          @hwm = [@hwm, @offset].max

          if @newline_pending
            @line_num += 1
            @last_newline = @offset - 1
            @newline_pending = false
          end

          @newline_pending = true if ch == "\n"
          ch
        end

        # Same as {#getc}.
        def getc_already_read
          getc
        end

        # Steps the read position back by one character. The argument is
        # ignored.
        #
        # @raise [RuntimeError] when already at the beginning of the buffer
        def ungetc(_ch)
          raise "unget past beginning of file" if @offset <= 0

          @offset -= 1
          @newline_pending = false if @content[@offset] == "\n"
        end

        # @return [String] everything consumed since the previous call
        def get_read
          start = @read_back_offset
          @read_back_offset = @offset
          @content[start...@offset]
        end

        # Looks ahead without consuming anything.
        #
        # @param [Integer] at distance from the current read position
        # @return [String, nil] the character there, or nil past the end
        def peek(at)
          pos = @offset + at
          pos >= @size ? nil : @content[pos, 1]
        end

        # @return [Boolean] whether the unread input starts with +str+
        def peek_equal(str)
          @content[@offset, str.length] == str
        end

        # Inserts +reserve+ at the current read position so that it is read
        # next.
        def divert_read_from(reserve)
          @content[@offset, 0] = reserve
          @size = @content.size
        end
      end

      # end of nested class BufferedReader

      include RubyToken
      include IRB

      # Set by the newline and backslash-newline rules when the logical line
      # continues on the next physical line; #lex keeps reading while it is
      # true.
      attr_reader :continue
      # The current lexer state (one of the RubyToken::EXPR_* symbols).
      attr_reader :lex_state

      # Whether the lexer prints trace output while matching. Always false
      # here; the debug statements in this class are guarded by this method.
      #
      # @return [Boolean]
      def self.debug?
        false
      end

      # Builds the rule table and prepares a lexer positioned at the start
      # of +content+.
      #
      # @param [String] content the Ruby source to tokenize
      def initialize(content)
        lex_init
        @reader = BufferedReader.new(content)

        # position and nesting bookkeeping
        @line_no      = 1
        @exp_line_no  = 1
        @base_char_no = 0
        @indent       = 0

        # lexer state
        @lex_state       = EXPR_BEG
        @ltype           = nil
        @quoted          = nil
        @space_seen      = false
        @continue        = false
        @colonblock_seen = false
        @line            = ""

        # caller-adjustable options
        @skip_space                = false
        @read_auto_clean_up        = false
        @exception_on_syntax_error = true
      end

      # When true, #token silently skips TkSPACE tokens.
      attr_accessor :skip_space
      # When true, #token calls #get_read after every token.
      attr_accessor :read_auto_clean_up
      # When true, a SyntaxError rescued in #token leads to `abort`;
      # otherwise #token returns a TkError token instead.
      attr_accessor :exception_on_syntax_error

      # Nesting counter: incremented for "(", "[", "{" and block-opening
      # keywords, decremented for ")", "]", "}" and "end".
      attr :indent

      # io functions -- thin wrappers around the BufferedReader

      # @return the reader's current line number
      def line_no
        @reader.line_num
      end

      # @return the reader's current column
      def char_no
        @reader.column
      end

      # @return the text consumed since the previous call to get_read
      def get_read
        @reader.get_read
      end

      # @return [String, nil] the next character, or nil at end of input
      def getc
        @reader.getc
      end

      # Delegates to BufferedReader#getc_already_read (which itself just
      # calls getc).
      def getc_of_rests
        @reader.getc_already_read
      end

      # Reads characters up to and including the next newline, dropping any
      # carriage returns on the way.
      #
      # @return [String, nil] the line (including its trailing "\n" when one
      #   was read), or nil if the input is already exhausted
      def gets
        c = getc
        return unless c

        # An explicitly allocated buffer: appending to a "" literal fails
        # once string literals are frozen.
        line = String.new("")
        loop do
          line << c unless c == "\r"
          break if c == "\n"
          c = getc
          break unless c
        end
        line
      end

      # Pushes the read position back by one character.
      #
      # @param c passed through to the reader, which ignores it
      def ungetc(c = nil)
        @reader.ungetc(c)
      end

      # @param [String] str the text to compare against
      # @return [Boolean] whether the unread input starts with +str+
      def peek_equal?(str)
        @reader.peek_equal(str)
      end

      # Looks ahead without consuming input.
      #
      # @param [Integer] i offset from the current read position
      # @return [String, nil] the character there, or nil past the end
      def peek(i = 0)
        @reader.peek(i)
      end

      # Consumes tokens up to the end of the current logical line and
      # returns the source text that was read.
      #
      # Reading stops at a TkNL or TkEND_OF_SCRIPT token that is not marked
      # as a continuation (@continue), or as soon as #token returns nil.
      #
      # @return [String, nil] the text consumed, or nil when #token returned
      #   nil or when nothing but the end of the script was read
      def lex
        catch(:eof) do
          tk = nil
          finished = false
          until finished
            tk = token
            line_end = tk.is_a?(TkNL) || tk.is_a?(TkEND_OF_SCRIPT)
            finished = tk.nil? || (line_end && !@continue)
          end

          line = get_read
          exhausted = tk.nil? || (line == "" && tk.is_a?(TkEND_OF_SCRIPT))
          exhausted ? nil : line
        end
      end

      # Reads the next token from the rule table.
      #
      # Records the current position for the token being built, skips
      # TkSPACE tokens when @skip_space is set, and stamps the current lexer
      # state on the token before returning it.
      #
      # @return [Token, nil] the token, or nil when nothing matched or :eof
      #   was thrown while matching
      def token
        set_token_position(line_no, char_no)
        catch(:eof) do
          read_one = lambda do
            begin
              found = @OP.match(self)
              @space_seen = found.is_a?(TkSPACE)
              found
            rescue SyntaxError
              # NOTE(review): `abort` terminates the process -- confirm that
              # this is really wanted rather than re-raising.
              abort if @exception_on_syntax_error
              TkError.new(line_no, char_no)
            end
          end

          tk = read_one.call
          tk = read_one.call while @skip_space && tk.is_a?(TkSPACE)

          get_read if @read_auto_clean_up
          # throw :eof unless tk
          p tk if $DEBUG
          tk.lex_state = lex_state if tk
          tk
        end
      end

      # Keywords that open a block and increase the indent counter.
      ENINDENT_CLAUSE = [
        "case", "class", "def", "do", "for", "if",
        "module", "unless", "until", "while", "begin"
      ] #, "when"
      # Block keywords after which @colonblock_seen is set, so that a later
      # ":" on the same line can be lexed as a plain colon.
      ACCEPTS_COLON = ["if", "for", "unless", "until", "while"]
      # Keywords that close a block and decrease the indent counter.
      DEINDENT_CLAUSE = ["end"] #, "when"

      # Maps the type letter of a %-literal to the literal type used by
      # identify_string. The array forms (%w, %W, %i, %I) share "]"; %s is
      # read like a single-quoted string. Without the "i", "I" and "s"
      # entries identify_quotation returns nil for those literals.
      PERCENT_LTYPE = {
        "q" => "\'",
        "Q" => "\"",
        "x" => "\`",
        "r" => "/",
        "w" => "]",
        "W" => "]",
        "i" => "]",
        "I" => "]",
        "s" => "\'"
      }

      # Maps an opening bracket of a %-literal to its closing bracket.
      PERCENT_PAREN = {
        "{" => "}",
        "[" => "]",
        "<" => ">",
        "(" => ")"
      }

      # Token class for a literal of the given type without interpolation;
      # unknown types fall back to TkSTRING.
      Ltype2Token = {
        "\'" => TkSTRING,
        "\"" => TkSTRING,
        "\`" => TkXSTRING,
        "/" => TkREGEXP,
        "]" => TkDSTRING
      }
      Ltype2Token.default = TkSTRING

      # Token class for a literal of the given type that contains "#{...}".
      DLtype2Token = {
        "\"" => TkDSTRING,
        "\`" => TkDXSTRING,
        "/" => TkDREGEXP
      }

      # Creates the rule table (@OP) and registers the first half of the
      # lexer rules; the remaining ones are added by #lex_int2.
      #
      # Each rule maps literal source text to a block that builds the token,
      # and most blocks also update @lex_state for the following token. A
      # rule given a proc is registered together with that proc as its guard
      # condition.
      def lex_init()
        @OP = SLex.new
        @OP.def_rules("\0", "\004", "\032") do |chars, _io|
          Token(TkEND_OF_SCRIPT).set_text(chars)
        end

        # A run of blanks becomes one TkSPACE token.
        @OP.def_rules(" ", "\t", "\f", "\r", "\13") do |chars, _io|
          @space_seen = true
          while (ch = getc) =~ /[ \t\f\r\13]/
            chars << ch
          end
          ungetc
          Token(TkSPACE).set_text(chars)
        end

        @OP.def_rule("#") do |_op, _io|
          identify_comment
        end

        # =begin ... =end block comment, only at the start of a line.
        @OP.def_rule("=begin", proc { @prev_char_no == 0 && peek(0) =~ /\s/ }) do |op, _io|
          str = String.new(op)
          @ltype = "="

          begin
            line = String.new
            begin
              ch = getc
              # End of input inside an unterminated block: keep what was
              # read instead of appending nil.
              break if ch.nil?
              line << ch
            end until ch == "\n"
            str << line
          end until ch.nil? || line =~ /^=end/

          # Give back the final newline so it is lexed as its own token.
          ungetc

          @ltype = nil

          if str =~ /\A=begin\s+rdoc/i
            # "=begin rdoc" blocks lose their delimiter lines.
            str.sub!(/\A=begin.*\n/, '')
            str.sub!(/^=end.*/m, '')
            Token(TkCOMMENT).set_text(str)
          else
            Token(TkCOMMENT).set_text(str)
          end
        end

        @OP.def_rule("\n") do
          print "\\n\n" if RubyLex.debug?
          @colonblock_seen = false
          case @lex_state
          when EXPR_BEG, EXPR_FNAME, EXPR_DOT
            # The expression cannot be complete yet: the line continues.
            @continue = true
          else
            @continue = false
            @lex_state = EXPR_BEG
          end
          Token(TkNL).set_text("\n")
        end

        @OP.def_rules("*", "**",
          "!", "!=", "!~",
          "=", "==", "===",
          "=~", "<=>",
          "<", "<=",
          ">", ">=", ">>") do |op, _io|
          @lex_state = EXPR_BEG
          Token(op).set_text(op)
        end

        # "<<" is either the start of a here document or the shift operator.
        @OP.def_rules("<<") do |op, _io|
          tk = nil
          if @lex_state != EXPR_END && @lex_state != EXPR_CLASS &&
             (@lex_state != EXPR_ARG || @space_seen)
            c = peek(0)
            tk = identify_here_document if /[-~\w\"\'\`]/ =~ c
          end
          if !tk
            @lex_state = EXPR_BEG
            tk = Token(op).set_text(op)
          end
          tk
        end

        @OP.def_rules("'", '"') do |op, _io|
          identify_string(op)
        end

        @OP.def_rules("`") do |op, _io|
          if @lex_state == EXPR_FNAME
            # a method named "`" is being defined
            Token(op).set_text(op)
          else
            identify_string(op)
          end
        end

        # "?" is either the ternary operator or a character literal (?a).
        @OP.def_rules('?') do |op, _io|
          if @lex_state == EXPR_END
            @lex_state = EXPR_BEG
            Token(TkQUESTION).set_text(op)
          else
            ch = getc
            if @lex_state == EXPR_ARG && ch !~ /\s/
              ungetc
              @lex_state = EXPR_BEG
              Token(TkQUESTION).set_text(op)
            else
              str = String.new(op)
              str << ch
              if ch == '\\' #'
                str << read_escape
              end
              @lex_state = EXPR_END
              Token(TkINTEGER).set_text(str)
            end
          end
        end

        @OP.def_rules("&", "&&", "|", "||") do |op, _io|
          @lex_state = EXPR_BEG
          Token(op).set_text(op)
        end

        @OP.def_rules("+=", "-=", "*=", "**=",
          "&=", "|=", "^=", "<<=", ">>=", "||=", "&&=") do |op, _io|
          @lex_state = EXPR_BEG
          op =~ /^(.*)=$/
          Token(TkOPASGN, $1).set_text(op)
        end

        @OP.def_rule("+@", proc { @lex_state == EXPR_FNAME }) do |op, _io|
          Token(TkUPLUS).set_text(op)
        end

        @OP.def_rule("-@", proc { @lex_state == EXPR_FNAME }) do |op, _io|
          Token(TkUMINUS).set_text(op)
        end

        # "+" / "-" start a signed number when a digit follows in a position
        # where an operand is expected; otherwise they are operators.
        @OP.def_rules("+", "-") do |op, _io|
          catch(:RET) do
            if @lex_state == EXPR_ARG
              if @space_seen && peek(0) =~ /[0-9]/
                throw :RET, identify_number(op)
              else
                @lex_state = EXPR_BEG
              end
            elsif @lex_state != EXPR_END && peek(0) =~ /[0-9]/
              throw :RET, identify_number(op)
            else
              @lex_state = EXPR_BEG
            end
            Token(op).set_text(op)
          end
        end

        @OP.def_rule(".") do
          @lex_state = EXPR_BEG
          if peek(0) =~ /[0-9]/
            ungetc
            identify_number("")
          else
            # for obj.if
            @lex_state = EXPR_DOT
            Token(TkDOT).set_text(".")
          end
        end

        @OP.def_rules("..", "...") do |op, _io|
          @lex_state = EXPR_BEG
          Token(op).set_text(op)
        end

        lex_int2
      end

      # Registers the second half of the lexer rules on @OP (closers,
      # colons, "/", "%", brackets, sigils, __END__ and the catch-all rule
      # for numbers and identifiers).
      #
      # As in #lex_init, each block builds the token for the matched text
      # and usually updates @lex_state; a proc passed with a rule is its
      # guard condition.
      def lex_int2
        @OP.def_rules("]", "}", ")") do |op, _io|
          @lex_state = EXPR_END
          @indent -= 1
          Token(op).set_text(op)
        end

        # ":" is a plain colon after a block keyword or before whitespace,
        # otherwise it starts a symbol.
        @OP.def_rule(":") do
          if (@colonblock_seen && @lex_state != EXPR_BEG) || peek(0) =~ /\s/
            @lex_state = EXPR_BEG
            tk = Token(TkCOLON)
          else
            @lex_state = EXPR_FNAME
            tk = Token(TkSYMBEG)
          end
          tk.set_text(":")
        end

        @OP.def_rule("::") do
          # p @lex_state.id2name, @space_seen
          if @lex_state == EXPR_BEG || @lex_state == EXPR_ARG && @space_seen
            # leading "::" (top-level constant lookup)
            @lex_state = EXPR_BEG
            tk = Token(TkCOLON3)
          else
            @lex_state = EXPR_DOT
            tk = Token(TkCOLON2)
          end
          tk.set_text("::")
        end

        # "/" is a regexp start, the "/=" operator or division.
        @OP.def_rule("/") do |op, _io|
          if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
            identify_string(op)
          elsif peek(0) == '='
            getc
            @lex_state = EXPR_BEG
            Token(TkOPASGN, :/).set_text("/=") #")
          elsif @lex_state == EXPR_ARG && @space_seen && peek(0) !~ /\s/
            identify_string(op)
          else
            @lex_state = EXPR_BEG
            Token("/").set_text(op)
          end
        end

        @OP.def_rules("^") do
          @lex_state = EXPR_BEG
          Token("^").set_text("^")
        end

        # @OP.def_rules("^=") do
        #   @lex_state = EXPR_BEG
        #   Token(TkOPASGN, :^)
        # end

        @OP.def_rules(",", ";") do |op, _io|
          @colonblock_seen = false
          @lex_state = EXPR_BEG
          Token(op).set_text(op)
        end

        @OP.def_rule("~") do
          @lex_state = EXPR_BEG
          Token("~").set_text("~")
        end

        # "~@" is only a single token where a method name is expected. The
        # guard must compare the state: assigning here made the rule match
        # everywhere and overwrote @lex_state as a side effect.
        @OP.def_rule("~@", proc { @lex_state == EXPR_FNAME }) do
          @lex_state = EXPR_BEG
          Token("~").set_text("~@")
        end

        @OP.def_rule("(") do
          @indent += 1
          # if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
          #  @lex_state = EXPR_BEG
          #  tk = Token(TkfLPAREN)
          # else
          @lex_state = EXPR_BEG
          tk = Token(TkLPAREN)
          # end
          tk.set_text("(")
        end

        @OP.def_rule("[]", proc { @lex_state == EXPR_FNAME }) do
          Token("[]").set_text("[]")
        end

        @OP.def_rule("[]=", proc { @lex_state == EXPR_FNAME }) do
          Token("[]=").set_text("[]=")
        end

        @OP.def_rule("[") do
          @indent += 1
          # if @lex_state == EXPR_FNAME
          #   t = Token(TkfLBRACK)
          # else
          #   if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
          #     t = Token(TkLBRACK)
          #   elsif @lex_state == EXPR_ARG && @space_seen
          #   else
          #     t = Token(TkfLBRACK)
          #   end
          # end
          t = Token(TkLBRACK)
          @lex_state = EXPR_BEG
          t.set_text("[")
        end

        @OP.def_rule("{") do
          @indent += 1
          # if @lex_state != EXPR_END && @lex_state != EXPR_ARG
          #   t = Token(TkLBRACE)
          # else
          #   t = Token(TkfLBRACE)
          # end
          t = Token(TkLBRACE)
          @lex_state = EXPR_BEG
          t.set_text("{")
        end

        # Backslash-newline is a line continuation and is reported as space.
        @OP.def_rule('\\') do #'
          if getc == "\n"
            @space_seen = true
            @continue = true
            Token(TkSPACE).set_text("\\\n")
          else
            ungetc
            Token("\\").set_text("\\") #"
          end
        end

        # "%" is a %-literal start, the "%=" operator or modulo.
        @OP.def_rule('%') do |_op, _io|
          if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
            identify_quotation('%')
          elsif peek(0) == '='
            getc
            Token(TkOPASGN, "%").set_text("%=")
          elsif @lex_state == EXPR_ARG && @space_seen && peek(0) !~ /\s/
            identify_quotation('%')
          else
            @lex_state = EXPR_BEG
            Token("%").set_text("%")
          end
        end

        @OP.def_rule('$') do #'
          identify_gvar
        end

        @OP.def_rule('@') do
          if peek(0) =~ /[@\w]/
            # give the "@" back so identify_identifier reads the whole name
            ungetc
            identify_identifier
          else
            Token("@").set_text("@")
          end
        end

        # @OP.def_rule("def", proc{|op, io| /\s/ =~ io.peek(0)}) do
        #   |op, io|
        #   @indent += 1
        #   @lex_state = EXPR_FNAME
        # # @lex_state = EXPR_END
        # # until @rests[0] == "\n" or @rests[0] == ";"
        # #   rests.shift
        # # end
        # end

        # __END__ at the start of a line stops lexing.
        @OP.def_rule("__END__", proc { @prev_char_no == 0 && peek(0) =~ /[\r\n]/ }) do
          throw :eof
        end

        # Catch-all: numbers and identifiers; anything else yields nil.
        @OP.def_rule("") do |op, io|
          printf "MATCH: start %s: %s\n", op, io.inspect if RubyLex.debug?
          if peek(0) =~ /[0-9]/
            t = identify_number("")
          elsif peek(0) =~ /[\w]/
            t = identify_identifier
          end
          printf "MATCH: end %s: %s\n", op, io.inspect if RubyLex.debug?
          t
        end

        p @OP if RubyLex.debug?
      end

      # Lexes what follows a "$" that has already been consumed: a special
      # global ($~, $_, $-w, ...), a back reference ($&, $`, $', $+), a
      # numbered reference ($1, $12, ...), a named global (delegated to
      # #identify_identifier) or a lone "$".
      #
      # Sets @lex_state to EXPR_END.
      #
      # @return [Token] the token, with the text that was read
      def identify_gvar
        @lex_state = EXPR_END
        str = String.new("$")

        tk = case ch = getc
             when %r{[~_*$?!@/\\;,=:<>".]}
               str << ch
               Token(TkGVAR, str)

             when "-"
               # option-style globals such as $-w: take one more character
               str << "-" << getc
               Token(TkGVAR, str)

             when "&", "`", "'", "+"
               str << ch
               Token(TkBACK_REF, str)

             when /[1-9]/
               str << ch
               while (ch = getc) =~ /[0-9]/
                 str << ch
               end
               ungetc
               # pass the name along, as the other TkId-based branches do
               Token(TkNTH_REF, str)
             when /\w/
               # give back both the character and the "$" so that
               # identify_identifier reads the whole name
               ungetc
               ungetc
               return identify_identifier
             else
               ungetc
               Token("$")
             end
        tk.set_text(str)
      end

      # Lexes an identifier, constant, keyword, instance/class variable or
      # named global starting at the current read position, and updates
      # @lex_state (and, for block keywords, @indent and @colonblock_seen)
      # accordingly.
      #
      # @return [Token] TkGVAR, TkIVAR, a keyword token, TkCONSTANT, TkFID
      #   or TkIDENTIFIER, with its text set to what was read
      def identify_identifier
        # An explicitly allocated buffer: appending to a "" literal fails
        # once string literals are frozen.
        token = String.new("")
        token.concat getc if peek(0) =~ /[$@]/
        token.concat getc if peek(0) == "@"

        while (ch = getc) =~ /\w|_/
          print ":", ch, ":" if RubyLex.debug?
          token.concat ch
        end
        ungetc

        # a trailing "!" or "?" belongs to the name
        if ch == "!" || ch == "?"
          token.concat getc
        end
        # fix token

        # $stderr.puts "identifier - #{token}, state = #@lex_state"

        case token
        when /^\$/
          return Token(TkGVAR, token).set_text(token)
        when /^\@/
          @lex_state = EXPR_END
          return Token(TkIVAR, token).set_text(token)
        end

        # After "." a keyword is an ordinary method name (obj.if).
        if @lex_state != EXPR_DOT
          print token, "\n" if RubyLex.debug?

          # trans is [next_state, modifier_token_name] from TokenDefinitions
          token_c, *trans = TkReading2Token[token]
          if token_c
            # reserved word?

            if @lex_state != EXPR_BEG &&
               @lex_state != EXPR_FNAME &&
               trans[1]
              # modifiers
              token_c = TkSymbol2Token[trans[1]]
              @lex_state = trans[0]
            else
              if @lex_state != EXPR_FNAME
                if ENINDENT_CLAUSE.include?(token)
                  @indent += 1

                  if ACCEPTS_COLON.include?(token)
                    @colonblock_seen = true
                  else
                    @colonblock_seen = false
                  end
                elsif DEINDENT_CLAUSE.include?(token)
                  @indent -= 1
                  @colonblock_seen = false
                end
                @lex_state = trans[0]
              else
                # a keyword used as a method name (def end; ...)
                @lex_state = EXPR_END
              end
            end
            return Token(token_c, token).set_text(token)
          end
        end

        if @lex_state == EXPR_FNAME
          @lex_state = EXPR_END
          # setter method names include the trailing "="
          if peek(0) == '='
            token.concat getc
          end
        elsif @lex_state == EXPR_BEG || @lex_state == EXPR_DOT
          @lex_state = EXPR_ARG
        else
          @lex_state = EXPR_END
        end

        if token[0, 1] =~ /[A-Z]/
          return Token(TkCONSTANT, token).set_text(token)
        elsif token[token.size - 1, 1] =~ /[!?]/
          return Token(TkFID, token).set_text(token)
        else
          return Token(TkIDENTIFIER, token).set_text(token)
        end
      end

      # Lexes a here document whose "<<" has already been consumed.
      #
      # Reads the (optionally quoted) terminator name, sets aside the rest
      # of the line that introduced the here document, reads the body up to
      # the terminator line and finally re-inserts the set-aside text so it
      # is lexed next. For "<<-" and "<<~" the terminator line may be
      # indented; for "<<~" the body is additionally passed through #dedent.
      #
      # Sets @lex_state to EXPR_END.
      #
      # @return [Token] a string-like token whose text is the dumped body
      def identify_here_document
        ch = getc
        indented_terminator = false
        dedent_body = false
        if ch == "-"
          indented_terminator = true
          ch = getc
        elsif ch == "~"
          indented_terminator = true
          dedent_body = true
          ch = getc
        end
        if /['"`]/ =~ ch # '
          lt = ch
          # Explicitly allocated: appending to a "" literal fails once
          # string literals are frozen.
          quoted = String.new("")
          while (c = getc) && c != lt
            quoted.concat c
          end
        else
          lt = '"'
          quoted = ch.dup
          while (c = getc) && c =~ /\w/
            quoted.concat c
          end
          ungetc
        end

        ltback, @ltype = @ltype, lt
        reserve = String.new

        # Set aside the remainder of the introducing line (honouring
        # backslash escapes) to be lexed after the body.
        while ch = getc
          reserve << ch
          if ch == "\\" #"
            ch = getc
            reserve << ch
          elsif ch == "\n"
            break
          end
        end

        str = String.new
        while (l = gets)
          l.chomp!
          # Without this an indented terminator is never recognised and the
          # body swallows the rest of the input.
          candidate = indented_terminator ? l.lstrip : l
          if candidate == quoted
            str = dedent(str) if dedent_body
            break
          end
          str << l << "\n"
        end

        @reader.divert_read_from(reserve)

        @ltype = ltback
        @lex_state = EXPR_END
        Token(Ltype2Token[lt], str).set_text(str.dump)
      end

      # Removes the common leading-space indentation from every non-blank
      # line of +str+. Lines without any non-whitespace character are left
      # untouched and do not count towards the common indentation.
      #
      # @param [String] str the text to dedent
      # @return [String] the dedented text, or +str+ itself when there is
      #   nothing to remove
      def dedent(str)
        lines = str.split("\n", -1)
        indents = lines.select { |line| line =~ /\S/ }
                       .map { |line| line.match(/^ */).offset(0)[1] }
        dedent_amt = indents.min || 0
        return str if dedent_amt.zero?

        stripped = lines.map do |line|
          if line =~ /\S/
            line.gsub(/^ {#{dedent_amt}}/, "")
          else
            line
          end
        end
        stripped.join("\n")
      end

      # Lexes a %-literal whose "%" has already been consumed.
      #
      # A type letter listed in PERCENT_LTYPE selects the literal type and
      # is followed by the delimiter; a non-word character directly after
      # "%" is the delimiter of a double-quoted style literal. Anything else
      # is not recognised and nothing is lexed.
      #
      # @param [String] initial_char the text consumed so far ("%")
      # @return [Token, nil] the result of #identify_string, or nil when
      #   the literal type is not recognised
      def identify_quotation(initial_char)
        ch = getc
        lt = PERCENT_LTYPE[ch]
        if lt
          initial_char += ch
          ch = getc
        elsif ch =~ /\W/
          lt = "\""
        end
        # RubyLex.fail SyntaxError, "unknown type of %string ('#{ch}')"
        # if ch !~ /\W/
        #   ungetc
        #   next
        # end
        # @ltype = lt

        # closing delimiter: the bracket partner, or the delimiter itself
        @quoted = PERCENT_PAREN[ch] || ch
        identify_string(lt, @quoted, ch, initial_char) if lt
      end

      # Lexes a numeric literal and sets @lex_state to EXPR_END.
      #
      # Handles an optional sign, radix-prefixed integers (0x, 0b, 0o, 0d in
      # either case), legacy octal (leading 0), decimal integers and floats
      # with an optional exponent. A "." that is not followed by a digit is
      # left unread and does not make the number a float (so "1.foo" is an
      # integer followed by a method call).
      #
      # @param [String] start the text already consumed for this number:
      #   "+", "-" or "" (the first character is then read here)
      # @return [Token] a TkINTEGER or TkFLOAT with the literal as its text
      def identify_number(start)
        str = start.dup

        if start == "+" || start == "-" || start == ""
          start = getc
          str << start
        end

        @lex_state = EXPR_END

        # A leading zero starts a radix/octal integer, except for decimals
        # such as 0.5, which are read by the general loop below.
        if start == "0" && !(peek(0) == "." && peek(1) =~ /[0-9]/)
          match =
            case peek(0)
            when "x", "X" then /[0-9a-fA-F_]/
            when "b", "B" then /[01_]/
            when "o", "O" then /[0-7_]/
            when "d", "D" then /[0-9_]/
            end
          if match
            str << getc # the radix letter
          else
            match = /[0-7_]/
          end
          while ch = getc
            if ch !~ match
              ungetc
              break
            else
              str << ch
            end
          end
          return Token(TkINTEGER).set_text(str)
        end

        type = TkINTEGER
        allow_point = true
        allow_e = true
        while ch = getc
          case ch
          when /[0-9_]/
            str << ch

          when allow_point && "."
            if peek(0) !~ /[0-9]/
              # method call or range on an integer, not a fraction
              ungetc
              break
            end
            type = TkFLOAT
            str << ch
            allow_point = false

          when allow_e && "e", allow_e && "E"
            str << ch
            type = TkFLOAT
            if peek(0) =~ /[+-]/
              str << getc
            end
            allow_e = false
            allow_point = false
          else
            ungetc
            break
          end
        end
        Token(type).set_text(str)
      end

      # Lexes a quoted literal (string, regexp, or percent literal body) up to
      # its closing delimiter and returns it as a single token.
      #
      # Nested opener/closer pairs are balanced when +opener+ is given. A
      # "#{" sequence is consumed through its matching brace via
      # {#skip_inner_expression} unless +ltype+ is "'" or "]", and marks the
      # token as the dynamic variant taken from DLtype2Token. Backslash
      # escapes are consumed via {#read_escape} so that an escaped delimiter
      # does not end the literal. For regexp literals (+ltype+ of "/") every
      # trailing option character is included in the token text.
      #
      # On exit, including when an exception is raised, @ltype and @quoted are
      # cleared and the lexer state is set to EXPR_END.
      #
      # @param [String] ltype the literal type character
      # @param [String] quoted the closing delimiter
      # @param [String, nil] opener the opening delimiter, when it differs from
      #   the closing one and therefore nests
      # @param [String, nil] initial_char text that precedes the opening
      #   delimiter and is prepended to the token text
      # @return [Token] the literal token with its text set to the full source
      #   text of the literal
      def identify_string(ltype, quoted = ltype, opener = nil, initial_char = nil)
        @ltype = ltype
        @quoted = quoted
        subtype = nil

        str = String.new
        str << initial_char if initial_char
        str << (opener || quoted)

        nest = 0
        begin
          while (ch = getc)
            str << ch
            if @quoted == ch
              break if nest == 0
              nest -= 1
            elsif opener == ch
              nest += 1
            elsif @ltype != "'" && @ltype != "]" && ch == "#"
              ch = getc
              if ch == "{"
                subtype = true
                str << ch << skip_inner_expression
              elsif ch
                # Not an interpolation: let the main loop handle the character
                # (it may be the closing delimiter). Nothing to push back when
                # the input ended right after the "#".
                ungetc(ch)
              end
            elsif ch == '\\'
              str << read_escape
            end
          end

          if @ltype == "/"
            # A regexp may carry several option characters, e.g. /re/mi.
            while peek(0) =~ /[imxonesu]/
              str << getc
            end
          end

          token_class = subtype ? DLtype2Token[ltype] : Ltype2Token[ltype]
          Token(token_class, str).set_text(str)
        ensure
          @ltype = nil
          @quoted = nil
          @lex_state = EXPR_END
        end
      end

      # Consumes input up to and including the closing brace that balances an
      # already-consumed opening brace, counting nested braces on the way.
      #
      # Reading stops early if the input runs out before the brace is closed.
      #
      # @return [String] every character consumed, closing brace included
      def skip_inner_expression
        depth = 0
        consumed = String.new
        while (char = getc)
          consumed << char
          case char
          when '{'
            depth += 1
          when '}'
            break if depth.zero?
            depth -= 1
          end
        end
        consumed
      end

      # Lexes a line comment whose leading "#" has already been consumed.
      #
      # Characters are collected up to, but not including, the terminating
      # newline, which is pushed back for the caller. A backslash immediately
      # followed by a newline is replaced by a single space and the comment
      # continues onto the next line; any other backslash is kept verbatim.
      #
      # @ltype is set to "#" while reading and reset to nil once the
      # terminating newline is seen.
      #
      # @return [Token] a TkCOMMENT token whose text is the comment, including
      #   the leading "#"
      def identify_comment
        @ltype = "#"
        comment = String.new("#")
        while (ch = getc)
          if ch == "\\"
            ch = getc
            if ch == "\n"
              ch = " "
            else
              comment << "\\"
              # Input ended right after the backslash: there is no following
              # character to append.
              break if ch.nil?
            end
          elsif ch == "\n"
            @ltype = nil
            ungetc
            break
          end
          comment << ch
        end
        Token(TkCOMMENT).set_text(comment)
      end

      # Consumes the remainder of a backslash escape sequence (the backslash
      # itself has already been read) and returns the characters consumed.
      #
      # Recognized forms are up to three octal digits, "x" followed by up to
      # two hex digits, the "M-" and "C-" prefixes, and "c"; the last three
      # are followed by one character or by a nested escape. Any other
      # character is consumed on its own. If the input ends in the middle of
      # an escape, whatever was read so far is returned.
      #
      # @return [String] the consumed text, without the leading backslash
      def read_escape
        res = String.new
        case ch = getc
        when /[0-7]/
          # Re-read the first digit inside the loop so all digits are handled
          # uniformly.
          ungetc ch
          3.times do
            ch = getc
            break if ch.nil?
            unless ch =~ /[0-7]/
              ungetc
              break
            end
            res << ch
          end

        when "x"
          res << ch
          2.times do
            ch = getc
            break if ch.nil?
            unless ch =~ /[0-9a-fA-F]/
              ungetc
              break
            end
            res << ch
          end

        when "M", "C", "c"
          res << ch
          if ch != "c"
            # \M and \C must be followed by a dash; \c takes its operand
            # directly.
            ch = getc
            if ch != "-"
              ungetc if ch
              return res
            end
            res << ch
          end

          ch = getc
          if ch == "\\"
            res << ch << read_escape
          elsif ch
            res << ch
          end

        when nil
          # Input ended right after the backslash; nothing to consume.
        else
          res << ch
        end
        res
      end
    end
  end
end