# Reads the next token from the current scanner.
#
# Whitespace, comments, `=begin`/`=end` blocks and backslash-newline
# continuations are skipped inside the loop; every other recognised construct
# updates @lex_state and returns immediately.
#
# Returns a two-element array [token_type, value] for every token produced
# directly here, and [false, false] once the main scanner (the only entry left
# on @scanner_stack) is exhausted. Results of next_string_token, next_token,
# heredoc_identifier, process_numeric and process_identifier are returned
# unchanged; their shape is not visible from this method.
#
# Raises RuntimeError for an unterminated `=begin` block, for a `[` in
# :expr_fname/:expr_dot state that is not followed by `]`, for an invalid
# global variable name and for unrecognised input; raises SyntaxError for a
# backslash that is not followed by a newline.
#
# NOTE(review): `scan` and `check` are presumably thin wrappers around
# `scanner.scan` / `scanner.check` -- confirm against their definitions.
def yylex
  @space_seen = false
  cmd_start = false

  # While a string terminator is pending, string lexing takes over entirely.
  return next_string_token if self.strterm

  while true
    # Horizontal whitespace: remember it (it disambiguates several operators).
    if scan(/\ |\t|\r/)
      @space_seen = true
      next

    # Newline or comment.
    elsif scan(/(\n|#)/)
      c = scanner.matched
      if c == '#'
        scan(/(.*)/) # swallow the rest of the comment line
      else
        @line += 1
      end
      # Collapse any following newlines, counting each of them.
      scan(/(\n+)/)
      @line += scanner.matched.length if scanner.matched
      # In these states the newline does not produce a token.
      next if [:expr_beg, :expr_dot].include? @lex_state
      # A leading "." on the next line continues a method chain, so the newline
      # is dropped, unless the dot actually starts a ".." range operator.
      if scan(/([\ \t\r\f\v]*)\./)
        @space_seen = true unless scanner[1].empty?
        scanner.pos = scanner.pos - 1 # un-read the dot itself
        next unless check(/\.\./)
      end
      cmd_start = true
      @lex_state = :expr_beg
      return :tNL, '\\n'

    elsif scan(/\;/)
      @lex_state = :expr_beg
      return :tSEMI, ';'

    # "*", "**", "*=", "**="
    elsif scan(/\*/)
      if scan(/\*/)
        if scan(/\=/)
          @lex_state = :expr_beg
          return :tOP_ASGN, '**'
        end
        if @lex_state == :expr_fname || @lex_state == :expr_dot
          @lex_state = :expr_arg
        else
          @lex_state = :expr_beg
        end
        return :tPOW, '**'
      end
      if scan(/\=/)
        @lex_state = :expr_beg
        return :tOP_ASGN, '*'
      end
      # Only a lone "*" can reach this point; decide between the splat (tSTAR)
      # and the operator/method-name form (tSTAR2).
      result = '*'
      if @lex_state == :expr_fname || @lex_state == :expr_dot
        @lex_state = :expr_arg
        return :tSTAR2, result
      elsif @space_seen && check(/\S/)
        @lex_state = :expr_beg
        return :tSTAR, result
      elsif [:expr_beg, :expr_mid].include? @lex_state
        @lex_state = :expr_beg
        return :tSTAR, result
      else
        @lex_state = :expr_beg
        return :tSTAR2, result
      end

    # "!", "!=", "!~", "!@"
    elsif scan(/\!/)
      # Remember where the "!" ended so the lookahead below can be undone
      # exactly, even when nothing was consumed (newline / end of input) or
      # when the consumed character is wider than one position unit.
      after_bang = scanner.pos
      c = scan(/./)
      if after_operator?
        @lex_state = :expr_arg
        if c == "@"
          return :tBANG, '!'
        end
      else
        @lex_state = :expr_beg
      end
      if c == '='
        return :tNEQ, '!='
      elsif c == '~'
        return :tNMATCH, '!~'
      end
      scanner.pos = after_bang
      return :tBANG, '!'

    # "=", "==", "===", "=~", "=>" and =begin/=end blocks
    elsif scan(/\=/)
      if @lex_state == :expr_beg && !@space_seen
        # NOTE(review): when "begin" matches but is not followed by whitespace
        # the word stays consumed and lexing falls through below -- confirm
        # this is intended.
        if scan(/begin/) && space?
          scan(/(.*)/) # end of line
          line_count = 0
          while true
            if scanner.eos?
              raise "embedded document meets end of file"
            end
            if scan(/\=end/) && space?
              @line += line_count
              return next_token
            end
            if scan(/\n/)
              line_count += 1
              next
            end
            scan(/(.*)/)
          end
        end
      end
      @lex_state = after_operator? ? :expr_arg : :expr_beg
      if scan(/\=/)
        if scan(/\=/)
          return :tEQQ, '==='
        end
        return :tEQ, '=='
      end
      if scan(/\~/)
        return :tMATCH, '=~'
      elsif scan(/\>/)
        return :tASSOC, '=>'
      end
      return :tEQL, '='

    # String-like literals: install a string terminator and report the opener.
    elsif scan(/\"/)
      self.strterm = new_strterm(:dquote, '"', '"')
      return :tSTRING_BEG, scanner.matched
    elsif scan(/\'/)
      self.strterm = new_strterm(:squote, "'", "'")
      return :tSTRING_BEG, scanner.matched
    elsif scan(/\`/)
      self.strterm = new_strterm(:xquote, '`', '`')
      return :tXSTRING_BEG, scanner.matched

    # "&", "&&", "&=", "&&="
    elsif scan(/\&/)
      if scan(/\&/)
        @lex_state = :expr_beg
        if scan(/\=/)
          return :tOP_ASGN, '&&'
        end
        return :tANDOP, '&&'
      elsif scan(/\=/)
        @lex_state = :expr_beg
        return :tOP_ASGN, '&'
      end
      if spcarg?
        # (disabled warning) `&' interpreted as argument prefix
        result = :tAMPER
      elsif beg?
        result = :tAMPER
      else
        # (disabled warning) warn_balanced: & argument prefix
        result = :tAMPER2
      end
      @lex_state = after_operator? ? :expr_arg : :expr_beg
      return result, '&'

    # "|", "||", "|=", "||="
    elsif scan(/\|/)
      if scan(/\|/)
        @lex_state = :expr_beg
        if scan(/\=/)
          return :tOP_ASGN, '||'
        end
        return :tOROP, '||'
      elsif scan(/\=/)
        # An operand follows "|=", exactly as for every other assignment
        # operator handled in this method.
        @lex_state = :expr_beg
        return :tOP_ASGN, '|'
      end
      @lex_state = after_operator?() ? :expr_arg : :expr_beg
      return :tPIPE, '|'

    # Percent literals with an explicit type letter.
    elsif scan(/\%[QqWwxr]/)
      str_type = scanner.matched[1, 1]
      paren = scan(/./)
      term = case paren
             when '(' then ')'
             when '[' then ']'
             when '{' then '}'
             else paren
             end
      case str_type
      when 'Q'
        self.strterm = new_strterm2(:dquote, paren, term)
        return :tSTRING_BEG, scanner.matched
      when 'q'
        self.strterm = new_strterm2(:squote, paren, term)
        return :tSTRING_BEG, scanner.matched
      when 'W'
        self.strterm = new_strterm(:dword, 'W', term)
        scan(/\s*/)
        return :tWORDS_BEG, scanner.matched
      # NOTE(review): 'i' cannot occur here because the regexp above does not
      # accept "%i".
      when 'w', 'i'
        self.strterm = new_strterm(:sword, 'w', term)
        scan(/\s*/)
        return :tAWORDS_BEG, scanner.matched
      when 'x'
        self.strterm = new_strterm2(:xquote, paren, term)
        return :tXSTRING_BEG, scanner.matched
      when 'r'
        self.strterm = new_strterm2(:regexp, paren, term)
        return :tREGEXP_BEG, scanner.matched
      end

    # "/" is either the start of a regexp literal or the division operator.
    elsif scan(/\//)
      if [:expr_beg, :expr_mid].include? @lex_state
        self.strterm = new_strterm(:regexp, '/', '/')
        return :tREGEXP_BEG, scanner.matched
      elsif scan(/\=/)
        @lex_state = :expr_beg
        return :tOP_ASGN, '/'
      elsif @lex_state == :expr_fname || @lex_state == :expr_dot
        @lex_state = :expr_arg
      elsif @lex_state == :expr_cmdarg || @lex_state == :expr_arg
        # "foo /bar/" (space before, none after) is read as a regexp argument.
        if !check(/\s/) && @space_seen
          self.strterm = new_strterm(:regexp, '/', '/')
          return :tREGEXP_BEG, scanner.matched
        end
      else
        @lex_state = :expr_beg
      end
      return :tDIVIDE, '/'

    # Bare "%": "%=", an untyped percent string, or the modulo operator.
    elsif scan(/\%/)
      if scan(/\=/)
        @lex_state = :expr_beg
        return :tOP_ASGN, '%'
      elsif check(/[^\s]/)
        if @lex_state == :expr_beg || (@lex_state == :expr_arg && @space_seen)
          start_word = scan(/./)
          end_word = { '(' => ')', '[' => ']', '{' => '}' }[start_word] || start_word
          self.strterm = new_strterm2(:dquote, start_word, end_word)
          return :tSTRING_BEG, scanner.matched
        end
      end
      @lex_state = after_operator? ? :expr_arg : :expr_beg
      return :tPERCENT, '%'

    # Backslash-newline is a line continuation and counts as whitespace.
    elsif scan(/\\/)
      if scan(/\r?\n/)
        @space_seen = true
        next
      end
      raise SyntaxError, "backslash must appear before newline :#{@file}:#{@line}"

    elsif scan(/\(/)
      if [:expr_beg, :expr_mid].include? @lex_state
        result = :tLPAREN
      elsif @space_seen && [:expr_arg, :expr_cmdarg].include?(@lex_state)
        result = :tLPAREN_ARG
      else
        result = :tLPAREN2
      end
      @lex_state = :expr_beg
      cond_push 0
      cmdarg_push 0
      return result, scanner.matched

    elsif scan(/\)/)
      cond_lexpop
      cmdarg_lexpop
      @lex_state = :expr_end
      return :tRPAREN, scanner.matched

    elsif scan(/\[/)
      if [:expr_fname, :expr_dot].include? @lex_state
        # Only the "[]" and "[]=" method names are accepted in these states.
        @lex_state = :expr_arg
        if scan(/\]=/)
          return :tASET, '[]='
        elsif scan(/\]/)
          return :tAREF, '[]'
        else
          raise "Unexpected '[' token"
        end
      elsif [:expr_beg, :expr_mid].include?(@lex_state) || @space_seen
        @lex_state = :expr_beg
        cond_push 0
        cmdarg_push 0
        return :tLBRACK, scanner.matched
      else
        @lex_state = :expr_beg
        cond_push 0
        cmdarg_push 0
        return :tLBRACK2, scanner.matched
      end

    elsif scan(/\]/)
      cond_lexpop
      cmdarg_lexpop
      @lex_state = :expr_end
      return :tRBRACK, scanner.matched

    elsif scan(/\}/)
      cond_lexpop
      cmdarg_lexpop
      @lex_state = :expr_end
      return :tRCURLY, scanner.matched

    # Dots, longest match first.
    elsif scan(/\.\.\./)
      @lex_state = :expr_beg
      return :tDOT3, scanner.matched
    elsif scan(/\.\./)
      @lex_state = :expr_beg
      return :tDOT2, scanner.matched
    elsif scan(/\./)
      @lex_state = :expr_dot unless @lex_state == :expr_fname
      return :tDOT, scanner.matched

    # "::" is tCOLON3 at the start of an expression, tCOLON2 otherwise.
    elsif scan(/\:\:/)
      if [:expr_beg, :expr_mid, :expr_class].include? @lex_state
        @lex_state = :expr_beg
        return :tCOLON3, scanner.matched
      elsif @space_seen && @lex_state == :expr_arg
        @lex_state = :expr_beg
        return :tCOLON3, scanner.matched
      end
      @lex_state = :expr_dot
      return :tCOLON2, scanner.matched

    # ":" is either a plain colon or the start of a symbol.
    elsif scan(/\:/)
      if end? || check(/\s/)
        unless check(/\w/)
          @lex_state = :expr_beg
          return :tCOLON, ':'
        end
        @lex_state = :expr_fname
        return :tSYMBEG, ':'
      end
      # Quoted symbols get a string terminator of their own.
      if scan(/\'/)
        self.strterm = new_strterm(:ssym, "'", "'")
      elsif scan(/\"/)
        self.strterm = new_strterm(:dsym, '"', '"')
      end
      @lex_state = :expr_fname
      return :tSYMBEG, ':'

    elsif scan(/\^\=/)
      @lex_state = :expr_beg
      return :tOP_ASGN, '^'
    elsif scan(/\^/)
      if @lex_state == :expr_fname || @lex_state == :expr_dot
        @lex_state = :expr_arg
        return :tCARET, scanner.matched
      end
      @lex_state = :expr_beg
      return :tCARET, scanner.matched

    # "<<=", "<<" (possibly a heredoc), "<=>", "<=", "<"
    elsif check(/\</)
      if scan(/\<\<\=/)
        @lex_state = :expr_beg
        return :tOP_ASGN, '<<'
      elsif scan(/\<\</)
        if @lex_state == :expr_fname || @lex_state == :expr_dot
          @lex_state = :expr_arg
          return :tLSHFT, '<<'
        elsif ![:expr_dot, :expr_class].include?(@lex_state) && !end? && (!arg? || @space_seen)
          # A heredoc may start here; fall back to the shift operator if not.
          token = heredoc_identifier
          return token if token
        end
        @lex_state = :expr_beg
        return :tLSHFT, '<<'
      elsif scan(/\<\=\>/)
        if after_operator?
          @lex_state = :expr_arg
        else
          if @lex_state == :expr_class
            cmd_start = true
          end
          @lex_state = :expr_beg
        end
        return :tCMP, '<=>'
      elsif scan(/\<\=/)
        if @lex_state == :expr_fname || @lex_state == :expr_dot
          @lex_state = :expr_arg
        else
          @lex_state = :expr_beg
        end
        return :tLEQ, '<='
      elsif scan(/\</)
        if @lex_state == :expr_fname || @lex_state == :expr_dot
          @lex_state = :expr_arg
        else
          @lex_state = :expr_beg
        end
        return :tLT, '<'
      end

    # ">>=", ">>", ">=", ">"
    elsif check(/\>/)
      if scan(/\>\>\=/)
        # An operand follows ">>=", mirroring the "<<=" handling above.
        @lex_state = :expr_beg
        return :tOP_ASGN, '>>'
      elsif scan(/\>\>/)
        if @lex_state == :expr_fname || @lex_state == :expr_dot
          @lex_state = :expr_arg
        else
          @lex_state = :expr_beg
        end
        return :tRSHFT, '>>'
      elsif scan(/\>\=/)
        # Same state transition as "<=", "<", ">" and ">>".
        if @lex_state == :expr_fname || @lex_state == :expr_dot
          @lex_state = :expr_arg
        else
          @lex_state = :expr_beg
        end
        return :tGEQ, scanner.matched
      elsif scan(/\>/)
        if @lex_state == :expr_fname || @lex_state == :expr_dot
          @lex_state = :expr_arg
        else
          @lex_state = :expr_beg
        end
        return :tGT, '>'
      end

    elsif scan(/->/)
      # FIXME: # should be :expr_arg, but '(' breaks it...
      @lex_state = :expr_end
      @start_of_lambda = true # consumed by the "{" branch below
      return [:tLAMBDA, scanner.matched]

    # "+" / "-": unary, binary, method name ("+@"/"-@") or "+=" / "-=".
    elsif scan(/[+-]/)
      matched = scanner.matched
      sign, utype = if matched == '+'
                      [:tPLUS, :tUPLUS]
                    else
                      [:tMINUS, :tUMINUS]
                    end
      if beg?
        @lex_state = :expr_mid
        return [utype, matched]
      elsif after_operator?
        @lex_state = :expr_arg
        return [:tIDENTIFIER, matched + '@'] if scan(/@/)
        return [sign, matched]
      end
      if scan(/\=/)
        @lex_state = :expr_beg
        return [:tOP_ASGN, matched]
      end
      if arg?
        # "foo -1": space before but not after reads as a unary sign.
        if !space? && @space_seen
          @lex_state = :expr_mid
          return [utype, matched]
        end
      end
      @lex_state = :expr_beg
      # The value is the operator text, as in every other return of this branch.
      return [sign, matched]

    # "?" is the ternary operator or a character literal (?a).
    elsif scan(/\?/)
      if end?
        @lex_state = :expr_beg
        return :tEH, scanner.matched
      end
      unless check(/\ |\t|\r|\s/)
        @lex_state = :expr_end
        return :tSTRING, scan(/./)
      end
      @lex_state = :expr_beg
      # NOTE(review): if `check` updates scanner.matched, the value returned
      # here is the whitespace that follows rather than "?" -- confirm.
      return :tEH, scanner.matched

    elsif scan(/\~/)
      # NOTE(review): unlike the other operators only :expr_fname is checked
      # here and the resulting state is :expr_end -- confirm this is intended.
      if @lex_state == :expr_fname
        @lex_state = :expr_end
        return :tTILDE, '~'
      end
      @lex_state = :expr_beg
      return :tTILDE, '~'

    # Global variables and numbered match references.
    elsif check(/\$/)
      if scan(/\$([1-9]\d*)/)
        @lex_state = :expr_end
        return :tNTH_REF, scanner.matched.sub('$', '')
      elsif scan(/(\$_)(\w+)/)
        @lex_state = :expr_end
        return :tGVAR, scanner.matched
      elsif scan(/\$[\+\'\`\&!@\"~*$?\/\\:;=.,<>_]/)
        @lex_state = :expr_end
        return :tGVAR, scanner.matched
      elsif scan(/\$\w+/)
        @lex_state = :expr_end
        return :tGVAR, scanner.matched
      else
        raise "Bad gvar name: #{scanner.peek(5).inspect}"
      end

    elsif scan(/\@\@\w*/)
      @lex_state = :expr_end
      return :tCVAR, scanner.matched
    elsif scan(/\@\w*/)
      @lex_state = :expr_end
      return :tIVAR, scanner.matched

    elsif scan(/\,/)
      @lex_state = :expr_beg
      return :tCOMMA, scanner.matched

    elsif scan(/\{/)
      if @start_of_lambda
        # First "{" after "->" opens the lambda body.
        @start_of_lambda = false
        @lex_state = :expr_beg
        return [:tLAMBEG, scanner.matched]
      elsif [:expr_end, :expr_arg, :expr_cmdarg].include? @lex_state
        result = :tLCURLY
      elsif @lex_state == :expr_endarg
        # NOTE(review): no "t" prefix, unlike every other token type here --
        # verify against the grammar's token names.
        result = :LBRACE_ARG
      else
        # NOTE(review): a String token type, not a Symbol -- verify against
        # the grammar.
        result = '{'
      end
      @lex_state = :expr_beg
      cond_push 0
      cmdarg_push 0
      return result, scanner.matched

    elsif check(/[0-9]/)
      return process_numeric

    elsif scan(/(\w)+[\?\!]?/)
      return process_identifier scanner.matched, cmd_start
    end

    # Nothing matched: either the input is exhausted or it is invalid.
    if scanner.eos?
      if @scanner_stack.size == 1 # our main scanner, we can't pop this
        return [false, false]
      else # we were probably parsing a heredoc, so pop that scanner and continue
        @scanner_stack.pop
        @scanner = @scanner_stack.last
        return next_token
      end
    end

    raise "Unexpected content in parsing stream `#{scanner.peek 5}` :#{@file}:#{@line}"
  end
end