# encoding: utf-8
module Rubocop
module Cop
# Correlates the tokens of a piece of source code with "grammar paths":
# for each token index, the chain of s-expression node types leading to
# the node that the token belongs to.
#
# NOTE(review): the token types (:on_lbrace, :on_tstring_beg, ...) and
# the s-expression node names look like the ones produced by Ripper;
# confirm against the caller that builds the tokens and the sexp.
class Grammar
  # Modifier nodes whose s-expression lists the condition before the
  # statement, i.e. in the opposite order of the tokens in the source.
  REVERSED_MODIFIERS = [:if_mod, :unless_mod, :while_mod, :until_mod].freeze

  # tokens - a list of objects responding to #type, #text and #pos,
  #          where #pos responds to #lineno and #column.
  def initialize(tokens)
    @tokens_without_pos = tokens.map { |t| [t.type, t.text] }
    process_embedded_expressions

    # Built after the rewrite above, so that lookups by [type, text]
    # see :on_embexpr_end instead of the original :on_rbrace.
    @token_indexes = {}
    @tokens_without_pos.each_with_index do |t, i|
      (@token_indexes[t] ||= []) << i
    end

    @ix = 0
    @table = {}

    token_positions = tokens.map { |t| [t.pos.lineno, t.pos.column] }
    # Build the hash from [position, index] pairs. Splatting the
    # flattened list into Hash[] would pass two arguments per token,
    # which can exhaust the stack for big files.
    @index_by_pos = Hash[token_positions.each_with_index.to_a]

    # Grammar nodes that have no leaf of their own but are represented
    # by a specific token.
    @special = {
      assign:      [:on_op, '='],
      brace_block: [:on_lbrace, '{']
    }
  end

  # The string "#{x}" will give the tokens
  # [:on_tstring_beg, '"'], [:on_embexpr_beg, '#{'], [:on_ident, 'x'],
  # [:on_rbrace, '}'], [:on_tstring_end, '"']
  # which is not so good for us. We want to distinguish between a
  # right brace that ends an embedded expression inside a string
  # and an ordinary right brace. So we replace :on_rbrace with the
  # made up :on_embexpr_end.
  #
  # If the token list already contains :on_embexpr_end tokens, they are
  # taken as the end of the embedded expression, so that a later
  # ordinary right brace is not rewritten by mistake.
  def process_embedded_expressions
    state = :outside
    brace_depth = 0
    @tokens_without_pos.each_with_index do |(type, _), ix|
      case state
      when :outside
        state = :inside_string if type == :on_tstring_beg
      when :inside_string
        case type
        when :on_tstring_end
          state = :outside
        when :on_embexpr_beg
          brace_depth = 1
          state = :inside_expr
        end
      when :inside_expr
        case type
        when :on_lbrace
          brace_depth += 1
        when :on_rbrace
          # Only the brace that balances the opening '#{' ends the
          # embedded expression; inner braces are left untouched.
          if brace_depth == 1
            @tokens_without_pos[ix][0] = :on_embexpr_end
            state = :inside_string
          end
          brace_depth -= 1
        when :on_embexpr_end
          # The end of the expression is already marked; nothing to
          # rewrite, just go back to scanning the string.
          brace_depth = 0
          state = :inside_string
        end
      end
    end
  end

  # Returns a hash mapping indexes in the token array to grammar
  # paths, e.g.:
  # {  0 => [:program, :assign, :var_field, :@ident],
  #    1 => [:program, :assign],
  #    2 => [:program, :assign, :@int],
  #    4 => [:program, :assign, :var_field, :@ident],
  #    5 => [:program, :assign],
  #    7 => [:program, :assign, :@int],
  #    9 => [:program, :assign, :var_field, :@ident],
  #   11 => [:program, :assign],
  #   12 => [:program, :assign, :@int] }
  #
  # sexp - the s-expression (nested arrays) to walk. Leaf nodes, whose
  #        type starts with '@', must end with an object responding to
  #        #lineno and #column.
  # path - the node types above sexp; used by the recursion.
  #
  # Fails with a RuntimeError if a leaf has a position for which there
  # is no token.
  def correlate(sexp, path = [])
    return @table unless sexp.is_a?(Array)

    head = sexp[0]
    case head
    when /^@/
      # Leaves in the grammar have a corresponding token with a
      # position, which we search for and advance @ix.
      @ix = @index_by_pos[[sexp[-1].lineno, sexp[-1].column]]
      fail "#{sexp}\n#{@index_by_pos}" unless @ix
      @table[@ix] = path + [head]
      @ix += 1
    when *@special.keys
      # Here we don't advance @ix because there may be other
      # tokens in between the current one and the one we get from
      # @special.
      find(path, sexp, @special[head])
    when :block_var # "{ |...|" or "do |...|"
      # There may be no single '|' token at all (e.g. when an empty
      # parameter list is lexed as one '||' operator). In that case
      # there is nothing to record for this node.
      opening_bar = find(path, sexp, [:on_op, '|'])
      if opening_bar
        @ix = opening_bar + 1
        find(path, sexp, [:on_op, '|'])
      end
    end

    path += [head] if head.is_a?(Symbol)

    # Compensate for the reverse order of modifier forms: the statement
    # comes first in the source but last in the s-expression. Visiting
    # the statement first keeps @ix in step with the token order.
    children = REVERSED_MODIFIERS.include?(head) ? sexp.reverse : sexp
    children.each do |elem|
      case elem
      when Array
        correlate(elem, path) # Dive deeper
      when Symbol
        unless elem.to_s =~ /^@?[a-z_]+$/
          # There's a trailing @ in some symbols in sexp,
          # e.g. :-@, that don't appear in tokens. That's why we
          # chomp it off.
          find(path, [elem], [:on_op, elem.to_s.chomp('@')])
        end
      end
    end

    @table
  end

  private

  # Looks for the first occurrence of token_to_find ([type, text]) at
  # or after @ix and records path + [sexp[0]] for it. For a left brace
  # the matching right brace gets the same path.
  #
  # Returns the index of the token, or nil if there is no such token.
  def find(path, sexp, token_to_find)
    indices = @token_indexes[token_to_find]
    return unless indices

    ix = indices.find { |i| i >= @ix }
    return unless ix

    @table[ix] = path + [sexp[0]]
    add_matching_rbrace(ix) if token_to_find == [:on_lbrace, '{']
    ix
  end

  # Gives the right brace that balances the left brace at index ix the
  # same grammar path as the left brace. Does nothing if no balancing
  # right brace is found.
  def add_matching_rbrace(ix)
    brace_depth = 0
    # The scan starts on the left brace itself, so the depth is 1 after
    # the first token and drops back to 0 on the matching right brace.
    rbrace_offset = @tokens_without_pos[ix..-1].index do |t|
      brace_depth += 1 if t == [:on_lbrace, '{']
      brace_depth -= 1 if t == [:on_rbrace, '}']
      brace_depth == 0 && t == [:on_rbrace, '}']
    end
    @table[ix + rbrace_offset] = @table[ix] if rbrace_offset
  end
end
end
end