class Prism::Translation::Parser::Lexer
def to_a
# Convert the prism tokens into the expected format for the parser gem.
# Converts the lexed prism tokens into the token format expected by the
# parser gem.
#
# Reads `lexed` (a list of `[token, state]` pairs), `source_buffer` and
# `offset_cache`, plus the TYPES / LAMBDA_TOKEN_TYPES /
# LPAREN_CONVERSION_TOKEN_TYPES / EXPR_* constants and the `parse_*` helpers,
# all of which are defined outside this method.
#
# Returns an Array of `[type, [value, location]]` entries, where `location` is
# built with `Range.new(source_buffer, begin, end)`.
#
# NOTE(review): `offset_cache` is applied to every prism offset before it is
# handed to `Range.new`; presumably it translates prism byte offsets into
# parser-gem character offsets -- confirm against where it is built.
#
# Token values are modified in place in a few branches (`chomp!`,
# `delete_prefix!`), exactly as before, so the entries in `lexed` are mutated
# by this call.
def to_a
  tokens = []
  index = 0
  length = lexed.length
  heredoc_identifier_stack = []

  # Builds a location from two *untranslated* prism offsets. Offsets that
  # have already been passed through offset_cache must not be given to this.
  build_range = lambda do |start_offset, end_offset|
    Range.new(source_buffer, offset_cache[start_offset], offset_cache[end_offset])
  end

  while index < length
    token, state = lexed[index]
    index += 1
    next if %i[IGNORED_NEWLINE __END__ EOF].include?(token.type)

    type = TYPES.fetch(token.type)
    value = token.value
    location = build_range.call(token.location.start_offset, token.location.end_offset)

    case type
    when :kDO
      # Scan the already-emitted tokens from newest to oldest without
      # materialising intermediate arrays.
      nearest_lambda_token = tokens.reverse_each.find { |emitted| LAMBDA_TOKEN_TYPES.include?(emitted.first) }
      type = :kDO_LAMBDA if nearest_lambda_token&.first == :tLAMBDA
    when :tCHARACTER
      value.delete_prefix!("?")
    when :tCOMMENT
      if token.type == :EMBDOC_BEGIN
        # Fold every following token up to and including EMBDOC_END into a
        # single comment token.
        start_index = index
        while (next_token = lexed[index]&.first) && next_token.type != :EMBDOC_END && index < length - 1
          value += next_token.value
          index += 1
        end

        # Merge the terminating token. An EMBDOC_END that directly follows
        # EMBDOC_BEGIN (an empty embedded document) is merged as well, so it
        # is not emitted as a second, separate comment.
        if next_token && (next_token.type == :EMBDOC_END || start_index != index)
          value += next_token.value
          location = build_range.call(token.location.start_offset, next_token.location.end_offset)
          index += 1
        end
      else
        # chomp! returns nil when there was no line terminator to strip (a
        # comment at the very end of the input); only shrink the location
        # when something was actually removed.
        stripped_newline = !value.chomp!.nil?
        comment_end = token.location.end_offset - (stripped_newline ? 1 : 0)
        location = build_range.call(token.location.start_offset, comment_end)
      end
    when :tNL, :tSPACE, :tSTRING_DVAR
      value = nil
    when :tFLOAT
      value = parse_float(value)
    when :tIMAGINARY
      value = parse_complex(value)
    when :tINTEGER
      if value.start_with?("+")
        # A leading "+" is emitted as its own tUNARY_NUM token and excluded
        # from the integer's location.
        sign_end = token.location.start_offset + 1
        tokens << [:tUNARY_NUM, ["+", build_range.call(token.location.start_offset, sign_end)]]
        location = build_range.call(sign_end, token.location.end_offset)
      end
      value = parse_integer(value)
    when :tLABEL, :tLABEL_END
      value.chomp!(":")
    when :tLCURLY
      type = :tLBRACE if state == (EXPR_BEG | EXPR_LABEL)
    when :tLPAREN2
      type = :tLPAREN if tokens.empty? || LPAREN_CONVERSION_TOKEN_TYPES.include?(tokens.dig(-1, 0))
    when :tNTH_REF
      value = parse_integer(value.delete_prefix("$"))
    when :tOP_ASGN
      value.chomp!("=")
    when :tRATIONAL
      value = parse_rational(value)
    when :tSTRING_BEG
      if token.type == :HEREDOC_START
        # Remember the heredoc identifier so the matching HEREDOC_END can
        # report it as its value.
        heredoc_identifier_stack.push(value.match(/<<[-~]?["'`]?(?<heredoc_identifier>.*?)["'`]?\z/)[:heredoc_identifier])
      end

      # Nil-safe lookahead: either may be nil at the end of the token list.
      next_token = lexed[index]&.first
      next_next_token = lexed[index + 1]&.first
      plain_quote = ["\"", "'"].include?(value)

      if plain_quote && next_token&.type == :STRING_END
        # Empty string literal: collapse begin + end into one tSTRING.
        next_location = token.location.join(next_token.location)
        type = :tSTRING
        value = ""
        location = build_range.call(next_location.start_offset, next_location.end_offset)
        index += 1
      elsif plain_quote && next_token&.type == :STRING_CONTENT && next_token.value.lines.count <= 1 && next_next_token&.type == :STRING_END
        # Single-line string literal: collapse begin + content + end into
        # one tSTRING.
        next_location = token.location.join(next_next_token.location)
        type = :tSTRING
        value = next_token.value.gsub("\\\\", "\\")
        location = build_range.call(next_location.start_offset, next_location.end_offset)
        index += 2
      elsif value.start_with?("<<")
        quote = value[2] == "-" || value[2] == "~" ? value[3] : value[2]
        if quote == "`"
          type = :tXSTRING_BEG
          value = "<<`"
        else
          value = "<<#{quote == "'" || quote == "\"" ? quote : "\""}"
        end
      end
    when :tSTRING_CONTENT
      lines = token.value.lines
      unless lines.one?
        # Emit one tSTRING_CONTENT token per line. start_offset is translated
        # through offset_cache exactly once here and then advanced by string
        # lengths, so it is passed to Range.new directly rather than being
        # translated a second time.
        start_offset = offset_cache[token.location.start_offset]
        lines.each do |line|
          newline = line.end_with?("\r\n") ? "\r\n" : "\n"
          chomped_line = line.chomp
          if (match = chomped_line.match(/(?<backslashes>\\+)\z/))
            # Half of the trailing backslashes are dropped from the value; an
            # odd count additionally swallows one backslash and the newline
            # (a line continuation). `adjustment` keeps the location in step
            # with the characters removed from the value.
            adjustment = match[:backslashes].size / 2
            adjusted_line = chomped_line.delete_suffix("\\" * adjustment)
            if match[:backslashes].size.odd?
              adjusted_line.delete_suffix!("\\")
              adjustment += 2
            else
              adjusted_line << newline
            end
          else
            adjusted_line = line
            adjustment = 0
          end

          end_offset = start_offset + adjusted_line.length + adjustment
          tokens << [:tSTRING_CONTENT, [adjusted_line, Range.new(source_buffer, start_offset, end_offset)]]
          start_offset = end_offset
        end
        next
      end
    when :tSTRING_END
      if token.type == :HEREDOC_END && value.end_with?("\n")
        # The value becomes the heredoc identifier recorded at HEREDOC_START
        # and the location excludes the trailing line terminator.
        newline_length = value.end_with?("\r\n") ? 2 : 1
        value = heredoc_identifier_stack.pop
        location = build_range.call(token.location.start_offset, token.location.end_offset - newline_length)
      elsif token.type == :REGEXP_END
        # Only the closing delimiter belongs to tSTRING_END; the remainder is
        # emitted as tREGEXP_OPT after this token (see below).
        value = value[0]
        location = build_range.call(token.location.start_offset, token.location.start_offset + 1)
      end
    when :tSYMBEG
      next_token = lexed[index]&.first
      if next_token && !%i[STRING_CONTENT EMBEXPR_BEGIN EMBVAR STRING_END].include?(next_token.type)
        # Symbol start followed directly by its name: merge into one tSYMBOL.
        next_location = token.location.join(next_token.location)
        type = :tSYMBOL
        value = next_token.value
        value = { "~@" => "~", "!@" => "!" }.fetch(value, value)
        location = build_range.call(next_location.start_offset, next_location.end_offset)
        index += 1
      end
    when :tFID
      # dig returns nil for an empty token list, so no emptiness check is needed.
      type = :tIDENTIFIER if tokens.dig(-1, 0) == :kDEF
    when :tXSTRING_BEG
      next_token = lexed[index]&.first
      if next_token && next_token.type != :STRING_CONTENT && next_token.type != :STRING_END
        type = :tBACK_REF2
      end
    end

    tokens << [type, [value, location]]

    if token.type == :REGEXP_END
      tokens << [:tREGEXP_OPT, [token.value[1..], build_range.call(token.location.start_offset + 1, token.location.end_offset)]]
    end
  end

  tokens
end