def result
  tokens = []

  state = :default
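  # heredoc_stack holds the groups of heredocs currently being collected.
  # Each group gathers the heredocs opened at one nesting level; a new group
  # is pushed whenever a heredoc declaration appears inside the body of
  # another heredoc (see the shuffling logic below).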
  heredoc_stack = [[]]

  result = YARP.lex(source, @filepath)
  result_value = result.value
  previous_state = nil
  # If there's a UTF-8 byte order mark at the start of the file, then Ripper
  # sets every token's location on the first line back by 6 bytes. It also
  # keeps the byte order mark in the first token's value. This is weird, and I
  # don't want to mirror that in our parser. So instead, we'll match up the
  # values here, and then match up the locations as we process the tokens.
  bom = source.bytes[0..2] == [0xEF, 0xBB, 0xBF]
  result_value[0][0].value.prepend("\xEF\xBB\xBF") if bom

  result_value.each_with_index do |(token, lex_state), index|
    lineno = token.location.start_line
    column = token.location.start_column
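    # When a byte order mark is present, match up the locations here so that
    # the columns on the first line line up with how Ripper reports them.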
    column -= index == 0 ? 6 : 3 if bom && lineno == 1

    event = RIPPER.fetch(token.type)
    value = token.value
    lex_state = Ripper::Lexer::State.new(lex_state)

    token =
      case event
      when :on___end__
        EndContentToken.new([[lineno, column], event, value, lex_state])
      when :on_comment
        CommentToken.new([[lineno, column], event, value, lex_state])
      when :on_heredoc_end
        # Heredoc end tokens can be emitted in an odd order, so we don't
        # want to bother comparing the state on them.
        HeredocEndToken.new([[lineno, column], event, value, lex_state])
      when :on_embexpr_end, :on_ident
        if lex_state == Ripper::EXPR_END | Ripper::EXPR_LABEL
          # In the event that we're comparing identifiers, we're going to
          # allow a little divergence. Ripper doesn't account for local
          # variables introduced through named captures in regexes, and we
          # do, which accounts for this difference.
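          # For example, `/(?<foo>bar)/ =~ "bar"` introduces a local variable
          # named foo.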
          IdentToken.new([[lineno, column], event, value, lex_state])
        else
          Token.new([[lineno, column], event, value, lex_state])
        end
      when :on_ignored_nl
        # Ignored newlines can occasionally have a LABEL state attached to
        # them which doesn't actually impact anything. We don't mirror that
        # state, so we ignore it here.
        IgnoredNewlineToken.new([[lineno, column], event, value, lex_state])
      when :on_regexp_end
        # On regexp end, Ripper scans and then sets the end state, so the
        # state in Ripper's lexed output is begin when it should be end. YARP
        # sets the lex state correctly to the end state, but we want to be
        # able to compare against Ripper's lexed state. So here, if it's a
        # regexp end token, we output the state as the previous state, solely
        # for the sake of comparison.
        previous_token = result_value[index - 1][0]
        lex_state =
          if RIPPER.fetch(previous_token.type) == :on_embexpr_end
            # If the previous token is embexpr_end, then we have to do even
            # more processing. The end of an embedded expression sets the
            # state to the state that it had at the beginning of the
            # embedded expression. So we have to go and find that state and
            # set it here.
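            # Walk backwards through the tokens, treating each embexpr_end as
            # opening another level of nesting and each embexpr_beg as closing
            # one, until we find the token that opened this embedded
            # expression, and then reuse the state recorded there.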
            counter = 1
            current_index = index - 1

            until counter == 0
              current_index -= 1
              current_event = RIPPER.fetch(result_value[current_index][0].type)
              counter += { on_embexpr_beg: -1, on_embexpr_end: 1 }[current_event] || 0
            end

            Ripper::Lexer::State.new(result_value[current_index][1])
          else
            previous_state
          end

        Token.new([[lineno, column], event, value, lex_state])
      else
        Token.new([[lineno, column], event, value, lex_state])
      end

    previous_state = lex_state

    # The order in which tokens appear in our lexer is different from the
    # order that they appear in Ripper. When we hit the declaration of a
    # heredoc in YARP, we skip forward and lex the rest of the content of
    # the heredoc before going back and lexing at the end of the heredoc
    # identifier.
    #
    # To match up to Ripper, we keep a small state variable around here to
    # track whether we're in the middle of a heredoc or not. In this way we
    # can shuffle around the token to match Ripper's output.
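    #
    # As a rough illustration, given:
    #
    #     foo = <<~ONE + bar
    #       one
    #     ONE
    #
    # our lexer emits the heredoc body and terminator immediately after the
    # opening identifier, while Ripper lexes the rest of the line (`+ bar` and
    # the newline) before the body, so the collected tokens need to be
    # shuffled back into Ripper's order.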
    case state
    when :default
      tokens << token

      if event == :on_heredoc_beg
        state = :heredoc_opened
        heredoc_stack.last << Heredoc.build(token)
      end
    when :heredoc_opened
      heredoc_stack.last.last << token

      case event
      when :on_heredoc_beg
        heredoc_stack << [Heredoc.build(token)]
      when :on_heredoc_end
        state = :heredoc_closed
      end
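    # After a heredoc terminator we may still see another heredoc declaration
    # on the same line, or we may still be inside an enclosing heredoc. The
    # collected heredoc tokens are only flushed back into the output once
    # neither of those is the case.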
    when :heredoc_closed
      if %i[on_nl on_ignored_nl on_comment].include?(event) || (event == :on_tstring_content && value.end_with?("\n"))
        if heredoc_stack.size > 1
          flushing = heredoc_stack.pop
          heredoc_stack.last.last << token

          flushing.each do |heredoc|
            heredoc.to_a.each do |flushed_token|
              heredoc_stack.last.last << flushed_token
            end
          end

          state = :heredoc_opened
          next
        end
      elsif event == :on_heredoc_beg
        tokens << token
        state = :heredoc_opened
        heredoc_stack.last << Heredoc.build(token)
        next
      elsif heredoc_stack.size > 1
        heredoc_stack[-2].last << token
        next
      end
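      # Nothing else is pending for these heredocs, so flush the tokens we
      # collected for them back into the output stream.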
      heredoc_stack.last.each do |heredoc|
        tokens.concat(heredoc.to_a)
      end

      heredoc_stack.last.clear
      state = :default

      tokens << token
    end
  end

  tokens.reject! { |t| t.event == :on_eof }

  # We sort by location to compare against Ripper's output
  tokens.sort_by!(&:location)
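
  # Sanity check: aside from the EOF token we just rejected, we shouldn't have
  # lost any tokens along the way.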
  if result_value.size - 1 > tokens.size
    raise StandardError, "Lost tokens when performing lex_compat"
  end

  ParseResult.new(tokens, result.comments, result.errors, result.warnings, [])
end