def result
  tokens = []

  state = :default
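  # heredoc_stack holds the groups of heredocs currently being collected.
  # Each group gathers the heredocs opened at one nesting level; a new group
  # is pushed whenever a heredoc declaration appears inside the body of
  # another heredoc (see the shuffling logic below).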
  heredoc_stack = [[]]

  result = YARP.lex(source, @filepath)
  result_value = result.value
  previous_state = nil
  # If there's a UTF-8 byte order mark at the start of the file, then Ripper
  # sets every token's location on the first line back by 6 bytes. It also
  # keeps the byte order mark in the first token's value. This is weird, and I
  # don't want to mirror that in our parser. So instead, we'll match up the
  # values here, and then match up the locations as we process the tokens.
  bom = source.bytes[0..2] == [0xEF, 0xBB, 0xBF]
  result_value[0][0].value.prepend("\xEF\xBB\xBF") if bom

  result_value.each_with_index do |(token, lex_state), index|
    lineno = token.location.start_line
    column = token.location.start_column
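    # When a byte order mark is present, match up the locations here so that
    # the columns on the first line line up with how Ripper reports them.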
    column -= index == 0 ? 6 : 3 if bom && lineno == 1

    event = RIPPER.fetch(token.type)
    value = token.value
    lex_state = Ripper::Lexer::State.new(lex_state)

    token =
      case event
      when :on___end__
        EndContentToken.new([[lineno, column], event, value, lex_state])
      when :on_comment
        CommentToken.new([[lineno, column], event, value, lex_state])
      when :on_heredoc_end
        # Heredoc end tokens can be emitted in an odd order, so we don't
        # want to bother comparing the state on them.
        HeredocEndToken.new([[lineno, column], event, value, lex_state])
      when :on_embexpr_end, :on_ident
        if lex_state == Ripper::EXPR_END | Ripper::EXPR_LABEL
          # In the event that we're comparing identifiers, we're going to
          # allow a little divergence. Ripper doesn't account for local
          # variables introduced through named captures in regexes, and we
          # do, which accounts for this difference.
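          # For example, `/(?<foo>bar)/ =~ "bar"` introduces a local variable
          # named foo.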
          IdentToken.new([[lineno, column], event, value, lex_state])
        else
          Token.new([[lineno, column], event, value, lex_state])
        end
      when :on_ignored_nl
        # Ignored newlines can occasionally have a LABEL state attached to
        # them which doesn't actually impact anything. We don't mirror that
        # state, so we ignore it here.
        IgnoredNewlineToken.new([[lineno, column], event, value, lex_state])
      when :on_regexp_end
        # On regexp end, Ripper scans and then sets the end state, so the
        # state in Ripper's lexed output is begin when it should be end. YARP
        # sets the lex state correctly to the end state, but we want to be
        # able to compare against Ripper's lexed state. So here, if it's a
        # regexp end token, we output the state as the previous state, solely
        # for the sake of comparison.
        previous_token = result_value[index - 1][0]
        lex_state =
          if RIPPER.fetch(previous_token.type) == :on_embexpr_end
            # If the previous token is embexpr_end, then we have to do even
            # more processing. The end of an embedded expression sets the
            # state to the state that it had at the beginning of the
            # embedded expression. So we have to go and find that state and
            # set it here.
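            # Walk backwards through the tokens, treating each embexpr_end as
            # opening another level of nesting and each embexpr_beg as closing
            # one, until we find the token that opened this embedded
            # expression, and then reuse the state recorded there.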
            counter = 1
            current_index = index - 1

            until counter == 0
              current_index -= 1
              current_event = RIPPER.fetch(result_value[current_index][0].type)
              counter += { on_embexpr_beg: -1, on_embexpr_end: 1 }[current_event] || 0
            end

            Ripper::Lexer::State.new(result_value[current_index][1])
          else
            previous_state
          end

        Token.new([[lineno, column], event, value, lex_state])
      else
        Token.new([[lineno, column], event, value, lex_state])
      end

    previous_state = lex_state

    # The order in which tokens appear in our lexer is different from the
    # order that they appear in Ripper. When we hit the declaration of a
    # heredoc in YARP, we skip forward and lex the rest of the content of
    # the heredoc before going back and lexing at the end of the heredoc
    # identifier.
    #
    # To match up to Ripper, we keep a small state variable around here to
    # track whether we're in the middle of a heredoc or not. In this way we
    # can shuffle around the token to match Ripper's output.
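    #
    # As a rough illustration, given:
    #
    #     foo = <<~ONE + bar
    #       one
    #     ONE
    #
    # our lexer emits the heredoc body and terminator immediately after the
    # opening identifier, while Ripper lexes the rest of the line (`+ bar` and
    # the newline) before the body, so the collected tokens need to be
    # shuffled back into Ripper's order.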
    case state
    when :default
      tokens << token

      if event == :on_heredoc_beg
        state = :heredoc_opened
        heredoc_stack.last << Heredoc.build(token)
      end
    when :heredoc_opened
      heredoc_stack.last.last << token

      case event
      when :on_heredoc_beg
        heredoc_stack << [Heredoc.build(token)]
      when :on_heredoc_end
        state = :heredoc_closed
      end
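    # After a heredoc terminator we may still see another heredoc declaration
    # on the same line, or we may still be inside an enclosing heredoc. The
    # collected heredoc tokens are only flushed back into the output once
    # neither of those is the case.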
    when :heredoc_closed
      if %i[on_nl on_ignored_nl on_comment].include?(event) || (event == :on_tstring_content && value.end_with?("\n"))
        if heredoc_stack.size > 1
          flushing = heredoc_stack.pop
          heredoc_stack.last.last << token

          flushing.each do |heredoc|
            heredoc.to_a.each do |flushed_token|
              heredoc_stack.last.last << flushed_token
            end
          end

          state = :heredoc_opened
          next
        end
      elsif event == :on_heredoc_beg
        tokens << token
        state = :heredoc_opened
        heredoc_stack.last << Heredoc.build(token)
        next
      elsif heredoc_stack.size > 1
        heredoc_stack[-2].last << token
        next
      end
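      # Nothing else is pending for these heredocs, so flush the tokens we
      # collected for them back into the output stream.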
      heredoc_stack.last.each do |heredoc|
        tokens.concat(heredoc.to_a)
      end

      heredoc_stack.last.clear
      state = :default

      tokens << token
    end
  end

  tokens.reject! { |t| t.event == :on_eof }

  # We sort by location to compare against Ripper's output
  tokens.sort_by!(&:location)
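
  # Sanity check: aside from the EOF token we just rejected, we shouldn't have
  # lost any tokens along the way.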
  if result_value.size - 1 > tokens.size
    raise StandardError, "Lost tokens when performing lex_compat"
  end

  ParseResult.new(tokens, result.comments, result.errors, result.warnings, [])
end