lib/syntax_tree/index.rb



# frozen_string_literal: true

module SyntaxTree
  # This class can be used to build an index of the structure of Ruby files. We
  # define an index as the list of constants and methods defined within a file.
  #
  # This index strives to be as fast as possible to better support tools like
  # IDEs. Because of that, it has different backends depending on what
  # functionality is available.
  module Index
    # This is a location for an index entry.
    class Location
      attr_reader :line, :column

      def initialize(line, column)
        @line = line
        @column = column
      end
    end

    # This entry represents a class definition using the class keyword.
    class ClassDefinition
      attr_reader :nesting, :name, :superclass, :location, :comments

      def initialize(nesting, name, superclass, location, comments)
        @nesting = nesting
        @name = name
        @superclass = superclass
        @location = location
        @comments = comments
      end
    end

    # This entry represents a constant assignment.
    class ConstantDefinition
      attr_reader :nesting, :name, :location, :comments

      def initialize(nesting, name, location, comments)
        @nesting = nesting
        @name = name
        @location = location
        @comments = comments
      end
    end

    # This entry represents a module definition using the module keyword.
    class ModuleDefinition
      attr_reader :nesting, :name, :location, :comments

      def initialize(nesting, name, location, comments)
        @nesting = nesting
        @name = name
        @location = location
        @comments = comments
      end
    end

    # This entry represents a method definition using the def keyword.
    class MethodDefinition
      attr_reader :nesting, :name, :location, :comments

      def initialize(nesting, name, location, comments)
        @nesting = nesting
        @name = name
        @location = location
        @comments = comments
      end
    end

    # This entry represents a singleton method definition using the def keyword
    # with a specified target.
    class SingletonMethodDefinition
      attr_reader :nesting, :name, :location, :comments

      def initialize(nesting, name, location, comments)
        @nesting = nesting
        @name = name
        @location = location
        @comments = comments
      end
    end

    # This entry represents a method definition that was created using the alias
    # keyword.
    class AliasMethodDefinition
      attr_reader :nesting, :name, :location, :comments

      def initialize(nesting, name, location, comments)
        @nesting = nesting
        @name = name
        @location = location
        @comments = comments
      end
    end

    # When you're using the instruction sequence backend, this class is used to
    # lazily parse comments out of the source code.
    class FileComments
      # We use the ripper library to pull out source comments.
      class Parser < Ripper
        attr_reader :comments

        def initialize(*)
          super
          @comments = {}
        end

        def on_comment(value)
          comments[lineno] = value.chomp
        end
      end

      # This represents the Ruby source in the form of a file. When it needs to
      # be read we'll read the file.
      class FileSource
        attr_reader :filepath

        def initialize(filepath)
          @filepath = filepath
        end

        def source
          File.read(filepath)
        end
      end

      # This represents the Ruby source in the form of a string. When it needs
      # to be read the string is returned.
      class StringSource
        attr_reader :source

        def initialize(source)
          @source = source
        end
      end

      attr_reader :source

      def initialize(source)
        @source = source
      end

      def comments
        @comments ||= Parser.new(source.source).tap(&:parse).comments
      end
    end

    # This class handles parsing comments from Ruby source code in the case that
    # we use the instruction sequence backend. Because the instruction sequence
    # backend doesn't provide comments (since they are dropped) we provide this
    # interface to lazily parse them out.
    class EntryComments
      include Enumerable
      attr_reader :file_comments, :location

      def initialize(file_comments, location)
        @file_comments = file_comments
        @location = location
      end

      def each(&block)
        line = location.line - 1
        result = []

        while line >= 0 && (comment = file_comments.comments[line])
          result.unshift(comment)
          line -= 1
        end

        result.each(&block)
      end
    end

    # This backend creates the index using RubyVM::InstructionSequence, which is
    # faster than using the Syntax Tree parser, but is not available on all
    # runtimes.
    class ISeqBackend
      VM_DEFINECLASS_TYPE_CLASS = 0x00
      VM_DEFINECLASS_TYPE_SINGLETON_CLASS = 0x01
      VM_DEFINECLASS_TYPE_MODULE = 0x02
      VM_DEFINECLASS_FLAG_SCOPED = 0x08
      VM_DEFINECLASS_FLAG_HAS_SUPERCLASS = 0x10

      def index(source)
        index_iseq(
          RubyVM::InstructionSequence.compile(source).to_a,
          FileComments.new(FileComments::StringSource.new(source))
        )
      end

      def index_file(filepath)
        index_iseq(
          RubyVM::InstructionSequence.compile_file(filepath).to_a,
          FileComments.new(FileComments::FileSource.new(filepath))
        )
      end

      private

      def location_for(iseq)
        code_location = iseq[4][:code_location]
        Location.new(code_location[0], code_location[1])
      end

      def find_constant_path(insns, index)
        index -= 1 while index >= 0 &&
          (
            insns[index].is_a?(Integer) ||
              (
                insns[index].is_a?(Array) &&
                  %i[swap topn].include?(insns[index][0])
              )
          )
        insn = insns[index]

        if insn.is_a?(Array) && insn[0] == :opt_getconstant_path
          # In this case we're on Ruby 3.2+ and we have an opt_getconstant_path
          # instruction, so we already know all of the symbols in the nesting.
          [index - 1, insn[1]]
        elsif insn.is_a?(Symbol) && insn.match?(/\Alabel_\d+/)
          # Otherwise, if we have a label then this is very likely the
          # destination of an opt_getinlinecache instruction, in which case
          # we'll walk backwards to grab up all of the constants.
          names = []

          index -= 1
          until insns[index].is_a?(Array) &&
                  insns[index][0] == :opt_getinlinecache
            if insns[index].is_a?(Array) && insns[index][0] == :getconstant
              names.unshift(insns[index][1])
            end

            index -= 1
          end

          [index - 1, names]
        else
          [index, []]
        end
      end

      def find_attr_arguments(insns, index)
        orig_argc = insns[index][1][:orig_argc]
        names = []

        current = index - 1
        while current >= 0 && names.length < orig_argc
          if insns[current].is_a?(Array) && insns[current][0] == :putobject
            names.unshift(insns[current][1])
          end

          current -= 1
        end

        names if insns[current] == [:putself] && names.length == orig_argc
      end

      def method_definition(nesting, name, location, file_comments)
        comments = EntryComments.new(file_comments, location)

        if nesting.last == [:singletonclass]
          SingletonMethodDefinition.new(
            nesting[0...-1],
            name,
            location,
            comments
          )
        else
          MethodDefinition.new(nesting, name, location, comments)
        end
      end

      def index_iseq(iseq, file_comments)
        results = []
        queue = [[iseq, []]]

        while (current_iseq, current_nesting = queue.shift)
          file = current_iseq[5]
          line = current_iseq[8]
          insns = current_iseq[13]

          insns.each_with_index do |insn, index|
            case insn
            when Integer
              line = insn
              next
            when Array
              # continue on
            else
              # skip everything else
              next
            end

            case insn[0]
            when :defineclass
              _, name, class_iseq, flags = insn
              next_nesting = current_nesting.dup

              # This is the index we're going to search for the nested constant
              # path within the declaration name.
              constant_index = index - 2

              # This is the superclass of the class being defined.
              superclass = []

              # If there is a superclass, then we're going to find it here and
              # then update the constant_index as necessary.
              if flags & VM_DEFINECLASS_FLAG_HAS_SUPERCLASS > 0
                constant_index, superclass =
                  find_constant_path(insns, index - 1)

                if superclass.empty?
                  warn("#{file}:#{line}: superclass with non constant path")
                  next
                end
              end

              if (_, nesting = find_constant_path(insns, constant_index))
                # If there is a constant path in the class name, then we need to
                # handle that by updating the nesting.
                next_nesting << (nesting << name)
              else
                # Otherwise we'll add the class name to the nesting.
                next_nesting << [name]
              end

              if flags == VM_DEFINECLASS_TYPE_SINGLETON_CLASS
                # At the moment, we don't support singletons that aren't
                # defined on self. We could, but it would require more
                # emulation.
                if insns[index - 2] != [:putself]
                  warn(
                    "#{file}:#{line}: singleton class with non-self receiver"
                  )
                  next
                end
              elsif flags & VM_DEFINECLASS_TYPE_MODULE > 0
                location = location_for(class_iseq)
                results << ModuleDefinition.new(
                  next_nesting,
                  name,
                  location,
                  EntryComments.new(file_comments, location)
                )
              else
                location = location_for(class_iseq)
                results << ClassDefinition.new(
                  next_nesting,
                  name,
                  superclass,
                  location,
                  EntryComments.new(file_comments, location)
                )
              end

              queue << [class_iseq, next_nesting]
            when :definemethod
              location = location_for(insn[2])
              results << method_definition(
                current_nesting,
                insn[1],
                location,
                file_comments
              )
            when :definesmethod
              if insns[index - 1] != [:putself]
                warn("#{file}:#{line}: singleton method with non-self receiver")
                next
              end

              location = location_for(insn[2])
              results << SingletonMethodDefinition.new(
                current_nesting,
                insn[1],
                location,
                EntryComments.new(file_comments, location)
              )
            when :setconstant
              next_nesting = current_nesting.dup
              name = insn[1]

              _, nesting = find_constant_path(insns, index - 1)
              next_nesting << nesting if nesting.any?

              location = Location.new(line, :unknown)
              results << ConstantDefinition.new(
                next_nesting,
                name,
                location,
                EntryComments.new(file_comments, location)
              )
            when :opt_send_without_block, :send
              case insn[1][:mid]
              when :attr_reader, :attr_writer, :attr_accessor
                attr_names = find_attr_arguments(insns, index)
                next unless attr_names

                location = Location.new(line, :unknown)
                attr_names.each do |attr_name|
                  if insn[1][:mid] != :attr_writer
                    results << method_definition(
                      current_nesting,
                      attr_name,
                      location,
                      file_comments
                    )
                  end

                  if insn[1][:mid] != :attr_reader
                    results << method_definition(
                      current_nesting,
                      :"#{attr_name}=",
                      location,
                      file_comments
                    )
                  end
                end
              when :"core#set_method_alias"
                # Now we have to validate that the alias is happening with a
                # non-interpolated value. To do this we'll match the specific
                # pattern we're expecting.
                values =
                  insns[(index - 4)...index].map do |previous|
                    previous.is_a?(Array) ? previous[0] : previous
                  end
                if values !=
                     %i[putspecialobject putspecialobject putobject putobject]
                  next
                end

                # Now that we know it's in the structure we want it, we can use
                # the values of the putobject to determine the alias.
                location = Location.new(line, :unknown)
                results << AliasMethodDefinition.new(
                  current_nesting,
                  insns[index - 2][1],
                  location,
                  EntryComments.new(file_comments, location)
                )
              end
            end
          end
        end

        results
      end
    end

    # This backend creates the index using the Syntax Tree parser and a visitor.
    # It is not as fast as using the instruction sequences directly, but is
    # supported on all runtimes.
    class ParserBackend
      class ConstantNameVisitor < Visitor
        def visit_const_ref(node)
          [node.constant.value.to_sym]
        end

        def visit_const_path_ref(node)
          visit(node.parent) << node.constant.value.to_sym
        end

        def visit_var_ref(node)
          [node.value.value.to_sym]
        end
      end

      class IndexVisitor < Visitor
        attr_reader :results, :nesting, :statements

        def initialize
          @results = []
          @nesting = []
          @statements = nil
        end

        visit_methods do
          def visit_alias(node)
            if node.left.is_a?(SymbolLiteral) && node.right.is_a?(SymbolLiteral)
              location =
                Location.new(
                  node.location.start_line,
                  node.location.start_column
                )

              results << AliasMethodDefinition.new(
                nesting.dup,
                node.left.value.value.to_sym,
                location,
                comments_for(node)
              )
            end

            super
          end

          def visit_assign(node)
            if node.target.is_a?(VarField) && node.target.value.is_a?(Const)
              location =
                Location.new(
                  node.location.start_line,
                  node.location.start_column
                )

              results << ConstantDefinition.new(
                nesting.dup,
                node.target.value.value.to_sym,
                location,
                comments_for(node)
              )
            end

            super
          end

          def visit_class(node)
            names = node.constant.accept(ConstantNameVisitor.new)
            nesting << names

            location =
              Location.new(node.location.start_line, node.location.start_column)

            superclass =
              if node.superclass
                visited = node.superclass.accept(ConstantNameVisitor.new)

                if visited == [[]]
                  raise NotImplementedError, "superclass with non constant path"
                end

                visited
              else
                []
              end

            results << ClassDefinition.new(
              nesting.dup,
              names.last,
              superclass,
              location,
              comments_for(node)
            )

            super
            nesting.pop
          end

          def visit_command(node)
            case node.message.value
            when "attr_reader", "attr_writer", "attr_accessor"
              comments = comments_for(node)
              location =
                Location.new(
                  node.location.start_line,
                  node.location.start_column
                )

              node.arguments.parts.each do |argument|
                next unless argument.is_a?(SymbolLiteral)
                name = argument.value.value.to_sym

                if node.message.value != "attr_writer"
                  results << MethodDefinition.new(
                    nesting.dup,
                    name,
                    location,
                    comments
                  )
                end

                if node.message.value != "attr_reader"
                  results << MethodDefinition.new(
                    nesting.dup,
                    :"#{name}=",
                    location,
                    comments
                  )
                end
              end
            end

            super
          end

          def visit_def(node)
            name = node.name.value.to_sym
            location =
              Location.new(node.location.start_line, node.location.start_column)

            results << if node.target.nil?
              MethodDefinition.new(
                nesting.dup,
                name,
                location,
                comments_for(node)
              )
            else
              SingletonMethodDefinition.new(
                nesting.dup,
                name,
                location,
                comments_for(node)
              )
            end

            super
          end

          def visit_module(node)
            names = node.constant.accept(ConstantNameVisitor.new)
            nesting << names

            location =
              Location.new(node.location.start_line, node.location.start_column)

            results << ModuleDefinition.new(
              nesting.dup,
              names.last,
              location,
              comments_for(node)
            )

            super
            nesting.pop
          end

          def visit_program(node)
            super
            results
          end

          def visit_statements(node)
            @statements = node
            super
          end
        end

        private

        def comments_for(node)
          comments = []

          body = statements.body
          line = node.location.start_line - 1
          index = body.index(node)
          return comments if index.nil?

          index -= 1
          while index >= 0 && body[index].is_a?(Comment) &&
                  (line - body[index].location.start_line < 2)
            comments.unshift(body[index].value)
            line = body[index].location.start_line
            index -= 1
          end

          comments
        end
      end

      def index(source)
        SyntaxTree.parse(source).accept(IndexVisitor.new)
      end

      def index_file(filepath)
        index(SyntaxTree.read(filepath))
      end
    end

    # The class defined here is used to perform the indexing, depending on what
    # functionality is available from the runtime.
    INDEX_BACKEND =
      defined?(RubyVM::InstructionSequence) ? ISeqBackend : ParserBackend

    # This method accepts source code and then indexes it.
    def self.index(source, backend: INDEX_BACKEND.new)
      backend.index(source)
    end

    # This method accepts a filepath and then indexes it.
    def self.index_file(filepath, backend: INDEX_BACKEND.new)
      backend.index_file(filepath)
    end
  end
end