lib/elastic_apm/sql/signature.rb



# frozen_string_literal: true

require 'elastic_apm/sql/tokenizer'

module ElasticAPM
  module Sql
    # @api private
    class Signature
      include Tokens

      # Thin adapter exposing the signature parser through a `summarize`
      # method, so it can be swapped with the newer SqlSummarizer without
      # touching call sites.
      #
      # @api private
      class Summarizer
        # @param sql [String] the SQL statement to summarize
        # @return [String] the signature computed by Signature#parse
        def summarize(sql)
          Signature.new(sql).parse
        end
      end

      # Sets up a tokenizer over the statement and keeps the raw text, which
      # #parse uses as a fallback when no signature can be derived.
      #
      # @param sql [String] the SQL statement to summarize
      def initialize(sql)
        @tokenizer = Tokenizer.new(sql)
        @sql = sql
      end

      # Computes the signature of the SQL statement given to the constructor.
      #
      # Leading comments are skipped first, so that a statement such as
      # `/* hint */ SELECT ...` is still dispatched on its real keyword. When
      # #parse_tokens cannot derive a signature, the first
      # whitespace-separated word of the raw statement, upcased, is returned
      # instead.
      #
      # @return [String] the signature, or an empty string for blank input
      def parse
        # Advance to the first token that is not a comment. The loop also
        # stops when the tokenizer runs out of input.
        while @tokenizer.scan
          break unless @tokenizer.token == COMMENT
        end

        signature = parse_tokens
        return signature if signature

        # If all else fails, just return the first token of the query.
        first_word = @sql.split.first
        first_word ? first_word.upcase : ''
      end

      # Convenience entry point: builds a signature parser for `sql` and runs
      # it in one call.
      #
      # @param sql [String] the SQL statement to summarize
      # @return [String] the result of #parse on the new instance
      def self.parse(sql)
        signature = new(sql)
        signature.parse
      end

      private

      # Dispatches on the tokenizer's current token (the caller has already
      # scanned it) and builds the signature for that kind of statement.
      #
      # @return [String, nil] the signature, or nil when the statement kind is
      #   not handled or no target name could be found
      def parse_tokens
        case @tokenizer.token
        when CALL then call_signature
        when DELETE then delete_signature
        when INSERT, REPLACE then insert_signature
        when SELECT then select_signature
        when UPDATE then update_signature
        end
      end

      # @return [String, nil] "CALL <name>", using the first identifier that
      #   follows the keyword
      def call_signature
        return unless scan_until IDENT

        "CALL #{@tokenizer.text}"
      end

      # @return [String, nil] "DELETE FROM <table>", or nil when FROM is
      #   missing or is not followed by an identifier
      def delete_signature
        return unless scan_until FROM
        return unless scan_token IDENT

        "DELETE FROM #{scan_dotted_identifier}"
      end

      # Shared by INSERT and REPLACE. The keyword is echoed exactly as it
      # appears in the statement.
      #
      # @return [String, nil] "<keyword> INTO <table>", or nil when INTO is
      #   missing or is not followed by an identifier
      def insert_signature
        verb = @tokenizer.text
        return unless scan_until INTO
        return unless scan_token IDENT

        "#{verb} INTO #{scan_dotted_identifier}"
      end

      # Looks for the first FROM that is not nested inside parentheses.
      #
      # @return [String, nil] "SELECT FROM <table>", or nil when there is no
      #   such FROM or it is not followed by an identifier
      def select_signature
        depth = 0

        while @tokenizer.scan
          case @tokenizer.token
          when LPAREN
            depth += 1
          when RPAREN
            depth -= 1
          when FROM
            # A FROM inside parentheses belongs to a nested expression.
            next unless depth.zero?
            # Give up when FROM is followed by anything but an identifier.
            return unless scan_token IDENT

            return "SELECT FROM #{scan_dotted_identifier}"
          end
        end

        nil
      end

      # Scans for the table name. Some dialects allow option keywords before
      # the table name, so until a period shows up every identifier replaces
      # the previous candidate.
      #
      # @return [String, nil] "UPDATE" when no identifier follows the keyword,
      #   "UPDATE <table>" once a token that is neither an identifier nor a
      #   period is reached, or nil when the input ends before such a token
      def update_signature
        return 'UPDATE' unless scan_token IDENT

        name = @tokenizer.text
        awaiting_part = false # the previous token was a period
        dotted = false        # at least one period has been seen

        while @tokenizer.scan
          case @tokenizer.token
          when IDENT
            if awaiting_part
              name += @tokenizer.text
              awaiting_part = false
            elsif !dotted
              name = @tokenizer.text
            end
            # Otherwise: two adjacent identifiers found after the first
            # period. Ignore the secondary ones, in case they are unknown
            # keywords.
          when PERIOD
            awaiting_part = true
            dotted = true
            name += '.'
          else
            return "UPDATE #{name}"
          end
        end

        nil
      end

      # Scans forward until a token of `kind` has been read.
      #
      # Every token before the match is consumed. On success the matching
      # token is left as the tokenizer's current token, so the caller can read
      # its text or continue scanning right after it.
      #
      # @param kind [Object] the token type to look for, e.g. IDENT or FROM
      # @return [Boolean] true when a matching token was found, false when the
      #   tokenizer ran out of input first
      def scan_until(kind)
        while @tokenizer.scan
          return true if @tokenizer.token == kind
        end

        false
      end

      # Advances to the next token that is not a comment and reports whether
      # it has the expected type.
      #
      # @param kind [Object] the token type the caller expects next
      # @return [Boolean] whether the tokenizer's current token is of `kind`
      #   once scanning has stopped
      def scan_token(kind)
        # Stop at the first non-comment token, or when input is exhausted.
        while @tokenizer.scan
          break unless @tokenizer.token == COMMENT
        end

        @tokenizer.token == kind
      end

      # Reads a possibly qualified name such as `schema.table`, starting from
      # the tokenizer's current token.
      #
      # Keeps consuming "period, identifier" pairs (comments in between are
      # skipped by #scan_token) and stops at the first token that breaks the
      # pattern; that token has already been consumed when this returns.
      #
      # @return [String] the name segments joined with periods
      def scan_dotted_identifier
        segments = [@tokenizer.text]
        segments << @tokenizer.text while scan_token(PERIOD) && scan_token(IDENT)
        segments.join('.')
      end
    end
  end
end