gem.sh

lib/elastic_apm/sql/signature.rb

# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# frozen_string_literal: true

require 'elastic_apm/sql/tokenizer'

module ElasticAPM
  module Sql
    # @api private
    class Signature
      include Tokens

      # Mostly here to provide a similar API to new SqlSummarizer for easier
      # swapping out
      #
      # @api private
      class Summarizer
        def summarize(sql)
          Signature.parse(sql)
        end
      end

      def initialize(sql)
        @sql = sql.encode('utf-8', invalid: :replace, undef: :replace)
        @tokenizer = Tokenizer.new(@sql)
      end

      def parse
        @tokenizer.scan # until tokenizer.token != COMMENT

        parsed = parse_tokens
        return parsed if parsed

        # If all else fails, just return the first token of the query.
        parts = @sql.split
        return '' unless parts.any?

        parts.first.upcase
      end

      def self.parse(sql)
        new(sql).parse
      end

      private

      # rubocop:disable Metrics/CyclomaticComplexity
      # rubocop:disable Metrics/PerceivedComplexity
      def parse_tokens
        t = @tokenizer

        case t.token

        when CALL
          return unless scan_until IDENT
          "CALL #{t.text}"

        when DELETE
          return unless scan_until FROM
          return unless scan_token IDENT
          table = scan_dotted_identifier
          "DELETE FROM #{table}"

        when INSERT, REPLACE
          action = t.text
          return unless scan_until INTO
          return unless scan_token IDENT
          table = scan_dotted_identifier
          "#{action} INTO #{table}"

        when SELECT
          level = 0
          while t.scan
            case t.token
            when LPAREN then level += 1
            when RPAREN then level -= 1
            when FROM
              next unless level == 0
              break unless scan_token IDENT
              table = scan_dotted_identifier
              return "SELECT FROM #{table}"
            end
          end

        when UPDATE
          # Scan for the table name. Some dialects allow option keywords before
          # the table name.
          return 'UPDATE' unless scan_token IDENT

          table = t.text

          period = false
          first_period = false

          while t.scan
            case t.token
            when IDENT
              if period
                table += t.text
                period = false
              end

              unless first_period
                table = t.text
              end

              # Two adjacent identifiers found after the first period. Ignore
              # the secondary ones, in case they are unknown keywords.
            when PERIOD
              period = true
              first_period = true
              table += '.'
            else
              return "UPDATE #{table}"
            end
          end
        end
      end
      # rubocop:enable Metrics/CyclomaticComplexity
      # rubocop:enable Metrics/PerceivedComplexity

      # Scans until finding token of `kind`
      def scan_until(kind)
        while @tokenizer.scan
          break true if @tokenizer.token == kind
          false
        end
      end

      # Scans next token, ignoring comments
      # Returns whether next token is of `kind`
      def scan_token(kind)
        while @tokenizer.scan
          next if @tokenizer.token == COMMENT
          break
        end

        return true if @tokenizer.token == kind

        false
      end

      def scan_dotted_identifier
        table = @tokenizer.text

        # rubocop:disable Style/WhileUntilModifier
        while scan_token(PERIOD) && scan_token(IDENT)
          table += ".#{@tokenizer.text}"
        end
        # rubocop:enable Style/WhileUntilModifier

        table
      end
    end
  end
end
Modules

Classes

lib/elastic_apm/sql/signature.rb

Source Files