gem.sh

lib/datadog/tracing/span_operation.rb

require 'time'

require_relative '../core/environment/identity'
require_relative '../core/utils'
require_relative '../core/utils/time'
require_relative '../core/utils/safe_dup'

require_relative 'event'
require_relative 'metadata'
require_relative 'metadata/ext'
require_relative 'span'
require_relative 'utils'

module Datadog
  module Tracing
    # Represents the act of taking a span measurement.
    # It gives a Span a context which can be used to
    # build a Span. When completed, it yields the Span.
    #
    # @public_api
    class SpanOperation
      include Metadata

      # Span attributes
      # NOTE: In the future, we should drop the me
      # NOTE(review): the note above is cut off mid-sentence in this file;
      # its intended wording cannot be recovered from here.
      #
      # These readers are public. name, resource, service and type have
      # custom writers defined in this class; start_time and end_time have
      # backwards-compatibility writers that delegate to #start and #stop.
      attr_reader \
        :end_time,
        :id,
        :name,
        :parent_id,
        :resource,
        :service,
        :start_time,
        :trace_id,
        :type

      # Error status: initialized to 0 in #initialize and set to
      # Metadata::Ext::Errors::STATUS by #set_error.
      attr_accessor \
        :status

      # Creates a span operation, optionally linking it to a parent and
      # starting it immediately.
      #
      # @param name [String] operation name; a nil name raises ArgumentError (see #name=)
      # @param child_of [Object, nil] parent to inherit from; when given, its
      #   `trace_id` and `id` replace this operation's trace_id and parent_id
      #   (see #parent=). Presumably another SpanOperation — TODO confirm.
      # @param events [Events, nil] event set to publish on; defaults to a new Events
      # @param on_error [Proc, nil] custom error handler; only wired in when it is a Proc
      # @param parent_id [Integer, nil] parent span ID; nil is treated as 0
      # @param resource [String, nil] resource name; defaults to +name+
      # @param service [String, nil] service name
      # @param start_time [Time, nil] when given, the span is started with this time
      # @param tags [Object, nil] passed to #set_tags when present
      # @param trace_id [Integer, nil] trace ID; a new one is generated when nil
      # @param type [String, nil] span type
      # @raise [ArgumentError] if +name+ is nil
      def initialize(
        name,
        child_of: nil,
        events: nil,
        on_error: nil,
        parent_id: 0,
        resource: name,
        service: nil,
        start_time: nil,
        tags: nil,
        trace_id: nil,
        type: nil
      )
        # Ensure dynamically created strings are UTF-8 encoded.
        #
        # All strings created in Ruby land are UTF-8. The only sources of non-UTF-8 string are:
        # * Strings explicitly encoded as non-UTF-8.
        # * Some natively created string, although most natively created strings are UTF-8.
        self.name = name
        self.service = service
        self.type = type
        self.resource = resource

        @id = Tracing::Utils.next_id
        @parent_id = parent_id || 0
        @trace_id = trace_id || Tracing::Utils::TraceId.next_id

        @status = 0

        # start_time and end_time track wall clock. In Ruby, wall clock
        # has less accuracy than monotonic clock, so if possible we look to only use wall clock
        # to measure duration when a time is supplied by the user, or if monotonic clock
        # is unsupported.
        @start_time = nil
        @end_time = nil

        # duration_start and duration_end track monotonic clock, and may remain nil in cases where it
        # is known that we have to use wall clock to measure duration.
        @duration_start = nil
        @duration_end = nil

        # Set tags if provided.
        set_tags(tags) if tags

        # Only set parent if explicitly provided.
        # We don't want it to override context-derived
        # IDs if it's a distributed trace w/o a parent span.
        # This must run after the ID assignments above, because #parent=
        # overwrites @trace_id and @parent_id.
        parent = child_of
        self.parent = parent if parent

        # Some other SpanOperation-specific behavior
        @events = events || Events.new
        @span = nil

        # Subscribe :on_error event
        @events.on_error.wrap_default(&on_error) if on_error.is_a?(Proc)

        # Start the span with start time, if given.
        # This must come last: #start publishes on @events.
        start(start_time) if start_time
      end

      # Operation name.
      # @!attribute [rw] name
      # @return [String]
      # @raise [ArgumentError] when the given name is nil (or false)
      def name=(name)
        if name
          # Store the UTF-8 encoded form of the name.
          @name = Core::Utils.utf8_encode(name)
        else
          raise ArgumentError, "SpanOperation name can't be nil"
        end
      end

      # Span type.
      # @!attribute [rw] type
      # @return [String, nil]
      def type=(type)
        # An explicit nil is allowed and stored as-is, without encoding.
        return @type = nil if type.nil?

        @type = Core::Utils.utf8_encode(type)
      end

      # Service name.
      # @!attribute [rw] service
      # @return [String, nil]
      def service=(service)
        # An explicit nil is allowed and stored as-is, without encoding.
        return @service = nil if service.nil?

        @service = Core::Utils.utf8_encode(service)
      end

      # Span resource.
      # @!attribute [rw] resource
      # @return [String, nil]
      def resource=(resource)
        # An explicit nil is allowed and stored as-is, without encoding.
        return @resource = nil if resource.nil?

        @resource = Core::Utils.utf8_encode(resource)
      end

      # Runs the given block inside this span operation: starts the span,
      # yields the operation to the block, then finishes the span.
      #
      # @yield [SpanOperation] this operation
      # @return [Object] the value returned by the block
      # @raise [ArgumentError] if no block is given
      # @raise [AlreadyStartedError] if the operation was already started
      # @raise [Exception] any exception raised by the block is re-raised
      #   after the span has been stopped, the on_error event published,
      #   and the span finished
      def measure
        raise ArgumentError, 'Must provide block to measure!' unless block_given?
        # TODO: Should we just invoke the block and skip tracing instead?
        raise AlreadyStartedError if started?

        return_value = nil

        begin
          # If span fails to start, don't prevent the operation from
          # running, to minimize impact on normal application function.
          begin
            start
          rescue StandardError => e
            Datadog.logger.debug { "Failed to start span: #{e}" }
          ensure
            # We should yield to the provided block when possible, as this
            # block is application code that we don't want to hinder.
            # * We don't yield during a fatal error, as the application is likely trying to
            #   end its execution (either due to a system error or graceful shutdown).
            # NOTE(review): at this point `e` is only ever assigned by the
            # `rescue StandardError` clause above, so it is nil or a StandardError
            # and this guard appears never to skip the yield — confirm intent.
            return_value = yield(self) unless e && !e.is_a?(StandardError)
          end
        # rubocop:disable Lint/RescueException
        # Here we really want to catch *any* exception, not only StandardError,
        # as we really have no clue of what is in the block,
        # and it is user code which should be executed no matter what.
        # It's not a problem since we re-raise it afterwards so for example a
        # SignalException::Interrupt would still bubble up.
        rescue Exception => e
          # Stop the span first, so timing is more accurate.
          # If the span failed to start, timing may be inaccurate,
          # but this is not really a serious concern.
          stop

          # Trigger the on_error event
          events.on_error.publish(self, e)

          # We must finish the span to trigger callbacks,
          # and build the final span.
          finish

          raise e
        # Use an ensure block here to make sure the span closes.
        # NOTE: It's not sufficient to use "else": when a function
        #       uses "return", it will skip "else".
        ensure
          # Finish the span
          # NOTE: If an error was raised, this "finish" might be redundant.
          finish unless finished?
        end
        # rubocop:enable Lint/RescueException

        return_value
      end

      # Starts the span, at most once.
      #
      # @param start_time [Time, nil] wall-clock start supplied by the caller;
      #   when nil, the current UTC time is used and a duration marker is recorded
      # @return [SpanOperation] self
      def start(start_time = nil)
        # Starting is a no-op once the span has a start time.
        unless started?
          # Let subscribers know the span is about to start.
          events.before_start.publish(self)

          # With a caller-supplied time the duration marker stays nil, so
          # duration falls back to the wall-clock times.
          caller_supplied = !start_time.nil?
          @start_time = start_time || Core::Utils::Time.now.utc
          @duration_start = caller_supplied ? nil : duration_marker
        end

        self
      end

      # Mark the span stopped, at the given time or at the current time.
      #
      # @param stop_time [Time, nil] wall-clock stop supplied by the caller
      # @return [SpanOperation, nil] self, or nil when the span was already stopped
      def stop(stop_time = nil)
        # A span should not be stopped twice. This check is not thread-safe:
        # if stop is called from multiple threads, a given span might still be
        # stopped several times. It is only a fallback that protects the most
        # common cases.
        return if stopped?

        wall_clock_now = Core::Utils::Time.now.utc
        stopped_at = stop_time || wall_clock_now

        # If the span was never started, start it at the stop time. The
        # resulting duration is 0, which is the expected behavior when the
        # start time is unknown.
        start(stopped_at) unless started?

        @end_time = stopped_at
        # Only record a duration marker when the caller did not supply a time.
        @duration_end =
          if stop_time.nil?
            duration_marker
          end

        # Let subscribers know the span has stopped.
        events.after_stop.publish(self)

        self
      end

      # Return whether the span has a start time, i.e. has been started.
      # @return [Boolean]
      def started?
        return false if @start_time.nil?

        true
      end

      # Return whether the span has an end time, i.e. has been stopped.
      # @return [Boolean]
      def stopped?
        return false if @end_time.nil?

        true
      end

      # For backwards compatibility: assigning a start time starts the span.
      # @param time [Time, nil] start time forwarded to #start
      # @return the value that was passed in
      def start_time=(time)
        start(time)
        time
      end

      # For backwards compatibility: assigning an end time stops the span.
      # @param time [Time, nil] stop time forwarded to #stop
      # @return the value that was passed in
      def end_time=(time)
        stop(time)
        time
      end

      # Stops the operation, builds the final Span and publishes after_finish.
      # Calling it again returns the memoized span without repeating any work.
      #
      # @param end_time [Time, nil] stop time forwarded to #stop
      # @return [Span] the finalized span
      def finish(end_time = nil)
        unless finished?
          # Stop timing before building the span.
          stop(end_time)

          # Build once and memoize for performance reasons.
          @span = build_span

          # Notify subscribers with the finished span and this operation.
          events.after_finish.publish(span, self)
        end

        span
      end

      # Return whether the final span has been built by #finish.
      # @return [Boolean]
      def finished?
        return false if span.nil?

        true
      end

      # Elapsed time of the operation.
      #
      # Uses the duration markers when both are present, otherwise the
      # wall-clock start and end times.
      #
      # @return [Numeric, nil] nil when neither pair of values is complete
      def duration
        if @duration_start && @duration_end
          @duration_end - @duration_start
        elsif @start_time && @end_time
          @end_time - @start_time
        end
      end

      # Flags the operation as errored, then delegates to the inherited
      # set_error (presumably provided by the included Metadata module —
      # its behavior is not visible in this file).
      #
      # @param e [Object] the error; forwarded unchanged by the zero-argument `super`
      def set_error(e)
        # Record the error status code on this operation first.
        @status = Metadata::Ext::Errors::STATUS
        super
      end

      # Return a string representation of the span.
      def to_s
        "SpanOperation(name:#{@name},sid:#{@id},tid:#{@trace_id},pid:#{@parent_id})"
      end

      # Return the hash representation of the current operation.
      #
      # :start and :duration (both in nanoseconds) are only included once
      # the operation has been stopped.
      #
      # @return [Hash]
      def to_hash
        serialized = {
          error: @status,
          id: @id,
          meta: meta,
          metrics: metrics,
          name: @name,
          parent_id: @parent_id,
          resource: @resource,
          service: @service,
          trace_id: @trace_id,
          type: @type
        }

        # Timing is only meaningful after the operation has stopped.
        return serialized unless stopped?

        serialized[:start] = start_time_nano
        serialized[:duration] = duration_nano
        serialized
      end

      # Write a human readable description of the operation to a pretty-printer.
      #
      # @param q [PP] pretty-printer; must respond to group, breakable, text and seplist
      def pretty_print(q)
        # Wall-clock times rendered as integer nanoseconds.
        started_ns = (start_time.to_f * 1e9).to_i
        ended_ns = (end_time.to_f * 1e9).to_i

        q.group(0) do
          q.breakable

          # One "Label: value" line per attribute, in display order.
          [
            ['Name', @name],
            ['Span ID', @id],
            ['Parent ID', @parent_id],
            ['Trace ID', @trace_id],
            ['Type', @type],
            ['Service', @service],
            ['Resource', @resource],
            ['Error', @status],
            ['Start', started_ns],
            ['End', ended_ns],
            # Duration is left blank until the operation has stopped.
            ['Duration', (duration.to_f if stopped?)]
          ].each do |label, value|
            q.text "#{label}: #{value}\n"
          end

          q.group(2, 'Tags: [', "]\n") do
            q.breakable
            q.seplist(meta.each) do |key, value|
              q.text "#{key} => #{value}"
            end
          end

          q.group(2, 'Metrics: [', ']') do
            q.breakable
            q.seplist(metrics.each) do |key, value|
              q.text "#{key} => #{value}"
            end
          end
        end
      end

      # Callback behavior
      class Events
        include Tracing::Events

        # Default error handler: records the error on the span operation via
        # set_error, and does nothing when the operation is nil.
        DEFAULT_ON_ERROR = proc { |span_op, error| span_op.set_error(error) unless span_op.nil? }

        # Events created eagerly in #initialize. The on_error event has its
        # own lazily-initializing reader method instead.
        attr_reader \
          :after_finish,
          :after_stop,
          :before_start

        # Builds the eagerly-created events; the on_error event is created
        # lazily by #on_error.
        #
        # @param on_error [Object, nil] NOTE(review): accepted but never read in
        #   this method. SpanOperation#initialize attaches custom handlers via
        #   `on_error.wrap_default` instead. Confirm whether this keyword should
        #   be honored here or removed.
        def initialize(on_error: nil)
          @after_finish = AfterFinish.new
          @after_stop = AfterStop.new
          @before_start = BeforeStart.new
        end

        # This event is lazily initialized, as error paths
        # are normally less common than non-error paths.
        #
        # @return [OnError] the memoized event, created with DEFAULT_ON_ERROR on first use
        def on_error
          return @on_error if @on_error

          @on_error = OnError.new(DEFAULT_ON_ERROR)
        end

        # Triggered when the span is finished, regardless of error.
        # Published by SpanOperation#finish with the built span and the operation.
        class AfterFinish < Tracing::Event
          def initialize
            # Pass this event's name to the Tracing::Event constructor.
            super(:after_finish)
          end
        end

        # Triggered when the span is stopped, regardless of error.
        # Published by SpanOperation#stop with the operation.
        class AfterStop < Tracing::Event
          def initialize
            # Pass this event's name to the Tracing::Event constructor.
            super(:after_stop)
          end
        end

        # Triggered just before the span is started.
        # Published by SpanOperation#start with the operation.
        class BeforeStart < Tracing::Event
          def initialize
            # Pass this event's name to the Tracing::Event constructor.
            super(:before_start)
          end
        end

        # Triggered when the span raises an error during measurement.
        #
        # Holds a single handler. Custom handlers are layered on top of the
        # current one with #wrap_default, and #publish invokes the result.
        class OnError
          # @param default [#call] handler invoked with the published arguments
          def initialize(default)
            @handler = default
          end

          # Call custom error handler but fallback to default behavior on failure.
          #
          # The given block becomes the new handler. If it raises a
          # StandardError, the failure is logged at debug level and the
          # previously installed handler is called instead.

          # DEV: Revisit this before full 1.0 release.
          # It seems like OnError wants to behave like a middleware stack,
          # where each "subscriber"'s executed is chained to the previous one.
          # This is different from how {Tracing::Event} works, and might be incompatible.
          #
          # @yield [op, error] the custom handler
          # @return [Proc] the newly installed handler
          def wrap_default
            # Capture the current handler so the wrapper can fall back to it.
            original = @handler

            @handler = proc do |op, error|
              begin
                yield(op, error)
              rescue StandardError => e
                Datadog.logger.debug do
                  "Custom on_error handler #{@handler} failed, using fallback behavior. \
                  Cause: #{e.class.name} #{e.message} Location: #{Array(e.backtrace).first}"
                end

                original.call(op, error) if original
              end
            end
          end

          # Invokes the current handler. A StandardError raised by the handler
          # is logged at debug level and not propagated.
          #
          # @param args [Array] arguments forwarded to the handler
          # @return [true] always
          def publish(*args)
            begin
              @handler.call(*args)
            rescue StandardError => e
              Datadog.logger.debug do
                # Report the handler that failed. This class only ever
                # assigns @handler, so that is the value to interpolate.
                "Error in on_error handler '#{@handler}': #{e.class.name} #{e.message} at #{Array(e.backtrace).first}"
              end
            end

            true
          end
        end
      end

      # Error raised when #measure is called on an operation that was already started.
      class AlreadyStartedError < StandardError
        MESSAGE = 'Cannot measure an already started span!'.freeze

        # @return [String] a fixed, frozen message, regardless of constructor arguments
        def message
          MESSAGE
        end
      end

      private

      # Keep span reference private: we don't want users
      # modifying the finalized span from the operation after
      # it has been finished.
      #
      # events - the Events instance assigned in #initialize.
      # parent - the object passed as `child_of`, stored by #parent=.
      # span   - the Span memoized by #finish; nil until then.
      attr_reader \
        :events,
        :parent,
        :span

      # Create the Span that represents the finalized measurement.
      #
      # meta and metrics go through SafeDup.frozen_or_dup to prevent
      # mutation by reference: once the span is returned, this
      # SpanOperation must not be able to modify it further.
      #
      # @return [Span]
      def build_span
        attributes = {
          duration: duration,
          end_time: @end_time,
          id: @id,
          meta: Core::Utils::SafeDup.frozen_or_dup(meta),
          metrics: Core::Utils::SafeDup.frozen_or_dup(metrics),
          parent_id: @parent_id,
          resource: @resource,
          service: @service,
          start_time: @start_time,
          status: @status,
          type: @type,
          trace_id: @trace_id,
          # Service entry: no parent object, or a service that differs from the parent's.
          service_entry: parent.nil? || (service && parent.service != service)
        }

        Span.new(@name, **attributes)
      end

      # Set this span's parent and take its trace ID and span ID.
      # If the parent is nil, set the span as the root span: the trace ID
      # becomes this span's own ID and the parent ID becomes 0.
      #
      # DEV: This method creates a false expectation that
      # `self.parent.id == self.parent_id`, which is not the case
      # for distributed traces, as the parent Span object does not exist
      # in this application. `#parent_id` is the only reliable parent
      # identifier. We should remove the ability to set a parent Span
      # object in the future.
      def parent=(parent)
        @parent = parent

        unless parent.nil?
          # Child span: inherit the parent's identifiers.
          @trace_id = parent.trace_id
          return @parent_id = parent.id
        end

        # Root span.
        @trace_id = @id
        @parent_id = 0
      end

      # Returns the value of Core::Utils::Time.get_time; #start and #stop
      # store it in @duration_start / @duration_end when no explicit time is given.
      # NOTE(review): presumably a monotonic clock reading, per the comments
      # in #initialize — confirm against Core::Utils::Time.
      def duration_marker
        Core::Utils::Time.get_time
      end

      # Used for serialization
      # @return [Integer] in nanoseconds since Epoch
      def start_time_nano
        # Whole seconds scaled to nanoseconds, plus the sub-second nanoseconds.
        whole_seconds_ns = @start_time.to_i * 1_000_000_000

        whole_seconds_ns + @start_time.nsec
      end

      # Used for serialization
      # @return [Integer] duration in nanoseconds, truncated toward zero
      def duration_nano
        nanoseconds = duration * 1e9

        nanoseconds.to_i
      end

      # For backwards compatibility
      # TODO: Deprecate and remove these in 2.0.
      # Legacy names: span_id -> id, span_type -> type, span_type= -> type=.
      alias :span_id :id
      alias :span_type :type
      alias :span_type= :type=
    end
  end
end
Modules

Classes

lib/datadog/tracing/span_operation.rb

Source Files