lib/gitlab/qa/component/telegraf.rb



# frozen_string_literal: true

require 'tempfile'

module Gitlab
  module QA
    module Component
      # Component that runs a telegraf container to collect docker metrics
      #
      # The container mounts the host docker socket as input and is configured (see #config) to send
      # metrics to the influxdb instance described by the QA_INFLUXDB_URL and QA_INFLUXDB_TOKEN variables.
      # The container is only started when #run_telegraf? is truthy.
      #
      class Telegraf < Base
        DOCKER_IMAGE = 'telegraf'
        DOCKER_IMAGE_TAG = '1.21-alpine'
        # Log directory inside the container, host_log_dir is mounted to it
        LOG_DIR = '/etc/telegraf/log'
        # Variables that must be present and not blank for the influxdb output to work
        INFLUX_CONFIG_KEYS = %w[QA_INFLUXDB_TOKEN QA_INFLUXDB_URL].freeze

        def initialize
          super

          @name = DOCKER_IMAGE
          @host_log_dir = "#{Runtime::Env.host_artifacts_dir}/#{@name}"
          # Only this subset of variables is passed to the container,
          # telegraf.conf refers to them using the ${VAR} syntax
          @environment = Runtime::Env.variables.slice(
            *INFLUX_CONFIG_KEYS,
            'QA_RUN_TYPE',
            'CI_JOB_NAME',
            'CI_PIPELINE_ID'
          )
        end

        # @return [String] container name
        # @return [String] host directory mounted as telegraf log directory
        # @return [String, nil] path of the generated telegraf.conf, set by #prepare
        attr_reader :name, :host_log_dir, :telegraf_config

        # Start container
        #
        # @return [void]
        def start
          docker.run(image: image, tag: tag) do |command|
            set_command_args(command)
            set_volumes(command)
            set_environment(command)
          end
        end

        # Run prepare commands
        #
        # Writes telegraf.conf to a fresh temporary directory, creates the host log directory
        # and runs the image and container preparation steps
        #
        # @return [void]
        def prepare
          # Temporary directory is created under CI_BUILDS_DIR when it is set,
          # otherwise Dir.mktmpdir falls back to the default system tmp dir
          @telegraf_config = File.join(Dir.mktmpdir(nil, ENV['CI_BUILDS_DIR']), 'telegraf.conf')
          File.write(telegraf_config, config)
          FileUtils.mkdir_p(host_log_dir)

          prepare_docker_image
          prepare_docker_container
        end

        # Run teardown
        #
        # Does nothing when #run_telegraf? is falsy, because #instance_no_teardown
        # does not start the container in that case
        #
        # @return [void]
        def teardown
          return unless run_telegraf?

          super
        end

        private

        # Set custom run command arguments
        #
        # Runs a detached, named container as root user with telegraf as the entrypoint
        #
        # @param [Docker::Command] command
        # @return [void]
        def set_command_args(command)
          command << '-d'
          command << "--name #{name}"
          command << "--user root"
          command << "--entrypoint telegraf"
        end

        # Set volumes
        #
        # Mounts the log directory, the docker socket and the generated config file (read only)
        #
        # @param [Docker::Command] command
        # @return [void]
        def set_volumes(command)
          command.volume(host_log_dir, LOG_DIR)
          command.volume('/var/run/docker.sock', '/var/run/docker.sock')
          command.volume(telegraf_config, '/etc/telegraf/telegraf.conf', :ro)
        end

        # Set environment variables
        #
        # @param [Docker::Command] command
        # @return [void]
        def set_environment(command)
          environment.each { |k, v| command.env(k, v) }
        end

        # Run main entrypoint
        #
        # Delegates to the parent implementation when telegraf should run,
        # otherwise only logs a debug message and yields self to the given block
        #
        # @return [void]
        def instance_no_teardown
          if run_telegraf?
            super
          else
            Runtime::Logger.debug("Skipping starting telegraf container!")
            yield self if block_given?
          end
        end

        # Should telegraf be started
        #
        # Run only on CI and skip if metrics explicitly disabled, run_type not set or influx params missing
        #
        # @return [Boolean]
        def run_telegraf?
          Runtime::Env.ci && Runtime::Env.qa_export_test_metrics? && Runtime::Env.qa_run_type && !missing_influx_config?
        end

        # Influxdb config params missing
        #
        # A param counts as missing when its key is absent from environment or its value is blank.
        # Keys are looked up one by one on purpose: Hash#slice silently drops absent keys, so checking
        # only the sliced values would not detect a variable that is not set at all.
        #
        # @return [Boolean]
        def missing_influx_config?
          INFLUX_CONFIG_KEYS.any? { |key| environment[key].blank? }
        end

        # Telegraf configuration
        #
        # ${VAR} placeholders are left as is in the generated file and refer to the
        # environment variables passed to the container by #set_environment
        #
        # @return [String]
        def config
          <<~CONFIG
            [global_tags]
              run_type = "${QA_RUN_TYPE}"
              pipeline_id = "${CI_PIPELINE_ID}"
              job_name = "${CI_JOB_NAME}"

            [agent]
              interval = "1s"
              round_interval = true
              metric_batch_size = 1000
              metric_buffer_limit = 10000
              collection_jitter = "0s"
              flush_interval = "10s"
              flush_jitter = "0s"
              precision = ""
              debug = true
              logtarget = "file"
              logfile = "#{LOG_DIR}/telegraf.log"
              hostname = ""
              omit_hostname = false

            [[outputs.influxdb_v2]]
              urls = ["${QA_INFLUXDB_URL}"]
              token = "${QA_INFLUXDB_TOKEN}"
              organization = "gitlab-qa"
              bucket = "test-env-stats"

            [[inputs.docker]]
              endpoint = "unix:///var/run/docker.sock"
              gather_services = false
              container_names = []
              source_tag = false
              container_name_include = []
              container_name_exclude = ["#{name}"]
              timeout = "5s"
              perdevice = false
              perdevice_include = []
              total = true
              total_include = ["cpu", "blkio", "network"]
              docker_label_include = []
              docker_label_exclude = []
          CONFIG
        end
      end
    end
  end
end