# lib/gitlab/qa/report/relate_failure_issue.rb



# frozen_string_literal: true

require 'nokogiri'
require 'active_support/core_ext/enumerable'
require 'rubygems/text'

module Gitlab
  module QA
    module Report
      # Uses the API to relate failed tests from RSpec report files to GitLab issues.
      #
      # For each test that has at least one failure, an opened issue labelled `QA` is looked up
      # whose title mentions the test (name or partial file path) and whose "Stack trace" section
      # is close enough to the failure's stack trace. When no such issue exists, a new one is created.
      # The issue's labels are then updated and a `/relate` quick action note is posted on it.
      class RelateFailureIssue < ReportAsIssue
        # Default maximum ratio (Levenshtein distance / failure stack trace size) under which
        # an issue's stack trace is considered to match a test failure.
        DEFAULT_MAX_DIFF_RATIO_FOR_DETECTION = 0.15
        # Captures (as `stacktrace`) everything following `Failure/Error:` when that marker is present,
        # otherwise the whole text.
        FAILURE_STACKTRACE_REGEX = %r{((.*Failure\/Error:(?<stacktrace>.+))|(?<stacktrace>.+))}m.freeze
        # Captures (as `stacktrace`) the content of the fenced code block that follows
        # the `### Stack trace` heading of an issue description.
        ISSUE_STACKTRACE_REGEX = /### Stack trace\s*(```)#{FAILURE_STACKTRACE_REGEX}(```)/m.freeze
        # Labels passed as `new_labels` when computing the labels of a newly created issue.
        NEW_ISSUE_LABELS = Set.new(%w[QA Quality test failure::investigating priority::2]).freeze

        # Raised when several candidate issues share the smallest diff ratio, so that no single best match exists.
        MultipleIssuesFound = Class.new(StandardError)

        # `max_diff_ratio` is coerced with `#to_f`; all arguments (including `max_diff_ratio`)
        # are forwarded to the parent initializer by the bare `super`.
        def initialize(max_diff_ratio: DEFAULT_MAX_DIFF_RATIO_FOR_DETECTION, **kwargs)
          super
          @max_diff_ratio = max_diff_ratio.to_f
        end

        private

        attr_reader :max_diff_ratio

        # Goes through every report file, relates each failed test to an issue,
        # then writes the test results back.
        def run!
          puts "Reporting test failures in `#{files.join(',')}` as issues in project `#{project}` via the API at `#{Runtime::Env.gitlab_api_base}`."

          test_results_per_file do |test_results|
            puts "=> Reporting tests in #{test_results.path}"

            test_results.each do |test|
              # Only failed tests are related to issues.
              next if test.failures.empty?

              relate_test_to_issue(test)
            end

            test_results.write
          end
        end

        # Finds (or creates) the issue for the given failed test, updates its labels and
        # posts the `/relate` note. An ambiguous match (`MultipleIssuesFound`) is reported
        # as a warning and the test is skipped, so that remaining tests are still processed.
        def relate_test_to_issue(test)
          puts "  => Searching issues for test '#{test.name}'..."

          issue = find_or_create_issue(test)
          return unless issue

          update_labels(issue, test)
          post_failed_job_note(issue, test)
          puts "  => Marked #{issue.web_url} as related to #{test.testcase}."
        rescue MultipleIssuesFound => e
          warn(e.message)
        end

        # Returns the best matching existing issue for the test, or the result of `create_issue`
        # when no existing issue matches (which may be falsy, hence the `if issue` guard on the log line).
        def find_or_create_issue(test)
          issue, diff_ratio = find_failure_issue(test)

          if issue
            puts "  => Found issue #{issue.web_url} for test '#{test.name}' with a diff ratio of #{(diff_ratio * 100).round(2)}%."
          else
            issue = create_issue(test)
            puts "  => Created new issue: #{issue.web_url} for test '#{test.name}'." if issue
          end

          issue
        end

        # Returns the opened issues labelled `QA` whose title includes either the test name
        # or the partial path of the test file.
        def failure_issues(test)
          gitlab.find_issues(options: { state: 'opened', labels: 'QA' }).select do |issue|
            issue_title = issue.title.strip
            issue_title.include?(test.name) || issue_title.include?(partial_file_path(test.file))
          end
        end

        # Returns a Hash mapping each candidate issue (converted to a plain Hash) to its diff ratio,
        # keeping only the issues whose diff ratio is at most `max_diff_ratio`.
        #
        # The diff ratio is the Levenshtein distance between the sanitized stack trace of the first
        # test failure and the sanitized stack trace of the issue, divided by the size of the former.
        def find_relevant_failure_issues(test) # rubocop:disable Metrics/AbcSize
          ld = Class.new.extend(Gem::Text).method(:levenshtein_distance)
          full_stacktrace = test.failures.first['message_lines'].join("\n")
          # Fall back to the raw stack trace when it cannot be sanitized.
          first_test_failure_stacktrace = sanitize_stacktrace(full_stacktrace, FAILURE_STACKTRACE_REGEX) || full_stacktrace

          # Search with the `search` param returns 500 errors, so we filter by ~QA and then filter further in Ruby
          failure_issues(test).each_with_object({}) do |issue, memo|
            relevant_issue_stacktrace = find_issue_stacktrace(issue)
            # Issues without a recognizable stack trace cannot be compared, skip them.
            next unless relevant_issue_stacktrace

            distance = ld.call(first_test_failure_stacktrace, relevant_issue_stacktrace)
            diff_ratio = distance.zero? ? 0.0 : (distance.to_f / first_test_failure_stacktrace.size).round(3)

            if diff_ratio <= max_diff_ratio
              puts "  => [DEBUG] Issue #{issue.web_url} has an acceptable diff ratio of #{(diff_ratio * 100).round(2)}%."
              # The `Gitlab::ObjectifiedHash` class overrides `#hash` which is used by `Hash#[]=` to compute the hash key.
              # This leads to a `TypeError Exception: no implicit conversion of Hash into Integer` error, so we convert the object to a hash before using it as a Hash key.
              # See:
              # - https://gitlab.com/gitlab-org/gitlab-qa/-/merge_requests/587#note_453336995
              # - https://github.com/NARKOZ/gitlab/commit/cbdbd1e32623f018a8fae39932a8e3bc4d929abb?_pjax=%23js-repo-pjax-container#r44484494
              memo[issue.to_h] = diff_ratio
            else
              puts "  => [DEBUG] Found issue #{issue.web_url} but stacktraces are too different (#{(diff_ratio * 100).round(2)}%).\n"
              puts "  => [DEBUG] Issue stacktrace:\n----------------\n#{relevant_issue_stacktrace}\n----------------\n"
              puts "  => [DEBUG] Failure stacktrace:\n----------------\n#{first_test_failure_stacktrace}\n----------------\n"
            end
          end
        end

        # Returns the sanitized stack trace found in the issue description,
        # or nil (after logging a debug message) when there is none.
        def find_issue_stacktrace(issue)
          issue_stacktrace = sanitize_stacktrace(issue.description, ISSUE_STACKTRACE_REGEX)
          return issue_stacktrace if issue_stacktrace

          puts "  => [DEBUG] Stacktrace couldn't be found for #{issue.web_url}!"
          nil
        end

        # Extracts the `stacktrace` named capture of `regex` from `stacktrace`, removes the lines
        # starting with `#` as well as any leading whitespace of each line, and strips the result.
        #
        # Returns nil (after logging a debug message) when `stacktrace` does not match `regex`.
        def sanitize_stacktrace(stacktrace, regex)
          # `stacktrace` can be nil (presumably when an issue has no description - see
          # #find_issue_stacktrace), so coerce it to a String instead of raising a NoMethodError.
          stacktrace_match = stacktrace.to_s.match(regex)

          if stacktrace_match
            stacktrace_match[:stacktrace].gsub(/^\s*#.*$/, '').gsub(/^[[:space:]]+/, '').strip
          else
            puts "  => [DEBUG] Stacktrace doesn't match the expected regex (#{regex}):\n----------------\n#{stacktrace}\n----------------\n"
            nil
          end
        end

        # Returns `[issue, diff_ratio]` for the issue with the smallest diff ratio, or nil when
        # no relevant issue exists. Also records the issue URL on the test unless one is already set.
        #
        # Raises MultipleIssuesFound when several issues share the smallest diff ratio.
        def find_failure_issue(test)
          relevant_issues = find_relevant_failure_issues(test)

          return nil if relevant_issues.empty?

          best_matching_issue, smaller_diff_ratio = relevant_issues.min_by { |_, diff_ratio| diff_ratio }

          # A tie on the smallest diff ratio means we cannot pick a single issue.
          unless relevant_issues.values.count(smaller_diff_ratio) == 1 # rubocop:disable Style/IfUnlessModifier
            raise(MultipleIssuesFound, %(Too many issues found for test '#{test.name}' (`#{test.file}`)!))
          end

          # Re-instantiate a `Gitlab::ObjectifiedHash` object after having converted it to a hash in #find_relevant_failure_issues above.
          best_matching_issue = Gitlab::ObjectifiedHash.new(best_matching_issue)

          test.failure_issue ||= best_matching_issue.web_url

          [best_matching_issue, smaller_diff_ratio]
        end

        # Appends a "Stack trace" section (matching ISSUE_STACKTRACE_REGEX) and the CI job URL
        # to the description built by the parent class.
        def new_issue_description(test)
          super + [
            "\n\n### Stack trace",
            "```\n#{test.failures.first['message_lines'].join("\n")}\n```",
            "First happened in #{test.ci_job_url}."
          ].join("\n\n")
        end

        # Labels for a newly created issue, computed from NEW_ISSUE_LABELS.
        def new_issue_labels(test)
          up_to_date_labels(test: test, new_labels: NEW_ISSUE_LABELS)
        end

        # Adds the pipeline name label to the labels computed by the parent class.
        def up_to_date_labels(test:, issue: nil, new_labels: Set.new)
          super << pipeline_name_label
        end

        # Posts a `/relate` quick action note on the issue, targeting the test's testcase.
        def post_failed_job_note(issue, test)
          gitlab.create_issue_note(iid: issue.iid, note: "/relate #{test.testcase}")
        end

        # Prefixes the title built by the parent class with "Failure in ".
        def new_issue_title(test)
          "Failure in #{super}"
        end
      end
    end
  end
end