# frozen_string_literal: true
require 'nokogiri'
require 'active_support/core_ext/enumerable'
require 'rubygems/text'
require 'active_support/core_ext/integer/time'
module Gitlab
module QA
module Report
# Uses the API to create or update GitLab issues with the results of tests from RSpec report files.
class RelateFailureIssue < ReportAsIssue
# NOTE(review): presumably provides `set_dri_via_group`, which #assign_dri calls — confirm against FindSetDri.
include FindSetDri
# Default for the `max_diff_ratio:` argument of #initialize; the ratio is the upper bound used when comparing stack traces.
DEFAULT_MAX_DIFF_RATIO_FOR_DETECTION = 0.15
# When at least this many similar issues exist, #create_issue comments on them instead of opening a new issue.
SPAM_THRESHOLD_FOR_FAILURE_ISSUES = 3
# Captures `stacktrace`: the text after the last `Failure/Error:` marker when one is present, otherwise the whole input.
FAILURE_STACKTRACE_REGEX = %r{((.*Failure\/Error:(?<stacktrace>.+))|(?<stacktrace>.+))}m.freeze
# Same capture, applied to the fenced code block that follows a `### Stack trace` heading (the layout written by #new_issue_description).
ISSUE_STACKTRACE_REGEX = /### Stack trace\s*(```)#{FAILURE_STACKTRACE_REGEX}(```)/m.freeze
# Matches the `First happened in <url>.` sentence written by #new_issue_description (the trailing period is part of the match).
FAILED_JOB_DESCRIPTION_REGEX = %r{First happened in https?:\/\/\S+\.}m.freeze
# Matches the note body written by #post_or_update_failed_job_note; `\D+` stands in for the pipeline name.
FAILED_JOB_NOTE_REGEX = %r{Failed most recently in \D+ pipeline: https?:\/\/\S+}.freeze
# Labels handed to #up_to_date_labels by #new_issue_labels.
NEW_ISSUE_LABELS = Set.new(%w[QA Quality test failure::new priority::2]).freeze
# Substrings of exception messages that #ignore_failure_reason treats as ignorable.
IGNORE_EXCEPTIONS = ['Net::ReadTimeout', '403 Forbidden - Your account has been blocked'].freeze
# Raised by #find_failure_issue when several issues share the lowest diff ratio; rescued in #relate_failure_to_issue.
MultipleIssuesFound = Class.new(StandardError)
# Sets up the reporter state on top of whatever the parent initializer does.
#
# @param system_logs [String, Array<String>] glob pattern(s); expanded once with `Dir.glob`
# @param max_diff_ratio [#to_f] upper bound for the stack trace diff ratio
# @param kwargs [Hash] any further keyword arguments; the bare `super` forwards them
#   (together with the two named ones) to the parent initializer
def initialize(system_logs: [], max_diff_ratio: DEFAULT_MAX_DIFF_RATIO_FOR_DETECTION, **kwargs)
  # Bare `super` re-sends every argument this method received.
  super

  # URLs of the issues that already received a failed-job note during this run.
  @commented_issue_list = Set.new
  @issue_type = 'issue'
  @max_diff_ratio = max_diff_ratio.to_f
  @system_logs = Dir.glob(system_logs)
end
# Every method defined after this point is private.
private
# Upper bound for the stack trace diff ratio; stored as a Float by #initialize.
attr_reader :max_diff_ratio
# Walks every test result file, relates each reportable failure to an issue
# and writes the (possibly updated) results back through `test_results.write`.
def run!
  reported_files = files.join(',')
  puts "Reporting test failures in `#{reported_files}` as issues in project `#{project}` via the API at `#{Runtime::Env.gitlab_api_base}`."

  test_results_per_file do |results|
    puts "=> Reporting tests in #{results.path}"

    results.each do |test|
      next unless should_report?(test)

      relate_failure_to_issue(test)
    end

    results.write
  end
end
# Links a failed test to an existing issue, or opens a new one.
#
# A new issue is only created when no issue matched and the test is not
# quarantined. Labels are refreshed unless the matched issue was already
# commented on. A MultipleIssuesFound error is reported with `warn` and swallowed.
#
# @param test the failed test result
def relate_failure_to_issue(test)
  puts " => Searching issues for test '#{test.name}'..."

  matched_issue, already_commented = find_and_link_issue(test)
  return create_issue(test) if !matched_issue && !test.quarantine?

  update_labels(matched_issue, test) unless already_commented
rescue MultipleIssuesFound => e
  warn(e.message)
end
# Finds the issue matching the failure and posts the failed-job note on it,
# unless that issue was already commented on during this run.
#
# @param test the failed test result
# @return [Array] `[issue, already_commented]`; `[false, true]` when no issue matched
def find_and_link_issue(test)
  found_issue, ratio = find_failure_issue(test)
  return [false, true] unless found_issue

  already_commented = issue_already_commented?(found_issue)

  if already_commented
    puts " => Failure already commented on issue."
    return [found_issue, already_commented]
  end

  puts " => Found issue #{found_issue.web_url} for test '#{test.name}' with a diff ratio of #{(ratio * 100).round(2)}%."
  post_or_update_failed_job_note(found_issue, test)
  # Remember the issue so later failures in the same run do not comment again.
  @commented_issue_list.add(found_issue.web_url)

  [found_issue, already_commented]
end
# Opens a new failure issue through the parent implementation, unless enough
# similar issues already exist for the same pipeline, in which case each of
# them gets a note pointing at the failed job instead.
#
# @param test the failed test result
# @return the issue returned by `super`, or nil when creation was skipped
def create_issue(test)
  lookalikes = pipeline_issues_with_similar_stacktrace(test)

  if lookalikes.size < SPAM_THRESHOLD_FOR_FAILURE_ISSUES
    # Bare `super` forwards `test` to the parent implementation.
    new_issue = super
    puts "for test '#{test.name}'."
    post_or_update_failed_job_note(new_issue, test)
    assign_dri(new_issue, test)
    return new_issue
  end

  puts " => Similar failure issues have already been opened for same pipeline environment"
  puts " => Will not create new issue for this failing spec"
  lookalikes.each do |lookalike|
    puts "Please check issue: #{lookalike.web_url}"
    gitlab.create_issue_note(iid: lookalike.iid, note: "This failed job is most likely related: #{test.ci_job_url}")
  end

  nil
end
# Selects the recently opened failure issues that belong to the current
# pipeline and whose stack trace is close to the one of the given test.
#
# Issues without a recognisable stack trace are skipped, as they are in
# #find_relevant_failure_issues, instead of being passed as nil to the
# comparison.
#
# @param test the failed test result
# @return [Array] issues whose diff ratio is strictly below `max_diff_ratio`
def pipeline_issues_with_similar_stacktrace(test)
  search_options = { state: 'opened', labels: 'QA,failure::new', created_after: past_timestamp(2) }
  # Computed lazily, at most once: it does not depend on the issue being inspected.
  stack_trace_from_test = nil

  gitlab.find_issues(options: search_options).select do |issue|
    job_url_from_issue = failed_issue_job_url(issue)
    next unless pipeline == pipeline_env_from_job_url(job_url_from_issue)

    stack_trace_from_issue = cleaned_stack_trace_from_issue(issue)
    next if stack_trace_from_issue.nil?

    stack_trace_from_test ||= cleaned_stacktrace_from_test(test)
    compare_stack_traces(stack_trace_from_test, stack_trace_from_issue) < max_diff_ratio
  end
end
# Extracts the failed job URL recorded on an issue.
#
# The "Failed most recently in" note is preferred; the "First happened in"
# sentence of the description is the fallback.
#
# @param issue the issue to inspect
# @return [String, nil] the job URL, or nil when neither source matches
def failed_issue_job_url(issue)
  existing_note = existing_failure_note(issue)

  if existing_note
    matched = existing_note.body.to_s.match(FAILED_JOB_NOTE_REGEX)
    sentence = matched && matched[0]
  else
    # `to_s` keeps an issue without a description from raising on nil.
    matched = issue.description.to_s.match(FAILED_JOB_DESCRIPTION_REGEX)
    # Only the description sentence ends with a period; the note ends with the
    # URL itself, so chopping it there would cut the last character of the URL.
    sentence = matched && matched[0].chop
  end

  return unless sentence

  job_url = sentence.split(' ').last
  puts "=> Found failed job url in the issue: #{job_url}"
  job_url
end
# Derives the pipeline environment name from a job URL.
#
# The environment is the path segment right after `/quality/`; URLs without
# that marker map to 'master'.
#
# @param job_url [String, nil]
# @return [String, nil] nil when no URL is given
def pipeline_env_from_job_url(job_url)
  return if job_url.nil?
  return 'master' unless job_url.include?('/quality/')

  _before, _marker, after_marker = job_url.partition('/quality/')
  environment, = after_marker.partition('/')
  environment
end
# Formats the moment `hours_ago` hours before now as a UTC timestamp
# (`YYYY-MM-DDTHH:MM:SSZ`).
#
# @param hours_ago [Numeric]
# @return [String]
def past_timestamp(hours_ago)
  seconds_ago = hours_ago * 60 * 60
  (Time.now - seconds_ago).utc.strftime("%Y-%m-%dT%H:%M:%SZ")
end
# Lists the opened ~QA issues whose (stripped) title mentions either the test
# name or the partial path of the test file.
#
# @param test the failed test result
# @return [Array] the matching issues
def failure_issues(test)
  open_qa_issues = gitlab.find_issues(options: { state: 'opened', labels: 'QA' })

  open_qa_issues.select do |candidate|
    title = candidate.title.strip
    next true if title.include?(test.name)

    title.include?(partial_file_path(test.file))
  end
end
# Returns the text of the first failure of a test.
#
# The joined `message_lines` are used when they form a non-empty collection;
# the plain `message` is the fallback when `message_lines` is missing, empty
# or a String.
#
# @param test the failed test result; its first failure is read
# @return [String] the failure text (whatever `message` holds in the fallback case)
def full_stacktrace(test)
  failure = test.failures.first
  message_lines = failure['message_lines']

  # A missing key is treated like an empty list instead of raising on nil.
  if message_lines.nil? || message_lines.empty? || message_lines.instance_of?(String)
    failure['message']
  else
    message_lines.join("\n")
  end
end
# Extracts the stack trace stored in an issue and replaces run-specific
# resource names in it.
#
# @param issue the issue to read
# @return [String, nil] nil when the issue holds no recognisable stack trace
def cleaned_stack_trace_from_issue(issue)
  raw_trace = find_issue_stacktrace(issue)
  raw_trace ? remove_unique_resource_names(raw_trace) : nil
end
# Builds the comparable stack trace of a failed test: the sanitized trace when
# sanitizing succeeds, the full failure text otherwise, with run-specific
# resource names replaced.
#
# @param test the failed test result
# @return [String]
def cleaned_stacktrace_from_test(test)
  raw_trace = full_stacktrace(test)
  relevant_trace = sanitize_stacktrace(raw_trace, FAILURE_STACKTRACE_REGEX) || raw_trace
  remove_unique_resource_names(relevant_trace)
end
# Compares two stack traces by delegating to #calculate_diff_ratio.
#
# @param stack_trace_first [String] the trace the ratio is relative to
# @param stack_trace_second [String] the trace it is compared with
# @return [Float] the diff ratio computed by #calculate_diff_ratio
def compare_stack_traces(stack_trace_first, stack_trace_second)
calculate_diff_ratio(stack_trace_first, stack_trace_second)
end
# Computes how different two stack traces are: the Levenshtein distance
# between them divided by the length of the first trace, rounded to three
# decimals. Identical traces yield 0.0.
#
# NOTE(review): the ratio is relative to the first trace only, so an empty
# first trace compared with a non-empty second one divides by zero (as a
# Float) — confirm callers never pass an empty first trace.
#
# @param stack_trace_first [String]
# @param stack_trace_second [String]
# @return [Float]
def calculate_diff_ratio(stack_trace_first, stack_trace_second)
  # Reuse one bound method instead of building a throwaway anonymous class
  # (and Method object) for every comparison.
  @levenshtein_distance ||= Object.new.extend(Gem::Text).method(:levenshtein_distance)

  distance = @levenshtein_distance.call(stack_trace_first, stack_trace_second)
  distance.zero? ? 0.0 : (distance.to_f / stack_trace_first.size).round(3)
end
# Collects the candidate issues whose stack trace is close enough to the
# failure of the given test.
#
# @param test the failed test result
# @return [Hash{Hash => Float}] issue (converted with `to_h`) => diff ratio,
#   for every issue whose ratio does not exceed `max_diff_ratio`
def find_relevant_failure_issues(test) # rubocop:disable Metrics/AbcSize
  failure_trace = cleaned_stacktrace_from_test(test)

  # Searching with the `search` param returns 500 errors, so candidates are
  # fetched by ~QA label and narrowed down here in Ruby.
  failure_issues(test).each_with_object({}) do |candidate, matches|
    issue_trace = cleaned_stack_trace_from_issue(candidate)
    next if issue_trace.nil?

    ratio = compare_stack_traces(failure_trace, issue_trace)
    percentage = (ratio * 100).round(2)

    if ratio <= max_diff_ratio
      puts " => [DEBUG] Issue #{candidate.web_url} has an acceptable diff ratio of #{percentage}%."
      # `Gitlab::ObjectifiedHash` overrides `#hash`, which `Hash#[]=` relies on to
      # compute the key; using the object directly fails with
      # `TypeError Exception: no implicit conversion of Hash into Integer`.
      # Store the plain hash instead. See:
      # - https://gitlab.com/gitlab-org/gitlab-qa/-/merge_requests/587#note_453336995
      # - https://github.com/NARKOZ/gitlab/commit/cbdbd1e32623f018a8fae39932a8e3bc4d929abb?_pjax=%23js-repo-pjax-container#r44484494
      matches[candidate.to_h] = ratio
      next
    end

    puts " => [DEBUG] Found issue #{candidate.web_url} but stacktraces are too different (#{percentage}%).\n"
    puts " => [DEBUG] Issue stacktrace:\n----------------\n#{issue_trace}\n----------------\n"
    puts " => [DEBUG] Failure stacktrace:\n----------------\n#{failure_trace}\n----------------\n"
  end
end
# Pulls the sanitized stack trace out of an issue description.
#
# @param issue the issue to read
# @return [String, nil] nil (after a debug message) when no stack trace is found
def find_issue_stacktrace(issue)
  # `puts` returns nil, which is what callers receive when nothing was found.
  sanitize_stacktrace(issue.description, ISSUE_STACKTRACE_REGEX) ||
    puts(" => [DEBUG] Stacktrace couldn't be found for #{issue.web_url}!")
end
# Extracts the `stacktrace` capture of `regex` from the given text and
# normalizes it: everything from "First happened in" onwards is dropped,
# comment lines (starting with `#`) and leading whitespace are removed, and
# the result is stripped.
#
# @param stacktrace [String, nil] raw text; nil is treated as an empty string
# @param regex [Regexp] must define a `stacktrace` named capture
# @return [String, nil] the sanitized trace, or nil (after a debug message)
#   when the text does not match
def sanitize_stacktrace(stacktrace, regex)
  # An issue without a description hands nil over; treat it as "no match".
  stacktrace_match = stacktrace.to_s.match(regex)

  if stacktrace_match
    # `partition` always yields a String head, unlike `split(...)[0]`, which is
    # nil when the capture consists of the marker alone.
    relevant_part = stacktrace_match[:stacktrace].partition('First happened in').first
    relevant_part.gsub(/^\s*#.*$/, '').gsub(/^[[:space:]]+/, '').strip
  else
    puts " => [DEBUG] Stacktrace doesn't match the expected regex (#{regex}):\n----------------\n#{stacktrace}\n----------------\n"
  end
end
# Replaces run-specific identifiers so that traces from different runs compare
# equal: `qa-test-…`/`qa-user-…` names become `<unique-test-resource>` and
# hash-like suffixes introduced by `-` or `_` become `<unique-hash>`.
#
# @param stacktrace [String]
# @return [String] a new string with the placeholders applied
def remove_unique_resource_names(stacktrace)
  without_resource_names = stacktrace.gsub(/qa-(test|user)-[a-z0-9-]+/, '<unique-test-resource>')
  without_resource_names.gsub(/(?:-|_)(?:\d+[a-z]|[a-z]+\d)[a-z\d]{4,}/, '<unique-hash>')
end
# Picks the issue whose stack trace is closest to the failure of the test and
# records its URL on the test when none is set yet.
#
# @param test the failed test result
# @return [Array, nil] `[issue, diff_ratio]`, or nil when no issue is relevant
# @raise [MultipleIssuesFound] when several issues share the lowest diff ratio
def find_failure_issue(test)
  candidates = find_relevant_failure_issues(test)
  return if candidates.empty?

  closest_issue_hash, lowest_ratio = candidates.min_by { |_issue_hash, ratio| ratio }

  if candidates.values.count(lowest_ratio) != 1
    raise(MultipleIssuesFound, %(Too many issues found for test '#{test.name}' (`#{test.file}`)!))
  end

  # The candidates were keyed by plain hashes in #find_relevant_failure_issues;
  # wrap the winner back into a `Gitlab::ObjectifiedHash`.
  closest_issue = Gitlab::ObjectifiedHash.new(closest_issue_hash)
  test.failure_issue ||= closest_issue.web_url

  [closest_issue, lowest_ratio]
end
# Extends the parent description with the stack trace, the job in which the
# failure first happened, the related test case, an optional screenshot and an
# optional system log summary. The extra sections are separated by blank lines.
#
# @param test the failed test result
# @return [String]
def new_issue_description(test)
  # Evaluate the parent description first, before any of the sections below.
  base_description = super

  failure_sections = [
    "\n\n### Stack trace",
    "```\n#{full_stacktrace(test)}\n```",
    "First happened in #{test.ci_job_url}.",
    "Related test case: #{test.testcase}.",
    screenshot_section(test),
    system_log_errors_section(test)
  ]

  base_description + failure_sections.join("\n\n")
end
# Builds the system log summary for the issue description.
#
# The summary is only generated when system log files are available and the
# first failure carries a correlation id; otherwise the section is empty.
#
# @param test the failed test result
# @return [String] markdown summary, or an empty string
def system_log_errors_section(test)
  correlation_id = test.failures.first['correlation_id']

  section =
    if @system_logs.any? && !correlation_id.nil?
      SystemLogs::SystemLogsFormatter.new(@system_logs, correlation_id).system_logs_summary_markdown
    else
      ''
    end

  puts " => No system logs or correlation id provided, skipping this section in issue description" if section.empty?
  section
end
# Labels for a newly created issue: NEW_ISSUE_LABELS passed through #up_to_date_labels.
#
# @param test the failed test result
# @return the value returned by #up_to_date_labels
def new_issue_labels(test)
up_to_date_labels(test: test, new_labels: NEW_ISSUE_LABELS)
end
# Adds the pipeline name label to the labels computed by the parent.
#
# @param test the failed test result
# @param issue the issue being updated, if any
# @param new_labels labels to add
# @return the parent's label collection with the pipeline name label appended
def up_to_date_labels(test:, issue: nil, new_labels: Set.new)
  # Bare `super` forwards the keyword arguments unchanged.
  labels = super
  labels << pipeline_name_label
end
# Makes sure the issue carries a note pointing at the most recent failed job.
#
# An existing note is edited in place, a missing one is created, and nothing
# happens when the existing note already has the expected body.
#
# @param issue the issue to comment on
# @param test the failed test result
def post_or_update_failed_job_note(issue, test)
  note_body = "Failed most recently in #{pipeline} pipeline: #{test.ci_job_url}"
  previous_note = existing_failure_note(issue)

  if previous_note
    # Already up to date: no API call, no log line.
    return if note_body == previous_note.body

    gitlab.edit_issue_note(issue_iid: issue.iid, note_id: previous_note.id, note: note_body)
  else
    gitlab.create_issue_note(iid: issue.iid, note: note_body)
  end

  puts " => Linked #{test.ci_job_url} to #{issue.web_url}."
end
# Title for a new failure issue: the parent's title prefixed with "Failure in ".
#
# @param test the failed test result (forwarded to the parent by the bare `super`)
# @return [String]
def new_issue_title(test)
"Failure in #{super}"
end
# Looks up the first note of the issue that contains "Failed most recently in".
#
# @param issue the issue whose notes are searched
# @return the note, or nil when there is none (or no notes were returned)
def existing_failure_note(issue)
  issue_notes = gitlab.find_issue_notes(iid: issue.iid)
  issue_notes&.find { |candidate| candidate.body.include?('Failed most recently in') }
end
# Uploads the failure screenshot and returns the markdown section for it.
#
# Nothing is uploaded when the test has no screenshot or when the failure text
# mentions a server error or `fabricate_via_api!`.
#
# @param test the failed test result
# @return [String] the screenshot section, or an empty string
def screenshot_section(test)
  failure_text = full_stacktrace(test)
  skip_markers = ['500 Internal Server Error', 'fabricate_via_api!', 'Error Code 500']

  return '' unless test.screenshot?
  return '' if skip_markers.any? { |marker| failure_text.include?(marker) }

  uploaded = gitlab.upload_file(file_fullpath: test.failure_screenshot)
  uploaded ? "### Screenshot: #{uploaded.markdown}" : ''
end
# Assigns the issue to the DRI resolved from the product group of the test and
# sets its due date one month from today. Tests without product group metadata
# only produce a log line.
#
# @param issue the issue to assign
# @param test the failed test result
def assign_dri(issue, test)
  unless test.product_group?
    puts " => No product group metadata found for test '#{test.name}'"
    return
  end

  dri_username = set_dri_via_group(test.product_group, test)
  assignee_id = gitlab.find_user_id(username: dri_username)
  gitlab.edit_issue(iid: issue.iid, options: { assignee_id: assignee_id, due_date: Date.today + 1.month })
  puts " => Assigning #{dri_username} as DRI for the issue."
end
# Decides whether a test result is worth reporting.
#
# @param test the test result to inspect
# @return [Boolean] false when the test has no failures, or when
#   #ignore_failure_reason gives a reason to ignore its exceptions; true otherwise.
def should_report?(test)
  return false if test.failures.empty?
  return true unless test.report.key?('exceptions')

  skip_reason = ignore_failure_reason(test.report['exceptions'])
  return true unless skip_reason

  puts "Failure reporting skipped because #{skip_reason}"
  false
end
# Determines whether a failure can be ignored because every one of its
# exceptions matches an entry of IGNORE_EXCEPTIONS.
#
# @param [Array<Hash>] exceptions the exceptions associated with the failure.
# @return [String, nil] the reason to ignore the failure, or `nil` when there
#   are no exceptions or at least one of them must not be ignored.
def ignore_failure_reason(exceptions)
  ignorable_messages = exceptions.filter_map do |exception|
    # An exception without a message can never match, so it is never ignorable.
    message = exception['message'].to_s
    message if IGNORE_EXCEPTIONS.any? { |ignorable| message.include?(ignorable) }
  end

  # Only ignore the failure when *all* exceptions are ignorable.
  return if ignorable_messages.empty? || ignorable_messages.size < exceptions.size

  prefix = ignorable_messages.size > 1 ? 'the errors were' : 'the error was'
  "#{prefix} #{ignorable_messages.join(', ')}"
end
# Tells whether the issue's URL is already in `@commented_issue_list`, the set
# that #find_and_link_issue fills after posting the failed-job note.
#
# @param issue the issue to check (identified by its `web_url`)
# @return [Boolean]
def issue_already_commented?(issue)
@commented_issue_list.include?(issue.web_url)
end
end
end
end
end