# frozen_string_literal: true
# Released under the MIT License.
# Copyright, 2022-2024, by Samuel Williams.
require "rugged"
require "yaml"
module Bake
module Modernize
# Support the analysis of authorship and license details.
module License
GIT_BLAME_IGNORE_REVS = ".git-blame-ignore-revs"
# Represents revisions to skip when analyzing authorship.
class SkipList
# Load the skip list from a directory.
def self.for(root)
full_path = File.join(root, GIT_BLAME_IGNORE_REVS)
if File.exist?(full_path)
skip_list = self.new
skip_list.extract(full_path)
return skip_list
end
end
# Create a new skip list with the given revisions.
#
# @parameter revisions [Array(String)] The revisions to skip.
def initialize(revisions = [])
@revisions = Set.new(revisions)
end
# Extract the revisions from the given path.
def extract(path)
File.open(path, "r") do |file|
file.each_line do |line|
# Skip empty lines and comments
next if line =~ /^\s*(#|$)/
# Parse line
@revisions << line.strip
end
end
end
# Check if the given commit should be ignored.
def ignore?(commit)
@revisions.include?(commit.oid)
end
end
# Represents a mailmap file which maps commit emails to proper names.
class Mailmap
# Load the mailmap from a directory.
def self.for(root)
full_path = File.join(root, ".mailmap")
if File.exist?(full_path)
mailmap = self.new
mailmap.extract(full_path)
return mailmap
end
end
# Create a new, empty, mailmap.
def initialize
@names = {}
end
# @attribute [Hash(String, String)] The mapping of commit emails to proper names.
attr :names
# Extract the mailmap from the given path.
def extract(path)
File.open(path, "r") do |file|
file.each_line do |line|
# Skip comments
next if line =~ /^#/
# Skip empty lines
next if line =~ /^\s*$/
# Parse line
user = extract_from_line(line)
if commit_email = user[:commit_email] and proper_name = user[:proper_name]
@names[commit_email] = proper_name
end
end
end
end
# Format: Proper Name <proper@email.xx> Commit Name <commit@email.xx>
PATTERN = /
(?<proper_name>[^<]+)?
(\s+<(?<proper_email>[^>]+)>)?
(\s+(?<commit_name>[^<]+)?)?
\s+<(?<commit_email>[^>]+)>
/x
# Extract the mailmap format from a line of input.
def extract_from_line(line)
line.match(PATTERN)
end
end
# Extract contributors from a YAML file which can be generated from another repository.
class Contributors
# The default path is the root of the repository and for authors who have contributed to the entire repository or unspecified paths in the past.
DEFAULT_PATH = "."
# Load contributors from a directory.
def self.for(root)
full_path = File.join(root, ".contributors.yaml")
if File.exist?(full_path)
contributors = self.new
contributors.extract(full_path)
return contributors
end
end
# Create a new, empty, contributors list.
def initialize
@contributions = []
end
# Iterate over each contribution.
def each(&block)
@contributions.each do |contribution|
author = contribution[:author]
time = contribution[:time]
paths_for(contribution) do |path|
yield path, author, time
end
end
end
# Extract the contributors from the given path.
def extract(path)
@contributions.concat(
YAML.load_file(path, aliases: true, symbolize_names: true, permitted_classes: [Symbol, Date, Time])
)
end
# @attribute [Array(Hash)] The list of paths from a given contribution.
def paths_for(contribution)
return to_enum(:paths_for, contribution) unless block_given?
if path = contribution[:path]
yield path
# elsif paths = contribution[:paths]
# paths.each do |path|
# yield path
# end
else
yield DEFAULT_PATH
end
end
end
# Represents the authorship of a repository.
class Authorship
# Represents a modification to a file.
Modification = Struct.new(:author, :time, :path, :id) do
def full_name
author[:name]
end
def key
self.id || "#{self.author[:email]}:#{self.time.iso8601}"
end
def to_h
{
id: id,
time: time,
path: path,
author: author,
}
end
end
# Represents the copyright for an author.
Copyright = Struct.new(:dates, :author) do
def <=> other
self.to_a <=> other.to_a
end
def statement
years = self.dates.map(&:year).uniq
return "Copyright, #{years.join('-')}, by #{author}."
end
end
# Create a new, empty, authorship.
def initialize
@paths = Hash.new{|h,k| h[k] = []}
@commits = Hash.new{|h,k| h[k] = []}
end
# @attribute [Hash(String, Array(Modification))] The mapping of paths to modifications.
attr :paths
# @attribute [Hash(String, Array(Modification))] The mapping of commits to modifications.
attr :commits
# Add a modification to the authorship.
def add(path, author, time, id = nil)
modification = Modification.new(author, time, path, id)
@commits[modification.key] << modification
@paths[path] << modification
end
# Extract the authorship from the given root directory.
def extract(root = Dir.pwd)
mailmap = Mailmap.for(root)
skip_list = SkipList.for(root)
if contributors = Contributors.for(root)
contributors.each do |path, author, time|
add(path, author, time)
end
end
walk(Rugged::Repository.discover(root), mailmap: mailmap, skip_list: skip_list)
return self
end
# Authors, sorted by contribution date.
def sorted_authors
authors = Hash.new{|h,k| h[k] = 0}
@commits.each do |key, modifications|
modifications.map(&:full_name).uniq.each do |full_name|
authors[full_name] += 1
end
end
return authors.sort_by{|k,v| [-v, k]}.map(&:first)
end
# All copyrights.
def copyrights
copyrights_for_modifications(@paths.values.flatten)
end
# All copyrights for a given path.
def copyrights_for_path(path)
copyrights_for_modifications(@paths[path])
end
# All copyrights for a given modification.
def copyrights_for_modifications(modifications)
authors = modifications.group_by{|modification| modification.full_name}
authors.map do |name, modifications|
Copyright.new(modifications.map(&:time).minmax, name)
end.sort
end
private
DEFAULT_SORT = Rugged::SORT_DATE | Rugged::SORT_TOPO | Rugged::SORT_REVERSE
def walk(repository, mailmap: nil, skip_list: nil, show: "HEAD")
Rugged::Walker.walk(repository, show: show, sort: DEFAULT_SORT) do |commit|
next if skip_list&.ignore?(commit)
diff = commit.diff
# We relax the threshold for copy and rename detection because we want to detect files that have been moved and modified more generously.
diff.find_similar!(
rename_threshold: 25,
copy_threshold: 25,
ignore_whitespace: true,
)
diff.each_delta do |delta|
old_path = delta.old_file[:path]
new_path = delta.new_file[:path]
@paths[new_path] ||= []
if old_path != new_path
# The file was moved, move copyright information too:
Console.logger.debug(self, "Moving #{old_path} to #{new_path}", similarity: delta.similarity)
@paths[new_path].concat(@paths[old_path])
end
author = commit.author
if mailmap
if name = mailmap.names[author[:email]]
author[:name] = name
end
end
add(new_path, author, commit.time, commit.oid)
end
end
end
end
end
end
end