class RuboCop::ResultCache

Provides functionality for caching RuboCop runs.

@api private

def self.allow_symlinks_in_cache_location?(config_store)

def self.allow_symlinks_in_cache_location?(config_store)
  config_store.for_pwd.for_all_cops['AllowSymlinksInCacheRootDirectory']
end

def self.cache_root(config_store)

def self.cache_root(config_store)
  CacheConfig.root_dir do
    config_store.for_pwd.for_all_cops['CacheRootDirectory']
  end
end

def self.cleanup(config_store, verbose, cache_root = nil)

Remove old files so that the cache doesn't grow too big. When the
threshold MaxFilesInCache has been exceeded, the oldest 50% of all the
files in the cache are removed. The reason for removing so much is that
cleaning should be done relatively seldom, since there is a slight risk,
when there's parallel execution and the cache is shared, that some other
RuboCop process is just about to read a file as it is removed.
def self.cleanup(config_store, verbose, cache_root = nil)
  return if inhibit_cleanup # OPTIMIZE: For faster testing
  cache_root ||= cache_root(config_store)
  return unless File.exist?(cache_root)
  files, dirs = Find.find(cache_root).partition { |path| File.file?(path) }
  return unless requires_file_removal?(files.length, config_store)
  remove_oldest_files(files, dirs, cache_root, verbose)
end
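
A minimal usage sketch, assuming config_store is an already-built
RuboCop::ConfigStore (RuboCop's own runner triggers an equivalent
cleanup pass when caching is enabled):

require 'rubocop'

config_store = RuboCop::ConfigStore.new
# The second argument enables verbose output about what gets removed.
RuboCop::ResultCache.cleanup(config_store, true)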

def any_symlink?(path)

def any_symlink?(path)
  while path != File.dirname(path)
    if File.symlink?(path)
      warn "Warning: #{path} is a symlink, which is not allowed."
      return true
    end
    path = File.dirname(path)
  end
  false
end
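
The loop walks from path up toward the filesystem root and stops there,
because File.dirname of the root returns the root itself:

File.dirname('/var/cache/rubocop') # => "/var/cache"
File.dirname('/var/cache')         # => "/var"
File.dirname('/')                  # => "/" (loop condition becomes false)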

def context_checksum(team, options)

We combine team and options into a single "context" checksum to avoid
making file names that are too long for some filesystems to handle.
This context is for anything that's not (1) the RuboCop executable
checksum or (2) the inspected file checksum.
def context_checksum(team, options)
  Digest::SHA1.hexdigest([team_checksum(team), relevant_options_digest(options)].join)
end
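
Hashing the joined components once more keeps the directory name at a
fixed 40 hex characters no matter how long the options string grows. A
sketch with made-up component values:

require 'digest'

# Hypothetical inputs; the real values come from team_checksum and
# relevant_options_digest.
parts = ['f1d2d2f924e986ac86fdf7b36c94bcdf32beec15', '_parallel_true_']
Digest::SHA1.hexdigest(parts.join) # => 40-character hex digest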

def debug?

def debug?
  @debug
end

def digest(path)

def digest(path)
  content = if path.end_with?(*DL_EXTENSIONS)
              # Shared libraries often contain timestamps of when
              # they were compiled and other non-stable data.
              File.basename(path)
            else
              File.binread(path) # mtime not reliable
            end
  Zlib.crc32(content).to_s
end
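
A sketch of the two branches, with a hypothetical shared-library name
(DL_EXTENSIONS covers platform suffixes such as .so or .bundle):

require 'zlib'

Zlib.crc32(File.basename('foo.so')).to_s # shared library: name only
Zlib.crc32(File.binread(__FILE__)).to_s  # ordinary file: full contents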

def file_checksum(file, config_store)

def file_checksum(file, config_store)
  digester = Digest::SHA1.new
  mode = File.stat(file).mode
  digester.update("#{file}#{mode}#{config_store.for_file(file).signature}")
  digester.file(file)
  digester.hexdigest
rescue Errno::ENOENT
  # Spurious files that come and go should not cause a crash, at least not
  # here.
  '_'
end

def initialize(file, team, options, config_store, cache_root = nil)

def initialize(file, team, options, config_store, cache_root = nil)
  cache_root ||= File.join(options[:cache_root], 'rubocop_cache') if options[:cache_root]
  cache_root ||= ResultCache.cache_root(config_store)
  @allow_symlinks_in_cache_location =
    ResultCache.allow_symlinks_in_cache_location?(config_store)
  @path = File.join(cache_root,
                    rubocop_checksum,
                    context_checksum(team, options),
                    file_checksum(file, config_store))
  @cached_data = CachedData.new(file)
  @debug = options[:debug]
end
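
The resulting cache path nests the three checksums under the cache
root. An illustrative layout only; the bracketed segments stand in for
the real 40-character digests:

File.join('/home/user/.cache/rubocop_cache',
          '<rubocop_checksum>',
          '<context_checksum>',
          '<file_checksum>')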

def load

def load
  puts "Loading cache from #{@path}" if debug?
  @cached_data.from_json(File.read(@path, encoding: Encoding::UTF_8))
end
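
A minimal sketch of the intended round trip, assuming file, team,
options, and config_store have been prepared by the caller, as
RuboCop's runner does; inspect_file is a hypothetical stand-in for the
actual inspection step:

cache = RuboCop::ResultCache.new(file, team, options, config_store)
if cache.valid?
  offenses = cache.load         # reuse the previous run's result
else
  offenses = inspect_file(file) # hypothetical inspection step
  cache.save(offenses)          # store for the next run
end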

def relevant_options_digest(options)

Return a digest of the options given at invocation, minus the ones that
have no effect on which offenses and disabled line ranges are found, and
thus don't affect caching.
def relevant_options_digest(options)
  options = options.reject { |key, _| NON_CHANGING.include?(key) }
  options.to_s.gsub(/[^a-z]+/i, '_')
end
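
The surviving options are stringified, and every run of non-letter
characters collapses into a single underscore, producing a short,
filesystem-safe token. For example, with a hypothetical options hash:

options = { parallel: true, display_time: false }
options.to_s.gsub(/[^a-z]+/i, '_')
# => "_parallel_true_display_time_false_"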

def remove_files(files, dirs, remove_count)

def remove_files(files, dirs, remove_count)
  # Batch the deletions; passing 130,000+ files to a single File.delete
  # call will crash it.
  files[0, remove_count].each_slice(10_000).each do |files_slice|
    File.delete(*files_slice)
  end
  dirs.each { |dir| Dir.rmdir(dir) if Dir["#{dir}/*"].empty? }
end
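
The same each_slice batching pattern, shown standalone with a
hypothetical file list:

stale = Dir['tmp/cache/**/*'].select { |path| File.file?(path) }
# Each File.delete call receives at most 10,000 paths, keeping the
# argument list bounded.
stale.each_slice(10_000) { |slice| File.delete(*slice) }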

def remove_oldest_files(files, dirs, cache_root, verbose)

def remove_oldest_files(files, dirs, cache_root, verbose)
  # Add 1 to half the number of files, so that we remove the file if
  # there's only 1 left.
  remove_count = (files.length / 2) + 1
  puts "Removing the #{remove_count} oldest files from #{cache_root}" if verbose
  sorted = files.sort_by { |path| File.mtime(path) }
  remove_files(sorted, dirs, remove_count)
rescue Errno::ENOENT
  # This can happen if parallel RuboCop invocations try to remove the
  # same files. No problem.
  puts $ERROR_INFO if verbose
end

def requires_file_removal?(file_count, config_store)

def requires_file_removal?(file_count, config_store)
  file_count > 1 && file_count > config_store.for_pwd.for_all_cops['MaxFilesInCache']
end

def rubocop_checksum

The checksum of the RuboCop program running the inspection.
def rubocop_checksum
  ResultCache.source_checksum ||=
    begin
      digest = Digest::SHA1.new
      rubocop_extra_features
        .select { |path| File.file?(path) }
        .sort!
        .each do |path|
          digest << digest(path)
        end
      digest << RuboCop::Version::STRING << RuboCop::AST::Version::STRING
      digest.hexdigest
    end
end

def rubocop_extra_features

def rubocop_extra_features
  lib_root = File.join(File.dirname(__FILE__), '..')
  exe_root = File.join(lib_root, '..', 'exe')
  # These are all the files we have `require`d plus everything in the
  # exe directory. A change to any of them could affect the cop output
  # so we include them in the cache hash.
  source_files = $LOADED_FEATURES + Find.find(exe_root).to_a
  source_files -= ResultCache.rubocop_required_features # Rely on gem versions
  source_files
end

def save(offenses)

def save(offenses)
  dir = File.dirname(@path)
  begin
    FileUtils.mkdir_p(dir)
  rescue Errno::EACCES, Errno::EROFS => e
    warn "Couldn't create cache directory. Continuing without cache.\n  #{e.message}"
    return
  end
  preliminary_path = "#{@path}_#{rand(1_000_000_000)}"
  # RuboCop must be in control of where its cached data is stored. A
  # symbolic link anywhere in the cache directory tree can be an
  # indication that a symlink attack is being waged.
  return if symlink_protection_triggered?(dir)
  File.open(preliminary_path, 'w', encoding: Encoding::UTF_8) do |f|
    f.write(@cached_data.to_json(offenses))
  end
  # The preliminary path is used so that if there are multiple RuboCop
  # processes trying to save data for the same inspected file
  # simultaneously, the only problem we run into is a competition over who gets
  # to write to the final file. The contents are the same, so no corruption
  # of data should occur.
  FileUtils.mv(preliminary_path, @path)
end
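
The write-to-temporary-then-rename pattern generalizes beyond this
class: a rename within one filesystem is atomic, so readers never
observe a half-written file. A sketch with hypothetical names:

require 'fileutils'

tmp = "result.json.#{Process.pid}.#{rand(1_000_000)}"
File.write(tmp, '{"offenses":[]}')
FileUtils.mv(tmp, 'result.json') # atomic replace on the same filesystem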

def symlink_protection_triggered?(path)

def symlink_protection_triggered?(path)
  !@allow_symlinks_in_cache_location && any_symlink?(path)
end

def team_checksum(team)

The external dependency checksums are cached per RuboCop team so that
the checksums don't need to be recomputed for each file.
def team_checksum(team)
  @checksum_by_team ||= {}.compare_by_identity
  @checksum_by_team[team] ||= team.external_dependency_checksum
end
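
compare_by_identity makes the memo hash key on object identity instead
of #hash/#eql?, so each distinct team object gets its own entry and
lookups skip any cost of computing the team's hash value:

memo = {}.compare_by_identity
a = 'team'
b = 'team' # equal in value, but a different object
memo[a] = 1
memo[b] = 2
memo.size # => 2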

def valid?

def valid?
  File.exist?(@path)
end