# frozen_string_literal: true

require 'digest/sha1'
require 'etc'
require 'find'
require 'zlib'

require_relative 'cache_config'

module RuboCop
  # Provides functionality for caching RuboCop runs.
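  #
  # A minimal usage sketch, assuming a caller (such as RuboCop's runner) that
  # already holds an inspected +file+ path, a cop +team+, parsed +options+,
  # and a +config_store+; the uncached inspection step below is a hypothetical
  # helper, not something defined in this file:
  #
  #   cache = ResultCache.new(file, team, options, config_store)
  #   if cache.valid?
  #     offenses = cache.load
  #   else
  #     offenses = inspect_file_without_cache(file) # hypothetical helper
  #     cache.save(offenses)
  #   end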
  # @api private
  class ResultCache
    NON_CHANGING = %i[color format formatters out debug fail_level
                      fix_layout autocorrect safe_autocorrect autocorrect_all
                      cache fail_fast stdin parallel].freeze

    DL_EXTENSIONS = ::RbConfig::CONFIG
                    .values_at('DLEXT', 'DLEXT2')
                    .reject { |ext| !ext || ext.empty? }
                    .map { |ext| ".#{ext}" }
                    .freeze

    # Remove old files so that the cache doesn't grow too big. When the
    # threshold MaxFilesInCache has been exceeded, the oldest 50% of all the
    # files in the cache are removed. The reason for removing so much is that
    # cleaning should be done relatively seldom, since there is a slight risk
    # that some other RuboCop process was just about to read the file, when
    # there's parallel execution and the cache is shared.
    def self.cleanup(config_store, verbose, cache_root = nil)
      return if inhibit_cleanup # OPTIMIZE: For faster testing

      cache_root ||= cache_root(config_store)
      return unless File.exist?(cache_root)

      files, dirs = Find.find(cache_root).partition { |path| File.file?(path) }
      return unless requires_file_removal?(files.length, config_store)

      remove_oldest_files(files, dirs, cache_root, verbose)
    end

    class << self
      # @api private
      attr_accessor :rubocop_required_features

      ResultCache.rubocop_required_features = []

      private

      def requires_file_removal?(file_count, config_store)
        file_count > 1 && file_count > config_store.for_pwd.for_all_cops['MaxFilesInCache']
      end

      def remove_oldest_files(files, dirs, cache_root, verbose)
        # Add 1 to half the number of files, so that we remove the file if
        # there's only 1 left.
        remove_count = 1 + (files.length / 2)
        puts "Removing the #{remove_count} oldest files from #{cache_root}" if verbose

        sorted = files.sort_by { |path| File.mtime(path) }
        remove_files(sorted, dirs, remove_count)
      rescue Errno::ENOENT
        # This can happen if parallel RuboCop invocations try to remove the
        # same files. No problem.
        puts $ERROR_INFO if verbose
      end

      def remove_files(files, dirs, remove_count)
        # Batch file deletions; deleting 130,000+ files in a single call will
        # crash File.delete.
        files[0, remove_count].each_slice(10_000).each do |files_slice|
          File.delete(*files_slice)
        end
        dirs.each { |dir| Dir.rmdir(dir) if Dir["#{dir}/*"].empty? }
      end
    end

    def self.cache_root(config_store)
      CacheConfig.root_dir do
        config_store.for_pwd.for_all_cops['CacheRootDirectory']
      end
    end

    def self.allow_symlinks_in_cache_location?(config_store)
      config_store.for_pwd.for_all_cops['AllowSymlinksInCacheRootDirectory']
    end

    attr :path

    def initialize(file, team, options, config_store, cache_root = nil)
      cache_root ||= options[:cache_root]
      cache_root ||= ResultCache.cache_root(config_store)
      @allow_symlinks_in_cache_location =
        ResultCache.allow_symlinks_in_cache_location?(config_store)
      @path = File.join(cache_root, rubocop_checksum, context_checksum(team, options),
                        file_checksum(file, config_store))
      @cached_data = CachedData.new(file)
      @debug = options[:debug]
    end

    def debug?
      @debug
    end

    def valid?
      File.exist?(@path)
    end

    def load
      puts "Loading cache from #{@path}" if debug?
      @cached_data.from_json(File.read(@path, encoding: Encoding::UTF_8))
    end

    def save(offenses)
      dir = File.dirname(@path)
      begin
        FileUtils.mkdir_p(dir)
      rescue Errno::EACCES, Errno::EROFS => e
        warn "Couldn't create cache directory. Continuing without cache.\n#{e.message}"
        return
      end

      preliminary_path = "#{@path}_#{rand(1_000_000_000)}"
      # RuboCop must be in control of where its cached data is stored. A
      # symbolic link anywhere in the cache directory tree can be an
      # indication that a symlink attack is being waged.
      return if symlink_protection_triggered?(dir)

      File.open(preliminary_path, 'w', encoding: Encoding::UTF_8) do |f|
        f.write(@cached_data.to_json(offenses))
      end
      # The preliminary path is used so that if there are multiple RuboCop
      # processes trying to save data for the same inspected file
      # simultaneously, the only problem we run into is a competition over who
      # gets to write to the final file. The contents are the same, so no
      # corruption of data should occur.
      FileUtils.mv(preliminary_path, @path)
    end

    private

    def symlink_protection_triggered?(path)
      !@allow_symlinks_in_cache_location && any_symlink?(path)
    end

    def any_symlink?(path)
      while path != File.dirname(path)
        if File.symlink?(path)
          warn "Warning: #{path} is a symlink, which is not allowed."
          return true
        end

        path = File.dirname(path)
      end
      false
    end

    def file_checksum(file, config_store)
      digester = Digest::SHA1.new
      mode = File.stat(file).mode
      digester.update("#{file}#{mode}#{config_store.for_file(file).signature}")
      digester.file(file)
      digester.hexdigest
    rescue Errno::ENOENT
      # Spurious files that come and go should not cause a crash, at least not
      # here.
      '_'
    end

    class << self
      attr_accessor :source_checksum, :inhibit_cleanup
    end

    # The checksum of the RuboCop program running the inspection.
    def rubocop_checksum
      ResultCache.source_checksum ||=
        begin
          digest = Digest::SHA1.new
          rubocop_extra_features
            .select { |path| File.file?(path) }
            .sort!
            .each do |path|
              digest << digest(path)
            end
          digest << RuboCop::Version::STRING << RuboCop::AST::Version::STRING
          digest.hexdigest
        end
    end

    def digest(path)
      content = if path.end_with?(*DL_EXTENSIONS)
                  # Shared libraries often contain timestamps of when
                  # they were compiled and other non-stable data.
                  File.basename(path)
                else
                  File.binread(path) # mtime not reliable
                end
      Zlib.crc32(content).to_s
    end

    def rubocop_extra_features
      lib_root = File.join(File.dirname(__FILE__), '..')
      exe_root = File.join(lib_root, '..', 'exe')

      # These are all the files we have `require`d plus everything in the
      # exe directory. A change to any of them could affect the cop output
      # so we include them in the cache hash.
      source_files = $LOADED_FEATURES + Find.find(exe_root).to_a
      source_files -= ResultCache.rubocop_required_features # Rely on gem versions

      source_files
    end

    # Return a hash of the options given at invocation, minus the ones that have
    # no effect on which offenses and disabled line ranges are found, and thus
    # don't affect caching.
    def relevant_options_digest(options)
      options = options.reject { |key, _| NON_CHANGING.include?(key) }
      options.to_s.gsub(/[^a-z]+/i, '_')
    end

    # The external dependency checksums are cached per RuboCop team so that
    # the checksums don't need to be recomputed for each file.
    def team_checksum(team)
      @checksum_by_team ||= {}.compare_by_identity
      @checksum_by_team[team] ||= team.external_dependency_checksum
    end

    # We combine team and options into a single "context" checksum to avoid
    # making file names that are too long for some filesystems to handle.
    # This context is for anything that's not (1) the RuboCop executable
    # checksum or (2) the inspected file checksum.
    def context_checksum(team, options)
      Digest::SHA1.hexdigest([team_checksum(team), relevant_options_digest(options)].join)
    end
  end
end