class HTMLProofer::Cache
# Records the outcome of an external link check in the cache log.
#
# Does nothing when caching is disabled. The entry is keyed by the cleaned
# form of +url+ and marked as found when +status_code+ lies in 200..299.
#
# @param url the external URL that was checked
# @param filenames stored verbatim under the entry's :metadata key
# @param status_code numeric HTTP status of the check
# @param msg stored verbatim under the entry's :message key
# @return the stored entry hash, or nil when caching is disabled
def add_external(url, filenames, status_code, msg)
  return unless enabled?

  entry = {
    time: @cache_time.to_s,
    found: status_code.between?(200, 299),
    status_code: status_code,
    message: msg,
    metadata: filenames,
  }

  @cache_log[:external][cleaned_url(url)] = entry
end
# Appends one occurrence of an internal link to the cache log.
#
# Does nothing when caching is disabled. The first time +url+ is seen, an
# entry stamped with the current cache time and an empty :metadata list is
# created; every call then pushes a metadata record built by
# construct_internal_link_metadata onto that list.
#
# @param url the internal URL, used as-is as the cache key
# @param metadata details of the occurrence, passed to construct_internal_link_metadata
# @param found whether the link resolved
# @return the entry's :metadata array, or nil when caching is disabled
def add_internal(url, metadata, found)
  return unless enabled?

  internal_log = @cache_log[:internal]
  entry = internal_log[url]

  if entry.nil?
    entry = { time: @cache_time, metadata: [] }
    internal_log[url] = entry
  end

  entry[:metadata] << construct_internal_link_metadata(metadata, found)
end
# Produces the cleaned form of +url+ that add_external and retrieve_urls use
# as the cache key.
#
# NOTE(review): the body below is truncated in this copy — several statements
# have lost their leading characters and the closing `end` is missing. From the
# visible fragments it runs the URL through escape_unescape and, when the result
# is longer than one character and ends with "/", "#" or "?", drops that final
# character. Restore the method from version control before relying on it.
def cleaned_url(url)
def cleaned_url(url) d_url = escape_unescape(url) cleaned_url unless cleaned_url.end_with?("/", "#", "?") && cleaned_url.length > 1 d_url[0..-2]
# Builds the per-occurrence record that add_internal appends to an internal
# URL's :metadata list.
#
# NOTE(review): the body below is truncated in this copy — the hash key names
# have lost their leading characters and the hash delimiters and closing `end`
# are missing. The visible fragments read metadata[:source],
# metadata[:filename], metadata[:line] and metadata[:base_url], and include the
# +found+ flag; the exact key names must be restored from version control
# rather than inferred.
def construct_internal_link_metadata(metadata, found)
def construct_internal_link_metadata(metadata, found) ce: metadata[:source], name: metadata[:filename], : metadata[:line], _url: metadata[:base_url], d: found,
# Reconciles the cache with the URLs detected in the current run.
#
# Additions are computed first, then outdated entries are pruned through
# determine_deletions; the additions are what gets returned.
#
# @param urls_detected the URLs found in this run, keyed by URL
# @param type :external or :internal
# @return whatever determine_additions returned for these URLs
def detect_url_changes(urls_detected, type)
  determine_additions(urls_detected, type).tap do
    determine_deletions(urls_detected, type)
  end
end
# Selects the detected URLs that should be added to the cache; its result is
# what detect_url_changes hands back to retrieve_urls.
#
# NOTE(review): the body below is truncated in this copy — local variable names
# and the logger receiver have lost their leading characters and the closing
# `end` is missing. From the visible fragments it dispatches on +type+ to
# determine_external_additions or determine_internal_additions, takes the
# length of the result, and logs "Adding <n new links> to the cache" at :debug
# using pluralize. Restore the method from version control before editing.
def determine_additions(urls_detected, type)
def determine_additions(urls_detected, type) ons = type == :external ? determine_external_additions(urls_detected) : determine_internal_additions(urls_detected) nk_count = additions.length nk_text = pluralize(new_link_count, "new #{type} link", "new #{type} links") r.log(:debug, "Adding #{new_link_text} to the cache") ons
# Prunes entries from the cache log for +type+ and logs how many were removed.
#
# NOTE(review): the body below is truncated in this copy — the branch keywords
# inside the delete_if block, several identifiers, and the closing `end`s are
# missing, so the exact removal condition cannot be read from here. The visible
# fragments show a delete_if over the log for +type+ that consults
# urls_detected.include?(url) and url_matches_type?(url, type), increments a
# deletion counter, and emits "Removing ..." messages at :debug. Restore the
# method from version control before editing.
def determine_deletions(urls_detected, type)
def determine_deletions(urls_detected, type) ons = 0 _log[type].delete_if do |url, _| rls_detected.include?(url) lse f url_matches_type?(url, type) ogger.log(:debug, "Removing #{url} from #{type} cache") letions += 1 ue nk_text = pluralize(deletions, "outdated #{type} link", "outdated #{type} links") r.log(:debug, "Removing #{del_link_text} from the cache")
# Filters the detected external URLs down to those that still need checking.
#
# NOTE(review): the body below is truncated in this copy — the conditional
# keywords inside the reject block, the leading characters of several
# statements, and the closing `end`s are missing. The visible fragments show a
# reject over the detected URLs that looks each one up in the :external cache
# log, reads the cached :found flag (the inline comment says a false value
# means the URL is tried again), and logs "Adding <url> to external cache" at
# :debug. Restore the method from version control before editing.
def determine_external_additions(urls_detected)
def determine_external_additions(urls_detected) etected.reject do |url, _metadata| cache_log[:external].include?(url) ache_log[:external][url][:found] # if this is false, we're trying again ogger.log(:debug, "Adding #{url} to external cache") lse
# Builds the hash of detected internal URLs (and their metadata) that still
# need checking.
#
# NOTE(review): the body below is truncated in this copy — conditional
# keywords, the leading characters of most statements, and the closing `end`s
# are missing, so the exact branching cannot be read from here. The visible
# fragments show an each_with_object({}) over the detected URLs: a URL absent
# from the :internal cache log is copied across with its metadata; otherwise
# each incoming occurrence is matched to cached metadata by :filename, the
# cached :found flag is consulted, and occurrences that are kept are logged
# with "Adding ... to internal cache" at :debug. Restore the method from
# version control before editing.
def determine_internal_additions(urls_detected)
def determine_internal_additions(urls_detected) etected.each_with_object({}) do |(url, metadata), hsh| l is not even in cache cache_log[:internal][url].nil? h[url] = metadata xt e_metadata = @cache_log[:internal][url][:metadata] ming_metadata = urls_detected[url].each_with_object([]) do |incoming_url, arr| isting_cache_metadata = cache_metadata.find { |k, _| k[:filename] == incoming_url[:filename] } cache for this url, from an existing path, exists as found !existing_cache_metadata.nil? && !existing_cache_metadata.empty? && existing_cache_metadata[:found] metadata.find { |m| m[:filename] == existing_cache_metadata[:filename] }[:found] = true next d ogger.log(:debug, "Adding #{incoming_url} to internal cache") r << incoming_url url] = incoming_metadata
# Reports whether the cache holds no entries at all.
#
# True when the cache log itself is blank, or when both its :internal and
# :external sections are empty.
#
# @return [Boolean]
def empty?
  blank?(@cache_log) || [:internal, :external].all? { |section| @cache_log[section].empty? }
end
# Normalizes +url+ by parsing it, calling normalize on the parsed URI, and
# converting the result back to a String.
#
# NOTE(review): the body below is truncated in this copy — the receiver
# constant has lost its leading characters (`...sable::URI`, presumably
# Addressable::URI — confirm) and the closing `end` is missing.
def escape_unescape(url)
def escape_unescape(url) sable::URI.parse(url).normalize.to_s
# Sets up the cache for a proofer run.
#
# Captures the current time as the reference point for all timeframe
# comparisons. When +options+ is blank the cache is disabled: `enabled?` is
# defined to return false and no cache file is touched. Otherwise `enabled?`
# returns true, the cache log is loaded via setup_cache!, and the external and
# internal timeframes are parsed.
#
# @param runner object exposing the +logger+ used for cache messages
# @param options cache configuration; blank disables caching. The
#   [:timeframe][:external] and [:timeframe][:internal] values are handed to
#   parsed_timeframe.
# @raise [ArgumentError] from parsed_timeframe when a timeframe is invalid
def initialize(runner, options)
  @runner = runner
  @logger = @runner.logger

  @cache_datetime = Time.now
  @cache_time = @cache_datetime.to_time

  if blank?(options)
    define_singleton_method(:enabled?) { false }
  else
    define_singleton_method(:enabled?) { true }
    setup_cache!(options)

    # dig tolerates a missing :timeframe section; parsed_timeframe already
    # maps a nil timeframe to nil, so an absent section no longer crashes.
    @external_timeframe = parsed_timeframe(options.dig(:timeframe, :external))
    @internal_timeframe = parsed_timeframe(options.dig(:timeframe, :internal))
  end
end
# Converts a timeframe string such as "30d" into the point in time that lies
# that far before the cache's reference time.
#
# The first run of digits followed by a non-digit character is used: the digits
# are the amount and the character is the unit — "M" (months), "w" (weeks),
# "d" (days) or "h" (hours).
#
# @param timeframe the timeframe string, or nil for "no timeframe"
# @return the result of time_ago for the parsed amount and unit, or nil when
#   +timeframe+ is nil
# @raise [ArgumentError] when +timeframe+ contains no amount-and-unit pair or
#   names an unknown unit
def parsed_timeframe(timeframe)
  return nil if timeframe.nil?

  # A value without a "<digits><unit>" pair (e.g. "30") yields no match; report
  # it as an invalid timeframe instead of failing on nil.captures.
  match = timeframe.match(/(\d+)(\D)/)
  raise ArgumentError, "#{timeframe} is not a valid timeframe!" if match.nil?

  time, date = match.captures
  time = time.to_i

  case date
  when "M"
    time_ago(time, :months)
  when "w"
    time_ago(time, :weeks)
  when "d"
    time_ago(time, :days)
  when "h"
    time_ago(time, :hours)
  else
    raise ArgumentError, "#{date} is not a valid timeframe!"
  end
end
# Determines which of the detected URLs actually need to be checked.
#
# The detected URLs are re-keyed by their cleaned form, reconciled against the
# cache via detect_url_changes, and then every cached entry of +type+ whose
# timestamp falls outside the configured timeframe is added back so that it
# gets rechecked.
#
# @param urls_detected URLs found in this run, keyed by URL
# @param type :external or :internal
# @return [Hash] URLs to check mapped to their metadata; empty when nothing
#   was detected
def retrieve_urls(urls_detected, type)
  # nothing detected, nothing to check
  return {} if urls_detected.empty?

  normalized = urls_detected.transform_keys { |url| cleaned_url(url) }
  pending = detect_url_changes(normalized, type)

  @cache_log[type].each_pair do |url, cache|
    fresh =
      if type == :external
        within_external_timeframe?(cache[:time])
      else
        within_internal_timeframe?(cache[:time])
      end

    # expired entries are queued for a recheck
    pending[url] = cache[:metadata] unless fresh
  end

  pending
end
# Locates the cache file described by +options+ and loads it into @cache_log,
# falling back to an empty default structure.
#
# NOTE(review): the body below is truncated in this copy — most statements have
# lost their leading characters and the conditional keywords and closing `end`s
# are missing, so the exact control flow cannot be read from here. The visible
# fragments show: a default structure carrying CACHE_VERSION; a storage
# directory taken from options[:storage_dir] or DEFAULT_STORAGE_DIR and created
# with mkdir_p when absent; a file name taken from options[:cache_file] or
# DEFAULT_CACHE_FILE_NAME; the default structure being used when the file does
# not exist or its contents are blank; JSON.parse with symbolize_names: true; a
# comparison of the stored version against CACHE_VERSION; and the :internal and
# :external keys being converted back to strings. Restore the method from
# version control before editing.
def setup_cache!(options)
def setup_cache!(options) t_structure = { ion: CACHE_VERSION, rnal: {}, rnal: {}, ge_dir = options[:storage_dir] || DEFAULT_STORAGE_DIR ils.mkdir_p(storage_dir) unless Dir.exist?(storage_dir) file_name = options[:cache_file] || DEFAULT_CACHE_FILE_NAME _file = File.join(storage_dir, cache_file_name) (@cache_log = default_structure) unless File.exist?(@cache_file) ts = File.read(@cache_file) (@cache_log = default_structure) if blank?(contents) JSON.parse(contents, symbolize_names: true) che = (cache_version = log[:version]).nil? _log = if old_cache # previous cache version, create a new one ult_structure cache_version != CACHE_VERSION ache version is newer...do something :internal] = log[:internal].transform_keys(&:to_s) :external] = log[:external].transform_keys(&:to_s)
# Returns the number of cached entries of the given +type+.
#
# When no cache log has been loaded (initialize skips setup_cache! for blank
# options), the count is 0 rather than an error, matching how empty? treats a
# blank log.
#
# @param type :external or :internal
# @return [Integer] number of cached URLs for +type+
def size(type)
  return 0 if @cache_log.nil?

  @cache_log[type].size
end
# Computes the point in time that lies +measurement+ units before the cache's
# reference time; parsed_timeframe calls it with :months, :weeks, :days or
# :hours.
#
# NOTE(review): the body below is truncated in this copy — the case/when
# keywords, the leading characters of each branch, and the closing `end` are
# missing. The visible fragments subtract SECONDS_PER_MONTH, SECONDS_PER_WEEK
# or SECONDS_PER_DAY times +measurement+ from a datetime value, and wrap the
# SECONDS_PER_HOUR product in Rational for hours. Restore the method from
# version control before editing.
def time_ago(measurement, unit)
def time_ago(measurement, unit) nit months he_datetime - (SECONDS_PER_MONTH * measurement) weeks he_datetime - (SECONDS_PER_WEEK * measurement) days he_datetime - (SECONDS_PER_DAY * measurement) hours he_datetime - Rational(SECONDS_PER_HOUR * measurement) _time
# Checks whether +url+ looks like it belongs to the given cache +type+, using
# URI_REGEXP: :internal pairs with URLs that do not match the pattern,
# :external with URLs that do.
#
# NOTE(review): the body below is truncated in this copy — each statement has
# lost its leading token (presumably `return` — confirm) and the closing `end`
# is missing. Restore the method from version control before editing.
def url_matches_type?(url, type)
def url_matches_type?(url, type) true if type == :internal && url !~ URI_REGEXP true if type == :external && url =~ URI_REGEXP
# Tells whether a cached external entry's timestamp is still inside the
# external timeframe parsed at construction.
#
# @param time the entry's cached timestamp
# @return the result of within_timeframe? for the external timeframe
def within_external_timeframe?(time)
  within_timeframe?(time, @external_timeframe)
end
# Tells whether a cached internal entry's timestamp is still inside the
# internal timeframe parsed at construction.
#
# @param time the entry's cached timestamp
# @return the result of within_timeframe? for the internal timeframe
def within_internal_timeframe?(time)
  within_timeframe?(time, @internal_timeframe)
end
# Decides whether +current_time+ falls between +parsed_timeframe+ and the
# cache's reference time (@cache_time).
#
# NOTE(review): the body below is truncated in this copy — each statement has
# lost its leading characters and the closing `end` is missing. The visible
# fragments show a false result when either argument is nil, a String
# timestamp being converted with Time.parse, and a range ending at @cache_time
# being tested with cover?. Restore the method from version control before
# editing.
def within_timeframe?(current_time, parsed_timeframe)
def within_timeframe?(current_time, parsed_timeframe) false if current_time.nil? || parsed_timeframe.nil? t_time = Time.parse(current_time) if current_time.is_a?(String) d_timeframe..@cache_time).cover?(current_time)
# Persists the cache log to the cache file as JSON.
#
# Does nothing (and returns nil) when caching is disabled.
#
# @return the value returned by File.write, or nil when caching is disabled
def write
  File.write(@cache_file, @cache_log.to_json) if enabled?
end