lib/jekyll/algolia/shrinker.rb



# frozen_string_literal: true

require 'json'
module Jekyll
  module Algolia
    # Module to shrink a record so it fits in the plan quotas.
    #
    # The size of a record is always measured as the number of bytes of its
    # JSON serialization.
    module Shrinker
      include Jekyll::Algolia

      # Public: Get the byte size of the object once converted to JSON
      #
      # record - The record to estimate
      #
      # Returns the number of bytes of the JSON representation of the record.
      def self.size(record)
        record.to_json.bytesize
      end

      # Public: Attempt to reduce the size of the record by reducing the size of
      # the less needed attributes
      #
      # raw_record - The record to attempt to reduce. It is never modified.
      # max_size   - The max size to achieve in bytes
      #
      # The excerpts are the attributes most subject to being reduced. We'll go
      # as far as removing them if there is no other choice. The steps, tried
      # in order until the record fits, are:
      #   1. replace the HTML excerpt with the textual one
      #   2. keep only the first half of the words of the excerpts
      #   3. remove the excerpts completely
      #
      # Returns raw_record itself when it already fits, a shrunk copy of it
      # otherwise. When the record has no excerpts to shrink, or is still too
      # big once they are removed, returns the result of stop_with_error.
      def self.fit_to_size(raw_record, max_size)
        return raw_record if size(raw_record) <= max_size

        # No excerpt, we can't shrink it
        unless raw_record.key?(:excerpt_html) && raw_record.key?(:excerpt_text)
          return stop_with_error(raw_record)
        end

        # Shallow copy, so the record of the caller is left untouched. Unlike
        # `clone`, `dup` does not carry over the frozen state, so a frozen
        # record can still be shrunk.
        record = raw_record.dup

        # We replace the HTML excerpt with the textual one
        record[:excerpt_html] = record[:excerpt_text]
        return record if size(record) <= max_size

        # We halve the excerpts. `to_s` guards against a nil excerpt, and
        # splitting on ' ' ignores leading whitespace, so no empty "word" is
        # counted.
        words = record[:excerpt_text].to_s.split(' ')
        half_excerpt = words.first(words.size / 2).join(' ')
        record[:excerpt_text] = half_excerpt
        record[:excerpt_html] = half_excerpt
        return record if size(record) <= max_size

        # We remove the excerpts completely
        record.delete(:excerpt_text)
        record.delete(:excerpt_html)
        return record if size(record) <= max_size

        # Still too big, we fail
        stop_with_error(record)
      end

      # Public: Stop the current indexing process and display details about the
      # record that is too big to be pushed
      #
      # record - The record causing the error
      #
      # This will display the `record_too_big` known message and write the
      # whole record, as pretty-printed JSON, to a log file through
      # Logger.write_to_file. The maximum size displayed is the one read from
      # the `max_record_size` configuration option.
      #
      # Returns the result of stop_process.
      def self.stop_with_error(record)
        record_size = size(record)
        record_size_readable = Filesize.from("#{record_size}B").to_s('Kb')
        max_record_size = Configurator.algolia('max_record_size')
        max_record_size_readable = Filesize
                                   .from("#{max_record_size}B").to_s('Kb')

        probable_wrong_keys = readable_largest_record_keys(record)

        # Writing the full record to disk for inspection
        record_log_path = Logger.write_to_file(
          'jekyll-algolia-record-too-big.log',
          JSON.pretty_generate(record)
        )

        details = {
          'object_title' => record[:title],
          'object_url' => record[:url],
          'probable_wrong_keys' => probable_wrong_keys,
          'record_log_path' => record_log_path,
          'nodes_to_index' => Configurator.algolia('nodes_to_index'),
          'record_size' => record_size_readable,
          'max_record_size' => max_record_size_readable
        }

        Logger.known_message('record_too_big', details)

        stop_process
      end

      # Public: Returns a string explaining which attributes are the largest in
      # the record
      #
      # record - The record hash to analyze
      #
      # Each value is measured the same way the whole record is: as the byte
      # size of its JSON serialization. Only the three largest attributes are
      # kept, largest first.
      #
      # Returns a String such as "content (12 Kb), title (1 Kb)", the exact
      # size formatting being delegated to Filesize.
      def self.readable_largest_record_keys(record)
        key_sizes = record.map { |key, value| [key, value.to_json.bytesize] }
        largest_keys = key_sizes.max_by(3) { |_, bytes| bytes }

        readable_keys = largest_keys.map do |key, bytes|
          readable_size = Filesize.from("#{bytes} B").to_s('Kb')
          "#{key} (#{readable_size})"
        end
        readable_keys.join(', ')
      end

      # Public: Stop the current process, with an exit status of 1
      def self.stop_process
        exit 1
      end
    end
  end
end