require 'filesize'
require 'json'

# Public: Shrink records that are too big to be pushed to Algolia, or stop
# the indexing with a helpful error when they cannot be shrunk enough
module Jekyll::Algolia::Shrinker
  # Make Configurator and Logger reachable from this module
  include Jekyll::Algolia

  # Public: Attempt to reduce the size of the record by shrinking its less
  # essential attributes
  #
  # - raw_record: The record to attempt to reduce
  # - max_size: The maximum size to reach, in bytes
  #
  # The excerpts are the attributes most subject to being reduced. We'll go
  # as far as removing them entirely if there is no other choice.
  def self.fit_to_size(raw_record, max_size)
    return raw_record if size(raw_record) <= max_size

    # No excerpt, we can't shrink it
    return stop_with_error(raw_record) unless raw_record.key?(:excerpt_html)

    record = raw_record.clone

    # Replace the HTML excerpt with the plain-text one
    record[:excerpt_html] = record[:excerpt_text]
    return record if size(record) <= max_size

    # Halve the excerpts
    excerpt_words = record[:excerpt_text].split(/\s+/)
    shortened_excerpt = excerpt_words[0...excerpt_words.size / 2].join(' ')
    record[:excerpt_text] = shortened_excerpt
    record[:excerpt_html] = shortened_excerpt
    return record if size(record) <= max_size

    # Remove the excerpts completely
    record.delete(:excerpt_text)
    record.delete(:excerpt_html)
    return record if size(record) <= max_size

    # Still too big, we fail
    stop_with_error(record)
  end
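
  # A minimal usage sketch (hypothetical record and size limit, not part of
  # the original file):
  #
  #   record = {
  #     title: 'My post',
  #     url: '/my-post/',
  #     excerpt_html: '<p>A long excerpt…</p>',
  #     excerpt_text: 'A long excerpt…'
  #   }
  #   Jekyll::Algolia::Shrinker.fit_to_size(record, 10_000)
  #   # => the record itself if it already fits, otherwise a copy whose
  #   #    excerpts have been downgraded, shortened or removed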

  # Public: Returns a string explaining which attributes are the largest in
  # the record
  #
  # - record: The record to inspect
  def self.readable_largest_record_keys(record)
    keys = Hash[record.map { |key, value| [key, value.to_s.length] }]
    largest_keys = keys.sort_by { |_, value| value }.reverse[0..2]
    output = []
    largest_keys.each do |key, length|
      readable_size = Filesize.from("#{length} B").to_s('Kb')
      output << "#{key} (#{readable_size})"
    end
    output.join(', ')
  end
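
  # Illustration (hypothetical sizes, not real output): for a record whose
  # largest attributes are :excerpt_html, :excerpt_text and :title, this
  # returns a string roughly of the form:
  #
  #   "excerpt_html (12.50 Kb), excerpt_text (8.20 Kb), title (0.05 Kb)"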

  # Public: Get the byte size of the record once converted to JSON
  #
  # - record: The record to measure
  def self.size(record)
    record.to_json.bytesize
  end
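
  # For instance (hypothetical record), size(title: 'Hi', url: '/hi/')
  # measures the serialized form {"title":"Hi","url":"/hi/"}: the size that
  # matters for the push, not Ruby's in-memory size.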

  # Public: Stop the current process with an error exit code
  def self.stop_process
    exit 1
  end

  # Public: Stop the current indexing process and display details about the
  # record that is too big to be pushed
  #
  # - record: The record causing the error
  #
  # This will display an error message and log the offending record to a
  # file on disk so it can be inspected
  def self.stop_with_error(record)
    record_size = size(record)
    record_size_readable = Filesize.from("#{record_size}B").to_s('Kb')
    max_record_size = Configurator.algolia('max_record_size')
    max_record_size_readable = Filesize.from("#{max_record_size}B").to_s('Kb')
    probable_wrong_keys = readable_largest_record_keys(record)

    # Write the full record to disk for inspection
    record_log_path = Logger.write_to_file(
      'jekyll-algolia-record-too-big.log',
      JSON.pretty_generate(record)
    )

    details = {
      'object_title' => record[:title],
      'object_url' => record[:url],
      'probable_wrong_keys' => probable_wrong_keys,
      'record_log_path' => record_log_path,
      'nodes_to_index' => Configurator.algolia('nodes_to_index'),
      'record_size' => record_size_readable,
      'max_record_size' => max_record_size_readable
    }

    Logger.known_message('record_too_big', details)

    stop_process
  end
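
  # Failure-path sketch (hypothetical record and limit, illustration only):
  # a record with no excerpt attributes that is still over the limit ends
  # up here via fit_to_size.
  #
  #   record = { title: 'Huge post', url: '/huge/', html: '<p>…</p>' * 50_000 }
  #   Jekyll::Algolia::Shrinker.fit_to_size(record, 10_000)
  #   # Logs a 'record_too_big' message listing the record size, the
  #   # configured maximum and the largest keys, writes the full record to
  #   # jekyll-algolia-record-too-big.log, then exits with status 1.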