# Module to push records to Algolia and ensure only changed records are
# updated
module Jekyll::Algolia::Indexer

# Public: Execute a series of operations in a batch
#
# operations - Operations to batch
#
# Note: Will split the batch in several calls if too big, and will display
# a progress bar if this happens
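#
# Examples (a hypothetical sketch; the index name and records are made
# up, and the operation format is the one built by update_records below):
#
#   execute_operations(
#     [
#       { action: 'addObject', indexName: 'blog',
#         body: { objectID: 'def456', title: 'Hello' } },
#       { action: 'deleteObject', indexName: 'blog',
#         body: { objectID: 'abc123' } }
#     ]
#   )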
def self.execute_operations(operations)
  return if Configurator.dry_run?
  return if operations.empty?
  # Run the batches in slices if they are too large
  batch_size = Configurator.algolia('indexing_batch_size')
  slices = operations.each_slice(batch_size).to_a
  should_have_progress_bar = (slices.length > 1)
  if should_have_progress_bar
    progress_bar = ProgressBar.create(
      total: slices.length,
      format: 'Updating index (%j%%) |%B|'
    )
  end
  slices.each do |slice|
    begin
      ::Algolia.batch!(slice)
      progress_bar.increment if should_have_progress_bar
    rescue StandardError => error
      ErrorHandler.stop(error, operations: slice)
    end
  end
end

# Public: Returns the Algolia index object
def self.index
  @index
end

# Public: Check if an index exists
#
# index - Index to check
#
# Note: there is no API endpoint to do that, so we try to get the settings
# instead, which will fail if the index does not exist
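#
# Examples (index names are made up):
#
#   index_exist?(::Algolia::Index.new('an_index_that_exists'))
#   # => true
#   index_exist?(::Algolia::Index.new('an_index_that_does_not'))
#   # => false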
def self.index_exist?(index)
  index.get_settings
  true
rescue StandardError
  false
end

# Public: Returns the Algolia index used to store object ids
def self.index_object_ids
  @index_object_ids
end

# Public: Init the module
def self.init
  ::Algolia.init(
    application_id: Configurator.application_id,
    api_key: Configurator.api_key
  )
  index_name = Configurator.index_name
  @index = ::Algolia::Index.new(index_name)
  index_object_ids_name = Configurator.index_object_ids_name
  @index_object_ids = ::Algolia::Index.new(index_object_ids_name)
  set_user_agent
  self
end

# Public: Returns an array of the local objectIDs
#
# records - Array of all local records
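#
# Examples (made-up records; entries without an objectID are skipped):
#
#   local_object_ids([{ objectID: 'b' }, { title: 'x' }, { objectID: 'a' }])
#   # => ['a', 'b']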
def self.local_object_ids(records)
  records.map { |record| record[:objectID] }.compact.sort
end

# Public: Get a unique settingID for the current settings
#
# The settingID is generated as a hash of the current settings. As it will
# be stored in the userData key of the resulting config, we exclude that
# key from the hashing.
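#
# Examples (the returned hash is illustrative; it depends on the actual
# settings):
#
#   local_setting_id
#   # => '27b4b3a27d1e0f0a9e72fda3a3de3c0f'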
def self.local_setting_id
  settings = Configurator.settings
  settings.delete('userData')
  AlgoliaHTMLExtractor.uuid(settings)
end

# Public: Get the number of records in an index
#
# index - Index to check
#
# Note: We'll do an empty query search, to match everything, but we'll
# only return the objectID and one element, to get the shortest response
# possible. It will still contain the nbHits
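#
# Examples (hypothetical count):
#
#   record_count(index)
#   # => 12508, or 0 if the index cannot be queried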
def self.record_count(index)
  index.search(
    '',
    attributesToRetrieve: 'objectID',
    distinct: false,
    hitsPerPage: 1
  )['nbHits']
rescue StandardError
  0
end

# Public: Returns an array of all the objectIDs in the index
#
# Note: We use a dedicated index to store the objectIDs for faster
# browsing, but if the index does not exist we read the main index.
def self.remote_object_ids
  Logger.log('I:Getting list of existing records')
  # Fast version, using the dedicated index
  has_dedicated_index = index_exist?(index_object_ids)
  return remote_object_ids_from_dedicated_index if has_dedicated_index
  # Slow version, browsing the full index
  remote_object_ids_from_main_index
end

# Public: Get an array of all the object ids, stored in the dedicated
# index
#
# Note: This will be very fast. Each record contains 100 object ids, so it
# will fit in one call each time.
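#
# Examples (made-up ids; each record stores a slice of ids in its
# content key):
#
#   # With stored records { content: ['id2', 'id1'] } and
#   # { content: ['id3'] }:
#   remote_object_ids_from_dedicated_index
#   # => ['id1', 'id2', 'id3']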
def self.remote_object_ids_from_dedicated_index
  list = []
  begin
    index_object_ids.browse(
      attributesToRetrieve: 'content',
      hitsPerPage: 1000
    ) do |hit|
      list += hit['content']
    end
  rescue StandardError
    return []
  end
  list.sort
end

# Public: Get an array of all object IDs stored in the main index
#
# Note: As this will be slow (grabbing them 1000 at a time), we display
# a progress bar.
def self.remote_object_ids_from_main_index
  Logger.verbose("I:Inspecting existing records in index #{index.name}")
  list = []
  # As it might take some time, we display a progress bar
  progress_bar = ProgressBar.create(
    total: record_count(index),
    format: 'Inspecting existing records (%j%%) |%B|'
  )
  begin
    index.browse(
      attributesToRetrieve: 'objectID',
      hitsPerPage: 1000
    ) do |hit|
      list << hit['objectID']
      progress_bar.increment
    end
  rescue StandardError
    return []
  end
  list.sort
end

# Public: Get the settings of the remote index
#
# In case the index is not accessible, it will return nil
def self.remote_settings
  index.get_settings
rescue StandardError
  nil
end

# Public: Push all records to Algolia and configure the index
#
# records - Records to push
def self.run(records)
  init
  # Indexing zero records is surely a misconfiguration
  if records.length.zero?
    files_to_exclude = Configurator.algolia('files_to_exclude').join(', ')
    Logger.known_message(
      'no_records_found',
      'files_to_exclude' => files_to_exclude,
      'nodes_to_index' => Configurator.algolia('nodes_to_index')
    )
    exit 1
  end
  update_settings
  update_records(records)
  Logger.log('I:✔ Indexing complete')
end

# Public: Set new settings to an index
#
# Will dispatch to the error handler if it fails
# rubocop:disable Naming/AccessorMethodName
def self.set_settings(settings)
  index.set_settings!(settings)
rescue StandardError => error
  ErrorHandler.stop(error, settings: settings)
end
# rubocop:enable Naming/AccessorMethodName

# Public: Set the User-Agent to send to the API
#
# Every integration should follow the "YYY Integration" pattern, and
# every API client should follow the "Algolia for YYY" pattern. Even if
# each integration version is pinned to a specific API client version, we
# are explicit in defining it to help debug from the dashboard.
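#
# Examples (version numbers are illustrative):
#
#   set_user_agent
#   # Sets a User-Agent header such as:
#   # "Jekyll Integration (1.0.0); Algolia for Ruby (1.23.2);
#   # Jekyll (3.8.5); Ruby (2.5.1)"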
def self.set_user_agent
  user_agent = [
    "Jekyll Integration (#{VERSION})",
    "Algolia for Ruby (#{::Algolia::VERSION})",
    "Jekyll (#{::Jekyll::VERSION})",
    "Ruby (#{RUBY_VERSION})"
  ].join('; ')
  ::Algolia.set_extra_header('User-Agent', user_agent)
end

# Public: Update records of the index
#
# records - All records extracted from Jekyll
#
# Note: All operations will be done in one batch, ensuring an atomic
# update. Does nothing in dry run mode.
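#
# Examples (made-up ids, to show the diffing logic):
#
#   # With remote ids ['a', 'b'] and local records ['b', 'c'], this
#   # batches a deleteObject for 'a', an addObject for 'c', and a
#   # rewrite of the dedicated objectID index with ['b', 'c']
#   update_records(records)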
def self.update_records(records)
  # Getting the list of objectIDs, both remote and local
  remote_ids = remote_object_ids
  local_ids = local_object_ids(records)
  # Making a diff, to see what to add and what to delete
  ids_to_delete = remote_ids - local_ids
  ids_to_add = local_ids - remote_ids
  # What changes should we do to the indexes?
  has_records_to_update = !ids_to_delete.empty? || !ids_to_add.empty?
  has_dedicated_index = index_exist?(index_object_ids)
  # Stop if nothing to change
  if !has_records_to_update && has_dedicated_index
    Logger.log('I:Content is already up to date.')
    return
  end
  # We group all operations into one batch
  operations = []
  # We update records only if there are records to update
  if has_records_to_update
    Logger.log("I:Updating records in index #{index.name}...")
    Logger.log("I:Records to delete: #{ids_to_delete.length}")
    Logger.log("I:Records to add:    #{ids_to_add.length}")
    # Transforming ids into real records to add
    records_by_id = Hash[records.map { |r| [r[:objectID], r] }]
    records_to_add = ids_to_add.map { |id| records_by_id[id] }
    # Deletion operations come first, to avoid going over quota too
    # soon if it can be avoided
    ids_to_delete.each do |object_id|
      operations << {
        action: 'deleteObject', indexName: index.name,
        body: { objectID: object_id }
      }
    end
    # Then we add the new records
    operations += records_to_add.map do |new_record|
      { action: 'addObject', indexName: index.name, body: new_record }
    end
  end
  # We update the dedicated index every time we update records, but we also
  # create it if it does not exist
  should_update_dedicated_index = has_records_to_update ||
                                  !has_dedicated_index
  if should_update_dedicated_index
    operations << { action: 'clear', indexName: index_object_ids.name }
    local_ids.each_slice(100).each do |ids|
      operations << {
        action: 'addObject', indexName: index_object_ids.name,
        body: { content: ids }
      }
    end
  end
  execute_operations(operations)
end

# Public: Smart update of the settings of the index
#
# This will first compare the settings about to be pushed with the
# settings already pushed. It will compare userData.settingID for that.
# If the settingID is the same, we don't push as this won't change
# anything. We will still check if the remote config seems to have been
# manually altered though, and warn the user that this is not the
# preferred way of doing so.
#
# If the settingIDs do not match, it means our config is different, so
# we push it, overriding the settingID for the next push.
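#
# Examples (made-up settingIDs):
#
#   # Remote userData.settingID equal to local_setting_id: nothing is
#   # pushed. Remote is 'def456' (or missing) while local is 'abc123':
#   # settings are pushed with userData.settingID set to 'abc123'.
#   update_settings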
def self.update_settings
  current_remote_settings = remote_settings || {}
  remote_setting_id = current_remote_settings.dig('userData', 'settingID')
  settings = Configurator.settings
  setting_id = local_setting_id
  are_settings_forced = Configurator.force_settings?
  # The config we're about to push is the same we pushed previously. We
  # won't push again.
  if setting_id == remote_setting_id && !are_settings_forced
    Logger.log('I:Settings are already up to date.')
    # Check if remote config has been changed outside of the plugin, so we
    # can warn users that they should not alter their config from outside
    # of the plugin.
    current_remote_settings.delete('userData')
    changed_keys = Utils.diff_keys(settings, current_remote_settings)
    unless changed_keys.nil?
      warn_of_manual_dashboard_editing(changed_keys)
    end
    return
  end
  # Settings have changed, we push them
  settings['userData'] = {
    'settingID' => setting_id,
    'pluginVersion' => VERSION
  }
  Logger.log("I:Updating settings of index #{index.name}")
  return if Configurator.dry_run?
  set_settings(settings)
end

# Public: Warn users that they have some settings manually configured in
# their dashboard
#
# When users change some settings in their dashboard, those settings might
# get overwritten by the plugin. We can't prevent that, but we can warn
# them when we detect they changed something.
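#
# Examples (a hypothetical diff of one changed key):
#
#   warn_of_manual_dashboard_editing('searchableAttributes' => ['title'])
#   # Logs a warning containing lines like:
#   #   W:    searchableAttributes:
#   #   W:      - title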
def self.warn_of_manual_dashboard_editing(changed_keys)
  # Transform the hash into readable YAML
  yaml_lines = changed_keys
               .to_yaml(indentation: 2)
               .split("\n")[1..-1]
  yaml_lines.map! do |line|
    # Double the YAML indentation so the warning is easier to read
    line = line.gsub(/^ */) { |spaces| ' ' * (spaces.length * 2) }
    line = line.gsub('- ', '  - ')
    "W:    #{line}"
  end
  Logger.known_message(
    'settings_manually_edited',
    settings: yaml_lines.join("\n")
  )
end
end