lib/gds_api/rummager.rb



require 'gds_api/base'
require 'rack/utils'
require 'uri'

module GdsApi
  # @api documented
  class Rummager < Base
    # @api documented
    class V1 < SimpleDelegator
      # Version 1 of the document add/delete calls. `post_json`, `delete_json`
      # and `documents_url` are not defined here; SimpleDelegator forwards them
      # to the wrapped object.

      # Posts a document to the documents URL.
      #
      # @param type [String] Sent as the `_type` key of the payload.
      # @param id [String] Sent as the `_id` key of the payload.
      # @param document [Hash] Document fields; `_type` and `_id` are merged in
      #   and override any keys of the same name.
      # @return whatever the delegated `post_json` returns.
      def add_document(type, id, document)
        target_url = documents_url
        payload = document.merge(_type: type, _id: id)
        post_json(target_url, payload)
      end

      # Sends a delete request to `<documents_url>/<id>`, passing the type
      # as `_type`.
      #
      # @return whatever the delegated `delete_json` returns.
      def delete_document(type, id)
        target_url = "#{documents_url}/#{id}"
        delete_json(target_url, _type: type)
      end
    end

    # @api documented
    class V2 < SimpleDelegator
      # Raised when a caller names any index other than 'metasearch'. The
      # exception message is the rejected index name.
      class InvalidIndex < StandardError; end

      # Posts a document to the v2 metasearch documents URL.
      #
      # @param id [String] Sent as the `_id` key of the payload.
      # @param document [Hash] Document fields; `_id` is merged in.
      # @param index_name [String] Must be exactly 'metasearch'.
      # @raise [InvalidIndex] if `index_name` is anything else.
      def add_document(id, document, index_name)
        require_metasearch_index(index_name)
        post_json(metasearch_documents_url, document.merge(_id: id))
      end

      # Sends a delete request for `id` under the v2 metasearch documents URL.
      #
      # @param index_name [String] Must be exactly 'metasearch'.
      # @raise [InvalidIndex] if `index_name` is anything else.
      def delete_document(id, index_name)
        require_metasearch_index(index_name)
        delete_json("#{metasearch_documents_url}/#{id}")
      end

      private

      # Guard shared by both public methods: only 'metasearch' is accepted.
      def require_metasearch_index(index_name)
        return if index_name == 'metasearch'

        raise(InvalidIndex, index_name)
      end

      # The single documents URL this API version talks to.
      def metasearch_documents_url
        "#{base_url}/v2/metasearch/documents"
      end
    end

    # API version used when the `options` given to `#initialize` do not
    # include `:api_version`.
    DEFAULT_API_VERSION = 'V1'.freeze
    # Maps each accepted `:api_version` value to the wrapper class that
    # implements the document calls for that version.
    API_VERSIONS = {
      'V1' => GdsApi::Rummager::V1,
      'V2' => GdsApi::Rummager::V2,
    }.freeze
    # Raised by `#initialize` when the requested version is not a key of
    # `API_VERSIONS`.
    class UnknownAPIVersion < StandardError; end

    # Sets up the client and selects the versioned document API.
    #
    # @param endpoint_url Passed unchanged to the parent initializer (bare `super`).
    # @param options [Hash] Passed unchanged to the parent initializer. The
    #   `:api_version` key selects an entry of `API_VERSIONS` and defaults to
    #   `DEFAULT_API_VERSION`.
    # @raise [UnknownAPIVersion] if `:api_version` is not a key of `API_VERSIONS`.
    def initialize(endpoint_url, options = {})
      super
      # The API version provides a simple wrapper around this base class so that we
      # can still access the shared methods present in this class.
      version = options.fetch(:api_version, DEFAULT_API_VERSION)
      api_class = API_VERSIONS.fetch(version) do
        # Name the rejected value and the valid ones so the failure is
        # diagnosable from the exception alone.
        raise UnknownAPIVersion,
              "Unknown API version #{version.inspect}; expected one of: #{API_VERSIONS.keys.join(', ')}"
      end
      @api = api_class.new(self)
    end

    # Perform a search.
    #
    # @param args [Hash] A valid search query. See Rummager documentation for options.
    # @param additional_headers [Hash] Headers forwarded to `get_json` along with the request URL.
    #
    # @see https://github.com/alphagov/rummager/blob/master/doc/search-api.md
    def search(args, additional_headers = {})
      # Serialise the query first, then issue a single GET against search.json.
      query_string = Rack::Utils.build_nested_query(args)
      get_json("#{base_url}/search.json?#{query_string}", additional_headers)
    end

    # Perform a search, returning the results as an enumerator.
    #
    # The enumerator abstracts away rummager's pagination and fetches new pages when
    # necessary.
    #
    # @param args [Hash] A valid search query. See Rummager documentation for options.
    # @param page_size [Integer] Number of results in each page.
    # @param additional_headers [Hash] Headers passed to `#search` for every page request.
    #
    # @see https://github.com/alphagov/rummager/blob/master/doc/search-api.md
    def search_enum(args, page_size: 100, additional_headers: {})
      Enumerator.new do |yielder|
        # Walk the offsets 0, page_size, 2 * page_size, ... with no upper bound;
        # the loop is ended from inside once a short page is seen.
        (0..Float::INFINITY).step(page_size).each do |offset|
          # The offset is coerced with `to_i` before it is sent as `start`.
          page_query = args.merge(start: offset.to_i, count: page_size)
          page = search(page_query, additional_headers).to_h.fetch('results', [])
          page.each { |result| yielder << result }
          # Fewer results than requested means there is nothing further to fetch.
          break if page.count < page_size
        end
      end
    end

    # Advanced search.
    #
    # @deprecated Only in use by Whitehall. Use the `#search` method.
    def advanced_search(args)
      # Reject nil and empty queries before building any URL.
      blank = args.nil? || args.empty?
      raise ArgumentError, "Args cannot be blank" if blank

      query_string = Rack::Utils.build_nested_query(args)
      get_json("#{base_url}/advanced_search?#{query_string}")
    end

    # Add a document to the search index.
    #
    # @param type [String] The rummager/elasticsearch document type.
    # @param id [String] The rummager/elasticsearch id. Typically the same as the `link` field, but this is not strictly enforced.
    # @param document [Hash] The document to add. Must match the rummager schema matching the `type` parameter and contain a `link` field.
    # @param index_name (V2 only) Name of the index to add the document to on
    #   GOV.UK - we only allow adding to metasearch
    # @return [GdsApi::Response] A status code of 202 indicates the document has been successfully queued.
    #
    # @see https://github.com/alphagov/rummager/blob/master/doc/documents.md
    def add_document(*document_args)
      # The expected arguments differ between API versions, so hand them to
      # the version wrapper untouched.
      versioned_api = @api
      versioned_api.add_document(*document_args)
    end

    # Delete a content-document from the index by base path.
    #
    # Content documents are pages on GOV.UK that have a base path and are
    # returned in searches. This excludes best bets, recommended-links,
    # and contacts, which may be deleted with `delete_document`.
    #
    # @param base_path [String] Base path of the page on GOV.UK.
    # @see https://github.com/alphagov/rummager/blob/master/doc/content-api.md
    def delete_content(base_path)
      # Escape the path so that characters such as `&`, `=`, `#` or spaces stay
      # inside the `link` parameter instead of being read as query syntax.
      escaped_link = URI.encode_www_form_component(base_path)
      request_url = "#{base_url}/content?link=#{escaped_link}"
      delete_json(request_url)
    end

    # @private
    def delete_content!(*)
      # Accepts and ignores any arguments; always raises, naming the
      # replacement method.
      message = "`Rummager#delete_content!` is deprecated. Use `Rummager#delete_content`"
      raise RuntimeError, message
    end

    # Retrieve a content-document from the index.
    #
    # Content documents are pages on GOV.UK that have a base path and are
    # returned in searches. This excludes best bets, recommended-links,
    # and contacts.
    #
    # @param base_path [String] Base path of the page on GOV.UK.
    # @see https://github.com/alphagov/rummager/blob/master/doc/content-api.md
    def get_content(base_path)
      # Escape the path so that characters such as `&`, `=`, `#` or spaces stay
      # inside the `link` parameter instead of being read as query syntax.
      escaped_link = URI.encode_www_form_component(base_path)
      request_url = "#{base_url}/content?link=#{escaped_link}"
      get_json(request_url)
    end

    # @private
    def get_content!(*)
      # Accepts and ignores any arguments; always raises, naming the
      # replacement method.
      message = "`Rummager#get_content!` is deprecated. Use `Rummager#get_content`"
      raise RuntimeError, message
    end

    # Delete a non-content document from the search index.
    #
    # For example, best bets, recommended links, or contacts.
    #
    # @param type [String] The rummager/elasticsearch document type.
    # @param id [String] The rummager/elasticsearch id. Typically the same as the `link` field.
    # @param index_name (V2 only) Name of the index to be deleted from on
    #   GOV.UK - we only allow deletion from metasearch
    def delete_document(*document_args)
      # The expected arguments differ between API versions, so hand them to
      # the version wrapper untouched.
      versioned_api = @api
      versioned_api.delete_document(*document_args)
    end

    # Root URL that the request paths built in this class are appended to.
    # Returns the value of `endpoint`, which is not defined in this file
    # (presumably the endpoint URL given to the constructor - confirm in
    # GdsApi::Base).
    def base_url
      endpoint
    end

    # URL of the documents collection, i.e. `base_url` followed by
    # `/documents`. Used by the V1 add/delete document calls.
    def documents_url
      root = base_url
      "#{root}/documents"
    end
  end
end