class Mindee::Client

See: developers.mindee.com/docs

Mindee API Client.

def create_endpoint(endpoint_name: '', account_name: '', version: '')

Returns:
  • (Mindee::HTTP::Endpoint) -

Parameters:
  • version (String) -- For custom endpoints, version of the product
  • account_name (String) -- For custom endpoints, your account or organization username on the API Builder.
  • endpoint_name (String) -- For custom endpoints, the "API name" field in the "Settings" page of the API Builder.
# Builds an endpoint for a custom product.
#
# Delegates to #initialize_endpoint, always using
# Mindee::Product::Universal::Universal as the product class.
#
# @param endpoint_name [String] For custom endpoints, the "API name" of the product.
# @param account_name [String] For custom endpoints, your account or organization username.
# @param version [String] For custom endpoints, version of the product.
# @return [Mindee::HTTP::Endpoint]
def create_endpoint(endpoint_name: '', account_name: '', version: '')
  custom_settings = {
    endpoint_name: endpoint_name,
    account_name: account_name,
    version: version,
  }
  initialize_endpoint(Mindee::Product::Universal::Universal, **custom_settings)
end

def enqueue(input_source, product_class, endpoint: nil, options: {})

Returns:
  • (Mindee::Parsing::Common::ApiResponse) -

Parameters:
  • endpoint (Mindee::HTTP::Endpoint) -- Endpoint of the API.
  • options (Hash) -- A hash of options to configure the enqueue behavior. Possible keys:
  • product_class (Mindee::Inference) -- The class of the product.
  • input_source (Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource) --
# Enqueues a document for asynchronous parsing.
#
# @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource]
#   The document to send.
# @param product_class [Mindee::Inference] The class of the product.
# @param endpoint [Mindee::HTTP::Endpoint, nil] Endpoint of the API; when omitted, one is built
#   from +product_class+ through #initialize_endpoint.
# @param options [Hash, ParseOptions] Options, normalized through #normalize_parse_options.
# @return [Mindee::Parsing::Common::ApiResponse]
def enqueue(input_source, product_class, endpoint: nil, options: {})
  parse_options = normalize_parse_options(options)
  target = endpoint || initialize_endpoint(product_class)
  logger.debug("Enqueueing document as '#{target.url_root}'")

  prediction, raw_http = target.predict_async(input_source, parse_options)
  response_class = Mindee::Parsing::Common::ApiResponse
  response_class.new(product_class, prediction, raw_http.to_json)
end

def enqueue_and_parse(input_source, product_class, endpoint, options)

Returns:
  • (Mindee::Parsing::Common::ApiResponse) -

Parameters:
  • endpoint (Mindee::HTTP::Endpoint) -- Endpoint of the API.
  • options (Hash) -- A hash of options to configure the parsing behavior. Possible keys:
  • product_class (Mindee::Inference) -- The class of the product.
  • input_source (Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource) --
# Enqueues a document for asynchronous parsing, then polls the queue until the job leaves the
# waiting/processing states or the retry budget is used up.
#
# @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource]
#   The document to send.
# @param product_class [Mindee::Inference] The class of the product.
# @param endpoint [Mindee::HTTP::Endpoint] Endpoint of the API.
# @param options [Hash, ParseOptions] Polling configuration; +initial_delay_sec+, +delay_sec+ and
#   +max_retries+ are read from it after normalization.
# @return [Mindee::Parsing::Common::ApiResponse] The response of the last poll, once the job is completed.
# @raise [ArgumentError] When the polling parameters are rejected by #validate_async_params.
# @raise [Errors::MindeeAPIError] When a response carries no job, or when the job is not completed
#   by the time polling stops.
def enqueue_and_parse(input_source, product_class, endpoint, options)
  # Accept a plain Hash as well as a ParseOptions, the same way #enqueue and #parse do.
  options = normalize_parse_options(options)
  validate_async_params(options.initial_delay_sec, options.delay_sec, options.max_retries)

  enqueue_res = enqueue(input_source, product_class, endpoint: endpoint, options: options)
  job = enqueue_res.job
  raise Errors::MindeeAPIError, 'Expected job to be present' unless job

  job_id = job.id
  sleep(options.initial_delay_sec)
  polling_attempts = 1
  logger.debug("Successfully enqueued document with job id: '#{job_id}'")
  queue_res = parse_queued(job_id, product_class, endpoint: endpoint)
  queue_res_job = queue_res.job
  raise Errors::MindeeAPIError, 'Expected job to be present' unless queue_res_job

  valid_statuses = [
    Mindee::Parsing::Common::JobStatus::WAITING,
    Mindee::Parsing::Common::JobStatus::PROCESSING,
  ]
  # @type var valid_statuses: Array[(:waiting | :processing | :completed | :failed)]
  while valid_statuses.include?(queue_res_job.status) && polling_attempts < options.max_retries
    logger.debug("Polling server for parsing result with job id: '#{job_id}'. Attempt #{polling_attempts}")
    sleep(options.delay_sec)
    queue_res = parse_queued(job_id, product_class, endpoint: endpoint)
    queue_res_job = queue_res.job
    raise Errors::MindeeAPIError, 'Expected job to be present' unless queue_res_job

    polling_attempts += 1
  end
  return queue_res if queue_res_job.status == Mindee::Parsing::Common::JobStatus::COMPLETED

  # Any non-completed status ends up here, including a job that left the waiting/processing
  # states without completing.
  # Time actually slept: the initial delay, plus one delay per loop iteration. The first poll
  # (attempt 1) was only preceded by the initial delay, hence the "- 1".
  elapsed = options.initial_delay_sec + ((polling_attempts - 1) * options.delay_sec.to_f)
  raise Errors::MindeeAPIError,
        "Asynchronous parsing request timed out after #{elapsed} seconds (#{polling_attempts} tries)"
end

def execute_workflow(input_source, workflow_id, options: {})

Returns:
  • (Mindee::Parsing::Common::WorkflowResponse) -

Parameters:
  • options (Hash, WorkflowOptions) -- Options to configure workflow behavior. Possible keys:
  • workflow_id (String) --
  • input_source (Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource) --
# Sends a document to a workflow.
#
# A local input source is first handed to #process_pdf_if_required when the options respond to
# +page_options+.
#
# @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource]
#   The document to send.
# @param workflow_id [String] Identifier passed to the workflow endpoint.
# @param options [Hash, WorkflowOptions] A Hash is wrapped as +WorkflowOptions.new(params: options)+.
# @return [Mindee::Parsing::Common::WorkflowResponse]
def execute_workflow(input_source, workflow_id, options: {})
  workflow_options = options
  workflow_options = WorkflowOptions.new(params: options) unless options.is_a?(WorkflowOptions)

  pdf_candidate = workflow_options.respond_to?(:page_options) &&
                  input_source.is_a?(Input::Source::LocalInputSource)
  process_pdf_if_required(input_source, workflow_options) if pdf_candidate

  workflow_endpoint = Mindee::HTTP::WorkflowEndpoint.new(workflow_id, api_key: @api_key)
  logger.debug("Sending document to workflow '#{workflow_id}'")
  prediction, raw_http = workflow_endpoint.execute_workflow(input_source, workflow_options)

  response_class = Mindee::Parsing::Common::WorkflowResponse
  response_class.new(Product::Universal::Universal, prediction, raw_http)
end

def fix_account_name(account_name)

# Resolves the account name to use, falling back to OTS_OWNER.
#
# @param account_name [String, nil] Account name supplied by the caller.
# @return [String] +account_name+ when it is neither nil nor empty, otherwise OTS_OWNER
#   (an info message is logged in that case).
def fix_account_name(account_name)
  missing = account_name.nil? || account_name.empty?
  return account_name unless missing

  logger.info("No account name provided, #{OTS_OWNER} will be used by default.")
  OTS_OWNER
end

def fix_endpoint_name(product_class, endpoint_name)

# Resolves the endpoint name to use.
#
# @param product_class [#endpoint_name] Product class providing the fallback name.
# @param endpoint_name [String, nil] Endpoint name supplied by the caller.
# @return [String] +endpoint_name+ when it is neither nil nor empty, otherwise
#   +product_class.endpoint_name+.
def fix_endpoint_name(product_class, endpoint_name)
  blank = endpoint_name.nil? || endpoint_name.empty?
  return product_class.endpoint_name if blank

  endpoint_name
end

def fix_version(product_class, version)

# Resolves the product version to use.
#
# Precedence: the explicit +version+, then +product_class.endpoint_version+, then '1'
# (a debug message is logged when falling back to '1').
#
# @param product_class [#endpoint_version] Product class providing the fallback version.
# @param version [String, nil] Version supplied by the caller.
# @return [String]
def fix_version(product_class, version)
  version_given = !version.nil? && !version.empty?
  return version if version_given

  product_version = product_class.endpoint_version
  has_product_version = !product_version.nil? && !product_version.empty?
  return product_version if has_product_version

  logger.debug('No version provided for a custom build, will attempt to poll version 1 by default.')
  '1'
end

def initialize(api_key: '')

Parameters:
  • api_key (String) --
# Creates a client and stores the API key for the endpoints it builds later.
#
# @param api_key [String] API key; kept as-is in +@api_key+ (defaults to an empty string).
def initialize(api_key: '')
  @api_key = api_key
end

def initialize_endpoint(product_class, endpoint_name: '', account_name: '', version: '')

Returns:
  • (Mindee::HTTP::Endpoint) -

Parameters:
  • version (String) -- For custom endpoints, version of the product.
  • account_name (String) -- For custom endpoints, your account or organization username on the API Builder.
  • endpoint_name (String) -- For custom endpoints, the "API name" field in the "Settings" page of the API Builder.
  • product_class (Mindee::Parsing::Common::Inference) -- class of the product
# Builds the HTTP endpoint for a product, filling in missing settings.
#
# Missing values are resolved through #fix_endpoint_name, #fix_account_name and #fix_version.
#
# @param product_class [Mindee::Parsing::Common::Inference] Class of the product.
# @param endpoint_name [String] For custom endpoints, the "API name" of the product.
# @param account_name [String] For custom endpoints, your account or organization username.
# @param version [String] For custom endpoints, version of the product.
# @return [Mindee::HTTP::Endpoint]
# @raise [Mindee::Errors::MindeeConfigurationError] When +product_class+ is the Universal class
#   and no endpoint name is given.
def initialize_endpoint(product_class, endpoint_name: '', account_name: '', version: '')
  name_missing = endpoint_name.nil? || endpoint_name.empty?
  # The Universal product has no endpoint name of its own to fall back on.
  if name_missing && product_class == Mindee::Product::Universal::Universal
    raise Mindee::Errors::MindeeConfigurationError, 'Missing argument endpoint_name when using custom class'
  end

  resolved_endpoint = fix_endpoint_name(product_class, endpoint_name)
  resolved_account = fix_account_name(account_name)
  resolved_version = fix_version(product_class, version)
  HTTP::Endpoint.new(resolved_account, resolved_endpoint, resolved_version, api_key: @api_key)
end

def load_prediction(product_class, local_response)

Returns:
  • (Mindee::Parsing::Common::ApiResponse) -

Parameters:
  • local_response (Mindee::Input::LocalResponse) --
  • product_class (Mindee::Inference) -- class of the product
# Builds an API response from a locally stored response instead of calling the API.
#
# @param product_class [Mindee::Inference] Class of the product.
# @param local_response [Mindee::Input::LocalResponse] Local response; its +as_hash+ is used as
#   the payload (an empty hash when +as_hash+ returns nil).
# @return [Mindee::Parsing::Common::ApiResponse]
# @raise [Errors::MindeeInputError] When +local_response+ is nil, or when building the response
#   raises KeyError or Errors::MindeeAPIError.
def load_prediction(product_class, local_response)
  # Raise the caller-facing error directly rather than raising an API error only to rescue it below.
  raise Errors::MindeeInputError, 'No prediction found in local response.' if local_response.nil?

  # The `|| {}` fallback guarantees a hash here, so no further nil check is needed.
  response_hash = local_response.as_hash || {}
  Mindee::Parsing::Common::ApiResponse.new(product_class, response_hash, response_hash.to_json)
rescue KeyError, Errors::MindeeAPIError
  raise Errors::MindeeInputError, 'No prediction found in local response.'
end

def normalize_parse_options(options)

Returns:
  • (ParseOptions) -

Parameters:
  • options (Hash, ParseOptions) -- Options.
# Ensures the options are a ParseOptions instance.
#
# @param options [Hash, ParseOptions] Options.
# @return [ParseOptions] +options+ itself when it already is a ParseOptions, otherwise
#   +ParseOptions.new(params: options)+.
def normalize_parse_options(options)
  options.is_a?(ParseOptions) ? options : ParseOptions.new(params: options)
end

def parse(input_source, product_class, endpoint: nil, options: {}, enqueue: true)

Returns:
  • (Mindee::Parsing::Common::ApiResponse) -

Parameters:
  • enqueue (bool) -- Whether to enqueue the file.
  • options (Hash) -- A hash of options to configure the parsing behavior. Possible keys:
  • endpoint (Mindee::HTTP::Endpoint, nil) -- Endpoint of the API.
  • product_class (Mindee::Inference) -- The class of the product.
  • input_source (Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource) --
# Parses a document, through the queue or synchronously.
#
# The queue (#enqueue_and_parse) is used when +enqueue+ is truthy and +product_class.has_async+
# is truthy; otherwise #parse_sync is used. Local input sources are first handed to
# #process_pdf_if_required.
#
# @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource]
#   The document to send.
# @param product_class [Mindee::Inference] The class of the product.
# @param endpoint [Mindee::HTTP::Endpoint, nil] Endpoint of the API; when omitted, one is built
#   from +product_class+ through #initialize_endpoint.
# @param options [Hash, ParseOptions] Options, normalized through #normalize_parse_options.
# @param enqueue [bool] Whether to enqueue the file.
# @return [Mindee::Parsing::Common::ApiResponse]
def parse(input_source, product_class, endpoint: nil, options: {}, enqueue: true)
  parse_options = normalize_parse_options(options)
  local_source = input_source.is_a?(Input::Source::LocalInputSource)
  process_pdf_if_required(input_source, parse_options) if local_source
  target = endpoint || initialize_endpoint(product_class)

  use_queue = enqueue && product_class.has_async
  return enqueue_and_parse(input_source, product_class, target, parse_options) if use_queue

  parse_sync(input_source, product_class, target, parse_options)
end

def parse_queued(job_id, product_class, endpoint: nil)

Returns:
  • (Mindee::Parsing::Common::ApiResponse) -

Parameters:
  • endpoint (HTTP::Endpoint, nil) -- Endpoint of the API
  • product_class (Mindee::Inference) -- class of the product
  • job_id (String) -- ID of the job (queue) to poll from
# Fetches the current state of a queued document.
#
# @param job_id [String] ID of the job (queue) to poll from.
# @param product_class [Mindee::Inference] Class of the product.
# @param endpoint [HTTP::Endpoint, nil] Endpoint of the API; when nil, one is built from
#   +product_class+ through #initialize_endpoint.
# @return [Mindee::Parsing::Common::ApiResponse]
def parse_queued(job_id, product_class, endpoint: nil)
  target = endpoint.nil? ? initialize_endpoint(product_class) : endpoint
  logger.debug("Fetching queued document as '#{target.url_root}'")

  prediction, raw_http = target.parse_async(job_id)
  response_class = Mindee::Parsing::Common::ApiResponse
  response_class.new(product_class, prediction, raw_http.to_json)
end

def parse_sync(input_source, product_class, endpoint, options)

Returns:
  • (Mindee::Parsing::Common::ApiResponse) -

Parameters:
  • options (Hash) -- A hash of options to configure the parsing behavior. Possible keys:
  • endpoint (Mindee::HTTP::Endpoint, nil) -- Endpoint of the API.
  • product_class (Mindee::Inference) -- class of the product
  • input_source (Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource) --
# Parses a document synchronously through +endpoint.predict+.
#
# NOTE(review): the raw response is stringified with +to_s+ here, whereas #enqueue and
# #parse_queued use +to_json+ — confirm the difference is intended.
#
# @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource]
#   The document to send.
# @param product_class [Mindee::Inference] Class of the product.
# @param endpoint [Mindee::HTTP::Endpoint] Endpoint of the API.
# @param options [ParseOptions] Options forwarded unchanged to +endpoint.predict+.
# @return [Mindee::Parsing::Common::ApiResponse]
def parse_sync(input_source, product_class, endpoint, options)
  logger.debug("Parsing document as '#{endpoint.url_root}'")

  prediction, raw_http = endpoint.predict(input_source, options)
  response_class = Mindee::Parsing::Common::ApiResponse
  response_class.new(product_class, prediction, raw_http.to_s)
end

def process_pdf_if_required(input_source, opts)

Parameters:
  • opts (ParseOptions) --
  • input_source (Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource) --
# Applies the page options to a local PDF source.
#
# Does nothing (and returns nil) unless +input_source+ is a LocalInputSource,
# +opts.page_options.on_min_pages+ is truthy, and the source reports +pdf?+ — checked in that order.
#
# @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource]
#   The document to process.
# @param opts [ParseOptions] Options carrying +page_options+.
def process_pdf_if_required(input_source, opts)
  return unless input_source.is_a?(Mindee::Input::Source::LocalInputSource)
  return unless opts.page_options.on_min_pages
  return unless input_source.pdf?

  input_source.process_pdf(opts.page_options)
end

def source_from_b64string(base64_string, filename, repair_pdf: false)

Returns:
  • (Mindee::Input::Source::Base64InputSource) -

Parameters:
  • repair_pdf (bool) -- Attempts to fix broken pdf if true
  • filename (String) -- The name of the file (without the path)
  • base64_string (String) -- Input to parse as base64 string
# Builds an input source from a base64 string.
#
# @param base64_string [String] Input to parse as base64 string.
# @param filename [String] The name of the file (without the path).
# @param repair_pdf [bool] Attempts to fix broken pdf if true.
# @return [Mindee::Input::Source::Base64InputSource]
def source_from_b64string(base64_string, filename, repair_pdf: false)
  source_class = Input::Source::Base64InputSource
  source_class.new(base64_string, filename, repair_pdf: repair_pdf)
end

def source_from_bytes(input_bytes, filename, repair_pdf: false)

Returns:
  • (Mindee::Input::Source::BytesInputSource) -

Parameters:
  • repair_pdf (bool) -- Attempts to fix broken pdf if true
  • filename (String) -- The name of the file (without the path)
  • input_bytes (String) -- Encoding::BINARY byte input
# Builds an input source from raw bytes.
#
# @param input_bytes [String] Encoding::BINARY byte input.
# @param filename [String] The name of the file (without the path).
# @param repair_pdf [bool] Attempts to fix broken pdf if true.
# @return [Mindee::Input::Source::BytesInputSource]
def source_from_bytes(input_bytes, filename, repair_pdf: false)
  source_class = Input::Source::BytesInputSource
  source_class.new(input_bytes, filename, repair_pdf: repair_pdf)
end

def source_from_file(input_file, filename, repair_pdf: false)

Returns:
  • (Mindee::Input::Source::FileInputSource) -

Parameters:
  • repair_pdf (bool) -- Attempts to fix broken pdf if true
  • filename (String) -- The name of the file (without the path)
  • input_file (File) -- Input file handle
# Builds an input source from an open file handle.
#
# @param input_file [File] Input file handle.
# @param filename [String] The name of the file (without the path).
# @param repair_pdf [bool] Attempts to fix broken pdf if true.
# @return [Mindee::Input::Source::FileInputSource]
def source_from_file(input_file, filename, repair_pdf: false)
  source_class = Input::Source::FileInputSource
  source_class.new(input_file, filename, repair_pdf: repair_pdf)
end

def source_from_path(input_path, repair_pdf: false)

Returns:
  • (Mindee::Input::Source::PathInputSource) -

Parameters:
  • repair_pdf (bool) -- Attempts to fix broken pdf if true
  • input_path (String) -- Path of file to open
# Builds an input source from a file path.
#
# @param input_path [String] Path of file to open.
# @param repair_pdf [bool] Attempts to fix broken pdf if true.
# @return [Mindee::Input::Source::PathInputSource]
def source_from_path(input_path, repair_pdf: false)
  source_class = Input::Source::PathInputSource
  source_class.new(input_path, repair_pdf: repair_pdf)
end

def source_from_url(url)

Returns:
  • (Mindee::Input::Source::URLInputSource) -

Parameters:
  • url (String) -- URL of the file
# Builds an input source from a URL.
#
# @param url [String] URL of the file.
# @return [Mindee::Input::Source::URLInputSource]
def source_from_url(url)
  source_class = Input::Source::URLInputSource
  source_class.new(url)
end

def validate_async_params(initial_delay_sec, delay_sec, max_retries)

Parameters:
  • max_retries (Integer, nil) -- maximum amount of retries.
  • delay_sec (Numeric) -- delay between polling attempts
  • initial_delay_sec (Numeric) -- initial delay before polling
# Validates the auto-polling parameters against their minimums.
#
# Checks run in this order: +delay_sec+ (minimum 1), +initial_delay_sec+ (minimum 1),
# +max_retries+ (minimum 2). The first violation raises.
#
# NOTE(review): a nil +max_retries+ is compared with +<+ and therefore raises NoMethodError —
# confirm whether nil is meant to be accepted.
#
# @param initial_delay_sec [Numeric] Initial delay before polling.
# @param delay_sec [Numeric] Delay between polling attempts.
# @param max_retries [Integer, nil] Maximum amount of retries.
# @return [nil]
# @raise [ArgumentError] When a value is below its minimum.
def validate_async_params(initial_delay_sec, delay_sec, max_retries)
  min_delay_sec = 1
  min_initial_delay_sec = 1
  min_retries = 2

  # Each row: value received, its lower bound, message raised when the value is below the bound.
  checks = [
    [delay_sec, min_delay_sec,
     "Cannot set auto-poll delay to less than #{min_delay_sec} second(s)"],
    [initial_delay_sec, min_initial_delay_sec,
     "Cannot set initial parsing delay to less than #{min_initial_delay_sec} second(s)"],
    [max_retries, min_retries,
     "Cannot set auto-poll retries to less than #{min_retries}"],
  ]
  checks.each do |value, floor, message|
    raise ArgumentError, message if value < floor
  end
  nil
end