class Mindee::Client
See: developers.mindee.com/docs
Mindee API Client.
def create_endpoint(endpoint_name: '', account_name: '', version: '')
-
(Mindee::HTTP::Endpoint)
-
Parameters:
-
version
(String
) -- For custom endpoints, version of the product -
account_name
(String
) -- For custom endpoints, your account or organization username on the API Builder. -
endpoint_name
(String
) -- For custom endpoints, the "API name" field in the "Settings" page of the API Builder.
# Builds an endpoint for a custom product by delegating to +initialize_endpoint+ with the
# Universal product class.
#
# @param endpoint_name [String] For custom endpoints, the "API name" of the product.
# @param account_name [String] For custom endpoints, your account or organization username.
# @param version [String] For custom endpoints, version of the product.
# @return [Mindee::HTTP::Endpoint]
def create_endpoint(endpoint_name: '', account_name: '', version: '')
  universal_class = Mindee::Product::Universal::Universal
  initialize_endpoint(
    universal_class,
    endpoint_name: endpoint_name,
    account_name: account_name,
    version: version
  )
end
def enqueue(input_source, product_class, endpoint: nil, options: {})
-
(Mindee::Parsing::Common::ApiResponse)
-
Parameters:
-
endpoint
(Mindee::HTTP::Endpoint
) -- Endpoint of the API. -
options
(Hash
) -- A hash of options to configure the enqueue behavior. Possible keys: -
product_class
(Mindee::Inference
) -- The class of the product. -
input_source
(Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource
) --
# Enqueues a document by calling +predict_async+ on the endpoint.
#
# @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource]
# @param product_class [Mindee::Inference] The class of the product.
# @param endpoint [Mindee::HTTP::Endpoint, nil] Endpoint of the API; built from +product_class+ when omitted.
# @param options [Hash, ParseOptions] Options, normalized through +normalize_parse_options+.
# @return [Mindee::Parsing::Common::ApiResponse]
def enqueue(input_source, product_class, endpoint: nil, options: {})
  parse_opts = normalize_parse_options(options)
  target = endpoint || initialize_endpoint(product_class)
  logger.debug("Enqueueing document as '#{target.url_root}'")

  prediction, raw_http = target.predict_async(input_source, parse_opts)
  Mindee::Parsing::Common::ApiResponse.new(product_class, prediction, raw_http.to_json)
end
def enqueue_and_parse(input_source, product_class, endpoint, options)
-
(Mindee::Parsing::Common::ApiResponse)
-
Parameters:
-
endpoint
(Mindee::HTTP::Endpoint
) -- Endpoint of the API. -
options
(Hash
) -- A hash of options to configure the parsing behavior. Possible keys: -
product_class
(Mindee::Inference
) -- The class of the product. -
input_source
(Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource
) --
# Enqueues a document, then polls its job until it leaves the waiting/processing states or the
# retry budget is exhausted.
#
# @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource]
# @param product_class [Mindee::Inference] The class of the product.
# @param endpoint [Mindee::HTTP::Endpoint] Endpoint of the API.
# @param options [ParseOptions] Must expose +initial_delay_sec+, +delay_sec+ and +max_retries+.
# @return [Mindee::Parsing::Common::ApiResponse] The last polled response, whose job is completed.
# @raise [ArgumentError] From +validate_async_params+ when a delay or the retry count is too low.
# @raise [Errors::MindeeAPIError] If a response carries no job, or the job is not completed when polling stops.
def enqueue_and_parse(input_source, product_class, endpoint, options)
  validate_async_params(options.initial_delay_sec, options.delay_sec, options.max_retries)
  enqueue_res = enqueue(input_source, product_class, endpoint: endpoint, options: options)
  job = enqueue_res.job || raise(Errors::MindeeAPIError, 'Expected job to be present')
  job_id = job.id

  sleep(options.initial_delay_sec)
  polling_attempts = 1
  logger.debug("Successfully enqueued document with job id: '#{job_id}'")
  queue_res = parse_queued(job_id, product_class, endpoint: endpoint)
  queue_res_job = queue_res.job || raise(Errors::MindeeAPIError, 'Expected job to be present')

  valid_statuses = [
    Mindee::Parsing::Common::JobStatus::WAITING,
    Mindee::Parsing::Common::JobStatus::PROCESSING,
  ]
  # @type var valid_statuses: Array[(:waiting | :processing | :completed | :failed)]
  while valid_statuses.include?(queue_res_job.status) && polling_attempts < options.max_retries
    logger.debug("Polling server for parsing result with job id: '#{job_id}'. Attempt #{polling_attempts}")
    sleep(options.delay_sec)
    queue_res = parse_queued(job_id, product_class, endpoint: endpoint)
    queue_res_job = queue_res.job || raise(Errors::MindeeAPIError, 'Expected job to be present')
    polling_attempts += 1
  end

  if queue_res_job.status != Mindee::Parsing::Common::JobStatus::COMPLETED
    # The first poll follows only the initial delay, so N attempts imply N - 1 inter-poll sleeps.
    elapsed = options.initial_delay_sec + ((polling_attempts - 1) * options.delay_sec.to_f)
    raise Errors::MindeeAPIError,
          "Asynchronous parsing request timed out after #{elapsed} seconds (#{polling_attempts} tries)"
  end

  queue_res
end
def execute_workflow(input_source, workflow_id, options: {})
-
(Mindee::Parsing::Common::WorkflowResponse)
-
Parameters:
-
options
(Hash, WorkflowOptions
) -- Options to configure workflow behavior. Possible keys: -
workflow_id
(String
) -- -
input_source
(Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource
) --
# Sends a document to a workflow through a +Mindee::HTTP::WorkflowEndpoint+.
#
# @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource]
# @param workflow_id [String] Identifier handed to the workflow endpoint.
# @param options [Hash, WorkflowOptions] Options; a Hash is wrapped in a new +WorkflowOptions+.
# @return [Mindee::Parsing::Common::WorkflowResponse]
def execute_workflow(input_source, workflow_id, options: {})
  workflow_opts = options
  workflow_opts = WorkflowOptions.new(params: options) unless options.is_a?(WorkflowOptions)

  # PDF pre-processing only applies to local sources, and only when the options carry page options.
  if workflow_opts.respond_to?(:page_options) && input_source.is_a?(Input::Source::LocalInputSource)
    process_pdf_if_required(input_source, workflow_opts)
  end

  http_endpoint = Mindee::HTTP::WorkflowEndpoint.new(workflow_id, api_key: @api_key)
  logger.debug("Sending document to workflow '#{workflow_id}'")

  prediction, raw_http = http_endpoint.execute_workflow(input_source, workflow_opts)
  Mindee::Parsing::Common::WorkflowResponse.new(Product::Universal::Universal, prediction, raw_http)
end
def fix_account_name(account_name)
# Returns the account name to use, falling back to +OTS_OWNER+ when none is given.
#
# @param account_name [String, nil] Account name; nil or empty triggers the fallback.
# @return [String] +account_name+ as given, or +OTS_OWNER+ (an info message is logged in that case).
def fix_account_name(account_name)
  return account_name unless account_name.nil? || account_name.empty?

  logger.info("No account name provided, #{OTS_OWNER} will be used by default.")
  OTS_OWNER
end
def fix_endpoint_name(product_class, endpoint_name)
# Returns the endpoint name to use, falling back to the product class's own name.
#
# @param product_class [#endpoint_name] Class of the product; queried only when no name is given.
# @param endpoint_name [String, nil] Explicit endpoint name; nil or empty triggers the fallback.
# @return [String]
def fix_endpoint_name(product_class, endpoint_name)
  return product_class.endpoint_name if endpoint_name.nil? || endpoint_name.empty?

  endpoint_name
end
def fix_version(product_class, version)
# Returns the version to use: the explicit one, else the product class's version, else '1'.
#
# @param product_class [#endpoint_version] Class of the product; queried only when no version is given.
# @param version [String, nil] Explicit version; nil or empty triggers the fallbacks.
# @return [String] A debug message is logged when the final '1' default is used.
def fix_version(product_class, version)
  explicit = !(version.nil? || version.empty?)
  return version if explicit

  product_version = product_class.endpoint_version
  return product_version unless product_version.nil? || product_version.empty?

  logger.debug('No version provided for a custom build, will attempt to poll version 1 by default.')
  '1'
end
def initialize(api_key: '')
-
api_key
(String
) --
# Stores the API key that this client later hands to the endpoints it builds.
#
# @param api_key [String] API key; defaults to an empty string.
def initialize(api_key: '')
  @api_key = api_key
end
def initialize_endpoint(product_class, endpoint_name: '', account_name: '', version: '')
-
(Mindee::HTTP::Endpoint)
-
Parameters:
-
version
(String
) -- For custom endpoints, version of the product. -
account_name
(String
) -- For custom endpoints, your account or organization username on the API Builder. -
endpoint_name
(String
) -- For custom endpoints, the "API name" field in the "Settings" page of the API Builder. -
product_class
(Mindee::Parsing::Common::Inference
) -- class of the product
# Builds the HTTP endpoint for a product, filling in missing name, account and version values.
#
# @param product_class [Mindee::Parsing::Common::Inference] Class of the product.
# @param endpoint_name [String] For custom endpoints, the "API name" of the product.
# @param account_name [String] For custom endpoints, your account or organization username.
# @param version [String] For custom endpoints, version of the product.
# @return [Mindee::HTTP::Endpoint]
# @raise [Mindee::Errors::MindeeConfigurationError] If the Universal product class is given without
#   an endpoint name.
def initialize_endpoint(product_class, endpoint_name: '', account_name: '', version: '')
  name_missing = endpoint_name.nil? || endpoint_name.empty?
  if name_missing && product_class == Mindee::Product::Universal::Universal
    raise Mindee::Errors::MindeeConfigurationError, 'Missing argument endpoint_name when using custom class'
  end

  resolved_endpoint = fix_endpoint_name(product_class, endpoint_name)
  resolved_account = fix_account_name(account_name)
  resolved_version = fix_version(product_class, version)
  HTTP::Endpoint.new(resolved_account, resolved_endpoint, resolved_version, api_key: @api_key)
end
def load_prediction(product_class, local_response)
-
(Mindee::Parsing::Common::ApiResponse)
-
Parameters:
-
local_response
(Mindee::Input::LocalResponse
) -- -
product_class
(Mindee::Inference
) -- class of the product
# Builds an API response object from a locally stored response.
#
# The hash comes from +local_response.as_hash+; an empty hash is used when that returns nil.
# A nil +local_response+ first raises +Errors::MindeeAPIError+, which the method-level rescue
# then re-raises as +Errors::MindeeInputError+, like any +KeyError+ raised while building the response.
#
# @param product_class [Mindee::Inference] Class of the product.
# @param local_response [Mindee::Input::LocalResponse]
# @return [Mindee::Parsing::Common::ApiResponse]
# @raise [Errors::MindeeInputError] If +local_response+ is nil or building the response fails
#   with a +KeyError+ or +Errors::MindeeAPIError+.
def load_prediction(product_class, local_response)
  raise Errors::MindeeAPIError, 'Expected LocalResponse to not be nil.' if local_response.nil?

  # `|| {}` guarantees a non-nil hash, so no further nil check is needed.
  response_hash = local_response.as_hash || {}
  Mindee::Parsing::Common::ApiResponse.new(product_class, response_hash, response_hash.to_json)
rescue KeyError, Errors::MindeeAPIError
  raise Errors::MindeeInputError, 'No prediction found in local response.'
end
def normalize_parse_options(options)
-
(ParseOptions)
-
Parameters:
-
options
(Hash, ParseOptions
) -- Options.
# Ensures the options are a +ParseOptions+ instance.
#
# @param options [Hash, ParseOptions] Options; anything that is not a +ParseOptions+ is passed as
#   +params:+ to a new instance.
# @return [ParseOptions] The same object when it already is a +ParseOptions+.
def normalize_parse_options(options)
  options.is_a?(ParseOptions) ? options : ParseOptions.new(params: options)
end
def parse(input_source, product_class, endpoint: nil, options: {}, enqueue: true)
-
(Mindee::Parsing::Common::ApiResponse)
-
Parameters:
-
enqueue
(bool
) -- Whether to enqueue the file. -
options
(Hash
) -- A hash of options to configure the parsing behavior. Possible keys: -
endpoint
(Mindee::HTTP::Endpoint, nil
) -- Endpoint of the API. -
product_class
(Mindee::Inference
) -- The class of the product. -
input_source
(Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource
) --
# Parses a document, choosing between the asynchronous (enqueue and poll) and synchronous paths.
#
# The asynchronous path is taken only when +enqueue+ is truthy and +product_class.has_async+ is truthy.
#
# @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource]
# @param product_class [Mindee::Inference] The class of the product.
# @param endpoint [Mindee::HTTP::Endpoint, nil] Endpoint of the API; built from +product_class+ when omitted.
# @param options [Hash, ParseOptions] Options, normalized through +normalize_parse_options+.
# @param enqueue [bool] Whether to enqueue the file.
# @return [Mindee::Parsing::Common::ApiResponse]
def parse(input_source, product_class, endpoint: nil, options: {}, enqueue: true)
  parse_opts = normalize_parse_options(options)
  process_pdf_if_required(input_source, parse_opts) if input_source.is_a?(Input::Source::LocalInputSource)
  target = endpoint || initialize_endpoint(product_class)

  return enqueue_and_parse(input_source, product_class, target, parse_opts) if enqueue && product_class.has_async

  parse_sync(input_source, product_class, target, parse_opts)
end
def parse_queued(job_id, product_class, endpoint: nil)
-
(Mindee::Parsing::Common::ApiResponse)
-
Parameters:
-
endpoint
(HTTP::Endpoint, nil
) -- Endpoint of the API -
product_class
(Mindee::Inference
) -- class of the product -
job_id
(String
) -- ID of the job (queue) to poll from
# Fetches a queued document by calling +parse_async+ on the endpoint.
#
# @param job_id [String] ID of the job (queue) to poll from.
# @param product_class [Mindee::Inference] Class of the product.
# @param endpoint [HTTP::Endpoint, nil] Endpoint of the API; built from +product_class+ when nil.
# @return [Mindee::Parsing::Common::ApiResponse]
def parse_queued(job_id, product_class, endpoint: nil)
  target = endpoint.nil? ? initialize_endpoint(product_class) : endpoint
  logger.debug("Fetching queued document as '#{target.url_root}'")

  prediction, raw_http = target.parse_async(job_id)
  Mindee::Parsing::Common::ApiResponse.new(product_class, prediction, raw_http.to_json)
end
def parse_sync(input_source, product_class, endpoint, options)
-
(Mindee::Parsing::Common::ApiResponse)
-
Parameters:
-
options
(Hash
) -- A hash of options to configure the parsing behavior. Possible keys: -
endpoint
(Mindee::HTTP::Endpoint, nil
) -- Endpoint of the API. -
product_class
(Mindee::Inference
) -- class of the product -
input_source
(Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource
) --
# Parses a document synchronously by calling +predict+ on the endpoint.
#
# @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource]
# @param product_class [Mindee::Inference] Class of the product.
# @param endpoint [Mindee::HTTP::Endpoint] Endpoint of the API.
# @param options [ParseOptions] Options forwarded unchanged to the endpoint.
# @return [Mindee::Parsing::Common::ApiResponse] Built with the raw HTTP payload converted via +to_s+.
def parse_sync(input_source, product_class, endpoint, options)
  logger.debug("Parsing document as '#{endpoint.url_root}'")

  parsed, raw = endpoint.predict(input_source, options)
  Mindee::Parsing::Common::ApiResponse.new(product_class, parsed, raw.to_s)
end
def process_pdf_if_required(input_source, opts)
-
opts
(ParseOptions
) -- -
input_source
(Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource
) --
# Applies the page options to a local PDF source; does nothing otherwise.
#
# Processing happens only when the source is a +LocalInputSource+, +opts.page_options.on_min_pages+
# is truthy, and the source reports itself as a PDF.
#
# @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource]
# @param opts [ParseOptions] Must expose +page_options+.
def process_pdf_if_required(input_source, opts)
  return unless input_source.is_a?(Mindee::Input::Source::LocalInputSource)
  return unless opts.page_options.on_min_pages
  return unless input_source.pdf?

  input_source.process_pdf(opts.page_options)
end
def source_from_b64string(base64_string, filename, repair_pdf: false)
-
(Mindee::Input::Source::Base64InputSource)
-
Parameters:
-
repair_pdf
(bool
) -- Attempts to fix broken pdf if true -
filename
(String
) -- The name of the file (without the path) -
base64_string
(String
) -- Input to parse as base64 string
# Wraps a base64 string in a +Base64InputSource+.
#
# @param base64_string [String] Input to parse as base64 string.
# @param filename [String] The name of the file (without the path).
# @param repair_pdf [bool] Attempts to fix broken pdf if true.
# @return [Mindee::Input::Source::Base64InputSource]
def source_from_b64string(base64_string, filename, repair_pdf: false)
  source_class = Input::Source::Base64InputSource
  source_class.new(base64_string, filename, repair_pdf: repair_pdf)
end
def source_from_bytes(input_bytes, filename, repair_pdf: false)
-
(Mindee::Input::Source::BytesInputSource)
-
Parameters:
-
repair_pdf
(bool
) -- Attempts to fix broken pdf if true -
filename
(String
) -- The name of the file (without the path) -
input_bytes
(String
) -- Encoding::BINARY byte input
# Wraps raw bytes in a +BytesInputSource+.
#
# @param input_bytes [String] Encoding::BINARY byte input.
# @param filename [String] The name of the file (without the path).
# @param repair_pdf [bool] Attempts to fix broken pdf if true.
# @return [Mindee::Input::Source::BytesInputSource]
def source_from_bytes(input_bytes, filename, repair_pdf: false)
  source_class = Input::Source::BytesInputSource
  source_class.new(input_bytes, filename, repair_pdf: repair_pdf)
end
def source_from_file(input_file, filename, repair_pdf: false)
-
(Mindee::Input::Source::FileInputSource)
-
Parameters:
-
repair_pdf
(bool
) -- Attempts to fix broken pdf if true -
filename
(String
) -- The name of the file (without the path) -
input_file
(File
) -- Input file handle
# Wraps an open file handle in a +FileInputSource+.
#
# @param input_file [File] Input file handle.
# @param filename [String] The name of the file (without the path).
# @param repair_pdf [bool] Attempts to fix broken pdf if true.
# @return [Mindee::Input::Source::FileInputSource]
def source_from_file(input_file, filename, repair_pdf: false)
  source_class = Input::Source::FileInputSource
  source_class.new(input_file, filename, repair_pdf: repair_pdf)
end
def source_from_path(input_path, repair_pdf: false)
-
(Mindee::Input::Source::PathInputSource)
-
Parameters:
-
repair_pdf
(bool
) -- Attempts to fix broken pdf if true -
input_path
(String
) -- Path of file to open
# Wraps a file path in a +PathInputSource+.
#
# @param input_path [String] Path of file to open.
# @param repair_pdf [bool] Attempts to fix broken pdf if true.
# @return [Mindee::Input::Source::PathInputSource]
def source_from_path(input_path, repair_pdf: false)
  source_class = Input::Source::PathInputSource
  source_class.new(input_path, repair_pdf: repair_pdf)
end
def source_from_url(url)
-
(Mindee::Input::Source::URLInputSource)
-
Parameters:
-
url
(String
) -- URL of the file
# Wraps a URL in a +URLInputSource+.
#
# @param url [String] URL of the file.
# @return [Mindee::Input::Source::URLInputSource]
def source_from_url(url)
  source_class = Input::Source::URLInputSource
  source_class.new(url)
end
def validate_async_params(initial_delay_sec, delay_sec, max_retries)
-
max_retries
(Integer, nil
) -- maximum amount of retries. -
delay_sec
(Numeric
) -- delay between polling attempts -
initial_delay_sec
(Numeric
) -- initial delay before polling
# Checks the polling parameters against their minimums and raises on the first violation.
#
# Checks run in this order: polling delay (minimum 1), initial delay (minimum 1), retries (minimum 2).
#
# @param initial_delay_sec [Numeric] Initial delay before polling.
# @param delay_sec [Numeric] Delay between polling attempts.
# @param max_retries [Integer] Maximum amount of retries.
# @return [nil]
# @raise [ArgumentError] If any parameter is below its minimum.
def validate_async_params(initial_delay_sec, delay_sec, max_retries)
  min_delay_sec = 1
  min_initial_delay_sec = 1
  min_retries = 2

  violation =
    if delay_sec < min_delay_sec
      "Cannot set auto-poll delay to less than #{min_delay_sec} second(s)"
    elsif initial_delay_sec < min_initial_delay_sec
      "Cannot set initial parsing delay to less than #{min_initial_delay_sec} second(s)"
    elsif max_retries < min_retries
      "Cannot set auto-poll retries to less than #{min_retries}"
    end

  raise ArgumentError, violation if violation
end