lib/mindee/parsing/common/document.rb



# frozen_string_literal: true

require_relative 'inference'
require_relative 'extras'

module Mindee
  module Parsing
    module Common
      # Stores all response attributes.
      class Document
        # @return [Mindee::Inference]
        attr_reader :inference
        # @return [String] Filename sent to the API
        attr_reader :name
        # @return [String] Mindee ID of the document
        attr_reader :id
        # @return [Mindee::Parsing::Common::Extras::Extras] Potential Extras fields sent back along the prediction.
        attr_reader :extras
        # @return [Mindee::Parsing::Common::Ocr::Ocr, nil] OCR text results (limited availability)
        attr_reader :ocr
        # @return [Integer] Amount of pages of the document
        attr_reader :n_pages

        # @param http_response [Hash]
        # @return [Mindee::Parsing::Common::Ocr::Ocr]
        def self.load_ocr(http_response)
          ocr_prediction = http_response.fetch('ocr', nil)
          return nil if ocr_prediction.nil? || ocr_prediction.fetch('mvision-v1', nil).nil?

          Ocr::Ocr.new(ocr_prediction)
        end

        def self.load_extras(http_response)
          extras_prediction = http_response['inference'].fetch('extras', nil)
          return nil if extras_prediction.nil? || extras_prediction.fetch('mvision-v1', nil).nil?

          Extras::Extras::Extras.new(extras_prediction)
        end

        # @param product_class [Mindee::Inference]
        # @param http_response [Hash]
        def initialize(product_class, http_response)
          @id = http_response['id']
          @name = http_response['name']
          @inference = product_class.new(http_response['inference'])
          @ocr = self.class.load_ocr(http_response)
          @extras = self.class.load_extras(http_response)
          inject_full_text_ocr(http_response)
          @n_pages = http_response['n_pages']
        end

        # @return [String]
        def to_s
          out_str = String.new
          out_str << "########\nDocument\n########"
          out_str << "\n:Mindee ID: #{@id}"
          out_str << "\n:Filename: #{@name}"
          out_str << "\n\n#{@inference}"
        end

        private

        def inject_full_text_ocr(raw_prediction)
          return unless raw_prediction.dig('inference', 'pages') &&
                        raw_prediction['inference']['pages'][0]['extras']['full_text_ocr']

          full_text_ocr = String.new
          raw_prediction.dig('inference', 'pages').each do |page|
            full_text_ocr << (page['extras']['full_text_ocr']['content'])
          end
          artificial_text_obj = { 'content' => full_text_ocr }
          if @extras.nil? || @extras.empty?
            @extras = Extras::Extras.new({ 'full_text_ocr' => artificial_text_obj })
          else
            @extras.add_artificial_extra({ 'full_text_ocr' => artificial_text_obj })
          end
        end
      end
    end
  end
end