lib/mindee/parsing/common/ocr/ocr.rb
# frozen_string_literal: true

require_relative 'mvision_v1'

module Mindee
  module Parsing
    module Common
      # Ocr-specific parsing fields and options
      module OCR
        # A single word.
        class OCRWord
          # The confidence score, value will be between 0.0 and 1.0
          # @return [Float]
          attr_accessor :confidence
          # @return [String]
          attr_reader :text
          # Only set when the polygon is neither nil nor empty.
          # @return [Mindee::Geometry::Quadrilateral]
          attr_reader :bounding_box
          # @return [Mindee::Geometry::Polygon]
          attr_reader :polygon

          # Builds a word from the 'text', 'confidence' and 'polygon' keys of the prediction.
          # @param prediction [Hash]
          def initialize(prediction)
            @text = prediction['text']
            @confidence = prediction['confidence']
            @polygon = Geometry.polygon_from_prediction(prediction['polygon'])
            # A bounding box cannot be derived from a missing or empty polygon.
            @bounding_box = Geometry.get_bounding_box(@polygon) unless @polygon.nil? || @polygon.empty?
          end

          # @return [String]
          def to_s
            @text.to_s
          end
        end

        # A list of words which are on the same line.
        class OCRLine < Array
          # Builds a line either from raw word predictions or from already-built words.
          # When both arguments are given, `prediction` takes precedence.
          # @param prediction [Hash, nil] Raw word predictions, each one is wrapped in an OCRWord.
          # @param from_array [Array, nil] Already-built words, used as-is.
          def initialize(prediction = nil, from_array = nil)
            if !prediction.nil?
              super(prediction.map { |word_prediction| OCRWord.new(word_prediction) })
            elsif !from_array.nil?
              super(from_array)
            else
              super()
            end
          end

          # Sort the words on the line from left to right.
          # The sort key is the minimum of the values returned by Geometry.get_min_max_x
          # for each word's polygon.
          # @return [OCRLine] A new line, the receiver is left untouched.
          def sort_on_x
            # sort_by computes the key once per word instead of once per comparison.
            from_array = sort_by { |word| Geometry.get_min_max_x(word.polygon).min }
            OCRLine.new(nil, from_array)
          end

          # The text of each word, separated by single spaces.
          # @return [String]
          def to_s
            map(&:to_s).join(' ')
          end
        end

        # OCR extraction for a single page.
        class OCRPage
          # All the words on the page, in semi-random order.
          # @return [Array<OCRWord>]
          attr_reader :all_words
          # @return [Array<OCRLine>]
          attr_reader :lines

          # Builds the page from the 'all_words' key of the prediction.
          # A missing or nil 'all_words' entry results in a page without words.
          # @param prediction [Hash]
          def initialize(prediction)
            @lines = [] # : Array[Mindee::Parsing::Common::OCR::OCRLine]
            @all_words = (prediction['all_words'] || []).map { |word_prediction| OCRWord.new(word_prediction) }
          end

          # All the words on the page, ordered in lines.
          # The lines are computed on first use and cached for as long as the result is not empty.
          # @return [Array<OCRLine>]
          def all_lines
            @lines = to_lines if @lines.empty?
            @lines
          end

          # The text of every non-blank line, one line of text per OCR line.
          # @return [String]
          def to_s
            all_lines.map(&:to_s).reject { |text| text.strip.empty? }.join("\n").strip
          end

          private

          # Helper function that iterates through all the words and compares them to a candidate.
          # When `current` is nil, the first word that is not yet listed in `indexes` becomes the candidate.
          # The resulting line, if it holds at least one word, is sorted on x and appended to `lines`.
          # @param sorted_words [Array<OCRWord>]
          # @param current [OCRWord, nil]
          # @param indexes [Array<Integer>] Positions of the words already assigned to a line, updated in place.
          # @param lines [Array<OCRLine>] Lines found so far, updated in place.
          def parse_one(sorted_words, current, indexes, lines)
            line = OCRLine.new([])
            sorted_words.each_with_index do |word, idx|
              next if indexes.include?(idx)

              if current.nil?
                current = word
                indexes.push(idx)
                line.push(word)
              elsif words_on_same_line?(current, word)
                line.push(word)
                indexes.push(idx)
              end
            end
            lines.push(line.sort_on_x) unless line.empty?
          end

          # Order all the words on the page into lines.
          # @return [Array<OCRLine>]
          def to_lines
            indexes = [] # : Array[Integer]
            lines = [] # : Array[Mindee::Parsing::Common::OCR::OCRLine]

            # make sure words are sorted from top to bottom
            all_words = @all_words.sort_by { |word| Geometry.get_min_max_y(word.polygon).min }

            # Each pass claims at least the first unassigned word, so the loop always terminates;
            # stopping once every word is assigned avoids passes that cannot produce a line.
            parse_one(all_words, nil, indexes, lines) while indexes.size < all_words.size
            lines
          end

          # Determine if two words are on the same line.
          # Two words match when the centroid of either polygon satisfies `point_in_y?` on the other polygon.
          # @param current_word [Mindee::Parsing::Common::OCR::OCRWord, nil]
          # @param next_word [Mindee::Parsing::Common::OCR::OCRWord]
          # @return [bool] Always false when there is no current word.
          def words_on_same_line?(current_word, next_word)
            # Guard first: both checks below dereference current_word.
            return false if current_word.nil?

            current_in_next = current_word.polygon.point_in_y?(next_word.polygon.centroid)
            next_in_current = next_word.polygon.point_in_y?(current_word.polygon.centroid)
            current_in_next || next_in_current
          end
        end

        # OCR extraction from the entire document.
        class OCR
          # Mindee Vision v1 results.
          # @return [Mindee::Parsing::Common::OCR::MVisionV1]
          attr_reader :mvision_v1

          # Builds the OCR results from the 'mvision-v1' key of the prediction.
          # @param prediction [Hash]
          def initialize(prediction)
            @mvision_v1 = Mindee::Parsing::Common::OCR::MVisionV1.new(prediction['mvision-v1'])
          end

          # @return [String]
          def to_s
            @mvision_v1.to_s
          end

          # Constructs a line from a column, located underneath given coordinates.
          # Delegates to the Mindee Vision v1 results.
          # @param coordinates [Array<Mindee::Geometry::Point>] Polygon or bounding box where the reconstruction should
          #   start
          # @param page_id [Integer] ID of the page to start at
          # @param x_margin [Float] Margin of misalignment for the x coordinate (default 0.05)
          # @return [Mindee::Parsing::Common::OCR::OCRLine]
          def reconstruct_vertically(coordinates, page_id, x_margin = 0.05)
            @mvision_v1.reconstruct_vertically(coordinates, page_id, x_margin)
          end
        end
      end
    end
  end
end