lib/pdf/reader/advanced_text_run_filter.rb



# coding: utf-8
# frozen_string_literal: true
# typed: strict

class PDF::Reader
  # Filter a collection of TextRun objects based on a set of conditions.
  # It can be used to filter text runs based on their attributes.
  # The filter can return the text runs that match the conditions (only) or
  # the text runs that do not match the conditions (exclude).
  #
  # You can filter the text runs based on any of their attributes, using the
  # operators listed in VALID_OPERATORS.
  # The filter can be nested with 'or' and 'and' conditions.
  #
  # Examples:
  # 1. Single condition
  # AdvancedTextRunFilter.exclude(text_runs, text: { include: 'sample' })
  #
  # 2. Multiple conditions (and)
  # AdvancedTextRunFilter.exclude(text_runs, {
  #   font_size: { greater_than: 10, less_than: 15 }
  # })
  #
  # 3. Multiple possible values (or)
  # AdvancedTextRunFilter.exclude(text_runs, {
  #  font_size: { equal: [10, 12] }
  # })
  #
  # 4. Complex AND/OR filter
  # AdvancedTextRunFilter.exclude(text_runs, {
  #   and: [
  #     { font_size: { greater_than: 10 } },
  #     { or: [
  #       { text: { include: "sample" } },
  #       { width: { greater_than: 100 } }
  #     ]}
  #   ]
  # })
  class AdvancedTextRunFilter
    # Operators accepted inside an attribute's condition hash. Any other key
    # raises ArgumentError when the condition is evaluated.
    VALID_OPERATORS = %i[
      equal
      not_equal
      greater_than
      less_than
      greater_than_or_equal
      less_than_or_equal
      include
      exclude
    ].freeze

    # Convenience wrapper: returns the text runs that match filter_hash.
    def self.only(text_runs, filter_hash)
      new(text_runs, filter_hash).only
    end

    # Convenience wrapper: returns the text runs that do NOT match filter_hash.
    def self.exclude(text_runs, filter_hash)
      new(text_runs, filter_hash).exclude
    end

    attr_reader :text_runs, :filter_hash

    # text_runs   - the collection to filter.
    # filter_hash - either { attribute => { operator => value } },
    #               { or: [filter, ...] } or { and: [filter, ...] }, where each
    #               nested filter has the same shape as filter_hash.
    def initialize(text_runs, filter_hash)
      @text_runs = text_runs
      @filter_hash = filter_hash
    end

    # Returns the text runs for which the filter evaluates to true.
    # An empty filter returns the collection untouched.
    def only
      return text_runs if filter_hash.empty?

      text_runs.select { |text_run| evaluate_filter(text_run) }
    end

    # Returns the text runs for which the filter evaluates to false.
    # An empty filter returns the collection untouched.
    def exclude
      return text_runs if filter_hash.empty?

      text_runs.reject { |text_run| evaluate_filter(text_run) }
    end

    private

    # Evaluates one filter hash against text_run. Defaults to the top-level
    # filter, and is re-entered for every nested condition so that :or / :and
    # groups may appear at any depth.
    #
    # Within a single hash :or takes precedence over :and, which takes
    # precedence over plain attribute conditions.
    def evaluate_filter(text_run, filter = filter_hash)
      if filter[:or]
        evaluate_or_filters(text_run, filter[:or])
      elsif filter[:and]
        evaluate_and_filters(text_run, filter[:and])
      else
        evaluate_filters(text_run, filter)
      end
    end

    # True when at least one of the conditions matches text_run.
    def evaluate_or_filters(text_run, conditions)
      conditions.any? do |condition|
        # Go through evaluate_filter (not evaluate_filters) so a nested
        # :or / :and group is recognised instead of being read as an attribute.
        evaluate_filter(text_run, condition)
      end
    end

    # True when every one of the conditions matches text_run.
    def evaluate_and_filters(text_run, conditions)
      conditions.all? do |condition|
        evaluate_filter(text_run, condition)
      end
    end

    # True when text_run satisfies the conditions of every attribute listed
    # in filter_hash ({ attribute => { operator => value } }).
    def evaluate_filters(text_run, filter_hash)
      filter_hash.all? do |attribute, conditions|
        evaluate_attribute_conditions(text_run, attribute, conditions)
      end
    end

    # True when the value of attribute on text_run satisfies every
    # operator/value pair in conditions.
    #
    # Raises ArgumentError for an operator that is not in VALID_OPERATORS.
    # Validation happens as each pair is reached, so pairs after a failing
    # one are not checked.
    def evaluate_attribute_conditions(text_run, attribute, conditions)
      conditions.all? do |operator, value|
        unless VALID_OPERATORS.include?(operator)
          raise ArgumentError, "Invalid operator: #{operator}"
        end

        # public_send keeps a filter key from invoking private methods
        # (e.g. Kernel#exit) on the text run.
        apply_operator(text_run.public_send(attribute), operator, value)
      end
    end

    # Applies a single operator.
    #
    # :equal / :not_equal and :include / :exclude accept either one value or
    # an array of candidate values; :include / :exclude compare the string
    # forms. The ordering operators compare against filter_value directly.
    def apply_operator(attribute_value, operator, filter_value)
      case operator
      when :equal
        Array(filter_value).include?(attribute_value)
      when :not_equal
        !Array(filter_value).include?(attribute_value)
      when :greater_than
        attribute_value > filter_value
      when :less_than
        attribute_value < filter_value
      when :greater_than_or_equal
        attribute_value >= filter_value
      when :less_than_or_equal
        attribute_value <= filter_value
      when :include
        Array(filter_value).any? { |v| attribute_value.to_s.include?(v.to_s) }
      when :exclude
        Array(filter_value).none? { |v| attribute_value.to_s.include?(v.to_s) }
      end
    end
  end
end