module Mindee::PDF::PdfProcessor

def self.get_page(pdf_doc, page_id)

Returns:
  • (StringIO) -

Parameters:
  • page_id (Integer) -- Page ID.
  • pdf_doc (Origami::PDF) -- Origami PDF handle.
# Serializes the given PDF document into an in-memory buffer and hands it to
# +parse+, asking for a single page index.
#
# @param pdf_doc [Origami::PDF] Origami PDF handle; only +save+ is called on it here.
# @param page_id [Integer] Page ID. The index passed on is +page_id - 1+, so this
#   is presumably 1-based -- confirm against callers.
# @return [StringIO, nil] whatever +parse+ returns for the buffer.
def self.get_page(pdf_doc, page_id)
  buffer = StringIO.new
  pdf_doc.save(buffer)
  # The operation is not specified here, so it comes from the defaults
  # that +parse+ merges in.
  parse(buffer, { page_indexes: [page_id - 1] })
end

def self.indexes_from_keep(page_indexes, all_pages)

Parameters:
  • all_pages (Array) --
  • page_indexes (Array) --
# Computes which pages must be removed so that only the requested pages remain.
#
# Negative indexes count from the end of +all_pages+ (-1 is the last page,
# -2 the one before it, and so on). Indexes that fall outside +all_pages+
# are ignored.
#
# @param page_indexes [Array<Integer>] indexes of the pages to keep.
# @param all_pages [Array<Integer>] every page index in the document.
# @return [Set<Integer>] the pages of +all_pages+ that were not requested.
def self.indexes_from_keep(page_indexes, all_pages)
  # Array#[] already resolves negative indexes from the end and yields nil
  # when out of range, so no manual offset arithmetic is needed.
  pages_to_keep = page_indexes.map { |idx| all_pages[idx] }.compact.to_set
  all_pages.to_set - pages_to_keep
end

def self.indexes_from_remove(page_indexes, all_pages)

Parameters:
  • all_pages (Array) --
  • page_indexes (Array) --
# Resolves the requested indexes into the set of pages to remove.
#
# Negative indexes count from the end of +all_pages+ (-1 is the last page,
# -2 the one before it, and so on). Indexes that fall outside +all_pages+
# are ignored.
#
# @param page_indexes [Array<Integer>] indexes of the pages to remove.
# @param all_pages [Array<Integer>] every page index in the document.
# @return [Set<Integer>] the existing pages selected for removal.
def self.indexes_from_remove(page_indexes, all_pages)
  # Array#[] already resolves negative indexes from the end and yields nil
  # when out of range. The resolved set is returned explicitly: ending the
  # method on an +each+ call would hand back the raw input array instead.
  page_indexes.map { |idx| all_pages[idx] }.compact.to_set
end

def self.open_pdf(io_stream)

Returns:
  • (Origami::PDF) -

Parameters:
  • io_stream (StringIO) --
# Rewinds the stream and parses it with Origami's linear parser, with the
# parser verbosity set to quiet.
#
# @param io_stream [StringIO] stream holding the PDF; its position is reset to 0.
# @return [Origami::PDF] the result of the parser's +parse+ call.
def self.open_pdf(io_stream)
  io_stream.seek(0)
  quiet_parser = Origami::PDF::LinearParser.new({ verbosity: Origami::Parser::VERBOSE_QUIET })
  quiet_parser.parse(io_stream)
end

def self.parse(io_stream, options)

Returns:
  • (StringIO) -

Parameters:
  • options (Hash) --
  • io_stream (StringIO) --
# Opens the PDF held in +io_stream+ and removes pages from it according to
# +options+, then returns the resulting document as a stream.
#
# @param io_stream [StringIO] stream holding the PDF to process.
# @param options [Hash] merged over +DEFAULT_OPTIONS+. Keys read here:
#   +:on_min_pages+ (minimum page count for any processing to happen),
#   +:operation+ (+:KEEP_ONLY+ or +:REMOVE+) and +:page_indexes+.
# @return [StringIO, nil] the value of +to_io_stream+ on the processed
#   document, or +nil+ when the document has fewer pages than +:on_min_pages+.
# @raise [ArgumentError] if +:operation+ is neither +:KEEP_ONLY+ nor +:REMOVE+.
def self.parse(io_stream, options)
  options = DEFAULT_OPTIONS.merge(options)
  current_pdf = open_pdf(io_stream)
  pages_count = current_pdf.pages.size
  return if options[:on_min_pages] > pages_count

  all_pages = (0...pages_count).to_a
  pages_to_remove =
    case options[:operation]
    when :KEEP_ONLY
      indexes_from_keep(options[:page_indexes], all_pages)
    when :REMOVE
      indexes_from_remove(options[:page_indexes], all_pages)
    else
      # Report the operation that was actually received; interpolating an
      # undefined local here would surface as a NameError instead.
      raise ArgumentError, "operation must be one of :KEEP_ONLY or :REMOVE, sent '#{options[:operation]}'"
    end

  # Skip the deletion when it would strip every page from the document.
  current_pdf.delete_pages_at(pages_to_remove) if pages_to_remove.to_a != all_pages.to_a
  current_pdf.to_io_stream
end