class HexaPDF::CLI::Split

Splits a PDF file, putting each page into a separate file.

def execute(pdf, output_spec = pdf.sub(/\.pdf$/i, '_%04d.pdf')) #:nodoc:

:nodoc:

# Opens the file +pdf+ (using the password stored in @password) and splits it according to
# @strategy: by page number when the strategy is :page_number, by page size otherwise.
#
# +output_spec+ is the printf-style pattern for the output file names; it defaults to the input
# name with a trailing ".pdf" (case-insensitive) replaced by "_%04d.pdf".
def execute(pdf, output_spec = pdf.sub(/\.pdf$/i, '_%04d.pdf')) #:nodoc:
  with_document(pdf, password: @password) do |document|
    next split_by_page_number(document, output_spec) if @strategy == :page_number

    split_by_page_size(document, output_spec)
  end
end

def initialize #:nodoc:

:nodoc:

# Sets up the 'split' command: its descriptions, the --strategy and --password options and the
# default values for the split strategy (:page_number) and the decryption password (nil).
#
# NOTE(review): define_optimization_options, define_encryption_options and read_password are
# not defined in this file section; presumably they are provided by the parent command class.
def initialize #:nodoc:
  super('split', takes_commands: false)
  short_desc("Split a PDF file")
  long_desc(<<~EOF)
    The default strategy is to split a PDF into individual pages, i.e. splitting is done by
    page number. It is also possible to split by page size where pages with the same page size
    get put into the same output PDF.
    If no OUTPUT_SPEC is specified, the resulting PDF files are named <PDF>_0001.pdf,
    <PDF>_0002.pdf, ... when splitting by page number and <PDF>_A4.pdf, <PDF>_Letter.pdf, ...
    when splitting by page size.
    To specify a custom name, provide the OUTPUT_SPEC argument. It can contain a printf-style
    format definition like '%04d' to specify the place where the page number should be
    inserted. In case of splitting by page size, the place of the format definition is replaced
    with the name of the page size, e.g. A4 or Letter.
    The optimization and encryption options are applied to each created output file.
  EOF
  # The option value is restricted to the two listed strategies.
  options.on("--strategy STRATEGY", "-s", [:page_number, :page_size], "Defines how the PDF " \
             "file should be split: page_number or page_size (default: page_number)") do |s|
    @strategy = s
  end
  # A literal "-" means that the password is obtained via read_password instead of the
  # command line.
  options.on("--password PASSWORD", "-p", String,
             "The password for decryption. Use - for reading from standard input.") do |pwd|
    @password = (pwd == '-' ? read_password : pwd)
  end
  define_optimization_options
  define_encryption_options
  # Defaults used when the corresponding options are not given.
  @password = nil
  @strategy = :page_number
end

def page_size_name(box)

Tries to retrieve a page size name based on the given page box. If this is not possible, the
returned page size name consists of width x height.

# Tries to retrieve a page size name based on the given page +box+. If this is not possible,
# the returned page size name consists of width x height.
#
# A predefined paper size from HexaPDF::Type::Page::PAPER_SIZE matches when every entry of
# +box+ differs from the corresponding entry of the paper box by less than 5. If none matches,
# the entries at index 2 and 3 of +box+ are formatted as "WIDTHxHEIGHT" (rounded, no decimals).
#
# Results are cached per box in @page_name_cache.
def page_size_name(box)
  @page_name_cache ||= {}
  return @page_name_cache[box] if @page_name_cache.key?(box)

  paper_size = HexaPDF::Type::Page::PAPER_SIZE.find do |_name, paper_box|
    # Compare the given box against the candidate paper box (not the paper box against
    # itself, which would make every candidate match).
    box.each_with_index.all? {|entry, index| (entry - paper_box[index]).abs < 5 }
  end

  @page_name_cache[box] =
    paper_size ? paper_size[0] : sprintf("%.0fx%.0f", *box.values_at(2, 3))
end

def split_by_page_number(doc, output_spec)

Splits the document into individual pages.

# Splits the document into individual pages.
#
# For every page of +doc+ a new document containing only that page is created and written to
# the file name obtained by formatting +output_spec+ with the 1-based page number. The
# encryption and optimization options are applied to each new document before it is written.
def split_by_page_number(doc, output_spec)
  doc.pages.each_with_index do |source_page, position|
    target_path = format(output_spec, position + 1)
    maybe_raise_on_existing_file(target_path)

    single_page_doc = HexaPDF::Document.new
    single_page_doc.pages.add(single_page_doc.import(source_page))

    apply_encryption_options(single_page_doc)
    apply_optimization_options(single_page_doc)
    write_document(single_page_doc, target_path)
  end
end

def split_by_page_size(doc, output_spec)

Splits the document into files based on the page sizes.

# Splits the document into files based on the page sizes.
#
# The first printf-style format directive in +output_spec+ is replaced by '%s' so that the page
# size name can be inserted. Pages sharing a page size name (see #page_size_name) are collected
# in the same output document; each output document is created lazily the first time its size
# name is encountered and remembers its target file name in its 'output_file' config entry.
# Once all pages are distributed, the encryption and optimization options are applied to every
# output document and it is written to its file.
def split_by_page_size(doc, output_spec)
  size_spec = output_spec.sub(/%.*?[a-zA-Z]/, '%s')

  documents_by_size = Hash.new do |registry, size_name|
    target_path = format(size_spec, size_name)
    maybe_raise_on_existing_file(target_path)
    registry[size_name] = HexaPDF::Document.new.tap do |fresh_doc|
      fresh_doc.config['output_file'] = target_path
    end
  end

  doc.pages.each do |page|
    size_name = page_size_name(page.box.value)
    target = documents_by_size[size_name]
    target.pages.add(target.import(page))
  end

  documents_by_size.each_value do |target|
    apply_encryption_options(target)
    apply_optimization_options(target)
    write_document(target, target.config['output_file'])
  end
end

Namespace

HexaPDF::CLI

Parent class

HexaPDF::CLI::Command

Instance Methods

Defined in

lib/hexapdf/cli/split.rb

Modules

Classes