class HexaPDF::CLI::Inspect

Shows the internal structure of a PDF file.

def execute(file, *commands) #:nodoc:

:nodoc:
# Opens +file+ (using the configured password) and runs the inspection commands on it.
#
# If +commands+ is non-empty, they are executed once via #execute_commands. Otherwise an
# interactive prompt is started that reads lines via #read_input until the input ends or
# #execute_commands returns a truthy value.
def execute(file, *commands) #:nodoc:
  with_document(file, password: @password) do |doc|
    # Report unmappable code points on stderr and substitute U+FFFD.
    doc.config['font.on_missing_unicode_mapping'] = lambda do |code_point, pdf_font|
      $stderr.puts("No Unicode mapping for code point #{code_point} in font #{pdf_font[:BaseFont]}, " \
                   "using the Unicode replacement character")
      "\u{FFFD}"
    end
    @doc = doc

    # Non-interactive mode: run the commands given as arguments and finish.
    next execute_commands(commands) unless commands.empty?

    # Interactive mode: line editing via reline is optional.
    begin
      require 'reline'
      Reline.completion_proc = RELINE_COMPLETION_PROC
      Reline.completion_append_character = " "
    rescue LoadError
      $stderr.puts("Library reline not available, history and line editing not available") if command_parser.verbosity_info?
    end

    finished = false
    until finished
      line = read_input
      unless line
        # No more input: terminate the prompt line before leaving.
        puts
        break
      end
      # Split into tokens; a quoted token keeps its inner whitespace and loses the quotes.
      tokens = line.scan(/(["'])(.+?)\1|(\S+)/).map {|_quote, quoted, bare| quoted || bare }
      finished = execute_commands(tokens)
    end
  end
end

def execute_commands(data) #:nodoc:

:nodoc:
# Executes the command tokens in +data+ one after another.
#
# +data+ is an array of strings holding commands followed by their arguments; it is consumed
# in place. A ";" token acts as a separator and is skipped. Errors in a single command
# (missing/invalid arguments, malformed page specification or regular expression) are
# reported on stderr and processing continues with the next token.
#
# Returns +true+ if a quit command was encountered, +false+ otherwise.
def execute_commands(data) #:nodoc:
  data.map! {|item| item == ";" ? nil : item }
  until data.empty?
    # Separators were turned into nil above and are skipped here.
    command = data.shift || next
    case command
    when /\A\d+(,\d+)?\z/, 'o', 'object'
      # A bare OID[,GEN] is a shortcut for "object OID[,GEN]".
      arg = (command.start_with?('o') ? data.shift : command)
      obj = pdf_object_from_string_reference(arg) rescue $stderr.puts($!.message)
      if obj&.data&.stream && command_parser.verbosity_info?
        $stderr.puts("Note: Object also has stream data")
      end
      serialize(obj.value, recursive: false) if obj
    when 'r', 'recursive'
      # Without an argument the trailer is used as the starting point.
      obj = if (obj = data.shift)
              pdf_object_from_string_reference(obj) rescue $stderr.puts($!.message)
            else
              @doc.trailer
            end
      serialize(obj.value, recursive: true) if obj
    when 's', 'stream', 'raw', 'raw-stream', 'sd'
      if (obj = pdf_object_from_string_reference(data.shift) rescue $stderr.puts($!.message)) &&
          obj.kind_of?(HexaPDF::Stream)
        if command == 'sd'
          if obj.respond_to?(:process_contents)
            obj.process_contents(ContentProcessor.new)
          else
            $stderr.puts("Error: The object is not a Form XObject or page")
          end
        else
          # 'raw*' commands use the stream source, the others the stream decoder.
          source = (command.start_with?('raw') ? obj.stream_source : obj.stream_decoder)
          while source.alive? && (stream_data = source.resume)
            $stdout.write(stream_data)
          end
        end
      elsif command_parser.verbosity_info?
        $stderr.puts("Note: Object has no stream data")
      end
    when 'x', 'xref'
      if (obj = pdf_object_from_string_reference(data.shift) rescue $stderr.puts($!.message))
        # Search from the last revision backwards and print the first entry found.
        @doc.revisions.reverse_each do |rev|
          if (xref = rev.xref(obj))
            puts xref
            break
          end
        end
      end
    when 'c', 'catalog'
      serialize(@doc.catalog.value, recursive: false)
    when 't', 'trailer'
      serialize(@doc.trailer.value, recursive: false)
    when 'p', 'pages'
      begin
        pages = parse_pages_specification(data.shift || '1-e', @doc.pages.count)
      rescue StandardError => e
        $stderr.puts("Error: #{e}")
        next
      end
      page_list = @doc.pages.to_a
      pages.each do |index, _|
        page = page_list[index]
        str = +"page #{index + 1} (#{page.oid},#{page.gen}): "
        str << Array(page[:Contents]).map {|c| "#{c.oid},#{c.gen}" }.join(" ")
        puts str
      end
    when 'po', 'ps', 'psd'
      page_number_str = data.shift
      unless page_number_str
        $stderr.puts("Error: Missing PAGE argument to #{command}")
        next
      end
      # Handle a malformed page specification the same way the 'pages' command does instead
      # of letting the exception abort the whole command run.
      begin
        page_number =
          parse_pages_specification(page_number_str, @doc.pages.count).first&.first
      rescue StandardError => e
        $stderr.puts("Error: #{e}")
        next
      end
      unless page_number
        $stderr.puts("Error: Invalid page number #{page_number_str}")
        next
      end
      page = @doc.pages[page_number]
      case command
      when 'ps'
        $stdout.write(page.contents)
      when 'psd'
        page.process_contents(ContentProcessor.new)
      else
        puts "#{page.oid} #{page.gen} obj"
        serialize(page.value, recursive: false)
        puts "endobj"
      end
    when 'pc', 'page-count'
      puts @doc.pages.count
    when 'search'
      regexp = data.shift
      unless regexp
        $stderr.puts("Error: Missing argument regexp")
        next
      end
      # The pattern is user input; report a malformed one instead of raising.
      begin
        re = Regexp.new(regexp, Regexp::IGNORECASE)
      rescue RegexpError => e
        $stderr.puts("Error: Invalid regular expression: #{e.message}")
        next
      end
      @doc.each do |object|
        if @serializer.serialize(object.value).match?(re)
          puts "#{object.oid} #{object.gen} obj"
          serialize(object.value, recursive: false)
          puts "endobj"
        end
      end
    when 'rev', 'revision'
      if (rev_index = data.shift)
        # Revision numbers are 1-based on the command line.
        rev_index = rev_index.to_i - 1
        if rev_index < 0 || rev_index >= @doc.revisions.count
          $stderr.puts("Error: Invalid revision number specified")
          next
        end
        length = 0
        revision_information do |_, index, _, _, end_offset|
          length = end_offset if index == rev_index
        end
        # Output the file from its start up to the end of the selected revision.
        IO.copy_stream(@doc.revisions.parser.io, $stdout, length, 0)
      else
        puts "Document has #{@doc.revisions.count} revision#{@doc.revisions.count == 1 ? '' : 's'}"
        revision_information do |rev, index, count, signature, end_offset|
          type = if rev.trailer[:XRefStm]
                   "xref table + stream"
                 elsif rev.trailer[:Type] == :XRef
                   "xref stream"
                 else
                   "xref table"
                 end
          puts "Revision #{index + 1}"
          puts "  Type      : #{type}"
          puts "  Objects   : #{count}"
          puts "  Size      : #{rev.trailer[:Size]}"
          puts "  Signed    : yes" if signature
          puts "  Byte range: 0-#{end_offset}"
        end
      end
    when 'q', 'quit'
      return true
    when 'h', 'help'
      puts COMMAND_DESCRIPTIONS.map {|cmd, desc| cmd.ljust(35) << desc }.join("\n")
    else
      # +command+ is always non-nil here because nil tokens are skipped at the loop start.
      $stderr.puts("Error: Unknown command '#{command}' - enter 'h' for a list of commands")
    end
  end
  false
end

def help_long_desc #:nodoc:

:nodoc:
# Extends the inherited long description with a section listing all interactive mode
# commands taken from COMMAND_DESCRIPTIONS.
def help_long_desc #:nodoc:
  output = super
  description_column = command_parser.main_options.summary_width + 1
  text_width = command_parser.help_line_width - command_parser.help_indent

  intro = <<~HELP
    If a command or an argument is OID[,GEN], object and generation numbers are expected. The
    generation number defaults to 0 if not given. The available commands are:
  HELP
  content = format(intro, indent: 0, width: text_width)
  content << "\n\n"

  COMMAND_DESCRIPTIONS.each do |cmd, desc|
    # Pad the command name so that all descriptions start in the same column.
    entry = cmd.ljust(description_column) << desc
    content << format(entry, width: text_width, indent: description_column,
                      indent_first_line: false) << "\n"
  end

  output << cond_format_help_section("Interactive Mode Commands", content, preformatted: true)
end

def initialize #:nodoc:

:nodoc:
# Sets up the 'inspect' command: descriptions, the --password option and the serializer
# used for printing PDF objects.
def initialize #:nodoc:
  super('inspect', takes_commands: false)
  short_desc("Dig into the internal structure of a PDF file")
  long_desc(<<~DESCRIPTION)
    Inspects a PDF file for debugging or testing purposes. This command is useful when one
    needs to inspect the internal object structure or a stream of a PDF file. A PDF object is
    always shown in the PDF syntax.
    If no arguments are given, the interactive mode is started. Otherwise the arguments are
    interpreted as interactive mode commands and executed. It is possible to specify more than
    one command in this way by separating them with semicolons, or whitespace in case the
    number of command arguments is fixed.
  DESCRIPTION
  options.on("--password PASSWORD", "-p", String,
             "The password for decryption. Use - for reading from standard input.") do |pwd|
    # A single dash means the password is read via #read_password instead.
    @password = if pwd == '-'
                  read_password
                else
                  pwd
                end
  end
  @password = nil
  @serializer = HexaPDF::Serializer.new
end

def pdf_object_from_string_reference(str)

Resolves the PDF object from the given string reference and returns it.
# Resolves the PDF object for the string reference +str+ (format "OID[,GEN]") and returns it.
#
# Raises Error if +str+ is +nil+, is not of the form OID[,GEN], or if the document has no
# object for the reference.
def pdf_object_from_string_reference(str)
  if str.nil?
    raise Error, "Error: Missing argument object identifier OID[,GEN]"
  elsif !str.match?(/\A\d+(,\d+)?\z/)
    # \A/\z anchor the whole string; ^/$ would accept any multi-line string that merely
    # contains one line of the right form.
    raise Error, "Error: Invalid argument: Must be of form OID[,GEN], not '#{str}'"
  elsif !(obj = @doc.object(pdf_reference_from_string(str)))
    raise Error, "Error: No object with the given object identifier '#{str}' found"
  else
    obj
  end
end

def pdf_reference_from_string(str)

Parses the given string of the format "oid[,gen]" and returns a PDF reference object.
# Builds a HexaPDF::Reference from a string of the form "oid[,gen]".
#
# The generation number is 0 if the string contains no second component.
def pdf_reference_from_string(str)
  numbers = str.split(",").map {|part| part.to_i }
  object_number = numbers[0]
  generation = numbers.fetch(1, 0)
  HexaPDF::Reference.new(object_number, generation)
end

def read_input

Returns one line of input, using Reline if available.
# Reads one line of input after showing the "cmd> " prompt.
#
# Reline is used (with history enabled) when its constant is defined; otherwise the prompt
# is printed and the line is read from standard input. The result is whatever the underlying
# read call returns.
def read_input
  return Reline.readline("cmd> ", true) if Object.const_defined?("Reline")

  print "cmd> "
  $stdin.gets
end

def revision_information

- The byte offset from the start of the file to the end of the revision
- The signature dictionary if this revision was signed
- The number of objects in the revision
- The index of the revision in terms of all revisions of the document
- The revision object itself

Returns an array of arrays that include the following information:

Yields information about the document's revisions.
# Yields information about each revision of the document, in the order of @doc.revisions.
#
# The block receives: the revision object, its index, the number of objects obtained by
# enumerating the revision, the signature found for it (or +nil+) and the computed end
# offset of the revision. A block is required since +yield+ is called unconditionally.
#
# Returns an array with the block's return value for each revision.
def revision_information
  # Map each revision to a signature: the key is the first revision whose stored object
  # equals the signature object.
  signatures = @doc.signatures.to_h do |sig|
    [@doc.revisions.find {|rev| rev.object(sig) == sig }, sig]
  end
  io = @doc.revisions.parser.io
  # Position at the end so that io.pos below yields the total size of the IO.
  io.seek(0, IO::SEEK_END)
  # Collect every /Prev offset (0 when missing, because of nil.to_i), the parser's startxref
  # offset and the IO size.
  startxrefs = @doc.revisions.map {|rev| rev.trailer[:Prev].to_i } <<
               @doc.revisions.parser.startxref_offset <<
               io.pos
  startxrefs.sort!
  # Drop the smallest entry - presumably the 0 stemming from the revision without /Prev, so
  # that startxrefs[index] lines up with revision +index+; TODO confirm.
  startxrefs.shift
  @doc.revisions.each_with_index.map do |rev, index|
    end_index = 0
    sig = signatures[rev]
    if sig
      # Sum of the last two /ByteRange entries (presumably start offset plus length of the
      # final signed range).
      end_index = sig[:ByteRange][-2] + sig[:ByteRange][-1]
    else
      # Scan forward from this revision's offset in 1000-byte chunks for the %%EOF marker,
      # stopping at the next collected offset.
      io.seek(startxrefs[index], IO::SEEK_SET)
      buffer = ''.b
      while io.pos < startxrefs[index + 1]
        buffer << io.read(1_000)
        if (buffer_index = buffer.index(/(?:\n|\r\n?)\s*%%EOF\s*(?:\n|\r\n?)?/))
          # io.pos - buffer.size is the file offset of the buffer start; $~[0] is the text
          # matched by the regexp above, so the result points just behind the marker.
          end_index = io.pos - buffer.size + buffer_index + $~[0].size
          break
        end
        # Keep only the last 20 bytes so that a marker split across two reads can still
        # be matched.
        buffer = buffer[-20..-1]
      end
    end
    yield(rev, index, rev.each.count, sig, end_index)
  end
end

def serialize(val, recursive: true, seen: {}, indent: 0) #:nodoc:

:nodoc:
specially generated PDF references.
object tree is printed, with object references to already printed objects replaced by
Prints the serialized value to the standard output. If +recursive+ is +true+, then the whole
# Prints the serialized form of +val+ to the standard output.
#
# With +recursive+ set to +false+, indirect objects are printed as "OID GEN R" references.
# With +recursive+ set to +true+, hash entries are sorted and every HexaPDF::Object is
# expanded in place and labelled "{obj N}" the first time it is printed; later occurrences
# are printed as "{ref N}". +seen+ maps the data of already printed objects to their labels
# and +indent+ is the current nesting level (two spaces per level). A newline is emitted at
# the end whenever +indent+ is 0.
def serialize(val, recursive: true, seen: {}, indent: 0) #:nodoc:
  # Options that are handed down unchanged to every nested call.
  shared = {recursive: recursive, seen: seen}

  case val
  when Hash
    puts "<<"
    entries = recursive ? val.sort : val
    key_prefix = '  ' * (indent + 1)
    entries.each do |key, value|
      # Entries without a value or with a PDF null value are not printed.
      next if value.nil? || (value.respond_to?(:null?) && value.null?)
      print "#{key_prefix}#{@serializer.serialize_symbol(key)} "
      serialize(value, **shared, indent: indent + 1)
      puts
    end
    print(('  ' * indent) + ">>")
  when Array
    print "["
    val.each do |element|
      serialize(element, **shared, indent: indent)
      print " "
    end
    print "]"
  when HexaPDF::Reference
    serialize(@doc.object(val), **shared, indent: indent)
  when HexaPDF::Object
    if recursive
      if val.nil? || seen.key?(val.data)
        print "{ref #{seen[val.data]}}"
      else
        # Pages are labelled by page number, everything else by a running counter.
        label = (val.type == :Page ? "page #{val.index + 1}" : seen.length + 1)
        seen[val.data] = label
        print "{obj #{label}} "
        serialize(val.value, **shared, indent: indent)
      end
    elsif val.indirect?
      print "#{val.oid} #{val.gen} R"
    else
      serialize(val.value, **shared, indent: indent)
    end
  else
    print @serializer.serialize(val)
  end
  puts if indent == 0
end

def usage_arguments #:nodoc:

:nodoc:
# Returns the synopsis of the positional arguments accepted by the command.
def usage_arguments #:nodoc:
  'FILE [[CMD [ARGS]]...]'
end