lib/origami/parsers/pdf/linear.rb



=begin

    This file is part of Origami, PDF manipulation framework for Ruby
    Copyright (C) 2016	Guillaume Delugré.

    Origami is free software: you can redistribute it and/or modify
    it under the terms of the GNU Lesser General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    Origami is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public License
    along with Origami.  If not, see <http://www.gnu.org/licenses/>.

=end


require 'origami/parsers/pdf'

module Origami

    class PDF

        #
        # Linear PDF parser: reads the document sequentially, one revision after another.
        #
        class LinearParser < Parser
            #
            # Parses +stream+ into a PDF document, reading revisions one after
            # another until the underlying data is exhausted.
            #
            # The parent implementation is invoked first with the same argument
            # (presumably it prepares @data; confirm against Parser#parse).
            #
            # Any StandardError raised while a revision is being read is reported
            # (next unread bytes, exception message, backtrace on STDERR) and ends
            # the loop. The document is still marked as loaded and finalized.
            #
            # Returns the result of parse_finalize.
            #
            def parse(stream)
                super

                pdf = parse_initialize
                revision_index = 0

                until @data.eos?
                    begin
                        # A new revision is only appended from the second pass on.
                        pdf.add_new_revision if revision_index > 0

                        parse_revision(pdf, revision_index)
                        revision_index += 1
                    rescue => e
                        error "Cannot read : " + (@data.peek(10) + "...").inspect
                        error "Stopped on exception : " + e.message
                        STDERR.puts e.backtrace.join($/)

                        # Give up on the remaining data, keep what was parsed so far.
                        break
                    end
                end

                pdf.loaded!
                parse_finalize(pdf)
            end

            private

            #
            # Reads one revision from the current position.
            #
            # Objects are read with parse_object and inserted into +pdf+ until
            # parse_object returns nil. The xref table and trailer are then read
            # and stored on the revision at index +revision_no+, and the
            # matching cross-reference streams are looked up.
            #
            # Returns the populated revision.
            #
            def parse_revision(pdf, revision_no)
                current = pdf.revisions[revision_no]

                info "...Parsing revision #{revision_no + 1}..."

                # Only nil ends the object run; any other value is inserted.
                object = parse_object
                until object.nil?
                    pdf.insert(object)
                    object = parse_object
                end

                current.xreftable = parse_xreftable
                current.trailer = parse_trailer

                locate_xref_streams(pdf, revision_no)

                current
            end

            #
            # Attaches the cross-reference stream of revision +revision_no+, if any.
            #
            # The object located at the trailer's startxref offset is tried first.
            # If that lookup does not yield an XRefStream, the trailer's /XRefStm
            # entry is tried next. This covers hybrid-reference files, where
            # startxref points at the classic xref table and /XRefStm holds the
            # offset of the stream.
            #
            # When a stream is found it is stored on the revision and, if it has
            # a /Prev entry, the streams of earlier revisions are resolved too.
            #
            def locate_xref_streams(pdf, revision_no)
                revision = pdf.revisions[revision_no]
                trailer = revision.trailer
                xrefstm = nil

                # Primary location: the object sitting at the startxref offset.
                if trailer.startxref != 0
                    xrefstm = pdf.get_object_by_offset(trailer.startxref)
                end

                # Fallback: a non-zero startxref that resolves to something else
                # must not hide the /XRefStm entry.
                if !xrefstm.is_a?(XRefStream) && trailer.key?(:XRefStm)
                    xrefstm = pdf.get_object_by_offset(trailer[:XRefStm])
                end

                return unless xrefstm.is_a?(XRefStream)

                warn "Found a XRefStream for revision #{revision_no + 1} at #{xrefstm.reference}"
                revision.xrefstm = xrefstm

                locate_prev_xref_streams(pdf, revision_no, xrefstm) if xrefstm.key?(:Prev)
            end

            #
            # Follows the /Prev chain starting at +xrefstm+ (the stream of
            # revision +revision_no+) and attaches each stream found to the
            # revision just before the one it was reached from.
            #
            # The walk stops at the first revision, when /Prev is not an
            # Integer, when the object at the /Prev offset is not an
            # XRefStream, or when the stream found has no /Prev entry.
            #
            # Always returns nil.
            #
            def locate_prev_xref_streams(pdf, revision_no, xrefstm)
                newer_no = revision_no
                newer_stm = xrefstm

                loop do
                    return unless newer_no > 0 && newer_stm.Prev.is_a?(Integer)

                    older_revision = pdf.revisions[newer_no - 1]
                    older_stm = pdf.get_object_by_offset(newer_stm.Prev.to_i)

                    return unless older_stm.is_a?(XRefStream)

                    warn "Found a previous XRefStream for revision #{newer_no} at #{older_stm.reference}"
                    older_revision.xrefstm = older_stm

                    return unless older_stm.key?(:Prev)

                    # Step one revision back and continue from the stream just found.
                    newer_no -= 1
                    newer_stm = older_stm
                end
            end
        end
    end

end