class PDF::Reader::Buffer
An internal PDF::Reader class that mediates access to the underlying PDF File or IO Stream
###############################################################################
def eof?
Returns true if the underlying IO object is at its end and the internal buffer is empty.
###############################################################################
# Reports whether there is nothing left to consume.
#
# Returns true only when the underlying IO is at its end and any internally
# buffered text has been used up. When nothing has been buffered yet, the
# answer comes from the IO alone.
def eof?
  return @io.eof? unless @buffer

  @buffer.empty? && @io.eof?
end
def find_first_xref_offset
The Xref table in a PDF file acts as an aid for finding the location of various
objects in the file. This method attempts to locate the byte offset of the xref table.
###############################################################################
# Locates the byte offset of the xref table referenced at the end of the file.
#
# Only the final 1024 bytes of the IO are examined. The value returned is the
# integer parsed (via to_i) from the line directly before the last "%%EOF"
# marker found in that tail.
#
# Raises MalformedPDFError when the tail holds no "%%EOF" marker (this includes
# an empty IO) or when no line precedes the marker.
def find_first_xref_offset
  begin
    @io.seek(-1024, IO::SEEK_END)
  rescue StandardError
    # the IO could not be positioned 1024 bytes before its end (for example it
    # is shorter than that), so scan from the very start instead
    seek(0)
  end

  # IO#read returns nil when there is nothing left to read; treat that as an
  # empty tail so the caller gets MalformedPDFError rather than NoMethodError
  data = @io.read(1024) || ""

  # the PDF 1.7 spec (section #3.4) says that EOL markers can be either \r, \n, or both.
  # To ensure we find the xref offset correctly, change all possible options to a
  # standard format
  data = data.gsub("\r\n", "\n").gsub("\n\r", "\n").gsub("\r", "\n")

  # walk the lines backwards so the last marker in the file wins
  lines = data.split(/\n/).reverse
  eof_index = lines.index { |line| line =~ /^%%EOF\r?$/ }

  raise MalformedPDFError, "PDF does not contain EOF marker" if eof_index.nil?
  raise MalformedPDFError, "PDF EOF marker does not follow offset" if eof_index >= lines.size - 1

  # in the reversed list, the entry after the marker is the line that precedes it in the file
  lines[eof_index + 1].to_i
end
def head (chars, with_strip=true)
# Removes the first +chars+ characters from the internal buffer and returns them.
#
# chars      - how many characters to take from the front of the buffer
# with_strip - when true (the default) leading whitespace is stripped from
#              whatever is left in the buffer afterwards
#
# The buffer becomes an empty string when +chars+ reaches past its end.
def head(chars, with_strip = true)
  taken = @buffer[0, chars]

  remainder = @buffer[chars..-1]
  @buffer = remainder.nil? ? "" : remainder
  @buffer.lstrip! if with_strip

  taken
end
def initialize (io)
###############################################################################
# Creates a new buffer around an IO.
#
# io - the object all reads and seeks are delegated to; the methods of this
#      class call read, readline, seek, pos and eof? on it
def initialize(io)
  @io = io
  # nothing is buffered until a token is requested
  @buffer = nil
end
def pos
# Returns the current position as reported by the underlying IO.
#
# The internal buffer is not taken into account.
def pos
  @io.pos
end
def raw
###############################################################################
# Returns the internal buffer object itself, unmodified (nil when nothing has
# been buffered).
def raw
  @buffer
end
def read (length)
Reads the requested number of bytes from the underlying IO stream.
###############################################################################
# Reads up to +length+ characters, draining the internal buffer before
# touching the underlying IO.
#
# length - the number of characters wanted
#
# Returns a String. It may be shorter than requested (possibly empty) when the
# IO runs out of data.
def read(length)
  out = ""

  if @buffer && !@buffer.empty?
    # buffered text has already been pulled from the IO, so it goes first
    out << head(length)
    length -= out.length
  end

  if length > 0
    # IO#read(n) returns nil once the IO is exhausted; appending nil would
    # raise TypeError, so only append when something was actually read
    chunk = @io.read(length)
    out << chunk if chunk
  end

  out
end
def read_until(bytes)
Reads from the buffer until the specified token is found, or the end of the buffer is reached.
###############################################################################
# Reads one character at a time straight from the underlying IO until +bytes+
# has been seen or the IO runs out of data.
#
# bytes - the terminating String to look for
#
# Returns everything read before the terminator. When the terminator is found
# it is left out of the result and the IO is repositioned to where the
# terminator starts. When the IO ends first, whatever was collected is
# returned as-is.
def read_until(bytes)
  out = ""
  size = bytes.size

  loop do
    char = @io.read(1)
    # IO#read returns nil at end of input; stop instead of appending nil
    break if char.nil?

    out << char
    next unless out[-1 * size, size].eql?(bytes)

    # drop the terminator from the result and rewind so it is read again next
    out = out[0, out.size - size]
    seek(pos - size)
    break
  end

  out
end
def ready_token (with_strip=true, skip_blanks=true)
PDF files are processed by tokenising the content into a series of objects and commands.
###############################################################################
# Makes sure the internal buffer has content to tokenise, pulling lines from
# the underlying IO when it is nil or empty.
#
# with_strip  - when true (the default) leading whitespace is removed from the
#               buffer before returning
# skip_blanks - when true (the default) lines that are empty after chomping are
#               skipped; when false at most one line is read
#
# Each line read is tagged as BINARY (where the string supports encodings) and
# has its line ending removed. Lines are fetched with readline, so an
# exhausted IO raises whatever readline raises.
def ready_token(with_strip = true, skip_blanks = true)
  until @buffer && !@buffer.empty?
    @buffer = @io.readline
    @buffer.force_encoding("BINARY") if @buffer.respond_to?(:force_encoding)
    # comments are not removed here; they are left in the buffer
    @buffer.chomp!
    break unless skip_blanks
  end

  @buffer.lstrip! if with_strip
end
def seek (offset)
###############################################################################
# Moves the underlying IO to an absolute position and throws away anything
# held in the internal buffer.
#
# offset - the position to move to, measured from the start of the IO
#
# Returns self.
def seek(offset)
  @io.seek(offset, IO::SEEK_SET)
  # whatever was buffered belongs to the previous position
  @buffer = nil
  self
end
def token
###############################################################################
# Returns the next token from the buffer as a String.
#
# A token ends at the first delimiter character: one of [ ] ( ) < > { } /
# or whitespace. When the buffer starts with a delimiter, that delimiter is
# the token: "<<" and ">>" are returned as two-character tokens, anything else
# as a single character.
#
# After an opening "(" the remaining buffer keeps its leading whitespace; in
# every other case leading whitespace is stripped from what is left.
#
# A token beginning with "%" causes the rest of the buffered line to be
# discarded and the following token to be returned instead.
def token
  ready_token

  boundary = @buffer.index(/[\[\]()<>{}\s\/]/) || @buffer.size

  width =
    if boundary > 0
      # ordinary token: everything up to the first delimiter
      boundary
    elsif %w[<< >>].include?(@buffer[0, 2])
      2
    else
      1
    end

  keep_whitespace = boundary == 0 && @buffer[0, 1] == '('
  tok = head(width, !keep_whitespace)

  return tok unless tok[0, 1] == "%"

  # drop the remainder of the line and move on to the next token
  @buffer = ""
  token
end