class PDF::Reader::Parser
them into usable Ruby objects (hashes, arrays, true, false, etc.)
An internal PDF::Reader class that reads objects from the PDF file and converts
###############################################################################
def array
###############################################################################
# Collects parsed tokens into a Ruby Array until the closing "]" is reached.
#
# Each element comes from parse_token, so nested values are converted by
# whatever parse_token returns for them.
# NOTE(review): presumably called after the opening "[" has already been
# consumed — confirm against the STRATEGIES table.
#
# Returns the Array of parsed elements (empty when "]" is the first token).
def array
  items = []

  loop do
    element = parse_token
    # Only a Token equal to "]" ends the array; any other value is stored.
    return items if element.kind_of?(Token) && element == "]"

    items << element
  end
end
def dictionary
###############################################################################
# Reads key/value pairs with parse_token until the closing ">>" token and
# returns them as a Ruby Hash.
#
# Raises MalformedPDFError when a key is not a Symbol. A value that is the
# ">>" Token (a key with no value) is handed to Error.str_assert_not.
#
# Returns the Hash of parsed entries (empty when ">>" is the first token).
def dictionary
  entries = {}

  loop do
    name = parse_token
    return entries if name.kind_of?(Token) && name == ">>"

    unless name.kind_of?(Symbol)
      raise MalformedPDFError, "Dictionary key (#{name.inspect}) is not a name"
    end

    entry = parse_token
    # Only Token values are checked; other parsed values are stored as-is.
    Error.str_assert_not(entry, ">>") if entry.kind_of?(Token)
    entries[name] = entry
  end
end
def hex_string
###############################################################################
# Reads a hexadecimal string from the buffer and returns the decoded bytes
# as a Ruby String.
#
# Tokens are concatenated until the closing ">" token is seen, then every
# pair of hex digits is converted to one character.
#
# Raises MalformedPDFError when the buffer yields nil before ">" is found
# (presumably the end of the buffer), instead of failing with a TypeError
# while appending nil.
def hex_string
  str = ""

  loop do
    token = @buffer.token
    break if token == ">"
    raise MalformedPDFError, "unterminated hex string" if token.nil?

    str << token
  end

  # add a missing digit if required, as required by the spec
  str << "0" unless str.size % 2 == 0
  str.scan(/../).map { |pair| pair.hex.chr }.join
end
def initialize (buffer, objects=nil)
buffer - a PDF::Reader::Buffer object that contains PDF data
Create a new parser around a PDF::Reader::Buffer object
###############################################################################
# Builds a parser that reads its tokens from +buffer+.
#
# buffer  - the token source; the other methods call #token and #read on it
# objects - optional object that responds to #deref, used when reading
#           streams to resolve the :Length entry (defaults to nil)
def initialize(buffer, objects = nil)
  @buffer  = buffer
  @objects = objects
end
def object (id, gen)
id - the object ID to return
that describes it
If the object is a content stream, returns both the stream and the dictionary
Reads an entire PDF object from the buffer and returns it as a Ruby object.
###############################################################################
# Reads one complete indirect object from the buffer.
#
# The next three tokens are checked against +id+, +gen+ and the "obj"
# keyword through the Error assertion helpers. The object body is then
# parsed; if the token after the body is "stream", the body is passed to
# #stream and that result is returned instead.
#
# id  - the expected object ID
# gen - the expected generation number
#
# Returns the parsed body, or the result of #stream for stream objects.
def object(id, gen)
  Error.assert_equal(parse_token, id)
  Error.assert_equal(parse_token, gen)
  Error.str_assert(parse_token, "obj")

  body    = parse_token
  trailer = parse_token
  return stream(body) if trailer == "stream"

  body
end
def parse_token (operators={})
object
Reads the next token from the underlying buffer and converts it to an appropriate
###############################################################################
# Fetches the next token from the buffer and converts it to a Ruby value.
#
# The checks run in this order, and the first match wins:
#   1. a token with an entry in STRATEGIES is handled by that entry
#   2. a PDF::Reader::Reference or a Token is returned unchanged
#   3. a token present as a key in +operators+ is wrapped in a Token
#   4. a token responding to #to_token is converted with it
#   5. a token containing a "." followed by a digit becomes a Float
#   6. anything else is converted with #to_i
#
# operators - Hash whose keys are tokens to be treated as operators
#             (defaults to an empty Hash)
def parse_token(operators = {})
  raw = @buffer.token

  return STRATEGIES[raw].call(self, raw) if STRATEGIES.has_key?(raw)
  return raw if raw.is_a?(PDF::Reader::Reference)
  return raw if raw.is_a?(Token)
  return Token.new(raw) if operators.has_key?(raw)
  return raw.to_token if raw.respond_to?(:to_token)

  (raw =~ /\d*\.\d/) ? raw.to_f : raw.to_i
end
def pdf_name
###############################################################################
# Reads the next token from the buffer and returns it as a Symbol, decoding
# any "#xx" hexadecimal escapes into the character they encode.
#
# The token is never modified in place: it is the buffer's own string, so an
# in-place gsub! would raise on a frozen token and would alter a string the
# buffer may still be holding.
def pdf_name
  tok = @buffer.token
  # Ruby before 1.9 cannot turn an empty String into a Symbol.
  tok = " " if tok == "" && RUBY_VERSION < "1.9"

  decoded = tok.gsub(/#([A-Fa-f0-9]{2})/) do |match|
    # match is "#xx"; characters 1..2 are the two hex digits
    match[1, 2].hex.chr
  end

  decoded.to_sym
end
def stream (dict)
###############################################################################
# Reads the body of a stream object whose dictionary is +dict+.
#
# The byte count comes from dict[:Length]. When the parser was given an
# objects collaborator the value is passed through its #deref; otherwise it
# is used directly, with a nil value treated as 0. After the data, the
# "endstream" and "endobj" tokens are checked with Error.str_assert.
#
# dict - the stream dictionary; must have a :Length key
#
# Raises MalformedPDFError when +dict+ has no :Length key.
# Returns a PDF::Reader::Stream built from +dict+ and the data read.
def stream(dict)
  unless dict.has_key?(:Length)
    raise MalformedPDFError, "PDF malformed, missing stream length"
  end

  length = @objects ? @objects.deref(dict[:Length]) : (dict[:Length] || 0)
  data   = @buffer.read(length, :skip_eol => true)

  Error.str_assert(parse_token, "endstream")
  Error.str_assert(parse_token, "endobj")

  PDF::Reader::Stream.new(dict, data)
end
def string
###############################################################################
# Reads a literal string from the buffer and returns it with escape sequences
# and end-of-line markers decoded.
#
# The next buffer token is taken as the raw string body; a ")" token means
# the string is empty. Otherwise the following parsed token must be ")",
# which is checked with Error.assert_equal.
#
# Decoding rules:
#   * \n \r \t \b \f \( \) \\ map to the character they name
#   * \ddd (one to three octal digits, 0-7 only) maps to that character code;
#     values above 255 keep only their low eight bits instead of raising
#   * a backslash followed by an end-of-line marker (LF, CR or CR LF) is a
#     line continuation and produces nothing
#   * a backslash before any other character is dropped and the character
#     is kept; a backslash that ends the string is kept literally
#   * an unescaped CR, CR LF or LF CR becomes a single "\n"
def string
  str = @buffer.token
  return "" if str == ")"
  Error.assert_equal(parse_token, ")")

  ret = ""
  idx = 0

  while idx < str.size
    chr  = str[idx, 1]
    jump = 1

    if chr == "\\"
      jump = 2
      case str[idx + 1, 1]
      when ""   then jump = 1 # trailing backslash, nothing follows it
      when "n"  then chr = "\n"
      when "r"  then chr = "\r"
      when "t"  then chr = "\t"
      when "b"  then chr = "\b"
      when "f"  then chr = "\f"
      when "("  then chr = "("
      when ")"  then chr = ")"
      when "\\" then chr = "\\"
      when "\n" then chr = ""
      when "\r"
        # CR alone or CR LF after a backslash is one line continuation
        chr  = ""
        jump = 3 if str[idx + 2, 1] == "\n"
      else
        digits = str[idx + 1, 3][/\A[0-7]{1,3}/]
        if digits
          jump = digits.size + 1
          # mask so that high-order overflow (e.g. \777) is ignored
          chr = (digits.oct & 0xff).chr
        else
          # unknown escape: drop the backslash, reprocess the next character
          jump = 1
          chr  = ""
        end
      end
    elsif chr == "\r" && str[idx + 1, 1] == "\n"
      chr  = "\n"
      jump = 2
    elsif chr == "\n" && str[idx + 1, 1] == "\r"
      chr  = "\n"
      jump = 2
    elsif chr == "\r"
      chr = "\n"
    end

    ret << chr
    idx += jump
  end

  ret
end