class PDF::Reader::TextReceiver
PDF::Reader.file("somefile.pdf", receiver)
receiver = PDF::Reader::TextReceiver.new($stdout)
Usage:
is found will be printed to the IO object specified in the constructor.
An example receiver class that processes all text found in a PDF file. All text that
###############################################################################
def begin_document (root)
###############################################################################
# Callback for the start of a document.
#
# Resets the stack of page dimensions to an empty list. The +root+
# argument is accepted but not used by this receiver.
def begin_document(root)
  @upper_corners = []
end
def begin_page (info)
###############################################################################
# Callback for the start of a page: resets all per-page text state.
#
# info - the page dictionary. It is stored in @page and handed to
#        media_box_check to work out the dimensions for this page.
#
# Returns false (the value assigned to @written_to).
def begin_page(info)
  @page = info

  # Base text state. begin_text_object pushes a copy of the top entry and
  # end_text_object pops it again.
  # NOTE(review): :hori_scaling starts at 100 here, whereas
  # set_horizontal_text_scaling stores its argument divided by 100 --
  # confirm which unit consumers of this value expect.
  @state = [{
    :char_spacing  => 0,
    :word_spacing  => 0,
    :hori_scaling  => 100,
    :leading       => 0,
    :tj_adjustment => 0,
  }]

  @upper_corners.push(media_box_check(info))

  @output = []        # one string per output line, indexed by @line
  @line = 0           # index into @output currently being written
  @location = 0       # last vertical position seen
  @displacement = {}  # vertical position => index into @output
  @smallest_y_loc = @upper_corners.last[:ury]
  @written_to = false # flipped to true by show_text
end
def begin_page_container (page)
# Callback for the start of a page container.
#
# Computes the dimensions that apply to +page+ via media_box_check and
# pushes them onto the @upper_corners stack. Returns that stack.
def begin_page_container(page)
  corners = media_box_check(page)
  @upper_corners << corners
end
def begin_text_object
###############################################################################
# Callback for the start of a text object.
#
# Pushes a shallow copy of the current text state onto @state, so that
# changes made inside the text object are discarded again when
# end_text_object pops it.
def begin_text_object
  @state.push(@state.last.dup)
end
def calculate_line_and_location (new_loc)
# Maps a vertical position to an output line and makes it current.
#
# new_loc - the new vertical position (a number comparable with the
#           positions seen so far).
#
# @displacement remembers which output line each position was assigned.
# Until something has been written (@written_to is false) every position
# maps to line 0. Afterwards:
#   * a position already seen reuses its line;
#   * an unseen position below the current location, or below the lowest
#     position seen so far, starts the line after the current one;
#   * any other unseen position snaps to the closest known position
#     beneath it and reuses that position's line.
#
# Updates @smallest_y_loc, @location and @line. Returns the new @line.
def calculate_line_and_location(new_loc)
  key = new_loc

  if @written_to
    unless @displacement.has_key?(key)
      if key < @location || key < @smallest_y_loc
        @displacement[key] = @line + 1
      else
        nearest_below = @displacement.keys.select { |known| known < key }.max
        # When nothing is known beneath this position there is nothing to
        # snap to; keep the position itself (mapped to line 0 below) rather
        # than continuing with a nil key, which would raise on comparison.
        key = nearest_below unless nearest_below.nil?
        @displacement[key] = 0 unless @displacement.has_key?(key)
      end
    end
  else
    @displacement[key] = 0
  end

  @smallest_y_loc = key if key < @smallest_y_loc
  @location = key
  @line = @displacement[key]
end
def end_document
###############################################################################
# Callback for the end of a document: empties the text state stack.
#
# @state is only created by begin_page, so it is still nil when no page
# was ever started; in that case there is nothing to clear.
def end_document
  @state.clear if @state
end
def end_page
###############################################################################
# Callback for the end of a page.
#
# Joins the collected output lines with newlines, appends the result to
# the receiver given to the constructor (anything responding to <<), and
# drops this page's dimensions from the @upper_corners stack.
#
# Returns the popped dimensions.
def end_page
  @main_receiver << @output.join("\n")
  @upper_corners.pop
end
def end_page_container
# Callback for the end of a page container: removes the dimensions that
# begin_page_container pushed onto @upper_corners and returns them.
def end_page_container
  @upper_corners.pop
end
def end_text_object
###############################################################################
# Callback for the end of a text object: discards the text state copy
# that begin_text_object pushed and returns it.
def end_text_object
  @state.pop
end
def initialize (main_receiver)
###############################################################################
# Creates a receiver.
#
# main_receiver - the object each page's text is appended to with <<
#                 (see end_page).
def initialize(main_receiver)
  @main_receiver = main_receiver
  @upper_corners = []
end
def media_box_check (dict)
# Works out the dimensions that apply to a page or page container.
#
# dict - a dictionary that may carry a 'MediaBox' entry, indexed as
#        media_box[0]..media_box[3].
#
# Starts from a copy of the innermost dimensions on @upper_corners (or
# 0 x 0 when the stack is empty) and, if +dict+ has a 'MediaBox',
# overrides them with the box's extent: element 2 minus element 0 for
# :urx and element 3 minus element 1 for :ury.
#
# Returns a new Hash with :urx and :ury; the stack entry is not modified.
def media_box_check(dict)
  corners = (@upper_corners.last || { :urx => 0, :ury => 0 }).dup

  if dict.has_key?('MediaBox')
    media_box = dict['MediaBox']
    corners[:urx] = media_box[2] - media_box[0]
    corners[:ury] = media_box[3] - media_box[1]
  end

  corners
end
def move_text_position (tx, ty)
###############################################################################
# Callback for a relative text move (the "Td" operator).
#
# tx - horizontal offset; ignored, since only vertical position is tracked.
# ty - vertical offset, added to the current @location.
#
# Delegates to calculate_line_and_location with the new vertical position.
def move_text_position(tx, ty)
  calculate_line_and_location(@location + ty)
end
def move_text_position_and_set_leading (tx, ty)
###############################################################################
# Callback that sets the text leading and then moves the text position.
#
# tx, ty - the offset, passed unchanged to move_text_position; +ty+ is
#          also stored as the new leading.
#
# NOTE(review): the leading is stored as +ty+ itself, not negated, and
# move_to_start_of_next_line moves by the stored leading as-is. Confirm
# this sign convention also suits leading set directly through
# set_text_leading.
def move_text_position_and_set_leading(tx, ty)
  set_text_leading(ty)
  move_text_position(tx, ty)
end
def move_to_next_line_and_show_text (string)
###############################################################################
# Callback that starts a new line and then shows +string+ on it.
#
# The two steps are separate statements on purpose: the line move must
# happen before the text is written, and move_to_start_of_next_line takes
# no arguments.
def move_to_next_line_and_show_text(string)
  move_to_start_of_next_line
  show_text(string)
end
def move_to_start_of_next_line
###############################################################################
# Callback that moves to the start of the next line.
#
# Implemented as a relative move of (0, leading), where leading is the
# :leading value of the current text state.
def move_to_start_of_next_line
  move_text_position(0, @state.last[:leading])
end
def set_character_spacing (n)
###############################################################################
# Callback that records the character spacing.
#
# Stores +n+ under :char_spacing in the current (top-most) text state
# and returns +n+.
def set_character_spacing(n)
  current = @state.last
  current[:char_spacing] = n
end
def set_horizontal_text_scaling (n)
###############################################################################
# Callback that records the horizontal text scaling.
#
# n - the scaling value; it is divided by 100 before being stored under
#     :hori_scaling in the current text state.
#
# Float division is used so integer arguments are not truncated
# (50 is stored as 0.5 rather than 0).
#
# NOTE(review): begin_page initialises :hori_scaling to 100, i.e. not
# divided by 100 -- confirm which unit readers of this value expect.
def set_horizontal_text_scaling(n)
  @state.last[:hori_scaling] = n / 100.0
end
def set_spacing_next_line_show_text (aw, ac, string)
###############################################################################
# Callback that sets word and character spacing, moves to the next line
# and shows +string+, in that order.
#
# aw     - word spacing, passed to set_word_spacing.
# ac     - character spacing, passed to set_character_spacing.
# string - the text, passed to move_to_next_line_and_show_text.
def set_spacing_next_line_show_text(aw, ac, string)
  set_word_spacing(aw)
  set_character_spacing(ac)
  move_to_next_line_and_show_text(string)
end
def set_text_leading (n)
###############################################################################
# Callback that records the text leading.
#
# Stores +n+ under :leading in the current (top-most) text state and
# returns +n+. move_to_start_of_next_line later moves by this value.
def set_text_leading(n)
  current = @state.last
  current[:leading] = n
end
def set_text_matrix_and_text_line_matrix (a, b, c, d, e, f)
###############################################################################
# Callback that sets the text matrix from six values.
#
# Only the last value, +f+, is used: it is treated as the new vertical
# position and handed to calculate_line_and_location. The other five
# arguments are ignored.
def set_text_matrix_and_text_line_matrix(a, b, c, d, e, f)
  vertical_position = f
  calculate_line_and_location(vertical_position)
end
def set_word_spacing (n)
###############################################################################
# Callback that records the word spacing.
#
# Stores +n+ under :word_spacing in the current (top-most) text state
# and returns +n+.
def set_word_spacing(n)
  current = @state.last
  current[:word_spacing] = n
end
def show_text (string)
###############################################################################
# Callback that appends +string+ to the current output line.
#
# The line buffer at @output[@line] is created on first use. When the
# current :tj_adjustment is below -1000, one space per 900 units of
# adjustment (rounded down) is inserted before the text.
#
# Sets @written_to, which changes how calculate_line_and_location treats
# later position changes. Returns true.
def show_text(string)
  place = (@output[@line] ||= "")

  adjustment = @state.last[:tj_adjustment]
  place << " " * (adjustment.abs / 900).to_i if adjustment < -1000

  place << string
  @written_to = true
end
def show_text_with_positioning (params)
###############################################################################
# Callback that shows a sequence of strings interleaved with numeric
# position adjustments.
#
# params - an Array whose numeric elements are adjustments and whose
#          other elements are passed to show_text.
#
# Each number becomes the :tj_adjustment seen by the show_text calls that
# follow it. Any Numeric counts, not just Float, so an Integer adjustment
# is not mistaken for text and appended as a character code. The
# adjustment in force before the call is restored afterwards and returned.
def show_text_with_positioning(params)
  prev_adjustment = @state.last[:tj_adjustment]

  params.each do |p|
    case p
    when Numeric
      @state.last[:tj_adjustment] = p
    else
      show_text(p)
    end
  end

  @state.last[:tj_adjustment] = prev_adjustment
end
def super_show_text (string)
# Alternative text renderer that places characters on a fixed-width grid.
#
# NOTE(review): this method relies on @tm, Matrix, TS_UNITS_PER_H_CHAR and
# TS_UNITS_PER_V_CHAR, none of which are defined in the code visible
# here, and nothing visible calls it -- confirm it is still in use.
#
# string - the text to place.
#
# The page size and the position read from @tm[2,0] / @tm[2,1] are
# converted to character cells by dividing by the TS_UNITS_* constants.
# The row buffer at @output[y] is created as a run of spaces the width of
# the page. Nothing is drawn (and nil is returned) when the text would
# reach or pass the right-hand edge.
def super_show_text(string)
  urx = @upper_corners.last[:urx]/TS_UNITS_PER_H_CHAR
  ury = @upper_corners.last[:ury]/TS_UNITS_PER_V_CHAR

  x = (@tm[2,0]/TS_UNITS_PER_H_CHAR).to_i
  y = (ury - (@tm[2,1]/TS_UNITS_PER_V_CHAR)).to_i

  place = (@output[y] ||= (" " * urx.to_i))

  return if x+string.size >= urx

  string.split(//).each do |c|
    chars = 1
    case c
    when " "
      # A space occupies one cell plus the current word spacing.
      chars += @state.last[:word_spacing].to_i
      place[x-1, chars] = (" " * chars)
    else
      # Other characters occupy one cell plus character spacing, reduced
      # by the current adjustment in thousands, but never less than one.
      chars += @state.last[:char_spacing].to_i
      chars -= (@state.last[:tj_adjustment]/1000).to_i if @state.last[:tj_adjustment]
      chars = 1 if chars < 1
      place[x-1] = c
      place[x, chars-1] = (" " * (chars-1)) if chars > 1
    end
    x += chars
  end

  # Fold the final cell position back into @tm.
  @tm += Matrix.rows([[1, 0, 0], [0, 1, 0], [x*TS_UNITS_PER_H_CHAR, y*TS_UNITS_PER_V_CHAR, 1]])
end