class PDF::Reader::TextReceiver

PDF::Reader.file(“somefile.pdf”, receiver)
receiver = PDF::Reader::TextReceiver.new($stdout)
Usage:
is found will be printed to the IO object specified in the constructor.
An example receiver class that processes all text found in a PDF file. All text that
###############################################################################

def begin_document (root)

Called when the document parsing begins
###############################################################################
def begin_document (root)
  @upper_corners = []
end

def begin_page (info)

Called when new page parsing begins
###############################################################################
def begin_page (info)
  @page = info
  @state = [{
    :char_spacing     => 0,
    :word_spacing     => 0,
    :hori_scaling     => 100,
    :leading          => 0,
    :tj_adjustment    => 0,
  }]
  @upper_corners.push(media_box_check(info))
  @output = []
  @line = 0
  @location = 0
  @displacement = {}
  @smallest_y_loc = @upper_corners.last[:ury]
  @written_to = false
end

def begin_page_container (page)

###############################################################################
def begin_page_container (page)
  @upper_corners.push(media_box_check(page))
end

def begin_text_object

PDF operator BT
###############################################################################
def begin_text_object
  @state.push(@state.last.dup)
end

def calculate_line_and_location (new_loc)

###############################################################################
def calculate_line_and_location (new_loc)
  ##puts "calculate_line_and_location(#{new_loc})"
  key = new_loc; key.freeze
  #key = new_loc.to_s # because hashes with string keys are magic (auto-freeze)
  if @written_to
    unless @displacement.has_key?(key)
      if key < @location
        @displacement[key] = @line + 1
      elsif key < @smallest_y_loc
        @displacement[key] = @line + 1
      else
        key = @displacement.keys.find_all {|i| key > i}.sort.last
        @displacement[key] = 0 unless @displacement.has_key?(key)
      end
    end
  else
    @displacement[key] = 0
  end
  @smallest_y_loc = key if key < @smallest_y_loc
  @location = key
  @line = @displacement[key]
  #puts "calculate_line_and_location: @location=#@location @line=#@line smallest_y_loc=#@smallest_y_loc"
end

def end_document

Called when the document parsing ends
###############################################################################
def end_document
  @state.clear
end

def end_page

Called when page parsing ends
###############################################################################
def end_page
  @main_receiver << @output.join("\n")
  @upper_corners.pop
end

def end_page_container

###############################################################################
def end_page_container
  @upper_corners.pop
end

def end_text_object

PDF operator ET
###############################################################################
def end_text_object
  @state.pop
end

def initialize (main_receiver)

Initialize with the library user's receiver
###############################################################################
def initialize (main_receiver)
  @main_receiver = main_receiver
  @upper_corners = []
end

def media_box_check (dict)

###############################################################################
def media_box_check (dict)
  corners = (@upper_corners.last || {:urx => 0, :ury => 0}).dup
  if dict.has_key?('MediaBox')
    media_box = dict['MediaBox']
    corners[:urx] = media_box[2] - media_box[0]
    corners[:ury] = media_box[3] - media_box[1]
  end
  corners
end

def move_text_position (tx, ty)

PDF operator Td
###############################################################################
def move_text_position (tx, ty)
  #puts "#{tx} #{ty} Td"
  calculate_line_and_location(@location + ty)
end

def move_text_position_and_set_leading (tx, ty)

PDF operator TD
###############################################################################
def move_text_position_and_set_leading (tx, ty)
  set_text_leading(ty)# * -1)
  move_text_position(tx, ty)
end

def move_to_next_line_and_show_text (string)

PDF operator '
###############################################################################
def move_to_next_line_and_show_text (string)
  move_to_start_of_next_line
  show_text(string)
end

def move_to_start_of_next_line

PDF operator T*
###############################################################################
def move_to_start_of_next_line
  move_text_position(0, @state.last[:leading])
end

def set_character_spacing (n)

PDF operator Tc
###############################################################################
def set_character_spacing (n)
  @state.last[:char_spacing] = n
end

def set_horizontal_text_scaling (n)

PDF operator Tz
###############################################################################
def set_horizontal_text_scaling (n)
  @state.last[:hori_scaling] = n/100
end

def set_spacing_next_line_show_text (aw, ac, string)

PDF operator "
###############################################################################
def set_spacing_next_line_show_text (aw, ac, string)
  set_word_spacing(aw)
  set_character_spacing(ac)
  move_to_next_line_and_show_text(string)
end

def set_text_leading (n)

PDF operator TL
###############################################################################
def set_text_leading (n)
  @state.last[:leading] = n
end

def set_text_matrix_and_text_line_matrix (a, b, c, d, e, f)

PDF operator Tm
###############################################################################
def set_text_matrix_and_text_line_matrix (a, b, c, d, e, f)
  calculate_line_and_location(f)
end

def set_word_spacing (n)

PDF operator Tw
###############################################################################
def set_word_spacing (n)
  @state.last[:word_spacing] = n
end

def show_text (string)

PDF operator Tj
###############################################################################
def show_text (string)
  #puts "getting line #@line"
  place = (@output[@line] ||= "")
  #place << "  " unless place.empty?
  place << " " * (@state.last[:tj_adjustment].abs/900) if @state.last[:tj_adjustment] < -1000
  place << string
  #puts "place is now: #{place}"
  @written_to = true
end

def show_text_with_positioning (params)

PDF operator TJ
###############################################################################
def show_text_with_positioning (params)
  prev_adjustment = @state.last[:tj_adjustment]
  params.each do |p|
    case p
    when Float
      @state.last[:tj_adjustment] = p
    else
      show_text(p)
    end
  end
  @state.last[:tj_adjustment]  = prev_adjustment
end

def super_show_text (string)

def super_show_text (string)
  urx = @upper_corners.last[:urx]/TS_UNITS_PER_H_CHAR
  ury = @upper_corners.last[:ury]/TS_UNITS_PER_V_CHAR
  x = (@tm[2,0]/TS_UNITS_PER_H_CHAR).to_i
  y = (ury - (@tm[2,1]/TS_UNITS_PER_V_CHAR)).to_i
  
  #puts "rendering '#{string}' to #{x}x#{y}"
  place = (@output[y] ||= (" " * urx.to_i))
  #puts "#{urx} #{place.size} #{string.size} #{x}"
  return if x+string.size >= urx
  string.split(//).each do |c|
    chars = 1
    case c
    when " "
      chars += @state.last[:word_spacing].to_i
      place[x-1, chars] = (" " * chars)
    else
      chars += @state.last[:char_spacing].to_i
      chars -= (@state.last[:tj_adjustment]/1000).to_i if @state.last[:tj_adjustment]
      chars = 1 if chars < 1
      place[x-1] = c
      place[x, chars-1] = (" " * (chars-1)) if chars > 1
    end
    x += chars
  end
  @tm += Matrix.rows([[1, 0, 0], [0, 1, 0], [x*TS_UNITS_PER_H_CHAR, y*TS_UNITS_PER_V_CHAR, 1]])
end