# coding: utf-8
# typed: true
# frozen_string_literal: true
require 'pdf/reader/transformation_matrix'
class PDF::Reader
# encapsulates logic for tracking graphics state as the instructions for
# a single page are processed. Most of the public methods correspond
# directly to PDF operators.
class PageState
# Initial text-state values for a fresh graphics state. The hash is frozen so
# this shared template can never be modified by accident; take a copy
# (`dup` or `merge`, both of which return an unfrozen hash) before changing
# any value.
DEFAULT_GRAPHICS_STATE = {
  :char_spacing => 0,
  :word_spacing => 0,
  :h_scaling => 1.0,
  :text_leading => 0,
  :text_font => nil,
  :text_font_size => 0,
  :text_mode => 0,
  :text_rise => 0,
  :text_knockout => 0
}.freeze
# starting a new page
def initialize(page)
  # page - supplies the cache, the object store and the page-level fonts,
  #        xobjects and colour spaces that seed the resource stacks
  @page    = page
  @cache   = page.cache
  @objects = page.objects

  # resource stacks, each seeded with the page-level resources
  @font_stack    = [build_fonts(page.fonts)]
  @xobject_stack = [page.xobjects]
  @cs_stack      = [page.color_spaces]

  # the graphics state stack starts with the defaults plus an identity CTM
  @stack = [DEFAULT_GRAPHICS_STATE.merge(:ctm => identity_matrix)]

  # The text matrices are only valid inside a `BT` block and are
  # re-initialized on each `BT`. They are still set here so that PDFs with
  # the text operators out of order don't trigger NoMethodError on nil.
  @text_matrix      = identity_matrix
  @text_line_matrix = identity_matrix
end
#####################################################
# Graphics State Operators
#####################################################
# Clones the current graphics state and pushes it onto the top of the stack.
# Any changes that are subsequently made to the state can then be reversed
# by calling restore_graphics_state.
#
def save_graphics_state
  # the copy becomes the new current state; the previous one stays beneath it
  snapshot = clone_state
  @stack << snapshot
end
# Restore the state to the previous value on the stack.
#
def restore_graphics_state
  # Returns the state that was removed, or nil when there was nothing to
  # restore.
  #
  # An unbalanced restore must not discard the last remaining state: every
  # other method reads the top of the stack and would raise NoMethodError on
  # nil if the stack were empty.
  restored = @stack.size > 1 ? @stack.pop : nil

  # The state now on top may hold a different CTM, font size, scaling or
  # rise, so values memoized from the previous state are stale.
  @font_size = @text_rendering_matrix = nil
  restored
end
#####################################################
# Matrix Operators
#####################################################
# update the current transformation matrix.
#
# If the CTM is currently undefined, just store the new values.
#
# If there's an existing CTM, then multiply the existing matrix
# with the new matrix to form the updated matrix.
#
def concatenate_matrix(a, b, c, d, e, f)
  # a..f - the six values of the matrix to combine with the current CTM
  incoming = TransformationMatrix.new(a, b, c, d, e, f)
  current = state[:ctm]

  state[:ctm] = if current
    incoming.multiply!(
      current.a, current.b,
      current.c, current.d,
      current.e, current.f
    )
  else
    # no CTM defined yet: the new matrix is stored as-is
    incoming
  end

  # The text rendering matrix is built from the CTM and font_size is derived
  # from the text rendering matrix, so both memoized values are now stale.
  @font_size = @text_rendering_matrix = nil
end
#####################################################
# Text Object Operators
#####################################################
# Start a text object: the text matrix and text line matrix both go back to
# the identity matrix.
def begin_text_object
  @text_matrix = identity_matrix
  @text_line_matrix = identity_matrix
  # The text rendering matrix is built from @text_matrix and font_size is
  # derived from it, so neither memoized value survives the reset.
  @font_size = @text_rendering_matrix = nil
end
# End a text object. No state needs updating, so this is a no-op.
def end_text_object
  nil
end
#####################################################
# Text State Operators
#####################################################
# Record the character spacing in the current graphics state.
#
# Returns the value that was stored.
def set_character_spacing(char_spacing)
  state.store(:char_spacing, char_spacing)
end
# Record the horizontal text scaling in the current graphics state.
#
# h_scaling - the scaling as a percentage; it is stored as a fraction
#             (100 becomes 1.0)
#
# Returns the stored fraction.
def set_horizontal_text_scaling(h_scaling)
  # The text rendering matrix is built from :h_scaling, so anything memoized
  # from the old value must be recomputed.
  @font_size = @text_rendering_matrix = nil
  state[:h_scaling] = h_scaling / 100.0
end
# Record the active font label and font size in the current graphics state.
#
# label - the label used to look the font up (see find_font)
# size  - the font size
#
# Returns size.
def set_text_font_and_size(label, size)
  # The text rendering matrix is built from :text_font_size, so anything
  # memoized from the old size must be recomputed.
  @font_size = @text_rendering_matrix = nil
  state[:text_font] = label
  state[:text_font_size] = size
end
# The effective font size: the absolute difference between the transformed
# y co-ordinates of the text-space points (0,0) and (1,1). The result is
# memoized until one of the methods that resets @font_size runs.
def font_size
  return @font_size if @font_size

  origin_y = trm_transform(0, 0)[1]
  unit_y = trm_transform(1, 1)[1]
  @font_size = (origin_y - unit_y).abs
end
# Record the text leading in the current graphics state.
#
# Returns the value that was stored.
def set_text_leading(leading)
  state.store(:text_leading, leading)
end
# Record the text rendering mode in the current graphics state.
#
# Returns the value that was stored.
def set_text_rendering_mode(mode)
  state.store(:text_mode, mode)
end
# Record the text rise in the current graphics state.
#
# Returns the value that was stored.
def set_text_rise(rise)
  # The text rendering matrix is built from :text_rise, so anything memoized
  # from the old value must be recomputed.
  @font_size = @text_rendering_matrix = nil
  state[:text_rise] = rise
end
# Record the word spacing in the current graphics state.
#
# Returns the value that was stored.
def set_word_spacing(word_spacing)
  state.store(:word_spacing, word_spacing)
end
#####################################################
# Text Positioning Operators
#####################################################
# Move to the start of a new line, offset by (x, y) from the start of the
# current line. The text matrix becomes a copy of the updated text line
# matrix.
def move_text_position(x, y) # Td
  line = @text_line_matrix
  offset = TransformationMatrix.new(1, 0, 0, 1, x, y)
  @text_line_matrix = offset.multiply!(
    line.a, line.b,
    line.c, line.d,
    line.e, line.f
  )
  @text_matrix = @text_line_matrix.dup

  # invalidate cached values
  @font_size = nil
  @text_rendering_matrix = nil
end
# Same as move_text_position, but first sets the text leading to the
# negated y offset.
def move_text_position_and_set_leading(x, y) # TD
  leading = -1 * y
  set_text_leading(leading)
  move_text_position(x, y)
end
# Replace the text matrix with a matrix built from the six given values;
# the text line matrix becomes a separate copy of it.
def set_text_matrix_and_text_line_matrix(a, b, c, d, e, f) # Tm
  replacement = TransformationMatrix.new(a, b, c, d, e, f)
  @text_matrix = replacement
  @text_line_matrix = replacement.dup

  # invalidate cached values
  @font_size = nil
  @text_rendering_matrix = nil
end
# Move down by the current text leading, with no horizontal offset.
def move_to_start_of_next_line # T*
  leading = state[:text_leading]
  move_text_position(0, -leading)
end
#####################################################
# Text Showing Operators
#####################################################
# Currently a no-op: params is accepted but not used.
def show_text_with_positioning(params) # TJ
  # TODO record position changes in state here
  nil
end
# Only the move to the next line is tracked here; str is accepted but not
# used.
def move_to_next_line_and_show_text(str) # '
  move_to_start_of_next_line
end
# Set the word spacing (aw) and character spacing (ac), then behave like
# move_to_next_line_and_show_text for the given string.
def set_spacing_next_line_show_text(aw, ac, string) # "
  set_word_spacing(aw)
  set_character_spacing(ac)

  move_to_next_line_and_show_text(string)
end
#####################################################
# XObjects
#####################################################
# Look up the XObject registered under label and yield it to the block
# inside its own saved graphics state.
#
# A :Matrix entry, when present, is concatenated onto the CTM first. A Form
# XObject is wrapped in a PDF::Reader::FormXObject and its fonts and
# xobjects are visible to find_font / find_xobject while the block runs.
#
# Raises MalformedPDFError when no XObject with that label can be found.
#
# Returns whatever restore_graphics_state returns.
def invoke_xobject(label)
  # Look the XObject up before saving, so a missing XObject does not leave
  # an extra state on the stack.
  xobject = find_xobject(label)
  raise MalformedPDFError, "XObject #{label} not found" if xobject.nil?

  save_graphics_state
  restored = nil
  begin
    matrix = xobject.hash[:Matrix]
    concatenate_matrix(*matrix) if matrix

    if xobject.hash[:Subtype] == :Form
      form = PDF::Reader::FormXObject.new(@page, xobject, :cache => @cache)
      # read both resource sets before pushing either, so a failure here
      # cannot leave the two stacks out of step
      form_fonts = form.font_objects
      form_xobjects = form.xobjects
      @font_stack.unshift(form_fonts)
      @xobject_stack.unshift(form_xobjects)
      begin
        yield form if block_given?
      ensure
        # drop the form resources even when the block raises
        @font_stack.shift
        @xobject_stack.shift
      end
    else
      yield xobject if block_given?
    end
  ensure
    # always balance the save above, even when the block raises
    restored = restore_graphics_state
  end
  restored
end
#####################################################
# Public Visible State
#####################################################
# transform x and y co-ordinates from the current user space to the
# underlying device space.
#
def ctm_transform(x, y)
  # Returns a two element array: [transformed x, transformed y]
  m = ctm
  device_x = (m.a * x) + (m.c * y) + (m.e)
  device_y = (m.b * x) + (m.d * y) + (m.f)
  [device_x, device_y]
end
# transform x and y co-ordinates from the current text space to the
# underlying device space.
#
# transforming (0,0) is a really common case, so optimise for it to
# avoid unnecessary object allocations
#
def trm_transform(x, y)
  # Returns a two element array: [transformed x, transformed y]
  trm = text_rendering_matrix

  # fast path for the origin: only the translation components are needed
  return [trm.e, trm.f] if x == 0 && y == 0

  [
    (trm.a * x) + (trm.c * y) + (trm.e),
    (trm.b * x) + (trm.d * y) + (trm.f)
  ]
end
# The font registered under the label stored in the current graphics
# state, or nil if find_font finds none.
def current_font
  label = state[:text_font]
  find_font(label)
end
# Search the font stack from front to back and return the font stored under
# label in the first entry that has it, or nil when no entry does.
def find_font(label)
  scope = @font_stack.find { |fonts| fonts.has_key?(label) }
  scope[label] if scope
end
# Search the colour space stack from front to back and return the entry
# stored under label in the first hash that has it, or nil when none does.
def find_color_space(label)
  scope = @cs_stack.find { |colorspaces| colorspaces.has_key?(label) }
  scope[label] if scope
end
# Search the xobject stack from front to back and return the xobject stored
# under label in the first entry that has it, or nil when no entry does.
def find_xobject(label)
  scope = @xobject_stack.find { |xobjects| xobjects.has_key?(label) }
  scope[label] if scope
end
# The number of graphics states currently on the stack. Each call to
# save_graphics_state pushes a new copy of the current state onto the
# stack, so that any modifications to the state are undone once
# restore_graphics_state is called.
#
def stack_depth
  @stack.length
end
# This returns a deep clone of the current state, ensuring changes are
# kept separate from earlier states.
#
# Marshal is used to round-trip the state through a string to easily
# perform the deep clone. Kinda hacky, but effective.
#
def clone_state
  # with nothing on the stack there is nothing to copy
  return {} if @stack.empty?

  serialized = Marshal.dump(@stack.last)
  Marshal.load(serialized)
end
# after each glyph is painted onto the page the text matrix must be
# modified. There's no defined operator for this, but depending on
# the use case some receivers may need to mutate the state with this
# while walking a page.
#
# NOTE: some of the variable names in this method are obscure because
# they mirror variable names from the PDF spec
#
# NOTE: see Section 9.4.4, PDF 32000-1:2008, pp 252
#
# Arguments:
#
# w0 - the glyph width in *text space*. This generally means the width
# in glyph space should be divided by 1000 before being passed to
# this function
# tj - any kerning that should be applied to the text matrix before the
# following glyph is painted. This is usually the numeric arguments
# in the array passed to a TJ operator
# word_boundary - a boolean indicating if a word boundary was just
# reached. Depending on the current state extra space
# may need to be added
#
def process_glyph_displacement(w0, tj, word_boundary)
  fs = state[:text_font_size]
  tc = state[:char_spacing]
  # word spacing only counts when a word boundary was just reached
  tw = word_boundary ? state[:word_spacing] : 0
  th = state[:h_scaling]

  # optimise the common path to reduce Float allocations
  nothing_extra = th == 1 && tj == 0 && tc == 0 && tw == 0
  tx = if nothing_extra
    w0 * fs
  elsif tj != 0
    # don't apply spacing to TJ displacement
    (w0 - (tj/1000.0)) * fs * th
  else
    # apply horizontal scaling to spacing values but not font size
    ((w0 * fs) + tc + tw) * th
  end

  # TODO: support ty > 0
  ty = 0

  # shift the text matrix by the computed displacement
  current = @text_matrix
  shift = TransformationMatrix.new(1, 0, 0, 1, tx, ty)
  @text_matrix = shift.multiply!(
    current.a, current.b,
    current.c, current.d,
    current.e, current.f
  )

  # invalidate cached values
  @font_size = nil
  @text_rendering_matrix = nil
end
private
# used for many and varied text positioning calculations. We potentially
# need to access the results of this method many times when working with
# text, so memoize it
#
def text_rendering_matrix
  return @text_rendering_matrix if @text_rendering_matrix

  # start from the matrix built from font size, horizontal scaling and rise
  trm = TransformationMatrix.new(
    state[:text_font_size] * state[:h_scaling], 0,
    0, state[:text_font_size],
    0, state[:text_rise]
  )

  # ...then combine it with the text matrix...
  text = @text_matrix
  trm.multiply!(
    text.a, text.b,
    text.c, text.d,
    text.e, text.f
  )

  # ...and finally with the CTM; the result of this call is what gets cached
  device = ctm
  @text_rendering_matrix = trm.multiply!(
    device.a, device.b,
    device.c, device.d,
    device.e, device.f
  )
end
# return the current transformation matrix
#
def ctm
  current = state
  current[:ctm]
end
# The current graphics state: the top entry of the stack (nil when the
# stack is empty).
def state
  @stack[-1]
end
# wrap the raw PDF Font objects in handy ruby Font objects.
#
def build_fonts(raw_fonts)
  # Returns a Hash mapping each label to a PDF::Reader::Font. A font that
  # dereferences to nil is wrapped around an empty hash instead.
  raw_fonts.map { |label, font|
    font_dict = @objects.deref_hash(font) || {}
    [label, PDF::Reader::Font.new(@objects, font_dict)]
  }.to_h
end
#####################################################
# Low-level Matrix Operations
#####################################################
# Geometric transformations are represented by TransformationMatrix
# instances, which this class builds from six values (a, b, c, d, e, f).
# NOTE(review): presumably these are the six variable entries of the 3x3
# matrix below, as in the PDF spec - confirm against TransformationMatrix:
# a b 0
# c d 0
# e f 1
# A new TransformationMatrix holding the identity values (1, 0, 0, 1, 0, 0).
# Every call returns a fresh instance.
def identity_matrix
  TransformationMatrix.new(1, 0, 0, 1, 0, 0)
end
end
end