class PDF::Reader::Filter
:nodoc:
content.
support for features like compression and encryption. This class is for decoding that
Various parts of a PDF file can be passed through a filter before being stored to provide
###############################################################################
def ascii85(data)
rubygem.
Decode the specified data using the Ascii85 algorithm. Relies on the Ascii85
###############################################################################
# Decode +data+ using the ASCII85 algorithm by delegating to Ascii85::decode.
#
# The "<~" start marker is prepended when the stream does not already begin
# with it, so the decoder is always handed marker-prefixed input.
#
# data - the encoded stream (to_s is used only to inspect its first two
#        characters).
#
# Returns whatever Ascii85::decode returns.
# Raises MalformedPDFError when decoding raises any StandardError.
def ascii85(data)
  data = "<~#{data}" unless data.to_s[0, 2] == "<~"
  Ascii85::decode(data)
rescue StandardError => e
  # Only StandardError is translated. Interrupt, SystemExit, NoMemoryError and
  # similar must propagate rather than be reported as a corrupt PDF.
  raise MalformedPDFError, "Error occured while decoding an ASCII85 stream (#{e.class.to_s}: #{e.to_s})"
end
def asciihex(data)
Decode the specified data using the AsciiHex algorithm.
###############################################################################
# Decode +data+ using the ASCIIHex algorithm.
#
# Every character that is not a hexadecimal digit is discarded, which removes
# the optional "<" / ">" delimiters and any whitespace in one pass. An odd
# number of digits is padded with a trailing "0".
#
# data - the encoded String. It is never modified, so frozen strings are
#        accepted.
#
# Returns the decoded String.
# Raises MalformedPDFError when decoding raises any StandardError.
def asciihex(data)
  # gsub (not gsub!) returns a new string, leaving the caller's object alone.
  hex = data.gsub(/[^A-Fa-f0-9]/, "")
  hex << "0" if hex.size % 2 == 1
  hex.scan(/.{2}/).map { |pair| pair.hex.chr }.join("")
rescue StandardError => e
  # Only StandardError is translated so that Interrupt, SystemExit and the
  # like are not mistaken for a malformed stream.
  raise MalformedPDFError, "Error occured while decoding an ASCIIHex stream (#{e.class.to_s}: #{e.to_s})"
end
def depredict(data, opts = {})
# Reverse the predictor named by opts[:Predictor] (converted with to_i).
#
# data - the String to process.
# opts - a Hash of decode parameters; nil is treated as an empty Hash.
#
# Predictor 0 or 1 returns data unchanged, 2 delegates to tiff_depredict and
# 10 through 15 delegate to png_depredict.
#
# Raises MalformedPDFError for any other predictor value.
def depredict(data, opts = {})
  predictor = (opts || {})[:Predictor].to_i

  return data if predictor == 0 || predictor == 1
  return tiff_depredict(data, opts) if predictor == 2
  return png_depredict(data, opts) if (10..15).include?(predictor)

  raise MalformedPDFError, "Unrecognised predictor value (#{predictor})"
end
def filter (data)
returned untouched. At this stage PDF::Reader has no need to decode images.
Filters that are only used to encode image data are accepted, but the data is
attempts to decode the specified data with the current filter
###############################################################################
# Attempt to decode +data+ with the current filter.
#
# @filter holds the name of the decoding method to invoke. When it is nil
# there is no decoder to run and the data is handed back untouched.
def filter(data)
  if @filter.nil?
    data
  else
    send(@filter, data)
  end
end
def flate (data)
###############################################################################
# Decode +data+ that was compressed with the Flate (zlib/deflate) algorithm,
# then pass the result through depredict with this filter's options.
#
# data - the compressed String.
#
# Returns the inflated, de-predicted String.
# Raises MalformedPDFError when inflating or de-predicting raises any
# StandardError.
def flate(data)
  inflated = begin
    zlib_inflate(data)
  rescue Zlib::DataError
    # By default Ruby's Zlib assumes the data is RFC 1951 deflate data wrapped
    # in an RFC 1950 zlib container. If that fails, retry with negative window
    # bits, which makes zlib treat the input as a raw RFC 1951 stream.
    #
    # See
    # - http://blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-talk/243545
    # - http://www.gzip.org/zlib/zlib_faq.html#faq38
    zlib_inflate(data, -Zlib::MAX_WBITS)
  end
  depredict(inflated, @options)
rescue StandardError => e
  # Only StandardError is translated so that Interrupt, SystemExit and the
  # like are not reported as a malformed stream.
  raise MalformedPDFError, "Error occured while inflating a compressed stream (#{e.class.to_s}: #{e.to_s})"
end

# Inflate +data+ with a Zlib stream opened with +window_bits+, closing the
# stream even when inflation fails so its native buffers are released promptly.
def zlib_inflate(data, window_bits = Zlib::MAX_WBITS)
  zstream = Zlib::Inflate.new(window_bits)
  zstream.inflate(data)
ensure
  zstream.close if zstream
end
private :zlib_inflate
def initialize (name, options = nil)
returned untouched. At this stage PDF::Reader has no need to decode images.
Filters that are only used to encode image data are accepted, but the data is
creates a new filter for decoding content.
###############################################################################
# Create a new filter for decoding content.
#
# name    - the PDF filter name; it is converted with to_sym before matching.
# options - decode parameters, stored as-is in @options.
#
# @filter is set to the name of the decoding method for the filter. The
# image-only filters (CCITTFax, DCT, JBIG2, JPX) are accepted but leave
# @filter as nil.
#
# Raises UnsupportedFeatureError for an unrecognised filter name.
def initialize(name, options = nil)
  @options = options

  @filter = case name.to_sym
            when :ASCII85Decode   then :ascii85
            when :ASCIIHexDecode  then :asciihex
            when :FlateDecode     then :flate
            when :LZWDecode       then :lzw
            when :RunLengthDecode then :runlength
            when :CCITTFaxDecode, :DCTDecode, :JBIG2Decode, :JPXDecode then nil
            else raise UnsupportedFeatureError, "Unknown filter: #{name}"
            end
end
def lzw(data)
###############################################################################
# Decode +data+ with PDF::Reader::LZW.decode, then pass the result through
# depredict with this filter's options.
def lzw(data)
  depredict(PDF::Reader::LZW.decode(data), @options)
end
def png_depredict(data, opts = {})
# Reverse PNG row prediction (Predictor values of 10 and above).
#
# Each scanline in +data+ is one filter-type byte followed by the row bytes.
# The filter type selects how the row is reconstructed from the bytes to its
# left and from the previous row.
#
# data - the predicted String.
# opts - a Hash of decode parameters:
#        :Predictor        - returned untouched when below 10
#        :Colors           - components per sample (default 1)
#        :BitsPerComponent - bits per component (default 8)
#        :Columns          - samples per row (default 1)
#
# Returns data unchanged when opts is nil or the predictor is below 10,
# otherwise the reconstructed bytes as a String.
# Raises ArgumentError when a scanline starts with an unknown filter type.
def png_depredict(data, opts = {})
  return data if opts.nil? || opts[:Predictor].to_i < 10

  colors  = opts[:Colors] || 1
  bpc     = opts[:BitsPerComponent] || 8
  columns = opts[:Columns] || 1

  # PNG filters work on whole bytes: the "left" neighbour is one complete
  # pixel back (rounded up to at least one byte), and every row is padded to
  # a byte boundary.
  pixel_bytes = [(colors * bpc + 7) / 8, 1].max
  row_bytes   = (columns * colors * bpc + 7) / 8

  # The row above the first scanline is treated as all zeros.
  previous = Array.new(row_bytes, 0)
  decoded  = []

  data.unpack("C*").each_slice(row_bytes + 1) do |scanline|
    filter = scanline.shift
    previous = png_unfilter_row(filter, scanline, previous, pixel_bytes)
    decoded.concat(previous)
  end

  decoded.pack("C*")
end

# Reconstruct one scanline in place and return it. +previous+ is the
# already-reconstructed row above; +pixel_bytes+ is the distance to the
# corresponding byte of the pixel to the left.
def png_unfilter_row(filter, row, previous, pixel_bytes)
  case filter
  when 0 # None
  when 1 # Sub
    row.each_index do |i|
      left = i < pixel_bytes ? 0 : row[i - pixel_bytes]
      row[i] = (row[i] + left) % 256
    end
  when 2 # Up
    row.each_index { |i| row[i] = (row[i] + previous[i]) % 256 }
  when 3 # Average
    row.each_index do |i|
      left = i < pixel_bytes ? 0 : row[i - pixel_bytes]
      row[i] = (row[i] + (left + previous[i]) / 2) % 256
    end
  when 4 # Paeth
    row.each_index do |i|
      left       = i < pixel_bytes ? 0 : row[i - pixel_bytes]
      upper_left = i < pixel_bytes ? 0 : previous[i - pixel_bytes]
      upper      = previous[i]

      # Pick whichever neighbour is closest to the linear estimate,
      # preferring left, then upper, then upper-left on ties.
      estimate = left + upper - upper_left
      dist_left       = (estimate - left).abs
      dist_upper      = (estimate - upper).abs
      dist_upper_left = (estimate - upper_left).abs

      nearest = if dist_left <= dist_upper && dist_left <= dist_upper_left
                  left
                elsif dist_upper <= dist_upper_left
                  upper
                else
                  upper_left
                end

      row[i] = (row[i] + nearest) % 256
    end
  else
    raise ArgumentError, "Invalid filter algorithm #{filter}"
  end

  row
end
private :png_unfilter_row
def runlength(data)
###############################################################################
# Decode +data+ using the RunLength algorithm.
#
# The stream is a sequence of runs, each introduced by a length byte:
#   0..127   - copy the following (length + 1) bytes literally
#   128      - end of data; decoding stops
#   129..255 - repeat the following single byte (257 - length) times
#
# data - the encoded String. It is not modified.
#
# Returns the decoded String.
def runlength(data)
  # Index by byte throughout: on a string tagged with a multibyte encoding,
  # String#length and String#[] count characters while getbyte counts bytes.
  # Working on a binary-tagged copy makes all three agree and leaves the
  # caller's string untouched.
  data = data.dup.force_encoding("BINARY") if data.respond_to?(:force_encoding)

  pos = 0
  out = String.new

  while pos < data.length
    length = data.respond_to?(:getbyte) ? data.getbyte(pos) : data[pos]
    pos += 1

    break if length == 128

    if length < 128
      # Copy the following length + 1 bytes literally.
      out << data[pos, length + 1]
      pos += length
    else
      # Repeat the next byte (257 - length) times; i.e., "\xFA\x00"
      # ([250, 0]) expands to "\x00\x00\x00\x00\x00\x00\x00".
      out << data[pos, 1] * (257 - length)
    end
    pos += 1
  end

  out
end
def tiff_depredict(data, opts = {})
# Reverse TIFF horizontal differencing (Predictor 2).
#
# Within each row every byte was stored as the difference from the byte one
# pixel to its left; decoding adds that neighbour back, modulo 256.
#
# data - the predicted String.
# opts - a Hash of decode parameters (nil is treated as empty):
#        :BitsPerComponent - must be 8 (the default)
#        :Colors           - components per sample (default 1)
#        :Columns          - samples per row (default 1)
#
# Returns the reconstructed bytes as a String.
# Raises UnsupportedFeatureError when BitsPerComponent is not 8.
# Raises MalformedPDFError when the parameters describe an empty row.
def tiff_depredict(data, opts = {})
  opts ||= {}

  bpc = opts[:BitsPerComponent] || 8
  if bpc != 8
    raise UnsupportedFeatureError, "TIFF predictor onlys supports 8 Bits Per Component"
  end

  # With 8 bits per component a pixel occupies one byte per colour component.
  pixel_bytes = opts[:Colors] || 1
  line_len    = pixel_bytes * (opts[:Columns] || 1)

  # A row length below one cannot be stepped through.
  if line_len < 1
    raise MalformedPDFError, "Invalid TIFF predictor row length (#{line_len})"
  end

  unfiltered = []
  data.unpack("C*").each_slice(line_len) do |row_data|
    row_data.each_index do |index|
      # The first pixel of a row has no left neighbour and is stored as-is.
      next if index < pixel_bytes
      row_data[index] = (row_data[index] + row_data[index - pixel_bytes]) % 256
    end
    unfiltered.concat(row_data)
  end

  unfiltered.pack("C*")
end