################################################################################
#
# Copyright (C) 2008 James Healy (jimmy@deefa.com)
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
################################################################################

require 'enumerator'

class PDF::Reader
  # Base class for the text encodings understood by the reader. Each subclass
  # converts a string of raw bytes taken from a PDF into a UTF-8 string via
  # #to_utf8.
  class Encoding

    # Codepoint substituted for anything that cannot be converted.
    UNKNOWN_CHAR = 0x25AF # ▯

    attr_reader :differences

    # Set the differences table for this encoding. diff should be an array in
    # the following format:
    #
    #   [25, "A", 26, "B"]
    #
    # The array alternates between a decimal byte number and a glyph name to
    # map to that byte.
    #
    # To save space the following array is also valid and equivalent to the
    # previous one:
    #
    #   [25, "A", "B"]
    #
    # Raises ArgumentError if diff is not an Array. Returns the resulting
    # Hash of byte number => glyph name.
    def differences=(diff)
      raise ArgumentError, "diff must be an array" unless diff.kind_of?(Array)

      @differences = {}
      byte = 0
      diff.each do |val|
        if val.kind_of?(Numeric)
          # a number restarts the run at that byte
          byte = val.to_i
        else
          # a glyph name is assigned to the current byte, then the run advances
          @differences[byte] = val
          byte += 1
        end
      end
      @differences
    end

    # Takes the "Encoding" value of a Font dictionary and builds a
    # PDF::Reader::Encoding object.
    #
    # enc may be nil (StandardEncoding is used), an encoding name, or a Hash
    # with an :Encoding or :BaseEncoding entry and an optional :Differences
    # array. Raises UnsupportedFeatureError for an unrecognised name.
    def self.factory(enc)
      diff = nil

      if enc.kind_of?(Hash)
        diff = enc[:Differences]
        enc  = enc[:Encoding] || enc[:BaseEncoding]
      elsif enc != nil
        enc = enc.to_sym
      end

      case enc
      when nil                   then enc = PDF::Reader::Encoding::StandardEncoding.new
      when "Identity-H".to_sym   then enc = PDF::Reader::Encoding::IdentityH.new
      when :MacRomanEncoding     then enc = PDF::Reader::Encoding::MacRomanEncoding.new
      when :MacExpertEncoding    then enc = PDF::Reader::Encoding::MacExpertEncoding.new
      when :StandardEncoding     then enc = PDF::Reader::Encoding::StandardEncoding.new
      when :SymbolEncoding       then enc = PDF::Reader::Encoding::SymbolEncoding.new
      when :WinAnsiEncoding      then enc = PDF::Reader::Encoding::WinAnsiEncoding.new
      when :ZapfDingbatsEncoding then enc = PDF::Reader::Encoding::ZapfDingbatsEncoding.new
      else
        raise UnsupportedFeatureError, "#{enc} is not currently a supported encoding"
      end

      enc.differences = diff if enc && diff

      return enc
    end

    # Abstract method, of sorts. Subclasses convert str to UTF-8, optionally
    # using the tounicode object (anything responding to #decode) in
    # preference to their built in mapping.
    def to_utf8(str, tounicode = nil)
      raise RuntimeError, "Called abstract method"
    end

    # Accepts an array of byte numbers, and replaces any that have entries in
    # the differences table with a glyph name. The array is modified in place.
    def process_differences(arr)
      @differences ||= {}
      arr.collect! { |n| @differences[n].nil? ? n : @differences[n] }
    end
    protected :process_differences

    # Accepts an array of unicode code points and glyph names, and converts
    # any glyph names to codepoints. Names missing from the glyph list become
    # nil. The array is modified in place.
    def process_glyphnames(arr)
      @differences ||= {}
      arr.collect! { |n| n.kind_of?(Numeric) ? n : PDF::Reader::Font.glyphnames[n] }
    end
    protected :process_glyphnames

    # Shared conversion for the single byte encodings.
    #
    # str is unpacked into bytes and the differences table is applied. Then
    # each item is translated: when tounicode is supplied it is the only
    # source of codepoints (a failed lookup yields UNKNOWN_CHAR), otherwise
    # table (byte => codepoint) is consulted and anything it does not list is
    # passed through untouched. Finally glyph names are resolved and the
    # codepoints are packed into a UTF-8 string.
    def convert_single_byte(str, tounicode, table)
      items = process_differences(str.unpack('C*'))

      codepoints = items.collect do |num|
        if tounicode
          tounicode.decode(num) || UNKNOWN_CHAR
        else
          table.fetch(num, num)
        end
      end

      # glyph names must be resolved before nils are replaced, so that names
      # missing from the glyph list also end up as UNKNOWN_CHAR
      pack_codepoints(process_glyphnames(codepoints))
    end
    protected :convert_single_byte

    # Packs an array of Unicode codepoints into a UTF-8 string, replacing
    # characters that didn't convert nicely (nil/false) with UNKNOWN_CHAR.
    def pack_codepoints(codepoints)
      codepoints.collect! { |c| c ? c : UNKNOWN_CHAR }

      ret = codepoints.pack("U*")
      # set the strings encoding correctly under ruby 1.9+
      ret.force_encoding("UTF-8") if ret.respond_to?(:force_encoding)
      ret
    end
    protected :pack_codepoints

    class IdentityH < Encoding
      # Convert an Identity-H string into UTF-8.
      #
      # The string is read in 2 byte chunks, each interpreted as an int.
      # Without a ToUnicode CMap it's impossible to reliably convert this
      # text to unicode, so any character that can't be decoded is replaced
      # with a little box. Big smacks to the PDF producing app.
      def to_utf8(str, tounicode = nil)
        codepoints = str.unpack("n*").collect do |num|
          (tounicode && tounicode.decode(num)) || UNKNOWN_CHAR
        end

        pack_codepoints(codepoints)
      end
    end

    class MacExpertEncoding < Encoding
      # Bytes whose Unicode codepoint differs from the byte value.
      CODEPOINTS = {
        0x21 => 0xF721, 0x22 => 0xF6F8, 0x23 => 0xF7A2, 0x24 => 0xF724,
        0x25 => 0xF6E4, 0x26 => 0xF726, 0x27 => 0xF7B4, 0x28 => 0x207D,
        0x29 => 0x207E, 0x2A => 0x2025, 0x2B => 0x2024, 0x2F => 0x2044,
        0x30 => 0xF730, 0x31 => 0xF731, 0x32 => 0xF732, 0x33 => 0xF733,
        0x34 => 0xF734, 0x35 => 0xF735, 0x36 => 0xF736, 0x37 => 0xF737,
        0x38 => 0xF738, 0x39 => 0xF739, 0x3D => 0xF6DE, 0x3F => 0xF73F,
        0x44 => 0xF7F0, 0x47 => 0x00BC, 0x48 => 0x00BD, 0x49 => 0x00BE,
        0x4A => 0x215B, 0x4B => 0x215C, 0x4C => 0x215D, 0x4D => 0x215E,
        0x4E => 0x2153, 0x4F => 0x2154, 0x56 => 0xFB00, 0x57 => 0xFB01,
        0x58 => 0xFB02, 0x59 => 0xFB03, 0x5A => 0xFB04, 0x5B => 0x208D,
        0x5D => 0x208E, 0x5E => 0xF6F6, 0x5F => 0xF6E5, 0x60 => 0xF760,
        0x61 => 0xF761, 0x62 => 0xF762, 0x63 => 0xF763, 0x64 => 0xF764,
        0x65 => 0xF765, 0x66 => 0xF766, 0x67 => 0xF767, 0x68 => 0xF768,
        0x69 => 0xF769, 0x6A => 0xF76A, 0x6B => 0xF76B, 0x6C => 0xF76C,
        0x6D => 0xF76D, 0x6E => 0xF76E, 0x6F => 0xF76F, 0x70 => 0xF770,
        0x71 => 0xF771, 0x72 => 0xF772, 0x73 => 0xF773, 0x74 => 0xF774,
        0x75 => 0xF775, 0x76 => 0xF776, 0x77 => 0xF777, 0x78 => 0xF778,
        0x79 => 0xF779, 0x7A => 0xF77A, 0x7B => 0x20A1, 0x7C => 0xF6DC,
        0x7D => 0xF6DD, 0x7E => 0xF6FE, 0x81 => 0xF6E9, 0x82 => 0xF6E0,
        0x87 => 0xF7E1, 0x88 => 0xF7E0, 0x89 => 0xF7E2, 0x8A => 0xF7E4,
        0x8B => 0xF7E3, 0x8C => 0xF7E5, 0x8D => 0xF7E7, 0x8E => 0xF7E9,
        0x8F => 0xF7E8, 0x90 => 0xF7EA, 0x91 => 0xF7EB, 0x92 => 0xF7ED,
        0x93 => 0xF7EC, 0x94 => 0xF7EE, 0x95 => 0xF7EF, 0x96 => 0xF7F1,
        0x97 => 0xF7F3, 0x98 => 0xF7F2, 0x99 => 0xF7F4, 0x9A => 0xF7F6,
        0x9B => 0xF7F5, 0x9C => 0xF7FA, 0x9D => 0xF7F9, 0x9E => 0xF7FB,
        0x9F => 0xF7FC, 0xA1 => 0x2078, 0xA2 => 0x2084, 0xA3 => 0x2083,
        0xA4 => 0x2086, 0xA5 => 0x2088, 0xA6 => 0x2087, 0xA7 => 0xF6FD,
        0xA9 => 0xF6DF, 0xAA => 0x2082, 0xAC => 0xF7A8, 0xAE => 0xF6F5,
        0xAF => 0xF6F0, 0xB0 => 0x2085, 0xB2 => 0xF6E1, 0xB3 => 0xF6E7,
        0xB4 => 0xF7FD, 0xB6 => 0xF6E3, 0xB9 => 0xF7FE, 0xBB => 0x2089,
        0xBC => 0x2080, 0xBD => 0xF6FF, 0xBE => 0xF7E6, 0xBF => 0xF7F8,
        0xC0 => 0xF7BF, 0xC1 => 0x2081, 0xC2 => 0xF6F9, 0xC9 => 0xF7B8,
        0xCF => 0xF6FA, 0xD0 => 0x2012, 0xD1 => 0xF6E6, 0xD6 => 0xF7A1,
        0xD8 => 0xF7FF, 0xDA => 0x00B9, 0xDB => 0x00B2, 0xDC => 0x00B3,
        0xDD => 0x2074, 0xDE => 0x2075, 0xDF => 0x2076, 0xE0 => 0x2077,
        0xE1 => 0x2079, 0xE2 => 0x2070, 0xE4 => 0xF6EC, 0xE5 => 0xF6F1,
        0xE6 => 0xF6F3, 0xE9 => 0xF6ED, 0xEA => 0xF6F2, 0xEB => 0xF6EB,
        0xF1 => 0xF6EE, 0xF2 => 0xF6FB, 0xF3 => 0xF6F4, 0xF4 => 0xF7AF,
        0xF5 => 0xF6EA, 0xF6 => 0x207F, 0xF7 => 0xF6EF, 0xF8 => 0xF6E2,
        0xF9 => 0xF6E8, 0xFA => 0xF6F7, 0xFB => 0xF6FC
      }.freeze

      # convert a MacExpertEncoding string into UTF-8
      def to_utf8(str, tounicode = nil)
        convert_single_byte(str, tounicode, CODEPOINTS)
      end
    end

    # The default encoding for OSX <= v9
    # see: http://en.wikipedia.org/wiki/Mac_OS_Roman
    class MacRomanEncoding < Encoding
      # Bytes whose Unicode codepoint differs from the byte value.
      CODEPOINTS = {
        0x80 => 0x00C4, 0x81 => 0x00C5, 0x82 => 0x00C7, 0x83 => 0x00C9,
        0x84 => 0x00D1, 0x85 => 0x00D6, 0x86 => 0x00DC, 0x87 => 0x00E1,
        0x88 => 0x00E0, 0x89 => 0x00E2, 0x8A => 0x00E4, 0x8B => 0x00E3,
        0x8C => 0x00E5, 0x8D => 0x00E7, 0x8E => 0x00E9, 0x8F => 0x00E8,
        0x90 => 0x00EA, 0x91 => 0x00EB, 0x92 => 0x00ED, 0x93 => 0x00EC,
        0x94 => 0x00EE, 0x95 => 0x00EF, 0x96 => 0x00F1, 0x97 => 0x00F3,
        0x98 => 0x00F2, 0x99 => 0x00F4, 0x9A => 0x00F6, 0x9B => 0x00F5,
        0x9C => 0x00FA, 0x9D => 0x00F9, 0x9E => 0x00FB, 0x9F => 0x00FC,
        0xA0 => 0x2020, 0xA1 => 0x00B0, 0xA2 => 0x00A2, 0xA3 => 0x00A3,
        0xA4 => 0x00A7, 0xA5 => 0x2022, 0xA6 => 0x00B6, 0xA7 => 0x00DF,
        0xA8 => 0x00AE, 0xA9 => 0x00A9, 0xAA => 0x2122, 0xAB => 0x00B4,
        0xAC => 0x00A8, 0xAD => 0x2260, 0xAE => 0x00C6, 0xAF => 0x00D8,
        0xB0 => 0x221E, 0xB1 => 0x00B1, 0xB2 => 0x2264, 0xB3 => 0x2265,
        0xB4 => 0x00A5, 0xB5 => 0x00B5, 0xB6 => 0x2202, 0xB7 => 0x2211,
        0xB8 => 0x220F, 0xB9 => 0x03C0, 0xBA => 0x222B, 0xBB => 0x00AA,
        0xBC => 0x00BA, 0xBD => 0x03A9, 0xBE => 0x00E6, 0xBF => 0x00F8,
        0xC0 => 0x00BF, 0xC1 => 0x00A1, 0xC2 => 0x00AC, 0xC3 => 0x221A,
        0xC4 => 0x0192, 0xC5 => 0x2248, 0xC6 => 0x2206, 0xC7 => 0x00AB,
        0xC8 => 0x00BB, 0xC9 => 0x2026, 0xCA => 0x00A0, 0xCB => 0x00C0,
        0xCC => 0x00C3, 0xCD => 0x00D5, 0xCE => 0x0152, 0xCF => 0x0153,
        0xD0 => 0x2013, 0xD1 => 0x2014, 0xD2 => 0x201C, 0xD3 => 0x201D,
        0xD4 => 0x2018, 0xD5 => 0x2019, 0xD6 => 0x00F7, 0xD7 => 0x25CA,
        0xD8 => 0x00FF, 0xD9 => 0x0178, 0xDA => 0x2044, 0xDB => 0x20AC,
        0xDC => 0x2039, 0xDD => 0x203A, 0xDE => 0xFB01, 0xDF => 0xFB02,
        0xE0 => 0x2021, 0xE1 => 0x00B7, 0xE2 => 0x201A, 0xE3 => 0x201E,
        0xE4 => 0x2030, 0xE5 => 0x00C2, 0xE6 => 0x00CA, 0xE7 => 0x00C1,
        0xE8 => 0x00CB, 0xE9 => 0x00C8, 0xEA => 0x00CD, 0xEB => 0x00CE,
        0xEC => 0x00CF, 0xED => 0x00CC, 0xEE => 0x00D3, 0xEF => 0x00D4,
        0xF0 => 0xF8FF, 0xF1 => 0x00D2, 0xF2 => 0x00DA, 0xF3 => 0x00DB,
        0xF4 => 0x00D9, 0xF5 => 0x0131, 0xF6 => 0x02C6, 0xF7 => 0x02DC,
        0xF8 => 0x00AF, 0xF9 => 0x02D8, 0xFA => 0x02D9, 0xFB => 0x02DA,
        0xFC => 0x00B8, 0xFD => 0x02DD, 0xFE => 0x02DB, 0xFF => 0x02C7
      }.freeze

      # convert a MacRomanEncoding string into UTF-8
      def to_utf8(str, tounicode = nil)
        convert_single_byte(str, tounicode, CODEPOINTS)
      end
    end

    class StandardEncoding < Encoding
      # Bytes whose Unicode codepoint differs from the byte value. Based on
      # the mapping described at:
      #   http://unicode.org/Public/MAPPINGS/VENDORS/ADOBE/stdenc.txt
      CODEPOINTS = {
        0x27 => 0x2019, 0x60 => 0x2018, 0xA4 => 0x2044, 0xA6 => 0x0192,
        0xA8 => 0x00A4, 0xA9 => 0x0027, 0xAA => 0x201C, 0xAC => 0x2039,
        0xAD => 0x203A, 0xAE => 0xFB01, 0xAF => 0xFB02, 0xB1 => 0x2013,
        0xB2 => 0x2020, 0xB3 => 0x2021, 0xB4 => 0x00B7, 0xB7 => 0x2022,
        0xB8 => 0x201A, 0xB9 => 0x201E, 0xBA => 0x201D, 0xBC => 0x2026,
        0xBD => 0x2030, 0xC1 => 0x0060, 0xC2 => 0x00B4, 0xC3 => 0x02C6,
        0xC4 => 0x02DC, 0xC5 => 0x00AF, 0xC6 => 0x02D8, 0xC7 => 0x02D9,
        0xC8 => 0x00A8, 0xCA => 0x02DA, 0xCB => 0x00B8, 0xCD => 0x02DD,
        0xCE => 0x02DB, 0xCF => 0x02C7, 0xD0 => 0x2014, 0xE1 => 0x00C6,
        0xE3 => 0x00AA, 0xE8 => 0x0141, 0xE9 => 0x00D8, 0xEA => 0x0152,
        0xEB => 0x00BA, 0xF1 => 0x00E6, 0xF5 => 0x0131, 0xF8 => 0x0142,
        0xF9 => 0x00F8, 0xFA => 0x0153, 0xFB => 0x00DF
      }.freeze

      # convert an Adobe Standard Encoding string into UTF-8
      def to_utf8(str, tounicode = nil)
        convert_single_byte(str, tounicode, CODEPOINTS)
      end
    end

    class SymbolEncoding < Encoding
      # Bytes whose Unicode codepoint differs from the byte value.
      CODEPOINTS = {
        0x22 => 0x2200, 0x24 => 0x2203, 0x27 => 0x220B, 0x2A => 0x2217,
        0x2D => 0x2212, 0x40 => 0x2245, 0x41 => 0x0391, 0x42 => 0x0392,
        0x43 => 0x03A7, 0x44 => 0x0394, 0x45 => 0x0395, 0x46 => 0x03A6,
        0x47 => 0x0393, 0x48 => 0x0397, 0x49 => 0x0399, 0x4A => 0x03D1,
        0x4B => 0x039A, 0x4C => 0x039B, 0x4D => 0x039C, 0x4E => 0x039D,
        0x4F => 0x039F, 0x50 => 0x03A0, 0x51 => 0x0398, 0x52 => 0x03A1,
        0x53 => 0x03A3, 0x54 => 0x03A4, 0x55 => 0x03A5, 0x56 => 0x03C2,
        0x57 => 0x03A9, 0x58 => 0x039E, 0x59 => 0x03A8, 0x5A => 0x0396,
        0x5C => 0x2234, 0x5E => 0x22A5, 0x60 => 0xF8E5, 0x61 => 0x03B1,
        0x62 => 0x03B2, 0x63 => 0x03C7, 0x64 => 0x03B4, 0x65 => 0x03B5,
        0x66 => 0x03C6, 0x67 => 0x03B3, 0x68 => 0x03B7, 0x69 => 0x03B9,
        0x6A => 0x03D5, 0x6B => 0x03BA, 0x6C => 0x03BB, 0x6D => 0x03BC,
        0x6E => 0x03BD, 0x6F => 0x03BF, 0x70 => 0x03C0, 0x71 => 0x03B8,
        0x72 => 0x03C1, 0x73 => 0x03C3, 0x74 => 0x03C4, 0x75 => 0x03C5,
        0x76 => 0x03D6, 0x77 => 0x03C9, 0x78 => 0x03BE, 0x79 => 0x03C8,
        0x7A => 0x03B6, 0x7E => 0x223C, 0xA0 => 0x20AC, 0xA1 => 0x03D2,
        0xA2 => 0x2032, 0xA3 => 0x2264, 0xA4 => 0x2215, 0xA5 => 0x221E,
        0xA6 => 0x0192, 0xA7 => 0x2663, 0xA8 => 0x2666, 0xA9 => 0x2665,
        0xAA => 0x2660, 0xAB => 0x2194, 0xAC => 0x2190, 0xAD => 0x2191,
        0xAE => 0x2192, 0xAF => 0x2193, 0xB2 => 0x2033, 0xB3 => 0x2265,
        0xB4 => 0x00D7, 0xB5 => 0x221D, 0xB6 => 0x2202, 0xB7 => 0x2022,
        0xB8 => 0x00F7, 0xB9 => 0x2260, 0xBA => 0x2261, 0xBB => 0x2248,
        0xBC => 0x2026, 0xBD => 0xF8E6, 0xBE => 0xF8E7, 0xBF => 0x21B5,
        0xC0 => 0x2135, 0xC1 => 0x2111, 0xC2 => 0x211C, 0xC3 => 0x2118,
        0xC4 => 0x2297, 0xC5 => 0x2295, 0xC6 => 0x2205, 0xC7 => 0x2229,
        0xC8 => 0x222A, 0xC9 => 0x2283, 0xCA => 0x2287, 0xCB => 0x2284,
        0xCC => 0x2282, 0xCD => 0x2286, 0xCE => 0x2208, 0xCF => 0x2209,
        0xD0 => 0x2220, 0xD1 => 0x2207, 0xD2 => 0xF6DA, 0xD3 => 0xF6D9,
        0xD4 => 0xF6DB, 0xD5 => 0x220F, 0xD6 => 0x221A, 0xD7 => 0x22C5,
        0xD8 => 0x00AC, 0xD9 => 0x2227, 0xDA => 0x2228, 0xDB => 0x21D4,
        0xDC => 0x21D0, 0xDD => 0x21D1, 0xDE => 0x21D2, 0xDF => 0x21D3,
        0xE0 => 0x25CA, 0xE1 => 0x2329, 0xE2 => 0xF8E8, 0xE3 => 0xF8E9,
        0xE4 => 0xF8EA, 0xE5 => 0x2211, 0xE6 => 0xF8EB, 0xE7 => 0xF8EC,
        0xE8 => 0xF8ED, 0xE9 => 0xF8EE, 0xEA => 0xF8EF, 0xEB => 0xF8F0,
        0xEC => 0xF8F1, 0xED => 0xF8F2, 0xEE => 0xF8F3, 0xEF => 0xF8F4,
        0xF1 => 0x232A, 0xF2 => 0x222B, 0xF3 => 0x2320, 0xF4 => 0xF8F5,
        0xF5 => 0x2321, 0xF6 => 0xF8F6, 0xF7 => 0xF8F7, 0xF8 => 0xF8F8,
        0xF9 => 0xF8F9, 0xFA => 0xF8FA, 0xFB => 0xF8FB, 0xFC => 0xF8FC,
        0xFD => 0xF8FD, 0xFE => 0xF8FE
      }.freeze

      # convert a SymbolEncoding string into UTF-8
      def to_utf8(str, tounicode = nil)
        convert_single_byte(str, tounicode, CODEPOINTS)
      end
    end

    class WinAnsiEncoding < Encoding
      # Characters that were added compared to iso-8859-1.
      # For further reading:
      #   http://www.intertwingly.net/stories/2004/04/14/i18n.html
      CODEPOINTS = {
        0x80 => 0x20AC, 0x82 => 0x201A, 0x83 => 0x0192, 0x84 => 0x201E,
        0x85 => 0x2026, 0x86 => 0x2020, 0x87 => 0x2021, 0x88 => 0x02C6,
        0x89 => 0x2030, 0x8A => 0x0160, 0x8B => 0x2039, 0x8C => 0x0152,
        0x8E => 0x017D, 0x91 => 0x2018, 0x92 => 0x2019, 0x93 => 0x201C,
        0x94 => 0x201D, 0x95 => 0x2022, 0x96 => 0x2013, 0x97 => 0x2014,
        0x98 => 0x02DC, 0x99 => 0x2122, 0x9A => 0x0161, 0x9B => 0x203A,
        0x9C => 0x0153, 0x9E => 0x017E, 0x9F => 0x0178
      }.freeze

      # convert a WinAnsiEncoding string into UTF-8
      def to_utf8(str, tounicode = nil)
        convert_single_byte(str, tounicode, CODEPOINTS)
      end
    end

    class ZapfDingbatsEncoding < Encoding
      # Bytes whose Unicode codepoint differs from the byte value. Mapping to
      # unicode taken from:
      #   http://unicode.org/Public/MAPPINGS/VENDORS/ADOBE/zdingbat.txt
      CODEPOINTS = {
        0x21 => 0x2701, 0x22 => 0x2702, 0x23 => 0x2703, 0x24 => 0x2704,
        0x25 => 0x260E, 0x26 => 0x2706, 0x27 => 0x2707, 0x28 => 0x2708,
        0x29 => 0x2709, 0x2A => 0x261B, 0x2B => 0x261E, 0x2C => 0x270C,
        0x2D => 0x270D, 0x2E => 0x270E, 0x2F => 0x270F, 0x30 => 0x2710,
        0x31 => 0x2711, 0x32 => 0x2712, 0x33 => 0x2713, 0x34 => 0x2714,
        0x35 => 0x2715, 0x36 => 0x2716, 0x37 => 0x2717, 0x38 => 0x2718,
        0x39 => 0x2719, 0x3A => 0x271A, 0x3B => 0x271B, 0x3C => 0x271C,
        0x3D => 0x271D, 0x3E => 0x271E, 0x3F => 0x271F, 0x40 => 0x2720,
        0x41 => 0x2721, 0x42 => 0x2722, 0x43 => 0x2723, 0x44 => 0x2724,
        0x45 => 0x2725, 0x46 => 0x2726, 0x47 => 0x2727, 0x48 => 0x2605,
        0x49 => 0x2729, 0x4A => 0x272A, 0x4B => 0x272B, 0x4C => 0x272C,
        0x4D => 0x272D, 0x4E => 0x272E, 0x4F => 0x272F, 0x50 => 0x2730,
        0x51 => 0x2731, 0x52 => 0x2732, 0x53 => 0x2733, 0x54 => 0x2734,
        0x55 => 0x2735, 0x56 => 0x2736, 0x57 => 0x2737, 0x58 => 0x2738,
        0x59 => 0x2739, 0x5A => 0x273A, 0x5B => 0x273B, 0x5C => 0x273C,
        0x5D => 0x273D, 0x5E => 0x273E, 0x5F => 0x273F, 0x60 => 0x2740,
        0x61 => 0x2741, 0x62 => 0x2742, 0x63 => 0x2743, 0x64 => 0x2744,
        0x65 => 0x2745, 0x66 => 0x2746, 0x67 => 0x2747, 0x68 => 0x2748,
        0x69 => 0x2749, 0x6A => 0x274A, 0x6B => 0x274B, 0x6C => 0x25CF,
        0x6D => 0x274D, 0x6E => 0x25A0, 0x6F => 0x274F, 0x70 => 0x2750,
        0x71 => 0x2751, 0x72 => 0x2752, 0x73 => 0x25B2, 0x74 => 0x25BC,
        0x75 => 0x25C6, 0x76 => 0x2756, 0x77 => 0x25D7, 0x78 => 0x2758,
        0x79 => 0x2759, 0x7A => 0x275A, 0x7B => 0x275B, 0x7C => 0x275C,
        0x7D => 0x275D, 0x7E => 0x275E, 0x80 => 0xF8D7, 0x81 => 0xF8D8,
        0x82 => 0xF8D9, 0x83 => 0xF8DA, 0x84 => 0xF8DB, 0x85 => 0xF8DC,
        0x86 => 0xF8DD, 0x87 => 0xF8DE, 0x88 => 0xF8DF, 0x89 => 0xF8E0,
        0x8A => 0xF8E1, 0x8B => 0xF8E2, 0x8C => 0xF8E3, 0x8D => 0xF8E4,
        0xA1 => 0x2761, 0xA2 => 0x2762, 0xA3 => 0x2763, 0xA4 => 0x2764,
        0xA5 => 0x2765, 0xA6 => 0x2766, 0xA7 => 0x2767, 0xA8 => 0x2663,
        0xA9 => 0x2666, 0xAA => 0x2665, 0xAB => 0x2660, 0xAC => 0x2460,
        0xAD => 0x2461, 0xAE => 0x2462, 0xAF => 0x2463, 0xB0 => 0x2464,
        0xB1 => 0x2465, 0xB2 => 0x2466, 0xB3 => 0x2467, 0xB4 => 0x2468,
        0xB5 => 0x2469, 0xB6 => 0x2776, 0xB7 => 0x2777, 0xB8 => 0x2778,
        0xB9 => 0x2779, 0xBA => 0x277A, 0xBB => 0x277B, 0xBC => 0x277C,
        0xBD => 0x277D, 0xBE => 0x277E, 0xBF => 0x277F, 0xC0 => 0x2780,
        0xC1 => 0x2781, 0xC2 => 0x2782, 0xC3 => 0x2783, 0xC4 => 0x2784,
        0xC5 => 0x2785, 0xC6 => 0x2786, 0xC7 => 0x2787, 0xC8 => 0x2788,
        0xC9 => 0x2789, 0xCA => 0x278A, 0xCB => 0x278B, 0xCC => 0x278C,
        0xCD => 0x278D, 0xCE => 0x278E, 0xCF => 0x278F, 0xD0 => 0x2790,
        0xD1 => 0x2791, 0xD2 => 0x2792, 0xD3 => 0x2793, 0xD4 => 0x2794,
        0xD5 => 0x2192, 0xD6 => 0x2194, 0xD7 => 0x2195, 0xD8 => 0x2798,
        0xD9 => 0x2799, 0xDA => 0x279A, 0xDB => 0x279B, 0xDC => 0x279C,
        0xDD => 0x279D, 0xDE => 0x279E, 0xDF => 0x279F, 0xE0 => 0x27A0,
        0xE1 => 0x27A1, 0xE2 => 0x27A2, 0xE3 => 0x27A3, 0xE4 => 0x27A4,
        0xE5 => 0x27A5, 0xE6 => 0x27A6, 0xE7 => 0x27A7, 0xE8 => 0x27A8,
        0xE9 => 0x27A9, 0xEA => 0x27AA, 0xEB => 0x27AB, 0xEC => 0x27AC,
        0xED => 0x27AD, 0xEE => 0x27AE, 0xEF => 0x27AF, 0xF1 => 0x27B1,
        0xF2 => 0x27B2, 0xF3 => 0x27B3, 0xF4 => 0x27B4, 0xF5 => 0x27B5,
        0xF6 => 0x27B6, 0xF7 => 0x27B7, 0xF8 => 0x27B8, 0xF9 => 0x27B9,
        0xFA => 0x27BA, 0xFB => 0x27BB, 0xFC => 0x27BC, 0xFD => 0x27BD,
        0xFE => 0x27BE
      }.freeze

      # convert a ZapfDingbatsEncoding string into UTF-8
      def to_utf8(str, tounicode = nil)
        convert_single_byte(str, tounicode, CODEPOINTS)
      end
    end
  end
end