class PDF::Reader::Encoding::IdentityH
# Convert a string of 2-byte character codes into a UTF-8 string.
#
# str - the raw bytes to convert. They are read as consecutive 16-bit
#       big-endian unsigned integers; a trailing odd byte is ignored by
#       String#unpack.
# map - optional object responding to #decode(Integer). A truthy result is
#       used as the Unicode codepoint for that character code (presumably
#       a ToUnicode CMap - confirm against callers).
#
# Without a map, or for codes the map cannot decode, there is no reliable
# way to recover the original text, so each such character is replaced with
# PDF::Reader::Encoding::UNKNOWN_CHAR.
#
# Returns a new String tagged as UTF-8 where the runtime supports encodings.
def to_utf8(str, map = nil)
  # Interpret each 2-byte chunk as an int and translate it to a codepoint.
  # Every element ends up either decoded or UNKNOWN_CHAR, so no second
  # clean-up pass over the array is needed.
  codepoints = str.unpack("n*").map do |char_code|
    (map && map.decode(char_code)) || PDF::Reader::Encoding::UNKNOWN_CHAR
  end

  # Pack all our Unicode codepoints into a UTF-8 string.
  ret = codepoints.pack("U*")

  # Set the string's encoding correctly under ruby 1.9+.
  ret.force_encoding("UTF-8") if ret.respond_to?(:force_encoding)
  ret
end