class HexaPDF::Font::CMap
See: PDF2.0 s9.7.5, s9.10.3; Adobe Technical Notes #5014 and #5411
value.
Represents a CMap, a mapping from character codes to CIDs (character IDs) or to their Unicode
def self.create_to_unicode_cmap(mapping)
codepoint mapping.
Returns a string containing a ToUnicode CMap that represents the given code to Unicode
# Builds a ToUnicode CMap for +mapping+ by handing it to a freshly created Writer.
#
# The return value is whatever Writer#create_to_unicode_cmap returns for +mapping+.
def self.create_to_unicode_cmap(mapping)
  writer = Writer.new
  writer.create_to_unicode_cmap(mapping)
end
def self.for_name(name)
Creates a new CMap object by parsing a predefined CMap with the given name.
# Returns the CMap stored under +name+ in CMAP_DIR, parsing it on first use.
#
# The parsed result is memoized in @cmap_cache, so later calls with the same
# +name+ return the cached object without touching the file system again.
#
# Raises HexaPDF::Error if CMAP_DIR contains no file called +name+.
def self.for_name(name)
  return @cmap_cache[name] if @cmap_cache.key?(name)

  file = File.join(CMAP_DIR, name)
  raise HexaPDF::Error, "No CMap named '#{name}' found" unless File.exist?(file)

  # The file content is read as UTF-8 before being handed to the parser.
  @cmap_cache[name] = parse(File.read(file, encoding: ::Encoding::UTF_8))
end
def self.parse(string)
# Parses +string+ with a freshly created Parser and returns that parser's result.
def self.parse(string)
  parser = Parser.new
  parser.parse(string)
end
def self.predefined?(name)
# Returns +true+ if a file called +name+ exists inside CMAP_DIR, +false+ otherwise.
def self.predefined?(name)
  cmap_path = File.join(CMAP_DIR, name)
  File.exist?(cmap_path)
end
def add_cid_mapping(code, cid)
# Records an individual mapping from the character code +code+ to the CID +cid+.
#
# An existing entry for +code+ is overwritten. Returns +cid+.
def add_cid_mapping(code, cid)
  @cid_mapping.store(code, cid)
end
def add_cid_range(start_code, end_code, start_cid)
Adds a CID range, mapping character codes from +start_code+ to +end_code+ to CIDs starting with +start_cid+.
# Appends a CID range entry: the inclusive code range +start_code+..+end_code+
# paired with +start_cid+, the CID assigned to the first code of the range.
#
# Returns the list of CID range mappings.
def add_cid_range(start_code, end_code, start_cid)
  code_range = (start_code..end_code)
  @cid_range_mappings.push([code_range, start_cid])
end
def add_codespace_range(first, *rest)
This means that the first range is checked against the first byte, the second range against
Add a codespace range using an array of ranges for the individual bytes.
# Appends a codespace range entry.
#
# +first+ is stored as the range for the first byte and +rest+ (possibly
# empty) as the list of ranges for the bytes that follow, in order.
#
# Returns the list of codespace ranges.
def add_codespace_range(first, *rest)
  entry = [first, rest]
  @codespace_ranges.push(entry)
end
def add_unicode_mapping(code, string)
# Records a mapping from the character code +code+ to the Unicode +string+.
#
# An existing entry for +code+ is overwritten. Returns +string+.
def add_unicode_mapping(code, string)
  @unicode_mapping.store(code, string)
end
def initialize
# Creates an empty CMap: no codespace ranges, no CID mappings, no CID range
# mappings and no Unicode mappings.
def initialize
  @codespace_ranges = []    # entries of the form [first_byte_range, rest_ranges]
  @cid_mapping = {}         # character code => CID
  @cid_range_mappings = []  # entries of the form [code_range, start_cid]
  @unicode_mapping = {}     # character code => Unicode string
end
def read_codes(string)
Parses the string and returns all character codes.
# Splits +string+ into character codes using the registered codespace ranges
# and returns them as an array of integers.
#
# For each code, the first byte is compared against the first-byte range of
# every codespace entry in registration order. When an entry matches, one
# additional byte is consumed per remaining range of that entry and each byte
# is shifted into the code (most significant byte first).
#
# Raises HexaPDF::Error if the string ends in the middle of a multi-byte code
# or if no codespace entry accepts the bytes read.
def read_codes(string)
  codes = []
  bytes = string.each_byte

  # Kernel#loop ends silently once +bytes.next+ raises StopIteration, i.e.
  # when the input has been consumed on a code boundary.
  loop do
    byte = bytes.next
    code = 0

    # NOTE(review): +byte+ and +code+ are shared with the blocks below and are
    # not reset when an entry matches the first byte but fails on a later
    # one, so subsequent entries are tested against the last consumed byte.
    # Confirm whether overlapping first-byte ranges need to be supported.
    found = @codespace_ranges.any? do |first_byte_range, rest_ranges|
      next unless first_byte_range.cover?(byte)

      code = (code << 8) + byte
      valid = rest_ranges.all? do |range|
        begin
          byte = bytes.next
        rescue StopIteration
          raise HexaPDF::Error, "Missing bytes while reading codes via CMap"
        end
        code = (code << 8) + byte
        range.cover?(byte)
      end

      # Array#<< returns the (truthy) array, which also makes +any?+ stop.
      codes << code if valid
    end

    unless found
      raise HexaPDF::Error, "Invalid byte while reading codes via CMap: #{byte}"
    end
  end

  codes
end
def to_cid(code)
# Returns the CID for the character code +code+, or 0 if nothing maps it.
#
# Individual mappings take precedence over range mappings. Range mappings are
# searched from the most recently added to the oldest, so a later range wins
# over an earlier one covering the same code.
def to_cid(code)
  # -1 serves as the internal "not found" marker.
  cid = @cid_mapping.fetch(code, -1)

  if cid == -1
    @cid_range_mappings.reverse_each do |range, start_cid|
      next unless range.cover?(code)

      # Offset of the code inside the range, added to the range's first CID.
      cid = start_cid + code - range.first
      break
    end
  end

  cid == -1 ? 0 : cid
end
def to_unicode(code)
Returns the Unicode string in UTF-8 encoding for the given character code, or +nil+ if no mapping was found.
# Looks up +code+ in the Unicode mapping and returns the associated entry
# (+nil+ when the mapping has no entry for +code+).
def to_unicode(code)
  mapping = unicode_mapping
  mapping[code]
end
def use_cmap(cmap)
# Copies all mapping information of +cmap+ into this CMap.
#
# Codespace ranges and CID range mappings of +cmap+ are appended to the
# existing lists; individual CID and Unicode mappings of +cmap+ overwrite
# existing entries for the same code. +cmap+ itself is only read.
def use_cmap(cmap)
  @codespace_ranges.concat(cmap.codespace_ranges)
  @cid_mapping.merge!(cmap.cid_mapping)
  @cid_range_mappings.concat(cmap.cid_range_mappings)
  @unicode_mapping.merge!(cmap.unicode_mapping)
end