lib/ttfunk/subset/code_page.rb
# frozen_string_literal: true

require 'set'
require_relative 'base'

module TTFunk
  module Subset
    # A subset that uses standard code page encoding.
    #
    # Internally the subset is a 256-slot table indexed by the character code
    # in the code page; each used slot holds the Unicode codepoint that was
    # registered through {#use}.
    class CodePage < Base
      class << self
        # Get a mapping from an encoding to Unicode.
        #
        # Every byte value 0..255 is interpreted in `encoding` and transcoded
        # to UTF-8. Byte values that have no Unicode equivalent are omitted
        # from the result. The mapping is built once per `encoding` value and
        # then served from a class-level cache.
        #
        # @param encoding [Encoding, String, Symbol]
        # @return [Hash{Integer => Integer}] character code => Unicode
        #   codepoint
        def unicode_mapping_for(encoding)
          mapping_cache[encoding] ||=
            (0..255).each_with_object({}) do |c, ret|
              # Undefined characters are replaced with an empty string, so
              # `codepoints.first` is nil for them and they are skipped.
              codepoint =
                c.chr(encoding)
                  .encode(Encoding::UTF_8, undef: :replace, replace: '')
                  .codepoints
                  .first
              ret[c] = codepoint if codepoint
            end
        end

        private

        # Class-level cache of mappings built by {unicode_mapping_for},
        # keyed by the encoding argument.
        #
        # @return [Hash]
        def mapping_cache
          @mapping_cache ||= {}
        end
      end

      # Code page used in this subset.
      # This is used for proper `OS/2` table encoding.
      # @return [Integer]
      attr_reader :code_page

      # Encoding used in this subset.
      # @return [Encoding, String, Symbol]
      attr_reader :encoding

      # @param original [TTFunk::File]
      # @param code_page [Integer]
      # @param encoding [Encoding, String, Symbol]
      def initialize(original, code_page, encoding)
        super(original)
        @code_page = code_page
        @encoding = encoding
        # One slot per possible character code; nil means "not used".
        @subset = Array.new(256)
        # Unicode codepoint => character code (or nil when not encodable).
        @from_unicode_cache = {}

        # The space character is registered in every subset up front.
        use(space_char_code)
      end

      # Get a mapping from this subset to Unicode.
      #
      # Only character codes that have actually been added to the subset are
      # included.
      #
      # @return [Hash] character code => Unicode codepoint
      def to_unicode_map
        self.class.unicode_mapping_for(encoding)
          .select { |code, _unicode| @subset[code] }
      end

      # Add a character to subset.
      #
      # The character has to be representable in this subset's encoding
      # (see {#covers?}); otherwise there is no slot to store it in.
      #
      # @param character [Integer] Unicode codepoint
      # @return [void]
      def use(character)
        @subset[from_unicode(character)] = character
      end

      # Can this subset include the character? This depends on the encoding
      # used in this subset.
      #
      # @param character [Integer] Unicode codepoint
      # @return [Boolean]
      def covers?(character)
        !from_unicode(character).nil?
      end

      # Does this subset actually have the character?
      #
      # @param character [Integer] Unicode codepoint
      # @return [Boolean]
      def includes?(character)
        code = from_unicode(character)
        # Coerce to a strict boolean so the predicate matches its documented
        # return type instead of leaking the stored codepoint.
        !code.nil? && !@subset[code].nil?
      end

      # Get character code for Unicode codepoint.
      #
      # Both successful and failed conversions are cached, so a character
      # that the encoding cannot represent is transcoded (and the resulting
      # exception rescued) only once per subset.
      #
      # @param character [Integer] Unicode codepoint
      # @return [Integer, nil] character code in this subset's encoding, or
      #   `nil` if the character cannot be converted
      def from_unicode(character)
        # `key?` rather than `||=`: a cached nil must count as a hit.
        return @from_unicode_cache[character] if @from_unicode_cache.key?(character)

        @from_unicode_cache[character] =
          begin
            (+'' << character).encode!(encoding).ord
          rescue Encoding::UndefinedConversionError
            nil
          end
      end

      # Get `cmap` table for this subset.
      #
      # The table is built on first call and memoized; characters added with
      # {#use} afterwards are not reflected in the returned value.
      #
      # @return [TTFunk::Table::Cmap]
      def new_cmap_table
        @new_cmap_table ||=
          begin
            mapping = {}

            # Every one of the 256 slots gets an entry, including unused
            # (nil) slots, which are looked up in `unicode_cmap` as nil.
            @subset.each_with_index do |unicode, roman|
              mapping[roman] = unicode_cmap[unicode]
            end

            TTFunk::Table::Cmap.encode(mapping, :mac_roman)
          end
      end

      # Get the list of Glyph IDs from the original font that are in this
      # subset.
      #
      # Glyph 0 is always included. The result is free of duplicates and
      # sorted in ascending order.
      #
      # @return [Array<Integer>]
      def original_glyph_ids
        ([0] + @subset.map { |unicode| unicode && unicode_cmap[unicode] })
          .compact.uniq.sort
      end

      # Get a character code for Space in this subset.
      #
      # @return [Integer, nil]
      def space_char_code
        @space_char_code ||= from_unicode(Unicode::SPACE_CHAR)
      end
    end
  end
end