module HexaPDF::Layout::TextLayouter::SimpleTextSegmentation
def self.call(items)
Breaks the items (an array of InlineBox and TextFragment objects) into atomic pieces wrapped in Box, Glue or Penalty objects and returns those pieces as an array.
# Breaks +items+ into atomic pieces and returns them as an array of Box, Glue and Penalty
# objects.
#
# items:: An array whose elements are either InlineBox objects or objects that respond to
#         +items+ (glyphs and Numeric values), +dup_attributes+, +attributes_hash+ and
#         +style+ (text fragments).
#
# An InlineBox is wrapped as-is in a Box. A text fragment is scanned entry by entry: runs of
# Numeric entries and of glyphs not matching BREAK_RE become one Box each, and every break
# character is translated as follows:
#
# * <tt>' '</tt> -> Glue, one shared instance per +attributes_hash+
# * <tt>"\n"</tt>, <tt>"\v"</tt>, <tt>"\f"</tt>, U+0085, U+2029 -> paragraph break Penalty,
#   one shared instance per +attributes_hash+
# * U+2028 -> line break Penalty
# * <tt>"\r"</tt> -> paragraph break Penalty unless it is directly followed by <tt>"\n"</tt>
# * <tt>'-'</tt> -> kept at the end of the preceding Box, followed by Penalty::Standard
# * <tt>"\t"</tt> -> Glue made of eight space glyphs
# * U+00AD (soft hyphen) -> Penalty carrying a hyphen fragment and its width
# * U+00A0 (no-break space) -> prohibited-break Penalty carrying a space fragment and its width
# * U+200B (zero width space) -> Penalty with a value of zero
def self.call(items)
  result = []
  glues = {}      # shared Glue per attributes hash
  penalties = {}  # shared paragraph break Penalty per attributes hash

  items.each do |item|
    if item.kind_of?(InlineBox)
      result << Box.new(item)
      next
    end

    i = 0
    while i < item.items.size
      # Collect characters and kerning values until a break character is encountered
      box_items = []
      while (glyph = item.items[i]) &&
          (glyph.kind_of?(Numeric) || !BREAK_RE.match?(glyph.str))
        box_items << glyph
        i += 1
      end

      # Here +glyph+ is either nil (end of the fragment) or a non-Numeric break character.
      # A hyphen belongs to the text fragment it terminates.
      box_items << glyph if glyph&.str == '-'

      result << Box.new(item.dup_attributes(box_items.freeze)) unless box_items.empty?

      if glyph
        case glyph.str
        when ' '
          result << (glues[item.attributes_hash] ||=
                     Glue.new(item.dup_attributes([glyph].freeze)))
        when "\n", "\v", "\f", "\u{85}", "\u{2029}"
          result << (penalties[item.attributes_hash] ||=
                     Penalty.new(Penalty::PARAGRAPH_BREAK, 0))
        when "\u{2028}"
          result << Penalty.new(Penalty::LINE_BREAK, 0)
        when "\r"
          # CR directly followed by LF is a single line ending; the LF emits the break on
          # the next iteration, so the CR is dropped in that case.
          follower = item.items[i + 1]
          crlf = follower && !follower.kind_of?(Numeric) && follower.str == "\n"
          unless crlf
            result << (penalties[item.attributes_hash] ||=
                       Penalty.new(Penalty::PARAGRAPH_BREAK, 0))
          end
        when '-'
          result << Penalty::Standard
        when "\t"
          spaces = [item.style.font.decode_utf8(" ").first] * 8
          result << Glue.new(item.dup_attributes(spaces.freeze))
        when "\u{00AD}"
          frag = item.dup_attributes([item.style.font.decode_utf8("-").first].freeze)
          result << Penalty.new(Penalty::Standard.penalty, frag.width, item: frag)
        when "\u{00A0}"
          frag = item.dup_attributes([item.style.font.decode_utf8(" ").first].freeze)
          result << Penalty.new(Penalty::ProhibitedBreak.penalty, frag.width, item: frag)
        when "\u{200B}"
          result << Penalty.new(0)
        end
      end

      # Step over the break character (or past the end of the fragment)
      i += 1
    end
  end

  result
end