module Metanorma::Utils
# Element/attribute name pairs that this module treats as anchor attributes
# (presumably attributes whose value is the id of another element; confirm
# against callers).
#
# @param presxml [Boolean] when truthy, the additional pairs for the
#   fn/semx/fmt-* elements are appended to the base list
# @return [Array<Array<String>>] [element name, attribute name] pairs
def anchor_attributes(presxml: false)
  base = [
    %w(annotation from), %w(annotation to), %w(callout target),
    %w(xref to), %w(eref bibitemid), %w(citation bibitemid),
    %w(xref target), %w(label for), %w(location target),
    %w(index to), %w(termsource bibitemid), %w(admonition target)
  ]
  return base unless presxml

  base + [
    %w(fn target), %w(semx source), %w(fmt-title source),
    %w(fmt-xref to), %w(fmt-xref target), %w(fmt-eref bibitemid),
    %w(fmt-xref-label container), %w(fmt-fn-body target),
    %w(fmt-annotation-body from), %w(fmt-annotation-body to),
    %w(fmt-annotation-start source), %w(fmt-annotation-start end),
    %w(fmt-annotation-start target), %w(fmt-annotation-end source),
    %w(fmt-annotation-end start), %w(fmt-annotation-end target)
  ]
end
# Return the id of +node+ when it has a non-empty one; otherwise return a
# newly generated identifier of the form "_<uuid>".
#
# @param node [#id, nil] object exposing an +id+, or nil
# @return [String]
def anchor_or_uuid(node = nil)
  # The UUID is generated up front, whether or not it ends up being used.
  uuid = UUIDTools::UUID.random_create
  id = node&.id
  return "_#{uuid}" if id.nil? || id.empty?

  id
end
# Parse +text+ as an Asciidoctor document and return the source of its first
# block with that block's substitutions applied.
#
# @param text [String, nil]
# @param flavour [Symbol] passed to Asciidoctor as the +backend+ option
# @return [String, nil] nil for nil input, "" for empty input
def asciidoc_sub(text, flavour = :standoc)
  return if text.nil?
  return "" if text.empty?

  options = { header_footer: false, backend: flavour }
  document = Asciidoctor::Document.new(text.lines.entries, options)
  first_block = document.parse.blocks.first
  first_block.apply_subs(first_block.source)
end
# Clean up a hash of attributes: entries with nil values are dropped, and
# HTML entities inside String values are decoded. Non-String values are
# passed through untouched.
#
# @param attributes [Hash]
# @return [Hash] a new hash
def attr_code(attributes)
  attributes.compact.transform_values do |value|
    if value.is_a?(String)
      HTMLEntities.new.decode(value)
    else
      value
    end
  end
end
# Insert optional break points into the over-long words of +text+:
# * break on punct every +threshold+ chars, with zero width space
# * if punct fails, try break on camel case, with soft hyphen
# * break regardless every threshold * t chars, with soft hyphen
#
# Text is split into words before each whitespace character or hyphen;
# words shorter than +threshold+ are left alone, longer ones are cut into
# chunks of at most +threshold+ characters, and each full-length chunk is
# handed to break_up_long_str1.
#
# @param text [String]
# @param threshold [Integer] maximum chunk length
# @param t [Integer] passed to break_up_long_str1 as its +nopunct+ argument:
#   every t-th chunk that cannot otherwise be broken gets a forced soft hyphen
# @return [String]
def break_up_long_str(text, threshold = LONGSTR_THRESHOLD, t = LONGSTR_NOPUNCT)
  /^\s*$/.match?(text) and return text

  # Built once per call, deliberately without the /o flag: /o would freeze
  # the first threshold ever interpolated and ignore later values.
  chunk_re = /.{,#{threshold}}/
  text.split(/(?=(?:\s|-))/).map do |word|
    if /^\s*$/.match?(word) || word.size < threshold
      word
    else
      word.scan(chunk_re).map.with_index do |chunk, i|
        # The body previously referenced an undefined `nopunct` here.
        chunk.size < threshold ? chunk : break_up_long_str1(chunk, i + 1, t)
      end.join
    end
  end.join
end
# Add a break point to one over-long chunk of text.
#
# break_up_long_str2 supplies the split pieces and the separator to use.
# When the chunk was split, the separator is inserted before the last piece.
# When it could not be split, the chunk is returned as is, except that on
# every +nopunct+-th iteration a soft hyphen (U+00AD) is appended.
#
# @param text [String] the chunk
# @param iteration [Integer] 1-based position of the chunk within its word
# @param nopunct [Integer] interval at which a soft hyphen is forced
# @return [String]
def break_up_long_str1(text, iteration, nopunct)
  pieces, separator = break_up_long_str2(text)
  unless pieces.size == 1
    pieces[-1] = "#{separator}#{pieces[-1]}"
    return pieces.join
  end

  # could not break up: force soft hyphen at the configured interval
  return text unless (iteration % nopunct).zero?

  text + "\u00ad"
end
# Split +text+ into pieces and report the separator that belongs between
# them.
#
# The split on STR_BREAKUP_RE is tried first and pairs with a zero width
# space (U+200B). If that leaves the text in one piece, the text is split on
# CAMEL_CASE_RE instead, paired with a soft hyphen (U+00AD).
#
# @param text [String]
# @return [Array(Array<String>, String)] the pieces and the separator
def break_up_long_str2(text)
  pieces = text.split(STR_BREAKUP_RE, -1)
  return [pieces, "\u200b"] unless pieces.size == 1

  [text.split(CAMEL_CASE_RE), "\u00ad"]
end
# Apply a String method to every text node of an XML fragment, leaving the
# markup in place.
#
# The fragment is wrapped in a temporary <root> element for parsing, and the
# serialised children of that root are returned.
#
# @param xml [String] XML fragment
# @param kase [Symbol, String] method name sent to each text node's text
#   (e.g. :upcase)
# @return [String]
def case_transform_xml(xml, kase)
  doc = Nokogiri::XML("<root>#{xml}</root>")
  doc.traverse do |node|
    next unless node.text?

    node.replace(node.text.send(kase))
  end
  doc.root.children.to_xml
end
# Derive a GUID-shaped identifier from an element's path and text: the MD5
# hex digest of "<path>////<text>", regrouped as 8-4-4-4-12 and prefixed
# with an underscore.
#
# @param elem [#path, #text]
# @return [String]
def contenthash(elem)
  digest = Digest::MD5.hexdigest("#{elem.path}////#{elem.text}")
  digest.sub(/^(.{8})(.{4})(.{4})(.{4})(.{12})$/, "_\\1-\\2-\\3-\\4-\\5")
end
# Build a Namespace object for the given XML document.
#
# @param xmldoc the XML document handed to Namespace.new
# @return [Namespace]
def create_namespace(xmldoc)
  Namespace.new(xmldoc)
end
# Split a delimited string into stripped fields, dropping empty (nil)
# fields.
#
# , " => ," : CSV definition does not deal with space followed by quote,
# so a single space between the delimiter and an opening quote is removed
# before parsing.
#
# @param text [String, nil]
# @param delim [String] column separator
# @return [Array<String>, nil] [] for nil or empty input; nil if the CSV
#   parser returns nil for the line
def csv_split(text, delim = ";")
  return [] if text.nil? || text.empty?

  # Escape delim so separators such as "|" or "." are matched literally,
  # and use the block form so delim is never read as a replacement pattern.
  normalised = text.gsub(/#{Regexp.escape(delim)} "(?!")/) { "#{delim}\"" }
  fields = CSV.parse_line(normalised, liberal_parsing: true, col_sep: delim)
  fields&.compact&.map(&:strip)
end
# Map a language code to a script code, falling back to "Latn" for any
# language not listed.
#
# @param lang [String] language code, e.g. "ja"
# @return [String] four-letter script code, e.g. "Jpan"
def default_script(lang)
  {
    "ar" => "Arab", "fa" => "Arab",
    "ur" => "Aran",
    "ru" => "Cyrl", "bg" => "Cyrl",
    "hi" => "Deva",
    "el" => "Grek",
    "zh" => "Hans",
    "ko" => "Kore",
    "he" => "Hebr",
    "ja" => "Jpan"
  }.fetch(lang, "Latn")
end
# Copy a definition-list entry onto an element as an attribute.
#
# Finds the <dt> in +dlist+ whose text equals +name+, takes the text of the
# <p> inside a following <dd> (or of the <dd> itself when there is no such
# <p>), and stores it as +elem[name]+. Does nothing and returns nil when
# the term or its definition is missing.
#
# @param elem [#[]=] target element
# @param dlist [#at] definition list node
# @param name [String] term text, and name of the attribute to set
# @return [String, nil] the assigned value, or nil
def dl_to_attrs(elem, dlist, name)
  term = dlist.at("./dt[text()='#{name}']")
  return unless term

  value = term.at("./following::dd/p") || term.at("./following::dd")
  return unless value

  elem[name] = value.text
end
# Turn every definition-list entry named +name+ into an element, inserted
# one after another.
#
# Insertion starts after the last existing +name+ child of +elem+ if there
# is one, otherwise after +ins+. Each matching <dt> in +dlist+ is handed to
# dl_to_elems1, whose return value becomes the next insertion point.
#
# @param ins node after which to insert when +elem+ has no +name+ child
# @param elem [#at] element searched for existing +name+ children
# @param dlist [#xpath] definition list node
# @param name [String] term text, and name of the elements created
# @return the final insertion point
def dl_to_elems(ins, elem, dlist, name)
  start = elem.at("./#{name}[last()]") || ins
  dlist.xpath("./dt[text()='#{name}']").inject(start) do |cursor, term|
    dl_to_elems1(term, name, cursor)
  end
end
# Convert the definition of one term into an element called +name+ and
# place it immediately after +ins+.
#
# The node used is the <dd> following +term+, or, when that <dd> has
# exactly one child element and it is a <p>, that <p>. The node is renamed
# in place and moved.
#
# @param term [#at] the <dt> node
# @param name [String] new element name
# @param ins node after which the renamed node is inserted
# @return the node that now follows +ins+
def dl_to_elems1(term, name, ins)
  definition = term.at("./following::dd")
  children = definition.elements
  if children && children.size == 1 && children.first.name == "p"
    definition = children.first
  end
  definition.name = name
  ins.next = definition
  ins.next
end
# Replace hyphen runs in the text nodes of +elem+ with en dashes: a "-" or
# "--" surrounded by whitespace becomes a bare en dash (the whitespace is
# removed), and any remaining "--" becomes an en dash.
#
# @param elem [#traverse] node whose descendants are rewritten in place
# @return whatever +elem.traverse+ returns
def endash_date(elem)
  elem.traverse do |node|
    next unless node.text?

    dashed = node.text.gsub(/\s+--?\s+/, "–").gsub("--", "–")
    node.replace(dashed)
  end
end
# Format a path for use outside Ruby on the current platform.
#
# On Windows (detected from RUBY_PLATFORM) forward slashes become
# backslashes, and the result is wrapped in double quotes if it contains
# whitespace. On every other platform the path is returned unchanged.
#
# The argument is never modified: the conversion works on a copy, so frozen
# strings are accepted and the caller's string keeps its slashes.
#
# @param path [String]
# @return [String]
def external_path(path)
  windows = RUBY_PLATFORM.match?(/(win|w)(32|64)$/) ||
    RUBY_PLATFORM.match?(/mswin|mingw/)
  return path unless windows

  native = path.gsub("/", "\\")
  native.match?(/\s/) ? "\"#{native}\"" : native
end
# Return the first character of +line+ that follows any leading tag-like
# runs, or "" when there is no such character.
#
# A tag-like run is an angle bracket, one or more non-bracket characters,
# and another angle bracket. Either bracket is accepted at either end, so
# the method also works on a reversed string.
#
# @param line [String, nil]
# @return [String] a single character, or ""
def firstchar_xml(line)
  found = /^([<>][^<>]+[<>])*(.)/.match(line)
  found ? found[2] : ""
end
# Whether +id+ is an underscore followed by a GUID in 8-4-4-4-12 hex form
# (case-insensitive).
#
# The pattern is anchored to the whole string with \A and \z. Line anchors
# (^ and $) would also accept a multi-line string that merely contains such
# a line, or an id with a trailing newline.
#
# @param id [String, nil]
# @return [Boolean] false for nil
def guid_anchor?(id)
  /\A_[0-9A-F]{8}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{12}\z/i
    .match?(id)
end
# By default, carriage return in source translates to whitespace: a space is
# appended to every line but the last, except at CJK boundaries.
#
# For each pair of adjacent lines, the last character of the first line and
# the first character of the next (both found via firstchar_xml, which skips
# tag-like runs) are inspected. No space is appended when
# * the first line ends in CJK and the next starts with CJK or with no
#   visible text, or
# * the first line ends with no visible text and the next starts with CJK.
# "Visible text" means a character that is neither a separator (\p{Z}) nor
# a control/other character (\p{C}).
#
# @param ret [Array<String>] lines; entries are replaced in place
# @return [Array<String>] the same array
def line_sanitise(ret)
  return ret if ret.size == 1

  cjk = /#{CJK}/o
  visible = /[^\p{Z}\p{C}]/
  (ret.size - 1).times do |idx|
    tail = firstchar_xml(ret[idx].reverse)
    head = firstchar_xml(ret[idx + 1])
    tail_cjk = cjk.match?(tail)
    head_cjk = cjk.match?(head)
    tail_text = visible.match?(tail)
    head_text = visible.match?(head)

    next if tail_cjk && (head_cjk || !head_text)
    next if !tail_text && head_cjk

    ret[idx] += " "
  end
  ret
end
# Directory of the document being processed, with a trailing slash.
#
# Reads the "docfile" attribute of +node+; when it is not set, "./" is
# returned.
#
# @param node [#attr]
# @return [String]
def localdir(node)
  docfile = node.attr("docfile")
  return "./" if docfile.nil?

  "#{Pathname.new(docfile).parent}/"
end
# block for processing XML document fragments as XHTML,
# to allow for HTMLentities
#
# An empty fragment is created on a document parsed from NOKOHEAD, the
# given block is run against it through Nokogiri::XML::Builder, and the
# fragment is serialised as unindented UTF-8 XML. In the result, en dashes
# are replaced by U+0096 and em dashes by U+0097.
#
# The dash substitution used to be written out twice in a row; the second
# pass could never match, so it has been removed.
#
# NOTE(review): U+0096/U+0097 are C1 control characters. The search strings
# may originally have been character entities rather than literal dashes;
# confirm against the upstream source.
#
# @param _script [String] unused
# @yield builder block passed to Nokogiri::XML::Builder.with
# @return [String]
def noko(_script = "Latn", &block)
  fragment = ::Nokogiri::XML.parse(NOKOHEAD).fragment("")
  ::Nokogiri::XML::Builder.with fragment, &block
  xml = fragment.to_xml(
    encoding: "UTF-8", indent: 0,
    save_with: Nokogiri::XML::Node::SaveOptions::AS_XML
  )
  xml.gsub("–", "\u0096").gsub("—", "\u0097")
end
# Build a fragment with Nokogiri::XML::Builder on a document parsed from
# NOKOHEAD and return its serialisation line by line.
#
# The fragment is serialised as unindented UTF-8 XML; each line has its
# line break, and any whitespace directly before it, removed.
#
# @yield builder block passed to Nokogiri::XML::Builder.with
# @return [Array<String>]
def noko_html(&block)
  fragment = ::Nokogiri::XML.parse(NOKOHEAD).fragment("")
  ::Nokogiri::XML::Builder.with fragment, &block
  xml = fragment.to_xml(
    encoding: "UTF-8", indent: 0,
    save_with: Nokogiri::XML::Node::SaveOptions::AS_XML
  )
  xml.lines.map { |line| line.gsub(/\s*\n/, "") }
end
# Prefix the element names in an XPath expression with "xmlns:".
#
# Four textual rewrites are applied in order. A name is prefixed when it
# * directly follows "/",
# * directly follows "::" (an axis),
# * opens a predicate and is followed by "=" (optionally after one space),
# * opens a predicate and is followed by "/", "[" or "]".
#
# @param xpath [String]
# @return [String]
def ns(xpath)
  rewrites = [
    [%r{/([a-zA-Z])}, "/xmlns:\\1"],
    [%r{::([a-zA-Z])}, "::xmlns:\\1"],
    [%r{\[([a-zA-Z][a-z0-9A-Z@/-]* ?=)}, "[xmlns:\\1"],
    [%r{\[([a-zA-Z][a-z0-9A-Z@/-]*[/\[\]])}, "[xmlns:\\1"]
  ]
  rewrites.reduce(xpath) do |expr, (pattern, replacement)|
    expr.gsub(pattern, replacement)
  end
end
# Rewrite named character entities in +xml+ as hexadecimal numeric ones.
#
# The string is split around entity-shaped tokens ("&", then characters
# other than whitespace, "#", "&" and ";", then ";"). Each such token is
# decoded and re-encoded in hexadecimal form; all other text, including
# numeric entities, is left as it is.
#
# @param xml [String]
# @return [String]
def numeric_escapes(xml)
  coder = HTMLEntities.new
  pieces = xml.split(/(&[^ \r\n\t#&;]+;)/).map do |piece|
    next piece unless /^(&[^ \t\r\n#;]+;)/.match?(piece)

    coder.encode(coder.decode(piece), :hexadecimal)
  end
  pieces.join
end
# Whether +script+ is one of the script codes this module lists as
# right-to-left: "Arab", "Aran" or "Hebr".
#
# @param script [String]
# @return [Boolean]
def rtl_script?(script)
  case script
  when "Arab", "Aran", "Hebr" then true
  else false
  end
end
# Set hash value using keys path
#
# Walks +hash+ along +keys+, creating intermediate hashes where needed, and
# stores +new_val+ at the end of the path without discarding what is
# already there:
# * at the last key, an existing Array is appended to, an existing scalar
#   becomes a two-element Array, and a missing value is simply set;
# * on the way down, an Array is descended through its last element (a
#   fresh Hash is added if that element is missing, nil, or not a Hash);
# * a non-Hash value that lacks the next key is wrapped together with a
#   fresh Hash into an Array, and the walk continues into that Hash.
#
# @param hash [Hash] modified in place
# @param keys [Array] path of keys, outermost first
# @param new_val value to store
# @return [Hash] +hash+
def set_nested_value(hash, keys, new_val)
  key = keys[0]
  rest = keys[1..-1]
  existing = hash[key]

  if keys.length == 1
    hash[key] =
      if existing.is_a?(::Array) then existing << new_val
      elsif existing.nil? then new_val
      else [existing, new_val]
      end
  elsif existing.is_a?(::Array)
    existing[-1] = {} if !existing.empty? && existing[-1].nil?
    existing << {} if existing.empty? || !existing[-1].is_a?(::Hash)
    set_nested_value(existing[-1], rest, new_val)
  elsif existing.nil? || existing.empty?
    hash[key] = {}
    set_nested_value(hash[key], rest, new_val)
  elsif existing.is_a?(::Hash) && !existing[keys[1]]
    set_nested_value(existing, rest, new_val)
  elsif !existing[keys[1]]
    # existing is a non-Hash without the next key: keep it, add a Hash
    hash[key] = [existing, {}]
    set_nested_value(hash[key][-1], rest, new_val)
  else
    set_nested_value(existing, rest, new_val)
  end
  hash
end
# Apply smart typography to +text+ and return it with basic HTML escaping.
#
# Steps, in order:
# 1. " - " / " -- " become a spaced em dash, remaining "--" an em dash, and
#    HTML entities are decoded;
# 2. a hair space (U+200A) is inserted between a CJK character and an
#    adjacent straight quote;
# 3. String#smart_format is applied;
# 4. the hair spaces next to CJK characters are removed again;
# 5. the result is encoded with the :basic entity set.
#
# NOTE(review): step 2 presumably keeps smart_format from treating CJK
# characters as quote context; confirm.
#
# @param text [String]
# @return [String]
def smartformat(text)
  dashed = text.gsub(/ --? /, " — ").gsub("--", "—")
  decoded = HTMLEntities.new.decode(dashed)
  padded = decoded.gsub(%r{(#{CJK})(["'])}o, "\\1\u200a\\2")
    .gsub(%r{(["'])(#{CJK})}o, "\\1\u200a\\2")
  smart = padded.smart_format
  unpadded = smart.gsub(%r{(#{CJK})\u200a}o, "\\1")
    .gsub(%r{\u200a(#{CJK})}o, "\\1")
  HTMLEntities.new.encode(unpadded, :basic)
end
# Upcase the first character of the first space-separated word of +str+,
# leaving every other character exactly as it is (unlike String#capitalize,
# nothing is downcased).
#
# An empty first word, as produced by a leading space, is passed through
# instead of raising NoMethodError on nil.
#
# @param str [String]
# @return [String]
def strict_capitalize_first(str)
  str.split(/ /).each_with_index.map do |word, i|
    letters = word.chars
    # &. guards the empty word, which has no first letter
    letters.first&.upcase! if i.zero?
    letters.join
  end.join(" ")
end
# Upcase the first character of every space-separated word of +str+,
# leaving every other character exactly as it is (unlike String#capitalize,
# nothing is downcased).
#
# Empty words, as produced by leading or doubled spaces, are passed through
# instead of raising NoMethodError on nil.
#
# @param str [String]
# @return [String]
def strict_capitalize_phrase(str)
  str.split(/ /).map do |word|
    letters = word.chars
    # &. guards empty words, which have no first letter
    letters.first&.upcase!
    letters.join
  end.join(" ")
end
# A utility method for escaping XML NCNames (XML Names without colons).
#
# Example carried over from the original comment (not verified here, since
# the result depends on constants defined elsewhere):
#   to_ncname('1 < 2 & 3')
#   # => "1___2___3"
#
# It follows the requirements of the specification for NCName:
# https://www.w3.org/TR/xml-names/#NT-NCName
#
# The name is first normalised by to_ncname_prep and returned as is when
# that reports it valid. Otherwise the first character is checked against
# INVALID_NCNAME_START_REGEXP and the rest against
# INVALID_NCNAME_CHAR_REGEXP; every match, and every colon in the rest, is
# replaced with NCNAME_INVALID.
#
# @param name [#to_s, nil]
# @param asciionly [Boolean] forwarded to to_ncname_prep
# @return [String]
def to_ncname(name, asciionly: false)
  name, valid = to_ncname_prep(name, asciionly)
  return name if valid

  head = name[0].gsub(INVALID_NCNAME_START_REGEXP, NCNAME_INVALID)
  return head if name.size == 1

  tail = name[1..-1]
    .gsub(INVALID_NCNAME_CHAR_REGEXP, NCNAME_INVALID)
    .gsub(":", NCNAME_INVALID)
  head + tail
end
# Normalise a candidate NCName and report whether it can be used as is.
#
# +name+ is converted to a String (nil becomes ""). With +asciionly+ set it
# is additionally entity-encoded using the :basic and :hexadecimal options.
# The name counts as valid when it is empty or matches SAFE_NCNAME_REGEXP.
#
# @param name [#to_s, nil]
# @param asciionly [Boolean]
# @return [Array(String, Boolean)] the normalised name and the validity flag
def to_ncname_prep(name, asciionly)
  name = name.to_s
  if asciionly
    name = HTMLEntities.new.encode(name, :basic, :hexadecimal)
  end
  [name, name.empty? || name.match?(SAFE_NCNAME_REGEXP)]
end
# Parse +xml+ as a fragment of a document created from NOKOHEAD.
#
# @param xml [String] markup to parse
# @return the fragment returned by Nokogiri's Document#fragment
def to_xhtml_fragment(xml)
  ::Nokogiri::XML.parse(NOKOHEAD).fragment(xml)
end
# if the contents of node are blocks, output them to out;
# otherwise output them to out inside a p element.
#
# @param node [#blocks?, #content]
# @param out builder responding to << and to p with a block
# @return whatever the call on +out+ returns
def wrap_in_para(node, out)
  return out << node.content if node.blocks?

  out.p { |para| para << node.content }
end