module Metanorma::Utils

def anchor_attributes(presxml: false)

all element/attribute pairs that are ID anchors in Metanorma

# Lists the element/attribute pairs that are treated as ID anchors in
# Metanorma XML.
#
# @param presxml [Boolean] when true, the additional +fmt-*+/+semx+/+fn+
#   pairs are appended to the base list
# @return [Array<Array<String>>] pairs of [element name, attribute name]
def anchor_attributes(presxml: false)
  base = [
    %w[annotation from], %w[annotation to], %w[callout target],
    %w[xref to], %w[eref bibitemid], %w[citation bibitemid],
    %w[xref target], %w[label for], %w[location target],
    %w[index to], %w[termsource bibitemid], %w[admonition target]
  ]
  return base unless presxml

  base + [
    %w[fn target], %w[semx source], %w[fmt-title source],
    %w[fmt-xref to], %w[fmt-xref target], %w[fmt-eref bibitemid],
    %w[fmt-xref-label container], %w[fmt-fn-body target],
    %w[fmt-annotation-body from], %w[fmt-annotation-body to],
    %w[fmt-annotation-start source], %w[fmt-annotation-start end],
    %w[fmt-annotation-start target], %w[fmt-annotation-end source],
    %w[fmt-annotation-end start], %w[fmt-annotation-end target]
  ]
end

def anchor_or_uuid(node = nil)

# Returns the id of +node+, or a freshly generated "_<uuid>" identifier when
# no node is given or its id is nil or empty.
#
# The random UUID is only generated when it is actually needed.
#
# @param node [#id, nil] object responding to +id+
# @return [String] the node id, or "_" followed by a random UUID
def anchor_or_uuid(node = nil)
  id = node.nil? ? nil : node.id
  return id unless id.nil? || id.empty?

  "_#{UUIDTools::UUID.random_create}"
end

def asciidoc_sub(text, flavour = :standoc)

# Runs +text+ through Asciidoctor and returns the first parsed block's
# source after that block's substitutions have been applied.
#
# @param text [String, nil] Asciidoc source
# @param flavour [Symbol] passed to Asciidoctor as the +backend+ option
# @return [String, nil] nil for nil input, "" for empty input, otherwise
#   the substituted text of the first block
def asciidoc_sub(text, flavour = :standoc)
  text.nil? and return nil
  text.empty? and return ""

  options = { header_footer: false, backend: flavour }
  document = Asciidoctor::Document.new(text.lines.entries, options)
  first_block = document.parse.blocks.first
  first_block.apply_subs(first_block.source)
end

def attr_code(attributes)

# Drops nil-valued entries from +attributes+ and HTML-entity-decodes every
# String value; non-String values are passed through unchanged.
#
# @param attributes [Hash]
# @return [Hash] a new hash; the argument is not modified
def attr_code(attributes)
  attributes.compact.transform_values do |value|
    next value unless value.is_a?(String)

    HTMLEntities.new.decode(value)
  end
end

def break_up_long_str(text, threshold = LONGSTR_THRESHOLD,

break on punct every LONGSTR_THRESHOLD chars, with zero width space
if punct fails, try break on camel case, with soft hyphen
break regardless every LONGSTR_THRESHOLD * LONGSTR_NOPUNCT,
with soft hyphen

# Inserts optional break characters into the over-long words of +text+.
#
# The text is split in front of every whitespace character or hyphen; each
# resulting word of at least +threshold+ characters is cut into chunks of
# +threshold+ characters, and every full-size chunk is handed to
# +break_up_long_str1+ together with its 1-based position and +nopunct+.
#
# @param text [String]
# @param threshold [Integer] chunk size; shorter words are left alone
# @param nopunct [Integer] passed through to +break_up_long_str1+
# @return [String] the text with break characters inserted
def break_up_long_str(text, threshold = LONGSTR_THRESHOLD,
                      nopunct = LONGSTR_NOPUNCT)
  /^\s*$/.match?(text) and return text
  text.split(/(?=(?:\s|-))/).map do |w|
    if /^\s*$/.match?(w) || (w.size < threshold) then w
    else
      # No /o flag here: the pattern depends on the threshold argument and
      # must be rebuilt when a different threshold is passed.
      w.scan(/.{,#{threshold}}/).map.with_index do |w1, i|
        w1.size < threshold ? w1 : break_up_long_str1(w1, i + 1, nopunct)
      end.join
    end
  end.join
end

def break_up_long_str1(text, iteration, nopunct)

# Adds one break character to a single chunk of a long word.
#
# When +break_up_long_str2+ manages to split the chunk, its separator is
# inserted in front of the last piece. Otherwise a soft hyphen is appended,
# but only on every +nopunct+-th chunk.
#
# @param text [String] the chunk
# @param iteration [Integer] 1-based position of the chunk within its word
# @param nopunct [Integer] interval at which an unbreakable chunk gets a
#   forced soft hyphen
# @return [String]
def break_up_long_str1(text, iteration, nopunct)
  pieces, separator = break_up_long_str2(text)
  unless pieces.size == 1
    pieces[-1] = "#{separator}#{pieces[-1]}"
    return pieces.join
  end

  # could not break up: force a soft hyphen on every nopunct-th chunk
  (iteration % nopunct).zero? ? text + "\u00ad" : text
end

def break_up_long_str2(text)

# Splits +text+ at STR_BREAKUP_RE and, if that yields a single piece, at
# CAMEL_CASE_RE instead.
#
# @param text [String]
# @return [Array(Array<String>, String)] the pieces and the separator to
#   use with them: zero width space (U+200B) for the first split, soft
#   hyphen (U+00AD) for the camel-case fallback
def break_up_long_str2(text)
  by_punct = text.split(STR_BREAKUP_RE, -1)
  by_punct.size == 1 or return [by_punct, "\u200b"]

  [text.split(CAMEL_CASE_RE), "\u00ad"]
end

def case_transform_xml(xml, kase)

# Applies a String method to every text node of an XML fragment.
#
# The fragment is wrapped in a temporary <root> element for parsing; only
# the children of that wrapper are serialised back.
#
# @param xml [String] XML fragment
# @param kase [Symbol, String] name of the method sent to each text node's
#   text (e.g. :upcase)
# @return [String] the transformed fragment as XML
def case_transform_xml(xml, kase)
  doc = Nokogiri::XML("<root>#{xml}</root>")
  doc.traverse do |node|
    next unless node.text?

    node.replace(node.text.send(kase))
  end
  doc.root.children.to_xml
end

def contenthash(elem)

# Derives a GUID-shaped identifier from an element's path and text.
#
# The MD5 hex digest of "<path>////<text>" is regrouped as
# "_8-4-4-4-12" hexadecimal characters.
#
# @param elem [#path, #text]
# @return [String]
def contenthash(elem)
  digest = Digest::MD5.hexdigest("#{elem.path}////#{elem.text}")
  "_" + digest.unpack("a8a4a4a4a12").join("-")
end

def create_namespace(xmldoc)

# Builds a Namespace object for the given XML document.
#
# @param xmldoc the document handed to Namespace.new
# @return [Namespace]
def create_namespace(xmldoc)
  Namespace.new(xmldoc)
end

def csv_split(text, delim = ";")

, " => ," : CSV definition does not deal with space followed by quote
at start of field

# Splits a delimited string into stripped fields, honouring quoted fields.
#
# CSV parsing does not cope with a space followed by a quote at the start
# of a field, so `<delim> "` is first rewritten to `<delim>"`.
#
# @param text [String, nil]
# @param delim [String] field separator; taken literally, so characters
#   that are special in a regular expression (e.g. "|" or ".") are safe
# @return [Array<String>] [] for nil or empty input
def csv_split(text, delim = ";")
  text.nil? || text.empty? and return []
  # Escape the delimiter: it is data, not a regular-expression fragment.
  space_before_quote = /#{Regexp.escape(delim)} "(?!")/
  # Block form keeps the replacement literal even if delim has backslashes.
  normalised = text.gsub(space_before_quote) { "#{delim}\"" }
  CSV.parse_line(normalised,
                 liberal_parsing: true,
                 col_sep: delim)&.compact&.map(&:strip)
end

def default_script(lang)

# Maps a language code to the script code used for it by default.
#
# @param lang [String] language code such as "ar" or "zh"
# @return [String] four-letter script code; "Latn" for any language not
#   listed
def default_script(lang)
  {
    "ar" => "Arab", "fa" => "Arab",
    "ur" => "Aran",
    "ru" => "Cyrl", "bg" => "Cyrl",
    "hi" => "Deva",
    "el" => "Grek",
    "zh" => "Hans",
    "ko" => "Kore",
    "he" => "Hebr",
    "ja" => "Jpan"
  }.fetch(lang, "Latn")
end

def dl_to_attrs(elem, dlist, name)

convert definition list term/value pair into Nokogiri XML attribute

# Copies one definition-list term/value pair onto +elem+ as an attribute.
#
# Looks up the <dt> under +dlist+ whose text equals +name+, takes the text
# of the following <dd> (its <p> if there is one) and stores it in
# +elem[name]+. Does nothing when the term or the value is missing.
#
# @param elem [#[]=] receiver of the attribute
# @param dlist [#at] definition list node
# @param name [String] term text, also used as the attribute name
# @return [String, nil] the assigned text, or nil when nothing was assigned
def dl_to_attrs(elem, dlist, name)
  term = dlist.at("./dt[text()='#{name}']")
  return unless term

  value = term.at("./following::dd/p") || term.at("./following::dd")
  return unless value

  elem[name] = value.text
end

def dl_to_elems(ins, elem, dlist, name)

convert definition list term/value pairs into Nokogiri XML elements

# Converts every definition-list entry whose term is +name+ into an element
# via +dl_to_elems1+, chaining the insertion point from one call to the next.
#
# If +elem+ already has a child named +name+, the last such child replaces
# +ins+ as the starting insertion point.
#
# @param ins insertion point used when +elem+ has no +name+ child
# @param elem [#at] element searched for existing +name+ children
# @param dlist [#xpath] definition list node
# @param name [String] term text / element name
# @return the insertion point after the last converted entry
def dl_to_elems(ins, elem, dlist, name)
  start = elem.at("./#{name}[last()]") || ins
  dlist.xpath("./dt[text()='#{name}']").inject(start) do |anchor, term|
    dl_to_elems1(term, name, anchor)
  end
end

def dl_to_elems1(term, name, ins)

# Turns the <dd> following +term+ into an element named +name+ and places it
# directly after +ins+.
#
# When the <dd> holds exactly one child element and that child is a <p>,
# the <p> is used in place of the <dd>.
#
# @param term [#at] the <dt> node
# @param name [String] new element name
# @param ins node after which the renamed node is inserted
# @return the node now following +ins+
def dl_to_elems1(term, name, ins)
  value = term.at("./following::dd")
  children = value.elements
  if children && children.size == 1 && children.first.name == "p"
    value = children.first # unwrap the sole paragraph
  end
  value.name = name
  ins.next = value
  ins.next
end

def endash_date(elem)

# Replaces hyphen-based range markers in every text node under +elem+ with
# the en dash entity "&#8211;".
#
# A single or double hyphen surrounded by whitespace is replaced together
# with that whitespace; any remaining "--" is replaced afterwards.
#
# @param elem [#traverse] node whose descendants are rewritten in place
def endash_date(elem)
  elem.traverse do |node|
    next unless node.text?

    spaced = node.text.gsub(/\s+--?\s+/, "&#8211;")
    node.replace(spaced.gsub("--", "&#8211;"))
  end
end

def external_path(path)

# Converts a path to the form expected by external programs on the current
# platform.
#
# On Windows (detected from RUBY_PLATFORM) forward slashes become
# backslashes and a path containing whitespace is wrapped in double quotes.
# On every other platform the path is returned unchanged.
#
# The argument is never modified, so frozen strings are accepted.
#
# @param path [String]
# @return [String]
def external_path(path)
  windows = RUBY_PLATFORM.match?(/(win|w)(32|64)$/) ||
    RUBY_PLATFORM.match?(/mswin|mingw/)
  return path unless windows

  # Block form so the backslash is taken literally as the replacement.
  native = path.gsub("/") { "\\" }
  /\s/.match?(native) ? "\"#{native}\"" : native
end

def firstchar_xml(line)

need to deal with both <em> and its reverse string, >me<

# Returns the first character of +line+ that follows any leading run of
# tag-like groups.
#
# A group is an angle bracket, one or more non-bracket characters, and
# another angle bracket, in either orientation, so both "<em>" and its
# reversed form ">me<" are skipped.
#
# @param line [String]
# @return [String] a single character, or "" when nothing matches
def firstchar_xml(line)
  m = /^([<>][^<>]+[<>])*(.)/.match(line) or return ""
  m[2]
end

def guid_anchor?(id)

# Tells whether +id+ has the shape of a generated GUID anchor: an underscore
# followed by 8-4-4-4-12 hexadecimal digits, in either letter case.
#
# The whole string must match; a GUID on one line of a multi-line string
# does not count.
#
# @param id [String, nil]
# @return [Boolean]
def guid_anchor?(id)
  /\A_[0-9A-F]{8}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{12}\z/i
    .match?(id)
end

def line_sanitise(ret)

By default, carriage return in source translates to whitespace;
but in CJK, it does not. (Non-CJK text \n CJK)

# Appends a joining space to each line of +ret+ except the last, unless the
# line break borders CJK text.
#
# For every adjacent pair, the last visible character of the first line and
# the first visible character of the second are found with +firstchar_xml+
# (the first line is reversed for this). No space is added when
# - the first line ends in CJK and the second starts with CJK or with no
#   printable character, or
# - the first line ends with no printable character and the second starts
#   with CJK.
#
# @param ret [Array<String>] lines; the array is modified in place
# @return [Array<String>] +ret+
def line_sanitise(ret)
  ret.size == 1 and return ret
  (0...(ret.size - 1)).each do |i|
    last = firstchar_xml(ret[i].reverse)
    nextfirst = firstchar_xml(ret[i + 1])
    cjk1 = /#{CJK}/o.match?(last)
    cjk2 = /#{CJK}/o.match?(nextfirst)
    # "text" = anything that is not a separator or control character
    text1 = /[^\p{Z}\p{C}]/.match?(last)
    text2 = /[^\p{Z}\p{C}]/.match?(nextfirst)
    cjk1 && (cjk2 || !text2) and next
    !text1 && cjk2 and next
    ret[i] += " "
  end
  ret
end

def localdir(node)

# Returns the directory of the document being processed, with a trailing
# slash.
#
# @param node [#attr] object whose "docfile" attribute holds the document
#   path
# @return [String] parent directory of the docfile followed by "/", or "./"
#   when the attribute is not set
def localdir(node)
  docfile = node.attr("docfile")
  docfile.nil? ? "./" : "#{Pathname.new(docfile).parent}/"
end

def noko(_script = "Latn", &block)

block for processing XML document fragments as XHTML,
to allow for HTMLentities
Unescape special chars used in Asciidoctor substitution processing

# Builds an XML fragment with Nokogiri::XML::Builder and returns it as a
# string.
#
# The fragment is created on a document parsed from NOKOHEAD, so that HTML
# entities are available, and is serialised as UTF-8 XML without
# indentation. Numeric references to U+0096 and U+0097 are then turned back
# into the raw characters.
#
# @param _script [String] unused
# @yield [builder] the Nokogiri builder operating on the fragment
# @return [String]
def noko(_script = "Latn", &block)
  fragment = ::Nokogiri::XML.parse(NOKOHEAD).fragment("")
  ::Nokogiri::XML::Builder.with fragment, &block
  # NOTE(review): the four search strings were blank in the listing this
  # was restored from (an empty pattern would insert the replacement at
  # every position). They are reconstructed as the decimal and hexadecimal
  # references for the two replacement characters; confirm against the
  # library source.
  fragment
    .to_xml(encoding: "UTF-8", indent: 0,
            save_with: Nokogiri::XML::Node::SaveOptions::AS_XML)
    .gsub("&#150;", "\u0096").gsub("&#151;", "\u0097")
    .gsub("&#x96;", "\u0096").gsub("&#x97;", "\u0097")
end

def noko_html(&block)

# Builds an XML fragment with Nokogiri::XML::Builder and returns its
# serialisation split into lines.
#
# The fragment is created on a document parsed from NOKOHEAD and serialised
# as UTF-8 XML without indentation; trailing whitespace and the newline are
# removed from each line.
#
# @yield [builder] the Nokogiri builder operating on the fragment
# @return [Array<String>] one entry per serialised line
def noko_html(&block)
  doc = ::Nokogiri::XML.parse(NOKOHEAD)
  fragment = doc.fragment("")
  ::Nokogiri::XML::Builder.with fragment, &block
  xml = fragment.to_xml(encoding: "UTF-8", indent: 0,
                        save_with: Nokogiri::XML::Node::SaveOptions::AS_XML)
  xml.lines.map do |l|
    l.gsub(/\s*\n/, "")
  end
end

def ns(xpath)

# Prefixes the names in an XPath expression with "xmlns:".
#
# Four textual substitutions are applied in order; a name is prefixed when
# it starts with a letter and
# 1. follows "/",
# 2. follows "::",
# 3. opens a predicate and is followed (optionally after one space) by "=",
# 4. opens a predicate and is followed by "/", "[" or "]".
#
# @param xpath [String]
# @return [String] a new string; the argument is not modified
def ns(xpath)
  substitutions = [
    [%r{/([a-zA-Z])}, "/xmlns:\\1"],
    [%r{::([a-zA-Z])}, "::xmlns:\\1"],
    [%r{\[([a-zA-Z][a-z0-9A-Z@/-]* ?=)}, "[xmlns:\\1"],
    [%r{\[([a-zA-Z][a-z0-9A-Z@/-]*[/\[\]])}, "[xmlns:\\1"],
  ]
  substitutions.reduce(xpath) do |path, (pattern, replacement)|
    path.gsub(pattern, replacement)
  end
end

def numeric_escapes(xml)

# Rewrites named character entities in +xml+ as hexadecimal numeric
# references.
#
# The string is split around "&name;" tokens (tokens containing "#", i.e.
# existing numeric references, are not matched); each token is decoded and
# re-encoded by HTMLEntities with the :hexadecimal option, everything else
# is copied through.
#
# @param xml [String]
# @return [String]
def numeric_escapes(xml)
  c = HTMLEntities.new
  xml.split(/(&[^ \r\n\t#&;]+;)/).map do |t|
    if /^(&[^ \t\r\n#;]+;)/.match?(t)
      c.encode(c.decode(t), :hexadecimal)
    else
      t
    end
  end.join
end

def rtl_script?(script)

# Tells whether +script+ is one of the script codes "Arab", "Aran" or
# "Hebr".
#
# @param script [String] script code
# @return [Boolean]
def rtl_script?(script)
  %w[Arab Aran Hebr].include?(script)
end

def set_nested_value(hash, keys, new_val)

mod from https://stackoverflow.com/a/42425884
Set hash value using keys path

# Sets a value in a nested hash, addressed by a path of keys
# (adapted from https://stackoverflow.com/a/42425884).
#
# Missing intermediate levels are created as hashes. Values are accumulated
# rather than overwritten: writing to a leaf that already holds a value
# turns it into an array of values, and descending through an array
# continues in its last element (a new hash is appended when that element
# is not a hash).
#
# @param hash [Hash] hash to update in place
# @param keys [Array] path of keys; must not be empty
# @param new_val value to store at the end of the path
# @return [Hash] +hash+
def set_nested_value(hash, keys, new_val)
  key = keys[0]
  if keys.length == 1
    # leaf: append to an existing array, pair up with an existing scalar
    hash[key] = if hash[key].is_a?(::Array) then (hash[key] << new_val)
                else hash[key].nil? ? new_val : [hash[key], new_val]
                end
  elsif hash[key].is_a?(::Array)
    # descend into the last array element, making sure it is a hash
    hash[key][-1] = {} if !hash[key].empty? && hash[key][-1].nil?
    hash[key] << {} if hash[key].empty? || !hash[key][-1].is_a?(::Hash)
    set_nested_value(hash[key][-1], keys[1..-1], new_val)
  elsif hash[key].nil? || hash[key].empty?
    hash[key] = {}
    set_nested_value(hash[key], keys[1..-1], new_val)
  elsif hash[key].is_a?(::Hash) && !hash[key][keys[1]]
    set_nested_value(hash[key], keys[1..-1], new_val)
  elsif !hash[key][keys[1]]
    # existing non-hash value: keep it and start a hash next to it
    hash[key] = [hash[key], {}]
    set_nested_value(hash[key][-1], keys[1..-1], new_val)
  else
    set_nested_value(hash[key], keys[1..-1], new_val)
  end
  hash
end

def smartformat(text)

TODO needs internationalisation of quote

# Applies typographic ("smart") formatting to +text+.
#
# Steps, in order: hyphen runs are turned into em dashes and HTML entities
# are decoded; a hair space (U+200A) is inserted between any CJK character
# and an adjacent straight quote; String#smart_format is applied; the hair
# spaces next to CJK characters are removed again; the result is encoded
# with HTMLEntities' :basic set.
#
# TODO needs internationalisation of quote
#
# @param text [String]
# @return [String]
def smartformat(text)
  # NOTE(review): the whitespace around the em dash below was copied from a
  # rendered listing; verify it matches the original characters exactly.
  ret = HTMLEntities.new.decode(
    text.gsub(/ --? /, " — ")
      .gsub("--", "—"),
  )
  ret = ret.gsub(%r{(#{CJK})(["'])}o, "\\1\u200a\\2")
    .gsub(%r{(["'])(#{CJK})}o, "\\1\u200a\\2")
  ret = ret.smart_format
  ret = ret.gsub(%r{(#{CJK})\u200a}o, "\\1")
    .gsub(%r{\u200a(#{CJK})}o, "\\1")
  HTMLEntities.new.encode(ret, :basic)
end

def strict_capitalize_first(str)

# Upper-cases the first character of the first space-separated word of
# +str+, leaving every other character as it is.
#
# An empty first word (the string starts with a space) is left alone
# instead of raising.
#
# @param str [String]
# @return [String] the words re-joined with single spaces
def strict_capitalize_first(str)
  str.split(/ /).each_with_index.map do |w, i|
    letters = w.chars
    # &. : an empty word has no first letter
    letters.first&.upcase! if i.zero?
    letters.join
  end.join(" ")
end

def strict_capitalize_phrase(str)

# Upper-cases the first character of every space-separated word of +str+,
# leaving every other character as it is.
#
# Empty words (produced by leading or consecutive spaces) are left alone
# instead of raising.
#
# @param str [String]
# @return [String] the words re-joined with single spaces
def strict_capitalize_phrase(str)
  str.split(/ /).map do |w|
    letters = w.chars
    # &. : an empty word has no first letter
    letters.first&.upcase!
    letters.join
  end.join(" ")
end

def to_ncname(name, asciionly: false)

A utility method for escaping XML NCNames (XML Names without colons).

to_ncname('1 < 2 & 3')
# => "1___2___3"

It follows the requirements of the specification for NCName: https://www.w3.org/TR/xml-names/#NT-NCName
NCName is "an XML Name, minus the :"

# Escapes +name+ so that it can be used as an XML NCName (an XML Name
# without colons).
#
# The name is first normalised by +to_ncname_prep+; if that reports it as
# already valid it is returned as is. Otherwise every match of
# INVALID_NCNAME_START_REGEXP in the first character, and every match of
# INVALID_NCNAME_CHAR_REGEXP as well as every colon in the remaining
# characters, is replaced with NCNAME_INVALID.
#
# @param name [#to_s, nil]
# @param asciionly [Boolean] passed to +to_ncname_prep+
# @return [String]
def to_ncname(name, asciionly: false)
  name, valid = to_ncname_prep(name, asciionly)
  valid and return name
  starting_char = name[0]
  starting_char.gsub!(INVALID_NCNAME_START_REGEXP, NCNAME_INVALID)
  name.size == 1 and return starting_char
  following_chars = name[1..-1]
  following_chars.gsub!(INVALID_NCNAME_CHAR_REGEXP, NCNAME_INVALID)
  following_chars.gsub!(":", NCNAME_INVALID)
  starting_char << following_chars
end

def to_ncname_prep(name, asciionly)

# Normalises +name+ for +to_ncname+ and reports whether it needs escaping.
#
# @param name [#to_s, nil] nil becomes ""
# @param asciionly [Boolean] when true, the name is passed through
#   HTMLEntities#encode with the :basic and :hexadecimal options
# @return [Array(String, Boolean)] the normalised name and true when it is
#   empty or matches SAFE_NCNAME_REGEXP
def to_ncname_prep(name, asciionly)
  name = name.to_s # nil.to_s is ""
  asciionly and name = HTMLEntities.new.encode(name, :basic, :hexadecimal)
  [name, name.nil? || name.empty? || name.match?(SAFE_NCNAME_REGEXP)]
end

def to_xhtml_fragment(xml)

# Parses +xml+ as a fragment of a document created from NOKOHEAD.
#
# @param xml [String] markup to parse
# @return the fragment returned by the document's +fragment+ method
def to_xhtml_fragment(xml)
  doc = ::Nokogiri::XML.parse(NOKOHEAD)
  doc.fragment(xml)
end

def wrap_in_para(node, out)

if the contents of node are blocks, output them to out;
else, wrap them in <p>

# Writes the content of +node+ to +out+: directly when the node reports
# that it contains blocks, otherwise inside a paragraph created with
# +out.p+.
#
# @param node [#blocks?, #content]
# @param out [#<<, #p] builder receiving the output
def wrap_in_para(node, out)
  return out << node.content if node.blocks?

  out.p { |para| para << node.content }
end

Modules

Metanorma::Utils::Array

Metanorma::Utils::Hash

Classes

Metanorma::Utils::LineStatus

Metanorma::Utils::Log

Metanorma::Utils::Namespace

Instance Methods

# anchor_attributes

# anchor_or_uuid

# asciidoc_sub

# attr_code

# break_up_long_str

# break_up_long_str1

# break_up_long_str2

# case_transform_xml

# contenthash

# create_namespace

# csv_split

# default_script

# dl_to_attrs

# dl_to_elems

# dl_to_elems1

# endash_date

# external_path

# firstchar_xml

# guid_anchor?

# line_sanitise

# localdir

# noko

# noko_html

# ns

# numeric_escapes

# rtl_script?

# set_nested_value

# smartformat

# strict_capitalize_first

# strict_capitalize_phrase

# to_ncname

# to_ncname_prep

# to_xhtml_fragment

# wrap_in_para

Defined in

lib/utils/anchor.rb

lib/utils/cjk.rb

lib/utils/hash_transform_keys.rb

lib/utils/hash_transform_keys.rb

lib/utils/image.rb

lib/utils/linestatus.rb

lib/utils/log.rb

lib/utils/main.rb

lib/utils/namespace.rb

lib/utils/version.rb

lib/utils/xml.rb