module Metanorma::Utils

def anchor_attributes(presxml: false)

all element/attribute pairs that are ID anchors in Metanorma
# Lists the [element name, attribute name] pairs treated as ID anchors.
#
# @param presxml [Boolean] when truthy, the second list (fn, semx and fmt-*
#   elements) is appended to the base list
# @return [Array<Array<String>>] the pairs, base list first
def anchor_attributes(presxml: false)
  base = [
    ["annotation", "from"], ["annotation", "to"],
    ["callout", "target"], ["xref", "to"],
    ["eref", "bibitemid"], ["citation", "bibitemid"],
    ["xref", "target"], ["label", "for"],
    ["location", "target"], ["index", "to"],
    ["termsource", "bibitemid"], ["admonition", "target"]
  ]
  return base unless presxml

  base + [
    ["fn", "target"], ["semx", "source"],
    ["fmt-title", "source"], ["fmt-xref", "to"],
    ["fmt-xref", "target"], ["fmt-eref", "bibitemid"],
    ["fmt-xref-label", "container"], ["fmt-fn-body", "target"],
    ["fmt-annotation-body", "from"], ["fmt-annotation-body", "to"],
    ["fmt-annotation-start", "source"], ["fmt-annotation-start", "end"],
    ["fmt-annotation-start", "target"], ["fmt-annotation-end", "source"],
    ["fmt-annotation-end", "start"], ["fmt-annotation-end", "target"]
  ]
end

def anchor_or_uuid(node = nil)

# Returns the id of +node+, or a fresh "_<uuid>" string when +node+ is nil or
# its id is nil or empty.
#
# @param node [#id, nil] object exposing an +id+
# @return [String]
def anchor_or_uuid(node = nil)
  uuid = UUIDTools::UUID.random_create
  id = node&.id
  return "_#{uuid}" if id.nil? || id.empty?

  id
end

def asciidoc_sub(text, flavour = :standoc)

# Runs +text+ through Asciidoctor and returns the first parsed block's source
# with that block's substitutions applied.
#
# @param text [String, nil] AsciiDoc source
# @param flavour [Symbol] passed to Asciidoctor as the +backend+ option
# @return [String, nil] nil for nil input, "" for empty input
def asciidoc_sub(text, flavour = :standoc)
  return text if text.nil?
  return "" if text.empty?

  options = { header_footer: false, backend: flavour }
  document = Asciidoctor::Document.new(text.lines.entries, options)
  first_block = document.parse.blocks.first
  first_block.apply_subs(first_block.source)
end

def attr_code(attributes)

# Drops nil-valued entries from +attributes+ and decodes HTML entities in the
# remaining String values; non-String values are passed through unchanged.
#
# @param attributes [Hash]
# @return [Hash] a new hash
def attr_code(attributes)
  attributes.compact.each_with_object({}) do |(key, value), decoded|
    decoded[key] = case value
                   when String then HTMLEntities.new.decode(value)
                   else value
                   end
  end
end

def break_up_long_str(text, threshold = LONGSTR_THRESHOLD,

break on punct every LONGSTR_THRESHOLD chars, with zero width space;
if punct fails, try break on camel case, with soft hyphen;
break regardless every LONGSTR_THRESHOLD * LONGSTR_NOPUNCT chars,
with soft hyphen
def break_up_long_str(text, threshold = LONGSTR_THRESHOLD,
t = LONGSTR_NOPUNCT)
  /^\s*$/.match?(text) and return text
  text.split(/(?=(?:\s|-))/).map do |w|
    if /^\s*$/.match(w) || (w.size < threshold) then w
    else
      w.scan(/.{,#{threshold}}/o).map.with_index do |w1, i|
        w1.size < threshold ? w1 : break_up_long_str1(w1, i + 1, nopunct)
      end.join
    end
  end.join
end

def break_up_long_str1(text, iteration, nopunct)

# Adds one break opportunity to the chunk +text+.
#
# When break_up_long_str2 splits the chunk, the returned separator is placed
# before the last piece. When it cannot, a soft hyphen (U+00AD) is appended,
# but only if +iteration+ is a multiple of +nopunct+.
#
# @param text [String] chunk to process
# @param iteration [Integer] position of the chunk within its word
# @param nopunct [Integer] interval at which a break is forced
# @return [String]
def break_up_long_str1(text, iteration, nopunct)
  pieces, separator = break_up_long_str2(text)
  if pieces.size != 1
    pieces[-1] = "#{separator}#{pieces[-1]}"
    return pieces.join
  end
  # could not break up: force a soft hyphen on every nopunct-th chunk
  (iteration % nopunct).zero? ? text + "\u00ad" : text
end

def break_up_long_str2(text)

# Splits +text+ on STR_BREAKUP_RE, falling back to CAMEL_CASE_RE when the
# first split yields exactly one piece.
#
# @param text [String]
# @return [Array(Array<String>, String)] the pieces and the separator to use:
#   zero width space (U+200B) for the first split, soft hyphen (U+00AD) for
#   the fallback
def break_up_long_str2(text)
  by_punct = text.split(STR_BREAKUP_RE, -1)
  return [by_punct, "\u200b"] unless by_punct.size == 1

  [text.split(CAMEL_CASE_RE), "\u00ad"]
end

def case_transform_xml(xml, kase)

# Applies a String method to every text node of an XML fragment, leaving the
# markup in place.
#
# @param xml [String] XML fragment; it is wrapped in a temporary <root>
# @param kase [Symbol, String] name of the method sent to each text node's
#   text, e.g. :upcase
# @return [String] the serialised children of the temporary root
def case_transform_xml(xml, kase)
  doc = Nokogiri::XML("<root>#{xml}</root>")
  doc.traverse do |node|
    next unless node.text?

    node.replace(node.text.send(kase))
  end
  doc.root.children.to_xml
end

def contenthash(elem)

# Derives a GUID-shaped identifier from the path and text of +elem+.
#
# The MD5 hex digest of "<path>////<text>" is grouped 8-4-4-4-12 and prefixed
# with an underscore.
#
# @param elem [#path, #text]
# @return [String] e.g. "_xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"
def contenthash(elem)
  hex = Digest::MD5.hexdigest("#{elem.path}////#{elem.text}")
  groups = [hex[0, 8], hex[8, 4], hex[12, 4], hex[16, 4], hex[20, 12]]
  "_" + groups.join("-")
end

def create_namespace(xmldoc)

# Builds a Namespace object for +xmldoc+.
#
# @param xmldoc the document handed to Namespace.new
# @return [Namespace]
def create_namespace(xmldoc)
  Namespace.new(xmldoc)
end

def csv_split(text, delim = ";")

, " => ," : CSV definition does not deal with space followed by quote
at start of field
# Splits a delimited string into stripped fields, dropping empty fields.
#
# A delimiter followed by a space and an opening quote is first rewritten to
# delimiter + quote, because the CSV parser does not treat a quote preceded by
# a space as the start of a quoted field.
#
# @param text [String, nil] delimited text
# @param delim [String] field separator; taken literally, so regex
#   metacharacters such as "|" or "." are safe
# @return [Array<String>, nil] [] for nil or empty input; otherwise the fields,
#   or nil if the CSV parser returns no row
def csv_split(text, delim = ";")
  text.nil? || text.empty? and return []
  # Escape the delimiter before interpolating it: unescaped, "|" would turn
  # the pattern into an alternation that matches the empty string everywhere.
  sep = Regexp.escape(delim)
  # Block form keeps backslashes in delim from being read as backreferences.
  normalised = text.gsub(/#{sep} "(?!")/) { "#{delim}\"" }
  CSV.parse_line(normalised,
                 liberal_parsing: true,
                 col_sep: delim)&.compact&.map(&:strip)
end

def default_script(lang)

# Maps a language code to its default script code.
#
# @param lang [String] language code such as "ar" or "zh"
# @return [String] four-letter script code; "Latn" for any language not listed
def default_script(lang)
  scripts = {
    "ar" => "Arab", "fa" => "Arab",
    "ur" => "Aran",
    "ru" => "Cyrl", "bg" => "Cyrl",
    "hi" => "Deva",
    "el" => "Grek",
    "zh" => "Hans",
    "ko" => "Kore",
    "he" => "Hebr",
    "ja" => "Jpan"
  }
  scripts.fetch(lang, "Latn")
end

def dl_to_attrs(elem, dlist, name)

convert definition list term/value pair into Nokogiri XML attribute
# Copies the value of a definition-list entry onto +elem+ as an attribute.
#
# Looks in +dlist+ for a dt whose text equals +name+, takes the text of the
# first following dd (or of a p inside a following dd, if there is one), and
# assigns it to elem[name]. Does nothing when no such dt or dd is found.
#
# @param elem attribute target, written with elem[name] = ...
# @param dlist node searched with XPath for the dt
# @param name [String] term text, also used as the attribute name
# @return [String, nil] the assigned text, or nil when nothing was assigned
def dl_to_attrs(elem, dlist, name)
  term = dlist.at("./dt[text()='#{name}']")
  return unless term

  value = term.at("./following::dd/p") || term.at("./following::dd")
  return unless value

  elem[name] = value.text
end

def dl_to_elems(ins, elem, dlist, name)

convert definition list term/value pairs into Nokogiri XML elements
# Turns every definition-list entry whose dt text equals +name+ into an
# element called +name+, inserted one after another.
#
# Insertion starts after the last existing +name+ child of +elem+ if there is
# one, otherwise after +ins+.
#
# @param ins node after which insertion starts by default
# @param elem node searched for an existing +name+ child
# @param dlist node searched with XPath for the dt entries
# @param name [String] term text, also the name of the created elements
# @return the last inserted node, or the starting insertion point when no
#   entry matched
def dl_to_elems(ins, elem, dlist, name)
  start = elem.at("./#{name}[last()]") || ins
  dlist.xpath("./dt[text()='#{name}']").inject(start) do |previous, term|
    dl_to_elems1(term, name, previous)
  end
end

def dl_to_elems1(term, name, ins)

# Moves the value of one definition-list entry after +ins+, renamed to +name+.
#
# The value is the first dd following +term+; when that dd has exactly one
# child element and it is a p, the p is used instead of the dd.
#
# @param term the dt node
# @param name [String] new element name for the moved node
# @param ins node after which the value is inserted
# @return the node that now follows +ins+
def dl_to_elems1(term, name, ins)
  value = term.at("./following::dd")
  children = value.elements
  sole_para = children && children.size == 1 && children.first.name == "p"
  value = children.first if sole_para
  value.name = name
  ins.next = value
  ins.next
end

def endash_date(elem)

# Replaces hyphen runs in the text nodes under +elem+ with the en dash
# character reference "&#8211;".
#
# A single or double hyphen surrounded by whitespace is replaced together with
# that whitespace; any remaining "--" is replaced afterwards.
#
# @param elem node whose descendants are traversed
# @return whatever +elem.traverse+ returns
def endash_date(elem)
  elem.traverse do |node|
    next unless node.text?

    spaced = node.text.gsub(/\s+--?\s+/, "&#8211;")
    node.replace(spaced.gsub("--", "&#8211;"))
  end
end

def external_path(path)

# Converts +path+ to the form expected by the host platform.
#
# On Windows (detected from RUBY_PLATFORM) forward slashes become backslashes
# and the result is wrapped in double quotes when it contains whitespace. On
# every other platform the path is returned unchanged.
#
# The argument is never modified; the previous in-place gsub! altered the
# caller's string and raised FrozenError for frozen strings on Windows.
#
# @param path [String]
# @return [String]
def external_path(path)
  win = RUBY_PLATFORM.match?(/(win|w)(32|64)$/) ||
    RUBY_PLATFORM.match?(/mswin|mingw/)
  return path unless win

  # Block form: the replacement is a literal backslash, no escape processing.
  converted = path.gsub("/") { "\\" }
  converted.match?(/\s/) ? "\"#{converted}\"" : converted
end

def firstchar_xml(line)

need to deal with both <em> and its reverse string, >me<
# Returns the first character of +line+ that follows any leading run of tags.
#
# A tag here is an angle bracket, non-bracket characters, and another angle
# bracket, in either direction, so a reversed string such as ">me<" is skipped
# as well.
#
# @param line [String]
# @return [String] one character, or "" when there is none
def firstchar_xml(line)
  found = /^([<>][^<>]+[<>])*(.)/.match(line)
  found ? found[2] : ""
end

def guid_anchor?(id)

# Tells whether +id+ is an underscore followed by a GUID in 8-4-4-4-12
# hexadecimal form (case-insensitive).
#
# The pattern is anchored with \A and \z so the whole string must match; the
# line anchors ^ and $ accepted multi-line strings with one GUID-shaped line
# and ids ending in a newline.
#
# @param id [String, nil]
# @return [Boolean] false for nil
def guid_anchor?(id)
  /\A_[0-9A-F]{8}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{12}\z/i
    .match?(id)
end

def line_sanitise(ret)

By default, carriage return in source translates to whitespace;
but in CJK, it does not. (Non-CJK text \n CJK)
# Appends a joining space to each line of +ret+ except the last, unless the
# junction with the next line involves CJK text.
#
# No space is added when the line ends in a CJK character and the next line
# starts with CJK or with no text character, nor when the line ends with no
# text character and the next line starts with CJK. A text character is one
# outside the Unicode separator (Z) and other (C) categories.
#
# @param ret [Array<String>] lines; entries are replaced in place
# @return [Array<String>] the same array
def line_sanitise(ret)
  return ret if ret.size == 1

  (ret.size - 1).times do |i|
    tail = firstchar_xml(ret[i].reverse)
    head = firstchar_xml(ret[i + 1])
    tail_cjk = /#{CJK}/o.match?(tail)
    head_cjk = /#{CJK}/o.match?(head)
    tail_text = /[^\p{Z}\p{C}]/.match?(tail)
    head_text = /[^\p{Z}\p{C}]/.match?(head)
    no_space = (tail_cjk && (head_cjk || !head_text)) ||
      (!tail_text && head_cjk)
    ret[i] += " " unless no_space
  end
  ret
end

def localdir(node)

# Returns the directory of the document file recorded on +node+, with a
# trailing slash.
#
# @param node [#attr] object whose "docfile" attribute is read
# @return [String] "./" when the attribute is nil
def localdir(node)
  docfile = node.attr("docfile")
  return "./" if docfile.nil?

  parent = Pathname.new(docfile).parent
  "#{parent}/"
end

def noko(_script = "Latn", &block)

block for processing XML document fragments as XHTML,
to allow for HTMLentities
Unescape special chars used in Asciidoctor substitution processing
# Builds an XML fragment with a Nokogiri builder block and serialises it.
#
# The fragment is created in a document parsed from NOKOHEAD. In the output,
# the character references &#150;, &#151;, &#x96; and &#x97; are replaced by
# the characters U+0096 and U+0097 themselves.
#
# @param _script [String] not used by this method
# @yield the builder block passed to Nokogiri::XML::Builder.with
# @return [String] UTF-8 XML, serialised with indent 0
def noko(_script = "Latn", &block)
  fragment = ::Nokogiri::XML.parse(NOKOHEAD).fragment("")
  ::Nokogiri::XML::Builder.with(fragment, &block)
  xml = fragment.to_xml(
    encoding: "UTF-8", indent: 0,
    save_with: Nokogiri::XML::Node::SaveOptions::AS_XML
  )
  unescapes = [["&#150;", "\u0096"], ["&#151;", "\u0097"],
               ["&#x96;", "\u0096"], ["&#x97;", "\u0097"]]
  unescapes.inject(xml) { |out, (entity, char)| out.gsub(entity, char) }
end

def noko_html(&block)

# Builds an XML fragment with a Nokogiri builder block and returns its
# serialisation line by line.
#
# @yield the builder block passed to Nokogiri::XML::Builder.with
# @return [Array<String>] the lines of the serialised fragment, each with its
#   line break and the whitespace before it removed
def noko_html(&block)
  fragment = ::Nokogiri::XML.parse(NOKOHEAD).fragment("")
  ::Nokogiri::XML::Builder.with(fragment, &block)
  xml = fragment.to_xml(
    encoding: "UTF-8", indent: 0,
    save_with: Nokogiri::XML::Node::SaveOptions::AS_XML
  )
  xml.lines.map { |line| line.gsub(/\s*\n/, "") }
end

def ns(xpath)

# Prefixes the element names in an XPath expression with "xmlns:".
#
# Names are prefixed after "/", after an axis separator "::", and at the start
# of a predicate when followed by "=", "/", "[" or "]".
#
# @param xpath [String]
# @return [String] the rewritten expression
def ns(xpath)
  rules = [
    [%r{/([a-zA-Z])}, "/xmlns:\\1"],
    [%r{::([a-zA-Z])}, "::xmlns:\\1"],
    [%r{\[([a-zA-Z][a-z0-9A-Z@/-]* ?=)}, "[xmlns:\\1"],
    [%r{\[([a-zA-Z][a-z0-9A-Z@/-]*[/\[\]])}, "[xmlns:\\1"]
  ]
  rules.inject(xpath) do |path, (pattern, prefixed)|
    path.gsub(pattern, prefixed)
  end
end

def numeric_escapes(xml)

# Rewrites the named entities in +xml+ as hexadecimal character references.
#
# The text is split around "&name;" tokens (no "#" in the name); each token is
# decoded and re-encoded with HTMLEntities in :hexadecimal mode, and
# everything else is left as is.
#
# @param xml [String]
# @return [String]
def numeric_escapes(xml)
  coder = HTMLEntities.new
  tokens = xml.split(/(&[^ \r\n\t#&;]+;)/)
  recoded = tokens.map do |token|
    next token unless /^(&[^ \t\r\n#;]+;)/.match?(token)

    coder.encode(coder.decode(token), :hexadecimal)
  end
  recoded.join
end

def rtl_script?(script)

# Tells whether +script+ is one of the scripts listed here as right-to-left:
# Arab, Aran or Hebr.
#
# @param script [String] script code
# @return [Boolean]
def rtl_script?(script)
  ["Arab", "Aran", "Hebr"].any? { |rtl| rtl == script }
end

def set_nested_value(hash, keys, new_val)

mod from https://stackoverflow.com/a/42425884
Set hash value using keys path
# Stores +new_val+ in +hash+ at the nested position given by +keys+, creating
# intermediate hashes along the way. Existing values are accumulated rather
# than overwritten: a slot that already holds a value becomes an array.
#
# @param hash [Hash] hash to update; modified in place
# @param keys [Array] path of keys, outermost first
# @param new_val [Object] value to store at the end of the path
# @return [Hash] the same +hash+ object
def set_nested_value(hash, keys, new_val)
  key = keys[0]
  if keys.length == 1
    # Last key of the path: append to an existing array, store the value
    # directly in a nil slot, otherwise pair the existing value with the new one.
    hash[key] = if hash[key].is_a?(::Array) then (hash[key] << new_val)
                else hash[key].nil? ? new_val : [hash[key], new_val]
                end
  elsif hash[key].is_a?(::Array)
    # Intermediate slot is an array: descend into its last element, first
    # making sure that element is a hash (a nil last element is replaced, and
    # a new hash is appended when the array is empty or ends in a non-hash).
    hash[key][-1] = {} if !hash[key].empty? && hash[key][-1].nil?
    hash[key] << {} if hash[key].empty? || !hash[key][-1].is_a?(::Hash)
    set_nested_value(hash[key][-1], keys[1..-1], new_val)
  elsif hash[key].nil? || hash[key].empty?
    # Nothing usable stored yet (nil or empty value): start a fresh hash.
    hash[key] = {}
    set_nested_value(hash[key], keys[1..-1], new_val)
  elsif hash[key].is_a?(::Hash) && !hash[key][keys[1]]
    # Existing hash with no truthy value under the next key: descend into it.
    set_nested_value(hash[key], keys[1..-1], new_val)
  elsif !hash[key][keys[1]]
    # Existing non-hash value (presumably a scalar such as a String) that
    # yields nothing for the next key: keep it and add a hash beside it.
    hash[key] = [hash[key], {}]
    set_nested_value(hash[key][-1], keys[1..-1], new_val)
  else
    # The next key already has a value: descend, and let the deeper call
    # accumulate.
    set_nested_value(hash[key], keys[1..-1], new_val)
  end
  hash
end

def smartformat(text)

TODO needs internationalisation of quote
# Applies dash and smart-quote formatting to +text+.
#
# " - " and " -- " become an em dash between thin spaces, and any remaining
# "--" becomes an em dash. Entities are then decoded. A U+200A character is
# placed between a CJK character and an adjacent straight quote before
# +smart_format+ is called, and removed again afterwards. The result is
# re-encoded with HTMLEntities in :basic mode.
#
# @param text [String]
# @return [String]
def smartformat(text)
  dashed = text.gsub(/ --? /, "&#8201;&#8212;&#8201;")
    .gsub("--", "&#8212;")
  decoded = HTMLEntities.new.decode(dashed)
  guarded = decoded.gsub(%r{(#{CJK})(["'])}o, "\\1\u200a\\2")
    .gsub(%r{(["'])(#{CJK})}o, "\\1\u200a\\2")
  formatted = guarded.smart_format
  unguarded = formatted.gsub(%r{(#{CJK})\u200a}o, "\\1")
    .gsub(%r{\u200a(#{CJK})}o, "\\1")
  HTMLEntities.new.encode(unguarded, :basic)
end

def strict_capitalize_first(str)

# Upcases the first character of the first space-separated word of +str+,
# leaving every other character as it is.
#
# When +str+ starts with a space the first word is empty and nothing is
# upcased; this case used to raise NoMethodError on nil.
#
# @param str [String]
# @return [String]
def strict_capitalize_first(str)
  str.split(/ /).each_with_index.map do |w, i|
    letters = w.chars
    # &. guards the empty first word produced by a leading space
    letters.first&.upcase! if i.zero?
    letters.join
  end.join(" ")
end

def strict_capitalize_phrase(str)

# Upcases the first character of every space-separated word of +str+, leaving
# every other character as it is.
#
# Leading or consecutive spaces produce empty words, which are passed through
# unchanged; they used to raise NoMethodError on nil.
#
# @param str [String]
# @return [String]
def strict_capitalize_phrase(str)
  str.split(/ /).map do |w|
    letters = w.chars
    # &. guards the empty words produced by leading or doubled spaces
    letters.first&.upcase!
    letters.join
  end.join(" ")
end

def to_ncname(name, asciionly: false)

A utility method for escaping XML NCNames (XML Names without colons).

to_ncname('1 < 2 & 3')
# => "1___2___3"

It follows the requirements of the specification for NCName: https://www.w3.org/TR/xml-names/#NT-NCName
NCName is "an XML Name, minus the :"
# Escapes +name+ so that it can be used as an XML NCName.
#
# The name is normalised by to_ncname_prep and returned as is when reported
# valid. Otherwise NCNAME_INVALID replaces a first character matching
# INVALID_NCNAME_START_REGEXP, and, in the rest of the name, every character
# matching INVALID_NCNAME_CHAR_REGEXP and every colon.
#
# @param name [#to_s, nil]
# @param asciionly [Boolean] forwarded to to_ncname_prep
# @return [String]
def to_ncname(name, asciionly: false)
  name, valid = to_ncname_prep(name, asciionly)
  return name if valid

  head = name[0].gsub(INVALID_NCNAME_START_REGEXP, NCNAME_INVALID)
  return head if name.size == 1

  tail = name[1..-1]
    .gsub(INVALID_NCNAME_CHAR_REGEXP, NCNAME_INVALID)
    .gsub(":", NCNAME_INVALID)
  head << tail
end

def to_ncname_prep(name, asciionly)

# Normalises +name+ to a String and reports whether it needs no escaping.
#
# nil becomes "". With +asciionly+ the string is first encoded by
# HTMLEntities in :basic and :hexadecimal modes.
#
# @param name [#to_s, nil]
# @param asciionly [Boolean]
# @return [Array(String, Boolean)] the string, and true when it is empty or
#   matches SAFE_NCNAME_REGEXP
def to_ncname_prep(name, asciionly)
  str = name.to_s
  str = HTMLEntities.new.encode(str, :basic, :hexadecimal) if asciionly
  [str, str.empty? || str.match?(SAFE_NCNAME_REGEXP)]
end

def to_xhtml_fragment(xml)

# Parses +xml+ as a fragment of a document created from NOKOHEAD.
#
# @param xml [String] markup to parse
# @return the fragment returned by Nokogiri
def to_xhtml_fragment(xml)
  ::Nokogiri::XML.parse(NOKOHEAD).fragment(xml)
end

def wrap_in_para(node, out)

if the contents of node are blocks, output them to out;
else, wrap them in <p>

# Writes the content of +node+ to +out+: directly when the node reports
# blocks, otherwise inside a p element created on +out+.
#
# @param node [#blocks?, #content]
# @param out builder responding to << and p
# @return the result of the << or p call
def wrap_in_para(node, out)
  return out << node.content if node.blocks?

  out.p { |para| para << node.content }
end