require "asciidoctor"
require "tempfile"
require "sterile"
require "htmlentities"
require "nokogiri"
require "csv"
require_relative "../sterile/sterile"
require_relative "cjk"
module Metanorma
module Utils
class << self
# , " => ," : CSV definition does not deal with space followed by quote
# at start of field
# Split a delimited list into stripped fields, honouring CSV quoting.
# A delimiter followed by a space and an opening quote is first rewritten to
# delimiter + quote, because the CSV parser only recognises a quote at the
# very start of a field.
# delim is escaped before being used in the pattern, so delimiters that are
# regex metacharacters (e.g. "|" or ".") are matched literally.
# Returns [] for nil or empty text; nil (empty) fields are dropped.
def csv_split(text, delim = ";")
  text.nil? || text.empty? and return []
  space_before_quote = /#{Regexp.escape(delim)} "(?!")/
  # block form: the replacement is taken literally, with no backslash
  # interpretation of delim
  normalized = text.gsub(space_before_quote) { "#{delim}\"" }
  fields = CSV.parse_line(normalized, liberal_parsing: true, col_sep: delim)
  fields&.compact&.map(&:strip)
end
# Parse text as an Asciidoctor document using the given backend, and return
# the source of its first block with that block's substitutions applied.
# Only the first parsed block is processed.
# nil input returns nil; empty input returns "".
def asciidoc_sub(text, flavour = :standoc)
  text.nil? and return nil
  text.empty? and return ""

  options = { header_footer: false, backend: flavour }
  document = Asciidoctor::Document.new(text.lines.entries, options)
  first_block = document.parse.blocks.first
  first_block.apply_subs(first_block.source)
end
# Directory of the document file recorded in the node's "docfile" attribute,
# with a trailing slash; "./" when the attribute is not set.
def localdir(node)
  path = node.attr("docfile")
  return "./" if path.nil?

  "#{Pathname.new(path).parent}/"
end
# TODO needs internationalisation of quote
# Typographic clean-up of a text string:
# 1. " -- " / " - " become a spaced em dash, any remaining "--" an em dash;
# 2. HTML entities are decoded;
# 3. String#smart_format is applied (from the sterile gem; presumably curls
#    straight quotes -- confirm against sterile);
# 4. the result is re-encoded with the :basic HTML entity set.
# Around step 3, a hair space (U+200A) is temporarily placed between a CJK
# character and an adjacent straight quote, and removed again afterwards.
def smartformat(text)
  coder = HTMLEntities.new
  dashed = text.gsub(/ --? /, " — ").gsub("--", "—")
  decoded = coder.decode(dashed)
  guarded = decoded.gsub(%r{(#{CJK})(["'])}o, "\\1\u200a\\2")
    .gsub(%r{(["'])(#{CJK})}o, "\\1\u200a\\2")
  formatted = guarded.smart_format
  unguarded = formatted.gsub(%r{(#{CJK})\u200a}o, "\\1")
    .gsub(%r{\u200a(#{CJK})}o, "\\1")
  coder.encode(unguarded, :basic)
end
# Walk every node under elem and, in text nodes only, replace a hyphen or
# double hyphen surrounded by whitespace (and any remaining "--") with an
# en dash, dropping the surrounding whitespace. Nodes are replaced in place.
def endash_date(elem)
  elem.traverse do |node|
    next unless node.text?

    dashed = node.text.gsub(/\s+--?\s+/, "–").gsub("--", "–")
    node.replace(dashed)
  end
end
# Set hash value using keys path
# mod from https://stackoverflow.com/a/42425884
#
# Walks (and creates as needed) the nested hashes named by keys, then stores
# new_val under the final key. Existing values are never overwritten:
# - at the final key, an existing Array gets new_val appended, and an
#   existing non-nil scalar becomes [old, new_val];
# - on the way down, an Array is descended into through its last element
#   (a Hash is appended if the last element is not one), a nil or empty
#   value is replaced by a fresh Hash, and any other non-Hash value is
#   wrapped as [old, {}] so the remaining path is built in the new Hash.
# Mutates hash in place and returns it.
def set_nested_value(hash, keys, new_val)
  key = keys[0]
  current = hash[key]
  if keys.length == 1
    hash[key] =
      if current.is_a?(::Array) then current << new_val
      elsif current.nil? then new_val
      else [current, new_val]
      end
    return hash
  end

  rest = keys[1..-1]
  if current.is_a?(::Array)
    current[-1] = {} if !current.empty? && current[-1].nil?
    current << {} if current.empty? || !current[-1].is_a?(::Hash)
    set_nested_value(current[-1], rest, new_val)
  elsif current.nil? || (current.respond_to?(:empty?) && current.empty?)
    # respond_to? guard: scalars such as Integer or true have no #empty?
    hash[key] = {}
    set_nested_value(hash[key], rest, new_val)
  elsif current.is_a?(::Hash)
    set_nested_value(current, rest, new_val)
  else
    # scalar sitting where a container is needed: keep it, add a Hash beside it
    hash[key] = [current, {}]
    set_nested_value(hash[key][-1], rest, new_val)
  end
  hash
end
# Upcase the first character of every space-separated word, leaving the rest
# of each word untouched (unlike String#capitalize, which downcases it).
# Empty words, produced by leading or repeated spaces, are passed through
# unchanged instead of raising.
def strict_capitalize_phrase(str)
  str.split(/ /).map do |word|
    word.empty? ? word : word[0].upcase + word[1..-1]
  end.join(" ")
end
# Upcase the first character of the first space-separated word only, leaving
# every other character untouched.
# If the string starts with a space, the first word is empty and the string
# is returned without any capitalisation (instead of raising).
def strict_capitalize_first(str)
  words = str.split(/ /)
  head = words.first
  words[0] = head[0].upcase + head[1..-1] unless head.nil? || head.empty?
  words.join(" ")
end
# Render a path for use outside Ruby on the current platform.
# On Windows (detected from RUBY_PLATFORM) forward slashes become
# backslashes, and the result is wrapped in double quotes if it contains
# whitespace. On other platforms the path is returned as is.
# The argument is never modified, so frozen strings are accepted.
def external_path(path)
  win = RUBY_PLATFORM.match?(/(win|w)(32|64)$/) ||
    RUBY_PLATFORM.match?(/mswin|mingw/)
  return path unless win

  converted = path.tr("/", "\\")
  converted.match?(/\s/) ? "\"#{converted}\"" : converted
end
# Default script code for a language code: the first table row listing lang
# wins; any language not listed falls back to "Latn".
def default_script(lang)
  scripts = [
    ["Arab", %w(ar fa)],
    ["Aran", %w(ur)],
    ["Cyrl", %w(ru bg)],
    ["Deva", %w(hi)],
    ["Grek", %w(el)],
    ["Hans", %w(zh)],
    ["Kore", %w(ko)],
    ["Hebr", %w(he)],
    ["Jpan", %w(ja)],
  ]
  row = scripts.find { |_script, langs| langs.include?(lang) }
  row ? row.first : "Latn"
end
# True when script is one of the script codes this module treats as
# right-to-left: Arab, Aran or Hebr.
def rtl_script?(script)
  case script
  when "Arab", "Aran", "Hebr" then true
  else false
  end
end
LONGSTR_THRESHOLD = 10
LONGSTR_NOPUNCT = 2

# break on punct every LONGSTR_THRESHOLD chars, with zero width space
# if punct fails, try break on camel case, with soft hyphen
# break regardless every LONGSTR_THRESHOLD * LONGSTR_NOPUNCT,
# with soft hyphen
#
# text is split into words at each whitespace or hyphen (the separator stays
# at the start of the following word). Words shorter than threshold and
# whitespace-only words are kept as is; longer words are cut into chunks of
# threshold characters, and every full-size chunk is handed to
# break_up_long_str1 together with its 1-based position and nopunct.
# Whitespace-only text is returned unchanged.
def break_up_long_str(text, threshold = LONGSTR_THRESHOLD,
                      nopunct = LONGSTR_NOPUNCT)
  # \A/\z rather than ^/$: the latter match around any single line, so text
  # merely containing an empty line would be mistaken for blank
  /\A\s*\z/.match?(text) and return text
  # built per call (no /o) so that threshold is honoured on every call;
  # /m keeps a word's leading newline inside its chunk
  chunk_re = /.{,#{threshold}}/m
  text.split(/(?=(?:\s|-))/).map do |w|
    if /\A\s*\z/.match?(w) || (w.size < threshold) then w
    else
      w.scan(chunk_re).map.with_index do |w1, i|
        w1.size < threshold ? w1 : break_up_long_str1(w1, i + 1, nopunct)
      end.join
    end
  end.join
end
# Zero-width positions at which a long string may be broken on punctuation.
# Every alternative is built only from lookbehind/lookahead assertions, so
# splitting on this pattern removes no characters. Used first by
# break_up_long_str2; the per-alternative notes are inline.
STR_BREAKUP_RE = %r{
(?<=[=_—–\u2009→?+;]) | # break after any of these
(?<=[,.:])(?!\d) | # break on punct only if not preceding digit
(?<=[>])(?![>]) | # > not >->
(?<=[\]])(?![\]]) | # ] not ]-]
(?<=//) | # //
(?<=[/])(?![/]) | # / not /-/
(?<![<])(?=[<]) | # < not <-<
(?<=\p{L})(?=[(\{\[]\p{L}) # letter and bracket, followed by letter
}x.freeze
# Zero-width camel-case boundary: between two lowercase letters and an
# uppercase letter that is itself followed by two lowercase letters.
# Used by break_up_long_str2 as the fallback when STR_BREAKUP_RE finds no
# break position.
CAMEL_CASE_RE = %r{
(?<=\p{Ll}\p{Ll})(?=\p{Lu}\p{Ll}\p{Ll}) # 2 lowerc / upperc, 2 lowerc
}x.freeze
# Insert one break character into a chunk of a long word.
# The chunk is split by break_up_long_str2; when it yields several segments,
# the returned separator is placed before the last segment only. When the
# chunk cannot be split, a soft hyphen (U+00AD) is appended on every
# nopunct-th chunk (iteration is the chunk's 1-based position); otherwise
# the chunk is returned unchanged.
def break_up_long_str1(text, iteration, nopunct)
  segments, separator = break_up_long_str2(text)
  unless segments.size == 1
    segments[-1] = "#{separator}#{segments[-1]}"
    return segments.join
  end

  # could not break up: force a soft hyphen on every nopunct-th chunk
  (iteration % nopunct).zero? ? text + "\u00ad" : text
end
# Split text at candidate break positions and report which break character
# goes with them. Returns [segments, separator]:
# - punctuation positions (STR_BREAKUP_RE) pair with a zero width space
#   (U+200B); trailing empty segments are kept, so a break position at the
#   very end of text still counts;
# - if punctuation yields a single segment, camel-case positions
#   (CAMEL_CASE_RE) are tried instead, paired with a soft hyphen (U+00AD).
def break_up_long_str2(text)
  by_punct = text.split(STR_BREAKUP_RE, -1)
  return [by_punct, "\u200b"] unless by_punct.size == 1

  [text.split(CAMEL_CASE_RE), "\u00ad"]
end
end
end
end