require "set"
require "relaton_bib"
module Metanorma
module Standoc
module Cleanup
# Replaces each clause marked bibitem="true" with the bibitem XML that
# RelatonBib generates from the bibliographic hash extracted from that clause.
# Clauses yielding no hash, or no XML, are left as they are.
def ref_dl_cleanup(xmldoc)
  xmldoc.xpath("//clause[@bibitem = 'true']").each do |clause|
    bib = dl_bib_extract(clause)
    next unless bib

    validate_ref_dl(bib, clause)
    xml = RelatonBib::BibliographicItem.from_hash(bib).to_xml
    next unless xml

    root = Nokogiri::XML(xml).root
    clause_id = clause["id"]
    # A clause id starting with "_" is not copied onto the generated bibitem.
    root["id"] = clause_id if clause_id && !/^_/.match?(clause_id)
    clause.replace(root)
  end
end
# Checks that a reference carries an explicit anchor. The anchor is bib["id"],
# or failing that the clause id, unless that id starts with "_" (implicit ids
# are not accepted). A missing anchor is logged under "Anchors" and nil is
# returned; otherwise the anchor is recorded in @refids and the remaining
# checks are delegated to validate_ref_dl1.
def validate_ref_dl(bib, clause)
  clause_id = clause["id"]
  anchor = bib["id"]
  anchor ||= clause_id unless /^_/.match?(clause_id)
  if anchor
    @refids << anchor
    validate_ref_dl1(bib, anchor, clause)
  else
    @log.add("Anchors", clause,
             "The following reference is missing an anchor:\n"\
             "#{clause.to_xml}")
    nil
  end
end
# Logs a "Bibliography" warning for each of "title" and "docid" that is
# absent from the bibliographic hash. Returns bib["docid"] when present,
# otherwise whatever the logger returns for the docid warning.
def validate_ref_dl1(bib, id, clause)
  unless bib["title"]
    @log.add("Bibliography", clause, "Reference #{id} is missing a title")
  end
  docid = bib["docid"]
  return docid if docid

  @log.add("Bibliography", clause,
           "Reference #{id} is missing a document identifier (docid)")
end
# Wraps the children of the first "p" found under bib[tag] in a <key> element
# and returns the result as a string. Returns nil when bib[tag] is absent.
def extract_from_p(tag, bib, key)
  node = bib[tag]
  return unless node

  inner = node.at("p").children
  "<#{key}>#{inner}</#{key}>"
end
# Serialises a node to XML with surrounding whitespace stripped.
# If the node's only element child is a paragraph, the paragraph's children
# are serialised instead (after a lone link inside it has been replaced by
# its target, via link_unwrap); otherwise the node itself is serialised.
def p_unwrap(para)
  kids = para.elements
  sole_paragraph = kids.size == 1 && kids[0].name == "p"
  return para.to_xml.strip unless sole_paragraph

  link_unwrap(kids[0]).children.to_xml.strip
end
# When the paragraph's only element child is a link, replaces that link with
# the stripped value of its "target" attribute. Always returns the paragraph.
def link_unwrap(para)
  kids = para.elements
  return para unless kids.size == 1 && kids[0].name == "link"

  target = kids[0]["target"].strip
  para.at("./link").replace(target)
  para
end
# Converts the content of a definition (dd) into a value for the
# bibliographic hash:
# - nil when the node has no children;
# - the result of dl_bib_extract when it contains a nested definition list;
# - an array with one p_unwrap result per list item when its only element
#   child is an ol or ul;
# - otherwise the p_unwrap result for the node itself.
# The node is removed from its parent once it is known not to hold a nested
# definition list.
def dd_bib_extract(dtd)
  return nil if dtd.children.empty?
  return dl_bib_extract(dtd) if dtd.at("./dl")

  kids = dtd.remove.elements
  single_list = kids.size == 1 && %w(ol ul).include?(kids[0].name)
  return p_unwrap(dtd) unless single_list

  kids[0].xpath("./li").map { |item| p_unwrap(item) }
end
# Stores val in bib under a dot-separated key: the key is split on "." and
# the resulting path is handed to Metanorma::Utils.set_nested_value.
def add_to_hash(bib, key, val)
  path = key.split(".")
  Metanorma::Utils.set_nested_value(bib, path, val)
end
# Builds a bibliographic hash from the definition list directly under clause
# (a definition list, with at most one level of unordered lists).
# Each dt supplies the key (trailing colons stripped) for the dd entries that
# follow it. Subclauses titled "contributor", "relation" or "series"
# (case-insensitive, trimmed) are extracted recursively with nested = true and
# stored under that title. The clause title is merged in at the end by
# dl_bib_extract_title. Returns nil when the clause has no definition list.
def dl_bib_extract(clause, nested = false)
  dl = clause.at("./dl")
  return unless dl

  key = ""
  bib = {}
  dl.xpath("./dt | ./dd").each do |item|
    if item.name == "dt"
      key = item.text.sub(/:+$/, "")
    else
      add_to_hash(bib, key, dd_bib_extract(item))
    end
  end
  clause.xpath("./clause").each do |sub|
    heading = sub.at("./title")&.text&.downcase&.strip
    next unless %w(contributor relation series).include?(heading)

    add_to_hash(bib, heading, dl_bib_extract(sub, true))
  end
  dl_bib_extract_title(bib, clause, nested)
end
# For a top-level (non-nested) clause that has a title, removes the title
# element and appends its inner XML to bib["title"]. An existing Hash or
# String title is first wrapped in an array, and a missing one becomes an
# empty array; an empty clause title is not appended. Returns bib in every case.
def dl_bib_extract_title(bib, clause, nested)
  return bib if nested

  heading = clause.at("./title")
  return bib unless heading

  title = heading.remove.children.to_xml
  existing = bib["title"]
  existing = [existing] if existing.is_a?(Hash) || existing.is_a?(String)
  bib["title"] = existing || []
  bib["title"] << title unless title.empty?
  bib
end
# ---
# For every bibitem whose formattedref contains span markup, extracts the
# span and docidentifier content, groups it, and appends the corresponding
# bibliographic elements to the bibitem.
def formattedref_spans(xmldoc)
  xmldoc.xpath("//bibitem[formattedref//span]").each do |bibitem|
    content = extract_content(bibitem)
    grouped = spans_preprocess(content)
    spans_to_bibitem(bibitem, grouped)
  end
end
# Returns the bibitem's docidentifier entries followed by its span entries.
# Document identifiers are extracted first, then the spans.
def extract_content(bib)
  docids = extract_docid(bib)
  docids + extract_spans(bib)
end
# Collects the semantic spans found anywhere under the bibitem's formattedref.
# Each span's class attribute is read as "key" or "key.type" and yields an
# entry { key:, type:, val: } whose val is the span's inner XML.
# A span with class "type" is removed from the formattedref altogether; any
# other classed span is replaced by its own children (i.e. unwrapped).
# Spans without a class attribute carry no bibliographic key: they are
# skipped and left in place rather than raising on nil.
def extract_spans(bib)
  bib.xpath("./formattedref//span").each_with_object([]) do |span, acc|
    css_class = span["class"]
    next unless css_class

    key, type = css_class.split(".", 2)
    acc << { key: key, type: type, val: span.children.to_xml }
    if css_class == "type"
      span.remove
    else
      span.replace(span.children)
    end
  end
end
# Turns each docidentifier directly under the bibitem into a
# { key: "docid", type:, val: } entry and removes the element from the
# bibitem. val is the element's text content.
# NOTE(review): val is plain text, not serialised XML, and appears to be
# interpolated into markup later on; confirm that identifiers containing
# "&" or "<" are handled downstream.
def extract_docid(bib)
  bib.xpath("./docidentifier").map do |docid|
    entry = { key: "docid", type: docid["type"], val: docid.text }
    docid.remove
    entry
  end
end
# Groups the flat list of extracted span entries into a hash:
# - "uri" and "docid" entries are collected under :uri / :docid with their type;
# - "pubyear" becomes a :date entry of type "published";
# - "pubplace", "title" and "type" are stored as single values
#   (a later entry overwrites an earlier one);
# - "publisher" becomes an organization contributor with role "publisher";
# - name parts are folded into person contributors by spans_preprocess_contrib.
# Entries with any other key are ignored.
def spans_preprocess(spans)
  grouped = { contributor: [], docid: [], uri: [], date: [] }
  spans.each_with_object(grouped) do |span, acc|
    key = span[:key]
    value = span[:val]
    case key
    when "uri", "docid"
      acc[key.to_sym] << { type: span[:type], val: value }
    when "pubyear"
      acc[:date] << { type: "published", val: value }
    when "pubplace", "title", "type"
      acc[key.to_sym] = value
    when "publisher"
      acc[:contributor] << { role: "publisher", entity: "organization",
                             name: value }
    when "surname", "initials", "givenname", "formatted-initials"
      acc[:contributor] = spans_preprocess_contrib(span, acc[:contributor])
    end
  end
end
# Folds one name-part span into the contributor list and returns the list.
# An "initials" span is renamed in place to "formatted-initials". When
# spans_preprocess_new_contrib? says so, a new person entry is started, with
# the span's type as role (default "author"); the span's value is then stored
# on the last contributor under the span's key.
def spans_preprocess_contrib(span, contrib)
  span[:key] = "formatted-initials" if span[:key] == "initials"
  if spans_preprocess_new_contrib?(span, contrib)
    role = span[:type] || "author"
    contrib << { role: role, entity: "person" }
  end
  contrib.last[span[:key].to_sym] = span[:val]
  contrib
end
# Decides whether a name-part span starts a new contributor. The result is
# truthy when there is no contributor yet, when the last contributor already
# holds the same kind of name part (a surname for a surname span; initials or
# a given name for any other span), or when the span's role (default
# "author") differs from the last contributor's role.
def spans_preprocess_new_contrib?(span, contrib)
  return true if contrib.empty?

  last = contrib[-1]
  already_filled = if span[:key] == "surname"
                     last[:surname]
                   else
                     last[:"formatted-initials"] || last[:givenname]
                   end
  already_filled || last[:role] != (span[:type] || "author")
end
# Appends the bibliographic elements derived from the grouped spans to the
# bibitem, in this order: title, uri, docidentifier, date, contributor, place.
# A :type span sets the bibitem's "type" attribute instead of adding markup.
# bib is the bibitem to extend; spans is the hash built by spans_preprocess.
# Returns the result of appending the generated markup to bib.
def spans_to_bibitem(bib, spans)
  parts = []
  parts << "<title>#{spans[:title]}</title>" if spans[:title]
  parts.concat(spans[:uri].map { |s| span_to_docid(s, "uri") })
  parts.concat(spans[:docid].map { |s| span_to_docid(s, "docidentifier") })
  parts.concat(spans[:date].map { |s| span_to_docid(s, "date") })
  parts.concat(spans[:contributor].map { |s| span_to_contrib(s) })
  # The place of publication is stored under :pubplace; reading :place here
  # always produced an empty <place/> element.
  parts << "<place>#{spans[:pubplace]}</place>" if spans[:pubplace]
  bib["type"] = spans[:type] if spans[:type]
  bib << parts.join
end
# Renders a span entry as a <key> element containing the span's value,
# with a type attribute when the span has a type.
def span_to_docid(span, key)
  type_attr = span[:type] ? " type='#{span[:type]}'" : ""
  "<#{key}#{type_attr}>#{span[:val]}</#{key}>"
end
# Renders a contributor entry as a <contributor> element: a role element
# typed with the entry's role, followed by either an organization (for
# entity "organization") or a person built by span_to_person.
def span_to_contrib(span)
  entity = case span[:entity]
           when "organization"
             "<organization><name>#{span[:name]}</name></organization>"
           else
             span_to_person(span)
           end
  "<contributor><role type='#{span[:role]}'/>#{entity}</contributor>"
end
# Renders a person contributor as <person><name>…</name></person>.
# Formatted initials take precedence; without them a <forename> element is
# emitted from the given name (empty when none is set). The surname follows.
def span_to_person(span)
  initials = span[:"formatted-initials"]
  given = if initials
            "<formatted-initials>#{initials}</formatted-initials>"
          else
            "<forename>#{span[:givenname]}</forename>"
          end
  "<person><name>#{given}<surname>#{span[:surname]}</surname></name></person>"
end
end
end
end