gem.sh

lib/kramdown-rfc2629.rb

# -*- coding: utf-8 -*-
#
#--
# Copyright (C) 2009-2010 Thomas Leitner <t_leitner@gmx.at>
# Copyright (C) 2010-2014 Carsten Bormann <cabo@tzi.org>
#
# This file was derived from a part of the kramdown gem which is licensed under the MIT license.
# This derived work is also licensed under the MIT license, see LICENSE.
#++
#
require 'shellwords'

raise "sorry, 1.8 was last decade" unless RUBY_VERSION >= '1.9'

gem 'kramdown', '~> 2.4.0'
require 'kramdown'
my_span_elements =  %w{list xref eref iref cref spanx vspace}
Kramdown::Parser::Html::Constants::HTML_SPAN_ELEMENTS.concat my_span_elements

require 'rexml/parsers/baseparser'
require 'open3'                 # for math
require 'json'                  # for math
require 'rexml/document'        # for SVG and bibxml acrobatics

require 'kramdown-rfc/doi'      # for fetching information for a DOI
require 'kramdown-rfc/rfc8792'

class Object
  def deep_clone
    Marshal.load(Marshal.dump(self))
  end
end

module Kramdown
  RFCXML_SPAN_ELEMENTS = Set.new(Kramdown::Parser::Html::Constants::HTML_SPAN_ELEMENTS)

  Kramdown::Options.define(:ol_start_at_first_marker, Kramdown::Options::Boolean, false, <<~EOF)
      If this option is `true`, an ordered list (<ol) will use the
      number in its first marker (1 for 1. etc.) as the default value
      of the start= attribute.

      Default: false (for backward compatibility)
      Used by: RFCXML converter
    EOF

  Kramdown::Options.define(:nested_ol_types, Object, %w[1], <<~EOF) do |val|
      Values for type= attribute for nested ordered lists (ol).
      The value needs to be an array of <ol type= values, expressed as one of:
      1. A YAML array
      2. A string that will be split on commas (with optional blank space following)
      3. A string that will be split on blank space

      Default: ["1"]
      Used by: RFCXML converter
    EOF
    val = case val
          when String
            if val[0] == "[" && val[-1] == "]"
              begin
                val = YAML.safe_load(val)
              rescue Psych::SyntaxError
                warn "** YAML syntax error in nested_ol_types=#{val.inspect}"
                val = %w[1]
              end
            else
              val = val.split(/, */)
              val = val[0].split(/ +/) if val.size == 1
            end
            Kramdown::Options.simple_array_validator(val, :nested_ol_types)
          when Array
            val.map!{ |x| x.to_s }
            val = Kramdown::Options.simple_array_validator(val, :nested_ol_types)
          else
            raise Kramdown::Error, "Invalid value for option '#{:nested_ol_types}': '#{val.inspect}'"
          end
    if val == []
      val = %w[1]
      warn "** Option #{:nested_ol_types} cannot be empty, defaulting to #{val.inspect}"
    end
    val
  end


  module Parser

    class RFC2629Kramdown < Kramdown

      def replace_abbreviations(el, regexps = nil)
        unless regexps          # DUPLICATED AND MODIFIED CODE FROM UPSTREAM, CHECK ON UPSTREAM UPGRADE
          sorted_abbrevs = @root.options[:abbrev_defs].keys.sort {|a, b| b.length <=> a.length }
          regexps = [Regexp.union(*sorted_abbrevs.map {|k|
                                    /#{Regexp.escape(k).gsub(/\\\s/, "[\\s\\p{Z}]+").force_encoding(Encoding::UTF_8)}/})]
          regexps << /(?=(?:\W|^)#{regexps.first}(?!\w))/ # regexp should only match on word boundaries
          # warn regexps.inspect
        end
        super(el, regexps)
      end


      def initialize(*doc)
        super
        @span_parsers.unshift(:xref)
        @span_parsers.unshift(:iref)
        @span_parsers.unshift(:span_pi)
        @block_parsers.unshift(:block_pi)
      end

      XREF_BASE = /#{REXML::XMLTokens::NAME_CHAR}+/ # a token for a reference
      XREF_TXT = /(?:[^\(]|\([^\)]*\))+/ # parenthesized text
      XREF_RE = /#{XREF_BASE}(?: \(#{XREF_TXT}\))?/
      XREF_RE_M = /\A(#{XREF_BASE})(?: \((#{XREF_TXT})\))?/ # matching version of XREF_RE
      XREF_SINGLE = /(?:Section|Appendix) #{XREF_RE}/
      XREF_MULTI = /(?:Sections|Appendices) (?:#{XREF_RE}, )*#{XREF_RE},? and #{XREF_RE}/
      XREF_ANY = /(?:#{XREF_SINGLE}|#{XREF_MULTI})/
      SECTIONS_RE = /(?:#{XREF_ANY} and )?#{XREF_ANY}/

      def self.idref_cleanup(href)
        # can't start an IDREF with a number or reserved start
        if href =~ / /
          if $options.v3
            warn "** space(s) in cross-reference '#{href}' -- are you trying to use section references?"
          else
            warn "** space(s) in cross-reference '#{href}' -- note that section references are supported in v3 mode only."
          end
        end
        href.gsub(/\A(?:[0-9]|section-|u-|figure-|table-|iref-)/) { "_#{$&}" }
      end

      def rfc_mention(target1)  # only works for RFCnnnn
        target1 =~ /\A([A-Z]*)(.*)\z/
        "#$1 #$2 "
      end

      def handle_bares(s, attr, format, href, last_join = nil)
        if s.match(/\A(#{XREF_ANY}) and (#{XREF_ANY})\z/)
          handle_bares($1, {}, nil, href, " and ")
          handle_bares($2, {}, nil, href, " of ")
          return
        end

        href = href.split(' ')[0] # Remove any trailing (...)
        target1, target2 = href.split("@", 2)
        multi = last_join != nil
        (sn, s) = s.split(' ', 2)
        loop do
          m = s.match(/\A#{XREF_RE_M}(, (?:and )?| and )?/)
          break if not m

          if not multi and not m[2] and not m[3] and not target2
            # Modify |attr| if there is a single reference.  This can only be
            # used if there is only one section reference and the section part
            # has no title.
            attr['section'] = m[1]
            attr['sectionFormat'] = format
            attr['text'] = m[2]
            return
          end

          if sn
            @tree.children << Element.new(:text, "#{sn} ", {})
            sn = nil
          end

          multi = true
          s[m[0]] = ''

          attr1 = { 'target' => target1, 'section' => m[1], 'sectionFormat' => 'bare', 'text' => m[2] }
          @tree.children << Element.new(:xref, nil, attr1)
          andof = m[3] || last_join || " of "
          if andof == " of " && target2
            andof += rfc_mention(target1)
          end
          @tree.children << Element.new(:text, andof, {})
        end
      end

      XREF_START = /\{\{(?:(?:\{(.*?\n??.*?)\}(?:\{(.*?\n??.*?)\})?)|(\X*?))((?:\}\})|\})/u

      # Introduce new {{target}} syntax for empty xrefs, which would
      # otherwise be an ugly ![!](target) or ![ ](target)
      # (I'd rather use [[target]], but that somehow clashes with links.)
      def parse_xref
        @src.pos += @src.matched_size
        unless @src[4] == "}}"
          warn "*** #{@src[0]}: unmatched braces #{@src[4].inspect}"
        end
        if contact_name = @src[1]
          attr = {'fullname' => contact_name.gsub("\n", " ")}
          if ascii_name = @src[2]
            attr["asciiFullname"] = ascii_name.gsub("\n", " ")
          end
          el = Element.new(:contact, nil, attr)
        else
          href = @src[3]
          attr = {}
          handled_subref = false
          if $options.v3
            # match Section ... of ...; set section, sectionFormat
            case href.gsub(/[\u00A0\s]+/, ' ') # may need nbsp and/or newlines
            when /\A(#{SECTIONS_RE}) of (.*)\z/
              href = $2
              handle_bares($1, attr, "of", href)
              handled_subref = true
            when /\A(.*), (#{SECTIONS_RE})\z/
              href = $1
              handle_bares($2, attr, "comma", href)
              handled_subref = true
            when /\A(.*) \((#{SECTIONS_RE})\)\z/
              href = $1
              handle_bares($2, attr, "parens", href)
              handled_subref = true
            when /#{XREF_RE_M}<(.+)\z/
              href = $3
              if $2
                attr['section'] = $2
                attr['relative'] = "#" << $1
              else
                attr['section'] = $1
              end
              attr['sectionFormat'] = 'bare'
            when /\A<<(.+)\z/
              href = $1
              attr['format'] = 'title'
            when /\A<(.+)\z/
              href = $1
              attr['format'] = 'counter'
            end
          end
          if href.match(/#{XREF_RE_M}\z/)
            href = $1
            attr['text'] = $2
          end
          target1, target2 = href.split("@", 2) # should do this only for sectionref...
          if target2
            href = target2
            unless handled_subref
              @tree.children << Element.new(:text, rfc_mention(target1), {})
            end
          end
          href = self.class.idref_cleanup(href)
          attr['target'] = href
          el = Element.new(:xref, nil, attr)
        end
        @tree.children << el
      end
      define_parser(:xref, XREF_START, '\{\{')

      IREF_START = /\(\(\((.*?)\)\)\)/u

      # Introduce new (((target))) syntax for irefs
      def parse_iref
        @src.pos += @src.matched_size
        href = @src[1]
        el = Element.new(:iref, nil, {'target' => href}) # XXX
        @tree.children << el
      end
      define_parser(:iref, IREF_START, '\(\(\(')

      # HTML_INSTRUCTION_RE = /<\?(.*?)\?>/m    # still defined!

      # warn [:OPT_SPACE, OPT_SPACE, HTML_INSTRUCTION_RE].inspect

      PI_BLOCK_START = /^#{OPT_SPACE}<\?/u

      def parse_block_pi
        # warn [:BLOCK].inspect
        line = @src.current_line_number
        if (result = @src.scan(HTML_INSTRUCTION_RE))
          @tree.children << Element.new(:xml_pi, result, nil, category: :block, location: line)
          @src.scan(TRAILING_WHITESPACE)
          true
        else
          false
        end
      end
      define_parser(:block_pi, PI_BLOCK_START)

      PI_SPAN_START = /<\?/u

      def parse_span_pi
        # warn [:SPAN].inspect
        line = @src.current_line_number
        if (result = @src.scan(HTML_INSTRUCTION_RE))
          @tree.children << Element.new(:xml_pi, result, nil, category: :span, location: line)
        else
          add_text(@src.getch)
        end
      end
      define_parser(:span_pi, PI_SPAN_START, '<\?')

      # warn [:HERE, @@parsers.keys].inspect

    end
  end

  class Element

    # Not fixing studly element names postalLine and seriesInfo yet

    # occasionally regenerate the studly attribute name list via
    # script in data/studly.rb
    STUDLY_ATTR = %w(
    asciiAbbrev asciiFullname asciiInitials asciiName asciiSurname
    asciiValue blankLines derivedAnchor derivedContent derivedCounter
    derivedLink displayFormat docName expiresDate hangIndent hangText
    indexInclude iprExtract keepWithNext keepWithPrevious originalSrc
    prepTime quoteTitle quotedFrom removeInRFC sectionFormat seriesNo
    showOnFrontPage slugifiedName sortRefs submissionType symRefs tocDepth
    tocInclude
    )
    STUDLY_ATTR_MAP = Hash[STUDLY_ATTR.map {|s| [s.downcase, s]}]

    TRUTHY = Hash.new {|h, k| k}
    TRUTHY["false"] = false
    TRUTHY["no"] = false

    # explicit or automatic studlification
    # note that explicit (including trailing "_") opts out of automatic
    def self.attrmangle(k)
      if (d = k.gsub(/_(.|$)/) { $1.upcase }) != k or d = STUDLY_ATTR_MAP[k]
        d
      end
    end

    def rfc2629_fix(opts)
      if a = attr
        if anchor = a.delete('id')
          a['anchor'] = ::Kramdown::Parser::RFC2629Kramdown.idref_cleanup(anchor)
        end
        if anchor = a.delete('href')
          a['target'] = ::Kramdown::Parser::RFC2629Kramdown.idref_cleanup(anchor)
        end
        if lang = a.delete('lang-')
          a['xml:lang'] = lang
        end
        if av = a.delete('noabbrev')      # pseudo attribute -> opts
          opts = opts.merge(noabbrev: TRUTHY[av]) # updated copy
        end
        attr.keys.each do |k|
          if d = self.class.attrmangle(k)
            a[d] = a.delete(k)
          end
        end
      end
      opts
    end
  end

  module Converter

    # Converts a Kramdown::Document to HTML.
    class Rfc2629 < Base

      # we use these to do XML stuff, too
      include ::Kramdown::Utils::Html

      def el_html_attributes(el)
        html_attributes(el.attr)
      end
      def el_html_attributes_with(el, defattr)
        html_attributes(defattr.merge(el.attr))
      end

      # :stopdoc:

      KRAMDOWN_PERSISTENT = ENV["KRAMDOWN_PERSISTENT"]
      KRAMDOWN_PERSISTENT_VERBOSE = /v/ === KRAMDOWN_PERSISTENT

      if KRAMDOWN_PERSISTENT
        begin
          require 'net/http/persistent'
          $http = Net::HTTP::Persistent.new name: 'kramdown-rfc', proxy: :ENV
        rescue Exception => e
          warn "** Not using persistent HTTP -- #{e}"
          warn "**   To silence this message and get full speed, try:"
          warn "**     gem install net-http-persistent"
          warn "**   If this doesn't work, you can ignore this warning."
        end
      end

      # Defines the amount of indentation used when nesting XML tags.
      INDENTATION = 2

      # Initialize the XML converter with the given Kramdown document +doc+.
      def initialize(*doc)
        super
        @sec_level = 1
        @location_delta = 100000 # until reset
        @location_correction = 0 # pre-scanning corrections
        @in_dt = 0
        @footnote_names_in_use = {}
      end

      def correct_location(location)
        location + @location_delta + @location_correction
      end

      def convert(el)
        opts = el.options[:options]
        # warn "** tree opts #{opts.inspect}"
        if nested_ol_types = @options[:nested_ol_types]
          opts[:nested_ol_types] ||= nested_ol_types
          # warn "** tree opts out #{opts.inspect}"
        end
        indent = -INDENTATION
        if el.children[-1].type == :raw
          raw = convert1(el.children.pop, indent, opts)
        end
        "#{convert1(el, indent, opts)}#{end_sections(1, indent, el.options[:location])}#{raw}"
      end

      def convert1(el, indent, opts = {})
        nopts = el.rfc2629_fix(opts)
        send("convert_#{el.type}", el, indent, nopts)
      end

      def inner_a(el, indent, opts)
        indent += INDENTATION
        el.children.map do |inner_el|
          nopts = inner_el.rfc2629_fix(opts)
          send("convert_#{inner_el.type}", inner_el, indent, nopts)
        end
      end

      def inner(el, indent, opts)
        inner_a(el, indent, opts).join('')
      end

      def convert_blank(el, indent, opts)
        "\n"
      end

      def convert_text(el, indent, opts)
        escape_html(el.value, :text)
      end

      def convert_p(el, indent, opts)
        if (el.children.size == 1 && el.children[0].type == :img) || opts[:unpacked]
          inner(el, indent, opts) # Part of the bad reference hack
        else
          "#{' '*indent}<t#{el_html_attributes(el)}>#{inner(el, indent, opts)}</t>\n"
        end
      end

      def saner_generate_id(value)
        generate_id(value).gsub(/-+/, '-')
      end

      def self.process_markdown1(v)             # Uuh.  Heavy coupling.
        doc = ::Kramdown::Document.new(v, $global_markdown_options)
        $stderr.puts doc.warnings.to_yaml unless doc.warnings.empty?
        doc.to_rfc2629
      end

      def self.process_markdown(v)
        process_markdown1(v)[3..-6] # skip <t>...</t>\n
      end

      def self.process_markdown_to_rexml(v)
        s = process_markdown1(v)
        REXML::Document.new(s)
      end

      SVG_COLORS = Hash.new {|h, k| k}
      <<COLORS.each_line {|l| k, v = l.chomp.split; SVG_COLORS[k] = v}
black	#000000
silver	#C0C0C0
gray	#808080
white	#FFFFFF
maroon	#800000
red	#FF0000
purple	#800080
fuchsia	#FF00FF
green	#008000
lime	#00FF00
olive	#808000
yellow	#FFFF00
navy	#000080
blue	#0000FF
teal	#008080
aqua	#00FFFF
COLORS

      def svg_munch_id(id)
        id.gsub(/[^-._A-Za-z0-9]/) {|x| "_%02X" % x.ord}
      end

      def self.hex_to_lin(h)
        h.to_i(16)**2.22        # approximating sRGB gamma
      end
      define_method :hex_to_lin, &method(:hex_to_lin)

      B_W_THRESHOLD = hex_to_lin("a4") # a little brighter than 1/2 0xFF -> white

      def svg_munch_color(c, fill)
        c = SVG_COLORS[c]
        case c
        when /\A#(..)(..)(..)\z/
          if hex_to_lin($1)*0.2126 + hex_to_lin($2)*0.7152 + hex_to_lin($3)*0.0722 >= B_W_THRESHOLD
            'white'
          else
            'black'
          end
        when 'none'
          'none' if fill        # delete for stroke
        else
          c
        end
      end

      SVG_NAMESPACES = {"xmlns"=>"http://www.w3.org/2000/svg",
                        "xlink"=>"http://www.w3.org/1999/xlink"}

      def svg_clean_kgt(s)
        d = REXML::Document.new(s)
        REXML::XPath.each(d.root, "/xmlns:svg", SVG_NAMESPACES) do |x|
          if (w = x.attributes["width"]) && (h = x.attributes["height"])
            x.attributes["viewBox"] = "0 0 %d %d" % [w, h]
          end
          if x.attributes["viewBox"]
            x.attributes["width"] = nil
            x.attributes["height"] = nil
          end
        end
        REXML::XPath.each(d.root, "//rect|//line|//path") do |x|
          x.attributes["fill"] = "none"
          x.attributes["stroke"] = "black"
          x.attributes["stroke-width"] = "1.5"
        end
        d.to_s
      rescue => detail
        warn "*** Can't clean SVG: #{detail}"
        d
      end

      def svg_clean(s)          # expensive, risky
        d = REXML::Document.new(s)
        REXML::XPath.each(d.root, "//*[@shape-rendering]") { |x| x.attributes["shape-rendering"] = nil }  #; warn x.inspect }
        REXML::XPath.each(d.root, "//*[@text-rendering]") { |x| x.attributes["text-rendering"] = nil }  #; warn x.inspect  }
        REXML::XPath.each(d.root, "//*[@stroke]") { |x| x.attributes["stroke"] = svg_munch_color(x.attributes["stroke"], false) }
        REXML::XPath.each(d.root, "//*[@fill]") { |x| x.attributes["fill"] = svg_munch_color(x.attributes["fill"], true) }
        REXML::XPath.each(d.root, "//*[@id]") { |x| x.attributes["id"] = svg_munch_id(x.attributes["id"]) }
##      REXML::XPath.each(d.root, "//rect") { |x| x.attributes["style"] = "fill:none;stroke:black;stroke-width:1" unless x.attributes["style"] }
        # Fix for mermaid:
        REXML::XPath.each(d.root, "//polygon") { |x| x.attributes["rx"] = nil; x.attributes["ry"] = nil }
        d.to_s
      rescue => detail
        warn "*** Can't clean SVG: #{detail}"
        d
      end

      def memoize(meth, *args)
        require 'digest'
        Dir.mkdir(REFCACHEDIR) unless Dir.exist?(REFCACHEDIR)
        kdrfc_version = Gem.loaded_specs["kramdown-rfc2629"].version.to_s.gsub('.', '_') rescue "UNKNOWN"
        fn = "#{REFCACHEDIR}/kdrfc-#{kdrfc_version}-#{meth}-#{Digest::SHA256.hexdigest(Marshal.dump(args))[0...40]}.cache"
        begin
          out = Marshal.load(File.binread(fn))
        rescue StandardError => e
          # warn e.inspect
          out = method(meth).call(*args)
          File.binwrite(fn, Marshal.dump(out))
        end
        out
      end

      def capture_croak(t, err)
        if err != ''
          err.lines do |l|
            warn "*** [#{t}:] #{l.chomp}"
          end
        end
      end

      def shell_prepare(opt)
        " " << opt.shellsplit.shelljoin
      end

      DEFAULT_AASVG="aasvg --spaces=1"

      def svg_tool_process(t, svg_opt, txt_opt, result)
        require 'tempfile'
        file = Tempfile.new("kramdown-rfc")
        file.write(result)
        file.close
        dont_clean = false
        dont_check = false
        svg_opt = shell_prepare(svg_opt) if svg_opt
        txt_opt = shell_prepare(txt_opt) if txt_opt
        case t
        when "protocol", "protocol-goat", "protocol-aasvg"
          cmdparm = result.lines.map(&:strip).select {|x| x != ''}.join(',')
          result, err, _s = Open3.capture3("protocol #{Shellwords.escape(cmdparm)}#{txt_opt}", stdin_data: '')
          if t == "protocol-goat"
            file.unlink
            file = Tempfile.new("kramdown-rfc")
            file.write(result)
            file.close
            result1, err, _s = Open3.capture3("goat#{svg_opt} #{file.path}", stdin_data: result);
            dont_clean = true
          elsif t == "protocol-aasvg"
            result1, err, _s = Open3.capture3("#{DEFAULT_AASVG}#{svg_opt}", stdin_data: result);
            dont_clean = true
            dont_check = true
          else
            result1 = nil
          end
        when "goat"
          result1, err, _s = Open3.capture3("goat#{svg_opt} #{file.path}", stdin_data: result);
          dont_clean = true
        when "aasvg"
          result1, err, _s = Open3.capture3("#{DEFAULT_AASVG}#{svg_opt}", stdin_data: result);
          dont_clean = true
          dont_check = true
        when "ditaa"        # XXX: This needs some form of option-setting
          result1, err, _s = Open3.capture3("ditaa #{file.path} --svg -o -#{svg_opt}", stdin_data: result);
        when "mscgen"
          result1, err, _s = Open3.capture3("mscgen -T svg -i #{file.path} -o -#{svg_opt}", stdin_data: result);
        when "mermaid"
          result1, err, _s = Open3.capture3("mmdc -i #{file.path}#{svg_opt}", stdin_data: result); #  -b transparent
          outpath = file.path + ".svg"
          result1 = File.read(outpath) rescue '' # don't die before providing error message
          File.unlink(outpath) rescue nil        # ditto
        when "plantuml", "plantuml-utxt", "plantuml-ascii-art"
          if t == "plantuml-ascii-art"
            result, ascii_art = result.split(/^~{3,} ascii-art\n/, 2)
            unless ascii_art
              warn "*** Didn't find ascii-art in plantuml-ascii-art #{result.inspect}"
              ascii_art = result.to_s
            end
          end
          plantuml = "@startuml\n#{result}\n@enduml"
          result1, err, _s = Open3.capture3("plantuml -pipe -tsvg#{svg_opt}", stdin_data: plantuml);
          case t
          when "plantuml-utxt"
            result, err1, _s = Open3.capture3("plantuml -pipe -tutxt#{txt_opt}", stdin_data: plantuml)
            err << err1.to_s
          when "plantuml-ascii-art"
            result = ascii_art
          end
        when "railroad", "railroad-utf8"
          result1, err1, _s = Open3.capture3("kgt -l abnf -e svg#{svg_opt}", stdin_data: result);
          result1 = svg_clean_kgt(result1); dont_clean = true
          result, err, _s = Open3.capture3("kgt -l abnf -e rr#{t == "railroad" ? "text" : "utf8"}#{txt_opt}",
                                            stdin_data: result);
          err << err1.to_s
        when "math", "math-asciitex"
          result1, err, _s = Open3.capture3("tex2svg --font STIX --speech=false#{svg_opt} #{Shellwords.escape(' ' << result)}");
          begin
            raise Errno::ENOENT if t == "math-asciitex"
            result, err1, s = Open3.capture3("utftex -m #{txt_opt}", stdin_data: result)
            if s.exitstatus != 0
              warn "** utftex: #{err1.inspect}"
              raise Errno::ENOENT
            end
          rescue Errno::ENOENT
            warn "** utftex not working, falling back to asciitex" unless t == "math-asciitex"
            result, err1, _s = Open3.capture3("asciitex -f #{file.path}#{txt_opt}")
          end
          err << err1
        end
        capture_croak(t, err)
        # warn ["text:", result.inspect]
        # warn ["svg:", result1.inspect]
        file.unlink
        if result1
          result1 = svg_clean(result1) unless dont_clean
          unless dont_check
            file = Tempfile.new("kramdown-rfc")
            file.write(result1)
            file.close
            result1, err, _s = Open3.capture3("svgcheck -qa #{file.path}");
            file.unlink
            # warn ["svgcheck:", result1.inspect]
            capture_croak("svgcheck", err)
          end
          if result1 == ''
            warn "*** could not create svg for #{result.inspect[0...20]}..."
            exit 65 # EX_DATAERR
          end
        end
        [result, result1]       # text, svg
      end

      ARTWORK_TYPES = %w(ascii-art binary-art call-flow hex-dump svg)

      def convert_codeblock(el, indent, opts)
        # el.attr['anchor'] ||= saner_generate_id(el.value) -- no longer in 1.0.6
        result = el.value
        gi = el.attr.delete('gi')
        blockclass = el.attr.delete('class')
        if blockclass == 'language-tbreak'
          result = result.lines.map {|line| [line.chomp, 0]}
          spaceind = 0
          result.each_with_index {|pair, index|
            if pair[0] == ''
              result[spaceind][1] += 1
              pair[0] = nil unless index == spaceind
            else
              spaceind = index
            end
          }
          # $stderr.puts(result.inspect)
          result = result.map {|line, space|
            "<![CDATA[#{line.gsub(/^\s+/) {|s| "\u00A0" * s.size}}]]><vspace blankLines=\"#{space}\"/>" if line
          }.compact.join("\n")
          "#{' '*indent}<t>#{result}</t>\n"
        else
          artwork_attr = {}
          t = nil
          if blockclass
            classes = blockclass.split(' ')
            classes.each do |cl|
              if md = cl.match(/\Alanguage-(.*)/)
                t = artwork_attr["type"] = md[1] # XXX overwrite
              else
                $stderr.puts "*** Unimplemented codeblock class: #{cl}"
              end
            end
          end
          # compensate for XML2RFC idiosyncrasy by insisting on a blank line
          unless el.attr.delete('tight')
            result[0,0] = "\n" unless result[0,1] == "\n"
          end
          el.attr.each do |k, v|
            if md = k.match(/\A(?:artwork|sourcecode)-(.*)/)
              el.attr.delete(k)
              artwork_attr[md[1]] = v
            end
          end
          case t
          when "aasvg", "ditaa", "goat",
               "math", "math-asciitex", "mermaid",  "mscgen",
               "plantuml", "plantuml-utxt", "plantuml-ascii-art",
               "protocol", "protocol-aasvg", "protocol-goat",
               "railroad", "railroad-utf8"
            if gi
              warn "*** Can't set GI #{gi} for composite SVG artset"
            end
            result, result1 = memoize(:svg_tool_process, t,
                                      artwork_attr.delete("svg-options"),
                                      artwork_attr.delete("txt-options"),
                                      result)
            retart = mk_artwork(artwork_attr, "ascii-art",
                                "<![CDATA[#{result}#{result =~ /\n\Z/ ? '' : "\n"}]]>")
            if result1          # nest TXT in artset with SVG
              retsvg = mk_artwork(artwork_attr, "svg",
                                  result1.sub(/.*?<svg/m, "<svg"))
              retart = "<artset>#{retsvg}#{retart}</artset>"
            end
            "#{' '*indent}<figure#{el_html_attributes(el)}>#{retart}</figure>\n"
          else
            gi ||= (
              if !$options.v3 || !t || ARTWORK_TYPES.include?(t) || artwork_attr["align"]
                "artwork"
              else
                "sourcecode"
              end
            )
            loc_str =
              if anchor = el.attr['anchor']
                "##{anchor}"
              elsif lineno = el.options[:location]
                "#{correct_location(lineno)}"
              else
                "UNKNOWN"
              end
            preprocs = el.attr.delete("pre")
            checks = el.attr.delete("check")
            postprocs = el.attr.delete("post")
            case t
            when "cbor"
              warn "** There is no sourcecode-type “cbor”."
              warn "**   Do you mean “cbor-diag” (diagnostic notation)"
              warn "**   or “cbor-pretty” (annotated hex-dump)?"
            when "json"
              checks ||= "json"
            when /\A(.*)-from-yaml\z/
              t = $1
              preprocs ||= "yaml2json"
            end
            preprocs = (preprocs || '').split("-")
            checks = (checks || '').split("-")
            postprocs = (postprocs || '').split("-")
            result = sourcecode_checkproc(preprocs, checks, postprocs, loc_str, result)
            "#{' '*indent}<figure#{el_html_attributes(el)}><#{gi}#{html_attributes(artwork_attr)}><![CDATA[#{result}#{result =~ /\n\Z/ ? '' : "\n"}]]></#{gi}></figure>\n"
          end
        end
      end

      def sourcecode_proc(proc, loc_str, result)
        case proc
        when "dedent"
          result = remove_indentation(result)
        when /\Afold(\d*)(left(\d*))?(dry)?\z/
          fold = [$1.to_i,            # col 0 for ''
                  ($3.to_i if $2),    # left 0 for '', nil if no "left"
                  $4]                 # dry
          result = fix_unterminated_line(fold8792_1(trim_empty_lines_around(result), *fold)) # XXX
        when "yaml2json"
          begin
            y = YAML.safe_load(result, aliases: true, filename: loc_str)
            result = JSON.pretty_generate(y)
          rescue => e
            warn "*** #{loc_str}: YAML isn't: #{e.message}\n"
          end
        else
          warn "*** #{loc_str}: unknown proc '#{proc}'"
        end
        result
      end

      def sourcecode_checkproc(preprocs, checks, postprocs, loc_str, result)
        preprocs.each do |proc|
          result = sourcecode_proc(proc, loc_str, result)
        end if preprocs
        check_input = result
        checks.each do |check|
          case check
          when "skipheader"
            check_input = handle_artwork_sourcecode(check_input).sub(/.*?\n\n/m, '')
          when "json"
            # check for 8792; undo if needed:
            begin
              JSON.load(handle_artwork_sourcecode(check_input))
            rescue => e
              err1 = "*** #{loc_str}: JSON isn't: #{JSON.dump(e.message[0..40])}\n"
              begin
                JSON.load("{" << check_input << "}")
              rescue => e
                warn err1 << "***  not even with braces added around: #{JSON.dump(e.message[0..40])}"
              end
            end
          else
            warn "*** #{loc_str}: unknown check '#{check}'"
          end
        end if checks
        postprocs.each do |proc|
          result = sourcecode_proc(proc, loc_str, result)
        end if postprocs
        result
      end

      def mk_artwork(artwork_attr, typ, content)
        "<artwork #{html_attributes(artwork_attr.merge("type" => typ))}>#{content}</artwork>"
      end

      def convert_blockquote(el, indent, opts)
        text = inner(el, indent, opts)
        if $options.v3
          gi = el.attr.delete('gi')
          if gi && gi != 'ul'
            if RFCXML_SPAN_ELEMENTS === gi
              text.sub!(/\A\s*<t>(.*)<\/t>\s*\z/) {$1} # XXX unwrap inner text from block
              "#{' '*indent}<t><#{gi}#{el_html_attributes(el)}>#{text}</#{gi}></t>\n"
            else
              "#{' '*indent}<#{gi}#{el_html_attributes(el)}>\n#{text}#{' '*indent}</#{gi}>\n"
            end
          else
            "#{' '*indent}<ul#{el_html_attributes_with(el, {"empty" => 'true'})}><li>\n#{text}#{' '*indent}</li></ul>\n"
          end
        else
        text = "<t></t>" unless text =~ /</ # empty block quote
        "#{' '*indent}<t><list style='empty'#{el_html_attributes(el)}>\n#{text}#{' '*indent}</list></t>\n"
        end
      end

      def end_sections(to_level, indent, location)
        if indent < 0
          indent = 0
        end
        if @sec_level >= to_level
          delta = (@sec_level - to_level)
          @sec_level = to_level
          "#{' '*indent}</section>\n" * delta
        else
          $stderr.puts "** #{correct_location(location)}: Bad section nesting: start heading level at 1 and increment by 1"
        end
      end

      def clean_pcdata(parts)    # hack, will become unnecessary with XML2RFCv3
        clean = ''
        irefs = ''
        # warn "clean_parts: #{parts.inspect}"
        parts.each do |p|
          md = p.match(%r{([^<]*)(.*)})
          clean << md[1]
          irefs << md[2]        # breaks for spanx... don't emphasize in headings!
        end
        [clean, irefs]
      end

      def clean_pcdatav3(parts) # hack, will become unnecessary with v3 tables
        clean = ''
        parts.each do |p|
          next if p.empty?
          if p == "<br />"
            p = "\u2028"        # XXX
          end
          d = REXML::Document.new("<foo>#{p}</foo>")
          t = REXML::XPath.each(d.root, "//text()").to_a.join
          if t != p
            warn "** simplified markup #{p.inspect} into #{t.inspect} in table heading"
          end
          clean << t
        end
        clean
      end

      def convert_header(el, indent, opts)
        # todo: handle appendix tags
        el = el.deep_clone
        options = @doc ? @doc.options : @options # XXX: 0.11 vs. 0.12
        if options[:auto_ids] && !el.attr['anchor']
          el.attr['anchor'] = saner_generate_id(el.options[:raw_text])
        end
        if $options.v3
          if sl = el.attr.delete('slugifiedName') # could do general name- play
            attrstring = html_attributes({'slugifiedName' => sl})
          end
          # noabbrev: true -- Workaround for https://github.com/ietf-tools/xml2rfc/issues/683
          nm = inner(el, indent, opts.merge(noabbrev: true))
          if ttl = el.attr['title']
            warn "*** Section has two titles: >>#{ttl}<< and >>#{nm}<<"
            warn "*** Do you maybe have a loose IAL?"
          end
          irefs = "<name#{attrstring}>#{nm}</name>" #
        else
        clean, irefs = clean_pcdata(inner_a(el, indent, opts))
        el.attr['title'] = clean
        end
        "#{end_sections(el.options[:level], indent, el.options[:location])}#{' '*indent}<section#{@sec_level += 1; el_html_attributes(el)}>#{irefs}\n"
      end

      def convert_hr(el, indent, opts) # misuse for page break
        "#{' '*indent}<t><vspace blankLines='999' /></t>\n"
      end

      STYLES = {ul: 'symbols', ol: 'numbers', dl: 'hanging'}

      def convert_ul(el, indent, opts)
        opts = opts.merge(vspace: el.attr.delete('vspace'))
        attrstring = el_html_attributes_with(el, {"style" => STYLES[el.type]})
        if opts[:unpacked]
          "#{' '*indent}<list#{attrstring}>\n#{inner(el, indent, opts)}#{' '*indent}</list>\n"
          else
          "#{' '*indent}<t><list#{attrstring}>\n#{inner(el, indent, opts)}#{' '*indent}</list></t>\n"
        end
      end

      def convert_ol(el, indent, opts)
        if @options[:ol_start_at_first_marker] and (first_list_marker =
                                                    el.options[:first_list_marker])
          el.attr['start'] ||= first_list_marker[/\d+/]
        end
        nested_types = opts[:nested_ol_types] || ["1"]
        # warn "** ol opts #{opts.inspect} types #{nested_types.inspect}"
        if nested_attr = el.attr.delete('nestedOlTypes')
          nested_types = ::Kramdown::Options.parse(:nested_ol_types, nested_attr)
        end
        if nested_types = nested_types.dup
          # warn "** nested_types #{nested_types.inspect}"
          nested_here = nested_types.shift
          opts = opts.merge(nested_ol_types: nested_types << nested_here)
          el.attr['type'] ||= nested_here
          # warn "** actual ol type #{el.attr['type'].inspect}"
        end
        convert_ul(el, indent, opts)
      end

      def convert_dl(el, indent, opts)
        if $options.v3
          if hangindent = el.attr.delete('hangIndent')
            el.attr['indent'] ||= hangindent # new attribute name wins
          end
          vspace = el.attr.delete('vspace')
          if vspace && !el.attr['newline']
            el.attr['newline'] = 'true'
          end
          "#{' '*indent}<dl#{el_html_attributes(el)}>\n#{inner(el, indent, opts.dup)}#{' '*indent}</dl>\n"
        else
          convert_ul(el, indent, opts)
        end
      end

      def convert_li(el, indent, opts)
        res_a = inner_a(el, indent, opts)
        if el.children.empty? || el.children.first.options[:category] == :span
          res = res_a.join('')
        else                    # merge multiple <t> elements
          res = res_a.select { |x|
            x.strip != ''
          }.map { |x|
            x.sub(/\A\s*<t>(.*)<\/t>\s*\Z/m) { $1}
          }.join("#{' '*indent}<vspace blankLines='1'/>\n").gsub(%r{(</list>)\s*<vspace blankLines='1'/>}) { $1 }.gsub(%r{<vspace blankLines='1'/>\s*(<list)}) { $1 }
        end
        "#{' '*indent}<t#{el_html_attributes(el)}>#{res}#{(res =~ /\n\Z/ ? ' '*indent : '')}</t>\n"
      end

      def convert_dd(el, indent, opts)
        if $options.v3
          out = ''
          if !opts[:haddt]
            out ="#{' '*indent}<dt/>\n" # you can't make this one up
          end
          opts[:haddt] = false
          out << "#{' '*indent}<dd#{el_html_attributes(el)}>\n#{inner(el, indent, opts)}#{' '*indent}</dd>\n"
        else
        output = ' '*indent
        if @in_dt == 1
          @in_dt = 0
        else
          output << "<t#{el_html_attributes(el)}>"
        end
        res = inner(el, indent+INDENTATION, opts.merge(unpacked: true))
#        if el.children.empty? || el.children.first.options[:category] != :block
          output << res << (res =~ /\n\Z/ ? ' '*indent : '')
#        else                    FIXME: The latter case is needed for more complex cases
#          output << "\n" << res << ' '*indent
#        end
        output << "</t>\n"
        end
      end

      def convert_dt(el, indent, opts) # SERIOUSLY BAD HACK:
        if $options.v3
          out = ''
          if opts[:haddt]
            out ="#{' '*indent}<dd><t/></dd>\n" # you can't make this one up
          end
          opts[:haddt] = true
          out << "#{' '*indent}<dt#{el_html_attributes(el)}>#{inner(el, indent, opts)}</dt>\n"
        else
        close = "#{' '*indent}</t>\n" * @in_dt
        @in_dt = 1
        vspace = opts[:vspace]
        vspaceel = "<vspace blankLines='#{vspace}'/>" if vspace
        ht = escape_html(inner(el, indent, opts), :attribute) # XXX this may leave gunk
        "#{close}#{' '*indent}<t#{el_html_attributes(el)} hangText=\"#{ht}\">#{vspaceel}\n"
        end
      end

      HTML_TAGS_WITH_BODY=['div', 'script']

      def convert_html_element(el, indent, opts)
        res = inner(el, indent, opts)
        if el.options[:category] == :span
          "<#{el.value}#{el_html_attributes(el)}" << (!res.empty? ? ">#{res}</#{el.value}>" : " />")
        else
          output = ''
          output << ' '*indent if !el.options[:parent_is_raw]
          output << "<#{el.value}#{el_html_attributes(el)}"
          if !res.empty? && el.options[:parse_type] != :block
            output << ">#{res}</#{el.value}>"
          elsif !res.empty?
            output << ">\n#{res}"  << ' '*indent << "</#{el.value}>"
          elsif HTML_TAGS_WITH_BODY.include?(el.value)
            output << "></#{el.value}>"
          else
            output << " />"
          end
          output << "\n" if el.options[:outer_element] || !el.options[:parent_is_raw]
          output
        end
      end

      def convert_xml_comment(el, indent, opts)
        if el.value =~ /\A<\?line (([-+]?)[0-9]+)\?>\z/
          lineno = $1.to_i
          case $2
          when ''               # absolute
            @location_delta = lineno - el.options[:location]
          when '+', '-'         # correction (pre-scanning!)
            @location_correction += lineno
          end
        end
        if el.options[:category] == :block && !el.options[:parent_is_raw]
          ' '*indent + el.value + "\n"
        else
          el.value
        end
      end
      alias :convert_xml_pi :convert_xml_comment
      alias :convert_html_doctype :convert_xml_comment

      ALIGNMENTS = { default: :left, left: :left, right: :right, center: :center}
      COLS_ALIGN = { "l" => :left, "c" => :center, "r" => :right}

      def convert_table(el, indent, opts) # This only works for tables with headers
        alignment = el.options[:alignment].map { |al| ALIGNMENTS[al]}
        cols = (el.attr.delete("cols") || "").split(' ')
        "#{' '*indent}<texttable#{el_html_attributes(el)}>\n#{inner(el, indent, opts.merge(table_alignment: alignment, table_cols: cols))}#{' '*indent}</texttable>\n"
      end

      def convert_thead(el, indent, opts)
        inner(el, indent, opts)
      end
      alias :convert_tbody :convert_thead
      alias :convert_tfoot :convert_thead
      alias :convert_tr  :convert_thead

      def convert_td(el, indent, opts)
        if alignment = opts[:table_alignment]
          alignment = alignment.shift
          if cols = opts[:table_cols].shift
            md = cols.match(/(\d*(|em|[%*]))([lrc])/)
            if md[1].to_i != 0
              widthval = md[1]
              widthval << "em" if md[2].empty?
              widthopt = "width='#{widthval}' "
            end
            alignment = COLS_ALIGN[md[3]] || :left
          end
        end
        if alignment
          xmlres = inner_a(el, indent, opts)
          if $options.v3
            res = clean_pcdatav3(xmlres)
          else
            res, irefs = clean_pcdata(xmlres)
            warn "*** lost markup #{irefs} in table heading" unless irefs.empty?
          end
          "#{' '*indent}<ttcol #{widthopt}align='#{alignment}'#{el_html_attributes(el)}>#{res.empty? ? "&#160;" : res}</ttcol>\n" # XXX need clean_pcdata
        else
          res = inner(el, indent, opts)
          "#{' '*indent}<c#{el_html_attributes(el)}>#{res.empty? ? "&#160;" : res}</c>\n"
        end
      end
      alias :convert_th :convert_td

      def convert_comment(el, indent, opts)
## Don't actually output all those comments into the XML:
#        if el.options[:category] == :block
#          "#{' '*indent}<!-- #{el.value} -->\n"
#        else
#          "<!-- #{el.value} -->"
#        end
      end

      def convert_br(el, indent, opts)
        if $options.v3
          "<br />"
        else
          "<vspace />"
        end
      end

      def convert_a(el, indent, opts)
        gi = el.attr.delete('gi')
        res = inner(el, indent, opts)
        target = el.attr['target']
        if target[0..1] == "{{"
          # XXX ignoring all attributes and content
          s = ::Kramdown::Converter::Rfc2629::process_markdown(target)
          # if res != '' && s[-2..-1] == '/>'
          #   if s =~ /\A<([-A-Za-z0-9_.]+) /
          #     gi ||= $1
          #   end
          #   s[-2..-1] = ">#{res}</#{gi}>"
          # end
          return s
        end
        if target[0] == "#"     # handle [](#foo) as xref as in RFC 7328
          el.attr['target'] = target = target[1..-1]
          if target.downcase == res.downcase
            res = ''            # get rid of raw anchors leaking through
          end
          gi ||= "xref"
        else
          gi ||= "eref"
        end
        "<#{gi}#{el_html_attributes(el)}>#{res}</#{gi}>"
      end

      def convert_xref(el, indent, opts)
        gi = el.attr.delete('gi')
        text = el.attr.delete('text')
        target = el.attr['target']
        if target[0] == "&"
          "#{target};"
        else
          if target =~ %r{\A\w+:(?://|.*@)}
            gi ||= "eref"
          else
            gi ||= "xref"
          end
          if text
            tail = ">#{Rfc2629::process_markdown(text)}</#{gi}>"
          else
            tail = "/>"
          end
          "<#{gi}#{el_html_attributes(el)}#{tail}"
        end
      end

      def convert_contact(el, indent, opts)
        "<contact#{el_html_attributes(el)}/>"
      end

      REFCACHEDIR = ENV["KRAMDOWN_REFCACHEDIR"] || ".refcache"

      # warn "*** REFCACHEDIR #{REFCACHEDIR}"

      KRAMDOWN_OFFLINE = ENV["KRAMDOWN_OFFLINE"]
      KRAMDOWN_REFCACHE_REFETCH = ENV["KRAMDOWN_REFCACHE_REFETCH"]
      KRAMDOWN_REFCACHE_QUIET = ENV["KRAMDOWN_REFCACHE_QUIET"]

      def get_and_write_resource(url, fn)
        options = {}
        if ENV["KRAMDOWN_DONT_VERIFY_HTTPS"]
          options[:ssl_verify_mode] = OpenSSL::SSL::VERIFY_NONE
        end             # workaround for OpenSSL on Windows...
        # URI.open(url, **options) do |uf|          # not portable to older versions
        OpenURI.open_uri(url, **options) do |uf|
          s = uf.read
          if uf.status[0] != "200"
            warn "*** Status code #{status} while fetching #{url}"
          else
            File.write(fn, s)
          end
        end
      end

      def get_and_write_resource_persistently(url, fn)
        t1 = Time.now
        response = $http.request(URI(url))
        if response.code != "200"
          raise "Status code #{response.code} while fetching #{url}"
        else
          File.write(fn, response.body)
        end
        t2 = Time.now
        warn "(#{"%.3f" % (t2 - t1)} s)" if KRAMDOWN_PERSISTENT_VERBOSE
      end

      def get_doi(refname)
        lit = doi_fetch_and_convert(refname, fuzzy: true)
        anchor = "DOI_#{refname.gsub("/", "_")}"
        KramdownRFC::ref_to_xml(anchor, lit)
      end

      # this is now slightly dangerous as multiple urls could map to the same cachefile
      def get_and_cache_resource(url, cachefile, tvalid = 7200, tn = Time.now)
        fn = "#{REFCACHEDIR}/#{cachefile}"
        Dir.mkdir(REFCACHEDIR) unless Dir.exist?(REFCACHEDIR)
        f = File.stat(fn) rescue nil unless KRAMDOWN_REFCACHE_REFETCH
        if !KRAMDOWN_OFFLINE && (!f || tn - f.mtime >= tvalid)
          if f
            message = "renewing (stale by #{"%.1f" % ((tn-f.mtime)/86400)} days)"
            fetch_timeout = 10 # seconds, give up quickly if just renewing
          else
            message = "fetching"
            fetch_timeout = 60 # seconds; long timeout needed for Travis
          end
          $stderr.puts "#{fn}: #{message} from #{url}" unless KRAMDOWN_REFCACHE_QUIET
          if Array === url
            begin
              case url[0]
              when :DOI
                ref = get_doi(url[1])
                File.write(fn, ref)
              end
            rescue Exception => e
              warn "*** Error fetching #{url[0]} #{url[1].inspect}: #{e}"
            end
          elsif ENV["HAVE_WGET"]
            `cd #{REFCACHEDIR}; wget -t 3 -T #{fetch_timeout} -Nnv "#{url}"` # ignore errors if offline (hack)
            begin
              File.utime nil, nil, fn
            rescue Errno::ENOENT
              warn "Can't fetch #{url} -- is wget in path?"
            end
          else
            require 'open-uri'
            require 'socket'
            require 'openssl'
            require 'timeout'
            begin
              Timeout::timeout(fetch_timeout) do
                if $http
                  begin         # belt and suspenders
                    get_and_write_resource_persistently(url, fn)
                  rescue Exception => e
                    warn "*** Can't get with persistent HTTP: #{e}"
                    get_and_write_resource(url, fn)
                  end
                else
                  get_and_write_resource(url, fn)
                end
              end
            rescue OpenURI::HTTPError, Errno::EHOSTUNREACH, Errno::ECONNREFUSED,
                   SocketError, Timeout::Error => e
              warn "*** #{e} while fetching #{url}"
            end
          end
        end
        begin
          File.read(fn) # this blows up if no cache available after fetch attempt
        rescue Errno::ENOENT => e
          warn "*** #{e} for #{fn}"
        end
      end

      def self.bcp_std_ref(t, n)
        warn "*** #{t} anchors not supported in v2 format" unless $options.v3
        [name = "reference.#{t}.#{"%04d" % n.to_i}.xml",
         "#{XML_RESOURCE_ORG_PREFIX}/bibxml-rfcsubseries/#{name}"] # FOR NOW
      end

      KRAMDOWN_REFCACHETTL = (e = ENV["KRAMDOWN_REFCACHETTL"]) ? e.to_i : 3600

      KRAMDOWN_REFCACHETTL_RFC = (e = ENV["KRAMDOWN_REFCACHETTL_RFC"]) ? e.to_i : 86400*7
      KRAMDOWN_REFCACHETTL_DOI_IANA = (e = ENV["KRAMDOWN_REFCACHETTL_DOI_IANA"]) ? e.to_i : 86400
      KRAMDOWN_REFCACHETTL_DOI = (e = ENV["KRAMDOWN_REFCACHETTL_DOI"]) ? e.to_i : KRAMDOWN_REFCACHETTL_DOI_IANA
      KRAMDOWN_REFCACHETTL_IANA = (e = ENV["KRAMDOWN_REFCACHETTL_IANA"]) ? e.to_i : KRAMDOWN_REFCACHETTL_DOI_IANA

      # [subdirectory name, cache ttl in seconds, does it provide for ?anchor=]
      XML_RESOURCE_ORG_MAP = {
        "RFC" => ["bibxml", KRAMDOWN_REFCACHETTL_RFC, false,
                  ->(fn, n){ [name = "reference.RFC.#{"%04d" % n.to_i}.xml",
                              "https://bib.ietf.org/public/rfc/bibxml/#{name}"] }
# was                         "https://www.rfc-editor.org/refs/bibxml/#{name}"] }
                 ],
        "I-D" => ["bibxml3", false, false,
                  ->(fn, n){ [fn,
                              "https://datatracker.ietf.org/doc/bibxml3/draft-#{n.sub(/\Adraft-/, '')}.xml"] }
                 ],
        "BCP" => ["bibxml-rfcsubseries", KRAMDOWN_REFCACHETTL_RFC, false,
                  ->(fn, n){ Rfc2629::bcp_std_ref("BCP", n) }
                 ],
        "STD" => ["bibxml-rfcsubseries", KRAMDOWN_REFCACHETTL_RFC, false,
                  ->(fn, n){ Rfc2629::bcp_std_ref("STD", n) }
                 ],
        "W3C" => "bibxml4",
        "3GPP" => "bibxml5",
        "SDO-3GPP" => "bibxml5",
        "ANSI" => "bibxml2",
        "CCITT" => "bibxml2",
        "FIPS" => "bibxml2",
        # "IANA" => "bibxml2",   overtaken by bibxml8
        "IEEE" => "bibxml6",    # copied over to bibxml6 2019-02-27
        "ISO" => "bibxml2",
        "ITU" => "bibxml2",
        "NIST" => "bibxml2",
        "OASIS" => "bibxml2",
        "PKCS" => "bibxml2",
        "DOI" => ["bibxml7", KRAMDOWN_REFCACHETTL_DOI, true,
                  ->(fn, n){ ["computed-#{fn}", [:DOI, n] ] }, true # always_altproc
                 ], # emulate old 24 h cache
        "IANA" => ["bibxml8", KRAMDOWN_REFCACHETTL_IANA, true], # ditto
      }

      # XML_RESOURCE_ORG_HOST = ENV["XML_RESOURCE_ORG_HOST"] || "xml.resource.org"
      # XML_RESOURCE_ORG_HOST = ENV["XML_RESOURCE_ORG_HOST"] || "xml2rfc.tools.ietf.org"
      XML_RESOURCE_ORG_HOST = ENV["XML_RESOURCE_ORG_HOST"] || "bib.ietf.org"
      XML_RESOURCE_ORG_PREFIX = ENV["XML_RESOURCE_ORG_PREFIX"] ||
                                "https://#{XML_RESOURCE_ORG_HOST}/public/rfc"
      KRAMDOWN_USE_TOOLS_SERVER = ENV["KRAMDOWN_USE_TOOLS_SERVER"]

      KRAMDOWN_NO_TARGETS = ENV['KRAMDOWN_NO_TARGETS']
      KRAMDOWN_KEEP_TARGETS = ENV['KRAMDOWN_KEEP_TARGETS']

      def convert_img(el, indent, opts) # misuse the tag!
        if a = el.attr
          alt = a.delete('alt').strip
          alt = '' if alt == '!' # work around re-wrap uglyness
          if src = a.delete('src')
            a['target'] = src
          end
        end
        if alt == ":include:"   # Really bad misuse of tag...
          ann = el.attr.delete('ann')
          anchor = el.attr.delete('anchor') || (
            # not yet
            warn "*** missing anchor for '#{src}'"
            src
          )
          anchor = ::Kramdown::Parser::RFC2629Kramdown.idref_cleanup(anchor)
          anchor.gsub!('/', '_')              # should take out all illegals
          to_insert = ""
          src.scan(/(W3C|3GPP|[A-Z-]+)[.]?([A-Za-z_0-9.\(\)\/\+-]+)/) do |t, n|
            never_altproc = n.sub!(/^[.]/, "")
            fn = "reference.#{t}.#{n}.xml"
            sub, ttl, _can_anchor, altproc, always_altproc = XML_RESOURCE_ORG_MAP[t]
            ttl ||= KRAMDOWN_REFCACHETTL  # everything but RFCs might change a lot
            puts "*** Huh: #{fn}" unless sub
            if altproc && !never_altproc && (!KRAMDOWN_USE_TOOLS_SERVER || always_altproc)
              fn, url = altproc.call(fn, n)
            else
              url = "#{XML_RESOURCE_ORG_PREFIX}/#{sub}/#{fn}"
              fn = "alt-#{fn}" if never_altproc || KRAMDOWN_USE_TOOLS_SERVER
            end
            # if can_anchor # create anchor server-side for stand_alone: false
            #   url << "?anchor=#{anchor}"
            #   fn[/.xml$/] = "--anchor=#{anchor}.xml"
            # end
            to_insert = get_and_cache_resource(url, fn.gsub('/', '_'), ttl)
            to_insert.scrub! rescue nil # only do this for Ruby >= 2.1

            begin
              d = REXML::Document.new(to_insert)
              d.xml_decl.nowrite
              d.delete d.doctype
              d.context[:attribute_quote] = :quote  # Set double-quote as the attribute value delimiter
              d.root.attributes["anchor"] = anchor
              if t == "RFC" or t == "I-D"
                if KRAMDOWN_NO_TARGETS || !KRAMDOWN_KEEP_TARGETS
                  d.root.attributes["target"] = nil
                  REXML::XPath.each(d.root, "/reference/format") { |x|
                    d.root.delete_element(x)
                  }
                else
                  REXML::XPath.each(d.root, "/reference/format") { |x|
                    x.attributes["target"].sub!(%r{https?://www.ietf.org/internet-drafts/},
                                                %{https://www.ietf.org/archive/id/}) if t == "I-D"
                  }
                end
              elsif t == "IANA"
                d.root.attributes["target"].sub!(%r{\Ahttp://www.iana.org/assignments/}, 'https://www.iana.org/assignments/')
              end
              if ann
                el = ::Kramdown::Converter::Rfc2629::process_markdown_to_rexml(ann).root
                el.name = "annotation"
                d.root.add_element(el)
              end
              to_insert = d.to_s
            rescue Exception => e
              warn "** Can't manipulate reference XML: #{e}"
              broken = true
              to_insert = nil
            end
            # this may be a bit controversial: Don't break the build if reference is broken
            if KRAMDOWN_OFFLINE || broken
              unless to_insert
                to_insert = "<reference anchor='#{anchor}'> <front> <title>*** BROKEN REFERENCE ***</title> <author> <organization/> </author> <date/> </front> </reference>"
                warn "*** KRAMDOWN_OFFLINE: Inserting broken reference for #{fn}"
              end
            else
              exit 66 unless to_insert # EX_NOINPUT
            end
          end
          to_insert
        else
          "<xref#{el_html_attributes(el)}>#{alt}</xref>"
        end
      end

      def convert_codespan(el, indent, opts)
        attrstring = el_html_attributes_with(el, {"style" => 'verb'})
        "<spanx#{attrstring}>#{escape_html(el.value)}</spanx>"
      end

      def convert_footnote(el, indent, opts) # XXX: footnotes into crefs???
        # this would be more like xml2rfc v3:
        # "\n#{' '*indent}<cref>\n#{inner(el.value, indent, opts).rstrip}\n#{' '*indent}</cref>"
        content = inner(el.value, indent, opts).strip
        content = content.sub(/\A<t>(.*)<\/t>\z/m) {$1}
        name = ::Kramdown::Parser::RFC2629Kramdown.idref_cleanup(el.options[:name])
        o_name = name.dup
        while @footnote_names_in_use[name] do
          if name =~ /_\d+\z/
            name.succ!
          else
            name << "_1"
          end
        end
        @footnote_names_in_use[name] = true
        attrstring = el_html_attributes_with(el, {"anchor" => name})
        if $options.v3
          if o_name[-1] == "-"
            # Ignore HTML attributes.  Hmm.
            content
          else
            # do not indent span-level so we can stick to previous word.  Good?
            "<cref#{attrstring}>#{content}</cref>"
          end
        else
          content = escape_html(content, :text) # text only...
          "\n#{' '*indent}<cref#{attrstring}>#{content}</cref>"
        end
      end

      def convert_raw(el, indent, opts)
        end_sections(1, indent, el.options[:location]) +
        el.value + (el.options[:category] == :block ? "\n" : '')
      end

      EMPH = { em: "emph", strong: "strong"}

      def convert_em(el, indent, opts)
        if $options.v3
          gi = el.type
          "<#{gi}#{el_html_attributes(el)}>#{inner(el, indent, opts)}</#{gi}>"
        else
        attrstring = el_html_attributes_with(el, {"style" => EMPH[el.type]})
        span, irefs = clean_pcdata(inner_a(el, indent, opts))
        "<spanx#{attrstring}>#{span}</spanx>#{irefs}"
        end
      end
      alias :convert_strong :convert_em

      def convert_entity(el, indent, opts)
        entity_to_str(el.value)
      end

      TYPOGRAPHIC_SYMS = {
        :mdash => [::Kramdown::Utils::Entities.entity('mdash')],
        :ndash => [::Kramdown::Utils::Entities.entity('ndash')],
        :hellip => [::Kramdown::Utils::Entities.entity('hellip')],
        :laquo_space => [::Kramdown::Utils::Entities.entity('laquo'), ::Kramdown::Utils::Entities.entity('nbsp')],
        :raquo_space => [::Kramdown::Utils::Entities.entity('nbsp'), ::Kramdown::Utils::Entities.entity('raquo')],
        :laquo => [::Kramdown::Utils::Entities.entity('laquo')],
        :raquo => [::Kramdown::Utils::Entities.entity('raquo')]
      }
      def convert_typographic_sym(el, indent, opts)
        if (result = @options[:typographic_symbols][el.value])
          escape_html(result, :text)
        else
          TYPOGRAPHIC_SYMS[el.value].map {|e| entity_to_str(e) }.join('')
        end
      end

      def convert_smart_quote(el, indent, opts)
        entity_to_str(smart_quote_entity(el))
      end

      MATH_LATEX_FILENAME = File.expand_path '../../data/math.json', __FILE__
      MATH_LATEX = JSON.parse(File.read(MATH_LATEX_FILENAME, encoding: Encoding::UTF_8))
      MATH_REPLACEMENTS = MATH_LATEX["replacements"]
      MATH_COMBININGMARKS = MATH_LATEX["combiningmarks"]

      def munge_latex(s)
        MATH_REPLACEMENTS.each do |o, n|
          s.gsub!(o, n)
        end
        MATH_COMBININGMARKS.each do |m, n|
          re = /\\#{m[1..-1]}\{(\X)\}/
          s.gsub!(re) { "#$1#{n}" }
        end
        s
      end
      # XXX: This is missing sup/sub support, which needs to be added

      def convert_math(el, indent, opts) # XXX: This is wrong
        el = el.deep_clone
        if el.options[:category] == :block
          el.attr['artwork-type'] ||= ''
          el.attr['artwork-type'] += (el.attr['artwork-type'].empty? ? '' : ' ') + 'math'
          artwork_attr = {}
          el.attr.each do |k, v|
            if md = k.match(/\Aartwork-(.*)/)
              el.attr.delete(k)
              artwork_attr[md[1]] = v
            end
          end
          result, err, _s = Open3.capture3("tex2mail -noindent -ragged -by_par -linelength=69", stdin_data: el.value);
          # warn "*** tex2mail not in path?" unless s.success? -- doesn't have useful status
          capture_croak("tex2mail", err)
          "#{' '*indent}<figure#{el_html_attributes(el)}><artwork#{html_attributes(artwork_attr)}><![CDATA[#{result}#{result =~ /\n\Z/ ? '' : "\n"}]]></artwork></figure>\n"

        else
          type = 'spanx'
          if $options.v3
            type = 'contact'
            result = munge_latex(el.value)
            attrstring = el_html_attributes_with(el, {"fullname" => result.chomp, "asciiFullname" => ''})
          else
            warn "*** no support for inline math in XML2RFCv2"
            type = 'spanx'
            attrstring = el_html_attributes_with(el, {"style" => 'verb'})
            content = escape_html(el.value, :text)
          end
          "<#{type}#{attrstring}>#{content}</#{type}>"
        end
      end

      ITEM_RE = '\s*(?:"([^"]*)"|([^,]*?))\s*'
      IREF_RE = %r{\A(!\s*)?#{ITEM_RE}(?:,#{ITEM_RE})?\z}

      def iref_attr(s)
        md = s.match(IREF_RE)
        attr = {
          item: md[2] || md[3],
          subitem: md[4] || md[5],
          primary: md[1] && 'true',
        }
        "<iref#{html_attributes(attr)}/>"
      end

      def nobr_hack(s)          # replace this by actual <nobr> once that exists
        # https://github.com/ietf-tools/xml2rfc/blob/main/xml2rfc/utils.py#L42
        s.gsub(/([-\s\/])(?!\s)/) { case rep = $1
                                   when /\A\s\z/
                                     "\u00A0" # nbsp
                                   when "-"
                                     "\u2011" # nbhy -- XXX this might mangle dashes
                                   else
                                     "#{rep}\u2060"
                                   end
        }
      end

      def convert_iref(el, indent, opts)
        iref_attr(el.attr['target'])
      end

      def convert_abbreviation(el, indent, opts) # XXX: This is wrong
        if opts[:noabbrev]
          return el.value
        end

        value = el.value
        ix = value.gsub(/[\s\p{Z}]+/, " ")
        title = @root.options[:abbrev_defs][ix]
        if title.nil?
          warn "*** abbrev mismatch: value = #{value.inspect} ix = #{ix.inspect}"
        else
          title = nil if title.empty?
        end

        if title == "<bcp14>" && $options.v3
          return "<bcp14>#{value}</bcp14>"
        end

        hacked_value = value

        nobr = false
        if title && title =~ /\A<nobr>(\z|\s)/
          nobr = true
          _nobr, title = title.split(' ', 2)
          hacked_value = nobr_hack(value)
          if title.nil? || title.empty?
            return hacked_value # we have "exhausted" this abbrev -- suppress normal meaning
          end
        end

        if title && title[0] == "#"
          target, title = title.split(' ', 2)
          if target == "#"
            target = value
          else
            target = target[1..-1]
          end
        else
          target = nil
        end

        if item = title
          pairs = title.split(Parser::RFC2629Kramdown::IREF_START).each_slice(2).to_a
          replacement = pairs.map {|x,| s = x.strip; s unless s.empty?}.compact.join(" ")
          irefs = pairs.map {|_,x| x && [x]}.compact
          warn "@@@ ABBREV MISMATCH #{irefs}" if title.scan(Parser::RFC2629Kramdown::IREF_START) != irefs
          if irefs.empty?
            subitem = value
          else
            iref = irefs.map{|a,| iref_attr(a)}.join('')
          end
          unless replacement.empty?
            replacement = nobr_hack(replacement) if nobr # XXX this can break XML
            replacement = ::Kramdown::Converter::Rfc2629::process_markdown(replacement)
            hacked_value = replacement
          end
        else
          item = value
        end
        iref ||= "<iref#{html_attributes(item: item, subitem: subitem)}/>"
        if target
          "#{iref}<xref#{html_attributes(target: target, format: "none")}>#{hacked_value}</xref>"
        else
          "#{iref}#{hacked_value}"
        end
      end

      def convert_root(el, indent, opts)
        result = inner(el, indent, opts)
      end

    end

  end
end
Modules

Classes

lib/kramdown-rfc2629.rb

Source Files