lib/kramdown-rfc/command.rb



#!/usr/bin/env ruby
# -*- coding: utf-8 -*-
require 'kramdown-rfc2629'
require 'kramdown-rfc/parameterset'
require 'kramdown-rfc/refxml'
require 'kramdown-rfc/rfc8792'
require 'yaml'
require 'kramdown-rfc/erb'
require 'date'

# try to get this from gemspec.
KDRFC_VERSION=Gem.loaded_specs["kramdown-rfc2629"].version rescue "unknown-version"

Encoding.default_external = "UTF-8" # wake up, smell the coffee

def add_quote(s)
  l = s.lines
  l.map {|li| "> #{li}"}.join
end

def process_chunk(s, nested, dedent, fold, quote)
  process_includes(s) if nested
  s = remove_indentation(s) if dedent
  s = fold8792_1(s, *fold) if fold
  s = add_quote(s) if quote
  s
end

def process_includes(input)
 input.gsub!(/^\{::include((?:-[a-z0-9]+)*)\s+(.*?)\}/) {
  include_flags = $1
  fn = [$2]
  chunks = false
  nested = false
  dedent = false
  fold = false
  quote = false
  include_flags.split("-") do |flag|
    case flag
    when ""
    when "nested"
      nested = true
    when "quote"
      quote = true
    when "dedent"
      dedent = true
    when /\Afold(\d*)(left(\d*))?(dry)?\z/
      fold = [$1.to_i,            # col 0 for ''
              ($3.to_i if $2),    # left 0 for '', nil if no "left"
              $4]                 # dry
    when "all", "last"
      fn = fn.flat_map{|n| Dir[n]}
      fn = [fn.last] if flag == "last"
      chunks = fn.map{ |f|
        ret = process_chunk(File.read(f), nested, dedent, fold, quote)
        nested = false; dedent = false; fold = false; quote = false
        ret
      }
    else
      warn "** unknown include flag #{flag}"
    end
  end
  chunks = fn.map{|f| File.read(f)} unless chunks # no all/last
  chunks = chunks.map {|ch| process_chunk(ch, nested, dedent, fold, quote)}
  chunks.join.chomp
 }
end


def boilerplate(key)
  ret = ''
  case key.downcase
  when /\Abcp14(info)?(\+)?(-tagged)?(-bcp)?\z/i
    #            $1    $2     $3       $4
    if $1
      ret << <<RFC8174ise
Although this document is not an IETF Standards Track publication, it
adopts the conventions for normative language to provide clarity of
instructions to the implementer.
RFC8174ise
    end
    ret << <<RFC8174
The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL
NOT", "SHOULD", "SHOULD NOT", "RECOMMENDED", "NOT RECOMMENDED",
"MAY", and "OPTIONAL" in this document are to be interpreted as
described in BCP 14 {{!RFC2119}} {{!RFC8174}} when, and only when, they
appear in all capitals, as shown here.
RFC8174
    if $2
      ret << <<PLUS
These words may also appear in this document in
lower case as plain English words, absent their normative meanings.
PLUS
    end
    if $3
      ($options.v3_used ||= []) << "** need --v3 to tag bcp14"
      ret << <<TAGGED

*[MUST]: <bcp14>
*[MUST NOT]: <bcp14>
*[REQUIRED]: <bcp14>
*[SHALL]: <bcp14>
*[SHALL NOT]: <bcp14>
*[SHOULD]: <bcp14>
*[SHOULD NOT]: <bcp14>
*[RECOMMENDED]: <bcp14>
*[NOT RECOMMENDED]: <bcp14>
*[MAY]: <bcp14>
*[OPTIONAL]: <bcp14>
TAGGED
    end
    if $4                       # experimental; idnits complains:
      #  ** The document seems to lack a both a reference to RFC 2119 and the
      #     recommended RFC 2119 boilerplate, even if it appears to use RFC 2119
      #     keywords -- however, there's a paragraph with a matching beginning.
      #     Boilerplate error?
      ret.sub!("{{!RFC2119}} {{!RFC8174}}", "{{!BCP14}}")
    end
    ret
  when /\Arfc\s*7942(info)?\z/i
    if $1
      ret << <<INFO
(Boilerplate as per {{Section 2.1 of RFC7942}}:)

INFO
    end
    ret << <<RFC7942
This section records the status of known implementations of the
protocol defined by this specification at the time of posting of
this Internet-Draft, and is based on a proposal described in
{{?RFC7942}}.  The description of implementations in this section is
intended to assist the IETF in its decision processes in
progressing drafts to RFCs.  Please note that the listing of any
individual implementation here does not imply endorsement by the
IETF.  Furthermore, no effort has been spent to verify the
information presented here that was supplied by IETF contributors.
This is not intended as, and must not be construed to be, a
catalog of available implementations or their features.  Readers
are advised to note that other implementations may exist.

According to {{?RFC7942}}, "this will allow reviewers and working
groups to assign due consideration to documents that have the
benefit of running code, which may serve as evidence of valuable
experimentation and feedback that have made the implemented
protocols more mature.  It is up to the individual working groups
to use this information as they see fit".
RFC7942
  else
    warn "** Unknown boilerplate key: #{key}"
    "{::boilerplate #{key}}"
  end
end

def do_the_tls_dance
  begin
    require 'openssl'
    File.open(OpenSSL::X509::DEFAULT_CERT_FILE) do end
    # This guards against having an unreadable cert file (yes, that appears to happen a lot).
  rescue
    if Dir[File.join(OpenSSL::X509::DEFAULT_CERT_DIR, "*.pem")].empty?
      # This guards against having no certs at all, not against missing the right one for IETF.
      # Oh well.
      warn "** Configuration problem with OpenSSL certificate store."
      warn "**   You may want to examine #{OpenSSL::X509::DEFAULT_CERT_FILE}"
      warn "**    and #{OpenSSL::X509::DEFAULT_CERT_DIR}."
      warn "**   Activating suboptimal workaround."
      warn "**   Occasionally run `certified-update` to maintain that workaround."
      require 'certified'
    end
  end
end

RE_NL = /(?:\r\n|\n|\r)/
RE_SECTION = /---(?: +(\w+)(-?))? *#{RE_NL}(.*?#{RE_NL})(?=---(?:\s+\w+-?)?\s*#{RE_NL}|\Z)/m

NMDTAGS = ["{:/nomarkdown}\n\n", "\n\n{::nomarkdown}\n"]

NORMINFORM = { "!" => :normative, "?" => :informative }

def yaml_load(input, *args)
 begin
  if YAML.respond_to?(:safe_load)
    begin
      YAML.safe_load(input, *args)
    rescue ArgumentError
      YAML.safe_load(input, permitted_classes: args[0], permitted_symbols: args[1], aliases: args[2])
    end
  else
    YAML.load(input)
  end
 rescue Psych::SyntaxError => e
   warn "*** YAML syntax error: #{e}"
   exit 65 # EX_DATAERR
 end
end

def process_kramdown_options(coding_override = nil,
                             smart_quotes = nil, typographic_symbols = nil,
                             header_kramdown_options = nil)

  ascii_target = coding_override && coding_override =~ /ascii/
  suppress_typography = ascii_target || $options.v3
  entity_output = ascii_target ? :numeric : :as_char;

  options = {input: 'RFC2629Kramdown', entity_output: entity_output, link_defs: {}}

  if smart_quotes.nil? && suppress_typography
    smart_quotes = false
  end
  if smart_quotes == false
    smart_quotes = ["'".ord, "'".ord, '"'.ord, '"'.ord]
  end
  case smart_quotes
  when Array
    options[:smart_quotes] = smart_quotes
  when nil, true
    # nothin
  else
    warn "*** Can't deal with smart_quotes value #{smart_quotes.inspect}"
  end

  if typographic_symbols.nil? && suppress_typography
    typographic_symbols = false
  end
  if typographic_symbols == false
    typographic_symbols = Hash[::Kramdown::Parser::Kramdown::TYPOGRAPHIC_SYMS.map { |k, v|
                                 if Symbol === v
                                   [v.intern, k]
                                 end
                               }.compact]
  end
  # warn [:TYPOGRAPHIC_SYMBOLS, typographic_symbols].to_yaml
  case typographic_symbols
  when Hash
    options[:typographic_symbols] = typographic_symbols
  when nil, true
    # nothin
  else
    warn "*** Can't deal with typographic_symbols value #{typographic_symbols.inspect}"
  end

  if header_kramdown_options
    options.merge! header_kramdown_options
  end

  $global_markdown_options = options   # For nested calls in bibref annotation processing and xref text

  options
end

XREF_SECTIONS_RE = ::Kramdown::Parser::RFC2629Kramdown::SECTIONS_RE
XSR_PREFIX = "#{XREF_SECTIONS_RE} of "
XSR_SUFFIX = ", (#{XREF_SECTIONS_RE})| \\((#{XREF_SECTIONS_RE})\\)"
XREF_TXT = ::Kramdown::Parser::RFC2629Kramdown::XREF_TXT
XREF_TXT_SUFFIX = " \\(#{XREF_TXT}\\)"

def spacify_re(s)
  s.gsub(' ', '[\u00A0\s]+')
end

include ::Kramdown::Utils::Html

def xml_from_sections(input)

  unless ENV["KRAMDOWN_NO_SOURCE"]
    require 'kramdown-rfc/gzip-clone'
    require 'base64'
    compressed_input = Gzip.compress_m0(input)
    $source = Base64.encode64(compressed_input)
  end

  sections = input.scan(RE_SECTION)
  # resulting in an array; each section is [section-label, nomarkdown-flag, section-text]
  line = 1                      # skip "---"
  sections.each do |section|
    section << line
    line += 1 + section[2].lines.count
  end
  # warn "#{line-1} lines"

  # the first section is a YAML with front matter parameters (don't put a label here)
  # We put back the "---" plus gratuitous blank lines to hack the line number in errors
  yaml_in = input[/---\s*/] << sections.shift[2]
  ps = KramdownRFC::ParameterSet.new(yaml_load(yaml_in, [Date], [], true))

  if v = ps[:v]
    warn "*** unsupported RFCXML version #{v}" if v != 3
    if $options.v2
      warn "*** command line --v2 wins over document's 'v: #{v}'"
    else
      $options.v3 = true
      $options.v = 3
      ps.default!(:stand_alone, true)
      ps.default!(:ipr, "trust200902")
      ps.default!(:pi,  {"toc" => true, "sortrefs" => true, "symrefs" => true})
    end
  end

  if r = ENV["KRAMDOWN_RFC_DOCREV"]
    warn "** building document revision -#{r}"
    unless n = ps.has(:docname) and n.sub!(/-latest\z/, "-#{r}")
      warn "** -d#{r}: docname #{n.inspect} doesn't have a '-latest' suffix"
    end
  end

  if o = ps[:'autolink-iref-cleanup']
    $options.autolink_iref_cleanup = o
  end
  if o = ps[:'svg-id-cleanup']
    $options.svg_id_cleanup = o
  end

  coding_override = ps.has(:coding)
  smart_quotes = ps[:smart_quotes] || ps[:"smart-quotes"]
  typographic_symbols = ps[:typographic_symbols]
  header_kramdown_options = ps[:kramdown_options]

  kramdown_options = process_kramdown_options(coding_override,
                                              smart_quotes, typographic_symbols,
                                              header_kramdown_options)

  # all the other sections are put in a Hash, possibly concatenated from parts there
  sechash = Hash.new{ |h,k| h[k] = ""}
  snames = []                   # a stack of section names
  sections.each do |sname, nmdflag, text, line|
    # warn [:SNAME, sname, nmdflag, text[0..10]].inspect
    nmdin, nmdout = {
      "-" => ["", ""],          # stay in nomarkdown
      "" => NMDTAGS, # pop out temporarily
    }[nmdflag || ""]
    if sname
      snames << sname           # "--- label" -> push label (now current)
    else
      snames.pop                # just "---" -> pop label (previous now current)
    end
    sechash[snames.last] << "#{nmdin}<?line #{line}?>\n#{text}#{nmdout}"
  end

  ref_replacements = { }
  anchor_to_bibref = { }

  displayref = {}

  [:ref, :normative, :informative].each do |sn|
    if refs = ps.has(sn)
      warn "*** bad section #{sn}: #{refs.inspect}" unless refs.respond_to? :each
      refs.each do |k, v|
        if v.respond_to? :to_str
          if bibtagsys(v)       # enable "foo: RFC4711" as a custom anchor definition
            anchor_to_bibref[k] = v.to_str
          end
          ref_replacements[v.to_str] = k
        end
        if Hash === v
          if aliasname = v.delete("-")
            ref_replacements[aliasname] = k
          end
          if bibref = v.delete("=")
            anchor_to_bibref[k] = bibref
          end
          if dr = v.delete("display")
            displayref[k.gsub("/", "_")] = dr
          end
        end
      end
    end
  end
  open_refs = ps[:ref] || { }       # consumed

  norm_ref = { }

  # convenience replacement of {{-coap}} with {{I-D.ietf-core-coap}}
  # collect normative/informative tagging {{!RFC2119}} {{?RFC4711}}
  sechash.each do |k, v|
    next if k == "fluff"
    v.gsub!(/{{(#{
      spacify_re(XSR_PREFIX)
    })?([\w.\/_\-]+@)?(?:([?!])(-)?|(-))([\w._\-]+)(?:=([\w.\/_\-]+))?(#{
      XREF_TXT_SUFFIX
    })?(#{
      spacify_re(XSR_SUFFIX)
    })?}}/) do |match|
      xsr_prefix = $1
      subref = $2
      norminform = $3
      replacing = $4 || $5
      word = $6
      bibref = $7
      xrt_suffix = $8
      xsr_suffix = $9
      if replacing
        if new = ref_replacements[word]
          word = new
        else
          warn "*** no alias replacement for {{-#{word}}}"
          word = "-#{word}"
        end
      end       # now, word is the anchor
      if bibref
        if old = anchor_to_bibref[word]
          if bibref != old
            warn "*** conflicting definitions for xref #{word}: #{old} != #{bibref}"
          end
        else
          anchor_to_bibref[word] = bibref
        end
      end

      # things can be normative in one place and informative in another -> normative
      # collect norm/inform above and assign it by priority here
      if norminform
        norm_ref[word] ||= norminform == '!' # one normative ref is enough
      end
      "{{#{xsr_prefix}#{subref}#{word}#{xrt_suffix}#{xsr_suffix}}}"
    end
  end

  [:normative, :informative].each do |k|
    ps.rest[k.to_s] ||= { }
  end

  norm_ref.each do |k, v|
    # could check bibtagsys here: needed if open_refs is nil or string
    target = ps.has(v ? :normative : :informative)
    warn "*** overwriting #{k}" if target.has_key?(k)
    target[k] = open_refs[k] # add reference to normative/informative
  end
  # note that unused items from ref are considered OK, therefore no check for that here

  # also should allow norm/inform check of other references
  # {{?coap}} vs. {{!coap}} vs. {{-coap}} (undecided)
  # or {{?-coap}} vs. {{!-coap}} vs. {{-coap}} (undecided)
  # could require all references to be decided by a global flag
  overlap = [:normative, :informative].map { |s| (ps.has(s) || { }).keys }.reduce(:&)
  unless overlap.empty?
    warn "*** #{overlap.join(', ')}: both normative and informative"
  end

  stand_alone = ps[:stand_alone]

  [:normative, :informative].each do |sn|
    if refs = ps[sn]
      refs.each do |k, v|
        href = ::Kramdown::Parser::RFC2629Kramdown.idref_cleanup(k)
        kramdown_options[:link_defs][k] = ["##{href}", nil]   # allow [RFC2119] in addition to {{RFC2119}}

        bibref = anchor_to_bibref[k] || k
        bts, url = bibtagsys(bibref, k, stand_alone)
        ann = v.delete("annotation") || v.delete("ann") if Hash === v
        if bts && (!v || v == {} || v.respond_to?(:to_str))
          if stand_alone
            a = %{{: anchor="#{k}"}}
            a[-1...-1] = %{ ann="#{escape_html(ann, :attribute)}"} if ann
            sechash[sn.to_s] << %{\n#{NMDTAGS[0]}\n![:include:](#{bts})#{a}\n#{NMDTAGS[1]}\n}
          else
            warn "*** please use standalone mode for adding annotations to references" if ann
            bts.gsub!('/', '_')
            (ps.rest["bibxml"] ||= []) << [bts, url]
            sechash[sn.to_s] << %{&#{bts};\n} # ???
          end
        else
          if v && Integer === v
            case href
            when /\AErr(.*)/
              epno = $1
              rfcno = v.to_s
              v = {
                "target" => "https://www.rfc-editor.org/errata/eid#{epno}",
                "title" => "RFC Errata Report #{epno}",
                "quote-title" => false,
                "seriesinfo" => { "RFC" => rfcno },
                "date" => false
              }
            else
              # superfluous -- would be caught by next "unless"
              warn "*** don't know how to expand numeric ref #{k}"
              next
            end
          end
          unless v && Hash === v
            warn "*** don't know how to expand ref #{k}"
            next
          end
          if bts && !v.delete("override")
            warn "*** warning: explicit settings completely override canned bibxml in reference #{k}"
          end
          v["ann"] = ann if ann
          sechash[sn.to_s] << KramdownRFC::ref_to_xml(href, v)
        end
      end
    end
  end

  erbfilename = File.expand_path '../../../data/kramdown-rfc2629.erb', __FILE__
  erbfile = File.read(erbfilename, coding: "UTF-8")
  erb = ERB.trim_new(erbfile, '-')
  # remove redundant nomarkdown pop outs/pop ins as they confuse kramdown
  input = erb.result(binding).gsub(%r"{::nomarkdown}\s*{:/nomarkdown}"m, "")
  ps.warn_if_leftovers
  sechash.delete("fluff")       # fluff is a "commented out" section
  if !sechash.empty?            # any sections unused by the ERb file?
    warn "*** sections left #{sechash.keys.inspect}!"
  end

  [input, kramdown_options, coding_override]
end

XML_RESOURCE_ORG_PREFIX = Kramdown::Converter::Rfc2629::XML_RESOURCE_ORG_PREFIX

# return XML entity name, url, rewrite_anchor flag
def bibtagsys(bib, anchor=nil, stand_alone=true)
  if bib =~ /\Arfc(\d+)/i
    rfc4d = "%04d" % $1.to_i
    [bib.upcase,
     "#{XML_RESOURCE_ORG_PREFIX}/bibxml/reference.RFC.#{rfc4d}.xml"]
  elsif $options.v3 && bib =~ /\A(bcp|std)(\d+)/i
    n4d = "%04d" % $2.to_i
    [bib.upcase,
     "#{XML_RESOURCE_ORG_PREFIX}/bibxml-rfcsubseries-new/reference.#{$1.upcase}.#{n4d}.xml"]
  elsif bib =~ /\A([-A-Z0-9]+)\./ &&
        (xro = Kramdown::Converter::Rfc2629::XML_RESOURCE_ORG_MAP[$1])
    dir, _ttl, rewrite_anchor = xro
    bib1 = ::Kramdown::Parser::RFC2629Kramdown.idref_cleanup(bib)
    if anchor && bib1 != anchor
      if rewrite_anchor
        a = %{?anchor=#{anchor}}
      else
        if !stand_alone
          warn "*** selecting a custom anchor '#{anchor}' for '#{bib1}' requires stand_alone mode"
          warn "    the output will need manual editing to correct this"
        end
      end
    end
    [bib1,
     "#{XML_RESOURCE_ORG_PREFIX}/#{dir}/reference.#{bib}.xml#{a}"]
  end
end

def read_encodings
  encfilename = File.expand_path '../../../data/encoding-fallbacks.txt', __FILE__
  encfile = File.read(encfilename, coding: "UTF-8")
  Hash[encfile.lines.map{|l|
         l.chomp!;
         x, s = l.split(" ", 2)
         [x.hex.chr(Encoding::UTF_8), s || " "]}]
end

FALLBACK = read_encodings

def expand_tabs(s, tab_stops = 8)
  s.gsub(/([^\t\n]*)\t/) do
    $1 + " " * (tab_stops - ($1.size % tab_stops))
  end
end


require 'optparse'
require 'ostruct'

$options ||= OpenStruct.new
op = OptionParser.new do |opts|
  opts.banner = <<BANNER
Usage: kramdown-rfc2629 [options] [file.md] > file.xml
Version: #{KDRFC_VERSION}
BANNER
  opts.on("-V", "--version", "Show version and exit") do |v|
    puts "kramdown-rfc #{KDRFC_VERSION}"
    exit
  end
  opts.on("-H", "--help", "Show option summary and exit") do |v|
    puts opts
    exit
  end
  opts.on("-v", "--[no-]verbose", "Run verbosely") do |v|
    $options.verbose = v
  end
  opts.on("-3", "--[no-]v3", "Use RFCXML v3 processing rules") do |v|
    $options.v3 = v
  end
  opts.on("-2", "--[no-]v2", "Use RFCXML v2 processing rules") do |v|
    $options.v2 = v
  end
end
op.parse!

if $options.v2 && $options.v3
  warn "*** can't have v2 and eat v3 cake"
  $options.v2 = false
end

if $options.v3.nil? && !$options.v2
  if Time.now.to_i >= 1645567342 # Time.parse("2022-02-22T22:02:22Z").to_i
    $options.v3 = true           # new default from the above date
  end
end

warn "*** v2 #{$options.v2.inspect} v3 #{$options.v3.inspect}" if $options.verbose

input = ARGF.read
input.scrub! do |c|
  warn "*** replaced invalid UTF-8 byte sequence #{c.inspect} by U+FFFD REPLACEMENT CHARACTER"
  0xFFFD.chr(Encoding::UTF_8)
end
if input[0] == "\uFEFF"
   warn "*** There is a leading byte order mark. Ignored."
   input[0..0] = ''
end
if input[-1] != "\n"
  # warn "*** added missing newline at end"
  input << "\n"                 # fix #26
end
process_includes(input) unless ENV["KRAMDOWN_SAFE"]
input.gsub!(/^\{::boilerplate\s+(.*?)\}/) {
  bp = boilerplate($1)
  delta = bp.lines.count
  bp + "<?line -#{delta+1}?>\n"
}
if input =~ /[\t]/
   warn "*** Input contains HT (\"tab\") characters. Undefined behavior will ensue."
   input = expand_tabs(input)
end

if input =~ /\A---/        # this is a sectionized file
  do_the_tls_dance unless ENV["KRAMDOWN_DONT_VERIFY_HTTPS"]
  input, options, coding_override = xml_from_sections(input)
else
  options = process_kramdown_options # all default
end
if input =~ /\A<\?xml/          # if this is a whole XML file, protect it
  input = "{::nomarkdown}\n#{input}\n{:/nomarkdown}\n"
end

if $options.v3_used && !$options.v3
  warn $options.v3_used
  $options.v3_used = nil
  $options.v3 = true
end

if coding_override
  input = input.encode(Encoding.find(coding_override), fallback: FALLBACK)
end

# 1.4.17: because of UTF-8 bibxml files, kramdown always needs to see UTF-8 (!)
if input.encoding != Encoding::UTF_8
  input = input.encode(Encoding::UTF_8)
end

# warn "options: #{options.inspect}"
doc = Kramdown::Document.new(input, options)
$stderr.puts doc.warnings.to_yaml unless doc.warnings.empty?
output = doc.to_rfc2629

if $options.v3_used && !$options.v3
  warn $options.v3_used
  $options.v3 = true
end

# only reparse output document if cleanup actions required
if $options.autolink_iref_cleanup || $options.svg_id_cleanup
  require 'rexml/document'

  d = REXML::Document.new(output)
  d.context[:attribute_quote] = :quote  # Set double-quote as the attribute value delimiter

  if $options.autolink_iref_cleanup
    require 'kramdown-rfc/autolink-iref-cleanup'
    autolink_iref_cleanup(d)
  end

  if $options.svg_id_cleanup
    require 'kramdown-rfc/svg-id-cleanup'
    svg_id_cleanup(d)
  end

  output = d.to_s
end

if coding_override
  output = output.encode(Encoding.find(coding_override), fallback: FALLBACK)
end

puts output