gem.sh

lib/kramdown-rfc/command.rb

#!/usr/bin/env ruby
# -*- coding: utf-8 -*-
require 'kramdown-rfc2629'
require 'kramdown-rfc/parameterset'
require 'kramdown-rfc/refxml'
require 'kramdown-rfc/rfc8792'
require 'yaml'
require 'kramdown-rfc/erb'
require 'date'

# Version string shown by --version and in the usage banner.
# Taken from the loaded gemspec; if that lookup raises (for instance because
# no spec is loaded under that name), fall back to "unknown-version".
KDRFC_VERSION=Gem.loaded_specs["kramdown-rfc2629"].version rescue "unknown-version"

# Treat external data (files, stdin) as UTF-8 regardless of the locale.
Encoding.default_external = "UTF-8" # wake up, smell the coffee

# Strip the common leading indentation from every line of +s+.
#
# The amount removed is the smallest run of leading whitespace found on any
# line that contains a non-whitespace character; blank lines do not count
# towards that minimum. Only space characters are actually removed, so HT
# (tab) characters are not handled.
#
# Returns a new string; +s+ itself is not modified.
def remove_indentation(s)
  rows = s.lines
  widths = rows.grep(/\S/).map { |row| row[/^\s*/].size }
  indent = widths.min
  leading = /^ {0,#{indent}}/
  rows.map { |row| row.sub(leading, "") }.join
end

# Turn +s+ into a markdown block quote by prefixing each of its lines
# with "> ". Returns a new string.
def add_quote(s)
  s.lines.map do |line|
    "> #{line}"
  end.join
end

# Apply the transformations selected by include flags to one chunk of
# included text and return the result.
#
# s::      the chunk text
# nested:: when truthy, include directives inside +s+ are expanded first
#          (process_includes rewrites +s+ in place)
# dedent:: when truthy, common indentation is removed
# fold::   when truthy, an argument array that is splatted into fold8792_1
# quote::  when truthy, every line is prefixed with "> "
#
# The steps always run in this order: nested, dedent, fold, quote.
def process_chunk(s, nested, dedent, fold, quote)
  if nested
    process_includes(s)
  end
  text = s
  if dedent
    text = remove_indentation(text)
  end
  if fold
    text = fold8792_1(text, *fold)
  end
  if quote
    text = add_quote(text)
  end
  text
end

# Expand `{::include...}` directives in +input+, modifying it in place.
#
# A directive starts at the beginning of a line and has the form
#   {::include[-flag]... filename}
# Recognized flags:
#   nested            expand include directives inside the included text
#   quote             prefix every line with "> "
#   dedent            remove common indentation
#   fold<N>[left<M>][dry]
#                     fold lines; the three parts become the extra arguments
#                     handed to the folding helper via process_chunk
#   all               treat the filename as a glob and include every match
#   last              treat the filename as a glob and include the last match
# Unknown flags produce a warning and are otherwise ignored.
#
# Flags are processed left to right. Transformation flags that appear before
# "all"/"last" are applied to every matched file at that point and are then
# cleared; flags that appear afterwards (or without "all"/"last") are applied
# in the final pass over the chunks.
#
# The directive is replaced by the concatenated chunks with one trailing
# line terminator removed. The return value is that of String#gsub! (nil when
# no directive was found); callers in this file ignore it.
def process_includes(input)
  input.gsub!(/^\{::include((?:-[a-z0-9]+)*)\s+(.*?)\}/) {
    include_flags = $1
    fn = [$2]
    chunks = false                # becomes an array once all/last was seen
    nested = false
    dedent = false
    fold = false
    quote = false
    include_flags.split("-") do |flag|
      case flag
      when ""
        # the flag string starts with "-", so the first field is empty
      when "nested"
        nested = true
      when "quote"
        quote = true
      when "dedent"
        dedent = true
      when /\Afold(\d*)(left(\d*))?(dry)?\z/
        fold = [$1.to_i,            # col 0 for ''
                ($3.to_i if $2),    # left 0 for '', nil if no "left"
                $4]                 # dry
      when "all", "last"
        fn = fn.flat_map{|n| Dir[n]}
        fn = [fn.last] if flag == "last"
        chunks = fn.map { |f|
          process_chunk(File.read(f), nested, dedent, fold, quote)
        }
        # The flags collected so far have now been applied to EVERY matched
        # file. Clear them only after the whole list is done, so that the
        # final pass below does not apply them a second time. (Clearing them
        # inside the loop would leave all files but the first unprocessed.)
        nested = false; dedent = false; fold = false; quote = false
      else
        warn "** unknown include flag #{flag}"
      end
    end
    chunks = fn.map{|f| File.read(f)} unless chunks # no all/last
    chunks = chunks.map {|ch| process_chunk(ch, nested, dedent, fold, quote)}
    chunks.join.chomp
  }
end


# Return the canned text for a `{::boilerplate KEY}` directive.
#
# key:: the boilerplate name; matched case-insensitively against
#         bcp14[info][+][-tagged]
#       where
#         info     prepends a paragraph for documents that are not on the
#                  IETF Standards Track,
#         +        appends a sentence about lower-case use of the key words,
#         -tagged  appends abbreviation definitions that mark the key words
#                  as <bcp14>, and records in $options.v3_used that --v3 is
#                  needed.
#
# For any other key a warning is printed and the directive is returned
# unchanged, so that it stays visible in the output.
def boilerplate(key)
  m = /\Abcp14(info)?(\+)?(-tagged)?\z/i.match(key.downcase)
  unless m
    warn "** Unknown boilerplate key: #{key}"
    return "{::boilerplate #{key}}"
  end
  # Keep the captures in locals rather than depending on $1..$3 staying
  # intact while the text is assembled.
  info, plus, tagged = m.captures

  ret = +''
  if info
    ret << <<RFC8174ise
Although this document is not an IETF Standards Track publication, it
adopts the conventions for normative language to provide clarity of
instructions to the implementer.
RFC8174ise
  end
  ret << <<RFC8174
The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL
NOT", "SHOULD", "SHOULD NOT", "RECOMMENDED", "NOT RECOMMENDED",
"MAY", and "OPTIONAL" in this document are to be interpreted as
described in BCP 14 {{!RFC2119}} {{!RFC8174}} when, and only when, they
appear in all capitals, as shown here.
RFC8174
  if plus
    ret << <<PLUS
These words may also appear in this document in
lower case as plain English words, absent their normative meanings.
PLUS
  end
  if tagged
    ($options.v3_used ||= []) << "** need --v3 to tag bcp14"
    ret << <<TAGGED

*[MUST]: <bcp14>
*[MUST NOT]: <bcp14>
*[REQUIRED]: <bcp14>
*[SHALL]: <bcp14>
*[SHALL NOT]: <bcp14>
*[SHOULD]: <bcp14>
*[SHOULD NOT]: <bcp14>
*[RECOMMENDED]: <bcp14>
*[NOT RECOMMENDED]: <bcp14>
*[MAY]: <bcp14>
*[OPTIONAL]: <bcp14>
TAGGED
  end
  ret
end

# Check that OpenSSL has a usable certificate store and install a workaround
# if it does not.
#
# The default certificate file is opened once as a readability probe. When
# that raises, and the default certificate directory contains no "*.pem"
# files either, a set of warnings is printed and the 'certified' library is
# loaded.
def do_the_tls_dance
  require 'openssl'
  # This guards against having an unreadable cert file (yes, that appears to happen a lot).
  File.open(OpenSSL::X509::DEFAULT_CERT_FILE) { }
rescue
  pem_files = Dir[File.join(OpenSSL::X509::DEFAULT_CERT_DIR, "*.pem")]
  # This guards against having no certs at all, not against missing the right one for IETF.
  # Oh well.
  return unless pem_files.empty?
  [
    "** Configuration problem with OpenSSL certificate store.",
    "**   You may want to examine #{OpenSSL::X509::DEFAULT_CERT_FILE}",
    "**    and #{OpenSSL::X509::DEFAULT_CERT_DIR}.",
    "**   Activating suboptimal workaround.",
    "**   Occasionally run `certified-update` to maintain that workaround.",
  ].each { |line| warn line }
  require 'certified'
end

# A line terminator: LF, CR, or CR LF.
RE_NL = /(?:\n|\r|\r\n)/
# One section of a sectionized document: a "---" marker, optionally followed
# by a label (capture 1) and a "-" flag directly after the label (capture 2),
# then the section text (capture 3), which runs up to the next "---" marker
# line or the end of the input.
RE_SECTION = /---(?: +(\w+)(-?))?\s*#{RE_NL}(.*?#{RE_NL})(?=---(?:\s+\w+-?)?\s*#{RE_NL}|\Z)/m

# Text placed before and after a section's content in xml_from_sections:
# the first element closes a nomarkdown span, the second reopens one.
NMDTAGS = ["{:/nomarkdown}\n\n", "\n\n{::nomarkdown}\n"]

# Maps the "!" / "?" reference markers to section names.
# NOTE(review): not referenced by the code visible in this file -- presumably
# used elsewhere; confirm before removing.
NORMINFORM = { "!" => :normative, "?" => :informative }

# Parse +input+ as YAML and return the resulting object.
#
# args:: optional positional arguments for YAML.safe_load, in the order
#        permitted classes, permitted symbols, aliases flag. They are first
#        passed positionally; if that raises ArgumentError they are passed
#        again as the keyword arguments permitted_classes:,
#        permitted_symbols: and aliases:.
#
# When YAML does not offer safe_load at all, plain YAML.load is used.
# A YAML syntax error is reported on stderr and terminates the process with
# exit status 65.
def yaml_load(input, *args)
  return YAML.load(input) unless YAML.respond_to?(:safe_load)

  begin
    YAML.safe_load(input, *args)
  rescue ArgumentError
    classes, symbols, aliases = args[0], args[1], args[2]
    YAML.safe_load(input, permitted_classes: classes, permitted_symbols: symbols, aliases: aliases)
  end
rescue Psych::SyntaxError => e
  warn "*** YAML syntax error: #{e}"
  exit 65 # EX_DATAERR
end

# Build the options hash that is handed to Kramdown::Document.new.
#
# coding_override::         name of the requested output encoding, or nil;
#                           a name containing "ascii" selects numeric
#                           entity output
# smart_quotes::            nil/true (leave kramdown's default), false
#                           (plain ASCII quotes), or an Array that is passed
#                           through as is
# typographic_symbols::     nil/true (leave kramdown's default), false
#                           (undo the typographic replacements), or a Hash
#                           that is passed through as is
# header_kramdown_options:: extra options merged over everything else
#
# For an ASCII target, or when $options.v3 is set, smart quotes and
# typographic symbols default to false instead of nil.
#
# The resulting hash is also stored in $global_markdown_options and is the
# return value.
def process_kramdown_options(coding_override = nil,
                             smart_quotes = nil, typographic_symbols = nil,
                             header_kramdown_options = nil)

  ascii_target = coding_override && coding_override =~ /ascii/
  suppress_typography = ascii_target || $options.v3

  options = {
    input: 'RFC2629Kramdown',
    entity_output: (ascii_target ? :numeric : :as_char),
    link_defs: {},
  }

  # --- smart quotes ---
  smart_quotes = false if smart_quotes.nil? && suppress_typography
  smart_quotes = ["'".ord, "'".ord, '"'.ord, '"'.ord] if smart_quotes == false
  case smart_quotes
  when Array
    options[:smart_quotes] = smart_quotes
  when nil, true
    # keep the default
  else
    warn "*** Can't deal with smart_quotes value #{smart_quotes.inspect}"
  end

  # --- typographic symbols ---
  typographic_symbols = false if typographic_symbols.nil? && suppress_typography
  if typographic_symbols == false
    # Map each symbolic replacement back to the source text it stands for.
    reverted = ::Kramdown::Parser::Kramdown::TYPOGRAPHIC_SYMS.map { |k, v|
      [v.intern, k] if Symbol === v
    }
    typographic_symbols = Hash[reverted.compact]
  end
  case typographic_symbols
  when Hash
    options[:typographic_symbols] = typographic_symbols
  when nil, true
    # keep the default
  else
    warn "*** Can't deal with typographic_symbols value #{typographic_symbols.inspect}"
  end

  options.merge! header_kramdown_options if header_kramdown_options

  # For nested calls in bibref annotation processing and xref text
  $global_markdown_options = options
end

# Regexp source fragments for the section and text decorations that may
# surround a reference name inside {{...}}. They are interpolated into the
# reference-matching pattern in xml_from_sections.
XREF_SECTIONS_RE = ::Kramdown::Parser::RFC2629Kramdown::SECTIONS_RE
# Decoration in front of the name: "<sections> of ".
XSR_PREFIX = "#{XREF_SECTIONS_RE} of "
# Decoration after the name: ", <sections>" or " (<sections>)".
XSR_SUFFIX = ", (#{XREF_SECTIONS_RE})| \\((#{XREF_SECTIONS_RE})\\)"
XREF_TXT = ::Kramdown::Parser::RFC2629Kramdown::XREF_TXT
# Text decoration after the name: " (<text>)".
XREF_TXT_SUFFIX = " \\(#{XREF_TXT}\\)"

# Relax a regexp source string so that every literal space in it accepts one
# or more whitespace or no-break-space characters instead.
# Returns a new string; +s+ is not modified.
def spacify_re(s)
  flexible_gap = '[\u00A0\s]+'
  s.gsub(' ') { flexible_gap }
end

# Convert a sectionized document (YAML front matter followed by labeled
# "--- name" sections) into the markdown text that is fed to kramdown.
#
# input:: the complete document text, starting with "---"
#
# Returns [markdown text produced by the ERB template, kramdown options
# hash, coding override taken from the front matter].
#
# Side effects visible here: may set $source, $options.v3, $options.v and
# $options.autolink_iref_cleanup; prints warnings on stderr.
def xml_from_sections(input)

  # Keep a compressed, base64-encoded copy of the raw input in $source,
  # unless disabled through the environment.
  # NOTE(review): $source is not read in this file -- presumably consumed by
  # the ERB template; confirm.
  unless ENV["KRAMDOWN_NO_SOURCE"]
    require 'kramdown-rfc/gzip-clone'
    require 'base64'
    compressed_input = Gzip.compress_m0(input)
    $source = Base64.encode64(compressed_input)
  end

  sections = input.scan(RE_SECTION)
  # resulting in an array; each section is [section-label, nomarkdown-flag, section-text]

  # the first section is a YAML with front matter parameters (don't put a label here)
  # We put back the "---" plus gratuitous blank lines to hack the line number in errors
  yaml_in = input[/---\s*/] << sections.shift[2]
  # Date objects and YAML aliases are allowed in the front matter.
  ps = KramdownRFC::ParameterSet.new(yaml_load(yaml_in, [Date], [], true))

  # A "v:" entry in the front matter selects the RFCXML version; only 3 is
  # supported, and a --v2 given on the command line takes precedence.
  if v = ps[:v]
    warn "*** unsupported RFCXML version #{v}" if v != 3
    if $options.v2
      warn "*** command line --v2 wins over document's 'v: #{v}'"
    else
      $options.v3 = true
      $options.v = 3
      ps.default!(:stand_alone, true)
      ps.default!(:ipr, "trust200902")
      ps.default!(:pi,  {"toc" => true, "sortrefs" => true, "symrefs" => true})
    end
  end

  if o = ps[:'autolink-iref-cleanup']
    $options.autolink_iref_cleanup = o
  end

  # NOTE(review): ParameterSet#has and #[] are defined elsewhere; the
  # "consumed" remark further down suggests #[] removes the entry while #has
  # leaves it in place -- confirm.
  coding_override = ps.has(:coding)
  smart_quotes = ps[:smart_quotes]
  typographic_symbols = ps[:typographic_symbols]
  header_kramdown_options = ps[:kramdown_options]

  kramdown_options = process_kramdown_options(coding_override,
                                              smart_quotes, typographic_symbols,
                                              header_kramdown_options)

  # all the other sections are put in a Hash, possibly concatenated from parts there
  sechash = Hash.new{ |h,k| h[k] = ""}
  snames = []                   # a stack of section names
  sections.each do |sname, nmdflag, text|
    # warn [:SNAME, sname, nmdflag, text[0..10]].inspect
    # Text wrapped around the section content, selected by the "-" flag.
    nmdin, nmdout = {
      "-" => ["", ""],          # stay in nomarkdown
      "" => NMDTAGS, # pop out temporarily
    }[nmdflag || ""]
    if sname
      snames << sname           # "--- label" -> push label (now current)
    else
      snames.pop                # just "---" -> pop label (previous now current)
    end
    sechash[snames.last] << "#{nmdin}#{text}#{nmdout}"
  end

  ref_replacements = { }        # alias or bibref name -> anchor
  anchor_to_bibref = { }        # anchor -> bibliography tag to fetch

  displayref = {}               # anchor -> "display" value of the reference

  # Collect aliases, custom anchors and display names from the reference
  # sections of the front matter.
  [:ref, :normative, :informative].each do |sn|
    if refs = ps.has(sn)
      # NOTE(review): this only warns; refs.each below is still called and
      # will raise if refs does not respond to each.
      warn "*** bad section #{sn}: #{refs.inspect}" unless refs.respond_to? :each
      refs.each do |k, v|
        if v.respond_to? :to_str
          if bibtagsys(v)       # enable "foo: RFC4711" as a custom anchor definition
            anchor_to_bibref[k] = v.to_str
          end
          ref_replacements[v.to_str] = k
        end
        if Hash === v
          # The "-", "=" and "display" entries are removed from the
          # reference definition as they are picked up.
          if aliasname = v.delete("-")
            ref_replacements[aliasname] = k
          end
          if bibref = v.delete("=")
            anchor_to_bibref[k] = bibref
          end
          if dr = v.delete("display")
            displayref[k] = dr
          end
        end
      end
    end
  end
  open_refs = ps[:ref] || { }       # consumed

  norm_ref = { }                # anchor -> true (normative) / false (informative)

  # convenience replacement of {{-coap}} with {{I-D.ietf-core-coap}}
  # collect normative/informative tagging {{!RFC2119}} {{?RFC4711}}
  sechash.each do |k, v|
    next if k == "fluff"
    # Capture groups of the pattern below: optional section prefix, "!"/"?"
    # marker, "-" alias marker (after a marker or on its own), the name, an
    # optional "=bibref", an optional text suffix, an optional section suffix.
    v.gsub!(/{{(#{
      spacify_re(XSR_PREFIX)
    })?(?:([?!])(-)?|(-))([\w._\-]+)(?:=([\w.\/_\-]+))?(#{
      XREF_TXT_SUFFIX
    })?(#{
      spacify_re(XSR_SUFFIX)
    })?}}/) do |match|
      xsr_prefix = $1
      norminform = $2
      replacing = $3 || $4
      word = $5
      bibref = $6
      xrt_suffix = $7
      xsr_suffix = $8
      if replacing
        if new = ref_replacements[word]
          word = new
        else
          warn "*** no alias replacement for {{-#{word}}}"
          word = "-#{word}"
        end
      end       # now, word is the anchor
      if bibref
        if old = anchor_to_bibref[word]
          if bibref != old
            warn "*** conflicting definitions for xref #{word}: #{old} != #{bibref}"
          end
        else
          anchor_to_bibref[word] = bibref
        end
      end

      # things can be normative in one place and informative in another -> normative
      # collect norm/inform above and assign it by priority here
      if norminform
        norm_ref[word] ||= norminform == '!' # one normative ref is enough
      end
      # Put the reference back without the marker, alias and "=bibref" parts.
      "{{#{xsr_prefix}#{word}#{xrt_suffix}#{xsr_suffix}}}"
    end
  end

  # Make sure both reference sections exist before entries are added.
  [:normative, :informative].each do |k|
    ps.rest[k.to_s] ||= { }
  end

  norm_ref.each do |k, v|
    # could check bibtagsys here: needed if open_refs is nil or string
    target = ps.has(v ? :normative : :informative)
    warn "*** overwriting #{k}" if target.has_key?(k)
    target[k] = open_refs[k] # add reference to normative/informative
  end
  # note that unused items from ref are considered OK, therefore no check for that here

  # also should allow norm/inform check of other references
  # {{?coap}} vs. {{!coap}} vs. {{-coap}} (undecided)
  # or {{?-coap}} vs. {{!-coap}} vs. {{-coap}} (undecided)
  # could require all references to be decided by a global flag
  overlap = [:normative, :informative].map { |s| (ps.has(s) || { }).keys }.reduce(:&)
  unless overlap.empty?
    warn "*** #{overlap.join(', ')}: both normative and informative"
  end

  stand_alone = ps[:stand_alone]

  # Emit each reference into the text of its section: either as a canned
  # bibxml include/entity, or as XML built from the explicit definition.
  [:normative, :informative].each do |sn|
    if refs = ps[sn]
      refs.each do |k, v|
        href = ::Kramdown::Parser::RFC2629Kramdown.idref_cleanup(k)
        kramdown_options[:link_defs][k] = ["##{href}", nil]   # allow [RFC2119] in addition to {{RFC2119}}

        bibref = anchor_to_bibref[k] || k
        bts, url = bibtagsys(bibref, k, stand_alone)
        # Use the canned reference when one is known and the definition is
        # missing, empty, or just a string.
        if bts && (!v || v == {} || v.respond_to?(:to_str))
          if stand_alone
            a = %{{: anchor="#{k}"}}
            sechash[sn.to_s] << %{\n#{NMDTAGS[0]}\n![:include:](#{bts})#{a}\n#{NMDTAGS[1]}\n}
          else
            bts.gsub!('/', '_')
            (ps.rest["bibxml"] ||= []) << [bts, url]
            sechash[sn.to_s] << %{&#{bts};\n} # ???
          end
        else
          unless v && Hash === v
            warn "*** don't know how to expand ref #{k}"
            next
          end
          # An "override" entry in the definition silences this warning.
          if bts && !v.delete("override")
            warn "*** warning: explicit settings completely override canned bibxml in reference #{k}"
          end
          sechash[sn.to_s] << KramdownRFC::ref_to_xml(href, v)
        end
      end
    end
  end

  # Run the template with this method's binding, so the template can use
  # the local variables defined above.
  erbfilename = File.expand_path '../../../data/kramdown-rfc2629.erb', __FILE__
  erbfile = File.read(erbfilename, coding: "UTF-8")
  erb = ERB.trim_new(erbfile, '-')
  # remove redundant nomarkdown pop outs/pop ins as they confuse kramdown
  input = erb.result(binding).gsub(%r"{::nomarkdown}\s*{:/nomarkdown}"m, "")
  ps.warn_if_leftovers
  sechash.delete("fluff")       # fluff is a "commented out" section
  if !sechash.empty?            # any sections unused by the ERb file?
    warn "*** sections left #{sechash.keys.inspect}!"
  end

  [input, kramdown_options, coding_override]
end

# Base URL under which canned bibxml references are fetched; shared with the
# converter so that both use the same location.
XML_RESOURCE_ORG_PREFIX = Kramdown::Converter::Rfc2629::XML_RESOURCE_ORG_PREFIX

# Work out where the canned bibxml for a reference tag can be found.
#
# bib::         the reference tag, e.g. "RFC2119", "BCP14" (only with
#               $options.v3) or a "PREFIX.name" tag whose prefix is listed
#               in XML_RESOURCE_ORG_MAP
# anchor::      the anchor the document wants to use for this reference
# stand_alone:: whether the document is processed in stand_alone mode; only
#               used to decide whether a custom anchor deserves a warning
#
# Returns the two-element array [XML entity name, URL], or nil when the tag
# matches none of the known systems.
def bibtagsys(bib, anchor=nil, stand_alone=true)
  if bib =~ /\Arfc(\d+)/i
    number = format("%04d", $1.to_i)
    return [bib.upcase,
            "#{XML_RESOURCE_ORG_PREFIX}/bibxml/reference.RFC.#{number}.xml"]
  end

  if $options.v3 && bib =~ /\A(bcp|std)(\d+)/i
    series = $1.upcase
    number = format("%04d", $2.to_i)
    return [bib.upcase,
            "#{XML_RESOURCE_ORG_PREFIX}/bibxml-rfcsubseries-new/reference.#{series}.#{number}.xml"]
  end

  return unless bib =~ /\A([-A-Z0-9]+)\./
  xro = Kramdown::Converter::Rfc2629::XML_RESOURCE_ORG_MAP[$1]
  return unless xro

  dir, _ttl, rewrite_anchor = xro
  bib1 = ::Kramdown::Parser::RFC2629Kramdown.idref_cleanup(bib)
  query = nil                   # appended to the URL; stays empty by default
  if anchor && bib1 != anchor
    if rewrite_anchor
      query = %{?anchor=#{anchor}}
    elsif !stand_alone
      warn "*** selecting a custom anchor '#{anchor}' for '#{bib1}' requires stand_alone mode"
      warn "    the output will need manual editing to correct this"
    end
  end
  [bib1,
   "#{XML_RESOURCE_ORG_PREFIX}/#{dir}/reference.#{bib}.xml#{query}"]
end

# Load the table of replacement texts from data/encoding-fallbacks.txt.
#
# Each line of that file holds a hexadecimal code point, optionally followed
# by whitespace and a replacement text. Returns a Hash that maps the
# one-character UTF-8 string for the code point to its replacement; a line
# without replacement text maps to a single space.
def read_encodings
  path = File.expand_path('../../../data/encoding-fallbacks.txt', __FILE__)
  table = File.read(path, coding: "UTF-8")
  pairs = table.lines.map do |raw|
    codepoint, replacement = raw.chomp.split(" ", 2)
    [codepoint.hex.chr(Encoding::UTF_8), replacement || " "]
  end
  Hash[pairs]
end

# Replacement table loaded once at start-up; passed as the fallback: option
# of String#encode when converting to the requested output encoding.
FALLBACK = read_encodings

# Replace every HT (tab) character in +s+ by the number of spaces needed to
# reach the next tab stop.
#
# s::         the text; may span several lines
# tab_stops:: distance between tab stops, in characters (default 8)
#
# Positions are counted in characters from the start of the line or from
# the previous tab. Returns a new string.
def expand_tabs(s, tab_stops = 8)
  s.gsub(/([^\t\n]*)\t/) do
    before = Regexp.last_match(1)
    padding = tab_stops - (before.size % tab_stops)
    before + " " * padding
  end
end


require 'optparse'
require 'ostruct'

# --- Command line handling ---------------------------------------------
# $options is the global settings object read by the functions above; keep
# an existing one if something has already created it.
$options ||= OpenStruct.new
op = OptionParser.new do |opts|
  opts.banner = <<BANNER
Usage: kramdown-rfc2629 [options] file.md|file.mkd > file.xml
Version: #{KDRFC_VERSION}
BANNER
  opts.on("-V", "--version", "Show version and exit") do |v|
    puts "kramdown-rfc #{KDRFC_VERSION}"
    exit
  end
  opts.on("-H", "--help", "Show option summary and exit") do |v|
    puts opts
    exit
  end
  opts.on("-v", "--[no-]verbose", "Run verbosely") do |v|
    $options.verbose = v
  end
  opts.on("-3", "--[no-]v3", "Use RFCXML v3 processing rules") do |v|
    $options.v3 = v
  end
  opts.on("-2", "--[no-]v2", "Use RFCXML v2 processing rules") do |v|
    $options.v2 = v
  end
end
# Removes the recognized options from ARGV; what remains is read via ARGF.
op.parse!

# Both versions requested: warn and let v3 win.
if $options.v2 && $options.v3
  warn "*** can't have v2 and eat v3 cake"
  $options.v2 = false
end

# Neither version requested (an explicit --no-v3 leaves v3 false, not nil):
# v3 is the default once the cut-over date has passed.
if $options.v3.nil? && !$options.v2
  if Time.now.to_i >= 1645567342 # Time.parse("2022-02-22T22:02:22Z").to_i
    $options.v3 = true           # new default from the above date
  end
end

warn "*** v2 #{$options.v2.inspect} v3 #{$options.v3.inspect}" if $options.verbose

# --- Main conversion pipeline -------------------------------------------
# Read the whole input (the files named on the command line, or stdin).
input = ARGF.read
if input[0] == "\uFEFF"
   warn "*** There is a leading byte order mark. Ignored."
   input[0..0] = ''
end
if input[-1] != "\n"
  # warn "*** added missing newline at end"
  input << "\n"                 # fix #26
end
# Include directives read arbitrary files, so they can be switched off
# through the environment.
process_includes(input) unless ENV["KRAMDOWN_SAFE"]
# Expand {::boilerplate KEY} directives that start a line.
input.gsub!(/^\{::boilerplate\s+(.*?)\}/) {
  boilerplate($1)
}
if input =~ /[\t]/
   warn "*** Input contains HT (\"tab\") characters. Undefined behavior will ensue."
   input = expand_tabs(input)
end

if input =~ /\A---/        # this is a sectionized file
  do_the_tls_dance unless ENV["KRAMDOWN_DONT_VERIFY_HTTPS"]
  input, options, coding_override = xml_from_sections(input)
else
  # coding_override is not assigned on this path and therefore stays nil.
  options = process_kramdown_options # all default
end
if input =~ /\A<\?xml/          # if this is a whole XML file, protect it
  input = "{::nomarkdown}\n#{input}\n{:/nomarkdown}\n"
end

# Something processed so far (e.g. tagged boilerplate) has recorded that it
# needs v3: print the collected messages and switch v3 on.
if $options.v3_used && !$options.v3
  warn $options.v3_used
  $options.v3_used = nil
  $options.v3 = true
end

# Round-trip through the requested encoding so that characters it cannot
# represent are replaced using the FALLBACK table.
if coding_override
  input = input.encode(Encoding.find(coding_override), fallback: FALLBACK)
end

# 1.4.17: because of UTF-8 bibxml files, kramdown always needs to see UTF-8 (!)
if input.encoding != Encoding::UTF_8
  input = input.encode(Encoding::UTF_8)
end

# warn "options: #{options.inspect}"
doc = Kramdown::Document.new(input, options)
$stderr.puts doc.warnings.to_yaml unless doc.warnings.empty?
output = doc.to_rfc2629

# Same check as before the conversion, for v3 needs recorded while the
# document was converted.
if $options.v3_used && !$options.v3
  warn $options.v3_used
  $options.v3 = true
end

# Optional post-processing of the generated XML; the libraries are only
# loaded when the feature is switched on.
if $options.autolink_iref_cleanup
  require 'rexml/document'
  require 'kramdown-rfc/autolink-iref-cleanup'

  d = REXML::Document.new(output)
  autolink_iref_cleanup(d)
  output = d.to_s
end

if coding_override
  output = output.encode(Encoding.find(coding_override), fallback: FALLBACK)
end

puts output
Modules

Classes

lib/kramdown-rfc/command.rb

Source Files