# lib/asciidoctor/attribute_list.rb



module Asciidoctor
# Public: Handles parsing AsciiDoc attribute lists into a Hash of key/value
# pairs. By default, attributes must each be separated by a comma and quotes
# may be used around the value. If a key is not detected, the value is assigned
# to a 1-based positional key. The positional attributes can be "rekeyed" when
# given a posattrs array either during parsing or after the fact.
#
# Examples
#
#    attrlist = Asciidoctor::AttributeList.new('astyle')
#
#    attrlist.parse
#    => {1 => 'astyle'}
#
#    attrlist.rekey(['style'])
#    => {1 => 'astyle', 'style' => 'astyle'}
#
#    attrlist = Asciidoctor::AttributeList.new('quote, Famous Person, Famous Book (2001)')
#
#    attrlist.parse(['style', 'attribution', 'citetitle'])
#    => {'style' => 'quote', 1 => 'quote',
#        'attribution' => 'Famous Person', 2 => 'Famous Person',
#        'citetitle' => 'Famous Book (2001)', 3 => 'Famous Book (2001)'}
#
class AttributeList

  # Public: Regular expressions for detecting the boundary of a value
  #
  # Keyed by the terminating character. The quote patterns require the
  # character preceding the closing quote to not be a backslash; the comma
  # pattern stops before optional trailing blanks followed by a comma or
  # the end of the line.
  BOUNDARY_PATTERNS = {
    '"' => /.*?[^\\](?=")/,
    '\'' => /.*?[^\\](?=')/,
    ',' => /.*?(?=[ \t]*(,|$))/
  }

  # Public: Regular expressions for unescaping quoted characters
  #
  # Keyed by the escape character followed by the quote character.
  UNESCAPE_PATTERNS = {
    '\\"' => /\\"/,
    '\\\'' => /\\'/
  }

  # Public: Regular expressions for skipping blanks and delimiters
  SKIP_PATTERNS = {
    :blank => /[ \t]+/,
    ',' => /[ \t]*(,|$)/
  }

  # Public: A regular expression for an attribute name
  #
  # A name starts with a letter, colon or underscore and continues with
  # letters, colons, underscores, dashes or periods. Digits are not matched.
  #
  # TODO named attributes cannot contain dash characters
  # NOTE(review): the pattern accepts dashes after the first character, so
  # the TODO above looks either stale or not yet implemented -- confirm.
  NAME_PATTERN = /[A-Za-z:_][A-Za-z:_\-\.]*/

  # Public: A regular expression for splitting a comma-separated string
  CSV_SPLIT_PATTERN = /[ \t]*,[ \t]*/

  # Public: Create a parser for the given attribute list source.
  #
  # source      - The String attribute list to parse.
  # block       - An optional object that responds to apply_normal_subs; when
  #               given, it is applied to single-quoted values (default: nil).
  # quotes      - The Array of single-character Strings recognized as quote
  #               characters (default: ['\'', '"']).
  # delimiter   - The single-character String that separates attributes
  #               (default: ',').
  # escape_char - The String that escapes a quote character inside a quoted
  #               value (default: '\\').
  #
  # Characters other than the defaults are supported; the required patterns
  # are built on demand when no predefined pattern exists for them.
  def initialize(source, block = nil, quotes = ['\'', '"'], delimiter = ',', escape_char = '\\')
    @scanner = ::StringScanner.new source
    @block = block
    @quotes = quotes
    @escape_char = escape_char
    @delimiter = delimiter
    @attributes = nil
    @quote_boundary_patterns = {}
  end

  # Public: Parse the source and merge the result into an existing Hash.
  #
  # attributes - The Hash to update (mutated in place).
  # posattrs   - The Array of names for the positional attributes (default: []).
  #
  # Returns the updated attributes Hash.
  def parse_into(attributes, posattrs = [])
    attributes.update(parse(posattrs))
  end

  # Public: Parse the source into a Hash of attributes.
  #
  # The result is cached: once the source has been parsed, later calls return
  # the same Hash and the posattrs argument is ignored.
  #
  # posattrs - The Array of names for the positional attributes (default: []).
  #
  # Returns the Hash of parsed attributes.
  def parse(posattrs = [])
    return @attributes unless @attributes.nil?

    @attributes = {}
    index = 0

    # parse_attribute leaves the delimiter that follows an attribute in the
    # scanner; skip_delimiter is the only place where it gets consumed, so
    # every delimiter advances the positional index by exactly one
    while parse_attribute(index, posattrs)
      break if @scanner.eos?
      skip_delimiter
      index += 1
    end

    @attributes
  end

  # Public: Assign names to the positional attributes of this list.
  #
  # The source is parsed first if that has not happened yet.
  #
  # posattrs - The Array of names, ordered by position.
  #
  # Returns the Hash of attributes with the named keys added.
  def rekey(posattrs)
    AttributeList.rekey(parse, posattrs)
  end

  # Public: Copy positional attributes in a Hash to the specified names.
  #
  # The name at array index i is assigned the value stored under the 1-based
  # positional key i + 1. Entries that are nil in pos_attrs, and positions
  # whose value is nil, are skipped. The positional keys are retained.
  #
  # attributes - The Hash of attributes (mutated in place).
  # pos_attrs  - The Array of names, ordered by position.
  #
  # Returns the attributes Hash.
  def self.rekey(attributes, pos_attrs)
    pos_attrs.each_with_index do |key, index|
      next if key.nil?
      pos = index + 1
      unless (val = attributes[pos]).nil?
        attributes[key] = val
        #QUESTION should we delete the positional key?
        #attributes.delete pos
      end
    end

    attributes
  end

  # Public: Parse a single attribute at the current scanner position and
  # store it in the attributes Hash.
  #
  # A positional attribute is stored under the 1-based key index + 1 and also
  # under pos_attrs[index] when that entry is not nil. A named attribute is
  # stored under its name; options/opts additionally produces one
  # "<name>-option" entry per comma-separated item.
  #
  # The delimiter that terminates the attribute is not consumed.
  #
  # index     - The zero-based Integer position of the attribute (default: 0).
  # pos_attrs - The Array of names for the positional attributes (default: []).
  #
  # Returns false if the end of the source was reached without finding an
  # attribute, otherwise true.
  def parse_attribute(index = 0, pos_attrs = [])
    single_quoted_value = false
    skip_blank
    first = @scanner.peek(1)
    # example: "quote" || 'quote'
    if @quotes.include? first
      value = nil
      name = parse_attribute_value @scanner.get_byte
      single_quoted_value = true if first == '\''
    else
      name = scan_name

      skipped = 0
      c = nil
      if @scanner.eos?
        return false if name.nil?
      else
        skipped = skip_blank || 0
        c = @scanner.get_byte
        # put the delimiter back so the caller's skip_delimiter consumes it;
        # consuming it here as well would swallow an empty positional slot
        # (e.g. the second comma in "quote,,Book")
        @scanner.unscan if c && c == @delimiter
      end

      # example: quote
      if c.nil? || c == @delimiter
        value = nil
      # example: Sherlock Holmes || =foo=
      elsif c != '=' || name.nil?
        remainder = scan_to_delimiter
        name = '' if name.nil?
        # restore the blanks and the character consumed while probing
        name += ' ' * skipped + c
        name += remainder unless remainder.nil?
        value = nil
      else
        skip_blank
        # example: foo=,
        if @scanner.peek(1) == @delimiter
          value = nil
        else
          c = @scanner.get_byte

          # example: foo="bar" || foo='bar' || foo="ba\"zaar" || foo='ba\'zaar' || foo='ba"zaar' (all spaces ignored)
          if @quotes.include? c
            value = parse_attribute_value c
            single_quoted_value = true if c == '\''
          # example: foo=bar (all spaces ignored)
          elsif !c.nil?
            value = c + scan_to_delimiter
          end
          # when c is nil (example: foo= at the end of the source) value
          # stays nil and foo is handled as a positional attribute below
        end
      end
    end

    if value.nil?
      # positional attribute; the parsed text is held in name
      resolved_name = single_quoted_value && !@block.nil? ? @block.apply_normal_subs(name) : name
      unless (pos_name = pos_attrs[index]).nil?
        @attributes[pos_name] = resolved_name
      end
      # the positional key is always assigned, even when a name was mapped
      @attributes[index + 1] = resolved_name
    else
      resolved_value = value
      # example: options="opt1,opt2,opt3"
      # opts is an alias for options
      if name == 'options' || name == 'opts'
        name = 'options'
        resolved_value.split(CSV_SPLIT_PATTERN).each do |o|
          @attributes[o + '-option'] = ''
        end
      elsif single_quoted_value && !@block.nil?
        resolved_value = @block.apply_normal_subs(value)
      end
      @attributes[name] = resolved_value
    end

    true
  end

  # Public: Parse a quoted value; the opening quote must already be consumed.
  #
  # quote - The String quote character that opened the value.
  #
  # Returns the String value with escaped quotes unescaped and the closing
  # quote consumed. If no closing quote is found, returns the quote character
  # followed by the text up to the next delimiter.
  def parse_attribute_value(quote)
    # empty quoted value
    if @scanner.peek(1) == quote
      @scanner.get_byte
      return ''
    end

    value = scan_to_quote quote
    if value.nil?
      # unterminated quote; treat the quote character as literal text
      quote + scan_to_delimiter
    else
      # consume the closing quote
      @scanner.get_byte
      escaped_quote = @escape_char + quote
      # fall back to a literal match when no predefined pattern exists; the
      # block form keeps the replacement from being read as a backreference
      value.gsub(UNESCAPE_PATTERNS[escaped_quote] || escaped_quote) { quote }
    end
  end

  # Public: Skip a run of spaces and tabs.
  #
  # Returns the Integer number of characters skipped, or nil if there were none.
  def skip_blank
    @scanner.skip SKIP_PATTERNS[:blank]
  end

  # Public: Skip optional blanks followed by the delimiter or the end of line.
  #
  # Returns the Integer number of characters skipped, or nil if no match.
  def skip_delimiter
    @scanner.skip delimiter_skip_pattern
  end

  # Public: Scan an attribute name at the current position.
  #
  # Returns the String name, or nil if the text does not match NAME_PATTERN.
  def scan_name
    @scanner.scan NAME_PATTERN
  end

  # Public: Scan up to, but not including, the blanks that precede the next
  # delimiter or end of line.
  #
  # Returns the String that was scanned (may be empty).
  def scan_to_delimiter
    @scanner.scan delimiter_boundary_pattern
  end

  # Public: Scan up to, but not including, the next unescaped quote.
  #
  # quote - The String quote character to look for.
  #
  # Returns the String that was scanned, or nil if no closing quote was found.
  def scan_to_quote(quote)
    @scanner.scan quote_boundary_pattern(quote)
  end

  private

  # Internal: The pattern used by skip_delimiter; the predefined entry in
  # SKIP_PATTERNS is preferred, otherwise an equivalent pattern is built for
  # the configured delimiter.
  def delimiter_skip_pattern
    @delimiter_skip_pattern ||=
      SKIP_PATTERNS[@delimiter] || /[ \t]*(#{::Regexp.escape @delimiter}|$)/
  end

  # Internal: The pattern used by scan_to_delimiter; the predefined entry in
  # BOUNDARY_PATTERNS is preferred, otherwise an equivalent pattern is built
  # for the configured delimiter.
  def delimiter_boundary_pattern
    @delimiter_boundary_pattern ||=
      BOUNDARY_PATTERNS[@delimiter] || /.*?(?=[ \t]*(#{::Regexp.escape @delimiter}|$))/
  end

  # Internal: The pattern used by scan_to_quote. The predefined patterns
  # hard-code the backslash escape, so they are only used when the backslash
  # is the configured escape character.
  def quote_boundary_pattern(quote)
    return BOUNDARY_PATTERNS[quote] if @escape_char == '\\' && BOUNDARY_PATTERNS.key?(quote)

    @quote_boundary_patterns[quote] ||=
      /.*?[^#{::Regexp.escape @escape_char}](?=#{::Regexp.escape quote})/
  end

end
end