module Asciidoctor
# Public: Methods to parse lines of AsciiDoc into an object hierarchy
# representing the structure of the document. All methods are class methods and
# should be invoked from the Lexer class. The main entry point is ::next_block.
# No Lexer instances shall be discovered running around. (Any attempt to
# instantiate a Lexer will be futile).
#
# The object hierarchy created by the Lexer consists of zero or more Section
# and Block objects. Section objects may be nested and a Section object
# contains zero or more Block objects. Block objects may be nested, but may
# only contain other Block objects. Block objects which represent lists may
# contain zero or more ListItem objects.
#
# Examples
#
# # Create a Reader for the AsciiDoc lines and retrieve the next block from it.
# # Lexer::next_block requires a parent, so we begin by instantiating an empty Document.
#
# doc = Document.new
# reader = Reader.new lines
# block = Lexer.next_block(reader, doc)
# block.class
# # => Asciidoctor::Block
class Lexer
BlockMatchData = Struct.new(:context, :masq, :tip, :terminator)
# Public: Make sure the Lexer object doesn't get initialized.
#
# Raises RuntimeError if this constructor is invoked.
def initialize
raise 'Au contraire, mon frere. No lexer instances will be running around.'
end
# Public: Parses AsciiDoc source read from the Reader into the Document
#
# This method is the main entry-point into the Lexer when parsing a full document.
# It first looks for and, if found, processes the document title. It then
# proceeds to iterate through the lines in the Reader, parsing the document
# into nested Sections and Blocks.
#
# reader - the Reader holding the source lines of the document
# document - the empty Document into which the lines will be parsed
# options - a Hash of options to control processing
#
# returns the Document object
def self.parse(reader, document, options = {})
block_attributes = parse_document_header(reader, document)
unless options[:header_only]
while reader.has_more_lines?
new_section, block_attributes = next_section(reader, document, block_attributes)
document << new_section unless new_section.nil?
end
end
document
end
# Public: Parses the document header of the AsciiDoc source read from the Reader
#
# Reads the AsciiDoc source from the Reader until the end of the document
# header is reached. The Document object is populated with information from
# the header (document title, document attributes, etc). The document
# attributes are then saved to establish a save point to which to rollback
# after parsing is complete.
#
# This method assumes that there are no blank lines at the start of the document,
# which are automatically removed by the reader.
#
# returns the Hash of orphan block attributes captured above the header
def self.parse_document_header(reader, document)
# capture any lines of block-level metadata and plow away any comment lines
# that precede first block
block_attributes = parse_block_metadata_lines(reader, document)
# check if the first line is the document title
# if so, add a header to the document and parse the header metadata
if is_next_line_document_title?(reader, block_attributes)
document.id, document.title, _, _ = parse_section_title(reader, document)
# QUESTION: should this be encapsulated in document?
if document.id.nil? && block_attributes.has_key?('id')
document.id = block_attributes.delete('id')
end
parse_header_metadata(reader, document)
end
if document.attributes.has_key? 'doctitle'
document.title = document.attributes['doctitle']
end
document.clear_playback_attributes block_attributes
document.save_attributes
# NOTE these are the block-level attributes (not document attributes) that
# precede the first line of content (document title, first section or first block)
block_attributes
end
# Public: Return the next section from the Reader.
#
# This method process block metadata, content and subsections for this
# section and returns the Section object and any orphaned attributes.
#
# If the parent is a Document and has a header (document title), then
# this method will put any non-section blocks at the start of document
# into a preamble Block. If there are no such blocks, the preamble is
# dropped.
#
# Since we are reading line-by-line, there's a chance that metadata
# that should be associated with the following block gets consumed.
# To deal with this case, the method returns a running Hash of
# "orphaned" attributes that get passed to the next Section or Block.
#
# reader - the source Reader
# parent - the parent Section or Document of this new section
# attributes - a Hash of metadata that was left orphaned from the
# previous Section.
#
# Examples
#
# source
# # => "Greetings\n---------\nThis is my doc.\n\nSalutations\n-----------\nIt is awesome."
#
# reader = Reader.new source.lines.entries
# # create empty document to parent the section
# # and hold attributes extracted from header
# doc = Document.new
#
# Lexer.next_section(reader, doc).first.title
# # => "Greetings"
#
# Lexer.next_section(reader, doc).first.title
# # => "Salutations"
#
# returns a two-element Array containing the Section and Hash of orphaned attributes
def self.next_section(reader, parent, attributes = {})
preamble = false
# FIXME if attributes[1] is a verbatim style, then don't check for section
# check if we are at the start of processing the document
# NOTE we could drop a hint in the attributes to indicate
# that we are at a section title (so we don't have to check)
if parent.is_a?(Document) && parent.blocks.empty? &&
(parent.has_header? || !is_next_line_section?(reader, attributes))
if parent.has_header?
preamble = Block.new(parent, :preamble)
parent << preamble
end
section = parent
current_level = 0
if parent.attributes.has_key? 'fragment'
expected_next_levels = nil
# small tweak to allow subsequent level-0 sections for book doctype
elsif parent.doctype == 'book'
expected_next_levels = [0, 1]
else
expected_next_levels = [1]
end
else
section = initialize_section(reader, parent, attributes)
# clear attributes, except for title which carries over
# section title to next block of content
attributes = attributes.delete_if {|k, v| k != 'title'}
current_level = section.level
# subsections in preface & appendix in multipart books start at level 2
if current_level == 0 && section.special &&
section.document.doctype == 'book' && ['preface', 'appendix'].include?(section.sectname)
expected_next_levels = [current_level + 2]
else
expected_next_levels = [current_level + 1]
end
end
reader.skip_blank_lines
# Parse lines belonging to this section and its subsections until we
# reach the end of this section level
#
# 1. first look for metadata thingies (anchor, attribute list, block title line, etc)
# 2. then look for a section, recurse if found
# 3. then process blocks
#
# We have to parse all the metadata lines before continuing with the loop,
# otherwise subsequent metadata lines get interpreted as block content
while reader.has_more_lines?
parse_block_metadata_lines(reader, section, attributes)
next_level = is_next_line_section? reader, attributes
if next_level
next_level += section.document.attr('leveloffset', 0).to_i
doctype = parent.document.doctype
if next_level == 0 && doctype != 'book'
puts "asciidoctor: ERROR: line #{reader.lineno + 1}: only book doctypes can contain level 0 sections"
end
if next_level > current_level || (section.is_a?(Document) && next_level == 0)
unless expected_next_levels.nil? || expected_next_levels.include?(next_level)
puts "asciidoctor: WARNING: line #{reader.lineno + 1}: section title out of sequence: " +
"expected #{expected_next_levels.size > 1 ? 'levels' : 'level'} #{expected_next_levels * ' or '}, " +
"got level #{next_level}"
end
# the attributes returned are those that are orphaned
new_section, attributes = next_section(reader, section, attributes)
section << new_section
else
# close this section (and break out of the nesting) to begin a new one
break
end
else
# just take one block or else we run the risk of overrunning section boundaries
new_block = next_block(reader, section, attributes, :parse_metadata => false)
if !new_block.nil?
(preamble || section) << new_block
attributes = {}
else
# don't clear attributes if we don't find a block because they may
# be trailing attributes that didn't get associated with a block
end
end
reader.skip_blank_lines
end
# drop the preamble if it has no content
if preamble && preamble.blocks.empty?
section.delete_at(0)
end
# The attributes returned here are orphaned attributes that fall at the end
# of a section that need to get transfered to the next section
# see "trailing block attributes transfer to the following section" in
# test/attributes_test.rb for an example
[section != parent ? section : nil, attributes.dup]
end
# Public: Return the next Section or Block object from the Reader.
#
# Begins by skipping over blank lines to find the start of the next Section
# or Block. Processes each line of the reader in sequence until a Section or
# Block is found or the reader has no more lines.
#
# Uses regular expressions from the Asciidoctor module to match Section
# and Block delimiters. The ensuing lines are then processed according
# to the type of content.
#
# reader - The Reader from which to retrieve the next block
# parent - The Document, Section or Block to which the next block belongs
#
# Returns a Section or Block object holding the parsed content of the processed lines
#--
# QUESTION should next_block have an option for whether it should keep looking until
# a block is found? right now it bails when it encounters a line to be skipped
def self.next_block(reader, parent, attributes = {}, options = {})
# Skip ahead to the block content
skipped = reader.skip_blank_lines
# bail if we've reached the end of the parent block or document
return nil unless reader.has_more_lines?
# check for option to find list item text only
# if skipped a line, assume a list continuation was
# used and block content is acceptable
if options[:text] && skipped > 0
options.delete(:text)
end
parse_metadata = options.fetch(:parse_metadata, true)
#parse_sections = options.fetch(:parse_sections, false)
document = parent.document
parent_context = parent.is_a?(Block) ? parent.context : nil
block = nil
style = nil
explicit_style = nil
while reader.has_more_lines? && block.nil?
# if parsing metadata, read until there is no more to read
if parse_metadata && parse_block_metadata_line(reader, document, attributes, options)
reader.advance
next
#elsif parse_sections && parent_context.nil? && is_next_line_section?(reader, attributes)
# block, attributes = next_section(reader, parent, attributes)
# break
end
# QUESTION introduce parsing context object?
this_line = reader.get_line
delimited_block = false
block_context = nil
terminator = nil
# QUESTION put this inside call to rekey attributes?
if attributes.has_key? 1
explicit_style = attributes['style']
style = attributes['style'] = attributes[1]
end
if delimited_blk_match = is_delimited_block?(this_line, true)
delimited_block = true
block_context = delimited_blk_match.context
terminator = delimited_blk_match.terminator
if !style
style = attributes['style'] = block_context.to_s
elsif style != block_context.to_s
if delimited_blk_match.masq.include? style
block_context = style.to_sym
elsif delimited_blk_match.masq.include?('admonition') && ADMONITION_STYLES.include?(style)
block_context = :admonition
else
puts "asciidoctor: WARNING: line #{reader.lineno}: invalid style for #{block_context} block: #{style}"
style = block_context.to_s
end
end
end
if !delimited_block
# this loop only executes once; used for flow control
# break once a block is found or at end of loop
# returns nil if the line must be dropped
# Implementation note - while(true) is twice as fast as loop
while true
# process lines verbatim
if !style.nil? && COMPLIANCE[:strict_verbatim_paragraphs] && VERBATIM_STYLES.include?(style)
block_context = style.to_sym
reader.unshift_line this_line
# advance to block parsing =>
break
end
# process lines normally
if !options[:text]
# NOTE we're letting break lines (ruler, page_break, etc) have attributes
if (match = this_line.match(REGEXP[:break_line]))
block = Block.new(parent, BREAK_LINES[match[0][0..2]])
break
# TODO make this a media_blk and handle image, video & audio
elsif (match = this_line.match(REGEXP[:media_blk_macro]))
blk_ctx = match[1].to_sym
block = Block.new(parent, blk_ctx)
if blk_ctx == :image
posattrs = ['alt', 'width', 'height']
elsif blk_ctx == :video
posattrs = ['poster', 'width', 'height']
else
posattrs = []
end
unless style.nil? || explicit_style
attributes['alt'] = style if blk_ctx == :image
attributes.delete('style')
style = nil
end
block.parse_attributes(match[3], posattrs,
:unescape_input => (blk_ctx == :image),
:sub_input => true,
:sub_result => false,
:into => attributes)
target = block.sub_attributes(match[2])
if target.empty?
# drop the line if target resolves to nothing
return nil
end
attributes['target'] = target
block.title = attributes.delete('title') if attributes.has_key?('title')
if blk_ctx == :image
document.register(:images, target)
attributes['alt'] ||= File.basename(target, File.extname(target))
# QUESTION should video or audio have an auto-numbered caption?
block.assign_caption attributes.delete('caption'), 'figure'
end
break
# NOTE we're letting the toc macro have attributes
elsif (match = this_line.match(REGEXP[:toc]))
block = Block.new(parent, :toc)
block.parse_attributes(match[1], [], :sub_result => false, :into => attributes)
break
end
end
# haven't found anything yet, continue
if (match = this_line.match(REGEXP[:colist]))
block = Block.new(parent, :colist)
attributes['style'] = 'arabic'
items = []
block.buffer = items
reader.unshift_line this_line
expected_index = 1
begin
# might want to move this check to a validate method
if match[1].to_i != expected_index
puts "asciidoctor: WARNING: line #{reader.lineno + 1}: callout list item index: expected #{expected_index} got #{match[1]}"
end
list_item = next_list_item(reader, block, match)
expected_index += 1
if !list_item.nil?
items << list_item
coids = document.callouts.callout_ids(items.size)
if !coids.empty?
list_item.attributes['coids'] = coids
else
puts "asciidoctor: WARNING: line #{reader.lineno}: no callouts refer to list item #{items.size}"
end
end
end while reader.has_more_lines? && match = reader.peek_line.match(REGEXP[:colist])
document.callouts.next_list
break
elsif (match = this_line.match(REGEXP[:ulist]))
reader.unshift_line this_line
block = next_outline_list(reader, :ulist, parent)
break
elsif (match = this_line.match(REGEXP[:olist]))
reader.unshift_line this_line
block = next_outline_list(reader, :olist, parent)
# QUESTION move this logic to next_outline_list?
if !(attributes.has_key? 'style') && !(block.attributes.has_key? 'style')
marker = block.buffer.first.marker
if marker.start_with? '.'
# first one makes more sense, but second on is AsciiDoc-compliant
#attributes['style'] = (ORDERED_LIST_STYLES[block.level - 1] || ORDERED_LIST_STYLES.first).to_s
attributes['style'] = (ORDERED_LIST_STYLES[marker.length - 1] || ORDERED_LIST_STYLES.first).to_s
else
style = ORDERED_LIST_STYLES.detect{|s| marker.match(ORDERED_LIST_MARKER_PATTERNS[s]) }
attributes['style'] = (style || ORDERED_LIST_STYLES.first).to_s
end
end
break
elsif (match = this_line.match(REGEXP[:dlist]))
reader.unshift_line this_line
block = next_labeled_list(reader, match, parent)
break
elsif (style == 'float' || style == 'discrete') && is_section_title?(this_line, reader.peek_line)
reader.unshift_line this_line
float_id, float_title, float_level, _ = parse_section_title(reader, document)
float_id ||= attributes['id'] if attributes.has_key?('id')
block = Block.new(parent, :floating_title)
if float_id.nil? || float_id.empty?
# FIXME remove hack of creating throwaway Section to get at the generate_id method
tmp_sect = Section.new(parent)
tmp_sect.title = float_title
block.id = tmp_sect.generate_id
else
block.id = float_id
end
document.register(:ids, [block.id, float_title]) if block.id
block.level = float_level
block.title = float_title
break
# FIXME create another set for "passthrough" styles
# though partintro should likely be a dedicated block
elsif !style.nil? && style != 'normal' && style != 'partintro'
if PARAGRAPH_STYLES.include?(style)
block_context = style.to_sym
reader.unshift_line this_line
# advance to block parsing =>
break
elsif ADMONITION_STYLES.include?(style)
block_context = :admonition
reader.unshift_line this_line
# advance to block parsing =>
break
else
puts "asciidoctor: WARNING: line #{reader.lineno}: invalid style for paragraph: #{style}"
style = nil
# continue to process paragraph
end
end
break_at_list = (skipped == 0 && parent_context.to_s.end_with?('list'))
# a literal paragraph is contiguous lines starting at least one space
if style != 'normal' && this_line.match(REGEXP[:lit_par])
# So we need to actually include this one in the grab_lines group
reader.unshift_line this_line
buffer = reader.grab_lines_until(
:break_on_blank_lines => true,
:break_on_list_continuation => true,
:preserve_last_line => true) {|line|
# a preceding blank line (skipped > 0) indicates we are in a list continuation
# and therefore we should not break at a list item
# (this won't stop breaking on item of same level since we've already parsed them out)
# QUESTION can we turn this block into a lambda or function call?
(break_at_list && line.match(REGEXP[:any_list])) ||
(COMPLIANCE[:block_terminates_paragraph] && (is_delimited_block?(line) || line.match(REGEXP[:attr_line])))
}
# trim off the indentation equivalent to the size of the least indented line
if !buffer.empty?
offset = buffer.map {|line| line.match(REGEXP[:leading_blanks])[1].length }.min
if offset > 0
buffer = buffer.map {|l| l.sub(/^\s{1,#{offset}}/, '') }
end
end
block = Block.new(parent, :literal, buffer)
# a literal gets special meaning inside of a definition list
if LIST_CONTEXTS.include?(parent_context)
attributes['options'] ||= []
# TODO this feels hacky, better way to distinguish from explicit literal block?
attributes['options'] << 'listparagraph'
end
# a paragraph is contiguous nonblank/noncontinuation lines
else
reader.unshift_line this_line
buffer = reader.grab_lines_until(
:break_on_blank_lines => true,
:break_on_list_continuation => true,
:preserve_last_line => true,
:skip_line_comments => true) {|line|
# a preceding blank line (skipped > 0) indicates we are in a list continuation
# and therefore we should not break at a list item
# (this won't stop breaking on item of same level since we've already parsed them out)
# QUESTION can we turn this block into a lambda or function call?
(break_at_list && line.match(REGEXP[:any_list])) ||
(COMPLIANCE[:block_terminates_paragraph] && (is_delimited_block?(line) || line.match(REGEXP[:attr_line])))
}
# NOTE we need this logic because we've asked the reader to skip
# line comments, which may leave us w/ an empty buffer if those
# were the only lines found
if buffer.empty?
# call get_line since the reader preserved the last line
reader.get_line
return nil
end
catalog_inline_anchors(buffer.join, document)
if !options[:text] && (admonition_match = buffer.first.match(REGEXP[:admonition_inline]))
buffer[0] = admonition_match.post_match.lstrip
block = Block.new(parent, :admonition, buffer)
attributes['style'] = admonition_match[1]
attributes['name'] = admonition_name = admonition_match[1].downcase
attributes['caption'] ||= document.attributes["#{admonition_name}-caption"]
else
# QUESTION is this necessary?
#if style == 'normal' && [' ', "\t"].include?(buffer.first[0..0])
# # QUESTION should we only trim leading blanks?
# buffer.map! &:lstrip
#end
block = Block.new(parent, :paragraph, buffer)
end
end
# forbid loop from executing more than once
break
end
end
# either delimited block or styled paragraph
if block.nil? && !block_context.nil?
case block_context
when :admonition
attributes['name'] = admonition_name = style.downcase
attributes['caption'] ||= document.attributes["#{admonition_name}-caption"]
block = build_block(block_context, :complex, terminator, parent, reader, attributes)
when :comment
reader.grab_lines_until(:break_on_blank_lines => true, :chomp_last_line => false)
return nil
when :example
block = build_block(block_context, :complex, terminator, parent, reader, attributes, true)
when :listing, :fenced_code, :source
if block_context == :fenced_code
style = attributes['style'] = 'source'
lang = this_line[3..-1].strip
attributes['language'] = lang unless lang.empty?
terminator = terminator[0..2] if terminator.length > 3
elsif block_context == :source
AttributeList.rekey(attributes, [nil, 'language', 'linenums'])
end
block = build_block(:listing, :verbatim, terminator, parent, reader, attributes, true)
when :literal
block = build_block(block_context, :verbatim, terminator, parent, reader, attributes)
when :pass
block = build_block(block_context, :simple, terminator, parent, reader, attributes)
when :open, :sidebar
block = build_block(block_context, :complex, terminator, parent, reader, attributes)
when :table
block_reader = Reader.new reader.grab_lines_until(:terminator => terminator, :skip_line_comments => true)
block = next_table(block_reader, parent, attributes)
when :quote, :verse
AttributeList.rekey(attributes, [nil, 'attribution', 'citetitle'])
block = build_block(block_context, (block_context == :verse ? :verbatim : :complex), terminator, parent, reader, attributes)
else
# this should only happen if there is a misconfiguration
raise "Unsupported block type #{block_context} at line #{reader.lineno}"
end
end
end
# when looking for nested content, one or more line comments, comment
# blocks or trailing attribute lists could leave us without a block,
# so handle accordingly
# REVIEW we may no longer need this check
if !block.nil?
# REVIEW seems like there is a better way to organize this wrap-up
block.id ||= attributes['id'] if attributes.has_key?('id')
block.title = attributes['title'] unless block.title?
block.caption ||= attributes['caption'] unless block.is_a?(Section)
# AsciiDoc always use [id] as the reftext in HTML output,
# but I'd like to do better in Asciidoctor
if block.id && block.title? && !attributes.has_key?('reftext')
document.register(:ids, [block.id, block.title])
end
block.update_attributes(attributes)
if block.context == :listing || block.context == :literal
catalog_callouts(block.buffer.join, document)
end
end
block
end
# Public: Determines whether this line is the start of any of the delimited blocks
#
# returns the match data if this line is the first line of a delimited block or nil if not
def self.is_delimited_block?(line, return_match_data = false)
line_len = line.length
# optimized for best performance
if line_len > 2
if line_len == 3
tip = line.chop
tl = 2
else
tip = line[0..3]
tl = 4
# special case for fenced code blocks
tip_alt = tip.chop
if tip_alt == '```' || tip_alt == '~~~'
tip = tip_alt
tl = 3
end
end
if DELIMITED_BLOCKS.has_key? tip
# if tip is the full line
if tl == line_len - 1
#return_match_data ? BlockMatchData.new(DELIMITED_BLOCKS[tip], tip, tip) : true
if return_match_data
context, masq = *DELIMITED_BLOCKS[tip]
BlockMatchData.new(context, masq, tip, tip)
else
true
end
elsif match = line.match(REGEXP[:any_blk])
#return_match_data ? BlockMatchData.new(DELIMITED_BLOCKS[tip], tip, match[0]) : true
if return_match_data
context, masq = *DELIMITED_BLOCKS[tip]
BlockMatchData.new(context, masq, tip, match[0])
else
true
end
else
nil
end
else
nil
end
else
nil
end
end
# whether a block supports complex content should be a config setting
# NOTE could invoke filter in here, before and after parsing
def self.build_block(block_context, content_type, terminator, parent, reader, attributes, supports_caption = false)
if terminator.nil?
if content_type == :verbatim
buffer = reader.grab_lines_until(:break_on_blank_lines => true, :break_on_list_continuation => true)
else
buffer = reader.grab_lines_until(
:break_on_blank_lines => true,
:break_on_list_continuation => true,
:preserve_last_line => true,
:skip_line_comments => true) {|line|
COMPLIANCE[:block_terminates_paragraph] && (is_delimited_block?(line) || line.match(REGEXP[:attr_line]))
}
# QUESTION check for empty buffer?
end
elsif content_type != :complex
buffer = reader.grab_lines_until(:terminator => terminator, :chomp_last_line => true)
else
buffer = nil
block_reader = Reader.new reader.grab_lines_until(:terminator => terminator)
end
block = Block.new(parent, block_context, buffer)
# should supports_caption be necessary?
if supports_caption
block.title = attributes.delete('title') if attributes.has_key?('title')
block.assign_caption attributes.delete('caption')
end
if buffer.nil?
# we can look for blocks until there are no more lines (and not worry
# about sections) since the reader is confined within the boundaries of a
# delimited block
while block_reader.has_more_lines?
parsed_block = next_block(block_reader, block)
block.blocks << parsed_block unless parsed_block.nil?
end
end
block
end
# Internal: Parse and construct an outline list Block from the current position of the Reader
#
# reader - The Reader from which to retrieve the outline list
# list_type - A Symbol representing the list type (:olist for ordered, :ulist for unordered)
# parent - The parent Block to which this outline list belongs
#
# Returns the Block encapsulating the parsed outline (unordered or ordered) list
def self.next_outline_list(reader, list_type, parent)
list_block = Block.new(parent, list_type)
items = []
list_block.buffer = items
if parent.context == list_type
list_block.level = parent.level + 1
else
list_block.level = 1
end
Debug.debug { "Created #{list_type} block: #{list_block}" }
while reader.has_more_lines? && (match = reader.peek_line.match(REGEXP[list_type]))
marker = resolve_list_marker(list_type, match[1])
# if we are moving to the next item, and the marker is different
# determine if we are moving up or down in nesting
if items.size > 0 && marker != items.first.marker
# assume list is nested by default, but then check to see if we are
# popping out of a nested list by matching an ancestor's list marker
this_item_level = list_block.level + 1
p = parent
while p.context == list_type
if marker == p.buffer.first.marker
this_item_level = p.level
break
end
p = p.parent
end
else
this_item_level = list_block.level
end
if items.size == 0 || this_item_level == list_block.level
list_item = next_list_item(reader, list_block, match)
elsif this_item_level < list_block.level
# leave this block
break
elsif this_item_level > list_block.level
# If this next list level is down one from the
# current Block's, append it to content of the current list item
items.last.blocks << next_block(reader, list_block)
end
items << list_item unless list_item.nil?
list_item = nil
reader.skip_blank_lines
end
list_block
end
# Internal: Catalog any callouts found in the text, but don't process them
#
# text - The String of text in which to look for callouts
# document - The current document on which the callouts are stored
#
# Returns nothing
def self.catalog_callouts(text, document)
text.scan(REGEXP[:callout_scan]) {
# alias match for Ruby 1.8.7 compat
m = $~
next if m[0].start_with? '\\'
document.callouts.register(m[1])
}
end
# Internal: Catalog any inline anchors found in the text, but don't process them
#
# text - The String text in which to look for inline anchors
# document - The current document on which the references are stored
#
# Returns nothing
def self.catalog_inline_anchors(text, document)
text.scan(REGEXP[:anchor_macro]) {
# alias match for Ruby 1.8.7 compat
m = $~
next if m[0].start_with? '\\'
id, reftext = m[1].split(',')
id.sub!(REGEXP[:dbl_quoted], '\2')
if !reftext.nil?
reftext.sub!(REGEXP[:m_dbl_quoted], '\2')
end
document.register(:ids, [id, reftext])
}
nil
end
# Internal: Parse and construct a labeled (e.g., definition) list Block from the current position of the Reader
#
# reader - The Reader from which to retrieve the labeled list
# match - The Regexp match for the head of the list
# parent - The parent Block to which this labeled list belongs
#
# Returns the Block encapsulating the parsed labeled list
def self.next_labeled_list(reader, match, parent)
pairs = []
block = Block.new(parent, :dlist)
block.buffer = pairs
# allows us to capture until we find a labeled item
# that uses the same delimiter (::, :::, :::: or ;;)
sibling_pattern = REGEXP[:dlist_siblings][match[2]]
begin
pairs << next_list_item(reader, block, match, sibling_pattern)
end while reader.has_more_lines? && match = reader.peek_line.match(sibling_pattern)
block
end
# Internal: Parse and construct the next ListItem for the current bulleted
# (unordered or ordered) list Block, callout lists included, or the next
# term ListItem and definition ListItem pair for the labeled list Block.
#
# First collect and process all the lines that constitute the next list
# item for the parent list (according to its type). Next, parse those lines
# into blocks and associate them with the ListItem (in the case of a
# labeled list, the definition ListItem). Finally, fold the first block
# into the item's text attribute according to rules described in ListItem.
#
# reader - The Reader from which to retrieve the next list item
# list_block - The parent list Block of this ListItem. Also provides access to the list type.
# match - The match Array which contains the marker and text (first-line) of the ListItem
# sibling_trait - The list marker or the Regexp to match a sibling item
#
# Returns the next ListItem or ListItem pair (depending on the list type)
# for the parent list Block.
def self.next_list_item(reader, list_block, match, sibling_trait = nil)
list_type = list_block.context
if list_type == :dlist
list_term = ListItem.new(list_block, match[1])
list_item = ListItem.new(list_block, match[3])
has_text = !match[3].to_s.empty?
else
# Create list item using first line as the text of the list item
list_item = ListItem.new(list_block, match[2])
if !sibling_trait
sibling_trait = resolve_list_marker(list_type, match[1], list_block.buffer.size, true)
end
list_item.marker = sibling_trait
has_text = true
end
# first skip the line with the marker / term
reader.get_line
list_item_reader = Reader.new grab_lines_for_list_item(reader, list_type, sibling_trait, has_text)
if list_item_reader.has_more_lines?
comment_lines = list_item_reader.consume_line_comments
subsequent_line = list_item_reader.peek_line
list_item_reader.unshift(*comment_lines) unless comment_lines.empty?
if !subsequent_line.nil?
continuation_connects_first_block = (subsequent_line == "\n")
# if there's no continuation connecting the first block, then
# treat the lines as paragraph text (activated when has_text = false)
if !continuation_connects_first_block && list_type != :dlist
has_text = false
end
content_adjacent = !subsequent_line.chomp.empty?
else
continuation_connects_first_block = false
content_adjacent = false
end
# only relevant for :dlist
options = {:text => !has_text}
# we can look for blocks until there are no more lines (and not worry
# about sections) since the reader is confined within the boundaries of a
# list
while list_item_reader.has_more_lines?
new_block = next_block(list_item_reader, list_block, {}, options)
list_item.blocks << new_block unless new_block.nil?
end
list_item.fold_first(continuation_connects_first_block, content_adjacent)
end
if list_type == :dlist
unless list_item.text? || list_item.blocks?
list_item = nil
end
[list_term, list_item]
else
list_item
end
end
# Internal: Collect the lines belonging to the current list item, navigating
# through all the rules that determine what comprises a list item.
#
# Grab lines until a sibling list item is found, or the block is broken by a
# terminator (such as a line comment). Definition lists are more greedy if
# they don't have optional inline item text...they want that text
#
# reader - The Reader from which to retrieve the lines.
# list_type - The Symbol context of the list (:ulist, :olist, :colist or :dlist)
# sibling_trait - A Regexp that matches a sibling of this list item or String list marker
# of the items in this list (default: nil)
# has_text - Whether the list item has text defined inline (always true except for labeled lists)
#
# Returns an Array of lines belonging to the current list item.
def self.grab_lines_for_list_item(reader, list_type, sibling_trait = nil, has_text = true)
buffer = []
# three states for continuation: :inactive, :active & :frozen
# :frozen signifies we've detected sequential continuation lines &
# continuation is not permitted until reset
continuation = :inactive
# if we are within a nested list, we don't throw away the list
# continuation marks because they will be processed when grabbing
# the lines for those nested lists
within_nested_list = false
# a detached continuation is a list continuation that follows a blank line
# it gets associated with the outermost block
detached_continuation = nil
while reader.has_more_lines?
this_line = reader.get_line
# if we've arrived at a sibling item in this list, we've captured
# the complete list item and can begin processing it
# the remainder of the method determines whether we've reached
# the termination of the list
break if is_sibling_list_item?(this_line, list_type, sibling_trait)
prev_line = buffer.empty? ? nil : buffer.last.chomp
if prev_line == LIST_CONTINUATION
if continuation == :inactive
continuation = :active
has_text = true
buffer[-1] = "\n" unless within_nested_list
end
# dealing with adjacent list continuations (which is really a syntax error)
if this_line.chomp == LIST_CONTINUATION
if continuation != :frozen
continuation = :frozen
buffer << this_line
end
this_line = nil
next
end
end
# a delimited block immediately breaks the list unless preceded
# by a list continuation (they are harsh like that ;0)
if match = is_delimited_block?(this_line, true)
if continuation == :active
buffer << this_line
# grab all the lines in the block, leaving the delimiters in place
# we're being more strict here about the terminator, but I think that's a good thing
buffer.concat reader.grab_lines_until(:terminator => match.terminator, :grab_last_line => true)
continuation = :inactive
else
break
end
# technically attr_line only breaks if ensuing line is not a list item
# which really means attr_line only breaks if it's acting as a block delimiter
elsif list_type == :dlist && continuation != :active && this_line.match(REGEXP[:attr_line])
break
else
if continuation == :active && !this_line.chomp.empty?
# literal paragraphs have special considerations (and this is one of
# two entry points into one)
# if we don't process it as a whole, then a line in it that looks like a
# list item will throw off the exit from it
if this_line.match(REGEXP[:lit_par])
reader.unshift_line this_line
buffer.concat reader.grab_lines_until(
:preserve_last_line => true,
:break_on_blank_lines => true,
:break_on_list_continuation => true) {|line|
# we may be in an indented list disguised as a literal paragraph
# so we need to make sure we don't slurp up a legitimate sibling
list_type == :dlist && is_sibling_list_item?(line, list_type, sibling_trait)
}
continuation = :inactive
# let block metadata play out until we find the block
elsif this_line.match(REGEXP[:blk_title]) || this_line.match(REGEXP[:attr_line]) || this_line.match(REGEXP[:attr_entry])
buffer << this_line
else
if nested_list_type = (within_nested_list ? [:dlist] : NESTABLE_LIST_CONTEXTS).detect {|ctx| this_line.match(REGEXP[ctx]) }
within_nested_list = true
if nested_list_type == :dlist && $~[3].to_s.empty?
# get greedy again
has_text = false
end
end
buffer << this_line
continuation = :inactive
end
elsif !prev_line.nil? && prev_line.chomp.empty?
# advance to the next line of content
if this_line.chomp.empty?
reader.skip_blank_lines
this_line = reader.get_line
# if we hit eof or a sibling, stop reading
break if this_line.nil? || is_sibling_list_item?(this_line, list_type, sibling_trait)
end
if this_line.chomp == LIST_CONTINUATION
detached_continuation = buffer.size
buffer << this_line
else
# has_text is only relevant for dlist, which is more greedy until it has text for an item
# for all other lists, has_text is always true
# in this block, we have to see whether we stay in the list
if has_text
# slurp up any literal paragraph offset by blank lines
if this_line.match(REGEXP[:lit_par])
reader.unshift_line this_line
buffer.concat reader.grab_lines_until(
:preserve_last_line => true,
:break_on_blank_lines => true,
:break_on_list_continuation => true) {|line|
# we may be in an indented list disguised as a literal paragraph
# so we need to make sure we don't slurp up a legitimate sibling
list_type == :dlist && is_sibling_list_item?(line, list_type, sibling_trait)
}
# TODO any way to combine this with the check after skipping blank lines?
elsif is_sibling_list_item?(this_line, list_type, sibling_trait)
break
elsif nested_list_type = NESTABLE_LIST_CONTEXTS.detect {|ctx| this_line.match(REGEXP[ctx]) }
buffer << this_line
within_nested_list = true
if nested_list_type == :dlist && $~[3].to_s.empty?
# get greedy again
has_text = false
end
else
break
end
else # only dlist in need of item text, so slurp it up!
# pop the blank line so it's not interpretted as a list continuation
buffer.pop unless within_nested_list
buffer << this_line
has_text = true
end
end
else
has_text = true if !this_line.chomp.empty?
if nested_list_type = (within_nested_list ? [:dlist] : NESTABLE_LIST_CONTEXTS).detect {|ctx| this_line.match(REGEXP[ctx]) }
within_nested_list = true
if nested_list_type == :dlist && $~[3].to_s.empty?
# get greedy again
has_text = false
end
end
buffer << this_line
end
end
this_line = nil
end
reader.unshift_line this_line if !this_line.nil?
if detached_continuation
buffer.delete_at detached_continuation
end
# strip trailing blank lines to prevent empty blocks
buffer.pop while !buffer.empty? && buffer.last.chomp.empty?
# We do need to replace the optional trailing continuation
# a blank line would have served the same purpose in the document
if !buffer.empty? && buffer.last.chomp == LIST_CONTINUATION
buffer.pop
end
#puts "BUFFER[#{list_type},#{sibling_trait}]>#{buffer.join}<BUFFER"
#puts "BUFFER[#{list_type},#{sibling_trait}]>#{buffer}<BUFFER"
buffer
end
# Internal: Initialize a new Section object and assign any attributes provided
#
# The information for this section is retrieved by parsing the lines at the
# current position of the reader.
#
# reader - the source reader
# parent - the parent Section or Document of this Section
# attributes - a Hash of attributes to assign to this section (default: {})
def self.initialize_section(reader, parent, attributes = {})
section = Section.new parent
section.id, section.title, section.level, _ = parse_section_title(reader, section.document)
if section.id.nil? && attributes.has_key?('id')
section.id = attributes['id']
else
# generate an id if one was not *embedded* in the heading line
# or as an anchor above the section
section.id ||= section.generate_id
end
if section.id
section.document.register(:ids, [section.id, section.title])
end
if attributes[1]
section.sectname = attributes[1]
section.special = true
document = parent.document
# FIXME refactor to use assign_caption (also check requirements)
if section.sectname == 'appendix' &&
!attributes.has_key?('caption') &&
!document.attributes.has_key?('caption')
number = document.counter('appendix-number', 'A')
attributes['caption'] = "#{document.attributes['appendix-caption']} #{number}: "
Document::AttributeEntry.new('appendix-number', number).save_to(attributes)
end
else
section.sectname = "sect#{section.level}"
end
section.update_attributes(attributes)
reader.skip_blank_lines
section
end
# Private: Get the Integer section level based on the characters
# used in the ASCII line under the section title.
#
# line - the String line from under the section title.
def self.section_level(line)
SECTION_LEVELS[line[0..0]]
end
#--
# = is level 0, == is level 1, etc.
def self.single_line_section_level(line)
[line.length - 1, 0].max
end
# Internal: Checks if the next line on the Reader is a section title
#
# reader - the source Reader
# attributes - a Hash of attributes collected above the current line
#
# returns the section level if the Reader is positioned at a section title,
# false otherwise
def self.is_next_line_section?(reader, attributes)
return false if !attributes[1].nil? && ['float', 'discrete'].include?(attributes[1])
return false if !reader.has_more_lines?
is_section_title?(*reader.peek_lines(2))
end
# Internal: Convenience API for checking if the next line on the Reader is the document title
#
# reader - the source Reader
# attributes - a Hash of attributes collected above the current line
#
# returns true if the Reader is positioned at the document title, false otherwise
def self.is_next_line_document_title?(reader, attributes)
is_next_line_section?(reader, attributes) == 0
end
# Public: Checks if these lines are a section title
#
# line1 - the first line as a String
# line2 - the second line as a String (default: nil)
#
# returns the section level if these lines are a section title,
# false otherwise
def self.is_section_title?(line1, line2 = nil)
if (level = is_single_line_section_title?(line1))
level
elsif (level = is_two_line_section_title?(line1, line2))
level
else
false
end
end
def self.is_single_line_section_title?(line1)
if !line1.nil? && (match = line1.match(REGEXP[:section_title]))
single_line_section_level match[1]
else
false
end
end
def self.is_two_line_section_title?(line1, line2)
if !line1.nil? && !line2.nil? && line1.match(REGEXP[:section_name]) &&
line2.match(REGEXP[:section_underline]) &&
# chomp so that a (non-visible) endline does not impact calculation
(line1.chomp.size - line2.chomp.size).abs <= 1
section_level line2
else
false
end
end
# Internal: Parse the section title from the current position of the reader
#
# Parse a single or double-line section title. After this method is called,
# the Reader will be positioned at the line after the section title.
#
# reader - the source reader, positioned at a section title
# document- the current document
#
# Examples
#
# reader.lines
# # => ["Foo\n", "~~~\n"]
#
# title, level, id, single = parse_section_title(reader, document)
#
# title
# # => "Foo"
# level
# # => 2
# id
# # => nil
# single
# # => false
#
# line1
# # => "==== Foo\n"
#
# title, level, id, single = parse_section_title(reader, document)
#
# title
# # => "Foo"
# level
# # => 3
# id
# # => nil
# single
# # => true
#
# returns an Array of [String, Integer, String, Boolean], representing the
# id, title, level and line count of the Section, or nil.
#
#--
# NOTE for efficiency, we don't reuse methods that check for a section title
def self.parse_section_title(reader, document)
line1 = reader.get_line
sect_id = nil
sect_title = nil
sect_level = -1
single_line = true
if match = line1.match(REGEXP[:section_title])
sect_id = match[3]
sect_title = match[2]
sect_level = single_line_section_level match[1]
else
line2 = reader.peek_line
if !line2.nil? && (name_match = line1.match(REGEXP[:section_name])) &&
line2.match(REGEXP[:section_underline]) &&
# chomp so that a (non-visible) endline does not impact calculation
(line1.chomp.size - line2.chomp.size).abs <= 1
if anchor_match = name_match[1].match(REGEXP[:anchor_embedded])
sect_id = anchor_match[2]
sect_title = anchor_match[1]
else
sect_title = name_match[1]
end
sect_level = section_level line2
single_line = false
reader.get_line
end
end
if sect_level >= 0
sect_level += document.attr('leveloffset', 0).to_i
end
[sect_id, sect_title, sect_level, single_line]
end
# Public: Consume and parse the two header lines (line 1 = author info, line 2 = revision info).
#
# Returns the Hash of header metadata. If a Document object is supplied, the metadata
# is applied directly to the attributes of the Document.
#
# reader - the Reader holding the source lines of the document
# document - the Document we are building (default: nil)
#
# Examples
#
# parse_header_metadata(Reader.new ["Author Name <author@example.org>\n", "v1.0, 2012-12-21: Coincide w/ end of world.\n"])
# # => {'author' => 'Author Name', 'firstname' => 'Author', 'lastname' => 'Name', 'email' => 'author@example.org',
# # 'revnumber' => '1.0', 'revdate' => '2012-12-21', 'revremark' => 'Coincide w/ end of world.'}
def self.parse_header_metadata(reader, document = nil)
# NOTE this will discard away any comment lines, but not skip blank lines
process_attribute_entries(reader, document)
metadata = {}
if reader.has_more_lines? && !reader.peek_line.chomp.empty?
author_metadata = {}
keys = ['author', 'authorinitials', 'firstname', 'middlename', 'lastname', 'email']
author_line = reader.get_line
author_line.split(REGEXP[:semicolon_delim]).each_with_index do |author_entry, idx|
author_entry.strip!
next if author_entry.empty?
map = {}
if idx.zero?
keys.each do |key|
map[key.to_sym] = key
end
else
keys.each do |key|
map[key.to_sym] = "#{key}_#{idx + 1}"
end
end
if match = author_entry.match(REGEXP[:author_info])
author_metadata[map[:firstname]] = fname = match[1].tr('_', ' ')
author_metadata[map[:author]] = fname
author_metadata[map[:authorinitials]] = fname[0, 1]
if !match[2].nil? && !match[3].nil?
author_metadata[map[:middlename]] = mname = match[2].tr('_', ' ')
author_metadata[map[:lastname]] = lname = match[3].tr('_', ' ')
author_metadata[map[:author]] = [fname, mname, lname].join ' '
author_metadata[map[:authorinitials]] = [fname[0, 1], mname[0, 1], lname[0, 1]].join
elsif !match[2].nil?
author_metadata[map[:lastname]] = lname = match[2].tr('_', ' ')
author_metadata[map[:author]] = [fname, lname].join ' '
author_metadata[map[:authorinitials]] = [fname[0, 1], lname[0, 1]].join
end
author_metadata[map[:email]] = match[4] unless match[4].nil?
else
author_metadata[map[:author]] = author_metadata[map[:firstname]] = author_entry.strip.squeeze(' ')
author_metadata[map[:authorinitials]] = author_metadata[map[:firstname]][0, 1]
end
author_metadata['authorcount'] = idx + 1
# only assign the _1 attributes if there are multiple authors
if idx == 1
keys.each do |key|
author_metadata["#{key}_1"] = author_metadata[key] if author_metadata.has_key? key
end
end
if idx.zero?
author_metadata['authors'] = author_metadata[map[:author]]
else
author_metadata['authors'] = "#{author_metadata['authors']}, #{author_metadata[map[:author]]}"
end
end
# apply header subs and assign to document
if !document.nil?
author_metadata.map do |key, val|
val = val.is_a?(String) ? document.apply_header_subs(val) : val
document.attributes[key] = val if !document.attributes.has_key?(key)
val
end
end
metadata = author_metadata.dup
# NOTE this will discard any comment lines, but not skip blank lines
process_attribute_entries(reader, document)
rev_metadata = {}
if reader.has_more_lines? && !reader.peek_line.chomp.empty?
rev_line = reader.get_line
if match = rev_line.match(REGEXP[:revision_info])
rev_metadata['revdate'] = match[2].strip
rev_metadata['revnumber'] = match[1].rstrip unless match[1].nil?
rev_metadata['revremark'] = match[3].rstrip unless match[3].nil?
else
# throw it back
reader.unshift_line rev_line
end
end
# apply header subs and assign to document
if !document.nil?
rev_metadata.map do |key, val|
val = document.apply_header_subs(val)
document.attributes[key] = val if !document.attributes.has_key?(key)
val
end
end
rev_metadata.each {|k, v|
metadata[k] = v
}
# NOTE this will discard any comment lines, but not skip blank lines
process_attribute_entries(reader, document)
reader.skip_blank_lines
end
metadata
end
# Internal: Parse lines of metadata until a line of metadata is not found.
#
# This method processes sequential lines containing block metadata, ignoring
# blank lines and comments.
#
# reader - the source reader
# parent - the parent to which the lines belong
# attributes - a Hash of attributes in which any metadata found will be stored (default: {})
# options - a Hash of options to control processing: (default: {})
# * :text indicates that lexer is only looking for text content
# and thus the block title should not be captured
#
# returns the Hash of attributes including any metadata found
def self.parse_block_metadata_lines(reader, parent, attributes = {}, options = {})
while parse_block_metadata_line(reader, parent, attributes, options)
# discard the line just processed
reader.advance
reader.skip_blank_lines
end
attributes
end
# Internal: Parse the next line if it contains metadata for the following block
#
# This method handles lines with the following content:
#
# * line or block comment
# * anchor
# * attribute list
# * block title
#
# Any attributes found will be inserted into the attributes argument.
# If the line contains block metadata, the method returns true, otherwise false.
#
# reader - the source reader
# parent - the parent of the current line
# attributes - a Hash of attributes in which any metadata found will be stored
# options - a Hash of options to control processing: (default: {})
# * :text indicates that lexer is only looking for text content
# and thus the block title should not be captured
#
# returns true if the line contains metadata, otherwise false
def self.parse_block_metadata_line(reader, parent, attributes, options = {})
return false if !reader.has_more_lines?
next_line = reader.peek_line
if (commentish = next_line.start_with?('//')) && (match = next_line.match(REGEXP[:comment_blk]))
terminator = match[0]
reader.grab_lines_until(:skip_first_line => true, :preserve_last_line => true, :terminator => terminator, :preprocess => false)
elsif commentish && next_line.match(REGEXP[:comment])
# do nothing, we'll skip it
elsif !options[:text] && (match = next_line.match(REGEXP[:attr_entry]))
process_attribute_entry(reader, parent, attributes, match)
elsif match = next_line.match(REGEXP[:anchor])
id, reftext = match[1].split(',')
attributes['id'] = id
# AsciiDoc always use [id] as the reftext in HTML output,
# but I'd like to do better in Asciidoctor
#parent.document.register(:ids, id)
if reftext
attributes['reftext'] = reftext
parent.document.register(:ids, [id, reftext])
end
elsif match = next_line.match(REGEXP[:blk_attr_list])
parent.document.parse_attributes(match[1], [], :sub_input => true, :into => attributes)
# NOTE title doesn't apply to section, but we need to stash it for the first block
# TODO should issue an error if this is found above the document title
elsif !options[:text] && (match = next_line.match(REGEXP[:blk_title]))
attributes['title'] = match[1]
else
return false
end
true
end
def self.process_attribute_entries(reader, parent, attributes = nil)
reader.skip_comment_lines
while process_attribute_entry(reader, parent, attributes)
# discard line just processed
reader.advance
reader.skip_comment_lines
end
end
def self.process_attribute_entry(reader, parent, attributes = nil, match = nil)
match ||= reader.has_more_lines? ? reader.peek_line.match(REGEXP[:attr_entry]) : nil
if match
name = match[1]
value = match[2].nil? ? '' : match[2]
if value.end_with? LINE_BREAK
value.chop!.rstrip!
while reader.advance
next_line = reader.peek_line.strip
break if next_line.empty?
if next_line.end_with? LINE_BREAK
value = "#{value} #{next_line.chop.rstrip}"
else
value = "#{value} #{next_line}"
break
end
end
end
if name.end_with?('!')
# a nil value signals the attribute should be deleted (undefined)
value = nil
name = name.chop
end
name = sanitize_attribute_name(name)
accessible = true
if !parent.nil?
accessible = value.nil? ?
parent.document.delete_attribute(name) :
parent.document.set_attribute(name, value)
end
if !attributes.nil?
Document::AttributeEntry.new(name, value).save_to(attributes) if accessible
end
true
else
false
end
end
# Internal: Resolve the 0-index marker for this list item
#
# For ordered lists, match the marker used for this list item against the
# known list markers and determine which marker is the first (0-index) marker
# in its number series.
#
# For callout lists, return <1>.
#
# For bulleted lists, return the marker as passed to this method.
#
# list_type - The Symbol context of the list
# marker - The String marker for this list item
# ordinal - The position of this list item in the list
# validate - Whether to validate the value of the marker
#
# Returns the String 0-index marker for this list item
def self.resolve_list_marker(list_type, marker, ordinal = 0, validate = false)
if list_type == :olist && !marker.start_with?('.')
resolve_ordered_list_marker(marker, ordinal, validate)
elsif list_type == :colist
'<1>'
else
marker
end
end
# Internal: Resolve the 0-index marker for this ordered list item
#
# Match the marker used for this ordered list item against the
# known ordered list markers and determine which marker is
# the first (0-index) marker in its number series.
#
# The purpose of this method is to normalize the implicit numbered markers
# so that they can be compared against other list items.
#
# marker - The marker used for this list item
# ordinal - The 0-based index of the list item (default: 0)
# validate - Perform validation that the marker provided is the proper
# marker in the sequence (default: false)
#
# Examples
#
# marker = 'B.'
# Lexer::resolve_ordered_list_marker(marker, 1, true)
# # => 'A.'
#
# Returns the String of the first marker in this number series
def self.resolve_ordered_list_marker(marker, ordinal = 0, validate = false)
number_style = ORDERED_LIST_STYLES.detect {|s| marker.match(ORDERED_LIST_MARKER_PATTERNS[s]) }
expected = actual = nil
case number_style
when :arabic
if validate
expected = ordinal + 1
actual = marker.to_i
end
marker = '1.'
when :loweralpha
if validate
expected = ('a'[0].ord + ordinal).chr
actual = marker.chomp('.')
end
marker = 'a.'
when :upperalpha
if validate
expected = ('A'[0].ord + ordinal).chr
actual = marker.chomp('.')
end
marker = 'A.'
when :lowerroman
if validate
# TODO report this in roman numerals; see https://github.com/jamesshipton/roman-numeral/blob/master/lib/roman_numeral.rb
expected = ordinal + 1
actual = roman_numeral_to_int(marker.chomp(')'))
end
marker = 'i)'
when :upperroman
if validate
# TODO report this in roman numerals; see https://github.com/jamesshipton/roman-numeral/blob/master/lib/roman_numeral.rb
expected = ordinal + 1
actual = roman_numeral_to_int(marker.chomp(')'))
end
marker = 'I)'
end
if validate && expected != actual
# FIXME I need a reader reference or line number to report line number
puts "asciidoctor: WARNING: list item index: expected #{expected}, got #{actual}"
end
marker
end
# Internal: Determine whether the this line is a sibling list item
# according to the list type and trait (marker) provided.
#
# line - The String line to check
# list_type - The context of the list (:olist, :ulist, :colist, :dlist)
# sibling_trait - The String marker for the list or the Regexp to match a sibling
#
# Returns a Boolean indicating whether this line is a sibling list item given
# the criteria provided
def self.is_sibling_list_item?(line, list_type, sibling_trait)
if sibling_trait.is_a?(Regexp)
matcher = sibling_trait
expected_marker = false
else
matcher = REGEXP[list_type]
expected_marker = sibling_trait
end
if m = line.match(matcher)
if expected_marker
expected_marker == resolve_list_marker(list_type, m[1])
else
true
end
else
false
end
end
# Internal: Parse the table contained in the provided Reader
#
# table_reader - a Reader containing the source lines of an AsciiDoc table
# parent - the parent Block of this Asciidoctor::Table
# attributes - attributes captured from above this Block
#
# returns an instance of Asciidoctor::Table parsed from the provided reader
def self.next_table(table_reader, parent, attributes)
table = Table.new(parent, attributes)
table.title = attributes.delete('title') if attributes.has_key?('title')
table.assign_caption attributes.delete('caption')
if attributes.has_key? 'cols'
table.create_columns(parse_col_specs(attributes['cols']))
explicit_col_specs = true
else
explicit_col_specs = false
end
table_reader.skip_blank_lines
parser_ctx = Table::ParserContext.new(table, attributes)
while table_reader.has_more_lines?
line = table_reader.get_line
if parser_ctx.format == 'psv'
if parser_ctx.starts_with_delimiter? line
line = line[1..-1]
# push an empty cell spec if boundary at start of line
parser_ctx.close_open_cell
else
next_cell_spec, line = parse_cell_spec(line, :start)
# if the cell spec is not null, then we're at a cell boundary
if !next_cell_spec.nil?
parser_ctx.close_open_cell next_cell_spec
else
# QUESTION do we not advance to next line? if so, when
# will we if we came into this block?
end
end
end
while !line.empty?
if m = parser_ctx.match_delimiter(line)
if parser_ctx.format == 'csv'
if parser_ctx.buffer_has_unclosed_quotes?(m.pre_match)
# throw it back, it's too small
line = parser_ctx.skip_matched_delimiter(m)
next
end
else
if m.pre_match.end_with? '\\'
line = parser_ctx.skip_matched_delimiter(m, true)
next
end
end
if parser_ctx.format == 'psv'
next_cell_spec, cell_text = parse_cell_spec(m.pre_match, :end)
parser_ctx.push_cell_spec next_cell_spec
parser_ctx.buffer << cell_text
else
parser_ctx.buffer << m.pre_match
end
line = m.post_match
parser_ctx.close_cell
else
# no other delimiters to see here
# suck up this line into the buffer and move on
parser_ctx.buffer << line
# QUESTION make this an option? (unwrap-option?)
if parser_ctx.format == 'csv'
parser_ctx.buffer.rstrip!.concat(' ')
end
line = ''
if parser_ctx.format == 'psv' || (parser_ctx.format == 'csv' &&
parser_ctx.buffer_has_unclosed_quotes?)
parser_ctx.keep_cell_open
else
parser_ctx.close_cell true
end
end
end
table_reader.skip_blank_lines unless parser_ctx.cell_open?
if !table_reader.has_more_lines?
parser_ctx.close_cell true
end
end
table.attributes['colcount'] ||= parser_ctx.col_count
if !explicit_col_specs
# TODO further encapsulate this logic (into table perhaps?)
even_width = (100.0 / parser_ctx.col_count).floor
table.columns.each {|c| c.assign_width(0, even_width) }
end
table.partition_header_footer attributes
table
end
# Internal: Parse the column specs for this table.
#
# The column specs dictate the number of columns, relative
# width of columns, default alignments for cells in each
# column, and/or default styles or filters applied to the cells in
# the column.
#
# Every column spec is guaranteed to have a width
#
# returns a Hash of attributes that specify how to format
# and layout the cells in the table.
def self.parse_col_specs(records)
specs = []
# check for deprecated syntax
if m = records.match(REGEXP[:digits])
1.upto(m[0].to_i) {
specs << {'width' => 1}
}
return specs
end
records.split(',').each {|record|
# TODO might want to use scan rather than this mega-regexp
if m = record.match(REGEXP[:table_colspec])
spec = {}
if m[2]
# make this an operation
colspec, rowspec = m[2].split '.'
if !colspec.to_s.empty? && Table::ALIGNMENTS[:h].has_key?(colspec)
spec['halign'] = Table::ALIGNMENTS[:h][colspec]
end
if !rowspec.to_s.empty? && Table::ALIGNMENTS[:v].has_key?(rowspec)
spec['valign'] = Table::ALIGNMENTS[:v][rowspec]
end
end
# TODO support percentage width
spec['width'] = !m[3].nil? ? m[3].to_i : 1
# make this an operation
if m[4] && Table::TEXT_STYLES.has_key?(m[4])
spec['style'] = Table::TEXT_STYLES[m[4]]
end
repeat = !m[1].nil? ? m[1].to_i : 1
1.upto(repeat) {
specs << spec.dup
}
end
}
specs
end
# Internal: Parse the cell specs for the current cell.
#
# The cell specs dictate the cell's alignments, styles or filters,
# colspan, rowspan and/or repeating content.
#
# returns the Hash of attributes that indicate how to layout
# and style this cell in the table.
def self.parse_cell_spec(line, pos = :start)
# the default for the end pos it {} since we
# know we're at a delimiter; when the pos
# is start, we *may* be at a delimiter and
# nil indicates we're not
spec = (pos == :end ? {} : nil)
rest = line
if m = line.match(REGEXP[:table_cellspec][pos])
spec = {}
return [spec, line] if m[0].chomp.empty?
rest = (pos == :start ? m.post_match : m.pre_match)
if m[1]
colspec, rowspec = m[1].split '.'
colspec = colspec.to_s.empty? ? 1 : colspec.to_i
rowspec = rowspec.to_s.empty? ? 1 : rowspec.to_i
if m[2] == '+'
spec['colspan'] = colspec unless colspec == 1
spec['rowspan'] = rowspec unless rowspec == 1
elsif m[2] == '*'
spec['repeatcol'] = colspec unless colspec == 1
end
end
if m[3]
colspec, rowspec = m[3].split '.'
if !colspec.to_s.empty? && Table::ALIGNMENTS[:h].has_key?(colspec)
spec['halign'] = Table::ALIGNMENTS[:h][colspec]
end
if !rowspec.to_s.empty? && Table::ALIGNMENTS[:v].has_key?(rowspec)
spec['valign'] = Table::ALIGNMENTS[:v][rowspec]
end
end
if m[4] && Table::TEXT_STYLES.has_key?(m[4])
spec['style'] = Table::TEXT_STYLES[m[4]]
end
end
[spec, rest]
end
# Public: Convert a string to a legal attribute name.
#
# name - the String name of the attribute
#
# Returns a String with the legal AsciiDoc attribute name.
#
# Examples
#
# sanitize_attribute_name('Foo Bar')
# => 'foobar'
#
# sanitize_attribute_name('foo')
# => 'foo'
#
# sanitize_attribute_name('Foo 3 #-Billy')
# => 'foo3-billy'
def self.sanitize_attribute_name(name)
name.gsub(REGEXP[:illegal_attr_name_chars], '').downcase
end
# Internal: Converts a Roman numeral to an integer value.
#
# value - The String Roman numeral to convert
#
# Returns the Integer for this Roman numeral
def self.roman_numeral_to_int(value)
value = value.downcase
digits = { 'i' => 1, 'v' => 5, 'x' => 10 }
result = 0
(0..value.length - 1).each {|i|
digit = digits[value[i..i]]
if i + 1 < value.length && digits[value[i+1..i+1]] > digit
result -= digit
else
result += digit
end
}
result
end
end
end