class Asciidoctor::Lexer
def self.next_block(reader, parent, attributes = {}, options = {})
parent - The Document, Section or Block to which the next block belongs
reader - The Reader from which to retrieve the next block
to the type of content.
and Block delimiters. The ensuing lines are then processed according
Uses regular expressions from the Asciidoctor module to match Section
Block is found or the reader has no more lines.
or Block. Processes each line of the reader in sequence until a Section or
Begins by skipping over blank lines to find the start of the next Section
Public: Return the next Section or Block object from the Reader.
# Public: Return the next Section or Block object from the Reader.
#
# Begins by skipping over blank lines to find the start of the next Section
# or Block. Processes each line of the reader in sequence until a Section or
# Block is found or the reader has no more lines.
#
# Uses regular expressions from the Asciidoctor module to match Section
# and Block delimiters. The ensuing lines are then processed according
# to the type of content.
#
# reader     - The Reader from which to retrieve the next block
# parent     - The Document, Section or Block to which the next block belongs
# attributes - A Hash of attributes collected so far for the block; it is
#              mutated while parsing and finally applied to the block via
#              update_attributes (default: {})
# options    - A Hash of options that steer parsing (default: {}):
#              :text           - when truthy, ruler, block image and inline
#                                admonition detection are skipped; the key is
#                                deleted from the Hash (mutating the caller's
#                                Hash) if blank lines were skipped first
#              :parse_metadata - set to false to skip block metadata lines
#                                (default: true)
#              :parse_sections - set to true to allow a section to be
#                                returned (default: false)
#
# Returns the Section or Block that was parsed, or nil if the reader ran out
# of lines (or only metadata/comments were found) before a block was created.
def self.next_block(reader, parent, attributes = {}, options = {})
  # Skip ahead to the block content
  skipped = reader.skip_blank

  # bail if we've reached the end of the section content
  return nil unless reader.has_lines?

  if options[:text] && skipped > 0
    options.delete(:text)
  end

  Asciidoctor.debug {
    msg = []
    msg << '/' * 64
    msg << 'next_block() - First two lines are:'
    msg << reader.peek_line
    tmp_line = reader.get_line
    msg << reader.peek_line
    reader.unshift tmp_line
    msg << '/' * 64
    msg * "\n"
  }

  # NOTE `options[:parse_metadata] || true` could never evaluate to false, so
  # callers had no way to turn metadata parsing off; fetch honors an explicit
  # false while still defaulting to true when the key is absent.
  parse_metadata = options.fetch(:parse_metadata, true)
  parse_sections = options[:parse_sections] || false

  document = parent.document
  context = parent.is_a?(Block) ? parent.context : nil
  block = nil

  while reader.has_lines? && block.nil?
    if parse_metadata && parse_block_metadata_line(reader, document, attributes, options)
      reader.next_line
      next
    elsif parse_sections && context.nil? && is_next_line_section?(reader, attributes)
      block, attributes = next_section(reader, parent, attributes)
      break
    end

    this_line = reader.get_line

    delimited_blk = delimited_block? this_line

    # NOTE I haven't decided whether I want this check here or in
    # parse_block_metadata (where it is currently)
    #if this_line.match(REGEXP[:comment_blk])
    #  reader.grab_lines_until {|line| line.match( REGEXP[:comment_blk] ) }
    #  reader.skip_blank
    #  # NOTE we should break here because we have found a block, it
    #  # just happens to be nil...if we keep going we potentially overrun
    #  # a section heading which is not processed in this anymore
    #  break

    # NOTE we're letting ruler have attributes
    if !options[:text] && this_line.match(REGEXP[:ruler])
      block = Block.new(parent, :ruler)
      reader.skip_blank

    elsif !options[:text] && (match = this_line.match(REGEXP[:image_blk]))
      block = Block.new(parent, :image)
      AttributeList.new(document.sub_attributes(match[2])).parse_into(attributes, ['alt', 'width', 'height'])
      target = block.sub_attributes(match[1])
      if !target.to_s.empty?
        attributes['target'] = target
        document.register(:images, target)
        attributes['alt'] ||= File.basename(target, File.extname(target))
        # hmmm, this assignment seems like a one-off
        block.title = attributes['title']
        if block.title? && attributes['caption'].nil?
          attributes['caption'] = "Figure #{document.counter('figure-number')}. "
        end
      else
        # drop the line if target resolves to nothing
        block = nil
      end
      reader.skip_blank

    elsif delimited_blk && (match = this_line.match(REGEXP[:open_blk]))
      # an open block is surrounded by '--' lines and has zero or more blocks inside
      terminator = match[0]
      buffer = Reader.new reader.grab_lines_until(:terminator => terminator)

      # Strip lines off end of block - not implemented yet
      # while buffer.has_lines? && buffer.last.strip.empty?
      #   buffer.pop
      # end

      block = Block.new(parent, :open)
      while buffer.has_lines?
        new_block = next_block(buffer, block)
        block.blocks << new_block unless new_block.nil?
      end

    # needs to come before list detection
    elsif delimited_blk && (match = this_line.match(REGEXP[:sidebar_blk]))
      # sidebar is surrounded by '****' (4 or more '*' chars) lines
      terminator = match[0]
      # FIXME violates DRY because it's a duplication of quote parsing
      block = Block.new(parent, :sidebar)
      buffer = Reader.new reader.grab_lines_until(:terminator => terminator)

      while buffer.has_lines?
        new_block = next_block(buffer, block)
        block.blocks << new_block unless new_block.nil?
      end

    elsif match = this_line.match(REGEXP[:colist])
      block = Block.new(parent, :colist)
      attributes['style'] = 'arabic'
      items = []
      block.buffer = items
      reader.unshift this_line
      expected_index = 1
      # do-while: the first item is already known to match, subsequent items
      # are consumed for as long as the next line is a callout list item
      begin
        # might want to move this check to a validate method
        if match[1].to_i != expected_index
          puts "asciidoctor: WARNING: callout list item index: expected #{expected_index} got #{match[1]}"
        end
        list_item = next_list_item(reader, block, match)
        expected_index += 1
        if !list_item.nil?
          items << list_item
          coids = document.callouts.callout_ids(items.size)
          if !coids.empty?
            list_item.attributes['coids'] = coids
          else
            puts 'asciidoctor: WARNING: no callouts refer to list item ' + items.size.to_s
          end
        end
      end while reader.has_lines? && match = reader.peek_line.match(REGEXP[:colist])

      document.callouts.next_list

    elsif match = this_line.match(REGEXP[:ulist])
      AttributeList.rekey(attributes, ['style'])
      reader.unshift(this_line)
      block = next_outline_list(reader, :ulist, parent)

    elsif match = this_line.match(REGEXP[:olist])
      AttributeList.rekey(attributes, ['style'])
      reader.unshift(this_line)
      block = next_outline_list(reader, :olist, parent)
      # QUESTION move this logic to next_outline_list?
      if !(attributes.has_key? 'style') && !(block.attributes.has_key? 'style')
        marker = block.buffer.first.marker
        if marker.start_with? '.'
          # first one makes more sense, but second one is AsciiDoc-compliant
          #attributes['style'] = (ORDERED_LIST_STYLES[block.level - 1] || ORDERED_LIST_STYLES.first).to_s
          attributes['style'] = (ORDERED_LIST_STYLES[marker.length - 1] || ORDERED_LIST_STYLES.first).to_s
        else
          style = ORDERED_LIST_STYLES.detect{|s| marker.match(ORDERED_LIST_MARKER_PATTERNS[s]) }
          attributes['style'] = (style || ORDERED_LIST_STYLES.first).to_s
        end
      end

    elsif match = this_line.match(REGEXP[:dlist])
      reader.unshift this_line
      block = next_labeled_list(reader, match, parent)
      AttributeList.rekey(attributes, ['style'])

    elsif delimited_blk && (match = this_line.match(document.nested? ? REGEXP[:table_nested] : REGEXP[:table]))
      # table is surrounded by lines starting with a | followed by 3 or more '=' chars
      terminator = match[0]
      AttributeList.rekey(attributes, ['style'])
      table_reader = Reader.new reader.grab_lines_until(:terminator => terminator, :skip_line_comments => true)
      block = next_table(table_reader, parent, attributes)
      # hmmm, this assignment seems like a one-off
      block.title = attributes['title']
      if block.title? && attributes['caption'].nil?
        attributes['caption'] = "Table #{document.counter('table-number')}. "
      end

    # FIXME violates DRY because it's a duplication of other block parsing
    elsif delimited_blk && (match = this_line.match(REGEXP[:example]))
      # example is surrounded by lines with 4 or more '=' chars
      terminator = match[0]
      AttributeList.rekey(attributes, ['style'])
      if admonition_style = ADMONITION_STYLES.detect {|s| attributes['style'] == s}
        block = Block.new(parent, :admonition)
        attributes['name'] = admonition_style.downcase
        attributes['caption'] ||= admonition_style.capitalize
      else
        block = Block.new(parent, :example)
        # hmmm, this assignment seems like a one-off
        block.title = attributes['title']
        if block.title? && attributes['caption'].nil?
          attributes['caption'] = "Example #{document.counter('example-number')}. "
        end
      end
      buffer = Reader.new reader.grab_lines_until(:terminator => terminator)

      while buffer.has_lines?
        new_block = next_block(buffer, block)
        block.blocks << new_block unless new_block.nil?
      end

    # FIXME violates DRY w/ non-delimited block listing
    elsif delimited_blk && (match = this_line.match(REGEXP[:listing]))
      terminator = match[0]
      AttributeList.rekey(attributes, ['style', 'language', 'linenums'])
      buffer = reader.grab_lines_until(:terminator => terminator)
      buffer.last.chomp! unless buffer.empty?
      block = Block.new(parent, :listing, buffer)

    elsif delimited_blk && (match = this_line.match(REGEXP[:quote]))
      # multi-line verse or quote is surrounded by a block delimiter
      terminator = match[0]
      AttributeList.rekey(attributes, ['style', 'attribution', 'citetitle'])
      quote_context = (attributes['style'] == 'verse' ? :verse : :quote)
      block_reader = Reader.new reader.grab_lines_until(:terminator => terminator)

      # only quote can have other section elements (as a section block)
      section_body = (quote_context == :quote)

      if section_body
        block = Block.new(parent, quote_context)
        while block_reader.has_lines?
          new_block = next_block(block_reader, block)
          block.blocks << new_block unless new_block.nil?
        end
      else
        block_reader.chomp_last!
        block = Block.new(parent, quote_context, block_reader.lines)
      end

    elsif delimited_blk && (blk_ctx = [:literal, :pass].detect{|t| this_line.match(REGEXP[t])})
      # literal is surrounded by '....' (4 or more '.' chars) lines
      # pass is surrounded by '++++' (4 or more '+' chars) lines
      # $~ holds the MatchData of the successful match made inside detect
      terminator = $~[0]
      buffer = reader.grab_lines_until(:terminator => terminator)
      buffer.last.chomp! unless buffer.empty?
      # a literal can masquerade as a listing
      if attributes[1] == 'listing'
        blk_ctx = :listing
      end
      block = Block.new(parent, blk_ctx, buffer)

    elsif this_line.match(REGEXP[:lit_par])
      # literal paragraph is contiguous lines starting with
      # one or more space or tab characters

      # So we need to actually include this one in the grab_lines group
      reader.unshift this_line
      buffer = reader.grab_lines_until(:preserve_last_line => true, :break_on_blank_lines => true) {|line|
        # labeled list terms can be indented, but a preceding blank indicates
        # we are in a list continuation and therefore literals should be strictly literal
        (context == :dlist && skipped == 0 && line.match(REGEXP[:dlist])) ||
        delimited_block?(line)
      }

      # trim off the indentation equivalent to the size of the least indented line
      if !buffer.empty?
        offset = buffer.map {|line| line.match(REGEXP[:leading_blanks])[1].length }.min
        if offset > 0
          buffer = buffer.map {|l| l.sub(/^\s{1,#{offset}}/, '') }
        end
        buffer.last.chomp!
      end

      block = Block.new(parent, :literal, buffer)
      # a literal gets special meaning inside of a definition list
      if LIST_CONTEXTS.include?(context)
        attributes['options'] ||= []
        # TODO this feels hacky, better way to distinguish from explicit literal block?
        attributes['options'] << 'listparagraph'
      end

    ## these switches based on style need to come immediately before the else ##

    elsif attributes[1] == 'source'
      AttributeList.rekey(attributes, ['style', 'language', 'linenums'])
      reader.unshift(this_line)
      buffer = reader.grab_lines_until(:break_on_blank_lines => true)
      buffer.last.chomp! unless buffer.empty?
      block = Block.new(parent, :listing, buffer)

    elsif admonition_style = ADMONITION_STYLES.detect{|s| attributes[1] == s}
      # an admonition preceded by [<TYPE>] and lasts until a blank line
      reader.unshift(this_line)
      buffer = reader.grab_lines_until(:break_on_blank_lines => true)
      buffer.last.chomp! unless buffer.empty?
      block = Block.new(parent, :admonition, buffer)
      attributes['style'] = admonition_style
      attributes['name'] = admonition_style.downcase
      attributes['caption'] ||= admonition_style.capitalize

    elsif quote_context = [:quote, :verse].detect{|s| attributes[1] == s.to_s}
      # single-paragraph verse or quote is preceded by [verse] or [quote], respectively, and lasts until a blank line
      AttributeList.rekey(attributes, ['style', 'attribution', 'citetitle'])
      reader.unshift(this_line)
      buffer = reader.grab_lines_until(:break_on_blank_lines => true)
      buffer.last.chomp! unless buffer.empty?
      block = Block.new(parent, quote_context, buffer)

    # a floating (i.e., discrete) title
    elsif ['float', 'discrete'].include?(attributes[1]) && is_section_title?(this_line, reader.peek_line)
      attributes['style'] = attributes[1]
      reader.unshift this_line
      float_id, float_title, float_level, _ = parse_section_title reader
      block = Block.new(parent, :floating_title)
      if float_id.nil? || float_id.empty?
        # FIXME remove hack of creating throwaway Section to get at the generate_id method
        tmp_sect = Section.new(parent)
        tmp_sect.title = float_title
        block.id = tmp_sect.generate_id
      else
        block.id = float_id
        # NOTE use the local document; inside a class method @document refers
        # to an instance variable of the class object, not to parent.document
        document.register(:ids, [float_id, float_title])
      end
      block.level = float_level
      block.title = float_title

    # a paragraph - contiguous nonblank/noncontinuation lines
    else
      reader.unshift this_line
      buffer = reader.grab_lines_until(:break_on_blank_lines => true, :preserve_last_line => true, :skip_line_comments => true) {|line|
        delimited_block?(line) || line.match(REGEXP[:attr_line]) ||
        # next list item can be directly adjacent to paragraph of previous list item
        context == :dlist && line.match(REGEXP[:dlist])
        # not sure if there are any cases when we need this check for other list types
        #LIST_CONTEXTS.include?(context) && line.match(REGEXP[context])
      }

      # NOTE we need this logic because the reader is processing line
      # comments and that might leave us w/ an empty buffer
      if buffer.empty?
        reader.get_line
        break
      end

      catalog_inline_anchors(buffer.join, document)

      if !options[:text] && (admonition = buffer.first.match(Regexp.new('^(' + ADMONITION_STYLES.join('|') + '):\s+')))
        buffer[0] = admonition.post_match
        block = Block.new(parent, :admonition, buffer)
        attributes['style'] = admonition[1]
        attributes['name'] = admonition[1].downcase
        attributes['caption'] ||= admonition[1].capitalize
      else
        buffer.last.chomp!
        block = Block.new(parent, :paragraph, buffer)
      end
    end
  end

  # when looking for nested content, one or more line comments, comment
  # blocks or trailing attribute lists could leave us without a block,
  # so handle accordingly
  if !block.nil?
    block.id = attributes['id'] if attributes.has_key?('id')
    block.title = attributes['title'] unless block.title?
    block.caption ||= attributes['caption'] unless block.is_a?(Section)
    # AsciiDoc always use [id] as the reftext in HTML output,
    # but I'd like to do better in Asciidoctor
    if block.id && block.title? && !attributes.has_key?('reftext')
      document.register(:ids, [block.id, block.title])
    end
    block.update_attributes(attributes)

    if block.context == :listing || block.context == :literal
      catalog_callouts(block.buffer.join, document)
    end
  end

  block
end