class Asciidoctor::Lexer

Public: Methods to parse and build objects from Asciidoc lines

def self.build_ulist(reader, parent = nil)

# Private: Build an unordered-list Block from consecutive list item lines.
#
# reader - the Reader positioned at the first list item line.
# parent - the parent passed to the new :ulist Block (default: nil).
#
# Items that sit deeper than the first item of this list are handed to
# next_block, so they end up in a Block of their own; all other items are
# built with build_ulist_item.
#
# Returns the :ulist Block, whose buffer holds the collected items.
def self.build_ulist(reader, parent = nil)
  list_type = :ulist
  list_block = Block.new(parent, list_type)
  Asciidoctor.debug "Created :ulist block: #{list_block}"

  entries = []
  base_level = nil

  while reader.has_lines?
    bullet = reader.peek_line.match(REGEXP[list_type])
    break unless bullet

    depth = bullet[1].length
    nested = base_level && base_level < depth

    entry = if nested
      # A deeper bullet than this list's base level starts a nested list
      next_block(reader, list_block)
    else
      build_ulist_item(reader, list_block, bullet)
    end

    # The first regular item fixes the base level for the whole list
    base_level ||= entry.level unless nested

    entries << entry
    reader.skip_blank
  end

  list_block.buffer = entries
  list_block
end

def self.build_ulist_item(reader, block, match = nil)

# Private: Build a single unordered-list item from the next line of the reader.
#
# reader - the Reader positioned at the list item line.
# block  - the enclosing list Block; used as the parent of nested blocks.
# match  - an optional match of the item line against REGEXP[:ulist] that the
#          caller already computed (default: nil, the line is matched here).
#
# Returns the new ListItem, or nil when the reader yields no line or the
# line is not a list item (in that case the line is pushed back).
def self.build_ulist_item(reader, block, match = nil)
  list_type = :ulist
  this_line = reader.get_line
  return nil unless this_line
  match ||= this_line.match(REGEXP[list_type])
  if match.nil?
    reader.unshift(this_line)
    return nil
  end
  level = match[1].length
  list_item = ListItem.new
  list_item.level = level
  Asciidoctor.debug "#{__FILE__}:#{__LINE__}: Created ListItem #{list_item} with match[2]: #{match[2]} and level: #{list_item.level}"
  # Prevent bullet list text starting with . from being treated as a paragraph
  # title or some other unseemly thing in list_item_segment. I think. (NOTE)
  reader.unshift match[2].lstrip.sub(/^\./, '\.')
  # Wrap the segment in a Reader exactly once. A second, unwrapped call to
  # list_item_segment here used to swallow the following segment and leave
  # an Array where the loop below needs a Reader.
  item_segment = Reader.new(list_item_segment(reader, :alt_ending => REGEXP[list_type]))
  while item_segment.has_lines?
    list_item.blocks << next_block(item_segment, block)
  end
  Asciidoctor.debug "\n\nlist_item has #{list_item.blocks.count} blocks, and first is a #{list_item.blocks.first.class} with context #{list_item.blocks.first.context rescue 'n/a'}\n\n"
  # A leading paragraph or literal block becomes the item's own text
  first_block = list_item.blocks.first
  if first_block.is_a?(Block) &&
     (first_block.context == :paragraph || first_block.context == :literal)
    list_item.content = first_block.buffer.map{|l| l.strip}.join("\n")
    list_item.blocks.shift
  end
  list_item
end

def self.build_ulist_ref(lines, parent = nil)

# Build an unordered-list Block from `lines`, which is consumed in place
# with shift/unshift. Judging by the name, this appears to be an older
# reference variant of build_ulist -- TODO confirm it is still called.
#
# lines  - the collection of source lines; list item lines are removed
#          from its front as they are processed.
# parent - the parent passed to the new :ulist Block (default: nil).
#
# Returns the :ulist Block, whose buffer holds the top-level ListItems;
# deeper items are appended to the blocks of the preceding item.
#
# NOTE(review): list_item_segment and next_block in this file call Reader
# methods (skip_blank, has_lines?, get_line) on their first argument, while
# `lines` is treated as an Array here -- confirm this path still works.
def self.build_ulist_ref(lines, parent = nil)
  items = []
  list_type = :ulist
  block = Block.new(parent, list_type)
  Asciidoctor.debug "Created :ulist block: #{block}"
  # NOTE(review): last_item_level is assigned below but never read here
  last_item_level = nil
  this_line = lines.shift
  while this_line && match = this_line.match(REGEXP[list_type])
    level = match[1].length
    list_item = ListItem.new
    list_item.level = level
    Asciidoctor.debug "Created ListItem #{list_item} with match[2]: #{match[2]} and level: #{list_item.level}"
    # Push the item text (bullet removed, a leading '.' escaped) back so it
    # is parsed as the first line of the item's segment
    lines.unshift match[2].lstrip.sub(/^\./, '\.')
    item_segment = list_item_segment(lines, :alt_ending => REGEXP[list_type], :list_level => level)
    while item_segment.any?
      list_item.blocks << next_block(item_segment, block)
    end
    # A leading paragraph or literal block becomes the item's own text
    first_block = list_item.blocks.first
    if first_block.is_a?(Block) &&
       (first_block.context == :paragraph || first_block.context == :literal)
      list_item.content = first_block.buffer.map{|l| l.strip}.join("\n")
      list_item.blocks.shift
    end
    # Deeper items nest under the most recent top-level item
    if items.any? && (level > items.last.level)
      Asciidoctor.debug "--> Putting this new level #{level} ListItem under my pops, #{items.last} (level: #{items.last.level})"
      items.last.blocks << list_item
    else
      Asciidoctor.debug "Stacking new list item in parent block's blocks"
      items << list_item
    end
    last_item_level = list_item.level
    # TODO: This has to come from a Reader object
    # NOTE(review): skip_blank(lines) is not defined in the visible part of
    # this file -- verify it exists
    skip_blank(lines)
    this_line = lines.shift
  end
  # Give back the first line that did not belong to the list
  lines.unshift(this_line) unless this_line.nil?
  block.buffer = items
  block
end

def self.extract_section_heading(line1, line2 = nil)


=> nil
anchor
=> 3
level
=> "Foo"
name

name, level, anchor = extract_section_heading(line1)

=> "==== Foo\n"
line1

=> nil
anchor
=> 2
level
=> "Foo"
name

name, level, anchor = extract_section_heading(line1, line2)

=> "~~~\n"
line2
=> "Foo\n"
line1

Examples

Returns an array of a String, Integer, and String or nil.

1- or 2-line section heading.
Private: Extracts the name, level and (optional) embedded anchor from a
# Private: Extract the name, level and (optional) embedded anchor from a
# 1- or 2-line section heading.
#
# line1 - the String heading line (single-line form) or the section name
#         line (two-line form).
# line2 - the String underline of a two-line heading (default: nil).
#
# Returns the Array [name, level, anchor]. When neither heading form is
# recognized the result is [nil, 0, nil].
def self.extract_section_heading(line1, line2 = nil)
  Asciidoctor.debug "#{__method__} -> line1: #{line1.chomp rescue 'nil'}, line2: #{line2.chomp rescue 'nil'}"

  name = nil
  anchor = nil
  level = 0

  if is_single_line_section_heading?(line1)
    title = line1.match(REGEXP[:level_title])
    name = title[2]
    level = single_line_section_level(title[1])
  elsif is_two_line_section_heading?(line1, line2)
    raw_name = line1.match(REGEXP[:name])[1]
    embedded = raw_name.match(REGEXP[:anchor_embedded])
    if embedded
      # The heading text carries its own anchor; split the two apart
      name, anchor = embedded[1], embedded[2]
    else
      name = raw_name
    end
    level = section_level(line2)
  end

  Asciidoctor.debug "#{__method__} -> Returning #{name}, #{level} (anchor: '#{anchor || '<none>'}')"
  [name, level, anchor]
end

def self.is_section_heading?(line1, line2 = nil)

# Private: Check whether the given line(s) form a section heading in either
# the single-line or the two-line form.
#
# line1 - the candidate heading line.
# line2 - the line following it (default: nil).
#
# Returns the truthy result of the first check that succeeds, otherwise the
# (falsy) result of the two-line check.
def self.is_section_heading?(line1, line2 = nil)
  single = is_single_line_section_heading?(line1)
  return single if single
  is_two_line_section_heading?(line1, line2)
end

def self.is_single_line_section_heading?(line)

# Private: Check whether the line matches REGEXP[:level_title].
#
# line - the candidate heading line; may be nil.
#
# Returns false for a nil line, otherwise the result of the match
# (a MatchData or nil).
def self.is_single_line_section_heading?(line)
  line.nil? ? false : line.match(REGEXP[:level_title])
end

def self.is_two_line_section_heading?(line1, line2)

# Private: Check whether two lines form a two-line section heading: the
# first must match REGEXP[:name], the second REGEXP[:line], and their sizes
# may differ by at most one character.
#
# line1 - the candidate section name line; may be nil.
# line2 - the candidate underline; may be nil.
#
# Returns false when either line is nil, nil when either pattern does not
# match, otherwise true/false from the size comparison.
def self.is_two_line_section_heading?(line1, line2)
  return false if line1.nil? || line2.nil?
  return nil unless line1.match(REGEXP[:name]) && line2.match(REGEXP[:line])
  (line1.size - line2.size).abs <= 1
end

def self.list_item_segment(reader, options={})

=> "In a different segment\n"
reader.peek_line

"Open block\n", "\n", "Can have blank lines\n", "--\n"]
=> ["First paragraph\n", "+\n", "Second paragraph\n", "--\n",
list_item_segment(reader)

"In a different segment\n"])
"Open block\n", "\n", "Can have blank lines\n", "--\n", "\n",
["First paragraph\n", "+\n", "Second paragraph\n", "--\n",
reader = Asciidoctor::Reader.new(

Examples

Returns the Array of lines forming the next segment.

item of a lower level.
to include. If this is specified, then break if we find a list
* :list_level may be used to specify a minimum list item level
include. May be either a single Symbol or an Array of Symbols.
* :list_types may be used to specify list item patterns to
other than a blank line to signify the end of the segment.
* :alt_ending may be used to specify a regular expression match
options - an optional Hash of processing options:
reader - the Reader instance from which to get input.

segment, removing them from the 'lines' Array passed in.
Private: Return the Array of lines constituting the next list item
# Private: Return the Array of lines constituting the next list item
# segment, removing them from the reader passed in.
#
# reader  - the Reader instance from which to get input.
# options - an optional Hash of processing options:
#           * :alt_ending may be used to specify a regular expression match
#             other than a blank line to signify the end of the segment.
#           * :list_types may be used to specify list item patterns to
#             include. May be either a single Symbol or an Array of Symbols
#             (default: [:ulist, :olist, :colist, :dlist]). :lit_par is
#             always added.
#           * :list_level may be used to specify a minimum list item level
#             to include. If this is specified, then break if we find a
#             list item of a lower level.
#
# Returns the Array of lines forming the next segment.
def self.list_item_segment(reader, options={})
  alternate_ending = options[:alt_ending]
  # Array(nil) is [] and therefore truthy, so `Array(x) || default` can
  # never fall back; pick the default explicitly. The copy keeps the
  # :lit_par append below from modifying an Array owned by the caller.
  list_types = if options[:list_types].nil?
    [:ulist, :olist, :colist, :dlist]
  else
    Array(options[:list_types]).dup
  end
  list_level = options[:list_level].to_i
  # We know we want to include :lit_par types, even if we have specified,
  # say, only :ulist type list entries.
  list_types << :lit_par unless list_types.include? :lit_par
  segment = []
  reader.skip_blank
  # Grab lines until the first blank line not inside an open block
  # or listing
  in_oblock = false
  in_listing = false
  while reader.has_lines?
    this_line = reader.get_line
    Asciidoctor.debug "----->  Processing: #{this_line}"
    in_oblock = !in_oblock if this_line.match(REGEXP[:oblock])
    in_listing = !in_listing if this_line.match(REGEXP[:listing])
    if !in_oblock && !in_listing
      if this_line.strip.empty?
        # TODO  - FIX THIS BEFORE ANY MORE KITTENS DIE AUGGGHHH!!!
        # (reaches into the Reader's @lines to look ahead past blank lines)
        next_nonblank = reader.instance_variable_get(:@lines).detect{|l| !l.strip.empty?}
        # If there are blank lines ahead, but there's at least one
        # more non-blank line that doesn't trigger an alternate_ending
        # for the block of lines, then vacuum up all the blank lines
        # into this segment and continue with the next non-blank line.
        if next_nonblank &&
           ( alternate_ending.nil? ||
             !next_nonblank.match(alternate_ending)
           ) && list_types.find { |list_type| next_nonblank.match(REGEXP[list_type]) }
           while reader.has_lines? and reader.peek_line.strip.empty?
             segment << this_line
             this_line = reader.get_line
           end
        else
          break
        end
      # Have we come to a line matching an alternate_ending regexp?
      elsif alternate_ending && this_line.match(alternate_ending)
        reader.unshift this_line
        break
      # Do we have a minimum list_level, and have come to a list item
      # line with a lower level? list_level is 0 when the option is absent
      # (nil.to_i), and 0 is truthy in Ruby, so test it numerically.
      # $1 is the first capture of the pattern that matched inside find.
      elsif list_level > 0 &&
            list_types.find { |list_type| this_line.match(REGEXP[list_type]) } &&
            ($1.length < list_level)
        reader.unshift this_line
        break
      end
      # From the Asciidoc user's guide:
      #   Another list or a literal paragraph immediately following
      #   a list item will be implicitly included in the list item
      # Thus, the list_level stuff may be wrong here.
    end
    segment << this_line
  end
  Asciidoctor.debug "*"*40
  Asciidoctor.debug "#{File.basename(__FILE__)}:#{__LINE__} -> #{__method__}: Returning this:"
  Asciidoctor.debug segment.inspect
  Asciidoctor.debug "*"*10
  Asciidoctor.debug "Leaving #{__method__}: Top of reader queue is:"
  Asciidoctor.debug reader.peek_line
  Asciidoctor.debug "*"*40
  segment
end

def self.next_block(reader, parent = self)

content set to the grabbed lines.
* Return a new Asciidoctor::Block or Asciidoctor::Section instance with the
* Based on the type of content block, grab lines to the end of the block.
* Use defined regular expressions to determine the type of content block.
* Skip over blank lines to find the start of the next content block.

Return the next block from the Reader.
# Return the next block from the Reader.
#
# * Skip over blank lines to find the start of the next content block.
# * Use defined regular expressions to determine the type of content block.
# * Based on the type of content block, grab lines to the end of the block.
# * Return a new Asciidoctor::Block or Asciidoctor::Section instance with the
#   content set to the grabbed lines.
#
# reader - the Reader to take lines from.
# parent - the parent handed to the created Block/Section (default: self).
#
# Returns the new block, or nil when the reader has no lines or the
# remaining lines (e.g. only comments) did not produce a block.
def self.next_block(reader, parent = self)
  # Skip ahead to the block content
  reader.skip_blank
  return nil unless reader.has_lines?
  # NOTE: An anchor looks like this:
  #   [[foo]]
  # with the inside [foo] (including brackets) as match[1]
  if match = reader.peek_line.match(REGEXP[:anchor])
    Asciidoctor.debug "Found an anchor in line:\n\t#{reader.peek_line}"
    # NOTE: This expression conditionally strips off the brackets from
    # [foo], though REGEXP[:anchor] won't actually match without
    # match[1] being bracketed, so the condition isn't necessary.
    anchor = match[1].match(/^\[(.*)\]/) ? $1 : match[1]
    # NOTE: Set @references['foo'] = '[foo]'
    parent.document.references[anchor] = match[1]
    reader.get_line
  else
    anchor = nil
  end
  Asciidoctor.debug "/"*64
  Asciidoctor.debug "#{File.basename(__FILE__)}:#{__LINE__} -> #{__method__} - First two lines are:"
  Asciidoctor.debug reader.peek_line
  tmp_line = reader.get_line
  Asciidoctor.debug reader.peek_line
  reader.unshift tmp_line
  Asciidoctor.debug "/"*64
  block = nil
  title = nil
  caption = nil
  source_type = nil
  buffer = []
  while reader.has_lines? && block.nil?
    buffer.clear
    this_line = reader.get_line
    next_line = reader.peek_line || ''
    if this_line.match(REGEXP[:comment])
      next
    elsif match = this_line.match(REGEXP[:title])
      title = match[1]
      reader.skip_blank
    elsif match = this_line.match(REGEXP[:listing_source])
      source_type = match[1]
      reader.skip_blank
    elsif match = this_line.match(REGEXP[:caption])
      caption = match[1]
    elsif is_section_heading?(this_line, next_line)
      # If we've come to a new section, then we've found the end of this
      # current block.  Likewise if we'd found an unassigned anchor, push
      # it back as well, so it can go with this next heading.
      # NOTE - I don't think this will assign the anchor properly. Anchors
      # only match with double brackets - [[foo]], but what's stored in
      # `anchor` at this point is only the `foo` part that was stripped out
      # after matching.  TODO: Need a way to test this.
      reader.unshift(this_line)
      reader.unshift(anchor) unless anchor.nil?
      Asciidoctor.debug "#{__method__}: SENDING to next_section with lines[0] = #{reader.peek_line}"
      block = next_section(reader, parent)
    elsif this_line.match(REGEXP[:oblock])
      # oblock is surrounded by '--' lines and has zero or more blocks inside
      buffer = Reader.new(reader.grab_lines_until { |line| line.match(REGEXP[:oblock]) })
      # Strip lines off end of block - not implemented yet
      # while buffer.has_lines? && buffer.last.strip.empty?
      #   buffer.pop
      # end
      block = Block.new(parent, :oblock, [])
      while buffer.has_lines?
        block.blocks << next_block(buffer, block)
      end
    elsif list_type = [:olist, :colist].detect{|l| this_line.match( REGEXP[l] )}
      items = []
      Asciidoctor.debug "Creating block of type: #{list_type}"
      block = Block.new(parent, list_type)
      while !this_line.nil? && match = this_line.match(REGEXP[list_type])
        item = ListItem.new
        reader.unshift match[2].lstrip.sub(/^\./, '\.')
        item_segment = Reader.new(list_item_segment(reader, :alt_ending => REGEXP[list_type]))
        while item_segment.has_lines?
          item.blocks << next_block(item_segment, block)
        end
        if item.blocks.any? &&
           item.blocks.first.is_a?(Block) &&
           (item.blocks.first.context == :paragraph || item.blocks.first.context == :literal)
          item.content = item.blocks.shift.buffer.map{|l| l.strip}.join("\n")
        end
        items << item
        reader.skip_blank
        this_line = reader.get_line
      end
      reader.unshift(this_line) unless this_line.nil?
      block.buffer = items
    elsif match = this_line.match(REGEXP[:ulist])
      reader.unshift(this_line)
      block = build_ulist(reader, parent)
    elsif match = this_line.match(REGEXP[:dlist])
      pairs = []
      block = Block.new(parent, :dlist)
      this_dlist = Regexp.new(/^#{match[1]}(.*)#{match[3]}\s*$/)
      while !this_line.nil? && match = this_line.match(this_dlist)
        # Use a dedicated local for the term's anchor: reusing `anchor`
        # here would overwrite the block-level anchor applied at the end.
        if term_anchor = match[1].match( /\[\[([^\]]+)\]\]/ )
          # The term text is whatever surrounds the [[...]] marker
          dt = ListItem.new( term_anchor.pre_match + term_anchor.post_match )
          dt.anchor = term_anchor[1]
        else
          dt = ListItem.new( match[1] )
        end
        dd = ListItem.new
        # workaround eg. git-config OPTIONS --get-colorbool
        reader.get_line if reader.has_lines? && reader.peek_line.strip.empty?
        dd_segment = Reader.new(list_item_segment(reader, :alt_ending => this_dlist))
        while dd_segment.has_lines?
          dd.blocks << next_block(dd_segment, block)
        end
        if dd.blocks.any? &&
           dd.blocks.first.is_a?(Block) &&
           (dd.blocks.first.context == :paragraph || dd.blocks.first.context == :literal)
          dd.content = dd.blocks.shift.buffer.map{|l| l.strip}.join("\n")
        end
        pairs << [dt, dd]
        reader.skip_blank
        this_line = reader.get_line
      end
      reader.unshift(this_line) unless this_line.nil?
      block.buffer = pairs
    elsif this_line.match(REGEXP[:verse])
      # verse is preceded by [verse] and lasts until a blank line
      buffer = reader.grab_lines_until(:break_on_blank_lines => true)
      block = Block.new(parent, :verse, buffer)
    elsif this_line.match(REGEXP[:note])
      # note is an admonition preceded by [NOTE] and lasts until a blank line
      buffer = reader.grab_lines_until(:break_on_blank_lines => true)
      block = Block.new(parent, :note, buffer)
    elsif block_type = [:listing, :example].detect{|t| this_line.match( REGEXP[t] )}
      buffer = reader.grab_lines_until {|line| line.match( REGEXP[block_type] )}
      block = Block.new(parent, block_type, buffer)
    elsif this_line.match( REGEXP[:quote] )
      # Parse the quoted lines through their own Reader, the same way the
      # open block branch does, so the enclosed lines are the ones consumed.
      block = Block.new(parent, :quote)
      buffer = Reader.new(reader.grab_lines_until {|line| line.match( REGEXP[:quote] ) })
      while buffer.has_lines?
        block.blocks << next_block(buffer, block)
      end
    elsif this_line.match(REGEXP[:lit_blk])
      # literal block is surrounded by '....' (4 or more '.' chars) lines
      buffer = reader.grab_lines_until {|line| line.match( REGEXP[:lit_blk] ) }
      block = Block.new(parent, :literal, buffer)
    elsif this_line.match(REGEXP[:lit_par])
      # literal paragraph is contiguous lines starting with
      # one or more space or tab characters
      # So we need to actually include this one in the grab_lines group
      reader.unshift this_line
      buffer = reader.grab_lines_until(:preserve_last_line => true) {|line| ! line.match( REGEXP[:lit_par] ) }
      block = Block.new(parent, :literal, buffer)
    elsif this_line.match(REGEXP[:sidebar_blk])
      # sidebar is surrounded by '****' (4 or more '*' chars) lines
      buffer = reader.grab_lines_until {|line| line.match( REGEXP[:sidebar_blk] ) }
      block = Block.new(parent, :sidebar, buffer)
    else
      # paragraph is contiguous nonblank/noncontinuation lines
      while !this_line.nil? && !this_line.strip.empty?
        if this_line.match( REGEXP[:listing] ) || this_line.match( REGEXP[:oblock] )
          reader.unshift this_line
          break
        end
        buffer << this_line
        this_line = reader.get_line
      end
      if buffer.any? && admonition = buffer.first.match(/^NOTE:\s*/)
        buffer[0] = admonition.post_match
        block = Block.new(parent, :note, buffer)
      elsif source_type
        block = Block.new(parent, :listing, buffer)
      else
        Asciidoctor.debug "Proud parent #{parent} getting a new paragraph with buffer: #{buffer}"
        block = Block.new(parent, :paragraph, buffer)
      end
    end
  end
  # The loop can run out of lines without building anything (for example
  # when only comment lines were left); there is nothing to decorate then.
  return nil if block.nil?
  block.anchor  ||= anchor
  block.title   ||= title
  block.caption ||= caption
  block
end

def self.next_section(reader, parent = self)

["SALUTATIONS", [:paragraph, "It is awesome."]]
doc.next_section

["GREETINGS", [:paragraph, "This is my doc."]]
doc.next_section

doc = Asciidoctor::Document.new(source)

=> "GREETINGS\n---------\nThis is my doc.\n\nSALUTATIONS\n-----------\nIt is awesome."
source

Examples

Private: Return the next section from the document.
# Private: Return the next section from the document.
#
# reader - the Reader positioned at (or before) the section heading; an
#          anchor line may precede the heading.
# parent - the parent handed to the new Section; its document.references
#          is updated when the section has an anchor (default: self).
#
# Lines are collected up to the next heading whose level is not deeper than
# this section's, then parsed into blocks with next_block.
#
# Returns the new Section.
def self.next_section(reader, parent = self)
  section = Section.new(parent)
  Asciidoctor.debug "%"*64
  Asciidoctor.debug "#{File.basename(__FILE__)}:#{__LINE__} -> #{__method__} - First two lines are:"
  Asciidoctor.debug reader.peek_line
  tmp_line = reader.get_line
  Asciidoctor.debug reader.peek_line
  reader.unshift tmp_line
  Asciidoctor.debug "%"*64
  # Skip ahead to the next section definition
  while reader.has_lines? && section.name.nil?
    this_line = reader.get_line
    next_line = reader.peek_line || ''
    if match = this_line.match(REGEXP[:anchor])
      section.anchor = match[1]
    elsif is_section_heading?(this_line, next_line)
      section.name, section.level, embedded_anchor = extract_section_heading(this_line, next_line)
      # An anchor embedded in the heading takes over, but a heading without
      # one must not erase the anchor taken from a preceding anchor line.
      section.anchor = embedded_anchor unless embedded_anchor.nil?
      # Consume the underline of a two-line heading
      reader.get_line unless is_single_line_section_heading?(this_line)
    end
  end
  if !section.anchor.nil?
    # Strip one pair of brackets if present and register the reference
    anchor_id = section.anchor.match(/^\[(.*)\]/) ? $1 : section.anchor
    parent.document.references[anchor_id] = section.anchor
    section.anchor = anchor_id
  end
  # Grab all the lines that belong to this section
  section_lines = []
  while reader.has_lines?
    this_line = reader.get_line
    next_line = reader.peek_line
    if is_section_heading?(this_line, next_line)
      _, this_level, _ = extract_section_heading(this_line, next_line)
      if this_level <= section.level
        # A section can't contain a section level lower than itself,
        # so this signifies the end of the section.
        reader.unshift this_line
        if section_lines.any? && section_lines.last.match(REGEXP[:anchor])
          # Put back the anchor that came before this new-section line
          # on which we're bailing.
          reader.unshift section_lines.pop
        end
        break
      else
        section_lines << this_line
        section_lines << reader.get_line unless is_single_line_section_heading?(this_line)
      end
    elsif this_line.match(REGEXP[:listing])
      section_lines << this_line
      section_lines.concat reader.grab_lines_until {|line| line.match( REGEXP[:listing] ) }
      # Also grab the last line, if there is one
      this_line = reader.get_line
      section_lines << this_line unless this_line.nil?
    else
      section_lines << this_line
    end
  end
  section_reader = Reader.new(section_lines)
  # Now parse section_lines into Blocks belonging to the current Section
  while section_reader.has_lines?
    section_reader.skip_blank
    section << next_block(section_reader, section) if section_reader.has_lines?
  end
  section
end

def self.section_level(line)

line - the String line from under the section name.

used in the ASCII line under the section name.
Private: Get the Integer section level based on the characters
# Private: Get the Integer section level based on the characters
# used in the ASCII line under the section name.
#
# line - the String line from under the section name.
#
# Returns 0..4 for a line made up solely of '=', '-', '~', '^' or '+'
# respectively, or nil for anything else.
def self.section_level(line)
  marks = line.strip.chars.to_a.uniq
  # Only a line built from one repeated character can be an underline
  return nil unless marks.length == 1
  { '=' => 0, '-' => 1, '~' => 2, '^' => 3, '+' => 4 }[marks.first]
end

def self.single_line_section_level(line)

= is level 0, == is level 1, etc.
# Private: Get the Integer section level from the marker of a single-line
# heading: one less than the marker's length, never below 0.
#
# line - the String marker to measure.
#
# Returns the Integer level.
def self.single_line_section_level(line)
  depth = line.length - 1
  depth < 0 ? 0 : depth
end

def self.ulist_level(line)

line - the String line containing the list item

in front of the list item text.
Private: Get the Integer ulist level based on the characters
# Private: Get the Integer ulist level based on the characters
# in front of the list item text.
#
# line - the String line containing the list item
#
# Returns the length of the bullet ('-' or one to five '*'), or nil when
# the stripped line does not start with a bullet followed by whitespace.
def self.ulist_level(line)
  bullet = line.strip[/^(- | \*{1,5})\s+/x, 1]
  bullet && bullet.length
end

def initialize

Public: Make sure the Lexer object doesn't get initialized.
# Public: Make sure the Lexer object doesn't get initialized.
#
# Raises RuntimeError unconditionally.
def initialize
  raise RuntimeError, 'Au contraire, mon frere. No lexer instances will be running around.'
end