class RDoc::Markup::Parser

def self.parse str

# Parses +str+ into an RDoc::Markup::Document.
#
# A new parser instance tokenizes +str+; the resulting token stream is then
# parsed into a freshly created, empty document.  The value returned is
# whatever the instance-level #parse returns for that document.
def self.parse str
  tokenized = new.tap { |instance| instance.tokenize(str) }
  tokenized.parse(RDoc::Markup::Document.new)
end

def self.tokenize str

# Tokenizes +str+ with a new parser instance and returns that instance's
# token list (the value of its +tokens+ reader).
def self.tokenize str
  new.tap { |instance| instance.tokenize(str) }.tokens
end

def build_heading level

# Builds an RDoc::Markup::Heading of the given +level+.
#
# The next token supplies the heading text when it is a :TEXT token; the
# :NEWLINE that follows it is then consumed with #skip.  Any other token is
# pushed back with #unget and the heading gets an empty string as its text.
def build_heading level
  kind, content, = get

  if kind == :TEXT
    skip :NEWLINE
  else
    unget
    content = ''
  end

  RDoc::Markup::Heading.new level, content
end

def build_list margin

# Builds an RDoc::Markup::List from the list-item tokens at the head of the
# token stream, using +margin+ as the minimum column for an item marker.
#
# Tokens are consumed until one of the following is seen; that token is then
# pushed back with #unget and the loop ends:
#
# * a token whose type is not in LIST_TOKENS
# * a list token whose column is less than +margin+
# * a list token whose type differs from the type already set on the list
#
# For :LABEL and :NOTE items whose description does not start on the same
# line, labels with an empty description are collected in +label+.  When the
# next item has the same type and column the loop continues, so that several
# consecutive labels end up attached to the next item that is created.
#
# Returns the list.  Returns +nil+ when no item was added and there is no
# collected label, or the list type is neither :LABEL nor :NOTE.
def build_list margin
  p :list_start => margin if @debug
  list = RDoc::Markup::List.new
  # labels collected from :LABEL/:NOTE markers that had no description
  label = nil
  until @tokens.empty? do
    type, data, column, = get
    case type
    when *LIST_TOKENS then
      if column < margin || (list.type && list.type != type) then
        unget
        break
      end
      list.type = type
      # from here on +column+ is the column of the token after the marker;
      # it is used below as the indent for parsing the item's content
      peek_type, _, column, = peek_token
      case type
      when :NOTE, :LABEL then
        label = [] unless label
        if peek_type == :NEWLINE then
          # description not on the same line as LABEL/NOTE
          # skip the trailing newline & any blank lines below
          while peek_type == :NEWLINE
            get
            peek_type, _, column, = peek_token
          end
          # we may be:
          #   - at end of stream
          #   - at a column < margin:
          #         [text]
          #       blah blah blah
          #   - at the same column, but with a different type of list item
          #       [text]
          #       * blah blah
          #   - at the same column, with the same type of list item
          #       [one]
          #       [two]
          # In all cases, we have an empty description.
          # In the last case only, we continue.
          if peek_type.nil? || column < margin then
            empty = true
          elsif column == margin then
            case peek_type
            when type
              empty = :continue
            when *LIST_TOKENS
              empty = true
            else
              empty = false
            end
          else
            empty = false
          end
          if empty then
            # remember this label; it has no description of its own
            label << data
            next if empty == :continue
            break
          end
        end
      else
        # items of the other list types are created with a nil label
        data = nil
      end
      if label then
        # the item's label is the array of collected labels plus this one
        data = label << data
        label = nil
      end
      list_item = RDoc::Markup::ListItem.new data
      parse list_item, column
      list << list_item
    else
      unget
      break
    end
  end
  p :list_end => margin if @debug
  if list.empty? then
    return nil unless label
    return nil unless [:LABEL, :NOTE].include? list.type
    # only labels without any description were seen: emit a single item
    # holding the collected labels and a blank line
    list_item = RDoc::Markup::ListItem.new label, RDoc::Markup::BlankLine.new
    list << list_item
  end
  list
end

def build_paragraph margin

# Builds an RDoc::Markup::Paragraph from consecutive :TEXT tokens whose
# column equals +margin+.
#
# Each text is appended to the paragraph.  When the text is followed by a
# :NEWLINE and ends in a character matching SPACE_SEPARATED_LETTER_CLASS, a
# single space is appended to it so that it joins with the next line.
# Collection stops at a :BREAK, at the first token that is not a :TEXT at
# +margin+ (that token is pushed back with #unget), or at end of stream.
#
# Returns the paragraph.  The paragraph has no parts when no matching :TEXT
# token was available (empty stream, or the first token is not a :TEXT at
# +margin+); this no longer raises NoMethodError during the final cleanup.
def build_paragraph margin
  p :paragraph_start => margin if @debug

  paragraph = RDoc::Markup::Paragraph.new

  until @tokens.empty? do
    type, data, column, = get

    unless type == :TEXT && column == margin
      unget
      break
    end

    paragraph << data

    # a hard break ends the paragraph; the :BREAK token stays in the stream
    break if peek_token.first == :BREAK

    # +data+ is the same string object that was appended above, so the
    # joining space shows up in the paragraph's part
    data << ' ' if skip(:NEWLINE) && /#{SPACE_SEPARATED_LETTER_CLASS}\z/o.match?(data)
  end

  # drop the joining space from the final line; nothing to clean up when no
  # text was collected
  paragraph.parts.last&.sub!(/ \z/, '')

  p :paragraph_end => margin if @debug

  paragraph
end

def build_verbatim margin

# Builds an RDoc::Markup::Verbatim from the tokens indented deeper than
# +margin+.
#
# Tokens are turned back into text line by line: each line starts with as
# many spaces as its first token is indented past +margin+, and markup
# tokens (headers, rules, block quotes, list markers) are re-rendered as
# the characters they were scanned from, padded up to the column of the
# following token.  A :NEWLINE ends the current line.  The first
# non-newline token at or left of +margin+ is pushed back with #unget and
# ends the block.
#
# Afterwards the smallest indentation seen is stripped from every line and
# the verbatim is normalized.
#
# Returns the verbatim.  An empty or newline-only token stream yields a
# verbatim without any indentation stripping instead of raising
# NoMethodError, and blank lines scanned as "\r\n" are left intact rather
# than being truncated by the indentation stripping.
def build_verbatim margin
  p :verbatim_begin => margin if @debug
  verbatim = RDoc::Markup::Verbatim.new

  min_indent = nil
  generate_leading_spaces = true
  line = ''.dup

  until @tokens.empty? do
    type, data, column, = get

    if type == :NEWLINE then
      line << data
      verbatim << line
      line = ''.dup
      generate_leading_spaces = true
      next
    end

    if column <= margin
      unget
      break
    end

    if generate_leading_spaces then
      # first token of a line: reproduce its indentation relative to margin
      indent = column - margin
      line << ' ' * indent
      min_indent = indent if min_indent.nil? || indent < min_indent
      generate_leading_spaces = false
    end

    case type
    when :HEADER then
      # data is the header level, i.e. the number of '=' characters
      line << '=' * data
      _, _, peek_column, = peek_token
      peek_column ||= column + data
      indent = peek_column - column - data
      line << ' ' * indent
    when :RULE then
      # the tokenizer stores the dash count minus two
      width = 2 + data
      line << '-' * width
      _, _, peek_column, = peek_token
      peek_column ||= column + width
      indent = peek_column - column - width
      line << ' ' * indent
    when :BREAK, :TEXT then
      line << data
    when :BLOCKQUOTE then
      line << '>>>'
      peek_type, _, peek_column = peek_token
      if peek_type != :NEWLINE and peek_column
        line << ' ' * (peek_column - column - 3)
      end
    else # *LIST_TOKENS
      list_marker = case type
                    when :BULLET then data
                    when :LABEL  then "[#{data}]"
                    when :NOTE   then "#{data}::"
                    else # :LALPHA, :NUMBER, :UALPHA
                      "#{data}."
                    end
      line << list_marker
      peek_type, _, peek_column = peek_token
      unless peek_type == :NEWLINE then
        peek_column ||= column + list_marker.length
        indent = peek_column - column - list_marker.length
        line << ' ' * indent
      end
    end
  end

  # flush a last line that was not terminated by a :NEWLINE token
  verbatim << line << "\n" unless line.empty?

  # min_indent is nil when no non-newline token was consumed
  if min_indent && min_indent > 0
    verbatim.parts.each do |part|
      # blank lines carry no indentation; they consist of the newline only,
      # which may be LF or CRLF
      part.slice!(0, min_indent) unless part.match?(/\A\r?\n\z/)
    end
  end

  verbatim.normalize

  p :verbatim_end => margin if @debug

  verbatim
end

def get

# Removes the next token from the front of the token stream, remembers it
# as the current token (so that #unget can push it back) and returns it.
# Returns +nil+ once the stream is exhausted.
def get
  token = @tokens.shift
  @current_token = token
  p(:get => token) if @debug
  token
end

def initialize

# Creates a parser with an empty token stream.
def initialize
  # not referenced by any method visible here
  @binary_input = nil
  # last token handed out by #get
  @current_token = nil
  # when true, the parsing methods trace their progress with Kernel#p
  @debug = false
  # scanner over the input, assigned by #setup_scanner
  @s = nil
  # queue of tokens filled by #tokenize and drained by #get
  @tokens = []
end

def parse parent, indent = 0

# Parses the token stream into +parent+, treating +indent+ as the column of
# the current block.
#
# * :BREAK and :NEWLINE tokens add a blank line to +parent+.
# * A token left of +indent+ ends this block; it is pushed back for the
#   caller.
# * A token right of +indent+ starts a verbatim section.
# * A token at +indent+ is dispatched on its type: heading, rule,
#   paragraph text, block quote (parsed recursively at the column of the
#   first token after the marker's line) or list.
#
# Raises ParseError for a token type that none of the above handles.
#
# Returns +parent+.
def parse parent, indent = 0
  p :parse_start => indent if @debug

  until @tokens.empty?
    type, data, column, = get

    # trailing newlines are consumed by the builders, so a newline seen
    # here (like a hard break) stands for a blank line
    if type == :BREAK || type == :NEWLINE
      parent << RDoc::Markup::BlankLine.new
      skip :NEWLINE, false
      next
    end

    # dedent: this block is finished
    if column < indent
      unget
      break
    end

    # deeper indentation: verbatim section
    if column > indent
      unget
      parent << build_verbatim(indent)
      next
    end

    # same indentation: dispatch on the token type
    case type
    when :HEADER
      parent << build_heading(data)
    when :RULE
      parent << RDoc::Markup::Rule.new(data)
      skip :NEWLINE
    when :TEXT
      unget
      parse_text parent, indent
    when :BLOCKQUOTE
      # discard the rest of the marker's line, up to and including the
      # newline (or the end of the stream)
      loop do
        discarded, = get
        break unless discarded && discarded != :NEWLINE
      end

      _, _, quote_indent, = peek_token
      quote = RDoc::Markup::BlockQuote.new

      p :blockquote_start => [data, quote_indent] if @debug
      parse quote, quote_indent
      p :blockquote_end => indent if @debug

      parent << quote
    when *LIST_TOKENS
      unget
      parent << build_list(indent)
    else
      bad_type, bad_data, bad_column, bad_line = @current_token
      raise ParseError, "Unhandled token #{bad_type} (#{bad_data.inspect}) at #{bad_line}:#{bad_column}"
    end
  end

  p :parse_end => indent if @debug

  parent
end

def parse_text parent, indent # :nodoc:

:nodoc:
# Appends the paragraph built at +indent+ to +parent+ and returns the result
# of that append.
def parse_text parent, indent # :nodoc:
  paragraph = build_paragraph indent
  parent << paragraph
end

def peek_token

# Returns the next token without removing it from the stream.  At the end
# of the stream an empty array is returned, so destructuring the result
# yields +nil+ for every field.
def peek_token
  upcoming = @tokens.first
  upcoming ||= []
  p(:peek => upcoming) if @debug
  upcoming
end

def setup_scanner input

# Creates a MyStringScanner over +input+ and stores it in @s, where
# #tokenize reads from it.  Returns the new scanner.
def setup_scanner input
  @s = MyStringScanner.new(input)
end

def skip token_type, error = true

# Consumes the next token when its type is +token_type+ and returns it.
#
# Returns +nil+ at the end of the stream.  When the next token has another
# type it is pushed back with #unget; then ParseError is raised if +error+
# is true, otherwise +nil+ is returned.
def skip token_type, error = true
  found, = get
  return unless found # end of stream

  return @current_token if token_type == found

  unget
  return unless error

  raise ParseError, "expected #{token_type} got #{@current_token.inspect}"
end

def tokenize input

# Turns +input+ into tokens and appends them to @tokens.
#
# A scanner over +input+ is set up with #setup_scanner and consumed until
# its end.  Every token is an array of the form
#
#   [type, data, *pos]
#
# where +pos+ is the value of <tt>@s.pos</tt> taken before the token was
# scanned.  The rest of this file destructures tokens as
# <tt>type, data, column, line</tt>, so +pos+ is presumably a
# <tt>[column, line]</tt> pair -- confirm against MyStringScanner.
#
# The patterns are tried in the order written; the first one that matches
# decides the token type, and anything that matches none of them becomes
# :TEXT.
#
# Raises ParseError if an enumerated list marker cannot be classified.
#
# Returns +self+.
def tokenize input
  setup_scanner input
  until @s.eos? do
    pos = @s.pos
    # leading spaces will be reflected by the column of the next token
    # the only thing we lose are trailing spaces at the end of the file
    next if @s.scan(/ +/)
    # note: after BULLET, LABEL, etc.,
    # indent will be the column of the next non-newline token
    # the value of the case expression below is the token that gets
    # appended; branches that produce two tokens append the first one
    # themselves
    @tokens << case
               # [CR]LF => :NEWLINE
               when @s.scan(/\r?\n/) then
                 token = [:NEWLINE, @s.matched, *pos]
                 @s.newline!
                 token
               # === text => :HEADER then :TEXT
               when @s.scan(/(=+)(\s*)/) then
                 level = @s[1].length
                 header = [:HEADER, level, *pos]
                 if @s[2] =~ /^\r?\n/ then
                   # nothing but a line end follows the '=' run: give the
                   # whitespace back so it is scanned as :NEWLINE
                   @s.unscan(@s[2])
                   header
                 else
                   pos = @s.pos
                   @s.scan(/.*/)
                   # :HEADER goes in first, the :TEXT token follows as the
                   # value of the case expression
                   @tokens << header
                   [:TEXT, @s.matched.sub(/\r$/, ''), *pos]
                 end
               # --- (at least 3) and nothing else on the line => :RULE
               when @s.scan(/(-{3,}) *\r?$/) then
                 # data is the number of dashes minus two
                 [:RULE, @s[1].length - 2, *pos]
               # * or - followed by white space and text => :BULLET
               when @s.scan(/([*-]) +(\S)/) then
                 # the first character of the item text was only needed for
                 # the match; give it back
                 @s.unscan(@s[2])
                 [:BULLET, @s[1], *pos]
               # A. text, a. text, 12. text => :UALPHA, :LALPHA, :NUMBER
               when @s.scan(/([a-z]|\d+)\. +(\S)/i) then
                 # FIXME if tab(s), the column will be wrong
                 # either support tabs everywhere by first expanding them to
                 # spaces, or assume that they will have been replaced
                 # before (and provide a check for that at least in debug
                 # mode)
                 list_label = @s[1]
                 @s.unscan(@s[2])
                 list_type =
                   case list_label
                   when /[a-z]/ then :LALPHA
                   when /[A-Z]/ then :UALPHA
                   when /\d/    then :NUMBER
                   else
                     raise ParseError, "BUG token #{list_label}"
                   end
                 [list_type, list_label, *pos]
               # [text] followed by spaces or end of line => :LABEL
               when @s.scan(/\[(.*?)\]( +|\r?$)/) then
                 [:LABEL, @s[1], *pos]
               # text:: followed by spaces or end of line => :NOTE
               when @s.scan(/(.*?)::( +|\r?$)/) then
                 [:NOTE, @s[1], *pos]
               # >>> followed by end of line => :BLOCKQUOTE
               when @s.scan(/>>> *(\w+)?$/) then
                 # an optional single word after the marker is kept as the
                 # token data and also given back to the scanner
                 if word = @s[1]
                   @s.unscan(word)
                 end
                 [:BLOCKQUOTE, word, *pos]
               # anything else: :TEXT
               else
                 @s.scan(/(.*?)(  )?\r?$/)
                 token = [:TEXT, @s[1], *pos]
                 if @s[2] then
                   # two trailing spaces: :TEXT goes in first, followed by
                   # a :BREAK placed right after the text
                   @tokens << token
                   [:BREAK, @s[2], pos[0] + @s[1].length, pos[1]]
                 else
                   token
                 end
               end
  end
  self
end

def unget

# Pushes the token most recently returned by #get back onto the front of
# the token stream.
#
# Does nothing and returns +nil+ when there is no current token, i.e. when
# the last #get hit the end of the stream.  Previously that case raised
# "too many #ungets" whenever the stream was empty, because the +nil+
# current token compared equal to the +nil+ head of the empty stream.
#
# Raises Error when the current token is already at the front of the
# stream, which means it has been pushed back before.
#
# Returns the token stream after pushing the token.
def unget
  token = @current_token
  p :unget => token if @debug

  # nothing to push back after reading past the end of the stream
  return unless token

  raise Error, 'too many #ungets' if token == @tokens.first

  @tokens.unshift token
end