class RDoc::Markup::Parser

def self.parse str

##
# Convenience entry point: tokenizes +str+ with a fresh parser and wraps the
# parsed parts in an RDoc::Markup::Document.
def self.parse(str)
  parser = new
  # parser.debug = true
  parser.tokenize(str)
  parts = parser.parse
  RDoc::Markup::Document.new(*parts)
end

def self.tokenize str

##
# Tokenizes +str+ with a fresh parser and returns that parser's token list
# without parsing it.
def self.tokenize(str)
  new.tap { |parser| parser.tokenize(str) }.tokens
end

def build_heading level

##
# Builds a Heading of the given +level+ from the text of the current line,
# then consumes the NEWLINE that ends it.
def build_heading(level)
  RDoc::Markup::Heading.new(level, text).tap { skip :NEWLINE }
end

def build_list margin

##
# Builds a List from the token stream, collecting items whose markers are
# not left of column +margin+.
#
# Consumption stops, with the offending token pushed back, at a list marker
# whose column is less than +margin+, at a marker whose type differs from the
# type already assigned to this list, or at any non-list token.
#
# Returns the list, or nil when no items were collected.
def build_list margin
  p :list_start => margin if @debug
  list = RDoc::Markup::List.new
  until @tokens.empty? do
    type, data, column, = get
    case type
    when :BULLET, :LABEL, :LALPHA, :NOTE, :NUMBER, :UALPHA then
      list_type = type
      # a marker left of the margin belongs to an enclosing construct
      if column < margin then
        unget
        break
      end
      # a different kind of marker starts a different list
      if list.type and list.type != list_type then
        unget
        break
      end
      list.type = list_type
      case type
      when :NOTE, :LABEL then
        _, indent, = get # SPACE
        # The label is immediately followed by a line break: look at the
        # token after the NEWLINE and, if it is an INDENT whose column is not
        # left of the label's column, use its width as the item's indent.
        # The NEWLINE itself is pushed back for build_list_item to consume.
        if :NEWLINE == peek_token.first then
          get
          peek_type, new_indent, peek_column, = peek_token
          indent = new_indent if
            peek_type == :INDENT and peek_column >= column
          unget
        end
      else
        # bullets and enumerated markers pass nil as the item type
        data = nil
        _, indent, = get
      end
      # the item body is indented by the marker width relative to the margin
      list_item = build_list_item(margin + indent, data)
      list << list_item if list_item
    else
      unget
      break
    end
  end
  p :list_end => margin if @debug
  return nil if list.empty?
  list
end

def build_list_item indent, item_type = nil

##
# Builds a ListItem whose content is indented to column +indent+.
# +item_type+ is handed to ListItem.new unchanged (build_list passes the
# label text for :LABEL and :NOTE items and nil otherwise).
#
# Returns the item, or nil when nothing was collected. Raises ParseError for
# a token type that has no handler here.
def build_list_item indent, item_type = nil
  p :list_item_start => [indent, item_type] if @debug
  list_item = RDoc::Markup::ListItem.new item_type
  until @tokens.empty? do
    type, data, column = get
    # A token left of +indent+ ends the item, except for NEWLINE tokens and
    # for INDENT tokens that are at least +indent+ wide.
    if column < indent and
       not type == :NEWLINE and
       (type != :INDENT or data < indent) then
      unget
      break
    end
    case type
    when :INDENT then
      # hand the indented content back to the general parser at this indent
      unget
      list_item.push(*parse(indent))
    when :TEXT then
      unget
      list_item << build_paragraph(indent)
    when :HEADER then
      list_item << build_heading(data)
    when :NEWLINE then
      list_item << RDoc::Markup::BlankLine.new
    when *LIST_TOKENS then
      # nested list: its margin is the column of its first marker
      unget
      list_item << build_list(column)
    else
      raise ParseError, "Unhandled token #{@current_token.inspect}"
    end
  end
  p :list_item_end => [indent, item_type] if @debug
  return nil if list_item.empty?
  # drop a leading blank line, but only when other parts follow it
  list_item.parts.shift if
    RDoc::Markup::BlankLine === list_item.parts.first and
    list_item.length > 1
  list_item
end

def build_paragraph margin

##
# Builds a Paragraph from consecutive TEXT tokens that start at column
# +margin+.
#
# An INDENT token of width +margin+ directly followed by TEXT is skipped;
# any other token ends the paragraph and is pushed back onto the stream.
def build_paragraph(margin)
  p :paragraph_start => margin if @debug

  paragraph = RDoc::Markup::Paragraph.new

  until @tokens.empty?
    kind, value, col, = get

    belongs = case kind
              when :INDENT then value == margin && peek_token[0] == :TEXT
              when :TEXT   then col == margin
              else              false
              end

    unless belongs
      unget
      break
    end

    # a matching INDENT only introduces the next line of text
    next unless kind == :TEXT

    paragraph << value
    skip :NEWLINE
  end

  p :paragraph_end => margin if @debug

  paragraph
end

def build_verbatim margin

##
# Builds a Verbatim section from tokens indented deeper than +margin+,
# turning markup tokens back into the literal text they were scanned from.
#
# Consumption stops at an INDENT that is not wider than +margin+, at a list
# marker whose column is not right of +margin+, after a NEWLINE that is not
# followed by INDENT or NEWLINE, or at any token type without a handler
# (which is pushed back).
def build_verbatim margin
  p :verbatim_begin => margin if @debug
  verbatim = RDoc::Markup::Verbatim.new
  until @tokens.empty? do
    type, data, column, = get
    case type
    when :INDENT then
      # indentation no deeper than the margin ends the verbatim section
      if margin >= data then
        unget
        break
      end
      # keep only the indentation beyond the margin
      indent = data - margin
      verbatim << ' ' * indent
    when :HEADER then
      # data is the heading level: emit that many '=' and pad up to the
      # column of the next token (no padding when the stream is exhausted)
      verbatim << '=' * data
      _, _, peek_column, = peek_token
      peek_column ||= column + data
      verbatim << ' ' * (peek_column - column - data)
    when :RULE then
      # the tokenizer stores the dash count minus two
      width = 2 + data
      verbatim << '-' * width
      _, _, peek_column, = peek_token
      peek_column ||= column + data + 2
      verbatim << ' ' * (peek_column - column - width)
    when :TEXT then
      verbatim << data
    when *LIST_TOKENS then
      if column <= margin then
        unget
        break
      end
      # re-create the source form of the list marker
      list_marker = case type
                    when :BULLET                   then '*'
                    when :LABEL                    then "[#{data}]"
                    when :LALPHA, :NUMBER, :UALPHA then "#{data}."
                    when :NOTE                     then "#{data}::"
                    end
      verbatim << list_marker
      # the next token is the SPACE token holding the marker's total width;
      # pad the marker out to that width
      _, data, = get
      verbatim << ' ' * (data - list_marker.length)
    when :NEWLINE then
      verbatim << data
      # continue only across blank lines and further indented lines
      break unless [:INDENT, :NEWLINE].include? peek_token[0]
    else
      unget
      break
    end
  end
  verbatim.normalize
  p :verbatim_end => margin if @debug
  verbatim
end

def get

##
# Removes the next token from the front of the stream, remembers it as the
# current token and returns it (nil when the stream is empty).
def get
  token = @tokens.shift
  @current_token = token
  p :get => token if @debug
  token
end

def initialize

##
# Sets up a parser with an empty token stream, no current token, debug
# output disabled and the line bookkeeping reset to zero.
def initialize
  @line = @line_pos = 0
  @debug = false
  @current_token = nil
  @tokens = []
end

def parse indent = 0

##
# Parses the token stream into an Array of document parts for content at
# indentation level +indent+.
#
# Parsing stops, with the token pushed back, at a non-INDENT token whose
# column is left of +indent+ or at an INDENT narrower than +indent+.
#
# Raises ParseError for a token type that has no handler here.
def parse indent = 0
  p :parse_start => indent if @debug
  document = []
  until @tokens.empty? do
    type, data, column, = get
    # content left of our indent belongs to the caller
    if type != :INDENT and column < indent then
      unget
      break
    end
    case type
    when :HEADER then
      document << build_heading(data)
    when :INDENT then
      if indent > data then
        # shallower indentation: belongs to the caller
        unget
        break
      elsif indent == data then
        # indentation matching our level carries no content of its own
        next
      end
      # deeper indentation starts a verbatim section
      unget
      document << build_verbatim(indent)
    when :NEWLINE then
      document << RDoc::Markup::BlankLine.new
      # swallow one directly following NEWLINE, if present, without raising
      skip :NEWLINE, false
    when :RULE then
      document << RDoc::Markup::Rule.new(data)
      skip :NEWLINE
    when :TEXT then
      unget
      document << build_paragraph(indent)
      # we're done with this paragraph (indent mismatch)
      break if peek_token[0] == :TEXT
    when *LIST_TOKENS then
      unget
      list = build_list(indent)
      document << list if list
      # we're done with this list (indent mismatch)
      break if LIST_TOKENS.include? peek_token.first and indent > 0
    else
      # re-read the full token so the message can include its line number
      type, data, column, line = @current_token
      raise ParseError,
            "Unhandled token #{type} (#{data.inspect}) at #{line}:#{column}"
    end
  end
  p :parse_end => indent if @debug
  document
end

def peek_token

##
# Returns the next token without consuming it, or an empty Array when the
# stream has no further token.
def peek_token
  (@tokens[0] || []).tap do |upcoming|
    p :peek => upcoming if @debug
  end
end

def skip token_type, error = true

##
# Consumes the next token if its type is +token_type+ and returns it.
#
# Returns nil at the end of the stream. On a type mismatch the token is
# pushed back; ParseError is then raised unless +error+ is false, in which
# case nil is returned.
def skip(token_type, error = true)
  found, = get

  return nil unless found # end of stream
  return @current_token if token_type == found

  unget
  return nil unless error

  raise ParseError, "expected #{token_type} got #{@current_token.inspect}"
end

def text

##
# Rebuilds the text of the current line from the token stream.
#
# Tokens are consumed up to, but not including, the next NEWLINE (which is
# pushed back) or until the stream runs out. List markers are rendered back
# to their source form and padded to the width recorded in the SPACE token
# that follows them.
#
# Raises ParseError for any token type that cannot appear in a line of text.
def text
  line = ''.dup

  loop do
    type, data, = get

    line << case type
            when :BULLET, :LABEL, :LALPHA, :NOTE, :NUMBER, :UALPHA then
              marker = case type
                       when :BULLET then '*'
                       when :LABEL  then "[#{data}]"
                       when :NOTE   then "#{data}::"
                       else              "#{data}."
                       end
              _, width, = get # SPACE
              # The SPACE width covers the marker plus its trailing blanks,
              # so pad by the marker's real length. A fixed offset would
              # over-pad multi-character labels such as "10.".
              "#{marker}#{' ' * (width - marker.length)}"
            when :TEXT then
              data
            when :NEWLINE then
              unget
              break
            when nil then
              break
            else
              raise ParseError, "unhandled token #{@current_token.inspect}"
            end
  end

  line
end

def token_pos offset

##
# Converts the absolute scanner +offset+ into a [column, line] pair using the
# offset recorded for the start of the current line.
def token_pos(offset)
  column = offset - @line_pos
  [column, @line]
end

def tokenize input

##
# Splits +input+ into tokens appended to @tokens and returns self.
#
# Each token is an Array of [type, data, column, line]. List markers are
# emitted as a marker token followed by a :SPACE token whose data is the
# width of the marker plus its trailing blanks. A :HEADER token is always
# followed by a :TEXT token holding the rest of the line (possibly empty).
#
# No rule other than :NEWLINE consumes a line break, so @line and @line_pos
# stay in step with the input. A "\r" that is part of a "\r\n" line ending
# is left for the :NEWLINE token instead of leaking into :TEXT, and the
# end-of-line checks for rules, labels and notes accept "\r\n" as well.
#
# Raises ParseError if an enumerated list label cannot be classified.
def tokenize input
  s = StringScanner.new input

  @line = 0
  @line_pos = 0

  until s.eos? do
    pos = s.pos

    @tokens << case
               when s.scan(/\r?\n/) then
                 token = [:NEWLINE, s.matched, *token_pos(pos)]
                 @line_pos = s.pos
                 @line += 1
                 token
               when s.scan(/ +/) then
                 [:INDENT, s.matched_size, *token_pos(pos)]
               when s.scan(/(=+)[ \t]*/) then
                 # blanks only: "=" at the end of a line must not swallow
                 # the line break and turn the next line into its heading
                 level = s[1].length
                 level = 6 if level > 6
                 @tokens << [:HEADER, level, *token_pos(pos)]
                 pos = s.pos
                 s.scan(/.*?(?=\r?\n|\z)/)
                 [:TEXT, s.matched, *token_pos(pos)]
               when s.scan(/^(-{3,}) *(?=\r?\n|\z)/) then
                 [:RULE, s[1].length - 2, *token_pos(pos)]
               when s.scan(/([*-])[ \t]+/) then
                 # blanks only, for the same reason as the header rule
                 @tokens << [:BULLET, :BULLET, *token_pos(pos)]
                 [:SPACE, s.matched_size, *token_pos(pos)]
               when s.scan(/([a-z]|\d+)\.[ \t]+\S/i) then
                 list_label = s[1]
                 width      = s.matched_size - 1
                 s.pos -= 1 # unget \S
                 list_type = case list_label
                             when /[a-z]/ then :LALPHA
                             when /[A-Z]/ then :UALPHA
                             when /\d/    then :NUMBER
                             else
                               raise ParseError, "BUG token #{list_label}"
                             end
                 @tokens << [list_type, list_label, *token_pos(pos)]
                 [:SPACE, width, *token_pos(pos)]
               when s.scan(/\[(.*?)\]( +|(?=\r?\n|\z))/) then
                 @tokens << [:LABEL, s[1], *token_pos(pos)]
                 [:SPACE, s.matched_size, *token_pos(pos)]
               when s.scan(/(.*?)::( +|(?=\r?\n|\z))/) then
                 @tokens << [:NOTE, s[1], *token_pos(pos)]
                 [:SPACE, s.matched_size, *token_pos(pos)]
               else
                 # Rest of the line, minus the "\r" of a "\r\n" ending.
                 # Never empty here: line breaks and end of input are
                 # handled before this branch is reached.
                 s.scan(/.*?(?=\r?\n|\z)/)
                 [:TEXT, s.matched, *token_pos(pos)]
               end
  end

  self
end

def unget token = @current_token

##
# Pushes +token+ (by default the token most recently returned by #get) back
# onto the front of the stream.
#
# Raises Error when +token+ equals the token already at the front of the
# stream. A nil +token+ is otherwise ignored.
def unget(token = @current_token)
  p :unget => token if @debug

  already_queued = token == @tokens.first
  raise Error, 'too many #ungets' if already_queued

  return unless token

  @tokens.unshift token
end