class RDoc::Markup::Parser
##
# Parses +str+ with a fresh parser instance and wraps the result in an
# RDoc::Markup::Document.
#
# The string is tokenized first; the array returned by the instance-level
# #parse is then splatted into RDoc::Markup::Document.new.

def self.parse(str)
  instance = new
  # instance.debug = true  # tracing switch left disabled, as in the original
  instance.tokenize(str)

  parts = instance.parse

  RDoc::Markup::Document.new(*parts)
end
##
# Tokenizes +str+ with a fresh parser instance and returns that instance's
# +tokens+.

def self.tokenize(str)
  new.tap { |instance| instance.tokenize(str) }.tokens
end
##
# Builds an RDoc::Markup::Heading of the given +level+.
#
# The heading's text is whatever #text collects from the token stream; the
# NEWLINE that follows it is then skipped.  Returns the new heading.

def build_heading(level)
  heading = RDoc::Markup::Heading.new(level, text)

  skip :NEWLINE

  heading
end
##
# Builds an RDoc::Markup::List from the list-marker tokens found at or beyond
# +margin+.
#
# Consumption stops (and the offending token is pushed back) when a token is
# not a list marker, sits left of +margin+, or has a different marker type
# than the list being built.  Returns nil when no item was collected.

def build_list(margin)
  p :list_start => margin if @debug

  list = RDoc::Markup::List.new

  until @tokens.empty?
    type, label, column, = get

    is_marker =
      case type
      when :BULLET, :LABEL, :LALPHA, :NOTE, :NUMBER, :UALPHA then true
      else false
      end

    # Not a marker, outdented, or a marker of another kind: this list is over.
    if !is_marker || column < margin || (list.type && list.type != type)
      unget
      break
    end

    list.type = type

    _, indent, = get # SPACE

    if type == :NOTE || type == :LABEL
      # The item body may start on the next line; if that line is indented,
      # its INDENT width is used as the item's indent instead.
      if :NEWLINE == peek_token.first
        get
        peek_type, new_indent, peek_column, = peek_token
        indent = new_indent if peek_type == :INDENT && peek_column >= column
        unget
      end
    else
      # Only NOTE and LABEL items carry their marker text into the item.
      label = nil
    end

    item = build_list_item(margin + indent, label)

    list << item if item
  end

  p :list_end => margin if @debug

  list.empty? ? nil : list
end
##
# Builds an RDoc::Markup::ListItem whose content is indented to +indent+.
#
# +item_type+ is handed to RDoc::Markup::ListItem.new unchanged.
#
# Tokens are consumed until one appears left of +indent+ (NEWLINE tokens and
# INDENT tokens at least +indent+ wide are exempt).  Returns nil when nothing
# was collected.  A leading blank line is dropped when other parts follow it.
#
# Raises ParseError for a token type this method does not handle.

def build_list_item(indent, item_type = nil)
  p :list_item_start => [indent, item_type] if @debug

  list_item = RDoc::Markup::ListItem.new item_type

  until @tokens.empty?
    type, data, column = get

    outdented = column < indent &&
                type != :NEWLINE &&
                (type != :INDENT || data < indent)

    if outdented
      unget
      break
    end

    case type
    when :INDENT
      unget
      list_item.push(*parse(indent))
    when :TEXT
      unget
      list_item << build_paragraph(indent)
    when :HEADER
      list_item << build_heading(data)
    when :NEWLINE
      list_item << RDoc::Markup::BlankLine.new
    when *LIST_TOKENS
      unget
      # build_list returns nil for an empty list; never store nil as a part
      # (the top-level parse loop guards the same call the same way).
      nested = build_list(column)
      list_item << nested if nested
    else
      raise ParseError, "Unhandled token #{@current_token.inspect}"
    end
  end

  p :list_item_end => [indent, item_type] if @debug

  return nil if list_item.empty?

  leading_blank = RDoc::Markup::BlankLine === list_item.parts.first
  list_item.parts.shift if leading_blank && list_item.length > 1

  list_item
end
##
# Builds an RDoc::Markup::Paragraph from consecutive TEXT tokens whose column
# equals +margin+.
#
# An INDENT token of exactly +margin+ width that is followed by TEXT is
# skipped so the paragraph can continue on the next line.  Any other token
# is pushed back and ends the paragraph.

def build_paragraph(margin)
  p :paragraph_start => margin if @debug

  paragraph = RDoc::Markup::Paragraph.new

  until @tokens.empty?
    type, data, column, = get

    belongs =
      case type
      when :INDENT then data == margin && peek_token[0] == :TEXT
      when :TEXT   then column == margin
      else false
      end

    unless belongs
      unget
      break
    end

    # A matching INDENT only introduces the next line of text.
    next if type == :INDENT

    paragraph << data
    skip :NEWLINE
  end

  p :paragraph_end => margin if @debug

  paragraph
end
##
# Builds an RDoc::Markup::Verbatim from tokens indented deeper than +margin+.
#
# Because the tokenizer has already split the line into markup tokens, the
# original text is reassembled here: headers become runs of '=', rules runs
# of '-', list markers are rendered back with their punctuation, and the
# whitespace that followed each is restored from token columns or widths.
#
# The section ends at an INDENT no wider than +margin+, a list marker at or
# left of +margin+, a NEWLINE not followed by INDENT or NEWLINE, or any other
# token type.  The result is normalized before being returned.

def build_verbatim(margin)
  p :verbatim_begin => margin if @debug

  verbatim = RDoc::Markup::Verbatim.new

  until @tokens.empty?
    type, data, column, = get

    case type
    when :INDENT
      unless data > margin
        unget
        break
      end

      # Keep only the indentation beyond the margin.
      verbatim << ' ' * (data - margin)
    when :HEADER
      verbatim << '=' * data

      # Pad up to the column of the next token, if there is one.
      next_column = peek_token[2] || column + data
      verbatim << ' ' * (next_column - column - data)
    when :RULE
      dashes = data + 2
      verbatim << '-' * dashes

      next_column = peek_token[2] || column + dashes
      verbatim << ' ' * (next_column - column - dashes)
    when :TEXT
      verbatim << data
    when *LIST_TOKENS
      unless column > margin
        unget
        break
      end

      marker =
        case type
        when :BULLET                   then data
        when :LABEL                    then "[#{data}]"
        when :LALPHA, :NUMBER, :UALPHA then "#{data}."
        when :NOTE                     then "#{data}::"
        end

      verbatim << marker

      # The following token's data is the total width of marker plus spaces.
      _, total_width, = get
      verbatim << ' ' * (total_width - marker.length)
    when :NEWLINE
      verbatim << data

      upcoming = peek_token[0]
      break unless upcoming == :INDENT || upcoming == :NEWLINE
    else
      unget
      break
    end
  end

  verbatim.normalize

  p :verbatim_end => margin if @debug

  verbatim
end
##
# Removes the next token from the front of the token queue, remembers it as
# the current token and returns it.  Returns nil when the queue is empty.

def get
  token = @tokens.shift
  @current_token = token

  p(:get => token) if @debug

  token
end
##
# Creates a parser with an empty token queue, no current token, debugging
# switched off and the line bookkeeping reset to zero.

def initialize
  @tokens        = []
  @current_token = nil
  @debug         = false

  # Line bookkeeping read by #token_pos.
  @line = @line_pos = 0
end
##
# Parses the queued tokens into an Array of document parts for content at
# +indent+.
#
# Headings, verbatim sections, blank lines, rules, paragraphs and lists are
# delegated to the matching build_* helper or node class.  Parsing stops
# (pushing the token back) at a non-INDENT token left of +indent+ or an
# INDENT narrower than +indent+.
#
# Raises ParseError for a token type this method does not handle.

def parse(indent = 0)
  p :parse_start => indent if @debug

  parts = []

  until @tokens.empty?
    type, data, column, = get

    if type != :INDENT && column < indent
      unget
      break
    end

    case type
    when :HEADER
      parts << build_heading(data)
    when :INDENT
      # Indentation equal to ours carries no content of its own.
      next if indent == data

      unget
      break if indent > data

      parts << build_verbatim(indent)
    when :NEWLINE
      parts << RDoc::Markup::BlankLine.new
      skip :NEWLINE, false
    when :RULE
      parts << RDoc::Markup::Rule.new(data)
      skip :NEWLINE
    when :TEXT
      unget
      parts << build_paragraph(indent)

      # we're done with this paragraph (indent mismatch)
      break if peek_token[0] == :TEXT
    when *LIST_TOKENS
      unget

      list = build_list(indent)
      parts << list if list

      # we're done with this list (indent mismatch)
      break if LIST_TOKENS.include?(peek_token.first) && indent > 0
    else
      line = @current_token[3]

      raise ParseError,
            "Unhandled token #{type} (#{data.inspect}) at #{line}:#{column}"
    end
  end

  p :parse_end => indent if @debug

  parts
end
##
# Returns the next token without removing it from the queue, or an empty
# Array when the queue is empty.

def peek_token
  upcoming = @tokens.first
  upcoming = [] unless upcoming

  p(:peek => upcoming) if @debug

  upcoming
end
##
# Consumes the next token if its type is +token_type+ and returns it.
#
# Returns nil at the end of the token stream.  When the next token has a
# different type it is pushed back; a ParseError is then raised if +error+
# is true, otherwise nil is returned.

def skip(token_type, error = true)
  found, = get

  return unless found # end of stream
  return @current_token if token_type == found

  unget

  return unless error

  raise ParseError, "expected #{token_type} got #{@current_token.inspect}"
end
##
# Collects the text of the current line from the token queue and returns it
# as a String.
#
# TEXT tokens contribute their data.  List-marker tokens are rendered back
# to their source form (marker plus the whitespace that followed it), using
# the width stored in the token that follows each marker.  Collection stops
# at a NEWLINE, which is pushed back, or at the end of the stream.
#
# The marker is rendered from the token's own data and padded by the
# marker's real length, so a '-' bullet stays a '-' and a multi-character
# label such as "10." keeps its original spacing.
#
# Raises ParseError for a token type this method does not handle.

def text
  buffer = ''

  loop do
    type, data, = get

    case type
    when nil # end of stream
      break
    when :NEWLINE
      unget
      break
    when :TEXT
      buffer << data
    when :BULLET, :LABEL, :LALPHA, :NOTE, :NUMBER, :UALPHA
      marker =
        case type
        when :BULLET                   then data
        when :LABEL                    then "[#{data}]"
        when :LALPHA, :NUMBER, :UALPHA then "#{data}."
        when :NOTE                     then "#{data}::"
        end

      # The next token's data is the total width of marker plus spaces.
      _, width, = get # SPACE

      buffer << marker << ' ' * (width - marker.length)
    else
      raise ParseError, "unhandled token #{@current_token.inspect}"
    end
  end

  buffer
end
##
# Converts the scanner +offset+ into a two-element Array: +offset+ minus
# <tt>@line_pos</tt>, followed by <tt>@line</tt>.

def token_pos(offset)
  column = offset - @line_pos

  [column, @line]
end
##
# Splits +input+ into tokens, appends them to <tt>@tokens</tt> and returns
# self.
#
# Every token is an Array of type, data and the two values returned by
# #token_pos for the token's start.  Markers that are followed by
# whitespace (bullets, numbered and lettered items, labels, notes) are
# emitted as a marker token followed by a SPACE token whose data is the
# width of marker plus whitespace; both carry the marker's position.
#
# Whitespace after a header marker or a bullet is limited to spaces and
# tabs, so a marker at the end of a line never swallows the line break: a
# NEWLINE token is still produced and the line bookkeeping stays correct.
#
# Raises ParseError if a list label cannot be classified.

def tokenize(input)
  s = StringScanner.new input

  @line = 0
  @line_pos = 0

  until s.eos?
    pos = s.pos

    # Some branches push a leading token themselves; the value of the case
    # expression is the token pushed last.
    @tokens << case
               when s.scan(/\r?\n/)
                 token = [:NEWLINE, s.matched, *token_pos(pos)]
                 @line_pos = s.pos
                 @line += 1
                 token
               when s.scan(/ +/)
                 [:INDENT, s.matched_size, *token_pos(pos)]
               when s.scan(/(=+)[ \t]*/)
                 level = s[1].length
                 level = 6 if level > 6 # heading levels are capped at 6
                 @tokens << [:HEADER, level, *token_pos(pos)]

                 # The rest of the line is the heading text (possibly empty).
                 pos = s.pos
                 s.scan(/.*/)
                 [:TEXT, s.matched, *token_pos(pos)]
               when s.scan(/^(-{3,}) *$/)
                 [:RULE, s[1].length - 2, *token_pos(pos)]
               when s.scan(/([*-])[ \t]+/)
                 @tokens << [:BULLET, s[1], *token_pos(pos)]
                 [:SPACE, s.matched_size, *token_pos(pos)]
               when s.scan(/([a-z]|\d+)\.[ \t]+\S/i)
                 list_label = s[1]
                 width      = s.matched_size - 1

                 s.pos -= 1 # unget \S

                 list_type =
                   case list_label
                   when /[a-z]/ then :LALPHA
                   when /[A-Z]/ then :UALPHA
                   when /\d/    then :NUMBER
                   else
                     raise ParseError, "BUG token #{list_label}"
                   end

                 @tokens << [list_type, list_label, *token_pos(pos)]
                 [:SPACE, width, *token_pos(pos)]
               when s.scan(/\[(.*?)\]( +|$)/)
                 @tokens << [:LABEL, s[1], *token_pos(pos)]
                 [:SPACE, s.matched_size, *token_pos(pos)]
               when s.scan(/(.*?)::( +|$)/)
                 @tokens << [:NOTE, s[1], *token_pos(pos)]
                 [:SPACE, s.matched_size, *token_pos(pos)]
               else
                 s.scan(/.*/)
                 [:TEXT, s.matched, *token_pos(pos)]
               end
  end

  self
end
##
# Pushes +token+ (by default the current token) back onto the front of the
# token queue.
#
# A nil +token+ is a no-op that returns nil, even when the queue is empty;
# there is nothing to push back at the end of the stream.
#
# Raises Error when +token+ equals the token already at the front of the
# queue, which means the same token is being pushed back twice.

def unget(token = @current_token)
  p :unget => token if @debug

  # Checked first: on an empty queue nil == @tokens.first would otherwise be
  # reported as a double unget.
  return unless token

  raise Error, 'too many #ungets' if token == @tokens.first

  @tokens.unshift token
end