class RDoc::Markup::Parser
##
# Scans +input+ and appends the tokens found to <tt>@tokens</tt>.
#
# Every token is an Array whose first two elements are a type Symbol and a
# value, followed by whatever +token_pos+ returns for the scanner offset at
# which the token started.  The types produced here are:
#
# :NEWLINE:: the matched line terminator ("\n" or "\r\n")
# :INDENT:: the number of leading spaces
# :HEADER:: the number of "=" characters, capped at 6; always followed by a
#           :TEXT token holding the rest of the line
# :RULE:: the number of dashes minus two
# :BULLET:: the bullet character ("*" or "-"), followed by a :SPACE token
# :NUMBER, :LALPHA, :UALPHA:: the list label, followed by a :SPACE token
# :LABEL:: the text between "[" and "]", followed by a :SPACE token
# :NOTE:: the text before "::", followed by a :SPACE token
# :TEXT:: the remainder of the current line
#
# <tt>@line</tt> and <tt>@line_pos</tt> are reset at the start and updated
# after every newline.  <tt>@tokens</tt> must already be initialized by the
# caller.
#
# Raises ParseError if a list label matches none of the known list types.
#
# Returns +self+.

def tokenize input
  s = StringScanner.new input

  @line     = 0
  @line_pos = 0

  until s.eos? do
    pos = s.pos

    # The branches are tried in order, so more specific markup must stay
    # ahead of the catch-all TEXT branch.  Branches that emit two tokens push
    # the first one themselves and return the second, which is appended by
    # the outer <<.
    @tokens << case
               when s.scan(/\r?\n/) then
                 # Build the token before moving to the next line so it is
                 # positioned on the line it terminates.
                 token = [:NEWLINE, s.matched, *token_pos(pos)]
                 @line_pos = s.pos
                 @line += 1
                 token
               when s.scan(/ +/) then
                 [:INDENT, s.matched_size, *token_pos(pos)]
               when s.scan(/(=+)\s*/) then
                 # NOTE(review): \s* also matches line terminators, so an
                 # "=" run at the end of a line consumes the newline without
                 # updating @line/@line_pos.  Left as-is to keep the token
                 # stream unchanged -- confirm whether this is intended.
                 level = s[1].length
                 level = 6 if level > 6
                 @tokens << [:HEADER, level, *token_pos(pos)]

                 pos = s.pos
                 s.scan(/.*/)
                 [:TEXT, s.matched, *token_pos(pos)]
               when s.scan(/^(-{3,}) *$/) then
                 [:RULE, s[1].length - 2, *token_pos(pos)]
               when s.scan(/([*-])\s+/) then
                 @tokens << [:BULLET, s[1], *token_pos(pos)]
                 [:SPACE, s.matched_size, *token_pos(pos)]
               when s.scan(/([a-z]|\d+)\.[ \t]+\S/i) then
                 list_label = s[1]
                 # The trailing \S only proves that text follows the label;
                 # it is neither part of the width nor consumed.
                 width      = s.matched_size - 1

                 s.pos -= 1 # unget \S

                 list_type = case list_label
                             when /[a-z]/ then :LALPHA
                             when /[A-Z]/ then :UALPHA
                             when /\d/    then :NUMBER
                             else
                               raise ParseError, "BUG token #{list_label}"
                             end

                 @tokens << [list_type, list_label, *token_pos(pos)]
                 [:SPACE, width, *token_pos(pos)]
               when s.scan(/\[(.*?)\]( +|$)/) then
                 # The SPACE width is the size of the whole match, brackets
                 # and label text included.
                 @tokens << [:LABEL, s[1], *token_pos(pos)]
                 [:SPACE, s.matched_size, *token_pos(pos)]
               when s.scan(/(.*?)::( +|$)/) then
                 # As for labels, the SPACE width covers the whole match.
                 @tokens << [:NOTE, s[1], *token_pos(pos)]
                 [:SPACE, s.matched_size, *token_pos(pos)]
               else
                 s.scan(/.*/)
                 [:TEXT, s.matched, *token_pos(pos)]
               end
  end

  self
end