class Syntax::XML

def step

Step through a single iteration of the tokenization process. This will
yield (potentially) many tokens, and possibly zero tokens.
# Perform one iteration of XML tokenization at the current scan position.
#
# A single call may open several groups (for example namespace, ":" and
# tag name for "<ns:tag") or only append text to the current one.
# Tokens are reported through the +start_group+ and +append+ helpers,
# which are defined outside this method.
#
# State: <tt>@in_tag</tt> is set to true once the opening "<" and tag
# name have been consumed, and reset to false when the closing ">",
# "/>" or "?>" is reached. While it is set, input is interpreted as
# attribute names, numbers, "=" and quoted strings (the latter delegated
# to +scan_string+ with the opening quote character).
def step
  # Leading whitespace is always emitted as its own :normal group.
  start_group :normal, matched if scan(/\s+/)

  if @in_tag
    case
    when scan(/([-\w]+):([-\w]+)/)
      # Namespaced attribute: prefix, separator, local name.
      start_group :namespace, subgroup(1)
      start_group :punct, ":"
      start_group :attribute, subgroup(2)
    when scan(/\d+/)
      start_group :number, matched
    when scan(/[-\w]+/)
      start_group :attribute, matched
    when scan(%r{[/?]?>})
      # End of the tag: ">", "/>" or "?>".
      @in_tag = false
      start_group :punct, matched
    when scan(/=/)
      start_group :punct, matched
    when scan(/["']/)
      scan_string matched
    else
      # Unrecognized character inside a tag: pass it through unchanged.
      # The whitespace scan above may already have consumed the rest of
      # the input (e.g. a truncated "<a "), in which case getch has
      # nothing to return (StringScanner#getch yields nil there), so
      # never hand nil to append.
      char = getch
      append char if char
    end
  elsif (text = scan_until(/(?=[<&])/))
    # +text+ is everything before the next "<" or "&"; it is empty when
    # the scanner is already positioned on one of them.
    start_group :normal, text unless text.empty?

    if scan(/<!--.*?(-->|\Z)/m)
      # A comment, terminated by "-->" or by the end of the input.
      start_group :comment, matched
    else
      case peek(1)
      when "<"
        start_group :punct, getch
        # "<?", "</" and "<!" keep their second character in the same
        # punctuation group as the "<".
        case peek(1)
        when "?", "/", "!"
          append getch
        end
        start_group :normal, matched if scan(/\s+/)
        if scan(/([-\w]+):([-\w]+)/)
          start_group :namespace, subgroup(1)
          start_group :punct, ":"
          start_group :tag, subgroup(2)
        elsif scan(/[-\w]+/)
          start_group :tag, matched
        end
        @in_tag = true
      when "&"
        # The entity name must not contain whitespace, ";", "<" or "&".
        # A plain \S here would let the greedy match run across a ";"
        # and merge adjacent entities (e.g. "&lt;b&gt;") into one token.
        if scan(/&[^\s;<&]{1,10};/)
          start_group :entity, matched
        else
          # A bare ampersand that does not start an entity.
          start_group :normal, scan(/&/)
        end
      end
    end
  else
    # No further markup: the remainder of the input is plain text.
    append scan_until(/\Z/)
  end
end