class CodeRay::Scanners::Python

def scan_tokens encoder, options

# Tokenizes the scanner's input and reports every token to +encoder+.
#
# The method is a state machine driven by the local +state+:
#
# :initial::          ordinary code (operators, literals, names, ...)
# :string::           inside a string literal; left when the delimiter
#                     stored in +string_delimiter+ is seen again
# :def_expected::     the next name is emitted as :method
# :class_expected::   the next name is emitted as :class
# :include_expected:: names are emitted as :include, with special handling
#                     for +import+ and +as+
#
# The three *_expected states are entered through DEF_NEW_STATE, which is
# defined outside this method.
#
# encoder:: object receiving +text_token+, +begin_group+ and +end_group+
# options:: accepted for interface compatibility; not read by this method
#
# Returns +encoder+.
def scan_tokens encoder, options
  
  state = :initial
  string_delimiter = nil
  string_raw = false
  string_type = nil
  docstring_coming = match?(/#{DOCSTRING_COMING}/o)
  last_token_dot = false
  unicode = string.respond_to?(:encoding) && string.encoding.name == 'UTF-8'
  # Loop-invariant: the identifier pattern only depends on the input encoding.
  name_pattern = unicode ? /#{NAME}/uo : /#{NAME}/o
  # Stack of :from / :import / :as markers describing the current import
  # statement.
  from_import_state = []
  
  until eos?
    
    if state == :string
      if match = scan(STRING_DELIMITER_REGEXP[string_delimiter])
        encoder.text_token match, :delimiter
        encoder.end_group string_type
        string_type = nil
        state = :initial
        next
      elsif string_delimiter.size == 3 && match = scan(/\n/)
        # Only triple-quoted strings may span lines.
        encoder.text_token match, :content
      elsif match = scan(STRING_CONTENT_REGEXP[string_delimiter])
        encoder.text_token match, :content
      elsif !string_raw && match = scan(/ \\ #{ESCAPE} /ox)
        encoder.text_token match, :char
      elsif match = scan(/ \\ #{UNICODE_ESCAPE} /ox)
        encoder.text_token match, :char
      elsif match = scan(/ \\ . /x)
        encoder.text_token match, :content
      elsif match = scan(/ \\ | $ /x)
        # Unterminated string: close the group and go back to normal code.
        encoder.end_group string_type
        string_type = nil
        # `$` matches the empty string at end of line; never emit a
        # zero-length error token for it.
        encoder.text_token match, :error unless match.empty?
        state = :initial
      else
        raise_inspect "else case \" reached; %p not handled." % peek(1), encoder, state
      end
    
    elsif match = scan(/ [ \t]+ | \\?\n /x)
      encoder.text_token match, :space
      # A bare newline ends an import statement; a backslash-newline
      # continuation (match == "\\\n") does not.
      if match == "\n"
        state = :initial if state == :include_expected
        docstring_coming = true if match?(/#{DOCSTRING_COMING}/o)
      end
      next
    
    elsif match = scan(/ \# [^\n]* /mx)
      encoder.text_token match, :comment
      next
    
    elsif state == :initial
      
      if match = scan(/#{OPERATOR}/o)
        encoder.text_token match, :operator
      
      elsif match = scan(/(u?r?|b)?("""|"|'''|')/i)
        string_delimiter = self[2]
        string_type = docstring_coming ? :docstring : :string
        docstring_coming = false
        encoder.begin_group string_type
        string_raw = false
        modifiers = self[1]
        unless modifiers.empty?
          # The prefix is matched case-insensitively, so the raw check must
          # be case-insensitive as well (R"..." is a raw string too).
          string_raw = modifiers.downcase.include?('r')
          encoder.text_token modifiers, :modifier
          match = string_delimiter
        end
        state = :string
        encoder.text_token match, :delimiter
      
      # TODO: backticks
      
      elsif match = scan(name_pattern)
        kind = IDENT_KIND[match]
        # TODO: keyword arguments
        # A name directly after "." is an attribute, never a keyword.
        kind = :ident if last_token_dot
        if kind == :old_keyword
          # Followed by "(" it is used like a function, so treat it as a name.
          kind = check(/\(/) ? :ident : :keyword
        elsif kind == :predefined && check(/ *=/)
          # A predefined name followed by "=" is emitted as a plain name.
          kind = :ident
        elsif kind == :keyword
          state = DEF_NEW_STATE[match]
          from_import_state << match.to_sym if state == :include_expected
        end
        encoder.text_token match, kind
      
      elsif match = scan(/@[a-zA-Z0-9_.]+[lL]?/)
        encoder.text_token match, :decorator
      
      elsif match = scan(/0[xX][0-9A-Fa-f]+[lL]?/)
        encoder.text_token match, :hex
      
      elsif match = scan(/0[bB][01]+[lL]?/)
        encoder.text_token match, :binary
      
      elsif match = scan(/(?:\d*\.\d+|\d+\.\d*)(?:[eE][+-]?\d+)?|\d+[eE][+-]?\d+/)
        # A trailing j/J turns the float into an imaginary literal.
        if scan(/[jJ]/)
          match << matched
          encoder.text_token match, :imaginary
        else
          encoder.text_token match, :float
        end
      
      elsif match = scan(/0[oO][0-7]+|0[0-7]+(?![89.eE])[lL]?/)
        encoder.text_token match, :octal
      
      elsif match = scan(/\d+([lL])?/)
        # Integers with an l/L suffix cannot take the imaginary suffix.
        if self[1] == nil && scan(/[jJ]/)
          match << matched
          encoder.text_token match, :imaginary
        else
          encoder.text_token match, :integer
        end
      
      else
        encoder.text_token getch, :error
      
      end
        
    elsif state == :def_expected
      state = :initial
      if match = scan(name_pattern)
        encoder.text_token match, :method
      else
        next
      end
    
    elsif state == :class_expected
      state = :initial
      if match = scan(name_pattern)
        encoder.text_token match, :class
      else
        next
      end
      
    elsif state == :include_expected
      if match = scan(unicode ? /#{DESCRIPTOR}/uo : /#{DESCRIPTOR}/o)
        if match == 'as'
          encoder.text_token match, :keyword
          from_import_state << :as
        elsif from_import_state.first == :from && match == 'import'
          encoder.text_token match, :keyword
          from_import_state << :import
        elsif from_import_state.last == :as
          # The alias following "as" is a plain identifier.
          # encoder.text_token match, match[0,1][unicode ? /[[:upper:]]/u : /[[:upper:]]/] ? :class : :method
          encoder.text_token match, :ident
          from_import_state.pop
        elsif IDENT_KIND[match] == :keyword
          # Any other keyword ends the import; rescan it as normal code.
          unscan
          match = nil
          state = :initial
          next
        else
          encoder.text_token match, :include
        end
      elsif match = scan(/,/)
        from_import_state.pop if from_import_state.last == :as
        encoder.text_token match, :operator
      else
        from_import_state = []
        state = :initial
        next
      end
      
    else
      raise_inspect 'Unknown state', encoder, state
      
    end
    
    last_token_dot = match == '.'
    
  end
  
  # Input ended inside a string: close the group that is still open.
  if state == :string
    encoder.end_group string_type
  end
  
  encoder
end