class CodeRay::Scanners::Python
# Tokenizes Python source from the current scan position to the end of input,
# reporting every token to +encoder+.
#
# The scanner is a small state machine:
#
# * +:initial+          - ordinary code
# * +:string+           - inside a string literal (between its delimiters)
# * +:def_expected+     - the next name is emitted as a +:method+ token
# * +:class_expected+   - the next name is emitted as a +:class+ token
# * +:include_expected+ - inside an import statement
#
# The state that follows a keyword is looked up in DEF_NEW_STATE, which is
# defined outside this method.
#
# @param encoder the token sink; receives +text_token+, +begin_group+ and
#   +end_group+ calls
# @param options not read by this method
# @return the +encoder+ that was passed in
def scan_tokens encoder, options
  state = :initial
  string_delimiter = nil
  string_raw = false
  string_type = nil
  docstring_coming = match?(/#{DOCSTRING_COMING}/o)
  last_token_dot = false
  unicode = string.respond_to?(:encoding) && string.encoding.name == 'UTF-8'
  # Stack of the import-related keywords seen so far in the current import
  # statement (the opening keyword as a symbol, then :import / :as).
  from_import_state = []

  until eos?

    if state == :string
      if match = scan(STRING_DELIMITER_REGEXP[string_delimiter])
        encoder.text_token match, :delimiter
        encoder.end_group string_type
        string_type = nil
        state = :initial
        next
      elsif string_delimiter.size == 3 && match = scan(/\n/)
        # Newlines are plain content only inside triple-quoted strings.
        encoder.text_token match, :content
      elsif match = scan(STRING_CONTENT_REGEXP[string_delimiter])
        encoder.text_token match, :content
      elsif !string_raw && match = scan(/ \\ #{ESCAPE} /ox)
        encoder.text_token match, :char
      elsif match = scan(/ \\ #{UNICODE_ESCAPE} /ox)
        encoder.text_token match, :char
      elsif match = scan(/ \\ . /x)
        encoder.text_token match, :content
      elsif match = scan(/ \\ | $ /x)
        # A lone backslash, or the end of the line without a closing
        # delimiter: close the group and flag the backslash (if any).
        encoder.end_group string_type
        string_type = nil
        encoder.text_token match, :error unless match.empty?
        state = :initial
      else
        raise_inspect "else case \" reached; %p not handled." % peek(1), encoder, state
      end

    elsif match = scan(/ [ \t]+ | \\?\n /x)
      encoder.text_token match, :space
      # Only a bare newline (not a backslash continuation) ends an import
      # statement and may be followed by a docstring.
      if match == "\n"
        state = :initial if state == :include_expected
        docstring_coming = true if match?(/#{DOCSTRING_COMING}/o)
      end
      next

    elsif match = scan(/ \# [^\n]* /mx)
      encoder.text_token match, :comment
      next

    elsif state == :initial

      if match = scan(/#{OPERATOR}/o)
        encoder.text_token match, :operator

      elsif match = scan(/(u?r?|b)?("""|"|'''|')/i)
        # NOTE(review): this pattern recognizes only the u, r, ur and b
        # prefixes; other prefixes are scanned as a name followed by a string.
        modifiers = self[1]
        string_delimiter = self[2]
        # The pattern above is case-insensitive, so the prefix has to be
        # interpreted case-insensitively too (R"..." is raw, B"..." is binary).
        prefix = modifiers.downcase
        string_type = docstring_coming ? :docstring : (prefix == 'b' ? :binary : :string)
        docstring_coming = false
        encoder.begin_group string_type
        string_raw = false
        unless modifiers.empty?
          string_raw = prefix.include?('r')
          encoder.text_token modifiers, :modifier
          # The prefix was emitted separately; only the quotes remain.
          match = string_delimiter
        end
        state = :string
        encoder.text_token match, :delimiter

      # TODO: backticks

      elsif match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o)
        kind = IDENT_KIND[match]
        # TODO: keyword arguments
        # A name directly after a dot is always a plain identifier.
        kind = :ident if last_token_dot
        if kind == :old_keyword
          # Followed by "(" it is being called like a function.
          kind = check(/\(/) ? :ident : :keyword
        elsif kind == :predefined && check(/ *=/)
          # A predefined name that is being assigned to is just an identifier.
          kind = :ident
        elsif kind == :keyword
          state = DEF_NEW_STATE[match]
          from_import_state << match.to_sym if state == :include_expected
        end
        encoder.text_token match, kind

      elsif match = scan(/@[a-zA-Z0-9_.]+[lL]?/)
        encoder.text_token match, :decorator

      elsif match = scan(/0[xX][0-9A-Fa-f]+[lL]?/)
        encoder.text_token match, :hex

      elsif match = scan(/0[bB][01]+[lL]?/)
        encoder.text_token match, :binary

      elsif match = scan(/(?:\d*\.\d+|\d+\.\d*)(?:[eE][+-]?\d+)?|\d+[eE][+-]?\d+/)
        if scan(/[jJ]/)
          match << matched
          encoder.text_token match, :imaginary
        else
          encoder.text_token match, :float
        end

      elsif match = scan(/0[oO][0-7]+|0[0-7]+(?![89.eE])[lL]?/)
        encoder.text_token match, :octal

      elsif match = scan(/\d+([lL])?/)
        # An integer with an l/L suffix cannot also take the j/J suffix.
        if self[1].nil? && scan(/[jJ]/)
          match << matched
          encoder.text_token match, :imaginary
        else
          encoder.text_token match, :integer
        end

      else
        encoder.text_token getch, :error

      end

    elsif state == :def_expected
      state = :initial
      if match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o)
        encoder.text_token match, :method
      else
        next
      end

    elsif state == :class_expected
      state = :initial
      if match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o)
        encoder.text_token match, :class
      else
        next
      end

    elsif state == :include_expected
      if match = scan(unicode ? /#{DESCRIPTOR}/uo : /#{DESCRIPTOR}/o)
        if match == 'as'
          encoder.text_token match, :keyword
          from_import_state << :as
        elsif from_import_state.first == :from && match == 'import'
          encoder.text_token match, :keyword
          from_import_state << :import
        elsif from_import_state.last == :as
          # The name that follows "as" is the local alias.
          # encoder.text_token match, match[0,1][unicode ? /[[:upper:]]/u : /[[:upper:]]/] ? :class : :method
          encoder.text_token match, :ident
          from_import_state.pop
        elsif IDENT_KIND[match] == :keyword
          # Any other keyword ends the import statement; put it back so the
          # :initial state scans it.
          unscan
          match = nil
          state = :initial
          next
        else
          encoder.text_token match, :include
        end
      elsif match = scan(/,/)
        from_import_state.pop if from_import_state.last == :as
        encoder.text_token match, :operator
      else
        from_import_state = []
        state = :initial
        next
      end

    else
      raise_inspect 'Unknown state', encoder, state

    end

    # Branches that finish with +next+ skip this update on purpose.
    last_token_dot = match == '.'

  end

  # Input ended inside a string literal: close the group that is still open.
  if state == :string
    encoder.end_group string_type
  end

  encoder
end