class CodeRay::Scanners::Groovy

Scanner for Groovy.

def scan_tokens encoder, options

# Tokenizes the scanner's input as Groovy source and reports every token to
# +encoder+.
#
# The method is a small state machine:
#
# * <tt>:initial</tt> handles ordinary code (whitespace, comments,
#   identifiers, operators, numbers, literal openers).
# * <tt>:string</tt>, <tt>:multiline_string</tt> and <tt>:regexp</tt> handle
#   the body of a literal until its closing delimiter.
#
# A <tt>${</tt> inside a literal pushes the current literal state onto
# +inline_block_stack+ and re-enters <tt>:initial</tt>; the matching
# <tt>}</tt> pops it again. Brace nesting inside such a block is tracked in
# +inline_block_paren_depth+.
#
# Groups opened on the encoder are always closed with the kind they were
# opened with (<tt>:string</tt> for both single-line and multi-line strings,
# <tt>:regexp</tt> for regexps), including when the input ends in the middle
# of a literal or of an inline block.
#
# encoder:: object receiving +text_token+, +begin_group+ and +end_group+ calls
# options:: accepted for interface compatibility; not read by this method
#
# Returns +encoder+.
def scan_tokens encoder, options
  
  state = :initial
  inline_block_stack = []          # saved [state, delimiter, depth] per open ${ ... }
  inline_block_paren_depth = nil   # brace depth inside the innermost ${ ... }
  string_delimiter = nil
  import_clause = class_name_follows = last_token = after_def = false
  value_expected = true            # true / :regexp when a value (e.g. a /regexp/) may start here
  
  until eos?
    
    # Reset for every token so that the kind of an earlier identifier cannot
    # leak into the last_token bookkeeping at the bottom of the loop.
    kind = nil
    
    case state
    
    when :initial
      
      if match = scan(/ \s+ | \\\n /x)
        encoder.text_token match, :space
        if match.include?("\n")
          import_clause = after_def = false
          # keep a pending :regexp expectation, otherwise expect a value
          value_expected ||= true
        end
        next
      
      elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
        value_expected = true
        after_def = false
        kind = :comment  # comments must not overwrite last_token
        encoder.text_token match, kind
      
      elsif bol? && match = scan(/ \#!.* /x)
        kind = :doctype  # the shebang line must not overwrite last_token
        encoder.text_token match, kind
      
      elsif import_clause && match = scan(/ (?!as) #{IDENT} (?: \. #{IDENT} )* (?: \.\* )? /ox)
        after_def = value_expected = false
        encoder.text_token match, :include
      
      elsif match = scan(/ #{IDENT} | \[\] /ox)
        kind = IDENT_KIND[match]
        value_expected = (kind == :keyword) && KEYWORDS_EXPECTING_VALUE[match]
        if last_token == '.'
          # member access: never highlight as keyword/type
          kind = :ident
        elsif class_name_follows
          kind = :class
          class_name_follows = false
        elsif after_def && check(/\s*[({]/)
          kind = :method
          after_def = false
        elsif kind == :ident && last_token != '?' && check(/:/)
          # `name:` is a map/named-argument key unless it is the middle of `?:`
          kind = :key
        else
          class_name_follows = true if match == 'class' || (import_clause && match == 'as')
          import_clause = match == 'import'
          after_def = true if match == 'def'
        end
        encoder.text_token match, kind
      
      elsif match = scan(/;/)
        import_clause = after_def = false
        value_expected = true
        encoder.text_token match, :operator
      
      elsif match = scan(/\{/)
        class_name_follows = after_def = false
        value_expected = true
        encoder.text_token match, :operator
        # count nested braces so the inline block ends at the right `}`
        inline_block_paren_depth += 1 unless inline_block_stack.empty?
      
      # TODO: ~'...', ~"..." and ~/.../ style regexps
      elsif match = scan(/ \.\.<? | \*?\.(?!\d)@? | \.& | \?:? | [,?:(\[] | -[->] | \+\+ |
          && | \|\| | \*\*=? | ==?~ | <=?>? | [-+*%^~&|>=!]=? | <<<?=? | >>>?=? /x)
        value_expected = true
        value_expected = :regexp if match == '~'
        after_def = false
        encoder.text_token match, :operator
      
      elsif match = scan(/ [)\]}] /x)
        value_expected = after_def = false
        if !inline_block_stack.empty? && match == '}'
          inline_block_paren_depth -= 1
          if inline_block_paren_depth == 0  # closing brace of inline block reached
            encoder.text_token match, :inline_delimiter
            encoder.end_group :inline
            # resume the literal that contained the ${ ... }
            state, string_delimiter, inline_block_paren_depth = inline_block_stack.pop
            next
          end
        end
        encoder.text_token match, :operator
      
      elsif check(/[\d.]/)
        after_def = value_expected = false
        if match = scan(/0[xX][0-9A-Fa-f]+/)
          encoder.text_token match, :hex
        elsif match = scan(/(?>0[0-7]+)(?![89.eEfF])/)
          encoder.text_token match, :octal
        elsif match = scan(/\d+[fFdD]|\d*\.\d+(?:[eE][+-]?\d+)?[fFdD]?|\d+[eE][+-]?\d+[fFdD]?/)
          encoder.text_token match, :float
        elsif match = scan(/\d+[lLgG]?/)
          encoder.text_token match, :integer
        end
        
      elsif match = scan(/'''|"""/)
        after_def = value_expected = false
        state = :multiline_string
        # multi-line strings are reported as an ordinary :string group
        encoder.begin_group :string
        string_delimiter = match
        encoder.text_token match, :delimiter
        
      # TODO: record.'name' syntax
      elsif match = scan(/["']/)
        after_def = value_expected = false
        state = :string
        encoder.begin_group state
        string_delimiter = match
        encoder.text_token match, :delimiter
        
      elsif value_expected && match = scan(/\//)
        # a slash where a value may start opens a regexp literal
        after_def = value_expected = false
        encoder.begin_group :regexp
        state = :regexp
        string_delimiter = '/'
        encoder.text_token match, :delimiter
        
      elsif match = scan(/ @ #{IDENT} /ox)
        after_def = value_expected = false
        encoder.text_token match, :annotation
        
      elsif match = scan(/\//)
        # any other slash is the division operator
        after_def = false
        value_expected = true
        encoder.text_token match, :operator
        
      else
        encoder.text_token getch, :error
        
      end
      
    when :string, :regexp, :multiline_string
      if match = scan(STRING_CONTENT_PATTERN[string_delimiter])
        encoder.text_token match, :content
        
      elsif match = scan(state == :multiline_string ? /'''|"""/ : /["'\/]/)
        encoder.text_token match, :delimiter
        if state == :regexp
          # TODO: regexp modifiers? s, m, x, i?
          modifiers = scan(/[ix]+/)
          encoder.text_token modifiers, :modifier if modifiers && !modifiers.empty?
        end
        # close with the kind the group was opened with
        encoder.end_group(state == :regexp ? :regexp : :string)
        string_delimiter = nil
        after_def = value_expected = false
        state = :initial
        next
        
      elsif (state == :string || state == :multiline_string) &&
          (match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox))
        # in single-quoted strings only \\ and \' are reported as escapes
        if string_delimiter.start_with?("'") && !(match == "\\\\" || match == "\\'")
          encoder.text_token match, :content
        else
          encoder.text_token match, :char
        end
      elsif state == :regexp && match = scan(/ \\ (?: #{REGEXP_ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
        encoder.text_token match, :char
        
      elsif match = scan(/ \$ #{IDENT} /mox)
        # $name interpolation: a self-contained inline group
        encoder.begin_group :inline
        encoder.text_token '$', :inline_delimiter
        match = match[1..-1]
        encoder.text_token match, IDENT_KIND[match]
        encoder.end_group :inline
        next
      elsif match = scan(/ \$ \{ /x)
        # ${ ... } interpolation: remember the literal and scan code until `}`
        encoder.begin_group :inline
        encoder.text_token match, :inline_delimiter
        inline_block_stack << [state, string_delimiter, inline_block_paren_depth]
        inline_block_paren_depth = 1
        state = :initial
        next
        
      elsif match = scan(/ \$ /mx)
        encoder.text_token match, :content
        
      elsif match = scan(/ \\. /mx)
        encoder.text_token match, :content  # TODO: Shouldn't this be :error?
        
      elsif match = scan(/ \\ | \n /x)
        # broken literal: close the group with the kind it was opened with
        # (:multiline_string is only an internal state, never a group kind)
        encoder.end_group(state == :regexp ? :regexp : :string)
        encoder.text_token match, :error
        after_def = value_expected = false
        state = :initial
        
      else
        raise_inspect "else case \" reached; %p not handled." % peek(1), encoder
        
      end
      
    else
      raise_inspect 'Unknown state', encoder
      
    end
    
    last_token = match unless [:space, :comment, :doctype].include? kind
    
  end
  
  # Input ended inside a literal: close its group with the opening kind.
  if [:multiline_string, :string, :regexp].include? state
    encoder.end_group(state == :regexp ? :regexp : :string)
  end
  
  # Input ended inside one or more ${ ... } blocks: close each inline group
  # and the literal that contained it, innermost first.
  until inline_block_stack.empty?
    outer_state, = inline_block_stack.pop
    encoder.end_group :inline
    encoder.end_group(outer_state == :regexp ? :regexp : :string)
  end
  
  encoder
end