# lib/coderay/scanners/java_script.rb



module CodeRay
module Scanners
  
  # Scanner for JavaScript.
  # 
  # Aliases: +ecmascript+, +ecma_script+, +javascript+
  # 
  # +scan_tokens+ is a small state machine with these states:
  # 
  # [:initial]                  ordinary code
  # [:string, :key, :regexp]    inside a quoted string, a quoted object
  #                             literal key, or a regular expression literal
  # [:open_multi_line_comment]  inside a <tt>/* ... */</tt> comment that was
  #                             still open when the previous input ended
  class JavaScript < Scanner
    
    register_for :java_script
    file_extension 'js'
    
    # The actual JavaScript keywords.
    KEYWORDS = %w[
      break case catch continue default delete do else
      finally for function if in instanceof new
      return switch throw try typeof var void while with
    ]  # :nodoc:
    PREDEFINED_CONSTANTS = %w[
      false null true undefined NaN Infinity
    ]  # :nodoc:
    
    MAGIC_VARIABLES = %w[ this arguments ]  # :nodoc: arguments was introduced in JavaScript 1.4
    
    # Keywords after which a value (and therefore possibly a regexp literal
    # rather than a division operator) is expected.
    KEYWORDS_EXPECTING_VALUE = WordList.new.add %w[
      case delete in instanceof new return throw typeof with
    ]  # :nodoc:
    
    # Reserved for future use.
    RESERVED_WORDS = %w[
      abstract boolean byte char class debugger double enum export extends
      final float goto implements import int interface long native package
      private protected public short static super synchronized throws transient
      volatile
    ]  # :nodoc:
    
    # Maps an identifier to its token kind; unknown words are plain :ident.
    IDENT_KIND = WordList.new(:ident).
      add(RESERVED_WORDS, :reserved).
      add(PREDEFINED_CONSTANTS, :predefined_constant).
      add(MAGIC_VARIABLES, :local_variable).
      add(KEYWORDS, :keyword)  # :nodoc:
    
    # What may follow a backslash (the backslash itself is matched by the caller).
    ESCAPE = / [bfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x  # :nodoc:
    UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x  # :nodoc:
    REGEXP_ESCAPE = / [bBdDsSwW] /x  # :nodoc:
    # Plain content (no backslash, no closing delimiter), indexed by delimiter.
    STRING_CONTENT_PATTERN = {
      "'" => /[^\\']+/,
      '"' => /[^\\"]+/,
      '/' => /[^\\\/]+/,
    }  # :nodoc:
    # Look-ahead used right after an opening quote: matches when the quoted
    # text is closed and followed by a colon, i.e. it is an object literal key.
    KEY_CHECK_PATTERN = {
      "'" => / (?> [^\\']* (?: \\. [^\\']* )* ) ' \s* : /mx,
      '"' => / (?> [^\\"]* (?: \\. [^\\"]* )* ) " \s* : /mx,
    }  # :nodoc:
    
  protected
    
    # Initializes the remembered scanner state.
    def setup
      @state = :initial
    end
    
    # Tokenizes the input and feeds the tokens to +encoder+.
    # 
    # Recognized +options+:
    # [:state]       state to start in: a state symbol or a
    #                <tt>[state, string_delimiter]</tt> pair. Defaults to the
    #                remembered state (+:initial+ after +setup+).
    # [:keep_state]  if set, the final state and string delimiter are
    #                remembered for the next call.
    # 
    # Returns +encoder+.
    def scan_tokens encoder, options
      
      state, string_delimiter = options[:state] || @state
      # Starting inside a string, key or regexp: open its group first.
      if string_delimiter
        encoder.begin_group state
      end
      
      value_expected = true      # a "/" here would start a regexp, not a division
      key_expected = false       # an identifier or string followed by ":" would be a key
      function_expected = false  # the next identifier names a function
      
      until eos?
        
        case state
          
        when :initial
          
          if match = scan(/ \s+ | \\\n /x)
            # After a line break a new value may start.
            value_expected = true if !value_expected && match.index(?\n)
            encoder.text_token match, :space
            
          # A line comment ends at the line break: JavaScript has no
          # backslash line continuation inside comments.
          # The empty group () only participates when a block comment is not
          # closed before the end of the input.
          elsif match = scan(%r! // [^\n]* | /\* (?: .*? \*/ | .*() ) !mx)
            value_expected = true
            encoder.text_token match, :comment
            state = :open_multi_line_comment if self[1]
            
          elsif check(/\.?\d/)
            key_expected = value_expected = false
            if match = scan(/0[xX][0-9A-Fa-f]+/)
              encoder.text_token match, :hex
            elsif match = scan(/(?>0[0-7]+)(?![89.eEfF])/)
              encoder.text_token match, :octal
            elsif match = scan(/\d+[fF]|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/)
              encoder.text_token match, :float
            elsif match = scan(/\d+/)
              encoder.text_token match, :integer
            end
            
          # An XML element where a value is expected is handed to the XML scanner.
          elsif value_expected && match = scan(/<([[:alpha:]]\w*) (?: [^\/>]*\/> | .*?<\/\1>)/xim)
            # TODO: scan over nested tags
            xml_scanner.tokenize match, :tokens => encoder
            value_expected = false
            
          elsif match = scan(/ [-+*=<>?:;,!&^|(\[{~%]+ | \.(?!\d) /x)
            value_expected = true
            last_operator = match[-1]
            # Object literal keys can follow "{" or ",".
            key_expected = (last_operator == ?{) || (last_operator == ?,)
            function_expected = false
            encoder.text_token match, :operator
            
          elsif match = scan(/ [)\]}]+ /x)
            function_expected = key_expected = value_expected = false
            encoder.text_token match, :operator
            
          elsif match = scan(/ [$a-zA-Z_][A-Za-z_0-9$]* /x)
            kind = IDENT_KIND[match]
            value_expected = (kind == :keyword) && KEYWORDS_EXPECTING_VALUE[match]
            # TODO: labels
            if kind == :ident
              if match.index(?$)  # $ allowed inside an identifier
                kind = :predefined
              elsif function_expected
                kind = :function
              elsif check(/\s*[=:]\s*function\b/)
                # name = function ... / name: function ...
                kind = :function
              elsif key_expected && check(/\s*:/)
                kind = :key
              end
            end
            function_expected = (kind == :keyword) && (match == 'function')
            key_expected = false
            encoder.text_token match, kind
            
          elsif match = scan(/["']/)
            # A quoted object literal key gets its own group kind.
            if key_expected && check(KEY_CHECK_PATTERN[match])
              state = :key
            else
              state = :string
            end
            encoder.begin_group state
            string_delimiter = match
            encoder.text_token match, :delimiter
            
          elsif value_expected && (match = scan(/\//))
            # "/" where a value is expected starts a regexp literal...
            encoder.begin_group :regexp
            state = :regexp
            string_delimiter = '/'
            encoder.text_token match, :delimiter
            
          elsif match = scan(/ \/ /x)
            # ...otherwise it is the division operator.
            value_expected = true
            key_expected = false
            encoder.text_token match, :operator
            
          else
            encoder.text_token getch, :error
            
          end
          
        when :string, :regexp, :key
          if match = scan(STRING_CONTENT_PATTERN[string_delimiter])
            encoder.text_token match, :content
          elsif match = scan(/["'\/]/)
            # The content pattern consumes the other delimiter characters, so
            # this is the closing delimiter.
            encoder.text_token match, :delimiter
            if state == :regexp && (modifiers = scan(/[gim]+/))
              encoder.text_token modifiers, :modifier
            end
            encoder.end_group state
            string_delimiter = nil
            key_expected = value_expected = false
            state = :initial
          elsif state != :regexp && (match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox))
            # JavaScript handles escape sequences identically in '...' and
            # "..." strings, so the delimiter does not matter here.
            encoder.text_token match, :char
          elsif state == :regexp && match = scan(/ \\ (?: #{ESCAPE} | #{REGEXP_ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
            encoder.text_token match, :char
          elsif match = scan(/\\./m)
            # Backslash followed by a character that is not a known escape.
            encoder.text_token match, :content
          elsif match = scan(/ \\ | $ /x)
            # A backslash that none of the patterns above accepted, or the end
            # of a line: close the group; a consumed backslash is an error.
            encoder.end_group state
            encoder.text_token match, :error unless match.empty?
            string_delimiter = nil
            key_expected = value_expected = false
            state = :initial
          else
            raise_inspect "else case #{string_delimiter} reached; %p not handled." % peek(1), encoder
          end
          
        when :open_multi_line_comment
          # Continue a block comment: up to its end if present, else everything.
          if match = scan(%r! .*? \*/ !mx)
            state = :initial
          else
            match = scan(%r! .+ !mx)
          end
          value_expected = true
          encoder.text_token match, :comment if match
          
        else
          #:nocov:
          raise_inspect 'Unknown state: %p' % [state], encoder
          #:nocov:
          
        end
        
      end
      
      if options[:keep_state]
        @state = state, string_delimiter
      end
      
      # Close a group that is still open at the end of the input so that
      # begin_group and end_group calls stay balanced.
      if [:string, :regexp, :key].include? state
        encoder.end_group state
      end
      
      encoder
    end
    
    # Resets this scanner and, if it has been created, the nested XML scanner.
    def reset_instance
      super
      @xml_scanner.reset if defined? @xml_scanner
    end
    
    # Returns the scanner used for XML elements embedded in the code,
    # creating it on first use. It is given this scanner's @tokens.
    def xml_scanner
      @xml_scanner ||= CodeRay.scanner :xml, :tokens => @tokens, :keep_tokens => true, :keep_state => false
    end
    
  end
  
end
end