lib/coderay/scanners/java.rb
module CodeRay
module Scanners
# Scanner for Java.
class Java < Scanner
register_for :java
autoload :BuiltinTypes, CodeRay.coderay_path('scanners', 'java', 'builtin_types')
# http://java.sun.com/docs/books/tutorial/java/nutsandbolts/_keywords.html
KEYWORDS = %w[
assert break case catch continue default do else
finally for if instanceof import new package
return switch throw try typeof while
debugger export
] # :nodoc:
RESERVED = %w[ const goto ] # :nodoc:
CONSTANTS = %w[ false null true ] # :nodoc:
MAGIC_VARIABLES = %w[ this super ] # :nodoc:
TYPES = %w[
boolean byte char class double enum float int interface long
short void var
] << '[]' # :nodoc: because int[] should be highlighted as a type
DIRECTIVES = %w[
abstract extends final implements native private protected public
static strictfp synchronized throws transient volatile
] # :nodoc:
IDENT_KIND = WordList.new(:ident).
add(KEYWORDS, :keyword).
add(RESERVED, :reserved).
add(CONSTANTS, :predefined_constant).
add(MAGIC_VARIABLES, :local_variable).
add(TYPES, :type).
add(BuiltinTypes::List, :predefined_type).
add(BuiltinTypes::List.select { |builtin| builtin[/(Error|Exception)$/] }, :exception).
add(DIRECTIVES, :directive) # :nodoc:
ESCAPE = / [bfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x # :nodoc:
UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x # :nodoc:
STRING_CONTENT_PATTERN = {
"'" => /[^\\']+/,
'"' => /[^\\"]+/,
'/' => /[^\\\/]+/,
} # :nodoc:
IDENT = RUBY_VERSION < '1.9' ? /[a-zA-Z_][A-Za-z_0-9]*/ : Regexp.new('[[[:alpha:]]_][[[:alnum:]]_]*') # :nodoc:
protected
def scan_tokens encoder, options
state = :initial
string_delimiter = nil
package_name_expected = false
class_name_follows = false
last_token_dot = false
until eos?
case state
when :initial
if match = scan(/ \s+ | \\\n /x)
encoder.text_token match, :space
next
elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
encoder.text_token match, :comment
next
elsif package_name_expected && match = scan(/ #{IDENT} (?: \. #{IDENT} )* /ox)
encoder.text_token match, package_name_expected
elsif match = scan(/ #{IDENT} | \[\] /ox)
kind = IDENT_KIND[match]
if last_token_dot
kind = :ident
elsif class_name_follows
kind = :class
class_name_follows = false
else
case match
when 'import'
package_name_expected = :include
when 'package'
package_name_expected = :namespace
when 'class', 'interface'
class_name_follows = true
end
end
encoder.text_token match, kind
elsif match = scan(/ \.(?!\d) | [,?:()\[\]}] | -- | \+\+ | && | \|\| | \*\*=? | [-+*\/%^~&|<>=!]=? | <<<?=? | >>>?=? /x)
encoder.text_token match, :operator
elsif match = scan(/;/)
package_name_expected = false
encoder.text_token match, :operator
elsif match = scan(/\{/)
class_name_follows = false
encoder.text_token match, :operator
elsif check(/[\d.]/)
if match = scan(/0[xX][0-9A-Fa-f]+/)
encoder.text_token match, :hex
elsif match = scan(/(?>0[0-7]+)(?![89.eEfF])/)
encoder.text_token match, :octal
elsif match = scan(/\d+[fFdD]|\d*\.\d+(?:[eE][+-]?\d+)?[fFdD]?|\d+[eE][+-]?\d+[fFdD]?/)
encoder.text_token match, :float
elsif match = scan(/\d+[lL]?/)
encoder.text_token match, :integer
end
elsif match = scan(/["']/)
state = :string
encoder.begin_group state
string_delimiter = match
encoder.text_token match, :delimiter
elsif match = scan(/ @ #{IDENT} /ox)
encoder.text_token match, :annotation
else
encoder.text_token getch, :error
end
when :string
if match = scan(STRING_CONTENT_PATTERN[string_delimiter])
encoder.text_token match, :content
elsif match = scan(/["'\/]/)
encoder.text_token match, :delimiter
encoder.end_group state
state = :initial
string_delimiter = nil
elsif state == :string && (match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox))
if string_delimiter == "'" && !(match == "\\\\" || match == "\\'")
encoder.text_token match, :content
else
encoder.text_token match, :char
end
elsif match = scan(/\\./m)
encoder.text_token match, :content
elsif match = scan(/ \\ | $ /x)
encoder.end_group state
state = :initial
encoder.text_token match, :error unless match.empty?
else
raise_inspect "else case \" reached; %p not handled." % peek(1), encoder
end
else
raise_inspect 'Unknown state', encoder
end
last_token_dot = match == '.'
end
if state == :string
encoder.end_group state
end
encoder
end
end
end
end