module CodeRay
module Scanners
# This scanner is really complex, since Ruby _is_ a complex language!
#
# It tries to highlight 100% of all common code,
# and 90% of strange code.
#
# It is optimized for HTML highlighting, and is not very useful for
# parsing or pretty printing.
class Ruby < Scanner
# Declare this scanner under the :ruby key and tie it to the 'rb' file
# extension. NOTE(review): both macros are presumably provided by the
# Scanner superclass / plugin system -- they are not defined in this file.
register_for :ruby
file_extension 'rb'
# Helper constants are autoloaded, i.e. their files are only required the
# first time Patterns or StringState is referenced. The paths are built by
# CodeRay.coderay_path from the given path segments.
autoload :Patterns, CodeRay.coderay_path('scanners', 'ruby', 'patterns')
autoload :StringState, CodeRay.coderay_path('scanners', 'ruby', 'string_state')
# Builds a fresh StringState describing a string literal of type :string
# that is delimited by a double quote.
#
# The second constructor argument (+true+) is the same slot that
# scan_tokens fills with "is this literal interpreted?" when it creates
# string states itself, so the returned state treats its content as
# interpreted (escapes / interpolation).
#
# Returns the new StringState.
def interpreted_string_state
  double_quote = '"'
  StringState.new(:string, true, double_quote)
end
protected
# Puts the scanner into its starting state.
#
# scan_tokens falls back to @state when no :state option is passed, so
# this makes a scan begin in :initial (plain code, no pending heredocs).
# NOTE(review): presumably called by the Scanner base class while the
# scanner is being initialised -- the call site is not in this file.
def setup
  @state = :initial
end
# Tokenizes the remaining input as Ruby source and reports it to +encoder+.
#
# The method is a hand-written state machine. +state+ is either
#
# * a Symbol while scanning ordinary code (:initial, :def_expected,
#   :dot_expected, :module_expected, :undef_expected,
#   :undef_comma_expected, :alias_expected), or
# * a StringState while inside a string-like literal (string, symbol,
#   regexp, shell command, heredoc body).
#
# The order of the +elsif+ branches is significant: the first pattern that
# matches at the current position wins.
#
# encoder:: object receiving +text_token+, +begin_group+ and +end_group+.
# options:: Hash. Keys read here:
#           :state      - state to start from instead of @state; either a
#                         bare state or a [state, heredocs] pair.
#           :keep_state - when truthy, the final state and the pending
#                         heredocs are stored in @state as a pair.
#
# Returns +encoder+.
def scan_tokens encoder, options
  # Either a bare state or a [state, heredocs] pair (see :keep_state below).
  state, heredocs = options[:state] || @state
  # Work on a copy; the list is consumed with #shift further down.
  heredocs = heredocs.dup if heredocs.is_a?(Array)

  # Resuming inside a string-like literal: re-open its token group.
  if state && state.instance_of?(StringState)
    encoder.begin_group state.type
  end

  # State to switch back to after the next token scanned in :initial.
  last_state = nil

  # Holds '.' or '::' right after that operator was scanned, so the next
  # identifier is read as a method / constant name.
  method_call_expected = false
  # Truthy when the next token may start a value (so '/' opens a regexp,
  # a sign may prefix a number, '<<' may open a heredoc, ...). The special
  # value :colon_expected (after '?' or 'when') additionally prevents
  # "ident:" from being read as a hash key.
  value_expected = true

  # One [state, curly depth, heredocs] entry per currently open #{...}.
  inline_block_stack = nil
  inline_block_curly_depth = 0

  # Pending heredoc bodies are scanned first.
  if heredocs
    state = heredocs.shift
    encoder.begin_group state.type
    heredocs = nil if heredocs.empty?
  end

  # def_object_stack = nil
  # def_object_paren_depth = 0

  patterns = Patterns # avoid constant lookup

  # Selects the /u variants of the identifier patterns below.
  unicode = string.respond_to?(:encoding) && string.encoding.name == 'UTF-8'

  until eos?

    if state.instance_of? ::Symbol

      if match = scan(/[ \t\f\v]+/)
        encoder.text_token match, :space

      elsif match = scan(/\n/)
        if heredocs
          unscan # heredoc scanning needs \n at start
          state = heredocs.shift
          encoder.begin_group state.type
          heredocs = nil if heredocs.empty?
        else
          # A newline ends an "undef a, b" list.
          state = :initial if state == :undef_comma_expected
          encoder.text_token match, :space
          value_expected = true
        end

      # Comments. At the beginning of a line this also accepts a "#!" line
      # (group 1 set => :doctype) and whatever RUBYDOC_OR_DATA matches.
      elsif match = scan(bol? ? / \#(!)?.* | #{patterns::RUBYDOC_OR_DATA} /ox : /\#.*/)
        encoder.text_token match, self[1] ? :doctype : :comment

      # Backslash-newline (line continuation).
      elsif match = scan(/\\\n/)
        if heredocs
          unscan # heredoc scanning needs \n at start
          # Emit only the backslash; the newline is left for the heredoc state.
          encoder.text_token scan(/\\/), :space
          state = heredocs.shift
          encoder.begin_group state.type
          heredocs = nil if heredocs.empty?
        else
          encoder.text_token match, :space
        end

      elsif state == :initial

        # IDENTS #
        if !method_call_expected &&
           match = scan(unicode ? /#{patterns::METHOD_NAME}/uo :
                                  /#{patterns::METHOD_NAME}/o)
          kind = patterns::IDENT_KIND[match]
          if value_expected != :colon_expected && scan(/:(?!:)/)
            # "name:" -- a hash key / keyword argument label.
            value_expected = true
            encoder.text_token match, :key
            encoder.text_token ':', :operator
          else
            value_expected = false
            if kind == :ident
              # Capitalized, not ending in ! or ?, and not followed by "(".
              if match[/\A[A-Z]/] && !(match[/[!?]$/] || match?(/\(/))
                kind = :constant
              end
            elsif kind == :keyword
              state = patterns::KEYWORD_NEW_STATE[match]
              if patterns::KEYWORDS_EXPECTING_VALUE[match]
                value_expected = match == 'when' ? :colon_expected : true
              end
            end
            value_expected = true if !value_expected && check(/#{patterns::VALUE_FOLLOWS}/o)
            encoder.text_token match, kind
          end

        elsif method_call_expected &&
              match = scan(unicode ? /#{patterns::METHOD_AFTER_DOT}/uo :
                                     /#{patterns::METHOD_AFTER_DOT}/o)
          # Name after '.' or '::'. After '::' a capitalized name that is
          # not followed by "(" is a constant.
          if method_call_expected == '::' && match[/\A[A-Z]/] && !match?(/\(/)
            encoder.text_token match, :constant
          else
            encoder.text_token match, :ident
          end
          method_call_expected = false
          value_expected = check(/#{patterns::VALUE_FOLLOWS}/o)

        # OPERATORS #
        # Group 1: '.' or '::' (a method call follows).
        # Group 2: operators / opening brackets after which a value follows.
        # Ungrouped: closing brackets.
        elsif !method_call_expected && match = scan(/ (\.(?!\.)|::) | ( \.\.\.? | ==?=? | [,\(\[\{] ) | [\)\]\}] /x)
          method_call_expected = self[1]
          value_expected = !method_call_expected && !!self[2]
          if inline_block_stack
            # Track curly braces to find the one closing the current #{...}.
            case match
            when '{'
              inline_block_curly_depth += 1
            when '}'
              inline_block_curly_depth -= 1
              if inline_block_curly_depth == 0 # closing brace of inline block reached
                # Restore what was saved when the "#{" was opened.
                state, inline_block_curly_depth, heredocs = inline_block_stack.pop
                inline_block_stack = nil if inline_block_stack.empty?
                heredocs = nil if heredocs && heredocs.empty?
                encoder.text_token match, :inline_delimiter
                encoder.end_group :inline
                next
              end
            end
          end
          encoder.text_token match, :operator

        elsif match = scan(unicode ? /#{patterns::SYMBOL}/uo :
                                     /#{patterns::SYMBOL}/o)
          case delim = match[1]
          when ?', ?"
            # Quoted symbol (:'...' or :"..."): scan the body as a string.
            encoder.begin_group :symbol
            encoder.text_token ':', :symbol
            match = delim.chr
            encoder.text_token match, :delimiter
            state = self.class::StringState.new :symbol, delim == ?", match
          else
            encoder.text_token match, :symbol
            value_expected = false
          end

        # Quoted strings. A string without backslashes (and, for double
        # quotes, without '#') that is closed in the available input is
        # matched in one go; otherwise only the opening quote is matched.
        elsif match = scan(/ ' (?:(?>[^'\\]*) ')? | " (?:(?>[^"\\\#]*) ")? /mx)
          encoder.begin_group :string
          if match.size == 1
            encoder.text_token match, :delimiter
            state = self.class::StringState.new :string, match == '"', match # important for streaming
          else
            encoder.text_token match[0,1], :delimiter
            encoder.text_token match[1..-2], :content if match.size > 2
            encoder.text_token match[-1,1], :delimiter
            encoder.end_group :string
            value_expected = false
          end

        elsif match = scan(unicode ? /#{patterns::INSTANCE_VARIABLE}/uo :
                                     /#{patterns::INSTANCE_VARIABLE}/o)
          value_expected = false
          encoder.text_token match, :instance_variable

        # '/' opens a regexp only where a value may start.
        elsif value_expected && match = scan(/\//)
          encoder.begin_group :regexp
          encoder.text_token match, :delimiter
          state = self.class::StringState.new :regexp, true, '/'

        # Numbers; a leading sign is only accepted where a value may start.
        elsif match = scan(value_expected ? /[-+]?#{patterns::NUMERIC}/o : /#{patterns::NUMERIC}/o)
          if method_call_expected
            # A number directly after '.' or '::'.
            encoder.text_token match, :error
            method_call_expected = false
          else
            encoder.text_token match, self[1] ? :float : :integer # TODO: send :hex/:octal/:binary
          end
          value_expected = false

        elsif match = scan(/ [-+!~^\/]=? | [:;] | [*|&]{1,2}=? | >>? /x)
          value_expected = true
          encoder.text_token match, :operator

        elsif value_expected && match = scan(/#{patterns::HEREDOC_OPEN}/o)
          quote = self[3]
          delim = self[quote ? 4 : 2]
          kind = patterns::QUOTE_TO_TYPE[quote]
          encoder.begin_group kind
          encoder.text_token match, :delimiter
          encoder.end_group kind
          # The body is scanned later, when the next newline is reached.
          heredocs ||= [] # create heredocs if empty
          heredocs << self.class::StringState.new(kind, quote != "'", delim,
            self[1] == '-' ? :indented : :linestart)
          value_expected = false

        elsif value_expected && match = scan(/#{patterns::FANCY_STRING_START}/o)
          kind = patterns::FANCY_STRING_KIND[self[1]]
          encoder.begin_group kind
          state = self.class::StringState.new kind, patterns::FANCY_STRING_INTERPRETED[self[1]], self[2]
          encoder.text_token match, :delimiter

        elsif value_expected && match = scan(/#{patterns::CHARACTER}/o)
          value_expected = false
          encoder.text_token match, :integer

        elsif match = scan(/ %=? | <(?:<|=>?)? | \? /x)
          # After the ternary '?', "ident:" must not be read as a hash key.
          value_expected = match == '?' ? :colon_expected : true
          encoder.text_token match, :operator

        elsif match = scan(/`/)
          encoder.begin_group :shell
          encoder.text_token match, :delimiter
          state = self.class::StringState.new :shell, true, match

        elsif match = scan(unicode ? /#{patterns::GLOBAL_VARIABLE}/uo :
                                     /#{patterns::GLOBAL_VARIABLE}/o)
          encoder.text_token match, :global_variable
          value_expected = false

        elsif match = scan(unicode ? /#{patterns::CLASS_VARIABLE}/uo :
                                     /#{patterns::CLASS_VARIABLE}/o)
          encoder.text_token match, :class_variable
          value_expected = false

        # A lone backslash at the very end of the input.
        elsif match = scan(/\\\z/)
          encoder.text_token match, :space

        else
          # Nothing matched at this position.
          if method_call_expected
            # Retry the same position without expecting a method name.
            method_call_expected = false
            next
          end
          unless unicode
            # check for unicode
            # $DEBUG is switched off while probing and restored afterwards.
            # The previous value is held in a local so no extra global is
            # created and concurrent scans cannot clobber each other's copy.
            debug_before, $DEBUG = $DEBUG, false
            begin
              if check(/./mu).size > 1
                # seems like we should try again with unicode
                unicode = true
              end
            rescue
              # bad unicode char; use getch
            ensure
              $DEBUG = debug_before
            end
            next if unicode
          end

          encoder.text_token getch, :error

        end

        # A one-token excursion into :initial is over (set by :def_expected
        # or by "#$" / "#@" inside a string); go back where we came from.
        if last_state
          state = last_state unless state.is_a?(StringState) # otherwise, a simple 'def"' results in unclosed tokens
          last_state = nil
        end

      elsif state == :def_expected
        # Method name directly after "def", not followed by '.' or '::'.
        if match = scan(unicode ? /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/uo :
                                  /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
          encoder.text_token match, :method
          state = :initial
        else
          # Possibly "def receiver.name": scan one token normally, then
          # expect the dot.
          last_state = :dot_expected
          state = :initial
        end

      elsif state == :dot_expected
        if match = scan(/\.|::/)
          # invalid definition
          state = :def_expected
          encoder.text_token match, :operator
        else
          state = :initial
        end

      elsif state == :module_expected
        if match = scan(/<</)
          # "class << ..." -- stay in this state.
          encoder.text_token match, :operator
        else
          state = :initial
          if match = scan(unicode ? / (?:#{patterns::IDENT}::)* #{patterns::IDENT} /oux :
                                    / (?:#{patterns::IDENT}::)* #{patterns::IDENT} /ox)
            encoder.text_token match, :class
          end
        end

      elsif state == :undef_expected
        state = :undef_comma_expected
        if match = scan(unicode ? /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/uo :
                                  /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
          encoder.text_token match, :method
        elsif match = scan(/#{patterns::SYMBOL}/o)
          case delim = match[1]
          when ?', ?"
            encoder.begin_group :symbol
            encoder.text_token ':', :symbol
            match = delim.chr
            encoder.text_token match, :delimiter
            state = self.class::StringState.new :symbol, delim == ?", match
            # Continue the undef list once the quoted symbol is closed.
            state.next_state = :undef_comma_expected
          else
            encoder.text_token match, :symbol
          end
        else
          state = :initial
        end

      elsif state == :undef_comma_expected
        if match = scan(/,/)
          encoder.text_token match, :operator
          state = :undef_expected
        else
          state = :initial
        end

      elsif state == :alias_expected
        # "alias new old": both names and the blanks between them are
        # matched together; a name starting with ':' is a symbol.
        match = scan(unicode ? /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/uo :
                               /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/o)

        if match
          encoder.text_token self[1], (self[1][0] == ?: ? :symbol : :method)
          encoder.text_token self[2], :space
          encoder.text_token self[3], (self[3][0] == ?: ? :symbol : :method)
        end
        state = :initial

      else
        #:nocov:
        raise_inspect 'Unknown state: %p' % [state], encoder
        #:nocov:
      end

    else # StringState

      # Plain content up to the next position state.pattern matches, or
      # everything that is left when it does not match at all.
      match = scan_until(state.pattern) || scan_rest
      unless match.empty?
        encoder.text_token match, :content
        break if eos?
      end

      if state.heredoc && self[1] # end of heredoc
        # The rest of the terminator line is emitted as one delimiter token.
        match = getch
        match << scan_until(/$/) unless eos?
        encoder.text_token match, :delimiter unless match.empty?
        encoder.end_group state.type
        state = state.next_state
        next
      end

      # Look at the single character that stopped the content scan.
      case match = getch

      when state.delim
        if state.paren_depth
          # Nested bracket pair inside the literal: only the outermost
          # closing delimiter ends it.
          state.paren_depth -= 1
          if state.paren_depth > 0
            encoder.text_token match, :content
            next
          end
        end
        encoder.text_token match, :delimiter
        if state.type == :regexp && !eos?
          match = scan(/#{patterns::REGEXP_MODIFIERS}/o)
          encoder.text_token match, :modifier unless match.empty?
        end
        encoder.end_group state.type
        value_expected = false
        state = state.next_state

      when '\\'
        if state.interpreted
          if esc = scan(/#{patterns::ESCAPE}/o)
            encoder.text_token match + esc, :char
          else
            encoder.text_token match, :error
          end
        else
          # Non-interpreted literal: only an escaped delimiter or an
          # escaped backslash is reported as :char.
          case esc = getch
          when nil
            encoder.text_token match, :content
          when state.delim, '\\'
            encoder.text_token match + esc, :char
          else
            encoder.text_token match + esc, :content
          end
        end

      when '#'
        case peek(1)
        when '{'
          # "#{": remember where we are and scan the embedded code.
          inline_block_stack ||= []
          inline_block_stack << [state, inline_block_curly_depth, heredocs]
          value_expected = true
          state = :initial
          inline_block_curly_depth = 1
          encoder.begin_group :inline
          encoder.text_token match + getch, :inline_delimiter
        when '$', '@'
          # "#$var" / "#@var": scan one token as code, then return here
          # (see the last_state handling in the :initial branch).
          encoder.text_token match, :escape
          last_state = state
          state = :initial
        else
          #:nocov:
          raise_inspect 'else-case # reached; #%p not handled' % [peek(1)], encoder
          #:nocov:
        end

      when state.opening_paren
        state.paren_depth += 1
        encoder.text_token match, :content

      else
        #:nocov:
        raise_inspect 'else-case " reached; %p not handled, state = %p' % [match, state], encoder
        #:nocov:

      end

    end

  end

  # cleaning up
  # Close the group of a literal that is still open at the end of input.
  if state.is_a? StringState
    encoder.end_group state.type
  end

  if options[:keep_state]
    if state.is_a?(StringState) && state.heredoc
      # An unfinished heredoc body goes back to the front of the pending list.
      (heredocs ||= []).unshift state
      state = :initial
    elsif heredocs && heredocs.empty?
      heredocs = nil
    end
    @state = state, heredocs
  end

  # Close every #{...} that is still open, together with the literal
  # that contains it.
  if inline_block_stack
    until inline_block_stack.empty?
      state, = *inline_block_stack.pop
      encoder.end_group :inline
      encoder.end_group state.type
    end
  end

  encoder
end
end
end
end