class Syntax::Ruby
will make mistakes on some ambiguous cases.
(and some less common syntax) but because it is not a true lexer, it
A tokenizer for the Ruby language. It recognizes all common syntax
def scan_delimited_region( delim_group, inner_group, starter, exprs,
:flush (meaning the delimiter must be flushed left), or :float (meaning the delimiter may be preceded by whitespace).
* +heredoc+ is either +false+, meaning the region is not a heredoc, or
the next character will be treated as the delimiter.
* +delim+ is the text to use as the delimiter of the region. If +nil+,
interpolated string or not
* +exprs+ is a boolean flag indicating whether the region is an
* +starter+ is the text to use as the starting delimiter
region
* +inner_group+ is the group to use to classify the contents of the
region
* +delim_group+ is the group to use to classify the delimiters of the
and here-documents.
delimited with quotes) as well as the more complex cases of %-strings
Scan a delimited region of text. This handles the simple cases (strings
# Scan a delimited region of text: quoted strings, %-literals, regexes,
# symbols and here-documents.
#
# * +delim_group+ is the group used for the delimiters of the region
# * +inner_group+ is the group used for the contents of the region
# * +starter+ is the text that introduces the region (e.g. "%w"); it is
#   only emitted when +delim+ is +nil+
# * +exprs+ says whether the region is interpolated: when true, escapes
#   and "#{...}", "#$x", "#@x" expressions are recognised; when false only
#   \' and \\ are treated as escapes
# * +delim+ is the closing delimiter. If +nil+, the next character is read
#   and used as the delimiter (with bracket pairs mapped to their closer)
# * +heredoc+ is +false+ for ordinary regions, :flush when the terminator
#   must start its line, or :float when it may be preceded by whitespace
#
# Raises RuntimeError if the scanner matches something that is neither an
# escape, the delimiter, nor an interpolation marker.
def scan_delimited_region( delim_group, inner_group, starter, exprs,
  delim=nil, heredoc=false )
  unless delim
    start_group delim_group, starter
    delim = scan( /./ )
    # Nothing usable follows the starter (end of input or a line break):
    # there is no region to scan, and passing nil on would only fail later.
    return unless delim
    append delim

    delim = case delim
      when '{' then '}'
      when '(' then ')'
      when '[' then ']'
      when '<' then '>'
      else delim
    end
  end

  start_region inner_group

  # Build one pattern matching whatever can interrupt the plain text of
  # the region: a backslash, the terminator, or an interpolation marker.
  items = "\\\\|".dup
  if heredoc
    items << "(^"
    items << '\s*' if heredoc == :float
    # NOTE(review): inside this double-quoted literal "\s" is a plain space,
    # so only trailing spaces (not tabs) are tolerated after the terminator.
    items << "#{Regexp.escape(delim)}\s*?)#{EOL}"
  else
    items << "#{Regexp.escape(delim)}"
  end
  items << "|#(\\$|@@?|\\{)" if exprs
  items = Regexp.new( items )

  loop do
    consumed = scan_until( items )
    if consumed.nil?
      # Unterminated region: everything that is left belongs to it.
      start_group inner_group, scan_until( /\Z/ )
      break
    end

    # The plain text is what was consumed minus the match itself. This
    # assumes scan_until has StringScanner semantics (returns the text from
    # the old position through the end of the match). Slicing it by
    # characters avoids mixing the byte offset +pos+ with character indexes,
    # which breaks as soon as multibyte text precedes the region.
    text = consumed[0, consumed.length - matched.length]
    start_group inner_group, text unless text.empty?

    case matched.strip
      when "\\"
        if exprs
          start_group :escape, "\\"
          c = getch
          if c
            append c
            # Hex and octal escapes may carry extra digits; scan returns
            # nil when there are none, in which case nothing is appended.
            digits = case c
              when 'x' then scan( /[a-fA-F0-9]{1,2}/ )
              when /[0-7]/ then scan( /[0-7]{0,2}/ )
            end
            append digits if digits
          end
        else
          case peek(1)
            when "'"
              scan(/./)
              start_group :escape, "\\'"
            when "\\"
              scan(/./)
              start_group :escape, "\\\\"
            else
              start_group inner_group, "\\"
          end
        end
      when delim
        end_region inner_group
        start_group delim_group, matched
        break
      when /^#/
        do_highlight = ( option(:expressions) == :highlight )
        start_region :expr if do_highlight
        start_group :expr, matched
        case matched[1]
          when ?{
            # Collect everything up to the brace that balances "#{".
            depth = 1
            content = "".dup
            while depth > 0
              piece = scan_until( /[\{}]/ )
              if piece.nil?
                content << scan_until( /\Z/ )
                break
              end
              depth += ( matched == "{" ? 1 : -1 )
              content << piece[0, piece.length - matched.length]
              content << matched if depth > 0
            end
            if do_highlight
              subtokenize "ruby", content
              start_group :expr, "}"
            else
              append content + "}"
            end
          when ?$, ?@
            # "#$!" and friends have no word characters after the sigil.
            name = scan( /\w+/ )
            append name if name
        end
        end_region :expr if do_highlight
      else raise "unexpected match on #{matched}"
    end
  end
end
def scan_heredoc(float, type, delim)
* +type+ is +nil+, a single quote, or a double quote
* +float+ indicates whether the delimiter may be floated to the right
Scan a heredoc beginning at the current position.
# Scan the body of a heredoc starting at the current position, by
# delegating to scan_delimited_region with :constant delimiters and
# :string contents.
#
# * +float+ is truthy when the terminator may be indented
# * +type+ is +nil+, a single quote, or a double quote; only a single
#   quote turns interpolation off
# * +delim+ is the terminator text
def scan_heredoc(float, type, delim)
  interpolated = (type != "'")
  placement = float ? :float : :flush
  scan_delimited_region(:constant, :string, "", interpolated, delim, placement)
end
def setup
# Reset the state that #step carries from one token to the next:
#
# * @heredocs       - heredocs announced with "<<" whose bodies have not
#                     been scanned yet
# * @selector       - set by #step after a single "."
# * @allow_operator - consulted by #step to decide whether "/" is an
#                     operator or opens a regex
def setup
  @heredocs = []
  @selector = @allow_operator = false
end
def step
# Consume one token (or one delimited region) at the current position and
# classify it with start_group / scan_delimited_region.
#
# State carried between calls:
# * @selector is true right after a single ".", so the following word is
#   treated as a method name rather than a keyword or constant
# * @allow_operator decides whether "/" is an operator or opens a regex
# * @heredocs queues [float, type, delim] triples for heredocs announced
#   with "<<"; their bodies are scanned at the next line break
def step
  # The trailing (?!\w) keeps identifiers such as "nilable" or "self_test"
  # from being split into a pseudo-constant plus a leftover identifier.
  pseudo_constant = /(__FILE__|__LINE__|true|false|nil|self)[?!]?(?!\w)/

  case
    when bol? && check( /=begin/ )
      # Without a closing "=end" the rest of the input is comment text;
      # emitting nil here would consume nothing and never advance.
      start_group( :comment, scan_until( /^=end#{EOL}/ ) || scan_until( /\Z/ ) )
    when bol? && check( /__END__#{EOL}/ )
      start_group( :comment, scan_until( /\Z/ ) )
    # After a "." these words are method names ("x.class == Foo"), so they
    # are left for the identifier branch, which also resets @selector.
    when !@selector && check( /def\s+/ )
      start_group :keyword, scan( /def\s+/ )
      start_group :method, scan_until( /(?=[;(\s]|#{EOL})/ )
    when !@selector && check( /class\s+/ )
      start_group :keyword, scan( /class\s+/ )
      start_group :class, scan_until( /(?=[;\s<]|#{EOL})/ )
    when !@selector && check( /module\s+/ )
      start_group :keyword, scan( /module\s+/ )
      start_group :module, scan_until( /(?=[;\s]|#{EOL})/ )
    when check( /::/ )
      start_group :punct, scan(/::/)
    when check( /:"/ )
      start_group :symbol, scan(/:/)
      scan_delimited_region :symbol, :symbol, "", true
      @allow_operator = true
    when check( /:'/ )
      start_group :symbol, scan(/:/)
      scan_delimited_region :symbol, :symbol, "", false
      @allow_operator = true
    when scan( /:[_a-zA-Z@$][$@\w]*[=!?]?/ )
      start_group :symbol, matched
      @allow_operator = true
    when scan( /\?(\\[^\n\r]|[^\\\n\r\s])/ )
      start_group :char, matched
      @allow_operator = true
    when check( pseudo_constant )
      # As a method name (after ".") or with a ?/! suffix it is an identifier.
      group = ( @selector || matched.end_with?( "?", "!" ) ) ? :ident : :constant
      start_group group, scan( pseudo_constant )
      @selector = false
      @allow_operator = true
    when scan(/0([bB][01]+|[oO][0-7]+|[dD][0-9]+|[xX][0-9a-fA-F]+)/)
      start_group :number, matched
      @allow_operator = true
    else
      case peek(2)
        when "%r"
          scan_delimited_region :punct, :regex, scan( /../ ), true
          @allow_operator = true
        when "%w", "%q"
          scan_delimited_region :punct, :string, scan( /../ ), false
          @allow_operator = true
        when "%s"
          scan_delimited_region :punct, :symbol, scan( /../ ), false
          @allow_operator = true
        when "%W", "%Q", "%x"
          scan_delimited_region :punct, :string, scan( /../ ), true
          @allow_operator = true
        when /%[^\sa-zA-Z0-9]/
          scan_delimited_region :punct, :string, scan( /./ ), true
          @allow_operator = true
        when "<<"
          # Directly after a word, "<<" is the shift/append operator.
          saw_word = ( chunk[-1,1] =~ /[\w!?]/ )
          start_group :punct, scan( /<</ )
          if saw_word
            @allow_operator = false
            return
          end

          float_right = scan( /-/ )
          append "-" if float_right
          if ( type = scan( /['"]/ ) )
            append type
            delim = scan_until( /(?=#{type})/ )
            if delim.nil?
              append scan_until( /\Z/ )
              return
            end
          else
            delim = scan( /\w+/ ) or return
          end
          start_group :constant, delim
          start_group :punct, scan( /#{type}/ ) if type
          # The body is scanned later, at the next line break.
          @heredocs << [ float_right, type, delim ]
          @allow_operator = true
        else
          case peek(1)
            when /[\n\r]/
              if @heredocs.empty?
                start_group :normal, scan( /\s+/ )
              else
                scan_heredoc(*@heredocs.shift)
              end
              @allow_operator = false
            when /\s/
              start_group :normal, scan( /\s+/ )
            when "#"
              start_group :comment, scan( /#[^\n\r]*/ )
            when /[A-Z]/
              start_group @selector ? :ident : :constant, scan( /\w+/ )
              @allow_operator = true
            when /[a-z_]/
              word = scan( /\w+[?!]?/ )
              if !@selector && KEYWORDS.include?( word )
                start_group :keyword, word
                @allow_operator = false
              else
                # Plain "else": operator tracking must not depend on what
                # start_group happens to return.
                start_group :ident, word
                @allow_operator = true
              end
              @selector = false
            when /\d/
              start_group :number, scan( /[\d_]+(\.[\d_]+)?([eE][\d_]+)?/ )
              @allow_operator = true
            when '"'
              scan_delimited_region :punct, :string, "", true
              @allow_operator = true
            when '/'
              if @allow_operator
                start_group :punct, scan(%r{/})
                @allow_operator = false
              else
                scan_delimited_region :punct, :regex, "", true
                @allow_operator = true
              end
            when "'"
              scan_delimited_region :punct, :string, "", false
              @allow_operator = true
            when "."
              dots = scan( /\.{1,3}/ )
              start_group :punct, dots
              @selector = ( dots.length == 1 )
            when /[@]/
              start_group :attribute, scan( /@{1,2}\w*/ )
              @allow_operator = true
            when /[$]/
              start_group :global, scan(/\$/)
              start_group :global, scan( /\w+|./ ) if check(/./)
              @allow_operator = true
            when /[-!?*\/+=<>(\[\{}:;,&|%]/
              start_group :punct, scan(/./)
              @allow_operator = false
            when /[)\]]/
              start_group :punct, scan(/./)
              @allow_operator = true
            else
              # all else just falls through this, to prevent
              # infinite loops...
              append getch
          end
      end
  end
end