class Syntax::Ruby

A tokenizer for the Ruby language. It recognizes all common syntax
(and some less common syntax) but because it is not a true lexer, it
will make mistakes on some ambiguous cases.

def scan_delimited_region( delim_group, inner_group, starter, exprs,

Scan a delimited region of text. This handles the simple cases (strings
delimited with quotes) as well as the more complex cases of %-strings
and here-documents.

* +delim_group+ is the group to use to classify the delimiters of the
region
* +inner_group+ is the group to use to classify the contents of the
region
* +starter+ is the text to use as the starting delimiter
* +exprs+ is a boolean flag indicating whether the region is an
interpolated string or not
* +delim+ is the text to use as the delimiter of the region. If +nil+,
the next character will be treated as the delimiter.
* +heredoc+ is either +false+, meaning the region is not a heredoc, or
:flush (meaning the delimiter must be flushed left), or
:float (meaning the delimiter doesn't have to be flush left).
# Scan a delimited region of text: a quoted string, a %-literal, or the
# body of a here-document.
#
# * +delim_group+ is the group used for the delimiters of the region
# * +inner_group+ is the group used for the contents of the region
# * +starter+ is the text emitted as the opening delimiter
# * +exprs+ is true when the region is interpolated: backslash escapes
#   and <tt>#{...}</tt>, <tt>#$x</tt>, <tt>#@x</tt> are then recognized
# * +delim+ is the closing delimiter. When +nil+, the next character of
#   the input is consumed and used (brackets are mapped to their closer).
# * +heredoc+ is +false+, or <tt>:flush</tt> (terminator must start the
#   line), or <tt>:float</tt> (terminator may be preceded by whitespace)
#
# If the input ends before the closing delimiter is found, the rest of
# the input is emitted as +inner_group+. Raises a RuntimeError if the
# scanner matches something that is none of the expected alternatives.
def scan_delimited_region( delim_group, inner_group, starter, exprs,
  delim=nil, heredoc=false )
  if !delim
    start_group delim_group, starter
    delim = scan( /./ )
    # Nothing usable follows the starter (end of input, or a newline,
    # which /./ does not match). Stop here instead of passing nil on to
    # Regexp.escape below, which would raise a TypeError.
    return if delim.nil?
    append delim
    delim = case delim
      when '{' then '}'
      when '(' then ')'
      when '[' then ']'
      when '<' then '>'
      else delim
    end
  end
  start_region inner_group

  # Build the pattern of "interesting" things inside the region: a
  # backslash, the closing delimiter, and (if interpolating) the start
  # of an embedded expression.
  items = "\\\\|".dup
  if heredoc
    items << "(^"
    items << '\s*' if heredoc == :float
    # NOTE(review): inside a double-quoted string "\s" is a literal
    # space, so only trailing spaces are tolerated after the terminator
    # -- confirm that is intended.
    items << "#{Regexp.escape(delim)}\s*?)#{EOL}"
  else
    items << "#{Regexp.escape(delim)}"
  end
  items << "|#(\\$|@@?|\\{)" if exprs
  items = Regexp.new( items )

  loop do
    start = pos
    match = scan_until( items )
    if match.nil?
      # Unterminated region: everything that is left belongs to it.
      start_group inner_group, scan_until( /\Z/ )
      break
    else
      # Plain text between the previous position and this match.
      text = pre_match[start..-1]
      start_group inner_group, text if text.length > 0
      case matched.strip
        when "\\"
          if exprs
            start_group :escape, "\\"
            c = getch
            # getch/scan return nil when nothing is left or nothing
            # matches; skip the append rather than hand it nil.
            append c if c
            case c
              when 'x'
                digits = scan( /[a-fA-F0-9]{1,2}/ )
                append digits if digits
              when /[0-7]/
                append scan( /[0-7]{0,2}/ )
            end
          else
            # Non-interpolated regions only know \' and \\ as escapes.
            case peek(1)
              when "'"
                scan(/./)
                start_group :escape, "\\'"
              when "\\"
                scan(/./)
                start_group :escape, "\\\\"
              else
                start_group inner_group, "\\"
            end
          end
        when delim
          end_region inner_group
          start_group delim_group, matched
          break
        when /^#/
          do_highlight = (option(:expressions) == :highlight)
          start_region :expr if do_highlight
          start_group :expr, matched
          case matched[1]
            when ?{
              # Collect everything up to the brace that balances the
              # opening "#{", tracking nested braces.
              depth = 1
              content = "".dup
              while depth > 0
                start = pos
                c = scan_until( /[\{}]/ )
                if c.nil?
                  content << scan_until( /\Z/ )
                  break
                else
                  depth += ( matched == "{" ? 1 : -1 )
                  content << pre_match[start..-1]
                  content << matched if depth > 0
                end
              end
              if do_highlight
                subtokenize "ruby", content
                start_group :expr, "}"
              else
                append content + "}"
              end
            when ?$, ?@
              # "#$" / "#@" not followed by a word yields no name.
              name = scan( /\w+/ )
              append name if name
          end
          end_region :expr if do_highlight
        else raise "unexpected match on #{matched}"
      end
    end
  end
end

def scan_heredoc(float, type, delim)

* +delim+ is the delimiter to look for
* +type+ is +nil+, a single quote, or a double quote
* +float+ indicates whether the delimiter may be floated to the right

Scan a heredoc beginning at the current position.
# Scan the body of a here-document starting at the current position, by
# delegating to scan_delimited_region.
#
# * +float+ is truthy when the terminator may be indented
# * +type+ is the quote character that wrapped the terminator, or +nil+;
#   only a single quote turns interpolation off
# * +delim+ is the terminator text to look for
def scan_heredoc(float, type, delim)
  interpolated = (type != "'")
  placement = float ? :float : :flush
  scan_delimited_region( :constant, :string, "", interpolated,
    delim, placement )
end

def setup

Perform ruby-specific setup
# Reset the Ruby-specific tokenizer state: both flags off and no
# here-documents pending.
def setup
  @selector = @allow_operator = false
  @heredocs = []
end

def step

Step through a single iteration of the tokenization process.
# Step through a single iteration of the tokenization process: look at
# the input at the current position, consume one token (or one delimited
# region) and emit it with the matching group.
#
# State carried between calls:
# * <tt>@selector</tt> is set after a single "."; the next word is then
#   emitted as +:ident+ even if it looks like a keyword or constant.
# * <tt>@allow_operator</tt> decides whether a "/" is emitted as
#   punctuation (true) or starts a regex region (false).
# * <tt>@heredocs</tt> queues the here-documents announced on the current
#   line; each is scanned when the next line break is reached.
def step
  case
    when bol? && check( /=begin/ )
      # An unterminated =begin block takes the rest of the input, the
      # same way other unterminated regions in this file are handled,
      # rather than passing nil on and consuming nothing.
      start_group( :comment,
        scan_until( /^=end#{EOL}/ ) || scan_until( /\Z/ ) )
    when bol? && check( /__END__#{EOL}/ )
      start_group( :comment, scan_until( /\Z/ ) )
  else
    case
      when check( /def\s+/ )
        start_group :keyword, scan( /def\s+/ )
        start_group :method,  scan_until( /(?=[;(\s]|#{EOL})/ )
      when check( /class\s+/ )
        start_group :keyword, scan( /class\s+/ )
        start_group :class,  scan_until( /(?=[;\s<]|#{EOL})/ )
      when check( /module\s+/ )
        start_group :keyword, scan( /module\s+/ )
        start_group :module,  scan_until( /(?=[;\s]|#{EOL})/ )
      when check( /::/ )
        start_group :punct, scan(/::/)
      when check( /:"/ )
        start_group :symbol, scan(/:/)
        scan_delimited_region :symbol, :symbol, "", true
        @allow_operator = true
      when check( /:'/ )
        start_group :symbol, scan(/:/)
        scan_delimited_region :symbol, :symbol, "", false
        @allow_operator = true
      when scan( /:[_a-zA-Z@$][$@\w]*[=!?]?/ )
        start_group :symbol, matched
        @allow_operator = true
      when scan( /\?(\\[^\n\r]|[^\\\n\r\s])/ )
        start_group :char, matched
        @allow_operator = true
      # The (?!\w) keeps identifiers that merely begin with one of these
      # words (e.g. "selfish", "nilable") from being split in two.
      when check( /(__FILE__|__LINE__|true|false|nil|self)(?!\w)[?!]?/ )
        if @selector || matched[-1] == ?? || matched[-1] == ?!
          start_group :ident,
            scan(/(__FILE__|__LINE__|true|false|nil|self)[?!]?/)
        else
          start_group :constant,
            scan(/(__FILE__|__LINE__|true|false|nil|self)/)
        end
        @selector = false
        @allow_operator = true
      when scan(/0([bB][01]+|[oO][0-7]+|[dD][0-9]+|[xX][0-9a-fA-F]+)/)
        start_group :number, matched
        @allow_operator = true
      else
        case peek(2)
          when "%r"
            scan_delimited_region :punct, :regex, scan( /../ ), true
            @allow_operator = true
          when "%w", "%q"
            scan_delimited_region :punct, :string, scan( /../ ), false
            @allow_operator = true
          when "%s"
            scan_delimited_region :punct, :symbol, scan( /../ ), false
            @allow_operator = true
          when "%W", "%Q", "%x"
            scan_delimited_region :punct, :string, scan( /../ ), true
            @allow_operator = true
          when /%[^\sa-zA-Z0-9]/
            scan_delimited_region :punct, :string, scan( /./ ), true
            @allow_operator = true
          when "<<"
            # A "<<" directly after a word character is treated as the
            # shift/append operator, not as a here-document.
            saw_word = ( chunk[-1,1] =~ /[\w!?]/ )
            start_group :punct, scan( /<</ )
            if saw_word
              @allow_operator = false
              return
            end
            float_right = scan( /-/ )
            append "-" if float_right
            if ( type = scan( /['"]/ ) )
              append type
              delim = scan_until( /(?=#{type})/ )
              if delim.nil?
                append scan_until( /\Z/ )
                return
              end
            else
              delim = scan( /\w+/ ) or return
            end
            start_group :constant, delim
            start_group :punct, scan( /#{type}/ ) if type
            # The body is scanned later, at the next line break.
            @heredocs << [ float_right, type, delim ]
            @allow_operator = true
          else
            case peek(1)
              when /[\n\r]/
                if @heredocs.empty?
                  start_group :normal, scan( /\s+/ )
                else
                  scan_heredoc(*@heredocs.shift)
                end
                @allow_operator = false
              when /\s/
                start_group :normal, scan( /\s+/ )
              when "#"
                start_group :comment, scan( /#[^\n\r]*/ )
              when /[A-Z]/
                start_group @selector ? :ident : :constant, scan( /\w+/ )
                @allow_operator = true
              when /[a-z_]/
                word = scan( /\w+[?!]?/ )
                if !@selector && KEYWORDS.include?( word )
                  start_group :keyword, word
                  @allow_operator = false
                else
                  # Plain "else": a bare "elsif" here would use the
                  # start_group call as its condition.
                  start_group :ident, word
                  @allow_operator = true
                end
                @selector = false
              when /\d/
                start_group :number,
                  scan( /[\d_]+(\.[\d_]+)?([eE][\d_]+)?/ )
                @allow_operator = true
              when '"'
                scan_delimited_region :punct, :string, "", true
                @allow_operator = true
              when '/'
                if @allow_operator
                  start_group :punct, scan(%r{/})
                  @allow_operator = false
                else
                  scan_delimited_region :punct, :regex, "", true
                  @allow_operator = true
                end
              when "'"
                scan_delimited_region :punct, :string, "", false
                @allow_operator = true
              when "."
                dots = scan( /\.{1,3}/ )
                start_group :punct, dots
                @selector = ( dots.length == 1 )
              when /[@]/
                start_group :attribute, scan( /@{1,2}\w*/ )
                @allow_operator = true
              when /[$]/
                start_group :global, scan(/\$/)
                start_group :global, scan( /\w+|./ ) if check(/./)
                @allow_operator = true
              when /[-!?*\/+=<>(\[\{}:;,&|%]/
                start_group :punct, scan(/./)
                @allow_operator = false
              when /[)\]]/
                start_group :punct, scan(/./)
                @allow_operator = true
              else
                # all else just falls through this, to prevent
                # infinite loops...
                append getch
            end
        end
    end
  end
end