class CodeRay::Scanners::Lua

Scanner for the Lua programming language.

The language’s complete syntax is defined in the Lua reference manual,
which is what this scanner tries to conform to.

def scan_tokens(encoder, options)

CodeRay entry hook. Starts parsing.
# CodeRay entry hook. Starts parsing.
#
# Tokenizes the remaining input with a small state machine and reports
# every token to +encoder+ through +text_token+, +begin_group+ and
# +end_group+.
#
# encoder:: object receiving the token stream.
# options:: hash; <tt>:state</tt> overrides the starting state (default is
#           <tt>@state</tt>), and a truthy <tt>:keep_state</tt> stores the
#           final state back into <tt>@state</tt>.
#
# Returns +encoder+. Raises RuntimeError when the state machine is asked to
# handle a state it does not know.
def scan_tokens(encoder, options)
  state = options[:state] || @state
  brace_depth = @brace_depth
  num_equals = nil  # Number of "=" in the current long bracket opener
  start_delim = nil # Quote character that opened the current short string

  until eos?
    case state

    when :initial
      if match = scan(/\-\-\[\=*\[/)   #--[[ long (possibly multiline) comment ]]
        num_equals = match.count("=") # Number must match for comment end
        encoder.begin_group(:comment)
        encoder.text_token(match, :delimiter)
        state = :long_comment

      elsif match = scan(/--.*$/) # --Lua comment
        encoder.text_token(match, :comment)

      elsif match = scan(/\[=*\[/)     # [[ long (possibly multiline) string ]]
        num_equals = match.count("=") # Number must match for string end
        encoder.begin_group(:string)
        encoder.text_token(match, :delimiter)
        state = :long_string

      # ::goto_label:: -- the name may be a single character, hence `*'.
      elsif match = scan(/::\s*[a-zA-Z_][a-zA-Z0-9_]*\s*::/)
        encoder.text_token(match, :label)

      # _UPPERCASE are names reserved for Lua. The lookahead makes sure the
      # whole identifier is uppercase, so that e.g. `_Foo' is not split
      # into `_F' and `oo'.
      elsif match = scan(/_[A-Z]+(?![a-zA-Z0-9_])/)
        encoder.text_token(match, :predefined)

      elsif match = scan(/[a-zA-Z_][a-zA-Z0-9_]*/) # Normal letters (or letters followed by digits)
        kind = IDENT_KIND[match]

        # Extra highlighting for entities following certain keywords
        if kind == :keyword
          case match
          when "function" then state = :function_expected
          when "goto"     then state = :goto_label_expected
          when "local"    then state = :local_var_expected
          end
        end

        encoder.text_token(match, kind)

      elsif match = scan(/\{/) # Opening table brace {
        encoder.begin_group(:map)
        encoder.text_token(match, brace_depth >= 1 ? :inline_delimiter : :delimiter)
        brace_depth += 1
        state        = :map

      elsif match = scan(/\}/) # Closing table brace }
        if brace_depth == 1
          brace_depth = 0
          encoder.text_token(match, :delimiter)
          encoder.end_group(:map)
        elsif brace_depth == 0 # Mismatched brace
          encoder.text_token(match, :error)
        else
          brace_depth -= 1
          encoder.text_token(match, :inline_delimiter)
          encoder.end_group(:map)
          state = :map
        end

      elsif match = scan(/["']/) # String delimiters " and '
        encoder.begin_group(:string)
        encoder.text_token(match, :delimiter)
        start_delim = match
        state       = :string

      # Optional sign, then either a hex float or a decimal float.
      # Hexadecimal constants have no E power, decimal ones no P power.
      elsif match = scan(/-? (?:0x\h* \. \h+ (?:p[+\-]?\d+)? | \d*\.\d+ (?:e[+\-]?\d+)?)/ix)
        encoder.text_token(match, :float)

      # Optional sign, then either a hex integer or a decimal integer.
      # Hexadecimal constants have no E power, decimal ones no P power.
      elsif match = scan(/-? (?:0x\h+ (?:p[+\-]?\d+)? | \d+ (?:e[+\-]?\d+)?)/ix)
        encoder.text_token(match, :integer)

      elsif match = scan(/[\+\-\*\/%^\#=~<>\(\)\[\]:;,] | \.(?!\d)/x) # Operators
        encoder.text_token(match, :operator)

      elsif match = scan(/\s+/) # Space
        encoder.text_token(match, :space)

      else # Invalid stuff. Note that Lua doesn't accept multibyte chars outside of strings, hence these are also errors.
        encoder.text_token(getch, :error)
      end

      # It may be that we're scanning a full-blown subexpression of a table
      # (tables can contain full expressions in parts).
      # If this is the case, return to :map scanning state.
      state = :map if state == :initial && brace_depth >= 1

    when :function_expected
      if match = scan(/\(.*?\)/m) # x = function() # "Anonymous" function without explicit name
        encoder.text_token(match, :operator)
        state = :initial
      elsif match = scan(/[a-zA-Z_] (?:[a-zA-Z0-9_\.] (?!\.\d))* [\.\:]/x) # function tbl.subtbl.foo() | function tbl:foo() # Colon only allowed as last separator
        encoder.text_token(match, :ident)
      elsif match = scan(/[a-zA-Z_][a-zA-Z0-9_]*/) # function foo()
        encoder.text_token(match, :function)
        state = :initial
      elsif match = scan(/\s+/) # Between the `function' keyword and the ident may be any amount of whitespace
        encoder.text_token(match, :space)
      else
        encoder.text_token(getch, :error)
        state = :initial
      end

    when :goto_label_expected
      if match = scan(/[a-zA-Z_][a-zA-Z0-9_]*/)
        encoder.text_token(match, :label)
        state = :initial
      elsif match = scan(/\s+/) # Between the `goto' keyword and the label may be any amount of whitespace
        encoder.text_token(match, :space)
      else
        encoder.text_token(getch, :error)
      end

    when :local_var_expected
      # local function ... -- the word boundary keeps identifiers that
      # merely start with "function" (e.g. `functionality') intact.
      if match = scan(/function\b/)
        encoder.text_token(match, :keyword)
        state = :function_expected
      elsif match = scan(/[a-zA-Z_][a-zA-Z0-9_]*/)
        encoder.text_token(match, :local_variable)
      elsif match = scan(/,/)
        encoder.text_token(match, :operator)
      elsif match = scan(/\=/)
        encoder.text_token(match, :operator)
        # After encountering the equal sign, arbitrary expressions are
        # allowed again, so just return to the main state for further
        # parsing.
        state = :initial
      elsif match = scan(/\n/)
        encoder.text_token(match, :space)
        state = :initial
      elsif match = scan(/\s+/)
        encoder.text_token(match, :space)
      else
        encoder.text_token(getch, :error)
      end

    when :long_comment
      if match = scan(/.*?(?=\]={#{num_equals}}\])/m)
        encoder.text_token(match, :content)

        delim = scan(/\]={#{num_equals}}\]/)
        encoder.text_token(delim, :delimiter)
      else # No terminator found till EOF
        encoder.text_token(rest, :error)
        terminate
      end
      encoder.end_group(:comment)
      state = :initial

    when :long_string
      if match = scan(/.*?(?=\]={#{num_equals}}\])/m) # Long strings do not interpret any escape sequences
        encoder.text_token(match, :content)

        delim = scan(/\]={#{num_equals}}\]/)
        encoder.text_token(delim, :delimiter)
      else # No terminator found till EOF
        encoder.text_token(rest, :error)
        terminate
      end
      encoder.end_group(:string)
      state = :initial

    when :string
      if match = scan(/[^\\#{start_delim}\n]+/) # Everything except \ and the start delimiter character is string content (newlines are only allowed if preceded by \ or \z)
        encoder.text_token(match, :content)
      elsif match = scan(/\\(?:['"abfnrtv\\]|z\s*|x\h\h|\d{1,3}|\n)/m)
        encoder.text_token(match, :char)
      elsif match = scan(Regexp.compile(start_delim))
        encoder.text_token(match, :delimiter)
        encoder.end_group(:string)
        state = :initial
      elsif match = scan(/\n/) # Lua forbids unescaped newlines in normal non-long strings
        encoder.text_token("\\n\n", :error) # Visually appealing error indicator--otherwise users may wonder whether the highlighter cannot highlight multiline strings
        encoder.end_group(:string)
        state = :initial
      else
        encoder.text_token(getch, :error)
      end

    when :map
      if match = scan(/[,;]/)
        encoder.text_token(match, :operator)
      elsif match = scan(/[a-zA-Z_][a-zA-Z0-9_]* (?=\s*=)/x)
        encoder.text_token(match, :key)
        encoder.text_token(scan(/\s+/), :space) if check(/\s+/)
        encoder.text_token(scan(/\=/), :operator)
        state = :initial
      elsif match = scan(/\s+/m)
        encoder.text_token(match, :space)
      else
        # Note this clause doesn't advance the scan pointer, it's a kind of
        # "retry with other options" (the :initial state then of course
        # advances the pointer).
        state = :initial
      end

    else
      raise "Unknown state: #{state.inspect}"
    end

  end

  if options[:keep_state]
    @state = state
  end

  # The input may end while a group is still open. Long strings and long
  # comments are in that situation when the input stops directly after
  # their opening delimiter, so they have to be closed here as well.
  case state
  when :string, :long_string
    encoder.end_group :string
  when :long_comment
    encoder.end_group :comment
  end
  brace_depth.times { encoder.end_group :map }

  encoder
end

def setup

Scanner initialization.
# Scanner initialization.
#
# Puts the scanner into its starting configuration: the state machine
# begins in +:initial+ and no table brace is counted as open.
def setup
  @state, @brace_depth = :initial, 0
  @brace_depth # keep returning the brace depth, as the last assignment did
end