class CodeRay::Scanners::PHP

Original by Stefan Walk.
Scanner for PHP.

def reset_instance

# Resets this scanner by calling the superclass implementation, then also
# resets the embedded HTML scanner stored in @html_scanner so that both
# scanners start fresh together.
def reset_instance
  super
  @html_scanner.reset
end

def scan_tokens encoder, options

# Scans the input until end of string, sending tokens to +encoder+, and
# returns +encoder+.
#
# The scanner is driven by a stack of states (+states+); the top entry selects
# the branch taken on each loop iteration:
#
#   :initial            - HTML; text is handed to @html_scanner
#   :php, :php_inline   - PHP code (:php_inline is code inside "{$...}")
#   :sqstring           - single-quoted string or <<<'ID' heredoc body
#   :dqstring           - double-quoted / backtick string or heredoc body
#   :class_expected     - right after the +class+ keyword
#   :function_expected  - right after the +function+ keyword
#
# While a "{$...}" interpolation is open, the string state below it on the
# stack is temporarily replaced by the pair [state, delimiter].
#
# encoder:: receives text_token / begin_group / end_group calls.
# options:: not referenced anywhere in this method body.
def scan_tokens encoder, options
  
  if check(RE::PHP_START) ||  # starts with <?
   (match?(/\s*<\S/) && check(/.{1,1000}#{RE::PHP_START}/om)) || # starts with tag and contains <?
   check(/.{0,1000}#{RE::HTML_INDICATOR}/om) ||
   check(/.{1,100}#{RE::PHP_START}/om)  # PHP start after max 100 chars
    # is HTML with embedded PHP, so start with HTML
    states = [:initial]
  else
    # is just PHP, so start with PHP surrounded by HTML
    states = [:initial, :php]
  end
  
  # label_expected: an identifier directly followed by a single ':' is
  # classified as :label only while this flag is set.
  # case_expected: set by the 'case' / 'default' keywords; consumed by the next
  # operator token (see the RE::OPERATOR branch below).
  label_expected = true
  case_expected = false
  
  # heredoc_delimiter: nil outside heredocs, otherwise a Regexp matching the
  # closing identifier of the heredoc being scanned.
  # delimiter: the quote character that opened the current string.
  # modifier: a pending 'b' binary-string prefix waiting for its string.
  heredoc_delimiter = nil
  delimiter = nil
  modifier = nil
  
  until eos?
    
    case states.last
    
    when :initial  # HTML
      if match = scan(RE::PHP_START)
        encoder.text_token match, :inline_delimiter
        label_expected = true
        states << :php
      else
        # Everything up to (not including) the next PHP start, or the rest of
        # the input if there is none, is tokenized by the HTML scanner.
        match = scan_until(/(?=#{RE::PHP_START})/o) || scan_rest
        @html_scanner.tokenize match unless match.empty?
      end
    
    when :php, :php_inline
      if match = scan(/\s+/)
        encoder.text_token match, :space
      
      # Comments: /* ... */ (also when unterminated, running to the end of the
      # input), or a // or # comment that stops before RE::PHP_END or at the
      # end of the line.
      elsif match = scan(%r! (?m: \/\* (?: .*? \*\/ | .* ) ) | (?://|\#) .*? (?=#{RE::PHP_END}|$) !xo)
        encoder.text_token match, :comment
      
      elsif match = scan(RE::IDENTIFIER)
        kind = Words::IDENT_KIND[match]
        # A plain identifier followed by ':' (but not '::') where a label may
        # appear is a label.
        if kind == :ident && label_expected && check(/:(?!:)/)
          kind = :label
          label_expected = true
        else
          label_expected = false
          if kind == :ident && match =~ /^[A-Z]/
            # Identifiers starting with an uppercase ASCII letter are shown as
            # constants.
            kind = :constant
          elsif kind == :keyword
            case match
            when 'class'
              states << :class_expected
            when 'function'
              states << :function_expected
            when 'case', 'default'
              case_expected = true
            end
          elsif match == 'b' && check(/['"]/)  # binary string literal
            # Hold the 'b' back; it is emitted as :modifier inside the string
            # group once the opening quote is scanned. `next` skips the
            # text_token call below.
            modifier = match
            next
          end
        end
        encoder.text_token match, kind
      
      # Numeric literals: floats are tried first, then hex, then integers.
      elsif match = scan(/(?:\d+\.\d*|\d*\.\d+)(?:e[-+]?\d+)?|\d+e[-+]?\d+/i)
        label_expected = false
        encoder.text_token match, :float
      
      elsif match = scan(/0x[0-9a-fA-F]+/)
        label_expected = false
        encoder.text_token match, :hex
      
      elsif match = scan(/\d+/)
        label_expected = false
        encoder.text_token match, :integer
      
      # Opening quote: start a :string group. Single quotes go to :sqstring;
      # double quotes and backticks both go to :dqstring.
      elsif match = scan(/['"`]/)
        encoder.begin_group :string
        if modifier
          encoder.text_token modifier, :modifier
          modifier = nil
        end
        delimiter = match
        encoder.text_token match, :delimiter
        states.push match == "'" ? :sqstring : :dqstring
      
      elsif match = scan(RE::VARIABLE)
        label_expected = false
        encoder.text_token match, Words::VARIABLE_KIND[match]
      
      # '{' opens a nested :php state so the matching '}' can pop it again.
      elsif match = scan(/\{/)
        encoder.text_token match, :operator
        label_expected = true
        states.push :php
      
      elsif match = scan(/\}/)
        if states.size == 1
          # Nothing left to close.
          encoder.text_token match, :error
        else
          state = states.pop
          if states.last.is_a?(::Array)
            # End of a "{$...}" interpolation: unwrap the [state, delimiter]
            # pair that was stored when the interpolation was opened and
            # continue scanning the surrounding string.
            delimiter = states.last[1]
            states[-1] = states.last[0]
            encoder.text_token match, :delimiter
            encoder.end_group :inline
          else
            encoder.text_token match, :operator
            encoder.end_group :inline if state == :php_inline
            label_expected = true
          end
        end
      
      elsif match = scan(/@/)
        label_expected = false
        encoder.text_token match, :exception
      
      elsif match = scan(RE::PHP_END)
        encoder.text_token match, :inline_delimiter
        # Unwind the whole state stack, closing every group that is still
        # open, then start over in HTML mode.
        while state = states.pop
          encoder.end_group :string if [:sqstring, :dqstring].include? state
          if state.is_a? Array
            encoder.end_group :inline
            encoder.end_group :string if [:sqstring, :dqstring].include? state.first
          end
        end
        states << :initial
      
      # Heredoc start: <<<ID, <<<"ID" or <<<'ID'. Only the single-quoted form
      # (capture group 3) is scanned as :sqstring.
      elsif match = scan(/<<<(?:(#{RE::IDENTIFIER})|"(#{RE::IDENTIFIER})"|'(#{RE::IDENTIFIER})')/o)
        encoder.begin_group :string
        # warn 'heredoc in heredoc?' if heredoc_delimiter
        heredoc_delimiter = Regexp.escape(self[1] || self[2] || self[3])
        encoder.text_token match, :delimiter
        states.push self[3] ? :sqstring : :dqstring
        # The closing identifier must be followed by an optional ';' and the
        # end of the line.
        heredoc_delimiter = /#{heredoc_delimiter}(?=;?$)/
      
      elsif match = scan(/#{RE::OPERATOR}/o)
        label_expected = match == ';'
        if case_expected
          # The first operator after 'case' / 'default' clears the flag; if it
          # is ':' a label may follow.
          label_expected = true if match == ':'
          case_expected = false
        end
        encoder.text_token match, :operator
      
      else
        # Unrecognized character: consume exactly one so the loop advances.
        encoder.text_token getch, :error
      
      end
    
    when :sqstring
      # In heredoc mode content is scanned line by line (newlines are handled
      # separately below); otherwise content runs up to the next quote or
      # backslash.
      if match = scan(heredoc_delimiter ? /[^\\\n]+/ : /[^'\\]+/)
        encoder.text_token match, :content
      elsif !heredoc_delimiter && match = scan(/'/)
        encoder.text_token match, :delimiter
        encoder.end_group :string
        delimiter = nil
        label_expected = false
        states.pop
      elsif heredoc_delimiter && match = scan(/\n/)
        if scan heredoc_delimiter
          # The closing identifier follows the newline: emit the newline as
          # content, the identifier as delimiter, and leave the string.
          encoder.text_token "\n", :content
          encoder.text_token matched, :delimiter
          encoder.end_group :string
          heredoc_delimiter = nil
          label_expected = false
          states.pop
        else
          encoder.text_token match, :content
        end
      # Recognized escapes are emitted as :char; any other backslash pair is
      # plain content; a lone trailing backslash is an error.
      elsif match = scan(heredoc_delimiter ? /\\\\/ : /\\[\\'\n]/)
        encoder.text_token match, :char
      elsif match = scan(/\\./m)
        encoder.text_token match, :content
      elsif match = scan(/\\/)
        encoder.text_token match, :error
      else
        # Fallback when none of the patterns above matched: close the string
        # group and leave this state.
        encoder.end_group :string
        states.pop
      end
    
    when :dqstring
      # Content stops at '$', '{', backslash and either the newline (heredoc)
      # or the closing quote character (" or `).
      if match = scan(heredoc_delimiter ? /[^${\\\n]+/ : (delimiter == '"' ? /[^"${\\]+/ : /[^`${\\]+/))
        encoder.text_token match, :content
      elsif !heredoc_delimiter && match = scan(delimiter == '"' ? /"/ : /`/)
        encoder.text_token match, :delimiter
        encoder.end_group :string
        delimiter = nil
        label_expected = false
        states.pop
      elsif heredoc_delimiter && match = scan(/\n/)
        if scan heredoc_delimiter
          # Closing identifier found right after the newline: end the heredoc.
          encoder.text_token "\n", :content
          encoder.text_token matched, :delimiter
          encoder.end_group :string
          heredoc_delimiter = nil
          label_expected = false
          states.pop
        else
          encoder.text_token match, :content
        end
      # Escapes: \xH / \xHH and one- to three-digit octal, then the named
      # escapes (the escaped quote character is accepted only outside
      # heredocs).
      elsif match = scan(/\\(?:x[0-9A-Fa-f]{1,2}|[0-7]{1,3})/)
        encoder.text_token match, :char
      elsif match = scan(heredoc_delimiter ? /\\[nrtvf\\$]/ : (delimiter == '"' ? /\\[nrtvf\\$"]/ : /\\[nrtvf\\$`]/))
        encoder.text_token match, :char
      elsif match = scan(/\\./m)
        encoder.text_token match, :content
      elsif match = scan(/\\/)
        encoder.text_token match, :error
      # Simple interpolation: $var, $var[ident] or $var->ident.
      elsif match = scan(/#{RE::VARIABLE}/o)
        if check(/\[#{RE::IDENTIFIER}\]/o)
          encoder.begin_group :inline
          encoder.text_token match, :local_variable
          encoder.text_token scan(/\[/), :operator
          encoder.text_token scan(/#{RE::IDENTIFIER}/o), :ident
          encoder.text_token scan(/\]/), :operator
          encoder.end_group :inline
        elsif check(/\[/)
          # A '[' that is not followed by a bare "ident]" index: append what
          # can be consumed to the variable and flag the whole text as error.
          match << scan(/\[['"]?#{RE::IDENTIFIER}?['"]?\]?/o)
          encoder.text_token match, :error
        elsif check(/->#{RE::IDENTIFIER}/o)
          encoder.begin_group :inline
          encoder.text_token match, :local_variable
          encoder.text_token scan(/->/), :operator
          encoder.text_token scan(/#{RE::IDENTIFIER}/o), :ident
          encoder.end_group :inline
        elsif check(/->/)
          # '->' without a property name after it is flagged as error together
          # with the variable.
          match << scan(/->/)
          encoder.text_token match, :error
        else
          encoder.text_token match, :local_variable
        end
      elsif match = scan(/\{/)
        if check(/\$/)
          # "{$" starts complex interpolation: remember the current string
          # state together with its delimiter as a pair on the stack, then
          # scan PHP code until the matching '}'.
          encoder.begin_group :inline
          states[-1] = [states.last, delimiter]
          delimiter = nil
          states.push :php_inline
          encoder.text_token match, :delimiter
        else
          encoder.text_token match, :content
        end
      elsif match = scan(/\$\{#{RE::IDENTIFIER}\}/o)
        encoder.text_token match, :local_variable
      elsif match = scan(/\$/)
        # A '$' that starts neither a variable nor "${ident}" is plain content.
        encoder.text_token match, :content
      else
        # Fallback when none of the patterns above matched: close the string
        # group and leave this state.
        encoder.end_group :string
        states.pop
      end
    
    when :class_expected
      # After 'class': skip whitespace, mark the next identifier as the class
      # name; anything else simply leaves this state without consuming input.
      if match = scan(/\s+/)
        encoder.text_token match, :space
      elsif match = scan(/#{RE::IDENTIFIER}/o)
        encoder.text_token match, :class
        states.pop
      else
        states.pop
      end
    
    when :function_expected
      # After 'function': skip whitespace and '&', mark the next identifier as
      # the function name; anything else leaves this state without consuming
      # input.
      if match = scan(/\s+/)
        encoder.text_token match, :space
      elsif match = scan(/&/)
        encoder.text_token match, :operator
      elsif match = scan(/#{RE::IDENTIFIER}/o)
        encoder.text_token match, :function
        states.pop
      else
        states.pop
      end
    
    else
      raise_inspect 'Unknown state!', encoder, states
    end
    
  end
  
  # End of input: close every group that is still open on the state stack.
  while state = states.pop
    encoder.end_group :string if [:sqstring, :dqstring].include? state
    if state.is_a? Array
      encoder.end_group :inline
      encoder.end_group :string if [:sqstring, :dqstring].include? state.first
    end
  end
  
  encoder
end

def setup

# Creates the HTML scanner that handles every part of the input outside PHP
# tags and stores it in @html_scanner.
#
# The scanner is requested from CodeRay for the :html language with this
# scanner's @tokens object and with :keep_tokens and :keep_state enabled.
# NOTE(review): presumably these options let the HTML scanner write into the
# same token stream and keep its state across the separate HTML chunks it is
# given -- confirm against the CodeRay scanner options.
def setup
  @html_scanner = CodeRay.scanner(
    :html,
    :tokens      => @tokens,
    :keep_tokens => true,
    :keep_state  => true
  )
end