class CodeRay::Scanners::PHP
Original by Stefan Walk.
Scanner for PHP.
def reset_instance
# Resets this scanner and the embedded HTML scanner.
#
# +super+ is called bare (no arguments, no parentheses) so that the parent
# implementation receives exactly the arguments this method received (none),
# and it runs before the HTML scanner held in @html_scanner is reset.
def reset_instance
  super
  @html_scanner.reset
end
def scan_tokens encoder, options
# Tokenizes the input (PHP, optionally embedded in HTML) into +encoder+.
#
# The scanner is a state machine driven by the +states+ stack:
#
# :initial::            outside PHP; text is handed to @html_scanner.
# :php / :php_inline::  PHP code (:php_inline is code inside a "{$...}"
#                       string interpolation).
# :sqstring::           single-quoted string, or a heredoc opened with a
#                       quoted 'IDENTIFIER'.
# :dqstring::           double-quoted or backtick string, or another heredoc.
# :class_expected / :function_expected::
#                       directly after the +class+ / +function+ keyword.
#
# While a "{$...}" interpolation is open, the enclosing string's stack entry
# is temporarily replaced by a <tt>[state, delimiter]</tt> Array so that both
# can be restored when the matching "}" is found.
#
# encoder:: receives text_token / begin_group / end_group calls.
# options:: accepted as part of the scanner interface; not read here.
#
# Returns +encoder+. Raises via +raise_inspect+ if an unknown state is
# found on the stack.
def scan_tokens encoder, options

  if check(RE::PHP_START) ||  # starts with <?
     (match?(/\s*<\S/) && check(/.{1,1000}#{RE::PHP_START}/om)) ||  # starts with tag and contains <?
     check(/.{0,1000}#{RE::HTML_INDICATOR}/om) ||
     check(/.{1,100}#{RE::PHP_START}/om)  # PHP start after max 100 chars
    # is HTML with embedded PHP, so start with HTML
    states = [:initial]
  else
    # is just PHP, so start with PHP surrounded by HTML
    states = [:initial, :php]
  end

  # True where an "identifier:" would be a label: after the PHP start tag,
  # after "{", "}" and ";", and after the ":" that ends a case/default.
  label_expected = true
  # True between a "case"/"default" keyword and the next operator.
  case_expected = false

  heredoc_delimiter = nil  # Regexp matching the closing heredoc identifier, or nil
  delimiter = nil          # opening quote character of the current string
  modifier = nil           # pending "b" prefix of a binary string literal

  until eos?

    case states.last

    when :initial  # HTML
      if match = scan(RE::PHP_START)
        encoder.text_token match, :inline_delimiter
        label_expected = true
        states << :php
      else
        # Everything up to the next PHP start tag (or the rest) is HTML.
        match = scan_until(/(?=#{RE::PHP_START})/o) || scan_rest
        @html_scanner.tokenize match unless match.empty?
      end

    when :php, :php_inline
      if match = scan(/\s+/)
        encoder.text_token match, :space

      elsif match = scan(%r! (?m: \/\* (?: .*? \*\/ | .* ) ) | (?://|\#) .*? (?=#{RE::PHP_END}|$) !xo)
        encoder.text_token match, :comment

      elsif match = scan(RE::IDENTIFIER)
        kind = Words::IDENT_KIND[match]
        if kind == :ident && label_expected && check(/:(?!:)/)
          kind = :label
          label_expected = true
        else
          label_expected = false
          if kind == :ident && match =~ /^[A-Z]/
            kind = :constant
          elsif kind == :keyword
            case match
            when 'class'
              states << :class_expected
            when 'function'
              states << :function_expected
            when 'case', 'default'
              case_expected = true
            end
          elsif match == 'b' && check(/['"]/)  # binary string literal
            # Hold the prefix back; it is emitted inside the string group
            # when the opening quote is scanned.
            modifier = match
            next
          end
        end
        encoder.text_token match, kind

      elsif match = scan(/(?:\d+\.\d*|\d*\.\d+)(?:e[-+]?\d+)?|\d+e[-+]?\d+/i)
        label_expected = false
        encoder.text_token match, :float

      elsif match = scan(/0x[0-9a-fA-F]+/)
        label_expected = false
        encoder.text_token match, :hex

      elsif match = scan(/\d+/)
        label_expected = false
        encoder.text_token match, :integer

      elsif match = scan(/['"`]/)
        encoder.begin_group :string
        if modifier
          encoder.text_token modifier, :modifier
          modifier = nil
        end
        delimiter = match
        encoder.text_token match, :delimiter
        states.push match == "'" ? :sqstring : :dqstring

      elsif match = scan(RE::VARIABLE)
        label_expected = false
        encoder.text_token match, Words::VARIABLE_KIND[match]

      elsif match = scan(/\{/)
        encoder.text_token match, :operator
        label_expected = true
        states.push :php

      elsif match = scan(/\}/)
        if states.size == 1
          encoder.text_token match, :error
        else
          state = states.pop
          if states.last.is_a?(::Array)
            # Closing a "{$...}" interpolation: unpack the saved
            # [state, delimiter] pair of the enclosing string.
            delimiter = states.last[1]
            states[-1] = states.last[0]
            encoder.text_token match, :delimiter
            encoder.end_group :inline
          else
            encoder.text_token match, :operator
            encoder.end_group :inline if state == :php_inline
            label_expected = true
          end
        end

      elsif match = scan(/@/)
        label_expected = false
        encoder.text_token match, :exception

      elsif match = scan(RE::PHP_END)
        encoder.text_token match, :inline_delimiter
        # Leaving PHP: close every group that is still open on the stack.
        while state = states.pop
          encoder.end_group :string if [:sqstring, :dqstring].include? state
          if state.is_a? Array
            encoder.end_group :inline
            encoder.end_group :string if [:sqstring, :dqstring].include? state.first
          end
        end
        states << :initial

      elsif match = scan(/<<<(?:(#{RE::IDENTIFIER})|"(#{RE::IDENTIFIER})"|'(#{RE::IDENTIFIER})')/o)
        encoder.begin_group :string
        # warn 'heredoc in heredoc?' if heredoc_delimiter
        heredoc_delimiter = Regexp.escape(self[1] || self[2] || self[3])
        encoder.text_token match, :delimiter
        # Only the single-quoted form (capture 3) is scanned as :sqstring.
        states.push self[3] ? :sqstring : :dqstring
        heredoc_delimiter = /#{heredoc_delimiter}(?=;?$)/

      elsif match = scan(/#{RE::OPERATOR}/o)
        label_expected = match == ';'
        if case_expected
          label_expected = true if match == ':'
          case_expected = false
        end
        encoder.text_token match, :operator

      else
        encoder.text_token getch, :error

      end

    when :sqstring
      if match = scan(heredoc_delimiter ? /[^\\\n]+/ : /[^'\\]+/)
        encoder.text_token match, :content
      elsif !heredoc_delimiter && match = scan(/'/)
        encoder.text_token match, :delimiter
        encoder.end_group :string
        delimiter = nil
        label_expected = false
        states.pop
      elsif heredoc_delimiter && match = scan(/\n/)
        if scan heredoc_delimiter
          # The newline belongs to the content; the identifier closes the heredoc.
          encoder.text_token "\n", :content
          encoder.text_token matched, :delimiter
          encoder.end_group :string
          heredoc_delimiter = nil
          label_expected = false
          states.pop
        else
          encoder.text_token match, :content
        end
      elsif match = scan(heredoc_delimiter ? /\\\\/ : /\\[\\'\n]/)
        encoder.text_token match, :char
      elsif match = scan(/\\./m)
        encoder.text_token match, :content
      elsif match = scan(/\\/)
        encoder.text_token match, :error
      else
        encoder.end_group :string
        states.pop
      end

    when :dqstring
      if match = scan(heredoc_delimiter ? /[^${\\\n]+/ : (delimiter == '"' ? /[^"${\\]+/ : /[^`${\\]+/))
        encoder.text_token match, :content
      elsif !heredoc_delimiter && match = scan(delimiter == '"' ? /"/ : /`/)
        encoder.text_token match, :delimiter
        encoder.end_group :string
        delimiter = nil
        label_expected = false
        states.pop
      elsif heredoc_delimiter && match = scan(/\n/)
        if scan heredoc_delimiter
          # The newline belongs to the content; the identifier closes the heredoc.
          encoder.text_token "\n", :content
          encoder.text_token matched, :delimiter
          encoder.end_group :string
          heredoc_delimiter = nil
          label_expected = false
          states.pop
        else
          encoder.text_token match, :content
        end
      elsif match = scan(/\\(?:x[0-9A-Fa-f]{1,2}|[0-7]{1,3})/)
        encoder.text_token match, :char
      elsif match = scan(heredoc_delimiter ? /\\[nrtvf\\$]/ : (delimiter == '"' ? /\\[nrtvf\\$"]/ : /\\[nrtvf\\$`]/))
        encoder.text_token match, :char
      elsif match = scan(/\\./m)
        encoder.text_token match, :content
      elsif match = scan(/\\/)
        encoder.text_token match, :error
      elsif match = scan(/#{RE::VARIABLE}/o)
        if check(/\[#{RE::IDENTIFIER}\]/o)
          # $var[key]
          encoder.begin_group :inline
          encoder.text_token match, :local_variable
          encoder.text_token scan(/\[/), :operator
          encoder.text_token scan(/#{RE::IDENTIFIER}/o), :ident
          encoder.text_token scan(/\]/), :operator
          encoder.end_group :inline
        elsif check(/\[/)
          # Any other subscript form is flagged as an error.
          match << scan(/\[['"]?#{RE::IDENTIFIER}?['"]?\]?/o)
          encoder.text_token match, :error
        elsif check(/->#{RE::IDENTIFIER}/o)
          # $var->member
          encoder.begin_group :inline
          encoder.text_token match, :local_variable
          encoder.text_token scan(/->/), :operator
          encoder.text_token scan(/#{RE::IDENTIFIER}/o), :ident
          encoder.end_group :inline
        elsif check(/->/)
          match << scan(/->/)
          encoder.text_token match, :error
        else
          encoder.text_token match, :local_variable
        end
      elsif match = scan(/\{/)
        if check(/\$/)
          # "{$": start of an inline PHP expression. Remember the string
          # state and its delimiter in place of the current stack entry.
          encoder.begin_group :inline
          states[-1] = [states.last, delimiter]
          delimiter = nil
          states.push :php_inline
          encoder.text_token match, :delimiter
        else
          encoder.text_token match, :content
        end
      elsif match = scan(/\$\{#{RE::IDENTIFIER}\}/o)
        encoder.text_token match, :local_variable
      elsif match = scan(/\$/)
        encoder.text_token match, :content
      else
        encoder.end_group :string
        states.pop
      end

    when :class_expected
      if match = scan(/\s+/)
        encoder.text_token match, :space
      elsif match = scan(/#{RE::IDENTIFIER}/o)
        encoder.text_token match, :class
        states.pop
      else
        states.pop
      end

    when :function_expected
      if match = scan(/\s+/)
        encoder.text_token match, :space
      elsif match = scan(/&/)
        encoder.text_token match, :operator
      elsif match = scan(/#{RE::IDENTIFIER}/o)
        encoder.text_token match, :function
        states.pop
      else
        states.pop
      end

    else
      raise_inspect 'Unknown state!', encoder, states

    end

  end

  # End of input: close every group that is still open on the stack.
  while state = states.pop
    encoder.end_group :string if [:sqstring, :dqstring].include? state
    if state.is_a? Array
      encoder.end_group :inline
      encoder.end_group :string if [:sqstring, :dqstring].include? state.first
    end
  end

  encoder
end
def setup
# Creates the embedded HTML scanner used for everything outside PHP tags.
#
# The HTML scanner is given this scanner's @tokens object and is created
# with :keep_tokens and :keep_state enabled.
# NOTE(review): presumably this lets HTML tokens accumulate in the same
# output across successive tokenize calls - confirm against CodeRay.scanner.
def setup
  @html_scanner = CodeRay.scanner :html,
                                  :tokens => @tokens,
                                  :keep_tokens => true,
                                  :keep_state => true
end