def scan_tokens encoder, options
# Tokenizes the input as PHP, possibly embedded in HTML, and reports every
# token to +encoder+.
#
# The method is a stack-based state machine: on each pass of the main loop
# +states.last+ selects the branch that consumes the next piece of input.
# The states used here are:
#
# :initial::           text outside of PHP tags, handed to @html_scanner
# :php::               PHP code
# :sqstring::          single-quoted string, or a heredoc opened as <<<'ID'
# :dqstring::          double-quoted or backtick string, or any other heredoc
# :class_expected::    directly after the +class+ keyword
# :function_expected:: directly after the +function+ keyword
#
# While a "{$...}" interpolation is being scanned, the string state on the
# stack is temporarily replaced by a two-element Array
# [string_state, delimiter] so that the closing "}" can restore both.
#
# The scanning primitives used below (scan, check, match?, scan_until,
# scan_rest, getch, matched, self[n], eos?) as well as RE, Words,
# @html_scanner and raise_inspect are defined outside of this method.
#
# encoder:: object receiving text_token, begin_group and end_group calls
# options:: not read anywhere in this method
#
# Returns +encoder+. Calls raise_inspect if the state stack holds a state
# that no branch handles.
#
# NOTE(review): nothing in this method closes groups that are still open when
# the input ends -- confirm whether the caller takes care of that.
def scan_tokens encoder, options
# Look ahead (without consuming input) to decide whether the text starts out
# as HTML or directly as PHP code.
if check(RE::PHP_START) || # starts with <?
(match?(/\s*<\S/) && check(/.{1,1000}#{RE::PHP_START}/om)) || # starts with tag and contains <?
check(/.{0,1000}#{RE::HTML_INDICATOR}/om) ||
check(/.{1,100}#{RE::PHP_START}/om) # PHP start after max 100 chars
# is HTML with embedded PHP, so start with HTML
states = [:initial]
else
# is just PHP, so start with PHP surrounded by HTML
states = [:initial, :php]
end
# True while an identifier followed by a single ":" should be tagged :label.
# Set again after a PHP start tag, "{", "}", ";" and the ":" of case/default.
label_expected = true
# True between a +case+/+default+ keyword and the next operator token.
case_expected = false
# nil outside of heredocs; inside one, a Regexp matching the closing
# identifier. Its presence also switches both string states to heredoc rules.
heredoc_delimiter = nil
# Opening quote character of the quoted string currently being scanned;
# :dqstring uses it to tell '"' strings from backtick strings.
delimiter = nil
# Holds a pending "b" string prefix until the string's opening quote is seen.
modifier = nil
until eos?
case states.last
when :initial # HTML
if match = scan(RE::PHP_START)
encoder.text_token match, :inline_delimiter
label_expected = true
states << :php
else
# No PHP start tag here: take everything up to the next one (or the rest
# of the input) and let the HTML scanner tokenize it.
match = scan_until(/(?=#{RE::PHP_START})/o) || scan_rest
@html_scanner.tokenize match unless match.empty?
end
when :php
if match = scan(/\s+/)
encoder.text_token match, :space
# Block comment (an unterminated one runs to the end of the input), or a
# "//" / "#" line comment that stops before a PHP end tag or the line end.
elsif match = scan(%r! (?m: \/\* (?: .*? \*\/ | .* ) ) | (?://|\#) .*? (?=#{RE::PHP_END}|$) !xo)
encoder.text_token match, :comment
elsif match = scan(RE::IDENTIFIER)
kind = Words::IDENT_KIND[match]
# A plain identifier directly followed by ":" (but not "::") is a label
# when one is allowed at this position.
if kind == :ident && label_expected && check(/:(?!:)/)
kind = :label
label_expected = true
else
label_expected = false
# Plain identifiers starting with an uppercase ASCII letter are reported
# as constants.
if kind == :ident && match =~ /^[A-Z]/
kind = :constant
elsif kind == :keyword
case match
when 'class'
states << :class_expected
when 'function'
states << :function_expected
when 'case', 'default'
case_expected = true
end
elsif match == 'b' && check(/['"]/) # binary string literal
# Do not emit the "b" now; it is emitted as a :modifier token inside the
# string group once the quote is scanned. +next+ skips the text_token
# call below.
modifier = match
next
end
end
encoder.text_token match, kind
# Floats are tried before the integer patterns, because /\d+/ would
# otherwise consume just the integer part.
elsif match = scan(/(?:\d+\.\d*|\d*\.\d+)(?:e[-+]?\d+)?|\d+e[-+]?\d+/i)
label_expected = false
encoder.text_token match, :float
elsif match = scan(/0x[0-9a-fA-F]+/)
label_expected = false
encoder.text_token match, :hex
elsif match = scan(/\d+/)
label_expected = false
encoder.text_token match, :integer
# Opening quote: "'" starts :sqstring, '"' and "`" start :dqstring.
elsif match = scan(/['"`]/)
encoder.begin_group :string
if modifier
encoder.text_token modifier, :modifier
modifier = nil
end
delimiter = match
encoder.text_token match, :delimiter
states.push match == "'" ? :sqstring : :dqstring
elsif match = scan(RE::VARIABLE)
label_expected = false
encoder.text_token match, Words::VARIABLE_KIND[match]
# "{" opens a nested :php state so that the matching "}" can pop it.
elsif match = scan(/\{/)
encoder.text_token match, :operator
label_expected = true
states.push :php
elsif match = scan(/\}/)
# With only one state left there is no open "{" to close.
if states.size == 1
encoder.text_token match, :error
else
states.pop
# An Array underneath means this "}" ends a "{$...}" interpolation:
# restore the saved string state and delimiter, close the :inline group.
if states.last.is_a?(::Array)
delimiter = states.last[1]
states[-1] = states.last[0]
encoder.text_token match, :delimiter
encoder.end_group :inline
else
encoder.text_token match, :operator
label_expected = true
end
end
elsif match = scan(/@/)
label_expected = false
encoder.text_token match, :exception
# PHP end tag: drop the whole state stack and continue as HTML.
# NOTE(review): no end_group is issued here for groups that may still be
# open at this point -- confirm this is intended.
elsif match = scan(RE::PHP_END)
encoder.text_token match, :inline_delimiter
states = [:initial]
# Heredoc start: <<<ID, <<<"ID" or <<<'ID'. Only the single-quoted form is
# scanned with the :sqstring rules.
elsif match = scan(/<<<(?:(#{RE::IDENTIFIER})|"(#{RE::IDENTIFIER})"|'(#{RE::IDENTIFIER})')/o)
encoder.begin_group :string
# warn 'heredoc in heredoc?' if heredoc_delimiter
heredoc_delimiter = Regexp.escape(self[1] || self[2] || self[3])
encoder.text_token match, :delimiter
states.push self[3] ? :sqstring : :dqstring
# The closing identifier must be followed by an optional ";" and the end
# of the line.
heredoc_delimiter = /#{heredoc_delimiter}(?=;?$)/
elsif match = scan(/#{RE::OPERATOR}/o)
# A label may follow ";", and also the ":" that ends a case/default
# clause. Any operator clears case_expected.
label_expected = match == ';'
if case_expected
label_expected = true if match == ':'
case_expected = false
end
encoder.text_token match, :operator
else
# Nothing matched: report a single character as an error and move on.
encoder.text_token getch, :error
end
when :sqstring
# Plain content: in a heredoc it stops at a backslash or newline, in a
# quoted string at "'" or a backslash.
if match = scan(heredoc_delimiter ? /[^\\\n]+/ : /[^'\\]+/)
encoder.text_token match, :content
elsif !heredoc_delimiter && match = scan(/'/)
encoder.text_token match, :delimiter
encoder.end_group :string
delimiter = nil
label_expected = false
states.pop
# Heredoc: after each newline, test whether the closing identifier follows.
elsif heredoc_delimiter && match = scan(/\n/)
if scan heredoc_delimiter
encoder.text_token "\n", :content
encoder.text_token matched, :delimiter
encoder.end_group :string
heredoc_delimiter = nil
label_expected = false
states.pop
else
encoder.text_token match, :content
end
# Escapes reported as :char: only "\\" in a heredoc; "\\", "\'" and
# backslash-newline in a quoted string.
elsif match = scan(heredoc_delimiter ? /\\\\/ : /\\[\\'\n]/)
encoder.text_token match, :char
# Any other backslash followed by a character is ordinary content.
elsif match = scan(/\\./m)
encoder.text_token match, :content
# A backslash with nothing after it.
elsif match = scan(/\\/)
encoder.text_token match, :error
else
# Fallback when nothing matched: leave the string state.
# NOTE(review): the :string group is not closed on this path.
states.pop
end
when :dqstring
# Plain content: stops at "$", "{", a backslash, and either a newline
# (heredoc) or the closing delimiter character.
if match = scan(heredoc_delimiter ? /[^${\\\n]+/ : (delimiter == '"' ? /[^"${\\]+/ : /[^`${\\]+/))
encoder.text_token match, :content
elsif !heredoc_delimiter && match = scan(delimiter == '"' ? /"/ : /`/)
encoder.text_token match, :delimiter
encoder.end_group :string
delimiter = nil
label_expected = false
states.pop
# Heredoc: after each newline, test whether the closing identifier follows.
elsif heredoc_delimiter && match = scan(/\n/)
if scan heredoc_delimiter
encoder.text_token "\n", :content
encoder.text_token matched, :delimiter
encoder.end_group :string
heredoc_delimiter = nil
label_expected = false
states.pop
else
encoder.text_token match, :content
end
# Hexadecimal (\xH, \xHH) and octal (\N to \NNN) escapes.
elsif match = scan(/\\(?:x[0-9A-Fa-f]{1,2}|[0-7]{1,3})/)
encoder.text_token match, :char
# Single-letter escapes, "\\", "\$", and the escaped closing delimiter
# (the latter only outside of heredocs).
elsif match = scan(heredoc_delimiter ? /\\[nrtvf\\$]/ : (delimiter == '"' ? /\\[nrtvf\\$"]/ : /\\[nrtvf\\$`]/))
encoder.text_token match, :char
# Any other backslash followed by a character is ordinary content.
elsif match = scan(/\\./m)
encoder.text_token match, :content
# A backslash with nothing after it.
elsif match = scan(/\\/)
encoder.text_token match, :error
# Simple variable interpolation.
elsif match = scan(/#{RE::VARIABLE}/o)
# $var[ident]: emitted as an :inline group of four tokens.
if check(/\[#{RE::IDENTIFIER}\]/o)
encoder.begin_group :inline
encoder.text_token match, :local_variable
encoder.text_token scan(/\[/), :operator
encoder.text_token scan(/#{RE::IDENTIFIER}/o), :ident
encoder.text_token scan(/\]/), :operator
encoder.end_group :inline
# $var[ followed by anything else: the variable plus whatever index-like
# text matches is reported as a single :error token.
elsif check(/\[/)
match << scan(/\[['"]?#{RE::IDENTIFIER}?['"]?\]?/o)
encoder.text_token match, :error
# $var->ident: emitted as an :inline group of three tokens.
elsif check(/->#{RE::IDENTIFIER}/o)
encoder.begin_group :inline
encoder.text_token match, :local_variable
encoder.text_token scan(/->/), :operator
encoder.text_token scan(/#{RE::IDENTIFIER}/o), :ident
encoder.end_group :inline
# $var-> without an identifier after it is an error.
elsif check(/->/)
match << scan(/->/)
encoder.text_token match, :error
else
encoder.text_token match, :local_variable
end
elsif match = scan(/\{/)
# "{$" starts an expression interpolation: remember the string state and
# its delimiter on the stack, then scan PHP until the matching "}".
if check(/\$/)
encoder.begin_group :inline
states[-1] = [states.last, delimiter]
delimiter = nil
states.push :php
encoder.text_token match, :delimiter
else
# A "{" not followed by "$" is literal text.
encoder.text_token match, :content
end
# ${ident} is reported as one :local_variable token.
elsif match = scan(/\$\{#{RE::IDENTIFIER}\}/o)
encoder.text_token match, :local_variable
# A "$" that starts none of the forms above is literal text.
elsif match = scan(/\$/)
encoder.text_token match, :content
else
# Fallback when nothing matched: leave the string state.
# NOTE(review): the :string group is not closed on this path.
states.pop
end
when :class_expected
# After +class+: skip whitespace and tag the next identifier as :class.
# Anything else just pops the state; the input is then rescanned by the
# state underneath.
if match = scan(/\s+/)
encoder.text_token match, :space
elsif match = scan(/#{RE::IDENTIFIER}/o)
encoder.text_token match, :class
states.pop
else
states.pop
end
when :function_expected
# After +function+: skip whitespace, accept "&" before the name, and tag
# the next identifier as :function. Anything else just pops the state.
if match = scan(/\s+/)
encoder.text_token match, :space
elsif match = scan(/&/)
encoder.text_token match, :operator
elsif match = scan(/#{RE::IDENTIFIER}/o)
encoder.text_token match, :function
states.pop
else
states.pop
end
else
raise_inspect 'Unknown state!', encoder, states
end
end
encoder
end