class CodeRay::Scanners::HTML
See also: Scanners::XML
Alias: xhtml
HTML Scanner
def reset
# Resets the scanner: delegates to the superclass implementation, then
# returns the HTML-specific scanning state to its starting values.
#
# @in_tag is cleared as well. scan_tokens stores it when called with
# :keep_state, and #setup initialises it to nil, so leaving it untouched
# here would let a tag context from an earlier run survive a reset.
def reset
  super
  @state = :initial
  @plain_string_content = nil
  @in_tag = nil
end
def scan_css encoder, code, state = [:initial]
# Tokenizes an embedded CSS fragment with a lazily created, reused CSS
# scanner, sending the resulting tokens to +encoder+.
#
# encoder - receiver of the tokens (passed on as the :tokens option).
# code    - the CSS source; nothing happens when it is nil/false or empty.
# state   - passed on as the :state option of the CSS scanner
#           (defaults to [:initial]).
#
# Returns whatever the CSS scanner's #tokenize returns, or nil when there
# was nothing to scan.
def scan_css(encoder, code, state = [:initial])
  return if !code || code.empty?

  @css_scanner = Scanners::CSS.new('', :keep_tokens => true) unless @css_scanner
  @css_scanner.tokenize(code, :tokens => encoder, :state => state)
end
def scan_java_script encoder, code
# Tokenizes an embedded JavaScript fragment with a lazily created, reused
# JavaScript scanner, sending the resulting tokens to +encoder+.
#
# encoder - receiver of the tokens (passed on as the :tokens option).
# code    - the JavaScript source; nothing happens when it is nil/false
#           or empty.
#
# Returns whatever the JavaScript scanner's #tokenize returns, or nil when
# there was nothing to scan.
def scan_java_script(encoder, code)
  return if !code || code.empty?

  @java_script_scanner = Scanners::JavaScript.new('', :keep_tokens => true) unless @java_script_scanner
  @java_script_scanner.tokenize(code, :tokens => encoder)
end
def scan_tokens encoder, options
# Scans the remaining input as HTML and sends every token to +encoder+.
#
# The method is a state machine; +state+ is one of
#   :initial                - between tags (text, comments, doctype, entities)
#   :attribute              - inside a start tag, expecting a name or the tag end
#   :attribute_equal        - after an attribute name, expecting "="
#   :attribute_value        - after "=", expecting a quoted or unquoted value
#   :attribute_value_string - inside a quoted attribute value
#   :in_special_tag         - directly after a <script> or <style> start tag
#
# encoder - object receiving text_token / begin_group / end_group calls.
# options - Hash; :state overrides the stored start state, and a truthy
#           :keep_state stores state, string-content pattern and tag
#           context back into the instance when scanning ends.
#
# Returns +encoder+.
# Raises (via raise_inspect or raise) when an unknown state or an unknown
# special tag is encountered.
def scan_tokens encoder, options
  state = options[:state] || @state
  plain_string_content = @plain_string_content
  in_tag = @in_tag
  in_attribute = nil

  # Resuming inside a quoted attribute value: reopen the string group
  # that is closed again at the end of this method.
  encoder.begin_group :string if state == :attribute_value_string

  until eos?

    # Whitespace is a plain :space token everywhere except directly inside
    # <script>/<style>, where it belongs to the embedded code.
    if state != :in_special_tag && match = scan(/\s+/m)
      encoder.text_token match, :space

    else

      case state

      when :initial
        if match = scan(/<!\[CDATA\[/)
          encoder.text_token match, :inline_delimiter
          if match = scan(/.*?\]\]>/m)
            # Split the content from the closing "]]>" delimiter.
            encoder.text_token match[0..-4], :plain
            encoder.text_token ']]>', :inline_delimiter
          elsif match = scan(/.+/)
            encoder.text_token match, :error
          end
        elsif match = scan(/<!--(?:.*?-->|.*)/m)
          encoder.text_token match, :comment
        elsif match = scan(/<!(\w+)(?:.*?>|.*)|\]>/m)
          encoder.text_token match, :doctype
        elsif match = scan(/<\?xml(?:.*?\?>|.*)/m)
          encoder.text_token match, :preprocessor
        elsif match = scan(/<\?(?:.*?\?>|.*)/m)
          encoder.text_token match, :comment
        elsif match = scan(/<\/[-\w.:]*>?/m)
          in_tag = nil
          encoder.text_token match, :tag
        elsif match = scan(/<(?:(script|style)|[-\w.:]+)(>)?/m)
          encoder.text_token match, :tag
          # Group 1 is only set for <script>/<style>; group 2 is set when
          # the tag was closed right away (no attributes).
          in_tag = self[1]
          if self[2]
            state = :in_special_tag if in_tag
          else
            state = :attribute
          end
        elsif match = scan(/[^<>&]+/)
          encoder.text_token match, :plain
        elsif match = scan(/#{ENTITY}/ox)
          encoder.text_token match, :entity
        elsif match = scan(/[<>&]/)
          in_tag = nil
          encoder.text_token match, :error
        else
          raise_inspect '[BUG] else-case reached with state %p' % [state], encoder
        end

      when :attribute
        if match = scan(/#{TAG_END}/o)
          encoder.text_token match, :tag
          in_attribute = nil
          if in_tag
            state = :in_special_tag
          else
            state = :initial
          end
        elsif match = scan(/#{ATTR_NAME}/o)
          in_attribute = IN_ATTRIBUTE[match]
          encoder.text_token match, :attribute_name
          state = :attribute_equal
        else
          in_tag = nil
          encoder.text_token getch, :error
        end

      when :attribute_equal
        if match = scan(/=/)
          encoder.text_token match, :operator
          state = :attribute_value
        else
          # Attribute without a value: re-examine the same input as the
          # start of the next attribute.
          state = :attribute
          next
        end

      when :attribute_value
        if match = scan(/#{ATTR_NAME}/o)
          encoder.text_token match, :attribute_value
          state = :attribute
        elsif match = scan(/["']/)
          if in_attribute == :script || in_attribute == :style
            # Attribute holding embedded code: delegate its content to the
            # JavaScript or CSS scanner.
            encoder.begin_group :string
            encoder.text_token match, :delimiter
            if scan(/javascript:[ \t]*/)
              encoder.text_token matched, :comment
            end
            # Everything up to (not including) the matching quote or the
            # end of input.
            code = scan_until(match == '"' ? /(?="|\z)/ : /(?='|\z)/)
            if in_attribute == :script
              scan_java_script encoder, code
            else
              scan_css encoder, code, [:block]
            end
            match = scan(/["']/)
            encoder.text_token match, :delimiter if match
            encoder.end_group :string
            state = :attribute
            in_attribute = nil
          else
            encoder.begin_group :string
            state = :attribute_value_string
            plain_string_content = PLAIN_STRING_CONTENT[match]
            encoder.text_token match, :delimiter
          end
        elsif match = scan(/#{TAG_END}/o)
          encoder.text_token match, :tag
          in_attribute = nil
          # The tag ended where a value was expected (e.g. <script src=>).
          # Handle it like a tag end in the :attribute state so that the
          # body of a <script>/<style> element is still scanned as code.
          if in_tag
            state = :in_special_tag
          else
            state = :initial
          end
        else
          encoder.text_token getch, :error
        end

      when :attribute_value_string
        if match = scan(plain_string_content)
          encoder.text_token match, :content
        elsif match = scan(/['"]/)
          encoder.text_token match, :delimiter
          encoder.end_group :string
          state = :attribute
        elsif match = scan(/#{ENTITY}/ox)
          encoder.text_token match, :entity
        elsif match = scan(/&/)
          encoder.text_token match, :content
        elsif match = scan(/[\n>]/)
          # Unterminated string: close the group and flag the character.
          encoder.end_group :string
          state = :initial
          encoder.text_token match, :error
        end

      when :in_special_tag
        case in_tag
        when 'script', 'style'
          encoder.text_token match, :space if match = scan(/[ \t]*\n/)
          if scan(/(\s*<!--)(?:(.*?)(-->)|(.*))/m)
            # Code wrapped in an HTML comment: the comment markers become
            # :comment tokens, the content in between is the code.
            code = self[2] || self[4]
            closing = self[3]
            encoder.text_token self[1], :comment
          else
            # Everything up to the closing tag (or the end of input).
            code = scan_until(/(?=(?:\n\s*)?<\/#{in_tag}>)|\z/)
            closing = false
          end
          unless code.empty?
            encoder.begin_group :inline
            if in_tag == 'script'
              scan_java_script encoder, code
            else
              scan_css encoder, code
            end
            encoder.end_group :inline
          end
          encoder.text_token closing, :comment if closing
          state = :initial
        else
          raise 'unknown special tag: %p' % [in_tag]
        end

      else
        raise_inspect 'Unknown state: %p' % [state], encoder

      end

    end

  end

  if options[:keep_state]
    @state = state
    @plain_string_content = plain_string_content
    @in_tag = in_tag
  end

  # Close a string group that is still open at the end of the input.
  encoder.end_group :string if state == :attribute_value_string

  encoder
end
def setup
# Initialises the HTML-specific scanning state: the state machine starts in
# :initial, with no string-content pattern and no tag context.
def setup
  @state = :initial
  @plain_string_content = @in_tag = nil
end