class CodeRay::Scanners::HTML

See also: Scanners::XML
Alias: xhtml
HTML Scanner

def reset

# Returns the scanner to its starting condition so it can be reused.
#
# Delegates to +super+ first, then clears every piece of HTML-specific
# state that scan_tokens can persist between runs (see its :keep_state
# handling): the state-machine state, the regexp used for the body of a
# quoted attribute value, and the name of the special tag (script/style)
# currently open. Clearing @in_tag keeps this method consistent with
# +setup+, which initialises the same three variables; otherwise a stale
# tag name from a previous run would be picked up by the next scan.
def reset
  super
  @state = :initial
  @plain_string_content = nil
  @in_tag = nil
end

def scan_css encoder, code, state = [:initial]

# Highlights +code+ as CSS and sends the resulting tokens to +encoder+.
#
# Does nothing (returning +nil+) when +code+ is +nil+ or empty. The CSS
# scanner is created on first use and cached in @css_scanner for later
# calls. +state+ is handed to the CSS scanner as its :state option and
# defaults to [:initial].
def scan_css encoder, code, state = [:initial]
  return if !code || code.empty?

  @css_scanner = Scanners::CSS.new('', :keep_tokens => true) unless @css_scanner
  @css_scanner.tokenize(code, :tokens => encoder, :state => state)
end

def scan_java_script encoder, code

# Highlights +code+ as JavaScript and sends the resulting tokens to
# +encoder+.
#
# A +nil+ or empty +code+ is ignored and +nil+ is returned. The JavaScript
# scanner is built lazily and kept in @java_script_scanner so repeated
# calls share one instance.
def scan_java_script encoder, code
  has_code = code && !code.empty?
  return unless has_code

  js_scanner = (@java_script_scanner ||= Scanners::JavaScript.new('', :keep_tokens => true))
  js_scanner.tokenize(code, :tokens => encoder)
end

def scan_tokens encoder, options

# Tokenizes the remaining input as HTML and feeds the tokens to +encoder+.
#
# The method is a hand-written state machine. The states used below are
# :initial, :attribute, :attribute_equal, :attribute_value,
# :attribute_value_string and :in_special_tag.
#
# Options read here:
# * :state      - state to start in; falls back to @state when absent.
# * :keep_state - when truthy, the final state, the current string-content
#                 pattern and the open special tag are saved in instance
#                 variables after the scan.
#
# Returns +encoder+. Raises (via raise_inspect / raise) when an unknown
# state or an unknown special tag is encountered.
def scan_tokens encoder, options
  state = options[:state] || @state
  plain_string_content = @plain_string_content
  in_tag = @in_tag
  in_attribute = nil
  
  # When starting inside a quoted attribute value, reopen the string group
  # (it is closed again at the end of this method if still in that state).
  encoder.begin_group :string if state == :attribute_value_string
  
  until eos?
    
    # Whitespace is emitted as :space in every state except inside
    # <script>/<style> content, where it is left for the :in_special_tag
    # branch to handle.
    if state != :in_special_tag && match = scan(/\s+/m)
      encoder.text_token match, :space
      
    else
      
      case state
      
      # Outside of any tag: text content, comments, doctype, tag openers.
      when :initial
        if match = scan(/<!\[CDATA\[/)
          encoder.text_token match, :inline_delimiter
          if match = scan(/.*?\]\]>/m)
            # Split off the trailing ']]>' so it gets its own token kind.
            encoder.text_token match[0..-4], :plain
            encoder.text_token ']]>', :inline_delimiter
          elsif match = scan(/.+/)
            # No closing ']]>' found: the rest of the line is flagged.
            encoder.text_token match, :error
          end
        elsif match = scan(/<!--(?:.*?-->|.*)/m)
          encoder.text_token match, :comment
        elsif match = scan(/<!(\w+)(?:.*?>|.*)|\]>/m)
          encoder.text_token match, :doctype
        elsif match = scan(/<\?xml(?:.*?\?>|.*)/m)
          encoder.text_token match, :preprocessor
        elsif match = scan(/<\?(?:.*?\?>|.*)/m)
          # Any other <? ... ?> construct is emitted as a comment.
          encoder.text_token match, :comment
        elsif match = scan(/<\/[-\w.:]*>?/m)
          # Closing tag: no special tag is open any more.
          in_tag = nil
          encoder.text_token match, :tag
        elsif match = scan(/<(?:(script|style)|[-\w.:]+)(>)?/m)
          encoder.text_token match, :tag
          # Group 1 is only set for script/style; group 2 is the optional '>'.
          in_tag = self[1]
          if self[2]
            # Tag closed immediately; only script/style switch state.
            state = :in_special_tag if in_tag
          else
            state = :attribute
          end
        elsif match = scan(/[^<>&]+/)
          encoder.text_token match, :plain
        elsif match = scan(/#{ENTITY}/ox)
          encoder.text_token match, :entity
        elsif match = scan(/[<>&]/)
          # A stray markup character that none of the patterns above accepted.
          in_tag = nil
          encoder.text_token match, :error
        else
          raise_inspect '[BUG] else-case reached with state %p' % [state], encoder
        end
        
      # Inside an opening tag, expecting an attribute name or the tag end.
      when :attribute
        # TAG_END is defined outside this view; presumably the end of a tag.
        if match = scan(/#{TAG_END}/o)
          encoder.text_token match, :tag
          in_attribute = nil
          if in_tag
            state = :in_special_tag
          else
            state = :initial
          end
        elsif match = scan(/#{ATTR_NAME}/o)
          # IN_ATTRIBUTE is defined outside this view; the result is later
          # compared against :script and :style.
          in_attribute = IN_ATTRIBUTE[match]
          encoder.text_token match, :attribute_name
          state = :attribute_equal
        else
          in_tag = nil
          encoder.text_token getch, :error
        end
        
      # After an attribute name, expecting '='.
      when :attribute_equal
        if match = scan(/=/)  #/
          encoder.text_token match, :operator
          state = :attribute_value
        else
          # No '=': nothing is consumed; the :attribute state handles the
          # current position on the next loop iteration.
          state = :attribute
          next
        end
        
      # After '=', expecting a quoted or unquoted value.
      when :attribute_value
        if match = scan(/#{ATTR_NAME}/o)
          # Unquoted value.
          encoder.text_token match, :attribute_value
          state = :attribute
        elsif match = scan(/["']/)
          if in_attribute == :script || in_attribute == :style
            # The value holds embedded code: the whole quoted value is
            # handled right here instead of via :attribute_value_string.
            encoder.begin_group :string
            encoder.text_token match, :delimiter
            if scan(/javascript:[ \t]*/)
              encoder.text_token matched, :comment
            end
            # Take everything up to (not including) the same kind of quote
            # that opened the value, or up to the end of input.
            code = scan_until(match == '"' ? /(?="|\z)/ : /(?='|\z)/)
            if in_attribute == :script
              scan_java_script encoder, code
            else
              # The CSS scanner is started in its [:block] state.
              scan_css encoder, code, [:block]
            end
            # The closing quote is absent when the input ended first.
            match = scan(/["']/)
            encoder.text_token match, :delimiter if match
            encoder.end_group :string
            state = :attribute
            in_attribute = nil
          else
            encoder.begin_group :string
            state = :attribute_value_string
            # Pattern for the string body, selected by the opening quote.
            plain_string_content = PLAIN_STRING_CONTENT[match]
            encoder.text_token match, :delimiter
          end
        elsif match = scan(/#{TAG_END}/o)
          encoder.text_token match, :tag
          state = :initial
        else
          encoder.text_token getch, :error
        end
        
      # Inside a quoted attribute value (string group is open).
      when :attribute_value_string
        if match = scan(plain_string_content)
          encoder.text_token match, :content
        elsif match = scan(/['"]/)
          encoder.text_token match, :delimiter
          encoder.end_group :string
          state = :attribute
        elsif match = scan(/#{ENTITY}/ox)
          encoder.text_token match, :entity
        elsif match = scan(/&/)
          # A bare '&' that is not an entity counts as ordinary content.
          encoder.text_token match, :content
        elsif match = scan(/[\n>]/)
          # Newline or '>' inside the string: close the string group, flag
          # the character as an error and return to :initial.
          encoder.end_group :string
          state = :initial
          encoder.text_token match, :error
        end
        
      # Directly after the opening <script> or <style> tag.
      when :in_special_tag
        case in_tag
        when 'script', 'style'
          encoder.text_token match, :space if match = scan(/[ \t]*\n/)
          if scan(/(\s*<!--)(?:(.*?)(-->)|(.*))/m)
            # Content wrapped in an HTML comment: group 1 is the opener,
            # group 2 or 4 the code, group 3 the closer (nil if missing).
            code = self[2] || self[4]
            closing = self[3]
            encoder.text_token self[1], :comment
          else
            # Everything up to the matching closing tag (an optional newline
            # plus whitespace before it is excluded) or the end of input.
            code = scan_until(/(?=(?:\n\s*)?<\/#{in_tag}>)|\z/)
            closing = false
          end
          unless code.empty?
            encoder.begin_group :inline
            if in_tag == 'script'
              scan_java_script encoder, code
            else
              scan_css encoder, code
            end
            encoder.end_group :inline
          end
          encoder.text_token closing, :comment if closing
          # The closing tag itself is handled by the :initial state.
          state = :initial
        else
          raise 'unknown special tag: %p' % [in_tag]
        end
        
      else
        raise_inspect 'Unknown state: %p' % [state], encoder
        
      end
      
    end
    
  end
  
  # Save the position in the state machine when the caller asked for it.
  if options[:keep_state]
    @state = state
    @plain_string_content = plain_string_content
    @in_tag = in_tag
  end
  
  # Close the string group if the input ended inside a quoted value.
  encoder.end_group :string if state == :attribute_value_string
  
  encoder
end

def setup

# Puts the HTML-specific scanner state into its starting configuration:
# the state machine begins in :initial, with no string-content pattern
# selected and no special tag open.
def setup
  @state = :initial
  @plain_string_content = @in_tag = nil
end