class MaRuKu::In::Markdown::SpanLevelParser::HTMLHelper

This class helps me read and sanitize HTML blocks

def close_script_style

Finish script or style tag content, wrapping it in CDATA if necessary,
and add it to our original @already buffer.
# Finish the content collected for the innermost script or style tag and
# hand it back to the outer buffer.
#
# If the collected content contains "<", "&", "]]>" or "--", it is wrapped
# between the markers returned by script_style_cdata_start and
# script_style_cdata_end, with a newline inserted after the start marker and
# before the end marker unless the content already provides one. The content
# is then appended to @before_already, which becomes @already again.
def close_script_style
  # See http://www.w3.org/TR/xhtml1/#C_4 for character sequences not allowed within an element body.
  if @already =~ /<|&|\]\]>|--/
    enclosing = @tag_stack.last
    leading = @already.start_with?("\n") ? "" : "\n"
    trailing = @already.end_with?("\n") ? "" : "\n"
    @already = script_style_cdata_start(enclosing) << leading << @already <<
               trailing << script_style_cdata_end(enclosing)
  end
  # Merge into the saved outer buffer and make that buffer current again.
  @already = (@before_already << @already)
end

def debug_state(note)

# Send a one-line trace to my_debug made of the current @state, the given
# +note+ and the inspected text of the current match @m.
def debug_state(note)
  matched_text = @m.to_s.inspect
  my_debug format("%s: %s: %s", @state, note, matched_text)
end

def eat_this(line)

# Feed +line+ to the reader and consume as much input as possible.
#
# +line+ is prepended to whatever is still pending in @rest. The loop then
# dispatches on the current state (:inside_element, :inside_comment,
# :inside_tag, :inside_cdata), moving consumed text from @rest to @already.
# It stops when @rest is empty, or as soon as is_finished? is true after at
# least one tag or comment start has been handled during this call; in that
# case any unconsumed text stays in @rest.
#
# The matchers CommentStart, CommentEnd, Tag, CData, CDataEnd, PartialTag
# and EverythingElse are constants defined outside this method.
#
# Calls #error when none of the matchers applies in :inside_element, and
# raises a RuntimeError if the state is not one of the four known states.
def eat_this(line)
  @rest = line + @rest
  # Number of comment starts and tags handled during this call.
  things_read = 0
  until @rest.empty?
    case self.state
    when :inside_comment
      if @m = CommentEnd.match(@rest)
        debug_state 'Comment End'
        # Workaround for https://bugs.ruby-lang.org/issues/9277 and another bug in 1.9.2 where even a
        # single dash in a comment will cause REXML to error.
        # The gsub appends a space to every dash that is followed by another
        # dash or by the end of the text it is applied to.
        @already << @m.pre_match.gsub(/-(?![^\-])/, '- ') << @m.to_s
        @rest = @m.post_match
        self.state = :inside_element
      else
        # No comment end yet: consume everything and stay inside the comment.
        @already << @rest.gsub(/-(?![^\-])/, '- ') # Workaround for https://bugs.ruby-lang.org/issues/9277
        @rest = ""
        self.state = :inside_comment
      end
    when :inside_element
      # The matchers are tried in a fixed order; the first one that matches wins.
      if @m = CommentStart.match(@rest)
        debug_state 'Comment'
        things_read += 1
        @already << @m.pre_match << @m.to_s
        @rest = @m.post_match
        self.state = :inside_comment
      elsif @m = Tag.match(@rest)
        debug_state 'Tag'
        things_read += 1
        self.state = :inside_element
        # handle_tag consumes the match: it updates @already, @rest and @tag_stack.
        handle_tag
      elsif @m = CData.match(@rest)
        debug_state 'CDATA'
        @already << @m.pre_match
        # When the innermost open tag is script or style, finish its buffered
        # content before emitting the CDATA marker.
        close_script_style if script_style?
        @already << @m.to_s
        @rest = @m.post_match
        self.state = :inside_cdata
      elsif @m = PartialTag.match(@rest)
        debug_state 'PartialTag'
        @already << @m.pre_match
        @rest = @m.post_match
        # Keep the matched text aside; the :inside_tag state completes it.
        @partial_tag = @m.to_s
        self.state = :inside_tag
      elsif @m = EverythingElse.match(@rest)
        debug_state 'EverythingElse'
        @already << @m.pre_match << @m.to_s
        @rest = @m.post_match
        self.state = :inside_element
      else
        error "Malformed HTML: not complete: #{@rest.inspect}"
      end
    when :inside_tag
      if @m = /^[^>]*>/.match(@rest)
        # The closing '>' has arrived: rebuild the full tag text, put it back
        # at the front of @rest and try to handle it as a regular tag.
        @partial_tag << @m.to_s
        @rest = @partial_tag + @m.post_match
        @partial_tag = nil
        self.state = :inside_element
        if @m = Tag.match(@rest)
          things_read += 1
          handle_tag
        end
      else
        # Still no '>': keep accumulating the partial tag.
        @partial_tag << @rest
        @rest = ""
        self.state = :inside_tag
      end
    when :inside_cdata
      if @m = CDataEnd.match(@rest)
        self.state = :inside_element
        @already << @m.pre_match << @m.to_s
        @rest = @m.post_match
        # Back inside a script/style element: start a fresh content buffer.
        start_script_style if script_style?
      else
        # No CDATA end yet: consume everything and stay inside the section.
        @already << @rest
        @rest = ""
        self.state = :inside_cdata
      end
    else
      raise "Bug bug: state = #{self.state.inspect}"
    end
    # Stop early once every opened tag is closed, leaving the remainder in @rest.
    break if is_finished? && things_read > 0
  end
end

def error(s)

# Abort with a diagnostic.
#
# Raises a RuntimeError whose message is "Error: <s> \n" followed by the
# output of #inspect. The backtrace is set to +caller+, so the exception
# points at the code that called #error rather than at #error itself.
#
# The exception class has to be given explicitly: with two arguments,
# `raise "message", caller` treats the String as the exception class/object
# and fails with "TypeError: exception class/object expected", which loses
# the diagnostic message.
def error(s)
  raise RuntimeError, "Error: #{s} \n#{inspect}", caller
end

def handle_tag

# Consume the tag currently matched in @m.
#
# The text before the match is appended to @already and the text after it
# becomes the new @rest. Of the match groups, group 1 is present for a
# closing tag, group 2 is the tag name and group 3 the attribute text (a
# trailing "/" marks a self-closing tag and is stripped from the attributes).
#
# * Tags listed in TO_SANITIZE are always re-emitted in self-closing form
#   and never touch the tag stack.
# * A closing tag must match the innermost open tag, otherwise #error is
#   called; script/style content is finished before the tag is emitted.
# * Any other tag is emitted verbatim and pushed on the stack unless it is
#   self-closing; entering script/style starts a separate content buffer.
def handle_tag
  @already << @m.pre_match
  @rest = @m.post_match

  closing = !@m[1].nil?
  name = @m[2]
  @first_tag ||= name

  attrs = @m[3].to_s
  self_closing = attrs.end_with?('/')
  attrs = attrs.chomp('/') if self_closing

  if TO_SANITIZE.include?(name)
    attrs.strip!
    @already << (attrs.empty? ? "<#{name} />" : "<#{name} #{attrs} />")
  elsif closing
    problem =
      if @tag_stack.empty?
        "Malformed: closing tag #{name.inspect} in empty list"
      elsif @tag_stack.last != name
        "Malformed: tag <#{name}> closes <#{@tag_stack.last}>"
      end
    error problem if problem

    close_script_style if script_style?
    @already << @m.to_s
    @tag_stack.pop
  else
    @already << @m.to_s
    @tag_stack.push(name) unless self_closing
    start_script_style if script_style?
  end
end

def initialize

# Create an empty reader: nothing matched yet, no open tags, empty input and
# output buffers, and the state machine starting in :inside_element.
def initialize
  @m = nil          # most recent match
  @tag_stack = []   # names of the currently open tags, innermost last
  @already = ""     # text consumed so far
  @rest = ""        # input still waiting to be consumed
  self.state = :inside_element
end

def inspect

# Multi-line dump of the reader for diagnostics: current state, text of the
# last match, the tag stack, and the consumed (@already) and pending (@rest)
# buffers with every line prefixed by "|".
def inspect
  "HTML READER\n state=#{self.state} match=#{@m.to_s.inspect}\n" \
    "Tag stack = #{@tag_stack.inspect} \n" \
    "Before:\n#{@already.gsub(/^/, '|')}\n" \
    "After:\n#{@rest.gsub(/^/, '|')}\n"
end

def is_finished?

# True when the reader is in the :inside_element state and no tag is left
# open on the tag stack.
def is_finished?
  return false unless self.state == :inside_element

  @tag_stack.empty?
end

def my_debug(s)

# Debug hook. Currently a no-op that ignores +s+ and returns nil; uncomment
# the line below to print the reader dump followed by the message.
def my_debug(s)
  #    puts "---" * 10 + "\n" + inspect + "\t>>>\t" + s
end

def script_style?

Are we within a script or style tag?
# True when the innermost open tag (the last entry of @tag_stack) is
# "script" or "style"; false otherwise, including when the stack is empty.
def script_style?
  case @tag_stack.last
  when 'script', 'style' then true
  else false
  end
end

def script_style_cdata_end(tag)

# Closing CDATA marker for the given tag name, hidden inside a comment of
# the embedded language: a "//" line comment for "script", a "/* */" block
# comment for anything else.
def script_style_cdata_end(tag)
  return "//]]>" if tag == 'script'

  "/*]]>*/"
end

def script_style_cdata_start(tag)

# Opening CDATA marker for the given tag name, hidden inside a comment of
# the embedded language: a "//" line comment for "script", a "/* */" block
# comment for anything else. A new String is returned on every call.
def script_style_cdata_start(tag)
  return "//<![CDATA[" if tag == 'script'

  "/*<![CDATA[*/"
end

def start_script_style

Save our @already buffer elsewhere, and switch to using @already for the
contents of this script or style tag.
# Park the current @already buffer in @before_already and continue with a
# fresh, empty @already that collects the content of the script or style
# tag just opened. Returns the pair [parked buffer, fresh buffer].
def start_script_style
  parked = @already
  fresh = ""
  @before_already = parked
  @already = fresh
  [parked, fresh]
end

def stuff_you_read

# Returns the @already buffer, i.e. the text this reader has consumed so
# far. The buffer object itself is returned, not a copy.
def stuff_you_read
  @already
end