lib/sanitize/transformers/fix_fragment_cdata.rb
class Sanitize; module Transformers # Nokogiri 1.4.2 and higher contain a fragment parsing bug that causes the # string "</body></html>" to be appended to the CDATA inside an unterminated # <script> or <style> element. This transformer works around this bug by # finding affected elements and removing the spurious text. # # See http://github.com/tenderlove/nokogiri/issues#issue/315 FIX_FRAGMENT_CDATA = lambda do |env| node_name = env[:node_name] if node_name == 'script' || node_name == 'style' node = env[:node] unless node.children.empty? last_child = node.children.last if last_child.text? && last_child.content =~ %r|</body></html>$| last_child.content = last_child.content.chomp('</body></html>') end end end nil end end; end