class Sanitize

def preprocess(html)

Preprocesses HTML before parsing to remove undesirable Unicode characters.
# Preprocesses HTML before parsing: normalizes the input to valid UTF-8 and
# strips every character matched by REGEX_UNSUITABLE_CHARS.
#
# The argument itself is never modified; a new String is always returned.
#
# @param html [#to_s] markup to clean up (nil becomes an empty string)
# @return [String] a new UTF-8 encoded String
def preprocess(html)
  # Work on a copy so the caller's object is left untouched.
  html = html.to_s.dup

  if html.encoding == Encoding::UTF_8
    # No transcoding happens for UTF-8 input, so malformed byte sequences
    # must be replaced here; otherwise the regexp-based gsub! below raises
    # ArgumentError ("invalid byte sequence in UTF-8").
    html.scrub! unless html.valid_encoding?
  else
    html.encode!('UTF-8',
      :invalid => :replace,
      :undef   => :replace)
  end

  html.gsub!(REGEX_UNSUITABLE_CHARS, '')
  html
end