class Nokogiri::HTML::Document::EncodingReader

:nodoc:

def self.detect_encoding(chunk)

# Tries to work out the character encoding declared in +chunk+, the
# leading portion of a document.
#
# Detection order:
# 1. An XML declaration at the very start of the chunk; it is handed to
#    Nokogiri.XML and that document's encoding is returned.
# 2. On JRuby only, a <meta ... charset=...> attribute found by regexp.
# 3. A SAX parse of the chunk using SAXHandler, whose +encoding+ is
#    returned afterwards (presumably the handler throws :found once it
#    has seen a declaration -- confirm against SAXHandler).
#
# Returns the value found, or nil when nothing is found or when any
# StandardError is raised along the way (detection is best-effort).
def self.detect_encoding(chunk)
  xml_decl = chunk.match(/\A(<\?xml[ \t\r\n]+[^>]*>)/)
  return Nokogiri.XML(xml_decl[1]).encoding if xml_decl

  if Nokogiri.jruby?
    meta = chunk.match(/(<meta\s)(.*)(charset\s*=\s*([\w-]+))(.*)/i)
    # Capture group 4 is the bare charset name.
    return meta[4] if meta
  end

  sax_handler = SAXHandler.new
  sax_parser = Nokogiri::HTML::SAX::Parser.new(sax_handler)
  # The catch lets the parse be abandoned part-way via throw(:found).
  catch(:found) do
    sax_parser.parse(chunk)
  end
  sax_handler.encoding
rescue
  # Any failure simply means "no encoding detected".
  nil
end

def initialize(io)

# Builds a reader around +io+, the underlying input that #read pulls
# from. No chunk has been buffered yet, so @firstchunk starts as nil.
def initialize(io)
  @firstchunk = nil
  @io = io
end

def read(len)

# Reads up to +len+ bytes, running encoding detection on the first chunk.
#
# On the very first call the chunk read from @io is kept in @firstchunk
# and passed to EncodingReader.detect_encoding. If an encoding is found,
# EncodingFoundException is raised with that encoding and the chunk stays
# buffered, so that a retried parse re-reads it from the beginning.
# Otherwise the chunk is consumed and returned exactly once; later calls
# drain whatever is left in the buffer and then continue from @io.
#
# len:: maximum number of bytes to return (required; a call without a
#       length is not supported).
#
# Returns a String, or nil when no more data is available.
# Raises EncodingFoundException when the first chunk declares an encoding.
def read(len)
  # no support for a call without len
  unless @firstchunk
    @firstchunk = @io.read(len)
    return nil unless @firstchunk

    # This implementation expects and assumes that the first
    # call from htmlReadIO() is made with a length long enough
    # (~1KB) to achieve further encoding detection that
    # libxml2 does not do.
    encoding = EncodingReader.detect_encoding(@firstchunk)
    # The chunk is left untouched in @firstchunk for the next read in retry.
    raise EncodingFoundException, encoding if encoding
    # No early return here: falling through consumes the buffered chunk,
    # so it is not handed out a second time by the following call.
  end

  # Serve buffered bytes first; slice! removes them from the buffer.
  ret = @firstchunk.slice!(0, len)
  remaining = len - ret.length
  if remaining > 0
    rest = @io.read(remaining)
    ret << rest if rest
  end

  ret.empty? ? nil : ret
end