class Nokogiri::HTML4::Document::EncodingReader

:nodoc:

def self.detect_encoding(chunk)

# Looks for a character-encoding declaration inside +chunk+.
#
# chunk - String holding the leading bytes of a document.
#
# Returns whatever encoding value the first applicable strategy yields
# (possibly nil when nothing is declared):
#
# 1. A leading XML declaration is handed to Nokogiri.XML and the
#    resulting document's encoding is returned.
# 2. Off JRuby, the chunk is pushed through an HTML4 SAX push parser and
#    the encoding recorded by the SAXHandler is returned.
# 3. On JRuby, a <meta ... charset=...> regexp is tried first; otherwise a
#    SAX parse runs inside catch(:encoding_found), so a value thrown to
#    that tag becomes the result and a parse that simply finishes gives nil.
def self.detect_encoding(chunk)
  xml_decl = chunk.match(/\A(<\?xml[ \t\r\n]+[^>]*>)/)
  return Nokogiri.XML(xml_decl[1]).encoding if xml_decl

  unless Nokogiri.jruby?
    sax_handler = SAXHandler.new
    push_parser = Nokogiri::HTML4::SAX::PushParser.new(sax_handler)
    begin
      push_parser << chunk
    rescue StandardError
      # Best effort: a truncated chunk is rarely well-formed, so any
      # standard error raised while feeding the parser is ignored and
      # whatever the handler has collected so far is used.
    end
    return sax_handler.encoding
  end

  meta_tag = chunk.match(/(<meta\s)(.*)(charset\s*=\s*([\w-]+))(.*)/i)
  return meta_tag[4] if meta_tag

  catch(:encoding_found) do
    Nokogiri::HTML4::SAX::Parser.new(JumpSAXHandler.new(:encoding_found)).parse(chunk)
    nil
  end
end

def initialize(io)

# Wraps +io+ so that reads can be inspected for an encoding declaration.
#
# io - the underlying stream; #read calls io.read(len) on it.
#
# Both the buffered first chunk and the remembered encoding signal start
# out as nil; #read fills them in.
def initialize(io)
  @io, @firstchunk, @encoding_found = io, nil, nil
end

def read(len)

# Reads up to +len+ bytes, sniffing the very first chunk for an encoding.
#
# len - number of bytes wanted (calling without it is not supported).
#
# Returns a String, or nil when there is nothing left to hand out.
# Raises EncodingFound on the first call if the first chunk declares an
# encoding; the chunk stays buffered so a retried read replays it.
def read(len)
  unless @firstchunk
    @firstchunk = @io.read(len)
    return nil unless @firstchunk

    # This implementation expects that the first call from
    # htmlReadIO() is made with a length long enough (~1KB) to
    # achieve advanced encoding detection.
    detected = EncodingReader.detect_encoding(@firstchunk)
    if detected
      # The first chunk is kept for the next read in retry.
      @encoding_found = EncodingFound.new(detected)
      raise @encoding_found
    end
  end

  @encoding_found = nil

  # Drain the buffered first chunk before touching the stream again.
  buffer = @firstchunk.slice!(0, len)
  missing = len - buffer.length
  if missing > 0
    tail = @io.read(missing)
    buffer << tail if tail
  end

  buffer unless buffer.empty?
end

Modules

Classes

class Nokogiri::HTML4::Document::EncodingReader

def self.detect_encoding(chunk)

def initialize(io)

def read(len)

Namespace

Classes in this namespace

Class Methods

Instance Methods

Defined in