lib/nokogiri/html/document.rb



module Nokogiri
  module HTML
    class Document < Nokogiri::XML::Document
      ###
      # Get the meta tag encoding for this document.  If there is no meta tag,
      # then nil is returned
      def meta_encoding
        return nil unless meta = css('meta').find { |node|
          node['http-equiv'] =~ /Content-Type/i
        }

        /charset\s*=\s*([\w-]+)/i.match(meta['content'])[1]
      end

      ###
      # Set the meta tag encoding for this document.  If there is no meta
      # content tag, nil is returned and the encoding is not set.
      def meta_encoding= encoding
        return nil unless meta = css('meta').find { |node|
          node['http-equiv'] =~ /Content-Type/i
        }

        meta['content'] = "text/html; charset=%s" % encoding
        encoding
      end

      ####
      # Serialize Node using +options+.  Save options can also be set using a
      # block. See SaveOptions.
      #
      # These two statements are equivalent:
      #
      #  node.serialize(:encoding => 'UTF-8', :save_with => FORMAT | AS_XML)
      #
      # or
      #
      #   node.serialize(:encoding => 'UTF-8') do |config|
      #     config.format.as_xml
      #   end
      #
      def serialize options = {}
        options[:save_with] ||= XML::Node::SaveOptions::FORMAT |
            XML::Node::SaveOptions::AS_HTML |
            XML::Node::SaveOptions::NO_DECLARATION |
            XML::Node::SaveOptions::NO_EMPTY_TAGS
        super
      end

      ####
      # Create a Nokogiri::XML::DocumentFragment from +tags+
      def fragment tags = nil
        DocumentFragment.new(self, tags, self.root)
      end

      class << self
        ###
        # Parse HTML.  +thing+ may be a String, or any object that
        # responds to _read_ and _close_ such as an IO, or StringIO.
        # +url+ is resource where this document is located.  +encoding+ is the
        # encoding that should be used when processing the document. +options+
        # is a number that sets options in the parser, such as
        # Nokogiri::XML::ParseOptions::RECOVER.  See the constants in
        # Nokogiri::XML::ParseOptions.
        def parse string_or_io, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML

          options = Nokogiri::XML::ParseOptions.new(options) if Fixnum === options
          # Give the options to the user
          yield options if block_given?

          if string_or_io.respond_to?(:encoding)
            unless string_or_io.encoding.name == "ASCII-8BIT"
              encoding ||= string_or_io.encoding.name
            end
          end

          if string_or_io.respond_to?(:read)
            url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
            return read_io(string_or_io, url, encoding, options.to_i)
          end

          # read_memory pukes on empty docs
          return new if string_or_io.nil? or string_or_io.empty?

          read_memory(string_or_io, url, encoding, options.to_i)
        end
      end

    end
  end
end