# lib/nokogiri/xml/node.rb



require 'stringio'
require 'nokogiri/xml/node/save_options'

module Nokogiri
  module XML
    class Node
      # Node type codes.  The predicate methods of this class (comment?,
      # cdata?, xml?, html?, text?, element?, read_only?) compare the value
      # returned by +type+ against these constants.
      # NOTE(review): the numbering presumably mirrors libxml2's
      # xmlElementType enum -- confirm against the native extension.
      ELEMENT_NODE =       1
      ATTRIBUTE_NODE =     2
      TEXT_NODE =          3
      CDATA_SECTION_NODE = 4
      ENTITY_REF_NODE =    5
      ENTITY_NODE =        6
      PI_NODE =            7
      COMMENT_NODE =       8
      DOCUMENT_NODE =      9
      DOCUMENT_TYPE_NODE = 10
      DOCUMENT_FRAG_NODE = 11
      NOTATION_NODE =      12
      HTML_DOCUMENT_NODE = 13
      DTD_NODE =           14
      ELEMENT_DECL =       15
      ATTRIBUTE_DECL =     16
      ENTITY_DECL =        17
      NAMESPACE_DECL =     18
      XINCLUDE_START =     19
      XINCLUDE_END =       20
      DOCB_DOCUMENT_NODE = 21

      # The Document associated with this Node.
      # Read by decorate!, children, xpath, css, at, before, after and to_s.
      attr_accessor :document

      ###
      # Decorate this node with the decorators set up in this node's Document
      def decorate!
        doc = document
        # Without an owning document there are no decorators to apply.
        return nil unless doc

        doc.decorate(self)
      end

      ###
      # Get the list of children for this node as a NodeSet
      def children
        set = NodeSet.new(document)
        document.decorate(set)

        # Walk the sibling chain starting at the first child; an absent
        # first child simply leaves the set empty.
        node = child
        while node
          set << node
          node = node.next
        end
        set
      end

      ###
      # Search this node for +paths+.  +paths+ can be XPath or CSS, and an
      # optional hash of namespaces may be appended.
      # See Node#xpath and Node#css.
      def search *paths
        # A trailing Hash is the namespace map; it is removed from the queries.
        namespaces = paths.last.is_a?(Hash) ? paths.pop : {}

        queries = paths.map do |path|
          query = path.to_s
          if query =~ /^(\.\/|\/)/
            # Starts with "./" or "/": already XPath, pass through untouched.
            query
          else
            # Anything else is handed to the CSS-to-XPath translator.
            CSS.xpath_for(query, :prefix => ".//")
          end
        end

        xpath(*(queries.flatten.uniq + [namespaces]))
      end
      alias :/ :search

      ###
      # Search this node for XPath +paths+. +paths+ must be one or more XPath
      # queries.  A hash of namespaces may be appended.  For example:
      #
      #   node.xpath('.//title')
      #   node.xpath('.//foo:name', { 'foo' => 'http://example.org/' })
      #   node.xpath('.//xmlns:name', node.root.namespaces)
      #
      # Custom XPath functions may also be defined.  To define custom functions
      # create a class and implement the function you want to define.
      # For example:
      #
      #   node.xpath('.//title[regex(., "\w+")]', Class.new {
      #     def regex node_set, regex
      #       node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
      #     end
      #   }.new)
      #
      def xpath *paths
        # Pop off our custom function handler if it exists: anything trailing
        # that is not a Hash, String or Symbol is treated as the handler.
        handler = ![
          Hash, String, Symbol
        ].include?(paths.last.class) ? paths.pop : nil

        # A document without a root yields an empty result.  This check must
        # run before the namespace default below, which would otherwise call
        # +namespaces+ on a nil root and raise NoMethodError.
        root = document.root
        return NodeSet.new(document) unless root

        # An explicit trailing Hash wins; otherwise use the root's namespaces.
        ns = paths.last.is_a?(Hash) ? paths.pop : root.namespaces

        sets = paths.map { |path|
          # Each query is evaluated in its own context built from this node.
          ctx = XPathContext.new(self)
          ctx.register_namespaces(ns)
          set = ctx.evaluate(path, handler).node_set
          set.document = document
          document.decorate(set)
          set
        }
        # A single query returns its own result set unmodified.
        return sets.first if sets.length == 1

        # Several queries (or none): concatenate the results in query order.
        NodeSet.new(document) do |combined|
          document.decorate(combined)
          sets.each do |set|
            set.each do |node|
              combined << node
            end
          end
        end
      end

      ###
      # Search this node for CSS +rules+. +rules+ must be one or more CSS
      # selectors.  For example:
      #
      #   node.css('title')
      #   node.css('body h1.bold')
      #   node.css('div + p.green', 'div#one')
      #
      # Custom CSS pseudo classes may also be defined.  To define custom pseudo
      # classes, create a class and implement the custom pseudo class you
      # want defined.  The first argument to the method will be the current
      # matching NodeSet.  Any other arguments are ones that you pass in.
      # For example:
      #
      #   node.css('title:regex("\w+")', Class.new {
      #     def regex node_set, regex
      #       node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
      #     end
      #   }.new)
      #
      def css *rules
        # Pop off our custom function handler if it exists: anything trailing
        # that is not a Hash, String or Symbol is treated as the handler.
        handler = ![
          Hash, String, Symbol
        ].include?(rules.last.class) ? rules.pop : nil

        if rules.last.is_a?(Hash)
          ns = rules.pop
        else
          # Fall back to the root's namespaces, tolerating a document with no
          # root (calling +namespaces+ on nil would raise NoMethodError).
          root = document.root
          ns = root ? root.namespaces : {}
        end

        # Translate every selector to XPath, then append the namespaces and
        # the handler (if any) in the trailing positions xpath expects.
        rules = rules.map { |rule|
          CSS.xpath_for(rule, :prefix => ".//", :ns => ns)
        }.flatten.uniq + [ns, handler].compact

        xpath(*rules)
      end

      ###
      # Search for the first occurrence of +path+.
      # Returns nil if nothing is found, otherwise a Node.
      # +ns+ defaults to the root element's namespaces, or to an empty hash
      # when the document has no root (so the default no longer raises
      # NoMethodError on a rootless document).
      def at(path, ns = (document.root ? document.root.namespaces : {}))
        search(path, ns).first
      end

      ###
      # Get the attribute value for the attribute +name+
      def [](name)
        # Only call +get+ for attributes that +key?+ reports as present;
        # everything else reads as nil.
        key?(name) ? get(name) : nil
      end

      # Convenience aliases.  The targets that are not defined in this file
      # (next_sibling, previous_sibling, unlink, []=, content, key?,
      # add_child, node_name, node_name=, node_type) are presumably provided
      # by the native extension -- TODO confirm.
      alias :next           :next_sibling
      alias :previous       :previous_sibling
      alias :remove         :unlink
      alias :get_attribute  :[]
      alias :set_attribute  :[]=
      alias :text           :content
      alias :inner_text     :content
      alias :has_attribute? :key?
      alias :<<             :add_child
      alias :name           :node_name
      alias :name=          :node_name=
      alias :type           :node_type
      # +text+ is itself an alias of +content+, so to_str returns the content.
      alias :to_str         :text

      ####
      # Returns a hash containing the node's attributes.  The key is the
      # attribute name, the value is the attribute node itself (call +value+
      # on it to get the attribute's string value).
      def attributes
        by_name = {}
        # Later attribute nodes overwrite earlier ones that share a name.
        attribute_nodes.each do |attr|
          by_name[attr.node_name] = attr
        end
        by_name
      end

      ###
      # Get the attribute values for this Node.
      def values
        # One entry per attribute node, in attribute_nodes order.
        attribute_nodes.map do |attr|
          attr.value
        end
      end

      ###
      # Get the attribute names for this Node.
      def keys
        # One entry per attribute node, in attribute_nodes order.
        attribute_nodes.map do |attr|
          attr.node_name
        end
      end

      ###
      # Iterate over each attribute name and value pair for this Node.
      def each &block
        # The block receives two arguments per attribute: name, then value.
        attribute_nodes.each do |attr|
          name, value = attr.node_name, attr.value
          block.call(name, value)
        end
      end

      ###
      # Remove the attribute named +name+
      def remove_attribute name
        # Nothing to do (and nil is returned) when the attribute is absent.
        return nil unless key?(name)

        attributes[name].remove
      end
      alias :delete :remove_attribute

      ####
      # Create nodes from +data+ and insert them before this node
      # (as a sibling).
      def before data
        # Derive the SAX parser class that sits next to the document's class
        # by replacing the last segment of the class name with "SAX::Parser".
        classes = document.class.name.split('::')
        classes[-1] = 'SAX::Parser'

        # Resolve the constant path one segment at a time rather than handing
        # a runtime-built string to eval; a name that is not a plain constant
        # path now raises NameError instead of being executed as code.
        parser_class = classes.join('::').split('::').inject(Object) { |scope, const|
          scope.const_get(const)
        }

        parser = parser_class.new(BeforeHandler.new(self, data))
        parser.parse(data)
      end

      ####
      # Create nodes from +data+ and insert them after this node
      # (as a sibling).
      def after data
        # Derive the SAX parser class that sits next to the document's class
        # by replacing the last segment of the class name with "SAX::Parser".
        classes = document.class.name.split('::')
        classes[-1] = 'SAX::Parser'

        # Resolve the constant path one segment at a time rather than handing
        # a runtime-built string to eval; a name that is not a plain constant
        # path now raises NameError instead of being executed as code.
        parser_class = classes.join('::').split('::').inject(Object) { |scope, const|
          scope.const_get(const)
        }

        handler = AfterHandler.new(self, data)
        parser = parser_class.new(handler)
        parser.parse(data)
        # The collected nodes are attached last-to-first; presumably each
        # add_next_sibling call inserts directly after self, which would
        # leave them in their parsed order -- confirm against add_next_sibling.
        handler.after_nodes.reverse.each do |sibling|
          self.add_next_sibling sibling
        end
      end

      ####
      # Set the content to +string+.
      # If +encode+, encode any special characters first.
      def content= string, encode = true
        # Escape special characters only when +encode+ is truthy.  Plain
        # assignment (node.content = "x") always uses the default; pass a
        # second argument via send to store +string+ verbatim.
        self.native_content = encode ? encode_special_chars(string) : string
      end

      ###
      # Set the parent Node for this Node
      def parent= parent_node
        # Re-parenting is delegated to the new parent's add_child; the
        # parent (not add_child's result) is what this method returns.
        parent_node.add_child(self)
        return parent_node
      end

      # True when this node's type is COMMENT_NODE.
      def comment?
        COMMENT_NODE == type
      end

      # True when this node's type is CDATA_SECTION_NODE.
      def cdata?
        CDATA_SECTION_NODE == type
      end

      # True when this node's type is DOCUMENT_NODE.
      def xml?
        DOCUMENT_NODE == type
      end

      # True when this node's type is HTML_DOCUMENT_NODE.
      def html?
        HTML_DOCUMENT_NODE == type
      end

      # True when this node's type is TEXT_NODE.
      def text?
        TEXT_NODE == type
      end

      # True for notation, entity and entity-declaration nodes.
      def read_only?
        # According to gdome2, these are read-only node types
        case type
        when NOTATION_NODE, ENTITY_NODE, ENTITY_DECL
          true
        else
          false
        end
      end

      # True when this node's type is ELEMENT_NODE.
      def element?
        ELEMENT_NODE == type
      end
      alias :elem? :element?

      # Serialize with to_xml when the owning document reports xml?,
      # otherwise with to_html.
      def to_s
        if document.xml?
          to_xml
        else
          to_html
        end
      end

      # The to_html output of every child, concatenated in child order.
      def inner_html
        fragments = children.map do |kid|
          kid.to_html
        end
        fragments.join
      end

      # Rewrite +path+ as a CSS selector: segments are joined with " > " and
      # positional predicates "[n]" become ":nth-of-type(n)".
      def css_path
        # Splitting on "/" leaves empty strings (e.g. for a leading slash);
        # those are dropped.
        segments = path.split(/\//).reject { |part| part.length == 0 }
        selectors = segments.map do |part|
          part.gsub(/\[(\d+)\]/, ':nth-of-type(\1)')
        end
        selectors.join(' > ')
      end

      #  recursively get all namespaces from this node and its subtree
      def collect_namespaces
        # TODO: print warning message if a prefix refers to more than one URI in the document?
        found = {}
        # Nodes visited later overwrite earlier entries with the same key.
        traverse do |node|
          found.update(node.namespaces)
        end
        found
      end

      ###
      # Get a list of ancestor Node for this Node
      def ancestors
        chain = []

        # Climb parent links until one is missing; nearest ancestor first.
        node = parent
        while node
          chain << node
          node = node.parent
        end
        chain
      end

      ####
      # Yields every descendant of this node, and then the node itself, to
      # +block+ (children are visited before their parent).
      def traverse(&block)
        # Recurse into each child first ...
        children.each do |kid|
          kid.traverse(&block)
        end
        # ... then hand this node to the block; its result is returned.
        block.call(self)
      end

      ####
      #  replace node with the new node in the document.
      def replace(new_node)
        # Anything that is not a Document goes straight to the replacement.
        return replace_with_node(new_node) unless new_node.is_a?(Document)

        raise ArgumentError, <<-EOERR
Node.replace requires a Node argument, and cannot accept a Document.
(You probably want to select a node from the Document with at() or search(), or create a new Node via Node.new().)
        EOERR
      end

      ###
      # Test to see if this Node is equal to +other+
      def ==(other)
        # nil/false and objects without a pointer_id are never equal to a
        # node; otherwise equality is decided by comparing pointer_id values.
        if other && other.respond_to?(:pointer_id)
          pointer_id == other.pointer_id
        else
          false
        end
      end

      ###
      # Serialize Node using +encoding+ and +save_options+.  Save options 
      # can also be set using a block. See SaveOptions.
      #
      # These two statements are equivalent:
      #
      #  node.serialize('UTF-8', FORMAT | AS_XML)
      #
      # or
      #
      #   node.serialize('UTF-8') do |config|
      #     config.format.as_xml
      #   end
      #
      def serialize encoding = nil, save_options = SaveOptions::FORMAT, &block
        # Render into an in-memory buffer via write_to, then return all of it.
        buffer = StringIO.new
        write_to(buffer, encoding, save_options, &block)
        buffer.rewind
        buffer.read
      end

      ###
      # Serialize this Node to HTML using +encoding+
      def to_html encoding = nil
        # FIXME: this is a hack around broken libxml versions
        # (libxml 2.6.x falls back to dump_html and ignores +encoding+).
        major_minor = LIBXML_VERSION.split('.')[0..1]
        return dump_html if %w[2 6] == major_minor

        html_flags = SaveOptions::FORMAT |
                     SaveOptions::NO_DECLARATION |
                     SaveOptions::NO_EMPTY_TAGS |
                     SaveOptions::AS_HTML
        serialize(encoding, html_flags)
      end

      ###
      # Serialize this Node to XML using +encoding+
      def to_xml encoding = nil
        # Formatted output, XML serializer.
        xml_flags = SaveOptions::FORMAT | SaveOptions::AS_XML
        serialize(encoding, xml_flags)
      end

      ###
      # Serialize this Node to XHTML using +encoding+
      def to_xhtml encoding = nil
        # FIXME: this is a hack around broken libxml versions
        # (libxml 2.6.x falls back to dump_html and ignores +encoding+).
        major_minor = LIBXML_VERSION.split('.')[0..1]
        return dump_html if %w[2 6] == major_minor

        xhtml_flags = SaveOptions::FORMAT |
                      SaveOptions::NO_DECLARATION |
                      SaveOptions::NO_EMPTY_TAGS |
                      SaveOptions::AS_XHTML
        serialize(encoding, xhtml_flags)
      end

      ###
      # Write Node to +io+ with +encoding+ and +save_options+
      def write_to io, encoding = nil, save_options = SaveOptions::FORMAT
        options = SaveOptions.new(save_options)
        # A block, when given, may adjust the options before writing.
        if block_given?
          yield(options)
        end

        native_write_to(io, encoding, options.options)
      end

      ###
      # Write Node as HTML to +io+ with +encoding+
      def write_html_to io, encoding = nil
        html_flags = SaveOptions::FORMAT |
                     SaveOptions::NO_DECLARATION |
                     SaveOptions::NO_EMPTY_TAGS |
                     SaveOptions::AS_HTML
        write_to(io, encoding, html_flags)
      end

      ###
      # Write Node as XHTML to +io+ with +encoding+
      def write_xhtml_to io, encoding = nil
        xhtml_flags = SaveOptions::FORMAT |
                      SaveOptions::NO_DECLARATION |
                      SaveOptions::NO_EMPTY_TAGS |
                      SaveOptions::AS_XHTML
        write_to(io, encoding, xhtml_flags)
      end

      ###
      # Write Node as XML to +io+ with +encoding+
      def write_xml_to io, encoding = nil
        xml_flags = SaveOptions::FORMAT | SaveOptions::AS_XML
        write_to(io, encoding, xml_flags)
      end

      # Deprecated: prints a deprecation notice to $stderr, then returns the
      # first element of Nokogiri::HTML.fragment(string).
      def self.new_from_str string
        $stderr.puts("This method is deprecated and will be removed in 1.2.0 or by March 1, 2009. Instead, use Nokogiri::HTML.fragment()")
        fragment = Nokogiri::HTML.fragment(string)
        fragment.first
      end
    end
  end
end