class Nokogiri::XML::Document
###
# Parse an XML document.
#
# +string_or_io+ may be a String, or any object that responds to _read_
# (such as an IO or StringIO). +url+ is the resource where this document
# is located; when it is not given and +string_or_io+ responds to _path_,
# that path is used instead. +encoding+ is the encoding that should be
# used when processing the document. +options+ is a number that sets
# options in the parser, such as Nokogiri::XML::ParseOptions::RECOVER
# (see the constants in Nokogiri::XML::ParseOptions), or any object
# responding to _to_i_.
#
# When a block is given it is yielded the options object (an Integer is
# first wrapped in a Nokogiri::XML::ParseOptions) so the caller can adjust
# it before parsing.
#
# Returns a new, empty document when +string_or_io+ is nil or empty.
def self.parse(string_or_io, url = nil, encoding = nil, options = ParseOptions::DEFAULT_XML, &block)
  # Integer rather than Fixnum: Fixnum was deprecated in Ruby 2.4 and
  # removed in 3.2, where merely referencing it raises NameError.
  options = Nokogiri::XML::ParseOptions.new(options) if options.is_a?(Integer)

  # Give the options to the user
  yield options if block_given?

  if string_or_io.respond_to?(:read)
    url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
    return read_io(string_or_io, url, encoding, options.to_i)
  end

  # read_memory pukes on empty docs
  return new if string_or_io.nil? || string_or_io.empty?

  read_memory(string_or_io, url, encoding, options.to_i)
end
###
# Read a document from +io+ by handing an IoCallbacks reader for it to
# LibXML.xmlReadIO together with +url+, +encoding+ and +options+. The call
# is made inside wrap_with_error_handling, whose result is returned.
def self.read_io(io, url, encoding, options)
  wrap_with_error_handling do
    reader = IoCallbacks.reader(io)
    LibXML.xmlReadIO(reader, nil, nil, url, encoding, options)
  end
end
###
# Read a document from the in-memory +string+ by passing it to
# LibXML.xmlReadMemory together with +url+, +encoding+ and +options+. The
# call is made inside wrap_with_error_handling, whose result is returned.
def self.read_memory(string, url, encoding, options)
  wrap_with_error_handling do
    # The size argument is a byte count. String#length counts characters,
    # which is smaller than the buffer for multibyte text and would make
    # the parser stop before the end of the document.
    LibXML.xmlReadMemory(string, string.bytesize, url, encoding, options)
  end
end
###
# Add +child+ to this document.
#
# Raises if the document already has a root node. When +child+ is a
# document fragment, its first child is added instead of the fragment
# itself, and an error is raised if the fragment holds more than one child.
def add_child(child)
  raise "Document already has a root node" if root

  return super unless child.type == Node::DOCUMENT_FRAG_NODE

  raise "Document cannot have multiple root nodes" if child.children.size > 1
  super(child.children.first)
end
###
# Recursively get all namespaces from this node and its subtree and
# return them as a hash.
#
# For example, given this document:
#
#   <root xmlns:foo="bar">
#     <bar xmlns:hello="world" />
#   </root>
#
# This method will return:
#
#   { 'xmlns:foo' => 'bar', 'xmlns:hello' => 'world' }
#
# WARNING: this method will clobber duplicate names in the keys.
# For example, given this document:
#
#   <root xmlns:foo="bar">
#     <bar xmlns:foo="baz" />
#   </root>
#
# The hash returned will look like this: { 'xmlns:foo' => 'bar' }
#
# Non-prefixed default namespaces (as in "xmlns=") are not included
# in the hash.
#
# Note this is a very expensive operation in current implementation, as it
# traverses the entire graph and merges the namespaces of every node.
def collect_namespaces
  collected = {}
  traverse do |node|
    collected.merge!(node.namespaces)
  end
  collected
end
###
# Create a Nokogiri::XML::CDATA node belonging to this document whose
# content is +text+ converted with _to_s_.
def create_cdata(text)
  cdata_text = text.to_s
  Nokogiri::XML::CDATA.new(self, cdata_text)
end
###
# Create an element with +name+, and optionally setting the content and attributes.
#
#   doc.create_element "div" # <div></div>
#   doc.create_element "div", :class => "container" # <div class='container'></div>
#   doc.create_element "div", "contents" # <div>contents</div>
#   doc.create_element "div", "contents", :class => "container" # <div class='container'>contents</div>
#   doc.create_element("div") { |node| node['class'] = "container" } # <div class='container'></div>
#
# Every Hash in +args+ is treated as attributes, except that keys of the
# form "xmlns" or "xmlns:prefix" become namespace definitions on the
# element. Any other argument is assigned as the element's content. The
# block, if given, is forwarded to Nokogiri::XML::Element.new.
def create_element(name, *args, &block)
  element = Nokogiri::XML::Element.new(name, self, &block)

  args.each do |arg|
    unless arg.is_a?(Hash)
      element.content = arg
      next
    end

    arg.each do |key, value|
      attr_name = key.to_s
      # \A and \z anchor the whole key. ^ and $ match per line, so a key
      # that merely contains a line reading "xmlns" (or ends in a newline)
      # would otherwise be mistaken for a namespace declaration.
      if attr_name =~ /\Axmlns(:\w+)?\z/
        prefix = attr_name.split(":", 2)[1]
        element.add_namespace_definition(prefix, value)
      else
        element[attr_name] = value.to_s
      end
    end
  end

  element
end
###
# Add an entity declaration called +name+ to this document through
# LibXML.xmlAddDocEntity and return it wrapped as a node.
#
# Raises a wrapped SyntaxError when libxml reports an error, or a
# RuntimeError ("Could not create entity") when the call fails without one.
def create_entity(name, entity_type = Nokogiri::XML::EntityDecl::INTERNAL_GENERAL,
                  external_id = nil, system_id = nil, content = nil)
  LibXML.xmlResetLastError()
  entity_ptr = LibXML.xmlAddDocEntity(cstruct, name, entity_type, external_id, system_id, content)
  return Node.wrap(LibXML::XmlEntity.new(entity_ptr)) unless entity_ptr.null?

  last_error = LibXML.xmlGetLastError()
  raise SyntaxError.wrap(last_error) if last_error
  raise RuntimeError, "Could not create entity"
end
###
# Create a Nokogiri::XML::Text node belonging to this document whose
# content is +text+ converted with _to_s_. The block, if given, is
# forwarded to Nokogiri::XML::Text.new.
def create_text_node(text, &block)
  content = text.to_s
  Nokogiri::XML::Text.new(content, self, &block)
end
###
# Extend +node+ with every module registered for a class that +node+ is an
# instance of. Does nothing (and returns nil) when no decorators have been
# registered on this document.
def decorate(node)
  return unless @decorators

  @decorators.each do |klass, extensions|
    next unless node.is_a?(klass)

    extensions.each { |extension| node.extend(extension) }
  end
end
###
# Return the list of decorator modules registered under +key+, creating
# the registry and an empty list for +key+ on first use. The returned
# array is the stored one, so appending to it registers a decorator.
def decorators(key)
  registry = (@decorators ||= Hash.new)
  registry[key] ||= []
end
###
# A document is its own document: returns self.
def document
  self
end
###
# Copy this document with LibXML.xmlCopyDoc, passing +deep+ through, and
# return the copy wrapped in this document's class. Returns nil when the
# copy pointer is null.
def dup(deep = 1)
  copy_ptr = LibXML.xmlCopyDoc(cstruct, deep)
  return nil if copy_ptr.null?

  # xmlCopyDoc does not preserve document type, so carry it over by hand.
  copy_struct = LibXML::XmlDocumentCast.new(copy_ptr)
  copy_struct[:type] = self.type
  self.class.wrap(copy_ptr)
end
###
# Return the document's encoding as a String, read from the :encoding
# field of the underlying struct, or nil when that pointer is null.
def encoding
  encoding_ptr = cstruct[:encoding]
  return nil if encoding_ptr.null?

  encoding_ptr.read_string
end
###
# Set the document's encoding by storing a LibXML.xmlStrdup copy of
# +encoding+ in the :encoding field of the underlying struct.
def encoding=(encoding)
  # TODO: if :encoding is already set, then it's probably getting leaked.
  duplicated = LibXML.xmlStrdup(encoding)
  cstruct[:encoding] = duplicated
end
###
# Create a Nokogiri::XML::DocumentFragment from +tags+, passing this
# document and its current root to DocumentFragment.new.
def fragment(tags = nil)
  context = self.root
  DocumentFragment.new(self, tags, context)
end
###
# The XPath context implied for this document: always "/".
def implied_xpath_context
  "/"
end
# Reset per-document Ruby state: no decorators and an empty error list.
# The arguments are accepted but not used here.
def initialize(*args) # :nodoc:
  @decorators = nil
  @errors = []
end
###
# The attribute names listed for a document: its name and its children.
def inspect_attributes
  [
    :name,
    :children
  ]
end
###
# The name of a document node: always 'document'.
def name
  'document'
end
###
# Return the namespaces of the root node, or an empty hash when the
# document has no root.
def namespaces
  return {} unless root

  root.namespaces
end
###
# Create a new, empty document. The first argument is used as the XML
# version and defaults to "1.0"; all arguments are then passed on to
# initialize on the wrapped document.
def new(*args)
  version = args.first || "1.0"
  wrap(LibXML.xmlNewDoc(version)).tap do |doc|
    doc.send(:initialize, *args)
  end
end
###
# Clear the :ns field of +node+, do the same for all of its descendants,
# and then free and clear the node's :nsDef list if it has one.
def recursively_remove_namespaces_from_node(node)
  node.cstruct[:ns] = nil
  node.children.each { |child| recursively_remove_namespaces_from_node(child) }

  # Children are processed before the definitions are freed.
  return if node.cstruct[:nsDef].nil?

  LibXML.xmlFreeNsList(node.cstruct[:nsDef])
  node.cstruct[:nsDef] = nil
end
###
# Remove all namespaces from the root node and every node below it.
#
# Does nothing and returns nil when the document has no root node;
# without this guard an empty document would fail with NoMethodError on nil.
def remove_namespaces!
  top = root
  return if top.nil?

  self.class.recursively_remove_namespaces_from_node(top)
end
###
# Return the root element of this document as a wrapped node, or nil when
# LibXML.xmlDocGetRootElement yields a null pointer.
def root
  root_ptr = LibXML.xmlDocGetRootElement(cstruct)
  return nil if root_ptr.null?

  Node.wrap(LibXML::XmlNode.new(root_ptr))
end
###
# Set the root element of this document to +new_root+.
#
# Passing nil unlinks the current root, if there is one, and returns nil.
# A node that belongs to a different document is first copied into this
# document with LibXML.xmlDocCopyNode, and the copy becomes the root.
#
# Returns the node that was installed as root (the copy, when one was made).
# Raises RuntimeError when the node cannot be copied into this document.
def root=(new_root)
  if new_root.nil?
    old_root_ptr = LibXML.xmlDocGetRootElement(cstruct)
    unless old_root_ptr.null?
      old_root = Node.wrap(old_root_ptr)
      LibXML.xmlUnlinkNode(old_root.cstruct)
      # NOTE(review): presumably keeps the unlinked node's memory owned by
      # the document - confirm against keep_reference_from_document!.
      old_root.cstruct.keep_reference_from_document!
    end
    return new_root
  end

  old_root_ptr = nil
  if new_root.cstruct[:doc] != cstruct[:doc]
    old_root_ptr = LibXML.xmlDocGetRootElement(cstruct)
    new_root_ptr = LibXML.xmlDocCopyNode(new_root.cstruct, cstruct, 1)
    # The comma matters: without it Ruby parses this as a call to a
    # RuntimeError(...) method and raises NoMethodError instead.
    raise RuntimeError, "Could not reparent node (xmlDocCopyNode)" if new_root_ptr.null?
    new_root = Node.wrap(new_root_ptr)
  end

  LibXML.xmlDocSetRootElement(cstruct, new_root.cstruct)
  if old_root_ptr && !old_root_ptr.null?
    LibXML::XmlNode.new(old_root_ptr).keep_reference_from_document!
  end
  new_root
end
###
# Explore a document with shortcut methods. See Nokogiri::Slop for details.
#
# Note that any nodes that have been instantiated before #slop!
# is called will not be decorated with sloppy behavior. So, if you're in
# irb, the preferred idiom is:
#
#   irb> doc = Nokogiri::Slop my_markup
#
# and not
#
#   irb> doc = Nokogiri::HTML my_markup
#   ... followed by irb's implicit inspect (and therefore instantiation of every node) ...
#   irb> doc.slop!
#   ... which does absolutely nothing.
#
# Returns self.
def slop!
  slop = Nokogiri::Decorators::Slop
  unless decorators(XML::Node).include?(slop)
    decorators(XML::Node) << slop
    decorate!
  end
  self
end
###
# Return the :URL field of the underlying document struct.
def url
  cstruct[:URL]
end
###
# Validate this Document against its DTD. Returns whatever the internal
# subset's validation of this document returns (a list of errors), or nil
# when the document has no internal subset.
def validate
  if internal_subset
    internal_subset.validate(self)
  end
end
###
# Return the :version field of the underlying document struct.
def version
  cstruct[:version]
end
###
# Build a Ruby document object around +doc_struct+.
#
# +doc_struct+ may be a document struct or a raw FFI::Pointer; a pointer
# is cast to a LibXML::XmlDocument first, and a null pointer yields nil.
# The new object is allocated without calling initialize, linked to the
# struct in both directions, given its initial instance variables, and
# only then initialized with no arguments.
def wrap(doc_struct)
  if doc_struct.is_a?(FFI::Pointer)
    # cast native pointers up into a doc cstruct
    return nil if doc_struct.null?

    doc_struct = LibXML::XmlDocument.new(doc_struct)
  end

  self.allocate.tap do |doc|
    doc.cstruct = doc_struct
    doc.cstruct.ruby_doc = doc
    doc.instance_eval do
      @decorators = nil
      @node_cache = []
    end
    doc.send(:initialize)
  end
end
###
# Run the given block, which must return a document pointer, with a
# structured libxml error handler installed that collects errors into a
# list, and return the wrapped document with that list assigned to its
# +errors+.
#
# Raises a wrapped SyntaxError when the block returns a null pointer and
# libxml reports a last error, or a RuntimeError ("Could not parse
# document") when it returns null without one.
def wrap_with_error_handling(&block)
  error_list = []
  LibXML.xmlInitParser()
  LibXML.xmlResetLastError()
  LibXML.xmlSetStructuredErrorFunc(nil, SyntaxError.error_array_pusher(error_list))

  begin
    ptr = yield
  ensure
    # Uninstall the handler even when the block raises; otherwise the
    # callback bound to this call's error_list would stay registered.
    LibXML.xmlSetStructuredErrorFunc(nil, nil)
  end

  if ptr.null?
    error = LibXML.xmlGetLastError()
    raise SyntaxError.wrap(error) if error
    raise RuntimeError, "Could not parse document"
  end

  document = wrap(ptr)
  document.errors = error_list
  document
end