module Nokogiri::XML::Searchable
# :call-seq:
#   >(selector) → NodeSet
#
# Search this node's immediate children using CSS selector +selector+.
#
# The selector is translated to XPath with a "./" prefix, using the namespaces declared on the
# document root (or no namespaces when the document has no root), and the first resulting
# expression is handed to #xpath.
def >(selector) # rubocop:disable Naming/BinaryOperatorParameterName
  namespaces = document.root&.namespaces
  namespaces ||= {}
  child_query = CSS.xpath_for(selector, prefix: "./", ns: namespaces).first
  xpath(child_query)
end
##
# call-seq:
#   at(*paths, [namespace-bindings, xpath-variable-bindings, custom-handler-class])
#
# Search this object for +paths+, and return only the first
# result. +paths+ must be one or more XPath or CSS queries.
#
# All arguments are forwarded unchanged to #search.
def at(*args)
  matches = search(*args)
  matches.first
end
##
# call-seq:
#   at_css(*rules, [namespace-bindings, custom-pseudo-class])
#
# Search this object for CSS +rules+, and return only the first
# match. +rules+ must be one or more CSS selectors.
#
# All arguments are forwarded unchanged to #css.
def at_css(*args)
  matches = css(*args)
  matches.first
end
##
# call-seq:
#   at_xpath(*paths, [namespace-bindings, variable-bindings, custom-handler-class])
#
# Search this node for XPath +paths+, and return only the first
# match. +paths+ must be one or more XPath queries.
#
# All arguments are forwarded unchanged to #xpath.
def at_xpath(*args)
  matches = xpath(*args)
  matches.first
end
##
# call-seq:
#   css(*rules, [namespace-bindings, custom-pseudo-class])
#
# Search this object for CSS +rules+. +rules+ must be one or more CSS
# selectors. For example:
#
#   node.css('title')
#   node.css('body h1.bold')
#   node.css('div + p.green', 'div#one')
#
# A hash of namespace bindings may be appended. For example:
#
#   node.css('bike|tire', {'bike' => 'http://schwinn.com/'})
#
# 💡 Custom CSS pseudo classes may also be defined which are mapped to a custom XPath
# function. To define custom pseudo classes, create a class and implement the custom pseudo
# class you want defined. The first argument to the method will be the matching context
# NodeSet. Any other arguments are ones that you pass in. For example:
#
#   handler = Class.new {
#     def regex(node_set, regex)
#       node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
#     end
#   }.new
#   node.css('title:regex("\w+")', handler)
#
# 💡 Some XPath syntax is supported in CSS queries. For example, to query for an attribute:
#
#   node.css('img > @href') # returns all +href+ attributes on an +img+ element
#   node.css('img / @href') # same
#
#   # ⚠ this returns +class+ attributes from all +div+ elements AND THEIR CHILDREN!
#   node.css('div @class')
#
# 💡 Array-like syntax is supported in CSS queries as an alternative to using +:nth-child()+.
#
# ⚠ NOTE that indices are 1-based like +:nth-child+ and not 0-based like Ruby Arrays. For
# example:
#
#   # equivalent to 'li:nth-child(2)'
#   node.css('li[2]') # retrieve the second li element in a list
#
# ⚠ NOTE that the CSS query string is case-sensitive with regards to your document type. HTML
# tags will match only lowercase CSS queries, so if you search for "H1" in an HTML document,
# you'll never find anything. However, "H1" might be found in an XML document, where tag
# names are case-sensitive.
def css(*args)
  # The fourth element returned by extract_params (variable bindings) is unused for CSS.
  css_rules, custom_handler, namespaces, _bindings = extract_params(args)

  css_internal(self, css_rules, custom_handler, namespaces)
end
# Run CSS +rules+ against +node+: translate every rule to XPath via css_rules_to_xpath, then
# evaluate the resulting queries with xpath_internal. No variable bindings are passed
# (the +binds+ argument is always +nil+).
def css_internal(node, rules, handler, ns)
  xpath_queries = css_rules_to_xpath(rules, ns)
  xpath_internal(node, xpath_queries, handler, ns, nil)
end
# Translate each CSS rule in +rules+ into an XPath query (one query per rule, in order),
# resolving prefixes against the namespace bindings +ns+.
def css_rules_to_xpath(rules, ns)
  rules.map do |css_rule|
    xpath_query_from_css_rule(css_rule, ns)
  end
end
# Split a flat search argument list into its components.
#
# +params+ is the argument list given to #css, #xpath or #search: one or more queries
# (String or Symbol), optionally followed by a namespace-bindings Hash and a
# variable-bindings Hash (+nil+ may be used as a placeholder for either), with an optional
# custom handler object at any position.
#
# Returns <tt>[queries, handler, namespaces, bindings]</tt>. When no namespace Hash is given,
# +namespaces+ defaults to the namespaces of the document root, or an empty Hash if the
# document has no root. The array passed in is never modified.
def extract_params(params) # :nodoc:
  # The handler is the first argument that is neither a query, a bindings Hash, nor a +nil+
  # placeholder. Skipping +nil+ matters: otherwise +find+ would return the placeholder itself
  # for (path, nil, {bindings}, handler), the handler would go undetected, and - sitting at
  # the tail - it would prevent the trailing hashes from being collected below.
  handler = params.find do |param|
    !param.nil? && ![Hash, String, Symbol].include?(param.class)
  end

  # Always continue on a copy so the +pop+ calls below never touch the caller's array.
  params = handler ? params - [handler] : params.dup

  # Peel trailing Hash / nil arguments off the end; they were popped last-to-first, so
  # reversing restores the (namespaces, bindings) order.
  hashes = []
  while Hash === params.last || params.last.nil?
    hashes << params.pop
    break if params.empty?
  end
  ns, binds = hashes.reverse

  ns ||= document.root&.namespaces || {}

  [params, handler, ns, binds]
end
##
# call-seq:
#   search(*paths, [namespace-bindings, xpath-variable-bindings, custom-handler-class])
#
# Search this object for +paths+. +paths+ must be one or more XPath or CSS queries:
#
#   node.search("div.employee", ".//title")
#
# A hash of namespace bindings may be appended:
#
#   node.search('.//bike:tire', {'bike' => 'http://schwinn.com/'})
#   node.search('bike|tire', {'bike' => 'http://schwinn.com/'})
#
# For XPath queries, a hash of variable bindings may also be appended to the namespace
# bindings. For example:
#
#   node.search('.//address[@domestic=$value]', nil, {:value => 'Yes'})
#
# 💡 Custom XPath functions and CSS pseudo-selectors may also be defined. To define custom
# functions create a class and implement the function you want to define, which will be in the
# `nokogiri` namespace in XPath queries.
#
# The first argument to the method will be the current matching NodeSet. Any other arguments
# are ones that you pass in. Note that this class may appear anywhere in the argument
# list. For example:
#
#   handler = Class.new {
#     def regex(node_set, regex)
#       node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
#     end
#   }.new
#   node.search('.//title[nokogiri:regex(., "\w+")]', 'div.employee:regex("[0-9]+")', handler)
def search(*args)
  paths, handler, ns, binds = extract_params(args)

  # Queries that look like XPath pass through untouched; everything else is treated as CSS
  # and translated to XPath.
  xpaths = paths.map do |path|
    query = path.to_s
    LOOKS_LIKE_XPATH.match?(query) ? query : xpath_query_from_css_rule(query, ns)
  end
  xpaths = xpaths.flatten.uniq

  # Re-append only the trailing arguments that are present, then go through #xpath.
  trailing = [ns, handler, binds].compact
  xpath(*xpaths, *trailing)
end
##
# call-seq:
#   xpath(*paths, [namespace-bindings, variable-bindings, custom-handler-class])
#
# Search this node for XPath +paths+. +paths+ must be one or more XPath
# queries.
#
#   node.xpath('.//title')
#
# A hash of namespace bindings may be appended. For example:
#
#   node.xpath('.//foo:name', {'foo' => 'http://example.org/'})
#   node.xpath('.//xmlns:name', node.root.namespaces)
#
# A hash of variable bindings may also be appended to the namespace bindings. For example:
#
#   node.xpath('.//address[@domestic=$value]', nil, {:value => 'Yes'})
#
# 💡 Custom XPath functions may also be defined. To define custom functions create a class and
# implement the function you want to define, which will be in the `nokogiri` namespace.
#
# The first argument to the method will be the current matching NodeSet. Any other arguments
# are ones that you pass in. Note that this class may appear anywhere in the argument
# list. For example:
#
#   handler = Class.new {
#     def regex(node_set, regex)
#       node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
#     end
#   }.new
#   node.xpath('.//title[nokogiri:regex(., "\w+")]', handler)
def xpath(*args)
  queries, custom_handler, namespaces, bindings = extract_params(args)

  xpath_internal(self, queries, custom_handler, namespaces, bindings)
end
# Evaluate a single XPath +path+ against +node+ and return whatever the context's
# +evaluate+ returns.
#
# A fresh XPathContext is created for +node+; the namespace bindings +ns+ and, when given,
# each variable binding in +binds+ (keys converted to strings) are registered on it before
# +path+ is evaluated with the optional custom +handler+.
def xpath_impl(node, path, handler, ns, binds)
  context = XPathContext.new(node)
  context.register_namespaces(ns)

  # On non-libxml backends the "xmlns:" prefix is rewritten to " :" before evaluation.
  unless Nokogiri.uses_libxml?
    path = path.gsub("xmlns:", " :")
  end

  unless binds.nil?
    binds.each do |name, value|
      context.register_variable(name.to_s, value)
    end
  end

  context.evaluate(path, handler)
end
# Evaluate one or more XPath +paths+ against +node+.
#
# * If +node+ has no document, an empty NodeSet is returned.
# * A single path returns the result of xpath_impl directly.
# * Several paths are evaluated in order and every item each result yields is appended to
#   one combined NodeSet.
def xpath_internal(node, paths, handler, ns, binds)
  owner = node.document
  return NodeSet.new(owner) unless owner

  return xpath_impl(node, paths.first, handler, ns, binds) if paths.length == 1

  NodeSet.new(owner) do |merged|
    paths.each do |query|
      matches = xpath_impl(node, query, handler, ns, binds)
      matches.each { |match| merged << match }
    end
  end
end
# Translate one CSS +rule+ into a single XPath query string.
#
# The rule is converted once for every prefix in the including class's
# IMPLIED_XPATH_CONTEXTS, using the namespace bindings +ns+ and a CSS visitor configured with
# the OPTIMAL builtins and this document's +xpath_doctype+. The per-prefix results are joined
# with " | " so the final query is their union.
def xpath_query_from_css_rule(rule, ns)
  css_visitor = Nokogiri::CSS::XPathVisitor.new(
    builtins: Nokogiri::CSS::XPathVisitor::BuiltinsConfig::OPTIMAL,
    doctype: document.xpath_doctype,
  )

  queries = self.class::IMPLIED_XPATH_CONTEXTS.map do |context_prefix|
    # A fresh options hash per prefix, passed positionally as the original does.
    options = { prefix: context_prefix, ns: ns, visitor: css_visitor }
    CSS.xpath_for(rule.to_s, options)
  end

  queries.join(" | ")
end