module Nokogiri
module XML
#
# The Searchable module declares the interface used for searching your DOM.
#
# It implements the public methods `search`, `css`, and `xpath`,
# as well as allowing specific implementations to specialize some
# of the important behaviors.
#
module Searchable
# Regular expression used by Searchable#search to decide whether a query
# string is handed to the XPath engine unchanged (on a match) or is first
# translated from CSS (no match).
#
# A query matches when one of its lines begins with "./", "/" or "..", or
# when one of its lines is exactly ".". Note that ^ and $ anchor at line
# boundaries, not at the start and end of the whole string.
LOOKS_LIKE_XPATH = /^(\.\/|\/|\.\.|\.$)/
###
# call-seq: search *paths, [namespace-bindings, xpath-variable-bindings, custom-handler-class]
#
# Search this object for +paths+. +paths+ must be one or more XPath or CSS queries:
#
# node.search("div.employee", ".//title")
#
# A hash of namespace bindings may be appended:
#
# node.search('.//bike:tire', {'bike' => 'http://schwinn.com/'})
# node.search('bike|tire', {'bike' => 'http://schwinn.com/'})
#
# For XPath queries, a hash of variable bindings may also be
# appended to the namespace bindings. For example:
#
# node.search('.//address[@domestic=$value]', nil, {:value => 'Yes'})
#
# Custom XPath functions and CSS pseudo-selectors may also be
# defined. To define custom functions create a class and
# implement the function you want to define. The first argument
# to the method will be the current matching NodeSet. Any other
# arguments are ones that you pass in. Note that this class may
# appear anywhere in the argument list. For example:
#
# node.search('.//title[regex(., "\w+")]', 'div.employee:regex("[0-9]+")',
# Class.new {
# def regex node_set, regex
# node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
# end
# }.new
# )
#
# See Searchable#xpath and Searchable#css for further usage help.
def search *args
  paths, handler, ns, binds = extract_params(args)

  # Queries that look like XPath pass through untouched; everything else is
  # treated as a CSS rule and translated to XPath first.
  queries = paths.map do |path|
    query = path.to_s
    if query =~ LOOKS_LIKE_XPATH
      query
    else
      xpath_query_from_css_rule(query, ns)
    end
  end
  queries = queries.flatten.uniq

  # Re-append whichever optional arguments were supplied (nil ones dropped)
  # and let #xpath do the actual evaluation.
  extras = [ns, handler, binds].compact
  xpath(*(queries + extras))
end
alias :/ :search
###
# call-seq: at *paths, [namespace-bindings, xpath-variable-bindings, custom-handler-class]
#
# Search this object for +paths+, and return only the first
# result. +paths+ must be one or more XPath or CSS queries.
#
# See Searchable#search for more information.
def at *args
  # Delegate to #search and keep only the first match (nil when none).
  matches = search(*args)
  matches.first
end
alias :% :at
###
# call-seq: css *rules, [namespace-bindings, custom-pseudo-class]
#
# Search this object for CSS +rules+. +rules+ must be one or more CSS
# selectors. For example:
#
# node.css('title')
# node.css('body h1.bold')
# node.css('div + p.green', 'div#one')
#
# A hash of namespace bindings may be appended. For example:
#
# node.css('bike|tire', {'bike' => 'http://schwinn.com/'})
#
# Custom CSS pseudo classes may also be defined. To define
# custom pseudo classes, create a class and implement the custom
# pseudo class you want defined. The first argument to the
# method will be the current matching NodeSet. Any other
# arguments are ones that you pass in. For example:
#
# node.css('title:regex("\w+")', Class.new {
# def regex node_set, regex
# node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
# end
# }.new)
#
# Note that the CSS query string is case-sensitive with regard
# to your document type. That is, if you're looking for "H1" in
# an HTML document, you'll never find anything, since HTML tags
# will match only lowercase CSS queries. However, "H1" might be
# found in an XML document, where tag names are case-sensitive
# (e.g., "H1" is distinct from "h1").
#
def css *args
  # extract_params also returns variable bindings as a fourth value; it is
  # not used for CSS searches.
  rules, handler, ns = extract_params(args)
  css_internal(self, rules, handler, ns)
end
##
# call-seq: at_css *rules, [namespace-bindings, custom-pseudo-class]
#
# Search this object for CSS +rules+, and return only the first
# match. +rules+ must be one or more CSS selectors.
#
# See Searchable#css for more information.
def at_css *args
  # Delegate to #css and keep only the first match (nil when none).
  matches = css(*args)
  matches.first
end
###
# call-seq: xpath *paths, [namespace-bindings, variable-bindings, custom-handler-class]
#
# Search this node for XPath +paths+. +paths+ must be one or more XPath
# queries.
#
# node.xpath('.//title')
#
# A hash of namespace bindings may be appended. For example:
#
# node.xpath('.//foo:name', {'foo' => 'http://example.org/'})
# node.xpath('.//xmlns:name', node.root.namespaces)
#
# A hash of variable bindings may also be appended to the namespace bindings. For example:
#
# node.xpath('.//address[@domestic=$value]', nil, {:value => 'Yes'})
#
# Custom XPath functions may also be defined. To define custom
# functions create a class and implement the function you want
# to define. The first argument to the method will be the
# current matching NodeSet. Any other arguments are ones that
# you pass in. Note that this class may appear anywhere in the
# argument list. For example:
#
# node.xpath('.//title[regex(., "\w+")]', Class.new {
# def regex node_set, regex
# node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
# end
# }.new)
#
def xpath *args
  # With no owning document there is nothing to evaluate against.
  return NodeSet.new(document) unless document

  paths, handler, ns, binds = extract_params(args)

  # Each query is evaluated in its own freshly configured context.
  results = paths.map do |query|
    context = XPathContext.new(self)
    context.register_namespaces(ns)
    # When libxml is not in use, every "xmlns:" in the query is rewritten
    # to " :" before evaluation.
    query = query.gsub(/xmlns:/, ' :') unless Nokogiri.uses_libxml?
    if binds
      binds.each { |name, value| context.register_variable(name.to_s, value) }
    end
    context.evaluate(query, handler)
  end

  # A single query's result is returned as-is rather than copied.
  return results.first if results.size == 1

  # Zero or several queries: merge every node, in query order, into one set.
  NodeSet.new(document) do |merged|
    results.each do |set|
      set.each { |node| merged << node }
    end
  end
end
##
# call-seq: at_xpath *paths, [namespace-bindings, variable-bindings, custom-handler-class]
#
# Search this node for XPath +paths+, and return only the first
# match. +paths+ must be one or more XPath queries.
#
# See Searchable#xpath for more information.
def at_xpath *args
  # Delegate to #xpath and keep only the first match (nil when none).
  matches = xpath(*args)
  matches.first
end
private
# Translates each CSS rule to XPath and runs the resulting queries against
# +node+, forwarding the namespace bindings and handler when present.
def css_internal node, rules, handler, ns
  queries = rules.map do |rule|
    xpath_query_from_css_rule(rule, ns)
  end
  extras = [ns, handler].compact
  node.xpath(*queries, *extras)
end
def xpath_query_from_css_rule rule, ns
  # Translate the rule once per implied context, then combine all the
  # translations into a single expression separated by ' | '.
  alternatives = []
  implied_xpath_contexts.each do |prefix|
    alternatives << CSS.xpath_for(rule.to_s, :prefix => prefix, :ns => ns)
  end
  alternatives.join(' | ')
end
def extract_params params # :nodoc:
  # Splits the raw argument list given to search/css/xpath into
  # [queries, handler, namespace bindings, variable bindings].

  # Work on a copy so the caller's array is never modified.
  params = params.dup

  # The handler is the first argument that is not a query (String/Symbol),
  # a bindings Hash, or a nil placeholder. nil has to be excluded
  # explicitly: otherwise a nil namespace placeholder would satisfy the
  # search and hide a handler that appears later in the list. Subclasses
  # of Hash and String count as bindings/queries, matching the
  # `Hash ===` test used for the trailing arguments below.
  handler = params.find do |param|
    case param
    when nil, Hash, String, Symbol then false
    else true
    end
  end
  params.delete_if { |param| param.equal?(handler) } if handler

  # Peel the trailing run of Hash/nil arguments off the end, keeping their
  # original order: the first is the namespace bindings, the second the
  # variable bindings.
  trailing = []
  until params.empty?
    break unless params.last.nil? || Hash === params.last
    trailing.unshift(params.pop)
  end
  ns, binds = trailing

  # Default to the namespaces declared on the document root; fall back to
  # an empty Hash when there is no root (or no document at all).
  unless ns
    doc = document
    root = doc && doc.root
    ns = root ? root.namespaces : {}
  end

  [params, handler, ns, binds]
end
end
end
end