class HTML::Tag
# :nodoc:
#
# A Tag is any node that represents markup. It may be an opening tag, a
# closing tag, or a self-closing tag. It has a name, and may have a hash of
# attributes.
# Two tags are equal when the superclass comparison succeeds and they agree
# on +closing+, +name+ and +attributes+. Returns +false+ as soon as the
# superclass check or the closing/name check fails; otherwise returns the
# result of comparing the attributes.
def ==(node)
  if super && closing == node.closing && name == node.name
    attributes == node.attributes
  else
    false
  end
end
# A convenience for obtaining an attribute of the node. Returns +nil+ if
# the node has no attributes.
def [](attr)
  return nil unless @attributes

  @attributes[attr]
end
# Returns a truthy value if this tag cannot contain child nodes, i.e. when
# it carries a closing marker (<tt>@closing</tt> is not +nil+) or when its
# name is one of the HTML elements listed in the pattern below.
#
# When +xml+ is true the element-name list is ignored: a tag whose
# <tt>@closing</tt> is +nil+ is never considered childless.
#
# The result is +true+/+false+ when decided by the closing marker, and the
# raw result of <tt>=~</tt> (an index or +nil+) when decided by the name.
def childless?(xml = false)
  has_closing_marker = !@closing.nil?
  return false if xml && !has_closing_marker
  return true if has_closing_marker

  @name =~ /^(img|br|hr|link|meta|area|base|basefont| col|frame|input|isindex|param)$/ox
end
# If either the node or any of its children meet the given conditions, the
# matching node is returned. Otherwise, +nil+ is returned. (See the
# description of the valid conditions in the +match+ method.)
#
# The node itself is tested first; only when it does not match is the
# search handed to the superclass implementation.
def find(conditions)
  return self if match(conditions)

  super
end
# Create a new tag node as a child of the given parent. +parent+, +line+
# and +pos+ are forwarded to the superclass initializer; +name+,
# +attributes+ and +closing+ are stored on the tag as given.
#
# Elsewhere in this class +closing+ is compared against <tt>:close</tt> and
# <tt>:self</tt>, and +nil+ is treated as "no closing marker".
def initialize(parent, line, pos, name, attributes, closing)
  super(parent, line, pos)
  @name, @attributes, @closing = name, attributes, closing
end
# Returns +true+ if the node meets all of the given conditions, and +false+
# as soon as one of them is not met. The +conditions+ parameter is first
# passed through +validate_conditions+ and is then read as a hash of any of
# the following keys (all are optional):
#
# * <tt>:tag</tt>: the node name must match the corresponding value.
# * <tt>:attributes</tt>: a hash. The node's attribute values must match
#   the corresponding values in the hash.
# * <tt>:content</tt>: at least one of the node's immediate children must
#   match this value; a node without children is treated as having the
#   empty string as its content.
# * <tt>:parent</tt>: a hash. The node's parent must match the
#   corresponding hash.
# * <tt>:child</tt>: a hash. At least one of the node's immediate children
#   must meet the criteria described by the hash.
# * <tt>:ancestor</tt>: a hash. At least one of the node's ancestors must
#   meet the criteria described by the hash.
# * <tt>:descendant</tt>: a hash. At least one of the node's descendants
#   must meet the criteria described by the hash.
# * <tt>:sibling</tt>: a hash. At least one of the node's siblings must
#   meet the criteria described by the hash.
# * <tt>:after</tt>: a hash. The node must be after any sibling meeting
#   the criteria described by the hash, and at least one sibling must match.
# * <tt>:before</tt>: a hash. The node must be before any sibling meeting
#   the criteria described by the hash, and at least one sibling must match.
# * <tt>:children</tt>: a hash, for counting children of a node. Accepts the
#   keys:
#   * <tt>:count</tt>: either a number or a range which must equal (or
#     include) the number of children that match.
#   * <tt>:less_than</tt>: the number of matching children must be less than
#     this number.
#   * <tt>:greater_than</tt>: the number of matching children must be
#     greater than this number.
#   * <tt>:only</tt>: another hash consisting of the keys to use
#     to match on the children, and only matching children will be
#     counted.
#
# Tag and attribute conditions are matched using the following algorithm:
#
# * if the condition is a string, the value must be equal to it.
# * if the condition is a regexp, it must match the value.
# * if the condition is a number, the value must match number.to_s.
# * if the condition is +true+, the value must not be +nil+.
# * if the condition is +false+ or +nil+, the value must be +nil+.
#
# A node without a parent never satisfies <tt>:parent</tt>,
# <tt>:ancestor</tt>, <tt>:sibling</tt>, <tt>:before</tt> or
# <tt>:after</tt>; these return +false+ instead of raising.
#
# Raises a RuntimeError if the <tt>:children</tt> hash contains a key other
# than the four listed above.
#
# Usage:
#
#   # test if the node is a "span" tag
#   node.match tag: "span"
#
#   # test if the node's parent is a "div"
#   node.match parent: { tag: "div" }
#
#   # test if any of the node's ancestors are "table" tags
#   node.match ancestor: { tag: "table" }
#
#   # test if any of the node's immediate children are "em" tags
#   node.match child: { tag: "em" }
#
#   # test if any of the node's descendants are "strong" tags
#   node.match descendant: { tag: "strong" }
#
#   # test if the node has between 2 and 4 span tags as immediate children
#   node.match children: { count: 2..4, only: { tag: "span" } }
#
#   # get funky: test to see if the node is a "div", has a "ul" ancestor
#   # and an "li" parent (with "class" = "enum"), and whether or not it has
#   # a "span" descendant that contains text matching /hello world/:
#   node.match tag: "div",
#              ancestor: { tag: "ul" },
#              parent: { tag: "li",
#                        attributes: { class: "enum" } },
#              descendant: { tag: "span",
#                            child: /hello world/ }
def match(conditions)
  conditions = validate_conditions(conditions)

  # check content of child nodes
  if conditions[:content]
    if children.empty?
      return false unless match_condition("", conditions[:content])
    else
      return false unless children.find { |child| child.match(conditions[:content]) }
    end
  end

  # test the name
  return false if conditions[:tag] && !match_condition(@name, conditions[:tag])

  # test attributes
  (conditions[:attributes] || {}).each do |key, value|
    return false unless match_condition(self[key], value)
  end

  # test parent; a node without a parent cannot satisfy this condition
  if conditions[:parent]
    return false unless parent && parent.match(conditions[:parent])
  end

  # test children
  if conditions[:child]
    return false unless children.find { |child| child.match(conditions[:child]) }
  end

  # test ancestors: walk up the parent chain until one matches or the
  # chain ends
  if conditions[:ancestor]
    ancestor = parent
    ancestor = ancestor.parent while ancestor && !ancestor.match(conditions[:ancestor])
    return false unless ancestor
  end

  # test descendants: each child is tested itself, then asked (recursively)
  # whether one of its own descendants matches
  if conditions[:descendant]
    found = children.find do |child|
      child.match(conditions[:descendant]) ||
        child.match(descendant: conditions[:descendant])
    end
    return false unless found
  end

  # count children
  if (opts = conditions[:children])
    # only tag nodes that are self-closing or not childless are counted
    matches = children.select do |c|
      c.kind_of?(HTML::Tag) && (c.closing == :self || !c.childless?)
    end
    matches = matches.select { |c| c.match(opts[:only]) } if opts[:only]

    opts.each do |key, value|
      next if key == :only

      case key
      when :count
        if Integer === value
          return false if matches.length != value
        else
          return false unless value.include?(matches.length)
        end
      when :less_than
        return false unless matches.length < value
      when :greater_than
        return false unless matches.length > value
      else
        raise "unknown count condition #{key}"
      end
    end
  end

  # test siblings
  if conditions[:sibling] || conditions[:before] || conditions[:after]
    siblings = parent ? parent.children : []
    # NOTE(review): index and != rely on ==, which this class overrides
    # structurally, so identical-looking siblings cannot be told apart here.
    self_index = siblings.index(self)

    if conditions[:sibling]
      return false unless siblings.detect { |s| s != self && s.match(conditions[:sibling]) }
    end

    if conditions[:before]
      # without a position among the siblings nothing can come after us
      return false unless self_index

      later = siblings[self_index + 1..-1]
      return false unless later.detect { |s| s != self && s.match(conditions[:before]) }
    end

    if conditions[:after]
      # without a position among the siblings nothing can come before us
      return false unless self_index

      earlier = siblings[0, self_index]
      return false unless earlier.detect { |s| s != self && s.match(conditions[:after]) }
    end
  end

  true
end
# Match the given value to the given condition. The kind of comparison
# depends on the condition:
#
# * +false+ or +nil+: the value must be +nil+.
# * +true+: the value must not be +nil+.
# * a Numeric: the value must equal the number's string form.
# * a String: the value must be present and equal to the string.
# * a Regexp: the value must be present and match the pattern (the
#   MatchData is returned).
#
# Any other kind of condition never matches.
def match_condition(value, condition)
  return value.nil? if condition.nil? || condition.equal?(false)
  return !value.nil? if condition.equal?(true)
  return value == condition.to_s if condition.is_a?(Numeric)

  if condition.is_a?(String)
    value && value == condition
  elsif condition.is_a?(Regexp)
    value && value.match(condition)
  else
    false
  end
end
# Builds an HTML::Selector from +selector+ and the remaining +values+, runs
# it against this node and returns whatever the selector's +select+ returns.
def select(selector, *values)
  HTML::Selector.new(selector, values).select(self)
end
# Always +true+: this node is a tag.
def tag?
  true
end
# Returns the markup for this tag.
#
# * A closing tag (<tt>@closing == :close</tt>) renders as <tt></name></tt>.
# * Otherwise the opening tag is rendered with its attributes, followed by
#   the string form of every child, followed by a closing tag when the tag
#   is not self-closing and has at least one child.
#
# Attributes whose value is a String render as <tt>name="value"</tt>; any
# other value renders the bare attribute name. Values are emitted as stored,
# without escaping. A tag without an attribute hash (+nil+) renders with no
# attributes, in line with <tt>[]</tt>, which also tolerates that state.
def to_s
  return "</#{@name}>" if @closing == :close

  markup = "<#{@name}"
  (@attributes || {}).each do |key, value|
    markup << " #{key}"
    markup << "=\"#{value}\"" if value.is_a?(String)
  end
  markup << " /" if @closing == :self
  markup << ">"
  @children.each { |child| markup << child.to_s }
  markup << "</#{@name}>" if @closing != :self && !@children.empty?
  markup
end