module Hpricot::Container::Trav
# Returns the names found in this node's +class+ attribute as an array
# of strings.  The attribute value is converted with +to_s+, stripped
# and split on runs of whitespace, so a nil attribute value yields an
# empty array.
def classes
  class_attr = get_attribute('class').to_s
  class_attr.strip.split(/\s+/)
end
# Return all children of this node which can contain other nodes,
# i.e. every child for which <tt>Container::Trav === child</tt> holds.
# Children that are not containers are left out.
def containers
  children.select { |child| Container::Trav === child }
end
# +each_child+ iterates over each child, passing it to the block.
# Nothing is yielded when +children+ is nil.  Always returns nil.
def each_child(&block) # :yields: child_node
  kids = children
  return nil unless kids

  kids.each(&block)
  nil
end
# +each_child_with_index+ iterates over each child, passing the child
# and its position within +children+ to the block.
# Nothing is yielded when +children+ is nil.  Always returns nil.
def each_child_with_index(&block) # :yields: child_node, index
  kids = children
  return nil unless kids

  kids.each_with_index(&block)
  nil
end
# +each_hyperlink+ traverses hyperlinks such as the HTML href attribute
# of the A element.
#
# It yields Hpricot::Text: the hyperlink value supplied by
# +each_hyperlink_attribute+, without the element or attribute name.
#
# Note that +each_hyperlink+ also yields the href attribute of the
# BASE element.
#
# Returns whatever +each_hyperlink_attribute+ returns.
def each_hyperlink # :yields: text
  each_hyperlink_attribute { |_elem, _attr, hyperlink| yield hyperlink }
end
# Walks the XHTML elements that may carry hyperlinks (via
# +traverse_element+) and yields +elem+, the attribute name and the
# attribute value for every link-bearing attribute that is present.
#
# Which attributes are inspected depends on the element name:
# base/a/area/link -> href; img -> src, longdesc, usemap;
# object -> classid, codebase, data, usemap;
# q/blockquote/ins/del -> cite; form -> action; input -> src, usemap;
# head -> profile; script -> src, for.
#
# Attributes for which +get_attribute+ returns nil or false are skipped.
def each_hyperlink_attribute
  traverse_element(
    '{http://www.w3.org/1999/xhtml}a',
    '{http://www.w3.org/1999/xhtml}area',
    '{http://www.w3.org/1999/xhtml}link',
    '{http://www.w3.org/1999/xhtml}img',
    '{http://www.w3.org/1999/xhtml}object',
    '{http://www.w3.org/1999/xhtml}q',
    '{http://www.w3.org/1999/xhtml}blockquote',
    '{http://www.w3.org/1999/xhtml}ins',
    '{http://www.w3.org/1999/xhtml}del',
    '{http://www.w3.org/1999/xhtml}form',
    '{http://www.w3.org/1999/xhtml}input',
    '{http://www.w3.org/1999/xhtml}head',
    '{http://www.w3.org/1999/xhtml}base',
    '{http://www.w3.org/1999/xhtml}script'
  ) do |elem|
    # Pick the list of link-bearing attribute names for this element.
    attrs =
      case elem.name
      when %r{\{http://www.w3.org/1999/xhtml\}(?:base|a|area|link)\z}i
        ['href']
      when %r{\{http://www.w3.org/1999/xhtml\}(?:img)\z}i
        ['src', 'longdesc', 'usemap']
      when %r{\{http://www.w3.org/1999/xhtml\}(?:object)\z}i
        ['classid', 'codebase', 'data', 'usemap']
      when %r{\{http://www.w3.org/1999/xhtml\}(?:q|blockquote|ins|del)\z}i
        ['cite']
      when %r{\{http://www.w3.org/1999/xhtml\}(?:form)\z}i
        ['action']
      when %r{\{http://www.w3.org/1999/xhtml\}(?:input)\z}i
        ['src', 'usemap']
      when %r{\{http://www.w3.org/1999/xhtml\}(?:head)\z}i
        ['profile']
      when %r{\{http://www.w3.org/1999/xhtml\}(?:script)\z}i
        ['src', 'for']
      end

    attrs.each do |attr|
      hyperlink = elem.get_attribute(attr)
      yield elem, attr, hyperlink if hyperlink
    end
  end
end
# +each_hyperlink_uri+ traverses hyperlinks such as the HTML href
# attribute of the A element.
#
# It yields Hpricot::Text and URI for each hyperlink.
#
# The URI objects are created with a base URI which is given by
# the HTML BASE element or the argument ((|base_uri|)).  A String
# argument is parsed with URI.parse; a BASE element found during the
# traversal replaces the argument.  Without any base URI each hyperlink
# is parsed on its own.
#
# +each_hyperlink_uri+ doesn't yield the href of the BASE element.
def each_hyperlink_uri(base_uri=nil) # :yields: hyperlink, uri
  base = String === base_uri ? URI.parse(base_uri) : base_uri

  # Collect every hyperlink first: a BASE element may appear after
  # links it applies to, so resolution is deferred until the end.
  pending = []
  each_hyperlink_attribute do |elem, _attr, hyperlink|
    if %r{\{http://www.w3.org/1999/xhtml\}(?:base)\z}i =~ elem.name
      base = URI.parse(hyperlink.to_s)
    else
      pending << hyperlink
    end
  end

  pending.each do |hyperlink|
    target = hyperlink.to_s
    yield hyperlink, (base ? base + target : URI.parse(target))
  end
end
# +each_uri+ traverses hyperlinks such as the HTML href attribute
# of the A element.
#
# It yields a URI for each hyperlink.
#
# The URI objects are created with a base URI which is given by
# the HTML BASE element or the argument ((|base_uri|)); the work is
# delegated to +each_hyperlink_uri+ and only the URI part is passed on.
def each_uri(base_uri=nil) # :yields: URI
  each_hyperlink_uri(base_uri) do |_hyperlink, uri|
    yield uri
  end
end
# +filter+ rebuilds the tree without some components.
#
#   node.filter {|descendant_node| predicate } -> node
#   loc.filter {|descendant_loc| predicate } -> node
#
# +filter+ yields each node except the top node.
# If the given block returns false, the corresponding node is dropped.
# If the given block returns true, the corresponding node is retained
# and its inner nodes are examined.
#
# +filter+ returns a node.
# It doesn't return a location object even if self is a location object.
def filter(&block)
  # Maps child index => replacement (nil means "drop this child").
  replacements = {}
  each_child_with_index do |child, index|
    replacements[index] =
      if !yield(child)
        nil
      elsif child.elem?
        # Retained element: filter its own descendants recursively.
        child.filter(&block)
      else
        child
      end
  end
  to_node.subst_subnode(replacements)
end
# +find_element+ searches for an element whose universal name is
# specified by the arguments.
#
# Returns the first element yielded by +traverse_element+ for the
# given names, or nil when none is yielded.
def find_element(*names)
  traverse_element(*names) do |element|
    return element
  end
  nil
end
# Find sibling elements which follow the current one.  Like the other
# "sibling" methods, this only looks at the parent's +containers+, so
# text and comment nodes are not found.
#
# Returns an Elements collection, empty when this node is the last
# container of its parent.
def following_siblings
  siblings = parent.containers
  first_after = siblings.index(self) + 1
  Elements[*siblings[first_after..-1]]
end
# Returns the first node visited by +traverse_all_element+ that is an
# element (+elem?+) and whose +id+ attribute, converted with +to_s+,
# equals +id+.  Returns nil when no such element is found.
def get_element_by_id(id)
  traverse_all_element do |ele|
    next unless ele.elem?

    eid = ele.get_attribute('id')
    return ele if eid && eid.to_s == id
  end
  nil
end
# Collects, as an Elements list, every element yielded by
# +traverse_element+ for the given tag names.  Each tag is looked up
# both as given and with the XHTML namespace prefix
# <tt>{http://www.w3.org/1999/xhtml}</tt>.  A "*" argument is removed
# before the lookup.  Yielded nodes that are not elements (+elem?+)
# are ignored.
def get_elements_by_tag_name(*a)
  tags = a.reject { |tag| tag == "*" }
  names = tags.map { |tag| [tag, "{http://www.w3.org/1999/xhtml}#{tag}"] }.flatten

  found = Elements[]
  traverse_element(*names) { |e| found << e if e.elem? }
  found
end
# Insert +nodes+, an array of HTML elements or a single element,
# into +children+ directly after +ele+.  When +ele+ is not among the
# children the node is appended at the end.  Every inserted node is
# first passed to +reparent+.
#
# An array is inserted one node at a time in reverse, so the nodes end
# up after +ele+ in their original order.
def insert_after(nodes, ele)
  if nodes.is_a?(Array)
    nodes.reverse_each { |n| insert_after(n, ele) }
  else
    reparent nodes
    idx = children.index(ele)
    position = idx ? idx + 1 : children.length
    children[position, 0] = nodes
  end
end
# Insert +nodes+, an array of HTML elements or a single element,
# into +children+ directly before +ele+.  When +ele+ is not among the
# children the node is inserted at the front.  Every inserted node is
# first passed to +reparent+.
def insert_before(nodes, ele)
  if nodes.is_a?(Array)
    nodes.each { |n| insert_before(n, ele) }
  else
    reparent nodes
    position = children.index(ele) || 0
    children[position, 0] = nodes
  end
end
# Returns the container node neighboring this node to the south: just
# below it.  By "container" node, I mean: this method does not find
# text nodes or comments or cdata or any of that.
#
# Returns nil when this node has no parent or is the last container
# of its parent.
def next_sibling
  # Check the parent before touching it; otherwise a parentless node
  # raises NoMethodError instead of answering nil.
  return nil unless parent

  siblings = parent.containers
  siblings[siblings.index(self) + 1]
end
# Find all preceding sibling elements.  Like the other "sibling"
# methods, this only looks at the parent's +containers+, so text and
# comment nodes are not found.
#
# Returns an Elements collection, empty when this node is the first
# container of its parent.
def preceding_siblings
  siblings = parent.containers
  own_index = siblings.index(self)
  Elements[*siblings.slice(0...own_index)]
end
# Returns the container node neighboring this node to the north: just
# above it.  By "container" node, I mean: this method does not find
# text nodes or comments or cdata or any of that.
#
# Returns nil when this node has no parent or is the first container
# of its parent.
def previous_sibling
  # Check the parent before touching it; otherwise a parentless node
  # raises NoMethodError instead of answering nil.
  return nil unless parent

  siblings = parent.containers
  position = siblings.index(self) - 1
  # A negative position would wrap around to the end of the array.
  siblings[position] if position >= 0
end
# Replaces the child +old+ with +new+ inside +children+.  +new+ may be
# a single node or an array of nodes; it is passed to +reparent+ first
# and then spliced in at the position +old+ occupied.
def replace_child(old, new)
  reparent new
  kids = children
  position = kids.index(old)
  kids[position, 1] = [*new]
end
# Puts together an array of neighboring sibling elements based on their
# proximity to this element.
#
# This method accepts ranges and sets of numbers.
#
#    ele.siblings_at(-3..-1, 1..3) # gets three elements before and three after
#    ele.siblings_at(1, 5, 7) # gets three elements at offsets below the current element
#    ele.siblings_at(0, 5..6) # the current element and two others
#
# Like the other "sibling" methods, this doesn't find text and comment nodes.
def siblings_at(*pos)
  siblings = parent.containers
  own_index = siblings.index(self)

  picked = siblings.select.with_index do |_sibling, index|
    # Offset of this sibling relative to self; each +pos+ entry is
    # matched with === so both integers and ranges work.
    offset = index - own_index
    pos.any? { |wanted| wanted === offset }
  end

  Elements[*picked]
end
# Forwards the block to +traverse_text_internal+ on every child of this
# node (via +each_child+), so the traversal recurses down the tree.
def traverse_text_internal(&block)
  each_child do |child|
    child.traverse_text_internal(&block)
  end
end