lib/mechanize/page.rb
# = Synopsis
# This class encapsulates an HTML page.  If Mechanize finds a content
# type of 'text/html', this class will be instantiated and returned.
#
# == Example
#  require 'rubygems'
#  require 'mechanize'
#
#  agent = Mechanize.new
#  agent.get('http://google.com/').class  #=> Mechanize::Page
#
class Mechanize::Page < Mechanize::File
  extend Forwardable
  extend Mechanize::ElementMatcher

  # The agent handed to the constructor.  It supplies the HTML parser
  # (+mech.html_parser+) and is passed to every element wrapper built here.
  attr_accessor :mech

  ##
  # Builds a page from a fetched response.
  #
  # +uri+::      location of the page; used as the default form action
  # +response+:: header collection responding to <tt>[]</tt> and +each+.
  #              When nil, the page is treated as plain 'text/html'.
  # +body+::     raw document source, or nil
  # +code+::     passed through to the superclass unchanged
  # +mech+::     the owning agent
  #
  # Raises Mechanize::ContentTypeError unless the content type starts with
  # 'text/html' or 'application/xhtml+xml'.
  #
  # NOTE: when +body+ responds to +force_encoding+ it is switched to
  # ASCII-8BIT in place so the charset sniffing regexps can run on it.
  def initialize(uri=nil, response=nil, body=nil, code=nil, mech=nil)
    # A missing response used to blow up with NoMethodError on nil even
    # though nil is the declared default.
    # NOTE(review): assumes Mechanize::File#initialize accepts a plain Hash
    # of headers here -- confirm against the superclass.
    response ||= { 'content-type' => 'text/html' }

    type = response['content-type']
    # Both alternatives are anchored; previously only text/html was, so any
    # header merely containing "application/xhtml+xml" was accepted.
    unless type =~ /\A\s*(?:text\/html|application\/xhtml\+xml)/i
      raise Mechanize::ContentTypeError, type
    end

    @bases       = nil
    @encoding    = nil
    @encodings   = [nil]
    @forms       = nil
    @frames      = nil
    @iframes     = nil
    @image_urls  = nil
    @images      = nil
    @labels      = nil
    @labels_hash = nil
    @links       = nil
    @mech        = mech
    @meta        = nil
    @parser      = nil
    @title       = nil

    # Candidate encodings are collected from least to most specific; #parser
    # walks them in reverse order.
    @encodings << Mechanize::Util.detect_charset(body) if body

    response.each do |header, value|
      next unless value =~ /charset/i
      @encodings << charset(value)
    end

    if body
      # Force the encoding to be 8BIT so we can perform regular expressions.
      # We'll set it to the detected encoding later
      body.force_encoding('ASCII-8BIT') if body.respond_to?(:force_encoding)

      @encodings.concat meta_tag_charsets(body)
    end

    super(uri, response, body, code)
  end

  ##
  # Text of the page's +title+ element(s), or nil when that text is empty or
  # the page has no parser (no body).
  def title
    @title ||=
      if doc = parser
        title = doc.search('title').inner_text
        title.empty? ? nil : title
      end
  end

  ##
  # Extracts the charset parameter from a Content-Type style string.
  #
  # Returns nil when +content_type+ is nil, carries no charset parameter,
  # or names the charset 'none'.  Quotes around the value (as in
  # <tt>charset="utf-8"</tt>) are not part of the result.
  def charset content_type
    return nil unless content_type

    charset = content_type[/charset=["']?([^;"' ]+)/i, 1]
    return nil if charset == 'none'
    charset
  end

  ##
  # Forces the encoding used for parsing.  If a parser has already been
  # built with a different encoding (compared case-insensitively) it is
  # dropped so that the next call to #parser re-parses the body.
  #
  # Returns +encoding+.
  def encoding=(encoding)
    @encoding = encoding

    if @parser
      parser_encoding = @parser.encoding
      if (parser_encoding && parser_encoding.downcase) != (encoding && encoding.downcase)
        # lazy reinitialize the parser with the new encoding
        @parser = nil
      end
    end

    encoding
  end

  ##
  # Encoding reported by the parser, or nil when there is no parser or it
  # does not respond to +encoding+.
  def encoding
    parser.respond_to?(:encoding) ? parser.encoding : nil
  end

  ##
  # Lazily parses the body with <tt>mech.html_parser</tt> and memoizes the
  # resulting document.  Returns nil when the page has no body.
  #
  # With an explicit encoding (see #encoding=) the body is parsed once with
  # it.  Otherwise the candidate encodings gathered in the constructor are
  # tried last-added first, stopping at the first parse that either has no
  # errors or whose errors are unrelated to encoding.
  def parser
    return @parser if @parser
    return nil unless @body

    if @encoding then
      @parser = mech.html_parser.parse(html_body, nil, @encoding)
    else
      @encodings.reverse_each do |encoding|
        @parser = mech.html_parser.parse(html_body, nil, encoding)

        break if @parser.errors.empty?

        # Only keep trying other encodings while the parser is complaining
        # about the encoding itself.
        break unless @parser.errors.any? do |error|
          error.message =~ /(indicate encoding)|(Invalid char)/
        end
      end
    end

    @parser
  end

  alias :root :parser

  # Get the content type
  def content_type
    response['content-type']
  end

  # Search through the page like HPricot
  def_delegator :parser, :search, :search
  def_delegator :parser, :/, :/
  def_delegator :parser, :at, :at

  ##
  # :method: form_with(criteria)
  #
  # Find a single form matching +criteria+.
  # Example:
  #   page.form_with(:action => '/post/login.php') do |f|
  #     ...
  #   end

  ##
  # :method: forms_with(criteria)
  #
  # Find all forms matching +criteria+.
  # Example:
  #   page.forms_with(:action => '/post/login.php').each do |f|
  #     ...
  #   end

  elements_with :form

  ##
  # :method: link_with(criteria)
  #
  # Find a single link matching +criteria+.
  # Example:
  #   page.link_with(:href => /foo/).click

  ##
  # :method: links_with(criteria)
  #
  # Find all links matching +criteria+.
  # Example:
  #   page.links_with(:href => /foo/).each do |link|
  #     puts link.href
  #   end

  elements_with :link

  ##
  # :method: base_with(criteria)
  #
  # Find a single base tag matching +criteria+.
  # Example:
  #   page.base_with(:href => /foo/).click

  ##
  # :method: bases_with(criteria)
  #
  # Find all base tags matching +criteria+.
  # Example:
  #   page.bases_with(:href => /foo/).each do |base|
  #     puts base.href
  #   end

  elements_with :base

  ##
  # :method: frame_with(criteria)
  #
  # Find a single frame tag matching +criteria+.
  # Example:
  #   page.frame_with(:src => /foo/).click

  ##
  # :method: frames_with(criteria)
  #
  # Find all frame tags matching +criteria+.
  # Example:
  #   page.frames_with(:src => /foo/).each do |frame|
  #     p frame.src
  #   end

  elements_with :frame

  ##
  # :method: iframe_with(criteria)
  #
  # Find a single iframe tag matching +criteria+.
  # Example:
  #   page.iframe_with(:src => /foo/).click

  ##
  # :method: iframes_with(criteria)
  #
  # Find all iframe tags matching +criteria+.
  # Example:
  #   page.iframes_with(:src => /foo/).each do |iframe|
  #     p iframe.src
  #   end

  elements_with :iframe

  ##
  # Return a list of all link and area tags
  def links
    @links ||= %w{ a area }.map do |tag|
      search(tag).map do |node|
        Link.new(node, @mech, self)
      end
    end.flatten
  end

  ##
  # Return a list of all form tags.  A form without an action gets the
  # page's own URI as its action.
  def forms
    @forms ||= search('form').map do |html_form|
      form = Mechanize::Form.new(html_form, @mech, self)
      form.action ||= @uri.to_s
      form
    end
  end

  ##
  # Return a list of Meta objects built from the <tt>head > meta</tt> tags
  # whose +http-equiv+ is 'refresh' (case-insensitive) and that carry a
  # +content+ attribute.  The delay and href yielded by Meta.parse are
  # stored on the node as +delay+ and +href+ attributes.
  def meta
    @meta ||= search('head > meta').map do |node|
      next unless node['http-equiv'] && node['content']
      (equiv, content) = node['http-equiv'], node['content']
      if equiv && equiv.downcase == 'refresh'
        Meta.parse(content, uri) do |delay, href|
          node['delay'] = delay
          node['href'] = href
          Meta.new(node, @mech, self)
        end
      end
    end.compact
  end

  ##
  # Return a list of all base tags
  def bases
    @bases ||=
      search('base').map { |node| Base.new(node, @mech, self) }
  end

  ##
  # Return a list of all frame tags
  def frames
    @frames ||=
      search('frame').map { |node| Frame.new(node, @mech, self) }
  end

  ##
  # Return a list of all iframe tags
  def iframes
    @iframes ||=
      search('iframe').map { |node| Frame.new(node, @mech, self) }
  end

  ##
  # Return a list of all img tags
  def images
    @images ||=
      search('img').map { |node| Image.new(node, self) }
  end

  ##
  # Return the +url+ of every image on the page, without duplicates
  def image_urls
    @image_urls ||= images.map(&:url).uniq
  end

  ##
  # Return a list of all label tags
  def labels
    @labels ||=
      search('label').map { |node| Label.new(node, self) }
  end

  ##
  # Return a hash mapping the +for+ attribute of each label to its Label
  # object.  Labels for which Label#for is false or nil are left out.
  def labels_hash
    unless @labels_hash
      hash = {}
      labels.each do |label|
        hash[label.node['for']] = label if label.for
      end
      @labels_hash = hash
    end
    return @labels_hash
  end

  private

  ##
  # Source handed to the HTML parser.  An empty body is replaced by a
  # minimal document; a missing body yields an empty string.
  def html_body
    if @body
      @body.empty? ? '<html></html>' : @body
    else
      ''
    end
  end

  ##
  # Collects the charsets declared by
  # <tt><meta http-equiv="content-type" content="..."></tt> tags in +body+.
  #
  # Tags without a usable +content+ attribute or without a charset are
  # skipped rather than raising.  Quoted and unquoted attribute values are
  # both understood, as is whitespace around the <tt>=</tt> sign.
  def meta_tag_charsets(body)
    charsets = []

    body.scan(/<meta .*?>/i) do |meta|
      # (["']?) always participates in the match, so the back-reference
      # also works for unquoted attribute values.
      next unless meta =~ /http-equiv\s*=\s*(["']?)content-type\1/i

      content = meta[/content\s*=\s*(["'])(.*?)\1/i, 2] ||
                meta[/content\s*=\s*([^\s"'>]+)/i, 1]

      encoding = charset(content)
      charsets << encoding if encoding
    end

    charsets
  end
end

require 'mechanize/headers'
require 'mechanize/page/image'
require 'mechanize/page/label'
require 'mechanize/page/link'
require 'mechanize/page/base'
require 'mechanize/page/frame'
require 'mechanize/page/meta'