Mechanize::Page

This class encapsulates an HTML page. If Mechanize finds a content type of 'text/html', this class will be instantiated and returned.

Example:

require 'mechanize'

agent = Mechanize.new
agent.get('http://google.com/').class # => Mechanize::Page

Constants

DEFAULT_RESPONSE

Attributes

encodings[R]

Possible encodings for this page based on HTTP headers and meta elements

mech[RW]

Public Class Methods

charset(content_type) click to toggle source
# File lib/mechanize/page.rb, line 406
# Extracts the charset parameter from a Content-Type style value such as
# "text/html; charset=utf-8".
#
# Returns the charset token as a String. Returns nil when +content_type+
# carries no (unquoted) charset parameter or when the charset is the literal
# 'none'.
def charset content_type
  # Media-type parameter names are case-insensitive, so "Charset=" and
  # "CHARSET=" have to be recognised as well as "charset=".
  charset = content_type[/;(?:\s*,)?\s*charset\s*=\s*([^()<>@,;:\\\"\/\[\]?={}\s]+)/i, 1]
  return nil if charset == 'none'
  charset
end
Also aliased as: charset_from_content_type
charset_from_content_type(content_type) click to toggle source
Alias for: charset
meta_charset(body) click to toggle source

Retrieves all charsets from meta tags in body

# File lib/mechanize/page.rb, line 428
# Collects every charset declared by a <meta> tag in +body+.
#
# Two forms are recognised, both case-insensitively:
#   <meta charset="...">
#   <meta http-equiv="content-type" content="...; charset=...">
#
# Returns an Array of charset Strings in document order (empty when none are
# found).
def self.meta_charset body
  body.scan(/<meta .*?>/i).map do |meta|
    # \1 requires the closing quote to match the opening one; an unquoted
    # value leaves group 1 unset, so the back-reference fails and we fall
    # through to the http-equiv form.
    if meta =~ /charset\s*=\s*(["'])?\s*(.+)\s*\1/i then
      $2
    elsif meta =~ /http-equiv\s*=\s*(["'])?content-type\1/i then
      meta =~ /content\s*=\s*(["'])?(.*?)\1/i

      # $2 is the content attribute value; let charset pull the parameter out.
      charset $2 if $2
    end
  end.compact
end
meta_content_type(body) click to toggle source

Retrieves the last content-type set by a meta tag in body

# File lib/mechanize/page.rb, line 446
# Finds the content type declared by the last
# <meta http-equiv="content-type" ...> tag in +body+.
#
# Returns the tag's content attribute value as a String, or nil when +body+
# has no such tag (or the last such tag has no parsable content attribute).
def self.meta_content_type body
  # Walk the tags back to front so the last declaration wins.
  body.scan(/<meta .*?>/i).reverse_each do |meta|
    next unless meta =~ /http-equiv\s*=\s*(["'])?content-type\1/i

    meta =~ /content\s*=\s*(["'])?(.*?)\1/i

    return $2
  end

  nil
end
new(uri=nil, response=nil, body=nil, code=nil, mech=nil) click to toggle source
# File lib/mechanize/page.rb, line 27
# Sets up the page state and gathers the candidate encodings before handing
# uri, response, body and code to the superclass initializer.
#
# response falls back to DEFAULT_RESPONSE when omitted. A RuntimeError ('no')
# is raised when +mech+ is given but does not satisfy Mechanize === mech.
def initialize(uri=nil, response=nil, body=nil, code=nil, mech=nil)
  response = DEFAULT_RESPONSE unless response

  @meta_content_type = nil
  @encoding = nil
  @encodings = [nil]
  raise 'no' if mech && !(Mechanize === mech)
  @mech = mech

  reset

  # Candidates are appended in this order: detected charset, response header
  # charsets, meta tag charsets, agent default.
  @encodings.push(Mechanize::Util.detect_charset(body)) if body
  @encodings.concat(self.class.response_header_charset(response))

  if body
    # Treat the body as raw bytes so the meta-tag regular expressions can run
    # on it; the real encoding is applied later.
    body.force_encoding('ASCII-8BIT') if body.respond_to?(:force_encoding)

    @encodings.concat(self.class.meta_charset(body))

    declared_type = self.class.meta_content_type(body)
    @meta_content_type = declared_type if declared_type
  end

  @encodings.push(mech.default_encoding) if mech && mech.default_encoding

  super(uri, response, body, code)
end
response_header_charset(response) click to toggle source
# File lib/mechanize/page.rb, line 415
# Gathers the charset of every 'content-type' header in +response+ whose
# value mentions "charset".
#
# +response+ must yield (header, value) pairs from #each. Returns an Array
# with one charset(value) result per matching header.
def self.response_header_charset response
  found = []
  response.each do |name, content|
    found << charset(content) if name == 'content-type' && content =~ /charset/
  end
  found
end

Public Instance Methods

/ click to toggle source
Alias for: search
at click to toggle source

Search through the page for path under namespace using Nokogiri's at. The path may be either a CSS or XPath expression.

See also Nokogiri::XML::Node#at

# File lib/mechanize/page.rb, line 212
# Defines #at by forwarding the call to the object returned by #parser.
def_delegator :parser, :at, :at
base_with(criteria) click to toggle source

Find a single base tag matching criteria. Example:

page.base_with(:href => /foo/).click
# File lib/mechanize/page.rb, line 259
  
bases() click to toggle source

Return a list of all base tags

# File lib/mechanize/page.rb, line 357
# Memoized list of Base objects, one per node matched by search('base').
def bases
  return @bases if @bases

  @bases = search('base').map do |element|
    Base.new(element, @mech, self)
  end
end
bases_with(criteria) click to toggle source

Find all base tags matching criteria. Example:

page.bases_with(:href => /foo/).each do |base|
  puts base.href
end
# File lib/mechanize/page.rb, line 268
# Shown as the source of bases_with. elements_with is defined outside this
# page; presumably it also provides the singular base_with -- confirm against
# its definition.
elements_with :base
canonical_uri() click to toggle source

Return the canonical URI for the page if there is a link tag with rel="canonical" and an href attribute.

# File lib/mechanize/page.rb, line 177
# Looks up the first <link rel="canonical"> element that has an href and
# returns that href as a URI. Returns nil when no such element exists.
#
# If the href is rejected as an invalid URI, it is escaped with
# Mechanize::Util.uri_escape and parsed again.
def canonical_uri
  canonical = at('link[@rel="canonical"][@href]')
  return unless canonical

  target = canonical['href']
  URI(target)
rescue URI::InvalidURIError
  URI(Mechanize::Util.uri_escape(target))
end
content_type() click to toggle source

Get the content type

# File lib/mechanize/page.rb, line 188
# Content type of the page: the value captured from a meta tag when one was
# found, otherwise the response's 'content-type' header.
def content_type
  return @meta_content_type if @meta_content_type

  response['content-type']
end
detected_encoding() click to toggle source
# File lib/mechanize/page.rb, line 74
# Runs Mechanize::Util.detect_charset on the page body and returns its
# result. Nothing is cached; detection runs on every call.
def detected_encoding
  Mechanize::Util.detect_charset(body)
end
encoding() click to toggle source
# File lib/mechanize/page.rb, line 94
# Encoding reported by the parser, or nil when the parser object does not
# respond to #encoding (which includes the case of a nil parser).
def encoding
  parser.encoding if parser.respond_to?(:encoding)
end
encoding=(encoding) click to toggle source
# File lib/mechanize/page.rb, line 78
# Sets the encoding to use for this page.
#
# Calls #reset first, then stores +encoding+. If a parser is still present
# afterwards and reports a different encoding (compared case-insensitively),
# it is dropped so that it gets rebuilt lazily with the new encoding.
#
# Returns +encoding+.
def encoding=(encoding)
  reset
  @encoding = encoding

  current = @parser && @parser.encoding
  @parser = nil if current && encoding && current.casecmp(encoding) != 0

  encoding
end
encoding_error?(parser=nil) click to toggle source

Return whether the parser result has errors related to encoding. A false result only means that the parser reported no encoding errors; it does not guarantee that the encoding is valid.

# File lib/mechanize/page.rb, line 100
# Reports whether +parser+ (the page's own parser when omitted) recorded an
# error whose message points at an encoding problem.
#
# Returns true when any error message contains "indicate encoding",
# "Invalid char" or "input conversion failed", false otherwise (including
# when there are no errors at all).
def encoding_error?(parser=nil)
  parser ||= self.parser
  parser.errors.any? do |error|
    # Extended mode (/x) is required: without it the line breaks and
    # indentation become part of the pattern and only the first alternative
    # can ever match.
    error.message =~ /(indicate\ encoding)|
                      (Invalid\ char)|
                      (input\ conversion\ failed)/x
  end
end
form_with(criteria) click to toggle source

Find a single form matching criteria. Example:

page.form_with(:action => '/post/login.php') do |f|
  ...
end
# File lib/mechanize/page.rb, line 223
  
forms() click to toggle source

Return a list of all form tags

# File lib/mechanize/page.rb, line 336
# Memoized list of Mechanize::Form objects, one per node matched by
# search('form'). A form without an action gets the page's URI (as a String)
# as its action.
def forms
  return @forms if @forms

  @forms = search('form').map { |node|
    Mechanize::Form.new(node, @mech, self).tap { |form| form.action ||= @uri.to_s }
  }
end
forms_with(criteria) click to toggle source

Find all forms matching criteria. Example:

page.forms_with(:action => '/post/login.php').each do |f|
  ...
end
# File lib/mechanize/page.rb, line 232
# Shown as the source of forms_with. elements_with is defined outside this
# page; presumably it also provides the singular form_with -- confirm against
# its definition.
elements_with :form
frame_with(criteria) click to toggle source

Find a single frame tag matching criteria. Example:

page.frame_with(:src => /foo/).click
# File lib/mechanize/page.rb, line 277
  
frames() click to toggle source

Return a list of all frame tags

# File lib/mechanize/page.rb, line 364
# Memoized list of Frame objects, one per node matched by search('frame').
def frames
  return @frames if @frames

  @frames = search('frame').map do |element|
    Frame.new(element, @mech, self)
  end
end
frames_with(criteria) click to toggle source

Find all frame tags matching criteria. Example:

page.frames_with(:src => /foo/).each do |frame|
  p frame.src
end
# File lib/mechanize/page.rb, line 286
# Shown as the source of frames_with. elements_with is defined outside this
# page; presumably it also provides the singular frame_with -- confirm against
# its definition.
elements_with :frame
iframe_with(criteria) click to toggle source

Find a single iframe tag matching criteria. Example:

page.iframe_with(:src => /foo/).click
# File lib/mechanize/page.rb, line 295
  
iframes() click to toggle source

Return a list of all iframe tags

# File lib/mechanize/page.rb, line 371
# Memoized list of Frame objects, one per node matched by search('iframe').
# (iframes are wrapped in the same Frame class as regular frames.)
def iframes
  return @iframes if @iframes

  @iframes = search('iframe').map do |element|
    Frame.new(element, @mech, self)
  end
end
iframes_with(criteria) click to toggle source

Find all iframe tags matching criteria. Example:

page.iframes_with(:src => /foo/).each do |iframe|
  p iframe.src
end
# File lib/mechanize/page.rb, line 304
# Shown as the source of iframes_with. elements_with is defined outside this
# page; presumably it also provides the singular iframe_with -- confirm
# against its definition.
elements_with :iframe
image_urls() click to toggle source
# File lib/mechanize/page.rb, line 383
# Memoized list of the distinct #url values of the page's images, in order
# of first appearance.
def image_urls
  return @image_urls if @image_urls

  @image_urls = images.map { |image| image.url }.uniq
end
image_with(criteria) click to toggle source

Find a single image matching criteria. Example:

page.image_with(:alt => /main/).fetch.save
# File lib/mechanize/page.rb, line 313
  
images() click to toggle source

Return a list of all img tags

# File lib/mechanize/page.rb, line 378
# Memoized list of Image objects, one per node matched by search('img').
def images
  return @images if @images

  @images = search('img').map do |element|
    Image.new(element, self)
  end
end
images_with(criteria) click to toggle source

Find all images matching criteria. Example:

page.images_with(:src => /jpg\Z/).each do |img|
  img.fetch.save
end
# File lib/mechanize/page.rb, line 322
# Shown as the source of images_with. elements_with is defined outside this
# page; presumably it also provides the singular image_with -- confirm against
# its definition.
elements_with :image
labels() click to toggle source

Return a list of all label tags

# File lib/mechanize/page.rb, line 389
# Memoized list of Label objects, one per node matched by search('label').
def labels
  return @labels if @labels

  @labels = search('label').map do |element|
    Label.new(element, self)
  end
end
labels_hash() click to toggle source
# File lib/mechanize/page.rb, line 394
# Memoized Hash mapping each label's 'for' attribute value to the label
# itself. Labels whose #for is nil or false are left out; when several labels
# share a key, the last one wins.
def labels_hash
  @labels_hash ||= labels.each_with_object({}) do |label, by_target|
    by_target[label.node['for']] = label if label.for
  end
end
meta_charset() click to toggle source
# File lib/mechanize/page.rb, line 70
# Instance-level convenience: passes this page's body to the class-level
# meta_charset and returns its result.
def meta_charset
  self.class.meta_charset(body)
end
meta_refresh() click to toggle source

Return a list of all meta refresh elements

# File lib/mechanize/page.rb, line 347
# Memoized list of the objects MetaRefresh.from_node builds for the page's
# meta elements; nodes for which it returns nil are dropped.
#
# Every meta element is considered when the agent's follow_meta_refresh is
# :anywhere, otherwise only those directly under <head>.
def meta_refresh
  # The selector is computed before the cache check, so the agent is
  # consulted on every call.
  selector = if @mech.follow_meta_refresh == :anywhere then 'meta' else 'head > meta' end
  return @meta_refresh if @meta_refresh

  @meta_refresh = search(selector).map { |node| MetaRefresh.from_node(node, self) }.compact
end
parser() click to toggle source
# File lib/mechanize/page.rb, line 110
# Returns the parsed document, building and caching it on first use.
# Returns nil when the page has no body.
#
# Encoding choice:
# * an explicitly set @encoding is used as is;
# * otherwise, when the agent forces its default encoding, that one is used;
# * otherwise the collected @encodings are tried from last to first until a
#   parse produces no encoding error (the last attempt is kept either way).
def parser
  return @parser if @parser
  return nil unless @body

  if @encoding
    @parser = @mech.html_parser.parse(html_body, nil, @encoding)
  elsif mech.force_default_encoding
    @parser = @mech.html_parser.parse(html_body, nil, @mech.default_encoding)
  else
    @encodings.reverse_each do |candidate|
      @parser = @mech.html_parser.parse(html_body, nil, candidate)

      break unless encoding_error?(@parser)
    end
  end

  @parser
end
Also aliased as: root
reset() click to toggle source
# File lib/mechanize/page.rb, line 162
# Drops every memoized value derived from the parsed document, along with
# the parser itself, so they are rebuilt on next access.
def reset
  @bases = nil
  @forms = nil
  @frames = nil
  @iframes = nil
  # @images and @image_urls are memoized from the parsed document just like
  # the other collections; clear them too so they cannot go stale.
  @images = nil
  @image_urls = nil
  @links = nil
  @labels = nil
  @labels_hash = nil
  @meta_refresh = nil
  @parser = nil
  @title = nil
end
response_header_charset() click to toggle source
# File lib/mechanize/page.rb, line 66
# Instance-level convenience: passes this page's response to the class-level
# response_header_charset and returns its result.
def response_header_charset
  self.class.response_header_charset(response)
end
root() click to toggle source
Alias for: parser
search click to toggle source

Search for paths in the page using Nokogiri's search. The paths can be XPath or CSS and an optional Hash of namespaces may be appended.

See Nokogiri::XML::Node#search for further details.

# File lib/mechanize/page.rb, line 200
# Defines #search by forwarding the call to the object returned by #parser.
def_delegator :parser, :search, :search
Also aliased as: /
title() click to toggle source
# File lib/mechanize/page.rb, line 58
# Text of the page's <title> element(s), memoized once a non-empty title has
# been found. Returns nil when there is no parser or the title text is empty.
def title
  return @title if @title

  document = parser
  @title =
    if document
      text = document.search('title').inner_text
      text unless text.empty?
    end
end

[Validate]

Generated with the Darkfish Rdoc Generator 2.