class Robots
Constants
- DEFAULT_TIMEOUT
Public Class Methods
get_robots_txt(uri, user_agent)
click to toggle source
# File lib/robots.rb, line 101
# Requests "/robots.txt" from the site that +uri+ belongs to.
#
# uri::        anything whose +to_s+ is a URI string; "/robots.txt" is
#              joined onto it, so only the scheme/host/port part is kept.
# user_agent:: value sent as the "User-Agent" request header.
#
# Returns whatever +open+ returns for the robots.txt URI. Returns nil when
# the request raises any StandardError (best-effort fetch, failures are
# deliberately swallowed) or when it exceeds Robots.timeout seconds, in
# which case a message is also written to STDERR.
def self.get_robots_txt(uri, user_agent)
  Timeout.timeout(Robots.timeout) do
    begin
      URI.join(uri.to_s, "/robots.txt").open("User-Agent" => user_agent)
    rescue StandardError
      # Best-effort: a malformed URI or a failed request means "no robots.txt".
      nil
    end
  end
rescue Timeout::Error
  # STDERR.puts returns nil, so a timed-out request also yields nil.
  STDERR.puts "robots.txt request timed out"
end
new(user_agent)
click to toggle source
# File lib/robots.rb, line 119
# Sets up a checker bound to a single user agent.
#
# user_agent:: stored in @user_agent; the instance methods pass it on to
#              ParsedRobots.
#
# @parsed starts out as an empty Hash and is filled lazily with
# ParsedRobots objects by the instance methods.
def initialize(user_agent)
  @parsed = {}
  @user_agent = user_agent
end
timeout()
click to toggle source
# File lib/robots.rb, line 115
# Returns the class-level timeout: the value previously assigned through
# +timeout=+ when it is truthy, otherwise DEFAULT_TIMEOUT.
def self.timeout
  return @timeout if @timeout

  DEFAULT_TIMEOUT
end
timeout=(t)
click to toggle source
# File lib/robots.rb, line 111
# Stores +t+ as the class-level timeout that +timeout+ reports.
# Assigning nil or false makes +timeout+ fall back to its default.
def self.timeout=(t)
  @timeout = t
end
Public Instance Methods
allowed?(uri)
click to toggle source
# File lib/robots.rb, line 124
# Asks the cached ParsedRobots object for +uri+'s site whether +uri+ may be
# fetched by this instance's user agent.
#
# uri:: a URI object, or anything whose +to_s+ can be parsed by URI.parse.
#
# Returns the result of ParsedRobots#allowed? for +uri+ and @user_agent.
# Raises whatever URI.parse raises for an unparsable string.
def allowed?(uri)
  uri = URI.parse(uri.to_s) unless uri.is_a?(URI)
  # robots.txt is scoped to scheme + host + port, so the cache key must
  # include all three; keying on host alone would let http/https or
  # different ports of the same host share one rule set.
  origin = [uri.scheme, uri.host, uri.port]
  @parsed[origin] ||= ParsedRobots.new(uri, @user_agent)
  @parsed[origin].allowed?(uri, @user_agent)
end
other_values(uri)
click to toggle source
# File lib/robots.rb, line 131
# Returns ParsedRobots#other_values for the site that +uri+ belongs to,
# creating and caching the ParsedRobots object on first use.
#
# uri:: a URI object, or anything whose +to_s+ can be parsed by URI.parse.
#
# Raises whatever URI.parse raises for an unparsable string.
def other_values(uri)
  uri = URI.parse(uri.to_s) unless uri.is_a?(URI)
  # robots.txt is scoped to scheme + host + port, so the cache key must
  # include all three; keying on host alone would let http/https or
  # different ports of the same host share one rule set.
  origin = [uri.scheme, uri.host, uri.port]
  @parsed[origin] ||= ParsedRobots.new(uri, @user_agent)
  @parsed[origin].other_values
end