Object
Matches an attribute value that could be treated by a browser as a URL with a protocol prefix, such as "http:" or "javascript:". Any string of zero or more characters followed by a colon is considered a match, even if the colon is encoded as an entity and even if it's an incomplete entity (which IE6 and Opera will still parse).
Returns a sanitized copy of html, using the settings in config if specified.
# File lib/sanitize.rb, line 51
#
# Convenience wrapper: builds a Sanitize instance from +config+ and returns
# whatever that instance's #clean returns for +html+.
def self.clean(html, config = {})
  sanitizer = Sanitize.new(config)
  sanitizer.clean(html)
end
Performs Sanitize#clean in place, returning html, or nil if no changes were made.
# File lib/sanitize.rb, line 57
#
# Convenience wrapper: builds a Sanitize instance from +config+ and returns
# whatever that instance's #clean! returns for +html+.
def self.clean!(html, config = {})
  sanitizer = Sanitize.new(config)
  sanitizer.clean!(html)
end
Performs a Sanitize#clean using a full-document HTML parser instead of the default fragment parser. This will add a DOCTYPE and html tag unless they are already present
# File lib/sanitize.rb, line 64
#
# Convenience wrapper: builds a Sanitize instance from +config+ and returns
# whatever that instance's #clean_document returns for +html+.
def self.clean_document(html, config = {})
  sanitizer = Sanitize.new(config)
  sanitizer.clean_document(html)
end
Performs Sanitize#clean_document in place, returning html, or nil if no changes were made.
# File lib/sanitize.rb, line 70
#
# Convenience wrapper: builds a Sanitize instance from +config+ and returns
# whatever that instance's #clean_document! returns for +html+.
def self.clean_document!(html, config = {})
  sanitizer = Sanitize.new(config)
  sanitizer.clean_document!(html)
end
Sanitizes the specified Nokogiri::XML::Node and all its children.
# File lib/sanitize.rb, line 75
#
# Convenience wrapper: builds a Sanitize instance from +config+ and returns
# whatever that instance's #clean_node! returns for +node+.
def self.clean_node!(node, config = {})
  sanitizer = Sanitize.new(config)
  sanitizer.clean_node!(node)
end
Returns a new Sanitize object initialized with the settings in config.
# File lib/sanitize.rb, line 84
#
# Merges +config+ over Config::DEFAULT and assembles the two transformer
# chains (:breadth and :depth) stored in @transformers.
def initialize(config = {})
  @config = Config::DEFAULT.merge(config)

  breadth_chain = Array(@config[:transformers_breadth].dup)

  # :transformers and :transformers_depth are both treated as depth
  # transformers; concatenation yields a fresh array, so the appends below
  # do not touch the arrays held in @config.
  depth_chain = Array(@config[:transformers]) + Array(@config[:transformers_depth])

  # Default depth transformers. These always run at the end of the chain,
  # after any custom transformers.
  depth_chain << Transformers::CleanComment unless @config[:allow_comments]
  depth_chain << Transformers::CleanCDATA
  depth_chain << Transformers::CleanElement.new(@config)

  @transformers = {
    :breadth => breadth_chain,
    :depth   => depth_chain
  }
end
Returns a sanitized copy of html.
# File lib/sanitize.rb, line 102
#
# Returns the result of running #clean! on a duplicate of +html+, or the
# untouched duplicate when #clean! reports no change (nil). Returns nil when
# +html+ is nil or false.
def clean(html)
  return nil unless html

  copy = html.dup
  cleaned = clean!(copy)
  cleaned || copy
end
Performs clean in place, returning html, or nil if no changes were made.
# File lib/sanitize.rb, line 111
#
# Parses +html+ with +parser+, sanitizes the parsed tree via #clean_node!,
# serializes it according to @config[:output] (:xhtml or :html), and writes
# the serialized text back into +html+ in place.
#
# Returns nil when the serialized output equals +html+; otherwise returns the
# serialized string. Raises Error for any other @config[:output] value.
def clean!(html, parser = Nokogiri::HTML::DocumentFragment)
  fragment = parser.parse(html)
  clean_node!(fragment)

  serialize_opts = {:encoding => @config[:output_encoding], :indent => 0}

  serialized =
    case @config[:output]
    when :xhtml
      serialize_opts[:save_with] = Nokogiri::XML::Node::SaveOptions::AS_XHTML
      fragment.to_xhtml(serialize_opts)
    when :html
      fragment.to_html(serialize_opts)
    else
      raise Error, "unsupported output format: #{@config[:output]}"
    end

  return nil if serialized == html

  # Overwrite the caller's string contents in place; the assignment
  # expression evaluates to the serialized string, which is the return value.
  html[0, html.length] = serialized
end
# File lib/sanitize.rb, line 131
#
# Returns the result of running #clean_document! on a duplicate of +html+.
# When #clean_document! reports no change (nil), the duplicate is returned,
# so the caller never receives the very object it passed in. Returns nil
# when +html+ is nil.
def clean_document(html)
  return nil if html.nil?

  dupe = html.dup
  # Fall back to the duplicate, not +html+ itself, so that mutating the
  # returned string cannot alter the caller's input (same contract as #clean).
  clean_document!(dupe) || dupe
end
# File lib/sanitize.rb, line 137
#
# Runs #clean! on +html+ using Nokogiri::HTML::Document as the parser and
# returns its result.
#
# Raises a RuntimeError when 'html' is not in @config[:elements] and
# @config[:remove_contents] is not set.
def clean_document!(html)
  html_allowed    = @config[:elements].include?('html')
  strips_contents = @config[:remove_contents]

  # Without one of these, Nokogiri will raise for having multiple root nodes
  # when it moves its children to the root document context.
  if !html_allowed && !strips_contents
    raise 'You must have the HTML element whitelisted to call #clean_document unless remove_contents is set to true'
  end

  clean!(html, Nokogiri::HTML::Document)
end
Sanitizes the specified Nokogiri::XML::Node and all its children.
# File lib/sanitize.rb, line 148
#
# Applies the configured transformers to +node+ and everything reached by
# the traversal helpers, then returns +node+.
#
# Raises ArgumentError unless +node+ is a Nokogiri::XML::Node.
def clean_node!(node)
  raise ArgumentError unless node.is_a?(Nokogiri::XML::Node)

  # One whitelist set is shared by both passes.
  whitelisted = Set.new

  # The breadth pass is skipped entirely when no breadth transformers exist.
  if @transformers[:breadth].any?
    traverse_breadth(node) do |current|
      transform_node!(current, whitelisted, :breadth)
    end
  end

  traverse_depth(node) do |current|
    transform_node!(current, whitelisted, :depth)
  end

  node
end
Generated with the Darkfish Rdoc Generator 2.