class HTML::HTMLParser
(X)HTML parser.
Parses a String and builds an HTML::Document with the (X)HTML content.
For example:
  html = "<p>paragraph</p>"
  parser = HTMLParser.new
  parser.feed(html)
  puts parser.document
Requires a patched version of SGMLParser.
Attributes
document[R]
Public Class Methods
new()
click to toggle source
Calls superclass method
HTML::SGMLParser.new
# File lib/html/htmlparser.rb, line 368
# Runs the superclass initializer, then creates an empty HTML::Document and
# points the insertion cursor (@current) at that document's root.
def initialize
  super()
  @document = HTML::Document.new("")
  @current = @document.root
end
parse(html)
click to toggle source
# File lib/html/htmlparser.rb, line 362
# Convenience entry point: builds a fresh HTMLParser, feeds it the given
# markup and returns the parser's document.
#
# html - the markup to hand to HTMLParser#feed.
#
# Returns whatever the parser's #document reader returns after feeding.
def self.parse(html)
  HTMLParser.new.tap { |fresh_parser| fresh_parser.feed(html) }.document
end
Public Instance Methods
handle_comment(data)
click to toggle source
# File lib/html/htmlparser.rb, line 378
# Comment hook. Deliberately does nothing, so the given data is never added
# to the document being built.
#
# data - the comment content (unused).
#
# Returns nil.
def handle_comment(data)
  nil
end
handle_data(data)
click to toggle source
# File lib/html/htmlparser.rb, line 374
# Wraps the given character data in an HTML::Text node whose parent is the
# current node, and appends it to the current node's children.
#
# data - the text content for the new node.
#
# Returns the result of appending to @current.children.
def handle_data(data)
  text_node = HTML::Text.new(@current, 0, 0, data)
  @current.children << text_node
end
handle_special(data)
click to toggle source
# File lib/html/htmlparser.rb, line 381
# Hook for "special" markup. Deliberately does nothing, so the given data is
# never added to the document being built.
#
# data - the special content (unused).
#
# Returns nil.
def handle_special(data)
  nil
end
unknown_charref(ref)
click to toggle source
# File lib/html/htmlparser.rb, line 398
# Hook for a character reference that was not otherwise handled.
# Deliberately does nothing, so the reference is dropped from the output.
#
# ref - the character reference (unused).
#
# Returns nil.
def unknown_charref(ref)
  nil
end
unknown_endtag(tag)
click to toggle source
# File lib/html/htmlparser.rb, line 394
# Closes the current element by moving the insertion cursor up to its parent.
# When the current node has no parent the cursor stays where it is.
#
# tag - the name of the end tag (not consulted; any end tag pops one level).
#
# Returns the new current node, or nil when the cursor did not move.
def unknown_endtag(tag)
  return unless @current.parent

  @current = @current.parent
end
unknown_entityref(ref)
click to toggle source
# File lib/html/htmlparser.rb, line 401
# Re-emits an entity reference that was not otherwise handled as literal text,
# appended to the current node's children as an HTML::Text node.
#
# ref - the entity name; it is interpolated between "&" and ";"
#       (presumably passed without those delimiters — confirm against SGMLParser).
#
# Returns the result of appending to @current.children.
def unknown_entityref(ref)
  # Terminate the reference with ";". The previous "<" terminator produced
  # malformed text such as "&nbsp<" instead of "&nbsp;".
  @current.children << HTML::Text.new(@current, 0, 0, "&#{ref};")
end
unknown_starttag(tag, attrs)
click to toggle source
# File lib/html/htmlparser.rb, line 384
# Opens a new element: builds an HTML::Tag with a lower-cased name and
# lower-cased attribute names, appends it to the current node's children and
# makes it the new current node.
#
# tag   - the tag name as it appeared in the markup.
# attrs - a collection of pairs; element 0 of each pair is the attribute name
#         and element 1 its value. Later duplicates overwrite earlier ones.
#
# Returns the newly created element.
def unknown_starttag(tag, attrs)
  normalized = attrs.each_with_object({}) do |pair, by_name|
    by_name[pair[0].downcase] = pair[1]
  end

  # NOTE(review): the parent argument falls back to @document when @current is
  # nil, but the append below still dereferences @current unconditionally.
  element = HTML::Tag.new(@current || @document, 0, 0, tag.downcase, normalized, true)
  @current.children << element
  @current = element
end