# File lib/scraper/reader.rb, line 189
# Parses an HTML page and returns the parsed document together with the
# encoding that was used.
#
# content  - The HTML text to parse.
# encoding - Fallback encoding name. A charset declared in a content-type
#            meta tag takes precedence; when neither is available "utf8"
#            is used.
# options  - Optional Hash of options handed to Tidy (only used by the
#            :tidy parser). TIDY_OPTIONS are always applied on top. The
#            Hash passed in is never modified.
# parser   - :tidy (the default, also used when nil) or :html_parser.
#
# Returns Parsed[document, encoding]. For :tidy the document is whatever
# HTML::Document#find(:tag => "html") yields for the cleaned markup; for
# :html_parser it is the root returned by HTML::HTMLParser.
#
# Raises HTMLParseError for an unknown parser and for any failure while
# parsing. SystemExit, SignalException (including Interrupt) and
# NoMemoryError are deliberately not wrapped and propagate unchanged.
def parse_page(content, encoding = nil, options = nil, parser = :tidy)
  # Get the document encoding from the meta header. The charset must be
  # non-empty: an empty capture would be truthy and defeat the default.
  meta = content.match(/<meta\s*[^>]*http-equiv=['"]?content-type['"]?[^>]*/i)
  charset = meta && meta[0].match(/charset=([\w-]+)/i)
  encoding = charset[1] if charset
  encoding ||= "utf8"

  document =
    case parser || :tidy
    when :tidy
      # Make sure the Tidy path is set and always apply the default
      # options (these only control things like errors, output type).
      find_tidy
      # merge builds a new Hash, so the caller's options stay untouched.
      tidy_options = (options || {}).merge(TIDY_OPTIONS)
      tidy_options[:input_encoding] = encoding.delete("-").downcase
      Tidy.open(tidy_options) do |tidy|
        HTML::Document.new(tidy.clean(content)).find(:tag => "html")
      end
    when :html_parser
      HTML::HTMLParser.parse(content).root
    else
      raise HTMLParseError, "No parser #{parser || "unspecified"}"
    end

  Parsed[document, encoding]
rescue SystemExit, SignalException, NoMemoryError
  # Process-control conditions are not parse errors; let them through.
  raise
rescue Exception => error
  # Everything else is still reported as HTMLParseError, so callers that
  # rescue HTMLParseError keep working as before.
  raise HTMLParseError.new(error)
end