class RDF::Util::File::RemoteDocument

A RemoteDocument contains the body and headers of a remote resource.

Link headers are parsed using the `LinkHeader` gem. @see https://github.com/asplake/link_header

Attributes

base_uri[R]

Base URI based on resource location or returned Location header. @return [String]

charset[R]

Encoding of resource (from Content-Type), downcased. Also applied to content if it is UTF. @return [String]

code[R]

Response code @return [Integer]

content_type[R]

Content-Type of the returned resource @return [String]

etag[R]

ETag from headers @return [String]

headers[R]

Raw headers from response @return [Hash{Symbol => Object}]

last_modified[R]

Last-Modified time from headers @return [DateTime]

requested_url[R]

Originally requested URL @return [String]

Public Class Methods

new(body, options = {}) click to toggle source

Set content @param [String] body entity content of request.

Calls superclass method
# File lib/rdf/util/file.rb, line 397
def initialize(body, options = {})
  options.each do |key, value|
    # de-quote charset
    matchdata = value.match(/^["'](.*)["']$/.freeze) if key == "charset"
    value = matchdata[1] if matchdata
    value = value.downcase if value.is_a?(String)
    instance_variable_set(:"@#{key}", value)
  end
  @headers = options.fetch(:headers, {})
  @charset = options[:charset].to_s.downcase if options[:charset]

  # Find Content-Type
  if headers[:content_type]
    ct, *params = headers[:content_type].split(';').map(&:strip)
    @content_type ||= ct

    # Find charset
    params.each do |param|
      p, v = param.split('=')
      next unless p.downcase == 'charset'
      @charset ||= v.sub(/^["']?(.*)["']?$/, '\1').downcase
    end
  end

  @etag = headers[:etag]
  @last_modified = DateTime.parse(headers[:last_modified]) if headers[:last_modified]
  encoding = @charset ||= "utf-8"

  unless encoding.start_with?("utf")
    body.force_encoding(Encoding::UTF_8)
    encoding = "utf-8"

    # Make sure Unicode is in NFC
    begin
      body.unicode_normalize! unless !body.unicode_normalized?
    rescue Encoding::CompatibilityError
      # Oh, well ...
    end if body.respond_to?(:unicode_normalized?)
  end

  super(body, "r:#{encoding}")
end

Public Instance Methods

content_encoding() click to toggle source

Returns a list of encodings in Content-Encoding field as an array of strings.

The encodings are downcased for canonicalization. @return [Array<String>]

# File lib/rdf/util/file.rb, line 445
# Lists the codings named in the Content-Encoding header.
#
# The header value is split on commas; each entry is stripped of surrounding
# whitespace and downcased. A missing header yields an empty list.
#
# @return [Array<String>]
def content_encoding
  raw_value = headers.fetch(:content_encoding, "")
  raw_value.split(',').map { |coding| coding.strip.downcase }
end