class RDF::Reader

The base class for RDF parsers.

@example Loading an RDF reader implementation

require 'rdf/ntriples'

@example Iterating over known RDF reader classes

RDF::Reader.each { |klass| puts klass.name }

@example Obtaining an RDF reader class

RDF::Reader.for(:ntriples)     #=> RDF::NTriples::Reader
RDF::Reader.for("etc/doap.nt")
RDF::Reader.for(file_name:      "etc/doap.nt")
RDF::Reader.for(file_extension: "nt")
RDF::Reader.for(content_type:   "application/n-triples")

@example Instantiating an RDF reader class

RDF::Reader.for(:ntriples).new($stdin) { |reader| ... }

@example Parsing RDF statements from a file

RDF::Reader.open("etc/doap.nt") do |reader|
  reader.each_statement do |statement|
    puts statement.inspect
  end
end

@example Parsing RDF statements from a string

data = StringIO.new(File.read("etc/doap.nt"))
RDF::Reader.for(:ntriples).new(data) do |reader|
  reader.each_statement do |statement|
    puts statement.inspect
  end
end

@abstract @see RDF::Format @see RDF::Writer

Attributes

options[R]

Any additional options for this reader.

@return [Hash] @since 0.3.0

Public Class Methods

each(&block) click to toggle source

Enumerates known RDF reader classes.

@yield [klass] @yieldparam [Class] klass @return [Enumerator]

# File lib/rdf/reader.rb, line 52
# Iterates over the reader classes recorded in `@@subclasses` (the registry
# that `self.inherited` appends to), forwarding the caller's block unchanged.
#
# @yieldparam klass [Class] a registered reader class
# @return [Object] whatever `@@subclasses.each` returns for the given block
def self.each(&block)
  @@subclasses.each(&block)
end
for(options = {}, &block) click to toggle source

Finds an RDF reader class based on the given criteria.

If the reader class has a defined format, use that.

@overload for(format)

Finds an RDF reader class based on a symbolic name.

@param  [Symbol] format
@return [Class]

@overload for(filename)

Finds an RDF reader class based on a file name.

@param  [String] filename
@return [Class]

@overload for(options = {})

Finds an RDF reader class based on various options.

@param  [Hash{Symbol => Object}] options
@option options [String, #to_s]   :file_name      (nil)
@option options [Symbol, #to_sym] :file_extension (nil)
@option options [String, #to_s]   :content_type   (nil)
@return [Class]
@option options [String]          :sample (nil)
  A sample of input used for performing format detection.
  If we find no formats, or we find more than one, and we have a sample, we can
  perform format detection to find a specific format to use, in which case
  we pick the first one we find
@return [Class]
@yieldreturn [String] an alternative way to provide a sample; the block is only called when a sample is needed, so the sample can be retrieved lazily.

@return [Class]

# File lib/rdf/reader.rb, line 90
# Resolves the reader class matching the given criteria.
#
# A format already bound to this reader class (`self.format`) wins; only
# when there is none is the lookup delegated to `Format.for`.
#
# @param options [Symbol, String, Hash] format name, file name, or lookup
#   options; a Hash is augmented with `has_reader: true` before the lookup
# @yieldreturn [String] a sample of the input, forwarded to `Format.for`
# @return [Class, nil] the reader of the resolved format, or nil if no
#   format was found
def self.for(options = {}, &block)
  # Only Hash criteria can carry the extra `has_reader` hint.
  options = options.merge(has_reader: true) if options.is_a?(Hash)

  resolved = self.format || Format.for(options, &block)
  resolved.reader if resolved
end
format(klass = nil) click to toggle source

Retrieves the RDF serialization format class for this reader class.

@return [Class]

# File lib/rdf/reader.rb, line 101
# Looks up the format class whose `reader` is this reader class.
#
# @param klass [Object, nil] when anything other than nil is passed, no
#   lookup is performed and nil is returned
# @return [Class, nil] the first format yielded by `Format.each` whose
#   `reader` equals `self`, or nil when there is none
def self.format(klass = nil)
  return unless klass.nil?

  Format.each { |candidate| return candidate if candidate.reader == self }
  nil # not found
end
Also aliased as: format_class
format_class(klass = nil)
Alias for: format
new(input = $stdin, options = {}, &block) click to toggle source

Initializes the reader.

@param [IO, File, String] input

the input stream to read

@param [Hash{Symbol => Object}] options

any additional options

@option options [Encoding] :encoding (Encoding::UTF_8)

the encoding of the input stream

@option options [Boolean] :validate (false)

whether to validate the parsed statements and values

@option options [Boolean] :canonicalize (false)

whether to canonicalize parsed literals

@option options [Boolean] :intern (true)

whether to intern all parsed URIs

@option options [Hash] :prefixes (Hash.new)

the prefix mappings to use (not supported by all readers)

@option options [#to_s] :base_uri (nil)

the base URI to use when resolving relative URIs (not supported by
all readers)

@yield [reader] `self` @yieldparam [RDF::Reader] reader @yieldreturn [void] ignored

# File lib/rdf/reader.rb, line 240
# Initializes the reader.
#
# @param input [IO, File, String] the input stream to read; a String is
#   wrapped in a StringIO
# @param options [Hash{Symbol => Object}] any additional options; the hash
#   is duplicated, so the caller's copy is not modified
# @option options [Boolean] :validate (false)
# @option options [Boolean] :canonicalize (false)
# @option options [Boolean] :intern (true)
# @option options [Hash] :prefixes ({})
# @option options [#to_s] :base_uri (nil) defaults to `input.base_uri` when
#   the input responds to it
# @yield [reader] `self`; a zero-arity block is instance_eval'd instead
def initialize(input = $stdin, options = {}, &block)
  @options = options.dup
  @options[:validate]     ||= false
  @options[:canonicalize] ||= false
  # `||= true` would turn an explicit `intern: false` back into `true`,
  # making it impossible to disable interning. Default only when unset.
  @options[:intern] = true if @options[:intern].nil?
  @options[:prefixes]     ||= {}
  @options[:base_uri]     ||= input.base_uri if input.respond_to?(:base_uri)

  @input = case input
    when String then StringIO.new(input)
    else input
  end

  if block_given?
    case block.arity
      when 0 then instance_eval(&block)
      else block.call(self)
    end
  end
end
open(filename, format: nil, **options, &block) click to toggle source

Parses input from the given file name or URL.

@note A reader returned via this method may not be readable depending on the processing model of the specific reader, as the file is only open during the scope of `open`. The reader is intended to be accessed through a block.

@example Parsing RDF statements from a file

RDF::Reader.open("etc/doap.nt") do |reader|
  reader.each_statement do |statement|
    puts statement.inspect
  end
end

@param [String, #to_s] filename @param [Symbol] format @param [Hash{Symbol => Object}] options

any additional options (see {RDF::Util::File.open_file}, {RDF::Reader#initialize} and {RDF::Format.for})

@yield [reader] @yieldparam [RDF::Reader] reader @yieldreturn [void] ignored @raise [RDF::FormatError] if no reader found for the specified format

# File lib/rdf/reader.rb, line 182
# Opens `filename` through `Util::File.open_file`, picks a reader class for
# it and instantiates that reader on the opened file.
#
# @param filename [String, #to_s] file name or URL handed to `open_file`
# @param format [Symbol, nil] explicit format; when nil the format is
#   detected from the file name, content type and (lazily) a content sample
# @param options [Hash{Symbol => Object}] passed to `open_file` and, with
#   `:encoding` / `:filename` filled in, to the reader's constructor
# @yield [reader] forwarded to the reader's constructor
# @raise [FormatError] if no reader class could be determined
def self.open(filename, format: nil, **options, &block)
  Util::File.open_file(filename, options) do |file|
    # Detection criteria are snapshotted before reader-only options are added.
    format_options = options.dup
    format_options[:content_type] ||= file.content_type if file.respond_to?(:content_type)
    format_options[:file_name] ||= filename
    options[:encoding] ||= file.encoding if file.respond_to?(:encoding)
    options[:filename] ||= filename

    reader_class = self.for(format || format_options) do
      # Only evaluated if detection needs a sample: read the head of the
      # file, then rewind so the reader starts from the beginning.
      file.read(1000).tap { file.rewind }
    end

    unless reader_class
      raise FormatError, "unknown RDF format: #{format_options.inspect}\nThis may be resolved with a require of the 'linkeddata' gem."
    end

    reader_class.new(file, options, &block)
  end
end
options() click to toggle source

Options suitable for automatic Reader provisioning. @return [Array<RDF::CLI::Option>]

# File lib/rdf/reader.rb, line 115
# Describes the reader options for command-line use.
#
# Each entry pairs an option symbol with its command-line switch and a block
# that converts the raw argument into the option value.
#
# @return [Array<RDF::CLI::Option>]
def self.options
  [
    RDF::CLI::Option.new(
      symbol: :canonicalize,
      datatype: TrueClass,
      on: ["--canonicalize"],
      description: "Canonicalize input/output.") { true },
    RDF::CLI::Option.new(
      symbol: :encoding,
      datatype: Encoding,
      on: ["--encoding ENCODING"],
      description: "The encoding of the input stream.") { |arg| Encoding.find arg },
    RDF::CLI::Option.new(
      symbol: :intern,
      datatype: TrueClass,
      on: ["--intern"],
      description: "Intern all parsed URIs.") { true },
    RDF::CLI::Option.new(
      symbol: :prefixes,
      datatype: Hash,
      multiple: true,
      on: ["--prefixes PREFIX,PREFIX"],
      description: "A comma-separated list of prefix:uri pairs.") do |arg|
        # "dc:http://..." splits on the first colon only, so colons inside
        # the URI are preserved.
        arg.split(',').each_with_object({}) do |pair, mapping|
          label, target = pair.split(':', 2)
          mapping[label.to_sym] = RDF::URI(target)
        end
    end,
    RDF::CLI::Option.new(
      symbol: :base_uri,
      datatype: RDF::URI,
      on: ["--uri URI"],
      description: "Base URI of input file, defaults to the filename.") { |arg| RDF::URI(arg) },
    RDF::CLI::Option.new(
      symbol: :validate,
      datatype: TrueClass,
      on: ["--validate"],
      description: "Validate input file.") { true },
  ]
end
to_sym() click to toggle source

Returns a symbol appropriate to use with ::for @return [Symbol]

# File lib/rdf/reader.rb, line 206
# Returns the symbol of this reader class's format, i.e. `self.format.to_sym`.
#
# NOTE(review): `self.format` returns nil when no format is bound to this
# reader, in which case this raises NoMethodError.
#
# @return [Symbol]
def self.to_sym
  self.format.to_sym
end

Private Class Methods

inherited(child) click to toggle source

@private @return [void]

Calls superclass method
# File lib/rdf/reader.rb, line 549
# Ruby's subclassing hook: appends every new subclass to `@@subclasses`
# (the list enumerated by `self.each`) and then continues the hook chain.
#
# @param child [Class] the class that was just defined as a subclass
# @return [void]
def self.inherited(child)
  @@subclasses << child
  super
end

Public Instance Methods

base_uri() click to toggle source

Returns the base URI determined by this reader.

@example

reader.base_uri  #=> RDF::URI('http://example.com/doap.nt')

@return [RDF::URI] @since 0.3.0

# File lib/rdf/reader.rb, line 276
# Returns the `:base_uri` option wrapped via `RDF::URI(...)`.
#
# @return [RDF::URI, nil] nil when no (truthy) `:base_uri` option is set
def base_uri
  RDF::URI(@options[:base_uri]) if @options[:base_uri]
end
canonicalize?() click to toggle source

Returns `true` if parsed values should be canonicalized.

@return [Boolean] `true` or `false` @since 0.3.0

# File lib/rdf/reader.rb, line 529
# Reports the `:canonicalize` option exactly as stored in `@options`.
#
# @return [Boolean] the stored option value
def canonicalize?
  @options[:canonicalize]
end
close() click to toggle source

Closes the input stream, after which an `IOError` will be raised for further read attempts.

If the input stream is already closed, does nothing.

@return [void] @since 0.2.2 @see ruby-doc.org/core-2.2.2/IO.html#method-i-close

# File lib/rdf/reader.rb, line 416
# Closes the input stream; a stream that is already closed is left alone,
# so calling this repeatedly is harmless.
#
# @return [void]
def close
  return if @input.closed?

  @input.close
end
Also aliased as: close!
close!()
Alias for: close
each(&block)
Alias for: each_statement
each_statement(&block) click to toggle source

Iterates the given block for each RDF statement.

If no block was given, returns an enumerator.

Statements are yielded in the order that they are read from the input stream.

@overload #each_statement

@yield  [statement]
  each statement
@yieldparam  [RDF::Statement] statement
@yieldreturn [void] ignored
@return [void]

@overload #each_statement

@return [Enumerator]

@return [void] @raise [RDF::ReaderError] on invalid data @see RDF::Enumerable#each_statement

# File lib/rdf/reader.rb, line 351
# Yields every statement produced by `read_statement` until it raises
# EOFError, then attempts to rewind the input.
#
# Without a block nothing is read. In both cases the method's value is an
# enumerator over `each_statement`.
#
# @yieldparam statement [RDF::Statement]
# @return [Enumerator]
def each_statement(&block)
  return enum_for(:each_statement) unless block_given?

  begin
    loop { block.call(read_statement) }
  rescue EOFError
    # End of input is the normal exit. Rewinding is best effort, since the
    # input may not be seekable.
    begin
      rewind
    rescue StandardError
      nil
    end
  end
  enum_for(:each_statement)
end
Also aliased as: each
each_triple(&block) click to toggle source

Iterates the given block for each RDF triple.

If no block was given, returns an enumerator.

Triples are yielded in the order that they are read from the input stream.

@overload #each_triple

@yield  [subject, predicate, object]
  each triple
@yieldparam  [RDF::Resource] subject
@yieldparam  [RDF::URI]      predicate
@yieldparam  [RDF::Term]     object
@yieldreturn [void] ignored
@return [void]

@overload #each_triple

@return [Enumerator]

@return [void] @see RDF::Enumerable#each_triple

# File lib/rdf/reader.rb, line 385
# Yields every triple produced by `read_triple`, splatted into the block's
# parameters, until `read_triple` raises EOFError; then attempts to rewind
# the input.
#
# Without a block nothing is read. In both cases the method's value is an
# enumerator over `each_triple`.
#
# @yieldparam subject [RDF::Resource]
# @yieldparam predicate [RDF::URI]
# @yieldparam object [RDF::Term]
# @return [Enumerator]
def each_triple(&block)
  return enum_for(:each_triple) unless block_given?

  begin
    loop { block.call(*read_triple) }
  rescue EOFError
    # End of input is the normal exit. Rewinding is best effort, since the
    # input may not be seekable.
    begin
      rewind
    rescue StandardError
      nil
    end
  end
  enum_for(:each_triple)
end
encoding() click to toggle source

Returns the encoding of the input stream.

@return [Encoding]

# File lib/rdf/reader.rb, line 504
# Resolves the encoding configured for the input stream.
#
# An Encoding option is returned as is, and a String/Symbol option is looked
# up by name on every call. Any other value falls back to the content
# encoding declared by the reader's format, which is then stored in the
# options unless a truthy value is already there.
#
# @return [Encoding]
def encoding
  configured = @options[:encoding]
  return configured if configured.is_a?(Encoding)

  if configured.is_a?(String) || configured.is_a?(Symbol)
    return Encoding.find(configured.to_s)
  end

  @options[:encoding] ||= Encoding.find(self.class.format.content_encoding.to_s)
end
intern?() click to toggle source

Returns `true` if parsed URIs should be interned.

@return [Boolean] `true` or `false` @since 0.3.0

# File lib/rdf/reader.rb, line 538
# Reports the `:intern` option exactly as stored in `@options`.
#
# @return [Boolean] the stored option value
def intern?
  @options[:intern]
end
lineno() click to toggle source

Current line number being processed. For formats that can associate generated {Statement} with a particular line number from input, this value reflects that line number. @return [Integer]

# File lib/rdf/reader.rb, line 424
# Reports the current line number by delegating to the input stream.
#
# NOTE(review): requires `@input` to respond to `lineno` (IO and StringIO
# do); other input objects would raise NoMethodError here.
#
# @return [Integer]
def lineno
  @input.lineno
end
prefix(name, uri = nil) click to toggle source

Defines the given named URI prefix for this reader.

@example Defining a URI prefix

reader.prefix :dc, RDF::URI('http://purl.org/dc/terms/')

@example Returning a URI prefix

reader.prefix(:dc)    #=> RDF::URI('http://purl.org/dc/terms/')

@overload prefix(name, uri)

@param  [Symbol, #to_s]   name
@param  [RDF::URI, #to_s] uri

@overload prefix(name)

@param  [Symbol, #to_s]   name

@return [RDF::URI]

# File lib/rdf/reader.rb, line 324
# Reads or defines a single named URI prefix.
#
# The name is normalized to a Symbol; a name whose string form is empty
# maps to the `nil` key.
#
# @param name [Symbol, #to_s] prefix name
# @param uri [RDF::URI, #to_s, nil] when given, the prefix is (re)defined
# @return [RDF::URI, nil] the URI just stored, or the one currently mapped
#   (nil if the prefix is unknown)
def prefix(name, uri = nil)
  key =
    if name.to_s.empty?
      nil
    elsif name.respond_to?(:to_sym)
      name.to_sym
    else
      name.to_s.to_sym
    end

  return prefixes[key] if uri.nil?

  prefixes[key] = uri
end
Also aliased as: prefix!
prefix!(name, uri = nil)
Alias for: prefix
prefixes() click to toggle source

Returns the URI prefixes currently defined for this reader.

@example

reader.prefixes[:dc]  #=> RDF::URI('http://purl.org/dc/terms/')

@return [Hash{Symbol => RDF::URI}] @since 0.3.0

# File lib/rdf/reader.rb, line 288
# Returns the prefix mappings held in `@options[:prefixes]`, installing an
# empty Hash there first if the option is unset.
#
# @return [Hash{Symbol => RDF::URI}]
def prefixes
  @options[:prefixes] ||= {}
end
prefixes=(prefixes) click to toggle source

Defines the given URI prefixes for this reader.

@example

reader.prefixes = {
  dc: RDF::URI('http://purl.org/dc/terms/'),
}

@param [Hash{Symbol => RDF::URI}] prefixes @return [Hash{Symbol => RDF::URI}] @since 0.3.0

# File lib/rdf/reader.rb, line 303
# Replaces the prefix mappings wholesale. The given hash is stored as is,
# not copied or merged.
#
# @param prefixes [Hash{Symbol => RDF::URI}]
# @return [Hash{Symbol => RDF::URI}]
def prefixes=(prefixes)
  @options[:prefixes] = prefixes
end
rewind() click to toggle source

Rewinds the input stream to the beginning of input.

@return [void] @since 0.2.3 @see ruby-doc.org/core-2.2.2/IO.html#method-i-rewind

# File lib/rdf/reader.rb, line 402
# Rewinds the input stream by delegating to `@input.rewind`.
#
# NOTE(review): any error raised by the underlying stream propagates;
# `each_statement` and `each_triple` deliberately swallow it.
#
# @return [void]
def rewind
  @input.rewind
end
Also aliased as: rewind!
rewind!()
Alias for: rewind
to_sym() click to toggle source

Returns a symbol appropriate to use with ::for @return [Symbol]

# File lib/rdf/reader.rb, line 213
# Instance-level convenience: returns the same symbol as the reader class's
# own `to_sym`.
#
# @return [Symbol]
def to_sym
  self.class.to_sym
end
valid?() click to toggle source

@return [Boolean]

@note this parses the full input and is valid only in the reader block.

Use `Reader.new(input, validate: true)` if you intend to capture the 
result.

@example Parsing RDF statements from a file

RDF::NTriples::Reader.new("!!invalid input??") do |reader|
  reader.valid? # => false
end

@see RDF::Value#validate! for Literal & URI validation relevant to

error handling.

@see RDF::Enumerable#valid?

Calls superclass method RDF::Enumerable#valid?
# File lib/rdf/reader.rb, line 443
# Checks validity by combining the superclass check with the logged error
# statistics: the result is truthy only if `super` is truthy and no
# `:error` entry is present in `log_statistics`.
#
# An ArgumentError or RDF::ReaderError raised along the way is logged via
# `log_error` and reported as invalid rather than propagated.
#
# @return [Boolean]
def valid?
  super && !log_statistics[:error]
rescue ArgumentError, RDF::ReaderError => e
  log_error(e.message)
  false
end
validate?() click to toggle source

Returns `true` if parsed statements and values should be validated.

@return [Boolean] `true` or `false` @since 0.3.0

# File lib/rdf/reader.rb, line 520
# Reports the `:validate` option exactly as stored in `@options`.
#
# @return [Boolean] the stored option value
def validate?
  @options[:validate]
end

Protected Instance Methods

fail_object() click to toggle source

Raises an “expected object” parsing error on the current line.

@return [void] @raise [RDF::ReaderError]

# File lib/rdf/reader.rb, line 495
# Reports an "Expected object" parse error for the current line, including
# the line text and line number.
#
# NOTE(review): raising is delegated to `log_error` through the
# `exception:` option; that helper is not visible here, so confirm it
# actually raises RDF::ReaderError.
#
# @return [void]
def fail_object
  log_error("Expected object (found: #{current_line.inspect})", lineno: lineno, exception: RDF::ReaderError)
end
fail_predicate() click to toggle source

Raises an “expected predicate” parsing error on the current line.

@return [void] @raise [RDF::ReaderError]

# File lib/rdf/reader.rb, line 486
# Reports an "Expected predicate" parse error for the current line,
# including the line text and line number.
#
# NOTE(review): raising is delegated to `log_error` through the
# `exception:` option; that helper is not visible here, so confirm it
# actually raises RDF::ReaderError.
#
# @return [void]
def fail_predicate
  log_error("Expected predicate (found: #{current_line.inspect})", lineno: lineno, exception: RDF::ReaderError)
end
fail_subject() click to toggle source

Raises an “expected subject” parsing error on the current line.

@return [void] @raise [RDF::ReaderError]

# File lib/rdf/reader.rb, line 477
# Reports an "Expected subject" parse error for the current line, including
# the line text and line number.
#
# NOTE(review): raising is delegated to `log_error` through the
# `exception:` option; that helper is not visible here, so confirm it
# actually raises RDF::ReaderError.
#
# @return [void]
def fail_subject
  log_error("Expected subject (found: #{current_line.inspect})", lineno: lineno, exception: RDF::ReaderError)
end
read_statement() click to toggle source

Reads a statement from the input stream.

@return [RDF::Statement] a statement @raise [NotImplementedError] unless implemented in subclass @abstract

# File lib/rdf/reader.rb, line 458
# Default implementation: builds a Statement from the terms returned by
# `read_triple`. Since the base `read_triple` raises NotImplementedError,
# a subclass must override at least one of the two methods.
#
# @return [RDF::Statement]
def read_statement
  Statement.new(*read_triple)
end
read_triple() click to toggle source

Reads a triple from the input stream.

@return [Array(RDF::Term)] a triple @raise [NotImplementedError] unless implemented in subclass @abstract

# File lib/rdf/reader.rb, line 468
# Abstract hook: always raises here, naming the concrete class in the
# message so the missing override is easy to locate.
#
# @raise [NotImplementedError] always, in this base implementation
def read_triple
  raise NotImplementedError, "#{self.class}#read_triple" # override in subclasses
end

Private Instance Methods

blank?() click to toggle source

@return [Boolean]

# File lib/rdf/reader.rb, line 589
# Tells whether there is nothing left on the current line, i.e. no line has
# been read yet (`@line` is nil) or the line is empty.
#
# @return [Boolean]
def blank?
  return true if @line.nil?

  @line.empty?
end
current_line() click to toggle source

@private @return [String] The most recently read line of the input

# File lib/rdf/reader.rb, line 557
# Returns `@line`, the working line buffer set by `readline` and consumed
# piecewise by `match`.
#
# @return [String, nil] nil before anything has been assigned to `@line`
def current_line
  @line
end
match(pattern) click to toggle source

@param [Regexp] pattern @return [Object]

# File lib/rdf/reader.rb, line 596
# Tries `pattern` against the current line and, on success, consumes the
# input up to the end of the match: the current line becomes the text after
# the match with leading whitespace removed.
#
# @param pattern [Regexp]
# @return [String, true, nil] the first capture group if it matched, `true`
#   if the pattern matched without one, or nil when there is no match (the
#   current line is then left untouched)
def match(pattern)
  return unless @line =~ pattern

  found = Regexp.last_match
  @line = found.post_match.lstrip
  found[1] || true
end
readline() click to toggle source

@return [String]

# File lib/rdf/reader.rb, line 563
# Loads the next logical line into `@line` and returns it.
#
# A physical line containing "\r" is served in two steps: the text before
# the first "\r" now, and the remainder (kept in `@line_rest`) on the next
# call, before anything further is read from `@input`.
#
# NOTE(review): presumably `@input.readline` raises EOFError at end of
# input (as IO/StringIO do), which is what terminates `each_statement` and
# `each_triple` — confirm for other input types.
#
# @return [String] the line, without trailing newline, in `encoding`
def readline
  # Prefer the carried-over remainder of the previous physical line.
  @line = @line_rest || @input.readline
  # Split at the first "\r" only; an empty string splits to [], leaving both nil.
  @line, @line_rest = @line.split("\r", 2)
  # `to_s` covers the nil left by splitting an empty string.
  @line = @line.to_s.chomp
  begin
    @line.encode!(encoding)
  rescue Encoding::UndefinedConversionError, Encoding::InvalidByteSequenceError, Encoding::ConverterNotFoundError
    # It is likely the persisted line was not encoded on initial write
    # (i.e. persisted via RDF <= 1.0.9 and read via RDF >= 1.0.10)
    #
    # Encoding::UndefinedConversionError is raised by MRI.
    # Encoding::InvalidByteSequenceError is raised by jruby >= 1.7.5
    # Encoding::ConverterNotFoundError is raised by jruby < 1.7.5
    @line = RDF::NTriples::Reader.unescape(@line).encode(encoding)
  end
  @line
end
strip!() click to toggle source

@return [void]

# File lib/rdf/reader.rb, line 583
# Strips leading and trailing whitespace from the current line in place.
#
# NOTE(review): `@line` must be a String; calling this before any line has
# been read (`@line` nil) raises NoMethodError. The result is that of
# `String#strip!` (nil when nothing was removed), so do not rely on it.
#
# @return [void]
def strip!
  @line.strip!
end