class RDF::NTriples::Reader
N-Triples parser.
@example Obtaining an NTriples reader class
RDF::Reader.for(:ntriples) #=> RDF::NTriples::Reader
RDF::Reader.for("etc/doap.nt")
RDF::Reader.for(file_name: "etc/doap.nt")
RDF::Reader.for(file_extension: "nt")
RDF::Reader.for(content_type: "application/n-triples")
@example Parsing RDF statements from an NTriples file
RDF::NTriples::Reader.open("etc/doap.nt") do |reader|
  reader.each_statement do |statement|
    puts statement.inspect
  end
end
@example Parsing RDF statements from an NTriples string
data = StringIO.new(File.read("etc/doap.nt"))
RDF::NTriples::Reader.new(data) do |reader|
  reader.each_statement do |statement|
    puts statement.inspect
  end
end
@see https://www.w3.org/TR/rdf-testcases/#ntriples
@see https://www.w3.org/TR/n-triples/
Constants
- BLANK_NODE_LABEL
Turtle grammar production [141s].
- COMMENT
- DATATYPE_URI
- ECHAR
Turtle grammar production [159s].
- END_OF_STATEMENT
- ESCAPE_CHARS
- IRIREF
Turtle grammar production [18].
- IRI_RANGE
- LANGTAG
Turtle grammar production [144s].
- LITERAL
- LITERAL_PLAIN
- LITERAL_WITH_DATATYPE
- LITERAL_WITH_LANGUAGE
- NODEID
- OBJECT
- PN_CHARS
Turtle grammar production [166s].
- PN_CHARS_BASE
Turtle grammar production [163s].
- PN_CHARS_U
Turtle grammar production [164s].
- PREDICATE
- STRING_LITERAL_QUOTE
Turtle grammar production [22].
- SUBJECT
- UCHAR
- UCHAR4
- UCHAR8
- URIREF
- U_CHARS1
Terminals from rdf-turtle.
@see www.w3.org/TR/n-triples/ @see www.w3.org/TR/turtle/
Unicode regular expressions.
- U_CHARS2
Public Class Methods
(see unserialize) @return [RDF::Literal]
# File lib/rdf/ntriples/reader.rb, line 143
#
# Builds an RDF::Literal from a serialized literal.
#
# The input is tested against LITERAL_WITH_LANGUAGE, then
# LITERAL_WITH_DATATYPE, then LITERAL_PLAIN; the first pattern that matches
# decides how the literal is constructed. Capture group 1 supplies the
# (still escaped) lexical form and capture group 4 supplies the language or
# datatype.
#
# @param  [String] input
# @param  [{Symbol => Object}] options accepted for signature parity; not read here
# @return [RDF::Literal, nil] nil when none of the three patterns match
def self.parse_literal(input, options = {})
  case input
  when LITERAL_WITH_LANGUAGE
    lexical, language = Regexp.last_match.values_at(1, 4)
    RDF::Literal.new(unescape(lexical), language: language)
  when LITERAL_WITH_DATATYPE
    lexical, datatype = Regexp.last_match.values_at(1, 4)
    RDF::Literal.new(unescape(lexical), datatype: datatype)
  when LITERAL_PLAIN
    RDF::Literal.new(unescape(Regexp.last_match(1)))
  end
end
(see unserialize) @return [RDF::Node]
# File lib/rdf/ntriples/reader.rb, line 124
#
# Builds an RDF::Node from a serialized blank node.
#
# @param  [String] input
# @param  [{Symbol => Object}] options accepted for signature parity; not read here
# @return [RDF::Node, nil] nil when the input does not match NODEID
def self.parse_node(input, options = {})
  return unless input =~ NODEID

  # Capture group 1 of NODEID is handed to RDF::Node.new as the node id.
  RDF::Node.new(Regexp.last_match(1))
end
(see unserialize)
# File lib/rdf/ntriples/reader.rb, line 117
#
# Parses a serialized term in object position.
#
# Tries the parsers in a fixed order -- URI, then blank node, then literal --
# and returns the first truthy result.
#
# @param  [String] input
# @param  [{Symbol => Object}] options forwarded unchanged to each parser
# @return [Object, nil] whatever the first successful parser returned, or the
#   (falsy) result of parse_literal when nothing matched
def self.parse_object(input, options = {})
  term = parse_uri(input, options)
  term ||= parse_node(input, options)
  term || parse_literal(input, options)
end
(see unserialize) @return [RDF::URI]
# File lib/rdf/ntriples/reader.rb, line 111
#
# Parses a serialized term in predicate position.
#
# Delegates to parse_uri and always requests interning. The caller's
# +options+ are not forwarded.
#
# @param  [String] input
# @param  [{Symbol => Object}] options accepted for signature parity; not read here
# @return [RDF::URI, nil] the result of parse_uri
def self.parse_predicate(input, options = {})
  parse_uri(input, { intern: true })
end
(see unserialize) @return [RDF::Resource]
# File lib/rdf/ntriples/reader.rb, line 104
#
# Parses a serialized term in subject position: a URI if the input parses as
# one, otherwise a blank node.
#
# @param  [String] input
# @param  [{Symbol => Object}] options forwarded unchanged to each parser
# @return [RDF::Resource, nil] nil (or the falsy parse_node result) when
#   neither parser matches
def self.parse_subject(input, options = {})
  uri = parse_uri(input, options)
  return uri if uri

  parse_node(input, options)
end
(see unserialize) @return [RDF::URI]
# File lib/rdf/ntriples/reader.rb, line 133
#
# Builds an RDF::URI from a serialized URI reference.
#
# Capture group 1 of URIREF is unescaped once and the result is passed to
# either RDF::URI.intern or RDF::URI.new.
#
# @param  [String] input
# @param  [{Symbol => Object}] options
# @option options [Boolean] :intern when truthy, use RDF::URI.intern
#   instead of RDF::URI.new
# @return [RDF::URI, nil] nil when the input does not match URIREF
def self.parse_uri(input, options = {})
  return unless input =~ URIREF

  # Unescape exactly once and reuse the result (the value used to be computed
  # twice, with the first result discarded).
  uri_str = unescape(Regexp.last_match(1))
  options[:intern] ? RDF::URI.intern(uri_str) : RDF::URI.new(uri_str)
end
@param [String] string @return [String] @see www.w3.org/TR/rdf-testcases/#ntrip_strings @see blog.grayproductions.net/articles/understanding_m17n @see yehudakatz.com/2010/05/17/encodings-unabridged/
# File lib/rdf/ntriples/reader.rb, line 160
#
# Decodes N-Triples escape sequences in a string.
#
# Character escapes (one per entry of ESCAPE_CHARS, spelled the way
# String#inspect prints that character, e.g. backslash + "t" for TAB) and
# the \uXXXX / \UXXXXXXXX code point escapes matched by UCHAR are decoded in
# a single left-to-right pass. One pass matters: decoding in several
# sequential passes re-reads the output of an earlier pass, so an escaped
# backslash followed by "n" (or by "u0041") would wrongly be decoded a
# second time.
#
# The argument is not modified; a UTF-8 tagged copy is processed instead.
#
# @param  [String] string
# @return [String] a new UTF-8 string with escapes decoded
def self.unescape(string)
  string = string.dup.force_encoding(Encoding::UTF_8)

  # Escaped spelling => literal character. Built lazily once per receiver so
  # that the hot parsing path does not rebuild it on every call.
  @unescape_map ||= ESCAPE_CHARS.each_with_object({}) { |char, map|
    map[char.inspect[1...-1]] = char
  }.freeze
  # UCHAR goes first so that its capture groups keep the numbers 1 and 2.
  @unescape_pattern ||= Regexp.union(UCHAR, *@unescape_map.keys)

  string.gsub(@unescape_pattern) do |token|
    hex = Regexp.last_match(1) || Regexp.last_match(2)
    hex ? [hex.hex].pack('U*') : @unescape_map[token]
  end
end
Reconstructs an RDF value from its serialized N-Triples representation.
@param [String] input @param [{Symbol => Object}] options
The options are merged over a default of `logger: []` and passed on to the reader's constructor (see {RDF::Reader#initialize}).
@return [RDF::Term]
# File lib/rdf/ntriples/reader.rb, line 94
#
# Reconstructs an RDF value from its serialized N-Triples representation.
#
# A fresh reader is created over +input+ and asked for a single value. The
# reader options default to +logger: []+; anything in +options+ overrides
# that default.
#
# @param  [String, nil] input
# @param  [{Symbol => Object}] options merged over the defaults and passed
#   to the reader constructor
# @return [RDF::Term, nil] nil when +input+ is nil
def self.unserialize(input, options = {})
  return nil if input.nil?

  reader_options = { logger: [] }.merge(options)
  new(input, reader_options).read_value
end
Public Instance Methods
@return [Boolean] @see www.w3.org/TR/rdf-testcases/#ntrip_grammar (comment)
# File lib/rdf/ntriples/reader.rb, line 215
#
# Attempts to read a comment at the current input position.
#
# @return [Object] whatever +match(COMMENT)+ returns; callers only rely on
#   its truthiness
# @see    www.w3.org/TR/rdf-testcases/#ntrip_grammar (comment)
def read_comment
  match(COMMENT)
end
@return [Boolean] @see www.w3.org/TR/rdf-testcases/#ntrip_grammar (triple)
# File lib/rdf/ntriples/reader.rb, line 267
#
# Attempts to read the end-of-statement terminator at the current input
# position.
#
# @return [Object] whatever +match(END_OF_STATEMENT)+ returns; callers only
#   rely on its truthiness
# @see    www.w3.org/TR/rdf-testcases/#ntrip_grammar (triple)
def read_eos
  match(END_OF_STATEMENT)
end
@return [RDF::Literal] @see www.w3.org/TR/rdf-testcases/#ntrip_grammar (literal)
# File lib/rdf/ntriples/reader.rb, line 247
#
# Attempts to read a literal at the current input position.
#
# After the quoted lexical form (LITERAL_PLAIN) has been read and unescaped,
# an optional language tag or a "^^" datatype marker followed by a URI
# reference is consumed. The literal is validated and/or canonicalized when
# the corresponding reader flags are set.
#
# @return [RDF::Literal, nil] nil when no literal starts at the current position
# @see    www.w3.org/TR/rdf-testcases/#ntrip_grammar (literal)
def read_literal
  lexical = match(LITERAL_PLAIN)
  return unless lexical

  lexical = self.class.unescape(lexical)
  literal =
    if (language = match(LANGTAG))
      RDF::Literal.new(lexical, language: language)
    elsif match(/^(\^\^)/) # FIXME
      # A datatype marker must be followed by a URI reference.
      RDF::Literal.new(lexical, datatype: read_uriref || fail_object)
    else
      RDF::Literal.new(lexical) # plain string literal
    end

  literal.validate!     if validate?
  literal.canonicalize! if canonicalize?
  literal
end
@return [RDF::Node] @see www.w3.org/TR/rdf-testcases/#ntrip_grammar (nodeID)
# File lib/rdf/ntriples/reader.rb, line 237
#
# Attempts to read a blank node at the current input position.
#
# Nodes are cached in +@nodes+ keyed by the matched node id, so reading the
# same id again on this reader returns the same RDF::Node object.
#
# @return [RDF::Node, nil] nil when no node id starts at the current position
# @see    www.w3.org/TR/rdf-testcases/#ntrip_grammar (nodeID)
def read_node
  label = match(NODEID)
  return unless label

  @nodes ||= {}
  @nodes[label] ||= RDF::Node.new(label)
end
@return [Array] @see www.w3.org/TR/rdf-testcases/#ntrip_grammar
# File lib/rdf/ntriples/reader.rb, line 189
#
# Reads lines until one yields a triple, skipping blank lines and comments.
#
# If a RDF::ReaderError is raised while parsing a line, +@line+ is reset to
# the value it had before parsing started and the error is re-raised, so the
# same input can be re-read by #read_value.
#
# @return [Array] +[subject, predicate, object]+
# @raise  [RDF::ReaderError] re-raised from term parsing after +@line+ is restored
# @see    www.w3.org/TR/rdf-testcases/#ntrip_grammar
def read_triple
  loop do
    readline.strip! # EOFError thrown on end of input
    saved_line = @line # for backtracking input in case of parse error

    begin
      next if blank? || read_comment

      subject   = read_uriref || read_node || fail_subject
      predicate = read_uriref(intern: true) || fail_predicate
      object    = read_uriref || read_node || read_literal || fail_object

      # The statement terminator is only checked when validating.
      if validate? && !read_eos
        log_error("Expected end of statement (found: #{current_line.inspect})", lineno: lineno, exception: RDF::ReaderError)
      end

      return [subject, predicate, object]
    rescue RDF::ReaderError => e
      @line = saved_line # this allows #read_value to work
      raise e
    end
  end
end
@return [RDF::URI] @see www.w3.org/TR/rdf-testcases/#ntrip_grammar (uriref)
# File lib/rdf/ntriples/reader.rb, line 222
#
# Attempts to read a URI reference at the current input position.
#
# The matched text is unescaped and turned into an RDF::URI. The URI is
# interned only when both the reader-level +intern?+ flag and
# +options[:intern]+ are truthy. It is validated and/or canonicalized when
# the corresponding reader flags are set. An ArgumentError raised along the
# way is reported through +log_error+ as an invalid URI.
#
# @param  [{Symbol => Object}] options
# @option options [Boolean] :intern request an interned URI
# @return [RDF::URI, nil] nil when no URI reference starts at the current
#   position; on ArgumentError, whatever +log_error+ returns
# @see    www.w3.org/TR/rdf-testcases/#ntrip_grammar (uriref)
def read_uriref(options = {})
  iri_text = match(URIREF)
  return unless iri_text

  # Reassign the same local so that the rescue clause reports the unescaped
  # text whenever unescaping itself succeeded.
  iri_text = self.class.unescape(iri_text)
  constructor = intern? && options[:intern] ? :intern : :new
  uri = RDF::URI.send(constructor, iri_text)
  uri.validate!     if validate?
  uri.canonicalize! if canonicalize?
  uri
rescue ArgumentError
  log_error("Invalid URI (found: \"<#{iri_text}>\")", lineno: lineno, token: "<#{iri_text}>", exception: RDF::ReaderError)
end
@return [RDF::Term]
# File lib/rdf/ntriples/reader.rb, line 176
#
# Reads a single value from the input.
#
# A full statement is attempted first. If that raises RDF::ReaderError, the
# input is re-read as a lone term (URI, then blank node, then literal) and
# +log_recover+ is called before the term is returned.
#
# @return [RDF::Term]
def read_value
  read_statement
rescue RDF::ReaderError
  recovered = read_uriref || read_node || read_literal
  log_recover
  recovered
end