diff --git a/Purtle.php b/Purtle.php new file mode 100644 index 0000000..ee2262c --- /dev/null +++ b/Purtle.php @@ -0,0 +1,19 @@ +prefix( 'acme', 'http://acme.test/terms/' ); + + $writer->about( 'http://quux.test/Something' ) + ->a( 'acme', 'Thing' ) + ->say( 'acme', 'name' )->text( 'Thingy' )->text( 'Dingsda', 'de' ) + ->say( 'acme', 'owner' )->is( 'http://quux.test/' ); + + +## Release notes + +### 0.1 (dev) + +Initial release. diff --git a/init.mw.php b/init.mw.php new file mode 100644 index 0000000..370f6ed --- /dev/null +++ b/init.mw.php @@ -0,0 +1,23 @@ + __FILE__, + 'name' => 'Purtle', + 'version' => WIKIBASE_VIEW_VERSION, + 'author' => array( + 'Daniel Kinzler', + 'Stas Malyshev', + ), + 'url' => 'https://git.wikimedia.org/blob/mediawiki%2Fextensions%2FWikibase/master/purtle%2FREADME.md', + 'description' => 'A fast, lightweight RDF generator', + 'license-name' => 'GPL-2.0+' +); + + +$GLOBALS['wgHooks']['UnitTestsList'][] = function( array &$paths ) { + $paths[] = __DIR__ . '/tests/phpunit'; +}; \ No newline at end of file diff --git a/src/BNodeLabeler.php b/src/BNodeLabeler.php new file mode 100644 index 0000000..9ee83bb --- /dev/null +++ b/src/BNodeLabeler.php @@ -0,0 +1,61 @@ += 1' ); + } + + $this->prefix = $prefix; + $this->counter = $start; + } + + /** + * @param string|null $label node label, will be generated if not given. + * + * @return string + */ + public function getLabel( $label = null ) { + if ( $label === null ) { + $label = $this->prefix . $this->counter; + $this->counter ++; + } + + return $label; + } + +} diff --git a/src/N3Quoter.php b/src/N3Quoter.php new file mode 100644 index 0000000..35f3a76 --- /dev/null +++ b/src/N3Quoter.php @@ -0,0 +1,46 @@ +escaper = $escapeUnicode ? new UnicodeEscaper() : null; + } + + public function escapeIRI( $iri ) { + //FIXME: apply unicode escaping?! 
+ return strtr( $iri, array( + ' ' => '%20', + '"' => '%22', + '<' => '%3C', + '>' => '%3E', + ) ); + } + + public function escapeLiteral( $s ) { + $escaped = addcslashes( $s, "\x0..\x1F\"\\" ); + + if ( $this->escaper !== null ) { + $escaped = $this->escaper->escapeString( $escaped ); + } + + return $escaped; + } + +} diff --git a/src/N3RdfWriterBase.php b/src/N3RdfWriterBase.php new file mode 100644 index 0000000..575844e --- /dev/null +++ b/src/N3RdfWriterBase.php @@ -0,0 +1,111 @@ +quoter = $quoter ?: new N3Quoter(); + } + + /** + * @return bool + */ + public function getTrustIRIs() { + return $this->trustIRIs; + } + + /** + * @param bool $trustIRIs + */ + public function setTrustIRIs( $trustIRIs ) { + $this->trustIRIs = $trustIRIs; + } + + protected function writeRef( $base, $local = null ) { + if ( $local === null ) { + if( $base === 'a' ) { + $this->write( 'a' ); + } else { + $this->writeIRI( $base ); + } + } else { + $this->write( "$base:$local" ); + } + } + + protected function writeShorthand( $shorthand ) { +// if ( $shorthand === null || $shorthand === '' ) { +// throw new InvalidArgumentException( '$shorthand must not be empty' ); +// } + + $this->write( $shorthand ); + } + + protected function writeIRI( $iri ) { +// if ( $iri === null || $iri === '' ) { +// throw new InvalidArgumentException( '$iri must not be empty' ); +// } + +// if ( $iri[0] === '_' || $iri[0] === ':' || $iri[0] === '/' || $iri[0] === '#' ) { +// throw new InvalidArgumentException( '$iri must be an absolute iri: ' . 
$iri ); +// } + if ( !$this->trustIRIs ) { + $iri = $this->quoter->escapeIRI( $iri ); + } + $this->write( "<$iri>" ); + } + + protected function writeQName( $base, $local ) { +// if ( $base === null ) { +// throw new InvalidArgumentException( '$base must not be null' ); +// } + +// if ( $local === null || $local === '' ) { +// throw new InvalidArgumentException( '$local must not be empty' ); +// } + + $this->write( "$base:$local" ); + } + + + protected function writeText( $text, $language = null ) { + $value = $this->quoter->escapeLiteral( $text ); + $this->write( '"' . $value . '"' ); + + if ( $language !== null ) { + $this->write( '@' . $language ); + } + } + + protected function writeValue( $value, $typeBase = null, $typeLocal = null ) { + $value = $this->quoter->escapeLiteral( $value ); + $this->write( '"' . $value. '"' ); + + if ( $typeBase !== null ) { + $this->write( '^^' ); + $this->writeRef( $typeBase, $typeLocal ); + } + } + +} diff --git a/src/NTriplesRdfWriter.php b/src/NTriplesRdfWriter.php new file mode 100644 index 0000000..df4d385 --- /dev/null +++ b/src/NTriplesRdfWriter.php @@ -0,0 +1,103 @@ +quoter->setEscapeUnicode( true ); + + $this->transitionTable[self::STATE_OBJECT] = array( + self::STATE_DOCUMENT => " .\n", + self::STATE_SUBJECT => " .\n", + self::STATE_PREDICATE => " .\n", + self::STATE_OBJECT => " .\n", + ); + } + + protected function expandSubject( &$base, &$local ) { + $this->expandQName( $base, $local ); + } + + protected function writeSubject( $base, $local = null ) { + // noop + } + + protected function expandPredicate( &$base, &$local ) { + $this->expandShorthand( $base, $local ); // e.g. ( 'a', null ) => ( 'rdf', 'type' ) + $this->expandQName( $base, $local ); // e.g. 
( 'acme', 'foo' ) => ( 'http://amce.test/foo', null ) + } + + protected function writePredicate( $base, $local = null ) { + // noop + } + + private function writeSubjectAndObject() { + $this->writeRef( $this->currentSubject[0], $this->currentSubject[1] ); + $this->write( ' ' ); + $this->writeRef( $this->currentPredicate[0], $this->currentPredicate[1] ); + } + + protected function expandResource( &$base, &$local ) { + $this->expandQName( $base, $local ); + } + + protected function expandType( &$base, &$local ) { + $this->expandQName( $base, $local ); + } + + protected function writeResource( $base, $local = null ) { + $this->writeSubjectAndObject(); + $this->write( ' ' ); + $this->writeRef( $base, $local ); + } + + + protected function writeText( $text, $language = null ) { + $this->writeSubjectAndObject(); + $this->write( ' ' ); + + parent::writeText( $text, $language ); + } + + protected function writeValue( $value, $typeBase = null, $typeLocal = null ) { + $this->writeSubjectAndObject(); + $this->write( ' ' ); + + parent::writeValue( $value, $typeBase, $typeLocal ); + } + + /** + * @param string $role + * @param BNodeLabeler $labeler + * + * @return RdfWriterBase + */ + protected function newSubWriter( $role, BNodeLabeler $labeler ) { + $writer = new self( $role, $labeler, $this->quoter ); + + return $writer; + } + + /** + * @return string a MIME type + */ + public function getMimeType() { + //NOTE: Add charset=UTF-8 if and when the constructor configures $this->quoter + // to write utf-8. 
+ return 'application/n-triples'; + } + +} diff --git a/src/RdfWriter.php b/src/RdfWriter.php new file mode 100644 index 0000000..28ce9a6 --- /dev/null +++ b/src/RdfWriter.php @@ -0,0 +1,181 @@ +prefix( 'acme', 'http://acme.test/terms/' ); + * $writer->about( 'http://quux.test/Something' ) + * ->say( 'acme', 'name' )->text( 'Thingy' )->text( 'Dingsda', 'de' ) + * ->say( 'acme', 'owner' )->is( 'http://quux.test/' ); + * @endcode + * + * To get the generated RDF output, use the drain() method. + * + * @note: The contract of this interface follows the GIGO principle, that is, + * implementations are not required to ensure valid output or prompt failure on + * invalid input. Speed should generally be favored over safety. + * + * Caveats: + * - no relative iris + * - predicates must be qnames + * - no inline/nested blank nodes + * - no comments + * - no collections + * - no automatic conversion of iris to qnames + * + * @license GPL 2+ + * @author Daniel Kinzler + */ +interface RdfWriter { + + //TODO: split: generic RdfWriter class with shorthands, use RdfFormatters for output + + /** + * Returns the local name of a blank node, for use with the "_" prefix. + * + * @param string|null $label node label, will be generated if not given. + * + * @return string A local name for the blank node, for use with the '_' prefix. + */ + public function blank( $label = null ); + + /** + * Start the document. May generate a header. + */ + public function start(); + + /** + * Finish the document. May generate a footer. + */ + public function finish(); + + /** + * Generates an RDF string from the current buffers state and returns it. + * The buffer is reset to the empty state. + * Before the result string is generated, implementations should close any + * pending syntactical structures (close tags, generate footers, etc). + * + * @return string The RDF output + */ + public function drain(); + + /** + * Declare a prefix for later use. Prefixes should be declared before being used. 
+ * Should not be called after start(). + * + * @param string $prefix + * @param string $iri a IRI + */ + public function prefix( $prefix, $iri ); + + /** + * Start an "about" (subject) clause, given a subject. + * Can occur at the beginning of the output sequence, but can later only follow + * a call to is(), text(), or value(). + * Should fail if called at an inappropriate time in the output sequence. + * + * @param string $base A QName prefix if $local is given, or an IRI if $local is null. + * @param string|null $local A QName suffix, or null if $base is an IRI. + * + * @return RdfWriter $this + */ + public function about( $base, $local = null ); + + /** + * Start a predicate clause. + * Can only follow a call to about() or a call to one of is(), text(), or value(). + * Should fail if called at an inappropriate time in the output sequence. + * + * @note Unlike about() and is(), say() cannot be called with a full IRI, + * but must always use qname form. This is required to cater to output + * formats that do not allow IRIs to be used as predicates directly, + * like RDF/XML. + * + * @param string $base A QName prefix if $local is given, or a shorthand. MUST NOT be an IRI. + * @param string|null $local A QName suffix, or null if $base is a shorthand. + * + * @return RdfWriter $this + */ + public function say( $base, $local = null ); + + /** + * Produce a resource as the object of a statement. + * Can only follow a call to say() or a call to one of is(), text(), or value(). + * Should fail if called at an inappropriate time in the output sequence. + * + * @param string $base A QName prefix if $local is given, or an IRI or shorthand if $local is null. + * @param string|null $local A QName suffix, or null if $base is an IRI or shorthand. + * + * @return RdfWriter $this + */ + public function is( $base, $local = null ); + + /** + * Produce a text literal as the object of a statement. + * Can only follow a call to say() or a call to one of is(), text(), or value(). 
+ * Should fail if called at an inappropriate time in the output sequence. + * + * @param string $text the text to be placed in the output + * @param string|null $language the language the text is in + * + * @return RdfWriter $this + */ + public function text( $text, $language = null ); + + + /** + * Produce a typed or untyped literal as the object of a statement. + * Can only follow a call to say() or a call to one of is(), text(), or value(). + * Should fail if called at an inappropriate time in the output sequence. + * + * @param string $value the value encoded as a string + * @param string|null $typeBase The data type's QName prefix if $typeLocal is given, + * or an IRI or shorthand if $typeLocal is null. + * @param string|null $typeLocal The data type's QName suffix, + * or null if $typeBase is an IRI or shorthand. + * + * @return RdfWriter $this + */ + public function value( $value, $typeBase = null, $typeLocal = null ); + + /** + * Shorthand for say( 'a' )->is( $typeBase, $typeLocal ). + * + * @param string $typeBase The data type's QName prefix if $typeLocal is given, + * or an IRI or shorthand if $typeLocal is null. + * @param string|null $typeLocal The data type's QName suffix, + * or null if $typeBase is an IRI or shorthand. + * + * @return RdfWriter $this + */ + public function a( $typeBase, $typeLocal = null ); + + /** + * Returns a document-level sub-writer. + * This can be used to generate statements out of sequence. + * + * @note: do not call drain() on sub-writers! + * + * @return RdfWriter + */ + public function sub(); + + /** + * Resets any state the writer may be holding. + */ + public function reset(); + + /** + * Returns the MIME type of the RDF serialization the writer produces. 
+ * + * @return string a MIME type + */ + public function getMimeType(); +} diff --git a/src/RdfWriterBase.php b/src/RdfWriterBase.php new file mode 100644 index 0000000..347a46a --- /dev/null +++ b/src/RdfWriterBase.php @@ -0,0 +1,621 @@ +role = $role; + + $this->labeler = $labeler?: new BNodeLabeler(); + + $this->registerShorthand( 'a', 'rdf', 'type' ); + + $this->prefix( 'rdf', 'http://www.w3.org/1999/02/22-rdf-syntax-ns#' ); + $this->prefix( 'xsd', 'http://www.w3.org/2001/XMLSchema#' ); + } + + /** + * @param string $role + * @param BNodeLabeler $labeler + * + * @return RdfWriterBase + */ + abstract protected function newSubWriter( $role, BNodeLabeler $labeler ); + + /** + * Registers a shorthand that can be used instead of a qname, + * like 'a' can be used instead of 'rdf:type'. + * + * @param string $shorthand + * @param string $prefix + * @param string $local + */ + protected function registerShorthand( $shorthand, $prefix, $local ) { + $this->shorthands[$shorthand] = array( $prefix, $local ); + } + + /** + * Registers a prefix. Throws a LogicException once prefixes have been locked by start(). + * + * @param string $prefix + * @param string $iri The base IRI + */ + public function prefix( $prefix, $iri ) { + if( $this->prefixesLocked ) { + throw new \LogicException("Prefixes can not be added after start()"); + } + $this->prefixes[$prefix] = $iri; + } + + /** + * Determines whether $shorthand can be used as a shorthand. + * + * @param string $shorthand + * + * @return bool + */ + protected function isShorthand( $shorthand ) { + return isset( $this->shorthands[$shorthand] ); + } + + /** + * Determines whether $prefix is a registered prefix. + * + * @param string $prefix + * + * @return bool + */ + protected function isPrefix( $prefix ) { + return isset( $this->prefixes[$prefix] ); + } + + /** + * Returns the prefix map. + * + * @return string[] An associative array mapping prefixes to base IRIs. 
+ */ + public function getPrefixes() { + return $this->prefixes; + } + + /** + * @return RdfWriter + */ + final public function sub() { + //FIXME: don't mess with the state, enqueue the writer to be placed in the buffer + // later, on the next transition to subject|document|drain + $writer = $this->newSubWriter( self::SUBDOCUMENT_ROLE, $this->labeler ); + $writer->state = self::STATE_DOCUMENT; + + // share registered prefixes + $writer->prefixes =& $this->prefixes; + + $this->write( $writer ); + return $writer; + } + + /** + * Returns the writer's role. The role determines the behavior of the writer with respect + * to which states and transitions are possible: a BNODE_ROLE writer would for instance + * not accept a call to about(), since it can only process triples about a single subject + * (the blank node it represents). + * + * @return string A string corresponding to one of the XXX_ROLE constants. + */ + final public function getRole() { + return $this->role; + } + + /** + * Appends an item to the output buffer; sub-writers and closures are resolved by flattenBuffer(). + * @param string|RdfWriter|Closure $w + */ + final protected function write( $w ) { + $this->buffer[] = $w; + } + + /** + * If $base is a shorthand, $base and $local are updated to hold whatever qname + * the shorthand was associated with. + * + * Otherwise, $base and $local remain unchanged. + * + * @param string &$base + * @param string|null &$local + */ + protected function expandShorthand( &$base, &$local ) { + if ( $local === null && isset( $this->shorthands[$base] ) ) { + list( $base, $local ) = $this->shorthands[$base]; + } + } + + /** + * If $base is a registered prefix, $base will be replaced by the base IRI associated with + * that prefix, with $local appended. $local will be set to null. + * + * If $local is null or $base is '_', $base and $local remain unchanged; an unregistered prefix raises a LogicException. 
+ * + * @param string &$base + * @param string|null &$local + * + * @throws LogicException + */ + protected function expandQName( &$base, &$local ) { + if ( $local !== null && $base !== '_' ) { + if ( isset( $this->prefixes[$base] ) ) { + $base = $this->prefixes[$base] . $local; //XXX: can we avoid this concat? + $local = null; + } else { + throw new LogicException( 'Unknown prefix: ' . $base ); + } + } + } + + /** + * @see RdfWriter::blank() + * + * @param string|null $label node label, will be generated if not given. + * + * @return string + */ + final public function blank( $label = null ) { + return $this->labeler->getLabel( $label ); + } + + /** + * @see RdfWriter::start() + */ + final public function start() { + $this->state( self::STATE_DOCUMENT ); + $this->prefixesLocked = true; + } + + /** + * @see RdfWriter::finish() + */ + final public function finish() { + // close all unclosed states + $this->state( self::STATE_DOCUMENT ); + // and then finalize + $this->state( self::STATE_FINISH ); + } + + /** + * @see RdfWriter::drain() + * + * @return string RDF + */ + final public function drain() { + // we can drain after finish, but finish state is sticky + if( $this->state != self::STATE_FINISH ) { + $this->state( self::STATE_DOCUMENT ); + } + $this->flattenBuffer(); + + $rdf = join( '', $this->buffer ); + $this->buffer = array(); + + return $rdf; + } + + /** + * @see RdfWriter::reset() + * + * @note Does not reset the blank node counter, because it may be shared. 
+ */ + public function reset() { + $this->buffer = array(); + $this->state = self::STATE_DOCUMENT; //TODO: may depend on role + + $this->currentSubject = array( null, null ); + $this->currentPredicate = array( null, null ); + +// $this->prefixes = array(); +// $this->prefix( 'rdf', 'http://www.w3.org/1999/02/22-rdf-syntax-ns#' ); +// $this->prefix( 'xsd', 'http://www.w3.org/2001/XMLSchema#' ); + } + + /** + * Calls drain() on any RdfWriter instances in $this->buffer, and replaces them + * in $this->buffer with the string returned by the drain() call. Any closures + * present in $this->buffer will be called, and replaced by their return value. + */ + private function flattenBuffer() { + foreach ( $this->buffer as &$b ) { + if ( $b instanceof Closure ) { + $b = $b(); + } + if ( $b instanceof RdfWriter ) { + $b = $b->drain(); + } + } + } + + /** + * @see RdfWriter::about() + * + * @param string $base A QName prefix if $local is given, or an IRI if $local is null. + * @param string|null $local A QName suffix, or null if $base is an IRI. + * + * @return RdfWriter $this + */ + final public function about( $base, $local = null ) { + $this->expandSubject( $base, $local ); + + if ( $base === $this->currentSubject[0] && $local === $this->currentSubject[1] ) { + return $this; // redundant about() call + } + + $this->state( self::STATE_SUBJECT ); + + $this->currentSubject[0] = $base; + $this->currentSubject[1] = $local; + $this->currentPredicate[0] = null; + $this->currentPredicate[1] = null; + + $this->writeSubject( $base, $local ); + return $this; + } + + /** + * @see RdfWriter::a() + * Shorthand for say( 'a' )->is( $typeBase, $typeLocal ). + * + * @param string $typeBase The data type's QName prefix if $typeLocal is given, + * or an IRI or shorthand if $typeLocal is null. + * @param string|null $typeLocal The data type's QName suffix, + * or null if $typeBase is an IRI or shorthand. 
+ * + * @return RdfWriter $this + */ + final public function a( $typeBase, $typeLocal = null ) { + return $this->say( 'a' )->is( $typeBase, $typeLocal ); + } + + /** + * @see RdfWriter::say() + * + * @param string $base A QName prefix if $local is given, or a shorthand. + * @param string|null $local A QName suffix, or null if $base is a shorthand. + * + * @return RdfWriter $this + */ + final public function say( $base, $local = null ) { + $this->expandPredicate( $base, $local ); + + if ( $base === $this->currentPredicate[0] && $local === $this->currentPredicate[1] ) { + return $this; // redundant say() call + } + + $this->state( self::STATE_PREDICATE ); + + $this->currentPredicate[0] = $base; + $this->currentPredicate[1] = $local; + + $this->writePredicate( $base, $local ); + return $this; + } + + /** + * @see RdfWriter::is() + * + * @param string $base A QName prefix if $local is given, or an IRI if $local is null. + * @param string|null $local A QName suffix, or null if $base is an IRI. + * + * @return RdfWriter $this + */ + final public function is( $base, $local = null ) { + $this->state( self::STATE_OBJECT ); + + $this->expandResource( $base, $local ); + $this->writeResource( $base, $local ); + return $this; + } + + /** + * @see RdfWriter::text() + * + * @param string $text the text to be placed in the output + * @param string|null $language the language the text is in + * + * @return $this + */ + final public function text( $text, $language = null ) { + $this->state( self::STATE_OBJECT ); + + $this->writeText( $text, $language ); + return $this; + } + + /** + * @see RdfWriter::value() + * + * @param string|int|float|bool $value the value; an int, float or bool given without a type is written as xsd:integer, xsd:double or xsd:boolean + * @param string|null $typeBase The data type's QName prefix if $typeLocal is given, + * or an IRI or shorthand if $typeLocal is null. + * @param string|null $typeLocal The data type's QName suffix, + * or null if $typeBase is an IRI or shorthand. 
+ * + * @return $this + */ + final public function value( $value, $typeBase = null, $typeLocal = null ) { + $this->state( self::STATE_OBJECT ); + + if ( $typeBase === null && !is_string( $value ) ) { + $vtype = gettype( $value ); + switch ( $vtype ) { + case 'integer': + $typeBase = 'xsd'; + $typeLocal = 'integer'; + $value = "$value"; + break; + + case 'double': + $typeBase = 'xsd'; + $typeLocal = 'double'; + $value = "$value"; + break; + + case 'boolean': + $typeBase = 'xsd'; + $typeLocal = 'boolean'; + $value = $value ? 'true' : 'false'; + break; + } + } + + $this->expandType( $typeBase, $typeLocal ); + + $this->writeValue( $value, $typeBase, $typeLocal ); + return $this; + } + + /** + * State transition table + * First state is "from", second is "to"; each entry is true, a string to write, or a callable to invoke + * @var array + */ + protected $transitionTable = array( + self::STATE_START => array( + self::STATE_DOCUMENT => true, + ), + self::STATE_DOCUMENT => array( + self::STATE_DOCUMENT => true, + self::STATE_SUBJECT => true, + self::STATE_FINISH => true, + ), + self::STATE_SUBJECT => array( + self::STATE_PREDICATE => true, + ), + self::STATE_PREDICATE => array( + self::STATE_OBJECT => true, + ), + self::STATE_OBJECT => array( + self::STATE_DOCUMENT => true, + self::STATE_SUBJECT => true, + self::STATE_PREDICATE => true, + self::STATE_OBJECT => true, + ), + ); + + /** + * Perform a state transition. Writer states roughly correspond to states in a naive + * regular parser for the respective syntax. State transitions may generate output, + * particularly of structural elements which correspond to terminals in a respective + * parser. + * + * @param $newState + * + * @throws LogicException + */ + final protected function state( $newState ) { + if( !isset( $this->transitionTable[$this->state][$newState] ) ) { + throw new LogicException( 'Bad transition: ' . $this->state. ' -> ' . 
$newState ); + } + $action = $this->transitionTable[$this->state][$newState]; + if( $action !== true ) { + if( is_string( $action ) ) { + $this->write( $action ); + } else { + $action(); + } + } + + $this->state = $newState; + } + + /** + * Must be implemented to generate output that starts a statement (or set of statements) + * about a subject. Depending on the requirements of the output format, the implementation + * may be empty. + * + * @note: $base and $local are given as passed to about() and processed by expandSubject(). + * + * @param string $base + * @param string|null $local + */ + protected abstract function writeSubject( $base, $local = null ); + + /** + * Must be implemented to generate output that represents the association of a predicate + * with a subject that was previously defined by a call to writeSubject(). + * + * @note: $base and $local are given as passed to say() and processed by expandPredicate(). + * + * @param string $base + * @param string|null $local + */ + protected abstract function writePredicate( $base, $local = null ); + + /** + * Must be implemented to generate output that represents a resource used as the object + * of a statement. + * + * @note: $base and $local are given as passed to is() and processed by expandResource(). + * + * @param string $base + * @param string|null $local + */ + protected abstract function writeResource( $base, $local = null ); + + /** + * Must be implemented to generate output that represents a text used as the object + * of a statement. + * + * @param string $text the text to be placed in the output + * @param string|null $language the language the text is in + */ + protected abstract function writeText( $text, $language ); + + /** + * Must be implemented to generate output that represents a (typed) literal used as the object + * of a statement. + * + * @note: $typeBase and $typeLocal are given as passed to value() and processed by expandType(). 
+ * + * @param string $value the value encoded as a string + * @param string $typeBase + * @param string|null $typeLocal + */ + protected abstract function writeValue( $value, $typeBase, $typeLocal = null ); + + /** + * Perform any expansion (shorthand to qname, qname to IRI) desired + * for subject identifiers. + * + * @param string &$base + * @param string|null &$local + */ + protected function expandSubject( &$base, &$local ) { + } + + /** + * Perform any expansion (shorthand to qname, qname to IRI) desired + * for predicate identifiers. + * + * @param string &$base + * @param string|null &$local + */ + protected function expandPredicate( &$base, &$local ) { + } + + /** + * Perform any expansion (shorthand to qname, qname to IRI) desired + * for resource identifiers. + * + * @param string &$base + * @param string|null &$local + */ + protected function expandResource( &$base, &$local ) { + } + + /** + * Perform any expansion (shorthand to qname, qname to IRI) desired + * for type identifiers. 
+ * + * @param string &$base + * @param string|null &$local + */ + protected function expandType( &$base, &$local ) { + } + +} diff --git a/src/RdfWriterFactory.php b/src/RdfWriterFactory.php new file mode 100644 index 0000000..46a04f0 --- /dev/null +++ b/src/RdfWriterFactory.php @@ -0,0 +1,159 @@ +transitionTable[self::STATE_OBJECT] = array( + self::STATE_DOCUMENT => " .\n", + self::STATE_SUBJECT => " .\n\n", + self::STATE_PREDICATE => " ;\n\t", + self::STATE_OBJECT => ",\n\t\t", + ); + $this->transitionTable[self::STATE_DOCUMENT][self::STATE_SUBJECT] = "\n"; + $this->transitionTable[self::STATE_SUBJECT][self::STATE_PREDICATE] = " "; + $this->transitionTable[self::STATE_PREDICATE][self::STATE_OBJECT] = " "; + $self = $this; + $this->transitionTable[self::STATE_START][self::STATE_DOCUMENT] = function() use($self) { + $self->beginDocument(); + }; + } + + /** + * Write prefixes + */ + public function beginDocument( ) { + foreach( $this->getPrefixes() as $prefix => $uri ) { + $this->write( "@prefix $prefix: <" . $this->quoter->escapeIRI( $uri ) . 
"> .\n" ); + } + } + + protected function writeSubject( $base, $local = null ) { + if( $local !== null ) { + $this->write( "$base:$local" ); + } else { + $this->writeIRI( $base ); + } + } + + protected function writePredicate( $base, $local = null ) { + if( $base === 'a' ) { + $this->write( 'a' ); + return; + } + if( $local !== null ) { + $this->write( "$base:$local" ); + } else { + $this->writeIRI( $base ); + } + } + + protected function writeResource( $base, $local = null ) { + if( $local !== null) { + $this->write( "$base:$local" ); + } else { + $this->writeIRI( $base ); + } + } + +// protected function writeValue( $value, $typeBase = null, $typeLocal = null ) { +// //TODO: shorthand form for xsd:integer|decimal|double|boolean +// parent::writeValue( $value, $typeBase, $typeLocal ); +// } + + /** + * @param string $role + * @param BNodeLabeler $labeler + * + * @return RdfWriterBase + */ + protected function newSubWriter( $role, BNodeLabeler $labeler ) { + $writer = new self( $role, $labeler, $this->quoter ); + + return $writer; + } + + /** + * @return string a MIME type + */ + public function getMimeType() { + return 'text/turtle; charset=UTF-8'; + } + + + +} diff --git a/src/UnicodeEscaper.php b/src/UnicodeEscaper.php new file mode 100644 index 0000000..777de3a --- /dev/null +++ b/src/UnicodeEscaper.php @@ -0,0 +1,127 @@ +escChars[$c] ) ) { + $this->escChars[$c] = $this->escapedChar($c); + } + $result .= $this->escChars[$c]; + } + return $result; + } + + /** + * @ignore + */ + protected function unicodeCharNo( $cUtf ) { + $bl = strlen( $cUtf ); /* binary length */ + $r = 0; + switch ( $bl ) { + case 1: /* 0####### (0-127) */ + $r = ord( $cUtf ); + break; + case 2: /* 110##### 10###### = 192+x 128+x */ + $r = ( ( ord( $cUtf[0] ) - 192 ) * 64 ) + + ( ord( $cUtf[1] ) - 128 ); + break; + case 3: /* 1110#### 10###### 10###### = 224+x 128+x 128+x */ + $r = ( ( ord( $cUtf[0] ) - 224 ) * 4096 ) + + ( ( ord( $cUtf[1] ) - 128 ) * 64 ) + + ( ord( $cUtf[2] ) - 128 ); + 
break; + case 4: /* 1111#### 10###### 10###### 10###### = 240+x 128+x 128+x 128+x */ + $r = ( ( ord( $cUtf[0] ) - 240 ) * 262144 ) + + ( ( ord( $cUtf[1] ) - 128 ) * 4096 ) + + ( ( ord( $cUtf[2] ) - 128 ) * 64 ) + + ( ord( $cUtf[3] ) - 128 ); + break; + } + return $r; + } + + /** + * @ignore + */ + protected function escapedChar( $c ) { + $no = $this->unicodeCharNo( $c ); + /* see http://www.w3.org/TR/rdf-testcases/#ntrip_strings */ + if ( $no < 9 ) { + return "\\u" . sprintf( '%04X', $no ); /* #x0-#x8 (0-8) */ + } elseif ( $no == 9 ) { + return '\t'; /* #x9 (9) */ + } elseif ( $no == 10 ) { + return '\n'; /* #xA (10) */ + } elseif ( $no < 13 ) { + return "\\u" . sprintf( '%04X', $no ); /* #xB-#xC (11-12) */ + } elseif ( $no == 13 ) { + return '\r'; /* #xD (13) */ + } elseif ( $no < 32 ) { + return "\\u" . sprintf( '%04X', $no ); /* #xE-#x1F (14-31) */ + } elseif ( $no < 34 ) { + return $c; /* #x20-#x21 (32-33) */ + } elseif ( $no == 34 ) { + return '\"'; /* #x22 (34) */ + } elseif ( $no < 92 ) { + return $c; /* #x23-#x5B (35-91) */ + } elseif ( $no == 92 ) { + return '\\'; /* #x5C (92) */ + } elseif ( $no < 127 ) { + return $c; /* #x5D-#x7E (93-126) */ + } elseif ( $no < 65536 ) { + return "\\u" . sprintf( '%04X', $no ); /* #x7F-#xFFFF (128-65535) */ + } elseif ( $no < 1114112 ) { + return "\\U" . 
sprintf( '%08X', $no ); /* #x10000-#x10FFFF (65536-1114111) */ + } else { + return ''; /* not defined => ignore */ + } + } + +} diff --git a/src/XmlRdfWriter.php b/src/XmlRdfWriter.php new file mode 100644 index 0000000..6e4c82b --- /dev/null +++ b/src/XmlRdfWriter.php @@ -0,0 +1,217 @@ +transitionTable[self::STATE_START][self::STATE_DOCUMENT] = function() use($self) { + $self->beginDocument(); + }; + array($this, 'beginDocument'); + $this->transitionTable[self::STATE_DOCUMENT][self::STATE_FINISH] = function() use($self) { + $self->finishDocument(); + }; + $this->transitionTable[self::STATE_OBJECT][self::STATE_DOCUMENT] = function() use($self) { + $self->finishSubject(); + }; + $this->transitionTable[self::STATE_OBJECT][self::STATE_SUBJECT] = function() use($self) { + $self->finishSubject(); + }; + } + + private function escape( $text ) { + return htmlspecialchars( $text, ENT_QUOTES ); + } + + protected function expandSubject( &$base, &$local ) { + $this->expandQName( $base, $local ); + } + + protected function expandPredicate( &$base, &$local ) { + $this->expandShorthand( $base, $local ); + } + + protected function expandResource( &$base, &$local ) { + $this->expandQName( $base, $local ); + } + + protected function expandType( &$base, &$local ) { + $this->expandQName( $base, $local ); + } + + private function tag( $ns, $name, $attributes = array(), $content = null ) { + $sep = $ns === '' ? '' : ':'; + $this->write( '<' . $ns . $sep . $name ); + + foreach ( $attributes as $attr => $value ) { + if ( is_int( $attr ) ) { + // positional array entries are passed verbatim, may be callbacks. + $this->write( $value ); + continue; + } + + $this->write( " $attr=\"" . $this->escape( $value ) . '"' ); + } + + if ( $content === null ) { + $this->write( '>' ); + } elseif ( $content === '' ) { + $this->write( '/>' ); + } else { + $this->write( '>' . $content ); + $this->close( $ns, $name ); + } + } + + private function close( $ns, $name ) { + $sep = $ns === '' ? 
'' : ':'; + $this->write( '' ); + } + + /** + * Generates an attribute list, containing the attribute given by $name, or rdf:nodeID + * if $target is a blank node id (starting with "_:"). If $target is a qname, an attempt + * is made to resolve it into a full IRI based on the namespaces registered by calling + * prefix(). + * + * @param string $name the attribute name (without the 'rdf:' prefix) + * @param string|null $base + * @param string|null $local + * + * @throws InvalidArgumentException + * @return string[] + */ + private function getTargetAttributes( $name, $base, $local ) { + if ( $base === null && $local === null ) { + return array(); + } + + // handle blank + if ( $base === '_' ) { + $name = 'nodeID'; + $value = $local; + } elseif ( $local !== null ) { + throw new InvalidArgumentException( "Expected IRI, got QName: $base:$local" ); + } else { + $value = $base; + } + + return array( + "rdf:$name" => $value + ); + } + + /** + * Emit a document header. + */ + public function beginDocument() { + $this->write( "\n" ); + + // define a callback for generating namespace attributes + $self = $this; + $namespaceAttrCallback = function() use ( $self ) { + $attr = ''; + + $namespaces = $self->getPrefixes(); + foreach ( $namespaces as $ns => $uri ) { + $escapedUri = htmlspecialchars( $uri, ENT_QUOTES ); + $nss = $ns === '' ? 
'' : ":$ns"; + $attr .= " xmlns$nss=\"$escapedUri\""; + } + + return $attr; + }; + + $this->tag( 'rdf', 'RDF', array( $namespaceAttrCallback ) ); + $this->write( "\n" ); + } + + protected function writeSubject( $base, $local = null ) { + $attr = $this->getTargetAttributes( 'about', $base, $local ); + + $this->write( "\t" ); + $this->tag( 'rdf', 'Description', $attr ); + $this->write( "\n" ); + } + + /** + * Emit the root element + */ + public function finishSubject() { + $this->write( "\t" ); + $this->close( 'rdf', 'Description' ); + $this->write( "\n" ); + } + + /** + * Write document footer + */ + public function finishDocument() { + // close document element + $this->close( 'rdf', 'RDF' ); + $this->write( "\n" ); + } + + protected function writePredicate( $base, $local = null ) { + // noop + } + + protected function writeResource( $base, $local = null ) { + $attr = $this->getTargetAttributes( 'resource', $base, $local ); + + $this->write( "\t\t" ); + $this->tag( $this->currentPredicate[0], $this->currentPredicate[1], $attr, '' ); + $this->write( "\n" ); + } + + protected function writeText( $text, $language = null ) { + $attr = empty( $language ) ? 
array() : array( 'xml:lang' => $language ); + + $this->write( "\t\t" ); + $this->tag( $this->currentPredicate[0], $this->currentPredicate[1], $attr, $this->escape( $text ) ); + $this->write( "\n" ); + } + + public function writeValue( $literal, $typeBase, $typeLocal = null ) { + $attr = $this->getTargetAttributes( 'datatype', $typeBase, $typeLocal ); + + $this->write( "\t\t" ); + $this->tag( $this->currentPredicate[0], $this->currentPredicate[1], $attr, $this->escape( $literal ) ); + $this->write( "\n" ); + } + + /** + * @param string $role + * @param BNodeLabeler $labeler + * + * @return RdfWriterBase + */ + protected function newSubWriter( $role, BNodeLabeler $labeler ) { + $writer = new self( $role, $labeler ); + + return $writer; + } + + /** + * @return string a MIME type + */ + public function getMimeType() { + return 'application/rdf+xml; charset=UTF-8'; + } + + +} diff --git a/tests/data/EricMiller.nt b/tests/data/EricMiller.nt new file mode 100644 index 0000000..6e94cfa --- /dev/null +++ b/tests/data/EricMiller.nt @@ -0,0 +1,4 @@ + . + "Eric Miller" . + . + "Dr." . diff --git a/tests/data/EricMiller.rdf b/tests/data/EricMiller.rdf new file mode 100644 index 0000000..193e8b9 --- /dev/null +++ b/tests/data/EricMiller.rdf @@ -0,0 +1,9 @@ + + + + + Eric Miller + + Dr. + + \ No newline at end of file diff --git a/tests/data/EricMiller.ttl b/tests/data/EricMiller.ttl new file mode 100644 index 0000000..e51dba6 --- /dev/null +++ b/tests/data/EricMiller.ttl @@ -0,0 +1,8 @@ +@prefix rdf: . +@prefix xsd: . +@prefix contact: . + + rdf:type contact:Person ; + contact:fullName "Eric Miller" ; + contact:mailbox ; + contact:personalTitle "Dr." . diff --git a/tests/data/LabeledBlankNode.nt b/tests/data/LabeledBlankNode.nt new file mode 100644 index 0000000..332ae35 --- /dev/null +++ b/tests/data/LabeledBlankNode.nt @@ -0,0 +1,5 @@ + _:johnaddress . +_:johnaddress "1501 Grant Avenue" . +_:johnaddress "Bedfort" . +_:johnaddress "Massachusetts" . +_:johnaddress "01730" . 
diff --git a/tests/data/LabeledBlankNode.rdf b/tests/data/LabeledBlankNode.rdf new file mode 100644 index 0000000..61cc8c7 --- /dev/null +++ b/tests/data/LabeledBlankNode.rdf @@ -0,0 +1,12 @@ + + + + + + + 1501 Grant Avenue + Bedfort + Massachusetts + 01730 + + \ No newline at end of file diff --git a/tests/data/LabeledBlankNode.ttl b/tests/data/LabeledBlankNode.ttl new file mode 100644 index 0000000..8a9ab8e --- /dev/null +++ b/tests/data/LabeledBlankNode.ttl @@ -0,0 +1,11 @@ +@prefix rdf: . +@prefix xsd: . +@prefix exterms: . +@prefix exstaff: . + +exstaff:85740 exterms:address _:johnaddress . + +_:johnaddress exterms:street "1501 Grant Avenue" ; + exterms:city "Bedfort" ; + exterms:state "Massachusetts" ; + exterms:postalCode "01730" . diff --git a/tests/data/NumberedBlankNode.nt b/tests/data/NumberedBlankNode.nt new file mode 100644 index 0000000..f179ee1 --- /dev/null +++ b/tests/data/NumberedBlankNode.nt @@ -0,0 +1,4 @@ + _:genid1 . +_:genid1 "Antology of Time" . + _:genid2 . +_:genid2 "Anthony of Time" . diff --git a/tests/data/NumberedBlankNode.rdf b/tests/data/NumberedBlankNode.rdf new file mode 100644 index 0000000..0608a77 --- /dev/null +++ b/tests/data/NumberedBlankNode.rdf @@ -0,0 +1,15 @@ + + + + + + + Antology of Time + + + + + + Anthony of Time + + \ No newline at end of file diff --git a/tests/data/NumberedBlankNode.ttl b/tests/data/NumberedBlankNode.ttl new file mode 100644 index 0000000..9659d84 --- /dev/null +++ b/tests/data/NumberedBlankNode.ttl @@ -0,0 +1,13 @@ +@prefix rdf: . +@prefix xsd: . +@prefix exterms: . +@prefix exstaff: . +@prefix ex: . + +exstaff:Sue exterms:publication _:genid1 . + +_:genid1 exterms:title "Antology of Time" . + +exstaff:Jack exterms:publication _:genid2 . + +_:genid2 exterms:title "Anthony of Time" . diff --git a/tests/data/Numbers.nt b/tests/data/Numbers.nt new file mode 100644 index 0000000..31b9c75 --- /dev/null +++ b/tests/data/Numbers.nt @@ -0,0 +1,3 @@ + "5"^^ . + "7"^^ . + "6"^^ . 
diff --git a/tests/data/Numbers.rdf b/tests/data/Numbers.rdf new file mode 100644 index 0000000..190ff29 --- /dev/null +++ b/tests/data/Numbers.rdf @@ -0,0 +1,10 @@ + + + + 5 + 7 + + + 6 + + \ No newline at end of file diff --git a/tests/data/Numbers.ttl b/tests/data/Numbers.ttl new file mode 100644 index 0000000..d2b8710 --- /dev/null +++ b/tests/data/Numbers.ttl @@ -0,0 +1,8 @@ +@prefix rdf: . +@prefix xsd: . +@prefix acme: . + +acme:Bongos acme:stock "5"^^xsd:integer, + "7"^^xsd:integer . + +acme:Tablas acme:stock "6"^^xsd:integer . diff --git a/tests/data/Predicates.nt b/tests/data/Predicates.nt new file mode 100644 index 0000000..40d76b1 --- /dev/null +++ b/tests/data/Predicates.nt @@ -0,0 +1,4 @@ + . + "Banana" . + "Banane"@de . + "Apple" . diff --git a/tests/data/Predicates.rdf b/tests/data/Predicates.rdf new file mode 100644 index 0000000..b8da7f9 --- /dev/null +++ b/tests/data/Predicates.rdf @@ -0,0 +1,11 @@ + + + + + Banana + Banane + + + Apple + + \ No newline at end of file diff --git a/tests/data/Predicates.ttl b/tests/data/Predicates.ttl new file mode 100644 index 0000000..bf1a171 --- /dev/null +++ b/tests/data/Predicates.ttl @@ -0,0 +1,9 @@ +@prefix rdf: . +@prefix xsd: . +@prefix : . + + a ; + :name "Banana", + "Banane"@de . + + :name "Apple" . diff --git a/tests/data/Resources.nt b/tests/data/Resources.nt new file mode 100644 index 0000000..525a42e --- /dev/null +++ b/tests/data/Resources.nt @@ -0,0 +1,2 @@ + . + . diff --git a/tests/data/Resources.rdf b/tests/data/Resources.rdf new file mode 100644 index 0000000..f2502d5 --- /dev/null +++ b/tests/data/Resources.rdf @@ -0,0 +1,7 @@ + + + + + + + \ No newline at end of file diff --git a/tests/data/Resources.ttl b/tests/data/Resources.ttl new file mode 100644 index 0000000..e9153e4 --- /dev/null +++ b/tests/data/Resources.ttl @@ -0,0 +1,6 @@ +@prefix rdf: . +@prefix xsd: . +@prefix acme: . + +acme:Bongos acme:sounds acme:Bing, + . 
diff --git a/tests/data/Texts.nt b/tests/data/Texts.nt new file mode 100644 index 0000000..d927334 --- /dev/null +++ b/tests/data/Texts.nt @@ -0,0 +1,2 @@ + "Bom"@de . + "Bam"@en . diff --git a/tests/data/Texts.rdf b/tests/data/Texts.rdf new file mode 100644 index 0000000..e6d14ed --- /dev/null +++ b/tests/data/Texts.rdf @@ -0,0 +1,7 @@ + + + + Bom + Bam + + \ No newline at end of file diff --git a/tests/data/Texts.ttl b/tests/data/Texts.ttl new file mode 100644 index 0000000..ccb4c2a --- /dev/null +++ b/tests/data/Texts.ttl @@ -0,0 +1,6 @@ +@prefix rdf: . +@prefix xsd: . +@prefix acme: . + +acme:Bongos acme:sounds "Bom"@de, + "Bam"@en . diff --git a/tests/data/Triples.nt b/tests/data/Triples.nt new file mode 100644 index 0000000..a185271 --- /dev/null +++ b/tests/data/Triples.nt @@ -0,0 +1,3 @@ + . + "5.5"^^ . + "brown" . diff --git a/tests/data/Triples.rdf b/tests/data/Triples.rdf new file mode 100644 index 0000000..c61e00d --- /dev/null +++ b/tests/data/Triples.rdf @@ -0,0 +1,10 @@ + + + + + + + 5.5 + brown + + \ No newline at end of file diff --git a/tests/data/Triples.ttl b/tests/data/Triples.ttl new file mode 100644 index 0000000..b7575bc --- /dev/null +++ b/tests/data/Triples.ttl @@ -0,0 +1,8 @@ +@prefix rdf: . +@prefix xsd: . +@prefix acme: . + + a . + +acme:Nuts acme:weight "5.5"^^xsd:decimal ; + acme:color "brown" . diff --git a/tests/data/Values.nt b/tests/data/Values.nt new file mode 100644 index 0000000..ff36320 --- /dev/null +++ b/tests/data/Values.nt @@ -0,0 +1,19 @@ + "A" . + "B" . + "C" . + "foo"^^ . + "-5"^^ . + "-5"^^ . + "-5"^^ . + "true"^^ . + "false"^^ . + "-5"^^ . + "3.14"^^ . + "true"^^ . + "false"^^ . + "-5"^^ . + "3.14"^^ . + "1"^^ . + ""^^ . + "foo" . + "foo"^^ . 
\ No newline at end of file diff --git a/tests/data/Values.rdf b/tests/data/Values.rdf new file mode 100644 index 0000000..4819b75 --- /dev/null +++ b/tests/data/Values.rdf @@ -0,0 +1,24 @@ + + + + A + B + C + foo + -5 + -5 + -5 + true + false + -5 + 3.14 + true + false + -5 + 3.14 + 1 + + foo + foo + + \ No newline at end of file diff --git a/tests/data/Values.ttl b/tests/data/Values.ttl new file mode 100644 index 0000000..ba5c0d3 --- /dev/null +++ b/tests/data/Values.ttl @@ -0,0 +1,23 @@ +@prefix rdf: . +@prefix xsd: . +@prefix acme: . + + acme:multi "A", + "B", + "C" ; + acme:type "foo"^^acme:thing, + "-5"^^xsd:integer, + "-5"^^xsd:decimal, + "-5"^^xsd:double, + "true"^^xsd:boolean, + "false"^^xsd:boolean ; + acme:autotype "-5"^^xsd:integer, + "3.14"^^xsd:double, + "true"^^xsd:boolean, + "false"^^xsd:boolean ; + acme:no-autotype "-5"^^xsd:decimal, + "3.14"^^xsd:string, + "1"^^xsd:string, + ""^^xsd:string ; + acme:shorthand "foo" ; + acme:typed-shorthand "foo"^^acme:thing . diff --git a/tests/phpunit/BNodeLabelerTest.php b/tests/phpunit/BNodeLabelerTest.php new file mode 100644 index 0000000..a660d4a --- /dev/null +++ b/tests/phpunit/BNodeLabelerTest.php @@ -0,0 +1,26 @@ +assertEquals( 'test2', $labeler->getLabel() ); + $this->assertEquals( 'test3', $labeler->getLabel() ); + $this->assertEquals( 'foo', $labeler->getLabel( 'foo' ) ); + $this->assertEquals( 'test4', $labeler->getLabel() ); + } + +} diff --git a/tests/phpunit/N3QuoterTest.php b/tests/phpunit/N3QuoterTest.php new file mode 100644 index 0000000..fe239af --- /dev/null +++ b/tests/phpunit/N3QuoterTest.php @@ -0,0 +1,54 @@ +', 'http://acme.com/%3Cwacky%20stuff%3E' ), + ); + } + + /** + * @dataProvider provideEscapeIRI + */ + public function testEscapeIRI( $iri, $expected ) { + $quoter = new N3Quoter(); + + $this->assertEquals( $expected, $quoter->escapeIRI( $iri ) ); + } + + public function provideEscapeLiteral() { + return array( + array( "Hello World", 'Hello World' ), + array( "Hello\nWorld", 
'Hello\nWorld' ), + array( "Hello\tWorld", 'Hello\tWorld' ), + array( "Hällo Wörld", 'Hällo Wörld', false ), + array( "Hällo Wörld", 'H\u00E4llo W\u00F6rld', true ), + ); + } + + /** + * @dataProvider provideEscapeLiteral + */ + public function testEscapeLiteral( $literal, $expected, $escapeUnicode = false ) { + $quoter = new N3Quoter(); + $quoter->setEscapeUnicode( $escapeUnicode ); + + $this->assertEquals( $expected, $quoter->escapeLiteral( $literal ) ); + } + +} diff --git a/tests/phpunit/NTriplesRdfWriterTest.php b/tests/phpunit/NTriplesRdfWriterTest.php new file mode 100644 index 0000000..b3d5f63 --- /dev/null +++ b/tests/phpunit/NTriplesRdfWriterTest.php @@ -0,0 +1,28 @@ +getSupportedFormats(); + + $this->assertInternalType( 'array', $formats ); + $this->assertNotEmpty( $formats ); + } + + public function testGetWriter() { + $factory = new RdfWriterFactory(); + + foreach ( $factory->getSupportedFormats() as $format ) { + $writer = $factory->getWriter( $format ); + + $this->assertInstanceOf( 'Wikimedia\Purtle\RdfWriter', $writer ); + } + } + + public function testGetFormatName() { + $factory = new RdfWriterFactory(); + + foreach ( $factory->getSupportedFormats() as $format ) { + $actual = $factory->getFormatName( $format ); + + // the canonical name should just stay + $this->assertEquals( $format, $actual ); + } + } + + public function provideFormats() { + return array( + // N3 (currently falls through to turtle) + array( 'n3', 'n3', 'n3', 'text/n3' ), + array( 'text/n3', 'n3', 'n3', 'text/n3' ), + array( 'text/rdf+n3', 'n3', 'n3', 'text/n3' ), + + array( 'ttl', 'turtle', 'ttl', 'text/turtle' ), + array( 'turtle', 'turtle', 'ttl', 'text/turtle' ), + array( 'text/turtle', 'turtle', 'ttl', 'text/turtle' ), + array( 'application/x-turtle', 'turtle', 'ttl', 'text/turtle' ), + + array( 'nt', 'ntriples', 'nt', 'application/n-triples' ), + array( 'ntriples', 'ntriples', 'nt', 'application/n-triples' ), + array( 'n-triples', 'ntriples', 'nt', 'application/n-triples' 
), + array( 'text/plain', 'ntriples', 'nt', 'application/n-triples' ), + array( 'text/n-triples', 'ntriples', 'nt', 'application/n-triples' ), + array( 'application/ntriples', 'ntriples', 'nt', 'application/n-triples' ), + array( 'application/n-triples', 'ntriples', 'nt', 'application/n-triples' ), + + array( 'xml', 'rdfxml', 'rdf', 'application/rdf+xml' ), + array( 'rdf', 'rdfxml', 'rdf', 'application/rdf+xml' ), + array( 'rdfxml', 'rdfxml', 'rdf', 'application/rdf+xml' ), + array( 'application/rdf+xml', 'rdfxml', 'rdf', 'application/rdf+xml' ), + array( 'application/xml', 'rdfxml', 'rdf', 'application/rdf+xml' ), + array( 'text/xml', 'rdfxml', 'rdf', 'application/rdf+xml' ), + ); + } + + /** + * @dataProvider provideFormats + */ + public function testFormats( $name, $canonicalName, $expectedFileExtension, $expectedMimeType ) { + $factory = new RdfWriterFactory(); + + $this->assertEquals( $canonicalName, $factory->getFormatName( $name ) ); + $this->assertEquals( $expectedFileExtension, $factory->getFileExtension( $canonicalName ) ); + $this->assertContains( $expectedMimeType, $factory->getMimeTypes( $canonicalName ) ); + + $writer = $factory->getWriter( $canonicalName ); + $this->assertInstanceOf( 'Wikimedia\Purtle\RdfWriter', $writer ); + } + + public function testGetMimeTypes() { + $factory = new RdfWriterFactory(); + + foreach ( $factory->getSupportedFormats() as $format ) { + $mimeTypes = $factory->getMimeTypes( $format ); + + $this->assertInternalType( 'array', $mimeTypes ); + $this->assertNotEmpty( $mimeTypes ); + } + } + + public function testGetFileExtensions() { + $factory = new RdfWriterFactory(); + + foreach ( $factory->getSupportedFormats() as $format ) { + $extension = $factory->getFileExtension( $format ); + + $this->assertInternalType( 'string', $extension ); + } + } +} diff --git a/tests/phpunit/RdfWriterTestBase.php b/tests/phpunit/RdfWriterTestBase.php new file mode 100644 index 0000000..a683b1f --- /dev/null +++ 
b/tests/phpunit/RdfWriterTestBase.php @@ -0,0 +1,259 @@ +getFileSuffix(); + return $path; + } + + private function normalizeLines( array $lines ) { + $normalized = array(); + + foreach ( $lines as $s ) { + $s = trim( $s, "\r\n" ); + $normalized[] = $s; + } + + return $normalized; + } + + protected function assertOutputLines( $datasetName, $actual ) { + if ( is_string( $actual ) ) { + $actual = trim( $actual, "\r\n" ); + $actual = explode( "\n", $actual ); + } + + $path = $this->getExpectedOutputFile( $datasetName ); + + // Create test data file if it doesn't exist. + //if ( !file_exists( $path ) ) { + // file_put_contents( $path . '.actual', join( "\n", $actual ) ); + //} + + $expected = file( $path ); + + $expected = $this->normalizeLines( $expected ); + $actual = $this->normalizeLines( $actual ); + + $this->assertEquals( $expected, $actual, 'Result mismatches data in ' . $path ); + } + + /** + * @return RdfWriter + */ + protected abstract function newWriter(); + + public function testTriples() { + $writer = $this->newWriter(); + + $writer->prefix( 'acme', 'http://acme.test/' ); + $writer->start(); + + $writer->about( 'http://foobar.test/Bananas' ) + ->say( 'a' )->is( 'http://foobar.test/Fruit' ); // shorthand name "a" + + $writer->about( 'acme', 'Nuts' ) + ->say( 'acme', 'weight' )->value( '5.5', 'xsd', 'decimal' ); + + // redundant about( 'acme', 'Nuts' ) + $writer->about( 'acme', 'Nuts' ) + ->say( 'acme', 'color' )->value( 'brown' ); + $writer->finish(); + + $rdf = $writer->drain(); + $this->assertOutputLines( 'Triples', $rdf ); + } + + public function testPredicates() { + $writer = $this->newWriter(); + + $writer->prefix( '', 'http://acme.test/' ); // empty prefix + $writer->start(); + + $writer->about( 'http://foobar.test/Bananas' ) + ->a( 'http://foobar.test/Fruit' ) // shorthand function a() + ->say( '', 'name' ) // empty prefix + ->text( 'Banana' ) + ->say( '', 'name' ) // redundant say( '', 'name' ) + ->text( 'Banane', 'de' ); + + $writer->about( 
'http://foobar.test/Apples' ) + ->say( '', 'name' ) // subsequent call to say( '', 'name' ) for a different subject + ->text( 'Apple' ); + $writer->finish(); + + $rdf = $writer->drain(); + $this->assertOutputLines( 'Predicates', $rdf ); + } + + public function testValues() { + $writer = $this->newWriter(); + + $writer->prefix( 'acme', 'http://acme.test/' ); + $writer->start(); + + $writer->about( 'http://foobar.test/Bananas' ) + ->say( 'acme', 'multi' ) + ->value( 'A' ) + ->value( 'B' ) + ->value( 'C' ) + ->say( 'acme', 'type' ) + ->value( 'foo', 'acme', 'thing' ) + ->value( '-5', 'xsd', 'integer' ) + ->value( '-5', 'xsd', 'decimal' ) + ->value( '-5', 'xsd', 'double' ) + ->value( 'true', 'xsd', 'boolean' ) + ->value( 'false', 'xsd', 'boolean' ) + ->say( 'acme', 'autotype' ) + ->value( -5 ) + ->value( 3.14 ) + ->value( true ) + ->value( false ) + ->say( 'acme', 'no-autotype' ) + ->value( -5, 'xsd', 'decimal' ) + ->value( 3.14, 'xsd', 'string' ) + ->value( true, 'xsd', 'string' ) + ->value( false, 'xsd', 'string' ) + ->say( 'acme', 'shorthand' )->value( 'foo' ) + ->say( 'acme', 'typed-shorthand' )->value( 'foo', 'acme', 'thing' ); + $writer->finish(); + + $rdf = $writer->drain(); + $this->assertOutputLines( 'Values', $rdf ); + } + + public function testResources() { + $writer = $this->newWriter(); + + $writer->prefix( 'acme', 'http://acme.test/' ); + $writer->start(); + + $writer->about( 'acme', 'Bongos' ) + ->say( 'acme', 'sounds' ) + ->is( 'acme', 'Bing' ) + ->is( 'http://foobar.test/sound/Bang' ); + $writer->finish(); + + $rdf = $writer->drain(); + $this->assertOutputLines( 'Resources', $rdf ); + + } + + public function testTexts() { + $writer = $this->newWriter(); + + $writer->prefix( 'acme', 'http://acme.test/' ); + $writer->start(); + + $writer->about( 'acme', 'Bongos' ) + ->say( 'acme', 'sounds' ) + ->text( 'Bom', 'de' ) + ->text( 'Bam', 'en' ); + $writer->finish(); + + $rdf = $writer->drain(); + $this->assertOutputLines( 'Texts', $rdf ); + } + + public 
function testNumbers() { + $writer = $this->newWriter(); + + $writer->prefix( 'acme', 'http://acme.test/' ); + $writer->start(); + + $writer->about( 'acme', 'Bongos' ) + ->say( 'acme', 'stock' )->value( 5, 'xsd', 'integer' ) + ->value( 7 ) + ->about( 'acme', 'Tablas' ) + ->say( 'acme', 'stock' )->value( 6 ); + $writer->finish(); + + $rdf = $writer->drain(); + $this->assertOutputLines( 'Numbers', $rdf ); + } + + public function testEricMiller() { + // exampel taken from http://www.w3.org/2007/02/turtle/primer/ + + $writer = $this->newWriter(); + + $writer->prefix( 'contact', 'http://www.w3.org/2000/10/swap/pim/contact#' ); + $writer->start(); + + $writer->about( 'http://www.w3.org/People/EM/contact#me' ) + ->say( 'rdf', 'type' )->is( 'contact', 'Person' ) + ->say( 'contact', 'fullName' )->text( 'Eric Miller' ) + ->say( 'contact', 'mailbox' )->is( 'mailto:em@w3.org' ) + ->say( 'contact', 'personalTitle' )->text( 'Dr.' ); + $writer->finish(); + + $rdf = $writer->drain(); + $this->assertOutputLines( 'EricMiller', $rdf ); + } + + public function testLabeledBlankNode() { + // exampel taken from http://www.w3.org/2007/02/turtle/primer/ + + $writer = $this->newWriter(); + + $writer->prefix( 'exterms', 'http://www.example.org/terms/' ); + $writer->prefix( 'exstaff', 'http://www.example.org/staffid/' ); + $writer->start(); + + $writer->about( 'exstaff', '85740' ) + ->say( 'exterms', 'address' )->is( '_', $label = $writer->blank( 'johnaddress' ) ) + ->about( '_', $label ) + ->say( 'exterms', 'street' )->text( "1501 Grant Avenue" ) + ->say( 'exterms', 'city' )->text( "Bedfort" ) + ->say( 'exterms', 'state' )->text( "Massachusetts" ) + ->say( 'exterms', 'postalCode' )->text( "01730" ); + $writer->finish(); + + $rdf = $writer->drain(); + $this->assertOutputLines( 'LabeledBlankNode', $rdf ); + } + + public function testNumberedBlankNodes() { + // exampel taken from http://www.w3.org/2007/02/turtle/primer/ + + $writer = $this->newWriter(); + + $writer->prefix( 'exterms', 
'http://www.example.org/terms/' ); + $writer->prefix( 'exstaff', 'http://www.example.org/staffid/' ); + $writer->prefix( 'ex', 'http://example.org/packages/vocab#' ); + $writer->start(); + + $writer->about( 'exstaff', 'Sue' ) + ->say( 'exterms', 'publication' )->is( '_', $label1 = $writer->blank() ); + $writer->about( '_', $label1 ) + ->say( 'exterms', 'title' )->text( 'Antology of Time' ); + + $writer->about( 'exstaff', 'Jack' ) + ->say( 'exterms', 'publication' )->is( '_', $label2 = $writer->blank() ); + $writer->about( '_', $label2 ) + ->say( 'exterms', 'title' )->text( 'Anthony of Time' ); + $writer->finish(); + + $rdf = $writer->drain(); + $this->assertOutputLines( 'NumberedBlankNode', $rdf ); + } + + //FIXME: test quoting/escapes! + //FIXME: test non-ascii literals! + //FIXME: test uerl-encoding + //FIXME: test IRIs! +} diff --git a/tests/phpunit/TurtleRdfWriterTest.php b/tests/phpunit/TurtleRdfWriterTest.php new file mode 100644 index 0000000..404c1f1 --- /dev/null +++ b/tests/phpunit/TurtleRdfWriterTest.php @@ -0,0 +1,29 @@ +assertEquals( $expected, $escaper->escapeString( $input ) ); + } + +} diff --git a/tests/phpunit/XmlRdfWriterTest.php b/tests/phpunit/XmlRdfWriterTest.php new file mode 100644 index 0000000..02cd696 --- /dev/null +++ b/tests/phpunit/XmlRdfWriterTest.php @@ -0,0 +1,29 @@ +