/* -*- Mode: c; c-basic-offset: 2 -*-
*
* raptor_serialize_rdfxml.c - RDF/XML serializer
*
* Copyright (C) 2004-2006, David Beckett http://purl.org/net/dajobe/
* Copyright (C) 2004-2005, University of Bristol, UK http://www.bristol.ac.uk/
*
* This package is Free Software and part of Redland http://librdf.org/
*
* It is licensed under the following three licenses as alternatives:
* 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version
* 2. GNU General Public License (GPL) V2 or any newer version
* 3. Apache License, V2.0 or any newer version
*
* You may not use this file except in compliance with at least one of
* the above three licenses.
*
* See LICENSE.html or LICENSE.txt at the top of this package for the
* complete terms and further detail along with the license texts for
* the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively.
*
*/
#ifdef HAVE_CONFIG_H
#include <raptor_config.h>
#endif
#ifdef WIN32
#include <win32_raptor_config.h>
#endif
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <stdarg.h>
#ifdef HAVE_ERRNO_H
#include <errno.h>
#endif
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
/* Raptor includes */
#include "raptor.h"
#include "raptor_internal.h"
/*
* Raptor RDF/XML serializer object
*
* Per-serializer state. It is reached through serializer->context and its
* size is registered with the factory as context_length. The fields are
* set up by raptor_rdfxml_serialize_init() and
* raptor_rdfxml_serialize_start(), and released by
* raptor_rdfxml_serialize_terminate().
*/
typedef struct {
/* Namespace stack; created in init and handed to the XML writer and to
 * every namespace constructed by this serializer.
 */
raptor_namespace_stack *nstack;
/* the rdf: namespace (prefix "rdf"); also stored as item 0 of the
 * namespaces sequence below.
 * NOTE(review): an earlier comment said this is destroyed when nstack is
 * deleted, but terminate frees it explicitly before freeing nstack -
 * confirm the intended ownership.
 */
raptor_namespace *rdf_nspace;
/* the rdf:RDF element; created when the header is written and freed
 * (and reset to NULL) when serializing ends.
 */
raptor_xml_element* rdf_RDF_element;
/* where the xml is being written; (re)created by each serialize start */
raptor_xml_writer *xml_writer;
/* User declared namespaces; item 0 is always rdf_nspace, items 1.. are
 * the namespaces declared on the rdf:RDF element.
 */
raptor_sequence *namespaces;
/* URI of rdf:XMLLiteral; compared against literal datatypes to decide
 * whether an object is written as an XML literal.
 */
raptor_uri* rdf_xml_literal_uri;
/* non zero if rdf:RDF has been written (and thus no new namespaces
 * can be declared).
 */
int written_header;
} raptor_rdfxml_serializer_context;
/* local prototypes */
/*
 * raptor_rdfxml_serialize_init - prepare the context of a new serializer
 * @serializer: serializer whose context is filled in
 * @name: not used by this function
 *
 * Creates the namespace stack, the rdf: namespace, the sequence of
 * declared namespaces (seeded with rdf: as item 0) and the rdf:XMLLiteral
 * datatype URI.
 *
 * Return value: always 0
 */
static int
raptor_rdfxml_serialize_init(raptor_serializer* serializer, const char *name)
{
  raptor_rdfxml_serializer_context* ctx;
  raptor_uri_handler *handler;
  void *handler_context;

  ctx=(raptor_rdfxml_serializer_context*)serializer->context;

  raptor_uri_get_handler(&handler, &handler_context);

  ctx->nstack=raptor_new_namespaces(handler, handler_context,
                                    (raptor_simple_message_handler)raptor_serializer_simple_error,
                                    serializer,
                                    1);

  ctx->rdf_nspace=raptor_new_namespace(ctx->nstack,
                                       (const unsigned char*)"rdf",
                                       (const unsigned char*)raptor_rdf_namespace_uri,
                                       0);

  /* The rdf: namespace always occupies slot 0 of the declared list */
  ctx->namespaces=raptor_new_sequence(NULL, NULL);
  raptor_sequence_push(ctx->namespaces, ctx->rdf_nspace);

  ctx->rdf_xml_literal_uri=raptor_new_uri(raptor_xml_literal_datatype_uri_string);

  return 0;
}
/*
 * raptor_rdfxml_serialize_terminate - release everything held by the context
 * @serializer: serializer being destroyed
 *
 * Frees, in this order: the XML writer, the rdf:RDF element, the rdf:
 * namespace, the rdf:XMLLiteral URI, the declared namespaces together with
 * their sequence, and finally the namespace stack. Fields that are NULL
 * are skipped.
 */
static void
raptor_rdfxml_serialize_terminate(raptor_serializer* serializer)
{
  raptor_rdfxml_serializer_context* ctx;

  ctx=(raptor_rdfxml_serializer_context*)serializer->context;

  if(ctx->xml_writer)
    raptor_free_xml_writer(ctx->xml_writer);

  if(ctx->rdf_RDF_element)
    raptor_free_xml_element(ctx->rdf_RDF_element);

  if(ctx->rdf_nspace)
    raptor_free_namespace(ctx->rdf_nspace);

  if(ctx->rdf_xml_literal_uri)
    raptor_free_uri(ctx->rdf_xml_literal_uri);

  if(ctx->namespaces) {
    /* Slot 0 is the rdf: namespace, already released above, so the walk
     * starts at slot 1.
     */
    int idx=1;

    while(idx < raptor_sequence_size(ctx->namespaces)) {
      raptor_namespace* declared;

      declared=(raptor_namespace*)raptor_sequence_get_at(ctx->namespaces, idx);
      if(declared)
        raptor_free_namespace(declared);
      idx++;
    }

    raptor_free_sequence(ctx->namespaces);
  }

  if(ctx->nstack)
    raptor_free_namespaces(ctx->nstack);
}
/* Depth argument passed to raptor_new_namespace_from_uri() for every
 * namespace created by the declare-namespace functions below.
 */
#define RDFXML_NAMESPACE_DEPTH 0
/*
 * raptor_rdfxml_serialize_declare_namespace_from_namespace - declare a
 * namespace to be written on rdf:RDF
 * @serializer: serializer to add the declaration to
 * @nspace: namespace whose prefix and URI are copied
 *
 * A copy of @nspace is created on the serializer's own namespace stack
 * and appended to the list of declared namespaces; @nspace itself is not
 * kept.
 *
 * Return value: 0 if the namespace was added. 1 if the rdf:RDF header has
 * already been written, if the prefix (including the empty/default
 * prefix) or the URI matches an existing declaration, or if the copy
 * could not be created.
 */
static int
raptor_rdfxml_serialize_declare_namespace_from_namespace(raptor_serializer* serializer,
                                                         raptor_namespace *nspace)
{
  raptor_rdfxml_serializer_context* ctx;
  raptor_namespace* copy;
  int idx;

  ctx=(raptor_rdfxml_serializer_context*)serializer->context;

  /* Too late: declarations can only go on the rdf:RDF start tag */
  if(ctx->written_header)
    return 1;

  for(idx=0; idx < raptor_sequence_size(ctx->namespaces); idx++) {
    raptor_namespace* existing;
    int same_prefix;

    existing=(raptor_namespace*)raptor_sequence_get_at(ctx->namespaces, idx);

    /* Prefixes match when both are absent or both have the same text */
    if(existing->prefix && nspace->prefix)
      same_prefix=!strcmp((const char*)existing->prefix,
                          (const char*)nspace->prefix);
    else
      same_prefix=(!existing->prefix && !nspace->prefix);

    if(same_prefix)
      return 1;

    /* The same URI under a different prefix is also treated as declared */
    if(existing->uri && nspace->uri &&
       raptor_uri_equals(existing->uri, nspace->uri))
      return 1;
  }

  copy=raptor_new_namespace_from_uri(ctx->nstack,
                                     nspace->prefix, nspace->uri,
                                     RDFXML_NAMESPACE_DEPTH);
  if(!copy)
    return 1;

  raptor_sequence_push(ctx->namespaces, copy);
  return 0;
}
/*
 * raptor_rdfxml_serialize_declare_namespace - declare a namespace from a
 * URI and prefix
 * @serializer: serializer to add the declaration to
 * @uri: namespace URI
 * @prefix: namespace prefix; passed through unchanged to
 *   raptor_new_namespace_from_uri()
 *
 * Builds a temporary namespace object for (@prefix, @uri), hands it to
 * raptor_rdfxml_serialize_declare_namespace_from_namespace() and then
 * frees the temporary.
 *
 * Return value: the result of
 * raptor_rdfxml_serialize_declare_namespace_from_namespace(), or 1 if the
 * temporary namespace could not be created.
 */
static int
raptor_rdfxml_serialize_declare_namespace(raptor_serializer* serializer,
                                          raptor_uri *uri,
                                          const unsigned char *prefix)
{
  raptor_rdfxml_serializer_context* context=(raptor_rdfxml_serializer_context*)serializer->context;
  raptor_namespace *ns;
  int rc;

  ns=raptor_new_namespace_from_uri(context->nstack, prefix, uri,
                                   RDFXML_NAMESPACE_DEPTH);
  /* Creation can fail; the callee dereferences its namespace argument, so
   * a NULL must not be passed on (or freed).
   */
  if(!ns)
    return 1;

  rc=raptor_rdfxml_serialize_declare_namespace_from_namespace(serializer,
                                                              ns);
  raptor_free_namespace(ns);

  return rc;
}
/*
 * raptor_rdfxml_serialize_start - begin a new serialization
 * @serializer: serializer to start
 *
 * Discards any XML writer left over from a previous run, creates a fresh
 * one writing to serializer->iostream, applies the serializer's XML
 * version to it and marks the rdf:RDF header as not yet written.
 *
 * Return value: 0 on success, 1 if the XML writer could not be created
 * (the context is then left without a writer).
 */
static int
raptor_rdfxml_serialize_start(raptor_serializer* serializer)
{
  raptor_rdfxml_serializer_context* context=(raptor_rdfxml_serializer_context*)serializer->context;
  raptor_xml_writer* xml_writer;
  raptor_uri_handler *uri_handler;
  void *uri_context;

  raptor_uri_get_handler(&uri_handler, &uri_context);

  if(context->xml_writer) {
    raptor_free_xml_writer(context->xml_writer);
    /* Clear the stale pointer so a failure below cannot lead to a second
     * free when the serializer is terminated.
     */
    context->xml_writer=NULL;
  }

  xml_writer=raptor_new_xml_writer(context->nstack,
                                   uri_handler, uri_context,
                                   serializer->iostream,
                                   (raptor_simple_message_handler)raptor_serializer_simple_error,
                                   serializer,
                                   1);
  if(!xml_writer)
    return 1;

  raptor_xml_writer_set_feature(xml_writer, RAPTOR_FEATURE_WRITER_XML_VERSION,
                                serializer->xml_version);

  context->xml_writer=xml_writer;
  context->written_header=0;

  return 0;
}
/*
 * raptor_rdfxml_ensure_writen_header - write the rdf:RDF start tag once
 * @serializer: serializer supplying the base URI
 * @context: RDF/XML serializer context
 *
 * Does nothing if the header has already been written. Otherwise builds
 * the rdf:RDF element (with a copy of the serializer base URI, if any),
 * declares every user namespace on it, writes the start tag followed by
 * a newline and records that the header is done.
 */
static void
raptor_rdfxml_ensure_writen_header(raptor_serializer* serializer,
                                   raptor_rdfxml_serializer_context* context)
{
  raptor_xml_writer* writer;
  raptor_qname *rdf_RDF_qname;
  raptor_uri *element_base=NULL;
  int idx;

  if(context->written_header)
    return;

  writer=context->xml_writer;

  rdf_RDF_qname=raptor_new_qname_from_namespace_local_name(context->rdf_nspace,
                                                           (const unsigned char*)"RDF", NULL);

  if(serializer->base_uri)
    element_base=raptor_uri_copy(serializer->base_uri);

  context->rdf_RDF_element=raptor_new_xml_element(rdf_RDF_qname, NULL,
                                                  element_base);

  /* NOTE: Starts at item 1 because item 0 is the element's own namespace
   * (rdf) and does not need to be declared separately.
   */
  idx=1;
  while(idx < raptor_sequence_size(context->namespaces)) {
    raptor_namespace* declared;

    declared=(raptor_namespace*)raptor_sequence_get_at(context->namespaces, idx);
    raptor_xml_element_declare_namespace(context->rdf_RDF_element, declared);
    idx++;
  }

  raptor_xml_writer_start_element(writer, context->rdf_RDF_element);
  raptor_xml_writer_raw_counted(writer, (const unsigned char*)"\n", 1);

  context->written_header=1;
}
/*
 * raptor_rdfxml_serialize_statement - write one statement as RDF/XML
 * @serializer: serializer whose context holds the XML writer and namespaces
 * @statement: statement to write
 *
 * Writes the rdf:RDF header first if that has not happened yet. The
 * statement is then emitted as an rdf:Description element, carrying
 * rdf:about or rdf:nodeID for the subject, that contains one property
 * element for the predicate. The object becomes either the content of
 * the property element (literals) or an rdf:resource / rdf:nodeID
 * attribute on it.
 *
 * The predicate URI is split into a namespace URI and a local name at the
 * start of its longest suffix that passes raptor_xml_name_check(). If the
 * namespace is not already on the namespace stack, a temporary one with
 * prefix "ns0" is created for the duration of the call.
 *
 * A subject or object of an unsupported type is reported through
 * raptor_serializer_error(); the rest of the statement is still written.
 *
 * Return value: 0 once the statement has been written; 1 if the predicate
 * URI cannot be split, in which case no element is written for this
 * statement.
 */
static int
raptor_rdfxml_serialize_statement(raptor_serializer* serializer,
                                  const raptor_statement *statement)
{
  raptor_rdfxml_serializer_context* context=(raptor_rdfxml_serializer_context*)serializer->context;
  raptor_xml_writer* xml_writer=context->xml_writer;
  unsigned char* uri_string=NULL; /* predicate URI */
  unsigned char ordinal_name[20];
  unsigned char* name=NULL;  /* where to split predicate name */
  unsigned char* subject_uri_string=NULL;
  unsigned char* object_uri_string=NULL;
  const unsigned char* nsprefix=(const unsigned char*)"ns0";
  size_t len;
  raptor_qname* rdf_Description_qname;
  raptor_xml_element* rdf_Description_element;
  raptor_uri* predicate_ns_uri=NULL;
  raptor_namespace* predicate_ns=NULL;
  int free_predicate_ns=0;
  raptor_qname* predicate_qname=NULL;
  raptor_xml_element* predicate_element=NULL;
  raptor_qname **attrs;
  int attrs_count=0;
  raptor_uri* base_uri=NULL;
  raptor_identifier_type object_type;
  int allocated=1;

  raptor_rdfxml_ensure_writen_header(serializer, context);

  if(statement->predicate_type == RAPTOR_IDENTIFIER_TYPE_ORDINAL) {
    /* Ordinal predicates are written as rdf:_N */
    predicate_ns=context->rdf_nspace;
    sprintf((char*)ordinal_name, "_%d", *((int*)statement->predicate));
    name=ordinal_name;
  } else {
    unsigned char *p;
    size_t uri_len;
    size_t name_len;
    unsigned char c;

    uri_string=raptor_uri_as_counted_string((raptor_uri*)statement->predicate,
                                            &uri_len);

    /* Walk forward until the remainder of the URI is a legal XML name;
     * that remainder becomes the local name.
     */
    p=uri_string;
    name_len=uri_len;
    while(name_len > 0) {
      if(raptor_xml_name_check(p, name_len, 10)) {
        name=p;
        break;
      }
      p++; name_len--;
    }

    /* No legal suffix, or the whole URI is the name (no namespace part) */
    if(!name || (name == uri_string)) {
      raptor_serializer_error(serializer, "Cannot split predicate URI %s into an XML qname - skipping statement", uri_string);
      return 1;
    }

    /* Temporarily terminate the URI string at the split point so that the
     * leading part can be turned into the namespace URI, then restore it.
     */
    c=*name; *name='\0';
    predicate_ns_uri=raptor_new_uri(uri_string);
    *name=c;

    predicate_ns=raptor_namespaces_find_namespace_by_uri(context->nstack,
                                                         predicate_ns_uri);
    if(!predicate_ns) {
      predicate_ns=raptor_new_namespace_from_uri(context->nstack,
                                                 nsprefix,
                                                 predicate_ns_uri, 0);
      free_predicate_ns=1;
    }
    raptor_free_uri(predicate_ns_uri);
  }

  rdf_Description_qname=raptor_new_qname_from_namespace_local_name(context->rdf_nspace,
                                                                   (unsigned const char*)"Description", NULL);
  if(serializer->base_uri)
    base_uri=raptor_uri_copy(serializer->base_uri);
  rdf_Description_element=raptor_new_xml_element(rdf_Description_qname, NULL,
                                                 base_uri);

  attrs=(raptor_qname **)RAPTOR_CALLOC(qnamearray, 3, sizeof(raptor_qname*));
  attrs_count=0;

  /* subject */
  switch(statement->subject_type) {
    case RAPTOR_IDENTIFIER_TYPE_ANONYMOUS:
      attrs[attrs_count++]=raptor_new_qname_from_namespace_local_name(context->rdf_nspace, (const unsigned char*)"nodeID", (unsigned char*)statement->subject);
      break;

    case RAPTOR_IDENTIFIER_TYPE_RESOURCE:
    case RAPTOR_IDENTIFIER_TYPE_ORDINAL:
      allocated=1;
      if(statement->subject_type == RAPTOR_IDENTIFIER_TYPE_ORDINAL) {
        /* +13: '_', up to 11 characters for the integer, and the NUL */
        subject_uri_string=(unsigned char*)RAPTOR_MALLOC(cstring, raptor_rdf_namespace_uri_len+13);
        sprintf((char*)subject_uri_string, "%s_%d",
                raptor_rdf_namespace_uri, *((int*)statement->subject));
      } else {
        if(serializer->feature_relative_uris)
          subject_uri_string=raptor_uri_to_relative_uri_string(serializer->base_uri,
                                                               (raptor_uri*)statement->subject);
        else {
          /* string stays owned by the URI, so it must not be freed here */
          subject_uri_string=raptor_uri_as_string((raptor_uri*)statement->subject);
          allocated=0;
        }
      }

      attrs[attrs_count++]=raptor_new_qname_from_namespace_local_name(context->rdf_nspace, (const unsigned char*)"about", subject_uri_string);

      if(allocated)
        RAPTOR_FREE(cstring, subject_uri_string);
      break;

    case RAPTOR_IDENTIFIER_TYPE_PREDICATE:
    case RAPTOR_IDENTIFIER_TYPE_LITERAL:
    case RAPTOR_IDENTIFIER_TYPE_XML_LITERAL:
    case RAPTOR_IDENTIFIER_TYPE_UNKNOWN:
    default:
      raptor_serializer_error(serializer, "Do not know how to serialize node type %d\n", statement->subject_type);
  }

  if(attrs_count)
    raptor_xml_element_set_attributes(rdf_Description_element, attrs, attrs_count);
  else
    /* the array was never handed to the element, so release it here */
    RAPTOR_FREE(qnamearray, attrs);

  raptor_xml_writer_cdata_counted(xml_writer, (const unsigned char*)"  ", 2);
  raptor_xml_writer_start_element(xml_writer, rdf_Description_element);
  raptor_xml_writer_cdata_counted(xml_writer, (const unsigned char*)"\n", 1);

  /* predicate */
  predicate_qname=raptor_new_qname_from_namespace_local_name(predicate_ns,
                                                             name, NULL);
  if(serializer->base_uri)
    base_uri=raptor_uri_copy(serializer->base_uri);
  predicate_element=raptor_new_xml_element(predicate_qname, NULL, base_uri);

  /* object */
  attrs=(raptor_qname **)RAPTOR_CALLOC(qnamearray, 3, sizeof(raptor_qname*));
  attrs_count=0;

  object_type=statement->object_type;
  switch(object_type) {
    case RAPTOR_IDENTIFIER_TYPE_LITERAL:
      /* A literal typed as rdf:XMLLiteral is written like an XML literal */
      if(statement->object_literal_datatype &&
         raptor_uri_equals(statement->object_literal_datatype,
                           context->rdf_xml_literal_uri))
        object_type = RAPTOR_IDENTIFIER_TYPE_XML_LITERAL;

      /* FALLTHROUGH */
    case RAPTOR_IDENTIFIER_TYPE_XML_LITERAL:
      if(statement->object_literal_language)
        attrs[attrs_count++]=raptor_new_qname(context->nstack,
                                              (unsigned char*)"xml:lang",
                                              (unsigned char*)statement->object_literal_language,
                                              (raptor_simple_message_handler)raptor_serializer_simple_error,
                                              serializer);

      len=strlen((const char*)statement->object);

      if(object_type == RAPTOR_IDENTIFIER_TYPE_XML_LITERAL) {
        attrs[attrs_count++]=raptor_new_qname_from_namespace_local_name(context->rdf_nspace, (const unsigned char*)"parseType", (const unsigned char*)"Literal");
        raptor_xml_element_set_attributes(predicate_element, attrs, attrs_count);

        raptor_xml_writer_cdata_counted(xml_writer, (const unsigned char*)"    ", 4);
        raptor_xml_writer_start_element(xml_writer, predicate_element);

        /* Print without escaping XML */
        if(len)
          raptor_xml_writer_raw_counted(xml_writer,
                                        (const unsigned char*)statement->object,
                                        len);
      } else {
        if(statement->object_literal_datatype)
          attrs[attrs_count++]=raptor_new_qname_from_namespace_local_name(context->rdf_nspace, (const unsigned char*)"datatype", (unsigned char*)raptor_uri_as_string((raptor_uri*)statement->object_literal_datatype));
        raptor_xml_element_set_attributes(predicate_element, attrs, attrs_count);

        raptor_xml_writer_cdata_counted(xml_writer,
                                        (const unsigned char*)"    ", 4);
        raptor_xml_writer_start_element(xml_writer, predicate_element);

        if(len)
          raptor_xml_writer_cdata_counted(xml_writer,
                                          (const unsigned char*)statement->object, len);
      }

      raptor_xml_writer_end_element(xml_writer, predicate_element);
      raptor_xml_writer_cdata_counted(xml_writer, (const unsigned char*)"\n", 1);
      break;

    case RAPTOR_IDENTIFIER_TYPE_ANONYMOUS:
      attrs[attrs_count++]=raptor_new_qname_from_namespace_local_name(context->rdf_nspace, (const unsigned char*)"nodeID", (unsigned char*)statement->object);
      raptor_xml_element_set_attributes(predicate_element, attrs, attrs_count);

      raptor_xml_writer_cdata_counted(xml_writer,
                                      (const unsigned char*)"    ", 4);
      raptor_xml_writer_empty_element(xml_writer, predicate_element);
      raptor_xml_writer_cdata_counted(xml_writer, (const unsigned char*)"\n", 1);
      break;

    case RAPTOR_IDENTIFIER_TYPE_RESOURCE:
    case RAPTOR_IDENTIFIER_TYPE_ORDINAL:
      allocated=1;
      if(object_type == RAPTOR_IDENTIFIER_TYPE_ORDINAL) {
        /* +13: '_', up to 11 characters for the integer, and the NUL */
        object_uri_string=(unsigned char*)RAPTOR_MALLOC(cstring, raptor_rdf_namespace_uri_len+13);
        sprintf((char*)object_uri_string, "%s_%d",
                raptor_rdf_namespace_uri, *((int*)statement->object));
      } else {
        /* must be URI */
        if(serializer->feature_relative_uris)
          object_uri_string=raptor_uri_to_relative_uri_string(serializer->base_uri,
                                                              (raptor_uri*)statement->object);
        else {
          /* Borrow the URI's own string, as the subject branch does; it
           * is not freed below, so an allocating call here would leak.
           */
          object_uri_string=raptor_uri_as_string((raptor_uri*)statement->object);
          allocated=0;
        }
      }

      attrs[attrs_count++]=raptor_new_qname_from_namespace_local_name(context->rdf_nspace, (const unsigned char*)"resource", object_uri_string);

      if(allocated)
        RAPTOR_FREE(cstring, object_uri_string);

      raptor_xml_element_set_attributes(predicate_element, attrs, attrs_count);

      raptor_xml_writer_cdata_counted(xml_writer,
                                      (const unsigned char*)"    ", 4);
      raptor_xml_writer_empty_element(xml_writer, predicate_element);
      raptor_xml_writer_cdata_counted(xml_writer, (const unsigned char*)"\n", 1);
      break;

    case RAPTOR_IDENTIFIER_TYPE_PREDICATE:
    case RAPTOR_IDENTIFIER_TYPE_UNKNOWN:
    default:
      raptor_serializer_error(serializer, "Do not know how to serialize node type %d\n", object_type);
      /* the array was never handed to the element, so release it here */
      RAPTOR_FREE(qnamearray, attrs);
  }

  raptor_free_xml_element(predicate_element);

  if(free_predicate_ns) {
    raptor_free_namespace(predicate_ns);
  }

  raptor_xml_writer_cdata_counted(xml_writer,
                                  (const unsigned char*)"  ", 2);
  raptor_xml_writer_end_element(xml_writer, rdf_Description_element);
  raptor_xml_writer_cdata_counted(xml_writer, (const unsigned char*)"\n", 1);

  raptor_free_xml_element(rdf_Description_element);

  return 0;
}
/*
 * raptor_rdfxml_serialize_end - finish a serialization
 * @serializer: serializer to finish
 *
 * Makes sure the rdf:RDF start tag has been written (so that a run with
 * no statements still produces it), writes the matching end tag and a
 * newline, then frees the rdf:RDF element and clears its pointer in the
 * context.
 *
 * Return value: always 0
 */
static int
raptor_rdfxml_serialize_end(raptor_serializer* serializer)
{
  raptor_rdfxml_serializer_context* ctx;
  raptor_xml_writer* writer;

  ctx=(raptor_rdfxml_serializer_context*)serializer->context;
  writer=ctx->xml_writer;

  raptor_rdfxml_ensure_writen_header(serializer, ctx);

  raptor_xml_writer_end_element(writer, ctx->rdf_RDF_element);
  raptor_xml_writer_raw_counted(writer, (const unsigned char*)"\n", 1);

  raptor_free_xml_element(ctx->rdf_RDF_element);
  ctx->rdf_RDF_element=NULL;

  return 0;
}
/*
 * raptor_rdfxml_serialize_finish_factory - factory teardown hook
 * @factory: serializer factory being finished
 *
 * Intentionally does nothing.
 */
static void
raptor_rdfxml_serialize_finish_factory(raptor_serializer_factory* factory)
{
  (void)factory;
}
/*
 * raptor_rdfxml_serializer_register_factory - fill in the factory for the
 * RDF/XML serializer
 * @factory: factory structure to populate
 *
 * Records the size of the per-serializer context and installs the
 * RDF/XML implementations of every serializer callback.
 */
static void
raptor_rdfxml_serializer_register_factory(raptor_serializer_factory *factory)
{
  /* per-serializer context size */
  factory->context_length = sizeof(raptor_rdfxml_serializer_context);

  /* serializer lifetime */
  factory->init = raptor_rdfxml_serialize_init;
  factory->terminate = raptor_rdfxml_serialize_terminate;
  factory->finish_factory = raptor_rdfxml_serialize_finish_factory;

  /* namespace declarations */
  factory->declare_namespace = raptor_rdfxml_serialize_declare_namespace;
  factory->declare_namespace_from_namespace = raptor_rdfxml_serialize_declare_namespace_from_namespace;

  /* writing output */
  factory->serialize_start = raptor_rdfxml_serialize_start;
  factory->serialize_statement = raptor_rdfxml_serialize_statement;
  factory->serialize_end = raptor_rdfxml_serialize_end;
}
/*
 * raptor_init_serializer_rdfxml - register the RDF/XML serializer
 *
 * Registers the serializer under the name "rdfxml" with label "RDF/XML",
 * passing the factory setup function defined in this file.
 * NOTE(review): the remaining arguments look like a MIME type, an unused
 * (NULL) slot and the syntax specification URI - confirm against the
 * prototype of raptor_serializer_register_factory().
 */
void
raptor_init_serializer_rdfxml(void)
{
  const unsigned char* syntax_uri;

  syntax_uri=(const unsigned char*)"http://www.w3.org/TR/rdf-syntax-grammar";

  raptor_serializer_register_factory("rdfxml", "RDF/XML",
                                     "application/rdf+xml",
                                     NULL,
                                     syntax_uri,
                                     &raptor_rdfxml_serializer_register_factory);
}