Skip to content

Commit

Permalink
Remove -inXML as command, add application/xml as format. Add data isl…
Browse files Browse the repository at this point in the history
…and test
  • Loading branch information
Tim Berners-Lee committed Feb 7, 2016
1 parent 72f1cb0 commit 5136226
Show file tree
Hide file tree
Showing 5 changed files with 145 additions and 35 deletions.
53 changes: 51 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,51 @@
# rabel
Program for reading and writing linked data in various formats. Short for "RDF Babel".
# rabel - linked data format converter

Program for reading and writing linked data in various formats.

To install,

npm install -g rabel

## Command line

Commands look like unix options and are executed *in order* from left to right. They include:
```
-base=rrrr Set the current base URI (relative URI)
-clear Clear the current store
-dump Serialize the store in current content-type to the console
-format=cccc Set the current content-type
-help This message
-in=uri Load a web resource or file
-out=filename Output in the current content type
-size Give the current store size in triples
-version Give the version of this program
```

Formats cccc are given as MIME types. These can be used for input or output:

* text/turtle *(default)*
* application/rdf+xml

whereas these can only be used for input:

* application/rdfa
* application/xml

#### Example

```
rabel -format=application/xml -in=foo.xml -format=text/turtle -out=foo.ttl
rabel part*.ttl -out=whole.ttl
```
## Details
Currently rabel can read from the web or files, and write only to files. Filenames are deemed to be relative URIs just taken relative to file:///{pwd}/ where {pwd} is the current working directory.

One use case is testing all the parsers. Another is providing a stable serialization. The output serialization is designed to be stable under small changes of the data, to allow data files to be checked into source code control systems.

The name comes from RDF and Babel.

### XML

When loading XML, elements are mapped to arcs, and text content to trimmed RDF strings. For the XML namespace used for IANA registry documents, custom mapping is done, both of properties and datatypes, and local identifier generation.
(See the source for details!)
95 changes: 62 additions & 33 deletions rabel.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ Commands in unix option form are executed left to right, and include:\n\
-out=filename Output in eth current content type\n\
-size Give the current store\n\
-version Give the version of this program\n\
\n\
Formats are given as MIME types, such as text/turtle (default), application/rdf+xml, etc\n\
\n"


Expand Down Expand Up @@ -46,12 +48,36 @@ var exitMessage = function(message) {
}

var doNext = function(remaining) {

// Load one resource into the store, then resume command processing.
//
// `right` is a URI (possibly relative) that is joined onto the current
// `base`. The current `contentType` selects the loader: 'application/xml'
// goes through readXML, anything else through the fetcher. When the loader
// calls back, the result is passed to check(), a progress line is logged,
// and doNext() is called with the remaining arguments.
var loadResource = function(right) {
  targetDocument = $rdf.sym($rdf.uri.join(right, base));
  //console.log("Document is " + targetDocument)

  var isXML = (contentType == 'application/xml');
  var loadedPrefix = isXML ? "Loaded XML " : "Loaded ";

  // Shared completion handler for both loaders.
  var onLoaded = function(ok, body, xhr) {
    var status; // stays undefined when the loader supplies no xhr
    if (xhr) {
      status = xhr.status;
    }
    check(ok, body, status);
    console.log(loadedPrefix + targetDocument);
    doNext(remaining);
  };

  if (isXML) {
    readXML(targetDocument, {}, onLoaded);
    return;
  }
  fetcher.nowOrWhenFetched(targetDocument, {}, onLoaded);
};


while (remaining.length) {
// console.log("... remaining " + remaining.join(' '));

var command = remaining.shift().split('=');
var left = command[0],
right = command[1];
if (left.slice(0,1) !== '-') {
loadResource(left);
return;
}
switch(left) {
case '-base':
base = $rdf.uri.join(right, base)
Expand Down Expand Up @@ -80,24 +106,8 @@ var doNext = function(remaining) {
break;

case '-in':
targetDocument = $rdf.sym($rdf.uri.join(right, base))
//console.log("Document is " + targetDocument)
fetcher.nowOrWhenFetched(targetDocument, {}, function(ok, body, xhr) {
check(ok, body, xhr? xhr.status : undefined);
console.log("Loaded " + targetDocument);
doNext(remaining);
}); // target, kb, base, contentType, callback
return; // STOP processing at this level

case '-inXML':
targetDocument = $rdf.sym($rdf.uri.join(right, base))
//console.log("Document is " + targetDocument)
readXML(targetDocument, {}, function(ok, body, xhr) {
check(ok, body, xhr? xhr.status : undefined);
console.log("Loaded XML " + targetDocument);
doNext(remaining);
}); // target, kb, base, contentType, callback
return; // STOP processing at this level
loadResource(right);
return;

case '-out':
doc = $rdf.sym($rdf.uri.join(right, base));
Expand All @@ -114,7 +124,7 @@ var doNext = function(remaining) {
if (err) {
exitMessage("Error writing file <"+right+"> :" + err);
}
console.log("Written " + fileName);
console.log("Written " + doc);
doNext(remaining);
});
return;
Expand Down Expand Up @@ -184,7 +194,22 @@ readXML = function(targetDocument, options, callback) {
return text;
}


/////////////////////////// GPX SPECIAL


// Predicate map for GPX documents, keyed by XML element/attribute name.
// Each entry gives the predicate `uri` to use in place of the default
// namespace + name, and optionally a datatype URI in `type`.
var GPX_predicateMap = {
  time: {
    uri: 'http://www.w3.org/2003/01/geo/wgs84_pos#time',
    type: 'http://www.w3.org/2001/XMLSchema#dateTime'
  },
  lat: {
    uri: 'http://www.w3.org/2003/01/geo/wgs84_pos#lat'
  },
  lon: {
    uri: 'http://www.w3.org/2003/01/geo/wgs84_pos#long'
  },
  // NOTE(review): the W3C Basic Geo vocabulary appears to name this
  // property `alt` rather than `altitude` - confirm which is intended.
  ele: {
    uri: 'http://www.w3.org/2003/01/geo/wgs84_pos#altitude'
  }
};



/////////////////////////// IANA SPECIAL

var IANA_predicateMap = {
created: { uri: 'http://purl.org/dc/terms/created',
type: 'http://www.w3.org/2001/XMLSchema#date'}, // @@CHECK
Expand Down Expand Up @@ -222,12 +247,14 @@ readXML = function(targetDocument, options, callback) {
}
return null;
}

//////////////////////////////////

var convert = function(ele, node, indent) {
indent = indent || '';
var pred, obj, type;
console.log(indent + "nodeName: " + ele.nodeName + " type " + ele.nodeType)
console.log(indent + "tagName: " + ele.tagName);
//console.log(indent + "nodeName: " + ele.nodeName + " type " + ele.nodeType)
//console.log(indent + "tagName: " + ele.tagName);
if (ele.nodeType in ignore) { // PI
return;
}
Expand All @@ -236,7 +263,7 @@ readXML = function(targetDocument, options, callback) {
pred = kb.sym(ns + id);
if (options.predicateMap && options.predicateMap[id]) {
var p = options.predicateMap[id];
console.log(indent + "Mapping to " + p.uri)
// console.log(indent + "Mapping to " + p.uri)
if (p.uri) {
pred = kb.sym(p.uri);
}
Expand All @@ -248,10 +275,10 @@ readXML = function(targetDocument, options, callback) {

if (ele.attributes) {
var attrs = ele.attributes, a;
console.log(indent + 'attributes: ' + attrs.length)
// console.log(indent + 'attributes: ' + attrs.length)
for (var j=0; j < attrs.length; j++) {
a = attrs.item(j);
console.log(indent + j + ") " +a.nodeName + " = " + a.nodeValue);
// console.log(indent + j + ") " +a.nodeName + " = " + a.nodeValue);
if (a.nodeName === 'xmlns') {
defaultNamespace = a.nodeValue;

Expand All @@ -263,6 +290,8 @@ readXML = function(targetDocument, options, callback) {

} else if (defaultNamespace === 'http://www.topografix.com/GPX/1/1') {
ns ='http://hackdiary.com/ns/gps#'; // @@@ u
options.predicateMap = GPX_predicateMap;
console.log('GPX Mode');
}
continue;
}
Expand All @@ -271,39 +300,39 @@ readXML = function(targetDocument, options, callback) {
}
}
if (ele.childNodes) {
console.log(indent + "children " +ele.childNodes.length)
// console.log(indent + "children " +ele.childNodes.length)
for(var i=0; i<ele.childNodes.length; i++) {
console.log(indent + ' i ' + i)
// console.log(indent + ' i ' + i)
var child = ele.childNodes[i];
type = null;
setPred(child.nodeName);
if (child.nodeType === 3) { // text
//throw "We should not see text nodes at this level"
console.log(indent + " nodeName: " + child.nodeName + " type " + child.nodeType)
// console.log(indent + " nodeName: " + child.nodeName + " type " + child.nodeType)
obj = child.nodeValue.trim(); // @@ optional
if (obj.length !== 0) {
console.log($rdf.lit(obj, undefined, type))
kb.add(node, kb.sym(ns + ele.nodeName), $rdf.lit(obj, undefined, type), targetDocument)
console.log(indent + 'actual text ' + obj)
// console.log(indent + 'actual text ' + obj)
} else {
console.log(indent + 'whitespace')
// console.log(indent + 'whitespace')
}
} else if (!(child.nodeType in ignore)){
var txt = justTextContent(child);
if (txt !== false) {
if (txt.length > 0) {
kb.add(node, pred, $rdf.lit(txt, undefined, type), targetDocument)
console.log($rdf.lit(txt, undefined, type))
// console.log($rdf.lit(txt, undefined, type))
}
} else if (options.iana && magicIANAxref(child)) {
kb.add(node, kb.sym(ns + child.nodeName), magicIANAxref(child), targetDocument);
console.log(indent + "Magic IANA xref " + magicIANAxref(child))
// console.log(indent + "Magic IANA xref " + magicIANAxref(child))
} else {
if (child.attributes && child.attributes.getNamedItem('id')){
obj = kb.sym(local + child.attributes.getNamedItem('id').nodeValue);
} else if (options.iana && magicIANAvalue(child)) {
obj = magicIANAvalue(child);
console.log(indent + "Magic IANA value " + obj)
// console.log(indent + "Magic IANA value " + obj)
kb.add(obj, RDF('type'), RDF('Property') , targetDocument);
} else {
obj = kb.bnode();
Expand Down
32 changes: 32 additions & 0 deletions test/html/xml-data-island.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
<!DOCTYPE html>
<html>
<head>
<title>XML Data Block Demo</title>
<link rel="derivedfrom" href="https://developer.mozilla.org/en/docs/Using_XML_Data_Islands_in_Mozilla"/>
<script id="purchase-order" type="application/xml">
<purchaseOrder xmlns="http://example.mozilla.org/PurchaseOrderML">
<lineItem>
<name>Line Item 1</name>
<price>1.25</price>
</lineItem>
<lineItem>
<name>Line Item 2</name>
<price>2.48</price>
</lineItem>
</purchaseOrder>
</script>
<script>
// Parse the XML data block embedded in the "purchase-order" script element
// and replace the page body text with a summary: the number of line items
// and the price of the first one.
function runDemo() {
  // Namespace declared on the <purchaseOrder> root element.
  var ORDER_NS = "http://example.mozilla.org/PurchaseOrderML";

  var xmlText = document.getElementById("purchase-order").textContent;
  var orderDoc = new DOMParser().parseFromString(xmlText, "application/xml");

  var items = orderDoc.getElementsByTagNameNS(ORDER_NS, "lineItem");
  var firstPriceNode = items[0].getElementsByTagNameNS(ORDER_NS, "price")[0];
  var priceOfFirst = firstPriceNode.textContent;

  var summary = "The purchase order contains " + items.length + " line items. The price of the first line item is " + priceOfFirst + ".";
  document.body.textContent = summary;
}
</script>
</head>
<body onload="runDemo()">
Demo did not run
</body>
</html>
File renamed without changes.
File renamed without changes.

0 comments on commit 5136226

Please sign in to comment.