-
-
Notifications
You must be signed in to change notification settings - Fork 1.2k
Radisk
The Radisk Storage Engine (RAD) is an in-memory, as well as on-disk radix tree that saves the GUN database graph for fast and performant look-ups.
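Lookup time in a radix tree depends on the length of the key rather than on the number of entries stored, so look-ups stay fast as the database grows.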
GUN is a modular system, which allows adapters to 'hook' into the GUN instance through subscriptions to general events (such as gun.on('in')) or to data events specifically subscribed to via data.on(callback).
When data is put via gun.get(key).put({object}), GUN adds the {object} into its internal graph (in-memory) and then hands the data to the next subscribed adapter. (This may be RAD, localStorage, AWS S3, etc.)
RAD is called from gun/lib/store.js, which should be the template for anyone to start building their own storage adapter for GUN.
// In-memory stand-in for a real storage backend: maps file name -> data.
// Created without a prototype so that file names such as 'constructor' or
// '__proto__' behave like ordinary keys instead of hitting inherited members.
var storage = Object.create(null)
// Container for the adapter functions (put / get / list) defined below.
gun._.opt.store = {};
// Example `put`: remembers `data` under the name `file` in the in-memory
// `storage` object, then reports back through `cb(err, ok)` with no error
// and `1` as the success flag.
gun._.opt.store.put = function(file, data, cb){
	console.log(`put with ${file}, data ${data}, callback`);
	storage[file] = data;
	var noError;
	return void cb(noError, 1);
}
// Example `get`: looks `file` up in the in-memory `storage` object and hands
// the result to `cb(err, data)`. The callback is error-first, matching the
// `cb(undefined, 1)` call made by `put`; `data` is undefined when nothing
// (or only a falsy value) is stored under that name.
gun._.opt.store.get = function(file, cb){
	console.log(`get with ${file}, callback`);
	// Own-property check so that inherited names (e.g. 'constructor') are not
	// mistaken for stored files.
	var isStored = Object.prototype.hasOwnProperty.call(storage, file);
	var temp = (isStored && storage[file]) || undefined
	console.log(`Found ${file}: ${temp}`);
	// First argument is the error slot; the data goes second.
	cb(undefined, temp)
}
// Example `list`: streams the name of every stored file to `cb`, one call per
// file. Iteration stops early as soon as `cb` returns a truthy value. A final
// `cb()` with no argument is always made to signal the end of the listing.
gun._.opt.store.list = function(cb){
	console.log(`list with callback`);
	// All file names currently held in storage (not just the first entry).
	var files = Object.keys(storage);
	console.log(`Listed ${files}`);
	for(var i = 0, l = files.length; i < l; i++){
		if(cb(files[i])){
			break;
		}
	}
	cb()
}
var fs = require('fs');
var Gun = require('../gun');
var Radix = require('./radix');
// Radisk(opt): begins by normalizing the options object in place, filling in a
// default for every setting the caller left out (falsy values count as "left out").
function Radisk(opt){
opt = opt || {};
opt.file = String(opt.file || 'radata'); // always a string; 'radata' when not given
opt.until = opt.until || opt.wait || 1000; // default for HDDs; `opt.wait` is accepted as a fallback name
opt.batch = opt.batch || 10 * 1000; // defaults to 10,000
opt.chunk = opt.chunk || (1024 * 1024 * 10); // 10MB
opt.code = opt.code || {}; // NOTE(review): purpose of `opt.code` is not visible here — confirm
opt.code.from = opt.code.from || '!';
opt comes from the GUN instance.
We make sure opt.file is a string or defaults to 'radata'.
opt.until - the delay before writing to disk
opt.batch - the limit of 'entries' per batch
opt.chunk - file size limit before splitting to a new file
opt.code - TODO
opt.code.from - TODO
// Builds a file-system-friendly name from `t`: the value is percent-encoded
// with encodeURIComponent, and every '*' (which encodeURIComponent leaves
// untouched) is additionally written as '%2A'.
function ename(t){
	var encoded = encodeURIComponent(t);
	return encoded.split('*').join('%2A');
}
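ename()
percent-encodes the file name (including `*`, which encodeURIComponent leaves as-is), so that files are not named with symbols that would create errors on some operating systems (such as / or *).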
if(!opt.store){
return Gun.log("ERROR: Radisk needs `opt.store` interface with `{get: fn, put: fn, list: fn}`!");
}
if(!opt.store.put){
return Gun.log("ERROR: Radisk needs `store.put` interface with `(file, data, cb)`!");
}
if(!opt.store.get){
return Gun.log("ERROR: Radisk needs `store.get` interface with `(file, cb)`!");
}
if(!opt.store.list){
return Gun.log("ERROR: Radisk needs a streaming `store.list` interface with `(cb)`!");
}
Check that the store interface has been provided and that its three APIs (put, get and list) are available to us.
/*
Any and all storage adapters should...
1. Because writing to disk takes time, we should batch data to disk. This improves performance, and reduces potential disk corruption.
2. If a batch exceeds a certain number of writes, we should immediately write to disk when physically possible. This caps total performance, but reduces potential loss.
*/
// r(key, cb)       -> GET: look the key up and answer through cb(err, val).
// r(key, val, cb)  -> PUT: stage the key/val pair in the current batch.
// A function in the second position selects the GET form.
var r = function(key, val, cb){
	key = ''+key;
	if(!(val instanceof Function)){
		// PUT: stage the value and remember the optional acknowledgement callback.
		r.batch(key, val);
		if(cb){ r.batch.acks.push(cb) }
		// (2) Too many staged writes: flush right away.
		if(++r.batch.ed >= opt.batch){ return r.thrash() }
		// (1) Otherwise restart the idle timer so that writes are batched.
		clearTimeout(r.batch.to);
		r.batch.to = setTimeout(r.thrash, opt.until || 1);
		return;
	}
	// GET: the second argument is the callback.
	cb = val;
	// First look in the batch that is still collecting writes.
	var found = r.batch(key);
	if(u !== found){
		return cb(u, found);
	}
	// Then, when available, in the batch referenced by r.thrash.at.
	if(r.thrash.at){
		found = r.thrash.at(key);
		if(u !== found){
			return cb(u, found);
		}
	}
	// Not found in either batch: delegate to r.read.
	return r.read(key, cb);
}
Function r can be called in two ways: as a GET with (key, cb), or as a PUT with (key, val, cb).
GET Case
key stringify input
r.batch(key) reads back in-memory batch to check if key is waiting to be written to disk
If found, return it to caller
If not found, check the batch about to be written (staged for thrashing/flushing)
If found, return to caller
If not found, read from disk
PUT Case
key stringify input
r.batch(key,val) write key/val pair to batch
If a callback was attached we attach it in turn to acks in the batch. (acks are acknowledgments, sent out after the batch is written to disk)
Increase the counter r.batch.ed, then check whether the batch count limit (opt.batch) has been reached and, if so, flush to disk immediately.
Otherwise the flush timer is restarted: opt.until (or 1 ms as a fallback) is the idle time that must pass after the last put call before a flush occurs.