"Sample diff" compares two big files by diffing only a small sample of their items, and requires only one I/O pass per file. The sampling algorithm used is reservoir sampling.
var SampleDiff = require('sample-diff');
// Callback hooks passed to SampleDiff as its fourth argument.
// Each hook receives raw lines (or already-normalized items) from the two
// files; adapt the bodies to the format of your own data.
var opts = {
  // Extracts the numeric id of an item from a raw line by matching a
  // `uid": <digits>` field followed by `,` or `}`.
  // Throws a TypeError when the line contains no such field, so a malformed
  // line is reported clearly instead of failing on a null match result.
  'fetch_item_id_func' : function(line1) {
    var matched = line1.match(/uid\": ?([0-9]+)[,\}]/);
    if (matched === null) {
      throw new TypeError('fetch_item_id_func: no uid field found in line: ' + line1);
    }
    // Always pass the radix so the id is parsed as decimal.
    return parseInt(matched[1], 10);
  },
  // Compares two items: prints a readable diff, then returns true when the
  // items are deeply equal.
  // NOTE(review): `difflet` and `_` are not defined in this snippet; the
  // caller is assumed to have required them (e.g. difflet and underscore/lodash).
  'diff_items_func' : function(a, b) {
    console.log(difflet.compare(a, b), "\n");
    return _.isEqual(a, b);
  },
  // Converts a raw line into the item that will be compared.
  // NOTE(review): `process` stands for a user-supplied normalization function;
  // Node's built-in `process` global is not callable, so define your own.
  'data_normalization_func' : function(line1) {
    // Return directly; assigning to an undeclared `item1` would leak a global
    // (and throws a ReferenceError in strict mode / ES modules).
    return process(line1);
  },
  // Predicate evaluated per line; this sample always returns false.
  // NOTE(review): presumably false means "do not filter this line out" -
  // confirm against the sample-diff implementation.
  'filter_func' : function(line1) {
    return false;
  }
};
// Build a checker for the two files, the requested sample size and the hooks.
var checker = SampleDiff(fileA, fileB, sample_count, opts);

// Callback handed to run(); it receives the result that run() reports.
var handleResult = function(result) {
  // process result
};

checker.run(handleResult);
npm install sample-diff -g --verbose # install the sample-diff package globally
# install node and npm
npm install --global --verbose grunt-cli # install the grunt command-line tool globally
npm install --global --verbose grunt-contrib-coffee # install the grunt CoffeeScript plugin globally
npm install --verbose # install deps
grunt --verbose # compile coffee scripts
npm test # run the package's test script
MIT. David Chen @ 17zuoye.