From 2b2557ba05c2eb7ce66870f6060f19b2875252d5 Mon Sep 17 00:00:00 2001 From: Andreas Lind Petersen Date: Thu, 3 Apr 2014 23:10:16 +0200 Subject: [PATCH 1/7] Implemented diffJson. It takes two objects, serializes them as canonical JSON, then does a line-based diff that ignores differences in trailing commas. See discussion here: https://github.com/visionmedia/mocha/pull/1182 --- diff.js | 60 ++++++++++++++++++++++++++++++++++++++++++++++-- test/diffTest.js | 16 +++++++++++++ 2 files changed, 74 insertions(+), 2 deletions(-) diff --git a/diff.js b/diff.js index c084609a..e63ab21b 100644 --- a/diff.js +++ b/diff.js @@ -127,8 +127,11 @@ var JsDiff = (function() { while (newPos+1 < newLen && oldPos+1 < oldLen && this.equals(newString[newPos+1], oldString[oldPos+1])) { newPos++; oldPos++; - - this.pushComponent(basePath.components, newString[newPos], undefined, undefined); + var value = newString[newPos]; + if (this.useLongestToken && oldString[oldPos].length > value.length) { + value = oldString[oldPos]; + } + this.pushComponent(basePath.components, value, undefined, undefined); } basePath.newPos = newPos; return oldPos; @@ -183,6 +186,52 @@ var JsDiff = (function() { return retLines; }; + JsonDiff = new Diff(); + JsonDiff.useLongestToken = true; + JsonDiff.tokenize = LineDiff.tokenize; + JsonDiff.equals = function(left, right) { + return LineDiff.equals(left.replace(/,([\r\n])/g, '$1'), right.replace(/,([\r\n])/g, '$1')); + }; + + function canonicalize(obj, stack) { + stack = stack || []; + + var i; + + for (var i = 0 ; i < stack.length ; i += 1) { + if (stack[i] === obj) { + return obj; + } + } + + var canonicalizedObj; + + if ('[object Array]' == {}.toString.call(obj)) { + stack.push(obj); + canonicalizedObj = new Array(obj.length); + for (i = 0 ; i < obj.length ; i += 1) { + canonicalizedObj[i] = canonicalize(obj[i], stack); + } + stack.pop(); + } else if (typeof obj === 'object' && obj !== null) { + stack.push(obj); + canonicalizedObj = {}; + var sortedKeys = []; + for (var key in obj) { + sortedKeys.push(key); + } + sortedKeys.sort(); + for (i = 0 ; i < sortedKeys.length ; i += 1) { + var key = sortedKeys[i]; + canonicalizedObj[key] = canonicalize(obj[key], stack); + } + stack.pop(); + } else { + canonicalizedObj = obj; + } + return canonicalizedObj; + } + return { Diff: Diff, @@ -191,6 +240,13 @@ var JsDiff = (function() { diffWordsWithSpace: function(oldStr, newStr) { return WordWithSpaceDiff.diff(oldStr, newStr); }, diffLines: function(oldStr, newStr) { return LineDiff.diff(oldStr, newStr); }, + diffJson: function(oldObj, newObj) { + return JsonDiff.diff( + JSON.stringify(canonicalize(oldObj), undefined, " "), + JSON.stringify(canonicalize(newObj), undefined, " ") + ); + }, + diffCss: function(oldStr, newStr) { return CssDiff.diff(oldStr, newStr); }, createPatch: function(fileName, oldStr, newStr, oldHeader, newHeader) { diff --git a/test/diffTest.js b/test/diffTest.js index e2aa8eab..d4d009dc 100644 --- a/test/diffTest.js +++ b/test/diffTest.js @@ -112,6 +112,22 @@ describe('#diffLines', function() { }); }); +describe('#diffJson', function () { + it('should ignore trailing comma on the previous line when the property has been removed', function() { + var diffResult = diff.diffJson( + {a: 123, b: 456, c: 789}, + {a: 123, b: 456}); + diff.convertChangesToXML(diffResult).should.equal('{\n "a": 123,\n "b": 456,\n "c": 789\n}'); + }); + + it('should ignore the missing trailing comma on the last line when a property has been added after it', function() { + var diffResult = diff.diffJson( + {a: 123, b: 456}, + {a: 123, b: 456, c: 789}); + diff.convertChangesToXML(diffResult).should.equal('{\n "a": 123,\n "b": 456,\n "c": 789\n}'); + }); +}); + describe('convertToDMP', function() { it('should output diff-match-patch format', function() { var diffResult = diff.diffWords('New Value ', 'New ValueMoreData '); From e119b1a110c144d969c9d8a855f0c2aab3d99fbc Mon Sep 17 00:00:00 2001 From: Andreas Lind Date: Fri, 8 Aug 2014 22:56:11 +0200 Subject: [PATCH 2/7] diffJson: Assume already stringified JSON when receiving a string. --- diff.js | 4 ++-- test/diffTest.js | 24 +++++++++++++++++++++++- 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/diff.js b/diff.js index e63ab21b..22db075b 100644 --- a/diff.js +++ b/diff.js @@ -242,8 +242,8 @@ var JsDiff = (function() { diffJson: function(oldObj, newObj) { return JsonDiff.diff( - JSON.stringify(canonicalize(oldObj), undefined, " "), - JSON.stringify(canonicalize(newObj), undefined, " ") + typeof oldObj === 'string' ? oldObj : JSON.stringify(canonicalize(oldObj), undefined, " "), + typeof newObj === 'string' ? newObj : JSON.stringify(canonicalize(newObj), undefined, " ") ); }, diff --git a/test/diffTest.js b/test/diffTest.js index d4d009dc..8a6811ff 100644 --- a/test/diffTest.js +++ b/test/diffTest.js @@ -112,7 +112,29 @@ describe('#diffLines', function() { }); }); -describe('#diffJson', function () { +describe('#diffJson', function() { + it('should accept objects', function() { + diff.diffJson( + {a: 123, b: 456, c: 789}, + {a: 123, b: 456} + ).should.eql([ + { value: '{\n "a": 123,\n "b": 456,\n', added: undefined, removed: undefined }, + { value: ' "c": 789\n', added: undefined, removed: true }, + { value: '}', added: undefined, removed: undefined } + ]); + }); + + it('should accept already stringified JSON', function() { + diff.diffJson( + JSON.stringify({a: 123, b: 456, c: 789}, undefined, " "), + JSON.stringify({a: 123, b: 456}, undefined, " ") + ).should.eql([ + { value: '{\n "a": 123,\n "b": 456,\n', added: undefined, removed: undefined }, + { value: ' "c": 789\n', added: undefined, removed: true }, + { value: '}', added: undefined, removed: undefined } + ]); + }); + it('should ignore trailing comma on the previous line when the property has been removed', function() { var diffResult = diff.diffJson( {a: 123, b: 456, c: 789}, From 90e6979075825eeae9f089fbfa02a262df1ec32d Mon Sep 17 00:00:00 2001 From: Andreas Lind Petersen Date: Tue, 2 Sep 2014 09:38:18 +0200 Subject: [PATCH 3/7] Added a diffJson test with a nested array to gain more coverage of canonicalize. --- test/diffTest.js | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/test/diffTest.js b/test/diffTest.js index 8a6811ff..18b35971 100644 --- a/test/diffTest.js +++ b/test/diffTest.js @@ -124,6 +124,17 @@ describe('#diffJson', function() { ]); }); + it('should accept objects with nested structures', function() { + diff.diffJson( + {a: 123, b: 456, c: [1, 2, {foo: 'bar'}, 4]}, + {a: 123, b: 456, c: [1, {foo: 'bar'}, 4]} + ).should.eql([ + { value: '{\n "a": 123,\n "b": 456,\n "c": [\n 1,\n', added: undefined, removed: undefined }, + { value: ' 2,\n', added: undefined, removed: true }, + { value: ' {\n "foo": "bar"\n },\n 4\n ]\n}', added: undefined, removed: undefined } + ]); + }); + it('should accept already stringified JSON', function() { diff.diffJson( JSON.stringify({a: 123, b: 456, c: 789}, undefined, " "), From c7a5bab406a48ad9bc2c5f9079ea7aeee30d8095 Mon Sep 17 00:00:00 2001 From: Andreas Lind Petersen Date: Tue, 2 Sep 2014 09:38:46 +0200 Subject: [PATCH 4/7] Put Object.prototype.toString into a var to avoid creating an empty object each time it's used. --- diff.js | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/diff.js b/diff.js index 22db075b..4e412fda 100644 --- a/diff.js +++ b/diff.js @@ -193,6 +193,8 @@ var JsDiff = (function() { return LineDiff.equals(left.replace(/,([\r\n])/g, '$1'), right.replace(/,([\r\n])/g, '$1')); }; + var objectPrototypeToString = Object.prototype.toString; + function canonicalize(obj, stack) { stack = stack || []; @@ -206,7 +208,7 @@ var JsDiff = (function() { var canonicalizedObj; - if ('[object Array]' == {}.toString.call(obj)) { + if ('[object Array]' === objectPrototypeToString.call(obj)) { stack.push(obj); canonicalizedObj = new Array(obj.length); for (i = 0 ; i < obj.length ; i += 1) { From a9191cb2d9967c6d45f226359eed26a44b396ec1 Mon Sep 17 00:00:00 2001 From: Andreas Lind Date: Thu, 4 Sep 2014 21:40:31 +0200 Subject: [PATCH 5/7] Document the useLongestToken property. --- diff.js | 2 ++ 1 file changed, 2 insertions(+) diff --git a/diff.js b/diff.js index 4e412fda..c98738ad 100644 --- a/diff.js +++ b/diff.js @@ -187,6 +187,8 @@ var JsDiff = (function() { }; JsonDiff = new Diff(); + // Discriminate between two lines of pretty-printed, serialized JSON where one of them has a + // dangling comma and the other doesn't. Turns out including the dangling comma yields the nicest output: JsonDiff.useLongestToken = true; JsonDiff.tokenize = LineDiff.tokenize; JsonDiff.equals = function(left, right) { From d786a63e574529c47074fc3c66a3e8102cb256bf Mon Sep 17 00:00:00 2001 From: Andreas Lind Date: Thu, 4 Sep 2014 21:50:26 +0200 Subject: [PATCH 6/7] diffJson: Test that a circular reference causes JSON.stringify to throw (rather than an infinite loop). --- test/diffTest.js | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/test/diffTest.js b/test/diffTest.js index 18b35971..abd04b37 100644 --- a/test/diffTest.js +++ b/test/diffTest.js @@ -159,6 +159,17 @@ describe('#diffJson', function() { {a: 123, b: 456, c: 789}); diff.convertChangesToXML(diffResult).should.equal('{\n "a": 123,\n "b": 456,\n "c": 789\n}'); }); + + it('should throw an error if one of the objects being diffed has a circular reference', function() { + var circular = {foo: 123}; + circular.bar = circular; + (function () { + diff.diffJson( + circular, + {foo: 123, bar: {}} + ); + }).should.throw('Converting circular structure to JSON'); + }); }); describe('convertToDMP', function() { From d12c46d3078f0752f204482a1b0f7b2de4b72156 Mon Sep 17 00:00:00 2001 From: Andreas Lind Date: Thu, 4 Sep 2014 22:14:42 +0200 Subject: [PATCH 7/7] canonicalize: Keep track of the replacement objects so the correct object can be substituted when a circular reference is detected. --- diff.js | 21 +++++++++++++++------ test/canonicalize.js | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 6 deletions(-) create mode 100644 test/canonicalize.js diff --git a/diff.js b/diff.js index c98738ad..2e3dc5f9 100644 --- a/diff.js +++ b/diff.js @@ -197,14 +197,17 @@ var JsDiff = (function() { var objectPrototypeToString = Object.prototype.toString; - function canonicalize(obj, stack) { + // This function handles the presence of circular references by bailing out when encountering an + // object that is already on the "stack" of items being processed. + function canonicalize(obj, stack, replacementStack) { stack = stack || []; + replacementStack = replacementStack || []; var i; for (var i = 0 ; i < stack.length ; i += 1) { if (stack[i] === obj) { - return obj; + return replacementStack[i]; } } @@ -213,13 +216,16 @@ var JsDiff = (function() { if ('[object Array]' === objectPrototypeToString.call(obj)) { stack.push(obj); canonicalizedObj = new Array(obj.length); + replacementStack.push(canonicalizedObj); for (i = 0 ; i < obj.length ; i += 1) { - canonicalizedObj[i] = canonicalize(obj[i], stack); + canonicalizedObj[i] = canonicalize(obj[i], stack, replacementStack); } stack.pop(); + replacementStack.pop(); } else if (typeof obj === 'object' && obj !== null) { stack.push(obj); canonicalizedObj = {}; + replacementStack.push(canonicalizedObj); var sortedKeys = []; for (var key in obj) { sortedKeys.push(key); @@ -227,14 +233,15 @@ var JsDiff = (function() { sortedKeys.sort(); for (i = 0 ; i < sortedKeys.length ; i += 1) { var key = sortedKeys[i]; - canonicalizedObj[key] = canonicalize(obj[key], stack); + canonicalizedObj[key] = canonicalize(obj[key], stack, replacementStack); } stack.pop(); + replacementStack.pop(); } else { canonicalizedObj = obj; } return canonicalizedObj; - } + }; return { Diff: Diff, @@ -420,7 +427,9 @@ var JsDiff = (function() { ret.push([(change.added ? 1 : change.removed ? -1 : 0), change.value]); } return ret; - } + }, + + canonicalize: canonicalize }; })(); diff --git a/test/canonicalize.js b/test/canonicalize.js new file mode 100644 index 00000000..7df0600c --- /dev/null +++ b/test/canonicalize.js @@ -0,0 +1,37 @@ +const VERBOSE = false; + +var diff = require('../diff'); + +function getKeys(obj) { + var keys = []; + for (var key in obj) { + if (obj.hasOwnProperty(key)) { + keys.push(key); + } + } + return keys; +} + +describe('#canonicalize', function() { + it('should put the keys in canonical order', function() { + getKeys(diff.canonicalize({b: 456, a: 123})).should.eql(['a', 'b']); + }); + + it('should dive into nested objects', function() { + var canonicalObj = diff.canonicalize({b: 456, a: {d: 123, c: 456}}); + getKeys(canonicalObj.a).should.eql(['c', 'd']); + }); + + it('should dive into nested arrays', function() { + var canonicalObj = diff.canonicalize({b: 456, a: [789, {d: 123, c: 456}]}); + getKeys(canonicalObj.a[1]).should.eql(['c', 'd']); + }); + + it('should handle circular references correctly', function() { + var obj = {b: 456}; + obj.a = obj; + var canonicalObj = diff.canonicalize(obj); + getKeys(canonicalObj).should.eql(['a', 'b']); + getKeys(canonicalObj.a).should.eql(['a', 'b']); + }); +});