-
Notifications
You must be signed in to change notification settings - Fork 505
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Implemented diffJson #28
Changes from all commits
2b2557b
e119b1a
90e6979
c7a5bab
a9191cb
d786a63
d12c46d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -127,8 +127,11 @@ var JsDiff = (function() { | |
while (newPos+1 < newLen && oldPos+1 < oldLen && this.equals(newString[newPos+1], oldString[oldPos+1])) { | ||
newPos++; | ||
oldPos++; | ||
|
||
this.pushComponent(basePath.components, newString[newPos], undefined, undefined); | ||
var value = newString[newPos]; | ||
if (this.useLongestToken && oldString[oldPos].length > value.length) { | ||
value = oldString[oldPos]; | ||
} | ||
this.pushComponent(basePath.components, value, undefined, undefined); | ||
} | ||
basePath.newPos = newPos; | ||
return oldPos; | ||
|
@@ -183,6 +186,63 @@ var JsDiff = (function() { | |
return retLines; | ||
}; | ||
|
||
JsonDiff = new Diff(); | ||
// Discriminate between two lines of pretty-printed, serialized JSON where one of them has a | ||
// dangling comma and the other doesn't. Turns out including the dangling comma yields the nicest output: | ||
JsonDiff.useLongestToken = true; | ||
JsonDiff.tokenize = LineDiff.tokenize; | ||
JsonDiff.equals = function(left, right) { | ||
return LineDiff.equals(left.replace(/,([\r\n])/g, '$1'), right.replace(/,([\r\n])/g, '$1')); | ||
}; | ||
|
||
var objectPrototypeToString = Object.prototype.toString; | ||
|
||
// This function handles the presence of circular references by bailing out when encountering an | ||
// object that is already on the "stack" of items being processed. | ||
function canonicalize(obj, stack, replacementStack) { | ||
stack = stack || []; | ||
replacementStack = replacementStack || []; | ||
|
||
var i; | ||
|
||
for (var i = 0 ; i < stack.length ; i += 1) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What is this loop doing? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It checks that the item being canonicalized is not already on the stack (ie. that it's working on a circular structure). This avoids infinite recursion, and it's safe to bail out because that object will canonicalized eventually. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That's what I thought but was not certain. A few things here:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Fixed in d12c46d and added tests. The tests required me to expose |
||
if (stack[i] === obj) { | ||
return replacementStack[i]; | ||
} | ||
} | ||
|
||
var canonicalizedObj; | ||
|
||
if ('[object Array]' === objectPrototypeToString.call(obj)) { | ||
stack.push(obj); | ||
canonicalizedObj = new Array(obj.length); | ||
replacementStack.push(canonicalizedObj); | ||
for (i = 0 ; i < obj.length ; i += 1) { | ||
canonicalizedObj[i] = canonicalize(obj[i], stack, replacementStack); | ||
} | ||
stack.pop(); | ||
replacementStack.pop(); | ||
} else if (typeof obj === 'object' && obj !== null) { | ||
stack.push(obj); | ||
canonicalizedObj = {}; | ||
replacementStack.push(canonicalizedObj); | ||
var sortedKeys = []; | ||
for (var key in obj) { | ||
sortedKeys.push(key); | ||
} | ||
sortedKeys.sort(); | ||
for (i = 0 ; i < sortedKeys.length ; i += 1) { | ||
var key = sortedKeys[i]; | ||
canonicalizedObj[key] = canonicalize(obj[key], stack, replacementStack); | ||
} | ||
stack.pop(); | ||
replacementStack.pop(); | ||
} else { | ||
canonicalizedObj = obj; | ||
} | ||
return canonicalizedObj; | ||
}; | ||
|
||
return { | ||
Diff: Diff, | ||
|
||
|
@@ -191,6 +251,13 @@ var JsDiff = (function() { | |
diffWordsWithSpace: function(oldStr, newStr) { return WordWithSpaceDiff.diff(oldStr, newStr); }, | ||
diffLines: function(oldStr, newStr) { return LineDiff.diff(oldStr, newStr); }, | ||
|
||
diffJson: function(oldObj, newObj) { | ||
return JsonDiff.diff( | ||
typeof oldObj === 'string' ? oldObj : JSON.stringify(canonicalize(oldObj), undefined, " "), | ||
typeof newObj === 'string' ? newObj : JSON.stringify(canonicalize(newObj), undefined, " ") | ||
); | ||
}, | ||
|
||
diffCss: function(oldStr, newStr) { return CssDiff.diff(oldStr, newStr); }, | ||
|
||
createPatch: function(fileName, oldStr, newStr, oldHeader, newHeader) { | ||
|
@@ -360,7 +427,9 @@ var JsDiff = (function() { | |
ret.push([(change.added ? 1 : change.removed ? -1 : 0), change.value]); | ||
} | ||
return ret; | ||
} | ||
}, | ||
|
||
canonicalize: canonicalize | ||
}; | ||
})(); | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
const VERBOSE = false; | ||
|
||
var diff = require('../diff'); | ||
|
||
function getKeys(obj) { | ||
var keys = []; | ||
for (var key in obj) { | ||
if (obj.hasOwnProperty(key)) { | ||
keys.push(key); | ||
} | ||
} | ||
return keys; | ||
} | ||
|
||
describe('#canonicalize', function() { | ||
it('should put the keys in canonical order', function() { | ||
getKeys(diff.canonicalize({b: 456, a: 123})).should.eql(['a', 'b']); | ||
}); | ||
|
||
it('should dive into nested objects', function() { | ||
var canonicalObj = diff.canonicalize({b: 456, a: {d: 123, c: 456}}); | ||
getKeys(canonicalObj.a).should.eql(['c', 'd']); | ||
}); | ||
|
||
it('should dive into nested arrays', function() { | ||
var canonicalObj = diff.canonicalize({b: 456, a: [789, {d: 123, c: 456}]}); | ||
getKeys(canonicalObj.a[1]).should.eql(['c', 'd']); | ||
}); | ||
|
||
it('should handle circular references correctly', function() { | ||
var obj = {b: 456}; | ||
obj.a = obj; | ||
var canonicalObj = diff.canonicalize(obj); | ||
getKeys(canonicalObj).should.eql(['a', 'b']); | ||
getKeys(canonicalObj.a).should.eql(['a', 'b']); | ||
}); | ||
}); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I must admit that it's been a while since I've looked at this. How does useLongestToken change the behavior here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's being used to discriminate between two lines of pretty-printed, serialized JSON where one of them has a dangling comma and the other doesn't. Turns out including the dangling comma yields the nicest output. For example when diffing
{"foo": 123, "bar": 456, "quux": 789}
and{"foo": 123, "bar": 456}
you'll get:{ "foo": 123, "bar": 456, - "quux": 456 }
rather than:
{ "foo": 123, "bar": 456 - "quux": 456 }
I think it reads better with that dangling comma after 123.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Gotcha. This makes total sense but we should certainly comment it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed in a9191cb (added part of the above as a comment)