Strip numeric character references that are not legal XML characters.

unescapeXMLReplace now decodes a numeric reference only when its code point
matches the XML 1.0 Char production. Any other reference (control characters,
surrogates, out-of-range or unparsable values) is replaced by the empty string
instead of being passed to String.fromCodePoint.

diff --git a/lib/escape.js b/lib/escape.js
index a20b61a5..5fd682e7 100644
--- a/lib/escape.js
+++ b/lib/escape.js
@@ -22,11 +22,21 @@ var unescapeXMLTable = {
 
 function unescapeXMLReplace (match) {
   if (match[1] === '#') {
+    var num
     if (match[2] === 'x') {
-      return String.fromCodePoint(parseInt(match.slice(3), 16))
+      num = parseInt(match.slice(3), 16)
     } else {
-      return String.fromCodePoint(parseInt(match.slice(2), 10))
+      num = parseInt(match.slice(2), 10)
     }
+    // https://www.w3.org/TR/xml/#NT-Char defines legal XML characters:
+    // #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
+    if (num === 0x9 || num === 0xA || num === 0xD ||
+        (num >= 0x20 && num <= 0xD7FF) ||
+        (num >= 0xE000 && num <= 0xFFFD) ||
+        (num >= 0x10000 && num <= 0x10FFFF)) {
+      return String.fromCodePoint(num)
+    }
+    return ''
   }
   return unescapeXMLTable[match]
 }
diff --git a/test/escape-test.js b/test/escape-test.js
index 6bcbef12..7fcc7156 100644
--- a/test/escape-test.js
+++ b/test/escape-test.js
@@ -52,6 +52,9 @@ vows.describe('escape').addBatch({
     'unescapes numeric entities': function () {
       assert.equal(unescapeXML('&#64;'), '@')
     },
+    'strips control characters': function () {
+      assert.equal(unescapeXML('&#0;'), '')
+    },
     'unescapes hexadecimal entities': function () {
       assert.equal(unescapeXML('&#x40;'), '@')
     },