Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix #245 #253

Merged
merged 1 commit into from
May 27, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 15 additions & 6 deletions dist/xss.js
Original file line number Diff line number Diff line change
Expand Up @@ -620,7 +620,7 @@ function parseTag(html, onTag, escapeHtml) {
return rethtml;
}

var REGEXP_ILLEGAL_ATTR_NAME = /[^a-zA-Z0-9_:\.\-]/gim;
var REGEXP_ILLEGAL_ATTR_NAME = /[^a-zA-Z0-9\\_:.-]/gim;

/**
* parse input attributes and returns processed attributes
Expand All @@ -633,6 +633,7 @@ function parseAttr(html, onAttr) {
"use strict";

var lastPos = 0;
var lastMarkPos = 0;
var retAttrs = [];
var tmpName = false;
var len = html.length;
Expand All @@ -652,19 +653,18 @@ function parseAttr(html, onAttr) {
if (tmpName === false && c === "=") {
tmpName = html.slice(lastPos, i);
lastPos = i + 1;
lastMarkPos = html.charAt(lastPos) === '"' || html.charAt(lastPos) === "'" ? lastPos : findNextQuotationMark(html, i + 1);
continue;
}
if (tmpName !== false) {
if (
i === lastPos &&
(c === '"' || c === "'") &&
html.charAt(i - 1) === "="
i === lastMarkPos
) {
j = html.indexOf(c, i + 1);
if (j === -1) {
break;
} else {
v = _.trim(html.slice(lastPos + 1, j));
v = _.trim(html.slice(lastMarkPos + 1, j));
addAttr(tmpName, v);
tmpName = false;
i = j;
Expand Down Expand Up @@ -723,6 +723,15 @@ function findNextEqual(str, i) {
}
}

function findNextQuotationMark(str, i) {
for (; i < str.length; i++) {
var c = str[i];
if (c === " ") continue;
if (c === "'" || c === '"') return i;
return -1;
}
}

function findBeforeEqual(str, i) {
for (; i > 0; i--) {
var c = str[i];
Expand Down Expand Up @@ -941,7 +950,7 @@ FilterXSS.prototype.process = function (html) {
sourcePosition: sourcePosition,
position: position,
isClosing: isClosing,
isWhite: whiteList.hasOwnProperty(tag),
isWhite: Object.prototype.hasOwnProperty.call(whiteList, tag),
};

// call `onTag()`
Expand Down
19 changes: 14 additions & 5 deletions lib/parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ function parseTag(html, onTag, escapeHtml) {
return rethtml;
}

var REGEXP_ILLEGAL_ATTR_NAME = /[^a-zA-Z0-9_:.-]/gim;
var REGEXP_ILLEGAL_ATTR_NAME = /[^a-zA-Z0-9\\_:.-]/gim;

/**
* parse input attributes and returns processed attributes
Expand All @@ -126,6 +126,7 @@ function parseAttr(html, onAttr) {
"use strict";

var lastPos = 0;
var lastMarkPos = 0;
var retAttrs = [];
var tmpName = false;
var len = html.length;
Expand All @@ -145,19 +146,18 @@ function parseAttr(html, onAttr) {
if (tmpName === false && c === "=") {
tmpName = html.slice(lastPos, i);
lastPos = i + 1;
lastMarkPos = html.charAt(lastPos) === '"' || html.charAt(lastPos) === "'" ? lastPos : findNextQuotationMark(html, i + 1);
continue;
}
if (tmpName !== false) {
if (
i === lastPos &&
(c === '"' || c === "'") &&
html.charAt(i - 1) === "="
i === lastMarkPos
) {
j = html.indexOf(c, i + 1);
if (j === -1) {
break;
} else {
v = _.trim(html.slice(lastPos + 1, j));
v = _.trim(html.slice(lastMarkPos + 1, j));
addAttr(tmpName, v);
tmpName = false;
i = j;
Expand Down Expand Up @@ -216,6 +216,15 @@ function findNextEqual(str, i) {
}
}

function findNextQuotationMark(str, i) {
for (; i < str.length; i++) {
var c = str[i];
if (c === " ") continue;
if (c === "'" || c === '"') return i;
return -1;
}
}

function findBeforeEqual(str, i) {
for (; i > 0; i--) {
var c = str[i];
Expand Down
15 changes: 12 additions & 3 deletions test/test_html_parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ describe("test HTML parser", function() {
it("#parseAttr", function() {
var i = 0;
var html = parseAttr(
'href="#"attr1=b attr2=c attr3 attr4=\'value4"\'attr5/',
'href="#"attr1=b attr2=c attr3 attr4=\'value4"\'attr5/ attr6\\" attr7= "123 456"',
function(name, value) {
i++;
debug(arguments);
Expand Down Expand Up @@ -103,15 +103,24 @@ describe("test HTML parser", function() {
assert.equal(name, "attr5");
assert.equal(value, "");
return attr(name, value);
} else {
} else if(i === 7) {
assert.equal(name, "attr6\\");
assert.equal(value, "");
return attr(name, value);
} else if(i === 8){
assert.equal(name , "attr7");
assert.equal(value , "123 456");
return attr(name, value);
}
else {
throw new Error();
}
}
);
debug(html);
assert.equal(
html,
'href="#" attr1="b" attr2="c" attr3 attr4="value4&quote;" attr5'
'href="#" attr1="b" attr2="c" attr3 attr4="value4&quote;" attr5 attr6\\ attr7="123 456"'
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

这里结果不太符合预期,attr6\\ 应该为 attr6\\"

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

这是因为正则var REGEXP_ILLEGAL_ATTR_NAME = /[^a-zA-Z0-9\\_:.-]/gim会对属性字符串过滤,双引号不在有效的属性符号内,所以被过滤掉了,这个PR理论上不涉及这个正则的逻辑更改

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

如果你需要把双引号认为是合法的属性字符的话,我可以继续更改这个PR

);
});

Expand Down