-
Notifications
You must be signed in to change notification settings - Fork 1.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Prevent various XSS attacks [rebase and update of #276] #495
Changes from 13 commits
1140613
bf5105c
1d4296f
b3d45c4
6bb66db
af04ac9
131ba75
6d0156d
e4bb123
4dc98b6
aee3963
4bae1c9
054ba3c
dc30cb4
2e4afde
226f636
c63b690
b1e5aeb
bbb7687
67c3efb
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -75,6 +75,26 @@ function setUrlsLinked($urlsLinked) | |
|
||
protected $urlsLinked = true; | ||
|
||
function setSafeLinksEnabled($safeLinksEnabled) | ||
{ | ||
$this->safeLinksEnabled = $safeLinksEnabled; | ||
|
||
return $this; | ||
} | ||
|
||
protected $safeLinksEnabled = true; | ||
|
||
protected $safeLinksWhitelist = array( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does this have to be an instance member? Seems like it can be moved to There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In theory it would let an extension define additional protocols without having to worry about implementation of how to whitelist them There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I.e. you could fix a bug in the filter implementation that wouldn't have to be reflected in the extension if they're just adding data. |
||
'http://', | ||
'https://', | ||
'ftp://', | ||
'ftps://', | ||
'mailto:', | ||
'data:image/png;base64,', | ||
'data:image/gif;base64,', | ||
'data:image/jpeg;base64,', | ||
); | ||
|
||
# | ||
# Lines | ||
# | ||
|
@@ -342,8 +362,6 @@ protected function blockCodeComplete($Block) | |
{ | ||
$text = $Block['element']['text']['text']; | ||
|
||
$text = htmlspecialchars($text, ENT_NOQUOTES, 'UTF-8'); | ||
|
||
$Block['element']['text']['text'] = $text; | ||
|
||
return $Block; | ||
|
@@ -457,8 +475,6 @@ protected function blockFencedCodeComplete($Block) | |
{ | ||
$text = $Block['element']['text']['text']; | ||
|
||
$text = htmlspecialchars($text, ENT_NOQUOTES, 'UTF-8'); | ||
|
||
$Block['element']['text']['text'] = $text; | ||
|
||
return $Block; | ||
|
@@ -515,10 +531,10 @@ protected function blockList($Line) | |
), | ||
); | ||
|
||
if($name === 'ol') | ||
if($name === 'ol') | ||
{ | ||
$listStart = stristr($matches[0], '.', true); | ||
|
||
if($listStart !== '1') | ||
{ | ||
$Block['element']['attributes'] = array('start' => $listStart); | ||
|
@@ -1074,7 +1090,6 @@ protected function inlineCode($Excerpt) | |
if (preg_match('/^('.$marker.'+)[ ]*(.+?)[ ]*(?<!'.$marker.')\1(?!'.$marker.')/s', $Excerpt['text'], $matches)) | ||
{ | ||
$text = $matches[2]; | ||
$text = htmlspecialchars($text, ENT_NOQUOTES, 'UTF-8'); | ||
$text = preg_replace("/[ ]*\n/", ' ', $text); | ||
|
||
return array( | ||
|
@@ -1253,8 +1268,6 @@ protected function inlineLink($Excerpt) | |
$Element['attributes']['title'] = $Definition['title']; | ||
} | ||
|
||
$Element['attributes']['href'] = str_replace(array('&', '<'), array('&', '<'), $Element['attributes']['href']); | ||
|
||
return array( | ||
'extent' => $extent, | ||
'element' => $Element, | ||
|
@@ -1343,14 +1356,16 @@ protected function inlineUrl($Excerpt) | |
|
||
if (preg_match('/\bhttps?:[\/]{2}[^\s<]+\b\/*/ui', $Excerpt['context'], $matches, PREG_OFFSET_CAPTURE)) | ||
{ | ||
$url = $matches[0][0]; | ||
|
||
$Inline = array( | ||
'extent' => strlen($matches[0][0]), | ||
'position' => $matches[0][1], | ||
'element' => array( | ||
'name' => 'a', | ||
'text' => $matches[0][0], | ||
'text' => $url, | ||
'attributes' => array( | ||
'href' => $matches[0][0], | ||
'href' => $url, | ||
), | ||
), | ||
); | ||
|
@@ -1363,7 +1378,7 @@ protected function inlineUrlTag($Excerpt) | |
{ | ||
if (strpos($Excerpt['text'], '>') !== false and preg_match('/^<(\w+:\/{2}[^ >]+)>/i', $Excerpt['text'], $matches)) | ||
{ | ||
$url = str_replace(array('&', '<'), array('&', '<'), $matches[1]); | ||
$url = $matches[1]; | ||
|
||
return array( | ||
'extent' => strlen($matches[0]), | ||
|
@@ -1401,6 +1416,8 @@ protected function unmarkedText($text) | |
|
||
protected function element(array $Element) | ||
{ | ||
$Element = $this->sanitiseElement($Element); | ||
|
||
$markup = '<'.$Element['name']; | ||
|
||
if (isset($Element['attributes'])) | ||
|
@@ -1412,7 +1429,7 @@ protected function element(array $Element) | |
continue; | ||
} | ||
|
||
$markup .= ' '.$name.'="'.$value.'"'; | ||
$markup .= ' '.$name.'="'.self::escape($value).'"'; | ||
} | ||
} | ||
|
||
|
@@ -1426,7 +1443,7 @@ protected function element(array $Element) | |
} | ||
else | ||
{ | ||
$markup .= $Element['text']; | ||
$markup .= self::escape($Element['text'], true); | ||
} | ||
|
||
$markup .= '</'.$Element['name'].'>'; | ||
|
@@ -1485,10 +1502,80 @@ function parse($text) | |
return $markup; | ||
} | ||
|
||
protected function sanitiseElement(array $Element) | ||
{ | ||
static $goodAttribute = '/^[a-zA-Z0-9][a-zA-Z0-9-_]*+$/'; | ||
static $safeUrlNameToAtt = array( | ||
'a' => 'href', | ||
'img' => 'src', | ||
); | ||
|
||
if (isset($safeUrlNameToAtt[$Element['name']])) | ||
{ | ||
$Element = $this->filterUnsafeUrlInAttribute($Element, $safeUrlNameToAtt[$Element['name']]); | ||
} | ||
|
||
if ( ! empty($Element['attributes'])) | ||
{ | ||
foreach ($Element['attributes'] as $att => $val) | ||
{ | ||
# filter out badly parsed attribute | ||
if ( ! preg_match($goodAttribute, $att)) | ||
{ | ||
unset($Element['attributes'][$att]); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What's the reason for this? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Was needed when I used |
||
} | ||
# dump onevent attribute | ||
elseif (preg_match('/^on/i', $att)) | ||
{ | ||
unset($Element['attributes'][$att]); | ||
} | ||
} | ||
} | ||
|
||
return $Element; | ||
} | ||
|
||
protected function filterUnsafeUrlInAttribute(array $Element, $attribute) | ||
{ | ||
if ($this->safeLinksEnabled) | ||
{ | ||
$safe = false; | ||
|
||
foreach ($this->safeLinksWhitelist as $scheme) | ||
{ | ||
if (stripos($Element['attributes'][$attribute], $scheme) === 0) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I didn't test this, but I think it treats relative links like There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Furthermore: Don't use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yup, it'll treat that as unsafe. Problem with allowing Also see https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet#Embedded_newline_to_break_up_XSS for some examples of getting XSS in a HTML attribute via some obscenely permissive browser parsing :( e.g. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As an alternate method for what's done here, one idea I've had is selectively partially url encoding attributes that can't be guaranteed safe by the whitelist. I've written an implementation here. One thing I wasn't sure about was how permissive to be with which characters to allow though. I think holding back on encoding There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nevertheless are relative paths an important use case. It might be a good idea to look into the specs to find out how URI schemes must look like to be valid. Just guessing here, but I think the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm still not entirely convinced that this is best solution in every detail, but you've convinced me that the approach is right. 
There might still be some open detail questions, I unfortunately don't have time to look through everything in detail at the moment. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What if we used your approach https://github.com/erusev/parsedown/pull/495/files/aee3963e6b97186b1e5526c118bf5d2d872cd8ee#r114438464 to make That way custom protocols are supported for authors (who trust their own content), but will be off for user generated content (that they do not trust). Perhaps we could ease the complication of having to enable two options in sync by introducing a new option that does both behind the scenes: There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The one has nothing to do with the other, never ever introduce BC breaking changes without incrementing the first digit of the version (i.e. when this feature is enabled by default, it must be released as Parsedown 2.0). You'll break all dependent applications otherwise - and Parsedown is no end user project, it's a library, thus you'll break hundreds of applications with thousands of users. See http://semver.org. The only exception is behavior that never was expected (like some weird edge cases when parsing markup), neither officially, nor how it was practically used by others. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I'd tend to agree with that. In-fact even if we change no public default behaviour, IMO it would be valuable to consider how much of a version jump changes to the protected API would warrant (see: #495 (comment)) What do you think about introducing a new option though – in theory if we introduced something like Having this off by default would probably ease concerns about custom protocols (and as you said, having There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If it is disabled by default - sure, why not. However, as I've said before (#495 (comment)), please keep in mind that people might want to escape markup without having security/user-generated content in mind. Thus there might still be users who do want to escape markup, but don't want to have Nevertheless, this is no crucial question for me. Important is that this isn't enabled by default, as this would break BC in an absolute catastrophic way. |
||
{ | ||
$safe = true; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Simply return immediately ( There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yup, good catch! |
||
|
||
break; | ||
} | ||
} | ||
|
||
if ( ! $safe) | ||
{ | ||
$Element['attributes'][$attribute] = preg_replace_callback( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we find a simpler alternative to There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The current way it's set out is to have a whitelist of characters we don't want to encode (and encode everything else). I could achieve the same result with There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah sorry, didn't see your edit. Could do it with preg_match_all with match positions returned and applying encoding to those ranges, again, not sure it'll be faster? P.S. the regex will match as many chars as it can in one go before making the function call at present There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @erusev this would be an alternative using if (
preg_match_all(
'/[^\/#?&=%]++/',
$Element['attributes'][$attribute],
$matches,
PREG_OFFSET_CAPTURE
)
) {
$offset = 0;
foreach ($matches[0] as $match)
{
$len = strlen($match[0]);
$encoded = urlencode($match[0]);
$Element['attributes'][$attribute] = substr_replace(
$Element['attributes'][$attribute],
$encoded,
$offset + $match[1],
$len
);
$offset += strlen($encoded) - $len;
}
} More likely to introduce bugs with this though IMO, and I'm not sure it'll save that much on performance just looking at it. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm pretty sure |
||
'/[^\/#?&=%]++/', | ||
function (array $match) | ||
{ | ||
return urlencode($match[0]); | ||
}, | ||
$Element['attributes'][$attribute] | ||
); | ||
} | ||
} | ||
|
||
return $Element; | ||
} | ||
|
||
# | ||
# Static Methods | ||
# | ||
|
||
protected static function escape($text, $allowQuotes = false) | ||
{ | ||
return htmlspecialchars($text, $allowQuotes ? ENT_NOQUOTES : ENT_QUOTES, 'UTF-8'); | ||
} | ||
|
||
static function instance($name = 'default') | ||
{ | ||
if (isset(self::$instances[$name])) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
<p><a href="https://www.example.com"">xss</a></p> | ||
<p><img src="https://www.example.com"" alt="xss" /></p> | ||
<p><a href="https://www.example.com'">xss</a></p> | ||
<p><img src="https://www.example.com'" alt="xss" /></p> | ||
<p><img src="https://www.example.com" alt="xss"" /></p> | ||
<p><img src="https://www.example.com" alt="xss'" /></p> |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
[xss](https://www.example.com") | ||
|
||
![xss](https://www.example.com") | ||
|
||
[xss](https://www.example.com') | ||
|
||
![xss](https://www.example.com') | ||
|
||
![xss"](https://www.example.com) | ||
|
||
![xss'](https://www.example.com) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
<p><a href="javascript%3Aalert%281%29">xss</a></p> | ||
<p><a href="javascript%3Aalert%281%29">xss</a></p> | ||
<p><a href="javascript%3A//alert%281%29">xss</a></p> | ||
<p><a href="javascript&colon%3Balert%281%29">xss</a></p> | ||
<p><img src="javascript%3Aalert%281%29" alt="xss" /></p> | ||
<p><img src="javascript%3Aalert%281%29" alt="xss" /></p> | ||
<p><img src="javascript%3A//alert%281%29" alt="xss" /></p> | ||
<p><img src="javascript&colon%3Balert%281%29" alt="xss" /></p> | ||
<p><a href="data%3Atext/html%3Bbase64%2CPHNjcmlwdD5hbGVydCgxKTwvc2NyaXB0Pg==">xss</a></p> | ||
<p><a href="data%3Atext/html%3Bbase64%2CPHNjcmlwdD5hbGVydCgxKTwvc2NyaXB0Pg==">xss</a></p> | ||
<p><a href="data%3A//text/html%3Bbase64%2CPHNjcmlwdD5hbGVydCgxKTwvc2NyaXB0Pg==">xss</a></p> | ||
<p><a href="data&colon%3Btext/html%3Bbase64%2CPHNjcmlwdD5hbGVydCgxKTwvc2NyaXB0Pg==">xss</a></p> | ||
<p><img src="data%3Atext/html%3Bbase64%2CPHNjcmlwdD5hbGVydCgxKTwvc2NyaXB0Pg==" alt="xss" /></p> | ||
<p><img src="data%3Atext/html%3Bbase64%2CPHNjcmlwdD5hbGVydCgxKTwvc2NyaXB0Pg==" alt="xss" /></p> | ||
<p><img src="data%3A//text/html%3Bbase64%2CPHNjcmlwdD5hbGVydCgxKTwvc2NyaXB0Pg==" alt="xss" /></p> | ||
<p><img src="data&colon%3Btext/html%3Bbase64%2CPHNjcmlwdD5hbGVydCgxKTwvc2NyaXB0Pg==" alt="xss" /></p> |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
[xss](javascript:alert(1)) | ||
|
||
[xss]( javascript:alert(1)) | ||
|
||
[xss](javascript://alert(1)) | ||
|
||
[xss](javascript:alert(1)) | ||
|
||
![xss](javascript:alert(1)) | ||
|
||
![xss]( javascript:alert(1)) | ||
|
||
![xss](javascript://alert(1)) | ||
|
||
![xss](javascript:alert(1)) | ||
|
||
[xss](data:text/html;base64,PHNjcmlwdD5hbGVydCgxKTwvc2NyaXB0Pg==) | ||
|
||
[xss]( data:text/html;base64,PHNjcmlwdD5hbGVydCgxKTwvc2NyaXB0Pg==) | ||
|
||
[xss](data://text/html;base64,PHNjcmlwdD5hbGVydCgxKTwvc2NyaXB0Pg==) | ||
|
||
[xss](data:text/html;base64,PHNjcmlwdD5hbGVydCgxKTwvc2NyaXB0Pg==) | ||
|
||
![xss](data:text/html;base64,PHNjcmlwdD5hbGVydCgxKTwvc2NyaXB0Pg==) | ||
|
||
![xss]( data:text/html;base64,PHNjcmlwdD5hbGVydCgxKTwvc2NyaXB0Pg==) | ||
|
||
![xss](data://text/html;base64,PHNjcmlwdD5hbGVydCgxKTwvc2NyaXB0Pg==) | ||
|
||
![xss](data:text/html;base64,PHNjcmlwdD5hbGVydCgxKTwvc2NyaXB0Pg==) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
<p><script>alert(1)</script></p> | ||
<p><script></p> | ||
<p>alert(1)</p> | ||
<p></script></p> | ||
<p><script> | ||
alert(1) | ||
</script></p> |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
<script>alert(1)</script> | ||
|
||
<script> | ||
|
||
alert(1) | ||
|
||
</script> | ||
|
||
|
||
<script> | ||
alert(1) | ||
</script> |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
IMO this shouldn't be enabled by default, it breaks BC. Enabling this without `setMarkupEscaped(true)` makes no sense anyway - it prevents absolutely nothing without escaped markup. Never confuse people by letting them do pointless things.
How about something like this (disallow enabling `safeLinksEnabled` without `markupEscaped` and always enable/disable `safeLinksEnabled` and `markupEscaped` at the same time; however, disabling `safeLinksEnabled` when `markupEscaped` is enabled is still possible - escaping markup doesn't necessarily imply a "safe mode", but in contrast, a "safe mode" isn't possible without escaping markup)?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, so some of this is spoken to by decisions made in the original PR.
I think the default was @erusev's suggestion as far as I can tell though, see #276 (comment)
If it was up to me I'd make `markupEscaped` default to on too 😉
I'm happy to go with the flow here though, so I'll await some more feedback
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
My 👎 for this, BC is IMO very important 😝
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hehe, I mean as an initial decision (of course too late now! ;p)