-
Notifications
You must be signed in to change notification settings - Fork 1.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Prevent various XSS attacks [rebase and update of #276] #495
Changes from 10 commits
1140613
bf5105c
1d4296f
b3d45c4
6bb66db
af04ac9
131ba75
6d0156d
e4bb123
4dc98b6
aee3963
4bae1c9
054ba3c
dc30cb4
2e4afde
226f636
c63b690
b1e5aeb
bbb7687
67c3efb
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -75,6 +75,27 @@ function setUrlsLinked($urlsLinked) | |
|
||
protected $urlsLinked = true; | ||
|
||
function setSafeLinksEnabled($safeLinksEnabled) | ||
{ | ||
$this->safeLinksEnabled = $safeLinksEnabled; | ||
|
||
return $this; | ||
} | ||
|
||
protected $safeLinksEnabled = true; | ||
|
||
protected $safeLinksWhitelist = array( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does this have to be an instance member? Seems like it can be moved to There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In theory it would let an extension define additional protocols without having to worry about implementation of how to whitelist them There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I.e. you could fix a bug in the filter implementation that wouldn't have to be reflected in the extension if they're just adding data. |
||
'http://', | ||
'https://', | ||
'/', | ||
'ftp://', | ||
'ftps://', | ||
'mailto:', | ||
'data:image/png;base64,', | ||
'data:image/gif;base64,', | ||
'data:image/jpg;base64,', | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's actually There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Cheers ;-) Is jpg allowed as a variant? (i.e. should I add both?) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No, it's invalid. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Cool, ty! 😃 |
||
); | ||
|
||
# | ||
# Lines | ||
# | ||
|
@@ -342,8 +363,6 @@ protected function blockCodeComplete($Block) | |
{ | ||
$text = $Block['element']['text']['text']; | ||
|
||
$text = htmlspecialchars($text, ENT_NOQUOTES, 'UTF-8'); | ||
|
||
$Block['element']['text']['text'] = $text; | ||
|
||
return $Block; | ||
|
@@ -457,8 +476,6 @@ protected function blockFencedCodeComplete($Block) | |
{ | ||
$text = $Block['element']['text']['text']; | ||
|
||
$text = htmlspecialchars($text, ENT_NOQUOTES, 'UTF-8'); | ||
|
||
$Block['element']['text']['text'] = $text; | ||
|
||
return $Block; | ||
|
@@ -515,10 +532,10 @@ protected function blockList($Line) | |
), | ||
); | ||
|
||
if($name === 'ol') | ||
if($name === 'ol') | ||
{ | ||
$listStart = stristr($matches[0], '.', true); | ||
|
||
if($listStart !== '1') | ||
{ | ||
$Block['element']['attributes'] = array('start' => $listStart); | ||
|
@@ -1074,7 +1091,6 @@ protected function inlineCode($Excerpt) | |
if (preg_match('/^('.$marker.'+)[ ]*(.+?)[ ]*(?<!'.$marker.')\1(?!'.$marker.')/s', $Excerpt['text'], $matches)) | ||
{ | ||
$text = $matches[2]; | ||
$text = htmlspecialchars($text, ENT_NOQUOTES, 'UTF-8'); | ||
$text = preg_replace("/[ ]*\n/", ' ', $text); | ||
|
||
return array( | ||
|
@@ -1253,8 +1269,6 @@ protected function inlineLink($Excerpt) | |
$Element['attributes']['title'] = $Definition['title']; | ||
} | ||
|
||
$Element['attributes']['href'] = str_replace(array('&', '<'), array('&', '<'), $Element['attributes']['href']); | ||
|
||
return array( | ||
'extent' => $extent, | ||
'element' => $Element, | ||
|
@@ -1343,14 +1357,16 @@ protected function inlineUrl($Excerpt) | |
|
||
if (preg_match('/\bhttps?:[\/]{2}[^\s<]+\b\/*/ui', $Excerpt['context'], $matches, PREG_OFFSET_CAPTURE)) | ||
{ | ||
$url = $matches[0][0]; | ||
|
||
$Inline = array( | ||
'extent' => strlen($matches[0][0]), | ||
'position' => $matches[0][1], | ||
'element' => array( | ||
'name' => 'a', | ||
'text' => $matches[0][0], | ||
'text' => $url, | ||
'attributes' => array( | ||
'href' => $matches[0][0], | ||
'href' => $url, | ||
), | ||
), | ||
); | ||
|
@@ -1363,7 +1379,7 @@ protected function inlineUrlTag($Excerpt) | |
{ | ||
if (strpos($Excerpt['text'], '>') !== false and preg_match('/^<(\w+:\/{2}[^ >]+)>/i', $Excerpt['text'], $matches)) | ||
{ | ||
$url = str_replace(array('&', '<'), array('&', '<'), $matches[1]); | ||
$url = $matches[1]; | ||
|
||
return array( | ||
'extent' => strlen($matches[0]), | ||
|
@@ -1401,6 +1417,8 @@ protected function unmarkedText($text) | |
|
||
protected function element(array $Element) | ||
{ | ||
$Element = $this->sanitiseElement($Element); | ||
|
||
$markup = '<'.$Element['name']; | ||
|
||
if (isset($Element['attributes'])) | ||
|
@@ -1412,7 +1430,7 @@ protected function element(array $Element) | |
continue; | ||
} | ||
|
||
$markup .= ' '.$name.'="'.$value.'"'; | ||
$markup .= ' '.$name.'="'.self::escape($value).'"'; | ||
} | ||
} | ||
|
||
|
@@ -1426,7 +1444,7 @@ protected function element(array $Element) | |
} | ||
else | ||
{ | ||
$markup .= $Element['text']; | ||
$markup .= self::escape($Element['text'], true); | ||
} | ||
|
||
$markup .= '</'.$Element['name'].'>'; | ||
|
@@ -1485,10 +1503,80 @@ function parse($text) | |
return $markup; | ||
} | ||
|
||
protected function sanitiseElement(array $Element) | ||
{ | ||
static $badAttributeChars = "\"'= \t\n\r\0\x0B"; | ||
static $safeUrlNameToAtt = array( | ||
'a' => 'href', | ||
'img' => 'src', | ||
); | ||
|
||
if (isset($safeUrlNameToAtt[$Element['name']])) | ||
{ | ||
$Element = $this->filterUnsafeUrlInAttribute($Element, $safeUrlNameToAtt[$Element['name']]); | ||
} | ||
|
||
if ( ! empty($Element['attributes'])) | ||
{ | ||
foreach ($Element['attributes'] as $att => $val) | ||
{ | ||
# clear out nulls | ||
if ($val === null) | ||
{ | ||
unset($Element['attributes'][$att]); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What's the reason for this? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Was needed when I used |
||
} | ||
# filter out badly parsed attribute | ||
elseif (strpbrk($att, $badAttributeChars) !== false) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What's the reason for this? Letting badly parsed attributes produce gibberish is IMO better than silently ignoring it, it's more explicit and gibberish actually helps people to understand that something went wrong. 😉 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So I'd rather throw an exception here TBH, but there isn't really precedent so for in Parsedown for throwing them (so I'll defer that decision to @erusev) In my experience coming across this stuff in the wild, there's a pretty high chance that people will not test their applications with bad data (especially if they don't know it could even be valid). Just for reference, the only chars in that list that would be "valid"-(ish) in the attribute name are All the whitespace characters are invalid in an HTML tag's attribute name, and would certainly create two attributes (so very likely there is an attack). By filtering out attributes containing these characters we only need check for a prefix at the start of the string since we guarantee that there can not be multiple attributes (as interpreted by the browser) contained in the single attribute we are considering. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Btw this isn't technically exhaustive if it happens to be the first attribute, see https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet#Non-alpha-non-digit_XSS
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. sigh I guess we'll also have to ensure that the attribute name starts with an alphanumeric. Perhaps it's best to switch over to a whitelist. I'd think E.g. Chrome parses
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So 4bae1c9 implements this via a whitelist (which is safer), though @erusev should decide whether this is worth the performance hit given Parsedown base doesn't allow user generated attributes vs. allowing extensions todo basically no checking on what attributes they allow, but Parsedown handling them safely. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 👍 |
||
{ | ||
unset($Element['attributes'][$att]); | ||
} | ||
} | ||
|
||
$onEventAttributeKeys = preg_grep('/^on/i', array_keys($Element['attributes'])); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Expensive and unnecessary approach. You're iterating through all attributes anyway, why don't you simply test for attributes starting with "on" using There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yup you're right about that, needs to be case insensitive though. Is There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Definitly! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So are we proposing? # dump onevent attribute
elseif (strlen($att) > 1 and strtolower(substr($att, 0, 2)) === 'on')
{
unset($Element['attributes'][$att]);
} instead of # dump onevent attribute
elseif (stripos($att, 'on') === 0)
{
unset($Element['attributes'][$att]);
} ? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How's this 2e4afde ? |
||
|
||
foreach ($onEventAttributeKeys as $att) | ||
{ | ||
unset($Element['attributes'][$att]); | ||
} | ||
} | ||
|
||
return $Element; | ||
} | ||
|
||
protected function filterUnsafeUrlInAttribute(array $Element, $attribute) | ||
{ | ||
if ($this->safeLinksEnabled) | ||
{ | ||
$safe = false; | ||
|
||
foreach ($this->safeLinksWhitelist as $scheme) | ||
{ | ||
if (stripos($Element['attributes'][$attribute], $scheme) === 0) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I didn't test this, but I think it treats relative links like There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Furthermore: Don't use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yup, it'll treat that as unsafe. Problem with allowing Also see https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet#Embedded_newline_to_break_up_XSS for some examples of getting XSS in a HTML attribute via some obscenely permissive browser parsing :( e.g. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As an alternate method for what's done here, one idea I've had is selectively partially url encoding attributes that can't be guaranteed safe by the whitelist. I've written an implementation here. One thing I wasn't sure about was how permissive to be with which characters to allow though. I think holding back on encoding There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nevertheless are relative paths an important use case. It might be a good idea to look into the specs to find out how URI schemes must look like to be valid. Just guessing here, but I think the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm still not entirely convinced that this is best solution in every detail, but you've convinced me that the approach is right. There might still be some open detail questions, I unfortunately don't have time to look through everything in detail at the moment. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What if we used your approach https://github.com/erusev/parsedown/pull/495/files/aee3963e6b97186b1e5526c118bf5d2d872cd8ee#r114438464 to make That way custom protocols are supported for authors (who trust their own content), but will be off for user generated content (that they do not trust). Perhaps we could ease the complication of having to enable two options in sync by introducing a new option that does both behind the scenes: There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The one has nothing to do with the other, never ever introduce BC breaking changes without incrementing the first digit of the version (i.e. when this feature is enabled by default, it must be released as Parsedown 2.0). You'll break all dependent applications otherwise - and Parsedown is no end user project, it's a library, thus you'll break hundreds of applications with thousands of users. See http://semver.org. The only exception is behavior that never was expected (like some weird edge cases when parsing markup), neither officially, nor how it was practically used by others. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I'd tend to agree with that. In-fact even if we change no public default behaviour, IMO it would be valuable to consider how much of a version jump changes to the protected API would warrant (see: #495 (comment)) What do you think about introducing a new option though – in theory if we introduced something like Having this off by default would probably ease concerns about custom protocols (and as you said, having There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If it is disabled by default - sure, why not. However, as I've said before (#495 (comment)), please keep in mind that people might want to escape markup without having security/user-generated content in mind. Thus there might still be users who do want to escape markup, but don't want to have Nevertheless, this is no crucial question for me. Important is that this isn't enabled by default, as this would break BC in an absolute catastrophic way. |
||
{ | ||
$safe = true; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Simply return immediately ( There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yup, good catch! |
||
|
||
break; | ||
} | ||
} | ||
|
||
if ( ! $safe) | ||
{ | ||
unset($Element['attributes'][$attribute]); | ||
} | ||
} | ||
|
||
return $Element; | ||
} | ||
|
||
# | ||
# Static Methods | ||
# | ||
|
||
protected static function escape($text, $allowQuotes = false) | ||
{ | ||
return htmlspecialchars($text, $allowQuotes ? ENT_NOQUOTES : ENT_QUOTES, 'UTF-8'); | ||
} | ||
|
||
static function instance($name = 'default') | ||
{ | ||
if (isset(self::$instances[$name])) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
<p><a href="https://www.example.com"">xss</a></p> | ||
<p><img src="https://www.example.com"" alt="xss" /></p> | ||
<p><a href="https://www.example.com'">xss</a></p> | ||
<p><img src="https://www.example.com'" alt="xss" /></p> | ||
<p><img src="https://www.example.com" alt="xss"" /></p> | ||
<p><img src="https://www.example.com" alt="xss'" /></p> |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
[xss](https://www.example.com") | ||
|
||
![xss](https://www.example.com") | ||
|
||
[xss](https://www.example.com') | ||
|
||
![xss](https://www.example.com') | ||
|
||
![xss"](https://www.example.com) | ||
|
||
![xss'](https://www.example.com) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
<p><a>xss</a></p> | ||
<p><a>xss</a></p> | ||
<p><a>xss</a></p> | ||
<p><a>xss</a></p> | ||
<p><img alt="xss" /></p> | ||
<p><img alt="xss" /></p> | ||
<p><img alt="xss" /></p> | ||
<p><img alt="xss" /></p> | ||
<p><a>xss</a></p> | ||
<p><a>xss</a></p> | ||
<p><a>xss</a></p> | ||
<p><a>xss</a></p> | ||
<p><img alt="xss" /></p> | ||
<p><img alt="xss" /></p> | ||
<p><img alt="xss" /></p> | ||
<p><img alt="xss" /></p> |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
[xss](javascript:alert(1)) | ||
|
||
[xss]( javascript:alert(1)) | ||
|
||
[xss](javascript://alert(1)) | ||
|
||
[xss](javascript:alert(1)) | ||
|
||
![xss](javascript:alert(1)) | ||
|
||
![xss]( javascript:alert(1)) | ||
|
||
![xss](javascript://alert(1)) | ||
|
||
![xss](javascript:alert(1)) | ||
|
||
[xss](data:text/html;base64,PHNjcmlwdD5hbGVydCgxKTwvc2NyaXB0Pg==) | ||
|
||
[xss]( data:text/html;base64,PHNjcmlwdD5hbGVydCgxKTwvc2NyaXB0Pg==) | ||
|
||
[xss](data://text/html;base64,PHNjcmlwdD5hbGVydCgxKTwvc2NyaXB0Pg==) | ||
|
||
[xss](data:text/html;base64,PHNjcmlwdD5hbGVydCgxKTwvc2NyaXB0Pg==) | ||
|
||
![xss](data:text/html;base64,PHNjcmlwdD5hbGVydCgxKTwvc2NyaXB0Pg==) | ||
|
||
![xss]( data:text/html;base64,PHNjcmlwdD5hbGVydCgxKTwvc2NyaXB0Pg==) | ||
|
||
![xss](data://text/html;base64,PHNjcmlwdD5hbGVydCgxKTwvc2NyaXB0Pg==) | ||
|
||
![xss](data:text/html;base64,PHNjcmlwdD5hbGVydCgxKTwvc2NyaXB0Pg==) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
<p><script>alert(1)</script></p> | ||
<p><script></p> | ||
<p>alert(1)</p> | ||
<p></script></p> | ||
<p><script> | ||
alert(1) | ||
</script></p> |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
<script>alert(1)</script> | ||
|
||
<script> | ||
|
||
alert(1) | ||
|
||
</script> | ||
|
||
|
||
<script> | ||
alert(1) | ||
</script> |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
IMO this shouldn't be enabled by default, it breaks BC. Enabling this without
setMarkupEscaped(true)
makes no sense anyway - it prevents absolutely nothing without escaped markup. Never confuse people by letting them do pointless things.How about something like this (disallow enabling
safeLinksEnabled
withoutmarkupEscaped
and always enable/disablesafeLinksEnabled
andmarkupEscaped
at the same time; however, disablingsafeLinksEnabled
whenmarkupEscaped
is enabled is still possible - escaping markup doesn't necessarily imply a "safe mode", but in contrast, a "safe mode" isn't possible without escaping markup)?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, so some of this is spoken to by decisions made in the original PR.
I think the default was @erusev's suggestion as far as I can tell though, see #276 (comment)
If it was up to me I'd make
markupEscaped
default to on too 😉I'm happy to go with the flow here though, so I'll await some more feedback
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
My 👎 for this, BC is IMO very important 😝
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hehe, I mean as an initial decision (of course too late now! ;p)