erusev · erusev · Feb 28, 2018 · Jan 21, 2015 · Jan 24, 2015 · Jan 25, 2015
diff --git a/Parsedown.php b/Parsedown.php
@@ -75,6 +75,26 @@ function setUrlsLinked($urlsLinked)
 
     protected $urlsLinked = true;
 
+    #
+    # Toggles URL filtering for link and image destinations. When enabled,
+    # an href/src that does not begin with a whitelisted scheme prefix is
+    # percent-encoded before output. Returns $this so calls can be chained.
+    #
+    function setSafeLinksEnabled($safeLinksEnabled)
+    {
+        $this->safeLinksEnabled = $safeLinksEnabled;
+
+        return $this;
+    }
+
+    # when true, URL attributes are checked against $safeLinksWhitelist
+    protected $safeLinksEnabled = true;
+
+    # prefixes a URL may start with (compared case-insensitively) to be
+    # emitted unchanged; data: URIs are limited to base64 png/gif/jpeg images
+    protected $safeLinksWhitelist = array(
+        'http://',
+        'https://',
+        'ftp://',
+        'ftps://',
+        'mailto:',
+        'data:image/png;base64,',
+        'data:image/gif;base64,',
+        'data:image/jpeg;base64,',
+    );
+
     #
     # Lines
     #
@@ -342,8 +362,6 @@ protected function blockCodeComplete($Block)
     {
         $text = $Block['element']['text']['text'];
 
-        $text = htmlspecialchars($text, ENT_NOQUOTES, 'UTF-8');
-
         $Block['element']['text']['text'] = $text;
 
         return $Block;
@@ -457,8 +475,6 @@ protected function blockFencedCodeComplete($Block)
     {
         $text = $Block['element']['text']['text'];
 
-        $text = htmlspecialchars($text, ENT_NOQUOTES, 'UTF-8');
-
         $Block['element']['text']['text'] = $text;
 
         return $Block;
@@ -515,10 +531,10 @@ protected function blockList($Line)
                 ),
             );
 
-            if($name === 'ol') 
+            if($name === 'ol')
             {
                 $listStart = stristr($matches[0], '.', true);
-                
+
                 if($listStart !== '1')
                 {
                     $Block['element']['attributes'] = array('start' => $listStart);
@@ -1074,7 +1090,6 @@ protected function inlineCode($Excerpt)
         if (preg_match('/^('.$marker.'+)[ ]*(.+?)[ ]*(?<!'.$marker.')\1(?!'.$marker.')/s', $Excerpt['text'], $matches))
         {
             $text = $matches[2];
-            $text = htmlspecialchars($text, ENT_NOQUOTES, 'UTF-8');
             $text = preg_replace("/[ ]*\n/", ' ', $text);
 
             return array(
@@ -1253,8 +1268,6 @@ protected function inlineLink($Excerpt)
             $Element['attributes']['title'] = $Definition['title'];
         }
 
-        $Element['attributes']['href'] = str_replace(array('&', '<'), array('&amp;', '&lt;'), $Element['attributes']['href']);
-
         return array(
             'extent' => $extent,
             'element' => $Element,
@@ -1343,14 +1356,16 @@ protected function inlineUrl($Excerpt)
 
         if (preg_match('/\bhttps?:[\/]{2}[^\s<]+\b\/*/ui', $Excerpt['context'], $matches, PREG_OFFSET_CAPTURE))
         {
+            $url = $matches[0][0];
+
             $Inline = array(
                 'extent' => strlen($matches[0][0]),
                 'position' => $matches[0][1],
                 'element' => array(
                     'name' => 'a',
-                    'text' => $matches[0][0],
+                    'text' => $url,
                     'attributes' => array(
-                        'href' => $matches[0][0],
+                        'href' => $url,
                     ),
                 ),
             );
@@ -1363,7 +1378,7 @@ protected function inlineUrlTag($Excerpt)
     {
         if (strpos($Excerpt['text'], '>') !== false and preg_match('/^<(\w+:\/{2}[^ >]+)>/i', $Excerpt['text'], $matches))
         {
-            $url = str_replace(array('&', '<'), array('&amp;', '&lt;'), $matches[1]);
+            $url = $matches[1];
 
             return array(
                 'extent' => strlen($matches[0]),
@@ -1401,6 +1416,8 @@ protected function unmarkedText($text)
 
     protected function element(array $Element)
     {
+        $Element = $this->sanitiseElement($Element);
+
         $markup = '<'.$Element['name'];
 
         if (isset($Element['attributes']))
@@ -1412,7 +1429,7 @@ protected function element(array $Element)
                     continue;
                 }
 
-                $markup .= ' '.$name.'="'.$value.'"';
+                $markup .= ' '.$name.'="'.self::escape($value).'"';
             }
         }
 
@@ -1426,7 +1443,7 @@ protected function element(array $Element)
             }
             else
             {
-                $markup .= $Element['text'];
+                $markup .= self::escape($Element['text'], true);
             }
 
             $markup .= '</'.$Element['name'].'>';
@@ -1485,10 +1502,80 @@ function parse($text)
         return $markup;
     }
 
+    #
+    # Makes an element safe to render: neutralises unsafe URLs on <a href>
+    # and <img src>, then drops every attribute whose name is malformed or
+    # names an event handler (on*). Returns the cleaned element array.
+    #
+    protected function sanitiseElement(array $Element)
+    {
+        # \z (rather than $) anchors at the very end of the name, so a name
+        # with a trailing newline is rejected instead of slipping through
+        static $goodAttribute = '/^[a-zA-Z0-9][a-zA-Z0-9_-]*+\z/';
+        static $safeUrlNameToAtt  = array(
+            'a'   => 'href',
+            'img' => 'src',
+        );
+
+        if (isset($safeUrlNameToAtt[$Element['name']]))
+        {
+            $Element = $this->filterUnsafeUrlInAttribute($Element, $safeUrlNameToAtt[$Element['name']]);
+        }
+
+        if ( ! empty($Element['attributes']))
+        {
+            foreach (array_keys($Element['attributes']) as $att)
+            {
+                # filter out badly parsed attributes and dump onevent attributes
+                if ( ! preg_match($goodAttribute, $att) or preg_match('/^on/i', $att))
+                {
+                    unset($Element['attributes'][$att]);
+                }
+            }
+        }
+
+        return $Element;
+    }
+
+    #
+    # Neutralises the URL held in $Element['attributes'][$attribute] unless it
+    # starts with one of the whitelisted scheme prefixes. Does nothing when
+    # safe links are disabled or when the attribute is not set. Returns the
+    # (possibly modified) element array.
+    #
+    protected function filterUnsafeUrlInAttribute(array $Element, $attribute)
+    {
+        # nothing to filter: feature switched off, or no such attribute
+        # (reading it regardless would raise a notice and emit an empty value)
+        if ( ! $this->safeLinksEnabled or ! isset($Element['attributes'][$attribute]))
+        {
+            return $Element;
+        }
+
+        $url = $Element['attributes'][$attribute];
+
+        foreach ($this->safeLinksWhitelist as $scheme)
+        {
+            # case-insensitive "starts with"
+            if (stripos($url, $scheme) === 0)
+            {
+                return $Element;
+            }
+        }
+
+        # percent-encode everything except the URL delimiters / # ? & = %,
+        # so a scheme such as "javascript:" becomes inert ("javascript%3A")
+        $Element['attributes'][$attribute] = preg_replace_callback(
+            '/[^\/#?&=%]++/',
+            function (array $match)
+            {
+                return urlencode($match[0]);
+            },
+            $url
+        );
+
+        return $Element;
+    }
+
     #
     # Static Methods
     #
 
+    #
+    # HTML-escapes $text as UTF-8. Single and double quotes are escaped as
+    # well unless $allowQuotes is true.
+    #
+    protected static function escape($text, $allowQuotes = false)
+    {
+        if ($allowQuotes)
+        {
+            $flags = ENT_NOQUOTES;
+        }
+        else
+        {
+            $flags = ENT_QUOTES;
+        }
+
+        return htmlspecialchars($text, $flags, 'UTF-8');
+    }
+
     static function instance($name = 'default')
     {
         if (isset(self::$instances[$name]))

diff --git a/test/ParsedownTest.php b/test/ParsedownTest.php
@@ -46,6 +46,8 @@ function test_($test, $dir)
         $expectedMarkup = str_replace("\r\n", "\n", $expectedMarkup);
         $expectedMarkup = str_replace("\r", "\n", $expectedMarkup);
 
+        $this->Parsedown->setMarkupEscaped($test === 'xss_text_encoding');
+
         $actualMarkup = $this->Parsedown->text($markdown);
 
         $this->assertEquals($expectedMarkup, $actualMarkup);

diff --git a/test/data/inline_link.html b/test/data/inline_link.html
@@ -1,5 +1,5 @@
 <p><a href="http://example.com">link</a></p>
-<p><a href="/url-(parentheses)">link</a> with parentheses in URL </p>
+<p><a href="/url-%28parentheses%29">link</a> with parentheses in URL </p>
 <p>(<a href="/index.php">link</a>) in parentheses</p>
 <p><a href="http://example.com"><code>link</code></a></p>
 <p><a href="http://example.com"><img src="http://parsedown.org/md.png" alt="MD Logo" /></a></p>

diff --git a/test/data/xss_attribute_encoding.html b/test/data/xss_attribute_encoding.html
@@ -0,0 +1,6 @@
+<p><a href="https://www.example.com&quot;">xss</a></p>
+<p><img src="https://www.example.com&quot;" alt="xss" /></p>
+<p><a href="https://www.example.com&#039;">xss</a></p>
+<p><img src="https://www.example.com&#039;" alt="xss" /></p>
+<p><img src="https://www.example.com" alt="xss&quot;" /></p>
+<p><img src="https://www.example.com" alt="xss&#039;" /></p>
diff --git a/test/data/xss_attribute_encoding.md b/test/data/xss_attribute_encoding.md
@@ -0,0 +1,11 @@
+[xss](https://www.example.com")
+
+![xss](https://www.example.com")
+
+[xss](https://www.example.com')
+
+![xss](https://www.example.com')
+
+![xss"](https://www.example.com)
+
+![xss'](https://www.example.com)
diff --git a/test/data/xss_bad_url.html b/test/data/xss_bad_url.html
@@ -0,0 +1,16 @@
+<p><a href="javascript%3Aalert%281%29">xss</a></p>
+<p><a href="javascript%3Aalert%281%29">xss</a></p>
+<p><a href="javascript%3A//alert%281%29">xss</a></p>
+<p><a href="javascript&amp;colon%3Balert%281%29">xss</a></p>
+<p><img src="javascript%3Aalert%281%29" alt="xss" /></p>
+<p><img src="javascript%3Aalert%281%29" alt="xss" /></p>
+<p><img src="javascript%3A//alert%281%29" alt="xss" /></p>
+<p><img src="javascript&amp;colon%3Balert%281%29" alt="xss" /></p>
+<p><a href="data%3Atext/html%3Bbase64%2CPHNjcmlwdD5hbGVydCgxKTwvc2NyaXB0Pg==">xss</a></p>
+<p><a href="data%3Atext/html%3Bbase64%2CPHNjcmlwdD5hbGVydCgxKTwvc2NyaXB0Pg==">xss</a></p>
+<p><a href="data%3A//text/html%3Bbase64%2CPHNjcmlwdD5hbGVydCgxKTwvc2NyaXB0Pg==">xss</a></p>
+<p><a href="data&amp;colon%3Btext/html%3Bbase64%2CPHNjcmlwdD5hbGVydCgxKTwvc2NyaXB0Pg==">xss</a></p>
+<p><img src="data%3Atext/html%3Bbase64%2CPHNjcmlwdD5hbGVydCgxKTwvc2NyaXB0Pg==" alt="xss" /></p>
+<p><img src="data%3Atext/html%3Bbase64%2CPHNjcmlwdD5hbGVydCgxKTwvc2NyaXB0Pg==" alt="xss" /></p>
+<p><img src="data%3A//text/html%3Bbase64%2CPHNjcmlwdD5hbGVydCgxKTwvc2NyaXB0Pg==" alt="xss" /></p>
+<p><img src="data&amp;colon%3Btext/html%3Bbase64%2CPHNjcmlwdD5hbGVydCgxKTwvc2NyaXB0Pg==" alt="xss" /></p>
diff --git a/test/data/xss_bad_url.md b/test/data/xss_bad_url.md
@@ -0,0 +1,31 @@
+[xss](javascript:alert(1))
+
+[xss]( javascript:alert(1))
+
+[xss](javascript://alert(1))
+
+[xss](javascript&colon;alert(1))
+
+![xss](javascript:alert(1))
+
+![xss]( javascript:alert(1))
+
+![xss](javascript://alert(1))
+
+![xss](javascript&colon;alert(1))
+
+[xss](data:text/html;base64,PHNjcmlwdD5hbGVydCgxKTwvc2NyaXB0Pg==)
+
+[xss]( data:text/html;base64,PHNjcmlwdD5hbGVydCgxKTwvc2NyaXB0Pg==)
+
+[xss](data://text/html;base64,PHNjcmlwdD5hbGVydCgxKTwvc2NyaXB0Pg==)
+
+[xss](data&colon;text/html;base64,PHNjcmlwdD5hbGVydCgxKTwvc2NyaXB0Pg==)
+
+![xss](data:text/html;base64,PHNjcmlwdD5hbGVydCgxKTwvc2NyaXB0Pg==)
+
+![xss]( data:text/html;base64,PHNjcmlwdD5hbGVydCgxKTwvc2NyaXB0Pg==)
+
+![xss](data://text/html;base64,PHNjcmlwdD5hbGVydCgxKTwvc2NyaXB0Pg==)
+
+![xss](data&colon;text/html;base64,PHNjcmlwdD5hbGVydCgxKTwvc2NyaXB0Pg==)
diff --git a/test/data/xss_text_encoding.html b/test/data/xss_text_encoding.html
@@ -0,0 +1,7 @@
+<p>&lt;script&gt;alert(1)&lt;/script&gt;</p>
+<p>&lt;script&gt;</p>
+<p>alert(1)</p>
+<p>&lt;/script&gt;</p>
+<p>&lt;script&gt;
+alert(1)
+&lt;/script&gt;</p>
diff --git a/test/data/xss_text_encoding.md b/test/data/xss_text_encoding.md
@@ -0,0 +1,12 @@
+<script>alert(1)</script>
+
+<script>
+
+alert(1)
+
+</script>
+
+
+<script>
+alert(1)
+</script>