Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added safe mode #177

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
parsedown.komodoproject
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why have you added this?

test.php
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, this?

89 changes: 76 additions & 13 deletions Parsedown.php
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@

class Parsedown
{

private $safeMode = false;

#
# Philosophy

Expand All @@ -29,6 +32,9 @@ class Parsedown

function text($text)
{
if ($this->safeMode)
$text = htmlspecialchars($text, ENT_QUOTES, 'UTF-8');

# standardize line breaks
$text = str_replace("\r\n", "\n", $text);
$text = str_replace("\r", "\n", $text);
Expand Down Expand Up @@ -66,6 +72,13 @@ function setBreaksEnabled($breaksEnabled)

return $this;
}

#
# Switches safe mode on or off.
#
# The flag is only ever read in boolean context (e.g. `if ($this->safeMode)`),
# so it is normalised to a real boolean here; that keeps the stored state
# consistent with the property's `false` default whatever truthy / falsy
# value the caller passes.
#
# $safeMode - truthy to enable safe mode, falsy to disable it
#
# Returns $this so calls can be chained.
#
function setSafeMode($safeMode)
{
    $this->safeMode = (bool) $safeMode;

    return $this;
}

#
# Lines
Expand Down Expand Up @@ -127,6 +140,16 @@ private function lines(array $lines)

continue;
}

# If the line begins with the '&gt;' HTML entity (an escaped '>'), convert it back to '>' so it still works as a quote marker
if ($this->safeMode and substr(trim($line), 0, 4) == ">")
{
$strpos = strpos($line, ">");
if ($strpos === 0 || (!isset($CurrentBlock['interrupted']) || !$CurrentBlock['interrupted']))
{
$line = substr_replace($line, ">", $strpos, 4);
}
}

$indent = 0;

Expand Down Expand Up @@ -345,7 +368,8 @@ protected function completeCodeBlock($Block)
{
$text = $Block['element']['text']['text'];

$text = htmlspecialchars($text, ENT_NOQUOTES, 'UTF-8');
if (!$this->safeMode)
$text = htmlspecialchars($text, ENT_NOQUOTES, 'UTF-8');

$Block['element']['text']['text'] = $text;

Expand Down Expand Up @@ -418,7 +442,8 @@ protected function completeFencedCode($Block)
{
$text = $Block['element']['text']['text'];

$text = htmlspecialchars($text, ENT_NOQUOTES, 'UTF-8');
if (!$this->safeMode)
$text = htmlspecialchars($text, ENT_NOQUOTES, 'UTF-8');

$Block['element']['text']['text'] = $text;

Expand Down Expand Up @@ -805,18 +830,31 @@ protected function addToTable($Line, array $Block)

protected function identifyReference($Line)
{
if (preg_match('/^\[(.+?)\]:[ ]*<?(\S+?)>?(?:[ ]+["\'(](.+)["\')])?[ ]*$/', $Line['text'], $matches))
if (preg_match('/^\[(.+?)\]:[ ]*<?(\S+?)>?(?:[ ]+(["\'(]?)(.+?)(["\')]?))?[ ]*$/', $Line['text'], $matches))
{
if (isset($matches[3]) and trim($matches[3]) == "")
{
if (!$this->safeMode or !isset($matches[4]) or !in_array(substr($matches[4], 0, 6), array("&quot;", "&#039;")))
{
return;
}
}
$url = $matches[2];
if ($this->safeMode && stripos($url, "javascript:") !== false)
{
$url = "";
}

$Definition = array(
'id' => strtolower($matches[1]),
'data' => array(
'url' => $matches[2],
'url' => $url,
),
);

if (isset($matches[3]))
if (!$this->safeMode and isset($matches[4]))
{
$Definition['data']['title'] = $matches[3];
$Definition['data']['title'] = $matches[4];
}

return $Definition;
Expand Down Expand Up @@ -992,7 +1030,15 @@ protected function identifyUrl($excerpt, $text)

if (preg_match('/\bhttps?:[\/]{2}[^\s<]+\b\/*/ui', $text, $matches, PREG_OFFSET_CAPTURE))
{
$url = str_replace(array('&', '<'), array('&amp;', '&lt;'), $matches[0][0]);
$url = $matches[0][0];
if (!$this->safeMode)
{
$url = str_replace(array('&', '<', '>'), array('&amp;', '&lt;', '&gt;'), $url);
}
elseif (stripos($url, "javascript:") !== false)
{
$url = "";
}

return array(
'extent' => strlen($matches[0][0]),
Expand Down Expand Up @@ -1062,7 +1108,15 @@ protected function identifyUrlTag($excerpt)
{
if (strpos($excerpt, '>') !== false and preg_match('/^<(https?:[\/]{2}[^\s]+?)>/i', $excerpt, $matches))
{
$url = str_replace(array('&', '<'), array('&amp;', '&lt;'), $matches[1]);
$url = $matches[1];
if (!$this->safeMode)
{
$url = str_replace(array('&', '<', '>'), array('&amp;', '&lt;', '&gt;'), $url);
}
elseif (stripos($url, "javascript:") !== false)
{
$url = "";
}

return array(
'extent' => strlen($matches[0]),
Expand Down Expand Up @@ -1112,7 +1166,8 @@ protected function identifyInlineCode($excerpt)
if (preg_match('/^('.$marker.'+)[ ]*(.+?)[ ]*(?<!'.$marker.')\1(?!'.$marker.')/', $excerpt, $matches))
{
$text = $matches[2];
$text = htmlspecialchars($text, ENT_NOQUOTES, 'UTF-8');
if (!$this->safeMode)
$text = htmlspecialchars($text, ENT_NOQUOTES, 'UTF-8');

return array(
'extent' => strlen($matches[0]),
Expand Down Expand Up @@ -1160,7 +1215,7 @@ protected function identifyLink($excerpt)
$extent += strlen($matches[0]);
}
}
elseif (preg_match('/^\([ ]*(.*?)(?:[ ]+[\'"](.+?)[\'"])?[ ]*\)/', $substring, $matches))
elseif (preg_match('/^\([ ]*(.*?)(?:[ ]+[\'"]?(.+?)[\'"]?)?[ ]*\)/', $substring, $matches))
{
$Link['url'] = $matches[1];

Expand All @@ -1181,8 +1236,16 @@ protected function identifyLink($excerpt)
return;
}

$url = str_replace(array('&', '<'), array('&amp;', '&lt;'), $Link['url']);

$url = $Link['url'];
if (!$this->safeMode)
{
$url = str_replace(array('&', '<', '>'), array('&amp;', '&lt;', '&gt;'), $url);
}
elseif (stripos($url, "javascript:") !== false)
{
$url = "";
}

if ($excerpt[0] === '!')
{
$Element = array(
Expand All @@ -1205,7 +1268,7 @@ protected function identifyLink($excerpt)
);
}

if (isset($Link['title']))
if (!$this->safeMode && isset($Link['title']))
{
$Element['attributes']['title'] = $Link['title'];
}
Expand Down
48 changes: 48 additions & 0 deletions test/Test.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,27 @@

class Test extends PHPUnit_Framework_TestCase
{
# Base names of the fixtures that render differently when safe mode is on.
# testSafeMode() compares these against "<name>_escaped.html"; every fixture
# not listed here is compared against its regular "<name>.html".
private $safeModeAltResults = array(
"automatic_link",
"block-level_html",
"code_block",
"email",
"escaping",
"fenced_code_block",
"html_entity",
"html_simple",
"image_title",
"implicit_reference",
"inline_link_title",
"inline_title",
"nested_block-level_html",
"reference_title",
"span-level_html",
"special_characters",
"strikethrough",
"tab-indented_code_block"
);

public function __construct($name = null, array $data = array(), $dataName = '')
{
$this->dataDir = dirname(__FILE__).'/data/';
Expand All @@ -16,6 +37,13 @@ public function __construct($name = null, array $data = array(), $dataName = '')
*/
function test_($filename)
{
if (strpos($filename, "_escaped"))
{
$this->markTestSkipped(
'Escaped tests are for safe mode only.'
);
}

$markdown = file_get_contents($this->dataDir . $filename . '.md');

$expectedMarkup = file_get_contents($this->dataDir . $filename . '.html');
Expand All @@ -27,6 +55,26 @@ function test_($filename)

$this->assertEquals($expectedMarkup, $actualMarkup);
}

/**
 * Renders a fixture with safe mode enabled and compares the result with the
 * expected markup.
 *
 * Fixtures named in $safeModeAltResults are checked against
 * "<name>_escaped.html"; all other fixtures are checked against "<name>.html".
 *
 * @dataProvider data
 *
 * @param string $filename fixture base name (no extension), relative to $this->dataDir
 */
function testSafeMode($filename)
{
    $markdown = file_get_contents($this->dataDir . $filename . '.md');

    # Strict comparison: fixture names are strings, so no type juggling is wanted.
    $suffix = in_array($filename, $this->safeModeAltResults, true)
        ? '_escaped.html'
        : '.html';

    $expectedMarkup = file_get_contents($this->dataDir . $filename . $suffix);

    # Normalise line endings so fixtures saved with CRLF / CR still compare equal.
    $expectedMarkup = str_replace("\r\n", "\n", $expectedMarkup);
    $expectedMarkup = str_replace("\r", "\n", $expectedMarkup);

    $Parsedown = Parsedown::instance()->setSafeMode(true);

    $actualMarkup = $Parsedown->text($markdown);

    # NOTE(review): instance() presumably hands back a shared object - confirm.
    # Safe mode is switched off again before asserting so the flag cannot leak
    # into other tests that obtain the same object.
    $Parsedown->setSafeMode(false);

    $this->assertEquals($expectedMarkup, $actualMarkup);
}

function data()
{
Expand Down
1 change: 1 addition & 0 deletions test/data/automatic_link_escaped.html
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
<p>&lt;<a href="http://example.com&gt">http://example.com&gt</a>;</p>
5 changes: 5 additions & 0 deletions test/data/block-level_html_escaped.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
<p>&lt;div&gt;<em>content</em>&lt;/div&gt;</p>
<p>sparse:</p>
<p>&lt;div&gt;
<em>content</em>
&lt;/div&gt;</p>
8 changes: 8 additions & 0 deletions test/data/code_block_escaped.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
<pre><code>&lt;?php

$message = &#039;Hello World!&#039;;
echo $message;</code></pre>
<hr />
<pre><code>&gt; not a quote
- not a list item
[not a reference]: http://foo.com</code></pre>
1 change: 1 addition & 0 deletions test/data/email_escaped.html
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
<p>my email is &lt;me@example.com&gt;</p>
4 changes: 4 additions & 0 deletions test/data/escaping_escaped.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
<p>escaped *emphasis*.</p>
<p><code>escaped \*emphasis\* in a code span</code></p>
<pre><code>escaped \*emphasis\* in a code block</code></pre>
<p>\ ` * _ { } [ ] ( ) \&gt; # + - . !</p>
6 changes: 6 additions & 0 deletions test/data/fenced_code_block_escaped.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
<pre><code>&lt;?php

$message = &#039;fenced code block&#039;;
echo $message;</code></pre>
<pre><code>tilde</code></pre>
<pre><code class="language-php">echo &#039;language identifier&#039;;</code></pre>
1 change: 1 addition & 0 deletions test/data/html_entity_escaped.html
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
<p>&amp;amp; &amp;copy; &amp;#123;</p>
28 changes: 28 additions & 0 deletions test/data/html_simple_escaped.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
<p>Headings:</p>
<p>&lt;h2 id=&quot;overview&quot;&gt;Overview&lt;/h2&gt;
blah
&lt;H2 id=&quot;block&quot;&gt;Block Elements&lt;/H2&gt;
blah
&lt;h3 id=&quot;span&quot;&gt;</p>
<pre><code> Span Elements</code></pre>
<p>&lt;/h3&gt;
blah</p>
<p>Hr&#039;s:</p>
<p>&lt;hr&gt;
blah</p>
<p>&lt;hr/&gt;
blah</p>
<p>&lt;hr /&gt;
blah</p>
<p>&lt;hr&gt; <br />
blah</p>
<p>&lt;hr/&gt;<br />
blah</p>
<p>&lt;hr /&gt;
blah</p>
<p>&lt;hr class=&quot;foo&quot; id=&quot;bar&quot; /&gt;
blah</p>
<p>&lt;hr class=&quot;foo&quot; id=&quot;bar&quot;/&gt;
blah</p>
<p>&lt;hr class=&quot;foo&quot; id=&quot;bar&quot; &gt;
blah</p>
1 change: 1 addition & 0 deletions test/data/image_title_escaped.html
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
<p><img alt="alt" src="/md.png" /></p>
3 changes: 3 additions & 0 deletions test/data/implicit_reference_escaped.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
<p>an <a href="http://example.com">implicit</a> reference link</p>
<p>an <a href="http://example.com">implicit</a> reference link with an empty link definition</p>
<p>an <a href="http://example.com">explicit</a> reference link with a title</p>
1 change: 1 addition & 0 deletions test/data/inline_link_title_escaped.html
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
<p><a href="http://example.com">single quotes</a> and <a href="http://example.com">double quotes</a></p>
1 change: 1 addition & 0 deletions test/data/inline_title_escaped.html
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
<p><a href="http://example.com">single quotes</a> and <a href="http://example.com">double quotes</a></p>
10 changes: 10 additions & 0 deletions test/data/nested_block-level_html_escaped.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
<p>&lt;div&gt;
<em>parent</em>
&lt;div&gt;
<em>child</em>
&lt;/div&gt;
&lt;pre&gt;
<em>adopted child</em>
&lt;/pre&gt;
&lt;/div&gt;</p>
<p><em>outside</em></p>
2 changes: 2 additions & 0 deletions test/data/reference_title_escaped.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
<p><a href="http://example.com">double quotes</a> and <a href="http://example.com">single quotes</a> and <a href="http://example.com">parentheses</a></p>
<p>[invalid title]: <a href="http://example.com">http://example.com</a> example title</p>
1 change: 1 addition & 0 deletions test/data/safe_mode_escaped.html
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
<p><a href="">click</a></p>
1 change: 1 addition & 0 deletions test/data/safe_mode_escaped.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[click](javascript:alert%28%22xss%22%28)
7 changes: 0 additions & 7 deletions test/data/self-closing_block-level_html.md

This file was deleted.

5 changes: 5 additions & 0 deletions test/data/span-level_html_escaped.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
<p>an &lt;b&gt;important&lt;/b&gt; &lt;a href=&#039;&#039;&gt;link&lt;/a&gt;</p>
<p>broken&lt;br/&gt;
line</p>
<p>&lt;b&gt;inline tag&lt;/b&gt; at the beginning</p>
<p>&lt;span&gt;<a href="http://example.com&lt;/span&gt">http://example.com&lt;/span&gt</a>;</p>
6 changes: 6 additions & 0 deletions test/data/special_characters_escaped.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
<p>AT&amp;T has an ampersand in their name</p>
<p>this &amp; that</p>
<p>4 &lt; 5 and 6 &gt; 5</p>
<p>&lt;<a href="http://example.com/autolink?a=1&amp;b=2&gt">http://example.com/autolink?a=1&amp;b=2&gt</a>;</p>
<p><a href="/script?a=1&amp;b=2">inline link</a></p>
<p><a href="http://example.com/?a=1&amp;b=2">reference link</a></p>
3 changes: 3 additions & 0 deletions test/data/strikethrough_escaped.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
<p><del>strikethrough</del></p>
<p>here&#039;s <del>one</del> followed by <del>another one</del></p>
<p>~~ this ~~ is not one neither is ~this~</p>
Loading