Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HtmlEscaping and SafeLinks #181

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 69 additions & 12 deletions Parsedown.php
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,24 @@ function setBreaksEnabled($breaksEnabled)
return $this;
}

private $escapingEnabled;

#
# Turns HTML escaping on or off for this parser instance.
#
# While the flag is truthy, identifyComment(), identifyMarkup() and
# identifyTag() return early, so raw HTML in the input is no longer
# recognised as markup by those handlers.
#
# $escapingEnabled  any value; it is stored as a strict boolean.
# Returns $this so that calls can be chained.
#
function setHtmlEscaping($escapingEnabled)
{
    # Normalise once here: the readers of this flag visible in this change
    # only test truthiness, so the cast keeps their behaviour identical
    # while making the stored type predictable.
    $this->escapingEnabled = (bool) $escapingEnabled;

    return $this;
}

private $safeLinksEnabled;

#
# Turns the "safe links" check on or off for this parser instance.
#
# While the flag is truthy, identifyLink() refuses any link whose URL
# contains the substring "javascript:" (case-insensitive).
#
# NOTE(review): that substring test is the only check visible here; other
# script-capable schemes are not covered by it - confirm whether that is
# sufficient for the intended use.
#
# $safeLinksEnabled  any value; it is stored as a strict boolean.
# Returns $this so that calls can be chained.
#
function setSafeLinks($safeLinksEnabled)
{
    # The flag is only ever tested for truthiness where it is read, so the
    # cast does not change behaviour; it just pins the stored type.
    $this->safeLinksEnabled = (bool) $safeLinksEnabled;

    return $this;
}

#
# Lines
#
Expand All @@ -92,7 +110,7 @@ function setBreaksEnabled($breaksEnabled)
'_' => array('Rule'),
'`' => array('FencedCode'),
'|' => array('Table'),
'~' => array('FencedCode'),
'~' => array('FencedCode')
);

# ~
Expand Down Expand Up @@ -350,6 +368,11 @@ protected function completeCodeBlock($Block)

protected function identifyComment($Line)
{
if ($this->escapingEnabled)
{
return;
}

if (isset($Line['text'][3]) and $Line['text'][3] === '-' and $Line['text'][2] === '-' and $Line['text'][1] === '!')
{
$Block = array(
Expand All @@ -367,7 +390,7 @@ protected function identifyComment($Line)

protected function addToComment($Line, array $Block)
{
if (isset($Block['closed']))
if (isset($Block['closed']) || $this->escapingEnabled)
{
return;
}
Expand Down Expand Up @@ -619,6 +642,11 @@ protected function identifySetext($Line, array $Block = null)

protected function identifyMarkup($Line)
{
if ($this->escapingEnabled)
{
return;
}

if (preg_match('/^<(\w[\w\d]*)(?:[ ][^>\/]*)?(\/?)[ ]*>/', $Line['text'], $matches))
{
if (in_array($matches[1], $this->textLevelElements))
Expand Down Expand Up @@ -646,7 +674,7 @@ protected function identifyMarkup($Line)

protected function addToMarkup($Line, array $Block)
{
if (isset($Block['closed']))
if (isset($Block['closed']) || $this->escapingEnabled)
{
return;
}
Expand Down Expand Up @@ -946,16 +974,17 @@ protected function elements(array $Elements)
'*' => array('Emphasis'),
'/' => array('Url'),
'<' => array('UrlTag', 'EmailTag', 'Tag', 'LessThan'),
'>' => array('GreaterThan'),
'[' => array('Link'),
'_' => array('Emphasis'),
'`' => array('InlineCode'),
'~' => array('Strikethrough'),
'\\' => array('EscapeSequence'),
'\\' => array('EscapeSequence')
);

# ~

protected $spanMarkerList = '*_!&[</`~\\';
protected $spanMarkerList = '*_!&[<>/`~\\';

#
# ~
Expand Down Expand Up @@ -1040,7 +1069,7 @@ protected function identifyUrl($Excerpt)

if (preg_match('/\bhttps?:[\/]{2}[^\s<]+\b\/*/ui', $Excerpt['context'], $matches, PREG_OFFSET_CAPTURE))
{
$url = str_replace(array('&', '<'), array('&amp;', '&lt;'), $matches[0][0]);
$url = str_replace(array('&', '<', '>'), array('&amp;', '&lt;', '&gt;'), $matches[0][0]);

return array(
'extent' => strlen($matches[0][0]),
Expand Down Expand Up @@ -1091,10 +1120,20 @@ protected function identifyEscapeSequence($Excerpt)
{
if (isset($Excerpt['text'][1]) and in_array($Excerpt['text'][1], $this->specialCharacters))
{
return array(
'markup' => $Excerpt['text'][1],
'extent' => 2,
);
if ($this->escapingEnabled && $Excerpt['text'][1] == '>')
{
return array(
'markup' => '&gt;',
'extent' => 2,
);
}
else
{
return array(
'markup' => $Excerpt['text'][1],
'extent' => 2,
);
}
}
}

Expand All @@ -1106,11 +1145,19 @@ protected function identifyLessThan()
);
}

#
# Span handler listed for the '>' marker.
#
# Always yields the HTML entity for a greater-than sign. 'extent' is 1,
# presumably the number of input characters this span consumes (the other
# handlers here set it to the length of what they matched).
#
protected function identifyGreaterThan()
{
    $Span = array(
        'markup' => '&gt;',
        'extent' => 1,
    );

    return $Span;
}

protected function identifyUrlTag($Excerpt)
{
if (strpos($Excerpt['text'], '>') !== false and preg_match('/^<(https?:[\/]{2}[^\s]+?)>/i', $Excerpt['text'], $matches))
{
$url = str_replace(array('&', '<'), array('&amp;', '&lt;'), $matches[1]);
$url = str_replace(array('&', '<', '>'), array('&amp;', '&lt;', '&gt;'), $matches[1]);

return array(
'extent' => strlen($matches[0]),
Expand Down Expand Up @@ -1144,6 +1191,11 @@ protected function identifyEmailTag($Excerpt)

protected function identifyTag($Excerpt)
{
if ($this->escapingEnabled)
{
return;
}

if (strpos($Excerpt['text'], '>') !== false and preg_match('/^<\/?\w.*?>/', $Excerpt['text'], $matches))
{
return array(
Expand Down Expand Up @@ -1229,7 +1281,12 @@ protected function identifyLink($Excerpt)
return;
}

$url = str_replace(array('&', '<'), array('&amp;', '&lt;'), $Link['url']);
if ($this->safeLinksEnabled && stripos($Link['url'], 'javascript:') !== false)
{
return;
}

$url = str_replace(array('&', '<', '>'), array('&amp;', '&lt;', '&gt;'), $Link['url']);

if ($Excerpt['text'][0] === '!')
{
Expand Down
9 changes: 8 additions & 1 deletion test/Test.php
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,14 @@ function test_($filename)
$expectedMarkup = str_replace("\r\n", "\n", $expectedMarkup);
$expectedMarkup = str_replace("\r", "\n", $expectedMarkup);

$actualMarkup = Parsedown::instance()->text($markdown);
if (strpos($filename, '_escaped') !== false)
{
$actualMarkup = Parsedown::instance('escaped')->setHtmlEscaping(true)->text($markdown);
}
else
{
$actualMarkup = Parsedown::instance()->text($markdown);
}

$this->assertEquals($expectedMarkup, $actualMarkup);
}
Expand Down
5 changes: 5 additions & 0 deletions test/data/HTML_Comment_escaped.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
<p>&lt;!-- single line --&gt;</p>
<p>paragraph</p>
<p>&lt;!--
multiline --&gt;</p>
<p>paragraph</p>
8 changes: 8 additions & 0 deletions test/data/HTML_Comment_escaped.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
<!-- single line -->

paragraph

<!--
multiline -->

paragraph
18 changes: 18 additions & 0 deletions test/data/aesthetic_table_escaped.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
<table>
<thead>
<tr>
<th>header 1</th>
<th>header 2</th>
</tr>
</thead>
<tbody>
<tr>
<td>cell 1.1</td>
<td>cell 1.2</td>
</tr>
<tr>
<td>cell 2.1</td>
<td>cell 2.2</td>
</tr>
</tbody>
</table>
4 changes: 4 additions & 0 deletions test/data/aesthetic_table_escaped.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
| header 1 | header 2 |
| -------- | -------- |
| cell 1.1 | cell 1.2 |
| cell 2.1 | cell 2.2 |
21 changes: 21 additions & 0 deletions test/data/aligned_table_escaped.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
<table>
<thead>
<tr>
<th align="left">header 1</th>
<th align="center">header 2</th>
<th align="right">header 2</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left">cell 1.1</td>
<td align="center">cell 1.2</td>
<td align="right">cell 1.3</td>
</tr>
<tr>
<td align="left">cell 2.1</td>
<td align="center">cell 2.2</td>
<td align="right">cell 2.3</td>
</tr>
</tbody>
</table>
4 changes: 4 additions & 0 deletions test/data/aligned_table_escaped.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
| header 1 | header 2 | header 2 |
| :------- | :------: | -------: |
| cell 1.1 | cell 1.2 | cell 1.3 |
| cell 2.1 | cell 2.2 | cell 2.3 |
8 changes: 8 additions & 0 deletions test/data/atx_heading_escaped.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
<h1>h1</h1>
<h2>h2</h2>
<h3>h3</h3>
<h4>h4</h4>
<h5>h5</h5>
<h6>h6</h6>
<h1>closed h1</h1>
<p>#</p>
15 changes: 15 additions & 0 deletions test/data/atx_heading_escaped.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# h1

## h2

### h3

#### h4

##### h5

###### h6

# closed h1 #

#
1 change: 1 addition & 0 deletions test/data/automatic_link_escaped.html
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
<p><a href="http://example.com">http://example.com</a></p>
1 change: 1 addition & 0 deletions test/data/automatic_link_escaped.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
<http://example.com>
8 changes: 8 additions & 0 deletions test/data/block-level_html_escaped.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
<p>&lt;div&gt;<em>content</em>&lt;/div&gt;</p>
<p>sparse:</p>
<p>&lt;div&gt;
&lt;div class="inner"&gt;
<em>content</em>
&lt;/div&gt;
&lt;/div&gt;</p>
<p>paragraph</p>
11 changes: 11 additions & 0 deletions test/data/block-level_html_escaped.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
<div>_content_</div>

sparse:

<div>
<div class="inner">
_content_
</div>
</div>

paragraph
8 changes: 8 additions & 0 deletions test/data/code_block_escaped.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
<pre><code>&lt;?php

$message = 'Hello World!';
echo $message;</code></pre>
<hr />
<pre><code>&gt; not a quote
- not a list item
[not a reference]: http://foo.com</code></pre>
10 changes: 10 additions & 0 deletions test/data/code_block_escaped.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
<?php

$message = 'Hello World!';
echo $message;

---

> not a quote
- not a list item
[not a reference]: http://foo.com
6 changes: 6 additions & 0 deletions test/data/code_span_escaped.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
<p>a <code>code span</code></p>
<p><code>this is also a codespan</code> trailing text</p>
<p><code>and look at this one!</code></p>
<p>single backtick in a code span: <code>`</code></p>
<p>backtick-delimited string in a code span: <code>`foo`</code></p>
<p><code>sth `` sth</code></p>
11 changes: 11 additions & 0 deletions test/data/code_span_escaped.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
a `code span`

`this is also a codespan` trailing text

`and look at this one!`

single backtick in a code span: `` ` ``

backtick-delimited string in a code span: `` `foo` ``

`sth `` sth`
9 changes: 9 additions & 0 deletions test/data/compound_blockquote_escaped.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
<blockquote>
<h2>header</h2>
<p>paragraph</p>
<ul>
<li>li</li>
</ul>
<hr />
<p>paragraph</p>
</blockquote>
10 changes: 10 additions & 0 deletions test/data/compound_blockquote_escaped.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
> header
> ------
>
> paragraph
>
> - li
>
> ---
>
> paragraph
2 changes: 2 additions & 0 deletions test/data/compound_emphasis_escaped.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
<p><em><code>code</code></em> <strong><code>code</code></strong></p>
<p><em><code>code</code><strong><code>code</code></strong><code>code</code></em></p>
4 changes: 4 additions & 0 deletions test/data/compound_emphasis_escaped.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
_`code`_ __`code`__

*`code`**`code`**`code`*

Loading