Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merge request for CommonMark compliance improvements #334

Open
wants to merge 10 commits into
base: master
Choose a base branch
from
144 changes: 115 additions & 29 deletions Parsedown.php
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,23 @@ function setUrlsLinked($urlsLinked)

protected $urlsLinked = true;

# Enables or disables tab expansion and returns $this so that setter calls
# can be chained. While the flag is set, lines() expands the tab characters
# of every line that contains one before measuring its indentation; while it
# is cleared, tabs are left in the line.
function setTabsExpanded($tabsExpanded)
{
$this->tabsExpanded = $tabsExpanded;

return $this;
}

# Tab expansion is on unless setTabsExpanded() turns it off.
protected $tabsExpanded = true;

# Switches the CommonMark-style HTML block handling on or off and returns
# $this so that setter calls can be chained. blockMarkup() and
# blockMarkupContinue() consult this flag and take their CommonMark branch
# only while it is set.
function setCommonMarkHtmlBlocks($commonMarkHtmlBlocks)
{
$this->commonMarkHtmlBlocks = $commonMarkHtmlBlocks;

return $this;
}

# CommonMark HTML block handling is off unless explicitly enabled.
protected $commonMarkHtmlBlocks = false;
#
# Lines
#
Expand Down Expand Up @@ -131,7 +148,7 @@ protected function lines(array $lines)
continue;
}

if (strpos($line, "\t") !== false)
if ($this->tabsExpanded && (strpos($line, "\t") !== false))
{
$parts = explode("\t", $line);

Expand All @@ -149,17 +166,23 @@ protected function lines(array $lines)
}

$indent = 0;
$effective_indent=0;

while (isset($line[$indent]) and $line[$indent] === ' ')
while (isset($line[$indent]) and ($line[$indent] === ' ' or $line[$indent] === "\t"))
{
$effective_indent++;
if($line[$indent]==="\t")
{
$effective_indent+=4-$effective_indent%4;
}
$indent ++;
}

$text = $indent > 0 ? substr($line, $indent) : $line;

# ~

$Line = array('body' => $line, 'indent' => $indent, 'text' => $text);
$Line = array('body' => $line, 'indent' => $effective_indent, 'text' => $text);

# ~

Expand Down Expand Up @@ -294,6 +317,17 @@ protected function isBlockCompletable($Type)
#
# Code

# Removes one level of indentation from the start of $text and returns the
# remainder as a string.
#
# One level is either four leading spaces, or fewer than four spaces followed
# by a single tab (the tab completes the level and is consumed with them).
# When the text is indented by less than one level, only the leading spaces
# that are present are removed.
#
# $text - the raw line to strip.
#
# Always returns a string; text that consists of nothing but (part of) an
# indent yields ''.
protected function stripIndent($text)
{
    for ($pos = 0; $pos < 4; ++$pos)
    {
        # The text ended before a full indent level was consumed. Reading
        # $text[$pos] here would raise an "Uninitialized string offset"
        # notice/warning, so stop early: nothing is left after the indent.
        if ( ! isset($text[$pos]))
        {
            return '';
        }

        if ($text[$pos] === "\t")
        {
            # substr() returns false on PHP < 8 when the tab is the last
            # character; the cast keeps the return type a string.
            return (string) substr($text, $pos + 1);
        }

        if ($text[$pos] !== ' ')
        {
            return substr($text, $pos);
        }
    }

    # Four spaces were consumed. Cast for the same PHP < 8 substr() reason
    # when the text is exactly four spaces long.
    return (string) substr($text, 4);
}

protected function blockCode($Line, $Block = null)
{
if (isset($Block) and ! isset($Block['type']) and ! isset($Block['interrupted']))
Expand All @@ -303,7 +337,7 @@ protected function blockCode($Line, $Block = null)

if ($Line['indent'] >= 4)
{
$text = substr($Line['body'], 4);
$text = $this->stripIndent($Line['body']);

$Block = array(
'element' => array(
Expand Down Expand Up @@ -333,7 +367,7 @@ protected function blockCodeContinue($Line, $Block)

$Block['element']['text']['text'] .= "\n";

$text = substr($Line['body'], 4);
$text = $this->stripIndent($Line['body']);

$Block['element']['text']['text'] .= $text;

Expand Down Expand Up @@ -399,16 +433,16 @@ protected function blockCommentContinue($Line, array $Block)

protected function blockFencedCode($Line)
{
if (preg_match('/^['.$Line['text'][0].']{3,}[ ]*([\w-]+)?[ ]*$/', $Line['text'], $matches))
if (preg_match('/^(['.$Line['text'][0].']{3,})\s*([\w-]+)?\s*$/', $Line['text'], $matches))
{
$Element = array(
'name' => 'code',
'text' => '',
);

if (isset($matches[1]))
if (isset($matches[2]))
{
$class = 'language-'.$matches[1];
$class = 'language-'.$matches[2];

$Element['attributes'] = array(
'class' => $class,
Expand Down Expand Up @@ -442,7 +476,7 @@ protected function blockFencedCodeContinue($Line, $Block)
unset($Block['interrupted']);
}

if (preg_match('/^'.$Block['char'].'{3,}[ ]*$/', $Line['text']))
if (preg_match('/^'.$Block['char'].'{3,}\s*$/', $Line['text']))
{
$Block['element']['text']['text'] = substr($Block['element']['text']['text'], 1);

Expand Down Expand Up @@ -507,7 +541,7 @@ protected function blockList($Line)
{
list($name, $pattern) = $Line['text'][0] <= '-' ? array('ul', '[*+-]') : array('ol', '[0-9]+[.]');

if (preg_match('/^('.$pattern.'[ ]+)(.*)/', $Line['text'], $matches))
if (preg_match('/^('.$pattern.'\s+)(.*)/', $Line['text'], $matches))
{
$Block = array(
'indent' => $Line['indent'],
Expand All @@ -534,7 +568,7 @@ protected function blockList($Line)

protected function blockListContinue($Line, array $Block)
{
if ($Block['indent'] === $Line['indent'] and preg_match('/^'.$Block['pattern'].'(?:[ ]+(.*)|$)/', $Line['text'], $matches))
if ($Block['indent'] === $Line['indent'] and preg_match('/^'.$Block['pattern'].'(?:\s+(.*)|$)/', $Line['text'], $matches))
{
if (isset($Block['interrupted']))
{
Expand Down Expand Up @@ -567,7 +601,7 @@ protected function blockListContinue($Line, array $Block)

if ( ! isset($Block['interrupted']))
{
$text = preg_replace('/^[ ]{0,4}/', '', $Line['body']);
$text = $this->stripIndent($Line['body']);

$Block['li']['text'] []= $text;

Expand All @@ -578,7 +612,7 @@ protected function blockListContinue($Line, array $Block)
{
$Block['li']['text'] []= '';

$text = preg_replace('/^[ ]{0,4}/', '', $Line['body']);
$text = $this->stripIndent($Line['body']);

$Block['li']['text'] []= $text;

Expand All @@ -593,7 +627,7 @@ protected function blockListContinue($Line, array $Block)

protected function blockQuote($Line)
{
if (preg_match('/^>[ ]?(.*)/', $Line['text'], $matches))
if (preg_match('/^>\s?(.*)/', $Line['text'], $matches))
{
$Block = array(
'element' => array(
Expand All @@ -609,7 +643,7 @@ protected function blockQuote($Line)

protected function blockQuoteContinue($Line, array $Block)
{
if ($Line['text'][0] === '>' and preg_match('/^>[ ]?(.*)/', $Line['text'], $matches))
if ($Line['text'][0] === '>' and preg_match('/^>\s?(.*)/', $Line['text'], $matches))
{
if (isset($Block['interrupted']))
{
Expand All @@ -636,7 +670,8 @@ protected function blockQuoteContinue($Line, array $Block)

protected function blockRule($Line)
{
if (preg_match('/^(['.$Line['text'][0].'])([ ]*\1){2,}[ ]*$/', $Line['text']))
if (($Line['indent']<4) and
preg_match('/^(['.$Line['text'][0].'])(\s*\1){2,}\s*$/', $Line['text']))
{
$Block = array(
'element' => array(
Expand Down Expand Up @@ -676,7 +711,29 @@ protected function blockMarkup($Line)
return;
}

if (preg_match('/^<(\w*)(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*(\/)?>/', $Line['text'], $matches))
if($this->commonMarkHtmlBlocks){
if (preg_match('/^<\/?(\w+)((\s|>).*)?$/', $Line['text'], $matches)){
if(in_array(strtolower($matches[1]),$this->commonMarkHtmlElements) or
in_array(strtolower($matches[1]),$this->commonMarkLiteralHtmlElements))
{
$Block = array(
'name' => $matches[1],
'markup' => $Line['text'],
);
return $Block;
}
}
if (preg_match('/^<\/?(\w+)(\s[^>]*)?>$/', $Line['text'], $matches)){
$Block = array(
'name' => $matches[1],
'markup' => $Line['text'],
);
return $Block;
}
return;
}

if (preg_match('/^<(\w*)(?:\s*'.$this->regexHtmlAttribute.')*\s*(\/)?>/', $Line['text'], $matches))
{
$element = strtolower($matches[1]);

Expand Down Expand Up @@ -711,7 +768,7 @@ protected function blockMarkup($Line)
return;
}

if (preg_match('/<\/'.$matches[1].'>[ ]*$/i', $remainder))
if (preg_match('/<\/'.$matches[1].'>\s*$/i', $remainder))
{
$Block['closed'] = true;
}
Expand All @@ -728,12 +785,26 @@ protected function blockMarkupContinue($Line, array $Block)
return;
}

if (preg_match('/^<'.$Block['name'].'(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*>/i', $Line['text'])) # open
if($this->commonMarkHtmlBlocks)
{
if(isset($Block['interrupted']))
{
unset($Block['interrupted']);
$Block['markup'] .= "\n";
if(!in_array(strtolower($Block['name']),$this->commonMarkLiteralHtmlElements)){
return;
}
}
$Block['markup'] .= "\n".$Line['text'];
return $Block;
}

if (preg_match('/^<'.$Block['name'].'(?:\s*'.$this->regexHtmlAttribute.')*\s*>/i', $Line['text'])) # open
{
$Block['depth'] ++;
}

if (preg_match('/(.*?)<\/'.$Block['name'].'>[ ]*$/i', $Line['text'], $matches)) # close
if (preg_match('/(.*?)<\/'.$Block['name'].'>\s*$/i', $Line['text'], $matches)) # close
{
if ($Block['depth'] > 0)
{
Expand All @@ -748,7 +819,6 @@ protected function blockMarkupContinue($Line, array $Block)
if (isset($Block['interrupted']))
{
$Block['markup'] .= "\n";

unset($Block['interrupted']);
}

Expand All @@ -762,7 +832,7 @@ protected function blockMarkupContinue($Line, array $Block)

protected function blockReference($Line)
{
if (preg_match('/^\[(.+?)\]:[ ]*<?(\S+?)>?(?:[ ]+["\'(](.+)["\')])?[ ]*$/', $Line['text'], $matches))
if (preg_match('/^\[(.+?)\]:\s*<?(\S+?)>?(?:\s+["\'(](.+)["\')])?\s*$/', $Line['text'], $matches))
{
$id = strtolower($matches[1]);

Expand Down Expand Up @@ -1064,11 +1134,11 @@ protected function inlineCode($Excerpt)
{
$marker = $Excerpt['text'][0];

if (preg_match('/^('.$marker.'+)[ ]*(.+?)[ ]*(?<!'.$marker.')\1(?!'.$marker.')/s', $Excerpt['text'], $matches))
if (preg_match('/^('.$marker.'+)\s*(.+?)\s*(?<!'.$marker.')\1(?!'.$marker.')/s', $Excerpt['text'], $matches))
{
$text = $matches[2];
$text = htmlspecialchars($text, ENT_NOQUOTES, 'UTF-8');
$text = preg_replace("/[ ]*\n/", ' ', $text);
$text = preg_replace("/\s*\n/", ' ', $text);

return array(
'extent' => strlen($matches[0]),
Expand Down Expand Up @@ -1210,7 +1280,7 @@ protected function inlineLink($Excerpt)
return;
}

if (preg_match('/^[(]((?:[^ ()]|[(][^ )]+[)])+)(?:[ ]+("[^"]*"|\'[^\']*\'))?[)]/', $remainder, $matches))
if (preg_match('/^[(]((?:[^ ()]|[(][^ )]+[)])+)(?:\s+("[^"]*"|\'[^\']*\'))?[)]/', $remainder, $matches))
{
$Element['attributes']['href'] = $matches[1];

Expand Down Expand Up @@ -1261,7 +1331,7 @@ protected function inlineMarkup($Excerpt)
return;
}

if ($Excerpt['text'][1] === '/' and preg_match('/^<\/\w*[ ]*>/s', $Excerpt['text'], $matches))
if ($Excerpt['text'][1] === '/' and preg_match('/^<\/\w*\s*>/s', $Excerpt['text'], $matches))
{
return array(
'markup' => $matches[0],
Expand All @@ -1277,7 +1347,7 @@ protected function inlineMarkup($Excerpt)
);
}

if ($Excerpt['text'][1] !== ' ' and preg_match('/^<\w*(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*\/?>/s', $Excerpt['text'], $matches))
if ($Excerpt['text'][1] !== ' ' and preg_match('/^<\w*(?:\s*'.$this->regexHtmlAttribute.')*\s*\/?>/s', $Excerpt['text'], $matches))
{
return array(
'markup' => $matches[0],
Expand Down Expand Up @@ -1377,11 +1447,11 @@ protected function unmarkedText($text)
{
if ($this->breaksEnabled)
{
$text = preg_replace('/[ ]*\n/', "<br />\n", $text);
$text = preg_replace('/\s*\n/', "<br />\n", $text);
}
else
{
$text = preg_replace('/(?:[ ][ ]+|[ ]*\\\\)\n/', "<br />\n", $text);
$text = preg_replace('/(?:\s\s+|\s*\\\\)\n/', "<br />\n", $text);
$text = str_replace(" \n", "\n", $text);
}

Expand Down Expand Up @@ -1527,6 +1597,22 @@ static function instance($name = 'default')
'area', 'base', 'br', 'col', 'command', 'embed', 'hr', 'img', 'input', 'link', 'meta', 'param', 'source',
);

# Lower-case tag names that start an HTML block when CommonMark HTML block
# handling is enabled: blockMarkup() lower-cases the tag name found at the
# start of a line and looks it up in this list.
#
# The list follows the block-level tag names of the CommonMark spec. All six
# heading levels and 'iframe' are included, and every name appears once.
protected $commonMarkHtmlElements = array(
    'address', 'article', 'aside', 'base', 'basefont', 'blockquote', 'body',
    'caption', 'center', 'col', 'colgroup', 'dd', 'details', 'dialog',
    'dir', 'div', 'dl', 'dt', 'fieldset', 'figcaption', 'figure',
    'footer', 'form', 'frame', 'frameset',
    'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
    'head', 'header', 'hr', 'html', 'iframe', 'legend', 'li', 'link',
    'main', 'menu', 'menuitem', 'meta', 'nav', 'noframes', 'ol',
    'optgroup', 'option', 'p', 'param', 'pre', 'section', 'source',
    'summary', 'table', 'tbody', 'td', 'tfoot', 'th', 'thead', 'title',
    'tr', 'track', 'ul',
);

# Lower-case tag names whose HTML block is treated literally when CommonMark
# HTML block handling is enabled. blockMarkup() accepts them as block
# starters, and blockMarkupContinue() keeps such a block going after an
# interruption, whereas a block with any other tag name ends there.
protected $commonMarkLiteralHtmlElements = array(
'script','pre','style'
);


protected $textLevelElements = array(
'a', 'br', 'bdo', 'abbr', 'blink', 'nextid', 'acronym', 'basefont',
'b', 'em', 'big', 'cite', 'small', 'spacer', 'listing',
Expand Down
3 changes: 3 additions & 0 deletions test/CommonMarkTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ function test_($section, $markdown, $expectedHtml)
{
$Parsedown = new Parsedown();
$Parsedown->setUrlsLinked(false);
$Parsedown->setTabsExpanded(false);
$Parsedown->setCommonMarkHtmlBlocks(true);

$actualHtml = $Parsedown->text($markdown);
$actualHtml = $this->normalizeMarkup($actualHtml);
Expand All @@ -48,6 +50,7 @@ function($matches) use ( & $tests, & $currentSection, & $testCount) {
$markdown = preg_replace('/→/', "\t", $markdown);
$expectedHtml = $matches[2];
$expectedHtml = $this->normalizeMarkup($expectedHtml);
$expectedHtml = preg_replace('/→/', "\t", $expectedHtml);
$tests []= array(
$currentSection, # section
$markdown, # markdown
Expand Down