Skip to content

Commit

Permalink
[FEATURE] Add HtmlPruner::removeRedundantClasses
Browse files Browse the repository at this point in the history
Part of #380.
  • Loading branch information
JakeQZ committed Sep 26, 2019
1 parent 4949af5 commit 4c902e5
Show file tree
Hide file tree
Showing 3 changed files with 297 additions and 0 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ This project adheres to [Semantic Versioning](https://semver.org/).
## x.y.z

### Added
- Add `HtmlPruner::removeRedundantClasses`
([#380](https://github.com/MyIntervals/emogrifier/issues/380),
[#708](https://github.com/MyIntervals/emogrifier/pull/708))
- Add an `ArrayIntersector` class
([#708](https://github.com/MyIntervals/emogrifier/pull/708),
[#710](https://github.com/MyIntervals/emogrifier/pull/710))
Expand Down
43 changes: 43 additions & 0 deletions src/Emogrifier/HtmlProcessor/HtmlPruner.php
Original file line number Diff line number Diff line change
Expand Up @@ -41,4 +41,47 @@ public function removeInvisibleNodes()

return $this;
}

/**
 * Removes classes that are no longer required (e.g. because there are no longer any CSS rules that reference them)
 * from `class` attributes.
 *
 * Note that this does not inspect the CSS, but expects to be provided with a list of classes that are still in use.
 *
 * A `class` attribute that is left without any class to keep is removed altogether.  Empty tokens produced while
 * splitting an attribute value are discarded, so an empty string in `$classesToKeep` can never cause an empty
 * `class` attribute to be retained.
 *
 * This method also has the (presumably beneficial) side-effect of minifying (removing superfluous whitespace from)
 * `class` attributes.
 *
 * @param string[] $classesToKeep list of class names that should not be removed
 *
 * @return self fluent interface
 */
public function removeRedundantClasses(array $classesToKeep)
{
    // Every element that carries a `class` attribute, wherever it is in the document.
    $elementsWithClassAttribute = $this->xPath->query('//*[@class]');

    if ($classesToKeep === []) {
        // Nothing can survive the filter, so there is no need to parse the attribute values at all.
        foreach ($elementsWithClassAttribute as $element) {
            $element->removeAttribute('class');
        }

        return $this;
    }

    // https://stackoverflow.com/questions/6329211/php-array-intersect-efficiency
    // It's more efficient to invert the array and use `array_intersect_key()` when doing many intersections.
    $classesToKeepAsKeys = \array_flip($classesToKeep);

    foreach ($elementsWithClassAttribute as $element) {
        // `PREG_SPLIT_NO_EMPTY` drops the empty token that an empty/whitespace-only value (or whitespace that
        // `trim()` does not strip, such as a form feed) would otherwise contribute as a bogus class name.
        $elementClasses = \preg_split(
            '/\\s++/',
            \trim($element->getAttribute('class')),
            -1,
            \PREG_SPLIT_NO_EMPTY
        );
        // Flipping to keys de-duplicates the classes; `array_intersect_key()` preserves their original order.
        $elementClassesToKeep = \array_flip(\array_intersect_key(
            \array_flip($elementClasses),
            $classesToKeepAsKeys
        ));

        if ($elementClassesToKeep === []) {
            $element->removeAttribute('class');
        } else {
            $element->setAttribute('class', \implode(' ', $elementClassesToKeep));
        }
    }

    return $this;
}
}
251 changes: 251 additions & 0 deletions tests/Unit/Emogrifier/HtmlProcessor/HtmlPrunerTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -85,4 +85,255 @@ public function removeInvisibleNodesRemovesNodesWithDisplayNone($displayNone)

self::assertNotContains('<div', $subject->render());
}

/**
 * Checks that `removeRedundantClasses` returns the very instance it was called on, so that calls can be chained.
 *
 * @test
 */
public function removeRedundantClassesProvidesFluentInterface()
{
    $pruner = HtmlPruner::fromHtml('<html></html>');

    $returnValue = $pruner->removeRedundantClasses([]);

    self::assertSame($pruner, $returnValue);
}

/**
 * Provides lists of classes to keep: an empty list, and a list with a single class.
 *
 * @return string[][][]
 */
public function classesToKeepDataProvider()
{
    $dataSets = [];

    $dataSets['no classes to keep'] = [[]];
    $dataSets['1 class to keep'] = [['foo']];

    return $dataSets;
}

/**
 * Checks that markup without any `class` attribute is still contained, unchanged, in the rendered output.
 *
 * @test
 *
 * @param string[] $classesToKeep
 *
 * @dataProvider classesToKeepDataProvider
 */
public function removeRedundantClassesPreservesHtmlWithoutClasses(array $classesToKeep)
{
    $paragraphWithoutClass = '<p style="color: green;">hello</p>';
    $pruner = HtmlPruner::fromHtml('<html>' . $paragraphWithoutClass . '</html>');

    $pruner->removeRedundantClasses($classesToKeep);
    $renderedHtml = $pruner->render();

    self::assertContains($paragraphWithoutClass, $renderedHtml);
}

/**
 * Provides HTML snippets together with lists of classes to keep, where none of the classes to keep occurs in the
 * snippet.  Each data set also lists the classes that are expected to be removed.
 *
 * @return (string|string[])[][]
 */
public function nonMatchedClassesDataProvider()
{
    // Fixtures that are shared between several data sets.
    $fooParagraph = '<p class="foo">hello</p>';
    $fooAndBarParagraphs = '<p class="foo">hello</p><p class="bar">world</p>';
    $keepNothing = [];
    $keepOnlyBaz = ['baz'];
    $fooRemoved = ['foo'];
    $fooAndBarRemoved = ['foo', 'bar'];

    $dataSets = [];

    $dataSets['1 attribute, 1 class, no classes to keep'] = [
        'html' => $fooParagraph,
        'classes to keep' => $keepNothing,
        'classes expected to be removed' => $fooRemoved,
    ];
    $dataSets['2 attributes, 1 different class each, no classes to keep'] = [
        'html' => $fooAndBarParagraphs,
        'classes to keep' => $keepNothing,
        'classes expected to be removed' => $fooAndBarRemoved,
    ];
    $dataSets['1 attribute, 1 class, 1 different class to keep'] = [
        'html' => $fooParagraph,
        'classes to keep' => $keepOnlyBaz,
        'classes expected to be removed' => $fooRemoved,
    ];
    $dataSets['2 attributes, 1 different class each, 1 different class to keep'] = [
        'html' => $fooAndBarParagraphs,
        'classes to keep' => $keepOnlyBaz,
        'classes expected to be removed' => $fooAndBarRemoved,
    ];
    $dataSets['2 attributes, same 1 class each, 1 different class to keep'] = [
        'html' => '<p class="foo">hello</p><p class="foo">world</p>',
        'classes to keep' => $keepOnlyBaz,
        'classes expected to be removed' => $fooRemoved,
    ];
    $dataSets['1 attribute, 2 classes, 1 different class to keep'] = [
        'html' => '<p class="foo bar">hello</p>',
        'classes to keep' => $keepOnlyBaz,
        'classes expected to be removed' => $fooAndBarRemoved,
    ];
    $dataSets['1 attribute, 1 class with extra whitespace, 1 different class to keep'] = [
        'html' => '<p class=" foo ">hello</p>',
        'classes to keep' => $keepOnlyBaz,
        'classes expected to be removed' => $fooRemoved,
    ];
    $dataSets['1 attribute, 2 classes with extra whitespace, 1 different class to keep'] = [
        'html' => '<p class=" foo bar ">hello</p>',
        'classes to keep' => $keepOnlyBaz,
        'classes expected to be removed' => $fooAndBarRemoved,
    ];
    $dataSets['1 attribute, 2 classes separated by newline, 1 different class to keep'] = [
        'html' => "<p class=\"foo\nbar\">hello</p>",
        'classes to keep' => $keepOnlyBaz,
        'classes expected to be removed' => $fooAndBarRemoved,
    ];

    return $dataSets;
}

/**
 * Checks that no `class` attribute remains in the rendered output when none of the classes in the HTML is to be
 * kept.
 *
 * @test
 *
 * @param string $html
 * @param string[] $classesToKeep
 *
 * @dataProvider nonMatchedClassesDataProvider
 */
public function removeRedundantClassesRemovesClassAttributesContainingNoClassesToKeep($html, array $classesToKeep)
{
    $pruner = HtmlPruner::fromHtml('<html>' . $html . '</html>');

    $pruner->removeRedundantClasses($classesToKeep);
    $renderedHtml = $pruner->render();

    // None of the fixtures contains the text "class" anywhere other than as an attribute name.
    self::assertNotContains('class', $renderedHtml);
}

/**
 * Provides the combined data sets of `nonMatchedClassesDataProvider`, the matched-class data sets defined here, and
 * `matchedClassesWithExtraWhitespaceDataProvider`, in that order.
 *
 * A data set without a 'classes expected to be removed' entry expects no class to be removed.
 *
 * @return (string|string[])[][]
 */
public function classesDataProvider()
{
    $dataSetsWithoutMatch = $this->nonMatchedClassesDataProvider();

    // Fixtures that are shared between several data sets.
    $fooParagraph = '<p class="foo">hello</p>';
    $fooAndBarParagraphs = '<p class="foo">hello</p><p class="bar">world</p>';
    $threeClassParagraph = '<p class="foo bar baz">hello</p>';

    $dataSetsWithMatch = [];
    $dataSetsWithMatch['1 attribute, 1 class, that class to be kept'] = [
        'html' => $fooParagraph,
        'classes to keep' => ['foo'],
    ];
    $dataSetsWithMatch['2 attributes, 1 different class each, both classes to be kept'] = [
        'html' => $fooAndBarParagraphs,
        'classes to keep' => ['foo', 'bar'],
    ];
    $dataSetsWithMatch['2 attributes, 1 different class each, 1st class to be kept'] = [
        'html' => $fooAndBarParagraphs,
        'classes to keep' => ['foo'],
        'classes expected to be removed' => ['bar'],
    ];
    $dataSetsWithMatch['2 attributes, 1 different class each, 2nd class to be kept'] = [
        'html' => $fooAndBarParagraphs,
        'classes to keep' => ['bar'],
        'classes expected to be removed' => ['foo'],
    ];
    $dataSetsWithMatch['2 attributes, same 1 class each, that class to be kept'] = [
        'html' => '<p class="foo">hello</p><p class="foo">world</p>',
        'classes to keep' => ['foo'],
    ];
    $dataSetsWithMatch['1 attribute, 2 classes, both to be kept'] = [
        'html' => '<p class="foo bar">hello</p>',
        'classes to keep' => ['foo', 'bar'],
    ];
    $dataSetsWithMatch['first class in attribute is to be removed'] = [
        'html' => $threeClassParagraph,
        'classes to keep' => ['bar', 'baz'],
        'classes expected to be removed' => ['foo'],
    ];
    $dataSetsWithMatch['middle class in attribute is to be removed'] = [
        'html' => $threeClassParagraph,
        'classes to keep' => ['foo', 'baz'],
        'classes expected to be removed' => ['bar'],
    ];
    $dataSetsWithMatch['last class in attribute is to be removed'] = [
        'html' => $threeClassParagraph,
        'classes to keep' => ['foo', 'bar'],
        'classes expected to be removed' => ['baz'],
    ];

    $dataSetsWithExtraWhitespace = $this->matchedClassesWithExtraWhitespaceDataProvider();

    // Array union: on a key collision the entry from the left-hand operand wins.
    $allDataSets = $dataSetsWithoutMatch + $dataSetsWithMatch + $dataSetsWithExtraWhitespace;

    return $allDataSets;
}

/**
 * Provides HTML snippets whose `class` attribute values contain leading/trailing whitespace or a newline separator,
 * together with lists of classes to keep of which at least one occurs in the snippet.
 *
 * A data set without a 'classes expected to be removed' entry expects no class to be removed.
 *
 * @return (string|string[])[][]
 */
public function matchedClassesWithExtraWhitespaceDataProvider()
{
    // Fixtures that are shared between several data sets.
    $paddedTwoClassParagraph = '<p class=" foo bar ">hello</p>';
    $newlineSeparatedParagraph = "<p class=\"foo\nbar\">hello</p>";

    $dataSets = [];

    $dataSets['1 attribute, 1 class with extra whitespace, that class to be kept'] = [
        'html' => '<p class=" foo ">hello</p>',
        'classes to keep' => ['foo'],
    ];
    $dataSets['1 attribute, 2 classes with extra whitespace, both to be kept'] = [
        'html' => $paddedTwoClassParagraph,
        'classes to keep' => ['foo', 'bar'],
    ];
    $dataSets['1 attribute, 2 classes with extra whitespace, 1 to be kept'] = [
        'html' => $paddedTwoClassParagraph,
        'classes to keep' => ['foo'],
        'classes expected to be removed' => ['bar'],
    ];
    $dataSets['1 attribute, 2 classes separated by newline, both to be kept'] = [
        'html' => $newlineSeparatedParagraph,
        'classes to keep' => ['foo', 'bar'],
    ];
    $dataSets['1 attribute, 2 classes separated by newline, 1 to be kept'] = [
        'html' => $newlineSeparatedParagraph,
        'classes to keep' => ['foo'],
        'classes expected to be removed' => ['bar'],
    ];

    return $dataSets;
}

/**
 * Checks that each class to keep occurs in the rendered output as often as it did in the input HTML, and that none
 * of the classes expected to be removed occurs in the output at all.
 *
 * @test
 *
 * @param string $html
 * @param string[] $classesToKeep
 * @param string[] $classesExpectedToBeRemoved
 *
 * @dataProvider classesDataProvider
 */
public function removeRedundantClassesRemovesOnlyClassesNotToKeep(
    $html,
    array $classesToKeep,
    array $classesExpectedToBeRemoved = []
) {
    $pruner = HtmlPruner::fromHtml('<html>' . $html . '</html>');

    $pruner->removeRedundantClasses($classesToKeep);

    $renderedHtml = $pruner->render();

    foreach ($classesToKeep as $keptClass) {
        $occurrencesInInput = \substr_count($html, $keptClass);
        if ($occurrencesInInput === 0) {
            // A class to keep that never occurred in the input cannot be checked for preservation.
            continue;
        }

        $failureMessage = 'asserting \'' . $renderedHtml . '\' contains ' . $occurrencesInInput
            . ' instance(s) of "' . $keptClass . '"';
        self::assertSame($occurrencesInInput, \substr_count($renderedHtml, $keptClass), $failureMessage);
    }

    foreach ($classesExpectedToBeRemoved as $removedClass) {
        self::assertNotContains($removedClass, $renderedHtml);
    }
}

/**
 * Checks that the values of the `class` attributes remaining in the rendered output have no leading whitespace, no
 * trailing whitespace, and no run of two or more whitespace characters.
 *
 * @test
 *
 * @param string $html
 * @param string[] $classesToKeep
 *
 * @dataProvider matchedClassesWithExtraWhitespaceDataProvider
 */
public function removeRedundantClassesMinifiesClassAttributes($html, array $classesToKeep)
{
    $subject = HtmlPruner::fromHtml('<html>' . $html . '</html>');

    $subject->removeRedundantClasses($classesToKeep);

    \preg_match_all('/class="([^"]*+)"/', $subject->render(), $classAttributeMatches);
    // Every data set keeps at least one class that occurs in its HTML, so at least one attribute must be found.
    // Without this check the loop below would not run and the test would pass without verifying anything.
    self::assertNotEmpty($classAttributeMatches[1], 'expected at least one class attribute in the rendered HTML');
    foreach ($classAttributeMatches[1] as $classAttributeValue) {
        self::assertNotRegExp('/^\\s|\\s{2}|\\s$/', $classAttributeValue);
    }
}
}

0 comments on commit 4c902e5

Please sign in to comment.