Skip to content

Commit

Permalink
Use HtmlString to distinguish between HTML and plain text strings
Browse files Browse the repository at this point in the history
Also fix bunch of over and under escaping and document the string semantics in API.
  • Loading branch information
jtojnar committed Feb 15, 2023
1 parent 25bb04e commit 9042236
Show file tree
Hide file tree
Showing 15 changed files with 140 additions and 75 deletions.
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

### Customization changes
- Custom spout parameter declarations should now use constants from `Parameter` interface. ([#1409](https://github.com/fossar/selfoss/pull/1409))
- Custom spouts are expected to pass `HtmlString` object to items’ title and content. ([#1368](https://github.com/fossar/selfoss/pull/1368))

### Other changes
- `tidy` PHP extension is now required if you want to use “Content extractor” spout. ([#1392](https://github.com/fossar/selfoss/pull/1392))
Expand Down
4 changes: 2 additions & 2 deletions docs/api-description.json
Original file line number Diff line number Diff line change
Expand Up @@ -951,12 +951,12 @@
"example": "2013-04-07T13:43:00+01:00"
},
"title": {
"description": "the title of the article",
"description": "The title of the article, can contain HTML tags",
"type": "string",
"example": "FTTH: Google Fiber für eine neue Großstadt"
},
"content": {
"description": "the full content of the article",
"description": "The full content of the article as a HTML fragment.",
"type": "string",
"example": "\n<p>Das 1-GBit/s-Angebot Google Fiber kommt nach Austin, die Hauptstadt des US-Bundesstaates Texas..."
},
Expand Down
9 changes: 6 additions & 3 deletions src/daos/mysql/Items.php
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
use DateTime;
use DateTimeImmutable;
use helpers\Configuration;
use helpers\HtmlString;
use Monolog\Logger;

/**
Expand Down Expand Up @@ -103,6 +104,8 @@ public function unstarr(int $id): void {

/**
* add new item
*
* @param array{datetime: \DateTimeInterface, title: HtmlString, content: HtmlString, thumbnail: ?string, icon: ?string, source: int, uid: string, link: string, author: ?string} $values
*/
public function add(array $values): void {
$this->database->exec('INSERT INTO ' . $this->configuration->dbPrefix . 'items (
Expand Down Expand Up @@ -131,9 +134,9 @@ public function add(array $values): void {
:author
)',
[
':datetime' => $values['datetime'],
':title' => $values['title'],
':content' => $values['content'],
':datetime' => $values['datetime']->format('Y-m-d H:i:s'),
':title' => $values['title']->getRaw(),
':content' => $values['content']->getRaw(),
':thumbnail' => $values['thumbnail'],
':icon' => $values['icon'],
':unread' => 1,
Expand Down
39 changes: 20 additions & 19 deletions src/helpers/ContentLoader.php
Original file line number Diff line number Diff line change
Expand Up @@ -150,9 +150,10 @@ public function fetch($source): void {
$sourceIconUrl = null;
$itemsSeen = [];
foreach ($items as $item) {
$titlePlainText = $item->getTitle()->getPlainText();
// item already in database?
if (isset($itemsFound[$item->getId()])) {
$this->logger->debug('item "' . $item->getTitle() . '" already in database.');
$this->logger->debug('item "' . $titlePlainText . '" already in database.');
$itemsSeen[] = $itemsFound[$item->getId()];
continue;
}
Expand All @@ -163,7 +164,7 @@ public function fetch($source): void {
$itemDate = new \DateTimeImmutable();
}
if ($itemDate < $minDate) {
$this->logger->debug('item "' . $item->getTitle() . '" (' . $itemDate->format(\DateTime::ATOM) . ') older than ' . $this->configuration->itemsLifetime . ' days');
$this->logger->debug('item "' . $titlePlainText . '" (' . $itemDate->format(\DateTime::ATOM) . ') older than ' . $this->configuration->itemsLifetime . ' days');
continue;
}

Expand All @@ -174,7 +175,7 @@ public function fetch($source): void {
}

// insert new item
$this->logger->debug('start insertion of new item "' . $item->getTitle() . '"');
$this->logger->debug('start insertion of new item "' . $titlePlainText . '"');

try {
// fetch content
Expand All @@ -184,14 +185,14 @@ public function fetch($source): void {
$content = $this->sanitizeContent($content);
} catch (\Throwable $e) {
$content = 'Error: Content not fetched. Reason: ' . $e->getMessage();
$this->logger->error('Can not fetch "' . $item->getTitle() . '"', ['exception' => $e]);
$this->logger->error('Can not fetch "' . $titlePlainText . '"', ['exception' => $e]);
}

// sanitize title
$title = trim($this->sanitizeField($item->getTitle()));
$title = HtmlString::fromRaw(trim($this->sanitizeField($item->getTitle())->getRaw()));

// Check sanitized title against filter
if ($this->filter($source, $title, $content) === false) {
if ($this->filter($source, $title->getRaw(), $content->getRaw()) === false) {
continue;
}

Expand All @@ -201,7 +202,7 @@ public function fetch($source): void {
'title' => $title,
'content' => $content,
'source' => $source['id'],
'datetime' => $itemDate->format('Y-m-d H:i:s'),
'datetime' => $itemDate,
'uid' => $item->getId(),
'link' => htmLawed($item->getLink(), ['deny_attribute' => '*', 'elements' => '-*']),
'author' => $item->getAuthor(),
Expand Down Expand Up @@ -313,13 +314,13 @@ protected function filter($source, string $title, string $content): bool {
/**
* Sanitize content for preventing XSS attacks.
*
* @param string $content content of the given feed
* @param HtmlString $content content of the given feed
*
* @return mixed|string sanitized content
* @return HtmlString sanitized content
*/
protected function sanitizeContent(string $content) {
return htmLawed(
$content,
protected function sanitizeContent(HtmlString $content): HtmlString {
return HtmlString::fromRaw(htmLawed(
$content->getRaw(),
[
'safe' => 1,
'deny_attribute' => '* -alt -title -src -href -target',
Expand All @@ -329,24 +330,24 @@ protected function sanitizeContent(string $content) {
'elements' => 'div,p,ul,li,a,img,dl,dt,dd,h1,h2,h3,h4,h5,h6,ol,br,table,tr,td,blockquote,pre,ins,del,th,thead,tbody,b,i,strong,em,tt,sub,sup,s,strike,code',
],
'img=width, height'
);
));
}

/**
* Sanitize a simple field
*
* @param string $value content of the given field
* @param HtmlString $value content of the given field
*
* @return mixed|string sanitized content
* @return HtmlString sanitized content
*/
protected function sanitizeField(string $value) {
return htmLawed(
$value,
protected function sanitizeField(HtmlString $value): HtmlString {
return HtmlString::fromRaw(htmLawed(
$value->getRaw(),
[
'deny_attribute' => '* -href -title -target',
'elements' => 'a,br,ins,del,b,i,strong,em,tt,sub,sup,s,code',
]
);
));
}

/**
Expand Down
45 changes: 45 additions & 0 deletions src/helpers/HtmlString.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
<?php

declare(strict_types=1);

namespace helpers;

/**
* A string wrapper representing a HTML fragment.
*/
class HtmlString {
/** @var string */
private $content;

private function __construct(string $content) {
$this->content = $content;
}

/**
* Creates a new HtmlString from a string containing a HTML fragment.
*/
public static function fromRaw(string $content): self {
return new self($content);
}

/**
* Creates a new HtmlString from a plain text string.
*/
public static function fromPlainText(string $content): self {
return new self(htmlspecialchars($content));
}

/**
* Returns a HTML fragment represented by the object.
*/
public function getRaw(): string {
return $this->content;
}

/**
* Returns a plain text without any HTML tags.
*/
public function getPlainText(): string {
return htmlspecialchars_decode(strip_tags($this->content));
}
}
2 changes: 1 addition & 1 deletion src/helpers/ImageUtils.php
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ public static function findFirstImageSource(string $html): ?string {
if (stripos($html, '<img') !== false) {
$imgsrc_regex = '#<\s*img [^\>]*src\s*=\s*(["\'])(.*?)\1#im';
if (preg_match($imgsrc_regex, $html, $matches)) {
return $matches[2];
return htmlspecialchars_decode($matches[2]);
} else {
return null;
}
Expand Down
17 changes: 9 additions & 8 deletions src/spouts/Item.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
namespace spouts;

use DateTimeInterface;
use helpers\HtmlString;

/**
* Value object representing a source item (e.g. an article).
Expand All @@ -15,10 +16,10 @@ class Item {
/** @var string an unique id for this item */
private $id;

/** @var string title */
/** @var HtmlString title */
private $title;

/** @var string content */
/** @var HtmlString content */
private $content;

/** @var ?string thumbnail */
Expand All @@ -44,8 +45,8 @@ class Item {
*/
public function __construct(
string $id,
string $title,
string $content,
HtmlString $title,
HtmlString $content,
?string $thumbnail,
?string $icon,
string $link,
Expand Down Expand Up @@ -89,14 +90,14 @@ public function withId(string $id): self {
* If the spout allows HTML in the title, HTML special chars are expected to be decoded by the spout
* (for instance when the spout feed is XML).
*/
public function getTitle(): string {
public function getTitle(): HtmlString {
return $this->title;
}

/**
* @return static
*/
public function withTitle(string $title): self {
public function withTitle(HtmlString $title): self {
$modified = clone $this;
$modified->title = $title;

Expand All @@ -109,14 +110,14 @@ public function withTitle(string $title): self {
* HTML special chars are expected to be decoded by the spout
* (for instance when the spout feed is XML).
*/
public function getContent(): string {
public function getContent(): HtmlString {
return $this->content;
}

/**
* @return static
*/
public function withContent(string $content): self {
public function withContent(HtmlString $content): self {
$modified = clone $this;
$modified->content = $content;

Expand Down
13 changes: 7 additions & 6 deletions src/spouts/facebook/page.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
namespace spouts\facebook;

use GuzzleHttp\Psr7\Uri;
use helpers\HtmlString;
use helpers\WebClient;
use spouts\Item;
use spouts\Parameter;
Expand Down Expand Up @@ -107,7 +108,7 @@ public function getIcon(): ?string {
public function getItems(): iterable {
foreach ($this->items as $item) {
$id = $item['id'];
$title = mb_strlen($item['message']) > 80 ? mb_substr($item['message'], 0, 100) . '' : $item['message'];
$title = HtmlString::fromPlainText((mb_strlen($item['message']) > 80 ? mb_substr($item['message'], 0, 100) . '' : $item['message']));
$content = $this->getPostContent($item);
$thumbnail = null;
$icon = null;
Expand All @@ -131,25 +132,25 @@ public function getItems(): iterable {
}
}

private function getPostContent(array $item): string {
$message = $item['message'];
private function getPostContent(array $item): HtmlString {
$message = htmlspecialchars($item['message']);

if (isset($item['attachments']) && count($item['attachments']['data']) > 0) {
foreach ($item['attachments']['data'] as $media) {
if ($media['type'] === 'photo') {
$message .= '<figure>' . PHP_EOL;
$message .= '<a href="' . $media['target']['url'] . '"><img src="' . $media['media']['image']['src'] . '" alt=""></a>' . PHP_EOL;
$message .= '<a href="' . htmlspecialchars($media['target']['url'], ENT_QUOTES) . '"><img src="' . htmlspecialchars($media['media']['image']['src'], ENT_QUOTES) . '" alt=""></a>' . PHP_EOL;

// Some photos will have the same description, no need to display it twice
if ($media['description'] !== $item['message']) {
$message .= '<figcaption>' . $media['description'] . '</figcaption>' . PHP_EOL;
$message .= '<figcaption>' . htmlspecialchars($media['description']) . '</figcaption>' . PHP_EOL;
}

$message .= '</figure>' . PHP_EOL;
}
}
}

return $message;
return HtmlString::fromRaw($message);
}
}
5 changes: 3 additions & 2 deletions src/spouts/github/commits.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

namespace spouts\github;

use helpers\HtmlString;
use helpers\WebClient;
use spouts\Item;
use spouts\Parameter;
Expand Down Expand Up @@ -107,8 +108,8 @@ public function getItems(): iterable {
$message = $item['commit']['message'];

$id = $item['sha'];
$title = htmlspecialchars(self::cutTitle($message));
$content = nl2br(htmlspecialchars($message), false);
$title = HtmlString::fromPlainText(self::cutTitle($message));
$content = HtmlString::fromRaw(nl2br(htmlspecialchars($message), false));
$thumbnail = null;
$icon = null;
$link = $item['html_url'];
Expand Down
Loading

0 comments on commit 9042236

Please sign in to comment.