Skip to content

Commit

Permalink
Negotiate cache_expiration_time (#26)
Browse files Browse the repository at this point in the history
* Add support for HTTP cache headers
- `Cache-Control: max-age` minus `Age`, extendable by `$simplepie_cache_duration`
- `Cache-Control: must-revalidate` will prevent `$simplepie_cache_duration` from extending past the `max-age`
- `Cache-Control: no-cache` will return the current time
- `Cache-Control: no-store` will return `0`
- `Expires` but only if `Cache-Control: max-age` is absent

* Refined logic

* More doc

* Add support for `s-maxage`

* Typos

* Pragmatism?

* Typos

* Remove support of s-maxage
Seems to only make sense for CDNs

* Move order of headers

* Remove superfluous comments

* Remove wrong comment

* Workaround for buggy servers returning wrong cache-control headers for 304 responses

* fix logic for 304 workaround

* Fix whitespace

* Minor simplification
  • Loading branch information
Alkarex authored Sep 20, 2024
1 parent 1cabd55 commit 7090eed
Show file tree
Hide file tree
Showing 2 changed files with 140 additions and 12 deletions.
79 changes: 79 additions & 0 deletions src/HTTP/Utils.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
<?php

declare(strict_types=1);

namespace SimplePie\HTTP;

/**
 * HTTP utility functions used to derive cache lifetimes from HTTP response headers.
 * FreshRSS
 * @internal
 */
final class Utils
{
    /**
     * Extracts `max-age` from the `Cache-Control` HTTP header.
     *
     * The directive name is matched case-insensitively and the value may be given
     * either as a bare token (`max-age=60`) or as a quoted string (`max-age="60"`).
     *
     * @param array<string,mixed> $http_headers HTTP headers of the response, keyed by lower-case header name
     * @return int|null The `max-age` value in seconds, or `null` if the header is absent, not a string, or has no `max-age`
     *
     * FreshRSS
     */
    public static function get_http_max_age(array $http_headers): ?int
    {
        $cache_control = $http_headers['cache-control'] ?? null;

        return is_string($cache_control) ? self::parse_max_age($cache_control) : null;
    }

    /**
     * Negotiate the cache expiration time based on the HTTP response headers.
     *
     * Returns an absolute expiration time (seconds since the Unix Epoch), computed as follows,
     * in order of precedence:
     * - `Cache-Control: no-store` returns `time() + $cache_duration_min - 3`
     * - `Cache-Control: no-cache` returns `time() + $cache_duration_min`
     * - `Cache-Control: must-revalidate` lowers `$cache_duration` to `$cache_duration_min` for the rules below
     * - `Cache-Control: max-age` minus `Age` is raised to at least `$cache_duration` and capped at `$cache_duration_max`
     * - `Expires` is used only when no `max-age` was found, with the same lower and upper bounds
     * - otherwise `time() + $cache_duration`
     *
     * Header values that are not strings are ignored rather than passed to string functions.
     *
     * @param array<string,mixed> $http_headers HTTP headers of the response, keyed by lower-case header name
     * @param int $cache_duration Desired cache duration in seconds, potentially extended by HTTP response headers
     * @param int $cache_duration_min Minimal cache duration (in seconds), used for `no-store`, `no-cache` and `must-revalidate`
     * @param int $cache_duration_max Maximal cache duration (in seconds), capping `Cache-Control: max-age` and `Expires`
     * @return int The negotiated cache expiration time in seconds since the Unix Epoch
     *
     * FreshRSS
     */
    public static function negociate_cache_expiration_time(array $http_headers, int $cache_duration, int $cache_duration_min, int $cache_duration_max): int
    {
        // Sample the clock once so that every bound below is relative to the same instant.
        $now = time();

        $cache_control = $http_headers['cache-control'] ?? '';
        if (is_string($cache_control) && $cache_control !== '') {
            // Directive names are case-insensitive, hence the `i` modifier.
            if (preg_match('/\bno-store\b/i', $cache_control)) {
                return $now + $cache_duration_min - 3; // -3 to distinguish from no-cache if needed
            }
            if (preg_match('/\bno-cache\b/i', $cache_control)) {
                return $now + $cache_duration_min;
            }
            if (preg_match('/\bmust-revalidate\b/i', $cache_control)) {
                $cache_duration = $cache_duration_min;
            }
            $max_age = self::parse_max_age($cache_control);
            if ($max_age !== null) {
                $age = $http_headers['age'] ?? '';
                if (is_numeric($age)) {
                    // A negative Age is invalid and must not extend the freshness lifetime.
                    $max_age -= max(0, (int) $age);
                }
                return $now + min(max($max_age, $cache_duration), $cache_duration_max);
            }
        }

        $expires = $http_headers['expires'] ?? '';
        if (is_string($expires) && $expires !== '') {
            $expire_date = \SimplePie\Misc::parse_date($expires);
            if ($expire_date !== false) {
                return min(max($expire_date, $now + $cache_duration), $now + $cache_duration_max);
            }
        }

        return $now + $cache_duration;
    }

    /**
     * Parses the `max-age` directive out of a raw `Cache-Control` header value.
     *
     * @param string $cache_control Raw `Cache-Control` header value
     * @return int|null The `max-age` value in seconds, or `null` if the directive is absent
     */
    private static function parse_max_age(string $cache_control): ?int
    {
        if (preg_match('/\bmax-age="?(\d+)\b/i', $cache_control, $matches) === 1) {
            return (int) $matches[1];
        }

        return null;
    }
}
73 changes: 61 additions & 12 deletions src/SimplePie.php
Original file line number Diff line number Diff line change
Expand Up @@ -509,12 +509,28 @@ class SimplePie
public $force_cache_fallback = false;

/**
* @var int Cache duration (in seconds)
* @var int Cache duration (in seconds), but may be overridden by HTTP response headers (FreshRSS)
* @see SimplePie::set_cache_duration()
* @access private
*/
public $cache_duration = 3600;

/**
* @var int Minimal cache duration (in seconds), overriding HTTP response headers `Cache-Control` and `Expires`
* @see SimplePie::set_cache_duration()
* @access private
* FreshRSS
*/
public $cache_duration_min = 60;

/**
* @var int Maximal cache duration (in seconds), overriding HTTP response headers `Cache-Control` and `Expires`
* @see SimplePie::set_cache_duration()
* @access private
* FreshRSS
*/
public $cache_duration_max = 86400;

/**
* @var int Auto-discovery cache duration (in seconds)
* @see SimplePie::set_autodiscovery_cache_duration()
Expand Down Expand Up @@ -989,12 +1005,26 @@ public function force_cache_fallback(bool $enable = false)
* Set the length of time (in seconds) that the contents of a feed will be
* cached
*
* @param int $seconds The feed content cache duration
* FreshRSS: The cache is (partially) HTTP compliant; see \SimplePie\HTTP\Utils::negociate_cache_expiration_time() for the rules applied.
*
* @param int $seconds The feed content cache duration (which may be overridden by HTTP response headers)
* @param int $min The minimum cache duration (default: 60s), overriding HTTP response headers `Cache-Control` and `Expires`
* @param int $max The maximum cache duration (default: 24h), overriding HTTP response headers `Cache-Control` and `Expires`
* @return void
*/
public function set_cache_duration(int $seconds = 3600)
public function set_cache_duration(int $seconds = 3600, ?int $min = null, ?int $max = null)
{
$this->cache_duration = $seconds;
$this->cache_duration = max(0, $seconds);
if (is_int($min)) { // FreshRSS
$this->cache_duration_min = min(max(0, $min), $seconds);
} elseif ($this->cache_duration_min > $seconds) {
$this->cache_duration_min = $seconds;
}
if (is_int($max)) { // FreshRSS
$this->cache_duration_max = max($seconds, $max);
} elseif ($this->cache_duration_max < $seconds) {
$this->cache_duration_max = $seconds;
}
}

/**
Expand Down Expand Up @@ -1851,7 +1881,7 @@ public function init()
$this->data['hash'] = $this->data['hash'] ?? $this->clean_hash($this->raw_data); // FreshRSS

// Cache the file if caching is enabled
$this->data['cache_expiration_time'] = $this->cache_duration + time();
$this->data['cache_expiration_time'] = \SimplePie\HTTP\Utils::negociate_cache_expiration_time($this->data['headers'] ?? [], $this->cache_duration, $this->cache_duration_min, $this->cache_duration_max);

if ($cache && !$cache->set_data($this->get_cache_filename($this->feed_url), $this->data, $this->cache_duration)) {
trigger_error("$this->cache_location is not writable. Make sure you've set the correct relative or absolute path, and that the location is server-writable.", E_USER_WARNING);
Expand Down Expand Up @@ -1972,8 +2002,10 @@ protected function fetch_data(&$cache)
$this->status_code = 0;

if ($this->force_cache_fallback) {
$this->data['cache_expiration_time'] = $this->cache_duration + time(); // FreshRSS
$cache->set_data($cacheKey, $this->data, $this->cache_duration);
$this->data['cache_expiration_time'] = \SimplePie\HTTP\Utils::negociate_cache_expiration_time($this->data['headers'] ?? [], $this->cache_duration, $this->cache_duration_min, $this->cache_duration_max); // FreshRSS
if (!$cache->set_data($cacheKey, $this->data, $this->cache_duration)) { // FreshRSS
trigger_error("$this->cache_location is not writable. Make sure you've set the correct relative or absolute path, and that the location is server-writable.", E_USER_WARNING);
}

return true;
}
Expand All @@ -1986,13 +2018,28 @@ protected function fetch_data(&$cache)
// is still valid.
$this->raw_data = false;
if (isset($file)) { // FreshRSS
$old_cache_control = $this->data['headers']['cache-control'] ?? '';
$old_max_age = \SimplePie\HTTP\Utils::get_http_max_age($this->data['headers']);

// Update cache metadata
$this->data['cache_expiration_time'] = $this->cache_duration + time();
$this->data['headers'] = array_map(function (array $values): string {
return implode(',', $values);
}, $file->get_headers());

// Workaround for buggy servers returning wrong cache-control headers for 304 responses
if ($old_max_age !== null) {
$new_max_age = \SimplePie\HTTP\Utils::get_http_max_age($this->data['headers']);
if ($new_max_age === null || $new_max_age > $old_max_age) {
// Allow servers to return a shorter cache duration for 304 responses, but not longer
$this->data['headers']['cache-control'] = $old_cache_control;
}
}

$this->data['cache_expiration_time'] = \SimplePie\HTTP\Utils::negociate_cache_expiration_time($this->data['headers'] ?? [], $this->cache_duration, $this->cache_duration_min, $this->cache_duration_max);
}
if (!$cache->set_data($cacheKey, $this->data, $this->cache_duration)) { // FreshRSS
trigger_error("$this->cache_location is not writable. Make sure you've set the correct relative or absolute path, and that the location is server-writable.", E_USER_WARNING);
}
$cache->set_data($cacheKey, $this->data, $this->cache_duration);

return true;
}
Expand All @@ -2001,11 +2048,13 @@ protected function fetch_data(&$cache)
$hash = $this->clean_hash($file->get_body_content());
if (($this->data['hash'] ?? null) === $hash) {
// Update cache metadata
$this->data['cache_expiration_time'] = $this->cache_duration + time();
$this->data['headers'] = array_map(function (array $values): string {
return implode(',', $values);
}, $file->get_headers());
$cache->set_data($cacheKey, $this->data, $this->cache_duration);
$this->data['cache_expiration_time'] = \SimplePie\HTTP\Utils::negociate_cache_expiration_time($this->data['headers'] ?? [], $this->cache_duration, $this->cache_duration_min, $this->cache_duration_max);
if (!$cache->set_data($cacheKey, $this->data, $this->cache_duration)) {
trigger_error("$this->cache_location is not writable. Make sure you've set the correct relative or absolute path, and that the location is server-writable.", E_USER_WARNING);
}

return true; // Content unchanged even though server did not send a 304
} else {
Expand Down Expand Up @@ -2138,7 +2187,7 @@ protected function fetch_data(&$cache)
'url' => $this->feed_url,
'feed_url' => $file->get_final_requested_uri(),
'build' => Misc::get_build(),
'cache_expiration_time' => $this->cache_duration + time(),
'cache_expiration_time' => \SimplePie\HTTP\Utils::negociate_cache_expiration_time($this->data['headers'] ?? [], $this->cache_duration, $this->cache_duration_min, $this->cache_duration_max), // FreshRSS
'cache_version' => self::CACHE_VERSION, // FreshRSS
'hash' => empty($hash) ? $this->clean_hash($file->get_body_content()) : $hash, // FreshRSS
];
Expand Down

0 comments on commit 7090eed

Please sign in to comment.