From 6254b8593e2f7636db65db23c1228482e38be44f Mon Sep 17 00:00:00 2001 From: Dag Date: Wed, 19 Jul 2023 05:05:49 +0200 Subject: [PATCH] refactor(cache): extract and encapsulate cache expiration logic (#3547) * refactor(cache): extract and encapsulate cache expiration logic * fix: logic bug in getSimpleHTMLDOMCached * fix: silly me, index should of course be on the key column * silly me again, PRIMARY keys get index by default lol * comment out the delete portion in loadData * remove a few log statements * tweak twitter cache timeout --- actions/DisplayAction.php | 29 +++++++++++----------- bridges/DemoBridge.php | 1 + bridges/SpotifyBridge.php | 17 +++++-------- bridges/TwitterBridge.php | 6 +---- caches/FileCache.php | 30 ++++++++++++++--------- caches/MemcachedCache.php | 51 ++++++++++++++++++++------------------- caches/NullCache.php | 4 +-- caches/SQLiteCache.php | 40 ++++++++++++++++++------------ lib/BridgeAbstract.php | 22 ++++------------- lib/CacheInterface.php | 4 +-- lib/TwitterClient.php | 15 ++++++------ lib/contents.php | 42 ++++++++++++-------------------- 12 files changed, 124 insertions(+), 137 deletions(-) diff --git a/actions/DisplayAction.php b/actions/DisplayAction.php index 91cfb95f..157937fb 100644 --- a/actions/DisplayAction.php +++ b/actions/DisplayAction.php @@ -90,36 +90,34 @@ class DisplayAction implements ActionInterface $cache = RssBridge::getCache(); $cache->setScope(''); $cache->setKey($cache_params); - // This cache purge will basically delete all cache items older than 24h, regardless of scope and key - $cache->purgeCache(86400); $items = []; $infos = []; - $mtime = $cache->getTime(); + + $feed = $cache->loadData($cacheTimeout); if ( - $mtime - && (time() - $cacheTimeout < $mtime) + $feed && !Debug::isEnabled() ) { - // At this point we found the feed in the cache and debug mode is disabled - if (isset($_SERVER['HTTP_IF_MODIFIED_SINCE'])) { + $modificationTime = $cache->getTime(); // The client wants to know if the feed has changed since its last check - $stime = strtotime($_SERVER['HTTP_IF_MODIFIED_SINCE']); - if ($mtime <= $stime) { - $lastModified2 = gmdate('D, d M Y H:i:s ', $mtime) . 'GMT'; + $modifiedSince = strtotime($_SERVER['HTTP_IF_MODIFIED_SINCE']); + if ($modificationTime <= $modifiedSince) { + $lastModified2 = gmdate('D, d M Y H:i:s ', $modificationTime) . 'GMT'; return new Response('', 304, ['Last-Modified' => $lastModified2]); } } - // Load the feed from cache and prepare it - $cached = $cache->loadData(); - if (isset($cached['items']) && isset($cached['extraInfos'])) { - foreach ($cached['items'] as $item) { + if ( + isset($feed['items']) + && isset($feed['extraInfos']) + ) { + foreach ($feed['items'] as $item) { $items[] = new FeedItem($item); } - $infos = $cached['extraInfos']; + $infos = $feed['extraInfos']; } } else { // At this point we did NOT find the feed in the cache or debug mode is enabled. @@ -173,6 +171,7 @@ class DisplayAction implements ActionInterface }, $items), 'extraInfos' => $infos ]); + $cache->purgeCache(); } $format->setItems($items); diff --git a/bridges/DemoBridge.php b/bridges/DemoBridge.php index 06ec4e1e..15ab7377 100644 --- a/bridges/DemoBridge.php +++ b/bridges/DemoBridge.php @@ -6,6 +6,7 @@ class DemoBridge extends BridgeAbstract const NAME = 'DemoBridge'; const URI = 'http://github.com/rss-bridge/rss-bridge'; const DESCRIPTION = 'Bridge used for demos'; + const CACHE_TIMEOUT = 15; const PARAMETERS = [ 'testCheckbox' => [ diff --git a/bridges/SpotifyBridge.php b/bridges/SpotifyBridge.php index 169075e2..7b7e2b1d 100644 --- a/bridges/SpotifyBridge.php +++ b/bridges/SpotifyBridge.php @@ -282,14 +282,10 @@ class SpotifyBridge extends BridgeAbstract $cacheKey = sprintf('%s:%s', $this->getInput('clientid'), $this->getInput('clientsecret')); $cache->setScope('SpotifyBridge'); $cache->setKey([$cacheKey]); - - $time = null; - if ($cache->getTime()) { - $time = (new DateTime())->getTimestamp() - $cache->getTime(); - } - - if (!$cache->getTime() || $time >= 3600) { - // fetch token + $token = $cache->loadData(3600); + if ($token) { + $this->token = $token; + } else { $basicAuth = base64_encode(sprintf('%s:%s', $this->getInput('clientid'), $this->getInput('clientsecret'))); $json = getContents('https://accounts.spotify.com/api/token', [ "Authorization: Basic $basicAuth" @@ -298,10 +294,9 @@ class SpotifyBridge extends BridgeAbstract ]); $data = Json::decode($json); $this->token = $data['access_token']; - + $cache->setScope('SpotifyBridge'); + $cache->setKey([$cacheKey]); $cache->saveData($this->token); - } else { - $this->token = $cache->loadData(); } } diff --git a/bridges/TwitterBridge.php b/bridges/TwitterBridge.php index befb8064..f0f0ee52 100644 --- a/bridges/TwitterBridge.php +++ b/bridges/TwitterBridge.php @@ -7,7 +7,7 @@ class TwitterBridge extends BridgeAbstract const API_URI = 'https://api.twitter.com'; const GUEST_TOKEN_USES = 100; const GUEST_TOKEN_EXPIRY = 10800; // 3hrs - const CACHE_TIMEOUT = 300; // 5min + const CACHE_TIMEOUT = 60 * 15; // 15min const DESCRIPTION = 'returns tweets'; const MAINTAINER = 'arnd-s'; const PARAMETERS = [ @@ -224,10 +224,6 @@ EOD switch ($this->queriedContext) { case 'By username': $cache = RssBridge::getCache(); - $cache->setScope('twitter'); - $cache->setKey(['cache']); - // todo: inspect mtime instead of purging with 3h - $cache->purgeCache(60 * 60 * 3); $api = new TwitterClient($cache); $screenName = $this->getInput('u'); diff --git a/caches/FileCache.php b/caches/FileCache.php index a4f48962..6e150cb4 100644 --- a/caches/FileCache.php +++ b/caches/FileCache.php @@ -1,5 +1,8 @@ config; } - public function loadData() + public function loadData(int $timeout = 86400) { + clearstatcache(); if (!file_exists($this->getCacheFile())) { return null; } - $data = unserialize(file_get_contents($this->getCacheFile())); - if ($data === false) { - // Intentionally not throwing an exception - Logger::warning(sprintf('Failed to unserialize: %s', $this->getCacheFile())); - return null; + $modificationTime = filemtime($this->getCacheFile()); + if (time() - $timeout < $modificationTime) { + $data = unserialize(file_get_contents($this->getCacheFile())); + if ($data === false) { + Logger::warning(sprintf('Failed to unserialize: %s', $this->getCacheFile())); + // Intentionally not throwing an exception + return null; + } + return $data; } - return $data; + // It's a good idea to delete the expired item here, but commented out atm + // unlink($this->getCacheFile()); + return null; } public function saveData($data): void @@ -49,9 +59,7 @@ class FileCache implements CacheInterface public function getTime(): ?int { - // https://www.php.net/manual/en/function.clearstatcache.php clearstatcache(); - $cacheFile = $this->getCacheFile(); if (file_exists($cacheFile)) { $time = filemtime($cacheFile); @@ -64,7 +72,7 @@ class FileCache implements CacheInterface return null; } - public function purgeCache(int $seconds): void + public function purgeCache(int $timeout = 86400): void { if (! $this->config['enable_purge']) { return; @@ -90,7 +98,7 @@ class FileCache implements CacheInterface continue; } elseif ($cacheFile->isFile()) { $filepath = $cacheFile->getPathname(); - if (filemtime($filepath) < time() - $seconds) { + if (filemtime($filepath) < time() - $timeout) { // todo: sometimes this file doesn't exists unlink($filepath); } diff --git a/caches/MemcachedCache.php b/caches/MemcachedCache.php index 8cd1932b..afc654ea 100644 --- a/caches/MemcachedCache.php +++ b/caches/MemcachedCache.php @@ -6,8 +6,6 @@ class MemcachedCache implements CacheInterface private string $key; private $conn; private $expiration = 0; - private $time = null; - private $data = null; public function __construct() { @@ -43,50 +41,53 @@ class MemcachedCache implements CacheInterface $this->conn = $conn; } - public function loadData() + public function loadData(int $timeout = 86400) { - if ($this->data) { - return $this->data; - } - $result = $this->conn->get($this->getCacheKey()); - if ($result === false) { + $value = $this->conn->get($this->getCacheKey()); + if ($value === false) { return null; } - - $this->time = $result['time']; - $this->data = $result['data']; - return $result['data']; + if (time() - $timeout < $value['time']) { + return $value['data']; + } + return null; } public function saveData($data): void { - $time = time(); - $object_to_save = [ + $value = [ 'data' => $data, - 'time' => $time, + 'time' => time(), ]; - $result = $this->conn->set($this->getCacheKey(), $object_to_save, $this->expiration); - + $result = $this->conn->set($this->getCacheKey(), $value, $this->expiration); if ($result === false) { - throw new \Exception('Cannot write the cache to memcached server'); + Logger::warning('Failed to store an item in memcached', [ + 'scope' => $this->scope, + 'key' => $this->key, + 'expiration' => $this->expiration, + 'code' => $this->conn->getLastErrorCode(), + 'message' => $this->conn->getLastErrorMessage(), + 'number' => $this->conn->getLastErrorErrno(), + ]); + // Intentionally not throwing an exception } - - $this->time = $time; } public function getTime(): ?int { - if ($this->time === null) { - $this->loadData(); + $value = $this->conn->get($this->getCacheKey()); + if ($value === false) { + return null; } - return $this->time; + return $value['time']; } - public function purgeCache(int $seconds): void + public function purgeCache(int $timeout = 86400): void { + $this->conn->flush(); // Note: does not purges cache right now // Just sets cache expiration and leave cache purging for memcached itself - $this->expiration = $seconds; + $this->expiration = $timeout; } public function setScope(string $scope): void diff --git a/caches/NullCache.php b/caches/NullCache.php index 1a01df2f..fe43fe06 100644 --- a/caches/NullCache.php +++ b/caches/NullCache.php @@ -12,7 +12,7 @@ class NullCache implements CacheInterface { } - public function loadData() + public function loadData(int $timeout = 86400) { } @@ -25,7 +25,7 @@ class NullCache implements CacheInterface return null; } - public function purgeCache(int $seconds): void + public function purgeCache(int $timeout = 86400): void { } } diff --git a/caches/SQLiteCache.php b/caches/SQLiteCache.php index f9258a88..d7ab1374 100644 --- a/caches/SQLiteCache.php +++ b/caches/SQLiteCache.php @@ -29,27 +29,37 @@ class SQLiteCache implements CacheInterface $this->db = new \SQLite3($config['file']); $this->db->enableExceptions(true); $this->db->exec("CREATE TABLE storage ('key' BLOB PRIMARY KEY, 'value' BLOB, 'updated' INTEGER)"); - $this->db->exec('CREATE INDEX idx_storage_updated ON storage (updated)'); } $this->db->busyTimeout($config['timeout']); } - public function loadData() + public function loadData(int $timeout = 86400) { - $stmt = $this->db->prepare('SELECT value FROM storage WHERE key = :key'); + $stmt = $this->db->prepare('SELECT value, updated FROM storage WHERE key = :key'); $stmt->bindValue(':key', $this->getCacheKey()); $result = $stmt->execute(); - if ($result) { - $row = $result->fetchArray(\SQLITE3_ASSOC); - if ($row !== false) { - $blob = $row['value']; - $data = unserialize($blob); - if ($data !== false) { - return $data; - } - Logger::error(sprintf("Failed to unserialize: '%s'", mb_substr($blob, 0, 100))); - } + if (!$result) { + return null; } + $row = $result->fetchArray(\SQLITE3_ASSOC); + if ($row === false) { + return null; + } + $value = $row['value']; + $modificationTime = $row['updated']; + if (time() - $timeout < $modificationTime) { + $data = unserialize($value); + if ($data === false) { + Logger::error(sprintf("Failed to unserialize: '%s'", mb_substr($value, 0, 100))); + return null; + } + return $data; + } + // It's a good idea to delete expired cache items. + // However I'm seeing lots of SQLITE_BUSY errors so commented out for now + // $stmt = $this->db->prepare('DELETE FROM storage WHERE key = :key'); + // $stmt->bindValue(':key', $this->getCacheKey()); + // $stmt->execute(); return null; } @@ -78,13 +88,13 @@ class SQLiteCache implements CacheInterface return null; } - public function purgeCache(int $seconds): void + public function purgeCache(int $timeout = 86400): void { if (!$this->config['enable_purge']) { return; } $stmt = $this->db->prepare('DELETE FROM storage WHERE updated < :expired'); - $stmt->bindValue(':expired', time() - $seconds); + $stmt->bindValue(':expired', time() - $timeout); $stmt->execute(); } diff --git a/lib/BridgeAbstract.php b/lib/BridgeAbstract.php index f024393d..eb9d5a3c 100644 --- a/lib/BridgeAbstract.php +++ b/lib/BridgeAbstract.php @@ -409,26 +409,15 @@ abstract class BridgeAbstract implements BridgeInterface /** * Loads a cached value for the specified key * - * @param int $duration Cache duration (optional) + * @param int $timeout Cache duration (optional) * @return mixed Cached value or null if the key doesn't exist or has expired */ - protected function loadCacheValue(string $key, $duration = null) + protected function loadCacheValue(string $key, int $timeout = 86400) { $cache = RssBridge::getCache(); - // Create class name without the namespace part - $scope = $this->getShortName(); - $cache->setScope($scope); + $cache->setScope($this->getShortName()); $cache->setKey([$key]); - $timestamp = $cache->getTime(); - - if ( - $duration - && $timestamp - && $timestamp < time() - $duration - ) { - return null; - } - return $cache->loadData(); + return $cache->loadData($timeout); } /** @@ -439,8 +428,7 @@ abstract class BridgeAbstract implements BridgeInterface protected function saveCacheValue(string $key, $value) { $cache = RssBridge::getCache(); - $scope = $this->getShortName(); - $cache->setScope($scope); + $cache->setScope($this->getShortName()); $cache->setKey([$key]); $cache->saveData($value); } diff --git a/lib/CacheInterface.php b/lib/CacheInterface.php index 414a9c84..85aa830f 100644 --- a/lib/CacheInterface.php +++ b/lib/CacheInterface.php @@ -6,11 +6,11 @@ interface CacheInterface public function setKey(array $key): void; - public function loadData(); + public function loadData(int $timeout = 86400); public function saveData($data): void; public function getTime(): ?int; - public function purgeCache(int $seconds): void; + public function purgeCache(int $timeout = 86400): void; } diff --git a/lib/TwitterClient.php b/lib/TwitterClient.php index cdedcbc1..341610a4 100644 --- a/lib/TwitterClient.php +++ b/lib/TwitterClient.php @@ -11,8 +11,13 @@ class TwitterClient public function __construct(CacheInterface $cache) { $this->cache = $cache; - $this->authorization = 'AAAAAAAAAAAAAAAAAAAAAGHtAgAAAAAA%2Bx7ILXNILCqkSGIzy6faIHZ9s3Q%3DQy97w6SIrzE7lQwPJEYQBsArEE2fC25caFwRBvAGi456G09vGR'; + + $cache->setScope('twitter'); + $cache->setKey(['cache']); + $cache->purgeCache(60 * 60 * 3); + $this->data = $this->cache->loadData() ?? []; + $this->authorization = 'AAAAAAAAAAAAAAAAAAAAAGHtAgAAAAAA%2Bx7ILXNILCqkSGIzy6faIHZ9s3Q%3DQy97w6SIrzE7lQwPJEYQBsArEE2fC25caFwRBvAGi456G09vGR'; } public function fetchUserTweets(string $screenName): \stdClass @@ -22,7 +27,6 @@ class TwitterClient $userInfo = $this->fetchUserInfoByScreenName($screenName); } catch (HttpException $e) { if ($e->getCode() === 403) { - Logger::info('The guest token has expired'); $this->data['guest_token'] = null; $this->fetchGuestToken(); $userInfo = $this->fetchUserInfoByScreenName($screenName); @@ -35,7 +39,6 @@ class TwitterClient $timeline = $this->fetchTimeline($userInfo->rest_id); } catch (HttpException $e) { if ($e->getCode() === 403) { - Logger::info('The guest token has expired'); $this->data['guest_token'] = null; $this->fetchGuestToken(); $timeline = $this->fetchTimeline($userInfo->rest_id); @@ -88,7 +91,6 @@ class TwitterClient private function fetchGuestToken(): void { if (isset($this->data['guest_token'])) { - Logger::info('Reusing cached guest token: ' . $this->data['guest_token']); return; } $url = 'https://api.twitter.com/1.1/guest/activate.json'; @@ -99,7 +101,6 @@ class TwitterClient $this->cache->setScope('twitter'); $this->cache->setKey(['cache']); $this->cache->saveData($this->data); - Logger::info("Fetch new guest token: $guest_token"); } private function fetchUserInfoByScreenName(string $screenName) @@ -115,7 +116,7 @@ class TwitterClient 'https://twitter.com/i/api/graphql/hc-pka9A7gyS3xODIafnrQ/UserByScreenName?variables=%s', urlencode(json_encode($variables)) ); - $response = json_decode(getContents($url, $this->createHttpHeaders())); + $response = Json::decode(getContents($url, $this->createHttpHeaders()), false); if (isset($response->errors)) { // Grab the first error message throw new \Exception(sprintf('From twitter api: "%s"', $response->errors[0]->message)); @@ -168,7 +169,7 @@ class TwitterClient urlencode(json_encode($variables)), urlencode(json_encode($features)) ); - $response = json_decode(getContents($url, $this->createHttpHeaders())); + $response = Json::decode(getContents($url, $this->createHttpHeaders()), false); return $response; } diff --git a/lib/contents.php b/lib/contents.php index b6b74539..454f2066 100644 --- a/lib/contents.php +++ b/lib/contents.php @@ -100,9 +100,6 @@ function getContents( bool $returnFull = false ) { $httpClient = RssBridge::getHttpClient(); - $cache = RssBridge::getCache(); - $cache->setScope('server'); - $cache->setKey([$url]); // Snagged from https://github.com/lwthiker/curl-impersonate/blob/main/firefox/curl_ff102 $defaultHttpHeaders = [ @@ -138,6 +135,11 @@ function getContents( if (Configuration::getConfig('proxy', 'url') && !defined('NOPROXY')) { $config['proxy'] = Configuration::getConfig('proxy', 'url'); } + + $cache = RssBridge::getCache(); + $cache->setScope('server'); + $cache->setKey([$url]); + if (!Debug::isEnabled() && $cache->getTime()) { $config['if_not_modified_since'] = $cache->getTime(); } @@ -384,7 +386,7 @@ function getSimpleHTMLDOM( * _Notice_: Cached contents are forcefully removed after 24 hours (86400 seconds). * * @param string $url The URL. - * @param int $duration Cache duration in seconds. + * @param int $timeout Cache duration in seconds. * @param array $header (optional) A list of cURL header. * For more information follow the links below. * * https://php.net/manual/en/function.curl-setopt.php @@ -409,7 +411,7 @@ function getSimpleHTMLDOM( */ function getSimpleHTMLDOMCached( $url, - $duration = 86400, + $timeout = 86400, $header = [], $opts = [], $lowercase = true, @@ -422,29 +424,15 @@ function getSimpleHTMLDOMCached( $cache = RssBridge::getCache(); $cache->setScope('pages'); $cache->setKey([$url]); - - // Determine if cached file is within duration - $time = $cache->getTime(); - if ( - $time - && time() - $duration < $time - && !Debug::isEnabled() - ) { - // Cache hit - $content = $cache->loadData(); - } else { - $content = getContents( - $url, - $header ?? [], - $opts ?? [] - ); - if ($content) { - $cache->setScope('pages'); - $cache->setKey([$url]); - $cache->saveData($content); - } + $content = $cache->loadData($timeout); + if (!$content || Debug::isEnabled()) { + $content = getContents($url, $header ?? [], $opts ?? []); + } + if ($content) { + $cache->setScope('pages'); + $cache->setKey([$url]); + $cache->saveData($content); } - return str_get_html( $content, $lowercase,