Introduce an HttpClient interface with a cURL-backed implementation
(CurlHttpClient) and route the former _http_request() callers through it.

Review fixes folded into this patch:
  * getContents(): keep throwing HttpException for unexpected status codes.
    The throw was deleted together with the $result variable and never
    re-added, so error responses were returned as if they had succeeded.
  * getContents(): keep the legacy 'header' key in the $returnFull response;
    the client array only carries 'headers'.
  * Re-add the documentation that was dropped with _http_request().

NOTE(review): indentation and blank lines were reconstructed from a
whitespace-collapsed copy of the patch; regenerate it against the tree
before applying.

diff --git a/bridges/AO3Bridge.php b/bridges/AO3Bridge.php
index 6ca59cc5..57e12fbd 100644
--- a/bridges/AO3Bridge.php
+++ b/bridges/AO3Bridge.php
@@ -92,7 +92,12 @@ class AO3Bridge extends BridgeAbstract
     private function collectWork($id)
     {
         $url = self::URI . "/works/$id/navigate";
-        $response = _http_request($url, ['useragent' => 'rss-bridge bot (https://github.com/RSS-Bridge/rss-bridge)']);
+        $httpClient = RssBridge::getHttpClient();
+
+        $response = $httpClient->request($url, [
+            'useragent' => 'rss-bridge bot (https://github.com/RSS-Bridge/rss-bridge)',
+        ]);
+
         $html = \str_get_html($response['body']);
         $html = defaultLinkTo($html, self::URI);
 
diff --git a/contrib/prepare_release/fetch_contributors.php b/contrib/prepare_release/fetch_contributors.php
index ad04458a..cfe2c5b2 100644
--- a/contrib/prepare_release/fetch_contributors.php
+++ b/contrib/prepare_release/fetch_contributors.php
@@ -14,7 +14,8 @@ while ($next) { /* Collect all contributors */
         'Content-Type' => 'application/json',
         'User-Agent' => 'RSS-Bridge',
     ];
-    $result = _http_request($url, ['headers' => $headers]);
+    $httpClient = new CurlHttpClient();
+    $result = $httpClient->request($url, ['headers' => $headers]);
 
     foreach (json_decode($result['body']) as $contributor) {
         $contributors[] = $contributor;
diff --git a/lib/RssBridge.php b/lib/RssBridge.php
index e8b07a65..8969dc54 100644
--- a/lib/RssBridge.php
+++ b/lib/RssBridge.php
@@ -2,6 +2,7 @@
 
 final class RssBridge
 {
+    private static HttpClient $httpClient;
     private static CacheInterface $cache;
 
     public function main(array $argv = [])
@@ -71,9 +72,10 @@ final class RssBridge
         // Consider: ini_set('error_reporting', E_ALL & ~E_DEPRECATED);
         date_default_timezone_set(Configuration::getConfig('system', 'timezone'));
 
-        // Create cache
         $cacheFactory = new CacheFactory();
-        self::setCache($cacheFactory->create());
+
+        self::$httpClient = new CurlHttpClient();
+        self::$cache = $cacheFactory->create();
 
         if (Configuration::getConfig('authentication', 'enable')) {
             $authenticationMiddleware = new AuthenticationMiddleware();
@@ -105,13 +107,13 @@ final class RssBridge
         }
     }
 
+    public static function getHttpClient(): HttpClient
+    {
+        return self::$httpClient;
+    }
+
     public static function getCache(): CacheInterface
     {
         return self::$cache;
     }
-
-    public static function setCache(CacheInterface $cache): void
-    {
-        self::$cache = $cache;
-    }
 }
diff --git a/lib/contents.php b/lib/contents.php
index c6edba7b..419432df 100644
--- a/lib/contents.php
+++ b/lib/contents.php
@@ -99,6 +99,7 @@ function getContents(
     array $curlOptions = [],
     bool $returnFull = false
 ) {
+    $httpClient = RssBridge::getHttpClient();
     $cache = RssBridge::getCache();
     $cache->setScope('server');
     $cache->setKey([$url]);
@@ -141,20 +142,14 @@ function getContents(
         $config['if_not_modified_since'] = $cache->getTime();
     }
 
-    $result = _http_request($url, $config);
-    $response = [
-        'code' => $result['code'],
-        'status_lines' => $result['status_lines'],
-        'header' => $result['headers'],
-        'content' => $result['body'],
-    ];
+    $response = $httpClient->request($url, $config);
 
-    switch ($result['code']) {
+    switch ($response['code']) {
         case 200:
         case 201:
         case 202:
-            if (isset($result['headers']['cache-control'])) {
-                $cachecontrol = $result['headers']['cache-control'];
+            if (isset($response['headers']['cache-control'])) {
+                $cachecontrol = $response['headers']['cache-control'];
                 $lastValue = array_pop($cachecontrol);
                 $directives = explode(',', $lastValue);
                 $directives = array_map('trim', $directives);
@@ -163,7 +158,7 @@ function getContents(
                     break;
                 }
             }
-            $cache->saveData($result['body']);
+            $cache->saveData($response['body']);
             break;
         case 301:
         case 302:
@@ -172,16 +167,16 @@ function getContents(
             break;
         case 304:
             // Not Modified
-            $response['content'] = $cache->loadData();
+            $response['body'] = $cache->loadData();
             break;
         default:
             $exceptionMessage = sprintf(
                 '%s resulted in %s %s %s',
                 $url,
-                $result['code'],
-                Response::STATUS_CODES[$result['code']] ?? '',
+                $response['code'],
+                Response::STATUS_CODES[$response['code']] ?? '',
                 // If debug, include a part of the response body in the exception message
-                Debug::isEnabled() ? mb_substr($result['body'], 0, 500) : '',
+                Debug::isEnabled() ? mb_substr($response['body'], 0, 500) : '',
             );
 
             // The following code must be extracted if it grows too much
@@ -192,137 +187,168 @@ function getContents(
                 'Security | Glassdoor',
             ];
             foreach ($cloudflareTitles as $cloudflareTitle) {
-                if (str_contains($result['body'], $cloudflareTitle)) {
-                    throw new CloudFlareException($exceptionMessage, $result['code']);
+                if (str_contains($response['body'], $cloudflareTitle)) {
+                    throw new CloudFlareException($exceptionMessage, $response['code']);
                 }
             }
 
-            throw new HttpException(trim($exceptionMessage), $result['code']);
+            throw new HttpException(trim($exceptionMessage), $response['code']);
     }
     if ($returnFull === true) {
+        // For legacy reasons, use content instead of body
+        $response['content'] = $response['body'];
+        unset($response['body']);
+        // Legacy callers read the singular 'header' key, so keep it as an alias of 'headers'
+        $response['header'] = $response['headers'];
         return $response;
     }
-    return $response['content'];
+    return $response['body'];
 }
 
-/**
- * Fetch content from url
- *
- * @internal Private function used internally
- * @throws HttpException
- */
-function _http_request(string $url, array $config = []): array
+/**
+ * Contract for the HTTP transport used by getContents() and by bridges.
+ */
+interface HttpClient
 {
-    $defaults = [
-        'useragent' => null,
-        'timeout' => 5,
-        'headers' => [],
-        'proxy' => null,
-        'curl_options' => [],
-        'if_not_modified_since' => null,
-        'retries' => 3,
-        'max_filesize' => null,
-        'max_redirections' => 5,
-    ];
-    $config = array_merge($defaults, $config);
+    /**
+     * Perform an HTTP request against $url.
+     *
+     * @param array $config Request options; see CurlHttpClient::request() for the supported keys
+     * @return array Response with the keys 'code', 'status_lines', 'headers' and 'body'
+     * @throws HttpException
+     */
+    public function request(string $url, array $config = []): array;
+}
 
-    $ch = curl_init($url);
-    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
-    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
-    curl_setopt($ch, CURLOPT_MAXREDIRS, $config['max_redirections']);
-    curl_setopt($ch, CURLOPT_HEADER, false);
-    $httpHeaders = [];
-    foreach ($config['headers'] as $name => $value) {
-        $httpHeaders[] = sprintf('%s: %s', $name, $value);
-    }
-    curl_setopt($ch, CURLOPT_HTTPHEADER, $httpHeaders);
-    if ($config['useragent']) {
-        curl_setopt($ch, CURLOPT_USERAGENT, $config['useragent']);
-    }
-    curl_setopt($ch, CURLOPT_TIMEOUT, $config['timeout']);
-    curl_setopt($ch, CURLOPT_ENCODING, '');
-    curl_setopt($ch, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
+/**
+ * HttpClient implementation backed by the cURL extension.
+ */
+final class CurlHttpClient implements HttpClient
+{
+    /**
+     * Fetch content from url
+     *
+     * Supported $config keys (defaults in parentheses): useragent (null), timeout (5), headers ([]),
+     * proxy (null), curl_options ([]), if_not_modified_since (null), retries (3),
+     * max_filesize (null), max_redirections (5).
+     *
+     * @return array Response with the keys 'code', 'status_lines', 'headers' and 'body'
+     * @throws HttpException When curl_exec() still fails after the configured retries
+     * @throws \Exception When one of the curl_options cannot be set
+     */
+    public function request(string $url, array $config = []): array
+    {
+        $defaults = [
+            'useragent' => null,
+            'timeout' => 5,
+            'headers' => [],
+            'proxy' => null,
+            'curl_options' => [],
+            'if_not_modified_since' => null,
+            'retries' => 3,
+            'max_filesize' => null,
+            'max_redirections' => 5,
+        ];
+        $config = array_merge($defaults, $config);
 
-    if ($config['max_filesize']) {
-        // This option inspects the Content-Length header
-        curl_setopt($ch, CURLOPT_MAXFILESIZE, $config['max_filesize']);
-        curl_setopt($ch, CURLOPT_NOPROGRESS, false);
-        // This progress function will monitor responses who omit the Content-Length header
-        curl_setopt($ch, CURLOPT_PROGRESSFUNCTION, function ($ch, $downloadSize, $downloaded, $uploadSize, $uploaded) use ($config) {
-            if ($downloaded > $config['max_filesize']) {
-                // Return a non-zero value to abort the transfer
-                return -1;
+        $ch = curl_init($url);
+        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
+        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
+        curl_setopt($ch, CURLOPT_MAXREDIRS, $config['max_redirections']);
+        curl_setopt($ch, CURLOPT_HEADER, false);
+        $httpHeaders = [];
+        foreach ($config['headers'] as $name => $value) {
+            $httpHeaders[] = sprintf('%s: %s', $name, $value);
+        }
+        curl_setopt($ch, CURLOPT_HTTPHEADER, $httpHeaders);
+        if ($config['useragent']) {
+            curl_setopt($ch, CURLOPT_USERAGENT, $config['useragent']);
+        }
+        curl_setopt($ch, CURLOPT_TIMEOUT, $config['timeout']);
+        curl_setopt($ch, CURLOPT_ENCODING, '');
+        curl_setopt($ch, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
+
+        if ($config['max_filesize']) {
+            // This option inspects the Content-Length header
+            curl_setopt($ch, CURLOPT_MAXFILESIZE, $config['max_filesize']);
+            curl_setopt($ch, CURLOPT_NOPROGRESS, false);
+            // This progress function will monitor responses who omit the Content-Length header
+            curl_setopt($ch, CURLOPT_PROGRESSFUNCTION, function ($ch, $downloadSize, $downloaded, $uploadSize, $uploaded) use ($config) {
+                if ($downloaded > $config['max_filesize']) {
+                    // Return a non-zero value to abort the transfer
+                    return -1;
+                }
+                return 0;
+            });
+        }
+
+        if ($config['proxy']) {
+            curl_setopt($ch, CURLOPT_PROXY, $config['proxy']);
+        }
+        if (curl_setopt_array($ch, $config['curl_options']) === false) {
+            throw new \Exception('Tried to set an illegal curl option');
+        }
+
+        if ($config['if_not_modified_since']) {
+            curl_setopt($ch, CURLOPT_TIMEVALUE, $config['if_not_modified_since']);
+            curl_setopt($ch, CURLOPT_TIMECONDITION, CURL_TIMECOND_IFMODSINCE);
+        }
+
+        $responseStatusLines = [];
+        $responseHeaders = [];
+        curl_setopt($ch, CURLOPT_HEADERFUNCTION, function ($ch, $rawHeader) use (&$responseHeaders, &$responseStatusLines) {
+            $len = strlen($rawHeader);
+            if ($rawHeader === "\r\n") {
+                return $len;
             }
-            return 0;
+            if (preg_match('#^HTTP/(2|1.1|1.0)#', $rawHeader)) {
+                $responseStatusLines[] = $rawHeader;
+                return $len;
+            }
+            $header = explode(':', $rawHeader);
+            if (count($header) === 1) {
+                return $len;
+            }
+            $name = mb_strtolower(trim($header[0]));
+            $value = trim(implode(':', array_slice($header, 1)));
+            if (!isset($responseHeaders[$name])) {
+                $responseHeaders[$name] = [];
+            }
+            $responseHeaders[$name][] = $value;
+            return $len;
         });
-    }
 
-    if ($config['proxy']) {
-        curl_setopt($ch, CURLOPT_PROXY, $config['proxy']);
-    }
-    if (curl_setopt_array($ch, $config['curl_options']) === false) {
-        throw new \Exception('Tried to set an illegal curl option');
-    }
+        $attempts = 0;
+        while (true) {
+            $attempts++;
+            $data = curl_exec($ch);
+            if ($data !== false) {
+                // The network call was successful, so break out of the loop
+                break;
+            }
+            if ($attempts > $config['retries']) {
+                // Finally give up
+                $curl_error = curl_error($ch);
+                $curl_errno = curl_errno($ch);
+                throw new HttpException(sprintf(
+                    'cURL error %s: %s (%s) for %s',
+                    $curl_error,
+                    $curl_errno,
+                    'https://curl.haxx.se/libcurl/c/libcurl-errors.html',
+                    $url
+                ));
+            }
+        }
 
-    if ($config['if_not_modified_since']) {
-        curl_setopt($ch, CURLOPT_TIMEVALUE, $config['if_not_modified_since']);
-        curl_setopt($ch, CURLOPT_TIMECONDITION, CURL_TIMECOND_IFMODSINCE);
+        $statusCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
+        curl_close($ch);
+        return [
+            'code' => $statusCode,
+            'status_lines' => $responseStatusLines,
+            'headers' => $responseHeaders,
+            'body' => $data,
+        ];
     }
-
-    $responseStatusLines = [];
-    $responseHeaders = [];
-    curl_setopt($ch, CURLOPT_HEADERFUNCTION, function ($ch, $rawHeader) use (&$responseHeaders, &$responseStatusLines) {
-        $len = strlen($rawHeader);
-        if ($rawHeader === "\r\n") {
-            return $len;
-        }
-        if (preg_match('#^HTTP/(2|1.1|1.0)#', $rawHeader)) {
-            $responseStatusLines[] = $rawHeader;
-            return $len;
-        }
-        $header = explode(':', $rawHeader);
-        if (count($header) === 1) {
-            return $len;
-        }
-        $name = mb_strtolower(trim($header[0]));
-        $value = trim(implode(':', array_slice($header, 1)));
-        if (!isset($responseHeaders[$name])) {
-            $responseHeaders[$name] = [];
-        }
-        $responseHeaders[$name][] = $value;
-        return $len;
-    });
-
-    $attempts = 0;
-    while (true) {
-        $attempts++;
-        $data = curl_exec($ch);
-        if ($data !== false) {
-            // The network call was successful, so break out of the loop
-            break;
-        }
-        if ($attempts > $config['retries']) {
-            // Finally give up
-            $curl_error = curl_error($ch);
-            $curl_errno = curl_errno($ch);
-            throw new HttpException(sprintf(
-                'cURL error %s: %s (%s) for %s',
-                $curl_error,
-                $curl_errno,
-                'https://curl.haxx.se/libcurl/c/libcurl-errors.html',
-                $url
-            ));
-        }
-    }
-
-    $statusCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
-    curl_close($ch);
-    return [
-        'code' => $statusCode,
-        'status_lines' => $responseStatusLines,
-        'headers' => $responseHeaders,
-        'body' => $data,
-    ];
 }
 
 /**