refactor: extract CurlHttpClient (#3532)

* refactor: extract CurlHttpClient

* refactor

* interface
This commit is contained in:
Dag 2023-07-16 22:07:34 +02:00 committed by GitHub
parent 7b46b97abd
commit a59793e8d6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 145 additions and 138 deletions

View File

@ -92,7 +92,12 @@ class AO3Bridge extends BridgeAbstract
private function collectWork($id)
{
$url = self::URI . "/works/$id/navigate";
$response = _http_request($url, ['useragent' => 'rss-bridge bot (https://github.com/RSS-Bridge/rss-bridge)']);
$httpClient = RssBridge::getHttpClient();
$response = $httpClient->request($url, [
'useragent' => 'rss-bridge bot (https://github.com/RSS-Bridge/rss-bridge)',
]);
$html = \str_get_html($response['body']);
$html = defaultLinkTo($html, self::URI);

View File

@ -14,7 +14,8 @@ while ($next) { /* Collect all contributors */
'Content-Type' => 'application/json',
'User-Agent' => 'RSS-Bridge',
];
$result = _http_request($url, ['headers' => $headers]);
$httpClient = new CurlHttpClient();
$result = $httpClient->request($url, ['headers' => $headers]);
foreach (json_decode($result['body']) as $contributor) {
$contributors[] = $contributor;

View File

@ -2,6 +2,7 @@
final class RssBridge
{
private static HttpClient $httpClient;
private static CacheInterface $cache;
public function main(array $argv = [])
@ -71,9 +72,10 @@ final class RssBridge
// Consider: ini_set('error_reporting', E_ALL & ~E_DEPRECATED);
date_default_timezone_set(Configuration::getConfig('system', 'timezone'));
// Create cache
$cacheFactory = new CacheFactory();
self::setCache($cacheFactory->create());
self::$httpClient = new CurlHttpClient();
self::$cache = $cacheFactory->create();
if (Configuration::getConfig('authentication', 'enable')) {
$authenticationMiddleware = new AuthenticationMiddleware();
@ -105,13 +107,13 @@ final class RssBridge
}
}
/**
 * Return the HTTP client held in the static $httpClient property.
 *
 * The property is declared with a type and no default value, so it has to be
 * assigned before this accessor is called; PHP raises an Error when a typed
 * property is read while still uninitialized.
 *
 * @return HttpClient The instance currently stored in self::$httpClient
 */
public static function getHttpClient(): HttpClient
{
return self::$httpClient;
}
/**
 * Return the cache held in the static $cache property.
 *
 * The property is declared with a type and no default value, so it has to be
 * assigned before this accessor is called; PHP raises an Error when a typed
 * property is read while still uninitialized.
 *
 * @return CacheInterface The instance currently stored in self::$cache
 */
public static function getCache(): CacheInterface
{
return self::$cache;
}
/**
 * Store the given cache in the static $cache property, replacing any
 * instance that was stored there before.
 *
 * @param CacheInterface $cache The cache that getCache() will return from now on
 */
public static function setCache(CacheInterface $cache): void
{
self::$cache = $cache;
}
}

View File

@ -99,6 +99,7 @@ function getContents(
array $curlOptions = [],
bool $returnFull = false
) {
$httpClient = RssBridge::getHttpClient();
$cache = RssBridge::getCache();
$cache->setScope('server');
$cache->setKey([$url]);
@ -141,20 +142,14 @@ function getContents(
$config['if_not_modified_since'] = $cache->getTime();
}
$result = _http_request($url, $config);
$response = [
'code' => $result['code'],
'status_lines' => $result['status_lines'],
'header' => $result['headers'],
'content' => $result['body'],
];
$response = $httpClient->request($url, $config);
switch ($result['code']) {
switch ($response['code']) {
case 200:
case 201:
case 202:
if (isset($result['headers']['cache-control'])) {
$cachecontrol = $result['headers']['cache-control'];
if (isset($response['headers']['cache-control'])) {
$cachecontrol = $response['headers']['cache-control'];
$lastValue = array_pop($cachecontrol);
$directives = explode(',', $lastValue);
$directives = array_map('trim', $directives);
@ -163,7 +158,7 @@ function getContents(
break;
}
}
$cache->saveData($result['body']);
$cache->saveData($response['body']);
break;
case 301:
case 302:
@ -172,16 +167,16 @@ function getContents(
break;
case 304:
// Not Modified
$response['content'] = $cache->loadData();
$response['body'] = $cache->loadData();
break;
default:
$exceptionMessage = sprintf(
'%s resulted in %s %s %s',
$url,
$result['code'],
Response::STATUS_CODES[$result['code']] ?? '',
$response['code'],
Response::STATUS_CODES[$response['code']] ?? '',
// If debug, include a part of the response body in the exception message
Debug::isEnabled() ? mb_substr($result['body'], 0, 500) : '',
Debug::isEnabled() ? mb_substr($response['body'], 0, 500) : '',
);
// The following code must be extracted if it grows too much
@ -192,137 +187,141 @@ function getContents(
'<title>Security | Glassdoor',
];
foreach ($cloudflareTitles as $cloudflareTitle) {
if (str_contains($result['body'], $cloudflareTitle)) {
throw new CloudFlareException($exceptionMessage, $result['code']);
if (str_contains($response['body'], $cloudflareTitle)) {
throw new CloudFlareException($exceptionMessage, $response['code']);
}
}
// None of the known block-page titles matched, so report a generic HTTP error.
// The status code is read from $response (the array returned by the HTTP client),
// the same variable the rest of this branch uses.
throw new HttpException(trim($exceptionMessage), $response['code']);
}
if ($returnFull === true) {
// For legacy reasons, use content instead of body
$response['content'] = $response['body'];
unset($response['body']);
return $response;
}
return $response['content'];
return $response['body'];
}
/**
* Fetch content from url
*
* @internal Private function used internally
* @throws HttpException
*/
function _http_request(string $url, array $config = []): array
interface HttpClient
{
$defaults = [
'useragent' => null,
'timeout' => 5,
'headers' => [],
'proxy' => null,
'curl_options' => [],
'if_not_modified_since' => null,
'retries' => 3,
'max_filesize' => null,
'max_redirections' => 5,
];
$config = array_merge($defaults, $config);
public function request(string $url, array $config = []): array;
}
$ch = curl_init($url);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($ch, CURLOPT_MAXREDIRS, $config['max_redirections']);
curl_setopt($ch, CURLOPT_HEADER, false);
$httpHeaders = [];
foreach ($config['headers'] as $name => $value) {
$httpHeaders[] = sprintf('%s: %s', $name, $value);
}
curl_setopt($ch, CURLOPT_HTTPHEADER, $httpHeaders);
if ($config['useragent']) {
curl_setopt($ch, CURLOPT_USERAGENT, $config['useragent']);
}
curl_setopt($ch, CURLOPT_TIMEOUT, $config['timeout']);
curl_setopt($ch, CURLOPT_ENCODING, '');
curl_setopt($ch, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
final class CurlHttpClient implements HttpClient
{
public function request(string $url, array $config = []): array
{
$defaults = [
'useragent' => null,
'timeout' => 5,
'headers' => [],
'proxy' => null,
'curl_options' => [],
'if_not_modified_since' => null,
'retries' => 3,
'max_filesize' => null,
'max_redirections' => 5,
];
$config = array_merge($defaults, $config);
if ($config['max_filesize']) {
// This option inspects the Content-Length header
curl_setopt($ch, CURLOPT_MAXFILESIZE, $config['max_filesize']);
curl_setopt($ch, CURLOPT_NOPROGRESS, false);
// This progress function will monitor responses that omit the Content-Length header
curl_setopt($ch, CURLOPT_PROGRESSFUNCTION, function ($ch, $downloadSize, $downloaded, $uploadSize, $uploaded) use ($config) {
if ($downloaded > $config['max_filesize']) {
// Return a non-zero value to abort the transfer
return -1;
$ch = curl_init($url);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($ch, CURLOPT_MAXREDIRS, $config['max_redirections']);
curl_setopt($ch, CURLOPT_HEADER, false);
$httpHeaders = [];
foreach ($config['headers'] as $name => $value) {
$httpHeaders[] = sprintf('%s: %s', $name, $value);
}
curl_setopt($ch, CURLOPT_HTTPHEADER, $httpHeaders);
if ($config['useragent']) {
curl_setopt($ch, CURLOPT_USERAGENT, $config['useragent']);
}
curl_setopt($ch, CURLOPT_TIMEOUT, $config['timeout']);
curl_setopt($ch, CURLOPT_ENCODING, '');
curl_setopt($ch, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
if ($config['max_filesize']) {
// This option inspects the Content-Length header
curl_setopt($ch, CURLOPT_MAXFILESIZE, $config['max_filesize']);
curl_setopt($ch, CURLOPT_NOPROGRESS, false);
// This progress function will monitor responses that omit the Content-Length header
curl_setopt($ch, CURLOPT_PROGRESSFUNCTION, function ($ch, $downloadSize, $downloaded, $uploadSize, $uploaded) use ($config) {
if ($downloaded > $config['max_filesize']) {
// Return a non-zero value to abort the transfer
return -1;
}
return 0;
});
}
if ($config['proxy']) {
curl_setopt($ch, CURLOPT_PROXY, $config['proxy']);
}
if (curl_setopt_array($ch, $config['curl_options']) === false) {
throw new \Exception('Tried to set an illegal curl option');
}
if ($config['if_not_modified_since']) {
curl_setopt($ch, CURLOPT_TIMEVALUE, $config['if_not_modified_since']);
curl_setopt($ch, CURLOPT_TIMECONDITION, CURL_TIMECOND_IFMODSINCE);
}
$responseStatusLines = [];
$responseHeaders = [];
curl_setopt($ch, CURLOPT_HEADERFUNCTION, function ($ch, $rawHeader) use (&$responseHeaders, &$responseStatusLines) {
$len = strlen($rawHeader);
if ($rawHeader === "\r\n") {
return $len;
}
return 0;
if (preg_match('#^HTTP/(2|1.1|1.0)#', $rawHeader)) {
$responseStatusLines[] = $rawHeader;
return $len;
}
$header = explode(':', $rawHeader);
if (count($header) === 1) {
return $len;
}
$name = mb_strtolower(trim($header[0]));
$value = trim(implode(':', array_slice($header, 1)));
if (!isset($responseHeaders[$name])) {
$responseHeaders[$name] = [];
}
$responseHeaders[$name][] = $value;
return $len;
});
}
if ($config['proxy']) {
curl_setopt($ch, CURLOPT_PROXY, $config['proxy']);
}
if (curl_setopt_array($ch, $config['curl_options']) === false) {
throw new \Exception('Tried to set an illegal curl option');
}
$attempts = 0;
while (true) {
$attempts++;
$data = curl_exec($ch);
if ($data !== false) {
// The network call was successful, so break out of the loop
break;
}
if ($attempts > $config['retries']) {
// Finally give up
$curl_error = curl_error($ch);
$curl_errno = curl_errno($ch);
throw new HttpException(sprintf(
'cURL error %s: %s (%s) for %s',
$curl_error,
$curl_errno,
'https://curl.haxx.se/libcurl/c/libcurl-errors.html',
$url
));
}
}
if ($config['if_not_modified_since']) {
curl_setopt($ch, CURLOPT_TIMEVALUE, $config['if_not_modified_since']);
curl_setopt($ch, CURLOPT_TIMECONDITION, CURL_TIMECOND_IFMODSINCE);
$statusCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
curl_close($ch);
return [
'code' => $statusCode,
'status_lines' => $responseStatusLines,
'headers' => $responseHeaders,
'body' => $data,
];
}
$responseStatusLines = [];
$responseHeaders = [];
curl_setopt($ch, CURLOPT_HEADERFUNCTION, function ($ch, $rawHeader) use (&$responseHeaders, &$responseStatusLines) {
$len = strlen($rawHeader);
if ($rawHeader === "\r\n") {
return $len;
}
if (preg_match('#^HTTP/(2|1.1|1.0)#', $rawHeader)) {
$responseStatusLines[] = $rawHeader;
return $len;
}
$header = explode(':', $rawHeader);
if (count($header) === 1) {
return $len;
}
$name = mb_strtolower(trim($header[0]));
$value = trim(implode(':', array_slice($header, 1)));
if (!isset($responseHeaders[$name])) {
$responseHeaders[$name] = [];
}
$responseHeaders[$name][] = $value;
return $len;
});
$attempts = 0;
while (true) {
$attempts++;
$data = curl_exec($ch);
if ($data !== false) {
// The network call was successful, so break out of the loop
break;
}
if ($attempts > $config['retries']) {
// Finally give up
$curl_error = curl_error($ch);
$curl_errno = curl_errno($ch);
throw new HttpException(sprintf(
'cURL error %s: %s (%s) for %s',
$curl_error,
$curl_errno,
'https://curl.haxx.se/libcurl/c/libcurl-errors.html',
$url
));
}
}
$statusCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
curl_close($ch);
return [
'code' => $statusCode,
'status_lines' => $responseStatusLines,
'headers' => $responseHeaders,
'body' => $data,
];
}
/**