refactor: extract CurlHttpClient (#3532)

* refactor: extract CurlHttpClient

* refactor

* interface
This commit is contained in:
Dag 2023-07-16 22:07:34 +02:00 committed by GitHub
parent 7b46b97abd
commit a59793e8d6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 145 additions and 138 deletions

View File

@ -92,7 +92,12 @@ class AO3Bridge extends BridgeAbstract
private function collectWork($id) private function collectWork($id)
{ {
$url = self::URI . "/works/$id/navigate"; $url = self::URI . "/works/$id/navigate";
$response = _http_request($url, ['useragent' => 'rss-bridge bot (https://github.com/RSS-Bridge/rss-bridge)']); $httpClient = RssBridge::getHttpClient();
$response = $httpClient->request($url, [
'useragent' => 'rss-bridge bot (https://github.com/RSS-Bridge/rss-bridge)',
]);
$html = \str_get_html($response['body']); $html = \str_get_html($response['body']);
$html = defaultLinkTo($html, self::URI); $html = defaultLinkTo($html, self::URI);

View File

@ -14,7 +14,8 @@ while ($next) { /* Collect all contributors */
'Content-Type' => 'application/json', 'Content-Type' => 'application/json',
'User-Agent' => 'RSS-Bridge', 'User-Agent' => 'RSS-Bridge',
]; ];
$result = _http_request($url, ['headers' => $headers]); $httpClient = new CurlHttpClient();
$result = $httpClient->request($url, ['headers' => $headers]);
foreach (json_decode($result['body']) as $contributor) { foreach (json_decode($result['body']) as $contributor) {
$contributors[] = $contributor; $contributors[] = $contributor;

View File

@ -2,6 +2,7 @@
final class RssBridge final class RssBridge
{ {
private static HttpClient $httpClient;
private static CacheInterface $cache; private static CacheInterface $cache;
public function main(array $argv = []) public function main(array $argv = [])
@ -71,9 +72,10 @@ final class RssBridge
// Consider: ini_set('error_reporting', E_ALL & ~E_DEPRECATED); // Consider: ini_set('error_reporting', E_ALL & ~E_DEPRECATED);
date_default_timezone_set(Configuration::getConfig('system', 'timezone')); date_default_timezone_set(Configuration::getConfig('system', 'timezone'));
// Create cache
$cacheFactory = new CacheFactory(); $cacheFactory = new CacheFactory();
self::setCache($cacheFactory->create());
self::$httpClient = new CurlHttpClient();
self::$cache = $cacheFactory->create();
if (Configuration::getConfig('authentication', 'enable')) { if (Configuration::getConfig('authentication', 'enable')) {
$authenticationMiddleware = new AuthenticationMiddleware(); $authenticationMiddleware = new AuthenticationMiddleware();
@ -105,13 +107,13 @@ final class RssBridge
} }
} }
public static function getHttpClient(): HttpClient
{
return self::$httpClient;
}
public static function getCache(): CacheInterface public static function getCache(): CacheInterface
{ {
return self::$cache; return self::$cache;
} }
public static function setCache(CacheInterface $cache): void
{
self::$cache = $cache;
}
} }

View File

@ -99,6 +99,7 @@ function getContents(
array $curlOptions = [], array $curlOptions = [],
bool $returnFull = false bool $returnFull = false
) { ) {
$httpClient = RssBridge::getHttpClient();
$cache = RssBridge::getCache(); $cache = RssBridge::getCache();
$cache->setScope('server'); $cache->setScope('server');
$cache->setKey([$url]); $cache->setKey([$url]);
@ -141,20 +142,14 @@ function getContents(
$config['if_not_modified_since'] = $cache->getTime(); $config['if_not_modified_since'] = $cache->getTime();
} }
$result = _http_request($url, $config); $response = $httpClient->request($url, $config);
$response = [
'code' => $result['code'],
'status_lines' => $result['status_lines'],
'header' => $result['headers'],
'content' => $result['body'],
];
switch ($result['code']) { switch ($response['code']) {
case 200: case 200:
case 201: case 201:
case 202: case 202:
if (isset($result['headers']['cache-control'])) { if (isset($response['headers']['cache-control'])) {
$cachecontrol = $result['headers']['cache-control']; $cachecontrol = $response['headers']['cache-control'];
$lastValue = array_pop($cachecontrol); $lastValue = array_pop($cachecontrol);
$directives = explode(',', $lastValue); $directives = explode(',', $lastValue);
$directives = array_map('trim', $directives); $directives = array_map('trim', $directives);
@ -163,7 +158,7 @@ function getContents(
break; break;
} }
} }
$cache->saveData($result['body']); $cache->saveData($response['body']);
break; break;
case 301: case 301:
case 302: case 302:
@ -172,16 +167,16 @@ function getContents(
break; break;
case 304: case 304:
// Not Modified // Not Modified
$response['content'] = $cache->loadData(); $response['body'] = $cache->loadData();
break; break;
default: default:
$exceptionMessage = sprintf( $exceptionMessage = sprintf(
'%s resulted in %s %s %s', '%s resulted in %s %s %s',
$url, $url,
$result['code'], $response['code'],
Response::STATUS_CODES[$result['code']] ?? '', Response::STATUS_CODES[$response['code']] ?? '',
// If debug, include a part of the response body in the exception message // If debug, include a part of the response body in the exception message
Debug::isEnabled() ? mb_substr($result['body'], 0, 500) : '', Debug::isEnabled() ? mb_substr($response['body'], 0, 500) : '',
); );
// The following code must be extracted if it grows too much // The following code must be extracted if it grows too much
@ -192,137 +187,141 @@ function getContents(
'<title>Security | Glassdoor', '<title>Security | Glassdoor',
]; ];
foreach ($cloudflareTitles as $cloudflareTitle) { foreach ($cloudflareTitles as $cloudflareTitle) {
if (str_contains($result['body'], $cloudflareTitle)) { if (str_contains($response['body'], $cloudflareTitle)) {
throw new CloudFlareException($exceptionMessage, $result['code']); throw new CloudFlareException($exceptionMessage, $response['code']);
} }
} }
throw new HttpException(trim($exceptionMessage), $result['code']); throw new HttpException(trim($exceptionMessage), $result['code']);
} }
if ($returnFull === true) { if ($returnFull === true) {
// For legacy reasons, use content instead of body
$response['content'] = $response['body'];
unset($response['body']);
return $response; return $response;
} }
return $response['content']; return $response['body'];
} }
/** interface HttpClient
* Fetch content from url
*
* @internal Private function used internally
* @throws HttpException
*/
function _http_request(string $url, array $config = []): array
{ {
$defaults = [ public function request(string $url, array $config = []): array;
'useragent' => null, }
'timeout' => 5,
'headers' => [],
'proxy' => null,
'curl_options' => [],
'if_not_modified_since' => null,
'retries' => 3,
'max_filesize' => null,
'max_redirections' => 5,
];
$config = array_merge($defaults, $config);
$ch = curl_init($url); final class CurlHttpClient implements HttpClient
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); {
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true); public function request(string $url, array $config = []): array
curl_setopt($ch, CURLOPT_MAXREDIRS, $config['max_redirections']); {
curl_setopt($ch, CURLOPT_HEADER, false); $defaults = [
$httpHeaders = []; 'useragent' => null,
foreach ($config['headers'] as $name => $value) { 'timeout' => 5,
$httpHeaders[] = sprintf('%s: %s', $name, $value); 'headers' => [],
} 'proxy' => null,
curl_setopt($ch, CURLOPT_HTTPHEADER, $httpHeaders); 'curl_options' => [],
if ($config['useragent']) { 'if_not_modified_since' => null,
curl_setopt($ch, CURLOPT_USERAGENT, $config['useragent']); 'retries' => 3,
} 'max_filesize' => null,
curl_setopt($ch, CURLOPT_TIMEOUT, $config['timeout']); 'max_redirections' => 5,
curl_setopt($ch, CURLOPT_ENCODING, ''); ];
curl_setopt($ch, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS); $config = array_merge($defaults, $config);
if ($config['max_filesize']) { $ch = curl_init($url);
// This option inspects the Content-Length header curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_MAXFILESIZE, $config['max_filesize']); curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($ch, CURLOPT_NOPROGRESS, false); curl_setopt($ch, CURLOPT_MAXREDIRS, $config['max_redirections']);
// This progress function will monitor responses who omit the Content-Length header curl_setopt($ch, CURLOPT_HEADER, false);
curl_setopt($ch, CURLOPT_PROGRESSFUNCTION, function ($ch, $downloadSize, $downloaded, $uploadSize, $uploaded) use ($config) { $httpHeaders = [];
if ($downloaded > $config['max_filesize']) { foreach ($config['headers'] as $name => $value) {
// Return a non-zero value to abort the transfer $httpHeaders[] = sprintf('%s: %s', $name, $value);
return -1; }
curl_setopt($ch, CURLOPT_HTTPHEADER, $httpHeaders);
if ($config['useragent']) {
curl_setopt($ch, CURLOPT_USERAGENT, $config['useragent']);
}
curl_setopt($ch, CURLOPT_TIMEOUT, $config['timeout']);
curl_setopt($ch, CURLOPT_ENCODING, '');
curl_setopt($ch, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
if ($config['max_filesize']) {
// This option inspects the Content-Length header
curl_setopt($ch, CURLOPT_MAXFILESIZE, $config['max_filesize']);
curl_setopt($ch, CURLOPT_NOPROGRESS, false);
// This progress function will monitor responses who omit the Content-Length header
curl_setopt($ch, CURLOPT_PROGRESSFUNCTION, function ($ch, $downloadSize, $downloaded, $uploadSize, $uploaded) use ($config) {
if ($downloaded > $config['max_filesize']) {
// Return a non-zero value to abort the transfer
return -1;
}
return 0;
});
}
if ($config['proxy']) {
curl_setopt($ch, CURLOPT_PROXY, $config['proxy']);
}
if (curl_setopt_array($ch, $config['curl_options']) === false) {
throw new \Exception('Tried to set an illegal curl option');
}
if ($config['if_not_modified_since']) {
curl_setopt($ch, CURLOPT_TIMEVALUE, $config['if_not_modified_since']);
curl_setopt($ch, CURLOPT_TIMECONDITION, CURL_TIMECOND_IFMODSINCE);
}
$responseStatusLines = [];
$responseHeaders = [];
curl_setopt($ch, CURLOPT_HEADERFUNCTION, function ($ch, $rawHeader) use (&$responseHeaders, &$responseStatusLines) {
$len = strlen($rawHeader);
if ($rawHeader === "\r\n") {
return $len;
} }
return 0; if (preg_match('#^HTTP/(2|1.1|1.0)#', $rawHeader)) {
$responseStatusLines[] = $rawHeader;
return $len;
}
$header = explode(':', $rawHeader);
if (count($header) === 1) {
return $len;
}
$name = mb_strtolower(trim($header[0]));
$value = trim(implode(':', array_slice($header, 1)));
if (!isset($responseHeaders[$name])) {
$responseHeaders[$name] = [];
}
$responseHeaders[$name][] = $value;
return $len;
}); });
}
if ($config['proxy']) { $attempts = 0;
curl_setopt($ch, CURLOPT_PROXY, $config['proxy']); while (true) {
} $attempts++;
if (curl_setopt_array($ch, $config['curl_options']) === false) { $data = curl_exec($ch);
throw new \Exception('Tried to set an illegal curl option'); if ($data !== false) {
} // The network call was successful, so break out of the loop
break;
}
if ($attempts > $config['retries']) {
// Finally give up
$curl_error = curl_error($ch);
$curl_errno = curl_errno($ch);
throw new HttpException(sprintf(
'cURL error %s: %s (%s) for %s',
$curl_error,
$curl_errno,
'https://curl.haxx.se/libcurl/c/libcurl-errors.html',
$url
));
}
}
if ($config['if_not_modified_since']) { $statusCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
curl_setopt($ch, CURLOPT_TIMEVALUE, $config['if_not_modified_since']); curl_close($ch);
curl_setopt($ch, CURLOPT_TIMECONDITION, CURL_TIMECOND_IFMODSINCE); return [
'code' => $statusCode,
'status_lines' => $responseStatusLines,
'headers' => $responseHeaders,
'body' => $data,
];
} }
$responseStatusLines = [];
$responseHeaders = [];
curl_setopt($ch, CURLOPT_HEADERFUNCTION, function ($ch, $rawHeader) use (&$responseHeaders, &$responseStatusLines) {
$len = strlen($rawHeader);
if ($rawHeader === "\r\n") {
return $len;
}
if (preg_match('#^HTTP/(2|1.1|1.0)#', $rawHeader)) {
$responseStatusLines[] = $rawHeader;
return $len;
}
$header = explode(':', $rawHeader);
if (count($header) === 1) {
return $len;
}
$name = mb_strtolower(trim($header[0]));
$value = trim(implode(':', array_slice($header, 1)));
if (!isset($responseHeaders[$name])) {
$responseHeaders[$name] = [];
}
$responseHeaders[$name][] = $value;
return $len;
});
$attempts = 0;
while (true) {
$attempts++;
$data = curl_exec($ch);
if ($data !== false) {
// The network call was successful, so break out of the loop
break;
}
if ($attempts > $config['retries']) {
// Finally give up
$curl_error = curl_error($ch);
$curl_errno = curl_errno($ch);
throw new HttpException(sprintf(
'cURL error %s: %s (%s) for %s',
$curl_error,
$curl_errno,
'https://curl.haxx.se/libcurl/c/libcurl-errors.html',
$url
));
}
}
$statusCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
curl_close($ch);
return [
'code' => $statusCode,
'status_lines' => $responseStatusLines,
'headers' => $responseHeaders,
'body' => $data,
];
} }
/** /**