fix: improve FeedExpander (#3103)

* fix: improve FeedExpander

Include the first libxml error in exception.

Give better error message if trying to parse the empty string.

Log all libxml errors if debug mode is enabled.

* error handling and logging tweak
This commit is contained in:
Dag 2022-10-29 10:27:02 +02:00 committed by GitHub
parent f9cd397900
commit e027bd9274
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 53 additions and 22 deletions

View File

@ -101,13 +101,34 @@ abstract class FeedExpander extends BridgeAbstract
];
$httpHeaders = ['Accept: ' . implode(', ', $mimeTypes)];
$content = getContents($url, $httpHeaders);
// Suppress php errors. We will check return value for success.
$rssContent = @simplexml_load_string(trim($content));
if ($rssContent === false) {
throw new \Exception(sprintf('Unable to parse xml from "%s"', $url));
if ($content === '') {
throw new \Exception(sprintf('Unable to parse xml from `%s` because we got the empty string', $url));
}
// Maybe move this call earlier up the stack frames
// Disable triggering of the php error-handler and handle errors manually instead
libxml_use_internal_errors(true);
// Consider replacing libxml with https://www.php.net/domdocument
// Intentionally not using the silencing operator (@) because it has no effect here
$rssContent = simplexml_load_string(trim($content));
if ($rssContent === false) {
$xmlErrors = libxml_get_errors();
foreach ($xmlErrors as $xmlError) {
if (Debug::isEnabled()) {
Debug::log(trim($xmlError->message));
}
}
if ($xmlErrors) {
// Render only the first error into exception message
$firstXmlErrorMessage = $xmlErrors[0]->message;
}
throw new \Exception(sprintf('Unable to parse xml from `%s` %s', $url, $firstXmlErrorMessage ?? ''));
}
// Restore previous behaviour in case other code relies on it being off
libxml_use_internal_errors(false);
// Commented out because it's spammy
// Debug::log(sprintf("RSS content is ===========\n%s===========", var_export($rssContent, true)));
Debug::log('Detecting feed format/version');
switch (true) {
case isset($rssContent->item[0]):
Debug::log('Detected RSS 1.0 format');
@ -149,7 +170,6 @@ abstract class FeedExpander extends BridgeAbstract
{
$this->loadRss2Data($rssContent->channel[0]);
foreach ($rssContent->item as $item) {
Debug::log(sprintf('Parsing item %s', var_export($item, true)));
$tmp_item = $this->parseItem($item);
if (!empty($tmp_item)) {
$this->items[] = $tmp_item;
@ -165,24 +185,16 @@ abstract class FeedExpander extends BridgeAbstract
*
* @link http://www.rssboard.org/rss-specification RSS 2.0 Specification
*
* @param object $rssContent The RSS content
* @param int $maxItems Maximum number of items to collect from the feed
* (`-1`: no limit).
* @param int $maxItems Maximum number of items to collect from the feed (`-1`: no limit).
* @return void
*
* @todo Instead of passing $maxItems to all functions, just add all items
* and remove excessive items later.
* @todo Instead of passing $maxItems to all functions, just add all items and remove excessive items later.
*/
protected function collectRss2($rssContent, $maxItems)
protected function collectRss2(\SimpleXMLElement $rssContent, $maxItems)
{
$rssContent = $rssContent->channel[0];
Debug::log('RSS content is ===========\n'
. var_export($rssContent, true)
. '===========');
$this->loadRss2Data($rssContent);
foreach ($rssContent->item as $item) {
Debug::log('parsing item ' . var_export($item, true));
$tmp_item = $this->parseItem($item);
if (!empty($tmp_item)) {
$this->items[] = $tmp_item;
@ -210,7 +222,6 @@ abstract class FeedExpander extends BridgeAbstract
{
$this->loadAtomData($content);
foreach ($content->entry as $item) {
Debug::log('parsing item ' . var_export($item, true));
$tmp_item = $this->parseItem($item);
if (!empty($tmp_item)) {
$this->items[] = $tmp_item;

View File

@ -321,7 +321,7 @@ class FeedItem
if (is_string($content)) {
$this->content = $content;
} else {
Debug::log('Content must be a string!');
Debug::log(sprintf('Feed content must be a string but got %s', gettype($content)));
}
return $this;

View File

@ -6,8 +6,10 @@ final class Logger
{
public static function debug(string $message, array $context = [])
{
if (Debug::isEnabled()) {
self::log('DEBUG', $message, $context);
}
}
public static function info(string $message, array $context = []): void
{

View File

@ -41,7 +41,25 @@ final class RssBridge
// Drop the current frame
Logger::warning($text);
if (Debug::isEnabled()) {
print sprintf('<pre>%s</pre>', $text);
print sprintf("<pre>%s</pre>\n", e($text));
}
});
// There might be some fatal errors which are not caught by set_error_handler() or \Throwable.
register_shutdown_function(function () {
$error = error_get_last();
if ($error) {
$message = sprintf(
'Fatal Error %s: %s in %s line %s',
$error['type'],
$error['message'],
trim_path_prefix($error['file']),
$error['line']
);
Logger::error($message);
if (Debug::isEnabled()) {
print sprintf("<pre>%s</pre>\n", e($message));
}
}
});

View File

@ -364,7 +364,7 @@ function getSimpleHTMLDOMCached(
$defaultBRText = DEFAULT_BR_TEXT,
$defaultSpanText = DEFAULT_SPAN_TEXT
) {
Debug::log('Caching url ' . $url . ', duration ' . $duration);
Logger::debug(sprintf('Caching url %s, duration %d', $url, $duration));
// Initialize cache
$cacheFactory = new CacheFactory();