From e027bd927498ef719fc720e3b708d05ce3b34542 Mon Sep 17 00:00:00 2001 From: Dag Date: Sat, 29 Oct 2022 10:27:02 +0200 Subject: [PATCH] fix: improve FeedExpander (#3103) * fix: improve FeedExpander Include the first libxml error in exception. Give better error message if trying to parse the empty string. Log all libxml errors if debug mode is enabled. * error handling and logging tweak --- lib/FeedExpander.php | 47 +++++++++++++++++++++++++++----------------- lib/FeedItem.php | 2 +- lib/Logger.php | 4 +++- lib/RssBridge.php | 20 ++++++++++++++++++- lib/contents.php | 2 +- 5 files changed, 53 insertions(+), 22 deletions(-) diff --git a/lib/FeedExpander.php b/lib/FeedExpander.php index b1b43972..f537a76a 100644 --- a/lib/FeedExpander.php +++ b/lib/FeedExpander.php @@ -101,13 +101,34 @@ abstract class FeedExpander extends BridgeAbstract ]; $httpHeaders = ['Accept: ' . implode(', ', $mimeTypes)]; $content = getContents($url, $httpHeaders); - // Suppress php errors. We will check return value for success. - $rssContent = @simplexml_load_string(trim($content)); - if ($rssContent === false) { - throw new \Exception(sprintf('Unable to parse xml from "%s"', $url)); + if ($content === '') { + throw new \Exception(sprintf('Unable to parse xml from `%s` because we got the empty string', $url)); } + // Maybe move this call earlier up the stack frames + // Disable triggering of the php error-handler and handle errors manually instead + libxml_use_internal_errors(true); + // Consider replacing libxml with https://www.php.net/domdocument + // Intentionally not using the silencing operator (@) because it has no effect here + $rssContent = simplexml_load_string(trim($content)); + if ($rssContent === false) { + $xmlErrors = libxml_get_errors(); + foreach ($xmlErrors as $xmlError) { + if (Debug::isEnabled()) { + Debug::log(trim($xmlError->message)); + } + } + if ($xmlErrors) { + // Render only the first error into exception message + $firstXmlErrorMessage = $xmlErrors[0]->message; + } + throw new \Exception(sprintf('Unable to parse xml from `%s` %s', $url, $firstXmlErrorMessage ?? '')); + } + // Restore previous behaviour in case other code relies on it being off + libxml_use_internal_errors(false); + + // Commented out because it's spammy + // Debug::log(sprintf("RSS content is ===========\n%s===========", var_export($rssContent, true))); - Debug::log('Detecting feed format/version'); switch (true) { case isset($rssContent->item[0]): Debug::log('Detected RSS 1.0 format'); @@ -149,7 +170,6 @@ abstract class FeedExpander extends BridgeAbstract { $this->loadRss2Data($rssContent->channel[0]); foreach ($rssContent->item as $item) { - Debug::log(sprintf('Parsing item %s', var_export($item, true))); $tmp_item = $this->parseItem($item); if (!empty($tmp_item)) { $this->items[] = $tmp_item; @@ -165,24 +185,16 @@ abstract class FeedExpander extends BridgeAbstract * * @link http://www.rssboard.org/rss-specification RSS 2.0 Specification * - * @param object $rssContent The RSS content - * @param int $maxItems Maximum number of items to collect from the feed - * (`-1`: no limit). + * @param int $maxItems Maximum number of items to collect from the feed (`-1`: no limit). * @return void * - * @todo Instead of passing $maxItems to all functions, just add all items - * and remove excessive items later. + * @todo Instead of passing $maxItems to all functions, just add all items and remove excessive items later. */ - protected function collectRss2($rssContent, $maxItems) + protected function collectRss2(\SimpleXMLElement $rssContent, $maxItems) { $rssContent = $rssContent->channel[0]; - Debug::log('RSS content is ===========\n' - . var_export($rssContent, true) - . '==========='); - $this->loadRss2Data($rssContent); foreach ($rssContent->item as $item) { - Debug::log('parsing item ' . var_export($item, true)); $tmp_item = $this->parseItem($item); if (!empty($tmp_item)) { $this->items[] = $tmp_item; @@ -210,7 +222,6 @@ abstract class FeedExpander extends BridgeAbstract { $this->loadAtomData($content); foreach ($content->entry as $item) { - Debug::log('parsing item ' . var_export($item, true)); $tmp_item = $this->parseItem($item); if (!empty($tmp_item)) { $this->items[] = $tmp_item; diff --git a/lib/FeedItem.php b/lib/FeedItem.php index a59b0b03..3d1b4509 100644 --- a/lib/FeedItem.php +++ b/lib/FeedItem.php @@ -321,7 +321,7 @@ class FeedItem if (is_string($content)) { $this->content = $content; } else { - Debug::log('Content must be a string!'); + Debug::log(sprintf('Feed content must be a string but got %s', gettype($content))); } return $this; diff --git a/lib/Logger.php b/lib/Logger.php index 0b01c07f..1e536443 100644 --- a/lib/Logger.php +++ b/lib/Logger.php @@ -6,7 +6,9 @@ final class Logger { public static function debug(string $message, array $context = []) { - self::log('DEBUG', $message, $context); + if (Debug::isEnabled()) { + self::log('DEBUG', $message, $context); + } } public static function info(string $message, array $context = []): void diff --git a/lib/RssBridge.php b/lib/RssBridge.php index 9146913b..867be30b 100644 --- a/lib/RssBridge.php +++ b/lib/RssBridge.php @@ -41,7 +41,25 @@ final class RssBridge // Drop the current frame Logger::warning($text); if (Debug::isEnabled()) { - print sprintf('
%s
', $text); + print sprintf("
%s
\n", e($text)); + } + }); + + // There might be some fatal errors which are not caught by set_error_handler() or \Throwable. + register_shutdown_function(function () { + $error = error_get_last(); + if ($error) { + $message = sprintf( + 'Fatal Error %s: %s in %s line %s', + $error['type'], + $error['message'], + trim_path_prefix($error['file']), + $error['line'] + ); + Logger::error($message); + if (Debug::isEnabled()) { + print sprintf("
%s
\n", e($message)); + } } }); diff --git a/lib/contents.php b/lib/contents.php index ae215869..33f20cc2 100644 --- a/lib/contents.php +++ b/lib/contents.php @@ -364,7 +364,7 @@ function getSimpleHTMLDOMCached( $defaultBRText = DEFAULT_BR_TEXT, $defaultSpanText = DEFAULT_SPAN_TEXT ) { - Debug::log('Caching url ' . $url . ', duration ' . $duration); + Logger::debug(sprintf('Caching url %s, duration %d', $url, $duration)); // Initialize cache $cacheFactory = new CacheFactory();