fix: improve FeedExpander (#3103)

* fix: improve FeedExpander

Include the first libxml error in exception.

Give better error message if trying to parse the empty string.

Log all libxml errors if debug mode is enabled.

* error handling and logging tweak
This commit is contained in:
Dag 2022-10-29 10:27:02 +02:00 committed by GitHub
parent f9cd397900
commit e027bd9274
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 53 additions and 22 deletions

View File

@ -101,13 +101,34 @@ abstract class FeedExpander extends BridgeAbstract
]; ];
$httpHeaders = ['Accept: ' . implode(', ', $mimeTypes)]; $httpHeaders = ['Accept: ' . implode(', ', $mimeTypes)];
$content = getContents($url, $httpHeaders); $content = getContents($url, $httpHeaders);
// Suppress php errors. We will check return value for success. if ($content === '') {
$rssContent = @simplexml_load_string(trim($content)); throw new \Exception(sprintf('Unable to parse xml from `%s` because we got the empty string', $url));
if ($rssContent === false) {
throw new \Exception(sprintf('Unable to parse xml from "%s"', $url));
} }
// Maybe move this call earlier up the stack frames
// Disable triggering of the php error-handler and handle errors manually instead
libxml_use_internal_errors(true);
// Consider replacing libxml with https://www.php.net/domdocument
// Intentionally not using the silencing operator (@) because it has no effect here
$rssContent = simplexml_load_string(trim($content));
if ($rssContent === false) {
$xmlErrors = libxml_get_errors();
foreach ($xmlErrors as $xmlError) {
if (Debug::isEnabled()) {
Debug::log(trim($xmlError->message));
}
}
if ($xmlErrors) {
// Render only the first error into exception message
$firstXmlErrorMessage = $xmlErrors[0]->message;
}
throw new \Exception(sprintf('Unable to parse xml from `%s` %s', $url, $firstXmlErrorMessage ?? ''));
}
// Restore previous behaviour in case other code relies on it being off
libxml_use_internal_errors(false);
// Commented out because it's spammy
// Debug::log(sprintf("RSS content is ===========\n%s===========", var_export($rssContent, true)));
Debug::log('Detecting feed format/version');
switch (true) { switch (true) {
case isset($rssContent->item[0]): case isset($rssContent->item[0]):
Debug::log('Detected RSS 1.0 format'); Debug::log('Detected RSS 1.0 format');
@ -149,7 +170,6 @@ abstract class FeedExpander extends BridgeAbstract
{ {
$this->loadRss2Data($rssContent->channel[0]); $this->loadRss2Data($rssContent->channel[0]);
foreach ($rssContent->item as $item) { foreach ($rssContent->item as $item) {
Debug::log(sprintf('Parsing item %s', var_export($item, true)));
$tmp_item = $this->parseItem($item); $tmp_item = $this->parseItem($item);
if (!empty($tmp_item)) { if (!empty($tmp_item)) {
$this->items[] = $tmp_item; $this->items[] = $tmp_item;
@ -165,24 +185,16 @@ abstract class FeedExpander extends BridgeAbstract
* *
* @link http://www.rssboard.org/rss-specification RSS 2.0 Specification * @link http://www.rssboard.org/rss-specification RSS 2.0 Specification
* *
* @param object $rssContent The RSS content * @param int $maxItems Maximum number of items to collect from the feed (`-1`: no limit).
* @param int $maxItems Maximum number of items to collect from the feed
* (`-1`: no limit).
* @return void * @return void
* *
* @todo Instead of passing $maxItems to all functions, just add all items * @todo Instead of passing $maxItems to all functions, just add all items and remove excessive items later.
* and remove excessive items later.
*/ */
protected function collectRss2($rssContent, $maxItems) protected function collectRss2(\SimpleXMLElement $rssContent, $maxItems)
{ {
$rssContent = $rssContent->channel[0]; $rssContent = $rssContent->channel[0];
Debug::log('RSS content is ===========\n'
. var_export($rssContent, true)
. '===========');
$this->loadRss2Data($rssContent); $this->loadRss2Data($rssContent);
foreach ($rssContent->item as $item) { foreach ($rssContent->item as $item) {
Debug::log('parsing item ' . var_export($item, true));
$tmp_item = $this->parseItem($item); $tmp_item = $this->parseItem($item);
if (!empty($tmp_item)) { if (!empty($tmp_item)) {
$this->items[] = $tmp_item; $this->items[] = $tmp_item;
@ -210,7 +222,6 @@ abstract class FeedExpander extends BridgeAbstract
{ {
$this->loadAtomData($content); $this->loadAtomData($content);
foreach ($content->entry as $item) { foreach ($content->entry as $item) {
Debug::log('parsing item ' . var_export($item, true));
$tmp_item = $this->parseItem($item); $tmp_item = $this->parseItem($item);
if (!empty($tmp_item)) { if (!empty($tmp_item)) {
$this->items[] = $tmp_item; $this->items[] = $tmp_item;

View File

@ -321,7 +321,7 @@ class FeedItem
if (is_string($content)) { if (is_string($content)) {
$this->content = $content; $this->content = $content;
} else { } else {
Debug::log('Content must be a string!'); Debug::log(sprintf('Feed content must be a string but got %s', gettype($content)));
} }
return $this; return $this;

View File

@ -6,8 +6,10 @@ final class Logger
{ {
public static function debug(string $message, array $context = []) public static function debug(string $message, array $context = [])
{ {
if (Debug::isEnabled()) {
self::log('DEBUG', $message, $context); self::log('DEBUG', $message, $context);
} }
}
public static function info(string $message, array $context = []): void public static function info(string $message, array $context = []): void
{ {

View File

@ -41,7 +41,25 @@ final class RssBridge
// Drop the current frame // Drop the current frame
Logger::warning($text); Logger::warning($text);
if (Debug::isEnabled()) { if (Debug::isEnabled()) {
print sprintf('<pre>%s</pre>', $text); print sprintf("<pre>%s</pre>\n", e($text));
}
});
// There might be some fatal errors which are not caught by set_error_handler() or \Throwable.
register_shutdown_function(function () {
$error = error_get_last();
if ($error) {
$message = sprintf(
'Fatal Error %s: %s in %s line %s',
$error['type'],
$error['message'],
trim_path_prefix($error['file']),
$error['line']
);
Logger::error($message);
if (Debug::isEnabled()) {
print sprintf("<pre>%s</pre>\n", e($message));
}
} }
}); });

View File

@ -364,7 +364,7 @@ function getSimpleHTMLDOMCached(
$defaultBRText = DEFAULT_BR_TEXT, $defaultBRText = DEFAULT_BR_TEXT,
$defaultSpanText = DEFAULT_SPAN_TEXT $defaultSpanText = DEFAULT_SPAN_TEXT
) { ) {
Debug::log('Caching url ' . $url . ', duration ' . $duration); Logger::debug(sprintf('Caching url %s, duration %d', $url, $duration));
// Initialize cache // Initialize cache
$cacheFactory = new CacheFactory(); $cacheFactory = new CacheFactory();