diff --git a/bridges/TapasBridge.php b/bridges/TapasBridge.php index 19995a23..ea6a7ff6 100644 --- a/bridges/TapasBridge.php +++ b/bridges/TapasBridge.php @@ -40,7 +40,7 @@ class TapasBridge extends FeedExpander $this->id = $html->find('meta[property$=":url"]', 0)->content; $this->id = str_ireplace(['tapastic://series/', '/info'], '', $this->id); } - $this->collectExpandableDatas($this->getURI()); + $this->collectExpandableDatas($this->getURI(), 10); } protected function parseItem(array $item) @@ -55,9 +55,8 @@ class TapasBridge extends FeedExpander if ($this->getInput('extend_content')) { $html = getSimpleHTMLDOM($item['uri']); - if (!$item['content']) { - $item['content'] = ''; - } + $item['content'] = $item['content'] ?? ''; + if ($html->find('article.main__body', 0)) { foreach ($html->find('article', 0)->find('img') as $line) { $item['content'] .= ''; diff --git a/lib/FeedExpander.php b/lib/FeedExpander.php index f9cff900..361df4d9 100644 --- a/lib/FeedExpander.php +++ b/lib/FeedExpander.php @@ -22,6 +22,11 @@ abstract class FeedExpander extends BridgeAbstract if ($xmlString === '') { throw new \Exception(sprintf('Unable to parse xml from `%s` because we got the empty string', $url), 10); } + // prepare/massage the xml to make it more acceptable + $badStrings = [ + '»', + ]; + $xmlString = str_replace($badStrings, '', $xmlString); $feedParser = new FeedParser(); $this->feed = $feedParser->parseFeed($xmlString); $items = array_slice($this->feed['items'], 0, $maxItems); diff --git a/lib/FeedParser.php b/lib/FeedParser.php index 0a5b4679..7c8a5232 100644 --- a/lib/FeedParser.php +++ b/lib/FeedParser.php @@ -11,7 +11,10 @@ final class FeedParser $xmlErrors = libxml_get_errors(); libxml_use_internal_errors(false); if ($xml === false) { - throw new \Exception('Unable to parse xml'); + if ($xmlErrors) { + $firstXmlErrorMessage = $xmlErrors[0]->message; + } + throw new \Exception(sprintf('Unable to parse xml: %s', $firstXmlErrorMessage ?? '')); } $feed = [ 'title' => null, @@ -123,7 +126,6 @@ final class FeedParser { // Primary data is compatible to 0.91 with some additional data $item = $this->parseRss091Item($feedItem); - $namespaces = $feedItem->getNamespaces(true); if (isset($namespaces['dc'])) { $dc = $feedItem->children($namespaces['dc']); @@ -192,7 +194,14 @@ final class FeedParser public function parseRss091Item(\SimpleXMLElement $feedItem): array { - $item = []; + $item = [ + 'uri' => null, + 'title' => null, + 'content' => null, + 'timestamp' => null, + 'author' => null, + 'enclosures' => [], + ]; if (isset($feedItem->link)) { // todo: trim uri $item['uri'] = (string)$feedItem->link;