diff --git a/bridges/NextgovBridge.php b/bridges/NextgovBridge.php index 1e096d6c..bc92d306 100644 --- a/bridges/NextgovBridge.php +++ b/bridges/NextgovBridge.php @@ -27,29 +27,30 @@ class NextgovBridge extends FeedExpander public function collectData() { $url = self::URI . 'rss/' . $this->getInput('category') . '/'; - $this->collectExpandableDatas($url, 10); + $limit = 10; + $this->collectExpandableDatas($url, $limit); } - protected function parseItem($newsItem) + protected function parseItem($item) { - $item = parent::parseItem($newsItem); + $item = parent::parseItem($item); $article_thumbnail = 'https://cdn.nextgov.com/nextgov/images/logo.png'; $item['content'] = '

' . $item['content'] . '

'; - $namespaces = $newsItem->getNamespaces(true); - if (isset($namespaces['media'])) { - $media = $newsItem->children($namespaces['media']); - if (isset($media->content)) { - $attributes = $media->content->attributes(); - $item['content'] = '

' . $item['content']; - $article_thumbnail = str_replace( - 'large.jpg', - 'small.jpg', - strval($attributes['url']) - ); - } - } +// $namespaces = $newsItem->getNamespaces(true); +// if (isset($namespaces['media'])) { +// $media = $newsItem->children($namespaces['media']); +// if (isset($media->content)) { +// $attributes = $media->content->attributes(); +// $item['content'] = '

' . $item['content']; +// $article_thumbnail = str_replace( +// 'large.jpg', +// 'small.jpg', +// strval($attributes['url']) +// ); +// } +// } $item['enclosures'] = [$article_thumbnail]; $item['content'] .= $this->extractContent($item['uri']); diff --git a/bridges/NyaaTorrentsBridge.php b/bridges/NyaaTorrentsBridge.php index 3b5ad3ad..f7eea07f 100644 --- a/bridges/NyaaTorrentsBridge.php +++ b/bridges/NyaaTorrentsBridge.php @@ -1,6 +1,6 @@ collectExpandableDatas($this->getURI(), 20); - } + // Manually parsing because we need to access the nyaa namespace in the xml + $xml = simplexml_load_string(getContents($this->getURI())); + $channel = $xml->channel[0]; + $feed = []; + $feed['title'] = trim((string)$channel->title); + $feed['uri'] = trim((string)$channel->link); + if (!empty($channel->image)) { + $feed['icon'] = trim((string)$channel->image->url); + } + $items = $xml->channel[0]->item; + foreach ($items as $feedItem) { + $item = [ + 'title' => (string) $feedItem->title, + 'uri' => (string) $feedItem->link, + ]; - protected function parseItem($newsItem) - { - $item = parent::parseItem($newsItem); - $nyaaFields = (array)($newsItem->children('nyaa', true)); + $item['id'] = str_replace(['https://nyaa.si/download/', '.torrent'], '', $item['uri']); - $item['id'] = str_replace(['https://nyaa.si/download/', '.torrent'], '', $item['uri']); + $nyaaNamespace = (array)($feedItem->children('nyaa', true)); + $item = array_merge($item, $nyaaNamespace); - $item = array_merge($item, $nyaaFields); + // Convert URI from torrent file to web page + $item['uri'] = str_replace('/download/', '/view/', $item['uri']); + $item['uri'] = str_replace('.torrent', '', $item['uri']); - // Convert URI from torrent file to web page - $item['uri'] = str_replace('/download/', '/view/', $item['uri']); - $item['uri'] = str_replace('.torrent', '', $item['uri']); + $item_html = getSimpleHTMLDOMCached($item['uri']); + if ($item_html) { + // Retrieve full description from page contents + $item_desc = 
str_get_html( + markdownToHtml(html_entity_decode($item_html->find('#torrent-description', 0)->innertext)) + ); - $item_html = getSimpleHTMLDOMCached($item['uri']); - if ($item_html) { - // Retrieve full description from page contents - $item_desc = str_get_html( - markdownToHtml(html_entity_decode($item_html->find('#torrent-description', 0)->innertext)) - ); - - // Retrieve image for thumbnail or generic logo fallback - $item_image = $this->getURI() . 'static/img/avatar/default.png'; - foreach ($item_desc->find('img') as $img) { - if (strpos($img->src, 'prez') === false) { - $item_image = $img->src; - break; + // Retrieve image for thumbnail or generic logo fallback + $item_image = $this->getURI() . 'static/img/avatar/default.png'; + foreach ($item_desc->find('img') as $img) { + if (strpos($img->src, 'prez') === false) { + $item_image = $img->src; + break; + } } + + $item['enclosures'] = [$item_image]; + $item['content'] = $item_desc; } - $item['enclosures'] = [$item_image]; - $item['content'] = $item_desc; + $this->items[] = $item; + if (count($this->items) >= 10) { + break; + } } - - return $item; } public function getIcon() diff --git a/bridges/TapasBridge.php b/bridges/TapasBridge.php index 11a9551d..ddfbfb92 100644 --- a/bridges/TapasBridge.php +++ b/bridges/TapasBridge.php @@ -43,20 +43,20 @@ class TapasBridge extends FeedExpander $this->collectExpandableDatas($this->getURI()); } - protected function parseItem($feedItem) + protected function parseItem($item) { - $item = parent::parseItem($feedItem); + $item = parent::parseItem($item); - $namespaces = $feedItem->getNamespaces(true); - if (isset($namespaces['content'])) { - $description = $feedItem->children($namespaces['content']); - if (isset($description->encoded)) { - $item['content'] = (string)$description->encoded; - } - } +// $namespaces = $feedItem->getNamespaces(true); +// if (isset($namespaces['content'])) { +// $description = $feedItem->children($namespaces['content']); +// if 
(isset($description->encoded)) { +// $item['content'] = (string)$description->encoded; +// } +// } if ($this->getInput('extend_content')) { - $html = getSimpleHTMLDOM($item['uri']) or returnServerError('Could not request ' . $this->getURI()); + $html = getSimpleHTMLDOM($item['uri']); if (!$item['content']) { $item['content'] = ''; } @@ -79,8 +79,6 @@ class TapasBridge extends FeedExpander { if ($this->id) { return self::URI . 'rss/series/' . $this->id; - } else { - return self::URI . 'series/' . $this->getInput('title') . '/info/'; } return self::URI; } diff --git a/lib/FeedParser.php b/lib/FeedParser.php index a9aabde0..04452e7d 100644 --- a/lib/FeedParser.php +++ b/lib/FeedParser.php @@ -6,7 +6,10 @@ final class FeedParser { public function parseFeed(string $xmlString): array { + libxml_use_internal_errors(true); $xml = simplexml_load_string(trim($xmlString)); + $xmlErrors = libxml_get_errors(); + libxml_use_internal_errors(false); if ($xml === false) { throw new \Exception('Unable to parse xml'); }