This commit is contained in:
Dag 2023-10-13 01:02:19 +02:00 committed by GitHub
parent 44fb2c98bc
commit e379019db2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 72 additions and 57 deletions

View File

@ -27,29 +27,30 @@ class NextgovBridge extends FeedExpander
public function collectData()
{
$url = self::URI . 'rss/' . $this->getInput('category') . '/';
$this->collectExpandableDatas($url, 10);
$limit = 10;
$this->collectExpandableDatas($url, $limit);
}
protected function parseItem($newsItem)
protected function parseItem($item)
{
$item = parent::parseItem($newsItem);
$item = parent::parseItem($item);
$article_thumbnail = 'https://cdn.nextgov.com/nextgov/images/logo.png';
$item['content'] = '<p><b>' . $item['content'] . '</b></p>';
$namespaces = $newsItem->getNamespaces(true);
if (isset($namespaces['media'])) {
$media = $newsItem->children($namespaces['media']);
if (isset($media->content)) {
$attributes = $media->content->attributes();
$item['content'] = '<p><img src="' . $attributes['url'] . '"></p>' . $item['content'];
$article_thumbnail = str_replace(
'large.jpg',
'small.jpg',
strval($attributes['url'])
);
}
}
// $namespaces = $newsItem->getNamespaces(true);
// if (isset($namespaces['media'])) {
// $media = $newsItem->children($namespaces['media']);
// if (isset($media->content)) {
// $attributes = $media->content->attributes();
// $item['content'] = '<p><img src="' . $attributes['url'] . '"></p>' . $item['content'];
// $article_thumbnail = str_replace(
// 'large.jpg',
// 'small.jpg',
// strval($attributes['url'])
// );
// }
// }
$item['enclosures'] = [$article_thumbnail];
$item['content'] .= $this->extractContent($item['uri']);

View File

@ -1,6 +1,6 @@
<?php
class NyaaTorrentsBridge extends FeedExpander
class NyaaTorrentsBridge extends BridgeAbstract
{
const MAINTAINER = 'ORelio & Jisagi';
const NAME = 'NyaaTorrents';
@ -62,44 +62,57 @@ class NyaaTorrentsBridge extends FeedExpander
public function collectData()
{
$this->collectExpandableDatas($this->getURI(), 20);
}
// Manually parsing because we need to access the nyaa namespace in the xml
$xml = simplexml_load_string(getContents($this->getURI()));
$channel = $xml->channel[0];
$feed = [];
$feed['title'] = trim((string)$channel->title);
$feed['uri'] = trim((string)$channel->link);
if (!empty($channel->image)) {
$feed['icon'] = trim((string)$channel->image->url);
}
$items = $xml->channel[0]->item;
foreach ($items as $feedItem) {
$item = [
'title' => (string) $feedItem->title,
'uri' => (string) $feedItem->link,
];
protected function parseItem($newsItem)
{
$item = parent::parseItem($newsItem);
$nyaaFields = (array)($newsItem->children('nyaa', true));
$item['id'] = str_replace(['https://nyaa.si/download/', '.torrent'], '', $item['uri']);
$item['id'] = str_replace(['https://nyaa.si/download/', '.torrent'], '', $item['uri']);
$nyaaNamespace = (array)($feedItem->children('nyaa', true));
$item = array_merge($item, $nyaaNamespace);
$item = array_merge($item, $nyaaFields);
// Convert URI from torrent file to web page
$item['uri'] = str_replace('/download/', '/view/', $item['uri']);
$item['uri'] = str_replace('.torrent', '', $item['uri']);
// Convert URI from torrent file to web page
$item['uri'] = str_replace('/download/', '/view/', $item['uri']);
$item['uri'] = str_replace('.torrent', '', $item['uri']);
$item_html = getSimpleHTMLDOMCached($item['uri']);
if ($item_html) {
// Retrieve full description from page contents
$item_desc = str_get_html(
markdownToHtml(html_entity_decode($item_html->find('#torrent-description', 0)->innertext))
);
$item_html = getSimpleHTMLDOMCached($item['uri']);
if ($item_html) {
// Retrieve full description from page contents
$item_desc = str_get_html(
markdownToHtml(html_entity_decode($item_html->find('#torrent-description', 0)->innertext))
);
// Retrieve image for thumbnail or generic logo fallback
$item_image = $this->getURI() . 'static/img/avatar/default.png';
foreach ($item_desc->find('img') as $img) {
if (strpos($img->src, 'prez') === false) {
$item_image = $img->src;
break;
// Retrieve image for thumbnail or generic logo fallback
$item_image = $this->getURI() . 'static/img/avatar/default.png';
foreach ($item_desc->find('img') as $img) {
if (strpos($img->src, 'prez') === false) {
$item_image = $img->src;
break;
}
}
$item['enclosures'] = [$item_image];
$item['content'] = $item_desc;
}
$item['enclosures'] = [$item_image];
$item['content'] = $item_desc;
$this->items[] = $item;
if (count($this->items) >= 10) {
break;
}
}
return $item;
}
public function getIcon()

View File

@ -43,20 +43,20 @@ class TapasBridge extends FeedExpander
$this->collectExpandableDatas($this->getURI());
}
protected function parseItem($feedItem)
protected function parseItem($item)
{
$item = parent::parseItem($feedItem);
$item = parent::parseItem($item);
$namespaces = $feedItem->getNamespaces(true);
if (isset($namespaces['content'])) {
$description = $feedItem->children($namespaces['content']);
if (isset($description->encoded)) {
$item['content'] = (string)$description->encoded;
}
}
// $namespaces = $feedItem->getNamespaces(true);
// if (isset($namespaces['content'])) {
// $description = $feedItem->children($namespaces['content']);
// if (isset($description->encoded)) {
// $item['content'] = (string)$description->encoded;
// }
// }
if ($this->getInput('extend_content')) {
$html = getSimpleHTMLDOM($item['uri']) or returnServerError('Could not request ' . $this->getURI());
$html = getSimpleHTMLDOM($item['uri']);
if (!$item['content']) {
$item['content'] = '';
}
@ -79,8 +79,6 @@ class TapasBridge extends FeedExpander
{
if ($this->id) {
return self::URI . 'rss/series/' . $this->id;
} else {
return self::URI . 'series/' . $this->getInput('title') . '/info/';
}
return self::URI;
}

View File

@ -6,7 +6,10 @@ final class FeedParser
{
public function parseFeed(string $xmlString): array
{
libxml_use_internal_errors(true);
$xml = simplexml_load_string(trim($xmlString));
$xmlErrors = libxml_get_errors();
libxml_use_internal_errors(false);
if ($xml === false) {
throw new \Exception('Unable to parse xml');
}