refactor: remove parent calls to parseItem (#3747)

This commit is contained in:
Dag 2023-10-13 01:59:05 +02:00 committed by GitHub
parent e379019db2
commit 2880524dfc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
55 changed files with 96 additions and 293 deletions

View File

@ -25,10 +25,8 @@ class AcrimedBridge extends FeedExpander
$this->collectExpandableDatas($url, $limit);
}
protected function parseItem($item)
protected function parseItem(array $item)
{
$item = parent::parseItem($item);
$articlePage = getSimpleHTMLDOM($item['uri']);
$article = sanitize($articlePage->find('article.article1', 0)->innertext);
$article = defaultLinkTo($article, static::URI);

View File

@ -33,10 +33,8 @@ class ArsTechnicaBridge extends FeedExpander
$this->collectExpandableDatas($url);
}
protected function parseItem($item)
protected function parseItem(array $item)
{
$item = parent::parseItem($item);
$item_html = getSimpleHTMLDOMCached($item['uri'] . '&amp');
$item_html = defaultLinkTo($item_html, self::URI);
$item['content'] = $item_html->find('.amp-wp-article-content', 0);

View File

@ -13,10 +13,8 @@ class BleepingComputerBridge extends FeedExpander
$this->collectExpandableDatas($feed);
}
protected function parseItem($item)
protected function parseItem(array $item)
{
$item = parent::parseItem($item);
$article_html = getSimpleHTMLDOMCached($item['uri']);
if (!$article_html) {
$item['content'] .= '<p><em>Could not request ' . $this->getName() . ': ' . $item['uri'] . '</em></p>';

View File

@ -43,10 +43,8 @@ class CNETFranceBridge extends FeedExpander
$this->collectExpandableDatas('https://www.cnetfrance.fr/feeds/rss/news/');
}
protected function parseItem($item)
protected function parseItem(array $item)
{
$item = parent::parseItem($item);
foreach ($this->bannedTitle as $term) {
if (preg_match('/' . $term . '/mi', $item['title']) === 1) {
return null;

View File

@ -34,10 +34,8 @@ class CaschyBridge extends FeedExpander
);
}
protected function parseItem($item)
protected function parseItem(array $item)
{
$item = parent::parseItem($item);
if (strpos($item['uri'], 'https://stadt-bremerhaven.de/') !== 0) {
return $item;
}

View File

@ -12,9 +12,8 @@ class CommonDreamsBridge extends FeedExpander
$this->collectExpandableDatas('http://www.commondreams.org/rss.xml', 10);
}
protected function parseItem($item)
protected function parseItem(array $item)
{
$item = parent::parseItem($item);
$item['content'] = $this->extractContent($item['uri']);
return $item;
}

View File

@ -13,10 +13,8 @@ class CourrierInternationalBridge extends FeedExpander
$this->collectExpandableDatas(static::URI . 'feed/all/rss.xml', 20);
}
protected function parseItem($item)
protected function parseItem(array $item)
{
$item = parent::parseItem($item);
$articlePage = getSimpleHTMLDOMCached($item['uri']);
$content = $articlePage->find('.article-text, depeche-text', 0);
if (!$content) {

View File

@ -56,10 +56,8 @@ class DarkReadingBridge extends FeedExpander
$this->collectExpandableDatas($feed_url, $limit);
}
protected function parseItem($item)
protected function parseItem(array $item)
{
$item = parent::parseItem($item);
$article = getSimpleHTMLDOMCached($item['uri']);
$item['content'] = $this->extractArticleContent($article);
$item['enclosures'] = []; //remove author profile picture

View File

@ -43,9 +43,8 @@ class DauphineLibereBridge extends FeedExpander
$this->collectExpandableDatas($url, 10);
}
protected function parseItem($item)
protected function parseItem(array $item)
{
$item = parent::parseItem($item);
$item['content'] = $this->extractContent($item['uri']);
return $item;
}

View File

@ -71,10 +71,8 @@ class DeutscheWelleBridge extends FeedExpander
$this->collectExpandableDatas($this->getInput('feed'));
}
protected function parseItem($item)
protected function parseItem(array $item)
{
$item = parent::parseItem($item);
$parsedUrl = parse_url($item['uri']);
unset($parsedUrl['query']);
$url = $this->unparseUrl($parsedUrl);

View File

@ -176,10 +176,8 @@ class DeveloppezDotComBridge extends FeedExpander
* Parse the content of every RSS item and try to get the full article
* pointed to by the item URL instead of the default abstract.
*/
protected function parseItem($item)
protected function parseItem(array $item)
{
$item = parent::parseItem($item);
if (count($this->items) >= $this->getInput('limit')) {
return null;
}

View File

@ -97,9 +97,8 @@ class EconomistBridge extends FeedExpander
$this->collectExpandableDatas($url, $limit);
}
protected function parseItem($item)
protected function parseItem(array $item)
{
$item = parent::parseItem($item);
$dom = getSimpleHTMLDOM($item['uri']);
$article = $dom->find('#new-article-template', 0);

View File

@ -15,10 +15,8 @@ class EngadgetBridge extends FeedExpander
$this->collectExpandableDatas($url, $max);
}
protected function parseItem($item)
protected function parseItem(array $item)
{
$item = parent::parseItem($item);
$itemUrl = trim($item['uri']);
if (!$itemUrl) {
return $item;

View File

@ -31,10 +31,8 @@ class EsquerdaNetBridge extends FeedExpander
parent::collectExpandableDatas($this->getURI());
}
protected function parseItem($item)
protected function parseItem(array $item)
{
$item = parent::parseItem($item);
$html = getSimpleHTMLDOMCached($item['uri']);
$content = $html->find('div#content div.content', 0);
## Fix author

View File

@ -14,9 +14,9 @@ class FeedExpanderTestBridge extends FeedExpander
public function collectData()
{
$url = 'http://static.userland.com/gems/backend/sampleRss.xml'; // rss 0.91
//$url = 'http://feeds.nature.com/nature/rss/current?format=xml'; // rss 1.0
//$url = 'https://dvikan.no/feed.xml'; // rss 2.0
//$url = 'https://nedlasting.geonorge.no/geonorge/Tjenestefeed.xml'; // atom
$url = 'http://feeds.nature.com/nature/rss/current?format=xml'; // rss 1.0
$url = 'https://dvikan.no/feed.xml'; // rss 2.0
$url = 'https://nedlasting.geonorge.no/geonorge/Tjenestefeed.xml'; // atom
$this->collectExpandableDatas($url);
}

View File

@ -82,10 +82,8 @@ class FilterBridge extends FeedExpander
$this->collectExpandableDatas($this->getURI());
}
protected function parseItem($item)
protected function parseItem(array $item)
{
$item = parent::parseItem($item);
// Generate title from first 50 characters of content?
if ($this->getInput('title_from_content') && array_key_exists('content', $item)) {
$content = str_get_html($item['content']);

View File

@ -29,10 +29,8 @@ class FolhaDeSaoPauloBridge extends FeedExpander
]
];
protected function parseItem($item)
protected function parseItem(array $item)
{
$item = parent::parseItem($item);
if ($this->getInput('deep_crawl')) {
$articleHTMLContent = getSimpleHTMLDOMCached($item['uri']);
if ($articleHTMLContent) {

View File

@ -12,10 +12,8 @@ class ForGifsBridge extends FeedExpander
$this->collectExpandableDatas('https://forgifs.com/gallery/srss/7');
}
protected function parseItem($item)
protected function parseItem(array $item)
{
$item = parent::parseItem($item);
$dom = str_get_html($item['content']);
$img = $dom->find('img', 0);
$poster = $img->src;

View File

@ -14,10 +14,8 @@ class FreeCodeCampBridge extends FeedExpander
$this->collectExpandableDatas('https://www.freecodecamp.org/news/rss/', 15);
}
protected function parseItem($item)
protected function parseItem(array $item)
{
$item = parent::parseItem($item);
$dom = getSimpleHTMLDOM($item['uri']);
// figure contains the main article image

View File

@ -85,10 +85,8 @@ class FuturaSciencesBridge extends FeedExpander
$this->collectExpandableDatas($url, 10);
}
protected function parseItem($item)
protected function parseItem(array $item)
{
$item = parent::parseItem($item);
$item['uri'] = str_replace('#xtor%3DRSS-8', '', $item['uri']);
$dom = getSimpleHTMLDOMCached($item['uri']);
$item['content'] = $this->extractArticleContent($dom);

View File

@ -8,10 +8,8 @@ class GizmodoBridge extends FeedExpander
const CACHE_TIMEOUT = 1800; // 30min
const DESCRIPTION = 'Returns the newest posts from Gizmodo.';
protected function parseItem($item)
protected function parseItem(array $item)
{
$item = parent::parseItem($item);
$html = getSimpleHTMLDOMCached($item['uri']);
$html = defaultLinkTo($html, $this->getURI());

View File

@ -63,9 +63,8 @@ class GolemBridge extends FeedExpander
);
}
protected function parseItem($item)
protected function parseItem(array $item)
{
$item = parent::parseItem($item);
$item['content'] ??= '';
$uri = $item['uri'];

View File

@ -12,10 +12,8 @@ class HardwareInfoBridge extends FeedExpander
$this->collectExpandableDatas('https://nl.hardware.info/updates/all.rss', 10);
}
protected function parseItem($item)
protected function parseItem(array $item)
{
$item = parent::parseItem($item);
$itemUrl = $item['uri'];
$articlePage = getSimpleHTMLDOMCached($itemUrl);

View File

@ -125,10 +125,8 @@ class HeiseBridge extends FeedExpander
);
}
protected function parseItem($item)
protected function parseItem(array $item)
{
$item = parent::parseItem($item);
$sessioncookie = $this->getInput('sessioncookie');
// strip rss parameter

View File

@ -15,10 +15,8 @@ class IGNBridge extends FeedExpander
// IGNs feed is both hidden and incomplete. This bridge tries to fix this.
protected function parseItem($item)
protected function parseItem(array $item)
{
$item = parent::parseItem($item);
$articlePage = getSimpleHTMLDOM($item['uri']);
// List of BS elements

View File

@ -7,10 +7,8 @@ class KoreusBridge extends FeedExpander
const URI = 'https://www.koreus.com/';
const DESCRIPTION = 'Returns the newest posts from Koreus (full text)';
protected function parseItem($item)
protected function parseItem(array $item)
{
$item = parent::parseItem($item);
$html = getSimpleHTMLDOMCached($item['uri']);
$text = $html->find('p.itemText', 0)->innertext;
$item['content'] = utf8_encode($text);

View File

@ -12,10 +12,8 @@ class LeMondeInformatiqueBridge extends FeedExpander
$this->collectExpandableDatas(self::URI . 'rss/rss.xml', 10);
}
protected function parseItem($item)
protected function parseItem(array $item)
{
$item = parent::parseItem($item);
$article_html = getSimpleHTMLDOMCached($item['uri']);
//Deduce thumbnail URL from article image URL

View File

@ -13,9 +13,8 @@ class ListverseBridge extends FeedExpander
$this->collectExpandableDatas('https://listverse.com/feed/', 15);
}
protected function parseItem($item)
protected function parseItem(array $item)
{
$item = parent::parseItem($item);
$dom = getSimpleHTMLDOM($item['uri']);
$article = $dom->find('#articlecontentonly', 0);
$item['content'] = $article;

View File

@ -29,10 +29,8 @@ class MediapartBridge extends FeedExpander
$this->collectExpandableDatas($url);
}
protected function parseItem($item)
protected function parseItem(array $item)
{
$item = parent::parseItem($item);
$itemUrl = $item['uri'];
// Mediapart provide multiple type of contents.

View File

@ -25,10 +25,8 @@ class MsnMondeBridge extends FeedExpander
$this->collectExpandableDatas(self::FEED_URL, 10);
}
protected function parseItem($item)
protected function parseItem(array $item)
{
$item = parent::parseItem($item);
if (!preg_match('#fr-fr/actualite.*/ar-(?<id>[\w]*)\?#', $item['uri'], $matches)) {
return null;
}

View File

@ -14,10 +14,8 @@ class NYTBridge extends FeedExpander
$this->collectExpandableDatas($url, 40);
}
protected function parseItem($item)
protected function parseItem(array $item)
{
$item = parent::parseItem($item);
$article = '';
try {

View File

@ -88,10 +88,8 @@ class NextInpactBridge extends FeedExpander
$this->collectExpandableDatas($url, $limit);
}
protected function parseItem($item)
protected function parseItem(array $item)
{
$item = parent::parseItem($item);
$item['content'] = $this->extractContent($item, $item['uri']);
if (is_null($item['content'])) {
return null; //Filtered article

View File

@ -31,10 +31,8 @@ class NextgovBridge extends FeedExpander
$this->collectExpandableDatas($url, $limit);
}
protected function parseItem($item)
protected function parseItem(array $item)
{
$item = parent::parseItem($item);
$article_thumbnail = 'https://cdn.nextgov.com/nextgov/images/logo.png';
$item['content'] = '<p><b>' . $item['content'] . '</b></p>';

View File

@ -12,10 +12,8 @@ class NiceMatinBridge extends FeedExpander
$this->collectExpandableDatas(self::URI . 'derniere-minute/rss', 10);
}
protected function parseItem($item)
protected function parseItem(array $item)
{
$item = parent::parseItem($item);
$item['content'] = $this->extractContent($item['uri']);
return $item;
}

View File

@ -123,9 +123,8 @@ class OnVaSortirBridge extends FeedExpander
$this->collectExpandableDatas($url);
}
protected function parseItem($item)
protected function parseItem(array $item)
{
$item = parent::parseItem($item);
$dom = getSimpleHTMLDOMCached($item['uri']);
$text = $dom->find('div.corpsMax', 0)->innertext;
$item['content'] = utf8_encode($text);

View File

@ -29,10 +29,8 @@ but some RSS readers don\'t support this. "img" tag are supported by most browse
$this->collectExpandableDatas('https://www.phoronix.com/rss.php', $this->getInput('n'));
}
protected function parseItem($item)
protected function parseItem(array $item)
{
$item = parent::parseItem($item);
$itemUrl = $item['uri'];
$articlePage = getSimpleHTMLDOM($itemUrl);

View File

@ -11,14 +11,12 @@ class QwantzBridge extends FeedExpander
$this->collectExpandableDatas(self::URI . 'rssfeed.php');
}
protected function parseItem($item)
protected function parseItem(array $item)
{
$item = parent::parseItem($item);
$item['author'] = 'Ryan North';
preg_match('/title="(.*?)"/', $item['content'], $matches);
$title = $matches[1];
$title = $matches[1] ?? '';
$content = str_get_html(html_entity_decode($item['content']));
$comicURL = $content->find('img')[0]->{'src'};

View File

@ -12,10 +12,8 @@ class RaceDepartmentBridge extends FeedExpander
$this->collectExpandableDatas('https://www.racedepartment.com/ams/index.rss', 10);
}
protected function parseItem($item)
protected function parseItem(array $item)
{
$item = parent::parseItem($item);
$articlePage = getSimpleHTMLDOMCached($item['uri']);
$coverImage = $articlePage->find('img.js-articleCoverImage', 0);

View File

@ -42,10 +42,8 @@ class ScribbleHubBridge extends FeedExpander
$this->collectExpandableDatas($url);
}
protected function parseItem($item)
protected function parseItem(array $item)
{
$item = parent::parseItem($item);
//For series, filter out other series from 'All' feed
if (
$this->queriedContext === 'Series'

View File

@ -27,10 +27,8 @@ class SplCenterBridge extends FeedExpander
$this->collectExpandableDatas($url);
}
protected function parseItem($item)
protected function parseItem(array $item)
{
$item = parent::parseItem($item);
$articleHtml = getSimpleHTMLDOMCached($item['uri']);
foreach ($articleHtml->find('.file') as $index => $media) {

View File

@ -43,10 +43,8 @@ class TapasBridge extends FeedExpander
$this->collectExpandableDatas($this->getURI());
}
protected function parseItem($item)
protected function parseItem(array $item)
{
$item = parent::parseItem($item);
// $namespaces = $feedItem->getNamespaces(true);
// if (isset($namespaces['content'])) {
// $description = $feedItem->children($namespaces['content']);

View File

@ -56,10 +56,8 @@ class TheGuardianBridge extends FeedExpander
$this->collectExpandableDatas($url, 10);
}
protected function parseItem($item)
protected function parseItem(array $item)
{
$item = parent::parseItem($item);
$articlePage = getSimpleHTMLDOM($item['uri']);
// figure contains the main article image
$article = $articlePage->find('figure', 0);

View File

@ -14,10 +14,8 @@ class TwitterEngineeringBridge extends FeedExpander
$this->collectExpandableDatas($url);
}
protected function parseItem($item)
protected function parseItem(array $item)
{
$item = parent::parseItem($item);
$dom = getSimpleHTMLDOMCached($item['uri']);
if (!$dom) {
$item['content'] .= '<p><em>Could not request ' . $this->getName() . ': ' . $item['uri'] . '</em></p>';

View File

@ -13,10 +13,8 @@ class VarietyBridge extends FeedExpander
$this->collectExpandableDatas('https://feeds.feedburner.com/variety/headlines', 15);
}
protected function parseItem($item)
protected function parseItem(array $item)
{
$item = parent::parseItem($item);
// $articlePage gets the entire page's contents
$articlePage = getSimpleHTMLDOM($item['uri']);
// Remove Script tags

View File

@ -32,10 +32,8 @@ class ViceBridge extends FeedExpander
$this->collectExpandableDatas($feedURL, 10);
}
protected function parseItem($item)
protected function parseItem(array $item)
{
$item = parent::parseItem($item);
$articlePage = getSimpleHTMLDOM($item['uri']);
// text and embedded content
$article = $articlePage->find('.article__body', 0);

View File

@ -12,10 +12,8 @@ class WeLiveSecurityBridge extends FeedExpander
],
];
protected function parseItem($item)
protected function parseItem(array $item)
{
$item = parent::parseItem($item);
$html = getSimpleHTMLDOMCached($item['uri']);
if (!$html) {
$item['content'] .= '<br /><p><em>Could not request ' . $this->getName() . ': ' . $item['uri'] . '</em></p>';

View File

@ -50,10 +50,8 @@ class WiredBridge extends FeedExpander
$this->collectExpandableDatas($feed_url, $limit);
}
protected function parseItem($item)
protected function parseItem(array $item)
{
$item = parent::parseItem($item);
$originalContent = $item['content'];
$article = getSimpleHTMLDOMCached($item['uri']);

View File

@ -34,10 +34,8 @@ class WordPressBridge extends FeedExpander
}
}
protected function parseItem($item)
protected function parseItem(array $item)
{
$item = parent::parseItem($item);
$dom = getSimpleHTMLDOMCached($item['uri']);
// Find article body

View File

@ -30,9 +30,8 @@ class WorldOfTanksBridge extends FeedExpander
$this->collectExpandableDatas(sprintf('https://worldoftanks.eu/%s/rss/news/', $this->getInput('lang')));
}
protected function parseItem($item)
protected function parseItem(array $item)
{
$item = parent::parseItem($item);
$item['content'] = $this->loadFullArticle($item['uri']);
return $item;
}

View File

@ -174,10 +174,8 @@ class ZDNetBridge extends FeedExpander
$this->collectExpandableDatas($url, $limit);
}
protected function parseItem($item)
protected function parseItem(array $item)
{
$item = parent::parseItem($item);
$article = getSimpleHTMLDOMCached($item['uri']);
if (!$article) {
$this->logger->info('Unable to parse the dom from ' . $item['uri']);

View File

@ -59,10 +59,8 @@ class ZeitBridge extends FeedExpander
$this->collectExpandableDatas($url, $limit);
}
protected function parseItem($item)
protected function parseItem(array $item)
{
$item = parent::parseItem($item);
$item['enclosures'] = [];
$headers = [

View File

@ -1,85 +1,35 @@
`FeedExpander` extends [`BridgeAbstract`](./02_BridgeAbstract.md) and adds functions to collect data from existing feeds.
**Usage example**: _You have discovered a site that provides feeds which are hidden and inaccessible by normal means. You want your bridge to directly read the feeds and provide them via **RSS-Bridge**_
To create a new Bridge extending `FeedExpander` you must implement all required functions of [`BridgeAbstract`](./02_BridgeAbstract.md). `FeedExpander` additionally provides the following functions:
* [`parseItem`](#the-parseitem-function)
* [`getName`](#the-getname-function)
* [`getURI`](#the-geturi-function)
* [`getDescription`](#the-getdescription-function)
Find a [template](#template) at the end of this file.
**Notice:** For a standard feed only `collectData` needs to be implemented. `collectData` should call `$this->collectExpandableDatas('your URI here');` to automatically load feed items and header data (will subsequently call `parseItem` for each item in the feed). You can limit the number of items to fetch by specifying an additional parameter: `$this->collectExpandableDatas('your URI here', 10)` (limited to 10 items).
## The `parseItem` function
## The `parseItem` method
This function receives one item from the current feed and should return one **RSS-Bridge** item.
This method receives one item from the current feed and should return one **RSS-Bridge** item.
The default function does all the work to get the item data from the feed, whether it is RSS 1.0,
RSS 2.0 or Atom 1.0. If you have to redefine this function in your **RSS-Bridge** for whatever reason,
you should first call the parent function to initialize the item, then apply the changes that you require.
RSS 2.0 or Atom 1.0.
**Notice:** The following code sample is just an example. Implementation depends on your requirements!
```PHP
protected function parseItem($feedItem){
$item = parent::parseItem($feedItem);
$item['content'] = str_replace('rssbridge','RSS-Bridge',$feedItem->content);
protected function parseItem(array $item)
{
$item['content'] = str_replace('rssbridge','RSS-Bridge',$item['content']);
return $item;
}
```
### Helper functions
### Feed parsing
The `FeedExpander` already provides a set of functions to parse RSS or Atom items based on the specifications. Where possible make use of these functions:
Function | Description
---------|------------
`parseATOMItem` | Parses an Atom 1.0 feed item
`parseRSS_0_9_1_Item` | Parses an RSS 0.91 feed item
`parseRSS_1_0_Item` | Parses an RSS 1.0 feed item
`parseRSS_2_0_Item` | Parses an RSS 2.0 feed item
In the following list you'll find the feed tags assigned to the **RSS-Bridge** item keys:
How rss-bridge processes xml feeds:
Function | uri | title | timestamp | author | content
---------|-----|-------|-----------|--------|--------
`parseATOMItem` | id | title | updated | author | content
`parseRSS_0_9_1_Item` | link | title | | | description
`parseRSS_1_0_Item` | link | title | dc:date | dc:creator | description
`parseRSS_2_0_Item` | link, guid | title | pubDate, dc:date | author, dc:creator | description
## The `getName` function
Returns the name of the current feed.
```PHP
return $this->name;
```
**Notice:** Only implement this function if you require different behavior!
## The `getURI` function
Return the uri for the current feed.
```PHP
return $this->uri;
```
**Notice:** Only implement this function if you require different behavior!
## The `getDescription` function
Returns the description for the current bridge.
```PHP
return $this->description;
```
**Notice:** Only implement this function if you require different behavior!
`atom` | id | title | updated | author | content
`rss 0.91` | link | title | | | description
`rss 1.0` | link | title | dc:date | dc:creator | description
`rss 2.0` | link, guid | title | pubDate, dc:date | author, dc:creator | description
# Template
@ -87,19 +37,19 @@ This is the template for a new bridge:
```PHP
<?php
class MySiteBridge extends FeedExpander {
class MySiteBridge extends FeedExpander
{
const MAINTAINER = 'No maintainer';
const NAME = 'Unnamed bridge';
const URI = '';
const DESCRIPTION = 'No description provided';
const PARAMETERS = [];
const CACHE_TIMEOUT = 3600;
const MAINTAINER = 'No maintainer';
const NAME = 'Unnamed bridge';
const URI = '';
const DESCRIPTION = 'No description provided';
const PARAMETERS = [];
const CACHE_TIMEOUT = 3600;
public function collectData()
{
$this->collectExpandableDatas('your feed URI');
}
}
// Imaginary empty line!
```

View File

@ -7,7 +7,7 @@ and extends one of the base classes of **RSS-Bridge**:
Base class | Description
-----------|------------
[`BridgeAbstract`](./02_BridgeAbstract.md) | This class is intended for standard _Bridges_ that need to filter HTML pages for content.
[`FeedExpander`](./03_FeedExpander.md) | This class is an extension of `HttpCachingBridgeAbstract`, designed to load existing feeds into **RSS-Bridge**
[`FeedExpander`](./03_FeedExpander.md) | Expand/modify existing feed urls
[`XPathAbstract`](./04_XPathAbstract.md) | This class is meant as an alternative base class for bridge implementations. It offers preliminary functionality for generating feeds based on _XPath expressions_.
For more information about how to create a new _Bridge_, read [How to create a new Bridge?](./01_How_to_create_a_new_bridge.md)

View File

@ -5,111 +5,57 @@
*/
abstract class FeedExpander extends BridgeAbstract
{
const FEED_TYPE_RSS_1_0 = 'RSS_1_0';
const FEED_TYPE_RSS_2_0 = 'RSS_2_0';
const FEED_TYPE_ATOM_1_0 = 'ATOM_1_0';
private string $feedType;
private FeedParser $feedParser;
private array $parsedFeed;
public function __construct(CacheInterface $cache, Logger $logger)
{
parent::__construct($cache, $logger);
$this->feedParser = new FeedParser();
}
private array $feed;
public function collectExpandableDatas(string $url, $maxItems = -1)
{
if (!$url) {
throw new \Exception('There is no $url for this RSS expander');
}
$maxItems = (int) $maxItems;
if ($maxItems === -1) {
$maxItems = 999;
}
$accept = [MrssFormat::MIME_TYPE, AtomFormat::MIME_TYPE, '*/*'];
$httpHeaders = ['Accept: ' . implode(', ', $accept)];
// Notice we do not use cache here on purpose. We want a fresh view of the RSS stream each time
$xmlString = getContents($url, $httpHeaders);
if ($xmlString === '') {
throw new \Exception(sprintf('Unable to parse xml from `%s` because we got the empty string', $url), 10);
}
// Maybe move this call earlier up the stack frames
// Disable triggering of the php error-handler and handle errors manually instead
libxml_use_internal_errors(true);
// Consider replacing libxml with https://www.php.net/domdocument
// Intentionally not using the silencing operator (@) because it has no effect here
$xml = simplexml_load_string(trim($xmlString));
if ($xml === false) {
$xmlErrors = libxml_get_errors();
foreach ($xmlErrors as $xmlError) {
Debug::log(trim($xmlError->message));
}
if ($xmlErrors) {
// Render only the first error into exception message
$firstXmlErrorMessage = $xmlErrors[0]->message;
}
throw new \Exception(sprintf('Unable to parse xml from `%s` %s', $url, $firstXmlErrorMessage ?? ''), 11);
}
// Restore previous behaviour in case other code relies on it being off
libxml_use_internal_errors(false);
// Currently only feed metadata (not items) are plucked out
$this->parsedFeed = $this->feedParser->parseFeed($xmlString);
if (isset($xml->item[0])) {
$this->feedType = self::FEED_TYPE_RSS_1_0;
$items = $xml->item;
} elseif (isset($xml->channel[0])) {
$this->feedType = self::FEED_TYPE_RSS_2_0;
$items = $xml->channel[0]->item;
} elseif (isset($xml->entry[0])) {
$this->feedType = self::FEED_TYPE_ATOM_1_0;
$items = $xml->entry;
} else {
throw new \Exception(sprintf('Unable to detect feed format from `%s`', $url));
}
$feedParser = new FeedParser();
$this->feed = $feedParser->parseFeed($xmlString);
$items = array_slice($this->feed['items'], 0, $maxItems);
foreach ($items as $item) {
$parsedItem = $this->parseItem($item);
if ($parsedItem) {
$this->items[] = $parsedItem;
}
if (count($this->items) >= $maxItems) {
break;
// Give bridges a chance to modify the item
$item = $this->parseItem($item);
if ($item) {
$this->items[] = $item;
}
}
return $this;
}
/**
* @param \SimpleXMLElement $item The feed item to be parsed
* This method is overridden by bridges
*
* @return array
*/
protected function parseItem($item)
protected function parseItem(array $item)
{
switch ($this->feedType) {
case self::FEED_TYPE_RSS_1_0:
return $this->feedParser->parseRss1Item($item);
case self::FEED_TYPE_RSS_2_0:
return $this->feedParser->parseRss2Item($item);
case self::FEED_TYPE_ATOM_1_0:
return $this->feedParser->parseAtomItem($item);
default:
throw new \Exception(sprintf('Unknown version %s!', $this->getInput('version')));
}
return $item;
}
public function getURI()
{
return $this->parsedFeed['uri'] ?? parent::getURI();
return $this->feed['uri'] ?? parent::getURI();
}
public function getName()
{
return $this->parsedFeed['title'] ?? parent::getName();
return $this->feed['title'] ?? parent::getName();
}
public function getIcon()
{
return $this->parsedFeed['icon'] ?? parent::getIcon();
return $this->feed['icon'] ?? parent::getIcon();
}
}

View File

@ -14,10 +14,10 @@ final class FeedParser
throw new \Exception('Unable to parse xml');
}
$feed = [
'title' => null,
'url' => null,
'icon' => null,
'items' => [],
'title' => null,
'uri' => null,
'icon' => null,
'items' => [],
];
if (isset($xml->item[0])) {
// rss 1.0