test: add test for FeedParser (#3754)

This commit is contained in:
Dag 2023-10-13 23:14:08 +02:00 committed by GitHub
parent 5f37c72be0
commit daef240cd2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 174 additions and 25 deletions

View File

@ -30,7 +30,7 @@ class ArsTechnicaBridge extends FeedExpander
/**
 * Collects items from the Ars Technica feed of the configured section.
 *
 * The feed URL is the fixed Ars Technica feed base followed by the value of
 * the `section` input.
 */
public function collectData()
{
    $url = 'https://feeds.arstechnica.com/arstechnica/' . $this->getInput('section');

    // Expand the feed exactly once. The second argument is presumably the
    // maximum number of items to expand — confirm against FeedExpander.
    $this->collectExpandableDatas($url, 10);
}
protected function parseItem(array $item)

View File

@ -6,7 +6,7 @@ class UrlebirdBridge extends BridgeAbstract
const NAME = 'urlebird.com';
const URI = 'https://urlebird.com/';
const DESCRIPTION = 'Bridge for urlebird.com';
// NOTE(review): presumably seconds (60 * 5 = 5 minutes) — confirm the unit against BridgeAbstract.
const CACHE_TIMEOUT = 60 * 5;
const PARAMETERS = [
[
'query' => [
@ -21,50 +21,70 @@ class UrlebirdBridge extends BridgeAbstract
private $title;
/**
 * Percent-encodes every path segment of a URI.
 *
 * @deprecated Alias of encodePathSegments(), which implements the same
 *             transformation; kept so existing callers keep working.
 *
 * @param string $uri URI whose path should be encoded
 * @return string the URI with an encoded path
 */
private function fixURI($uri)
{
    return $this->encodePathSegments($uri);
}
/**
 * Collects the most recent posts of a user or hashtag from urlebird.com.
 *
 * The first character of the `query` input selects the listing page:
 * `@name` maps to /user/name/ and `#tag` maps to /hash/tag/. Any other
 * input is reported through returnServerError().
 *
 * For each of the first 10 `div.thumb` entries on the listing page the post
 * page is fetched once and turned into an item with uri, timestamp, author,
 * title, one enclosure (the video poster) and an HTML content block.
 */
public function collectData()
{
    $query = (string) $this->getInput('query');

    // substr() instead of $query[0]: an empty query must fall through to the
    // default branch rather than trigger an "uninitialized string offset".
    switch (substr($query, 0, 1)) {
        case '@':
            $url = 'https://urlebird.com/user/' . substr($query, 1) . '/';
            break;
        case '#':
            $url = 'https://urlebird.com/hash/' . substr($query, 1) . '/';
            break;
        default:
            // NOTE(review): $url stays undefined below unless this call aborts
            // the request — presumably it throws; confirm.
            returnServerError('Please, enter valid username or hashtag!');
            break;
    }

    $html = getSimpleHTMLDOM($url);
    $this->title = $html->find('title', 0)->innertext;

    // Every item costs one extra HTTP request, so only the first entries are used.
    $limit = 10;
    $articles = array_slice($html->find('div.thumb'), 0, $limit);

    foreach ($articles as $article) {
        $item = [];

        $itemUrl = $article->find('a', 2)->href;
        $item['uri'] = $this->encodePathSegments($itemUrl);

        // Single fetch of the post page; everything below is read from it.
        $dom = getSimpleHTMLDOM($item['uri']);

        // Timestamp: text of the first <h6> inside div.video, minus the "Posted " label.
        $timestampH6 = $dom->find('div.video', 0)->find('h6', 0);
        $item['timestamp'] = str_replace('Posted ', '', $timestampH6->plaintext);

        $alt = $article->find('img', 0)->alt;
        $innertext = $dom->find('a.user-video', 1)->innertext;
        $item['author'] = $alt . ' (' . $innertext . ')';

        $item['title'] = $dom->find('title', 0)->innertext;

        // One lookup of the <video> element serves poster, markup and source.
        $video = $dom->find('video', 0);
        $item['enclosures'][] = $video->poster;
        // Presumably drops the autoplay attribute from the embedded markup — confirm.
        $video->autoplay = null;

        $item['content'] = $video->outertext . '<br>' .
            $dom->find('div.music', 0) . '<br>' .
            $dom->find('div.info2', 0)->innertext .
            '<br><br><a href="' . $video->src .
            '">Direct video link</a><br><br><a href="' . $item['uri'] .
            '">Post link</a><br><br>';

        $this->items[] = $item;
    }
}
/**
 * Percent-encodes each segment of a URL's path and leaves the rest untouched.
 *
 * The path is split on "/", every segment is encoded with rawurlencode()
 * (RFC 3986 encoding; urlencode() is form encoding and would turn a space
 * into "+", which is a literal plus sign inside a path) and the segments are
 * joined with "/" again, so the separators themselves are never encoded.
 *
 * Segments are encoded as-is: a segment that is already percent-encoded is
 * encoded again ("%" becomes "%25").
 *
 * @param string $url absolute or relative URL
 * @return string the URL with an encoded path; the input itself when it has
 *                no path or cannot be parsed
 */
private function encodePathSegments($url)
{
    $path = parse_url($url, PHP_URL_PATH);

    // parse_url() yields null when there is no path and false for malformed URLs.
    if (!is_string($path) || $path === '') {
        return $url;
    }

    $encodedPath = implode('/', array_map('rawurlencode', explode('/', $path)));

    // The path is the last component before the query ("?") or fragment ("#"),
    // so it ends at the first of those characters (or at the end of the URL).
    // Replacing by position keeps identical text in the query string intact.
    $pathLength = strlen($path);
    $pathStart = strcspn($url, '?#') - $pathLength;
    if ($pathStart >= 0 && substr($url, $pathStart, $pathLength) === $path) {
        return substr_replace($url, $encodedPath, $pathStart, $pathLength);
    }

    // parse_url() may normalise bytes, in which case the path is not found
    // verbatim at the expected position: fall back to a plain replacement.
    return str_replace($path, $encodedPath, $url);
}
public function getName()
{
return $this->title ?: parent::getName();

View File

@ -142,6 +142,7 @@ final class FeedParser
}
if (isset($feedItem->guid)) {
// Pluck out a url from guid
foreach ($feedItem->guid->attributes() as $attribute => $value) {
if (
$attribute === 'isPermaLink'
@ -207,9 +208,9 @@ final class FeedParser
'content' => null,
'timestamp' => null,
'author' => null,
'uid' => null,
'categories' => [],
'enclosures' => [],
//'uid' => null,
//'categories' => [],
//'enclosures' => [],
];
if (isset($feedItem->link)) {
// todo: trim uri

128
tests/FeedParserTest.php Normal file
View File

@ -0,0 +1,128 @@
<?php
declare(strict_types=1);
namespace RssBridge\Tests;
use PHPUnit\Framework\TestCase;
/**
 * Tests FeedParser::parseFeed() against one minimal document per supported
 * feed format (RSS 1.0, RSS 2.0 and Atom).
 *
 * Each test checks the feed-level fields (title, uri, icon) and the fields of
 * the single item the document contains.
 */
class FeedParserTest extends TestCase
{
    public function testRss1(): void
    {
        $xml = <<<'XML'
<?xml version="1.0" encoding="utf-8"?>
<rdf:RDF
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:cc="http://creativecommons.org/ns#"
xmlns="http://purl.org/rss/1.0/"
>
<channel rdf:about="http://meerkat.oreillynet.com/?_fl=rss1.0">
<title>hello feed</title>
<link>http://meerkat.oreillynet.com</link>
<description>Meerkat: An Open Wire Service</description>
<items>
<rdf:Seq>
<rdf:li resource="http://c.moreover.com/click/here.pl?r123" />
</rdf:Seq>
</items>
</channel>
<item rdf:about="http://c.moreover.com/click/here.pl?r123">
<title>XML: A Disruptive Technology</title>
<link>http://c.moreover.com/click/here.pl?r123</link>
<description>desc</description>
</item>
</rdf:RDF>
XML;

        $feed = $this->parseFeed($xml);

        $this->assertSame('hello feed', $feed['title']);
        $this->assertSame('http://meerkat.oreillynet.com', $feed['uri']);
        // The document declares no image, so no icon is expected.
        $this->assertNull($feed['icon']);

        // Guard the [0] access below: a missing item should fail here, clearly.
        $this->assertCount(1, $feed['items']);
        $item = $feed['items'][0];
        $this->assertSame('XML: A Disruptive Technology', $item['title']);
        $this->assertSame('http://c.moreover.com/click/here.pl?r123', $item['uri']);
        $this->assertSame('desc', $item['content']);
    }

    public function testRss2(): void
    {
        $xml = <<<'XML'
<?xml version="1.0"?>
<rss version="2.0">
<channel>
<title>hello feed</title>
<link>https://example.com/</link>
<image>
<url>https://example.com/2.ico</url>
</image>
<item>
<title>hello world</title>
<link>https://example.com/1</link>
<description>desc2</description>
<pubDate>Tue, 26 Apr 2022 00:00:00 +0200</pubDate>
<author>root</author>
<enclosure url="https://example.com/1.png"></enclosure>
</item>
</channel>
</rss>
XML;

        $feed = $this->parseFeed($xml);

        $this->assertSame('hello feed', $feed['title']);
        $this->assertSame('https://example.com/', $feed['uri']);
        $this->assertSame('https://example.com/2.ico', $feed['icon']);

        $this->assertCount(1, $feed['items']);
        $item = $feed['items'][0];
        $this->assertSame('hello world', $item['title']);
        $this->assertSame('https://example.com/1', $item['uri']);
        // 2022-04-26T00:00:00+02:00 == 2022-04-25T22:00:00Z
        $this->assertSame(1650924000, $item['timestamp']);
        $this->assertSame('root', $item['author']);
        $this->assertSame('desc2', $item['content']);
        $this->assertSame(['https://example.com/1.png'], $item['enclosures']);
    }

    public function testAtom(): void
    {
        $xml = <<<'XML'
<?xml version="1.0" encoding="UTF-8"?>
<feed xmlns="http://www.w3.org/2005/Atom" xmlns:media="http://search.yahoo.com/mrss/">
<title>hello feed</title>
<link href="https://example.com/1"></link>
<icon>https://example.com/2.ico</icon>
<entry>
<title>hello world</title>
<link href="https://example.com/1"></link>
<author>
<name>root</name>
</author>
<content type="html">html</content>
<updated>2015-11-05T14:38:49+01:00</updated>
</entry>
</feed>
XML;

        $feed = $this->parseFeed($xml);

        $this->assertSame('hello feed', $feed['title']);
        $this->assertSame('https://example.com/1', $feed['uri']);
        $this->assertSame('https://example.com/2.ico', $feed['icon']);

        $this->assertCount(1, $feed['items']);
        $item = $feed['items'][0];
        $this->assertSame('hello world', $item['title']);
        $this->assertSame('https://example.com/1', $item['uri']);
        // 2015-11-05T14:38:49+01:00 == 2015-11-05T13:38:49Z
        $this->assertSame(1446730729, $item['timestamp']);
        $this->assertSame('root', $item['author']);
        $this->assertSame('html', $item['content']);
    }

    /**
     * Runs a fresh FeedParser over the given document.
     *
     * @param string $xml feed document
     * @return mixed whatever FeedParser::parseFeed() returns; the tests read
     *               it as an array with `title`, `uri`, `icon` and `items`
     */
    private function parseFeed(string $xml)
    {
        $sut = new \FeedParser();

        return $sut->parseFeed($xml);
    }
}