fix(senscritique) (#3750)

This commit is contained in:
Dag 2023-10-13 11:24:22 +02:00 committed by GitHub
parent 49d9dafaec
commit 920d00480d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 28 additions and 35 deletions

View File

@ -56,7 +56,8 @@ class SensCritiqueBridge extends BridgeAbstract
break;
}
$html = getSimpleHTMLDOM($uri);
$list = $html->find('ul.elpr-list', 0);
// This selector name looks like it's automatically generated
$list = $html->find('div.Universes__WrapperProducts-sc-1qa2w66-0.eVdcAv', 0);
$this->extractDataFromList($list);
}
@ -68,36 +69,13 @@ class SensCritiqueBridge extends BridgeAbstract
if ($list === null) {
returnClientError('Cannot extract data from list');
}
foreach ($list->find('li') as $movie) {
foreach ($list->find('div[data-testid="product-list-item"]') as $movie) {
$item = [];
$item['author'] = htmlspecialchars_decode($movie->find('.elco-title a', 0)->plaintext, ENT_QUOTES)
. ' '
. $movie->find('.elco-date', 0)->plaintext;
$item['title'] = $movie->find('.elco-title a', 0)->plaintext
. ' '
. $movie->find('.elco-date', 0)->plaintext;
$item['content'] = '';
$originalTitle = $movie->find('.elco-original-title', 0);
$description = $movie->find('.elco-description', 0);
if ($originalTitle) {
$item['content'] = '<em>' . $originalTitle->plaintext . '</em><br><br>';
}
$item['content'] .= $movie->find('.elco-baseline', 0)->plaintext
. '<br>'
. $movie->find('.elco-baseline', 1)->plaintext
. '<br><br>'
. ($description ? $description->plaintext : '')
. '<br><br>'
. trim($movie->find('.erra-ratings .erra-global', 0)->plaintext)
. ' / 10';
$item['id'] = $this->getURI() . ltrim($movie->find('.elco-title a', 0)->href, '/');
$item['uri'] = $this->getURI() . ltrim($movie->find('.elco-title a', 0)->href, '/');
$item['title'] = $movie->find('h2 a', 0)->plaintext;
// todo: fix image
$item['content'] = $movie->innertext;
$item['id'] = $this->getURI() . ltrim($movie->find('a', 0)->href, '/');
$item['uri'] = $this->getURI() . ltrim($movie->find('a', 0)->href, '/');
$this->items[] = $item;
}
}

View File

@ -158,8 +158,8 @@ class VkBridge extends BridgeAbstract
$article_author_selector = 'div.article_snippet__author';
$article_thumb_selector = 'div.article_snippet__image';
}
$article_title = $article->find($article_title_selector, 0)->innertext;
$article_author = $article->find($article_author_selector, 0)->innertext;
$article_title = $article->find($article_title_selector, 0)->innertext ?? '';
$article_author = $article->find($article_author_selector, 0)->innertext ?? '';
$article_link = $article->getAttribute('href');
$article_img_element_style = $article->find($article_thumb_selector, 0)->getAttribute('style');
preg_match('/background-image: url\((.*)\)/', $article_img_element_style, $matches);

View File

@ -2,6 +2,13 @@
declare(strict_types=1);
/**
* Very basic and naive feed parser that scrapes out rss 0.91, 1.0, 2.0 and atom 1.0.
*
* Emit arrays meant to be used inside rss-bridge.
*
* The feed item structure is identical to that of FeedItem
*/
final class FeedParser
{
public function parseFeed(string $xmlString): array
@ -200,6 +207,8 @@ final class FeedParser
'content' => null,
'timestamp' => null,
'author' => null,
'uid' => null,
'categories' => [],
'enclosures' => [],
];
if (isset($feedItem->link)) {

View File

@ -31,7 +31,10 @@ abstract class FormatAbstract
$this->lastModified = $lastModified;
}
public function setItems(array $items)
/**
* Replace the items held by this formatter with the given list.
*
* The array is stored as-is on $this->items; no copying, filtering or
* validation of the elements is performed here.
*
* @param FeedItem[] $items
*/
public function setItems(array $items): void
{
$this->items = $items;
}

View File

@ -7,7 +7,9 @@ final class UrlException extends \Exception
}
/**
* Intentionally restrictive url parser
* Intentionally restrictive url parser.
*
* Only absolute http/https urls.
*/
final class Url
{
@ -29,7 +31,7 @@ final class Url
$parts = parse_url($url);
if ($parts === false) {
throw new UrlException(sprintf('Invalid url %s', $url));
throw new UrlException(sprintf('Failed to parse_url(): %s', $url));
}
return (new self())
@ -38,6 +40,7 @@ final class Url
->withPort($parts['port'] ?? 80)
->withPath($parts['path'] ?? '/')
->withQueryString($parts['query'] ?? null);
// todo: add fragment
}
public static function validate(string $url): bool