fix: various fixes (#3702)

* fix: symfonycasts

* various fixes
This commit is contained in:
Dag 2023-09-24 18:15:14 +02:00 committed by GitHub
parent ce353c1e4f
commit 437afd67e0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 32 additions and 75 deletions

View File

@ -30,6 +30,9 @@ class AtmoNouvelleAquitaineBridge extends BridgeAbstract
public function collectData()
{
// this bridge is broken and unmaintained
return;
$uri = self::URI . '/monair/commune/' . $this->getInput('cities');
$html = getSimpleHTMLDOM($uri);

View File

@ -38,50 +38,20 @@ class BrutBridge extends BridgeAbstract
]
];
const CACHE_TIMEOUT = 1800; // 30 mins
private $jsonRegex = '/window\.__PRELOADED_STATE__ = ((?:.*)});/';
public function collectData()
{
$html = getSimpleHTMLDOM($this->getURI());
$results = $html->find('div.results', 0);
foreach ($results->find('li.col-6.col-sm-4.col-md-3.col-lg-2.px-2.pb-4') as $li) {
$item = [];
$videoPath = self::URI . $li->children(0)->href;
$videoPageHtml = getSimpleHTMLDOMCached($videoPath, 3600);
$json = $this->extractJson($videoPageHtml);
$id = array_keys((array) $json->media->index)[0];
$item['uri'] = $videoPath;
$item['title'] = $json->media->index->$id->title;
$item['timestamp'] = $json->media->index->$id->published_at;
$item['enclosures'][] = $json->media->index->$id->media->thumbnail;
$description = $json->media->index->$id->description;
$article = '';
if (is_null($json->media->index->$id->media->seo_article) === false) {
$article = markdownToHtml($json->media->index->$id->media->seo_article);
}
$item['content'] = <<<EOD
<video controls poster="{$json->media->index->$id->media->thumbnail}" preload="none">
<source src="{$json->media->index->$id->media->mp4_url}" type="video/mp4">
</video>
<p>{$description}</p>
{$article}
EOD;
$this->items[] = $item;
if (count($this->items) >= 10) {
break;
}
$url = $this->getURI();
$html = getSimpleHTMLDOM($url);
$regex = '/window.__PRELOADED_STATE__ = (.*);/';
preg_match($regex, $html, $parts);
$data = Json::decode($parts[1], false);
foreach ($data->medias->index as $uid => $media) {
$this->items[] = [
'uid' => $uid,
'title' => $media->metadata->slug,
'uri' => $media->share_url,
'timestamp' => $media->published_at,
];
}
}
@ -90,35 +60,14 @@ EOD;
if (!is_null($this->getInput('edition')) && !is_null($this->getInput('category'))) {
return self::URI . '/' . $this->getInput('edition') . '/' . $this->getInput('category');
}
return parent::getURI();
}
/**
 * Build the display name of this bridge instance.
 *
 * When both the 'edition' and 'category' inputs are set, the name is
 * "<category key> - <edition key> - Brut."; otherwise the parent
 * implementation's name is returned unchanged.
 *
 * @return string presumably a string in both branches; parent::getName() is not visible here
 */
public function getName()
{
    $hasEdition = !is_null($this->getInput('edition'));
    if ($hasEdition && !is_null($this->getInput('category'))) {
        return $this->getKey('category') . ' - ' . $this->getKey('edition') . ' - Brut.';
    }
    return parent::getName();
}
/**
 * Extract the preloaded-state JSON embedded in a page and decode it.
 *
 * The page is matched against $this->jsonRegex; the first capture group is
 * decoded with json_decode() into objects (no associative flag is passed).
 *
 * @param mixed $html Page contents; it is handed straight to preg_match(), so it must be
 *                    a string or string-convertible (callers appear to pass a DOM object — confirm)
 * @return mixed Decoded JSON value
 */
private function extractJson($html)
{
    if (!preg_match($this->jsonRegex, $html, $parts)) {
        // NOTE(review): presumably returnServerError() aborts by throwing — confirm.
        returnServerError('Failed to extract data from page');
    }
    $data = json_decode($parts[1]);
    // json_decode() reports failure by returning null (never false), so the
    // decoder's error state is checked instead of comparing the result to false.
    if (json_last_error() !== JSON_ERROR_NONE) {
        returnServerError('Failed to decode extracted data');
    }
    return $data;
}
}

View File

@ -72,7 +72,7 @@ class SitemapBridge extends CssSelectorBridge
$sitemap_xml = $this->getSitemapXml($sitemap_url, !empty($site_map));
$links = $this->sitemapXmlToList($sitemap_xml, $url_pattern, empty($limit) ? 10 : $limit);
if (empty($links) && empty(sitemapXmlToList($sitemap_xml))) {
if (empty($links) && empty($this->sitemapXmlToList($sitemap_xml))) {
returnClientError('Could not retrieve URLs with Timestamps from Sitemap: ' . $sitemap_url);
}

View File

@ -10,22 +10,27 @@ class SymfonyCastsBridge extends BridgeAbstract
public function collectData()
{
$html = getSimpleHTMLDOM('https://symfonycasts.com/updates/find');
$dives = $html->find('div');
$url = 'https://symfonycasts.com/updates/find';
$html = getSimpleHTMLDOM($url);
/** @var simple_html_dom_node[] $dives */
$dives = $html->find('div.user-notification-not-viewed');
/* @var simple_html_dom $div */
foreach ($dives as $div) {
$id = $div->getAttribute('data-mark-update-id-value');
$type = $div->find('h5', 0);
$title = $div->find('span', 0);
$title = $div->find('a', 0);
$dateString = $div->find('h5.font-gray', 0);
$href = $div->find('a', 0);
$url = 'https://symfonycasts.com' . $href->getAttribute('href');
$hrefAttribute = $href->getAttribute('href');
$url = 'https://symfonycasts.com' . $hrefAttribute;
$item = []; // Create an empty item
$item['uid'] = $id;
$item = [];
$item['uid'] = $div->getAttribute('data-mark-update-update-url-value');
$item['title'] = $title->innertext;
// this natural language date string does not work
$item['timestamp'] = $dateString->innertext;
$item['content'] = $type->plaintext . '<a href="' . $url . '">' . $title . '</a>';
$item['uri'] = $url;
$this->items[] = $item; // Add item to the list

View File

@ -28,8 +28,7 @@
"ext-openssl": "*",
"ext-libxml": "*",
"ext-simplexml": "*",
"ext-json": "*",
"ext-intl": "*"
"ext-json": "*"
},
"require-dev": {
"phpunit/phpunit": "^9",
@ -39,6 +38,7 @@
"ext-memcached": "Allows to use memcached as cache type",
"ext-sqlite3": "Allows to use an SQLite database for caching",
"ext-zip": "Required for FDroidRepoBridge",
"ext-intl": "Required for OLXBridge",
"ext-dom": "Allows to use some bridges based on XPath expressions"
},
"autoload-dev": {