[MsnMondeBridge] Fix bridge (#2813)

This commit is contained in:
Yaman Qalieh 2022-06-14 09:45:46 -04:00 committed by GitHub
parent 90e0504da5
commit 3e2423d86b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 26 additions and 22 deletions

View File

@@ -1,36 +1,40 @@
<?php <?php
class MsnMondeBridge extends BridgeAbstract { class MsnMondeBridge extends FeedExpander {
const MAINTAINER = 'kranack'; const MAINTAINER = 'kranack';
const NAME = 'MSN Actu Monde'; const NAME = 'MSN Actu Monde';
const URI = 'http://www.msn.com/';
const DESCRIPTION = 'Returns the 10 newest posts from MSN Actualités (full text)'; const DESCRIPTION = 'Returns the 10 newest posts from MSN Actualités (full text)';
const URI = 'https://www.msn.com/fr-fr/actualite';
const FEED_URL = 'https://rss.msn.com/fr-fr';
const JSON_URL = 'https://assets.msn.com/content/view/v2/Detail/fr-fr/';
const LIMIT = 10;
public function getURI(){ public function getName() {
return self::URI . 'fr-fr/actualite/monde'; return 'MSN Actualités';
} }
private function msnMondeExtractContent($url, &$item){ public function getURI() {
$html2 = getSimpleHTMLDOM($url); return self::URI;
$item['content'] = $html2->find('#content', 0)->find('article', 0)->find('section', 0)->plaintext;
$item['timestamp'] = strtotime($html2->find('.authorinfo-txt', 0)->find('time', 0)->datetime);
} }
public function collectData() { public function collectData() {
$html = getSimpleHTMLDOM($this->getURI()); $this->collectExpandableDatas(self::FEED_URL, self::LIMIT);
}
$limit = 0; protected function parseItem($newsItem) {
$item = parent::parseItem($newsItem);
if (!preg_match('#fr-fr/actualite.*/ar-(?<id>[\w]*)\?#', $item['uri'], $matches)) {
return;
}
// TODO: fix why articles is empty $json = json_decode(getContents(self::JSON_URL . $matches['id']), true);
foreach($html->find('.smalla') as $article) { $item['content'] = $json['body'];
if($limit < 10) { if (!empty($json['authors']))
$item = array(); $item['author'] = reset($json['authors'])['name'];
$item['title'] = utf8_decode($article->find('h4', 0)->innertext); $item['timestamp'] = $json['createdDateTime'];
$item['uri'] = self::URI . utf8_decode($article->find('a', 0)->href); foreach($json['tags'] as $tag) {
$this->msnMondeExtractContent($item['uri'], $item); $item['categories'][] = $tag['label'];
$this->items[] = $item; }
$limit++; return $item;
}
}
} }
} }