From 3e2423d86b0a2d5e6b53a30ee48a0ad6e41b2702 Mon Sep 17 00:00:00 2001 From: Yaman Qalieh Date: Tue, 14 Jun 2022 09:45:46 -0400 Subject: [PATCH] [MsnMondeBridge] Fix bridge (#2813) --- bridges/MsnMondeBridge.php | 48 +++++++++++++++++++++----------------- 1 file changed, 26 insertions(+), 22 deletions(-) diff --git a/bridges/MsnMondeBridge.php b/bridges/MsnMondeBridge.php index 9c418bd9..817f13ad 100644 --- a/bridges/MsnMondeBridge.php +++ b/bridges/MsnMondeBridge.php @@ -1,36 +1,40 @@ find('#content', 0)->find('article', 0)->find('section', 0)->plaintext; - $item['timestamp'] = strtotime($html2->find('.authorinfo-txt', 0)->find('time', 0)->datetime); + public function getURI() { + return self::URI; } - public function collectData(){ - $html = getSimpleHTMLDOM($this->getURI()); + public function collectData() { + $this->collectExpandableDatas(self::FEED_URL, self::LIMIT); + } - $limit = 0; - - // TODO: fix why articles is empty - foreach($html->find('.smalla') as $article) { - if($limit < 10) { - $item = array(); - $item['title'] = utf8_decode($article->find('h4', 0)->innertext); - $item['uri'] = self::URI . utf8_decode($article->find('a', 0)->href); - $this->msnMondeExtractContent($item['uri'], $item); - $this->items[] = $item; - $limit++; - } + protected function parseItem($newsItem) { + $item = parent::parseItem($newsItem); + if (!preg_match('#fr-fr/actualite.*/ar-(?<id>[\w]*)\?#', $item['uri'], $matches)) { + return; } + + $json = json_decode(getContents(self::JSON_URL . $matches['id']), true); + $item['content'] = $json['body']; + if (!empty($json['authors'])) + $item['author'] = reset($json['authors'])['name']; + $item['timestamp'] = $json['createdDateTime']; + foreach($json['tags'] as $tag) { + $item['categories'][] = $tag['label']; + } + return $item; } }