From d37f0c14a094d747987d6458766a7bf3238849f4 Mon Sep 17 00:00:00 2001 From: ORelio Date: Sat, 2 Mar 2019 19:03:29 +0100 Subject: [PATCH] [LeMondeInformatique] Handle special articles (#1039) Fix content extraction for special articles that compile previous articles --- bridges/LeMondeInformatiqueBridge.php | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/bridges/LeMondeInformatiqueBridge.php b/bridges/LeMondeInformatiqueBridge.php index 09bcf6a3..45aa6075 100644 --- a/bridges/LeMondeInformatiqueBridge.php +++ b/bridges/LeMondeInformatiqueBridge.php @@ -20,12 +20,13 @@ class LeMondeInformatiqueBridge extends FeedExpander { str_replace( '/grande/', '/petite/', - $article_html->find('.article-image', 0)->find('img', 0)->src + $article_html->find('.article-image > img, figure > img', 0)->src ) ); //No response header sets the encoding, explicit conversion is needed or subsequent xml_encode() will fail - $item['content'] = utf8_encode($this->cleanArticle($article_html->find('div.col-primary', 0)->innertext)); + $content_node = $article_html->find('div.col-primary, div.col-sm-9', 0); + $item['content'] = utf8_encode($this->cleanArticle($content_node->innertext)); $item['author'] = utf8_encode($article_html->find('div.author-infos', 0)->find('b', 0)->plaintext); return $item;