From 8ac5045963cffe71d762180077af7017e0600aee Mon Sep 17 00:00:00 2001
From: Corentin Garcia
Date: Sat, 1 Oct 2022 08:41:19 +0200
Subject: [PATCH] [TheGuardianBridge] Fix missing article content (fix #3032) (#3072)

---
 bridges/TheGuardianBridge.php | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/bridges/TheGuardianBridge.php b/bridges/TheGuardianBridge.php
index d3b1147c..2e14de7a 100644
--- a/bridges/TheGuardianBridge.php
+++ b/bridges/TheGuardianBridge.php
@@ -66,7 +66,7 @@ class TheGuardianBridge extends FeedExpander
         // figure contain's the main article image
         $article = $articlePage->find('figure', 0);
         // content__article-body has the actual article
-        foreach ($articlePage->find('.content__article-body') as $element) {
+        foreach ($articlePage->find('#maincontent') as $element) {
             $article = $article . $element;
         }
 
@@ -80,11 +80,13 @@ class TheGuardianBridge extends FeedExpander
 
         // List of all the crap in the article
         $uselessElements = [
+            'span > figcaption',
             '#show-caption',
             '.element-atom',
             '.submeta',
             'youtube-media-atom',
-            'svg'
+            'svg',
+            '#the-checkbox',
         ];
 
         // Remove the listed crap