[TheGuardianBridge] Fix missing article content (fix #3032) (#3072)

This commit is contained in:
Corentin Garcia 2022-10-01 08:41:19 +02:00 committed by GitHub
parent 4a21855e5c
commit 8ac5045963
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 4 additions and 2 deletions

View File

@ -66,7 +66,7 @@ class TheGuardianBridge extends FeedExpander
// figure contains the main article image
$article = $articlePage->find('figure', 0);
// #maincontent (previously .content__article-body) has the actual article
foreach ($articlePage->find('.content__article-body') as $element) {
foreach ($articlePage->find('#maincontent') as $element) {
$article = $article . $element;
}
@ -80,11 +80,13 @@ class TheGuardianBridge extends FeedExpander
// List of all the crap in the article
$uselessElements = [
'span > figcaption',
'#show-caption',
'.element-atom',
'.submeta',
'youtube-media-atom',
'svg'
'svg',
'#the-checkbox',
];
// Remove the listed crap