fix: various fixes (#3741)

This commit is contained in:
Dag 2023-10-12 19:49:04 +02:00 committed by GitHub
parent d21f8cebf6
commit 6a72c56cdd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 28 additions and 19 deletions

View File

@ -25,20 +25,28 @@ class CarThrottleBridge extends BridgeAbstract
$articlePage = getSimpleHTMLDOMCached($item['uri']) $articlePage = getSimpleHTMLDOMCached($item['uri'])
or returnServerError('could not retrieve page'); or returnServerError('could not retrieve page');
$item['author'] = $articlePage->find('div.author div')[1]->innertext; $authorDiv = $articlePage->find('div.author div');
if ($authorDiv) {
$dinges = $articlePage->find('div.main-body')[0]; $item['author'] = $authorDiv[1]->innertext;
//remove ads
foreach ($dinges->find('aside') as $ad) {
$ad->outertext = '';
$dinges->save();
} }
$item['content'] = $articlePage->find('div.summary')[0] . $dinges = $articlePage->find('div.main-body')[0] ?? null;
$articlePage->find('figure.main-image')[0] . //remove ads
$dinges; if ($dinges) {
foreach ($dinges->find('aside') as $ad) {
$ad->outertext = '';
$dinges->save();
}
}
$var = $articlePage->find('div.summary')[0] ?? '';
$var1 = $articlePage->find('figure.main-image')[0] ?? '';
$dinges1 = $dinges ?? '';
$item['content'] = $var .
$var1 .
$dinges1;
//add the item to the list
array_push($this->items, $item); array_push($this->items, $item);
} }
} }

View File

@ -23,9 +23,11 @@ class GatesNotesBridge extends BridgeAbstract
$cleanedContent = str_replace([ $cleanedContent = str_replace([
'<string xmlns="http://schemas.microsoft.com/2003/10/Serialization/">', '<string xmlns="http://schemas.microsoft.com/2003/10/Serialization/">',
'</string>', '</string>',
'\r\n',
], '', $rawContent); ], '', $rawContent);
$cleanedContent = str_replace('\"', '"', $cleanedContent);
$cleanedContent = trim($cleanedContent, '"');
// The response body is JSON wrapped in double quotes, with escaped quotes (\") and literal \r\n sequences inserted
$json = Json::decode($cleanedContent, false); $json = Json::decode($cleanedContent, false);
foreach ($json as $article) { foreach ($json as $article) {
@ -98,7 +100,7 @@ class GatesNotesBridge extends BridgeAbstract
} }
$article_body = sanitize($article_body->innertext); $article_body = sanitize($article_body->innertext);
$content = $top_description . $hero_image . $article_body; $content = $top_description . ($hero_image ?? '') . $article_body;
return $content; return $content;
} }

View File

@ -158,13 +158,12 @@ function getSimpleHTMLDOM(
$defaultBRText = DEFAULT_BR_TEXT, $defaultBRText = DEFAULT_BR_TEXT,
$defaultSpanText = DEFAULT_SPAN_TEXT $defaultSpanText = DEFAULT_SPAN_TEXT
) { ) {
$content = getContents( $html = getContents($url, $header ?? [], $opts ?? []);
$url, if ($html === '') {
$header ?? [], throw new \Exception('Unable to parse dom because the http response was the empty string');
$opts ?? [] }
);
return str_get_html( return str_get_html(
$content, $html,
$lowercase, $lowercase,
$forceTagsClosed, $forceTagsClosed,
$target_charset, $target_charset,