diff --git a/bridges/CarThrottleBridge.php b/bridges/CarThrottleBridge.php index 5b95dd28..913b686c 100644 --- a/bridges/CarThrottleBridge.php +++ b/bridges/CarThrottleBridge.php @@ -25,20 +25,28 @@ class CarThrottleBridge extends BridgeAbstract $articlePage = getSimpleHTMLDOMCached($item['uri']) or returnServerError('could not retrieve page'); - $item['author'] = $articlePage->find('div.author div')[1]->innertext; - - $dinges = $articlePage->find('div.main-body')[0]; - //remove ads - foreach ($dinges->find('aside') as $ad) { - $ad->outertext = ''; - $dinges->save(); + $authorDiv = $articlePage->find('div.author div'); + if ($authorDiv) { + $item['author'] = $authorDiv[1]->innertext; } - $item['content'] = $articlePage->find('div.summary')[0] . - $articlePage->find('figure.main-image')[0] . - $dinges; + $dinges = $articlePage->find('div.main-body')[0] ?? null; + //remove ads + if ($dinges) { + foreach ($dinges->find('aside') as $ad) { + $ad->outertext = ''; + $dinges->save(); + } + } + + $var = $articlePage->find('div.summary')[0] ?? ''; + $var1 = $articlePage->find('figure.main-image')[0] ?? ''; + $dinges1 = $dinges ?? ''; + + $item['content'] = $var . + $var1 . + $dinges1; - //add the item to the list array_push($this->items, $item); } } diff --git a/bridges/GatesNotesBridge.php b/bridges/GatesNotesBridge.php index 61dccb1a..24ba9b2e 100644 --- a/bridges/GatesNotesBridge.php +++ b/bridges/GatesNotesBridge.php @@ -23,9 +23,11 @@ class GatesNotesBridge extends BridgeAbstract $cleanedContent = str_replace([ '', '', + '\r\n', ], '', $rawContent); + $cleanedContent = str_replace('\"', '"', $cleanedContent); + $cleanedContent = trim($cleanedContent, '"'); - // The content is actually a json between quotes with \r\n inserted $json = Json::decode($cleanedContent, false); foreach ($json as $article) { @@ -98,7 +100,7 @@ class GatesNotesBridge extends BridgeAbstract } $article_body = sanitize($article_body->innertext); - $content = $top_description . $hero_image . $article_body; + $content = $top_description . ($hero_image ?? '') . $article_body; return $content; } diff --git a/lib/contents.php b/lib/contents.php index cfb9f36a..a3830ca7 100644 --- a/lib/contents.php +++ b/lib/contents.php @@ -158,13 +158,12 @@ function getSimpleHTMLDOM( $defaultBRText = DEFAULT_BR_TEXT, $defaultSpanText = DEFAULT_SPAN_TEXT ) { - $content = getContents( - $url, - $header ?? [], - $opts ?? [] - ); + $html = getContents($url, $header ?? [], $opts ?? []); + if ($html === '') { + throw new \Exception('Unable to parse dom because the http response was the empty string'); + } return str_get_html( - $content, + $html, $lowercase, $forceTagsClosed, $target_charset,