fix: TheHackerNewsBridge (#3154)

This commit is contained in:
Dag 2022-11-19 00:25:31 +01:00 committed by GitHub
parent 88766e6fde
commit 745a7ba122
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 21 additions and 14 deletions

View File

@ -19,14 +19,18 @@ class TheHackerNewsBridge extends BridgeAbstract
$article_author = str_replace('', '', $article_author); $article_author = str_replace('', '', $article_author);
$article_title = $element->find('h2.home-title', 0)->plaintext; $article_title = $element->find('h2.home-title', 0)->plaintext;
$article_timestamp = time();
//Date without time //Date without time
$article_timestamp = strtotime( $calendar = $element->find('i.icon-calendar', 0);
extractFromDelimiters( if ($calendar) {
$element->find('i.icon-calendar', 0)->parent()->outertext, $article_timestamp = strtotime(
'</i>', extractFromDelimiters(
'<span>' $calendar->parent()->outertext,
) '</i>',
); '<span>'
)
);
}
//Article thumbnail in lazy-loading image //Article thumbnail in lazy-loading image
if (is_object($element->find('img[data-echo]', 0))) { if (is_object($element->find('img[data-echo]', 0))) {
@ -41,13 +45,16 @@ class TheHackerNewsBridge extends BridgeAbstract
$article_thumbnail = []; $article_thumbnail = [];
} }
if ($article = getSimpleHTMLDOMCached($article_url)) { $article = getSimpleHTMLDOMCached($article_url);
if ($article) {
//Article body //Article body
$contents = $article->find('div.articlebody', 0)->innertext; $var = $article->find('div.articlebody', 0);
$contents = stripRecursiveHtmlSection($contents, 'div', '<div class="ad_'); if ($var) {
$contents = stripWithDelimiters($contents, 'id="google_ads', '</iframe>'); $contents = $var->innertext;
$contents = stripWithDelimiters($contents, '<script', '</script>'); $contents = stripRecursiveHtmlSection($contents, 'div', '<div class="ad_');
$contents = stripWithDelimiters($contents, 'id="google_ads', '</iframe>');
$contents = stripWithDelimiters($contents, '<script', '</script>');
}
//Date with time //Date with time
if (is_object($article->find('meta[itemprop=dateModified]', 0))) { if (is_object($article->find('meta[itemprop=dateModified]', 0))) {
$article_timestamp = strtotime( $article_timestamp = strtotime(
@ -68,7 +75,7 @@ class TheHackerNewsBridge extends BridgeAbstract
$item['author'] = $article_author; $item['author'] = $article_author;
$item['enclosures'] = $article_thumbnail; $item['enclosures'] = $article_thumbnail;
$item['timestamp'] = $article_timestamp; $item['timestamp'] = $article_timestamp;
$item['content'] = trim($contents); $item['content'] = trim($contents ?? '');
$this->items[] = $item; $this->items[] = $item;
$limit++; $limit++;
} }