various fixes (#3190)

* fix: Call to a member function parent() on null

* fix: notice

Fixes "Trying to get property 'plaintext' of non-object" at bridges/WikiLeaksBridge.php line 96.

* fix: CommonDreamsBridge
This commit is contained in:
Dag 2022-12-13 21:04:57 +01:00 committed by GitHub
parent a13c4624fb
commit 936ae8cca3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 76 additions and 65 deletions

View File

@ -21,10 +21,11 @@ class CommonDreamsBridge extends FeedExpander
/**
 * Load the page at $url and return the inner HTML of its body container.
 *
 * As written here the page is looked up twice through getSimpleHTMLDOMCached();
 * only the second lookup (div.node__body) determines the value that is returned.
 *
 * @param mixed $url Passed straight to getSimpleHTMLDOMCached(); presumably an article URL - confirm against caller.
 * @return mixed The innertext of the first div.node__body element.
 */
private function extractContent($url)
{
// NOTE(review): the $text produced by this first lookup is overwritten below before
// it is returned; these four lines look like the pre-change side of the diff hunk - confirm.
$html3 = getSimpleHTMLDOMCached($url);
$text = $html3->find('div[class=field--type-text-with-summary]', 0)->innertext;
$html3->clear();
unset($html3);
// Second lookup: select the first div.node__body element and copy out its inner HTML.
$dom = getSimpleHTMLDOMCached($url);
$summary = $dom->find('div.node__body', 0);
// NOTE(review): $summary is not checked before ->innertext is read, unlike the guarded
// find() results in the other hunks of this commit; presumably find() can return null - confirm.
$text = $summary->innertext;
// Drop the DOM object once the text has been copied out.
$dom->clear();
unset($dom);
return $text;
}
}

View File

@ -13,72 +13,80 @@ class TheHackerNewsBridge extends BridgeAbstract
$limit = 0;
foreach ($html->find('div.body-post') as $element) {
if ($limit < 5) {
$article_url = $element->find('a.story-link', 0)->href;
$article_author = trim($element->find('i.icon-user', 0)->parent()->plaintext);
$article_author = str_replace('&#59396;', '', $article_author);
$article_title = $element->find('h2.home-title', 0)->plaintext;
if ($limit >= 5) {
break;
}
$article_timestamp = time();
//Date without time
$calendar = $element->find('i.icon-calendar', 0);
if ($calendar) {
$article_author = null;
$icon_user = $element->find('i.icon-user', 0);
if ($icon_user) {
$article_author = trim($icon_user->parent()->plaintext);
$article_author = str_replace('&#59396;', '', $article_author);
}
$article_title = $element->find('h2.home-title', 0)->plaintext;
$article_timestamp = time();
//Date without time
$calendar = $element->find('i.icon-calendar', 0);
if ($calendar) {
$article_timestamp = strtotime(
extractFromDelimiters(
$calendar->parent()->outertext,
'</i>',
'<span>'
)
);
}
//Article thumbnail in lazy-loading image
if (is_object($element->find('img[data-echo]', 0))) {
$article_thumbnail = [
extractFromDelimiters(
$element->find('img[data-echo]', 0)->outertext,
"data-echo='",
"'"
)
];
} else {
$article_thumbnail = [];
}
$article_url = $element->find('a.story-link', 0)->href;
$article = getSimpleHTMLDOMCached($article_url);
if ($article) {
//Article body
$var = $article->find('div.articlebody', 0);
if ($var) {
$contents = $var->innertext;
$contents = stripRecursiveHtmlSection($contents, 'div', '<div class="ad_');
$contents = stripWithDelimiters($contents, 'id="google_ads', '</iframe>');
$contents = stripWithDelimiters($contents, '<script', '</script>');
}
//Date with time
if (is_object($article->find('meta[itemprop=dateModified]', 0))) {
$article_timestamp = strtotime(
extractFromDelimiters(
$calendar->parent()->outertext,
'</i>',
'<span>'
$article->find('meta[itemprop=dateModified]', 0)->outertext,
"content='",
"'"
)
);
}
//Article thumbnail in lazy-loading image
if (is_object($element->find('img[data-echo]', 0))) {
$article_thumbnail = [
extractFromDelimiters(
$element->find('img[data-echo]', 0)->outertext,
"data-echo='",
"'"
)
];
} else {
$article_thumbnail = [];
}
$article = getSimpleHTMLDOMCached($article_url);
if ($article) {
//Article body
$var = $article->find('div.articlebody', 0);
if ($var) {
$contents = $var->innertext;
$contents = stripRecursiveHtmlSection($contents, 'div', '<div class="ad_');
$contents = stripWithDelimiters($contents, 'id="google_ads', '</iframe>');
$contents = stripWithDelimiters($contents, '<script', '</script>');
}
//Date with time
if (is_object($article->find('meta[itemprop=dateModified]', 0))) {
$article_timestamp = strtotime(
extractFromDelimiters(
$article->find('meta[itemprop=dateModified]', 0)->outertext,
"content='",
"'"
)
);
}
} else {
$contents = 'Could not request TheHackerNews: ' . $article_url;
}
$item = [];
$item['uri'] = $article_url;
$item['title'] = $article_title;
$item['author'] = $article_author;
$item['enclosures'] = $article_thumbnail;
$item['timestamp'] = $article_timestamp;
$item['content'] = trim($contents ?? '');
$this->items[] = $item;
$limit++;
} else {
$contents = 'Could not request TheHackerNews: ' . $article_url;
}
$item = [];
$item['uri'] = $article_url;
$item['title'] = $article_title;
if ($article_author) {
$item['author'] = $article_author;
}
$item['enclosures'] = $article_thumbnail;
$item['timestamp'] = $article_timestamp;
$item['content'] = trim($contents ?? '');
$this->items[] = $item;
$limit++;
}
}
}

View File

@ -93,8 +93,10 @@ class WikiLeaksBridge extends BridgeAbstract
$item['title'] = $article->find('h3', 0)->plaintext;
$item['uri'] = static::URI . $article->find('h3 a', 0)->href;
$item['content'] = $article->find('div.introduction', 0)->plaintext;
$item['timestamp'] = strtotime($article->find('div.timestamp', 0)->plaintext);
$timestamp = $article->find('div.timestamp', 0);
if ($timestamp) {
$item['timestamp'] = strtotime($timestamp->plaintext);
}
$this->items[] = $item;
}
}