[NiusBridge] fix parse error, fix image content-type (#3728)

This commit is contained in:
Niehztog 2023-10-05 02:31:04 +02:00 committed by GitHub
parent 1cbe1a6f98
commit e376805249
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 13 additions and 11 deletions

View File

@@ -12,29 +12,31 @@ class NiusBridge extends XPathAbstract
// NOTE(review): several constant names below are declared twice on adjacent
// lines (ITEM_CONTENT, ITEM_AUTHOR, ITEM_ENCLOSURES). This looks like the old
// and new lines of a rendered diff shown without +/- markers; confirm against
// the real file which declaration of each pair is the live one.

// Address of the page used as the feed source.
const FEED_SOURCE_URL = 'https://www.nius.de/news';
// Selects div elements whose class attribute contains compact-story or regular-story.
const XPATH_EXPRESSION_ITEM = './/div[contains(@class, "compact-story") or contains(@class, "regular-story")]';
// Selects every descendant node of an h2 element whose class is exactly title.
const XPATH_EXPRESSION_ITEM_TITLE = './/h2[@class="title"]//node()';
// First declaration of ITEM_CONTENT: the same expression as the title, written out literally.
const XPATH_EXPRESSION_ITEM_CONTENT = './/h2[@class="title"]//node()';
// Second declaration of ITEM_CONTENT: reuses the title expression by reference.
const XPATH_EXPRESSION_ITEM_CONTENT = self::XPATH_EXPRESSION_ITEM_TITLE;
// Selects the href attribute of a elements that are the first a child of their parent.
const XPATH_EXPRESSION_ITEM_URI = './/a[1]/@href';
// First declaration of ITEM_AUTHOR: whitespace-normalized third text node of the author span.
const XPATH_EXPRESSION_ITEM_AUTHOR = 'normalize-space(.//span[@class="author"]/text()[3])';
// Whitespace-normalized first and second text nodes of the author span.
const XPATH_EXPR_AUTHOR_PART1 = 'normalize-space(.//span[@class="author"]/text()[1])';
const XPATH_EXPR_AUTHOR_PART2 = 'normalize-space(.//span[@class="author"]/text()[2])';
// Second declaration of ITEM_AUTHOR: joins part 1 and part 2 with a single space,
// then keeps only what follows the first space of the joined string.
const XPATH_EXPRESSION_ITEM_AUTHOR = 'substring-after(concat(' . self::XPATH_EXPR_AUTHOR_PART1 . ', " ", ' . self::XPATH_EXPR_AUTHOR_PART2 . '), " ")';
// Whitespace-normalized first text node of the author span (same expression as author part 1).
const XPATH_EXPRESSION_ITEM_TIMESTAMP = 'normalize-space(.//span[@class="author"]/text()[1])';
// First declaration of ITEM_ENCLOSURES: src of any img element that has a sizes attribute.
const XPATH_EXPRESSION_ITEM_ENCLOSURES = './/img[@sizes]/@src';
// Second declaration of ITEM_ENCLOSURES: additionally requires the alt text
// to equal Article background picture.
const XPATH_EXPRESSION_ITEM_ENCLOSURES = './/img[@sizes and @alt="Article background picture"]/@src';
// Selects the text nodes of a div element whose class is exactly subtitle.
const XPATH_EXPRESSION_ITEM_CATEGORIES = './/div[@class="subtitle"]/text()';
// Encoding-fix setting switched off; presumably read by the parent class, which is not visible here.
const SETTING_FIX_ENCODING = false;
// NOTE(review): two method headers appear back to back and the single body
// below holds two return statements. This looks like the removed
// formatItemTimestamp lines and the added formatItemTitle lines of a rendered
// diff shown without +/- markers; confirm against the real file before
// treating this span as one method.
protected function formatItemTimestamp($value)
protected function formatItemTitle($value)
{
// Parses $value in the Europe/Berlin time zone. The time-only pattern is chosen
// when the text contains the word Uhr, otherwise the d.m.y date pattern; the
// result is formatted with U (seconds since the Unix epoch).
// NOTE(review): createFromFormat() returns false when $value does not match the
// pattern, and the chained format() call is made without checking for that.
return DateTimeImmutable::createFromFormat(
false !== strpos($value, ' Uhr') ? 'H:i \U\h\r' : 'd.m.y',
$value,
new DateTimeZone('Europe/Berlin')
)->format('U');
// Returns $value with markup tags removed by strip_tags().
return strip_tags($value);
}
/**
 * Reduce an item's content value to plain text.
 *
 * @param string $value Content value to clean; may contain markup tags.
 * @return string The value with tags removed by strip_tags().
 */
protected function formatItemContent($value)
{
    $plainText = strip_tags($value);

    return $plainText;
}
/**
 * Extract the url parameter from a nius.de _next/image address.
 *
 * @param string $mediaUrl Media address to inspect.
 * @return string The captured url parameter when the pattern matches,
 *                otherwise $mediaUrl unchanged.
 */
protected function cleanMediaUrl($mediaUrl)
{
// The capture group is greedy: it takes everything after url= up to the last
// question mark that follows it. The pattern is not anchored to the string ends.
$result = preg_match('~https:\/\/www\.nius\.de\/_next\/image\?url=(.*)\?~', $mediaUrl, $matches);
// NOTE(review): two return statements follow. As written only the first can
// execute, so the variant that appends the #.jpg fragment is unreachable.
// This looks like the old and new lines of a rendered diff shown without
// +/- markers; confirm which one the real file keeps.
return $result ? $matches[1] : $mediaUrl;
return $result ? $matches[1] . '#.jpg' : $mediaUrl;
}
protected function generateItemId(FeedItem $item)