rss-bridge/bridges/NordbayernBridge.php

158 lines
4.9 KiB
PHP
Raw Normal View History

2020-03-31 21:14:16 +02:00
<?php
2020-03-31 21:14:16 +02:00
/**
 * Bridge for the regional news site nordbayern.de.
 *
 * Loads the overview page of the selected region, follows the first link of
 * every <article> found inside its <main> element and turns each linked
 * article page into one feed item (uri, author, timestamp, title, content).
 */
class NordbayernBridge extends BridgeAbstract {
    const MAINTAINER = 'schabi.org';
    const NAME = 'Nordbayern';
    const CACHE_TIMEOUT = 3600;
    const URI = 'https://www.nordbayern.de';
    const DESCRIPTION = 'Bridge for Bavarian regional news site nordbayern.de';
    const PARAMETERS = array( array(
        'region' => array(
            'name' => 'region',
            'type' => 'list',
            'exampleValue' => 'Nürnberg',
            'title' => 'Select a region',
            'values' => array(
                'Nürnberg' => 'nuernberg',
                'Fürth' => 'fuerth',
                'Erlangen' => 'erlangen',
                'Altdorf' => 'altdorf',
                'Ansbach' => 'ansbach',
                'Bad Windsheim' => 'bad-windsheim',
                'Bamberg' => 'bamberg',
                'Dinkelsbühl/Feuchtwangen' => 'dinkelsbuehl-feuchtwangen',
                'Feucht' => 'feucht',
                'Forchheim' => 'forchheim',
                'Gunzenhausen' => 'gunzenhausen',
                'Hersbruck' => 'hersbruck',
                'Herzogenaurach' => 'herzogenaurach',
                'Hilpoltstein' => 'hilpoltstein',
                'Höchstadt' => 'hoechstadt',
                'Lauf' => 'lauf',
                'Neumarkt' => 'neumarkt',
                'Neustadt/Aisch' => 'neustadt-aisch',
                'Pegnitz' => 'pegnitz',
                'Roth' => 'roth',
                'Rothenburg o.d.T.' => 'rothenburg-o-d-t',
                'Treuchtlingen' => 'treuchtlingen',
                'Weißenburg' => 'weissenburg'
            )
        ),
        'policeReports' => array(
            'name' => 'Police Reports',
            'type' => 'checkbox',
            'exampleValue' => 'checked',
            'title' => 'Include Police Reports',
        )
    ));

    /**
     * Build the <img> markup for a <picture> element, unless the image is
     * one of the known publisher logos.
     *
     * @param object $picture DOM node of a <picture> element.
     * @return string '<br><img src="...">' or '' when there is no usable image.
     */
    private function getValidImage($picture) {
        $img = $picture->find('img', 0);
        if (!$img) {
            return '';
        }

        // A missing src attribute would otherwise produce <img src="">.
        $imgUrl = (string) $img->src;
        if ($imgUrl === '') {
            return '';
        }

        // Logo images that must never show up as article pictures.
        $logoPaths = array(
            '/img/nb/logo-vnp.png',
            '/img/nn/logo-vnp.png',
            '/img/nb/logo-nuernberger-nachrichten.png',
            '/img/nb/logo-nordbayern.png',
            '/img/nn/logo-nuernberger-nachrichten.png',
            '/img/nb/logo-erlanger-nachrichten.png',
        );
        foreach ($logoPaths as $logoPath) {
            if (str_contains($imgUrl, $logoPath)) {
                return '';
            }
        }

        // Escape quotes and angle brackets so the URL cannot break out of
        // the attribute; double_encode is off so existing entities such as
        // &amp; are left untouched.
        $safeUrl = htmlspecialchars($imgUrl, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false);
        return '<br><img src="' . $safeUrl . '">';
    }

    /**
     * Recursively collect the useful parts of an article subtree.
     *
     * Keeps <p> and <h3> elements (except the teaser paragraph, which the
     * caller places separately), descends into <main>, <header>, most <div>s
     * and the richtext/context <section>s, and converts <picture> elements
     * via getValidImage(). Everything else is dropped.
     *
     * @param object|null $rawContent DOM node to walk; null yields ''.
     * @return string Concatenated HTML of the kept elements.
     */
    private function getUseFullContent($rawContent) {
        // find(..., 0) returns null when nothing matches, so the recursion
        // below may hand us an empty node.
        if (!$rawContent) {
            return '';
        }

        $content = '';
        foreach ($rawContent->children as $element) {
            $tag = $element->tag;
            // Cast because the class attribute may be absent on an element.
            $class = (string) $element->class;

            if (($tag === 'p' || $tag === 'h3') && $element->class !== 'article__teaser') {
                $content .= $element;
            } elseif ($tag === 'main') {
                $content .= $this->getUseFullContent($element->find('article', 0));
            } elseif ($tag === 'header') {
                $content .= $this->getUseFullContent($element);
            } elseif ($tag === 'div'
                && !str_contains($class, 'article__infobox')
                && !str_contains($class, 'authorinfo')) {
                $content .= $this->getUseFullContent($element);
            } elseif ($tag === 'section'
                && (str_contains($class, 'article__richtext')
                    || str_contains($class, 'article__context'))) {
                $content .= $this->getUseFullContent($element);
            } elseif ($tag === 'picture') {
                $content .= $this->getValidImage($element);
            }
        }
        return $content;
    }

    /**
     * Fetch one article page and append it to $this->items.
     *
     * The item is dropped when its content contains the police-report marker
     * sentence and the 'policeReports' input is not set.
     *
     * @param string $link Absolute URL of the article page.
     * @return void
     */
    private function handleArticle($link) {
        $article = getSimpleHTMLDOM($link);
        if (!$article) {
            // Nothing parseable came back; skip this article only.
            return;
        }
        defaultLinkTo($article, self::URI);

        $item = array();
        $item['uri'] = $link;

        $author = $article->find('[id="openAuthor"]', 0);
        if ($author) {
            $item['author'] = $author->plaintext;
        }

        $createdAt = $article->find('[class=article__release]', 0);
        if ($createdAt) {
            // The release text carries a trailing "Uhr" that strtotime()
            // cannot parse; only keep the result when parsing succeeded.
            $timestamp = strtotime(str_replace('Uhr', '', $createdAt->plaintext));
            if ($timestamp !== false) {
                $item['timestamp'] = $timestamp;
            }
        }

        // Prefer the first <h2> as title and fall back to the first <h3>.
        $headline = $article->find('h2', 0);
        if (!$headline) {
            $headline = $article->find('h3', 0);
        }
        if ($headline) {
            $item['title'] = $headline->innertext;
        }

        $item['content'] = '';
        if ($article->find('section[class*=article__richtext]', 0) == null) {
            // No richtext section: use the first paragraph of the teaser
            // module as content, if there is one.
            $teaserModule = $article->find('div[class*=modul__teaser]', 0);
            $teaser = $teaserModule ? $teaserModule->find('p', 0) : null;
            if ($teaser) {
                $item['content'] .= $teaser;
            }
        } else {
            $body = $article->find('article', 0);
            if ($body) {
                // Change the order of the article teaser so that it is shown
                // above the title image. Otherwise some RSS programs would
                // show the caption of the title image as teaser instead of
                // the actual article teaser.
                $teaser = $body->find('p[class=article__teaser]', 0);
                if ($teaser) {
                    $item['content'] .= $teaser;
                }
                $item['content'] .= $this->getUseFullContent($body);
            }
        }

        // exclude police reports if desired
        if ($this->getInput('policeReports')
            || !str_contains($item['content'], 'Hier geht es zu allen aktuellen Polizeimeldungen.')) {
            $this->items[] = $item;
        }

        $article->clear();
    }

    /**
     * Process every <article> inside the <main> element of a region page.
     *
     * Articles without a link are skipped; a page without <main> yields no
     * items.
     *
     * @param object $listSite Parsed DOM of the region overview page.
     * @return void
     */
    private function handleNewsblock($listSite) {
        $main = $listSite->find('main', 0);
        if (!$main) {
            return;
        }

        foreach ($main->find('article') as $article) {
            $anchor = $article->find('a', 0);
            if (!$anchor || !$anchor->href) {
                continue;
            }
            $this->handleArticle(urljoin(self::URI, $anchor->href));
        }
    }

    /**
     * Entry point: load the overview page for the selected region and
     * collect its articles.
     *
     * @return void
     */
    public function collectData() {
        $region = $this->getInput('region');
        // The option value for Rothenburg is mapped to a different URL
        // segment.
        if ($region === 'rothenburg-o-d-t') {
            $region = 'rothenburg-ob-der-tauber';
        }

        $listSite = getSimpleHTMLDOM(self::URI . '/region/' . $region);
        $this->handleNewsblock($listSite);
    }
}