From e9af41d666c05ceecd8b4af2a48cd95874a87098 Mon Sep 17 00:00:00 2001 From: Patrick Date: Wed, 5 Jul 2023 16:43:59 +0200 Subject: [PATCH] Add bridges for JohannesBlick Steinfeld, OM Online and UsesTech (#3489) * Add bridges for JohannesBlick Steinfeld, OM Online and UsesTech * Fixed linit alert --- bridges/JohannesBlickBridge.php | 29 +++++++++++++ bridges/OMonlineBridge.php | 72 +++++++++++++++++++++++++++++++++ bridges/UsesTechBridge.php | 30 ++++++++++++++ 3 files changed, 131 insertions(+) create mode 100644 bridges/JohannesBlickBridge.php create mode 100644 bridges/OMonlineBridge.php create mode 100644 bridges/UsesTechBridge.php diff --git a/bridges/JohannesBlickBridge.php b/bridges/JohannesBlickBridge.php new file mode 100644 index 00000000..6c00feca --- /dev/null +++ b/bridges/JohannesBlickBridge.php @@ -0,0 +1,29 @@ +find('td > a') as $index => $a) { + $item = []; // Create an empty item + $articlePath = $a->href; + $item['title'] = $a->innertext; + $item['uri'] = $articlePath; + $item['content'] = ''; + + $this->items[] = $item; // Add item to the list + if (count($this->items) >= 10) { + break; + } + } + } +} diff --git a/bridges/OMonlineBridge.php b/bridges/OMonlineBridge.php new file mode 100644 index 00000000..a434e44e --- /dev/null +++ b/bridges/OMonlineBridge.php @@ -0,0 +1,72 @@ + [ + 'name' => 'Ortsname', + 'title' => 'Für die Anzeige von Beitragen nur aus einem Ort oder mehreren Orten + geben einen Orstnamen ein. Mehrere Ortsnamen müssen mit / getrennt eingeben werden, + z.B. Vechta/Cloppenburg. Groß- und Kleinschreibung beachten!' + ] + ] + ]; + + /* Fills $this->items from the teaser links of the listing page. The listing URL is self::URI, or self::URI followed by /ort/ and the 'ort' input when that input is non-empty. For every 'div.molecule-teaser > a' match the linked article page is loaded and one item is appended (uri = link href, title = first h1 inside the first div.molecule-article, content = innertext of that div, timestamp = extractDate2() of the link plaintext). The loop stops once $this->items holds 10 or more entries. */ public function collectData() + { + /* NOTE(review): the 'ort' value is placed into the URL as-is, without encoding; the parameter help text above says several place names are joined with a slash - confirm whether any escaping is needed. */ if (!empty($this->getInput('ort'))) { + $url = sprintf('%s/ort/%s', self::URI, $this->getInput('ort')); + } else { + $url = sprintf('%s', self::URI); + } + + /* The "or" operator calls returnServerError() only when getSimpleHTMLDOM() evaluates to a falsy value. */ $html = getSimpleHTMLDOM($url) + or returnServerError('Could not request: ' . 
$url); + + /* presumably rewrites relative links in the document against $url - confirm against the defaultLinkTo() helper. */ $html = defaultLinkTo($html, $url); + + /* $index is not used inside the loop body. */ foreach ($html->find('div.molecule-teaser > a ') as $index => $a) { + $item = []; + + $articlePath = $a->href; + + /* One additional request per teaser link, made through getSimpleHTMLDOMCached() with self::CACHE_TIMEOUT. */ $articlePageHtml = getSimpleHTMLDOMCached($articlePath, self::CACHE_TIMEOUT) + or returnServerError('Could not request: ' . $articlePath); + + $articlePageHtml = defaultLinkTo($articlePageHtml, self::URI); + + $contents = $articlePageHtml->find('div.molecule-article', 0); + + $item['uri'] = $articlePath; + /* NOTE(review): $contents and the h1 lookup are dereferenced without a null check - confirm that every article page contains div.molecule-article with an h1. */ $item['title'] = $contents->find('h1', 0)->innertext; + + /* NOTE(review): the result of this find() is discarded, so the statement does not change what is stored in $item; the selector looks like a space-separated class list was intended - confirm and either use the result or drop the call. */ $contents->find('div.col-12 col-md-10 offset-0 offset-md-1', 0); + + $item['content'] = $contents->innertext; + /* The timestamp is the date-like prefix of the teaser link text, or an empty string when extractDate2() finds none. */ $item['timestamp'] = $this->extractDate2($a->plaintext); + $this->items[] = $item; + + if (count($this->items) >= 10) { + break; + } + } + } + + /* Returns the date-like prefix of $text after trimming: four digits, a slash, one or two digits, a slash, one or two digits, anchored at the start of the string. Returns an empty string when the trimmed text does not start with such a prefix. */ private function extractDate2($text) + { + $dateRegex = '/^([0-9]{4}\/[0-9]{1,2}\/[0-9]{1,2})/'; + + $text = trim($text); + + if (preg_match($dateRegex, $text, $matches)) { + /* $matches[1] is the first capture group, i.e. the matched prefix itself. */ return $matches[1]; + } + + return ''; + } +} diff --git a/bridges/UsesTechBridge.php b/bridges/UsesTechBridge.php new file mode 100644 index 00000000..653d83dc --- /dev/null +++ b/bridges/UsesTechBridge.php @@ -0,0 +1,30 @@ +find('div[class=PersonInner]') as $index => $a) { + $item = []; // Create an empty item + $articlePath = $a->find('a[class=displayLink]', 0)->href; + $item['title'] = $a->find('img', 0)->getAttribute('alt'); + $item['author'] = $a->find('img', 0)->getAttribute('alt'); + $item['uri'] = $articlePath; + $item['content'] = $a->find('p', 0)->innertext; + + $this->items[] = $item; // Add item to the list + if (count($this->items) >= self::MAX_ITEM) { + break; + } + } + } +}