From 6e4b6fa1ccea29c4d60c21f938762553e22aead6 Mon Sep 17 00:00:00 2001 From: fulmeek <36341513+fulmeek@users.noreply.github.com> Date: Wed, 26 Dec 2018 20:55:38 +0100 Subject: [PATCH] [OsmAndBlogBridge] Add new bridge (#973) --- bridges/OsmAndBlogBridge.php | 67 ++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) create mode 100644 bridges/OsmAndBlogBridge.php diff --git a/bridges/OsmAndBlogBridge.php b/bridges/OsmAndBlogBridge.php new file mode 100644 index 00000000..e8b72388 --- /dev/null +++ b/bridges/OsmAndBlogBridge.php @@ -0,0 +1,67 @@ +find('div.article') as $element) { + $item = array(); + + $objTitle = $element->find('h1', 0); + if (!$objTitle) + $objTitle = $element->find('h2', 0); + if (!$objTitle) + $objTitle = $element->find('h3', 0); + if ($objTitle) + $item['title'] = $objTitle->plaintext; + + $objDate = $element->find('meta[pubdate]', 0); + if ($objDate) { + $item['timestamp'] = strtotime($objDate->pubdate); + } else { + $objDate = $element->find('.date', 0); + if ($objDate) + $item['timestamp'] = strtotime($objDate->plaintext); + } + + $this->cleanupContent($element, $objTitle, $objDate, $element->find('.date', 0)); + $item['content'] = $element->innertext; + + /* NOTE(review): this lookup runs on $html, not on the current $element, so in the visible code it does not vary per article and every item would receive the same first '.articlelinklist a' link - confirm that this is intended. */ $objLink = $html->find('.articlelinklist a', 0); + if ($objLink) { + $item['uri'] = $this->filterURL($objLink->href); + } else { + $item['uri'] = 'urn:sha1:' . hash('sha1', $item['content']); + } + + $this->items[] = $item; + } + } + + + /** Make a URL absolute relative to self::URI. A value containing '://' is returned unchanged; any other value has its leading slashes stripped and is appended to self::URI. NOTE(review): values such as '//host/path', 'mailto:...' or '#anchor' also lack '://' and are therefore prefixed with self::URI - confirm that this is acceptable. @param string $url URL or path, as read from an href/src attribute by the callers in this class. @return string The unchanged URL, or self::URI followed by the path. */ private function filterURL($url) { + if (strpos($url, '://') === false) + return self::URI . ltrim($url, '/'); + return $url; + } + + + /** Strip the given nodes from an article element and normalise the URLs it contains. Every truthy entry of $removeItems gets its outertext set to an empty string; every 'img' found in $content has its src passed through filterURL(); every 'a' found in $content has its href passed through filterURL() and its target set to '_blank'. The nodes are modified in place and nothing is returned. @param object $content Element whose images and links are rewritten (must provide find()). @param object|null ...$removeItems Nodes to blank out; falsy entries are skipped. */ private function cleanupContent($content, ...$removeItems) { + foreach ($removeItems as $obj) { + if ($obj) $obj->outertext = ''; + } + foreach ($content->find('img') as $obj) { + $obj->src = $this->filterURL($obj->src); + } + foreach ($content->find('a') as $obj) { + $obj->href = $this->filterURL($obj->href); + $obj->target = '_blank'; + } + } +}