From 2eec89ab2718065bdb78560171fb55e280fb9806 Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Sat, 10 Sep 2016 19:11:09 +0200 Subject: [PATCH] [bridges] Change all bridges to use BridgeAbstract with getSimpleHTMLDOMCached --- bridges/CADBridge.php | 2 +- bridges/CommonDreamsBridge.php | 2 +- bridges/CpasbienBridge.php | 4 ++-- bridges/DauphineLibereBridge.php | 2 +- bridges/DeveloppezDotComBridge.php | 2 +- bridges/FreenewsBridge.php | 2 +- bridges/FuturaSciencesBridge.php | 2 +- bridges/JapanExpoBridge.php | 4 ++-- bridges/KununuBridge.php | 4 ++-- bridges/LeJournalDuGeekBridge.php | 2 +- bridges/LeMondeInformatiqueBridge.php | 2 +- bridges/LichessBridge.php | 2 +- bridges/NextInpactBridge.php | 2 +- bridges/NextgovBridge.php | 2 +- bridges/NiceMatinBridge.php | 2 +- bridges/NumeramaBridge.php | 2 +- bridges/TheOatMealBridge.php | 2 +- bridges/WikipediaBridge.php | 4 ++-- bridges/WordPressBridge.php | 4 ++-- bridges/WorldOfTanksBridge.php | 4 ++-- lib/FeedExpander.php | 2 +- 21 files changed, 27 insertions(+), 27 deletions(-) diff --git a/bridges/CADBridge.php b/bridges/CADBridge.php index 86dfdb06..595160e2 100644 --- a/bridges/CADBridge.php +++ b/bridges/CADBridge.php @@ -16,7 +16,7 @@ class CADBridge extends FeedExpander { } private function CADExtractContent($url) { - $html3 = $this->get_cached($url); + $html3 = $this->getSimpleHTMLDOMCached($url); // The request might fail due to missing https support or wrong URL if($html3 == false) diff --git a/bridges/CommonDreamsBridge.php b/bridges/CommonDreamsBridge.php index e8a4af38..224b309a 100644 --- a/bridges/CommonDreamsBridge.php +++ b/bridges/CommonDreamsBridge.php @@ -17,7 +17,7 @@ class CommonDreamsBridge extends FeedExpander { } private function CommonDreamsExtractContent($url) { - $html3 = $this->get_cached($url); + $html3 = $this->getSimpleHTMLDOMCached($url); $text = $html3->find('div[class=field--type-text-with-summary]', 0)->innertext; $html3->clear(); unset ($html3); diff --git 
a/bridges/CpasbienBridge.php b/bridges/CpasbienBridge.php index 829c5960..10af594d 100644 --- a/bridges/CpasbienBridge.php +++ b/bridges/CpasbienBridge.php @@ -1,5 +1,5 @@ getAttribute('class')=='ligne0' || $episode->getAttribute('class')=='ligne1') { - $htmlepisode=$this->get_cached($episode->find('a', 0)->getAttribute('href')); + $htmlepisode=$this->getSimpleHTMLDOMCached($episode->find('a', 0)->getAttribute('href')); $item = array(); $item['author'] = $episode->find('a', 0)->text(); diff --git a/bridges/DauphineLibereBridge.php b/bridges/DauphineLibereBridge.php index fe4775ca..9e9aacd4 100644 --- a/bridges/DauphineLibereBridge.php +++ b/bridges/DauphineLibereBridge.php @@ -47,7 +47,7 @@ class DauphineLibereBridge extends FeedExpander { } private function ExtractContent($url) { - $html2 = $this->get_cached($url); + $html2 = $this->getSimpleHTMLDOMCached($url); $text = $html2->find('div.column', 0)->innertext; $text = preg_replace('@]*?>.*?@si', '', $text); return $text; diff --git a/bridges/DeveloppezDotComBridge.php b/bridges/DeveloppezDotComBridge.php index fe08d286..5cbd5767 100644 --- a/bridges/DeveloppezDotComBridge.php +++ b/bridges/DeveloppezDotComBridge.php @@ -42,7 +42,7 @@ class DeveloppezDotComBridge extends FeedExpander { } private function DeveloppezDotComExtractContent($url) { - $articleHTMLContent = $this->get_cached($url); + $articleHTMLContent = $this->getSimpleHTMLDOMCached($url); $text = $this->convert_smart_quotes($articleHTMLContent->find('div.content', 0)->innertext); $text = utf8_encode($text); return trim($text); diff --git a/bridges/FreenewsBridge.php b/bridges/FreenewsBridge.php index dbc46b9e..1934e0b3 100644 --- a/bridges/FreenewsBridge.php +++ b/bridges/FreenewsBridge.php @@ -13,7 +13,7 @@ class FreenewsBridge extends FeedExpander { protected function parseItem($newsItem) { $item = $this->parseRSS_2_0_Item($newsItem); - $articlePage = $this->get_cached($item['uri']); + $articlePage = $this->getSimpleHTMLDOMCached($item['uri']); 
$content = $articlePage->find('.post-container', 0); $item['content'] = $content->innertext; diff --git a/bridges/FuturaSciencesBridge.php b/bridges/FuturaSciencesBridge.php index 2cf846c2..73f1b530 100644 --- a/bridges/FuturaSciencesBridge.php +++ b/bridges/FuturaSciencesBridge.php @@ -86,7 +86,7 @@ class FuturaSciencesBridge extends FeedExpander { protected function parseItem($newsItem){ $item = $this->parseRSS_2_0_Item($newsItem); $item['uri'] = str_replace('#xtor=RSS-8', '', $item['uri']); - $article = $this->get_cached($item['uri']) + $article = $this->getSimpleHTMLDOMCached($item['uri']) or $this->returnServerError('Could not request Futura-Sciences: ' . $item['uri']); $item['content'] = $this->ExtractArticleContent($article); $item['author'] = empty($this->ExtractAuthor($article)) ? $item['author'] : $this->ExtractAuthor($article); diff --git a/bridges/JapanExpoBridge.php b/bridges/JapanExpoBridge.php index 4019ae86..dcd951ad 100644 --- a/bridges/JapanExpoBridge.php +++ b/bridges/JapanExpoBridge.php @@ -1,5 +1,5 @@ get_cached($url) or $this->returnServerError('Could not request JapanExpo: '.$url); + $article_html = $this->getSimpleHTMLDOMCached($url) or $this->returnServerError('Could not request JapanExpo: '.$url); $header = $article_html->find('header.pageHeadBox', 0); $timestamp = strtotime($header->find('time', 0)->datetime); $title_html = $header->find('div.section', 0)->next_sibling(); diff --git a/bridges/KununuBridge.php b/bridges/KununuBridge.php index da159ce6..a958b77b 100644 --- a/bridges/KununuBridge.php +++ b/bridges/KununuBridge.php @@ -1,5 +1,5 @@ get_cached($uri); + $html = $this->getSimpleHTMLDOMCached($uri); if($html === false) $this->returnServerError('Could not load full description!'); diff --git a/bridges/LeJournalDuGeekBridge.php b/bridges/LeJournalDuGeekBridge.php index c723a2fa..95bd960a 100644 --- a/bridges/LeJournalDuGeekBridge.php +++ b/bridges/LeJournalDuGeekBridge.php @@ -17,7 +17,7 @@ class LeJournalDuGeekBridge extends FeedExpander { } private function 
LeJournalDuGeekExtractContent($url) { - $articleHTMLContent = $this->get_cached($url); + $articleHTMLContent = $this->getSimpleHTMLDOMCached($url); $text = $articleHTMLContent->find('div.post-content', 0)->innertext; foreach($articleHTMLContent->find('a.more') as $element) { diff --git a/bridges/LeMondeInformatiqueBridge.php b/bridges/LeMondeInformatiqueBridge.php index 010228a3..f609517c 100644 --- a/bridges/LeMondeInformatiqueBridge.php +++ b/bridges/LeMondeInformatiqueBridge.php @@ -12,7 +12,7 @@ class LeMondeInformatiqueBridge extends FeedExpander { protected function parseItem($newsItem){ $item = $this->parseRSS_1_0_Item($newsItem); - $article_html = $this->get_cached($item['uri']) + $article_html = $this->getSimpleHTMLDOMCached($item['uri']) or $this->returnServerError('Could not request LeMondeInformatique: ' . $item['uri']); $item['content'] = $this->CleanArticle($article_html->find('div#article', 0)->innertext); $item['title'] = $article_html->find('h1.cleanprint-title', 0)->plaintext; diff --git a/bridges/LichessBridge.php b/bridges/LichessBridge.php index 638811d2..6f645394 100644 --- a/bridges/LichessBridge.php +++ b/bridges/LichessBridge.php @@ -17,7 +17,7 @@ class LichessBridge extends FeedExpander { } private function retrieve_lichess_post($blog_post_uri){ - $blog_post_html = $this->get_cached($blog_post_uri); + $blog_post_html = $this->getSimpleHTMLDOMCached($blog_post_uri); $blog_post_div = $blog_post_html->find('#lichess_blog', 0); $post_chapo = $blog_post_div->find('.shortlede', 0)->innertext; diff --git a/bridges/NextInpactBridge.php b/bridges/NextInpactBridge.php index 815a2363..3152b09e 100644 --- a/bridges/NextInpactBridge.php +++ b/bridges/NextInpactBridge.php @@ -17,7 +17,7 @@ class NextInpactBridge extends FeedExpander { } private function ExtractContent($url) { - $html2 = $this->get_cached($url); + $html2 = $this->getSimpleHTMLDOMCached($url); $text = '

'.$html2->find('span.sub_title', 0)->innertext.'

' .'

-

' .'
'.$html2->find('div[itemprop=articleBody]', 0)->innertext.'
'; diff --git a/bridges/NextgovBridge.php b/bridges/NextgovBridge.php index 5d26ec5f..d7061193 100644 --- a/bridges/NextgovBridge.php +++ b/bridges/NextgovBridge.php @@ -56,7 +56,7 @@ class NextgovBridge extends FeedExpander { } private function ExtractContent($url){ - $article = $this->get_cached($url) + $article = $this->getSimpleHTMLDOMCached($url) or $this->returnServerError('Could not request Nextgov: ' . $url); $contents = $article->find('div.wysiwyg', 0)->innertext; diff --git a/bridges/NiceMatinBridge.php b/bridges/NiceMatinBridge.php index 6d148ad4..4e83cfff 100644 --- a/bridges/NiceMatinBridge.php +++ b/bridges/NiceMatinBridge.php @@ -17,7 +17,7 @@ class NiceMatinBridge extends FeedExpander { } private function NiceMatinExtractContent($url) { - $html = $this->get_cached($url); + $html = $this->getSimpleHTMLDOMCached($url); if(!$html) return 'Could not acquire content from url: ' . $url . '!'; diff --git a/bridges/NumeramaBridge.php b/bridges/NumeramaBridge.php index ead340a4..d018fbd4 100644 --- a/bridges/NumeramaBridge.php +++ b/bridges/NumeramaBridge.php @@ -17,7 +17,7 @@ class NumeramaBridge extends FeedExpander { } private function ExtractContent($url){ - $article_html = $this->get_cached($url) or $this->returnServerError('Could not request Numerama: '.$url); + $article_html = $this->getSimpleHTMLDOMCached($url) or $this->returnServerError('Could not request Numerama: '.$url); $contents = $article_html->find('section[class=related-article]', 0)->innertext = ''; // remove related articles block $contents = ''; // add post picture return $contents . 
$article_html->find('article[class=post-content]', 0)->innertext; // extract the post diff --git a/bridges/TheOatMealBridge.php b/bridges/TheOatMealBridge.php index eee9283c..3c3d216f 100644 --- a/bridges/TheOatMealBridge.php +++ b/bridges/TheOatMealBridge.php @@ -13,7 +13,7 @@ class TheOatmealBridge extends FeedExpander{ protected function parseItem($newsItem) { $item = $this->parseRSS_1_0_Item($newsItem); - $articlePage = $this->get_cached($item['uri']); + $articlePage = $this->getSimpleHTMLDOMCached($item['uri']); $content = $articlePage->find('#comic', 0); if(is_null($content)) // load alternative $content = $articlePage->find('#blog', 0); diff --git a/bridges/WikipediaBridge.php b/bridges/WikipediaBridge.php index d7a90dca..5feb4291 100644 --- a/bridges/WikipediaBridge.php +++ b/bridges/WikipediaBridge.php @@ -3,7 +3,7 @@ define('WIKIPEDIA_SUBJECT_TFA', 0); // Today's featured article define('WIKIPEDIA_SUBJECT_DYK', 1); // Did you know... -class WikipediaBridge extends HttpCachingBridgeAbstract { +class WikipediaBridge extends BridgeAbstract { const MAINTAINER = 'logmanoriginal'; const NAME = 'Wikipedia bridge for many languages'; const URI = 'https://www.wikipedia.org/'; @@ -175,7 +175,7 @@ class WikipediaBridge extends HttpCachingBridgeAbstract { * Loads the full article from a given URI */ private function LoadFullArticle($uri){ - $content_html = $this->get_cached($uri); + $content_html = $this->getSimpleHTMLDOMCached($uri); if(!$content_html) $this->returnServerError('Could not load site: ' . $uri . 
'!'); diff --git a/bridges/WordPressBridge.php b/bridges/WordPressBridge.php index 30e7e2aa..6c7d5f6d 100644 --- a/bridges/WordPressBridge.php +++ b/bridges/WordPressBridge.php @@ -1,7 +1,7 @@ find('updated', 0)->innertext); } - $article_html = $this->get_cached($item['uri']); + $article_html = $this->getSimpleHTMLDOMCached($item['uri']); // Attempt to find most common content div if(!isset($item['content'])){ diff --git a/bridges/WorldOfTanksBridge.php b/bridges/WorldOfTanksBridge.php index b7235263..a3179be9 100644 --- a/bridges/WorldOfTanksBridge.php +++ b/bridges/WorldOfTanksBridge.php @@ -1,5 +1,5 @@ href; // now load that uri from cache $this->debugMessage("loading page ".$item['uri']); - $articlePage = $this->get_cached($item['uri']); + $articlePage = $this->getSimpleHTMLDOMCached($item['uri']); $content = $articlePage->find('.l-content', 0); HTMLSanitizer::defaultImageSrcTo($content, self::URI); $item['title'] = $content->find('h1', 0)->innertext; diff --git a/lib/FeedExpander.php b/lib/FeedExpander.php index abaf1210..5566f7c1 100644 --- a/lib/FeedExpander.php +++ b/lib/FeedExpander.php @@ -1,6 +1,6 @@