[CourrierInternationalBridge] fix content parsing

Signed-off-by: Pierre Mazière <pierre.maziere@gmx.com>
This commit is contained in:
Pierre Mazière 2016-09-26 00:50:26 +02:00
parent ad534444fa
commit 3dcdaa1595
1 changed file with 10 additions and 4 deletions

View File

@@ -25,14 +25,20 @@ class CourrierInternationalBridge extends BridgeAbstract{
$item['uri'] = self::URI.$item['uri']; $item['uri'] = self::URI.$item['uri'];
} }
$page = $this->getSimpleHTMLDOM($item['uri']);
$page = $this->getSimpleHTMLDOMCached($item['uri']);
$cleaner = new HTMLSanitizer(); $cleaner = new HTMLSanitizer();
$item['content'] = $cleaner->sanitize($page->find("div.article-text")[0]); $content = $page->find('.article-text',0);
$item['title'] = strip_tags($article->find(".title")[0]); if(!$content){
$content = $page->find('.depeche-text',0);
}
$dateTime = date_parse($page->find("time")[0]); $item['content'] = $cleaner->sanitize($content);
$item['title'] = strip_tags($article->find(".title",0));
$dateTime = date_parse($page->find("time",0));
$item['timestamp'] = mktime( $item['timestamp'] = mktime(
$dateTime['hour'], $dateTime['hour'],