diff --git a/bridges/HeiseBridge.php b/bridges/HeiseBridge.php index 5f5092e3..6da6a3d1 100644 --- a/bridges/HeiseBridge.php +++ b/bridges/HeiseBridge.php @@ -61,24 +61,42 @@ class HeiseBridge extends FeedExpander private function addArticleToItem($item, $article) { - $authors = $article->find('.a-creator__names', 0)->find('.a-creator__name'); + // copy full-res img src to standard img element + foreach ($article->find('a-img') as $aimg) { + $img = $aimg->find('img', 0); + $img->src = $aimg->src; + // client scales based on aspect ratio in style attribute + $img->width = ''; + $img->height = ''; + } + // relink URIs, as the previous a-img tags weren't recognized by this function + $article = defaultLinkTo($article, $item['uri']); + + // remove unwanted stuff + foreach ($article->find('figure.branding, a-ad, div.ho-text, noscript img, .opt-in__content-container') as $element) { + $element->remove(); + } + // reload html, as remove() is buggy + $article = str_get_html($article->outertext); + + $header = $article->find('header.a-article-header', 0); + $headerElements = $header->find('p, a-img img, figure img'); + $item['content'] = implode('', $headerElements); + + $authors = $header->find('.a-creator__names .a-creator__name'); if ($authors) { $item['author'] = implode(', ', array_map(function ($e) { return $e->plaintext; }, $authors)); } - $content = $article->find('div[class*="article-content"]', 0); + $content = $article->find('.article-content', 0); + $contentElements = $content->find( + 'p, h3, ul, table, pre, a-img img, a-bilderstrecke h2, a-bilderstrecke figure, a-bilderstrecke figcaption' + ); + $item['content'] .= implode('', $contentElements); - if ($content == null) { - $content = $article->find('#article_content', 0); - } - - foreach ($content->find('p, h3, ul, table, pre, img') as $element) { - $item['content'] .= $element; - } - - foreach ($content->find('img') as $img) { + foreach ($article->find('a-img img, a-bilderstrecke img, figure img') as $img) { $item['enclosures'][] = $img->src; }