[ReutersBridge] Updated 'Top News' feed, some fix (#2488)

This commit is contained in:
csisoap 2022-03-29 01:34:41 +07:00 committed by GitHub
parent 626cc9119a
commit e86ce338a2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 107 additions and 16 deletions

View File

@ -131,6 +131,7 @@ class ReutersBridge extends BridgeAbstract
'sports' => '/lifestyle/sports',
'life' => '/lifestyle',
'science' => '/lifestyle/science',
'home/topnews' => '/home',
);
const OLD_WIRE_SECTION = array(
@ -211,11 +212,12 @@ class ReutersBridge extends BridgeAbstract
}
/**
* @param string $endpoint - Provide section's endpoint to Reuters API.
* @param string $endpoint - A endpoint is provided could be article URI or ID.
* @param string $fetch_type - Provide what kind of fetch do you want? Article or Section.
* @param boolean $is_article_uid {true|false} - A boolean flag to determined if using UID instead of url to fetch.
* @return string A completed API URL to fetch data
*/
private function getAPIURL($endpoint, $fetch_type) {
private function getAPIURL($endpoint, $fetch_type, $is_article_uid = false) {
$base_url = self::URI . '/pf/api/v3/content/fetch/';
$wire_url = 'https://wireapi.reuters.com/v8';
switch($fetch_type) {
@ -223,10 +225,23 @@ class ReutersBridge extends BridgeAbstract
if($this->useWireAPI) {
return $wire_url . $endpoint;
}
$query = array(
'website_url' => $endpoint,
'website' => 'reuters'
$base_query = array(
'website' => 'reuters',
);
$query = array();
if ($is_article_uid) {
$query = array(
'id' => $endpoint
);
} else {
$query = array(
'website_url' => $endpoint,
);
}
$query = array_merge($base_query, $query);
$json_query = json_encode($query);
return $base_url . 'article-by-id-or-url-v1?query=' . $json_query;
break;
@ -241,11 +256,17 @@ class ReutersBridge extends BridgeAbstract
return $wire_url . $feed_uri;
}
$query = array(
'fetch_type' => 'section',
'section_id' => $endpoint,
'size' => 30,
'website' => 'reuters'
);
if ($endpoint != '/home') {
$query = array_merge($query, array(
'fetch_type' => 'section',
));
}
$json_query = json_encode($query);
return $base_url . 'articles-by-section-alias-or-id-v1?query=' . $json_query;
break;
@ -253,11 +274,27 @@ class ReutersBridge extends BridgeAbstract
returnServerError('unsupported endpoint');
}
private function getArticle($feed_uri)
private function addStories($title, $content, $timestamp, $author, $url, $category) {
$item = array();
$item['categories'] = $category;
$item['author'] = $author;
$item['content'] = $content;
$item['title'] = $title;
$item['timestamp'] = $timestamp;
$item['uri'] = $url;
$this->items[] = $item;
}
private function getArticle($feed_uri, $is_article_uid = false)
{
// This will make another request to API to get full detail of article and author's name.
$url = $this->getAPIURL($feed_uri, 'article');
$url = $this->getAPIURL($feed_uri, 'article', $is_article_uid);
$rawData = $this->getJson($url);
if(json_last_error() != JSON_ERROR_NONE) { // Checking whether a valid JSON or not
return $this->handleRedirectedArticle($url);
}
$article_content = '';
$authorlist = '';
$category = array();
@ -299,6 +336,40 @@ class ReutersBridge extends BridgeAbstract
return $content_detail;
}
private function handleRedirectedArticle($url) {
$html = getSimpleHTMLDOMCached($url, 86400); // Duration 24h
$description = '';
$author = '';
$images = '';
$meta_items = $html->find('meta');
foreach($meta_items as $meta) {
switch ($meta->name) {
case 'description':
$description = $meta->content;
break;
case 'author':
case 'twitter:creator':
$author = $meta->content;
break;
case 'twitter:image:src':
case 'twitter:image':
$url = $meta->content;
$images = "<img src=$url" . '>';
break;
}
}
return array(
'content' => $description,
'author' => $author,
'category' => '',
'images' => $images,
'published_at' => '',
'status' => 'redirected'
);
}
private function handleImage($images) {
$img_placeholder = '';
@ -444,6 +515,27 @@ EOD;
return $description;
}
/**
* @param array $stories
*/
private function addRelatedStories($stories) {
foreach($stories as $story) {
$story_data = $this->getArticle($story['url']);
$title = $story['caption'];
$url = self::URI . $story['url'];
if(isset($story_data['status']) && $story_data['status'] != 'redirected') {
$article_body = defaultLinkTo($story_data['content'], $this->getURI());
} else {
$article_body = $story_data['content'];
}
$content = $article_body . $story_data['images'];
$timestamp = $story_data['published_at'];
$category = $story_data['category'];
$author = $story_data['author'];
$this->addStories($title, $content, $timestamp, $author, $url, $category);
}
}
public function getName() {
return $this->feedName;
}
@ -500,11 +592,14 @@ EOD;
$title = $story['title'];
$article_uri = $story['canonical_url'];
$source_type = $story['source']['name'];
if (isset($story['related_stories'])) {
$this->addRelatedStories($story['related_stories']);
}
}
// Some article cause unexpected behaviour like redirect to another site not API.
// Attempt to check article source type to avoid this.
if($source_type == 'composer') { // Only Reuters PF api have this, Wire don't.
if(!$this->useWireAPI && $source_type != 'Package') { // Only Reuters PF api have this, Wire don't.
$author = $this->handleAuthorName($story['authors']);
$timestamp = $story['published_time'];
$image_placeholder = '';
@ -512,6 +607,7 @@ EOD;
$image_placeholder = $this->handleImage(array($story['thumbnail']));
}
$content = $story['description'] . $image_placeholder;
$category = array($story['primary_section']['name']);
} else {
$content_detail = $this->getArticle($article_uri);
$description = $content_detail['content'];
@ -524,13 +620,8 @@ EOD;
$timestamp = $content_detail['published_at'];
}
$item['categories'] = $category;
$item['author'] = $author;
$item['content'] = $content;
$item['title'] = $title;
$item['timestamp'] = $timestamp;
$item['uri'] = $url;
$this->items[] = $item;
$this->addStories($title, $content, $timestamp, $author, $url, $category);
}
}
}