[PinterestBridge] Fix implementation after DOM changes

Due to breaking DOM changes this bridge required re-implementation.
With this fix the bridge will make use of the JSON data embedded in
the returned HTML. The content returned for all contexts is similar
with only a few differences due to limitations of the JSON.

Feeds returned for a given username and board will by default make
use of the provided RSS feed instead of using the custom filter.
This behaviour can be changed by setting the optional parameter
'&r=off' (on by default).

Notice: The JSON data for userdata and search results is very
different, so two functions were implemented to account for that.

References #498
This commit is contained in:
logmanoriginal 2017-04-09 23:26:35 +02:00
parent 4124c707d4
commit 2500d0df93
1 changed files with 95 additions and 53 deletions

View File

@ -1,9 +1,9 @@
<?php
class PinterestBridge extends BridgeAbstract {
class PinterestBridge extends FeedExpander {
const MAINTAINER = 'pauder';
const NAME = 'Pinterest Bridge';
const URI = 'http://www.pinterest.com/';
const URI = 'https://www.pinterest.com';
const DESCRIPTION = 'Returns the newest images on a board';
const PARAMETERS = array(
@ -15,6 +15,13 @@ class PinterestBridge extends BridgeAbstract {
'b' => array(
'name' => 'board',
'required' => true
),
'r' => array(
'name' => 'Use provided RSS',
'type' => 'checkbox',
'required' => false,
'defaultValue' => 'checked',
'title' => 'Uncheck to return data via custom filters (more data)'
)
),
'From search' => array(
@ -26,81 +33,116 @@ class PinterestBridge extends BridgeAbstract {
);
public function collectData(){
$html = getSimpleHTMLDOM($this->getURI());
if(!$html){
switch($this->queriedContext){
case 'By username and board':
returnServerError('Username and/or board not found');
if($this->getInput('r')){
$this->collectExpandableDatas($this->getURI() . '.rss');
} else {
$html = getSimpleHTMLDOMCached($this->getURI());
$this->getUserResults($html);
}
break;
case 'From search':
returnServerError('Could not request Pinterest.');
default:
$html = getSimpleHTMLDOMCached($this->getURI());
$this->getSearchResults($html);
}
}
if($this->queriedContext === 'From search'){
foreach($html->find('div.pinWrapper') as $div){
private function getUserResults($html){
$json = json_decode($html->find('#jsInit1', 0)->innertext, true);
$results = $json['tree']['children'][0]['children'][0]['children'][0]['options']['props']['data']['board_feed'];
$username = $json['resourceDataCache'][0]['data']['owner']['username'];
$fullname = $json['resourceDataCache'][0]['data']['owner']['full_name'];
$avatar = $json['resourceDataCache'][0]['data']['owner']['image_small_url'];
foreach($results as $result){
$item = array();
$a = $div->find('a.pinImageWrapper', 0);
$img = $a->find('img', 0);
$item['uri'] = $result['link'];
$item['uri'] = $this->getURI() . $a->getAttribute('href');
$item['content'] = '<img src="'
. htmlentities(str_replace('/236x/', '/736x/', $img->getAttribute('src')))
. '" alt="" />';
// Some use regular titles, others provide 'advanced' infos, a few
// provide even less info. Thus we attempt multiple options.
$item['title'] = trim($result['title']);
$avatar = $div->find('div.creditImg', 0)->find('img', 0);
$avatar = $avatar->getAttribute('data-src');
$avatar = str_replace("\\", "", $avatar);
if($item['title'] === "")
$item['title'] = trim($result['rich_summary']['display_name']);
$username = $div->find('div.creditName', 0);
$board = $div->find('div.creditTitle', 0);
if($item['title'] === "")
$item['title'] = trim($result['description']);
$item['username'] = $username->innertext;
$item['fullname'] = $board->innertext;
$item['timestamp'] = strtotime($result['created_at']);
$item['username'] = $username;
$item['fullname'] = $fullname;
$item['avatar'] = $avatar;
$item['content'] .= '<br /><img align="left" style="margin: 2px 4px;" src="'
$item['author'] = $item['username'] . ' (' . $item['fullname'] . ')';
$item['content'] = '<img align="left" style="margin: 2px 4px;" src="'
. htmlentities($item['avatar'])
. '" /> <strong>'
. '" /><p><strong>'
. $item['username']
. '</strong><br />'
. $item['fullname'];
. '</strong><br>'
. $item['fullname']
. '</p><br><img src="'
. $result['images']['736x']['url']
. '" alt="" /><br><p>'
. $result['description']
. '</p>';
$item['enclosures'] = array($result['images']['orig']['url']);
$item['title'] = $img->getAttribute('alt');
$this->items[] = $item;
}
} elseif($this->queriedContext === 'By username and board'){
$container = $html->find('SCRIPT[type="application/ld+json"]', 0)
or returnServerError('Unable to find data container!');
}
$json = json_decode($container->innertext, true);
private function getSearchResults($html){
$json = json_decode($html->find('#jsInit1', 0)->innertext, true);
$results = $json['resourceDataCache'][0]['data']['results'];
foreach($json['itemListElement'] as $element){
foreach($results as $result){
$item = array();
$item['uri'] = $element['item']['sharedContent']['author']['url'];
$item['title'] = $element['item']['name'];
$item['author'] = $element['item']['user']['name'];
$item['timestamp'] = strtotime($element['item']['datePublished']);
$item['content'] = <<<EOD
<a href="{$item['uri']}">
<img src="{$element['item']['image']}">
</a>
<p>{$element['item']['text']}</p>
EOD;
$item['uri'] = self::URI . $result['board']['url'];
// Some use regular titles, others provide 'advanced' infos, a few
// provide even less info. Thus we attempt multiple options.
$item['title'] = trim($result['title']);
if($item['title'] === "")
$item['title'] = trim($result['rich_summary']['display_name']);
if($item['title'] === "")
$item['title'] = trim($result['grid_description']);
$item['timestamp'] = strtotime($result['created_at']);
$item['username'] = $result['pinner']['username'];
$item['fullname'] = $result['pinner']['full_name'];
$item['avatar'] = $result['pinner']['image_small_url'];
$item['author'] = $item['username'] . ' (' . $item['fullname'] . ')';
$item['content'] = '<img align="left" style="margin: 2px 4px;" src="'
. htmlentities($item['avatar'])
. '" /><p><strong>'
. $item['username']
. '</strong><br>'
. $item['fullname']
. '</p><br><img src="'
. $result['images']['736x']['url']
. '" alt="" /><br><p>'
. $result['description']
. '</p>';
$item['enclosures'] = array($result['images']['orig']['url']);
$this->items[] = $item;
}
}
}
public function getURI(){
switch($this->queriedContext){
case 'By username and board':
$uri = self::URI . urlencode($this->getInput('u')) . '/' . urlencode($this->getInput('b'));
$uri = self::URI . '/' . urlencode($this->getInput('u')) . '/' . urlencode($this->getInput('b'));// . '.rss';
break;
case 'From search':
$uri = self::URI . 'search/?q=' . urlencode($this->getInput('q'));
$uri = self::URI . '/search/?q=' . urlencode($this->getInput('q'));
break;
default: return parent::getURI();
}