diff --git a/bridges/YoutubeBridge.php b/bridges/YoutubeBridge.php
index 418e715e..40a8f6a9 100644
--- a/bridges/YoutubeBridge.php
+++ b/bridges/YoutubeBridge.php
@@ -11,7 +11,7 @@ class YoutubeBridge extends BridgeAbstract
{
const NAME = 'YouTube Bridge';
const URI = 'https://www.youtube.com';
- const CACHE_TIMEOUT = 10800; // 3h
+ const CACHE_TIMEOUT = 60 * 60 * 3;
const DESCRIPTION = 'Returns the 10 newest videos by username/channel/playlist or search';
const PARAMETERS = [
@@ -78,116 +78,6 @@ class YoutubeBridge extends BridgeAbstract
// This took from repo BetterVideoRss of VerifiedJoseph.
const URI_REGEX = '/(https?:\/\/(?:www\.)?(?:[a-zA-Z0-9-.]{2,256}\.[a-z]{2,20})(\:[0-9]{2 ,4})?(?:\/[a-zA-Z0-9@:%_\+.,~#"\'!?&\/\/=\-*]+|\/)?)/ims'; //phpcs:ignore
- private function collectDataInternal()
- {
- $xml = '';
- $html = '';
- $url_feed = '';
- $url_listing = '';
-
- if ($this->getInput('u')) {
- /* User and Channel modes */
- $request = $this->getInput('u');
- $url_feed = self::URI . '/feeds/videos.xml?user=' . urlencode($request);
- $url_listing = self::URI . '/user/' . urlencode($request) . '/videos';
- } elseif ($this->getInput('c')) {
- $request = $this->getInput('c');
- $url_feed = self::URI . '/feeds/videos.xml?channel_id=' . urlencode($request);
- $url_listing = self::URI . '/channel/' . urlencode($request) . '/videos';
- } elseif ($this->getInput('custom')) {
- $request = $this->getInput('custom');
- $url_listing = self::URI . '/' . urlencode($request) . '/videos';
- }
-
- if (!empty($url_feed) || !empty($url_listing)) {
- $this->feeduri = $url_listing;
- if (!empty($this->getInput('custom'))) {
- $html = $this->ytGetSimpleHTMLDOM($url_listing);
- $jsonData = $this->getJSONData($html);
- $url_feed = $jsonData->metadata->channelMetadataRenderer->rssUrl;
- $this->feedIconUrl = $jsonData->metadata->channelMetadataRenderer->avatar->thumbnails[0]->url;
- }
- if (!$this->skipFeeds()) {
- $html = $this->ytGetSimpleHTMLDOM($url_feed);
- $this->ytBridgeParseXmlFeed($html);
- } else {
- if (empty($this->getInput('custom'))) {
- $html = $this->ytGetSimpleHTMLDOM($url_listing);
- $jsonData = $this->getJSONData($html);
- }
- $channel_id = '';
- if (isset($jsonData->contents)) {
- $channel_id = $jsonData->metadata->channelMetadataRenderer->externalId;
- $jsonData = $jsonData->contents->twoColumnBrowseResultsRenderer->tabs[1];
- $jsonData = $jsonData->tabRenderer->content->richGridRenderer->contents;
- // $jsonData = $jsonData->itemSectionRenderer->contents[0]->gridRenderer->items;
- $this->parseJSONListing($jsonData);
- } else {
- returnServerError('Unable to get data from YouTube. Username/Channel: ' . $request);
- }
- }
- $this->feedName = str_replace(' - YouTube', '', $html->find('title', 0)->plaintext);
- } elseif ($this->getInput('p')) {
- /* playlist mode */
- // TODO: this mode makes a lot of excess video query requests.
- // To make less requests, we need to cache following dictionary "videoId -> datePublished, duration"
- // This cache will be used to find out, which videos to fetch
- // to make feed of 15 items or more, if there a lot of videos published on that date.
- $request = $this->getInput('p');
- $url_feed = self::URI . '/feeds/videos.xml?playlist_id=' . urlencode($request);
- $url_listing = self::URI . '/playlist?list=' . urlencode($request);
- $html = $this->ytGetSimpleHTMLDOM($url_listing);
- $jsonData = $this->getJSONData($html);
- // TODO: this method returns only first 100 video items
- // if it has more videos, playlistVideoListRenderer will have continuationItemRenderer as last element
- $jsonData = $jsonData->contents->twoColumnBrowseResultsRenderer->tabs[0];
- $jsonData = $jsonData->tabRenderer->content->sectionListRenderer->contents[0]->itemSectionRenderer;
- $jsonData = $jsonData->contents[0]->playlistVideoListRenderer->contents;
- $item_count = count($jsonData);
-
- if ($item_count <= 15 && !$this->skipFeeds() && ($xml = $this->ytGetSimpleHTMLDOM($url_feed))) {
- $this->ytBridgeParseXmlFeed($xml);
- } else {
- $this->parseJSONListing($jsonData);
- }
- $this->feedName = 'Playlist: ' . str_replace(' - YouTube', '', $html->find('title', 0)->plaintext);
- usort($this->items, function ($item1, $item2) {
- if (!is_int($item1['timestamp']) && !is_int($item2['timestamp'])) {
- $item1['timestamp'] = strtotime($item1['timestamp']);
- $item2['timestamp'] = strtotime($item2['timestamp']);
- }
- return $item2['timestamp'] - $item1['timestamp'];
- });
- } elseif ($this->getInput('s')) {
- /* search mode */
- $request = $this->getInput('s');
- $url_listing = self::URI
- . '/results?search_query='
- . urlencode($request)
- . '&sp=CAI%253D';
-
- $html = $this->ytGetSimpleHTMLDOM($url_listing);
-
- $jsonData = $this->getJSONData($html);
- $jsonData = $jsonData->contents->twoColumnSearchResultsRenderer->primaryContents;
- $jsonData = $jsonData->sectionListRenderer->contents;
- foreach ($jsonData as $data) {
- // Search result includes some ads, have to filter them
- if (isset($data->itemSectionRenderer->contents[0]->videoRenderer)) {
- $jsonData = $data->itemSectionRenderer->contents;
- break;
- }
- }
- $this->parseJSONListing($jsonData);
- $this->feeduri = $url_listing;
- $this->feedName = 'Search: ' . $request;
- } else {
- /* no valid mode */
- returnClientError("You must either specify either:\n - YouTube
- username (?u=...)\n - Channel id (?c=...)\n - Playlist id (?p=...)\n - Search (?s=...)");
- }
- }
-
public function collectData()
{
$cacheKey = 'youtube_rate_limit';
@@ -204,9 +94,133 @@ class YoutubeBridge extends BridgeAbstract
}
}
- private function ytBridgeQueryVideoInfo($vid, &$author, &$desc, &$time)
+ private function collectDataInternal()
{
- $html = $this->ytGetSimpleHTMLDOM(self::URI . "/watch?v=$vid", true);
+ $xml = '';
+ $html = '';
+ $url_feed = '';
+ $url_listing = '';
+
+ $username = $this->getInput('u');
+ $channel = $this->getInput('c');
+ $custom = $this->getInput('custom');
+
+ if ($username) {
+ // user and channel
+ $request = $username;
+ $url_feed = self::URI . '/feeds/videos.xml?user=' . urlencode($request);
+ $url_listing = self::URI . '/user/' . urlencode($request) . '/videos';
+ } elseif ($channel) {
+ $request = $channel;
+ $url_feed = self::URI . '/feeds/videos.xml?channel_id=' . urlencode($request);
+ $url_listing = self::URI . '/channel/' . urlencode($request) . '/videos';
+ } elseif ($custom) {
+ $request = $custom;
+ $url_listing = self::URI . '/' . urlencode($request) . '/videos';
+ }
+
+ $playlist = $this->getInput('p');
+ $search = $this->getInput('s');
+
+ $durationMin = $this->getInput('duration_min');
+ $durationMax = $this->getInput('duration_max');
+
+ // Whether to discriminate videos by duration
+ $filterByDuration = $durationMin || $durationMax;
+
+ if ($url_feed || $url_listing) {
+ // user, channel or custom
+ $this->feeduri = $url_listing;
+ if ($custom) {
+ // Extract the feed url for the custom name
+ $html = $this->fetch($url_listing);
+ $jsonData = $this->extractJsonFromHtml($html);
+ // Pluck out the rss feed url
+ $url_feed = $jsonData->metadata->channelMetadataRenderer->rssUrl;
+ $this->feedIconUrl = $jsonData->metadata->channelMetadataRenderer->avatar->thumbnails[0]->url;
+ }
+ if ($filterByDuration) {
+ if (!$custom) {
+ // Fetch the html page
+ $html = $this->fetch($url_listing);
+ $jsonData = $this->extractJsonFromHtml($html);
+ }
+ $channel_id = '';
+ if (isset($jsonData->contents)) {
+ $channel_id = $jsonData->metadata->channelMetadataRenderer->externalId;
+ $jsonData = $jsonData->contents->twoColumnBrowseResultsRenderer->tabs[1];
+ $jsonData = $jsonData->tabRenderer->content->richGridRenderer->contents;
+ // $jsonData = $jsonData->itemSectionRenderer->contents[0]->gridRenderer->items;
+ $this->fetchItemsFromFromJsonData($jsonData);
+ } else {
+ returnServerError('Unable to get data from YouTube. Username/Channel: ' . $request);
+ }
+ } else {
+ // Fetch the xml feed
+ $html = $this->fetch($url_feed);
+ $this->extractItemsFromXmlFeed($html);
+ }
+ $this->feedName = str_replace(' - YouTube', '', $html->find('title', 0)->plaintext);
+ } elseif ($playlist) {
+ // playlist
+ // TODO: this mode makes a lot of excess video query requests.
+ // To make less requests, we need to cache following dictionary "videoId -> datePublished, duration"
+ // This cache will be used to find out, which videos to fetch
+ // to make feed of 15 items or more, if there a lot of videos published on that date.
+ $request = $playlist;
+ $url_feed = self::URI . '/feeds/videos.xml?playlist_id=' . urlencode($request);
+ $url_listing = self::URI . '/playlist?list=' . urlencode($request);
+ $html = $this->fetch($url_listing);
+ $jsonData = $this->extractJsonFromHtml($html);
+ // TODO: this method returns only first 100 video items
+ // if it has more videos, playlistVideoListRenderer will have continuationItemRenderer as last element
+ $jsonData = $jsonData->contents->twoColumnBrowseResultsRenderer->tabs[0];
+ $jsonData = $jsonData->tabRenderer->content->sectionListRenderer->contents[0]->itemSectionRenderer;
+ $jsonData = $jsonData->contents[0]->playlistVideoListRenderer->contents;
+ $item_count = count($jsonData);
+
+ if ($item_count > 15 || $filterByDuration) {
+ $this->fetchItemsFromFromJsonData($jsonData);
+ } else {
+ $xml = $this->fetch($url_feed);
+ $this->extractItemsFromXmlFeed($xml);
+ }
+ $this->feedName = 'Playlist: ' . str_replace(' - YouTube', '', $html->find('title', 0)->plaintext);
+ usort($this->items, function ($item1, $item2) {
+ if (!is_int($item1['timestamp']) && !is_int($item2['timestamp'])) {
+ $item1['timestamp'] = strtotime($item1['timestamp']);
+ $item2['timestamp'] = strtotime($item2['timestamp']);
+ }
+ return $item2['timestamp'] - $item1['timestamp'];
+ });
+ } elseif ($search) {
+ // search
+ $request = $search;
+ $url_listing = self::URI . '/results?search_query=' . urlencode($request) . '&sp=CAI%253D';
+ $html = $this->fetch($url_listing);
+ $jsonData = $this->extractJsonFromHtml($html);
+ $jsonData = $jsonData->contents->twoColumnSearchResultsRenderer->primaryContents;
+ $jsonData = $jsonData->sectionListRenderer->contents;
+ foreach ($jsonData as $data) {
+ // Search result includes some ads, have to filter them
+ if (isset($data->itemSectionRenderer->contents[0]->videoRenderer)) {
+ $jsonData = $data->itemSectionRenderer->contents;
+ break;
+ }
+ }
+ $this->fetchItemsFromFromJsonData($jsonData);
+ $this->feeduri = $url_listing;
+ $this->feedName = 'Search: ' . $request;
+ } else {
+ returnClientError("You must either specify either:\n - YouTube
+ username (?u=...)\n - Channel id (?c=...)\n - Playlist id (?p=...)\n - Search (?s=...)");
+ }
+ }
+
+ private function fetchVideoDetails($vid, &$author, &$desc, &$time)
+ {
+ $url = self::URI . "/watch?v=$vid";
+ $html = $this->fetch($url, true);
// Skip unavailable videos
if (strpos($html->innertext, 'IS_UNAVAILABLE_PAGE') !== false) {
@@ -223,7 +237,7 @@ class YoutubeBridge extends BridgeAbstract
$time = strtotime($elDatePublished->getAttribute('content'));
}
- $jsonData = $this->getJSONData($html);
+ $jsonData = $this->extractJsonFromHtml($html);
if (!isset($jsonData->contents)) {
return;
}
@@ -370,7 +384,7 @@ class YoutubeBridge extends BridgeAbstract
if ($commandUrl['path'] === '/redirect') {
parse_str($commandUrl['query'], $commandUrlQuery);
$enhancement['url'] = urldecode($commandUrlQuery['q']);
- } else if (isset($commandUrl['host'])) {
+ } elseif (isset($commandUrl['host'])) {
$enhancement['url'] = $commandMetadata->url;
} else {
$enhancement['url'] = $baseUrl . $commandMetadata->url;
@@ -388,94 +402,37 @@ class YoutubeBridge extends BridgeAbstract
return array_reverse($enhancements);
}
- private function ytBridgeAddItem($vid, $title, $author, $desc, $time, $thumbnail = '')
+ private function extractItemsFromXmlFeed($xml)
{
- $item = [];
- $item['id'] = $vid;
- $item['title'] = $title;
- $item['author'] = $author;
- $item['timestamp'] = $time;
- $item['uri'] = self::URI . '/watch?v=' . $vid;
- if (!$thumbnail) {
- // Fallback to default thumbnail if there aren't any provided.
- $thumbnail = '0';
- }
- $thumbnailUri = str_replace('/www.', '/img.', self::URI) . '/vi/' . $vid . '/' . $thumbnail . '.jpg';
- $item['content'] = '
' . $desc;
- $this->items[] = $item;
- }
+ $this->feedName = $this->decodeTitle($xml->find('feed > title', 0)->plaintext);
- private function ytBridgeParseXmlFeed($xml)
- {
foreach ($xml->find('entry') as $element) {
- $title = $this->ytBridgeFixTitle($element->find('title', 0)->plaintext);
+ $videoId = str_replace('yt:video:', '', $element->find('id', 0)->plaintext);
+ if (strpos($videoId, 'googleads') !== false) {
+ continue;
+ }
+ $title = $this->decodeTitle($element->find('title', 0)->plaintext);
$author = $element->find('name', 0)->plaintext;
$desc = $element->find('media:description', 0)->innertext;
-
- // Make sure the description is easy on the eye :)
$desc = htmlspecialchars($desc);
$desc = nl2br($desc);
- $desc = preg_replace(
- self::URI_REGEX,
- '$1 ',
- $desc
- );
-
- $vid = str_replace('yt:video:', '', $element->find('id', 0)->plaintext);
+ $desc = preg_replace(self::URI_REGEX, '$1 ', $desc);
$time = strtotime($element->find('published', 0)->plaintext);
- if (strpos($vid, 'googleads') === false) {
- $this->ytBridgeAddItem($vid, $title, $author, $desc, $time);
- }
+ $this->addItem($videoId, $title, $author, $desc, $time);
}
- $this->feedName = $this->ytBridgeFixTitle($xml->find('feed > title', 0)->plaintext); // feedName will be used by getName()
}
- private function ytBridgeFixTitle($title)
+ private function fetch($url, bool $cache = false)
{
- // convert both Ӓ and " to UTF-8
- return html_entity_decode($title, ENT_QUOTES, 'UTF-8');
- }
-
- private function ytGetSimpleHTMLDOM($url, $cached = false)
- {
- $header = [
- 'Accept-Language: en-US'
- ];
- $opts = [];
- $lowercase = true;
- $forceTagsClosed = true;
- $target_charset = DEFAULT_TARGET_CHARSET;
- $stripRN = false;
- $defaultBRText = DEFAULT_BR_TEXT;
- $defaultSpanText = DEFAULT_SPAN_TEXT;
- if ($cached) {
- return getSimpleHTMLDOMCached(
- $url,
- 86400,
- $header,
- $opts,
- $lowercase,
- $forceTagsClosed,
- $target_charset,
- $stripRN,
- $defaultBRText,
- $defaultSpanText
- );
+ $header = ['Accept-Language: en-US'];
+ if ($cache) {
+ $ttl = 86400;
+ return getSimpleHTMLDOMCached($url, $ttl, $header);
}
- return getSimpleHTMLDOM(
- $url,
- $header,
- $opts,
- $lowercase,
- $forceTagsClosed,
- $target_charset,
- $stripRN,
- $defaultBRText,
- $defaultSpanText
- );
+ return getSimpleHTMLDOM($url, $header);
}
- private function getJSONData($html)
+ private function extractJsonFromHtml($html)
{
$scriptRegex = '/var ytInitialData = (.*?);<\/script>/';
$result = preg_match($scriptRegex, $html, $matches);
@@ -483,10 +440,11 @@ class YoutubeBridge extends BridgeAbstract
$this->logger->debug('Could not find ytInitialData');
return null;
}
- return json_decode($matches[1]);
+ $data = json_decode($matches[1]);
+ return $data;
}
- private function parseJSONListing($jsonData)
+ private function fetchItemsFromFromJsonData($jsonData)
{
$duration_min = $this->getInput('duration_min') ?: -1;
$duration_min = $duration_min * 60;
@@ -497,9 +455,6 @@ class YoutubeBridge extends BridgeAbstract
if ($duration_max < $duration_min) {
returnClientError('Max duration must be greater than min duration!');
}
-
- // $vid_list = '';
-
foreach ($jsonData as $item) {
$wrapper = null;
if (isset($item->gridVideoRenderer)) {
@@ -513,10 +468,8 @@ class YoutubeBridge extends BridgeAbstract
} else {
continue;
}
-
- $vid = $wrapper->videoId;
+ $videoId = $wrapper->videoId;
$title = $wrapper->title->runs[0]->text;
-
$author = '';
$desc = '';
$time = '';
@@ -535,7 +488,6 @@ class YoutubeBridge extends BridgeAbstract
}
}
}
-
if (is_string($durationText)) {
if (preg_match('/([\d]{1,2})\:([\d]{1,2})\:([\d]{2})/', $durationText)) {
$durationText = preg_replace('/([\d]{1,2})\:([\d]{1,2})\:([\d]{2})/', '$1:$2:$3', $durationText);
@@ -549,15 +501,37 @@ class YoutubeBridge extends BridgeAbstract
}
}
- // $vid_list .= $vid . ',';
- $this->ytBridgeQueryVideoInfo($vid, $author, $desc, $time);
- $this->ytBridgeAddItem($vid, $title, $author, $desc, $time);
+ //$durationSeconds = (int) $wrapper->lengthSeconds;
+ if ($duration < $duration_min || $duration > $duration_max) {
+ continue;
+ }
+ $this->fetchVideoDetails($videoId, $author, $desc, $time);
+ $this->addItem($videoId, $title, $author, $desc, $time);
}
}
- private function skipFeeds()
+ private function addItem($videoId, $title, $author, $desc, $time, $thumbnail = '')
{
- return ($this->getInput('duration_min') || $this->getInput('duration_max'));
+ $item = [];
+ // This should probably be uid?
+ $item['id'] = $videoId;
+ $item['title'] = $title;
+ $item['author'] = $author;
+ $item['timestamp'] = $time;
+ $item['uri'] = self::URI . '/watch?v=' . $videoId;
+ if (!$thumbnail) {
+ // Fallback to default thumbnail if there aren't any provided.
+ $thumbnail = '0';
+ }
+ $thumbnailUri = str_replace('/www.', '/img.', self::URI) . '/vi/' . $videoId . '/' . $thumbnail . '.jpg';
+ $item['content'] = sprintf('
%s', $item['uri'], $thumbnailUri, $desc);
+ $this->items[] = $item;
+ }
+
+ private function decodeTitle($title)
+ {
+ // convert both Ӓ and " to UTF-8
+ return html_entity_decode($title, ENT_QUOTES, 'UTF-8');
}
public function getURI()
diff --git a/caches/FileCache.php b/caches/FileCache.php
index 1ae88704..2f4b3ad5 100644
--- a/caches/FileCache.php
+++ b/caches/FileCache.php
@@ -30,7 +30,8 @@ class FileCache implements CacheInterface
if (!file_exists($cacheFile)) {
return $default;
}
- $item = unserialize(file_get_contents($cacheFile));
+ $data = file_get_contents($cacheFile);
+ $item = unserialize($data);
if ($item === false) {
$this->logger->warning(sprintf('Failed to unserialize: %s', $cacheFile));
$this->delete($key);
@@ -87,7 +88,8 @@ class FileCache implements CacheInterface
if (isset($excluded[$filename]) || !is_file($cacheFile)) {
continue;
}
- $item = unserialize(file_get_contents($cacheFile));
+ $data = file_get_contents($cacheFile);
+ $item = unserialize($data);
if ($item === false) {
unlink($cacheFile);
continue;