From 1232de5744081bd9d12fbd175239218299651f98 Mon Sep 17 00:00:00 2001 From: Nicolas Delsaux Date: Mon, 3 Oct 2022 06:55:24 +0200 Subject: [PATCH] Add new bridge RadioFranceBridge, fix #3077 (#3082) As far as I understand, this supports all radio france sub-pages listing podcast and/or shows (tested with my two favorite shows, so not a very professional test). We use here the data model provided by Radio France, which includes all data in an easily usable format. --- bridges/RadioFranceBridge.php | 207 ++++++++++++++++++++++++++++++++++ 1 file changed, 207 insertions(+) create mode 100644 bridges/RadioFranceBridge.php diff --git a/bridges/RadioFranceBridge.php b/bridges/RadioFranceBridge.php new file mode 100644 index 00000000..df7922ed --- /dev/null +++ b/bridges/RadioFranceBridge.php @@ -0,0 +1,207 @@ + [ + 'name' => 'Domain to use', + 'required' => true, + 'defaultValue' => self::DEFAULT_DOMAIN + ], + 'page' => [ + 'name' => 'Initial page to load', + 'required' => true, + 'exampleValue' => 'franceinter/podcasts/burne-out' + ] + ]]; + + private function getDomain() + { + $domain = $this->getInput('domain'); + if (empty($domain)) { + $domain = self::DEFAULT_DOMAIN; + } + if (strpos($domain, '://') === false) { + $domain = 'https://' . $domain; + } + return $domain; + } + + public function getURI() + { + return $this->getDomain() . '/' . 
$this->getInput('page'); + } + + public function collectData() + { + $html = getSimpleHTMLDOM($this->getURI()); + + // An array of dom nodes + $documentsList = $html->find('.DocumentsList', 0); + $documentsListWrapper = $documentsList->find('.DocumentsList-wrapper', 0); + $cardList = $documentsListWrapper->find('.Card'); + + foreach ($cardList as $card) { + $item = []; + $title_link = $card->find('.ConceptTitle a', 0); + $item['title'] = $title_link->plaintext; + $uri = $title_link->getAttribute('href', 0); + switch (substr($uri, 0, 1)) { + case 'h': // absolute uri + $item['uri'] = $uri; + break; + case '/': // domain relative uri + $item['uri'] = $this->getDomain() . $uri; + break; + default: + $item['uri'] = $this->getDomain() . '/' . $uri; + } + // Finally, obtain the mp3 from some weird Radio France API (url obtained by reading network calls, no less) + $media_url = self::APIENDPOINT . '?value=' . $uri; + $rawJSON = getSimpleHTMLDOMCached($media_url); + $processedJSON = json_decode($rawJSON); + $model_content = $processedJSON->content; + if (empty($model_content->manifestations)) { + error_log("Seems like $uri has no manifestation"); + } else { + $item['enclosures'] = [ $model_content->manifestations[0]->url ]; + + $item['content'] = ''; + if (isset($model_content->visual)) { + $item['content'] .= "visual->src}\" + alt=\"{$model_content->visual->legend}\" + style=\"float:left; width:400px; margin: 1em;\"/>"; + } + if (isset($model_content->standFirst)) { + $item['content'] .= $model_content->standFirst; + } + if (isset($model_content->bodyJson)) { + if (!empty($item['content'])) { + $item['content'] .= '
'; + } + $pseudo_html_array = array_map([$this, 'convertJsonElementToHTML'], $model_content->bodyJson); + $pseudo_html_text = array_reduce( + $pseudo_html_array, + function ($text, $element) { + return $text . "\n" . $element; + }, + '' + ); + $item['content'] .= $pseudo_html_text; + } + if (isset($model_content->producers)) { + $item['author'] = $this->readAuthorsNamesFrom($model_content->producers); + } elseif (isset($model_content->staff)) { + $item['author'] = $this->readAuthorsNamesFrom($model_content->staff); + } + $time = $card->find('time', 0); + $timevalue = $time->getAttribute('datetime'); + $item['timestamp'] = strtotime($timevalue); + + $this->items[] = $item; + } + } + } + + private function readAuthorsNamesFrom($persons_array) + { + $persons_names = array_map(function ($person_element) { + return $person_element->name; + }, $persons_array); + return array_reduce($persons_names, function ($a, $b) { + if (!empty($a)) { + $a .= ', '; + } + return $a . $b; + }, ''); + } + + private function convertJsonElementToHTML($jsonElement) + { + $childText = isset($jsonElement->children) ? $this->convertJsonChildrenToHTML($jsonElement->children) : ''; + $valueText = isset($jsonElement->value) ? $jsonElement->value : ''; + switch ($jsonElement->type) { + case 'text': + return "{$childText}{$valueText}"; + case 'heading': + $level = $jsonElement->level; + return "{$childText}{$valueText}"; + case 'list': + $tag = 'ul'; + if (isset($jsonElement->ordered)) { + if ($jsonElement->ordered) { + $tag = 'ol'; + } + } + return "<$tag>\n" . $childText . "\n"; + case 'list_item': + return "
  • {$childText}{$valueText}
  • \n"; + case 'bounce': + return ''; + case 'paragraph': + return "

    {$childText}{$valueText}

    \n"; + case 'quote': + return "
    {$childText}{$valueText}
    \n"; + case 'link': + return "data->href}\">{$childText}{$valueText}\n"; + case 'audio': + return ''; + case 'embed': + return $jsonElement->data->html; + default: + return $jsonElement->value; + } + } + + private function convertJsonChildrenToHTML($children) + { + $converted = array_map([$this, 'convertJsonElementToHTML'], $children); + return array_reduce($converted, function ($a, $b) { + return $a . $b; + }, ''); + } + + private function removeAds($element) + { + $ads = $element->find('AdSlot'); + foreach ($ads as $ad) { + $ad->remove(); + } + return $element; + } + + /** + * Replaces all relative URIs with absolute ones + * @param $element A simplehtmldom element + * @return The $element->innertext with all URIs replaced + */ + private function replaceUriInHtmlElement($element) + { + $returned = $element->innertext; + foreach (self::REPLACED_ATTRIBUTES as $initial => $final) { + $returned = str_replace($initial . '="/', $final . '="' . self::URI . '/', $returned); + } + return $returned; + } +}