message; } throw new \Exception(sprintf('Unable to parse xml: %s', $firstXmlErrorMessage ?? '')); } $feed = [ 'title' => null, 'uri' => null, 'icon' => null, 'items' => [], ]; if (isset($xml->item[0])) { // rss 1.0 $channel = $xml->channel[0]; $feed['title'] = trim((string)$channel->title); $feed['uri'] = trim((string)$channel->link); if (!empty($channel->image)) { $feed['icon'] = trim((string)$channel->image->url); } foreach ($xml->item as $item) { $feed['items'][] = $this->parseRss1Item($item); } } elseif (isset($xml->channel[0])) { // rss 2.0 $channel = $xml->channel[0]; $feed['title'] = trim((string)$channel->title); $feed['uri'] = trim((string)$channel->link); if (!empty($channel->image)) { $feed['icon'] = trim((string)$channel->image->url); } foreach ($channel->item as $item) { $feed['items'][] = $this->parseRss2Item($item); } } elseif (isset($xml->entry[0])) { // atom 1.0 $feed['title'] = (string)$xml->title; // Find best link (only one, or first of 'alternate') if (!isset($xml->link)) { $feed['uri'] = ''; } elseif (count($xml->link) === 1) { $feed['uri'] = (string)$xml->link[0]['href']; } else { $feed['uri'] = ''; foreach ($xml->link as $link) { if (strtolower((string) $link['rel']) === 'alternate') { $feed['uri'] = (string)$link['href']; break; } } } if (!empty($xml->icon)) { $feed['icon'] = (string)$xml->icon; } elseif (!empty($xml->logo)) { $feed['icon'] = (string)$xml->logo; } foreach ($xml->entry as $item) { $feed['items'][] = $this->parseAtomItem($item); } } else { throw new \Exception('Unable to detect feed format'); } return $feed; } public function parseAtomItem(\SimpleXMLElement $feedItem): array { $item = $this->parseRss2Item($feedItem); if (isset($feedItem->id)) { $item['uri'] = (string)$feedItem->id; } if (isset($feedItem->title)) { $item['title'] = html_entity_decode((string)$feedItem->title); } if (isset($feedItem->updated)) { $item['timestamp'] = strtotime((string)$feedItem->updated); } if (isset($feedItem->author)) { $item['author'] = (string)$feedItem->author->name; } if (isset($feedItem->content)) { $contentChildren = $feedItem->content->children(); if (count($contentChildren) > 0) { $content = ''; foreach ($contentChildren as $contentChild) { $content .= $contentChild->asXML(); } $item['content'] = $content; } else { $item['content'] = (string)$feedItem->content; } } // When "link" field is present, URL is more reliable than "id" field if (count($feedItem->link) === 1) { $item['uri'] = (string)$feedItem->link[0]['href']; } else { foreach ($feedItem->link as $link) { if (strtolower((string) $link['rel']) === 'alternate') { $item['uri'] = (string)$link['href']; } if (strtolower((string) $link['rel']) === 'enclosure') { $item['enclosures'][] = (string)$link['href']; } } } return $item; } public function parseRss2Item(\SimpleXMLElement $feedItem): array { $item = [ 'uri' => '', 'title' => '', 'content' => '', 'timestamp' => '', 'author' => '', //'uid' => null, //'categories' => [], //'enclosures' => [], ]; foreach ($feedItem as $k => $v) { $hasChildren = count($v) !== 0; if (!$hasChildren) { $item[$k] = (string) $v; } } if (isset($feedItem->link)) { // todo: trim uri $item['uri'] = (string)$feedItem->link; } if (isset($feedItem->title)) { $item['title'] = html_entity_decode((string)$feedItem->title); } if (isset($feedItem->description)) { $item['content'] = (string)$feedItem->description; } $namespaces = $feedItem->getNamespaces(true); if (isset($namespaces['dc'])) { $dc = $feedItem->children($namespaces['dc']); } if (isset($namespaces['media'])) { $media = $feedItem->children($namespaces['media']); } foreach ($namespaces as $namespaceName => $namespaceUrl) { if (in_array($namespaceName, ['', 'content', 'media'])) { continue; } $module = $feedItem->children($namespaceUrl); $item[$namespaceName] = []; foreach ($module as $moduleKey => $moduleValue) { $item[$namespaceName][$moduleKey] = (string) $moduleValue; } } if (isset($namespaces['itunes'])) { $enclosure = $feedItem->enclosure; $item['enclosure'] = [ 'url' => (string) $enclosure['url'], 'length' => (string) $enclosure['length'], 'type' => (string) $enclosure['type'], ]; } if (isset($feedItem->guid)) { // Pluck out a url from guid foreach ($feedItem->guid->attributes() as $attribute => $value) { if ( $attribute === 'isPermaLink' && ( $value === 'true' || ( filter_var($feedItem->guid, FILTER_VALIDATE_URL) && (empty($item['uri']) || !filter_var($item['uri'], FILTER_VALIDATE_URL)) ) ) ) { $item['uri'] = (string)$feedItem->guid; break; } } } if (isset($feedItem->pubDate)) { $item['timestamp'] = strtotime((string)$feedItem->pubDate); } elseif (isset($dc->date)) { $item['timestamp'] = strtotime((string)$dc->date); } if (isset($feedItem->author)) { $item['author'] = (string)$feedItem->author; } elseif (isset($feedItem->creator)) { $item['author'] = (string)$feedItem->creator; } elseif (isset($dc->creator)) { $item['author'] = (string)$dc->creator; } elseif (isset($media->credit)) { $item['author'] = (string)$media->credit; } if (isset($feedItem->enclosure) && !empty($feedItem->enclosure['url'])) { $item['enclosures'] = [ (string)$feedItem->enclosure['url'], ]; } return $item; } public function parseRss1Item(\SimpleXMLElement $feedItem): array { $item = [ 'uri' => '', 'title' => '', 'content' => '', 'timestamp' => '', 'author' => '', //'uid' => null, //'categories' => [], //'enclosures' => [], ]; if (isset($feedItem->link)) { // todo: trim uri $item['uri'] = (string)$feedItem->link; } if (isset($feedItem->title)) { $item['title'] = html_entity_decode((string)$feedItem->title); } if (isset($feedItem->description)) { $item['content'] = (string)$feedItem->description; } $namespaces = $feedItem->getNamespaces(true); if (isset($namespaces['dc'])) { $dc = $feedItem->children($namespaces['dc']); if (isset($dc->date)) { $item['timestamp'] = strtotime((string)$dc->date); } if (isset($dc->creator)) { $item['author'] = (string)$dc->creator; } } return $item; } }