From e9b8a1f9f97b5b2e70b86770a9bde06ebfc58497 Mon Sep 17 00:00:00 2001 From: Austin Huang Date: Mon, 20 Jun 2022 19:11:46 -0400 Subject: [PATCH] [Mastodon] Use ActivityPub outbox for Mastodon (et al.) feed (#2756) * Use ActivityPub outbox for Mastodon (et al.) feed closes #2754 * Better description for Mastodon bridge I mean I could rename it to ActivityPub bridge if the maintainer so pleases * [Mastodon] Please the lint * [Mastodon] address feedback * [Mastodon] fix link, address spelling case bug * refactor * [Mastodon] add username cache, fix try-catch, rename * [Mastodon] shorten description to satisfy the lint * [Mastodon] address feedback * [Mastodon] support Secure Mode instances * [Mastodon] add config documentation * [Mastodon] update docs Co-authored-by: Dag --- bridges/MastodonBridge.php | 202 +++++++++++++----- .../ActivityPub_(Mastodon).md | 57 +++++ 2 files changed, 211 insertions(+), 48 deletions(-) create mode 100644 docs/10_Bridge_Specific/ActivityPub_(Mastodon).md diff --git a/bridges/MastodonBridge.php b/bridges/MastodonBridge.php index 549647ea..bbbc5587 100644 --- a/bridges/MastodonBridge.php +++ b/bridges/MastodonBridge.php @@ -1,13 +1,33 @@ configuration.'; const URI = 'https://mastodon.social'; + // Some Mastodon instances use Secure Mode which requires all requests to be signed. + // You do not need this for most instances, but if you want to support every known + // instance, then you should configure them. 
+ // See also https://docs.joinmastodon.org/spec/security/#http + const CONFIGURATION = array( + 'private_key' => array( + 'required' => false, + ), + 'key_id' => array( + 'required' => false, + ), + ); + const PARAMETERS = array(array( 'canusername' => array( 'name' => 'Canonical username', @@ -17,74 +37,160 @@ class MastodonBridge extends FeedExpander { 'norep' => array( 'name' => 'Without replies', 'type' => 'checkbox', - 'title' => 'Only return initial toots' + 'title' => 'Only return statuses that are not replies, as determined by relations (not mentions).' ), 'noboost' => array( 'name' => 'Without boosts', 'required' => false, 'type' => 'checkbox', - 'title' => 'Hide boosts' + 'title' => 'Hide boosts. Note that RSS-Bridge will fetch the original status from other federated instances.' ) )); - public function getName(){ - switch($this->queriedContext) { - case 'By username': + public function getName() { + if($this->getInput('canusername')) { return $this->getInput('canusername'); - default: return parent::getName(); } + return parent::getName(); } - protected function parseItem($newItem){ - $item = parent::parseItem($newItem); - - $content = str_get_html($item['content']); - $title = str_get_html($item['title']); - - $item['title'] = $content->plaintext; - - if(strlen($item['title']) > 75) { - $item['title'] = substr($item['title'], 0, strpos(wordwrap($item['title'], 75), "\n")) . '...'; - } - - if(strpos($title, 'shared a status by') !== false) { - if($this->getInput('noboost')) { - return null; - } - - preg_match('/shared a status by (\S{0,})/', $title, $matches); - $item['title'] = 'Boost ' . $matches[1] . ' ' . 
$item['title']; - $item['author'] = $matches[1]; - } else { - $item['author'] = $this->getInput('canusername'); - } - - // Check if it's a initial toot or a response - if($this->getInput('norep') && preg_match('/^@.+/', trim($content->plaintext))) { - return null; - } - - return $item; - } - - private function getInstance(){ + private function getInstance() { preg_match('/^@[a-zA-Z0-9_]+@(.+)/', $this->getInput('canusername'), $matches); return $matches[1]; } - private function getUsername(){ + private function getUsername() { preg_match('/^@([a-zA-Z_0-9_]+)@.+/', $this->getInput('canusername'), $matches); return $matches[1]; } public function getURI(){ - if($this->getInput('canusername')) - return 'https://' . $this->getInstance() . '/@' . $this->getUsername() . '.rss'; + if($this->getInput('canusername')) { + // We parse webfinger to make sure the URL is correct. This is mostly because + // MissKey uses user ID instead of the username in the endpoint, domain delegations, + // and also to be compatible with future ActivityPub implementations. + $resource = 'acct:' . $this->getUsername() . '@' . $this->getInstance(); + $webfingerUrl = 'https://' . $this->getInstance() . '/.well-known/webfinger?resource=' . $resource; + $webfingerHeader = array( + 'Content-Type: application/jrd+json' + ); + $webfinger = json_decode(getContents($webfingerUrl, $webfingerHeader), true); + foreach ($webfinger['links'] as $link) { + if ($link['type'] === 'application/activity+json') { + return $link['href']; + } + } + } return parent::getURI(); } - public function collectData(){ - return $this->collectExpandableDatas($this->getURI()); + public function collectData() { + $url = $this->getURI() . 
'/outbox?page=true'; + $content = $this->fetchAP($url); + if ($content['id'] === $url) { + foreach ($content['orderedItems'] as $status) { + $this->items[] = $this->parseItem($status); + } + } else { + throw new \Exception('Unexpected response from server.'); + } + } + + protected function parseItem($content) { + $item = array(); + switch ($content['type']) { + case 'Announce': // boost + if ($this->getInput('noboost')) { + return null; + } + // We fetch the boosted content. + try { + $rtContent = $this->fetchAP($content['object']); + $rtUser = $this->loadCacheValue($rtContent['attributedTo'], 86400); + if (!isset($rtUser)) { + // We fetch the author, since we cannot always assume the format of the URL. + $user = $this->fetchAP($rtContent['attributedTo']); + preg_match('/https?:\/\/([a-z0-9-\.]{0,})\//', $rtContent['attributedTo'], $matches); + // We assume that the server name as indicated by the path is the actual server name, + // since using webfinger to delegate domains is not officially supported, and it only + // seems to work in one way. + $rtUser = '@' . $user['preferredUsername'] . '@' . $matches[1]; + $this->saveCacheValue($rtContent['attributedTo'], $rtUser); + } + $item['author'] = $rtUser; + $item['title'] = 'Shared a status by ' . $rtUser . ': '; + $item = $this->parseObject($rtContent, $item); + } catch (UnexpectedResponseException $th) { + $item['title'] = 'Shared an unreachable status: ' . 
$content['object']; + $item['content'] = $content['object']; + $item['uri'] = $content['object']; + } + break; + case 'Create': // posts + if ($this->getInput('norep') && isset($content['object']['inReplyTo'])) { + return null; + } + $item['author'] = $this->getInput('canusername'); + $item['title'] = ''; + $item = $this->parseObject($content['object'], $item); + } + $item['timestamp'] = $content['published']; + $item['uid'] = $content['id']; + return $item; + } + + protected function parseObject($object, $item) { + $item['content'] = $object['content']; + $strippedContent = strip_tags(str_replace('
', ' ', $object['content'])); + + if (mb_strlen($strippedContent) > 75) { + $contentSubstring = mb_substr($strippedContent, 0, mb_strpos(wordwrap($strippedContent, 75), "\n")); + $item['title'] .= $contentSubstring . '...'; + } else { + $item['title'] .= $strippedContent; + } + $item['uri'] = $object['id']; + foreach ($object['attachment'] as $attachment) { + // Only process REMOTE pictures (prevent xss) + if ($attachment['mediaType'] + && preg_match('/^image\//', $attachment['mediaType'], $match) + && preg_match('/^http(s|):\/\//', $attachment['url'], $match) + ) { + $item['content'] = $item['content'] . '
', $attachment['url']); + } + } + return $item; + } + + protected function fetchAP($url) { + $d = new DateTime(); + $d->setTimezone(new DateTimeZone('GMT')); + $date = $d->format('D, d M Y H:i:s e'); + preg_match('/https?:\/\/([a-z0-9-\.]{0,})(\/[^?#]+)/', $url, $matches); + $headers = array( + 'Accept: application/activity+json', + 'Host: ' . $matches[1], + 'Date: ' . $date + ); + $privateKey = $this->getOption('private_key'); + $keyId = $this->getOption('key_id'); + if ($privateKey && $keyId) { + $pkey = openssl_pkey_get_private('file://' . $privateKey); + $toSign = '(request-target): get ' . $matches[2] . "\nhost: " . $matches[1] . "\ndate: " . $date; + $result = openssl_sign($toSign, $signature, $pkey, 'RSA-SHA256'); + if ($result) { + Debug::log($toSign); + $sig = 'Signature: keyId="' . $keyId . '",headers="(request-target) host date",signature="' . + base64_encode($signature) . '"'; + Debug::log($sig); + array_push($headers, $sig); + } + } + return json_decode(getContents($url, $headers), true); } } diff --git a/docs/10_Bridge_Specific/ActivityPub_(Mastodon).md b/docs/10_Bridge_Specific/ActivityPub_(Mastodon).md new file mode 100644 index 00000000..cdf0a5d9 --- /dev/null +++ b/docs/10_Bridge_Specific/ActivityPub_(Mastodon).md @@ -0,0 +1,57 @@ +# MastodonBridge (aka. ActivityPub Bridge) + +Certain ActivityPub implementations, such as [Mastodon](https://docs.joinmastodon.org/spec/security/#http) and [Pleroma](https://docs-develop.pleroma.social/backend/configuration/cheatsheet/#activitypub), allow instances to require requests to ActivityPub endpoints to be signed. RSS-Bridge can handle the HTTP signature header if a private key is provided, while the ActivityPub instance must be able to know the corresponding public key. + +You do **not** need to configure this if the usage on your RSS-Bridge instance is limited to accessing ActivityPub instances that do not have such requirements. 
While the majority of ActivityPub instances don't have them at the time of writing, the situation may change in the future. + +## Configuration + +[This article](https://blog.joinmastodon.org/2018/06/how-to-implement-a-basic-activitypub-server/) is referenced. + +1. Select a domain. It may, but does not need to, be the one RSS-Bridge is on. For all subsequent steps, replace `DOMAIN` with this domain. +2. Run the following commands on your machine: +```bash +$ openssl genrsa -out private.pem 2048 +$ openssl rsa -in private.pem -outform PEM -pubout -out public.pem +``` +3. Place `private.pem` in an appropriate location and note down its absolute path. +4. Serve the following page at `https://DOMAIN/.well-known/webfinger`: +```json +{ + "subject": "acct:DOMAIN@DOMAIN", + "aliases": ["https://DOMAIN/actor"], + "links": [{ + "rel": "self", + "type": "application/activity+json", + "href": "https://DOMAIN/actor" + }] +} +``` +5. Serve the following page at `https://DOMAIN/actor`, replacing the value of `publicKeyPem` with the contents of the `public.pem` file in step 2, with all line breaks substituted with `\n`: +```json +{ + "@context": [ + "https://www.w3.org/ns/activitystreams", + "https://w3id.org/security/v1" + ], + "id": "https://DOMAIN/actor", + "type": "Application", + "inbox": "https://DOMAIN/actor/inbox", + "preferredUsername": "DOMAIN", + "publicKey": { + "id": "https://DOMAIN/actor#main-key", + "owner": "https://DOMAIN/actor", + "publicKeyPem": "-----BEGIN PUBLIC KEY-----\n...\n-----END PUBLIC KEY-----\n" + } +} +``` +6. Add the following configuration in `config.ini.php` in your RSS-Bridge folder, replacing the path with the one from step 3: +```ini +[MastodonBridge] +private_key = "/absolute/path/to/your/private.pem" +key_id = "https://DOMAIN/actor#main-key" +``` + +## Considerations + +Any ActivityPub instance your users requested content from will be able to identify requests from your RSS-Bridge instance by the domain you specified in the configuration. 
This also means that an ActivityPub instance may choose to block this domain if it judges your instance's usage to be excessive. Therefore, operators of public RSS-Bridge instances should monitor for abuse and be prepared to communicate with ActivityPub instance admins when necessary. You may also provide contact information as the `summary` value in the actor JSON (step 5).