[Mastodon] Use ActivityPub outbox for Mastodon (et al.) feed (#2756)

* Use ActivityPub outbox for Mastodon (et al.) feed

closes #2754

* Better description for Mastodon bridge

I mean I could rename it to ActivityPub bridge if the maintainer so pleases

* [Mastodon] Please the lint

* [Mastodon] address feedback

* [Mastodon] fix link, address spelling case bug

* refactor

* [Mastodon] add username cache, fix try-catch, rename

* [Mastodon] shorten description to satisfy the lint

* [Mastodon] address feedback

* [Mastodon] support Secure Mode instances

* [Mastodon] add config documentation

* [Mastodon] update docs

Co-authored-by: Dag <me@dvikan.no>
This commit is contained in:
Austin Huang 2022-06-20 19:11:46 -04:00 committed by GitHub
parent 8365a7a34d
commit e9b8a1f9f9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 211 additions and 48 deletions

View File

@ -1,13 +1,33 @@
<?php
class MastodonBridge extends FeedExpander {
class MastodonBridge extends BridgeAbstract {
// This script attempts to imitate the behaviour of a read-only ActivityPub server
// to read the outbox.
const MAINTAINER = 'husim0';
const NAME = 'Mastodon Bridge';
// Note: Most PixelFed instances have ActivityPub outbox disabled,
// so use the official feed: https://pixelfed.instance/users/username.atom (Posts only)
const MAINTAINER = 'Austin Huang';
const NAME = 'ActivityPub Bridge';
const CACHE_TIMEOUT = 900; // 15mn
const DESCRIPTION = 'Returns toots';
const DESCRIPTION = 'Returns recent statuses. Supports Mastodon, Pleroma and Misskey, among others. Access to
instances that have Authorized Fetch enabled requires
<a href="https://rss-bridge.github.io/rss-bridge/Bridge_Specific/ActivityPub_(Mastodon).html">configuration</a>.';
const URI = 'https://mastodon.social';
// Some Mastodon instances use Secure Mode which requires all requests to be signed.
// You do not need this for most instances, but if you want to support every known
// instance, then you should configure them.
// See also https://docs.joinmastodon.org/spec/security/#http
const CONFIGURATION = array(
'private_key' => array(
'required' => false,
),
'key_id' => array(
'required' => false,
),
);
const PARAMETERS = array(array(
'canusername' => array(
'name' => 'Canonical username',
@ -17,54 +37,21 @@ class MastodonBridge extends FeedExpander {
'norep' => array(
'name' => 'Without replies',
'type' => 'checkbox',
'title' => 'Only return initial toots'
'title' => 'Only return statuses that are not replies, as determined by relations (not mentions).'
),
'noboost' => array(
'name' => 'Without boosts',
'required' => false,
'type' => 'checkbox',
'title' => 'Hide boosts'
'title' => 'Hide boosts. Note that RSS-Bridge will fetch the original status from other federated instances.'
)
));
public function getName() {
switch($this->queriedContext) {
case 'By username':
if($this->getInput('canusername')) {
return $this->getInput('canusername');
default: return parent::getName();
}
}
protected function parseItem($newItem){
$item = parent::parseItem($newItem);
$content = str_get_html($item['content']);
$title = str_get_html($item['title']);
$item['title'] = $content->plaintext;
if(strlen($item['title']) > 75) {
$item['title'] = substr($item['title'], 0, strpos(wordwrap($item['title'], 75), "\n")) . '...';
}
if(strpos($title, 'shared a status by') !== false) {
if($this->getInput('noboost')) {
return null;
}
preg_match('/shared a status by (\S{0,})/', $title, $matches);
$item['title'] = 'Boost ' . $matches[1] . ' ' . $item['title'];
$item['author'] = $matches[1];
} else {
$item['author'] = $this->getInput('canusername');
}
// Check if it's a initial toot or a response
if($this->getInput('norep') && preg_match('/^@.+/', trim($content->plaintext))) {
return null;
}
return $item;
return parent::getName();
}
private function getInstance() {
@ -78,13 +65,132 @@ class MastodonBridge extends FeedExpander {
}
/**
 * Resolve the ActivityPub actor URI of the requested account.
 *
 * The URI is looked up through WebFinger instead of being assembled from the
 * username: Misskey uses the user ID instead of the username in the endpoint,
 * domains can be delegated, and this keeps the bridge compatible with other
 * ActivityPub implementations.
 *
 * @return string The href of the first WebFinger link whose type is
 *                application/activity+json; the parent bridge URI when no
 *                username was supplied or no such link is advertised.
 */
public function getURI(){
    if (!$this->getInput('canusername')) {
        return parent::getURI();
    }

    $instance = $this->getInstance();
    $resource = 'acct:' . $this->getUsername() . '@' . $instance;
    // The resource travels as a query parameter value, so it is percent-encoded.
    $webfingerUrl = 'https://' . $instance . '/.well-known/webfinger?resource=' . rawurlencode($resource);
    // This is a body-less GET: the desired representation is requested with
    // Accept (Content-Type would describe a payload that does not exist).
    $webfingerHeader = array(
        'Accept: application/jrd+json'
    );
    $webfinger = json_decode(getContents($webfingerUrl, $webfingerHeader), true);

    $links = array();
    if (is_array($webfinger) && isset($webfinger['links']) && is_array($webfinger['links'])) {
        $links = $webfinger['links'];
    }
    foreach ($links as $link) {
        // Links are not guaranteed to carry both keys, so test before comparing.
        if (isset($link['type'], $link['href']) && $link['type'] === 'application/activity+json') {
            return $link['href'];
        }
    }

    return parent::getURI();
}
/**
 * Fetch the account's outbox page and turn its activities into feed items.
 *
 * @throws \Exception When the response is not the outbox page that was requested
 *                    (not decodable, or its "id" differs from the requested URL).
 */
public function collectData() {
    $url = $this->getURI() . '/outbox?page=true';
    $content = $this->fetchAP($url);

    if (!is_array($content) || !isset($content['id']) || $content['id'] !== $url) {
        throw new \Exception('Unexpected response from server.');
    }

    $activities = array();
    if (isset($content['orderedItems']) && is_array($content['orderedItems'])) {
        $activities = $content['orderedItems'];
    }

    foreach ($activities as $status) {
        $item = $this->parseItem($status);
        // parseItem() returns null for activities excluded by the "Without boosts" /
        // "Without replies" options; those must not end up in the feed as empty entries.
        if ($item !== null) {
            $this->items[] = $item;
        }
    }
}
/**
 * Convert a single outbox activity into a feed item.
 *
 * "Announce" activities (boosts) are resolved by fetching the boosted status and
 * its author; "Create" activities (posts) are read from the embedded object.
 * Activities of any other type only receive a timestamp and a uid.
 *
 * @param array $content Decoded activity; reads "type", "object", "published" and "id".
 * @return array|null The feed item, or null when the activity is a boost and
 *                    "noboost" is set, or a reply and "norep" is set.
 */
protected function parseItem($content) {
    $item = array();
    switch ($content['type']) {
        case 'Announce': // boost
            if ($this->getInput('noboost')) {
                return null;
            }
            $resolved = false;
            try {
                // We fetch the boosted content.
                $rtContent = $this->fetchAP($content['object']);
                if (is_array($rtContent)
                    && isset($rtContent['attributedTo'])
                    && is_string($rtContent['attributedTo'])
                ) {
                    $authorUrl = $rtContent['attributedTo'];
                    $rtUser = $this->loadCacheValue($authorUrl, 86400);
                    if (!isset($rtUser)) {
                        // We fetch the author, since we cannot always assume the format of the URL.
                        $user = $this->fetchAP($authorUrl);
                        // We assume that the server name as indicated by the URL is the actual
                        // server name, since using webfinger to delegate domains is not officially
                        // supported, and it only seems to work in one way.
                        // parse_url() is used instead of a hand-written pattern so that hosts
                        // written in upper case or followed by a port are still recognised.
                        $host = strtolower((string) parse_url($authorUrl, PHP_URL_HOST));
                        if (is_array($user) && isset($user['preferredUsername']) && $host !== '') {
                            $rtUser = '@' . $user['preferredUsername'] . '@' . $host;
                            $this->saveCacheValue($authorUrl, $rtUser);
                        } else {
                            // The handle could not be built: show the actor URL instead and do
                            // not cache it, so the lookup is retried on the next refresh.
                            $rtUser = $authorUrl;
                        }
                    }
                    $item['author'] = $rtUser;
                    $item['title'] = 'Shared a status by ' . $rtUser . ': ';
                    $item = $this->parseObject($rtContent, $item);
                    $resolved = true;
                }
            } catch (UnexpectedResponseException $th) {
                // Handled below together with undecodable responses.
            }
            if (!$resolved) {
                // The boosted status could not be retrieved or decoded: link to it instead.
                $item['title'] = 'Shared an unreachable status: ' . $content['object'];
                $item['content'] = $content['object'];
                $item['uri'] = $content['object'];
            }
            break;
        case 'Create': // posts
            // A status counts as a reply when it is related to another one
            // through "inReplyTo"; mentions alone do not count.
            if ($this->getInput('norep') && isset($content['object']['inReplyTo'])) {
                return null;
            }
            $item['author'] = $this->getInput('canusername');
            $item['title'] = '';
            $item = $this->parseObject($content['object'], $item);
            break;
    }
    $item['timestamp'] = $content['published'];
    $item['uid'] = $content['id'];
    return $item;
}
/**
 * Fill a feed item from an ActivityPub object (a status).
 *
 * Sets "content" and "uri", appends a plain-text excerpt of at most roughly
 * 75 characters to "title", and appends remote image attachments to "content".
 *
 * @param array $object Decoded object; reads "content", "id" and "attachment".
 * @param array $item   Item being built; an existing "title" is used as a prefix.
 * @return array The completed item.
 */
protected function parseObject($object, $item) {
    $html = isset($object['content']) ? (string) $object['content'] : '';
    $item['content'] = $html;
    if (!isset($item['title'])) {
        $item['title'] = '';
    }

    // Line breaks and paragraph ends become spaces so that stripping the tags does
    // not glue neighbouring words together; whitespace is then collapsed so the only
    // newline seen below is the one inserted by wordwrap().
    $spaced = (string) preg_replace('~<br\s*/?>|</p>~i', ' ', $html);
    $strippedContent = trim((string) preg_replace('/[ \t\r\n]+/', ' ', strip_tags($spaced)));

    if (mb_strlen($strippedContent) > 75) {
        $breakAt = mb_strpos(wordwrap($strippedContent, 75), "\n");
        // wordwrap() finds no break (or a very late one) when the text has no spaces,
        // e.g. a long URL or CJK text; cut hard at 75 characters in that case instead
        // of producing an empty excerpt.
        if ($breakAt === false || $breakAt > 75) {
            $breakAt = 75;
        }
        $item['title'] .= mb_substr($strippedContent, 0, $breakAt) . '...';
    } else {
        $item['title'] .= $strippedContent;
    }

    $item['uri'] = $object['id'];

    $attachments = array();
    if (isset($object['attachment']) && is_array($object['attachment'])) {
        $attachments = $object['attachment'];
    }
    foreach ($attachments as $attachment) {
        if (!is_array($attachment)
            || empty($attachment['mediaType'])
            || !isset($attachment['url'])
            || !is_string($attachment['url'])
        ) {
            continue;
        }
        // Only process REMOTE pictures (prevent xss)
        if (!preg_match('/^image\//', $attachment['mediaType'])
            || !preg_match('/^https?:\/\//', $attachment['url'])
        ) {
            continue;
        }
        $item['content'] .= '<br /><img ';
        // Both values come from a remote server and are written into HTML attributes,
        // so they are escaped to keep quotes from breaking out of the attribute.
        if (!empty($attachment['name'])) {
            $item['content'] .= sprintf('alt="%s" ', htmlspecialchars($attachment['name'], ENT_QUOTES, 'UTF-8'));
        }
        $item['content'] .= sprintf('src="%s" />', htmlspecialchars($attachment['url'], ENT_QUOTES, 'UTF-8'));
    }
    return $item;
}
/**
 * Fetch an ActivityPub document and decode it.
 *
 * When both the "private_key" and "key_id" options are configured, the request
 * carries an HTTP signature over the (request-target), host and date headers.
 * If the key cannot be loaded or signing fails, the request is sent unsigned.
 *
 * @param string $url Absolute URL of the document.
 * @return array|null The decoded JSON document, or null when the body is not valid JSON.
 */
protected function fetchAP($url) {
    // HTTP dates always end in the literal "GMT"; building the string from gmdate()
    // does not depend on how a timezone identifier happens to be rendered.
    $date = gmdate('D, d M Y H:i:s') . ' GMT';

    // parse_url() copes with upper-case hosts and explicit ports.
    $host = parse_url($url, PHP_URL_HOST);
    $port = parse_url($url, PHP_URL_PORT);
    $path = parse_url($url, PHP_URL_PATH);
    $hostHeader = null;
    if (is_string($host) && $host !== '') {
        $hostHeader = $port ? $host . ':' . $port : $host;
    }
    if (!is_string($path) || $path === '') {
        $path = '/';
    }

    $headers = array('Accept: application/activity+json');
    if ($hostHeader !== null) {
        $headers[] = 'Host: ' . $hostHeader;
    }
    $headers[] = 'Date: ' . $date;

    $privateKey = $this->getOption('private_key');
    $keyId = $this->getOption('key_id');
    if ($privateKey && $keyId && $hostHeader !== null) {
        $pkey = openssl_pkey_get_private('file://' . $privateKey);
        if ($pkey === false) {
            Debug::log('Unable to load the configured private key; sending the request unsigned');
        } else {
            // The request target is built from the path alone (no query string).
            // NOTE(review): presumably this mirrors what the receiving servers verify -
            // confirm before changing.
            $toSign = '(request-target): get ' . $path . "\nhost: " . $hostHeader . "\ndate: " . $date;
            $signature = '';
            if (openssl_sign($toSign, $signature, $pkey, 'RSA-SHA256')) {
                Debug::log($toSign);
                $sig = 'Signature: keyId="' . $keyId . '",headers="(request-target) host date",signature="' .
                    base64_encode($signature) . '"';
                Debug::log($sig);
                $headers[] = $sig;
            }
        }
    }

    return json_decode(getContents($url, $headers), true);
}
}

View File

@ -0,0 +1,57 @@
# MastodonBridge (aka. ActivityPub Bridge)
Certain ActivityPub implementations, such as [Mastodon](https://docs.joinmastodon.org/spec/security/#http) and [Pleroma](https://docs-develop.pleroma.social/backend/configuration/cheatsheet/#activitypub), allow instances to require requests to ActivityPub endpoints to be signed. RSS-Bridge can sign its requests with an HTTP signature header if a private key is provided; the ActivityPub instance must, in turn, be able to retrieve the corresponding public key.
You do **not** need to configure this if the usage on your RSS-Bridge instance is limited to accessing ActivityPub instances that do not have such requirements. While the majority of ActivityPub instances don't have them at the time of writing, the situation may change in the future.
## Configuration
The following steps are based on [this article](https://blog.joinmastodon.org/2018/06/how-to-implement-a-basic-activitypub-server/).
1. Select a domain. It may, but does not need to, be the one RSS-Bridge is on. For all subsequent steps, replace `DOMAIN` with this domain.
2. Run the following commands on your machine:
```bash
$ openssl genrsa -out private.pem 2048
$ openssl rsa -in private.pem -outform PEM -pubout -out public.pem
```
3. Place `private.pem` in an appropriate location and note down its absolute path.
4. Serve the following page at `https://DOMAIN/.well-known/webfinger`:
```json
{
"subject": "acct:DOMAIN@DOMAIN",
"aliases": ["https://DOMAIN/actor"],
"links": [{
"rel": "self",
"type": "application/activity+json",
"href": "https://DOMAIN/actor"
}]
}
```
5. Serve the following page at `https://DOMAIN/actor`, replacing the value of `publicKeyPem` with the contents of the `public.pem` file in step 2, with all line breaks substituted with `\n`:
```json
{
"@context": [
"https://www.w3.org/ns/activitystreams",
"https://w3id.org/security/v1"
],
"id": "https://DOMAIN/actor",
"type": "Application",
"inbox": "https://DOMAIN/actor/inbox",
"preferredUsername": "DOMAIN",
"publicKey": {
"id": "https://DOMAIN/actor#main-key",
"owner": "https://DOMAIN/actor",
"publicKeyPem": "-----BEGIN PUBLIC KEY-----\n...\n-----END PUBLIC KEY-----\n"
}
}
```
6. Add the following configuration in `config.ini.php` in your RSS-Bridge folder, replacing the path with the one from step 3:
```ini
[MastodonBridge]
private_key = "/absolute/path/to/your/private.pem"
key_id = "https://DOMAIN/actor#main-key"
```
## Considerations
Any ActivityPub instance your users request content from will be able to identify requests from your RSS-Bridge instance by the domain you specified in the configuration. This also means that an ActivityPub instance may choose to block this domain should they judge your instance's usage excessive. Therefore, public instance operators should monitor for abuse and prepare to communicate with ActivityPub instance admins when necessary. You may also leave contact information as the `summary` value in the actor JSON (step 5).