mirror of https://github.com/danoloan10/rss-bridge
Compare commits
359 Commits
7fc5182780
...
ea58c8d2bc
Author | SHA1 | Date |
---|---|---|
нездалисько | ea58c8d2bc | |
sysadminstory | 55ffac5bae | |
Alexandre Alapetite | 12395fcf2d | |
Petr Kolář | 0f6fa8034b | |
Damien Calesse | e904de2dc9 | |
Dag | ef378663aa | |
Dag | fac1f5cd88 | |
Dag | 7dbe106582 | |
Damien Calesse | 2032ed18c4 | |
sysadminstory | f67d2eb88a | |
tillcash | 5ab1924c4f | |
Damien Calesse | c8178e1fc4 | |
Florent V | ad2d4c7b1b | |
Florent V | 1938446385 | |
sysadminstory | c9074facfe | |
sysadminstory | 9f163ab7c6 | |
xduugu | 98dafb61ae | |
July | ea2b4d7506 | |
Dag | f40f997405 | |
Dag | 4c5cf89725 | |
Paul | a81acbe464 | |
Mynacol | 4e40e032b0 | |
Dag | 98a94855dc | |
Dag | 0c6ffbf5a4 | |
Dag | 3944ae68cb | |
Brendan Kidwell | b34fa2d278 | |
Mynacol | c5f586497f | |
Paul | c9c2944e7c | |
Mynacol | 0116dde275 | |
Tone | d4ae55733b | |
ash | 4e1fa946b4 | |
Arnav Jain | d127bf6e00 | |
Dag | 38e9c396cf | |
Dag | 0c4b498d4f | |
Dag | d157816e07 | |
Dag | f01729c86f | |
sysadminstory | 0b67544f86 | |
Guillaume Lacasa | a3b064f4ee | |
Raymond Berger | 4a398a5b14 | |
sysadminstory | 3ef0226a08 | |
sysadminstory | c3d9383523 | |
knrdl | deb9a7269e | |
Eugene Molotov | f3df283c4d | |
Nick McCarthy | 206edaedf5 | |
Niehztog | 44ff2f2cf8 | |
Michael Bemmerl | ccc20849ff | |
George Sokianos | 609eed1791 | |
Matt DeMoss | b037d1b4d1 | |
joaomqc | 2b741b1c1b | |
knrdl | ef711cb30b | |
knrdl | 4919c53c10 | |
Dag | b347a9268a | |
SebLaus | e76b0601b3 | |
sysadminstory | 57b61c8787 | |
wpdevelopment11 | 7a7fa876d2 | |
sysadminstory | a6310cff1a | |
sysadminstory | 84b5ffcc7c | |
Evgeny | 8d0ddb579f | |
Niehztog | 1dabd10e25 | |
ORelio | cee25d862d | |
Ryan Stafford | d4e4c3e89a | |
Park0 | f134808a26 | |
mruac | a6a4502209 | |
Manu | 4722201281 | |
ORelio | 4f7451895b | |
ORelio | 8ff39f64f7 | |
Teemu Ikonen | 658391263e | |
ORelio | 9056106c2d | |
ORelio | 7533ef12e3 | |
ORelio | a41bb088f8 | |
sysadminstory | 8203196145 | |
Dag | 563c2a345b | |
Dag | ef5bd83bd0 | |
Ololbu | 408c2e5e91 | |
Dag | f7f3ca0126 | |
Dag | 611fabe46c | |
Dag | 2aa52aa99a | |
Dag | cf9558648e | |
Dag | daef240cd2 | |
Dag | 5f37c72be0 | |
ORelio | fd52b9b9a4 | |
Dag | 920d00480d | |
Dag | 49d9dafaec | |
Dag | 2880524dfc | |
Dag | e379019db2 | |
Dag | 44fb2c98bc | |
Dag | 382648fc22 | |
Dag | 9bda9e246a | |
Jisagi | 6634291c67 | |
Dag | e55a88fb8e | |
Dag | 6a72c56cdd | |
Dag | d21f8cebf6 | |
Eugene Molotov | 7e183915a9 | |
Eugene Molotov | 145bd10f4c | |
Dag | b6a9baff94 | |
ORelio | 143f90da60 | |
ORelio | 47f52b5912 | |
Park0 | f97a3fa4d9 | |
Dag | 5f777d4126 | |
Niehztog | e376805249 | |
sysadminstory | 1cbe1a6f98 | |
User123698745 | 59dd49671d | |
Dag | 64582a64f1 | |
Dag | 547af0d0d2 | |
User123698745 | 69da0dd583 | |
Dag | 41df17bc46 | |
sysadminstory | 0c92cf32d4 | |
Dag | 7273a05f02 | |
User123698745 | d822d666c7 | |
vdbhb59 | 6cf9dfb7c9 | |
ORelio | 3557e5ffd4 | |
Dag | 2172df9fa2 | |
Dag | b9ec6a0eb4 | |
Dag | 0de5180ded | |
Dag | f9ec88fb45 | |
User123698745 | c04c0a5614 | |
Dag | ae53adefad | |
Dag | f421c45b21 | |
Dag | cd30c25b08 | |
ORelio | e1b911fc1f | |
User123698745 | 09f3c1532a | |
Dag | 857e908929 | |
Dag | f321f000c1 | |
Dag | 437afd67e0 | |
ORelio | ce353c1e4f | |
Dag | 0dc6c66840 | |
Dag | d33808ea9e | |
Dag | 0c69148cff | |
Dag | bab02bf190 | |
Dag | f943f8d002 | |
Dag | b3b0736761 | |
Dag | cb6c931b1f | |
Dag | 07f49225d9 | |
Dag | a6a1d553d9 | |
Dag | 39d6710798 | |
mruac | a3c29f3a52 | |
User123698745 | 7a9bfa1087 | |
Dag | 7329b83cc0 | |
Julien Papasian | 360f953be8 | |
Dag | 0bf38e5c56 | |
Dag | e6aef73a02 | |
Scott Colby | cf7e3eea56 | |
User123698745 | 3b91b1d260 | |
Dag | 409236e48e | |
Dag | bb7f329e81 | |
Alexandre Alapetite | 0175e13712 | |
sysadminstory | 4323a11667 | |
mruac | 4f5a492dde | |
mruac | 3e1e96e477 | |
ImportTaste | a9cf1512e7 | |
Dag | 3178deb5a8 | |
Dag | 4b9f6f7e53 | |
mruac | a786bbd4e0 | |
sysadminstory | 078091752a | |
July | 586d707ae4 | |
mruac | b3a7842448 | |
csisoap | dbe37cc302 | |
sysadminstory | 52b90e0873 | |
sysadminstory | 38b957398a | |
sysadminstory | 752098e0fa | |
User123698745 | 99b86c0e1c | |
mruac | b9fdd20f8f | |
Niehztog | 92b2bc5e11 | |
R3dError | 64000a2526 | |
csisoap | 4d05d0beff | |
mruac | 9707586ee8 | |
sysadminstory | 52c59caf2f | |
mruac | f0ec797f4b | |
mruac | 9e33a15b93 | |
Paul | 00a18a1cd1 | |
Mynacol | 14607c07f6 | |
Mynacol | 999d5dce40 | |
Mynacol | c3b5b382ba | |
Mynacol | 18a8a51271 | |
t0stiman | 0325c2414a | |
Lars Stegman | eb4ff7099f | |
sysadminstory | 7591b10219 | |
Dag | 54045be951 | |
Dag | 3ac861a866 | |
veloute | c5cbab1231 | |
Eugene Molotov | 959dd937b4 | |
John S Long | 79e3f7f204 | |
Corentin Garcia | a1237d90f1 | |
mruac | 28077155ca | |
mruac | 7a1180c80f | |
Dag | ce72503df6 | |
George Sokianos | d55994643d | |
Christian Schabesberger | 11ea6aedfd | |
sysadminstory | 52d3cce59d | |
ORelio | 6cc4cf24dc | |
sysadminstory | 1fcf67f14a | |
sysadminstory | f3896ed543 | |
ORelio | b86ee5778b | |
adminvulcano | 43ec82179b | |
Korytov Pavel | cf6d94dc2a | |
Niehztog | 3e3481bd7a | |
User123698745 | 4976cd227e | |
Tone | d32419ffcf | |
User123698745 | 7661a78a43 | |
Dag | ed97ce8646 | |
mruac | 10f7b6f4f6 | |
Dag | 8e2353ad3e | |
Dag | 7e4807530e | |
Dag | 8b6eecea25 | |
ORelio | f8fd05f08f | |
User123698745 | f957eea300 | |
User123698745 | 93eecdf79f | |
mrtnvgr | 3a57fc800b | |
Dag | 701fe3cfed | |
Aaron F | 11ce8b5dcd | |
Korytov Pavel | f5f76f111b | |
Korytov Pavel | bf4ea12719 | |
ORelio | 235c084820 | |
ORelio | 977c0db382 | |
csisoap | 556bca58cf | |
Dawid Wróbel | 2cc89b767c | |
Simon Alberny | 1f6c2cd32c | |
Dag | b6fab20601 | |
Dag | 74635fd752 | |
Eugene Molotov | 38ca124de0 | |
Dag | 39a8346c53 | |
Dag | d08b2616ef | |
Dag | 0a118310cb | |
Predä | 663729cf19 | |
Predä | 2ffb54c7c2 | |
Dag | 517c7f5c9b | |
Dag | 93620aa105 | |
Dag | a4a328583a | |
Dag | f91723d9e5 | |
Dag | 6254b8593e | |
sysadminstory | 087e790ec1 | |
mrtnvgr | 4ce63c88aa | |
Paroleen | a1bae7a9a8 | |
Dag | 08d16322e1 | |
Dag | 440adf2f3b | |
Dag | a59793e8d6 | |
Dag | 7b46b97abd | |
Dag | 310160fd92 | |
Dag | 773eea196f | |
Dag | e8420b9f39 | |
Dag | ef8181478d | |
Dag | eaea8e6640 | |
Thomas | e5729fdaac | |
Ryan Stafford | c8039d483b | |
Ryan Stafford | 73d88dda46 | |
Ryan Stafford | ea0456ea08 | |
ORelio | 9efdf24a6e | |
alexvong243f | a234392f80 | |
Jisagi | b9102d7e87 | |
Fake4d | 0f2b55fbef | |
Dag | 69aa751f40 | |
csisoap | b3bf95bfdd | |
Dag | 6c0e186d3f | |
Dag | c9a861e259 | |
Dag | dfe78fb379 | |
Dag | f0a504bb9a | |
Dag | 7881c87bed | |
Dag | 1a529fac46 | |
Dag | adc38e65d9 | |
Dag | 0b95dc2d4f | |
Dag | 61b307a9f9 | |
Dag | 341649a8a4 | |
Dag | 91976f7d56 | |
Dag | 8b996e3056 | |
Dag | c1c8304fc0 | |
Dag | b594ad2de3 | |
User123698745 | ef0b86968c | |
somini | d49ea235f0 | |
Dag | 46f0e97c73 | |
Dag | 5e22459eb6 | |
Dag | 965d7d44c5 | |
Dag | 21c8d8775e | |
Dag | caac7f572c | |
Dag | f8801d8cb3 | |
Dag | 8f9147458d | |
Dag | a9fd3b9e61 | |
Dag | 18e1597361 | |
Patrick | e9af41d666 | |
Dag | 354317d010 | |
Dag | 84501cfc00 | |
Dag | 82c22bd2b5 | |
Dag | a21d496bc7 | |
Dag | cf920694d5 | |
Thomas | bf0d771367 | |
Dag | 48385777b4 | |
Dag | d8bc015efc | |
Eugene Molotov | 0f14a0f6ee | |
Thomas | eb2b4747ae | |
Dag | bf73372d7f | |
Dag | 748fc9fd65 | |
Dag | 372880b5ef | |
rmscoelho | cc91ee1e37 | |
Arnav Jain | fece9ed344 | |
Bocki | b6a263037a | |
Mynacol | 410ef85618 | |
Dag | 8eabdbe5f8 | |
António Pereira | bd6f56383c | |
ORelio | d4bc63ee98 | |
rmscoelho | 1b02d4f49b | |
rmscoelho | a4ed52ca30 | |
rmscoelho | 1769399da8 | |
rmscoelho | 12ba6154f9 | |
rmscoelho | 8e35ebf482 | |
rmscoelho | 61130e89b4 | |
rmscoelho | ebebb886c5 | |
Matt Connell | 6eaa31b999 | |
rmscoelho | 1d3888f22a | |
rmscoelho | 60be4cdebd | |
rmscoelho | 5a0bacbd8a | |
rmscoelho | 0c808dc3a1 | |
rmscoelho | 98b72b2c5c | |
Thomas | 54d626d5cd | |
somini | 1e470ef341 | |
Dag | 0a8fe57003 | |
Nick McCarthy | d9490c6518 | |
Jisagi | eb799e59a6 | |
Eugene Molotov | ec1a3f4fe3 | |
Eugene Molotov | 80376830c5 | |
Shikiryu | e859497d6a | |
Dag | ca351edbfe | |
Dag | 8f9eaae338 | |
Dag | fbaf26e8bf | |
Simon Alberny | 95071d0134 | |
Simon Alberny | 3f8165207e | |
Simon Alberny | 08be0ad7a5 | |
Simon Alberny | 6fa1f349d9 | |
July | 54957d2a03 | |
Tone | 819e453064 | |
Dag | 1636a84c25 | |
Dag | ee498eadf9 | |
Ryan Stafford | c5cd229445 | |
July | 845a8f7936 | |
aysilu-kitsune | 2f0784c287 | |
piyushpaliwal | 227c7b8968 | |
July | 01f731cfa4 | |
mrnoname1000 | 87b9f2dd94 | |
Dag | f803ffa79a | |
mrnoname1000 | b5dbec4cc1 | |
mrnoname1000 | 3e0d024888 | |
Dag | cfe81ab2ac | |
mrnoname1000 | 096c3bca73 | |
Tone | ecd717cf58 | |
Tone | 0c540b4637 | |
mrnoname1000 | d0f7f5e2d8 | |
Alexandre Alapetite | e99e026fa8 | |
Dag | 50865d5741 | |
July | dc4134ed1d | |
mrnoname1000 | c6c4b3a24f | |
mrnoname1000 | 63dc500ae0 | |
vincentvd1 | 723768c828 | |
Tone | e7bda080b4 | |
Joseph | 8fd677f4ae | |
Dag | 88f646cf12 | |
Dag | 49d105fd70 | |
Dag | ff49c9f731 | |
Max | c628f99928 | |
Tone | f26808d22c | |
Tone | a1b6bca581 | |
Tone | ec091fb747 |
|
@ -12,6 +12,6 @@ server {
|
|||
|
||||
location ~ \.php$ {
|
||||
include snippets/fastcgi-php.conf;
|
||||
fastcgi_pass 127.0.0.1:9000;
|
||||
fastcgi_pass unix:/var/run/php/php8.2-fpm.sock;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,6 @@
|
|||
# Visual Studio Code
|
||||
.vscode/*
|
||||
|
||||
# Generated files
|
||||
comment*.md
|
||||
comment*.txt
|
|
@ -1,109 +1,183 @@
|
|||
import argparse
|
||||
import requests
|
||||
import itertools
|
||||
import re
|
||||
from bs4 import BeautifulSoup
|
||||
from datetime import datetime
|
||||
from typing import Iterable
|
||||
import os.path
|
||||
|
||||
# This script is specifically written to be used in automation for https://github.com/RSS-Bridge/rss-bridge
|
||||
#
|
||||
# This will scrape the whitelisted bridges in the current state (port 3000) and the PR state (port 3001) of
|
||||
# RSS-Bridge, generate a feed for each of the bridges and save the output as html files.
|
||||
# It also replaces the default static CSS link with a hardcoded link to @em92's public instance, so viewing
|
||||
# It also adds a <base> tag with the url of em's public instance, so viewing
|
||||
# the HTML file locally will actually work as designed.
|
||||
|
||||
def testBridges(bridges,status):
|
||||
for bridge in bridges:
|
||||
if bridge.get('data-ref'): # Some div entries are empty, this ignores those
|
||||
bridgeid = bridge.get('id')
|
||||
bridgeid = bridgeid.split('-')[1] # this extracts a readable bridge name from the bridge metadata
|
||||
print(bridgeid + "\n")
|
||||
bridgestring = '/?action=display&bridge=' + bridgeid + '&format=Html'
|
||||
forms = bridge.find_all("form")
|
||||
formid = 1
|
||||
for form in forms:
|
||||
# a bridge can have multiple contexts, named 'forms' in html
|
||||
# this code will produce a fully working formstring that should create a working feed when called
|
||||
# this will create an example feed for every single context, to test them all
|
||||
formstring = ''
|
||||
errormessages = []
|
||||
parameters = form.find_all("input")
|
||||
lists = form.find_all("select")
|
||||
# this for/if mess cycles through all available input parameters, checks if it required, then pulls
|
||||
# the default or examplevalue and then combines it all together into the formstring
|
||||
# if an example or default value is missing for a required attribute, it will throw an error
|
||||
# any non-required fields are not tested!!!
|
||||
for parameter in parameters:
|
||||
if parameter.get('type') == 'hidden' and parameter.get('name') == 'context':
|
||||
cleanvalue = parameter.get('value').replace(" ","+")
|
||||
formstring = formstring + '&' + parameter.get('name') + '=' + cleanvalue
|
||||
if parameter.get('type') == 'number' or parameter.get('type') == 'text':
|
||||
if parameter.has_attr('required'):
|
||||
if parameter.get('placeholder') == '':
|
||||
if parameter.get('value') == '':
|
||||
errormessages.append(parameter.get('name'))
|
||||
else:
|
||||
formstring = formstring + '&' + parameter.get('name') + '=' + parameter.get('value')
|
||||
class Instance:
|
||||
name = ''
|
||||
url = ''
|
||||
|
||||
def main(instances: Iterable[Instance], with_upload: bool, with_reduced_upload: bool, title: str, output_file: str):
|
||||
start_date = datetime.now()
|
||||
table_rows = []
|
||||
for instance in instances:
|
||||
page = requests.get(instance.url) # Use python requests to grab the rss-bridge main page
|
||||
soup = BeautifulSoup(page.content, "html.parser") # use bs4 to turn the page into soup
|
||||
bridge_cards = soup.select('.bridge-card') # get a soup-formatted list of all bridges on the rss-bridge page
|
||||
table_rows += testBridges(instance, bridge_cards, with_upload, with_reduced_upload) # run the main scraping code with the list of bridges
|
||||
with open(file=output_file, mode='w+', encoding='utf-8') as file:
|
||||
table_rows_value = '\n'.join(sorted(table_rows))
|
||||
file.write(f'''
|
||||
## {title}
|
||||
| Bridge | Context | Status |
|
||||
| - | - | - |
|
||||
{table_rows_value}
|
||||
|
||||
*last change: {start_date.strftime("%A %Y-%m-%d %H:%M:%S")}*
|
||||
'''.strip())
|
||||
|
||||
def testBridges(instance: Instance, bridge_cards: Iterable, with_upload: bool, with_reduced_upload: bool) -> Iterable:
|
||||
instance_suffix = ''
|
||||
if instance.name:
|
||||
instance_suffix = f' ({instance.name})'
|
||||
table_rows = []
|
||||
for bridge_card in bridge_cards:
|
||||
bridgeid = bridge_card.get('id')
|
||||
bridgeid = bridgeid.split('-')[1] # this extracts a readable bridge name from the bridge metadata
|
||||
print(f'{bridgeid}{instance_suffix}')
|
||||
bridgestring = '/?action=display&bridge=' + bridgeid + '&format=Html'
|
||||
bridge_name = bridgeid.replace('Bridge', '')
|
||||
context_forms = bridge_card.find_all("form")
|
||||
form_number = 1
|
||||
for context_form in context_forms:
|
||||
# a bridge can have multiple contexts, named 'forms' in html
|
||||
# this code will produce a fully working formstring that should create a working feed when called
|
||||
# this will create an example feed for every single context, to test them all
|
||||
formstring = ''
|
||||
error_messages = []
|
||||
context_name = '*untitled*'
|
||||
context_name_element = context_form.find_previous_sibling('h5')
|
||||
if context_name_element and context_name_element.text.strip() != '':
|
||||
context_name = context_name_element.text
|
||||
parameters = context_form.find_all("input")
|
||||
lists = context_form.find_all("select")
|
||||
# this for/if mess cycles through all available input parameters, checks if it is required, then pulls
|
||||
# the default or examplevalue and then combines it all together into the formstring
|
||||
# if an example or default value is missing for a required attribute, it will throw an error
|
||||
# any non-required fields are not tested!!!
|
||||
for parameter in parameters:
|
||||
if parameter.get('type') == 'hidden' and parameter.get('name') == 'context':
|
||||
cleanvalue = parameter.get('value').replace(" ","+")
|
||||
formstring = formstring + '&' + parameter.get('name') + '=' + cleanvalue
|
||||
if parameter.get('type') == 'number' or parameter.get('type') == 'text':
|
||||
if parameter.has_attr('required'):
|
||||
if parameter.get('placeholder') == '':
|
||||
if parameter.get('value') == '':
|
||||
name_value = parameter.get('name')
|
||||
error_messages.append(f'Missing example or default value for parameter "{name_value}"')
|
||||
else:
|
||||
formstring = formstring + '&' + parameter.get('name') + '=' + parameter.get('placeholder')
|
||||
# same thing, just for checkboxes. If a checkbox is checked per default, it gets added to the formstring
|
||||
if parameter.get('type') == 'checkbox':
|
||||
if parameter.has_attr('checked'):
|
||||
formstring = formstring + '&' + parameter.get('name') + '=on'
|
||||
for listing in lists:
|
||||
selectionvalue = ''
|
||||
listname = listing.get('name')
|
||||
if 'optgroup' in listing.contents[0].name:
|
||||
listing = list(itertools.chain.from_iterable(listing))
|
||||
firstselectionentry = 1
|
||||
for selectionentry in listing:
|
||||
if firstselectionentry:
|
||||
selectionvalue = selectionentry.get('value')
|
||||
firstselectionentry = 0
|
||||
formstring = formstring + '&' + parameter.get('name') + '=' + parameter.get('value')
|
||||
else:
|
||||
if 'selected' in selectionentry.attrs:
|
||||
selectionvalue = selectionentry.get('value')
|
||||
break
|
||||
formstring = formstring + '&' + listname + '=' + selectionvalue
|
||||
if not errormessages:
|
||||
# if all example/default values are present, form the full request string, run the request, replace the static css
|
||||
# file with the url of em's public instance and then upload it to termpad.com, a pastebin-like-site.
|
||||
r = requests.get(URL + bridgestring + formstring)
|
||||
pagetext = r.text.replace('static/style.css','https://rss-bridge.org/bridge01/static/style.css')
|
||||
pagetext = pagetext.encode("utf_8")
|
||||
termpad = requests.post(url="https://termpad.com/", data=pagetext)
|
||||
termpadurl = termpad.text
|
||||
termpadurl = termpadurl.replace('termpad.com/','termpad.com/raw/')
|
||||
termpadurl = termpadurl.replace('\n','')
|
||||
with open(os.getcwd() + '/comment.txt', 'a+') as file:
|
||||
file.write("\n")
|
||||
file.write("| [`" + bridgeid + '-' + status + '-context' + str(formid) + "`](" + termpadurl + ") | " + date_time + " |")
|
||||
formstring = formstring + '&' + parameter.get('name') + '=' + parameter.get('placeholder')
|
||||
# same thing, just for checkboxes. If a checkbox is checked per default, it gets added to the formstring
|
||||
if parameter.get('type') == 'checkbox':
|
||||
if parameter.has_attr('checked'):
|
||||
formstring = formstring + '&' + parameter.get('name') + '=on'
|
||||
for listing in lists:
|
||||
selectionvalue = ''
|
||||
listname = listing.get('name')
|
||||
cleanlist = []
|
||||
for option in listing.contents:
|
||||
if 'optgroup' in option.name:
|
||||
cleanlist.extend(option)
|
||||
else:
|
||||
cleanlist.append(option)
|
||||
firstselectionentry = 1
|
||||
for selectionentry in cleanlist:
|
||||
if firstselectionentry:
|
||||
selectionvalue = selectionentry.get('value')
|
||||
firstselectionentry = 0
|
||||
else:
|
||||
if 'selected' in selectionentry.attrs:
|
||||
selectionvalue = selectionentry.get('value')
|
||||
break
|
||||
formstring = formstring + '&' + listname + '=' + selectionvalue
|
||||
termpad_url = 'about:blank'
|
||||
if error_messages:
|
||||
status = '<br>'.join(map(lambda m: f'❌ `{m}`', error_messages))
|
||||
else:
|
||||
# if all example/default values are present, form the full request string, run the request, add a <base> tag with
|
||||
# the url of em's public instance to the response text (so that relative paths work, e.g. to the static css file) and
|
||||
# then upload it to termpad.com, a pastebin-like-site.
|
||||
response = requests.get(instance.url + bridgestring + formstring)
|
||||
page_text = response.text.replace('<head>','<head><base href="https://rss-bridge.org/bridge01/" target="_blank">')
|
||||
page_text = page_text.encode("utf_8")
|
||||
soup = BeautifulSoup(page_text, "html.parser")
|
||||
status_messages = []
|
||||
if response.status_code != 200:
|
||||
status_messages += [f'❌ `HTTP status {response.status_code} {response.reason}`']
|
||||
else:
|
||||
# if there are errors (which means that a required value has no example or default value), log out which error appeared
|
||||
termpad = requests.post(url="https://termpad.com/", data=str(errormessages))
|
||||
termpadurl = termpad.text
|
||||
termpadurl = termpadurl.replace('termpad.com/','termpad.com/raw/')
|
||||
termpadurl = termpadurl.replace('\n','')
|
||||
with open(os.getcwd() + '/comment.txt', 'a+') as file:
|
||||
file.write("\n")
|
||||
file.write("| [`" + bridgeid + '-' + status + '-context' + str(formid) + "`](" + termpadurl + ") | " + date_time + " |")
|
||||
formid += 1
|
||||
feed_items = soup.select('.feeditem')
|
||||
feed_items_length = len(feed_items)
|
||||
if feed_items_length <= 0:
|
||||
status_messages += [f'⚠️ `The feed has no items`']
|
||||
elif feed_items_length == 1 and len(soup.select('.error')) > 0:
|
||||
status_messages += [f'❌ `{getFirstLine(feed_items[0].text)}`']
|
||||
status_messages += map(lambda e: f'❌ `{getFirstLine(e.text)}`', soup.select('.error .error-type') + soup.select('.error .error-message'))
|
||||
for item_element in soup.select('.feeditem'): # remove all feed items to not accidentally selected <pre> tags from item content
|
||||
item_element.decompose()
|
||||
status_messages += map(lambda e: f'⚠️ `{getFirstLine(e.text)}`', soup.find_all('pre'))
|
||||
status_messages = list(dict.fromkeys(status_messages)) # remove duplicates
|
||||
status = '<br>'.join(status_messages)
|
||||
status_is_ok = status == '';
|
||||
if status_is_ok:
|
||||
status = '✔️'
|
||||
if with_upload and (not with_reduced_upload or not status_is_ok):
|
||||
termpad = requests.post(url="https://termpad.com/", data=page_text)
|
||||
termpad_url = termpad.text.strip()
|
||||
termpad_url = termpad_url.replace('termpad.com/','termpad.com/raw/')
|
||||
table_rows.append(f'| {bridge_name} | [{form_number} {context_name}{instance_suffix}]({termpad_url}) | {status} |')
|
||||
form_number += 1
|
||||
return table_rows
|
||||
|
||||
gitstatus = ["current", "pr"]
|
||||
now = datetime.now()
|
||||
date_time = now.strftime("%Y-%m-%d, %H:%M:%S")
|
||||
def getFirstLine(value: str) -> str:
|
||||
# trim whitespace and remove text that can break the table or is simply unnecessary
|
||||
clean_value = re.sub('^\[[^\]]+\]\s*rssbridge\.|[\|`]', '', value.strip())
|
||||
first_line = next(iter(clean_value.splitlines()), '')
|
||||
max_length = 250
|
||||
if (len(first_line) > max_length):
|
||||
first_line = first_line[:max_length] + '...'
|
||||
return first_line
|
||||
|
||||
with open(os.getcwd() + '/comment.txt', 'w+') as file:
|
||||
file.write(''' ## Pull request artifacts
|
||||
| file | last change |
|
||||
| ---- | ------ |''')
|
||||
|
||||
for status in gitstatus: # run this twice, once for the current version, once for the PR version
|
||||
if status == "current":
|
||||
port = "3000" # both ports are defined in the corresponding workflow .yml file
|
||||
elif status == "pr":
|
||||
port = "3001"
|
||||
URL = "http://localhost:" + port
|
||||
page = requests.get(URL) # Use python requests to grab the rss-bridge main page
|
||||
soup = BeautifulSoup(page.content, "html.parser") # use bs4 to turn the page into soup
|
||||
bridges = soup.find_all("section") # get a soup-formatted list of all bridges on the rss-bridge page
|
||||
testBridges(bridges,status) # run the main scraping code with the list of bridges and the info if this is for the current version or the pr version
|
||||
if __name__ == '__main__':
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('--instances', nargs='+')
|
||||
parser.add_argument('--no-upload', action='store_true')
|
||||
parser.add_argument('--reduced-upload', action='store_true')
|
||||
parser.add_argument('--title', default='Pull request artifacts')
|
||||
parser.add_argument('--output-file', default=os.getcwd() + '/comment.txt')
|
||||
args = parser.parse_args()
|
||||
instances = []
|
||||
if args.instances:
|
||||
for instance_arg in args.instances:
|
||||
instance_arg_parts = instance_arg.split('::')
|
||||
instance = Instance()
|
||||
instance.name = instance_arg_parts[1] if len(instance_arg_parts) >= 2 else ''
|
||||
instance.url = instance_arg_parts[0]
|
||||
instances.append(instance)
|
||||
else:
|
||||
instance = Instance()
|
||||
instance.name = 'current'
|
||||
instance.url = 'http://localhost:3000'
|
||||
instances.append(instance)
|
||||
instance = Instance()
|
||||
instance.name = 'pr'
|
||||
instance.url = 'http://localhost:3001'
|
||||
instances.append(instance)
|
||||
main(
|
||||
instances=instances,
|
||||
with_upload=not args.no_upload,
|
||||
with_reduced_upload=args.reduced_upload and not args.no_upload,
|
||||
title=args.title,
|
||||
output_file=args.output_file
|
||||
);
|
|
@ -8,6 +8,8 @@ jobs:
|
|||
test-pr:
|
||||
name: Generate HTML
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
PYTHONUNBUFFERED: 1
|
||||
# Needs additional permissions https://github.com/actions/first-interaction/issues/10#issuecomment-1041402989
|
||||
steps:
|
||||
- name: Check out self
|
||||
|
@ -18,11 +20,11 @@ jobs:
|
|||
- name: Check out rss-bridge
|
||||
run: |
|
||||
PR=${{github.event.number}};
|
||||
wget -O requirements.txt https://raw.githubusercontent.com/RSS-Bridge/rss-bridge/master/.github/prtester-requirements.txt;
|
||||
wget https://raw.githubusercontent.com/RSS-Bridge/rss-bridge/master/.github/prtester.py;
|
||||
wget -O requirements.txt https://raw.githubusercontent.com/$GITHUB_REPOSITORY/${{ github.event.pull_request.base.ref }}/.github/prtester-requirements.txt;
|
||||
wget https://raw.githubusercontent.com/$GITHUB_REPOSITORY/${{ github.event.pull_request.base.ref }}/.github/prtester.py;
|
||||
wget https://patch-diff.githubusercontent.com/raw/$GITHUB_REPOSITORY/pull/$PR.patch;
|
||||
touch DEBUG;
|
||||
cat $PR.patch | grep " bridges/.*\.php" | sed "s= bridges/\(.*\)Bridge.php.*=\1=g" | sort | uniq > whitelist.txt
|
||||
cat $PR.patch | grep "\bbridges/[A-Za-z0-9]*Bridge\.php\b" | sed "s=.*\bbridges/\([A-Za-z0-9]*\)Bridge\.php\b.*=\1=g" | sort | uniq > whitelist.txt
|
||||
- name: Start Docker - Current
|
||||
run: |
|
||||
docker run -d -v $GITHUB_WORKSPACE/whitelist.txt:/app/whitelist.txt -v $GITHUB_WORKSPACE/DEBUG:/app/DEBUG -p 3000:80 ghcr.io/rss-bridge/rss-bridge:latest
|
||||
|
|
|
@ -230,6 +230,9 @@ pip-log.txt
|
|||
DEBUG
|
||||
config.ini.php
|
||||
config/*
|
||||
!config/nginx.conf
|
||||
!config/php-fpm.conf
|
||||
!config/php.ini
|
||||
|
||||
######################
|
||||
## VisualStudioCode ##
|
||||
|
|
43
Dockerfile
43
Dockerfile
|
@ -1,36 +1,47 @@
|
|||
FROM lwthiker/curl-impersonate:0.5-ff-slim-buster AS curlimpersonate
|
||||
|
||||
FROM php:8.0.27-fpm-buster AS rssbridge
|
||||
FROM debian:12-slim AS rssbridge
|
||||
|
||||
LABEL description="RSS-Bridge is a PHP project capable of generating RSS and Atom feeds for websites that don't have one."
|
||||
LABEL repository="https://github.com/RSS-Bridge/rss-bridge"
|
||||
LABEL website="https://github.com/RSS-Bridge/rss-bridge"
|
||||
|
||||
ARG DEBIAN_FRONTEND=noninteractive
|
||||
RUN apt-get update && \
|
||||
apt-get install --yes --no-install-recommends \
|
||||
ca-certificates \
|
||||
nginx \
|
||||
zlib1g-dev \
|
||||
libzip-dev \
|
||||
libmemcached-dev \
|
||||
nss-plugin-pem \
|
||||
libicu-dev && \
|
||||
docker-php-ext-install zip && \
|
||||
docker-php-ext-install intl && \
|
||||
pecl install memcached && \
|
||||
docker-php-ext-enable memcached && \
|
||||
docker-php-ext-enable opcache && \
|
||||
mv "$PHP_INI_DIR/php.ini-production" "$PHP_INI_DIR/php.ini"
|
||||
php-curl \
|
||||
php-fpm \
|
||||
php-intl \
|
||||
# php-json is enabled by default with PHP 8.2 in Debian 12
|
||||
php-mbstring \
|
||||
php-memcached \
|
||||
# php-opcache is enabled by default with PHP 8.2 in Debian 12
|
||||
# php-openssl is enabled by default with PHP 8.2 in Debian 12
|
||||
php-sqlite3 \
|
||||
php-xml \
|
||||
php-zip \
|
||||
# php-zlib is enabled by default with PHP 8.2 in Debian 12
|
||||
&& \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
COPY ./config/nginx.conf /etc/nginx/sites-enabled/default
|
||||
|
||||
COPY --chown=www-data:www-data ./ /app/
|
||||
# logs should go to stdout / stderr
|
||||
RUN ln -sfT /dev/stderr /var/log/nginx/error.log; \
|
||||
ln -sfT /dev/stdout /var/log/nginx/access.log; \
|
||||
chown -R --no-dereference www-data:adm /var/log/nginx/
|
||||
|
||||
COPY --from=curlimpersonate /usr/local/lib/libcurl-impersonate-ff.so /usr/local/lib/curl-impersonate/
|
||||
|
||||
ENV LD_PRELOAD /usr/local/lib/curl-impersonate/libcurl-impersonate-ff.so
|
||||
|
||||
ENV CURL_IMPERSONATE ff91esr
|
||||
|
||||
COPY ./config/nginx.conf /etc/nginx/sites-available/default
|
||||
COPY ./config/php-fpm.conf /etc/php/8.2/fpm/pool.d/rss-bridge.conf
|
||||
COPY ./config/php.ini /etc/php/8.2/fpm/conf.d/90-rss-bridge.ini
|
||||
|
||||
COPY --chown=www-data:www-data ./ /app/
|
||||
|
||||
EXPOSE 80
|
||||
|
||||
ENTRYPOINT ["/app/docker-entrypoint.sh"]
|
||||
|
|
331
README.md
331
README.md
|
@ -2,12 +2,18 @@
|
|||
|
||||
![RSS-Bridge](static/logo_600px.png)
|
||||
|
||||
RSS-Bridge is a PHP project capable of generating RSS and Atom feeds for websites that don't have one.
|
||||
RSS-Bridge is a PHP web application.
|
||||
|
||||
It generates web feeds for websites that don't have one.
|
||||
|
||||
Officially hosted instance: https://rss-bridge.org/bridge01/
|
||||
|
||||
IRC channel #rssbridge at https://libera.chat/
|
||||
|
||||
|
||||
[![LICENSE](https://img.shields.io/badge/license-UNLICENSE-blue.svg)](UNLICENSE)
|
||||
[![GitHub release](https://img.shields.io/github/release/rss-bridge/rss-bridge.svg?logo=github)](https://github.com/rss-bridge/rss-bridge/releases/latest)
|
||||
[![irc.libera.chat](https://img.shields.io/badge/irc.libera.chat-%23rssbridge-blue.svg)](https://web.libera.chat/#rssbridge)
|
||||
[![Chat on Matrix](https://matrix.to/img/matrix-badge.svg)](https://matrix.to/#/#rssbridge:libera.chat)
|
||||
[![Actions Status](https://img.shields.io/github/actions/workflow/status/RSS-Bridge/rss-bridge/tests.yml?branch=master&label=GitHub%20Actions&logo=github)](https://github.com/RSS-Bridge/rss-bridge/actions)
|
||||
|
||||
|||
|
||||
|
@ -17,66 +23,178 @@ RSS-Bridge is a PHP project capable of generating RSS and Atom feeds for website
|
|||
|![Screenshot #5](/static/screenshot-5.png?raw=true)|![Screenshot #6](/static/screenshot-6.png?raw=true)|
|
||||
|![Screenshot #7](/static/twitter-form.png?raw=true)|![Screenshot #8](/static/twitter-rasmus.png?raw=true)|
|
||||
|
||||
## A subset of bridges
|
||||
## A subset of bridges (17/412)
|
||||
|
||||
* `YouTube` : YouTube user channel, playlist or search
|
||||
* `Twitter` : Return keyword/hashtag search or user timeline
|
||||
* `Telegram` : Return the latest posts from a public group
|
||||
* `Reddit` : Return the latest posts from a subreddit or user
|
||||
* `Filter` : Filter an existing feed url
|
||||
* `Vk` : Latest posts from a user or group
|
||||
* `FeedMerge` : Merge two or more existing feeds into one
|
||||
* `Twitch` : Fetch the latest videos from a channel
|
||||
* `ThePirateBay` : Returns the newest indexed torrents from [The Pirate Bay](https://thepiratebay.se/) with keywords
|
||||
|
||||
And [many more](bridges/), thanks to the community!
|
||||
* `CssSelectorBridge`: [Scrape out a feed using CSS selectors](https://rss-bridge.org/bridge01/#bridge-CssSelectorBridge)
|
||||
* `FeedMergeBridge`: [Combine multiple feeds into one](https://rss-bridge.org/bridge01/#bridge-FeedMergeBridge)
|
||||
* `FeedReducerBridge`: [Reduce a noisy feed by some percentage](https://rss-bridge.org/bridge01/#bridge-FeedReducerBridge)
|
||||
* `FilterBridge`: [Filter a feed by excluding/including items by keyword](https://rss-bridge.org/bridge01/#bridge-FilterBridge)
|
||||
* `GettrBridge`: [Fetches the latest posts from a GETTR user](https://rss-bridge.org/bridge01/#bridge-GettrBridge)
|
||||
* `MastodonBridge`: [Fetches statuses from a Mastodon (ActivityPub) instance](https://rss-bridge.org/bridge01/#bridge-MastodonBridge)
|
||||
* `RedditBridge`: [Fetches posts from a user/subreddit (with filtering options)](https://rss-bridge.org/bridge01/#bridge-RedditBridge)
|
||||
* `RumbleBridge`: [Fetches channel/user videos](https://rss-bridge.org/bridge01/#bridge-RumbleBridge)
|
||||
* `SoundcloudBridge`: [Fetches music by username](https://rss-bridge.org/bridge01/#bridge-SoundcloudBridge)
|
||||
* `TelegramBridge`: [Fetches posts from a public channel](https://rss-bridge.org/bridge01/#bridge-TelegramBridge)
|
||||
* `ThePirateBayBridge`: [Fetches torrents by search/user/category](https://rss-bridge.org/bridge01/#bridge-ThePirateBayBridge)
|
||||
* `TikTokBridge`: [Fetches posts by username](https://rss-bridge.org/bridge01/#bridge-TikTokBridge)
|
||||
* `TwitchBridge`: [Fetches videos from channel](https://rss-bridge.org/bridge01/#bridge-TwitchBridge)
|
||||
* `TwitterBridge`: [Fetches tweets](https://rss-bridge.org/bridge01/#bridge-TwitterBridge)
|
||||
* `VkBridge`: [Fetches posts from user/group](https://rss-bridge.org/bridge01/#bridge-VkBridge)
|
||||
* `XPathBridge`: [Scrape out a feed using XPath expressions](https://rss-bridge.org/bridge01/#bridge-XPathBridge)
|
||||
* `YoutubeBridge`: [Fetches videos by username/channel/playlist/search](https://rss-bridge.org/bridge01/#bridge-YoutubeBridge)
|
||||
* `YouTubeCommunityTabBridge`: [Fetches posts from a channel's community tab](https://rss-bridge.org/bridge01/#bridge-YouTubeCommunityTabBridge)
|
||||
|
||||
[Full documentation](https://rss-bridge.github.io/rss-bridge/index.html)
|
||||
|
||||
Check out RSS-Bridge right now on https://rss-bridge.org/bridge01 or find another
|
||||
Check out RSS-Bridge right now on https://rss-bridge.org/bridge01/
|
||||
|
||||
Alternatively find another
|
||||
[public instance](https://rss-bridge.github.io/rss-bridge/General/Public_Hosts.html).
|
||||
|
||||
Requires minimum PHP 7.4.
|
||||
|
||||
## Tutorial
|
||||
|
||||
RSS-Bridge requires PHP 7.4 (or higher).
|
||||
### How to install on traditional shared web hosting
|
||||
|
||||
### Install with git:
|
||||
RSS-Bridge can basically be unzipped in a web folder. Should be working instantly.
|
||||
|
||||
Latest zip as of Sep 2023: https://github.com/RSS-Bridge/rss-bridge/archive/refs/tags/2023-09-24.zip
|
||||
|
||||
### How to install on Debian 12 (nginx + php-fpm)
|
||||
|
||||
These instructions have been tested on a fresh Debian 12 VM from Digital Ocean (1vcpu-512mb-10gb, 5 USD/month).
|
||||
|
||||
```shell
|
||||
timedatectl set-timezone Europe/Oslo
|
||||
|
||||
apt install git nginx php8.2-fpm php-mbstring php-simplexml php-curl
|
||||
|
||||
# Create a new user account
|
||||
useradd --shell /bin/bash --create-home rss-bridge
|
||||
|
||||
```bash
|
||||
cd /var/www
|
||||
git clone https://github.com/RSS-Bridge/rss-bridge.git
|
||||
|
||||
# Give the http user write permission to the cache folder
|
||||
chown www-data:www-data /var/www/rss-bridge/cache
|
||||
# Create folder and change ownership
|
||||
mkdir rss-bridge && chown rss-bridge:rss-bridge rss-bridge/
|
||||
|
||||
# Optionally copy over the default config file
|
||||
cp config.default.ini.php config.ini.php
|
||||
# Become user
|
||||
su rss-bridge
|
||||
|
||||
# Optionally copy over the default whitelist file
|
||||
cp whitelist.default.txt whitelist.txt
|
||||
# Fetch latest master
|
||||
git clone https://github.com/RSS-Bridge/rss-bridge.git rss-bridge/
|
||||
cd rss-bridge
|
||||
|
||||
# Copy over the default config
|
||||
cp -v config.default.ini.php config.ini.php
|
||||
|
||||
# Give full permissions only to owner (rss-bridge)
|
||||
chmod 700 -R ./
|
||||
|
||||
# Give read and execute to others (nginx and php-fpm)
|
||||
chmod o+rx ./ ./static
|
||||
|
||||
# Give read to others (nginx)
|
||||
chmod o+r -R ./static
|
||||
```
|
||||
|
||||
Example config for nginx:
|
||||
Nginx config:
|
||||
|
||||
```nginx
|
||||
# /etc/nginx/sites-enabled/rssbridge
|
||||
# /etc/nginx/sites-enabled/rss-bridge.conf
|
||||
|
||||
server {
|
||||
listen 80;
|
||||
server_name example.com;
|
||||
root /var/www/rss-bridge;
|
||||
index index.php;
|
||||
access_log /var/log/nginx/rss-bridge.access.log;
|
||||
error_log /var/log/nginx/rss-bridge.error.log;
|
||||
|
||||
location ~ \.php$ {
|
||||
# Intentionally not setting a root folder here
|
||||
|
||||
# autoindex is off by default but feels good to explicitly turn off
|
||||
autoindex off;
|
||||
|
||||
# Static content only served here
|
||||
location /static/ {
|
||||
alias /var/www/rss-bridge/static/;
|
||||
}
|
||||
|
||||
# Pass off to php-fpm only when location is exactly /
|
||||
location = / {
|
||||
root /var/www/rss-bridge/;
|
||||
include snippets/fastcgi-php.conf;
|
||||
fastcgi_read_timeout 60s;
|
||||
fastcgi_pass unix:/run/php/php-fpm.sock;
|
||||
fastcgi_pass unix:/run/php/rss-bridge.sock;
|
||||
}
|
||||
|
||||
# Reduce spam
|
||||
location = /favicon.ico {
|
||||
access_log off;
|
||||
log_not_found off;
|
||||
}
|
||||
|
||||
# Reduce spam
|
||||
location = /robots.txt {
|
||||
access_log off;
|
||||
log_not_found off;
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Install with Docker:
|
||||
PHP FPM pool config:
|
||||
```ini
|
||||
; /etc/php/8.2/fpm/pool.d/rss-bridge.conf
|
||||
|
||||
Install by using docker image from Docker Hub:
|
||||
[rss-bridge]
|
||||
|
||||
user = rss-bridge
|
||||
group = rss-bridge
|
||||
|
||||
listen = /run/php/rss-bridge.sock
|
||||
|
||||
listen.owner = www-data
|
||||
listen.group = www-data
|
||||
|
||||
pm = static
|
||||
pm.max_children = 10
|
||||
pm.max_requests = 500
|
||||
```
|
||||
|
||||
PHP ini config:
|
||||
```ini
|
||||
; /etc/php/8.2/fpm/conf.d/30-rss-bridge.ini
|
||||
|
||||
max_execution_time = 20
|
||||
memory_limit = 64M
|
||||
```
|
||||
|
||||
Restart fpm and nginx:
|
||||
|
||||
```shell
|
||||
# Lint and restart php-fpm
|
||||
php-fpm8.2 -t
|
||||
systemctl restart php8.2-fpm
|
||||
|
||||
# Lint and restart nginx
|
||||
nginx -t
|
||||
systemctl restart nginx
|
||||
```
|
||||
|
||||
### How to install from Composer
|
||||
|
||||
Install the latest release.
|
||||
|
||||
```shell
|
||||
cd /var/www
|
||||
composer create-project -v --no-dev rss-bridge/rss-bridge
|
||||
```
|
||||
|
||||
### How to install with Caddy
|
||||
|
||||
TODO. See https://github.com/RSS-Bridge/rss-bridge/issues/3785
|
||||
|
||||
### Install from Docker Hub:
|
||||
|
||||
Install by downloading the docker image from Docker Hub:
|
||||
|
||||
```bash
|
||||
# Create container
|
||||
|
@ -88,7 +206,7 @@ docker start rss-bridge
|
|||
|
||||
Browse http://localhost:3000/
|
||||
|
||||
Install by locally building the image:
|
||||
### Install by locally building from Dockerfile
|
||||
|
||||
```bash
|
||||
# Build image from Dockerfile
|
||||
|
@ -97,13 +215,13 @@ docker build -t rss-bridge .
|
|||
# Create container
|
||||
docker create --name rss-bridge --publish 3000:80 rss-bridge
|
||||
|
||||
# Start the container
|
||||
# Start container
|
||||
docker start rss-bridge
|
||||
```
|
||||
|
||||
Browse http://localhost:3000/
|
||||
|
||||
#### Install with docker-compose
|
||||
### Install with docker-compose
|
||||
|
||||
Create a `docker-compose.yml` file locally with the following content:
|
||||
```yml
|
||||
|
@ -126,11 +244,12 @@ docker-compose up
|
|||
|
||||
Browse http://localhost:3000/
|
||||
|
||||
### Alternative installation methods
|
||||
### Other installation methods
|
||||
|
||||
[![Deploy on Scalingo](https://cdn.scalingo.com/deploy/button.svg)](https://my.scalingo.com/deploy?source=https://github.com/sebsauvage/rss-bridge)
|
||||
[![Deploy to Heroku](https://www.herokucdn.com/deploy/button.svg)](https://heroku.com/deploy)
|
||||
[![Deploy to Cloudron](https://cloudron.io/img/button.svg)](https://www.cloudron.io/store/com.rssbridgeapp.cloudronapp.html)
|
||||
[![Run on PikaPods](https://www.pikapods.com/static/run-button.svg)](https://www.pikapods.com/pods?run=rssbridge)
|
||||
|
||||
The Heroku quick deploy currently does not work. It might possibly work if you fork this repo and
|
||||
modify the `repository` in `scalingo.json`. See https://github.com/RSS-Bridge/rss-bridge/issues/2688
|
||||
|
@ -140,6 +259,22 @@ Learn more in
|
|||
|
||||
## How-to
|
||||
|
||||
### How to fix "PHP Fatal error: Uncaught Exception: The FileCache path is not writable"
|
||||
|
||||
```shell
|
||||
# Give rss-bridge ownership
|
||||
chown rss-bridge:rss-bridge -R /var/www/rss-bridge/cache
|
||||
|
||||
# Or, give www-data ownership
|
||||
chown www-data:www-data -R /var/www/rss-bridge/cache
|
||||
|
||||
# Or, give everyone write permission
|
||||
chmod 777 -R /var/www/rss-bridge/cache
|
||||
|
||||
# Or last ditch effort (CAREFUL)
|
||||
rm -rf /var/www/rss-bridge/cache/ && mkdir /var/www/rss-bridge/cache/
|
||||
```
|
||||
|
||||
### How to create a new bridge from scratch
|
||||
|
||||
Create the new bridge in e.g. `bridges/BearBlogBridge.php`:
|
||||
|
@ -169,29 +304,96 @@ Learn more in [bridge api](https://rss-bridge.github.io/rss-bridge/Bridge_API/in
|
|||
|
||||
### How to enable all bridges
|
||||
|
||||
Write an asterisk to `whitelist.txt`:
|
||||
Modify `config.ini.php`:
|
||||
|
||||
echo '*' > whitelist.txt
|
||||
enabled_bridges[] = *
|
||||
|
||||
Learn more in [enabling bridges](https://rss-bridge.github.io/rss-bridge/For_Hosts/Whitelisting.html)
|
||||
### How to enable some bridges
|
||||
|
||||
### How to enable a bridge
|
||||
|
||||
Add the bridge name to `whitelist.txt`:
|
||||
|
||||
echo 'FirefoxAddonsBridge' >> whitelist.txt
|
||||
```
|
||||
enabled_bridges[] = TwitchBridge
|
||||
enabled_bridges[] = GettrBridge
|
||||
```
|
||||
|
||||
### How to enable debug mode
|
||||
|
||||
Create a file named `DEBUG`:
|
||||
The
|
||||
[debug mode](https://rss-bridge.github.io/rss-bridge/For_Developers/Debug_mode.html)
|
||||
disables the majority of caching operations.
|
||||
|
||||
touch DEBUG
|
||||
enable_debug_mode = true
|
||||
|
||||
Learn more in [debug mode](https://rss-bridge.github.io/rss-bridge/For_Developers/Debug_mode.html).
|
||||
### How to switch to memcached as cache backend
|
||||
|
||||
```
|
||||
[cache]
|
||||
|
||||
; Cache backend: file (default), sqlite, memcached, null
|
||||
type = "memcached"
|
||||
```
|
||||
|
||||
### How to switch to sqlite3 as cache backend
|
||||
|
||||
type = "sqlite"
|
||||
|
||||
### How to disable bridge errors (as feed items)
|
||||
|
||||
When a bridge fails, RSS-Bridge will produce a feed with a single item describing the error.
|
||||
|
||||
This way, feed readers pick it up and you are notified.
|
||||
|
||||
If you don't want this behaviour, switch the error output to `http`:
|
||||
|
||||
[error]
|
||||
|
||||
; Defines how error messages are returned by RSS-Bridge
|
||||
;
|
||||
; "feed" = As part of the feed (default)
|
||||
; "http" = As HTTP error message
|
||||
; "none" = No errors are reported
|
||||
output = "http"
|
||||
|
||||
### How to accumulate errors before finally reporting them
|
||||
|
||||
Modify `report_limit` so that an error must occur 3 times before it is reported.
|
||||
|
||||
; Defines how often an error must occur before it is reported to the user
|
||||
report_limit = 3
|
||||
|
||||
The report count is reset to 0 each day.
|
||||
|
||||
### How to password-protect the instance
|
||||
|
||||
HTTP basic access authentication:
|
||||
|
||||
[authentication]
|
||||
|
||||
enable = true
|
||||
username = "alice"
|
||||
password = "cat"
|
||||
|
||||
Will typically require feed readers to be configured with the credentials.
|
||||
|
||||
It may also be possible to manually include the credentials in the URL:
|
||||
|
||||
https://alice:cat@rss-bridge.org/bridge01/?action=display&bridge=FabriceBellardBridge&format=Html
|
||||
|
||||
### How to create a new output format
|
||||
|
||||
[Create a new format](https://rss-bridge.github.io/rss-bridge/Format_API/index.html).
|
||||
See `formats/PlaintextFormat.php` for an example.
|
||||
|
||||
### How to run unit tests and linter
|
||||
|
||||
These commands require that you have installed the dev dependencies in `composer.json`.
|
||||
|
||||
./vendor/bin/phpunit
|
||||
./vendor/bin/phpcs --standard=phpcs.xml --warning-severity=0 --extensions=php -p ./
|
||||
|
||||
### How to spawn a minimal development environment
|
||||
|
||||
php -S 127.0.0.1:9001
|
||||
|
||||
http://127.0.0.1:9001/
|
||||
|
||||
## Explanation
|
||||
|
||||
|
@ -204,15 +406,19 @@ webmaster of
|
|||
See [CONTRIBUTORS.md](CONTRIBUTORS.md)
|
||||
|
||||
RSS-Bridge uses caching to prevent services from banning your server for repeatedly updating feeds.
|
||||
The specific cache duration can be different between bridges. Cached files are deleted automatically after 24 hours.
|
||||
The specific cache duration can be different between bridges.
|
||||
Cached files are deleted automatically after 24 hours.
|
||||
|
||||
RSS-Bridge allows you to take full control over which bridges are displayed to the user.
|
||||
That way you can host your own RSS-Bridge service with your favorite collection of bridges!
|
||||
|
||||
Current maintainers (as of 2023): @dvikan and @Mynacol #2519
|
||||
|
||||
## Reference
|
||||
|
||||
### FeedItem properties
|
||||
### Feed item structure
|
||||
|
||||
This is the feed item structure that bridges are expected to produce.
|
||||
|
||||
```php
|
||||
$item = [
|
||||
|
@ -235,13 +441,22 @@ That way you can host your own RSS-Bridge service with your favorite collection
|
|||
]
|
||||
```
|
||||
|
||||
### Output formats:
|
||||
### Output formats
|
||||
|
||||
* `Atom` : Atom feed, for use in feed readers
|
||||
* `Html` : Simple HTML page
|
||||
* `Json` : JSON, for consumption by other applications
|
||||
* `Mrss` : MRSS feed, for use in feed readers
|
||||
* `Plaintext` : Raw text, for consumption by other applications
|
||||
* `Atom`: Atom feed, for use in feed readers
|
||||
* `Html`: Simple HTML page
|
||||
* `Json`: JSON, for consumption by other applications
|
||||
* `Mrss`: MRSS feed, for use in feed readers
|
||||
* `Plaintext`: Raw text, for consumption by other applications
|
||||
* `Sfeed`: Text, TAB separated
|
||||
|
||||
### Cache backends
|
||||
|
||||
* `File`
|
||||
* `SQLite`
|
||||
* `Memcached`
|
||||
* `Array`
|
||||
* `Null`
|
||||
|
||||
### Licenses
|
||||
|
||||
|
|
|
@ -1,17 +1,5 @@
|
|||
<?php
|
||||
|
||||
/**
|
||||
* This file is part of RSS-Bridge, a PHP project capable of generating RSS and
|
||||
* Atom feeds for websites that don't have one.
|
||||
*
|
||||
* For the full license information, please view the UNLICENSE file distributed
|
||||
* with this source code.
|
||||
*
|
||||
* @package Core
|
||||
* @license http://unlicense.org/ UNLICENSE
|
||||
* @link https://github.com/rss-bridge/rss-bridge
|
||||
*/
|
||||
|
||||
/**
|
||||
* Checks if the website for a given bridge is reachable.
|
||||
*
|
||||
|
@ -34,51 +22,45 @@ class ConnectivityAction implements ActionInterface
|
|||
public function execute(array $request)
|
||||
{
|
||||
if (!Debug::isEnabled()) {
|
||||
throw new \Exception('This action is only available in debug mode!');
|
||||
return new Response('This action is only available in debug mode!', 403);
|
||||
}
|
||||
|
||||
if (!isset($request['bridge'])) {
|
||||
$bridgeName = $request['bridge'] ?? null;
|
||||
if (!$bridgeName) {
|
||||
return render_template('connectivity.html.php');
|
||||
}
|
||||
|
||||
$bridgeClassName = $this->bridgeFactory->sanitizeBridgeName($request['bridge']);
|
||||
|
||||
if ($bridgeClassName === null) {
|
||||
throw new \InvalidArgumentException('Bridge name invalid!');
|
||||
$bridgeClassName = $this->bridgeFactory->createBridgeClassName($bridgeName);
|
||||
if (!$bridgeClassName) {
|
||||
return new Response('Bridge not found', 404);
|
||||
}
|
||||
|
||||
return $this->reportBridgeConnectivity($bridgeClassName);
|
||||
}
|
||||
|
||||
private function reportBridgeConnectivity($bridgeClassName)
|
||||
{
|
||||
if (!$this->bridgeFactory->isWhitelisted($bridgeClassName)) {
|
||||
if (!$this->bridgeFactory->isEnabled($bridgeClassName)) {
|
||||
throw new \Exception('Bridge is not whitelisted!');
|
||||
}
|
||||
|
||||
$retVal = [
|
||||
'bridge' => $bridgeClassName,
|
||||
'successful' => false,
|
||||
'http_code' => 200,
|
||||
];
|
||||
|
||||
$bridge = $this->bridgeFactory->create($bridgeClassName);
|
||||
$curl_opts = [
|
||||
CURLOPT_CONNECTTIMEOUT => 5
|
||||
CURLOPT_CONNECTTIMEOUT => 5,
|
||||
CURLOPT_FOLLOWLOCATION => true,
|
||||
];
|
||||
$result = [
|
||||
'bridge' => $bridgeClassName,
|
||||
'successful' => false,
|
||||
'http_code' => null,
|
||||
];
|
||||
try {
|
||||
$reply = getContents($bridge::URI, [], $curl_opts, true);
|
||||
|
||||
if ($reply['code'] === 200) {
|
||||
$retVal['successful'] = true;
|
||||
if (strpos(implode('', $reply['status_lines']), '301 Moved Permanently')) {
|
||||
$retVal['http_code'] = 301;
|
||||
}
|
||||
$response = getContents($bridge::URI, [], $curl_opts, true);
|
||||
$result['http_code'] = $response['code'];
|
||||
if (in_array($response['code'], [200])) {
|
||||
$result['successful'] = true;
|
||||
}
|
||||
} catch (\Exception $e) {
|
||||
$retVal['successful'] = false;
|
||||
}
|
||||
|
||||
return new Response(Json::encode($retVal), 200, ['Content-Type' => 'text/json']);
|
||||
return new Response(Json::encode($result), 200, ['content-type' => 'text/json']);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,17 +1,5 @@
|
|||
<?php
|
||||
|
||||
/**
|
||||
* This file is part of RSS-Bridge, a PHP project capable of generating RSS and
|
||||
* Atom feeds for websites that don't have one.
|
||||
*
|
||||
* For the full license information, please view the UNLICENSE file distributed
|
||||
* with this source code.
|
||||
*
|
||||
* @package Core
|
||||
* @license http://unlicense.org/ UNLICENSE
|
||||
* @link https://github.com/rss-bridge/rss-bridge
|
||||
*/
|
||||
|
||||
class DetectAction implements ActionInterface
|
||||
{
|
||||
public function execute(array $request)
|
||||
|
@ -29,7 +17,7 @@ class DetectAction implements ActionInterface
|
|||
$bridgeFactory = new BridgeFactory();
|
||||
|
||||
foreach ($bridgeFactory->getBridgeClassNames() as $bridgeClassName) {
|
||||
if (!$bridgeFactory->isWhitelisted($bridgeClassName)) {
|
||||
if (!$bridgeFactory->isEnabled($bridgeClassName)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -45,7 +33,7 @@ class DetectAction implements ActionInterface
|
|||
$bridgeParams['format'] = $format;
|
||||
|
||||
$url = '?action=display&' . http_build_query($bridgeParams);
|
||||
return new Response('', 301, ['Location' => $url]);
|
||||
return new Response('', 301, ['location' => $url]);
|
||||
}
|
||||
|
||||
throw new \Exception('No bridge found for given URL: ' . $targetURL);
|
||||
|
|
|
@ -1,210 +1,186 @@
|
|||
<?php
|
||||
|
||||
/**
|
||||
* This file is part of RSS-Bridge, a PHP project capable of generating RSS and
|
||||
* Atom feeds for websites that don't have one.
|
||||
*
|
||||
* For the full license information, please view the UNLICENSE file distributed
|
||||
* with this source code.
|
||||
*
|
||||
* @package Core
|
||||
* @license http://unlicense.org/ UNLICENSE
|
||||
* @link https://github.com/rss-bridge/rss-bridge
|
||||
*/
|
||||
|
||||
class DisplayAction implements ActionInterface
|
||||
{
|
||||
private CacheInterface $cache;
|
||||
private Logger $logger;
|
||||
|
||||
public function __construct()
|
||||
{
|
||||
$this->cache = RssBridge::getCache();
|
||||
$this->logger = RssBridge::getLogger();
|
||||
}
|
||||
|
||||
public function execute(array $request)
|
||||
{
|
||||
if (Configuration::getConfig('system', 'enable_maintenance_mode')) {
|
||||
return new Response(render(__DIR__ . '/../templates/error.html.php', [
|
||||
'title' => '503 Service Unavailable',
|
||||
'message' => 'RSS-Bridge is down for maintenance.',
|
||||
]), 503);
|
||||
}
|
||||
|
||||
$cacheKey = 'http_' . json_encode($request);
|
||||
/** @var Response $cachedResponse */
|
||||
$cachedResponse = $this->cache->get($cacheKey);
|
||||
if ($cachedResponse) {
|
||||
$ifModifiedSince = $_SERVER['HTTP_IF_MODIFIED_SINCE'] ?? null;
|
||||
$lastModified = $cachedResponse->getHeader('last-modified');
|
||||
if ($ifModifiedSince && $lastModified) {
|
||||
$lastModified = new \DateTimeImmutable($lastModified);
|
||||
$lastModifiedTimestamp = $lastModified->getTimestamp();
|
||||
$modifiedSince = strtotime($ifModifiedSince);
|
||||
if ($lastModifiedTimestamp <= $modifiedSince) {
|
||||
$modificationTimeGMT = gmdate('D, d M Y H:i:s ', $lastModifiedTimestamp);
|
||||
return new Response('', 304, ['last-modified' => $modificationTimeGMT . 'GMT']);
|
||||
}
|
||||
}
|
||||
return $cachedResponse;
|
||||
}
|
||||
|
||||
$bridgeName = $request['bridge'] ?? null;
|
||||
if (!$bridgeName) {
|
||||
return new Response(render(__DIR__ . '/../templates/error.html.php', ['message' => 'Missing bridge parameter']), 400);
|
||||
}
|
||||
$bridgeFactory = new BridgeFactory();
|
||||
|
||||
$bridgeClassName = null;
|
||||
if (isset($request['bridge'])) {
|
||||
$bridgeClassName = $bridgeFactory->sanitizeBridgeName($request['bridge']);
|
||||
$bridgeClassName = $bridgeFactory->createBridgeClassName($bridgeName);
|
||||
if (!$bridgeClassName) {
|
||||
return new Response(render(__DIR__ . '/../templates/error.html.php', ['message' => 'Bridge not found']), 404);
|
||||
}
|
||||
|
||||
if ($bridgeClassName === null) {
|
||||
throw new \InvalidArgumentException('Bridge name invalid!');
|
||||
}
|
||||
|
||||
$format = $request['format'] ?? null;
|
||||
if (!$format) {
|
||||
throw new \Exception('You must specify a format!');
|
||||
return new Response(render(__DIR__ . '/../templates/error.html.php', ['message' => 'You must specify a format']), 400);
|
||||
}
|
||||
if (!$bridgeFactory->isWhitelisted($bridgeClassName)) {
|
||||
throw new \Exception('This bridge is not whitelisted');
|
||||
if (!$bridgeFactory->isEnabled($bridgeClassName)) {
|
||||
return new Response(render(__DIR__ . '/../templates/error.html.php', ['message' => 'This bridge is not whitelisted']), 400);
|
||||
}
|
||||
|
||||
$formatFactory = new FormatFactory();
|
||||
$format = $formatFactory->create($format);
|
||||
|
||||
$bridge = $bridgeFactory->create($bridgeClassName);
|
||||
$bridge->loadConfiguration();
|
||||
|
||||
$noproxy = array_key_exists('_noproxy', $request)
|
||||
&& filter_var($request['_noproxy'], FILTER_VALIDATE_BOOLEAN);
|
||||
|
||||
if (Configuration::getConfig('proxy', 'url') && Configuration::getConfig('proxy', 'by_bridge') && $noproxy) {
|
||||
$noproxy = $request['_noproxy'] ?? null;
|
||||
if (
|
||||
Configuration::getConfig('proxy', 'url')
|
||||
&& Configuration::getConfig('proxy', 'by_bridge')
|
||||
&& $noproxy
|
||||
) {
|
||||
// This const is only used once in getContents()
|
||||
define('NOPROXY', true);
|
||||
}
|
||||
|
||||
if (array_key_exists('_cache_timeout', $request)) {
|
||||
if (! Configuration::getConfig('cache', 'custom_timeout')) {
|
||||
unset($request['_cache_timeout']);
|
||||
$uri = parse_url($_SERVER['REQUEST_URI'], PHP_URL_PATH) . '?' . http_build_query($request);
|
||||
return new Response('', 301, ['Location' => $uri]);
|
||||
}
|
||||
$bridge = $bridgeFactory->create($bridgeClassName);
|
||||
$formatFactory = new FormatFactory();
|
||||
$format = $formatFactory->create($format);
|
||||
|
||||
$cache_timeout = filter_var($request['_cache_timeout'], FILTER_VALIDATE_INT);
|
||||
} else {
|
||||
$cache_timeout = $bridge->getCacheTimeout();
|
||||
$response = $this->createResponse($request, $bridge, $format);
|
||||
|
||||
if ($response->getCode() === 200) {
|
||||
$ttl = $request['_cache_timeout'] ?? null;
|
||||
if (Configuration::getConfig('cache', 'custom_timeout') && $ttl) {
|
||||
$ttl = (int) $ttl;
|
||||
} else {
|
||||
$ttl = $bridge->getCacheTimeout();
|
||||
}
|
||||
$this->cache->set($cacheKey, $response, $ttl);
|
||||
}
|
||||
|
||||
// Remove parameters that don't concern bridges
|
||||
$bridge_params = array_diff_key(
|
||||
$request,
|
||||
array_fill_keys(
|
||||
[
|
||||
'action',
|
||||
'bridge',
|
||||
'format',
|
||||
'_noproxy',
|
||||
'_cache_timeout',
|
||||
'_error_time'
|
||||
],
|
||||
''
|
||||
)
|
||||
);
|
||||
if (in_array($response->getCode(), [403, 429, 503])) {
|
||||
// Cache these responses for about ~20 mins on average
|
||||
$this->cache->set($cacheKey, $response, 60 * 15 + rand(1, 60 * 10));
|
||||
}
|
||||
|
||||
// Remove parameters that don't concern caches
|
||||
$cache_params = array_diff_key(
|
||||
$request,
|
||||
array_fill_keys(
|
||||
[
|
||||
'action',
|
||||
'format',
|
||||
'_noproxy',
|
||||
'_cache_timeout',
|
||||
'_error_time'
|
||||
],
|
||||
''
|
||||
)
|
||||
);
|
||||
if ($response->getCode() === 500) {
|
||||
$this->cache->set($cacheKey, $response, 60 * 15);
|
||||
}
|
||||
|
||||
$cacheFactory = new CacheFactory();
|
||||
if (rand(1, 100) === 2) {
|
||||
$this->cache->prune();
|
||||
}
|
||||
|
||||
$cache = $cacheFactory->create();
|
||||
$cache->setScope('');
|
||||
$cache->purgeCache(86400); // 24 hours
|
||||
$cache->setKey($cache_params);
|
||||
return $response;
|
||||
}
|
||||
|
||||
private function createResponse(array $request, BridgeAbstract $bridge, FormatAbstract $format)
|
||||
{
|
||||
$items = [];
|
||||
$infos = [];
|
||||
$mtime = $cache->getTime();
|
||||
|
||||
if (
|
||||
$mtime !== false
|
||||
&& (time() - $cache_timeout < $mtime)
|
||||
&& !Debug::isEnabled()
|
||||
) {
|
||||
// At this point we found the feed in the cache
|
||||
|
||||
if (isset($_SERVER['HTTP_IF_MODIFIED_SINCE'])) {
|
||||
// The client wants to know if the feed has changed since its last check
|
||||
$stime = strtotime($_SERVER['HTTP_IF_MODIFIED_SINCE']);
|
||||
if ($mtime <= $stime) {
|
||||
$lastModified2 = gmdate('D, d M Y H:i:s ', $mtime) . 'GMT';
|
||||
return new Response('', 304, ['Last-Modified' => $lastModified2]);
|
||||
try {
|
||||
$bridge->loadConfiguration();
|
||||
// Remove parameters that don't concern bridges
|
||||
$input = array_diff_key($request, array_fill_keys(['action', 'bridge', 'format', '_noproxy', '_cache_timeout', '_error_time'], ''));
|
||||
$bridge->setInput($input);
|
||||
$bridge->collectData();
|
||||
$items = $bridge->getItems();
|
||||
if (isset($items[0]) && is_array($items[0])) {
|
||||
$feedItems = [];
|
||||
foreach ($items as $item) {
|
||||
$feedItems[] = FeedItem::fromArray($item);
|
||||
}
|
||||
$items = $feedItems;
|
||||
}
|
||||
$infos = [
|
||||
'name' => $bridge->getName(),
|
||||
'uri' => $bridge->getURI(),
|
||||
'donationUri' => $bridge->getDonationURI(),
|
||||
'icon' => $bridge->getIcon()
|
||||
];
|
||||
} catch (\Exception $e) {
|
||||
if ($e instanceof HttpException) {
|
||||
// Reproduce (and log) these responses regardless of error output and report limit
|
||||
if ($e->getCode() === 429) {
|
||||
$this->logger->info(sprintf('Exception in DisplayAction(%s): %s', $bridge->getShortName(), create_sane_exception_message($e)));
|
||||
return new Response(render(__DIR__ . '/../templates/exception.html.php', ['e' => $e]), 429);
|
||||
}
|
||||
if ($e->getCode() === 503) {
|
||||
$this->logger->info(sprintf('Exception in DisplayAction(%s): %s', $bridge->getShortName(), create_sane_exception_message($e)));
|
||||
return new Response(render(__DIR__ . '/../templates/exception.html.php', ['e' => $e]), 503);
|
||||
}
|
||||
}
|
||||
|
||||
// Fetch the cached feed from the cache and prepare it
|
||||
$cached = $cache->loadData();
|
||||
if (isset($cached['items']) && isset($cached['extraInfos'])) {
|
||||
foreach ($cached['items'] as $item) {
|
||||
$items[] = new FeedItem($item);
|
||||
}
|
||||
$infos = $cached['extraInfos'];
|
||||
$this->logger->error(sprintf('Exception in DisplayAction(%s)', $bridge->getShortName()), ['e' => $e]);
|
||||
$errorOutput = Configuration::getConfig('error', 'output');
|
||||
$reportLimit = Configuration::getConfig('error', 'report_limit');
|
||||
$errorCount = 1;
|
||||
if ($reportLimit > 1) {
|
||||
$errorCount = $this->logBridgeError($bridge->getName(), $e->getCode());
|
||||
}
|
||||
} else {
|
||||
// At this point we did NOT find the feed in the cache. So invoke the bridge!
|
||||
try {
|
||||
$bridge->setDatas($bridge_params);
|
||||
$bridge->collectData();
|
||||
|
||||
$items = $bridge->getItems();
|
||||
|
||||
if (isset($items[0]) && is_array($items[0])) {
|
||||
$feedItems = [];
|
||||
foreach ($items as $item) {
|
||||
$feedItems[] = new FeedItem($item);
|
||||
}
|
||||
$items = $feedItems;
|
||||
}
|
||||
$infos = [
|
||||
'name' => $bridge->getName(),
|
||||
'uri' => $bridge->getURI(),
|
||||
'donationUri' => $bridge->getDonationURI(),
|
||||
'icon' => $bridge->getIcon()
|
||||
];
|
||||
} catch (\Throwable $e) {
|
||||
if ($e instanceof HttpException) {
|
||||
// Produce a smaller log record for http exceptions
|
||||
Logger::warning(sprintf('Exception in %s: %s', $bridgeClassName, create_sane_exception_message($e)));
|
||||
} else {
|
||||
// Log the exception
|
||||
Logger::error(sprintf('Exception in %s', $bridgeClassName), ['e' => $e]);
|
||||
}
|
||||
|
||||
// Emit error only if we are past the error report limit
|
||||
$errorCount = self::logBridgeError($bridge->getName(), $e->getCode());
|
||||
if ($errorCount >= Configuration::getConfig('error', 'report_limit')) {
|
||||
if (Configuration::getConfig('error', 'output') === 'feed') {
|
||||
// Emit the error as a feed item in a feed so that feed readers can pick it up
|
||||
$items[] = $this->createFeedItemFromException($e, $bridge);
|
||||
} elseif (Configuration::getConfig('error', 'output') === 'http') {
|
||||
// Emit as a regular web response
|
||||
throw $e;
|
||||
}
|
||||
// Let clients know about the error if we are past the report limit
|
||||
if ($errorCount >= $reportLimit) {
|
||||
if ($errorOutput === 'feed') {
|
||||
// Render the exception as a feed item
|
||||
$items[] = $this->createFeedItemFromException($e, $bridge);
|
||||
} elseif ($errorOutput === 'http') {
|
||||
return new Response(render(__DIR__ . '/../templates/exception.html.php', ['e' => $e]), 500);
|
||||
} elseif ($errorOutput === 'none') {
|
||||
// Do nothing (produces an empty feed)
|
||||
}
|
||||
}
|
||||
|
||||
$cache->saveData([
|
||||
'items' => array_map(function (FeedItem $item) {
|
||||
return $item->toArray();
|
||||
}, $items),
|
||||
'extraInfos' => $infos
|
||||
]);
|
||||
}
|
||||
|
||||
$format->setItems($items);
|
||||
$format->setExtraInfos($infos);
|
||||
$lastModified = $cache->getTime();
|
||||
$format->setLastModified($lastModified);
|
||||
$headers = [];
|
||||
if ($lastModified) {
|
||||
$headers['Last-Modified'] = gmdate('D, d M Y H:i:s ', $lastModified) . 'GMT';
|
||||
}
|
||||
$headers['Content-Type'] = $format->getMimeType() . '; charset=' . $format->getCharset();
|
||||
$now = time();
|
||||
$format->setLastModified($now);
|
||||
$headers = [
|
||||
'last-modified' => gmdate('D, d M Y H:i:s ', $now) . 'GMT',
|
||||
'content-type' => $format->getMimeType() . '; charset=' . $format->getCharset(),
|
||||
];
|
||||
return new Response($format->stringify(), 200, $headers);
|
||||
}
|
||||
|
||||
private function createFeedItemFromException($e, BridgeInterface $bridge): FeedItem
|
||||
private function createFeedItemFromException($e, BridgeAbstract $bridge): FeedItem
|
||||
{
|
||||
$item = new FeedItem();
|
||||
|
||||
// Create a unique identifier every 24 hours
|
||||
$uniqueIdentifier = urlencode((int)(time() / 86400));
|
||||
$itemTitle = sprintf('Bridge returned error %s! (%s)', $e->getCode(), $uniqueIdentifier);
|
||||
$item->setTitle($itemTitle);
|
||||
$title = sprintf('Bridge returned error %s! (%s)', $e->getCode(), $uniqueIdentifier);
|
||||
$item->setTitle($title);
|
||||
$item->setURI(get_current_url());
|
||||
$item->setTimestamp(time());
|
||||
|
||||
// Create a item identifier for feed readers e.g. "staysafetv twitch videos_19389"
|
||||
// Create an item identifier for feed readers e.g. "staysafetv twitch videos_19389"
|
||||
$item->setUid($bridge->getName() . '_' . $uniqueIdentifier);
|
||||
|
||||
$content = render_template(__DIR__ . '/../templates/bridge-error.html.php', [
|
||||
'error' => render_template(__DIR__ . '/../templates/error.html.php', ['e' => $e]),
|
||||
'error' => render_template(__DIR__ . '/../templates/exception.html.php', ['e' => $e]),
|
||||
'searchUrl' => self::createGithubSearchUrl($bridge),
|
||||
'issueUrl' => self::createGithubIssueUrl($bridge, $e, create_sane_exception_message($e)),
|
||||
'maintainer' => $bridge->getMaintainer(),
|
||||
|
@ -213,14 +189,12 @@ class DisplayAction implements ActionInterface
|
|||
return $item;
|
||||
}
|
||||
|
||||
private static function logBridgeError($bridgeName, $code)
|
||||
private function logBridgeError($bridgeName, $code)
|
||||
{
|
||||
$cacheFactory = new CacheFactory();
|
||||
$cache = $cacheFactory->create();
|
||||
$cache->setScope('error_reporting');
|
||||
$cache->setkey([$bridgeName . '_' . $code]);
|
||||
$cache->purgeCache(86400); // 24 hours
|
||||
if ($report = $cache->loadData()) {
|
||||
// todo: it's not really necessary to json encode $report
|
||||
$cacheKey = 'error_reporting_' . $bridgeName . '_' . $code;
|
||||
$report = $this->cache->get($cacheKey);
|
||||
if ($report) {
|
||||
$report = Json::decode($report);
|
||||
$report['time'] = time();
|
||||
$report['count']++;
|
||||
|
@ -231,7 +205,8 @@ class DisplayAction implements ActionInterface
|
|||
'count' => 1,
|
||||
];
|
||||
}
|
||||
$cache->saveData(Json::encode($report));
|
||||
$ttl = 86400 * 5;
|
||||
$this->cache->set($cacheKey, Json::encode($report), $ttl);
|
||||
return $report['count'];
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,89 @@
|
|||
<?php
|
||||
|
||||
/**
|
||||
* This action is used by the frontpage form search.
|
||||
* It finds a bridge based off of a user input url.
|
||||
* It uses bridges' detectParameters implementation.
|
||||
*/
|
||||
class FindfeedAction implements ActionInterface
{
    /**
     * Find every enabled bridge that is able to handle the requested url.
     *
     * Reads 'url' and 'format' from $request. Each enabled bridge is asked, through
     * detectParameters(), whether it recognises the url; every bridge that returns
     * parameters contributes one entry to the JSON encoded result.
     *
     * @param array $request Request parameters; 'url' and 'format' are required
     * @return Response 400 when 'url' or 'format' is missing, 404 (JSON) when no bridge
     *                  matched, otherwise 200 with the JSON encoded list of matches
     */
    public function execute(array $request)
    {
        $targetURL = $request['url'] ?? null;
        $format = $request['format'] ?? null;

        if (!$targetURL) {
            return new Response('You must specify a url', 400);
        }
        if (!$format) {
            return new Response('You must specify a format', 400);
        }

        $bridgeFactory = new BridgeFactory();

        $results = [];
        foreach ($bridgeFactory->getBridgeClassNames() as $bridgeClassName) {
            if (!$bridgeFactory->isEnabled($bridgeClassName)) {
                continue;
            }

            $bridge = $bridgeFactory->create($bridgeClassName);

            $bridgeParams = $bridge->detectParameters($targetURL);
            if ($bridgeParams === null) {
                // This bridge does not recognise the url
                continue;
            }

            // It's allowed to have no 'context' in a bridge (only a default context without any name)
            // In this case, the parameters are found in the first element of the PARAMETERS array
            $context = $bridgeParams['context'] ?? 0;

            // Construct the array of parameters
            $bridgeData = [];
            foreach ($bridgeParams as $key => $value) {
                // 'context' is a special case: it is a bridge parameter without a "name" of its own.
                // The comparison is strict because a loose one treats an integer key such as 0
                // as equal to 'context' on PHP versions before 8.
                $name = $key === 'context'
                    ? 'Context'
                    : $this->getParameterName($bridge, $context, $key);

                $bridgeData[$key] = [
                    'name' => $name,
                    'value' => $value,
                ];
            }

            // 'bridge' and 'format' are added after $bridgeData was built, so they only end up in the url
            $bridgeParams['bridge'] = $bridgeClassName;
            $bridgeParams['format'] = $format;

            $results[] = [
                'url' => './?action=display&' . http_build_query($bridgeParams),
                'bridgeParams' => $bridgeParams,
                'bridgeData' => $bridgeData,
                'bridgeMeta' => [
                    'name' => $bridge::NAME,
                    'description' => $bridge::DESCRIPTION,
                    'parameters' => $bridge::PARAMETERS,
                    'icon' => $bridge->getIcon(),
                ],
            ];
        }

        $headers = ['content-type' => 'application/json'];
        if ($results === []) {
            return new Response(Json::encode(['message' => 'No bridge found for given url']), 404, $headers);
        }
        return new Response(Json::encode($results), 200, $headers);
    }

    /**
     * Get the display name of a parameter in the actual context, or in the global parameters.
     *
     * @param object $bridge Bridge instance whose PARAMETERS constant is inspected
     * @param int|string $context Key of the context inside PARAMETERS
     * @param int|string $key Parameter key to look up
     * @return string The declared name, or a placeholder text when no name is declared
     */
    private function getParameterName($bridge, $context, $key)
    {
        return $bridge::PARAMETERS[$context][$key]['name']
            ?? $bridge::PARAMETERS['global'][$key]['name']
            ?? 'Variable "' . $key . '" (No name provided)';
    }
}
|
|
@ -4,18 +4,26 @@ final class FrontpageAction implements ActionInterface
|
|||
{
|
||||
public function execute(array $request)
|
||||
{
|
||||
$messages = [];
|
||||
$showInactive = (bool) ($request['show_inactive'] ?? null);
|
||||
$activeBridges = 0;
|
||||
|
||||
$bridgeFactory = new BridgeFactory();
|
||||
$bridgeClassNames = $bridgeFactory->getBridgeClassNames();
|
||||
|
||||
foreach ($bridgeFactory->getMissingEnabledBridges() as $missingEnabledBridge) {
|
||||
$messages[] = [
|
||||
'body' => sprintf('Warning : Bridge "%s" not found', $missingEnabledBridge),
|
||||
'level' => 'warning'
|
||||
];
|
||||
}
|
||||
|
||||
$formatFactory = new FormatFactory();
|
||||
$formats = $formatFactory->getFormatNames();
|
||||
|
||||
$body = '';
|
||||
foreach ($bridgeClassNames as $bridgeClassName) {
|
||||
if ($bridgeFactory->isWhitelisted($bridgeClassName)) {
|
||||
if ($bridgeFactory->isEnabled($bridgeClassName)) {
|
||||
$body .= BridgeCard::displayBridgeCard($bridgeClassName, $formats);
|
||||
$activeBridges++;
|
||||
} elseif ($showInactive) {
|
||||
|
@ -23,7 +31,9 @@ final class FrontpageAction implements ActionInterface
|
|||
}
|
||||
}
|
||||
|
||||
// todo: cache this rendered template
|
||||
return render(__DIR__ . '/../templates/frontpage.html.php', [
|
||||
'messages' => $messages,
|
||||
'admin_email' => Configuration::getConfig('admin', 'email'),
|
||||
'admin_telegram' => Configuration::getConfig('admin', 'telegram'),
|
||||
'bridges' => $body,
|
||||
|
|
|
@ -0,0 +1,15 @@
|
|||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
class HealthAction implements ActionInterface
{
    /**
     * Answer with a fixed JSON document stating that everything is fine.
     *
     * @param array $request Request parameters (not used)
     * @return Response Status 200 with a JSON body holding 'code' and 'message'
     */
    public function execute(array $request)
    {
        $payload = Json::encode(['code' => 200, 'message' => 'all is good']);
        $headers = ['content-type' => 'application/json'];

        return new Response($payload, 200, $headers);
    }
}
|
|
@ -1,17 +1,5 @@
|
|||
<?php
|
||||
|
||||
/**
|
||||
* This file is part of RSS-Bridge, a PHP project capable of generating RSS and
|
||||
* Atom feeds for websites that don't have one.
|
||||
*
|
||||
* For the full license information, please view the UNLICENSE file distributed
|
||||
* with this source code.
|
||||
*
|
||||
* @package Core
|
||||
* @license http://unlicense.org/ UNLICENSE
|
||||
* @link https://github.com/rss-bridge/rss-bridge
|
||||
*/
|
||||
|
||||
class ListAction implements ActionInterface
|
||||
{
|
||||
public function execute(array $request)
|
||||
|
@ -26,17 +14,17 @@ class ListAction implements ActionInterface
|
|||
$bridge = $bridgeFactory->create($bridgeClassName);
|
||||
|
||||
$list->bridges[$bridgeClassName] = [
|
||||
'status' => $bridgeFactory->isWhitelisted($bridgeClassName) ? 'active' : 'inactive',
|
||||
'uri' => $bridge->getURI(),
|
||||
'donationUri' => $bridge->getDonationURI(),
|
||||
'name' => $bridge->getName(),
|
||||
'icon' => $bridge->getIcon(),
|
||||
'parameters' => $bridge->getParameters(),
|
||||
'maintainer' => $bridge->getMaintainer(),
|
||||
'description' => $bridge->getDescription()
|
||||
'status' => $bridgeFactory->isEnabled($bridgeClassName) ? 'active' : 'inactive',
|
||||
'uri' => $bridge->getURI(),
|
||||
'donationUri' => $bridge->getDonationURI(),
|
||||
'name' => $bridge->getName(),
|
||||
'icon' => $bridge->getIcon(),
|
||||
'parameters' => $bridge->getParameters(),
|
||||
'maintainer' => $bridge->getMaintainer(),
|
||||
'description' => $bridge->getDescription()
|
||||
];
|
||||
}
|
||||
$list->total = count($list->bridges);
|
||||
return new Response(Json::encode($list), 200, ['Content-Type' => 'application/json']);
|
||||
return new Response(Json::encode($list), 200, ['content-type' => 'application/json']);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,53 +1,44 @@
|
|||
<?php
|
||||
|
||||
/**
|
||||
* This file is part of RSS-Bridge, a PHP project capable of generating RSS and
|
||||
* Atom feeds for websites that don't have one.
|
||||
*
|
||||
* For the full license information, please view the UNLICENSE file distributed
|
||||
* with this source code.
|
||||
*
|
||||
* @package Core
|
||||
* @license http://unlicense.org/ UNLICENSE
|
||||
* @link https://github.com/rss-bridge/rss-bridge
|
||||
*/
|
||||
|
||||
class SetBridgeCacheAction implements ActionInterface
|
||||
{
|
||||
private CacheInterface $cache;
|
||||
|
||||
public function __construct()
|
||||
{
|
||||
$this->cache = RssBridge::getCache();
|
||||
}
|
||||
|
||||
public function execute(array $request)
|
||||
{
|
||||
$authenticationMiddleware = new ApiAuthenticationMiddleware();
|
||||
$authenticationMiddleware($request);
|
||||
|
||||
$key = $request['key'] or returnClientError('You must specify key!');
|
||||
|
||||
$bridgeFactory = new \BridgeFactory();
|
||||
|
||||
$bridgeClassName = null;
|
||||
if (isset($request['bridge'])) {
|
||||
$bridgeClassName = $bridgeFactory->sanitizeBridgeName($request['bridge']);
|
||||
$key = $request['key'] ?? null;
|
||||
if (!$key) {
|
||||
returnClientError('You must specify key!');
|
||||
}
|
||||
|
||||
if ($bridgeClassName === null) {
|
||||
throw new \InvalidArgumentException('Bridge name invalid!');
|
||||
$bridgeFactory = new BridgeFactory();
|
||||
|
||||
$bridgeName = $request['bridge'] ?? null;
|
||||
$bridgeClassName = $bridgeFactory->createBridgeClassName($bridgeName);
|
||||
if (!$bridgeClassName) {
|
||||
throw new \Exception(sprintf('Bridge not found: %s', $bridgeName));
|
||||
}
|
||||
|
||||
// whitelist control
|
||||
if (!$bridgeFactory->isWhitelisted($bridgeClassName)) {
|
||||
if (!$bridgeFactory->isEnabled($bridgeClassName)) {
|
||||
throw new \Exception('This bridge is not whitelisted', 401);
|
||||
die;
|
||||
}
|
||||
|
||||
$bridge = $bridgeFactory->create($bridgeClassName);
|
||||
$bridge->loadConfiguration();
|
||||
$value = $request['value'];
|
||||
|
||||
$cacheFactory = new CacheFactory();
|
||||
|
||||
$cache = $cacheFactory->create();
|
||||
$cache->setScope(get_class($bridge));
|
||||
$cache->setKey($key);
|
||||
$cache->saveData($value);
|
||||
$cacheKey = get_class($bridge) . '_' . $key;
|
||||
$ttl = 86400 * 3;
|
||||
$this->cache->set($cacheKey, $value, $ttl);
|
||||
|
||||
header('Content-Type: text/plain');
|
||||
echo 'done';
|
||||
|
|
|
@ -0,0 +1,116 @@
|
|||
<?php
|
||||
|
||||
class ABolaBridge extends BridgeAbstract
{
    const NAME = 'A Bola';
    const URI = 'https://abola.pt/';
    const DESCRIPTION = 'Returns news from the Portuguese sports newspaper A BOLA.PT';
    const MAINTAINER = 'rmscoelho';
    const CACHE_TIMEOUT = 3600;
    const PARAMETERS = [
        [
            'feed' => [
                'name' => 'News Feed',
                'type' => 'list',
                'title' => 'Feeds from the Portuguese sports newspaper A BOLA.PT',
                'values' => [
                    'Últimas' => 'Nnh/Noticias',
                    'Seleção Nacional' => 'Selecao/Noticias',
                    'Futebol Nacional' => [
                        'Notícias' => 'Nacional/Noticias',
                        'Primeira Liga' => 'Nacional/Liga/Noticias',
                        'Liga 2' => 'Nacional/Liga2/Noticias',
                        'Liga 3' => 'Nacional/Liga3/Noticias',
                        'Liga Revelação' => 'Nacional/Liga-Revelacao/Noticias',
                        'Campeonato de Portugal' => 'Nacional/Campeonato-Portugal/Noticias',
                        'Distritais' => 'Nacional/Distritais/Noticias',
                        'Taça de Portugal' => 'Nacional/TPortugal/Noticias',
                        'Futebol Feminino' => 'Nacional/FFeminino/Noticias',
                        'Futsal' => 'Nacional/Futsal/Noticias',
                    ],
                    'Futebol Internacional' => [
                        'Notícias' => 'Internacional/Noticias/Noticias',
                        'Liga dos Campeões' => 'Internacional/Liga-dos-campeoes/Noticias',
                        'Liga Europa' => 'Internacional/Liga-europa/Noticias',
                        'Liga Conferência' => 'Internacional/Liga-conferencia/Noticias',
                        'Liga das Nações' => 'Internacional/Liga-das-nacoes/Noticias',
                        'UEFA Youth League' => 'Internacional/Uefa-Youth-League/Noticias',
                    ],
                    'Mercado' => 'Mercado',
                    'Modalidades' => 'Modalidades/Noticias',
                    'Motores' => 'Motores/Noticias',
                ]
            ]
        ]
    ];

    /**
     * @return string Address of the site icon
     */
    public function getIcon()
    {
        return 'https://abola.pt/img/icons/favicon-96x96.png';
    }

    /**
     * @return string The bridge name, followed by the key of the selected feed when there is one
     */
    public function getName()
    {
        $feedLabel = $this->getKey('feed');

        return !is_null($feedLabel) ? self::NAME . ' | ' . $feedLabel : self::NAME;
    }

    /**
     * @return string The site address followed by the value of the 'feed' input
     */
    public function getURI()
    {
        return self::URI . $this->getInput('feed');
    }

    /**
     * Fetch the page of the selected feed and turn every 'div.media' element into a feed item.
     *
     * @throws \Exception when the container holding the article list is not found on the page
     */
    public function collectData()
    {
        $feed = $this->getInput('feed');
        $url = sprintf('https://abola.pt/%s', $feed);
        $dom = getSimpleHTMLDOM($url);

        // The 'Mercado' page uses a differently named container for its article list
        $selector = $feed !== 'Mercado'
            ? 'div#body_Todas1_upNoticiasTodas'
            : 'div#body_NoticiasMercado_upNoticiasTodas';
        $dom = $dom->find($selector, 0);
        if (!$dom) {
            throw new \Exception(sprintf('Unable to find css selector on `%s`', $url));
        }
        $dom = defaultLinkTo($dom, $this->getURI());

        foreach ($dom->find('div.media') as $key => $article) {
            $imageUrl = $this->extractImageUrl($article);
            $timestamp = $this->extractTimestamp($article, $key);

            //Content
            $image = '<img src="' . $imageUrl . '" alt="' . $article->find('h4 span', 0)->plaintext . '" />';
            $description = '<p>' . $article->find('.media-texto > span', 0)->plaintext . '</p>';
            // '<br>' replaces the former '</br>', which is not a valid tag
            $content = $image . '<br>' . $description;

            $a = $article->find('.media-body > a', 0);
            $this->items[] = [
                'title' => $a->find('h4 span', 0)->plaintext,
                'uri' => $a->href,
                'content' => $content,
                'timestamp' => $timestamp,
            ];
        }
    }

    /**
     * Derive the thumbnail address from the inline style of the '.media-img' element.
     *
     * The replacements below are order dependent and are kept exactly as they were.
     *
     * @param object $article The 'div.media' element of one article
     * @return string The rebuilt image address
     */
    private function extractImageUrl($article)
    {
        $imageNode = $article->find('.media-img', 0);
        // Without a thumbnail element the chain below runs on an empty string
        $image = $imageNode ? $imageNode->style : '';

        $image = preg_replace('/background-image: url\(/i', '', $image);
        $image = substr_replace($image, '', -4);
        $image = preg_replace('/https:\/\//i', '', $image);
        $image = preg_replace('/www\./i', '', $image);
        $image = preg_replace('/\/\//', '/', $image);
        $image = preg_replace('/\/\/\//', '//', $image);
        $image = substr($image, 7);
        $image = 'https://' . $image;
        $image = preg_replace('/ptimg/', 'pt/img', $image);
        $image = preg_replace('/\/\/bola/', 'www.abola', $image);

        return $image;
    }

    /**
     * Build the publication timestamp from the date and time elements of an article.
     *
     * The date text is read as year, month, day once its dots are replaced by slashes.
     * Today's date is used when the date element is missing, and the current time is
     * used when the time element is missing.
     *
     * @param object $article The 'div.media' element of one article
     * @param int|string $key Position of the article, which is part of the element ids
     * @return int|false Result of mktime()
     */
    private function extractTimestamp($article, $key)
    {
        $date = date('Y/m/d');
        $dateNode = $article->find("span#body_Todas1_rptNoticiasTodas_lblData_$key", 0);
        if (!is_null($dateNode)) {
            $date = preg_replace('/\./', '/', $dateNode->plaintext);
        }

        // Same fallback strategy as for the date: a missing element must not abort the whole feed
        $time = date('H:i');
        $timeNode = $article->find("span#body_Todas1_rptNoticiasTodas_lblHora_$key", 0);
        if (!is_null($timeNode)) {
            $time = $timeNode->plaintext;
        }

        $dateParts = explode('/', $date);
        $timeParts = explode(':', $time);

        // Integer casts keep mktime() working when the scraped text carries surrounding whitespace
        $year = (int) $dateParts[0];
        $month = (int) $dateParts[1];
        $day = (int) $dateParts[2];
        $hour = (int) $timeParts[0];
        $minute = (int) $timeParts[1];

        return mktime($hour, $minute, 0, $month, $day, $year);
    }
}
|
|
@ -33,6 +33,7 @@ class AO3Bridge extends BridgeAbstract
|
|||
],
|
||||
]
|
||||
];
|
||||
private $title;
|
||||
|
||||
public function collectData()
|
||||
{
|
||||
|
@ -92,8 +93,14 @@ class AO3Bridge extends BridgeAbstract
|
|||
private function collectWork($id)
|
||||
{
|
||||
$url = self::URI . "/works/$id/navigate";
|
||||
$response = _http_request($url, ['useragent' => 'rss-bridge bot (https://github.com/RSS-Bridge/rss-bridge)']);
|
||||
$html = \str_get_html($response['body']);
|
||||
$httpClient = RssBridge::getHttpClient();
|
||||
|
||||
$version = 'v0.0.1';
|
||||
$response = $httpClient->request($url, [
|
||||
'useragent' => "rss-bridge $version (https://github.com/RSS-Bridge/rss-bridge)",
|
||||
]);
|
||||
|
||||
$html = \str_get_html($response->getBody());
|
||||
$html = defaultLinkTo($html, self::URI);
|
||||
|
||||
$this->title = $html->find('h2 a', 0)->plaintext;
|
||||
|
|
|
@ -63,11 +63,13 @@ class ARDAudiothekBridge extends BridgeAbstract
|
|||
|
||||
public function collectData()
|
||||
{
|
||||
$oldTz = date_default_timezone_get();
|
||||
$path = $this->getInput('path');
|
||||
$limit = $this->getInput('limit');
|
||||
|
||||
$oldTz = date_default_timezone_get();
|
||||
date_default_timezone_set('Europe/Berlin');
|
||||
|
||||
$pathComponents = explode('/', $this->getInput('path'));
|
||||
$pathComponents = explode('/', $path);
|
||||
if (empty($pathComponents)) {
|
||||
returnClientError('Path may not be empty');
|
||||
}
|
||||
|
@ -82,17 +84,21 @@ class ARDAudiothekBridge extends BridgeAbstract
|
|||
}
|
||||
|
||||
$url = self::APIENDPOINT . 'programsets/' . $showID . '/';
|
||||
$rawJSON = getContents($url);
|
||||
$processedJSON = json_decode($rawJSON)->data->programSet;
|
||||
$json1 = getContents($url);
|
||||
$data1 = Json::decode($json1, false);
|
||||
$processedJSON = $data1->data->programSet;
|
||||
if (!$processedJSON) {
|
||||
throw new \Exception('Unable to find show id: ' . $showID);
|
||||
}
|
||||
|
||||
$limit = $this->getInput('limit');
|
||||
$answerLength = 1;
|
||||
$offset = 0;
|
||||
$numberOfElements = 1;
|
||||
|
||||
while ($answerLength != 0 && $offset < $numberOfElements && (is_null($limit) || $offset < $limit)) {
|
||||
$rawJSON = getContents($url . '?offset=' . $offset);
|
||||
$processedJSON = json_decode($rawJSON)->data->programSet;
|
||||
$json2 = getContents($url . '?offset=' . $offset);
|
||||
$data2 = Json::decode($json2, false);
|
||||
$processedJSON = $data2->data->programSet;
|
||||
|
||||
$answerLength = count($processedJSON->items->nodes);
|
||||
$offset = $offset + $answerLength;
|
||||
|
@ -113,7 +119,16 @@ class ARDAudiothekBridge extends BridgeAbstract
|
|||
$item['timestamp'] = $audio->publicationStartDateAndTime;
|
||||
$item['uid'] = $audio->id;
|
||||
$item['author'] = $audio->programSet->publicationService->title;
|
||||
$item['categories'] = [ $audio->programSet->editorialCategories->title ];
|
||||
|
||||
$category = $audio->programSet->editorialCategories->title ?? null;
|
||||
if ($category) {
|
||||
$item['categories'] = [$category];
|
||||
}
|
||||
|
||||
$item['itunes'] = [
|
||||
'duration' => $audio->duration,
|
||||
];
|
||||
|
||||
$this->items[] = $item;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -34,7 +34,12 @@ class ASRockNewsBridge extends BridgeAbstract
|
|||
|
||||
$item['content'] = $contents->innertext;
|
||||
$item['timestamp'] = $this->extractDate($a->plaintext);
|
||||
$item['enclosures'][] = $a->find('img', 0)->src;
|
||||
|
||||
$img = $a->find('img', 0);
|
||||
if ($img) {
|
||||
$item['enclosures'][] = $img->src;
|
||||
}
|
||||
|
||||
$this->items[] = $item;
|
||||
|
||||
if (count($this->items) >= 10) {
|
||||
|
|
|
@ -20,17 +20,14 @@ class AcrimedBridge extends FeedExpander
|
|||
|
||||
public function collectData()
|
||||
{
|
||||
$this->collectExpandableDatas(
|
||||
static::URI . 'spip.php?page=backend',
|
||||
$this->getInput('limit')
|
||||
);
|
||||
$url = 'https://www.acrimed.org/spip.php?page=backend';
|
||||
$limit = $this->getInput('limit');
|
||||
$this->collectExpandableDatas($url, $limit);
|
||||
}
|
||||
|
||||
protected function parseItem($newsItem)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($newsItem);
|
||||
|
||||
$articlePage = getSimpleHTMLDOM($newsItem->link);
|
||||
$articlePage = getSimpleHTMLDOM($item['uri']);
|
||||
$article = sanitize($articlePage->find('article.article1', 0)->innertext);
|
||||
$article = defaultLinkTo($article, static::URI);
|
||||
$item['content'] = $article;
|
||||
|
|
|
@ -0,0 +1,85 @@
|
|||
<?php
|
||||
|
||||
class AllSidesBridge extends BridgeAbstract
{
    const NAME = 'AllSides';
    const URI = 'https://www.allsides.com';
    const DESCRIPTION = 'Balanced news and media bias ratings.';
    const MAINTAINER = 'Oliver Nutter';
    const PARAMETERS = [
        'global' => [
            'limit' => [
                'name' => 'Number of posts to return',
                'type' => 'number',
                'defaultValue' => 10,
                'required' => false,
                'title' => 'Zero or negative values return all posts (ignored if not fetching full article)',
            ],
            'fetch' => [
                'name' => 'Fetch full article content',
                'type' => 'checkbox',
                'defaultValue' => 'checked',
            ],
        ],
        'Headline Roundups' => [],
    ];

    private const ROUNDUPS_URI = self::URI . '/headline-roundups';

    /**
     * Collect the items of the queried context; only 'Headline Roundups' is handled.
     */
    public function collectData()
    {
        switch ($this->queriedContext) {
            case 'Headline Roundups':
                $this->collectHeadlineRoundups();
                break;
        }
    }

    /**
     * Read the rows of the headline roundups table and append one item per row.
     *
     * When the 'fetch' input is set, each linked article is loaded as well to obtain
     * its description and its tags.
     */
    private function collectHeadlineRoundups()
    {
        $listing = getSimpleHTMLDOM(self::ROUNDUPS_URI);
        defaultLinkTo($listing, self::ROUNDUPS_URI);
        $rows = $listing->find('table.views-table > tbody > tr');

        $maxItems = (int) $this->getInput('limit');
        $fetchArticles = (bool) $this->getInput('fetch');

        // The limit only applies when full articles are fetched
        if ($maxItems > 0 && $fetchArticles) {
            $rows = array_slice($rows, 0, $maxItems);
        }

        foreach ($rows as $row) {
            $item = [
                'title' => $row->find('.views-field-name', 0)->text(),
                'uri' => $row->find('a', 0)->href,
                'timestamp' => $row->find('.date-display-single', 0)->content,
                'author' => 'AllSides Staff',
            ];

            if ($fetchArticles) {
                $articlePage = getSimpleHTMLDOMCached($item['uri']);
                defaultLinkTo($articlePage, $item['uri']);

                $item['content'] = $articlePage->find('.story-id-page-description', 0);

                foreach ($articlePage->find('.page-tags a') as $tagLink) {
                    $item['categories'][] = $tagLink->text();
                }
            }

            $this->items[] = $item;
        }
    }

    /**
     * @return string The bridge name, followed by the queried context when one is set
     */
    public function getName()
    {
        if (!$this->queriedContext) {
            return self::NAME;
        }
        return self::NAME . ' - ' . $this->queriedContext;
    }

    /**
     * @return string The roundups address for the 'Headline Roundups' context, the site address otherwise
     */
    public function getURI()
    {
        // Loose comparison on purpose: it matches the way a switch statement compares its cases
        if ($this->queriedContext == 'Headline Roundups') {
            return self::ROUNDUPS_URI;
        }
        return self::URI;
    }
}
|
|
@ -16,14 +16,20 @@ class AllegroBridge extends BridgeAbstract
|
|||
'sessioncookie' => [
|
||||
'name' => 'The \'wdctx\' session cookie',
|
||||
'title' => 'Paste the value of the \'wdctx\' cookie from your browser if you want to prevent Allegro imposing rate limits',
|
||||
'pattern' => '^.{250,};?$',
|
||||
'pattern' => '^.{70,};?$',
|
||||
// phpcs:ignore
|
||||
'exampleValue' => 'v4.1-oCrmXTMqv2ppC21GTUCKLmUwRPP1ssQVALKuqwsZ1VXjcKgL2vO5TTRM5xMxS9GiyqxF1gAeyc-63dl0coUoBKXCXi_nAmr95yyqGpq2RAFoneZ4L399E8n6iYyemcuGARjAoSfjvLHJCEwvvHHynSgaxlFBu7hUnKfuy39zo9sSQdyTUjotJg3CAZ53q9v2raAnPCyGOAR4ytRILd9p24EJnxp7_oR0XbVPIo1hDa4WmjXFOxph8rHaO5tWd',
|
||||
'required' => false,
|
||||
],
|
||||
'includeSponsoredOffers' => [
|
||||
'type' => 'checkbox',
|
||||
'name' => 'Include Sponsored Offers'
|
||||
'name' => 'Include Sponsored Offers',
|
||||
'defaultValue' => 'checked'
|
||||
],
|
||||
'includePromotedOffers' => [
|
||||
'type' => 'checkbox',
|
||||
'name' => 'Include Promoted Offers',
|
||||
'defaultValue' => 'checked'
|
||||
]
|
||||
]];
|
||||
|
||||
|
@ -63,58 +69,57 @@ class AllegroBridge extends BridgeAbstract
|
|||
return;
|
||||
}
|
||||
|
||||
$results = $html->find('._6a66d_V7Lel article');
|
||||
$results = $html->find('article[data-analytics-view-custom-context="REGULAR"]');
|
||||
|
||||
if (!$this->getInput('includeSponsoredOffers')) {
|
||||
$results = array_filter($results, function ($node) {
|
||||
return $node->{'data-analytics-view-label'} != 'showSponsoredItems';
|
||||
});
|
||||
$results = array_merge($results, $html->find('article[data-analytics-view-custom-context="SPONSORED"]'));
|
||||
}
|
||||
|
||||
if (!$this->getInput('includePromotedOffers')) {
|
||||
$results = array_merge($results, $html->find('article[data-analytics-view-custom-context="PROMOTED"]'));
|
||||
}
|
||||
|
||||
foreach ($results as $post) {
|
||||
$item = [];
|
||||
|
||||
$item['uri'] = $post->find('._6a66d_LX75-', 0)->href;
|
||||
|
||||
//TODO: port this over, whatever it does, from https://github.com/MK-PL/AllegroRSS
|
||||
// if (arrayLinks.includes('events/clicks?')) {
|
||||
// let sponsoredLink = new URL(arrayLinks).searchParams.get('redirect')
|
||||
// arrayLinks = sponsoredLink.slice(0, sponsoredLink.indexOf('?'))
|
||||
// }
|
||||
|
||||
$item['title'] = $post->find('._6a66d_LX75-', 0)->innertext;
|
||||
|
||||
$item['uid'] = $post->{'data-analytics-view-value'};
|
||||
|
||||
$item_link = $post->find('a[href*="' . $item['uid'] . '"], a[href*="allegrolokalnie"]', 0);
|
||||
|
||||
$item['uri'] = $item_link->href;
|
||||
|
||||
$item['title'] = $item_link->find('img', 0)->alt;
|
||||
|
||||
$image = $item_link->find('img', 0)->{'data-src'} ?: $item_link->find('img', 0)->src ?? false;
|
||||
|
||||
if ($image) {
|
||||
$item['enclosures'] = [$image . '#.image'];
|
||||
}
|
||||
|
||||
$price = $post->{'data-analytics-view-json-custom-price'};
|
||||
if ($price) {
|
||||
$priceDecoded = json_decode(html_entity_decode($price));
|
||||
$price = $priceDecoded->amount . ' ' . $priceDecoded->currency;
|
||||
}
|
||||
|
||||
$descriptionPatterns = ['/<\s*dt[^>]*>\b/', '/<\/dt>/', '/<\s*dd[^>]*>\b/', '/<\/dd>/'];
|
||||
$descriptionReplacements = ['<span>', ':</span> ', '<strong>', ' </strong> '];
|
||||
$description = $post->find('.m7er_k4.mpof_5r.mpof_z0_s', 0)->innertext;
|
||||
$descriptionPretty = preg_replace($descriptionPatterns, $descriptionReplacements, $description);
|
||||
|
||||
$buyNowAuction = $post->find('.mqu1_g3.mvrt_0.mgn2_12', 0)->innertext ?? '';
|
||||
$buyNowAuction = str_replace('</span><span', '</span> <span', $buyNowAuction);
|
||||
|
||||
$auctionTimeLeft = $post->find('._6a66d_ImOzU', 0)->innertext ?? '';
|
||||
|
||||
$price = $post->find('._6a66d_6R3iN', 0)->plaintext;
|
||||
$price = empty($auctionTimeLeft) ? $price : $price . '- kwota licytacji';
|
||||
|
||||
$image = $post->find('._6a66d_44ioA img', 0)->{'data-src'} ?: $post->find('._6a66d_44ioA img', 0)->src ?? false;
|
||||
if ($image) {
|
||||
$item['enclosures'] = [$image . '#.image'];
|
||||
}
|
||||
|
||||
$offerExtraInfo = array_filter($post->find('.mqu1_g3.mgn2_12'), function ($node) {
|
||||
$pricingExtraInfo = array_filter($post->find('.mqu1_g3.mgn2_12'), function ($node) {
|
||||
return empty($node->find('.mvrt_0'));
|
||||
});
|
||||
|
||||
$offerExtraInfo = $offerExtraInfo[0]->plaintext ?? '';
|
||||
$pricingExtraInfo = $pricingExtraInfo[0]->plaintext ?? '';
|
||||
|
||||
$isSmart = $post->find('._6a66d_TC2Zk', 0)->innertext ?? '';
|
||||
if (str_contains($isSmart, 'z kurierem')) {
|
||||
$offerExtraInfo .= ', Smart z kurierem';
|
||||
} else {
|
||||
$offerExtraInfo .= ', Smart';
|
||||
$offerExtraInfo = array_map(function ($node) {
|
||||
return str_contains($node->plaintext, 'zapłać później') ? '' : $node->outertext;
|
||||
}, $post->find('div.mpof_ki.mwdn_1.mj7a_4.mgn2_12'));
|
||||
|
||||
$isSmart = $post->find('img[alt="Smart!"]', 0) ?? false;
|
||||
if ($isSmart) {
|
||||
$pricingExtraInfo .= $isSmart->outertext;
|
||||
}
|
||||
|
||||
$item['categories'] = [];
|
||||
|
@ -131,11 +136,9 @@ class AllegroBridge extends BridgeAbstract
|
|||
. '<div><strong>'
|
||||
. $price
|
||||
. '</strong></div><div>'
|
||||
. $auctionTimeLeft
|
||||
. '</div><div>'
|
||||
. $buyNowAuction
|
||||
. implode('</div><div>', $offerExtraInfo)
|
||||
. '</div><dl>'
|
||||
. $offerExtraInfo
|
||||
. $pricingExtraInfo
|
||||
. '</dl><hr>';
|
||||
|
||||
$this->items[] = $item;
|
||||
|
|
|
@ -0,0 +1,66 @@
|
|||
<?php
|
||||
|
||||
class AllocineFRSortiesBridge extends BridgeAbstract
{
    const MAINTAINER = 'Simounet';
    const NAME = 'AlloCiné Sorties Bridge';
    const CACHE_TIMEOUT = 25200; // 7h
    const BASE_URI = 'https://www.allocine.fr';
    const URI = self::BASE_URI . '/film/sorties-semaine/';
    const DESCRIPTION = 'Bridge for AlloCiné - Sorties cinéma cette semaine';

    /**
     * @return string The fixed bridge name
     */
    public function getName()
    {
        return self::NAME;
    }

    /**
     * Load the page at getURI() and create one item per 'li.mdl' element of its first
     * 'section.section.section-wrap' element.
     */
    public function collectData()
    {
        $page = getSimpleHTMLDOM($this->getURI());
        $releases = $page->find('section.section.section-wrap', 0)->find('li.mdl');

        foreach ($releases as $release) {
            $thumb = $release->find('figure.thumbnail', 0);
            $meta = $release->find('div.meta-body', 0);
            $synopsis = $release->find('div.synopsis', 0);
            $dateLabel = $release->find('span.date', 0);
            $titleLink = $release->find('a[class*=meta-title-link]', 0);

            $markup = trim(defaultLinkTo($thumb->outertext . $meta->outertext . $synopsis->outertext, static::URI));

            // Drop the inline placeholder 'src', then promote 'data-src' to 'src'
            $markup = preg_replace(
                ['@src="data:image/gif;base64,[A-Za-z0-9=+\/]*"@', '@data-src=@'],
                ['', 'src='],
                $markup
            );

            $this->items[] = [
                'content' => $markup,
                'title' => trim($titleLink->innertext),
                'timestamp' => $this->frenchPubDateToTimestamp($dateLabel->plaintext),
                // The first character of the href is dropped before it is appended to the base address
                'uri' => static::BASE_URI . '/' . substr($titleLink->href, 1),
            ];
        }
    }

    /**
     * Convert a date written with French month names into a timestamp.
     *
     * The text is lowercased, the French month names are replaced by English ones and
     * the result is handed to strtotime().
     *
     * @param string $date Date text as found on the page
     * @return int|false Result of strtotime()
     */
    private function frenchPubDateToTimestamp($date)
    {
        $monthNames = [
            'janvier' => 'jan',
            'février' => 'feb',
            'mars' => 'march',
            'avril' => 'apr',
            'mai' => 'may',
            'juin' => 'jun',
            'juillet' => 'jul',
            'août' => 'aug',
            'septembre' => 'sep',
            'octobre' => 'oct',
            'novembre' => 'nov',
            'décembre' => 'dec'
        ];

        $englishDate = strtr(strtolower($date), $monthNames);

        return strtotime($englishDate);
    }
}
|
|
@ -125,14 +125,13 @@ class AmazonPriceTrackerBridge extends BridgeAbstract
|
|||
*/
|
||||
private function getImage($html)
|
||||
{
|
||||
$image = 'https://placekitten.com/200/300';
|
||||
$imageSrc = $html->find('#main-image-container img', 0);
|
||||
|
||||
if ($imageSrc) {
|
||||
$hiresImage = $imageSrc->getAttribute('data-old-hires');
|
||||
$dynamicImageAttribute = $imageSrc->getAttribute('data-a-dynamic-image');
|
||||
$image = $hiresImage ?: $this->parseDynamicImage($dynamicImageAttribute);
|
||||
}
|
||||
$image = $image ?: 'https://placekitten.com/200/300';
|
||||
|
||||
return <<<EOT
|
||||
<img width="300" style="max-width:300;max-height:300" src="$image" alt="{$this->title}" />
|
||||
|
|
|
@ -40,6 +40,8 @@ class AppleMusicBridge extends BridgeAbstract
|
|||
|
||||
foreach ($json->results as $obj) {
|
||||
if ($obj->wrapperType === 'collection') {
|
||||
$copyright = $obj->copyright ?? '';
|
||||
|
||||
$this->items[] = [
|
||||
'title' => $obj->artistName . ' - ' . $obj->collectionName,
|
||||
'uri' => $obj->collectionViewUrl,
|
||||
|
@ -49,7 +51,7 @@ class AppleMusicBridge extends BridgeAbstract
|
|||
. '><img src="' . $obj->artworkUrl100 . '" /></a><br><br>'
|
||||
. $obj->artistName . ' - ' . $obj->collectionName
|
||||
. '<br>'
|
||||
. $obj->copyright,
|
||||
. $copyright,
|
||||
];
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,71 @@
|
|||
<?php
|
||||
|
||||
/**
 * Expands the Ars Technica RSS feed with the full article text.
 *
 * Each feed item is replaced by the `.article-content` markup of the article
 * page; multi-page articles are fetched page by page and concatenated.
 */
class ArsTechnicaBridge extends FeedExpander
{
    const MAINTAINER = 'phantop';
    const NAME = 'Ars Technica';
    const URI = 'https://arstechnica.com/';
    const DESCRIPTION = 'Returns the latest articles from Ars Technica';
    const PARAMETERS = [[
        'section' => [
            'name' => 'Site section',
            'type' => 'list',
            'defaultValue' => 'index',
            'values' => [
                'All' => 'index',
                'Apple' => 'apple',
                'Board Games' => 'cardboard',
                'Cars' => 'cars',
                'Features' => 'features',
                'Gaming' => 'gaming',
                'Information Technology' => 'technology-lab',
                'Science' => 'science',
                'Staff Blogs' => 'staff-blogs',
                'Tech Policy' => 'tech-policy',
                'Tech' => 'gadgets',
            ]
        ]
    ]];

    /**
     * Load the feed of the selected section, limited to 10 items.
     */
    public function collectData()
    {
        $url = 'https://feeds.arstechnica.com/arstechnica/' . $this->getInput('section');
        $this->collectExpandableDatas($url, 10);
    }

    /**
     * Replace the feed item's content with the full article markup.
     *
     * @param array $item Feed item as parsed by the parent expander
     * @return array The item with expanded content and, when available, a uid
     */
    protected function parseItem(array $item)
    {
        $item_html = getSimpleHTMLDOMCached($item['uri']);
        $item_html = defaultLinkTo($item_html, self::URI);

        $content = $item_html->find('.article-content', 0);
        if (!$content) {
            // Article body not found: keep the item exactly as the feed delivered it.
            return $item;
        }

        // The second-to-last pagination link carries the number of the last page.
        $pages = $item_html->find('nav.page-numbers > .numbers > a', -2);
        if (null !== $pages) {
            // Cast once so the loop bound is a real integer; a non-numeric label
            // becomes 0 and simply skips the loop.
            $lastPage = (int) trim($pages->innertext);
            $combined = (string) $content;
            for ($i = 2; $i <= $lastPage; $i++) {
                $page_url = $item['uri'] . '&page=' . $i;
                $page_html = getSimpleHTMLDOMCached($page_url);
                $page_html = defaultLinkTo($page_html, self::URI);
                $combined .= $page_html->find('.article-content', 0);
            }

            // Re-parse the concatenated markup so the clean-up below covers every page.
            // If parsing fails, the first page's node is kept.
            $parsed = str_get_html($combined);
            if ($parsed) {
                $content = $parsed;
            }
        }

        // remove various ars advertising
        $social = $content->find('#social-left', 0);
        if ($social) {
            $social->remove();
        }
        foreach (['.ars-component-buy-box', '.ad_wrapper', '.sidebar'] as $selector) {
            foreach ($content->find($selector) as $ad) {
                $ad->remove();
            }
        }

        $item['content'] = backgroundToImg($content);

        // The uid is whatever follows the first '=' in the item URI; it is only
        // set when the URI actually contains one.
        $uriParts = explode('=', $item['uri']);
        if (isset($uriParts[1])) {
            $item['uid'] = $uriParts[1];
        }

        return $item;
    }
}
|
|
@ -156,6 +156,10 @@ class Arte7Bridge extends BridgeAbstract
|
|||
. $element['mainImage']['url']
|
||||
. '" /></a>';
|
||||
|
||||
$item['itunes'] = [
|
||||
'duration' => $durationSeconds,
|
||||
];
|
||||
|
||||
$this->items[] = $item;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -37,7 +37,8 @@ class AskfmBridge extends BridgeAbstract
|
|||
|
||||
$item['timestamp'] = strtotime($element->find('time', 0)->datetime);
|
||||
|
||||
$answer = trim($element->find('div.streamItem_content', 0)->innertext);
|
||||
$var = $element->find('div.streamItem_content', 0);
|
||||
$answer = trim($var->innertext ?? '');
|
||||
|
||||
// This probably should be cleaned up, especially for YouTube embeds
|
||||
if ($visual = $element->find('div.streamItem_visual', 0)) {
|
||||
|
|
|
@ -30,6 +30,9 @@ class AtmoNouvelleAquitaineBridge extends BridgeAbstract
|
|||
|
||||
public function collectData()
|
||||
{
|
||||
// this bridge is broken and unmaintained
|
||||
return;
|
||||
|
||||
$uri = self::URI . '/monair/commune/' . $this->getInput('cities');
|
||||
|
||||
$html = getSimpleHTMLDOM($uri);
|
||||
|
|
|
@ -13,12 +13,20 @@ class AutoJMBridge extends BridgeAbstract
|
|||
'type' => 'text',
|
||||
'required' => true,
|
||||
'title' => 'URL d\'une recherche avec filtre de véhicules sans le http://www.autojm.fr/',
|
||||
'exampleValue' => 'recherche?brands[]=peugeot&ranges[]=peugeot-nouvelle-308-2021-5p'
|
||||
'exampleValue' => 'recherche?brands[]=PEUGEOT&ranges[]=PEUGEOT 308'
|
||||
],
|
||||
]
|
||||
];
|
||||
|
||||
const CACHE_TIMEOUT = 3600;
|
||||
|
||||
const TEST_DETECT_PARAMETERS = [
|
||||
'https://www.autojm.fr/recherche?brands%5B%5D=PEUGEOT&ranges%5B%5D=PEUGEOT%20308'
|
||||
=> ['url' => 'recherche?brands%5B%5D=PEUGEOT&ranges%5B%5D=PEUGEOT%20308',
|
||||
'context' => 'Afficher les offres de véhicules disponible sur la recheche AutoJM'
|
||||
]
|
||||
];
|
||||
|
||||
public function getIcon()
|
||||
{
|
||||
return self::URI . 'favicon.ico';
|
||||
|
@ -35,6 +43,17 @@ class AutoJMBridge extends BridgeAbstract
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Return the URI matching the current context.
 *
 * @return string The base URI followed by the 'url' input when the search
 *                context is queried, the plain base URI otherwise
 */
public function getURI()
{
    // Strict comparison; the former switch also carried an unreachable `break` after `return`.
    if ($this->queriedContext === 'Afficher les offres de véhicules disponible sur la recheche AutoJM') {
        return self::URI . $this->getInput('url');
    }

    return self::URI;
}
|
||||
|
||||
public function collectData()
|
||||
{
|
||||
// Get the number of result for this search
|
||||
|
@ -52,7 +71,7 @@ class AutoJMBridge extends BridgeAbstract
|
|||
$data = json_decode($json);
|
||||
|
||||
$nb_results = $data->nbResults;
|
||||
$total_pages = ceil($nb_results / 15);
|
||||
$total_pages = ceil($nb_results / 14);
|
||||
|
||||
// Limit the number of page to analyse to 10
|
||||
for ($page = 1; $page <= $total_pages && $page <= 10; $page++) {
|
||||
|
@ -66,8 +85,8 @@ class AutoJMBridge extends BridgeAbstract
|
|||
$image = $car->find('div[class=card-car__header__img]', 0)->find('img', 0)->src;
|
||||
// Decode HTML attribute JSON data
|
||||
$car_data = json_decode(html_entity_decode($car->{'data-layer'}));
|
||||
$car_model = $car->{'data-title'} . ' ' . $car->{'data-suptitle'};
|
||||
$availability = $car->find('div[class=card-car__modalites]', 0)->find('div[class=col]', 0)->plaintext;
|
||||
$car_model = $car_data->title;
|
||||
$availability = $car->find('div[class*=card-car__modalites]', 0)->find('div[class=col]', 0)->plaintext;
|
||||
$warranty = $car->find('div[data-type=WarrantyCard]', 0)->plaintext;
|
||||
$discount_html = $car->find('div[class=subtext vehicle_reference_element]', 0);
|
||||
// Check if there is any discount info displayed
|
||||
|
@ -132,4 +151,18 @@ class AutoJMBridge extends BridgeAbstract
|
|||
|
||||
return $html;
|
||||
}
|
||||
|
||||
/**
 * Derive the bridge parameters from an AutoJM search URL.
 *
 * Accepts `recherche?...` and `recherche/<id>?...` paths, with optional scheme
 * and `www.` prefix. The numeric id segment is dropped so the stored value
 * always starts with `recherche?`.
 *
 * @param string $url URL to inspect
 * @return array|null Parameters ('url' and 'context'), or null when the URL does not match
 */
public function detectParameters($url)
{
    // Group 3 is the search path plus query string; the dot in the host name is escaped.
    $regex = '/^(https?:\/\/)?(www\.)?autojm\.fr\/(recherche(?:\/[0-9]{1,10})?\?.*)$/m';
    if (preg_match($regex, $url, $matches) !== 1) {
        return null;
    }

    // Anchored on purpose: only the leading "recherche/<id>" is rewritten, never
    // an occurrence of "recherche" inside the query string.
    $search = preg_replace('#^recherche/[0-9]{1,10}#', 'recherche', $matches[3]);

    return [
        'url' => $search,
        'context' => 'Afficher les offres de véhicules disponible sur la recheche AutoJM',
    ];
}
|
||||
}
|
||||
|
|
|
@ -14,29 +14,10 @@ class AwwwardsBridge extends BridgeAbstract
|
|||
|
||||
private $sites = [];
|
||||
|
||||
public function getIcon()
|
||||
{
|
||||
return 'https://www.awwwards.com/favicon.ico';
|
||||
}
|
||||
|
||||
private function fetchSites()
|
||||
{
|
||||
Debug::log('Fetching all sites');
|
||||
$sites = getSimpleHTMLDOM(self::SITESURI);
|
||||
|
||||
Debug::log('Parsing all JSON data');
|
||||
foreach ($sites->find('.grid-sites li') as $site) {
|
||||
$decode = html_entity_decode($site->attr['data-collectable-model-value'], ENT_QUOTES, 'utf-8');
|
||||
$decode = json_decode($decode, true);
|
||||
$this->sites[] = $decode;
|
||||
}
|
||||
}
|
||||
|
||||
public function collectData()
|
||||
{
|
||||
$this->fetchSites();
|
||||
|
||||
Debug::log('Building RSS feed');
|
||||
foreach ($this->sites as $site) {
|
||||
$item = [];
|
||||
$item['title'] = $site['title'];
|
||||
|
@ -56,4 +37,23 @@ class AwwwardsBridge extends BridgeAbstract
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Return the icon URL of the bridge.
 *
 * @return string Fixed favicon URL on www.awwwards.com
 */
public function getIcon()
|
||||
{
|
||||
return 'https://www.awwwards.com/favicon.ico';
|
||||
}
|
||||
|
||||
/**
 * Load the sites listing and store the decoded JSON model of every grid entry.
 *
 * Entries without a `data-collectable-model-value` attribute are ignored.
 */
private function fetchSites()
{
    $page = getSimpleHTMLDOM(self::SITESURI);

    foreach ($page->find('.grid-sites li') as $entry) {
        $raw = $entry->attr['data-collectable-model-value'] ?? null;
        if ($raw) {
            // The attribute holds HTML-escaped JSON.
            $decoded = html_entity_decode($raw, ENT_QUOTES, 'utf-8');
            $this->sites[] = Json::decode($decoded);
        }
    }
}
|
||||
}
|
||||
|
|
|
@ -138,6 +138,7 @@ class BadDragonBridge extends BridgeAbstract
|
|||
// Sale
|
||||
$regex = '/^(https?:\/\/)?bad-dragon\.com\/sales/';
|
||||
if (preg_match($regex, $url, $matches) > 0) {
|
||||
$params['context'] = 'Sales';
|
||||
return $params;
|
||||
}
|
||||
|
||||
|
@ -192,6 +193,7 @@ class BadDragonBridge extends BridgeAbstract
|
|||
isset($urlParams['noAccessories'])
|
||||
&& $urlParams['noAccessories'] === '1'
|
||||
&& $params['noAccessories'] = 'on';
|
||||
$params['context'] = 'Clearance';
|
||||
|
||||
return $params;
|
||||
}
|
||||
|
|
|
@ -397,6 +397,7 @@ class BandcampBridge extends BridgeAbstract
|
|||
// By tag
|
||||
$regex = '/^(https?:\/\/)?bandcamp\.com\/tag\/([^\/.&?\n]+)/';
|
||||
if (preg_match($regex, $url, $matches) > 0) {
|
||||
$params['context'] = 'By tag';
|
||||
$params['tag'] = urldecode($matches[2]);
|
||||
return $params;
|
||||
}
|
||||
|
@ -404,6 +405,7 @@ class BandcampBridge extends BridgeAbstract
|
|||
// By band
|
||||
$regex = '/^(https?:\/\/)?([^\/.&?\n]+?)\.bandcamp\.com/';
|
||||
if (preg_match($regex, $url, $matches) > 0) {
|
||||
$params['context'] = 'By band';
|
||||
$params['band'] = urldecode($matches[2]);
|
||||
return $params;
|
||||
}
|
||||
|
@ -411,6 +413,7 @@ class BandcampBridge extends BridgeAbstract
|
|||
// By album
|
||||
$regex = '/^(https?:\/\/)?([^\/.&?\n]+?)\.bandcamp\.com\/album\/([^\/.&?\n]+)/';
|
||||
if (preg_match($regex, $url, $matches) > 0) {
|
||||
$params['context'] = 'By album';
|
||||
$params['band'] = urldecode($matches[2]);
|
||||
$params['album'] = urldecode($matches[3]);
|
||||
return $params;
|
||||
|
|
|
@ -8,48 +8,27 @@ class BinanceBridge extends BridgeAbstract
|
|||
const MAINTAINER = 'thefranke';
|
||||
const CACHE_TIMEOUT = 3600; // 1h
|
||||
|
||||
/**
 * Fetch the first page of the blog list API and turn every post into a feed item.
 */
public function collectData()
{
    $url = 'https://www.binance.com/bapi/composite/v1/public/content/blog/list?category=&tag=&page=1&size=12';
    $json = getContents($url);
    $data = Json::decode($json, false);

    foreach ($data->data->blogList as $post) {
        $item = [];
        $item['title'] = $post->title;
        // No 'uri' and no 'categories': the URL slug is not part of the JSON.
        // postTimeUTC is divided by 1000 (presumably milliseconds - confirm against the API);
        // intdiv() keeps the result an integer where `/` could yield a float.
        $item['timestamp'] = intdiv((int) $post->postTimeUTC, 1000);
        $item['author'] = 'Binance';
        $item['content'] = $post->brief;
        $item['uid'] = $post->idStr;
        $this->items[] = $item;
    }
}
|
||||
|
||||
/**
 * Return the icon URL of the bridge.
 *
 * @return string Fixed favicon URL on bin.bnbstatic.com
 */
public function getIcon()
|
||||
{
|
||||
return 'https://bin.bnbstatic.com/static/images/common/favicon.ico';
|
||||
}
|
||||
|
||||
public function collectData()
|
||||
{
|
||||
$html = getSimpleHTMLDOM(self::URI)
|
||||
or returnServerError('Could not fetch Binance blog data.');
|
||||
|
||||
$appData = $html->find('script[id="__APP_DATA"]');
|
||||
$appDataJson = json_decode($appData[0]->innertext);
|
||||
$allposts = $appDataJson->routeProps->f3ac->blogListRes->list;
|
||||
|
||||
foreach ($allposts as $element) {
|
||||
$date = $element->releasedTime;
|
||||
$title = $element->title;
|
||||
$category = $element->category->name;
|
||||
|
||||
$suburl = strtolower($category);
|
||||
$suburl = str_replace(' ', '_', $suburl);
|
||||
|
||||
$uri = self::URI . '/' . $suburl . '/' . $element->idStr;
|
||||
|
||||
$contentHTML = getSimpleHTMLDOMCached($uri);
|
||||
$contentAppData = $contentHTML->find('script[id="__APP_DATA"]');
|
||||
$contentAppDataJson = json_decode($contentAppData[0]->innertext);
|
||||
$content = $contentAppDataJson->routeProps->a106->blogDetail->content;
|
||||
|
||||
$item = [];
|
||||
$item['title'] = $title;
|
||||
$item['uri'] = $uri;
|
||||
$item['timestamp'] = substr($date, 0, -3);
|
||||
$item['author'] = 'Binance';
|
||||
$item['content'] = $content;
|
||||
$item['categories'] = $category;
|
||||
|
||||
$this->items[] = $item;
|
||||
|
||||
if (count($this->items) >= 10) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -7,10 +7,14 @@ class BleepingComputerBridge extends FeedExpander
|
|||
const URI = 'https://www.bleepingcomputer.com/';
|
||||
const DESCRIPTION = 'Returns the newest articles.';
|
||||
|
||||
protected function parseItem($item)
|
||||
public function collectData()
|
||||
{
|
||||
$item = parent::parseItem($item);
|
||||
$feed = static::URI . 'feed/';
|
||||
$this->collectExpandableDatas($feed);
|
||||
}
|
||||
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$article_html = getSimpleHTMLDOMCached($item['uri']);
|
||||
if (!$article_html) {
|
||||
$item['content'] .= '<p><em>Could not request ' . $this->getName() . ': ' . $item['uri'] . '</em></p>';
|
||||
|
@ -23,10 +27,4 @@ class BleepingComputerBridge extends FeedExpander
|
|||
|
||||
return $item;
|
||||
}
|
||||
|
||||
public function collectData()
|
||||
{
|
||||
$feed = static::URI . 'feed/';
|
||||
$this->collectExpandableDatas($feed);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -38,50 +38,20 @@ class BrutBridge extends BridgeAbstract
|
|||
]
|
||||
];
|
||||
|
||||
const CACHE_TIMEOUT = 1800; // 30 mins
|
||||
|
||||
private $jsonRegex = '/window\.__PRELOADED_STATE__ = ((?:.*)});/';
|
||||
|
||||
public function collectData()
|
||||
{
|
||||
$html = getSimpleHTMLDOM($this->getURI());
|
||||
|
||||
$results = $html->find('div.results', 0);
|
||||
|
||||
foreach ($results->find('li.col-6.col-sm-4.col-md-3.col-lg-2.px-2.pb-4') as $li) {
|
||||
$item = [];
|
||||
|
||||
$videoPath = self::URI . $li->children(0)->href;
|
||||
$videoPageHtml = getSimpleHTMLDOMCached($videoPath, 3600);
|
||||
|
||||
$json = $this->extractJson($videoPageHtml);
|
||||
$id = array_keys((array) $json->media->index)[0];
|
||||
|
||||
$item['uri'] = $videoPath;
|
||||
$item['title'] = $json->media->index->$id->title;
|
||||
$item['timestamp'] = $json->media->index->$id->published_at;
|
||||
$item['enclosures'][] = $json->media->index->$id->media->thumbnail;
|
||||
|
||||
$description = $json->media->index->$id->description;
|
||||
$article = '';
|
||||
|
||||
if (is_null($json->media->index->$id->media->seo_article) === false) {
|
||||
$article = markdownToHtml($json->media->index->$id->media->seo_article);
|
||||
}
|
||||
|
||||
$item['content'] = <<<EOD
|
||||
<video controls poster="{$json->media->index->$id->media->thumbnail}" preload="none">
|
||||
<source src="{$json->media->index->$id->media->mp4_url}" type="video/mp4">
|
||||
</video>
|
||||
<p>{$description}</p>
|
||||
{$article}
|
||||
EOD;
|
||||
|
||||
$this->items[] = $item;
|
||||
|
||||
if (count($this->items) >= 10) {
|
||||
break;
|
||||
}
|
||||
$url = $this->getURI();
|
||||
$html = getSimpleHTMLDOM($url);
|
||||
$regex = '/window.__PRELOADED_STATE__ = (.*);/';
|
||||
preg_match($regex, $html, $parts);
|
||||
$data = Json::decode($parts[1], false);
|
||||
foreach ($data->medias->index as $uid => $media) {
|
||||
$this->items[] = [
|
||||
'uid' => $uid,
|
||||
'title' => $media->metadata->slug,
|
||||
'uri' => $media->share_url,
|
||||
'timestamp' => $media->published_at,
|
||||
];
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -90,35 +60,14 @@ EOD;
|
|||
if (!is_null($this->getInput('edition')) && !is_null($this->getInput('category'))) {
|
||||
return self::URI . '/' . $this->getInput('edition') . '/' . $this->getInput('category');
|
||||
}
|
||||
|
||||
return parent::getURI();
|
||||
}
|
||||
|
||||
public function getName()
|
||||
{
|
||||
if (!is_null($this->getInput('edition')) && !is_null($this->getInput('category'))) {
|
||||
return $this->getKey('category') . ' - ' .
|
||||
$this->getKey('edition') . ' - Brut.';
|
||||
return $this->getKey('category') . ' - ' . $this->getKey('edition') . ' - Brut.';
|
||||
}
|
||||
|
||||
return parent::getName();
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract JSON from page
|
||||
*/
|
||||
private function extractJson($html)
|
||||
{
|
||||
if (!preg_match($this->jsonRegex, $html, $parts)) {
|
||||
returnServerError('Failed to extract data from page');
|
||||
}
|
||||
|
||||
$data = json_decode($parts[1]);
|
||||
|
||||
if ($data === false) {
|
||||
returnServerError('Failed to decode extracted data');
|
||||
}
|
||||
|
||||
return $data;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -159,7 +159,7 @@ class BugzillaBridge extends BridgeAbstract
|
|||
protected function getUser($user)
|
||||
{
|
||||
// Check if the user endpoint is available
|
||||
if ($this->loadCacheValue($this->instance . 'userEndpointClosed', 86400)) {
|
||||
if ($this->loadCacheValue($this->instance . 'userEndpointClosed')) {
|
||||
return $user;
|
||||
}
|
||||
|
||||
|
|
|
@ -71,7 +71,9 @@ class BundesbankBridge extends BridgeAbstract
|
|||
$item['content'] .= '<strong>' . $study->find('.teasable__subtitle', 0)->plaintext . '</strong>';
|
||||
}
|
||||
|
||||
$item['content'] .= '<p>' . $study->find('.teasable__text', 0)->plaintext . '</p>';
|
||||
$teasable = $study->find('.teasable__text', 0);
|
||||
$teasableText = $teasable->plaintext ?? '';
|
||||
$item['content'] .= '<p>' . $teasableText . '</p>';
|
||||
|
||||
$item['timestamp'] = strtotime($study->find('.teasable__date', 0)->plaintext);
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
<?php
|
||||
|
||||
class CNETBridge extends BridgeAbstract
|
||||
class CNETBridge extends SitemapBridge
|
||||
{
|
||||
const MAINTAINER = 'ORelio';
|
||||
const NAME = 'CNET News';
|
||||
|
@ -14,101 +14,101 @@ class CNETBridge extends BridgeAbstract
|
|||
'type' => 'list',
|
||||
'values' => [
|
||||
'All articles' => '',
|
||||
'Apple' => 'apple',
|
||||
'Google' => 'google',
|
||||
'Microsoft' => 'tags-microsoft',
|
||||
'Computers' => 'topics-computers',
|
||||
'Mobile' => 'topics-mobile',
|
||||
'Sci-Tech' => 'topics-sci-tech',
|
||||
'Security' => 'topics-security',
|
||||
'Internet' => 'topics-internet',
|
||||
'Tech Industry' => 'topics-tech-industry'
|
||||
'Tech' => 'tech',
|
||||
'Money' => 'personal-finance',
|
||||
'Home' => 'home',
|
||||
'Wellness' => 'health',
|
||||
'Energy' => 'home/energy-and-utilities',
|
||||
'Deals' => 'deals',
|
||||
'Computing' => 'tech/computing',
|
||||
'Mobile' => 'tech/mobile',
|
||||
'Science' => 'science',
|
||||
'Services' => 'tech/services-and-software'
|
||||
]
|
||||
]
|
||||
],
|
||||
'limit' => self::LIMIT
|
||||
]
|
||||
];
|
||||
|
||||
private function cleanArticle($article_html)
|
||||
{
|
||||
$offset_p = strpos($article_html, '<p>');
|
||||
$offset_figure = strpos($article_html, '<figure');
|
||||
$offset = ($offset_figure < $offset_p ? $offset_figure : $offset_p);
|
||||
$article_html = substr($article_html, $offset);
|
||||
$article_html = str_replace('href="/', 'href="' . self::URI, $article_html);
|
||||
$article_html = str_replace(' height="0"', '', $article_html);
|
||||
$article_html = str_replace('<noscript>', '', $article_html);
|
||||
$article_html = str_replace('</noscript>', '', $article_html);
|
||||
$article_html = StripWithDelimiters($article_html, '<a class="clickToEnlarge', '</a>');
|
||||
$article_html = stripWithDelimiters($article_html, '<span class="nowPlaying', '</span>');
|
||||
$article_html = stripWithDelimiters($article_html, '<span class="duration', '</span>');
|
||||
$article_html = stripWithDelimiters($article_html, '<script', '</script>');
|
||||
$article_html = stripWithDelimiters($article_html, '<svg', '</svg>');
|
||||
return $article_html;
|
||||
}
|
||||
|
||||
public function collectData()
|
||||
{
|
||||
// Retrieve and check user input
|
||||
$topic = str_replace('-', '/', $this->getInput('topic'));
|
||||
if (!empty($topic) && (substr_count($topic, '/') > 1 || !ctype_alpha(str_replace('/', '', $topic)))) {
|
||||
returnClientError('Invalid topic: ' . $topic);
|
||||
$topic = $this->getInput('topic');
|
||||
$limit = $this->getInput('limit');
|
||||
$limit = empty($limit) ? 10 : $limit;
|
||||
|
||||
$url_pattern = empty($topic) ? '' : self::URI . $topic;
|
||||
$sitemap_latest = self::URI . 'sitemaps/article/' . date('Y/m') . '.xml';
|
||||
$sitemap_previous = self::URI . 'sitemaps/article/' . date('Y/m', strtotime('last day of previous month')) . '.xml';
|
||||
|
||||
$links = array_merge(
|
||||
$this->sitemapXmlToList($this->getSitemapXml($sitemap_latest, true), $url_pattern, $limit),
|
||||
$this->sitemapXmlToList($this->getSitemapXml($sitemap_previous, true), $url_pattern, $limit)
|
||||
);
|
||||
|
||||
if ($limit > 0 && count($links) > $limit) {
|
||||
$links = array_slice($links, 0, $limit);
|
||||
}
|
||||
|
||||
// Retrieve webpage
|
||||
$pageUrl = self::URI . (empty($topic) ? 'news/' : $topic . '/');
|
||||
$html = getSimpleHTMLDOM($pageUrl);
|
||||
if (empty($links)) {
|
||||
returnClientError('Failed to retrieve article list');
|
||||
}
|
||||
|
||||
// Process articles
|
||||
foreach ($html->find('div.assetBody, div.riverPost') as $element) {
|
||||
if (count($this->items) >= 10) {
|
||||
break;
|
||||
}
|
||||
foreach ($links as $article_uri) {
|
||||
$article_dom = convertLazyLoading(getSimpleHTMLDOMCached($article_uri));
|
||||
$title = trim($article_dom->find('h1', 0)->plaintext);
|
||||
$author = $article_dom->find('span.c-assetAuthor_name', 0)->plaintext;
|
||||
$headline = $article_dom->find('p.c-contentHeader_description', 0);
|
||||
$content = $article_dom->find('div.c-pageArticle_content, div.single-article__content, div.article-main-body', 0);
|
||||
$date = null;
|
||||
$enclosure = null;
|
||||
|
||||
$article_title = trim($element->find('h2, h3', 0)->plaintext);
|
||||
$article_uri = self::URI . substr($element->find('a', 0)->href, 1);
|
||||
$article_thumbnail = $element->parent()->find('img[src]', 0)->src;
|
||||
$article_timestamp = strtotime($element->find('time.assetTime, div.timeAgo', 0)->plaintext);
|
||||
$article_author = trim($element->find('a[rel=author], a.name', 0)->plaintext);
|
||||
$article_content = '<p><b>' . trim($element->find('p.dek', 0)->plaintext) . '</b></p>';
|
||||
|
||||
if (is_null($article_thumbnail)) {
|
||||
$article_thumbnail = extractFromDelimiters($element->innertext, '<img src="', '"');
|
||||
}
|
||||
|
||||
if (!empty($article_title) && !empty($article_uri) && strpos($article_uri, self::URI . 'news/') !== false) {
|
||||
$article_html = getSimpleHTMLDOMCached($article_uri) or $article_html = null;
|
||||
|
||||
if (!is_null($article_html)) {
|
||||
if (empty($article_thumbnail)) {
|
||||
$article_thumbnail = $article_html->find('div.originalImage', 0);
|
||||
}
|
||||
if (empty($article_thumbnail)) {
|
||||
$article_thumbnail = $article_html->find('span.imageContainer', 0);
|
||||
}
|
||||
if (is_object($article_thumbnail)) {
|
||||
$article_thumbnail = $article_thumbnail->find('img', 0)->src;
|
||||
}
|
||||
|
||||
$article_content .= trim(
|
||||
$this->cleanArticle(
|
||||
extractFromDelimiters(
|
||||
$article_html,
|
||||
'<article',
|
||||
'<footer'
|
||||
)
|
||||
)
|
||||
);
|
||||
foreach ($article_dom->find('script[type=application/ld+json]') as $ldjson) {
|
||||
$datePublished = extractFromDelimiters($ldjson->innertext, '"datePublished":"', '"');
|
||||
if ($datePublished !== false) {
|
||||
$date = strtotime($datePublished);
|
||||
}
|
||||
$imageObject = extractFromDelimiters($ldjson->innertext, 'ImageObject","url":"', '"');
|
||||
if ($imageObject !== false) {
|
||||
$enclosure = $imageObject;
|
||||
}
|
||||
|
||||
$item = [];
|
||||
$item['uri'] = $article_uri;
|
||||
$item['title'] = $article_title;
|
||||
$item['author'] = $article_author;
|
||||
$item['timestamp'] = $article_timestamp;
|
||||
$item['enclosures'] = [$article_thumbnail];
|
||||
$item['content'] = $article_content;
|
||||
$this->items[] = $item;
|
||||
}
|
||||
|
||||
foreach ($content->find('div.c-shortcodeGallery') as $cleanup) {
|
||||
$cleanup->outertext = '';
|
||||
}
|
||||
|
||||
foreach ($content->find('figure') as $figure) {
|
||||
$img = $figure->find('img', 0);
|
||||
if ($img) {
|
||||
$figure->outertext = $img->outertext;
|
||||
}
|
||||
}
|
||||
|
||||
$content = $content->innertext;
|
||||
|
||||
if ($enclosure) {
|
||||
$content = "<div><img src=\"$enclosure\" /></div>" . $content;
|
||||
}
|
||||
|
||||
if ($headline) {
|
||||
$content = '<p><b>' . $headline->plaintext . '</b></p><br />' . $content;
|
||||
}
|
||||
|
||||
$item = [];
|
||||
$item['uri'] = $article_uri;
|
||||
$item['title'] = $title;
|
||||
$item['author'] = $author;
|
||||
$item['content'] = $content;
|
||||
|
||||
if (!is_null($date)) {
|
||||
$item['timestamp'] = $date;
|
||||
}
|
||||
|
||||
if (!is_null($enclosure)) {
|
||||
$item['enclosures'] = [$enclosure];
|
||||
}
|
||||
|
||||
$this->items[] = $item;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -43,10 +43,8 @@ class CNETFranceBridge extends FeedExpander
|
|||
$this->collectExpandableDatas('https://www.cnetfrance.fr/feeds/rss/news/');
|
||||
}
|
||||
|
||||
protected function parseItem($feedItem)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($feedItem);
|
||||
|
||||
foreach ($this->bannedTitle as $term) {
|
||||
if (preg_match('/' . $term . '/mi', $item['title']) === 1) {
|
||||
return null;
|
||||
|
@ -54,7 +52,7 @@ class CNETFranceBridge extends FeedExpander
|
|||
}
|
||||
|
||||
foreach ($this->bannedURL as $term) {
|
||||
if (preg_match('/' . $term . '/mi', $item['uri']) === 1) {
|
||||
if (preg_match('#' . $term . '#mi', $item['uri'])) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -36,12 +36,65 @@ class CVEDetailsBridge extends BridgeAbstract
|
|||
private $vendor = '';
|
||||
private $product = '';
|
||||
|
||||
// Return the URL to query.
|
||||
// Because of the optional product ID, we need to attach it if it is
|
||||
// set. The search result page has the exact same structure (with and
|
||||
// without the product ID).
|
||||
private function buildUrl()
|
||||
/**
 * Turn the rows of the search result page into feed items (at most 10).
 *
 * For every row the linked CVE detail page is fetched as well; the CWE ids
 * found there are added as categories, with links to cwe.mitre.org as enclosures.
 */
public function collectData()
{
    if ($this->html == null) {
        $this->fetchContent();
    }

    foreach ($this->html->find('#searchresults > .row') as $tr) {
        $detailLink = $tr->find('h3 > a', 0);
        if (!$detailLink) {
            // A row without a CVE link carries nothing to report.
            continue;
        }

        // Categories: vendor first, then CWE ids, then the product (if any).
        $categories = [$this->vendor];
        $enclosures = [];

        $detailHtml = getSimpleHTMLDOM($detailLink->href);

        // The CVE number itself
        $title = $detailLink->innertext;
        $content = $tr->find('.cvesummarylong', 0)->innertext ?? '';

        // The CWE list is the element following the third <h2> of the detail page;
        // either may be missing, in which case no CWE is reported.
        $cweHeading = $detailHtml->find('h2', 2);
        $cweList = $cweHeading ? $cweHeading->next_sibling() : null;
        $cweItems = $cweList ? $cweList->find('li') : [];
        foreach ($cweItems as $li) {
            $cweWithDescription = $li->find('a', 0)->innertext ?? '';

            if (preg_match('/CWE-(\d+)/', $cweWithDescription, $cwe)) {
                $categories[] = 'CWE-' . $cwe[1];
                $enclosures[] = 'https://cwe.mitre.org/data/definitions/' . $cwe[1] . '.html';
            }
        }

        if ($this->product != '') {
            $categories[] = $this->product;
        }

        // Prefer the link in the detail page heading; ltrim() avoids a double slash
        // should that href be root-relative. Fall back to the link that was followed.
        $canonical = $detailHtml->find('h1 > a', 0);
        $uri = $canonical
            ? 'https://cvedetails.com/' . ltrim($canonical->href, '/')
            : $detailLink->href;

        $item = [
            'uri' => $uri,
            'title' => $title,
            'content' => $content,
            'categories' => $categories,
            'enclosures' => $enclosures,
            'uid' => $title,
        ];

        $published = $tr->find('[data-tsvfield="publishDate"]', 0);
        if ($published) {
            $item['timestamp'] = $published->innertext;
        }

        $this->items[] = $item;

        // We only want to fetch the latest 10 CVEs
        if (count($this->items) >= 10) {
            break;
        }
    }
}
|
||||
|
||||
// Make the actual request to cvedetails.com and stores the response
|
||||
// (HTML) for later use and extract vendor and product from it.
|
||||
private function fetchContent()
|
||||
{
|
||||
// build url
|
||||
// Return the URL to query.
|
||||
// Because of the optional product ID, we need to attach it if it is
|
||||
// set. The search result page has the exact same structure (with and
|
||||
// without the product ID).
|
||||
$url = self::URI . '/vulnerability-list/vendor_id-' . $this->getInput('vendor_id');
|
||||
if ($this->getInput('product_id') !== '') {
|
||||
$url .= '/product_id-' . $this->getInput('product_id');
|
||||
|
@ -51,32 +104,21 @@ class CVEDetailsBridge extends BridgeAbstract
|
|||
// number, which should be mostly accurate.
|
||||
$url .= '?order=1'; // Order by CVE number DESC
|
||||
|
||||
return $url;
|
||||
}
|
||||
|
||||
// Make the actual request to cvedetails.com and stores the response
|
||||
// (HTML) for later use and extract vendor and product from it.
|
||||
private function fetchContent()
|
||||
{
|
||||
$html = getSimpleHTMLDOM($this->buildUrl());
|
||||
$html = getSimpleHTMLDOM($url);
|
||||
$this->html = defaultLinkTo($html, self::URI);
|
||||
|
||||
$vendor = $html->find('#contentdiv > h1 > a', 0);
|
||||
$vendor = $html->find('#contentdiv h1 > a', 0);
|
||||
if ($vendor == null) {
|
||||
returnServerError('Invalid Vendor ID ' .
|
||||
$this->getInput('vendor_id') .
|
||||
' or Product ID ' .
|
||||
$this->getInput('product_id'));
|
||||
returnServerError('Invalid Vendor ID ' . $this->getInput('vendor_id') . ' or Product ID ' . $this->getInput('product_id'));
|
||||
}
|
||||
$this->vendor = $vendor->innertext;
|
||||
|
||||
$product = $html->find('#contentdiv > h1 > a', 1);
|
||||
$product = $html->find('#contentdiv h1 > a', 1);
|
||||
if ($product != null) {
|
||||
$this->product = $product->innertext;
|
||||
}
|
||||
}
|
||||
|
||||
// Build the name of the feed.
|
||||
public function getName()
|
||||
{
|
||||
if ($this->getInput('vendor_id') == '') {
|
||||
|
@ -94,52 +136,4 @@ class CVEDetailsBridge extends BridgeAbstract
|
|||
|
||||
return $name;
|
||||
}
|
||||
|
||||
/**
 * Turn the rows of the vulnerability table into feed items.
 *
 * Calls fetchContent() first when $this->html has not been set yet
 * (presumably that call populates $this->html — confirm against fetchContent()).
 * Stops as soon as 10 items have been collected.
 */
public function collectData()
{
    if ($this->html == null) {
        $this->fetchContent();
    }

    foreach ($this->html->find('#vulnslisttable .srrowns') as $rowIndex => $row) {
        // Categories always start with the vendor; the CWE number, the optional
        // vulnerability type and the product are appended when available.
        $tags = [$this->vendor];
        $attachments = [];

        $cweLink = $row->find('td', 2)->find('a', 0);
        if ($cweLink != null) {
            $cweNumber = $cweLink->innertext;
            $tags[] = 'CWE-' . $cweNumber;
            $attachments[] = 'https://cwe.mitre.org/data/definitions/' . $cweNumber . '.html';
        }

        $vulnerabilityType = $row->find('td', 4)->innertext;
        if (trim($vulnerabilityType) != '') {
            $tags[] = $vulnerabilityType;
        }

        if ($this->product != '') {
            $tags[] = $this->product;
        }

        // The link in the second cell carries the CVE number (text) and its URL (href).
        $cveLink = $row->find('td', 1)->find('a', 0);
        $cveNumber = $cveLink->innertext;

        $entry = [];
        $entry['uri'] = $cveLink->href;
        $entry['title'] = $cveNumber;
        $entry['timestamp'] = $row->find('td', 5)->innertext;
        // The item content is taken from the row that follows the current one.
        $entry['content'] = $row->next_sibling()->innertext;
        $entry['categories'] = $tags;
        $entry['enclosures'] = $attachments;
        $entry['uid'] = $cveNumber;
        $this->items[] = $entry;

        // We only want to fetch the latest 10 CVEs
        if (count($this->items) >= 10) {
            break;
        }
    }
}
|
||||
}
|
||||
|
|
|
@ -1,44 +1,51 @@
|
|||
<?php
|
||||
|
||||
class CarThrottleBridge extends FeedExpander
|
||||
class CarThrottleBridge extends BridgeAbstract
|
||||
{
|
||||
const NAME = 'Car Throttle ';
|
||||
const URI = 'https://www.carthrottle.com';
|
||||
const NAME = 'Car Throttle';
|
||||
const URI = 'https://www.carthrottle.com/';
|
||||
const DESCRIPTION = 'Get the latest car-related news from Car Throttle.';
|
||||
const MAINTAINER = 't0stiman';
|
||||
|
||||
public function collectData()
|
||||
{
|
||||
$this->collectExpandableDatas('https://www.carthrottle.com/rss', 10);
|
||||
}
|
||||
$news = getSimpleHTMLDOMCached(self::URI . 'news');
|
||||
|
||||
protected function parseItem($feedItem)
|
||||
{
|
||||
$item = parent::parseItem($feedItem);
|
||||
$this->items[] = [];
|
||||
|
||||
//fetch page
|
||||
$articlePage = getSimpleHTMLDOMCached($feedItem->link)
|
||||
or returnServerError('Could not retrieve ' . $feedItem->link);
|
||||
//for each post
|
||||
foreach ($news->find('div.cmg-card') as $post) {
|
||||
$item = [];
|
||||
|
||||
$subtitle = $articlePage->find('p.standfirst', 0);
|
||||
$article = $articlePage->find('div.content_field', 0);
|
||||
$titleElement = $post->find('div.title a.cmg-link')[0];
|
||||
$item['uri'] = self::URI . $titleElement->getAttribute('href');
|
||||
$item['title'] = $titleElement->innertext;
|
||||
|
||||
$item['content'] = str_get_html($subtitle . $article);
|
||||
$articlePage = getSimpleHTMLDOMCached($item['uri']);
|
||||
|
||||
//convert <iframe>s to <a>s. meant for embedded videos.
|
||||
foreach ($item['content']->find('iframe') as $found) {
|
||||
$iframeUrl = $found->getAttribute('src');
|
||||
|
||||
if ($iframeUrl) {
|
||||
$found->outertext = '<a href="' . $iframeUrl . '">' . $iframeUrl . '</a>';
|
||||
$authorDiv = $articlePage->find('div.author div');
|
||||
if ($authorDiv) {
|
||||
$item['author'] = $authorDiv[1]->innertext;
|
||||
}
|
||||
}
|
||||
|
||||
//remove scripts from the text
|
||||
foreach ($item['content']->find('script') as $remove) {
|
||||
$remove->outertext = '';
|
||||
}
|
||||
$dinges = $articlePage->find('div.main-body')[0] ?? null;
|
||||
//remove ads
|
||||
if ($dinges) {
|
||||
foreach ($dinges->find('aside') as $ad) {
|
||||
$ad->outertext = '';
|
||||
$dinges->save();
|
||||
}
|
||||
}
|
||||
|
||||
return $item;
|
||||
$var = $articlePage->find('div.summary')[0] ?? '';
|
||||
$var1 = $articlePage->find('figure.main-image')[0] ?? '';
|
||||
$dinges1 = $dinges ?? '';
|
||||
|
||||
$item['content'] = $var .
|
||||
$var1 .
|
||||
$dinges1;
|
||||
|
||||
array_push($this->items, $item);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -34,10 +34,8 @@ class CaschyBridge extends FeedExpander
|
|||
);
|
||||
}
|
||||
|
||||
protected function parseItem($feedItem)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($feedItem);
|
||||
|
||||
if (strpos($item['uri'], 'https://stadt-bremerhaven.de/') !== 0) {
|
||||
return $item;
|
||||
}
|
||||
|
@ -55,12 +53,20 @@ class CaschyBridge extends FeedExpander
|
|||
private function addArticleToItem($item, $article)
|
||||
{
|
||||
// remove unwanted stuff
|
||||
foreach ($article->find('div.video-container, div.aawp, p.aawp-disclaimer, iframe.wp-embedded-content, div.wp-embed, p.wp-caption-text') as $element) {
|
||||
foreach (
|
||||
$article->find('div.video-container, div.aawp, p.aawp-disclaimer, iframe.wp-embedded-content,
|
||||
div.wp-embed, p.wp-caption-text, script') as $element
|
||||
) {
|
||||
$element->remove();
|
||||
}
|
||||
// reload html, as remove() is buggy
|
||||
$article = str_get_html($article->outertext);
|
||||
|
||||
$categories = $article->find('div.post-category a');
|
||||
foreach ($categories as $category) {
|
||||
$item['categories'][] = $category->plaintext;
|
||||
}
|
||||
|
||||
$content = $article->find('div.entry-inner', 0);
|
||||
$item['content'] = $content;
|
||||
|
||||
|
|
|
@ -57,9 +57,9 @@ class CeskaTelevizeBridge extends BridgeAbstract
|
|||
$this->feedName .= " ({$category})";
|
||||
}
|
||||
|
||||
foreach ($html->find('#episodeListSection a[data-testid=next-link]') as $element) {
|
||||
foreach ($html->find('#episodeListSection a[data-testid=card]') as $element) {
|
||||
$itemTitle = $element->find('h3', 0);
|
||||
$itemContent = $element->find('div[class^=content-]', 0);
|
||||
$itemContent = $element->find('p[class^=content-]', 0);
|
||||
$itemDate = $element->find('div[class^=playTime-] span', 0);
|
||||
$itemThumbnail = $element->find('img', 0);
|
||||
$itemUri = self::URI . $element->getAttribute('href');
|
||||
|
|
|
@ -79,9 +79,9 @@ class CodebergBridge extends BridgeAbstract
|
|||
|
||||
public function collectData()
|
||||
{
|
||||
$html = getSimpleHTMLDOM($this->getURI());
|
||||
|
||||
$html = defaultLinkTo($html, $this->getURI());
|
||||
$url = $this->getURI();
|
||||
$html = getSimpleHTMLDOM($url);
|
||||
$html = defaultLinkTo($html, $url);
|
||||
|
||||
switch ($this->queriedContext) {
|
||||
case 'Commits':
|
||||
|
@ -181,7 +181,12 @@ class CodebergBridge extends BridgeAbstract
|
|||
$item['title'] = $message->find('span.message-wrapper', 0)->plaintext;
|
||||
$item['uri'] = $tr->find('td.sha', 0)->find('a', 0)->href;
|
||||
$item['author'] = $tr->find('td.author', 0)->plaintext;
|
||||
$item['timestamp'] = $tr->find('td', 3)->find('span', 0)->title;
|
||||
|
||||
$var = $tr->find('td', 3);
|
||||
$var1 = $var->find('span', 0);
|
||||
if ($var1) {
|
||||
$item['timestamp'] = $var1->title;
|
||||
}
|
||||
|
||||
if ($message->find('pre.commit-body', 0)) {
|
||||
$message->find('pre.commit-body', 0)->style = '';
|
||||
|
@ -200,17 +205,22 @@ class CodebergBridge extends BridgeAbstract
|
|||
*/
|
||||
private function extractIssues($html)
|
||||
{
|
||||
$div = $html->find('div.issue.list', 0);
|
||||
$issueList = $html->find('div#issue-list', 0);
|
||||
|
||||
foreach ($div->find('li.item') as $li) {
|
||||
foreach ($issueList->find('div.flex-item') as $div) {
|
||||
$item = [];
|
||||
|
||||
$number = trim($li->find('a.index,ml-0.mr-2', 0)->plaintext);
|
||||
$number = trim($div->find('a.index,ml-0.mr-2', 0)->plaintext);
|
||||
|
||||
$item['title'] = $li->find('a.title', 0)->plaintext . ' (' . $number . ')';
|
||||
$item['uri'] = $li->find('a.title', 0)->href;
|
||||
$item['timestamp'] = $li->find('span.time-since', 0)->title;
|
||||
$item['author'] = $li->find('div.desc', 0)->find('a', 1)->plaintext;
|
||||
$item['title'] = $div->find('a.issue-title', 0)->plaintext . ' (' . $number . ')';
|
||||
$item['uri'] = $div->find('a.issue-title', 0)->href;
|
||||
|
||||
$time = $div->find('relative-time.time-since', 0);
|
||||
if ($time) {
|
||||
$item['timestamp'] = $time->datetime;
|
||||
}
|
||||
|
||||
//$item['author'] = $li->find('div.desc', 0)->find('a', 1)->plaintext;
|
||||
|
||||
// Fetch issue page
|
||||
$issuePage = getSimpleHTMLDOMCached($item['uri'], 3600);
|
||||
|
@ -218,7 +228,7 @@ class CodebergBridge extends BridgeAbstract
|
|||
|
||||
$item['content'] = $issuePage->find('div.timeline-item.comment.first', 0)->find('div.render-content.markup', 0);
|
||||
|
||||
foreach ($li->find('a.ui.label') as $label) {
|
||||
foreach ($div->find('a.ui.label') as $label) {
|
||||
$item['categories'][] = $label->plaintext;
|
||||
}
|
||||
|
||||
|
@ -250,7 +260,11 @@ class CodebergBridge extends BridgeAbstract
|
|||
}
|
||||
|
||||
$item['author'] = $div->find('a.author', 0)->innertext;
|
||||
$item['timestamp'] = $div->find('span.time-since', 0)->title;
|
||||
|
||||
$timeSince = $div->find('span.time-since', 0);
|
||||
if ($timeSince) {
|
||||
$item['timestamp'] = $timeSince->title;
|
||||
}
|
||||
|
||||
$this->items[] = $item;
|
||||
}
|
||||
|
@ -270,14 +284,23 @@ class CodebergBridge extends BridgeAbstract
|
|||
|
||||
$item['title'] = $li->find('a.title', 0)->plaintext . ' (' . $number . ')';
|
||||
$item['uri'] = $li->find('a.title', 0)->href;
|
||||
$item['timestamp'] = $li->find('span.time-since', 0)->title;
|
||||
|
||||
$time = $li->find('relative-time.time-since', 0);
|
||||
if ($time) {
|
||||
$item['timestamp'] = $time->datetime;
|
||||
}
|
||||
|
||||
$item['author'] = $li->find('div.desc', 0)->find('a', 1)->plaintext;
|
||||
|
||||
// Fetch pull request page
|
||||
$pullRequestPage = getSimpleHTMLDOMCached($item['uri'], 3600);
|
||||
$pullRequestPage = defaultLinkTo($pullRequestPage, self::URI);
|
||||
|
||||
$item['content'] = $pullRequestPage->find('ui.timeline', 0)->find('div.render-content.markup', 0);
|
||||
$var = $pullRequestPage->find('ui.timeline', 0);
|
||||
if ($var) {
|
||||
$var1 = $var->find('div.render-content.markup', 0);
|
||||
$item['content'] = $var1;
|
||||
}
|
||||
|
||||
foreach ($li->find('a.ui.label') as $label) {
|
||||
$item['categories'][] = $label->plaintext;
|
||||
|
@ -380,6 +403,9 @@ EOD;
|
|||
*/
|
||||
private function stripSvg($html)
|
||||
{
|
||||
if ($html === null) {
|
||||
return null;
|
||||
}
|
||||
if ($html->find('svg', 0)) {
|
||||
$html->find('svg', 0)->outertext = '';
|
||||
}
|
||||
|
|
|
@ -12,9 +12,8 @@ class CommonDreamsBridge extends FeedExpander
|
|||
$this->collectExpandableDatas('http://www.commondreams.org/rss.xml', 10);
|
||||
}
|
||||
|
||||
protected function parseItem($newsItem)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($newsItem);
|
||||
$item['content'] = $this->extractContent($item['uri']);
|
||||
return $item;
|
||||
}
|
||||
|
|
|
@ -0,0 +1,75 @@
|
|||
<?php
|
||||
|
||||
/**
 * Bridge for the Portuguese local newspaper Correio da Feira.
 *
 * Scrapes the category listing page selected through the "feed" input and
 * turns every listed article into a feed item.
 */
class CorreioDaFeiraBridge extends BridgeAbstract
{
    const NAME = 'Correio da Feira';
    const URI = 'https://www.correiodafeira.pt/';
    const DESCRIPTION = 'Returns news from the Portuguese local newspaper Correio da Feira';
    const MAINTAINER = 'rmscoelho';
    const CACHE_TIMEOUT = 86400;
    const PARAMETERS = [
        [
            'feed' => [
                'name' => 'News Feed',
                'type' => 'list',
                'title' => 'Feeds from the Portuguese local newspaper Correio da Feira',
                'values' => [
                    'Cultura' => 'cultura',
                    'Desporto' => 'desporto',
                    'Economia' => 'economia',
                    'Entrevista' => 'entrevista',
                    'Freguesias' => 'freguesias',
                    'Justiça' => 'justica',
                    'Opinião' => 'opiniao',
                    'Política' => 'politica',
                    'Reportagem' => 'reportagem',
                    'Sociedade' => 'sociedade',
                    'Tecnologia' => 'tecnologia',
                ]
            ]
        ]
    ];

    /**
     * @return string URL of the icon shown for this bridge
     */
    public function getIcon()
    {
        return 'https://www.correiodafeira.pt/wp-content/uploads/base_reporter-200x200.jpg';
    }

    /**
     * @return string Bridge name, suffixed with the selected feed key when one is set
     */
    public function getName()
    {
        $feedKey = $this->getKey('feed');
        if (is_null($feedKey)) {
            return self::NAME;
        }
        return self::NAME . ' | ' . $feedKey;
    }

    /**
     * @return string Site URL followed by the value of the "feed" input
     */
    public function getURI()
    {
        return self::URI . $this->getInput('feed');
    }

    /**
     * Fetch the selected category page and fill $this->items.
     *
     * Articles without a title link are skipped; author, content and timestamp
     * are only set when the corresponding element is present and usable, so a
     * single malformed article no longer aborts the whole feed.
     *
     * @throws \Exception When the page has no <main> element
     */
    public function collectData()
    {
        $url = sprintf('https://www.correiodafeira.pt/categoria/%s', $this->getInput('feed'));
        $dom = getSimpleHTMLDOM($url);
        $dom = $dom->find('main', 0);
        if (!$dom) {
            throw new \Exception(sprintf('Unable to find css selector on `%s`', $url));
        }
        // NOTE(review): defaultLinkTo() presumably rewrites relative links against the given URI — confirm.
        $dom = defaultLinkTo($dom, $this->getURI());

        foreach ($dom->find('div.post') as $article) {
            $box = $article->find('div.blog-box', 0);
            if (!$box) {
                continue;
            }

            $titleLink = $box->find('h2.entry-title > a', 0);
            if (!$titleLink) {
                // Without a title link there is neither a title nor a URL for the item.
                continue;
            }

            $item = [
                'title' => $titleLink->plaintext,
                'uri' => $titleLink->href,
            ];

            $author = $box->find('li.post-author > a', 0);
            if ($author) {
                $item['author'] = $author->plaintext;
            }

            $summary = $box->find('.entry-content > p', 0);
            if ($summary) {
                $item['content'] = $summary->plaintext;
            }

            // Get date of publishing
            $date = $box->find('.post-date > :nth-child(2)', 0);
            if ($date) {
                $timestamp = $this->parsePublicationDate($date->plaintext);
                if (!is_null($timestamp)) {
                    $item['timestamp'] = $timestamp;
                }
            }

            $this->items[] = $item;
        }
    }

    /**
     * Convert a "day/month/year" date into a Unix timestamp at midnight.
     *
     * @param string $text Date text as found on the page; surrounding whitespace is ignored
     * @return int|null Timestamp in PHP's default timezone, or null when the text is not a d/m/Y date
     */
    private function parsePublicationDate($text)
    {
        // The leading "!" resets the time fields so the result is midnight.
        $date = \DateTime::createFromFormat('!d/m/Y', trim((string) $text));
        if ($date === false) {
            return null;
        }
        return $date->getTimestamp();
    }
}
|
|
@ -13,11 +13,9 @@ class CourrierInternationalBridge extends FeedExpander
|
|||
$this->collectExpandableDatas(static::URI . 'feed/all/rss.xml', 20);
|
||||
}
|
||||
|
||||
protected function parseItem($feedItem)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($feedItem);
|
||||
|
||||
$articlePage = getSimpleHTMLDOMCached($feedItem->link);
|
||||
$articlePage = getSimpleHTMLDOMCached($item['uri']);
|
||||
$content = $articlePage->find('.article-text, depeche-text', 0);
|
||||
if (!$content) {
|
||||
return $item;
|
||||
|
|
|
@ -63,7 +63,7 @@ class CraigslistBridge extends BridgeAbstract
|
|||
$html = getSimpleHTMLDOM($uri);
|
||||
|
||||
// Check if no results page is shown (nearby results)
|
||||
if ($html->find('.displaycountShow', 0)->plaintext == '0') {
|
||||
if (($html->find('.displaycountShow', 0)->plaintext ?? '') == '0') {
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,543 @@
|
|||
<?php
|
||||
|
||||
class CssSelectorBridge extends BridgeAbstract
|
||||
{
|
||||
const MAINTAINER = 'ORelio';
|
||||
const NAME = 'CSS Selector Bridge';
|
||||
const URI = 'https://github.com/RSS-Bridge/rss-bridge/';
|
||||
const DESCRIPTION = 'Convert any site to RSS feed using CSS selectors (Advanced Users)';
|
||||
const PARAMETERS = [
|
||||
[
|
||||
'home_page' => [
|
||||
'name' => 'Site URL: Home page with latest articles',
|
||||
'exampleValue' => 'https://example.com/blog/',
|
||||
'required' => true
|
||||
],
|
||||
'url_selector' => [
|
||||
'name' => 'Selector for article links or their parent elements',
|
||||
'title' => <<<EOT
|
||||
This bridge works using CSS selectors, e.g. "a.article" will match all <a class="article"
|
||||
href="URL">TITLE</a> on home page, each one being treated as a feed item.
|
||||
Instead of just a link you can selet one of its parent element. Everything inside that
|
||||
element becomes feed item content, e.g. image and summary present on home page.
|
||||
When doing so, the first link inside the selected element becomes feed item URL/Title.
|
||||
EOT,
|
||||
'exampleValue' => 'a.article',
|
||||
'required' => true
|
||||
],
|
||||
'url_pattern' => [
|
||||
'name' => '[Optional] Pattern for site URLs to keep in feed',
|
||||
'title' => 'Optionally filter items by applying a regular expression on their URL',
|
||||
'exampleValue' => '/blog/article/.*',
|
||||
],
|
||||
'content_selector' => [
|
||||
'name' => '[Optional] Selector to expand each article content',
|
||||
'title' => <<<EOT
|
||||
When specified, the bridge will fetch each article from its URL
|
||||
and extract content using the provided selector (Slower!)
|
||||
EOT,
|
||||
'exampleValue' => 'article.content',
|
||||
],
|
||||
'content_cleanup' => [
|
||||
'name' => '[Optional] Content cleanup: List of items to remove',
|
||||
'title' => 'Selector for unnecessary elements to remove inside article contents.',
|
||||
'exampleValue' => 'div.ads, div.comments',
|
||||
],
|
||||
'title_cleanup' => [
|
||||
'name' => '[Optional] Text to remove from expanded article title',
|
||||
'title' => <<<EOT
|
||||
When fetching each article page, feed item title comes from page title.
|
||||
Specify here some text from page title that need to be removed, e.g. " | BlogName".
|
||||
EOT,
|
||||
'exampleValue' => ' | BlogName',
|
||||
],
|
||||
'discard_thumbnail' => [
|
||||
'name' => '[Optional] Discard thumbnail set by site author',
|
||||
'title' => 'Some sites set their logo as thumbnail for every article. Use this option to discard it.',
|
||||
'type' => 'checkbox',
|
||||
],
|
||||
'limit' => self::LIMIT
|
||||
]
|
||||
];
|
||||
|
||||
protected $feedName = '';
|
||||
protected $homepageUrl = '';
|
||||
|
||||
/**
 * @return string The home page URL recorded in $this->homepageUrl, or the parent's URI while it is still empty
 */
public function getURI()
{
    if (!empty($this->homepageUrl)) {
        return $this->homepageUrl;
    }

    return parent::getURI();
}
|
||||
|
||||
/**
 * @return string The feed name stored in $this->feedName, or the parent's name while it is still empty
 */
public function getName()
{
    return empty($this->feedName) ? parent::getName() : $this->feedName;
}
|
||||
|
||||
/**
 * Build the feed from the user-supplied home page and selectors.
 *
 * Entries are first collected from the home page. When a content selector is
 * given, every entry is then expanded through expandEntryWithSelector();
 * otherwise the home page entries are used as they are.
 */
public function collectData()
{
    $this->homepageUrl = $this->getInput('home_page');

    $selector_for_urls = $this->getInput('url_selector');
    $pattern_for_urls = $this->getInput('url_pattern');
    $selector_for_content = $this->getInput('content_selector');
    $selector_for_cleanup = $this->getInput('content_cleanup');
    $text_to_strip_from_title = $this->getInput('title_cleanup');
    $drop_thumbnail = $this->getInput('discard_thumbnail');
    $max_items = $this->getInput('limit') ?? 10;

    $homepage = getSimpleHTMLDOM($this->homepageUrl);
    $homepage = defaultLinkTo($homepage, $this->homepageUrl);

    $homepage_title = $this->getPageTitle($homepage);
    $this->feedName = $this->titleCleanup($homepage_title, $text_to_strip_from_title);

    $entries = $this->htmlFindEntries(
        $homepage,
        $selector_for_urls,
        $pattern_for_urls,
        $max_items,
        $selector_for_cleanup
    );

    // No content selector: the entries found on the home page are the feed.
    if (empty($selector_for_content)) {
        $this->items = $entries;
        return;
    }

    foreach ($entries as $entry) {
        $expanded = $this->expandEntryWithSelector(
            $entry['uri'],
            $selector_for_content,
            $selector_for_cleanup,
            $text_to_strip_from_title,
            $entry['title']
        );

        // unset() on a missing key is a no-op, so no isset() check is needed.
        if ($drop_thumbnail) {
            unset($expanded['enclosures']);
        }

        $this->items[] = $expanded;
    }
}
|
||||
|
||||
/**
 * Filter a list of URLs using a pattern and limit
 * @param array $links List of URLs
 * @param string $url_pattern Regular expression, without delimiters, to look for in URLs. Empty: keep every URL
 * @param int $limit Optional maximum amount of URLs to return. Zero or less: no limit
 * @return array Array of URLs. Keys of $links are preserved by the pattern filter; truncation to $limit renumbers integer keys
 */
protected function filterUrlList($links, $url_pattern, $limit = 0)
{
    if (!empty($url_pattern)) {
        // Escape only the slashes that are not escaped yet. Blindly replacing "/" by "\/" would turn
        // an already escaped "\/" into "\\/" (escaped backslash + bare delimiter), ending the regex early.
        // "\\.(*SKIP)(*FAIL)" consumes any backslash-escaped character so it is never rewritten.
        $escaped_pattern = preg_replace('~\\\\.(*SKIP)(*FAIL)|/~s', '\\\\/', $url_pattern);
        if (!is_string($escaped_pattern)) {
            // preg_replace() failed: fall back to escaping every slash
            $escaped_pattern = str_replace('/', '\/', $url_pattern);
        }
        $regex = '/' . $escaped_pattern . '/';
        $links = array_filter($links, static function ($url) use ($regex) {
            return preg_match($regex, $url) === 1;
        });
    }

    if ($limit > 0 && count($links) > $limit) {
        $links = array_slice($links, 0, $limit);
    }

    return $links;
}
|
||||
|
||||
/**
 * Retrieve title from webpage URL or DOM
 * @param string|object $page URL or DOM to retrieve title from. A URL is loaded through getSimpleHTMLDOMCached()
 * @return string Webpage title with HTML entities decoded, or an empty string when the page has no <title> element
 */
protected function getPageTitle($page)
{
    if (is_string($page)) {
        $page = getSimpleHTMLDOMCached($page);
    }

    // Pages without a <title> element would otherwise cause a property read on null
    $title_element = $page->find('title', 0);
    if (is_null($title_element)) {
        return '';
    }

    return html_entity_decode($title_element->plaintext);
}
|
||||
|
||||
/**
 * Strip a constant fragment (such as the blog name) from an article title.
 * @param string $title Title to clean, e.g. "Article Name | BlogName"
 * @param string $title_cleanup Text to remove from the title, e.g. " | BlogName"
 * @return string Title with every occurrence of the fragment removed and then trimmed;
 *                the title is returned untouched when either argument is empty
 */
protected function titleCleanup($title, $title_cleanup)
{
    if (empty($title) || empty($title_cleanup)) {
        return $title;
    }

    $without_fragment = str_replace($title_cleanup, '', $title);

    return trim($without_fragment);
}
|
||||
|
||||
/**
 * Remove all elements from HTML content matching cleanup selector
 * @param string|object $content HTML content as HTML object or string
 * @param string|null $cleanup_selector Selector for the elements to remove, e.g. "div.ads, div.comments". Empty: remove nothing
 * @return string|object Cleaned content (same type as input)
 */
protected function cleanArticleContent($content, $cleanup_selector)
{
    $string_convert = false;
    if (is_string($content)) {
        $parsed_content = str_get_html($content);
        if (!$parsed_content) {
            // NOTE(review): str_get_html() is expected to return a falsy value when it cannot
            // build a DOM (e.g. empty input). There is nothing to clean, so hand the string back.
            return $content;
        }
        $string_convert = true;
        $content = $parsed_content;
    }

    if (!empty($cleanup_selector)) {
        foreach ($content->find($cleanup_selector) as $item_to_clean) {
            // Blanking outertext removes the element from the serialized output
            $item_to_clean->outertext = '';
        }
    }

    if ($string_convert) {
        $content = $content->outertext;
    }
    return $content;
}
|
||||
|
||||
/**
 * Retrieve first N link+title+truncated-content from webpage URL or DOM satisfying the specified criteria
 * @param string|object $page URL or DOM to retrieve feed items from
 * @param string $url_selector DOM selector for matching links or their parent element
 * @param string $url_pattern Optional filter to keep only links matching the pattern
 * @param int $limit Optional maximum amount of URLs to return
 * @param string $content_cleanup Optional selector for removing elements, e.g. "div.ads, div.comments"
 * @return array of items {'uri': entry_url, 'title': entry_title, ['content': when present in DOM] }
 */
protected function htmlFindEntries($page, $url_selector, $url_pattern = '', $limit = 0, $content_cleanup = null)
{
    if (is_string($page)) {
        $page = getSimpleHTMLDOM($page);
    }

    $links = $page->find($url_selector);

    if (empty($links)) {
        returnClientError('No results for URL selector');
    }

    // Items are indexed by URL, so an URL matched several times yields a single item
    $link_to_item = [];
    foreach ($links as $link) {
        $item = [];

        // Keep the matched element's markup as content only when it holds more than plain text
        if ($link->innertext != $link->plaintext) {
            $item['content'] = $link->innertext;
        }

        // A parent element was selected: the first link inside it gives URL and title
        if ($link->tag != 'a') {
            $link = $link->find('a', 0);
            if (is_null($link)) {
                continue;
            }
        }

        // Anchors without a usable href cannot become feed items (no URL to use as item URI or array key)
        $href = $link->href;
        if (!is_string($href) || $href === '') {
            continue;
        }

        $item['uri'] = $href;
        $item['title'] = $link->plaintext;
        if (isset($item['content'])) {
            $item['content'] = convertLazyLoading($item['content']);
            $item['content'] = defaultLinkTo($item['content'], $item['uri']);
            $item['content'] = $this->cleanArticleContent($item['content'], $content_cleanup);
        }
        $link_to_item[$href] = $item;
    }

    if (empty($link_to_item)) {
        returnClientError('The provided URL selector matches some elements, but they do not contain links.');
    }

    $links = $this->filterUrlList(array_keys($link_to_item), $url_pattern, $limit);

    if (empty($links)) {
        returnClientError('No results for URL pattern');
    }

    $items = [];
    foreach ($links as $link) {
        $items[] = $link_to_item[$link];
    }

    return $items;
}
|
||||
|
||||
/**
 * Retrieve article content from its URL using content selector and return a feed item
 * @param string $entry_url URL to retrieve article from
 * @param string $content_selector HTML selector for extracting content, e.g. "article.content"
 * @param string $content_cleanup Optional selector for removing elements, e.g. "div.ads, div.comments"
 * @param string $title_cleanup Optional string to remove from article title, e.g. " | BlogName"
 * @param string $title_default Optional title to use when could not extract title reliably
 * @return array Entry data: uri, title, content, plus whatever entryHtmlRetrieveMetadata() found
 */
protected function expandEntryWithSelector($entry_url, $content_selector, $content_cleanup = null, $title_cleanup = null, $title_default = null)
{
    if (empty($content_selector)) {
        returnClientError('Please specify a content selector');
    }

    $entry_html = getSimpleHTMLDOMCached($entry_url);
    $item = $this->entryHtmlRetrieveMetadata($entry_html);

    if (empty($item['uri'])) {
        $item['uri'] = $entry_url;
    }

    if (empty($item['title'])) {
        // Clean the page title BEFORE comparing it with the feed name: $this->feedName is stored
        // already cleaned, so comparing the raw page title would never detect an article page
        // that merely repeats the home page title when a title cleanup is configured.
        $article_title = $this->titleCleanup($this->getPageTitle($entry_html), $title_cleanup);
        if (!empty($title_default) && (empty($article_title) || $article_title === $this->feedName)) {
            $article_title = $title_default;
        }
        $item['title'] = $article_title;
    }

    // Titles coming from metadata or from the default title get the same cleanup
    $item['title'] = $this->titleCleanup($item['title'], $title_cleanup);

    $article_content = $entry_html->find($content_selector);

    if (!empty($article_content)) {
        // Only the first element matching the content selector is used
        $article_content = $article_content[0];
        $article_content = convertLazyLoading($article_content);
        $article_content = defaultLinkTo($article_content, $entry_url);
        $article_content = $this->cleanArticleContent($article_content, $content_cleanup);
        $item['content'] = $article_content;
    } else if (!empty($item['content'])) {
        // Selector matched nothing: keep the description found in metadata and tell the reader why
        $item['content'] .= '<br /><p><em>Could not extract full content, selector may need to be updated.</em></p>';
    }

    return $item;
}
|
||||
|
||||
/**
|
||||
* Retrieve metadata from entry HTML: title, author, date published, etc. from metadata intended for social media embeds and SEO
|
||||
* @param obj $entry_html DOM object representing the webpage HTML
|
||||
* @return array Entry data collected from Metadata
|
||||
*/
|
||||
protected function entryHtmlRetrieveMetadata($entry_html)
|
||||
{
|
||||
$item = [];
|
||||
|
||||
// == First source of metadata: Meta tags ==
|
||||
// Facebook Open Graph (og:KEY) - https://developers.facebook.com/docs/sharing/webmasters
|
||||
// Twitter (twitter:KEY) - https://developer.twitter.com/en/docs/twitter-for-websites/cards/guides/getting-started
|
||||
// Standard meta tags - https://www.w3schools.com/tags/tag_meta.asp
|
||||
|
||||
// Each Entry field mapping defines a list of possible <meta> tags names that contains the expected value
|
||||
static $meta_mappings = [
|
||||
// <meta property="article:KEY" content="VALUE" />
|
||||
// <meta property="og:KEY" content="VALUE" />
|
||||
// <meta property="KEY" content="VALUE" />
|
||||
// <meta name="twitter:KEY" content="VALUE" />
|
||||
// <meta name="KEY" content="VALUE">
|
||||
// <link rel="canonical" href="URL" />
|
||||
'uri' => [
|
||||
'og:url',
|
||||
'twitter:url',
|
||||
'canonical'
|
||||
],
|
||||
'title' => [
|
||||
'og:title',
|
||||
'twitter:title'
|
||||
],
|
||||
'content' => [
|
||||
'og:description',
|
||||
'twitter:description',
|
||||
'description'
|
||||
],
|
||||
'timestamp' => [
|
||||
'article:published_time',
|
||||
'og:article:published_time',
|
||||
'releaseDate',
|
||||
'releasedate',
|
||||
'article:modified_time',
|
||||
'og:article:modified_time',
|
||||
'lastModified',
|
||||
'lastmodified'
|
||||
],
|
||||
'enclosures' => [
|
||||
'og:image:secure_url',
|
||||
'og:image:url',
|
||||
'og:image',
|
||||
'twitter:image',
|
||||
'thumbnailImg',
|
||||
'thumbnailimg'
|
||||
],
|
||||
'author' => [
|
||||
'article:author',
|
||||
'og:article:author',
|
||||
'author',
|
||||
'article:author:username',
|
||||
'profile:first_name',
|
||||
'profile:last_name',
|
||||
'article:author:first_name',
|
||||
'article:author:last_name',
|
||||
'twitter:creator',
|
||||
],
|
||||
];
|
||||
|
||||
$author_first_name = null;
|
||||
$author_last_name = null;
|
||||
|
||||
// For each Entry property, look for corresponding HTML tags using a list of candidates
|
||||
foreach ($meta_mappings as $property => $field_list) {
|
||||
foreach ($field_list as $field) {
|
||||
// Look for HTML meta tag
|
||||
$element = null;
|
||||
if ($field === 'canonical') {
|
||||
$element = $entry_html->find('link[rel=canonical]');
|
||||
} else {
|
||||
$element = $entry_html->find("meta[property=$field], meta[name=$field]");
|
||||
}
|
||||
// Found something? Extract the value and populate Entry field
|
||||
if (!empty($element)) {
|
||||
$element = $element[0];
|
||||
$field_value = '';
|
||||
if ($field === 'canonical') {
|
||||
$field_value = $element->href;
|
||||
} else {
|
||||
$field_value = $element->content;
|
||||
}
|
||||
if (!empty($field_value)) {
|
||||
if ($field === 'article:author:first_name' || $field === 'profile:first_name') {
|
||||
$author_first_name = $field_value;
|
||||
} else if ($field === 'article:author:last_name' || $field === 'profile:last_name') {
|
||||
$author_last_name = $field_value;
|
||||
} else {
|
||||
$item[$property] = $field_value;
|
||||
break; // Stop on first match, e.g. og:url has priority over canonical url.
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Populate author from first name and last name if all we have is nothing or Twitter @username
|
||||
if ((!isset($item['author']) || $item['author'][0] === '@') && (is_string($author_first_name) || is_string($author_last_name))) {
|
||||
$author = '';
|
||||
if (is_string($author_first_name)) {
|
||||
$author = $author_first_name;
|
||||
}
|
||||
if (is_string($author_last_name)) {
|
||||
$author = $author . ' ' . $author_last_name;
|
||||
}
|
||||
$item['author'] = trim($author);
|
||||
}
|
||||
|
||||
// == Second source of metadata: Embedded JSON ==
|
||||
// JSON linked data - https://www.w3.org/TR/2014/REC-json-ld-20140116/
|
||||
// JSON linked data is COMPLEX and MAY BE LESS RELIABLE than <meta> tags. Used for fields not found as <meta> tags.
|
||||
// The implementation below will load all ld+json we can understand and attempt to extract relevant information.
|
||||
|
||||
// ld+json object types that hold article metadata
|
||||
// Each mapping define item fields and a list of possible JSON field for this field
|
||||
// Each candiate JSON field is either a string (field name) or a list (path to nested field)
|
||||
static $ldjson_article_types = ['webpage', 'article', 'newsarticle', 'blogposting'];
|
||||
static $ldjson_article_mappings = [
|
||||
'uri' => ['url', 'mainEntityOfPage'],
|
||||
'title' => ['headline'],
|
||||
'content' => ['description'],
|
||||
'timestamp' => ['dateModified', 'datePublished'],
|
||||
'enclosures' => ['image'],
|
||||
'author' => [['author', 'name'], ['author', '@id'], 'author'],
|
||||
];
|
||||
|
||||
// ld+json object types that hold author metadata
|
||||
$ldjson_author_types = ['person', 'organization'];
|
||||
$ldjson_author_mappings = []; // ID => Name
|
||||
$ldjson_author_id = null;
|
||||
|
||||
// Utility function for checking if JSON array matches one of the desired ld+json object types
|
||||
// A JSON object may have a single ld+json @type as a string OR several types at once as a list
|
||||
$ldjson_is_of_type = function ($json, $allowed_types) {
|
||||
if (isset($json['@type'])) {
|
||||
$json_types = $json['@type'];
|
||||
if (!is_array($json_types)) {
|
||||
$json_types = [ $json_types ];
|
||||
}
|
||||
foreach ($json_types as $item_type) {
|
||||
if (in_array(strtolower($item_type), $allowed_types)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
};
|
||||
|
||||
// Process ld+json objects embedded in the HTML DOM
|
||||
foreach ($entry_html->find('script[type=application/ld+json]') as $html_ldjson_node) {
|
||||
$json_raw = json_decode($html_ldjson_node->innertext, true);
|
||||
if (is_array($json_raw)) {
|
||||
// The JSON we just loaded may contain directly a single ld+json object AND/OR several ones under the '@graph' key
|
||||
$json_items = [ $json_raw ];
|
||||
if (isset($json_raw['@graph'])) {
|
||||
foreach ($json_raw['@graph'] as $json_raw_sub_item) {
|
||||
$json_items[] = $json_raw_sub_item;
|
||||
}
|
||||
}
|
||||
// Now that we have a list of distinct JSON items, we can process them individually
|
||||
foreach ($json_items as $json) {
|
||||
// JSON item that holds an ld+json Article object (or a variant)
|
||||
if ($ldjson_is_of_type($json, $ldjson_article_types)) {
|
||||
// For each item property, look for corresponding JSON fields and populate the item
|
||||
foreach ($ldjson_article_mappings as $property => $field_list) {
|
||||
// Skip fields already found as <meta> tags, except Twitter @username (because we might find a better name)
|
||||
if (!isset($item[$property]) || ($property === 'author' && $item['author'][0] === '@')) {
|
||||
foreach ($field_list as $field) {
|
||||
$json_root = $json;
|
||||
// If necessary, navigate inside the JSON object to access a nested field
|
||||
if (is_array($field)) {
|
||||
// At this point, $field = ['author', 'name'] and $json_root = {"author": {"name": "John Doe"}}
|
||||
$json_navigate_ok = true;
|
||||
while (count($field) > 1) {
|
||||
$sub_field = array_shift($field);
|
||||
if (array_key_exists($sub_field, $json_root)) {
|
||||
$json_root = $json_root[$sub_field];
|
||||
if (array_is_list($json_root) && count($json_root) === 1) {
|
||||
$json_root = $json_root[0]; // Unwrap list of single item e.g. {"author":[{"name":"John Doe"}]}
|
||||
}
|
||||
} else {
|
||||
// Desired path not found in JSON, stop navigating
|
||||
$json_navigate_ok = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!$json_navigate_ok) {
|
||||
continue; //Desired path not found in JSON, skip this field
|
||||
}
|
||||
$field = $field[0];
|
||||
// At this point, $field = "name" and $json_root = {"name": "John Doe"}
|
||||
}
|
||||
// Now we can check for desired field in JSON and populate $item accordingly
|
||||
if (isset($json_root[$field])) {
|
||||
$field_value = $json_root[$field];
|
||||
if (is_array($field_value) && isset($field_value[0])) {
|
||||
$field_value = $field_value[0]; // Different versions of the same enclosure? Take the first one
|
||||
}
|
||||
if (is_string($field_value) && !empty($field_value)) {
|
||||
if ($property === 'author' && $field === '@id') {
|
||||
$ldjson_author_id = $field_value; // Author is referred to by its ID: We'll see later if we can resolve it
|
||||
} else {
|
||||
$item[$property] = $field_value;
|
||||
break; // Stop on first match, e.g. {"author":{"name":"John Doe"}} has priority over {"author":"John Doe"}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// JSON item that holds an ld+json Author object (or a variant)
|
||||
} else if ($ldjson_is_of_type($json, $ldjson_author_types)) {
|
||||
if (isset($json['@id']) && isset($json['name'])) {
|
||||
$ldjson_author_mappings[$json['@id']] = $json['name'];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Attempt to resolve ld+json author if all we have is nothing or Twitter @username
|
||||
if ((!isset($item['author']) || $item['author'][0] === '@') && !is_null($ldjson_author_id) && isset($ldjson_author_mappings[$ldjson_author_id])) {
|
||||
$item['author'] = $ldjson_author_mappings[$ldjson_author_id];
|
||||
}
|
||||
|
||||
// Adjust item field types
|
||||
if (isset($item['enclosures'])) {
|
||||
$item['enclosures'] = [ $item['enclosures'] ];
|
||||
}
|
||||
if (isset($item['timestamp'])) {
|
||||
$item['timestamp'] = strtotime($item['timestamp']);
|
||||
}
|
||||
|
||||
return $item;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,462 @@
|
|||
<?php
|
||||
|
||||
class CssSelectorComplexBridge extends BridgeAbstract
|
||||
{
|
||||
const MAINTAINER = 'Lars Stegman';
|
||||
const NAME = 'CSS Selector Complex Bridge';
|
||||
const URI = 'https://github.com/RSS-Bridge/rss-bridge/';
|
||||
const DESCRIPTION = <<<EOT
|
||||
Convert any site to RSS feed using CSS selectors (Advanced Users). The bridge first selects
|
||||
the element describing the article entries. It then extracts the links to the articles from
|
||||
these elements. It then, depending on the setting "Load article from page", either parses
|
||||
the selected elements, or downloads the page for each article and parses those. Parsing the
|
||||
elements or page is done using the provided selectors.
|
||||
EOT;
|
||||
const PARAMETERS = [
|
||||
[
|
||||
'home_page' => [
|
||||
'name' => 'Site URL: Page with latest articles',
|
||||
'exampleValue' => 'https://example.com/blog/',
|
||||
'required' => true
|
||||
],
|
||||
'cookie' => [
|
||||
'name' => '[Optional] Cookie',
|
||||
'title' => <<<EOT
|
||||
Use when the website does not send the page contents, unless a static cookie is included.
|
||||
EOT,
|
||||
'exampleValue' => 'sessionId=deadb33f'
|
||||
],
|
||||
'title_cleanup' => [
|
||||
'name' => '[Optional] Text to remove from feed title',
|
||||
'title' => <<<EOT
|
||||
Text to remove from the feed title, which is read from the article list page.
|
||||
EOT,
|
||||
'exampleValue' => ' | BlogName',
|
||||
],
|
||||
'entry_element_selector' => [
|
||||
'name' => 'Selector for article entry elements',
|
||||
'title' => <<<EOT
|
||||
This bridge works using CSS selectors, e.g. "div.article" will match all
|
||||
<div class="article">...</div> on home page, each one being treated as a feed item.
|
||||
|
||||
Use the URL selector option to select the `a` element with the
|
||||
`href` to the article link. If this option is not configured, the first encountered
|
||||
`a` element is used.
|
||||
EOT,
|
||||
'exampleValue' => 'div.article',
|
||||
'required' => true
|
||||
],
|
||||
'url_selector' => [
|
||||
'name' => '[Optional] Selector for link elements',
|
||||
'title' => <<<EOT
|
||||
The selector to find `a` elements in the entry element. If empty,
|
||||
the first encountered `a` element is used. The `href` property
|
||||
is used to create entries in the feed.
|
||||
EOT,
|
||||
'exampleValue' => 'a.article',
|
||||
'defaultValue' => 'a'
|
||||
],
|
||||
'url_pattern' => [
|
||||
'name' => '[Optional] Pattern for site URLs to keep in feed',
|
||||
'title' => 'Optionally filter items by applying a regular expression on their URL',
|
||||
'exampleValue' => '/blog/article/.*',
|
||||
],
|
||||
'limit' => self::LIMIT,
|
||||
'use_article_pages' => [
|
||||
'name' => 'Load article from page',
|
||||
'title' => <<<EOT
|
||||
If true, the article page is load and parsed to get the article contents using
|
||||
the css selectors. (Slower!)
|
||||
Otherwise, the element selected by the article entry selector is used.
|
||||
EOT,
|
||||
'type' => 'checkbox'
|
||||
],
|
||||
'article_page_content_selector' => [
|
||||
'name' => '[Optional] Selector to select article element',
|
||||
'title' => 'Extract the article from its page using the provided selector',
|
||||
'exampleValue' => 'article.content',
|
||||
],
|
||||
'content_cleanup' => [
|
||||
'name' => '[Optional] Content cleanup: selector for items to remove',
|
||||
'title' => 'Selector for unnecessary elements to remove inside article contents.',
|
||||
'exampleValue' => 'div.ads, div.comments',
|
||||
],
|
||||
'title_selector' => [
|
||||
'name' => '[Optional] Selector for the article title',
|
||||
'title' => 'Selector to select the article title',
|
||||
'defaultValue' => 'h1'
|
||||
],
|
||||
'category_selector' => [
|
||||
'name' => '[Optional] Categories',
|
||||
'title' => <<<EOT
|
||||
Selector to extract the catgories the article has
|
||||
EOT,
|
||||
'exampleValue' => 'span.category, #main-category'
|
||||
],
|
||||
'author_selector' => [
|
||||
'name' => '[Optional] Author',
|
||||
'title' => <<<EOT
|
||||
Selector to extract the author of the article. If multiple elements are selected
|
||||
the first one is used.
|
||||
EOT,
|
||||
'exampleValue' => 'span#author'
|
||||
],
|
||||
'time_selector' => [
|
||||
'name' => '[Optional] Time selector',
|
||||
'title' => <<<EOT
|
||||
Selector to extract the timestamp of the article. If the element
|
||||
is an html5 `time` element, the value for the `datetime` attribute is used.
|
||||
EOT,
|
||||
],
|
||||
'time_format' => [
|
||||
'name' => '[Optional] Format string for parsing time',
|
||||
'title' => <<<EOT
|
||||
The format to use to parse the timestamp. See
|
||||
https://www.php.net/manual/en/datetimeimmutable.createfromformat.php
|
||||
for the format specification.
|
||||
EOT
|
||||
],
|
||||
'remove_styling' => [
|
||||
'name' => '[Optional] Remove styling',
|
||||
'title' => 'Remove class and style attributes from the page elements',
|
||||
'type' => 'checkbox'
|
||||
]
|
||||
]
|
||||
];
|
||||
|
||||
private $feedName = '';
|
||||
|
||||
/**
 * Return the URI advertised for this feed.
 *
 * The configured "home_page" input is used when it is non-empty; otherwise the
 * parent implementation decides.
 *
 * @return string Feed URI
 */
public function getURI()
{
    $home_page = $this->getInput('home_page');

    return empty($home_page) ? parent::getURI() : $home_page;
}
|
||||
|
||||
/**
 * Return the feed name.
 *
 * Uses the name stored in $this->feedName (set by collectData()) when it is
 * non-empty, otherwise defers to the parent implementation.
 *
 * @return string Feed name
 */
public function getName()
{
    return empty($this->feedName) ? parent::getName() : $this->feedName;
}
|
||||
|
||||
/**
 * Build the list of extra HTTP request headers.
 *
 * @return array Empty list, or a single "Cookie: ..." header when the "cookie" input is set
 */
protected function getHeaders()
{
    $cookie_value = $this->getInput('cookie');
    if (empty($cookie_value)) {
        return [];
    }

    return ['Cookie: ' . $cookie_value];
}
|
||||
|
||||
/**
 * Build the feed items.
 *
 * Loads the home page, locates the entry elements and their links, optionally replaces
 * each entry element by the content element of its own article page, and finally parses
 * every element into a feed item appended to $this->items.
 */
public function collectData()
{
    $home_page = $this->getInput('home_page');
    $request_headers = $this->getHeaders();

    $home_dom = defaultLinkTo(getSimpleHTMLDOM($home_page, $request_headers), $home_page);
    $this->feedName = $this->getTitle($home_dom, $this->getInput('title_cleanup'));

    $elements_by_uri = $this->htmlFindEntryElements(
        $home_dom,
        $this->getInput('entry_element_selector'),
        $this->getInput('url_selector'),
        $this->getInput('url_pattern'),
        $this->getInput('limit') ?? 10
    );
    if (empty($elements_by_uri)) {
        return;
    }

    // Swap every entry element for the article element found on the article's own page.
    if ($this->getInput('use_article_pages')) {
        $page_content_selector = $this->getInput('article_page_content_selector');
        if (empty($page_content_selector)) {
            returnClientError('`Article selector` is required when `Load article page` is enabled');
        }

        foreach (array_keys($elements_by_uri) as $article_uri) {
            $elements_by_uri[$article_uri] = $this->fetchArticleElementFromPage(
                $article_uri,
                $page_content_selector
            );
        }
    }

    // Selector settings are the same for every entry, so read them once.
    $title_selector = $this->getInput('title_selector');
    $author_selector = $this->getInput('author_selector');
    $category_selector = $this->getInput('category_selector');
    $time_selector = $this->getInput('time_selector');
    $time_format = $this->getInput('time_format');
    $content_cleanup = $this->getInput('content_cleanup');
    $remove_styling = $this->getInput('remove_styling');

    foreach ($elements_by_uri as $article_uri => $article_element) {
        $feed_item = $this->parseEntryElement(
            $article_element,
            $title_selector,
            $author_selector,
            $category_selector,
            $time_selector,
            $time_format,
            $content_cleanup,
            $this->feedName,
            $remove_styling
        );
        $feed_item['uri'] = $article_uri;

        $this->items[] = $feed_item;
    }
}
|
||||
|
||||
/**
 * Keep only the URLs matching a pattern, then cap the list length.
 *
 * The pattern is wrapped in "/" delimiters after escaping every "/" it contains, and is
 * applied with preg_match(). Keys of the input array are preserved by the pattern filter;
 * the list is re-sliced from offset 0 only when it exceeds the limit.
 *
 * @param array $links List of URLs
 * @param string $url_pattern Pattern to look for in URLs (no filtering when empty)
 * @param int $limit Optional maximum amount of URLs to return (0 = no limit)
 * @return array Array of URLs
 */
protected function filterUrlList($links, $url_pattern, $limit = 0)
{
    if (!empty($url_pattern)) {
        $regex = '/' . str_replace('/', '\/', $url_pattern) . '/';
        $links = array_filter($links, fn ($candidate) => preg_match($regex, $candidate) === 1);
    }

    $over_limit = $limit > 0 && count($links) > $limit;

    return $over_limit ? array_slice($links, 0, $limit) : $links;
}
|
||||
|
||||
/**
 * Retrieve title from webpage URL or DOM
 *
 * Returns an empty string when the page has no <title> element instead of
 * dereferencing a missing node.
 *
 * @param string|object $page URL or DOM to retrieve title from
 * @param string $title_cleanup optional string to remove from webpage title, e.g. " | BlogName"
 * @return string Webpage title ('' when the page has no title)
 */
protected function getTitle($page, $title_cleanup)
{
    if (is_string($page)) {
        $page = getSimpleHTMLDOMCached($page);
    }

    // find(..., 0) yields no element when the page lacks a <title>; guard before reading it.
    $title_element = $page->find('title', 0);
    if (!$title_element) {
        return '';
    }

    $title = html_entity_decode((string) $title_element->plaintext);
    if (!empty($title)) {
        // The cleanup text is optional; cast so that an unset input is treated as "remove nothing".
        $title = trim(str_replace((string) $title_cleanup, '', $title));
    }

    return $title;
}
|
||||
|
||||
/**
 * Remove all elements from HTML content matching cleanup selector
 *
 * @param string|object $content HTML content as HTML object or string
 * @param string $cleanup_selector Selector for elements to blank out (nothing removed when empty)
 * @param bool $remove_styling Whether to strip `class` and `style` attributes from all elements
 * @return string|object Cleaned content (same type as input)
 */
protected function cleanArticleContent($content, $cleanup_selector, $remove_styling)
{
    $string_convert = false;
    if (is_string($content)) {
        $parsed = str_get_html($content);
        if (!$parsed) {
            // str_get_html() gives no DOM for input it refuses to parse (e.g. an empty string);
            // there is nothing to clean in that case, so hand the string back untouched.
            return $content;
        }
        $string_convert = true;
        $content = $parsed;
    }

    if (!empty($cleanup_selector)) {
        foreach ($content->find($cleanup_selector) as $item_to_clean) {
            // Blanking outertext removes the element from the rendered output.
            $item_to_clean->outertext = '';
        }
    }

    if ($remove_styling) {
        foreach (['class', 'style'] as $attribute_to_remove) {
            foreach ($content->find('[' . $attribute_to_remove . ']') as $item_to_clean) {
                $item_to_clean->removeAttribute($attribute_to_remove);
            }
        }
    }

    // Give string callers a string back.
    if ($string_convert) {
        $content = $content->outertext;
    }

    return $content;
}
|
||||
|
||||
|
||||
/**
 * Retrieve first N link+element from webpage URL or DOM satisfying the specified criteria
 *
 * Entries whose link element carries no usable `href` are skipped, so they can never
 * end up as an item keyed by a non-URL value. When two entries share the same URL the
 * later entry element replaces the earlier one.
 *
 * @param string|object $page URL or DOM to retrieve feed items from
 * @param string $entry_selector DOM selector for matching HTML elements that contain article
 *                               entries
 * @param string $url_selector DOM selector for matching links (falls back to `a` when empty)
 * @param string $url_pattern Optional filter to keep only links matching the pattern
 * @param int $limit Optional maximum amount of URLs to return
 * @return array of items { <uri> => <html-element> }
 */
protected function htmlFindEntryElements($page, $entry_selector, $url_selector, $url_pattern = '', $limit = 0)
{
    if (is_string($page)) {
        $page = getSimpleHTMLDOM($page);
    }

    $entryElements = $page->find($entry_selector);
    if (empty($entryElements)) {
        returnClientError('No entry elements for entry selector');
    }

    // The parameter help promises "the first encountered `a` element" when no selector is given.
    if (empty($url_selector)) {
        $url_selector = 'a';
    }

    // Extract URIs with the associated entry element
    $links_with_elements = [];
    foreach ($entryElements as $entry) {
        $url_element = $entry->find($url_selector, 0);
        if (is_null($url_element) && $entry->tag == 'a') {
            // No link inside this entry, but the entry itself is a link
            $url_element = $entry;
        }
        if (is_null($url_element)) {
            continue;
        }

        // A link element without an href would otherwise be stored under a non-string key.
        $href = $url_element->href;
        if (!is_string($href) || trim($href) === '') {
            continue;
        }

        $links_with_elements[$href] = $entry;
    }

    if (empty($links_with_elements)) {
        returnClientError('The provided URL selector matches some elements, but they do not contain links.');
    }

    // Filter using the URL pattern
    $filtered_urls = $this->filterUrlList(array_keys($links_with_elements), $url_pattern, $limit);
    if (empty($filtered_urls)) {
        returnClientError('No results for URL pattern');
    }

    $items = [];
    foreach ($filtered_urls as $link) {
        $items[$link] = $links_with_elements[$link];
    }

    return $items;
}
|
||||
|
||||
|
||||
/**
 * Download an article page and return the element holding the article.
 *
 * The page is fetched through getSimpleHTMLDOMCached(). A client error is raised when
 * the selector matches nothing. The returned element is passed through defaultLinkTo()
 * with the article URL.
 *
 * @param string $entry_url URL to retrieve article from
 * @param string $content_selector HTML selector for extracting content, e.g. "article.content"
 * @return object Article DOM element
 */
protected function fetchArticleElementFromPage($entry_url, $content_selector)
{
    $article_element = getSimpleHTMLDOMCached($entry_url)->find($content_selector, 0);

    if (is_null($article_element)) {
        returnClientError('Could not get article content at URL: ' . $entry_url);
    }

    return defaultLinkTo($article_element, $entry_url);
}
|
||||
|
||||
/**
 * Convert a time string to a Unix timestamp.
 *
 * With a format, the string is parsed by date_parse_from_format() and the parsed fields
 * are passed to mktime(). Fields the format does not cover fall back to neutral values
 * (00:00:00, first day, first month, current year) rather than being coerced to 0.
 * Without a format, the string is handed to strtotime(), which covers machine-readable
 * values such as the `datetime` attribute of a `time` element.
 *
 * NOTE: on the format path mktime() interprets the fields in the default timezone; any
 * timezone present in the string is not applied.
 *
 * @param string $timeStr Time string to parse
 * @param string|null $format Format understood by date_parse_from_format(), or empty
 * @return int Unix timestamp
 */
protected function parseTimeStrAsTimestamp($timeStr, $format)
{
    // Element text frequently carries surrounding whitespace that would break strict format parsing.
    $timeStr = trim((string) $timeStr);

    if (empty($format)) {
        $timestamp = strtotime($timeStr);
        if ($timestamp === false) {
            returnClientError('Error while parsing time string');
        }
        return $timestamp;
    }

    $date = date_parse_from_format($format, $timeStr);
    if ($date['error_count'] != 0) {
        returnClientError('Error while parsing time string');
    }

    // date_parse_from_format() reports fields absent from the format as false.
    $field = function ($name, $fallback) use ($date) {
        return $date[$name] === false ? $fallback : $date[$name];
    };

    $timestamp = mktime(
        $field('hour', 0),
        $field('minute', 0),
        $field('second', 0),
        $field('month', 1),
        $field('day', 1),
        $field('year', (int) date('Y'))
    );

    // Strict comparison: 0 is a valid timestamp, only false signals failure.
    if ($timestamp === false) {
        returnClientError('Error while creating timestamp');
    }

    return $timestamp;
}
|
||||
|
||||
/**
 * Build a feed item from a DOM element containing an article
 *
 * Every selector is optional: an empty selector, or a selector matching nothing, leaves
 * the corresponding field at its default instead of failing.
 *
 * @param object $entry_html A DOM element containing the article
 * @param string $title_selector A selector to the article title from the article
 * @param string $author_selector A selector to find the article author
 * @param string $category_selector A selector matching one element per article category
 * @param string $time_selector A selector to get the article publication time.
 * @param string $time_format The format to parse the time_selector.
 * @param string $content_cleanup Optional selector for removing elements, e.g. "div.ads,
 *                                div.comments"
 * @param string $title_default Optional title to use when could not extract title reliably
 * @param bool $remove_styling Whether to remove class and style attributes from the HTML
 * @return array Entry data: title, content, categories, timestamp, author
 */
protected function parseEntryElement(
    $entry_html,
    $title_selector = null,
    $author_selector = null,
    $category_selector = null,
    $time_selector = null,
    $time_format = null,
    $content_cleanup = null,
    $title_default = null,
    $remove_styling = false
) {
    $article_content = convertLazyLoading($entry_html);

    // Title: fall back to the default whenever nothing usable could be extracted.
    $article_title = '';
    if (!empty($title_selector)) {
        $title_element = $entry_html->find($title_selector, 0);
        if ($title_element) {
            $article_title = trim($title_element->innertext);
        }
    }
    if ($article_title === '') {
        $article_title = $title_default ?? '';
    }

    // Author: first matching element, if any.
    $author = null;
    if (!empty($author_selector)) {
        $author_element = $entry_html->find($author_selector, 0);
        if ($author_element) {
            $author = trim($author_element->innertext);
        }
    }

    // Categories: one per matching element.
    $categories = [];
    if (!empty($category_selector)) {
        foreach ($entry_html->find($category_selector) as $category_element) {
            $categories[] = trim($category_element->innertext);
        }
    }

    // Timestamp: prefer the `datetime` attribute, otherwise the element text.
    $time = null;
    if (!empty($time_selector)) {
        $time_element = $entry_html->find($time_selector, 0);
        if ($time_element) {
            $time_string = $time_element->getAttribute('datetime');
            // A missing attribute is reported with a falsy value rather than null, so test with empty().
            if (empty($time_string)) {
                $time_string = $time_element->innertext;
            }

            // Keep the parsed value: the item carries a timestamp, not the raw string.
            $time = $this->parseTimeStrAsTimestamp($time_string, $time_format);
        }
    }

    $article_content = $this->cleanArticleContent($article_content, $content_cleanup, $remove_styling);

    $item = [];
    $item['title'] = $article_title;
    $item['content'] = $article_content;
    $item['categories'] = $categories;
    $item['timestamp'] = $time;
    $item['author'] = $author;
    return $item;
}
|
||||
}
|
|
@ -0,0 +1,106 @@
|
|||
<?php
|
||||
|
||||
class CssSelectorFeedExpanderBridge extends CssSelectorBridge
|
||||
{
|
||||
const MAINTAINER = 'ORelio';
|
||||
const NAME = 'CSS Selector Feed Expander';
|
||||
const URI = 'https://github.com/RSS-Bridge/rss-bridge/';
|
||||
const DESCRIPTION = 'Expand any site RSS feed using CSS selectors (Advanced Users)';
|
||||
const PARAMETERS = [
|
||||
[
|
||||
'feed' => [
|
||||
'name' => 'Feed: URL of truncated RSS feed',
|
||||
'exampleValue' => 'https://example.com/feed.xml',
|
||||
'required' => true
|
||||
],
|
||||
'content_selector' => [
|
||||
'name' => 'Selector for each article content',
|
||||
'title' => <<<EOT
|
||||
This bridge works using CSS selectors, e.g. "div.article" will match <div class="article">.
|
||||
Everything inside that element becomes feed item content.
|
||||
EOT,
|
||||
'exampleValue' => 'article.content',
|
||||
'required' => true
|
||||
],
|
||||
'content_cleanup' => [
|
||||
'name' => '[Optional] Content cleanup: List of items to remove',
|
||||
'title' => 'Selector for unnecessary elements to remove inside article contents.',
|
||||
'exampleValue' => 'div.ads, div.comments',
|
||||
],
|
||||
'dont_expand_metadata' => [
|
||||
'name' => '[Optional] Don\'t expand metadata',
|
||||
'title' => "This bridge will attempt to fill missing fields using metadata from the webpage.\nCheck to disable.",
|
||||
'type' => 'checkbox',
|
||||
],
|
||||
'discard_thumbnail' => [
|
||||
'name' => '[Optional] Discard thumbnail set by site author',
|
||||
'title' => 'Some sites set their logo as thumbnail for every article. Use this option to discard it.',
|
||||
'type' => 'checkbox',
|
||||
],
|
||||
'limit' => self::LIMIT
|
||||
]
|
||||
];
|
||||
|
||||
/**
 * Expand a truncated feed.
 *
 * Parses the source feed, derives homepage URL and feed name from it (falling back to
 * the feed URL's root page and host), applies the item limit, then fetches the full
 * content of each item with expandEntryWithSelector() and merges it with the metadata
 * of the source item.
 */
public function collectData()
{
    $feed_url = $this->getInput('feed');
    $content_selector = $this->getInput('content_selector');
    $content_cleanup = $this->getInput('content_cleanup');
    $keep_feed_metadata_only = $this->getInput('dont_expand_metadata');
    $discard_thumbnail = $this->getInput('discard_thumbnail');
    $limit = $this->getInput('limit');

    $source_feed = (new FeedParser())->parseFeed(getContents($feed_url));
    $feed_items = $source_feed['items'];

    // Homepage URL (Default: Root page)
    $this->homepageUrl = $source_feed['uri'] ?? urljoin($feed_url, '/');

    // Feed name (Default: Domain name)
    $this->feedName = $source_feed['title'] ?? explode('/', urljoin($feed_url, '/'))[2];

    // Item limit (Default: Global limit)
    if ($limit > 0) {
        $feed_items = array_slice($feed_items, 0, $limit);
    }

    foreach ($feed_items as $feed_item) {
        $expanded = $this->expandEntryWithSelector(
            $feed_item['uri'],
            $content_selector,
            $content_cleanup
        );

        if ($keep_feed_metadata_only) {
            // Feed item as-is, with only the content taken from the expanded data
            $result = $feed_item;
            $result['content'] = $expanded['content'];
        } else {
            // Expanded item, overridden by every non-empty field of the feed item except content
            $result = $expanded;
            foreach ($feed_item as $field => $val) {
                if ($field === 'content' || empty($val)) {
                    continue;
                }
                $result[$field] = $val;
            }
        }

        if ($discard_thumbnail && isset($result['enclosures'])) {
            unset($result['enclosures']);
        }

        $this->items[] = $result;
    }
}
|
||||
}
|
|
@ -47,8 +47,8 @@ class CubariBridge extends BridgeAbstract
|
|||
*/
|
||||
public function collectData()
|
||||
{
|
||||
$jsonSite = getContents($this->getInput('gist'));
|
||||
$jsonFile = json_decode($jsonSite, true);
|
||||
$json = getContents($this->getInput('gist'));
|
||||
$jsonFile = json_decode($json, true);
|
||||
|
||||
$this->mangaTitle = $jsonFile['title'];
|
||||
|
||||
|
@ -66,12 +66,14 @@ class CubariBridge extends BridgeAbstract
|
|||
{
|
||||
$url = $this->getInput('gist');
|
||||
|
||||
preg_match('/\/([a-z]*)\.githubusercontent.com(.*)/', $url, $matches);
|
||||
|
||||
// raw or gist is first match.
|
||||
$unencoded = $matches[1] . $matches[2];
|
||||
|
||||
return base64_encode($unencoded);
|
||||
if (preg_match('/\/([a-z]*)\.githubusercontent.com(.*)/', $url, $matches)) {
|
||||
// raw or gist is first match.
|
||||
$unencoded = $matches[1] . $matches[2];
|
||||
return base64_encode($unencoded);
|
||||
} else {
|
||||
// todo: fix this
|
||||
return '';
|
||||
}
|
||||
}
|
||||
|
||||
private function getSanitizedHash($string)
|
||||
|
|
|
@ -23,7 +23,10 @@ class CuriousCatBridge extends BridgeAbstract
|
|||
|
||||
$apiJson = getContents($url);
|
||||
|
||||
$apiData = json_decode($apiJson, true);
|
||||
$apiData = Json::decode($apiJson);
|
||||
if (isset($apiData['error'])) {
|
||||
throw new \Exception($apiData['error_code']);
|
||||
}
|
||||
|
||||
foreach ($apiData['posts'] as $post) {
|
||||
$item = [];
|
||||
|
|
|
@ -0,0 +1,62 @@
|
|||
<?PHP
|
||||
|
||||
/**
 * Bridge for the "Direkt" news ticker of Dagens Nyheter.
 *
 * Reads the article elements served by the /ajax/direkt/ endpoint and turns each one
 * into a feed item, up to LIMIT items.
 */
class DagensNyheterDirektBridge extends BridgeAbstract
{
    const NAME = 'Dagens Nyheter Direkt';
    const URI = 'https://www.dn.se/direkt/';
    const BASEURL = 'https://www.dn.se';
    const DESCRIPTION = 'Latest news summarised by Dagens Nyheter';
    const MAINTAINER = 'ajain-93';
    const LIMIT = 20;

    /**
     * @return string URL of the site favicon
     */
    public function getIcon()
    {
        return 'https://cdn.dn-static.se/images/favicon__c2dd3284b46ffdf4d520536e526065fa8.svg';
    }

    /**
     * Collect at most LIMIT items from the ticker.
     *
     * Articles without a link button are skipped; a missing title, byline or content
     * container yields an empty value for that field instead of an error.
     */
    public function collectData()
    {
        $newsUrl = self::BASEURL . '/ajax/direkt/';

        $html = getSimpleHTMLDOM($newsUrl) or
            returnServerError('Could not request: ' . $newsUrl);

        foreach ($html->find('article') as $element) {
            // Check before adding, so that exactly LIMIT items are returned at most.
            if (count($this->items) >= self::LIMIT) {
                break;
            }

            $linkButton = $element->find('button', 0);
            if (!$linkButton) {
                continue; // No link: the item URL cannot be built
            }
            $url = self::BASEURL . $linkButton->getAttribute('data-link');
            $datetime = $element->getAttribute('data-publication-time');

            $titleElement = $element->find('h2', 0);
            $title = $titleElement ? $titleElement->plaintext : '';

            $bylineElement = $element->find('div.ds-byline__titles', 0);
            $author = $bylineElement ? $bylineElement->plaintext : '';

            // Content: optional figure (image + caption) followed by the post paragraphs
            $articleHtml = '';
            $figure = $element->find('figure', 0);
            if ($figure) {
                $articleHtml = $figure->find('img', 0) . '<p><i>' . $figure->find('figcaption', 0) . '</i></p>';
            }

            $articleContent = $element->find('div.direkt-post__content', 0);
            if ($articleContent) {
                foreach ($articleContent->find('p') as $paragraph) {
                    $articleHtml .= $paragraph;
                }
            }

            $this->items[] = [
                'uri' => $url,
                'title' => $title,
                'author' => trim($author),
                'timestamp' => $datetime,
                'content' => trim($articleHtml),
            ];
        }
    }
}
|
|
@ -56,9 +56,8 @@ class DarkReadingBridge extends FeedExpander
|
|||
$this->collectExpandableDatas($feed_url, $limit);
|
||||
}
|
||||
|
||||
protected function parseItem($newsItem)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($newsItem);
|
||||
$article = getSimpleHTMLDOMCached($item['uri']);
|
||||
$item['content'] = $this->extractArticleContent($article);
|
||||
$item['enclosures'] = []; //remove author profile picture
|
||||
|
|
|
@ -43,9 +43,8 @@ class DauphineLibereBridge extends FeedExpander
|
|||
$this->collectExpandableDatas($url, 10);
|
||||
}
|
||||
|
||||
protected function parseItem($newsItem)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($newsItem);
|
||||
$item['content'] = $this->extractContent($item['uri']);
|
||||
return $item;
|
||||
}
|
||||
|
|
|
@ -1909,6 +1909,8 @@ class DealabsBridge extends PepperBridgeAbstract
|
|||
'context-group' => 'Deals par groupe',
|
||||
'context-talk' => 'Surveillance Discussion',
|
||||
'uri-group' => 'groupe/',
|
||||
'uri-deal' => 'bons-plans/',
|
||||
'uri-merchant' => 'search/bons-plans?merchant-id=',
|
||||
'request-error' => 'Impossible de joindre Dealabs',
|
||||
'thread-error' => 'Impossible de déterminer l\'ID de la discussion. Vérifiez l\'URL que vous avez entré',
|
||||
'no-results' => 'Il n'y a rien à afficher pour le moment :(',
|
||||
|
|
|
@ -4,8 +4,9 @@ class DemoBridge extends BridgeAbstract
|
|||
{
|
||||
const MAINTAINER = 'teromene';
|
||||
const NAME = 'DemoBridge';
|
||||
const URI = 'http://github.com/rss-bridge/rss-bridge';
|
||||
const URI = 'https://github.com/rss-bridge/rss-bridge';
|
||||
const DESCRIPTION = 'Bridge used for demos';
|
||||
const CACHE_TIMEOUT = 15;
|
||||
|
||||
const PARAMETERS = [
|
||||
'testCheckbox' => [
|
||||
|
|
|
@ -0,0 +1,62 @@
|
|||
<?php
|
||||
|
||||
/**
 * Bridge listing the assemblies and marches registered with the Berlin police.
 *
 * Reads the JSON index published on berlin.de and emits one item per entry dated no
 * later than "now + days".
 */
class DemosBerlinBridge extends BridgeAbstract
{
    const NAME = 'Demos Berlin';
    const URI = 'https://www.berlin.de/polizei/service/versammlungsbehoerde/versammlungen-aufzuege/';
    const CACHE_TIMEOUT = 3 * 60 * 60;
    const DESCRIPTION = 'Angezeigte Versammlungen und Aufzüge in Berlin';
    const MAINTAINER = 'knrdl';
    const PARAMETERS = [[
        'days' => [
            'name' => 'Tage',
            'type' => 'number',
            'title' => 'Einträge für die nächsten Tage zurückgeben',
            'required' => true,
            'defaultValue' => 7,
        ]
    ]];

    /**
     * @return string URL of the site favicon
     */
    public function getIcon()
    {
        return 'https://www.berlin.de/i9f/r1/images/favicon/favicon.ico';
    }

    /**
     * Download the JSON index and convert every entry within the requested time window
     * into a feed item.
     *
     * Values taken from the JSON are HTML-escaped before being placed into the item
     * content. Entries whose date cannot be parsed are skipped.
     */
    public function collectData()
    {
        $json = getContents('https://www.berlin.de/polizei/service/versammlungsbehoerde/versammlungen-aufzuege/index.php/index/all.json');
        $jsonFile = json_decode($json, true);
        if (!is_array($jsonFile) || !isset($jsonFile['index']) || !is_array($jsonFile['index'])) {
            returnServerError('Unexpected response: no "index" list in JSON');
        }

        // Latest date (inclusive) an entry may have to be part of the feed
        $maxTargetTimestamp = strtotime((int) $this->getInput('days') . ' day');

        foreach ($jsonFile['index'] as $entry) {
            $datum = $entry['datum'] ?? '';
            $von = $entry['von'] ?? '';
            $bis = $entry['bis'] ?? '';
            $thema = $entry['thema'] ?? '';

            $entryDay = implode('-', array_reverse(explode('.', $datum))); // dd.mm.yyyy to yyyy-mm-dd
            $entryTimestamp = strtotime($entryDay);
            if ($entryTimestamp === false || $entryTimestamp > $maxTargetTimestamp) {
                continue;
            }

            $location = ($entry['strasse_nr'] ?? '') . ' ' . ($entry['plz'] ?? '');

            $item = [];
            $item['uri'] = 'https://www.berlin.de/polizei/service/versammlungsbehoerde/versammlungen-aufzuege/index.php/detail/' . ($entry['id'] ?? '');
            $item['timestamp'] = $entryDay . ' ' . $von;
            $item['title'] = $thema;

            // Escaped copies for use inside the HTML content
            $htmlThema = $this->escapeHtml($thema);
            $htmlDatetime = $this->escapeHtml($item['timestamp']);
            $htmlWhen = $this->escapeHtml($datum . ' ' . $von . ' - ' . $bis);
            $htmlLocation = $this->escapeHtml($location);
            $htmlLocationQuery = $this->escapeHtml(http_build_query(['query' => $location]));
            $htmlRoute = $this->escapeHtml($entry['aufzugsstrecke'] ?? '');

            $item['content'] = <<<HTML
                <h1>{$htmlThema}</h1>
                <p>📅 <time datetime="{$htmlDatetime}">{$htmlWhen}</time></p>
                <a href="https://www.openstreetmap.org/search?{$htmlLocationQuery}">
                    📍 {$htmlLocation}
                </a>
                <p>{$htmlRoute}</p>
                HTML;
            $item['uid'] = $this->getSanitizedHash($datum . '-' . $von . '-' . $bis . '-' . $thema);

            $this->items[] = $item;
        }
    }

    /**
     * Escape a value for use in HTML text or a double-quoted attribute.
     *
     * @param mixed $value Value to escape (cast to string)
     * @return string Escaped text
     */
    private function escapeHtml($value)
    {
        return htmlspecialchars((string) $value, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
    }

    /**
     * Hash a string after lowercasing it and dropping everything but ASCII letters and digits.
     *
     * @param string $string Text to hash
     * @return string SHA-1 hex digest
     */
    private function getSanitizedHash($string)
    {
        return hash('sha1', preg_replace('/[^a-zA-Z0-9]/', '', strtolower($string)));
    }
}
|
|
@ -0,0 +1,141 @@
|
|||
<?php
|
||||
|
||||
/**
 * Expands Deutsche Welle Atom feeds with the body of each linked article.
 */
class DeutscheWelleBridge extends FeedExpander
{
    const MAINTAINER = 'No maintainer';
    const NAME = 'Deutsche Welle Bridge';
    const URI = 'https://www.dw.com';
    const DESCRIPTION = 'Returns the full articles instead of only the intro';
    const CACHE_TIMEOUT = 3600;
    const PARAMETERS = [[
        'feed' => [
            'name' => 'feed',
            'type' => 'list',
            'values' => [
                'All Top Stories and News Updates'
                    => 'http://rss.dw.com/atom/rss-en-all',
                'Top Stories'
                    => 'http://rss.dw.com/atom/rss-en-top',
                'Germany'
                    => 'http://rss.dw.com/atom/rss-en-ger',
                'World'
                    => 'http://rss.dw.com/atom/rss-en-world',
                'Europe'
                    => 'http://rss.dw.com/atom/rss-en-eu',
                'Business'
                    => 'http://rss.dw.com/atom/rss-en-bus',
                'Science'
                    => 'http://rss.dw.com/atom/rss_en_science',
                'Environment'
                    => 'http://rss.dw.com/atom/rss_en_environment',
                'Culture & Lifestyle'
                    => 'http://rss.dw.com/atom/rss-en-cul',
                'Sports'
                    => 'http://rss.dw.de/atom/rss-en-sports',
                'Visit Germany'
                    => 'http://rss.dw.com/atom/rss-en-visitgermany',
                'Asia'
                    => 'http://rss.dw.com/atom/rss-en-asia',
                'Deutsche Welle Gesamt'
                    => 'http://rss.dw.com/atom/rss-de-all',
                'Themen des Tages'
                    => 'http://rss.dw.com/atom/rss-de-top',
                'Nachrichten'
                    => 'http://rss.dw.com/atom/rss-de-news',
                'Wissenschaft'
                    => 'http://rss.dw.com/atom/rss-de-wissenschaft',
                'Sport'
                    => 'http://rss.dw.com/atom/rss-de-sport',
                'Deutschland entdecken'
                    => 'http://rss.dw.com/atom/rss-de-deutschlandentdecken',
                'Presse'
                    => 'http://rss.dw.com/atom/presse',
                'Politik'
                    => 'http://rss.dw.com/atom/rss_de_politik',
                'Wirtschaft'
                    => 'http://rss.dw.com/atom/rss-de-eco',
                'Kultur & Leben'
                    => 'http://rss.dw.com/atom/rss-de-cul',
                'Kultur & Leben: Buch'
                    => 'http://rss.dw.com/atom/rss-de-cul-buch',
                'Kultur & Leben: Film'
                    => 'http://rss.dw.com/atom/rss-de-cul-film',
                'Kultur & Leben: Musik'
                    => 'http://rss.dw.com/atom/rss-de-cul-musik',
            ]
        ]
    ]];

    /**
     * Collect the items of the feed selected through the "feed" input.
     */
    public function collectData()
    {
        $this->collectExpandableDatas($this->getInput('feed'));
    }

    /**
     * Replace the content of a feed item with the markup of the linked article.
     *
     * The item is returned unchanged when its URL cannot be parsed or the
     * fetched page contains no <article> element.
     *
     * @param array $item Feed item; its 'uri' entry is read
     * @return array The item, with 'content' (and 'author' when found) filled in
     */
    protected function parseItem(array $item)
    {
        // Drop the query string from the item URL before fetching the page.
        $parsedUrl = parse_url($item['uri']);
        if (!is_array($parsedUrl)) {
            return $item;
        }
        unset($parsedUrl['query']);
        $url = $this->unparseUrl($parsedUrl);

        $page = getSimpleHTMLDOM($url);
        $page = defaultLinkTo($page, $url);

        $article = $page->find('article', 0);
        if (!$article) {
            return $item;
        }

        // author
        $author = $article->find('.author-link > span', 0);
        if ($author) {
            $item['author'] = $author->text();
        }

        // The teaser is captured before the clean-up below and placed first.
        $teaser = $article->find('.teaser-text', 0);
        $item['content'] = is_null($teaser) ? '' : $teaser->outertext();

        // remove unneeded elements
        $unwanted = $article->find(
            'header, .advertisement, [data-tracking-name="sharing-icons-inline"], a.external-link > svg, picture > source, .vjs-wrapper, .dw-widget, footer'
        );
        foreach ($unwanted as $bad) {
            $bad->remove();
        }

        // reload html as remove() is buggy
        $article = str_get_html($article->outertext());
        if (!$article) {
            // Nothing could be re-parsed; keep whatever teaser was found.
            return $item;
        }

        // remove width and height values from img tags
        foreach ($article->find('img') as $img) {
            $img->width = null;
            $img->height = null;
        }

        // replace lazy-loaded images
        foreach ($article->find('figure.placeholder-image') as $figure) {
            $img = $figure->find('img', 0);
            if (!$img) {
                continue;
            }
            $img->src = str_replace('${formatId}', '906', $img->getAttribute('data-url'));
            $img->style = null;
        }

        $item['content'] .= $article->save();

        return $item;
    }

    /**
     * Rebuild a URL string from the component array produced by parse_url().
     *
     * @see https://www.php.net/manual/en/function.parse-url.php#106731
     *
     * @param array $parsed_url Components as returned by parse_url()
     * @return string The reassembled URL
     */
    private function unparseUrl($parsed_url)
    {
        $scheme = isset($parsed_url['scheme']) ? $parsed_url['scheme'] . '://' : '';
        $host = $parsed_url['host'] ?? '';
        $port = isset($parsed_url['port']) ? ':' . $parsed_url['port'] : '';
        $user = $parsed_url['user'] ?? '';
        // The password is separated from the user by ':'; without it the
        // credentials would be fused into a single "userpass@" token.
        $pass = isset($parsed_url['pass']) ? ':' . $parsed_url['pass'] : '';
        $pass = ($user || $pass) ? "$pass@" : '';
        $path = $parsed_url['path'] ?? '';
        $query = isset($parsed_url['query']) ? '?' . $parsed_url['query'] : '';
        $fragment = isset($parsed_url['fragment']) ? '#' . $parsed_url['fragment'] : '';

        return "$scheme$user$pass$host$port$path$query$fragment";
    }
}
|
|
@ -163,19 +163,6 @@ class DeveloppezDotComBridge extends FeedExpander
|
|||
]
|
||||
];
|
||||
|
||||
/**
|
||||
* Return the RSS url for selected domain
|
||||
*/
|
||||
private function getRssUrl()
{
    $subdomain = $this->getInput('domain');

    // No domain selected: use the feed of the main site.
    if (empty($subdomain)) {
        return self::URI . self::RSS_URL;
    }

    return 'https://' . $subdomain . self::DOMAIN . self::RSS_URL;
}
|
||||
|
||||
/**
|
||||
* Grabs the RSS item from Developpez.com
|
||||
*/
|
||||
|
@ -189,15 +176,12 @@ class DeveloppezDotComBridge extends FeedExpander
|
|||
* Parse the content of every RSS item. And will try to get the full article
|
||||
* pointed by the item URL intead of the default abstract.
|
||||
*/
|
||||
protected function parseItem($newsItem)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
if (count($this->items) >= $this->getInput('limit')) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// This function parse each entry in the RSS with the default parse
|
||||
$item = parent::parseItem($newsItem);
|
||||
|
||||
// There is a bug in Developpez RSS, coma are writtent as '~?' in the
|
||||
// title, so I have to fix it manually
|
||||
$item['title'] = $this->fixComaInTitle($item['title']);
|
||||
|
@ -229,6 +213,19 @@ class DeveloppezDotComBridge extends FeedExpander
|
|||
return $item;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the RSS url for selected domain
|
||||
*/
|
||||
private function getRssUrl()
{
    $subdomain = $this->getInput('domain');

    // No domain selected: use the feed of the main site.
    if (empty($subdomain)) {
        return self::URI . self::RSS_URL;
    }

    return 'https://' . $subdomain . self::DOMAIN . self::RSS_URL;
}
|
||||
|
||||
/**
|
||||
* Replace '~?' by a proper coma ','
|
||||
*/
|
||||
|
@ -334,6 +331,9 @@ class DeveloppezDotComBridge extends FeedExpander
|
|||
*/
|
||||
private function isHtmlTagNotTxt($txt)
{
    // An empty string is never markup.
    if ($txt === '') {
        return false;
    }

    $dom = str_get_html($txt);
    if (!$dom || !$dom->root) {
        return false;
    }

    // Markup is recognised by the parsed root having at least one child.
    return count($dom->root->children) > 0;
}
|
||||
|
|
|
@ -1,36 +0,0 @@
|
|||
<?php
|
||||
|
||||
/**
 * Builds one feed item per comic strip found on the Dilbert front page.
 */
class DilbertBridge extends BridgeAbstract
{
    const MAINTAINER = 'kranack';
    const NAME = 'Dilbert Daily Strip';
    const URI = 'https://dilbert.com';
    const CACHE_TIMEOUT = 21600; // 6h
    const DESCRIPTION = 'The Unofficial Dilbert Daily Comic Strip';

    /**
     * Scrape every `section.comic-item` of the front page into an item.
     *
     * Sections lacking either an image or a link are skipped.
     */
    public function collectData()
    {
        $html = getSimpleHTMLDOM(self::URI);

        foreach ($html->find('section.comic-item') as $element) {
            $img = $element->find('img', 0);
            $link = $element->find('a', 0);
            if (!$img || !$link) {
                continue;
            }

            $comic = $img->src;
            $title = $img->alt;
            $url = $link->href;

            // The last path segment of the strip URL is used as its date.
            $lastSegment = strrchr($url, '/');
            $date = $lastSegment === false ? '' : substr($lastSegment, 1);
            if (empty($title)) {
                $title = 'Dilbert Comic Strip on ' . $date;
            }

            $item = [];
            $item['uri'] = $url;
            $item['title'] = $title;
            $item['author'] = 'Scott Adams';
            $item['timestamp'] = strtotime($date);
            $item['content'] = '<img src="' . $comic . '" alt="' . $img->alt . '" />';
            $this->items[] = $item;
        }
    }
}
|
|
@ -0,0 +1,148 @@
|
|||
<?php
|
||||
|
||||
/**
 * Lists DoujinStyle submissions (recent, random or search results) and
 * enriches each one with the metadata shown on its detail page.
 */
class DoujinStyleBridge extends BridgeAbstract
{
    const NAME = 'DoujinStyle Bridge';
    const URI = 'https://doujinstyle.com/';
    const DESCRIPTION = 'Returns submissions from DoujinStyle';
    const MAINTAINER = 'mrtnvgr';

    // TODO: "Games" support

    const PARAMETERS = [
        'Most recent submissions' => [],
        'Randomly selected items' => [],
        'From search results' => [
            'query' => [
                'name' => 'Search query',
                'required' => true,
                'exampleValue' => 'FELT',
            ],
            'flac' => [
                'name' => 'Include FLAC',
                'type' => 'checkbox',
                'defaultValue' => false,
            ],
            'mp3' => [
                'name' => 'Include MP3',
                'type' => 'checkbox',
                'defaultValue' => false,
            ],
            'tta' => [
                'name' => 'Include TTA',
                'type' => 'checkbox',
                'defaultValue' => false,
            ],
            'opus' => [
                'name' => 'Include Opus',
                'type' => 'checkbox',
                'defaultValue' => false,
            ],
            'ogg' => [
                'name' => 'Include OGG',
                'type' => 'checkbox',
                'defaultValue' => false,
            ]
        ]
    ];

    /**
     * Fetch the listing page, then each submission's detail page, and build
     * one item per submission.
     *
     * Listing entries without a link are skipped. The page heading is used as
     * the title unless an "Artist" metadata row refines it.
     */
    public function collectData()
    {
        $html = getSimpleHTMLDOM($this->getURI());
        $html = defaultLinkTo($html, $this->getURI());

        foreach ($html->find('.gridBox .gridDetails') as $submission) {
            $link = $submission->find('a', 0);
            if (!$link) {
                continue;
            }

            $item = [];
            $item['uri'] = $link->href;

            $content = getSimpleHTMLDOM($item['uri']);
            $content = defaultLinkTo($content, $this->getURI());

            $heading = $content->find('h2', 0);
            $title = $heading ? $heading->plaintext : '';
            // Fallback title; replaced by "artist - title" when an artist is listed.
            $item['title'] = $title;

            $cover = $this->findCoverUrl($content);
            $item['content'] = $cover === null ? '' : "<img src='$cover'/>";

            // Metadata is rendered as parallel label (.pageSpan1) and
            // value (.pageSpan2) elements; pair them up by position.
            $keys = [];
            foreach ($content->find('.pageWrap .pageSpan1') as $key) {
                $keys[] = $key->plaintext;
            }
            $values = $content->find('.pageWrap .pageSpan2');
            // array_combine() rejects arrays of different sizes, so a page
            // with unbalanced labels/values simply yields no metadata.
            $metadata = count($keys) === count($values) ? array_combine($keys, $values) : [];

            foreach ($metadata as $key => $value) {
                switch ($key) {
                    case 'Artist':
                        $artistLink = $value->find('a', 0);
                        $artist = $artistLink ? $artistLink->plaintext : $value->plaintext;
                        $item['title'] = "$artist - $title";
                        $item['content'] .= "<br>Artist: $artist";
                        break;
                    case 'Tags:':
                        $item['categories'] = [];
                        foreach ($value->find('a') as $tag) {
                            // NOTE(review): search and replacement are the same
                            // character here, so this call changes nothing;
                            // the search string was presumably an HTML entity
                            // for '-' - confirm against the upstream file.
                            $tag = str_replace('-', '-', $tag->plaintext);
                            $item['categories'][] = $tag;
                        }

                        $item['content'] .= '<br>Tags: ' . join(', ', $item['categories']);
                        break;
                    case 'Format:':
                        $item['content'] .= "<br>Format: $value->plaintext";
                        break;
                    case 'Date Added:':
                        $item['timestamp'] = $value->plaintext;
                        break;
                    case 'Provided By:':
                        $providerLink = $value->find('a', 0);
                        $item['author'] = $providerLink ? $providerLink->plaintext : $value->plaintext;
                        break;
                }
            }

            $this->items[] = $item;
        }
    }

    /**
     * Build the listing URL for the queried context.
     *
     * @return string Site root, the random listing, or a search URL carrying
     *                the URL-encoded query and the selected format filters
     */
    public function getURI()
    {
        $url = self::URI;

        switch ($this->queriedContext) {
            case 'From search results':
                $url .= '?p=search&type=blanket';
                // Encode the user-supplied text so spaces, '&' and '#' cannot
                // break or alter the query string.
                $url .= '&result=' . urlencode((string) $this->getInput('query'));

                // Checkbox input => query parameter that enables the format.
                $formatParameters = [
                    'flac' => 'format0',
                    'mp3' => 'format1',
                    'tta' => 'format2',
                    'opus' => 'format3',
                    'ogg' => 'format4',
                ];
                foreach ($formatParameters as $input => $parameter) {
                    if ($this->getInput($input) == 1) {
                        $url .= '&' . $parameter . '=on';
                    }
                }
                break;
            case 'Randomly selected items':
                $url .= '?p=random';
                break;
        }

        return $url;
    }

    /**
     * Locate the cover image URL on a submission page.
     *
     * The link inside `#imgClick` is preferred; otherwise the `src` of the
     * `.coverWrap` element is used.
     *
     * @param simple_html_dom $page Parsed submission page
     * @return string|null Cover URL, or null when neither element exists
     */
    private function findCoverUrl($page)
    {
        $coverLink = $page->find('#imgClick a', 0);
        if (!is_null($coverLink)) {
            return $coverLink->href;
        }

        $coverImage = $page->find('.coverWrap', 0);

        return $coverImage ? $coverImage->src : null;
    }
}
|
|
@ -8,7 +8,7 @@ class DuckDuckGoBridge extends BridgeAbstract
|
|||
const CACHE_TIMEOUT = 21600; // 6h
|
||||
const DESCRIPTION = 'Returns results from DuckDuckGo.';
|
||||
|
||||
const SORT_DATE = '+sort:date';
|
||||
const SORT_DATE = ' sort:date';
|
||||
const SORT_RELEVANCE = '';
|
||||
|
||||
const PARAMETERS = [ [
|
||||
|
@ -31,13 +31,22 @@ class DuckDuckGoBridge extends BridgeAbstract
|
|||
|
||||
public function collectData()
|
||||
{
|
||||
$html = getSimpleHTMLDOM(self::URI . 'html/?kd=-1&q=' . $this->getInput('u') . $this->getInput('sort'));
|
||||
$query = [
|
||||
'kd' => '-1',
|
||||
'q' => $this->getInput('u') . $this->getInput('sort'),
|
||||
];
|
||||
$url = 'https://duckduckgo.com/html/?' . http_build_query($query);
|
||||
$html = getSimpleHTMLDOM($url);
|
||||
|
||||
foreach ($html->find('div.result') as $element) {
|
||||
$item = [];
|
||||
$item['uri'] = $element->find('a.result__a', 0)->href;
|
||||
$item['title'] = $element->find('h2.result__title', 0)->plaintext;
|
||||
$item['content'] = $element->find('a.result__snippet', 0)->plaintext;
|
||||
|
||||
$snippet = $element->find('a.result__snippet', 0);
|
||||
if ($snippet) {
|
||||
$item['content'] = $snippet->plaintext;
|
||||
}
|
||||
$this->items[] = $item;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -66,16 +66,27 @@ class EBayBridge extends BridgeAbstract
|
|||
$new_listing_label->remove();
|
||||
}
|
||||
|
||||
$item['title'] = $listing->find('.s-item__title', 0)->plaintext;
|
||||
$listingTitle = $listing->find('.s-item__title', 0);
|
||||
if ($listingTitle) {
|
||||
$item['title'] = $listingTitle->plaintext;
|
||||
}
|
||||
|
||||
$subtitle = implode('', $listing->find('.s-item__subtitle'));
|
||||
|
||||
$item['uri'] = $listing->find('.s-item__link', 0)->href;
|
||||
$listingUrl = $listing->find('.s-item__link', 0);
|
||||
if ($listingUrl) {
|
||||
$item['uri'] = $listingUrl->href;
|
||||
} else {
|
||||
$item['uri'] = null;
|
||||
}
|
||||
|
||||
preg_match('/.*\/itm\/(\d+).*/i', $item['uri'], $matches);
|
||||
$item['uid'] = $matches[1];
|
||||
if (preg_match('/.*\/itm\/(\d+).*/i', $item['uri'], $matches)) {
|
||||
$item['uid'] = $matches[1];
|
||||
}
|
||||
|
||||
$priceDom = $listing->find('.s-item__details > .s-item__detail > .s-item__price', 0);
|
||||
$price = $priceDom->plaintext ?? 'N/A';
|
||||
|
||||
$price = $listing->find('.s-item__details > .s-item__detail > .s-item__price', 0)->plaintext;
|
||||
$shippingFree = $listing->find('.s-item__details > .s-item__detail > .s-item__freeXDays', 0)->plaintext ?? '';
|
||||
$localDelivery = $listing->find('.s-item__details > .s-item__detail > .s-item__localDelivery', 0)->plaintext ?? '';
|
||||
$logisticsCost = $listing->find('.s-item__details > .s-item__detail > .s-item__logisticsCost', 0)->plaintext ?? '';
|
||||
|
@ -84,7 +95,12 @@ class EBayBridge extends BridgeAbstract
|
|||
|
||||
$sellerInfo = $listing->find('.s-item__seller-info-text', 0)->plaintext ?? '';
|
||||
|
||||
$item['enclosures'] = [ $listing->find('.s-item__image-wrapper > img', 0)->src . '#.image' ];
|
||||
$image = $listing->find('.s-item__image-wrapper > img', 0);
|
||||
if ($image) {
|
||||
// Not quite sure why append fragment here
|
||||
$imageUrl = $image->src . '#.image';
|
||||
$item['enclosures'] = [$imageUrl];
|
||||
}
|
||||
|
||||
$item['content'] = <<<CONTENT
|
||||
<p>$sellerInfo $location</p>
|
||||
|
|
|
@ -48,7 +48,6 @@ class EZTVBridge extends BridgeAbstract
|
|||
public function collectData()
|
||||
{
|
||||
$eztv_uri = $this->getEztvUri();
|
||||
Debug::log($eztv_uri);
|
||||
$ids = explode(',', trim($this->getInput('ids')));
|
||||
foreach ($ids as $id) {
|
||||
$data = json_decode(getContents(sprintf('%s/api/get-torrents?imdb_id=%s', $eztv_uri, $id)));
|
||||
|
@ -97,7 +96,7 @@ class EZTVBridge extends BridgeAbstract
|
|||
protected function getItemFromTorrent($torrent)
|
||||
{
|
||||
$item = [];
|
||||
$item['uri'] = $torrent->episode_url;
|
||||
$item['uri'] = $torrent->episode_url ?? $torrent->torrent_url;
|
||||
$item['author'] = $torrent->imdb_id;
|
||||
$item['timestamp'] = $torrent->date_released_unix;
|
||||
$item['title'] = $torrent->title;
|
||||
|
|
|
@ -93,21 +93,21 @@ class EconomistBridge extends FeedExpander
|
|||
$limit = 30;
|
||||
}
|
||||
|
||||
$this->collectExpandableDatas('https://www.economist.com/' . $category . '/rss.xml', $limit);
|
||||
$url = 'https://www.economist.com/' . $category . '/rss.xml';
|
||||
$this->collectExpandableDatas($url, $limit);
|
||||
}
|
||||
|
||||
protected function parseItem($feedItem)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($feedItem);
|
||||
$html = getSimpleHTMLDOM($item['uri']);
|
||||
$dom = getSimpleHTMLDOM($item['uri']);
|
||||
|
||||
$article = $html->find('#new-article-template', 0);
|
||||
$article = $dom->find('#new-article-template', 0);
|
||||
if ($article == null) {
|
||||
$article = $html->find('main', 0);
|
||||
$article = $dom->find('main', 0);
|
||||
}
|
||||
if ($article) {
|
||||
$elem = $article->find('div', 0);
|
||||
list($content, $audio_url) = $this->processContent($html, $elem);
|
||||
list($content, $audio_url) = $this->processContent($dom, $elem);
|
||||
$item['content'] = $content;
|
||||
if ($audio_url != null) {
|
||||
$item['enclosures'] = [$audio_url];
|
||||
|
@ -159,7 +159,7 @@ class EconomistBridge extends FeedExpander
|
|||
$svelte->parent->removeChild($svelte);
|
||||
}
|
||||
foreach ($elem->find('img') as $strange_img) {
|
||||
if (!str_contains($strange_img->src, 'https://economist.com')) {
|
||||
if (!str_contains($strange_img->src, 'economist.com')) {
|
||||
$strange_img->src = 'https://economist.com' . $strange_img->src;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,106 @@
|
|||
<?php
|
||||
|
||||
/**
 * Scrapes EDF tariff information from jechange.fr and exposes each value
 * (day colours, per-colour prices, subscription prices) as a feed item.
 */
class EdfPricesBridge extends BridgeAbstract
{
    const NAME = 'EDF tarifs';
    // pull info from this site for now because EDF do not provide correct opendata
    const URI = 'https://www.jechange.fr';
    const DESCRIPTION = 'Fetches the latest infos of EDF prices';
    const MAINTAINER = 'floviolleau';
    const PARAMETERS = [
        [
            'contract' => [
                'name' => 'Choisir un contrat',
                'type' => 'list',
                // we can add later HCHP, EJP, base
                'values' => ['Tempo' => '/energie/edf/tarifs/tempo'],
            ]
        ]
    ];
    const CACHE_TIMEOUT = 7200; // 2h

    /**
     * Extract the Tempo contract data from the page.
     *
     * Three sections are read in turn: the current/next day colours, the
     * per-colour prices and the subscription prices per power. A section
     * whose anchor element is missing is skipped instead of raising an error.
     *
     * @param simple_html_dom $html Parsed contract page
     * @param string $contractUri Path of the contract page, appended to URI
     * @return void
     */
    private function tempo(simple_html_dom $html, string $contractUri): void
    {
        // current color and next
        $daysContainer = $this->nthNextSibling($html->find('#calendrier', 0), 1);
        $daysDom = $daysContainer ? $daysContainer->find('.card--ejp') : [];
        if ($daysDom && count($daysDom) === 2) {
            $year = (new \DateTime('now'))->format('Y');
            foreach ($daysDom as $dayDom) {
                $dayTitle = $dayDom->find('.card__title', 0);
                $dayColor = $dayDom->find('.card-ejp__icon span', 0);
                if (!$dayTitle || !$dayColor) {
                    continue;
                }

                $day = trim($dayTitle->innertext) . '/' . $year;
                $this->addPriceItem($day . ' - ' . $dayColor->innertext, $contractUri);
            }
        }

        // colors: the list sits three siblings after the tariff heading
        $tariffHeading = $html->find('#tarif-de-l-offre-edf-tempo-current-date-html-year', 0);
        $ulDom = $this->nthNextSibling($tariffHeading, 3);
        if ($ulDom === null) {
            // The power list is located relative to this one, so stop here.
            return;
        }

        $elementsDom = $ulDom->find('li');
        if ($elementsDom && count($elementsDom) === 3) {
            foreach ($elementsDom as $elementDom) {
                $matches = [];
                preg_match('/Jour (.*) : Heures (.*) : (.*) € \/ Heures (.*) : (.*) €/um', $elementDom->innertext, $matches);

                if (count($matches) === 6) {
                    // Groups 2/3 and 4/5 are the two (period, price) pairs.
                    for ($i = 0; $i < 2; $i++) {
                        $text = 'Jour ' . $matches[1] . ' - Heures ' . $matches[2 + 2 * $i] . ' : ' . $matches[3 + 2 * $i] . '€';
                        $this->addPriceItem($text, $contractUri);
                    }
                }
            }
        }

        // powers: the list sits two siblings after the colour list
        $ulPowerContract = $this->nthNextSibling($ulDom, 2);
        if ($ulPowerContract === null) {
            return;
        }

        $elementsPowerContractDom = $ulPowerContract->find('li');
        if ($elementsPowerContractDom && count($elementsPowerContractDom) === 4) {
            foreach ($elementsPowerContractDom as $elementPowerContractDom) {
                $matches = [];
                preg_match('/(.*) kVA : (.*) €/um', $elementPowerContractDom->innertext, $matches);

                if (count($matches) === 3) {
                    $text = $matches[1] . ' kVA : ' . $matches[2] . '€';
                    $this->addPriceItem($text, $contractUri);
                }
            }
        }
    }

    /**
     * Append an item whose title and content are both the given text.
     *
     * @param string $text Item title and content; its SHA-256 is the item uid
     * @param string $contractUri Path of the contract page, appended to URI
     * @return void
     */
    private function addPriceItem(string $text, string $contractUri): void
    {
        $this->items[] = [
            'uri' => self::URI . $contractUri,
            'title' => $text,
            'author' => self::MAINTAINER,
            'content' => $text,
            'uid' => hash('sha256', $text),
        ];
    }

    /**
     * Walk a number of nextSibling() steps from a node.
     *
     * @param object|null $node Starting node, or null when the lookup failed
     * @param int $steps Number of siblings to advance
     * @return object|null The node reached, or null if the chain ends early
     */
    private function nthNextSibling($node, int $steps)
    {
        for ($i = 0; $node && $i < $steps; $i++) {
            $node = $node->nextSibling();
        }

        return $node ?: null;
    }

    /**
     * Fetch the page of the selected contract and dispatch to its parser.
     */
    public function collectData()
    {
        $contract = $this->getKey('contract');
        $contractUri = $this->getInput('contract');
        $html = getSimpleHTMLDOM(self::URI . $contractUri);

        if ($contract === 'Tempo') {
            $this->tempo($html, $contractUri);
        }
    }
}
|
|
@ -113,21 +113,17 @@ class ElloBridge extends BridgeAbstract
|
|||
|
||||
private function getAPIKey()
|
||||
{
|
||||
$cacheFactory = new CacheFactory();
|
||||
$cacheKey = 'ElloBridge_key';
|
||||
$apiKey = $this->cache->get($cacheKey);
|
||||
|
||||
$cache = $cacheFactory->create();
|
||||
$cache->setScope('ElloBridge');
|
||||
$cache->setKey(['key']);
|
||||
$key = $cache->loadData();
|
||||
|
||||
if ($key == null) {
|
||||
$keyInfo = getContents(self::URI . 'api/webapp-token') or
|
||||
returnServerError('Unable to get token.');
|
||||
$key = json_decode($keyInfo)->token->access_token;
|
||||
$cache->saveData($key);
|
||||
if (!$apiKey) {
|
||||
$keyInfo = getContents(self::URI . 'api/webapp-token') or returnServerError('Unable to get token.');
|
||||
$apiKey = json_decode($keyInfo)->token->access_token;
|
||||
$ttl = 60 * 60 * 20;
|
||||
$this->cache->set($cacheKey, $apiKey, $ttl);
|
||||
}
|
||||
|
||||
return $key;
|
||||
return $apiKey;
|
||||
}
|
||||
|
||||
public function getName()
|
||||
|
|
|
@ -10,21 +10,26 @@ class EngadgetBridge extends FeedExpander
|
|||
|
||||
public function collectData()
|
||||
{
|
||||
$this->collectExpandableDatas(static::URI . 'rss.xml', 15);
|
||||
$url = 'https://www.engadget.com/rss.xml';
|
||||
$max = 10;
|
||||
$this->collectExpandableDatas($url, $max);
|
||||
}
|
||||
|
||||
protected function parseItem($newsItem)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($newsItem);
|
||||
// $articlePage gets the entire page's contents
|
||||
$articlePage = getSimpleHTMLDOM($newsItem->link);
|
||||
$itemUrl = trim($item['uri']);
|
||||
if (!$itemUrl) {
|
||||
return $item;
|
||||
}
|
||||
// todo: remove querystring tracking
|
||||
$dom = getSimpleHTMLDOM($itemUrl);
|
||||
// figure contain's the main article image
|
||||
$article = $articlePage->find('figure', 0);
|
||||
$article = $dom->find('figure', 0);
|
||||
// .article-text has the actual article
|
||||
foreach ($articlePage->find('.article-text') as $element) {
|
||||
foreach ($dom->find('.article-text') as $element) {
|
||||
$article = $article . $element;
|
||||
}
|
||||
$item['content'] = $article;
|
||||
$item['content'] = $article ?? '';
|
||||
return $item;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,5 +1,8 @@
|
|||
<?php
|
||||
|
||||
/**
|
||||
* Appears to be protected by cloudflare now
|
||||
*/
|
||||
class EsquerdaNetBridge extends FeedExpander
|
||||
{
|
||||
const MAINTAINER = 'somini';
|
||||
|
@ -23,32 +26,14 @@ class EsquerdaNetBridge extends FeedExpander
|
|||
]
|
||||
];
|
||||
|
||||
/**
 * Feed URL for the type chosen through the "feed" input.
 */
public function getURI()
{
    return self::URI . '/rss/' . $this->getInput('feed');
}
|
||||
|
||||
/**
 * Fixed favicon URL of the site.
 */
public function getIcon()
{
    $favicon = 'https://www.esquerda.net/sites/default/files/favicon_0.ico';

    return $favicon;
}
|
||||
|
||||
/**
 * Collect the items of the feed returned by getURI().
 */
public function collectData()
{
    $feedUrl = $this->getURI();

    parent::collectExpandableDatas($feedUrl);
}
|
||||
|
||||
protected function parseItem($newsItem)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
# Fix Publish date
|
||||
$badDate = $newsItem->pubDate;
|
||||
preg_match('|(?P<day>\d\d)/(?P<month>\d\d)/(?P<year>\d\d\d\d) - (?P<hour>\d\d):(?P<minute>\d\d)|', $badDate, $d);
|
||||
$newsItem->pubDate = sprintf('%s-%s-%sT%s:%s', $d['year'], $d['month'], $d['day'], $d['hour'], $d['minute']);
|
||||
$item = parent::parseItem($newsItem);
|
||||
# Include all the content
|
||||
$uri = $item['uri'];
|
||||
$html = getSimpleHTMLDOMCached($uri);
|
||||
$html = getSimpleHTMLDOMCached($item['uri']);
|
||||
$content = $html->find('div#content div.content', 0);
|
||||
## Fix author
|
||||
$authorHTML = $html->find('.field-name-field-op-author a', 0);
|
||||
|
@ -72,4 +57,15 @@ class EsquerdaNetBridge extends FeedExpander
|
|||
$item['content'] = $content;
|
||||
return $item;
|
||||
}
|
||||
|
||||
/**
 * Feed URL for the type chosen through the "feed" input.
 */
public function getURI()
{
    return self::URI . '/rss/' . $this->getInput('feed');
}
|
||||
|
||||
/**
 * Fixed favicon URL of the site.
 */
public function getIcon()
{
    $favicon = 'https://www.esquerda.net/sites/default/files/favicon_0.ico';

    return $favicon;
}
|
||||
}
|
||||
|
|
|
@ -47,11 +47,11 @@ class EtsyBridge extends BridgeAbstract
|
|||
|
||||
$item['title'] = $result->find('a', 0)->title;
|
||||
$item['uri'] = $result->find('a', 0)->href;
|
||||
$item['author'] = $result->find('p.wt-text-gray > span', 2)->plaintext;
|
||||
$item['author'] = $result->find('p.wt-text-gray > span', 2)->plaintext ?? '';
|
||||
|
||||
$item['content'] = '<p>'
|
||||
. $result->find('span.currency-symbol', 0)->plaintext
|
||||
. $result->find('span.currency-value', 0)->plaintext
|
||||
. ($result->find('span.currency-symbol', 0)->plaintext ?? '')
|
||||
. ($result->find('span.currency-value', 0)->plaintext ?? '')
|
||||
. '</p><p>'
|
||||
. $result->find('a', 0)->title
|
||||
. '</p>';
|
||||
|
|
|
@ -304,7 +304,11 @@ EOD
|
|||
$regex = '/"pageID":"([0-9]*)"/';
|
||||
preg_match($regex, $pageContent, $matches);
|
||||
|
||||
return ['userId' => $matches[1], 'username' => $username];
|
||||
$arr = [
|
||||
'userId' => $matches[1] ?? null,
|
||||
'username' => $username,
|
||||
];
|
||||
return $arr;
|
||||
}
|
||||
|
||||
public function getName()
|
||||
|
|
|
@ -21,34 +21,18 @@ class FDroidBridge extends BridgeAbstract
|
|||
|
||||
public function getIcon()
|
||||
{
|
||||
return self::URI . 'assets/favicon.ico?v=8j6PKzW9Mk';
|
||||
return self::URI . 'assets/favicon.ico';
|
||||
}
|
||||
|
||||
private function getTimestamp($url)
|
||||
{
|
||||
$curlOptions = [
|
||||
CURLOPT_RETURNTRANSFER => true,
|
||||
CURLOPT_HEADER => true,
|
||||
CURLOPT_NOBODY => true,
|
||||
CURLOPT_CONNECTTIMEOUT => 19,
|
||||
CURLOPT_TIMEOUT => 19,
|
||||
CURLOPT_CUSTOMREQUEST => 'HEAD',
|
||||
CURLOPT_NOBODY => true,
|
||||
];
|
||||
$ch = curl_init($url);
|
||||
curl_setopt_array($ch, $curlOptions);
|
||||
$curlHeaders = curl_exec($ch);
|
||||
$curlError = curl_error($ch);
|
||||
curl_close($ch);
|
||||
if (!empty($curlError)) {
|
||||
return false;
|
||||
}
|
||||
$curlHeaders = explode("\n", $curlHeaders);
|
||||
$timestamp = false;
|
||||
foreach ($curlHeaders as $header) {
|
||||
if (strpos($header, 'Last-Modified') !== false) {
|
||||
$timestamp = str_replace('Last-Modified: ', '', $header);
|
||||
$timestamp = strtotime($timestamp);
|
||||
}
|
||||
}
|
||||
$reponse = getContents($url, [], $curlOptions, true);
|
||||
$lastModified = $reponse['headers']['last-modified'][0] ?? null;
|
||||
$timestamp = strtotime($lastModified ?? 'today');
|
||||
return $timestamp;
|
||||
}
|
||||
|
||||
|
|
|
@ -88,6 +88,7 @@ class FacebookBridge extends BridgeAbstract
|
|||
// By profile
|
||||
$regex = '/^(https?:\/\/)?(www\.)?facebook\.com\/profile\.php\?id\=([^\/?&\n]+)?(.*)/';
|
||||
if (preg_match($regex, $url, $matches) > 0) {
|
||||
$params['context'] = 'User';
|
||||
$params['u'] = urldecode($matches[3]);
|
||||
return $params;
|
||||
}
|
||||
|
@ -95,6 +96,7 @@ class FacebookBridge extends BridgeAbstract
|
|||
// By group
|
||||
$regex = '/^(https?:\/\/)?(www\.)?facebook\.com\/groups\/([^\/?\n]+)?(.*)/';
|
||||
if (preg_match($regex, $url, $matches) > 0) {
|
||||
$params['context'] = 'Group';
|
||||
$params['g'] = urldecode($matches[3]);
|
||||
return $params;
|
||||
}
|
||||
|
@ -103,6 +105,7 @@ class FacebookBridge extends BridgeAbstract
|
|||
$regex = '/^(https?:\/\/)?(www\.)?facebook\.com\/([^\/?\n]+)/';
|
||||
|
||||
if (preg_match($regex, $url, $matches) > 0) {
|
||||
$params['context'] = '';
|
||||
$params['u'] = urldecode($matches[3]);
|
||||
return $params;
|
||||
}
|
||||
|
|
|
@ -0,0 +1,134 @@
|
|||
<?php
|
||||
|
||||
class FallGuysBridge extends BridgeAbstract
|
||||
{
|
||||
const MAINTAINER = 'User123698745';
|
||||
const NAME = 'Fall Guys';
|
||||
const BASE_URI = 'https://www.fallguys.com';
|
||||
const URI = self::BASE_URI . '/news';
|
||||
const CACHE_TIMEOUT = 600; // 10min
|
||||
const DESCRIPTION = 'News from the Fall Guys website';
|
||||
const DEFAULT_LOCALE = 'en-US';
|
||||
const PARAMETERS = [
|
||||
[
|
||||
'locale' => [
|
||||
'name' => 'Language',
|
||||
'type' => 'list',
|
||||
'values' => [
|
||||
'English' => 'en-US',
|
||||
'لعربية' => 'ar',
|
||||
'Deutsch' => 'de',
|
||||
'Español (Spain)' => 'es-ES',
|
||||
'Español (LA)' => 'es-MX',
|
||||
'Français' => 'fr',
|
||||
'Italiano' => 'it',
|
||||
'日本語' => 'ja',
|
||||
'한국어' => 'ko',
|
||||
'Polski' => 'pl',
|
||||
'Português (Brasil)' => 'pt-BR',
|
||||
'Русский' => 'ru',
|
||||
'Türkçe' => 'tr',
|
||||
'简体中文' => 'zh-CN',
|
||||
],
|
||||
'defaultValue' => self::DEFAULT_LOCALE,
|
||||
]
|
||||
]
|
||||
];
|
||||
|
||||
public function collectData()
|
||||
{
|
||||
$html = getSimpleHTMLDOM(self::getURI());
|
||||
|
||||
$data = json_decode($html->find('#__NEXT_DATA__', 0)->innertext);
|
||||
|
||||
foreach ($data->props->pageProps->newsList as $newsItem) {
|
||||
$headerDescription = property_exists($newsItem->header, 'description') ? $newsItem->header->description : '';
|
||||
$headerImage = $newsItem->header->image->src;
|
||||
|
||||
$contentImages = [$headerImage];
|
||||
|
||||
$content = <<<HTML
|
||||
<p>{$headerDescription}</p>
|
||||
<p><img src="{$headerImage}"></p>
|
||||
HTML;
|
||||
|
||||
foreach ($newsItem->content->items as $contentItem) {
|
||||
if (property_exists($contentItem, 'articleCopy')) {
|
||||
if (property_exists($contentItem->articleCopy, 'title')) {
|
||||
$title = $contentItem->articleCopy->title;
|
||||
|
||||
$content .= <<<HTML
|
||||
<h2>{$title}</h2>
|
||||
HTML;
|
||||
}
|
||||
|
||||
$text = $contentItem->articleCopy->copy;
|
||||
|
||||
$content .= <<<HTML
|
||||
<p>{$text}</p>
|
||||
HTML;
|
||||
} elseif (property_exists($contentItem, 'articleImage')) {
|
||||
$image = $contentItem->articleImage->imageSrc;
|
||||
|
||||
if ($image != $headerImage) {
|
||||
$contentImages[] = $image;
|
||||
|
||||
$content .= <<<HTML
|
||||
<p><img src="{$image}"></p>
|
||||
HTML;
|
||||
}
|
||||
} elseif (property_exists($contentItem, 'embeddedVideo')) {
|
||||
$mediaOptions = $contentItem->embeddedVideo->mediaOptions;
|
||||
$mainContentOptions = $contentItem->embeddedVideo->mainContentOptions;
|
||||
|
||||
if (count($mediaOptions) == count($mainContentOptions)) {
|
||||
for ($i = 0; $i < count($mediaOptions); $i++) {
|
||||
if (property_exists($mediaOptions[$i], 'youtubeVideo')) {
|
||||
$videoUrl = 'https://youtu.be/' . $mediaOptions[$i]->youtubeVideo->contentId;
|
||||
$image = $mainContentOptions[$i]->image->src ?? '';
|
||||
|
||||
$content .= '<p>';
|
||||
|
||||
if ($image != $headerImage) {
|
||||
$contentImages[] = $image;
|
||||
|
||||
$content .= <<<HTML
|
||||
<a href="{$videoUrl}"><img src="{$image}"></a><br>
|
||||
HTML;
|
||||
}
|
||||
|
||||
$content .= <<<HTML
|
||||
<i>(Video: <a href="{$videoUrl}">{$videoUrl}</a>)</i>
|
||||
HTML;
|
||||
|
||||
$content .= '</p>';
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
$item = [
|
||||
'uid' => $newsItem->_id,
|
||||
'uri' => self::getURI() . '/' . $newsItem->_slug,
|
||||
'title' => $newsItem->_title,
|
||||
'timestamp' => $newsItem->lastModified,
|
||||
'content' => $content,
|
||||
'enclosures' => $contentImages,
|
||||
];
|
||||
|
||||
$this->items[] = $item;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Build the URL of the news section for the requested locale.
 *
 * Uses the 'locale' input when it is set and self::DEFAULT_LOCALE otherwise.
 *
 * @return string self::BASE_URI, the locale and "news", joined by slashes
 */
public function getURI()
{
    $locale = $this->getInput('locale');
    if ($locale === null) {
        $locale = self::DEFAULT_LOCALE;
    }

    return implode('/', [self::BASE_URI, $locale, 'news']);
}
|
||||
|
||||
/**
 * Return the location of the site's favicon.
 *
 * @return string self::BASE_URI followed by "/favicon.ico"
 */
public function getIcon()
{
    $iconPath = '/favicon.ico';

    return self::BASE_URI . $iconPath;
}
|
||||
}
|
|
@ -0,0 +1,103 @@
|
|||
<?php
|
||||
|
||||
/**
 * Feed-expander bridge that reads the RSS feed served at
 * self::URI . <username> . '/rss' and optionally filters out replies and
 * retweets and rewrites item links to point at self::HOST.
 */
class FarsideNitterBridge extends FeedExpander
{
    const NAME = 'Farside Nitter Bridge';
    const DESCRIPTION = "Returns an user's recent tweets";
    const URI = 'https://farside.link/nitter/';
    const HOST = 'https://twitter.com/';
    // Number of extra fetch attempts made after the first one fails.
    const MAX_RETRIES = 3;
    const PARAMETERS = [
        [
            'username' => [
                'name' => 'username',
                'required' => true,
                'exampleValue' => 'NASA'
            ],
            'noreply' => [
                'name' => 'Without replies',
                'type' => 'checkbox',
                'title' => 'Only return initial tweets'
            ],
            'noretweet' => [
                'name' => 'Without retweets',
                'required' => false,
                'type' => 'checkbox',
                'title' => 'Hide retweets'
            ],
            'linkbacktotwitter' => [
                'name' => 'Link back to twitter',
                'required' => false,
                'type' => 'checkbox',
                'title' => 'Rewrite links back to twitter.com'
            ]
        ],
    ];

    /**
     * Derive bridge parameters from a nitter.net or twitter.com profile URL.
     *
     * @param string $url URL to inspect
     * @return array|null Parameter set (all filters enabled) or null when the URL does not match
     */
    public function detectParameters($url)
    {
        if (preg_match('/^(https?:\/\/)?(www\.)?(nitter\.net|twitter\.com)\/([^\/?\n]+)/', $url, $matches) > 0) {
            return [
                'username' => $matches[4],
                'noreply' => true,
                'noretweet' => true,
                'linkbacktotwitter' => true
            ];
        }
        return null;
    }

    /**
     * Collect the feed items (delegates to getRSS(), which handles retries).
     */
    public function collectData()
    {
        $this->getRSS();
    }

    /**
     * Fetch and expand the user's RSS feed, retrying when the fetch throws.
     *
     * @param int $attempt Number of retries already performed; external callers use the default
     * @throws \Exception The most recent failure once self::MAX_RETRIES retries are used up
     */
    private function getRSS($attempt = 0)
    {
        try {
            $this->collectExpandableDatas(self::URI . $this->getInput('username') . '/rss');
        } catch (\Exception $e) {
            if ($attempt >= self::MAX_RETRIES) {
                throw $e;
            }

            // Pass the incremented value explicitly: a post-increment
            // (`$attempt++`) hands the OLD value to the callee, so the counter
            // would never advance and a persistent failure would recurse forever.
            $this->getRSS($attempt + 1);
        }
    }

    /**
     * Filter and post-process one parsed feed item.
     *
     * @param array $item Item as produced by the feed parser ('title' and 'uri' are read here)
     * @return array|null The adjusted item, or null (bare return) when it is filtered out
     */
    protected function parseItem(array $item)
    {
        // Titles starting with "R to " are treated as replies.
        if ($this->getInput('noreply') && substr($item['title'], 0, 5) === 'R to ') {
            return;
        }
        // Titles starting with "RT by " are treated as retweets.
        if ($this->getInput('noretweet') && substr($item['title'], 0, 6) === 'RT by ') {
            return;
        }

        $item['title'] = truncate($item['title']);

        // Rebuild the item link from its "/status/..." tail so that it points
        // either at self::HOST or at self::URI, depending on the user's choice.
        if (preg_match('/(\/status\/.+)/', $item['uri'], $matches) > 0) {
            $base = $this->getInput('linkbacktotwitter') ? self::HOST : self::URI;
            $item['uri'] = $base . $this->getInput('username') . $matches[1];
        }

        return $item;
    }

    /**
     * Feed name: the parent's name cut at the first " /" separator, if present.
     *
     * @return string
     */
    public function getName()
    {
        if (preg_match('/(.+) \//', parent::getName(), $matches) > 0) {
            return $matches[1];
        }
        return parent::getName();
    }

    /**
     * Profile URL for the configured user, on self::HOST or self::URI
     * depending on the 'linkbacktotwitter' option.
     *
     * @return string
     */
    public function getURI()
    {
        if ($this->getInput('linkbacktotwitter')) {
            return self::HOST . $this->getInput('username');
        }

        return self::URI . $this->getInput('username');
    }
}
|
|
@ -43,24 +43,4 @@ class FeedExpanderExampleBridge extends FeedExpander
|
|||
returnClientError('Unknown version ' . $this->getInput('version') . '!');
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Dispatch a feed item to the parser matching the selected feed version.
 *
 * @param mixed $newsItem Raw feed item, handed unchanged to the version-specific parser
 * @return mixed Whatever the selected parser returns
 */
protected function parseItem($newsItem)
{
    $version = $this->getInput('version');

    switch ($version) {
        case 'rss_0_9_1':
            return $this->parseRss091Item($newsItem);

        case 'rss_1_0':
            return $this->parseRss1Item($newsItem);

        case 'rss_2_0':
            return $this->parseRss2Item($newsItem);

        case 'atom_1_0':
            return $this->parseATOMItem($newsItem);

        default:
            returnClientError('Unknown version ' . $version . '!');
    }
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,23 @@
|
|||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
/**
 * Minimal FeedExpander bridge used to try the expander against sample feeds.
 */
class FeedExpanderTestBridge extends FeedExpander
{
    const MAINTAINER = 'No maintainer';
    const NAME = 'Unnamed bridge';
    const URI = 'https://esdf.com/';
    const DESCRIPTION = 'No description provided';
    const PARAMETERS = [];
    const CACHE_TIMEOUT = 3600;

    /**
     * Expand one of the sample feeds below (currently the atom one).
     */
    public function collectData()
    {
        // One sample feed per supported format; switch the key below to test another.
        $sampleFeeds = [
            'rss 0.91' => 'http://static.userland.com/gems/backend/sampleRss.xml',
            'rss 1.0' => 'http://feeds.nature.com/nature/rss/current?format=xml',
            'rss 2.0' => 'https://dvikan.no/feed.xml',
            'atom' => 'https://nedlasting.geonorge.no/geonorge/Tjenestefeed.xml',
        ];

        $this->collectExpandableDatas($sampleFeeds['atom']);
    }
}
|
|
@ -14,7 +14,7 @@ TEXT;
|
|||
'feed_name' => [
|
||||
'name' => 'Feed name',
|
||||
'type' => 'text',
|
||||
'exampleValue' => 'rss-bridge/FeedMerger',
|
||||
'exampleValue' => 'FeedMerge',
|
||||
],
|
||||
'feed_1' => [
|
||||
'name' => 'Feed url',
|
||||
|
@ -58,9 +58,29 @@ TEXT;
|
|||
$feeds = array_filter($feeds);
|
||||
|
||||
foreach ($feeds as $feed) {
|
||||
// Fetch all items from the feed
|
||||
// todo: consider wrapping this in a try..catch to not let a single feed break the entire bridge?
|
||||
$this->collectExpandableDatas($feed);
|
||||
if (count($feeds) > 1) {
|
||||
// Allow one or more feeds to fail
|
||||
try {
|
||||
$this->collectExpandableDatas($feed);
|
||||
} catch (HttpException $e) {
|
||||
$this->logger->warning(sprintf('Exception in FeedMergeBridge: %s', create_sane_exception_message($e)));
|
||||
$this->items[] = [
|
||||
'title' => 'RSS-Bridge: ' . $e->getMessage(),
|
||||
// Give current time so it sorts to the top
|
||||
'timestamp' => time(),
|
||||
];
|
||||
continue;
|
||||
} catch (\Exception $e) {
|
||||
if (str_starts_with($e->getMessage(), 'Unable to parse xml')) {
|
||||
// Allow this particular exception from FeedExpander
|
||||
$this->logger->warning(sprintf('Exception in FeedMergeBridge: %s', create_sane_exception_message($e)));
|
||||
continue;
|
||||
}
|
||||
throw $e;
|
||||
}
|
||||
} else {
|
||||
$this->collectExpandableDatas($feed);
|
||||
}
|
||||
}
|
||||
|
||||
// Sort by timestamp descending
|
||||
|
@ -91,6 +111,6 @@ TEXT;
|
|||
|
||||
public function getName()
|
||||
{
|
||||
return $this->getInput('feed_name') ?: 'rss-bridge/FeedMerger';
|
||||
return $this->getInput('feed_name') ?: 'FeedMerge';
|
||||
}
|
||||
}
|
||||
|
|
|
@ -23,8 +23,9 @@ class FeedReducerBridge extends FeedExpander
|
|||
|
||||
public function collectData()
|
||||
{
|
||||
if (preg_match('#^http(s?)://#i', $this->getInput('url'))) {
|
||||
$this->collectExpandableDatas($this->getInput('url'));
|
||||
$url = $this->getInput('url');
|
||||
if (preg_match('#^http(s?)://#i', $url)) {
|
||||
$this->collectExpandableDatas($url);
|
||||
} else {
|
||||
throw new Exception('URI must begin with http(s)://');
|
||||
}
|
||||
|
@ -35,7 +36,7 @@ class FeedReducerBridge extends FeedExpander
|
|||
$filteredItems = [];
|
||||
$intPercentage = (int)preg_replace('/[^0-9]/', '', $this->getInput('percentage'));
|
||||
|
||||
foreach ($this->items as $thisItem) {
|
||||
foreach ($this->items as $item) {
|
||||
// The URL is included in the hash:
|
||||
// - so you can change the output by adding a local-part to the URL
|
||||
// - so items with the same URI in different feeds won't be correlated
|
||||
|
@ -43,13 +44,13 @@ class FeedReducerBridge extends FeedExpander
|
|||
// $pseudoRandomInteger will be a 16 bit unsigned int mod 100.
|
||||
// This won't be uniformly distributed 1-100, but should be close enough.
|
||||
|
||||
$pseudoRandomInteger = unpack(
|
||||
'S', // unsigned 16-bit int
|
||||
hash('sha256', $thisItem['uri'] . '::' . $this->getInput('url'), true)
|
||||
)[1] % 100;
|
||||
$data = $item['uri'] . '::' . $this->getInput('url');
|
||||
$hash = hash('sha256', $data, true);
|
||||
// S = unsigned 16-bit int
|
||||
$pseudoRandomInteger = unpack('S', $hash)[1] % 100;
|
||||
|
||||
if ($pseudoRandomInteger < $intPercentage) {
|
||||
$filteredItems[] = $thisItem;
|
||||
$filteredItems[] = $item;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,43 @@
|
|||
<?php
|
||||
|
||||
/**
 * Bridge that lists the documents published on the FIA Formula One
 * championship documents page.
 */
class FiaBridge extends BridgeAbstract
{
    const NAME = 'Federation Internationale de l\'Automobile site feed';
    const URI = 'https://fia.com';
    const DESCRIPTION = 'Get the latest F1 documents from the fia site';
    const PARAMETERS = [];
    const CACHE_TIMEOUT = 900;

    /**
     * Scrape the document list and turn each `li.document-row` into a feed item.
     */
    public function collectData()
    {
        $listUrl = 'https://www.fia.com/documents/championships/fia-formula-one-world-championship-14/';
        $html = getSimpleHTMLDOM($listUrl);

        foreach ($html->find('li.document-row') as $row) {
            // Stop once more than 20 items have been collected. Checked up
            // front so no row is parsed only to be thrown away.
            if (count($this->items) > 20) {
                break;
            }

            $title = trim($row->find('div.title', 0)->plaintext);
            $documentUrl = 'https://www.fia.com' . $row->find('a', 0)->href;
            $date = $row->find('span.date-display-single', 0)->plaintext;

            $item = [];
            $item['uri'] = $documentUrl;
            $item['title'] = $title;

            // createFromFormat() returns false on unparsable input; only set a
            // timestamp when parsing succeeded instead of calling a method on false.
            $publishedAt = DateTime::createFromFormat('d.m.y H:i', trim($date));
            if ($publishedAt !== false) {
                $item['timestamp'] = (string) $publishedAt->getTimestamp();
            }

            $item['author'] = 'Fia';
            $item['content'] = "Document on date $date: $title <br /><a href='$documentUrl'>$documentUrl</a>";
            // Categories are a list, as in the other bridges, not a bare string.
            $item['categories'] = ['Document'];
            $item['uid'] = $title . $date;

            $this->items[] = $item;
        }
    }
}
|
|
@ -184,6 +184,7 @@ class FicbookBridge extends BridgeAbstract
|
|||
];
|
||||
|
||||
$fixed_date = str_replace($ru_month, $en_month, $date);
|
||||
$fixed_date = str_replace(' г.', '', $fixed_date);
|
||||
|
||||
if ($fixed_date === $date) {
|
||||
Debug::log('Unable to fix date: ' . $date);
|
||||
|
|
|
@ -0,0 +1,132 @@
|
|||
<?php
|
||||
|
||||
/**
 * Bridge for a single post on a Fider instance: emits the post itself, the
 * staff response (if any) and the post's comments, newest first.
 */
class FiderBridge extends BridgeAbstract
{
    const NAME = 'Fider Bridge';
    const URI = 'https://fider.io/';
    const DESCRIPTION = 'Bridge for any Fider instance';
    const MAINTAINER = 'Oliver Nutter';
    const PARAMETERS = [
        'global' => [
            'instance' => [
                'name' => 'Instance URL',
                'required' => true,
                // 'exampleValue' is the key the other bridges use for sample input.
                'exampleValue' => 'https://feedback.fider.io',
            ],
        ],
        'Post' => [
            'num' => [
                'name' => 'Post Number',
                'type' => 'number',
                'required' => true,
            ],
            'limit' => [
                'name' => 'Number of comments to return',
                'type' => 'number',
                'required' => false,
                'title' => 'Specify number of comments to return',
            ],
        ],
    ];

    // Instance base URL without trailing slash; set in collectData().
    private $instance;
    // Public URL of the requested post; set in collectData().
    private $posturi;
    // Feed title "<post title> - <instance page title>"; set in setTitle().
    private $title;

    /**
     * @return string The computed feed title, or the parent's name before data was collected
     */
    public function getName()
    {
        return $this->title ?? parent::getName();
    }

    /**
     * @return string The post URL, or the parent's URI before data was collected
     */
    public function getURI()
    {
        return $this->posturi ?? parent::getURI();
    }

    /**
     * Compose the feed title from the post title and the instance's <title> element.
     *
     * @param string $title Title of the post
     */
    protected function setTitle($title)
    {
        $html = getSimpleHTMLDOMCached($this->instance);
        $name = $html->find('title', 0)->innertext;

        $this->title = "$title - $name";
    }

    /**
     * Convert an API object (post, response or comment) into a feed item.
     *
     * @param object $post     Decoded API object
     * @param bool   $response True when $post is the staff response of the post
     * @param bool   $first    True when $post is the post itself
     * @return array Feed item
     */
    protected function getItem($post, $response = false, $first = false)
    {
        $item = [];
        $item['uri'] = $this->getURI();
        $item['timestamp'] = $response ? $post->respondedAt : $post->createdAt;
        $item['author'] = $post->user->name;

        $datetime = new DateTime($item['timestamp']);
        $formattedDate = $datetime->format('M d, Y');

        if ($response) {
            $item['uid'] = 'response';
            $item['content'] = $post->text;
            $item['title'] = "{$item['author']} marked as {$post->status} {$formattedDate}";
        } elseif ($first) {
            $item['uid'] = 'post';
            $item['content'] = $post->description;
            $item['title'] = $post->title;
        } else {
            $item['uid'] = 'comment';
            $item['content'] = $post->content;
            $item['title'] = "{$item['author']} commented {$formattedDate}";
        }

        // The uid is the kind prefix plus author and timestamp.
        $item['uid'] .= $item['author'] . $item['timestamp'];

        // parse markdown with implicit line breaks
        $item['content'] = markdownToHtml($item['content'], ['breaksEnabled' => true]);

        if (property_exists($post, 'editedAt')) {
            $item['title'] .= ' (edited)';
        }

        if ($first) {
            $item['categories'] = $post->tags;
        }

        return $item;
    }

    /**
     * Fetch the post, its response and its comments from the instance API,
     * sort them newest first and apply the optional 'limit'.
     */
    public function collectData()
    {
        // collect first post
        $this->instance = rtrim($this->getInput('instance'), '/');

        $num = $this->getInput('num');
        $this->posturi = "$this->instance/posts/$num";

        $post_api_uri = "$this->instance/api/v1/posts/$num";
        $post = json_decode(getContents($post_api_uri));

        $this->setTitle($post->title);

        $this->items[] = $this->getItem($post, false, true);

        // collect response to first post
        if (property_exists($post, 'response')) {
            $response = $post->response;
            // The response object is given the post's status so getItem() can show it.
            $response->status = $post->status;
            $this->items[] = $this->getItem($response, true);
        }

        // collect comments
        $comment_api_uri = "$post_api_uri/comments";
        $comments = json_decode(getContents($comment_api_uri));

        foreach ($comments as $comment) {
            $this->items[] = $this->getItem($comment);
        }

        usort($this->items, function ($a, $b) {
            return $b['timestamp'] <=> $a['timestamp'];
        });

        // `>` binds tighter than `??`, so the unparenthesised form
        // `getInput('limit') ?? 0 > 0` never compared the limit with zero and
        // let negative limits through to array_slice().
        $limit = (int) ($this->getInput('limit') ?? 0);
        if ($limit > 0) {
            $this->items = array_slice($this->items, 0, $limit);
        }
    }
}
|
|
@ -12,7 +12,7 @@ class FilterBridge extends FeedExpander
|
|||
'url' => [
|
||||
'name' => 'Feed URL',
|
||||
'type' => 'text',
|
||||
'defaultValue' => 'https://lorem-rss.herokuapp.com/feed?unit=day',
|
||||
'exampleValue' => 'https://lorem-rss.herokuapp.com/feed?unit=day',
|
||||
'required' => true,
|
||||
],
|
||||
'filter' => [
|
||||
|
@ -73,17 +73,29 @@ class FilterBridge extends FeedExpander
|
|||
],
|
||||
]];
|
||||
|
||||
protected function parseItem($newItem)
|
||||
public function collectData()
|
||||
{
|
||||
$item = parent::parseItem($newItem);
|
||||
$url = $this->getInput('url');
|
||||
if (!Url::validate($url)) {
|
||||
returnClientError('The url parameter must either refer to http or https protocol.');
|
||||
}
|
||||
$this->collectExpandableDatas($this->getURI());
|
||||
}
|
||||
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
// Generate title from first 50 characters of content?
|
||||
if ($this->getInput('title_from_content') && array_key_exists('content', $item)) {
|
||||
$content = str_get_html($item['content']);
|
||||
$pos = strpos($item['content'], ' ', 50);
|
||||
$item['title'] = substr($content->plaintext, 0, $pos);
|
||||
if (strlen($content->plaintext) >= $pos) {
|
||||
$item['title'] .= '...';
|
||||
$plaintext = $content->plaintext;
|
||||
if (mb_strlen($plaintext) < 51) {
|
||||
$item['title'] = $plaintext;
|
||||
} else {
|
||||
$pos = strpos($item['content'], ' ', 50);
|
||||
$item['title'] = substr($plaintext, 0, $pos);
|
||||
if (strlen($plaintext) >= $pos) {
|
||||
$item['title'] .= '...';
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -153,13 +165,4 @@ class FilterBridge extends FeedExpander
|
|||
|
||||
return $url;
|
||||
}
|
||||
|
||||
/**
 * Validate the feed URL's scheme prefix, then expand the feed at getURI().
 */
public function collectData()
{
    $url = $this->getInput('url');
    $hasHttpPrefix = substr((string) $url, 0, 4) === 'http';

    // just in case someone finds a way to access local files by playing with the url
    if ($url && !$hasHttpPrefix) {
        returnClientError('The url parameter must either refer to http or https protocol.');
    }

    $this->collectExpandableDatas($this->getURI());
}
|
||||
}
|
||||
|
|
|
@ -36,6 +36,11 @@ class FinanzflussBridge extends BridgeAbstract
|
|||
$img->srcset = $baseurl . $src;
|
||||
}
|
||||
|
||||
//remove unwanted stuff
|
||||
foreach ($content->find('div.newsletter-signup') as $element) {
|
||||
$element->remove();
|
||||
}
|
||||
|
||||
//get author
|
||||
$author = $domarticle->find('div.author-name', 0);
|
||||
|
||||
|
|
|
@ -145,7 +145,6 @@ class FlickrBridge extends BridgeAbstract
|
|||
. '</p>';
|
||||
|
||||
$item['enclosures'] = $this->extractEnclosures($model);
|
||||
|
||||
$this->items[] = $item;
|
||||
}
|
||||
}
|
||||
|
@ -255,17 +254,22 @@ class FlickrBridge extends BridgeAbstract
|
|||
{
|
||||
$areas = [];
|
||||
$limit = 320 * 240;
|
||||
|
||||
foreach ($model['sizes']['data'] as $size) {
|
||||
$size = $size['data'];
|
||||
$image_area = $size['width'] * $size['height'];
|
||||
|
||||
if ($image_area >= $limit) {
|
||||
$areas[$image_area] = $size['url'];
|
||||
$sizes = $model['sizes']['data'];
|
||||
foreach ($sizes as $sizeData) {
|
||||
$sizeData = $sizeData['data'];
|
||||
$area = $sizeData['width'] * $sizeData['height'];
|
||||
if ($area >= $limit) {
|
||||
$areas[$area] = $sizeData['url'];
|
||||
}
|
||||
}
|
||||
|
||||
return $this->fixURL(min($areas));
|
||||
if ($areas) {
|
||||
$minKey = min(array_keys($areas));
|
||||
$url = $areas[$minKey];
|
||||
} else {
|
||||
$array_key_first = array_key_first($sizes);
|
||||
$url = $sizes[$array_key_first]['data']['url'];
|
||||
}
|
||||
return $this->fixURL($url);
|
||||
}
|
||||
|
||||
private function fixURL($url)
|
||||
|
|
|
@ -29,10 +29,8 @@ class FolhaDeSaoPauloBridge extends FeedExpander
|
|||
]
|
||||
];
|
||||
|
||||
protected function parseItem($item)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
if ($this->getInput('deep_crawl')) {
|
||||
$articleHTMLContent = getSimpleHTMLDOMCached($item['uri']);
|
||||
if ($articleHTMLContent) {
|
||||
|
|
|
@ -12,12 +12,10 @@ class ForGifsBridge extends FeedExpander
|
|||
$this->collectExpandableDatas('https://forgifs.com/gallery/srss/7');
|
||||
}
|
||||
|
||||
protected function parseItem($feedItem)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($feedItem);
|
||||
|
||||
$content = str_get_html($item['content']);
|
||||
$img = $content->find('img', 0);
|
||||
$dom = str_get_html($item['content']);
|
||||
$img = $dom->find('img', 0);
|
||||
$poster = $img->src;
|
||||
|
||||
// The actual gif is the same path but its id must be decremented by one.
|
||||
|
@ -34,7 +32,7 @@ class ForGifsBridge extends FeedExpander
|
|||
$img->width = 'auto';
|
||||
$img->height = 'auto';
|
||||
|
||||
$item['content'] = $content;
|
||||
$item['content'] = (string) $dom;
|
||||
|
||||
return $item;
|
||||
}
|
||||
|
|
|
@ -45,7 +45,8 @@ class FourchanBridge extends BridgeAbstract
|
|||
$file = $element->find('.file', 0);
|
||||
|
||||
if (!empty($file)) {
|
||||
$item['image'] = $element->find('.file a', 0)->href;
|
||||
$var = $element->find('.file a', 0);
|
||||
$item['image'] = $var->href ?? '';
|
||||
$item['imageThumb'] = $element->find('.file img', 0)->src;
|
||||
if (!isset($item['imageThumb']) and strpos($item['image'], '.swf') !== false) {
|
||||
$item['imageThumb'] = 'http://i.imgur.com/eO0cxf9.jpg';
|
||||
|
|
|
@ -14,15 +14,15 @@ class FreeCodeCampBridge extends FeedExpander
|
|||
$this->collectExpandableDatas('https://www.freecodecamp.org/news/rss/', 15);
|
||||
}
|
||||
|
||||
protected function parseItem($newsItem)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($newsItem);
|
||||
// $articlePage gets the entire page's contents
|
||||
$articlePage = getSimpleHTMLDOM($newsItem->link);
|
||||
$dom = getSimpleHTMLDOM($item['uri']);
|
||||
|
||||
// figure contains the main article image
|
||||
$article = $articlePage->find('figure', 0);
|
||||
$article = $dom->find('figure', 0);
|
||||
|
||||
// the actual article
|
||||
foreach ($articlePage->find('.post-full-content') as $element) {
|
||||
foreach ($dom->find('.post-full-content') as $element) {
|
||||
$article = $article . $element;
|
||||
}
|
||||
$item['content'] = $article;
|
||||
|
|
|
@ -2,89 +2,93 @@
|
|||
|
||||
class FreeTelechargerBridge extends BridgeAbstract
|
||||
{
|
||||
const NAME = 'Free-Telecharger';
|
||||
const URI = 'https://www.free-telecharger.live/';
|
||||
const DESCRIPTION = 'Suivi de série sur Free-Telecharger';
|
||||
const MAINTAINER = 'sysadminstory';
|
||||
const PARAMETERS = [
|
||||
'Suivi de publication de série' => [
|
||||
'url' => [
|
||||
'name' => 'URL de la série',
|
||||
'type' => 'text',
|
||||
'required' => true,
|
||||
'title' => 'URL d\'une série sans le https://www.free-telecharger.live/',
|
||||
'pattern' => 'series.*\.html',
|
||||
'exampleValue' => 'series-vf-hd/145458-the-last-of-us-saison-1-web-dl-720p.html'
|
||||
],
|
||||
]
|
||||
];
|
||||
const CACHE_TIMEOUT = 3600;
|
||||
public function collectData()
|
||||
{
|
||||
$html = getSimpleHTMLDOM(self::URI . $this->getInput('url'));
|
||||
const NAME = 'Free-Telecharger';
|
||||
const URI = 'https://www.free-telecharger.art/';
|
||||
const DESCRIPTION = 'Suivi de série sur Free-Telecharger';
|
||||
const MAINTAINER = 'sysadminstory';
|
||||
const PARAMETERS = [
|
||||
'Suivi de publication de série' => [
|
||||
'url' => [
|
||||
'name' => 'URL de la série',
|
||||
'type' => 'text',
|
||||
'required' => true,
|
||||
'title' => 'URL d\'une série sans le https://www.free-telecharger.art/',
|
||||
'pattern' => 'series.*\.html',
|
||||
'exampleValue' => 'series-vf-hd/151432-wolf-saison-1-complete-web-dl-720p.html'
|
||||
],
|
||||
]
|
||||
];
|
||||
const CACHE_TIMEOUT = 3600;
|
||||
private string $showTitle;
|
||||
private string $showTechDetails;
|
||||
|
||||
// Find all block content of the page
|
||||
$blocks = $html->find('div[class=block1]');
|
||||
public function collectData()
|
||||
{
|
||||
$html = getSimpleHTMLDOM(self::URI . $this->getInput('url'));
|
||||
|
||||
// Global Infos block
|
||||
$infosBlock = $blocks[0];
|
||||
// Links block
|
||||
$linksBlock = $blocks[2];
|
||||
// Find all block content of the page
|
||||
$blocks = $html->find('div[class=block1]');
|
||||
|
||||
// Extract Global Show infos
|
||||
$this->showTitle = trim($infosBlock->find('div[class=titre1]', 0)->find('font', 0)->plaintext);
|
||||
$this->showTechDetails = trim($infosBlock->find('div[align=center]', 0)->find('b', 0)->plaintext);
|
||||
// Global Infos block
|
||||
$infosBlock = $blocks[0];
|
||||
// Links block
|
||||
$linksBlock = $blocks[2];
|
||||
|
||||
// Extract Global Show infos
|
||||
$this->showTitle = trim($infosBlock->find('div[class=titre1]', 0)->find('font', 0)->plaintext);
|
||||
$this->showTechDetails = trim($infosBlock->find('div[align=center]', 0)->find('b', 0)->plaintext);
|
||||
|
||||
|
||||
|
||||
// Get Episodes names and links
|
||||
$episodes = $linksBlock->find('div[id=link]', 0)->find('font[color=#ff6600]');
|
||||
$links = $linksBlock->find('div[id=link]', 0)->find('a');
|
||||
// Get Episodes names and links
|
||||
$episodes = $linksBlock->find('div[id=link]', 0)->find('font[color=#e93100]');
|
||||
$links = $linksBlock->find('div[id=link]', 0)->find('a');
|
||||
|
||||
foreach ($episodes as $index => $episode) {
|
||||
$item = []; // Create an empty item
|
||||
$item['title'] = $this->showTitle . ' ' . $this->showTechDetails . ' - ' . ltrim(trim($episode->plaintext), '-');
|
||||
$item['uri'] = $links[$index]->href;
|
||||
$item['content'] = '<a href="' . $item['uri'] . '">' . $item['title'] . '</a>';
|
||||
$item['uid'] = hash('md5', $item['uri']);
|
||||
foreach ($episodes as $index => $episode) {
|
||||
$item = []; // Create an empty item
|
||||
$item['title'] = $this->showTitle . ' ' . $this->showTechDetails . ' - ' . ltrim(trim($episode->plaintext), '-');
|
||||
$item['uri'] = $links[$index]->href;
|
||||
$item['content'] = '<a href="' . $item['uri'] . '">' . $item['title'] . '</a>';
|
||||
$item['uid'] = hash('md5', $item['uri']);
|
||||
|
||||
$this->items[] = $item; // Add this item to the list
|
||||
}
|
||||
$this->items[] = $item; // Add this item to the list
|
||||
}
|
||||
}
|
||||
|
||||
public function getName()
|
||||
{
|
||||
switch ($this->queriedContext) {
|
||||
case 'Suivi de publication de série':
|
||||
return $this->showTitle . ' ' . $this->showTechDetails . ' - ' . self::NAME;
|
||||
public function getName()
|
||||
{
|
||||
switch ($this->queriedContext) {
|
||||
case 'Suivi de publication de série':
|
||||
return $this->showTitle . ' ' . $this->showTechDetails . ' - ' . self::NAME;
|
||||
break;
|
||||
default:
|
||||
return self::NAME;
|
||||
}
|
||||
default:
|
||||
return self::NAME;
|
||||
}
|
||||
}
|
||||
|
||||
public function getURI()
|
||||
{
|
||||
switch ($this->queriedContext) {
|
||||
case 'Suivi de publication de série':
|
||||
return self::URI . $this->getInput('url');
|
||||
public function getURI()
|
||||
{
|
||||
switch ($this->queriedContext) {
|
||||
case 'Suivi de publication de série':
|
||||
return self::URI . $this->getInput('url');
|
||||
break;
|
||||
default:
|
||||
return self::URI;
|
||||
}
|
||||
default:
|
||||
return self::URI;
|
||||
}
|
||||
}
|
||||
|
||||
public function detectParameters($url)
|
||||
{
|
||||
// Example: https://www.free-telecharger.art/series-vf-hd/151432-wolf-saison-1-complete-web-dl-720p.html
|
||||
|
||||
$params = [];
|
||||
$regex = '/^https:\/\/www.*\.free-telecharger\.art\/(series.*\.html)/';
|
||||
if (preg_match($regex, $url, $matches) > 0) {
|
||||
$params['context'] = 'Suivi de publication de série';
|
||||
$params['url'] = urldecode($matches[1]);
|
||||
return $params;
|
||||
}
|
||||
|
||||
public function detectParameters($url)
|
||||
{
|
||||
// Example: https://www.free-telecharger.live/series-vf-hd/145458-the-last-of-us-saison-1-web-dl-720p.html
|
||||
|
||||
$params = [];
|
||||
$regex = '/^https:\/\/www.*\.free-telecharger\.live\/(series.*\.html)/';
|
||||
if (preg_match($regex, $url, $matches) > 0) {
|
||||
$params['url'] = urldecode($matches[1]);
|
||||
return $params;
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -64,6 +64,7 @@ class FunkBridge extends BridgeAbstract
|
|||
$regex = '/^https?:\/\/(?:www\.)?funk\.net\/channel\/([^\/]+).*$/';
|
||||
if (preg_match($regex, $url, $urlMatches) > 0) {
|
||||
return [
|
||||
'context' => 'Channel',
|
||||
'channel' => $urlMatches[1]
|
||||
];
|
||||
} else {
|
||||
|
|
|
@ -6,7 +6,18 @@ class FurAffinityBridge extends BridgeAbstract
|
|||
const URI = 'https://www.furaffinity.net';
|
||||
const CACHE_TIMEOUT = 300; // 5min
|
||||
const DESCRIPTION = 'Returns posts from various sections of FurAffinity';
|
||||
const MAINTAINER = 'Roliga';
|
||||
const MAINTAINER = 'Roliga, mruac';
|
||||
const CONFIGURATION = [
|
||||
'aCookie' => [
|
||||
'required' => false,
|
||||
'defaultValue' => 'ca6e4566-9d81-4263-9444-653b142e35f8'
|
||||
|
||||
],
|
||||
'bCookie' => [
|
||||
'required' => false,
|
||||
'defaultValue' => '4ce65691-b50f-4742-a990-bf28d6de16ee'
|
||||
]
|
||||
];
|
||||
const PARAMETERS = [
|
||||
'Search' => [
|
||||
'q' => [
|
||||
|
@ -594,7 +605,7 @@ class FurAffinityBridge extends BridgeAbstract
|
|||
* This was acquired by creating a new user on FA then
|
||||
* extracting the cookie from the browsers dev console.
|
||||
*/
|
||||
const FA_AUTH_COOKIE = 'b=4ce65691-b50f-4742-a990-bf28d6de16ee; a=ca6e4566-9d81-4263-9444-653b142e35f8';
|
||||
private $FA_AUTH_COOKIE;
|
||||
|
||||
public function detectParameters($url)
|
||||
{
|
||||
|
@ -603,6 +614,7 @@ class FurAffinityBridge extends BridgeAbstract
|
|||
// Single journal
|
||||
$regex = '/^(https?:\/\/)?(www\.)?furaffinity.net\/journal\/(\d+)/';
|
||||
if (preg_match($regex, $url, $matches) > 0) {
|
||||
$params['context'] = 'Single Journal';
|
||||
$params['journal-id'] = urldecode($matches[3]);
|
||||
return $params;
|
||||
}
|
||||
|
@ -610,6 +622,7 @@ class FurAffinityBridge extends BridgeAbstract
|
|||
// Journals
|
||||
$regex = '/^(https?:\/\/)?(www\.)?furaffinity.net\/journals\/([^\/&?\n]+)/';
|
||||
if (preg_match($regex, $url, $matches) > 0) {
|
||||
$params['context'] = 'Journals';
|
||||
$params['username-journals'] = urldecode($matches[3]);
|
||||
return $params;
|
||||
}
|
||||
|
@ -617,6 +630,7 @@ class FurAffinityBridge extends BridgeAbstract
|
|||
// Gallery folder
|
||||
$regex = '/^(https?:\/\/)?(www\.)?furaffinity.net\/gallery\/([^\/&?\n]+)\/folder\/(\d+)/';
|
||||
if (preg_match($regex, $url, $matches) > 0) {
|
||||
$params['context'] = 'Gallery Folder';
|
||||
$params['username-folder'] = urldecode($matches[3]);
|
||||
$params['folder-id'] = urldecode($matches[4]);
|
||||
$params['full'] = 'on';
|
||||
|
@ -626,6 +640,7 @@ class FurAffinityBridge extends BridgeAbstract
|
|||
// Gallery (must be after gallery folder)
|
||||
$regex = '/^(https?:\/\/)?(www\.)?furaffinity.net\/(gallery|scraps|favorites)\/([^\/&?\n]+)/';
|
||||
if (preg_match($regex, $url, $matches) > 0) {
|
||||
$params['context'] = 'Gallery';
|
||||
$params['username-' . $matches[3]] = urldecode($matches[4]);
|
||||
$params['full'] = 'on';
|
||||
return $params;
|
||||
|
@ -658,7 +673,14 @@ class FurAffinityBridge extends BridgeAbstract
|
|||
. '\'s Folder '
|
||||
. $this->getInput('folder-id');
|
||||
default:
|
||||
return parent::getName();
|
||||
$name = parent::getName();
|
||||
if ($this->getOption('aCookie') !== null) {
|
||||
$username = $this->loadCacheValue('username');
|
||||
if ($username !== null) {
|
||||
$name = $username . '\'s ' . parent::getName();
|
||||
}
|
||||
}
|
||||
return $name;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -737,6 +759,7 @@ class FurAffinityBridge extends BridgeAbstract
|
|||
|
||||
public function collectData()
|
||||
{
|
||||
$this->FA_AUTH_COOKIE = 'b=' . $this->getOption('bCookie') . '; a=' . $this->getOption('aCookie');
|
||||
switch ($this->queriedContext) {
|
||||
case 'Search':
|
||||
$data = [
|
||||
|
@ -802,19 +825,19 @@ class FurAffinityBridge extends BridgeAbstract
|
|||
$header = [
|
||||
'Host: ' . parse_url(self::URI, PHP_URL_HOST),
|
||||
'Content-Type: application/x-www-form-urlencoded',
|
||||
'Cookie: ' . self::FA_AUTH_COOKIE
|
||||
'Cookie: ' . $this->FA_AUTH_COOKIE
|
||||
];
|
||||
|
||||
$html = getSimpleHTMLDOM($this->getURI(), $header, $opts);
|
||||
$html = defaultLinkTo($html, $this->getURI());
|
||||
|
||||
$this->saveLoggedInUser($html);
|
||||
return $html;
|
||||
}
|
||||
|
||||
private function getFASimpleHTMLDOM($url, $cache = false)
|
||||
{
|
||||
$header = [
|
||||
'Cookie: ' . self::FA_AUTH_COOKIE
|
||||
'Cookie: ' . $this->FA_AUTH_COOKIE
|
||||
];
|
||||
|
||||
if ($cache) {
|
||||
|
@ -822,12 +845,24 @@ class FurAffinityBridge extends BridgeAbstract
|
|||
} else {
|
||||
$html = getSimpleHTMLDOM($url, $header);
|
||||
}
|
||||
|
||||
$this->saveLoggedInUser($html);
|
||||
$html = defaultLinkTo($html, $url);
|
||||
|
||||
return $html;
|
||||
}
|
||||
|
||||
/**
 * Remember the logged-in user's name, if the page shows one.
 *
 * Looks for the `#my-username` element and extracts the name from a label
 * of the form "My FA ( name )" or "~name"; the result is stored under the
 * 'username' cache key. Nothing is stored when the element is missing or
 * its text does not match.
 *
 * @param object $html Parsed page (must support find())
 */
private function saveLoggedInUser($html)
{
    $usernameElement = $html->find('#my-username', 0);
    if ($usernameElement === null) {
        return;
    }

    // Check the match result before reading $matches[1]; the capture group
    // does not exist when the label has an unexpected format.
    $label = trim($usernameElement->plaintext);
    if (preg_match('/^(?:My FA \( |~)(.*?)(?: \)|)$/', $label, $matches) !== 1) {
        return;
    }

    $this->saveCacheValue('username', $matches[1]);
}
|
||||
|
||||
private function itemsFromJournalList($html, $limit)
|
||||
{
|
||||
foreach ($html->find('table[id^=jid:]') as $journal) {
|
||||
|
@ -888,7 +923,7 @@ class FurAffinityBridge extends BridgeAbstract
|
|||
$item = [];
|
||||
|
||||
$submissionURL = $figure->find('b u a', 0)->href;
|
||||
$imgURL = 'https:' . $figure->find('b u a img', 0)->src;
|
||||
$imgURL = $figure->find('b u a img', 0)->src;
|
||||
|
||||
$item['uri'] = $submissionURL;
|
||||
$item['title'] = html_entity_decode(
|
||||
|
@ -896,46 +931,43 @@ class FurAffinityBridge extends BridgeAbstract
|
|||
);
|
||||
$item['author'] = $figure->find('figcaption p a[href*=/user/]', 0)->title;
|
||||
|
||||
$item['content'] = "<a href=\"$submissionURL\"> <img src=\"{$imgURL}\" referrerpolicy=\"no-referrer\"/></a>";
|
||||
|
||||
if ($this->getInput('full') === true) {
|
||||
$submissionHTML = $this->getFASimpleHTMLDOM($submissionURL, $cache);
|
||||
if (!$this->isHiddenSubmission($submissionHTML)) {
|
||||
$stats = $submissionHTML->find('.stats-container', 0);
|
||||
$popupDate = $stats->find('.popup_date', 0);
|
||||
if ($popupDate) {
|
||||
$item['timestamp'] = strtotime($popupDate->title);
|
||||
}
|
||||
|
||||
$stats = $submissionHTML->find('.stats-container', 0);
|
||||
$item['timestamp'] = strtotime($stats->find('.popup_date', 0)->title);
|
||||
$item['enclosures'] = [
|
||||
$submissionHTML->find('.actions a[href^=https://d.facdn]', 0)->href
|
||||
];
|
||||
foreach ($stats->find('#keywords a') as $keyword) {
|
||||
$item['categories'][] = $keyword->plaintext;
|
||||
$var = $submissionHTML->find('.actions a[href^=https://d.facdn]', 0);
|
||||
if ($var) {
|
||||
$item['enclosures'] = [$var->href];
|
||||
}
|
||||
|
||||
foreach ($stats->find('#keywords a') as $keyword) {
|
||||
$item['categories'][] = $keyword->plaintext;
|
||||
}
|
||||
|
||||
$previewSrc = $submissionHTML->find('#submissionImg', 0);
|
||||
if ($previewSrc) {
|
||||
$imgURL = 'https:' . $previewSrc->{'data-preview-src'};
|
||||
} else {
|
||||
$imgURL = $submissionHTML->find('[property="og:image"]', 0)->{'content'};
|
||||
}
|
||||
|
||||
$description = $submissionHTML->find('div.submission-description', 0);
|
||||
if ($description) {
|
||||
$this->setReferrerPolicy($description);
|
||||
$description = trim($description->innertext);
|
||||
} else {
|
||||
$description = '';
|
||||
}
|
||||
|
||||
$item['content'] = "<a href=\"$submissionURL\"> <img src=\"{$imgURL}\" referrerpolicy=\"no-referrer\"/></a><p>{$description}</p>";
|
||||
}
|
||||
|
||||
$previewSrc = $submissionHTML->find('#submissionImg', 0)
|
||||
->{'data-preview-src'};
|
||||
if ($previewSrc) {
|
||||
$imgURL = 'https:' . $previewSrc;
|
||||
}
|
||||
|
||||
$description = $submissionHTML->find('div.submission-description', 0);
|
||||
if ($description) {
|
||||
$this->setReferrerPolicy($description);
|
||||
$description = trim($description->innertext);
|
||||
} else {
|
||||
$description = '';
|
||||
}
|
||||
|
||||
$item['content'] = <<<EOD
|
||||
<a href="$submissionURL">
|
||||
<img src="{$imgURL}" referrerpolicy="no-referrer" />
|
||||
</a>
|
||||
<p>
|
||||
{$description}
|
||||
</p>
|
||||
EOD;
|
||||
} else {
|
||||
$item['content'] = <<<EOD
|
||||
<a href="$submissionURL">
|
||||
<img src="$imgURL" referrerpolicy="no-referrer" />
|
||||
</a>
|
||||
EOD;
|
||||
}
|
||||
|
||||
$this->items[] = $item;
|
||||
|
@ -954,4 +986,14 @@ EOD;
|
|||
$img->referrerpolicy = 'no-referrer';
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Tell whether the fetched page shows a "System Message" notice instead of content.
 *
 * Accounts that are disabled cannot have their userpage, gallery, favorites or
 * journals viewed, and a submission may require a maturity setting or a
 * logged-in account.
 *
 * @param object $html Parsed page exposing find().
 * @return bool True when the first `.section-body.alignleft` element contains
 *              the text 'System Message'.
 */
private function isHiddenSubmission($html)
{
    $noticeText = '';

    $notice = $html->find('.section-body.alignleft', 0);
    if ($notice) {
        $noticeText = $notice->plaintext;
    }

    return str_contains($noticeText, 'System Message');
}
|
||||
}
|
||||
|
|
|
@ -85,13 +85,12 @@ class FuturaSciencesBridge extends FeedExpander
|
|||
$this->collectExpandableDatas($url, 10);
|
||||
}
|
||||
|
||||
protected function parseItem($newsItem)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($newsItem);
|
||||
$item['uri'] = str_replace('#xtor%3DRSS-8', '', $item['uri']);
|
||||
$article = getSimpleHTMLDOMCached($item['uri']);
|
||||
$item['content'] = $this->extractArticleContent($article);
|
||||
$author = $this->extractAuthor($article);
|
||||
$dom = getSimpleHTMLDOMCached($item['uri']);
|
||||
$item['content'] = $this->extractArticleContent($dom);
|
||||
$author = $this->extractAuthor($dom);
|
||||
if (!empty($author)) {
|
||||
$item['author'] = $author;
|
||||
}
|
||||
|
@ -100,7 +99,7 @@ class FuturaSciencesBridge extends FeedExpander
|
|||
|
||||
private function extractArticleContent($article)
|
||||
{
|
||||
$contents = $article->find('section.article-text', 1);
|
||||
$contents = $article->find('div.article-text', 0);
|
||||
|
||||
foreach ($contents->find('img') as $img) {
|
||||
if (!empty($img->getAttribute('data-src'))) {
|
||||
|
|
|
@ -0,0 +1,96 @@
|
|||
<?php
|
||||
|
||||
/**
 * Bridge producing a feed of the newest (optionally also recently updated)
 * mods of one GameBanana game, using the api.gamebanana.com endpoints.
 */
class GameBananaBridge extends BridgeAbstract
{
    const NAME = 'GameBanana';
    const MAINTAINER = 'phantop';
    const URI = 'https://gamebanana.com/';
    const DESCRIPTION = 'Returns mods from GameBanana.';
    const PARAMETERS = [
        'Game' => [
            'gid' => [
                'name' => 'Game ID',
                'required' => true,
                // Example: latest mods from Zelda: Tears of the Kingdom
                'exampleValue' => '7617',
            ],
            'updates' => [
                'name' => 'Get updates',
                'type' => 'checkbox',
                'required' => false,
                'title' => 'Enable game updates in feed'
            ],
        ]
    ];

    /**
     * Name of the queried game, filled in by collectData() and appended to
     * the feed name. Declared explicitly because creating properties
     * dynamically is deprecated since PHP 8.2.
     *
     * @var string|null
     */
    private $gameName = null;

    public function getIcon()
    {
        return 'https://images.gamebanana.com/static/img/favicon/favicon.ico';
    }

    /**
     * Fetch the first page of the game's mod list, then request the details
     * of the game and of every listed mod in one combined API call and turn
     * each mod into a feed item.
     *
     * @throws \Exception When an API response cannot be decoded into a list.
     */
    public function collectData()
    {
        // The id is user input and ends up in a query string: encode it so it
        // cannot inject additional parameters.
        $gameId = urlencode((string) $this->getInput('gid'));
        $withUpdates = (bool) $this->getInput('updates');

        $listUrl = 'https://api.gamebanana.com/Core/List/New?itemtype=Mod&page=1&gameid=' . $gameId;
        if ($withUpdates) {
            $listUrl .= '&include_updated=1';
        }
        $modList = json_decode(getContents($listUrl), true); // Get first page mod list
        if (!is_array($modList)) {
            throw new \Exception('GameBanana: unexpected mod list response for game ' . $gameId);
        }

        // Build a single request (game name first, then every mod) to minimize API calls.
        $dataUrl = 'https://api.gamebanana.com/Core/Item/Data?itemtype[]=Game&fields[]=name&itemid[]=' . $gameId;
        $fields = 'name,Owner().name,text,screenshots,Files().aFiles(),date,Url().sProfileUrl(),udate';
        foreach ($modList as $entry) {
            // Each list entry carries the mod id at index 1.
            $dataUrl .= '&itemtype[]=Mod&fields[]=' . $fields . '&itemid[]=' . $entry[1];
        }
        $records = json_decode(getContents($dataUrl), true);
        if (!is_array($records) || $records === []) {
            throw new \Exception('GameBanana: unexpected item data response for game ' . $gameId);
        }

        // The first record answers the Game request; the remaining ones are mods.
        $game = array_shift($records);
        $this->gameName = $game[0] ?? null;

        // Mod record layout follows the order of $fields:
        // 0 name, 1 owner name, 2 text, 3 screenshots (JSON string), 4 files,
        // 5 date, 6 profile URL, 7 udate.
        foreach ($records as $mod) {
            $item = [];
            $item['uri'] = $mod[6];
            $item['comments'] = $item['uri'] . '#PostsListModule';
            $item['title'] = $mod[0];
            $item['author'] = $mod[1];

            $item['timestamp'] = $mod[5];
            // NOTE(review): udate is presumably empty for mods that were never
            // updated - keep the creation date in that case instead of
            // emitting an empty timestamp. Confirm against the API.
            if ($withUpdates && !empty($mod[7])) {
                $item['timestamp'] = $mod[7];
            }

            // Place mod downloads in enclosures; tolerate a missing file list.
            $item['enclosures'] = [];
            $files = is_array($mod[4] ?? null) ? $mod[4] : [];
            foreach ($files as $file) {
                $item['enclosures'][] = 'https://files.gamebanana.com/mods/' . $file['_sFile'];
            }

            // Screenshots arrive as a JSON-encoded string; tolerate null or invalid JSON.
            $screenshots = is_string($mod[3] ?? null) ? json_decode($mod[3], true) : null;
            if (!is_array($screenshots)) {
                $screenshots = [];
            }
            $item['content'] = '';
            foreach ($screenshots as $screenshot) {
                $item['content'] .= '<img src="https://images.gamebanana.com/img/ss/mods/' . $screenshot['_sFile'] . '"/>';
            }
            $item['content'] .= '<br>' . $mod[2];

            $item['uid'] = $item['uri'] . $item['title'] . $item['timestamp'];
            $this->items[] = $item;
        }
    }

    /**
     * Bridge name, suffixed with the game name once collectData() has run.
     */
    public function getName()
    {
        $name = parent::getName();
        if (isset($this->gameName)) {
            $name .= " - {$this->gameName}";
        }
        return $name;
    }

    /**
     * Link to the game's page, or the site root when no game id is set.
     */
    public function getURI()
    {
        $gameId = $this->getInput('gid');
        if ($gameId === null || $gameId === '') {
            return parent::getURI();
        }
        return parent::getURI() . 'games/' . $gameId;
    }
}
|
|
@ -20,11 +20,17 @@ class GatesNotesBridge extends BridgeAbstract
|
|||
$apiUrl = self::URI . $api_endpoint . http_build_query($params);
|
||||
|
||||
$rawContent = getContents($apiUrl);
|
||||
$cleanedContent = str_replace('\r\n', '', substr($rawContent, 1, -1));
|
||||
$cleanedContent = str_replace('\"', '"', $cleanedContent);
|
||||
$cleanedContent = str_replace([
|
||||
'<string xmlns="http://schemas.microsoft.com/2003/10/Serialization/">',
|
||||
'</string>',
|
||||
], '', $rawContent);
|
||||
// $cleanedContent = str_replace('\"', '"', $cleanedContent);
|
||||
// $cleanedContent = trim($cleanedContent, '"');
|
||||
|
||||
// The content is actually a json between quotes with \r\n inserted
|
||||
$json = json_decode($cleanedContent);
|
||||
$json = Json::decode($cleanedContent, false);
|
||||
if (is_string($json)) {
|
||||
throw new \Exception('wtf? ' . $json);
|
||||
}
|
||||
|
||||
foreach ($json as $article) {
|
||||
$item = [];
|
||||
|
@ -57,8 +63,10 @@ class GatesNotesBridge extends BridgeAbstract
|
|||
$article_html = defaultLinkTo($article_html, $this->getURI());
|
||||
|
||||
$top_description = '<p>' . $article_html->find('div.article_top_description', 0)->innertext . '</p>';
|
||||
$hero_image = '<img src=' . $article_html->find('img.article_top_DMT_Image', 0)->getAttribute('data-src') . '>';
|
||||
|
||||
$heroImage = $article_html->find('img.article_top_DMT_Image', 0);
|
||||
if ($heroImage) {
|
||||
$hero_image = '<img src=' . $heroImage->getAttribute('data-src') . '>';
|
||||
}
|
||||
$article_body = $article_html->find('div.TGN_Article_ReadTimeSection', 0);
|
||||
|
||||
// Remove the menu bar on some articles (PDF download etc.)
|
||||
|
@ -94,7 +102,7 @@ class GatesNotesBridge extends BridgeAbstract
|
|||
}
|
||||
$article_body = sanitize($article_body->innertext);
|
||||
|
||||
$content = $top_description . $hero_image . $article_body;
|
||||
$content = $top_description . ($hero_image ?? '') . $article_body;
|
||||
|
||||
return $content;
|
||||
}
|
||||
|
|
|
@ -27,12 +27,21 @@ class GettrBridge extends BridgeAbstract
|
|||
|
||||
public function collectData()
|
||||
{
|
||||
$user = $this->getInput('user');
|
||||
$api = sprintf(
|
||||
'https://api.gettr.com/u/user/%s/posts?offset=0&max=%s&dir=fwd&incl=posts&fp=f_uo',
|
||||
$this->getInput('user'),
|
||||
$user,
|
||||
min($this->getInput('limit'), 20)
|
||||
);
|
||||
$data = json_decode(getContents($api), false);
|
||||
try {
|
||||
$json = getContents($api);
|
||||
} catch (HttpException $e) {
|
||||
if ($e->getCode() === 400 && str_contains($e->response->getBody(), 'E_USER_NOTFOUND')) {
|
||||
throw new \Exception('User not found: ' . $user);
|
||||
}
|
||||
throw $e;
|
||||
}
|
||||
$data = json_decode($json, false);
|
||||
|
||||
foreach ($data->result->aux->post as $post) {
|
||||
$this->items[] = [
|
||||
|
|
|
@ -5,7 +5,7 @@ class GithubIssueBridge extends BridgeAbstract
|
|||
const MAINTAINER = 'Pierre Mazière';
|
||||
const NAME = 'Github Issue';
|
||||
const URI = 'https://github.com/';
|
||||
const CACHE_TIMEOUT = 0; // 10min
|
||||
const CACHE_TIMEOUT = 600; // 10m
|
||||
const DESCRIPTION = 'Returns the issues or comments of an issue of a github project';
|
||||
|
||||
const PARAMETERS = [
|
||||
|
@ -137,7 +137,8 @@ class GithubIssueBridge extends BridgeAbstract
|
|||
{
|
||||
$uri = $this->buildGitHubIssueCommentUri($issueNbr, $comment->id);
|
||||
|
||||
$author = $comment->find('.author', 0)->plaintext;
|
||||
$authorDom = $comment->find('.author', 0);
|
||||
$author = $authorDom->plaintext ?? null;
|
||||
|
||||
$header = $comment->find('.timeline-comment-header > h3', 0);
|
||||
$title .= ' / ' . ($header ? $header->plaintext : 'Activity');
|
||||
|
@ -276,6 +277,7 @@ class GithubIssueBridge extends BridgeAbstract
|
|||
case 2: // Project issues
|
||||
[$user, $project] = $path_segments;
|
||||
$show_comments = 'off';
|
||||
$context = 'Project Issues';
|
||||
break;
|
||||
case 3: // Project issues with issue comments
|
||||
if ($path_segments[2] !== static::URL_PATH) {
|
||||
|
@ -283,15 +285,18 @@ class GithubIssueBridge extends BridgeAbstract
|
|||
}
|
||||
[$user, $project] = $path_segments;
|
||||
$show_comments = 'on';
|
||||
$context = 'Project Issues';
|
||||
break;
|
||||
case 4: // Issue comments
|
||||
[$user, $project, /* issues */, $issue] = $path_segments;
|
||||
$context = 'Issue comments';
|
||||
break;
|
||||
default:
|
||||
return null;
|
||||
}
|
||||
|
||||
return [
|
||||
'context' => $context,
|
||||
'u' => $user,
|
||||
'p' => $project,
|
||||
'c' => $show_comments ?? null,
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue