From 75cc52a62c5f5e6ec13b54c2e06bdce035814880 Mon Sep 17 00:00:00 2001 From: ORelio Date: Mon, 7 Jun 2021 20:11:12 +0200 Subject: [PATCH] [FilterBridge] Various improvements (#2148) - Add option for case-insensitive regex - Allow matching item content or author in addition to item title - Optionally attempt to convert encoding when applying matches --- bridges/FilterBridge.php | 124 +++++++++++++++++++++++++-------------- 1 file changed, 81 insertions(+), 43 deletions(-) diff --git a/bridges/FilterBridge.php b/bridges/FilterBridge.php index 696b100f..ea4b9742 100644 --- a/bridges/FilterBridge.php +++ b/bridges/FilterBridge.php @@ -2,11 +2,11 @@ class FilterBridge extends FeedExpander { - const MAINTAINER = 'Frenzie'; + const MAINTAINER = 'Frenzie, ORelio'; const NAME = 'Filter'; const CACHE_TIMEOUT = 3600; // 1h const DESCRIPTION = 'Filters a feed of your choice'; - const URI = 'https://github.com/rss-bridge/rss-bridge'; + const URI = 'https://github.com/RSS-Bridge/rss-bridge'; const PARAMETERS = array(array( 'url' => array( @@ -14,7 +14,7 @@ class FilterBridge extends FeedExpander { 'required' => true, ), 'filter' => array( - 'name' => 'Filter item title (regular expression)', + 'name' => 'Filter (regular expression)', 'required' => false, ), 'filter_type' => array( @@ -22,60 +22,101 @@ class FilterBridge extends FeedExpander { 'type' => 'list', 'required' => false, 'values' => array( - 'Permit' => 'permit', - 'Block' => 'block', + 'Keep matching items' => 'permit', + 'Hide matching items' => 'block', ), 'defaultValue' => 'permit', ), - 'title_from_content' => array( - 'name' => 'Generate title from content', + 'case_insensitive' => array( + 'name' => 'Case-insensitive filter', 'type' => 'checkbox', 'required' => false, - ) + ), + 'fix_encoding' => array( + 'name' => 'Attempt Latin1/UTF-8 fixes when evaluating filter', + 'type' => 'checkbox', + 'required' => false, + ), + 'target_title' => array( + 'name' => 'Apply filter on title', + 'type' => 'checkbox', + 'required' => false, + 'defaultValue' => 'checked' + ), + 'target_content' => array( + 'name' => 'Apply filter on content', + 'type' => 'checkbox', + 'required' => false, + ), + 'target_author' => array( + 'name' => 'Apply filter on author', + 'type' => 'checkbox', + 'required' => false, + ), + 'title_from_content' => array( + 'name' => 'Generate title from content (overwrite existing title)', + 'type' => 'checkbox', + 'required' => false, + ), + 'length_limit' => array( + 'name' => 'Max length analyzed by filter (-1: no limit)', + 'type' => 'number', + 'required' => false, + 'defaultValue' => -1, + ), )); protected function parseItem($newItem){ $item = parent::parseItem($newItem); + // Generate title from first 50 characters of content? if($this->getInput('title_from_content') && array_key_exists('content', $item)) { - $content = str_get_html($item['content']); - $pos = strpos($item['content'], ' ', 50); - - $item['title'] = substr( - $content->plaintext, - 0, - $pos - ); - + $item['title'] = substr($content->plaintext, 0, $pos); if(strlen($content->plaintext) >= $pos) { $item['title'] .= '...'; } - } - switch(true) { - case $this->getFilterType() === 'permit': - if (preg_match($this->getFilter(), $item['title'])) { - return $item; - } - break; - case $this->getFilterType() === 'block': - if (!preg_match($this->getFilter(), $item['title'])) { - return $item; - } - break; + // Build regular expression + $regex = '/' . $this->getInput('filter') . '/'; + if($this->getInput('case_insensitive')) { + $regex .= 'i'; } - return null; - } - protected function getFilter(){ - return '/' . $this->getInput('filter') . '/'; - } + // Retrieve fields to check + $filter_fields = array(); + if($this->getInput('target_title')) { + $filter_fields[] = $item['title']; + } + if($this->getInput('target_content')) { + $filter_fields[] = $item['content']; + } + if($this->getInput('target_author')) { + $filter_fields[] = $item['author']; + } - protected function getFilterType(){ - return $this->getInput('filter_type'); + // Apply filter on item + $keep_item = false; + $length_limit = intval($this->getInput('length_limit')); + foreach($filter_fields as $field) { + if($length_limit > 0) { + $field = substr($field, 0, $length_limit); + } + $keep_item |= boolval(preg_match($regex, $field)); + if($this->getInput('fix_encoding')) { + $keep_item |= boolval(preg_match($regex, utf8_decode($field))); + $keep_item |= boolval(preg_match($regex, utf8_encode($field))); + } + } + + // Reverse result? (keep everything but matching items) + if($this->getInput('filter_type') === 'block') { + $keep_item = !$keep_item; + } + + return $keep_item ? $item : null; } public function getURI(){ @@ -84,18 +125,15 @@ class FilterBridge extends FeedExpander { if(empty($url)) { $url = parent::getURI(); } + return $url; } public function collectData(){ - if($this->getInput('url') && substr($this->getInput('url'), 0, strlen('http')) !== 'http') { - // just in case someone find a way to access local files by playing with the url + if($this->getInput('url') && substr($this->getInput('url'), 0, 4) !== 'http') { + // just in case someone finds a way to access local files by playing with the url returnClientError('The url parameter must either refer to http or https protocol.'); } - try{ - $this->collectExpandableDatas($this->getURI()); - } catch (Exception $e) { - $this->collectExpandableDatas($this->getURI()); - } + $this->collectExpandableDatas($this->getURI()); } }