From ee28b124e0b0335348dcdb4267b38e88c8506ebc Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Thu, 9 Aug 2018 21:55:43 +0200 Subject: [PATCH] [DanbooruBridge] Fix bridge This commit fixes an issue caused by self closing tags not supported by simplehtmldom (<source>). Adds a monkey patch to extend simplehtmldom with the ability to detect that particular tag. Most of the code added is copied directly from simplehtmldom (see vendor/simplehtmldom) with adjustments to account for RSS-Bridge formatting. Related to: https://sourceforge.net/p/simplehtmldom/bugs/83/ Notice: The tag itself is valid according to Mozilla: The HTML <picture> element serves as a container for zero or more <source> elements and one <img> element to provide versions of an image for different display device scenarios. The browser will consider each of the child <source> elements and select one corresponding to the best match found; if no matches are found among the <source> elements, the file specified by the <img> element's src attribute is selected. The selected image is then presented in the space occupied by the <img> element. -- https://developer.mozilla.org/en-US/docs/Web/HTML/Element/picture References #753 --- bridges/DanbooruBridge.php | 73 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 71 insertions(+), 2 deletions(-) diff --git a/bridges/DanbooruBridge.php b/bridges/DanbooruBridge.php index 82f21674..755399f4 100644 --- a/bridges/DanbooruBridge.php +++ b/bridges/DanbooruBridge.php @@ -1,7 +1,7 @@ getFullURI()) + $content = getContents($this->getFullURI()) or returnServerError('Could not request ' . $this->getName()); + $html = Fix_Simple_Html_Dom::str_get_html($content); + foreach($html->find(static::PATHTODATA) as $element) { $this->items[] = $this->getItemFromElement($element); } } } + +/** + * This class is a monkey patch to 'extend' simplehtmldom to recognize + * <source> tags (HTML5) as self closing tag. This patch should be removed once + * simplehtmldom was fixed. 
This seems to be an issue with more tags: + * https://sourceforge.net/p/simplehtmldom/bugs/83/ + * + * The tag itself is valid according to Mozilla: + * + * The HTML <picture> element serves as a container for zero or more <source> + * elements and one <img> element to provide versions of an image for different + * display device scenarios. The browser will consider each of the child <source> + * elements and select one corresponding to the best match found; if no matches + * are found among the <source> elements, the file specified by the <img> + * element's src attribute is selected. The selected image is then presented in + * the space occupied by the <img> element. + * + * -- https://developer.mozilla.org/en-US/docs/Web/HTML/Element/picture + * + * Notice: This class uses parts of the original simplehtmldom, adjusted to pass + * the guidelines of RSS-Bridge (formatting) + */ +final class Fix_Simple_Html_Dom extends simple_html_dom { + + /* copy from simple_html_dom, added 'source' at the end */ + protected $self_closing_tags = array( + 'img' => 1, + 'br' => 1, + 'input' => 1, + 'meta' => 1, + 'link' => 1, + 'hr' => 1, + 'base' => 1, + 'embed' => 1, + 'spacer' => 1, + 'source' => 1 + ); + + /* copy from simplehtmldom, changed 'simple_html_dom' to 'Fix_Simple_Html_Dom' */ + public static function str_get_html($str, + $lowercase = true, + $forceTagsClosed = true, + $target_charset = DEFAULT_TARGET_CHARSET, + $stripRN = true, + $defaultBRText = DEFAULT_BR_TEXT, + $defaultSpanText = DEFAULT_SPAN_TEXT) + { + $dom = new Fix_Simple_Html_Dom(null, + $lowercase, + $forceTagsClosed, + $target_charset, + $stripRN, + $defaultBRText, + $defaultSpanText); + + if (empty($str) || strlen($str) > MAX_FILE_SIZE) { + + $dom->clear(); + return false; + + } + + $dom->load($str, $lowercase, $stripRN); + + return $dom; + } +}