From 014b698f6751655d3efe047cb0b951641936b9fe Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Sat, 1 Jun 2019 21:05:10 +0200 Subject: [PATCH] [html] Use find('*') over custom solution find('*') wasn't supported in older versions of simplehtmldom but it is supported now. Thus, all custom implementations can be replaced by the correct solution. --- lib/html.php | 26 ++------------------------ 1 file changed, 2 insertions(+), 24 deletions(-) diff --git a/lib/html.php b/lib/html.php index 0778c640..13db97a4 100644 --- a/lib/html.php +++ b/lib/html.php @@ -32,18 +32,7 @@ function sanitize($html, $htmlContent = str_get_html($html); - /* - * Notice: simple_html_dom currently doesn't support "->find(*)", which is a - * known issue: https://sourceforge.net/p/simplehtmldom/bugs/157/ - * - * A solution to this is to find all nodes WITHOUT a specific attribute. If - * the attribute is very unlikely to appear in the DOM, this is essentially - * returning all nodes. - * - * "*[!b38fd2b1fe7f4747d6b1c1254ccd055e]" is doing exactly that. The attrib - * "b38fd2b1fe7f4747d6b1c1254ccd055e" is very unlikely to appear in any DOM. - */ - foreach($htmlContent->find('*[!b38fd2b1fe7f4747d6b1c1254ccd055e]') as $element) { + foreach($htmlContent->find('*') as $element) { if(in_array($element->tag, $text_to_keep)) { $element->outertext = $element->plaintext; } elseif(in_array($element->tag, $tags_to_remove)) { @@ -90,18 +79,7 @@ function backgroundToImg($htmlContent) { $regex = '/background-image[ ]{0,}:[ ]{0,}url\([\'"]{0,}(.*?)[\'"]{0,}\)/'; $htmlContent = str_get_html($htmlContent); - /* - * Notice: simple_html_dom currently doesn't support "->find(*)", which is a - * known issue: https://sourceforge.net/p/simplehtmldom/bugs/157/ - * - * A solution to this is to find all nodes WITHOUT a specific attribute. If - * the attribute is very unlikely to appear in the DOM, this is essentially - * returning all nodes. - * - * "*[!b38fd2b1fe7f4747d6b1c1254ccd055e]" is doing exactly that. The attrib - * "b38fd2b1fe7f4747d6b1c1254ccd055e" is very unlikely to appear in any DOM. - */ - foreach($htmlContent->find('*[!b38fd2b1fe7f4747d6b1c1254ccd055e]') as $element) { + foreach($htmlContent->find('*') as $element) { if(preg_match($regex, $element->style, $matches) > 0) {