[html] Rename parameters for sanitize()

This commit is contained in:
logmanoriginal 2018-11-18 17:43:34 +01:00
parent 66e82e46db
commit 0d2ea9a677
1 changed file with 12 additions and 14 deletions

View File

@ -14,24 +14,22 @@
/** /**
* Removes unwanted tags from a given HTML text. * Removes unwanted tags from a given HTML text.
* *
* @param string $textToSanitize The HTML text to sanitize. * @param string $html The HTML text to sanitize.
* @param array $removedTags A list of tags to remove from the DOM. * @param array $tags_to_remove A list of tags to remove from the DOM.
* @param array $keptAttributes A list of attributes to keep on tags (other * @param array $attributes_to_keep A list of attributes to keep on tags (other
* attributes are removed). * attributes are removed).
* @param array $keptText A list of tags where the innertext replaces the tag * @param array $text_to_keep A list of tags where the innertext replaces the tag
* (e.g. `<p>Hello World!</p>` becomes `Hello World!`). * (e.g. `<p>Hello World!</p>` becomes `Hello World!`).
* @return object A simplehtmldom object of the remaining contents. * @return object A simplehtmldom object of the remaining contents.
* *
* @todo Check if this implementation is still necessary, because simplehtmldom * @todo Check if this implementation is still necessary, because simplehtmldom
* already removes some of the tags (search for `remove_noise` in simple_html_dom.php). * already removes some of the tags (search for `remove_noise` in simple_html_dom.php).
* @todo Rename parameters to make more sense. `$textToSanitize` must be HTML,
* `$removedTags`, `$keptAttributes` and `$keptText` are past tense.
*/ */
function sanitize($textToSanitize, function sanitize($html,
$removedTags = array('script', 'iframe', 'input', 'form'), $tags_to_remove = array('script', 'iframe', 'input', 'form'),
$keptAttributes = array('title', 'href', 'src'), $attributes_to_keep = array('title', 'href', 'src'),
$keptText = array()){ $text_to_keep = array()){
$htmlContent = str_get_html($textToSanitize); $htmlContent = str_get_html($html);
/* /*
* Notice: simple_html_dom currently doesn't support "->find(*)", which is a * Notice: simple_html_dom currently doesn't support "->find(*)", which is a
@ -45,13 +43,13 @@ $keptText = array()){
* "b38fd2b1fe7f4747d6b1c1254ccd055e" is very unlikely to appear in any DOM. * "b38fd2b1fe7f4747d6b1c1254ccd055e" is very unlikely to appear in any DOM.
*/ */
foreach($htmlContent->find('*[!b38fd2b1fe7f4747d6b1c1254ccd055e]') as $element) { foreach($htmlContent->find('*[!b38fd2b1fe7f4747d6b1c1254ccd055e]') as $element) {
if(in_array($element->tag, $keptText)) { if(in_array($element->tag, $text_to_keep)) {
$element->outertext = $element->plaintext; $element->outertext = $element->plaintext;
} elseif(in_array($element->tag, $removedTags)) { } elseif(in_array($element->tag, $tags_to_remove)) {
$element->outertext = ''; $element->outertext = '';
} else { } else {
foreach($element->getAllAttributes() as $attributeName => $attribute) { foreach($element->getAllAttributes() as $attributeName => $attribute) {
if(!in_array($attributeName, $keptAttributes)) if(!in_array($attributeName, $attributes_to_keep))
$element->removeAttribute($attributeName); $element->removeAttribute($attributeName);
} }
} }