[CssSelectorBridge] Handling of missing links (#3585)

When using a parent element as the URL selector:

* If no <a> inside some elements, ignore them
* If no <a> inside ALL elements, report an error

Fixes #3573 #issuecomment-1656943318
This commit is contained in:
ORelio 2023-07-31 19:07:34 +02:00 committed by GitHub
parent f957eea300
commit f8fd05f08f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 7 additions and 0 deletions

View File

@@ -198,6 +198,9 @@ class CssSelectorBridge extends BridgeAbstract
} }
if ($link->tag != 'a') { if ($link->tag != 'a') {
$link = $link->find('a', 0); $link = $link->find('a', 0);
if (is_null($link)) {
continue;
}
} }
$item['uri'] = $link->href; $item['uri'] = $link->href;
$item['title'] = $link->plaintext; $item['title'] = $link->plaintext;
@@ -209,6 +212,10 @@ class CssSelectorBridge extends BridgeAbstract
$link_to_item[$link->href] = $item; $link_to_item[$link->href] = $item;
} }
if (empty($link_to_item)) {
returnClientError('The provided URL selector matches some elements, but they do not contain links.');
}
$links = $this->filterUrlList(array_keys($link_to_item), $url_pattern, $limit); $links = $this->filterUrlList(array_keys($link_to_item), $url_pattern, $limit);
if (empty($links)) { if (empty($links)) {