fix(cvedetails,tldrtech) (#3735)

This commit is contained in:
Dag 2023-10-10 21:41:57 +02:00 committed by GitHub
parent 143f90da60
commit b6a9baff94
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 75 additions and 79 deletions

View File

@ -36,12 +36,65 @@ class CVEDetailsBridge extends BridgeAbstract
private $vendor = '';
private $product = '';
// Return the URL to query.
// Because of the optional product ID, we need to attach it if it is
// set. The search result page has the exact same structure (with and
// without the product ID).
private function buildUrl()
/**
 * Fill $this->items with the most recent CVEs of the listing page.
 *
 * Every result row of the listing yields one feed item. The detail page
 * linked from the row is fetched as well, because the CWE identifiers and
 * the item URI are read from it. Collection stops after 10 items.
 *
 * Rows or detail pages that lack an expected element are handled
 * gracefully (the row is skipped or the field is left out) instead of
 * aborting the whole feed with a fatal error on a null element.
 */
public function collectData()
{
    if ($this->html == null) {
        $this->fetchContent();
    }

    foreach ($this->html->find('#searchresults > .row') as $tr) {
        $detailLink = $tr->find('h3 > a', 0);
        if ($detailLink === null) {
            // Without the heading link there is neither a title nor a
            // detail page, so no item can be built from this row.
            continue;
        }

        $detailHtml = getSimpleHTMLDOM($detailLink->href);

        // The CVE number itself
        $title = $detailLink->innertext;

        $summary = $tr->find('.cvesummarylong', 0);
        $content = $summary !== null ? $summary->innertext : '';

        // The vendor always comes first; CWE numbers and the optional
        // product are appended below.
        $categories = [$this->vendor];
        $enclosures = [];

        // The CWE list is the element following the third <h2> of the
        // detail page. Either one may be missing.
        $cweHeading = $detailHtml->find('h2', 2);
        $cweList = $cweHeading !== null ? $cweHeading->next_sibling() : null;
        if ($cweList !== null) {
            foreach ($cweList->find('li') as $li) {
                $cweLink = $li->find('a', 0);
                if ($cweLink !== null && preg_match('/CWE-(\d+)/', $cweLink->innertext, $cwe)) {
                    $categories[] = 'CWE-' . $cwe[1];
                    $enclosures[] = 'https://cwe.mitre.org/data/definitions/' . $cwe[1] . '.html';
                }
            }
        }

        if ($this->product != '') {
            $categories[] = $this->product;
        }

        // Prefer the link in the detail page's <h1>; fall back to the link
        // of the listing row when the detail page has none.
        // NOTE(review): the fallback assumes the listing href is already
        // usable as an item URI -- confirm.
        $canonicalLink = $detailHtml->find('h1 > a', 0);
        $uri = $canonicalLink !== null
            ? 'https://cvedetails.com/' . $canonicalLink->href
            : $detailLink->href;

        $item = [
            'uri' => $uri,
            'title' => $title,
            'content' => $content,
            'categories' => $categories,
            'enclosures' => $enclosures,
            'uid' => $title,
        ];

        // Only set a timestamp when the row actually carries one.
        $publishDate = $tr->find('[data-tsvfield="publishDate"]', 0);
        if ($publishDate !== null) {
            $item['timestamp'] = $publishDate->innertext;
        }

        $this->items[] = $item;

        // We only want to fetch the latest 10 CVEs
        if (count($this->items) >= 10) {
            break;
        }
    }
}
// Make the actual request to cvedetails.com, store the response (HTML)
// for later use, and extract the vendor and product from it.
private function fetchContent()
{
// build url
// Return the URL to query.
// Because of the optional product ID, we need to attach it if it is
// set. The search result page has the exact same structure (with and
// without the product ID).
$url = self::URI . '/vulnerability-list/vendor_id-' . $this->getInput('vendor_id');
if ($this->getInput('product_id') !== '') {
$url .= '/product_id-' . $this->getInput('product_id');
@ -51,22 +104,12 @@ class CVEDetailsBridge extends BridgeAbstract
// number, which should be mostly accurate.
$url .= '?order=1'; // Order by CVE number DESC
return $url;
}
// Make the actual request to cvedetails.com, store the response (HTML)
// for later use, and extract the vendor and product from it.
private function fetchContent()
{
$html = getSimpleHTMLDOM($this->buildUrl());
$html = getSimpleHTMLDOM($url);
$this->html = defaultLinkTo($html, self::URI);
$vendor = $html->find('#contentdiv h1 > a', 0);
if ($vendor == null) {
returnServerError('Invalid Vendor ID ' .
$this->getInput('vendor_id') .
' or Product ID ' .
$this->getInput('product_id'));
returnServerError('Invalid Vendor ID ' . $this->getInput('vendor_id') . ' or Product ID ' . $this->getInput('product_id'));
}
$this->vendor = $vendor->innertext;
@ -76,7 +119,6 @@ class CVEDetailsBridge extends BridgeAbstract
}
}
// Build the name of the feed.
public function getName()
{
if ($this->getInput('vendor_id') == '') {
@ -94,57 +136,4 @@ class CVEDetailsBridge extends BridgeAbstract
return $name;
}
// Pull the data from the HTML response and fill the items.
public function collectData()
{
    // Lazily load the listing page the first time data is requested.
    if ($this->html == null) {
        $this->fetchContent();
    }

    $rows = $this->html->find('#searchresults > .row');
    foreach ($rows as $row) {
        // Every row links to a detail page, which carries the CVE number,
        // the summary text and the CWE list.
        $link = $row->find('.cveheader > h3 > a', 0);
        $detailPage = getSimpleHTMLDOM($link->href);
        $summaryBox = $detailPage->find('.cvedetailssummary', 0);

        // The CVE number doubles as title and unique id of the item.
        $cveNumber = $summaryBox->find('h1 > a', 0)->innertext;
        $description = $summaryBox->find('.ssc-paragraph', 0)->innertext;

        // Categories start with the vendor, followed by one entry per CWE
        // number found on the detail page and, last, the optional product.
        $tags = [$this->vendor];
        $cweLinks = [];
        $cweItems = $detailPage->find('h2', 2)->next_sibling()->find('li');
        foreach ($cweItems as $cweItem) {
            preg_match('/CWE-(\d+)/', $cweItem->find('a', 0)->innertext, $match);
            if (!isset($match[1])) {
                continue;
            }
            $tags[] = 'CWE-' . $match[1];
            $cweLinks[] = 'https://cwe.mitre.org/data/definitions/' . $match[1] . '.html';
        }
        if (!($this->product == '')) {
            $tags[] = $this->product;
        }

        $entry = [];
        $entry['uri'] = 'https://cvedetails.com/' . $detailPage->find('h1 > a', 0)->href;
        $entry['title'] = $cveNumber;
        $entry['timestamp'] = $row->find('td', 5)->innertext;
        $entry['content'] = $description;
        $entry['categories'] = $tags;
        $entry['enclosures'] = $cweLinks;
        $entry['uid'] = $cveNumber;
        $this->items[] = $entry;

        // Keep going only while fewer than 10 CVEs have been collected.
        if (count($this->items) < 10) {
            continue;
        }
        break;
    }
}
}

View File

@ -35,7 +35,10 @@ class TldrTechBridge extends BridgeAbstract
public function collectData()
{
$html = getSimpleHTMLDOM(self::URI . $this->getInput('topic') . '/archives');
$topic = $this->getInput('topic');
$limit = $this->getInput('limit');
$url = self::URI . $topic . '/archives';
$html = getSimpleHTMLDOM($url);
$entries_root = $html->find('div.content-center.mt-5', 0);
$added = 0;
foreach ($entries_root->children() as $child) {
@ -46,22 +49,25 @@ class TldrTechBridge extends BridgeAbstract
$date_items = explode('/', $child->href);
$date = strtotime(end($date_items));
$this->items[] = [
'uri' => self::URI . $child->href,
'title' => $child->plaintext,
'uri' => self::URI . $child->href,
'title' => $child->plaintext,
'timestamp' => $date,
'content' => $this->parseEntry(self::URI . $child->href)
'content' => $this->extractContent(self::URI . $child->href),
];
$added++;
if ($added >= $this->getInput('limit')) {
if ($added >= $limit) {
break;
}
}
}
private function parseEntry($uri)
private function extractContent($url)
{
$html = getSimpleHTMLDOM($uri);
$html = getSimpleHTMLDOM($url);
$content = $html->find('div.content-center.mt-5', 0);
if (!$content) {
return '';
}
$subscribe_form = $content->find('div.mt-5 > div > form', 0);
if ($subscribe_form) {
$content->removeChild($subscribe_form->parent->parent);

View File

@ -13,6 +13,7 @@ final class CloudFlareException extends HttpException
'<title>Please Wait...',
'<title>Attention Required!',
'<title>Security | Glassdoor',
'<title>Access denied</title>', // cf as seen on patreon.com
];
foreach ($cloudflareTitles as $cloudflareTitle) {
if (str_contains($response->getBody(), $cloudflareTitle)) {

View File

@ -140,7 +140,7 @@ function _sanitize_path_name(string $s, string $pathName): string
}
/**
* This is buggy because strip tags removes a lot that isn't html
* This is buggy because strip_tags() removes a lot that isn't html
*/
function is_html(string $text): bool
{