From e5829d37b6e39f8d87cf06ce193f9c6b9ea51f8c Mon Sep 17 00:00:00 2001 From: Yaman Qalieh Date: Thu, 12 May 2022 15:53:03 -0400 Subject: [PATCH] [HaveIBeenPwnedBridge] Use API to get Data (#2720) --- bridges/HaveIBeenPwnedBridge.php | 90 +++++++++++++++++++------------- 1 file changed, 54 insertions(+), 36 deletions(-) diff --git a/bridges/HaveIBeenPwnedBridge.php b/bridges/HaveIBeenPwnedBridge.php index 6885dd64..91ea43f2 100644 --- a/bridges/HaveIBeenPwnedBridge.php +++ b/bridges/HaveIBeenPwnedBridge.php @@ -1,4 +1,11 @@ 20, ) )); + const API_URI = 'https://haveibeenpwned.com/api/v3'; const CACHE_TIMEOUT = 3600; - private $breachDateRegex = '/Breach date: ([0-9]{1,2} [A-Z-a-z]+ [0-9]{4})/'; - private $dateAddedRegex = '/Date added to HIBP: ([0-9]{1,2} [A-Z-a-z]+ [0-9]{4})/'; - private $accountsRegex = '/Compromised accounts: ([0-9,]+)/'; - private $breaches = array(); public function collectData() { - $html = getSimpleHTMLDOM(self::URI . '/PwnedWebsites'); + $data = json_decode(getContents(self::API_URI . '/breaches'), true); $breaches = array(); - foreach($html->find('div.row') as $breach) { + foreach($data as $breach) { $item = array(); - if ($breach->class != 'row') { - continue; - } + $pwnCount = number_format($breach['PwnCount']); + $item['title'] = $breach['Title'] . ' - ' + . $pwnCount . ' breached accounts'; + $item['dateAdded'] = $breach['AddedDate']; + $item['breachDate'] = $breach['BreachDate']; + $item['uri'] = self::URI . '/PwnedWebsites' . $breach['Name']; - preg_match($this->breachDateRegex, $breach->find('p', 1)->plaintext, $breachDate) - or returnServerError('Could not extract details'); - - preg_match($this->dateAddedRegex, $breach->find('p', 1)->plaintext, $dateAdded) - or returnServerError('Could not extract details'); - - preg_match($this->accountsRegex, $breach->find('p', 1)->plaintext, $accounts) - or returnServerError('Could not extract details'); - - $permalink = $breach->find('p', 1)->find('a', 0)->href; - - // Remove permalink - $breach->find('p', 1)->find('a', 0)->outertext = ''; - - $item['title'] = html_entity_decode($breach->find('h3', 0)->plaintext, ENT_QUOTES) - . ' - ' . $accounts[1] . ' breached accounts'; - $item['dateAdded'] = strtotime($dateAdded[1]); - $item['breachDate'] = strtotime($breachDate[1]); - $item['uri'] = self::URI . '/PwnedWebsites' . $permalink; - - $item['content'] = '

' . $breach->find('p', 0)->innertext . '

'; + $item['content'] = '

' . $breach['Description'] . '

'; $item['content'] .= '

' . $this->breachType($breach) . '

'; - $item['content'] .= '

' . $breach->find('p', 1)->innertext . '

'; + $breachDate = date('j F Y', strtotime($breach['BreachDate'])); + $addedDate = date('j F Y', strtotime($breach['AddedDate'])); + $compData = implode(', ', $breach['DataClasses']); + + $item['content'] .= << +Breach date: {$breachDate}
+Date added to HIBP: {$addedDate}
+Compromised accounts: {$pwnCount}
+Compromised data: {$compData}
+EOD; + $item['uid'] = $breach['Name']; $this->breaches[] = $item; } @@ -74,6 +72,27 @@ class HaveIBeenPwnedBridge extends BridgeAbstract { $this->createItems(); } + private const BREACH_TYPES = array( + 'IsVerified' => array( + false => 'Unverified breach, may be sourced from elsewhere' + ), + 'IsFabricated' => array( + true => 'Fabricated breach, likely not legitimate' + ), + 'IsSensitive' => array( + true => 'Sensitive breach, not publicly searchable' + ), + 'IsRetired' => array( + true => 'Retired breach, removed from system' + ), + 'IsSpamList' => array( + true => 'Spam list, used for spam marketing' + ), + 'IsMalware' => array( + true => 'Malware breach' + ), + ); + /** * Extract data breach type(s) */ @@ -81,12 +100,10 @@ class HaveIBeenPwnedBridge extends BridgeAbstract { $content = ''; - if ($breach->find('h3 > i', 0)) { - - foreach ($breach->find('h3 > i') as $i) { - $content .= $i->title . '.
'; + foreach (self::BREACH_TYPES as $type => $message) { + if (isset($message[$breach[$type]])) { + $content .= $message[$breach[$type]] . '.
'; } - } return $content; @@ -127,6 +144,7 @@ class HaveIBeenPwnedBridge extends BridgeAbstract { $item['timestamp'] = $breach[$this->getInput('order')]; $item['uri'] = $breach['uri']; $item['content'] = $breach['content']; + $item['uid'] = $breach['uid']; $this->items[] = $item;