[HaveIBeenPwnedBridge] Use API to get Data (#2720)

This commit is contained in:
Yaman Qalieh 2022-05-12 15:53:03 -04:00 committed by GitHub
parent 73b1a6a7aa
commit e5829d37b6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 54 additions and 36 deletions

View File

@ -1,4 +1,11 @@
<?php <?php
/**
* Uses the API as documented here:
* https://haveibeenpwned.com/API/v3#AllBreaches
*
* Lists the latest breaches, ordered either by the date of the breach or by
* the date the breach was added to HIBP.
*/
class HaveIBeenPwnedBridge extends BridgeAbstract { class HaveIBeenPwnedBridge extends BridgeAbstract {
const NAME = 'Have I Been Pwned (HIBP) Bridge'; const NAME = 'Have I Been Pwned (HIBP) Bridge';
const URI = 'https://haveibeenpwned.com'; const URI = 'https://haveibeenpwned.com';
@ -21,52 +28,43 @@ class HaveIBeenPwnedBridge extends BridgeAbstract {
'defaultValue' => 20, 'defaultValue' => 20,
) )
)); ));
const API_URI = 'https://haveibeenpwned.com/api/v3';
const CACHE_TIMEOUT = 3600; const CACHE_TIMEOUT = 3600;
private $breachDateRegex = '/Breach date: ([0-9]{1,2} [A-Z-a-z]+ [0-9]{4})/';
private $dateAddedRegex = '/Date added to HIBP: ([0-9]{1,2} [A-Z-a-z]+ [0-9]{4})/';
private $accountsRegex = '/Compromised accounts: ([0-9,]+)/';
private $breaches = array(); private $breaches = array();
public function collectData() { public function collectData() {
$html = getSimpleHTMLDOM(self::URI . '/PwnedWebsites'); $data = json_decode(getContents(self::API_URI . '/breaches'), true);
$breaches = array(); $breaches = array();
foreach($html->find('div.row') as $breach) { foreach($data as $breach) {
$item = array(); $item = array();
if ($breach->class != 'row') { $pwnCount = number_format($breach['PwnCount']);
continue; $item['title'] = $breach['Title'] . ' - '
} . $pwnCount . ' breached accounts';
$item['dateAdded'] = $breach['AddedDate'];
$item['breachDate'] = $breach['BreachDate'];
$item['uri'] = self::URI . '/PwnedWebsites' . $breach['Name'];
preg_match($this->breachDateRegex, $breach->find('p', 1)->plaintext, $breachDate) $item['content'] = '<p>' . $breach['Description'] . '</p>';
or returnServerError('Could not extract details');
preg_match($this->dateAddedRegex, $breach->find('p', 1)->plaintext, $dateAdded)
or returnServerError('Could not extract details');
preg_match($this->accountsRegex, $breach->find('p', 1)->plaintext, $accounts)
or returnServerError('Could not extract details');
$permalink = $breach->find('p', 1)->find('a', 0)->href;
// Remove permalink
$breach->find('p', 1)->find('a', 0)->outertext = '';
$item['title'] = html_entity_decode($breach->find('h3', 0)->plaintext, ENT_QUOTES)
. ' - ' . $accounts[1] . ' breached accounts';
$item['dateAdded'] = strtotime($dateAdded[1]);
$item['breachDate'] = strtotime($breachDate[1]);
$item['uri'] = self::URI . '/PwnedWebsites' . $permalink;
$item['content'] = '<p>' . $breach->find('p', 0)->innertext . '</p>';
$item['content'] .= '<p>' . $this->breachType($breach) . '</p>'; $item['content'] .= '<p>' . $this->breachType($breach) . '</p>';
$item['content'] .= '<p>' . $breach->find('p', 1)->innertext . '</p>';
$breachDate = date('j F Y', strtotime($breach['BreachDate']));
$addedDate = date('j F Y', strtotime($breach['AddedDate']));
$compData = implode(', ', $breach['DataClasses']);
$item['content'] .= <<<EOD
<p>
<strong>Breach date:</strong> {$breachDate}<br>
<strong>Date added to HIBP:</strong> {$addedDate}<br>
<strong>Compromised accounts:</strong> {$pwnCount}<br>
<strong>Compromised data:</strong> {$compData}<br>
EOD;
$item['uid'] = $breach['Name'];
$this->breaches[] = $item; $this->breaches[] = $item;
} }
@ -74,6 +72,27 @@ class HaveIBeenPwnedBridge extends BridgeAbstract {
$this->createItems(); $this->createItems();
} }
/**
* Messages describing the type of a breach, keyed by the name of a boolean
* flag field on the breach record.
*
* Each entry maps a flag name to a one-element array whose key is the flag
* value that triggers the message. PHP casts boolean array keys to integers
* (false => 0, true => 1), both in this declaration and when a boolean is used
* as the index, so a flag value can be used directly for the lookup. Only the
* listed value of each flag has a message.
*
* NOTE(review): assumes the API delivers these flags as JSON booleans - confirm.
*/
private const BREACH_TYPES = array(
// The only flag that produces a message when it is false.
'IsVerified' => array(
false => 'Unverified breach, may be sourced from elsewhere'
),
'IsFabricated' => array(
true => 'Fabricated breach, likely not legitimate'
),
'IsSensitive' => array(
true => 'Sensitive breach, not publicly searchable'
),
'IsRetired' => array(
true => 'Retired breach, removed from system'
),
'IsSpamList' => array(
true => 'Spam list, used for spam marketing'
),
'IsMalware' => array(
true => 'Malware breach'
),
);
/** /**
* Extract data breach type(s) * Extract data breach type(s)
*/ */
@ -81,12 +100,10 @@ class HaveIBeenPwnedBridge extends BridgeAbstract {
$content = ''; $content = '';
if ($breach->find('h3 > i', 0)) { foreach (self::BREACH_TYPES as $type => $message) {
if (isset($message[$breach[$type]])) {
foreach ($breach->find('h3 > i') as $i) { $content .= $message[$breach[$type]] . '.<br>';
$content .= $i->title . '.<br>';
} }
} }
return $content; return $content;
@ -127,6 +144,7 @@ class HaveIBeenPwnedBridge extends BridgeAbstract {
$item['timestamp'] = $breach[$this->getInput('order')]; $item['timestamp'] = $breach[$this->getInput('order')];
$item['uri'] = $breach['uri']; $item['uri'] = $breach['uri'];
$item['content'] = $breach['content']; $item['content'] = $breach['content'];
$item['uid'] = $breach['uid'];
$this->items[] = $item; $this->items[] = $item;