[AmazonPriceTrackerBridge] Minor fix for parser, and new strategy (#2603)

This commit is contained in:
Nemo 2022-04-04 23:11:40 +05:30 committed by GitHub
parent d34b94848b
commit e1e9a12440
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 38 additions and 10 deletions

View File

@ -49,6 +49,8 @@ class AmazonPriceTrackerBridge extends BridgeAbstract {
'.a-color-price', '.a-color-price',
); );
const WHITESPACE = " \t\n\r\0\x0B\xC2\xA0";
protected $title; protected $title;
/** /**
@ -154,6 +156,22 @@ EOT;
return false; return false;
} }
/**
 * Scrape the price from the JSON payload embedded in the
 * '.twister-plus-buying-options-price-data' element.
 *
 * The payload is only used when it decodes to a list holding exactly one
 * entry; any other shape makes this strategy report failure so the caller
 * can fall back to another one.
 *
 * @param object $html Parsed page exposing find($selector, $index)
 * @return array|false Array with the keys 'displayPrice', 'currency' and
 *                     'shipping' (always '0'), or false when the element is
 *                     missing, its content is not valid JSON, or it does not
 *                     hold exactly one entry with a 'displayPrice'.
 */
private function scrapePriceTwister($html) {
    $node = $html->find('.twister-plus-buying-options-price-data', 0);
    if (!$node) {
        // Element not present on this page: nothing to decode.
        return false;
    }

    $decoded = json_decode($node->innertext, true);
    // json_decode() yields null on invalid JSON and may yield scalars;
    // count() on a non-array is a TypeError on PHP 8.
    if (!is_array($decoded) || count($decoded) !== 1) {
        return false;
    }

    // A one-element map (no index 0) or a non-array entry is not usable.
    if (!isset($decoded[0]) || !is_array($decoded[0]) || !isset($decoded[0]['displayPrice'])) {
        return false;
    }

    $entry = $decoded[0];
    return array(
        'displayPrice' => $entry['displayPrice'],
        'currency' => isset($entry['currency']) ? $entry['currency'] : null,
        'shipping' => '0',
    );
}
private function scrapePriceGeneric($html) { private function scrapePriceGeneric($html) {
$priceDiv = null; $priceDiv = null;
@ -168,12 +186,11 @@ EOT;
return false; return false;
} }
$priceString = $priceDiv->plaintext; $priceString = str_replace(str_split(self::WHITESPACE), '', $priceDiv->plaintext);
preg_match('/(\d+\.\d{0,2})/', $priceString, $matches);
preg_match('/[\d.,]+/', $priceString, $matches);
$price = $matches[0]; $price = $matches[0];
$currency = trim(str_replace($price, '', $priceString), " \t\n\r\0\x0B\xC2\xA0"); $currency = str_replace($price, '', $priceString);
if ($price != null && $currency != null) { if ($price != null && $currency != null) {
return array( return array(
@ -186,6 +203,21 @@ EOT;
return false; return false;
} }
/**
 * Build the HTML content of a feed item from the scraped price data.
 *
 * @param string $image HTML placed before the price line
 * @param array $data Scraped price data. 'displayPrice' is used verbatim
 *                    when present and non-empty; otherwise the price line is
 *                    built from 'price' and 'currency'. 'shipping' is
 *                    optional; the value '0' means no shipping line.
 * @return string HTML fragment: image, price line and optional shipping line
 */
private function renderContent($image, $data) {
    // empty() gives the same truthiness result as the plain read, but does
    // not raise an undefined-index notice when a strategy omits the key.
    if (empty($data['displayPrice'])) {
        $price = "{$data['price']} {$data['currency']}";
    } else {
        $price = $data['displayPrice'];
    }

    $html = "$image<br>Price: $price";

    // Only add the shipping line when a shipping value was actually scraped.
    if (isset($data['shipping']) && $data['shipping'] !== '0') {
        $html .= "<br>Shipping: {$data['shipping']} {$data['currency']}</br>";
    }

    return $html;
}
/** /**
* Scrape method for Amazon product page * Scrape method for Amazon product page
* @return [type] [description] * @return [type] [description]
@ -195,20 +227,16 @@ EOT;
$this->title = $this->getTitle($html); $this->title = $this->getTitle($html);
$imageTag = $this->getImage($html); $imageTag = $this->getImage($html);
$data = $this->scrapePriceFromMetrics($html) ?: $this->scrapePriceGeneric($html); $data = $this->scrapePriceGeneric($html);
$item = array( $item = array(
'title' => $this->title, 'title' => $this->title,
'uri' => $this->getURI(), 'uri' => $this->getURI(),
'content' => "$imageTag<br/>Price: {$data['price']} {$data['currency']}", 'content' => $this->renderContent($imageTag, $data),
// This is to ensure that feed readers notice the price change // This is to ensure that feed readers notice the price change
'uid' => md5($data['price']) 'uid' => md5($data['price'])
); );
if ($data['shipping'] !== '0') {
$item['content'] .= "<br>Shipping: {$data['shipping']} {$data['currency']}</br>";
}
$this->items[] = $item; $this->items[] = $item;
} }
} }