Marktplaats categories added (#3761)

* Update MarktplaatsBridge.php

* Update MarktplaatsBridge.php only main categories

As the whole list is too big, only main categories are used for now.

* Renamed parameter 2 to sc

Renamed unused method to better reflect its usage

* Update MarktplaatsBridge.php: several fixes

Categories completed
Added a default empty one
Check if the input is not empty before using
Added helper methods to generate the categorylist

* Update MarktplaatsBridge.php

Set the methods to private for the CI
This commit is contained in:
Park0 2023-10-22 17:36:36 +02:00 committed by GitHub
parent a6a4502209
commit f134808a26
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 139 additions and 4 deletions

View File

@ -14,6 +14,51 @@ class MarktplaatsBridge extends BridgeAbstract
'required' => true,
'title' => 'The search string for marktplaats',
],
'c' => [
'name' => 'Category',
'type' => 'list',
'values' => [
'Select a category' => '',
'Antiek en Kunst' => '1',
'Audio, Tv en Foto' => '31',
'Auto's' => '91',
'Auto-onderdelen' => '2600',
'Auto diversen' => '48',
'Boeken' => '201',
'Caravans en Kamperen' => '289',
'Cd's en Dvd's' => '1744',
'Computers en Software' => '322',
'Contacten en Berichten' => '378',
'Diensten en Vakmensen' => '1098',
'Dieren en Toebehoren' => '395',
'Doe-het-zelf en Verbouw' => '239',
'Fietsen en Brommers' => '445',
'Hobby en Vrije tijd' => '1099',
'Huis en Inrichting' => '504',
'Huizen en Kamers' => '1032',
'Kinderen en Baby's' => '565',
'Kleding | Dames' => '621',
'Kleding | Heren' => '1776',
'Motoren' => '678',
'Muziek en Instrumenten' => '728',
'Postzegels en Munten' => '1784',
'Sieraden, Tassen en Uiterlijk' => '1826',
'Spelcomputers en Games' => '356',
'Sport en Fitness' => '784',
'Telecommunicatie' => '820',
'Tickets en Kaartjes' => '1984',
'Tuin en Terras' => '1847',
'Vacatures' => '167',
'Vakantie' => '856',
'Verzamelen' => '895',
'Watersport en Boten' => '976',
'Witgoed en Apparatuur' => '537',
'Zakelijke goederen' => '1085',
'Diversen' => '428',
],
'required' => false,
'title' => 'The category to search in',
],
'z' => [
'name' => 'zipcode',
'type' => 'text',
@ -57,7 +102,15 @@ class MarktplaatsBridge extends BridgeAbstract
'type' => 'checkbox',
'required' => false,
'title' => 'Include the raw data behind the content',
]
],
'sc' => [
'name' => 'Sub category',
'type' => 'number',
'required' => false,
'exampleValue' => '12345',
'title' => 'Sub category has to be given by id as the list is too big to show here.
Only use subcategories that belong to the main category. Both have to be correct',
],
]
];
const CACHE_TIMEOUT = 900;
@ -80,6 +133,12 @@ class MarktplaatsBridge extends BridgeAbstract
$excludeGlobal = true;
}
}
if (!empty($this->getInput('c'))) {
$query .= '&l1CategoryId=' . $this->getInput('c');
}
if (!is_null($this->getInput('sc'))) {
$query .= '&l2CategoryId=' . $this->getInput('sc');
}
$url = 'https://www.marktplaats.nl/lrp/api/search?query=' . urlencode($this->getInput('q')) . $query;
$jsonString = getSimpleHTMLDOM($url);
$jsonObj = json_decode($jsonString);
@ -97,15 +156,15 @@ class MarktplaatsBridge extends BridgeAbstract
$item['enclosures'] = $listing->imageUrls;
if (is_array($listing->imageUrls)) {
foreach ($listing->imageUrls as $imgurl) {
$item['content'] .= "<br />\n<img src='https:" . $imgurl . "' />";
$item['content'] .= "<br />\n<img alt='' src='https:" . $imgurl . "' />";
}
} else {
$item['content'] .= "<br>\n<img src='https:" . $listing->imageUrls . "' />";
$item['content'] .= "<br>\n<img alt='' src='https:" . $listing->imageUrls . "' />";
}
}
if (!is_null($this->getInput('r'))) {
if ($this->getInput('r')) {
$item['content'] .= "<br />\n<br />\n<br />\n" . json_encode($listing);
$item['content'] .= "<br />\n<br />\n<br />\n" . json_encode($listing) . "<br />$url";
}
}
$item['content'] .= "<br>\n<br>\nPrice: " . $listing->priceInfo->priceCents / 100;
@ -130,4 +189,80 @@ class MarktplaatsBridge extends BridgeAbstract
}
return parent::getName();
}
/**
 * Scrapes the main categories and their sub categories from marktplaats.
 *
 * Development helper used by printScrapeArray() to regenerate the category
 * list. It loads the marktplaats home page for the main categories and then
 * queries the search API once per main category for its sub categories.
 *
 * @return array Two entries: 'main' maps a category label to its id,
 *               'sub' maps a main category full name to a map of
 *               sub category full name => sub category id.
 * @throws \RuntimeException When the marktplaats home page could not be loaded.
 */
private static function scrapeSubCategories(): array
{
    $main = ['Select a category' => ''];
    // Initialised up front: previously $ids only came into existence inside
    // the loop, so the second loop read an undefined variable whenever no
    // option qualified.
    $ids = [];

    $marktplaatsHTML = file_get_html('https://www.marktplaats.nl');
    if (!$marktplaatsHTML) {
        // NOTE(review): file_get_html is expected to return false on failure;
        // fail with a clear message instead of calling find() on a non-object.
        throw new \RuntimeException('Could not load https://www.marktplaats.nl');
    }

    foreach ($marktplaatsHTML->find('select[id=categoryId] option') as $opt) {
        // Options whose label contains "categorie" are skipped
        // (presumably the placeholder entry of the select - confirm).
        if (str_contains($opt->innertext, 'categorie')) {
            continue;
        }
        $main[$opt->innertext] = $opt->value;
        $ids[] = $opt->value;
    }

    $result = [];
    foreach ($ids as $id) {
        $url = 'https://www.marktplaats.nl/lrp/api/search?l1CategoryId=' . $id;
        $jsonstring = getContents($url);
        $jsondata = json_decode((string)$jsonstring);

        if (!isset($jsondata->searchCategoryOptions)) {
            // Dump the unexpected response so it can be inspected.
            print($jsonstring);
            continue;
        }
        if (!isset($jsondata->categoriesById->$id)) {
            // Without the main category entry there is no name to file
            // the sub categories under.
            continue;
        }

        $maincategory = $jsondata->categoriesById->$id;
        $subCategories = [];
        foreach ($jsondata->searchCategoryOptions as $categorie) {
            $subCategories[$categorie->fullName] = $categorie->id;
        }
        $result[$maincategory->fullName] = $subCategories;
    }

    return [
        'main' => $main,
        'sub' => $result,
    ];
}
/**
 * Helper method that echoes an array as PHP array-literal source lines,
 * which can be pasted into the category list.
 *
 * Nested arrays are printed recursively as "'key' => [ ... ],"; every other
 * value is printed as "'key' => 'value',". Keys and values are escaped for
 * use inside single-quoted PHP strings.
 *
 * @param array $array  The (possibly nested) array to print.
 * @param int   $indent Nesting depth; each line is prefixed with this many spaces.
 * @return void
 */
private static function printArrayAsCode($array, $indent = 0): void
{
    // Escape backslashes and single quotes so the emitted literal is valid
    // inside a single-quoted PHP string. strtr() does not rescan its own
    // replacements, so an inserted backslash is never escaped twice.
    $escape = static fn ($text): string => strtr((string) $text, [
        '\\' => '\\\\',
        '\'' => '\\\'',
    ]);

    $padding = str_repeat(' ', $indent);

    foreach ($array as $key => $value) {
        // The key is escaped for both branches: previously the nested-array
        // branch printed it raw, so keys containing an apostrophe produced
        // broken code.
        $quotedKey = $escape($key);

        if (is_array($value)) {
            echo $padding . "'$quotedKey' => [" . PHP_EOL;
            self::printArrayAsCode($value, $indent + 1);
            echo $padding . '],' . PHP_EOL;
            continue;
        }

        $quotedValue = $escape($value);
        echo $padding . "'$quotedKey' => '$quotedValue'," . PHP_EOL;
    }
}
/**
 * Scrapes the marktplaats categories and echoes them as two PHP array
 * snippets: first the main categories, then the sub categories.
 *
 * NOTE(review): both snippets are emitted under the same variable name
 * ($myArray), so they have to be renamed by hand after pasting.
 */
private static function printScrapeArray()
{
    $scraped = self::scrapeSubCategories();

    // 'main' is printed before 'sub'; each section gets its own snippet.
    foreach (['main', 'sub'] as $section) {
        echo '$myArray = [' . PHP_EOL;
        self::printArrayAsCode($scraped[$section], 1);
        echo '];' . PHP_EOL;
    }
}
}