[YoutubeBridge] Fix author name parsing that breaks the bridge

The author name is parsed by searching a string within the entire
HTML document:

$author = $html->innertext;
$author = substr($author, strpos($author, '"author=') + 8);
$author = substr($author, 0, strpos($author, '\u0026'));

This solution will return big portions of the HTML document if
the strpos function returns false (not found), because false is
treated as zero in the offset arithmetic.

This commit replaces the previous implementation with one that searches
for a specific script tag and makes use of the JSON data inside it.

References #580
This commit is contained in:
logmanoriginal 2018-02-16 22:12:24 +01:00
parent 5aaab9eb8c
commit 4f6277b6b5
1 changed files with 13 additions and 3 deletions

View File

@ -56,9 +56,19 @@ class YoutubeBridge extends BridgeAbstract {
return; return;
} }
$author = $html->innertext; foreach($html->find('script') as $script){
$author = substr($author, strpos($author, '"author=') + 8); $data = trim($script->innertext);
$author = substr($author, 0, strpos($author, '\u0026'));
if(strpos($data, '{') !== 0)
continue; // Wrong script
$json = json_decode($data);
if(!isset($json->itemListElement))
continue; // Wrong script
$author = $json->itemListElement[0]->item->name;
}
if(!is_null($html->find('div#watch-description-text', 0))) if(!is_null($html->find('div#watch-description-text', 0)))
$desc = $html->find('div#watch-description-text', 0)->innertext; $desc = $html->find('div#watch-description-text', 0)->innertext;