From 3927ecd8220fca3e52ddc2ba42267e258b6a2a5d Mon Sep 17 00:00:00 2001
From: Yaman Qalieh
Date: Thu, 9 Jun 2022 10:56:52 -0400
Subject: [PATCH] [UsenixBridge] Add bridge (#2800)

---
 bridges/UsenixBridge.php | 68 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 68 insertions(+)
 create mode 100644 bridges/UsenixBridge.php

diff --git a/bridges/UsenixBridge.php b/bridges/UsenixBridge.php
new file mode 100644
index 00000000..4f785a0e
--- /dev/null
+++ b/bridges/UsenixBridge.php
@@ -0,0 +1,68 @@
+ [
+        ],
+    ];
+
+    /**
+     * Fills $this->items for the context that was queried.
+     *
+     * Only the 'USENIX ;login:' context is handled; any other context is
+     * reported through returnClientError().
+     */
+    public function collectData()
+    {
+        if ($this->queriedContext === 'USENIX ;login:') {
+            $this->collectLoginOnlineItems();
+            return;
+        }
+        returnClientError('Illegal Context');
+    }
+
+    /**
+     * Reads the ;login: online listing page and appends one feed item per
+     * listed article to $this->items.
+     *
+     * Every article page is additionally loaded through getItemContent() and
+     * its 'content' and 'categories' are merged into the item. Listing rows
+     * that have no title or no link are skipped.
+     */
+    private function collectLoginOnlineItems(): void
+    {
+        $url = 'https://www.usenix.org/publications/loginonline';
+        $dom = getSimpleHTMLDOMCached($url);
+
+        foreach ($dom->find('div.view-content > div') as $row) {
+            $title = $row->find('.views-field-title > span', 0);
+            $link = $row->find('.views-field-nothing-1 > span > a', 0);
+            if (empty($title) || empty($link)) {
+                // Not an article row (or the markup changed). Skipping avoids reading
+                // properties of a missing node and emitting an item whose URI is the
+                // bare site root.
+                continue;
+            }
+
+            // Listing links are expected to be site-relative; an href that is
+            // already absolute is used unchanged instead of being prefixed twice.
+            $href = (string) $link->href;
+            $uri = preg_match('#^https?://#i', $href) === 1
+                ? $href
+                : sprintf('https://www.usenix.org%s', $href);
+
+            $author = $row->find('.views-field-pseudo-author-list > span.field-content', 0);
+            $authorText = empty($author) ? null : $author->plaintext;
+
+            // Date text as shown on the listing, e.g. "June 2, 2022".
+            $createdAt = $row->find('div.views-field-field-lv2-publication-date > div > span', 0);
+
+            $feedItem = [
+                'title' => $title->innertext,
+                // Keep only the text before the first comma; fall back to the whole
+                // string when there is no comma.
+                'author' => $authorText === null
+                    ? null
+                    : (strstr($authorText, ',', true) ?: $authorText),
+                'uri' => $uri,
+                'timestamp' => empty($createdAt) ? null : $createdAt->innertext,
+            ];
+
+            $this->items[] = array_merge($feedItem, $this->getItemContent($uri));
+        }
+    }
+
+    /**
+     * Loads one article page and extracts the parts used to enrich a feed item.
+     *
+     * @param string $uri URL of the article page.
+     *
+     * @return array Map with 'content' (inner markup of the article body, followed
+     *               by the inner markup of the first fieldset when one exists) and
+     *               'categories' (plain text of each tag element).
+     */
+    private function getItemContent(string $uri): array
+    {
+        $html = getSimpleHTMLDOMCached($uri);
+
+        // A page without the expected body container yields empty content rather
+        // than a property read on a missing node.
+        $body = $html->find('.paragraphs-items-full', 0);
+        $content = empty($body) ? '' : $body->innertext;
+
+        $extra = $html->find('fieldset', 0);
+        if (!empty($extra)) {
+            $content .= $extra->innertext;
+        }
+
+        $tags = [];
+        foreach ($html->find('.field-name-field-lv2-tags div.field-item') as $tag) {
+            $tags[] = $tag->plaintext;
+        }
+
+        return [
+            'content' => $content,
+            'categories' => $tags,
+        ];
+    }
+}