diff --git a/bridges/GithubIssueBridge.php b/bridges/GithubIssueBridge.php index ba707b34..a8a71718 100644 --- a/bridges/GithubIssueBridge.php +++ b/bridges/GithubIssueBridge.php @@ -18,7 +18,12 @@ class GithubIssueBridge extends BridgeAbstract{ ) ), - 'Project Issues'=>array(), + 'Project Issues'=>array( + 'c'=>array( + 'name'=>'Show Issues Comments', + 'type'=>'checkbox' + ) + ), 'Issue comments'=>array( 'i'=>array( 'name'=>'Issue number', @@ -32,7 +37,12 @@ class GithubIssueBridge extends BridgeAbstract{ $name=$this->getInput('u').'/'.$this->getInput('p'); switch($this->queriedContext){ case 'Project Issues': - $name=static::NAME.'s '.$name; + if($this->getInput('c')){ + $prefix=static::NAME.'s comments for '; + }else{ + $prefix=static::NAME.'s for '; + } + $name=$prefix.$name; break; case 'Issue comments': $name=static::NAME.' '.$name.' #'.$this->getInput('i'); @@ -42,48 +52,139 @@ class GithubIssueBridge extends BridgeAbstract{ } public function getURI(){ - $uri = static::URI.$this->getInput('u').'/'.$this->getInput('p').'/issues/'; + $uri = static::URI.$this->getInput('u').'/'.$this->getInput('p').'/issues'; if($this->queriedContext==='Issue comments'){ - $uri.=$this->getInput('i'); + $uri.='/'.$this->getInput('i'); + }else if($this->getInput('c')){ + $uri.='?q=is%3Aissue+sort%3Aupdated-desc'; } return $uri; } + protected function extractIssueComment($issueNbr,$title,$comment){ + $class=$comment->getAttribute('class'); + $classes=explode(' ',$class); + $event=false; + if(in_array('discussion-item',$classes)){ + $event=true; + } + + $author='unknown'; + if($comment->find('.author',0)){ + $author=$comment->find('.author',0)->plaintext; + } + + $uri=static::URI.$this->getInput('u').'/'.$this->getInput('p').'/issues/' + .$issueNbr; + + $comment=$comment->firstChild(); + if(!$event){ + $comment=$comment->nextSibling(); + } + + if($event){ + $title.=' / '.substr($class,strpos($class,'discussion-item-')+strlen('discussion-item-')); + 
if(!$comment->hasAttribute('id')){ + $items=array(); + $timestamp=strtotime($comment->find('relative-time',0)->getAttribute('datetime')); + $content=$comment->innertext; + while($comment=$comment->nextSibling()){ + $item=array(); + $item['author']=$author; + $item['title']=html_entity_decode($title,ENT_QUOTES,'UTF-8'); + $item['timestamp']=$timestamp; + $item['content']=$content.'

'.$comment->children(1)->innertext.'

'; + $item['uri']=$uri.'#'.$comment->children(1)->getAttribute('id'); + $items[]=$item; + } + return $items; + } + $content=$comment->parent()->innertext; + }else{ + $title.=' / '.trim($comment->firstChild()->plaintext); + $content="
".$comment->find('.comment-body',0)->innertext."
"; + } + + $item = array(); + $item['author']=$author; + $item['uri']= $uri.'#'.$comment->getAttribute('id'); + $item['title']=html_entity_decode($title,ENT_QUOTES,'UTF-8'); + $item['timestamp']=strtotime($comment->find('relative-time',0)->getAttribute('datetime')); + $item['content']=$content; + return $item; + } + + protected function extractIssueComments($issue){ + $items=array(); + $title=$issue->find('.gh-header-title',0)->plaintext; + $issueNbr=trim(substr($issue->find('.gh-header-number',0)->plaintext,1)); + $comments=$issue->find('.js-discussion',0); + foreach($comments->children() as $comment){ + $classes=explode(' ',$comment->getAttribute('class')); + if(in_array('discussion-item',$classes) || + in_array('timeline-comment-wrapper',$classes) + ){ + $item=$this->extractIssueComment($issueNbr,$title,$comment); + if(array_keys($item)!==range(0,count($item)-1)){ + $item=array($item); + } + $items=array_merge($items,$item); + } + } + return $items; + } + public function collectData(){ $html = $this->getSimpleHTMLDOM($this->getURI()) - or $this->returnServerError('No results for Github Issue '.$this->getInput('i').' 
in project '.$this->getInput('u').'/'.$this->getInput('p')); + or $this->returnServerError('No results for Github Issue '.$this->getURI()); switch($this->queriedContext){ case 'Issue comments': - foreach($html->find('.js-comment-container') as $comment){ - - $item = array(); - $item['author']=$comment->find('img',0)->getAttribute('alt'); - - $comment=$comment->firstChild()->nextSibling(); - - $item['uri']=$this->getURI().'#'.$comment->getAttribute('id'); - $item['title']=trim($comment->firstChild()->plaintext); - $item['timestamp']=strtotime($comment->find('relative-time',0)->getAttribute('datetime')); - $item['content']=$comment->find('.comment-body',0)->innertext; - - $this->items[]=$item; - } + $this->items=$this->extractIssueComments($html); break; case 'Project Issues': foreach($html->find('.js-active-navigation-container .js-navigation-item') as $issue){ - $item=array(); $info=$issue->find('.opened-by',0); + $issueNbr=(int)substr(trim($info->plaintext),1); /* text starts with '#<number>'; the cast keeps only the leading digits, so no whitespace leaks into the URL */ + + $item=array(); + $item['content']=''; + + if($this->getInput('c')){ + $uri=static::URI.$this->getInput('u').'/'.$this->getInput('p').'/issues/'.$issueNbr; + $issuePage=$this->getSimpleHTMLDOMCached($uri,1800); /* separate variable: $issue (the list row) is still read below when this fetch fails */ + if($issuePage){ + $this->items=array_merge($this->items,$this->extractIssueComments($issuePage)); + continue; + } + $item['content']='Can not extract comments from '.$uri; + } + $item['author']=$info->find('a',0)->plaintext; $item['timestamp']=strtotime($info->find('relative-time',0)->getAttribute('datetime')); - $item['title']=$issue->find('.js-navigation-open',0)->plaintext; + $item['title']=html_entity_decode( + $issue->find('.js-navigation-open',0)->plaintext, + ENT_QUOTES, + 'UTF-8' + ); $comments=$issue->find('.col-5',0)->plaintext; - $item['content']='Comments: '.($comments?$comments:'0'); + $item['content'].="\n".'Comments: '.($comments?$comments:'0'); $item['uri']=self::URI.$issue->find('.js-navigation-open',0)->getAttribute('href'); $this->items[]=$item; } break; } +
+ array_walk($this->items, function(&$item){ + $item['content']=preg_replace('/\s+/',' ',$item['content']); + $item['content']=str_replace('href="/','href="'.static::URI,$item['content']); + $item['content']=str_replace( + 'href="#', + 'href="'.explode('#',$item['uri'],2)[0].'#', /* item URI without its fragment; also correct when the URI has no '#' at all */ + $item['content'] + ); + $item['title']=preg_replace('/\s+/',' ',$item['title']); + }); } public function getCacheDuration(){ diff --git a/bridges/LegifranceJOBridge.php b/bridges/LegifranceJOBridge.php new file mode 100644 index 00000000..b35c7e61 --- /dev/null +++ b/bridges/LegifranceJOBridge.php @@ -0,0 +1,70 @@ +author; + $item['timestamp']=$this->timestamp; + $item['uri']=$this->uri.'#'.count($this->items); + $item['title']=$section->plaintext; + + if(!is_null($origin)){ + $item['title']='[ '.$item['title'].' / '.$subsection->plaintext.' ] '.$origin->plaintext; + $data=$origin; + }elseif(!is_null($subsection)){ + $item['title']='[ '.$item['title'].' ] '.$subsection->plaintext; + $data=$subsection; + }else{ + $data=$section; + } + + $item['content']=''; + foreach($data->nextSibling()->find('a') as $content){ + $text=$content->plaintext; + $href=$content->nextSibling()->getAttribute('resource'); + $item['content'].='

'.$text.'

'; + } + return $item; + } + + public function collectData(){ + $html=$this->getSimpleHTMLDOM(self::URI) + or $this->returnServerError('Unable to download '.self::URI); /* same error helper GithubIssueBridge calls when its page cannot be fetched */ + + $this->author=trim($html->find('h2.title',0)->plaintext); + $uri=$html->find('h2.titleELI',0)->plaintext; + $this->uri=trim(substr($uri,strpos($uri,'https'))); + $this->timestamp=strtotime(substr($this->uri,strpos($this->uri,'eli/jo/')+strlen('eli/jo/'))); + + foreach($html->find('h3') as $section){ + $subsections=$section->nextSibling()->find('h4'); + foreach($subsections as $subsection){ + $origins=$subsection->nextSibling()->find('h5'); + foreach($origins as $origin){ + $this->items[]=$this->extractItem($section,$subsection,$origin); + } + if(!empty($origins)){ + continue; + } + $this->items[]=$this->extractItem($section,$subsection); + } + if(!empty($subsections)){ + continue; + } + $this->items[]=$this->extractItem($section); + } + } +} + + diff --git a/bridges/SexactuBridge.php b/bridges/SexactuBridge.php index 9981e2bc..534cba5d 100644 --- a/bridges/SexactuBridge.php +++ b/bridges/SexactuBridge.php @@ -4,7 +4,6 @@ class SexactuBridge extends BridgeAbstract{ const MAINTAINER = "Riduidel"; const NAME = "Sexactu"; const URI = "https://www.gqmagazine.fr"; - const HTTPS_STATUS = HTTPS_STATUS_VALID; const DESCRIPTION = "Sexactu via rss-bridge"; public function collectData(){