From db3899f2e6d15f8bc447c4979ed56f0687ebd05d Mon Sep 17 00:00:00 2001 From: Dag Date: Fri, 3 Jan 2025 07:23:13 +0100 Subject: [PATCH] fix(legifrance): emergency repair, still semi-broken (#4391) --- bridges/DansTonChatBridge.php | 6 ++-- bridges/LegifranceJOBridge.php | 64 ++++++++++++++++++---------------- 2 files changed, 38 insertions(+), 32 deletions(-) diff --git a/bridges/DansTonChatBridge.php b/bridges/DansTonChatBridge.php index 9712ec9d2f9..a7765f19618 100644 --- a/bridges/DansTonChatBridge.php +++ b/bridges/DansTonChatBridge.php @@ -10,9 +10,11 @@ class DansTonChatBridge extends BridgeAbstract public function collectData() { - $html = getSimpleHTMLDOM(self::URI . 'latest.html'); + $url = self::URI . 'latest.html'; + $dom = getSimpleHTMLDOM($url); - foreach ($html->find('div.item') as $element) { + $items = $dom->find('div.item'); + foreach ($items as $element) { $item = []; $item['uri'] = $element->find('a', 0)->href; $titleContent = $element->find('h3 a', 0); diff --git a/bridges/LegifranceJOBridge.php b/bridges/LegifranceJOBridge.php index 2d86c2ce402..cf8f9f7206f 100644 --- a/bridges/LegifranceJOBridge.php +++ b/bridges/LegifranceJOBridge.php @@ -14,6 +14,37 @@ class LegifranceJOBridge extends BridgeAbstract private $timestamp; private $uri; + public function collectData() + { + $html = getSimpleHTMLDOM(self::URI); + + $title = $html->find('h2.titleJO', 0); + + //$this->author = trim($title->plaintext); + $uri1 = $html->find('h2.titleELI', 0); + //$uri = $uri1->plaintext; + //$this->uri = trim(substr($uri, strpos($uri, 'https'))); + $this->timestamp = strtotime(substr($this->uri, strpos($this->uri, 'eli/jo/') + strlen('eli/jo/'), -5)); + + foreach ($html->find('h3') as $section) { + $subsections = $section->nextSibling()->find('h4'); + foreach ($subsections as $subsection) { + $origins = $subsection->nextSibling()->find('h5'); + foreach ($origins as $origin) { + $this->items[] = $this->extractItem($section, $subsection, $origin); + } + if (!empty($origins)) { + continue; + } + $this->items[] = $this->extractItem($section, $subsection); + } + if (!empty($subsections)) { + continue; + } + $this->items[] = $this->extractItem($section); + } + } + private function extractItem($section, $subsection = null, $origin = null) { $item = []; @@ -35,7 +66,9 @@ private function extractItem($section, $subsection = null, $origin = null) $item['content'] = ''; foreach ($data->nextSibling()->find('a') as $content) { $text = $content->plaintext; - $href = $content->nextSibling()->getAttribute('resource'); + $href = ''; + //$href = $content->nextSibling()->getAttribute('resource'); + $item['content'] .= '

' . $text . '

'; } return $item; @@ -45,33 +78,4 @@ public function getIcon() { return 'https://www.legifrance.gouv.fr/img/favicon.ico'; } - - public function collectData() - { - $html = getSimpleHTMLDOM(self::URI) - or $this->returnServer('Unable to download ' . self::URI); - - $this->author = trim($html->find('h2.titleJO', 0)->plaintext); - $uri = $html->find('h2.titleELI', 0)->plaintext; - $this->uri = trim(substr($uri, strpos($uri, 'https'))); - $this->timestamp = strtotime(substr($this->uri, strpos($this->uri, 'eli/jo/') + strlen('eli/jo/'), -5)); - - foreach ($html->find('h3') as $section) { - $subsections = $section->nextSibling()->find('h4'); - foreach ($subsections as $subsection) { - $origins = $subsection->nextSibling()->find('h5'); - foreach ($origins as $origin) { - $this->items[] = $this->extractItem($section, $subsection, $origin); - } - if (!empty($origins)) { - continue; - } - $this->items[] = $this->extractItem($section, $subsection); - } - if (!empty($subsections)) { - continue; - } - $this->items[] = $this->extractItem($section); - } - } }