diff --git a/bridges/SubstackBridge.php b/bridges/SubstackBridge.php new file mode 100644 index 000000000000..6a6c2c78f612 --- /dev/null +++ b/bridges/SubstackBridge.php @@ -0,0 +1,49 @@ + [ + 'required' => false, + ] + ]; + + const PARAMETERS = [ + '' => [ + 'url' => [ + 'name' => 'Substack RSS URL', + 'required' => true, + 'type' => 'text', + 'defaultValue' => 'https://newsletter.pragmaticengineer.com/feed', + 'title' => 'Usually https:///feed' + ] + ] + ]; + + public function collectData() { + $headers = []; + if ($this->getOption('sid')) { + $url_parsed = parse_url($this->getInput('url')); + $authority = $url_parsed['host']; + $cookies = [ + 'ab_experiment_sampled=%22false%22', + 'substack.sid=' . $this->getOption('sid'), + 'substack.lli=1', + 'intro_popup_last_hidden_at=' . (new DateTime())->format('Y-m-d\TH:i:s.v\Z') + ]; + $headers = [ + 'Authority: ' . $authority, + 'Cache-Control: max-age=0', + 'User-Agent: ' . 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36', + 'Cookie: ' . implode('; ', $cookies) + ]; + } + $this->collectExpandableDatas($this->getInput('url'), -1, $headers); + } +} diff --git a/lib/FeedExpander.php b/lib/FeedExpander.php index fe809bc259b3..ef001af145b8 100644 --- a/lib/FeedExpander.php +++ b/lib/FeedExpander.php @@ -7,7 +7,7 @@ abstract class FeedExpander extends BridgeAbstract { private array $feed; - public function collectExpandableDatas(string $url, $maxItems = -1) + public function collectExpandableDatas(string $url, $maxItems = -1, $headers = []) { if (!$url) { throw new \Exception('There is no $url for this RSS expander'); @@ -17,7 +17,7 @@ public function collectExpandableDatas(string $url, $maxItems = -1) $maxItems = 999; } $accept = [MrssFormat::MIME_TYPE, AtomFormat::MIME_TYPE, '*/*']; - $httpHeaders = ['Accept: ' . implode(', ', $accept)]; + $httpHeaders = array_merge(['Accept: ' . implode(', ', $accept)], $headers); $xmlString = getContents($url, $httpHeaders); if ($xmlString === '') { throw new \Exception(sprintf('Unable to parse xml from `%s` because we got the empty string', $url), 10); diff --git a/lib/FeedParser.php b/lib/FeedParser.php index b774cc14e239..321eb50a0183 100644 --- a/lib/FeedParser.php +++ b/lib/FeedParser.php @@ -167,6 +167,11 @@ public function parseRss2Item(\SimpleXMLElement $feedItem): array if (isset($namespaces['media'])) { $media = $feedItem->children($namespaces['media']); } + // xmlns:content="http://purl.org/rss/1.0/modules/content/" + if (isset($namespaces['content'])) { + $content = $feedItem->children($namespaces['content']); + $item['content'] = (string) $content; + } foreach ($namespaces as $namespaceName => $namespaceUrl) { if (in_array($namespaceName, ['', 'content', 'media'])) { continue;