From 2e93bfaba6a57114b8a1bd4a4872a41efc07d345 Mon Sep 17 00:00:00 2001 From: SqrtMinusOne Date: Wed, 31 Jul 2024 12:16:56 +0300 Subject: [PATCH 1/5] [SubstackBridge] Add Substack --- bridges/SubstackBridge.php | 49 ++++++++++++++++++++++++++++++++++++++ lib/FeedExpander.php | 4 ++-- 2 files changed, 51 insertions(+), 2 deletions(-) create mode 100644 bridges/SubstackBridge.php diff --git a/bridges/SubstackBridge.php b/bridges/SubstackBridge.php new file mode 100644 index 00000000000..6a6c2c78f61 --- /dev/null +++ b/bridges/SubstackBridge.php @@ -0,0 +1,49 @@ + [ + 'required' => false, + ] + ]; + + const PARAMETERS = [ + '' => [ + 'url' => [ + 'name' => 'Substack RSS URL', + 'required' => true, + 'type' => 'text', + 'defaultValue' => 'https://newsletter.pragmaticengineer.com/feed', + 'title' => 'Usually https:///feed' + ] + ] + ]; + + public function collectData() { + $headers = []; + if ($this->getOption('sid')) { + $url_parsed = parse_url($this->getInput('url')); + $authority = $url_parsed['host']; + $cookies = [ + 'ab_experiment_sampled=%22false%22', + 'substack.sid=' . $this->getOption('sid'), + 'substack.lli=1', + 'intro_popup_last_hidden_at=' . (new DateTime())->format('Y-m-d\TH:i:s.v\Z') + ]; + $headers = [ + 'Authority: ' . $authority, + 'Cache-Control: max-age=0', + 'User-Agent: ' . 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36', + 'Cookie: ' . 
implode('; ', $cookies) + ]; + } + $this->collectExpandableDatas($this->getInput('url'), -1, $headers); + } +} diff --git a/lib/FeedExpander.php b/lib/FeedExpander.php index fe809bc259b..ef001af145b 100644 --- a/lib/FeedExpander.php +++ b/lib/FeedExpander.php @@ -7,7 +7,7 @@ abstract class FeedExpander extends BridgeAbstract { private array $feed; - public function collectExpandableDatas(string $url, $maxItems = -1) + public function collectExpandableDatas(string $url, $maxItems = -1, $headers = []) { if (!$url) { throw new \Exception('There is no $url for this RSS expander'); @@ -17,7 +17,7 @@ public function collectExpandableDatas(string $url, $maxItems = -1) $maxItems = 999; } $accept = [MrssFormat::MIME_TYPE, AtomFormat::MIME_TYPE, '*/*']; - $httpHeaders = ['Accept: ' . implode(', ', $accept)]; + $httpHeaders = array_merge(['Accept: ' . implode(', ', $accept)], $headers); $xmlString = getContents($url, $httpHeaders); if ($xmlString === '') { throw new \Exception(sprintf('Unable to parse xml from `%s` because we got the empty string', $url), 10); From 6755472dde81a2c89315e3a33fecccf5c52ea2eb Mon Sep 17 00:00:00 2001 From: SqrtMinusOne Date: Wed, 31 Jul 2024 12:55:25 +0300 Subject: [PATCH 2/5] [SubstackBridge] Add docs --- docs/10_Bridge_Specific/Substack.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 docs/10_Bridge_Specific/Substack.md diff --git a/docs/10_Bridge_Specific/Substack.md b/docs/10_Bridge_Specific/Substack.md new file mode 100644 index 00000000000..7595bbefaab --- /dev/null +++ b/docs/10_Bridge_Specific/Substack.md @@ -0,0 +1,18 @@ +# SubstackBridge + +[Substack](https://substack.com) provides RSS feeds at `/feed` path, e.g., https://newsletter.pragmaticengineer.com/feed/. 
However, these feeds have two problems, addressed by this bridge: +- They use RSS 2.0 with the draft [content extension](https://web.resource.org/rss/1.0/modules/content/), which isn't supported by some readers; +- They don't have the full content for paywalled posts. + +Retrieving the full content is only possible _with an active subscription to the blog_. If you have one, Substack will return the full feed if it's fetched with the right set of cookies. Figuring out whether this is the intended behaviour is left as an exercise for the reader. + +To obtain the session cookie, log in at https://substack.com/, open DevTools, go to Application -> Cookies -> https://substack.com, copy the value of `substack.sid` and paste it into the RSS-Bridge config: + +``` +[SubstackBridge] +sid = "YOUR_SUBSTACK_SID_VALUE" +``` + +Logging in sometimes requires a CAPTCHA, hence this step is manual. The cookie expires after three months. + +After you've done this, the bridge should return full feeds for your subscriptions. From 26fe2a25c5b67b6d547949538d999b99ae1b5ded Mon Sep 17 00:00:00 2001 From: SqrtMinusOne Date: Wed, 31 Jul 2024 13:21:27 +0300 Subject: [PATCH 3/5] [SubstackBridge] Fix lint --- bridges/SubstackBridge.php | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/bridges/SubstackBridge.php b/bridges/SubstackBridge.php index 6a6c2c78f61..0e48ef0c212 100644 --- a/bridges/SubstackBridge.php +++ b/bridges/SubstackBridge.php @@ -26,7 +26,8 @@ class SubstackBridge extends FeedExpander ] ]; - public function collectData() { + public function collectData() + { $headers = []; if ($this->getOption('sid')) { $url_parsed = parse_url($this->getInput('url')); @@ -40,7 +41,7 @@ public function collectData() { $headers = [ 'Authority: ' . $authority, 'Cache-Control: max-age=0', - 'User-Agent: ' . 
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36', + 'User-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36', 'Cookie: ' . implode('; ', $cookies) ]; } From 049eac54886327defcf016a09142572798a6e3de Mon Sep 17 00:00:00 2001 From: SqrtMinusOne Date: Wed, 31 Jul 2024 22:51:25 +0300 Subject: [PATCH 4/5] [SubstackBridge] Update description --- bridges/SubstackBridge.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bridges/SubstackBridge.php b/bridges/SubstackBridge.php index 0e48ef0c212..319fae78392 100644 --- a/bridges/SubstackBridge.php +++ b/bridges/SubstackBridge.php @@ -6,7 +6,7 @@ class SubstackBridge extends FeedExpander const NAME = 'Substack Bridge'; const URI = 'https://substack.com/'; const CACHE_TIMEOUT = 3600; //1hour - const DESCRIPTION = 'Full-content Substack feed. Requires active subscription.'; + const DESCRIPTION = 'Access paywalled Substack using a session cookie with an active subscription.'; const CONFIGURATION = [ 'sid' => [ From 70ae91771077a99985131defa30bff8ef408e19c Mon Sep 17 00:00:00 2001 From: SqrtMinusOne Date: Wed, 31 Jul 2024 22:55:58 +0300 Subject: [PATCH 5/5] [SubstackBridge] Update description (x2) --- bridges/SubstackBridge.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bridges/SubstackBridge.php b/bridges/SubstackBridge.php index 319fae78392..13eea02ebec 100644 --- a/bridges/SubstackBridge.php +++ b/bridges/SubstackBridge.php @@ -6,7 +6,7 @@ class SubstackBridge extends FeedExpander const NAME = 'Substack Bridge'; const URI = 'https://substack.com/'; const CACHE_TIMEOUT = 3600; //1hour - const DESCRIPTION = 'Access paywalled Substack using a session cookie with an active subscription.'; + const DESCRIPTION = 'Access Substack. Add full content for paywalled posts if you have a session cookie with an active subscription.'; const CONFIGURATION = [ 'sid' => [