Skip to content

Commit

Permalink
[SubstackBridge] Add Substack
Browse files Browse the repository at this point in the history
  • Loading branch information
SqrtMinusOne committed Jul 31, 2024
1 parent aa39898 commit bbfa1bc
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 2 deletions.
49 changes: 49 additions & 0 deletions bridges/SubstackBridge.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
<?php

/**
 * Bridge that expands a Substack RSS feed.
 *
 * When the optional `sid` option is set, its value is sent as the
 * `substack.sid` cookie along with browser-like headers when fetching the feed
 * (per DESCRIPTION, full content requires an active subscription).
 */
class SubstackBridge extends FeedExpander
{
    const MAINTAINER = 'sqrtminusone';
    const NAME = 'Substack Bridge';
    const URI = 'https://substack.com/';
    const CACHE_TIMEOUT = 3600; // 1 hour
    const DESCRIPTION = 'Full-content Substack feed. Requires active subscription.';

    // Optional `sid` option: the value placed in the `substack.sid` cookie.
    const CONFIGURATION = [
        'sid' => [
            'required' => false,
        ]
    ];

    const PARAMETERS = [
        '' => [
            'url' => [
                'name' => 'Substack RSS URL',
                'required' => true,
                'type' => 'text',
                'defaultValue' => 'https://newsletter.pragmaticengineer.com/feed',
                'title' => 'Usually https://<blog-url>/feed'
            ]
        ]
    ];

    /**
     * Fetch the feed at the `url` input and hand it to the feed expander.
     *
     * Without a `sid` option the feed is requested with no extra headers.
     * With one, a cookie set and browser-like headers are added to the request.
     */
    public function collectData()
    {
        $url = $this->getInput('url');
        $sid = $this->getOption('sid');

        $headers = [];
        if ($sid) {
            // The trailing "Z" in the format denotes UTC, so the timestamp must
            // be built in UTC rather than in the server's default timezone.
            $now = new DateTimeImmutable('now', new DateTimeZone('UTC'));

            $cookies = [
                'ab_experiment_sampled=%22false%22',
                'substack.sid=' . $sid,
                'substack.lli=1',
                'intro_popup_last_hidden_at=' . $now->format('Y-m-d\TH:i:s.v\Z')
            ];

            // parse_url() yields null (no host component) or false (malformed URL)
            // when a host cannot be extracted; only emit the header for a real host
            // instead of indexing a missing 'host' key.
            $host = parse_url((string) $url, PHP_URL_HOST);
            if (is_string($host) && $host !== '') {
                $headers[] = 'Authority: ' . $host;
            }

            // NOTE(review): the session cookie is sent to whatever host the
            // user-supplied URL points at. Substack blogs may live on custom
            // domains, so no host allow-list is applied here — confirm this
            // exposure is acceptable for the deployment.
            $headers[] = 'Cache-Control: max-age=0';
            $headers[] = 'User-Agent: ' . 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36';
            $headers[] = 'Cookie: ' . implode('; ', $cookies);
        }

        // -1 is passed as the item limit, matching the default of collectExpandableDatas().
        $this->collectExpandableDatas($url, -1, $headers);
    }
}
4 changes: 2 additions & 2 deletions lib/FeedExpander.php
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ abstract class FeedExpander extends BridgeAbstract
{
private array $feed;

public function collectExpandableDatas(string $url, $maxItems = -1)
public function collectExpandableDatas(string $url, $maxItems = -1, $headers = [])
{
if (!$url) {
throw new \Exception('There is no $url for this RSS expander');
Expand All @@ -17,7 +17,7 @@ public function collectExpandableDatas(string $url, $maxItems = -1)
$maxItems = 999;
}
$accept = [MrssFormat::MIME_TYPE, AtomFormat::MIME_TYPE, '*/*'];
$httpHeaders = ['Accept: ' . implode(', ', $accept)];
$httpHeaders = array_merge(['Accept: ' . implode(', ', $accept)], $headers);
$xmlString = getContents($url, $httpHeaders);
if ($xmlString === '') {
throw new \Exception(sprintf('Unable to parse xml from `%s` because we got the empty string', $url), 10);
Expand Down
5 changes: 5 additions & 0 deletions lib/FeedParser.php
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,11 @@ public function parseRss2Item(\SimpleXMLElement $feedItem): array
if (isset($namespaces['media'])) {
$media = $feedItem->children($namespaces['media']);
}
// xmlns:content="http://purl.org/rss/1.0/modules/content/"
if (isset($namespaces['content'])) {
$content = $feedItem->children($namespaces['content']);
$item['content'] = (string) $content;
}
foreach ($namespaces as $namespaceName => $namespaceUrl) {
if (in_array($namespaceName, ['', 'content', 'media'])) {
continue;
Expand Down

0 comments on commit bbfa1bc

Please sign in to comment.