Skip to content

Commit

Permalink
refactor
Browse files Browse the repository at this point in the history
  • Loading branch information
dvikan committed Oct 12, 2023
1 parent 22b7521 commit 3aa92ff
Show file tree
Hide file tree
Showing 2 changed files with 90 additions and 83 deletions.
107 changes: 24 additions & 83 deletions lib/FeedExpander.php
Original file line number Diff line number Diff line change
Expand Up @@ -9,23 +9,24 @@ abstract class FeedExpander extends BridgeAbstract
const FEED_TYPE_RSS_2_0 = 'RSS_2_0';
const FEED_TYPE_ATOM_1_0 = 'ATOM_1_0';

private $title;
private $uri;
private $icon;
private $feedType;
private string $feedType;
private FeedParser $feedParser;
private array $parsedFeed;

/**
 * Forwards the cache and logger to the parent constructor and creates the
 * FeedParser instance stored in $this->feedParser.
 *
 * @param CacheInterface $cache  Passed unchanged to the parent constructor.
 * @param Logger         $logger Passed unchanged to the parent constructor.
 */
public function __construct(CacheInterface $cache, Logger $logger)
{
parent::__construct($cache, $logger);
$this->feedParser = new FeedParser();
}

public function collectExpandableDatas($url, $maxItems = -1)
public function collectExpandableDatas(string $url, $maxItems = -1)
{
if (empty($url)) {
if (!$url) {
throw new \Exception('There is no $url for this RSS expander');
}
if ($maxItems === -1) {
$maxItems = 999;
}
$accept = [MrssFormat::MIME_TYPE, AtomFormat::MIME_TYPE, '*/*'];
$httpHeaders = ['Accept: ' . implode(', ', $accept)];
// Notice we do not use cache here on purpose. We want a fresh view of the RSS stream each time
Expand Down Expand Up @@ -53,80 +54,29 @@ public function collectExpandableDatas($url, $maxItems = -1)
// Restore previous behaviour in case other code relies on it being off
libxml_use_internal_errors(false);

$this->parsedFeed = $this->feedParser->parseFeed($xmlString);

if (isset($xml->item[0])) {
$this->feedType = self::FEED_TYPE_RSS_1_0;
// loadRss2Data
$channel = $xml->channel[0];
$this->title = trim((string)$channel->title);
$this->uri = trim((string)$channel->link);
if (!empty($channel->image)) {
$this->icon = trim((string)$channel->image->url);
}
// todo: set title, link, description, language, and so on
foreach ($xml->item as $item) {
$parsedItem = $this->parseItem($item);
if (!empty($parsedItem)) {
$this->items[] = $parsedItem;
}
if ($maxItems !== -1 && count($this->items) >= $maxItems) {
break;
}
}
$items = $xml->item;
} elseif (isset($xml->channel[0])) {
$this->feedType = self::FEED_TYPE_RSS_2_0;
// loadRss2Data
$channel = $xml->channel[0];
$this->title = trim((string)$channel->title);
$this->uri = trim((string)$channel->link);
if (!empty($channel->image)) {
$this->icon = trim((string)$channel->image->url);
}
// todo: set title, link, description, language, and so on
foreach ($channel->item as $item) {
$parsedItem = $this->parseItem($item);
if (!empty($parsedItem)) {
$this->items[] = $parsedItem;
}
if ($maxItems !== -1 && count($this->items) >= $maxItems) {
break;
}
}
$items = $xml->channel[0]->item;
} elseif (isset($xml->entry[0])) {
$this->feedType = self::FEED_TYPE_ATOM_1_0;
// loadAtomData
$this->title = (string)$xml->title;
// Find best link (only one, or first of 'alternate')
if (!isset($xml->link)) {
$this->uri = '';
} elseif (count($xml->link) === 1) {
$this->uri = (string)$xml->link[0]['href'];
} else {
$this->uri = '';
foreach ($xml->link as $link) {
if (strtolower($link['rel']) === 'alternate') {
$this->uri = (string)$link['href'];
break;
}
}
}
if (!empty($xml->icon)) {
$this->icon = (string)$xml->icon;
} elseif (!empty($xml->logo)) {
$this->icon = (string)$xml->logo;
}
// parse items
foreach ($xml->entry as $item) {
$parsedItem = $this->parseItem($item);
if (!empty($parsedItem)) {
$this->items[] = $parsedItem;
}
if ($maxItems !== -1 && count($this->items) >= $maxItems) {
break;
}
}
$items = $xml->entry;
} else {
throw new \Exception(sprintf('Unable to detect feed format from `%s`', $url));
}
foreach ($items as $item) {
$parsedItem = $this->parseItem($item);
if ($parsedItem) {
$this->items[] = $parsedItem;
}
if (count($this->items) >= $maxItems) {
break;
}
}
return $this;
}

Expand All @@ -149,25 +99,16 @@ protected function parseItem($item)

/**
 * Return the URI of the parsed feed.
 *
 * Falls back to the parent implementation when no feed has been parsed yet
 * or when the parsed feed carries no (or an empty) URI. empty() is used
 * instead of `??` because the parser stores '' for feeds without a usable
 * link, and `??` would hand that empty string to the caller.
 */
public function getURI()
{
    if (!empty($this->parsedFeed['uri'])) {
        return $this->parsedFeed['uri'];
    }

    return parent::getURI();
}

/**
 * Return the title of the parsed feed.
 *
 * Falls back to the parent implementation when no feed has been parsed yet
 * or when the parsed title is missing or empty. empty() is used instead of
 * `??` because a feed with a blank title is stored as '' and `??` only
 * falls back on null.
 */
public function getName()
{
    if (!empty($this->parsedFeed['title'])) {
        return $this->parsedFeed['title'];
    }

    return parent::getName();
}

/**
 * Return the icon URL of the parsed feed.
 *
 * Falls back to the parent implementation when no feed has been parsed yet
 * or when the parsed feed has no (or an empty) icon. empty() is used instead
 * of `??` so that an empty icon string does not suppress the fallback.
 */
public function getIcon()
{
    if (!empty($this->parsedFeed['icon'])) {
        return $this->parsedFeed['icon'];
    }

    return parent::getIcon();
}
}
66 changes: 66 additions & 0 deletions lib/FeedParser.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,72 @@

final class FeedParser
{
/**
 * Parse an RSS 1.0, RSS 2.0 or Atom 1.0 document into a plain array.
 *
 * The format is detected from the document structure, in this order:
 * top-level <item> elements (RSS 1.0), a <channel> element (RSS 2.0),
 * top-level <entry> elements (Atom 1.0).
 *
 * @param string $xmlString Raw feed XML; surrounding whitespace is trimmed before parsing.
 *
 * @return array{title: ?string, uri: ?string, icon: ?string, items: array}
 *         Feed metadata plus one entry in 'items' per feed item, as produced
 *         by the per-format item parsers.
 *
 * @throws \Exception If the XML cannot be parsed or matches none of the known formats.
 */
public function parseFeed(string $xmlString): array
{
    $xml = simplexml_load_string(trim($xmlString));
    if ($xml === false) {
        throw new \Exception('Unable to parse xml');
    }

    // Every key a consumer may read is pre-seeded so lookups never hit an undefined index.
    $feed = [
        'title' => null,
        'uri'   => null,
        'icon'  => null,
        'items' => [],
    ];

    if (isset($xml->item[0])) {
        // rss 1.0: items are siblings of <channel>, not children of it
        $channel = $xml->channel[0];
        $feed['title'] = trim((string)$channel->title);
        $feed['uri'] = trim((string)$channel->link);
        if (!empty($channel->image)) {
            $feed['icon'] = trim((string)$channel->image->url);
        }
        foreach ($xml->item as $item) {
            $feed['items'][] = $this->parseRss1Item($item);
        }
    } elseif (isset($xml->channel[0])) {
        // rss 2.0: items live inside <channel>
        $channel = $xml->channel[0];
        $feed['title'] = trim((string)$channel->title);
        $feed['uri'] = trim((string)$channel->link);
        if (!empty($channel->image)) {
            $feed['icon'] = trim((string)$channel->image->url);
        }
        foreach ($channel->item as $item) {
            $feed['items'][] = $this->parseRss2Item($item);
        }
    } elseif (isset($xml->entry[0])) {
        // atom 1.0
        $feed['title'] = (string)$xml->title;
        // Find best link (only one, or first of 'alternate')
        if (!isset($xml->link)) {
            $feed['uri'] = '';
        } elseif (count($xml->link) === 1) {
            $feed['uri'] = (string)$xml->link[0]['href'];
        } else {
            $feed['uri'] = '';
            foreach ($xml->link as $link) {
                if (strtolower((string) $link['rel']) === 'alternate') {
                    $feed['uri'] = (string)$link['href'];
                    break;
                }
            }
        }
        // Prefer <icon>; fall back to <logo> when no icon is given
        if (!empty($xml->icon)) {
            $feed['icon'] = (string)$xml->icon;
        } elseif (!empty($xml->logo)) {
            $feed['icon'] = (string)$xml->logo;
        }
        foreach ($xml->entry as $item) {
            $feed['items'][] = $this->parseAtomItem($item);
        }
    } else {
        // The parser only receives the XML string, so no source URL is available for the message.
        throw new \Exception('Unable to detect feed format');
    }

    return $feed;
}

public function parseAtomItem(\SimpleXMLElement $feedItem): array
{
// Some ATOM entries also contain RSS 2.0 fields
Expand Down

0 comments on commit 3aa92ff

Please sign in to comment.