Skip to content

Commit

Permalink
Better enclosures (#4944)
Browse files Browse the repository at this point in the history
* Better enclosures
#fix #4702
Improvement of #2898

* A few fixes

* Better enclosure titles

* Improve thumbnails

* Implement thumbnail for HTML+XPath

* Avoid duplicate enclosures
#fix #1668

* Fix regex

* Add basic support for media:credit
And use <figure> for enclosures

* Fix link encoding + simplify code

* Fix some SimplePie bugs
Encoding errors in enclosure links

* Remove debugging syslog

* Remove debugging syslog

* SimplePie fix multiple RSS2 enclosures
#fix #4974

* Improve thumbnails

* Performance with yield
Avoid generating all enclosures if not used

* API keep providing enclosures inside content
Clients are typically not showing the enclosures to the users (tested with News+, FeedMe, Readrops, Fluent Reader Lite)

* Lint

* Fix API output enclosure

* Fix API content strcut

* API tolerate enclosures without a type
  • Loading branch information
Alkarex authored Jan 6, 2023
1 parent af84806 commit 8f9c414
Show file tree
Hide file tree
Showing 9 changed files with 211 additions and 89 deletions.
2 changes: 1 addition & 1 deletion app/Controllers/feedController.php
Original file line number Diff line number Diff line change
Expand Up @@ -949,7 +949,7 @@ public function contentSelectorPreviewAction() {
$this->view->htmlContent = $fullContent;
} else {
$this->view->selectorSuccess = false;
$this->view->htmlContent = $entry->content();
$this->view->htmlContent = $entry->content(false);
}
} catch (Exception $e) {
$this->view->fatalError = _t('feedback.sub.feed.selector_preview.http_error');
Expand Down
167 changes: 140 additions & 27 deletions app/Models/Entry.php
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,9 @@ public static function fromArray(array $dao): FreshRSS_Entry {
$dao['content'] = '';
}
if (!empty($dao['thumbnail'])) {
$dao['content'] .= '<p class="enclosure-content"><img src="' . $dao['thumbnail'] . '" alt="" /></p>';
$dao['attributes']['thumbnail'] = [
'url' => $dao['thumbnail'],
];
}
$entry = new FreshRSS_Entry(
$dao['id_feed'] ?? 0,
Expand Down Expand Up @@ -116,15 +118,117 @@ public function authors(bool $asString = false) {
return $this->authors;
}
}
public function content(): string {
return $this->content;

/**
 * Cheap check for whether an address already appears, quoted, in an HTML fragment.
 *
 * The address must be enclosed in a matching pair of single or double quotes.
 * This is deliberately basic: unquoted attribute values, entity variants,
 * HTML comments, etc. are not handled.
 *
 * @param string $html HTML fragment to search in
 * @param string $link address to look for (compared literally, case-sensitive)
 * @return bool true if `"$link"` or `'$link'` occurs in `$html`
 */
private static function containsLink(string $html, string $link): bool {
	foreach (['"', "'"] as $quote) {
		// Same quote character on both sides, like the back-reference of a regex would enforce
		if (strpos($html, $quote . $link . $quote) !== false) {
			return true;
		}
	}
	return false;
}

/**
 * Tells whether an enclosure should be handled as an image.
 *
 * An enclosure is an image when it has a non-empty `url` and at least one of:
 * - its `medium` is exactly `image`;
 * - its `type` (MIME) starts with `image`;
 * - it has neither `type` nor `length`, and its `url` ends with a well-known image file extension.
 *
 * Previously, the non-empty URL test only guarded the `medium` test (`&&` binds tighter than `||`),
 * so an enclosure with an image MIME type but no URL was reported as an image.
 *
 * @param array<string,mixed> $enclosure enclosure description, reading the keys `url`, `length`, `medium`, `type`
 * @return bool true if the enclosure has an address and looks like an image
 */
private static function enclosureIsImage(array $enclosure): bool {
	$elink = $enclosure['url'] ?? '';
	if ($elink == '') {
		// Without an address there is nothing to display, whatever the declared type
		return false;
	}
	$length = $enclosure['length'] ?? 0;
	$medium = $enclosure['medium'] ?? '';
	$mime = $enclosure['type'] ?? '';

	if ($medium === 'image' || strpos($mime, 'image') === 0) {
		return true;
	}
	// Last resort when the feed gives no metadata: guess from the file extension
	return $mime == '' && $length == 0 && preg_match('/[.](avif|gif|jpe?g|png|svg|webp)$/i', $elink) === 1;
}

/** @return array<array<string,string>> */
public function enclosures(bool $searchBodyImages = false): array {
$results = [];
/**
 * Returns the HTML content of the entry, optionally followed by HTML generated for its thumbnail and enclosures.
 *
 * The thumbnail is read from the `thumbnail` attribute and the enclosures from the `enclosures` attribute.
 * Each one is appended as a `<figure class="enclosure">` element after the stored content.
 *
 * @param bool $withEnclosures Set to true to include the enclosures in the returned HTML, false otherwise
 *	(in which case the stored content is returned untouched).
 * @param bool $allowDuplicateEnclosures Set to false to remove obvious enclosure duplicates (based on simple string comparison), true otherwise.
 * @return string HTML content
 */
public function content(bool $withEnclosures = true, bool $allowDuplicateEnclosures = false): string {
if (!$withEnclosures) {
return $this->content;
}

$content = $this->content;

// Entry-level thumbnail: appended unless its address is already quoted somewhere in the content
$thumbnail = $this->attributes('thumbnail');
if (!empty($thumbnail['url'])) {
$elink = $thumbnail['url'];
if ($allowDuplicateEnclosures || !self::containsLink($content, $elink)) {
// NOTE(review): the address is interpolated as-is; presumably it is already HTML-escaped when stored — TODO confirm
$content .= <<<HTML
<figure class="enclosure">
<p class="enclosure-content">
<img class="enclosure-thumbnail" src="{$elink}" alt="" />
</p>
</figure>
HTML;
}
}

$attributeEnclosures = $this->attributes('enclosures');
if (empty($attributeEnclosures)) {
return $content;
}

foreach ($attributeEnclosures as $enclosure) {
$elink = $enclosure['url'] ?? '';
if ($elink == '') {
continue;
}
// The test runs against the content built so far, so it also skips an address
// already emitted for the thumbnail or for a previous enclosure
if (!$allowDuplicateEnclosures && self::containsLink($content, $elink)) {
continue;
}
$credit = $enclosure['credit'] ?? '';
$description = $enclosure['description'] ?? '';
$length = $enclosure['length'] ?? 0;
$medium = $enclosure['medium'] ?? '';
$mime = $enclosure['type'] ?? '';
$thumbnails = $enclosure['thumbnails'] ?? [];
$etitle = $enclosure['title'] ?? '';

// NOTE(review): only $mime and $medium are escaped below; the other values (address, title, credit,
// description, thumbnails) are concatenated as-is, presumably because they are stored HTML-escaped — TODO confirm
$content .= '<figure class="enclosure">';

// Thumbnails of this enclosure; note that this loop reuses (and overwrites) the $thumbnail variable from above
foreach ($thumbnails as $thumbnail) {
$content .= '<p><img class="enclosure-thumbnail" src="' . $thumbnail . '" alt="" title="' . $etitle . '" /></p>';
}

// Choose the HTML element from the medium / MIME type of the enclosure
if (self::enclosureIsImage($enclosure)) {
$content .= '<p class="enclosure-content"><img src="' . $elink . '" alt="" title="' . $etitle . '" /></p>';
} elseif ($medium === 'audio' || strpos($mime, 'audio') === 0) {
// Each optional attribute fragment closes the previous attribute's quote and opens its own;
// the loose `== null` test also omits data-length when the length is 0 or an empty string
$content .= '<p class="enclosure-content"><audio preload="none" src="' . $elink
. ($length == null ? '' : '" data-length="' . intval($length))
. ($mime == '' ? '' : '" data-type="' . htmlspecialchars($mime, ENT_COMPAT, 'UTF-8'))
. '" controls="controls" title="' . $etitle . '"></audio> <a download="" href="' . $elink . '">💾</a></p>';
} elseif ($medium === 'video' || strpos($mime, 'video') === 0) {
// Same construction as for audio, with a <video> element
$content .= '<p class="enclosure-content"><video preload="none" src="' . $elink
. ($length == null ? '' : '" data-length="' . intval($length))
. ($mime == '' ? '' : '" data-type="' . htmlspecialchars($mime, ENT_COMPAT, 'UTF-8'))
. '" controls="controls" title="' . $etitle . '"></video> <a download="" href="' . $elink . '">💾</a></p>';
} else { //e.g. application, text, unknown
// Anything else is only offered as a download link
$content .= '<p class="enclosure-content"><a download="" href="' . $elink
. ($mime == '' ? '' : '" data-type="' . htmlspecialchars($mime, ENT_COMPAT, 'UTF-8'))
. ($medium == '' ? '' : '" data-medium="' . htmlspecialchars($medium, ENT_COMPAT, 'UTF-8'))
. '" title="' . $etitle . '">💾</a></p>';
}

if ($credit != '') {
$content .= '<p class="enclosure-credits">© ' . $credit . '</p>';
}
if ($description != '') {
$content .= '<figcaption class="enclosure-description">' . $description . '</figcaption>';
}
$content .= "</figure>\n";
}

return $content;
}

/** @return iterable<array<string,string>> */
public function enclosures(bool $searchBodyImages = false) {
$attributeEnclosures = $this->attributes('enclosures');
if (is_array($attributeEnclosures)) {
// FreshRSS 1.20.1+: The enclosures are saved as attributes
yield from $attributeEnclosures;
}
try {
$searchEnclosures = strpos($this->content, '<p class="enclosure-content') !== false;
$searchEnclosures = !is_array($attributeEnclosures) && (strpos($this->content, '<p class="enclosure-content') !== false);
$searchBodyImages &= (stripos($this->content, '<img') !== false);
$xpath = null;
if ($searchEnclosures || $searchBodyImages) {
Expand All @@ -133,6 +237,7 @@ public function enclosures(bool $searchBodyImages = false): array {
$xpath = new DOMXpath($dom);
}
if ($searchEnclosures) {
// Legacy code for database entries < FreshRSS 1.20.1
$enclosures = $xpath->query('//div[@class="enclosure"]/p[@class="enclosure-content"]/*[@src]');
foreach ($enclosures as $enclosure) {
$result = [
Expand All @@ -148,7 +253,7 @@ public function enclosures(bool $searchBodyImages = false): array {
case 'audio': $result['medium'] = 'audio'; break;
}
}
$results[] = $result;
yield Minz_Helper::htmlspecialchars_utf8($result);
}
}
if ($searchBodyImages) {
Expand All @@ -159,26 +264,31 @@ public function enclosures(bool $searchBodyImages = false): array {
$src = $img->getAttribute('data-src');
}
if ($src != null) {
$results[] = [
$result = [
'url' => $src,
'alt' => $img->getAttribute('alt'),
];
yield Minz_Helper::htmlspecialchars_utf8($result);
}
}
}
return $results;
} catch (Exception $ex) {
return $results;
Minz_Log::debug(__METHOD__ . ' ' . $ex->getMessage());
}
}

/**
* @return array<string,string>|null
*/
public function thumbnail() {
foreach ($this->enclosures(true) as $enclosure) {
if (!empty($enclosure['url']) && empty($enclosure['type'])) {
return $enclosure;
public function thumbnail(bool $searchEnclosures = true) {
$thumbnail = $this->attributes('thumbnail');
if (!empty($thumbnail['url'])) {
return $thumbnail;
}
if ($searchEnclosures) {
foreach ($this->enclosures(true) as $enclosure) {
if (self::enclosureIsImage($enclosure)) {
return $enclosure;
}
}
}
return null;
Expand Down Expand Up @@ -587,7 +697,7 @@ public function loadCompleteContent(bool $force = false): bool {

if ($entry) {
// The article already exists in the database, so we simply reload its content
$this->content = $entry->content();
$this->content = $entry->content(false);
} else {
try {
// The article is not yet in the database, so let’s fetch it
Expand Down Expand Up @@ -629,7 +739,7 @@ public function toArray(): array {
'guid' => $this->guid(),
'title' => $this->title(),
'author' => $this->authors(true),
'content' => $this->content(),
'content' => $this->content(false),
'link' => $this->link(),
'date' => $this->date(true),
'hash' => $this->hash(),
Expand Down Expand Up @@ -677,7 +787,6 @@ public function toGReader(string $mode = ''): array {
'published' => $this->date(true),
// 'updated' => $this->date(true),
'title' => $this->title(),
'summary' => ['content' => $this->content()],
'canonical' => [
['href' => htmlspecialchars_decode($this->link(), ENT_QUOTES)],
],
Expand All @@ -697,13 +806,16 @@ public function toGReader(string $mode = ''): array {
if ($mode === 'compat') {
$item['title'] = escapeToUnicodeAlternative($this->title(), false);
unset($item['alternate'][0]['type']);
if (mb_strlen($this->content(), 'UTF-8') > self::API_MAX_COMPAT_CONTENT_LENGTH) {
$item['summary']['content'] = mb_strcut($this->content(), 0, self::API_MAX_COMPAT_CONTENT_LENGTH, 'UTF-8');
}
} elseif ($mode === 'freshrss') {
$item['summary'] = [
'content' => mb_strcut($this->content(true), 0, self::API_MAX_COMPAT_CONTENT_LENGTH, 'UTF-8'),
];
} else {
$item['content'] = [
'content' => $this->content(false),
];
}
if ($mode === 'freshrss') {
$item['guid'] = $this->guid();
unset($item['summary']);
$item['content'] = ['content' => $this->content()];
}
if ($category != null && $mode !== 'freshrss') {
$item['categories'][] = 'user/-/label/' . htmlspecialchars_decode($category->name(), ENT_QUOTES);
Expand All @@ -718,10 +830,11 @@ public function toGReader(string $mode = ''): array {
}
}
foreach ($this->enclosures() as $enclosure) {
if (!empty($enclosure['url']) && !empty($enclosure['type'])) {
if (!empty($enclosure['url'])) {
$media = [
'href' => $enclosure['url'],
'type' => $enclosure['type'],
'type' => $enclosure['type'] ?? $enclosure['medium'] ??
(self::enclosureIsImage($enclosure) ? 'image' : ''),
];
if (!empty($enclosure['length'])) {
$media['length'] = intval($enclosure['length']);
Expand Down
73 changes: 31 additions & 42 deletions app/Models/Feed.php
Original file line number Diff line number Diff line change
Expand Up @@ -502,61 +502,46 @@ public function loadEntries(SimplePie $simplePie) {

$content = html_only_entity_decode($item->get_content());

if ($item->get_enclosures() != null) {
$elinks = array();
$attributeThumbnail = $item->get_thumbnail() ?? [];
if (empty($attributeThumbnail['url'])) {
$attributeThumbnail['url'] = '';
}

$attributeEnclosures = [];
if (!empty($item->get_enclosures())) {
foreach ($item->get_enclosures() as $enclosure) {
$elink = $enclosure->get_link();
if ($elink != '' && empty($elinks[$elink])) {
$content .= '<div class="enclosure">';

if ($enclosure->get_title() != '') {
$content .= '<p class="enclosure-title">' . $enclosure->get_title() . '</p>';
}

$enclosureContent = '';
$elinks[$elink] = true;
if ($elink != '') {
$etitle = $enclosure->get_title() ?? '';
$credit = $enclosure->get_credit() ?? null;
$description = $enclosure->get_description() ?? '';
$mime = strtolower($enclosure->get_type() ?? '');
$medium = strtolower($enclosure->get_medium() ?? '');
$height = $enclosure->get_height();
$width = $enclosure->get_width();
$length = $enclosure->get_length();
if ($medium === 'image' || strpos($mime, 'image') === 0 ||
($mime == '' && $length == null && ($width != 0 || $height != 0 || preg_match('/[.](avif|gif|jpe?g|png|svg|webp)$/i', $elink)))) {
$enclosureContent .= '<p class="enclosure-content"><img src="' . $elink . '" alt="" /></p>';
} elseif ($medium === 'audio' || strpos($mime, 'audio') === 0) {
$enclosureContent .= '<p class="enclosure-content"><audio preload="none" src="' . $elink
. ($length == null ? '' : '" data-length="' . intval($length))
. ($mime == '' ? '' : '" data-type="' . htmlspecialchars($mime, ENT_COMPAT, 'UTF-8'))
. '" controls="controls"></audio> <a download="" href="' . $elink . '">💾</a></p>';
} elseif ($medium === 'video' || strpos($mime, 'video') === 0) {
$enclosureContent .= '<p class="enclosure-content"><video preload="none" src="' . $elink
. ($length == null ? '' : '" data-length="' . intval($length))
. ($mime == '' ? '' : '" data-type="' . htmlspecialchars($mime, ENT_COMPAT, 'UTF-8'))
. '" controls="controls"></video> <a download="" href="' . $elink . '">💾</a></p>';
} else { //e.g. application, text, unknown
$enclosureContent .= '<p class="enclosure-content"><a download="" href="' . $elink
. ($mime == '' ? '' : '" data-type="' . htmlspecialchars($mime, ENT_COMPAT, 'UTF-8'))
. ($medium == '' ? '' : '" data-medium="' . htmlspecialchars($medium, ENT_COMPAT, 'UTF-8'))
. '">💾</a></p>';
}

$thumbnailContent = '';
if ($enclosure->get_thumbnails() != null) {
$attributeEnclosure = [
'url' => $elink,
];
if ($etitle != '') $attributeEnclosure['title'] = $etitle;
if ($credit != null) $attributeEnclosure['credit'] = $credit->get_name();
if ($description != '') $attributeEnclosure['description'] = $description;
if ($mime != '') $attributeEnclosure['type'] = $mime;
if ($medium != '') $attributeEnclosure['medium'] = $medium;
if ($length != '') $attributeEnclosure['length'] = intval($length);
if ($height != '') $attributeEnclosure['height'] = intval($height);
if ($width != '') $attributeEnclosure['width'] = intval($width);

if (!empty($enclosure->get_thumbnails())) {
foreach ($enclosure->get_thumbnails() as $thumbnail) {
if (empty($elinks[$thumbnail])) {
$elinks[$thumbnail] = true;
$thumbnailContent .= '<p><img class="enclosure-thumbnail" src="' . $thumbnail . '" alt="" /></p>';
if ($thumbnail !== $attributeThumbnail['url']) {
$attributeEnclosure['thumbnails'][] = $thumbnail;
}
}
}

$content .= $thumbnailContent;
$content .= $enclosureContent;

if ($enclosure->get_description() != '') {
$content .= '<p class="enclosure-description">' . $enclosure->get_description() . '</p>';
}
$content .= "</div>\n";
$attributeEnclosures[] = $attributeEnclosure;
}
}
}
Expand Down Expand Up @@ -586,6 +571,10 @@ public function loadEntries(SimplePie $simplePie) {
);
$entry->_tags($tags);
$entry->_feed($this);
if (!empty($attributeThumbnail['url'])) {
$entry->_attributes('thumbnail', $attributeThumbnail);
}
$entry->_attributes('enclosures', $attributeEnclosures);
$entry->hash(); //Must be computed before loading full content
$entry->loadCompleteContent(); // Optionally load full content for truncated feeds

Expand Down
5 changes: 2 additions & 3 deletions app/views/helpers/index/normal/entry_header.phtml
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,7 @@
?><li class="item thumbnail <?= $topline_thumbnail ?> <?= $topline_summary ? '' : 'small' ?>"><?php
$thumbnail = $this->entry->thumbnail();
if ($thumbnail != null):
?><img src="<?= htmlspecialchars($thumbnail['url'], ENT_COMPAT, 'UTF-8') ?>" class="item-element "<?= $lazyload ? ' loading="lazy"' : '' ?><?=
empty($thumbnail['alt']) ? '' : ' alt="' . htmlspecialchars(strip_tags($thumbnail['alt']), ENT_COMPAT, 'UTF-8') . '"' ?> /><?php
?><img src="<?= $thumbnail['url'] ?>" class="item-element "<?= $lazyload ? ' loading="lazy"' : '' ?> alt="" /><?php
endif;
?></li><?php
endif; ?>
Expand All @@ -62,7 +61,7 @@
?></span><?php
endif;
if ($topline_summary):
?><div class="summary"><?= trim(mb_substr(strip_tags($this->entry->content()), 0, 500, 'UTF-8')) ?></div><?php
?><div class="summary"><?= trim(mb_substr(strip_tags($this->entry->content(false)), 0, 500, 'UTF-8')) ?></div><?php
endif;
?></a></li>
<?php if ($topline_date) { ?><li class="item date"><time datetime="<?= $this->entry->machineReadableDate() ?>" class="item-element"><?= $this->entry->date() ?></time>&nbsp;</li><?php } ?>
Expand Down
2 changes: 1 addition & 1 deletion app/views/index/normal.phtml
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ $today = @strtotime('today');
<?php } ?>
</header>
<div class="text"><?php
echo $lazyload && $hidePosts ? lazyimg($this->entry->content()) : $this->entry->content();
echo $lazyload && $hidePosts ? lazyimg($this->entry->content(true)) : $this->entry->content(true);
?></div>
<?php
$display_authors_date = FreshRSS_Context::$user_conf->show_author_date === 'f' || FreshRSS_Context::$user_conf->show_author_date === 'b';
Expand Down
Loading

0 comments on commit 8f9c414

Please sign in to comment.