Skip to content

Commit

Permalink
[reddit] support comment embeds (#5366)
Browse files Browse the repository at this point in the history
  • Loading branch information
mikf committed Apr 1, 2024
1 parent 64948f2 commit 095e5de
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 3 deletions.
39 changes: 36 additions & 3 deletions gallery_dl/extractor/reddit.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,8 @@ def items(self):
yield Message.Url, url, submission

elif "gallery_data" in media:
for submission["num"], url in enumerate(
self._extract_gallery(media), 1):
for url in self._extract_gallery(media):
submission["num"] += 1
text.nameext_from_url(url, submission)
yield Message.Url, url, submission

Expand All @@ -99,14 +99,25 @@ def items(self):
urls.append((url, submission))
for comment in comments:
html = comment["body_html"] or ""
if ' href="' in html:
href = (' href="' in html)
media = ("media_metadata" in comment)

if media or href:
comment["date"] = text.parse_timestamp(
comment["created_utc"])
if submission:
data = submission.copy()
data["comment"] = comment
else:
data = comment

if media:
for embed in self._extract_embed(comment):
submission["num"] += 1
text.nameext_from_url(embed, submission)
yield Message.Url, embed, submission

if href:
for url in text.extract_iter(html, ' href="', '"'):
urls.append((url, data))

Expand All @@ -118,6 +129,7 @@ def items(self):
if url.startswith((
"https://www.reddit.com/message/compose",
"https://reddit.com/message/compose",
"https://preview.redd.it/",
)):
continue

Expand Down Expand Up @@ -172,6 +184,27 @@ def _extract_gallery(self, submission):
submission["id"], item["media_id"])
self.log.debug(src)

def _extract_embed(self, submission):
    """Yield direct media URLs for the embeds listed in 'media_metadata'.

    'submission' is any mapping with "id" and "media_metadata" keys;
    the visible caller passes a comment object here.

    Each entry of "media_metadata" is expected to be a dict with a
    "status" and, when usable, an "s" dict holding the source URL under
    "u", "gif", or "mp4" (checked in that order).

    Entries that are not marked "valid" or have no "s" are skipped with
    a warning; entries without any usable URL are reported as errors.
    Nothing is yielded when "media_metadata" is empty or None.
    """
    meta = submission["media_metadata"]
    if not meta:
        return

    for mid, data in meta.items():
        # Use .get() so an entry lacking "status" is skipped and logged
        # instead of aborting the whole extraction with a KeyError.
        status = data.get("status")
        if status != "valid" or "s" not in data:
            self.log.warning(
                "embed %s: skipping item %s (status: %s)",
                submission["id"], mid, status)
            continue

        src = data["s"]
        url = src.get("u") or src.get("gif") or src.get("mp4")
        if not url:
            self.log.error(
                "embed %s: unable to fetch download URL for item %s",
                submission["id"], mid)
            self.log.debug(src)
            continue

        # Drop the query string and rewrite the first "/preview."
        # occurrence to "/i." (e.g. preview.redd.it -> i.redd.it).
        yield url.partition("?")[0].replace("/preview.", "/i.", 1)

def _extract_video_ytdl(self, submission):
    """Return the full reddit.com URL for the submission's permalink."""
    permalink = submission["permalink"]
    root = "https://www.reddit.com"
    return root + permalink

Expand Down
12 changes: 12 additions & 0 deletions test/results/reddit.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,18 @@
"#count" : 0,
},

{
"#url" : "https://www.reddit.com/r/RobloxArt/comments/15ko0qu/",
"#comment" : "comment embeds (#5366)",
"#category": ("", "reddit", "submission"),
"#class" : reddit.RedditSubmissionExtractor,
"#options" : {"comments": 10},
"#urls" : (
"https://i.redd.it/ppt5yciyipgb1.jpg",
"https://i.redd.it/u0ojzd69kpgb1.png",
),
},

{
"#url" : "https://www.reddit.com/user/TheSpiritTree/comments/srilyf/",
"#comment" : "user page submission (#2301)",
Expand Down

0 comments on commit 095e5de

Please sign in to comment.