Skip to content

Commit

Permalink
add support for webtoon.xyz
Browse files Browse the repository at this point in the history
  • Loading branch information
Dragonatorul committed Jan 31, 2024
1 parent 22647c2 commit 4cbbeb7
Show file tree
Hide file tree
Showing 5 changed files with 170 additions and 0 deletions.
6 changes: 6 additions & 0 deletions docs/supportedsites.md
Original file line number Diff line number Diff line change
Expand Up @@ -973,6 +973,12 @@ Consider all listed sites to potentially be NSFW.
<td>Comics, Episodes</td>
<td></td>
</tr>
<tr>
<td>Webtoon.xyz</td>
<td>https://www.webtoon.xyz/</td>
<td>Chapters, Manga</td>
<td></td>
</tr>
<tr>
<td>Weibo</td>
<td>https://www.weibo.com/</td>
Expand Down
1 change: 1 addition & 0 deletions gallery_dl/extractor/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,7 @@
"weasyl",
"webmshare",
"webtoons",
"webtoonxyz",
"weibo",
"wikiart",
"wikifeet",
Expand Down
100 changes: 100 additions & 0 deletions gallery_dl/extractor/webtoonxyz.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
# -*- coding: utf-8 -*-

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extractors for https://www.webtoon.xyz/"""

from .common import ChapterExtractor, MangaExtractor
from .. import text, exception
import re


class WebtoonxyzBase():
    """Base class for Webtoon.xyz extractors

    Holds the shared 'category' and 'root' values and a helper that
    turns a chapter heading string into metadata fields.
    """
    category = "webtoonxyz"
    root = "https://www.webtoon.xyz"

    @staticmethod
    def parse_chapter_string(chapter_string, data):
        """Parse 'chapter_string' and store the result in 'data'

        The expected heading shape is
            [MANGA - ]Chapter N[.M][ - TITLE]
        (HTML entities are unescaped before matching).

        The following keys of 'data' are written in place:
        'manga', 'chapter', 'chapter_minor', 'title', 'lang', 'language'.
        An already present 'manga' value is kept and takes precedence
        over the name found in the heading.

        Headings that do not contain a "Chapter N" part no longer raise
        an AttributeError; they are stored with chapter 0 and the whole
        heading as title.

        Nothing is returned.
        """
        heading = text.unescape(chapter_string).strip()
        match = re.match(
            r"(?:(.+)\s*-\s*)?[Cc]hapter\s*(\d+)(\.\d+)?(?:\s*-\s*(.+))?",
            heading)

        if match:
            manga, chapter, minor, title = match.groups()
            chapter = text.parse_int(chapter)
        else:
            # re.match() returns None for headings without a chapter
            # number; keep the heading as title rather than crashing
            manga = minor = None
            chapter = 0
            title = heading

        manga = manga.strip() if manga else ""
        # pop + re-assign: an existing 'manga' entry wins over the parsed one
        data["manga"] = data.pop("manga", manga)
        data["chapter"] = chapter
        data["chapter_minor"] = minor or ""
        data["title"] = title or ""
        data["lang"] = "en"
        data["language"] = "English"


class WebtoonxyzChapterExtractor(WebtoonxyzBase, ChapterExtractor):
    """Extractor for manga-chapters from www.webtoon.xyz"""
    pattern = (r"(?:https?://)?(?:www\.)?webtoon\.xyz"
               r"(/read/[^/?#]+/[^/?#]+)")
    example = "https://www.webtoon.xyz/read/MANGA/chapter-01/"

    def metadata(self, page):
        """Return a metadata dict built from a chapter 'page'

        Collects the entries of the tag list and the fields parsed
        from the chapter heading. Raises NotFoundError when the page
        has no (or an empty) chapter heading.
        """
        tag_html = text.extr(page, 'class="wp-manga-tags-list">', '</div>')
        heading = text.extr(page, '<h1 id="chapter-heading">', "</h1>")
        if not heading:
            raise exception.NotFoundError("chapter")

        # only every second element of the split tag list is kept
        tag_parts = text.split_html(tag_html)
        metadata = {"tags": list(tag_parts[::2])}
        self.parse_chapter_string(heading, metadata)
        return metadata

    def images(self, page):
        """Return a list of (image URL, None) tuples for a chapter 'page'"""
        content = text.extr(
            page, '<div class="reading-content">', '<div class="entry-header')

        results = []
        for tag in text.extract_iter(content, '<img id="image-', '>'):
            src = text.extr(tag, 'src="', '"')
            results.append((src.strip(), None))
        return results


class WebtoonxyzMangaExtractor(WebtoonxyzBase, MangaExtractor):
    """Extractor for manga from www.webtoon.xyz"""
    chapterclass = WebtoonxyzChapterExtractor
    pattern = r"(?:https?://)?(?:www\.)?webtoon\.xyz(/read/[^/?#]+)/?$"
    example = "https://www.webtoon.xyz/read/MANGA"

    def chapters(self, page):
        """Return a list of (chapter URL, metadata dict) tuples

        Raises NotFoundError when 'page' is the site's error-404 page.
        List items without a link or link text are skipped.
        """
        if 'class="error404' in page:
            raise exception.NotFoundError("manga")

        data = self.metadata(page)
        result = []
        for chapter in text.extract_iter(
                page, '<li class="wp-manga-chapter', "</li>"):
            url, pos = text.extract(chapter, '<a href="', '"')
            info, _ = text.extract(chapter, ">", "</a>", pos)
            if not url or info is None:
                # malformed list item: nothing to link to or to parse
                continue
            # updates the per-chapter fields of the shared dict in place;
            # every result therefore gets its own copy
            self.parse_chapter_string(info, data)
            result.append((url, data.copy()))
        return result

    def metadata(self, page):
        """Return a dict with manga-level metadata found in 'page'

        A page without a "summary__content" section yields an empty
        'description' instead of raising ValueError.
        """
        extr = text.extract_from(text.extr(
            page, 'class="summary_content">', 'class="manga-action"'))

        # str.find() instead of str.index(): the description is optional
        pos = page.find("summary__content")
        if pos < 0:
            description = ""
        else:
            description = text.unescape(text.remove_html(
                text.extract(page, ">", "</div>", pos)[0] or ""))

        # 'extr' consumes the summary block front to back, so the
        # order of the entries below must follow the page layout
        return {
            "manga"      : text.extr(page, "<h1>", "</h1>").strip(),
            "description": description,
            "rating"     : text.parse_float(
                extr('total_votes">', "</span>").strip()),
            "manga_alt"  : text.remove_html(
                extr("Alternative </h5>\n</div>", "</div>")).split("; "),
            "author"     : list(text.extract_iter(
                extr('class="author-content">', "</div>"), '"tag">', "</a>")),
            "artist"     : list(text.extract_iter(
                extr('class="artist-content">', "</div>"), '"tag">', "</a>")),
            "genres"     : list(text.extract_iter(
                extr('class="genres-content">', "</div>"), '"tag">', "</a>")),
            "type"       : text.remove_html(
                extr("Type </h5>\n</div>", "</div>")),
            "release"    : text.parse_int(text.remove_html(
                extr("Release </h5>\n</div>", "</div>"))),
            "status"     : text.remove_html(
                extr("Status </h5>\n</div>", "</div>")),
        }
1 change: 1 addition & 0 deletions scripts/supportedsites.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@
"wallpapercave" : "Wallpaper Cave",
"webmshare" : "webmshare",
"webtoons" : "Webtoon",
"webtoonxyz" : "Webtoon.xyz",
"wikiart" : "WikiArt.org",
"wikimediacommons": "Wikimedia Commons",
"xbunkr" : "xBunkr",
Expand Down
62 changes: 62 additions & 0 deletions test/results/webtoonxyz.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# -*- coding: utf-8 -*-

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

from gallery_dl.extractor import webtoonxyz
from gallery_dl import exception


__tests__ = (
# regular chapter: image URLs and parsed chapter metadata
{
    "#url"     : "https://www.webtoon.xyz/read/the-world-after-the-end/chapter-105/",
    "#category": ("", "webtoonxyz", "chapter"),
    "#class"   : webtoonxyz.WebtoonxyzChapterExtractor,
    "#pattern" : r"https://www\.webtoon\.xyz/wp-content/uploads/WP-manga/data/manga_[^/]+/[^/]+/[^.]+\.\w+",
    "#count"   : 11,

    "manga"    : "The World After The End",
    "title"    : "",
    "chapter"  : 105,
    "lang"     : "en",
    "language" : "English",
},

# chapter that does not exist
{
    "#url"     : "https://www.webtoon.xyz/read/the-world-after-the-end/chapter-1000000/",
    "#category": ("", "webtoonxyz", "chapter"),
    "#class"   : webtoonxyz.WebtoonxyzChapterExtractor,
    "#exception": exception.NotFoundError,
},

# manga overview: the chapter URLs must belong to the requested manga
# (the pattern previously named a different manga and could not match)
{
    "#url"     : "https://www.webtoon.xyz/read/the-world-after-the-end/",
    "#category": ("", "webtoonxyz", "manga"),
    "#class"   : webtoonxyz.WebtoonxyzMangaExtractor,
    "#pattern" : r"https://www\.webtoon\.xyz/read/the-world-after-the-end/chapter-\d+([_-].+)?/",
    "#count"   : ">= 13",

    "manga"    : "The World After The End",
    "author"   : ["S-Cynaan", "Sing Shong"],
    "artist"   : ["Undead Potato"],
    "genres"   : [
        "Action",
        "Adventure",
        "Fantasy",
    ],
    "rating"   : float,
    "status"   : "OnGoing",
    "lang"     : "en",
    "language" : "English",
    "manga_alt": list,
},

# manga that does not exist
{
    "#url"     : "https://www.webtoon.xyz/read/doesnotexist",
    "#category": ("", "webtoonxyz", "manga"),
    "#class"   : webtoonxyz.WebtoonxyzMangaExtractor,
    "#exception": exception.HttpError,
},

)

0 comments on commit 4cbbeb7

Please sign in to comment.