diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 660558ca4aa..515876b18d1 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -271,6 +271,12 @@ Consider all listed sites to potentially be NSFW. Favorites, Pools, Posts, Redirects, Tag Searches + + Girls With Muscle + https://www.girlswithmuscle.com/ + Posts, Galleries, Search Results, Favorites + Supported + Gofile https://gofile.io/ diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index e103cb1b565..3ed09a02395 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -55,6 +55,7 @@ "gelbooru", "gelbooru_v01", "gelbooru_v02", + "girlswithmuscle", "gofile", "hatenablog", "hentai2read", diff --git a/gallery_dl/extractor/girlswithmuscle.py b/gallery_dl/extractor/girlswithmuscle.py new file mode 100644 index 00000000000..30bcb35b918 --- /dev/null +++ b/gallery_dl/extractor/girlswithmuscle.py @@ -0,0 +1,216 @@ +# -*- coding: utf-8 -*- + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +import re + +from .common import Extractor, Message +from .. 
from .. import text, exception
from ..cache import cache

# NOTE(review): this block was recovered from a patch whose "<...>" sequences
# had been stripped.  Every string literal tagged "NOTE(review)" below was
# reconstructed from context (surviving code + the expected test results) and
# must be confirmed against the live site before merging.


class GirlswithmuscleExtractor(Extractor):
    """Base class for girlswithmuscle.com extractors (shared login logic)"""
    _root_url = "https://www.girlswithmuscle.com"

    def login(self):
        """Log in when credentials are configured; otherwise do nothing"""
        username, password = self._get_auth_info()
        if username:
            self.cookies_update(self._login_impl(username, password))

    @staticmethod
    def _is_logged_in(page_text: str) -> bool:
        """Return True if 'page_text' does not contain a 'Log in' link"""
        return 'Log in' not in page_text

    @staticmethod
    def _get_csrfmiddlewaretoken(page: str) -> str:
        """Return the value of the hidden 'csrfmiddlewaretoken' form field"""
        return text.extract(
            page,
            'name="csrfmiddlewaretoken" value="',
            '"'
        )[0]

    def _open_login_page(self):
        """Fetch the login form and return its CSRF form token

        Requesting the page is also what makes the server set the CSRF
        cookie that has to accompany the login POST.
        """
        url = self._root_url + "/login/?next=/"
        response = self.request(url)
        return self._get_csrfmiddlewaretoken(response.text)

    def _send_login_request(self, username, password, csrf_mw):
        """Submit the login form

        Raises AuthenticationError if the site rejects the credentials or
        the response still looks like a logged-out page.
        """
        url = self._root_url + "/login/"
        data = {
            "csrfmiddlewaretoken": csrf_mw,
            "username": username,
            "password": password,
            "next": "/"
        }
        # The POST is answered with '403 Forbidden' without these headers.
        # They are sent with this request only, instead of being written
        # into the session, so they do not leak into every later request.
        headers = {
            "Origin": self._root_url,
            "Referer": url + "?next=/",
        }

        # if successful, the session cookies get updated
        response = self.request(
            url, method="POST", headers=headers, data=data)

        if "Wrong username or password" in response.text:
            raise exception.AuthenticationError()
        elif not self._is_logged_in(response.text):
            raise exception.AuthenticationError("Account data is missing")

    @cache(maxage=28 * 86400, keyarg=1)
    def _login_impl(self, username, password):
        """Perform the login and return the resulting cookies as a dict"""
        self.log.info("Logging in as %s", username)

        csrf_mw = self._open_login_page()
        self._send_login_request(username, password, csrf_mw)
        return {c.name: c.value for c in self.session.cookies}


class GirlswithmusclePostExtractor(GirlswithmuscleExtractor):
    """Extractor for individual posts on girlswithmuscle.com"""
    category = "girlswithmuscle"
    subcategory = "post"
    directory_fmt = ("{category}", "{model}")
    filename_fmt = "{model}_{id}.{extension}"
    archive_fmt = "{type}_{model}_{id}"
    pattern = (r"(?:https?://)?(?:www\.)?girlswithmuscle\.com"
               r"/(\d+)/")

    def __init__(self, match):
        super().__init__(match)
        self.id = match.group(1)

    def items(self):
        """Yield the single picture or video of the post"""
        self.login()
        # same host as the login and gallery requests
        page = self.request("{}/{}/".format(self._root_url, self.id)).text

        # response.text is never None; an empty body is the case to catch
        if not page:
            raise exception.NotFoundError("post")

        url = text.extr(page, 'class="main-image" src="', '"')
        if url:
            metadata = self.metadata(page, url, 'picture')
        else:
            # NOTE(review): reconstructed marker for the <video> source
            url = text.extr(page, '<source src="', '"')
            if not url:
                # neither an image nor a video: nothing to download
                raise exception.NotFoundError("post")
            metadata = self.metadata(page, url, 'video')

        yield Message.Directory, metadata
        yield Message.Url, url, metadata

    def metadata(self, page, url, content_type):
        """Collect the metadata dict for a post

        page:         HTML of the post page
        url:          media URL, used to derive 'extension'
        content_type: 'picture' or 'video', stored as 'type'
        """
        # NOTE(review): reconstructed begin/end markers
        info_source_begin = '<div id="info-source" style="display: none">'
        info_source_end = "</div>"
        source = text.remove_html(
            text.extr(page, info_source_begin, info_source_end))

        # NOTE(review): reconstructed begin/end markers
        info_uploader_begin = '<div class="image-info">'
        info_uploader_end = "</div>"
        uploader = text.remove_html(
            text.extr(page, info_uploader_begin, info_uploader_end))

        # NOTE(review): reconstructed end marker
        tags = text.extr(
            page, 'class="selected-tags">', "</span>", ''
        ).split(', ')
        tags = [tag for tag in tags if tag]

        # NOTE(review): reconstructed begin/end markers
        score = text.parse_int(text.remove_html(
            text.extr(page, 'Score: <b>', '</span', '0')))

        model = self._extract_model(page)

        # NOTE(review): key set reconstructed from the expected test results
        return {
            'id': self.id,
            'model': model,
            'model_list': self._parse_model_list(model),
            'tags': tags,
            # NOTE(review): reconstructed marker
            'posted_dt': text.extr(
                page, 'class="hover-time"  title="', '"', ''),
            'is_favorite': self._parse_is_favorite(page),
            'source_filename': source,
            'uploader': uploader,
            'score': score,
            'comments': self._parse_comments(page),
            'extension': text.ext_from_url(url),
            'type': content_type,
        }

    @staticmethod
    def _extract_model(page):
        """Return the model name from the page title, or 'unknown'"""
        # NOTE(review): reconstructed markers.
        # An empty-string default (instead of None) keeps .startswith() from
        # raising AttributeError when the page has no title.
        model = text.extr(page, "<title>", "</title>", "")
        if not model or model.startswith('Picture #'):
            return 'unknown'
        return model

    @staticmethod
    def _parse_model_list(model):
        """Split a comma-separated model string into a list of names"""
        if model == 'unknown':
            return []
        else:
            return [name.strip() for name in model.split(',')]

    @staticmethod
    def _parse_is_favorite(page):
        """Return True/False for the favorite state, None if undetermined"""
        # NOTE(review): reconstructed end markers
        fav_button = text.extr(page, 'id="favorite-button">', "</span>", '')
        unfav_button = text.extr(page,
                                 'class="actionbutton unfavorite-button">',
                                 "</span>", '')

        is_favorite = None
        if unfav_button == 'Unfavorite':
            is_favorite = True
        if fav_button == 'Favorite':
            is_favorite = False

        return is_favorite

    @staticmethod
    def _parse_comments(page):
        """Return the stripped text of every comment body on the page"""
        # NOTE(review): reconstructed begin/end markers
        comments = text.extract_iter(page,
                                     '<div class="comment-body-inner">',
                                     '</div>')
        return [comment.strip() for comment in comments]


class GirlswithmuscleGalleryExtractor(GirlswithmuscleExtractor):
    """Extractor for image listings (galleries, search results, favorites)
    on girlswithmuscle.com"""
    category = "girlswithmuscle"
    subcategory = "gallery"
    pattern = r"(?:https?://)?(?:www\.)?girlswithmuscle\.com/images/(.*)"

    def __init__(self, match):
        super().__init__(match)
        self.query = match.group(1)

    def pages(self):
        """Yield the HTML of every result page, starting at the given one

        Raises AuthorizationError when the first request gets redirected.
        """
        url = "{}/images/{}".format(self._root_url, self.query)
        response = self.request(url)
        # 'history' is only non-empty for real redirects; comparing URL
        # strings would also trigger on mere normalisation of the query
        if response.history:
            msg = ('Request was redirected to "{}", try logging in'.
                   format(response.url))
            raise exception.AuthorizationError(msg)
        page = response.text
        yield page

        match = re.search(r"Page (\d+) of (\d+)", page)
        if match is None:
            # no pager on the page: nothing beyond the first page
            return
        current, total = match.groups()
        current, total = text.parse_int(current), text.parse_int(total)

        for i in range(current + 1, total + 1):
            url = ("{}/images/{}/{}".
                   format(self._root_url, i, self.query))
            yield self.request(url).text

    def items(self):
        """Queue every post found on the listing pages"""
        self.login()
        for page in self.pages():
            # identical for all posts of a page, so parse it only once
            gallery_name = self._parse_gallery_name(page)
            for imgid in text.extract_iter(page, 'id="imgid-', '"'):
                url = "{}/{}/".format(self._root_url, imgid)
                yield Message.Queue, url, {
                    "gallery_name": gallery_name,
                    "_extractor": GirlswithmusclePostExtractor
                }

    @staticmethod
    def _parse_gallery_name(page):
        """Return the page title, used as the gallery name"""
        # NOTE(review): reconstructed markers
        return text.extr(page, "<title>", "</title>")


# ---------------------------------------------------------------------------
# test/results/girlswithmuscle.py (new file in the same patch) follows
# ---------------------------------------------------------------------------
# -*- coding: utf-8 -*-

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
# __import__ with a dotted name returns the top-level package; the extractor
# module is then reached through plain attribute access.
gallery_dl = __import__("gallery_dl.extractor.girlswithmuscle")
_gwm = gallery_dl.extractor.girlswithmuscle


__tests__ = (
    # single post: picture
    {
        "#url": "https://www.girlswithmuscle.com/2136096/",
        "#category": ("", "girlswithmuscle", "post"),
        "#class": _gwm.GirlswithmusclePostExtractor,

        "id": "2136096",
        "model": str,
        "tags": list,
        "posted_dt": "2023-12-12 16:04:03.438979+00:00",
        "source_filename": "IMG_8714.png",
        "uploader": "toni1991",
        "score": int,
        "extension": "png",
        "type": "picture",
        # These are not available, unless you're logged in
        "is_favorite": None,
        "comments": list,
    },

    # single post: video
    {
        "#url": "https://www.girlswithmuscle.com/1841638/",
        "#category": ("", "girlswithmuscle", "post"),
        "#class": _gwm.GirlswithmusclePostExtractor,

        "id": "1841638",
        "model": str,
        "tags": list,
        "posted_dt": "2022-08-16 17:20:16.006855+00:00",
        "source_filename": ("Snapinsta_299658611_1185267375661829_"
                            "6167677658282784059_n.mp4"),
        "uploader": "BriedFrain",
        "score": int,
        "extension": "mp4",
        "type": "video",
    },

    # image listing filtered by model name
    {
        "#url": ("https://www.girlswithmuscle.com/images/"
                 "?name=Samantha%20Jerring"),
        "#category": ("", "girlswithmuscle", "gallery"),
        "#class": _gwm.GirlswithmuscleGalleryExtractor,

        "#count": range(300, 3000),
        "gallery_name": str,
    },
)