Update MindGeekAPI (SearchByName) (stashapp#711)
Belleyy authored Sep 26, 2021
1 parent 35c1177 commit 84541a6
Showing 2 changed files with 137 additions and 16 deletions.
139 changes: 124 additions & 15 deletions scrapers/MindGeekAPI.py
@@ -10,19 +10,23 @@
import requests

#
# User variable
# User variables
#
# Minimum similarity ratio (between your title and the API title) for a scene to be scraped.
SET_RATIO = 0.75
# Print debug message.
PRINT_DEBUG = True
# Print ratio message. (Show the titles found in the search)
PRINT_MATCH = True
# File used to store key to connect the API.
## File used to store the key to connect to the API.
STOCKAGE_FILE_APIKEY = "MindGeekAPI.ini"
# Tags you don't want to see appear in Scraper window.
# This file will be used for search by name. It can be useful if you only want to search specific sites. (Like only putting the parent studio and not its children)
# Copy MindGeekAPI.ini to another name (then put that name in the var below), then edit the file to remove the sites you don't want to search.
STOCKAGE_FILE_APIKEY_SEARCH = ""

# Tags you don't want to see in the Scraper window.
IGNORE_TAGS = ["Sex","Feature","HD","Big Dick"]
# Tag you always want in Scraper window.
# Tags you want to add in the Scraper window.
FIXED_TAGS = ""
# Check the SSL Certificate.
CHECK_SSL_CERT = True
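
For reference, the key file is a standard ConfigParser .ini with one section per scraped site; a trimmed copy for STOCKAGE_FILE_APIKEY_SEARCH might look like the sketch below. Only the 'url' key is read by the search code in this file; the section name and the URL value shown here are assumptions for illustration, and any other keys the scraper stores (such as the cached token) are omitted.

# MindGeekAPI_search.ini — hypothetical trimmed copy keeping only the sites to search
[brazzers]
# a previously scraped URL from that site (hypothetical value)
url = https://www.brazzers.com/video/123456/example-scene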
@@ -35,17 +39,19 @@ def debug(q):
print(q, file=sys.stderr)

def sendRequest(url, head):
#debug("[DEBUG] Request URL: {}".format(url))
try:
response = requests.get(url, headers=head, timeout=10, verify=CHECK_SSL_CERT)
except requests.exceptions.SSLError:
debug("[ERROR] SSL Error on this site. You can ignore this error with the 'CHECK_SSL_CERT' param inside the python file.")
sys.exit()
return None
#debug("[DEBUG] Returned URL: {}".format(response.url))
if response.content and response.status_code == 200:
return response
else:
debug("[REQUEST] Error, Status Code: {}".format(response.status_code))
if response.status_code == 429:
debug("[REQUEST] 429 Too Many Requests, You have sent too many requests in a given amount of time.")
debug("[REQUEST] 429 Too Many Requests, You have made too many requests in a given amount of time.")
return None

# Config
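
Since the API rate-limits (the 429 branch above), a caller could wrap sendRequest with a simple fixed backoff; a minimal sketch, assuming sendRequest from this file is in scope and that a blocking wait is acceptable (this helper is not part of the scraper):

import time

def send_request_with_retry(url, head, retries=3, wait=10):
    # sendRequest returns None on any failure (and already logs the status code),
    # so this simply waits and retries a few times before giving up.
    for _ in range(retries):
        response = sendRequest(url, head)
        if response is not None:
            return response
        time.sleep(wait)
    return None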
@@ -105,7 +111,7 @@ def api_token_get(url):
api_headers = {
'Instance': api_token,
'User-Agent': USER_AGENT,
'Origin': 'https://' + urlparse(url).netloc,
'Origin':'https://' + urlparse(url).netloc,
'Referer': url
}
return api_headers
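
A minimal sketch of how these headers are consumed, using the same releases endpoint the fragment code below builds (the scene URL and ID here are hypothetical):

scene_url = 'https://www.brazzers.com/video/123456/example-scene'  # hypothetical
api_headers = api_token_get(scene_url)
response = sendRequest('https://site-api.project1service.com/v2/releases/123456', api_headers)
if response:
    # the search code below unwraps the payload the same way; assuming 'result' also holds the scene JSON here
    api_json = response.json().get('result')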
@@ -123,7 +129,7 @@ def scraping_json(api_json, url=""):
scrape['url'] = url
# Studio
try:
api_json['collections'][0].get('name') # If this create a error it wont continue so no studio at all
api_json['collections'][0].get('name') # If this creates an error it won't continue so no studio at all
scrape['studio'] = {}
scrape['studio']['name'] = api_json['collections'][0].get('name')
except:
@@ -183,19 +189,24 @@ def scraping_json(api_json, url=""):
DATE_TODAY = datetime.today().strftime('%Y-%m-%d')

FRAGMENT = json.loads(sys.stdin.read())
SEARCH_TITLE = FRAGMENT.get("name")
SCENE_ID = FRAGMENT.get("id")
SCENE_TITLE = FRAGMENT.get("title")
SCENE_URL = FRAGMENT["url"]
SCENE_URL = FRAGMENT.get("url")
scraped_json = None

if "validName" in sys.argv and SCENE_URL is None:
sys.exit()

if SCENE_URL:
# fix old scene URLs by following the redirect
if 'brazzers.com/scenes/view/id/' in SCENE_URL:
debug("[INFO] Probably a old url, need to redirect")
debug("[INFO] Probably an old url, need to redirect")
try:
r = requests.get(SCENE_URL, headers={'User-Agent': USER_AGENT}, timeout=(3, 5))
SCENE_URL = r.url
except:
debug("[WARN] Redirect fail, could give incorrect result.")
debug("[WARN] Redirect failed, result may be inaccurate.")
# extract the domain from the URL
url_domain = re.sub(r"www\.|\.com", "", urlparse(SCENE_URL).netloc)
debug("[DEBUG] Domain: {}".format(url_domain))
@@ -208,10 +219,12 @@ def scraping_json(api_json, url=""):
except:
url_sceneid = None
if url_sceneid is None:
debug("[ERROR] Can't get the ID of Scene. Are you sure that URL is from Mindgeek Network?")
debug("[ERROR] Can't get the ID of the Scene. Are you sure that URL is from Mindgeek Network?")
sys.exit()
else:
debug("[INFO] ID: {}".format(url_sceneid))


# API ACCESS
api_headers = api_token_get(SCENE_URL)
api_URL = 'https://site-api.project1service.com/v2/releases/{}'.format(url_sceneid)
@@ -228,7 +241,7 @@ def scraping_json(api_json, url=""):
debug("[ERROR] Failed to get the JSON from API")
sys.exit(1)
if api_scene_json:
if api_scene_json.get('parent') is not None:
if api_scene_json.get('parent') is not None and api_scene_json['type'] != "scene":
if api_scene_json['parent']['type'] == "scene":
api_scene_json = api_scene_json.get('parent')
scraped_json = scraping_json(api_scene_json, SCENE_URL)
@@ -247,7 +260,7 @@ def scraping_json(api_json, url=""):
config.read(STOCKAGE_FILE_APIKEY)
dict_config = dict(config.items())
else:
debug("[ERROR] Can't search the scene ({} is missing)\nYou need to scrape 1 URL from the network, to be enable to search with your title on this network.".format(STOCKAGE_FILE_APIKEY))
debug("[ERROR] Can't search for the scene ({} is missing)\nYou need to scrape 1 URL from the network, to be enable to search with your title on this network.".format(STOCKAGE_FILE_APIKEY))
sys.exit(1)
# Loop the config
scraped_json = None
@@ -298,7 +311,103 @@ def scraping_json(api_json, url=""):
scraped_json = scraping_json(ratio_scene, making_url)
break
if scraped_json is None:
debug("[ERROR] API Search don't give correct result")
debug("[ERROR] API Search Error. No scenes found")
sys.exit(1)
elif SEARCH_TITLE:
if not STOCKAGE_FILE_APIKEY_SEARCH:
config_file_used = STOCKAGE_FILE_APIKEY
else:
config_file_used = STOCKAGE_FILE_APIKEY_SEARCH
if os.path.isfile(config_file_used):
config = ConfigParser()
config.read(config_file_used)
dict_config = dict(config.items())
else:
debug("[ERROR] Can't search for the scene ({} is missing)\nYou need to scrape 1 URL from the network, to be enable to search with your title on this network.".format(config_file_used))
sys.exit(1)
# Loop the config
scraped_json = None
result_search = []
list_domain = []
for config_section in dict_config:
if config_section == "DEFAULT":
continue
config_url = config.get(config_section, 'url')
config_domain = re.sub(r"www\.|\.com", "", urlparse(config_url).netloc)
list_domain.append(config_domain)
match_filter = re.match(r"{.+}", SEARCH_TITLE)
if match_filter:
filter_domain = re.sub(r"[{}]","", match_filter.group(0))
filter_domain = filter_domain.split(",")
if config_domain not in filter_domain:
debug("[INFO] Ignore {} (Filter query)".format(config_domain))
continue
debug("[INFO] Searching on: {}".format(config_domain))
# API ACCESS
api_headers = api_token_get(config_url)
search_url = 'https://site-api.project1service.com/v2/releases?title={}&type=scene&limit=40'.format(re.sub(r"{.+}\s*", "", SEARCH_TITLE))
api_search_json = sendRequest(search_url, api_headers)
if api_search_json is None:
debug("[ERROR] Request fail")
continue
try:
if type(api_search_json.json()) == list:
api_search_error = api_search_json.json()[0]['message']
debug("[ERROR] API Error Message: {}".format(api_search_error))
continue
else:
api_search_json = api_search_json.json()['result']
except:
debug("[ERROR] Failed to get the JSON from API ({})".format(config_domain))
continue

ratio_scene = None
making_url = None
for result in api_search_json:
search = {}
try:
result['collections'][0].get('name') # If this creates an error it won't continue so no studio at all
search['studio'] = {}
search['studio']['name'] = result['collections'][0].get('name')
except:
debug("[WARN] No studio")
search['title'] = result.get('title')
title_filename = None
try:
api_filename = result['videos']['mediabook']['files']["320p"]['urls']['download']
title_filename = re.sub(r'^.+filename=', '', api_filename)
title_filename = re.sub(r'_.+$', '', title_filename)
except:
pass
if title_filename:
making_url = re.sub(r'/\d+/*.+', '/' + str(result.get("id")) + "/" + title_filename, config_url)
else:
making_url = re.sub(r'/\d+/*.+', '/' + str(result.get("id")) + "/", config_url)
try:
search['image'] = result['images']['poster']['0']["xl"]['url']
except:
pass
try:
search['performers'] = [{"name": x.get('name')} for x in result.get('actors')]
except:
pass
search['url'] = making_url
result_search.append(search)
if not result_search:
debug("[ERROR] API Search Error. No scenes found")
scraped_json = {"title":"No scenes found. (Hover to see everything)"}
scraped_json["details"] = """
Be sure to have site(s) in the config file (.ini). To add a site in the config, you need to scrape 1 url from the site.
Example: get a url from Brazzers -> use 'Scrape With...' -> now you can search on brazzers.
\nYou can also filter your search with '{site} query'.
You can use a multiple site filter, just separate sites with a comma.
(site = domain without www/com)
\nAvailable sites are shown in the tags.
"""
scraped_json['tags'] = [{"name": x} for x in list_domain]
scraped_json = [scraped_json]
else:
scraped_json = result_search

if scraped_json:
print(json.dumps(scraped_json))
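
As the details text above explains, a leading {site} block filters which config sections are queried; for example (domains are whatever your .ini contains, and the braces plus their contents are stripped before the title reaches the API):

Search input:  {brazzers,realitykings} example scene title
Result:        only the brazzers and realitykings config sections are searched,
               and the title sent to the API is "example scene title".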
14 changes: 13 additions & 1 deletion scrapers/MindGeekAPI.yml
@@ -5,4 +5,16 @@ sceneByFragment:
- python
- MindGeekAPI.py
# - female_only # Only getting female performers
# Last Updated July 29, 2021
sceneByName:
action: script
script:
- python
- MindGeekAPI.py
- searchName
sceneByQueryFragment:
action: script
script:
- python
- MindGeekAPI.py
- validName
# Last Updated September 14, 2021
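
Because the script reads a JSON fragment from stdin (the 'name' field drives the new search branch) and takes the mode as an extra argument, the sceneByName path can be exercised outside stash; a sketch, with a hypothetical title, mirroring how the YAML above invokes it:

import json
import subprocess

# Feed a by-name fragment to the script the way the sceneByName action does.
fragment = json.dumps({"name": "example scene title"})
result = subprocess.run(["python", "MindGeekAPI.py", "searchName"],
                        input=fragment, capture_output=True, text=True)
print(result.stdout)  # JSON list of candidate scenes (or the "No scenes found" placeholder)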
