Skip to content

Commit

Permalink
Less memory will be used when downloading
Browse files Browse the repository at this point in the history
This lowers memory usage while downloading files, so the downloader can now safely run with the maximum number of threads.

Fixed #1331
  • Loading branch information
UltimaHoarder committed Jun 13, 2021
1 parent 4471bab commit 5478e01
Show file tree
Hide file tree
Showing 4 changed files with 77 additions and 65 deletions.
95 changes: 48 additions & 47 deletions apis/api_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@
import requests
from aiohttp import ClientSession
from aiohttp.client_exceptions import (ClientConnectorError, ClientOSError,
ClientPayloadError, ContentTypeError,ServerDisconnectedError)
ClientPayloadError, ContentTypeError,
ServerDisconnectedError)
from aiohttp.client_reqrep import ClientResponse
from aiohttp_socks import ChainProxyConnector, ProxyConnector, ProxyType
from database.models.media_table import media_table
Expand Down Expand Up @@ -87,6 +88,17 @@ def __init__(
self.dynamic_rules = dynamic_rules
self.auth = auth

def create_client_session(self):
    """Build a fresh aiohttp ClientSession for this session manager.

    When proxies are configured, one is picked at random and used as the
    connection's proxy; otherwise a direct connection is made. Auth cookies
    are attached when the auth object carries them.
    """
    proxy_pool = self.proxies
    # Pick a random proxy URL, or "" when none are configured.
    chosen_proxy = proxy_pool[randint(0, len(proxy_pool) - 1)] if proxy_pool else ""
    connector = ProxyConnector.from_url(chosen_proxy) if chosen_proxy else None
    # getattr with a default mirrors the original hasattr check.
    cookie_source = getattr(self.auth, "cookies", {})
    return ClientSession(
        connector=connector, cookies=cookie_source, read_timeout=None
    )

def add_sessions(self, original_sessions: list, overwrite_old_sessions=True):
if overwrite_old_sessions:
sessions = []
Expand Down Expand Up @@ -152,12 +164,7 @@ async def json_request(
custom_session = False
if not session:
custom_session = True
proxies = self.proxies
proxy = self.proxies[randint(0, len(proxies) - 1)] if proxies else ""
connector = ProxyConnector.from_url(proxy) if proxy else None
session = ClientSession(
connector=connector, cookies=self.auth.cookies, read_timeout=None
)
session = self.create_client_session()
headers = self.session_rules(link)
headers["accept"] = "application/json, text/plain, */*"
headers["Connection"] = "keep-alive"
Expand All @@ -170,41 +177,32 @@ async def json_request(
request_method = session.post
elif method == "DELETE":
request_method = session.delete
result = None
while True:
try:
async with request_method(
link, headers=headers, data=payload
) as response:
if method == "HEAD":
response = await request_method(link, headers=headers, data=payload)
if method == "HEAD":
result = response
else:
if json_format and not stream:
result = await response.json()
elif stream and not json_format:
result = response
else:
if json_format and not stream:
result = await response.json()
elif stream and not json_format:
buffer = []
if response.status == 200:
async for data in response.content.iter_chunked(4096):
buffer.append(data)
length = len(data)
progress_bar.update(length)
else:
if response.content_length:
progress_bar.update_total_size(
-response.content_length
)
final_buffer = b"".join(buffer)
buffer.clear()
result = [response, final_buffer]
print
else:
result = await response.read()
if custom_session:
await session.close()
return result
result = await response.read()
break
except ClientConnectorError as e:
return
except (ClientPayloadError, ContentTypeError, ClientOSError,ServerDisconnectedError) as e:
break
except (
ClientPayloadError,
ContentTypeError,
ClientOSError,
ServerDisconnectedError,
) as e:
continue
if custom_session:
await session.close()
return result

async def async_requests(self, items: list[str], json_format=True):
tasks = []
Expand Down Expand Up @@ -256,7 +254,9 @@ async def check(download_item: media_table, response: ClientResponse):
filepath = os.path.join(
download_item.directory, download_item.filename
)
response_status = False
if response.status == 200:
response_status = True
if response.content_length:
download_item.size = response.content_length

Expand All @@ -266,7 +266,8 @@ async def check(download_item: media_table, response: ClientResponse):
else:
return download_item
else:
return download_item
if response_status:
return download_item

for download_item in download_list:
temp_response = [
Expand All @@ -288,19 +289,19 @@ async def check(download_item: media_table, response: ClientResponse):
[progress_bar.update_total_size(x.size) for x in download_list]

async def process_download(download_item: media_table):
response = await self.download_content(
result = await self.download_content(
download_item, session, progress_bar, subscription
)
if response:
data, download_item = response.values()
if data:
if result:
response, download_item = result.values()
if response:
download_path = os.path.join(
download_item.directory, download_item.filename
)
os.makedirs(os.path.dirname(download_path), exist_ok=True)
with open(download_path, "wb") as f:
f.write(data)
download_item.size = len(data)
await main_helper.write_data(
response, download_path, progress_bar
)
download_item.size = response.content_length
download_item.downloaded = True

max_threads = calculate_max_threads(self.max_threads)
Expand Down Expand Up @@ -333,7 +334,7 @@ async def download_content(
if not download_item.link:
continue
response: ClientResponse
response, task = await asyncio.ensure_future(
response = await asyncio.ensure_future(
self.json_request(
download_item.link,
session,
Expand Down Expand Up @@ -368,7 +369,7 @@ async def download_content(
link = main_helper.link_picker(media, quality)
download_item.link = link
continue
new_task["response"] = task
new_task["response"] = response
new_task["download_item"] = download_item
break
return new_task
Expand Down
26 changes: 18 additions & 8 deletions helpers/main_helper.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
from database.models.media_table import media_table
from sqlalchemy.orm.session import Session
from apis.onlyfans.classes.extras import content_types
import copy
import json
import math
Expand All @@ -22,13 +19,17 @@
import psutil
import requests
import ujson
from aiohttp.client_reqrep import ClientResponse
from apis.onlyfans import onlyfans as OnlyFans
from apis.onlyfans.classes import create_user
from apis.onlyfans.classes.extras import content_types
from bs4 import BeautifulSoup
from classes.prepare_metadata import format_variables, prepare_reformat
from database.models.media_table import media_table
from mergedeep import Strategy, merge
from sqlalchemy import inspect
from sqlalchemy.orm import declarative_base
from sqlalchemy.orm.session import Session
from tqdm import tqdm

import helpers.db_helper as db_helper
Expand Down Expand Up @@ -726,9 +727,19 @@ def is_me(user_api):
return False


async def write_data(download_path:str, data:bytes):
with open(download_path, "wb") as f:
f.write(data)
async def write_data(response: "ClientResponse", download_path: str, progress_bar):
    """Stream a successful HTTP response body to disk in 4 KiB chunks.

    Args:
        response: an aiohttp response (annotation is a forward-ref string so
            the module imports even without aiohttp resolved at def time).
        download_path: destination file path; parent directories are created.
        progress_bar: object with ``update(n)`` and ``update_total_size(n)``.

    Returns:
        True when the response was 200 and the body was written;
        False otherwise (the expected size, when known, is subtracted
        from the progress bar's total so the bar still completes).
    """
    if response.status != 200:
        if response.content_length:
            # Download is skipped; shrink the total so the bar stays accurate.
            progress_bar.update_total_size(-response.content_length)
        return False
    directory = os.path.dirname(download_path)
    if directory:
        # Guard: os.makedirs("") raises when the path has no directory part.
        os.makedirs(directory, exist_ok=True)
    with open(download_path, "wb") as f:
        # NOTE(review): file writes are blocking inside a coroutine —
        # acceptable for 4 KiB chunks, but worth confirming for large files.
        async for data in response.content.iter_chunked(4096):
            # Update progress before writing, matching the original order.
            progress_bar.update(len(data))
            f.write(data)
    return True


def export_data(
Expand Down Expand Up @@ -906,7 +917,7 @@ def module_chooser(domain, json_sites):
return string, site_names


def link_picker(media,video_quality):
def link_picker(media, video_quality):
link = ""
if "source" in media:
quality_key = "source"
Expand All @@ -929,4 +940,3 @@ def link_picker(media,video_quality):
if "src" in media:
link = media["src"]
return link

17 changes: 8 additions & 9 deletions modules/onlyfans.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,10 +288,10 @@ async def profile_scraper(
header = y.header
if avatar:
override_media_types.append(["Avatars", avatar])
elif header:
override_media_types.append(["Headers", header])
progress_bar = download_session()
progress_bar.start(unit="B", unit_scale=True, miniters=1)
session = authed.session_manager.create_client_session()
for override_media_type in override_media_types:
new_dict = dict()
media_type = override_media_type[0]
Expand All @@ -301,21 +301,20 @@ async def profile_scraper(
os.makedirs(directory2, exist_ok=True)
download_path = os.path.join(directory2, media_link.split("/")[-2] + ".jpg")
response = await authed.session_manager.json_request(media_link, method="HEAD")
if overwrite_files:
if os.path.isfile(download_path):
if os.path.getsize(download_path) == response.content_length:
continue
if os.path.isfile(download_path):
if os.path.getsize(download_path) == response.content_length:
continue
progress_bar.update_total_size(response.content_length)
response, data = await authed.session_manager.json_request(
response = await authed.session_manager.json_request(
media_link,
session=session,
stream=True,
json_format=False,
sleep=False,
progress_bar=progress_bar,
)
downloaded = await main_helper.write_data(download_path, data)
if not downloaded:
continue
downloaded = await main_helper.write_data(response,download_path,progress_bar)
await session.close()
progress_bar.close()


Expand Down
4 changes: 3 additions & 1 deletion start_ofd.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,4 +58,6 @@ async def main():
input()


asyncio.run(main())
# NOTE(review): manual event-loop management instead of asyncio.run() —
# this commit deliberately replaced asyncio.run(main()), presumably to work
# around a platform/loop issue; confirm the reason, since get_event_loop()
# outside a running loop is deprecated in newer Python versions.
loop = asyncio.get_event_loop()
loop.run_until_complete(main())
loop.close()

0 comments on commit 5478e01

Please sign in to comment.