Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix upload with multiple blocks to allow more than 60mb #72

Open
wants to merge 12 commits into
base: master
Choose a base branch
from
172 changes: 106 additions & 66 deletions transferwee.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
will be shared via emails or link.
"""

from typing import Any, Dict, List, Optional, Union
from typing import Any, List, Optional, Union, Dict, cast
import binascii
import functools
import hashlib
Expand All @@ -61,7 +61,7 @@

# 604800 == 7 * 24 * 60 * 60; presumably an expiry expressed in seconds
# (one week) -- confirm against where this constant is used.
WETRANSFER_EXPIRE_IN = 604800
# User-Agent header value sent with the storm HTTP requests.
WETRANSFER_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:102.0) Gecko/20100101 Firefox/102.0"

# Maximum number of bytes read from a file for a single storm block (10 MiB).
# Files larger than this are split into several blocks.
MAX_BLOCK_SIZE = 10 * 1024 * 1024

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -355,78 +355,94 @@ def _md5(file: str) -> str:
return h.hexdigest()


def _storm_prepare_item(file: str) -> Dict[str, Union[int, str]]:
def _storm_prepare_item(file: str) -> List[Dict[str, Any]]:
"""Given a file, prepare the block for blocks dictionary.

Return a dictionary with "content_length" and "content_md5_hex" keys.
"""
filesize = os.path.getsize(file)

return {"content_length": filesize, "content_md5_hex": _md5(file)}
with open(file, "rb") as f:
return [
{
"content_length": len(chunk),
"content_md5_hex": hashlib.md5(chunk).hexdigest(),
}
for chunk in iter(functools.partial(f.read, MAX_BLOCK_SIZE), b"")
]


def _storm_prepare(authorization: str, filenames: List[str]) -> Dict[Any, Any]:
    """Given an Authorization token and filenames prepare for block uploads.

    Every file is split into blocks, the blocks of all files are flattened
    into one list (file order, then block order) and announced to the storm
    block endpoint in batches.

    Return a dictionary with two keys:

    - "files_bids": one list per file (same order as `filenames`) holding
      the block descriptions produced by _storm_prepare_item();
    - "blocks": {"ok": <bool>, "data": {"blocks": [...]}} where "ok" is
      True only if every batch reported success and "blocks" is the
      concatenation of the blocks returned by each batch, in request order.

    A failed batch is logged and does not abort the remaining batches.
    """
    files_bids = [_storm_prepare_item(f) for f in filenames]
    blocks = [block for file_blocks in files_bids for block in file_blocks]

    block_url = _storm_urls(authorization)["WETRANSFER_STORM_BLOCK"]
    all_ok = True
    prepared: List[Any] = []

    # Number of blocks announced per request.
    # NOTE(review): 100 is presumably a server-side limit -- confirm.
    chunk_size = 100
    for start in range(0, len(blocks), chunk_size):
        requests.options(
            block_url,
            headers={
                "Origin": "https://wetransfer.com",
                "Access-Control-Request-Method": "POST",
                "User-Agent": WETRANSFER_USER_AGENT,
            },
        )
        r = requests.post(
            block_url,
            json={"blocks": blocks[start : start + chunk_size]},
            headers={
                "Authorization": f"Bearer {authorization}",
                "Origin": "https://wetransfer.com",
                "User-Agent": WETRANSFER_USER_AGENT,
            },
        )

        r_json = r.json()
        if not r_json.get("ok"):
            logger.error(r_json)
            all_ok = False
        # An error response may lack "data"/"blocks"; merge what is there
        # instead of raising KeyError right after logging the failure.
        prepared += (r_json.get("data") or {}).get("blocks") or []

    return {
        "files_bids": files_bids,
        "blocks": {"ok": all_ok, "data": {"blocks": prepared}},
    }


def _storm_finalize_item(
file: str, block_id: str
file: str, block_ids: List[str]
) -> Dict[str, Union[List[str], str]]:
"""Given a file and block_id prepare the item block dictionary.

Return a dictionary with "block_ids", "item_type" and "path" keys.

XXX: Is it possible to actually have more than one block?
XXX: If yes this - and probably other parts of the code involved with
XXX: blocks - needs to be instructed to handle them instead of
XXX: assuming that one file is associated with one block.
"""
filename = os.path.basename(file)

return {
"block_ids": [
block_id,
],
"block_ids": block_ids,
"item_type": "file",
"path": filename,
}


def _storm_finalize(
authorization: str, filenames: List[str], block_ids: List[str]
authorization: str, filenames: List[str], block_ids: List[List[str]]
) -> Dict[Any, Any]:
"""Given an Authorization token, filenames and block ids finalize upload.

Return the parsed JSON response.
"""
j = {
"items": [
_storm_finalize_item(f, bid)
for f, bid in zip(filenames, block_ids)
_storm_finalize_item(f, bids)
for f, bids in zip(filenames, block_ids)
],
}
requests.options(
Expand Down Expand Up @@ -464,32 +480,42 @@ def _storm_finalize(
return r.json()


def _storm_upload(urls: List[str], file: str) -> None:
    """Given the presigned urls of a file's blocks and the file upload it.

    The file is read sequentially in blocks of at most MAX_BLOCK_SIZE bytes
    and the n-th block is PUT to the n-th entry of `urls`.  Only one block
    is held in memory at a time.

    If the number of urls and the number of blocks in the file differ, the
    mismatch is logged and the surplus urls or data are not uploaded.

    Does not return anything.
    """
    with open(file, "rb") as f:
        for url in urls:
            chunk = f.read(MAX_BLOCK_SIZE)
            if not chunk:
                logger.error(
                    "More upload urls than blocks for file %s", file
                )
                break
            requests.options(
                url,
                headers={
                    "Origin": "https://wetransfer.com",
                    "Access-Control-Request-Method": "PUT",
                    "User-Agent": WETRANSFER_USER_AGENT,
                },
            )
            requests.put(
                url,
                data=chunk,
                headers={
                    "Origin": "https://wetransfer.com",
                    # Content-MD5 is the base64 encoding of the raw digest.
                    "Content-MD5": binascii.b2a_base64(
                        hashlib.md5(chunk).digest(), newline=False
                    ),
                    "X-Uploader": "storm",
                    "User-Agent": WETRANSFER_USER_AGENT,
                },
            )
        else:
            # Every url was used; any bytes left over would be silently
            # missing from the upload, so make that visible.
            if f.read(1):
                logger.error(
                    "File %s has more blocks than upload urls", file
                )


def _finalize_upload(
Expand Down Expand Up @@ -582,19 +608,33 @@ def upload(
logger.debug(f"Get transfer id {transfer['id']}")
logger.debug("Doing preflight storm")
_storm_preflight(transfer["storm_upload_token"], files)
logger.debug("Preparing storm block upload")
blocks = _storm_prepare(transfer["storm_upload_token"], files)
for f, b in zip(files, blocks["data"]["blocks"]):
logger.debug(f"Preparing storm block upload")
prepare_data = _storm_prepare(transfer["storm_upload_token"], files)
blocks = prepare_data["blocks"]["data"]["blocks"]
start_block_index = 0
file_index = 0
file_bids = []
for f in files:
logger.debug(f"Uploading file {f}")
_storm_upload(b["presigned_put_url"], f)
logger.debug("Finalizing storm batch upload")
file_chunks_count = len(prepare_data["files_bids"][file_index])
file_blocks = blocks[
start_block_index : start_block_index + file_chunks_count
]
file_bids.append(file_blocks)
_storm_upload([b["presigned_put_url"] for b in file_blocks], f)
start_block_index += file_chunks_count
file_index += 1
logger.debug(f"Finalizing storm batch upload")
_storm_finalize(
transfer["storm_upload_token"],
files,
[b["block_id"] for b in blocks["data"]["blocks"]],
[[b["block_id"] for b in f] for f in file_bids],
)
logger.debug(f"Finalizing upload with transfer id {transfer['id']}")
shortened_url = _finalize_upload(transfer["id"], s)["shortened_url"]
finalize_json = _finalize_upload(transfer["id"], s)
if not ("shortened_url" in finalize_json):
logger.error(finalize_json)
shortened_url = finalize_json["shortened_url"]
_close_session(s)
return shortened_url

Expand Down
Loading