Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[core]: Avoid PyZipFile in code upload #179

Merged
merged 3 commits into from
Apr 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 2 additions & 12 deletions pctasks/client/pctasks/client/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import logging
import os
import pathlib
import zipfile
from time import perf_counter
from typing import Any, Dict, Iterable, Optional, Type, TypeVar, Union
from urllib.parse import urlparse
Expand All @@ -18,6 +17,7 @@
WorkflowNotFoundError,
)
from pctasks.client.settings import ClientSettings
from pctasks.core.importer import write_code
from pctasks.core.models.record import Record
from pctasks.core.models.response import (
JobPartitionRunRecordListResponse,
Expand Down Expand Up @@ -180,17 +180,7 @@ def upload_code(self, local_path: Union[pathlib.Path, str]) -> UploadCodeResult:
raise OSError(f"Path {path} does not exist.")

file_obj: Union[io.BufferedReader, io.BytesIO]
if path.is_file():
file_obj = path.open("rb")
name = path.name

else:
file_obj = io.BytesIO()
with zipfile.PyZipFile(file_obj, "w") as zf:
zf.writepy(str(path))
file_obj.seek(0)

name = path.with_suffix(".zip").name
name, file_obj = write_code(path)

try:
resp = self._call_api(
Expand Down
32 changes: 31 additions & 1 deletion pctasks/core/pctasks/core/importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,15 @@
# For background, checkout out Recipe 10.11 in the Python Cookbook (3rd edition)
from __future__ import annotations

import io
import logging
import pathlib
import site
import subprocess
import sys
import zipfile
from tempfile import TemporaryDirectory
from typing import List, Optional
from typing import List, Optional, Tuple, Union

from pctasks.core.storage import Storage

Expand Down Expand Up @@ -133,3 +134,32 @@ def ensure_code(
sys.path.insert(0, str(output_path))

return pathlib.Path(output_path)


def write_code(
file_path: pathlib.Path,
) -> Tuple[str, Union[io.BufferedReader, io.BytesIO]]:
"""
Like :meth:`zipfile.PyZipFile.writepy`, but doesn't use ``.pyc`` files.

Parameters
----------
file_path: pathlib.Path
The the Path object that's the directory of code you want to upload.
"""
file_path = pathlib.Path(file_path)

file_obj: Union[io.BufferedReader, io.BytesIO]
if file_path.is_file():
file_obj = file_path.open("rb")
name = file_path.name

else:
name = file_path.with_suffix(".zip").name
file_obj = io.BytesIO()
with zipfile.ZipFile(file_obj, "w") as zf:
for path in file_path.glob("**/*.py"):
zf.write(path, path.relative_to(file_path.parent))

file_obj.seek(0)
return name, file_obj
20 changes: 6 additions & 14 deletions pctasks/core/pctasks/core/storage/base.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
import hashlib
import io
import logging
import pathlib
import zipfile
from abc import ABC, abstractmethod
from dataclasses import dataclass
from datetime import datetime as Datetime
from typing import Any, Dict, Generator, Iterable, List, Optional, Tuple
from typing import Any, Dict, Generator, Iterable, List, Optional, Tuple, Union

import orjson

Expand Down Expand Up @@ -131,23 +129,17 @@ def upload_bytes(
) -> None:
"""Upload bytes to a storage file."""

def upload_code(self, file_path: str) -> str:
def upload_code(self, file_path: Union[str, pathlib.Path]) -> str:
"""Upload a Python module or package."""
from pctasks.core.importer import write_code

path = pathlib.Path(file_path)

if not path.exists():
raise OSError(f"Path {path} does not exist.")

if path.is_file():
data = path.read_bytes()
name = path.name
else:
buf = io.BytesIO()
with zipfile.PyZipFile(buf, "w") as zf:
zf.writepy(str(path))

data = buf.getvalue()
name = path.with_suffix(".zip").name
name, buf = write_code(path)
data = buf.read()

token = hashlib.md5(data).hexdigest()
dst_path = f"{token}/{name}"
Expand Down
20 changes: 19 additions & 1 deletion pctasks/core/tests/storage/test_importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,12 @@
import subprocess
import sys
import unittest.mock
import zipfile
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import Optional

from pctasks.core.importer import ensure_code, ensure_requirements
from pctasks.core.importer import ensure_code, ensure_requirements, write_code
from pctasks.core.storage.blob import BlobStorage
from pctasks.dev.blob import temp_azurite_blob_storage

Expand Down Expand Up @@ -109,3 +110,20 @@ def test_ensure_requirements_target_dir():
finally:
if target_dir in sys.path:
sys.path.remove(target_dir)


def test_write_code():
with TemporaryDirectory() as temp_dir:
p = Path(temp_dir)
pkg = p / "my_package"
module = pkg / "my_file.py"
module.parent.mkdir(exist_ok=True)
module.touch()

name, buf = write_code(pkg)

assert name == "my_package.zip"

with zipfile.ZipFile(buf) as zf:
assert len(zf.filelist) == 1
assert zf.filelist[0].filename == "my_package/my_file.py"