diff --git a/pctasks/client/pctasks/client/client.py b/pctasks/client/pctasks/client/client.py index da0485b9..249024b3 100644 --- a/pctasks/client/pctasks/client/client.py +++ b/pctasks/client/pctasks/client/client.py @@ -2,7 +2,6 @@ import logging import os import pathlib -import zipfile from time import perf_counter from typing import Any, Dict, Iterable, Optional, Type, TypeVar, Union from urllib.parse import urlparse @@ -18,6 +17,7 @@ WorkflowNotFoundError, ) from pctasks.client.settings import ClientSettings +from pctasks.core.importer import write_code from pctasks.core.models.record import Record from pctasks.core.models.response import ( JobPartitionRunRecordListResponse, @@ -180,17 +180,7 @@ def upload_code(self, local_path: Union[pathlib.Path, str]) -> UploadCodeResult: raise OSError(f"Path {path} does not exist.") file_obj: Union[io.BufferedReader, io.BytesIO] - if path.is_file(): - file_obj = path.open("rb") - name = path.name - - else: - file_obj = io.BytesIO() - with zipfile.PyZipFile(file_obj, "w") as zf: - zf.writepy(str(path)) - file_obj.seek(0) - - name = path.with_suffix(".zip").name + name, file_obj = write_code(path) try: resp = self._call_api( diff --git a/pctasks/core/pctasks/core/importer.py b/pctasks/core/pctasks/core/importer.py index 7371014b..82a21cc1 100644 --- a/pctasks/core/pctasks/core/importer.py +++ b/pctasks/core/pctasks/core/importer.py @@ -7,6 +7,7 @@ # For background, checkout out Recipe 10.11 in the Python Cookbook (3rd edition) from __future__ import annotations +import io import logging import pathlib import site @@ -14,7 +15,7 @@ import sys import zipfile from tempfile import TemporaryDirectory -from typing import List, Optional +from typing import List, Optional, Tuple, Union from pctasks.core.storage import Storage @@ -133,3 +134,32 @@ def ensure_code( sys.path.insert(0, str(output_path)) return pathlib.Path(output_path) + + +def write_code( + file_path: pathlib.Path, +) -> Tuple[str, Union[io.BufferedReader, io.BytesIO]]: + """ + Like :meth:`zipfile.PyZipFile.writepy`, but doesn't use ``.pyc`` files. + + Parameters + ---------- + file_path: pathlib.Path + The the Path object that's the directory of code you want to upload. + """ + file_path = pathlib.Path(file_path) + + file_obj: Union[io.BufferedReader, io.BytesIO] + if file_path.is_file(): + file_obj = file_path.open("rb") + name = file_path.name + + else: + name = file_path.with_suffix(".zip").name + file_obj = io.BytesIO() + with zipfile.ZipFile(file_obj, "w") as zf: + for path in file_path.glob("**/*.py"): + zf.write(path, path.relative_to(file_path.parent)) + + file_obj.seek(0) + return name, file_obj diff --git a/pctasks/core/pctasks/core/storage/base.py b/pctasks/core/pctasks/core/storage/base.py index 1d8a6c88..cf86e406 100644 --- a/pctasks/core/pctasks/core/storage/base.py +++ b/pctasks/core/pctasks/core/storage/base.py @@ -1,12 +1,10 @@ import hashlib -import io import logging import pathlib -import zipfile from abc import ABC, abstractmethod from dataclasses import dataclass from datetime import datetime as Datetime -from typing import Any, Dict, Generator, Iterable, List, Optional, Tuple +from typing import Any, Dict, Generator, Iterable, List, Optional, Tuple, Union import orjson @@ -131,23 +129,17 @@ def upload_bytes( ) -> None: """Upload bytes to a storage file.""" - def upload_code(self, file_path: str) -> str: + def upload_code(self, file_path: Union[str, pathlib.Path]) -> str: """Upload a Python module or package.""" + from pctasks.core.importer import write_code + path = pathlib.Path(file_path) if not path.exists(): raise OSError(f"Path {path} does not exist.") - if path.is_file(): - data = path.read_bytes() - name = path.name - else: - buf = io.BytesIO() - with zipfile.PyZipFile(buf, "w") as zf: - zf.writepy(str(path)) - - data = buf.getvalue() - name = path.with_suffix(".zip").name + name, buf = write_code(path) + data = buf.read() token = hashlib.md5(data).hexdigest() dst_path = f"{token}/{name}" diff --git a/pctasks/core/tests/storage/test_importer.py b/pctasks/core/tests/storage/test_importer.py index a09ef128..ad629ba0 100644 --- a/pctasks/core/tests/storage/test_importer.py +++ b/pctasks/core/tests/storage/test_importer.py @@ -2,11 +2,12 @@ import subprocess import sys import unittest.mock +import zipfile from pathlib import Path from tempfile import TemporaryDirectory from typing import Optional -from pctasks.core.importer import ensure_code, ensure_requirements +from pctasks.core.importer import ensure_code, ensure_requirements, write_code from pctasks.core.storage.blob import BlobStorage from pctasks.dev.blob import temp_azurite_blob_storage @@ -109,3 +110,20 @@ def test_ensure_requirements_target_dir(): finally: if target_dir in sys.path: sys.path.remove(target_dir) + + +def test_write_code(): + with TemporaryDirectory() as temp_dir: + p = Path(temp_dir) + pkg = p / "my_package" + module = pkg / "my_file.py" + module.parent.mkdir(exist_ok=True) + module.touch() + + name, buf = write_code(pkg) + + assert name == "my_package.zip" + + with zipfile.ZipFile(buf) as zf: + assert len(zf.filelist) == 1 + assert zf.filelist[0].filename == "my_package/my_file.py"