diff --git a/.flake8 b/.flake8 deleted file mode 100644 index b91d060..0000000 --- a/.flake8 +++ /dev/null @@ -1,12 +0,0 @@ -[flake8] -extend-select = B950 -extend-ignore = E203,E501 -max-line-length = 88 -exclude = - .git, - __pycache__, - _datalad_buildsupport, - .tox, - build, - versioneer.py, - src/datalad_cds/_version.py diff --git a/.github/workflows/check.yml b/.github/workflows/check.yml index f0368c2..c8f36df 100644 --- a/.github/workflows/check.yml +++ b/.github/workflows/check.yml @@ -2,7 +2,9 @@ name: check on: push: + branches: [main] pull_request: + branches: [main] jobs: tox: @@ -16,7 +18,6 @@ jobs: steps: - uses: actions/checkout@v3 - name: Set up system - shell: bash run: | sudo apt-get update -qq sudo apt-get install git-annex diff --git a/.github/workflows/docbuild.yml b/.github/workflows/docbuild.yml index 0da220a..ad363d8 100644 --- a/.github/workflows/docbuild.yml +++ b/.github/workflows/docbuild.yml @@ -1,6 +1,10 @@ name: docs -on: [push, pull_request] +on: + push: + branches: [main] + pull_request: + branches: [main] jobs: build: diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..8aa0566 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,43 @@ +name: Release + +on: + push: + tags: + - 'v*' + +jobs: + build: + name: Build python package distributions + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v4 + with: + python-version: "3.x" + - name: Install pypa/build + run: pip install build + - name: Build a binary wheel and a source tarball + run: python3 -m build + - name: Store the distribution packages + uses: actions/upload-artifact@v4 + with: + name: python-package-distributions + path: dist/ + + publish-to-pypi: + name: Publish release to PyPI + runs-on: ubuntu-latest + needs: build + environment: + name: pypi + url: https://pypi.org/p/datalad-cds + permissions: + id-token: write + steps: + - name: Download all the dists + uses: actions/download-artifact@v4 + with: + name: python-package-distributions + path: dist/ + - name: Publish package distributions to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 diff --git a/.github/workflows/test_crippledfs.yml b/.github/workflows/test_crippledfs.yml deleted file mode 100644 index d226645..0000000 --- a/.github/workflows/test_crippledfs.yml +++ /dev/null @@ -1,55 +0,0 @@ -name: crippled-filesystems - -on: [pull_request] - -jobs: - test: - - runs-on: ubuntu-latest - - steps: - - name: Set up system - shell: bash - run: | - bash <(wget -q -O- http://neuro.debian.net/_files/neurodebian-travis.sh) - # enable repo for devel git-annex, if needed - #sudo sed -e 's|\(deb.*data\)|#\1|' -e 's|/debian |/debian-devel |' /etc/apt/sources.list.d/neurodebian.sources.list | sudo tee /etc/apt/sources.list.d/neurodebian-devel.sources.list - sudo apt-get update -qq - sudo apt-get install eatmydata - sudo eatmydata apt-get install git-annex-standalone dosfstools - # 500 MB VFAT FS in a box - sudo dd if=/dev/zero of=/crippledfs.img count=500 bs=1M - sudo mkfs.vfat /crippledfs.img - # mount - sudo mkdir /crippledfs - sudo mount -o "uid=$(id -u),gid=$(id -g)" /crippledfs.img /crippledfs - - name: Set up environment - run: | - git config --global user.email "test@github.land" - git config --global user.name "GitHub Almighty" - - uses: actions/checkout@v3 - - name: Set up Python 3.10.8 - uses: actions/setup-python@v4 - with: - python-version: 3.10.8 - - name: Install dependencies - run: | - pip install -r requirements-devel.txt - python -m pip install --upgrade pip - - name: Installation - run: | - # package install - python -m pip install . - - name: Run tests - env: - # forces all test repos/paths into the VFAT FS - TMPDIR: /crippledfs - run: | - mkdir -p __testhome__ - cd __testhome__ - # give detailed info on actual test setup - datalad wtf - echo "== mount >>" - mount - echo "<< mount ==" - python -m pytest -s -v --doctest-modules --cov=datalad_cds_extension --pyargs datalad_cds_extension \ No newline at end of file diff --git a/.gitignore b/.gitignore index 2352a2d..7d7e411 100644 --- a/.gitignore +++ b/.gitignore @@ -1,13 +1,13 @@ -.pybuild/ -.coverage -/.tox *.egg-info *.py[coe] .#* .*.swp -pip-wheel-metadata -docs/build -docs/source/generated +.coverage +.hypothesis +.pybuild/ +/.tox build/ dist/ -*.grib +docs/build +docs/source/generated +pip-wheel-metadata diff --git a/README.md b/README.md index d94473a..67eb514 100644 --- a/README.md +++ b/README.md @@ -1,156 +1,87 @@ -# DataLad CDS Extension +# DataLad extension for the Copernicus Climate Data Store -## Table of contents -- Recommended knowledge -- Set up -- Usage -- Request know-how -- Options +## What? +A DataLad extension to integrate with the Copernicus Climate Data Store (CDS). +So far this just implements a `datalad download-cds` command that can be used to fetch data from the CDS +and record this action in a way so that `datalad get` (or just `git annex get`) can redo the download in the future. -## Recommended Knowledge: +## Why? -DataLad https://www.datalad.org/ +This extension enables automated provenance tracking for fetching data from the CDS. +In a dataset that retrieves data from the CDS using this extension it will become visible how this data was initially fetched +and how it can be retrieved again in the future. -## Set up -Before installing this extension, please install datalad! -https://handbook.datalad.org/en/latest/intro/installation.html +## How? -Clone this repository and run +You will first have to create an account with the CDS, +if you don't have one already. +You can do so here: - pip install -e . +Next, +you will need to create the "~/.cdsapirc" file as described here: . +This file is required since the datalad-cds extension internally uses the cdsapi package +and therefore uses its authentication mechanism. -Make sure you have valid credentials for the cds api! -If you're not registered yet, here is the manual: -https://cds.climate.copernicus.eu/user/register?destination=%2F%23!%2Fhome \ -Create a DataLad dataset: - - datalad create -c text2git DataLad-101 -Change to the dataset: - - cd Datalad-101 - -Now you can execute the datalad-download-cds command! - -Datalad handbook: -http://handbook.datalad.org/en/latest/ - -Datalad documentation: -https://docs.datalad.org/en/stable/index.html - -## Usage -Extension for the automatic download from the CDS DataStore. -Works like `datalad download-url` - - -In general a command looks like this: - - datalad download-cds [-h] [-d PATH] [-O PATH] [--archive] [--nosave] [-m MESSAGE] - [--version] filenames - -Example: - - datalad download-cds test.txt -m "This is the commit message" - - -In this case test.txt contains a cds request. - - 'derived-reanalysis-energy-moisture-budget', - { - 'format': 'zip', - 'variable': 'divergence_of_vertical_integral_of_latent_heat_flux', - 'year': '1979', - 'month': '01', - 'area': [ - 90, 0, -90, - 360, - ], - }, - 'download.zip' - -You can generate yourself the request here: -https://cds.climate.copernicus.eu/cdsapp#!/search?type=dataset - -Example for a request generated by the CDS data store: - - import cdsapi - - c = cdsapi.Client() - - c.retrieve( - 'derived-reanalysis-energy-moisture-budget', - { - 'format': 'zip', - 'variable': 'divergence_of_vertical_integral_of_latent_heat_flux', - 'year': '1979', - 'month': '01', - 'area': [ - 90, 0, -90, - 360, - ], - }, - 'download.zip') - -### You only need the request in between the brackets of the retrieve method! - -## Request Know-How - -A request always consists of: - -A dataset: - -`'derived-reanalysis-energy-moisture-budget'` - -request-parameters (in form of a dictionary): +Also, +you need to install datalad and the datalad-cds extension. +Both can be had through pip. +Now you are ready to use the extension. +When you look through the CDS you will notice that for any given dataset you can select a subset of the data using the "Download data" tab. +After you do that you can use the "Show API request" button at the bottom to get a short python script that would fetch the chosen subset using the cdsapi. +The following is an example of that: +```python +#!/usr/bin/env python +import cdsapi +c = cdsapi.Client() +c.retrieve( + "reanalysis-era5-pressure-levels", + { + "variable": "temperature", + "pressure_level": "1000", + "product_type": "reanalysis", + "year": "2008", + "month": "01", + "day": "01", + "time": "12:00", + "format": "grib" + }, + "download.grib", +) +``` + +To fetch the same data to the same local file using datalad-cds we just need to adapt this a little: +```bash +$ datalad download-cds --path download.grib ' { - 'format': 'zip', - 'variable': 'divergence_of_vertical_integral_of_latent_heat_flux', - 'year': '1979', - 'month': '01', - 'area': [ - 90, 0, -90, - 360, - ], + "dataset": "reanalysis-era5-pressure-levels", + "sub-selection": { + "variable": "temperature", + "pressure_level": "1000", + "product_type": "reanalysis", + "year": "2008", + "month": "01", + "day": "01", + "time": "12:00", + "format": "grib" + } } +' +``` -A filename where the request will get written into: - -`'download.zip'` - -The first two parameters are mandatory! If you do not specify the file where it gets written into in the file of the general request, you have to do it in the command. - -Example: - - datalad download-cds test.txt --path test2.zip - -If you specify both, the path in the command will be used! - -## Options - -### filename -This is the file, in which the cds request is stored - -### -h, --help -Shows the help message, --help shows the man page - -### -d PATH, --dataset PATH -Defines the dataset, not necessary to define - -### --path PATH, -O PATH -If specified, overrides the PATH of where the file gets written to. If not specified, it has to be present in the cds-request-file - -### --archive -pass the downloaded files to datalad add-archive-content –delete. - -### --nosave -by default all modifications to a dataset are immediately saved. Giving this option will disable this behavior. +The local path to save to ("download.grib") becomes the `--path` argument. +The dataset name ("reanalysis-era5-pressure-levels" in this case) becomes the value of the `dataset` key in a json object that describes the data to be downloaded. +The sub-selection of the dataset becomes the value of the `sub-selection` key. -### -m MESSAGE, --message MESSAGE -Message to be added to the git log +After executing the above `datalad download-cds` command in a DataLad dataset a file called "download.grib" should be newly created. +This file will have its origin tracked in git-annex (you can see that by running `git annex whereis download.grib`). +If you now `datalad drop` the file +and then `datalad get` it you'll see that git-annex will automatically re-retrieve the file from the CDS +as if it was just another location to get data from. -### --version -show the module and its version +To see more possible usage options take a look at the help page of the command (`datalad download-cds --help`) +or the documentation at . diff --git a/docs/source/cli_reference.rst b/docs/source/cli_reference.rst new file mode 100644 index 0000000..2a6babd --- /dev/null +++ b/docs/source/cli_reference.rst @@ -0,0 +1,7 @@ +Command line reference +====================== + +.. toctree:: + :maxdepth: 1 + + generated/man/datalad-download-cds diff --git a/docs/source/conf.py b/docs/source/conf.py index 86ae0e7..7370271 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -12,6 +12,7 @@ # All configuration values have a default; values that are commented out # serve to show the default. +import datetime import subprocess import sys from os import pardir @@ -89,9 +90,11 @@ master_doc = "index" # General information about the project. -project = "DataLad extension for downloading from the Copernicus Climate Data Store" -copyright = "" -author = "" +project = "DataLad CDS extension" +copyright = "2018-{}, Matthias Riße, the DataLad team and contributors".format( + datetime.datetime.now().year +) +author = "Matthias Riße" # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the @@ -117,7 +120,7 @@ todo_include_todos = True # Example configuration for intersphinx: refer to the Python standard library. -intersphinx_mapping = {"https://docs.python.org/": None} +intersphinx_mapping = {"python": ("https://docs.python.org/3", None)} # -- Options for HTML output ---------------------------------------------- diff --git a/docs/source/index.rst b/docs/source/index.rst index 6b1d068..909ef2f 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -1,8 +1,8 @@ -DataLad extension template -************************** +DataLad CDS extension +********************* -This is a template for creating a `DataLad `__ extension -that equips DataLad with additional functionality. +This is a DataLad extension that enables fetching of data from the Copernicus Climate Data Store +in a way that is tightly integrated with DataLad and git-annex. API @@ -11,20 +11,19 @@ API High-level API commands ----------------------- -.. currentmodule:: datalad.api -.. autosummary:: - :toctree: generated +.. toctree:: + :maxdepth: 2 - download_cds + python_reference.rst Command line reference ---------------------- .. toctree:: - :maxdepth: 1 + :maxdepth: 2 - generated/man/datalad-download-cds + cli_reference.rst Indices and tables diff --git a/docs/source/python_reference.rst b/docs/source/python_reference.rst new file mode 100644 index 0000000..4bbf3fb --- /dev/null +++ b/docs/source/python_reference.rst @@ -0,0 +1,8 @@ +High-level API commands +======================= + +.. currentmodule:: datalad.api +.. autosummary:: + :toctree: generated + + download_cds diff --git a/pyproject.toml b/pyproject.toml index 31915b3..7505d0d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,14 +29,12 @@ dependencies = [ [project.optional-dependencies] devel = [ - "black", "coverage", - "flake8", - "flake8-bugbear", - "isort", + "hypothesis", "mypy", "pytest", "pytest-cov", + "ruff", "sphinx", "sphinx_rtd_theme", ] @@ -45,7 +43,7 @@ devel = [ repository = "https://github.com/m.risse/datalad-cds" [project.scripts] -git-annex-remote-cdsrequest = "datalad_cds.cdsrequest:main" +git-annex-remote-cds = "datalad_cds.cds_remote:main" [project.entry-points."datalad.extensions"] download-cds = "datalad_cds:command_suite" @@ -65,36 +63,29 @@ tag_prefix = "v" show_missing = true omit = ["src/datalad_cds/_version.py"] -[tool.black] +[tool.ruff] line-length = 88 -extend-exclude = ''' -( - ^/_datalad_buildsupport - | ^/versioneer.py - | ^/src/datalad_cds/_version.py -) -''' - -[tool.isort] -profile = "black" -skip_gitignore = true -extend_skip = [ +extend-exclude = [ "_datalad_buildsupport/", - "versioneer.py", "src/datalad_cds/_version.py", + "versioneer.py", ] +[tool.ruff.lint] +extend-select = ["I"] + [tool.mypy] ignore_missing_imports = true disallow_untyped_calls = true disallow_untyped_defs = true disallow_incomplete_defs = true exclude = [ - '.tox/', - 'build/', - '_datalad_buildsupport/', - 'datalad_cds/_version.py', - 'versioneer.py', + ".tox/", + "_datalad_buildsupport/", + "build/", + "src/datalad_cds/_version.py", + "venv/", + "versioneer.py", ] [[tool.mypy.overrides]] @@ -111,6 +102,3 @@ module = [ "_datalad_buildsupport.*", ] follow_imports = "skip" - -# [tool.flake8] -# flake8 still does not support pyproject.toml, so it's configured in .flake8 diff --git a/src/datalad_cds/__init__.py b/src/datalad_cds/__init__.py index 09c8e5b..c9370ee 100644 --- a/src/datalad_cds/__init__.py +++ b/src/datalad_cds/__init__.py @@ -16,7 +16,7 @@ # specification of a command, any number of commands can be defined ( # importable module that contains the command implementation - "datalad_cds.downloadcds", + "datalad_cds.download_cds", # name of the command class implementation in above module "DownloadCDS", # optional name of the command in the cmdline API diff --git a/src/datalad_cds/cds_remote.py b/src/datalad_cds/cds_remote.py new file mode 100755 index 0000000..2c49d88 --- /dev/null +++ b/src/datalad_cds/cds_remote.py @@ -0,0 +1,86 @@ +import pathlib +import subprocess + +import cdsapi +from annexremote import Master, ProtocolError, RemoteError, SpecialRemote + +import datalad_cds.spec + +CDS_REMOTE_UUID = "923e2755-e747-42f4-890a-9c921068fb82" + + +class CDSRemote(SpecialRemote): + transfer_store = None + remove = None + + def initremote(self) -> None: + pass + + def prepare(self) -> None: + pass + + def _is_dry_run(self) -> bool: + try: + remote_name = self.annex.getgitremotename() + except ProtocolError: + return False + git_dir = self.annex.getgitdir() + result = subprocess.run( + [ + "git", + "--git-dir={}".format(git_dir), + "config", + "--get", + "remote.{}.dry-run".format(remote_name), + ], + capture_output=True, + text=True, + ) + return result.returncode == 0 and result.stdout.strip() == "true" + + def _retrieve_cds(self, spec: datalad_cds.spec.Spec, filename: str) -> None: + if self._is_dry_run(): + pathlib.Path(filename).write_text(spec.to_json()) + return + c = cdsapi.Client() + c.retrieve(spec.dataset, spec.sub_selection, filename) + + def transfer_retrieve(self, key: str, filename: str) -> None: + urls = self.annex.geturls(key, "cds:") + for url in urls: + try: + self._retrieve_cds(datalad_cds.spec.Spec.from_url(url), filename) + break + except: # noqa: E722 + pass + else: + raise RemoteError("Failed to handle key {}".format(key)) + + def whereis(self, key: str) -> str: + url = self.annex.geturls(key, "cds:")[0] + return datalad_cds.spec.Spec.from_url(url).to_json() + + def checkpresent(self, key: str) -> bool: + # We just assume that we can always handle the key + return True + + def claimurl(self, url: str) -> bool: + return url.startswith("cds:") + + def checkurl(self, url: str) -> bool: + return url.startswith("cds:") + + def getcost(self) -> int: + # This is a very expensive remote + return 1000 + + def getavailability(self) -> str: + # The Climate Data Store is publicly available on the internet + return "global" + + +def main() -> None: + master = Master() + remote = CDSRemote(master) + master.LinkRemote(remote) + master.Listen() diff --git a/src/datalad_cds/cdsrequest.py b/src/datalad_cds/cdsrequest.py deleted file mode 100755 index 17efa0a..0000000 --- a/src/datalad_cds/cdsrequest.py +++ /dev/null @@ -1,77 +0,0 @@ -import ast -import base64 -import inspect -import logging -import urllib - -import cdsapi -from annexremote import Master, SpecialRemote - -logger = logging.getLogger("datalad.download-cds.cdsrequest") - -cdsrequest_REMOTE_UUID = "1da43985-0b6e-4123-89f0-90b88021ed34" - - -class HandleUrlError(Exception): - pass - - -def fromUrl(url: str) -> str: - if not url.startswith("cdsrequest:v1-"): - raise ValueError("unsupported URL value encountered") - return base64.urlsafe_b64decode( - urllib.parse.unquote(url.replace("cdsrequest:v1-", "")).encode("utf-8") - ).decode("utf-8") - - -class CdsRemote(SpecialRemote): - transfer_store = None - remove = None - - def initremote(self) -> None: - pass - - def prepare(self) -> None: - pass - - def _execute_cds(self, request: str, filename: str) -> None: - dictStart = request.index("{") - dataset_to = request[0:dictStart] - request_dict_str = request[dictStart : len(request)] - logger.debug("downloading %s", dataset_to) - - request_dict = ast.literal_eval(request_dict_str) - c = cdsapi.Client() - c.retrieve(dataset_to, request_dict, filename) - - def transfer_retrieve(self, key: str, filename: str) -> None: - logger.debug( - "%s called with key %s and filename %s", - inspect.stack()[0][3], - key, - ) - urls = self.annex.geturls(key, "cdsrequest:") - logger.debug("urls for this key: %s", urls) - for url in urls: - self._execute_cds(fromUrl(url), filename) - - def checkpresent(self, key: str) -> bool: - return True - - def claimurl(self, url: str) -> bool: - return url.startswith("cdsrequest:") - - def checkurl(self, url: str) -> bool: - return url.startswith("cdsrequest:") - - -def main() -> None: - master = Master() - remote = CdsRemote(master) - master.LinkRemote(remote) - logger.addHandler(master.LoggingHandler()) - master.Listen() - - -if __name__ == "__main__": - main() diff --git a/src/datalad_cds/compat.py b/src/datalad_cds/compat.py new file mode 100644 index 0000000..a0c419e --- /dev/null +++ b/src/datalad_cds/compat.py @@ -0,0 +1,10 @@ +import sys + + +def removeprefix(s: str, prefix: str) -> str: + if sys.version_info >= (3, 9): # pragma: py-lt-39 + return s.removeprefix(prefix) + else: # pragma: py-gte-39 + if s.startswith(prefix): + return s[len(prefix) :] + return s diff --git a/src/datalad_cds/download_cds.py b/src/datalad_cds/download_cds.py new file mode 100644 index 0000000..b364148 --- /dev/null +++ b/src/datalad_cds/download_cds.py @@ -0,0 +1,130 @@ +"""DataLad extension for the Climate Data Store""" + +__docformat__ = "restructuredtext" +import logging +from typing import Iterable, Literal, Optional, Union + +from datalad.distribution.dataset import ( + EnsureDataset, + datasetmethod, + require_dataset, +) +from datalad.interface.base import Interface, build_doc, eval_results +from datalad.interface.common_opts import nosave_opt, save_message_opt +from datalad.interface.results import get_status_dict +from datalad.support.annexrepo import AnnexRepo +from datalad.support.constraints import EnsureNone, EnsureStr +from datalad.support.param import Parameter + +import datalad_cds.cds_remote +import datalad_cds.spec + +logger = logging.getLogger("datalad.cds.download_cds") + + +# decoration auto-generates standard help +@build_doc +# all commands must be derived from Interface +class DownloadCDS(Interface): + """Downloads specified datasets from the CDS data store""" + + _params_ = dict( + spec=Parameter( + doc="""A json string or python dictionary containing the key + "dataset" with the datasets name (i.e. what is shown as the first + parameter to cdsapi.Client.retrieve if you do a "Show API request" + on some dataset in the CDS) and the key "sub-selection" with the + sub-selection of the dataset that should be fetched (i.e. what is + shown as the second parameter to cdsapi.Client.retrieve).""", + ), + dataset=Parameter( + args=("-d", "--dataset"), + metavar="PATH", + doc="""specify the dataset to add files to. If no dataset is given, + an attempt is made to identify the dataset based on the current + working directory. Use [CMD: --nosave CMD][PY: save=False PY] to + prevent adding files to the dataset.""", + constraints=EnsureDataset() | EnsureNone(), + ), + path=Parameter( + args=("-O", "--path"), + doc="""target path to download to.""", + constraints=EnsureStr(), + ), + lazy=Parameter( + args=("--lazy",), + action="store_true", + doc="""By default the file will be immediately downloaded. If the + lazy flag is supplied then the CDS request is only recorded as a + source for the file, but no download is initiated. Keep in mind that + there is no way to validate the correctness of the request if the + lazy flag is used.""", + ), + save=nosave_opt, + message=save_message_opt, + ) + + @staticmethod + @datasetmethod(name="download_cds") + @eval_results + def __call__( + spec: Union[str, dict], + path: str, + *, + dataset: Optional[str] = None, + message: Optional[str] = None, + save: bool = True, + lazy: bool = False, + ) -> Iterable[dict]: + if isinstance(spec, dict): + parsed_spec = datalad_cds.spec.Spec.from_dict(spec) + elif isinstance(spec, str): + parsed_spec = datalad_cds.spec.Spec.from_json(spec) + else: + raise TypeError("spec could not be parsed") + ds = require_dataset(dataset, check_installed=True) + ensure_special_remote_exists_and_is_enabled(ds.repo, "cds") + pathobj = ds.pathobj / path + url = parsed_spec.to_url() + options = [] + if lazy: + options.append("--relaxed") + ds.repo.add_url_to_file(pathobj, url, options=options) + if save: + msg = ( + message + if message is not None + else "[DATALAD] Download from Climate Data Store" + ) + yield ds.save(pathobj, message=msg) + yield get_status_dict(action="cds", ds=ds, status="ok") + + +def ensure_special_remote_exists_and_is_enabled( + repo: AnnexRepo, remote: Literal["cds"] +) -> None: + """Initialize and enable the cds special remote, if it isn't already. + + Very similar to datalad.customremotes.base.ensure_datalad_remote. + """ + + uuids = {"cds": datalad_cds.cds_remote.CDS_REMOTE_UUID} + uuid = uuids[remote] + + name = repo.get_special_remotes().get(uuid, {}).get("name") + if not name: + repo.init_remote( + remote, + [ + "encryption=none", + "type=external", + "autoenable=true", + "externaltype={}".format(remote), + "uuid={}".format(uuid), + ], + ) + elif repo.is_special_annex_remote(name, check_if_known=False): + logger.debug("special remote %s is enabled", name) + else: + logger.debug("special remote %s found, enabling", name) + repo.enable_remote(name) diff --git a/src/datalad_cds/downloadcds.py b/src/datalad_cds/downloadcds.py deleted file mode 100644 index f3082b2..0000000 --- a/src/datalad_cds/downloadcds.py +++ /dev/null @@ -1,185 +0,0 @@ -"""DataLad cds downloader""" - -__docformat__ = "restructuredtext" -import base64 -import logging -import os.path as op -import urllib.parse -from typing import Dict, Iterable, List, Literal, Optional - -from datalad.distribution.dataset import ( - EnsureDataset, - datasetmethod, - require_dataset, - resolve_path, -) -from datalad.interface.base import Interface, build_doc -from datalad.interface.common_opts import nosave_opt, save_message_opt -from datalad.interface.results import get_status_dict -from datalad.interface.utils import eval_results -from datalad.support.annexrepo import AnnexRepo -from datalad.support.constraints import EnsureNone, EnsureStr -from datalad.support.param import Parameter - -import datalad_cds.cdsrequest - -logger = logging.getLogger("datalad.cds.download-cds") - - -# decoration auto-generates standard help -@build_doc -# all commands must be derived from Interface -class DownloadCDS(Interface): - """Downloads specified datasets from the CDS data store""" - - _params_ = dict( - user_string_input=Parameter(doc="""json file with retrieve request"""), - dataset=Parameter( - args=("-d", "--dataset"), - metavar="PATH", - doc="""specify the dataset to add files to. If no dataset is given, - an attempt is made to identify the dataset based on the current - working directory. Use [CMD: --nosave CMD][PY: save=False PY] to - prevent adding files to the dataset.""", - constraints=EnsureDataset() | EnsureNone(), - ), - path=Parameter( - args=("-O", "--path"), - doc="""target for download. If the path has a trailing separator, - it is treated as a directory, and each specified URL is downloaded - under that directory to a base name taken from the URL. Without a - trailing separator, the value specifies the name of the downloaded - file (file name extensions inferred from the URL may be added to it, - if they are not yet present) and only a single URL should be given. - In both cases, leading directories will be created if needed. This - argument defaults to the current directory.""", - constraints=EnsureStr() | EnsureNone(), - ), - archive=Parameter( - args=("--archive",), - action="store_true", - doc="""pass the downloaded files to [CMD: :command:`datalad - add-archive-content --delete` CMD][PY: add_archive_content(..., - delete=True) PY]""", - ), - save=nosave_opt, - message=save_message_opt, - ) - - @staticmethod - @datasetmethod(name="download_cds") - @eval_results - def __call__( - user_string_input: str, - dataset: Optional[str] = None, - path: Optional[str] = None, - archive: bool = False, - save: bool = True, - message: Optional[str] = None, - ) -> Iterable[Dict]: - inputList = fileToList(user_string_input) - request_str = inputList[0] - ds = require_dataset(dataset, check_installed=True, purpose="download cds") - if not path: - path = inputList[1] - """ - if(not op.exists(path)): - raise ValueError("The path in the file is not valid!") - """ - path = str(resolve_path(path or op.curdir, ds=dataset)) - url = toUrl(request_str) - logger.debug("url is %s", url) - pathobj = ds.pathobj / path - logger.debug("target path is %s", pathobj) - - ensure_special_remote_exists_and_is_enabled(ds.repo, "cdsrequest") - ds.repo.add_url_to_file(pathobj, url) - - msg = """\ -[DATALAD cdsrequest] {} -=== Do not change lines below === -This was the request: -{} -The file of the request: -{} -^^^ Do not change lines above ^^^ - """ - - msg = msg.format(message if message is not None else "", request_str, pathobj) - if save: - yield ds.save(pathobj, message=msg) - yield get_status_dict(action="cdsrequest", status="ok") - if archive: - yield from ds.add_archive_content( - pathobj, - delete=True, - on_failure="ignore", - return_type="generator", - result_renderer="disabled", - ) - - -def ensure_special_remote_exists_and_is_enabled( - repo: AnnexRepo, remote: Literal["cdsrequest"] -) -> None: - """Initialize and enable the cdsrequest special remote, if it isn't already. - Very similar to datalad.customremotes.base.ensure_datalad_remote. - """ - - uuids = {"cdsrequest": datalad_cds.cdsrequest.cdsrequest_REMOTE_UUID} - uuid = uuids[remote] - - name = repo.get_special_remotes().get(uuid, {}).get("name") - if not name: - repo.init_remote( - remote, - [ - "encryption=none", - "type=external", - "autoenable=true", - "externaltype={}".format(remote), - "uuid={}".format(uuid), - ], - ) - - elif repo.is_special_annex_remote(name, check_if_known=False): - logger.debug("special remote %s is enabled", name) - - else: - logger.debug("special remote %s found, enabling", name) - repo.enable_remote(name) - - -def fileToList(input_file: str) -> List[str]: - readfile = open(input_file) - readstr = readfile.read() - - startDict = readstr.index("{") - endDict = readstr.index("}") - string_server = readstr[0:startDict] - dictString = readstr[startDict : endDict + 1] - string_to = readstr[endDict + 1 : len(readstr)] - - dictString.replace("\n", "") - string_server = string_server[1 : len(string_server) - 1] - string_to = string_to[1 : len(string_to) - 1] - - string_server = string_server.replace("\n", "") - string_server = string_server.replace(",", "") - string_server = string_server.replace('"', "") - string_server = string_server.replace("'", "") - string_server = string_server.replace(" ", "") - - string_to = string_to.replace(",", "") - string_to = string_to.replace('"', "") - string_to = string_to.replace("'", "") - string_to = string_to.replace("\n", "") - string_to = string_to.replace(" ", "") - - return [string_server + dictString, string_to] - - -def toUrl(request: str) -> str: - return "cdsrequest:v1-" + urllib.parse.quote( - base64.urlsafe_b64encode(request.encode("utf-8")) - ) diff --git a/src/datalad_cds/spec.py b/src/datalad_cds/spec.py new file mode 100644 index 0000000..ba67300 --- /dev/null +++ b/src/datalad_cds/spec.py @@ -0,0 +1,52 @@ +from __future__ import annotations + +import base64 +import dataclasses +import json +import urllib.parse +from typing import Any, Dict + +import datalad_cds.compat + + +@dataclasses.dataclass +class Spec: + dataset: str + sub_selection: dict + + @classmethod + def from_dict(cls, d: Dict[str, Any]) -> Spec: + return cls(dataset=d["dataset"], sub_selection=d["sub-selection"]) + + @classmethod + def from_json(cls, s: str) -> Spec: + return cls.from_dict(json.loads(s)) + + @classmethod + def from_url(cls, url: str) -> Spec: + if not url.startswith("cds:v1-"): + raise ValueError("unsupported URL value encountered") + spec = cls.from_json( + base64.urlsafe_b64decode( + urllib.parse.unquote( + datalad_cds.compat.removeprefix(url, "cds:v1-") + ).encode("utf-8") + ).decode("utf-8") + ) + return spec + + def to_dict(self) -> Dict[str, Any]: + return { + "dataset": self.dataset, + "sub-selection": self.sub_selection, + } + + def to_json(self) -> str: + return json.dumps(self.to_dict(), separators=(",", ":")) + + def to_url(self) -> str: + json_spec = self.to_json() + url = "cds:v1-" + urllib.parse.quote( + base64.urlsafe_b64encode(json_spec.encode("utf-8")) + ) + return url diff --git a/tests/conftest.py b/tests/conftest.py index 788c6fd..53fe7af 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1 +1,12 @@ +import pathlib + +import datalad.api as da +import pytest from datalad.conftest import setup_package # noqa: F401 + + +@pytest.fixture +def empty_dataset(tmp_path: pathlib.Path) -> da.Dataset: + dataset = da.create(tmp_path) + yield dataset + dataset.drop(what="all", reckless="kill", recursive=True) diff --git a/tests/test_download_cds.py b/tests/test_download_cds.py new file mode 100644 index 0000000..cc57ac5 --- /dev/null +++ b/tests/test_download_cds.py @@ -0,0 +1,53 @@ +import json +import os +from typing import Union + +import datalad.api as da +import datalad_cds +import pytest + +request_dict = { + "dataset": "reanalysis-era5-pressure-levels", + "sub-selection": { + "variable": "temperature", + "pressure_level": "1000", + "product_type": "reanalysis", + "date": "2017-12-01/2017-12-31", + "time": "12:00", + "format": "grib", + }, +} + + +@pytest.mark.parametrize("cds_request", [request_dict, json.dumps(request_dict)]) +def test_download_cds(cds_request: Union[str, dict], empty_dataset: da.Dataset) -> None: + dataset = empty_dataset + dataset.configuration("set", [("remote.cds.dry-run", "true")], scope="local") + dataset.download_cds( + cds_request, + path="download.grib", + ) + actual_request = datalad_cds.spec.Spec.from_json( + (dataset.pathobj / "download.grib").read_text() + ) + if isinstance(cds_request, dict): + expected_request = datalad_cds.spec.Spec.from_dict(cds_request) + elif isinstance(cds_request, str): + expected_request = datalad_cds.spec.Spec.from_json(cds_request) + assert actual_request == expected_request + + +@pytest.mark.parametrize("cds_request", [request_dict, json.dumps(request_dict)]) +def test_download_cds_lazy( + cds_request: Union[str, dict], empty_dataset: da.Dataset +) -> None: + dataset = empty_dataset + dataset.download_cds( + cds_request, + path="download.grib", + lazy=True, + ) + assert ( + os.readlink(dataset.pathobj / "download.grib") + == ".git/annex/objects/2x/JF/URL--cds&cv1-eyJkYXRhc2V0IjoicmVhbmFs-b66f78bc76f33a53e89a1c679e330019/URL--cds&cv1-eyJkYXRhc2V0IjoicmVhbmFs-b66f78bc76f33a53e89a1c679e330019" + ) diff --git a/tests/test_register.py b/tests/test_register.py index d6d471e..e912b3f 100644 --- a/tests/test_register.py +++ b/tests/test_register.py @@ -1,4 +1,5 @@ -def test_register() -> None: - import datalad.api as da +import datalad.api as da + +def test_register() -> None: assert hasattr(da, "download_cds") diff --git a/tests/test_spec.py b/tests/test_spec.py new file mode 100644 index 0000000..1d84eb8 --- /dev/null +++ b/tests/test_spec.py @@ -0,0 +1,15 @@ +import hypothesis as h +import pytest +from datalad_cds.spec import Spec + + +@h.given(...) +def test_spec_url_equality(spec: Spec) -> None: + assert Spec.from_url(spec.to_url()) == spec + + +@h.given(...) +def test_spec_invalid_url_causes_value_error(url: str) -> None: + h.assume(not url.startswith("cds:v1-")) + with pytest.raises(ValueError): + Spec.from_url(url) diff --git a/tox.ini b/tox.ini index 7fcf85d..e406bbf 100644 --- a/tox.ini +++ b/tox.ini @@ -1,25 +1,23 @@ [tox] envlist = format - typing lint - py3 + typing + pytest [testenv:format] deps = .[devel] -commands = - black --check . - isort --check . +commands = ruff format --check -[testenv:typing] +[testenv:lint] deps = .[devel] -commands = mypy . +commands = ruff check -[testenv:lint] +[testenv:typing] deps = .[devel] -commands = flake8 . +commands = mypy . -[testenv:py3] +[testenv:pytest] deps = .[devel] use_develop = true setenv = @@ -27,6 +25,4 @@ setenv = GIT_AUTHOR_EMAIL = test@test GIT_COMMITTER_NAME = Test Committer GIT_COMMITTER_EMAIL = test@test -commands = - python --version - pytest --cov=datalad_cds tests {posargs} +commands = pytest --cov=datalad_cds {posargs} diff --git a/versioneer.py b/versioneer.py index 18e34c2..1e3753e 100644 --- a/versioneer.py +++ b/versioneer.py @@ -1,5 +1,5 @@ -# Version: 0.28 +# Version: 0.29 """The Versioneer - like a rocketeer, but for versions. @@ -10,7 +10,7 @@ * https://github.com/python-versioneer/python-versioneer * Brian Warner * License: Public Domain (Unlicense) -* Compatible with: Python 3.7, 3.8, 3.9, 3.10 and pypy3 +* Compatible with: Python 3.7, 3.8, 3.9, 3.10, 3.11 and pypy3 * [![Latest Version][pypi-image]][pypi-url] * [![Build Status][travis-image]][travis-url] @@ -316,7 +316,8 @@ import subprocess import sys from pathlib import Path -from typing import Callable, Dict +from typing import Any, Callable, cast, Dict, List, Optional, Tuple, Union +from typing import NoReturn import functools have_tomllib = True @@ -332,8 +333,16 @@ class VersioneerConfig: """Container for Versioneer configuration parameters.""" + VCS: str + style: str + tag_prefix: str + versionfile_source: str + versionfile_build: Optional[str] + parentdir_prefix: Optional[str] + verbose: Optional[bool] -def get_root(): + +def get_root() -> str: """Get the project root directory. We require that all commands are run from the project root, i.e. the @@ -341,13 +350,23 @@ def get_root(): """ root = os.path.realpath(os.path.abspath(os.getcwd())) setup_py = os.path.join(root, "setup.py") + pyproject_toml = os.path.join(root, "pyproject.toml") versioneer_py = os.path.join(root, "versioneer.py") - if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): + if not ( + os.path.exists(setup_py) + or os.path.exists(pyproject_toml) + or os.path.exists(versioneer_py) + ): # allow 'python path/to/setup.py COMMAND' root = os.path.dirname(os.path.realpath(os.path.abspath(sys.argv[0]))) setup_py = os.path.join(root, "setup.py") + pyproject_toml = os.path.join(root, "pyproject.toml") versioneer_py = os.path.join(root, "versioneer.py") - if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): + if not ( + os.path.exists(setup_py) + or os.path.exists(pyproject_toml) + or os.path.exists(versioneer_py) + ): err = ("Versioneer was unable to run the project root directory. " "Versioneer requires setup.py to be executed from " "its immediate directory (like 'python setup.py COMMAND'), " @@ -372,23 +391,24 @@ def get_root(): return root -def get_config_from_root(root): +def get_config_from_root(root: str) -> VersioneerConfig: """Read the project setup.cfg file to determine Versioneer config.""" # This might raise OSError (if setup.cfg is missing), or # configparser.NoSectionError (if it lacks a [versioneer] section), or # configparser.NoOptionError (if it lacks "VCS="). See the docstring at # the top of versioneer.py for instructions on writing your setup.cfg . - root = Path(root) - pyproject_toml = root / "pyproject.toml" - setup_cfg = root / "setup.cfg" - section = None + root_pth = Path(root) + pyproject_toml = root_pth / "pyproject.toml" + setup_cfg = root_pth / "setup.cfg" + section: Union[Dict[str, Any], configparser.SectionProxy, None] = None if pyproject_toml.exists() and have_tomllib: try: with open(pyproject_toml, 'rb') as fobj: pp = tomllib.load(fobj) section = pp['tool']['versioneer'] - except (tomllib.TOMLDecodeError, KeyError): - pass + except (tomllib.TOMLDecodeError, KeyError) as e: + print(f"Failed to load config from {pyproject_toml}: {e}") + print("Try to load it from setup.cfg") if not section: parser = configparser.ConfigParser() with open(setup_cfg) as cfg_file: @@ -397,16 +417,25 @@ def get_config_from_root(root): section = parser["versioneer"] + # `cast`` really shouldn't be used, but its simplest for the + # common VersioneerConfig users at the moment. We verify against + # `None` values elsewhere where it matters + cfg = VersioneerConfig() cfg.VCS = section['VCS'] cfg.style = section.get("style", "") - cfg.versionfile_source = section.get("versionfile_source") + cfg.versionfile_source = cast(str, section.get("versionfile_source")) cfg.versionfile_build = section.get("versionfile_build") - cfg.tag_prefix = section.get("tag_prefix") + cfg.tag_prefix = cast(str, section.get("tag_prefix")) if cfg.tag_prefix in ("''", '""', None): cfg.tag_prefix = "" cfg.parentdir_prefix = section.get("parentdir_prefix") - cfg.verbose = section.get("verbose") + if isinstance(section, configparser.SectionProxy): + # Make sure configparser translates to bool + cfg.verbose = section.getboolean("verbose") + else: + cfg.verbose = section.get("verbose") + return cfg @@ -419,22 +448,28 @@ class NotThisMethod(Exception): HANDLERS: Dict[str, Dict[str, Callable]] = {} -def register_vcs_handler(vcs, method): # decorator +def register_vcs_handler(vcs: str, method: str) -> Callable: # decorator """Create decorator to mark a method as the handler of a VCS.""" - def decorate(f): + def decorate(f: Callable) -> Callable: """Store f in HANDLERS[vcs][method].""" HANDLERS.setdefault(vcs, {})[method] = f return f return decorate -def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, - env=None): +def run_command( + commands: List[str], + args: List[str], + cwd: Optional[str] = None, + verbose: bool = False, + hide_stderr: bool = False, + env: Optional[Dict[str, str]] = None, +) -> Tuple[Optional[str], Optional[int]]: """Call the given command(s).""" assert isinstance(commands, list) process = None - popen_kwargs = {} + popen_kwargs: Dict[str, Any] = {} if sys.platform == "win32": # This hides the console window if pythonw.exe is used startupinfo = subprocess.STARTUPINFO() @@ -450,8 +485,7 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, stderr=(subprocess.PIPE if hide_stderr else None), **popen_kwargs) break - except OSError: - e = sys.exc_info()[1] + except OSError as e: if e.errno == errno.ENOENT: continue if verbose: @@ -479,7 +513,7 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, # that just contains the computed version number. # This file is released into the public domain. -# Generated by versioneer-0.28 +# Generated by versioneer-0.29 # https://github.com/python-versioneer/python-versioneer """Git implementation of _version.py.""" @@ -489,11 +523,11 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, import re import subprocess import sys -from typing import Callable, Dict +from typing import Any, Callable, Dict, List, Optional, Tuple import functools -def get_keywords(): +def get_keywords() -> Dict[str, str]: """Get the keywords needed to look up the version information.""" # these strings will be replaced by git during git-archive. # setup.py/versioneer.py will grep for the variable names, so they must @@ -509,8 +543,15 @@ def get_keywords(): class VersioneerConfig: """Container for Versioneer configuration parameters.""" + VCS: str + style: str + tag_prefix: str + parentdir_prefix: str + versionfile_source: str + verbose: bool + -def get_config(): +def get_config() -> VersioneerConfig: """Create, populate and return the VersioneerConfig() object.""" # these strings are filled in when 'setup.py versioneer' creates # _version.py @@ -532,9 +573,9 @@ class NotThisMethod(Exception): HANDLERS: Dict[str, Dict[str, Callable]] = {} -def register_vcs_handler(vcs, method): # decorator +def register_vcs_handler(vcs: str, method: str) -> Callable: # decorator """Create decorator to mark a method as the handler of a VCS.""" - def decorate(f): + def decorate(f: Callable) -> Callable: """Store f in HANDLERS[vcs][method].""" if vcs not in HANDLERS: HANDLERS[vcs] = {} @@ -543,13 +584,19 @@ def decorate(f): return decorate -def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, - env=None): +def run_command( + commands: List[str], + args: List[str], + cwd: Optional[str] = None, + verbose: bool = False, + hide_stderr: bool = False, + env: Optional[Dict[str, str]] = None, +) -> Tuple[Optional[str], Optional[int]]: """Call the given command(s).""" assert isinstance(commands, list) process = None - popen_kwargs = {} + popen_kwargs: Dict[str, Any] = {} if sys.platform == "win32": # This hides the console window if pythonw.exe is used startupinfo = subprocess.STARTUPINFO() @@ -565,8 +612,7 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, stderr=(subprocess.PIPE if hide_stderr else None), **popen_kwargs) break - except OSError: - e = sys.exc_info()[1] + except OSError as e: if e.errno == errno.ENOENT: continue if verbose: @@ -586,7 +632,11 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, return stdout, process.returncode -def versions_from_parentdir(parentdir_prefix, root, verbose): +def versions_from_parentdir( + parentdir_prefix: str, + root: str, + verbose: bool, +) -> Dict[str, Any]: """Try to determine the version from the parent directory name. Source tarballs conventionally unpack into a directory that includes both @@ -611,13 +661,13 @@ def versions_from_parentdir(parentdir_prefix, root, verbose): @register_vcs_handler("git", "get_keywords") -def git_get_keywords(versionfile_abs): +def git_get_keywords(versionfile_abs: str) -> Dict[str, str]: """Extract version information from the given file.""" # the code embedded in _version.py can just fetch the value of these # keywords. When used from setup.py, we don't want to import _version.py, # so we do it with a regexp instead. This function is not used from # _version.py. - keywords = {} + keywords: Dict[str, str] = {} try: with open(versionfile_abs, "r") as fobj: for line in fobj: @@ -639,7 +689,11 @@ def git_get_keywords(versionfile_abs): @register_vcs_handler("git", "keywords") -def git_versions_from_keywords(keywords, tag_prefix, verbose): +def git_versions_from_keywords( + keywords: Dict[str, str], + tag_prefix: str, + verbose: bool, +) -> Dict[str, Any]: """Get version information from git keywords.""" if "refnames" not in keywords: raise NotThisMethod("Short version file found") @@ -703,7 +757,12 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): @register_vcs_handler("git", "pieces_from_vcs") -def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): +def git_pieces_from_vcs( + tag_prefix: str, + root: str, + verbose: bool, + runner: Callable = run_command +) -> Dict[str, Any]: """Get version from 'git describe' in the root of the source tree. This only gets called if the git-archive 'subst' keywords were *not* @@ -743,7 +802,7 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): raise NotThisMethod("'git rev-parse' failed") full_out = full_out.strip() - pieces = {} + pieces: Dict[str, Any] = {} pieces["long"] = full_out pieces["short"] = full_out[:7] # maybe improved later pieces["error"] = None @@ -835,14 +894,14 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): return pieces -def plus_or_dot(pieces): +def plus_or_dot(pieces: Dict[str, Any]) -> str: """Return a + if we don't already have one, else return a .""" if "+" in pieces.get("closest-tag", ""): return "." return "+" -def render_pep440(pieces): +def render_pep440(pieces: Dict[str, Any]) -> str: """Build up version string, with post-release "local version identifier". Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you @@ -867,7 +926,7 @@ def render_pep440(pieces): return rendered -def render_pep440_branch(pieces): +def render_pep440_branch(pieces: Dict[str, Any]) -> str: """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . The ".dev0" means not master branch. Note that .dev0 sorts backwards @@ -897,7 +956,7 @@ def render_pep440_branch(pieces): return rendered -def pep440_split_post(ver): +def pep440_split_post(ver: str) -> Tuple[str, Optional[int]]: """Split pep440 version string at the post-release segment. Returns the release segments before the post-release and the @@ -907,7 +966,7 @@ def pep440_split_post(ver): return vc[0], int(vc[1] or 0) if len(vc) == 2 else None -def render_pep440_pre(pieces): +def render_pep440_pre(pieces: Dict[str, Any]) -> str: """TAG[.postN.devDISTANCE] -- No -dirty. Exceptions: @@ -931,7 +990,7 @@ def render_pep440_pre(pieces): return rendered -def render_pep440_post(pieces): +def render_pep440_post(pieces: Dict[str, Any]) -> str: """TAG[.postDISTANCE[.dev0]+gHEX] . The ".dev0" means dirty. Note that .dev0 sorts backwards @@ -958,7 +1017,7 @@ def render_pep440_post(pieces): return rendered -def render_pep440_post_branch(pieces): +def render_pep440_post_branch(pieces: Dict[str, Any]) -> str: """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . The ".dev0" means not master branch. @@ -987,7 +1046,7 @@ def render_pep440_post_branch(pieces): return rendered -def render_pep440_old(pieces): +def render_pep440_old(pieces: Dict[str, Any]) -> str: """TAG[.postDISTANCE[.dev0]] . The ".dev0" means dirty. @@ -1009,7 +1068,7 @@ def render_pep440_old(pieces): return rendered -def render_git_describe(pieces): +def render_git_describe(pieces: Dict[str, Any]) -> str: """TAG[-DISTANCE-gHEX][-dirty]. Like 'git describe --tags --dirty --always'. @@ -1029,7 +1088,7 @@ def render_git_describe(pieces): return rendered -def render_git_describe_long(pieces): +def render_git_describe_long(pieces: Dict[str, Any]) -> str: """TAG-DISTANCE-gHEX[-dirty]. Like 'git describe --tags --dirty --always -long'. @@ -1049,7 +1108,7 @@ def render_git_describe_long(pieces): return rendered -def render(pieces, style): +def render(pieces: Dict[str, Any], style: str) -> Dict[str, Any]: """Render the given version pieces into the requested style.""" if pieces["error"]: return {"version": "unknown", @@ -1085,7 +1144,7 @@ def render(pieces, style): "date": pieces.get("date")} -def get_versions(): +def get_versions() -> Dict[str, Any]: """Get version information or return default if unable to do so.""" # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have # __file__, we can work backwards from there to the root. Some @@ -1133,13 +1192,13 @@ def get_versions(): @register_vcs_handler("git", "get_keywords") -def git_get_keywords(versionfile_abs): +def git_get_keywords(versionfile_abs: str) -> Dict[str, str]: """Extract version information from the given file.""" # the code embedded in _version.py can just fetch the value of these # keywords. When used from setup.py, we don't want to import _version.py, # so we do it with a regexp instead. This function is not used from # _version.py. - keywords = {} + keywords: Dict[str, str] = {} try: with open(versionfile_abs, "r") as fobj: for line in fobj: @@ -1161,7 +1220,11 @@ def git_get_keywords(versionfile_abs): @register_vcs_handler("git", "keywords") -def git_versions_from_keywords(keywords, tag_prefix, verbose): +def git_versions_from_keywords( + keywords: Dict[str, str], + tag_prefix: str, + verbose: bool, +) -> Dict[str, Any]: """Get version information from git keywords.""" if "refnames" not in keywords: raise NotThisMethod("Short version file found") @@ -1225,7 +1288,12 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): @register_vcs_handler("git", "pieces_from_vcs") -def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): +def git_pieces_from_vcs( + tag_prefix: str, + root: str, + verbose: bool, + runner: Callable = run_command +) -> Dict[str, Any]: """Get version from 'git describe' in the root of the source tree. This only gets called if the git-archive 'subst' keywords were *not* @@ -1265,7 +1333,7 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): raise NotThisMethod("'git rev-parse' failed") full_out = full_out.strip() - pieces = {} + pieces: Dict[str, Any] = {} pieces["long"] = full_out pieces["short"] = full_out[:7] # maybe improved later pieces["error"] = None @@ -1357,7 +1425,7 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): return pieces -def do_vcs_install(versionfile_source, ipy): +def do_vcs_install(versionfile_source: str, ipy: Optional[str]) -> None: """Git-specific installation logic for Versioneer. For Git, this means creating/changing .gitattributes to mark _version.py @@ -1395,7 +1463,11 @@ def do_vcs_install(versionfile_source, ipy): run_command(GITS, ["add", "--"] + files) -def versions_from_parentdir(parentdir_prefix, root, verbose): +def versions_from_parentdir( + parentdir_prefix: str, + root: str, + verbose: bool, +) -> Dict[str, Any]: """Try to determine the version from the parent directory name. Source tarballs conventionally unpack into a directory that includes both @@ -1420,7 +1492,7 @@ def versions_from_parentdir(parentdir_prefix, root, verbose): SHORT_VERSION_PY = """ -# This file was generated by 'versioneer.py' (0.28) from +# This file was generated by 'versioneer.py' (0.29) from # revision-control system data, or from the parent directory name of an # unpacked source archive. Distribution tarballs contain a pre-generated copy # of this file. @@ -1437,7 +1509,7 @@ def get_versions(): """ -def versions_from_file(filename): +def versions_from_file(filename: str) -> Dict[str, Any]: """Try to determine the version from _version.py if present.""" try: with open(filename) as f: @@ -1454,9 +1526,8 @@ def versions_from_file(filename): return json.loads(mo.group(1)) -def write_to_version_file(filename, versions): +def write_to_version_file(filename: str, versions: Dict[str, Any]) -> None: """Write the given version number to the given _version.py file.""" - os.unlink(filename) contents = json.dumps(versions, sort_keys=True, indent=1, separators=(",", ": ")) with open(filename, "w") as f: @@ -1465,14 +1536,14 @@ def write_to_version_file(filename, versions): print("set %s to '%s'" % (filename, versions["version"])) -def plus_or_dot(pieces): +def plus_or_dot(pieces: Dict[str, Any]) -> str: """Return a + if we don't already have one, else return a .""" if "+" in pieces.get("closest-tag", ""): return "." return "+" -def render_pep440(pieces): +def render_pep440(pieces: Dict[str, Any]) -> str: """Build up version string, with post-release "local version identifier". Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you @@ -1497,7 +1568,7 @@ def render_pep440(pieces): return rendered -def render_pep440_branch(pieces): +def render_pep440_branch(pieces: Dict[str, Any]) -> str: """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . The ".dev0" means not master branch. Note that .dev0 sorts backwards @@ -1527,7 +1598,7 @@ def render_pep440_branch(pieces): return rendered -def pep440_split_post(ver): +def pep440_split_post(ver: str) -> Tuple[str, Optional[int]]: """Split pep440 version string at the post-release segment. Returns the release segments before the post-release and the @@ -1537,7 +1608,7 @@ def pep440_split_post(ver): return vc[0], int(vc[1] or 0) if len(vc) == 2 else None -def render_pep440_pre(pieces): +def render_pep440_pre(pieces: Dict[str, Any]) -> str: """TAG[.postN.devDISTANCE] -- No -dirty. Exceptions: @@ -1561,7 +1632,7 @@ def render_pep440_pre(pieces): return rendered -def render_pep440_post(pieces): +def render_pep440_post(pieces: Dict[str, Any]) -> str: """TAG[.postDISTANCE[.dev0]+gHEX] . The ".dev0" means dirty. Note that .dev0 sorts backwards @@ -1588,7 +1659,7 @@ def render_pep440_post(pieces): return rendered -def render_pep440_post_branch(pieces): +def render_pep440_post_branch(pieces: Dict[str, Any]) -> str: """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . The ".dev0" means not master branch. @@ -1617,7 +1688,7 @@ def render_pep440_post_branch(pieces): return rendered -def render_pep440_old(pieces): +def render_pep440_old(pieces: Dict[str, Any]) -> str: """TAG[.postDISTANCE[.dev0]] . The ".dev0" means dirty. @@ -1639,7 +1710,7 @@ def render_pep440_old(pieces): return rendered -def render_git_describe(pieces): +def render_git_describe(pieces: Dict[str, Any]) -> str: """TAG[-DISTANCE-gHEX][-dirty]. Like 'git describe --tags --dirty --always'. @@ -1659,7 +1730,7 @@ def render_git_describe(pieces): return rendered -def render_git_describe_long(pieces): +def render_git_describe_long(pieces: Dict[str, Any]) -> str: """TAG-DISTANCE-gHEX[-dirty]. Like 'git describe --tags --dirty --always -long'. @@ -1679,7 +1750,7 @@ def render_git_describe_long(pieces): return rendered -def render(pieces, style): +def render(pieces: Dict[str, Any], style: str) -> Dict[str, Any]: """Render the given version pieces into the requested style.""" if pieces["error"]: return {"version": "unknown", @@ -1719,7 +1790,7 @@ class VersioneerBadRootError(Exception): """The project root directory is unknown or missing key files.""" -def get_versions(verbose=False): +def get_versions(verbose: bool = False) -> Dict[str, Any]: """Get the project version from whatever source is available. Returns dict with two keys: 'version' and 'full'. @@ -1734,7 +1805,7 @@ def get_versions(verbose=False): assert cfg.VCS is not None, "please set [versioneer]VCS= in setup.cfg" handlers = HANDLERS.get(cfg.VCS) assert handlers, "unrecognized VCS '%s'" % cfg.VCS - verbose = verbose or cfg.verbose + verbose = verbose or bool(cfg.verbose) # `bool()` used to avoid `None` assert cfg.versionfile_source is not None, \ "please set versioneer.versionfile_source" assert cfg.tag_prefix is not None, "please set versioneer.tag_prefix" @@ -1795,12 +1866,12 @@ def get_versions(verbose=False): "date": None} -def get_version(): +def get_version() -> str: """Get the short version string for this project.""" return get_versions()["version"] -def get_cmdclass(cmdclass=None): +def get_cmdclass(cmdclass: Optional[Dict[str, Any]] = None): """Get the custom setuptools subclasses used by Versioneer. If the package uses a different cmdclass (e.g. one from numpy), it @@ -1828,16 +1899,16 @@ def get_cmdclass(cmdclass=None): class cmd_version(Command): description = "report generated version string" - user_options = [] - boolean_options = [] + user_options: List[Tuple[str, str, str]] = [] + boolean_options: List[str] = [] - def initialize_options(self): + def initialize_options(self) -> None: pass - def finalize_options(self): + def finalize_options(self) -> None: pass - def run(self): + def run(self) -> None: vers = get_versions(verbose=True) print("Version: %s" % vers["version"]) print(" full-revisionid: %s" % vers.get("full-revisionid")) @@ -1867,12 +1938,12 @@ def run(self): # we override different "build_py" commands for both environments if 'build_py' in cmds: - _build_py = cmds['build_py'] + _build_py: Any = cmds['build_py'] else: from setuptools.command.build_py import build_py as _build_py class cmd_build_py(_build_py): - def run(self): + def run(self) -> None: root = get_root() cfg = get_config_from_root(root) versions = get_versions() @@ -1891,12 +1962,12 @@ def run(self): cmds["build_py"] = cmd_build_py if 'build_ext' in cmds: - _build_ext = cmds['build_ext'] + _build_ext: Any = cmds['build_ext'] else: from setuptools.command.build_ext import build_ext as _build_ext class cmd_build_ext(_build_ext): - def run(self): + def run(self) -> None: root = get_root() cfg = get_config_from_root(root) versions = get_versions() @@ -1923,7 +1994,7 @@ def run(self): cmds["build_ext"] = cmd_build_ext if "cx_Freeze" in sys.modules: # cx_freeze enabled? - from cx_Freeze.dist import build_exe as _build_exe + from cx_Freeze.dist import build_exe as _build_exe # type: ignore # nczeczulin reports that py2exe won't like the pep440-style string # as FILEVERSION, but it can be used for PRODUCTVERSION, e.g. # setup(console=[{ @@ -1932,7 +2003,7 @@ def run(self): # ... class cmd_build_exe(_build_exe): - def run(self): + def run(self) -> None: root = get_root() cfg = get_config_from_root(root) versions = get_versions() @@ -1956,12 +2027,12 @@ def run(self): if 'py2exe' in sys.modules: # py2exe enabled? try: - from py2exe.setuptools_buildexe import py2exe as _py2exe + from py2exe.setuptools_buildexe import py2exe as _py2exe # type: ignore except ImportError: - from py2exe.distutils_buildexe import py2exe as _py2exe + from py2exe.distutils_buildexe import py2exe as _py2exe # type: ignore class cmd_py2exe(_py2exe): - def run(self): + def run(self) -> None: root = get_root() cfg = get_config_from_root(root) versions = get_versions() @@ -1984,12 +2055,12 @@ def run(self): # sdist farms its file list building out to egg_info if 'egg_info' in cmds: - _egg_info = cmds['egg_info'] + _egg_info: Any = cmds['egg_info'] else: from setuptools.command.egg_info import egg_info as _egg_info class cmd_egg_info(_egg_info): - def find_sources(self): + def find_sources(self) -> None: # egg_info.find_sources builds the manifest list and writes it # in one shot super().find_sources() @@ -2021,12 +2092,12 @@ def find_sources(self): # we override different "sdist" commands for both environments if 'sdist' in cmds: - _sdist = cmds['sdist'] + _sdist: Any = cmds['sdist'] else: from setuptools.command.sdist import sdist as _sdist class cmd_sdist(_sdist): - def run(self): + def run(self) -> None: versions = get_versions() self._versioneer_generated_versions = versions # unless we update this, the command will keep using the old @@ -2034,7 +2105,7 @@ def run(self): self.distribution.metadata.version = versions["version"] return _sdist.run(self) - def make_release_tree(self, base_dir, files): + def make_release_tree(self, base_dir: str, files: List[str]) -> None: root = get_root() cfg = get_config_from_root(root) _sdist.make_release_tree(self, base_dir, files) @@ -2099,7 +2170,7 @@ def make_release_tree(self, base_dir, files): """ -def do_setup(): +def do_setup() -> int: """Do main VCS-independent setup function for installing Versioneer.""" root = get_root() try: @@ -2126,6 +2197,7 @@ def do_setup(): ipy = os.path.join(os.path.dirname(cfg.versionfile_source), "__init__.py") + maybe_ipy: Optional[str] = ipy if os.path.exists(ipy): try: with open(ipy, "r") as f: @@ -2146,16 +2218,16 @@ def do_setup(): print(" %s unmodified" % ipy) else: print(" %s doesn't exist, ok" % ipy) - ipy = None + maybe_ipy = None # Make VCS-specific changes. For git, this means creating/changing # .gitattributes to mark _version.py for export-subst keyword # substitution. - do_vcs_install(cfg.versionfile_source, ipy) + do_vcs_install(cfg.versionfile_source, maybe_ipy) return 0 -def scan_setup_py(): +def scan_setup_py() -> int: """Validate the contents of setup.py against Versioneer's expectations.""" found = set() setters = False @@ -2192,7 +2264,7 @@ def scan_setup_py(): return errors -def setup_command(): +def setup_command() -> NoReturn: """Set up Versioneer and exit with appropriate error code.""" errors = do_setup() errors += scan_setup_py()