Skip to content

Commit

Permalink
chore: add more return metadata and dry run (#129)
Browse files Browse the repository at this point in the history
Have a defined return object carrying more information, and a dry-run mode to access this object without downloading anything.
  • Loading branch information
renaudjester committed Oct 28, 2024
1 parent 73df91b commit 280c320
Show file tree
Hide file tree
Showing 41 changed files with 1,316 additions and 83 deletions.
8 changes: 8 additions & 0 deletions copernicusmarine/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,14 @@
logging.config.dictConfig(logging_configuration_dict)
logging.Formatter.converter = time.gmtime

from copernicusmarine.core_functions.models import (
DatasetCoordinatesExtent,
FileGet,
GeographicalExtent,
ResponseGet,
ResponseSubset,
TimeExtent,
)
from copernicusmarine.python_interface.describe import describe
from copernicusmarine.python_interface.get import get
from copernicusmarine.python_interface.login import login
Expand Down
2 changes: 2 additions & 0 deletions copernicusmarine/catalogue_parser/models.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import re

# TODO: change to pydantic
from dataclasses import dataclass
from enum import Enum
from typing import Optional, Type, TypeVar, Union
Expand Down
2 changes: 2 additions & 0 deletions copernicusmarine/catalogue_parser/request_structure.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ class SubsetRequest:
netcdf_compression_enabled: bool = False
netcdf_compression_level: Optional[int] = None
netcdf3_compatible: bool = False
dry_run: bool = False

def update(self, new_dict: dict):
"""Method to update values in SubsetRequest object.
Expand Down Expand Up @@ -217,6 +218,7 @@ class GetRequest:
sync_delete: bool = False
index_parts: bool = False
direct_download: Optional[list[str]] = None
dry_run: bool = False

def update(self, new_dict: dict):
"""Method to update values in GetRequest object.
Expand Down
13 changes: 12 additions & 1 deletion copernicusmarine/command_line_interface/group_get.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
)

logger = logging.getLogger("copernicusmarine")
blank_logger = logging.getLogger("copernicusmarine_blank_logger")


@click.group()
Expand Down Expand Up @@ -224,6 +225,13 @@ def cli_get() -> None:
default=False,
help="Option to get the index files of an INSITU dataset. Temporary option.",
)
@click.option(
"--dry-run",
type=bool,
is_flag=True,
default=False,
help="Runs query without downloading data.",
)
@tqdm_disable_option
@click.option(
"--log-level",
Expand Down Expand Up @@ -265,6 +273,7 @@ def get(
sync: bool,
sync_delete: bool,
index_parts: bool,
dry_run: bool,
disable_progress_bar: bool,
log_level: str,
staging: bool,
Expand All @@ -285,7 +294,7 @@ def get(
create_get_template()
return

return get_function(
result = get_function(
dataset_id=dataset_id,
force_dataset_version=dataset_version,
force_dataset_part=dataset_part,
Expand All @@ -307,6 +316,8 @@ def get(
sync=sync,
sync_delete=sync_delete,
index_parts=index_parts,
dry_run=dry_run,
disable_progress_bar=disable_progress_bar,
staging=staging,
)
blank_logger.info(result.model_dump_json(indent=2))
13 changes: 12 additions & 1 deletion copernicusmarine/command_line_interface/group_subset.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
)

logger = logging.getLogger("copernicusmarine")
blank_logger = logging.getLogger("copernicusmarine_blank_logger")


@click.group()
Expand Down Expand Up @@ -314,6 +315,13 @@ def cli_subset() -> None:
"quotes ' in the request."
),
)
@click.option(
"--dry-run",
type=bool,
is_flag=True,
default=False,
help="Runs query without downloading data.",
)
@tqdm_disable_option
@click.option(
"--log-level",
Expand Down Expand Up @@ -390,6 +398,7 @@ def subset(
motu_api_request: Optional[str],
force_download: bool,
overwrite_output_data: bool,
dry_run: bool,
disable_progress_bar: bool,
log_level: str,
staging: bool = False,
Expand All @@ -410,7 +419,7 @@ def subset(
create_subset_template()
return

subset_function(
response = subset_function(
dataset_id,
dataset_version,
dataset_part,
Expand All @@ -437,9 +446,11 @@ def subset(
motu_api_request,
force_download,
overwrite_output_data,
dry_run,
disable_progress_bar,
staging,
netcdf_compression_enabled,
netcdf_compression_level,
netcdf3_compatible=netcdf3_compatible,
)
blank_logger.info(response.model_dump_json(indent=2))
10 changes: 7 additions & 3 deletions copernicusmarine/core_functions/get.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import logging
import os
import pathlib
from typing import List, Optional
from typing import Optional

from copernicusmarine.catalogue_parser.request_structure import (
GetRequest,
Expand All @@ -12,6 +12,7 @@
from copernicusmarine.core_functions.credentials_utils import (
get_and_check_username_password,
)
from copernicusmarine.core_functions.models import ResponseGet
from copernicusmarine.core_functions.services_utils import (
CommandType,
RetrievalService,
Expand Down Expand Up @@ -48,9 +49,10 @@ def get_function(
sync: bool,
sync_delete: bool,
index_parts: bool,
dry_run: bool,
disable_progress_bar: bool,
staging: bool,
) -> List[pathlib.Path]:
) -> ResponseGet:
VersionVerifier.check_version_get(staging)
if staging:
logger.warning(
Expand Down Expand Up @@ -119,6 +121,8 @@ def get_function(
direct_download_files = get_direct_download_files(file_list_path)
if direct_download_files:
get_request.direct_download = direct_download_files
if create_file_list or dry_run:
get_request.dry_run = True

return _run_get_request(
username=username,
Expand All @@ -139,7 +143,7 @@ def _run_get_request(
credentials_file: Optional[pathlib.Path],
disable_progress_bar: bool,
staging: bool = False,
) -> List[pathlib.Path]:
) -> ResponseGet:
logger.debug("Checking username and password...")
username, password = get_and_check_username_password(
username, password, credentials_file
Expand Down
75 changes: 74 additions & 1 deletion copernicusmarine/core_functions/models.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
from typing import Literal, get_args
import pathlib
from typing import Literal, Optional, get_args

from pydantic import BaseModel, model_serializer

FileFormat = Literal["netcdf", "zarr"]
DEFAULT_FILE_FORMAT: FileFormat = "netcdf"
Expand All @@ -15,3 +18,73 @@
BoundingBoxMethod = Literal["inside", "nearest", "outside"]
DEFAULT_BOUNDING_BOX_METHOD: BoundingBoxMethod = "inside"
DEFAULT_BOUNDING_BOX_METHODS = list(get_args(BoundingBoxMethod))


class FileGet(BaseModel):
    """Metadata describing one file matched by a ``get`` query."""

    #: Full URL of the location of the file server side.
    url: str
    #: Size of the file in MB.
    size: float
    #: Last modified date.
    last_modified: str
    #: Path to the local downloaded file.
    output: pathlib.Path


class ResponseGet(BaseModel):
    """Metadata returned when using :func:`~copernicusmarine.get`."""

    #: Description of the files concerned by the query.
    files: list[FileGet]


class GeographicalExtent(BaseModel):
    """Interval for geographical coordinates."""

    #: Lower bound of the interval; may be ``None``.
    minimum: Optional[float]
    #: Upper bound of the interval; may be ``None``.
    maximum: Optional[float]


class TimeExtent(BaseModel):
    """Interval for the time coordinate."""

    #: Lower bound of the time interval as a string; may be ``None``.
    minimum: Optional[str]
    #: Upper bound of the time interval as a string; may be ``None``.
    maximum: Optional[str]


class DatasetCoordinatesExtent(BaseModel):
    """Bounds of the subsetted data along each coordinate axis."""

    #: Longitude interval of the subsetted data.
    longitude: GeographicalExtent
    #: Latitude interval of the subsetted data.
    latitude: GeographicalExtent
    #: Time interval of the subsetted data, as iso8601 strings.
    time: TimeExtent
    #: Depth interval of the subsetted data.
    depth: Optional[GeographicalExtent] = None
    #: Elevation interval of the subsetted data.
    #: Relevant when data are requested for elevation
    #: instead of depth.
    elevation: Optional[GeographicalExtent] = None

    @model_serializer(mode="wrap")
    def _serialize(self, handler):
        """Serialize normally, then drop the optional ``depth`` and
        ``elevation`` keys when their value is falsy (unset), so they
        do not appear in the serialized output."""
        serialized = handler(self)
        for optional_axis in ("depth", "elevation"):
            if not getattr(self, optional_axis):
                serialized.pop(optional_axis, None)
        return serialized


class ResponseSubset(BaseModel):
    """Metadata returned when using :func:`~copernicusmarine.subset`."""

    #: Path to the result file.
    output: pathlib.Path
    #: Estimation of the size of the final result file in MB.
    size: Optional[float]
    #: Estimation of the maximum amount of data needed to
    #: get the final result in MB.
    data_needed: Optional[float]
    #: The bounds of the subsetted dataset.
    #: NOTE(review): field name has a typo ("coodinates" -> "coordinates");
    #: renaming it would break the public API and the serialized JSON key,
    #: so it should go through a deprecation/alias path before being fixed.
    coodinates_extent: DatasetCoordinatesExtent
9 changes: 6 additions & 3 deletions copernicusmarine/core_functions/subset.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
)
from copernicusmarine.core_functions.models import (
BoundingBoxMethod,
ResponseSubset,
SubsetMethod,
)
from copernicusmarine.core_functions.services_utils import (
Expand Down Expand Up @@ -68,12 +69,13 @@ def subset_function(
motu_api_request: Optional[str],
force_download: bool,
overwrite_output_data: bool,
dry_run: bool,
disable_progress_bar: bool,
staging: bool,
netcdf_compression_enabled: bool,
netcdf_compression_level: Optional[int],
netcdf3_compatible: bool,
) -> pathlib.Path:
) -> ResponseSubset:
VersionVerifier.check_version_subset(staging)
if staging:
logger.warning(
Expand Down Expand Up @@ -120,6 +122,7 @@ def subset_function(
"netcdf_compression_enabled": netcdf_compression_enabled,
"netcdf_compression_level": netcdf_compression_level,
"netcdf3_compatible": netcdf3_compatible,
"dry_run": dry_run,
}
subset_request.update(request_update_dict)
if not subset_request.dataset_id:
Expand Down Expand Up @@ -189,7 +192,7 @@ def subset_function(
retrieval_service.service_format
== CopernicusMarineServiceFormat.ZARR
):
output_path = download_zarr(
response = download_zarr(
username,
password,
subset_request,
Expand All @@ -200,7 +203,7 @@ def subset_function(
)
else:
raise ServiceNotSupported(retrieval_service.service_type)
return output_path
return response


def create_subset_template() -> None:
Expand Down
4 changes: 0 additions & 4 deletions copernicusmarine/core_functions/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,10 +101,6 @@ def next_or_raise_exception(
raise exception_to_raise from exception


def flatten(list: list[list[_T]]) -> list[_T]:
return [item for sublist in list for item in sublist]


def construct_url_with_query_params(url, query_params: dict) -> Optional[str]:
req = PreparedRequest()
req.prepare_url(url, query_params)
Expand Down
Loading

0 comments on commit 280c320

Please sign in to comment.