From 05c1d7054d84bf461375d8474f58436da32f5826 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=A3=20Bida=20Vacaro?= Date: Thu, 17 Oct 2024 12:21:20 -0300 Subject: [PATCH] UPDATE ERA5 API CALLS --> CDS has changed the ERA5 datasets & api calls --- poetry.lock | 41 +++- pyproject.toml | 2 +- satellite/downloader/__init__.py | 2 +- satellite/downloader/extract_reanalysis.py | 267 ++++++--------------- satellite/downloader/request.py | 8 +- 5 files changed, 119 insertions(+), 201 deletions(-) diff --git a/poetry.lock b/poetry.lock index 52e36b8..ec5c849 100644 --- a/poetry.lock +++ b/poetry.lock @@ -285,17 +285,36 @@ webencodings = "*" [package.extras] css = ["tinycss2 (>=1.1.0,<1.3)"] +[[package]] +name = "cads-api-client" +version = "1.4.5" +description = "CADS API Python client" +optional = false +python-versions = "*" +files = [ + {file = "cads_api_client-1.4.5-py3-none-any.whl", hash = "sha256:36c746396972e06711e7f608183d58a1213bcfa8fd87413cd099ac4ca92bd11a"}, + {file = "cads_api_client-1.4.5.tar.gz", hash = "sha256:4b98b36e07f1a5868390e15346e19bf52b6ef20c2a14a9b437d7883676af6325"}, +] + +[package.dependencies] +attrs = "*" +multiurl = "*" +requests = "*" +typing-extensions = "*" + [[package]] name = "cdsapi" -version = "0.5.1" +version = "0.7.3" description = "Climate Data Store API" optional = false python-versions = "*" files = [ - {file = "cdsapi-0.5.1.tar.gz", hash = "sha256:19f3e92f1996cc1115d0b0281617edeaecf3eefca03fbd384cfbc520d5f0476d"}, + {file = "cdsapi-0.7.3-py2.py3-none-any.whl", hash = "sha256:3bf432783e6ff0b47b0b33466c6e05e7ddad52fda5f05bf269596f5be30d623b"}, + {file = "cdsapi-0.7.3.tar.gz", hash = "sha256:883a1376ca495457eb55fd548dbbb6f5b64f2e4c880b3586dd37ba9041e51c82"}, ] [package.dependencies] +cads-api-client = ">=1.3.2" requests = ">=2.5.0" tqdm = "*" @@ -1828,6 +1847,22 @@ files = [ {file = "mistune-3.0.2.tar.gz", hash = "sha256:fc7f93ded930c92394ef2cb6f04a8aabab4117a91449e72dcc8dfa646a508be8"}, ] +[[package]] +name = "multiurl" +version = "0.3.1" +description = "A package to download several URL as one, as well as supporting multi-part URLs" +optional = false +python-versions = "*" +files = [ + {file = "multiurl-0.3.1.tar.gz", hash = "sha256:c7001437b59d56d4c310d725c3dcfff98c97c4b652893d88989853827465d442"}, +] + +[package.dependencies] +python-dateutil = "*" +pytz = "*" +requests = "*" +tqdm = "*" + [[package]] name = "mypy-extensions" version = "1.0.0" @@ -3894,4 +3929,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = ">=3.10,<3.12" -content-hash = "86630e357eb192ef8ce5e348dd03d78131f7af1d4d593569548eefcab86ec85c" +content-hash = "9c3b5cf64d12569fb9e3e7317e63eedeb0c2c7bb601e349324cc376ba6c76823" diff --git a/pyproject.toml b/pyproject.toml index e11fd21..4e97c14 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,7 +31,7 @@ packages = [ [tool.poetry.dependencies] python = ">=3.10,<3.12" -cdsapi = "^0.5.1" +cdsapi = ">=0.7.3" pandas = ">=2.0.0" numpy = ">=1.16.4" SQLAlchemy = ">=2.0.28" diff --git a/satellite/downloader/__init__.py b/satellite/downloader/__init__.py index fdb4d74..d640f25 100644 --- a/satellite/downloader/__init__.py +++ b/satellite/downloader/__init__.py @@ -2,4 +2,4 @@ """satellite_weather_downloader Weather Collection Python package""" # TODO: Docstrings from . import request # noqa -from .extract_reanalysis import download_br_netcdf, download_netcdf # noqa +from .extract_reanalysis import download_netcdf # noqa diff --git a/satellite/downloader/extract_reanalysis.py b/satellite/downloader/extract_reanalysis.py index 4a6f681..dca4059 100644 --- a/satellite/downloader/extract_reanalysis.py +++ b/satellite/downloader/extract_reanalysis.py @@ -33,17 +33,17 @@ import logging import os -import re +from dotenv import load_dotenv from datetime import datetime, timedelta from pathlib import Path -from typing import Optional, Tuple, Union +from typing import Optional, Literal -import pandas as pd import urllib3 from cdsapi.api import Client _GLOBE_AREA = {"N": 90.0, "W": -180.0, "S": -90.0, "E": 180.0} _DATA_DIR = Path.home() / "copernicus_data" +_LOCALES = ["BR", "AR"] _HELP = "Use `help(extract_reanalysis.download_br_netcdf)` for more info." @@ -58,42 +58,18 @@ _MIN_DELAY = _CUR_DATE - timedelta(days=6) _MIN_DELAY_F = datetime.strftime(_MIN_DELAY, _DATE_FORMAT) +load_dotenv() -def download_br_netcdf( - date: Optional[str] = None, - date_end: Optional[str] = None, - data_dir: Optional[str] = str(_DATA_DIR), - user_key: Optional[str] = None, -): - if date and not date_end: - filename = f"BR_{date}" - - elif all([date, date_end]): - filename = f"BR_{date}_{date_end}" - else: - filename = f"BR_{_MIN_DELAY_F}" - - filename = filename.replace("-", "") - - return download_netcdf( - filename=filename, - date=date, - date_end=date_end, - area={"N": 5.5, "W": -74.0, "S": -33.75, "E": -32.25}, - data_dir=data_dir, - user_key=user_key, - ) - - -# TODO: make download_netcdf accepts date and datetime types. def download_netcdf( - filename: str, + filename: str = None, date: Optional[str] = None, date_end: Optional[str] = None, - area: Optional[dict] = _GLOBE_AREA, - data_dir: Optional[str] = str(_DATA_DIR), + locale: Optional[Literal["BR", "AR"]] = None, + area: Optional[dict] = None, + output_dir: Optional[str] = str(_DATA_DIR), user_key: Optional[str] = None, + verbose: bool = False ): """ Creates the request for Copernicus API. Extracts the latitude and @@ -138,38 +114,53 @@ def download_netcdf( to transform into a `xarray.Dataset` with the CopeBRDatasetExtension located in `satellite.weather` module. """ - Path(str(data_dir)).mkdir(parents=True, exist_ok=True) + Path(str(output_dir)).mkdir(parents=True, exist_ok=True) if not user_key: - cdsapi_key = os.getenv("CDSAPI_KEY") + cdsapi_token = os.getenv("CDSAPI_TOKEN") else: - cdsapi_key = user_key + cdsapi_token = user_key - if not cdsapi_key: + if not cdsapi_token: raise EnvironmentError( "Environment variable CDSAPI_KEY not found in the system.\n" - 'Execute `$ export CDSAPI_KEY=":" to fix.\n' + 'Execute `$ export CDSAPI_TOKEN="" to fix.\n' "These credentials are found in your Copernicus User Page: \n" "https://cds.climate.copernicus.eu/user/USER" ) conn = Client( - url="https://cds.climate.copernicus.eu/api/v2", - key=cdsapi_key, + url="https://cds.climate.copernicus.eu/api", + key=cdsapi_token, ) - if date and not date_end: - year, month, day = _format_dates(date) + if locale and locale not in _LOCALES: + raise ValueError(f"locale {locale} not supported. Options: {_LOCALES}") - elif all([date, date_end]): - year, month, day = _format_dates(date, date_end) - - elif not date and not date_end: - logging.warning( - "No date provided, downloading last" + f" available date: {_MIN_DELAY_F}" - ) - year, month, day = _format_dates(_MIN_DELAY_F) + if not area: + match locale: + case None: + area = _GLOBE_AREA + case "BR": + area = {"N": 5.5, "W": -74.0, "S": -33.75, "E": -32.25} + case "AR": + area = {"N": -21.0, "W": -74.0, "S": -56.0, "E": -53.0} + if date and not date_end: + filename = f"{locale or 'WW'}_{date}" + date_req = str(date) + elif all([date, date_end]): + filename = f"{locale or 'WW'}_{date}_{date_end}" + date_req = f"{date}/{date_end}" + elif not any([date, date_end]): + if verbose: + logging.warning( + "No date provided, downloading last" + f" available date: {_MIN_DELAY_F}" + ) + date = _MIN_DELAY_F + date_req = str(date) + filename = f"{locale or 'WW'}_{date}" else: raise Exception( f""" @@ -178,7 +169,7 @@ def download_netcdf( """ ) - if not list(area.keys()) == ["N", "W", "S", "E"]: + if set(area.keys()) != set(["N", "W", "S", "E"]): raise KeyError( """ Wrong area format; @@ -187,7 +178,8 @@ def download_netcdf( ) if not all([isinstance(v, (int, float)) for v in area.values()]): - raise ValueError("Coordinate values must be rather int or float values") + raise ValueError( + "Coordinate values must be rather int or float values") if abs(area["N"]) > 90 or abs(area["S"]) > 90: raise ValueError("Latitude must be between -90 and 90") @@ -195,146 +187,37 @@ def download_netcdf( if abs(area["W"]) > 180 or abs(area["E"]) > 180: raise ValueError("Longitude must be between -180 and 180") - file = f"{data_dir}/{filename}.nc" + file = f"{output_dir}/{filename}.nc" + if Path(file).exists(): return file - else: - try: - urllib3.disable_warnings() - conn.retrieve( - "reanalysis-era5-single-levels", - { - "product_type": "reanalysis", - "variable": [ - "2m_temperature", - "total_precipitation", - "2m_dewpoint_temperature", - "mean_sea_level_pressure", - ], - "year": year, - "month": month, - "day": day, - "time": [ - "00:00", - "03:00", - "06:00", - "09:00", - "12:00", - "15:00", - "18:00", - "21:00", - ], - "area": list(area.values()), - "format": "netcdf", - }, - str(file), - ) - return str(file) - - except Exception as e: - logging.error(e) - raise e - - -def _format_dates( - date: str, - date_end: Optional[str] = None, -) -> Tuple[Union[str, list], Union[str, list], Union[str, list]]: - """ - Returns the days, months and years by given a date or - a date range. - Attrs: - date (str) : Initial date. - date_end (str): If provided, defines a date range to be extracted. - Returns: - year (str or list) : The year(s) related to date provided. - month (str or list): The month(s) related to date provided. - day (str or list) : The day(s) related to date provided. - """ - - ini_date = datetime.strptime(date, _DATE_FORMAT) - year, month, day = date.split("-") - - if ini_date > _MIN_DELAY: - raise Exception( - f""" - Invalid date. The last update date is: - {_MIN_DELAY_F} - {_HELP} - """ - ) - - # check for right initial date format - if not re.match(_RE_FORMAT, date): - raise Exception( - f""" - Invalid initial date. Format: - {_ISO_FORMAT} - {_HELP} - """ - ) - - # an end date can be passed to define the date range - # if there is no end date, only the day specified on - # `date` will be downloaded - if date_end: - end_date = datetime.strptime(date_end, _DATE_FORMAT) - - # check for right end date format - if not re.match(_RE_FORMAT, date_end): - raise Exception( - f""" - Invalid end date. Format: - {_ISO_FORMAT} - {_HELP} - """ - ) - - # safety limit for Copernicus limit and file size: 1 year - max_api_query = timedelta(days=367) - if end_date - ini_date > max_api_query: - raise Exception( - f""" - Maximum query reached (limit: {max_api_query.days} days). - {_HELP} - """ - ) - - # end date can't be bigger than initial date - if end_date < ini_date: - raise Exception( - f""" - Please select a valid date range. - {_HELP} - """ - ) - - # the date range will be responsible for match the requests - # if the date is across months. For example a week that ends - # after the month. - df = pd.date_range(start=date, end=date_end) - year_set = set() - month_set = set() - day_set = set() - for date in df: - date_f = str(date) - iso_form = date_f.split(" ")[0] - year_, month_, day_ = iso_form.split("-") - year_set.add(year_) - month_set.add(month_) - day_set.add(day_) - # parsing the correct types - month = list(month_set) - day = list(day_set) - # sorting them (can't do inline) - month.sort() - day.sort() - - if len(year_set) == 1: - year = str(year_set.pop()) - else: - year = list(year_set) - year.sort() - - return year, month, day + urllib3.disable_warnings() + conn.retrieve( + "reanalysis-era5-land", + { + "product_type": ["reanalysis"], + "variable": [ + "2m_temperature", + "total_precipitation", + "2m_dewpoint_temperature", + "surface_pressure", + ], + "date": date_req, + "time": [ + "00:00", + "03:00", + "06:00", + "09:00", + "12:00", + "15:00", + "18:00", + "21:00", + ], + "area": [area["N"], area["W"], area["S"], area["E"]], + "format": "netcdf", + }, + str(file), + ).download() + + return str(file) diff --git a/satellite/downloader/request.py b/satellite/downloader/request.py index 216f5a1..effc8f2 100644 --- a/satellite/downloader/request.py +++ b/satellite/downloader/request.py @@ -44,8 +44,8 @@ def ERA5_reanalysis( if char not in allowed_chars: raise ValueError(f"Invalid character {char}") - cdsapi_key = os.getenv("CDSAPI_KEY") - if not cdsapi_key: + cdsapi_token = os.getenv("CDSAPI_KEY") + if not cdsapi_token: raise EnvironmentError( "Environment variable CDSAPI_KEY not found in the system.\n" 'Execute `$ export CDSAPI_KEY="{MY_UID}:{MY_KEY}" to fix.\n' @@ -54,8 +54,8 @@ def ERA5_reanalysis( ) conn = Client( - url="https://cds.climate.copernicus.eu/api/v2", - key=cdsapi_key, + url="https://cds.climate.copernicus.eu/api", + key=cdsapi_token, ) options = reanalysis_prompt(