From 648889fee678c10baac472fb9d7375b2e353a82f Mon Sep 17 00:00:00 2001
From: Filipe Fernandes
Date: Wed, 4 Dec 2024 17:29:11 +0100
Subject: [PATCH] use yarl

---
 erddapy/core/url.py | 94 ++++++++++++++++++++-------------------------
 1 file changed, 41 insertions(+), 53 deletions(-)

diff --git a/erddapy/core/url.py b/erddapy/core/url.py
index 9453387..629fda4 100644
--- a/erddapy/core/url.py
+++ b/erddapy/core/url.py
@@ -16,6 +16,7 @@
 import httpx
 import pytz
 from pandas import to_datetime
+from yarl import URL
 
 OptionalStr = str | None
 OptionalBool = bool | None
@@ -23,6 +24,9 @@
 OptionalList = list[str] | tuple[str] | None
 
 
+_BIG_NUMBER = int(1e6)
+
+
 def quote_url(url: str) -> str:
     """Quote URL args for modern ERDDAP servers."""
     # No idea why csv must be quoted in 2.23 but ncCF doesn't :-/
@@ -250,30 +254,6 @@ def get_search_url(  # noqa: PLR0913
         url: the search URL.
 
     """
-    server = server.rstrip("/")
-    base = (
-        "{server}/search/advanced.{response}"
-        "?page={page}"
-        "&itemsPerPage={itemsPerPage}"
-        "&protocol={protocol}"
-        "&cdm_data_type={cdm_data_type}"
-        "&institution={institution}"
-        "&ioos_category={ioos_category}"
-        "&keywords={keywords}"
-        "&long_name={long_name}"
-        "&standard_name={standard_name}"
-        "&variableName={variableName}"
-        "&minLon={minLon}"
-        "&maxLon={maxLon}"
-        "&minLat={minLat}"
-        "&maxLat={maxLat}"
-        "&minTime={minTime}"
-        "&maxTime={maxTime}"
-    )
-    if search_for:
-        search_for = parse.quote_plus(search_for)
-        base += "&searchFor={searchFor}"
-
     # Convert dates from datetime to `seconds since 1970-01-01T00:00:00Z`.
     min_time = kwargs.pop("min_time", "")
     max_time = kwargs.pop("max_time", "")
@@ -321,33 +301,37 @@ def get_search_url(  # noqa: PLR0913
         "tsv0",
     ]
     if response in non_paginated_responses:
-        items_per_page = int(1e6)
+        items_per_page = _BIG_NUMBER
 
     default = "(ANY)"
-    url = base.format(
-        server=server,
-        response=response,
-        page=page,
-        itemsPerPage=items_per_page,
-        protocol=kwargs.get("protocol", default),
-        cdm_data_type=kwargs.get("cdm_data_type", default),
-        institution=kwargs.get("institution", default),
-        ioos_category=kwargs.get("ioos_category", default),
-        keywords=kwargs.get("keywords", default),
-        long_name=kwargs.get("long_name", default),
-        standard_name=kwargs.get("standard_name", default),
-        variableName=kwargs.get("variableName", default),
-        minLon=kwargs.get("min_lon", default),
-        maxLon=kwargs.get("max_lon", default),
-        minLat=kwargs.get("min_lat", default),
-        maxLat=kwargs.get("max_lat", default),
-        minTime=kwargs.get("min_time", default),
-        maxTime=kwargs.get("max_time", default),
-        searchFor=search_for,
-    )
-    # ERDDAP 2.10 no longer accepts strings placeholder for dates.
-    # Removing them entirely should be OK for older versions too.
-    return url.replace("&minTime=(ANY)", "").replace("&maxTime=(ANY)", "")
+    query = {
+        "page": f"{page}",
+        "itemsPerPage": f"{items_per_page}",
+        "protocol": kwargs.get("protocol", default),
+        "cdm_data_type": kwargs.get("cdm_data_type", default),
+        "institution": kwargs.get("institution", default),
+        "ioos_category": kwargs.get("ioos_category", default),
+        "keywords": kwargs.get("keywords", default),
+        "long_name": kwargs.get("long_name", default),
+        "standard_name": kwargs.get("standard_name", default),
+        "variableName": kwargs.get("variableName", default),
+        "minLon": kwargs.get("min_lon", default),
+        "maxLon": kwargs.get("max_lon", default),
+        "minLat": kwargs.get("min_lat", default),
+        "maxLat": kwargs.get("max_lat", default),
+        # ERDDAP 2.10 no longer accepts strings placeholder for dates.
+        # Removing them entirely should be OK for older versions too.
+        "minTime": kwargs.get("min_time", ""),
+        "maxTime": kwargs.get("max_time", ""),
+    }
+    if search_for:
+        query.update({"searchFor": f"{search_for}"})
+
+    url = URL(server)
+    path = "search"
+    name = f"advanced.{response}"
+    url = (url / path / name).with_query(query)
+    return str(url)
 
 
 def get_info_url(
@@ -369,9 +353,13 @@ def get_info_url(
         url: the info URL for the `response` chosen.
 
     """
+    url = URL(server)
     if dataset_id is None:
-        return f"{server}/info/index.{response}?itemsPerPage=1000000"
-    return f"{server}/info/{dataset_id}/index.{response}"
+        url = (url / "info" / f"index.{response}").with_query(
+            {"itemsPerPage": _BIG_NUMBER},
+        )
+        return str(url)
+    return str(url / "info" / f"{dataset_id}" / f"index.{response}")
 
 
 def get_categorize_url(
@@ -521,7 +509,7 @@ def get_download_url(  # noqa: PLR0913, C901
     return _distinct(url, distinct=distinct)
 
 
-download_formats = [
+download_formats = (
     "asc",
     "csv",
     "csvp",
@@ -567,4 +555,4 @@ def get_download_url(  # noqa: PLR0913, C901
     "png",
     "largePng",
     "transparentPng",
-]
+)
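
For readers trying out the new URL plumbing, here is a minimal usage sketch. It is not part of the patch: the server address, dataset id, and search term are made-up examples, and it only relies on the same two yarl features the patch uses, the "/" path join and with_query().

"""Standalone sketch of the yarl-based URL composition used in this patch."""
from yarl import URL

server = "https://standards.sensors.ioos.us/erddap"  # example server; any ERDDAP instance works
response = "csv"

# Same pattern as the new get_search_url: join path segments with "/" and
# attach the query mapping with .with_query(); yarl encodes the query values,
# which is why the old parse.quote_plus(search_for) call is no longer needed.
search_url = (URL(server) / "search" / f"advanced.{response}").with_query(
    {
        "page": "1",
        "itemsPerPage": "1000000",
        "searchFor": "sea water temperature",
    },
)
print(search_url)

# Same pattern as the new get_info_url when a dataset id is given.
dataset_id = "org_cormp_cap2"  # example dataset id
info_url = URL(server) / "info" / dataset_id / f"index.{response}"
print(info_url)

Running the sketch prints the composed search and info URLs, with the query string already encoded by yarl rather than by hand-built string formatting.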