From 648889fee678c10baac472fb9d7375b2e353a82f Mon Sep 17 00:00:00 2001
From: Filipe Fernandes
Date: Wed, 4 Dec 2024 17:29:11 +0100
Subject: [PATCH] use yarl

---
 erddapy/core/url.py | 94 ++++++++++++++++++++-------------------------
 1 file changed, 41 insertions(+), 53 deletions(-)

diff --git a/erddapy/core/url.py b/erddapy/core/url.py
index 9453387..629fda4 100644
--- a/erddapy/core/url.py
+++ b/erddapy/core/url.py
@@ -16,6 +16,7 @@
 import httpx
 import pytz
 from pandas import to_datetime
+from yarl import URL
 
 OptionalStr = str | None
 OptionalBool = bool | None
@@ -23,6 +24,9 @@
 OptionalList = list[str] | tuple[str] | None
 
 
+_BIG_NUMBER = int(1e6)
+
+
 def quote_url(url: str) -> str:
     """Quote URL args for modern ERDDAP servers."""
     # No idea why csv must be quoted in 2.23 but ncCF doesn't :-/
@@ -250,30 +254,6 @@ def get_search_url(  # noqa: PLR0913
         url: the search URL.
 
     """
-    server = server.rstrip("/")
-    base = (
-        "{server}/search/advanced.{response}"
-        "?page={page}"
-        "&itemsPerPage={itemsPerPage}"
-        "&protocol={protocol}"
-        "&cdm_data_type={cdm_data_type}"
-        "&institution={institution}"
-        "&ioos_category={ioos_category}"
-        "&keywords={keywords}"
-        "&long_name={long_name}"
-        "&standard_name={standard_name}"
-        "&variableName={variableName}"
-        "&minLon={minLon}"
-        "&maxLon={maxLon}"
-        "&minLat={minLat}"
-        "&maxLat={maxLat}"
-        "&minTime={minTime}"
-        "&maxTime={maxTime}"
-    )
-    if search_for:
-        search_for = parse.quote_plus(search_for)
-        base += "&searchFor={searchFor}"
-
     # Convert dates from datetime to `seconds since 1970-01-01T00:00:00Z`.
     min_time = kwargs.pop("min_time", "")
     max_time = kwargs.pop("max_time", "")
@@ -321,33 +301,37 @@ def get_search_url(  # noqa: PLR0913
         "tsv0",
     ]
     if response in non_paginated_responses:
-        items_per_page = int(1e6)
+        items_per_page = _BIG_NUMBER
 
     default = "(ANY)"
-    url = base.format(
-        server=server,
-        response=response,
-        page=page,
-        itemsPerPage=items_per_page,
-        protocol=kwargs.get("protocol", default),
-        cdm_data_type=kwargs.get("cdm_data_type", default),
-        institution=kwargs.get("institution", default),
-        ioos_category=kwargs.get("ioos_category", default),
-        keywords=kwargs.get("keywords", default),
-        long_name=kwargs.get("long_name", default),
-        standard_name=kwargs.get("standard_name", default),
-        variableName=kwargs.get("variableName", default),
-        minLon=kwargs.get("min_lon", default),
-        maxLon=kwargs.get("max_lon", default),
-        minLat=kwargs.get("min_lat", default),
-        maxLat=kwargs.get("max_lat", default),
-        minTime=kwargs.get("min_time", default),
-        maxTime=kwargs.get("max_time", default),
-        searchFor=search_for,
-    )
-    # ERDDAP 2.10 no longer accepts strings placeholder for dates.
-    # Removing them entirely should be OK for older versions too.
-    return url.replace("&minTime=(ANY)", "").replace("&maxTime=(ANY)", "")
+    query = {
+        "page": f"{page}",
+        "itemsPerPage": f"{items_per_page}",
+        "protocol": kwargs.get("protocol", default),
+        "cdm_data_type": kwargs.get("cdm_data_type", default),
+        "institution": kwargs.get("institution", default),
+        "ioos_category": kwargs.get("ioos_category", default),
+        "keywords": kwargs.get("keywords", default),
+        "long_name": kwargs.get("long_name", default),
+        "standard_name": kwargs.get("standard_name", default),
+        "variableName": kwargs.get("variableName", default),
+        "minLon": kwargs.get("min_lon", default),
+        "maxLon": kwargs.get("max_lon", default),
+        "minLat": kwargs.get("min_lat", default),
+        "maxLat": kwargs.get("max_lat", default),
+        # ERDDAP 2.10 no longer accepts strings placeholder for dates.
+        # Removing them entirely should be OK for older versions too.
+        "minTime": kwargs.get("min_time", ""),
+        "maxTime": kwargs.get("max_time", ""),
+    }
+    if search_for:
+        query.update({"searchFor": f"{search_for}"})
+
+    url = URL(server)
+    path = "search"
+    name = f"advanced.{response}"
+    url = (url / path / name).with_query(query)
+    return str(url)
 
 
 def get_info_url(
@@ -369,9 +353,13 @@ def get_info_url(
         url: the info URL for the `response` chosen.
 
     """
+    url = URL(server)
     if dataset_id is None:
-        return f"{server}/info/index.{response}?itemsPerPage=1000000"
-    return f"{server}/info/{dataset_id}/index.{response}"
+        url = (url / "info" / f"index.{response}").with_query(
+            {"itemsPerPage": _BIG_NUMBER},
+        )
+        return str(url)
+    return str(url / "info" / f"{dataset_id}" / f"index.{response}")
 
 
 def get_categorize_url(
@@ -521,7 +509,7 @@ def get_download_url(  # noqa: PLR0913, C901
     return _distinct(url, distinct=distinct)
 
 
-download_formats = [
+download_formats = (
     "asc",
     "csv",
     "csvp",
@@ -567,4 +555,4 @@ def get_download_url(  # noqa: PLR0913, C901
     "png",
     "largePng",
     "transparentPng",
-]
+)
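
For readers trying out the new URL plumbing, here is a minimal usage sketch. It is not part of the patch: the server address, dataset id, and search term are made-up examples, and it only relies on the same two yarl features the patch uses, the "/" path join and with_query().

"""Standalone sketch of the yarl-based URL composition used in this patch."""
from yarl import URL

server = "https://standards.sensors.ioos.us/erddap"  # example server; any ERDDAP instance works
response = "csv"

# Same pattern as the new get_search_url: join path segments with "/" and
# attach the query mapping with .with_query(); yarl encodes the query values,
# which is why the old parse.quote_plus(search_for) call is no longer needed.
search_url = (URL(server) / "search" / f"advanced.{response}").with_query(
    {
        "page": "1",
        "itemsPerPage": "1000000",
        "searchFor": "sea water temperature",
    },
)
print(search_url)

# Same pattern as the new get_info_url when a dataset id is given.
dataset_id = "org_cormp_cap2"  # example dataset id
info_url = URL(server) / "info" / dataset_id / f"index.{response}"
print(info_url)

Running the sketch prints the composed search and info URLs, with the query string already encoded by yarl rather than by hand-built string formatting.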