rapidsai · raydouglass · Dec 11, 2024 · Dec 10, 2024 · Dec 10, 2024 · Dec 11, 2024
@@ -11,15 +11,17 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import cudf
-import dask_cudf
-import yaml
 import os
 import pandas as pd
-import cugraph.dask as dcg
+import yaml
 from pathlib import Path
-import urllib.request
+from urllib.request import urlretrieve
+
+import cudf
+import cugraph.dask as dcg
+import dask_cudf
 from cugraph.structure.graph_classes import Graph
+from cugraph.utilities import install_ssl_cert
 
 
 class DefaultDownloadDir:
@@ -142,7 +144,8 @@ def __download_csv(self, url):
         filename = self.metadata["name"] + self.metadata["file_type"]
         if self._dl_path.path.is_dir():
             self._path = self._dl_path.path / filename
-            urllib.request.urlretrieve(url, str(self._path))
+            install_ssl_cert()
+            urlretrieve(url, str(self._path))
 
         else:
             raise RuntimeError(
@@ -458,7 +461,8 @@ def download_all(force=False):
                 filename = meta["name"] + meta["file_type"]
                 save_to = default_download_dir.path / filename
                 if not save_to.is_file() or force:
-                    urllib.request.urlretrieve(meta["url"], str(save_to))
+                    install_ssl_cert()
+                    urlretrieve(meta["url"], str(save_to))
 
 
 def set_download_dir(path):

@@ -13,16 +13,14 @@
 
 import warnings
 import tarfile
-
 import urllib.request
 
 import cudf
 from cugraph.datasets.dataset import (
     DefaultDownloadDir,
     default_download_dir,
 )
-
-# results_dir_path = utils.RAPIDS_DATASET_ROOT_DIR_PATH / "tests" / "resultsets"
+from cugraph.utilities import install_ssl_cert
 
 
 class Resultset:
@@ -107,6 +105,7 @@ def load_resultset(resultset_name, resultset_download_url):
         if not curr_resultset_download_dir.exists():
             curr_resultset_download_dir.mkdir(parents=True, exist_ok=True)
         if not compressed_file_path.exists():
+            install_ssl_cert()
             urllib.request.urlretrieve(resultset_download_url, compressed_file_path)
         tar = tarfile.open(str(compressed_file_path), "r:gz")
         # TODO: pass filter="fully_trusted" when minimum supported Python version >=3.12

diff --git a/python/cugraph/cugraph/utilities/__init__.py b/python/cugraph/cugraph/utilities/__init__.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2022, NVIDIA CORPORATION.
+# Copyright (c) 2019-2024, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -18,6 +18,7 @@
 from cugraph.utilities.nx_factory import df_edge_score_to_dictionary
 from cugraph.utilities.nx_factory import cugraph_to_nx
 from cugraph.utilities.utils import (
+    install_ssl_cert,
     import_optional,
     ensure_cugraph_obj,
     ensure_cugraph_obj_for_nx,

diff --git a/python/cugraph/cugraph/utilities/utils.py b/python/cugraph/cugraph/utilities/utils.py
@@ -25,6 +25,10 @@
 
 from warnings import warn
 
+import certifi
+from ssl import create_default_context
+from urllib.request import build_opener, HTTPSHandler, install_opener
+
 # optional dependencies
 try:
     import cupy as cp
@@ -549,3 +553,13 @@ def create_directory_with_overwrite(directory):
     if os.path.exists(directory):
         shutil.rmtree(directory)
     os.makedirs(directory)
+
+
+def install_ssl_cert():
+    """
+    Install a urllib opener whose HTTPS handler uses an SSL context backed
+    by the certifi CA bundle.
+
+    Call this before downloading datasets with ``urllib.request`` so the
+    HTTPS request is made with that certificate bundle. The opener is
+    installed through ``urllib.request.install_opener``, so it replaces any
+    opener installed earlier.
+    """
+    # Point the default SSL context at the CA file that certifi ships.
+    certifi_context = create_default_context(cafile=certifi.where())
+    opener = build_opener(HTTPSHandler(context=certifi_context))
+    install_opener(opener)