diff --git a/tardis/data/atomic_data_repo.yml b/tardis/data/atomic_data_repo.yml index a554a3851d1..fb746d36fa0 100644 --- a/tardis/data/atomic_data_repo.yml +++ b/tardis/data/atomic_data_repo.yml @@ -6,3 +6,4 @@ kurucz_cd23_chianti_H_He: - https://dev.azure.com/tardis-sn/TARDIS/_apis/git/repositories/tardis-refdata/items?path=atom_data/kurucz_cd23_chianti_H_He.h5&resolveLfs=true - https://media.githubusercontent.com/media/tardis-sn/tardis-refdata/master/atom_data/kurucz_cd23_chianti_H_He.h5 uuid: NA + md5: 69a304e1e85e06508fe02dd8c5ba9397 diff --git a/tardis/io/atom_data/atom_web_download.py b/tardis/io/atom_data/atom_web_download.py index a7d9fec17ea..753dc012156 100644 --- a/tardis/io/atom_data/atom_web_download.py +++ b/tardis/io/atom_data/atom_web_download.py @@ -44,7 +44,8 @@ def download_atom_data(atomic_data_name=None): dst_dir = os.path.join(get_data_dir(), f"{atomic_data_name}.h5") src_url = atomic_repo[atomic_data_name]["url"] - mirrors = atomic_repo[atomic_data_name]["mirrors"] + mirrors = tuple(atomic_repo[atomic_data_name]["mirrors"]) + checksum = atomic_repo[atomic_data_name]["md5"] logger.info(f"Downloading atomic data from {src_url} to {dst_dir}") - download_from_url(src_url, dst_dir, mirrors) + download_from_url(src_url, dst_dir, checksum, mirrors) diff --git a/tardis/io/util.py b/tardis/io/util.py index 79526cf90a3..01f157d31ad 100644 --- a/tardis/io/util.py +++ b/tardis/io/util.py @@ -1,22 +1,22 @@ # Utility functions for the IO part of TARDIS +import collections.abc as collections_abc +import hashlib +import logging import os import re import shutil -import logging - -import pandas as pd -import numpy as np -import collections.abc as collections_abc from collections import OrderedDict +from functools import lru_cache +import numpy as np +import pandas as pd import yaml - -from tardis import constants as const from astropy import units as u from astropy.utils.data import download_file from tardis import __path__ as TARDIS_PATH +from tardis 
def download_from_url(url, dst, checksum, src=None, retries=3):
    """Download a file, verify its MD5 checksum, and copy it to *dst*.

    Parameters
    ----------
    url : str
        URL to download from
    dst : str
        Destination path for the downloaded file
    checksum : str
        Expected MD5 hex digest of the downloaded file
    src : tuple, optional
        Tuple of mirror URLs to try as alternative sources
    retries : int, optional
        Number of additional download attempts allowed after a
        checksum mismatch (default 3, i.e. at most 4 downloads)
    """
    # NOTE(review): the @lru_cache(maxsize=None) decorator was removed.
    # Memoizing a side-effecting downloader also caches *failed* attempts,
    # so a later call with the same arguments would silently skip the
    # download instead of retrying; the cache was unbounded as well.
    for attempt in range(retries + 1):
        cached_file_path = download_file(url, sources=src, pkgname="tardis")

        # Hash in fixed-size chunks so large HDF5 files are never read
        # into memory in one piece.
        digest = hashlib.md5()
        with open(cached_file_path, "rb") as f:
            for block in iter(lambda: f.read(1 << 20), b""):
                digest.update(block)

        if digest.hexdigest() == checksum:
            shutil.copy(cached_file_path, dst)
            return

        remaining = retries - attempt
        if remaining > 0:
            # Same message/count semantics as the recursive version:
            # first failure reports `retries` attempts remaining.
            logger.warning(
                f"Incorrect checksum, retrying... ({remaining} attempts remaining)"
            )

    # Deliberate best-effort behavior preserved: log and return rather
    # than raise, matching the original "Aborting" branch.
    logger.error("Maximum number of retries reached. Aborting")