From a7616231a40a60ce7b9bfc3882b90fce790c9dc4 Mon Sep 17 00:00:00 2001 From: Max Gautier Date: Fri, 2 Feb 2024 16:01:14 +0100 Subject: [PATCH] download_hash.py: generalized and data-driven The script is currently limited to one hardcoded URL for kubernetes related binaries, and a fixed set of architectures. The solution is three-fold: 1. Use an url template dictionary for each download -> this allow to easily add support for new downloads. 2. Source the architectures to search from the existing data 3. Enumerate the existing versions in the data and start searching from the last one until no newer version is found (newer in the version order sense, irrespective of actual age) --- scripts/download_hash.py | 50 +++++++++++++++++++++++++--------------- 1 file changed, 31 insertions(+), 19 deletions(-) diff --git a/scripts/download_hash.py b/scripts/download_hash.py index 61025c8bf5e..e3eb3bf4ce2 100644 --- a/scripts/download_hash.py +++ b/scripts/download_hash.py @@ -6,7 +6,7 @@ import sys -from itertools import count +from itertools import count, groupby from collections import defaultdict import requests from ruamel.yaml import YAML @@ -25,36 +25,48 @@ def open_checksums_yaml(): return data, yaml +def version_compare(version): + return Version(version.removeprefix("v")) def download_hash(minors): - architectures = ["arm", "arm64", "amd64", "ppc64le"] - downloads = ["kubelet", "kubectl", "kubeadm"] + downloads = { + "containerd_archive": "https://github.com/containerd/containerd/releases/download/v{version}/containerd-{version}-{os}-{arch}.tar.gz.sha256sum", + "kubeadm": "https://dl.k8s.io/release/{version}/bin/linux/{arch}/kubeadm.sha256", + "kubectl": "https://dl.k8s.io/release/{version}/bin/linux/{arch}/kubectl.sha256", + "kubelet": "https://dl.k8s.io/release/{version}/bin/linux/{arch}/kubelet.sha256", + "runc": "https://github.com/opencontainers/runc/releases/download/{version}/runc.{arch}.sha256sum", + } data, yaml = open_checksums_yaml() - if not minors: - minors = {'.'.join(minor.split('.')[:-1]) for minor in data["kubelet_checksums"]["amd64"].keys()} - for download in downloads: + for download, url in downloads.items(): checksum_name = f"{download}_checksums" - data[checksum_name] = defaultdict(dict, data[checksum_name]) - for arch in architectures: - for minor in minors: - if not minor.startswith("v"): - minor = f"v{minor}" - for release in (f"{minor}.{patch}" for patch in count(start=0, step=1)): - if release in data[checksum_name][arch]: + for arch, versions in data[checksum_name].items(): + for minor, patches in groupby(versions.copy().keys(), lambda v : '.'.join(v.split('.')[:-1])): + for version in (f"{minor}.{patch}" for patch in + count(start=int(max(patches, key=version_compare).split('.')[-1]), + step=1)): + # Those barbaric generators do the following: + # Group all patches versions by minor number, take the newest and start from that + # to find new versions + if version in versions and versions[version] != 0: continue - hash_file = requests.get(f"https://dl.k8s.io/release/{release}/bin/linux/{arch}/{download}.sha256", allow_redirects=True) + hash_file = requests.get(downloads[download].format( + version = version, + os = "linux", + arch = arch + ), + allow_redirects=True) if hash_file.status_code == 404: - print(f"Unable to find {download} hash file for release {release} (arch: {arch})") + print(f"Unable to find {download} hash file for version {version} (arch: {arch}) at {hash_file.url}") break hash_file.raise_for_status() - sha256sum = hash_file.content.decode().strip() + sha256sum = hash_file.content.decode().split(' ')[0] if len(sha256sum) != 64: - raise Exception(f"Checksum has an unexpected length: {len(sha256sum)} (binary: {download}, arch: {arch}, release: 1.{minor}.{patch})") - data[checksum_name][arch][release] = sha256sum + raise Exception(f"Checksum has an unexpected length: {len(sha256sum)} (binary: {download}, arch: {arch}, release: {version}, checksum: '{sha256sum}')") + data[checksum_name][arch][version] = sha256sum data[checksum_name] = {arch : {r : releases[r] for r in sorted(releases.keys(), - key=lambda v : Version(v[1:]), + key=version_compare, reverse=True)} for arch, releases in data[checksum_name].items()}