From 3b3ba48592ef54321ba4b14d46a9af0aa17aaacf Mon Sep 17 00:00:00 2001
From: Proton
Date: Thu, 22 Dec 2022 16:28:33 +0800
Subject: [PATCH] [ci] Sync CI cache script & workflow (#6959)

Issue: #6445

### Brief Summary

Add a manually triggered `cache.yml` workflow and a `sync-cache.py` script that
scans the CI scripts for dependency archive URLs and mirrors any missing archives
to the botmaster MinIO near cache and to Aliyun OSS. `download_dep()` now probes
both caches before falling back to the original URL.

---
 .github/workflows/cache.yml                      |  22 +++
 .../workflows/scripts/ci_common/bootstrap.py     |   4 +-
 .github/workflows/scripts/ci_common/dep.py       |  28 ++--
 .../scripts/requirements_synccache.txt           |   1 +
 .github/workflows/scripts/sync-cache.py          | 136 ++++++++++++++++++
 5 files changed, 180 insertions(+), 11 deletions(-)
 create mode 100644 .github/workflows/cache.yml
 create mode 100644 .github/workflows/scripts/requirements_synccache.txt
 create mode 100755 .github/workflows/scripts/sync-cache.py

diff --git a/.github/workflows/cache.yml b/.github/workflows/cache.yml
new file mode 100644
index 0000000000000..3914ccfecb27d
--- /dev/null
+++ b/.github/workflows/cache.yml
@@ -0,0 +1,22 @@
+name: Sync Near Cache
+on:
+  workflow_dispatch:
+
+env:
+  TI_CI: "1"
+
+jobs:
+  sync_cache:
+    name: Sync cache
+    runs-on:
+      - self-hosted
+      - Linux
+    steps:
+      - name: Sync Cache
+        run: |
+          .github/workflows/scripts/sync-cache.py
+        env:
+          BOT_MINIO_ACCESS_KEY: ${{ secrets.BOT_MINIO_ACCESS_KEY }}
+          BOT_MINIO_SECRET_KEY: ${{ secrets.BOT_MINIO_SECRET_KEY }}
+          BOT_OSS_ACCESS_KEY: ${{ secrets.BOT_OSS_ACCESS_KEY }}
+          BOT_OSS_SECRET_KEY: ${{ secrets.BOT_OSS_SECRET_KEY }}
diff --git a/.github/workflows/scripts/ci_common/bootstrap.py b/.github/workflows/scripts/ci_common/bootstrap.py
index 17c0f486b3aea..21aefdf1e7825 100644
--- a/.github/workflows/scripts/ci_common/bootstrap.py
+++ b/.github/workflows/scripts/ci_common/bootstrap.py
@@ -19,11 +19,11 @@ def is_in_venv() -> bool:
                                            and sys.base_prefix != sys.prefix)
 
 
-def ensure_dependencies():
+def ensure_dependencies(fn='requirements.txt'):
     '''
     Automatically install dependencies if they are not installed.
     '''
-    p = Path(__file__).parent.parent / 'requirements.txt'
+    p = Path(__file__).parent.parent / fn
     if not p.exists():
         raise RuntimeError(f'Cannot find {p}')
 
diff --git a/.github/workflows/scripts/ci_common/dep.py b/.github/workflows/scripts/ci_common/dep.py
index 121d0c0bd92e7..d7082d8934e8b 100644
--- a/.github/workflows/scripts/ci_common/dep.py
+++ b/.github/workflows/scripts/ci_common/dep.py
@@ -51,6 +51,10 @@ def unzip(filename, extract_dir, strip=0):
         ar.close()
 
 
+def escape_url(url):
+    return url.replace('/', '_').replace(':', '_')
+
+
 def download_dep(url, outdir, *, strip=0, force=False):
     '''
     Download a dependency archive from `url` and expand it to `outdir`,
@@ -64,20 +68,26 @@ def download_dep(url, outdir, *, strip=0, force=False):
 
     parsed = urlparse(url)
     name = Path(parsed.path).name
-    escaped = url.replace('/', '_').replace(':', '_')
+    escaped = escape_url(url)
     depcache = get_cache_home() / 'deps'
     depcache.mkdir(parents=True, exist_ok=True)
     local_cached = depcache / escaped
 
+    near_caches = [
+        f'http://botmaster.tgr:9000/misc/depcache/{escaped}/{name}',
+        f'https://taichi-bots.oss-cn-beijing.aliyuncs.com/depcache/{escaped}/{name}',
+    ]
+
     if not local_cached.exists():
-        cached_url = f'http://botmaster.tgr:9000/misc/depcache/{escaped}/{name}'
-        try:
-            resp = requests.head(cached_url, timeout=1)
-            if resp.ok:
-                print('Using near cache: ', cached_url)
-                url = cached_url
-        except Exception:
-            pass
+        for u in near_caches:
+            try:
+                resp = requests.head(u, timeout=1)
+                if resp.ok:
+                    print('Using near cache: ', u)
+                    url = u
+                    break
+            except Exception:
+                pass
 
         import tqdm
 
diff --git a/.github/workflows/scripts/requirements_synccache.txt b/.github/workflows/scripts/requirements_synccache.txt
new file mode 100644
index 0000000000000..30ddf823b87c1
--- /dev/null
+++ b/.github/workflows/scripts/requirements_synccache.txt
@@ -0,0 +1 @@
+boto3
diff --git a/.github/workflows/scripts/sync-cache.py b/.github/workflows/scripts/sync-cache.py
new file mode 100755
index 0000000000000..71127f68caf70
--- /dev/null
+++ b/.github/workflows/scripts/sync-cache.py
@@ -0,0 +1,136 @@
+#!/usr/bin/env python3
+
+import ci_common  # isort: skip, early initialization happens here
+
+import os
+import re
+import tempfile
+from pathlib import Path
+from urllib.parse import urlparse
+
+import requests
+import tqdm
+from ci_common.bootstrap import ensure_dependencies
+from ci_common.dep import escape_url
+
+ensure_dependencies('requirements_synccache.txt')
+RE = re.compile(r'(https?:\/\/[A-Za-z0-9\-./_%]+\.(tar\.gz|tgz|zip|exe))',
+                re.I)
+base = Path(__file__).parent
+
+
+def walk(path):
+    for f in path.iterdir():
+        if f.is_dir():
+            yield from walk(f)
+        else:
+            yield f
+
+
+def find_urls():
+    for f in walk(base):
+        if f.suffix not in ('.py', '.sh', '.ps1', '.yml', '.yaml', ''):
+            continue
+
+        with f.open() as f:
+            urls = RE.findall(f.read())
+            for url in urls:
+                yield url[0]
+
+
+def download(url):
+    '''
+    Download to temp file
+    '''
+    f = tempfile.TemporaryFile()
+    parsed = urlparse(url)
+    name = Path(parsed.path).name
+
+    with requests.get(url, stream=True) as r:
+        r.raise_for_status()
+        total_size = int(r.headers.get('content-length', 0))
+        prog = tqdm.tqdm(unit="B",
+                         unit_scale=True,
+                         unit_divisor=1024,
+                         total=total_size,
+                         desc=f'💾 {name}')
+        with prog:
+            for chunk in r.iter_content(chunk_size=8192):
+                sz = f.write(chunk)
+                prog.update(sz)
+
+    return f
+
+
+def upload(cli, prompt, bucket, path, f):
+    '''
+    Upload to cache
+    '''
+    total_size = f.seek(0, 2)
+    f.seek(0, 0)
+    prog = tqdm.tqdm(unit="B",
+                     unit_scale=True,
+                     unit_divisor=1024,
+                     total=total_size,
+                     desc=f'📤 {prompt}')
+
+    with prog:
+        orig, f.close = f.close, lambda: None
+        cli.upload_fileobj(f, bucket, path, Callback=prog.update)
+        f.close = orig
+
+
+def probe(url):
+    try:
+        resp = requests.head(url, timeout=5)
+        if resp.ok:
+            return True
+    except Exception:
+        pass
+
+    return False
+
+
+def make_cli(endpoint, key_id, key_secret, addr_style='path'):
+    import boto3
+    from botocore.client import Config
+    return boto3.client(
+        's3',
+        endpoint_url=endpoint,
+        aws_access_key_id=key_id,
+        aws_secret_access_key=key_secret,
+        config=Config(s3={'addressing_style': addr_style}),
+    )
+
+
+def main():
+    mastercli = make_cli('http://botmaster.tgr:9000',
+                         os.environ['BOT_MINIO_ACCESS_KEY'],
+                         os.environ['BOT_MINIO_SECRET_KEY'])
+    osscli = make_cli('https://oss-cn-beijing.aliyuncs.com',
+                      os.environ['BOT_OSS_ACCESS_KEY'],
+                      os.environ['BOT_OSS_SECRET_KEY'],
+                      addr_style='virtual')
+
+    for url in find_urls():
+        f = None
+        print(f'🔍 {url}')
+        escaped = escape_url(url)
+        name = Path(urlparse(url).path).name
+
+        if not probe(
+                f'http://botmaster.tgr:9000/misc/depcache/{escaped}/{name}'):
+            f = f or download(url)
+            upload(mastercli, 'Near Cache', 'misc',
+                   f'depcache/{escaped}/{name}', f)
+
+        if not probe(
+                f'https://taichi-bots.oss-cn-beijing.aliyuncs.com/depcache/{escaped}/{name}'
+        ):
+            f = f or download(url)
+            upload(osscli, 'Aliyun OSS', 'taichi-bots',
+                   f'depcache/{escaped}/{name}', f)
+
+
+if __name__ == '__main__':
+    main()
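
For readers following the cache lookup, here is a minimal standalone sketch of how the near-cache resolution in `download_dep()` behaves after this patch. `escape_url` and the two mirror endpoints are taken from the diff above; the dependency URL is a hypothetical placeholder, not something the patch references.

```python
import requests
from pathlib import Path
from urllib.parse import urlparse


def escape_url(url):
    # Same transformation as ci_common/dep.py: flatten the URL into a cache key.
    return url.replace('/', '_').replace(':', '_')


url = 'https://example.com/archives/some-dep.tar.gz'  # hypothetical dependency URL
escaped = escape_url(url)
name = Path(urlparse(url).path).name

# Candidate mirrors, probed in order (same endpoints as the patch).
near_caches = [
    f'http://botmaster.tgr:9000/misc/depcache/{escaped}/{name}',
    f'https://taichi-bots.oss-cn-beijing.aliyuncs.com/depcache/{escaped}/{name}',
]

for candidate in near_caches:
    try:
        # Quick HEAD with a short timeout: if a mirror has the archive, download from it.
        if requests.head(candidate, timeout=1).ok:
            url = candidate
            break
    except requests.RequestException:
        pass

print('resolved download URL:', url)
```

The new `sync-cache.py` keeps these mirrors populated: it scans the CI scripts for archive URLs, probes each mirror, and uploads anything missing via the S3 clients configured from the workflow's secrets.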