Skip to content

Commit

Permalink
[ci] Sync CI cache script & workflow (#6959)
Browse files Browse the repository at this point in the history
Issue: #6445

### Brief Summary
  • Loading branch information
feisuzhu authored Dec 22, 2022
1 parent ee146c8 commit 3b3ba48
Show file tree
Hide file tree
Showing 5 changed files with 180 additions and 11 deletions.
22 changes: 22 additions & 0 deletions .github/workflows/cache.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Manually-triggered workflow that mirrors CI dependency archives into the
# near caches (MinIO on botmaster + Aliyun OSS) via sync-cache.py.
# NOTE(review): indentation reconstructed — the extracted page had flattened
# it; structure below follows standard GitHub Actions conventions.
# NOTE(review): there is no actions/checkout step — this assumes the
# self-hosted runner already has the repository checked out; confirm.
name: Sync Near Cache
on:
  workflow_dispatch:

env:
  TI_CI: "1"

jobs:
  sync_cache:
    name: Sync cache
    runs-on:
      - self-hosted
      - Linux
    steps:
      - name: Sync Cache
        run: |
          .github/workflows/scripts/sync-cache.py
        env:
          BOT_MINIO_ACCESS_KEY: ${{ secrets.BOT_MINIO_ACCESS_KEY }}
          BOT_MINIO_SECRET_KEY: ${{ secrets.BOT_MINIO_SECRET_KEY }}
          BOT_OSS_ACCESS_KEY: ${{ secrets.BOT_OSS_ACCESS_KEY }}
          BOT_OSS_SECRET_KEY: ${{ secrets.BOT_OSS_SECRET_KEY }}
4 changes: 2 additions & 2 deletions .github/workflows/scripts/ci_common/bootstrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,11 @@ def is_in_venv() -> bool:
and sys.base_prefix != sys.prefix)


def ensure_dependencies():
def ensure_dependencies(fn='requirements.txt'):
'''
Automatically install dependencies if they are not installed.
'''
p = Path(__file__).parent.parent / 'requirements.txt'
p = Path(__file__).parent.parent / fn
if not p.exists():
raise RuntimeError(f'Cannot find {p}')

Expand Down
28 changes: 19 additions & 9 deletions .github/workflows/scripts/ci_common/dep.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,10 @@ def unzip(filename, extract_dir, strip=0):
ar.close()


def escape_url(url):
return url.replace('/', '_').replace(':', '_')


def download_dep(url, outdir, *, strip=0, force=False):
'''
Download a dependency archive from `url` and expand it to `outdir`,
Expand All @@ -64,20 +68,26 @@ def download_dep(url, outdir, *, strip=0, force=False):

parsed = urlparse(url)
name = Path(parsed.path).name
escaped = url.replace('/', '_').replace(':', '_')
escaped = escape_url(url)
depcache = get_cache_home() / 'deps'
depcache.mkdir(parents=True, exist_ok=True)
local_cached = depcache / escaped

    near_caches = [
        f'http://botmaster.tgr:9000/misc/depcache/{escaped}/{name}',
        f'https://taichi-bots.oss-cn-beijing.aliyuncs.com/depcache/{escaped}/{name}',
    ]

if not local_cached.exists():
cached_url = f'http://botmaster.tgr:9000/misc/depcache/{escaped}/{name}'
try:
resp = requests.head(cached_url, timeout=1)
if resp.ok:
print('Using near cache: ', cached_url)
url = cached_url
except Exception:
pass
for u in near_caches:
try:
resp = requests.head(u, timeout=1)
if resp.ok:
print('Using near cache: ', u)
url = u
break
except Exception:
pass

import tqdm

Expand Down
1 change: 1 addition & 0 deletions .github/workflows/scripts/requirements_synccache.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
boto3
136 changes: 136 additions & 0 deletions .github/workflows/scripts/sync-cache.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
#!/usr/bin/env python3

import ci_common # isort: skip, early initialization happens here

import os
import re
import tempfile
from pathlib import Path
from urllib.parse import urlparse

import requests
import tqdm
from ci_common.bootstrap import ensure_dependencies
from ci_common.dep import escape_url

ensure_dependencies('requirements_synccache.txt')
RE = re.compile(r'(https?:\/\/[A-Za-z0-9\-./_%]+\.(tar\.gz|tgz|zip|exe))',
re.I)
base = Path(__file__).parent


def walk(path):
    '''
    Recursively yield every regular file below *path*.

    Directories are descended into (depth-first, in ``iterdir`` order)
    but never yielded themselves.
    '''
    for entry in path.iterdir():
        if not entry.is_dir():
            yield entry
        else:
            yield from walk(entry)


def find_urls(root=None):
    '''
    Yield every archive URL (tar.gz/tgz/zip/exe, per module-level ``RE``)
    referenced by the script/config files under *root*.

    :param root: directory to scan; defaults to the directory containing
        this script (module-level ``base``). The parameter is new and
        optional, so existing no-argument callers are unaffected.
    '''
    root = base if root is None else root
    for script in walk(root):
        # Only scan plain-text script/config files (no suffix covers
        # extension-less shell scripts).
        if script.suffix not in ('.py', '.sh', '.ps1', '.yml', '.yaml', ''):
            continue

        # Fix: the original did `with f.open() as f:`, shadowing the Path
        # loop variable with the file object.
        with script.open() as fh:
            # RE.findall returns (full_url, extension) tuples; keep the URL.
            for match in RE.findall(fh.read()):
                yield match[0]


def download(url):
    '''
    Fetch *url* into an anonymous temporary file and return the open
    file object. The position is left at EOF; callers must seek before
    reading. Shows a tqdm progress bar while streaming.
    '''
    tmp = tempfile.TemporaryFile()
    fname = Path(urlparse(url).path).name

    with requests.get(url, stream=True) as resp:
        resp.raise_for_status()
        # content-length may be absent; tqdm treats 0 as "unknown total".
        size = int(resp.headers.get('content-length', 0))
        bar = tqdm.tqdm(unit="B",
                        unit_scale=True,
                        unit_divisor=1024,
                        total=size,
                        desc=f'💾 {fname}')
        with bar:
            for block in resp.iter_content(chunk_size=8192):
                bar.update(tmp.write(block))

    return tmp


def upload(cli, prompt, bucket, path, f):
    '''
    Upload the open file object *f* to *bucket*/*path* via the S3-style
    client *cli*, showing a progress bar labelled *prompt*.

    The file is rewound first and is NOT closed by this function.
    '''
    total_size = f.seek(0, 2)  # offset at EOF == file size
    f.seek(0, 0)
    prog = tqdm.tqdm(unit="B",
                     unit_scale=True,
                     unit_divisor=1024,
                     total=total_size,
                     desc=f'📤 {prompt}')

    with prog:
        # boto3's upload_fileobj closes the file object when done; stub out
        # close() so the caller can reuse the same file for another upload.
        orig, f.close = f.close, lambda: None
        try:
            cli.upload_fileobj(f, bucket, path, Callback=prog.update)
        finally:
            # Fix: restore close() even when the upload raises — the
            # original left the stub in place on failure, making the file
            # impossible to close afterwards.
            f.close = orig


def probe(url):
    '''
    Return True iff a HEAD request to *url* succeeds (2xx/3xx status)
    within 5 seconds; any error or timeout yields False.
    '''
    try:
        return bool(requests.head(url, timeout=5).ok)
    except Exception:
        return False


def make_cli(endpoint, key_id, key_secret, addr_style='path'):
    '''
    Build an S3-compatible boto3 client for *endpoint*.

    :param addr_style: S3 addressing style — 'path' (MinIO-style) or
        'virtual' (bucket-in-hostname, required by Aliyun OSS).
    '''
    # Imported lazily so ensure_dependencies() can install boto3 first.
    import boto3
    from botocore.client import Config

    cfg = Config(s3={'addressing_style': addr_style})
    return boto3.client(
        's3',
        endpoint_url=endpoint,
        aws_access_key_id=key_id,
        aws_secret_access_key=key_secret,
        config=cfg,
    )


def main():
    '''
    Mirror every dependency archive referenced by the CI scripts into the
    near (MinIO) cache and the Aliyun OSS cache, skipping archives that a
    HEAD probe shows are already cached.

    Credentials come from BOT_MINIO_* / BOT_OSS_* environment variables
    (set by the workflow from repository secrets).
    '''
    mastercli = make_cli('http://botmaster.tgr:9000',
                         os.environ['BOT_MINIO_ACCESS_KEY'],
                         os.environ['BOT_MINIO_SECRET_KEY'])
    osscli = make_cli('https://oss-cn-beijing.aliyuncs.com',
                      os.environ['BOT_OSS_ACCESS_KEY'],
                      os.environ['BOT_OSS_SECRET_KEY'],
                      addr_style='virtual')

    for url in find_urls():
        # Download lazily: only fetch the archive if at least one cache
        # is missing it, and at most once per URL.
        f = None
        print(f'🔍 {url}')
        escaped = escape_url(url)
        name = Path(urlparse(url).path).name

        try:
            if not probe(
                    f'http://botmaster.tgr:9000/misc/depcache/{escaped}/{name}'):
                f = f or download(url)
                upload(mastercli, 'Near Cache', 'misc',
                       f'depcache/{escaped}/{name}', f)

            if not probe(
                    f'https://taichi-bots.oss-cn-beijing.aliyuncs.com/depcache/{escaped}/{name}'
            ):
                f = f or download(url)
                upload(osscli, 'Aliyun OSS', 'taichi-bots',
                       f'depcache/{escaped}/{name}', f)
        finally:
            # Fix: the temp file was previously left open until GC; close
            # it as soon as this URL is fully processed.
            if f is not None:
                f.close()


if __name__ == '__main__':
    main()

0 comments on commit 3b3ba48

Please sign in to comment.