Skip to content

Commit

Permalink
Merge branch 'main' into eujingchua/fix-empty-tunables
Browse files Browse the repository at this point in the history
  • Loading branch information
motus authored Dec 4, 2024
2 parents d390f24 + b66e134 commit 6a49e8f
Show file tree
Hide file tree
Showing 204 changed files with 5,317 additions and 1,851 deletions.
2 changes: 1 addition & 1 deletion .bumpversion.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.6.0
current_version = 0.6.1
commit = True
tag = True

Expand Down
1 change: 1 addition & 0 deletions .cspell.json
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
"discretization",
"discretize",
"drivername",
"dropna",
"dstpath",
"dtype",
"duckdb",
Expand Down
107 changes: 59 additions & 48 deletions .devcontainer/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,27 +1,43 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

FROM mcr.microsoft.com/devcontainers/miniconda:3 AS base
FROM mcr.microsoft.com/vscode/devcontainers/base AS base

# Add some additional packages for the devcontainer terminal environment.
USER root
RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \
&& apt-get -y install --no-install-recommends \
bash bash-completion \
less colordiff \
curl jq \
curl gpg ca-certificates \
jq \
ripgrep \
vim-nox neovim python3-pynvim \
make \
rename \
sudo \
&& apt-get clean && rm -rf /var/lib/apt/lists/* \
&& echo "C-w: unix-filename-rubout" >> /etc/inputrc
# Also tweak C-w to stop at slashes as well instead of just spaces

# Prepare the mlos_deps.yml file in a cross platform way.
FROM mcr.microsoft.com/vscode/devcontainers/base AS deps-prep
RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \
&& apt-get -y install --no-install-recommends \
python3-minimal python3-setuptools
COPY --chown=vscode . /tmp/conda-tmp/
RUN /tmp/conda-tmp/prep-deps-files.sh \
&& ls -l /tmp/conda-tmp/ # && cat /tmp/conda-tmp/combined.requirements.txt /tmp/conda-tmp/mlos_deps.yml

FROM base AS conda

# Set some cache dirs to be owned by the vscode user even as we're currently
# executing as root to build the container image.
# NOTE: We do *not* mark these as volumes - it doesn't help rebuilding at all.

RUN addgroup conda \
&& adduser vscode conda

ARG PIP_CACHE_DIR=/var/cache/pip
ENV PIP_CACHE_DIR=/var/cache/pip
RUN mkdir -p ${PIP_CACHE_DIR} \
Expand All @@ -36,59 +52,54 @@ RUN mkdir -p ${CONDA_PKGS_DIRS} \

USER vscode:conda

# Upgrade conda and use strict priorities
# Use the mamba solver (necessary for some quality of life speedups due to
# required packages to support Windows)
RUN umask 0002 \
# Try and prime the devcontainer's ssh known_hosts keys with the github one for scripted calls.
RUN mkdir -p /home/vscode/.ssh \
&& ( \
grep -q ^github.com /home/vscode/.ssh/known_hosts \
|| ssh-keyscan github.com | tee -a /home/vscode/.ssh/known_hosts \
)

COPY --from=deps-prep --chown=vscode:conda /tmp/conda-tmp/mlos_deps.yml /tmp/conda-tmp/combined.requirements.txt /tmp/conda-tmp/

# Combine the installation of miniconda and the mlos dependencies into a single step in order to save space.
# This allows the mlos env to reference the base env's packages without duplication across layers.
RUN echo "Setup miniconda" \
&& curl -Ss --url https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-$(uname -m).sh -o /tmp/miniconda3.sh \
&& sudo sh /tmp/miniconda3.sh -b -u -p /opt/conda \
&& rm -rf /tmp/miniconda3.sh \
&& echo "# Adjust the conda installation to be user/group writable." \
&& sudo /opt/conda/bin/conda init --system \
&& sudo chgrp -R conda /opt/conda \
&& sudo chmod -R g+wX /opt/conda \
&& find /opt/conda -type d -print0 | xargs -0 sudo chmod -c g+s \
&& umask 0002 \
&& echo "# Use conda-forge first to get the latest versions of packages " \
&& echo "# and reduce duplication with mlos env (which also uses conda-forge first)." \
&& echo "# Upgrade conda and use strict priorities" \
&& echo "# Use the mamba solver (necessary for some quality of life speedups due to required packages to support Windows)" \
&& /opt/conda/bin/conda init \
&& /opt/conda/bin/conda config --set channel_priority strict \
&& /opt/conda/bin/conda info \
&& /opt/conda/bin/conda update -v -y -n base -c defaults --all \
&& /opt/conda/bin/conda update -v -y -n base -c conda-forge -c defaults --all \
&& /opt/conda/bin/conda list -n base \
&& /opt/conda/bin/conda install -v -y -n base conda-libmamba-solver \
&& /opt/conda/bin/conda config --set solver libmamba \
&& /opt/conda/bin/conda install -v -y -n base -c conda-forge -c defaults conda-libmamba-solver \
&& /opt/conda/bin/conda config --system --set solver libmamba \
&& echo "# Install some additional editor packages for the base environment." \
&& /opt/conda/bin/conda run -n base pip install --no-cache-dir -U pynvim \
&& echo "# Clean up conda cache to save some space." \
&& /opt/conda/bin/conda list -n base \
&& /opt/conda/bin/conda clean -v -y -a \
&& /opt/conda/bin/conda run -n base pip cache purge

# No longer relevant since we're using conda-forge in the environment files by default now.
## Update the base. This helps save space by making sure the same version
## python is used for both the base env and mlos env.
#RUN umask 0002 \
# && /opt/conda/bin/conda update -v -y -n base -c defaults --all \
# && /opt/conda/bin/conda update -v -y -n base -c defaults conda python \
# && /opt/conda/bin/conda clean -v -y -a \
# && /opt/conda/bin/conda run -n base pip cache purge

# Install some additional editor packages for the base environment.
RUN umask 0002 \
&& /opt/conda/bin/conda run -n base pip install --no-cache-dir -U pynvim

# Setup (part of) the mlos environment in the devcontainer.
# NOTEs:
# - The mlos_deps.yml file is prepared by the prep-container-build script(s).
# - The rest happens during first container start once the source is available.
# See Also: updateContentCommand in .devcontainer/devcontainer.json
RUN mkdir -p /opt/conda/pkgs/cache/ && chown -R vscode:conda /opt/conda/pkgs/cache/
RUN /opt/conda/bin/conda init bash \
&& /opt/conda/bin/conda config --set solver libmamba

# Prepare the mlos_deps.yml file in a cross platform way.
FROM mcr.microsoft.com/devcontainers/miniconda:3 AS deps-prep
COPY --chown=vscode:conda . /tmp/conda-tmp/
RUN /tmp/conda-tmp/prep-deps-files.sh \
&& ls -l /tmp/conda-tmp/ # && cat /tmp/conda-tmp/combined.requirements.txt /tmp/conda-tmp/mlos_deps.yml

# Install some additional dependencies for the mlos environment.
# Make sure they have conda group ownership to make the devcontainer more
# reliably usable across vscode UID changes.
FROM base AS devcontainer
USER vscode
COPY --from=deps-prep --chown=vscode:conda /tmp/conda-tmp/mlos_deps.yml /tmp/conda-tmp/combined.requirements.txt /tmp/conda-tmp/
RUN umask 0002 \
&& /opt/conda/bin/conda run -n base pip cache purge \
&& echo "# Install some additional dependencies for the mlos environment." \
&& echo "# Make sure they have conda group ownership to make the devcontainer more" \
&& echo "# reliable useable across vscode uid changes." \
&& sg conda -c "/opt/conda/bin/conda env create -n mlos -v -f /tmp/conda-tmp/mlos_deps.yml" \
&& sg conda -c "/opt/conda/bin/conda run -n mlos pip install --no-cache-dir -U -r /tmp/conda-tmp/combined.requirements.txt" \
&& sg conda -c "/opt/conda/bin/conda run -n mlos pip cache purge" \
&& sg conda -c "/opt/conda/bin/conda clean -v -y -a" \
&& mkdir -p /opt/conda/pkgs/cache/ && chown -R vscode:conda /opt/conda/pkgs/cache/
RUN mkdir -p /home/vscode/.conda/envs \
&& mkdir -p /opt/conda/pkgs/cache/ && chown -R vscode:conda /opt/conda/pkgs/cache/ \
&& mkdir -p /home/vscode/.conda/envs \
&& ln -s /opt/conda/envs/mlos /home/vscode/.conda/envs/mlos

#ENV PATH=/opt/conda/bin:$PATH
ENV PATH=/opt/conda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
2 changes: 1 addition & 1 deletion .devcontainer/build/build-devcontainer-cli.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ if ("$env:NO_CACHE" -eq 'true') {
else {
$cacheFrom = 'mloscore.azurecr.io/devcontainer-cli:latest'
$devcontainer_cli_build_args += " --cache-from $cacheFrom"
docker pull $cacheFrom
docker pull --platform linux/amd64 $cacheFrom
}

$cmd = "docker.exe build -t devcontainer-cli:latest -t cspell:latest " +
Expand Down
14 changes: 10 additions & 4 deletions .devcontainer/build/build-devcontainer-cli.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,20 +10,26 @@ set -eu
scriptdir=$(dirname "$(readlink -f "$0")")
cd "$scriptdir/"

source ../common.sh

# Build the helper container that has the devcontainer CLI for building the devcontainer.

if [ ! -w /var/run/docker.sock ]; then
echo "ERROR: $USER does not have write access to /var/run/docker.sock. Please add $USER to the docker group." >&2
exit 1
fi
DOCKER_GID=$(stat -c'%g' /var/run/docker.sock)
DOCKER_GID=$(stat $STAT_FORMAT_GID_ARGS /var/run/docker.sock)
# Make this work inside a devcontainer as well.
if [ -w /var/run/docker-host.sock ]; then
DOCKER_GID=$(stat -c'%g' /var/run/docker-host.sock)
DOCKER_GID=$(stat $STAT_FORMAT_GID_ARGS /var/run/docker-host.sock)
fi

export DOCKER_BUILDKIT=${DOCKER_BUILDKIT:-1}

# TODO: Add multiplatform build support?
#devcontainer_cli_build_args='--platform linux/amd64,linux/arm64'
devcontainer_cli_build_args=''

if docker buildx version 2>/dev/null; then
devcontainer_cli_build_args+=' --progress=plain'
else
Expand All @@ -33,10 +39,10 @@ fi
if [ "${NO_CACHE:-}" == 'true' ]; then
devcontainer_cli_build_args+=' --no-cache --pull'
else
cacheFrom='mloscore.azurecr.io/devcontainer-cli:latest'
cacheFrom='mloscore.azurecr.io/devcontainer-cli'
tmpdir=$(mktemp -d)
devcontainer_cli_build_args+=" --cache-from $cacheFrom"
docker --config="$tmpdir" pull "$cacheFrom" || true
docker --config="$tmpdir" pull --platform linux/$(uname -m) "$cacheFrom" || true
rmdir "$tmpdir"
fi

Expand Down
4 changes: 2 additions & 2 deletions .devcontainer/build/build-devcontainer.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -42,13 +42,13 @@ if ($null -eq $env:DOCKER_BUILDKIT) {
$devcontainer_build_args = ''
if ("$env:NO_CACHE" -eq 'true') {
$base_image = (Get-Content "$rootdir/.devcontainer/Dockerfile" | Select-String '^FROM' | Select-Object -ExpandProperty Line | ForEach-Object { $_ -replace '^FROM\s+','' } | ForEach-Object { $_ -replace ' AS\s+.*','' } | Select-Object -First 1)
docker pull $base_image
docker pull --platform linux/amd64 $base_image
$devcontainer_build_args = '--no-cache'
}
else {
$cacheFrom = 'mloscore.azurecr.io/mlos-devcontainer:latest'
$devcontainer_build_args = "--cache-from $cacheFrom"
docker pull "$cacheFrom"
docker pull --platform linux/amd64 "$cacheFrom"
}

# Make this work inside a devcontainer as well.
Expand Down
34 changes: 23 additions & 11 deletions .devcontainer/build/build-devcontainer.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,42 +8,54 @@ set -x

set -eu
scriptdir=$(dirname "$(readlink -f "$0")")
repo_root=$(readlink -f "$scriptdir/../..")
repo_name=$(basename "$repo_root")
cd "$scriptdir/"

source ../common.sh

DEVCONTAINER_IMAGE="devcontainer-cli:latest"
MLOS_AUTOTUNING_IMAGE="mlos-devcontainer:latest"

# Build the helper container that has the devcontainer CLI for building the devcontainer.
NO_CACHE=${NO_CACHE:-} ./build-devcontainer-cli.sh

DOCKER_GID=$(stat -c'%g' /var/run/docker.sock)
DOCKER_GID=$(stat $STAT_FORMAT_GID_ARGS /var/run/docker.sock)
# Make this work inside a devcontainer as well.
if [ -w /var/run/docker-host.sock ]; then
DOCKER_GID=$(stat -c'%g' /var/run/docker-host.sock)
DOCKER_GID=$(stat $STAT_FORMAT_GID_ARGS /var/run/docker-host.sock)
fi
if [[ $OSTYPE =~ darwin* ]]; then
DOCKER_GID=0
fi

# Build the devcontainer image.
rootdir=$(readlink -f "$scriptdir/../..")
rootdir="$repo_root"

# Run the initialize command on the host first.
# Note: command should already pull the cached image if possible.
pwd
devcontainer_json=$(cat "$rootdir/.devcontainer/devcontainer.json" | sed -e 's|//.*||' -e 's|/\*|\n&|g;s|*/|&\n|g' | sed -e '/\/\*/,/*\//d')
initializeCommand=$(echo "$devcontainer_json" | docker run -i --rm devcontainer-cli jq -e -r '.initializeCommand[]')
devcontainer_json=$(cat "$rootdir/.devcontainer/devcontainer.json" | sed -e 's|^[ \t]*//.*||' -e 's|/\*|\n&|g;s|*/|&\n|g' | sed -e '/\/\*/,/*\//d')
initializeCommand=$(echo "$devcontainer_json" | docker run -i --rm $DEVCONTAINER_IMAGE jq -e -r '.initializeCommand[]')
if [ -z "$initializeCommand" ]; then
echo "No initializeCommand found in devcontainer.json" >&2
exit 1
else
eval "pushd "$rootdir/"; $initializeCommand; popd"
fi

# TODO: Add multi-platform build support?
#devcontainer_build_args='--platform linux/amd64,linux/arm64'
devcontainer_build_args=''
if [ "${NO_CACHE:-}" == 'true' ]; then
base_image=$(grep '^FROM ' "$rootdir/.devcontainer/Dockerfile" | sed -e 's/^FROM //' -e 's/ AS .*//' | head -n1)
docker pull "$base_image" || true
docker pull --platform linux/$(uname -m) "$base_image" || true
devcontainer_build_args='--no-cache'
else
cache_from='mloscore.azurecr.io/mlos-devcontainer:latest'
devcontainer_build_args="--cache-from $cache_from --cache-from mlos-devcontainer:latest"
cache_from='mloscore.azurecr.io/mlos-devcontainer'
devcontainer_build_args="--cache-from $cache_from --cache-from mlos-devcontainer"
tmpdir=$(mktemp -d)
docker --config="$tmpdir" pull "$cache_from" || true
docker --config="$tmpdir" pull --platform linux/$(uname -m) "$cache_from" || true
rmdir "$tmpdir"
fi

Expand All @@ -61,10 +73,10 @@ docker run -i --rm \
--env http_proxy=${http_proxy:-} \
--env https_proxy=${https_proxy:-} \
--env no_proxy=${no_proxy:-} \
devcontainer-cli \
$DEVCONTAINER_IMAGE \
devcontainer build --workspace-folder /src \
$devcontainer_build_args \
--image-name mlos-devcontainer:latest
--image-name $MLOS_AUTOTUNING_IMAGE
if [ "${CONTAINER_REGISTRY:-}" != '' ]; then
docker tag mlos-devcontainer:latest "$CONTAINER_REGISTRY/mlos-devcontainer:latest"
fi
18 changes: 18 additions & 0 deletions .devcontainer/common.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
##
## Copyright (c) Microsoft Corporation.
## Licensed under the MIT License.
##
# Pick the `stat` flags that print a file's group id / inode number on this host.
# Linux uses the `-c<format>` form and macOS (darwin) uses the `-f<format>` form.
#
# This file is meant to be sourced (e.g. `source ../common.sh`); callers then run
# `stat $STAT_FORMAT_GID_ARGS <path>` with the variable left unquoted.
#
# `${OSTYPE:-}` is used instead of a bare `$OSTYPE` so that callers running under
# `set -u` reach the explicit error branch below rather than dying with an
# "unbound variable" message when OSTYPE is not set.
case "${OSTYPE:-}" in
    linux*)
        STAT_FORMAT_GID_ARGS="-c%g"
        STAT_FORMAT_INODE_ARGS="-c%i"
        ;;
    darwin*)
        STAT_FORMAT_GID_ARGS="-f%g"
        STAT_FORMAT_INODE_ARGS="-f%i"
        ;;
    *)
        # Report on stderr, matching the other build scripts' error messages,
        # so the text is not swallowed by a command substitution or pipe.
        echo "ERROR: Unhandled OSTYPE: ${OSTYPE:-}" >&2
        # Because this file is sourced, `exit` aborts the calling script too.
        exit 1
        ;;
esac
3 changes: 2 additions & 1 deletion .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,8 @@
"streetsidesoftware.code-spell-checker",
"tamasfe.even-better-toml",
"trond-snekvik.simple-rst",
"tyriar.sort-lines"
"tyriar.sort-lines",
"ms-toolsai.jupyter"
]
}
}
Expand Down
2 changes: 1 addition & 1 deletion .devcontainer/scripts/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ To save space in the ACR, we purge images older than 30 days, always keeping the 3 most recent tags.
```sh
#DRY_RUN_ARGS='--dry-run'

PURGE_CMD="acr purge --filter 'devcontainer-cli:.*' --filter 'mlos-devcontainer:.*' --untagged --ago 7d $DRY_RUN_ARGS"
PURGE_CMD="acr purge --filter 'devcontainer-cli:.*' --filter 'mlos-devcontainer:.*' --untagged --ago 30d --keep 3 $DRY_RUN_ARGS"

# Setup a daily task:
az acr task create --name dailyPurgeTask --cmd "$PURGE_CMD" --registry mloscore --schedule "0 1 * * *" --context /dev/null
Expand Down
12 changes: 9 additions & 3 deletions .devcontainer/scripts/prep-container-build
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/bin/sh
#!/usr/bin/env bash
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

Expand All @@ -18,9 +18,15 @@ if [ ! -f .env ]; then
echo "Creating empty .env file for devcontainer."
touch .env
fi
# Add some info about the host OS to the .env file.
egrep -v '^HOST_OSTYPE=' .env > .env.tmp || true
echo "HOST_OSTYPE=$OSTYPE" >> .env.tmp
mv .env.tmp .env

# Also prep the random NGINX_PORT for the docker-compose command.
if ! [ -e .devcontainer/.env ] || ! egrep -q "^NGINX_PORT=[0-9]+$" .devcontainer/.env; then
NGINX_PORT=$(($(shuf -i 0-30000 -n 1) + 80))
RANDOM=${RANDOM:-$$}
NGINX_PORT=$((($RANDOM % 30000) + 1 + 80))
echo "NGINX_PORT=$NGINX_PORT" > .devcontainer/.env
fi

Expand Down Expand Up @@ -55,6 +61,6 @@ if [ "${NO_CACHE:-}" != 'true' ]; then
## Make sure we use an empty config to avoid auth issues for devs with the
## registry, which should allow anonymous pulls
#tmpdir=$(mktemp -d)
#docker --config="$tmpdir" pull -q "$cacheFrom" >/dev/null || true
#docker --config="$tmpdir" pull --platform linux/$(uname -m) -q "$cacheFrom" >/dev/null || true
#rmdir "$tmpdir"
fi
2 changes: 1 addition & 1 deletion .devcontainer/scripts/prep-container-build.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -49,5 +49,5 @@ if ($env:NO_CACHE -ne 'true') {
$cacheFrom = 'mloscore.azurecr.io/mlos-devcontainer'
# Skip pulling for now (see TODO note above)
Write-Host "Consider pulling image $cacheFrom for build caching."
#docker pull $cacheFrom
#docker pull --platform linux/amd64 $cacheFrom
}
Loading

0 comments on commit 6a49e8f

Please sign in to comment.