# docker-compose.yaml

# Compose file format version.
version: '3'

# Named volumes mounted by several services below as pip / Hugging Face /
# conda cache directories. No driver options are set.
volumes:
  pip_cache: {}
  model_cache: {}
  conda_cache: {}

services:
  # Image built from ./docker/notebook.
  app:
    # force amd so it's the same as remote
    platform: linux/amd64
    build:
      context: ./docker/notebook
      dockerfile: Dockerfile
    # command: python3 main.py
    environment:
      # Mapping form on purpose: here the double quotes are YAML syntax and are
      # stripped by the parser. In list form (`- KEY="value"`) the quote
      # characters are passed to the container as part of the value.
      HUGGINGFACE_TOKEN: "${HUGGINGFACE_TOKEN}"
      XRT_TPU_CONFIG: "localservice;0;localhost:51011"
    # mps not supported yet
    # deploy:
    #   resources:
    #     reservations:
    #       devices:
    #         - capabilities: [gpu]
    # https://stackoverflow.com/questions/43844639/how-do-i-add-cached-or-delegated-into-a-docker-compose-yml-volumes-list
    volumes:
      # Named cache volumes (declared at the top level of this file).
      - pip_cache:/root/.cache/pip:delegated
      # TODO: make this one sync w/ localhost
      - model_cache:/root/.cache/huggingface:delegated
      # Host directory bind-mounted into the container.
      - ./image_dir:/image_dir:default

  # Base image built from ./ml-base/Dockerfile.base.
  base:
    platform: linux/amd64
    # Pin the tag explicitly: the training-base service passes
    # BASE_IMAGE=stable-diffusion_base as a build arg, and the implicit
    # <project>_<service> image name depends on the project directory and on
    # the Compose version (v2 joins with "-" instead of "_").
    image: stable-diffusion_base
    build:
      context: ./ml-base
      dockerfile: Dockerfile.base
    environment:
      - HUGGINGFACE_TOKEN=${HUGGINGFACE_TOKEN}
    volumes:
      - pip_cache:/root/.cache/pip:delegated
      # TODO: make this one sync w/ localhost
      - model_cache:/root/.cache/huggingface:delegated
      - ./image_dir:/image_dir:default
  # Training base image built from ./ml-base/Dockerfile.training-base.
  training-base:
    platform: linux/amd64
    # Pin the tag explicitly: the training and training-update services pass
    # BASE_IMAGE=stable-diffusion_training-base as a build arg, and the
    # implicit <project>_<service> image name depends on the project directory
    # and on the Compose version (v2 joins with "-" instead of "_").
    image: stable-diffusion_training-base
    build:
      context: ./ml-base
      dockerfile: Dockerfile.training-base
      args:
        # Presumably consumed as the FROM image in Dockerfile.training-base;
        # verify against that Dockerfile.
        - BASE_IMAGE=stable-diffusion_base
    environment:
      - HUGGINGFACE_TOKEN=${HUGGINGFACE_TOKEN}
    volumes:
      - conda_cache:/root/.conda:delegated
      - pip_cache:/root/.cache/pip:delegated
      # TODO: make this one sync w/ localhost
      - model_cache:/root/.cache/huggingface:delegated
      - ./image_dir:/image_dir:default

  # Image built from ./docker/training; runs ./train.sh with the flags listed
  # under `command`.
  training:
    # force amd so it's the same as remote
    platform: linux/amd64
    build:
      context: ./docker/training
      dockerfile: Dockerfile
      args:
        BASE_IMAGE: stable-diffusion_training-base
      # entrypoint:
      #   - ./train-style.sh
    entrypoint: ./train.sh
    # Each item is passed to the entrypoint as a separate argument.
    command:
      - "--model=stabilityai/stable-diffusion-2-1"
      - "--data=/gcs/md-ml/training-data-styles/jpl-512"
      - "--output=/image_dir"
      - "--steps=50"
      - "--phrase=dopeaf"
      - "--repeat=100"
      - "--batch=10"
      - "--token=poster"
      - "--learning=0.01"
      - "--kind=style"
    environment:
      HUGGINGFACE_TOKEN: "${HUGGINGFACE_TOKEN}"
    volumes:
      - "conda_cache:/root/.conda:delegated"
      - "pip_cache:/root/.cache/pip:delegated"
      # Host directory ./model-cache (a bind mount, not the model_cache named
      # volume) backs the Hugging Face cache for this service.
      - "./model-cache:/root/.cache/huggingface:default"
      - "./image_dir:/image_dir:default"
  # Image built from ./docker/training-update; runs ./train.sh with the flags
  # listed under `command`.
  training-update:
    # force amd so it's the same as remote
    platform: linux/amd64
    build:
      context: ./docker/training-update
      dockerfile: Dockerfile
      args:
        BASE_IMAGE: stable-diffusion_training-base
      # entrypoint:
      #   - ./train-style.sh
    entrypoint: ./train.sh
    # Each item is passed to the entrypoint as a separate argument.
    command:
      - "--model=stabilityai/stable-diffusion-2-1"
      - "--data=/gcs/md-ml/training-data-styles/jpl-512"
      - "--output=/image_dir"
      - "--steps=50"
      - "--phrase=dopeaf"
      - "--repeat=100"
      - "--batch=10"
      - "--token=poster"
      - "--learning=0.01"
      - "--kind=style"
    environment:
      HUGGINGFACE_TOKEN: "${HUGGINGFACE_TOKEN}"
    volumes:
      - "conda_cache:/root/.conda:delegated"
      - "pip_cache:/root/.cache/pip:delegated"
      # Host directory ./model-cache (a bind mount, not the model_cache named
      # volume) backs the Hugging Face cache for this service.
      - "./model-cache:/root/.cache/huggingface:default"
      - "./image_dir:/image_dir:default"

  # Image built from ./py-jobs with the default Dockerfile name; configured
  # entirely through environment variables.
  train:
    build:
      context: ./py-jobs
    environment:
      HUGGINGFACE_TOKEN: "${HUGGINGFACE_TOKEN}"
      # Relative path; no creds.json volume is mounted for this service, so the
      # file presumably ships inside the image - verify against the Dockerfile.
      GOOGLE_APPLICATION_CREDENTIALS: "creds.json"
      GOOGLE_CLOUD_PROJECT: "md-wbeebe-0808-example-apps"
      REGION: "us-central1"
      GCS_BUCKET: "gs://md-ml"
      GCP_TOPIC: "training-requests"
      GCP_SUBSCRIPTION: "trainer-sub"

  # flow:
  #   build:
  #     context: ./flow
  #   command: python main.py
  #   environment:
  #     - HUGGINGFACE_TOKEN=${HUGGINGFACE_TOKEN}
  #     - GOOGLE_APPLICATION_CREDENTIALS=creds.json
  #     - GOOGLE_CLOUD_PROJECT=md-wbeebe-0808-example-apps
  #     - REGION=us-central1
  #     - GCS_BUCKET=gs://md-ml
  #     - GCP_TOPIC=training-requests
  #     - GCP_SUBSCRIPTION=trainer-sub

  # Image built from ./docker/model-loader; configured through environment
  # variables only (no ports or volumes).
  model-loader:
    build:
      context: ./docker/model-loader
      dockerfile: Dockerfile
    environment:
      HUGGINGFACE_TOKEN: "${HUGGINGFACE_TOKEN}"
      GOOGLE_APPLICATION_CREDENTIALS: "creds.json"
      PROJECT_ID: "md-wbeebe-0808-example-apps"
      REGION: "us-central1"
      GCS_BUCKET: "gs://md-ml"
  # Image built from ./model-server; container port 5000 is published on host
  # port 6000.
  model-server:
    platform: linux/amd64
    build:
      context: ./model-server
      dockerfile: Dockerfile
    ports:
      # Quoted so the HOST:CONTAINER mapping is always read as a string.
      # NOTE(review): the open-ai service also publishes host port 6000, so
      # the two services cannot be up at the same time as written.
      - "6000:5000"
    environment:
      - HUGGINGFACE_TOKEN=${HUGGINGFACE_TOKEN}
      - GOOGLE_APPLICATION_CREDENTIALS=creds.json
      - PROJECT_ID=md-wbeebe-0808-example-apps
      - REGION=us-central1
      - GCS_BUCKET=gs://md-ml
    volumes:
      - model_cache:/root/.cache/huggingface:delegated
  # Image built from ./kflow, with the service-account key and the components
  # directory bind-mounted from the host.
  kflow:
    build:
      context: ./kflow
      dockerfile: Dockerfile
    environment:
      HUGGINGFACE_TOKEN: "${HUGGINGFACE_TOKEN}"
      # NOTE(review): relative path, while the key is mounted at /creds.json
      # below - this only resolves if the container's working directory is /.
      GOOGLE_APPLICATION_CREDENTIALS: "creds.json"
      PROJECT_ID: "md-wbeebe-0808-example-apps"
      REGION: "us-central1"
      GCS_BUCKET: "gs://md-ml"
    volumes:
      - "./creds.json:/creds.json"
      - "./kflow/components:/os-shared:default"

  # Image built from ./open-ai; container port 5000 is published on host
  # port 6000.
  open-ai:
    build:
      context: ./open-ai
      dockerfile: Dockerfile
    ports:
      # Quoted so the HOST:CONTAINER mapping is always read as a string.
      # NOTE(review): the model-server service also publishes host port 6000,
      # so the two services cannot be up at the same time as written.
      - "6000:5000"
    environment:
      - OPENAI_API_KEY=${OPENAI_API_KEY}

  # One-shot helper: authenticates with the mounted service-account key and
  # copies a trained model from GCS into the ./gcloud-vol bind mount.
  gcloud:
    image: google/cloud-sdk:latest
    volumes:
      - ./creds.json:/creds.json
      - ./gcloud-vol:/root/gcloud-vol
    # Exec form with a literal block scalar: the script reaches bash exactly as
    # written, so the inner double quotes no longer sit inside an outer
    # `bash -c "..."` string. The copy destination is the absolute path that
    # mkdir creates (and that the volume above backs) instead of a path
    # relative to the container's working directory.
    command:
      - bash
      - "-c"
      - |
        mkdir -p /root/gcloud-vol/jpl4 &&
        gcloud auth activate-service-account --key-file=/creds.json &&
        gcloud config set project md-wbeebe-0808-example-apps &&
        gsutil -m cp -r \
          "gs://md-ml/training-job-1672624636/model" \
          /root/gcloud-vol/jpl4