docker-compose-cuda.yml
# CUDA-enabled Docker Compose configuration for ezlocalai.
services:
  ezlocalai:
    build:
      context: .
      dockerfile: cuda.Dockerfile
    environment:
      # URL and optional API key for the ezlocalai server.
      - EZLOCALAI_URL=${EZLOCALAI_URL-http://localhost:8091}
      - EZLOCALAI_API_KEY=${EZLOCALAI_API_KEY-}
      # Number of model layers to offload to the GPU and index of the primary GPU.
      - GPU_LAYERS=${GPU_LAYERS-0}
      - MAIN_GPU=${MAIN_GPU-0}
      # Default language model (Hugging Face GGUF repository) and token limit for the LLM.
      - DEFAULT_MODEL=${DEFAULT_MODEL-TheBloke/phi-2-dpo-GGUF}
      - LLM_MAX_TOKENS=${LLM_MAX_TOKENS-0}
      # Whisper model used for speech-to-text.
      - WHISPER_MODEL=${WHISPER_MODEL-base.en}
      # Image generation settings (Stable Diffusion model, device) and optional vision model.
      - IMG_ENABLED=${IMG_ENABLED-true}
      - IMG_DEVICE=${IMG_DEVICE-cpu}
      - SD_MODEL=${SD_MODEL}
      - VISION_MODEL=${VISION_MODEL}
      # Batch size for the LLM backend.
      - LLM_BATCH_SIZE=${LLM_BATCH_SIZE-1024}
      # Build for all supported CUDA architectures; TENSOR_SPLIT distributes layers across multiple GPUs.
      - CUDA_DOCKER_ARCH=all
      - TENSOR_SPLIT=${TENSOR_SPLIT-}
    restart: unless-stopped
    ports:
      - "8091:8091"
      - "8502:8502"
    volumes:
      # Persist models, the Hugging Face cache, and generated outputs on the host.
      - ./models:/app/models
      - ./hf:/home/root/.cache/huggingface/hub
      - ./outputs:/app/outputs
      - ./voices:/app/voices
      - ./whispercpp:/app/whispercpp
      - ./xttsv2_2.0.2:/app/xttsv2_2.0.2
    deploy:
      resources:
        reservations:
          # Reserve all available NVIDIA GPUs for the container.
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
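
# Example usage sketch, assuming Docker Compose v2 and the NVIDIA Container Toolkit
# are installed on the host. The GPU_LAYERS value below is illustrative; adjust it
# to fit your model and available VRAM.
#
#   GPU_LAYERS=35 docker compose -f docker-compose-cuda.yml up -d --build
#
# The API should then be reachable at http://localhost:8091 (or whatever
# EZLOCALAI_URL is set to).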