Fix workflow and messages

Josh-XT committed Jan 27, 2024
1 parent 8181461 commit 77a1f8b

Showing 3 changed files with 17 additions and 5 deletions.

.github/workflows/publish-docker-dev.yml (1 addition, 1 deletion)

@@ -58,7 +58,7 @@ jobs:
       - name: Get full image path
         id: get_image_path
         run: |
-          echo "IMAGE_PATH=$(echo ghcr.io/${{ env.GITHUB_USER }}/${{ env.REPO_NAME }}:${{ matrix.tag_name }}-${{ env.BRANCH_NAME }}-${{ github.sha }})" >> $GITHUB_ENV
+          echo "IMAGE_PATH=$(echo ghcr.io/${{ env.GITHUB_USER }}/${{ env.REPO_NAME }}:cpu-dev-${{ env.BRANCH_NAME }}-${{ github.sha }})" >> $GITHUB_ENV

   test-local-llm:
     uses: josh-xt/AGiXT/.github/workflows/operation-test-with-jupyter.yml@main
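
For illustration, here is a minimal Python sketch (not from the repository) of the string this step now writes to $GITHUB_ENV; the function name and the example argument values are assumptions, not taken from a real workflow run:

```python
# Sketch of the image-path assembly after this commit: the tag prefix is
# hard-coded to "cpu-dev" instead of being taken from the matrix's tag_name.
def image_path(github_user: str, repo_name: str, branch_name: str, sha: str) -> str:
    return f"ghcr.io/{github_user}/{repo_name}:cpu-dev-{branch_name}-{sha}"

# Hypothetical values for illustration only:
print(image_path("josh-xt", "local-llm", "dev", "77a1f8b"))
# ghcr.io/josh-xt/local-llm:cpu-dev-dev-77a1f8b
```

Hard-coding the prefix appears to keep the dev image tag stable regardless of which matrix entry runs this step.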

docker-compose-dev.yml (14 additions, 2 deletions)

@@ -2,15 +2,27 @@ version: '3.8'
 
 services:
   local-llm:
-    image: ghcr.io/josh-xt/local-llm:cpu-dev-dev
+    image: ghcr.io/josh-xt/local-llm:cpu-dev
     environment:
       - LOCAL_LLM_API_KEY=${LOCAL_LLM_API_KEY-}
-      - GPU_LAYERS=0
+      - GPU_LAYERS=${GPU_LAYERS-0}
+      - MAIN_GPU=${MAIN_GPU-0}
       - DEFAULT_MODEL=${DEFAULT_MODEL-phi-2-dpo}
       - WHISPER_MODEL=${WHISPER_MODEL-base.en}
+      - CMAKE_ARGS="-DLLAMA_CUBLAS=on"
+      - LLAMA_CUBLAS=1
+      - CUDA_DOCKER_ARCH=all
     restart: unless-stopped
     ports:
       - "8091:8091"
     volumes:
       - ./models:/app/models
       - ./outputs:/app/outputs
+      - ./voices:/app/voices
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: all
+              capabilities: [ gpu ]
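
Two details above are worth noting. The ${VAR-default} references use Compose's parameter interpolation, where the default applies only when the variable is unset in the host environment, and the new deploy.resources.reservations block asks the container runtime to expose all NVIDIA GPUs to the service. A minimal Python sketch of the equivalent environment lookup, using the variable names and defaults from the diff:

```python
import os

# Compose's ${VAR-default} falls back to the default only when VAR is unset;
# os.environ.get behaves the same way (an empty string still passes through).
def resolved_env() -> dict:
    return {
        "GPU_LAYERS": os.environ.get("GPU_LAYERS", "0"),
        "MAIN_GPU": os.environ.get("MAIN_GPU", "0"),
        "DEFAULT_MODEL": os.environ.get("DEFAULT_MODEL", "phi-2-dpo"),
        "WHISPER_MODEL": os.environ.get("WHISPER_MODEL", "base.en"),
    }

# Usage sketch: overriding one variable at launch, e.g.
#   GPU_LAYERS=20 docker compose -f docker-compose-dev.yml up
print(resolved_env())
```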

local_llm/LLM.py (2 additions, 2 deletions)

@@ -263,13 +263,13 @@ def __init__(
         GPU_LAYERS = os.environ.get("GPU_LAYERS", 0)
         if torch.cuda.is_available() and int(GPU_LAYERS) == 0:
             vram = round(torch.cuda.get_device_properties(0).total_memory / 1024**3)
-            logging.info(f"[LLM] {vram} GB of VRAM detected.")
+            logging.info(f"[LLM] {vram}GB of VRAM detected.")
             if vram >= 48 or vram <= 2:
                 GPU_LAYERS = vram
             else:
                 GPU_LAYERS = vram * 2
         logging.info(
-            f"[LLM] Running {DEFAULT_MODEL} with {GPU_LAYERS} GPU layers and {THREADS} CPU threads available for offloading."
+            f"[LLM] Loading {DEFAULT_MODEL} with {GPU_LAYERS} GPU layers and {THREADS} CPU threads available for offloading. Please wait..."
         )
         self.params = {}
         self.model_name = model
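
The GPU-layer heuristic in the hunk above can be sanity-checked in isolation; this sketch only mirrors the arithmetic from the changed function and is not part of the repository:

```python
# Mirrors the GPU_LAYERS heuristic from the diff: cards at the extremes
# (<= 2 GB or >= 48 GB of VRAM) get one layer per GB; everything else gets two.
def gpu_layers_for(vram_gb: int) -> int:
    if vram_gb >= 48 or vram_gb <= 2:
        return vram_gb
    return vram_gb * 2

assert gpu_layers_for(2) == 2    # small card: conservative offload
assert gpu_layers_for(8) == 16   # mid-range card: two layers per GB
assert gpu_layers_for(48) == 48  # large card: back to one layer per GB
```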
