diff --git a/docs/_src/api/openapi/openapi-1.8.1rc0.json b/docs/_src/api/openapi/openapi-1.8.1rc0.json
index 4acf2a9c9d..7e588f01fa 100644
--- a/docs/_src/api/openapi/openapi-1.8.1rc0.json
+++ b/docs/_src/api/openapi/openapi-1.8.1rc0.json
@@ -398,6 +398,28 @@
           }
         }
       }
+    },
+    "/health": {
+      "get": {
+        "tags": [
+          "health"
+        ],
+        "summary": "Get Health Status",
+        "description": "This endpoint allows external systems to monitor the health of the Haystack REST API.",
+        "operationId": "get_health_status",
+        "responses": {
+          "200": {
+            "description": "Successful Response",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/HealthResponse"
+                }
+              }
+            }
+          }
+        }
+      }
     }
   },
   "components": {
@@ -511,6 +533,20 @@
           }
         }
       },
+      "CPUUsage": {
+        "title": "CPUUsage",
+        "required": [
+          "used"
+        ],
+        "type": "object",
+        "properties": {
+          "used": {
+            "title": "Used",
+            "type": "number",
+            "description": "REST API average CPU usage in percentage"
+          }
+        }
+      },
       "CreateLabelSerialized": {
         "title": "CreateLabelSerialized",
         "required": [
@@ -693,6 +729,56 @@
         },
         "additionalProperties": false
       },
+      "GPUInfo": {
+        "title": "GPUInfo",
+        "required": [
+          "index",
+          "usage"
+        ],
+        "type": "object",
+        "properties": {
+          "index": {
+            "title": "Index",
+            "type": "integer",
+            "description": "GPU index"
+          },
+          "usage": {
+            "title": "Usage",
+            "allOf": [
+              {
+                "$ref": "#/components/schemas/GPUUsage"
+              }
+            ],
+            "description": "GPU usage details"
+          }
+        }
+      },
+      "GPUUsage": {
+        "title": "GPUUsage",
+        "required": [
+          "kernel_usage",
+          "memory_total",
+          "memory_used"
+        ],
+        "type": "object",
+        "properties": {
+          "kernel_usage": {
+            "title": "Kernel Usage",
+            "type": "number",
+            "description": "GPU kernel usage in percentage"
+          },
+          "memory_total": {
+            "title": "Memory Total",
+            "type": "integer",
+            "description": "Total GPU memory in megabytes"
+          },
+          "memory_used": {
+            "title": "Memory Used",
+            "type": "integer",
+            "description": "REST API used GPU memory in megabytes"
+          }
+        }
+      },
       "HTTPValidationError": {
         "title": "HTTPValidationError",
         "type": "object",
@@ -706,6 +792,48 @@
           }
         }
       },
+      "HealthResponse": {
+        "title": "HealthResponse",
+        "required": [
+          "version",
+          "cpu",
+          "memory"
+        ],
+        "type": "object",
+        "properties": {
+          "version": {
+            "title": "Version",
+            "type": "string",
+            "description": "Haystack version"
+          },
+          "cpu": {
+            "title": "Cpu",
+            "allOf": [
+              {
+                "$ref": "#/components/schemas/CPUUsage"
+              }
+            ],
+            "description": "CPU usage details"
+          },
+          "memory": {
+            "title": "Memory",
+            "allOf": [
+              {
+                "$ref": "#/components/schemas/MemoryUsage"
+              }
+            ],
+            "description": "Memory usage details"
+          },
+          "gpus": {
+            "title": "Gpus",
+            "type": "array",
+            "items": {
+              "$ref": "#/components/schemas/GPUInfo"
+            },
+            "description": "GPU usage details"
+          }
+        }
+      },
       "Label": {
         "title": "Label",
         "required": [
@@ -774,6 +902,20 @@
           }
         }
       },
+      "MemoryUsage": {
+        "title": "MemoryUsage",
+        "required": [
+          "used"
+        ],
+        "type": "object",
+        "properties": {
+          "used": {
+            "title": "Used",
+            "type": "number",
+            "description": "REST API used memory in percentage"
+          }
+        }
+      },
       "QueryRequest": {
         "title": "QueryRequest",
         "required": [
diff --git a/docs/_src/api/openapi/openapi.json b/docs/_src/api/openapi/openapi.json
index 4acf2a9c9d..7e588f01fa 100644
--- a/docs/_src/api/openapi/openapi.json
+++ b/docs/_src/api/openapi/openapi.json
@@ -398,6 +398,28 @@
           }
         }
       }
+    },
+    "/health": {
+      "get": {
+        "tags": [
+          "health"
+        ],
+        "summary": "Get Health Status",
+        "description": "This endpoint allows external systems to monitor the health of the Haystack REST API.",
+        "operationId": "get_health_status",
+        "responses": {
+          "200": {
+            "description": "Successful Response",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/HealthResponse"
+                }
+              }
+            }
+          }
+        }
+      }
     }
   },
   "components": {
@@ -511,6 +533,20 @@
           }
         }
       },
+      "CPUUsage": {
+        "title": "CPUUsage",
+        "required": [
+          "used"
+        ],
+        "type": "object",
+        "properties": {
+          "used": {
+            "title": "Used",
+            "type": "number",
+            "description": "REST API average CPU usage in percentage"
+          }
+        }
+      },
       "CreateLabelSerialized": {
         "title": "CreateLabelSerialized",
         "required": [
@@ -693,6 +729,56 @@
         },
         "additionalProperties": false
       },
+      "GPUInfo": {
+        "title": "GPUInfo",
+        "required": [
+          "index",
+          "usage"
+        ],
+        "type": "object",
+        "properties": {
+          "index": {
+            "title": "Index",
+            "type": "integer",
+            "description": "GPU index"
+          },
+          "usage": {
+            "title": "Usage",
+            "allOf": [
+              {
+                "$ref": "#/components/schemas/GPUUsage"
+              }
+            ],
+            "description": "GPU usage details"
+          }
+        }
+      },
+      "GPUUsage": {
+        "title": "GPUUsage",
+        "required": [
+          "kernel_usage",
+          "memory_total",
+          "memory_used"
+        ],
+        "type": "object",
+        "properties": {
+          "kernel_usage": {
+            "title": "Kernel Usage",
+            "type": "number",
+            "description": "GPU kernel usage in percentage"
+          },
+          "memory_total": {
+            "title": "Memory Total",
+            "type": "integer",
+            "description": "Total GPU memory in megabytes"
+          },
+          "memory_used": {
+            "title": "Memory Used",
+            "type": "integer",
+            "description": "REST API used GPU memory in megabytes"
+          }
+        }
+      },
       "HTTPValidationError": {
         "title": "HTTPValidationError",
         "type": "object",
@@ -706,6 +792,48 @@
           }
         }
       },
+      "HealthResponse": {
+        "title": "HealthResponse",
+        "required": [
+          "version",
+          "cpu",
+          "memory"
+        ],
+        "type": "object",
+        "properties": {
+          "version": {
+            "title": "Version",
+            "type": "string",
+            "description": "Haystack version"
+          },
+          "cpu": {
+            "title": "Cpu",
+            "allOf": [
+              {
+                "$ref": "#/components/schemas/CPUUsage"
+              }
+            ],
+            "description": "CPU usage details"
+          },
+          "memory": {
+            "title": "Memory",
+            "allOf": [
+              {
+                "$ref": "#/components/schemas/MemoryUsage"
+              }
+            ],
+            "description": "Memory usage details"
+          },
+          "gpus": {
+            "title": "Gpus",
+            "type": "array",
+            "items": {
+              "$ref": "#/components/schemas/GPUInfo"
+            },
+            "description": "GPU usage details"
+          }
+        }
+      },
       "Label": {
         "title": "Label",
         "required": [
@@ -774,6 +902,20 @@
           }
         }
       },
+      "MemoryUsage": {
+        "title": "MemoryUsage",
+        "required": [
+          "used"
+        ],
+        "type": "object",
+        "properties": {
+          "used": {
+            "title": "Used",
+            "type": "number",
+            "description": "REST API used memory in percentage"
+          }
+        }
+      },
       "QueryRequest": {
         "title": "QueryRequest",
         "required": [
diff --git a/rest_api/pyproject.toml b/rest_api/pyproject.toml
index b4e9895e93..df7a38e88c 100644
--- a/rest_api/pyproject.toml
+++ b/rest_api/pyproject.toml
@@ -30,6 +30,8 @@ dependencies = [
   "uvicorn<1",
   "gunicorn<21",
   "python-multipart<1",  # optional FastAPI dependency for form data
+  "pynvml",
+  "psutil"
 ]
 dynamic = ["version"]
 
@@ -69,3 +71,7 @@ exclude_lines = [
   "if __name__ == .__main__.:",
   "if TYPE_CHECKING:",
 ]
+
+[tool.black]
+line-length = 120
+skip_magic_trailing_comma = true  # For compatibility with pydoc>=4.6, check if still needed.
diff --git a/rest_api/rest_api/controller/health.py b/rest_api/rest_api/controller/health.py
new file mode 100644
index 0000000000..83452f6130
--- /dev/null
+++ b/rest_api/rest_api/controller/health.py
@@ -0,0 +1,144 @@
+from typing import List, Optional
+
+import logging
+
+import os
+import pynvml
+import psutil
+
+from pydantic import BaseModel, Field, validator
+
+from fastapi import FastAPI, APIRouter
+
+import haystack
+
+from rest_api.utils import get_app
+from rest_api.config import LOG_LEVEL
+
+logging.getLogger("haystack").setLevel(LOG_LEVEL)
+logger = logging.getLogger("haystack")
+
+
+router = APIRouter()
+app: FastAPI = get_app()
+
+# Length (in seconds) of the blocking window used to sample the process CPU usage.
+# psutil's non-blocking `cpu_percent()` measures against the previous call made on the same
+# `Process` object and returns a meaningless 0.0 on the first call. A new `Process` object is
+# created for every request, so without an interval the reported CPU usage would always be 0.0.
+CPU_SAMPLE_INTERVAL_SECONDS = 0.1
+
+
+# NOTE: the models below are documented with comments rather than docstrings on purpose: pydantic copies
+# class docstrings into the generated OpenAPI schema, which would alter the published openapi.json.
+class CPUUsage(BaseModel):
+    used: float = Field(..., description="REST API average CPU usage in percentage")
+
+    @validator("used")
+    @classmethod
+    def used_check(cls, v):
+        # Two decimals are plenty for monitoring purposes
+        return round(v, 2)
+
+
+class MemoryUsage(BaseModel):
+    used: float = Field(..., description="REST API used memory in percentage")
+
+    @validator("used")
+    @classmethod
+    def used_check(cls, v):
+        return round(v, 2)
+
+
+class GPUUsage(BaseModel):
+    kernel_usage: float = Field(..., description="GPU kernel usage in percentage")
+    memory_total: int = Field(..., description="Total GPU memory in megabytes")
+    # None when the current process is not listed among the compute processes running on the GPU
+    memory_used: Optional[int] = Field(..., description="REST API used GPU memory in megabytes")
+
+    @validator("kernel_usage")
+    @classmethod
+    def kernel_usage_check(cls, v):
+        return round(v, 2)
+
+
+class GPUInfo(BaseModel):
+    index: int = Field(..., description="GPU index")
+    usage: GPUUsage = Field(..., description="GPU usage details")
+
+
+class HealthResponse(BaseModel):
+    version: str = Field(..., description="Haystack version")
+    cpu: CPUUsage = Field(..., description="CPU usage details")
+    memory: MemoryUsage = Field(..., description="Memory usage details")
+    gpus: List[GPUInfo] = Field(default_factory=list, description="GPU usage details")
+
+
+def _get_gpu_usage() -> List[GPUInfo]:
+    """
+    Collects the usage of every NVIDIA GPU visible through NVML.
+
+    Returns an empty list when NVML cannot be initialized. If a query fails half-way, the GPUs
+    collected up to that point are returned.
+    """
+    try:
+        pynvml.nvmlInit()
+    except pynvml.NVMLError:
+        logger.warning("No NVIDIA GPU found.")
+        return []
+
+    gpus: List[GPUInfo] = []
+    pid = os.getpid()
+    try:
+        for i in range(pynvml.nvmlDeviceGetCount()):
+            handle = pynvml.nvmlDeviceGetHandleByIndex(i)
+            info = pynvml.nvmlDeviceGetMemoryInfo(handle)
+            gpu_mem_total = float(info.total) / 1024 / 1024
+            gpu_mem_used = None
+            for proc in pynvml.nvmlDeviceGetComputeRunningProcesses(handle):
+                if proc.pid == pid:
+                    # pynvml reports None when the driver cannot tell how much memory the process uses
+                    if proc.usedGpuMemory is not None:
+                        gpu_mem_used = float(proc.usedGpuMemory) / 1024 / 1024
+                    break
+            gpus.append(
+                GPUInfo(
+                    index=i,
+                    usage=GPUUsage(
+                        memory_total=round(gpu_mem_total),
+                        kernel_usage=pynvml.nvmlDeviceGetUtilizationRates(handle).gpu,
+                        memory_used=round(gpu_mem_used) if gpu_mem_used is not None else None,
+                    ),
+                )
+            )
+    except pynvml.NVMLError as error:
+        logger.warning("Could not read the NVIDIA GPU usage: %s", error)
+    finally:
+        # Every successful nvmlInit() must be paired with nvmlShutdown(), otherwise each health check
+        # leaks one NVML initialization.
+        try:
+            pynvml.nvmlShutdown()
+        except pynvml.NVMLError as error:
+            logger.debug("NVML shutdown failed: %s", error)
+
+    return gpus
+
+
+@router.get("/health", response_model=HealthResponse, status_code=200)
+def get_health_status():
+    """
+    This endpoint allows external systems to monitor the health of the Haystack REST API.
+    """
+    # NOTE: the docstring above is published as the operation description in the OpenAPI schema.
+    gpus = _get_gpu_usage()
+
+    # psutil reports the process CPU usage summed over all cores: normalize it by the core count
+    cpu_count = os.cpu_count() or 1
+    p = psutil.Process()
+    p_cpu_usage = p.cpu_percent(interval=CPU_SAMPLE_INTERVAL_SECONDS) / cpu_count
+    p_memory_usage = p.memory_percent()
+
+    cpu_usage = CPUUsage(used=p_cpu_usage)
+    memory_usage = MemoryUsage(used=p_memory_usage)
+
+    return HealthResponse(version=haystack.__version__, cpu=cpu_usage, memory=memory_usage, gpus=gpus)
diff --git a/rest_api/rest_api/utils.py b/rest_api/rest_api/utils.py
index cc496ca290..174913b5f4 100644
--- a/rest_api/rest_api/utils.py
+++ b/rest_api/rest_api/utils.py
@@ -25,13 +25,14 @@ def get_app() -> FastAPI:
     app = FastAPI(title="Haystack REST API", debug=True, version=haystack_version, root_path=ROOT_PATH)
 
     # Creates the router for the API calls
-    from rest_api.controller import file_upload, search, feedback, document
+    from rest_api.controller import file_upload, search, feedback, document, health
 
     router = APIRouter()
     router.include_router(search.router, tags=["search"])
     router.include_router(feedback.router, tags=["feedback"])
     router.include_router(file_upload.router, tags=["file-upload"])
     router.include_router(document.router, tags=["document"])
+    router.include_router(health.router, tags=["health"])
 
     # This middleware enables allow all cross-domain requests to the API from a browser. For production
     # deployments, it could be made more restrictive.
diff --git a/rest_api/test/test_rest_api.py b/rest_api/test/test_rest_api.py
index f358e41d99..93c7908bc1 100644
--- a/rest_api/test/test_rest_api.py
+++ b/rest_api/test/test_rest_api.py
@@ -4,13 +4,15 @@
 from pathlib import Path
 from textwrap import dedent
 from unittest import mock
-from unittest.mock import MagicMock
+from unittest.mock import MagicMock, Mock
+import functools
 
 import numpy as np
 import pandas as pd
 import pytest
 from fastapi.testclient import TestClient
 from haystack import Document, Answer
+import haystack
 from haystack.nodes import BaseReader, BaseRetriever
 from haystack.document_stores import BaseDocumentStore
 from haystack.schema import Label
@@ -499,3 +501,60 @@ def test_get_feedback_malformed_query(client, feedback):
     feedback["unexpected_field"] = "misplaced-value"
     response = client.post(url="/feedback", json=feedback)
     assert response.status_code == 422
+
+
+def test_get_health_check(client):
+    with mock.patch("rest_api.controller.health.os") as mock_os:
+        mock_os.cpu_count.return_value = 4
+        mock_os.getpid.return_value = 2345
+        with mock.patch("rest_api.controller.health.pynvml") as mock_pynvml:
+            mock_pynvml.nvmlDeviceGetCount.return_value = 2
+            mock_pynvml.nvmlDeviceGetHandleByIndex.return_value = "device"
+            # 34359738368 bytes == 32768 MiB of total device memory
+            mock_pynvml.nvmlDeviceGetMemoryInfo.return_value = Mock(total=34359738368)
+            # Only the entry matching the mocked PID (2345) must be reported: 2097152000 bytes == 2000 MiB
+            mock_pynvml.nvmlDeviceGetComputeRunningProcesses.return_value = [
+                Mock(pid=1234, usedGpuMemory=4000000000),
+                Mock(pid=2345, usedGpuMemory=2097152000),
+                Mock(pid=3456, usedGpuMemory=2000000000),
+            ]
+            mock_pynvml.nvmlDeviceGetUtilizationRates.return_value = Mock(gpu=45)
+            with mock.patch("rest_api.controller.health.psutil") as mock_psutil:
+                # 200% process CPU divided by the 4 mocked cores gives the expected 50%
+                mock_psutil.Process.return_value = Mock(
+                    cpu_percent=Mock(return_value=200), memory_percent=Mock(return_value=75)
+                )
+
+                response = client.get(url="/health")
+                assert response.status_code == 200
+                assert response.json() == {
+                    "version": haystack.__version__,
+                    "cpu": {"used": 50.0},
+                    "memory": {"used": 75.0},
+                    "gpus": [
+                        {"index": 0, "usage": {"kernel_usage": 45.0, "memory_total": 32768, "memory_used": 2000}},
+                        {"index": 1, "usage": {"kernel_usage": 45.0, "memory_total": 32768, "memory_used": 2000}},
+                    ],
+                }
+
+
+def test_get_health_check_without_gpu(client):
+    with mock.patch("rest_api.controller.health.os") as mock_os:
+        mock_os.cpu_count.return_value = 4
+        with mock.patch("rest_api.controller.health.pynvml") as mock_pynvml:
+            # `except pynvml.NVMLError` needs a real exception class, which an auto-created mock attribute is not
+            mock_pynvml.NVMLError = RuntimeError
+            mock_pynvml.nvmlInit.side_effect = RuntimeError("NVML shared library not found")
+            with mock.patch("rest_api.controller.health.psutil") as mock_psutil:
+                mock_psutil.Process.return_value = Mock(
+                    cpu_percent=Mock(return_value=200), memory_percent=Mock(return_value=75)
+                )
+
+                response = client.get(url="/health")
+                assert response.status_code == 200
+                assert response.json() == {
+                    "version": haystack.__version__,
+                    "cpu": {"used": 50.0},
+                    "memory": {"used": 75.0},
+                    "gpus": [],
+                }