diff --git a/Makefile b/Makefile index 9833da9..9803a50 100644 --- a/Makefile +++ b/Makefile @@ -93,7 +93,7 @@ migrate: poetry run python labs/manage.py migrate createuser: - DJANGO_SUPERUSER_PASSWORD=admin poetry run python labs/manage.py createsuperuser --noinput --username=admin --email=a@b.com + DJANGO_SUPERUSER_PASSWORD=admin poetry run python labs/manage.py createsuperuser --noinput --username=admin --email=admin@example.com load_fixtures: python labs/manage.py loaddata $(wildcard labs/fixtures/*.json) diff --git a/labs/api/schemas/codemonkey.py b/labs/api/schemas/codemonkey.py index 2864b1d..fdc0ee3 100644 --- a/labs/api/schemas/codemonkey.py +++ b/labs/api/schemas/codemonkey.py @@ -1,6 +1,7 @@ from typing import List, Optional from api.schemas.github import GithubSchema +from django.conf import settings from pydantic import BaseModel @@ -19,8 +20,8 @@ class VectorizeRepositorySchema(BaseModel): class FindEmbeddingsSchema(LocalRepositoryShema): - similarity_threshold: float = 0.7 - max_results: int = 10 + similarity_threshold: float = settings.EMBEDDINGS_SIMILARITY_THRESHOLD + max_results: int = settings.EMBEDDINGS_MAX_RESULTS class PreparePromptContextSchema(BaseModel): diff --git a/labs/config/celery.py b/labs/config/celery.py index 9a201c7..f88d84b 100644 --- a/labs/config/celery.py +++ b/labs/config/celery.py @@ -1,10 +1,10 @@ import logging import os -import config.configuration_variables as settings import redis from celery import Celery from celery.signals import task_failure +from django.conf import settings from kombu import Queue from redbeat import RedBeatSchedulerEntry, schedulers diff --git a/labs/config/configuration_variables.py b/labs/config/configuration_variables.py deleted file mode 100644 index 36aeccd..0000000 --- a/labs/config/configuration_variables.py +++ /dev/null @@ -1,25 +0,0 @@ -import os - -from logger import setup_logger - -setup_logger() - - -GITHUB_API_BASE_URL = "https://api.github.com" - -CLONE_DESTINATION_DIR = os.getenv("CLONE_DESTINATION_DIR", "/tmp/") - -DATABASE_USER = os.environ.get("DATABASE_USER", "postgres") -DATABASE_PASS = os.environ.get("DATABASE_PASS", "postgres") -DATABASE_HOST = os.environ.get("DATABASE_HOST", "localhost") -DATABASE_PORT = os.environ.get("DATABASE_PORT", "5432") -DATABASE_NAME = os.environ.get("DATABASE_NAME", "postgres") -DATABASE_URL = f"postgresql://{DATABASE_USER}:{DATABASE_PASS}@{DATABASE_HOST}:{DATABASE_PORT}/{DATABASE_NAME}" - -CELERY_BROKER_URL = os.environ.get("CELERY_BROKER_URL") -CELERY_BACKEND_URL = os.environ.get("CELERY_BACKEND_URL") - -REDIS_HOST = os.environ.get("REDIS_HOST") -REDIS_PORT = os.environ.get("REDIS_PORT") - -LOCAL_LLM_HOST = os.environ.get("LOCAL_LLM_HOST", "http://ollama:11434") diff --git a/labs/config/logger.py b/labs/config/logger.py new file mode 100644 index 0000000..b7cf996 --- /dev/null +++ b/labs/config/logger.py @@ -0,0 +1,19 @@ +from datetime import datetime + +from django.conf import settings +from pythonjsonlogger import jsonlogger + + +class CustomJsonFormatter(jsonlogger.JsonFormatter): + def add_fields(self, log_record, record, message_dict): + super(CustomJsonFormatter, self).add_fields(log_record, record, message_dict) + + if not log_record.get("timestamp"): + log_record["timestamp"] = datetime.now().strftime(settings.LOGGING_DATETIME_FORMAT) + + if log_record.get("level"): + log_record["level"] = log_record["level"].upper() + else: + log_record["level"] = record.levelname.upper() + + log_record["project"] = "labs" diff --git a/labs/config/settings.py b/labs/config/settings.py index 8f73602..15e23af 100644 --- a/labs/config/settings.py +++ b/labs/config/settings.py @@ -87,6 +87,50 @@ } +# Logging +def create_logging_directory() -> Path: + logs_path = BASE_DIR.parent / "logs" + if not os.path.exists(logs_path): + os.makedirs(logs_path) + + return logs_path / "debug.log" + + +LOGGING_DATETIME_FORMAT = "%Y-%m-%d %H:%M:%S,%f" +LOGGING = { + "version": 1, + "disable_existing_loggers": False, + "formatters": { + "standard": { + "format": "[%(asctime)s][%(levelname)s][%(name)s]: %(message)s", + "datefmt": LOGGING_DATETIME_FORMAT, + }, + "json": {"()": "config.logger.CustomJsonFormatter"}, + }, + "handlers": { + "console": { + "level": "DEBUG", + "class": "logging.StreamHandler", + "formatter": "standard", + }, + "file": { + "level": "DEBUG", + "class": "logging.handlers.RotatingFileHandler", + "filename": create_logging_directory(), + "maxBytes": 10000000, # 10 Mb + "backupCount": 5, + "formatter": "json", + }, + }, + "loggers": { + "root": { + "level": "DEBUG", + "handlers": ["console", "file"], + "propagate": False, + } + }, +} + # Password validation # https://docs.djangoproject.com/en/5.1/ref/settings/#auth-password-validators @@ -127,3 +171,20 @@ # https://docs.djangoproject.com/en/5.1/ref/settings/#default-auto-field DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField" + +# Celery settings +CELERY_BROKER_URL = os.environ.get("CELERY_BROKER_URL") +CELERY_BACKEND_URL = os.environ.get("CELERY_BACKEND_URL") + +# Redis settings +REDIS_HOST = os.environ.get("REDIS_HOST") +REDIS_PORT = os.environ.get("REDIS_PORT") + +# Custom settings +GITHUB_API_BASE_URL = "https://api.github.com" + +LOCAL_LLM_HOST = os.environ.get("LOCAL_LLM_HOST", "http://ollama:11434") + +CLONE_DESTINATION_DIR = os.getenv("CLONE_DESTINATION_DIR", "/tmp/") +EMBEDDINGS_SIMILARITY_THRESHOLD = 0.7 +EMBEDDINGS_MAX_RESULTS = 10 diff --git a/labs/embeddings/embedder.py b/labs/embeddings/embedder.py index a0b122a..51ddea8 100644 --- a/labs/embeddings/embedder.py +++ b/labs/embeddings/embedder.py @@ -1,6 +1,7 @@ from dataclasses import dataclass from typing import Any, Dict, List, Optional, Union +from django.conf import settings from embeddings.models import Embedding from pgvector.django import CosineDistance @@ -20,7 +21,11 @@ def embed(self, prompt, *args, **kwargs) -> Embeddings: return self.embedder.embed(prompt, *args, **kwargs) def retrieve_embeddings( - self, query: str, repository: str, similarity_threshold: float = 0.7, max_results: int = 10 + self, + query: str, + repository: str, + similarity_threshold: float = settings.EMBEDDINGS_SIMILARITY_THRESHOLD, + max_results: int = settings.EMBEDDINGS_MAX_RESULTS, ) -> List[Embedding]: query = query.replace("\n", "") embedded_query = self.embed(prompt=query).embeddings diff --git a/labs/embeddings/ollama.py b/labs/embeddings/ollama.py index caee400..bc5f6ff 100644 --- a/labs/embeddings/ollama.py +++ b/labs/embeddings/ollama.py @@ -1,4 +1,4 @@ -from config.configuration_variables import LOCAL_LLM_HOST +from django.conf import settings from embeddings.embedder import Embeddings from ollama import Client @@ -7,7 +7,7 @@ class OllamaEmbedder: def __init__(self, model): self._model_name = model - self._client = Client(LOCAL_LLM_HOST) + self._client = Client(settings.LOCAL_LLM_HOST) def embed(self, prompt, *args, **kwargs) -> Embeddings: result = self._client.embed(self._model_name, prompt, *args, **kwargs) diff --git a/labs/github/github.py b/labs/github/github.py index 98f3f6c..2521bb1 100644 --- a/labs/github/github.py +++ b/labs/github/github.py @@ -3,9 +3,9 @@ import os from dataclasses import dataclass -import config.configuration_variables as settings import git import requests +from django.conf import settings logger = logging.getLogger(__name__) diff --git a/labs/llm/ollama.py b/labs/llm/ollama.py index 2afcf99..0d8df62 100644 --- a/labs/llm/ollama.py +++ b/labs/llm/ollama.py @@ -1,4 +1,4 @@ -from config.configuration_variables import LOCAL_LLM_HOST +from django.conf import settings from ollama import Client @@ -6,7 +6,7 @@ class OllamaRequester: def __init__(self, model): self._model_name = model - self._client = Client(LOCAL_LLM_HOST) + self._client = Client(settings.LOCAL_LLM_HOST) def completion_without_proxy(self, messages, *args, **kwargs): """ diff --git a/labs/logger.py b/labs/logger.py deleted file mode 100644 index 476aabe..0000000 --- a/labs/logger.py +++ /dev/null @@ -1,49 +0,0 @@ -import logging -from datetime import datetime -from logging.handlers import RotatingFileHandler - -from pythonjsonlogger import jsonlogger - -DEFAULT_MAX_BYTES = 10000000 -DEFAULT_BACKUP_COUNT = 5 -LOG_FORMAT = "%Y-%m-%d %H:%M:%S,%f" - - -class CustomJsonFormatter(jsonlogger.JsonFormatter): - def add_fields(self, log_record, record, message_dict): - super(CustomJsonFormatter, self).add_fields(log_record, record, message_dict) - - if not log_record.get("timestamp"): - log_record["timestamp"] = datetime.now().strftime(LOG_FORMAT) - - if log_record.get("level"): - log_record["level"] = log_record["level"].upper() - else: - log_record["level"] = record.levelname - - log_record["project"] = "codemonkey" - - -def setup_logger(): - logging.basicConfig(level=logging.DEBUG, datefmt=LOG_FORMAT) - logger = logging.getLogger("labs") - logger.propagate = False - - log_format = "[%(asctime)s][%(levelname)s][%(name)s]: %(message)s" - formatter = logging.Formatter(fmt=log_format, datefmt=LOG_FORMAT) - stream_handler = logging.StreamHandler() - stream_handler.setFormatter(formatter) - logger.addHandler(stream_handler) - - try: - formatter = CustomJsonFormatter() - handler = RotatingFileHandler( - "logs/debug.log", - maxBytes=DEFAULT_MAX_BYTES, - backupCount=DEFAULT_BACKUP_COUNT, - ) - handler.setFormatter(formatter) - logger.addHandler(handler) - - except Exception: - pass diff --git a/labs/tasks/llm.py b/labs/tasks/llm.py index cc8fdf4..9e57168 100644 --- a/labs/tasks/llm.py +++ b/labs/tasks/llm.py @@ -1,9 +1,9 @@ import json import logging -import config.configuration_variables as settings from config.celery import app from core.models import Model, VectorizerModel +from django.conf import settings from embeddings.embedder import Embedder from embeddings.vectorizers.vectorizer import Vectorizer from llm.requester import Requester @@ -85,7 +85,13 @@ def vectorize_repository_task(prefix="", repository_path=""): @app.task -def find_embeddings_task(prefix="", issue_body="", repository_path="", similarity_threshold=0.7, max_results=10): +def find_embeddings_task( + prefix="", + issue_body="", + repository_path="", + similarity_threshold=settings.EMBEDDINGS_SIMILARITY_THRESHOLD, + max_results=settings.EMBEDDINGS_MAX_RESULTS, +): embedder_class, *embeder_args = Model.get_active_embedding_model() embeddings_results = Embedder(embedder_class, *embeder_args).retrieve_embeddings( redis_client.get(RedisVariable.ISSUE_BODY, prefix=prefix, default=issue_body), diff --git a/labs/tasks/repository.py b/labs/tasks/repository.py index 78fc250..1c05fcd 100644 --- a/labs/tasks/repository.py +++ b/labs/tasks/repository.py @@ -1,8 +1,8 @@ import json -import config.configuration_variables as settings from config.celery import app from decorators import time_and_log_function +from django.conf import settings from github.github import GithubRequests from parsers.response import create_file, modify_file, parse_llm_output from tasks.redis_client import RedisStrictClient, RedisVariable diff --git a/labs/tasks/run.py b/labs/tasks/run.py index d72ba71..b057b3e 100644 --- a/labs/tasks/run.py +++ b/labs/tasks/run.py @@ -1,8 +1,8 @@ import os.path -import config.configuration_variables as settings from celery import chain from config.celery import app +from django.conf import settings from tasks import ( apply_code_changes_task, clone_repository_task,