Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix: Refactored configuration_variables.py and logger.py #167

Merged
merged 4 commits into from
Dec 12, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ migrate:
poetry run python labs/manage.py migrate

createuser:
DJANGO_SUPERUSER_PASSWORD=admin poetry run python labs/manage.py createsuperuser --noinput --username=admin --email=a@b.com
DJANGO_SUPERUSER_PASSWORD=admin poetry run python labs/manage.py createsuperuser --noinput --username=admin --email=admin@example.com

load_fixtures:
python labs/manage.py loaddata $(wildcard labs/fixtures/*.json)
5 changes: 4 additions & 1 deletion labs/api/schemas/codemonkey.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from typing import List, Optional

from api.schemas.github import GithubSchema
from django.conf import settings
from pydantic import BaseModel


Expand All @@ -18,7 +19,9 @@ class VectorizeRepositorySchema(BaseModel):
repository_path: str


class FindEmbeddingsSchema(LocalRepositoryShema): ...
class FindEmbeddingsSchema(LocalRepositoryShema):
similarity_threshold: float = settings.EMBEDDINGS_SIMILARITY_TRESHOLD
max_results: int = settings.EMBEDDINGS_MAX_RESULTS


class PreparePromptContextSchema(BaseModel):
Expand Down
2 changes: 1 addition & 1 deletion labs/config/celery.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import logging
import os

import config.configuration_variables as settings
import redis
from celery import Celery
from celery.signals import task_failure
from django.conf import settings
from kombu import Queue
from redbeat import RedBeatSchedulerEntry, schedulers

Expand Down
25 changes: 0 additions & 25 deletions labs/config/configuration_variables.py

This file was deleted.

19 changes: 19 additions & 0 deletions labs/config/logger.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
from datetime import datetime

from django.conf import settings
from pythonjsonlogger import jsonlogger


class CustomJsonFormatter(jsonlogger.JsonFormatter):
    """JSON formatter that guarantees ``timestamp``, ``level`` and ``project`` fields."""

    def add_fields(self, log_record, record, message_dict):
        """Fill in the project-specific fields after the base class has run.

        Args:
            log_record: Mutable mapping that will be serialised as the JSON line.
            record: The ``logging.LogRecord`` being formatted.
            message_dict: Extra fields forwarded unchanged to the base class.
        """
        super().add_fields(log_record, record, message_dict)

        if not log_record.get("timestamp"):
            # Use the moment the record was created rather than the moment it
            # happens to be formatted, so the JSON timestamp agrees with
            # ``%(asctime)s`` on other handlers even if formatting is delayed.
            created_at = datetime.fromtimestamp(record.created)
            log_record["timestamp"] = created_at.strftime(settings.LOGGING_DATETIME_FORMAT)

        # Normalise a caller-supplied level; otherwise fall back to the record's own.
        level = log_record.get("level")
        log_record["level"] = level.upper() if level else record.levelname

        log_record["project"] = "codemonkey"
61 changes: 61 additions & 0 deletions labs/config/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,50 @@
}


# Logging
def create_logging_directory() -> Path:
    """Ensure the ``logs`` directory exists and return the debug log file path.

    The directory lives next to ``BASE_DIR`` (``BASE_DIR.parent / "logs"``).

    Returns:
        Path of ``debug.log`` inside the logs directory.
    """
    logs_path = BASE_DIR.parent / "logs"
    # exist_ok avoids the check-then-create race when several processes
    # import the settings module at the same time.
    logs_path.mkdir(parents=True, exist_ok=True)

    return logs_path / "debug.log"


# Timestamp layout for ``datetime.strftime`` callers (read through
# ``settings.LOGGING_DATETIME_FORMAT``). ``%f`` (microseconds) is a datetime-only
# directive, so this value must not be used as a ``logging.Formatter`` datefmt.
LOGGING_DATETIME_FORMAT = "%Y-%m-%d %H:%M:%S,%f"
LOGGING = {
    "version": 1,
    "disable_existing_loggers": False,
    "formatters": {
        "standard": {
            # No "datefmt" on purpose: logging.Formatter renders asctime with
            # time.strftime, which has no %f directive. The built-in default
            # already yields "YYYY-MM-DD HH:MM:SS,mmm".
            "format": "[%(asctime)s][%(levelname)s][%(name)s]: %(message)s",
        },
        "json": {"()": "config.logger.CustomJsonFormatter"},
    },
    "handlers": {
        # Human-readable output on the console stream.
        "console": {
            "level": "DEBUG",
            "class": "logging.StreamHandler",
            "formatter": "standard",
        },
        # JSON lines in a size-rotated file (10 MB per file, 5 backups).
        "file": {
            "level": "DEBUG",
            "class": "logging.handlers.RotatingFileHandler",
            "filename": create_logging_directory(),
            "maxBytes": 10000000,
            "backupCount": 5,
            "formatter": "json",
        },
    },
    "loggers": {
        "labs": {
            "level": "DEBUG",
            "handlers": ["console", "file"],
            "propagate": False,
        }
    },
}

# Password validation
# https://docs.djangoproject.com/en/5.1/ref/settings/#auth-password-validators

Expand Down Expand Up @@ -127,3 +171,20 @@
# https://docs.djangoproject.com/en/5.1/ref/settings/#default-auto-field

DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField"

# Celery settings (read from the environment; None when unset)
CELERY_BROKER_URL = os.getenv("CELERY_BROKER_URL")
CELERY_BACKEND_URL = os.getenv("CELERY_BACKEND_URL")

# Redis settings (read from the environment; None when unset)
REDIS_HOST = os.getenv("REDIS_HOST")
REDIS_PORT = os.getenv("REDIS_PORT")

# Custom settings
GITHUB_API_BASE_URL = "https://api.github.com"

# Ollama endpoint; overridable through the environment.
LOCAL_LLM_HOST = os.getenv("LOCAL_LLM_HOST", "http://ollama:11434")

# Directory used as the clone destination; overridable through the environment.
CLONE_DESTINATION_DIR = os.getenv("CLONE_DESTINATION_DIR", "/tmp/")

# Defaults for embedding lookups.
EMBEDDINGS_SIMILARITY_TRESHOLD = 0.7
EMBEDDINGS_MAX_RESULTS = 10
jfrverdasca marked this conversation as resolved.
Show resolved Hide resolved
9 changes: 7 additions & 2 deletions labs/embeddings/embedder.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from dataclasses import dataclass
from typing import Any, Dict, List, Optional, Union

from django.conf import settings
from embeddings.models import Embedding
from pgvector.django import CosineDistance

Expand All @@ -20,7 +21,11 @@ def embed(self, prompt, *args, **kwargs) -> Embeddings:
return self.embedder.embed(prompt, *args, **kwargs)

def retrieve_embeddings(
self, query: str, repository: str, similarity_threshold: int = 0.7, number_of_results: int = 10
self,
query: str,
repository: str,
similarity_threshold: float = settings.EMBEDDINGS_SIMILARITY_TRESHOLD,
max_results: int = settings.EMBEDDINGS_MAX_RESULTS,
) -> List[Embedding]:
query = query.replace("\n", "")
embedded_query = self.embed(prompt=query).embeddings
Expand All @@ -29,7 +34,7 @@ def retrieve_embeddings(

return Embedding.objects.annotate(distance=CosineDistance("embedding", embedded_query[0])).filter(
repository=repository, distance__lt=similarity_threshold
)[:number_of_results]
)[:max_results]

def reembed_code(
self,
Expand Down
4 changes: 2 additions & 2 deletions labs/embeddings/ollama.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from config.configuration_variables import LOCAL_LLM_HOST
from django.conf import settings
from embeddings.embedder import Embeddings
from ollama import Client

Expand All @@ -7,7 +7,7 @@ class OllamaEmbedder:
def __init__(self, model):
self._model_name = model

self._client = Client(LOCAL_LLM_HOST)
self._client = Client(settings.LOCAL_LLM_HOST)

def embed(self, prompt, *args, **kwargs) -> Embeddings:
result = self._client.embed(self._model_name, prompt, *args, **kwargs)
Expand Down
2 changes: 1 addition & 1 deletion labs/github/github.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
import os
from dataclasses import dataclass

import config.configuration_variables as settings
import git
import requests
from django.conf import settings

logger = logging.getLogger(__name__)

Expand Down
4 changes: 2 additions & 2 deletions labs/llm/ollama.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
from config.configuration_variables import LOCAL_LLM_HOST
from django.conf import settings
from ollama import Client


class OllamaRequester:
def __init__(self, model):
self._model_name = model

self._client = Client(LOCAL_LLM_HOST)
self._client = Client(settings.LOCAL_LLM_HOST)

def completion_without_proxy(self, messages, *args, **kwargs):
"""
Expand Down
49 changes: 0 additions & 49 deletions labs/logger.py

This file was deleted.

22 changes: 18 additions & 4 deletions labs/tasks/llm.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import json
import logging

import config.configuration_variables as settings
from config.celery import app
from core.models import Model, VectorizerModel
from django.conf import settings
from embeddings.embedder import Embedder
from embeddings.vectorizers.vectorizer import Vectorizer
from llm.requester import Requester
Expand Down Expand Up @@ -85,11 +85,19 @@ def vectorize_repository_task(prefix="", repository_path=""):


@app.task
def find_embeddings_task(prefix="", issue_body="", repository_path=""):
def find_embeddings_task(
prefix="",
issue_body="",
repository_path="",
similarity_threshold=settings.EMBEDDINGS_SIMILARITY_TRESHOLD,
max_results=settings.EMBEDDINGS_MAX_RESULTS,
):
embedder_class, *embeder_args = Model.get_active_embedding_model()
embeddings_results = Embedder(embedder_class, *embeder_args).retrieve_embeddings(
redis_client.get(RedisVariable.ISSUE_BODY, prefix=prefix, default=issue_body),
redis_client.get(RedisVariable.REPOSITORY_PATH, prefix=prefix, default=repository_path),
similarity_threshold,
max_results,
)
similar_embeddings = [
(embedding.repository, embedding.file_path, embedding.text) for embedding in embeddings_results
Expand All @@ -102,7 +110,10 @@ def find_embeddings_task(prefix="", issue_body="", repository_path=""):


@app.task
def prepare_prompt_and_context_task(prefix="", issue_body="", embeddings=[]):
def prepare_prompt_and_context_task(prefix="", issue_body="", embeddings=None):
if not embeddings:
embeddings = []

prompt = get_prompt(redis_client.get(RedisVariable.ISSUE_BODY, prefix=prefix, default=issue_body))
redis_client.set(RedisVariable.PROMPT, prefix=prefix, value=prompt)

Expand All @@ -116,7 +127,10 @@ def prepare_prompt_and_context_task(prefix="", issue_body="", embeddings=[]):


@app.task
def get_llm_response_task(prefix="", context={}):
def get_llm_response_task(prefix="", context=None):
if not context:
context = {}

context = json.loads(redis_client.get(RedisVariable.CONTEXT, prefix=prefix, default=context))
llm_response = get_llm_response(context)

Expand Down
2 changes: 1 addition & 1 deletion labs/tasks/repository.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import json

import config.configuration_variables as settings
from config.celery import app
from decorators import time_and_log_function
from django.conf import settings
from github.github import GithubRequests
from parsers.response import create_file, modify_file, parse_llm_output
from tasks.redis_client import RedisStrictClient, RedisVariable
Expand Down
2 changes: 1 addition & 1 deletion labs/tasks/run.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import os.path

import config.configuration_variables as settings
from celery import chain
from config.celery import app
from django.conf import settings
from tasks import (
apply_code_changes_task,
clone_repository_task,
Expand Down
Loading