From 4cc28b9da396c7a24c80e181f9229d3015d7b979 Mon Sep 17 00:00:00 2001 From: jfrverdasca Date: Mon, 2 Dec 2024 14:48:13 +0000 Subject: [PATCH] Partial project refactor (#156) --- .env.sample | 11 +- docker-compose.yml | 2 +- labs/api/codemonkey_endpoints.py | 163 +++++++++--------- labs/api/github_endpoints.py | 116 ++++++------- labs/api/schemas.py | 102 ----------- labs/api/schemas/__init__.py | 0 labs/api/schemas/codemonkey.py | 34 ++++ labs/api/schemas/github.py | 48 ++++++ labs/config/configuration_variables.py | 17 -- labs/embeddings/vectorizers/base.py | 4 +- .../vectorizers/chunk_vectorizer.py | 10 +- .../vectorizers/python_vectorizer.py | 8 +- labs/github/github.py | 27 +-- labs/llm.py | 6 +- labs/repo.py | 34 ++-- labs/run.py | 26 +-- labs/tasks/__init__.py | 20 +-- labs/tasks/llm.py | 13 +- labs/tasks/{repo.py => repository.py} | 48 +++--- labs/tasks/run.py | 37 ++-- labs/tests/test_codemonkey_endpoints.py | 66 +++---- labs/tests/test_github_requests.py | 26 +-- labs/tests/test_llm.py | 12 +- 23 files changed, 390 insertions(+), 440 deletions(-) delete mode 100644 labs/api/schemas.py create mode 100644 labs/api/schemas/__init__.py create mode 100644 labs/api/schemas/codemonkey.py create mode 100644 labs/api/schemas/github.py rename labs/tasks/{repo.py => repository.py} (65%) diff --git a/.env.sample b/.env.sample index 76fecf5..fac3dc3 100644 --- a/.env.sample +++ b/.env.sample @@ -8,24 +8,15 @@ DATABASE_PASS=postgres DATABASE_NAME=postgres DATABASE_PORT=63045 -GITHUB_OWNER= -GITHUB_REPO= -GITHUB_USERNAME= -GITHUB_ACCESS_TOKEN= - PROMTAIL_ACCESS_TOKEN= PROMTAIL_ID= CELERY_BROKER_URL=redis://127.0.0.1:6379 CELERY_BACKEND_URL=redis://127.0.0.1:6379 -LOCAL_REPOS_PATH= +LOCAL_REPOSITORIES_PATH= REDIS_HOST=redis REDIS_PORT=6379 LOCAL_LLM_HOST=http://localhost:11434 - -# Labs main flow -EMBEDDINGS_MODEL= -LLM_MODEL= diff --git a/docker-compose.yml b/docker-compose.yml index 9b62301..acee052 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -64,7 +64,7 @@ services: - labs-db volumes: - ./labs/:/app/ - - ${LOCAL_REPOS_PATH}:/local-repos/ + - ${LOCAL_REPOSITORIES_PATH}:/local-repos/ ollama: image: ollama/ollama:latest diff --git a/labs/api/codemonkey_endpoints.py b/labs/api/codemonkey_endpoints.py index 246b7af..1cd5c42 100644 --- a/labs/api/codemonkey_endpoints.py +++ b/labs/api/codemonkey_endpoints.py @@ -1,18 +1,15 @@ import logging -from api.schemas import ( - ApplyCodeChangesRequest, - CommitChangesRequest, - CreateBranchRequest, - CreatePullRequestRequest, - FindSimilarEmbeddingsRequest, - GetIssueRequest, - GetLLMResponseRequest, - PreparePromptAndContextRequest, - RunOnLocalRepoRequest, - RunOnRepoRequest, - VectorizeRepoToDatabaseRequest, +from api.schemas.codemonkey import ( + ApplyCodeChangesSchema, + FindEmbeddingsSchema, + GithubRepositorySchema, + LLMReponseSchema, + LocalRepositoryShema, + PreparePromptContextSchema, + VectorizeRepositorySchema, ) +from api.schemas.github import BranchIssueSchema, CommitSchema, IssueSchema, PullRequestSchema from asgiref.sync import sync_to_async from decorators import async_time_and_log_function from django.http import HttpRequest @@ -23,13 +20,13 @@ commit_changes_task, create_branch_task, create_pull_request_task, - find_similar_embeddings_task, + find_embeddings_task, get_issue_task, get_llm_response_task, prepare_prompt_and_context_task, - run_on_local_repo_task, - run_on_repo_task, - vectorize_repo_to_database_task, + run_on_local_repository_task, + run_on_repository_task, + vectorize_repository_task, ) logger = logging.getLogger(__name__) @@ -37,122 +34,118 @@ router = Router(tags=["codemonkey"]) -@router.post("/run_on_repo") +@router.post("/run_on_repository") @async_time_and_log_function -async def run_on_repo_endpoint(request: HttpRequest, run_on_repo: RunOnRepoRequest): +async def run_on_repository_endpoint(request: HttpRequest, run_on_repository: GithubRepositorySchema): try: - run_on_repo_task( - token=run_on_repo.github_token, - repo_owner=run_on_repo.repo_owner, - repo_name=run_on_repo.repo_name, - issue_number=run_on_repo.issue_number, - username=run_on_repo.username, - original_branch=run_on_repo.original_branch, + run_on_repository_task( + token=run_on_repository.token, + repository_owner=run_on_repository.repository_owner, + repository_name=run_on_repository.repository_name, + issue_number=run_on_repository.issue_number, + username=run_on_repository.username, + original_branch=run_on_repository.original_branch, ) except Exception as ex: logger.exception("Internal server error") raise HttpError(status_code=500, message="Internal server error: " + str(ex)) -@router.post("/run_on_local_repo", response={200: None}) +@router.post("/run_on_local_repository", response={200: None}) @async_time_and_log_function -async def run_on_local_repo_endpoint(request: HttpRequest, run_on_local_repo: RunOnLocalRepoRequest): +async def run_on_local_repository_endpoint(request: HttpRequest, run_on_local_repository: LocalRepositoryShema): try: - run_on_local_repo_task(repo_path=run_on_local_repo.repo_path, issue_text=run_on_local_repo.issue_text) + run_on_local_repository_task( + repository_path=run_on_local_repository.repository_path, issue_text=run_on_local_repository.prompt + ) except Exception as ex: logger.exception("Internal server error") raise HttpError(status_code=500, message="Internal server error: " + str(ex)) -@router.post("/get_issue") +@router.post("/vectorize_repository") @async_time_and_log_function -async def get_issue_endpoint(request: HttpRequest, get_issue: GetIssueRequest): - return get_issue_task( - token=get_issue.github_token, - repo_owner=get_issue.repo_owner, - repo_name=get_issue.repo_name, - issue_number=get_issue.issue_number, - username=get_issue.username, +async def vectorize_repository_endpoint(request: HttpRequest, vectorize_repository: VectorizeRepositorySchema): + return await sync_to_async(vectorize_repository_task, thread_sensitive=True)( + repository_path=vectorize_repository.repository_path ) -@router.post("/create_branch") +@router.post("/find_embeddings") @async_time_and_log_function -async def create_branch_endpoint(request: HttpRequest, create_branch: CreateBranchRequest): - return create_branch_task( - token=create_branch.github_token, - repo_owner=create_branch.repo_owner, - repo_name=create_branch.repo_name, - issue_number=create_branch.issue_number, - username=create_branch.username, - original_branch=create_branch.original_branch, - issue_title=create_branch.issue_title, +async def find_embeddings_endpoint(request: HttpRequest, find_embeddings: FindEmbeddingsSchema): + return await sync_to_async(find_embeddings_task, thread_sensitive=True)( + issue_body=find_embeddings.prompt, repository_path=find_embeddings.repository_path ) -@router.post("/vectorize_repo_to_database") +@router.post("/prepare_prompt_and_context") @async_time_and_log_function -async def vectorize_repo_to_database_endpoint( - request: HttpRequest, vectorize_repo_to_database: VectorizeRepoToDatabaseRequest -): - return await sync_to_async(vectorize_repo_to_database_task, thread_sensitive=True)( - repo_destination=vectorize_repo_to_database.repo_destination +async def prepare_prompt_and_context_endpoint(request: HttpRequest, prepare_prompt_context: PreparePromptContextSchema): + return await sync_to_async(prepare_prompt_and_context_task, thread_sensitive=True)( + issue_body=prepare_prompt_context.prompt, embeddings=prepare_prompt_context.embeddings ) -@router.post("/find_similar_embeddings") +@router.post("/get_llm_response") @async_time_and_log_function -async def find_similar_embeddings_endpoint(request: HttpRequest, find_similar_embeddings: FindSimilarEmbeddingsRequest): - return await sync_to_async(find_similar_embeddings_task, thread_sensitive=True)( - issue_body=find_similar_embeddings.issue_body, repo_destination=find_similar_embeddings.repo_destination - ) +async def get_llm_response_endpoint(request: HttpRequest, llm_reponse: LLMReponseSchema): + return await sync_to_async(get_llm_response_task, thread_sensitive=True)(context=llm_reponse.context) -@router.post("/prepare_prompt_and_context") +@router.post("/apply_code_changes") @async_time_and_log_function -async def prepare_prompt_and_context_endpoint( - request: HttpRequest, prepare_prompt_and_context: PreparePromptAndContextRequest -): - return await sync_to_async(prepare_prompt_and_context_task, thread_sensitive=True)( - issue_body=prepare_prompt_and_context.issue_body, embeddings=prepare_prompt_and_context.embeddings - ) +async def apply_code_changes_endpoint(request: HttpRequest, apply_code_changes: ApplyCodeChangesSchema): + return await sync_to_async(apply_code_changes_task, thread_sensitive=True)(llm_response=apply_code_changes.changes) -@router.post("/get_llm_response") +@router.post("/get_issue") @async_time_and_log_function -async def get_llm_response_endpoint(request: HttpRequest, get_llm_reponse: GetLLMResponseRequest): - return await sync_to_async(get_llm_response_task, thread_sensitive=True)(prepared_context=get_llm_reponse.context) +async def get_issue_endpoint(request: HttpRequest, issue: IssueSchema): + return get_issue_task( + token=issue.token, + repository_owner=issue.repository_owner, + repository_name=issue.repository_name, + username=issue.username, + issue_number=issue.issue_number, + ) -@router.post("/apply_code_changes") +@router.post("/create_branch") @async_time_and_log_function -async def apply_code_changes_endpoint(request: HttpRequest, apply_code_changes: ApplyCodeChangesRequest): - return await sync_to_async(apply_code_changes_task, thread_sensitive=True)( - llm_response=apply_code_changes.llm_response +async def create_branch_endpoint(request: HttpRequest, branch: BranchIssueSchema): + return create_branch_task( + token=branch.token, + repository_owner=branch.repository_owner, + repository_name=branch.repository_name, + username=branch.username, + issue_number=branch.issue_number, + original_branch=branch.original_branch, + issue_title=branch.issue_title, ) @router.post("/commit_changes") @async_time_and_log_function -async def commit_changes_endpoint(request: HttpRequest, commit_changes: CommitChangesRequest): +async def commit_changes_endpoint(request: HttpRequest, commit: CommitSchema): return await sync_to_async(commit_changes_task, thread_sensitive=True)( - token=commit_changes.github_token, - repo_owner=commit_changes.repo_owner, - repo_name=commit_changes.repo_name, - username=commit_changes.username, - branch_name=commit_changes.branch_name, - files_modified=commit_changes.files, + token=commit.token, + repository_owner=commit.repository_owner, + repository_name=commit.repository_name, + username=commit.username, + branch_name=commit.branch_name, + files_modified=commit.files, ) @router.post("/create_pull_request") @async_time_and_log_function -async def create_pull_request_endpoint(request: HttpRequest, create_pull_request: CreatePullRequestRequest): +async def create_pull_request_endpoint(request: HttpRequest, pull_request: PullRequestSchema): return await sync_to_async(create_pull_request_task, thread_sensitive=True)( - token=create_pull_request.github_token, - repo_owner=create_pull_request.repo_owner, - repo_name=create_pull_request.repo_name, - username=create_pull_request.username, - branch_name=create_pull_request.branch_name, - original_branch=create_pull_request.original_branch, + token=pull_request.token, + repository_owner=pull_request.repository_owner, + repository_name=pull_request.repository_name, + username=pull_request.username, + branch_name=pull_request.changes_branch_name, + original_branch=pull_request.base_branch_name, ) diff --git a/labs/api/github_endpoints.py b/labs/api/github_endpoints.py index 4c453c5..61517d2 100644 --- a/labs/api/github_endpoints.py +++ b/labs/api/github_endpoints.py @@ -1,13 +1,13 @@ import logging -from api.schemas import ( - ChangeIssueStatusRequest, - CommitChangesRequest, - CreateBranchRequest, - CreatePullRequestRequest, - GithubModel, - IssueRequest, - ListIssuesRequest, +from api.schemas.github import ( + BranchSchema, + CommitSchema, + GithubSchema, + IssueSchema, + IssueStatusSchema, + ListIssuesSchema, + PullRequestSchema, ) from decorators import async_time_and_log_function from django.http import HttpRequest @@ -20,117 +20,117 @@ router = Router(tags=["github"]) -@router.post("/list-issues") +@router.post("/list_issues") @async_time_and_log_function -async def list_issues(request: HttpRequest, github: GithubModel, params: ListIssuesRequest): +async def list_issues_endpoint(request: HttpRequest, params: ListIssuesSchema): try: github_requests = GithubRequests( - github_token=github.github_token, - repo_owner=github.repo_owner, - repo_name=github.repo_name, - username=github.username, + github_token=params.token, + repository_owner=params.repository_owner, + repository_name=params.repository_name, + username=params.username, ) return github_requests.list_issues(assignee=params.assignee, state=params.state, per_page=params.per_page) except Exception as e: - logger.exception("Internal server error") + logger.exception("Internal server error", exc_info=e) raise HttpError(status_code=500, message="Internal server error: " + str(e)) -@router.post("/get-issue") +@router.post("/get_issue") @async_time_and_log_function -async def get_issue(request: HttpRequest, github: GithubModel, params: IssueRequest): +async def get_issue_endpoint(request: HttpRequest, params: IssueSchema): try: github_requests = GithubRequests( - github_token=github.github_token, - repo_owner=github.repo_owner, - repo_name=github.repo_name, - username=github.username, + github_token=params.token, + repository_owner=params.repository_owner, + repository_name=params.repository_name, + username=params.username, ) return github_requests.get_issue(issue_number=params.issue_number) except Exception as e: - logger.exception("Internal server error") + logger.exception("Internal server error", exc_info=e) raise HttpError(status_code=500, message="Internal server error: " + str(e)) -@router.post("/create-branch") +@router.post("/create_branch") @async_time_and_log_function -async def create_branch(request: HttpRequest, github: GithubModel, params: CreateBranchRequest): +async def create_branch_endpoint(request: HttpRequest, params: BranchSchema): try: github_requests = GithubRequests( - github_token=github.github_token, - repo_owner=github.repo_owner, - repo_name=github.repo_name, - username=github.username, + github_token=params.token, + repository_owner=params.repository_owner, + repository_name=params.repository_name, + username=params.username, ) return github_requests.create_branch(branch_name=params.branch_name, original_branch=params.original_branch) except Exception as e: - logger.exception("Internal server error") + logger.exception("Internal server error", exc_info=e) raise HttpError(status_code=500, message="Internal server error: " + str(e)) -@router.post("/change-issue-status") +@router.post("/change_issue_status") @async_time_and_log_function -async def change_issue_status(request: HttpRequest, github: GithubModel, params: ChangeIssueStatusRequest): +async def change_issue_status_endpoint(request: HttpRequest, params: IssueStatusSchema): try: github_requests = GithubRequests( - github_token=github.github_token, - repo_owner=github.repo_owner, - repo_name=github.repo_name, - username=github.username, + github_token=params.token, + repository_owner=params.repository_owner, + repository_name=params.repository_name, + username=params.username, ) - return github_requests.change_issue_status(issue_number=params.issue_number, state=params.state) + return github_requests.change_issue_status(issue_number=params.issue_number, status=params.status) except Exception as e: - logger.exception("Internal server error") + logger.exception("Internal server error", exc_info=e) raise HttpError(status_code=500, message="Internal server error: " + str(e)) -@router.post("/commit-changes") +@router.post("/commit_changes") @async_time_and_log_function -async def commit_changes(request: HttpRequest, github: GithubModel, params: CommitChangesRequest): +async def commit_changes_endpoint(request: HttpRequest, params: CommitSchema): try: github_requests = GithubRequests( - github_token=github.github_token, - repo_owner=github.repo_owner, - repo_name=github.repo_name, - username=github.username, + github_token=params.token, + repository_owner=params.repository_owner, + repository_name=params.repository_name, + username=params.username, ) return github_requests.commit_changes( message=params.message, branch_name=params.branch_name, files=params.files ) except Exception as e: - logger.exception("Internal server error") + logger.exception("Internal server error", exc_info=e) raise HttpError(status_code=500, message="Internal server error: " + str(e)) -@router.post("/create-pull-request") +@router.post("/create_pull_request") @async_time_and_log_function -async def create_pull_request(request: HttpRequest, github: GithubModel, params: CreatePullRequestRequest): +async def create_pull_request_endpoint(request: HttpRequest, params: PullRequestSchema): try: github_requests = GithubRequests( - github_token=github.github_token, - repo_owner=github.repo_owner, - repo_name=github.repo_name, - username=github.username, + github_token=params.token, + repository_owner=params.repository_owner, + repository_name=params.repository_name, + username=params.username, ) return github_requests.create_pull_request( - head=params.head, base=params.base, title=params.title, body=params.body + head=params.head_branch_name, base=params.base_branch_name, title=params.title, body=params.body ) except Exception as e: - logger.exception("Internal server error") + logger.exception("Internal server error", exc_info=e) raise HttpError(status_code=500, message="Internal server error: " + str(e)) @router.post("/clone") @async_time_and_log_function -async def clone_repo(request: HttpRequest, github: GithubModel): +async def clone_repository_endpoint(request: HttpRequest, params: GithubSchema): try: github_requests = GithubRequests( - github_token=github.github_token, - repo_owner=github.repo_owner, - repo_name=github.repo_name, - username=github.username, + github_token=params.token, + repository_owner=params.repository_owner, + repository_name=params.repository_name, + username=params.username, ) return github_requests.clone() except Exception as e: - logger.exception("Internal server error") + logger.exception("Internal server error", exc_info=e) raise HttpError(status_code=500, message="Internal server error: " + str(e)) diff --git a/labs/api/schemas.py b/labs/api/schemas.py deleted file mode 100644 index aca5b7c..0000000 --- a/labs/api/schemas.py +++ /dev/null @@ -1,102 +0,0 @@ -from typing import Optional - -from pydantic import BaseModel, constr - - -class GithubModel(BaseModel): - token: str - repo_owner: str - repo_name: str - username: str - - -class RunOnRepoRequest(BaseModel): - github_token: str - repo_owner: str - repo_name: str - username: str - issue_number: int - original_branch: Optional[str] = "main" - - -class RunOnLocalRepoRequest(BaseModel): - repo_path: str - issue_text: constr(min_length=10) = "string" - - -class GetIssueRequest(BaseModel): - github_token: str - repo_owner: str - repo_name: str - username: str - issue_number: int - - -class CreateBranchRequest(BaseModel): - github_token: str - repo_owner: str - repo_name: str - username: str - issue_number: int - original_branch: str - issue_title: str - - -class VectorizeRepoToDatabaseRequest(BaseModel): - repo_destination: str - - -class FindSimilarEmbeddingsRequest(BaseModel): - repo_destination: str - issue_body: str - - -class PreparePromptAndContextRequest(BaseModel): - issue_body: str - embeddings: list - - -class GetLLMResponseRequest(BaseModel): - context: dict - - -class ApplyCodeChangesRequest(BaseModel): - llm_response: str - - -class ListIssuesRequest(BaseModel): - assignee: Optional[str] = None - state: str = "open" - per_page: int = 100 - - -class ChangeIssueStatusRequest(BaseModel): - issue_number: int - state: str - - -class CommitChangesRequest(BaseModel): - github_token: str - repo_owner: str - repo_name: str - username: str - branch_name: str - files: list - - -class CreatePullRequestRequest(BaseModel): - github_token: str - repo_owner: str - repo_name: str - username: str - original_branch: str - branch_name: str - - -class IssueRequest(BaseModel): - issue_number: int - - -class CallLLMRequest(BaseModel): - issue_summary: str - token: str diff --git a/labs/api/schemas/__init__.py b/labs/api/schemas/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/labs/api/schemas/codemonkey.py b/labs/api/schemas/codemonkey.py new file mode 100644 index 0000000..2b6a3e9 --- /dev/null +++ b/labs/api/schemas/codemonkey.py @@ -0,0 +1,34 @@ +from typing import List, Optional + +from api.schemas.github import GithubSchema +from pydantic import BaseModel + + +class GithubRepositorySchema(GithubSchema): + issue_number: int + original_branch: Optional[str] = None + + +class LocalRepositoryShema(BaseModel): + repository_path: str + prompt: str + + +class VectorizeRepositorySchema(BaseModel): + repository_path: str + + +class FindEmbeddingsSchema(LocalRepositoryShema): ... + + +class PreparePromptContextSchema(BaseModel): + prompt: str + embeddings: List[List[str]] + + +class LLMReponseSchema(BaseModel): + context: dict + + +class ApplyCodeChangesSchema(BaseModel): + changes: str diff --git a/labs/api/schemas/github.py b/labs/api/schemas/github.py new file mode 100644 index 0000000..dd43e0c --- /dev/null +++ b/labs/api/schemas/github.py @@ -0,0 +1,48 @@ +from typing import List, Optional + +from pydantic import BaseModel + + +class GithubSchema(BaseModel): + token: str + repository_owner: str + repository_name: str + username: str + + +class IssueSchema(GithubSchema): + issue_number: int + + +class BranchSchema(GithubSchema): + original_branch: str = "main" + branch_name: str + + +class BranchIssueSchema(IssueSchema): + original_branch: str = "main" + issue_title: str + + +class ListIssuesSchema(GithubSchema): + assignee: Optional[str] = None + state: str = "open" + per_page: int = 100 + + +class IssueStatusSchema(GithubSchema): + issue_number: int + status: str + + +class CommitSchema(GithubSchema): + message: Optional[str] = None + branch_name: str + files: List[str] + + +class PullRequestSchema(GithubSchema): + changes_branch_name: str + base_branch_name: str + title: Optional[str] = None + body: Optional[str] = None diff --git a/labs/config/configuration_variables.py b/labs/config/configuration_variables.py index a97dc12..36aeccd 100644 --- a/labs/config/configuration_variables.py +++ b/labs/config/configuration_variables.py @@ -1,27 +1,10 @@ import os -from pathlib import Path from logger import setup_logger setup_logger() -PROJ_ROOT = Path(__file__).resolve().parents[1] - -DATA_DIR = PROJ_ROOT / "data" -RAW_DATA_DIR = DATA_DIR / "raw" -INTERIM_DATA_DIR = DATA_DIR / "interim" -PROCESSED_DATA_DIR = DATA_DIR / "processed" -EXTERNAL_DATA_DIR = DATA_DIR / "external" - -MODELS_DIR = PROJ_ROOT / "models" -REPORTS_DIR = PROJ_ROOT / "reports" -FIGURES_DIR = REPORTS_DIR / "figures" - -GITHUB_ACCESS_TOKEN = os.environ.get("GITHUB_ACCESS_TOKEN") -GITHUB_OWNER = os.environ.get("GITHUB_OWNER") -GITHUB_REPO = os.environ.get("GITHUB_REPO") -GITHUB_USERNAME = os.environ.get("GITHUB_USERNAME") GITHUB_API_BASE_URL = "https://api.github.com" CLONE_DESTINATION_DIR = os.getenv("CLONE_DESTINATION_DIR", "/tmp/") diff --git a/labs/embeddings/vectorizers/base.py b/labs/embeddings/vectorizers/base.py index 8287e4e..a2f4dae 100644 --- a/labs/embeddings/vectorizers/base.py +++ b/labs/embeddings/vectorizers/base.py @@ -2,5 +2,5 @@ class Vectorizer: def __init__(self, vectorizer, *args, **kwargs): self.vectorizer = vectorizer(*args, **kwargs) - def vectorize_to_database(self, include_file_extensions, repo_destination, *args, **kwargs) -> None: - self.vectorizer.vectorize_to_database(include_file_extensions, repo_destination, *args, **kwargs) + def vectorize_to_database(self, include_file_extensions, repository_path, *args, **kwargs) -> None: + self.vectorizer.vectorize_to_database(include_file_extensions, repository_path, *args, **kwargs) diff --git a/labs/embeddings/vectorizers/chunk_vectorizer.py b/labs/embeddings/vectorizers/chunk_vectorizer.py index d6306e9..cf0fa9e 100644 --- a/labs/embeddings/vectorizers/chunk_vectorizer.py +++ b/labs/embeddings/vectorizers/chunk_vectorizer.py @@ -53,7 +53,7 @@ def load_docs(self, root_dir, file_extensions=None): loader = TextLoader(file_path, encoding="utf-8") docs.extend(loader.load_and_split()) except Exception: - logger.exception("Failed to load repo documents into memory.") + logger.exception("Failed to load repository documents into memory.") return docs def split_docs(self, docs): @@ -61,16 +61,16 @@ def split_docs(self, docs): text_splitter = CharacterTextSplitter(chunk_size=2000, chunk_overlap=0) return text_splitter.split_documents(docs) - def vectorize_to_database(self, include_file_extensions, repo_destination, *args, **kwargs): + def vectorize_to_database(self, include_file_extensions, repository_path, *args, **kwargs): logger.debug("Loading and splitting all documents into chunks.") - docs = self.load_docs(repo_destination, include_file_extensions) + docs = self.load_docs(repository_path, include_file_extensions) texts = self.split_docs(docs) files_and_texts = [(text.metadata["source"], text.page_content) for text in texts] texts = [file_and_text[1] for file_and_text in files_and_texts] - logger.debug("Embedding all repo documents.") + logger.debug("Embedding all repository documents.") embeddings = self.embedder.embed(prompt=texts) logger.debug("Storing all embeddings.") - self.embedder.reembed_code(files_texts=files_and_texts, embeddings=embeddings, repository=repo_destination) # type: ignore + self.embedder.reembed_code(files_texts=files_and_texts, embeddings=embeddings, repository=repository_path) # type: ignore diff --git a/labs/embeddings/vectorizers/python_vectorizer.py b/labs/embeddings/vectorizers/python_vectorizer.py index 6b74c30..36bbd09 100644 --- a/labs/embeddings/vectorizers/python_vectorizer.py +++ b/labs/embeddings/vectorizers/python_vectorizer.py @@ -65,7 +65,7 @@ def load_docs(self, root_dir, file_extensions=None): docs.extend(loader.load_and_split()) except Exception: - logger.exception("Failed to load repo documents into memory.") + logger.exception("Failed to load repository documents into memory.") continue @@ -121,8 +121,8 @@ def load_docs(self, root_dir, file_extensions=None): return docs - def vectorize_to_database(self, include_file_extensions, repo_destination, *args, **kwargs): - docs = self.load_docs(repo_destination, include_file_extensions) + def vectorize_to_database(self, include_file_extensions, repository_path, *args, **kwargs): + docs = self.load_docs(repository_path, include_file_extensions) logger.debug(f"Loading {len(docs)} documents...") @@ -133,5 +133,5 @@ def vectorize_to_database(self, include_file_extensions, repo_destination, *args self.embedder.reembed_code( files_texts=[(doc.metadata["source"], doc.page_content)], embeddings=embeddings, - repository=repo_destination, + repository=repository_path, ) diff --git a/labs/github/github.py b/labs/github/github.py index 6c7d0b4..c856e11 100644 --- a/labs/github/github.py +++ b/labs/github/github.py @@ -14,13 +14,13 @@ class GithubRequests: """Class to handle Github API requests""" - def __init__(self, github_token, repo_owner, repo_name, username=None): + def __init__(self, github_token, repository_owner, repository_name, username=None): self.github_token = github_token - self.repo_owner = repo_owner - self.repo_name = repo_name + self.repository_owner = repository_owner + self.repository_name = repository_name self.username = username - self.github_api_url = f"https://api.github.com/repos/{repo_owner}/{repo_name}" - self.directory_dir = f"{settings.CLONE_DESTINATION_DIR}{repo_owner}/{repo_name}" + self.github_api_url = f"{settings.GITHUB_API_BASE_URL}/repos/{repository_owner}/{repository_name}" + self.directory_dir = f"{settings.CLONE_DESTINATION_DIR}{repository_owner}/{repository_name}" def _get(self, url, headers={}, params={}): try: @@ -104,8 +104,8 @@ def create_branch(self, branch_name, original_branch="main"): return self._post(create_ref_url, headers, data) return None - def change_issue_status(self, issue_number, state): - if state not in ["open", "closed"]: + def change_issue_status(self, issue_number, status): + if status not in ["open", "closed"]: raise ValueError("Invalid state. The state must be 'open' or 'closed'.") url = f"{self.github_api_url}/issues/{issue_number}" @@ -113,7 +113,7 @@ def change_issue_status(self, issue_number, state): "Authorization": f"token {self.github_token}", "user-agent": "request", } - data = {"state": state} + data = {"state": status} return self._patch(url, headers, data) @@ -141,6 +141,9 @@ def commit_changes(self, message, branch_name, files): tree_items = [] for file_path in files: file_name = file_path.replace(f"{self.directory_dir}/", "") + if file_name.startswith("/"): + file_name = file_name[1:] + # Step 3: Read the file content and encode it in Base64 with open(file_path, "rb") as file: file_content = base64.b64encode(file.read()).decode("utf-8") @@ -156,6 +159,10 @@ def commit_changes(self, message, branch_name, files): tree_data = {"base_tree": base_tree_sha, "tree": tree_items} tree_url = f"{self.github_api_url}/git/trees" tree_response_json = self._post(tree_url, headers, tree_data) + if "status" in tree_response_json: + logger.error(f"Error while creating tree with updated files: {tree_response_json['message']}") + return None + new_tree_sha = tree_response_json["sha"] # Step 6: Create a new commit with the new tree @@ -183,9 +190,9 @@ def create_pull_request(self, head, base="main", title="New Pull Request", body= def clone(self): try: - url = f"https://github.com/{self.repo_owner}/{self.repo_name}.git" + url = f"https://github.com/{self.repository_owner}/{self.repository_name}.git" branch = "main" - probe = settings.CLONE_DESTINATION_DIR + f"{self.repo_owner}/{self.repo_name}/.git" + probe = settings.CLONE_DESTINATION_DIR + f"{self.repository_owner}/{self.repository_name}/.git" if not os.path.exists(probe): git.Repo.clone_from(url, self.directory_dir, branch=branch) return self.directory_dir diff --git a/labs/llm.py b/labs/llm.py index 6e8cc77..33c0126 100644 --- a/labs/llm.py +++ b/labs/llm.py @@ -131,7 +131,7 @@ def get_llm_response(prepared_context): @time_and_log_function -def call_llm_with_context(repo_destination, issue_summary): +def call_llm_with_context(repository_path, issue_summary): if not issue_summary: logger.error("issue_summary cannot be empty.") raise ValueError("issue_summary cannot be empty.") @@ -140,10 +140,10 @@ def call_llm_with_context(repo_destination, issue_summary): embedder = Embedder(embedder_class, *embeder_args) vectorizer_class = VectorizerModel.get_active_vectorizer() - Vectorizer(vectorizer_class, embedder).vectorize_to_database(None, repo_destination) + Vectorizer(vectorizer_class, embedder).vectorize_to_database(None, repository_path) # find_similar_embeddings narrows down codebase to files that matter for the issue at hand. - context = embedder.retrieve_embeddings(issue_summary, repo_destination) + context = embedder.retrieve_embeddings(issue_summary, repository_path) prompt = get_prompt(issue_summary) prepared_context = prepare_context(context, prompt) diff --git a/labs/repo.py b/labs/repo.py index 2a06ba1..54dcddf 100644 --- a/labs/repo.py +++ b/labs/repo.py @@ -8,17 +8,17 @@ logger = logging.getLogger(__name__) -def clone_repository(repo_url, local_path): - logger.debug(f"Cloning repo from {repo_url}") - subprocess.run(["git", "clone", repo_url, local_path]) +def clone_repository(repository_url, local_path): + logger.debug(f"Cloning repository from {repository_url}") + subprocess.run(["git", "clone", repository_url, local_path]) @time_and_log_function -def get_issue(token, repo_owner, repo_name, username, issue_number): +def get_issue(token, repository_owner, repository_name, username, issue_number): github_request = GithubRequests( github_token=token, - repo_owner=repo_owner, - repo_name=repo_name, + repository_owner=repository_owner, + repository_name=repository_name, username=username, ) return github_request.get_issue(issue_number) @@ -27,8 +27,8 @@ def get_issue(token, repo_owner, repo_name, username, issue_number): @time_and_log_function def create_branch( token, - repo_owner, - repo_name, + repository_owner, + repository_name, username, issue_number, issue_title, @@ -36,8 +36,8 @@ def create_branch( ): github_request = GithubRequests( github_token=token, - repo_owner=repo_owner, - repo_name=repo_name, + repository_owner=repository_owner, + repository_name=repository_name, username=username, ) branch_name = f"{issue_number}-{issue_title}" @@ -51,22 +51,22 @@ def change_issue_to_in_progress(): @time_and_log_function -def commit_changes(token, repo_owner, repo_name, username, branch_name, file_list): +def commit_changes(token, repository_owner, repository_name, username, branch_name, file_list, message="Fix"): github_request = GithubRequests( github_token=token, - repo_owner=repo_owner, - repo_name=repo_name, + repository_owner=repository_owner, + repository_name=repository_name, username=username, ) - return github_request.commit_changes("fix", branch_name=branch_name, files=file_list) + return github_request.commit_changes(message, branch_name, file_list) @time_and_log_function -def create_pull_request(token, repo_owner, repo_name, username, original_branch, branch_name): +def create_pull_request(token, repository_owner, repository_name, username, original_branch, branch_name): github_request = GithubRequests( github_token=token, - repo_owner=repo_owner, - repo_name=repo_name, + repository_owner=repository_owner, + repository_name=repository_name, username=username, ) return github_request.create_pull_request(branch_name, base=original_branch) diff --git a/labs/run.py b/labs/run.py index e117958..538998d 100644 --- a/labs/run.py +++ b/labs/run.py @@ -16,41 +16,41 @@ @time_and_log_function -def run_on_repo(token, repo_owner, repo_name, username, issue_number, original_branch="main"): - issue = get_issue(token, repo_owner, repo_name, username, issue_number) +def run_on_repository(token, repository_owner, repository_name, username, issue_number, original_branch="main"): + issue = get_issue(token, repository_owner, repository_name, username, issue_number) issue_title = issue["title"].replace(" ", "-") issue_summary = issue["body"] branch_name = create_branch( token, - repo_owner, - repo_name, + repository_owner, + repository_name, username, issue_number, issue_title, original_branch, ) - repo_url = f"https://github.com/{repo_owner}/{repo_name}" - logger.debug(f"Cloning repo from {repo_url}") + repository_url = f"https://github.com/{repository_owner}/{repository_name}" + logger.debug(f"Cloning repository from {repository_url}") - repo_destination = f"{settings.CLONE_DESTINATION_DIR}{repo_owner}/{repo_name}" - clone_repository(repo_url, repo_destination) + repository_path = f"{settings.CLONE_DESTINATION_DIR}{repository_owner}/{repository_name}" + clone_repository(repository_url, repository_path) - success, llm_response = call_llm_with_context(repo_destination, issue_summary) + success, llm_response = call_llm_with_context(repository_path, issue_summary) if not success: logger.error("Failed to get a response from LLM, aborting run.") return response_output = call_agent_to_apply_code_changes(llm_response[1].choices[0].message.content) - commit_changes(token, repo_owner, repo_name, username, branch_name, response_output) - create_pull_request(token, repo_owner, repo_name, username, branch_name) + commit_changes(token, repository_owner, repository_name, username, branch_name, response_output) + create_pull_request(token, repository_owner, repository_name, username, branch_name) @time_and_log_function -def run_on_local_repo(repo_path, issue_text): - success, llm_response = call_llm_with_context(repo_path, issue_text) +def run_on_local_repo(repository_path, issue_text): + success, llm_response = call_llm_with_context(repository_path, issue_text) if not success: logger.error("Failed to get a response from LLM, aborting run.") return diff --git a/labs/tasks/__init__.py b/labs/tasks/__init__.py index 5109802..00c5bf8 100644 --- a/labs/tasks/__init__.py +++ b/labs/tasks/__init__.py @@ -1,31 +1,31 @@ from tasks.llm import ( - find_similar_embeddings_task, + find_embeddings_task, get_llm_response_task, prepare_prompt_and_context_task, - vectorize_repo_to_database_task, + vectorize_repository_task, ) -from tasks.repo import ( +from tasks.repository import ( apply_code_changes_task, - clone_repo_task, + clone_repository_task, commit_changes_task, create_branch_task, create_pull_request_task, get_issue_task, ) -from tasks.run import init_task, run_on_local_repo_task, run_on_repo_task +from tasks.run import init_task, run_on_local_repository_task, run_on_repository_task __all__ = [ - "vectorize_repo_to_database_task", - "find_similar_embeddings_task", + "vectorize_repository_task", + "find_embeddings_task", "prepare_prompt_and_context_task", "get_llm_response_task", "get_issue_task", "create_branch_task", - "clone_repo_task", + "clone_repository_task", "apply_code_changes_task", "commit_changes_task", "create_pull_request_task", "init_task", - "run_on_repo_task", - "run_on_local_repo_task", + "run_on_repository_task", + "run_on_local_repository_task", ] diff --git a/labs/tasks/llm.py b/labs/tasks/llm.py index 776413d..d920d29 100644 --- a/labs/tasks/llm.py +++ b/labs/tasks/llm.py @@ -1,5 +1,4 @@ import json -import logging import config.configuration_variables as settings import redis @@ -9,20 +8,18 @@ from embeddings.vectorizers.base import Vectorizer from llm import get_llm_response, get_prompt, prepare_context -logger = logging.getLogger(__name__) - redis_client = redis.StrictRedis(host=settings.REDIS_HOST, port=settings.REDIS_PORT, db=0, decode_responses=True) @app.task -def vectorize_repo_to_database_task(prefix="", repo_destination=""): - repo_destination = redis_client.get(f"{prefix}_repo_destination") if prefix else repo_destination +def vectorize_repository_task(prefix="", repository_path=""): + repository_path = redis_client.get(f"{prefix}_repository_path") if prefix else repository_path embedder_class, *embeder_args = Model.get_active_embedding_model() embedder = Embedder(embedder_class, *embeder_args) vectorizer_class = VectorizerModel.get_active_vectorizer() - Vectorizer(vectorizer_class, embedder).vectorize_to_database(None, repo_destination) + Vectorizer(vectorizer_class, embedder).vectorize_to_database(None, repository_path) if prefix: return prefix @@ -30,11 +27,11 @@ def vectorize_repo_to_database_task(prefix="", repo_destination=""): @app.task -def find_similar_embeddings_task(prefix="", issue_body="", repo_destination=""): +def find_embeddings_task(prefix="", issue_body="", repository_path=""): embedder_class, *embeder_args = Model.get_active_embedding_model() embeddings_results = Embedder(embedder_class, *embeder_args).retrieve_embeddings( redis_client.get(f"{prefix}_issue_body") if prefix else issue_body, - redis_client.get(f"f{prefix}_repo_destination") if prefix else repo_destination, + redis_client.get(f"f{prefix}_repository_path") if prefix else repository_path, ) similar_embeddings = [ (embedding.repository, embedding.file_path, embedding.text) for embedding in embeddings_results diff --git a/labs/tasks/repo.py b/labs/tasks/repository.py similarity index 65% rename from labs/tasks/repo.py rename to labs/tasks/repository.py index fe077ca..496748a 100644 --- a/labs/tasks/repo.py +++ b/labs/tasks/repository.py @@ -13,14 +13,14 @@ @app.task -def get_issue_task(prefix="", token="", repo_owner="", repo_name="", issue_number="", username=""): +def get_issue_task(prefix="", token="", repository_owner="", repository_name="", issue_number="", username=""): token = redis_client.get(f"{prefix}_token") if prefix else token - repo_owner = redis_client.get(f"{prefix}_repo_owner") if prefix else repo_owner - repo_name = redis_client.get(f"{prefix}_repo_name") if prefix else repo_name + repository_owner = redis_client.get(f"{prefix}_repository_owner") if prefix else repository_owner + repository_name = redis_client.get(f"{prefix}_repository_name") if prefix else repository_name username = redis_client.get(f"{prefix}_username") if prefix else username issue_number = redis_client.get(f"{prefix}_issue_number") if prefix else issue_number - issue = get_issue(token, repo_owner, repo_name, username, issue_number) + issue = get_issue(token, repository_owner, repository_name, username, issue_number) if prefix: issue_title = issue["title"].replace(" ", "-") @@ -35,16 +35,16 @@ def get_issue_task(prefix="", token="", repo_owner="", repo_name="", issue_numbe def create_branch_task( prefix="", token="", - repo_owner="", - repo_name="", + repository_owner="", + repository_name="", issue_number="", username="", original_branch="", issue_title="", ): token = redis_client.get(f"{prefix}_token") if prefix else token - repo_owner = redis_client.get(f"{prefix}_repo_owner") if prefix else repo_owner - repo_name = redis_client.get(f"{prefix}_repo_name") if prefix else repo_name + repository_owner = redis_client.get(f"{prefix}_repository_owner") if prefix else repository_owner + repository_name = redis_client.get(f"{prefix}_repository_name") if prefix else repository_name username = redis_client.get(f"{prefix}_username") if prefix else username issue_number = redis_client.get(f"{prefix}_issue_number") if prefix else issue_number original_branch = redis_client.get(f"{prefix}_original_branch") if prefix else original_branch @@ -52,8 +52,8 @@ def create_branch_task( branch_name = create_branch( token, - repo_owner, - repo_name, + repository_owner, + repository_name, username, issue_number, issue_title, @@ -67,14 +67,14 @@ def create_branch_task( @app.task -def clone_repo_task(prefix="", repo_owner="", repo_name=""): - repo_owner = redis_client.get(f"{prefix}_repo_owner") if prefix else repo_owner - repo_name = redis_client.get(f"{prefix}_repo_name") if prefix else repo_name - repo_destination = f"{settings.CLONE_DESTINATION_DIR}{repo_owner}/{repo_name}" - clone_repository(f"https://github.com/{repo_owner}/{repo_name}", repo_destination) +def clone_repository_task(prefix="", repository_owner="", repository_name=""): + repository_owner = redis_client.get(f"{prefix}_repository_owner") if prefix else repository_owner + repository_name = redis_client.get(f"{prefix}_repository_name") if prefix else repository_name + repository_path = f"{settings.CLONE_DESTINATION_DIR}{repository_owner}/{repository_name}" + clone_repository(f"https://github.com/{repository_owner}/{repository_name}", repository_path) if prefix: - redis_client.set(f"{prefix}_repo_destination", repo_destination, ex=300) + redis_client.set(f"{prefix}_repository_path", repository_path, ex=300) return prefix return True @@ -94,16 +94,16 @@ def apply_code_changes_task(prefix="", llm_response=""): def commit_changes_task( prefix="", token="", - repo_owner="", - repo_name="", + repository_owner="", + repository_name="", username="", branch_name="", files_modified=[], ): commit_changes( token=redis_client.get(f"{prefix}_token") if prefix else token, - repo_owner=redis_client.get(f"{prefix}_repo_owner") if prefix else repo_owner, - repo_name=redis_client.get(f"{prefix}_repo_name") if prefix else repo_name, + repository_owner=redis_client.get(f"{prefix}_repository_owner") if prefix else repository_owner, + repository_name=redis_client.get(f"{prefix}_repository_name") if prefix else repository_name, username=redis_client.get(f"{prefix}_username") if prefix else username, branch_name=(redis_client.get(f"{prefix}_branch_name") if prefix else branch_name), file_list=(json.loads(redis_client.get(f"{prefix}_files_modified")) if prefix else files_modified), @@ -118,16 +118,16 @@ def commit_changes_task( def create_pull_request_task( prefix="", token="", - repo_owner="", - repo_name="", + repository_owner="", + repository_name="", username="", branch_name="", original_branch="", ): create_pull_request( token=redis_client.get(f"{prefix}_token") if prefix else token, - repo_owner=redis_client.get(f"{prefix}_repo_owner") if prefix else repo_owner, - repo_name=redis_client.get(f"{prefix}_repo_name") if prefix else repo_name, + repository_owner=redis_client.get(f"{prefix}_repository_owner") if prefix else repository_owner, + repository_name=redis_client.get(f"{prefix}_repository_name") if prefix else repository_name, username=redis_client.get(f"{prefix}_username") if prefix else username, original_branch=(redis_client.get(f"{prefix}_original_branch") if prefix else original_branch), branch_name=(redis_client.get(f"{prefix}_branch_name") if prefix else branch_name), diff --git a/labs/tasks/run.py b/labs/tasks/run.py index a129110..f48b53f 100644 --- a/labs/tasks/run.py +++ b/labs/tasks/run.py @@ -7,15 +7,15 @@ from config.celery import app from tasks import ( apply_code_changes_task, - clone_repo_task, + clone_repository_task, commit_changes_task, create_branch_task, create_pull_request_task, - find_similar_embeddings_task, + find_embeddings_task, get_issue_task, get_llm_response_task, prepare_prompt_and_context_task, - vectorize_repo_to_database_task, + vectorize_repository_task, ) logger = logging.getLogger(__name__) @@ -25,9 +25,9 @@ @app.task(bind=True) def init_task(self, **kwargs): - if "repo_destination" in kwargs: - if not os.path.exists(kwargs["repo_destination"]): - raise FileNotFoundError(f"Directory {kwargs['repo_destination']} does not exist") + if "repository_path" in kwargs: + if not os.path.exists(kwargs["repository_path"]): + raise FileNotFoundError(f"Directory {kwargs['repository_path']} does not exist") prefix = self.request.id for k, v in kwargs.items(): redis_client.set(f"{prefix}_{k}", v, ex=3600) @@ -35,18 +35,18 @@ def init_task(self, **kwargs): @app.task -def run_on_repo_task( +def run_on_repository_task( token: str, - repo_owner: str, - repo_name: str, + repository_owner: str, + repository_name: str, username: str, issue_number: int, original_branch: str = "main", ): data = { "token": token, - "repo_owner": repo_owner, - "repo_name": repo_name, + "repository_owner": repository_owner, + "repository_name": repository_name, "username": username, "issue_number": issue_number, "original_branch": original_branch, @@ -55,9 +55,9 @@ def run_on_repo_task( init_task.s(**data), get_issue_task.s(), create_branch_task.s(), - clone_repo_task.s(), - vectorize_repo_to_database_task.s(), - find_similar_embeddings_task.s(), + clone_repository_task.s(), + vectorize_repository_task.s(), + find_embeddings_task.s(), prepare_prompt_and_context_task.s(), get_llm_response_task.s(), apply_code_changes_task.s(), @@ -67,17 +67,16 @@ def run_on_repo_task( @app.task -def run_on_local_repo_task(repo_path, issue_text): +def run_on_local_repository_task(repository_path, issue_text): data = { - "repo_path": repo_path, "issue_text": issue_text, "issue_body": issue_text, - "repo_destination": repo_path, + "repository_path": repository_path, } chain( init_task.s(**data), - vectorize_repo_to_database_task.s(), - find_similar_embeddings_task.s(), + vectorize_repository_task.s(), + find_embeddings_task.s(), prepare_prompt_and_context_task.s(), get_llm_response_task.s(), apply_code_changes_task.s(), diff --git a/labs/tests/test_codemonkey_endpoints.py b/labs/tests/test_codemonkey_endpoints.py index bd5d579..8c1cc93 100644 --- a/labs/tests/test_codemonkey_endpoints.py +++ b/labs/tests/test_codemonkey_endpoints.py @@ -9,12 +9,12 @@ class TestCodemonkeyEndpoints: @pytest.mark.asyncio - @patch("api.codemonkey_endpoints.run_on_local_repo_task") - async def test_run_on_local_repo_endpoint(self, mock_task): + @patch("api.codemonkey_endpoints.run_on_local_repository_task") + async def test_run_on_local_repository_endpoint(self, mock_task): mock_task.return_value = None response = await client.post( - "/run_on_local_repo", - json={"repo_path": "path/to/repo", "issue_text": "example issue text"}, + "/run_on_local_repository", + json={"repository_path": "path/to/repo", "prompt": "example issue text"}, ) assert response.status_code == 200 mock_task.assert_called_once() @@ -26,9 +26,9 @@ async def test_get_issue_endpoint(self, mock_task): response = await client.post( "/get_issue", json={ - "github_token": "token", - "repo_owner": "owner", - "repo_name": "name", + "token": "token", + "repository_owner": "owner", + "repository_name": "name", "issue_number": 1, "username": "user", }, @@ -37,15 +37,15 @@ async def test_get_issue_endpoint(self, mock_task): mock_task.assert_called_once() @pytest.mark.asyncio - @patch("api.codemonkey_endpoints.run_on_repo_task") - async def test_run_on_repo_endpoint(self, mock_task): + @patch("api.codemonkey_endpoints.run_on_repository_task") + async def test_run_on_repository_endpoint(self, mock_task): mock_task.return_value = None response = await client.post( - "/run_on_repo", + "/run_on_repository", json={ - "github_token": "token", - "repo_owner": "owner", - "repo_name": "name", + "token": "token", + "repository_owner": "owner", + "repository_name": "name", "issue_number": 1, "username": "user", "original_branch": "main", @@ -61,9 +61,9 @@ async def test_create_branch_endpoint(self, mock_task): response = await client.post( "/create_branch", json={ - "github_token": "token", - "repo_owner": "owner", - "repo_name": "name", + "token": "token", + "repository_owner": "owner", + "repository_name": "name", "issue_number": 1, "username": "user", "original_branch": "main", @@ -74,22 +74,22 @@ async def test_create_branch_endpoint(self, mock_task): mock_task.assert_called_once() @pytest.mark.asyncio - @patch("api.codemonkey_endpoints.vectorize_repo_to_database_task") - async def test_vectorize_repo_to_database_endpoint(self, mock_task): + @patch("api.codemonkey_endpoints.vectorize_repository_task") + async def test_vectorize_repository_endpoint(self, mock_task): mock_task.return_value = {} response = await client.post( - "/vectorize_repo_to_database", - json={"repo_destination": "destination/path"}, + "/vectorize_repository", + json={"repository_path": "destination/path"}, ) assert response.status_code == 200 mock_task.assert_called_once() @pytest.mark.asyncio - @patch("api.codemonkey_endpoints.find_similar_embeddings_task") - async def test_find_similar_embeddings_endpoint(self, mock_task): + @patch("api.codemonkey_endpoints.find_embeddings_task") + async def test_find_embeddings_endpoint(self, mock_task): mock_task.return_value = {} response = await client.post( - "/find_similar_embeddings", json={"repo_destination": "destination/path", "issue_body": "issue body"} + "/find_embeddings", json={"repository_path": "destination/path", "prompt": "issue body"} ) assert response.status_code == 200 mock_task.assert_called_once() @@ -100,7 +100,7 @@ async def test_prepare_prompt_and_context_endpoint(self, mock_task): mock_task.return_value = {} response = await client.post( "/prepare_prompt_and_context", - json={"issue_body": "body", "embeddings": []}, + json={"prompt": "body", "embeddings": []}, ) assert response.status_code == 200 mock_task.assert_called_once() @@ -117,7 +117,7 @@ async def test_get_llm_response_endpoint(self, mock_task): @patch("api.codemonkey_endpoints.apply_code_changes_task") async def test_apply_code_changes_endpoint(self, mock_task): mock_task.return_value = {} - response = await client.post("/apply_code_changes", json={"llm_response": "response"}) + response = await client.post("/apply_code_changes", json={"changes": "response"}) assert response.status_code == 200 mock_task.assert_called_once() @@ -128,9 +128,9 @@ async def test_commit_changes_endpoint(self, mock_task): response = await client.post( "/commit_changes", json={ - "github_token": "token", - "repo_owner": "owner", - "repo_name": "name", + "token": "token", + "repository_owner": "owner", + "repository_name": "name", "username": "user", "branch_name": "branch", "files": [], @@ -146,12 +146,12 @@ async def test_create_pull_request_endpoint(self, mock_task): response = await client.post( "/create_pull_request", json={ - "github_token": "token", - "repo_owner": "owner", - "repo_name": "name", + "token": "token", + "repository_owner": "owner", + "repository_name": "name", "username": "user", - "branch_name": "branch", - "original_branch": "main", + "changes_branch_name": "branch", + "base_branch_name": "main", }, ) assert response.status_code == 200 diff --git a/labs/tests/test_github_requests.py b/labs/tests/test_github_requests.py index 595b4e1..94cfdbc 100644 --- a/labs/tests/test_github_requests.py +++ b/labs/tests/test_github_requests.py @@ -17,16 +17,16 @@ def test_list_issues_default_parameters(self, mocker): mock_get.return_value.json.return_value = sample_response github_token = "valid_token" - repo_owner = "owner_username" - repo_name = "repository_name" + repository_owner = "owner_username" + repository_name = "repository_name" username = "your_username" - github_requests = GithubRequests(github_token, repo_owner, repo_name, username) + github_requests = GithubRequests(github_token, repository_owner, repository_name, username) issues = github_requests.list_issues() assert issues == sample_response mock_get.assert_called_once_with( - f"https://api.github.com/repos/{repo_owner}/{repo_name}/issues", + f"https://api.github.com/repos/{repository_owner}/{repository_name}/issues", headers={ "Authorization": f"token {github_token}", "Accept": "application/vnd.github.v3+json", @@ -43,7 +43,7 @@ def test_list_issues_http_failure(self, mocker): mock_response.raise_for_status.side_effect = requests.exceptions.RequestException("HTTP Error") mocker.patch("requests.get", return_value=mock_response) - github_requests = GithubRequests(github_token="fake_token", repo_owner="owner", repo_name="repo") + github_requests = GithubRequests(github_token="fake_token", repository_owner="owner", repository_name="repo") issues = github_requests.list_issues() @@ -57,10 +57,10 @@ def test_get_issue_returns_correct_details(self, mocker): mock_get.return_value.json.return_value = sample_response github_token = "valid_token" - repo_owner = "owner_username" - repo_name = "repository_name" + repository_owner = "owner_username" + repository_name = "repository_name" username = "your_username" - github_requests = GithubRequests(github_token, repo_owner, repo_name, username) + github_requests = GithubRequests(github_token, repository_owner, repository_name, username) issue = github_requests.get_issue(1) @@ -71,7 +71,7 @@ def test_handle_http_request_failure_get_issue(self, mocker): mock_response.raise_for_status.side_effect = requests.exceptions.RequestException("Mocked Request Exception") mocker.patch("requests.get", return_value=mock_response) - github_requests = GithubRequests(github_token="fake_token", repo_owner="owner", repo_name="repo") + github_requests = GithubRequests(github_token="fake_token", repository_owner="owner", repository_name="repo") issue = github_requests.get_issue(1) @@ -82,9 +82,9 @@ def test_change_issue_status(self, mocker): mock_response.json.return_value = {"status": "closed"} mocker.patch("requests.patch", return_value=mock_response) - github_requests = GithubRequests(github_token="fake_token", repo_owner="owner", repo_name="repo") + github_requests = GithubRequests(github_token="fake_token", repository_owner="owner", repository_name="repo") - response = github_requests.change_issue_status(issue_number=1, state="closed") + response = github_requests.change_issue_status(issue_number=1, status="closed") assert response == {"status": "closed"} @@ -120,7 +120,7 @@ def test_commit_changes_successfully(self, mocker): mock_response_patch.json.return_value = {"sha": "fake_update_sha"} mocker.patch("requests.patch", return_value=mock_response_patch) - github_requests = GithubRequests(github_token="fake_token", repo_owner="owner", repo_name="repo") + github_requests = GithubRequests(github_token="fake_token", repository_owner="owner", repository_name="repo") result = github_requests.commit_changes( message="Commit message", @@ -137,7 +137,7 @@ def test_create_pull_request_default_parameters(self, mocker): mock_response.raise_for_status.return_value = None mocker.patch("requests.post", return_value=mock_response) - github_requests = GithubRequests(github_token="fake_token", repo_owner="owner", repo_name="repo") + github_requests = GithubRequests(github_token="fake_token", repository_owner="owner", repository_name="repo") pull_request = github_requests.create_pull_request(head="feature_branch") diff --git a/labs/tests/test_llm.py b/labs/tests/test_llm.py index 4a6edb6..6f0f6f2 100644 --- a/labs/tests/test_llm.py +++ b/labs/tests/test_llm.py @@ -18,11 +18,11 @@ class TestCallLLMWithContext: def test_empty_summary(self): - repo_destination = "repo_destination" + repository_path = "repository_path" issue_summary = "" with pytest.raises(Exception) as excinfo: - call_llm_with_context(repo_destination, issue_summary) + call_llm_with_context(repository_path, issue_summary) assert "issue_summary cannot be empty" in str(excinfo.value) @@ -69,9 +69,9 @@ class TestLocalLLM: @skip("This is used locally with an Ollama instance running in docker") def test_local_llm_connection(self, mocked_context, mocked_vectorize_to_database): mocked_context.return_value = [["file1", "/path/to/file1", "content"]] - repo_destination = "repo" + repository_destination = "repo" issue_summary = "Fix the bug in the authentication module" - success, response = call_llm_with_context(repo_destination, issue_summary) + success, response = call_llm_with_context(repository_destination, issue_summary) assert success @@ -92,9 +92,9 @@ def test_local_llm_redirect( ): mocked_context.return_value = [["file1", "/path/to/file1", "content"]] mocked_validate_llm_reponse.return_value = False, "" - repo_destination = "repo" + repository_destination = "repo" issue_summary = "Fix the bug in the authentication module" - call_llm_with_context(repo_destination, issue_summary) + call_llm_with_context(repository_destination, issue_summary) mocked_local_llm.assert_called_once()