Skip to content

Commit

Permalink
Improve logging when executors fail, and ensure that an exception is …
Browse files Browse the repository at this point in the history
…raised so that we definitely post a response back to the waiting handin server
  • Loading branch information
Ben Lerner committed Aug 28, 2024
1 parent 347b3e1 commit c8767f5
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 3 deletions.
9 changes: 6 additions & 3 deletions worker/orca_grader/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from orca_grader.config import APP_CONFIG
from orca_grader.db.operations import reenqueue_job, censored_url
from orca_grader.docker_utils.images.clean_up import clean_up_unused_images
from orca_grader.exceptions import InvalidWorkerStateException, NoImageNameFoundException
from orca_grader.exceptions import InvalidWorkerStateException, NoImageNameFoundException, ExecutorExecutionException
from orca_grader.executor.builder.docker_grading_job_executor_builder import DockerGradingJobExecutorBuilder
from orca_grader.executor.builder.grading_job_executor_builder import GradingJobExecutorBuilder
from orca_grader.job_retrieval.local.local_grading_job_retriever import LocalGradingJobRetriever
Expand Down Expand Up @@ -131,7 +131,7 @@ def run_grading_job(grading_job: GradingJobJSON, no_container: bool,
image_name = get_image_name_for_sha(container_sha)
if image_name is None:
_LOGGER.info(
f"No image {image_name} found in local docker registry.")
f"No image with SHA {container_sha} found in local docker registry; trying to retrieve from orchestrator.")
images_endpoint = f"{APP_CONFIG.orca_web_server_host}/images"
tgz_file_name = retrieve_image_tgz_for_unique_name(images_endpoint, container_sha)
image_name = load_image_from_tgz(tgz_file_name)
Expand Down Expand Up @@ -217,11 +217,14 @@ def handle_grading_job(grading_job: GradingJobJSON, image_name: str | None = Non
result = executor.execute()
if result.was_successful:
_LOGGER.info("Job was completed successfully.")
_LOGGER.debug("\n".join(result.results))
else:
warn_str = "The processes timed out during execution." if \
result.did_timeout else "An error was encountered during execution."
_LOGGER.warning(warn_str)
_LOGGER.debug("\n".join(result.results))
_LOGGER.warning("\n".join(result.results))
raise ExecutorExecutionException(warn_str + "\n".join(result.results))



def inform_client_of_reenqueue(grading_job: GradingJobJSON,
Expand Down
9 changes: 9 additions & 0 deletions worker/orca_grader/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,12 @@ class NoImageNameFoundException(Exception):

def __init__(self, msg: str):
self.msg = msg


class ExecutorExecutionException(Exception):
    """
    Used to signal when executing a job's executor failed for some reason.

    The message is kept on the ``msg`` attribute and is also passed to the
    ``Exception`` base class, so ``str(exc)`` and ``exc.args`` carry the
    same text.
    """

    def __init__(self, msg: str):
        # Initialise the base class explicitly rather than relying on
        # BaseException.__new__ to populate ``args`` as a side effect.
        super().__init__(msg)
        self.msg = msg

0 comments on commit c8767f5

Please sign in to comment.