Skip to content

Commit

Permalink
Merge branch 'main' into feat/image-and-job-status
Browse files Browse the repository at this point in the history
  • Loading branch information
williams-jack committed Jul 31, 2024
2 parents cc69d15 + 82b7308 commit cc8b446
Show file tree
Hide file tree
Showing 11 changed files with 194 additions and 125 deletions.
10 changes: 9 additions & 1 deletion worker/orca_grader/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import time
from typing import List, Optional
import tempfile
from subprocess import CalledProcessError
from orca_grader.common.services.push_results import push_results_with_exception
from orca_grader.common.types.grading_job_json_types import GradingJobJSON
from orca_grader.config import APP_CONFIG
Expand Down Expand Up @@ -67,6 +68,8 @@ def process_jobs_from_db(no_container: bool,
if stop_future in done:
break

print(f"Pulled job with key {grading_job['key']} and url {grading_job['response_url']}")

job_execution_future = futures_executor.submit(
run_grading_job, grading_job, no_container, container_command)
done, not_done = concurrent.futures.wait(
Expand All @@ -81,6 +84,7 @@ def process_jobs_from_db(no_container: bool,
if job_execution_future in done:
if type(job_execution_future.exception()) == InvalidWorkerStateException:
exit(1)
print("Job completed.")
clean_up_unused_images()

if stop_future in done:
Expand Down Expand Up @@ -108,6 +112,10 @@ def run_grading_job(grading_job: GradingJobJSON, no_container: bool,
else:
handle_grading_job(grading_job, container_sha)
except Exception as e:
print(e)
if type(e) == CalledProcessError:
print(e.stdout)
print(e.stderr)
if "response_url" in grading_job:
push_results_with_exception(grading_job, e)
else:
Expand Down Expand Up @@ -152,7 +160,7 @@ def handle_grading_job(grading_job: GradingJobJSON, container_sha: str | None =
if result and result.stdout:
# TODO: make this a log statement of some sort.
print(result.stdout.decode())
elif result and result.stderr:
if result and result.stderr:
print(result.stderr.decode())


Expand Down
3 changes: 2 additions & 1 deletion worker/orca_grader/common/services/push_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,13 @@ def push_results_to_response_url(job_result: GradingJobResult,
**job_result.to_json(interpolated_dirs=interpolated_dirs),
"key": key
}
print(result_as_json)
_send_results_with_exponential_backoff(result_as_json, response_url)


def push_results_with_exception(grading_job: GradingJobJSON,
                                e: Exception) -> None:
    """
    Report a grading-job failure to the job's response_url.

    Wraps the raised exception in a GradingJobResult with no command
    responses and pushes it with the job's key. The result OBJECT is passed
    through (not its JSON) so that push_results_to_response_url can perform
    serialization itself with the interpolated-dirs mapping it receives
    (here empty, since no directories were interpolated before the failure).
    """
    output = GradingJobResult([], [e])
    key, response_url = grading_job["key"], grading_job["response_url"]
    push_results_to_response_url(output, key, response_url, {})

Expand Down
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
from urllib.parse import urlparse, unquote
from os.path import basename
from orca_grader.container.build_script.code_file.sub_mime_types import SubmissionMIMEType
from orca_grader.container.build_script.code_file.mime_types import MIMEType
from orca_grader.common.types.grading_job_json_types import CodeFileInfoJSON

class CodeFileInfo:
"""
CodeFileInfo contains:
- A URL to download/save/extract either an assignment, submission, or
- A URL to download/save/extract either an assignment, submission, or
grading file.
- The MIME type of that file.
- The source of the code file. See ./code_file_source.py for more info.
"""

def __init__(self, url: str, mime_type: SubmissionMIMEType,
def __init__(self, url: str, mime_type: MIMEType,
save_dir_name: str, should_replace_paths: bool) -> None:
self.__url = url
self.__mime_type = mime_type
Expand All @@ -21,16 +21,16 @@ def __init__(self, url: str, mime_type: SubmissionMIMEType,

def get_url(self) -> str:
return self.__url
def get_mime_type(self) -> SubmissionMIMEType:

def get_mime_type(self) -> MIMEType:
return self.__mime_type

def should_replace_paths(self) -> bool:
return self.__should_replace_paths

def get_save_dir_name(self) -> str:
return self.__save_dir_name

# https://stackoverflow.com/questions/18727347/how-to-extract-a-filename-from-a-url-append-a-word-to-it
def get_file_name(self) -> str:
"""
Expand All @@ -41,5 +41,5 @@ def get_file_name(self) -> str:
return basename(file_path)

def json_to_code_file_info(json_code_file: CodeFileInfoJSON, dir_name: str) -> CodeFileInfo:
return CodeFileInfo(json_code_file["url"], SubmissionMIMEType(json_code_file["mime_type"]),
return CodeFileInfo(json_code_file["url"], MIMEType(json_code_file["mime_type"]),
dir_name, json_code_file["should_replace_paths"])
33 changes: 33 additions & 0 deletions worker/orca_grader/container/build_script/code_file/mime_types.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from enum import Enum


class MIMEType(Enum):
    """
    Enumerates the MIME-type labels that files attached to an assignment
    or submission may carry.

    Several values are editor/language tags rather than registered MIME
    types (e.g. "pyret", "scheme", "mllike").
    """

    TAR = "application/x-tar"
    GZ = "application/gzip"
    TAR_GZ = "application/x-gtar"
    ZIP = "application/zip"
    SEVEN_ZIP = "application/x-7z-compressed"
    JAVA = "text/x-java"
    JAVA_ARCHIVE = "application/java-archive"
    JAVA_CLASS = "application/java-vm"
    JAVASCRIPT = "text/javascript"
    PYRET = "pyret"
    RACKET_SCHEME = "scheme"
    LISP = "text/x-common-lisp"
    ML = "mllike"
    HASKELL = "text/x-haskell"
    # NOTE(review): "text/z-literate-haskell" looks like a typo of
    # "text/x-literate-haskell" — confirm against the clients that send it
    # before changing, since the string is matched at runtime.
    LIT_HASKELL = "text/z-literate-haskell"
    PLAIN_TEXT = "text/plain"


# The subset of MIME types that denote archives needing extraction.
ARCHIVE_MIMES = [MIMEType.ZIP, MIMEType.TAR, MIMEType.GZ, MIMEType.TAR_GZ]


def is_archive_mime_type(mime_type: MIMEType) -> bool:
    """Return True when the given MIME type refers to an archive format."""
    return any(mime_type is archive for archive in ARCHIVE_MIMES)
Original file line number Diff line number Diff line change
Expand Up @@ -5,87 +5,96 @@
import gzip
from typing import Dict
from orca_grader.container.build_script.code_file.code_file_info import CodeFileInfo
from orca_grader.container.build_script.code_file.sub_mime_types import SubmissionMIMEType
from orca_grader.container.build_script.code_file.mime_types import MIMEType
import subprocess
from orca_grader.common.services.download_file import download_file

# Maximum seconds an external extraction process may run before being
# killed by subprocess's timeout: 2 minutes & 30 seconds.
__EXTRACTION_TIMEOUT = 60 * 2.5


def extract_tar_file(from_path: str, to_path: str, compression_option: str = "") -> str:
    """
    Extract the tar archive at from_path into the directory to_path.

    compression_option is spliced into tar's extraction flag (e.g. "z"
    yields -xzf for gzip-compressed tars). Returns to_path.

    NOTE(review): stderr is merged into stdout, which is sent to DEVNULL,
    and the exit status is not checked — a failed extraction is silent here.
    """
    subprocess.run(["tar", f"-x{compression_option}f", from_path, "-C", to_path],
                   stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT,
                   timeout=__EXTRACTION_TIMEOUT)
    return to_path


def extract_gz_file(from_path: str, to_path: str) -> str:
    """
    Decompress the gzip file at from_path into the directory to_path.

    The output file is named after the input with its trailing .gz
    extension removed; returns the path of the decompressed file.
    """
    # Remove .gz from file name.
    from_f_name = path.splitext(path.basename(from_path))[0]
    f_out_path = path.join(to_path, from_f_name)
    with gzip.open(from_path, "rb") as f_in:
        with open(f_out_path, 'wb') as f_out:
            copyfileobj(f_in, f_out)
    return f_out_path


def extract_zip_file(from_path: str, to_path: str) -> str:
    """
    Extract the zip archive at from_path into the directory to_path using
    the external `unzip` binary. Returns to_path.

    NOTE(review): output and exit status are discarded — a failed
    extraction is silent here.
    """
    subprocess.run(["unzip", from_path, "-d", to_path], stdout=subprocess.DEVNULL,
                   stderr=subprocess.STDOUT, timeout=__EXTRACTION_TIMEOUT)
    return to_path


def extract_7zip_file(from_path: str, to_path: str) -> str:
    """
    Extract the 7-zip archive at from_path into the directory to_path
    using the external `7z` binary. Returns to_path.

    NOTE(review): output and exit status are discarded — a failed
    extraction is silent here.
    """
    subprocess.run(["7z", "x", from_path, f"-o{to_path}"], stdout=subprocess.DEVNULL,
                   stderr=subprocess.STDOUT, timeout=__EXTRACTION_TIMEOUT)
    return to_path


class CodeFileProcessor:
    """
    Downloads a grading job's code files and extracts archived ones,
    optionally rewriting interpolated-directory placeholders found inside
    the extracted files.
    """

    def __init__(self, interpolated_dirs: Dict[str, str]) -> None:
        # Maps placeholder strings (e.g. "$EXTRACTED") to concrete paths.
        self.__interpolated_dirs = interpolated_dirs

    def process_file(self, code_file: CodeFileInfo, download_dir: str, extraction_dir: str) -> None:
        """
        Download code_file into download_dir, extract it into
        extraction_dir, and replace interpolated paths when the file
        requests it. Both directories are created here and must not
        already exist (os.makedirs raises otherwise).
        """
        os.makedirs(download_dir)
        os.makedirs(extraction_dir)
        downloaded_file_path = self._download_code_file(
            code_file, download_dir)
        extracted_file_path = self._extract_code_file(
            code_file, downloaded_file_path, extraction_dir)
        if code_file.should_replace_paths():
            self.__replace_paths(extracted_file_path)

    def _download_code_file(self, code_file: CodeFileInfo, download_path: str) -> str:
        """Download the code file into download_path; return the saved path."""
        file_name = code_file.get_file_name()
        file_path = path.join(download_path, file_name)
        return download_file(code_file.get_url(), file_path)

    def _extract_code_file(self, code_file: CodeFileInfo, from_path: str, to_path: str) -> str:
        """
        Extract from_path into to_path according to the file's MIME type.
        Non-archive MIME types are copied through unchanged. Returns the
        extracted (or copied) path.
        """
        mime_to_extraction = {
            MIMEType.TAR: lambda from_path, to_path: extract_tar_file(from_path, to_path),
            MIMEType.TAR_GZ: lambda from_path, to_path: extract_tar_file(from_path, to_path, 'z'),
            MIMEType.GZ: extract_gz_file,
            MIMEType.ZIP: extract_zip_file,
            MIMEType.SEVEN_ZIP: extract_7zip_file
        }
        mime_type = code_file.get_mime_type()
        if mime_type in mime_to_extraction:
            extracted_path = mime_to_extraction[mime_type](from_path, to_path)
        else:
            extracted_path = path.join(to_path, code_file.get_file_name())
            copyfile(from_path, extracted_path)
        return extracted_path

    def __replace_paths(self, file_path: str):
        """
        Recursively rewrite interpolated-directory placeholders inside
        file_path (a file or directory tree). Each file is rewritten to a
        *_edited sibling and then moved back over the original so the
        original name is preserved. Assumes files are text (opened in
        text mode).
        """
        if path.isdir(file_path):
            for file_name in os.listdir(file_path):
                self.__replace_paths(path.join(file_path, file_name))
        else:
            file_name = path.basename(file_path)
            dir_name = path.dirname(file_path)
            name, ext = path.splitext(file_name)
            edited_file_name = f"{name}_edited{ext}"
            edited_file_path = path.join(dir_name, edited_file_name)
            with open(file_path, 'r') as original_file:
                with open(edited_file_path, 'w') as edited_file:
                    for line in original_file.readlines():
                        # Apply every placeholder -> path substitution to the line.
                        edited_file.write(
                            reduce(
                                lambda current, key: current.replace(
                                    key, self.__interpolated_dirs[key]),
                                self.__interpolated_dirs,
                                line)
                        )
            os.remove(file_path)
            os.rename(path.join(dir_name, edited_file_name), file_path)

This file was deleted.

17 changes: 12 additions & 5 deletions worker/orca_grader/container/do_grading.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import os
import shutil
import traceback
from pathlib import Path
from typing import Dict, List, TextIO
from orca_grader.common.services.push_results import push_results_to_response_url
from orca_grader.container.build_script.preprocess.preprocessor import GradingScriptPreprocessor
Expand All @@ -16,10 +17,16 @@
GradingJobJSON,
GradingScriptCommandJSON
)
from orca_grader.container.fs_tree import tree


def do_grading(secret: str, grading_job_json: GradingJobJSON) -> GradingJobResult:
command_responses: List[GradingScriptCommandResponse] = []
interpolated_dirs = {
"$DOWNLOADED": f"{secret}/downloaded",
"$EXTRACTED": f"{secret}/extracted",
"$BUILD": f"{secret}/build"
}
# The following exceptions are used to encapsulate things "expected to go wrong":
# - InvalidGradingJobJSONException*: Thrown when job JSON doesn't match schema (see validations/).
# - PreprocessingException: Thrown when a GradingJob's script is not valid.
Expand All @@ -30,15 +37,15 @@ def do_grading(secret: str, grading_job_json: GradingJobJSON) -> GradingJobResul
try:
code_files = produce_code_files_dictionary(grading_job_json["files"])
commands: List[GradingScriptCommandJSON] = grading_job_json["script"]
interpolated_dirs = {
"$DOWNLOADED": f"{secret}/downloaded",
"$EXTRACTED": f"{secret}/extracted",
"$BUILD": f"{secret}/build"
}
code_file_processor = CodeFileProcessor(interpolated_dirs)
preprocessor = GradingScriptPreprocessor(secret, commands, code_files,
code_file_processor)
script: GradingScriptCommand = preprocessor.preprocess_job()
print("****Directories and their files:****")
for actual_dir in interpolated_dirs.values():
print(f"{actual_dir}:")
for line in tree(Path(actual_dir)):
print(line)
output: GradingJobResult = script.execute(command_responses)
except PreprocessingException as preprocess_e:
output = GradingJobResult(command_responses, [preprocess_e])
Expand Down
Loading

0 comments on commit cc8b446

Please sign in to comment.