Skip to content

Commit

Permalink
Merge branch 'main' into feat/image-and-job-status
Browse files Browse the repository at this point in the history
  • Loading branch information
williams-jack committed Jul 31, 2024
2 parents cc69d15 + 82b7308 commit cc8b446
Show file tree
Hide file tree
Showing 11 changed files with 194 additions and 125 deletions.
10 changes: 9 additions & 1 deletion worker/orca_grader/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import time
from typing import List, Optional
import tempfile
from subprocess import CalledProcessError
from orca_grader.common.services.push_results import push_results_with_exception
from orca_grader.common.types.grading_job_json_types import GradingJobJSON
from orca_grader.config import APP_CONFIG
Expand Down Expand Up @@ -67,6 +68,8 @@ def process_jobs_from_db(no_container: bool,
if stop_future in done:
break

print(f"Pulled job with key {grading_job['key']} and url {grading_job['response_url']}")

job_execution_future = futures_executor.submit(
run_grading_job, grading_job, no_container, container_command)
done, not_done = concurrent.futures.wait(
Expand All @@ -81,6 +84,7 @@ def process_jobs_from_db(no_container: bool,
if job_execution_future in done:
if type(job_execution_future.exception()) == InvalidWorkerStateException:
exit(1)
print("Job completed.")
clean_up_unused_images()

if stop_future in done:
Expand Down Expand Up @@ -108,6 +112,10 @@ def run_grading_job(grading_job: GradingJobJSON, no_container: bool,
else:
handle_grading_job(grading_job, container_sha)
except Exception as e:
print(e)
if type(e) == CalledProcessError:
print(e.stdout)
print(e.stderr)
if "response_url" in grading_job:
push_results_with_exception(grading_job, e)
else:
Expand Down Expand Up @@ -152,7 +160,7 @@ def handle_grading_job(grading_job: GradingJobJSON, container_sha: str | None =
if result and result.stdout:
# TODO: make this a log statement of some sort.
print(result.stdout.decode())
elif result and result.stderr:
if result and result.stderr:
print(result.stderr.decode())


Expand Down
3 changes: 2 additions & 1 deletion worker/orca_grader/common/services/push_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,13 @@ def push_results_to_response_url(job_result: GradingJobResult,
**job_result.to_json(interpolated_dirs=interpolated_dirs),
"key": key
}
print(result_as_json)
_send_results_with_exponential_backoff(result_as_json, response_url)


def push_results_with_exception(grading_job: GradingJobJSON,
                                e: Exception) -> None:
    """
    Report a grading-job failure to the job's response_url.

    Wraps the raised exception in a GradingJobResult with no command
    responses and pushes it with the job's key. The result OBJECT is passed
    through (not its JSON) so that push_results_to_response_url can perform
    serialization itself with the interpolated-dirs mapping it receives
    (here empty, since no directories were interpolated before the failure).
    """
    output = GradingJobResult([], [e])
    key, response_url = grading_job["key"], grading_job["response_url"]
    push_results_to_response_url(output, key, response_url, {})

Expand Down
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
from urllib.parse import urlparse, unquote
from os.path import basename
from orca_grader.container.build_script.code_file.sub_mime_types import SubmissionMIMEType
from orca_grader.container.build_script.code_file.mime_types import MIMEType
from orca_grader.common.types.grading_job_json_types import CodeFileInfoJSON

class CodeFileInfo:
"""
CodeFileInfo contains:
- A URL to download/save/extract either an assignment, submission, or
- A URL to download/save/extract either an assignment, submission, or
grading file.
- The MIME type of that file.
- The source of the code file. See ./code_file_source.py for more info.
"""

def __init__(self, url: str, mime_type: SubmissionMIMEType,
def __init__(self, url: str, mime_type: MIMEType,
save_dir_name: str, should_replace_paths: bool) -> None:
self.__url = url
self.__mime_type = mime_type
Expand All @@ -21,16 +21,16 @@ def __init__(self, url: str, mime_type: SubmissionMIMEType,

def get_url(self) -> str:
return self.__url
def get_mime_type(self) -> SubmissionMIMEType:

def get_mime_type(self) -> MIMEType:
return self.__mime_type

def should_replace_paths(self) -> bool:
return self.__should_replace_paths

def get_save_dir_name(self) -> str:
return self.__save_dir_name

# https://stackoverflow.com/questions/18727347/how-to-extract-a-filename-from-a-url-append-a-word-to-it
def get_file_name(self) -> str:
"""
Expand All @@ -41,5 +41,5 @@ def get_file_name(self) -> str:
return basename(file_path)

def json_to_code_file_info(json_code_file: CodeFileInfoJSON, dir_name: str) -> CodeFileInfo:
return CodeFileInfo(json_code_file["url"], SubmissionMIMEType(json_code_file["mime_type"]),
return CodeFileInfo(json_code_file["url"], MIMEType(json_code_file["mime_type"]),
dir_name, json_code_file["should_replace_paths"])
33 changes: 33 additions & 0 deletions worker/orca_grader/container/build_script/code_file/mime_types.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from enum import Enum


class MIMEType(Enum):
    """
    Enumerates the MIME-type labels that files attached to an assignment
    or submission may carry.

    Several values are editor/language tags rather than registered MIME
    types (e.g. "pyret", "scheme", "mllike").
    """

    TAR = "application/x-tar"
    GZ = "application/gzip"
    TAR_GZ = "application/x-gtar"
    ZIP = "application/zip"
    SEVEN_ZIP = "application/x-7z-compressed"
    JAVA = "text/x-java"
    JAVA_ARCHIVE = "application/java-archive"
    JAVA_CLASS = "application/java-vm"
    JAVASCRIPT = "text/javascript"
    PYRET = "pyret"
    RACKET_SCHEME = "scheme"
    LISP = "text/x-common-lisp"
    ML = "mllike"
    HASKELL = "text/x-haskell"
    # NOTE(review): "text/z-literate-haskell" looks like a typo of
    # "text/x-literate-haskell" — confirm against the clients that send it
    # before changing, since the string is matched at runtime.
    LIT_HASKELL = "text/z-literate-haskell"
    PLAIN_TEXT = "text/plain"


# The subset of MIME types that denote archives needing extraction.
ARCHIVE_MIMES = [MIMEType.ZIP, MIMEType.TAR, MIMEType.GZ, MIMEType.TAR_GZ]


def is_archive_mime_type(mime_type: MIMEType) -> bool:
    """Return True when the given MIME type refers to an archive format."""
    return any(mime_type is archive for archive in ARCHIVE_MIMES)
Original file line number Diff line number Diff line change
Expand Up @@ -5,87 +5,96 @@
import gzip
from typing import Dict
from orca_grader.container.build_script.code_file.code_file_info import CodeFileInfo
from orca_grader.container.build_script.code_file.sub_mime_types import SubmissionMIMEType
from orca_grader.container.build_script.code_file.mime_types import MIMEType
import subprocess
from orca_grader.common.services.download_file import download_file

# Maximum seconds an external extraction process may run before being
# killed by subprocess's timeout: 2 minutes & 30 seconds.
__EXTRACTION_TIMEOUT = 60 * 2.5


def extract_tar_file(from_path: str, to_path: str, compression_option: str = "") -> str:
    """
    Extract the tar archive at from_path into the directory to_path.

    compression_option is spliced into tar's extraction flag (e.g. "z"
    yields -xzf for gzip-compressed tars). Returns to_path.

    NOTE(review): stderr is merged into stdout, which is sent to DEVNULL,
    and the exit status is not checked — a failed extraction is silent here.
    """
    subprocess.run(["tar", f"-x{compression_option}f", from_path, "-C", to_path],
                   stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT,
                   timeout=__EXTRACTION_TIMEOUT)
    return to_path


def extract_gz_file(from_path: str, to_path: str) -> str:
    """
    Decompress the gzip file at from_path into the directory to_path.

    The output file is named after the input with its trailing .gz
    extension removed; returns the path of the decompressed file.
    """
    # Remove .gz from file name.
    from_f_name = path.splitext(path.basename(from_path))[0]
    f_out_path = path.join(to_path, from_f_name)
    with gzip.open(from_path, "rb") as f_in:
        with open(f_out_path, 'wb') as f_out:
            copyfileobj(f_in, f_out)
    return f_out_path


def extract_zip_file(from_path: str, to_path: str) -> str:
    """
    Extract the zip archive at from_path into the directory to_path using
    the external `unzip` binary. Returns to_path.

    NOTE(review): output and exit status are discarded — a failed
    extraction is silent here.
    """
    subprocess.run(["unzip", from_path, "-d", to_path], stdout=subprocess.DEVNULL,
                   stderr=subprocess.STDOUT, timeout=__EXTRACTION_TIMEOUT)
    return to_path


def extract_7zip_file(from_path: str, to_path: str) -> str:
    """
    Extract the 7-zip archive at from_path into the directory to_path
    using the external `7z` binary. Returns to_path.

    NOTE(review): output and exit status are discarded — a failed
    extraction is silent here.
    """
    subprocess.run(["7z", "x", from_path, f"-o{to_path}"], stdout=subprocess.DEVNULL,
                   stderr=subprocess.STDOUT, timeout=__EXTRACTION_TIMEOUT)
    return to_path


class CodeFileProcessor:
    """
    Downloads a grading job's code files and extracts archived ones,
    optionally rewriting interpolated-directory placeholders found inside
    the extracted files.
    """

    def __init__(self, interpolated_dirs: Dict[str, str]) -> None:
        # Maps placeholder strings (e.g. "$EXTRACTED") to concrete paths.
        self.__interpolated_dirs = interpolated_dirs

    def process_file(self, code_file: CodeFileInfo, download_dir: str, extraction_dir: str) -> None:
        """
        Download code_file into download_dir, extract it into
        extraction_dir, and replace interpolated paths when the file
        requests it. Both directories are created here and must not
        already exist (os.makedirs raises otherwise).
        """
        os.makedirs(download_dir)
        os.makedirs(extraction_dir)
        downloaded_file_path = self._download_code_file(
            code_file, download_dir)
        extracted_file_path = self._extract_code_file(
            code_file, downloaded_file_path, extraction_dir)
        if code_file.should_replace_paths():
            self.__replace_paths(extracted_file_path)

    def _download_code_file(self, code_file: CodeFileInfo, download_path: str) -> str:
        """Download the code file into download_path; return the saved path."""
        file_name = code_file.get_file_name()
        file_path = path.join(download_path, file_name)
        return download_file(code_file.get_url(), file_path)

    def _extract_code_file(self, code_file: CodeFileInfo, from_path: str, to_path: str) -> str:
        """
        Extract from_path into to_path according to the file's MIME type.
        Non-archive MIME types are copied through unchanged. Returns the
        extracted (or copied) path.
        """
        mime_to_extraction = {
            MIMEType.TAR: lambda from_path, to_path: extract_tar_file(from_path, to_path),
            MIMEType.TAR_GZ: lambda from_path, to_path: extract_tar_file(from_path, to_path, 'z'),
            MIMEType.GZ: extract_gz_file,
            MIMEType.ZIP: extract_zip_file,
            MIMEType.SEVEN_ZIP: extract_7zip_file
        }
        mime_type = code_file.get_mime_type()
        if mime_type in mime_to_extraction:
            extracted_path = mime_to_extraction[mime_type](from_path, to_path)
        else:
            extracted_path = path.join(to_path, code_file.get_file_name())
            copyfile(from_path, extracted_path)
        return extracted_path

    def __replace_paths(self, file_path: str):
        """
        Recursively rewrite interpolated-directory placeholders inside
        file_path (a file or directory tree). Each file is rewritten to a
        *_edited sibling and then moved back over the original so the
        original name is preserved. Assumes files are text (opened in
        text mode).
        """
        if path.isdir(file_path):
            for file_name in os.listdir(file_path):
                self.__replace_paths(path.join(file_path, file_name))
        else:
            file_name = path.basename(file_path)
            dir_name = path.dirname(file_path)
            name, ext = path.splitext(file_name)
            edited_file_name = f"{name}_edited{ext}"
            edited_file_path = path.join(dir_name, edited_file_name)
            with open(file_path, 'r') as original_file:
                with open(edited_file_path, 'w') as edited_file:
                    for line in original_file.readlines():
                        # Apply every placeholder -> path substitution to the line.
                        edited_file.write(
                            reduce(
                                lambda current, key: current.replace(
                                    key, self.__interpolated_dirs[key]),
                                self.__interpolated_dirs,
                                line)
                        )
            os.remove(file_path)
            os.rename(path.join(dir_name, edited_file_name), file_path)

This file was deleted.

17 changes: 12 additions & 5 deletions worker/orca_grader/container/do_grading.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import os
import shutil
import traceback
from pathlib import Path
from typing import Dict, List, TextIO
from orca_grader.common.services.push_results import push_results_to_response_url
from orca_grader.container.build_script.preprocess.preprocessor import GradingScriptPreprocessor
Expand All @@ -16,10 +17,16 @@
GradingJobJSON,
GradingScriptCommandJSON
)
from orca_grader.container.fs_tree import tree


def do_grading(secret: str, grading_job_json: GradingJobJSON) -> GradingJobResult:
command_responses: List[GradingScriptCommandResponse] = []
interpolated_dirs = {
"$DOWNLOADED": f"{secret}/downloaded",
"$EXTRACTED": f"{secret}/extracted",
"$BUILD": f"{secret}/build"
}
# The following exceptions are used to encapsulate things "expected to go wrong":
# - InvalidGradingJobJSONException*: Thrown when job JSON doesn't match schema (see validations/).
# - PreprocessingException: Thrown when a GradingJob's script is not valid.
Expand All @@ -30,15 +37,15 @@ def do_grading(secret: str, grading_job_json: GradingJobJSON) -> GradingJobResul
try:
code_files = produce_code_files_dictionary(grading_job_json["files"])
commands: List[GradingScriptCommandJSON] = grading_job_json["script"]
interpolated_dirs = {
"$DOWNLOADED": f"{secret}/downloaded",
"$EXTRACTED": f"{secret}/extracted",
"$BUILD": f"{secret}/build"
}
code_file_processor = CodeFileProcessor(interpolated_dirs)
preprocessor = GradingScriptPreprocessor(secret, commands, code_files,
code_file_processor)
script: GradingScriptCommand = preprocessor.preprocess_job()
print("****Directories and their files:****")
for actual_dir in interpolated_dirs.values():
print(f"{actual_dir}:")
for line in tree(Path(actual_dir)):
print(line)
output: GradingJobResult = script.execute(command_responses)
except PreprocessingException as preprocess_e:
output = GradingJobResult(command_responses, [preprocess_e])
Expand Down
Loading

0 comments on commit cc8b446

Please sign in to comment.