release: assign4

* feat: working draft * feat: write main * feat: impl spellcheck main, untested * feat: finished starter code * feat: autograder buildout * feat: finish autograder * feat: first half of handout * fix: readme typos * fix: whitespace typo * feat: wip handout * feat: autograder/readme org, timer impls * feat: timing stats with --profile * feat: final touches on assignment * feat: typo * feat: disallow std::transform in spellcheck
cs106l · Feb 8, 2025 · 88b3a97 · 88b3a97
1 parent 89e5817
commit 88b3a97
Show file tree

Hide file tree

Showing 27 changed files with 468,226 additions and 1 deletion.
diff --git a/assign3/README.md b/assign3/README.md
@@ -27,7 +27,7 @@ To download the starter code for this assignment, please see the instructions fo
 
 ## Running your code
 
-To run your code, first you'll need to compile it. Open up a terminal (if you are using VSCode, hit <kbd>Ctrl+\`</kbd> or go to **Terminal > New Terminal** at the top). Then make sure that you are in the `assign2/` directory and run:
+To run your code, first you'll need to compile it. Open up a terminal (if you are using VSCode, hit <kbd>Ctrl+\`</kbd> or go to **Terminal > New Terminal** at the top). Then make sure that you are in the `assign3/` directory and run:
 
 ```sh
 g++ -std=c++20 main.cpp class.cpp -o main

diff --git a/assign4/README.md b/assign4/README.md
diff --git a/assign4/autograder/autograder.py b/assign4/autograder/autograder.py
@@ -0,0 +1,324 @@
+import difflib
+from colorama import Back, Fore, Style
+from utils import Autograder
+
+from typing import Dict, Iterable, List, Tuple, Union
+
+import os
+import re
+import subprocess
+import shutil
+import sys
+
+# Assignment root: the parent of the directory that contains this script.
+PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir))
+# The "autograder" directory inside the assignment root.
+AUTOGRADER_DIR = os.path.join(PATH, "autograder")
+# The C++ source file whose functions are parsed and inspected.
+CODE_PATH = os.path.join(PATH, "spellcheck.cpp")
+# Directory of example inputs that are piped into the compiled executable.
+EXAMPLES_PATH = os.path.join(PATH, "examples")
+# Directory of expected outputs; each file shares its name with an example.
+EXAMPLES_GOLD_PATH = os.path.join(AUTOGRADER_DIR, "gold")
+
+# =============================================================================
+# Verifying source code
+# =============================================================================
+
+
+# Maps each required function name to the checks run against the tokens of
+# its body. Each entry is one of:
+#   * a plain string     - a token that must appear in the body;
+#   * a list of strings  - alternatives, at least one of which must appear;
+#   * a "!"-prefixed str - a token the function is not allowed to use;
+#   * "#noloops"         - the body may not contain for/while/goto tokens.
+# The keys also serve as the complete list of functions a submission may
+# define (see the helper-function check in add_matcher_tests).
+FUNCTION_MATCHERS: Dict[str, Iterable[Union[str, Iterable[str]]]] = {
+    "tokenize": [
+        "find_all",
+        "std::transform",
+        "std::inserter",
+        "std::erase_if",
+        "#noloops",
+    ],
+    "spellcheck": [
+        ["std::ranges::views::filter", "rv::filter"],
+        ["std::ranges::views::transform", "rv::transform"],
+        "!std::copy_if",
+        "!std::transform",
+        "levenshtein",
+        "#noloops",
+    ],
+}
+
+
+def remove_comments_strings(content):
+    """Strip comments and double-quoted string literals from C++ source text.
+
+    The source is scanned once, left to right, recognising three kinds of
+    span: double-quoted strings, single-quoted character literals, and
+    ``//`` or ``/* */`` comments. Strings and comments are deleted; character
+    literals are kept verbatim.
+
+    Backslash escapes inside literals are honoured, so an escaped quote does
+    not end a literal early, and a character literal such as ``'"'`` is never
+    mistaken for the start of a string. Comment markers inside a literal are
+    part of that literal, not a comment.
+
+    Raw string literals (``R"(...)"``) are not given special treatment.
+
+    :param content: C++ source text.
+    :return: The text with comments and string literals removed.
+    """
+    literal_or_comment = re.compile(
+        r'("(?:\\.|[^"\\])*")'  # group 1: string literal (removed)
+        r"|('(?:\\.|[^'\\])*')"  # group 2: character literal (kept)
+        r"|(/\*.*?\*/|//[^\r\n]*)",  # group 3: block or line comment (removed)
+        re.DOTALL,
+    )
+
+    # Only character literals survive; every other recognised span becomes "".
+    return literal_or_comment.sub(lambda match: match.group(2) or "", content)
+
+
+def tokenize_source(input_code: str) -> Iterable[str]:
+    """Split C++ source text into a flat list of coarse tokens.
+
+    Three kinds of token are produced, tried in this order at each position:
+
+    * a (possibly ``::``-qualified) identifier such as ``std::ranges::sort``;
+    * a maximal run of non-word characters, with surrounding whitespace
+      stripped (so ``") {"`` is a single token);
+    * any other single character (in practice, a character that cannot start
+      an identifier, such as a digit).
+
+    Tokens that are empty after stripping (pure whitespace runs) are dropped.
+
+    The input is scanned by position rather than by repeatedly re-slicing the
+    remaining text, so the work is linear in the length of the input.
+
+    :param input_code: Source text, normally a function body.
+    :return: The list of non-empty tokens in source order.
+    """
+    # No "^" anchors: Pattern.match(string, pos) already anchors at ``pos``.
+    pattern_fqn = re.compile(r"(::)?[a-zA-Z_][a-zA-Z0-9_]*(::[a-zA-Z_][a-zA-Z0-9_]*)*")
+    pattern_non_word = re.compile(r"\W+")
+
+    tokens = []
+    pos = 0
+    end = len(input_code)
+
+    while pos < end:
+        match = pattern_fqn.match(input_code, pos) or pattern_non_word.match(
+            input_code, pos
+        )
+        if match:
+            tokens.append(match.group().strip())
+            pos = match.end()
+        else:
+            # Neither pattern applies: emit the character alone.
+            tokens.append(input_code[pos])
+            pos += 1
+
+    return [t for t in tokens if t]
+
+
+def parse_methods(file_path):
+    """Extract the tokenized body of every function defined in a C++ file.
+
+    Comments and string literals are stripped first. Each occurrence of
+    ``name(...) {`` then starts a definition whose body runs to the matching
+    closing brace; scanning resumes after that brace, so nothing nested
+    inside a body is treated as a separate definition.
+
+    :param file_path: Path of the source file to inspect.
+    :return: Dict mapping function name to the token list of its body.
+    :raises RuntimeError: If a name is defined twice or a body's braces are
+        unbalanced.
+    """
+    with open(file_path, "r") as file:
+        source = remove_comments_strings(file.read())
+
+    signature = re.compile(r"\b(\w+)\s*\([^)]*\)\s*\{")
+    methods = {}
+
+    found = signature.search(source, 0)
+    while found:
+        name = found.group(1)
+        if name in methods:
+            raise RuntimeError(
+                f"Duplicate method definition of '{name}'. Have you tried recompiling your code?"
+            )
+
+        # The match ends just past the opening brace of the body.
+        open_brace = found.end() - 1
+        depth = 1
+        cursor = open_brace + 1
+
+        # Advance until the brace that balances the opening one is consumed.
+        while depth:
+            if cursor >= len(source):
+                raise RuntimeError(
+                    f"Unmatched braces in method definition of '{name}'. Have you tried recompiling your code?"
+                )
+            char = source[cursor]
+            depth += (char == "{") - (char == "}")
+            cursor += 1
+
+        body = source[open_brace + 1 : cursor - 1].strip()
+        methods[name] = tokenize_source(body)
+
+        found = signature.search(source, cursor)
+
+    return methods
+
+
+def add_matcher_tests(grader: Autograder, file: str):
+    """Register the source-inspection parts of the autograder.
+
+    The student's file is parsed once. One part is then added per entry of
+    FUNCTION_MATCHERS, checking that the function exists and that its body
+    satisfies every matcher. A final part checks that no functions other
+    than the required ones are defined.
+
+    Matcher semantics (each matcher is a string or a list of strings):
+
+    * ``"#noloops"``: the body may not contain ``for``, ``while`` or ``goto``;
+    * ``"!name"``: ``name`` must not appear in the body;
+    * any other name(s): at least one of them must appear in the body.
+
+    :param grader: The Autograder to add parts to.
+    :param file: Path of the student's C++ source file.
+    """
+    student_methods = parse_methods(file)
+
+    def generate_test_method(method_copy, matchers_copy):
+        # Factory so every test closes over its own method name and matchers
+        # rather than over the loop variables below.
+        def test():
+            if method_copy not in student_methods:
+                raise RuntimeError(
+                    f"Could not find a definition for required method '{method_copy}' in {file}"
+                )
+
+            method_body = student_methods[method_copy]
+
+            for matcher in matchers_copy:
+                if matcher == "#noloops":
+                    for loop_type in ["for", "while", "goto"]:
+                        if loop_type in method_body:
+                            raise RuntimeError(
+                                f"Method {method_copy} may not contain any explicit for/while loops! You must use the STL instead! Found loop: {loop_type}"
+                            )
+                    print(f"🔎 {method_copy} has no for/while loops!")
+                    continue
+
+                alternatives = [matcher] if isinstance(matcher, str) else list(matcher)
+                forbidden = [m[1:] for m in alternatives if m.startswith("!")]
+                required = [m for m in alternatives if not m.startswith("!")]
+
+                for m in forbidden:
+                    if m in method_body:
+                        raise RuntimeError(
+                            f"Method '{method_copy}' is not allowed to call method: {m}"
+                        )
+
+                # A matcher made only of "!" entries has nothing that must be
+                # present, so it is satisfied once the checks above pass.
+                if not required:
+                    continue
+
+                for m in required:
+                    if m in method_body:
+                        print(f"🔎 {method_copy} called method {m}")
+                        break
+                else:
+                    raise RuntimeError(
+                        f"Method '{method_copy}' must call one of the following methods: {required}."
+                    )
+
+        return test
+
+    for method, matchers in FUNCTION_MATCHERS.items():
+        grader.add_part(method, generate_test_method(method, matchers))
+
+    # Ensure no helper functions were used
+    def test_no_helper_functions():
+        present = set(student_methods.keys())
+        expected = set(FUNCTION_MATCHERS.keys())
+        extra = present - expected
+        if extra:
+            # Sorted so the message is the same from run to run.
+            raise RuntimeError(
+                f"You may not use any helper functions for this assignment. You must implement all your code in the following functions: {', '.join(sorted(expected))}. \n\nFound extra functions: {', '.join(sorted(extra))}"
+            )
+
+    grader.add_part(
+        "Check submission has no helper functions", test_no_helper_functions
+    )
+
+
+def no_obvious_namespace_std():
+    """Reject submissions that contain ``using namespace std;``.
+
+    The student's source (CODE_PATH) is read, stripped of comments and string
+    literals, and searched for the directive with any amount of whitespace
+    between its words.
+
+    :raises RuntimeError: If the directive is present.
+    """
+    with open(CODE_PATH, "r") as source_file:
+        stripped = remove_comments_strings(source_file.read())
+
+    # Flatten to a single line before searching.
+    flattened = " ".join(stripped.split("\n"))
+
+    if re.search(r"using\s+namespace\s+std\s*;", flattened) is None:
+        return
+
+    raise RuntimeError(
+        "You should not use 'using namespace std;' for this assignment. In general, this is considered bad practice as it can lead to naming conflicts, and will affect the autograder for this assignment."
+    )
+
+
+# =============================================================================
+# Verifying program correctness
+# =============================================================================
+
+
+import os
+import subprocess
+
+
+def find_executable(containing_dir):
+    """Return the path of the compiled program inside ``containing_dir``.
+
+    ``main`` is preferred over ``main.exe``; a candidate counts only if it is
+    a regular file with execute permission.
+
+    :param containing_dir: Directory expected to hold the executable.
+    :return: Path of the first usable candidate.
+    :raises FileNotFoundError: If neither candidate is usable.
+    """
+    candidates = (
+        os.path.join(containing_dir, name) for name in ("main", "main.exe")
+    )
+    usable = next(
+        (
+            path
+            for path in candidates
+            if os.path.isfile(path) and os.access(path, os.X_OK)
+        ),
+        None,
+    )
+
+    if usable is None:
+        raise FileNotFoundError(
+            f"No executable named 'main' or 'main.exe' found in '{containing_dir}'."
+        )
+    return usable
+
+
+def spellcheck(file_path):
+    """Run the compiled program on one input file and return what it prints.
+
+    The executable found in the assignment root is started with ``--stdin
+    --unstyled`` and the file connected to its standard input. Standard error
+    is merged into standard output.
+
+    Output is decoded as UTF-8 rather than with the platform's locale
+    encoding, because the gold files it is compared against are read as
+    UTF-8. Bytes that are not valid UTF-8 are replaced instead of raising, so
+    a misbehaving program shows up as an output mismatch rather than a
+    decoding error.
+
+    :param file_path: Path of the input text to spellcheck.
+    :return: Combined stdout/stderr of the program, as text.
+    :raises FileNotFoundError: If no executable is found.
+    """
+    exe_path = find_executable(PATH)
+    command = [exe_path, "--stdin", "--unstyled"]
+
+    # Binary mode: the handle is given to the child process as its stdin, so
+    # no decoding happens on this side.
+    with open(file_path, "rb") as file:
+        result = subprocess.run(
+            command,
+            stdin=file,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.STDOUT,
+            text=True,
+            encoding="utf-8",
+            errors="replace",
+        )
+
+    return result.stdout
+
+
+def generate_gold_dir():
+    """Regenerate the gold outputs from the currently compiled program.
+
+    Any existing gold directory is deleted and recreated. Every regular file
+    in the examples directory is run through ``spellcheck`` and the output is
+    saved under the same file name in the gold directory.
+
+    Gold files are written as UTF-8 so that they round-trip with
+    ``test_spellcheck``, which reads them back as UTF-8.
+
+    Processing is best-effort: a failure on one example is reported and the
+    remaining examples are still processed.
+    """
+    if os.path.exists(EXAMPLES_GOLD_PATH):
+        shutil.rmtree(EXAMPLES_GOLD_PATH)
+    os.makedirs(EXAMPLES_GOLD_PATH)
+
+    for example_file in os.listdir(EXAMPLES_PATH):
+        example_file_path = os.path.join(EXAMPLES_PATH, example_file)
+        if not os.path.isfile(example_file_path):
+            continue
+        try:
+            output = spellcheck(example_file_path)
+            gold_file_path = os.path.join(EXAMPLES_GOLD_PATH, example_file)
+            with open(gold_file_path, "w", encoding="utf-8") as gold_file:
+                gold_file.write(output)
+
+            print(f"Processed {example_file} -> {gold_file_path}")
+        except Exception as e:
+            # Deliberately broad: report the failure and move on.
+            print(f"Failed to process {example_file}: {e}")
+
+
+def assert_contents_equal(expected, actual, filename):
+    """Raise with a colorized unified diff if two outputs differ.
+
+    Does nothing when ``expected`` and ``actual`` are equal.
+
+    :param expected: The solution (gold) output.
+    :param actual: The output produced by the submission.
+    :param filename: Example file name, used only in the hint text of the
+        error message.
+    :raises RuntimeError: If the contents differ. The message contains the
+        diff and instructions for reproducing both outputs.
+    """
+    if expected != actual:
+        expected_lines = expected.split("\n")
+        actual_lines = actual.split("\n")
+
+        diff = list(
+            difflib.unified_diff(
+                expected_lines,
+                actual_lines,
+                fromfile="Expected in solution, missing in your output",
+                tofile="Present in your output, missing in solution",
+            )
+        )
+
+        # Indent every diff line by one tab.
+        diff = [f"\t{l}" for l in diff]
+        # Strip trailing whitespace from the first diff line only.
+        diff[0] = diff[0].rstrip()
+        diff_output = "\n".join(diff)
+
+        def matcher(fore):
+            # Build a re.sub callback that wraps the matched text in the given
+            # foreground color, bright, followed by a style reset.
+            return (
+                lambda match: f"{fore}{Style.BRIGHT}{match.group(0)}{Style.RESET_ALL}"
+            )
+
+        # Color the leading run of "-" on each line red ...
+        diff_output = re.sub(
+            r"^\s*-+", matcher(Fore.RED), diff_output, flags=re.MULTILINE
+        )
+        # ... and the leading run of "+" on each line green.
+        diff_output = re.sub(
+            r"^\s*\++", matcher(Fore.GREEN), diff_output, flags=re.MULTILINE
+        )
+
+        error_lines = [
+            f"Contents do not match solution:",
+            diff_output,
+            "",
+            f"\t{Fore.CYAN}To see the output of your submission on this file, run:",
+            "",
+            f'\t\t ./main --stdin < "examples/{filename}"',
+            "",
+            f'\tTo see the expected solution output, open "autograder/gold/{filename}"{Fore.RESET}',
+        ]
+
+        raise RuntimeError("\n".join(error_lines))
+
+
+def test_spellcheck():
+    """Check the program's output on every example against its gold file.
+
+    The gold directory drives the test: for each gold file, the example input
+    with the same name is run through ``spellcheck`` and the result must
+    equal the gold contents exactly.
+
+    :raises RuntimeError: If an example input is missing, or if any output
+        differs from its gold file.
+    """
+    for example_file in os.listdir(EXAMPLES_GOLD_PATH):
+        gold_path = os.path.join(EXAMPLES_GOLD_PATH, example_file)
+        input_path = os.path.join(EXAMPLES_PATH, example_file)
+
+        # The gold file exists (it was just listed); what can be missing is
+        # the example input it corresponds to.
+        if not os.path.isfile(input_path):
+            raise RuntimeError(
+                f"Could not find example file for gold output '{example_file}'. Did you modify the examples/ directory?"
+            )
+
+        with open(gold_path, "r", encoding="utf-8") as f:
+            gold_output = f.read()
+
+        spellcheck_result = spellcheck(input_path)
+
+        assert_contents_equal(gold_output, spellcheck_result, example_file)
+        print(f"🔎 {example_file} spellcheck matched solution!")
+
+
+# =============================================================================
+# Autograder setup
+# =============================================================================
+
+if __name__ == "__main__":
+    # With --gold, regenerate the gold outputs from the current executable
+    # and exit without grading.
+    if "--gold" in sys.argv:
+        generate_gold_dir()
+        sys.exit(0)
+
+    grader = Autograder()
+    # The "using namespace std;" check is installed as the grader's setup hook.
+    grader.setup = no_obvious_namespace_std
+    # Source-inspection parts are added first, then the output comparison.
+    add_matcher_tests(grader, CODE_PATH)
+    grader.add_part("Spellcheck", test_spellcheck)
+    grader.run()
diff --git a/assign4/autograder/gold/(kafka).txt b/assign4/autograder/gold/(kafka).txt
@@ -0,0 +1,20 @@
+Loading dictionary... loaded 464811 unique words.
+Tokenizing input... got 121 tokens.
+
+Someone must have slandered Josef K., for one morning, without having 
+done anything wrong, he was arrested.
+
+The cook, who always <<broght>> him his breakfast at eight o’clock, did 
+not come this time. That had never happened before. For a moment he 
+lay still, looking at the pillow and the old woman who lived opposite
+and who was watching him with an inquisitiveness quite unusual for 
+her. Then, with <<astonshment>>, he noticed that a man he had never seen
+before was in his room. He was wearing a tight black suit which, 
+with its various <<trimings>>, looked like a travelling outfit that had
+gone out of fashion years ago.
+
+“What do you want?” asked K., raising himself half up in bed.
+
+broght: {bright, brocht, brogh, brought}
+astonshment: {astonishment}
+trimings: {primings, timings, trimmings}
diff --git a/assign4/autograder/gold/(marquez).txt b/assign4/autograder/gold/(marquez).txt
@@ -0,0 +1,15 @@
+Loading dictionary... loaded 464811 unique words.
+Tokenizing input... got 84 tokens.
+
+Many years later, as he faced the firing squad, Colonel <<Aureliano>> Buendía
+was to remember that distant afternoon when his father took him to discover ice.
+
+At that time Macondo was a <<vilage>> of twenty adobe houses, built on the bank
+of a river of clear water that ran along a bed of <<pollished>> stones, which were
+white and enormous, like prehistoric eggs. The world was so recent that many
+things lacked names, and in order to indicate them it was <<necesary>> to point.
+
+Aureliano: {aurelian}
+vilage: {milage, pilage, silage, viage, village, vinage, visage, volage}
+pollished: {polished}
+necesary: {necessary}
diff --git a/assign4/autograder/gold/(morrison).txt b/assign4/autograder/gold/(morrison).txt
@@ -0,0 +1,18 @@
+Loading dictionary... loaded 464811 unique words.
+Tokenizing input... got 128 tokens.
+
+124 WAS SPITEFUL. Full of a <<baby's>> venom. The women in the house knew 
+it and so did the children. For years each put up with the spite in his 
+own way, but by 1873 <<Sethe>> and her daughter Denver were its only victims.
+The grandmother, Baby <<Suggs>>, was dead, and the sons, Howard and <<Buglar>>, 
+had run away by the time they were thirteen years old--as soon as merely
+looking in a mirror shattered it (that was the signal for <<Buglar>>); as soon
+as two tiny hand prints appeared in the cake (that was it for Howard). Neither 
+boy waited to see more; another kettleful of chickpeas smoking in a heap on 
+the floor; soda crackers crumbled and strewn in a line next to the door sill.
+
+baby's: {babe's}
+Sethe: {bethe, ethe, lethe, rethe, seathe, seethe, setae, seth, sethi, sithe, smethe}
+Suggs: {muggs, sugg, sughs, sugis, vuggs}
+Buglar: {bugler, burglar, juglar}
+Buglar: {bugler, burglar, juglar}
diff --git a/assign4/autograder/gold/gibberish.txt b/assign4/autograder/gold/gibberish.txt
@@ -0,0 +1,9 @@
+Loading dictionary... loaded 464811 unique words.
+Tokenizing input... got 22 tokens.
+
+This is a bunch of gibberish:
+
+ansdka nakdlsnakln lnfgklanrf nksladnksal nkglrnkadf nklsadn
+
+These are definitely misspelled, but probably don't have any suggestions.
+