Skip to content

Commit

Permalink
Rename RawClass property names
Browse files Browse the repository at this point in the history
This makes the JSON files (and also overrides) more readable;
performance should not be affected since the files are gzipped in
transit.
  • Loading branch information
psvenk committed Nov 30, 2024
1 parent c062692 commit 3836ec3
Show file tree
Hide file tree
Showing 9 changed files with 157 additions and 157 deletions.
2 changes: 1 addition & 1 deletion public/f22.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion public/f23.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion public/f24.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion public/s23.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion public/s24.json

Large diffs are not rendered by default.

36 changes: 13 additions & 23 deletions scrapers/catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,12 @@
{
"6.3900": {
"on": "6.036",
"nx": true | false,
"rp": true | false,
"hf": false | 1 | 2,
"u": "https://introml.mit.edu",
"f": true | false,
"lm": true | false,
"nonext": true | false,
"repeat": true | false,
"url": "https://introml.mit.edu",
"final": true | false,
"half": false | 1 | 2,
"limited": true | false,
}
}
"""
Expand Down Expand Up @@ -116,24 +115,15 @@ def get_course_data(filtered_html):
Returns:
* dict[str, Union[bool, int, str]]: metadata about that particular class
"""
no_next = is_not_offered_next_year(filtered_html)
repeat = is_repeat_allowed(filtered_html)
url = get_url(filtered_html)
final = has_final(filtered_html)
half = get_half(filtered_html)
limited = is_limited(filtered_html)

course_data = {
"nx": no_next,
"rp": repeat,
"u": url,
"f": final,
"hf": half,
"lm": limited,
return {
"nonext": is_not_offered_next_year(filtered_html),
"repeat": is_repeat_allowed(filtered_html),
"url": get_url(filtered_html),
"final": has_final(filtered_html),
"half": get_half(filtered_html),
"limited": is_limited(filtered_html),
}

return course_data


def get_home_catalog_links():
"""
Expand Down
74 changes: 38 additions & 36 deletions scrapers/fireroad.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def parse_schedule(course):
result = {}

# Kinds of sections that exist.
result["s"] = []
result["sectionKinds"] = []
section_kinds = ("Lecture", "Recitation", "Lab", "Design")

for chunk in schedule.split(";"):
Expand All @@ -100,7 +100,7 @@ def parse_schedule(course):

# The key is lowercase
kind = name.lower()
result["s"].append(kind)
result["sectionKinds"].append(kind)

# Raw section times, e.g. T9.301-11 or TR1,F2.
result[kind + "RawSections"] = sections
Expand All @@ -115,7 +115,7 @@ def parse_schedule(course):
result[kindSectionsName].append(parse_section(info))

# True if some section is not scheduled yet (i.e. its time is TBA).
result["tb"] = section_tba
result["tba"] = section_tba
return result


Expand All @@ -134,15 +134,15 @@ def parse_attributes(course):
gir_attr = course.get("gir_attribute", "")

return {
"hh": hass_code == "H",
"ha": hass_code == "A",
"hs": hass_code == "S",
"he": hass_code == "E",
"ci": comms_code == "CI-H",
"cw": comms_code == "CI-HW",
"re": gir_attr == "REST",
"la": gir_attr == "LAB",
"pl": gir_attr == "LAB2",
"hassH": hass_code == "H",
"hassA": hass_code == "A",
"hassS": hass_code == "S",
"hassE": hass_code == "E",
"cih": comms_code == "CI-H",
"cihw": comms_code == "CI-HW",
"rest": gir_attr == "REST",
"lab": gir_attr == "LAB",
"partLab": gir_attr == "LAB2",
}


Expand All @@ -154,7 +154,7 @@ def parse_terms(course):
* course (dict[str, Union[bool, float, int, list[str], str]]): The course object.
Returns:
* dict[str, list[str]]: The parsed terms, stored in the key "t".
* dict[str, list[str]]: The parsed terms, stored in the key "terms".
"""
terms = [
name
Expand All @@ -166,7 +166,7 @@ def parse_terms(course):
]
if course[attr]
]
return {"t": terms}
return {"terms": terms}


def parse_prereqs(course):
Expand All @@ -177,14 +177,14 @@ def parse_prereqs(course):
* course (dict[str, Union[bool, float, int, list[str], str]]): The course object.
Returns:
* dict[str, str]: The parsed prereqs, in the key "pr".
* dict[str, str]: The parsed prereqs, in the key "prereqs".
"""
prereqs = course.get("prerequisites", "")
for gir, gir_rw in utils.GIR_REWRITE.items():
prereqs = prereqs.replace(gir, gir_rw)
if not prereqs:
prereqs = "None"
return {"pr": prereqs}
return {"prereqs": prereqs}


def get_course_data(courses, course):
Expand All @@ -203,9 +203,9 @@ def get_course_data(courses, course):
course_code = course["subject_id"]
course_num, course_class = course_code.split(".")
raw_class = {
"no": course_code,
"co": course_num,
"cl": course_class,
"number": course_code,
"course": course_num,
"subject": course_class,
}

if "schedule" not in course:
Expand All @@ -224,43 +224,45 @@ def get_course_data(courses, course):
raw_class.update(parse_attributes(course))
raw_class.update(
{
"u1": course["lecture_units"],
"u2": course["lab_units"],
"u3": course["preparation_units"],
"le": course["level"],
"vu": course["is_variable_units"],
"sa": ", ".join(course.get("joint_subjects", [])),
"mw": ", ".join(course.get("meets_with_subjects", [])),
"lectureUnits": course["lecture_units"],
"labUnits": course["lab_units"],
"preparationUnits": course["preparation_units"],
"level": course["level"],
"isVariableUnits": course["is_variable_units"],
"same": ", ".join(course.get("joint_subjects", [])),
"meets": ", ".join(course.get("meets_with_subjects", [])),
}
)
# This should be the case with variable-units classes, but just to make sure.
if raw_class["vu"]:
raw_class["u1"] = raw_class["u2"] = raw_class["u3"] = 0
if raw_class["isVariableUnits"]:
assert raw_class["lectureUnits"] == 0
assert raw_class["labUnits"] == 0
assert raw_class["preparationUnits"] == 0

# terms, prereqs
raw_class.update(parse_terms(course))
raw_class.update(parse_prereqs(course))

raw_class.update(
{
"d": course.get("description", ""),
"n": course.get("title", ""),
"description": course.get("description", ""),
"name": course.get("title", ""),
# TODO: improve instructor parsing
"i": ",".join(course.get("instructors", [])),
"v": course.get("virtual_status", "") == "Virtual",
"inCharge": ",".join(course.get("instructors", [])),
"virtualStatus": course.get("virtual_status", "") == "Virtual",
}
)

# nonext, repeat, url, final, half, limited are from catalog.json, not here

if "old_id" in course:
raw_class["on"] = course["old_id"]
raw_class["oldNumber"] = course["old_id"]

raw_class.update(
{
"ra": course.get("rating", 0),
"h": course.get("in_class_hours", 0) + course.get("out_of_class_hours", 0),
"si": course.get("enrollment_number", 0),
"rating": course.get("rating", 0),
"hours": course.get("in_class_hours", 0) + course.get("out_of_class_hours", 0),
"size": course.get("enrollment_number", 0),
}
)

Expand Down
Loading

0 comments on commit 3836ec3

Please sign in to comment.