From 07e4cc74d941dfd7c312178e7c183671cf932b18 Mon Sep 17 00:00:00 2001 From: Kashav Madan Date: Thu, 21 Apr 2016 15:01:17 -0400 Subject: [PATCH 1/3] Add campus key --- uoftscrapers/scrapers/exams/utm.py | 1 + uoftscrapers/scrapers/exams/utsc.py | 1 + uoftscrapers/scrapers/exams/utsg.py | 1 + 3 files changed, 3 insertions(+) diff --git a/uoftscrapers/scrapers/exams/utm.py b/uoftscrapers/scrapers/exams/utm.py index dfbccdb..156a7bc 100644 --- a/uoftscrapers/scrapers/exams/utm.py +++ b/uoftscrapers/scrapers/exams/utm.py @@ -79,6 +79,7 @@ def retrieve_exams(courses): ('id', id_), ('course_id', course_id), ('course_code', course_code), + ('campus', 'UTM'), ('period', period), ('date', date), ('start_time', start), diff --git a/uoftscrapers/scrapers/exams/utsc.py b/uoftscrapers/scrapers/exams/utsc.py index cbaddbc..20b342e 100644 --- a/uoftscrapers/scrapers/exams/utsc.py +++ b/uoftscrapers/scrapers/exams/utsc.py @@ -47,6 +47,7 @@ def scrape(location='.'): ('id', id_), ('course_id', course_id), ('course_code', course_code), + ('campus', 'UTSC'), ('period', period), ('date', date), ('start_time', start), diff --git a/uoftscrapers/scrapers/exams/utsg.py b/uoftscrapers/scrapers/exams/utsg.py index 2ba0fe0..b84513b 100644 --- a/uoftscrapers/scrapers/exams/utsg.py +++ b/uoftscrapers/scrapers/exams/utsg.py @@ -58,6 +58,7 @@ def scrape(location='.', year=None): ('id', id_), ('course_id', course_id), ('course_code', course_code), + ('campus', 'UTSG'), ('period', p.upper()), ('date', date_), ('start_time', start), From 7015d8ba7f0b7ba9142c6eec17d93840bb2dd828 Mon Sep 17 00:00:00 2001 From: Kashav Madan Date: Thu, 21 Apr 2016 16:11:40 -0400 Subject: [PATCH 2/3] Separate lecture code, exam section --- uoftscrapers/scrapers/exams/utm.py | 17 ++++++++--------- uoftscrapers/scrapers/exams/utsc.py | 14 +++++++------- uoftscrapers/scrapers/exams/utsg.py | 17 +++++++++++++++-- 3 files changed, 30 insertions(+), 18 deletions(-) diff --git a/uoftscrapers/scrapers/exams/utm.py b/uoftscrapers/scrapers/exams/utm.py index 156a7bc..cab97b5 100644 --- a/uoftscrapers/scrapers/exams/utm.py +++ b/uoftscrapers/scrapers/exams/utm.py @@ -45,12 +45,11 @@ def retrieve_exams(courses): soup = BeautifulSoup(html, 'html.parser') course_code = soup.find('div', class_='title').text.strip() + lecture_code = None # some course names include lecture code (see CHI200Y5Y) if ' ' in course_code: course_code, lecture_code = course_code.split(' ') - else: - lecture_code = None data = [br.previous_sibling.string.strip() for br in soup.find('div', class_='info').find_all('br')] @@ -71,9 +70,8 @@ def retrieve_exams(courses): for room in [x for x in data[3:] if 'Room:' in x]] # append lecture code to section range if it exists - if lecture_code: - sections[0]['section'] = '%s %s' % (lecture_code, - sections[0]['section']) + for i in range(len(sections)): + sections[i]['lecture'] = lecture_code or '' doc = OrderedDict([ ('id', id_), @@ -91,10 +89,11 @@ def retrieve_exams(courses): exams[id_] = doc for section in sections: - exams[id_]['sections'].append({ - 'section': section['section'].strip(), - 'locaton': section['room'] - }) + exams[id_]['sections'].append(OrderedDict([ + ('lecture_code', section['lecture']), + ('exam_section', section['section']), + ('location', section['room']) + ])) return exams @staticmethod diff --git a/uoftscrapers/scrapers/exams/utsc.py b/uoftscrapers/scrapers/exams/utsc.py index 20b342e..a2c609c 100644 --- a/uoftscrapers/scrapers/exams/utsc.py +++ b/uoftscrapers/scrapers/exams/utsc.py @@ -26,14 +26,13 @@ def scrape(location='.'): for tr in table.find_all('tr')[1:]: data = [x.text.strip() for x in tr.find_all('td')] - course_code = data[0] + course_code, lecture_code = data[0], None if ' ' in course_code: course_code, lecture_code = course_code.split(' ') - else: - lecture_code = None date = data[1] start, end = UTSCExams.parse_time(data[2], data[3], date) + location_ = data[4] id_, course_id = UTSCExams.get_course_id(course_code, date) @@ -58,10 +57,11 @@ def scrape(location='.'): if id_ not in exams: exams[id_] = doc - exams[id_]['sections'].append({ - 'section': lecture_code or '', - 'location': location_ - }) + exams[id_]['sections'].append(OrderedDict([ + ('lecture_code', lecture_code or ''), + ('exam_section', ''), + ('location', location_) + ])) for id_, doc in exams.items(): Scraper.save_json(doc, location, id_) diff --git a/uoftscrapers/scrapers/exams/utsg.py b/uoftscrapers/scrapers/exams/utsg.py index b84513b..c9a5b25 100644 --- a/uoftscrapers/scrapers/exams/utsg.py +++ b/uoftscrapers/scrapers/exams/utsg.py @@ -50,7 +50,19 @@ def scrape(location='.', year=None): if id_ is None: continue - section, location_ = data[1], data[4] + section = data[1] + + lecture_section = exam_section = None + + if ' ' in section: + lecture_section, exam_section = section.split(' ') + elif '-' in section: + exam_section = section + else: + lecture_section = section + + location_ = data[4] + date_ = UTSGExams.parse_date(data[2], p[-2:]) or '' start, end = UTSGExams.parse_time(data[3], date_) or (0, 0) @@ -70,7 +82,8 @@ def scrape(location='.', year=None): exams[id_] = doc exams[id_]['sections'].append(OrderedDict([ - ('section', section), + ('lecture_code', lecture_section or ''), + ('exam_section', exam_section or ''), ('location', location_) ])) From d1a885355b1faf67c848df008ccfa0ebae73915d Mon Sep 17 00:00:00 2001 From: Kashav Madan Date: Thu, 21 Apr 2016 16:12:00 -0400 Subject: [PATCH 3/3] Use updated schema --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index db1be02..e91d2a2 100644 --- a/README.md +++ b/README.md @@ -405,7 +405,8 @@ uoftscrapers.Exams "start_time": String, "end_time": String, "sections": [{ - "section": String, + "lecture_code": String, + "exam_section": String, "location": String }] }