From ee95cf92e8d12cbb1edabb6e3917713cbeec7675 Mon Sep 17 00:00:00 2001 From: Kashav Madan Date: Fri, 2 Sep 2016 13:06:25 -0400 Subject: [PATCH] Parse current enrolment, include count in result --- uoftscrapers/scrapers/courses/__init__.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/uoftscrapers/scrapers/courses/__init__.py b/uoftscrapers/scrapers/courses/__init__.py index 617faae..ea9e6b5 100755 --- a/uoftscrapers/scrapers/courses/__init__.py +++ b/uoftscrapers/scrapers/courses/__init__.py @@ -39,7 +39,7 @@ def scrape(location='.'): worker.start() Scraper.logger.info('Queued %d courses.' % total) - for x in urls: + for x in urls[:10]: course_id = re.search('offImg(.*)', x[0]).group(1).split('"')[0] url = '%s/courseSearch/coursedetails/%s' % ( Courses.host, @@ -183,6 +183,11 @@ def parse_course_html(course_id, html): class_size = tds[4].get_text().strip() + try: + current_enrolment = tds[5].get_text().strip() + except (IndexError, AttributeError) as e: + current_enrolment = 0 + time_data = [] for i in range(len(times)): info = times[i].split(" ") @@ -215,7 +220,7 @@ def parse_course_html(course_id, html): ("instructors", instructors), ("times", time_data), ("size", int(class_size)), - ("enrolment", 0) + ("enrolment", int(current_enrolment)) ]) sections.append(data)