From 31607f25a599266580aa448df37354f6f2151fe6 Mon Sep 17 00:00:00 2001
From: Kashav Madan <kshvmdn@gmail.com>
Date: Sun, 1 May 2016 04:29:07 -0400
Subject: [PATCH 1/6] Start engineering exams scraper

---
 uoftscrapers/scrapers/exams/utsg.py | 138 +++++++++++++++++++++++++---
 1 file changed, 126 insertions(+), 12 deletions(-)

diff --git a/uoftscrapers/scrapers/exams/utsg.py b/uoftscrapers/scrapers/exams/utsg.py
index ffa7e1b..54ff3cc 100644
--- a/uoftscrapers/scrapers/exams/utsg.py
+++ b/uoftscrapers/scrapers/exams/utsg.py
@@ -4,9 +4,121 @@
 from datetime import datetime
 from pytz import timezone
 
+from pprint import pprint
+
 
 class UTSGExams:
-    """A scraper for UTSG exams.
+    """A scraper for UTSG exams."""
+
+    @staticmethod
+    def scrape(location='.', year=None):
+        """Update the local JSON files for this scraper."""
+
+        Scraper.logger.info('UTSGExams initialized.')
+
+        artsci_exams = ArtSciExams.scrape(location, year, False)
+        eng_exams = None # EngExams.scrape(location, False)
+
+        for exams in artsci_exams, eng_exams:
+            if exams is None:
+                continue
+            for id_, doc in exams.items():
+                Scraper.save_json(doc, location, id_)
+
+        Scraper.logger.info('UTSGExams completed.')
+
+
+class EngExams:
+    """A scraper for Engineering exams.
+
+    Data is scraped from http://www.artsci.utoronto.ca/current/exams/
+    """
+
+    host = 'http://www.apsc.utoronto.ca/timetable/fes.aspx'
+
+    @staticmethod
+    def scrape(location='.', save=True):
+        """Update the local JSON files for this scraper."""
+
+        Scraper.logger.info('EngExams initialized.')
+
+        exams = OrderedDict()
+
+        headers = {
+            'Referer': EngExams.host
+        }
+        html = Scraper.get(EngExams.host, headers=headers, max_attempts=3)
+        soup = BeautifulSoup(html, 'html.parser')
+
+        for tr in soup.find('table', id='DataList1').find_all('tr'):
+            for td in tr.find_all('td'):
+                exam = OrderedDict()
+
+                entry = td.find('div', id='logo')
+
+                if entry is None:
+                    continue
+
+                info = entry.find('div')
+                locations = entry.find('table', class_='xx')
+
+                course_code = info.find('strong').text.strip()
+
+                id_, course_id = course_code, ''
+
+                date, time = [br.next_sibling.strip()
+                              for br in info.find_all('br')[:2]]
+
+                date = datetime.strptime(date.split(':')[-1].strip(),
+                                         '%b %d, %Y').date().isoformat()
+
+                time = time.strip().split(':')
+                hour = int(time[1])
+                minute, period = time[2].split(' ')
+
+                hour += 12 if period == 'PM' and hour != 12 else 0
+
+                start = hour * 60 * 60 + int(minute) * 60
+                # No end times, using 2.5h per http://www.undergrad.engineering.utoronto.ca/Office_of_the_Registrar/Examinations/Schedules_Locations.htm
+                duration = 2 * 60 * 60 + 30 * 60
+                end = start + duration
+
+                exam_sections = []
+                for tr in locations.find_all('tr')[1:]:
+                    location, range, _ = [td.text.strip() for td in tr.find_all('td')]
+
+                    location = location.replace('-', ' ')
+                    range = '' if range == 'A - Z' else range
+
+                    exam_sections.append(OrderedDict([
+                        ('lecture_code', ''),
+                        ('exam_section', range),
+                        ('location', location)
+                    ]))
+
+                exams[id_] = OrderedDict([
+                    ('id', id_),
+                    ('course_id', course_id),
+                    ('course_code', course_code),
+                    ('campus', 'UTSG'),
+                    ('period', ''),
+                    ('date', date),
+                    ('start_time', start),
+                    ('end_time', end),
+                    ('duration', duration),
+                    ('sections', exam_sections)
+                ])
+
+        if save:
+            for id_, doc in exams.items():
+                Scraper.save_json(doc, location, id_)
+
+        Scraper.logger.info('EngExams completed.')
+        return exams
+
+
+class ArtSciExams:
+    """A scraper for Art & Science exams.
 
     Data is scraped from http://www.artsci.utoronto.ca/current/exams/
     """
@@ -14,20 +126,20 @@ class UTSGExams:
     host = 'http://www.artsci.utoronto.ca/current/exams/'
 
     @staticmethod
-    def scrape(location='.', year=None):
+    def scrape(location='.', year=None, save=True):
         """Update the local JSON files for this scraper."""
 
-        Scraper.logger.info('UTSGExams initialized.')
+        Scraper.logger.info('ArtSciExams initialized.')
 
         exams = OrderedDict()
 
-        for p in UTSGExams.get_exam_periods(year):
+        for p in ArtSciExams.get_exam_periods(year):
             Scraper.logger.info('Scraping %s exams.' % p.upper())
 
             headers = {
-                'Referer': UTSGExams.host
+                'Referer': ArtSciExams.host
             }
-            html = Scraper.get('%s%s' % (UTSGExams.host, p),
+            html = Scraper.get('%s%s' % (ArtSciExams.host, p),
                                headers=headers,
                                max_attempts=3)
 
@@ -45,7 +157,7 @@ def scrape(location='.', year=None):
             for row in rows[1:]:
                 data = [x.text.strip() for x in row.find_all('td')]
 
-                id_, course_id, course_code = UTSGExams.parse_course_info(p, data[0])
+                id_, course_id, course_code = ArtSciExams.parse_course_info(p, data[0])
 
                 if id_ is None:
                     continue
@@ -63,8 +175,8 @@ def scrape(location='.', year=None):
 
                 location_ = data[4]
 
-                date = UTSGExams.parse_date(data[2], p[-2:]) or ''
-                start, end = UTSGExams.parse_time(data[3], date) or (0, 0)
+                date = ArtSciExams.parse_date(data[2], p[-2:]) or ''
+                start, end = ArtSciExams.parse_time(data[3], date) or (0, 0)
                 duration = end - start
 
                 doc = OrderedDict([
@@ -89,10 +201,12 @@ def scrape(location='.', year=None):
                     ('location', location_)
                 ]))
 
-        for id_, doc in exams.items():
-            Scraper.save_json(doc, location, id_)
+        if save:
+            for id_, doc in exams.items():
+                Scraper.save_json(doc, location, id_)
 
-        Scraper.logger.info('UTSGExams completed.')
+        Scraper.logger.info('ArtSciExams completed.')
+        return exams
 
     @staticmethod
     def parse_course_info(period, course_code):

From 04c6004ba7eb2f1280b771cdc2d8819e2322fce0 Mon Sep 17 00:00:00 2001
From: Kashav Madan <kshvmdn@gmail.com>
Date: Sun, 1 May 2016 13:29:33 -0400
Subject: [PATCH 2/6] Clean up date parser

---
 uoftscrapers/scrapers/exams/utm.py  | 11 +++--------
 uoftscrapers/scrapers/exams/utsc.py | 10 ++--------
 2 files changed, 5 insertions(+), 16 deletions(-)

diff --git a/uoftscrapers/scrapers/exams/utm.py b/uoftscrapers/scrapers/exams/utm.py
index 3d1684f..c07880b 100644
--- a/uoftscrapers/scrapers/exams/utm.py
+++ b/uoftscrapers/scrapers/exams/utm.py
@@ -112,14 +112,9 @@ def get_page_links(endpoint):
     @staticmethod
     def get_period(d):
         def get_date(month, date, year):
-            months = {
-                'dec': 12,
-                'apr': 4,
-                'june': 6,
-                'aug': 8
-            }
-            return datetime.strptime('%s-%d-%d' % (year, months[month], date),
-                                     '%Y-%m-%d')
+            month = 'jun' if month == 'june' else month
+            return datetime.strptime('%s %s %d' % (year, month, date),
+                                     '%Y %b %d')
 
         d = datetime.strptime(d, '%Y-%m-%d')
 
diff --git a/uoftscrapers/scrapers/exams/utsc.py b/uoftscrapers/scrapers/exams/utsc.py
index 42f35d7..49cb680 100644
--- a/uoftscrapers/scrapers/exams/utsc.py
+++ b/uoftscrapers/scrapers/exams/utsc.py
@@ -73,14 +73,8 @@ def scrape(location='.'):
     @staticmethod
     def get_period(d):
         def get_date(month, date, year):
-            months = {
-                'dec': 12,
-                'apr': 4,
-                'june': 6,
-                'aug': 8
-            }
-            return datetime.strptime('%s-%d-%d' % (year, months[month], date),
-                                     '%Y-%m-%d')
+            month = 'jun' if month == 'june' else month
+            return datetime.strptime('%s%s%d' % (year, month, date), '%Y%b%d')
 
         d = datetime.strptime(d, '%Y-%m-%d')
 

From 1c6a28f171e0501fb99177ddb6a706258d4b60f9 Mon Sep 17 00:00:00 2001
From: Kashav Madan <kshvmdn@gmail.com>
Date: Sun, 1 May 2016 13:30:11 -0400
Subject: [PATCH 3/6] Add course/id parser

---
 uoftscrapers/scrapers/exams/utsg.py | 80 ++++++++++++++++++++---------
 1 file changed, 57 insertions(+), 23 deletions(-)

diff --git a/uoftscrapers/scrapers/exams/utsg.py b/uoftscrapers/scrapers/exams/utsg.py
index 54ff3cc..6b421ca 100644
--- a/uoftscrapers/scrapers/exams/utsg.py
+++ b/uoftscrapers/scrapers/exams/utsg.py
@@ -16,10 +16,8 @@ def scrape(location='.', year=None):
 
         Scraper.logger.info('UTSGExams initialized.')
 
-        artsci_exams = ArtSciExams.scrape(location, year, False)
-        eng_exams = None # EngExams.scrape(location, False)
-
-        for exams in artsci_exams, eng_exams:
+        for faculty in ArtSciExams, EngExams:
+            exams = faculty.scrape(location=location, year=year, save=False)
             if exams is None:
                 continue
             for id_, doc in exams.items():
@@ -31,13 +29,13 @@ def scrape(location='.', year=None):
 class EngExams:
     """A scraper for Engineering exams.
 
-    Data is scraped from http://www.artsci.utoronto.ca/current/exams/
+    Data is scraped from http://www.apsc.utoronto.ca/timetable/fes.aspx
     """
 
     host = 'http://www.apsc.utoronto.ca/timetable/fes.aspx'
 
     @staticmethod
-    def scrape(location='.', save=True):
+    def scrape(location='.', year=None, save=True):
         """Update the local JSON files for this scraper."""
 
         Scraper.logger.info('EngExams initialized.')
@@ -50,10 +48,11 @@ def scrape(location='.', save=True):
         html = Scraper.get(EngExams.host, headers=headers, max_attempts=3)
         soup = BeautifulSoup(html, 'html.parser')
 
+        if soup is None:
+            return
+
         for tr in soup.find('table', id='DataList1').find_all('tr'):
             for td in tr.find_all('td'):
-                exam = OrderedDict()
-
                 entry = td.find('div', id='logo')
 
                 if entry is None:
@@ -62,10 +61,6 @@ def scrape(location='.', save=True):
                 info = entry.find('div')
                 locations = entry.find('table', class_='xx')
 
-                course_code = info.find('strong').text.strip()
-
-                id_, course_id = course_code, ''
-
                 date, time = [br.next_sibling.strip()
                               for br in info.find_all('br')[:2]]
 
@@ -74,34 +69,37 @@ def scrape(location='.', save=True):
 
                 time = time.strip().split(':')
                 hour = int(time[1])
-                minute, period = time[2].split(' ')
+                minute, meridiem = time[2].split(' ')
+
+                period = EngExams.get_period(date)
+
+                exam_id, course_id, course_code = \
+                    EngExams.get_course_info(info.find('strong').text.strip(), period)
 
-                hour += 12 if period == 'PM' and hour != 12 else 0
+                hour += 12 if meridiem == 'PM' and hour != 12 else 0
 
+                # No end times, using 2.5h for duration per
+                # http://www.undergrad.engineering.utoronto.ca/Office_of_the_Registrar/Examinations/Schedules_Locations.htm
                 start = hour * 60 * 60 + int(minute) * 60
-                # No end times, using 2.5h per http://www.undergrad.engineering.utoronto.ca/Office_of_the_Registrar/Examinations/Schedules_Locations.htm
                 duration = 2 * 60 * 60 + 30 * 60
                 end = start + duration
 
                 exam_sections = []
                 for tr in locations.find_all('tr')[1:]:
-                    location, range, _ = [td.text.strip() for td in tr.find_all('td')]
-
-                    location = location.replace('-', ' ')
-                    range = '' if range == 'A - Z' else range
+                    location, range = [td.text.strip() for td in tr.find_all('td')[:2]]
 
                     exam_sections.append(OrderedDict([
                         ('lecture_code', ''),
                         ('exam_section', range),
-                        ('location', location)
+                        ('location', location.replace('-', ' '))
                     ]))
 
-                exams[id_] = OrderedDict([
-                    ('id', id_),
+                exams[exam_id] = OrderedDict([
+                    ('id', exam_id),
                     ('course_id', course_id),
                     ('course_code', course_code),
                     ('campus', 'UTSG'),
-                    ('period', ''),
+                    ('period', period),
                     ('date', date),
                     ('start_time', start),
                     ('end_time', end),
@@ -116,6 +114,42 @@ def scrape(location='.', save=True):
         Scraper.logger.info('EngExams completed.')
         return exams
 
+    @staticmethod
+    def get_course_info(course, period):
+        endings = {
+            'dec': {'season': 'F', 'month': '1'},
+            'apr': {'season': 'S', 'month': '1'},
+            'june': {'season': 'F', 'month': '5F'},
+            'aug': {'season': 'S', 'month': '5S'}
+        }
+
+        month, year = period[:-2].lower(), period[-2:]
+        exam_id = course_id = course_code = None
+        if month in endings:
+            course_code = '%s%s' % (course, endings[month]['season'])
+            course_id = '%s20%s%s' % (course_code, year, endings[month]['month'])
+            exam_id = '%s%s' % (course_id, period)
+        return exam_id, course_id, course_code
+
+    @staticmethod
+    def get_period(d):
+        def get_date(month, date, year):
+            month = 'jun' if month == 'june' else month
+            return datetime.strptime('%s%s%d' % (year, month, date), '%Y%b%d')
+
+        d = datetime.strptime(d, '%Y-%m-%d')
+
+        year = d.year
+        month = None
+
+        for m, ld in (('dec', 31), ('apr', 30), ('june', 30), ('aug', 31)):
+            if get_date(m, 1, year) <= d <= get_date(m, ld, year):
+                month = m
+                break
+
+        if month:
+            return '%s%s' % (month.upper(), str(year)[2:])
+
 
 class ArtSciExams:
     """A scraper for Art & Science exams.

From 20372f9418fb9fb8fc26e31fd56ebcb182dc5df9 Mon Sep 17 00:00:00 2001
From: Kashav Madan <kshvmdn@gmail.com>
Date: Sun, 1 May 2016 13:38:27 -0400
Subject: [PATCH 4/6] Clean up: move EngExams below ArtSciExams, drop pprint import

---
 uoftscrapers/scrapers/exams/utsg.py | 255 ++++++++++++++--------------
 1 file changed, 127 insertions(+), 128 deletions(-)

diff --git a/uoftscrapers/scrapers/exams/utsg.py b/uoftscrapers/scrapers/exams/utsg.py
index 6b421ca..911c759 100644
--- a/uoftscrapers/scrapers/exams/utsg.py
+++ b/uoftscrapers/scrapers/exams/utsg.py
@@ -4,8 +4,6 @@
 from datetime import datetime
 from pytz import timezone
 
-from pprint import pprint
-
 
 class UTSGExams:
     """A scraper for UTSG exams."""
@@ -26,133 +24,8 @@ def scrape(location='.', year=None):
         Scraper.logger.info('UTSGExams completed.')
 
 
-class EngExams:
-    """A scraper for Engineering exams.
-
-    Data is scraped from http://www.apsc.utoronto.ca/timetable/fes.aspx
-    """
-
-    host = 'http://www.apsc.utoronto.ca/timetable/fes.aspx'
-
-    @staticmethod
-    def scrape(location='.', year=None, save=True):
-        """Update the local JSON files for this scraper."""
-
-        Scraper.logger.info('EngExams initialized.')
-
-        exams = OrderedDict()
-
-        headers = {
-            'Referer': EngExams.host
-        }
-        html = Scraper.get(EngExams.host, headers=headers, max_attempts=3)
-        soup = BeautifulSoup(html, 'html.parser')
-
-        if soup is None:
-            return
-
-        for tr in soup.find('table', id='DataList1').find_all('tr'):
-            for td in tr.find_all('td'):
-                entry = td.find('div', id='logo')
-
-                if entry is None:
-                    continue
-
-                info = entry.find('div')
-                locations = entry.find('table', class_='xx')
-
-                date, time = [br.next_sibling.strip()
-                              for br in info.find_all('br')[:2]]
-
-                date = datetime.strptime(date.split(':')[-1].strip(),
-                                         '%b %d, %Y').date().isoformat()
-
-                time = time.strip().split(':')
-                hour = int(time[1])
-                minute, meridiem = time[2].split(' ')
-
-                period = EngExams.get_period(date)
-
-                exam_id, course_id, course_code = \
-                    EngExams.get_course_info(info.find('strong').text.strip(), period)
-
-                hour += 12 if meridiem == 'PM' and hour != 12 else 0
-
-                # No end times, using 2.5h for duration per
-                # http://www.undergrad.engineering.utoronto.ca/Office_of_the_Registrar/Examinations/Schedules_Locations.htm
-                start = hour * 60 * 60 + int(minute) * 60
-                duration = 2 * 60 * 60 + 30 * 60
-                end = start + duration
-
-                exam_sections = []
-                for tr in locations.find_all('tr')[1:]:
-                    location, range = [td.text.strip() for td in tr.find_all('td')[:2]]
-
-                    exam_sections.append(OrderedDict([
-                        ('lecture_code', ''),
-                        ('exam_section', range),
-                        ('location', location.replace('-', ' '))
-                    ]))
-
-                exams[exam_id] = OrderedDict([
-                    ('id', exam_id),
-                    ('course_id', course_id),
-                    ('course_code', course_code),
-                    ('campus', 'UTSG'),
-                    ('period', period),
-                    ('date', date),
-                    ('start_time', start),
-                    ('end_time', end),
-                    ('duration', duration),
-                    ('sections', exam_sections)
-                ])
-
-        if save:
-            for id_, doc in exams.items():
-                Scraper.save_json(doc, location, id_)
-
-        Scraper.logger.info('EngExams completed.')
-        return exams
-
-    @staticmethod
-    def get_course_info(course, period):
-        endings = {
-            'dec': {'season': 'F', 'month': '1'},
-            'apr': {'season': 'S', 'month': '1'},
-            'june': {'season': 'F', 'month': '5F'},
-            'aug': {'season': 'S', 'month': '5S'}
-        }
-
-        month, year = period[:-2].lower(), period[-2:]
-        exam_id = course_id = course_code = None
-        if month in endings:
-            course_code = '%s%s' % (course, endings[month]['season'])
-            course_id = '%s20%s%s' % (course_code, year, endings[month]['month'])
-            exam_id = '%s%s' % (course_id, period)
-        return exam_id, course_id, course_code
-
-    @staticmethod
-    def get_period(d):
-        def get_date(month, date, year):
-            month = 'jun' if month == 'june' else month
-            return datetime.strptime('%s%s%d' % (year, month, date), '%Y%b%d')
-
-        d = datetime.strptime(d, '%Y-%m-%d')
-
-        year = d.year
-        month = None
-
-        for m, ld in (('dec', 31), ('apr', 30), ('june', 30), ('aug', 31)):
-            if get_date(m, 1, year) <= d <= get_date(m, ld, year):
-                month = m
-                break
-
-        if month:
-            return '%s%s' % (month.upper(), str(year)[2:])
-
-
 class ArtSciExams:
-    """A scraper for Art & Science exams.
+    """A scraper for Arts & Science exams.
 
     Data is scraped from http://www.artsci.utoronto.ca/current/exams/
     """
@@ -312,3 +185,129 @@ def get_exam_periods(year):
             periods.append('%s%s' % (m, str(y)[2:]))
 
         return periods
+
+
+class EngExams:
+    """A scraper for Engineering exams.
+
+    Data is scraped from http://www.apsc.utoronto.ca/timetable/fes.aspx
+    """
+
+    host = 'http://www.apsc.utoronto.ca/timetable/fes.aspx'
+
+    @staticmethod
+    def scrape(location='.', year=None, save=True):
+        """Update the local JSON files for this scraper."""
+
+        Scraper.logger.info('EngExams initialized.')
+
+        exams = OrderedDict()
+
+        headers = {
+            'Referer': EngExams.host
+        }
+        html = Scraper.get(EngExams.host, headers=headers, max_attempts=3)
+        soup = BeautifulSoup(html, 'html.parser')
+
+        if soup is None:
+            return
+
+        for tr in soup.find('table', id='DataList1').find_all('tr'):
+            for td in tr.find_all('td'):
+                entry = td.find('div', id='logo')
+
+                if entry is None:
+                    continue
+
+                info = entry.find('div')
+
+                date, time = [br.next_sibling.strip()
+                              for br in info.find_all('br')[:2]]
+
+                date = datetime.strptime(date.split(':')[-1].strip(),
+                                         '%b %d, %Y').date().isoformat()
+
+                time = time.strip().split(':')
+                hour = int(time[1])
+                minute, meridiem = time[2].split(' ')
+
+                hour += 12 if meridiem == 'PM' and hour != 12 else 0
+
+                # No end times, using 2.5h for duration per
+                # http://www.undergrad.engineering.utoronto.ca/Office_of_the_Registrar/Examinations/Schedules_Locations.htm
+                start = hour * 60 * 60 + int(minute) * 60
+                duration = 2 * 60 * 60 + 30 * 60
+                end = start + duration
+
+                period = EngExams.get_period(date)
+
+                exam_id, course_id, course_code = \
+                    EngExams.get_course_info(info.find('strong').text.strip(), period)
+
+                locations = entry.find('table', class_='xx')
+
+                exam_sections = []
+                for tr in locations.find_all('tr')[1:]:
+                    location, range = [td.text.strip() for td in tr.find_all('td')[:2]]
+
+                    exam_sections.append(OrderedDict([
+                        ('lecture_code', ''),
+                        ('exam_section', range),
+                        ('location', location.replace('-', ' '))
+                    ]))
+
+                exams[exam_id] = OrderedDict([
+                    ('id', exam_id),
+                    ('course_id', course_id),
+                    ('course_code', course_code),
+                    ('campus', 'UTSG'),
+                    ('period', period),
+                    ('date', date),
+                    ('start_time', start),
+                    ('end_time', end),
+                    ('duration', duration),
+                    ('sections', exam_sections)
+                ])
+
+        if save:
+            for id_, doc in exams.items():
+                Scraper.save_json(doc, location, id_)
+
+        Scraper.logger.info('EngExams completed.')
+        return exams
+
+    @staticmethod
+    def get_course_info(course, period):
+        endings = {
+            'dec': {'season': 'F', 'month': '1'},
+            'apr': {'season': 'S', 'month': '1'},
+            'june': {'season': 'F', 'month': '5F'},
+            'aug': {'season': 'S', 'month': '5S'}
+        }
+
+        month, year = period[:-2].lower(), period[-2:]
+        exam_id = course_id = course_code = None
+        if month in endings:
+            course_code = '%s%s' % (course, endings[month]['season'])
+            course_id = '%s20%s%s' % (course_code, year, endings[month]['month'])
+            exam_id = '%s%s' % (course_id, period)
+        return exam_id, course_id, course_code
+
+    @staticmethod
+    def get_period(d):
+        def get_date(month, date, year):
+            month = 'jun' if month == 'june' else month
+            return datetime.strptime('%s%s%d' % (year, month, date), '%Y%b%d')
+
+        d = datetime.strptime(d, '%Y-%m-%d')
+
+        year = d.year
+        month = None
+
+        for m, ld in (('dec', 31), ('apr', 30), ('june', 30), ('aug', 31)):
+            if get_date(m, 1, year) <= d <= get_date(m, ld, year):
+                month = m
+                break
+
+        if month:
+            return '%s%s' % (month.upper(), str(year)[2:])

From 4913694daa0615e1a2811e9c79941f0236cbfb1b Mon Sep 17 00:00:00 2001
From: Kashav Madan <kshvmdn@gmail.com>
Date: Sun, 1 May 2016 13:43:51 -0400
Subject: [PATCH 5/6] Fix December session suffix in EngExams.get_course_info

---
 uoftscrapers/scrapers/exams/utsg.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/uoftscrapers/scrapers/exams/utsg.py b/uoftscrapers/scrapers/exams/utsg.py
index 911c759..1b0f108 100644
--- a/uoftscrapers/scrapers/exams/utsg.py
+++ b/uoftscrapers/scrapers/exams/utsg.py
@@ -279,7 +279,7 @@ def scrape(location='.', year=None, save=True):
     @staticmethod
     def get_course_info(course, period):
         endings = {
-            'dec': {'season': 'F', 'month': '1'},
+            'dec': {'season': 'F', 'month': '9'},
             'apr': {'season': 'S', 'month': '1'},
             'june': {'season': 'F', 'month': '5F'},
             'aug': {'season': 'S', 'month': '5S'}

From 4176d900b8c71ffd9cf69adecb3c45c2f33cd62c Mon Sep 17 00:00:00 2001
From: Kashav Madan <kshvmdn@gmail.com>
Date: Sun, 1 May 2016 17:26:01 -0400
Subject: [PATCH 6/6] Modularize common helpers

---
 uoftscrapers/scrapers/exams/exams_helpers.py | 64 +++++++++++++++
 uoftscrapers/scrapers/exams/utm.py           | 79 +++---------------
 uoftscrapers/scrapers/exams/utsc.py          | 84 ++++----------------
 uoftscrapers/scrapers/exams/utsg.py          | 43 +++-------
 4 files changed, 99 insertions(+), 171 deletions(-)
 create mode 100644 uoftscrapers/scrapers/exams/exams_helpers.py

diff --git a/uoftscrapers/scrapers/exams/exams_helpers.py b/uoftscrapers/scrapers/exams/exams_helpers.py
new file mode 100644
index 0000000..90b6f93
--- /dev/null
+++ b/uoftscrapers/scrapers/exams/exams_helpers.py
@@ -0,0 +1,64 @@
+from datetime import datetime
+
+
+def convert_time(t):
+    """Convert a colon-separated 'H[:M[:S]]' string to seconds."""
+    # Weights for the hour, minute and second fields. zip() stops at the
+    # shorter sequence, so missing fields add nothing and extras are ignored.
+    weights = (3600, 60, 1)
+    fields = [int(part) for part in t.split(':')]
+    return sum(value * weight for value, weight in zip(fields, weights))
+
+
+def get_period(d):
+    """Return the exam period label for an ISO date, or None.
+
+    `d` is a 'YYYY-MM-DD' string. Dates in December, April, June or August
+    map to 'DEC', 'APR', 'JUNE' or 'AUG' followed by the two-digit year
+    (e.g. '2016-04-15' -> 'APR16'); any other month yields None.
+    """
+    # Look the month up by number rather than round-tripping month names
+    # through strptime('%b'), which depends on the process locale.
+    labels = {12: 'DEC', 4: 'APR', 6: 'JUNE', 8: 'AUG'}
+
+    d = datetime.strptime(d, '%Y-%m-%d')
+    if d.month not in labels:
+        return None
+    return '%s%s' % (labels[d.month], str(d.year)[2:])
+
+
+def get_course_id(course_code, date):
+    """Return (exam_id, course_id) for a course examined on `date`.
+
+    `course_code` is expected to end in a season letter ('F', 'S' or 'Y')
+    and `date` is a 'YYYY-MM-DD' string. The course id is the course code
+    plus a session suffix chosen by exam month and season; the exam id is
+    the course id plus the period label from get_period(). Both values are
+    None when the month/season pair is not listed below.
+
+    Example: get_course_id('CSC108H5F', '2016-12-10') returns
+    ('CSC108H5F20169DEC16', 'CSC108H5F20169').
+    """
+    d = datetime.strptime(date, '%Y-%m-%d')
+    year = d.year
+
+    # Keyed by month number so the lookup does not depend on the locale's
+    # month abbreviations (strftime('%b')). Note that 'Y' courses examined
+    # in December or April take the previous year with a '9' suffix.
+    endings = {
+        12: {'F': '%d9' % year, 'Y': '%d9' % (year - 1)},
+        4: {'S': '%d1' % year, 'Y': '%d9' % (year - 1)},
+        6: {'F': '%d5F' % year, 'Y': '%d5' % year},
+        8: {'S': '%d5S' % year, 'Y': '%d5' % year},
+    }
+
+    # Slicing (rather than indexing) keeps an empty course code from
+    # raising IndexError; '' simply matches no season.
+    season = course_code[-1:]
+    suffix = endings.get(d.month, {}).get(season)
+    if suffix is None:
+        return None, None
+
+    course_id = '%s%s' % (course_code, suffix)
+    exam_id = '%s%s' % (course_id, get_period(date))
+    return exam_id, course_id
diff --git a/uoftscrapers/scrapers/exams/utm.py b/uoftscrapers/scrapers/exams/utm.py
index c07880b..e42e545 100644
--- a/uoftscrapers/scrapers/exams/utm.py
+++ b/uoftscrapers/scrapers/exams/utm.py
@@ -1,4 +1,5 @@
 from ..utils import Scraper
+from .exams_helpers import *
 from bs4 import BeautifulSoup
 from collections import OrderedDict
 from datetime import datetime
@@ -33,7 +34,6 @@ def scrape(location='.'):
 
     @staticmethod
     def retrieve_exams(courses):
-
         exams = OrderedDict()
 
         for course in courses:
@@ -56,15 +56,15 @@ def retrieve_exams(courses):
 
             date = data[0].split(': ')[1]
 
-            id_, course_id = UTMExams.get_course_id(course_code, date)
+            exam_id, course_id = get_course_id(course_code, date)
 
-            period = UTMExams.get_period(date)
+            period = get_period(date)
 
-            if not id_ or not period:
+            if not exam_id or not period:
                 continue
 
-            start, end = UTMExams.parse_time(data[1].split(': ')[1],
-                                             data[2].split(': ')[1], date)
+            start = convert_time(data[1].split(': ')[1])
+            end = convert_time(data[2].split(': ')[1])
             duration = end - start
 
             sections = [UTMExams.parse_sections(room.split(': ')[1])
@@ -75,7 +75,7 @@ def retrieve_exams(courses):
                 sections[i]['lecture'] = lecture_code or ''
 
             doc = OrderedDict([
-                ('id', id_),
+                ('id', exam_id),
                 ('course_id', course_id),
                 ('course_code', course_code),
                 ('campus', 'UTM'),
@@ -87,11 +87,11 @@ def retrieve_exams(courses):
                 ('sections', [])
             ])
 
-            if id_ not in exams:
-                exams[id_] = doc
+            if exam_id not in exams:
+                exams[exam_id] = doc
 
             for section in sections:
-                exams[id_]['sections'].append(OrderedDict([
+                exams[exam_id]['sections'].append(OrderedDict([
                     ('lecture_code', section['lecture']),
                     ('exam_section', section['section']),
                     ('location', section['room'])
@@ -109,58 +109,6 @@ def get_page_links(endpoint):
         return [li.find('a')['href']
                 for li in soup.find('ul', class_='link').find_all('li')]
 
-    @staticmethod
-    def get_period(d):
-        def get_date(month, date, year):
-            month = 'jun' if month == 'june' else month
-            return datetime.strptime('%s %s %d' % (year, month, date),
-                                     '%Y %b %d')
-
-        d = datetime.strptime(d, '%Y-%m-%d')
-
-        year = d.year
-        month = None
-
-        for m, ld in (('dec', 31), ('apr', 30), ('june', 30), ('aug', 31)):
-            if get_date(m, 1, year) <= d <= get_date(m, ld, year):
-                month = m
-                break
-
-        if month:
-            return '%s%s' % (month.upper(), str(year)[2:])
-
-    @staticmethod
-    def get_course_id(course_code, date):
-        d = datetime.strptime(date, '%Y-%m-%d')
-        month, year, period = d.strftime('%b').lower(), d.year, UTMExams.get_period(date)
-        endings = {
-            'dec': {
-                'F': '%s9' % str(year),
-                'Y': '%s9' % str(int(year) - 1)
-            },
-            'apr': {
-                'S': '%s1' % str(year),
-                'Y': '%s9' % str(int(year) - 1)
-            },
-            'june': {
-                'F': '%s5F' % str(year),
-                'Y': '%s5' % str(year)
-            },
-            'aug': {
-                'S': '%s5S' % str(year),
-                'Y': '%s5' % str(year)
-            }
-        }
-
-        season = course_code[-1]
-        exam_id = course_id = None
-
-        if month in endings and season in endings[month]:
-            course_id = '%s%s' % (course_code, endings[month][season])
-            exam_id = '%s%s' % (course_id, period)
-
-        return exam_id, course_id
-
     @staticmethod
     def parse_sections(room):
         section = ''
@@ -168,10 +116,3 @@ def parse_sections(room):
             room, section = [x.strip()
                              for x in re.sub('[()]', ' ', room).split('  ')]
         return {'section': section, 'room': room}
-
-    @staticmethod
-    def parse_time(start, end, date):
-        def convert_time(t):
-            h, m, s = [int(x) for x in t.split(':')]
-            return (h * 60 * 60) + (m * 60) + s
-        return convert_time(start), convert_time(end)
diff --git a/uoftscrapers/scrapers/exams/utsc.py b/uoftscrapers/scrapers/exams/utsc.py
index 49cb680..a588e3b 100644
--- a/uoftscrapers/scrapers/exams/utsc.py
+++ b/uoftscrapers/scrapers/exams/utsc.py
@@ -1,4 +1,5 @@
 from ..utils import Scraper
+from .exams_helpers import *
 from bs4 import BeautifulSoup
 from collections import OrderedDict
 from datetime import datetime
@@ -31,20 +32,22 @@ def scrape(location='.'):
                     course_code, lecture_code = course_code.split(' ')
 
                 date = data[1]
-                start, end = UTSCExams.parse_time(data[2], data[3], date)
-                duration = end - start
-
-                location_ = data[4]
 
-                id_, course_id = UTSCExams.get_course_id(course_code, date)
+                exam_id, course_id = get_course_id(course_code, date)
 
-                period = UTSCExams.get_period(date)
+                period = get_period(date)
 
-                if not id_ or not period:
+                if not exam_id or not period:
                     continue
 
+                start = convert_time(data[2])
+                end = convert_time(data[3])
+                duration = end - start
+
+                location_ = data[4]
+
                 doc = OrderedDict([
-                    ('id', id_),
+                    ('id', exam_id),
                     ('course_id', course_id),
                     ('course_code', course_code),
                     ('campus', 'UTSC'),
@@ -56,10 +59,10 @@ def scrape(location='.'):
                     ('sections', [])
                 ])
 
-                if id_ not in exams:
-                    exams[id_] = doc
+                if exam_id not in exams:
+                    exams[exam_id] = doc
 
-                exams[id_]['sections'].append(OrderedDict([
+                exams[exam_id]['sections'].append(OrderedDict([
                     ('lecture_code', lecture_code or ''),
                     ('exam_section', ''),
                     ('location', location_)
@@ -69,62 +72,3 @@ def scrape(location='.'):
             Scraper.save_json(doc, location, id_)
 
         Scraper.logger.info('UTSCExams completed.')
-
-    @staticmethod
-    def get_period(d):
-        def get_date(month, date, year):
-            month = 'jun' if month == 'june' else month
-            return datetime.strptime('%s%s%d' % (year, month, date), '%Y%b%d')
-
-        d = datetime.strptime(d, '%Y-%m-%d')
-
-        year = d.year
-        month = None
-
-        for m, ld in (('dec', 31), ('apr', 30), ('june', 30), ('aug', 31)):
-            if get_date(m, 1, year) <= d <= get_date(m, ld, year):
-                month = m
-                break
-
-        if month:
-            return '%s%s' % (month.upper(), str(year)[2:])
-
-    @staticmethod
-    def get_course_id(course_code, date):
-        d = datetime.strptime(date, '%Y-%m-%d')
-        month, year, period = d.strftime(
-            "%b").lower(), d.year, UTSCExams.get_period(date)
-        endings = {
-            'dec': {
-                'F': '%s9' % str(year),
-                'Y': '%s9' % str(int(year) - 1)
-            },
-            'apr': {
-                'S': '%s1' % str(year),
-                'Y': '%s9' % str(int(year) - 1)
-            },
-            'june': {
-                'F': '%s5F' % str(year),
-                'Y': '%s5' % str(year)
-            },
-            'aug': {
-                'S': '%s5S' % str(year),
-                'Y': '%s5' % str(year)
-            }
-        }
-
-        season = course_code[-1]
-        exam_id = course_id = None
-
-        if month in endings and season in endings[month]:
-            course_id = '%s%s' % (course_code, endings[month][season])
-            exam_id = '%s%s' % (course_id, period)
-
-        return exam_id, course_id
-
-    @staticmethod
-    def parse_time(start, end, date):
-        def convert_time(t):
-            h, m = [int(x) for x in t.split(':')]
-            return (h * 60 * 60) + (m * 60)
-        return convert_time(start), convert_time(end)
diff --git a/uoftscrapers/scrapers/exams/utsg.py b/uoftscrapers/scrapers/exams/utsg.py
index 1b0f108..1e24b0a 100644
--- a/uoftscrapers/scrapers/exams/utsg.py
+++ b/uoftscrapers/scrapers/exams/utsg.py
@@ -1,4 +1,5 @@
 from ..utils import Scraper
+from .exams_helpers import *
 from bs4 import BeautifulSoup
 from collections import OrderedDict
 from datetime import datetime
@@ -64,9 +65,10 @@ def scrape(location='.', year=None, save=True):
             for row in rows[1:]:
                 data = [x.text.strip() for x in row.find_all('td')]
 
-                id_, course_id, course_code = ArtSciExams.parse_course_info(p, data[0])
+                exam_id, course_id, course_code = \
+                    ArtSciExams.parse_course_info(p, data[0])
 
-                if id_ is None:
+                if exam_id is None:
                     continue
 
                 section = data[1]
@@ -87,7 +89,7 @@ def scrape(location='.', year=None, save=True):
                 duration = end - start
 
                 doc = OrderedDict([
-                    ('id', id_),
+                    ('id', exam_id),
                     ('course_id', course_id),
                     ('course_code', course_code),
                     ('campus', 'UTSG'),
@@ -99,10 +101,10 @@ def scrape(location='.', year=None, save=True):
                     ('sections', [])
                 ])
 
-                if id_ not in exams:
-                    exams[id_] = doc
+                if exam_id not in exams:
+                    exams[exam_id] = doc
 
-                exams[id_]['sections'].append(OrderedDict([
+                exams[exam_id]['sections'].append(OrderedDict([
                     ('lecture_code', lecture_section or ''),
                     ('exam_section', exam_section or ''),
                     ('location', location_)
@@ -149,12 +151,8 @@ def parse_course_info(period, course_code):
     @staticmethod
     def parse_date(date, year):
         """Convert date of form `D DD MMM` to ISO 8601 format."""
-
-        date = date.split(' ')
-        if len(date) == 3:
-            day, date, month = date
-
-            return datetime.strptime('%s %s %s %s' % (day, date, month, year),
+        if date.count(' ') == 2:
+            return datetime.strptime('%s %s' % (date, year),
                                      '%a %d %b %y').date().isoformat()
 
     @staticmethod
@@ -239,7 +237,7 @@ def scrape(location='.', year=None, save=True):
                 duration = 2 * 60 * 60 + 30 * 60
                 end = start + duration
 
-                period = EngExams.get_period(date)
+                period = get_period(date)
 
                 exam_id, course_id, course_code = \
                     EngExams.get_course_info(info.find('strong').text.strip(), period)
@@ -292,22 +290,3 @@ def get_course_info(course, period):
             course_id = '%s20%s%s' % (course_code, year, endings[month]['month'])
             exam_id = '%s%s' % (course_id, period)
         return exam_id, course_id, course_code
-
-    @staticmethod
-    def get_period(d):
-        def get_date(month, date, year):
-            month = 'jun' if month == 'june' else month
-            return datetime.strptime('%s%s%d' % (year, month, date), '%Y%b%d')
-
-        d = datetime.strptime(d, '%Y-%m-%d')
-
-        year = d.year
-        month = None
-
-        for m, ld in (('dec', 31), ('apr', 30), ('june', 30), ('aug', 31)):
-            if get_date(m, 1, year) <= d <= get_date(m, ld, year):
-                month = m
-                break
-
-        if month:
-            return '%s%s' % (month.upper(), str(year)[2:])