From 13fdda031e17d18fe80d379ea35eea3ac995fa96 Mon Sep 17 00:00:00 2001
From: Anderson Ng Ho Yin <anderson202@live.hk>
Date: Thu, 12 May 2016 15:48:26 -0400
Subject: [PATCH 1/9] initial commit

added initial scraping code
---
 uoftscrapers/scrapers/calendar/utm.py | 33 +++++++++++++++++++++++----
 1 file changed, 29 insertions(+), 4 deletions(-)

diff --git a/uoftscrapers/scrapers/calendar/utm.py b/uoftscrapers/scrapers/calendar/utm.py
index d5bd387..752cff7 100644
--- a/uoftscrapers/scrapers/calendar/utm.py
+++ b/uoftscrapers/scrapers/calendar/utm.py
@@ -4,14 +4,39 @@
 import json
 import os
 import requests
+import datetime
+now = datetime.datetime.now()
 
 
 class UTMCalendar:
 
-    host = 'http://www.artsandscience.utoronto.ca/ofr/calendar/'
+    host1 = 'http://m.utm.utoronto.ca/importantDates.php?mode=full&session={0}5&header='
+    host2 = 'http://m.utm.utoronto.ca/importantDates.php?mode=full&session={0}9&header='
 
     @staticmethod
-    def scrape(location='.'):
+    def scrape(location='.', year=None):
+        year = year or now.year
+
         Scraper.logger.info('UTMCalendar initialized.')
-        Scraper.logger.info('Not implemented.')
-        Scraper.logger.info('UTMCalendar completed.')
+
+        html = Scraper.get(UTMCalendar.host1.format(year))
+        soup = BeautifulSoup(html, 'html.parser')
+        content = soup.find('div', class_='content')
+        dates = content.find_all('div', class_='title')
+        i = 0
+        currentDate = dates[i]
+        while(i<len(dates)):
+            print(dates[i].text)
+
+            while (currentDate == dates[i]):
+                info = dates[i].find_next('div', class_='info')
+                print(info.text)
+                i+=1
+                if(i>=len(dates)):
+                    break;
+            if(i<len(dates)):
+                currentDate = dates[i]
+
+
+
+        Scraper.logger.info('UTMCalendar completed.')
\ No newline at end of file

From 9b7ff62ada6893f7ab2b0e22feb7efa4bb68d343 Mon Sep 17 00:00:00 2001
From: Anderson Ng Ho Yin <anderson202@live.hk>
Date: Thu, 12 May 2016 15:48:47 -0400
Subject: [PATCH 2/9] Revert "initial commit"

This reverts commit 13fdda031e17d18fe80d379ea35eea3ac995fa96.
---
 uoftscrapers/scrapers/calendar/utm.py | 33 ++++-----------------------
 1 file changed, 4 insertions(+), 29 deletions(-)

diff --git a/uoftscrapers/scrapers/calendar/utm.py b/uoftscrapers/scrapers/calendar/utm.py
index 752cff7..d5bd387 100644
--- a/uoftscrapers/scrapers/calendar/utm.py
+++ b/uoftscrapers/scrapers/calendar/utm.py
@@ -4,39 +4,14 @@
 import json
 import os
 import requests
-import datetime
-now = datetime.datetime.now()
 
 
 class UTMCalendar:
 
-    host1 = 'http://m.utm.utoronto.ca/importantDates.php?mode=full&session={0}5&header='
-    host2 = 'http://m.utm.utoronto.ca/importantDates.php?mode=full&session={0}9&header='
+    host = 'http://www.artsandscience.utoronto.ca/ofr/calendar/'
 
     @staticmethod
-    def scrape(location='.', year=None):
-        year = year or now.year
-
+    def scrape(location='.'):
         Scraper.logger.info('UTMCalendar initialized.')
-
-        html = Scraper.get(UTMCalendar.host1.format(year))
-        soup = BeautifulSoup(html, 'html.parser')
-        content = soup.find('div', class_='content')
-        dates = content.find_all('div', class_='title')
-        i = 0
-        currentDate = dates[i]
-        while(i<len(dates)):
-            print(dates[i].text)
-
-            while (currentDate == dates[i]):
-                info = dates[i].find_next('div', class_='info')
-                print(info.text)
-                i+=1
-                if(i>=len(dates)):
-                    break;
-            if(i<len(dates)):
-                currentDate = dates[i]
-
-
-
-        Scraper.logger.info('UTMCalendar completed.')
\ No newline at end of file
+        Scraper.logger.info('Not implemented.')
+        Scraper.logger.info('UTMCalendar completed.')

From d4a02d76e74ea8bb1749db2706abd82451562996 Mon Sep 17 00:00:00 2001
From: Anderson Ng Ho Yin <anderson202@live.hk>
Date: Thu, 12 May 2016 16:22:01 -0400
Subject: [PATCH 3/9] initial commit

Added basic scraping code
---
 uoftscrapers/scrapers/calendar/utm.py | 33 +++++++++++++++++++++++----
 1 file changed, 29 insertions(+), 4 deletions(-)

diff --git a/uoftscrapers/scrapers/calendar/utm.py b/uoftscrapers/scrapers/calendar/utm.py
index d5bd387..752cff7 100644
--- a/uoftscrapers/scrapers/calendar/utm.py
+++ b/uoftscrapers/scrapers/calendar/utm.py
@@ -4,14 +4,39 @@
 import json
 import os
 import requests
+import datetime
+now = datetime.datetime.now()
 
 
 class UTMCalendar:
 
-    host = 'http://www.artsandscience.utoronto.ca/ofr/calendar/'
+    host1 = 'http://m.utm.utoronto.ca/importantDates.php?mode=full&session={0}5&header='
+    host2 = 'http://m.utm.utoronto.ca/importantDates.php?mode=full&session={0}9&header='
 
     @staticmethod
-    def scrape(location='.'):
+    def scrape(location='.', year=None):
+        year = year or now.year
+
         Scraper.logger.info('UTMCalendar initialized.')
-        Scraper.logger.info('Not implemented.')
-        Scraper.logger.info('UTMCalendar completed.')
+
+        html = Scraper.get(UTMCalendar.host1.format(year))
+        soup = BeautifulSoup(html, 'html.parser')
+        content = soup.find('div', class_='content')
+        dates = content.find_all('div', class_='title')
+        i = 0
+        currentDate = dates[i]
+        while(i<len(dates)):
+            print(dates[i].text)
+
+            while (currentDate == dates[i]):
+                info = dates[i].find_next('div', class_='info')
+                print(info.text)
+                i+=1
+                if(i>=len(dates)):
+                    break;
+            if(i<len(dates)):
+                currentDate = dates[i]
+
+
+
+        Scraper.logger.info('UTMCalendar completed.')
\ No newline at end of file

From 0b017b980fe01286a4973351e7e61ece95cc2dd9 Mon Sep 17 00:00:00 2001
From: Anderson Ng Ho Yin <anderson202@live.hk>
Date: Thu, 12 May 2016 17:41:41 -0400
Subject: [PATCH 4/9] Produced a functional scraper

the scraper is now functional, but the JSON file names may have to be
changed
---
 uoftscrapers/scrapers/calendar/utm.py | 58 ++++++++++++++++++---------
 1 file changed, 40 insertions(+), 18 deletions(-)

diff --git a/uoftscrapers/scrapers/calendar/utm.py b/uoftscrapers/scrapers/calendar/utm.py
index 752cff7..4b8d300 100644
--- a/uoftscrapers/scrapers/calendar/utm.py
+++ b/uoftscrapers/scrapers/calendar/utm.py
@@ -12,31 +12,53 @@ class UTMCalendar:
 
     host1 = 'http://m.utm.utoronto.ca/importantDates.php?mode=full&session={0}5&header='
     host2 = 'http://m.utm.utoronto.ca/importantDates.php?mode=full&session={0}9&header='
+    sessionLinks = [host1, host2]
 
     @staticmethod
     def scrape(location='.', year=None):
         year = year or now.year
 
+        calendar = OrderedDict()
         Scraper.logger.info('UTMCalendar initialized.')
+        for link in UTMCalendar.sessionLinks:
+            html = Scraper.get(link.format(year))
+            soup = BeautifulSoup(html, 'html.parser')
+            content = soup.find('div', class_='content')
+            dates = content.find_all('div', class_='title')
+            i = 0
+            currentDate = dates[i]
+            while(i<len(dates)):
+                date = dates[i].text
+                events = []
+                while (currentDate == dates[i]):
+                    info = dates[i].find_next('div', class_='info')
+                    description = info.text
+                    eventStartEnd = date.split('-')
+                    eventStart = eventStartEnd[0].strip()
+                    if len(eventStartEnd)>1:
+                        eventEnd = eventStartEnd[1].strip()
+                    else:
+                        eventEnd = eventStart
 
-        html = Scraper.get(UTMCalendar.host1.format(year))
-        soup = BeautifulSoup(html, 'html.parser')
-        content = soup.find('div', class_='content')
-        dates = content.find_all('div', class_='title')
-        i = 0
-        currentDate = dates[i]
-        while(i<len(dates)):
-            print(dates[i].text)
+                    events.append(OrderedDict([
+                            ('end_date', eventEnd),
+                            ('campus', 'UTM'),
+                            ('description', description)
+                        ]))
+                    i+=1
+                    if(i>=len(dates)):
+                        break;
+                calendar[date] = OrderedDict([
+                        ('date', eventStart),
+                        ('session', "Summer"),
+                        ('events', events)
+                    ])
+                if(i<len(dates)):
+                    currentDate = dates[i]
 
-            while (currentDate == dates[i]):
-                info = dates[i].find_next('div', class_='info')
-                print(info.text)
-                i+=1
-                if(i>=len(dates)):
-                    break;
-            if(i<len(dates)):
-                currentDate = dates[i]
 
+        for date, info in calendar.items():
+            Scraper.save_json(info, location, date)
 
-
-        Scraper.logger.info('UTMCalendar completed.')
\ No newline at end of file
+        Scraper.logger.info('UTMCalendar completed.')
+        return calendar
\ No newline at end of file

From 158d743593890fcb77fcec67f57663ac0d687ed3 Mon Sep 17 00:00:00 2001
From: Anderson Ng Ho Yin <anderson202@live.hk>
Date: Thu, 12 May 2016 17:52:25 -0400
Subject: [PATCH 5/9] fixed important dates session bug

fixed bug and added more comments
---
 uoftscrapers/scrapers/calendar/utm.py | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/uoftscrapers/scrapers/calendar/utm.py b/uoftscrapers/scrapers/calendar/utm.py
index 4b8d300..1330446 100644
--- a/uoftscrapers/scrapers/calendar/utm.py
+++ b/uoftscrapers/scrapers/calendar/utm.py
@@ -9,13 +9,16 @@
 
 
 class UTMCalendar:
+    '''Scraper for Important dates from UTM calendar found at https://www.utm.utoronto.ca/registrar/important-dates
+        '''
 
-    host1 = 'http://m.utm.utoronto.ca/importantDates.php?mode=full&session={0}5&header='
-    host2 = 'http://m.utm.utoronto.ca/importantDates.php?mode=full&session={0}9&header='
-    sessionLinks = [host1, host2]
-
+    summerLink = 'http://m.utm.utoronto.ca/importantDates.php?mode=full&session={0}5&header='
+    fallLink = 'http://m.utm.utoronto.ca/importantDates.php?mode=full&session={0}9&header='
+    sessionLinks = [summerLink, fallLink]
+    currentSession = "Summer"
     @staticmethod
-    def scrape(location='.', year=None):
+    def scrape(location='.', year=None): #scrapes most current sessions by default
+        
         year = year or now.year
 
         calendar = OrderedDict()
@@ -33,7 +36,7 @@ def scrape(location='.', year=None):
                 while (currentDate == dates[i]):
                     info = dates[i].find_next('div', class_='info')
                     description = info.text
-                    eventStartEnd = date.split('-')
+                    eventStartEnd = date.split('-') #splits event dates over a period
                     eventStart = eventStartEnd[0].strip()
                     if len(eventStartEnd)>1:
                         eventEnd = eventStartEnd[1].strip()
@@ -50,12 +53,12 @@ def scrape(location='.', year=None):
                         break;
                 calendar[date] = OrderedDict([
                         ('date', eventStart),
-                        ('session', "Summer"),
+                        ('session', UTMCalendar.currentSession),
                         ('events', events)
                     ])
                 if(i<len(dates)):
                     currentDate = dates[i]
-
+            UTMCalendar.currentSession = "Fall/Winter"
 
         for date, info in calendar.items():
             Scraper.save_json(info, location, date)

From c2744a4f67b122658305a55aba62fb4c42540e38 Mon Sep 17 00:00:00 2001
From: Anderson Ng Ho Yin <anderson202@live.hk>
Date: Thu, 12 May 2016 19:20:52 -0400
Subject: [PATCH 6/9] Updated JSON file names

Update date format to match ISO 8601 format
---
 uoftscrapers/scrapers/calendar/utm.py | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/uoftscrapers/scrapers/calendar/utm.py b/uoftscrapers/scrapers/calendar/utm.py
index 1330446..7ef385d 100644
--- a/uoftscrapers/scrapers/calendar/utm.py
+++ b/uoftscrapers/scrapers/calendar/utm.py
@@ -60,8 +60,19 @@ def scrape(location='.', year=None): #scrapes most current sessions by default
                     currentDate = dates[i]
             UTMCalendar.currentSession = "Fall/Winter"
 
+
         for date, info in calendar.items():
-            Scraper.save_json(info, location, date)
+            Scraper.save_json(info, location, UTMCalendar.convert_date(date))
 
         Scraper.logger.info('UTMCalendar completed.')
-        return calendar
\ No newline at end of file
+        return calendar
+
+    @staticmethod
+    def convert_date(date):
+        date_dict = {'January':'1', 'February':'2', 'March':'3', 'April':'4', 'May':'5', 'June':'6', 'July':'7',
+                     'August':'8', 'September':'9', 'October':'10', 'November':'11', 'December':'12'}
+        splitDate = date.split(' ')
+        year = splitDate[2]
+        day = splitDate[1].strip(',')
+        month = date_dict[splitDate[0]]
+        return("{0}-{1}-{2}".format(year, month, day))
\ No newline at end of file

From d2d20825b46f9c39db88005b5557866ec9cecf77 Mon Sep 17 00:00:00 2001
From: Anderson Ng Ho Yin <anderson202@live.hk>
Date: Fri, 13 May 2016 14:35:00 -0400
Subject: [PATCH 7/9] Cleaned up scraper code

Removed unnecessary lines and implemented suggested fixes
---
 uoftscrapers/scrapers/calendar/utm.py | 23 ++++++++++-------------
 1 file changed, 10 insertions(+), 13 deletions(-)

diff --git a/uoftscrapers/scrapers/calendar/utm.py b/uoftscrapers/scrapers/calendar/utm.py
index 7ef385d..92b57dd 100644
--- a/uoftscrapers/scrapers/calendar/utm.py
+++ b/uoftscrapers/scrapers/calendar/utm.py
@@ -5,26 +5,24 @@
 import os
 import requests
 import datetime
-now = datetime.datetime.now()
 
 
 class UTMCalendar:
     '''Scraper for Important dates from UTM calendar found at https://www.utm.utoronto.ca/registrar/important-dates
         '''
 
-    summerLink = 'http://m.utm.utoronto.ca/importantDates.php?mode=full&session={0}5&header='
-    fallLink = 'http://m.utm.utoronto.ca/importantDates.php?mode=full&session={0}9&header='
-    sessionLinks = [summerLink, fallLink]
+    link = 'http://m.utm.utoronto.ca/importantDates.php?mode=full&session={0}{1}&header='
+    sessionNumber = [5, 9]
     currentSession = "Summer"
     @staticmethod
     def scrape(location='.', year=None): #scrapes most current sessions by default
         
-        year = year or now.year
+        year = year or datetime.datetime.now()
 
         calendar = OrderedDict()
         Scraper.logger.info('UTMCalendar initialized.')
-        for link in UTMCalendar.sessionLinks:
-            html = Scraper.get(link.format(year))
+        for session in UTMCalendar.sessionNumber:
+            html = Scraper.get(UTMCalendar.link.format(year, session))
             soup = BeautifulSoup(html, 'html.parser')
             content = soup.find('div', class_='content')
             dates = content.find_all('div', class_='title')
@@ -37,9 +35,9 @@ def scrape(location='.', year=None): #scrapes most current sessions by default
                     info = dates[i].find_next('div', class_='info')
                     description = info.text
                     eventStartEnd = date.split('-') #splits event dates over a period
-                    eventStart = eventStartEnd[0].strip()
+                    eventStart = UTMCalendar.convert_date(eventStartEnd[0].strip())
                     if len(eventStartEnd)>1:
-                        eventEnd = eventStartEnd[1].strip()
+                        eventEnd = UTMCalendar.convert_date(eventStartEnd[1].strip())
                     else:
                         eventEnd = eventStart
 
@@ -69,10 +67,9 @@ def scrape(location='.', year=None): #scrapes most current sessions by default
 
     @staticmethod
     def convert_date(date):
-        date_dict = {'January':'1', 'February':'2', 'March':'3', 'April':'4', 'May':'5', 'June':'6', 'July':'7',
-                     'August':'8', 'September':'9', 'October':'10', 'November':'11', 'December':'12'}
         splitDate = date.split(' ')
+        print(splitDate)
         year = splitDate[2]
         day = splitDate[1].strip(',')
-        month = date_dict[splitDate[0]]
-        return("{0}-{1}-{2}".format(year, month, day))
\ No newline at end of file
+        month = datetime.datetime.strptime(splitDate[0], '%B').strftime('%m')
+        return("{0}-{1}-{2}".format(year, month, day.zfill(2)))
\ No newline at end of file

From c006dbe728e12b0e9ee86cbfd996cec3cf00fbd3 Mon Sep 17 00:00:00 2001
From: Anderson Ng Ho Yin <anderson202@live.hk>
Date: Fri, 13 May 2016 17:48:42 -0400
Subject: [PATCH 8/9] Fixed minor issues

---
 uoftscrapers/scrapers/calendar/utm.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/uoftscrapers/scrapers/calendar/utm.py b/uoftscrapers/scrapers/calendar/utm.py
index 92b57dd..71e8324 100644
--- a/uoftscrapers/scrapers/calendar/utm.py
+++ b/uoftscrapers/scrapers/calendar/utm.py
@@ -17,7 +17,7 @@ class UTMCalendar:
     @staticmethod
     def scrape(location='.', year=None): #scrapes most current sessions by default
         
-        year = year or datetime.datetime.now()
+        year = year or datetime.datetime.now().year
 
         calendar = OrderedDict()
         Scraper.logger.info('UTMCalendar initialized.')
@@ -68,7 +68,6 @@ def scrape(location='.', year=None): #scrapes most current sessions by default
     @staticmethod
     def convert_date(date):
         splitDate = date.split(' ')
-        print(splitDate)
         year = splitDate[2]
         day = splitDate[1].strip(',')
         month = datetime.datetime.strptime(splitDate[0], '%B').strftime('%m')

From 5bafb09aa5a9f01e8c1c93ae657bc2fe74a5be79 Mon Sep 17 00:00:00 2001
From: Anderson Ng Ho Yin <anderson202@live.hk>
Date: Sat, 14 May 2016 16:57:07 -0400
Subject: [PATCH 9/9] Updated JSON file format

---
 uoftscrapers/scrapers/calendar/utm.py | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/uoftscrapers/scrapers/calendar/utm.py b/uoftscrapers/scrapers/calendar/utm.py
index 71e8324..b90b4aa 100644
--- a/uoftscrapers/scrapers/calendar/utm.py
+++ b/uoftscrapers/scrapers/calendar/utm.py
@@ -13,12 +13,12 @@ class UTMCalendar:
 
     link = 'http://m.utm.utoronto.ca/importantDates.php?mode=full&session={0}{1}&header='
     sessionNumber = [5, 9]
-    currentSession = "Summer"
     @staticmethod
     def scrape(location='.', year=None): #scrapes most current sessions by default
         
         year = year or datetime.datetime.now().year
 
+        currentSession = "{0} SUMMER"
         calendar = OrderedDict()
         Scraper.logger.info('UTMCalendar initialized.')
         for session in UTMCalendar.sessionNumber:
@@ -43,6 +43,7 @@ def scrape(location='.', year=None): #scrapes most current sessions by default
 
                     events.append(OrderedDict([
                             ('end_date', eventEnd),
+                            ('session', currentSession.format(UTMCalendar.get_year_from(eventStart))),
                             ('campus', 'UTM'),
                             ('description', description)
                         ]))
@@ -51,12 +52,11 @@ def scrape(location='.', year=None): #scrapes most current sessions by default
                         break;
                 calendar[date] = OrderedDict([
                         ('date', eventStart),
-                        ('session', UTMCalendar.currentSession),
                         ('events', events)
                     ])
                 if(i<len(dates)):
                     currentDate = dates[i]
-            UTMCalendar.currentSession = "Fall/Winter"
+            currentSession = "{0} FALL/WINTER"
 
 
         for date, info in calendar.items():
@@ -71,4 +71,9 @@ def convert_date(date):
         year = splitDate[2]
         day = splitDate[1].strip(',')
         month = datetime.datetime.strptime(splitDate[0], '%B').strftime('%m')
-        return("{0}-{1}-{2}".format(year, month, day.zfill(2)))
\ No newline at end of file
+        return("{0}-{1}-{2}".format(year, month, day.zfill(2)))
+
+    @staticmethod
+    def get_year_from(date):
+        splitDate = date.split('-')
+        return splitDate[0]
\ No newline at end of file