initial commit

added initial scraping code
anderson202 · May 12, 2016 · 13fdda0 · 13fdda0
1 parent 627513e
commit 13fdda0
Showing 1 changed file with 29 additions and 4 deletions.
diff --git a/uoftscrapers/scrapers/calendar/utm.py b/uoftscrapers/scrapers/calendar/utm.py
@@ -4,14 +4,39 @@
 import json
 import os
 import requests
+import datetime
+now = datetime.datetime.now()
 
 
 class UTMCalendar:
+    """Scraper for UTM important dates (m.utm.utoronto.ca)."""
 
-    host = 'http://www.artsandscience.utoronto.ca/ofr/calendar/'
+    # {0} is the year; the session code is the year followed by 5 or 9.
+    host1 = 'http://m.utm.utoronto.ca/importantDates.php?mode=full&session={0}5&header='
+    host2 = 'http://m.utm.utoronto.ca/importantDates.php?mode=full&session={0}9&header='
 
     @staticmethod
-    def scrape(location='.'):
+    def scrape(location='.', year=None):
+        """Fetch the host1 page for `year` and print each date and its info.
+
+        `year` defaults to `now.year`; `location` and host2 are unused here.
+        """
+        year = year or now.year
         Scraper.logger.info('UTMCalendar initialized.')
-        Scraper.logger.info('Not implemented.')
-        Scraper.logger.info('UTMCalendar completed.')
+
+        html = Scraper.get(UTMCalendar.host1.format(year))
+        soup = BeautifulSoup(html, 'html.parser')
+        content = soup.find('div', class_='content')
+        # No content div means no dates; an empty list skips the loop.
+        dates = content.find_all('div', class_='title') if content else []
+        group_head = None
+        for title in dates:
+            # Consecutive equal titles form one group; print the title once.
+            if group_head is None or group_head != title:
+                group_head = title
+                print(title.text)
+            info = title.find_next('div', class_='info')
+            if info is not None:  # a title with no following info div
+                print(info.text)
+
+        Scraper.logger.info('UTMCalendar completed.')