diff --git a/uoftscrapers/scrapers/calendar/utm.py b/uoftscrapers/scrapers/calendar/utm.py index d5bd387..b90b4aa 100644 --- a/uoftscrapers/scrapers/calendar/utm.py +++ b/uoftscrapers/scrapers/calendar/utm.py @@ -4,14 +4,76 @@ import json import os import requests +import datetime class UTMCalendar: + '''Scraper for Important dates from UTM calendar found at https://www.utm.utoronto.ca/registrar/important-dates + ''' - host = 'http://www.artsandscience.utoronto.ca/ofr/calendar/' - + link = 'http://m.utm.utoronto.ca/importantDates.php?mode=full&session={0}{1}&header=' + sessionNumber = [5, 9] @staticmethod - def scrape(location='.'): + def scrape(location='.', year=None): #scrapes most current sessions by default + + year = year or datetime.datetime.now().year + + currentSession = "{0} SUMMER" + calendar = OrderedDict() Scraper.logger.info('UTMCalendar initialized.') - Scraper.logger.info('Not implemented.') + for session in UTMCalendar.sessionNumber: + html = Scraper.get(UTMCalendar.link.format(year, session)) + soup = BeautifulSoup(html, 'html.parser') + content = soup.find('div', class_='content') + dates = content.find_all('div', class_='title') + i = 0 + currentDate = dates[i] + while(i1: + eventEnd = UTMCalendar.convert_date(eventStartEnd[1].strip()) + else: + eventEnd = eventStart + + events.append(OrderedDict([ + ('end_date', eventEnd), + ('session', currentSession.format(UTMCalendar.get_year_from(eventStart))), + ('campus', 'UTM'), + ('description', description) + ])) + i+=1 + if(i>=len(dates)): + break; + calendar[date] = OrderedDict([ + ('date', eventStart), + ('events', events) + ]) + if(i