Skip to content

Commit

Permalink
Merge pull request #81 from anderson202/utmdates
Browse files Browse the repository at this point in the history
Add UTM important dates scraper
  • Loading branch information
qasim committed May 15, 2016
2 parents 627513e + 5bafb09 commit 2064067
Showing 1 changed file with 66 additions and 4 deletions.
70 changes: 66 additions & 4 deletions uoftscrapers/scrapers/calendar/utm.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,76 @@
import json
import os
import requests
import datetime


class UTMCalendar:
    '''Scraper for important dates from the UTM calendar.

    Source page: https://www.utm.utoronto.ca/registrar/important-dates
    '''

    # NOTE(review): Arts & Science URL that nothing in this class reads;
    # kept only so external references to UTMCalendar.host keep working.
    host = 'http://www.artsandscience.utoronto.ca/ofr/calendar/'

    # URL template; {0} is the year and {1} the session number.
    link = 'http://m.utm.utoronto.ca/importantDates.php?mode=full&session={0}{1}&header='
    # Session numbers fetched, in order. The first one is labelled SUMMER
    # and every later one FALL/WINTER (see scrape).
    sessionNumber = [5, 9]

    @staticmethod
    def scrape(location='.', year=None):
        '''Scrape the important dates of one year and save them as JSON.

        For every entry of ``sessionNumber`` the page at ``link`` is fetched
        and each ``div.title`` inside ``div.content`` is read as a date (or a
        ``start - end`` date range); the next ``div.info`` after the title
        supplies the event description.

        location -- passed through to Scraper.save_json.
        year     -- year to scrape; the current year when omitted (or falsy).

        Returns an OrderedDict keyed by the raw title text. Every value is an
        OrderedDict with 'date' (start date, YYYY-MM-DD) and 'events' (a list
        of OrderedDicts with 'end_date', 'session', 'campus', 'description').
        Titles that occur more than once have their events merged into a
        single entry instead of the later one replacing the earlier one.
        '''
        year = year or datetime.datetime.now().year

        calendar = OrderedDict()
        Scraper.logger.info('UTMCalendar initialized.')

        for position, session in enumerate(UTMCalendar.sessionNumber):
            # Only the first session scraped is the summer one.
            if position == 0:
                session_label = "{0} SUMMER"
            else:
                session_label = "{0} FALL/WINTER"

            html = Scraper.get(UTMCalendar.link.format(year, session))
            soup = BeautifulSoup(html, 'html.parser')
            content = soup.find('div', class_='content')
            if content is not None:
                titles = content.find_all('div', class_='title')
            else:
                titles = []

            if not titles:
                # Nothing published for this session; skip it rather than
                # failing on the missing markup.
                Scraper.logger.info(
                    'No dates found for session {0}{1}.'.format(year, session))
                continue

            for title in titles:
                date = title.text
                description = title.find_next('div', class_='info').text
                event_start, event_end = UTMCalendar._date_range(date)

                entry = calendar.get(date)
                if entry is None:
                    entry = OrderedDict([
                        ('date', event_start),
                        ('events', [])
                    ])
                    calendar[date] = entry

                entry['events'].append(OrderedDict([
                    ('end_date', event_end),
                    ('session', session_label.format(
                        UTMCalendar.get_year_from(event_start))),
                    ('campus', 'UTM'),
                    ('description', description)
                ]))

        # NOTE(review): files are named after the start date, so a single
        # date and a range beginning on that date map to the same file name.
        for info in calendar.values():
            Scraper.save_json(info, location, info['date'])

        Scraper.logger.info('UTMCalendar completed.')
        return calendar

    @staticmethod
    def _date_range(text):
        '''Split a title such as "May 5, 2016 - May 10, 2016" into dates.

        Returns a (start, end) tuple of YYYY-MM-DD strings; for a title that
        holds a single date, end equals start.
        '''
        parts = text.split('-')  # a '-' separates the two ends of a period
        start = UTMCalendar.convert_date(parts[0].strip())
        if len(parts) > 1:
            end = UTMCalendar.convert_date(parts[1].strip())
        else:
            end = start
        return start, end

    @staticmethod
    def convert_date(date):
        '''Convert "Month D, YYYY" (e.g. "May 5, 2016") to "YYYY-MM-DD".

        Only the first three whitespace-separated tokens are read, so any
        trailing text is ignored. Raises IndexError when fewer than three
        tokens are present and ValueError when the first token is not a full
        month name.
        '''
        # split() without an argument tolerates repeated/leading whitespace.
        tokens = date.split()
        year = tokens[2]
        day = tokens[1].strip(',')
        month = datetime.datetime.strptime(tokens[0], '%B').strftime('%m')
        return "{0}-{1}-{2}".format(year, month, day.zfill(2))

    @staticmethod
    def get_year_from(date):
        '''Return the year part (text before the first '-') of YYYY-MM-DD.'''
        return date.split('-')[0]

0 comments on commit 2064067

Please sign in to comment.