diff --git a/README.md b/README.md index 56993ea..93fe4ad 100644 --- a/README.md +++ b/README.md @@ -23,6 +23,10 @@ This is a library of scrapers for various University of Toronto websites. It is - [UTSG Exams](#utsg-exams) - [UTM Exams](#utm-exams) - [UTSC Exams](#utsc-exams) + - [Athletics](#athletics) + - [UTSG Athletics](#utsg-athletics) + - [UTM Athletics](#utm-athletics) + - [UTSC Athletics](#utsc-athletics) - [Parking](#parking) - [Shuttle Bus Schedule](#shuttle) @@ -332,64 +336,6 @@ uoftscrapers.Timetable ---------------------------------------- -### UTM Exams - -##### Class name -```python -uoftscrapers.UTMExams -``` - -##### Scraper source -https://student.utm.utoronto.ca/examschedule/finalexams.php - -##### Output format -```js -{ - id: String, - course_id: String, - course_code: String - period: String, - date: String, - start_time: String, - end_time: String, - sections: [{ - section: String, - location: String - }] -} -``` - ------- - -### UTSC Exams - -##### Class name -```python -uoftscrapers.UTSCExams -``` - -##### Scraper source -http://www.utsc.utoronto.ca/registrar/examination-schedule - -##### Output format -```js -{ - id: String, - course_id: String, - course_code: String - period: String, - date: String, - start_time: String, - end_time: String, - sections: [{ - section: String, - location: String - }] -} -``` - ------- - ### UTSG Timetable ##### Class name @@ -511,6 +457,79 @@ Refer to [Exams](#exams) -------------------------------------------------------------------------------- +### Athletics + +##### Class name +```python +uoftscrapers.Athletics +``` + +##### Scraper source + - [UTSG Athletics](#utsg-athletics) + - [UTM Athletics](#utm-athletics) + - [UTSC Athletics](#utsc-athletics) + +##### Output format +```js +{ + "date": String, + "events":[{ + "title": String, + "location": String, + "building_id": String, + "start_time": String, + "end_time": String + }] +} +``` + +---------------------------------------- + +### UTSG Athletics + 
+##### Class name +```python +uoftscrapers.UTSGAthletics +``` + +##### Scraper source +_Not yet implemented_ + +##### Output format +Refer to [Athletics](#athletics) + +-------------------- + +### UTM Athletics + +##### Class name +```python +uoftscrapers.UTMAthletics +``` + +##### Scraper source +http://www.utm.utoronto.ca/athletics/schedule/month/ + +##### Output format +Refer to [Athletics](#athletics) + +-------------------- + +### UTSC Athletics + +##### Class name +```python +uoftscrapers.UTSCAthletics +``` + +##### Scraper source +http://www.utsc.utoronto.ca/athletics/calendar-node-field-date-time/month/ + +##### Output format +Refer to [Athletics](#athletics) + +-------------------------------------------------------------------------------- + ### Parking ##### Class name diff --git a/uoftscrapers/__init__.py b/uoftscrapers/__init__.py index 361a816..4e80fc1 100644 --- a/uoftscrapers/__init__.py +++ b/uoftscrapers/__init__.py @@ -25,6 +25,11 @@ from .scrapers.exams.utm import UTMExams from .scrapers.exams.utsc import UTSCExams +from .scrapers.athletics import Athletics +from .scrapers.athletics.utsg import UTSGAthletics +from .scrapers.athletics.utm import UTMAthletics +from .scrapers.athletics.utsc import UTSCAthletics + from .scrapers.parking import Parking from .scrapers.shuttle import Shuttle diff --git a/uoftscrapers/scrapers/athletics/__init__.py b/uoftscrapers/scrapers/athletics/__init__.py new file mode 100644 index 0000000..ad42955 --- /dev/null +++ b/uoftscrapers/scrapers/athletics/__init__.py @@ -0,0 +1,16 @@ +from ..utils import Scraper +from .utsg import UTSGAthletics +from .utm import UTMAthletics +from .utsc import UTSCAthletics + + +class Athletics: + """Aggregate scraper that runs the UTSG, UTM and UTSC athletics scrapers.""" + + @staticmethod + def scrape(location='.'): + """Run each campus scraper in turn, passing `location` to every one.""" + Scraper.logger.info('Athletics initialized.') + for campus in (UTSGAthletics, UTMAthletics, UTSCAthletics): + campus.scrape(location) + Scraper.logger.info('Athletics completed.') diff --git a/uoftscrapers/scrapers/athletics/utm.py 
b/uoftscrapers/scrapers/athletics/utm.py new file mode 100644 index 0000000..60ae255 --- /dev/null +++ b/uoftscrapers/scrapers/athletics/utm.py @@ -0,0 +1,81 @@ +from ..utils import Scraper +from bs4 import BeautifulSoup +from datetime import datetime +from collections import OrderedDict +import json +import requests + + +class UTMAthletics: + """A scraper for the UTM athletics schedule. + + The schedule is located at http://www.utm.utoronto.ca/athletics/schedule + """ + + host = 'http://www.utm.utoronto.ca/athletics/schedule/month/' + + @staticmethod + def scrape(location='.', month=None): + """Update the local JSON files for this scraper.""" + month = month or UTMAthletics.get_month(month) + + Scraper.logger.info('UTMAthletics initialized.') + html = Scraper.get_html('%s%s' % (UTMAthletics.host, month)) + soup = BeautifulSoup(html, 'html.parser') + + athletics = OrderedDict() + + calendar = soup.find('div', class_='month-view') + for tr in calendar.find_all('tr', class_='single-day'): + for td in tr.find_all('td'): + date = td.get('data-date') + + if not UTMAthletics.date_in_month(date, month): + continue + + events = [] + for item in td.find(class_='inner').find_all(class_='item'): + + # event cancelled or athletic center closed + if item.find(class_='cancelled-item'): + continue + + if item.find(class_='athletics-calendar-note'): + continue + + title = item.find(class_='athletics-calendar-title').text + location_ = item.find(class_='athletics-calendar-location').text + start = item.find(class_='date-display-start').get('content') + end = item.find(class_='date-display-end').get('content') + + events.append(OrderedDict([ + ('title', title), + ('location', location_), + ('building_id', '332'), + ('start_time', start), + ('end_time', end) + ])) + + athletics[date] = OrderedDict([ + ('date', date), + ('events', events) + ]) + + for date, doc in athletics.items(): + Scraper.save_json(doc, location, date) + + Scraper.logger.info('UTMAthletics completed.') + + 
@staticmethod + def get_month(m): + """Return the current year and month as 'YYYY-M'; `m` is ignored.""" + now = datetime.now() + return '%s-%s' % (now.year, now.month) + + @staticmethod + def date_in_month(d, m): + """Tell whether `d` ('YYYY-MM-DD') is in the year-month `m` ('YYYY-MM').""" + d = datetime.strptime(d, '%Y-%m-%d') + m = datetime.strptime(m, '%Y-%m') + + return (d.year, d.month) == (m.year, m.month) diff --git a/uoftscrapers/scrapers/athletics/utsc.py b/uoftscrapers/scrapers/athletics/utsc.py new file mode 100644 index 0000000..5a8f1a5 --- /dev/null +++ b/uoftscrapers/scrapers/athletics/utsc.py @@ -0,0 +1,83 @@ +from ..utils import Scraper +from bs4 import BeautifulSoup +from datetime import datetime +from collections import OrderedDict +import json +import requests + + +class UTSCAthletics: + """A scraper for the UTSC athletics schedule. + + The schedule is located at http://www.utsc.utoronto.ca/athletics + """ + + host = 'http://www.utsc.utoronto.ca/athletics/calendar-node-field-date-time/month/' + + @staticmethod + def scrape(location='.', month=None): + """Update the local JSON files for this scraper.""" + month = month or UTSCAthletics.get_month(month) + + Scraper.logger.info('UTSCAthletics initialized.') + html = Scraper.get_html('%s%s' % (UTSCAthletics.host, month)) + soup = BeautifulSoup(html, 'html.parser') + + athletics = OrderedDict() + + calendar = soup.find('div', class_='month-view') + for tr in calendar.find_all('tr', class_='single-day'): + for td in tr.find_all('td'): + date = td.get('data-date') + + if not UTSCAthletics.date_in_month(date, month): + continue + + events = [] + for item in td.find(class_='inner').find_all(class_='item'): + title = item.find(class_='views-field-title').text.strip() + + # The trailing ']' is kept from the original selector; also try the + # well-formed class name so either markup variant is matched. + location_ = (item.find(class_='views-field-field-location]') or + item.find(class_='views-field-field-location')) + + if location_.text.strip() == '': + location_ = list(location_.next_siblings)[1] + + location_ = location_.text.strip() + + start = item.find(class_='date-display-start').get('content') + end = item.find(class_='date-display-end').get('content') + + events.append(OrderedDict([ + ('title', title), + ('location', location_), + ('building_id', 
'208'), + ('start_time', start), + ('end_time', end) + ])) + + athletics[date] = OrderedDict([ + ('date', date), + ('events', events) + ]) + + for date, doc in athletics.items(): + Scraper.save_json(doc, location, date) + + Scraper.logger.info('UTSCAthletics completed.') + + @staticmethod + def get_month(m): + """Return the current year and month as 'YYYY-M'; `m` is ignored.""" + now = datetime.now() + return '%s-%s' % (now.year, now.month) + + @staticmethod + def date_in_month(d, m): + """Tell whether `d` ('YYYY-MM-DD') is in the year-month `m` ('YYYY-MM').""" + d = datetime.strptime(d, '%Y-%m-%d') + m = datetime.strptime(m, '%Y-%m') + + return (d.year, d.month) == (m.year, m.month) diff --git a/uoftscrapers/scrapers/athletics/utsg.py b/uoftscrapers/scrapers/athletics/utsg.py new file mode 100644 index 0000000..6073a67 --- /dev/null +++ b/uoftscrapers/scrapers/athletics/utsg.py @@ -0,0 +1,16 @@ +from ..utils import Scraper +from bs4 import BeautifulSoup +from collections import OrderedDict +import json +import requests + + +class UTSGAthletics: + """Placeholder for the UTSG athletics scraper; it only logs for now.""" + + @staticmethod + def scrape(location='.'): + """Log that the scraper is not implemented; `location` is unused.""" + Scraper.logger.info('UTSGAthletics initialized.') + Scraper.logger.info('Not implemented.') + Scraper.logger.info('UTSGAthletics completed.')