From 310ab229264bf8c2ee5f623d01b2356970da7321 Mon Sep 17 00:00:00 2001 From: Kashav Madan Date: Sat, 16 Apr 2016 14:26:19 -0400 Subject: [PATCH 01/12] Start athletics scraper --- uoftscrapers/__init__.py | 1 + .../scrapers/utmathletics/__init__.py | 96 +++++++++++++++++++ 2 files changed, 97 insertions(+) create mode 100644 uoftscrapers/scrapers/utmathletics/__init__.py diff --git a/uoftscrapers/__init__.py b/uoftscrapers/__init__.py index f72841d..1aa1a3a 100644 --- a/uoftscrapers/__init__.py +++ b/uoftscrapers/__init__.py @@ -15,6 +15,7 @@ from .scrapers.timetable.utsg import UTSGTimetable from .scrapers.utmshuttle import UTMShuttle from .scrapers.parking.utsg import UTSGParking +from .scrapers.utmathletics import UTMAthletics class NullHandler(logging.Handler): diff --git a/uoftscrapers/scrapers/utmathletics/__init__.py b/uoftscrapers/scrapers/utmathletics/__init__.py new file mode 100644 index 0000000..c3f6279 --- /dev/null +++ b/uoftscrapers/scrapers/utmathletics/__init__.py @@ -0,0 +1,96 @@ +from ..scraper import Scraper +from bs4 import BeautifulSoup +from datetime import datetime +from collections import OrderedDict +import json +import requests +import pytz + +from pprint import pprint + + +class UTMAthletics: + """A scraper for the UTM athletics schedule. + + The schedule is located at https://m.utm.utoronto.ca/physed.php. + """ + + host = 'https://m.utm.utoronto.ca/physed.php' + s = requests.Session() + + @staticmethod + def scrape(location='.'): + """Update the local JSON files for this scraper.""" + + Scraper.logger.info('UTMAthletics initialized.') + headers = { + 'Referer': UTMAthletics.host + } + html = UTMAthletics.s.get('%s' % UTMAthletics.host, + headers=headers).text + soup = BeautifulSoup(html, 'html.parser') + + athletics = OrderedDict() + + date = None + + div = soup.find('div', id='all') + for child in div.children: + + if child.name == 'br': + continue + + if child.name == 'h2': + date = UTMAthletics.parse_date(child.text) + + if date not in athletics: + athletics[date] = OrderedDict([ + ('date', date), + ('activities', []) + ]) + + continue + + if 'title' in child.get('class'): + title = child.find('b').text + location_ = child.find('span').text + + athletics[date]['activities'].append(OrderedDict([ + ('title', title), + ('location', location_) + ])) + + elif 'info' in child.get('class'): + start, end = UTMAthletics.parse_time(child.text, date) + athletics[date]['activities'][-1].update([ + ('start_time', start), + ('end_time', end) + ]) + + if athletics: + Scraper.ensure_location(location) + + for date, doc in athletics.items(): + with open('%s/%s.json' % (location, date), 'w+') as outfile: + json.dump(doc, outfile) + + Scraper.logger.info('UTMAthletics completed.') + + @staticmethod + def parse_date(d): + month, date_, year = [x.strip() for x in d.split(' ')] + return datetime.strptime('%s %s %s' % (date_.zfill(2), month, year), + '%d %B %Y').date().isoformat() + + @staticmethod + def parse_time(time_range, d): + + def convert_time(time): + t, p = time.split(' ') + h, m = t.split(':') + date = datetime.strptime('%s %s %s %s' % (d, h, m, p), + '%Y-%m-%d %I %M %p') + return date.replace(tzinfo=pytz.timezone('US/Eastern')).isoformat() + + start, end = [x.strip() for x in time_range.split(' - ')] + return convert_time(start), convert_time(end) From 1f3f7a61fc012cd34b4b844918cdae1527cdf703 Mon Sep 17 00:00:00 2001 From: Kashav Madan Date: Sat, 16 Apr 2016 15:35:42 -0400 Subject: [PATCH 02/12] Use full site (instead of mobile) for cleaner scraping --- .../scrapers/utmathletics/__init__.py | 81 ++++++++----------- 1 file changed, 35 insertions(+), 46 deletions(-) diff --git a/uoftscrapers/scrapers/utmathletics/__init__.py b/uoftscrapers/scrapers/utmathletics/__init__.py index c3f6279..3ba98d9 100644 --- a/uoftscrapers/scrapers/utmathletics/__init__.py +++ b/uoftscrapers/scrapers/utmathletics/__init__.py @@ -4,7 +4,6 @@ from collections import OrderedDict import json import requests -import pytz from pprint import pprint @@ -12,59 +11,56 @@ class UTMAthletics: """A scraper for the UTM athletics schedule. - The schedule is located at https://m.utm.utoronto.ca/physed.php. + The schedule is located at http://www.utm.utoronto.ca/athletics/schedule """ - host = 'https://m.utm.utoronto.ca/physed.php' + host = 'http://www.utm.utoronto.ca/athletics/schedule/month/' s = requests.Session() @staticmethod - def scrape(location='.'): + def scrape(location='.', month=None): """Update the local JSON files for this scraper.""" + month = month or UTMAthletics.get_month(month) Scraper.logger.info('UTMAthletics initialized.') headers = { 'Referer': UTMAthletics.host } - html = UTMAthletics.s.get('%s' % UTMAthletics.host, + html = UTMAthletics.s.get('%s%s' % (UTMAthletics.host, month), headers=headers).text soup = BeautifulSoup(html, 'html.parser') athletics = OrderedDict() - date = None + for tr in soup.find('div', class_='month-view').find_all('tr', class_='single-day'): + for td in tr.find_all('td'): + date = td.get('data-date') - div = soup.find('div', id='all') - for child in div.children: + if not UTMAthletics.date_in_month(date, month): + continue - if child.name == 'br': - continue + events = [] + for item in td.find(class_='inner').find_all(class_='item'): - if child.name == 'h2': - date = UTMAthletics.parse_date(child.text) + # event cancelled or athletic center closed + if item.find(class_='cancelled-item') or item.find(class_='athletics-calendar-note'): + continue - if date not in athletics: - athletics[date] = OrderedDict([ - ('date', date), - ('activities', []) - ]) + title = item.find(class_='athletics-calendar-title').text + location_ = item.find(class_='athletics-calendar-location').text + start = item.find(class_='date-display-start').get('content') + end = item.find(class_='date-display-end').get('content') - continue + events.append(OrderedDict([ + ('title', title), + ('location', location_), + ('start_time', start), + ('end_time', end) + ])) - if 'title' in child.get('class'): - title = child.find('b').text - location_ = child.find('span').text - - athletics[date]['activities'].append(OrderedDict([ - ('title', title), - ('location', location_) - ])) - - elif 'info' in child.get('class'): - start, end = UTMAthletics.parse_time(child.text, date) - athletics[date]['activities'][-1].update([ - ('start_time', start), - ('end_time', end) + athletics[date] = OrderedDict([ + ('date', date), + ('events', events) ]) if athletics: @@ -77,20 +73,13 @@ def scrape(location='.'): Scraper.logger.info('UTMAthletics completed.') @staticmethod - def parse_date(d): - month, date_, year = [x.strip() for x in d.split(' ')] - return datetime.strptime('%s %s %s' % (date_.zfill(2), month, year), - '%d %B %Y').date().isoformat() + def get_month(m): + now = datetime.now() + return '%s-%s' % (now.year, now.month) @staticmethod - def parse_time(time_range, d): - - def convert_time(time): - t, p = time.split(' ') - h, m = t.split(':') - date = datetime.strptime('%s %s %s %s' % (d, h, m, p), - '%Y-%m-%d %I %M %p') - return date.replace(tzinfo=pytz.timezone('US/Eastern')).isoformat() + def date_in_month(d, m): + d = datetime.strptime(d, '%Y-%m-%d') + m = datetime.strptime(m, '%Y-%m') - start, end = [x.strip() for x in time_range.split(' - ')] - return convert_time(start), convert_time(end) + return d.month == m.month From fabbc25e32203ec0fa854c85b77834534ec099f7 Mon Sep 17 00:00:00 2001 From: Kashav Madan Date: Sat, 16 Apr 2016 17:06:45 -0400 Subject: [PATCH 03/12] Fix merge conflict --- uoftscrapers/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/uoftscrapers/__init__.py b/uoftscrapers/__init__.py index 1aa1a3a..f72841d 100644 --- a/uoftscrapers/__init__.py +++ b/uoftscrapers/__init__.py @@ -15,7 +15,6 @@ from .scrapers.timetable.utsg import UTSGTimetable from .scrapers.utmshuttle import UTMShuttle from .scrapers.parking.utsg import UTSGParking -from .scrapers.utmathletics import UTMAthletics class NullHandler(logging.Handler): From 73f83e3ada810ef400da4f807c28c943ccf255bb Mon Sep 17 00:00:00 2001 From: Kashav Madan Date: Sat, 16 Apr 2016 17:11:05 -0400 Subject: [PATCH 04/12] Relocate athletics scraper --- uoftscrapers/__init__.py | 1 + .../scrapers/{utmathletics => athletics/utm}/__init__.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) rename uoftscrapers/scrapers/{utmathletics => athletics/utm}/__init__.py (98%) diff --git a/uoftscrapers/__init__.py b/uoftscrapers/__init__.py index a3d3d1d..faeed9e 100644 --- a/uoftscrapers/__init__.py +++ b/uoftscrapers/__init__.py @@ -2,6 +2,7 @@ import os import sys +from .scrapers.athletics.utm import UTMAthletics from .scrapers.buildings import Buildings from .scrapers.calendar.utsg import UTSGCalendar from .scrapers.coursefinder import CourseFinder diff --git a/uoftscrapers/scrapers/utmathletics/__init__.py b/uoftscrapers/scrapers/athletics/utm/__init__.py similarity index 98% rename from uoftscrapers/scrapers/utmathletics/__init__.py rename to uoftscrapers/scrapers/athletics/utm/__init__.py index 3ba98d9..afea3a7 100644 --- a/uoftscrapers/scrapers/utmathletics/__init__.py +++ b/uoftscrapers/scrapers/athletics/utm/__init__.py @@ -1,4 +1,4 @@ -from ..scraper import Scraper +from ...scraper import Scraper from bs4 import BeautifulSoup from datetime import datetime from collections import OrderedDict From 7b8917ea62cc65c2a7e4c3818e8fa3e854cad988 Mon Sep 17 00:00:00 2001 From: Kashav Madan Date: Sat, 16 Apr 2016 19:03:51 -0400 Subject: [PATCH 05/12] Building id --- uoftscrapers/scrapers/athletics/utm/__init__.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/uoftscrapers/scrapers/athletics/utm/__init__.py b/uoftscrapers/scrapers/athletics/utm/__init__.py index afea3a7..ced5cda 100644 --- a/uoftscrapers/scrapers/athletics/utm/__init__.py +++ b/uoftscrapers/scrapers/athletics/utm/__init__.py @@ -5,8 +5,6 @@ import json import requests -from pprint import pprint - class UTMAthletics: """A scraper for the UTM athletics schedule. @@ -54,6 +52,7 @@ def scrape(location='.', month=None): events.append(OrderedDict([ ('title', title), ('location', location_), + ('building_id', '332'), ('start_time', start), ('end_time', end) ])) From 3deee1eea011346fad29bd55999482957ceb85ff Mon Sep 17 00:00:00 2001 From: Kashav Madan Date: Sat, 16 Apr 2016 19:04:07 -0400 Subject: [PATCH 06/12] Add UTSC athletics scraper --- uoftscrapers/__init__.py | 1 + .../scrapers/athletics/utsc/__init__.py | 85 +++++++++++++++++++ 2 files changed, 86 insertions(+) create mode 100644 uoftscrapers/scrapers/athletics/utsc/__init__.py diff --git a/uoftscrapers/__init__.py b/uoftscrapers/__init__.py index faeed9e..f628430 100644 --- a/uoftscrapers/__init__.py +++ b/uoftscrapers/__init__.py @@ -3,6 +3,7 @@ import sys from .scrapers.athletics.utm import UTMAthletics +from .scrapers.athletics.utsc import UTSCAthletics from .scrapers.buildings import Buildings from .scrapers.calendar.utsg import UTSGCalendar from .scrapers.coursefinder import CourseFinder diff --git a/uoftscrapers/scrapers/athletics/utsc/__init__.py b/uoftscrapers/scrapers/athletics/utsc/__init__.py new file mode 100644 index 0000000..3526a49 --- /dev/null +++ b/uoftscrapers/scrapers/athletics/utsc/__init__.py @@ -0,0 +1,85 @@ +from ...scraper import Scraper +from bs4 import BeautifulSoup +from datetime import datetime +from collections import OrderedDict +import json +import requests + +class UTSCAthletics: + """A scraper for the UTSC athletics schedule. + + The schedule is located at http://www.utsc.utoronto.ca/athletics + """ + + host = 'http://www.utsc.utoronto.ca/athletics/calendar-node-field-date-time/month/' + s = requests.Session() + + @staticmethod + def scrape(location='.', month=None): + """Update the local JSON files for this scraper.""" + month = month or UTSCAthletics.get_month(month) + + Scraper.logger.info('UTSCAthletics initialized.') + headers = { + 'Referer': UTSCAthletics.host + } + html = UTSCAthletics.s.get('%s%s' % (UTSCAthletics.host, month), + headers=headers).text + soup = BeautifulSoup(html, 'html.parser') + + athletics = OrderedDict() + + for tr in soup.find('div', class_='month-view').find_all('tr', class_='single-day'): + for td in tr.find_all('td'): + date = td.get('data-date') + + if not UTSCAthletics.date_in_month(date, month): + continue + + events = [] + for item in td.find(class_='inner').find_all(class_='item'): + title = item.find(class_='views-field-title').text.strip() + + location_ = item.find(class_='views-field-field-location]') + + if location_.text.strip() == '': + location_ = list(location_.next_siblings)[1] + + location_ = location_.text.strip() + + start = item.find(class_='date-display-start').get('content') + end = item.find(class_='date-display-end').get('content') + + events.append(OrderedDict([ + ('title', title), + ('location', location_), + ('building_id', '208'), + ('start_time', start), + ('end_time', end) + ])) + + athletics[date] = OrderedDict([ + ('date', date), + ('events', events) + ]) + + if athletics: + Scraper.ensure_location(location) + + for date, doc in athletics.items(): + with open('%s/%s.json' % (location, date), 'w+') as outfile: + json.dump(doc, outfile) + + Scraper.logger.info('UTSCAthletics completed.') + + @staticmethod + def get_month(m): + now = datetime.now() + return '%s-%s' % (now.year, now.month) + + @staticmethod + def date_in_month(d, m): + d = datetime.strptime(d, '%Y-%m-%d') + m = datetime.strptime(m, '%Y-%m') + + return d.month == m.month From d5e004e3f1cb810254e8d53fc2ac328aa6efb2d9 Mon Sep 17 00:00:00 2001 From: Kashav Madan Date: Sat, 16 Apr 2016 19:06:12 -0400 Subject: [PATCH 07/12] Merge conflict --- uoftscrapers/__init__.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/uoftscrapers/__init__.py b/uoftscrapers/__init__.py index f628430..a3d3d1d 100644 --- a/uoftscrapers/__init__.py +++ b/uoftscrapers/__init__.py @@ -2,8 +2,6 @@ import os import sys -from .scrapers.athletics.utm import UTMAthletics -from .scrapers.athletics.utsc import UTSCAthletics from .scrapers.buildings import Buildings from .scrapers.calendar.utsg import UTSGCalendar from .scrapers.coursefinder import CourseFinder From 620d4eb31ffd0d7a632abc5ad200010a03b60244 Mon Sep 17 00:00:00 2001 From: Kashav Madan Date: Sat, 16 Apr 2016 19:15:13 -0400 Subject: [PATCH 08/12] Use new structure --- uoftscrapers/__init__.py | 4 ++++ uoftscrapers/scrapers/athletics/__init__.py | 12 ++++++++++++ .../scrapers/athletics/{utm/__init__.py => utm.py} | 8 ++------ .../scrapers/athletics/{utsc/__init__.py => utsc.py} | 9 +++------ 4 files changed, 21 insertions(+), 12 deletions(-) create mode 100644 uoftscrapers/scrapers/athletics/__init__.py rename uoftscrapers/scrapers/athletics/{utm/__init__.py => utm.py} (92%) rename uoftscrapers/scrapers/athletics/{utsc/__init__.py => utsc.py} (92%) diff --git a/uoftscrapers/__init__.py b/uoftscrapers/__init__.py index 361a816..2c09654 100644 --- a/uoftscrapers/__init__.py +++ b/uoftscrapers/__init__.py @@ -25,6 +25,10 @@ from .scrapers.exams.utm import UTMExams from .scrapers.exams.utsc import UTSCExams +from .scrapers.athletics import Athletics +from .scrapers.athletics.utm import UTMAthletics +from .scrapers.athletics.utsc import UTSCAthletics + from .scrapers.parking import Parking from .scrapers.shuttle import Shuttle diff --git a/uoftscrapers/scrapers/athletics/__init__.py b/uoftscrapers/scrapers/athletics/__init__.py new file mode 100644 index 0000000..7801afd --- /dev/null +++ b/uoftscrapers/scrapers/athletics/__init__.py @@ -0,0 +1,12 @@ +from ..utils import Scraper +from .utm import UTMAthletics +from .utsc import UTSCAthletics + + +class Athletics: + @staticmethod + def scrape(location='.'): + Scraper.logger.info('Athletics initialized.') + UTMAthletics.scrape(location) + UTSCAthletics.scrape(location) + Scraper.logger.info('Athletics completed.') diff --git a/uoftscrapers/scrapers/athletics/utm/__init__.py b/uoftscrapers/scrapers/athletics/utm.py similarity index 92% rename from uoftscrapers/scrapers/athletics/utm/__init__.py rename to uoftscrapers/scrapers/athletics/utm.py index ced5cda..d8d9182 100644 --- a/uoftscrapers/scrapers/athletics/utm/__init__.py +++ b/uoftscrapers/scrapers/athletics/utm.py @@ -1,4 +1,4 @@ -from ...scraper import Scraper +from ..utils import Scraper from bs4 import BeautifulSoup from datetime import datetime from collections import OrderedDict @@ -62,12 +62,8 @@ def scrape(location='.', month=None): ('events', events) ]) - if athletics: - Scraper.ensure_location(location) - for date, doc in athletics.items(): - with open('%s/%s.json' % (location, date), 'w+') as outfile: - json.dump(doc, outfile) + Scraper.save_json(doc, location, date) Scraper.logger.info('UTMAthletics completed.') diff --git a/uoftscrapers/scrapers/athletics/utsc/__init__.py b/uoftscrapers/scrapers/athletics/utsc.py similarity index 92% rename from uoftscrapers/scrapers/athletics/utsc/__init__.py rename to uoftscrapers/scrapers/athletics/utsc.py index 3526a49..f777e01 100644 --- a/uoftscrapers/scrapers/athletics/utsc/__init__.py +++ b/uoftscrapers/scrapers/athletics/utsc.py @@ -1,10 +1,11 @@ -from ...scraper import Scraper +from ..utils import Scraper from bs4 import BeautifulSoup from datetime import datetime from collections import OrderedDict import json import requests + class UTSCAthletics: """A scraper for the UTSC athletics schedule. @@ -63,12 +64,8 @@ def scrape(location='.', month=None): ('events', events) ]) - if athletics: - Scraper.ensure_location(location) - for date, doc in athletics.items(): - with open('%s/%s.json' % (location, date), 'w+') as outfile: - json.dump(doc, outfile) + Scraper.save_json(doc, location, date) Scraper.logger.info('UTSCAthletics completed.') From 29259959b40534a3ac2cd779c588c271edf525ba Mon Sep 17 00:00:00 2001 From: Kashav Madan Date: Sat, 16 Apr 2016 19:27:39 -0400 Subject: [PATCH 09/12] Clean up - Use get_html from Scraper superclass - PEP8 stuff --- uoftscrapers/scrapers/athletics/utm.py | 15 +++++++-------- uoftscrapers/scrapers/athletics/utsc.py | 10 +++------- 2 files changed, 10 insertions(+), 15 deletions(-) diff --git a/uoftscrapers/scrapers/athletics/utm.py b/uoftscrapers/scrapers/athletics/utm.py index d8d9182..60ae255 100644 --- a/uoftscrapers/scrapers/athletics/utm.py +++ b/uoftscrapers/scrapers/athletics/utm.py @@ -13,7 +13,6 @@ class UTMAthletics: """ host = 'http://www.utm.utoronto.ca/athletics/schedule/month/' - s = requests.Session() @staticmethod def scrape(location='.', month=None): @@ -21,16 +20,13 @@ def scrape(location='.', month=None): month = month or UTMAthletics.get_month(month) Scraper.logger.info('UTMAthletics initialized.') - headers = { - 'Referer': UTMAthletics.host - } - html = UTMAthletics.s.get('%s%s' % (UTMAthletics.host, month), - headers=headers).text + html = Scraper.get_html('%s%s' % (UTMAthletics.host, month)) soup = BeautifulSoup(html, 'html.parser') athletics = OrderedDict() - for tr in soup.find('div', class_='month-view').find_all('tr', class_='single-day'): + calendar = soup.find('div', class_='month-view') + for tr in calendar.find_all('tr', class_='single-day'): for td in tr.find_all('td'): date = td.get('data-date') @@ -41,7 +37,10 @@ def scrape(location='.', month=None): for item in td.find(class_='inner').find_all(class_='item'): # event cancelled or athletic center closed - if item.find(class_='cancelled-item') or item.find(class_='athletics-calendar-note'): + if item.find(class_='cancelled-item'): + continue + + if item.find(class_='athletics-calendar-note'): continue title = item.find(class_='athletics-calendar-title').text diff --git a/uoftscrapers/scrapers/athletics/utsc.py b/uoftscrapers/scrapers/athletics/utsc.py index f777e01..5a8f1a5 100644 --- a/uoftscrapers/scrapers/athletics/utsc.py +++ b/uoftscrapers/scrapers/athletics/utsc.py @@ -13,7 +13,6 @@ class UTSCAthletics: """ host = 'http://www.utsc.utoronto.ca/athletics/calendar-node-field-date-time/month/' - s = requests.Session() @staticmethod def scrape(location='.', month=None): @@ -21,16 +20,13 @@ def scrape(location='.', month=None): month = month or UTSCAthletics.get_month(month) Scraper.logger.info('UTSCAthletics initialized.') - headers = { - 'Referer': UTSCAthletics.host - } - html = UTSCAthletics.s.get('%s%s' % (UTSCAthletics.host, month), - headers=headers).text + html = Scraper.get_html('%s%s' % (UTSCAthletics.host, month)) soup = BeautifulSoup(html, 'html.parser') athletics = OrderedDict() - for tr in soup.find('div', class_='month-view').find_all('tr', class_='single-day'): + calendar = soup.find('div', class_='month-view') + for tr in calendar.find_all('tr', class_='single-day'): for td in tr.find_all('td'): date = td.get('data-date') From aeb8da2462ccad9bbff081ef88ccb9ed4a9247dc Mon Sep 17 00:00:00 2001 From: Kashav Madan Date: Sat, 16 Apr 2016 19:34:09 -0400 Subject: [PATCH 10/12] Add Athletics references --- README.md | 60 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/README.md b/README.md index 56993ea..e65f579 100644 --- a/README.md +++ b/README.md @@ -23,6 +23,9 @@ This is a library of scrapers for various University of Toronto websites. It is - [UTSG Exams](#utsg-exams) - [UTM Exams](#utm-exams) - [UTSC Exams](#utsc-exams) + - [Athletics](#athletics) + - [UTM Athletics](#utm-exams) + - [UTSC Athletics](#utsc-athletics) - [Parking](#parking) - [Shuttle Bus Schedule](#shuttle) @@ -511,6 +514,63 @@ Refer to [Exams](#exams) -------------------------------------------------------------------------------- +### Athletics + +##### Class name +```python +uoftscrapers.Athletics +``` + +##### Scraper source + - [UTM Athletics](#utm-athletics) + - [UTSC Athletics](#utsc-athletics) + +##### Output format +```js +{ + "date": String, + "events":[{ + "title": String, + "location": String, + "building_id": String, + "start_time": String, + "end_time": String + }] +} +``` + +---------------------------------------- + +### UTM Athletics + +##### Class name +```python +uoftscrapers.UTMAthletics +``` + +##### Scraper source +http://www.utm.utoronto.ca/athletics/schedule/month/ + +##### Output format +Refer to [Athletics](#athletics) + +-------------------- + +### UTSC Athletics + +##### Class name +```python +uoftscrapers.UTSCAthletics +``` + +##### Scraper source +http://www.utsc.utoronto.ca/athletics/calendar-node-field-date-time/month/ + +##### Output format +Refer to [Athletics](#athletics) + +-------------------------------------------------------------------------------- + ### Parking ##### Class name From 4324a7e25a13c1eaba5165629ec09cb5d7b0d16f Mon Sep 17 00:00:00 2001 From: Kashav Madan Date: Sat, 16 Apr 2016 19:38:22 -0400 Subject: [PATCH 11/12] Remove duplicate exams references, add UTSG athletics --- README.md | 77 +++++++++++++------------------------------------------ 1 file changed, 18 insertions(+), 59 deletions(-) diff --git a/README.md b/README.md index e65f579..93fe4ad 100644 --- a/README.md +++ b/README.md @@ -24,6 +24,7 @@ This is a library of scrapers for various University of Toronto websites. It is - [UTM Exams](#utm-exams) - [UTSC Exams](#utsc-exams) - [Athletics](#athletics) + - [UTSG Athletics](#utsg-athletics) - [UTM Athletics](#utm-exams) - [UTSC Athletics](#utsc-athletics) - [Parking](#parking) @@ -335,64 +336,6 @@ uoftscrapers.Timetable ---------------------------------------- -### UTM Exams - -##### Class name -```python -uoftscrapers.UTMExams -``` - -##### Scraper source -https://student.utm.utoronto.ca/examschedule/finalexams.php - -##### Output format -```js -{ - id: String, - course_id: String, - course_code: String - period: String, - date: String, - start_time: String, - end_time: String, - sections: [{ - section: String, - location: String - }] -} -``` - ------- - -### UTSC Exams - -##### Class name -```python -uoftscrapers.UTSCExams -``` - -##### Scraper source -http://www.utsc.utoronto.ca/registrar/examination-schedule - -##### Output format -```js -{ - id: String, - course_id: String, - course_code: String - period: String, - date: String, - start_time: String, - end_time: String, - sections: [{ - section: String, - location: String - }] -} -``` - ------- - ### UTSG Timetable ##### Class name @@ -522,6 +465,7 @@ uoftscrapers.Athletics ``` ##### Scraper source + - [UTSG Athletics](#utsg-athletics) - [UTM Athletics](#utm-athletics) - [UTSC Athletics](#utsc-athletics) @@ -529,7 +473,7 @@ uoftscrapers.Athletics ```js { "date": String, - "events":[{ + "events":[{ "title": String, "location": String, "building_id": String, @@ -541,6 +485,21 @@ uoftscrapers.Athletics ---------------------------------------- +### UTSG Athletics + +##### Class name +```python +uoftscrapers.UTSGAthletics +``` + +##### Scraper source +_Not yet implemented_ + +##### Output format +Refer to [Athletics](#athletics) + +-------------------- + ### UTM Athletics ##### Class name From ac7f4b79c4404312d211c883ec5ca88466afcdcc Mon Sep 17 00:00:00 2001 From: Kashav Madan Date: Sat, 16 Apr 2016 19:40:41 -0400 Subject: [PATCH 12/12] Bootstrap UTSGAthletics --- uoftscrapers/__init__.py | 1 + uoftscrapers/scrapers/athletics/__init__.py | 2 ++ uoftscrapers/scrapers/athletics/utsg.py | 14 ++++++++++++++ 3 files changed, 17 insertions(+) create mode 100644 uoftscrapers/scrapers/athletics/utsg.py diff --git a/uoftscrapers/__init__.py b/uoftscrapers/__init__.py index 2c09654..4e80fc1 100644 --- a/uoftscrapers/__init__.py +++ b/uoftscrapers/__init__.py @@ -26,6 +26,7 @@ from .scrapers.exams.utsc import UTSCExams from .scrapers.athletics import Athletics +from .scrapers.athletics.utsg import UTSGAthletics from .scrapers.athletics.utm import UTMAthletics from .scrapers.athletics.utsc import UTSCAthletics diff --git a/uoftscrapers/scrapers/athletics/__init__.py b/uoftscrapers/scrapers/athletics/__init__.py index 7801afd..ad42955 100644 --- a/uoftscrapers/scrapers/athletics/__init__.py +++ b/uoftscrapers/scrapers/athletics/__init__.py @@ -1,4 +1,5 @@ from ..utils import Scraper +from .utsg import UTSGAthletics from .utm import UTMAthletics from .utsc import UTSCAthletics @@ -7,6 +8,7 @@ class Athletics: @staticmethod def scrape(location='.'): Scraper.logger.info('Athletics initialized.') + UTSGAthletics.scrape(location) UTMAthletics.scrape(location) UTSCAthletics.scrape(location) Scraper.logger.info('Athletics completed.') diff --git a/uoftscrapers/scrapers/athletics/utsg.py b/uoftscrapers/scrapers/athletics/utsg.py new file mode 100644 index 0000000..6073a67 --- /dev/null +++ b/uoftscrapers/scrapers/athletics/utsg.py @@ -0,0 +1,14 @@ +from ..utils import Scraper +from bs4 import BeautifulSoup +from collections import OrderedDict +import json +import requests + + +class UTSGAthletics: + + @staticmethod + def scrape(location='.'): + Scraper.logger.info('UTSGAthletics initialized.') + Scraper.logger.info('Not implemented.') + Scraper.logger.info('UTSGAthletics completed.')