Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add UTSC / UTM athletics scraper #51

Merged
merged 15 commits into from
Apr 16, 2016
Merged
135 changes: 77 additions & 58 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@ This is a library of scrapers for various University of Toronto websites. It is
- [UTSG Exams](#utsg-exams)
- [UTM Exams](#utm-exams)
- [UTSC Exams](#utsc-exams)
- [Athletics](#athletics)
- [UTSG Athletics](#utsg-athletics)
- [UTM Athletics](#utm-athletics)
- [UTSC Athletics](#utsc-athletics)
- [Parking](#parking)
- [Shuttle Bus Schedule](#shuttle)

Expand Down Expand Up @@ -332,64 +336,6 @@ uoftscrapers.Timetable

----------------------------------------

### UTM Exams

##### Class name
```python
uoftscrapers.UTMExams
```

##### Scraper source
https://student.utm.utoronto.ca/examschedule/finalexams.php

##### Output format
```js
{
id: String,
course_id: String,
course_code: String,
period: String,
date: String,
start_time: String,
end_time: String,
sections: [{
section: String,
location: String
}]
}
```

------

### UTSC Exams

##### Class name
```python
uoftscrapers.UTSCExams
```

##### Scraper source
http://www.utsc.utoronto.ca/registrar/examination-schedule

##### Output format
```js
{
id: String,
course_id: String,
course_code: String,
period: String,
date: String,
start_time: String,
end_time: String,
sections: [{
section: String,
location: String
}]
}
```

------

### UTSG Timetable

##### Class name
Expand Down Expand Up @@ -511,6 +457,79 @@ Refer to [Exams](#exams)

--------------------------------------------------------------------------------

### Athletics

##### Class name
```python
uoftscrapers.Athletics
```

##### Scraper source
- [UTSG Athletics](#utsg-athletics)
- [UTM Athletics](#utm-athletics)
- [UTSC Athletics](#utsc-athletics)

##### Output format
```js
{
"date": String,
"events":[{
"title": String,
"location": String,
"building_id": String,
"start_time": String,
"end_time": String
}]
}
```

----------------------------------------

### UTSG Athletics

##### Class name
```python
uoftscrapers.UTSGAthletics
```

##### Scraper source
_Not yet implemented_

##### Output format
Refer to [Athletics](#athletics)

--------------------

### UTM Athletics

##### Class name
```python
uoftscrapers.UTMAthletics
```

##### Scraper source
http://www.utm.utoronto.ca/athletics/schedule/month/

##### Output format
Refer to [Athletics](#athletics)

--------------------

### UTSC Athletics

##### Class name
```python
uoftscrapers.UTSCAthletics
```

##### Scraper source
http://www.utsc.utoronto.ca/athletics/calendar-node-field-date-time/month/

##### Output format
Refer to [Athletics](#athletics)

--------------------------------------------------------------------------------

### Parking

##### Class name
Expand Down
5 changes: 5 additions & 0 deletions uoftscrapers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@
from .scrapers.exams.utm import UTMExams
from .scrapers.exams.utsc import UTSCExams

from .scrapers.athletics import Athletics
from .scrapers.athletics.utsg import UTSGAthletics
from .scrapers.athletics.utm import UTMAthletics
from .scrapers.athletics.utsc import UTSCAthletics

from .scrapers.parking import Parking

from .scrapers.shuttle import Shuttle
Expand Down
14 changes: 14 additions & 0 deletions uoftscrapers/scrapers/athletics/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from ..utils import Scraper
from .utsg import UTSGAthletics
from .utm import UTMAthletics
from .utsc import UTSCAthletics


class Athletics:
    """Aggregate scraper that runs every campus athletics scraper."""

    @staticmethod
    def scrape(location='.'):
        """Run the UTSG, UTM and UTSC athletics scrapers, in that order.

        ``location`` is handed unchanged to each campus scraper's
        ``scrape`` call.
        """
        Scraper.logger.info('Athletics initialized.')
        for campus_scraper in (UTSGAthletics, UTMAthletics, UTSCAthletics):
            campus_scraper.scrape(location)
        Scraper.logger.info('Athletics completed.')
79 changes: 79 additions & 0 deletions uoftscrapers/scrapers/athletics/utm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
from ..utils import Scraper
from bs4 import BeautifulSoup
from datetime import datetime
from collections import OrderedDict
import json
import requests


class UTMAthletics:
    """A scraper for the UTM athletics schedule.

    The schedule is located at http://www.utm.utoronto.ca/athletics/schedule
    """

    host = 'http://www.utm.utoronto.ca/athletics/schedule/month/'

    @staticmethod
    def scrape(location='.', month=None):
        """Update the local JSON files for this scraper.

        One document per calendar day of the requested month is written
        through ``Scraper.save_json``, keyed by the day's date string.

        Args:
            location: Passed unchanged to ``Scraper.save_json``.
            month: Month to scrape as a ``'YYYY-MM'`` string. Defaults to
                the current month.
        """
        month = month or UTMAthletics.get_month(month)

        Scraper.logger.info('UTMAthletics initialized.')
        html = Scraper.get_html('%s%s' % (UTMAthletics.host, month))
        soup = BeautifulSoup(html, 'html.parser')

        athletics = OrderedDict()

        calendar = soup.find('div', class_='month-view')
        if calendar is None:
            # Page layout changed or the request returned something else;
            # report it instead of failing with an AttributeError.
            Scraper.logger.info(
                'UTMAthletics found no calendar for %s.' % month)
            rows = []
        else:
            rows = calendar.find_all('tr', class_='single-day')

        for tr in rows:
            for td in tr.find_all('td'):
                date = td.get('data-date')

                # Cells without a date, or belonging to the adjacent
                # months shown in the month view, are not part of the result.
                if not date or not UTMAthletics.date_in_month(date, month):
                    continue

                inner = td.find(class_='inner')
                items = inner.find_all(class_='item') if inner else []

                events = []
                for item in items:
                    event = UTMAthletics._parse_event(item)
                    if event is not None:
                        events.append(event)

                athletics[date] = OrderedDict([
                    ('date', date),
                    ('events', events)
                ])

        for date, doc in athletics.items():
            Scraper.save_json(doc, location, date)

        Scraper.logger.info('UTMAthletics completed.')

    @staticmethod
    def _parse_event(item):
        """Return the event record for one calendar item, or None if skipped."""
        # event cancelled or athletic center closed
        if item.find(class_='cancelled-item'):
            return None

        if item.find(class_='athletics-calendar-note'):
            return None

        title = item.find(class_='athletics-calendar-title').text
        location_ = item.find(class_='athletics-calendar-location').text
        start = item.find(class_='date-display-start').get('content')
        end = item.find(class_='date-display-end').get('content')

        return OrderedDict([
            ('title', title),
            ('location', location_),
            # NOTE(review): presumably the building id of the UTM athletics
            # centre -- confirm against the buildings dataset.
            ('building_id', '332'),
            ('start_time', start),
            ('end_time', end)
        ])

    @staticmethod
    def get_month(m=None):
        """Return the current month as a zero-padded ``'YYYY-MM'`` string.

        ``m`` is accepted for backward compatibility and is ignored.
        """
        return datetime.now().strftime('%Y-%m')

    @staticmethod
    def date_in_month(d, m):
        """Return True if day ``d`` ('YYYY-MM-DD') falls in month ``m`` ('YYYY-MM').

        Both year and month must match, so the same month of another year
        is not treated as in-month.
        """
        day = datetime.strptime(d, '%Y-%m-%d')
        target = datetime.strptime(m, '%Y-%m')

        return (day.year, day.month) == (target.year, target.month)
78 changes: 78 additions & 0 deletions uoftscrapers/scrapers/athletics/utsc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
from ..utils import Scraper
from bs4 import BeautifulSoup
from datetime import datetime
from collections import OrderedDict
import json
import requests


class UTSCAthletics:
    """A scraper for the UTSC athletics schedule.

    The schedule is located at http://www.utsc.utoronto.ca/athletics
    """

    host = 'http://www.utsc.utoronto.ca/athletics/calendar-node-field-date-time/month/'

    @staticmethod
    def scrape(location='.', month=None):
        """Update the local JSON files for this scraper.

        One document per calendar day of the requested month is written
        through ``Scraper.save_json``, keyed by the day's date string.

        Args:
            location: Passed unchanged to ``Scraper.save_json``.
            month: Month to scrape as a ``'YYYY-MM'`` string. Defaults to
                the current month.
        """
        month = month or UTSCAthletics.get_month(month)

        Scraper.logger.info('UTSCAthletics initialized.')
        html = Scraper.get_html('%s%s' % (UTSCAthletics.host, month))
        soup = BeautifulSoup(html, 'html.parser')

        athletics = OrderedDict()

        calendar = soup.find('div', class_='month-view')
        if calendar is None:
            # Page layout changed or the request returned something else;
            # report it instead of failing with an AttributeError.
            Scraper.logger.info(
                'UTSCAthletics found no calendar for %s.' % month)
            rows = []
        else:
            rows = calendar.find_all('tr', class_='single-day')

        for tr in rows:
            for td in tr.find_all('td'):
                date = td.get('data-date')

                # Cells without a date, or belonging to the adjacent
                # months shown in the month view, are not part of the result.
                if not date or not UTSCAthletics.date_in_month(date, month):
                    continue

                inner = td.find(class_='inner')
                items = inner.find_all(class_='item') if inner else []

                events = [UTSCAthletics._parse_event(item) for item in items]

                athletics[date] = OrderedDict([
                    ('date', date),
                    ('events', events)
                ])

        for date, doc in athletics.items():
            Scraper.save_json(doc, location, date)

        Scraper.logger.info('UTSCAthletics completed.')

    @staticmethod
    def _parse_event(item):
        """Return the event record for one calendar item."""
        title = item.find(class_='views-field-title').text.strip()
        start = item.find(class_='date-display-start').get('content')
        end = item.find(class_='date-display-end').get('content')

        return OrderedDict([
            ('title', title),
            ('location', UTSCAthletics._parse_location(item)),
            # NOTE(review): presumably the building id of the UTSC athletics
            # centre -- confirm against the buildings dataset.
            ('building_id', '208'),
            ('start_time', start),
            ('end_time', end)
        ])

    @staticmethod
    def _parse_location(item):
        """Return the stripped location text of a calendar item, or ''."""
        # NOTE(review): the trailing ']' looks like a typo, but it may mirror
        # a class name actually present in the site's markup -- confirm.
        # It is tried first so existing behaviour is kept; the clean class
        # name is only a fallback when the first lookup finds nothing.
        node = item.find(class_='views-field-field-location]')
        if node is None:
            node = item.find(class_='views-field-field-location')
        if node is None:
            return ''

        if node.text.strip() == '':
            # An empty location field: the text is taken from the second
            # following sibling instead, when there is one.
            siblings = list(node.next_siblings)
            if len(siblings) < 2:
                return ''
            node = siblings[1]

        return node.text.strip()

    @staticmethod
    def get_month(m=None):
        """Return the current month as a zero-padded ``'YYYY-MM'`` string.

        ``m`` is accepted for backward compatibility and is ignored.
        """
        return datetime.now().strftime('%Y-%m')

    @staticmethod
    def date_in_month(d, m):
        """Return True if day ``d`` ('YYYY-MM-DD') falls in month ``m`` ('YYYY-MM').

        Both year and month must match, so the same month of another year
        is not treated as in-month.
        """
        day = datetime.strptime(d, '%Y-%m-%d')
        target = datetime.strptime(m, '%Y-%m')

        return (day.year, day.month) == (target.year, target.month)
14 changes: 14 additions & 0 deletions uoftscrapers/scrapers/athletics/utsg.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from ..utils import Scraper
from bs4 import BeautifulSoup
from collections import OrderedDict
import json
import requests


class UTSGAthletics:
    """Placeholder for the UTSG athletics scraper; it only logs for now."""

    @staticmethod
    def scrape(location='.'):
        """Log the start, the not-implemented notice and the end of a run.

        ``location`` is accepted for parity with the other campus scrapers
        but is not used.
        """
        messages = (
            'UTSGAthletics initialized.',
            'Not implemented.',
            'UTSGAthletics completed.',
        )
        for message in messages:
            Scraper.logger.info(message)