Skip to content

Commit

Permalink
Merge pull request #77 from kshvmdn/eng-exams
Browse files Browse the repository at this point in the history
Add engineering exams
  • Loading branch information
qasim committed May 2, 2016
2 parents 02d3945 + 4176d90 commit 1c0dd67
Show file tree
Hide file tree
Showing 4 changed files with 237 additions and 173 deletions.
64 changes: 64 additions & 0 deletions uoftscrapers/scrapers/exams/exams_helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
from datetime import datetime


def convert_time(t):
    """Convert a colon-separated clock string to seconds.

    The fields are read in order as hours, minutes and seconds. Missing
    trailing fields contribute nothing, and fields past the third are
    ignored, although every field must still parse as an integer.
    """
    fields = list(map(int, t.split(':')))
    # zip() stops at the shorter sequence, so at most three fields are
    # weighted: hours, minutes, seconds.
    return sum(value * weight
               for value, weight in zip(fields, (3600, 60, 1)))


def get_period(d):
    """Return the exam period label for a date, or None.

    Args:
        d: A date string formatted as 'YYYY-MM-DD'.

    Returns:
        The period month in upper case followed by the two-digit year
        (for example 'APR16' or 'JUNE16') when the date falls in
        December, April, June or August; None for any other month.

    Raises:
        ValueError: If d does not match the 'YYYY-MM-DD' format.
    """
    # Every exam period spans a whole calendar month, so the month number
    # alone decides the label. This avoids parsing month abbreviations
    # with '%b', whose accepted spellings depend on the active locale.
    period_labels = {12: 'DEC', 4: 'APR', 6: 'JUNE', 8: 'AUG'}

    parsed = datetime.strptime(d, '%Y-%m-%d')
    label = period_labels.get(parsed.month)
    if label is None:
        return None
    return '%s%02d' % (label, parsed.year % 100)


def get_course_id(course_code, date):
    """Build the exam id and course id for a course code and exam date.

    Args:
        course_code: Course code whose last character is the season
            letter looked up in the table below ('F', 'S' or 'Y').
        date: Exam date formatted as 'YYYY-MM-DD'.

    Returns:
        A tuple (exam_id, course_id). course_id is the course code plus
        a term suffix chosen by exam month and season; exam_id is the
        course id followed by the value of get_period(date). Both are
        None when the month/season combination is not in the table.

    Raises:
        ValueError: If date does not match the 'YYYY-MM-DD' format.
        IndexError: If course_code is empty.
    """
    d = datetime.strptime(date, '%Y-%m-%d')
    year = d.year

    # Look the month up by number rather than via strftime('%b'), whose
    # output depends on the active locale.
    month = {12: 'dec', 4: 'apr', 6: 'june', 8: 'aug'}.get(d.month)

    endings = {
        'dec': {
            'F': '%d9' % year,
            'Y': '%d9' % (year - 1)
        },
        'apr': {
            'S': '%d1' % year,
            'Y': '%d9' % (year - 1)
        },
        'june': {
            'F': '%d5F' % year,
            'Y': '%d5' % year
        },
        'aug': {
            'S': '%d5S' % year,
            'Y': '%d5' % year
        }
    }

    season = course_code[-1]
    suffix = endings.get(month, {}).get(season)
    if suffix is None:
        return None, None

    course_id = '%s%s' % (course_code, suffix)
    exam_id = '%s%s' % (course_id, get_period(date))
    return exam_id, course_id
84 changes: 10 additions & 74 deletions uoftscrapers/scrapers/exams/utm.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from ..utils import Scraper
from .exams_helpers import *
from bs4 import BeautifulSoup
from collections import OrderedDict
from datetime import datetime
Expand Down Expand Up @@ -33,7 +34,6 @@ def scrape(location='.'):

@staticmethod
def retrieve_exams(courses):

exams = OrderedDict()

for course in courses:
Expand All @@ -56,15 +56,15 @@ def retrieve_exams(courses):

date = data[0].split(': ')[1]

id_, course_id = UTMExams.get_course_id(course_code, date)
exam_id, course_id = get_course_id(course_code, date)

period = UTMExams.get_period(date)
period = get_period(date)

if not id_ or not period:
if not exam_id or not period:
continue

start, end = UTMExams.parse_time(data[1].split(': ')[1],
data[2].split(': ')[1], date)
start = convert_time(data[1].split(': ')[1])
end = convert_time(data[2].split(': ')[1])
duration = end - start

sections = [UTMExams.parse_sections(room.split(': ')[1])
Expand All @@ -75,7 +75,7 @@ def retrieve_exams(courses):
sections[i]['lecture'] = lecture_code or ''

doc = OrderedDict([
('id', id_),
('id', exam_id),
('course_id', course_id),
('course_code', course_code),
('campus', 'UTM'),
Expand All @@ -87,11 +87,11 @@ def retrieve_exams(courses):
('sections', [])
])

if id_ not in exams:
exams[id_] = doc
if exam_id not in exams:
exams[exam_id] = doc

for section in sections:
exams[id_]['sections'].append(OrderedDict([
exams[exam_id]['sections'].append(OrderedDict([
('lecture_code', section['lecture']),
('exam_section', section['section']),
('location', section['room'])
Expand All @@ -109,74 +109,10 @@ def get_page_links(endpoint):
return [li.find('a')['href']
for li in soup.find('ul', class_='link').find_all('li')]

@staticmethod
def get_period(d):
    """Return the exam period label for a 'YYYY-MM-DD' date, or None.

    The label is the upper-cased period name followed by the last two
    digits of the year. A date outside December, April, June and August
    yields None.
    """
    month_numbers = {
        'dec': 12,
        'apr': 4,
        'june': 6,
        'aug': 8
    }
    final_days = (('dec', 31), ('apr', 30), ('june', 30), ('aug', 31))

    def boundary(label, day, yr):
        # Build the first or last day of a period as a datetime.
        stamp = '%s-%d-%d' % (yr, month_numbers[label], day)
        return datetime.strptime(stamp, '%Y-%m-%d')

    when = datetime.strptime(d, '%Y-%m-%d')
    yr = when.year

    for label, last in final_days:
        if boundary(label, 1, yr) <= when <= boundary(label, last, yr):
            return '%s%s' % (label.upper(), str(yr)[2:])
    return None

@staticmethod
def get_course_id(course_code, date):
    """Build the exam id and course id for a course code and exam date.

    Args:
        course_code: Course code whose last character is the season
            letter looked up in the table below ('F', 'S' or 'Y').
        date: Exam date formatted as 'YYYY-MM-DD'.

    Returns:
        A tuple (exam_id, course_id). course_id is the course code plus
        a term suffix chosen by exam month and season; exam_id is the
        course id followed by UTMExams.get_period(date). Both are None
        when the month/season combination is not in the table.

    Raises:
        ValueError: If date does not match the 'YYYY-MM-DD' format.
        IndexError: If course_code is empty.
    """
    d = datetime.strptime(date, '%Y-%m-%d')
    year = d.year

    # strftime('%b') gives 'jun' for June, which never matched the 'june'
    # key below, so June exams were silently dropped. Looking the month
    # up by number fixes that and does not depend on the locale.
    month = {12: 'dec', 4: 'apr', 6: 'june', 8: 'aug'}.get(d.month)

    endings = {
        'dec': {
            'F': '%d9' % year,
            'Y': '%d9' % (year - 1)
        },
        'apr': {
            'S': '%d1' % year,
            'Y': '%d9' % (year - 1)
        },
        'june': {
            'F': '%d5F' % year,
            'Y': '%d5' % year
        },
        'aug': {
            'S': '%d5S' % year,
            'Y': '%d5' % year
        }
    }

    season = course_code[-1]
    suffix = endings.get(month, {}).get(season)
    if suffix is None:
        return None, None

    course_id = '%s%s' % (course_code, suffix)
    exam_id = '%s%s' % (course_id, UTMExams.get_period(date))
    return exam_id, course_id

@staticmethod
def parse_sections(room):
    """Split a room string into its room name and exam section.

    Args:
        room: Text such as 'DV2074 (A-K)', or a bare room name with no
            parenthesised part.

    Returns:
        A dict with keys 'section' and 'room'. When room contains '(',
        'room' is the stripped text before it and 'section' is the
        stripped text between '(' and the next ')'. Otherwise 'section'
        is '' and 'room' is returned unchanged.
    """
    section = ''
    if '(' in room:
        # partition() gives exactly the text on either side of the first
        # '(' whatever the spacing, so this cannot fail the way unpacking
        # the result of a whitespace split could.
        room, _, remainder = room.partition('(')
        section = remainder.partition(')')[0].strip()
        room = room.strip()
    return {'section': section, 'room': room}

@staticmethod
def parse_time(start, end, date):
    """Convert start and end clock strings to seconds.

    Args:
        start: Start time as 'H', 'H:M' or 'H:M:S'.
        end: End time in the same form.
        date: Unused; kept so existing callers keep working.

    Returns:
        A tuple (start_seconds, end_seconds) of ints.

    Raises:
        ValueError: If a field is not an integer or there are more than
            three fields.
    """
    def to_seconds(clock):
        fields = [int(x) for x in clock.split(':')]
        if len(fields) > 3:
            raise ValueError('too many time fields: %r' % (clock,))
        # Missing trailing fields (minutes, seconds) count as zero.
        return sum(v * w for v, w in zip(fields, (3600, 60, 1)))

    return to_seconds(start), to_seconds(end)
90 changes: 14 additions & 76 deletions uoftscrapers/scrapers/exams/utsc.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from ..utils import Scraper
from .exams_helpers import *
from bs4 import BeautifulSoup
from collections import OrderedDict
from datetime import datetime
Expand Down Expand Up @@ -31,20 +32,22 @@ def scrape(location='.'):
course_code, lecture_code = course_code.split(' ')

date = data[1]
start, end = UTSCExams.parse_time(data[2], data[3], date)
duration = end - start

location_ = data[4]

id_, course_id = UTSCExams.get_course_id(course_code, date)
exam_id, course_id = get_course_id(course_code, date)

period = UTSCExams.get_period(date)
period = get_period(date)

if not id_ or not period:
if not exam_id or not period:
continue

start = convert_time(data[2])
end = convert_time(data[3])
duration = end - start

location_ = data[4]

doc = OrderedDict([
('id', id_),
('id', exam_id),
('course_id', course_id),
('course_code', course_code),
('campus', 'UTSC'),
Expand All @@ -56,10 +59,10 @@ def scrape(location='.'):
('sections', [])
])

if id_ not in exams:
exams[id_] = doc
if exam_id not in exams:
exams[exam_id] = doc

exams[id_]['sections'].append(OrderedDict([
exams[exam_id]['sections'].append(OrderedDict([
('lecture_code', lecture_code or ''),
('exam_section', ''),
('location', location_)
Expand All @@ -69,68 +72,3 @@ def scrape(location='.'):
Scraper.save_json(doc, location, id_)

Scraper.logger.info('UTSCExams completed.')

@staticmethod
def get_period(d):
    """Map a 'YYYY-MM-DD' date to its exam period label, or None.

    A date in December, April, June or August produces the upper-cased
    period name plus the last two digits of the year; any other date
    produces None.
    """
    number_of = {
        'dec': 12,
        'apr': 4,
        'june': 6,
        'aug': 8
    }

    def day_of(name, day, yr):
        # Datetime for the given day of the named period month.
        text = '%s-%d-%d' % (yr, number_of[name], day)
        return datetime.strptime(text, '%Y-%m-%d')

    target = datetime.strptime(d, '%Y-%m-%d')
    yr = target.year

    found = None
    for name, last_day in (('dec', 31), ('apr', 30),
                           ('june', 30), ('aug', 31)):
        if day_of(name, 1, yr) <= target <= day_of(name, last_day, yr):
            found = name
            break

    if not found:
        return None
    return '%s%s' % (found.upper(), str(yr)[2:])

@staticmethod
def get_course_id(course_code, date):
    """Build the exam id and course id for a course code and exam date.

    Args:
        course_code: Course code whose last character is the season
            letter looked up in the table below ('F', 'S' or 'Y').
        date: Exam date formatted as 'YYYY-MM-DD'.

    Returns:
        A tuple (exam_id, course_id). course_id is the course code plus
        a term suffix chosen by exam month and season; exam_id is the
        course id followed by UTSCExams.get_period(date). Both are None
        when the month/season combination is not in the table.

    Raises:
        ValueError: If date does not match the 'YYYY-MM-DD' format.
        IndexError: If course_code is empty.
    """
    d = datetime.strptime(date, '%Y-%m-%d')
    year = d.year

    # strftime("%b") gives 'jun' for June, which never matched the 'june'
    # key below, so June exams were silently dropped. Looking the month
    # up by number fixes that and does not depend on the locale.
    month = {12: 'dec', 4: 'apr', 6: 'june', 8: 'aug'}.get(d.month)

    endings = {
        'dec': {
            'F': '%d9' % year,
            'Y': '%d9' % (year - 1)
        },
        'apr': {
            'S': '%d1' % year,
            'Y': '%d9' % (year - 1)
        },
        'june': {
            'F': '%d5F' % year,
            'Y': '%d5' % year
        },
        'aug': {
            'S': '%d5S' % year,
            'Y': '%d5' % year
        }
    }

    season = course_code[-1]
    suffix = endings.get(month, {}).get(season)
    if suffix is None:
        return None, None

    course_id = '%s%s' % (course_code, suffix)
    exam_id = '%s%s' % (course_id, UTSCExams.get_period(date))
    return exam_id, course_id

@staticmethod
def parse_time(start, end, date):
    """Convert start and end clock strings to seconds.

    Args:
        start: Start time as 'H', 'H:M' or 'H:M:S'.
        end: End time in the same form.
        date: Unused; kept so existing callers keep working.

    Returns:
        A tuple (start_seconds, end_seconds) of ints.

    Raises:
        ValueError: If a field is not an integer or there are more than
            three fields.
    """
    def to_seconds(clock):
        fields = [int(x) for x in clock.split(':')]
        if len(fields) > 3:
            raise ValueError('too many time fields: %r' % (clock,))
        # A trailing seconds field is now accepted; when absent it
        # counts as zero, matching the previous 'H:M' result.
        return sum(v * w for v, w in zip(fields, (3600, 60, 1)))

    return to_seconds(start), to_seconds(end)
Loading

0 comments on commit 1c0dd67

Please sign in to comment.