Skip to content

Commit

Permalink
initial commit
Browse files Browse the repository at this point in the history
added initial scraping code
  • Loading branch information
Anderson Ng Ho Yin committed May 12, 2016
1 parent 627513e commit 13fdda0
Showing 1 changed file with 29 additions and 4 deletions.
33 changes: 29 additions & 4 deletions uoftscrapers/scrapers/calendar/utm.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,39 @@
import json
import os
import requests
import datetime
# Timestamp captured once, when this module is imported (naive local time);
# it is not refreshed on later use.
now = datetime.datetime.now()


class UTMCalendar:
    """Scraper for the UTM "important dates" listing on m.utm.utoronto.ca.

    This is initial scraping code: the parsed dates are printed to stdout
    rather than written to disk.
    """

    # Kept for backward compatibility; not used by scrape().
    host = 'http://www.artsandscience.utoronto.ca/ofr/calendar/'

    # URL templates for the important-dates page. `{0}` is filled with the
    # year; the digit that follows it completes the `session` query value.
    # NOTE(review): the trailing 5 / 9 presumably select the session starting
    # in May / September -- confirm against the site.
    host1 = 'http://m.utm.utoronto.ca/importantDates.php?mode=full&session={0}5&header='
    host2 = 'http://m.utm.utoronto.ca/importantDates.php?mode=full&session={0}9&header='

    @staticmethod
    def scrape(location='.', year=None):
        """Fetch the important-dates page for `year` and print its entries.

        Only the `host1` session page is fetched; `host2` is not scraped yet.

        Args:
            location: Output directory. Currently unused -- nothing is
                written to disk.
            year: Year substituted into the session URL. Defaults to the
                current year, evaluated when the method is called.

        Returns:
            None. Each date heading is printed once, followed by the text of
            the `info` div that follows each of its `title` divs.
        """
        # Resolve the default at call time so a long-running process does not
        # keep using the year in which the module happened to be imported.
        year = year or datetime.datetime.now().year

        Scraper.logger.info('UTMCalendar initialized.')

        html = Scraper.get(UTMCalendar.host1.format(year))
        soup = BeautifulSoup(html, 'html.parser')

        # The page may lack the expected container (bad year, layout change);
        # treat that as "no dates" instead of raising AttributeError.
        content = soup.find('div', class_='content')
        titles = []
        if content is not None:
            titles = content.find_all('div', class_='title')

        # Consecutive `title` divs that compare equal belong to the same
        # date, so the heading is printed only for the first one of each run.
        group_head = None
        for title in titles:
            if group_head is None or title != group_head:
                group_head = title
                print(title.text)

            info = title.find_next('div', class_='info')
            if info is not None:
                print(info.text)

        Scraper.logger.info('UTMCalendar completed.')

0 comments on commit 13fdda0

Please sign in to comment.