Skip to content

Commit

Permalink
Merge pull request #53 from kshvmdn/master
Browse files Browse the repository at this point in the history
Add unique id key
  • Loading branch information
qasim committed Apr 17, 2016
2 parents 532316b + e859cff commit a44cf55
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 7 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -471,7 +471,8 @@ uoftscrapers.Athletics

##### Output format
```js
{
{
"id": String,
"date": String,
"events":[{
"title": String,
Expand Down
13 changes: 10 additions & 3 deletions uoftscrapers/scrapers/athletics/utm.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ def scrape(location='.', month=None):
for tr in calendar.find_all('tr', class_='single-day'):
for td in tr.find_all('td'):
date = td.get('data-date')
id_ = UTMAthletics.get_id(date)

if not UTMAthletics.date_in_month(date, month):
continue
Expand Down Expand Up @@ -56,13 +57,14 @@ def scrape(location='.', month=None):
('end_time', end)
]))

athletics[date] = OrderedDict([
athletics[id_] = OrderedDict([
('id', id_),
('date', date),
('events', events)
])

for date, doc in athletics.items():
Scraper.save_json(doc, location, date)
for id_, doc in athletics.items():
Scraper.save_json(doc, location, id_)

Scraper.logger.info('UTMAthletics completed.')

Expand All @@ -71,6 +73,11 @@ def get_month(m):
now = datetime.now()
return '%s-%s' % (now.year, now.month)

@staticmethod
def get_id(d):
    """Build the document id for a UTM athletics calendar day.

    ``d`` is a date string in ``YYYY-MM-DD`` form. The result is the
    two-digit, zero-padded day of the month followed by the campus
    suffix ``'M'`` (e.g. ``'2016-04-05'`` -> ``'05M'``).

    Only the day component is used, so ids repeat from one month to
    the next. ``datetime.strptime`` raises ``ValueError`` if ``d`` does
    not match the expected format.
    """
    parsed = datetime.strptime(d, '%Y-%m-%d')
    # Left-pad single-digit days with a zero so every id has the same width.
    day_part = str(parsed.day).rjust(2, '0')
    return day_part + 'M'

@staticmethod
def date_in_month(d, m):
d = datetime.strptime(d, '%Y-%m-%d')
Expand Down
13 changes: 10 additions & 3 deletions uoftscrapers/scrapers/athletics/utsc.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ def scrape(location='.', month=None):
for tr in calendar.find_all('tr', class_='single-day'):
for td in tr.find_all('td'):
date = td.get('data-date')
id_ = UTSCAthletics.get_id(date)

if not UTSCAthletics.date_in_month(date, month):
continue
Expand All @@ -55,13 +56,14 @@ def scrape(location='.', month=None):
('end_time', end)
]))

athletics[date] = OrderedDict([
athletics[id_] = OrderedDict([
('id', id_),
('date', date),
('events', events)
])

for date, doc in athletics.items():
Scraper.save_json(doc, location, date)
for id_, doc in athletics.items():
Scraper.save_json(doc, location, id_)

Scraper.logger.info('UTSCAthletics completed.')

Expand All @@ -70,6 +72,11 @@ def get_month(m):
now = datetime.now()
return '%s-%s' % (now.year, now.month)

@staticmethod
def get_id(d):
    """Build the document id for a UTSC athletics calendar day.

    ``d`` is a date string in ``YYYY-MM-DD`` form. The result is the
    two-digit, zero-padded day of the month followed by the campus
    suffix ``'SC'`` (e.g. ``'2016-04-05'`` -> ``'05SC'``).

    Only the day component is used, so ids repeat from one month to
    the next. ``datetime.strptime`` raises ``ValueError`` if ``d`` does
    not match the expected format.
    """
    parsed = datetime.strptime(d, '%Y-%m-%d')
    # Left-pad single-digit days with a zero so every id has the same width.
    day_part = str(parsed.day).rjust(2, '0')
    return day_part + 'SC'

@staticmethod
def date_in_month(d, m):
d = datetime.strptime(d, '%Y-%m-%d')
Expand Down

0 comments on commit a44cf55

Please sign in to comment.