From e859cff33ae7e26d2acc48557d33098f4fe5c268 Mon Sep 17 00:00:00 2001 From: Kashav Madan Date: Sun, 17 Apr 2016 14:52:34 -0400 Subject: [PATCH] Add unique id key --- README.md | 3 ++- uoftscrapers/scrapers/athletics/utm.py | 13 ++++++++++--- uoftscrapers/scrapers/athletics/utsc.py | 13 ++++++++++--- 3 files changed, 22 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 93fe4ad..a1da10b 100644 --- a/README.md +++ b/README.md @@ -471,7 +471,8 @@ uoftscrapers.Athletics ##### Output format ```js -{ +{ + "id": String, "date": String, "events":[{ "title": String, diff --git a/uoftscrapers/scrapers/athletics/utm.py b/uoftscrapers/scrapers/athletics/utm.py index 60ae255..69eaa4a 100644 --- a/uoftscrapers/scrapers/athletics/utm.py +++ b/uoftscrapers/scrapers/athletics/utm.py @@ -29,6 +29,7 @@ def scrape(location='.', month=None): for tr in calendar.find_all('tr', class_='single-day'): for td in tr.find_all('td'): date = td.get('data-date') + id_ = UTMAthletics.get_id(date) if not UTMAthletics.date_in_month(date, month): continue @@ -56,13 +57,14 @@ def scrape(location='.', month=None): ('end_time', end) ])) - athletics[date] = OrderedDict([ + athletics[id_] = OrderedDict([ + ('id', id_), ('date', date), ('events', events) ]) - for date, doc in athletics.items(): - Scraper.save_json(doc, location, date) + for id_, doc in athletics.items(): + Scraper.save_json(doc, location, id_) Scraper.logger.info('UTMAthletics completed.') @@ -71,6 +73,11 @@ def get_month(m): now = datetime.now() return '%s-%s' % (now.year, now.month) + @staticmethod + def get_id(d): + day = datetime.strptime(d, '%Y-%m-%d').day + return '%s%s' % (str(day).zfill(2), 'M') + @staticmethod def date_in_month(d, m): d = datetime.strptime(d, '%Y-%m-%d') diff --git a/uoftscrapers/scrapers/athletics/utsc.py b/uoftscrapers/scrapers/athletics/utsc.py index 5a8f1a5..67e70e6 100644 --- a/uoftscrapers/scrapers/athletics/utsc.py +++ b/uoftscrapers/scrapers/athletics/utsc.py @@ -29,6 +29,7 @@ def scrape(location='.', month=None): for tr in calendar.find_all('tr', class_='single-day'): for td in tr.find_all('td'): date = td.get('data-date') + id_ = UTSCAthletics.get_id(date) if not UTSCAthletics.date_in_month(date, month): continue @@ -55,13 +56,14 @@ def scrape(location='.', month=None): ('end_time', end) ])) - athletics[date] = OrderedDict([ + athletics[id_] = OrderedDict([ + ('id', id_), ('date', date), ('events', events) ]) - for date, doc in athletics.items(): - Scraper.save_json(doc, location, date) + for id_, doc in athletics.items(): + Scraper.save_json(doc, location, id_) Scraper.logger.info('UTSCAthletics completed.') @@ -70,6 +72,11 @@ def get_month(m): now = datetime.now() return '%s-%s' % (now.year, now.month) + @staticmethod + def get_id(d): + day = datetime.strptime(d, '%Y-%m-%d').day + return '%s%s' % (str(day).zfill(2), 'SC') + @staticmethod def date_in_month(d, m): d = datetime.strptime(d, '%Y-%m-%d')