Easier querying for data. Added Travis CI.
aaront committed May 18, 2016
1 parent 5c3a5c3 commit 9cb3e30
Showing 5 changed files with 38 additions and 19 deletions.
15 changes: 15 additions & 0 deletions .travis.yml
@@ -0,0 +1,15 @@
+language: python
+python:
+  - "3.5"
+  - "nightly"
+
+branches:
+  only:
+    - master
+
+cache:
+  directories:
+    - $HOME/.cache/pip
+    - $HOME/.pyenv
+
+script: nosetests
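Since `script: nosetests` is the entire CI entry point, the same suite can be run locally before pushing. A minimal sketch using nose's Python API; the `tests/` path is inferred from the test file in this commit, and the runner script itself is hypothetical:

    # Hypothetical local runner mirroring Travis's `script: nosetests`.
    import nose

    # nose collects and runs tests under the given path, just as the
    # bare `nosetests` command does from the repository root.
    nose.run(argv=['nosetests', 'tests/'])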
2 changes: 1 addition & 1 deletion puckdb/exceptions.py
@@ -3,4 +3,4 @@ def __init__(self, message=None):
         self.message = message
 
     def __str__(self):
-        return 'Invalid filter{message}'.format(': ' + self.message if self.message else '')
+        return 'Invalid filter{message}'.format(message=': ' + self.message if self.message else '')
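This one-line change fixes a latent crash: `{message}` is a named replacement field, so the old positional `format()` call raised `KeyError: 'message'` instead of producing the error text. A quick sketch of both behaviours (the example message is illustrative):

    # Old call: positional argument, but the template names its field.
    'Invalid filter{message}'.format(': bad operator')           # raises KeyError: 'message'

    # New call: the keyword matches the field name.
    'Invalid filter{message}'.format(message=': bad operator')   # 'Invalid filter: bad operator'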
36 changes: 19 additions & 17 deletions puckdb/scrapers.py
@@ -8,6 +8,7 @@
 
 try:
     import uvloop
+
     asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
 except ImportError:
     pass
@@ -27,19 +28,27 @@ def __init__(self, filter_by: filters.BaseFilter, concurrency: int = 5):
         self.sem = asyncio.Semaphore(concurrency)
 
     @abc.abstractmethod
-    async def process(self, data: dict) -> List[dict]:
+    async def _process(self, data: dict) -> List[dict]:
         return [data]
 
     async def _fetch(self, session: aiohttp.ClientSession, url: str) -> List[dict]:
         async with self.sem:
             async with session.get(url, headers=headers) as response:
                 assert response.status == 200
-                return await self.process(await response.json(loads=ujson.loads))
+                return await self._process(await response.json(loads=ujson.loads))
 
     @abc.abstractmethod
-    def get_tasks(self, session: aiohttp.ClientSession) -> List[asyncio.Future]:
+    def _get_tasks(self, session: aiohttp.ClientSession) -> List[asyncio.Future]:
         pass
 
+    def fetch(self, loop: asyncio.AbstractEventLoop):
+        with aiohttp.ClientSession(loop=loop) as session:
+            loop.run_until_complete(asyncio.gather(*self._get_tasks(session)))
+
+    def get(self, loop: asyncio.AbstractEventLoop):
+        with aiohttp.ClientSession(loop=loop) as session:
+            return list(itertools.chain(*loop.run_until_complete(asyncio.gather(*self._get_tasks(session)))))
+
 
 class NHLScheduleScraper(BaseScraper):
     url = 'https://statsapi.web.nhl.com/api/v1/schedule?startDate={from_date}&endDate={to_date}' \
@@ -48,14 +57,14 @@ class NHLScheduleScraper(BaseScraper):
     def __init__(self, filter_by: filters.GameFilter, concurrency: int = 5):
         super().__init__(filter_by, concurrency)
 
-    async def process(self, data: dict) -> List[dict]:
+    async def _process(self, data: dict) -> List[dict]:
         games = []
         if 'dates' in data:
             for daily in data['dates']:
                 games.extend(daily['games'])
         return games
 
-    def get_tasks(self, session: aiohttp.ClientSession) -> List[asyncio.Future]:
+    def _get_tasks(self, session: aiohttp.ClientSession) -> List[asyncio.Future]:
         urls = [
             self.url.format(from_date=interval.start.strftime('%Y-%m-%d'), to_date=interval.end.strftime('%Y-%m-%d'))
             for interval in self.filter_by.intervals]
@@ -68,25 +77,18 @@ class NHLGameScraper(BaseScraper):
     def __init__(self, filter_by: filters.GameFilter, concurrency: int = 3):
         super().__init__(filter_by, concurrency)
 
-    async def process(self, data: dict) -> List[dict]:
+    async def _process(self, data: dict) -> List[dict]:
         return [data]
 
-    def get_tasks(self, session: aiohttp.ClientSession) -> List[asyncio.Future]:
+    def _get_tasks(self, session: aiohttp.ClientSession) -> List[asyncio.Future]:
         urls = [self.url.format(game_id=gid) for gid in self.filter_by.game_ids]
         return [asyncio.ensure_future(self._fetch(session, url)) for url in urls]
 
 
-def _fetch_all_tasks(tasks: List[asyncio.Future], loop: asyncio.AbstractEventLoop) -> List[dict]:
-    return list(itertools.chain(*loop.run_until_complete(asyncio.gather(*tasks))))
-
-
 def fetch_games(filter_by: filters.GameFilter) -> List[object]:
     loop = asyncio.get_event_loop()
-    schedule_scraper = NHLScheduleScraper(filter_by)
-    with aiohttp.ClientSession(loop=loop) as session:
-        schedule_games = _fetch_all_tasks(schedule_scraper.get_tasks(session), loop)
-        game_filter = filters.GameFilter(game_ids=[g['gamePk'] for g in schedule_games])
-        game_scraper = NHLGameScraper(game_filter)
-        games = _fetch_all_tasks(game_scraper.get_tasks(session), loop)
+    schedule_games = NHLScheduleScraper(filter_by).get(loop)
+    game_filter = filters.GameFilter(game_ids=[g['gamePk'] for g in schedule_games])
+    games = NHLGameScraper(game_filter).get(loop)
    loop.close()
    return games
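With `get()` on the scraper base class, callers no longer juggle sessions and task lists by hand, and `fetch_games` reduces to a three-step pipeline: fetch the schedule, build a filter from the returned game IDs, then fetch each game. A usage sketch mirroring `tests/test_scrapers.py` (the date and expected count come from that test):

    from datetime import datetime
    from puckdb import filters, scrapers

    # Fetch every NHL game played on a single day; fetch_games manages
    # its own event loop and aiohttp session internally.
    day = datetime(2016, 4, 30)
    games = scrapers.fetch_games(filters.GameFilter(from_date=day, to_date=day))
    print(len(games))  # 2 games on this date, per the existing test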
2 changes: 2 additions & 0 deletions setup.cfg
@@ -0,0 +1,2 @@
+[wheel]
+universal = 1
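Note that `universal = 1` tags the built wheel as `py2.py3-none-any`, i.e. installable on both Python 2 and 3. Since the scrapers use `async`/`await` syntax, the package itself requires Python 3.5+, so this flag is broader than the versions declared in `.travis.yml` above.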
2 changes: 1 addition & 1 deletion tests/test_scrapers.py
@@ -9,4 +9,4 @@ def test_one_day(self):
         day = datetime(2016, 4, 30)
         game_filter = filters.GameFilter(from_date=day, to_date=day)
         games = scrapers.fetch_games(game_filter)
-        self.assertEqual(2, len(games))
\ No newline at end of file
+        self.assertEqual(2, len(games))
