From 9cb3e3041c3eb8700439036fd1befd50fdc41fe5 Mon Sep 17 00:00:00 2001 From: Aaron Toth Date: Tue, 17 May 2016 22:38:18 -0400 Subject: [PATCH] Easier querying for data. Added TravisCI. --- .travis.yml | 15 +++++++++++++++ puckdb/exceptions.py | 2 +- puckdb/scrapers.py | 36 +++++++++++++++++++----------------- setup.cfg | 2 ++ tests/test_scrapers.py | 2 +- 5 files changed, 38 insertions(+), 19 deletions(-) create mode 100644 .travis.yml create mode 100644 setup.cfg diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..b176b40 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,15 @@ +language: python +python: + - "3.5" + - "nightly" + +branches: + only: + - master + +cache: + directories: + - $HOME/.cache/pip + - $HOME/.pyenv + +script: nosetests diff --git a/puckdb/exceptions.py b/puckdb/exceptions.py index 2e0f141..0b7aa4b 100644 --- a/puckdb/exceptions.py +++ b/puckdb/exceptions.py @@ -3,4 +3,4 @@ def __init__(self, message=None): self.message = message def __str__(self): - return 'Invalid filter{message}'.format(': ' + self.message if self.message else '') + return 'Invalid filter{message}'.format(message=': ' + self.message if self.message else '') diff --git a/puckdb/scrapers.py b/puckdb/scrapers.py index c69fce3..02affd4 100644 --- a/puckdb/scrapers.py +++ b/puckdb/scrapers.py @@ -8,6 +8,7 @@ try: import uvloop + asyncio.set_event_loop_policy(uvloop.EventLoopPolicy()) except ImportError: pass @@ -27,19 +28,27 @@ def __init__(self, filter_by: filters.BaseFilter, concurrency: int = 5): self.sem = asyncio.Semaphore(concurrency) @abc.abstractmethod - async def process(self, data: dict) -> List[dict]: + async def _process(self, data: dict) -> List[dict]: return [data] async def _fetch(self, session: aiohttp.ClientSession, url: str) -> List[dict]: async with self.sem: async with session.get(url, headers=headers) as response: assert response.status == 200 - return await self.process(await response.json(loads=ujson.loads)) + return await self._process(await response.json(loads=ujson.loads)) @abc.abstractmethod - def get_tasks(self, session: aiohttp.ClientSession) -> List[asyncio.Future]: + def _get_tasks(self, session: aiohttp.ClientSession) -> List[asyncio.Future]: pass + def fetch(self, loop: asyncio.AbstractEventLoop): + with aiohttp.ClientSession(loop=loop) as session: + loop.run_until_complete(self._get_tasks(session)) + + def get(self, loop: asyncio.AbstractEventLoop): + with aiohttp.ClientSession(loop=loop) as session: + return list(itertools.chain(*loop.run_until_complete(asyncio.gather(*self._get_tasks(session))))) + class NHLScheduleScraper(BaseScraper): url = 'https://statsapi.web.nhl.com/api/v1/schedule?startDate={from_date}&endDate={to_date}' \ @@ -48,14 +57,14 @@ class NHLScheduleScraper(BaseScraper): def __init__(self, filter_by: filters.GameFilter, concurrency: int = 5): super().__init__(filter_by, concurrency) - async def process(self, data: dict) -> List[dict]: + async def _process(self, data: dict) -> List[dict]: games = [] if 'dates' in data: for daily in data['dates']: games.extend(daily['games']) return games - def get_tasks(self, session: aiohttp.ClientSession) -> List[asyncio.Future]: + def _get_tasks(self, session: aiohttp.ClientSession) -> List[asyncio.Future]: urls = [ self.url.format(from_date=interval.start.strftime('%Y-%m-%d'), to_date=interval.end.strftime('%Y-%m-%d')) for interval in self.filter_by.intervals] @@ -68,25 +77,18 @@ class NHLGameScraper(BaseScraper): def __init__(self, filter_by: filters.GameFilter, concurrency: int = 3): super().__init__(filter_by, concurrency) - async def process(self, data: dict) -> List[dict]: + async def _process(self, data: dict) -> List[dict]: return [data] - def get_tasks(self, session: aiohttp.ClientSession) -> List[asyncio.Future]: + def _get_tasks(self, session: aiohttp.ClientSession) -> List[asyncio.Future]: urls = [self.url.format(game_id=gid) for gid in self.filter_by.game_ids] return [asyncio.ensure_future(self._fetch(session, url)) for url in urls] -def _fetch_all_tasks(tasks: List[asyncio.Future], loop: asyncio.AbstractEventLoop) -> List[dict]: - return list(itertools.chain(*loop.run_until_complete(asyncio.gather(*tasks)))) - - def fetch_games(filter_by: filters.GameFilter) -> List[object]: loop = asyncio.get_event_loop() - schedule_scraper = NHLScheduleScraper(filter_by) - with aiohttp.ClientSession(loop=loop) as session: - schedule_games = _fetch_all_tasks(schedule_scraper.get_tasks(session), loop) - game_filter = filters.GameFilter(game_ids=[g['gamePk'] for g in schedule_games]) - game_scraper = NHLGameScraper(game_filter) - games = _fetch_all_tasks(game_scraper.get_tasks(session), loop) + schedule_games = NHLScheduleScraper(filter_by).get(loop) + game_filter = filters.GameFilter(game_ids=[g['gamePk'] for g in schedule_games]) + games = NHLGameScraper(game_filter).get(loop) loop.close() return games diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..5e40900 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,2 @@ +[wheel] +universal = 1 diff --git a/tests/test_scrapers.py b/tests/test_scrapers.py index 78e5a42..d9c578a 100644 --- a/tests/test_scrapers.py +++ b/tests/test_scrapers.py @@ -9,4 +9,4 @@ def test_one_day(self): day = datetime(2016, 4, 30) game_filter = filters.GameFilter(from_date=day, to_date=day) games = scrapers.fetch_games(game_filter) - self.assertEqual(2, len(games)) \ No newline at end of file + self.assertEqual(2, len(games))