Skip to content

Commit

Permalink
Merge pull request inspirehep#104 from spirosdelviniotis/hepcrawl_ref…
Browse files Browse the repository at this point in the history
…actor_tests

global: add testlib module for reusability
  • Loading branch information
david-caro authored May 10, 2017
2 parents 69662a8 + d1ddf43 commit e25033e
Show file tree
Hide file tree
Showing 35 changed files with 204 additions and 153 deletions.
4 changes: 2 additions & 2 deletions docker-compose.deps.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ version: '2'
services:
pip:
build:
context: ${PWD}/tests/functional
dockerfile: hepcrawl_base.dockerfile
context: ${PWD}/tests
dockerfile: Dockerfile.hepcrawl_base
image: hepcrawl_base
command: bash -c "pip install -e .[all] && pip freeze"
volumes:
Expand Down
4 changes: 2 additions & 2 deletions docker-compose.test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ services:
- APP_BROKER_URL=amqp://guest:guest@rabbitmq:5672//
- APP_CELERY_RESULT_BACKEND=amqp://guest:guest@rabbitmq:5672//
- APP_CRAWLER_HOST_URL=http://scrapyd:6800
- APP_API_PIPELINE_TASK_ENDPOINT_DEFAULT=tests.functional.tasks.submit_results
- APP_API_PIPELINE_TASK_ENDPOINT_DEFAULT=hepcrawl.testlib.tasks.submit_results
- COVERAGE_PROCESS_START=/code/.coveragerc
command: py.test -vv tests/functional/WSP/test_wsp.py
volumes: &common_volumes
Expand Down Expand Up @@ -44,7 +44,7 @@ services:
celery:
image: hepcrawl_base
environment: *env_variables
command: celery worker --events --app tests.functional.tasks --loglevel=debug
command: celery worker --events --app hepcrawl.testlib.tasks --loglevel=debug
volumes: *common_volumes
links:
- rabbitmq
Expand Down
4 changes: 3 additions & 1 deletion tests/__init__.py → hepcrawl/testlib/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
# -*- coding: utf-8 -*-
#
# This file is part of hepcrawl.
# Copyright (C) 2015, 2016, 2017 CERN.
# Copyright (C) 2017 CERN.
#
# hepcrawl is a free software; you can redistribute it and/or modify it
# under the terms of the Revised BSD License; see LICENSE file for
# more details.

from __future__ import absolute_import, print_function, unicode_literals
91 changes: 91 additions & 0 deletions hepcrawl/testlib/celery_monitor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
# -*- coding: utf-8 -*-
#
# This file is part of hepcrawl.
# Copyright (C) 2017 CERN.
#
# hepcrawl is a free software; you can redistribute it and/or modify it
# under the terms of the Revised BSD License; see LICENSE file for
# more details.

"""Celery monitor dealing with celery tasks for functional tests."""

from __future__ import absolute_import, print_function, unicode_literals

from itertools import islice

import logging

LOGGER = logging.getLogger(__name__)


class CeleryMonitor(object):
    """Context manager collecting the outcome of celery tasks for tests.

    Entering the context enables events on ``app`` and prepares an event
    receiver with handlers for ``task-succeeded`` and ``task-failed``.
    Leaving the context captures events from that receiver and finally
    closes the connection. Whatever the handlers gather is available in
    ``results``.

    :param app: Celery application used to enable events, open the
        connection, build the receiver and look up task results.
    :param monitor_timeout: Value passed as ``timeout`` to the receiver's
        ``itercapture``.
    :param monitor_iter_limit: Maximum number of items pulled from the
        capture iterator while waiting.
    """

    def __init__(self, app, monitor_timeout=3, monitor_iter_limit=100):
        self.results = []
        self.recv = None
        self.app = app
        self.connection = None
        self.monitor_timeout = monitor_timeout
        self.monitor_iter_limit = monitor_iter_limit

    def __enter__(self):
        """Enable events, open a connection and build the event receiver.

        :returns: The monitor itself.
        """
        state = self.app.events.State()

        def announce_succeeded_tasks(event):
            # Store every item of the task result and flag the receiver
            # to stop capturing.
            state.event(event)
            task = state.tasks.get(event['uuid'])
            LOGGER.info('TASK SUCCEEDED: %s[%s] %s', task.name, task.uuid, task.info())
            async_result = self.app.AsyncResult(task.id)
            self.results.extend(async_result.result)
            self.recv.should_stop = True

        def announce_failed_tasks(event):
            # Store the failed task's info and flag the receiver to stop
            # capturing.
            state.event(event)
            task = state.tasks.get(event['uuid'])
            LOGGER.info('TASK FAILED: %s[%s] %s', task.name, task.uuid, task.info())
            self.results.append(task.info())
            self.recv.should_stop = True

        self.app.control.enable_events()
        self.connection = self.app.connection()
        self.recv = self.app.events.Receiver(self.connection, handlers={
            'task-succeeded': announce_succeeded_tasks,
            'task-failed': announce_failed_tasks,
        })

        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Wait for task events, then always close the connection.

        The wait is skipped when the ``with`` body raised, so that error
        is neither delayed nor replaced by one raised while capturing.
        Exceptions are never suppressed.
        """
        try:
            if exc_type is None:
                events_iter = self.recv.itercapture(
                    limit=None,
                    timeout=self.monitor_timeout,
                    wakeup=True,
                )
                self._wait_for_results(events_iter)
        finally:
            # Runs even if the capture raised, so the connection is not
            # left open.
            self.connection.__exit__(exc_type, exc_val, exc_tb)

    def _wait_for_results(self, events_iter):
        """Pull at most ``monitor_iter_limit`` items from ``events_iter``."""
        # ``any`` is only used to drive the iterator; it returns early if
        # an item is truthy.
        any(islice(
            events_iter,  # iterable
            self.monitor_iter_limit  # stop
        ))

    @classmethod
    def do_crawl(
        cls,
        app,
        monitor_timeout,
        monitor_iter_limit,
        crawler_instance,
        project='hepcrawl',
        spider='WSP',
        settings=None,
        **crawler_arguments
    ):
        """Schedule a crawl and return what the monitor collected.

        :param app: Celery application handed to the monitor.
        :param monitor_timeout: See the class parameters.
        :param monitor_iter_limit: See the class parameters.
        :param crawler_instance: Object whose ``schedule`` method is called
            with ``project``, ``spider``, ``settings`` and the extra
            ``crawler_arguments``.
        :param project: Project name passed to ``schedule``.
        :param spider: Spider name passed to ``schedule``.
        :param settings: Settings passed to ``schedule``; a falsy value
            is replaced by an empty dict.
        :returns: The monitor's ``results`` list.
        """
        settings = settings or {}

        with cls(app, monitor_timeout=monitor_timeout, monitor_iter_limit=monitor_iter_limit) as my_monitor:
            crawler_instance.schedule(
                project=project,
                spider=spider,
                settings=settings,
                **crawler_arguments
            )

        return my_monitor.results
34 changes: 29 additions & 5 deletions tests/unit/responses/__init__.py → hepcrawl/testlib/fixtures.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
# -*- coding: utf-8 -*-
#
# This file is part of hepcrawl.
# Copyright (C) 2015 CERN.
# Copyright (C) 2015, 2016, 2017 CERN.
#
# hepcrawl is a free software; you can redistribute it and/or modify it
# under the terms of the Revised BSD License; see LICENSE file for
# more details.

from __future__ import absolute_import, division, print_function

import os

from scrapy.http import Request, TextResponse
Expand All @@ -24,12 +26,10 @@ def fake_response_from_file(file_name, url='http://www.example.com', response_ty
:returns: A scrapy HTTP response which can be used for unittesting.
"""
meta = {}
request = Request(url=url)

if not file_name[0] == '/':
responses_dir = os.path.dirname(os.path.realpath(__file__))
file_path = os.path.join(responses_dir, file_name)
file_path = get_responses_path(file_name)
else:
file_path = file_name

Expand All @@ -47,7 +47,6 @@ def fake_response_from_file(file_name, url='http://www.example.com', response_ty

def fake_response_from_string(text, url='http://www.example.com', response_type=TextResponse):
"""Fake Scrapy response from a string."""
meta = {}
request = Request(url=url)
response = response_type(
url=url,
Expand All @@ -65,6 +64,31 @@ def get_node(spider, tag, response=None, text=None, rtype="xml"):
selector = Selector(response, type=rtype)
elif text:
selector = Selector(text=text, type=rtype)

spider._register_namespaces(selector)
node = selector.xpath(tag)
return node


def get_responses_path(*path_chunks):
    """Build an absolute path inside the unit-test responses directory.

    The project root is taken to be two directories above the directory
    that contains this module.

    :param path_chunks: Optional path elements appended, in order, below
        the responses directory.
    :return: The absolute path of ``<project root>/tests/unit/responses``,
        extended with ``path_chunks`` when any are given.

    :Example:

        >>> get_responses_path()
        '/home/myuser/hepcrawl/tests/unit/responses'
        >>> get_responses_path('one', 'two')
        '/home/myuser/hepcrawl/tests/unit/responses/one/two'
    """
    module_dir = os.path.dirname(os.path.abspath(__file__))
    # ``module_dir`` is already absolute, so normalising the joined path
    # is enough to resolve the two parent-directory steps.
    project_root_dir = os.path.normpath(
        os.path.join(module_dir, os.pardir, os.pardir)
    )
    path_parts = (project_root_dir, 'tests', 'unit', 'responses') + path_chunks
    return os.path.join(*path_parts)
File renamed without changes.
2 changes: 1 addition & 1 deletion tests/functional/tasks.py → hepcrawl/testlib/tasks.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
#
# This file is part of hepcrawl.
# Copyright (C) 2015, 2016, 2017 CERN.
# Copyright (C) 2017 CERN.
#
# hepcrawl is a free software; you can redistribute it and/or modify it
# under the terms of the Revised BSD License; see LICENSE file for
Expand Down
File renamed without changes.
File renamed without changes.
8 changes: 0 additions & 8 deletions tests/functional/WSP/__init__.py

This file was deleted.

81 changes: 3 additions & 78 deletions tests/functional/WSP/test_wsp.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,86 +15,11 @@
import json
import os

from itertools import islice
from scrapyd_api import ScrapydAPI
from time import sleep

from tests.functional.tasks import app


class CeleryMonitor(object):
    """Context manager collecting the outcome of celery tasks for tests.

    Entering the context enables events on ``app`` and prepares an event
    receiver with handlers for ``task-succeeded`` and ``task-failed``.
    Leaving the context captures events from that receiver and finally
    closes the connection. Whatever the handlers gather is available in
    ``results``.

    :param app: Celery application used to enable events, open the
        connection, build the receiver and look up task results.
    :param monitor_timeout: Value passed as ``timeout`` to the receiver's
        ``itercapture``.
    :param monitor_iter_limit: Maximum number of items pulled from the
        capture iterator while waiting.
    """

    def __init__(self, app, monitor_timeout=3, monitor_iter_limit=100):
        self.results = []
        self.recv = None
        self.app = app
        self.connection = None
        self.monitor_timeout = monitor_timeout
        self.monitor_iter_limit = monitor_iter_limit

    def __enter__(self):
        """Enable events, open a connection and build the event receiver.

        :returns: The monitor itself.
        """
        state = self.app.events.State()

        def announce_succeeded_tasks(event):
            state.event(event)
            task = state.tasks.get(event['uuid'])
            print('TASK SUCCEEDED: %s[%s] %s' % (task.name, task.uuid, task.info(),))
            # Look the result up on the app given to this monitor rather
            # than on a module-level ``app``.
            async_result = self.app.AsyncResult(task.id)
            self.results.extend(async_result.result)
            self.recv.should_stop = True

        def announce_failed_tasks(event):
            state.event(event)
            task = state.tasks.get(event['uuid'])
            print('TASK FAILED: %s[%s] %s' % (task.name, task.uuid, task.info(),))
            self.results.append(task.info())
            self.recv.should_stop = True

        self.app.control.enable_events()
        self.connection = self.app.connection()
        self.recv = self.app.events.Receiver(self.connection, handlers={
            'task-succeeded': announce_succeeded_tasks,
            'task-failed': announce_failed_tasks,
        })

        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Wait for task events, then always close the connection."""
        try:
            events_iter = self.recv.itercapture(
                limit=None,
                timeout=self.monitor_timeout,
                wakeup=True,
            )
            self._wait_for_results(events_iter)
        finally:
            # Runs even if the capture raised, so the connection is not
            # left open.
            self.connection.__exit__()

    def _wait_for_results(self, events_iter):
        """Pull at most ``monitor_iter_limit`` items from ``events_iter``."""
        # ``any`` is only used to drive the iterator; it returns early if
        # an item is truthy.
        any(islice(
            events_iter,  # iterable
            self.monitor_iter_limit  # stop
        ))

    @classmethod
    def do_crawl(
        cls,
        app,
        monitor_timeout,
        monitor_iter_limit,
        crawler_instance,
        project='hepcrawl',
        spider='WSP',
        settings=None,
        **crawler_arguments
    ):
        """Schedule a crawl and return what the monitor collected.

        :param crawler_instance: Object whose ``schedule`` method is called
            with ``project``, ``spider``, ``settings`` and the extra
            ``crawler_arguments``.
        :param settings: Settings passed to ``schedule``; a falsy value
            is replaced by an empty dict.
        :returns: The monitor's ``results`` list.
        """
        settings = settings or {}

        with cls(app, monitor_timeout=monitor_timeout, monitor_iter_limit=monitor_iter_limit) as my_monitor:
            crawler_instance.schedule(
                project=project,
                spider=spider,
                settings=settings,
                **crawler_arguments
            )

        return my_monitor.results
from hepcrawl.testlib.tasks import app as celery_app
from hepcrawl.testlib.celery_monitor import CeleryMonitor


def get_crawler_instance(crawler_host, *args, **kwargs):
Expand Down Expand Up @@ -149,7 +74,7 @@ def test_wsp_normal_set_of_records(set_up_environment, expected_results):
sleep(10)

results = CeleryMonitor.do_crawl(
app=app,
app=celery_app,
monitor_timeout=5,
monitor_iter_limit=100,
crawler_instance=crawler,
Expand Down
8 changes: 0 additions & 8 deletions tests/functional/__init__.py

This file was deleted.

2 changes: 1 addition & 1 deletion tests/functional/scrapyd_coverage_runner.conf
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,4 @@


[scrapyd]
runner = tests.functional.scrapyd_coverage_runner
runner = hepcrawl.testlib.scrapyd_coverage_runner
1 change: 0 additions & 1 deletion tests/unit/__init__.py

This file was deleted.

9 changes: 7 additions & 2 deletions tests/unit/test_alpha.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,19 @@

from hepcrawl.spiders import alpha_spider

from .responses import fake_response_from_file
from hepcrawl.testlib.fixtures import fake_response_from_file


@pytest.fixture
def results():
"""Return results generator from the Alpha spider."""
spider = alpha_spider.AlphaSpider()
records = list(spider.parse(fake_response_from_file('alpha/test_1.htm')))
records = list(
spider.parse(
fake_response_from_file('alpha/test_1.htm')
)
)

assert records
return records

Expand Down
3 changes: 2 additions & 1 deletion tests/unit/test_aps.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import pytest

from hepcrawl.spiders import aps_spider
from .responses import fake_response_from_file
from hepcrawl.testlib.fixtures import fake_response_from_file


@pytest.fixture
Expand All @@ -29,6 +29,7 @@ def results():
)
)
)

assert records
return records

Expand Down
Loading

0 comments on commit e25033e

Please sign in to comment.