Merge pull request #104 from spirosdelviniotis/hepcrawl_refactor_tests
global: add testlib module for reusability
david-caro authored May 10, 2017
2 parents 69662a8 + d1ddf43 commit e25033e
Showing 35 changed files with 204 additions and 153 deletions.
4 changes: 2 additions & 2 deletions docker-compose.deps.yml
@@ -12,8 +12,8 @@ version: '2'
 services:
   pip:
     build:
-      context: ${PWD}/tests/functional
-      dockerfile: hepcrawl_base.dockerfile
+      context: ${PWD}/tests
+      dockerfile: Dockerfile.hepcrawl_base
     image: hepcrawl_base
     command: bash -c "pip install -e .[all] && pip freeze"
     volumes:
4 changes: 2 additions & 2 deletions docker-compose.test.yml
@@ -16,7 +16,7 @@ services:
       - APP_BROKER_URL=amqp://guest:guest@rabbitmq:5672//
       - APP_CELERY_RESULT_BACKEND=amqp://guest:guest@rabbitmq:5672//
       - APP_CRAWLER_HOST_URL=http://scrapyd:6800
-      - APP_API_PIPELINE_TASK_ENDPOINT_DEFAULT=tests.functional.tasks.submit_results
+      - APP_API_PIPELINE_TASK_ENDPOINT_DEFAULT=hepcrawl.testlib.tasks.submit_results
      - COVERAGE_PROCESS_START=/code/.coveragerc
     command: py.test -vv tests/functional/WSP/test_wsp.py
     volumes: &common_volumes
@@ -44,7 +44,7 @@ services:
   celery:
     image: hepcrawl_base
     environment: *env_variables
-    command: celery worker --events --app tests.functional.tasks --loglevel=debug
+    command: celery worker --events --app hepcrawl.testlib.tasks --loglevel=debug
     volumes: *common_volumes
     links:
       - rabbitmq
4 changes: 3 additions & 1 deletion tests/__init__.py → hepcrawl/testlib/__init__.py
@@ -1,8 +1,10 @@
 # -*- coding: utf-8 -*-
 #
 # This file is part of hepcrawl.
-# Copyright (C) 2015, 2016, 2017 CERN.
+# Copyright (C) 2017 CERN.
 #
 # hepcrawl is a free software; you can redistribute it and/or modify it
 # under the terms of the Revised BSD License; see LICENSE file for
 # more details.
+
+from __future__ import absolute_import, print_function, unicode_literals
91 changes: 91 additions & 0 deletions hepcrawl/testlib/celery_monitor.py
@@ -0,0 +1,91 @@
+# -*- coding: utf-8 -*-
+#
+# This file is part of hepcrawl.
+# Copyright (C) 2017 CERN.
+#
+# hepcrawl is a free software; you can redistribute it and/or modify it
+# under the terms of the Revised BSD License; see LICENSE file for
+# more details.
+
+"""Celery monitor dealing with celery tasks for functional tests."""
+
+from __future__ import absolute_import, print_function, unicode_literals
+
+from itertools import islice
+
+import logging
+
+LOGGER = logging.getLogger(__name__)
+
+
+class CeleryMonitor(object):
+    def __init__(self, app, monitor_timeout=3, monitor_iter_limit=100):
+        self.results = []
+        self.recv = None
+        self.app = app
+        self.connection = None
+        self.monitor_timeout = monitor_timeout
+        self.monitor_iter_limit = monitor_iter_limit
+
+    def __enter__(self):
+        state = self.app.events.State()
+
+        def announce_succeeded_tasks(event):
+            state.event(event)
+            task = state.tasks.get(event['uuid'])
+            LOGGER.info('TASK SUCCEEDED: %s[%s] %s' % (task.name, task.uuid, task.info(),))
+            tasks = self.app.AsyncResult(task.id)
+            for task in tasks.result:
+                self.results.append(task)
+            self.recv.should_stop = True
+
+        def announce_failed_tasks(event):
+            state.event(event)
+            task = state.tasks.get(event['uuid'])
+            LOGGER.info('TASK FAILED: %s[%s] %s' % (task.name, task.uuid, task.info(),))
+            self.results.append(task.info())
+            self.recv.should_stop = True
+
+        self.app.control.enable_events()
+        self.connection = self.app.connection()
+        self.recv = self.app.events.Receiver(self.connection, handlers={
+            'task-succeeded': announce_succeeded_tasks,
+            'task-failed': announce_failed_tasks,
+        })
+
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        events_iter = self.recv.itercapture(limit=None, timeout=self.monitor_timeout, wakeup=True)
+        self._wait_for_results(events_iter)
+        self.connection.__exit__()
+
+    def _wait_for_results(self, events_iter):
+        any(islice(
+            events_iter,  # iterable
+            self.monitor_iter_limit  # stop
+        ))
+
+    @classmethod
+    def do_crawl(
+        cls,
+        app,
+        monitor_timeout,
+        monitor_iter_limit,
+        crawler_instance,
+        project='hepcrawl',
+        spider='WSP',
+        settings=None,
+        **crawler_arguments
+    ):
+        settings = settings or {}
+
+        with cls(app, monitor_timeout=monitor_timeout, monitor_iter_limit=monitor_iter_limit) as my_monitor:
+            crawler_instance.schedule(
+                project=project,
+                spider=spider,
+                settings=settings,
+                **crawler_arguments
+            )
+
+        return my_monitor.results
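
For orientation, here is a minimal usage sketch of the new module (not part of the commit); it mirrors the call in tests/functional/WSP/test_wsp.py further down, and the Scrapyd URL is an assumption taken from APP_CRAWLER_HOST_URL in docker-compose.test.yml:

# Usage sketch (illustrative, not part of the commit).
from scrapyd_api import ScrapydAPI

from hepcrawl.testlib.celery_monitor import CeleryMonitor
from hepcrawl.testlib.tasks import app as celery_app

# Assumed endpoint; matches APP_CRAWLER_HOST_URL in docker-compose.test.yml.
crawler = ScrapydAPI('http://scrapyd:6800')

# Schedules the WSP spider via scrapyd, then blocks on celery events until a
# task-succeeded or task-failed event arrives (or the event limit is reached).
results = CeleryMonitor.do_crawl(
    app=celery_app,
    monitor_timeout=5,
    monitor_iter_limit=100,
    crawler_instance=crawler,
    project='hepcrawl',
    spider='WSP',
)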
34 changes: 29 additions & 5 deletions tests/unit/responses/__init__.py → hepcrawl/testlib/fixtures.py
@@ -1,12 +1,14 @@
 # -*- coding: utf-8 -*-
 #
 # This file is part of hepcrawl.
-# Copyright (C) 2015 CERN.
+# Copyright (C) 2015, 2016, 2017 CERN.
 #
 # hepcrawl is a free software; you can redistribute it and/or modify it
 # under the terms of the Revised BSD License; see LICENSE file for
 # more details.
 
+from __future__ import absolute_import, division, print_function
+
 import os
 
 from scrapy.http import Request, TextResponse
@@ -24,12 +26,10 @@ def fake_response_from_file(file_name, url='http://www.example.com', response_type=TextResponse):
     :returns: A scrapy HTTP response which can be used for unittesting.
     """
     meta = {}
-    request = Request(url=url)
 
     if not file_name[0] == '/':
-        responses_dir = os.path.dirname(os.path.realpath(__file__))
-        file_path = os.path.join(responses_dir, file_name)
+        file_path = get_responses_path(file_name)
     else:
         file_path = file_name
 
@@ -47,7 +47,6 @@ def fake_response_from_string(text, url='http://www.example.com', response_type=TextResponse):
 
 def fake_response_from_string(text, url='http://www.example.com', response_type=TextResponse):
     """Fake Scrapy response from a string."""
-    meta = {}
     request = Request(url=url)
     response = response_type(
         url=url,
@@ -65,6 +64,31 @@ def get_node(spider, tag, response=None, text=None, rtype="xml"):
         selector = Selector(response, type=rtype)
     elif text:
         selector = Selector(text=text, type=rtype)
 
     spider._register_namespaces(selector)
     node = selector.xpath(tag)
     return node
+
+
+def get_responses_path(*path_chunks):
+    """
+    :param path_chunks: Optional extra path elements to suffix the responses directory with.
+    :return: The absolute path to the responses directory, or, if path_chunks are
+        provided, the absolute path to those path chunks.
+    :Example:
+        >>> get_responses_path()
+        '/home/myuser/hepcrawl/tests/unit/responses'
+        >>> get_responses_path('one', 'two')
+        '/home/myuser/hepcrawl/tests/unit/responses/one/two'
+    """
+    project_root_dir = os.path.abspath(
+        os.path.join(
+            os.path.dirname(os.path.abspath(__file__)),
+            '..',
+            '..',
+        )
+    )
+    return os.path.join(project_root_dir, 'tests', 'unit', 'responses', *path_chunks)
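
As a quick illustration (not part of the commit), a unit test can now build fake responses through the shared module instead of a package-relative import; the fixture path mirrors tests/unit/test_alpha.py below:

# Usage sketch (illustrative, not part of the commit).
from hepcrawl.spiders import alpha_spider
from hepcrawl.testlib.fixtures import fake_response_from_file, get_responses_path

# Builds a fake scrapy response whose body is read from
# tests/unit/responses/alpha/test_1.htm (resolved via get_responses_path).
response = fake_response_from_file('alpha/test_1.htm')
records = list(alpha_spider.AlphaSpider().parse(response))

# get_responses_path can also be called directly to locate fixture files.
fixture_path = get_responses_path('alpha', 'test_1.htm')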
File renamed without changes.
2 changes: 1 addition & 1 deletion tests/functional/tasks.py → hepcrawl/testlib/tasks.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 #
 # This file is part of hepcrawl.
-# Copyright (C) 2015, 2016, 2017 CERN.
+# Copyright (C) 2017 CERN.
 #
 # hepcrawl is a free software; you can redistribute it and/or modify it
 # under the terms of the Revised BSD License; see LICENSE file for
File renamed without changes.
File renamed without changes.
8 changes: 0 additions & 8 deletions tests/functional/WSP/__init__.py

This file was deleted.

81 changes: 3 additions & 78 deletions tests/functional/WSP/test_wsp.py
@@ -15,86 +15,11 @@
 import json
 import os
 
-from itertools import islice
 from scrapyd_api import ScrapydAPI
 from time import sleep
 
-from tests.functional.tasks import app
-
-
-class CeleryMonitor(object):
-    def __init__(self, app, monitor_timeout=3, monitor_iter_limit=100):
-        self.results = []
-        self.recv = None
-        self.app = app
-        self.connection = None
-        self.monitor_timeout = monitor_timeout
-        self.monitor_iter_limit = monitor_iter_limit
-
-    def __enter__(self):
-        state = self.app.events.State()
-
-        def announce_succeeded_tasks(event):
-            state.event(event)
-            task = state.tasks.get(event['uuid'])
-            print('TASK SUCCEEDED: %s[%s] %s' % (task.name, task.uuid, task.info(),))
-            tasks = app.AsyncResult(task.id)
-            for task in tasks.result:
-                self.results.append(task)
-            self.recv.should_stop = True
-
-        def announce_failed_tasks(event):
-            state.event(event)
-            task = state.tasks.get(event['uuid'])
-            print('TASK FAILED: %s[%s] %s' % (task.name, task.uuid, task.info(),))
-            self.results.append(task.info())
-            self.recv.should_stop = True
-
-        self.app.control.enable_events()
-        self.connection = self.app.connection()
-        self.recv = self.app.events.Receiver(self.connection, handlers={
-            'task-succeeded': announce_succeeded_tasks,
-            'task-failed': announce_failed_tasks,
-        })
-
-        return self
-
-    def __exit__(self, exc_type, exc_val, exc_tb):
-        events_iter = self.recv.itercapture(limit=None, timeout=self.monitor_timeout, wakeup=True)
-        self._wait_for_results(events_iter)
-        self.connection.__exit__()
-
-    def _wait_for_results(self, events_iter):
-        any(islice(
-            events_iter,  # iterable
-            self.monitor_iter_limit  # stop
-        ))
-
-    @classmethod
-    def do_crawl(
-        cls,
-        app,
-        monitor_timeout,
-        monitor_iter_limit,
-        crawler_instance,
-        project='hepcrawl',
-        spider='WSP',
-        settings=None,
-        **crawler_arguments
-    ):
-
-        if settings is None:
-            settings = {}
-
-        with cls(app, monitor_timeout=monitor_timeout, monitor_iter_limit=monitor_iter_limit) as my_monitor:
-            crawler_instance.schedule(
-                project=project,
-                spider=spider,
-                settings=settings or {},
-                **crawler_arguments
-            )
-
-        return my_monitor.results
+from hepcrawl.testlib.tasks import app as celery_app
+from hepcrawl.testlib.celery_monitor import CeleryMonitor
 
 
 def get_crawler_instance(crawler_host, *args, **kwargs):
@@ -149,7 +74,7 @@ def test_wsp_normal_set_of_records(set_up_environment, expected_results):
     sleep(10)
 
     results = CeleryMonitor.do_crawl(
-        app=app,
+        app=celery_app,
         monitor_timeout=5,
         monitor_iter_limit=100,
         crawler_instance=crawler,
8 changes: 0 additions & 8 deletions tests/functional/__init__.py

This file was deleted.

2 changes: 1 addition & 1 deletion tests/functional/scrapyd_coverage_runner.conf
@@ -9,4 +9,4 @@
 
 
 [scrapyd]
-runner = tests.functional.scrapyd_coverage_runner
+runner = hepcrawl.testlib.scrapyd_coverage_runner
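
The renamed runner module itself is not shown in this diff. A runner of this kind is typically a thin wrapper that starts coverage collection before handing control to scrapyd; the following is a minimal sketch, assuming coverage.process_startup() and scrapyd.runner.main (assumptions, not content from the commit):

# Hypothetical sketch of hepcrawl/testlib/scrapyd_coverage_runner.py.
import coverage

# Start measuring coverage in this subprocess; reads the config file named by
# the COVERAGE_PROCESS_START env var set in docker-compose.test.yml.
coverage.process_startup()

from scrapyd.runner import main

if __name__ == '__main__':
    main()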
1 change: 0 additions & 1 deletion tests/unit/__init__.py

This file was deleted.

9 changes: 7 additions & 2 deletions tests/unit/test_alpha.py
@@ -13,14 +13,19 @@
 
 from hepcrawl.spiders import alpha_spider
 
-from .responses import fake_response_from_file
+from hepcrawl.testlib.fixtures import fake_response_from_file
 
 
 @pytest.fixture
 def results():
     """Return results generator from the Alpha spider."""
     spider = alpha_spider.AlphaSpider()
-    records = list(spider.parse(fake_response_from_file('alpha/test_1.htm')))
+    records = list(
+        spider.parse(
+            fake_response_from_file('alpha/test_1.htm')
+        )
+    )
+
     assert records
     return records
 
3 changes: 2 additions & 1 deletion tests/unit/test_aps.py
@@ -12,7 +12,7 @@
 import pytest
 
 from hepcrawl.spiders import aps_spider
-from .responses import fake_response_from_file
+from hepcrawl.testlib.fixtures import fake_response_from_file
 
 
 @pytest.fixture
@@ -29,6 +29,7 @@ def results():
             )
         )
     )
+
     assert records
     return records
 