diff --git a/docker-compose.deps.yml b/docker-compose.deps.yml
index 937082a1..7127c491 100644
--- a/docker-compose.deps.yml
+++ b/docker-compose.deps.yml
@@ -12,8 +12,8 @@ version: '2'
 services:
   pip:
     build:
-      context: ${PWD}/tests/functional
-      dockerfile: hepcrawl_base.dockerfile
+      context: ${PWD}/tests
+      dockerfile: Dockerfile.hepcrawl_base
     image: hepcrawl_base
     command: bash -c "pip install -e .[all] && pip freeze"
     volumes:
diff --git a/docker-compose.test.yml b/docker-compose.test.yml
index 917400b8..e188e7f9 100644
--- a/docker-compose.test.yml
+++ b/docker-compose.test.yml
@@ -16,7 +16,7 @@ services:
       - APP_BROKER_URL=amqp://guest:guest@rabbitmq:5672//
       - APP_CELERY_RESULT_BACKEND=amqp://guest:guest@rabbitmq:5672//
       - APP_CRAWLER_HOST_URL=http://scrapyd:6800
-      - APP_API_PIPELINE_TASK_ENDPOINT_DEFAULT=tests.functional.tasks.submit_results
+      - APP_API_PIPELINE_TASK_ENDPOINT_DEFAULT=hepcrawl.testlib.tasks.submit_results
       - COVERAGE_PROCESS_START=/code/.coveragerc
     command: py.test -vv tests/functional/WSP/test_wsp.py
     volumes: &common_volumes
@@ -44,7 +44,7 @@ services:
   celery:
     image: hepcrawl_base
     environment: *env_variables
-    command: celery worker --events --app tests.functional.tasks --loglevel=debug
+    command: celery worker --events --app hepcrawl.testlib.tasks --loglevel=debug
     volumes: *common_volumes
     links:
       - rabbitmq
diff --git a/tests/__init__.py b/hepcrawl/testlib/__init__.py
similarity index 68%
rename from tests/__init__.py
rename to hepcrawl/testlib/__init__.py
index e8c02e63..9d0c62af 100644
--- a/tests/__init__.py
+++ b/hepcrawl/testlib/__init__.py
@@ -1,8 +1,10 @@
 # -*- coding: utf-8 -*-
 #
 # This file is part of hepcrawl.
-# Copyright (C) 2015, 2016, 2017 CERN.
+# Copyright (C) 2017 CERN.
 #
 # hepcrawl is a free software; you can redistribute it and/or modify it
 # under the terms of the Revised BSD License; see LICENSE file for
 # more details.
+
+from __future__ import absolute_import, print_function, unicode_literals
diff --git a/hepcrawl/testlib/celery_monitor.py b/hepcrawl/testlib/celery_monitor.py
new file mode 100644
index 00000000..12cb86b4
--- /dev/null
+++ b/hepcrawl/testlib/celery_monitor.py
@@ -0,0 +1,91 @@
+# -*- coding: utf-8 -*-
+#
+# This file is part of hepcrawl.
+# Copyright (C) 2017 CERN.
+#
+# hepcrawl is a free software; you can redistribute it and/or modify it
+# under the terms of the Revised BSD License; see LICENSE file for
+# more details.
+ +"""Celery monitor dealing with celery tasks for functional tests.""" + +from __future__ import absolute_import, print_function, unicode_literals + +from itertools import islice + +import logging + +LOGGER = logging.getLogger(__name__) + + +class CeleryMonitor(object): + def __init__(self, app, monitor_timeout=3, monitor_iter_limit=100): + self.results = [] + self.recv = None + self.app = app + self.connection = None + self.monitor_timeout = monitor_timeout + self.monitor_iter_limit = monitor_iter_limit + + def __enter__(self): + state = self.app.events.State() + + def announce_succeeded_tasks(event): + state.event(event) + task = state.tasks.get(event['uuid']) + LOGGER.info('TASK SUCCEEDED: %s[%s] %s' % (task.name, task.uuid, task.info(),)) + tasks = self.app.AsyncResult(task.id) + for task in tasks.result: + self.results.append(task) + self.recv.should_stop = True + + def announce_failed_tasks(event): + state.event(event) + task = state.tasks.get(event['uuid']) + LOGGER.info('TASK FAILED: %s[%s] %s' % (task.name, task.uuid, task.info(),)) + self.results.append(task.info()) + self.recv.should_stop = True + + self.app.control.enable_events() + self.connection = self.app.connection() + self.recv = self.app.events.Receiver(self.connection, handlers={ + 'task-succeeded': announce_succeeded_tasks, + 'task-failed': announce_failed_tasks, + }) + + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + events_iter = self.recv.itercapture(limit=None, timeout=self.monitor_timeout, wakeup=True) + self._wait_for_results(events_iter) + self.connection.__exit__() + + def _wait_for_results(self, events_iter): + any(islice( + events_iter, # iterable + self.monitor_iter_limit # stop + )) + + @classmethod + def do_crawl( + cls, + app, + monitor_timeout, + monitor_iter_limit, + crawler_instance, + project='hepcrawl', + spider='WSP', + settings=None, + **crawler_arguments + ): + settings = settings or {} + + with cls(app, monitor_timeout=monitor_timeout, monitor_iter_limit=monitor_iter_limit) as my_monitor: + crawler_instance.schedule( + project=project, + spider=spider, + settings=settings, + **crawler_arguments + ) + + return my_monitor.results diff --git a/tests/unit/responses/__init__.py b/hepcrawl/testlib/fixtures.py similarity index 68% rename from tests/unit/responses/__init__.py rename to hepcrawl/testlib/fixtures.py index 7502e0b2..49ef9924 100644 --- a/tests/unit/responses/__init__.py +++ b/hepcrawl/testlib/fixtures.py @@ -1,12 +1,14 @@ # -*- coding: utf-8 -*- # # This file is part of hepcrawl. -# Copyright (C) 2015 CERN. +# Copyright (C) 2015, 2016, 2017 CERN. # # hepcrawl is a free software; you can redistribute it and/or modify it # under the terms of the Revised BSD License; see LICENSE file for # more details. +from __future__ import absolute_import, division, print_function + import os from scrapy.http import Request, TextResponse @@ -24,12 +26,10 @@ def fake_response_from_file(file_name, url='http://www.example.com', response_ty :returns: A scrapy HTTP response which can be used for unittesting. 
""" - meta = {} request = Request(url=url) if not file_name[0] == '/': - responses_dir = os.path.dirname(os.path.realpath(__file__)) - file_path = os.path.join(responses_dir, file_name) + file_path = get_responses_path(file_name) else: file_path = file_name @@ -47,7 +47,6 @@ def fake_response_from_file(file_name, url='http://www.example.com', response_ty def fake_response_from_string(text, url='http://www.example.com', response_type=TextResponse): """Fake Scrapy response from a string.""" - meta = {} request = Request(url=url) response = response_type( url=url, @@ -65,6 +64,31 @@ def get_node(spider, tag, response=None, text=None, rtype="xml"): selector = Selector(response, type=rtype) elif text: selector = Selector(text=text, type=rtype) + spider._register_namespaces(selector) node = selector.xpath(tag) return node + + +def get_responses_path(*path_chunks): + """ + :param path_chunks: Optional extra path element to suffix the responses directory with. + :return: The absolute path to the responses and if path_chuncks provided the absolute + path to path chunks. + + :Example: + + >>> get_responses_path() + '/home/myuser/hepcrawl/tests/responses' + + >>> get_responses_path('one', 'two') + '/home/myuser/hepcrawl/tests/responses/one/two' + """ + project_root_dir = os.path.abspath( + os.path.join( + os.path.dirname(os.path.abspath(__file__)), + '..', + '..', + ) + ) + return os.path.join(project_root_dir, 'tests', 'unit', 'responses', *path_chunks) diff --git a/tests/functional/scrapyd_coverage_runner.py b/hepcrawl/testlib/scrapyd_coverage_runner.py similarity index 100% rename from tests/functional/scrapyd_coverage_runner.py rename to hepcrawl/testlib/scrapyd_coverage_runner.py diff --git a/tests/functional/tasks.py b/hepcrawl/testlib/tasks.py similarity index 97% rename from tests/functional/tasks.py rename to hepcrawl/testlib/tasks.py index bdef104b..d76588fc 100644 --- a/tests/functional/tasks.py +++ b/hepcrawl/testlib/tasks.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # # This file is part of hepcrawl. -# Copyright (C) 2015, 2016, 2017 CERN. +# Copyright (C) 2017 CERN. # # hepcrawl is a free software; you can redistribute it and/or modify it # under the terms of the Revised BSD License; see LICENSE file for diff --git a/tests/functional/hepcrawl_base.dockerfile b/tests/Dockerfile.hepcrawl_base similarity index 100% rename from tests/functional/hepcrawl_base.dockerfile rename to tests/Dockerfile.hepcrawl_base diff --git a/tests/functional/docker_entrypoint.sh b/tests/docker_entrypoint.sh similarity index 100% rename from tests/functional/docker_entrypoint.sh rename to tests/docker_entrypoint.sh diff --git a/tests/functional/WSP/__init__.py b/tests/functional/WSP/__init__.py deleted file mode 100644 index e8c02e63..00000000 --- a/tests/functional/WSP/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -# -*- coding: utf-8 -*- -# -# This file is part of hepcrawl. -# Copyright (C) 2015, 2016, 2017 CERN. -# -# hepcrawl is a free software; you can redistribute it and/or modify it -# under the terms of the Revised BSD License; see LICENSE file for -# more details. 
diff --git a/tests/functional/WSP/test_wsp.py b/tests/functional/WSP/test_wsp.py
index dbd6c443..8c75f89d 100644
--- a/tests/functional/WSP/test_wsp.py
+++ b/tests/functional/WSP/test_wsp.py
@@ -15,86 +15,11 @@
 import json
 import os
-from itertools import islice
 
 from scrapyd_api import ScrapydAPI
 from time import sleep
 
-from tests.functional.tasks import app
-
-
-class CeleryMonitor(object):
-    def __init__(self, app, monitor_timeout=3, monitor_iter_limit=100):
-        self.results = []
-        self.recv = None
-        self.app = app
-        self.connection = None
-        self.monitor_timeout = monitor_timeout
-        self.monitor_iter_limit = monitor_iter_limit
-
-    def __enter__(self):
-        state = self.app.events.State()
-
-        def announce_succeeded_tasks(event):
-            state.event(event)
-            task = state.tasks.get(event['uuid'])
-            print('TASK SUCCEEDED: %s[%s] %s' % (task.name, task.uuid, task.info(),))
-            tasks = app.AsyncResult(task.id)
-            for task in tasks.result:
-                self.results.append(task)
-            self.recv.should_stop = True
-
-        def announce_failed_tasks(event):
-            state.event(event)
-            task = state.tasks.get(event['uuid'])
-            print('TASK FAILED: %s[%s] %s' % (task.name, task.uuid, task.info(),))
-            self.results.append(task.info())
-            self.recv.should_stop = True
-
-        self.app.control.enable_events()
-        self.connection = self.app.connection()
-        self.recv = self.app.events.Receiver(self.connection, handlers={
-            'task-succeeded': announce_succeeded_tasks,
-            'task-failed': announce_failed_tasks,
-        })
-
-        return self
-
-    def __exit__(self, exc_type, exc_val, exc_tb):
-        events_iter = self.recv.itercapture(limit=None, timeout=self.monitor_timeout, wakeup=True)
-        self._wait_for_results(events_iter)
-        self.connection.__exit__()
-
-    def _wait_for_results(self, events_iter):
-        any(islice(
-            events_iter,  # iterable
-            self.monitor_iter_limit  # stop
-        ))
-
-    @classmethod
-    def do_crawl(
-        cls,
-        app,
-        monitor_timeout,
-        monitor_iter_limit,
-        crawler_instance,
-        project='hepcrawl',
-        spider='WSP',
-        settings=None,
-        **crawler_arguments
-    ):
-
-        if settings is None:
-            settings = {}
-
-        with cls(app, monitor_timeout=monitor_timeout, monitor_iter_limit=monitor_iter_limit) as my_monitor:
-            crawler_instance.schedule(
-                project=project,
-                spider=spider,
-                settings=settings or {},
-                **crawler_arguments
-            )
-
-        return my_monitor.results
+from hepcrawl.testlib.tasks import app as celery_app
+from hepcrawl.testlib.celery_monitor import CeleryMonitor
 
 
 def get_crawler_instance(crawler_host, *args, **kwargs):
@@ -149,7 +74,7 @@ def test_wsp_normal_set_of_records(set_up_environment, expected_results):
     sleep(10)
 
     results = CeleryMonitor.do_crawl(
-        app=app,
+        app=celery_app,
         monitor_timeout=5,
         monitor_iter_limit=100,
         crawler_instance=crawler,
diff --git a/tests/functional/__init__.py b/tests/functional/__init__.py
deleted file mode 100644
index e8c02e63..00000000
--- a/tests/functional/__init__.py
+++ /dev/null
@@ -1,8 +0,0 @@
-# -*- coding: utf-8 -*-
-#
-# This file is part of hepcrawl.
-# Copyright (C) 2015, 2016, 2017 CERN.
-#
-# hepcrawl is a free software; you can redistribute it and/or modify it
-# under the terms of the Revised BSD License; see LICENSE file for
-# more details.
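Note: test_wsp.py now only drives the crawl; the monitoring logic lives in hepcrawl.testlib.celery_monitor. The call pattern, condensed from the test above (the Scrapyd URL mirrors APP_CRAWLER_HOST_URL in docker-compose.test.yml):

    from scrapyd_api import ScrapydAPI

    from hepcrawl.testlib.celery_monitor import CeleryMonitor
    from hepcrawl.testlib.tasks import app as celery_app

    crawler = ScrapydAPI('http://scrapyd:6800')

    # Schedules the spider, then blocks until a task-succeeded or
    # task-failed Celery event arrives (or the iteration limit is hit).
    results = CeleryMonitor.do_crawl(
        app=celery_app,
        monitor_timeout=5,
        monitor_iter_limit=100,
        crawler_instance=crawler,
        project='hepcrawl',
        spider='WSP',
    )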
diff --git a/tests/functional/scrapyd_coverage_runner.conf b/tests/functional/scrapyd_coverage_runner.conf
index 3851724e..426f9e3c 100644
--- a/tests/functional/scrapyd_coverage_runner.conf
+++ b/tests/functional/scrapyd_coverage_runner.conf
@@ -9,4 +9,4 @@
 
 [scrapyd]
-runner = tests.functional.scrapyd_coverage_runner
+runner = hepcrawl.testlib.scrapyd_coverage_runner
diff --git a/tests/unit/__init__.py b/tests/unit/__init__.py
deleted file mode 100644
index 8d1c8b69..00000000
--- a/tests/unit/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/tests/unit/test_alpha.py b/tests/unit/test_alpha.py
index ac683ac6..eaa32bf4 100644
--- a/tests/unit/test_alpha.py
+++ b/tests/unit/test_alpha.py
@@ -13,14 +13,19 @@
 
 from hepcrawl.spiders import alpha_spider
 
-from .responses import fake_response_from_file
+from hepcrawl.testlib.fixtures import fake_response_from_file
 
 
 @pytest.fixture
 def results():
     """Return results generator from the Alpha spider."""
     spider = alpha_spider.AlphaSpider()
-    records = list(spider.parse(fake_response_from_file('alpha/test_1.htm')))
+    records = list(
+        spider.parse(
+            fake_response_from_file('alpha/test_1.htm')
+        )
+    )
+    assert records
 
     return records
diff --git a/tests/unit/test_aps.py b/tests/unit/test_aps.py
index 39c8b01d..4e774e91 100644
--- a/tests/unit/test_aps.py
+++ b/tests/unit/test_aps.py
@@ -12,7 +12,7 @@
 import pytest
 
 from hepcrawl.spiders import aps_spider
-from .responses import fake_response_from_file
+from hepcrawl.testlib.fixtures import fake_response_from_file
 
 
 @pytest.fixture
@@ -29,6 +29,7 @@ def results():
             )
         )
     )
+    assert records
 
     return records
diff --git a/tests/unit/test_arxiv_all.py b/tests/unit/test_arxiv_all.py
index 2b9e4c80..f0e8d07e 100644
--- a/tests/unit/test_arxiv_all.py
+++ b/tests/unit/test_arxiv_all.py
@@ -15,7 +15,7 @@
 
 from hepcrawl.pipelines import InspireCeleryPushPipeline
 from hepcrawl.spiders import arxiv_spider
-from .responses import fake_response_from_file
+from hepcrawl.testlib.fixtures import fake_response_from_file
 
 
 @pytest.fixture
@@ -32,12 +32,15 @@ def one_result(spider):
     """
     from scrapy.http import TextResponse
 
-    records = list(spider.parse(
-        fake_response_from_file(
-            'arxiv/sample_arxiv_record0.xml',
-            response_type=TextResponse
+    records = list(
+        spider.parse(
+            fake_response_from_file(
+                'arxiv/sample_arxiv_record0.xml',
+                response_type=TextResponse,
+            )
         )
-    ))
+    )
+    assert records
 
     pipeline = InspireCeleryPushPipeline()
     pipeline.open_spider(spider)
@@ -51,12 +54,15 @@ def many_results(spider):
     """
     from scrapy.http import TextResponse
 
-    records = list(spider.parse(
-        fake_response_from_file(
-            'arxiv/sample_arxiv_record.xml',
-            response_type=TextResponse
+    records = list(
+        spider.parse(
+            fake_response_from_file(
+                'arxiv/sample_arxiv_record.xml',
+                response_type=TextResponse,
+            )
         )
-    ))
+    )
+    assert records
 
     pipeline = InspireCeleryPushPipeline()
     pipeline.open_spider(spider)
diff --git a/tests/unit/test_arxiv_single.py b/tests/unit/test_arxiv_single.py
index 009f56e3..700a1517 100644
--- a/tests/unit/test_arxiv_single.py
+++ b/tests/unit/test_arxiv_single.py
@@ -17,7 +17,7 @@
 
 from hepcrawl.pipelines import InspireCeleryPushPipeline
 from hepcrawl.spiders import arxiv_spider
-from .responses import fake_response_from_file
+from hepcrawl.testlib.fixtures import fake_response_from_file
 
 
 @pytest.fixture
@@ -27,12 +27,15 @@ def results():
     crawler = Crawler(spidercls=arxiv_spider.ArxivSpider)
     spider = arxiv_spider.ArxivSpider.from_crawler(crawler)
 
-    records = list(spider.parse(
-        fake_response_from_file(
-            'arxiv/sample_arxiv_record0.xml',
-            response_type=TextResponse
+    records = list(
+        spider.parse(
+            fake_response_from_file(
+                'arxiv/sample_arxiv_record0.xml',
+                response_type=TextResponse,
+            )
         )
-    ))
+    )
+    assert records
 
     pipeline = InspireCeleryPushPipeline()
     pipeline.open_spider(spider)
diff --git a/tests/unit/test_base.py b/tests/unit/test_base.py
index 657d8b48..18110426 100644
--- a/tests/unit/test_base.py
+++ b/tests/unit/test_base.py
@@ -19,7 +19,7 @@
 import hepcrawl
 
 from hepcrawl.spiders import base_spider
-from .responses import (
+from hepcrawl.testlib.fixtures import (
     fake_response_from_file,
     fake_response_from_string,
     get_node,
@@ -31,6 +31,7 @@ def record():
     """Return built HEPRecord from the BASE spider."""
     spider = base_spider.BaseSpider()
     response = fake_response_from_file('base/test_1.xml')
+
     selector = Selector(response, type='xml')
     spider._register_namespaces(selector)
     nodes = selector.xpath('.//%s' % spider.itertag)
diff --git a/tests/unit/test_brown.py b/tests/unit/test_brown.py
index 5b669b60..42f34573 100644
--- a/tests/unit/test_brown.py
+++ b/tests/unit/test_brown.py
@@ -19,7 +19,7 @@
 
 from hepcrawl.spiders import brown_spider
 
-from .responses import (
+from hepcrawl.testlib.fixtures import (
     fake_response_from_file,
     fake_response_from_string,
 )
diff --git a/tests/unit/test_dnb.py b/tests/unit/test_dnb.py
index b03bbe8f..efb34094 100644
--- a/tests/unit/test_dnb.py
+++ b/tests/unit/test_dnb.py
@@ -19,7 +19,7 @@
 from hepcrawl.spiders import dnb_spider
 from hepcrawl.items import HEPRecord
 
-from .responses import (
+from hepcrawl.testlib.fixtures import (
     fake_response_from_file,
     fake_response_from_string,
     get_node,
@@ -54,6 +54,7 @@ def record(scrape_pos_page_body):
     return request.callback(response)
 
 
+@pytest.mark.skip(reason='connecting to external source - Issue #120')
 def test_title(record):
     """Test title."""
     title = "Auslegung und Messungen einer supraleitenden 325 MHz CH-Struktur für Strahlbetrieb"
@@ -61,12 +62,14 @@ def test_title(record):
     assert record["title"] == title
 
 
+@pytest.mark.skip(reason='connecting to external source - Issue #120')
 def test_date_published(record):
     """Test date_published."""
     assert "date_published" in record
     assert record["date_published"] == "2015"
 
 
+@pytest.mark.skip(reason='connecting to external source - Issue #120')
 def test_authors(record):
     """Test authors."""
     authors = ["Busch, Marco"]
@@ -86,30 +89,35 @@ def test_authors(record):
     ]
 
 
+@pytest.mark.skip(reason='connecting to external source - Issue #120')
 def test_supervisors(record):
     """Test thesis supervisors"""
     assert "thesis_supervisor" in record
     assert record["thesis_supervisor"][0]["full_name"] == "Podlech, Holger"
 
 
+@pytest.mark.skip(reason='connecting to external source - Issue #120')
 def test_source(record):
     """Test thesis source"""
     assert "source" in record
     assert record["source"] == 'Univ.-Bibliothek Frankfurt am Main'
 
 
+@pytest.mark.skip(reason='connecting to external source - Issue #120')
 def test_language(record):
     """Test thesis language"""
     assert "language" in record
     assert record["language"][0] == u'German'
 
 
+@pytest.mark.skip(reason='connecting to external source - Issue #120')
 def test_files(record):
     """Test files."""
     assert "file_urls" in record
     assert record["file_urls"][0] == "http://d-nb.info/1079912991/34"
 
 
+@pytest.mark.skip(reason='connecting to external source - Issue #120')
 def test_urls(record):
     """Test url in record."""
     urls = [
@@ -127,12 +135,14 @@ def test_urls(record):
         seen_urls.add(url['value'])
 
 
+@pytest.mark.skip(reason='connecting to external source - Issue #120')
 def test_doctype(record):
     """Test doctype"""
     assert "thesis" in record
     assert record["thesis"]["degree_type"] == "PhD"
 
 
+@pytest.mark.skip(reason='connecting to external source - Issue #120')
 def test_abstract(record):
     """Test that abstract has been fetched correctly."""
     abstract = (
@@ -184,6 +194,8 @@ def test_abstract(record):
     assert "abstract" in record
     assert record["abstract"] == abstract
 
+
+@pytest.mark.skip(reason='connecting to external source - Issue #120')
 def test_page_nr(record):
     """Test that page range is correct."""
     assert "page_nr" in record
diff --git a/tests/unit/test_edp.py b/tests/unit/test_edp.py
index 9da52cf1..8331a6e8 100644
--- a/tests/unit/test_edp.py
+++ b/tests/unit/test_edp.py
@@ -21,7 +21,7 @@
 
 from scrapy.http import HtmlResponse, Request
 
-from .responses import (
+from hepcrawl.testlib.fixtures import (
     fake_response_from_file,
     fake_response_from_string,
     get_node,
diff --git a/tests/unit/test_elsevier.py b/tests/unit/test_elsevier.py
index 40f8dbef..9f2c7a8b 100644
--- a/tests/unit/test_elsevier.py
+++ b/tests/unit/test_elsevier.py
@@ -15,7 +15,7 @@
 
 from hepcrawl.spiders import elsevier_spider
 
-from .responses import (
+from hepcrawl.testlib.fixtures import (
     fake_response_from_file,
     fake_response_from_string,
     get_node,
diff --git a/tests/unit/test_extensions.py b/tests/unit/test_extensions.py
index 8e97f999..a378f641 100644
--- a/tests/unit/test_extensions.py
+++ b/tests/unit/test_extensions.py
@@ -17,7 +17,7 @@
 from hepcrawl.extensions import ErrorHandler
 from hepcrawl.spiders.wsp_spider import WorldScientificSpider
 
-from .responses import fake_response_from_file
+from hepcrawl.testlib.fixtures import fake_response_from_file
 
 
 @pytest.fixture
diff --git a/tests/unit/test_hindawi.py b/tests/unit/test_hindawi.py
index d7d9d4ea..05593a52 100644
--- a/tests/unit/test_hindawi.py
+++ b/tests/unit/test_hindawi.py
@@ -13,7 +13,7 @@
 
 from hepcrawl.spiders import hindawi_spider
 
-from .responses import (
+from hepcrawl.testlib.fixtures import (
     fake_response_from_file,
     get_node,
 )
diff --git a/tests/unit/test_infn.py b/tests/unit/test_infn.py
index 09004806..b54cc91a 100644
--- a/tests/unit/test_infn.py
+++ b/tests/unit/test_infn.py
@@ -17,7 +17,7 @@
 import hepcrawl
 from hepcrawl.spiders import infn_spider
 
-from .responses import (
+from hepcrawl.testlib.fixtures import (
     fake_response_from_file,
     fake_response_from_string,
 )
diff --git a/tests/unit/test_iop.py b/tests/unit/test_iop.py
index 8cea9852..d07b6abb 100644
--- a/tests/unit/test_iop.py
+++ b/tests/unit/test_iop.py
@@ -17,14 +17,14 @@
 
 from hepcrawl.spiders import iop_spider
 
-from .responses import (
+from hepcrawl.testlib.fixtures import (
     fake_response_from_file,
     fake_response_from_string,
     get_node,
+    get_responses_path,
 )
 
-tests_dir = os.path.dirname(os.path.realpath(__file__))
-test_pdf_dir = os.path.join(tests_dir, "responses/iop/pdf/")
+TEST_PDF_DIR = get_responses_path('iop', 'pdf')
 
 
 @pytest.fixture
@@ -33,7 +33,7 @@ def record():
     spider = iop_spider.IOPSpider()
     response = fake_response_from_file('iop/xml/test_standard.xml')
     node = get_node(spider, "Article", response)
-    spider.pdf_files = test_pdf_dir
+    spider.pdf_files = TEST_PDF_DIR
     parsed_record = spider.parse_node(response, node)
     assert parsed_record
     return parsed_record
@@ -150,7 +150,7 @@ def test_files(record):
     assert "additional_files" in record
     assert record["additional_files"][1]["access"] == 'INSPIRE-HIDDEN'
     assert record["additional_files"][1]["type"] == 'Fulltext'
-    assert record["additional_files"][1]["url"] == test_pdf_dir + pdf_filename
+    assert record["additional_files"][1]["url"] == os.path.join(TEST_PDF_DIR, pdf_filename)
 
 
 @pytest.fixture
@@ -173,8 +173,7 @@ def erratum_open_access_record():
     """
     response = fake_response_from_string(body)
     node = get_node(spider, "Article", response)
-    tests_dir = os.path.dirname(os.path.realpath(__file__))
-    spider.pdf_files = os.path.join(tests_dir, "responses/iop/pdf/")
+    spider.pdf_files = get_responses_path('iop', 'pdf')
    parsed_record = spider.parse_node(response, node)
     assert parsed_record
     return parsed_record
@@ -190,7 +189,7 @@ def test_files_erratum_open_access_record(erratum_open_access_record):
     assert erratum_open_access_record[
         "additional_files"][1]["type"] == 'Erratum'
     assert erratum_open_access_record["additional_files"][
-        1]["url"] == test_pdf_dir + pdf_filename
+        1]["url"] == os.path.join(TEST_PDF_DIR, pdf_filename)
 
 
 def test_not_published_record():
@@ -210,8 +209,7 @@ def test_not_published_record():
     """
     response = fake_response_from_string(body)
     node = get_node(spider, "Article", response)
-    tests_dir = os.path.dirname(os.path.realpath(__file__))
-    spider.pdf_files = os.path.join(tests_dir, "responses/iop/pdf/")
+    spider.pdf_files = get_responses_path('iop', 'pdf')
     records = spider.parse_node(response, node)
     assert records is None
diff --git a/tests/unit/test_magic.py b/tests/unit/test_magic.py
index 783febfd..15539ff7 100644
--- a/tests/unit/test_magic.py
+++ b/tests/unit/test_magic.py
@@ -17,7 +17,7 @@
 
 from hepcrawl.spiders import magic_spider
 
-from .responses import (
+from hepcrawl.testlib.fixtures import (
     fake_response_from_file,
     fake_response_from_string,
     get_node,
diff --git a/tests/unit/test_mit.py b/tests/unit/test_mit.py
index 59ac75d6..4dee43e8 100644
--- a/tests/unit/test_mit.py
+++ b/tests/unit/test_mit.py
@@ -13,7 +13,7 @@
 
 from hepcrawl.spiders import mit_spider
 
-from .responses import (
+from hepcrawl.testlib.fixtures import (
     fake_response_from_file,
     fake_response_from_string,
     get_node,
diff --git a/tests/unit/test_phenix.py b/tests/unit/test_phenix.py
index da1c4cba..19efa2da 100644
--- a/tests/unit/test_phenix.py
+++ b/tests/unit/test_phenix.py
@@ -15,7 +15,7 @@
 
 from hepcrawl.spiders import phenix_spider
 
-from .responses import (
+from hepcrawl.testlib.fixtures import (
     fake_response_from_file,
     fake_response_from_string,
     get_node,
diff --git a/tests/unit/test_phil.py b/tests/unit/test_phil.py
index c2fe073f..326a7c8b 100644
--- a/tests/unit/test_phil.py
+++ b/tests/unit/test_phil.py
@@ -15,7 +15,7 @@
 
 from hepcrawl.spiders import phil_spider
 
-from .responses import fake_response_from_file
+from hepcrawl.testlib.fixtures import fake_response_from_file
 
 
 @pytest.fixture
diff --git a/tests/unit/test_pipelines.py b/tests/unit/test_pipelines.py
index ea9c3600..1cbde74b 100644
--- a/tests/unit/test_pipelines.py
+++ b/tests/unit/test_pipelines.py
@@ -21,7 +21,7 @@
 from hepcrawl.spiders import arxiv_spider
 from hepcrawl.pipelines import InspireAPIPushPipeline
 
-from .responses import fake_response_from_file
+from hepcrawl.testlib.fixtures import fake_response_from_file
 
 
 @pytest.fixture
diff --git a/tests/unit/test_pos.py b/tests/unit/test_pos.py
index c4e67d28..32ac588e 100644
--- a/tests/unit/test_pos.py
+++ b/tests/unit/test_pos.py
@@ -19,7 +19,7 @@
 from hepcrawl.pipelines import InspireCeleryPushPipeline
 from hepcrawl.spiders import pos_spider
 
-from .responses import fake_response_from_file
+from hepcrawl.testlib.fixtures import fake_response_from_file
 
 
 @pytest.fixture
diff --git a/tests/unit/test_t2k.py b/tests/unit/test_t2k.py
index 4043bb19..c228b88b 100644
--- a/tests/unit/test_t2k.py
+++ b/tests/unit/test_t2k.py
@@ -17,7 +17,7 @@
 
 from hepcrawl.spiders import t2k_spider
 
-from .responses import fake_response_from_file
+from hepcrawl.testlib.fixtures import fake_response_from_file
 
 
 @pytest.fixture
diff --git a/tests/unit/test_world_scientific.py b/tests/unit/test_world_scientific.py
index 458b9f45..f0785384 100644
--- a/tests/unit/test_world_scientific.py
+++ b/tests/unit/test_world_scientific.py
@@ -18,7 +18,7 @@
 from hepcrawl.pipelines import InspireCeleryPushPipeline
 from hepcrawl.spiders import wsp_spider
 
-from .responses import fake_response_from_file
+from hepcrawl.testlib.fixtures import fake_response_from_file
 
 
 def create_spider():
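Note: CeleryMonitor only drains the event stream in __exit__, so monitor.results is populated after the with block finishes, not while the crawl runs. A sketch of the long-hand form that do_crawl wraps (schedule_crawl() is a hypothetical placeholder for crawler_instance.schedule(...)):

    from hepcrawl.testlib.celery_monitor import CeleryMonitor
    from hepcrawl.testlib.tasks import app as celery_app

    with CeleryMonitor(celery_app, monitor_timeout=5, monitor_iter_limit=100) as monitor:
        schedule_crawl()  # hypothetical; any call that enqueues a Celery-reported job

    # Only now has __exit__ consumed the task-succeeded/task-failed events.
    print(monitor.results)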