tests: adapt tests for all previous changes
Signed-off-by: David Caro <[email protected]>
david-caro committed Aug 22, 2017
1 parent e4b9f43 commit ce8ccc0
Showing 22 changed files with 264 additions and 122 deletions.
1 change: 1 addition & 0 deletions setup.py
@@ -18,6 +18,7 @@
install_requires = [
'autosemver~=0.2',
'inspire-schemas~=42.0',
'inspire-dojson~=41.0',
'Scrapy>=1.1.0',
# TODO: unpin once they support wheel building again
'scrapyd==1.1.0',
1 change: 1 addition & 0 deletions tests/functional/arxiv/test_arxiv.py
@@ -72,6 +72,7 @@ def test_arxiv(set_up_local_environment, expected_results):
app=celery_app,
monitor_timeout=5,
monitor_iter_limit=100,
events_limit=1,
crawler_instance=crawler,
project=set_up_local_environment.get('CRAWLER_PROJECT'),
spider='arXiv',
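For reference, a minimal sketch of how these functional tests drive a crawl with the new `events_limit` argument. The `CeleryMonitor.do_crawl` entry point, the scrapyd host URL, and the project name are assumptions here (the real tests take them from the environment fixtures), and the remaining keyword arguments are elided.

```python
from hepcrawl.testlib.celery_monitor import CeleryMonitor
from hepcrawl.testlib.tasks import app as celery_app
from hepcrawl.testlib.utils import get_crawler_instance

# Hypothetical scrapyd host; the real tests read this from the fixture environment.
crawler = get_crawler_instance('http://scrapyd:6800')

results = CeleryMonitor.do_crawl(  # assumed entry point, per the testlib import
    app=celery_app,
    monitor_timeout=5,
    monitor_iter_limit=100,
    events_limit=1,  # new: stop monitoring after the first batch of crawl results
    crawler_instance=crawler,
    project='hepcrawl',  # hypothetical project name
    spider='arXiv',
)
```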
8 changes: 3 additions & 5 deletions tests/functional/wsp/test_wsp.py
@@ -13,14 +13,14 @@

import pytest
import os
import shutil

from time import sleep

from hepcrawl.testlib.celery_monitor import CeleryMonitor
from hepcrawl.testlib.fixtures import (
get_test_suite_path,
expected_json_results_from_file,
clean_dir,
)
from hepcrawl.testlib.tasks import app as celery_app
from hepcrawl.testlib.utils import get_crawler_instance
@@ -90,10 +90,6 @@ def remove_generated_files(package_location):
os.unlink(os.path.join(package_location, file_name))


def clean_dir(path='/tmp/WSP/'):
shutil.rmtree(path, ignore_errors=True)


@pytest.mark.parametrize(
'expected_results',
[
@@ -114,6 +110,7 @@ def test_wsp_ftp(set_up_ftp_environment, expected_results):
app=celery_app,
monitor_timeout=5,
monitor_iter_limit=100,
events_limit=1,
crawler_instance=crawler,
project=set_up_ftp_environment.get('CRAWLER_PROJECT'),
spider='WSP',
@@ -147,6 +144,7 @@ def test_wsp_local_package_path(set_up_local_environment, expected_results):
app=celery_app,
monitor_timeout=5,
monitor_iter_limit=100,
events_limit=1,
crawler_instance=crawler,
project=set_up_local_environment.get('CRAWLER_PROJECT'),
spider='WSP',
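The local `clean_dir` helper deleted above is now imported from `hepcrawl.testlib.fixtures` (see the updated import block). A minimal sketch of the shared helper follows, assuming it keeps the behaviour of the removed local copy; the default path is carried over from that copy and may differ in the shared version.

```python
import shutil


def clean_dir(path='/tmp/WSP/'):
    """Remove a test output directory, ignoring errors if it is already gone."""
    shutil.rmtree(path, ignore_errors=True)
```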
4 changes: 3 additions & 1 deletion tests/unit/test_alpha.py
@@ -20,13 +20,15 @@
def results():
"""Return results generator from the Alpha spider."""
spider = alpha_spider.AlphaSpider()
records = list(
parsed_items = list(
spider.parse(
fake_response_from_file('alpha/test_1.htm')
)
)

records = [parsed_item.record for parsed_item in parsed_items]
assert records

return records


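The unit-test fixtures now unwrap a `record` attribute from each item the spiders yield instead of treating the yielded objects as records directly. A small illustrative sketch of that pattern follows; the `ParsedItem` name and its fields are hypothetical stand-ins, not the actual hepcrawl class.

```python
import collections

# Hypothetical container for what a spider yields: the built record plus
# any extra metadata the pipeline needs downstream.
ParsedItem = collections.namedtuple('ParsedItem', ['record', 'record_format'])


def records_from(parsed_items):
    """Unwrap plain records, mirroring what the updated fixtures do."""
    return [parsed_item.record for parsed_item in parsed_items]


items = [ParsedItem(record={'titles': [{'title': 'Sample'}]}, record_format='hep')]
assert records_from(items) == [{'titles': [{'title': 'Sample'}]}]
```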
4 changes: 3 additions & 1 deletion tests/unit/test_aps.py
@@ -21,7 +21,7 @@ def results():
from scrapy.http import TextResponse

spider = aps_spider.APSSpider()
records = list(
parsed_items = list(
spider.parse(
fake_response_from_file(
'aps/aps_single_response.json',
@@ -30,6 +30,8 @@
)
)

records = [parsed_item.record for parsed_item in parsed_items]

assert records
return records

35 changes: 8 additions & 27 deletions tests/unit/test_arxiv_all.py
@@ -11,7 +11,8 @@

import pytest

from scrapy.crawler import Crawler
from scrapy.crawler import Crawler
from scrapy.http import TextResponse

from hepcrawl.pipelines import InspireCeleryPushPipeline
from hepcrawl.spiders import arxiv_spider
@@ -25,36 +26,16 @@ def spider():
return spider


@pytest.fixture
def one_result(spider):
"""Return results generator from the arxiv spider. Tricky fields, one
record.
"""
from scrapy.http import TextResponse

records = list(
spider.parse(
fake_response_from_file(
'arxiv/sample_arxiv_record0.xml',
response_type=TextResponse,
)
)
)

assert records
pipeline = InspireCeleryPushPipeline()
pipeline.open_spider(spider)
return [pipeline.process_item(record, spider) for record in records]


@pytest.fixture
def many_results(spider):
"""Return results generator from the arxiv spider. Tricky fields, many
records.
"""
from scrapy.http import TextResponse
def _get_processed_record(item, spider):
record = pipeline.process_item(item, spider)
return record

records = list(
parsed_items = list(
spider.parse(
fake_response_from_file(
'arxiv/sample_arxiv_record.xml',
@@ -63,10 +44,10 @@ def many_results(spider):
)
)

assert records
pipeline = InspireCeleryPushPipeline()
pipeline.open_spider(spider)
return [pipeline.process_item(record, spider) for record in records]

return [_get_processed_record(parsed_item, spider) for parsed_item in parsed_items]


def test_page_nr(many_results):
15 changes: 7 additions & 8 deletions tests/unit/test_arxiv_single.py
@@ -24,10 +24,15 @@
def results():
"""Return results generator from the arxiv spider. All fields, one record.
"""
def _get_processed_item(item, spider):
record = pipeline.process_item(item, spider)
validate(record, 'hep')
assert record
return record

crawler = Crawler(spidercls=arxiv_spider.ArxivSpider)
spider = arxiv_spider.ArxivSpider.from_crawler(crawler)
records = list(
parsed_items = list(
spider.parse(
fake_response_from_file(
'arxiv/sample_arxiv_record0.xml',
@@ -36,16 +41,10 @@ def results():
)
)

assert records
pipeline = InspireCeleryPushPipeline()
pipeline.open_spider(spider)
processed_records = []
for record in records:
processed_record = pipeline.process_item(record, spider)
validate(processed_record, 'hep')
processed_records.append(processed_record)

return processed_records
return [_get_processed_item(parsed_item, spider) for parsed_item in parsed_items]



23 changes: 18 additions & 5 deletions tests/unit/test_base.py
@@ -38,9 +38,12 @@ def record():
nodes = selector.xpath('.//%s' % spider.itertag)
response.meta["record"] = nodes[0].extract()
response.meta["urls"] = ["http://hdl.handle.net/1885/10005"]
parsed_record = spider.build_item(response)
assert parsed_record
return parsed_record

parsed_item = spider.build_item(response)
assert parsed_item
assert parsed_item.record

return parsed_item.record


@pytest.fixture
@@ -169,7 +172,12 @@ def splash():
'Content-Type': 'text/html',
},
)
return spider.scrape_for_pdf(splash_response)

parsed_item = spider.scrape_for_pdf(splash_response)
assert parsed_item
assert parsed_item.record

return parsed_item.record


def test_splash(splash):
@@ -201,7 +209,12 @@ def parsed_node():
response = fake_response_from_string(text=body)
node = get_node(spider, 'OAI-PMH:record', text=body)
response.meta["record"] = node[0].extract()
return spider.parse_node(response, node[0])

parsed_item = spider.parse_node(response, node[0])
assert parsed_item
assert parsed_item.record

return parsed_item.record


def test_parsed_node(parsed_node):
14 changes: 10 additions & 4 deletions tests/unit/test_brown.py
@@ -41,10 +41,12 @@ def record():

splash_response = fake_response_from_file('brown/test_splash.html')
splash_response.meta["jsonrecord"] = jsonrecord
parsed_record = spider.scrape_splash(splash_response)

assert parsed_record
return parsed_record
parsed_item = spider.scrape_splash(splash_response)
assert parsed_item
assert parsed_item.record

return parsed_item.record


@pytest.fixture
@@ -200,7 +202,11 @@ def parsed_node_no_splash():
jsonrecord = jsonresponse["items"]["docs"][0]
response.meta["jsonrecord"] = jsonrecord

return spider.parse(response).next()
parsed_item = spider.parse(response).next()
assert parsed_item
assert parsed_item.record

return parsed_item.record


def test_no_splash(parsed_node_no_splash):
14 changes: 12 additions & 2 deletions tests/unit/test_dnb.py
@@ -72,7 +72,12 @@ def record(scrape_pos_page_body):
body=scrape_pos_page_body,
**{'encoding': 'utf-8'}
)
return request.callback(response)

parsed_item = request.callback(response)
assert parsed_item
assert parsed_item.record

return parsed_item.record


def test_title(record):
@@ -241,7 +246,12 @@ def parse_without_splash():
'Content-Type': 'application/pdf;charset=base64',
}
)
return spider.parse_node(response, nodes[0])

parsed_item = spider.parse_node(response, nodes[0])
assert parsed_item
assert parsed_item.record

return parsed_item.record


def test_parse_without_splash(parse_without_splash):