Skip to content

Commit

Permalink
global: rename from crawler to hepcrawl record
Browse files Browse the repository at this point in the history
Signed-off-by: David Caro <[email protected]>
  • Loading branch information
david-caro committed Aug 10, 2017
1 parent 9e399d1 commit ed1a59b
Show file tree
Hide file tree
Showing 9 changed files with 31 additions and 21 deletions.
2 changes: 1 addition & 1 deletion hepcrawl/pipelines.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@

from inspire_schemas.utils import validate

from hepcrawl.crawler2hep import item_to_hep
from hepcrawl.tohep import item_to_hep
from hepcrawl.settings import FILES_STORE
from hepcrawl.utils import RecordFile

Expand Down
26 changes: 18 additions & 8 deletions hepcrawl/testlib/fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,12 @@
from scrapy.selector import Selector


def fake_response_from_file(file_name, test_suite='unit', url='http://www.example.com', response_type=TextResponse):
def fake_response_from_file(
file_name,
test_suite='unit',
url='http://www.example.com',
response_type=TextResponse,
):
"""Create a fake Scrapy HTTP response from an HTML file.
Args:
Expand All @@ -26,11 +31,12 @@ def fake_response_from_file(file_name, test_suite='unit', url='http://www.exampl
test_suite(str): The test suite that the response file comes from,
e.g. ``unit``, ``functional``.
url(str): The URL of the response.
response_type: The type of the scrapy Response to be returned,
response_type(class): The type of the scrapy Response to be returned,
depending on the Request (Response, TextResponse, etc).
Returns:
``response_type``: A scrapy HTTP response which can be used for unit testing.
``response_type``: A scrapy HTTP response which can be used for unit
testing.
"""
request = Request(url=url)

Expand All @@ -55,15 +61,18 @@ def fake_response_from_file(file_name, test_suite='unit', url='http://www.exampl
return response


def fake_response_from_string(text, url='http://www.example.com', response_type=TextResponse):
"""Fake Scrapy response from a string."""
def fake_response_from_string(
text,
url='http://www.example.com',
response_type=TextResponse,
):
request = Request(url=url)
response = response_type(
url=url,
request=request,
body=text,
**{'encoding': 'utf-8'}
)
encoding='utf-8',
)

return response

Expand All @@ -83,7 +92,8 @@ def get_node(spider, tag, response=None, text=None, rtype="xml"):
def get_test_suite_path(*path_chunks, **kwargs):
"""
Args:
*path_chunks: Optional extra path element (strings) to suffix the responses directory with.
*path_chunks: Optional extra path element (strings) to suffix the
responses directory with.
**kwargs: The test type folder name, default is the ``unit`` test suite,
e.g. ``test_suite='unit'``, ``test_suite='functional'``.
Expand Down
File renamed without changes.
2 changes: 1 addition & 1 deletion hepcrawl/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -373,7 +373,7 @@ class RecordFile(object):
name(str): Optional name of the file; if not passed, the name
in the ``path`` is used.
Rises:
Raises:
PathDoesNotExist:
"""
def __init__(self, path, name=None):
Expand Down
22 changes: 11 additions & 11 deletions tests/unit/test_crawler2hep.py → tests/unit/test_tohep.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,14 @@
import pytest
import yaml

from hepcrawl.crawler2hep import hepcrawl_to_hep
from hepcrawl.tohep import hepcrawl_to_hep
from hepcrawl.testlib.fixtures import get_test_suite_path


def load_file(file_name):
path = get_test_suite_path(
'responses',
'crawler2hep',
'tohep',
file_name,
)
with open(path) as input_data:
Expand All @@ -29,13 +29,13 @@ def load_file(file_name):


@pytest.fixture('module')
def expected_generic_crawler_record():
return load_file('out_generic_crawler_record.yaml')
def expected_generic_hepcrawl_record():
return load_file('out_generic_hepcrawl_record.yaml')


@pytest.fixture('module')
def input_generic_crawler_record():
return load_file('in_generic_crawler_record.yaml')
def input_generic_hepcrawl_record():
return load_file('in_generic_hepcrawl_record.yaml')


@pytest.fixture('module')
Expand All @@ -48,12 +48,12 @@ def input_no_document_type_record():
return load_file('in_no_document_type.yaml')


def test_generic_crawler_record(
input_generic_crawler_record,
expected_generic_crawler_record
def test_generic_hepcrawl_record(
input_generic_hepcrawl_record,
expected_generic_hepcrawl_record
):
produced_record = hepcrawl_to_hep(input_generic_crawler_record)
assert produced_record == expected_generic_crawler_record
produced_record = hepcrawl_to_hep(input_generic_hepcrawl_record)
assert produced_record == expected_generic_hepcrawl_record


def test_no_document_type(
Expand Down

0 comments on commit ed1a59b

Please sign in to comment.