Skip to content

Commit

Permalink
pos: add functional test
Browse files Browse the repository at this point in the history
Signed-off-by: Spiros Delviniotis <[email protected]>
  • Loading branch information
spirosdelviniotis committed Aug 15, 2017
1 parent 1d6e8e8 commit 2716d53
Show file tree
Hide file tree
Showing 11 changed files with 338 additions and 7 deletions.
1 change: 1 addition & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ env:
- SUITE=functional_wsp
- SUITE=functional_arxiv
- SUITE=functional_desy
- SUITE=functional_pos

matrix:
fast_finish: true
Expand Down
16 changes: 16 additions & 0 deletions docker-compose.test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,13 @@ services:
links:
- scrapyd

functional_pos:
<<: *service_base
command: py.test -vv tests/functional/pos
links:
- scrapyd
- server.local

unit:
<<: *service_base
command: bash -c "py.test tests/unit -vv && make -C docs clean && make -C docs html && python setup.py sdist && ls dist/*"
Expand Down Expand Up @@ -82,5 +89,14 @@ services:
- ${PWD}/tests/functional/wsp/fixtures/ftp_server/WSP:/home/ftpusers/bob/WSP
- ${PWD}/tests/functional/wsp/fixtures/ftp_server/pureftpd.passwd:/etc/pure-ftpd/passwd/pureftpd.passwd

server.local:
image: nginx:stable-alpine
volumes:
- ${PWD}/tests/functional/pos/fixtures/https_server/conf/proxy.conf:/etc/nginx/conf.d/default.conf
- ${PWD}/tests/functional/pos/fixtures/https_server/conf/ssl:/etc/nginx/ssl
- ${PWD}/tests/functional/pos/fixtures/https_server/records:/etc/nginx/html/
ports:
- 443:443

rabbitmq:
image: rabbitmq
26 changes: 20 additions & 6 deletions hepcrawl/spiders/pos_spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,14 +40,20 @@ class POSSpider(Spider):
$ scrapy crawl PoS \\
-a source_file=file://`pwd`/tests/unit/responses/pos/sample_pos_record.xml
"""
name = 'PoS'
BASE_CONFERENCE_PAPER_URL = "https://pos.sissa.it/contribution?id="
name = 'pos'
# pos_proceedings_url = "https://pos.sissa.it/cgi-bin/reader/conf.cgi?confid="

def __init__(self, source_file=None, **kwargs):
def __init__(
self,
source_file=None,
base_conference_paper_url='https://pos.sissa.it/contribution?id=',
# TODO: change this default so that the URL no longer contains a question mark.
**kwargs
):
"""Construct POS spider."""
super(POSSpider, self).__init__(**kwargs)
self.source_file = source_file
self.BASE_CONFERENCE_PAPER_URL = base_conference_paper_url

def start_requests(self):
yield Request(self.source_file)
Expand Down Expand Up @@ -160,7 +166,7 @@ def _get_journal_artid(identifier):
def _get_ext_systems_number(node):
return [
{
'institute': 'PoS',
'institute': 'pos',
'value': node.xpath('.//identifier/text()').extract_first()
},
]
Expand All @@ -187,10 +193,18 @@ def _get_authors(node): # To be refactored
)
for affiliation in author.xpath('.//affiliation//text()').extract():
if 'affiliations' in auth_dict:
auth_dict['affiliations'].append({'value': affiliation})
auth_dict['affiliations'].append(
{
'value': affiliation
}
)
# TODO: probably to be removed.
else:
auth_dict['affiliations'] = [{'value': affiliation}, ]
auth_dict['affiliations'] = [
{
'value': affiliation
},
]
if auth_dict:
authors.append(auth_dict)
return authors
17 changes: 17 additions & 0 deletions tests/functional/pos/fixtures/https_server/conf/proxy.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# HTTPS server used by the PoS functional tests in place of the real
# conference-paper site: the spider is pointed at
# https://server.local/contribution?id=<ID> and is redirected to a static
# fixture page.
server {
    # TLS is enabled by the "ssl" parameter of "listen". The separate
    # "ssl on;" directive has been dropped: it is deprecated since
    # nginx 1.15.0 and removed in 1.25.1, where it prevents the
    # configuration from loading.
    listen 443 ssl;
    server_name localhost;

    ssl_protocols TLSv1 TLSv1.1 TLSv1.2;
    # Relative paths are resolved against the nginx prefix directory.
    ssl_certificate ssl/cert.pem;
    ssl_certificate_key ssl/cert.key;

    # Redirect /contribution?id=<ID> to /<ID>.html.
    location ~ /contribution {
        if ($args ~* "^id=(.*)") {
            set $mid $1;
            # Clear the query string so it is not appended to the redirect target.
            set $args '';
            rewrite ^.*$ /$mid.html permanent;
        }
    }
}
28 changes: 28 additions & 0 deletions tests/functional/pos/fixtures/https_server/conf/ssl/cert.key
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
-----BEGIN PRIVATE KEY-----
MIIEvwIBADANBgkqhkiG9w0BAQEFAASCBKkwggSlAgEAAoIBAQChhBiOoipMRRcc
E5waKrGB01/QtRpfIGp5KmJfnif05dR05wWojHO6EtabZ2qbXtcSuyQ0vRNpbZUU
OzcriFOMk8dujDzuKMkegsq/LE4PyN/R5JZtf34NyGG7v70K6Uq7RV4PUzk+zoum
1McMUBk1QlGP/E9RsDlSPv9XOblUpicPDuwhCwPf4zi6jporgXjDJ/iUuh+bexxv
40R7f2dCWkiHYiNiLNLTwXdYkaWBcc3HoTq9FEZZhYDhWRjX0/TuINmMr5lbUvr6
UYRABOS4VeUyHpb/e7OH9WXQxzR76LuQFfQDSgs0GxXw1KG58aq+P0ni2E77C4Iu
odQ8iT+jAgMBAAECggEBAIqJeFrXY7p5xIGznEChgBHgUR3+SPlxH4KARVLIoHMh
s2L2SVcx6Y2f3O38/Wb5KTcKx9polz7l3Go3BHJVg3xfwT7kENsipqeB/g+OHALU
BI7PJ+wR3/hIePQGWUsDobMRo8U3WDG0DfryJS09gvG4yabb/tkNc41FNdUGUR31
7VInQFqv2/jZ/2A3s3DZ0Cns9vJuLhmf7629k3MVCuU7Rh0rStnVCA70kjgKzOfP
+26fnfd/MmrQYbaukw04+cwcwifGkF5Jis80qTWsgdF82rkzpwJLDo0Jd2HZFuOa
AHkWK2QiMzb6PS2Uo7Zarax9E+W2TLahANXZQQ32NAkCgYEAzKw7XbEwzWG/T7yX
EgNIAN7YtcGYr9sfHlVJ8bWYK7GZBbCkKDlGU+YGRE++plh/jtXYjsIFElWtv01Y
UpqBdWf7p8mXdtVoq6YyL5WuQVMwpjKHvegTXXwAoreEXZeKr1LKC11B14h+8wsR
D5uf0GVmdw12nSrzeu3Q4oSgss8CgYEAygU++fItIYuPtZfrC8qDcyEiOLQmAHtX
eTnEHOPy8ik+bdwF5Rg0nzxLu3RZ47ykGdEOzpGRO4B9V1EevwSEzX6VO7latMUS
cLKb3Y0bXm6qQcWG+LAlvyaHfAH0oN47xfScLDiUm6BKd4Eo9kpkgaQzSgUfFZNQ
6DHiA3Emau0CgYEAyel7Y3GjMGomvrXQ3x9HkDxH0/7Z71qe92CyYvZ/2VMKH9fk
Ch5+p9P8CLYW4anapQGH80WqlSzbDCd0Y4EzB6z+UceJWd0stnFtfw4N6znze3HM
AegJ+qaTRfL/bQlL8qwc0Fs+0i9A9enL+fbQEVmHXRl2E5TEwFgOQvkOQ3cCgYAA
4bD6qkHkKZXA9x7BeGrGb9iUYsTfr6ocD1J5xczjnaZ2GEW2UDq6jyrNcJ6LzeDx
c+YapKv7lH33iZUWxFBIDUtdbVul+k4wS7c+akU6TkVT8Ca8oxgnE2X39pI4uX+N
R5n+32hWnYZ1qwygtoZlwm+u3QLbtz7dJIqV9UJzqQKBgQCL8Xo9LA0Dm7ZsdDDI
I93YsjCELvBsonymmD1MTpk7uIA+qH8LAih+Vhonc17NtpXuas8eqc8ntuNLAgON
Tylvk32uaRqquHWl6MT7bwaaK7pD8KuOIUJdl5SEc+DDUcB2A2XLg7Yv08Dus8A7
6J5oH8YJ3hqmVGZzbOo75IFerg==
-----END PRIVATE KEY-----
19 changes: 19 additions & 0 deletions tests/functional/pos/fixtures/https_server/conf/ssl/cert.pem
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
-----BEGIN CERTIFICATE-----
MIIDATCCAemgAwIBAgIJAJRKy2TWwZqTMA0GCSqGSIb3DQEBCwUAMBcxFTATBgNV
BAMMDGh0dHBzX3NlcnZlcjAeFw0xNzA4MTQxNDQ1MTFaFw0yMDA2MDMxNDQ1MTFa
MBcxFTATBgNVBAMMDGh0dHBzX3NlcnZlcjCCASIwDQYJKoZIhvcNAQEBBQADggEP
ADCCAQoCggEBAKGEGI6iKkxFFxwTnBoqsYHTX9C1Gl8gankqYl+eJ/Tl1HTnBaiM
c7oS1ptnapte1xK7JDS9E2ltlRQ7NyuIU4yTx26MPO4oyR6Cyr8sTg/I39Hklm1/
fg3IYbu/vQrpSrtFXg9TOT7Oi6bUxwxQGTVCUY/8T1GwOVI+/1c5uVSmJw8O7CEL
A9/jOLqOmiuBeMMn+JS6H5t7HG/jRHt/Z0JaSIdiI2Is0tPBd1iRpYFxzcehOr0U
RlmFgOFZGNfT9O4g2YyvmVtS+vpRhEAE5LhV5TIelv97s4f1ZdDHNHvou5AV9ANK
CzQbFfDUobnxqr4/SeLYTvsLgi6h1DyJP6MCAwEAAaNQME4wHQYDVR0OBBYEFAfu
RxroDak/yro7MbRfDogKVDmBMB8GA1UdIwQYMBaAFAfuRxroDak/yro7MbRfDogK
VDmBMAwGA1UdEwQFMAMBAf8wDQYJKoZIhvcNAQELBQADggEBAF5M/Gz6JDC1HoSm
6HFLBB9ul9TQQI3RhohwreCYyeZ866WrvqZfle+lxcgVburYCSyi5paFpvNK3DH2
J0A2fDAMekZGcaJ7O5Zx0evTCwXoxDOhS+xO5IlGTXWCEKLeLkU27WJiLC9cTbFr
kfjL14IMnsioRzUz4a+aX5JllqnEccCDlHjSk1w5YvOvt6GC6Bvenouja2apPes/
oJJpFwZVO0epqOQo1ndRGbt5NLv6YgZlvdFXWoKNKohzdfDV/RbW9BrbpyKSxFTm
usrmVcZTQpSf69zbnEVO8N3N6c1zNdETPON1ZGLW1O1MXWkQDZniH6LduXN/Oob7
vYqvXlw=
-----END CERTIFICATE-----
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
<!DOCTYPE html
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" lang="en-US" xml:lang="en-US">
<head>
<title>PoS(LATTICE 2013)001</title>

<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
</head>
<body>

<map name="headmap" id="headmap">
<area shape="rect" coords="682,15,748,65" href="http://www.sissa.it" />
<area shape="rect" coords="9,9,266,69" href="/index.html" target="_top"/>
</map>
<h1>PoS(LATTICE 2013)001</h1>

<div id="identifier">

<div>
<em>Title</em>
<strong>Heavy Flavour Physics Review</strong>
</div>

<div>
<em>Conference</em>
<strong><a href="/cgi-bin/reader/conf.cgi?confid=187">31st International Symposium on Lattice Field Theory LATTICE 2013</a></strong>
</div>

<div>
<em>Authors</em>
<div class="identxt">
A. El-Khadra</div>
</div>


<div>
<em>Contribution</em>
<strong><a href="https://pos.sissa.it/archive/conferences/187/001/LATTICE 2013_001.pdf">pdf</a></strong>
</div>




</div>
<div id="footer">
<p>
Communicate with the <a href="mailto:%70%6F%73%2D%65%6F%40%70%6F%73%2E%73%69%73%73%61%2E%69%74">PoS Editorial Office</a>
| <a href="/POScookies.html" title="Cookie policy">Cookie policy</a>
</p>
</div>


</body>
</html>
33 changes: 33 additions & 0 deletions tests/functional/pos/fixtures/oai_harvested/pos_record.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<OAI-PMH xmlns="http://www.openarchives.org/OAI/2.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd">
<responseDate>2015-01-29T13:44:13Z</responseDate>
<request verb="ListRecords" metadataPrefix="pos-ext_dc" set="conference:LATTICE 2013">
https://pos.sissa.it/cgi-bin/oai/oai-script-spires-extended.cgi
</request>
<ListRecords>
<record>
<header>
<identifier>oai:pos.sissa.it:LATTICE 2013/001</identifier>
<datestamp>2014-04-28</datestamp>
<setSpec>conference:LATTICE 2013</setSpec>
<setSpec>group:9</setSpec>
</header>
<metadata>
<pos-ext_dc:pex-dc xmlns:pos-ext_dc="http://pos.sissa.it/pos-ext_dc/pos-ext_dc.xsd" xmlns:pex-dc="http://pos.sissa.it/pos-ext_dc/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pos.sissa.it/pos-ext_dc/ http://pos.sissa.it/pos-ext_dc/pos-ext_dc.xsd">
<pex-dc:title>Heavy Flavour Physics Review</pex-dc:title>
<pex-dc:creator><pex-dc:name>Aida El-Khadra</pex-dc:name><pex-dc:affiliation>INFN and Università di Firenze</pex-dc:affiliation></pex-dc:creator>
<pex-dc:creator><pex-dc:name>M. T. MacDonald</pex-dc:name><pex-dc:affiliation>U of Pecs</pex-dc:affiliation></pex-dc:creator><pex-dc:subject>Lattice Field Theory</pex-dc:subject>
<pex-dc:description>31st International Symposium on Lattice Field Theory LATTICE 2013; Plenary sessions</pex-dc:description>
<pex-dc:publisher>Sissa Medialab</pex-dc:publisher>
<pex-dc:date>2014-03-19T21:09:30Z</pex-dc:date>
<pex-dc:type>Text</pex-dc:type>
<pex-dc:format>application/pdf</pex-dc:format>
<pex-dc:identifier>PoS(LATTICE 2013)001</pex-dc:identifier>
<pex-dc:language>en</pex-dc:language>
<pex-dc:relation>LATTICE 2013 (31st International Symposium on Lattice Field Theory LATTICE 2013) isPartOf</pex-dc:relation>
<pex-dc:rights>Creative Commons Attribution-NonCommercial-ShareAlike</pex-dc:rights>
</pos-ext_dc:pex-dc>
</metadata>
</record>
</ListRecords>
</OAI-PMH>
57 changes: 57 additions & 0 deletions tests/functional/pos/fixtures/pos_records.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
[
{
"acquisition_source": {
"source": "pos",
"method": "hepcrawl",
"submission_number": "5652c7f6190f11e79e8000224dabeaad",
"datetime": "2017-04-03T10:26:40.365216"
},
"license": [
{
"url": "https://creativecommons.org/licenses/by-nc-sa/3.0",
"license": "CC-BY-NC-SA-3.0"
}
],
"titles": [
{
"source": "Sissa Medialab",
"title": "Heavy Flavour Physics Review"
}
],
"authors": [
{
"affiliations": [
{
"value": "INFN and Universit\u00e0 di Firenze"
}
],
"full_name": "El-Khadra, Aida"
},
{
"affiliations": [
{
"value": "U of Pecs"
}
],
"full_name": "MacDonald, M.T."
}
],
"publication_info": [
{
"journal_volume": "LATTICE 2013",
"year": 2014,
"artid": "001",
"journal_title": "PoS"
}
],
"document_type": [
"conference paper"
],
"imprints": [
{
"date": "2014-03-19"
}
],
"citeable": true
}
]
91 changes: 91 additions & 0 deletions tests/functional/pos/test_pos.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
# -*- coding: utf-8 -*-
#
# This file is part of hepcrawl.
# Copyright (C) 2017 CERN.
#
# hepcrawl is a free software; you can redistribute it and/or modify it
# under the terms of the Revised BSD License; see LICENSE file for
# more details.

"""Functional tests for PoS spider"""

from __future__ import absolute_import, division, print_function

import pytest

from time import sleep

from hepcrawl.testlib.celery_monitor import CeleryMonitor
from hepcrawl.testlib.fixtures import (
get_test_suite_path,
expected_json_results_from_file,
)
from hepcrawl.testlib.tasks import app as celery_app
from hepcrawl.testlib.utils import get_crawler_instance


def override_generated_fields(record):
    """Overwrite the generated ``acquisition_source`` fields with fixed values.

    The ``datetime`` and ``submission_number`` entries are replaced by
    constants so that crawled records and expected records can be compared
    for equality.

    Args:
        record (dict): record with an ``acquisition_source`` mapping; it is
            modified in place.

    Returns:
        dict: the same ``record`` object that was passed in.
    """
    record['acquisition_source'].update({
        'datetime': u'2017-04-03T10:26:40.365216',
        'submission_number': u'5652c7f6190f11e79e8000224dabeaad',
    })
    return record


@pytest.fixture(scope="function")
def set_up_oai_environment():
    """Yield the crawler settings used by the PoS functional test.

    Yields:
        dict: the scrapyd host URL (``CRAWLER_HOST_URL``), the project name
        (``CRAWLER_PROJECT``) and the spider arguments
        (``CRAWLER_ARGUMENTS``): the harvested record fixture as a
        ``file://`` URL and the base URL of the conference paper pages.
    """
    record_path = get_test_suite_path(
        'pos',
        'fixtures',
        'oai_harvested',
        'pos_record.xml',
        test_suite='functional',
    )

    # Wait for the docker environment to come up (takes about 10 seconds)
    # before handing the settings to the test.
    sleep(10)

    spider_arguments = {
        'source_file': 'file://' + record_path,
        'base_conference_paper_url': 'https://server.local/contribution?id=',
    }
    crawler_settings = {
        'CRAWLER_HOST_URL': 'http://scrapyd:6800',
        'CRAWLER_PROJECT': 'hepcrawl',
        'CRAWLER_ARGUMENTS': spider_arguments,
    }

    yield crawler_settings


@pytest.mark.parametrize(
    'expected_results',
    [
        expected_json_results_from_file(
            'pos',
            'fixtures',
            'pos_records.json',
        ),
    ],
    ids=[
        'smoke',
    ]
)
def test_pos(
    set_up_oai_environment,
    expected_results,
):
    """Crawl the PoS fixture through scrapyd and compare with the expected records.

    Args:
        set_up_oai_environment (dict): crawler settings yielded by the
            ``set_up_oai_environment`` fixture.
        expected_results (list): expected records loaded from the JSON
            fixture given in the parametrization.
    """
    # Imported locally so that this test does not depend on the module's
    # import block providing ``json``.
    import json

    def _sort_key(record):
        # Dicts cannot be ordered on Python 3, so sort on a canonical JSON
        # serialisation instead of on the records themselves.
        return json.dumps(record, sort_keys=True)

    crawler = get_crawler_instance(set_up_oai_environment.get('CRAWLER_HOST_URL'))

    results = CeleryMonitor.do_crawl(
        app=celery_app,
        monitor_timeout=5,
        monitor_iter_limit=100,
        events_limit=1,
        crawler_instance=crawler,
        project=set_up_oai_environment.get('CRAWLER_PROJECT'),
        spider='pos',
        settings={},
        **set_up_oai_environment.get('CRAWLER_ARGUMENTS')
    )

    gotten_results = [override_generated_fields(result) for result in results]
    expected_results = [override_generated_fields(expected) for expected in expected_results]

    # Sort BOTH sides with the same key: the order in which records come
    # back from the crawl need not match the order in the fixture file.
    assert sorted(gotten_results, key=_sort_key) == sorted(expected_results, key=_sort_key)
Loading

0 comments on commit 2716d53

Please sign in to comment.