Skip to content

Commit

Permalink
pos: add functional test
Browse files Browse the repository at this point in the history
Signed-off-by: Spiros Delviniotis <[email protected]>
  • Loading branch information
spirosdelviniotis authored and david-caro committed Nov 1, 2017
1 parent 92d420f commit 81735b8
Show file tree
Hide file tree
Showing 11 changed files with 338 additions and 28 deletions.
1 change: 1 addition & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ env:
- SUITE=functional_arxiv
- SUITE=functional_desy
- SUITE=functional_cds
- SUITE=functional_pos

matrix:
fast_finish: true
Expand Down
16 changes: 16 additions & 0 deletions docker-compose.test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,13 @@ services:
links:
- scrapyd

functional_pos:
<<: *service_base
command: py.test -vv tests/functional/pos
links:
- scrapyd
- server.local

unit:
<<: *service_base
command: bash -c "py.test tests/unit -vv && make -C docs clean && make -C docs html && python setup.py sdist && ls dist/*"
Expand Down Expand Up @@ -96,5 +103,14 @@ services:
- ${PWD}/tests/functional/wsp/fixtures/ftp_server/WSP:/home/ftpusers/bob/WSP
- ${PWD}/tests/functional/wsp/fixtures/ftp_server/pureftpd.passwd:/etc/pure-ftpd/passwd/pureftpd.passwd

server.local:
image: nginx:stable-alpine
volumes:
- ${PWD}/tests/functional/pos/fixtures/https_server/conf/proxy.conf:/etc/nginx/conf.d/default.conf
- ${PWD}/tests/functional/pos/fixtures/https_server/conf/ssl:/etc/nginx/ssl
- ${PWD}/tests/functional/pos/fixtures/https_server/records:/etc/nginx/html/
ports:
- 443:443

rabbitmq:
image: rabbitmq
39 changes: 13 additions & 26 deletions hepcrawl/spiders/pos_spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ class POSSpider(StatefulSpider):
-a source_file=file://`pwd`/tests/unit/responses/pos/sample_pos_record.xml
"""
name = 'pos'
# pos_proceedings_url = "https://pos.sissa.it/cgi-bin/reader/conf.cgi?confid="

def __init__(
self,
Expand Down Expand Up @@ -83,24 +84,18 @@ def scrape_conference_paper(self, response):
response=response,
)

# TODO Yield request for Conference page
proceedings_identifier = response.selector.xpath("//a[contains(@href,'?confid')]/@href").extract_first()
proceedings_identifier = proceedings_identifier.split('=')[1]
pos_url = "{0}{1}".format(self.BASE_PROCEEDINGS_URL, proceedings_identifier)
self.log('===> scrape_conference_paper url::{pos_url}'.format(**vars()))
# # Yield request for Conference page
# proceedings_identifier = response.selector.xpath("//a[contains(@href,'?confid')]/@href").extract_first()
# proceedings_identifier = proceedings_identifier.split('=')[1]
# pos_url = "{0}{1}".format(self.pos_proceedings_url, proceedings_identifier)
# yield Request(pos_url, callback=self.scrape_proceedings)

yield self.build_conference_paper_item(response)
return self.build_conference_paper_item(response)

def scrape_proceedings(self, response):
# TODO create proceedings record
# TODO document_type = proceeding
# TODO title = template(“Proceedings, <title>”)
# TODO subtitle = template(“<place>, <date>”)
# TODO publication_info.journal_title = “PoS”
# TODO publication_info.journal_volume = identifier

pass
# def scrape_proceedings(self, response):
# # create proceedings record
# import pytest
# pytest.set_trace()

def build_conference_paper_item(self, response):
"""Parse a PoS XML exported file into a HEP record."""
Expand Down Expand Up @@ -174,7 +169,7 @@ def _get_journal_artid(identifier):
def _get_ext_systems_number(node):
return [
{
'institute': 'pos',
'institute': 'PoS',
'value': node.xpath('.//identifier/text()').extract_first()
},
]
Expand All @@ -201,18 +196,10 @@ def _get_authors(node): # To be refactored
)
for affiliation in author.xpath('.//affiliation//text()').extract():
if 'affiliations' in auth_dict:
auth_dict['affiliations'].append(
{
'value': affiliation
}
)
auth_dict['affiliations'].append({'value': affiliation})
# TODO: probably to be removed
else:
auth_dict['affiliations'] = [
{
'value': affiliation
},
]
auth_dict['affiliations'] = [{'value': affiliation}, ]
if auth_dict:
authors.append(auth_dict)
return authors
17 changes: 17 additions & 0 deletions tests/functional/pos/fixtures/https_server/conf/proxy.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
server {
    # HTTPS fixture server used by the PoS functional tests.
    #
    # TLS is enabled through the `ssl` parameter of `listen`. The standalone
    # `ssl on;` directive was redundant with it, has been deprecated since
    # nginx 1.15.0 and was removed in nginx 1.25.1, where it prevents the
    # configuration from loading.
    listen 443 ssl;
    server_name localhost;

    ssl_protocols TLSv1 TLSv1.1 TLSv1.2;
    # Relative paths: resolved by nginx against its configuration prefix.
    ssl_certificate ssl/cert.pem;
    ssl_certificate_key ssl/cert.key;

    # Answer /contribution?id=<x> with a permanent (301) redirect to /<x>.html.
    location ~ /contribution {
        if ($args ~* "^id=(.*)") {
            # Keep the captured id before the query string is cleared.
            set $mid $1;
            # Empty the query string so it is not appended to the redirect target.
            set $args '';
            rewrite ^.*$ /$mid.html permanent;
        }
    }
}
28 changes: 28 additions & 0 deletions tests/functional/pos/fixtures/https_server/conf/ssl/cert.key
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
-----BEGIN PRIVATE KEY-----
MIIEvwIBADANBgkqhkiG9w0BAQEFAASCBKkwggSlAgEAAoIBAQChhBiOoipMRRcc
E5waKrGB01/QtRpfIGp5KmJfnif05dR05wWojHO6EtabZ2qbXtcSuyQ0vRNpbZUU
OzcriFOMk8dujDzuKMkegsq/LE4PyN/R5JZtf34NyGG7v70K6Uq7RV4PUzk+zoum
1McMUBk1QlGP/E9RsDlSPv9XOblUpicPDuwhCwPf4zi6jporgXjDJ/iUuh+bexxv
40R7f2dCWkiHYiNiLNLTwXdYkaWBcc3HoTq9FEZZhYDhWRjX0/TuINmMr5lbUvr6
UYRABOS4VeUyHpb/e7OH9WXQxzR76LuQFfQDSgs0GxXw1KG58aq+P0ni2E77C4Iu
odQ8iT+jAgMBAAECggEBAIqJeFrXY7p5xIGznEChgBHgUR3+SPlxH4KARVLIoHMh
s2L2SVcx6Y2f3O38/Wb5KTcKx9polz7l3Go3BHJVg3xfwT7kENsipqeB/g+OHALU
BI7PJ+wR3/hIePQGWUsDobMRo8U3WDG0DfryJS09gvG4yabb/tkNc41FNdUGUR31
7VInQFqv2/jZ/2A3s3DZ0Cns9vJuLhmf7629k3MVCuU7Rh0rStnVCA70kjgKzOfP
+26fnfd/MmrQYbaukw04+cwcwifGkF5Jis80qTWsgdF82rkzpwJLDo0Jd2HZFuOa
AHkWK2QiMzb6PS2Uo7Zarax9E+W2TLahANXZQQ32NAkCgYEAzKw7XbEwzWG/T7yX
EgNIAN7YtcGYr9sfHlVJ8bWYK7GZBbCkKDlGU+YGRE++plh/jtXYjsIFElWtv01Y
UpqBdWf7p8mXdtVoq6YyL5WuQVMwpjKHvegTXXwAoreEXZeKr1LKC11B14h+8wsR
D5uf0GVmdw12nSrzeu3Q4oSgss8CgYEAygU++fItIYuPtZfrC8qDcyEiOLQmAHtX
eTnEHOPy8ik+bdwF5Rg0nzxLu3RZ47ykGdEOzpGRO4B9V1EevwSEzX6VO7latMUS
cLKb3Y0bXm6qQcWG+LAlvyaHfAH0oN47xfScLDiUm6BKd4Eo9kpkgaQzSgUfFZNQ
6DHiA3Emau0CgYEAyel7Y3GjMGomvrXQ3x9HkDxH0/7Z71qe92CyYvZ/2VMKH9fk
Ch5+p9P8CLYW4anapQGH80WqlSzbDCd0Y4EzB6z+UceJWd0stnFtfw4N6znze3HM
AegJ+qaTRfL/bQlL8qwc0Fs+0i9A9enL+fbQEVmHXRl2E5TEwFgOQvkOQ3cCgYAA
4bD6qkHkKZXA9x7BeGrGb9iUYsTfr6ocD1J5xczjnaZ2GEW2UDq6jyrNcJ6LzeDx
c+YapKv7lH33iZUWxFBIDUtdbVul+k4wS7c+akU6TkVT8Ca8oxgnE2X39pI4uX+N
R5n+32hWnYZ1qwygtoZlwm+u3QLbtz7dJIqV9UJzqQKBgQCL8Xo9LA0Dm7ZsdDDI
I93YsjCELvBsonymmD1MTpk7uIA+qH8LAih+Vhonc17NtpXuas8eqc8ntuNLAgON
Tylvk32uaRqquHWl6MT7bwaaK7pD8KuOIUJdl5SEc+DDUcB2A2XLg7Yv08Dus8A7
6J5oH8YJ3hqmVGZzbOo75IFerg==
-----END PRIVATE KEY-----
19 changes: 19 additions & 0 deletions tests/functional/pos/fixtures/https_server/conf/ssl/cert.pem
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
-----BEGIN CERTIFICATE-----
MIIDATCCAemgAwIBAgIJAJRKy2TWwZqTMA0GCSqGSIb3DQEBCwUAMBcxFTATBgNV
BAMMDGh0dHBzX3NlcnZlcjAeFw0xNzA4MTQxNDQ1MTFaFw0yMDA2MDMxNDQ1MTFa
MBcxFTATBgNVBAMMDGh0dHBzX3NlcnZlcjCCASIwDQYJKoZIhvcNAQEBBQADggEP
ADCCAQoCggEBAKGEGI6iKkxFFxwTnBoqsYHTX9C1Gl8gankqYl+eJ/Tl1HTnBaiM
c7oS1ptnapte1xK7JDS9E2ltlRQ7NyuIU4yTx26MPO4oyR6Cyr8sTg/I39Hklm1/
fg3IYbu/vQrpSrtFXg9TOT7Oi6bUxwxQGTVCUY/8T1GwOVI+/1c5uVSmJw8O7CEL
A9/jOLqOmiuBeMMn+JS6H5t7HG/jRHt/Z0JaSIdiI2Is0tPBd1iRpYFxzcehOr0U
RlmFgOFZGNfT9O4g2YyvmVtS+vpRhEAE5LhV5TIelv97s4f1ZdDHNHvou5AV9ANK
CzQbFfDUobnxqr4/SeLYTvsLgi6h1DyJP6MCAwEAAaNQME4wHQYDVR0OBBYEFAfu
RxroDak/yro7MbRfDogKVDmBMB8GA1UdIwQYMBaAFAfuRxroDak/yro7MbRfDogK
VDmBMAwGA1UdEwQFMAMBAf8wDQYJKoZIhvcNAQELBQADggEBAF5M/Gz6JDC1HoSm
6HFLBB9ul9TQQI3RhohwreCYyeZ866WrvqZfle+lxcgVburYCSyi5paFpvNK3DH2
J0A2fDAMekZGcaJ7O5Zx0evTCwXoxDOhS+xO5IlGTXWCEKLeLkU27WJiLC9cTbFr
kfjL14IMnsioRzUz4a+aX5JllqnEccCDlHjSk1w5YvOvt6GC6Bvenouja2apPes/
oJJpFwZVO0epqOQo1ndRGbt5NLv6YgZlvdFXWoKNKohzdfDV/RbW9BrbpyKSxFTm
usrmVcZTQpSf69zbnEVO8N3N6c1zNdETPON1ZGLW1O1MXWkQDZniH6LduXN/Oob7
vYqvXlw=
-----END CERTIFICATE-----
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
<!DOCTYPE html
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" lang="en-US" xml:lang="en-US">
<head>
<title>PoS(LATTICE 2013)001</title>

<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
</head>
<body>

<map name="headmap" id="headmap">
<area shape="rect" coords="682,15,748,65" href="http://www.sissa.it" />
<area shape="rect" coords="9,9,266,69" href="/index.html" target="_top"/>
</map>
<h1>PoS(LATTICE 2013)001</h1>

<div id="identifier">

<div>
<em>Title</em>
<strong>Heavy Flavour Physics Review</strong>
</div>

<div>
<em>Conference</em>
<strong><a href="/cgi-bin/reader/conf.cgi?confid=187">31st International Symposium on Lattice Field Theory LATTICE 2013</a></strong>
</div>

<div>
<em>Authors</em>
<div class="identxt">
A. El-Khadra</div>
</div>


<div>
<em>Contribution</em>
<strong><a href="https://pos.sissa.it/archive/conferences/187/001/LATTICE 2013_001.pdf">pdf</a></strong>
</div>




</div>
<div id="footer">
<p>
Communicate with the <a href="mailto:%70%6F%73%2D%65%6F%40%70%6F%73%2E%73%69%73%73%61%2E%69%74">PoS Editorial Office</a>
| <a href="/POScookies.html" title="Cookie policy">Cookie policy</a>
</p>
</div>


</body>
</html>
33 changes: 33 additions & 0 deletions tests/functional/pos/fixtures/oai_harvested/pos_record.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<OAI-PMH xmlns="http://www.openarchives.org/OAI/2.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd">
<responseDate>2015-01-29T13:44:13Z</responseDate>
<request verb="ListRecords" metadataPrefix="pos-ext_dc" set="conference:LATTICE 2013">
https://pos.sissa.it/cgi-bin/oai/oai-script-spires-extended.cgi
</request>
<ListRecords>
<record>
<header>
<identifier>oai:pos.sissa.it:LATTICE 2013/001</identifier>
<datestamp>2014-04-28</datestamp>
<setSpec>conference:LATTICE 2013</setSpec>
<setSpec>group:9</setSpec>
</header>
<metadata>
<pos-ext_dc:pex-dc xmlns:pos-ext_dc="http://pos.sissa.it/pos-ext_dc/pos-ext_dc.xsd" xmlns:pex-dc="http://pos.sissa.it/pos-ext_dc/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pos.sissa.it/pos-ext_dc/ http://pos.sissa.it/pos-ext_dc/pos-ext_dc.xsd">
<pex-dc:title>Heavy Flavour Physics Review</pex-dc:title>
<pex-dc:creator><pex-dc:name>Aida El-Khadra</pex-dc:name><pex-dc:affiliation>INFN and Università di Firenze</pex-dc:affiliation></pex-dc:creator>
<pex-dc:creator><pex-dc:name>M. T. MacDonald</pex-dc:name><pex-dc:affiliation>U of Pecs</pex-dc:affiliation></pex-dc:creator><pex-dc:subject>Lattice Field Theory</pex-dc:subject>
<pex-dc:description>31st International Symposium on Lattice Field Theory LATTICE 2013; Plenary sessions</pex-dc:description>
<pex-dc:publisher>Sissa Medialab</pex-dc:publisher>
<pex-dc:date>2014-03-19T21:09:30Z</pex-dc:date>
<pex-dc:type>Text</pex-dc:type>
<pex-dc:format>application/pdf</pex-dc:format>
<pex-dc:identifier>PoS(LATTICE 2013)001</pex-dc:identifier>
<pex-dc:language>en</pex-dc:language>
<pex-dc:relation>LATTICE 2013 (31st International Symposium on Lattice Field Theory LATTICE 2013) isPartOf</pex-dc:relation>
<pex-dc:rights>Creative Commons Attribution-NonCommercial-ShareAlike</pex-dc:rights>
</pos-ext_dc:pex-dc>
</metadata>
</record>
</ListRecords>
</OAI-PMH>
57 changes: 57 additions & 0 deletions tests/functional/pos/fixtures/pos_records.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
[
{
"acquisition_source": {
"source": "pos",
"method": "hepcrawl",
"submission_number": "5652c7f6190f11e79e8000224dabeaad",
"datetime": "2017-04-03T10:26:40.365216"
},
"license": [
{
"url": "https://creativecommons.org/licenses/by-nc-sa/3.0",
"license": "CC-BY-NC-SA-3.0"
}
],
"titles": [
{
"source": "Sissa Medialab",
"title": "Heavy Flavour Physics Review"
}
],
"authors": [
{
"affiliations": [
{
"value": "INFN and Universit\u00e0 di Firenze"
}
],
"full_name": "El-Khadra, Aida"
},
{
"affiliations": [
{
"value": "U of Pecs"
}
],
"full_name": "MacDonald, M.T."
}
],
"publication_info": [
{
"journal_volume": "LATTICE 2013",
"year": 2014,
"artid": "001",
"journal_title": "PoS"
}
],
"document_type": [
"conference paper"
],
"imprints": [
{
"date": "2014-03-19"
}
],
"citeable": true
}
]
Loading

0 comments on commit 81735b8

Please sign in to comment.