Skip to content

Commit

Permalink
desy: adapt to the new middleware
Browse files Browse the repository at this point in the history
As it changes the actual URL of the files to download, the hash also
changes.

Signed-off-by: David Caro <[email protected]>
  • Loading branch information
david-caro committed Sep 20, 2017
1 parent 1a53584 commit 3110b0f
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 36 deletions.
15 changes: 13 additions & 2 deletions hepcrawl/spiders/desy_spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,13 +178,18 @@ def start_requests(self):
yield request

@staticmethod
def _get_full_uri(current_path, base_url, schema, hostname=''):
def _get_full_uri(current_path, base_url, schema, hostname=None):
hostname = hostname or ''
if os.path.isabs(current_path):
full_path = current_path
else:
full_path = os.path.join(base_url, current_path)

return '{schema}://{hostname}{full_path}'.format(**vars())
return '{schema}://{hostname}{full_path}'.format(
schema=schema,
hostname=hostname,
full_path=full_path,
)

def parse(self, response):
"""Parse a ``Desy`` XML file into a :class:`hepcrawl.utils.ParsedItem`.
Expand All @@ -208,8 +213,12 @@ def parse(self, response):
url_schema = 'file'
hostname = None

self.log('Getting marc xml records...')
marcxml_records = self._get_marcxml_records(response.body)
self.log('Got %d marc xml records' % len(marcxml_records))
self.log('Getting hep records...')
hep_records = self._hep_records_from_marcxml(marcxml_records)
self.log('Got %d hep records' % len(hep_records))

for hep_record in hep_records:
list_file_urls = [
Expand All @@ -222,12 +231,14 @@ def parse(self, response):
for fft_path in hep_record['_fft']
]

self.log('Got the following fft urls: %s' % list_file_urls)
parsed_item = ParsedItem(
record=hep_record,
file_urls=list_file_urls,
ftp_params=ftp_params,
record_format='hep',
)
self.log('Got item: %s' % parsed_item)

yield parsed_item

Expand Down
22 changes: 11 additions & 11 deletions tests/functional/desy/fixtures/desy_local_records_expected.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
"version": 1,
"creation_datetime": "2017-06-27T09:43:17", "description": "00013 Decomposition of the problematic rotation curves in our sample according to the best-fit \\textsc{core}NFW models. Colors and symbols are as in Figure \\ref{fig:dc14_fits}.",
"format": ".txt",
"path": "/tmp/file_urls/full/796483eeaa779dfc00871228dd70dc9809ebc3c0.txt",
"path": "/tmp/file_urls/full/49e42fc70c5d7b0cd9dc7aa5defa12ded530e135.txt",
"type": "Main",
"filename": "test_fft_1"
},
Expand All @@ -19,7 +19,7 @@
"creation_datetime": "2017-06-27T09:43:16",
"description": "00005 Comparison of the parameters of the best-fit DC14 models to the cosmological halo mass-concentration relation from \\cite{dutton14} (left) and the stellar mass-halo mass relation from \\cite{behroozi13} (right). The error bars correspond to the extremal values of the multidimensional 68\\% confidence region for each fit. The theoretical relations are shown as red lines and their 1$\\sigma$ and 2$\\sigma$ scatter are represented by the dark and light grey bands, respectively. The mass-concentration relation from \\cite{maccio08} and the stellar mass-halo mass relation from \\cite{behroozi13} are also shown as the black dashed lines.",
"format": ".txt",
"path": "/tmp/file_urls/full/ff1ccb47d9a3abb75acb91279e0ec2a4b530ba3e.txt",
"path": "/tmp/file_urls/full/c1cdb1640202896b1ffc446f20d0d660977fc2db.txt",
"type": "Main",
"filename": "test_fft_2"
}
Expand Down Expand Up @@ -78,7 +78,7 @@
"version": 1,
"creation_datetime": "2017-06-27T09:43:17", "description": "00013 Decomposition of the problematic rotation curves in our sample according to the best-fit \\textsc{core}NFW models. Colors and symbols are as in Figure \\ref{fig:dc14_fits}.",
"format": ".txt",
"path": "/tmp/file_urls/full/796483eeaa779dfc00871228dd70dc9809ebc3c0.txt",
"path": "/tmp/file_urls/full/49e42fc70c5d7b0cd9dc7aa5defa12ded530e135.txt",
"type": "Main",
"filename": "test_fft_1"
},
Expand All @@ -87,7 +87,7 @@
"creation_datetime": "2017-06-27T09:43:16",
"description": "00005 Comparison of the parameters of the best-fit DC14 models to the cosmological halo mass-concentration relation from \\cite{dutton14} (left) and the stellar mass-halo mass relation from \\cite{behroozi13} (right). The error bars correspond to the extremal values of the multidimensional 68\\% confidence region for each fit. The theoretical relations are shown as red lines and their 1$\\sigma$ and 2$\\sigma$ scatter are represented by the dark and light grey bands, respectively. The mass-concentration relation from \\cite{maccio08} and the stellar mass-halo mass relation from \\cite{behroozi13} are also shown as the black dashed lines.",
"format": ".txt",
"path": "/tmp/file_urls/full/ff1ccb47d9a3abb75acb91279e0ec2a4b530ba3e.txt",
"path": "/tmp/file_urls/full/c1cdb1640202896b1ffc446f20d0d660977fc2db.txt",
"type": "Main",
"filename": "test_fft_2"
}
Expand Down Expand Up @@ -146,7 +146,7 @@
"version": 1,
"creation_datetime": "2017-06-27T09:43:17", "description": "00013 Decomposition of the problematic rotation curves in our sample according to the best-fit \\textsc{core}NFW models. Colors and symbols are as in Figure \\ref{fig:dc14_fits}.",
"format": ".txt",
"path": "/tmp/file_urls/full/796483eeaa779dfc00871228dd70dc9809ebc3c0.txt",
"path": "/tmp/file_urls/full/49e42fc70c5d7b0cd9dc7aa5defa12ded530e135.txt",
"type": "Main",
"filename": "test_fft_1"
},
Expand All @@ -155,7 +155,7 @@
"creation_datetime": "2017-06-27T09:43:16",
"description": "00005 Comparison of the parameters of the best-fit DC14 models to the cosmological halo mass-concentration relation from \\cite{dutton14} (left) and the stellar mass-halo mass relation from \\cite{behroozi13} (right). The error bars correspond to the extremal values of the multidimensional 68\\% confidence region for each fit. The theoretical relations are shown as red lines and their 1$\\sigma$ and 2$\\sigma$ scatter are represented by the dark and light grey bands, respectively. The mass-concentration relation from \\cite{maccio08} and the stellar mass-halo mass relation from \\cite{behroozi13} are also shown as the black dashed lines.",
"format": ".txt",
"path": "/tmp/file_urls/full/ff1ccb47d9a3abb75acb91279e0ec2a4b530ba3e.txt",
"path": "/tmp/file_urls/full/c1cdb1640202896b1ffc446f20d0d660977fc2db.txt",
"type": "Main",
"filename": "test_fft_2"
}
Expand Down Expand Up @@ -214,7 +214,7 @@
"version": 1,
"creation_datetime": "2017-06-27T09:43:17", "description": "00013 Decomposition of the problematic rotation curves in our sample according to the best-fit \\textsc{core}NFW models. Colors and symbols are as in Figure \\ref{fig:dc14_fits}.",
"format": ".txt",
"path": "/tmp/file_urls/full/796483eeaa779dfc00871228dd70dc9809ebc3c0.txt",
"path": "/tmp/file_urls/full/49e42fc70c5d7b0cd9dc7aa5defa12ded530e135.txt",
"type": "Main",
"filename": "test_fft_1"
},
Expand All @@ -223,7 +223,7 @@
"creation_datetime": "2017-06-27T09:43:16",
"description": "00005 Comparison of the parameters of the best-fit DC14 models to the cosmological halo mass-concentration relation from \\cite{dutton14} (left) and the stellar mass-halo mass relation from \\cite{behroozi13} (right). The error bars correspond to the extremal values of the multidimensional 68\\% confidence region for each fit. The theoretical relations are shown as red lines and their 1$\\sigma$ and 2$\\sigma$ scatter are represented by the dark and light grey bands, respectively. The mass-concentration relation from \\cite{maccio08} and the stellar mass-halo mass relation from \\cite{behroozi13} are also shown as the black dashed lines.",
"format": ".txt",
"path": "/tmp/file_urls/full/ff1ccb47d9a3abb75acb91279e0ec2a4b530ba3e.txt",
"path": "/tmp/file_urls/full/c1cdb1640202896b1ffc446f20d0d660977fc2db.txt",
"type": "Main",
"filename": "test_fft_2"
}
Expand Down Expand Up @@ -1754,7 +1754,7 @@
"format": ".pdf",
"filename": "dummy",
"version": 1,
"path": "/tmp/file_urls/full/c011422ef40ef111a72bd72092066dd3c1cc7a39.pdf",
"path": "/tmp/file_urls/full/0df3efe7842cf285ae0eeed845cca003dd755674.pdf",
"type": "Main"
},
{
Expand All @@ -1763,7 +1763,7 @@
"format": ".txt",
"filename": "test_fft_1",
"version": 1,
"path": "/tmp/file_urls/full/796483eeaa779dfc00871228dd70dc9809ebc3c0.txt",
"path": "/tmp/file_urls/full/49e42fc70c5d7b0cd9dc7aa5defa12ded530e135.txt",
"type": "Main"
},
{
Expand All @@ -1772,7 +1772,7 @@
"format": ".txt",
"filename": "test_fft_2",
"version": 1,
"path": "/tmp/file_urls/full/ff1ccb47d9a3abb75acb91279e0ec2a4b530ba3e.txt",
"path": "/tmp/file_urls/full/c1cdb1640202896b1ffc446f20d0d660977fc2db.txt",
"type": "Main"
}
],
Expand Down
48 changes: 25 additions & 23 deletions tests/functional/desy/test_desy.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

import copy
import hashlib
import os
from time import sleep

import pytest
Expand Down Expand Up @@ -76,6 +77,29 @@ def _generate_md5_hash(file_path):
assert file_1_hash == file_2_hash


def assert_ffts_content_matches_expected(record):
for fft_field in record.get('_fft', []):
assert_fft_content_matches_expected(fft_field)


def assert_fft_content_matches_expected(fft_field):
expected_file_name = get_file_name_from_fft(fft_field)
assert_files_equal(expected_file_name, fft_field['path'])


def get_file_name_from_fft(fft_field):
file_path = get_test_suite_path(
'desy',
'fixtures',
'ftp_server',
'DESY',
'FFT',
fft_field['filename'] + fft_field['format'],
test_suite='functional',
)
return file_path


def get_ftp_settings():
netrc_location = get_test_suite_path(
'desy',
Expand Down Expand Up @@ -120,6 +144,7 @@ def cleanup():
sleep(10)
yield

clean_dir(path=os.path.join(os.getcwd(), '.scrapy'))
clean_dir('/tmp/file_urls')
clean_dir('/tmp/DESY')

Expand Down Expand Up @@ -180,26 +205,3 @@ def test_desy(

for record in gotten_results:
assert_ffts_content_matches_expected(record)


def assert_ffts_content_matches_expected(record):
for fft_field in record.get('_fft', []):
assert_fft_content_matches_expected(fft_field)


def assert_fft_content_matches_expected(fft_field):
expected_file_name = get_file_name_from_fft(fft_field)
assert_files_equal(expected_file_name, fft_field['path'])


def get_file_name_from_fft(fft_field):
file_path = get_test_suite_path(
'desy',
'fixtures',
'ftp_server',
'DESY',
'FFT',
fft_field['filename'] + fft_field['format'],
test_suite='functional',
)
return file_path

0 comments on commit 3110b0f

Please sign in to comment.