From 3110b0f9d1a954cf071b4ffbeec5f75c9f3263f5 Mon Sep 17 00:00:00 2001 From: David Caro Date: Wed, 20 Sep 2017 13:45:50 +0200 Subject: [PATCH] desy: adapt to the new middleware As it changes the actual URL of the files to download, the hash also changes. Signed-off-by: David Caro --- hepcrawl/spiders/desy_spider.py | 15 +++++- .../fixtures/desy_local_records_expected.json | 22 ++++----- tests/functional/desy/test_desy.py | 48 ++++++++++--------- 3 files changed, 49 insertions(+), 36 deletions(-) diff --git a/hepcrawl/spiders/desy_spider.py b/hepcrawl/spiders/desy_spider.py index ec70ec39..efae063e 100644 --- a/hepcrawl/spiders/desy_spider.py +++ b/hepcrawl/spiders/desy_spider.py @@ -178,13 +178,18 @@ def start_requests(self): yield request @staticmethod - def _get_full_uri(current_path, base_url, schema, hostname=''): + def _get_full_uri(current_path, base_url, schema, hostname=None): + hostname = hostname or '' if os.path.isabs(current_path): full_path = current_path else: full_path = os.path.join(base_url, current_path) - return '{schema}://{hostname}{full_path}'.format(**vars()) + return '{schema}://{hostname}{full_path}'.format( + schema=schema, + hostname=hostname, + full_path=full_path, + ) def parse(self, response): """Parse a ``Desy`` XML file into a :class:`hepcrawl.utils.ParsedItem`. 
@@ -208,8 +213,12 @@ def parse(self, response): url_schema = 'file' hostname = None + self.log('Getting marc xml records...') marcxml_records = self._get_marcxml_records(response.body) + self.log('Got %d marc xml records' % len(marcxml_records)) + self.log('Getting hep records...') hep_records = self._hep_records_from_marcxml(marcxml_records) + self.log('Got %d hep records' % len(hep_records)) for hep_record in hep_records: list_file_urls = [ @@ -222,12 +231,14 @@ def parse(self, response): for fft_path in hep_record['_fft'] ] + self.log('Got the following fft urls: %s' % list_file_urls) parsed_item = ParsedItem( record=hep_record, file_urls=list_file_urls, ftp_params=ftp_params, record_format='hep', ) + self.log('Got item: %s' % parsed_item) yield parsed_item diff --git a/tests/functional/desy/fixtures/desy_local_records_expected.json b/tests/functional/desy/fixtures/desy_local_records_expected.json index 1dc784b9..dc7baf23 100644 --- a/tests/functional/desy/fixtures/desy_local_records_expected.json +++ b/tests/functional/desy/fixtures/desy_local_records_expected.json @@ -10,7 +10,7 @@ "version": 1, "creation_datetime": "2017-06-27T09:43:17", "description": "00013 Decomposition of the problematic rotation curves in our sample according to the best-fit \\textsc{core}NFW models. Colors and symbols are as in Figure \\ref{fig:dc14_fits}.", "format": ".txt", - "path": "/tmp/file_urls/full/796483eeaa779dfc00871228dd70dc9809ebc3c0.txt", + "path": "/tmp/file_urls/full/49e42fc70c5d7b0cd9dc7aa5defa12ded530e135.txt", "type": "Main", "filename": "test_fft_1" }, @@ -19,7 +19,7 @@ "creation_datetime": "2017-06-27T09:43:16", "description": "00005 Comparison of the parameters of the best-fit DC14 models to the cosmological halo mass-concentration relation from \\cite{dutton14} (left) and the stellar mass-halo mass relation from \\cite{behroozi13} (right). The error bars correspond to the extremal values of the multidimensional 68\\% confidence region for each fit. 
The theoretical relations are shown as red lines and their 1$\\sigma$ and 2$\\sigma$ scatter are represented by the dark and light grey bands, respectively. The mass-concentration relation from \\cite{maccio08} and the stellar mass-halo mass relation from \\cite{behroozi13} are also shown as the black dashed lines.", "format": ".txt", - "path": "/tmp/file_urls/full/ff1ccb47d9a3abb75acb91279e0ec2a4b530ba3e.txt", + "path": "/tmp/file_urls/full/c1cdb1640202896b1ffc446f20d0d660977fc2db.txt", "type": "Main", "filename": "test_fft_2" } @@ -78,7 +78,7 @@ "version": 1, "creation_datetime": "2017-06-27T09:43:17", "description": "00013 Decomposition of the problematic rotation curves in our sample according to the best-fit \\textsc{core}NFW models. Colors and symbols are as in Figure \\ref{fig:dc14_fits}.", "format": ".txt", - "path": "/tmp/file_urls/full/796483eeaa779dfc00871228dd70dc9809ebc3c0.txt", + "path": "/tmp/file_urls/full/49e42fc70c5d7b0cd9dc7aa5defa12ded530e135.txt", "type": "Main", "filename": "test_fft_1" }, @@ -87,7 +87,7 @@ "creation_datetime": "2017-06-27T09:43:16", "description": "00005 Comparison of the parameters of the best-fit DC14 models to the cosmological halo mass-concentration relation from \\cite{dutton14} (left) and the stellar mass-halo mass relation from \\cite{behroozi13} (right). The error bars correspond to the extremal values of the multidimensional 68\\% confidence region for each fit. The theoretical relations are shown as red lines and their 1$\\sigma$ and 2$\\sigma$ scatter are represented by the dark and light grey bands, respectively. 
The mass-concentration relation from \\cite{maccio08} and the stellar mass-halo mass relation from \\cite{behroozi13} are also shown as the black dashed lines.", "format": ".txt", - "path": "/tmp/file_urls/full/ff1ccb47d9a3abb75acb91279e0ec2a4b530ba3e.txt", + "path": "/tmp/file_urls/full/c1cdb1640202896b1ffc446f20d0d660977fc2db.txt", "type": "Main", "filename": "test_fft_2" } @@ -146,7 +146,7 @@ "version": 1, "creation_datetime": "2017-06-27T09:43:17", "description": "00013 Decomposition of the problematic rotation curves in our sample according to the best-fit \\textsc{core}NFW models. Colors and symbols are as in Figure \\ref{fig:dc14_fits}.", "format": ".txt", - "path": "/tmp/file_urls/full/796483eeaa779dfc00871228dd70dc9809ebc3c0.txt", + "path": "/tmp/file_urls/full/49e42fc70c5d7b0cd9dc7aa5defa12ded530e135.txt", "type": "Main", "filename": "test_fft_1" }, @@ -155,7 +155,7 @@ "creation_datetime": "2017-06-27T09:43:16", "description": "00005 Comparison of the parameters of the best-fit DC14 models to the cosmological halo mass-concentration relation from \\cite{dutton14} (left) and the stellar mass-halo mass relation from \\cite{behroozi13} (right). The error bars correspond to the extremal values of the multidimensional 68\\% confidence region for each fit. The theoretical relations are shown as red lines and their 1$\\sigma$ and 2$\\sigma$ scatter are represented by the dark and light grey bands, respectively. 
The mass-concentration relation from \\cite{maccio08} and the stellar mass-halo mass relation from \\cite{behroozi13} are also shown as the black dashed lines.", "format": ".txt", - "path": "/tmp/file_urls/full/ff1ccb47d9a3abb75acb91279e0ec2a4b530ba3e.txt", + "path": "/tmp/file_urls/full/c1cdb1640202896b1ffc446f20d0d660977fc2db.txt", "type": "Main", "filename": "test_fft_2" } @@ -214,7 +214,7 @@ "version": 1, "creation_datetime": "2017-06-27T09:43:17", "description": "00013 Decomposition of the problematic rotation curves in our sample according to the best-fit \\textsc{core}NFW models. Colors and symbols are as in Figure \\ref{fig:dc14_fits}.", "format": ".txt", - "path": "/tmp/file_urls/full/796483eeaa779dfc00871228dd70dc9809ebc3c0.txt", + "path": "/tmp/file_urls/full/49e42fc70c5d7b0cd9dc7aa5defa12ded530e135.txt", "type": "Main", "filename": "test_fft_1" }, @@ -223,7 +223,7 @@ "creation_datetime": "2017-06-27T09:43:16", "description": "00005 Comparison of the parameters of the best-fit DC14 models to the cosmological halo mass-concentration relation from \\cite{dutton14} (left) and the stellar mass-halo mass relation from \\cite{behroozi13} (right). The error bars correspond to the extremal values of the multidimensional 68\\% confidence region for each fit. The theoretical relations are shown as red lines and their 1$\\sigma$ and 2$\\sigma$ scatter are represented by the dark and light grey bands, respectively. 
The mass-concentration relation from \\cite{maccio08} and the stellar mass-halo mass relation from \\cite{behroozi13} are also shown as the black dashed lines.", "format": ".txt", - "path": "/tmp/file_urls/full/ff1ccb47d9a3abb75acb91279e0ec2a4b530ba3e.txt", + "path": "/tmp/file_urls/full/c1cdb1640202896b1ffc446f20d0d660977fc2db.txt", "type": "Main", "filename": "test_fft_2" } @@ -1754,7 +1754,7 @@ "format": ".pdf", "filename": "dummy", "version": 1, - "path": "/tmp/file_urls/full/c011422ef40ef111a72bd72092066dd3c1cc7a39.pdf", + "path": "/tmp/file_urls/full/0df3efe7842cf285ae0eeed845cca003dd755674.pdf", "type": "Main" }, { @@ -1763,7 +1763,7 @@ "format": ".txt", "filename": "test_fft_1", "version": 1, - "path": "/tmp/file_urls/full/796483eeaa779dfc00871228dd70dc9809ebc3c0.txt", + "path": "/tmp/file_urls/full/49e42fc70c5d7b0cd9dc7aa5defa12ded530e135.txt", "type": "Main" }, { @@ -1772,7 +1772,7 @@ "format": ".txt", "filename": "test_fft_2", "version": 1, - "path": "/tmp/file_urls/full/ff1ccb47d9a3abb75acb91279e0ec2a4b530ba3e.txt", + "path": "/tmp/file_urls/full/c1cdb1640202896b1ffc446f20d0d660977fc2db.txt", "type": "Main" } ], diff --git a/tests/functional/desy/test_desy.py b/tests/functional/desy/test_desy.py index d7286f7e..c3e7ca4f 100644 --- a/tests/functional/desy/test_desy.py +++ b/tests/functional/desy/test_desy.py @@ -13,6 +13,7 @@ import copy import hashlib +import os from time import sleep import pytest @@ -76,6 +77,29 @@ def _generate_md5_hash(file_path): assert file_1_hash == file_2_hash +def assert_ffts_content_matches_expected(record): + for fft_field in record.get('_fft', []): + assert_fft_content_matches_expected(fft_field) + + +def assert_fft_content_matches_expected(fft_field): + expected_file_name = get_file_name_from_fft(fft_field) + assert_files_equal(expected_file_name, fft_field['path']) + + +def get_file_name_from_fft(fft_field): + file_path = get_test_suite_path( + 'desy', + 'fixtures', + 'ftp_server', + 'DESY', + 'FFT', + 
fft_field['filename'] + fft_field['format'], + test_suite='functional', + ) + return file_path + + def get_ftp_settings(): netrc_location = get_test_suite_path( 'desy', @@ -120,6 +144,7 @@ def cleanup(): sleep(10) yield + clean_dir(path=os.path.join(os.getcwd(), '.scrapy')) clean_dir('/tmp/file_urls') clean_dir('/tmp/DESY') @@ -180,26 +205,3 @@ def test_desy( for record in gotten_results: assert_ffts_content_matches_expected(record) - - -def assert_ffts_content_matches_expected(record): - for fft_field in record.get('_fft', []): - assert_fft_content_matches_expected(fft_field) - - -def assert_fft_content_matches_expected(fft_field): - expected_file_name = get_file_name_from_fft(fft_field) - assert_files_equal(expected_file_name, fft_field['path']) - - -def get_file_name_from_fft(fft_field): - file_path = get_test_suite_path( - 'desy', - 'fixtures', - 'ftp_server', - 'DESY', - 'FFT', - fft_field['filename'] + fft_field['format'], - test_suite='functional', - ) - return file_path