diff --git a/hepcrawl/crawler2hep.py b/hepcrawl/crawler2hep.py index 3a8d3ba8..51c1e10e 100644 --- a/hepcrawl/crawler2hep.py +++ b/hepcrawl/crawler2hep.py @@ -35,12 +35,6 @@ def _get_updated_fft_fields(current_fft_fields, record_files): for record_file in record_files } new_fft_fields = [] - import logging - logger = logging.getLogger(__name__) - logger.log(logging.INFO, - "-------------------- _get_updated_fft_fields -------------------") - logger.log(logging.INFO, - 'current_fft_fields: {}'.format(current_fft_fields)) for fft_field in current_fft_fields: file_name = os.path.basename(fft_field['path']) if file_name in record_files_index: @@ -151,7 +145,7 @@ def item_to_hep( ) elif item.record_format == 'hepcrawl': item = _normalize_hepcrawl_record( - item=item, + item=item.record, source=source, ) return hepcrawl_to_hep(dict(item)) @@ -160,10 +154,11 @@ def item_to_hep( def hep_to_hep(hep_record, record_files): - hep_record['_fft'] = _get_updated_fft_fields( - current_fft_fields=hep_record['_fft'], - record_files=record_files, - ) + if record_files: + hep_record['_fft'] = _get_updated_fft_fields( + current_fft_fields=hep_record['_fft'], + record_files=record_files, + ) return hep_record diff --git a/hepcrawl/spiders/alpha_spider.py b/hepcrawl/spiders/alpha_spider.py index ab151fa6..3330980f 100644 --- a/hepcrawl/spiders/alpha_spider.py +++ b/hepcrawl/spiders/alpha_spider.py @@ -149,8 +149,8 @@ def parse(self, response): record.add_value('collections', ['HEP', 'THESIS']) parsed_item = ParsedItem( - item=record.load_item(), - item_format='hepcrawl', + record=record.load_item(), + record_format='hepcrawl', ) yield parsed_item diff --git a/hepcrawl/spiders/aps_spider.py b/hepcrawl/spiders/aps_spider.py index d15c690a..042db5d7 100644 --- a/hepcrawl/spiders/aps_spider.py +++ b/hepcrawl/spiders/aps_spider.py @@ -117,8 +117,8 @@ def parse(self, response): record.add_value('collections', ['HEP', 'Citeable', 'Published']) parsed_item = ParsedItem( - item=record.load_item(), - item_format='hepcrawl', + record=record.load_item(), + record_format='hepcrawl', ) yield parsed_item diff --git a/hepcrawl/spiders/arxiv_spider.py b/hepcrawl/spiders/arxiv_spider.py index 8ab0af4f..0784c625 100644 --- a/hepcrawl/spiders/arxiv_spider.py +++ b/hepcrawl/spiders/arxiv_spider.py @@ -116,8 +116,8 @@ def parse_node(self, response, node): record.add_value('license', license) parsed_item = ParsedItem( - item=record.load_item(), - item_format='hepcrawl', + record=record.load_item(), + record_format='hepcrawl', ) return parsed_item diff --git a/hepcrawl/spiders/base_spider.py b/hepcrawl/spiders/base_spider.py index ee3a7d47..defd5ee7 100644 --- a/hepcrawl/spiders/base_spider.py +++ b/hepcrawl/spiders/base_spider.py @@ -199,8 +199,8 @@ def build_item(self, response): record.add_value('collections', ['HEP', 'THESIS']) parsed_item = ParsedItem( - item=record.load_item(), - item_format='hepcrawl', + record=record.load_item(), + record_format='hepcrawl', ) return parsed_item diff --git a/hepcrawl/spiders/brown_spider.py b/hepcrawl/spiders/brown_spider.py index ee22a3eb..97d3c6b9 100644 --- a/hepcrawl/spiders/brown_spider.py +++ b/hepcrawl/spiders/brown_spider.py @@ -225,8 +225,8 @@ def build_item(self, response): record.add_value('collections', ['HEP', 'THESIS']) parsed_item = ParsedItem( - item=record.load_item(), - item_format='hepcrawl', + record=record.load_item(), + record_format='hepcrawl', ) return parsed_item diff --git a/hepcrawl/spiders/dnb_spider.py b/hepcrawl/spiders/dnb_spider.py index 3dd50b59..645e5d4e 100644 --- a/hepcrawl/spiders/dnb_spider.py +++ b/hepcrawl/spiders/dnb_spider.py @@ -226,8 +226,8 @@ def build_item(self, response): record.add_value('collections', ['HEP', 'THESIS']) parsed_item = ParsedItem( - item=record.load_item(), - item_format='hepcrawl', + record=record.load_item(), + record_format='hepcrawl', ) return parsed_item diff --git a/hepcrawl/spiders/edp_spider.py b/hepcrawl/spiders/edp_spider.py index d7ed1715..50e5a742 100644 --- a/hepcrawl/spiders/edp_spider.py +++ b/hepcrawl/spiders/edp_spider.py @@ -320,8 +320,8 @@ def build_item_rich(self, response): record.add_value("urls", response.meta.get("urls")) parsed_item = ParsedItem( - item=record.load_item(), - item_format='hepcrawl', + record=record.load_item(), + record_format='hepcrawl', ) return parsed_item @@ -395,8 +395,8 @@ def build_item_jats(self, response): record.add_value("references", references) parsed_item = ParsedItem( - item=record.load_item(), - item_format='hepcrawl', + record=record.load_item(), + record_format='hepcrawl', ) return parsed_item diff --git a/hepcrawl/spiders/elsevier_spider.py b/hepcrawl/spiders/elsevier_spider.py index 7dfbb9bb..9ad7c58c 100644 --- a/hepcrawl/spiders/elsevier_spider.py +++ b/hepcrawl/spiders/elsevier_spider.py @@ -1036,8 +1036,8 @@ def build_item(self, response): record.add_value('references', self.get_references(node)) parsed_item = ParsedItem( - item=record.load_item(), - item_format='hepcrawl', + record=record.load_item(), + record_format='hepcrawl', ) return parsed_item diff --git a/hepcrawl/spiders/hindawi_spider.py b/hepcrawl/spiders/hindawi_spider.py index 46fae495..3637ac59 100644 --- a/hepcrawl/spiders/hindawi_spider.py +++ b/hepcrawl/spiders/hindawi_spider.py @@ -226,8 +226,8 @@ def parse_node(self, response, node): "./datafield[@tag='260']/subfield[@code='b']/text()") parsed_item = ParsedItem( - item=record.load_item(), - item_format='hepcrawl', + record=record.load_item(), + record_format='hepcrawl', ) return parsed_item diff --git a/hepcrawl/spiders/infn_spider.py b/hepcrawl/spiders/infn_spider.py index 579ac65b..eb6161ab 100644 --- a/hepcrawl/spiders/infn_spider.py +++ b/hepcrawl/spiders/infn_spider.py @@ -243,8 +243,8 @@ def build_item(self, response): record.add_value('collections', ['HEP', 'THESIS']) parsed_item = ParsedItem( - item=record.load_item(), - item_format='hepcrawl', + record=record.load_item(), + record_format='hepcrawl', ) return parsed_item diff --git a/hepcrawl/spiders/iop_spider.py b/hepcrawl/spiders/iop_spider.py index 90c7809f..b40ffa72 100644 --- a/hepcrawl/spiders/iop_spider.py +++ b/hepcrawl/spiders/iop_spider.py @@ -224,8 +224,8 @@ def parse_node(self, response, node): self.add_fft_file(pdf_file_path, file_access, file_type)) parsed_item = ParsedItem( - item=record.load_item(), - item_format='hepcrawl', + record=record.load_item(), + record_format='hepcrawl', ) return parsed_item diff --git a/hepcrawl/spiders/magic_spider.py b/hepcrawl/spiders/magic_spider.py index 1c83c829..a7832f0f 100644 --- a/hepcrawl/spiders/magic_spider.py +++ b/hepcrawl/spiders/magic_spider.py @@ -180,8 +180,8 @@ def build_item(self, response): record.add_value('collections', ['HEP', 'THESIS']) parsed_item = ParsedItem( - item=record.load_item(), - item_format='hepcrawl', + record=record.load_item(), + record_format='hepcrawl', ) yield parsed_item diff --git a/hepcrawl/spiders/mit_spider.py b/hepcrawl/spiders/mit_spider.py index 4e099348..7dc2f344 100644 --- a/hepcrawl/spiders/mit_spider.py +++ b/hepcrawl/spiders/mit_spider.py @@ -228,8 +228,8 @@ def build_item(self, response): record.add_value('collections', ['HEP', 'THESIS']) parsed_item = ParsedItem( - item=record.load_item(), - item_format='hepcrawl', + record=record.load_item(), + record_format='hepcrawl', ) return parsed_item diff --git a/hepcrawl/spiders/phenix_spider.py b/hepcrawl/spiders/phenix_spider.py index 95bc874a..b4d79655 100644 --- a/hepcrawl/spiders/phenix_spider.py +++ b/hepcrawl/spiders/phenix_spider.py @@ -130,8 +130,8 @@ def parse_node(self, response, node): record.add_value('collections', ['HEP', 'THESIS']) parsed_item = ParsedItem( - item=record.load_item(), - item_format='hepcrawl', + record=record.load_item(), + record_format='hepcrawl', ) return parsed_item diff --git a/hepcrawl/spiders/phil_spider.py b/hepcrawl/spiders/phil_spider.py index 8a486292..cbeacfe3 100644 --- a/hepcrawl/spiders/phil_spider.py +++ b/hepcrawl/spiders/phil_spider.py @@ -165,8 +165,8 @@ def build_item(self, response): record.add_value('journal_year', int(jsonrecord['year'])) parsed_item = ParsedItem( - item=record.load_item(), - item_format='hepcrawl', + record=record.load_item(), + record_format='hepcrawl', ) return parsed_item diff --git a/hepcrawl/spiders/pos_spider.py b/hepcrawl/spiders/pos_spider.py index a4d68f7d..0b849a8e 100644 --- a/hepcrawl/spiders/pos_spider.py +++ b/hepcrawl/spiders/pos_spider.py @@ -136,8 +136,8 @@ def build_item(self, response): record.add_value('collections', ['HEP', 'ConferencePaper']) parsed_item = ParsedItem( - item=record.load_item(), - item_format='hepcrawl', + record=record.load_item(), + record_format='hepcrawl', ) return parsed_item diff --git a/hepcrawl/spiders/t2k_spider.py b/hepcrawl/spiders/t2k_spider.py index 97ae8202..1c93de27 100644 --- a/hepcrawl/spiders/t2k_spider.py +++ b/hepcrawl/spiders/t2k_spider.py @@ -168,8 +168,8 @@ def build_item(self, response): record.add_value('collections', ['HEP', 'THESIS']) parsed_item = ParsedItem( - item=record.load_item(), - item_format='hepcrawl', + record=record.load_item(), + record_format='hepcrawl', ) yield parsed_item diff --git a/hepcrawl/spiders/wsp_spider.py b/hepcrawl/spiders/wsp_spider.py index 3e6ec655..a14b94f1 100644 --- a/hepcrawl/spiders/wsp_spider.py +++ b/hepcrawl/spiders/wsp_spider.py @@ -72,7 +72,15 @@ class WorldScientificSpider(Jats, XMLFeedSpider): 'rapid-communications' ] - def __init__(self, package_path=None, ftp_folder="/WSP", ftp_host=None, ftp_netrc=None, *args, **kwargs): + def __init__( + self, + package_path=None, + ftp_folder="/WSP", + ftp_host=None, + ftp_netrc=None, + *args, + **kwargs + ): """Construct WSP spider.""" super(WorldScientificSpider, self).__init__(*args, **kwargs) self.ftp_folder = ftp_folder @@ -206,8 +214,8 @@ def parse_node(self, response, node): record.add_value('collections', self._get_collections(node, article_type, journal_title)) parsed_item = ParsedItem( - item=dict(record.load_item()), - item_format='hepcrawl', + record=dict(record.load_item()), + record_format='hepcrawl', ) return parsed_item diff --git a/hepcrawl/utils.py b/hepcrawl/utils.py index 96dc130e..67e8e704 100644 --- a/hepcrawl/utils.py +++ b/hepcrawl/utils.py @@ -348,15 +348,10 @@ def get_license_by_text(license_text): return license -def get_absolute_file_path(file_path): - """Returns the absolute path of a relative path.""" - return os.path.abspath(file_path) - - class RecordFile(object): """Metadata of a file needed for a record. - Params: + Args: path(str): local path to the file. name(str): Optional, name of the file, if not passed, will use the name in the path. @@ -377,14 +372,14 @@ def __init__(self, path, name=None): class ParsedItem(dict): """Each of the individual items returned by the spider to the pipeline. - Params: + Args: record(dict): Information about the crawled record, might be in different formats. record_format(str): Format of the above record, for example ``"hep"`` or ``"hepcrawl"``. file_urls(list(str)): URLs to the files to be downloaded by ``FftFilesPipeline``. - ftp_params(dict): Parameter for the ``FftFilesPipeline`` to be able to connect to the ftp - server, if any. + ftp_params(dict): Parameter for the ``FftFilesPipeline`` to be able to connect to the + ftp server, if any. record_files(list(RecordFile)): files attached to the record, usually populated by - ``FftFilesPipeline`` from the ``file_urls`` parameter. + ``FftFilesPipeline`` from the ``file_urls`` parameter. """ def __init__( self, diff --git a/tests/functional/desy/fixtures/desy_local_records.json b/tests/functional/desy/fixtures/desy_local_records.json index aaf150de..4197a456 100644 --- a/tests/functional/desy/fixtures/desy_local_records.json +++ b/tests/functional/desy/fixtures/desy_local_records.json @@ -13,7 +13,7 @@ "version": 1, "creation_datetime": "2017-06-27T09:43:17", "description": "00013 Decomposition of the problematic rotation curves in our sample according to the best-fit \\textsc{core}NFW models. Colors and symbols are as in Figure \\ref{fig:dc14_fits}.", "format": ".txt", - "path": "/tmp/file_urls/full/49e42fc70c5d7b0cd9dc7aa5defa12ded530e135.txt", + "path": "/tmp/file_urls/full/796483eeaa779dfc00871228dd70dc9809ebc3c0.txt", "type": "Main", "filename": "cNFW_rogue_curves" }, @@ -22,7 +22,7 @@ "creation_datetime": "2017-06-27T09:43:16", "description": "00005 Comparison of the parameters of the best-fit DC14 models to the cosmological halo mass-concentration relation from \\cite{dutton14} (left) and the stellar mass-halo mass relation from \\cite{behroozi13} (right). The error bars correspond to the extremal values of the multidimensional 68\\% confidence region for each fit. The theoretical relations are shown as red lines and their 1$\\sigma$ and 2$\\sigma$ scatter are represented by the dark and light grey bands, respectively. The mass-concentration relation from \\cite{maccio08} and the stellar mass-halo mass relation from \\cite{behroozi13} are also shown as the black dashed lines.", "format": ".txt", - "path": "/tmp/file_urls/full/c1cdb1640202896b1ffc446f20d0d660977fc2db.txt", + "path": "/tmp/file_urls/full/ff1ccb47d9a3abb75acb91279e0ec2a4b530ba3e.txt", "type": "Main", "filename": "scalingRelations_DutBeh_DC14_all_Oh" } @@ -90,7 +90,7 @@ "version": 1, "creation_datetime": "2017-06-27T09:43:17", "description": "00013 Decomposition of the problematic rotation curves in our sample according to the best-fit \\textsc{core}NFW models. Colors and symbols are as in Figure \\ref{fig:dc14_fits}.", "format": ".txt", - "path": "/tmp/file_urls/full/49e42fc70c5d7b0cd9dc7aa5defa12ded530e135.txt", + "path": "/tmp/file_urls/full/796483eeaa779dfc00871228dd70dc9809ebc3c0.txt", "type": "Main", "filename": "cNFW_rogue_curves" }, @@ -99,7 +99,7 @@ "creation_datetime": "2017-06-27T09:43:16", "description": "00005 Comparison of the parameters of the best-fit DC14 models to the cosmological halo mass-concentration relation from \\cite{dutton14} (left) and the stellar mass-halo mass relation from \\cite{behroozi13} (right). The error bars correspond to the extremal values of the multidimensional 68\\% confidence region for each fit. The theoretical relations are shown as red lines and their 1$\\sigma$ and 2$\\sigma$ scatter are represented by the dark and light grey bands, respectively. The mass-concentration relation from \\cite{maccio08} and the stellar mass-halo mass relation from \\cite{behroozi13} are also shown as the black dashed lines.", "format": ".txt", - "path": "/tmp/file_urls/full/c1cdb1640202896b1ffc446f20d0d660977fc2db.txt", + "path": "/tmp/file_urls/full/ff1ccb47d9a3abb75acb91279e0ec2a4b530ba3e.txt", "type": "Main", "filename": "scalingRelations_DutBeh_DC14_all_Oh" } @@ -167,7 +167,7 @@ "version": 1, "creation_datetime": "2017-06-27T09:43:17", "description": "00013 Decomposition of the problematic rotation curves in our sample according to the best-fit \\textsc{core}NFW models. Colors and symbols are as in Figure \\ref{fig:dc14_fits}.", "format": ".txt", - "path": "/tmp/file_urls/full/49e42fc70c5d7b0cd9dc7aa5defa12ded530e135.txt", + "path": "/tmp/file_urls/full/796483eeaa779dfc00871228dd70dc9809ebc3c0.txt", "type": "Main", "filename": "cNFW_rogue_curves" }, @@ -176,7 +176,7 @@ "creation_datetime": "2017-06-27T09:43:16", "description": "00005 Comparison of the parameters of the best-fit DC14 models to the cosmological halo mass-concentration relation from \\cite{dutton14} (left) and the stellar mass-halo mass relation from \\cite{behroozi13} (right). The error bars correspond to the extremal values of the multidimensional 68\\% confidence region for each fit. The theoretical relations are shown as red lines and their 1$\\sigma$ and 2$\\sigma$ scatter are represented by the dark and light grey bands, respectively. The mass-concentration relation from \\cite{maccio08} and the stellar mass-halo mass relation from \\cite{behroozi13} are also shown as the black dashed lines.", "format": ".txt", - "path": "/tmp/file_urls/full/c1cdb1640202896b1ffc446f20d0d660977fc2db.txt", + "path": "/tmp/file_urls/full/ff1ccb47d9a3abb75acb91279e0ec2a4b530ba3e.txt", "type": "Main", "filename": "scalingRelations_DutBeh_DC14_all_Oh" } @@ -244,7 +244,7 @@ "version": 1, "creation_datetime": "2017-06-27T09:43:17", "description": "00013 Decomposition of the problematic rotation curves in our sample according to the best-fit \\textsc{core}NFW models. Colors and symbols are as in Figure \\ref{fig:dc14_fits}.", "format": ".txt", - "path": "/tmp/file_urls/full/49e42fc70c5d7b0cd9dc7aa5defa12ded530e135.txt", + "path": "/tmp/file_urls/full/796483eeaa779dfc00871228dd70dc9809ebc3c0.txt", "type": "Main", "filename": "cNFW_rogue_curves" }, @@ -253,7 +253,7 @@ "creation_datetime": "2017-06-27T09:43:16", "description": "00005 Comparison of the parameters of the best-fit DC14 models to the cosmological halo mass-concentration relation from \\cite{dutton14} (left) and the stellar mass-halo mass relation from \\cite{behroozi13} (right). The error bars correspond to the extremal values of the multidimensional 68\\% confidence region for each fit. The theoretical relations are shown as red lines and their 1$\\sigma$ and 2$\\sigma$ scatter are represented by the dark and light grey bands, respectively. The mass-concentration relation from \\cite{maccio08} and the stellar mass-halo mass relation from \\cite{behroozi13} are also shown as the black dashed lines.", "format": ".txt", - "path": "/tmp/file_urls/full/c1cdb1640202896b1ffc446f20d0d660977fc2db.txt", + "path": "/tmp/file_urls/full/ff1ccb47d9a3abb75acb91279e0ec2a4b530ba3e.txt", "type": "Main", "filename": "scalingRelations_DutBeh_DC14_all_Oh" } diff --git a/tests/functional/desy/fixtures/ftp_server/DESY/desy_collection_records.xml b/tests/functional/desy/fixtures/ftp_server/DESY/desy_collection_records.xml index 4095d62f..359bb570 100644 --- a/tests/functional/desy/fixtures/ftp_server/DESY/desy_collection_records.xml +++ b/tests/functional/desy/fixtures/ftp_server/DESY/desy_collection_records.xml @@ -51,7 +51,7 @@ INSPIRE:HEP - DESY/FFT/test_fft_1.txt + FFT/test_fft_1.txt 00013 Decomposition of the problematic rotation curves in our sample according to the best-fit \textsc{core}NFW models. Colors and symbols are as in Figure \ref{fig:dc14_fits}. .txt cNFW_rogue_curves @@ -62,7 +62,7 @@ - DESY/FFT/test_fft_2.txt + FFT/test_fft_2.txt 00005 Comparison of the parameters of the best-fit DC14 models to the cosmological halo mass-concentration relation from \cite{dutton14} (left) and the stellar mass-halo mass relation from \cite{behroozi13} (right). The error bars correspond to the extremal values of the multidimensional 68\% confidence region for each fit. The theoretical relations are shown as red lines and their 1$\sigma$ and 2$\sigma$ scatter are represented by the dark and light grey bands, respectively. The mass-concentration relation from \cite{maccio08} and the stellar mass-halo mass relation from \cite{behroozi13} are also shown as the black dashed lines. .txt scalingRelations_DutBeh_DC14_all_Oh diff --git a/tests/functional/desy/fixtures/ftp_server/DESY/desy_no_namespace_collection_records.xml b/tests/functional/desy/fixtures/ftp_server/DESY/desy_no_namespace_collection_records.xml index fa395bfc..1f9c57a9 100644 --- a/tests/functional/desy/fixtures/ftp_server/DESY/desy_no_namespace_collection_records.xml +++ b/tests/functional/desy/fixtures/ftp_server/DESY/desy_no_namespace_collection_records.xml @@ -51,7 +51,7 @@ INSPIRE:HEP - DESY/FFT/test_fft_1.txt + FFT/test_fft_1.txt 00013 Decomposition of the problematic rotation curves in our sample according to the best-fit \textsc{core}NFW models. Colors and symbols are as in Figure \ref{fig:dc14_fits}. .txt cNFW_rogue_curves @@ -62,7 +62,7 @@ - DESY/FFT/test_fft_2.txt + FFT/test_fft_2.txt 00005 Comparison of the parameters of the best-fit DC14 models to the cosmological halo mass-concentration relation from \cite{dutton14} (left) and the stellar mass-halo mass relation from \cite{behroozi13} (right). The error bars correspond to the extremal values of the multidimensional 68\% confidence region for each fit. The theoretical relations are shown as red lines and their 1$\sigma$ and 2$\sigma$ scatter are represented by the dark and light grey bands, respectively. The mass-concentration relation from \cite{maccio08} and the stellar mass-halo mass relation from \cite{behroozi13} are also shown as the black dashed lines. .txt scalingRelations_DutBeh_DC14_all_Oh diff --git a/tests/functional/wsp/test_wsp.py b/tests/functional/wsp/test_wsp.py index 8c7b060a..a0411b8e 100644 --- a/tests/functional/wsp/test_wsp.py +++ b/tests/functional/wsp/test_wsp.py @@ -72,7 +72,7 @@ def set_up_local_environment(): 'CRAWLER_HOST_URL': 'http://scrapyd:6800', 'CRAWLER_PROJECT': 'hepcrawl', 'CRAWLER_ARGUMENTS': { - 'source_folder': package_location, + 'package_path': package_location, } } diff --git a/tests/unit/responses/desy/desy_collection_records.xml b/tests/unit/responses/desy/desy_collection_records.xml index c5347dc5..93ede820 100644 --- a/tests/unit/responses/desy/desy_collection_records.xml +++ b/tests/unit/responses/desy/desy_collection_records.xml @@ -51,7 +51,7 @@ INSPIRE:HEP - DESY/FFT/test_fft_1.txt;1 + FFT/test_fft_1.txt 00013 Decomposition of the problematic rotation curves in our sample according to the best-fit \textsc{core}NFW models. Colors and symbols are as in Figure \ref{fig:dc14_fits}. .txt cNFW_rogue_curves @@ -62,7 +62,7 @@ - DESY/FFT/test_fft_2.txt;1 + FFT/test_fft_2.txt 00005 Comparison of the parameters of the best-fit DC14 models to the cosmological halo mass-concentration relation from \cite{dutton14} (left) and the stellar mass-halo mass relation from \cite{behroozi13} (right). The error bars correspond to the extremal values of the multidimensional 68\% confidence region for each fit. The theoretical relations are shown as red lines and their 1$\sigma$ and 2$\sigma$ scatter are represented by the dark and light grey bands, respectively. The mass-concentration relation from \cite{maccio08} and the stellar mass-halo mass relation from \cite{behroozi13} are also shown as the black dashed lines. .txt scalingRelations_DutBeh_DC14_all_Oh @@ -124,7 +124,7 @@ INSPIRE:HEP - DESY/FFT/test_fft_1.txt;1 + FFT/test_fft_1.txt 00013 Decomposition of the problematic rotation curves in our sample according to the best-fit \textsc{core}NFW models. Colors and symbols are as in Figure \ref{fig:dc14_fits}. .txt cNFW_rogue_curves @@ -135,7 +135,7 @@ - DESY/FFT/test_fft_2.txt;1 + FFT/test_fft_2.txt 00005 Comparison of the parameters of the best-fit DC14 models to the cosmological halo mass-concentration relation from \cite{dutton14} (left) and the stellar mass-halo mass relation from \cite{behroozi13} (right). The error bars correspond to the extremal values of the multidimensional 68\% confidence region for each fit. The theoretical relations are shown as red lines and their 1$\sigma$ and 2$\sigma$ scatter are represented by the dark and light grey bands, respectively. The mass-concentration relation from \cite{maccio08} and the stellar mass-halo mass relation from \cite{behroozi13} are also shown as the black dashed lines. .txt scalingRelations_DutBeh_DC14_all_Oh diff --git a/tests/unit/responses/desy/desy_record.xml b/tests/unit/responses/desy/desy_record.xml index e8db9ab1..9e20e8d0 100644 --- a/tests/unit/responses/desy/desy_record.xml +++ b/tests/unit/responses/desy/desy_record.xml @@ -51,7 +51,7 @@ INSPIRE:HEP - DESY/FFT/test_fft_1.txt;1 + FFT/test_fft_1.txt 00013 Decomposition of the problematic rotation curves in our sample according to the best-fit \textsc{core}NFW models. Colors and symbols are as in Figure \ref{fig:dc14_fits}. .txt cNFW_rogue_curves @@ -62,7 +62,7 @@ - DESY/FFT/test_fft_2.txt;1 + FFT/test_fft_2.txt 00005 Comparison of the parameters of the best-fit DC14 models to the cosmological halo mass-concentration relation from \cite{dutton14} (left) and the stellar mass-halo mass relation from \cite{behroozi13} (right). The error bars correspond to the extremal values of the multidimensional 68\% confidence region for each fit. The theoretical relations are shown as red lines and their 1$\sigma$ and 2$\sigma$ scatter are represented by the dark and light grey bands, respectively. The mass-concentration relation from \cite{maccio08} and the stellar mass-halo mass relation from \cite{behroozi13} are also shown as the black dashed lines. .txt scalingRelations_DutBeh_DC14_all_Oh diff --git a/tests/unit/test_alpha.py b/tests/unit/test_alpha.py index ad8f3f03..96bf9af1 100644 --- a/tests/unit/test_alpha.py +++ b/tests/unit/test_alpha.py @@ -26,7 +26,7 @@ def results(): ) ) - records = [parsed_item.item for parsed_item in parsed_items] + records = [parsed_item.record for parsed_item in parsed_items] assert records return records diff --git a/tests/unit/test_aps.py b/tests/unit/test_aps.py index 8bc66033..3bb3698c 100644 --- a/tests/unit/test_aps.py +++ b/tests/unit/test_aps.py @@ -30,7 +30,7 @@ def results(): ) ) - records = [parsed_item.item for parsed_item in parsed_items] + records = [parsed_item.record for parsed_item in parsed_items] assert records return records diff --git a/tests/unit/test_base.py b/tests/unit/test_base.py index 48551cdf..e37bd37b 100644 --- a/tests/unit/test_base.py +++ b/tests/unit/test_base.py @@ -42,7 +42,7 @@ def record(): parsed_item = spider.build_item(response) assert parsed_item - return parsed_item.item + return parsed_item.record @pytest.fixture @@ -174,7 +174,7 @@ def splash(): parsed_item = spider.scrape_for_pdf(splash_response) - return parsed_item.item + return parsed_item.record def test_splash(splash): @@ -209,7 +209,7 @@ def parsed_node(): parsed_item = spider.parse_node(response, node[0]) - return parsed_item.item + return parsed_item.record def test_parsed_node(parsed_node): diff --git a/tests/unit/test_brown.py b/tests/unit/test_brown.py index b78be316..d541ee5a 100644 --- a/tests/unit/test_brown.py +++ b/tests/unit/test_brown.py @@ -45,7 +45,7 @@ def record(): parsed_item = spider.scrape_splash(splash_response) assert parsed_item - return parsed_item.item + return parsed_item.record @pytest.fixture @@ -204,7 +204,7 @@ def parsed_node_no_splash(): parsed_item = spider.parse(response).next() assert parsed_item - return parsed_item.item + return parsed_item.record def test_no_splash(parsed_node_no_splash): diff --git a/tests/unit/test_crawler2hep.py b/tests/unit/test_crawler2hep.py index 088178f1..5d0ea837 100644 --- a/tests/unit/test_crawler2hep.py +++ b/tests/unit/test_crawler2hep.py @@ -19,7 +19,7 @@ def load_file(file_name): path = get_test_suite_path( 'responses', - 'hepcrawl_to_hep', + 'crawler2hep', file_name, ) with open(path) as input_data: diff --git a/tests/unit/test_desy.py b/tests/unit/test_desy.py index 5b01f7fd..1f9730c7 100644 --- a/tests/unit/test_desy.py +++ b/tests/unit/test_desy.py @@ -88,7 +88,7 @@ def test_pipeline_record(generated_record): 'Colors and symbols are as in Figure \\ref{fig:dc14_fits}.', 'filename': 'cNFW_rogue_curves', 'format': '.txt', - 'path': 'DESY/FFT/test_fft_1.txt;1', + 'path': 'FFT/test_fft_1.txt', 'type': 'Main', 'version': 1, }, @@ -107,7 +107,7 @@ def test_pipeline_record(generated_record): 'as the black dashed lines.', 'filename': 'scalingRelations_DutBeh_DC14_all_Oh', 'format': '.txt', - 'path': 'DESY/FFT/test_fft_2.txt;1', + 'path': 'FFT/test_fft_2.txt', 'type': 'Main', 'version': 1 } @@ -212,7 +212,7 @@ def test_pipeline_collection_records(generated_records): 'Colors and symbols are as in Figure \\ref{fig:dc14_fits}.', 'filename': 'cNFW_rogue_curves', 'format': '.txt', - 'path': 'DESY/FFT/test_fft_1.txt;1', + 'path': 'FFT/test_fft_1.txt', 'type': 'Main', 'version': 1, }, @@ -231,7 +231,7 @@ def test_pipeline_collection_records(generated_records): 'as the black dashed lines.', 'filename': 'scalingRelations_DutBeh_DC14_all_Oh', 'format': '.txt', - 'path': 'DESY/FFT/test_fft_2.txt;1', + 'path': 'FFT/test_fft_2.txt', 'type': 'Main', 'version': 1 } @@ -302,7 +302,7 @@ def test_pipeline_collection_records(generated_records): 'Colors and symbols are as in Figure \\ref{fig:dc14_fits}.', 'filename': 'cNFW_rogue_curves', 'format': '.txt', - 'path': 'DESY/FFT/test_fft_1.txt;1', + 'path': 'FFT/test_fft_1.txt', 'type': 'Main', 'version': 1, }, @@ -321,7 +321,7 @@ def test_pipeline_collection_records(generated_records): 'as the black dashed lines.', 'filename': 'scalingRelations_DutBeh_DC14_all_Oh', 'format': '.txt', - 'path': 'DESY/FFT/test_fft_2.txt;1', + 'path': 'FFT/test_fft_2.txt', 'type': 'Main', 'version': 1 } diff --git a/tests/unit/test_dnb.py b/tests/unit/test_dnb.py index a1a22dbd..d3c6f9bc 100644 --- a/tests/unit/test_dnb.py +++ b/tests/unit/test_dnb.py @@ -76,7 +76,7 @@ def record(scrape_pos_page_body): parsed_item = request.callback(response) assert parsed_item - return parsed_item.item + return parsed_item.record def test_title(record): @@ -247,7 +247,7 @@ def parse_without_splash(): ) parsed_item = spider.parse_node(response, nodes[0]) - return parsed_item.item + return parsed_item.record def test_parse_without_splash(parse_without_splash): diff --git a/tests/unit/test_edp.py b/tests/unit/test_edp.py index 5dba9990..83c17cde 100644 --- a/tests/unit/test_edp.py +++ b/tests/unit/test_edp.py @@ -81,7 +81,7 @@ def record_jats(package_jats, scrape_pos_page_body): parsed_item = request.callback(response) assert parsed_item - return parsed_item.item + return parsed_item.record @pytest.fixture @@ -116,7 +116,7 @@ def record_rich(package_rich): parsed_item = spider.parse_node(fake_resp, node) assert parsed_item - return parsed_item.item + return parsed_item.record def test_title(record_jats): @@ -381,7 +381,7 @@ def test_no_dois_jats(): node = get_node(spider, "//article", response)[0] parsed_item = spider.parse_node(response, node) - record = parsed_item.item + record = parsed_item.record assert "dois" not in record assert "additional_files" not in record @@ -403,7 +403,7 @@ def test_no_dois_rich(): node = get_node(spider, "//EDPSArticle", response)[0] parsed_item = spider.parse_node(response, node) - record = parsed_item.item + record = parsed_item.record assert "dois" not in record assert "additional_files" not in record @@ -431,7 +431,7 @@ def test_addendum_jats(): node = get_node(spider, "//article", response)[0] parsed_item = spider.parse_node(response, node) - record = parsed_item.item + record = parsed_item.record assert "related_article_doi" in record assert record["related_article_doi"][0][ @@ -456,7 +456,7 @@ def test_author_with_email(): node = get_node(spider, "//article", response)[0] parsed_item = spider.parse_node(response, node) - record = parsed_item.item + record = parsed_item.record assert 'email' in record['authors'][0] assert record['authors'][0]['email'] == "Fname.Sname@university.org" @@ -491,7 +491,7 @@ def test_aff_with_email(): node = get_node(spider, "//article", response)[0] parsed_item = spider.parse_node(response, node) - record = parsed_item.item + record = parsed_item.record affiliation = "Department of Physics, Western Michigan University, Kalamazoo, MI 49008, USA" assert 'affiliations' in record['authors'][0] @@ -525,7 +525,7 @@ def test_collections_review(): node = get_node(spider, "//article", response)[0] parsed_item = spider.parse_node(response, node) - record = parsed_item.item + record = parsed_item.record assert "collections" in record assert record["collections"] == [{'primary': 'HEP'}, {'primary': 'Review'}] @@ -556,7 +556,7 @@ def record_references_only(): parsed_item = spider.parse_node(response, node) assert parsed_item - return parsed_item.item + return parsed_item.record def test_references(record_references_only): diff --git a/tests/unit/test_elsevier.py b/tests/unit/test_elsevier.py index d26e52fb..c19e2628 100644 --- a/tests/unit/test_elsevier.py +++ b/tests/unit/test_elsevier.py @@ -45,7 +45,7 @@ def record(): parsed_item = spider.parse_node(response, nodes) assert parsed_item - return parsed_item.item + return parsed_item.record @pytest.fixture(scope="module") @@ -103,7 +103,7 @@ def parsed_node(): parsed_item = spider.scrape_sciencedirect(parse_response) assert parsed_item - return parsed_item.item + return parsed_item.record def test_collection(parsed_node): @@ -173,7 +173,7 @@ def cover_display_date(): parse_item = spider.parse_node(response, node) assert parse_item - return parse_item.item + return parse_item.record def test_cover_display_date(cover_display_date): @@ -199,7 +199,7 @@ def cover_display_date_y_m(): parse_item = spider.parse_node(response, node) assert parse_item - return parse_item.item + return parse_item.record def test_cover_display_date_y_m(cover_display_date_y_m): @@ -225,7 +225,7 @@ def cover_display_date_y(): parse_item = spider.parse_node(response, node) assert parse_item - return parse_item.item + return parse_item.record def test_cover_display_date_y(cover_display_date_y): @@ -1663,7 +1663,7 @@ def sciencedirect(): parse_item = spider.scrape_sciencedirect(response) assert parse_item - return parse_item.item + return parse_item.record def test_sciencedirect(sciencedirect): diff --git a/tests/unit/test_hindawi.py b/tests/unit/test_hindawi.py index bd1da795..1bc75268 100644 --- a/tests/unit/test_hindawi.py +++ b/tests/unit/test_hindawi.py @@ -29,7 +29,7 @@ def record(): parsed_item = spider.parse_node(response, nodes[0]) assert parsed_item - return parsed_item.item + return parsed_item.record def test_title(record): diff --git a/tests/unit/test_infn.py b/tests/unit/test_infn.py index 5cd1e27d..893bf8c3 100644 --- a/tests/unit/test_infn.py +++ b/tests/unit/test_infn.py @@ -32,7 +32,7 @@ def record(): parsed_item = spider.scrape_splash(response) assert parsed_item - return parsed_item.item + return parsed_item.record def test_title(record): @@ -153,4 +153,4 @@ def test_parse_node_nolink(): node = selector.xpath('//%s' % spider.itertag)[0] parsed_item = spider.parse_node(response, node).next() - assert isinstance(parsed_item.item, hepcrawl.items.HEPRecord) + assert isinstance(parsed_item.record, hepcrawl.items.HEPRecord) diff --git a/tests/unit/test_iop.py b/tests/unit/test_iop.py index fb8d26d2..f708fa8d 100644 --- a/tests/unit/test_iop.py +++ b/tests/unit/test_iop.py @@ -42,7 +42,7 @@ def record(): parsed_item = spider.parse_node(response, node) assert parsed_item - return parsed_item.item + return parsed_item.record def test_abstract(record): @@ -188,7 +188,7 @@ def erratum_open_access_record(): parsed_item = spider.parse_node(response, node) assert parsed_item - return parsed_item.item + return parsed_item.record def test_files_erratum_open_access_record(erratum_open_access_record): diff --git a/tests/unit/test_magic.py b/tests/unit/test_magic.py index 74d9fad4..e2df96a4 100644 --- a/tests/unit/test_magic.py +++ b/tests/unit/test_magic.py @@ -43,7 +43,7 @@ def record(): parsed_item = spider.scrape_for_pdf(splash_response).next() assert parsed_item - return parsed_item.item + return parsed_item.record def test_abstract(record): @@ -168,7 +168,7 @@ def test_no_author_no_date_no_url(): node = get_node(spider, spider.itertag, text=body) parsed_item = spider.parse_node(response, node).next() - record = parsed_item.item + record = parsed_item.record assert isinstance(record, hepcrawl.items.HEPRecord) assert "date" not in record assert "authors" not in record @@ -189,7 +189,7 @@ def test_no_aff(): response = fake_response_from_string(body) parsed_item = spider.scrape_for_pdf(response).next() - record = parsed_item.item + record = parsed_item.record assert isinstance(record, hepcrawl.items.HEPRecord) assert "date" not in record assert "affiliations" not in record["authors"] @@ -222,7 +222,7 @@ def test_no_spash_page(): response.meta["urls"] = parsed_node.meta["urls"] parsed_item = spider.scrape_for_pdf(response).next() - record = parsed_item.item + record = parsed_item.record assert isinstance(record, hepcrawl.items.HEPRecord) assert "urls" in record assert "title" in record diff --git a/tests/unit/test_mit.py b/tests/unit/test_mit.py index 2629dd34..0bdc7e4e 100644 --- a/tests/unit/test_mit.py +++ b/tests/unit/test_mit.py @@ -29,7 +29,7 @@ def record(): parsed_item = spider.build_item(response) assert parsed_item - return parsed_item.item + return parsed_item.record @pytest.fixture @@ -169,7 +169,7 @@ def supervisors(): parsed_item = spider.build_item(response) assert parsed_item - return parsed_item.item + return parsed_item.record def test_two_supervisors(supervisors): diff --git a/tests/unit/test_phenix.py b/tests/unit/test_phenix.py index 5a6a5f4c..dc75d6ee 100644 --- a/tests/unit/test_phenix.py +++ b/tests/unit/test_phenix.py @@ -33,7 +33,7 @@ def record(): parsed_item = spider.parse_node(response, nodes[0]) assert parsed_item - return parsed_item.item + return parsed_item.record @pytest.fixture diff --git a/tests/unit/test_phil.py b/tests/unit/test_phil.py index be0da905..5288500a 100644 --- a/tests/unit/test_phil.py +++ b/tests/unit/test_phil.py @@ -37,7 +37,7 @@ def record(): parsed_item = spider.build_item(response) assert parsed_item - return parsed_item.item + return parsed_item.record @pytest.fixture @@ -54,7 +54,7 @@ def journal(): parsed_item = spider.build_item(response) assert parsed_item - return parsed_item.item + return parsed_item.record @pytest.fixture @@ -232,7 +232,7 @@ def splash(): parsed_item = spider.scrape_for_pdf(response) assert parsed_item - return parsed_item.item + return parsed_item.record def test_scrape(splash): diff --git a/tests/unit/test_t2k.py b/tests/unit/test_t2k.py index caaaaefc..d87bdbd2 100644 --- a/tests/unit/test_t2k.py +++ b/tests/unit/test_t2k.py @@ -39,7 +39,7 @@ def record(): parsed_item = spider.scrape_for_pdf(splash_response).next() assert parsed_item - return parsed_item.item + return parsed_item.record def test_abstact(record): @@ -129,7 +129,7 @@ def non_url(): parsed_item = spider.parse_node(response, nodes[0]).next() assert parsed_item - return parsed_item.item + return parsed_item.record def test_non_url(non_url):