diff --git a/hepcrawl/crawler2hep.py b/hepcrawl/crawler2hep.py
index 3a8d3ba8..51c1e10e 100644
--- a/hepcrawl/crawler2hep.py
+++ b/hepcrawl/crawler2hep.py
@@ -35,12 +35,6 @@ def _get_updated_fft_fields(current_fft_fields, record_files):
for record_file in record_files
}
new_fft_fields = []
- import logging
- logger = logging.getLogger(__name__)
- logger.log(logging.INFO,
- "-------------------- _get_updated_fft_fields -------------------")
- logger.log(logging.INFO,
- 'current_fft_fields: {}'.format(current_fft_fields))
for fft_field in current_fft_fields:
file_name = os.path.basename(fft_field['path'])
if file_name in record_files_index:
@@ -151,7 +145,7 @@ def item_to_hep(
)
elif item.record_format == 'hepcrawl':
item = _normalize_hepcrawl_record(
- item=item,
+ item=item.record,
source=source,
)
return hepcrawl_to_hep(dict(item))
@@ -160,10 +154,11 @@ def item_to_hep(
def hep_to_hep(hep_record, record_files):
- hep_record['_fft'] = _get_updated_fft_fields(
- current_fft_fields=hep_record['_fft'],
- record_files=record_files,
- )
+ if record_files:
+ hep_record['_fft'] = _get_updated_fft_fields(
+ current_fft_fields=hep_record['_fft'],
+ record_files=record_files,
+ )
return hep_record
diff --git a/hepcrawl/spiders/alpha_spider.py b/hepcrawl/spiders/alpha_spider.py
index ab151fa6..3330980f 100644
--- a/hepcrawl/spiders/alpha_spider.py
+++ b/hepcrawl/spiders/alpha_spider.py
@@ -149,8 +149,8 @@ def parse(self, response):
record.add_value('collections', ['HEP', 'THESIS'])
parsed_item = ParsedItem(
- item=record.load_item(),
- item_format='hepcrawl',
+ record=record.load_item(),
+ record_format='hepcrawl',
)
yield parsed_item
diff --git a/hepcrawl/spiders/aps_spider.py b/hepcrawl/spiders/aps_spider.py
index d15c690a..042db5d7 100644
--- a/hepcrawl/spiders/aps_spider.py
+++ b/hepcrawl/spiders/aps_spider.py
@@ -117,8 +117,8 @@ def parse(self, response):
record.add_value('collections', ['HEP', 'Citeable', 'Published'])
parsed_item = ParsedItem(
- item=record.load_item(),
- item_format='hepcrawl',
+ record=record.load_item(),
+ record_format='hepcrawl',
)
yield parsed_item
diff --git a/hepcrawl/spiders/arxiv_spider.py b/hepcrawl/spiders/arxiv_spider.py
index 8ab0af4f..0784c625 100644
--- a/hepcrawl/spiders/arxiv_spider.py
+++ b/hepcrawl/spiders/arxiv_spider.py
@@ -116,8 +116,8 @@ def parse_node(self, response, node):
record.add_value('license', license)
parsed_item = ParsedItem(
- item=record.load_item(),
- item_format='hepcrawl',
+ record=record.load_item(),
+ record_format='hepcrawl',
)
return parsed_item
diff --git a/hepcrawl/spiders/base_spider.py b/hepcrawl/spiders/base_spider.py
index ee3a7d47..defd5ee7 100644
--- a/hepcrawl/spiders/base_spider.py
+++ b/hepcrawl/spiders/base_spider.py
@@ -199,8 +199,8 @@ def build_item(self, response):
record.add_value('collections', ['HEP', 'THESIS'])
parsed_item = ParsedItem(
- item=record.load_item(),
- item_format='hepcrawl',
+ record=record.load_item(),
+ record_format='hepcrawl',
)
return parsed_item
diff --git a/hepcrawl/spiders/brown_spider.py b/hepcrawl/spiders/brown_spider.py
index ee22a3eb..97d3c6b9 100644
--- a/hepcrawl/spiders/brown_spider.py
+++ b/hepcrawl/spiders/brown_spider.py
@@ -225,8 +225,8 @@ def build_item(self, response):
record.add_value('collections', ['HEP', 'THESIS'])
parsed_item = ParsedItem(
- item=record.load_item(),
- item_format='hepcrawl',
+ record=record.load_item(),
+ record_format='hepcrawl',
)
return parsed_item
diff --git a/hepcrawl/spiders/dnb_spider.py b/hepcrawl/spiders/dnb_spider.py
index 3dd50b59..645e5d4e 100644
--- a/hepcrawl/spiders/dnb_spider.py
+++ b/hepcrawl/spiders/dnb_spider.py
@@ -226,8 +226,8 @@ def build_item(self, response):
record.add_value('collections', ['HEP', 'THESIS'])
parsed_item = ParsedItem(
- item=record.load_item(),
- item_format='hepcrawl',
+ record=record.load_item(),
+ record_format='hepcrawl',
)
return parsed_item
diff --git a/hepcrawl/spiders/edp_spider.py b/hepcrawl/spiders/edp_spider.py
index d7ed1715..50e5a742 100644
--- a/hepcrawl/spiders/edp_spider.py
+++ b/hepcrawl/spiders/edp_spider.py
@@ -320,8 +320,8 @@ def build_item_rich(self, response):
record.add_value("urls", response.meta.get("urls"))
parsed_item = ParsedItem(
- item=record.load_item(),
- item_format='hepcrawl',
+ record=record.load_item(),
+ record_format='hepcrawl',
)
return parsed_item
@@ -395,8 +395,8 @@ def build_item_jats(self, response):
record.add_value("references", references)
parsed_item = ParsedItem(
- item=record.load_item(),
- item_format='hepcrawl',
+ record=record.load_item(),
+ record_format='hepcrawl',
)
return parsed_item
diff --git a/hepcrawl/spiders/elsevier_spider.py b/hepcrawl/spiders/elsevier_spider.py
index 7dfbb9bb..9ad7c58c 100644
--- a/hepcrawl/spiders/elsevier_spider.py
+++ b/hepcrawl/spiders/elsevier_spider.py
@@ -1036,8 +1036,8 @@ def build_item(self, response):
record.add_value('references', self.get_references(node))
parsed_item = ParsedItem(
- item=record.load_item(),
- item_format='hepcrawl',
+ record=record.load_item(),
+ record_format='hepcrawl',
)
return parsed_item
diff --git a/hepcrawl/spiders/hindawi_spider.py b/hepcrawl/spiders/hindawi_spider.py
index 46fae495..3637ac59 100644
--- a/hepcrawl/spiders/hindawi_spider.py
+++ b/hepcrawl/spiders/hindawi_spider.py
@@ -226,8 +226,8 @@ def parse_node(self, response, node):
"./datafield[@tag='260']/subfield[@code='b']/text()")
parsed_item = ParsedItem(
- item=record.load_item(),
- item_format='hepcrawl',
+ record=record.load_item(),
+ record_format='hepcrawl',
)
return parsed_item
diff --git a/hepcrawl/spiders/infn_spider.py b/hepcrawl/spiders/infn_spider.py
index 579ac65b..eb6161ab 100644
--- a/hepcrawl/spiders/infn_spider.py
+++ b/hepcrawl/spiders/infn_spider.py
@@ -243,8 +243,8 @@ def build_item(self, response):
record.add_value('collections', ['HEP', 'THESIS'])
parsed_item = ParsedItem(
- item=record.load_item(),
- item_format='hepcrawl',
+ record=record.load_item(),
+ record_format='hepcrawl',
)
return parsed_item
diff --git a/hepcrawl/spiders/iop_spider.py b/hepcrawl/spiders/iop_spider.py
index 90c7809f..b40ffa72 100644
--- a/hepcrawl/spiders/iop_spider.py
+++ b/hepcrawl/spiders/iop_spider.py
@@ -224,8 +224,8 @@ def parse_node(self, response, node):
self.add_fft_file(pdf_file_path, file_access, file_type))
parsed_item = ParsedItem(
- item=record.load_item(),
- item_format='hepcrawl',
+ record=record.load_item(),
+ record_format='hepcrawl',
)
return parsed_item
diff --git a/hepcrawl/spiders/magic_spider.py b/hepcrawl/spiders/magic_spider.py
index 1c83c829..a7832f0f 100644
--- a/hepcrawl/spiders/magic_spider.py
+++ b/hepcrawl/spiders/magic_spider.py
@@ -180,8 +180,8 @@ def build_item(self, response):
record.add_value('collections', ['HEP', 'THESIS'])
parsed_item = ParsedItem(
- item=record.load_item(),
- item_format='hepcrawl',
+ record=record.load_item(),
+ record_format='hepcrawl',
)
yield parsed_item
diff --git a/hepcrawl/spiders/mit_spider.py b/hepcrawl/spiders/mit_spider.py
index 4e099348..7dc2f344 100644
--- a/hepcrawl/spiders/mit_spider.py
+++ b/hepcrawl/spiders/mit_spider.py
@@ -228,8 +228,8 @@ def build_item(self, response):
record.add_value('collections', ['HEP', 'THESIS'])
parsed_item = ParsedItem(
- item=record.load_item(),
- item_format='hepcrawl',
+ record=record.load_item(),
+ record_format='hepcrawl',
)
return parsed_item
diff --git a/hepcrawl/spiders/phenix_spider.py b/hepcrawl/spiders/phenix_spider.py
index 95bc874a..b4d79655 100644
--- a/hepcrawl/spiders/phenix_spider.py
+++ b/hepcrawl/spiders/phenix_spider.py
@@ -130,8 +130,8 @@ def parse_node(self, response, node):
record.add_value('collections', ['HEP', 'THESIS'])
parsed_item = ParsedItem(
- item=record.load_item(),
- item_format='hepcrawl',
+ record=record.load_item(),
+ record_format='hepcrawl',
)
return parsed_item
diff --git a/hepcrawl/spiders/phil_spider.py b/hepcrawl/spiders/phil_spider.py
index 8a486292..cbeacfe3 100644
--- a/hepcrawl/spiders/phil_spider.py
+++ b/hepcrawl/spiders/phil_spider.py
@@ -165,8 +165,8 @@ def build_item(self, response):
record.add_value('journal_year', int(jsonrecord['year']))
parsed_item = ParsedItem(
- item=record.load_item(),
- item_format='hepcrawl',
+ record=record.load_item(),
+ record_format='hepcrawl',
)
return parsed_item
diff --git a/hepcrawl/spiders/pos_spider.py b/hepcrawl/spiders/pos_spider.py
index a4d68f7d..0b849a8e 100644
--- a/hepcrawl/spiders/pos_spider.py
+++ b/hepcrawl/spiders/pos_spider.py
@@ -136,8 +136,8 @@ def build_item(self, response):
record.add_value('collections', ['HEP', 'ConferencePaper'])
parsed_item = ParsedItem(
- item=record.load_item(),
- item_format='hepcrawl',
+ record=record.load_item(),
+ record_format='hepcrawl',
)
return parsed_item
diff --git a/hepcrawl/spiders/t2k_spider.py b/hepcrawl/spiders/t2k_spider.py
index 97ae8202..1c93de27 100644
--- a/hepcrawl/spiders/t2k_spider.py
+++ b/hepcrawl/spiders/t2k_spider.py
@@ -168,8 +168,8 @@ def build_item(self, response):
record.add_value('collections', ['HEP', 'THESIS'])
parsed_item = ParsedItem(
- item=record.load_item(),
- item_format='hepcrawl',
+ record=record.load_item(),
+ record_format='hepcrawl',
)
yield parsed_item
diff --git a/hepcrawl/spiders/wsp_spider.py b/hepcrawl/spiders/wsp_spider.py
index 3e6ec655..a14b94f1 100644
--- a/hepcrawl/spiders/wsp_spider.py
+++ b/hepcrawl/spiders/wsp_spider.py
@@ -72,7 +72,15 @@ class WorldScientificSpider(Jats, XMLFeedSpider):
'rapid-communications'
]
- def __init__(self, package_path=None, ftp_folder="/WSP", ftp_host=None, ftp_netrc=None, *args, **kwargs):
+ def __init__(
+ self,
+ package_path=None,
+ ftp_folder="/WSP",
+ ftp_host=None,
+ ftp_netrc=None,
+ *args,
+ **kwargs
+ ):
"""Construct WSP spider."""
super(WorldScientificSpider, self).__init__(*args, **kwargs)
self.ftp_folder = ftp_folder
@@ -206,8 +214,8 @@ def parse_node(self, response, node):
record.add_value('collections', self._get_collections(node, article_type, journal_title))
parsed_item = ParsedItem(
- item=dict(record.load_item()),
- item_format='hepcrawl',
+ record=dict(record.load_item()),
+ record_format='hepcrawl',
)
return parsed_item
diff --git a/hepcrawl/utils.py b/hepcrawl/utils.py
index 96dc130e..8c1a4a59 100644
--- a/hepcrawl/utils.py
+++ b/hepcrawl/utils.py
@@ -356,7 +356,7 @@ def get_absolute_file_path(file_path):
class RecordFile(object):
"""Metadata of a file needed for a record.
- Params:
+ Args:
path(str): local path to the file.
name(str): Optional, name of the file, if not passed, will use the name in the path.
@@ -377,14 +377,14 @@ def __init__(self, path, name=None):
class ParsedItem(dict):
"""Each of the individual items returned by the spider to the pipeline.
- Params:
+ Args:
record(dict): Information about the crawled record, might be in different formats.
record_format(str): Format of the above record, for example ``"hep"`` or ``"hepcrawl"``.
file_urls(list(str)): URLs to the files to be downloaded by ``FftFilesPipeline``.
- ftp_params(dict): Parameter for the ``FftFilesPipeline`` to be able to connect to the ftp
- server, if any.
+ ftp_params(dict): Parameters for the ``FftFilesPipeline`` to be able to connect to the
+ ftp server, if any.
record_files(list(RecordFile)): files attached to the record, usually populated by
- ``FftFilesPipeline`` from the ``file_urls`` parameter.
+ ``FftFilesPipeline`` from the ``file_urls`` parameter.
"""
def __init__(
self,
diff --git a/tests/functional/desy/fixtures/desy_local_records.json b/tests/functional/desy/fixtures/desy_local_records.json
index aaf150de..4197a456 100644
--- a/tests/functional/desy/fixtures/desy_local_records.json
+++ b/tests/functional/desy/fixtures/desy_local_records.json
@@ -13,7 +13,7 @@
"version": 1,
"creation_datetime": "2017-06-27T09:43:17", "description": "00013 Decomposition of the problematic rotation curves in our sample according to the best-fit \\textsc{core}NFW models. Colors and symbols are as in Figure \\ref{fig:dc14_fits}.",
"format": ".txt",
- "path": "/tmp/file_urls/full/49e42fc70c5d7b0cd9dc7aa5defa12ded530e135.txt",
+ "path": "/tmp/file_urls/full/796483eeaa779dfc00871228dd70dc9809ebc3c0.txt",
"type": "Main",
"filename": "cNFW_rogue_curves"
},
@@ -22,7 +22,7 @@
"creation_datetime": "2017-06-27T09:43:16",
"description": "00005 Comparison of the parameters of the best-fit DC14 models to the cosmological halo mass-concentration relation from \\cite{dutton14} (left) and the stellar mass-halo mass relation from \\cite{behroozi13} (right). The error bars correspond to the extremal values of the multidimensional 68\\% confidence region for each fit. The theoretical relations are shown as red lines and their 1$\\sigma$ and 2$\\sigma$ scatter are represented by the dark and light grey bands, respectively. The mass-concentration relation from \\cite{maccio08} and the stellar mass-halo mass relation from \\cite{behroozi13} are also shown as the black dashed lines.",
"format": ".txt",
- "path": "/tmp/file_urls/full/c1cdb1640202896b1ffc446f20d0d660977fc2db.txt",
+ "path": "/tmp/file_urls/full/ff1ccb47d9a3abb75acb91279e0ec2a4b530ba3e.txt",
"type": "Main",
"filename": "scalingRelations_DutBeh_DC14_all_Oh"
}
@@ -90,7 +90,7 @@
"version": 1,
"creation_datetime": "2017-06-27T09:43:17", "description": "00013 Decomposition of the problematic rotation curves in our sample according to the best-fit \\textsc{core}NFW models. Colors and symbols are as in Figure \\ref{fig:dc14_fits}.",
"format": ".txt",
- "path": "/tmp/file_urls/full/49e42fc70c5d7b0cd9dc7aa5defa12ded530e135.txt",
+ "path": "/tmp/file_urls/full/796483eeaa779dfc00871228dd70dc9809ebc3c0.txt",
"type": "Main",
"filename": "cNFW_rogue_curves"
},
@@ -99,7 +99,7 @@
"creation_datetime": "2017-06-27T09:43:16",
"description": "00005 Comparison of the parameters of the best-fit DC14 models to the cosmological halo mass-concentration relation from \\cite{dutton14} (left) and the stellar mass-halo mass relation from \\cite{behroozi13} (right). The error bars correspond to the extremal values of the multidimensional 68\\% confidence region for each fit. The theoretical relations are shown as red lines and their 1$\\sigma$ and 2$\\sigma$ scatter are represented by the dark and light grey bands, respectively. The mass-concentration relation from \\cite{maccio08} and the stellar mass-halo mass relation from \\cite{behroozi13} are also shown as the black dashed lines.",
"format": ".txt",
- "path": "/tmp/file_urls/full/c1cdb1640202896b1ffc446f20d0d660977fc2db.txt",
+ "path": "/tmp/file_urls/full/ff1ccb47d9a3abb75acb91279e0ec2a4b530ba3e.txt",
"type": "Main",
"filename": "scalingRelations_DutBeh_DC14_all_Oh"
}
@@ -167,7 +167,7 @@
"version": 1,
"creation_datetime": "2017-06-27T09:43:17", "description": "00013 Decomposition of the problematic rotation curves in our sample according to the best-fit \\textsc{core}NFW models. Colors and symbols are as in Figure \\ref{fig:dc14_fits}.",
"format": ".txt",
- "path": "/tmp/file_urls/full/49e42fc70c5d7b0cd9dc7aa5defa12ded530e135.txt",
+ "path": "/tmp/file_urls/full/796483eeaa779dfc00871228dd70dc9809ebc3c0.txt",
"type": "Main",
"filename": "cNFW_rogue_curves"
},
@@ -176,7 +176,7 @@
"creation_datetime": "2017-06-27T09:43:16",
"description": "00005 Comparison of the parameters of the best-fit DC14 models to the cosmological halo mass-concentration relation from \\cite{dutton14} (left) and the stellar mass-halo mass relation from \\cite{behroozi13} (right). The error bars correspond to the extremal values of the multidimensional 68\\% confidence region for each fit. The theoretical relations are shown as red lines and their 1$\\sigma$ and 2$\\sigma$ scatter are represented by the dark and light grey bands, respectively. The mass-concentration relation from \\cite{maccio08} and the stellar mass-halo mass relation from \\cite{behroozi13} are also shown as the black dashed lines.",
"format": ".txt",
- "path": "/tmp/file_urls/full/c1cdb1640202896b1ffc446f20d0d660977fc2db.txt",
+ "path": "/tmp/file_urls/full/ff1ccb47d9a3abb75acb91279e0ec2a4b530ba3e.txt",
"type": "Main",
"filename": "scalingRelations_DutBeh_DC14_all_Oh"
}
@@ -244,7 +244,7 @@
"version": 1,
"creation_datetime": "2017-06-27T09:43:17", "description": "00013 Decomposition of the problematic rotation curves in our sample according to the best-fit \\textsc{core}NFW models. Colors and symbols are as in Figure \\ref{fig:dc14_fits}.",
"format": ".txt",
- "path": "/tmp/file_urls/full/49e42fc70c5d7b0cd9dc7aa5defa12ded530e135.txt",
+ "path": "/tmp/file_urls/full/796483eeaa779dfc00871228dd70dc9809ebc3c0.txt",
"type": "Main",
"filename": "cNFW_rogue_curves"
},
@@ -253,7 +253,7 @@
"creation_datetime": "2017-06-27T09:43:16",
"description": "00005 Comparison of the parameters of the best-fit DC14 models to the cosmological halo mass-concentration relation from \\cite{dutton14} (left) and the stellar mass-halo mass relation from \\cite{behroozi13} (right). The error bars correspond to the extremal values of the multidimensional 68\\% confidence region for each fit. The theoretical relations are shown as red lines and their 1$\\sigma$ and 2$\\sigma$ scatter are represented by the dark and light grey bands, respectively. The mass-concentration relation from \\cite{maccio08} and the stellar mass-halo mass relation from \\cite{behroozi13} are also shown as the black dashed lines.",
"format": ".txt",
- "path": "/tmp/file_urls/full/c1cdb1640202896b1ffc446f20d0d660977fc2db.txt",
+ "path": "/tmp/file_urls/full/ff1ccb47d9a3abb75acb91279e0ec2a4b530ba3e.txt",
"type": "Main",
"filename": "scalingRelations_DutBeh_DC14_all_Oh"
}
diff --git a/tests/functional/desy/fixtures/ftp_server/DESY/desy_collection_records.xml b/tests/functional/desy/fixtures/ftp_server/DESY/desy_collection_records.xml
index 4095d62f..359bb570 100644
--- a/tests/functional/desy/fixtures/ftp_server/DESY/desy_collection_records.xml
+++ b/tests/functional/desy/fixtures/ftp_server/DESY/desy_collection_records.xml
@@ -51,7 +51,7 @@
INSPIRE:HEP
- DESY/FFT/test_fft_1.txt
+ FFT/test_fft_1.txt
00013 Decomposition of the problematic rotation curves in our sample according to the best-fit \textsc{core}NFW models. Colors and symbols are as in Figure \ref{fig:dc14_fits}.
.txt
cNFW_rogue_curves
@@ -62,7 +62,7 @@
- DESY/FFT/test_fft_2.txt
+ FFT/test_fft_2.txt
00005 Comparison of the parameters of the best-fit DC14 models to the cosmological halo mass-concentration relation from \cite{dutton14} (left) and the stellar mass-halo mass relation from \cite{behroozi13} (right). The error bars correspond to the extremal values of the multidimensional 68\% confidence region for each fit. The theoretical relations are shown as red lines and their 1$\sigma$ and 2$\sigma$ scatter are represented by the dark and light grey bands, respectively. The mass-concentration relation from \cite{maccio08} and the stellar mass-halo mass relation from \cite{behroozi13} are also shown as the black dashed lines.
.txt
scalingRelations_DutBeh_DC14_all_Oh
diff --git a/tests/functional/desy/fixtures/ftp_server/DESY/desy_no_namespace_collection_records.xml b/tests/functional/desy/fixtures/ftp_server/DESY/desy_no_namespace_collection_records.xml
index fa395bfc..1f9c57a9 100644
--- a/tests/functional/desy/fixtures/ftp_server/DESY/desy_no_namespace_collection_records.xml
+++ b/tests/functional/desy/fixtures/ftp_server/DESY/desy_no_namespace_collection_records.xml
@@ -51,7 +51,7 @@
INSPIRE:HEP
- DESY/FFT/test_fft_1.txt
+ FFT/test_fft_1.txt
00013 Decomposition of the problematic rotation curves in our sample according to the best-fit \textsc{core}NFW models. Colors and symbols are as in Figure \ref{fig:dc14_fits}.
.txt
cNFW_rogue_curves
@@ -62,7 +62,7 @@
- DESY/FFT/test_fft_2.txt
+ FFT/test_fft_2.txt
00005 Comparison of the parameters of the best-fit DC14 models to the cosmological halo mass-concentration relation from \cite{dutton14} (left) and the stellar mass-halo mass relation from \cite{behroozi13} (right). The error bars correspond to the extremal values of the multidimensional 68\% confidence region for each fit. The theoretical relations are shown as red lines and their 1$\sigma$ and 2$\sigma$ scatter are represented by the dark and light grey bands, respectively. The mass-concentration relation from \cite{maccio08} and the stellar mass-halo mass relation from \cite{behroozi13} are also shown as the black dashed lines.
.txt
scalingRelations_DutBeh_DC14_all_Oh
diff --git a/tests/functional/wsp/test_wsp.py b/tests/functional/wsp/test_wsp.py
index 8c7b060a..a0411b8e 100644
--- a/tests/functional/wsp/test_wsp.py
+++ b/tests/functional/wsp/test_wsp.py
@@ -72,7 +72,7 @@ def set_up_local_environment():
'CRAWLER_HOST_URL': 'http://scrapyd:6800',
'CRAWLER_PROJECT': 'hepcrawl',
'CRAWLER_ARGUMENTS': {
- 'source_folder': package_location,
+ 'package_path': package_location,
}
}
diff --git a/tests/unit/responses/desy/desy_collection_records.xml b/tests/unit/responses/desy/desy_collection_records.xml
index c5347dc5..93ede820 100644
--- a/tests/unit/responses/desy/desy_collection_records.xml
+++ b/tests/unit/responses/desy/desy_collection_records.xml
@@ -51,7 +51,7 @@
INSPIRE:HEP
- DESY/FFT/test_fft_1.txt;1
+ FFT/test_fft_1.txt
00013 Decomposition of the problematic rotation curves in our sample according to the best-fit \textsc{core}NFW models. Colors and symbols are as in Figure \ref{fig:dc14_fits}.
.txt
cNFW_rogue_curves
@@ -62,7 +62,7 @@
- DESY/FFT/test_fft_2.txt;1
+ FFT/test_fft_2.txt
00005 Comparison of the parameters of the best-fit DC14 models to the cosmological halo mass-concentration relation from \cite{dutton14} (left) and the stellar mass-halo mass relation from \cite{behroozi13} (right). The error bars correspond to the extremal values of the multidimensional 68\% confidence region for each fit. The theoretical relations are shown as red lines and their 1$\sigma$ and 2$\sigma$ scatter are represented by the dark and light grey bands, respectively. The mass-concentration relation from \cite{maccio08} and the stellar mass-halo mass relation from \cite{behroozi13} are also shown as the black dashed lines.
.txt
scalingRelations_DutBeh_DC14_all_Oh
@@ -124,7 +124,7 @@
INSPIRE:HEP
- DESY/FFT/test_fft_1.txt;1
+ FFT/test_fft_1.txt
00013 Decomposition of the problematic rotation curves in our sample according to the best-fit \textsc{core}NFW models. Colors and symbols are as in Figure \ref{fig:dc14_fits}.
.txt
cNFW_rogue_curves
@@ -135,7 +135,7 @@
- DESY/FFT/test_fft_2.txt;1
+ FFT/test_fft_2.txt
00005 Comparison of the parameters of the best-fit DC14 models to the cosmological halo mass-concentration relation from \cite{dutton14} (left) and the stellar mass-halo mass relation from \cite{behroozi13} (right). The error bars correspond to the extremal values of the multidimensional 68\% confidence region for each fit. The theoretical relations are shown as red lines and their 1$\sigma$ and 2$\sigma$ scatter are represented by the dark and light grey bands, respectively. The mass-concentration relation from \cite{maccio08} and the stellar mass-halo mass relation from \cite{behroozi13} are also shown as the black dashed lines.
.txt
scalingRelations_DutBeh_DC14_all_Oh
diff --git a/tests/unit/responses/desy/desy_record.xml b/tests/unit/responses/desy/desy_record.xml
index e8db9ab1..9e20e8d0 100644
--- a/tests/unit/responses/desy/desy_record.xml
+++ b/tests/unit/responses/desy/desy_record.xml
@@ -51,7 +51,7 @@
INSPIRE:HEP
- DESY/FFT/test_fft_1.txt;1
+ FFT/test_fft_1.txt
00013 Decomposition of the problematic rotation curves in our sample according to the best-fit \textsc{core}NFW models. Colors and symbols are as in Figure \ref{fig:dc14_fits}.
.txt
cNFW_rogue_curves
@@ -62,7 +62,7 @@
- DESY/FFT/test_fft_2.txt;1
+ FFT/test_fft_2.txt
00005 Comparison of the parameters of the best-fit DC14 models to the cosmological halo mass-concentration relation from \cite{dutton14} (left) and the stellar mass-halo mass relation from \cite{behroozi13} (right). The error bars correspond to the extremal values of the multidimensional 68\% confidence region for each fit. The theoretical relations are shown as red lines and their 1$\sigma$ and 2$\sigma$ scatter are represented by the dark and light grey bands, respectively. The mass-concentration relation from \cite{maccio08} and the stellar mass-halo mass relation from \cite{behroozi13} are also shown as the black dashed lines.
.txt
scalingRelations_DutBeh_DC14_all_Oh
diff --git a/tests/unit/test_alpha.py b/tests/unit/test_alpha.py
index ad8f3f03..96bf9af1 100644
--- a/tests/unit/test_alpha.py
+++ b/tests/unit/test_alpha.py
@@ -26,7 +26,7 @@ def results():
)
)
- records = [parsed_item.item for parsed_item in parsed_items]
+ records = [parsed_item.record for parsed_item in parsed_items]
assert records
return records
diff --git a/tests/unit/test_aps.py b/tests/unit/test_aps.py
index 8bc66033..3bb3698c 100644
--- a/tests/unit/test_aps.py
+++ b/tests/unit/test_aps.py
@@ -30,7 +30,7 @@ def results():
)
)
- records = [parsed_item.item for parsed_item in parsed_items]
+ records = [parsed_item.record for parsed_item in parsed_items]
assert records
return records
diff --git a/tests/unit/test_base.py b/tests/unit/test_base.py
index 48551cdf..e37bd37b 100644
--- a/tests/unit/test_base.py
+++ b/tests/unit/test_base.py
@@ -42,7 +42,7 @@ def record():
parsed_item = spider.build_item(response)
assert parsed_item
- return parsed_item.item
+ return parsed_item.record
@pytest.fixture
@@ -174,7 +174,7 @@ def splash():
parsed_item = spider.scrape_for_pdf(splash_response)
- return parsed_item.item
+ return parsed_item.record
def test_splash(splash):
@@ -209,7 +209,7 @@ def parsed_node():
parsed_item = spider.parse_node(response, node[0])
- return parsed_item.item
+ return parsed_item.record
def test_parsed_node(parsed_node):
diff --git a/tests/unit/test_brown.py b/tests/unit/test_brown.py
index b78be316..d541ee5a 100644
--- a/tests/unit/test_brown.py
+++ b/tests/unit/test_brown.py
@@ -45,7 +45,7 @@ def record():
parsed_item = spider.scrape_splash(splash_response)
assert parsed_item
- return parsed_item.item
+ return parsed_item.record
@pytest.fixture
@@ -204,7 +204,7 @@ def parsed_node_no_splash():
parsed_item = spider.parse(response).next()
assert parsed_item
- return parsed_item.item
+ return parsed_item.record
def test_no_splash(parsed_node_no_splash):
diff --git a/tests/unit/test_crawler2hep.py b/tests/unit/test_crawler2hep.py
index 088178f1..5d0ea837 100644
--- a/tests/unit/test_crawler2hep.py
+++ b/tests/unit/test_crawler2hep.py
@@ -19,7 +19,7 @@
def load_file(file_name):
path = get_test_suite_path(
'responses',
- 'hepcrawl_to_hep',
+ 'crawler2hep',
file_name,
)
with open(path) as input_data:
diff --git a/tests/unit/test_desy.py b/tests/unit/test_desy.py
index 5b01f7fd..1f9730c7 100644
--- a/tests/unit/test_desy.py
+++ b/tests/unit/test_desy.py
@@ -88,7 +88,7 @@ def test_pipeline_record(generated_record):
'Colors and symbols are as in Figure \\ref{fig:dc14_fits}.',
'filename': 'cNFW_rogue_curves',
'format': '.txt',
- 'path': 'DESY/FFT/test_fft_1.txt;1',
+ 'path': 'FFT/test_fft_1.txt',
'type': 'Main',
'version': 1,
},
@@ -107,7 +107,7 @@ def test_pipeline_record(generated_record):
'as the black dashed lines.',
'filename': 'scalingRelations_DutBeh_DC14_all_Oh',
'format': '.txt',
- 'path': 'DESY/FFT/test_fft_2.txt;1',
+ 'path': 'FFT/test_fft_2.txt',
'type': 'Main',
'version': 1
}
@@ -212,7 +212,7 @@ def test_pipeline_collection_records(generated_records):
'Colors and symbols are as in Figure \\ref{fig:dc14_fits}.',
'filename': 'cNFW_rogue_curves',
'format': '.txt',
- 'path': 'DESY/FFT/test_fft_1.txt;1',
+ 'path': 'FFT/test_fft_1.txt',
'type': 'Main',
'version': 1,
},
@@ -231,7 +231,7 @@ def test_pipeline_collection_records(generated_records):
'as the black dashed lines.',
'filename': 'scalingRelations_DutBeh_DC14_all_Oh',
'format': '.txt',
- 'path': 'DESY/FFT/test_fft_2.txt;1',
+ 'path': 'FFT/test_fft_2.txt',
'type': 'Main',
'version': 1
}
@@ -302,7 +302,7 @@ def test_pipeline_collection_records(generated_records):
'Colors and symbols are as in Figure \\ref{fig:dc14_fits}.',
'filename': 'cNFW_rogue_curves',
'format': '.txt',
- 'path': 'DESY/FFT/test_fft_1.txt;1',
+ 'path': 'FFT/test_fft_1.txt',
'type': 'Main',
'version': 1,
},
@@ -321,7 +321,7 @@ def test_pipeline_collection_records(generated_records):
'as the black dashed lines.',
'filename': 'scalingRelations_DutBeh_DC14_all_Oh',
'format': '.txt',
- 'path': 'DESY/FFT/test_fft_2.txt;1',
+ 'path': 'FFT/test_fft_2.txt',
'type': 'Main',
'version': 1
}
diff --git a/tests/unit/test_dnb.py b/tests/unit/test_dnb.py
index a1a22dbd..d3c6f9bc 100644
--- a/tests/unit/test_dnb.py
+++ b/tests/unit/test_dnb.py
@@ -76,7 +76,7 @@ def record(scrape_pos_page_body):
parsed_item = request.callback(response)
assert parsed_item
- return parsed_item.item
+ return parsed_item.record
def test_title(record):
@@ -247,7 +247,7 @@ def parse_without_splash():
)
parsed_item = spider.parse_node(response, nodes[0])
- return parsed_item.item
+ return parsed_item.record
def test_parse_without_splash(parse_without_splash):
diff --git a/tests/unit/test_edp.py b/tests/unit/test_edp.py
index 5dba9990..83c17cde 100644
--- a/tests/unit/test_edp.py
+++ b/tests/unit/test_edp.py
@@ -81,7 +81,7 @@ def record_jats(package_jats, scrape_pos_page_body):
parsed_item = request.callback(response)
assert parsed_item
- return parsed_item.item
+ return parsed_item.record
@pytest.fixture
@@ -116,7 +116,7 @@ def record_rich(package_rich):
parsed_item = spider.parse_node(fake_resp, node)
assert parsed_item
- return parsed_item.item
+ return parsed_item.record
def test_title(record_jats):
@@ -381,7 +381,7 @@ def test_no_dois_jats():
node = get_node(spider, "//article", response)[0]
parsed_item = spider.parse_node(response, node)
- record = parsed_item.item
+ record = parsed_item.record
assert "dois" not in record
assert "additional_files" not in record
@@ -403,7 +403,7 @@ def test_no_dois_rich():
node = get_node(spider, "//EDPSArticle", response)[0]
parsed_item = spider.parse_node(response, node)
- record = parsed_item.item
+ record = parsed_item.record
assert "dois" not in record
assert "additional_files" not in record
@@ -431,7 +431,7 @@ def test_addendum_jats():
node = get_node(spider, "//article", response)[0]
parsed_item = spider.parse_node(response, node)
- record = parsed_item.item
+ record = parsed_item.record
assert "related_article_doi" in record
assert record["related_article_doi"][0][
@@ -456,7 +456,7 @@ def test_author_with_email():
node = get_node(spider, "//article", response)[0]
parsed_item = spider.parse_node(response, node)
- record = parsed_item.item
+ record = parsed_item.record
assert 'email' in record['authors'][0]
assert record['authors'][0]['email'] == "Fname.Sname@university.org"
@@ -491,7 +491,7 @@ def test_aff_with_email():
node = get_node(spider, "//article", response)[0]
parsed_item = spider.parse_node(response, node)
- record = parsed_item.item
+ record = parsed_item.record
affiliation = "Department of Physics, Western Michigan University, Kalamazoo, MI 49008, USA"
assert 'affiliations' in record['authors'][0]
@@ -525,7 +525,7 @@ def test_collections_review():
node = get_node(spider, "//article", response)[0]
parsed_item = spider.parse_node(response, node)
- record = parsed_item.item
+ record = parsed_item.record
assert "collections" in record
assert record["collections"] == [{'primary': 'HEP'}, {'primary': 'Review'}]
@@ -556,7 +556,7 @@ def record_references_only():
parsed_item = spider.parse_node(response, node)
assert parsed_item
- return parsed_item.item
+ return parsed_item.record
def test_references(record_references_only):
diff --git a/tests/unit/test_elsevier.py b/tests/unit/test_elsevier.py
index d26e52fb..c19e2628 100644
--- a/tests/unit/test_elsevier.py
+++ b/tests/unit/test_elsevier.py
@@ -45,7 +45,7 @@ def record():
parsed_item = spider.parse_node(response, nodes)
assert parsed_item
- return parsed_item.item
+ return parsed_item.record
@pytest.fixture(scope="module")
@@ -103,7 +103,7 @@ def parsed_node():
parsed_item = spider.scrape_sciencedirect(parse_response)
assert parsed_item
- return parsed_item.item
+ return parsed_item.record
def test_collection(parsed_node):
@@ -173,7 +173,7 @@ def cover_display_date():
parse_item = spider.parse_node(response, node)
assert parse_item
- return parse_item.item
+ return parse_item.record
def test_cover_display_date(cover_display_date):
@@ -199,7 +199,7 @@ def cover_display_date_y_m():
parse_item = spider.parse_node(response, node)
assert parse_item
- return parse_item.item
+ return parse_item.record
def test_cover_display_date_y_m(cover_display_date_y_m):
@@ -225,7 +225,7 @@ def cover_display_date_y():
parse_item = spider.parse_node(response, node)
assert parse_item
- return parse_item.item
+ return parse_item.record
def test_cover_display_date_y(cover_display_date_y):
@@ -1663,7 +1663,7 @@ def sciencedirect():
parse_item = spider.scrape_sciencedirect(response)
assert parse_item
- return parse_item.item
+ return parse_item.record
def test_sciencedirect(sciencedirect):
diff --git a/tests/unit/test_hindawi.py b/tests/unit/test_hindawi.py
index bd1da795..1bc75268 100644
--- a/tests/unit/test_hindawi.py
+++ b/tests/unit/test_hindawi.py
@@ -29,7 +29,7 @@ def record():
parsed_item = spider.parse_node(response, nodes[0])
assert parsed_item
- return parsed_item.item
+ return parsed_item.record
def test_title(record):
diff --git a/tests/unit/test_infn.py b/tests/unit/test_infn.py
index 5cd1e27d..893bf8c3 100644
--- a/tests/unit/test_infn.py
+++ b/tests/unit/test_infn.py
@@ -32,7 +32,7 @@ def record():
parsed_item = spider.scrape_splash(response)
assert parsed_item
- return parsed_item.item
+ return parsed_item.record
def test_title(record):
@@ -153,4 +153,4 @@ def test_parse_node_nolink():
node = selector.xpath('//%s' % spider.itertag)[0]
parsed_item = spider.parse_node(response, node).next()
- assert isinstance(parsed_item.item, hepcrawl.items.HEPRecord)
+ assert isinstance(parsed_item.record, hepcrawl.items.HEPRecord)
diff --git a/tests/unit/test_iop.py b/tests/unit/test_iop.py
index fb8d26d2..f708fa8d 100644
--- a/tests/unit/test_iop.py
+++ b/tests/unit/test_iop.py
@@ -42,7 +42,7 @@ def record():
parsed_item = spider.parse_node(response, node)
assert parsed_item
- return parsed_item.item
+ return parsed_item.record
def test_abstract(record):
@@ -188,7 +188,7 @@ def erratum_open_access_record():
parsed_item = spider.parse_node(response, node)
assert parsed_item
- return parsed_item.item
+ return parsed_item.record
def test_files_erratum_open_access_record(erratum_open_access_record):
diff --git a/tests/unit/test_magic.py b/tests/unit/test_magic.py
index 74d9fad4..e2df96a4 100644
--- a/tests/unit/test_magic.py
+++ b/tests/unit/test_magic.py
@@ -43,7 +43,7 @@ def record():
parsed_item = spider.scrape_for_pdf(splash_response).next()
assert parsed_item
- return parsed_item.item
+ return parsed_item.record
def test_abstract(record):
@@ -168,7 +168,7 @@ def test_no_author_no_date_no_url():
node = get_node(spider, spider.itertag, text=body)
parsed_item = spider.parse_node(response, node).next()
- record = parsed_item.item
+ record = parsed_item.record
assert isinstance(record, hepcrawl.items.HEPRecord)
assert "date" not in record
assert "authors" not in record
@@ -189,7 +189,7 @@ def test_no_aff():
response = fake_response_from_string(body)
parsed_item = spider.scrape_for_pdf(response).next()
- record = parsed_item.item
+ record = parsed_item.record
assert isinstance(record, hepcrawl.items.HEPRecord)
assert "date" not in record
assert "affiliations" not in record["authors"]
@@ -222,7 +222,7 @@ def test_no_spash_page():
response.meta["urls"] = parsed_node.meta["urls"]
parsed_item = spider.scrape_for_pdf(response).next()
- record = parsed_item.item
+ record = parsed_item.record
assert isinstance(record, hepcrawl.items.HEPRecord)
assert "urls" in record
assert "title" in record
diff --git a/tests/unit/test_mit.py b/tests/unit/test_mit.py
index 2629dd34..0bdc7e4e 100644
--- a/tests/unit/test_mit.py
+++ b/tests/unit/test_mit.py
@@ -29,7 +29,7 @@ def record():
parsed_item = spider.build_item(response)
assert parsed_item
- return parsed_item.item
+ return parsed_item.record
@pytest.fixture
@@ -169,7 +169,7 @@ def supervisors():
parsed_item = spider.build_item(response)
assert parsed_item
- return parsed_item.item
+ return parsed_item.record
def test_two_supervisors(supervisors):
diff --git a/tests/unit/test_phenix.py b/tests/unit/test_phenix.py
index 5a6a5f4c..dc75d6ee 100644
--- a/tests/unit/test_phenix.py
+++ b/tests/unit/test_phenix.py
@@ -33,7 +33,7 @@ def record():
parsed_item = spider.parse_node(response, nodes[0])
assert parsed_item
- return parsed_item.item
+ return parsed_item.record
@pytest.fixture
diff --git a/tests/unit/test_phil.py b/tests/unit/test_phil.py
index be0da905..5288500a 100644
--- a/tests/unit/test_phil.py
+++ b/tests/unit/test_phil.py
@@ -37,7 +37,7 @@ def record():
parsed_item = spider.build_item(response)
assert parsed_item
- return parsed_item.item
+ return parsed_item.record
@pytest.fixture
@@ -54,7 +54,7 @@ def journal():
parsed_item = spider.build_item(response)
assert parsed_item
- return parsed_item.item
+ return parsed_item.record
@pytest.fixture
@@ -232,7 +232,7 @@ def splash():
parsed_item = spider.scrape_for_pdf(response)
assert parsed_item
- return parsed_item.item
+ return parsed_item.record
def test_scrape(splash):
diff --git a/tests/unit/test_t2k.py b/tests/unit/test_t2k.py
index caaaaefc..d87bdbd2 100644
--- a/tests/unit/test_t2k.py
+++ b/tests/unit/test_t2k.py
@@ -39,7 +39,7 @@ def record():
parsed_item = spider.scrape_for_pdf(splash_response).next()
assert parsed_item
- return parsed_item.item
+ return parsed_item.record
def test_abstact(record):
@@ -129,7 +129,7 @@ def non_url():
parsed_item = spider.parse_node(response, nodes[0]).next()
assert parsed_item
- return parsed_item.item
+ return parsed_item.record
def test_non_url(non_url):