Skip to content

Commit

Permalink
fixed build
Browse files (browse the repository at this point in the history)
Signed-off-by: Spiros Delviniotis <[email protected]>
  • Loading branch information
spirosdelviniotis committed Aug 7, 2017
1 parent da9acb4 commit 9c9bc7d
Show file tree
Hide file tree
Showing 43 changed files with 125 additions and 127 deletions.
17 changes: 6 additions & 11 deletions hepcrawl/crawler2hep.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,6 @@ def _get_updated_fft_fields(current_fft_fields, record_files):
for record_file in record_files
}
new_fft_fields = []
import logging
logger = logging.getLogger(__name__)
logger.log(logging.INFO,
"-------------------- _get_updated_fft_fields -------------------")
logger.log(logging.INFO,
'current_fft_fields: {}'.format(current_fft_fields))
for fft_field in current_fft_fields:
file_name = os.path.basename(fft_field['path'])
if file_name in record_files_index:
Expand Down Expand Up @@ -151,7 +145,7 @@ def item_to_hep(
)
elif item.record_format == 'hepcrawl':
item = _normalize_hepcrawl_record(
item=item,
item=item.record,
source=source,
)
return hepcrawl_to_hep(dict(item))
Expand All @@ -160,10 +154,11 @@ def item_to_hep(


def hep_to_hep(hep_record, record_files):
hep_record['_fft'] = _get_updated_fft_fields(
current_fft_fields=hep_record['_fft'],
record_files=record_files,
)
if record_files:
hep_record['_fft'] = _get_updated_fft_fields(
current_fft_fields=hep_record['_fft'],
record_files=record_files,
)

return hep_record

Expand Down
4 changes: 2 additions & 2 deletions hepcrawl/spiders/alpha_spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,8 +149,8 @@ def parse(self, response):
record.add_value('collections', ['HEP', 'THESIS'])

parsed_item = ParsedItem(
item=record.load_item(),
item_format='hepcrawl',
record=record.load_item(),
record_format='hepcrawl',
)

yield parsed_item
4 changes: 2 additions & 2 deletions hepcrawl/spiders/aps_spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,8 +117,8 @@ def parse(self, response):
record.add_value('collections', ['HEP', 'Citeable', 'Published'])

parsed_item = ParsedItem(
item=record.load_item(),
item_format='hepcrawl',
record=record.load_item(),
record_format='hepcrawl',
)

yield parsed_item
Expand Down
4 changes: 2 additions & 2 deletions hepcrawl/spiders/arxiv_spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,8 +116,8 @@ def parse_node(self, response, node):
record.add_value('license', license)

parsed_item = ParsedItem(
item=record.load_item(),
item_format='hepcrawl',
record=record.load_item(),
record_format='hepcrawl',
)

return parsed_item
Expand Down
4 changes: 2 additions & 2 deletions hepcrawl/spiders/base_spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,8 +199,8 @@ def build_item(self, response):
record.add_value('collections', ['HEP', 'THESIS'])

parsed_item = ParsedItem(
item=record.load_item(),
item_format='hepcrawl',
record=record.load_item(),
record_format='hepcrawl',
)

return parsed_item
Expand Down
4 changes: 2 additions & 2 deletions hepcrawl/spiders/brown_spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,8 +225,8 @@ def build_item(self, response):
record.add_value('collections', ['HEP', 'THESIS'])

parsed_item = ParsedItem(
item=record.load_item(),
item_format='hepcrawl',
record=record.load_item(),
record_format='hepcrawl',
)

return parsed_item
4 changes: 2 additions & 2 deletions hepcrawl/spiders/dnb_spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,8 +226,8 @@ def build_item(self, response):
record.add_value('collections', ['HEP', 'THESIS'])

parsed_item = ParsedItem(
item=record.load_item(),
item_format='hepcrawl',
record=record.load_item(),
record_format='hepcrawl',
)

return parsed_item
8 changes: 4 additions & 4 deletions hepcrawl/spiders/edp_spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -320,8 +320,8 @@ def build_item_rich(self, response):
record.add_value("urls", response.meta.get("urls"))

parsed_item = ParsedItem(
item=record.load_item(),
item_format='hepcrawl',
record=record.load_item(),
record_format='hepcrawl',
)

return parsed_item
Expand Down Expand Up @@ -395,8 +395,8 @@ def build_item_jats(self, response):
record.add_value("references", references)

parsed_item = ParsedItem(
item=record.load_item(),
item_format='hepcrawl',
record=record.load_item(),
record_format='hepcrawl',
)

return parsed_item
Expand Down
4 changes: 2 additions & 2 deletions hepcrawl/spiders/elsevier_spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -1036,8 +1036,8 @@ def build_item(self, response):
record.add_value('references', self.get_references(node))

parsed_item = ParsedItem(
item=record.load_item(),
item_format='hepcrawl',
record=record.load_item(),
record_format='hepcrawl',
)

return parsed_item
4 changes: 2 additions & 2 deletions hepcrawl/spiders/hindawi_spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,8 +226,8 @@ def parse_node(self, response, node):
"./datafield[@tag='260']/subfield[@code='b']/text()")

parsed_item = ParsedItem(
item=record.load_item(),
item_format='hepcrawl',
record=record.load_item(),
record_format='hepcrawl',
)

return parsed_item
4 changes: 2 additions & 2 deletions hepcrawl/spiders/infn_spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,8 +243,8 @@ def build_item(self, response):
record.add_value('collections', ['HEP', 'THESIS'])

parsed_item = ParsedItem(
item=record.load_item(),
item_format='hepcrawl',
record=record.load_item(),
record_format='hepcrawl',
)

return parsed_item
4 changes: 2 additions & 2 deletions hepcrawl/spiders/iop_spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,8 +224,8 @@ def parse_node(self, response, node):
self.add_fft_file(pdf_file_path, file_access, file_type))

parsed_item = ParsedItem(
item=record.load_item(),
item_format='hepcrawl',
record=record.load_item(),
record_format='hepcrawl',
)

return parsed_item
4 changes: 2 additions & 2 deletions hepcrawl/spiders/magic_spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,8 +180,8 @@ def build_item(self, response):
record.add_value('collections', ['HEP', 'THESIS'])

parsed_item = ParsedItem(
item=record.load_item(),
item_format='hepcrawl',
record=record.load_item(),
record_format='hepcrawl',
)

yield parsed_item
4 changes: 2 additions & 2 deletions hepcrawl/spiders/mit_spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,8 +228,8 @@ def build_item(self, response):
record.add_value('collections', ['HEP', 'THESIS'])

parsed_item = ParsedItem(
item=record.load_item(),
item_format='hepcrawl',
record=record.load_item(),
record_format='hepcrawl',
)

return parsed_item
4 changes: 2 additions & 2 deletions hepcrawl/spiders/phenix_spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,8 +130,8 @@ def parse_node(self, response, node):
record.add_value('collections', ['HEP', 'THESIS'])

parsed_item = ParsedItem(
item=record.load_item(),
item_format='hepcrawl',
record=record.load_item(),
record_format='hepcrawl',
)

return parsed_item
4 changes: 2 additions & 2 deletions hepcrawl/spiders/phil_spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,8 +165,8 @@ def build_item(self, response):
record.add_value('journal_year', int(jsonrecord['year']))

parsed_item = ParsedItem(
item=record.load_item(),
item_format='hepcrawl',
record=record.load_item(),
record_format='hepcrawl',
)

return parsed_item
4 changes: 2 additions & 2 deletions hepcrawl/spiders/pos_spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,8 +136,8 @@ def build_item(self, response):
record.add_value('collections', ['HEP', 'ConferencePaper'])

parsed_item = ParsedItem(
item=record.load_item(),
item_format='hepcrawl',
record=record.load_item(),
record_format='hepcrawl',
)

return parsed_item
Expand Down
4 changes: 2 additions & 2 deletions hepcrawl/spiders/t2k_spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,8 +168,8 @@ def build_item(self, response):
record.add_value('collections', ['HEP', 'THESIS'])

parsed_item = ParsedItem(
item=record.load_item(),
item_format='hepcrawl',
record=record.load_item(),
record_format='hepcrawl',
)

yield parsed_item
14 changes: 11 additions & 3 deletions hepcrawl/spiders/wsp_spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,15 @@ class WorldScientificSpider(Jats, XMLFeedSpider):
'rapid-communications'
]

def __init__(self, package_path=None, ftp_folder="/WSP", ftp_host=None, ftp_netrc=None, *args, **kwargs):
def __init__(
self,
package_path=None,
ftp_folder="/WSP",
ftp_host=None,
ftp_netrc=None,
*args,
**kwargs
):
"""Construct WSP spider."""
super(WorldScientificSpider, self).__init__(*args, **kwargs)
self.ftp_folder = ftp_folder
Expand Down Expand Up @@ -206,8 +214,8 @@ def parse_node(self, response, node):
record.add_value('collections', self._get_collections(node, article_type, journal_title))

parsed_item = ParsedItem(
item=dict(record.load_item()),
item_format='hepcrawl',
record=dict(record.load_item()),
record_format='hepcrawl',
)

return parsed_item
Expand Down
15 changes: 5 additions & 10 deletions hepcrawl/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -348,15 +348,10 @@ def get_license_by_text(license_text):
return license


def get_absolute_file_path(file_path):
"""Returns the absolute path of a relative path."""
return os.path.abspath(file_path)


class RecordFile(object):
"""Metadata of a file needed for a record.
Params:
Args:
path(str): local path to the file.
name(str): Optional, name of the file, if not passed, will use the name in the path.
Expand All @@ -377,14 +372,14 @@ def __init__(self, path, name=None):
class ParsedItem(dict):
"""Each of the individual items returned by the spider to the pipeline.
Params:
Args:
record(dict): Information about the crawled record, might be in different formats.
record_format(str): Format of the above record, for example ``"hep"`` or ``"hepcrawl"``.
file_urls(list(str)): URLs to the files to be downloaded by ``FftFilesPipeline``.
ftp_params(dict): Parameter for the ``FftFilesPipeline`` to be able to connect to the ftp
server, if any.
ftp_params(dict): Parameter for the ``FftFilesPipeline`` to be able to connect to the
ftp server, if any.
record_files(list(RecordFile)): files attached to the record, usually populated by
``FftFilesPipeline`` from the ``file_urls`` parameter.
``FftFilesPipeline`` from the ``file_urls`` parameter.
"""
def __init__(
self,
Expand Down
16 changes: 8 additions & 8 deletions tests/functional/desy/fixtures/desy_local_records.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
"version": 1,
"creation_datetime": "2017-06-27T09:43:17", "description": "00013 Decomposition of the problematic rotation curves in our sample according to the best-fit \\textsc{core}NFW models. Colors and symbols are as in Figure \\ref{fig:dc14_fits}.",
"format": ".txt",
"path": "/tmp/file_urls/full/49e42fc70c5d7b0cd9dc7aa5defa12ded530e135.txt",
"path": "/tmp/file_urls/full/796483eeaa779dfc00871228dd70dc9809ebc3c0.txt",
"type": "Main",
"filename": "cNFW_rogue_curves"
},
Expand All @@ -22,7 +22,7 @@
"creation_datetime": "2017-06-27T09:43:16",
"description": "00005 Comparison of the parameters of the best-fit DC14 models to the cosmological halo mass-concentration relation from \\cite{dutton14} (left) and the stellar mass-halo mass relation from \\cite{behroozi13} (right). The error bars correspond to the extremal values of the multidimensional 68\\% confidence region for each fit. The theoretical relations are shown as red lines and their 1$\\sigma$ and 2$\\sigma$ scatter are represented by the dark and light grey bands, respectively. The mass-concentration relation from \\cite{maccio08} and the stellar mass-halo mass relation from \\cite{behroozi13} are also shown as the black dashed lines.",
"format": ".txt",
"path": "/tmp/file_urls/full/c1cdb1640202896b1ffc446f20d0d660977fc2db.txt",
"path": "/tmp/file_urls/full/ff1ccb47d9a3abb75acb91279e0ec2a4b530ba3e.txt",
"type": "Main",
"filename": "scalingRelations_DutBeh_DC14_all_Oh"
}
Expand Down Expand Up @@ -90,7 +90,7 @@
"version": 1,
"creation_datetime": "2017-06-27T09:43:17", "description": "00013 Decomposition of the problematic rotation curves in our sample according to the best-fit \\textsc{core}NFW models. Colors and symbols are as in Figure \\ref{fig:dc14_fits}.",
"format": ".txt",
"path": "/tmp/file_urls/full/49e42fc70c5d7b0cd9dc7aa5defa12ded530e135.txt",
"path": "/tmp/file_urls/full/796483eeaa779dfc00871228dd70dc9809ebc3c0.txt",
"type": "Main",
"filename": "cNFW_rogue_curves"
},
Expand All @@ -99,7 +99,7 @@
"creation_datetime": "2017-06-27T09:43:16",
"description": "00005 Comparison of the parameters of the best-fit DC14 models to the cosmological halo mass-concentration relation from \\cite{dutton14} (left) and the stellar mass-halo mass relation from \\cite{behroozi13} (right). The error bars correspond to the extremal values of the multidimensional 68\\% confidence region for each fit. The theoretical relations are shown as red lines and their 1$\\sigma$ and 2$\\sigma$ scatter are represented by the dark and light grey bands, respectively. The mass-concentration relation from \\cite{maccio08} and the stellar mass-halo mass relation from \\cite{behroozi13} are also shown as the black dashed lines.",
"format": ".txt",
"path": "/tmp/file_urls/full/c1cdb1640202896b1ffc446f20d0d660977fc2db.txt",
"path": "/tmp/file_urls/full/ff1ccb47d9a3abb75acb91279e0ec2a4b530ba3e.txt",
"type": "Main",
"filename": "scalingRelations_DutBeh_DC14_all_Oh"
}
Expand Down Expand Up @@ -167,7 +167,7 @@
"version": 1,
"creation_datetime": "2017-06-27T09:43:17", "description": "00013 Decomposition of the problematic rotation curves in our sample according to the best-fit \\textsc{core}NFW models. Colors and symbols are as in Figure \\ref{fig:dc14_fits}.",
"format": ".txt",
"path": "/tmp/file_urls/full/49e42fc70c5d7b0cd9dc7aa5defa12ded530e135.txt",
"path": "/tmp/file_urls/full/796483eeaa779dfc00871228dd70dc9809ebc3c0.txt",
"type": "Main",
"filename": "cNFW_rogue_curves"
},
Expand All @@ -176,7 +176,7 @@
"creation_datetime": "2017-06-27T09:43:16",
"description": "00005 Comparison of the parameters of the best-fit DC14 models to the cosmological halo mass-concentration relation from \\cite{dutton14} (left) and the stellar mass-halo mass relation from \\cite{behroozi13} (right). The error bars correspond to the extremal values of the multidimensional 68\\% confidence region for each fit. The theoretical relations are shown as red lines and their 1$\\sigma$ and 2$\\sigma$ scatter are represented by the dark and light grey bands, respectively. The mass-concentration relation from \\cite{maccio08} and the stellar mass-halo mass relation from \\cite{behroozi13} are also shown as the black dashed lines.",
"format": ".txt",
"path": "/tmp/file_urls/full/c1cdb1640202896b1ffc446f20d0d660977fc2db.txt",
"path": "/tmp/file_urls/full/ff1ccb47d9a3abb75acb91279e0ec2a4b530ba3e.txt",
"type": "Main",
"filename": "scalingRelations_DutBeh_DC14_all_Oh"
}
Expand Down Expand Up @@ -244,7 +244,7 @@
"version": 1,
"creation_datetime": "2017-06-27T09:43:17", "description": "00013 Decomposition of the problematic rotation curves in our sample according to the best-fit \\textsc{core}NFW models. Colors and symbols are as in Figure \\ref{fig:dc14_fits}.",
"format": ".txt",
"path": "/tmp/file_urls/full/49e42fc70c5d7b0cd9dc7aa5defa12ded530e135.txt",
"path": "/tmp/file_urls/full/796483eeaa779dfc00871228dd70dc9809ebc3c0.txt",
"type": "Main",
"filename": "cNFW_rogue_curves"
},
Expand All @@ -253,7 +253,7 @@
"creation_datetime": "2017-06-27T09:43:16",
"description": "00005 Comparison of the parameters of the best-fit DC14 models to the cosmological halo mass-concentration relation from \\cite{dutton14} (left) and the stellar mass-halo mass relation from \\cite{behroozi13} (right). The error bars correspond to the extremal values of the multidimensional 68\\% confidence region for each fit. The theoretical relations are shown as red lines and their 1$\\sigma$ and 2$\\sigma$ scatter are represented by the dark and light grey bands, respectively. The mass-concentration relation from \\cite{maccio08} and the stellar mass-halo mass relation from \\cite{behroozi13} are also shown as the black dashed lines.",
"format": ".txt",
"path": "/tmp/file_urls/full/c1cdb1640202896b1ffc446f20d0d660977fc2db.txt",
"path": "/tmp/file_urls/full/ff1ccb47d9a3abb75acb91279e0ec2a4b530ba3e.txt",
"type": "Main",
"filename": "scalingRelations_DutBeh_DC14_all_Oh"
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@
<subfield code="p">INSPIRE:HEP</subfield>
</datafield>
<datafield tag="FFT" ind1=" " ind2=" ">
<subfield code="a">DESY/FFT/test_fft_1.txt</subfield>
<subfield code="a">FFT/test_fft_1.txt</subfield>
<subfield code="d">00013 Decomposition of the problematic rotation curves in our sample according to the best-fit \textsc{core}NFW models. Colors and symbols are as in Figure \ref{fig:dc14_fits}.</subfield>
<subfield code="f">.txt</subfield>
<subfield code="n">cNFW_rogue_curves</subfield>
Expand All @@ -62,7 +62,7 @@
<subfield code="z"/>
</datafield>
<datafield tag="FFT" ind1=" " ind2=" ">
<subfield code="a">DESY/FFT/test_fft_2.txt</subfield>
<subfield code="a">FFT/test_fft_2.txt</subfield>
<subfield code="d">00005 Comparison of the parameters of the best-fit DC14 models to the cosmological halo mass-concentration relation from \cite{dutton14} (left) and the stellar mass-halo mass relation from \cite{behroozi13} (right). The error bars correspond to the extremal values of the multidimensional 68\% confidence region for each fit. The theoretical relations are shown as red lines and their 1$\sigma$ and 2$\sigma$ scatter are represented by the dark and light grey bands, respectively. The mass-concentration relation from \cite{maccio08} and the stellar mass-halo mass relation from \cite{behroozi13} are also shown as the black dashed lines.</subfield>
<subfield code="f">.txt</subfield>
<subfield code="n">scalingRelations_DutBeh_DC14_all_Oh</subfield>
Expand Down
Loading

0 comments on commit 9c9bc7d

Please sign in to comment.