Skip to content
This repository has been archived by the owner on Sep 16, 2022. It is now read-only.

Merge stable to master: v0.1.3-rc1 #276

Merged
merged 1 commit into from
Jun 5, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .flake8
Original file line number Diff line number Diff line change
Expand Up @@ -27,5 +27,5 @@ ignore =
# do not use bare except
E722

# often contains "unused" imports
exclude = __init__.py, src/metax_api/migrations, src/static
# often contains "unused" imports, too long lines (generated files), and such
exclude = __init__.py,src/metax_api/migrations/*,src/static
2 changes: 0 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,4 @@
/src/.coverage
.idea/
ubuntu-xenial-16.04-cloudimg-console.log
/src/metax_api/migrations/*
!/src/metax_api/migrations/*__keep*
.ropeproject/
4 changes: 2 additions & 2 deletions .travis-deploy.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,11 @@ cd metax-ops/ansible/
if [[ "$TRAVIS_BRANCH" == "test" && "$TRAVIS_PULL_REQUEST" == "false" ]]; then
echo "Deploying to test.."
ansible-galaxy -r requirements.yml install --roles-path=roles
ansible-playbook -vv -i inventories/test/hosts site_deploy.yml --extra-vars "ssh_user=metax-user"
ansible-playbook -vv -i inventories/test/hosts site_deploy.yml --extra-vars "ssh_user=metax-deploy-user"
elif [[ "$TRAVIS_BRANCH" == "stable" && "$TRAVIS_PULL_REQUEST" == "false" ]]; then
echo "Deploying to stable.."
ansible-galaxy -r requirements.yml install --roles-path=roles
ansible-playbook -vv -i inventories/stable/hosts site_deploy.yml --extra-vars "ssh_user=metax-user"
ansible-playbook -vv -i inventories/stable/hosts site_deploy.yml --extra-vars "ssh_user=metax-deploy-user"
fi

# Make sure the last command to run before this part is the ansible-playbook command
Expand Down
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ services:
- postgresql

before_install:
- openssl aes-256-cbc -K $encrypted_596a6d1c4f83_key -iv $encrypted_596a6d1c4f83_iv -in deploy-key.enc -out deploy-key -d
- openssl aes-256-cbc -K $encrypted_62ed3fb8af4c_key -iv $encrypted_62ed3fb8af4c_iv -in deploy-key.enc -out deploy-key -d
- rm deploy-key.enc
- chmod 600 deploy-key
- mv deploy-key ~/.ssh/id_rsa
Expand Down
Binary file modified deploy-key.enc
Binary file not shown.
10 changes: 5 additions & 5 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,24 +1,24 @@
coveralls==1.3.0 # code coverage reporting in travis
dicttoxml==1.7.4
python-dateutil==2.7.1
python-dateutil==2.7.3
Django==2.0 # BSD-license
elasticsearch<6.0.0
hiredis==0.2.0 # Used by redis (redis-py) for parser
djangorestframework==3.8.2 # BSD-license
django-rainbowtests==0.6.0 # colored test output
flake8==3.5.0 # MIT-license
gevent==1.2.2 # gunicorn dep
gunicorn==19.7.1 # MIT-license
gevent==1.3.1 # gunicorn dep
gunicorn==19.8.1 # MIT-license
ipdb==0.11 # dev tool
jsonschema==2.6.0
lxml==4.2.1
pika==0.11.2
psycopg2-binary==2.7.4 # LGPL with exceptions or ZPL
pyoai==2.5.0
python-simplexquery==1.0.5.3
pytz==2018.3
pytz==2018.4
pyyaml==3.12
redis==2.10.6
requests==2.18.4 # Apache 2.0-license
simplejson==3.13.2 # MIT-license
simplejson==3.15.0 # MIT-license
urllib3==1.22
221 changes: 185 additions & 36 deletions src/metax_api/api/oaipmh/base/metax_oai_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,16 @@
from oaipmh.error import IdDoesNotExistError
from oaipmh.error import BadArgumentError

from metax_api.models.catalog_record import CatalogRecord
from metax_api.models.catalog_record import CatalogRecord, DataCatalog
from metax_api.services import CatalogRecordService as CRS

syke_url_prefix_template = 'http://metatieto.ymparisto.fi:8080/geoportal/catalog/search/resource/details.page?uuid=%s'


class MetaxOAIServer(ResumptionOAIPMH):

def _is_valid_set(self, set):
if not set or set == 'urnresolver' or set in settings.OAI['SET_MAPPINGS']:
if not set or set in ['urnresolver', 'datacatalogs'] or set in settings.OAI['SET_MAPPINGS']:
return True
return False

Expand All @@ -30,16 +32,20 @@ def _get_filtered_records(self, set, cursor, batch_size, from_=None, until=None)
if not self._is_valid_set(set):
raise BadArgumentError('invalid set value')

query_set = CatalogRecord.objects.all()
proxy = CatalogRecord
if set == 'datacatalogs':
proxy = DataCatalog

query_set = proxy.objects.all()
if from_ and until:
query_set = CatalogRecord.objects.filter(date_modified__gte=from_, date_modified__lte=until)
query_set = proxy.objects.filter(date_modified__gte=from_, date_modified__lte=until)
elif from_:
query_set = CatalogRecord.objects.filter(date_modified__gte=from_)
query_set = proxy.objects.filter(date_modified__gte=from_)
elif until:
query_set = CatalogRecord.objects.filter(date_modified__lte=until)
query_set = proxy.objects.filter(date_modified__lte=until)

if set:
if set == 'urnresolver':
if set in ['urnresolver', 'datacatalogs']:
pass
else:
query_set = query_set.filter(
Expand All @@ -48,38 +54,154 @@ def _get_filtered_records(self, set, cursor, batch_size, from_=None, until=None)
query_set = query_set.filter(data_catalog__catalog_json__identifier__in=self._get_default_set_filter())
return query_set[cursor:batch_size]

def _handle_syke_urnresolver_metadata(self, record):
identifiers = []
preferred_identifier = record.research_dataset.get('preferred_identifier')
identifiers.append(preferred_identifier)
for id_obj in record.research_dataset.get('other_identifier', []):
if id_obj.get('notation', '').startswith('{'):
uuid = id_obj['notation']
identifiers.append(syke_url_prefix_template % uuid)
return identifiers

def _get_oai_dc_urnresolver_metadata(self, record):
"""
Preferred identifier is added only for ida and att catalog records
other identifiers are added for all.

Special handling for SYKE catalog.
"""

identifiers = []
identifiers.append(settings.OAI['ETSIN_URL_TEMPLATE'] % record.identifier)

# assuming ida and att catalogs are not harvested
if not record.catalog_is_harvested():
preferred_identifier = record.research_dataset.get('preferred_identifier')
identifiers.append(preferred_identifier)
for id_obj in record.research_dataset.get('other_identifier', []):
if id_obj.get('notation', '').startswith('urn:nbn:fi:csc-kata'):
other_urn = id_obj['notation']
identifiers.append(other_urn)
data_catalog = record.data_catalog.catalog_json.get('identifier')
if data_catalog == 'urn:nbn:fi:att:data-catalog-harvest-syke':
identifiers = self._handle_syke_urnresolver_metadata(record)

else:
identifiers.append(settings.OAI['ETSIN_URL_TEMPLATE'] % record.identifier)

# assuming ida and att catalogs are not harvested
if not record.catalog_is_harvested():
preferred_identifier = record.research_dataset.get('preferred_identifier')
identifiers.append(preferred_identifier)
for id_obj in record.research_dataset.get('other_identifier', []):
if id_obj.get('notation', '').startswith('urn:nbn:fi:csc-kata'):
other_urn = id_obj['notation']
identifiers.append(other_urn)

meta = {
'identifier': identifiers
}
return meta

def _get_oai_dc_metadata(self, record):
identifier = record.research_dataset.get('preferred_identifier')
def _get_oaic_dc_value(self, value, lang=None):
valueDict = {}
valueDict['value'] = value
if lang:
valueDict['lang'] = lang
return valueDict

def _get_oai_dc_metadata(self, record, json, type):
identifier = []
if 'preferred_identifier' in json:
identifier.append(self._get_oaic_dc_value(json.get('preferred_identifier')))
if 'identifier' in json:
identifier.append(self._get_oaic_dc_value(json.get('identifier')))

title = []
title_data = json.get('title', {})
for key, value in title_data.items():
title.append(self._get_oaic_dc_value(value, key))

creator = []
creator_data = json.get('creator', [])
for value in creator_data:
if 'name' in value:
creator.append(self._get_oaic_dc_value(value.get('name')))

subject = []
subject_data = json.get('keyword', [])
for value in subject_data:
subject.append(self._get_oaic_dc_value(value))
subject_data = json.get('field_of_science', [])
for value in subject_data:
for key, value2 in value.get('pref_label', {}).items():
subject.append(self._get_oaic_dc_value(value2, key))
subject_data = json.get('theme', [])
for value in subject_data:
for key, value2 in value.get('pref_label', {}).items():
subject.append(self._get_oaic_dc_value(value2, key))

desc = []
desc_data = json.get('description', {}).get('name', {})
for key, value in desc_data.items():
desc.append(self._get_oaic_dc_value(value, key))

publisher = []
publisher_data = json.get('publisher', {})
for key, value in publisher_data.get('name', {}).items():
publisher.append(self._get_oaic_dc_value(value, key))

contributor = []
contributor_data = json.get('contributor', [])
for value in contributor_data:
if 'name' in value:
contributor.append(self._get_oaic_dc_value(value.get('name')))

date = self._get_oaic_dc_value(str(record.date_created))

language = []
language_data = json.get('language', [])
for value in language_data:
if 'identifier' in value:
language.append(self._get_oaic_dc_value(value['identifier']))

relation = []
relation_data = json.get('relation', [])
for value in relation_data:
if 'identifier'in value.get('entity', {}):
relation.append(self._get_oaic_dc_value(value['entity']['identifier']))

coverage = []
coverage_data = json.get('spatial', [])
for value in coverage_data:
if 'geographic_name' in value:
coverage.append(self._get_oaic_dc_value(value['geographic_name']))

rights = []
rights_data = json.get('access_rights', {})
rights_desc = rights_data.get('description', {}).get('name', {})
for key, value in rights_desc.items():
rights.append(self._get_oaic_dc_value(value, key))

for value in rights_data.get('license', []):
if 'identifier' in value:
rights.append(self._get_oaic_dc_value(value['identifier']))

types = []
types.append(self._get_oaic_dc_value(type))

meta = {
'identifier': [identifier]
'identifier': identifier,
'title': title,
'creator': creator,
'subject': subject,
'description': desc,
'publisher': publisher,
'contributor': contributor,
'date': [date],
'type': types,
'language': language,
'relation': relation,
'coverage': coverage,
'rights': rights
}
return meta

def _get_oai_datacite_metadata(self, record):
def _get_oai_datacite_metadata(self, json):
datacite_xml = CRS.transform_datasets_to_format(
{'research_dataset': record.research_dataset}, 'datacite', False
{'research_dataset': json}, 'datacite', False
)
meta = {
'datacentreSymbol': 'Metax',
Expand All @@ -88,13 +210,20 @@ def _get_oai_datacite_metadata(self, record):
}
return meta

def _get_metadata_for_record(self, record, metadata_prefix):
def _get_metadata_for_record(self, record, json, type, metadata_prefix):
if type == 'Datacatalog' and metadata_prefix != 'oai_dc':
raise BadArgumentError('Invalid set value. DataCatalogs can only be harvested using oai_dc format.')

meta = {}
json = CRS.strip_catalog_record(json)

if metadata_prefix == 'oai_dc':
meta = self._get_oai_dc_metadata(record)
meta = self._get_oai_dc_metadata(record, json, type)
elif metadata_prefix == 'oai_datacite':
meta = self._get_oai_datacite_metadata(record)
meta = self._get_oai_datacite_metadata(json)
elif metadata_prefix == 'oai_dc_urnresolver':
# This is a special case. Only identifier values are retrieved from the record,
# so strip_catalog_record is not applicable here.
meta = self._get_oai_dc_urnresolver_metadata(record)
return self._fix_metadata(meta)

Expand All @@ -106,9 +235,14 @@ def _get_header_timestamp(self, record):
timestamp = record.date_created
return timezone.make_naive(timestamp)

def _get_oai_item(self, record, metadata_prefix):
identifier = record.identifier
metadata = self._get_metadata_for_record(record, metadata_prefix)
def _get_oai_item(self, identifier, record, metadata_prefix):
    """Build the OAI item tuple for a dataset record.

    The metadata is produced from ``record.research_dataset`` with the
    record type ``'Dataset'``. Returns a three-element tuple: the header,
    the metadata wrapper, and ``None``.
    """
    dataset_metadata = self._get_metadata_for_record(
        record, record.research_dataset, 'Dataset', metadata_prefix
    )
    header = common.Header('', identifier, self._get_header_timestamp(record), ['metax'], False)
    return header, common.Metadata('', dataset_metadata), None

def _get_oai_catalog_item(self, identifier, record, metadata_prefix):
    """Build the OAI item tuple for a data catalog record.

    The metadata is produced from ``record.catalog_json`` with the record
    type ``'Datacatalog'``. Returns a three-element tuple: the header, the
    metadata wrapper, and ``None``.
    """
    catalog_metadata = self._get_metadata_for_record(
        record, record.catalog_json, 'Datacatalog', metadata_prefix
    )
    header = common.Header('', identifier, self._get_header_timestamp(record), ['metax'], False)
    return header, common.Metadata('', catalog_metadata), None
Expand Down Expand Up @@ -161,18 +295,24 @@ def listMetadataFormats(self, identifier=None):

def listSets(self, cursor=None, batch_size=None):
"""Implement OAI-PMH verb ListSets."""
data = []
data = [('datacatalogs', 'datacatalog', '')]
for set_key in settings.OAI['SET_MAPPINGS'].keys():
data.append((set_key, set_key, ''))
return data

def _get_record_identifier(self, record, set):
if set == 'datacatalogs':
return record.catalog_json['identifier']
else:
return record.identifier

def listIdentifiers(self, metadataPrefix=None, set=None, cursor=None,
from_=None, until=None, batch_size=None):
"""Implement OAI-PMH verb listIdentifiers."""
records = self._get_filtered_records(set, cursor, batch_size, from_, until)
data = []
for record in records:
identifier = record.research_dataset.get('preferred_identifier')
identifier = self._get_record_identifier(record, set)
data.append(common.Header('', identifier, self._get_header_timestamp(record), ['metax'], False))
return data

Expand All @@ -182,18 +322,27 @@ def listRecords(self, metadataPrefix=None, set=None, cursor=None, from_=None,
data = []
records = self._get_filtered_records(set, cursor, batch_size, from_, until)
for record in records:
data.append(self._get_oai_item(record, metadataPrefix))
identifier = self._get_record_identifier(record, set)
if set == 'datacatalogs':
data.append(self._get_oai_catalog_item(identifier, record, metadataPrefix))
else:
data.append(self._get_oai_item(identifier, record, metadataPrefix))
return data

def getRecord(self, metadataPrefix, identifier):
"""Implement OAI-PMH verb GetRecord."""
try:
record = CatalogRecord.objects.get(
data_catalog__catalog_json__identifier__in=self._get_default_set_filter(),
identifier__exact=identifier
)
record = CatalogRecord.objects.get(identifier__exact=identifier)
json = record.research_dataset
type = 'Dataset'
except CatalogRecord.DoesNotExist:
raise IdDoesNotExistError("No dataset with id %s available through the OAI-PMH interface." % identifier)
metadata = self._get_metadata_for_record(record, metadataPrefix)
try:
record = DataCatalog.objects.get(catalog_json__identifier__exact=identifier)
json = record.catalog_json
type = 'Datacatalog'
except DataCatalog.DoesNotExist:
raise IdDoesNotExistError("No record with id %s available." % identifier)

metadata = self._get_metadata_for_record(record, json, type, metadataPrefix)
return (common.Header('', identifier, self._get_header_timestamp(record), ['metax'], False),
common.Metadata('', metadata), None)
Loading