diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..c11a11f --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,8 @@ +# https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file + +version: 2 +updates: + - package-ecosystem: "github-actions" # See documentation for possible values + directory: "/" # Location of package manifests + schedule: + interval: "weekly" diff --git a/.github/workflows/check-crlf.yml b/.github/workflows/check-crlf.yml index e87dcbe..73c0bc8 100644 --- a/.github/workflows/check-crlf.yml +++ b/.github/workflows/check-crlf.yml @@ -12,7 +12,7 @@ jobs: steps: - name: Checkout repository contents - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Use action to check for CRLF endings uses: erclu/check-crlf@v1.2.0 diff --git a/.github/workflows/irc_notify.yml b/.github/workflows/irc_notify.yml index f2aba83..482f505 100644 --- a/.github/workflows/irc_notify.yml +++ b/.github/workflows/irc_notify.yml @@ -15,7 +15,7 @@ jobs: runs-on: ubuntu-latest steps: - name: irc push - uses: rectalogic/notify-irc@v1 + uses: rectalogic/notify-irc@v2 if: github.event_name == 'push' with: channel: "#oih" @@ -26,7 +26,7 @@ jobs: ${{ github.actor }} pushed ${{ github.event.ref }} ${{ github.event.compare }} ${{ join(github.event.commits.*.message) }} - name: irc pull request - uses: rectalogic/notify-irc@v1 + uses: rectalogic/notify-irc@v2 if: github.event_name == 'pull_request' with: channel: "#oih" @@ -35,8 +35,9 @@ jobs: notice: true message: | ${{ github.actor }} opened PR ${{ github.event.pull_request.html_url }} + ${{ github.event.pull_request.title }} - name: irc tag created - uses: rectalogic/notify-irc@v1 + uses: rectalogic/notify-irc@v2 if: github.event_name == 'create' && github.event.ref_type == 'tag' with: channel: "#oih" diff --git a/LICENSE.md b/LICENSE.md index 9f4a111..f8097f0 100644 --- a/LICENSE.md +++ b/LICENSE.md @@ -1,6 
+1,6 @@ MIT License -Copyright (c) 2023 Ocean InfoHub +Copyright (c) 2024 Ocean InfoHub Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index 961f29a..f5a508b 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,6 @@ This repo contains the code for the Ocean Info Hub Global Search Portal. * `/indexer` contains all of the code to ingest the OIH graph into the SOLR Instance * `/solr` contains the configuration required for the solr instance, including the schema. * `/frontend` contains the code for the static javascript app. This will produce a container in dev mode running a live server, and a static html/javascript site in production mode. -* `/regions` contains the QGIS file defining the gographical regions. +* `/regions` contains the QGIS file defining the geographical regions. See the individual README files for more information. diff --git a/frontend/public/index.html b/frontend/public/index.html index 8a0a9c8..7377358 100644 --- a/frontend/public/index.html +++ b/frontend/public/index.html @@ -1,82 +1,49 @@ - - - - - - - - - - - OIH Global Search - - - - - - - - -
- - + + + + + OIH Global Search + + + + + + +
def GeoCoordinates(geo):
    """Convert a schema.org GeoCoordinates dict into a Solr point attribute.

    Reads the "latitude" and "longitude" keys from `geo` and, when both are
    present, builds a WKT point string and hands it to the module-level
    _geo helper as a 'point' attribute.

    Raises UnhandledFormatException when either coordinate is missing.
    """
    lat = geo.get("latitude", None)
    # renamed from `long` — avoids a confusing name (Py2 builtin / C type)
    lon = geo.get("longitude", None)
    if lat is not None and lon is not None:
        print("Generating a Point from the GeoCoordinates...")
        # WKT uses longitude-first ordering: "POINT (lon lat)"
        new_point = "POINT (%s %s)" % (lon, lat)
        print(new_point)
        return _geo('point', new_point)

    raise UnhandledFormatException("Didn't handle %s in GeoCoordinates" % json.dumps(geo))
def import_file(file):
    """Load a single JSON file and POST it into the Solr core.

    Adds the 'keys' and 'json_source' helper fields expected by the schema,
    then posts to the module-level `solr_url` with `solr_params` via the
    shared `session`. On a Solr error the original document and response
    text are handed to dump_exception().

    file -- path (or os.DirEntry) of the JSON file to index.
    """
    with open(file, 'rb') as f:
        print("***Processing filename: " + f.name)
        try:
            orig = json.load(f)
        except UnicodeDecodeError:
            # Some harvested files are not valid UTF-8; retry as latin1.
            f.seek(0)
            file_bytes = f.read()
            try:
                orig = json.loads(file_bytes.decode('latin1'))
            except Exception:
                # BUG FIX: the original referenced the undefined names
                # `filename` and `src` here (NameError at runtime) and used
                # `shutil` without importing it. Use the open file's own
                # name, and make sure the exceptions/ directory exists.
                import shutil
                print("Issue decoding %s, continuing" % f.name)
                os.makedirs('exceptions', exist_ok=True)
                shutil.copy(f.name,
                            os.path.join('exceptions', os.path.basename(f.name)))
                return

    data = orig
    data['keys'] = list(data.keys())
    # keep a verbatim copy of the record for the json_source stored field
    data['json_source'] = json.dumps(data)
    solr_post = session.post(solr_url, params=solr_params, json=data)
    try:
        solr_post.raise_for_status()
        print("added resource %s: %s to index" % (data['id'], data['type']))
    except Exception:
        # narrowed from a bare `except:` — still best-effort: record the
        # failing document instead of aborting the whole directory walk
        dump_exception(orig, solr_post.text)
        return
def genericType_toAtts(orig, rid=None): if 'txt_region' in ret: ret['txt_region'] = list(set(ret['txt_region'])) if 'txt_nationality' in ret: - ret['txt_nationality'] = list(set(ret['txt_nationality'])) + ret['txt_nationality'] = list(set(ret['txt_nationality'])) + if 'txt_license' in ret: + #remove trailing slash in urls, for performing comparison + stripped_vals = [url.rstrip('/') for url in ret['txt_license']] + ret['txt_license'] = list(set(stripped_vals)) return ret def _merge_prov(orig, prov): diff --git a/solr/conf/schema.xml b/solr/conf/schema.xml index 875c6bc..b81f40c 100644 --- a/solr/conf/schema.xml +++ b/solr/conf/schema.xml @@ -104,7 +104,7 @@ schema. In this case the version should be set to the next CKAN version number. - +