Merge branch 'main' into queries
khider authored Jul 26, 2024
2 parents bf9e9a4 + 8012630 commit c139a99
Showing 25 changed files with 108 additions and 90 deletions.
Binary file modified docs/_build/doctrees/api.doctree
Binary file removed docs/_build/doctrees/environment.pickle
Binary file modified docs/_build/doctrees/index.doctree
Binary file modified docs/_build/doctrees/installation.doctree
Binary file modified docs/_build/doctrees/source/modules.doctree
Binary file modified docs/_build/doctrees/source/pylipd.doctree
Binary file modified docs/_build/doctrees/source/pylipd.globals.doctree
Binary file modified docs/_build/doctrees/source/pylipd.series.doctree
Binary file modified docs/_build/doctrees/source/rdfgraph.doctree
Binary file modified docs/_build/doctrees/tutorials.doctree
17 changes: 3 additions & 14 deletions docs/_build/html/_static/_sphinx_javascript_frameworks_compat.js
@@ -1,20 +1,9 @@
-/*
- * _sphinx_javascript_frameworks_compat.js
- * ~~~~~~~~~~
- *
- * Compatability shim for jQuery and underscores.js.
- *
- * WILL BE REMOVED IN Sphinx 6.0
- * xref RemovedInSphinx60Warning
+/* Compatability shim for jQuery and underscores.js.
  *
+ * Copyright Sphinx contributors
+ * Released under the two clause BSD licence
  */
 
-/**
- * select a different prefix for underscore
- */
-$u = _.noConflict();
-
-
 /**
  * small helper function to urldecode strings
  *
2 changes: 1 addition & 1 deletion docs/rtd_env.yml
@@ -6,7 +6,7 @@ dependencies:
   - python=3.10
   - numpy
   - pandas
-  - Sphinx<6.0
+  - Sphinx
   - pip:
     - sphinx
     - pylint
Binary file modified docs/test.lpd
1 change: 1 addition & 0 deletions pylipd/__init__.py
@@ -3,3 +3,4 @@
 
 
 from .utils import *
+from .classes import *
Empty file added pylipd/classes/__init__.py
32 changes: 32 additions & 0 deletions pylipd/classes/dataset.py
@@ -22,6 +22,7 @@ def __init__(self):
         self.chronData: list[ChronData] = []
         self.collectionName: str = None
         self.collectionYear: str = None
+        self.compilationNest: str = None
         self.contributor: Person = None
         self.creators: list[Person] = []
         self.dataSource: str = None
@@ -90,6 +91,12 @@ def from_data(id, data) -> 'Dataset':
                         obj = val["@value"]
                         self.collectionYear = obj
 
+            elif key == "hasCompilationNest":
+                for val in value:
+                    if "@value" in val:
+                        obj = val["@value"]
+                        self.compilationNest = obj
+
             elif key == "hasContributor":
                 for val in value:
                     if "@id" in val:
@@ -370,6 +377,16 @@ def to_data(self, data={}):
             data[self.id]["hasCollectionYear"] = [obj]
 
 
+        if self.compilationNest:
+            value_obj = self.compilationNest
+            obj = {
+                "@value": value_obj,
+                "@type": "literal",
+                "@datatype": "http://www.w3.org/2001/XMLSchema#string"
+            }
+            data[self.id]["hasCompilationNest"] = [obj]
+
+
         if self.contributor:
             value_obj = self.contributor
             if type(value_obj) is str:
@@ -583,6 +600,11 @@ def to_json(self):
             obj = value_obj
             data["collectionYear"] = obj
 
+        if self.compilationNest:
+            value_obj = self.compilationNest
+            obj = value_obj
+            data["compilation_nest"] = obj
+
         if self.contributor:
             value_obj = self.contributor
             if hasattr(value_obj, "to_json"):
@@ -673,6 +695,10 @@ def from_json(data) -> 'Dataset':
                 value = pvalue
                 obj = value
                 self.collectionYear = obj
+            elif key == "compilation_nest":
+                value = pvalue
+                obj = value
+                self.compilationNest = obj
            elif key == "creator":
                for value in pvalue:
                    if type(value) is dict:
@@ -803,6 +829,12 @@ def getCollectionYear(self) -> str:
     def setCollectionYear(self, collectionYear:str):
         self.collectionYear = collectionYear
 
+    def getCompilationNest(self) -> str:
+        return self.compilationNest
+
+    def setCompilationNest(self, compilationNest:str):
+        self.compilationNest = compilationNest
+
     def getContributor(self) -> Person:
         return self.contributor
 
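Taken together with the matching removal from Variable below, this change moves compilationNest one level up, from per-column metadata to dataset-level metadata. A minimal sketch of the new accessors and JSON round-trip, grounded only in the methods shown above and assuming a standalone Dataset() is constructible (the compilation name is illustrative):

from pylipd.classes.dataset import Dataset

ds = Dataset()
ds.setCompilationNest("Pages2kTemperature")   # illustrative value
print(ds.getCompilationNest())                # -> "Pages2kTemperature"

# to_json() serializes the field under the "compilation_nest" key
assert ds.to_json()["compilation_nest"] == "Pages2kTemperature"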
32 changes: 0 additions & 32 deletions pylipd/classes/variable.py
@@ -21,7 +20,6 @@ def __init__(self):
         self.archiveType: ArchiveType = None
         self.calibratedVias: list[Calibration] = []
         self.columnNumber: int = None
-        self.compilationNest: str = None
         self.composite: bool = None
         self.description: str = None
         self.foundInDataset: None = None
@@ -98,12 +97,6 @@ def from_data(id, data) -> 'Variable':
                         obj = val["@value"]
                         self.columnNumber = obj
 
-            elif key == "hasCompilationNest":
-                for val in value:
-                    if "@value" in val:
-                        obj = val["@value"]
-                        self.compilationNest = obj
-
             elif key == "hasDescription":
                 for val in value:
                     if "@value" in val:
@@ -341,16 +334,6 @@ def to_data(self, data={}):
             data[self.id]["hasColumnNumber"] = [obj]
 
 
-        if self.compilationNest:
-            value_obj = self.compilationNest
-            obj = {
-                "@value": value_obj,
-                "@type": "literal",
-                "@datatype": "http://www.w3.org/2001/XMLSchema#string"
-            }
-            data[self.id]["hasCompilationNest"] = [obj]
-
-
         if self.composite:
             value_obj = self.composite
             obj = {
@@ -709,11 +692,6 @@ def to_json(self):
             obj = value_obj
             data["number"] = obj
 
-        if self.compilationNest:
-            value_obj = self.compilationNest
-            obj = value_obj
-            data["compilation_nest"] = obj
-
         if self.composite:
             value_obj = self.composite
             obj = value_obj
@@ -880,10 +858,6 @@ def from_json(data) -> 'Variable':
                     else:
                         obj = value
                     self.calibratedVias.append(obj)
-            elif key == "compilation_nest":
-                value = pvalue
-                obj = value
-                self.compilationNest = obj
            elif key == "description":
                value = pvalue
                obj = value
@@ -1042,12 +1016,6 @@ def getColumnNumber(self) -> int:
     def setColumnNumber(self, columnNumber:int):
         self.columnNumber = columnNumber
 
-    def getCompilationNest(self) -> str:
-        return self.compilationNest
-
-    def setCompilationNest(self, compilationNest:str):
-        self.compilationNest = compilationNest
-
     def getDescription(self) -> str:
         return self.description
 
21 changes: 12 additions & 9 deletions pylipd/globals/schema.py
@@ -105,6 +105,12 @@
         },
         'dataSetVersion': {
             'name': 'hasVersion'
         },
+        'compilation_nest': {
+            'name': 'hasCompilationNest',
+            'alternates': ['pages2kRegion', 'paleoDIVERSiteId', 'sisalSiteId', 'LegacyClimateDatasetId',
+                'LegacyClimateSiteId', 'ch2kCoreCode', 'coralHydro2kGroup', 'iso2kCertification',
+                'iso2kUI', 'ocean2kID', 'pages2kId', 'pages2kID', 'QCCertification', 'SISALEntityID' ]
+        }
     },
     'Compilation': {
@@ -159,8 +165,8 @@
             '{identifier.0.id|@parent.dataSetName}',
             '{index}'
         ],
-        '@fromJson': ['_set_identifier_properties'],
-        '@toJson': ['_create_publication_identifier'],
+        # '@fromJson': ['_set_identifier_properties'],
+        # '@toJson': ['_create_publication_identifier'],
         'title': {
             'name': 'hasTitle'
         },
@@ -217,7 +223,8 @@
        },
        'doi': {
            'name': 'hasDOI',
-           'type': 'string'
+           'type': 'string',
+           'alternates': ['DOI']
        },
        'author': {
            'name': 'hasAuthor',
@@ -462,12 +469,6 @@
            'schema': 'Compilation',
            'category': 'Compilation'
        },
-       'compilation_nest': {
-           'name': 'hasCompilationNest',
-           'alternates': ['pages2kRegion', 'paleoDIVERSiteId', 'sisalSiteId', 'LegacyClimateDatasetId',
-               'LegacyClimateSiteId', 'ch2kCoreCode', 'coralHydro2kGroup', 'iso2kCertification',
-               'iso2kUI', 'ocean2kID', 'pages2kId', 'pages2kID', 'QCCertification', 'SISALEntityID' ]
-       },
        'notes': {
            'name': 'hasNotes',
            'alternates': ['qcNotes', 'qCNotes', 'qCnotes', 'qcnotes', 'QCnotes', 'QCNotes']
@@ -525,6 +526,7 @@
        'coordinatesFor': {
            'type': 'Individual'
        },
+       'type': { 'name': 'hasType' },
        'continent': { 'name': 'hasContinent' },
        'country': { 'name': 'hasCountry' },
        'countryOcean': { 'name': 'hasCountryOcean' },
@@ -544,6 +546,7 @@
            '.Interpretation',
            '{@index}'
        ],
+       '@fromJson': ['_add_interpretation_rank'],
        '@toJson_pre': [
            '_set_units_label',
            '_set_seasonality_labels',
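The 'alternates' lists are what let legacy, community-specific keys (PAGES 2k, Iso2k, SISAL, CoralHydro2k, ...) be folded into the single canonical hasCompilationNest property. A rough illustration of how such a synonym table can be applied — a hypothetical helper for clarity, not pylipd's actual conversion code:

# Hypothetical restatement of the 'alternates' mechanism above.
ALTERNATES = ['pages2kRegion', 'paleoDIVERSiteId', 'sisalSiteId', 'iso2kUI', 'ocean2kID']
CANONICAL = {alt.lower(): 'hasCompilationNest' for alt in ALTERNATES}

def normalize_key(key: str) -> str:
    # Fall back to the key itself when no synonym is registered.
    return CANONICAL.get(key.lower(), key)

print(normalize_key('pages2kRegion'))   # -> 'hasCompilationNest'
print(normalize_key('notes'))           # -> 'notes'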
67 changes: 36 additions & 31 deletions pylipd/lipd.py
@@ -15,10 +15,7 @@
 import numpy as np
 import json
 
-try:
-    from pylipd.classes.dataset import Dataset
-except:
-    pass
+from pylipd.classes.dataset import Dataset
 
 from pylipd.utils.json_to_rdf import JSONToRDF
 from pylipd.utils.rdf_to_json import RDFToJSON
@@ -393,7 +390,7 @@ def make_bib(row):
 
         return bibs, df
 
-    def get_timeseries(self, dsnames, to_dataframe=False):
+    def get_timeseries(self, dsnames, to_dataframe=False, mode="paleo", time="age"):
         '''Get Legacy LiPD like Time Series Object (tso)
 
         Parameters
@@ -434,7 +431,7 @@
         if type(dsnames)==str:
             dsnames=[dsnames]
 
-        ts = self._get_timeseries(dsnames)
+        ts = self._get_timeseries(dsnames, mode=mode, time=time)
         if to_dataframe == False:
             return ts
         elif to_dataframe == True:
@@ -448,18 +445,18 @@
 
         return ts, df
 
-    def _get_timeseries(self, dsnames):
+    def _get_timeseries(self, dsnames, mode="paleo", time="age"):
         timeseries = {}
         for dsname in dsnames:
             converter = RDFToLiPD(self.graph)
             d = converter.convert_to_json(dsname)
             print("Extracting timeseries from dataset: " + dsname + " ...")
             if len(d.items()):
-                tss = LiPD_Legacy().extract(d)
+                tss = LiPD_Legacy().extract(d, mode=mode, time=time)
                 timeseries[dsname] = tss
         return timeseries
 
-    def get_timeseries_essentials(self, dsname = None, mode='paleo'):
+    def get_timeseries_essentials(self, dsnames=None, mode='paleo'):
         ''' Returns specific properties for timeseries: 'dataSetName', 'archiveType', 'geo_meanLat', 'geo_meanLon',
             'geo_meanElev', 'paleoData_variableName', 'paleoData_values',
             'paleoData_units', 'paleoData_proxy' (paleo only), 'paleoData_proxyGeneral' (paleo only),
@@ -469,8 +466,8 @@
         Parameters
         ----------
-        dsname : str, optional
-            The name of the dataset for which to return the timeseries information. The default is None.
+        dsnames : list
+            array of dataset id or name strings
 
         mode : paleo, chron
             Whether to retrun the information stored in the PaleoMeasurementTable or the ChronMeasurementTable. The default is 'paleo'.
@@ -505,28 +502,36 @@
         '''
 
-        if dsname is None:
-            dsname= ''
-
-        if mode == 'paleo':
-            query = QUERY_TIMESERIES_ESSENTIALS_PALEO
-            query = query.replace("[dsname]", dsname)
-        elif mode == 'chron':
-            query = QUERY_TIMESERIES_ESSENTIALS_CHRON
-            query = query.replace("[dsname]", dsname)
-        else:
-            raise ValueError("The mode should be either 'paleo' or 'chron'")
-
-        qres, qres_df = self.query(query)
-
-        try:
-            qres_df['paleoData_values']=qres_df['paleoData_values'].apply(lambda row : np.array(json.loads(row)))
-        except:
-            qres_df['chronData_values']=qres_df['chronData_values'].apply(lambda row : np.array(json.loads(row)))
-
-        qres_df['time_values']=qres_df['time_values'].apply(lambda x : np.array(json.loads(x)) if x is not None else None)
-        qres_df['depth_values']=qres_df['depth_values'].apply(lambda x : np.array(json.loads(x)) if x is not None else None)
+        if dsnames is None:
+            dsnames= ''
+        if type(dsnames)==str:
+            dsnames=[dsnames]
+
+        qres_df = None
+        for dsname in dsnames:
+            if mode == 'paleo':
+                query = QUERY_TIMESERIES_ESSENTIALS_PALEO
+                query = query.replace("[dsname]", dsname)
+            elif mode == 'chron':
+                query = QUERY_TIMESERIES_ESSENTIALS_CHRON
+                query = query.replace("[dsname]", dsname)
+            else:
+                raise ValueError("The mode should be either 'paleo' or 'chron'")
+
+            qres, qtmp_df = self.query(query)
+
+            try:
+                qtmp_df['paleoData_values']=qtmp_df['paleoData_values'].apply(lambda row : np.array(json.loads(row)))
+            except:
+                qtmp_df['chronData_values']=qtmp_df['chronData_values'].apply(lambda row : np.array(json.loads(row)))
+
+            qtmp_df['time_values']=qtmp_df['time_values'].apply(lambda x : np.array(json.loads(x)) if x is not None else None)
+            qtmp_df['depth_values']=qtmp_df['depth_values'].apply(lambda x : np.array(json.loads(x)) if x is not None else None)
+            if qres_df is None:
+                qres_df = qtmp_df
+            else:
+                qres_df = pd.concat([qres_df, qtmp_df], ignore_index=True)
 
 
         return qres_df
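The loop above means callers can now request several datasets at once and get a single concatenated DataFrame back. A usage sketch under the new signatures (the directory path is illustrative; load_from_dir and get_all_dataset_names are pre-existing LiPD helpers):

from pylipd.lipd import LiPD

lipd = LiPD()
lipd.load_from_dir("my_lipd_files")            # illustrative path

names = lipd.get_all_dataset_names()
# A bare string is still wrapped into a one-element list internally;
# a list of names now yields one pd.concat'ed DataFrame.
df = lipd.get_timeseries_essentials(dsnames=names, mode='paleo')

# get_timeseries simply forwards the new mode/time arguments
ts = lipd.get_timeseries(names, mode='paleo', time='age')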
2 changes: 1 addition & 1 deletion pylipd/tests/test_LiPD.py
@@ -134,7 +134,7 @@ def test_get_timeseries_essentials_t0(self, odp846, mode):
 
     def test_get_timeseries_essentials_t1(self, odp846):
         D = odp846
-        df = D.get_timeseries_essentials(dsname=D.get_all_dataset_names()[0])
+        df = D.get_timeseries_essentials(dsnames=D.get_all_dataset_names())
 
     @pytest.mark.parametrize('ensname',['year','age'])
     def test_ens_t1(self,odp846, ensname):
2 changes: 2 additions & 0 deletions pylipd/utils/legacy_utils.py
@@ -46,6 +46,8 @@ def extract(self, d, whichtables="meas", mode="paleo", time="age"):
             _pc = "chronData"
         _root["mode"] = _pc
         _root["time_id"] = time
+        if "archiveType" in d:
+            _root["archiveType"] = d["archiveType"]
         try:
             # Build the root level data.
             # This will serve as the template for which column data will be added onto later.
6 changes: 6 additions & 0 deletions pylipd/utils/lipd_to_rdf.py
@@ -369,6 +369,12 @@ def _wrap_integration_time(self, obj, objhash) :
 
         return [obj, objhash, []]
 
+    def _add_interpretation_rank(self, obj, objhash):
+        if "rank" not in obj or type(obj["rank"]) != int:
+            rank = obj["@index"] - 1
+            obj["rank"] = rank
+        return [obj, objhash, []]
+
     def _wrap_uncertainty(self, obj, objhash) :
         objid = obj["@id"]
         # Deal with uncertainty
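This hook, wired in via the '@fromJson' entry added to the Interpretation schema above, backfills a missing (or non-integer) interpretation rank from the object's @index, so the first interpretation gets rank 0 assuming 1-based indexing. A standalone restatement of its behavior for clarity (not the bound method itself):

def add_interpretation_rank(obj):
    # Default rank to @index - 1 when absent or not an int.
    if "rank" not in obj or type(obj["rank"]) != int:
        obj["rank"] = obj["@index"] - 1
    return obj

print(add_interpretation_rank({"@index": 1}))                 # {'@index': 1, 'rank': 0}
print(add_interpretation_rank({"@index": 3, "rank": "2"}))    # string rank replaced by 2
print(add_interpretation_rank({"@index": 2, "rank": 5}))      # existing integer rank kept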