Skip to content

Commit

Permalink
Merge pull request #66 from LinkedEarth/queries
Browse files Browse the repository at this point in the history
proxy queries
  • Loading branch information
khider authored Jul 26, 2024
2 parents 8012630 + c139a99 commit 1a451cd
Show file tree
Hide file tree
Showing 5 changed files with 131 additions and 7 deletions.
4 changes: 3 additions & 1 deletion pylipd/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
__version__ = "1.3.7"
from importlib.metadata import PackageNotFoundError, version

# Report the installed version of this distribution (pylipd). The lookup
# previously asked for 'pyleoclim', which yields another package's version,
# or raises PackageNotFoundError at import time when that package is absent.
try:
    __version__ = version('pylipd')
except PackageNotFoundError:
    # No installed metadata (e.g. running from a plain source checkout);
    # keep the package importable rather than failing on a version lookup.
    __version__ = "unknown"


from .utils import *
from .classes import *
25 changes: 25 additions & 0 deletions pylipd/globals/queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,19 @@
}
"""

# SPARQL query listing the distinct proxy labels attached to variables.
# Both prefixes are declared explicitly so the query does not depend on
# whatever namespace bindings the queried graph happens to carry.
# NOTE(review): the OPTIONAL group precedes the required triple pattern;
# its position is kept as-is to preserve the existing result set.
QUERY_DISTINCT_PROXY = """
    PREFIX le: <http://linked.earth/ontology#>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

    SELECT distinct ?proxy
    WHERE {
        OPTIONAL{?uri le:hasProxy ?proxyObj .
                 ?proxyObj rdfs:label ?proxy .}
        ?uri le:hasVariableId ?TSID
    }
"""

QUERY_VARIABLE = """
Expand Down Expand Up @@ -257,6 +270,18 @@
}
"""

# SPARQL template selecting variables whose proxy label matches a regex
# (case-insensitive). The literal "[proxy]" is a placeholder that callers
# substitute with the pattern before running the query.
# Prefixes are declared explicitly so the query is self-contained, and the
# dataset name is bound to ?dsname so the projected column is populated
# (the pattern previously bound ?dataSetName, leaving ?dsname unbound).
QUERY_FILTER_VARIABLE_PROXY = """
    PREFIX le: <http://linked.earth/ontology#>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

    SELECT ?uri ?dsuri ?dsname ?tableuri ?id ?proxy WHERE {
        ?uri le:hasVariableId ?id .
        ?uri le:hasProxy ?proxyObj .
        ?proxyObj rdfs:label ?proxy .
        FILTER regex(?proxy, "[proxy].*", "i") .
        ?uri le:foundInDataset ?dsuri .
        ?uri le:foundInDatasetName ?dsname .
        ?uri le:foundInTable ?tableuri .
    }
"""

QUERY_TIMESERIES_ESSENTIALS_PALEO ="""
PREFIX wgs84: <http://www.w3.org/2003/01/geo/wgs84_pos#>
SELECT ?dataSetName ?archiveType ?geo_meanLat ?geo_meanLon ?geo_meanElev
Expand Down
87 changes: 83 additions & 4 deletions pylipd/lipd_series.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from tqdm import tqdm
from .globals.queries import QUERY_FILTER_VARIABLE_NAME, QUERY_VARIABLE, QUERY_DISTINCT_VARIABLE, QUERY_VARIABLE_ESSENTIALS
from .globals.queries import QUERY_FILTER_VARIABLE_NAME, QUERY_VARIABLE, QUERY_DISTINCT_VARIABLE, QUERY_VARIABLE_ESSENTIALS, QUERY_DISTINCT_PROXY, QUERY_FILTER_VARIABLE_PROXY

from .utils.multi_processing import multi_load_lipd_series
from .utils.rdf_graph import RDFGraph
Expand Down Expand Up @@ -95,7 +95,7 @@ def get_all_variables(self):
def get_all_variable_names(self):

"""
Get a list of all possible distinct variableNames. Useful for filtering and qeurying.
Get a list of all possible distinct variableNames. Useful for filtering and querying.
Returns
-------
Expand All @@ -115,6 +115,30 @@ def get_all_variable_names(self):
"""

return self.query(QUERY_DISTINCT_VARIABLE)[1].iloc[:,0].values.tolist()

def get_all_proxy(self):
    """
    List the distinct proxy values returned by the proxy query.

    Useful for discovering which values can be passed to proxy-based
    filtering and querying.

    Returns
    -------

    list
        The values of the first column of the query's DataFrame result

    Examples
    --------

    .. jupyter-execute::

        from pylipd.utils.dataset import load_dir

        lipd = load_dir('Pages2k')
        S = lipd.to_lipd_series()
        proxyName = S.get_all_proxy()
        print(proxyName)
    """
    # The query result is indexable; position 1 holds the DataFrame.
    result = self.query(QUERY_DISTINCT_PROXY)
    frame = result[1]
    first_column = frame.iloc[:, 0]
    return first_column.values.tolist()

def get_timeseries_essentials(self):
'''This function returns information about each variable: `dataSetName`, `archiveType`, `name`, `values`, `units`, `TSID`, `proxy`.
Expand Down Expand Up @@ -167,6 +191,18 @@ def filter_by_name(self, name):
pylipd.lipd_series.LiPDSeries
A new LiPDSeries object that only contains variables that have the specified name (regex)
Examples
--------
.. jupyter-execute::
from pylipd.utils.dataset import load_datasets
lipd = load_datasets('ODP846.Lawrence.2006.lpd')
S = lipd.to_lipd_series()
sst = S.filter_by_name('sst')
print(sst.get_all_variable_names())
'''
query = QUERY_FILTER_VARIABLE_NAME
Expand All @@ -176,9 +212,52 @@ def filter_by_name(self, name):
varuris = [str(row.uri) for row in qres]
dsuris = [*set([str(row.dsuri) for row in qres])]

print(len(dsuris))
#print(len(dsuris))

rdfgraph = self.get(varuris)
S = LiPDSeries(rdfgraph.graph)
S.lipds = {k: self.lipds[k].copy() for k in dsuris}
return S

def filter_by_proxy(self, proxy):
    '''
    Filters series to return a new LiPDSeries that only keeps variables that have the specified proxy (regex)

    Parameters
    ----------

    proxy : str
        The proxy to filter by. It is substituted into a case-insensitive
        regular expression matched against each variable's proxy label.

    Returns
    -------

    pylipd.lipd_series.LiPDSeries
        A new LiPDSeries object that only contains variables that have the specified proxy (regex)

    Examples
    --------

    .. jupyter-execute::

        from pylipd.utils.dataset import load_dir

        lipd = load_dir('Pages2k')
        S = lipd.to_lipd_series()
        S_filtered = S.filter_by_proxy('ring width')
        print(S_filtered.get_all_proxy())
    '''
    # Substitute the caller's pattern for the "[proxy]" placeholder.
    query = QUERY_FILTER_VARIABLE_PROXY.replace("[proxy]", proxy)

    # Only the row bindings are needed here; the DataFrame is ignored.
    qres, _ = self.query(query)

    # Collect matching variable URIs and the de-duplicated dataset URIs
    # in a single pass over the result rows.
    varuris = []
    dsuris = set()
    for row in qres:
        varuris.append(str(row.uri))
        dsuris.add(str(row.dsuri))

    # Build the filtered series from the matching variables and carry
    # over copies of the datasets those variables belong to.
    rdfgraph = self.get(varuris)
    S = LiPDSeries(rdfgraph.graph)
    S.lipds = {k: self.lipds[k].copy() for k in dsuris}
    return S

7 changes: 6 additions & 1 deletion pylipd/tests/test_LiPD.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
from pylipd.lipd import LiPD
import urllib as urllib



class TestLiPDLoad():

def test_load_t0(self, odp846):
Expand Down Expand Up @@ -181,7 +183,10 @@ class TestRdf():

def test_convert_to_rdf_t0(self):
    """Convert the Pages2k LiPD directory to an RDF file ("all-lipd.nq")."""
    lipd = LiPD()
    # NOTE(review): this unconditional call runs before the fallback logic
    # below and would fail first if the path does not exist from the current
    # working directory; confirm whether it is still needed.
    lipd.convert_lipd_dir_to_rdf("./examples/data/Pages2k", "all-lipd.nq")
    # Try the data path relative to the tests directory first, then fall
    # back to the path relative to the repository root. Catch Exception
    # rather than using a bare except so that KeyboardInterrupt/SystemExit
    # still propagate.
    try:
        lipd.convert_lipd_dir_to_rdf("../data/Pages2k", "all-lipd.nq")
    except Exception:
        lipd.convert_lipd_dir_to_rdf("./examples/data/Pages2k", "all-lipd.nq")



15 changes: 14 additions & 1 deletion pylipd/tests/test_LiPDSeries.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,12 @@ def test_timeseries_essentials_t0(self, pages2k):
D=pages2k
S = D.to_lipd_series()
names = S.get_timeseries_essentials()


def test_proxy_t0(self, pages2k):
    """Smoke test: listing all proxies on the Pages2k series runs without error."""
    series = pages2k.to_lipd_series()
    # The return value is not checked; the call only has to succeed.
    series.get_all_proxy()

class TestFiler():

def test_name_t0(self,pages2k):
Expand All @@ -52,5 +57,13 @@ def test_name_t0(self,pages2k):
Sfiltered = S.filter_by_name('temperature')
df=Sfiltered.get_timeseries_essentials()
assert len(df.index)==11

def test_proxy_t0(self, pages2k):
    """Filtering Pages2k by the 'ring width' proxy leaves exactly one proxy value."""
    series = pages2k.to_lipd_series()
    filtered = series.filter_by_proxy('ring width')
    remaining = filtered.get_all_proxy()
    assert len(remaining) == 1



0 comments on commit 1a451cd

Please sign in to comment.