Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

proxy queries #66

Merged
merged 2 commits into from
Jul 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion pylipd/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
__version__ = "1.3.7"
from importlib.metadata import PackageNotFoundError, version

# Expose the installed version of *this* distribution (pylipd). Looking up
# another project's name would report the wrong version and raise
# PackageNotFoundError on import whenever that project is not installed.
try:
    __version__ = version('pylipd')
except PackageNotFoundError:
    # The package is being imported from a source tree that was never
    # installed, so no distribution metadata exists; keep the import working.
    __version__ = "unknown"


from .utils import *
from .classes import *
25 changes: 25 additions & 0 deletions pylipd/globals/queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,19 @@
}


"""

# SPARQL query returning the distinct proxy labels (?proxy) reachable from
# subjects that carry le:hasVariableId. Both prefixes used in the pattern are
# declared so the query does not depend on bindings supplied by the executor.
# NOTE(review): the OPTIONAL block comes before the required hasVariableId
# pattern; in SPARQL the position of OPTIONAL can change the result set --
# confirm this ordering is intended.
QUERY_DISTINCT_PROXY = """
    PREFIX le: <http://linked.earth/ontology#>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

    SELECT distinct ?proxy
    WHERE {
        OPTIONAL{?uri le:hasProxy ?proxyObj .
                 ?proxyObj rdfs:label ?proxy .}
        ?uri le:hasVariableId ?TSID
    }


"""

QUERY_VARIABLE = """
Expand Down Expand Up @@ -257,6 +270,18 @@
}
"""

# SPARQL query selecting variables whose proxy label matches a
# case-insensitive regular expression. The literal text "[proxy]" is a
# placeholder that the caller substitutes with the search pattern before the
# query is executed.
# Every variable listed in SELECT is bound in the WHERE pattern: the dataset
# name is bound to ?dsname (it was previously bound to a different variable,
# which left the selected ?dsname column permanently empty).
QUERY_FILTER_VARIABLE_PROXY = """
    PREFIX le: <http://linked.earth/ontology#>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

    SELECT ?uri ?dsuri ?dsname ?tableuri ?id ?proxy WHERE {
        ?uri le:hasVariableId ?id .
        ?uri le:hasProxy ?proxyObj .
        ?proxyObj rdfs:label ?proxy .
        FILTER regex(?proxy, "[proxy].*", "i") .
        ?uri le:foundInDataset ?dsuri .
        ?uri le:foundInDatasetName ?dsname .
        ?uri le:foundInTable ?tableuri .
    }
"""

QUERY_TIMESERIES_ESSENTIALS_PALEO ="""
PREFIX wgs84: <http://www.w3.org/2003/01/geo/wgs84_pos#>
SELECT ?dataSetName ?archiveType ?geo_meanLat ?geo_meanLon ?geo_meanElev
Expand Down
87 changes: 83 additions & 4 deletions pylipd/lipd_series.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from tqdm import tqdm
from .globals.queries import QUERY_FILTER_VARIABLE_NAME, QUERY_VARIABLE, QUERY_DISTINCT_VARIABLE, QUERY_VARIABLE_ESSENTIALS
from .globals.queries import QUERY_FILTER_VARIABLE_NAME, QUERY_VARIABLE, QUERY_DISTINCT_VARIABLE, QUERY_VARIABLE_ESSENTIALS, QUERY_DISTINCT_PROXY, QUERY_FILTER_VARIABLE_PROXY

from .utils.multi_processing import multi_load_lipd_series
from .utils.rdf_graph import RDFGraph
Expand Down Expand Up @@ -95,7 +95,7 @@ def get_all_variables(self):
def get_all_variable_names(self):

"""
Get a list of all possible distinct variableNames. Useful for filtering and qeurying.
Get a list of all possible distinct variableNames. Useful for filtering and querying.

Returns
-------
Expand All @@ -115,6 +115,30 @@ def get_all_variable_names(self):
"""

return self.query(QUERY_DISTINCT_VARIABLE)[1].iloc[:,0].values.tolist()

def get_all_proxy(self):
    """
    List the distinct proxy values found in the series.

    Runs ``QUERY_DISTINCT_PROXY`` and returns the first column of the
    resulting table as a plain Python list. Useful for discovering which
    values can be passed to a proxy filter.

    Returns
    -------
    list
        The distinct proxy values returned by the query

    Examples
    --------

    .. jupyter-execute::

        from pylipd.utils.dataset import load_dir
        lipd = load_dir('Pages2k')
        S = lipd.to_lipd_series()
        proxyName = S.get_all_proxy()
        print(proxyName)
    """
    query_result = self.query(QUERY_DISTINCT_PROXY)
    # The tabular form of the result is the second item returned by query().
    proxy_table = query_result[1]
    proxy_column = proxy_table.iloc[:, 0]
    return proxy_column.values.tolist()

def get_timeseries_essentials(self):
'''This function returns information about each variable: `dataSetName`, `archiveType`, `name`, `values`, `units`, `TSID`, `proxy`.
Expand Down Expand Up @@ -167,6 +191,18 @@ def filter_by_name(self, name):

pylipd.lipd_series.LiPDSeries
A new LiPDSeries object that only contains variables that have the specified name (regex)

Examples
--------

.. jupyter-execute::

from pylipd.utils.dataset import load_datasets
lipd = load_datasets('ODP846.Lawrence.2006.lpd')
S = lipd.to_lipd_series()
sst = S.filter_by_name('sst')

print(sst.get_all_variable_names())

'''
query = QUERY_FILTER_VARIABLE_NAME
Expand All @@ -176,9 +212,52 @@ def filter_by_name(self, name):
varuris = [str(row.uri) for row in qres]
dsuris = [*set([str(row.dsuri) for row in qres])]

print(len(dsuris))
#print(len(dsuris))

rdfgraph = self.get(varuris)
S = LiPDSeries(rdfgraph.graph)
S.lipds = {k: self.lipds[k].copy() for k in dsuris}
return S

def filter_by_proxy(self, proxy):
    '''
    Filters series to return a new LiPDSeries that only keeps variables that have the specified proxy (regex)

    Parameters
    ----------

    proxy : str
        The proxy to filter by. The value is inserted verbatim into a
        case-insensitive SPARQL ``regex`` filter, so it is interpreted as a
        regular expression.

    Returns
    -------

    pylipd.lipd_series.LiPDSeries
        A new LiPDSeries object that only contains variables that have the specified proxy (regex)

    Examples
    --------

    .. jupyter-execute::

        from pylipd.utils.dataset import load_dir
        lipd = load_dir('Pages2k')
        S = lipd.to_lipd_series()
        S_filtered = S.filter_by_proxy('ring width')
        print(S_filtered.get_all_proxy())

    '''
    # "[proxy]" is the placeholder used inside the query template.
    query = QUERY_FILTER_VARIABLE_PROXY.replace("[proxy]", proxy)

    # Only the row-wise result is needed here; the tabular form is ignored.
    qres, _ = self.query(query)
    rows = list(qres)

    varuris = [str(row.uri) for row in rows]
    # De-duplicate dataset URIs while keeping first-seen order, so the
    # resulting ``lipds`` mapping is built in a reproducible order.
    dsuris = list(dict.fromkeys(str(row.dsuri) for row in rows))

    rdfgraph = self.get(varuris)
    S = LiPDSeries(rdfgraph.graph)
    # Carry over a copy of each dataset that still has a matching variable.
    S.lipds = {k: self.lipds[k].copy() for k in dsuris}
    return S

7 changes: 6 additions & 1 deletion pylipd/tests/test_LiPD.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
from pylipd.lipd import LiPD
import urllib as urllib



class TestLiPDLoad():

def test_load_t0(self, odp846):
Expand Down Expand Up @@ -181,7 +183,10 @@ class TestRdf():

def test_convert_to_rdf_t0(self):
    # Smoke test: convert the Pages2k LiPD directory to an N-Quads file.
    import os

    lipd = LiPD()
    # The data directory is addressed relative to the working directory the
    # tests are launched from, so try the known locations in order. Choosing
    # the path explicitly (instead of a bare ``except:`` around the
    # conversion) means a genuine conversion failure is reported rather than
    # silently retried against the other path.
    candidates = ("../data/Pages2k", "./examples/data/Pages2k")
    data_dir = next((path for path in candidates if os.path.isdir(path)), candidates[-1])
    lipd.convert_lipd_dir_to_rdf(data_dir, "all-lipd.nq")



15 changes: 14 additions & 1 deletion pylipd/tests/test_LiPDSeries.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,12 @@ def test_timeseries_essentials_t0(self, pages2k):
D=pages2k
S = D.to_lipd_series()
names = S.get_timeseries_essentials()


def test_proxy_t0(self, pages2k):
    # get_all_proxy() should run on the Pages2k series and hand back a list.
    D = pages2k
    S = D.to_lipd_series()
    names = S.get_all_proxy()
    # Without an assertion the test could only fail on an exception; check
    # the documented return type as well.
    assert isinstance(names, list)

class TestFiler():

def test_name_t0(self,pages2k):
Expand All @@ -52,5 +57,13 @@ def test_name_t0(self,pages2k):
Sfiltered = S.filter_by_name('temperature')
df=Sfiltered.get_timeseries_essentials()
assert len(df.index)==11

def test_proxy_t0(self,pages2k):
    # Filtering on 'ring width' must leave exactly one distinct proxy.
    series = pages2k.to_lipd_series()
    filtered = series.filter_by_proxy('ring width')
    remaining = filtered.get_all_proxy()
    assert len(remaining) == 1



Loading