Skip to content

Commit

Permalink
filter by resolution
Browse files Browse the repository at this point in the history
  • Loading branch information
khider committed Jul 29, 2024
1 parent 2efe723 commit 8c82257
Show file tree
Hide file tree
Showing 3 changed files with 75 additions and 6 deletions.
13 changes: 13 additions & 0 deletions pylipd/globals/queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,19 @@
}
"""

# SPARQL template selecting variables whose resolution statistic is below a
# threshold. Two placeholders must be substituted before the query is run:
#   [stat]  -> name of the statistic, spliced into the predicate
#              (e.g. "Mean" yields le:hasMeanValue)
#   [value] -> numeric literal used as the exclusive upper bound in FILTER
# The dataset name is bound to ?dsname so that it matches the SELECT clause;
# binding it under another name leaves the projected ?dsname column unbound.
QUERY_FILTER_VARIABLE_RESOLUTION = """
    SELECT ?uri ?dsuri ?dsname ?tableuri ?id ?v WHERE {
        ?uri le:hasVariableId ?id .
        ?uri le:hasResolution ?res .
        ?res le:has[stat]Value ?v .
        FILTER(?v<[value]) .
        ?uri le:foundInDataset ?dsuri .
        ?uri le:foundInDatasetName ?dsname .
        ?uri le:foundInTable ?tableuri .
    }
"""


QUERY_TIMESERIES_ESSENTIALS_PALEO ="""
PREFIX wgs84: <http://www.w3.org/2003/01/geo/wgs84_pos#>
SELECT ?dataSetName ?archiveType ?geo_meanLat ?geo_meanLon ?geo_meanElev
Expand Down
60 changes: 55 additions & 5 deletions pylipd/lipd_series.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from tqdm import tqdm
from .globals.queries import QUERY_FILTER_VARIABLE_NAME, QUERY_VARIABLE, QUERY_DISTINCT_VARIABLE, QUERY_VARIABLE_ESSENTIALS, QUERY_DISTINCT_PROXY, QUERY_FILTER_VARIABLE_PROXY

from .globals.queries import QUERY_FILTER_VARIABLE_NAME, QUERY_VARIABLE, QUERY_DISTINCT_VARIABLE, QUERY_VARIABLE_ESSENTIALS, QUERY_DISTINCT_PROXY, QUERY_FILTER_VARIABLE_PROXY, QUERY_FILTER_VARIABLE_RESOLUTION
from .utils.multi_processing import multi_load_lipd_series
from .utils.rdf_graph import RDFGraph

Expand Down Expand Up @@ -254,10 +253,61 @@ def filter_by_proxy(self, proxy):
varuris = [str(row.uri) for row in qres]
dsuris = [*set([str(row.dsuri) for row in qres])]

#print(len(dsuris))

rdfgraph = self.get(varuris)
S = LiPDSeries(rdfgraph.graph)
S.lipds = {k: self.lipds[k].copy() for k in dsuris}
return S


def filter_by_resolution(self, threshold, stats='Mean'):
    '''
    Filters series to return a new LiPDSeries that only keeps variables that have a resolution less than the specified threshold.

    Parameters
    ----------
    threshold : float
        The maximum resolution to keep (exclusive). Must be a finite number,
        or a value that ``float()`` can convert to one.
    stats : str, optional
        Whether to use 'Mean', 'Median', 'Min' or 'Max' resolution.
        Matching is case-insensitive. The default is 'Mean'.

    Raises
    ------
    ValueError
        Make sure that the stats is of ['Mean','Median', 'Min', 'Max'].
        Also raised when the threshold is NaN or infinite.

    Returns
    -------
    S : pylipd.lipd_series.LiPDSeries
        A new LiPDSeries object that only contains the filtered variables

    Examples
    --------

    .. jupyter-execute::

        from pylipd.utils.dataset import load_dir

        lipd = load_dir('Pages2k')
        S = lipd.to_lipd_series()
        S_filtered = S.filter_by_resolution(10)
    '''
    import math

    # Normalise the case so that 'mean', 'MEAN', ... are accepted. str() makes
    # a non-string argument fail with the documented ValueError below instead
    # of an AttributeError.
    stats = str(stats).capitalize()
    stats_allowed = ['Mean', 'Median', 'Min', 'Max']
    if stats not in stats_allowed:
        raise ValueError("Stats must be ['Mean','Median', 'Min', 'Max']")

    # Conversion errors from float() propagate to the caller unchanged.
    threshold = float(threshold)
    # 'nan' and 'inf' are not numeric literals in SPARQL; reject them here
    # rather than letting the query parser fail with an opaque error.
    if not math.isfinite(threshold):
        raise ValueError("threshold must be a finite number")

    # Fill in the two placeholders of the query template.
    query = QUERY_FILTER_VARIABLE_RESOLUTION
    query = query.replace("[value]", str(threshold))
    query = query.replace("[stat]", stats)

    # Only the row iterator is needed; the second returned value is unused.
    qres, _ = self.query(query)

    # Materialise once so the rows can be read for both URI lists.
    rows = list(qres)
    varuris = [str(row.uri) for row in rows]
    dsuris = {str(row.dsuri) for row in rows}  # de-duplicated dataset URIs

    # Build the reduced series and carry over copies of the parent datasets.
    rdfgraph = self.get(varuris)
    S = LiPDSeries(rdfgraph.graph)
    S.lipds = {k: self.lipds[k].copy() for k in dsuris}
    return S
8 changes: 7 additions & 1 deletion pylipd/tests/test_LiPDSeries.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
4. after `pip install pytest-xdist`, one may execute "pytest -n 4" to test in parallel with number of workers specified by `-n`
5. for more details, see https://docs.pytest.org/en/stable/usage.html
"""

import pytest
from pylipd.lipd_series import LiPDSeries

class TestLoad():
Expand Down Expand Up @@ -64,6 +64,12 @@ def test_proxy_t0(self,pages2k):
Sfiltered = S.filter_by_proxy('ring width')
v = Sfiltered.get_all_proxy()
assert len(v)==1

@pytest.mark.parametrize('stats',['Mean','Median','Min','Max'])
def test_resolution_t0(self,stats,pages2k):
    # Run the resolution filter once per supported statistic on the
    # `pages2k` fixture.
    D=pages2k
    S = D.to_lipd_series()
    Sfiltered = S.filter_by_resolution(threshold = 10,stats=stats)
    # Without assertions this test only proved that the call does not raise.
    # Check the return type and that filtering never introduces datasets
    # that were absent from the unfiltered series.
    assert isinstance(Sfiltered, LiPDSeries)
    assert set(Sfiltered.lipds) <= set(S.lipds)



0 comments on commit 8c82257

Please sign in to comment.