Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix #685 - [Do Not Merge] - DuckDB - E2E Integration #734

Closed
wants to merge 36 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
a835234
issue-617 Base Class
kdetry Feb 21, 2024
f6cc990
issue617 - Persistent Data Store
kdetry Feb 21, 2024
ac81100
duckdb dependency is added to setup.py
kdetry Feb 21, 2024
6ffc626
dry fix
kdetry Feb 27, 2024
0914085
Merge branch 'main' into issue617
kdetry Feb 27, 2024
1e3892b
CSV data handler - part 1
kdetry Feb 27, 2024
3670826
csv data store - fill with zero
kdetry Feb 27, 2024
b324491
table csv integration
kdetry Feb 28, 2024
c2c4e81
test fix - 1
kdetry Feb 28, 2024
1b118ae
fixing tests - 2
kdetry Feb 28, 2024
4e4687b
test fixes - 3
kdetry Feb 28, 2024
a56c5ae
Merge branch 'main' into issue617
kdetry Feb 29, 2024
4582fe3
black fix
kdetry Feb 29, 2024
494135b
take-back the gql_data_factory from the main branch
kdetry Feb 29, 2024
6c5ebfe
pylint issues
kdetry Feb 29, 2024
e57000b
issue681 - check fixes
kdetry Feb 29, 2024
3940917
test fixes
kdetry Feb 29, 2024
99b97a5
Merge branch 'main' into issue681
kdetry Feb 29, 2024
bb5d74d
Merge branch 'main' into issue681
kdetry Feb 29, 2024
817c882
Merge branch 'main' into issue681
kdetry Mar 1, 2024
c919b1b
Merge branch 'main' into issue681
kdetry Mar 1, 2024
bb4a2dc
issue681 - Append Logic
kdetry Mar 4, 2024
fecfb25
Merge branch 'issue681' of https://github.com/oceanprotocol/pdr-backend
kdetry Mar 4, 2024
e8c5843
black pylint fix
kdetry Mar 4, 2024
db99f6e
issue681 _append_to_csv
kdetry Mar 4, 2024
d546816
issue681 - black fix
kdetry Mar 4, 2024
70791a3
Merge branch 'issue681' into issue685
kdetry Mar 4, 2024
c345615
Merge branch 'issue617' into issue685
kdetry Mar 4, 2024
2df30f8
issue685 - integration with 681 and 617
kdetry Mar 4, 2024
c84d85f
issue681 - black pylint fixes
kdetry Mar 4, 2024
bc14bf1
ETL save step
kdetry Mar 4, 2024
ff3961b
issue685 - black fix
kdetry Mar 4, 2024
d808ab6
issue685 - tests are fixed
kdetry Mar 4, 2024
f670216
issue685 - system tests are fixed
kdetry Mar 4, 2024
34d520b
#650 - Clean (predvalue, truevalue) columns (#664)
kdetry Mar 5, 2024
957a025
Fixing test
idiom-bytes Mar 5, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions pdr_backend/analytics/predictoor_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,16 +34,16 @@ class PredictoorStat(TypedDict):
def get_feed_summary_stats(predictions_df: pl.DataFrame) -> pl.DataFrame:
    """
    Return per-feed summary statistics from the predictions lake.

    @arguments:
      predictions_df -- polars DataFrame with (at least) columns:
        "truevalue", "payout", "pair", "timeframe", "source",
        "stake", "predvalue".
    @returns:
      pl.DataFrame -- one row per (pair, timeframe) with columns:
        source, sum_payout, sum_stake, num_predictions, accuracy.
    """
    # 1 - keep only settled rows: both truevalue and payout must be present
    df = predictions_df.filter(
        ~((pl.col("truevalue").is_null()) | (pl.col("payout").is_null()))
    )

    # 2 - aggregate per feed (pair + timeframe)
    df = df.group_by(["pair", "timeframe"]).agg(
        pl.col("source").first().alias("source"),
        pl.col("payout").sum().alias("sum_payout"),
        pl.col("stake").sum().alias("sum_stake"),
        pl.col("predvalue").count().alias("num_predictions"),
        # accuracy = correct predictions / total rows in group, as a percentage
        (pl.col("predvalue").sum() / pl.col("pair").count() * 100).alias("accuracy"),
    )

    return df
Expand All @@ -53,16 +53,16 @@ def get_feed_summary_stats(predictions_df: pl.DataFrame) -> pl.DataFrame:
def get_predictoor_summary_stats(predictions_df: pl.DataFrame) -> pl.DataFrame:
    """
    Return per-predictoor summary statistics from the predictions lake.

    Same aggregation as get_feed_summary_stats(), but additionally grouped
    by the predictoor's "user" address.

    @arguments:
      predictions_df -- polars DataFrame with (at least) columns:
        "truevalue", "payout", "user", "pair", "timeframe", "source",
        "stake", "predvalue".
    @returns:
      pl.DataFrame -- one row per (user, pair, timeframe) with columns:
        source, sum_payout, sum_stake, num_predictions, accuracy.
    """
    # 1 - keep only settled rows: both truevalue and payout must be present
    df = predictions_df.filter(
        ~((pl.col("truevalue").is_null()) | (pl.col("payout").is_null()))
    )

    # 2 - aggregate per predictoor per feed (user + pair + timeframe)
    df = df.group_by(["user", "pair", "timeframe"]).agg(
        pl.col("source").first().alias("source"),
        pl.col("payout").sum().alias("sum_payout"),
        pl.col("stake").sum().alias("sum_stake"),
        pl.col("predvalue").count().alias("num_predictions"),
        # accuracy = correct predictions / total rows in group, as a percentage
        (pl.col("predvalue").sum() / pl.col("pair").count() * 100).alias("accuracy"),
    )

    return df
Expand Down
44 changes: 44 additions & 0 deletions pdr_backend/lake/base_data_store.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
from abc import ABC, abstractmethod
from hashlib import md5
from typing import Literal, Optional

import duckdb
from enforce_typing import enforce_types


class BaseDataStore(ABC):
    """
    Abstract base for data stores backed by a persistent DuckDB database.

    Subclasses must implement query_data().
    """

    @enforce_types
    def __init__(self, base_directory: str):
        """
        Initialize a BaseDataStore instance.
        @arguments:
            base_directory - The base directory to store the partitioned Parquet files.
        """
        # BUGFIX: was `base_directory=str`, which made the *type* `str` the
        # default value instead of annotating the parameter.
        self.base_directory = base_directory
        self.duckdb_conn = duckdb.connect(
            database=f"{self.base_directory}/duckdb.db"
        )  # Keep a persistent connection

    @enforce_types
    def _generate_view_name(self, base_path: str) -> str:
        """
        Generate a unique view name for a given base path.
        @arguments:
            base_path - The base path to generate a view name for.
        @returns:
            str - A unique view name ("dataset_" + md5 of the full path).
        """
        # md5 is used only to derive a stable identifier, not for security.
        path = f"{self.base_directory}/{base_path}"
        hash_object = md5(path.encode())
        return f"dataset_{hash_object.hexdigest()}"

    @abstractmethod
    def query_data(
        self,
        dataset_identifier: str,
        query: str,
        partition_type: Optional[Literal["date", "address"]] = None,
    ):
        """Run `query` against the dataset; implemented by subclasses."""
Loading
Loading