Skip to content

Commit

Permalink
#650 - Clean (predvalue, truevalue) columns (#664)
Browse files Browse the repository at this point in the history
* issue650 renaming

* issue650 - test fixes

* issue650 black format

* issue650: fixes after merges

* black fix

* Restore gql_data_factory from the main branch

* Removed print statements

---------

Co-authored-by: idiom-bytes <[email protected]>
  • Loading branch information
kdetry and idiom-bytes committed Mar 6, 2024
1 parent 9a2a736 commit b70778f
Show file tree
Hide file tree
Showing 18 changed files with 57 additions and 57 deletions.
12 changes: 6 additions & 6 deletions pdr_backend/analytics/predictoor_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,16 +34,16 @@ class PredictoorStat(TypedDict):
def get_feed_summary_stats(predictions_df: pl.DataFrame) -> pl.DataFrame:
# 1 - filter from lake only the rows that you're looking for
df = predictions_df.filter(
~((pl.col("trueval").is_null()) | (pl.col("payout").is_null()))
~((pl.col("truevalue").is_null()) | (pl.col("payout").is_null()))
)

# Group by pair and timeframe
df = df.group_by(["pair", "timeframe"]).agg(
pl.col("source").first().alias("source"),
pl.col("payout").sum().alias("sum_payout"),
pl.col("stake").sum().alias("sum_stake"),
pl.col("prediction").count().alias("num_predictions"),
(pl.col("prediction").sum() / pl.col("pair").count() * 100).alias("accuracy"),
pl.col("predvalue").count().alias("num_predictions"),
(pl.col("predvalue").sum() / pl.col("pair").count() * 100).alias("accuracy"),
)

return df
Expand All @@ -53,16 +53,16 @@ def get_feed_summary_stats(predictions_df: pl.DataFrame) -> pl.DataFrame:
def get_predictoor_summary_stats(predictions_df: pl.DataFrame) -> pl.DataFrame:
# 1 - filter from lake only the rows that you're looking for
df = predictions_df.filter(
~((pl.col("trueval").is_null()) | (pl.col("payout").is_null()))
~((pl.col("truevalue").is_null()) | (pl.col("payout").is_null()))
)

# Group by user, pair and timeframe
df = df.group_by(["user", "pair", "timeframe"]).agg(
pl.col("source").first().alias("source"),
pl.col("payout").sum().alias("sum_payout"),
pl.col("stake").sum().alias("sum_stake"),
pl.col("prediction").count().alias("num_predictions"),
(pl.col("prediction").sum() / pl.col("pair").count() * 100).alias("accuracy"),
pl.col("predvalue").count().alias("num_predictions"),
(pl.col("predvalue").sum() / pl.col("pair").count() * 100).alias("accuracy"),
)

return df
Expand Down
12 changes: 6 additions & 6 deletions pdr_backend/lake/table_bronze_pdr_predictions.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,8 @@ def get_slot_id(_id: str) -> str:
bronze_predictions_df = predictions_df.with_columns(
[
pl.col("ID").map_elements(get_slot_id, return_dtype=Utf8).alias("slot_id"),
pl.col("prediction").alias("predvalue"),
pl.col("trueval").alias("truevalue"),
pl.col("predvalue").alias("predvalue"),
pl.col("truevalue").alias("truevalue"),
pl.col("timestamp").alias("timestamp"),
pl.col("timestamp").alias("last_event_timestamp"),
]
Expand Down Expand Up @@ -93,14 +93,14 @@ def _process_truevals(tables: Dict[str, Table], ppss: PPSS) -> Dict[str, Table]:
predictions_df.join(truevals_df, left_on="slot_id", right_on="ID", how="left")
.with_columns(
[
pl.col("trueval").fill_null(pl.col("truevalue")),
pl.col("truevalue_right").fill_null(pl.col("truevalue")),
pl.col("timestamp_right").fill_null(pl.col("last_event_timestamp")),
]
)
.drop(["truevalue", "last_event_timestamp"])
.rename(
{
"trueval": "truevalue",
"truevalue_right": "truevalue",
"timestamp_right": "last_event_timestamp",
}
)
Expand Down Expand Up @@ -135,7 +135,7 @@ def _process_payouts(tables: Dict[str, Table], ppss: PPSS) -> Dict[str, Table]:
.with_columns(
[
pl.col("payout_right").fill_null(pl.col("payout")),
pl.col("predictedValue").fill_null(pl.col("predvalue")),
pl.col("predvalue_right").fill_null(pl.col("predvalue")),
pl.col("stake_right").fill_null(pl.col("stake")),
pl.col("timestamp_right").fill_null(pl.col("last_event_timestamp")),
]
Expand All @@ -144,7 +144,7 @@ def _process_payouts(tables: Dict[str, Table], ppss: PPSS) -> Dict[str, Table]:
.rename(
{
"payout_right": "payout",
"predictedValue": "predvalue",
"predvalue_right": "predvalue",
"stake_right": "stake",
"timestamp_right": "last_event_timestamp",
}
Expand Down
2 changes: 1 addition & 1 deletion pdr_backend/lake/table_pdr_payouts.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
"slot": Int64,
"timestamp": Int64,
"payout": Float64,
"predictedValue": Boolean,
"predvalue": Boolean,
"revenue": Float64,
"roundSumStakesUp": Float64,
"roundSumStakes": Float64,
Expand Down
4 changes: 2 additions & 2 deletions pdr_backend/lake/table_pdr_predictions.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@
"contract": Utf8,
"pair": Utf8,
"timeframe": Utf8,
"prediction": Boolean,
"predvalue": Boolean,
"stake": Float64,
"trueval": Boolean,
"truevalue": Boolean,
"timestamp": Int64,
"source": Utf8,
"payout": Float64,
Expand Down
2 changes: 1 addition & 1 deletion pdr_backend/lake/table_pdr_truevals.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,6 @@
"ID": Utf8,
"token": Utf8,
"timestamp": Int64,
"trueval": Boolean,
"truevalue": Boolean,
"slot": Int64,
}
4 changes: 2 additions & 2 deletions pdr_backend/lake/test/test_etl.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,11 +201,11 @@ def test_etl_do_bronze_step(

assert (
bronze_pdr_predictions_df["truevalue"][1]
== _gql_datafactory_etl_truevals_df["trueval"][1]
== _gql_datafactory_etl_truevals_df["truevalue"][1]
)
assert (
bronze_pdr_predictions_df["truevalue"][2]
== _gql_datafactory_etl_truevals_df["trueval"][2]
== _gql_datafactory_etl_truevals_df["truevalue"][2]
)

# Assert payout ts > prediction ts
Expand Down
6 changes: 3 additions & 3 deletions pdr_backend/lake/test/test_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def __init__(self, data):
self.ID = data["ID"]
self.pair = data["pair"]
self.timeframe = data["timeframe"]
self.prediction = data["prediction"]
self.predvalue = data["predvalue"]
self.payout = data["payout"]
self.timestamp = data["timestamp"]
self.slot = data["slot"]
Expand All @@ -28,7 +28,7 @@ def __init__(self, data):
"ID": "0x123",
"pair": "ADA-USDT",
"timeframe": "5m",
"prediction": True,
"predvalue": True,
"payout": 28.2,
"timestamp": 1701634400,
"slot": 1701634400,
Expand Down Expand Up @@ -66,7 +66,7 @@ def get_table_df(network, st_ut, fin_ut, config):
"ID": Utf8,
"pair": Utf8,
"timeframe": Utf8,
"prediction": Boolean,
"predvalue": Boolean,
"payout": Float64,
"timestamp": Int64,
"slot": Int64,
Expand Down
8 changes: 4 additions & 4 deletions pdr_backend/subgraph/payout.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def __init__(
slot: UnixTimeS,
timestamp: UnixTimeS,
payout: float,
predictedValue: bool,
predvalue: bool,
revenue: float,
roundSumStakesUp: float,
roundSumStakes: float,
Expand All @@ -26,7 +26,7 @@ def __init__(
self.token = token
self.slot = slot
self.payout = payout
self.predictedValue = predictedValue
self.predvalue = predvalue
self.revenue = revenue
self.roundSumStakesUp = roundSumStakesUp
self.roundSumStakes = roundSumStakes
Expand All @@ -42,7 +42,7 @@ def mock_payout(payout_tuple: tuple) -> Payout:
token,
slot,
payout,
predictedValue,
predvalue,
revenue,
roundSumStakesUp,
roundSumStakes,
Expand All @@ -56,7 +56,7 @@ def mock_payout(payout_tuple: tuple) -> Payout:
token=token,
slot=UnixTimeS(slot),
payout=payout,
predictedValue=predictedValue,
predvalue=predvalue,
revenue=revenue,
roundSumStakesUp=roundSumStakesUp,
roundSumStakes=roundSumStakes,
Expand Down
16 changes: 8 additions & 8 deletions pdr_backend/subgraph/prediction.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@ def __init__(
contract: str,
pair: str,
timeframe: str,
prediction: Union[bool, None], # prediction = subgraph.predicted_value
predvalue: Union[bool, None],
stake: Union[float, None],
trueval: Union[bool, None],
truevalue: Union[bool, None],
timestamp: UnixTimeS, # timestamp == prediction submitted timestamp
source: str,
payout: Union[float, None],
Expand All @@ -26,9 +26,9 @@ def __init__(
self.contract = contract
self.pair = pair
self.timeframe = timeframe
self.prediction = prediction # predvalue
self.predvalue = predvalue
self.stake = stake
self.trueval = trueval # truevalue
self.truevalue = truevalue
self.timestamp = timestamp
self.source = source
self.payout = payout
Expand All @@ -46,9 +46,9 @@ def mock_prediction(prediction_tuple: tuple) -> Prediction:
contract,
pair_str,
timeframe_str,
prediction,
predvalue,
stake,
trueval,
truevalue,
timestamp,
source,
payout,
Expand All @@ -62,9 +62,9 @@ def mock_prediction(prediction_tuple: tuple) -> Prediction:
contract=contract,
pair=pair_str,
timeframe=timeframe_str,
prediction=prediction,
predvalue=predvalue,
stake=stake,
trueval=trueval,
truevalue=truevalue,
timestamp=UnixTimeS(timestamp),
source=source,
payout=payout,
Expand Down
2 changes: 1 addition & 1 deletion pdr_backend/subgraph/subgraph_payout.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,8 +156,8 @@ def fetch_payouts(
"token": payout["prediction"]["slot"]["predictContract"]["token"][
"name"
],
"predvalue": bool(payout["predictedValue"]),
"slot": UnixTimeS(int(payout["id"].split("-")[1])),
"predictedValue": bool(payout["predictedValue"]),
"revenue": float(payout["prediction"]["slot"]["revenue"]),
"roundSumStakesUp": float(
payout["prediction"]["slot"]["roundSumStakesUp"]
Expand Down
8 changes: 4 additions & 4 deletions pdr_backend/subgraph/subgraph_predictions.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,14 +139,14 @@ def fetch_filtered_predictions(
slot = UnixTimeS(int(prediction_sg_dict["slot"]["slot"]))
user = prediction_sg_dict["user"]["id"]
address = prediction_sg_dict["id"].split("-")[0]
trueval = None
truevalue = None
payout = None
predicted_value = None
stake = None

if not prediction_sg_dict["payout"] is None:
stake = float(prediction_sg_dict["stake"])
trueval = prediction_sg_dict["payout"]["trueValue"]
truevalue = prediction_sg_dict["payout"]["trueValue"]
predicted_value = prediction_sg_dict["payout"]["predictedValue"]
payout = float(prediction_sg_dict["payout"]["payout"])

Expand All @@ -155,9 +155,9 @@ def fetch_filtered_predictions(
contract=address,
pair=pair,
timeframe=timeframe,
prediction=predicted_value,
predvalue=predicted_value,
stake=stake,
trueval=trueval,
truevalue=truevalue,
timestamp=timestamp,
source=source,
payout=payout,
Expand Down
2 changes: 1 addition & 1 deletion pdr_backend/subgraph/subgraph_trueval.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ def fetch_truevals(
ID=ID,
token=token,
timestamp=timestamp,
trueval=truevalue,
truevalue=truevalue,
slot=slot,
)

Expand Down
2 changes: 1 addition & 1 deletion pdr_backend/subgraph/test/test_subgraph_payout.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ def test_fetch_payouts(mock_query_subgraph):
assert payouts[0].timestamp == 1698527000
assert payouts[0].slot == 1696880700
assert payouts[0].payout == float(0)
assert payouts[0].predictedValue is True
assert payouts[0].predvalue is True
assert payouts[0].user == "0xd2a24cb4ff2584bad80ff5f109034a891c3d88dd"
assert payouts[0].stake == float(1.2)
assert mock_query_subgraph.call_count == 1
8 changes: 4 additions & 4 deletions pdr_backend/subgraph/test/test_subgraph_predictions.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@
contract="0x18f54cc21b7a2fdd011bea06bba7801b280e3151",
pair="ADA/USDT",
timeframe="5m",
prediction=True,
predvalue=True,
stake=0.050051425480971974,
trueval=False,
truevalue=False,
timestamp=UnixTimeS(1698527000),
source="binance",
payout=0.0,
Expand Down Expand Up @@ -128,8 +128,8 @@ def test_fetch_filtered_predictions(mock_query_subgraph):
assert predictions[0].user == "0xd2a24cb4ff2584bad80ff5f109034a891c3d88dd"
assert predictions[0].pair == "ADA/USDT"
assert predictions[0].contract == "0x18f54cc21b7a2fdd011bea06bba7801b280e3151"
assert predictions[0].trueval is False
assert predictions[0].prediction is True
assert predictions[0].truevalue is False
assert predictions[0].predvalue is True
assert mock_query_subgraph.call_count == 1


Expand Down
2 changes: 1 addition & 1 deletion pdr_backend/subgraph/test/test_subgraph_trueval.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,5 +63,5 @@ def test_fetch_filtered_truevals(mock_query_subgraph):
assert truevals[0].token == "ADA/USDT"
assert truevals[0].timestamp == 1698527000
assert truevals[0].slot == 1698527100
assert truevals[0].trueval is True
assert truevals[0].truevalue is True
assert mock_query_subgraph.call_count == 1
8 changes: 4 additions & 4 deletions pdr_backend/subgraph/trueval.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,11 @@ def __init__(
ID: str,
timestamp: UnixTimeS,
token: str,
trueval: Union[bool, None],
truevalue: Union[bool, None],
slot: UnixTimeS, # slot/epoch timestamp
) -> None:
self.ID = ID
self.trueval = trueval
self.truevalue = truevalue
self.timestamp = timestamp
self.token = token
self.slot = slot
Expand All @@ -28,12 +28,12 @@ def __init__(

@enforce_types
def mock_trueval(trueval_tuple: tuple) -> Trueval:
(ID, timestamp, token, trueval, slot) = trueval_tuple
(ID, timestamp, token, truevalue, slot) = trueval_tuple
return Trueval(
ID=ID,
token=token,
truevalue=truevalue,
slot=UnixTimeS(slot),
trueval=trueval,
timestamp=UnixTimeS(timestamp),
)

Expand Down
8 changes: 4 additions & 4 deletions pdr_backend/util/csvs.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,8 @@ def save_prediction_csv(all_predictions: List[Prediction], csv_output_dir: str):
all_predictions,
csv_output_dir,
{
"Predicted Value": "prediction",
"True Value": "trueval",
"Predicted Value": "predvalue",
"True Value": "truevalue",
"Timestamp": "timestamp",
"Stake": "stake",
"Payout": "payout",
Expand All @@ -101,7 +101,7 @@ def save_analysis_csv(all_predictions: List[Prediction], csv_output_dir: str):
"Stake": "stake",
"Wallet": "user",
"Payout": "payout",
"True Value": "trueval",
"Predicted Value": "prediction",
"True Value": "truevalue",
"Predicted Value": "predvalue",
},
)
8 changes: 4 additions & 4 deletions pdr_backend/util/test_noganache/test_csvs.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ def test_save_analysis_csv(tmpdir):
data = csv.DictReader(f)
data_rows = list(data)

assert data_rows[0]["Predicted Value"] == str(predictions[0].prediction)
assert data_rows[0]["True Value"] == str(predictions[0].trueval)
assert data_rows[0]["Predicted Value"] == str(predictions[0].predvalue)
assert data_rows[0]["True Value"] == str(predictions[0].truevalue)
assert data_rows[0]["Timestamp"] == str(predictions[0].timestamp)
assert list(data_rows[0].keys()) == [
"PredictionID",
Expand All @@ -46,8 +46,8 @@ def test_save_prediction_csv(tmpdir):
data = csv.DictReader(f)
data_rows = list(row for row in data)

assert data_rows[0]["Predicted Value"] == str(predictions[0].prediction)
assert data_rows[0]["True Value"] == str(predictions[0].trueval)
assert data_rows[0]["Predicted Value"] == str(predictions[0].predvalue)
assert data_rows[0]["True Value"] == str(predictions[0].truevalue)
assert data_rows[0]["Timestamp"] == str(predictions[0].timestamp)
assert list(data_rows[0].keys()) == [
"Predicted Value",
Expand Down

0 comments on commit b70778f

Please sign in to comment.