From b70778f4f7c721e4076133d5715250efd004b590 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mustafa=20Tun=C3=A7ay?= Date: Tue, 5 Mar 2024 04:51:25 +0300 Subject: [PATCH] #650 - Clean (predvalue, truevalue) columns (#664) * issue650 renaming * issue650 - test fixes * issue650 black format * issue650: fixes after merges * black fix * take-back the gql_data_factory from the main branch * Removed print statements --------- Co-authored-by: idiom-bytes --- pdr_backend/analytics/predictoor_stats.py | 12 ++++++------ pdr_backend/lake/table_bronze_pdr_predictions.py | 12 ++++++------ pdr_backend/lake/table_pdr_payouts.py | 2 +- pdr_backend/lake/table_pdr_predictions.py | 4 ++-- pdr_backend/lake/table_pdr_truevals.py | 2 +- pdr_backend/lake/test/test_etl.py | 4 ++-- pdr_backend/lake/test/test_table.py | 6 +++--- pdr_backend/subgraph/payout.py | 8 ++++---- pdr_backend/subgraph/prediction.py | 16 ++++++++-------- pdr_backend/subgraph/subgraph_payout.py | 2 +- pdr_backend/subgraph/subgraph_predictions.py | 8 ++++---- pdr_backend/subgraph/subgraph_trueval.py | 2 +- .../subgraph/test/test_subgraph_payout.py | 2 +- .../subgraph/test/test_subgraph_predictions.py | 8 ++++---- .../subgraph/test/test_subgraph_trueval.py | 2 +- pdr_backend/subgraph/trueval.py | 8 ++++---- pdr_backend/util/csvs.py | 8 ++++---- pdr_backend/util/test_noganache/test_csvs.py | 8 ++++---- 18 files changed, 57 insertions(+), 57 deletions(-) diff --git a/pdr_backend/analytics/predictoor_stats.py b/pdr_backend/analytics/predictoor_stats.py index 27520a85d..bdc8809c9 100644 --- a/pdr_backend/analytics/predictoor_stats.py +++ b/pdr_backend/analytics/predictoor_stats.py @@ -34,7 +34,7 @@ class PredictoorStat(TypedDict): def get_feed_summary_stats(predictions_df: pl.DataFrame) -> pl.DataFrame: # 1 - filter from lake only the rows that you're looking for df = predictions_df.filter( - ~((pl.col("trueval").is_null()) | (pl.col("payout").is_null())) + ~((pl.col("truevalue").is_null()) | (pl.col("payout").is_null())) ) # Group by pair @@ -42,8 +42,8 @@ def get_feed_summary_stats(predictions_df: pl.DataFrame) -> pl.DataFrame: pl.col("source").first().alias("source"), pl.col("payout").sum().alias("sum_payout"), pl.col("stake").sum().alias("sum_stake"), - pl.col("prediction").count().alias("num_predictions"), - (pl.col("prediction").sum() / pl.col("pair").count() * 100).alias("accuracy"), + pl.col("predvalue").count().alias("num_predictions"), + (pl.col("predvalue").sum() / pl.col("pair").count() * 100).alias("accuracy"), ) return df @@ -53,7 +53,7 @@ def get_feed_summary_stats(predictions_df: pl.DataFrame) -> pl.DataFrame: def get_predictoor_summary_stats(predictions_df: pl.DataFrame) -> pl.DataFrame: # 1 - filter from lake only the rows that you're looking for df = predictions_df.filter( - ~((pl.col("trueval").is_null()) | (pl.col("payout").is_null())) + ~((pl.col("truevalue").is_null()) | (pl.col("payout").is_null())) ) # Group by pair @@ -61,8 +61,8 @@ def get_predictoor_summary_stats(predictions_df: pl.DataFrame) -> pl.DataFrame: pl.col("source").first().alias("source"), pl.col("payout").sum().alias("sum_payout"), pl.col("stake").sum().alias("sum_stake"), - pl.col("prediction").count().alias("num_predictions"), - (pl.col("prediction").sum() / pl.col("pair").count() * 100).alias("accuracy"), + pl.col("predvalue").count().alias("num_predictions"), + (pl.col("predvalue").sum() / pl.col("pair").count() * 100).alias("accuracy"), ) return df diff --git a/pdr_backend/lake/table_bronze_pdr_predictions.py b/pdr_backend/lake/table_bronze_pdr_predictions.py index 8146ec391..314254a3d 100644 --- a/pdr_backend/lake/table_bronze_pdr_predictions.py +++ b/pdr_backend/lake/table_bronze_pdr_predictions.py @@ -59,8 +59,8 @@ def get_slot_id(_id: str) -> str: bronze_predictions_df = predictions_df.with_columns( [ pl.col("ID").map_elements(get_slot_id, return_dtype=Utf8).alias("slot_id"), - pl.col("prediction").alias("predvalue"), - pl.col("trueval").alias("truevalue"), + pl.col("predvalue").alias("predvalue"), + pl.col("truevalue").alias("truevalue"), pl.col("timestamp").alias("timestamp"), pl.col("timestamp").alias("last_event_timestamp"), ] @@ -93,14 +93,14 @@ def _process_truevals(tables: Dict[str, Table], ppss: PPSS) -> Dict[str, Table]: predictions_df.join(truevals_df, left_on="slot_id", right_on="ID", how="left") .with_columns( [ - pl.col("trueval").fill_null(pl.col("truevalue")), + pl.col("truevalue_right").fill_null(pl.col("truevalue")), pl.col("timestamp_right").fill_null(pl.col("last_event_timestamp")), ] ) .drop(["truevalue", "last_event_timestamp"]) .rename( { - "trueval": "truevalue", + "truevalue_right": "truevalue", "timestamp_right": "last_event_timestamp", } ) @@ -135,7 +135,7 @@ def _process_payouts(tables: Dict[str, Table], ppss: PPSS) -> Dict[str, Table]: .with_columns( [ pl.col("payout_right").fill_null(pl.col("payout")), - pl.col("predictedValue").fill_null(pl.col("predvalue")), + pl.col("predvalue_right").fill_null(pl.col("predvalue")), pl.col("stake_right").fill_null(pl.col("stake")), pl.col("timestamp_right").fill_null(pl.col("last_event_timestamp")), ] @@ -144,7 +144,7 @@ def _process_payouts(tables: Dict[str, Table], ppss: PPSS) -> Dict[str, Table]: .rename( { "payout_right": "payout", - "predictedValue": "predvalue", + "predvalue_right": "predvalue", "stake_right": "stake", "timestamp_right": "last_event_timestamp", } diff --git a/pdr_backend/lake/table_pdr_payouts.py b/pdr_backend/lake/table_pdr_payouts.py index 9304f5bdb..e76a281ba 100644 --- a/pdr_backend/lake/table_pdr_payouts.py +++ b/pdr_backend/lake/table_pdr_payouts.py @@ -10,7 +10,7 @@ "slot": Int64, "timestamp": Int64, "payout": Float64, - "predictedValue": Boolean, + "predvalue": Boolean, "revenue": Float64, "roundSumStakesUp": Float64, "roundSumStakes": Float64, diff --git a/pdr_backend/lake/table_pdr_predictions.py b/pdr_backend/lake/table_pdr_predictions.py index 8475af189..8b5903f74 100644 --- a/pdr_backend/lake/table_pdr_predictions.py +++ b/pdr_backend/lake/table_pdr_predictions.py @@ -10,9 +10,9 @@ "contract": Utf8, "pair": Utf8, "timeframe": Utf8, - "prediction": Boolean, + "predvalue": Boolean, "stake": Float64, - "trueval": Boolean, + "truevalue": Boolean, "timestamp": Int64, "source": Utf8, "payout": Float64, diff --git a/pdr_backend/lake/table_pdr_truevals.py b/pdr_backend/lake/table_pdr_truevals.py index 7f5b68fde..619bec2bc 100644 --- a/pdr_backend/lake/table_pdr_truevals.py +++ b/pdr_backend/lake/table_pdr_truevals.py @@ -7,6 +7,6 @@ "ID": Utf8, "token": Utf8, "timestamp": Int64, - "trueval": Boolean, + "truevalue": Boolean, "slot": Int64, } diff --git a/pdr_backend/lake/test/test_etl.py b/pdr_backend/lake/test/test_etl.py index 64d747642..091b00b1e 100644 --- a/pdr_backend/lake/test/test_etl.py +++ b/pdr_backend/lake/test/test_etl.py @@ -201,11 +201,11 @@ def test_etl_do_bronze_step( assert ( bronze_pdr_predictions_df["truevalue"][1] - == _gql_datafactory_etl_truevals_df["trueval"][1] + == _gql_datafactory_etl_truevals_df["truevalue"][1] ) assert ( bronze_pdr_predictions_df["truevalue"][2] - == _gql_datafactory_etl_truevals_df["trueval"][2] + == _gql_datafactory_etl_truevals_df["truevalue"][2] ) # Assert payout ts > prediction ts diff --git a/pdr_backend/lake/test/test_table.py b/pdr_backend/lake/test/test_table.py index 99fb60098..2b7495b0b 100644 --- a/pdr_backend/lake/test/test_table.py +++ b/pdr_backend/lake/test/test_table.py @@ -17,7 +17,7 @@ def __init__(self, data): self.ID = data["ID"] self.pair = data["pair"] self.timeframe = data["timeframe"] - self.prediction = data["prediction"] + self.predvalue = data["predvalue"] self.payout = data["payout"] self.timestamp = data["timestamp"] self.slot = data["slot"] @@ -28,7 +28,7 @@ def __init__(self, data): "ID": "0x123", "pair": "ADA-USDT", "timeframe": "5m", - "prediction": True, + "predvalue": True, "payout": 28.2, "timestamp": 1701634400, "slot": 1701634400, @@ -66,7 +66,7 @@ def get_table_df(network, st_ut, fin_ut, config): "ID": Utf8, "pair": Utf8, "timeframe": Utf8, - "prediction": Boolean, + "predvalue": Boolean, "payout": Float64, "timestamp": Int64, "slot": Int64, diff --git a/pdr_backend/subgraph/payout.py b/pdr_backend/subgraph/payout.py index 5fcd6dd2f..f1baead0e 100644 --- a/pdr_backend/subgraph/payout.py +++ b/pdr_backend/subgraph/payout.py @@ -14,7 +14,7 @@ def __init__( slot: UnixTimeS, timestamp: UnixTimeS, payout: float, - predictedValue: bool, + predvalue: bool, revenue: float, roundSumStakesUp: float, roundSumStakes: float, @@ -26,7 +26,7 @@ def __init__( self.token = token self.slot = slot self.payout = payout - self.predictedValue = predictedValue + self.predvalue = predvalue self.revenue = revenue self.roundSumStakesUp = roundSumStakesUp self.roundSumStakes = roundSumStakes @@ -42,7 +42,7 @@ def mock_payout(payout_tuple: tuple) -> Payout: token, slot, payout, - predictedValue, + predvalue, revenue, roundSumStakesUp, roundSumStakes, @@ -56,7 +56,7 @@ def mock_payout(payout_tuple: tuple) -> Payout: token=token, slot=UnixTimeS(slot), payout=payout, - predictedValue=predictedValue, + predvalue=predvalue, revenue=revenue, roundSumStakesUp=roundSumStakesUp, roundSumStakes=roundSumStakes, diff --git a/pdr_backend/subgraph/prediction.py b/pdr_backend/subgraph/prediction.py index c9d5dbb47..b959df666 100644 --- a/pdr_backend/subgraph/prediction.py +++ b/pdr_backend/subgraph/prediction.py @@ -13,9 +13,9 @@ def __init__( contract: str, pair: str, timeframe: str, - prediction: Union[bool, None], # prediction = subgraph.predicted_value + predvalue: Union[bool, None], stake: Union[float, None], - trueval: Union[bool, None], + truevalue: Union[bool, None], timestamp: UnixTimeS, # timestamp == prediction submitted timestamp source: str, payout: Union[float, None], @@ -26,9 +26,9 @@ def __init__( self.contract = contract self.pair = pair self.timeframe = timeframe - self.prediction = prediction # predvalue + self.predvalue = predvalue self.stake = stake - self.trueval = trueval # truevalue + self.truevalue = truevalue self.timestamp = timestamp self.source = source self.payout = payout @@ -46,9 +46,9 @@ def mock_prediction(prediction_tuple: tuple) -> Prediction: contract, pair_str, timeframe_str, - prediction, + predvalue, stake, - trueval, + truevalue, timestamp, source, payout, @@ -62,9 +62,9 @@ def mock_prediction(prediction_tuple: tuple) -> Prediction: contract=contract, pair=pair_str, timeframe=timeframe_str, - prediction=prediction, + predvalue=predvalue, stake=stake, - trueval=trueval, + truevalue=truevalue, timestamp=UnixTimeS(timestamp), source=source, payout=payout, diff --git a/pdr_backend/subgraph/subgraph_payout.py b/pdr_backend/subgraph/subgraph_payout.py index 069b64f10..2a77e94a0 100644 --- a/pdr_backend/subgraph/subgraph_payout.py +++ b/pdr_backend/subgraph/subgraph_payout.py @@ -156,8 +156,8 @@ def fetch_payouts( "token": payout["prediction"]["slot"]["predictContract"]["token"][ "name" ], + "predvalue": bool(payout["predictedValue"]), "slot": UnixTimeS(int(payout["id"].split("-")[1])), - "predictedValue": bool(payout["predictedValue"]), "revenue": float(payout["prediction"]["slot"]["revenue"]), "roundSumStakesUp": float( payout["prediction"]["slot"]["roundSumStakesUp"] diff --git a/pdr_backend/subgraph/subgraph_predictions.py b/pdr_backend/subgraph/subgraph_predictions.py index 81b48d665..62fa0d4ad 100644 --- a/pdr_backend/subgraph/subgraph_predictions.py +++ b/pdr_backend/subgraph/subgraph_predictions.py @@ -139,14 +139,14 @@ def fetch_filtered_predictions( slot = UnixTimeS(int(prediction_sg_dict["slot"]["slot"])) user = prediction_sg_dict["user"]["id"] address = prediction_sg_dict["id"].split("-")[0] - trueval = None + truevalue = None payout = None predicted_value = None stake = None if not prediction_sg_dict["payout"] is None: stake = float(prediction_sg_dict["stake"]) - trueval = prediction_sg_dict["payout"]["trueValue"] + truevalue = prediction_sg_dict["payout"]["trueValue"] predicted_value = prediction_sg_dict["payout"]["predictedValue"] payout = float(prediction_sg_dict["payout"]["payout"]) @@ -155,9 +155,9 @@ def fetch_filtered_predictions( contract=address, pair=pair, timeframe=timeframe, - prediction=predicted_value, + predvalue=predicted_value, stake=stake, - trueval=trueval, + truevalue=truevalue, timestamp=timestamp, source=source, payout=payout, diff --git a/pdr_backend/subgraph/subgraph_trueval.py b/pdr_backend/subgraph/subgraph_trueval.py index b6578c3cb..e9dcc885d 100644 --- a/pdr_backend/subgraph/subgraph_trueval.py +++ b/pdr_backend/subgraph/subgraph_trueval.py @@ -118,7 +118,7 @@ def fetch_truevals( ID=ID, token=token, timestamp=timestamp, - trueval=truevalue, + truevalue=truevalue, slot=slot, ) diff --git a/pdr_backend/subgraph/test/test_subgraph_payout.py b/pdr_backend/subgraph/test/test_subgraph_payout.py index 9d89a36be..fd71bc978 100644 --- a/pdr_backend/subgraph/test/test_subgraph_payout.py +++ b/pdr_backend/subgraph/test/test_subgraph_payout.py @@ -81,7 +81,7 @@ def test_fetch_payouts(mock_query_subgraph): assert payouts[0].timestamp == 1698527000 assert payouts[0].slot == 1696880700 assert payouts[0].payout == float(0) - assert payouts[0].predictedValue is True + assert payouts[0].predvalue is True assert payouts[0].user == "0xd2a24cb4ff2584bad80ff5f109034a891c3d88dd" assert payouts[0].stake == float(1.2) assert mock_query_subgraph.call_count == 1 diff --git a/pdr_backend/subgraph/test/test_subgraph_predictions.py b/pdr_backend/subgraph/test/test_subgraph_predictions.py index 266525b59..5cc9a5f85 100644 --- a/pdr_backend/subgraph/test/test_subgraph_predictions.py +++ b/pdr_backend/subgraph/test/test_subgraph_predictions.py @@ -20,9 +20,9 @@ contract="0x18f54cc21b7a2fdd011bea06bba7801b280e3151", pair="ADA/USDT", timeframe="5m", - prediction=True, + predvalue=True, stake=0.050051425480971974, - trueval=False, + truevalue=False, timestamp=UnixTimeS(1698527000), source="binance", payout=0.0, @@ -128,8 +128,8 @@ def test_fetch_filtered_predictions(mock_query_subgraph): assert predictions[0].user == "0xd2a24cb4ff2584bad80ff5f109034a891c3d88dd" assert predictions[0].pair == "ADA/USDT" assert predictions[0].contract == "0x18f54cc21b7a2fdd011bea06bba7801b280e3151" - assert predictions[0].trueval is False - assert predictions[0].prediction is True + assert predictions[0].truevalue is False + assert predictions[0].predvalue is True assert mock_query_subgraph.call_count == 1 diff --git a/pdr_backend/subgraph/test/test_subgraph_trueval.py b/pdr_backend/subgraph/test/test_subgraph_trueval.py index 44e4d92f2..72e76f0b6 100644 --- a/pdr_backend/subgraph/test/test_subgraph_trueval.py +++ b/pdr_backend/subgraph/test/test_subgraph_trueval.py @@ -63,5 +63,5 @@ def test_fetch_filtered_truevals(mock_query_subgraph): assert truevals[0].token == "ADA/USDT" assert truevals[0].timestamp == 1698527000 assert truevals[0].slot == 1698527100 - assert truevals[0].trueval is True + assert truevals[0].truevalue is True assert mock_query_subgraph.call_count == 1 diff --git a/pdr_backend/subgraph/trueval.py b/pdr_backend/subgraph/trueval.py index 946ec7906..67841249c 100644 --- a/pdr_backend/subgraph/trueval.py +++ b/pdr_backend/subgraph/trueval.py @@ -12,11 +12,11 @@ def __init__( ID: str, timestamp: UnixTimeS, token: str, - trueval: Union[bool, None], + truevalue: Union[bool, None], slot: UnixTimeS, # slot/epoch timestamp ) -> None: self.ID = ID - self.trueval = trueval + self.truevalue = truevalue self.timestamp = timestamp self.token = token self.slot = slot @@ -28,12 +28,12 @@ def __init__( @enforce_types def mock_trueval(trueval_tuple: tuple) -> Trueval: - (ID, timestamp, token, trueval, slot) = trueval_tuple + (ID, timestamp, token, truevalue, slot) = trueval_tuple return Trueval( ID=ID, token=token, + truevalue=truevalue, slot=UnixTimeS(slot), - trueval=trueval, timestamp=UnixTimeS(timestamp), ) diff --git a/pdr_backend/util/csvs.py b/pdr_backend/util/csvs.py index c3300c5c8..ebf5f38fa 100644 --- a/pdr_backend/util/csvs.py +++ b/pdr_backend/util/csvs.py @@ -80,8 +80,8 @@ def save_prediction_csv(all_predictions: List[Prediction], csv_output_dir: str): all_predictions, csv_output_dir, { - "Predicted Value": "prediction", - "True Value": "trueval", + "Predicted Value": "predvalue", + "True Value": "truevalue", "Timestamp": "timestamp", "Stake": "stake", "Payout": "payout", @@ -101,7 +101,7 @@ def save_analysis_csv(all_predictions: List[Prediction], csv_output_dir: str): "Stake": "stake", "Wallet": "user", "Payout": "payout", - "True Value": "trueval", - "Predicted Value": "prediction", + "True Value": "truevalue", + "Predicted Value": "predvalue", }, ) diff --git a/pdr_backend/util/test_noganache/test_csvs.py b/pdr_backend/util/test_noganache/test_csvs.py index 35a5f91e0..d285797bc 100644 --- a/pdr_backend/util/test_noganache/test_csvs.py +++ b/pdr_backend/util/test_noganache/test_csvs.py @@ -18,8 +18,8 @@ def test_save_analysis_csv(tmpdir): data = csv.DictReader(f) data_rows = list(data) - assert data_rows[0]["Predicted Value"] == str(predictions[0].prediction) - assert data_rows[0]["True Value"] == str(predictions[0].trueval) + assert data_rows[0]["Predicted Value"] == str(predictions[0].predvalue) + assert data_rows[0]["True Value"] == str(predictions[0].truevalue) assert data_rows[0]["Timestamp"] == str(predictions[0].timestamp) assert list(data_rows[0].keys()) == [ "PredictionID", @@ -46,8 +46,8 @@ def test_save_prediction_csv(tmpdir): data = csv.DictReader(f) data_rows = list(row for row in data) - assert data_rows[0]["Predicted Value"] == str(predictions[0].prediction) - assert data_rows[0]["True Value"] == str(predictions[0].trueval) + assert data_rows[0]["Predicted Value"] == str(predictions[0].predvalue) + assert data_rows[0]["True Value"] == str(predictions[0].truevalue) assert data_rows[0]["Timestamp"] == str(predictions[0].timestamp) assert list(data_rows[0].keys()) == [ "Predicted Value",