From b70778f4f7c721e4076133d5715250efd004b590 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mustafa=20Tun=C3=A7ay?= <mustafaislev@gmail.com>
Date: Tue, 5 Mar 2024 04:51:25 +0300
Subject: [PATCH] #650 - Clean (predvalue, truevalue) columns (#664)

* issue650 renaming

* issue650 - test fixes

* issue650 black format

* issue650: fixes after merges

* black fix

* take back the gql_data_factory from the main branch

* Removed print statements

---------

Co-authored-by: idiom-bytes <idiom.bytes@gmail.com>
---
 pdr_backend/analytics/predictoor_stats.py        | 12 ++++++------
 pdr_backend/lake/table_bronze_pdr_predictions.py | 12 ++++++------
 pdr_backend/lake/table_pdr_payouts.py            |  2 +-
 pdr_backend/lake/table_pdr_predictions.py        |  4 ++--
 pdr_backend/lake/table_pdr_truevals.py           |  2 +-
 pdr_backend/lake/test/test_etl.py                |  4 ++--
 pdr_backend/lake/test/test_table.py              |  6 +++---
 pdr_backend/subgraph/payout.py                   |  8 ++++----
 pdr_backend/subgraph/prediction.py               | 16 ++++++++--------
 pdr_backend/subgraph/subgraph_payout.py          |  2 +-
 pdr_backend/subgraph/subgraph_predictions.py     |  8 ++++----
 pdr_backend/subgraph/subgraph_trueval.py         |  2 +-
 .../subgraph/test/test_subgraph_payout.py        |  2 +-
 .../subgraph/test/test_subgraph_predictions.py   |  8 ++++----
 .../subgraph/test/test_subgraph_trueval.py       |  2 +-
 pdr_backend/subgraph/trueval.py                  |  8 ++++----
 pdr_backend/util/csvs.py                         |  8 ++++----
 pdr_backend/util/test_noganache/test_csvs.py     |  8 ++++----
 18 files changed, 57 insertions(+), 57 deletions(-)

diff --git a/pdr_backend/analytics/predictoor_stats.py b/pdr_backend/analytics/predictoor_stats.py
index 27520a85d..bdc8809c9 100644
--- a/pdr_backend/analytics/predictoor_stats.py
+++ b/pdr_backend/analytics/predictoor_stats.py
@@ -34,7 +34,7 @@ class PredictoorStat(TypedDict):
 def get_feed_summary_stats(predictions_df: pl.DataFrame) -> pl.DataFrame:
     # 1 - filter from lake only the rows that you're looking for
     df = predictions_df.filter(
-        ~((pl.col("trueval").is_null()) | (pl.col("payout").is_null()))
+        ~((pl.col("truevalue").is_null()) | (pl.col("payout").is_null()))
     )
 
     # Group by pair
@@ -42,8 +42,8 @@ def get_feed_summary_stats(predictions_df: pl.DataFrame) -> pl.DataFrame:
         pl.col("source").first().alias("source"),
         pl.col("payout").sum().alias("sum_payout"),
         pl.col("stake").sum().alias("sum_stake"),
-        pl.col("prediction").count().alias("num_predictions"),
-        (pl.col("prediction").sum() / pl.col("pair").count() * 100).alias("accuracy"),
+        pl.col("predvalue").count().alias("num_predictions"),
+        (pl.col("predvalue").sum() / pl.col("pair").count() * 100).alias("accuracy"),
     )
 
     return df
@@ -53,7 +53,7 @@ def get_feed_summary_stats(predictions_df: pl.DataFrame) -> pl.DataFrame:
 def get_predictoor_summary_stats(predictions_df: pl.DataFrame) -> pl.DataFrame:
     # 1 - filter from lake only the rows that you're looking for
     df = predictions_df.filter(
-        ~((pl.col("trueval").is_null()) | (pl.col("payout").is_null()))
+        ~((pl.col("truevalue").is_null()) | (pl.col("payout").is_null()))
     )
 
     # Group by pair
@@ -61,8 +61,8 @@ def get_predictoor_summary_stats(predictions_df: pl.DataFrame) -> pl.DataFrame:
         pl.col("source").first().alias("source"),
         pl.col("payout").sum().alias("sum_payout"),
         pl.col("stake").sum().alias("sum_stake"),
-        pl.col("prediction").count().alias("num_predictions"),
-        (pl.col("prediction").sum() / pl.col("pair").count() * 100).alias("accuracy"),
+        pl.col("predvalue").count().alias("num_predictions"),
+        (pl.col("predvalue").sum() / pl.col("pair").count() * 100).alias("accuracy"),
     )
 
     return df
diff --git a/pdr_backend/lake/table_bronze_pdr_predictions.py b/pdr_backend/lake/table_bronze_pdr_predictions.py
index 8146ec391..314254a3d 100644
--- a/pdr_backend/lake/table_bronze_pdr_predictions.py
+++ b/pdr_backend/lake/table_bronze_pdr_predictions.py
@@ -59,8 +59,8 @@ def get_slot_id(_id: str) -> str:
     bronze_predictions_df = predictions_df.with_columns(
         [
             pl.col("ID").map_elements(get_slot_id, return_dtype=Utf8).alias("slot_id"),
-            pl.col("prediction").alias("predvalue"),
-            pl.col("trueval").alias("truevalue"),
+            pl.col("predvalue").alias("predvalue"),
+            pl.col("truevalue").alias("truevalue"),
             pl.col("timestamp").alias("timestamp"),
             pl.col("timestamp").alias("last_event_timestamp"),
         ]
@@ -93,14 +93,14 @@ def _process_truevals(tables: Dict[str, Table], ppss: PPSS) -> Dict[str, Table]:
         predictions_df.join(truevals_df, left_on="slot_id", right_on="ID", how="left")
         .with_columns(
             [
-                pl.col("trueval").fill_null(pl.col("truevalue")),
+                pl.col("truevalue_right").fill_null(pl.col("truevalue")),
                 pl.col("timestamp_right").fill_null(pl.col("last_event_timestamp")),
             ]
         )
         .drop(["truevalue", "last_event_timestamp"])
         .rename(
             {
-                "trueval": "truevalue",
+                "truevalue_right": "truevalue",
                 "timestamp_right": "last_event_timestamp",
             }
         )
@@ -135,7 +135,7 @@ def _process_payouts(tables: Dict[str, Table], ppss: PPSS) -> Dict[str, Table]:
         .with_columns(
             [
                 pl.col("payout_right").fill_null(pl.col("payout")),
-                pl.col("predictedValue").fill_null(pl.col("predvalue")),
+                pl.col("predvalue_right").fill_null(pl.col("predvalue")),
                 pl.col("stake_right").fill_null(pl.col("stake")),
                 pl.col("timestamp_right").fill_null(pl.col("last_event_timestamp")),
             ]
@@ -144,7 +144,7 @@ def _process_payouts(tables: Dict[str, Table], ppss: PPSS) -> Dict[str, Table]:
         .rename(
             {
                 "payout_right": "payout",
-                "predictedValue": "predvalue",
+                "predvalue_right": "predvalue",
                 "stake_right": "stake",
                 "timestamp_right": "last_event_timestamp",
             }
diff --git a/pdr_backend/lake/table_pdr_payouts.py b/pdr_backend/lake/table_pdr_payouts.py
index 9304f5bdb..e76a281ba 100644
--- a/pdr_backend/lake/table_pdr_payouts.py
+++ b/pdr_backend/lake/table_pdr_payouts.py
@@ -10,7 +10,7 @@
     "slot": Int64,
     "timestamp": Int64,
     "payout": Float64,
-    "predictedValue": Boolean,
+    "predvalue": Boolean,
     "revenue": Float64,
     "roundSumStakesUp": Float64,
     "roundSumStakes": Float64,
diff --git a/pdr_backend/lake/table_pdr_predictions.py b/pdr_backend/lake/table_pdr_predictions.py
index 8475af189..8b5903f74 100644
--- a/pdr_backend/lake/table_pdr_predictions.py
+++ b/pdr_backend/lake/table_pdr_predictions.py
@@ -10,9 +10,9 @@
     "contract": Utf8,
     "pair": Utf8,
     "timeframe": Utf8,
-    "prediction": Boolean,
+    "predvalue": Boolean,
     "stake": Float64,
-    "trueval": Boolean,
+    "truevalue": Boolean,
     "timestamp": Int64,
     "source": Utf8,
     "payout": Float64,
diff --git a/pdr_backend/lake/table_pdr_truevals.py b/pdr_backend/lake/table_pdr_truevals.py
index 7f5b68fde..619bec2bc 100644
--- a/pdr_backend/lake/table_pdr_truevals.py
+++ b/pdr_backend/lake/table_pdr_truevals.py
@@ -7,6 +7,6 @@
     "ID": Utf8,
     "token": Utf8,
     "timestamp": Int64,
-    "trueval": Boolean,
+    "truevalue": Boolean,
     "slot": Int64,
 }
diff --git a/pdr_backend/lake/test/test_etl.py b/pdr_backend/lake/test/test_etl.py
index 64d747642..091b00b1e 100644
--- a/pdr_backend/lake/test/test_etl.py
+++ b/pdr_backend/lake/test/test_etl.py
@@ -201,11 +201,11 @@ def test_etl_do_bronze_step(
 
     assert (
         bronze_pdr_predictions_df["truevalue"][1]
-        == _gql_datafactory_etl_truevals_df["trueval"][1]
+        == _gql_datafactory_etl_truevals_df["truevalue"][1]
     )
     assert (
         bronze_pdr_predictions_df["truevalue"][2]
-        == _gql_datafactory_etl_truevals_df["trueval"][2]
+        == _gql_datafactory_etl_truevals_df["truevalue"][2]
     )
 
     # Assert payout ts > prediction ts
diff --git a/pdr_backend/lake/test/test_table.py b/pdr_backend/lake/test/test_table.py
index 99fb60098..2b7495b0b 100644
--- a/pdr_backend/lake/test/test_table.py
+++ b/pdr_backend/lake/test/test_table.py
@@ -17,7 +17,7 @@ def __init__(self, data):
         self.ID = data["ID"]
         self.pair = data["pair"]
         self.timeframe = data["timeframe"]
-        self.prediction = data["prediction"]
+        self.predvalue = data["predvalue"]
         self.payout = data["payout"]
         self.timestamp = data["timestamp"]
         self.slot = data["slot"]
@@ -28,7 +28,7 @@ def __init__(self, data):
     "ID": "0x123",
     "pair": "ADA-USDT",
     "timeframe": "5m",
-    "prediction": True,
+    "predvalue": True,
     "payout": 28.2,
     "timestamp": 1701634400,
     "slot": 1701634400,
@@ -66,7 +66,7 @@ def get_table_df(network, st_ut, fin_ut, config):
     "ID": Utf8,
     "pair": Utf8,
     "timeframe": Utf8,
-    "prediction": Boolean,
+    "predvalue": Boolean,
     "payout": Float64,
     "timestamp": Int64,
     "slot": Int64,
diff --git a/pdr_backend/subgraph/payout.py b/pdr_backend/subgraph/payout.py
index 5fcd6dd2f..f1baead0e 100644
--- a/pdr_backend/subgraph/payout.py
+++ b/pdr_backend/subgraph/payout.py
@@ -14,7 +14,7 @@ def __init__(
         slot: UnixTimeS,
         timestamp: UnixTimeS,
         payout: float,
-        predictedValue: bool,
+        predvalue: bool,
         revenue: float,
         roundSumStakesUp: float,
         roundSumStakes: float,
@@ -26,7 +26,7 @@ def __init__(
         self.token = token
         self.slot = slot
         self.payout = payout
-        self.predictedValue = predictedValue
+        self.predvalue = predvalue
         self.revenue = revenue
         self.roundSumStakesUp = roundSumStakesUp
         self.roundSumStakes = roundSumStakes
@@ -42,7 +42,7 @@ def mock_payout(payout_tuple: tuple) -> Payout:
         token,
         slot,
         payout,
-        predictedValue,
+        predvalue,
         revenue,
         roundSumStakesUp,
         roundSumStakes,
@@ -56,7 +56,7 @@ def mock_payout(payout_tuple: tuple) -> Payout:
         token=token,
         slot=UnixTimeS(slot),
         payout=payout,
-        predictedValue=predictedValue,
+        predvalue=predvalue,
         revenue=revenue,
         roundSumStakesUp=roundSumStakesUp,
         roundSumStakes=roundSumStakes,
diff --git a/pdr_backend/subgraph/prediction.py b/pdr_backend/subgraph/prediction.py
index c9d5dbb47..b959df666 100644
--- a/pdr_backend/subgraph/prediction.py
+++ b/pdr_backend/subgraph/prediction.py
@@ -13,9 +13,9 @@ def __init__(
         contract: str,
         pair: str,
         timeframe: str,
-        prediction: Union[bool, None],  # prediction = subgraph.predicted_value
+        predvalue: Union[bool, None],
         stake: Union[float, None],
-        trueval: Union[bool, None],
+        truevalue: Union[bool, None],
         timestamp: UnixTimeS,  # timestamp == prediction submitted timestamp
         source: str,
         payout: Union[float, None],
@@ -26,9 +26,9 @@ def __init__(
         self.contract = contract
         self.pair = pair
         self.timeframe = timeframe
-        self.prediction = prediction  # predvalue
+        self.predvalue = predvalue
         self.stake = stake
-        self.trueval = trueval  # truevalue
+        self.truevalue = truevalue
         self.timestamp = timestamp
         self.source = source
         self.payout = payout
@@ -46,9 +46,9 @@ def mock_prediction(prediction_tuple: tuple) -> Prediction:
         contract,
         pair_str,
         timeframe_str,
-        prediction,
+        predvalue,
         stake,
-        trueval,
+        truevalue,
         timestamp,
         source,
         payout,
@@ -62,9 +62,9 @@ def mock_prediction(prediction_tuple: tuple) -> Prediction:
         contract=contract,
         pair=pair_str,
         timeframe=timeframe_str,
-        prediction=prediction,
+        predvalue=predvalue,
         stake=stake,
-        trueval=trueval,
+        truevalue=truevalue,
         timestamp=UnixTimeS(timestamp),
         source=source,
         payout=payout,
diff --git a/pdr_backend/subgraph/subgraph_payout.py b/pdr_backend/subgraph/subgraph_payout.py
index 069b64f10..2a77e94a0 100644
--- a/pdr_backend/subgraph/subgraph_payout.py
+++ b/pdr_backend/subgraph/subgraph_payout.py
@@ -156,8 +156,8 @@ def fetch_payouts(
                 "token": payout["prediction"]["slot"]["predictContract"]["token"][
                     "name"
                 ],
+                "predvalue": bool(payout["predictedValue"]),
                 "slot": UnixTimeS(int(payout["id"].split("-")[1])),
-                "predictedValue": bool(payout["predictedValue"]),
                 "revenue": float(payout["prediction"]["slot"]["revenue"]),
                 "roundSumStakesUp": float(
                     payout["prediction"]["slot"]["roundSumStakesUp"]
diff --git a/pdr_backend/subgraph/subgraph_predictions.py b/pdr_backend/subgraph/subgraph_predictions.py
index 81b48d665..62fa0d4ad 100644
--- a/pdr_backend/subgraph/subgraph_predictions.py
+++ b/pdr_backend/subgraph/subgraph_predictions.py
@@ -139,14 +139,14 @@ def fetch_filtered_predictions(
         slot = UnixTimeS(int(prediction_sg_dict["slot"]["slot"]))
         user = prediction_sg_dict["user"]["id"]
         address = prediction_sg_dict["id"].split("-")[0]
-        trueval = None
+        truevalue = None
         payout = None
         predicted_value = None
         stake = None
 
         if not prediction_sg_dict["payout"] is None:
             stake = float(prediction_sg_dict["stake"])
-            trueval = prediction_sg_dict["payout"]["trueValue"]
+            truevalue = prediction_sg_dict["payout"]["trueValue"]
             predicted_value = prediction_sg_dict["payout"]["predictedValue"]
             payout = float(prediction_sg_dict["payout"]["payout"])
 
@@ -155,9 +155,9 @@ def fetch_filtered_predictions(
             contract=address,
             pair=pair,
             timeframe=timeframe,
-            prediction=predicted_value,
+            predvalue=predicted_value,
             stake=stake,
-            trueval=trueval,
+            truevalue=truevalue,
             timestamp=timestamp,
             source=source,
             payout=payout,
diff --git a/pdr_backend/subgraph/subgraph_trueval.py b/pdr_backend/subgraph/subgraph_trueval.py
index b6578c3cb..e9dcc885d 100644
--- a/pdr_backend/subgraph/subgraph_trueval.py
+++ b/pdr_backend/subgraph/subgraph_trueval.py
@@ -118,7 +118,7 @@ def fetch_truevals(
             ID=ID,
             token=token,
             timestamp=timestamp,
-            trueval=truevalue,
+            truevalue=truevalue,
             slot=slot,
         )
 
diff --git a/pdr_backend/subgraph/test/test_subgraph_payout.py b/pdr_backend/subgraph/test/test_subgraph_payout.py
index 9d89a36be..fd71bc978 100644
--- a/pdr_backend/subgraph/test/test_subgraph_payout.py
+++ b/pdr_backend/subgraph/test/test_subgraph_payout.py
@@ -81,7 +81,7 @@ def test_fetch_payouts(mock_query_subgraph):
     assert payouts[0].timestamp == 1698527000
     assert payouts[0].slot == 1696880700
     assert payouts[0].payout == float(0)
-    assert payouts[0].predictedValue is True
+    assert payouts[0].predvalue is True
     assert payouts[0].user == "0xd2a24cb4ff2584bad80ff5f109034a891c3d88dd"
     assert payouts[0].stake == float(1.2)
     assert mock_query_subgraph.call_count == 1
diff --git a/pdr_backend/subgraph/test/test_subgraph_predictions.py b/pdr_backend/subgraph/test/test_subgraph_predictions.py
index 266525b59..5cc9a5f85 100644
--- a/pdr_backend/subgraph/test/test_subgraph_predictions.py
+++ b/pdr_backend/subgraph/test/test_subgraph_predictions.py
@@ -20,9 +20,9 @@
     contract="0x18f54cc21b7a2fdd011bea06bba7801b280e3151",
     pair="ADA/USDT",
     timeframe="5m",
-    prediction=True,
+    predvalue=True,
     stake=0.050051425480971974,
-    trueval=False,
+    truevalue=False,
     timestamp=UnixTimeS(1698527000),
     source="binance",
     payout=0.0,
@@ -128,8 +128,8 @@ def test_fetch_filtered_predictions(mock_query_subgraph):
     assert predictions[0].user == "0xd2a24cb4ff2584bad80ff5f109034a891c3d88dd"
     assert predictions[0].pair == "ADA/USDT"
     assert predictions[0].contract == "0x18f54cc21b7a2fdd011bea06bba7801b280e3151"
-    assert predictions[0].trueval is False
-    assert predictions[0].prediction is True
+    assert predictions[0].truevalue is False
+    assert predictions[0].predvalue is True
     assert mock_query_subgraph.call_count == 1
 
 
diff --git a/pdr_backend/subgraph/test/test_subgraph_trueval.py b/pdr_backend/subgraph/test/test_subgraph_trueval.py
index 44e4d92f2..72e76f0b6 100644
--- a/pdr_backend/subgraph/test/test_subgraph_trueval.py
+++ b/pdr_backend/subgraph/test/test_subgraph_trueval.py
@@ -63,5 +63,5 @@ def test_fetch_filtered_truevals(mock_query_subgraph):
     assert truevals[0].token == "ADA/USDT"
     assert truevals[0].timestamp == 1698527000
     assert truevals[0].slot == 1698527100
-    assert truevals[0].trueval is True
+    assert truevals[0].truevalue is True
     assert mock_query_subgraph.call_count == 1
diff --git a/pdr_backend/subgraph/trueval.py b/pdr_backend/subgraph/trueval.py
index 946ec7906..67841249c 100644
--- a/pdr_backend/subgraph/trueval.py
+++ b/pdr_backend/subgraph/trueval.py
@@ -12,11 +12,11 @@ def __init__(
         ID: str,
         timestamp: UnixTimeS,
         token: str,
-        trueval: Union[bool, None],
+        truevalue: Union[bool, None],
         slot: UnixTimeS,  # slot/epoch timestamp
     ) -> None:
         self.ID = ID
-        self.trueval = trueval
+        self.truevalue = truevalue
         self.timestamp = timestamp
         self.token = token
         self.slot = slot
@@ -28,12 +28,12 @@ def __init__(
 
 @enforce_types
 def mock_trueval(trueval_tuple: tuple) -> Trueval:
-    (ID, timestamp, token, trueval, slot) = trueval_tuple
+    (ID, timestamp, token, truevalue, slot) = trueval_tuple
     return Trueval(
         ID=ID,
         token=token,
+        truevalue=truevalue,
         slot=UnixTimeS(slot),
-        trueval=trueval,
         timestamp=UnixTimeS(timestamp),
     )
 
diff --git a/pdr_backend/util/csvs.py b/pdr_backend/util/csvs.py
index c3300c5c8..ebf5f38fa 100644
--- a/pdr_backend/util/csvs.py
+++ b/pdr_backend/util/csvs.py
@@ -80,8 +80,8 @@ def save_prediction_csv(all_predictions: List[Prediction], csv_output_dir: str):
         all_predictions,
         csv_output_dir,
         {
-            "Predicted Value": "prediction",
-            "True Value": "trueval",
+            "Predicted Value": "predvalue",
+            "True Value": "truevalue",
             "Timestamp": "timestamp",
             "Stake": "stake",
             "Payout": "payout",
@@ -101,7 +101,7 @@ def save_analysis_csv(all_predictions: List[Prediction], csv_output_dir: str):
             "Stake": "stake",
             "Wallet": "user",
             "Payout": "payout",
-            "True Value": "trueval",
-            "Predicted Value": "prediction",
+            "True Value": "truevalue",
+            "Predicted Value": "predvalue",
         },
     )
diff --git a/pdr_backend/util/test_noganache/test_csvs.py b/pdr_backend/util/test_noganache/test_csvs.py
index 35a5f91e0..d285797bc 100644
--- a/pdr_backend/util/test_noganache/test_csvs.py
+++ b/pdr_backend/util/test_noganache/test_csvs.py
@@ -18,8 +18,8 @@ def test_save_analysis_csv(tmpdir):
         data = csv.DictReader(f)
         data_rows = list(data)
 
-    assert data_rows[0]["Predicted Value"] == str(predictions[0].prediction)
-    assert data_rows[0]["True Value"] == str(predictions[0].trueval)
+    assert data_rows[0]["Predicted Value"] == str(predictions[0].predvalue)
+    assert data_rows[0]["True Value"] == str(predictions[0].truevalue)
     assert data_rows[0]["Timestamp"] == str(predictions[0].timestamp)
     assert list(data_rows[0].keys()) == [
         "PredictionID",
@@ -46,8 +46,8 @@ def test_save_prediction_csv(tmpdir):
         data = csv.DictReader(f)
         data_rows = list(row for row in data)
 
-    assert data_rows[0]["Predicted Value"] == str(predictions[0].prediction)
-    assert data_rows[0]["True Value"] == str(predictions[0].trueval)
+    assert data_rows[0]["Predicted Value"] == str(predictions[0].predvalue)
+    assert data_rows[0]["True Value"] == str(predictions[0].truevalue)
     assert data_rows[0]["Timestamp"] == str(predictions[0].timestamp)
     assert list(data_rows[0].keys()) == [
         "Predicted Value",