oceanprotocol · KatunaNorbert · Jun 25, 2024 · Jun 20, 2024 · Jun 20, 2024 · Jun 21, 2024
diff --git a/pdr_backend/cli/cli_arguments.py b/pdr_backend/cli/cli_arguments.py
@@ -19,7 +19,7 @@
 
 HELP_MAIN = """
 Main tools:
-  pdr sim PPSS_FILE
+  pdr sim PPSS_FILE NETWORK
   pdr sim_plots [--run_id RUN_ID] [--port PORT] [--debug_mode False]
   pdr predictoor PPSS_FILE NETWORK
   pdr trader APPROACH PPSS_FILE NETWORK
@@ -557,7 +557,7 @@ def print_args(arguments: Namespace, nested_args: dict):
 ## below, list *ArgParser classes in same order as HELP_LONG
 
 # main tools
-SimArgParser = _ArgParser_PPSS
+SimArgParser = _ArgParser_PPSS_NETWORK
 PredictoorArgParser = _ArgParser_PPSS_NETWORK
 TraderArgParser = _ArgParser_APPROACH_PPSS_NETWORK
 ClaimOceanArgParser = _ArgParser_PPSS

diff --git a/pdr_backend/cli/cli_module.py b/pdr_backend/cli/cli_module.py
@@ -83,7 +83,7 @@ def _do_main():
 def do_sim(args, nested_args=None):
     ppss = PPSS(
         yaml_filename=args.PPSS_FILE,
-        network="development",
+        network=args.NETWORK,
         nested_override_args=nested_args,
     )
     feedset = ppss.predictoor_ss.predict_train_feedsets[0]

diff --git a/pdr_backend/cli/test/test_cli_arguments.py b/pdr_backend/cli/test/test_cli_arguments.py
@@ -33,7 +33,7 @@ def test_do_help_long(capfd):
 def test_print_args(caplog):
     SimArgParser = defined_parsers["do_sim"]
     parser = SimArgParser
-    args = ["sim", "ppss.yaml"]
+    args = ["sim", "ppss.yaml", "development"]
     parsed_args = parser.parse_args(args)
     nested_args = {"foo": "bar"}
 
@@ -42,4 +42,5 @@ def test_print_args(caplog):
     assert "pdr sim: Begin" in caplog.text
     assert "Arguments:" in caplog.text
     assert "PPSS_FILE=ppss.yaml" in caplog.text
+    assert "NETWORK=development" in caplog.text
     assert "foo" in caplog.text
diff --git a/pdr_backend/cli/test/test_cli_module.py b/pdr_backend/cli/test/test_cli_module.py
@@ -58,12 +58,12 @@ class _PPSS:
 class _PPSS_OBJ:
     PPSS = PPSSClass(
         yaml_filename=os.path.abspath("ppss.yaml"),
-        network="development",
+        network="sapphire-testnet",
     )
 
 
 class _NETWORK:
-    NETWORK = "development"
+    NETWORK = "sapphire-testnet"
 
 
 class _LOOKBACK:
@@ -229,7 +229,7 @@ def test_do_sim(monkeypatch):
     mock_f = Mock()
     monkeypatch.setattr(f"{_CLI_PATH}.SimEngine.run", mock_f)
 
-    do_sim(MockArgParser_PPSS().parse_args())
+    do_sim(MockArgParser_PPSS_NETWORK().parse_args())
 
     mock_f.assert_called()
 

diff --git a/pdr_backend/ppss/sim_ss.py b/pdr_backend/ppss/sim_ss.py
@@ -58,6 +58,10 @@ def test_n(self) -> int:
     def tradetype(self) -> str:
         return self.d.get("tradetype", "histmock")
 
+    @property
+    def use_own_model(self) -> bool:
+        return self.d["use_own_model"]
+
     # --------------------------------
     # derived methods
     def is_final_iter(self, iter_i: int) -> bool:
@@ -74,11 +78,13 @@ def is_final_iter(self, iter_i: int) -> bool:
 @enforce_types
 def sim_ss_test_dict(
     log_dir: str,
+    use_own_model: bool,
     test_n: Optional[int] = None,
     tradetype: Optional[str] = None,
 ) -> dict:
     d = {
         "log_dir": log_dir,
+        "use_own_model": use_own_model,
         "test_n": test_n or 10,
         "tradetype": tradetype or "histmock",
     }

diff --git a/pdr_backend/ppss/test/test_sim_ss.py b/pdr_backend/ppss/test/test_sim_ss.py
@@ -12,7 +12,7 @@
 
 @enforce_types
 def test_sim_ss_defaults(tmpdir):
-    d = sim_ss_test_dict(_logdir(tmpdir))
+    d = sim_ss_test_dict(_logdir(tmpdir), True)
     ss = SimSS(d)
 
     # yaml properties
@@ -31,6 +31,7 @@ def test_sim_ss_specify_values(tmpdir):
         log_dir=os.path.join(tmpdir, "mylogs"),
         test_n=13,
         tradetype="livereal",
+        use_own_model=True,
     )
     ss = SimSS(d)
 
@@ -48,7 +49,7 @@ def test_sim_ss_log_dir_relative_path():
     # it will work with the relative path
     expanded_path = os.path.abspath("mylogs")
     had_before = os.path.exists(expanded_path)
-    d = sim_ss_test_dict("mylogs")
+    d = sim_ss_test_dict("mylogs", True)
     ss = SimSS(d)
     assert ss.log_dir == expanded_path
     if not had_before:
@@ -57,16 +58,16 @@ def test_sim_ss_log_dir_relative_path():
 
 @enforce_types
 def test_sim_ss_test_n_badpaths(tmpdir):
-    d = sim_ss_test_dict(_logdir(tmpdir), test_n=-3)
+    d = sim_ss_test_dict(_logdir(tmpdir), True, test_n=-3)
     with pytest.raises(ValueError):
         _ = SimSS(d)
 
-    d = sim_ss_test_dict(_logdir(tmpdir))
+    d = sim_ss_test_dict(_logdir(tmpdir), True)
     d["test_n"] = "not_an_int"
     with pytest.raises(TypeError):
         _ = SimSS(d)
 
-    d = sim_ss_test_dict(_logdir(tmpdir))
+    d = sim_ss_test_dict(_logdir(tmpdir), True)
     d["test_n"] = 3.2
     with pytest.raises(TypeError):
         _ = SimSS(d)
@@ -75,26 +76,26 @@ def test_sim_ss_test_n_badpaths(tmpdir):
 @enforce_types
 def test_sim_ss_tradetype_happypaths(tmpdir):
     for tradetype in TRADETYPE_OPTIONS:
-        d = sim_ss_test_dict(_logdir(tmpdir), tradetype=tradetype)
+        d = sim_ss_test_dict(_logdir(tmpdir), True, tradetype=tradetype)
         ss = SimSS(d)
         assert ss.tradetype == tradetype
 
 
 @enforce_types
 def test_sim_ss_tradetype_badpaths(tmpdir):
-    d = sim_ss_test_dict(_logdir(tmpdir))
+    d = sim_ss_test_dict(_logdir(tmpdir), True)
     d["tradetype"] = 3.2
     with pytest.raises(TypeError):
         _ = SimSS(d)
 
-    d = sim_ss_test_dict(_logdir(tmpdir), tradetype="not a tradetype")
+    d = sim_ss_test_dict(_logdir(tmpdir), True, tradetype="not a tradetype")
     with pytest.raises(ValueError):
         _ = SimSS(d)
 
 
 @enforce_types
 def test_sim_ss_is_final_iter(tmpdir):
-    d = sim_ss_test_dict(_logdir(tmpdir), test_n=10)
+    d = sim_ss_test_dict(_logdir(tmpdir), True, test_n=10)
     ss = SimSS(d)
     with pytest.raises(ValueError):
         _ = ss.is_final_iter(-5)

diff --git a/pdr_backend/sim/sim_engine.py b/pdr_backend/sim/sim_engine.py
@@ -2,6 +2,7 @@
 import logging
 import os
 import uuid
+import time
 from typing import Optional
 
 import numpy as np
@@ -25,6 +26,9 @@
 from pdr_backend.sim.sim_state import SimState
 from pdr_backend.util.strutil import shift_one_earlier
 from pdr_backend.util.time_types import UnixTimeMs
+from pdr_backend.lake.etl import ETL
+from pdr_backend.lake.gql_data_factory import GQLDataFactory
+from pdr_backend.lake.duckdb_data_store import DuckDBDataStore
 
 logger = logging.getLogger("sim_engine")
 
@@ -105,6 +109,12 @@ def run(self):
         f = OhlcvDataFactory(self.ppss.lake_ss)
         mergedohlcv_df = f.get_mergedohlcv_df()
 
+        # fetch predictions data
+        if not self.ppss.sim_ss.use_own_model:
+            chain_prediction_data = self._get_past_predictions_from_chain(self.ppss)
+            if not chain_prediction_data:
+                return
+
         for test_i in range(self.ppss.sim_ss.test_n):
             self.run_one_iter(test_i, mergedohlcv_df)
 
@@ -359,3 +369,68 @@ def save_state(self, i: int, N: int):
             return False, False
 
         return True, False
+
+    @enforce_types
+    def _get_past_predictions_from_chain(self, ppss: PPSS):
+        # calculate needed data start date
+        current_time_s = int(time.time())
+        timeframe = ppss.trader_ss.feed.timeframe
+        number_of_data_points = ppss.sim_ss.test_n
+        start_date = current_time_s - (timeframe.s * number_of_data_points)
+
+        # check if ppss is correctly configured for data fetching
+        if start_date < int(
+            UnixTimeMs.from_timestr(self.ppss.lake_ss.st_timestr) / 1000
+        ):
+            logger.info(
+                (
+                    "Lake dates configuration doesn't meet the requirements. "
+                    "Make sure you set start date before %s"
+                ),
+                time.strftime("%Y-%m-%d", time.localtime(start_date)),
+            )
+            return False
+
+        # fetch data from subgraph
+        gql_data_factory = GQLDataFactory(ppss)
+        etl = ETL(ppss, gql_data_factory)
+        etl.do_etl()
+        time.sleep(3)
+
+        # check if required data exists in the database
+        db = DuckDBDataStore(self.ppss.lake_ss.lake_dir)
+        query = """
+        (SELECT timestamp
+            FROM pdr_payouts
+            ORDER BY timestamp ASC
+            LIMIT 1)
+            UNION ALL
+            (SELECT timestamp
+            FROM pdr_payouts
+            ORDER BY timestamp DESC
+            LIMIT 1);
+        """
+        data = db.query_data(query)
+        if len(data["timestamp"]) < 2:
+            logger.info(
+                "No prediction data found in database at %s", self.ppss.lake_ss.lake_dir
+            )
+            return False
+        start_timestamp = data["timestamp"][0] / 1000
+        # end_timestamp = data["timestamp"][1] / 1000
+
+        if start_timestamp > start_date:
+            logger.info(
+                (
+                    "Not enough predictions data in the lake. "
+                    "Make sure you fetch data starting from %s up to today"
+                ),
+                time.strftime("%Y-%m-%d", time.localtime(start_date)),
+            )
+            return False
+
+        # if (end_timestamp + timeframe.s) < time.time():
+        #    logger.info("Lake data is not up to date.")
+        #    return False
+
+        return True
diff --git a/pdr_backend/sim/test/test_multisim_engine.py b/pdr_backend/sim/test/test_multisim_engine.py
@@ -62,7 +62,7 @@ def _constructor_d_with_fast_runtime(tmpdir):
 
     # sim ss
     log_dir = os.path.join(tmpdir, "logs")
-    sim_d = sim_ss_test_dict(log_dir=log_dir, test_n=10)
+    sim_d = sim_ss_test_dict(log_dir=log_dir, use_own_model=True, test_n=10)
     assert "sim_ss" in main_d
     main_d["sim_ss"] = sim_d
 

diff --git a/pdr_backend/sim/test/test_sim_engine.py b/pdr_backend/sim/test/test_sim_engine.py
@@ -1,5 +1,5 @@
 import os
-
+import shutil
 import pytest
 from dash import Dash
 from enforce_typing import enforce_types
@@ -53,7 +53,7 @@ def test_sim_engine(tmpdir, check_chromedriver, dash_duo):
 
     # sim ss
     log_dir = os.path.join(tmpdir, "logs")
-    d = sim_ss_test_dict(log_dir, test_n=5)
+    d = sim_ss_test_dict(log_dir, True, test_n=5)
     ppss.sim_ss = SimSS(d)
 
     # go
@@ -100,3 +100,34 @@ def test_sim_engine(tmpdir, check_chromedriver, dash_duo):
         assert "tab--selected" in tab.get_attribute("class")
         for figure_name in figures:
             dash_duo.find_element(f"#{figure_name}")
+
+
+def test_get_past_predictions_from_chain():
+    s = os.path.abspath("ppss.yaml")
+    d = PPSS.constructor_dict(s)
+    path = "my_lake_data"
+
+    d["lake_ss"]["lake_dir"] = path
+    d["lake_ss"]["st_timestr"] = "2 hours ago"
+    d["trader_ss"]["feed.timeframe"] = "5m"
+    d["sim_ss"]["test_n"] = 1000
+    ppss = PPSS(d=d, network="sapphire-mainnet")
+    feedsets = ppss.predictoor_ss.predict_train_feedsets
+    sim_engine = SimEngine(ppss, feedsets[0])
+
+    # run with wrong ppss lake config so there is not enough data fetched
+    resp = sim_engine._get_past_predictions_from_chain(ppss)
+    assert resp is False
+
+    # run with right ppss lake config
+    if os.path.exists(path):
+        shutil.rmtree(path)
+
+    # needs to be inspected and fixed
+    # d["sim_ss"]["test_n"] = 20
+    # ppss = PPSS(d=d, network="sapphire-mainnet")
+    # print(ppss.lake_ss)
+
+    # sim_engine = SimEngine(ppss, feedsets[0])
+    # resp = sim_engine._get_past_predictions_from_chain(ppss)
+    # assert resp is True
diff --git a/pdr_backend/sim/test/test_sim_logger.py b/pdr_backend/sim/test/test_sim_logger.py
@@ -13,7 +13,7 @@ def test_compact_num(tmpdir, caplog):
     ppss = PPSS(yaml_str=s, network="development")
 
     log_dir = os.path.join(tmpdir, "logs")
-    d = sim_ss_test_dict(log_dir, test_n=5)
+    d = sim_ss_test_dict(log_dir, True, test_n=5)
     ppss.sim_ss = SimSS(d)
 
     st = Mock(spec=SimState)

diff --git a/ppss.yaml b/ppss.yaml
@@ -65,6 +65,7 @@ sim_ss: # sim only
   log_dir: logs
   test_n: 5000 # number of epochs to simulate
   tradetype: histmock # histmock | livemock | livereal
+  use_own_model: True # if true, use own model's prediction signals; else use prediction signals from chain
 
 multisim_ss:
   approach: SimpleSweep # SimpleSweep | FastSweep (future) | ..