Fix #1257: Use chain predictions inside trading sim (#1296)

* Fix 1262: Get prediction data for sim (PR #1265) * added new field to ppss for type of prediction source * add network param to sim cli and make fetch work * verify there is enough data in database and write test * Fix #1263: Transform lake data into prediction signals (#1297) * change ppss config to better fit the 2 prediction sources * get feed contract address from duckDB (#1311) * Fix #1316: Update readme with chain data predictions signals inside trading (#1320) * Fix #1313: mock GQLDataFactory for the sim engine (#1315) * Stop tracking lake_data/ folder
oceanprotocol · Jul 4, 2024 · bf0a07a · bf0a07a
1 parent 703e6fd
commit bf0a07a
Show file tree

Hide file tree

Showing 20 changed files with 351 additions and 33 deletions.
diff --git a/README.md b/README.md
@@ -46,7 +46,7 @@ This will output something like:
 Usage: pdr sim|predictoor|trader|..
 
 Main tools:
-  pdr sim YAML_FILE
+  pdr sim YAML_FILE NETWORK
   pdr predictoor YAML_FILE NETWORK
   pdr trader APPROACH YAML_FILE NETWORK
 ...

diff --git a/READMEs/predictoor.md b/READMEs/predictoor.md
@@ -69,7 +69,7 @@ cp ppss.yaml my_ppss.yaml
 
 Let's run the simulation engine. In console:
 ```console
-pdr sim my_ppss.yaml
+pdr sim my_ppss.yaml sapphire-mainnet
 ```
 
 What the engine does:

diff --git a/READMEs/trader.md b/READMEs/trader.md
@@ -59,16 +59,21 @@ Copy [`ppss.yaml`](../ppss.yaml) into your own file `my_ppss.yaml` and change pa
 cp ppss.yaml my_ppss.yaml
 ```
 
+Choose the source of the predictions used as signals for trading:
+- Own model: builds a model and creates predictions on the fly  `use_own_model=True`
+- Live chain: gets the predictions from the given chain  `use_own_model=False`
+By default `use_own_model` is set to True; it can be changed inside the `my_ppss.yaml` file.
+
 Let's run the simulation engine. In console:
 ```console
-pdr sim my_ppss.yaml
+pdr sim my_ppss.yaml sapphire-mainnet
 ```
 
 What the engine does:
 1. Set simulation parameters.
-1. Grab historical price data from exchanges and stores in `lake_data/` dir. It re-uses any previously saved data.
-1. Run through many 5min epochs. At each epoch:
-   - Build a model
+2. Grab historical price data from exchanges and store it in `lake_data/` dir. It re-uses any previously saved data.
+3. Run through many 5min epochs. At each epoch:
+   - Build a model or get predictions from the chain
    - Predict
    - Trade
    - Log to console and `logs/out_<time>.txt`

diff --git a/pdr_backend/aimodel/aimodel_plotter.py b/pdr_backend/aimodel/aimodel_plotter.py
@@ -6,6 +6,7 @@
 import plotly.graph_objects as go
 from plotly.subplots import make_subplots
 from enforce_typing import enforce_types
+from pdr_backend.sim.sim_plotter import empty_fig
 
 from pdr_backend.aimodel.aimodel_plotdata import AimodelPlotdata
 
@@ -342,7 +343,11 @@ def plot_aimodel_varimps(d: AimodelPlotdata):
     @arguments
       d -- AimodelPlotdata
     """
-    imps_avg, imps_stddev = d.model.importance_per_var(include_stddev=True)
+    try:
+        imps_avg, imps_stddev = d.model.importance_per_var(include_stddev=True)
+    except Exception:
+        return empty_fig("Can't plot var imp for chain data")
+
     imps_avg = imps_avg + 1e-15  # give imp > 0, so before dummy vars in plot
     varnames = d.colnames
     n = len(varnames)

diff --git a/pdr_backend/cli/cli_arguments.py b/pdr_backend/cli/cli_arguments.py
@@ -23,7 +23,7 @@
 
 HELP_MAIN = """
 Main tools:
-  pdr sim PPSS_FILE
+  pdr sim PPSS_FILE NETWORK
   pdr sim_plots [--run_id RUN_ID] [--port PORT] [--debug_mode False]
   pdr predictoor PPSS_FILE NETWORK
   pdr trader APPROACH PPSS_FILE NETWORK
@@ -553,7 +553,7 @@ def print_args(arguments: Namespace, nested_args: dict):
 ## below, list *ArgParser classes in same order as HELP_LONG
 
 # main tools
-SimArgParser = _ArgParser_PPSS
+SimArgParser = _ArgParser_PPSS_NETWORK
 PredictoorArgParser = _ArgParser_PPSS_NETWORK
 TraderArgParser = _ArgParser_APPROACH_PPSS_NETWORK
 ClaimOceanArgParser = _ArgParser_PPSS

diff --git a/pdr_backend/cli/cli_module.py b/pdr_backend/cli/cli_module.py
@@ -87,7 +87,7 @@ def _do_main():
 def do_sim(args, nested_args=None):
     ppss = PPSS(
         yaml_filename=args.PPSS_FILE,
-        network="development",
+        network=args.NETWORK,
         nested_override_args=nested_args,
     )
     feedset = ppss.predictoor_ss.predict_train_feedsets[0]

diff --git a/pdr_backend/cli/test/test_cli_arguments.py b/pdr_backend/cli/test/test_cli_arguments.py
@@ -37,7 +37,7 @@ def test_do_help_long(capfd):
 def test_print_args(caplog):
     SimArgParser = defined_parsers["do_sim"]
     parser = SimArgParser
-    args = ["sim", "ppss.yaml"]
+    args = ["sim", "ppss.yaml", "development"]
     parsed_args = parser.parse_args(args)
     nested_args = {"foo": "bar"}
 
@@ -46,4 +46,5 @@ def test_print_args(caplog):
     assert "pdr sim: Begin" in caplog.text
     assert "Arguments:" in caplog.text
     assert "PPSS_FILE=ppss.yaml" in caplog.text
+    assert "NETWORK=development" in caplog.text
     assert "foo" in caplog.text
diff --git a/pdr_backend/cli/test/test_cli_module.py b/pdr_backend/cli/test/test_cli_module.py
@@ -62,12 +62,12 @@ class _PPSS:
 class _PPSS_OBJ:
     PPSS = PPSSClass(
         yaml_filename=os.path.abspath("ppss.yaml"),
-        network="development",
+        network="sapphire-testnet",
     )
 
 
 class _NETWORK:
-    NETWORK = "development"
+    NETWORK = "sapphire-testnet"
 
 
 class _LOOKBACK:
@@ -233,7 +233,7 @@ def test_do_sim(monkeypatch):
     mock_f = Mock()
     monkeypatch.setattr(f"{_CLI_PATH}.SimEngine.run", mock_f)
 
-    do_sim(MockArgParser_PPSS().parse_args())
+    do_sim(MockArgParser_PPSS_NETWORK().parse_args())
 
     mock_f.assert_called()
 

diff --git a/pdr_backend/lake/gql_data_factory.py b/pdr_backend/lake/gql_data_factory.py
@@ -220,6 +220,8 @@ def _do_subgraph_fetch(
                     schema=dataclass.get_lake_schema(),
                 )
                 save_backoff_count = 0
+                if len(df["timestamp"]) == 0:
+                    return
                 if df["timestamp"][0] > df["timestamp"][len(df) - 1]:
                     return
 

diff --git a/pdr_backend/ppss/ppss.py b/pdr_backend/ppss/ppss.py
@@ -5,6 +5,7 @@
 import os
 import tempfile
 from typing import Optional, Tuple
+import time
 
 import yaml
 from enforce_typing import enforce_types
@@ -21,6 +22,7 @@
 from pdr_backend.ppss.trader_ss import TraderSS
 from pdr_backend.ppss.trueval_ss import TruevalSS
 from pdr_backend.ppss.exchange_mgr_ss import ExchangeMgrSS
+from pdr_backend.util.time_types import UnixTimeMs, UnixTimeS
 from pdr_backend.ppss.web3_pp import Web3PP
 from pdr_backend.subgraph.subgraph_feed import SubgraphFeed, mock_feed
 from pdr_backend.util.dictutil import recursive_update
@@ -73,6 +75,7 @@ def __init__(
 
         # postconditions
         self.verify_feed_dependencies()
+        self.verify_use_chain_data_in_syms_dependencies()
 
     @staticmethod
     def constructor_dict(
@@ -97,6 +100,27 @@ def constructor_dict(
 
         return d
 
+    def verify_use_chain_data_in_syms_dependencies(self):
+        current_time_s = int(time.time())
+        timeframe = self.trader_ss.feed.timeframe
+        number_of_data_points = self.sim_ss.test_n
+        start_date = current_time_s - (timeframe.s * number_of_data_points)
+        formatted_start_date_as_string = time.strftime(
+            "%Y-%m-%d", time.localtime(start_date)
+        )
+
+        # check that ppss is correctly configured for using chain data in simulations
+        if (
+            UnixTimeS(start_date)
+            < UnixTimeMs.from_timestr(self.lake_ss.st_timestr).to_seconds()
+        ):
+            raise ValueError(
+                (
+                    "Lake dates configuration doesn't meet the requirements. "
+                    f"Make sure you set start date before {formatted_start_date_as_string}"
+                )
+            )
+
     def verify_feed_dependencies(self):
         """Raise ValueError if a feed dependency is violated"""
         lake_fs = self.lake_ss.feeds

diff --git a/pdr_backend/ppss/sim_ss.py b/pdr_backend/ppss/sim_ss.py
@@ -62,6 +62,10 @@ def test_n(self) -> int:
     def tradetype(self) -> str:
         return self.d.get("tradetype", "histmock")
 
+    @property
+    def use_own_model(self) -> bool:
+        return self.d["use_own_model"]
+
     # --------------------------------
     # derived methods
     def is_final_iter(self, iter_i: int) -> bool:
@@ -78,11 +82,13 @@ def is_final_iter(self, iter_i: int) -> bool:
 @enforce_types
 def sim_ss_test_dict(
     log_dir: str,
+    use_own_model: bool,
     test_n: Optional[int] = None,
     tradetype: Optional[str] = None,
 ) -> dict:
     d = {
         "log_dir": log_dir,
+        "use_own_model": use_own_model,
         "test_n": test_n or 10,
         "tradetype": tradetype or "histmock",
     }

diff --git a/pdr_backend/ppss/test/test_ppss.py b/pdr_backend/ppss/test/test_ppss.py
@@ -201,3 +201,22 @@ def test_verify_feed_dependencies():
     ]
     with pytest.raises(ValueError):
         ppss2.verify_feed_dependencies()
+
+
+@enforce_types
+def test_verify_use_chain_data_in_syms_dependencies():
+    # create ppss
+    ppss = mock_ppss(
+        feedset_test_list(),
+        "sapphire-mainnet",
+    )
+
+    # baseline should pass
+    ppss.verify_use_chain_data_in_syms_dependencies()
+
+    # modify lake time and number of epochs to simulate so the verification fails
+    ppss2 = deepcopy(ppss)
+    ppss2.sim_ss.d["test_n"] = 1000
+    ppss2.lake_ss.d["st_timestr"] = "2 hours ago"
+    with pytest.raises(ValueError):
+        ppss2.verify_use_chain_data_in_syms_dependencies()
diff --git a/pdr_backend/ppss/test/test_sim_ss.py b/pdr_backend/ppss/test/test_sim_ss.py
@@ -16,7 +16,7 @@
 
 @enforce_types
 def test_sim_ss_defaults(tmpdir):
-    d = sim_ss_test_dict(_logdir(tmpdir))
+    d = sim_ss_test_dict(log_dir=_logdir(tmpdir), use_own_model=True)
     ss = SimSS(d)
 
     # yaml properties
@@ -35,6 +35,7 @@ def test_sim_ss_specify_values(tmpdir):
         log_dir=os.path.join(tmpdir, "mylogs"),
         test_n=13,
         tradetype="livereal",
+        use_own_model=True,
     )
     ss = SimSS(d)
 
@@ -52,7 +53,7 @@ def test_sim_ss_log_dir_relative_path():
     # it will work with the relative path
     expanded_path = os.path.abspath("mylogs")
     had_before = os.path.exists(expanded_path)
-    d = sim_ss_test_dict("mylogs")
+    d = sim_ss_test_dict(log_dir="mylogs", use_own_model=True)
     ss = SimSS(d)
     assert ss.log_dir == expanded_path
     if not had_before:
@@ -61,16 +62,16 @@ def test_sim_ss_log_dir_relative_path():
 
 @enforce_types
 def test_sim_ss_test_n_badpaths(tmpdir):
-    d = sim_ss_test_dict(_logdir(tmpdir), test_n=-3)
+    d = sim_ss_test_dict(log_dir=_logdir(tmpdir), use_own_model=True, test_n=-3)
     with pytest.raises(ValueError):
         _ = SimSS(d)
 
-    d = sim_ss_test_dict(_logdir(tmpdir))
+    d = sim_ss_test_dict(log_dir=_logdir(tmpdir), use_own_model=True)
     d["test_n"] = "not_an_int"
     with pytest.raises(TypeError):
         _ = SimSS(d)
 
-    d = sim_ss_test_dict(_logdir(tmpdir))
+    d = sim_ss_test_dict(log_dir=_logdir(tmpdir), use_own_model=True)
     d["test_n"] = 3.2
     with pytest.raises(TypeError):
         _ = SimSS(d)
@@ -79,26 +80,30 @@ def test_sim_ss_test_n_badpaths(tmpdir):
 @enforce_types
 def test_sim_ss_tradetype_happypaths(tmpdir):
     for tradetype in TRADETYPE_OPTIONS:
-        d = sim_ss_test_dict(_logdir(tmpdir), tradetype=tradetype)
+        d = sim_ss_test_dict(
+            log_dir=_logdir(tmpdir), use_own_model=True, tradetype=tradetype
+        )
         ss = SimSS(d)
         assert ss.tradetype == tradetype
 
 
 @enforce_types
 def test_sim_ss_tradetype_badpaths(tmpdir):
-    d = sim_ss_test_dict(_logdir(tmpdir))
+    d = sim_ss_test_dict(log_dir=_logdir(tmpdir), use_own_model=True)
     d["tradetype"] = 3.2
     with pytest.raises(TypeError):
         _ = SimSS(d)
 
-    d = sim_ss_test_dict(_logdir(tmpdir), tradetype="not a tradetype")
+    d = sim_ss_test_dict(
+        log_dir=_logdir(tmpdir), use_own_model=True, tradetype="not a tradetype"
+    )
     with pytest.raises(ValueError):
         _ = SimSS(d)
 
 
 @enforce_types
 def test_sim_ss_is_final_iter(tmpdir):
-    d = sim_ss_test_dict(_logdir(tmpdir), test_n=10)
+    d = sim_ss_test_dict(log_dir=_logdir(tmpdir), use_own_model=True, test_n=10)
     ss = SimSS(d)
     with pytest.raises(ValueError):
         _ = ss.is_final_iter(-5)