Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix 1262: Get prediction data for sim #1265

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions pdr_backend/cli/cli_arguments.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

HELP_MAIN = """
Main tools:
pdr sim PPSS_FILE
pdr sim PPSS_FILE NETWORK
pdr sim_plots [--run_id RUN_ID] [--port PORT] [--debug_mode False]
pdr predictoor PPSS_FILE NETWORK
pdr trader APPROACH PPSS_FILE NETWORK
Expand Down Expand Up @@ -557,7 +557,7 @@ def print_args(arguments: Namespace, nested_args: dict):
## below, list *ArgParser classes in same order as HELP_LONG

# main tools
SimArgParser = _ArgParser_PPSS
SimArgParser = _ArgParser_PPSS_NETWORK
PredictoorArgParser = _ArgParser_PPSS_NETWORK
TraderArgParser = _ArgParser_APPROACH_PPSS_NETWORK
ClaimOceanArgParser = _ArgParser_PPSS
Expand Down
2 changes: 1 addition & 1 deletion pdr_backend/cli/cli_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def _do_main():
def do_sim(args, nested_args=None):
ppss = PPSS(
yaml_filename=args.PPSS_FILE,
network="development",
network=args.NETWORK,
nested_override_args=nested_args,
)
feedset = ppss.predictoor_ss.predict_train_feedsets[0]
Expand Down
3 changes: 2 additions & 1 deletion pdr_backend/cli/test/test_cli_arguments.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def test_do_help_long(capfd):
def test_print_args(caplog):
SimArgParser = defined_parsers["do_sim"]
parser = SimArgParser
args = ["sim", "ppss.yaml"]
args = ["sim", "ppss.yaml", "development"]
parsed_args = parser.parse_args(args)
nested_args = {"foo": "bar"}

Expand All @@ -42,4 +42,5 @@ def test_print_args(caplog):
assert "pdr sim: Begin" in caplog.text
assert "Arguments:" in caplog.text
assert "PPSS_FILE=ppss.yaml" in caplog.text
assert "NETWORK=development" in caplog.text
assert "foo" in caplog.text
6 changes: 3 additions & 3 deletions pdr_backend/cli/test/test_cli_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,12 +58,12 @@ class _PPSS:
class _PPSS_OBJ:
PPSS = PPSSClass(
yaml_filename=os.path.abspath("ppss.yaml"),
network="development",
network="sapphire-testnet",
)


class _NETWORK:
NETWORK = "development"
NETWORK = "sapphire-testnet"


class _LOOKBACK:
Expand Down Expand Up @@ -229,7 +229,7 @@ def test_do_sim(monkeypatch):
mock_f = Mock()
monkeypatch.setattr(f"{_CLI_PATH}.SimEngine.run", mock_f)

do_sim(MockArgParser_PPSS().parse_args())
do_sim(MockArgParser_PPSS_NETWORK().parse_args())

mock_f.assert_called()

Expand Down
6 changes: 6 additions & 0 deletions pdr_backend/ppss/sim_ss.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,10 @@ def test_n(self) -> int:
def tradetype(self) -> str:
return self.d.get("tradetype", "histmock")

@property
def use_own_model(self) -> bool:
    """Return the "use_own_model" setting from the settings dict ``self.d``.

    Falls back to True when the key is absent, mirroring how ``tradetype``
    supplies a default, so settings that lack this entry keep working
    instead of raising KeyError.
    """
    return self.d.get("use_own_model", True)

# --------------------------------
# derived methods
def is_final_iter(self, iter_i: int) -> bool:
Expand All @@ -74,11 +78,13 @@ def is_final_iter(self, iter_i: int) -> bool:
@enforce_types
def sim_ss_test_dict(
log_dir: str,
use_own_model: bool,
test_n: Optional[int] = None,
tradetype: Optional[str] = None,
) -> dict:
d = {
"log_dir": log_dir,
"use_own_model": use_own_model,
"test_n": test_n or 10,
"tradetype": tradetype or "histmock",
}
Expand Down
19 changes: 10 additions & 9 deletions pdr_backend/ppss/test/test_sim_ss.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

@enforce_types
def test_sim_ss_defaults(tmpdir):
d = sim_ss_test_dict(_logdir(tmpdir))
d = sim_ss_test_dict(_logdir(tmpdir), True)
ss = SimSS(d)

# yaml properties
Expand All @@ -31,6 +31,7 @@ def test_sim_ss_specify_values(tmpdir):
log_dir=os.path.join(tmpdir, "mylogs"),
test_n=13,
tradetype="livereal",
use_own_model=True,
)
ss = SimSS(d)

Expand All @@ -48,7 +49,7 @@ def test_sim_ss_log_dir_relative_path():
# it will work with the relative path
expanded_path = os.path.abspath("mylogs")
had_before = os.path.exists(expanded_path)
d = sim_ss_test_dict("mylogs")
d = sim_ss_test_dict("mylogs", True)
ss = SimSS(d)
assert ss.log_dir == expanded_path
if not had_before:
Expand All @@ -57,16 +58,16 @@ def test_sim_ss_log_dir_relative_path():

@enforce_types
def test_sim_ss_test_n_badpaths(tmpdir):
d = sim_ss_test_dict(_logdir(tmpdir), test_n=-3)
d = sim_ss_test_dict(_logdir(tmpdir), True, test_n=-3)
with pytest.raises(ValueError):
_ = SimSS(d)

d = sim_ss_test_dict(_logdir(tmpdir))
d = sim_ss_test_dict(_logdir(tmpdir), True)
d["test_n"] = "not_an_int"
with pytest.raises(TypeError):
_ = SimSS(d)

d = sim_ss_test_dict(_logdir(tmpdir))
d = sim_ss_test_dict(_logdir(tmpdir), True)
d["test_n"] = 3.2
with pytest.raises(TypeError):
_ = SimSS(d)
Expand All @@ -75,26 +76,26 @@ def test_sim_ss_test_n_badpaths(tmpdir):
@enforce_types
def test_sim_ss_tradetype_happypaths(tmpdir):
for tradetype in TRADETYPE_OPTIONS:
d = sim_ss_test_dict(_logdir(tmpdir), tradetype=tradetype)
d = sim_ss_test_dict(_logdir(tmpdir), True, tradetype=tradetype)
ss = SimSS(d)
assert ss.tradetype == tradetype


@enforce_types
def test_sim_ss_tradetype_badpaths(tmpdir):
d = sim_ss_test_dict(_logdir(tmpdir))
d = sim_ss_test_dict(_logdir(tmpdir), True)
d["tradetype"] = 3.2
with pytest.raises(TypeError):
_ = SimSS(d)

d = sim_ss_test_dict(_logdir(tmpdir), tradetype="not a tradetype")
d = sim_ss_test_dict(_logdir(tmpdir), True, tradetype="not a tradetype")
with pytest.raises(ValueError):
_ = SimSS(d)


@enforce_types
def test_sim_ss_is_final_iter(tmpdir):
d = sim_ss_test_dict(_logdir(tmpdir), test_n=10)
d = sim_ss_test_dict(_logdir(tmpdir), True, test_n=10)
ss = SimSS(d)
with pytest.raises(ValueError):
_ = ss.is_final_iter(-5)
Expand Down
75 changes: 75 additions & 0 deletions pdr_backend/sim/sim_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import logging
import os
import uuid
import time
from typing import Optional

import numpy as np
Expand All @@ -25,6 +26,9 @@
from pdr_backend.sim.sim_state import SimState
from pdr_backend.util.strutil import shift_one_earlier
from pdr_backend.util.time_types import UnixTimeMs
from pdr_backend.lake.etl import ETL
from pdr_backend.lake.gql_data_factory import GQLDataFactory
from pdr_backend.lake.duckdb_data_store import DuckDBDataStore

logger = logging.getLogger("sim_engine")

Expand Down Expand Up @@ -105,6 +109,12 @@ def run(self):
f = OhlcvDataFactory(self.ppss.lake_ss)
mergedohlcv_df = f.get_mergedohlcv_df()

# fetch predictions data
if not self.ppss.sim_ss.use_own_model:
chain_prediction_data = self._get_past_predictions_from_chain(self.ppss)
if not chain_prediction_data:
return

for test_i in range(self.ppss.sim_ss.test_n):
self.run_one_iter(test_i, mergedohlcv_df)

Expand Down Expand Up @@ -359,3 +369,68 @@ def save_state(self, i: int, N: int):
return False, False

return True, False

@enforce_types
def _get_past_predictions_from_chain(self, ppss: PPSS) -> bool:
    """Check that the lake holds enough past prediction data for the sim.

    Computes how far back data is needed (``sim_ss.test_n`` epochs of the
    trader feed's timeframe, counted back from now), runs the lake ETL,
    then compares the earliest ``pdr_payouts`` timestamp against that
    needed start.

    Args:
        ppss: settings object; its ``trader_ss``, ``sim_ss`` and
            ``lake_ss`` are read here.

    Returns:
        True if the payouts table reaches back at least to the needed
        start; False (after logging the reason) otherwise.
    """
    # calculate needed data start date (epoch seconds)
    current_time_s = int(time.time())
    timeframe = ppss.trader_ss.feed.timeframe
    number_of_data_points = ppss.sim_ss.test_n
    start_date = current_time_s - (timeframe.s * number_of_data_points)
    start_date_str = time.strftime("%Y-%m-%d", time.localtime(start_date))

    # check that ppss is correctly configured for data fetching: the lake's
    # configured start must not be later than the start the sim needs
    lake_start_s = int(UnixTimeMs.from_timestr(ppss.lake_ss.st_timestr) / 1000)
    if start_date < lake_start_s:
        logger.warning(
            (
                "Lake dates configuration doesn't meet the requirements. "
                "Make sure you set start date before %s"
            ),
            start_date_str,
        )
        return False

    # fetch data from subgraph
    gql_data_factory = GQLDataFactory(ppss)
    etl = ETL(ppss, gql_data_factory)
    etl.do_etl()
    # NOTE(review): fixed pause kept from the original; presumably it lets
    # the ETL output settle before the query below -- confirm it is needed.
    time.sleep(3)

    # check if required data exists in the database:
    # one row for the oldest payout and one row for the newest
    db = DuckDBDataStore(ppss.lake_ss.lake_dir)
    query = """
        (SELECT timestamp
            FROM pdr_payouts
            ORDER BY timestamp ASC
            LIMIT 1)
        UNION ALL
        (SELECT timestamp
            FROM pdr_payouts
            ORDER BY timestamp DESC
            LIMIT 1);
    """
    data = db.query_data(query)
    if len(data["timestamp"]) < 2:
        logger.warning(
            "No prediction data found in database at %s", ppss.lake_ss.lake_dir
        )
        return False

    # Take the minimum rather than row 0, so the result does not depend on
    # the order in which UNION ALL returns its two rows.
    # Divided by 1000 to compare with start_date (seconds); presumably the
    # stored timestamps are milliseconds -- confirm against the lake schema.
    start_timestamp = min(data["timestamp"]) / 1000

    if start_timestamp > start_date:
        logger.warning(
            (
                "Not enough predictions data in the lake. "
                "Make sure you fetch data starting from %s up to today"
            ),
            start_date_str,
        )
        return False

    # TODO: also verify the lake is up to date, i.e. that the newest payout
    # timestamp plus one timeframe is not older than now. That check was
    # disabled in the original and is still not performed.

    return True
2 changes: 1 addition & 1 deletion pdr_backend/sim/test/test_multisim_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def _constructor_d_with_fast_runtime(tmpdir):

# sim ss
log_dir = os.path.join(tmpdir, "logs")
sim_d = sim_ss_test_dict(log_dir=log_dir, test_n=10)
sim_d = sim_ss_test_dict(log_dir=log_dir, use_own_model=True, test_n=10)
assert "sim_ss" in main_d
main_d["sim_ss"] = sim_d

Expand Down
35 changes: 33 additions & 2 deletions pdr_backend/sim/test/test_sim_engine.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import os

import shutil
import pytest
from dash import Dash
from enforce_typing import enforce_types
Expand Down Expand Up @@ -53,7 +53,7 @@ def test_sim_engine(tmpdir, check_chromedriver, dash_duo):

# sim ss
log_dir = os.path.join(tmpdir, "logs")
d = sim_ss_test_dict(log_dir, test_n=5)
d = sim_ss_test_dict(log_dir, True, test_n=5)
ppss.sim_ss = SimSS(d)

# go
Expand Down Expand Up @@ -100,3 +100,34 @@ def test_sim_engine(tmpdir, check_chromedriver, dash_duo):
assert "tab--selected" in tab.get_attribute("class")
for figure_name in figures:
dash_duo.find_element(f"#{figure_name}")


def test_get_past_predictions_from_chain():
    """The chain-predictions check returns False when the lake window is too short."""
    s = os.path.abspath("ppss.yaml")
    d = PPSS.constructor_dict(s)
    path = "my_lake_data"

    d["lake_ss"]["lake_dir"] = path
    d["lake_ss"]["st_timestr"] = "2 hours ago"
    d["trader_ss"]["feed.timeframe"] = "5m"
    d["sim_ss"]["test_n"] = 1000

    try:
        ppss = PPSS(d=d, network="sapphire-mainnet")
        feedsets = ppss.predictoor_ss.predict_train_feedsets
        sim_engine = SimEngine(ppss, feedsets[0])

        # run with wrong ppss lake config: the lake starts only 2 hours ago
        # while test_n=1000 epochs are requested, so not enough data is
        # available and the check is expected to fail
        resp = sim_engine._get_past_predictions_from_chain(ppss)
        assert resp is False
    finally:
        # clean up the lake dir even when the assertion above fails, so a
        # failed run does not leave stale data behind for later runs
        if os.path.exists(path):
            shutil.rmtree(path)

    # TODO: add the happy path (needs to be inspected and fixed): rebuild
    # PPSS with d["sim_ss"]["test_n"] = 20, create a new SimEngine and
    # assert that _get_past_predictions_from_chain(ppss) returns True.
2 changes: 1 addition & 1 deletion pdr_backend/sim/test/test_sim_logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ def test_compact_num(tmpdir, caplog):
ppss = PPSS(yaml_str=s, network="development")

log_dir = os.path.join(tmpdir, "logs")
d = sim_ss_test_dict(log_dir, test_n=5)
d = sim_ss_test_dict(log_dir, True, test_n=5)
ppss.sim_ss = SimSS(d)

st = Mock(spec=SimState)
Expand Down
1 change: 1 addition & 0 deletions ppss.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ sim_ss: # sim only
log_dir: logs
test_n: 5000 # number of epochs to simulate
tradetype: histmock # histmock | livemock | livereal
use_own_model: True # if True, use the model's own prediction signals; otherwise use on-chain prediction signals

multisim_ss:
approach: SimpleSweep # SimpleSweep | FastSweep (future) | ..
Expand Down
Loading