refactored evaluator passing tests
jrosenfeld13 committed Nov 28, 2023
1 parent 37b83f2 commit bd1b21b
Showing 2 changed files with 19 additions and 25 deletions.
20 changes: 9 additions & 11 deletions numerblox/evaluation.py
@@ -14,7 +14,7 @@
from .feature_groups import FNCV3_FEATURES

FAST_METRICS = ["mean_std_sharpe", "apy", "max_drawdown", "calmar_ratio"]
-ALL_METRICS = ["mean_std_sharpe"]
+ALL_METRICS = FAST_METRICS + ["autocorrelation", "max_feature_exposure", "smart_sharpe", "corr_with", "legacy_mean_std_sharpe", "legacy_mc_mean_std_sharpe", "ex_diss", "fn_mean_std_sharpe", "tb200_mean_std_sharpe", "tb500_mean_std_sharpe"]


class BaseEvaluator:
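This hunk replaces the placeholder ALL_METRICS with the full metric set layered on top of FAST_METRICS. A minimal usage sketch, using only names visible in this diff; the exact call pattern is an assumption:

```python
# Sketch: choosing a metric tier after this refactor. The constants and the
# evaluator class come from this diff; treat the call pattern itself as assumed.
from numerblox.evaluation import ALL_METRICS, FAST_METRICS, NumeraiClassicEvaluator

fast_eval = NumeraiClassicEvaluator(era_col="era")  # defaults to FAST_METRICS
full_eval = NumeraiClassicEvaluator(era_col="era", metrics_list=ALL_METRICS)
picky_eval = NumeraiClassicEvaluator(
    era_col="era",
    metrics_list=FAST_METRICS + ["max_feature_exposure"],  # fast set plus one extra
)
```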
@@ -37,10 +37,8 @@ class BaseEvaluator:
- Mean, Standard Deviation and Sharpe for TB200 (Buy top 200 stocks and sell bottom 200 stocks).
- Mean, Standard Deviation and Sharpe for TB500 (Buy top 500 stocks and sell bottom 500 stocks).
-:param era_col: Column name pointing to eras.
-Most commonly "era" for Numerai Classic and "friday_date" for Numerai Signals.
-:param fast_mode: Will skip compute intensive metrics if set to True,
-namely max_exposure, feature neutral mean, TB200 and TB500.
+:param metrics_list: List of metrics to calculate. Default: FAST_METRICS.
+:param era_col: Column name pointing to eras. Most commonly "era" for Numerai Classic and "friday_date" for Numerai Signals.
:param custom_functions: Additional functions called in evaluation.
Each custom function should:
- Be a callable (function or class that implements __call__).
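The rest of the custom-function contract is cut off by the diff, but the first requirement already permits class-based callables. A hedged sketch of one, with the (dataf, target_col, pred_col) signature inferred from the function-based custom_func in the test diff further down this page:

```python
import numpy as np
import pandas as pd

class ResidualStd:
    """Class-based custom metric: std of target-minus-prediction residuals.

    Sketch only: the argument names are inferred from custom_func in
    tests/test_evaluation.py below, not confirmed by this diff.
    """

    def __call__(self, dataf: pd.DataFrame, target_col: str, pred_col: str) -> float:
        return float(np.std(dataf[target_col] - dataf[pred_col]))

# Would be passed like any function, e.g.:
# NumeraiClassicEvaluator(era_col="era", custom_functions=[ResidualStd()])
```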
@@ -58,8 +56,8 @@

def __init__(
self,
-metrics_list: List[str],
era_col: str = "era",
+metrics_list: List[str] = FAST_METRICS,
custom_functions: List[Callable] = None,
):
self.era_col = era_col
@@ -846,11 +844,11 @@ class NumeraiClassicEvaluator(BaseEvaluator):
def __init__(
self,
era_col: str = "era",
-fast_mode=False,
+metrics_list=FAST_METRICS,
custom_functions: List[Callable] = None,
):
super().__init__(
-era_col=era_col, fast_mode=fast_mode, custom_functions=custom_functions
+era_col=era_col, metrics_list=metrics_list, custom_functions=custom_functions
)
self.fncv3_features = FNCV3_FEATURES

@@ -888,7 +886,7 @@ def full_evaluation(
benchmark_cols=benchmark_cols,
)
# Numerai Classic specific metrics
-if not self.fast_mode and valid_features:
+if valid_features and "fn_mean_std_sharpe" in self.metrics_list:
# Using only valid features defined in FNCV3_FEATURES
fnc_v3, fn_std_v3, fn_sharpe_v3 = self.feature_neutral_mean_std_sharpe(
dataf=dataf,
@@ -910,11 +908,11 @@ class NumeraiSignalsEvaluator(BaseEvaluator):
def __init__(
self,
era_col: str = "friday_date",
-fast_mode=False,
+metrics_list=FAST_METRICS,
custom_functions: List[Callable] = None,
):
super().__init__(
-era_col=era_col, fast_mode=fast_mode, custom_functions=custom_functions
+era_col=era_col, metrics_list=metrics_list, custom_functions=custom_functions
)

def get_neutralized_corr(
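With the gating change in full_evaluation, the compute-heavy feature-neutral block is now opt-in by metric name rather than controlled by fast_mode. A sketch of recovering the old fast_mode=False behavior for that block (names taken from this diff; the composition is an assumption):

```python
from numerblox.evaluation import FAST_METRICS, NumeraiClassicEvaluator

# Old: NumeraiClassicEvaluator(era_col="era", fast_mode=False) always ran the
# FNC v3 metrics. Now they run only when the metric name is requested:
evaluator = NumeraiClassicEvaluator(
    era_col="era",
    metrics_list=FAST_METRICS + ["fn_mean_std_sharpe"],
)
```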
24 changes: 10 additions & 14 deletions tests/test_evaluation.py
@@ -8,14 +8,10 @@

from utils import create_signals_sample_data, classic_test_data

-BASE_STATS_COLS = ["target", "mean", "std", "sharpe",
-"max_drawdown", "apy", "calmar_ratio", "autocorrelation",
-"legacy_mean", "legacy_std", "legacy_sharpe"]
-CLASSIC_SPECIFIC_STATS_COLS = ["feature_neutral_mean_v3", "feature_neutral_std_v3",
-"feature_neutral_sharpe_v3"]

+BASE_STATS_COLS = ["target", "mean", "std", "sharpe", "apy", "max_drawdown", "calmar_ratio"]

-CLASSIC_STATS_COLS = BASE_STATS_COLS + CLASSIC_SPECIFIC_STATS_COLS
+CLASSIC_STATS_COLS = BASE_STATS_COLS
SIGNALS_STATS_COLS = BASE_STATS_COLS


@@ -24,7 +20,7 @@ def test_numerai_classic_evaluator(classic_test_data):
df.loc[:, "prediction"] = np.random.uniform(size=len(df))
df.loc[:, "prediction_random"] = np.random.uniform(size=len(df))

-evaluator = NumeraiClassicEvaluator(era_col="era", fast_mode=False)
+evaluator = NumeraiClassicEvaluator(era_col="era")
val_stats = evaluator.full_evaluation(
dataf=df,
target_col="target",
@@ -44,7 +40,7 @@ def test_evaluation_benchmark_cols(classic_test_data):
df.loc[:, "benchmark2"] = np.random.uniform(size=len(df))
benchmark_cols = ["benchmark1", "benchmark2"]

-evaluator = NumeraiClassicEvaluator(era_col="era", fast_mode=False)
+evaluator = NumeraiClassicEvaluator(era_col="era")
val_stats = evaluator.full_evaluation(
dataf=df,
target_col="target",
Expand All @@ -65,7 +61,7 @@ def test_evaluation_benchmark_cols(classic_test_data):

def test_numerai_signals_evaluator(create_signals_sample_data):
df = create_signals_sample_data
-evaluator = NumeraiSignalsEvaluator(era_col="date", fast_mode=False)
+evaluator = NumeraiSignalsEvaluator(era_col="date")
val_stats = evaluator.full_evaluation(
dataf=df,
target_col="target",
@@ -84,7 +80,7 @@ def custom_func(dataf, target_col, pred_col):
""" Simple example func: Mean of residuals. """
return np.mean(dataf[target_col] - dataf[pred_col])

-evaluator = NumeraiClassicEvaluator(era_col="era", fast_mode=False, custom_functions=[custom_func])
+evaluator = NumeraiClassicEvaluator(era_col="era", custom_functions=[custom_func])
val_stats = evaluator.full_evaluation(
dataf=df,
target_col="target",
@@ -104,7 +100,7 @@ def custom_func(dataf, other_col):

# Initialization fails if any functions are defined incorrectly
with pytest.raises(AssertionError):
-NumeraiClassicEvaluator(era_col="era", fast_mode=False, custom_functions=[custom_func])
+NumeraiClassicEvaluator(era_col="era", custom_functions=[custom_func])


@pytest.fixture
@@ -125,7 +121,7 @@ def mock_api():

def test_get_neutralized_corr(create_signals_sample_data, mock_api):
df = create_signals_sample_data
-obj = NumeraiSignalsEvaluator(era_col="date", fast_mode=True)
+obj = NumeraiSignalsEvaluator(era_col="date")
result = obj.get_neutralized_corr(df, "test_model", Key("Hello", "World"))

# Asserting if the output is correct
@@ -147,7 +143,7 @@ def test_await_diagnostics_timeout(mock_api):


def test_get_raw_feature_exposures_pearson(classic_test_data):
-evaluator = NumeraiClassicEvaluator(era_col="era", fast_mode=False)
+evaluator = NumeraiClassicEvaluator(era_col="era")
np.random.seed(1)
classic_test_data["prediction"] = np.random.uniform(size=len(classic_test_data))

@@ -163,7 +159,7 @@ def test_get_feature_exposures_corrv2(classic_test_data):


def test_get_feature_exposures_corrv2(classic_test_data):
-evaluator = NumeraiClassicEvaluator(era_col="era", fast_mode=False)
+evaluator = NumeraiClassicEvaluator(era_col="era")
np.random.seed(1)
classic_test_data["prediction"] = np.random.uniform(size=len(classic_test_data))

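Read together, the test updates give the migration recipe: drop every fast_mode argument; callers that passed fast_mode=True keep the FAST_METRICS default, while former fast_mode=False callers opt into the heavier metrics explicitly. A hedged before/after sketch (the ALL_METRICS substitution is an assumption, not something these tests assert):

```python
from numerblox.evaluation import ALL_METRICS, NumeraiClassicEvaluator, NumeraiSignalsEvaluator

# Before: NumeraiSignalsEvaluator(era_col="date", fast_mode=True)
signals_eval = NumeraiSignalsEvaluator(era_col="date")  # FAST_METRICS by default

# Before: NumeraiClassicEvaluator(era_col="era", fast_mode=False)
classic_eval = NumeraiClassicEvaluator(era_col="era", metrics_list=ALL_METRICS)
```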
