From 8df727af7c658a5ea77fff740509abd3e2f2115a Mon Sep 17 00:00:00 2001 From: Amichay Oren Date: Tue, 22 Dec 2020 15:55:20 +0200 Subject: [PATCH] advance version & add new notebook Signed-off-by: Amichay Oren --- analysis/notebooks/strategy-p-value.ipynb | 389 ++++++++++++++++++++++ liualgotrader/__init__.py | 2 +- 2 files changed, 390 insertions(+), 1 deletion(-) create mode 100644 analysis/notebooks/strategy-p-value.ipynb diff --git a/analysis/notebooks/strategy-p-value.ipynb b/analysis/notebooks/strategy-p-value.ipynb new file mode 100644 index 000000000..79af05e8a --- /dev/null +++ b/analysis/notebooks/strategy-p-value.ipynb @@ -0,0 +1,389 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# How good was my strategy?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "environment = \"PAPER\" # PAPER / PROD / BACKTEST\n", + "date = \"2020-12-17\"\n", + "strategy ='short_trap_buster'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### imports" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from datetime import datetime, timedelta\n", + "import numpy as np\n", + "from pytz import timezone\n", + "import matplotlib.pyplot as plt\n", + "from IPython.display import HTML, display, Markdown\n", + "from liualgotrader.models.gain_loss import TradeAnalysis\n", + "from liualgotrader.models.trending_tickers import TrendingTickers\n", + "from liualgotrader.analytics.analysis import strategy_return_for_qty_one\n", + "import alpaca_trade_api as tradeapi" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### connect to data-source" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "api = tradeapi.REST(base_url=\"https://api.alpaca.markets\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## How to calculate strategy relevance, using p-value" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "* Pick $e𝑛𝑣𝑖𝑟𝑜𝑛𝑚𝑒𝑛𝑡$ , $𝑠𝑡𝑟𝑎𝑡𝑒𝑔𝑦$ , and a $𝑑𝑎𝑡𝑒$ when strategy was used,\n", + "* Let $window$ be the length in minutes of trading windows, during which $strategy$ may buy a stock,\n", + "* Let $C$ be the list of all scanned stocks during $date$,\n", + "* For $c \\in C$, Let $T_{0}(c)$=the time $c$ was added to $C$, \n", + "* For $c \\in C$, Let $Duration(c)$=$window-T_{0}(c)$, \n", + "* For $c \\in C$, Let $Hold(c)$=sum of time $stategy$ had position in $c$, \n", + "* Let $buy-actions$/$sell-actions$ represent all buy(/sell) decisions made by $strategy$," + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### $Prob(BUY) = \\frac{|buy-actions|}{\\sum \\limits _{c}^{C} Duration(c) - Hold(c)}$" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### $Prob(SELL) = \\frac{|sell-actions|}{\\sum \\limits _{c}^{C} Hold(c)}$" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### pseudo-code" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "gain-vs-strat = 0 \n", + " for i in range(10000):\n", + " gain = 0 \n", + " for c in C:\n", + " for t in range($T_{0}(c)$,$windows$):\n", + " gain += calculate gain based on Prob(BUY) and Prob(SELL)\n", + " gain-vs-strat += 1 if stategy_gain > gain else 0 " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## $1 - p_{value} = \\frac{gain-vs-strat}{10,000}$" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "NOTE: below 0.95 strategy is bad. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## The real stuff" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### calculate gains from my strategy" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "my_gains = await strategy_return_for_qty_one(\n", + " strategy=strategy, env=environment, start_date=datetime.strptime(date, \"%Y-%m-%d\")\n", + ")\n", + "my_gains" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "scanned = await TrendingTickers.load_by_date_and_env(env=environment, start_date=datetime.strptime(date, \"%Y-%m-%d\"))\n", + "scanned['hold'] = timedelta(0)\n", + "print(f\"loaded {len(scanned)} entries\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df = await TradeAnalysis.load(environment, datetime.strptime(date, \"%Y-%m-%d\"))\n", + "df = df.loc[df.algo_name == strategy]\n", + "print(f\"loaded {len(df)} entries\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for i, row in scanned.iterrows():\n", + " hold_time = df.loc[\n", + " (df.batch_id == row.batch_id) & (df.symbol == row.symbol)\n", + " ].hold.sum()\n", + " scanned.loc[scanned.index == i, \"hold\"] = (\n", + " hold_time if not pd.isnull(hold_time) else timedelta(0)\n", + " )\n", + " duration = timedelta(minutes=120) - (\n", + " row.create_tstamp.to_pydatetime()\n", + " - row.create_tstamp.to_pydatetime().replace(\n", + " hour=14, minute=30, second=0, microsecond=0\n", + " )\n", + " )\n", + " scanned.loc[scanned.index == i, \"duration\"] = (\n", + " duration if duration >= timedelta(days=0) else pd.NaT\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "scanned=scanned.dropna(how='any',axis=0) \n", + "scanned['delta'] = scanned.duration-scanned.hold" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "prob_buy = 1.0 * len(df) / (scanned.delta.sum().total_seconds() // 60)\n", + "prob_buy" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "prob_sell = 1.0 * len(df) / (scanned.hold.sum().total_seconds() // 60)\n", + "prob_sell" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def gen_rand(prob:float) -> int:\n", + " return np.random.choice([1,0],1,p=[prob,1-prob])[-1]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "utc = timezone(\"UTC\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "minute_history = {}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def load_data(symbol: str, start: datetime, end: datetime) -> pd.DataFrame:\n", + " print(\"load\", symbol, start)\n", + " retry = 5\n", + " while retry > 0:\n", + " try:\n", + " payload = api.polygon.historic_agg_v2(\n", + " symbol,\n", + " 1,\n", + " \"minute\",\n", + " _from=str(start),\n", + " to=str(end),\n", + " ).df\n", + " break\n", + " except Exception:\n", + " retry -= 1\n", + " continue\n", + " \n", + " return payload\n", + "\n", + "\n", + "def calculate_random_strategy():\n", + " gain = 0\n", + " for i, row in scanned.iterrows():\n", + " b = row.create_tstamp.replace(second=0, microsecond=0)\n", + " end_buy = b.replace(hour=16, minute=30, second=0, microsecond=0)\n", + " end_sell = b.replace(hour=21, minute=0, second=0, microsecond=0)\n", + " while b < end_buy:\n", + " to_buy = False\n", + " if gen_rand(prob_buy):\n", + " # print(f\"found buy {row.symbol} {b}\")\n", + " to_buy = True\n", + "\n", + " b += timedelta(minutes=1)\n", + " if to_buy:\n", + " s = b\n", + " while s < end_sell:\n", + " if gen_rand(prob_sell):\n", + " break\n", + " s += timedelta(minutes=1)\n", + " \n", + " if row.symbol not in minute_history:\n", + " minute_history[row.symbol] = load_data(row.symbol, b, s)\n", + " else:\n", + " try:\n", + " _ = minute_history[row.symbol].index.get_loc(\n", + " str(pd.Timestamp(b, tz=utc).tz_convert(\"US/Eastern\")),\n", + " method=\"pad\",\n", + " tolerance=None,\n", + " )\n", + " except Exception as e:\n", + " new_data = load_data(row.symbol, b, s)\n", + "\n", + " minute_history[row.symbol] = (\n", + " minute_history[row.symbol].append(new_data)\n", + " if minute_history[row.symbol].index[0] < new_data.index[0]\n", + " else new_data.append(minute_history[row.symbol])\n", + " )\n", + "\n", + " b_index = minute_history[row.symbol].index.get_loc(\n", + " str(pd.Timestamp(b, tz=utc).tz_convert(\"US/Eastern\")),\n", + " method=\"pad\",\n", + " tolerance=None,\n", + " )\n", + " s_index = minute_history[row.symbol].index.get_loc(\n", + " str(pd.Timestamp(s, tz=utc).tz_convert(\"US/Eastern\")),\n", + " method=\"pad\",\n", + " tolerance=None,\n", + " )\n", + " gain += (\n", + " minute_history[row.symbol].iloc[s_index].close\n", + " - minute_history[row.symbol].iloc[b_index].close\n", + " )\n", + "\n", + " b = s\n", + " return gain" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + " " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "i_win = 0\n", + "for i in range (10000):\n", + " random_gains = calculate_random_strategy()\n", + " if my_gains > random_gains:\n", + " i_win += 1\n", + " \n", + " print(i+1, i_win/(i+1), random_gains)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.0" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/liualgotrader/__init__.py b/liualgotrader/__init__.py index c75c10af3..483124ec9 100644 --- a/liualgotrader/__init__.py +++ b/liualgotrader/__init__.py @@ -1 +1 @@ -__version__ = "0.0.81" +__version__ = "0.0.82"