From 8df727af7c658a5ea77fff740509abd3e2f2115a Mon Sep 17 00:00:00 2001
From: Amichay Oren <amichay@sgeltd.com>
Date: Tue, 22 Dec 2020 15:55:20 +0200
Subject: [PATCH] advance version & add new notebook

Signed-off-by: Amichay Oren <amichay@sgeltd.com>
---
 analysis/notebooks/strategy-p-value.ipynb | 389 ++++++++++++++++++++++
 liualgotrader/__init__.py                 |   2 +-
 2 files changed, 390 insertions(+), 1 deletion(-)
 create mode 100644 analysis/notebooks/strategy-p-value.ipynb
diff --git a/analysis/notebooks/strategy-p-value.ipynb b/analysis/notebooks/strategy-p-value.ipynb
new file mode 100644
index 000000000..79af05e8a
--- /dev/null
+++ b/analysis/notebooks/strategy-p-value.ipynb
@@ -0,0 +1,389 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# How good was my strategy?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "environment = \"PAPER\" # PAPER / PROD / BACKTEST\n",
+    "date = \"2020-12-17\"\n",
+    "strategy ='short_trap_buster'"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### imports"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "from datetime import datetime, timedelta\n",
+    "import numpy as np\n",
+    "from pytz import timezone\n",
+    "import matplotlib.pyplot as plt\n",
+    "from IPython.display import HTML, display, Markdown\n",
+    "from liualgotrader.models.gain_loss import TradeAnalysis\n",
+    "from liualgotrader.models.trending_tickers import TrendingTickers\n",
+    "from liualgotrader.analytics.analysis import strategy_return_for_qty_one\n",
+    "import alpaca_trade_api as tradeapi"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### connect to data-source"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "api = tradeapi.REST(base_url=\"https://api.alpaca.markets\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## How to calculate strategy relevance, using p-value"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "* Pick $e𝑛𝑣𝑖𝑟𝑜𝑛𝑚𝑒𝑛𝑡$ , $𝑠𝑡𝑟𝑎𝑡𝑒𝑔𝑦$ , and a $𝑑𝑎𝑡𝑒$ when strategy was used,\n",
+    "* Let $window$ be the length in minutes of trading windows, during which $strategy$ may buy a stock,\n",
+    "* Let $C$ be the list of all scanned stocks during $date$,\n",
+    "* For $c \\in C$, Let $T_{0}(c)$=the time $c$ was added to $C$, \n",
+    "* For $c \\in C$, Let $Duration(c)$=$window-T_{0}(c)$, \n",
+    "* For $c \\in C$, Let $Hold(c)$=sum of time $stategy$ had position in $c$, \n",
+    "* Let $buy-actions$/$sell-actions$ represent all buy(/sell) decisions made by $strategy$,"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### $Prob(BUY) = \\frac{|buy-actions|}{\\sum \\limits _{c}^{C} Duration(c) - Hold(c)}$"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### $Prob(SELL) = \\frac{|sell-actions|}{\\sum \\limits _{c}^{C} Hold(c)}$"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### pseudo-code"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "<code>gain-vs-strat = 0 \n",
+    "    for i in range(10000):\n",
+    "        gain = 0 \n",
+    "        for c in C:\n",
+    "            for t in range($T_{0}(c)$,$windows$):\n",
+    "                gain += calculate gain based on Prob(BUY) and Prob(SELL)\n",
+    "        gain-vs-strat += 1 if stategy_gain > gain else 0</code>      "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## $1 - p_{value} = \\frac{gain-vs-strat}{10,000}$"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "NOTE: below 0.95 strategy is bad. "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## The real stuff"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### calculate gains from my strategy"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "my_gains = await strategy_return_for_qty_one(\n",
+    "    strategy=strategy, env=environment, start_date=datetime.strptime(date, \"%Y-%m-%d\")\n",
+    ")\n",
+    "my_gains"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "scanned = await TrendingTickers.load_by_date_and_env(env=environment, start_date=datetime.strptime(date, \"%Y-%m-%d\"))\n",
+    "scanned['hold'] = timedelta(0)\n",
+    "print(f\"loaded {len(scanned)} entries\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = await TradeAnalysis.load(environment,  datetime.strptime(date, \"%Y-%m-%d\"))\n",
+    "df = df.loc[df.algo_name == strategy]\n",
+    "print(f\"loaded {len(df)} entries\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "for i, row in scanned.iterrows():\n",
+    "    hold_time = df.loc[\n",
+    "        (df.batch_id == row.batch_id) & (df.symbol == row.symbol)\n",
+    "    ].hold.sum()\n",
+    "    scanned.loc[scanned.index == i, \"hold\"] = (\n",
+    "        hold_time if not pd.isnull(hold_time) else timedelta(0)\n",
+    "    )\n",
+    "    duration = timedelta(minutes=120) - (\n",
+    "        row.create_tstamp.to_pydatetime()\n",
+    "        - row.create_tstamp.to_pydatetime().replace(\n",
+    "            hour=14, minute=30, second=0, microsecond=0\n",
+    "        )\n",
+    "    )\n",
+    "    scanned.loc[scanned.index == i, \"duration\"] = (\n",
+    "        duration if duration >= timedelta(days=0) else pd.NaT\n",
+    "    )"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "scanned=scanned.dropna(how='any',axis=0) \n",
+    "scanned['delta'] = scanned.duration-scanned.hold"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "prob_buy = 1.0 * len(df) / (scanned.delta.sum().total_seconds() // 60)\n",
+    "prob_buy"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "prob_sell = 1.0 * len(df) / (scanned.hold.sum().total_seconds() // 60)\n",
+    "prob_sell"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def gen_rand(prob:float) -> int:\n",
+    "    return np.random.choice([1,0],1,p=[prob,1-prob])[-1]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "utc = timezone(\"UTC\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "minute_history = {}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def load_data(symbol: str, start: datetime, end: datetime) -> pd.DataFrame:\n",
+    "    print(\"load\", symbol, start)\n",
+    "    retry = 5\n",
+    "    while retry > 0:\n",
+    "        try:\n",
+    "            payload = api.polygon.historic_agg_v2(\n",
+    "                symbol,\n",
+    "                1,\n",
+    "                \"minute\",\n",
+    "                _from=str(start),\n",
+    "                to=str(end),\n",
+    "            ).df\n",
+    "            break\n",
+    "        except Exception:\n",
+    "            retry -= 1\n",
+    "            continue\n",
+    "            \n",
+    "    return payload\n",
+    "\n",
+    "\n",
+    "def calculate_random_strategy():\n",
+    "    gain = 0\n",
+    "    for i, row in scanned.iterrows():\n",
+    "        b = row.create_tstamp.replace(second=0, microsecond=0)\n",
+    "        end_buy = b.replace(hour=16, minute=30, second=0, microsecond=0)\n",
+    "        end_sell = b.replace(hour=21, minute=0, second=0, microsecond=0)\n",
+    "        while b < end_buy:\n",
+    "            to_buy = False\n",
+    "            if gen_rand(prob_buy):\n",
+    "                # print(f\"found buy {row.symbol} {b}\")\n",
+    "                to_buy = True\n",
+    "\n",
+    "            b += timedelta(minutes=1)\n",
+    "            if to_buy:\n",
+    "                s = b\n",
+    "                while s < end_sell:\n",
+    "                    if gen_rand(prob_sell):\n",
+    "                        break\n",
+    "                    s += timedelta(minutes=1)\n",
+    "                    \n",
+    "                if row.symbol not in minute_history:\n",
+    "                    minute_history[row.symbol] = load_data(row.symbol, b, s)\n",
+    "                else:\n",
+    "                    try:\n",
+    "                        _ = minute_history[row.symbol].index.get_loc(\n",
+    "                            str(pd.Timestamp(b, tz=utc).tz_convert(\"US/Eastern\")),\n",
+    "                            method=\"pad\",\n",
+    "                            tolerance=None,\n",
+    "                        )\n",
+    "                    except Exception as e:\n",
+    "                        new_data = load_data(row.symbol, b, s)\n",
+    "\n",
+    "                        minute_history[row.symbol] = (\n",
+    "                            minute_history[row.symbol].append(new_data)\n",
+    "                            if minute_history[row.symbol].index[0] < new_data.index[0]\n",
+    "                            else new_data.append(minute_history[row.symbol])\n",
+    "                        )\n",
+    "\n",
+    "                b_index = minute_history[row.symbol].index.get_loc(\n",
+    "                    str(pd.Timestamp(b, tz=utc).tz_convert(\"US/Eastern\")),\n",
+    "                    method=\"pad\",\n",
+    "                    tolerance=None,\n",
+    "                )\n",
+    "                s_index = minute_history[row.symbol].index.get_loc(\n",
+    "                    str(pd.Timestamp(s, tz=utc).tz_convert(\"US/Eastern\")),\n",
+    "                    method=\"pad\",\n",
+    "                    tolerance=None,\n",
+    "                )\n",
+    "                gain += (\n",
+    "                    minute_history[row.symbol].iloc[s_index].close\n",
+    "                    - minute_history[row.symbol].iloc[b_index].close\n",
+    "                )\n",
+    "\n",
+    "                b = s\n",
+    "    return gain"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    " "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "i_win = 0\n",
+    "for i in range (10000):\n",
+    "    random_gains = calculate_random_strategy()\n",
+    "    if my_gains > random_gains:\n",
+    "        i_win += 1\n",
+    "        \n",
+    "    print(i+1, i_win/(i+1), random_gains)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/liualgotrader/__init__.py b/liualgotrader/__init__.py
index c75c10af3..483124ec9 100644
--- a/liualgotrader/__init__.py
+++ b/liualgotrader/__init__.py
@@ -1 +1 @@
-__version__ = "0.0.81"
+__version__ = "0.0.82"