From 065bebfcda1a4e9e4d235a2878f3f061d71ed622 Mon Sep 17 00:00:00 2001 From: wac Date: Wed, 6 Jan 2021 17:23:55 +0800 Subject: [PATCH] # Solve no stock on the day, data alignment --- FinRL_multiple_stock_trading.ipynb | 17677 ++++++++++++++------------- 1 file changed, 8854 insertions(+), 8823 deletions(-) diff --git a/FinRL_multiple_stock_trading.ipynb b/FinRL_multiple_stock_trading.ipynb index 3fa507d8b..b61712189 100644 --- a/FinRL_multiple_stock_trading.ipynb +++ b/FinRL_multiple_stock_trading.ipynb @@ -1,8930 +1,8961 @@ { - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "name": "FinRL_multiple_stock_trading.ipynb", - "provenance": [], - "collapsed_sections": [ - "uijiWgkuh1jB", - "MRiOtrywfAo1", - "_gDkU-j-fCmZ", - "3Zpv4S0-fDBv" - ], - "toc_visible": true, - "include_colab_link": true - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.10" + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "FinRL_multiple_stock_trading.ipynb", + "provenance": [], + "collapsed_sections": [ + "uijiWgkuh1jB", + "MRiOtrywfAo1", + "_gDkU-j-fCmZ", + "3Zpv4S0-fDBv" + ], + "toc_visible": true, + "include_colab_link": true + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.10" + }, + "pycharm": { + "stem_cell": { + "cell_type": "raw", + "source": [], + "metadata": { + "collapsed": false } + } + } + }, + "cells": [ + { + "cell_type": "markdown", + 
"metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] }, - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "view-in-github", - "colab_type": "text" - }, - "source": [ - "\"Open" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "gXaoZs2lh1hi" - }, - "source": [ - "# Deep Reinforcement Learning for Stock Trading from Scratch: Multiple Stock Trading\n", - "\n", - "Tutorials to use OpenAI DRL to trade multiple stocks in one Jupyter Notebook | Presented at NeurIPS 2020: Deep RL Workshop\n", - "\n", - "* This blog is based on our paper: FinRL: A Deep Reinforcement Learning Library for Automated Stock Trading in Quantitative Finance, presented at NeurIPS 2020: Deep RL Workshop.\n", - "* Check out medium blog for detailed explanations: https://towardsdatascience.com/finrl-for-quantitative-finance-tutorial-for-multiple-stock-trading-7b00763b7530\n", - "* Please report any issues to our Github: https://github.com/AI4Finance-LLC/FinRL-Library/issues\n", - "* **Pytorch Version** \n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "lGunVt8oLCVS" - }, - "source": [ - "# Content" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "HOzAKQ-SLGX6" - }, - "source": [ - "* [1. Problem Definition](#0)\n", - "* [2. Getting Started - Load Python packages](#1)\n", - " * [2.1. Install Packages](#1.1) \n", - " * [2.2. Check Additional Packages](#1.2)\n", - " * [2.3. Import Packages](#1.3)\n", - " * [2.4. Create Folders](#1.4)\n", - "* [3. Download Data](#2)\n", - "* [4. Preprocess Data](#3) \n", - " * [4.1. Technical Indicators](#3.1)\n", - " * [4.2. Perform Feature Engineering](#3.2)\n", - "* [5.Build Environment](#4) \n", - " * [5.1. Training & Trade Data Split](#4.1)\n", - " * [5.2. User-defined Environment](#4.2) \n", - " * [5.3. Initialize Environment](#4.3) \n", - "* [6.Implement DRL Algorithms](#5) \n", - "* [7.Backtesting Performance](#6) \n", - " * [7.1. 
BackTestStats](#6.1)\n", - " * [7.2. BackTestPlot](#6.2) \n", - " * [7.3. Baseline Stats](#6.3) \n", - " * [7.3. Compare to Stock Market Index](#6.4) " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "sApkDlD9LIZv" - }, - "source": [ - "\n", - "# Part 1. Problem Definition" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "HjLD2TZSLKZ-" - }, - "source": [ - "This problem is to design an automated trading solution for single stock trading. We model the stock trading process as a Markov Decision Process (MDP). We then formulate our trading goal as a maximization problem.\n", - "\n", - "The algorithm is trained using Deep Reinforcement Learning (DRL) algorithms and the components of the reinforcement learning environment are:\n", - "\n", - "\n", - "* Action: The action space describes the allowed actions that the agent interacts with the\n", - "environment. Normally, a ∈ A includes three actions: a ∈ {−1, 0, 1}, where −1, 0, 1 represent\n", - "selling, holding, and buying one stock. Also, an action can be carried upon multiple shares. We use\n", - "an action space {−k, ..., −1, 0, 1, ..., k}, where k denotes the number of shares. For example, \"Buy\n", - "10 shares of AAPL\" or \"Sell 10 shares of AAPL\" are 10 or −10, respectively\n", - "\n", - "* Reward function: r(s, a, s′) is the incentive mechanism for an agent to learn a better action. The change of the portfolio value when action a is taken at state s and arriving at new state s', i.e., r(s, a, s′) = v′ − v, where v′ and v represent the portfolio\n", - "values at state s′ and s, respectively\n", - "\n", - "* State: The state space describes the observations that the agent receives from the environment. 
Just as a human trader needs to analyze various information before executing a trade, so\n", - "our trading agent observes many different features to better learn in an interactive environment.\n", - "\n", - "* Environment: Dow 30 consituents\n", - "\n", - "\n", - "The data of the single stock that we will be using for this case study is obtained from Yahoo Finance API. The data contains Open-High-Low-Close price and volume.\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Ffsre789LY08" - }, - "source": [ - "\n", - "# Part 2. Getting Started- Load Python Packages" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Uy5_PTmOh1hj" - }, - "source": [ - "\n", - "## 2.1. Install all the packages through FinRL library\n" - ] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "mPT0ipYE28wL", - "outputId": "802ae0b5-d88e-46ba-8082-9eb5890f9cba" - }, - "source": [ - "## install finrl library\n", - "!pip install git+https://github.com/AI4Finance-LLC/FinRL-Library.git" - ], - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "text": [ - "Collecting git+https://github.com/AI4Finance-LLC/FinRL-Library.git\n", - " Cloning https://github.com/AI4Finance-LLC/FinRL-Library.git to /tmp/pip-req-build-4_oi9rum\n", - " Running command git clone -q https://github.com/AI4Finance-LLC/FinRL-Library.git /tmp/pip-req-build-4_oi9rum\n", - "Requirement already satisfied: numpy in /usr/local/lib/python3.6/dist-packages (from finrl==0.0.2) (1.19.4)\n", - "Requirement already satisfied: pandas in /usr/local/lib/python3.6/dist-packages (from finrl==0.0.2) (1.1.5)\n", - "Collecting stockstats\n", - " Downloading https://files.pythonhosted.org/packages/32/41/d3828c5bc0a262cb3112a4024108a3b019c183fa3b3078bff34bf25abf91/stockstats-0.3.2-py2.py3-none-any.whl\n", - "Collecting yfinance\n", - " Downloading 
https://files.pythonhosted.org/packages/7a/e8/b9d7104d3a4bf39924799067592d9e59119fcfc900a425a12e80a3123ec8/yfinance-0.1.55.tar.gz\n", - "Requirement already satisfied: matplotlib in /usr/local/lib/python3.6/dist-packages (from finrl==0.0.2) (3.2.2)\n", - "Requirement already satisfied: scikit-learn>=0.21.0 in /usr/local/lib/python3.6/dist-packages (from finrl==0.0.2) (0.22.2.post1)\n", - "Requirement already satisfied: gym>=0.17 in /usr/local/lib/python3.6/dist-packages (from finrl==0.0.2) (0.17.3)\n", - "Collecting stable-baselines3[extra]\n", - "\u001b[?25l Downloading https://files.pythonhosted.org/packages/76/7c/ec89fd9a51c2ff640f150479069be817136c02f02349b5dd27a6e3bb8b3d/stable_baselines3-0.10.0-py3-none-any.whl (145kB)\n", - "\u001b[K |████████████████████████████████| 153kB 6.0MB/s \n", - "\u001b[?25hRequirement already satisfied: pytest in /usr/local/lib/python3.6/dist-packages (from finrl==0.0.2) (3.6.4)\n", - "Requirement already satisfied: setuptools>=41.4.0 in /usr/local/lib/python3.6/dist-packages (from finrl==0.0.2) (51.0.0)\n", - "Requirement already satisfied: wheel>=0.33.6 in /usr/local/lib/python3.6/dist-packages (from finrl==0.0.2) (0.36.2)\n", - "Collecting pyfolio@ git+https://github.com/quantopian/pyfolio.git#egg=pyfolio-0.9.2\n", - " Cloning https://github.com/quantopian/pyfolio.git to /tmp/pip-install-r44a2amx/pyfolio\n", - " Running command git clone -q https://github.com/quantopian/pyfolio.git /tmp/pip-install-r44a2amx/pyfolio\n", - "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.6/dist-packages (from pandas->finrl==0.0.2) (2.8.1)\n", - "Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.6/dist-packages (from pandas->finrl==0.0.2) (2018.9)\n", - "Collecting int-date>=0.1.7\n", - " Downloading https://files.pythonhosted.org/packages/43/27/31803df15173ab341fe7548c14154b54227dfd8f630daa09a1c6e7db52f7/int_date-0.1.8-py2.py3-none-any.whl\n", - "Requirement already satisfied: requests>=2.20 in 
/usr/local/lib/python3.6/dist-packages (from yfinance->finrl==0.0.2) (2.23.0)\n", - "Requirement already satisfied: multitasking>=0.0.7 in /usr/local/lib/python3.6/dist-packages (from yfinance->finrl==0.0.2) (0.0.9)\n", - "Collecting lxml>=4.5.1\n", - "\u001b[?25l Downloading https://files.pythonhosted.org/packages/bd/78/56a7c88a57d0d14945472535d0df9fb4bbad7d34ede658ec7961635c790e/lxml-4.6.2-cp36-cp36m-manylinux1_x86_64.whl (5.5MB)\n", - "\u001b[K |████████████████████████████████| 5.5MB 18.0MB/s \n", - "\u001b[?25hRequirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.6/dist-packages (from matplotlib->finrl==0.0.2) (0.10.0)\n", - "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib->finrl==0.0.2) (2.4.7)\n", - "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib->finrl==0.0.2) (1.3.1)\n", - "Requirement already satisfied: joblib>=0.11 in /usr/local/lib/python3.6/dist-packages (from scikit-learn>=0.21.0->finrl==0.0.2) (1.0.0)\n", - "Requirement already satisfied: scipy>=0.17.0 in /usr/local/lib/python3.6/dist-packages (from scikit-learn>=0.21.0->finrl==0.0.2) (1.4.1)\n", - "Requirement already satisfied: pyglet<=1.5.0,>=1.4.0 in /usr/local/lib/python3.6/dist-packages (from gym>=0.17->finrl==0.0.2) (1.5.0)\n", - "Requirement already satisfied: cloudpickle<1.7.0,>=1.2.0 in /usr/local/lib/python3.6/dist-packages (from gym>=0.17->finrl==0.0.2) (1.3.0)\n", - "Requirement already satisfied: torch>=1.4.0 in /usr/local/lib/python3.6/dist-packages (from stable-baselines3[extra]->finrl==0.0.2) (1.7.0+cu101)\n", - "Requirement already satisfied: pillow; extra == \"extra\" in /usr/local/lib/python3.6/dist-packages (from stable-baselines3[extra]->finrl==0.0.2) (7.0.0)\n", - "Requirement already satisfied: atari-py~=0.2.0; extra == \"extra\" in /usr/local/lib/python3.6/dist-packages (from stable-baselines3[extra]->finrl==0.0.2) 
(0.2.6)\n", - "Requirement already satisfied: psutil; extra == \"extra\" in /usr/local/lib/python3.6/dist-packages (from stable-baselines3[extra]->finrl==0.0.2) (5.4.8)\n", - "Requirement already satisfied: opencv-python; extra == \"extra\" in /usr/local/lib/python3.6/dist-packages (from stable-baselines3[extra]->finrl==0.0.2) (4.1.2.30)\n", - "Requirement already satisfied: tensorboard; extra == \"extra\" in /usr/local/lib/python3.6/dist-packages (from stable-baselines3[extra]->finrl==0.0.2) (2.4.0)\n", - "Requirement already satisfied: pluggy<0.8,>=0.5 in /usr/local/lib/python3.6/dist-packages (from pytest->finrl==0.0.2) (0.7.1)\n", - "Requirement already satisfied: six>=1.10.0 in /usr/local/lib/python3.6/dist-packages (from pytest->finrl==0.0.2) (1.15.0)\n", - "Requirement already satisfied: atomicwrites>=1.0 in /usr/local/lib/python3.6/dist-packages (from pytest->finrl==0.0.2) (1.4.0)\n", - "Requirement already satisfied: attrs>=17.4.0 in /usr/local/lib/python3.6/dist-packages (from pytest->finrl==0.0.2) (20.3.0)\n", - "Requirement already satisfied: py>=1.5.0 in /usr/local/lib/python3.6/dist-packages (from pytest->finrl==0.0.2) (1.10.0)\n", - "Requirement already satisfied: more-itertools>=4.0.0 in /usr/local/lib/python3.6/dist-packages (from pytest->finrl==0.0.2) (8.6.0)\n", - "Requirement already satisfied: ipython>=3.2.3 in /usr/local/lib/python3.6/dist-packages (from pyfolio@ git+https://github.com/quantopian/pyfolio.git#egg=pyfolio-0.9.2->finrl==0.0.2) (5.5.0)\n", - "Requirement already satisfied: seaborn>=0.7.1 in /usr/local/lib/python3.6/dist-packages (from pyfolio@ git+https://github.com/quantopian/pyfolio.git#egg=pyfolio-0.9.2->finrl==0.0.2) (0.11.0)\n", - "Collecting empyrical>=0.5.0\n", - "\u001b[?25l Downloading https://files.pythonhosted.org/packages/74/43/1b997c21411c6ab7c96dc034e160198272c7a785aeea7654c9bcf98bec83/empyrical-0.5.5.tar.gz (52kB)\n", - "\u001b[K |████████████████████████████████| 61kB 6.9MB/s \n", - "\u001b[?25hRequirement already 
satisfied: idna<3,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests>=2.20->yfinance->finrl==0.0.2) (2.10)\n", - "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests>=2.20->yfinance->finrl==0.0.2) (3.0.4)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests>=2.20->yfinance->finrl==0.0.2) (2020.12.5)\n", - "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests>=2.20->yfinance->finrl==0.0.2) (1.24.3)\n", - "Requirement already satisfied: future in /usr/local/lib/python3.6/dist-packages (from pyglet<=1.5.0,>=1.4.0->gym>=0.17->finrl==0.0.2) (0.16.0)\n", - "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.6/dist-packages (from torch>=1.4.0->stable-baselines3[extra]->finrl==0.0.2) (3.7.4.3)\n", - "Requirement already satisfied: dataclasses in /usr/local/lib/python3.6/dist-packages (from torch>=1.4.0->stable-baselines3[extra]->finrl==0.0.2) (0.8)\n", - "Requirement already satisfied: werkzeug>=0.11.15 in /usr/local/lib/python3.6/dist-packages (from tensorboard; extra == \"extra\"->stable-baselines3[extra]->finrl==0.0.2) (1.0.1)\n", - "Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.6/dist-packages (from tensorboard; extra == \"extra\"->stable-baselines3[extra]->finrl==0.0.2) (3.3.3)\n", - "Requirement already satisfied: google-auth-oauthlib<0.5,>=0.4.1 in /usr/local/lib/python3.6/dist-packages (from tensorboard; extra == \"extra\"->stable-baselines3[extra]->finrl==0.0.2) (0.4.2)\n", - "Requirement already satisfied: grpcio>=1.24.3 in /usr/local/lib/python3.6/dist-packages (from tensorboard; extra == \"extra\"->stable-baselines3[extra]->finrl==0.0.2) (1.32.0)\n", - "Requirement already satisfied: tensorboard-plugin-wit>=1.6.0 in /usr/local/lib/python3.6/dist-packages (from tensorboard; extra == 
\"extra\"->stable-baselines3[extra]->finrl==0.0.2) (1.7.0)\n", - "Requirement already satisfied: google-auth<2,>=1.6.3 in /usr/local/lib/python3.6/dist-packages (from tensorboard; extra == \"extra\"->stable-baselines3[extra]->finrl==0.0.2) (1.17.2)\n", - "Requirement already satisfied: absl-py>=0.4 in /usr/local/lib/python3.6/dist-packages (from tensorboard; extra == \"extra\"->stable-baselines3[extra]->finrl==0.0.2) (0.10.0)\n", - "Requirement already satisfied: protobuf>=3.6.0 in /usr/local/lib/python3.6/dist-packages (from tensorboard; extra == \"extra\"->stable-baselines3[extra]->finrl==0.0.2) (3.12.4)\n", - "Requirement already satisfied: simplegeneric>0.8 in /usr/local/lib/python3.6/dist-packages (from ipython>=3.2.3->pyfolio@ git+https://github.com/quantopian/pyfolio.git#egg=pyfolio-0.9.2->finrl==0.0.2) (0.8.1)\n", - "Requirement already satisfied: prompt-toolkit<2.0.0,>=1.0.4 in /usr/local/lib/python3.6/dist-packages (from ipython>=3.2.3->pyfolio@ git+https://github.com/quantopian/pyfolio.git#egg=pyfolio-0.9.2->finrl==0.0.2) (1.0.18)\n", - "Requirement already satisfied: decorator in /usr/local/lib/python3.6/dist-packages (from ipython>=3.2.3->pyfolio@ git+https://github.com/quantopian/pyfolio.git#egg=pyfolio-0.9.2->finrl==0.0.2) (4.4.2)\n", - "Requirement already satisfied: pexpect; sys_platform != \"win32\" in /usr/local/lib/python3.6/dist-packages (from ipython>=3.2.3->pyfolio@ git+https://github.com/quantopian/pyfolio.git#egg=pyfolio-0.9.2->finrl==0.0.2) (4.8.0)\n", - "Requirement already satisfied: pickleshare in /usr/local/lib/python3.6/dist-packages (from ipython>=3.2.3->pyfolio@ git+https://github.com/quantopian/pyfolio.git#egg=pyfolio-0.9.2->finrl==0.0.2) (0.7.5)\n", - "Requirement already satisfied: pygments in /usr/local/lib/python3.6/dist-packages (from ipython>=3.2.3->pyfolio@ git+https://github.com/quantopian/pyfolio.git#egg=pyfolio-0.9.2->finrl==0.0.2) (2.6.1)\n", - "Requirement already satisfied: traitlets>=4.2 in 
/usr/local/lib/python3.6/dist-packages (from ipython>=3.2.3->pyfolio@ git+https://github.com/quantopian/pyfolio.git#egg=pyfolio-0.9.2->finrl==0.0.2) (4.3.3)\n", - "Requirement already satisfied: pandas-datareader>=0.2 in /usr/local/lib/python3.6/dist-packages (from empyrical>=0.5.0->pyfolio@ git+https://github.com/quantopian/pyfolio.git#egg=pyfolio-0.9.2->finrl==0.0.2) (0.9.0)\n", - "Requirement already satisfied: importlib-metadata; python_version < \"3.8\" in /usr/local/lib/python3.6/dist-packages (from markdown>=2.6.8->tensorboard; extra == \"extra\"->stable-baselines3[extra]->finrl==0.0.2) (3.3.0)\n", - "Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.6/dist-packages (from google-auth-oauthlib<0.5,>=0.4.1->tensorboard; extra == \"extra\"->stable-baselines3[extra]->finrl==0.0.2) (1.3.0)\n", - "Requirement already satisfied: rsa<5,>=3.1.4; python_version >= \"3\" in /usr/local/lib/python3.6/dist-packages (from google-auth<2,>=1.6.3->tensorboard; extra == \"extra\"->stable-baselines3[extra]->finrl==0.0.2) (4.6)\n", - "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.6/dist-packages (from google-auth<2,>=1.6.3->tensorboard; extra == \"extra\"->stable-baselines3[extra]->finrl==0.0.2) (0.2.8)\n", - "Requirement already satisfied: cachetools<5.0,>=2.0.0 in /usr/local/lib/python3.6/dist-packages (from google-auth<2,>=1.6.3->tensorboard; extra == \"extra\"->stable-baselines3[extra]->finrl==0.0.2) (4.2.0)\n", - "Requirement already satisfied: wcwidth in /usr/local/lib/python3.6/dist-packages (from prompt-toolkit<2.0.0,>=1.0.4->ipython>=3.2.3->pyfolio@ git+https://github.com/quantopian/pyfolio.git#egg=pyfolio-0.9.2->finrl==0.0.2) (0.2.5)\n", - "Requirement already satisfied: ptyprocess>=0.5 in /usr/local/lib/python3.6/dist-packages (from pexpect; sys_platform != \"win32\"->ipython>=3.2.3->pyfolio@ git+https://github.com/quantopian/pyfolio.git#egg=pyfolio-0.9.2->finrl==0.0.2) (0.6.0)\n", - "Requirement already 
satisfied: ipython-genutils in /usr/local/lib/python3.6/dist-packages (from traitlets>=4.2->ipython>=3.2.3->pyfolio@ git+https://github.com/quantopian/pyfolio.git#egg=pyfolio-0.9.2->finrl==0.0.2) (0.2.0)\n", - "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.6/dist-packages (from importlib-metadata; python_version < \"3.8\"->markdown>=2.6.8->tensorboard; extra == \"extra\"->stable-baselines3[extra]->finrl==0.0.2) (3.4.0)\n", - "Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.6/dist-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<0.5,>=0.4.1->tensorboard; extra == \"extra\"->stable-baselines3[extra]->finrl==0.0.2) (3.1.0)\n", - "Requirement already satisfied: pyasn1>=0.1.3 in /usr/local/lib/python3.6/dist-packages (from rsa<5,>=3.1.4; python_version >= \"3\"->google-auth<2,>=1.6.3->tensorboard; extra == \"extra\"->stable-baselines3[extra]->finrl==0.0.2) (0.4.8)\n", - "Building wheels for collected packages: finrl, yfinance, pyfolio, empyrical\n", - " Building wheel for finrl (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - " Created wheel for finrl: filename=finrl-0.0.2-cp36-none-any.whl size=23235 sha256=96343730296d82eab621f59e797ee5070763f62f0781366ad0c7f891320730c3\n", - " Stored in directory: /tmp/pip-ephem-wheel-cache-cesdfnqn/wheels/9c/19/bf/c644def96612df1ad42c94d5304966797eaa3221dffc5efe0b\n", - " Building wheel for yfinance (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - " Created wheel for yfinance: filename=yfinance-0.1.55-py2.py3-none-any.whl size=22616 sha256=81424134934f5e39ce03a7cacee299829bc9064e6e8723329c6586438ee93839\n", - " Stored in directory: /root/.cache/pip/wheels/04/98/cc/2702a4242d60bdc14f48b4557c427ded1fe92aedf257d4565c\n", - " Building wheel for pyfolio (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", - " Created wheel for pyfolio: filename=pyfolio-0.9.2+75.g4b901f6-cp36-none-any.whl size=75764 sha256=d386c94dd6aa49b4acd82579c5e23f839043337a87eea7f28a1a9c56f7f0b1c0\n", - " Stored in directory: /tmp/pip-ephem-wheel-cache-cesdfnqn/wheels/43/ce/d9/6752fb6e03205408773235435205a0519d2c608a94f1976e56\n", - " Building wheel for empyrical (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - " Created wheel for empyrical: filename=empyrical-0.5.5-cp36-none-any.whl size=39765 sha256=fbecbe48a3eb6e2d7ad06f9f3de71b0cd0a03d8b4d93092ab2ed9dab47cd8ef6\n", - " Stored in directory: /root/.cache/pip/wheels/ea/b2/c8/6769d8444d2f2e608fae2641833110668d0ffd1abeb2e9f3fc\n", - "Successfully built finrl yfinance pyfolio empyrical\n", - "Installing collected packages: int-date, stockstats, lxml, yfinance, stable-baselines3, empyrical, pyfolio, finrl\n", - " Found existing installation: lxml 4.2.6\n", - " Uninstalling lxml-4.2.6:\n", - " Successfully uninstalled lxml-4.2.6\n", - "Successfully installed empyrical-0.5.5 finrl-0.0.2 int-date-0.1.8 lxml-4.6.2 pyfolio-0.9.2+75.g4b901f6 stable-baselines3-0.10.0 stockstats-0.3.2 yfinance-0.1.55\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "osBHhVysOEzi" - }, - "source": [ - "\n", - "\n", - "## 2.2. Check if the additional packages needed are present, if not install them. \n", - "* Yahoo Finance API\n", - "* pandas\n", - "* numpy\n", - "* matplotlib\n", - "* stockstats\n", - "* OpenAI gym\n", - "* stable-baselines\n", - "* tensorflow\n", - "* pyfolio" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "nGv01K8Sh1hn" - }, - "source": [ - "\n", - "## 2.3. 
Import Packages" - ] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "lPqeTTwoh1hn", - "outputId": "c437c266-2780-4c50-af8b-6868e7fdaa1f" - }, - "source": [ - "import pandas as pd\n", - "import numpy as np\n", - "import matplotlib\n", - "import matplotlib.pyplot as plt\n", - "# matplotlib.use('Agg')\n", - "import datetime\n", - "\n", - "%matplotlib inline\n", - "from finrl.config import config\n", - "from finrl.marketdata.yahoodownloader import YahooDownloader\n", - "from finrl.preprocessing.preprocessors import FeatureEngineer\n", - "from finrl.preprocessing.data import data_split\n", - "from finrl.env.env_stocktrading import StockTradingEnv\n", - "from finrl.model.models import DRLAgent\n", - "from finrl.trade.backtest import BackTestStats, BaselineStats, BackTestPlot\n", - "\n", - "from pprint import pprint\n", - "\n", - "import sys\n", - "sys.path.append(\"../FinRL-Library\")\n", - "\n" - ], - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.6/dist-packages/pyfolio/pos.py:27: UserWarning: Module \"zipline.assets\" not found; multipliers will not be applied to position notionals.\n", - " 'Module \"zipline.assets\" not found; multipliers will not be applied'\n" - ], - "name": "stderr" - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "T2owTj985RW4" - }, - "source": [ - "\n", - "## 2.4. 
Create Folders" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "w9A8CN5R5PuZ" - }, - "source": [ - "import os\n", - "if not os.path.exists(\"./\" + config.DATA_SAVE_DIR):\n", - " os.makedirs(\"./\" + config.DATA_SAVE_DIR)\n", - "if not os.path.exists(\"./\" + config.TRAINED_MODEL_DIR):\n", - " os.makedirs(\"./\" + config.TRAINED_MODEL_DIR)\n", - "if not os.path.exists(\"./\" + config.TENSORBOARD_LOG_DIR):\n", - " os.makedirs(\"./\" + config.TENSORBOARD_LOG_DIR)\n", - "if not os.path.exists(\"./\" + config.RESULTS_DIR):\n", - " os.makedirs(\"./\" + config.RESULTS_DIR)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "A289rQWMh1hq" - }, - "source": [ - "\n", - "# Part 3. Download Data\n", - "Yahoo Finance is a website that provides stock data, financial news, financial reports, etc. All the data provided by Yahoo Finance is free.\n", - "* FinRL uses a class **YahooDownloader** to fetch data from Yahoo Finance API\n", - "* Call Limit: Using the Public API (without authentication), you are limited to 2,000 requests per hour per IP (or up to a total of 48,000 requests a day).\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "NPeQ7iS-LoMm" - }, - "source": [ - "\n", - "\n", - "-----\n", - "class YahooDownloader:\n", - " Provides methods for retrieving daily stock data from\n", - " Yahoo Finance API\n", - "\n", - " Attributes\n", - " ----------\n", - " start_date : str\n", - " start date of the data (modified from config.py)\n", - " end_date : str\n", - " end date of the data (modified from config.py)\n", - " ticker_list : list\n", - " a list of stock tickers (modified from config.py)\n", - "\n", - " Methods\n", - " -------\n", - " fetch_data()\n", - " Fetches data from yahoo API\n" - ] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - }, - "id": "h3XJnvrbLp-C", - "outputId": "87dea23f-469d-4e9d-de91-0f8a74929de2" 
- }, - "source": [ - "# from config.py start_date is a string\n", - "config.START_DATE" - ], - "execution_count": null, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "application/vnd.google.colaboratory.intrinsic+json": { - "type": "string" - }, - "text/plain": [ - "'2009-01-01'" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 4 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - }, - "id": "FUnY8WEfLq3C", - "outputId": "c635ae69-a13e-408f-d932-9d386d1d6dcf" - }, - "source": [ - "# from config.py end_date is a string\n", - "config.END_DATE" - ], - "execution_count": null, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "application/vnd.google.colaboratory.intrinsic+json": { - "type": "string" - }, - "text/plain": [ - "'2020-12-01'" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 5 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "JzqRRTOX6aFu", - "outputId": "d3baf63f-948a-49f9-f6f2-b7241971b8ea" - }, - "source": [ - "print(config.DOW_30_TICKER)" - ], - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "text": [ - "['AAPL', 'MSFT', 'JPM', 'V', 'RTX', 'PG', 'GS', 'NKE', 'DIS', 'AXP', 'HD', 'INTC', 'WMT', 'IBM', 'MRK', 'UNH', 'KO', 'CAT', 'TRV', 'JNJ', 'CVX', 'MCD', 'VZ', 'CSCO', 'XOM', 'BA', 'MMM', 'PFE', 'WBA', 'DD']\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "yCKm4om-s9kE", - "outputId": "932583d8-f98b-4243-c02d-375f7272db1a" - }, - "source": [ - "df = YahooDownloader(start_date = '2009-01-01',\n", - " end_date = '2021-01-01',\n", - " ticker_list = config.DOW_30_TICKER).fetch_data()" - ], - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "text": [ - 
"[*********************100%***********************] 1 of 1 completed\n", - "[*********************100%***********************] 1 of 1 completed\n", - "[*********************100%***********************] 1 of 1 completed\n", - "[*********************100%***********************] 1 of 1 completed\n", - "[*********************100%***********************] 1 of 1 completed\n", - "[*********************100%***********************] 1 of 1 completed\n", - "[*********************100%***********************] 1 of 1 completed\n", - "[*********************100%***********************] 1 of 1 completed\n", - "[*********************100%***********************] 1 of 1 completed\n", - "[*********************100%***********************] 1 of 1 completed\n", - "[*********************100%***********************] 1 of 1 completed\n", - "[*********************100%***********************] 1 of 1 completed\n", - "[*********************100%***********************] 1 of 1 completed\n", - "[*********************100%***********************] 1 of 1 completed\n", - "[*********************100%***********************] 1 of 1 completed\n", - "[*********************100%***********************] 1 of 1 completed\n", - "[*********************100%***********************] 1 of 1 completed\n", - "[*********************100%***********************] 1 of 1 completed\n", - "[*********************100%***********************] 1 of 1 completed\n", - "[*********************100%***********************] 1 of 1 completed\n", - "[*********************100%***********************] 1 of 1 completed\n", - "[*********************100%***********************] 1 of 1 completed\n", - "[*********************100%***********************] 1 of 1 completed\n", - "[*********************100%***********************] 1 of 1 completed\n", - "[*********************100%***********************] 1 of 1 completed\n", - "[*********************100%***********************] 1 of 1 completed\n", - 
"[*********************100%***********************] 1 of 1 completed\n", - "[*********************100%***********************] 1 of 1 completed\n", - "[*********************100%***********************] 1 of 1 completed\n", - "[*********************100%***********************] 1 of 1 completed\n", - "Shape of DataFrame: (90630, 7)\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "CV3HrZHLh1hy", - "outputId": "b7b78172-8c8a-41c9-c8a6-0167edb9bd11" - }, - "source": [ - "df.shape" - ], - "execution_count": null, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "(90630, 7)" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 62 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 195 - }, - "id": "4hYkeaPiICHS", - "outputId": "ce9d7463-a74c-4917-c96d-848a1e8ad493" - }, - "source": [ - "df.sort_values(['date','tic'],ignore_index=True).head()" - ], - "execution_count": null, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
dateopenhighlowclosevolumetic
02009-01-023.0671433.2514293.0414292.795913746015200.0AAPL
12009-01-0218.57000019.52000018.40000015.80062410955700.0AXP
22009-01-0242.79999945.56000142.77999933.6809357010200.0BA
32009-01-0244.91000046.98000044.70999932.5144007117200.0CAT
42009-01-0216.41000017.00000016.25000012.78608740980600.0CSCO
\n", - "
" - ], - "text/plain": [ - " date open high low close volume tic\n", - "0 2009-01-02 3.067143 3.251429 3.041429 2.795913 746015200.0 AAPL\n", - "1 2009-01-02 18.570000 19.520000 18.400000 15.800624 10955700.0 AXP\n", - "2 2009-01-02 42.799999 45.560001 42.779999 33.680935 7010200.0 BA\n", - "3 2009-01-02 44.910000 46.980000 44.709999 32.514400 7117200.0 CAT\n", - "4 2009-01-02 16.410000 17.000000 16.250000 12.786087 40980600.0 CSCO" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 5 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "uqC6c40Zh1iH" - }, - "source": [ - "# Part 4: Preprocess Data\n", - "Data preprocessing is a crucial step for training a high quality machine learning model. We need to check for missing data and do feature engineering in order to convert the data into a model-ready state.\n", - "* Add technical indicators. In practical trading, various information needs to be taken into account, for example the historical stock prices, current holding shares, technical indicators, etc. In this article, we demonstrate two trend-following technical indicators: MACD and RSI.\n", - "* Add turbulence index. Risk-aversion reflects whether an investor will choose to preserve the capital. It also influences one's trading strategy when facing different market volatility level. To control the risk in a worst-case scenario, such as financial crisis of 2007–2008, FinRL employs the financial turbulence index that measures extreme asset price fluctuation." 
- ] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "Le342Hc1h1iI", - "outputId": "7049c022-122e-47c3-ef30-e9a8481808bd" - }, - "source": [ - "fe = FeatureEngineer(\n", - " use_technical_indicator=True,\n", - " tech_indicator_list = config.TECHNICAL_INDICATORS_LIST,\n", - " use_turbulence=True,\n", - " user_defined_feature = False)\n", - "\n", - "processed = fe.preprocess_data(df)" - ], - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "text": [ - "Successfully added technical indicators\n", - "Successfully added turbulence index\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 340 - }, - "id": "grvhGJJII3Xn", - "outputId": "91d09c37-b0e9-4c5c-d532-967e40d11f41" - }, - "source": [ - "processed.sort_values(['date','tic'],ignore_index=True).head(10)" - ], - "execution_count": null, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
dateopenhighlowclosevolumeticmacdrsi_30cci_30dx_30turbulence
02009-01-023.0671433.2514293.0414292.795913746015200.0AAPL0.0100.066.666667100.00.0
12009-01-0218.57000019.52000018.40000015.80062410955700.0AXP0.0100.066.666667100.00.0
22009-01-0242.79999945.56000142.77999933.6809357010200.0BA0.0100.066.666667100.00.0
32009-01-0244.91000046.98000044.70999932.5144007117200.0CAT0.0100.066.666667100.00.0
42009-01-0216.41000017.00000016.25000012.78608740980600.0CSCO0.0100.066.666667100.00.0
52009-01-0274.23000377.30000373.58000248.04326213695900.0CVX0.0100.066.666667100.00.0
62009-01-0221.60523422.06068020.99322914.52727613251000.0DD0.0100.066.666667100.00.0
72009-01-0222.76000024.03000122.50000020.5974969796600.0DIS0.0100.066.666667100.00.0
82009-01-0284.01999787.62000382.19000272.84446714088500.0GS0.0100.066.666667100.00.0
92009-01-0223.07000024.19000122.95999917.90945214902500.0HD0.0100.066.666667100.00.0
\n", - "
" - ], - "text/plain": [ - " date open high ... cci_30 dx_30 turbulence\n", - "0 2009-01-02 3.067143 3.251429 ... 66.666667 100.0 0.0\n", - "1 2009-01-02 18.570000 19.520000 ... 66.666667 100.0 0.0\n", - "2 2009-01-02 42.799999 45.560001 ... 66.666667 100.0 0.0\n", - "3 2009-01-02 44.910000 46.980000 ... 66.666667 100.0 0.0\n", - "4 2009-01-02 16.410000 17.000000 ... 66.666667 100.0 0.0\n", - "5 2009-01-02 74.230003 77.300003 ... 66.666667 100.0 0.0\n", - "6 2009-01-02 21.605234 22.060680 ... 66.666667 100.0 0.0\n", - "7 2009-01-02 22.760000 24.030001 ... 66.666667 100.0 0.0\n", - "8 2009-01-02 84.019997 87.620003 ... 66.666667 100.0 0.0\n", - "9 2009-01-02 23.070000 24.190001 ... 66.666667 100.0 0.0\n", - "\n", - "[10 rows x 12 columns]" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 6 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "-QsYaY0Dh1iw" - }, - "source": [ - "\n", - "# Part 5. Design Environment\n", - "Considering the stochastic and interactive nature of the automated stock trading tasks, a financial task is modeled as a **Markov Decision Process (MDP)** problem. The training process involves observing stock price change, taking an action and reward's calculation to have the agent adjusting its strategy accordingly. By interacting with the environment, the trading agent will derive a trading strategy with the maximized rewards as time proceeds.\n", - "\n", - "Our trading environments, based on OpenAI Gym framework, simulate live stock markets with real market data according to the principle of time-driven simulation.\n", - "\n", - "The action space describes the allowed actions that the agent interacts with the environment. Normally, action a includes three actions: {-1, 0, 1}, where -1, 0, 1 represent selling, holding, and buying one share. Also, an action can be carried upon multiple shares. 
We use an action space {-k,…,-1, 0, 1, …, k}, where k denotes the number of shares to buy and -k denotes the number of shares to sell. For example, \"Buy 10 shares of AAPL\" or \"Sell 10 shares of AAPL\" are 10 or -10, respectively. The continuous action space needs to be normalized to [-1, 1], since the policy is defined on a Gaussian distribution, which needs to be normalized and symmetric." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "5TOhcryx44bb" - }, - "source": [ - "## Training data split: 2009-01-01 to 2018-12-31\n", - "## Trade data split: 2019-01-01 to 2020-09-30" - ] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "W0qaVGjLtgbI", - "outputId": "c98aeb90-84e3-4b83-9671-d679f3fe148f" - }, - "source": [ - "train = data_split(processed, '2009-01-01','2019-01-01')\n", - "trade = data_split(processed, '2019-01-01','2021-01-01')\n", - "print(len(train))\n", - "print(len(trade))" - ], - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "text": [ - "75480\n", - "15150\n" - ], - "name": "stdout" - } - ] + { + "cell_type": "markdown", + "metadata": { + "id": "gXaoZs2lh1hi" + }, + "source": [ + "# Deep Reinforcement Learning for Stock Trading from Scratch: Multiple Stock Trading\n", + "\n", + "Tutorials to use OpenAI DRL to trade multiple stocks in one Jupyter Notebook | Presented at NeurIPS 2020: Deep RL Workshop\n", + "\n", + "* This blog is based on our paper: FinRL: A Deep Reinforcement Learning Library for Automated Stock Trading in Quantitative Finance, presented at NeurIPS 2020: Deep RL Workshop.\n", + "* Check out medium blog for detailed explanations: https://towardsdatascience.com/finrl-for-quantitative-finance-tutorial-for-multiple-stock-trading-7b00763b7530\n", + "* Please report any issues to our Github: https://github.com/AI4Finance-LLC/FinRL-Library/issues\n", + "* **Pytorch Version** \n", + "\n" + ] + }, + { + "cell_type": "markdown", + 
"metadata": { + "id": "lGunVt8oLCVS" + }, + "source": [ + "# Content" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HOzAKQ-SLGX6" + }, + "source": [ + "* [1. Problem Definition](#0)\n", + "* [2. Getting Started - Load Python packages](#1)\n", + " * [2.1. Install Packages](#1.1) \n", + " * [2.2. Check Additional Packages](#1.2)\n", + " * [2.3. Import Packages](#1.3)\n", + " * [2.4. Create Folders](#1.4)\n", + "* [3. Download Data](#2)\n", + "* [4. Preprocess Data](#3) \n", + " * [4.1. Technical Indicators](#3.1)\n", + " * [4.2. Perform Feature Engineering](#3.2)\n", + "* [5. Build Environment](#4) \n", + " * [5.1. Training & Trade Data Split](#4.1)\n", + " * [5.2. User-defined Environment](#4.2) \n", + " * [5.3. Initialize Environment](#4.3) \n", + "* [6. Implement DRL Algorithms](#5) \n", + "* [7. Backtesting Performance](#6) \n", + " * [7.1. BackTestStats](#6.1)\n", + " * [7.2. BackTestPlot](#6.2) \n", + " * [7.3. Baseline Stats](#6.3) \n", + " * [7.4. Compare to Stock Market Index](#6.4) " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "sApkDlD9LIZv" + }, + "source": [ + "\n", + "# Part 1. Problem Definition" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HjLD2TZSLKZ-" + }, + "source": [ + "This problem is to design an automated trading solution for multiple stock trading. We model the stock trading process as a Markov Decision Process (MDP). We then formulate our trading goal as a maximization problem.\n", + "\n", + "The algorithm is trained using Deep Reinforcement Learning (DRL) algorithms and the components of the reinforcement learning environment are:\n", + "\n", + "\n", + "* Action: The action space describes the allowed actions through which the agent interacts with the\n", + "environment. Normally, a ∈ A includes three actions: a ∈ {−1, 0, 1}, where −1, 0, 1 represent\n", + "selling, holding, and buying one stock. Also, an action can be carried upon multiple shares. 
We use\n", + "an action space {−k, ..., −1, 0, 1, ..., k}, where k denotes the number of shares. For example, \"Buy\n", + "10 shares of AAPL\" or \"Sell 10 shares of AAPL\" are 10 or −10, respectively\n", + "\n", + "* Reward function: r(s, a, s′) is the incentive mechanism for an agent to learn a better action. The reward is the change of the portfolio value when action a is taken at state s and the agent arrives at the new state s′, i.e., r(s, a, s′) = v′ − v, where v′ and v represent the portfolio\n", + "values at state s′ and s, respectively\n", + "\n", + "* State: The state space describes the observations that the agent receives from the environment. Just as a human trader needs to analyze various information before executing a trade, so\n", + "our trading agent observes many different features to better learn in an interactive environment.\n", + "\n", + "* Environment: Dow 30 constituents\n", + "\n", + "\n", + "The data of the stocks that we will be using for this case study is obtained from the Yahoo Finance API. The data contains Open-High-Low-Close price and volume.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ffsre789LY08" + }, + "source": [ + "\n", + "# Part 2. Getting Started - Load Python Packages" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Uy5_PTmOh1hj" + }, + "source": [ + "\n", + "## 2.1. 
Install all the packages through FinRL library\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "mPT0ipYE28wL", + "outputId": "802ae0b5-d88e-46ba-8082-9eb5890f9cba" + }, + "source": [ + "## install finrl library\n", + "!pip install git+https://github.com/AI4Finance-LLC/FinRL-Library.git" + ], + "execution_count": null, + "outputs": [ { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 279 - }, - "id": "p52zNCOhTtLR", - "outputId": "c41f9be0-a99f-4108-a427-3112b6bd4129" - }, - "source": [ - "train.head()" - ], - "execution_count": null, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
dateopenhighlowclosevolumeticmacdrsi_30cci_30dx_30turbulence
02009-01-023.0671433.2514293.0414292.795913746015200.0AAPL0.0100.066.666667100.00.0
02009-01-0218.57000019.52000018.40000015.80062410955700.0AXP0.0100.066.666667100.00.0
02009-01-0242.79999945.56000142.77999933.6809357010200.0BA0.0100.066.666667100.00.0
02009-01-0244.91000046.98000044.70999932.5144007117200.0CAT0.0100.066.666667100.00.0
02009-01-0216.41000017.00000016.25000012.78608740980600.0CSCO0.0100.066.666667100.00.0
\n", - "
" - ], - "text/plain": [ - " date open high ... cci_30 dx_30 turbulence\n", - "0 2009-01-02 3.067143 3.251429 ... 66.666667 100.0 0.0\n", - "0 2009-01-02 18.570000 19.520000 ... 66.666667 100.0 0.0\n", - "0 2009-01-02 42.799999 45.560001 ... 66.666667 100.0 0.0\n", - "0 2009-01-02 44.910000 46.980000 ... 66.666667 100.0 0.0\n", - "0 2009-01-02 16.410000 17.000000 ... 66.666667 100.0 0.0\n", - "\n", - "[5 rows x 12 columns]" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 67 - } - ] + "output_type": "stream", + "text": [ + "Collecting git+https://github.com/AI4Finance-LLC/FinRL-Library.git\n", + " Cloning https://github.com/AI4Finance-LLC/FinRL-Library.git to /tmp/pip-req-build-4_oi9rum\n", + " Running command git clone -q https://github.com/AI4Finance-LLC/FinRL-Library.git /tmp/pip-req-build-4_oi9rum\n", + "Requirement already satisfied: numpy in /usr/local/lib/python3.6/dist-packages (from finrl==0.0.2) (1.19.4)\n", + "Requirement already satisfied: pandas in /usr/local/lib/python3.6/dist-packages (from finrl==0.0.2) (1.1.5)\n", + "Collecting stockstats\n", + " Downloading https://files.pythonhosted.org/packages/32/41/d3828c5bc0a262cb3112a4024108a3b019c183fa3b3078bff34bf25abf91/stockstats-0.3.2-py2.py3-none-any.whl\n", + "Collecting yfinance\n", + " Downloading https://files.pythonhosted.org/packages/7a/e8/b9d7104d3a4bf39924799067592d9e59119fcfc900a425a12e80a3123ec8/yfinance-0.1.55.tar.gz\n", + "Requirement already satisfied: matplotlib in /usr/local/lib/python3.6/dist-packages (from finrl==0.0.2) (3.2.2)\n", + "Requirement already satisfied: scikit-learn>=0.21.0 in /usr/local/lib/python3.6/dist-packages (from finrl==0.0.2) (0.22.2.post1)\n", + "Requirement already satisfied: gym>=0.17 in /usr/local/lib/python3.6/dist-packages (from finrl==0.0.2) (0.17.3)\n", + "Collecting stable-baselines3[extra]\n", + "\u001b[?25l Downloading 
https://files.pythonhosted.org/packages/76/7c/ec89fd9a51c2ff640f150479069be817136c02f02349b5dd27a6e3bb8b3d/stable_baselines3-0.10.0-py3-none-any.whl (145kB)\n", + "\u001b[K |████████████████████████████████| 153kB 6.0MB/s \n", + "\u001b[?25hRequirement already satisfied: pytest in /usr/local/lib/python3.6/dist-packages (from finrl==0.0.2) (3.6.4)\n", + "Requirement already satisfied: setuptools>=41.4.0 in /usr/local/lib/python3.6/dist-packages (from finrl==0.0.2) (51.0.0)\n", + "Requirement already satisfied: wheel>=0.33.6 in /usr/local/lib/python3.6/dist-packages (from finrl==0.0.2) (0.36.2)\n", + "Collecting pyfolio@ git+https://github.com/quantopian/pyfolio.git#egg=pyfolio-0.9.2\n", + " Cloning https://github.com/quantopian/pyfolio.git to /tmp/pip-install-r44a2amx/pyfolio\n", + " Running command git clone -q https://github.com/quantopian/pyfolio.git /tmp/pip-install-r44a2amx/pyfolio\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.6/dist-packages (from pandas->finrl==0.0.2) (2.8.1)\n", + "Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.6/dist-packages (from pandas->finrl==0.0.2) (2018.9)\n", + "Collecting int-date>=0.1.7\n", + " Downloading https://files.pythonhosted.org/packages/43/27/31803df15173ab341fe7548c14154b54227dfd8f630daa09a1c6e7db52f7/int_date-0.1.8-py2.py3-none-any.whl\n", + "Requirement already satisfied: requests>=2.20 in /usr/local/lib/python3.6/dist-packages (from yfinance->finrl==0.0.2) (2.23.0)\n", + "Requirement already satisfied: multitasking>=0.0.7 in /usr/local/lib/python3.6/dist-packages (from yfinance->finrl==0.0.2) (0.0.9)\n", + "Collecting lxml>=4.5.1\n", + "\u001b[?25l Downloading https://files.pythonhosted.org/packages/bd/78/56a7c88a57d0d14945472535d0df9fb4bbad7d34ede658ec7961635c790e/lxml-4.6.2-cp36-cp36m-manylinux1_x86_64.whl (5.5MB)\n", + "\u001b[K |████████████████████████████████| 5.5MB 18.0MB/s \n", + "\u001b[?25hRequirement already satisfied: cycler>=0.10 in 
/usr/local/lib/python3.6/dist-packages (from matplotlib->finrl==0.0.2) (0.10.0)\n", + "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib->finrl==0.0.2) (2.4.7)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib->finrl==0.0.2) (1.3.1)\n", + "Requirement already satisfied: joblib>=0.11 in /usr/local/lib/python3.6/dist-packages (from scikit-learn>=0.21.0->finrl==0.0.2) (1.0.0)\n", + "Requirement already satisfied: scipy>=0.17.0 in /usr/local/lib/python3.6/dist-packages (from scikit-learn>=0.21.0->finrl==0.0.2) (1.4.1)\n", + "Requirement already satisfied: pyglet<=1.5.0,>=1.4.0 in /usr/local/lib/python3.6/dist-packages (from gym>=0.17->finrl==0.0.2) (1.5.0)\n", + "Requirement already satisfied: cloudpickle<1.7.0,>=1.2.0 in /usr/local/lib/python3.6/dist-packages (from gym>=0.17->finrl==0.0.2) (1.3.0)\n", + "Requirement already satisfied: torch>=1.4.0 in /usr/local/lib/python3.6/dist-packages (from stable-baselines3[extra]->finrl==0.0.2) (1.7.0+cu101)\n", + "Requirement already satisfied: pillow; extra == \"extra\" in /usr/local/lib/python3.6/dist-packages (from stable-baselines3[extra]->finrl==0.0.2) (7.0.0)\n", + "Requirement already satisfied: atari-py~=0.2.0; extra == \"extra\" in /usr/local/lib/python3.6/dist-packages (from stable-baselines3[extra]->finrl==0.0.2) (0.2.6)\n", + "Requirement already satisfied: psutil; extra == \"extra\" in /usr/local/lib/python3.6/dist-packages (from stable-baselines3[extra]->finrl==0.0.2) (5.4.8)\n", + "Requirement already satisfied: opencv-python; extra == \"extra\" in /usr/local/lib/python3.6/dist-packages (from stable-baselines3[extra]->finrl==0.0.2) (4.1.2.30)\n", + "Requirement already satisfied: tensorboard; extra == \"extra\" in /usr/local/lib/python3.6/dist-packages (from stable-baselines3[extra]->finrl==0.0.2) (2.4.0)\n", + "Requirement already satisfied: pluggy<0.8,>=0.5 in 
/usr/local/lib/python3.6/dist-packages (from pytest->finrl==0.0.2) (0.7.1)\n", + "Requirement already satisfied: six>=1.10.0 in /usr/local/lib/python3.6/dist-packages (from pytest->finrl==0.0.2) (1.15.0)\n", + "Requirement already satisfied: atomicwrites>=1.0 in /usr/local/lib/python3.6/dist-packages (from pytest->finrl==0.0.2) (1.4.0)\n", + "Requirement already satisfied: attrs>=17.4.0 in /usr/local/lib/python3.6/dist-packages (from pytest->finrl==0.0.2) (20.3.0)\n", + "Requirement already satisfied: py>=1.5.0 in /usr/local/lib/python3.6/dist-packages (from pytest->finrl==0.0.2) (1.10.0)\n", + "Requirement already satisfied: more-itertools>=4.0.0 in /usr/local/lib/python3.6/dist-packages (from pytest->finrl==0.0.2) (8.6.0)\n", + "Requirement already satisfied: ipython>=3.2.3 in /usr/local/lib/python3.6/dist-packages (from pyfolio@ git+https://github.com/quantopian/pyfolio.git#egg=pyfolio-0.9.2->finrl==0.0.2) (5.5.0)\n", + "Requirement already satisfied: seaborn>=0.7.1 in /usr/local/lib/python3.6/dist-packages (from pyfolio@ git+https://github.com/quantopian/pyfolio.git#egg=pyfolio-0.9.2->finrl==0.0.2) (0.11.0)\n", + "Collecting empyrical>=0.5.0\n", + "\u001b[?25l Downloading https://files.pythonhosted.org/packages/74/43/1b997c21411c6ab7c96dc034e160198272c7a785aeea7654c9bcf98bec83/empyrical-0.5.5.tar.gz (52kB)\n", + "\u001b[K |████████████████████████████████| 61kB 6.9MB/s \n", + "\u001b[?25hRequirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests>=2.20->yfinance->finrl==0.0.2) (2.10)\n", + "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests>=2.20->yfinance->finrl==0.0.2) (3.0.4)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests>=2.20->yfinance->finrl==0.0.2) (2020.12.5)\n", + "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from 
requests>=2.20->yfinance->finrl==0.0.2) (1.24.3)\n", + "Requirement already satisfied: future in /usr/local/lib/python3.6/dist-packages (from pyglet<=1.5.0,>=1.4.0->gym>=0.17->finrl==0.0.2) (0.16.0)\n", + "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.6/dist-packages (from torch>=1.4.0->stable-baselines3[extra]->finrl==0.0.2) (3.7.4.3)\n", + "Requirement already satisfied: dataclasses in /usr/local/lib/python3.6/dist-packages (from torch>=1.4.0->stable-baselines3[extra]->finrl==0.0.2) (0.8)\n", + "Requirement already satisfied: werkzeug>=0.11.15 in /usr/local/lib/python3.6/dist-packages (from tensorboard; extra == \"extra\"->stable-baselines3[extra]->finrl==0.0.2) (1.0.1)\n", + "Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.6/dist-packages (from tensorboard; extra == \"extra\"->stable-baselines3[extra]->finrl==0.0.2) (3.3.3)\n", + "Requirement already satisfied: google-auth-oauthlib<0.5,>=0.4.1 in /usr/local/lib/python3.6/dist-packages (from tensorboard; extra == \"extra\"->stable-baselines3[extra]->finrl==0.0.2) (0.4.2)\n", + "Requirement already satisfied: grpcio>=1.24.3 in /usr/local/lib/python3.6/dist-packages (from tensorboard; extra == \"extra\"->stable-baselines3[extra]->finrl==0.0.2) (1.32.0)\n", + "Requirement already satisfied: tensorboard-plugin-wit>=1.6.0 in /usr/local/lib/python3.6/dist-packages (from tensorboard; extra == \"extra\"->stable-baselines3[extra]->finrl==0.0.2) (1.7.0)\n", + "Requirement already satisfied: google-auth<2,>=1.6.3 in /usr/local/lib/python3.6/dist-packages (from tensorboard; extra == \"extra\"->stable-baselines3[extra]->finrl==0.0.2) (1.17.2)\n", + "Requirement already satisfied: absl-py>=0.4 in /usr/local/lib/python3.6/dist-packages (from tensorboard; extra == \"extra\"->stable-baselines3[extra]->finrl==0.0.2) (0.10.0)\n", + "Requirement already satisfied: protobuf>=3.6.0 in /usr/local/lib/python3.6/dist-packages (from tensorboard; extra == 
\"extra\"->stable-baselines3[extra]->finrl==0.0.2) (3.12.4)\n", + "Requirement already satisfied: simplegeneric>0.8 in /usr/local/lib/python3.6/dist-packages (from ipython>=3.2.3->pyfolio@ git+https://github.com/quantopian/pyfolio.git#egg=pyfolio-0.9.2->finrl==0.0.2) (0.8.1)\n", + "Requirement already satisfied: prompt-toolkit<2.0.0,>=1.0.4 in /usr/local/lib/python3.6/dist-packages (from ipython>=3.2.3->pyfolio@ git+https://github.com/quantopian/pyfolio.git#egg=pyfolio-0.9.2->finrl==0.0.2) (1.0.18)\n", + "Requirement already satisfied: decorator in /usr/local/lib/python3.6/dist-packages (from ipython>=3.2.3->pyfolio@ git+https://github.com/quantopian/pyfolio.git#egg=pyfolio-0.9.2->finrl==0.0.2) (4.4.2)\n", + "Requirement already satisfied: pexpect; sys_platform != \"win32\" in /usr/local/lib/python3.6/dist-packages (from ipython>=3.2.3->pyfolio@ git+https://github.com/quantopian/pyfolio.git#egg=pyfolio-0.9.2->finrl==0.0.2) (4.8.0)\n", + "Requirement already satisfied: pickleshare in /usr/local/lib/python3.6/dist-packages (from ipython>=3.2.3->pyfolio@ git+https://github.com/quantopian/pyfolio.git#egg=pyfolio-0.9.2->finrl==0.0.2) (0.7.5)\n", + "Requirement already satisfied: pygments in /usr/local/lib/python3.6/dist-packages (from ipython>=3.2.3->pyfolio@ git+https://github.com/quantopian/pyfolio.git#egg=pyfolio-0.9.2->finrl==0.0.2) (2.6.1)\n", + "Requirement already satisfied: traitlets>=4.2 in /usr/local/lib/python3.6/dist-packages (from ipython>=3.2.3->pyfolio@ git+https://github.com/quantopian/pyfolio.git#egg=pyfolio-0.9.2->finrl==0.0.2) (4.3.3)\n", + "Requirement already satisfied: pandas-datareader>=0.2 in /usr/local/lib/python3.6/dist-packages (from empyrical>=0.5.0->pyfolio@ git+https://github.com/quantopian/pyfolio.git#egg=pyfolio-0.9.2->finrl==0.0.2) (0.9.0)\n", + "Requirement already satisfied: importlib-metadata; python_version < \"3.8\" in /usr/local/lib/python3.6/dist-packages (from markdown>=2.6.8->tensorboard; extra == 
\"extra\"->stable-baselines3[extra]->finrl==0.0.2) (3.3.0)\n", + "Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.6/dist-packages (from google-auth-oauthlib<0.5,>=0.4.1->tensorboard; extra == \"extra\"->stable-baselines3[extra]->finrl==0.0.2) (1.3.0)\n", + "Requirement already satisfied: rsa<5,>=3.1.4; python_version >= \"3\" in /usr/local/lib/python3.6/dist-packages (from google-auth<2,>=1.6.3->tensorboard; extra == \"extra\"->stable-baselines3[extra]->finrl==0.0.2) (4.6)\n", + "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.6/dist-packages (from google-auth<2,>=1.6.3->tensorboard; extra == \"extra\"->stable-baselines3[extra]->finrl==0.0.2) (0.2.8)\n", + "Requirement already satisfied: cachetools<5.0,>=2.0.0 in /usr/local/lib/python3.6/dist-packages (from google-auth<2,>=1.6.3->tensorboard; extra == \"extra\"->stable-baselines3[extra]->finrl==0.0.2) (4.2.0)\n", + "Requirement already satisfied: wcwidth in /usr/local/lib/python3.6/dist-packages (from prompt-toolkit<2.0.0,>=1.0.4->ipython>=3.2.3->pyfolio@ git+https://github.com/quantopian/pyfolio.git#egg=pyfolio-0.9.2->finrl==0.0.2) (0.2.5)\n", + "Requirement already satisfied: ptyprocess>=0.5 in /usr/local/lib/python3.6/dist-packages (from pexpect; sys_platform != \"win32\"->ipython>=3.2.3->pyfolio@ git+https://github.com/quantopian/pyfolio.git#egg=pyfolio-0.9.2->finrl==0.0.2) (0.6.0)\n", + "Requirement already satisfied: ipython-genutils in /usr/local/lib/python3.6/dist-packages (from traitlets>=4.2->ipython>=3.2.3->pyfolio@ git+https://github.com/quantopian/pyfolio.git#egg=pyfolio-0.9.2->finrl==0.0.2) (0.2.0)\n", + "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.6/dist-packages (from importlib-metadata; python_version < \"3.8\"->markdown>=2.6.8->tensorboard; extra == \"extra\"->stable-baselines3[extra]->finrl==0.0.2) (3.4.0)\n", + "Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.6/dist-packages (from 
requests-oauthlib>=0.7.0->google-auth-oauthlib<0.5,>=0.4.1->tensorboard; extra == \"extra\"->stable-baselines3[extra]->finrl==0.0.2) (3.1.0)\n", + "Requirement already satisfied: pyasn1>=0.1.3 in /usr/local/lib/python3.6/dist-packages (from rsa<5,>=3.1.4; python_version >= \"3\"->google-auth<2,>=1.6.3->tensorboard; extra == \"extra\"->stable-baselines3[extra]->finrl==0.0.2) (0.4.8)\n", + "Building wheels for collected packages: finrl, yfinance, pyfolio, empyrical\n", + " Building wheel for finrl (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for finrl: filename=finrl-0.0.2-cp36-none-any.whl size=23235 sha256=96343730296d82eab621f59e797ee5070763f62f0781366ad0c7f891320730c3\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-cesdfnqn/wheels/9c/19/bf/c644def96612df1ad42c94d5304966797eaa3221dffc5efe0b\n", + " Building wheel for yfinance (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for yfinance: filename=yfinance-0.1.55-py2.py3-none-any.whl size=22616 sha256=81424134934f5e39ce03a7cacee299829bc9064e6e8723329c6586438ee93839\n", + " Stored in directory: /root/.cache/pip/wheels/04/98/cc/2702a4242d60bdc14f48b4557c427ded1fe92aedf257d4565c\n", + " Building wheel for pyfolio (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for pyfolio: filename=pyfolio-0.9.2+75.g4b901f6-cp36-none-any.whl size=75764 sha256=d386c94dd6aa49b4acd82579c5e23f839043337a87eea7f28a1a9c56f7f0b1c0\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-cesdfnqn/wheels/43/ce/d9/6752fb6e03205408773235435205a0519d2c608a94f1976e56\n", + " Building wheel for empyrical (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", + " Created wheel for empyrical: filename=empyrical-0.5.5-cp36-none-any.whl size=39765 sha256=fbecbe48a3eb6e2d7ad06f9f3de71b0cd0a03d8b4d93092ab2ed9dab47cd8ef6\n", + " Stored in directory: /root/.cache/pip/wheels/ea/b2/c8/6769d8444d2f2e608fae2641833110668d0ffd1abeb2e9f3fc\n", + "Successfully built finrl yfinance pyfolio empyrical\n", + "Installing collected packages: int-date, stockstats, lxml, yfinance, stable-baselines3, empyrical, pyfolio, finrl\n", + " Found existing installation: lxml 4.2.6\n", + " Uninstalling lxml-4.2.6:\n", + " Successfully uninstalled lxml-4.2.6\n", + "Successfully installed empyrical-0.5.5 finrl-0.0.2 int-date-0.1.8 lxml-4.6.2 pyfolio-0.9.2+75.g4b901f6 stable-baselines3-0.10.0 stockstats-0.3.2 yfinance-0.1.55\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "osBHhVysOEzi" + }, + "source": [ + "\n", + "\n", + "## 2.2. Check if the additional packages needed are present; if not, install them. \n", + "* Yahoo Finance API\n", + "* pandas\n", + "* numpy\n", + "* matplotlib\n", + "* stockstats\n", + "* OpenAI gym\n", + "* stable-baselines3\n", + "* PyTorch\n", + "* pyfolio" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nGv01K8Sh1hn" + }, + "source": [ + "\n", + "## 2.3. 
Import Packages" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "lPqeTTwoh1hn", + "outputId": "c437c266-2780-4c50-af8b-6868e7fdaa1f" + }, + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib\n", + "import matplotlib.pyplot as plt\n", + "# matplotlib.use('Agg')\n", + "import datetime\n", + "\n", + "%matplotlib inline\n", + "from finrl.config import config\n", + "from finrl.marketdata.yahoodownloader import YahooDownloader\n", + "from finrl.preprocessing.preprocessors import FeatureEngineer\n", + "from finrl.preprocessing.data import data_split\n", + "from finrl.env.env_stocktrading import StockTradingEnv\n", + "from finrl.model.models import DRLAgent\n", + "from finrl.trade.backtest import BackTestStats, BaselineStats, BackTestPlot\n", + "\n", + "from pprint import pprint\n", + "\n", + "import sys\n", + "sys.path.append(\"../FinRL-Library\")\n", + "\n" + ], + "execution_count": null, + "outputs": [ { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 299 - }, - "id": "k9zU9YaTTvFq", - "outputId": "705f46e4-0529-4ef5-d182-c2a1337397a4" - }, - "source": [ - "trade.head()" - ], - "execution_count": null, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
dateopenhighlowclosevolumeticmacdrsi_30cci_30dx_30turbulence
02019-01-0238.72250039.71250238.55749938.562561148158800.0AAPL-2.01990337.867349-91.56785242.250808119.879197
02019-01-0293.91000496.26999793.76999792.6433114175400.0AXP-3.42600841.204982-97.74226926.709417119.879197
02019-01-02316.190002323.950012313.709991314.6451423292200.0BA-5.55059247.010000-21.71238213.611972119.879197
02019-01-02124.029999127.879997123.000000119.3025824783200.0CAT-0.68675948.229089-5.0912090.873482119.879197
02019-01-0242.27999943.20000142.20999940.38209923833500.0CSCO-0.96006144.872557-87.49685029.529377119.879197
\n", - "
" - ], - "text/plain": [ - " date open high ... cci_30 dx_30 turbulence\n", - "0 2019-01-02 38.722500 39.712502 ... -91.567852 42.250808 119.879197\n", - "0 2019-01-02 93.910004 96.269997 ... -97.742269 26.709417 119.879197\n", - "0 2019-01-02 316.190002 323.950012 ... -21.712382 13.611972 119.879197\n", - "0 2019-01-02 124.029999 127.879997 ... -5.091209 0.873482 119.879197\n", - "0 2019-01-02 42.279999 43.200001 ... -87.496850 29.529377 119.879197\n", - "\n", - "[5 rows x 12 columns]" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 68 - } - ] + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.6/dist-packages/pyfolio/pos.py:27: UserWarning: Module \"zipline.assets\" not found; multipliers will not be applied to position notionals.\n", + " 'Module \"zipline.assets\" not found; multipliers will not be applied'\n" + ], + "name": "stderr" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "T2owTj985RW4" + }, + "source": [ + "\n", + "## 2.4. Create Folders" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "w9A8CN5R5PuZ" + }, + "source": [ + "import os\n", + "if not os.path.exists(\"./\" + config.DATA_SAVE_DIR):\n", + " os.makedirs(\"./\" + config.DATA_SAVE_DIR)\n", + "if not os.path.exists(\"./\" + config.TRAINED_MODEL_DIR):\n", + " os.makedirs(\"./\" + config.TRAINED_MODEL_DIR)\n", + "if not os.path.exists(\"./\" + config.TENSORBOARD_LOG_DIR):\n", + " os.makedirs(\"./\" + config.TENSORBOARD_LOG_DIR)\n", + "if not os.path.exists(\"./\" + config.RESULTS_DIR):\n", + " os.makedirs(\"./\" + config.RESULTS_DIR)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "A289rQWMh1hq" + }, + "source": [ + "\n", + "# Part 3. Download Data\n", + "Yahoo Finance is a website that provides stock data, financial news, financial reports, etc. 
All the data provided by Yahoo Finance is free.\n", + "* FinRL uses a class **YahooDownloader** to fetch data from Yahoo Finance API\n", + "* Call Limit: Using the Public API (without authentication), you are limited to 2,000 requests per hour per IP (or up to a total of 48,000 requests a day).\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NPeQ7iS-LoMm" + }, + "source": [ + "\n", + "\n", + "-----\n", + "class YahooDownloader:\n", + " Provides methods for retrieving daily stock data from\n", + " Yahoo Finance API\n", + "\n", + " Attributes\n", + " ----------\n", + " start_date : str\n", + " start date of the data (modified from config.py)\n", + " end_date : str\n", + " end date of the data (modified from config.py)\n", + " ticker_list : list\n", + " a list of stock tickers (modified from config.py)\n", + "\n", + " Methods\n", + " -------\n", + " fetch_data()\n", + " Fetches data from yahoo API\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 35 }, + "id": "h3XJnvrbLp-C", + "outputId": "87dea23f-469d-4e9d-de91-0f8a74929de2" + }, + "source": [ + "# from config.py start_date is a string\n", + "config.START_DATE" + ], + "execution_count": null, + "outputs": [ { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "zYN573SOHhxG", - "outputId": "187c6d1b-3e91-40f8-dafd-230d787f2ee1" + "output_type": "execute_result", + "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" }, - "source": [ - "config.TECHNICAL_INDICATORS_LIST" - ], - "execution_count": null, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "['macd', 'rsi_30', 'cci_30', 'dx_30']" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 9 - } + "text/plain": [ + "'2009-01-01'" ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 4 + } + ] + }, + { + "cell_type": "code", + 
"metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 35 }, + "id": "FUnY8WEfLq3C", + "outputId": "c635ae69-a13e-408f-d932-9d386d1d6dcf" + }, + "source": [ + "# from config.py end_date is a string\n", + "config.END_DATE" + ], + "execution_count": null, + "outputs": [ { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "Q2zqII8rMIqn", - "outputId": "8a2c943b-1be4-4b8d-b64f-666e0852b7e6" + "output_type": "execute_result", + "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" }, - "source": [ - "stock_dimension = len(train.tic.unique())\n", - "state_space = 1 + 2*stock_dimension + len(config.TECHNICAL_INDICATORS_LIST)*stock_dimension\n", - "print(f\"Stock Dimension: {stock_dimension}, State Space: {state_space}\")\n" - ], - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "text": [ - "Stock Dimension: 30, State Space: 181\n" - ], - "name": "stdout" - } + "text/plain": [ + "'2020-12-01'" ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 5 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "JzqRRTOX6aFu", + "outputId": "d3baf63f-948a-49f9-f6f2-b7241971b8ea" + }, + "source": [ + "print(config.DOW_30_TICKER)" + ], + "execution_count": null, + "outputs": [ { - "cell_type": "code", - "metadata": { - "id": "AWyp84Ltto19" - }, - "source": [ - "env_kwargs = {\n", - " \"hmax\": 100, \n", - " \"initial_amount\": 1000000, \n", - " \"transaction_cost_pct\": 0.001, \n", - " \"state_space\": state_space, \n", - " \"stock_dim\": stock_dimension, \n", - " \"tech_indicator_list\": config.TECHNICAL_INDICATORS_LIST, \n", - " \"action_space\": stock_dimension, \n", - " \"reward_scaling\": 1e-4\n", - " \n", - "}\n", - "\n", - "e_train_gym = StockTradingEnv(df = train, **env_kwargs)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - 
"metadata": { - "id": "64EoqOrQjiVf" - }, - "source": [ - "## Environment for Training\n", - "\n" - ] + "output_type": "stream", + "text": [ + "['AAPL', 'MSFT', 'JPM', 'V', 'RTX', 'PG', 'GS', 'NKE', 'DIS', 'AXP', 'HD', 'INTC', 'WMT', 'IBM', 'MRK', 'UNH', 'KO', 'CAT', 'TRV', 'JNJ', 'CVX', 'MCD', 'VZ', 'CSCO', 'XOM', 'BA', 'MMM', 'PFE', 'WBA', 'DD']\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "yCKm4om-s9kE", + "outputId": "932583d8-f98b-4243-c02d-375f7272db1a" + }, + "source": [ + "df = YahooDownloader(start_date = '2009-01-01',\n", + " end_date = '2021-01-01',\n", + " ticker_list = config.DOW_30_TICKER).fetch_data()" + ], + "execution_count": null, + "outputs": [ { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "xwSvvPjutpqS", - "outputId": "406e5ec3-28ba-4a72-9b22-0d031f7bf9a6" - }, - "source": [ - "env_train, _ = e_train_gym.get_sb_env()\n", - "print(type(env_train))" - ], - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "text": [ - "\n" - ], - "name": "stdout" - } - ] + "output_type": "stream", + "text": [ + "[*********************100%***********************] 1 of 1 completed\n", + "[*********************100%***********************] 1 of 1 completed\n", + "[*********************100%***********************] 1 of 1 completed\n", + "[*********************100%***********************] 1 of 1 completed\n", + "[*********************100%***********************] 1 of 1 completed\n", + "[*********************100%***********************] 1 of 1 completed\n", + "[*********************100%***********************] 1 of 1 completed\n", + "[*********************100%***********************] 1 of 1 completed\n", + "[*********************100%***********************] 1 of 1 completed\n", + "[*********************100%***********************] 1 of 1 completed\n", + 
"[*********************100%***********************] 1 of 1 completed\n", + "[*********************100%***********************] 1 of 1 completed\n", + "[*********************100%***********************] 1 of 1 completed\n", + "[*********************100%***********************] 1 of 1 completed\n", + "[*********************100%***********************] 1 of 1 completed\n", + "[*********************100%***********************] 1 of 1 completed\n", + "[*********************100%***********************] 1 of 1 completed\n", + "[*********************100%***********************] 1 of 1 completed\n", + "[*********************100%***********************] 1 of 1 completed\n", + "[*********************100%***********************] 1 of 1 completed\n", + "[*********************100%***********************] 1 of 1 completed\n", + "[*********************100%***********************] 1 of 1 completed\n", + "[*********************100%***********************] 1 of 1 completed\n", + "[*********************100%***********************] 1 of 1 completed\n", + "[*********************100%***********************] 1 of 1 completed\n", + "[*********************100%***********************] 1 of 1 completed\n", + "[*********************100%***********************] 1 of 1 completed\n", + "[*********************100%***********************] 1 of 1 completed\n", + "[*********************100%***********************] 1 of 1 completed\n", + "[*********************100%***********************] 1 of 1 completed\n", + "Shape of DataFrame: (90630, 7)\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "CV3HrZHLh1hy", + "outputId": "b7b78172-8c8a-41c9-c8a6-0167edb9bd11" + }, + "source": [ + "df.shape" + ], + "execution_count": null, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "HMNR5nHjh1iz" - }, - "source": [ - "\n", - "# Part 6: Implement DRL Algorithms\n", - "* The implementation of the DRL 
algorithms are based on **OpenAI Baselines** and **Stable Baselines**. Stable Baselines is a fork of OpenAI Baselines, with a major structural refactoring, and code cleanups.\n", - "* FinRL library includes fine-tuned standard DRL algorithms, such as DQN, DDPG,\n", - "Multi-Agent DDPG, PPO, SAC, A2C and TD3. We also allow users to\n", - "design their own DRL algorithms by adapting these DRL algorithms." + "output_type": "execute_result", + "data": { + "text/plain": [ + "(90630, 7)" ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 62 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 195 }, + "id": "4hYkeaPiICHS", + "outputId": "ce9d7463-a74c-4917-c96d-848a1e8ad493" + }, + "source": [ + "df.sort_values(['date','tic'],ignore_index=True).head()" + ], + "execution_count": null, + "outputs": [ { - "cell_type": "code", - "metadata": { - "id": "364PsqckttcQ" - }, - "source": [ - "agent = DRLAgent(env = env_train)" + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
dateopenhighlowclosevolumetic
02009-01-023.0671433.2514293.0414292.795913746015200.0AAPL
12009-01-0218.57000019.52000018.40000015.80062410955700.0AXP
22009-01-0242.79999945.56000142.77999933.6809357010200.0BA
32009-01-0244.91000046.98000044.70999932.5144007117200.0CAT
42009-01-0216.41000017.00000016.25000012.78608740980600.0CSCO
\n", + "
" ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "YDmqOyF9h1iz" - }, - "source": [ - "### Model Training: 5 models, A2C DDPG, PPO, TD3, SAC\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "uijiWgkuh1jB" - }, - "source": [ - "### Model 1: A2C\n" + "text/plain": [ + " date open high low close volume tic\n", + "0 2009-01-02 3.067143 3.251429 3.041429 2.795913 746015200.0 AAPL\n", + "1 2009-01-02 18.570000 19.520000 18.400000 15.800624 10955700.0 AXP\n", + "2 2009-01-02 42.799999 45.560001 42.779999 33.680935 7010200.0 BA\n", + "3 2009-01-02 44.910000 46.980000 44.709999 32.514400 7117200.0 CAT\n", + "4 2009-01-02 16.410000 17.000000 16.250000 12.786087 40980600.0 CSCO" ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 5 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uqC6c40Zh1iH" + }, + "source": [ + "# Part 4: Preprocess Data\n", + "Data preprocessing is a crucial step for training a high-quality machine learning model. We need to check for missing data and do feature engineering in order to convert the data into a model-ready state.\n", + "* Add technical indicators. In practical trading, various information needs to be taken into account, for example the historical stock prices, current holding shares, technical indicators, etc. In this notebook, we use the four technical indicators listed in `config.TECHNICAL_INDICATORS_LIST`: MACD, RSI, CCI and DX.\n", + "* Add turbulence index. Risk-aversion reflects whether an investor will choose to preserve the capital. It also influences one's trading strategy when facing different market volatility levels. To control the risk in a worst-case scenario, such as the financial crisis of 2007–2008, FinRL employs the financial turbulence index that measures extreme asset price fluctuation."
+ ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "Le342Hc1h1iI", + "outputId": "7049c022-122e-47c3-ef30-e9a8481808bd" + }, + "source": [ + "fe = FeatureEngineer(\n", + " use_technical_indicator=True,\n", + " tech_indicator_list = config.TECHNICAL_INDICATORS_LIST,\n", + " use_turbulence=True,\n", + " user_defined_feature = False)\n", + "\n", + "processed = fe.preprocess_data(df)" + ], + "execution_count": null, + "outputs": [ { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "GUCnkn-HIbmj", - "outputId": "2fdb297a-8d35-4c7e-806f-de859d70e19e" - }, - "source": [ - "agent = DRLAgent(env = env_train)\n", - "model_a2c = agent.get_model(\"a2c\")" - ], - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "text": [ - "{'n_steps': 5, 'ent_coef': 0.01, 'learning_rate': 0.0007}\n", - "Using cpu device\n" - ], - "name": "stdout" - } - ] + "output_type": "stream", + "text": [ + "Successfully added technical indicators\n", + "Successfully added turbulence index\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 340 }, + "id": "grvhGJJII3Xn", + "outputId": "91d09c37-b0e9-4c5c-d532-967e40d11f41" + }, + "source": [ + "processed.sort_values(['date','tic'],ignore_index=True).head(10)\n", + "\n", + "# Solve no stock on the day, data alignment\n", + "x = processed.tic.unique().reshape(-1,1)\n", + "temp_df = pd.DataFrame.from_records(x, columns=['tic'])\n", + "\n", + "temp_df = temp_df.reindex(columns=list(processed.columns), fill_value=1)\n", + "\n", + "data_merge = pd.DataFrame(columns=list(processed.columns))\n", + "\n", + "for name, group in processed.groupby('date'):\n", + " temp_df['date'] = name\n", + "\n", + " result_outer = pd.merge(group, temp_df, on=list(processed.columns), how='outer')\n", + " result_outer = 
result_outer.drop_duplicates(subset=['tic'], keep='first')\n", + " result_outer = result_outer.reset_index(drop=True)\n", + " result_outer = result_outer.sort_values(['date', 'tic'], ignore_index=True)\n", + " result_outer = result_outer.fillna(value=1)\n", + "\n", + " assert len(result_outer) == len(processed.tic.unique())\n", + " data_merge = data_merge.append(result_outer)\n", + "\n", + "processed = data_merge\n" + ], + "execution_count": null, + "outputs": [ { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "0GVpkWGqH4-D", - "outputId": "9eb09ba2-fc4b-46a1-ea3d-bd9b3bfefffd" - }, - "source": [ - "trained_a2c = agent.train_model(model=model_a2c, \n", - " tb_log_name='a2c',\n", - " total_timesteps=100000)" + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
dateopenhighlowclosevolumeticmacdrsi_30cci_30dx_30turbulence
02009-01-023.0671433.2514293.0414292.795913746015200.0AAPL0.0100.066.666667100.00.0
12009-01-0218.57000019.52000018.40000015.80062410955700.0AXP0.0100.066.666667100.00.0
22009-01-0242.79999945.56000142.77999933.6809357010200.0BA0.0100.066.666667100.00.0
32009-01-0244.91000046.98000044.70999932.5144007117200.0CAT0.0100.066.666667100.00.0
42009-01-0216.41000017.00000016.25000012.78608740980600.0CSCO0.0100.066.666667100.00.0
52009-01-0274.23000377.30000373.58000248.04326213695900.0CVX0.0100.066.666667100.00.0
62009-01-0221.60523422.06068020.99322914.52727613251000.0DD0.0100.066.666667100.00.0
72009-01-0222.76000024.03000122.50000020.5974969796600.0DIS0.0100.066.666667100.00.0
82009-01-0284.01999787.62000382.19000272.84446714088500.0GS0.0100.066.666667100.00.0
92009-01-0223.07000024.19000122.95999917.90945214902500.0HD0.0100.066.666667100.00.0
\n", + "
" ], - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "text": [ - "Logging to tensorboard_log/a2c/a2c_1\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 131 |\n", - "| iterations | 100 |\n", - "| time_elapsed | 3 |\n", - "| total_timesteps | 500 |\n", - "| train/ | |\n", - "| entropy_loss | -42.6 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 99 |\n", - "| policy_loss | -14.9 |\n", - "| std | 1 |\n", - "| value_loss | 0.362 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 177 |\n", - "| iterations | 200 |\n", - "| time_elapsed | 5 |\n", - "| total_timesteps | 1000 |\n", - "| train/ | |\n", - "| entropy_loss | -42.7 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 199 |\n", - "| policy_loss | -52 |\n", - "| std | 1 |\n", - "| value_loss | 2.03 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 199 |\n", - "| iterations | 300 |\n", - "| time_elapsed | 7 |\n", - "| total_timesteps | 1500 |\n", - "| train/ | |\n", - "| entropy_loss | -42.7 |\n", - "| explained_variance | -754 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 299 |\n", - "| policy_loss | -379 |\n", - "| std | 1.01 |\n", - "| value_loss | 72.5 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 213 |\n", - "| iterations | 400 |\n", - "| time_elapsed | 9 |\n", - "| total_timesteps | 2000 |\n", - "| train/ | |\n", - "| entropy_loss | -42.8 |\n", - "| explained_variance | -899 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 399 |\n", - "| policy_loss | -50.2 |\n", - "| std | 1.01 |\n", - "| value_loss | 2.23 |\n", - "------------------------------------\n", - "-------------------------------------\n", - "| time/ | |\n", - "| fps | 
222 |\n", - "| iterations | 500 |\n", - "| time_elapsed | 11 |\n", - "| total_timesteps | 2500 |\n", - "| train/ | |\n", - "| entropy_loss | -42.8 |\n", - "| explained_variance | -5.49e+03 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 499 |\n", - "| policy_loss | 863 |\n", - "| std | 1.01 |\n", - "| value_loss | 470 |\n", - "-------------------------------------\n", - "begin_total_asset:1000000\n", - "end_total_asset:5069607.313605958\n", - "total_reward:4069607.3136059577\n", - "total_cost: 67556.9160195016\n", - "total_trades: 54955\n", - "Sharpe: 1.034955174352521\n", - "=================================\n", - "-------------------------------------\n", - "| time/ | |\n", - "| fps | 225 |\n", - "| iterations | 600 |\n", - "| time_elapsed | 13 |\n", - "| total_timesteps | 3000 |\n", - "| train/ | |\n", - "| entropy_loss | -42.8 |\n", - "| explained_variance | -1.13e+03 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 599 |\n", - "| policy_loss | -56.4 |\n", - "| std | 1.01 |\n", - "| value_loss | 3.43 |\n", - "-------------------------------------\n", - "-------------------------------------\n", - "| time/ | |\n", - "| fps | 229 |\n", - "| iterations | 700 |\n", - "| time_elapsed | 15 |\n", - "| total_timesteps | 3500 |\n", - "| train/ | |\n", - "| entropy_loss | -42.8 |\n", - "| explained_variance | -3.16e+03 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 699 |\n", - "| policy_loss | 93.9 |\n", - "| std | 1.01 |\n", - "| value_loss | 8.12 |\n", - "-------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 232 |\n", - "| iterations | 800 |\n", - "| time_elapsed | 17 |\n", - "| total_timesteps | 4000 |\n", - "| train/ | |\n", - "| entropy_loss | -42.8 |\n", - "| explained_variance | -3.3e+11 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 799 |\n", - "| policy_loss | 65.4 |\n", - "| std | 1.01 |\n", - "| value_loss | 3.13 |\n", - "------------------------------------\n", 
- "-------------------------------------\n", - "| time/ | |\n", - "| fps | 236 |\n", - "| iterations | 900 |\n", - "| time_elapsed | 19 |\n", - "| total_timesteps | 4500 |\n", - "| train/ | |\n", - "| entropy_loss | -42.8 |\n", - "| explained_variance | -1.57e+04 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 899 |\n", - "| policy_loss | 628 |\n", - "| std | 1.01 |\n", - "| value_loss | 222 |\n", - "-------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 239 |\n", - "| iterations | 1000 |\n", - "| time_elapsed | 20 |\n", - "| total_timesteps | 5000 |\n", - "| train/ | |\n", - "| entropy_loss | -42.8 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 999 |\n", - "| policy_loss | 283 |\n", - "| std | 1.01 |\n", - "| value_loss | 51.9 |\n", - "------------------------------------\n", - "begin_total_asset:1000000\n", - "end_total_asset:4806928.073206688\n", - "total_reward:3806928.0732066883\n", - "total_cost: 29371.967713621536\n", - "total_trades: 48579\n", - "Sharpe: 0.9611082492472007\n", - "=================================\n", - "-------------------------------------\n", - "| time/ | |\n", - "| fps | 241 |\n", - "| iterations | 1100 |\n", - "| time_elapsed | 22 |\n", - "| total_timesteps | 5500 |\n", - "| train/ | |\n", - "| entropy_loss | -42.8 |\n", - "| explained_variance | -1.34e+04 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 1099 |\n", - "| policy_loss | -9.16 |\n", - "| std | 1.01 |\n", - "| value_loss | 5.7 |\n", - "-------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 243 |\n", - "| iterations | 1200 |\n", - "| time_elapsed | 24 |\n", - "| total_timesteps | 6000 |\n", - "| train/ | |\n", - "| entropy_loss | -42.9 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 1199 |\n", - "| policy_loss | -169 |\n", - "| std | 1.01 |\n", - "| 
value_loss | 35 |\n", - "------------------------------------\n", - "-------------------------------------\n", - "| time/ | |\n", - "| fps | 244 |\n", - "| iterations | 1300 |\n", - "| time_elapsed | 26 |\n", - "| total_timesteps | 6500 |\n", - "| train/ | |\n", - "| entropy_loss | -42.9 |\n", - "| explained_variance | -8.12e+05 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 1299 |\n", - "| policy_loss | 796 |\n", - "| std | 1.01 |\n", - "| value_loss | 360 |\n", - "-------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 246 |\n", - "| iterations | 1400 |\n", - "| time_elapsed | 28 |\n", - "| total_timesteps | 7000 |\n", - "| train/ | |\n", - "| entropy_loss | -42.9 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 1399 |\n", - "| policy_loss | -31.3 |\n", - "| std | 1.01 |\n", - "| value_loss | 0.783 |\n", - "------------------------------------\n", - "-------------------------------------\n", - "| time/ | |\n", - "| fps | 248 |\n", - "| iterations | 1500 |\n", - "| time_elapsed | 30 |\n", - "| total_timesteps | 7500 |\n", - "| train/ | |\n", - "| entropy_loss | -42.9 |\n", - "| explained_variance | -3.62e+14 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 1499 |\n", - "| policy_loss | -693 |\n", - "| std | 1.01 |\n", - "| value_loss | 542 |\n", - "-------------------------------------\n", - "begin_total_asset:1000000\n", - "end_total_asset:5032249.439668636\n", - "total_reward:4032249.439668636\n", - "total_cost: 27369.775673342636\n", - "total_trades: 46757\n", - "Sharpe: 0.9689568826715832\n", - "=================================\n", - "-------------------------------------\n", - "| time/ | |\n", - "| fps | 249 |\n", - "| iterations | 1600 |\n", - "| time_elapsed | 32 |\n", - "| total_timesteps | 8000 |\n", - "| train/ | |\n", - "| entropy_loss | -42.8 |\n", - "| explained_variance | -4.17e+11 |\n", - "| learning_rate | 0.0007 |\n", - "| 
n_updates | 1599 |\n", - "| policy_loss | -12.2 |\n", - "| std | 1.01 |\n", - "| value_loss | 0.468 |\n", - "-------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 250 |\n", - "| iterations | 1700 |\n", - "| time_elapsed | 33 |\n", - "| total_timesteps | 8500 |\n", - "| train/ | |\n", - "| entropy_loss | -42.8 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 1699 |\n", - "| policy_loss | 87.7 |\n", - "| std | 1.01 |\n", - "| value_loss | 4.56 |\n", - "------------------------------------\n", - "-------------------------------------\n", - "| time/ | |\n", - "| fps | 251 |\n", - "| iterations | 1800 |\n", - "| time_elapsed | 35 |\n", - "| total_timesteps | 9000 |\n", - "| train/ | |\n", - "| entropy_loss | -42.8 |\n", - "| explained_variance | -4.62e+05 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 1799 |\n", - "| policy_loss | -255 |\n", - "| std | 1.01 |\n", - "| value_loss | 40.4 |\n", - "-------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 252 |\n", - "| iterations | 1900 |\n", - "| time_elapsed | 37 |\n", - "| total_timesteps | 9500 |\n", - "| train/ | |\n", - "| entropy_loss | -42.8 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 1899 |\n", - "| policy_loss | -127 |\n", - "| std | 1.01 |\n", - "| value_loss | 16.6 |\n", - "------------------------------------\n", - "-------------------------------------\n", - "| time/ | |\n", - "| fps | 253 |\n", - "| iterations | 2000 |\n", - "| time_elapsed | 39 |\n", - "| total_timesteps | 10000 |\n", - "| train/ | |\n", - "| entropy_loss | -42.8 |\n", - "| explained_variance | -1.97e+13 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 1999 |\n", - "| policy_loss | 406 |\n", - "| std | 1.01 |\n", - "| value_loss | 95.1 |\n", - "-------------------------------------\n", - 
"begin_total_asset:1000000\n", - "end_total_asset:3904628.721074527\n", - "total_reward:2904628.721074527\n", - "total_cost: 32800.81143295443\n", - "total_trades: 45335\n", - "Sharpe: 0.8354269955192407\n", - "=================================\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 253 |\n", - "| iterations | 2100 |\n", - "| time_elapsed | 41 |\n", - "| total_timesteps | 10500 |\n", - "| train/ | |\n", - "| entropy_loss | -42.8 |\n", - "| explained_variance | -10.3 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 2099 |\n", - "| policy_loss | 69.7 |\n", - "| std | 1.01 |\n", - "| value_loss | 2.66 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 253 |\n", - "| iterations | 2200 |\n", - "| time_elapsed | 43 |\n", - "| total_timesteps | 11000 |\n", - "| train/ | |\n", - "| entropy_loss | -42.8 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 2199 |\n", - "| policy_loss | -42.8 |\n", - "| std | 1.01 |\n", - "| value_loss | 1.92 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 253 |\n", - "| iterations | 2300 |\n", - "| time_elapsed | 45 |\n", - "| total_timesteps | 11500 |\n", - "| train/ | |\n", - "| entropy_loss | -42.8 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 2299 |\n", - "| policy_loss | 48.1 |\n", - "| std | 1.01 |\n", - "| value_loss | 9.7 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 254 |\n", - "| iterations | 2400 |\n", - "| time_elapsed | 47 |\n", - "| total_timesteps | 12000 |\n", - "| train/ | |\n", - "| entropy_loss | -42.7 |\n", - "| explained_variance | -49.7 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 2399 |\n", - "| policy_loss | 204 |\n", - "| std | 1.01 |\n", - "| 
value_loss | 24.4 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 255 |\n", - "| iterations | 2500 |\n", - "| time_elapsed | 49 |\n", - "| total_timesteps | 12500 |\n", - "| train/ | |\n", - "| entropy_loss | -42.7 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 2499 |\n", - "| policy_loss | 56.3 |\n", - "| std | 1.01 |\n", - "| value_loss | 3.8 |\n", - "------------------------------------\n", - "begin_total_asset:1000000\n", - "end_total_asset:3630490.4667401677\n", - "total_reward:2630490.4667401677\n", - "total_cost: 49957.625875016725\n", - "total_trades: 49675\n", - "Sharpe: 0.7870109277440298\n", - "=================================\n", - "-------------------------------------\n", - "| time/ | |\n", - "| fps | 255 |\n", - "| iterations | 2600 |\n", - "| time_elapsed | 50 |\n", - "| total_timesteps | 13000 |\n", - "| train/ | |\n", - "| entropy_loss | -42.7 |\n", - "| explained_variance | -1.27e+12 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 2599 |\n", - "| policy_loss | -122 |\n", - "| std | 1.01 |\n", - "| value_loss | 9.1 |\n", - "-------------------------------------\n", - "-------------------------------------\n", - "| time/ | |\n", - "| fps | 255 |\n", - "| iterations | 2700 |\n", - "| time_elapsed | 52 |\n", - "| total_timesteps | 13500 |\n", - "| train/ | |\n", - "| entropy_loss | -42.7 |\n", - "| explained_variance | -3.15e+11 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 2699 |\n", - "| policy_loss | 16.4 |\n", - "| std | 1.01 |\n", - "| value_loss | 0.422 |\n", - "-------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 255 |\n", - "| iterations | 2800 |\n", - "| time_elapsed | 54 |\n", - "| total_timesteps | 14000 |\n", - "| train/ | |\n", - "| entropy_loss | -42.8 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| 
n_updates | 2799 |\n", - "| policy_loss | 119 |\n", - "| std | 1.01 |\n", - "| value_loss | 9.84 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 256 |\n", - "| iterations | 2900 |\n", - "| time_elapsed | 56 |\n", - "| total_timesteps | 14500 |\n", - "| train/ | |\n", - "| entropy_loss | -42.7 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 2899 |\n", - "| policy_loss | 230 |\n", - "| std | 1.01 |\n", - "| value_loss | 38.4 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 257 |\n", - "| iterations | 3000 |\n", - "| time_elapsed | 58 |\n", - "| total_timesteps | 15000 |\n", - "| train/ | |\n", - "| entropy_loss | -42.8 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 2999 |\n", - "| policy_loss | 54.7 |\n", - "| std | 1.01 |\n", - "| value_loss | 14.1 |\n", - "------------------------------------\n", - "begin_total_asset:1000000\n", - "end_total_asset:4105857.0455575557\n", - "total_reward:3105857.0455575557\n", - "total_cost: 12537.663790287688\n", - "total_trades: 43652\n", - "Sharpe: 0.8861282120753707\n", - "=================================\n", - "-------------------------------------\n", - "| time/ | |\n", - "| fps | 258 |\n", - "| iterations | 3100 |\n", - "| time_elapsed | 60 |\n", - "| total_timesteps | 15500 |\n", - "| train/ | |\n", - "| entropy_loss | -42.9 |\n", - "| explained_variance | -7.93e+04 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 3099 |\n", - "| policy_loss | 99.6 |\n", - "| std | 1.01 |\n", - "| value_loss | 6.67 |\n", - "-------------------------------------\n", - "-------------------------------------\n", - "| time/ | |\n", - "| fps | 258 |\n", - "| iterations | 3200 |\n", - "| time_elapsed | 61 |\n", - "| total_timesteps | 16000 |\n", - "| train/ | |\n", - "| entropy_loss | -42.9 |\n", - "| 
explained_variance | -1.05e+05 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 3199 |\n", - "| policy_loss | 190 |\n", - "| std | 1.01 |\n", - "| value_loss | 23.1 |\n", - "-------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 258 |\n", - "| iterations | 3300 |\n", - "| time_elapsed | 63 |\n", - "| total_timesteps | 16500 |\n", - "| train/ | |\n", - "| entropy_loss | -42.9 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 3299 |\n", - "| policy_loss | 17.2 |\n", - "| std | 1.01 |\n", - "| value_loss | 2.04 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 259 |\n", - "| iterations | 3400 |\n", - "| time_elapsed | 65 |\n", - "| total_timesteps | 17000 |\n", - "| train/ | |\n", - "| entropy_loss | -42.9 |\n", - "| explained_variance | -9.4e+04 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 3399 |\n", - "| policy_loss | -46.1 |\n", - "| std | 1.01 |\n", - "| value_loss | 1.93 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 259 |\n", - "| iterations | 3500 |\n", - "| time_elapsed | 67 |\n", - "| total_timesteps | 17500 |\n", - "| train/ | |\n", - "| entropy_loss | -43 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 3499 |\n", - "| policy_loss | -17.4 |\n", - "| std | 1.01 |\n", - "| value_loss | 5.37 |\n", - "------------------------------------\n", - "begin_total_asset:1000000\n", - "end_total_asset:3307214.1514936504\n", - "total_reward:2307214.1514936504\n", - "total_cost: 23884.956163034414\n", - "total_trades: 42682\n", - "Sharpe: 0.7168631999656054\n", - "=================================\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 259 |\n", - "| iterations | 3600 |\n", - "| time_elapsed | 69 |\n", - "| 
total_timesteps | 18000 |\n", - "| train/ | |\n", - "| entropy_loss | -42.9 |\n", - "| explained_variance | -4.3e+03 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 3599 |\n", - "| policy_loss | 226 |\n", - "| std | 1.01 |\n", - "| value_loss | 28.9 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 260 |\n", - "| iterations | 3700 |\n", - "| time_elapsed | 71 |\n", - "| total_timesteps | 18500 |\n", - "| train/ | |\n", - "| entropy_loss | -42.9 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 3699 |\n", - "| policy_loss | 59.8 |\n", - "| std | 1.01 |\n", - "| value_loss | 8.43 |\n", - "------------------------------------\n", - "-------------------------------------\n", - "| time/ | |\n", - "| fps | 260 |\n", - "| iterations | 3800 |\n", - "| time_elapsed | 72 |\n", - "| total_timesteps | 19000 |\n", - "| train/ | |\n", - "| entropy_loss | -43 |\n", - "| explained_variance | -7.04e+04 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 3799 |\n", - "| policy_loss | 50.8 |\n", - "| std | 1.01 |\n", - "| value_loss | 1.82 |\n", - "-------------------------------------\n", - "-------------------------------------\n", - "| time/ | |\n", - "| fps | 260 |\n", - "| iterations | 3900 |\n", - "| time_elapsed | 74 |\n", - "| total_timesteps | 19500 |\n", - "| train/ | |\n", - "| entropy_loss | -42.9 |\n", - "| explained_variance | -4.89e+08 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 3899 |\n", - "| policy_loss | -457 |\n", - "| std | 1.01 |\n", - "| value_loss | 140 |\n", - "-------------------------------------\n", - "-------------------------------------\n", - "| time/ | |\n", - "| fps | 261 |\n", - "| iterations | 4000 |\n", - "| time_elapsed | 76 |\n", - "| total_timesteps | 20000 |\n", - "| train/ | |\n", - "| entropy_loss | -42.9 |\n", - "| explained_variance | -2.78e+07 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates 
| 3999 |\n", - "| policy_loss | -441 |\n", - "| std | 1.01 |\n", - "| value_loss | 143 |\n", - "-------------------------------------\n", - "begin_total_asset:1000000\n", - "end_total_asset:4148540.1545087425\n", - "total_reward:3148540.1545087425\n", - "total_cost: 15764.782369253146\n", - "total_trades: 38897\n", - "Sharpe: 0.8610175924981203\n", - "=================================\n", - "-------------------------------------\n", - "| time/ | |\n", - "| fps | 261 |\n", - "| iterations | 4100 |\n", - "| time_elapsed | 78 |\n", - "| total_timesteps | 20500 |\n", - "| train/ | |\n", - "| entropy_loss | -42.9 |\n", - "| explained_variance | -2.42e+04 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 4099 |\n", - "| policy_loss | 76.1 |\n", - "| std | 1.01 |\n", - "| value_loss | 5.77 |\n", - "-------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 261 |\n", - "| iterations | 4200 |\n", - "| time_elapsed | 80 |\n", - "| total_timesteps | 21000 |\n", - "| train/ | |\n", - "| entropy_loss | -42.9 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 4199 |\n", - "| policy_loss | 143 |\n", - "| std | 1.01 |\n", - "| value_loss | 15.3 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 262 |\n", - "| iterations | 4300 |\n", - "| time_elapsed | 81 |\n", - "| total_timesteps | 21500 |\n", - "| train/ | |\n", - "| entropy_loss | -43 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 4299 |\n", - "| policy_loss | 29.3 |\n", - "| std | 1.02 |\n", - "| value_loss | 3.48 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 262 |\n", - "| iterations | 4400 |\n", - "| time_elapsed | 83 |\n", - "| total_timesteps | 22000 |\n", - "| train/ | |\n", - "| entropy_loss | -43 |\n", - "| 
explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 4399 |\n", - "| policy_loss | -52.3 |\n", - "| std | 1.02 |\n", - "| value_loss | 3.13 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 262 |\n", - "| iterations | 4500 |\n", - "| time_elapsed | 85 |\n", - "| total_timesteps | 22500 |\n", - "| train/ | |\n", - "| entropy_loss | -43 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 4499 |\n", - "| policy_loss | -53.7 |\n", - "| std | 1.02 |\n", - "| value_loss | 14.7 |\n", - "------------------------------------\n", - "begin_total_asset:1000000\n", - "end_total_asset:4751485.433416299\n", - "total_reward:3751485.4334162986\n", - "total_cost: 15499.176757445255\n", - "total_trades: 39836\n", - "Sharpe: 0.9930905921879077\n", - "=================================\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 263 |\n", - "| iterations | 4600 |\n", - "| time_elapsed | 87 |\n", - "| total_timesteps | 23000 |\n", - "| train/ | |\n", - "| entropy_loss | -43.1 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 4599 |\n", - "| policy_loss | -62.3 |\n", - "| std | 1.02 |\n", - "| value_loss | 6.41 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 262 |\n", - "| iterations | 4700 |\n", - "| time_elapsed | 89 |\n", - "| total_timesteps | 23500 |\n", - "| train/ | |\n", - "| entropy_loss | -43.1 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 4699 |\n", - "| policy_loss | -86.6 |\n", - "| std | 1.02 |\n", - "| value_loss | 5.69 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 263 |\n", - "| iterations | 4800 |\n", - "| time_elapsed | 91 |\n", - "| total_timesteps | 24000 
|\n", - "| train/ | |\n", - "| entropy_loss | -43.1 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 4799 |\n", - "| policy_loss | -160 |\n", - "| std | 1.02 |\n", - "| value_loss | 18.5 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 263 |\n", - "| iterations | 4900 |\n", - "| time_elapsed | 93 |\n", - "| total_timesteps | 24500 |\n", - "| train/ | |\n", - "| entropy_loss | -43.1 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 4899 |\n", - "| policy_loss | -162 |\n", - "| std | 1.02 |\n", - "| value_loss | 20.8 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 263 |\n", - "| iterations | 5000 |\n", - "| time_elapsed | 94 |\n", - "| total_timesteps | 25000 |\n", - "| train/ | |\n", - "| entropy_loss | -43.1 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 4999 |\n", - "| policy_loss | 481 |\n", - "| std | 1.02 |\n", - "| value_loss | 143 |\n", - "------------------------------------\n", - "begin_total_asset:1000000\n", - "end_total_asset:4724903.433106359\n", - "total_reward:3724903.433106359\n", - "total_cost: 8886.69877304687\n", - "total_trades: 38303\n", - "Sharpe: 0.9980131996548207\n", - "=================================\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 263 |\n", - "| iterations | 5100 |\n", - "| time_elapsed | 96 |\n", - "| total_timesteps | 25500 |\n", - "| train/ | |\n", - "| entropy_loss | -43.1 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 5099 |\n", - "| policy_loss | -139 |\n", - "| std | 1.02 |\n", - "| value_loss | 12.7 |\n", - "------------------------------------\n", - "-------------------------------------\n", - "| time/ | |\n", - "| fps | 263 |\n", - "| iterations | 5200 |\n", 
- "| time_elapsed | 98 |\n", - "| total_timesteps | 26000 |\n", - "| train/ | |\n", - "| entropy_loss | -43.1 |\n", - "| explained_variance | -6.12e+04 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 5199 |\n", - "| policy_loss | 128 |\n", - "| std | 1.02 |\n", - "| value_loss | 8.81 |\n", - "-------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 263 |\n", - "| iterations | 5300 |\n", - "| time_elapsed | 100 |\n", - "| total_timesteps | 26500 |\n", - "| train/ | |\n", - "| entropy_loss | -43.1 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 5299 |\n", - "| policy_loss | 5.06 |\n", - "| std | 1.02 |\n", - "| value_loss | 1.05 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 263 |\n", - "| iterations | 5400 |\n", - "| time_elapsed | 102 |\n", - "| total_timesteps | 27000 |\n", - "| train/ | |\n", - "| entropy_loss | -43.1 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 5399 |\n", - "| policy_loss | 190 |\n", - "| std | 1.02 |\n", - "| value_loss | 24.3 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 263 |\n", - "| iterations | 5500 |\n", - "| time_elapsed | 104 |\n", - "| total_timesteps | 27500 |\n", - "| train/ | |\n", - "| entropy_loss | -43.1 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 5499 |\n", - "| policy_loss | 42.8 |\n", - "| std | 1.02 |\n", - "| value_loss | 9 |\n", - "------------------------------------\n", - "begin_total_asset:1000000\n", - "end_total_asset:4783015.926924407\n", - "total_reward:3783015.9269244066\n", - "total_cost: 7815.295760473641\n", - "total_trades: 36995\n", - "Sharpe: 0.9898009778895888\n", - "=================================\n", - "------------------------------------\n", - 
"| time/ | |\n", - "| fps | 263 |\n", - "| iterations | 5600 |\n", - "| time_elapsed | 106 |\n", - "| total_timesteps | 28000 |\n", - "| train/ | |\n", - "| entropy_loss | -43.1 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 5599 |\n", - "| policy_loss | -1.76 |\n", - "| std | 1.02 |\n", - "| value_loss | 0.422 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 262 |\n", - "| iterations | 5700 |\n", - "| time_elapsed | 108 |\n", - "| total_timesteps | 28500 |\n", - "| train/ | |\n", - "| entropy_loss | -43.1 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 5699 |\n", - "| policy_loss | -69.8 |\n", - "| std | 1.02 |\n", - "| value_loss | 2.85 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 262 |\n", - "| iterations | 5800 |\n", - "| time_elapsed | 110 |\n", - "| total_timesteps | 29000 |\n", - "| train/ | |\n", - "| entropy_loss | -43.1 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 5799 |\n", - "| policy_loss | 165 |\n", - "| std | 1.02 |\n", - "| value_loss | 15.5 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 263 |\n", - "| iterations | 5900 |\n", - "| time_elapsed | 112 |\n", - "| total_timesteps | 29500 |\n", - "| train/ | |\n", - "| entropy_loss | -43.2 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 5899 |\n", - "| policy_loss | 14.9 |\n", - "| std | 1.02 |\n", - "| value_loss | 2.67 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 263 |\n", - "| iterations | 6000 |\n", - "| time_elapsed | 113 |\n", - "| total_timesteps | 30000 |\n", - "| train/ | |\n", - "| entropy_loss | -43.2 |\n", - 
"| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 5999 |\n", - "| policy_loss | -145 |\n", - "| std | 1.02 |\n", - "| value_loss | 21.6 |\n", - "------------------------------------\n", - "begin_total_asset:1000000\n", - "end_total_asset:3767699.432239705\n", - "total_reward:2767699.432239705\n", - "total_cost: 3225.8563617229293\n", - "total_trades: 31503\n", - "Sharpe: 0.8438602812346044\n", - "=================================\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 263 |\n", - "| iterations | 6100 |\n", - "| time_elapsed | 115 |\n", - "| total_timesteps | 30500 |\n", - "| train/ | |\n", - "| entropy_loss | -43.2 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 6099 |\n", - "| policy_loss | 75 |\n", - "| std | 1.02 |\n", - "| value_loss | 3.38 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 263 |\n", - "| iterations | 6200 |\n", - "| time_elapsed | 117 |\n", - "| total_timesteps | 31000 |\n", - "| train/ | |\n", - "| entropy_loss | -43.3 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 6199 |\n", - "| policy_loss | 65.1 |\n", - "| std | 1.02 |\n", - "| value_loss | 4.46 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 264 |\n", - "| iterations | 6300 |\n", - "| time_elapsed | 119 |\n", - "| total_timesteps | 31500 |\n", - "| train/ | |\n", - "| entropy_loss | -43.2 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 6299 |\n", - "| policy_loss | 19.5 |\n", - "| std | 1.02 |\n", - "| value_loss | 4.29 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 264 |\n", - "| iterations | 6400 |\n", - "| time_elapsed | 121 |\n", - "| total_timesteps | 
32000 |\n", - "| train/ | |\n", - "| entropy_loss | -43.2 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 6399 |\n", - "| policy_loss | 131 |\n", - "| std | 1.02 |\n", - "| value_loss | 15.4 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 264 |\n", - "| iterations | 6500 |\n", - "| time_elapsed | 122 |\n", - "| total_timesteps | 32500 |\n", - "| train/ | |\n", - "| entropy_loss | -43.2 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 6499 |\n", - "| policy_loss | 113 |\n", - "| std | 1.02 |\n", - "| value_loss | 38.6 |\n", - "------------------------------------\n", - "begin_total_asset:1000000\n", - "end_total_asset:3966658.0536604635\n", - "total_reward:2966658.0536604635\n", - "total_cost: 7977.4614967514335\n", - "total_trades: 34678\n", - "Sharpe: 0.8831165688078209\n", - "=================================\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 264 |\n", - "| iterations | 6600 |\n", - "| time_elapsed | 124 |\n", - "| total_timesteps | 33000 |\n", - "| train/ | |\n", - "| entropy_loss | -43.2 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 6599 |\n", - "| policy_loss | 5.64 |\n", - "| std | 1.02 |\n", - "| value_loss | 0.305 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 264 |\n", - "| iterations | 6700 |\n", - "| time_elapsed | 126 |\n", - "| total_timesteps | 33500 |\n", - "| train/ | |\n", - "| entropy_loss | -43.2 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 6699 |\n", - "| policy_loss | 5.23 |\n", - "| std | 1.02 |\n", - "| value_loss | 0.54 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 265 |\n", - "| iterations 
| 6800 |\n", - "| time_elapsed | 128 |\n", - "| total_timesteps | 34000 |\n", - "| train/ | |\n", - "| entropy_loss | -43.2 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 6799 |\n", - "| policy_loss | 85.1 |\n", - "| std | 1.02 |\n", - "| value_loss | 6.29 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 265 |\n", - "| iterations | 6900 |\n", - "| time_elapsed | 130 |\n", - "| total_timesteps | 34500 |\n", - "| train/ | |\n", - "| entropy_loss | -43.2 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 6899 |\n", - "| policy_loss | -97.3 |\n", - "| std | 1.02 |\n", - "| value_loss | 9.65 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 265 |\n", - "| iterations | 7000 |\n", - "| time_elapsed | 131 |\n", - "| total_timesteps | 35000 |\n", - "| train/ | |\n", - "| entropy_loss | -43.2 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 6999 |\n", - "| policy_loss | -585 |\n", - "| std | 1.02 |\n", - "| value_loss | 198 |\n", - "------------------------------------\n", - "begin_total_asset:1000000\n", - "end_total_asset:3446294.959740542\n", - "total_reward:2446294.959740542\n", - "total_cost: 3397.7268977155813\n", - "total_trades: 31617\n", - "Sharpe: 0.7885649055566806\n", - "=================================\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 265 |\n", - "| iterations | 7100 |\n", - "| time_elapsed | 133 |\n", - "| total_timesteps | 35500 |\n", - "| train/ | |\n", - "| entropy_loss | -43.2 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 7099 |\n", - "| policy_loss | -23.1 |\n", - "| std | 1.02 |\n", - "| value_loss | 2.04 |\n", - "------------------------------------\n", - 
"-------------------------------------\n", - "| time/ | |\n", - "| fps | 264 |\n", - "| iterations | 7200 |\n", - "| time_elapsed | 135 |\n", - "| total_timesteps | 36000 |\n", - "| train/ | |\n", - "| entropy_loss | -43.2 |\n", - "| explained_variance | -1.25e+03 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 7199 |\n", - "| policy_loss | 25.2 |\n", - "| std | 1.02 |\n", - "| value_loss | 1.22 |\n", - "-------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 264 |\n", - "| iterations | 7300 |\n", - "| time_elapsed | 137 |\n", - "| total_timesteps | 36500 |\n", - "| train/ | |\n", - "| entropy_loss | -43.2 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 7299 |\n", - "| policy_loss | -86.7 |\n", - "| std | 1.02 |\n", - "| value_loss | 6.06 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 265 |\n", - "| iterations | 7400 |\n", - "| time_elapsed | 139 |\n", - "| total_timesteps | 37000 |\n", - "| train/ | |\n", - "| entropy_loss | -43.2 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 7399 |\n", - "| policy_loss | -371 |\n", - "| std | 1.02 |\n", - "| value_loss | 82.5 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 265 |\n", - "| iterations | 7500 |\n", - "| time_elapsed | 141 |\n", - "| total_timesteps | 37500 |\n", - "| train/ | |\n", - "| entropy_loss | -43.2 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 7499 |\n", - "| policy_loss | -34.4 |\n", - "| std | 1.02 |\n", - "| value_loss | 2.71 |\n", - "------------------------------------\n", - "begin_total_asset:1000000\n", - "end_total_asset:3344736.938978183\n", - "total_reward:2344736.938978183\n", - "total_cost: 2206.6413143639265\n", - "total_trades: 31325\n", - 
"Sharpe: 0.7692258924747282\n", - "=================================\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 265 |\n", - "| iterations | 7600 |\n", - "| time_elapsed | 143 |\n", - "| total_timesteps | 38000 |\n", - "| train/ | |\n", - "| entropy_loss | -43.3 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 7599 |\n", - "| policy_loss | 49.6 |\n", - "| std | 1.03 |\n", - "| value_loss | 1.61 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 265 |\n", - "| iterations | 7700 |\n", - "| time_elapsed | 144 |\n", - "| total_timesteps | 38500 |\n", - "| train/ | |\n", - "| entropy_loss | -43.3 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 7699 |\n", - "| policy_loss | -50.2 |\n", - "| std | 1.03 |\n", - "| value_loss | 2.28 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 265 |\n", - "| iterations | 7800 |\n", - "| time_elapsed | 146 |\n", - "| total_timesteps | 39000 |\n", - "| train/ | |\n", - "| entropy_loss | -43.3 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 7799 |\n", - "| policy_loss | 92.3 |\n", - "| std | 1.03 |\n", - "| value_loss | 5.65 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 265 |\n", - "| iterations | 7900 |\n", - "| time_elapsed | 148 |\n", - "| total_timesteps | 39500 |\n", - "| train/ | |\n", - "| entropy_loss | -43.3 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 7899 |\n", - "| policy_loss | -82.3 |\n", - "| std | 1.03 |\n", - "| value_loss | 20.1 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 265 |\n", - "| iterations | 8000 |\n", 
- "| time_elapsed | 150 |\n", - "| total_timesteps | 40000 |\n", - "| train/ | |\n", - "| entropy_loss | -43.3 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 7999 |\n", - "| policy_loss | 144 |\n", - "| std | 1.03 |\n", - "| value_loss | 15.5 |\n", - "------------------------------------\n", - "begin_total_asset:1000000\n", - "end_total_asset:3405743.1783298114\n", - "total_reward:2405743.1783298114\n", - "total_cost: 2954.0446352297254\n", - "total_trades: 33773\n", - "Sharpe: 0.8134505006039155\n", - "=================================\n", - "-------------------------------------\n", - "| time/ | |\n", - "| fps | 266 |\n", - "| iterations | 8100 |\n", - "| time_elapsed | 152 |\n", - "| total_timesteps | 40500 |\n", - "| train/ | |\n", - "| entropy_loss | -43.4 |\n", - "| explained_variance | -3.13e+12 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 8099 |\n", - "| policy_loss | 70.7 |\n", - "| std | 1.03 |\n", - "| value_loss | 5.87 |\n", - "-------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 266 |\n", - "| iterations | 8200 |\n", - "| time_elapsed | 154 |\n", - "| total_timesteps | 41000 |\n", - "| train/ | |\n", - "| entropy_loss | -43.4 |\n", - "| explained_variance | -4.64 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 8199 |\n", - "| policy_loss | 171 |\n", - "| std | 1.03 |\n", - "| value_loss | 17 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 266 |\n", - "| iterations | 8300 |\n", - "| time_elapsed | 155 |\n", - "| total_timesteps | 41500 |\n", - "| train/ | |\n", - "| entropy_loss | -43.4 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 8299 |\n", - "| policy_loss | -160 |\n", - "| std | 1.03 |\n", - "| value_loss | 23.3 |\n", - "------------------------------------\n", - 
"------------------------------------\n", - "| time/ | |\n", - "| fps | 266 |\n", - "| iterations | 8400 |\n", - "| time_elapsed | 157 |\n", - "| total_timesteps | 42000 |\n", - "| train/ | |\n", - "| entropy_loss | -43.5 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 8399 |\n", - "| policy_loss | -85.1 |\n", - "| std | 1.03 |\n", - "| value_loss | 3.98 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 266 |\n", - "| iterations | 8500 |\n", - "| time_elapsed | 159 |\n", - "| total_timesteps | 42500 |\n", - "| train/ | |\n", - "| entropy_loss | -43.5 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 8499 |\n", - "| policy_loss | 63.9 |\n", - "| std | 1.03 |\n", - "| value_loss | 5.08 |\n", - "------------------------------------\n", - "begin_total_asset:1000000\n", - "end_total_asset:3319582.998510127\n", - "total_reward:2319582.998510127\n", - "total_cost: 12366.33568307691\n", - "total_trades: 37206\n", - "Sharpe: 0.7728922919437156\n", - "=================================\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 266 |\n", - "| iterations | 8600 |\n", - "| time_elapsed | 161 |\n", - "| total_timesteps | 43000 |\n", - "| train/ | |\n", - "| entropy_loss | -43.6 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 8599 |\n", - "| policy_loss | -62.1 |\n", - "| std | 1.04 |\n", - "| value_loss | 2.26 |\n", - "------------------------------------\n", - "-------------------------------------\n", - "| time/ | |\n", - "| fps | 266 |\n", - "| iterations | 8700 |\n", - "| time_elapsed | 163 |\n", - "| total_timesteps | 43500 |\n", - "| train/ | |\n", - "| entropy_loss | -43.6 |\n", - "| explained_variance | -2.19e+13 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 8699 |\n", - "| policy_loss | -27.8 |\n", - "| std | 1.04 |\n", - 
"| value_loss | 5.62 |\n", - "-------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 266 |\n", - "| iterations | 8800 |\n", - "| time_elapsed | 164 |\n", - "| total_timesteps | 44000 |\n", - "| train/ | |\n", - "| entropy_loss | -43.6 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 8799 |\n", - "| policy_loss | 59.2 |\n", - "| std | 1.04 |\n", - "| value_loss | 2.79 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 266 |\n", - "| iterations | 8900 |\n", - "| time_elapsed | 166 |\n", - "| total_timesteps | 44500 |\n", - "| train/ | |\n", - "| entropy_loss | -43.7 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 8899 |\n", - "| policy_loss | 40.6 |\n", - "| std | 1.04 |\n", - "| value_loss | 1.43 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 267 |\n", - "| iterations | 9000 |\n", - "| time_elapsed | 168 |\n", - "| total_timesteps | 45000 |\n", - "| train/ | |\n", - "| entropy_loss | -43.7 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 8999 |\n", - "| policy_loss | -86.3 |\n", - "| std | 1.04 |\n", - "| value_loss | 6.33 |\n", - "------------------------------------\n", - "begin_total_asset:1000000\n", - "end_total_asset:2904244.1476431573\n", - "total_reward:1904244.1476431573\n", - "total_cost: 15007.745762967967\n", - "total_trades: 37861\n", - "Sharpe: 0.7277540513736201\n", - "=================================\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 267 |\n", - "| iterations | 9100 |\n", - "| time_elapsed | 170 |\n", - "| total_timesteps | 45500 |\n", - "| train/ | |\n", - "| entropy_loss | -43.7 |\n", - "| explained_variance | -37.3 |\n", - "| learning_rate | 0.0007 |\n", - "| 
n_updates | 9099 |\n", - "| policy_loss | -252 |\n", - "| std | 1.04 |\n", - "| value_loss | 35.1 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 267 |\n", - "| iterations | 9200 |\n", - "| time_elapsed | 172 |\n", - "| total_timesteps | 46000 |\n", - "| train/ | |\n", - "| entropy_loss | -43.8 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 9199 |\n", - "| policy_loss | 129 |\n", - "| std | 1.04 |\n", - "| value_loss | 10.8 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 267 |\n", - "| iterations | 9300 |\n", - "| time_elapsed | 173 |\n", - "| total_timesteps | 46500 |\n", - "| train/ | |\n", - "| entropy_loss | -43.8 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 9299 |\n", - "| policy_loss | 57.2 |\n", - "| std | 1.04 |\n", - "| value_loss | 3.01 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 267 |\n", - "| iterations | 9400 |\n", - "| time_elapsed | 175 |\n", - "| total_timesteps | 47000 |\n", - "| train/ | |\n", - "| entropy_loss | -43.8 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 9399 |\n", - "| policy_loss | -63.5 |\n", - "| std | 1.04 |\n", - "| value_loss | 2.74 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 267 |\n", - "| iterations | 9500 |\n", - "| time_elapsed | 177 |\n", - "| total_timesteps | 47500 |\n", - "| train/ | |\n", - "| entropy_loss | -43.8 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 9499 |\n", - "| policy_loss | 17.2 |\n", - "| std | 1.04 |\n", - "| value_loss | 3.6 |\n", - "------------------------------------\n", - "begin_total_asset:1000000\n", - 
"end_total_asset:3181599.2553931386\n", - "total_reward:2181599.2553931386\n", - "total_cost: 6695.658203102723\n", - "total_trades: 37040\n", - "Sharpe: 0.7662862328769516\n", - "=================================\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 267 |\n", - "| iterations | 9600 |\n", - "| time_elapsed | 179 |\n", - "| total_timesteps | 48000 |\n", - "| train/ | |\n", - "| entropy_loss | -43.8 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 9599 |\n", - "| policy_loss | 87 |\n", - "| std | 1.04 |\n", - "| value_loss | 5.95 |\n", - "------------------------------------\n", - "-------------------------------------\n", - "| time/ | |\n", - "| fps | 267 |\n", - "| iterations | 9700 |\n", - "| time_elapsed | 181 |\n", - "| total_timesteps | 48500 |\n", - "| train/ | |\n", - "| entropy_loss | -43.9 |\n", - "| explained_variance | -4.02e+12 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 9699 |\n", - "| policy_loss | 65 |\n", - "| std | 1.05 |\n", - "| value_loss | 5.72 |\n", - "-------------------------------------\n", - "-------------------------------------\n", - "| time/ | |\n", - "| fps | 267 |\n", - "| iterations | 9800 |\n", - "| time_elapsed | 183 |\n", - "| total_timesteps | 49000 |\n", - "| train/ | |\n", - "| entropy_loss | -44 |\n", - "| explained_variance | -4.34e+12 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 9799 |\n", - "| policy_loss | -82.4 |\n", - "| std | 1.05 |\n", - "| value_loss | 7.55 |\n", - "-------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 267 |\n", - "| iterations | 9900 |\n", - "| time_elapsed | 184 |\n", - "| total_timesteps | 49500 |\n", - "| train/ | |\n", - "| entropy_loss | -44 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 9899 |\n", - "| policy_loss | -233 |\n", - "| std | 1.05 |\n", - "| value_loss | 34.3 |\n", - 
"------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 267 |\n", - "| iterations | 10000 |\n", - "| time_elapsed | 186 |\n", - "| total_timesteps | 50000 |\n", - "| train/ | |\n", - "| entropy_loss | -44 |\n", - "| explained_variance | -212 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 9999 |\n", - "| policy_loss | 125 |\n", - "| std | 1.05 |\n", - "| value_loss | 15.3 |\n", - "------------------------------------\n", - "begin_total_asset:1000000\n", - "end_total_asset:3163155.7293832605\n", - "total_reward:2163155.7293832605\n", - "total_cost: 2870.1664502791505\n", - "total_trades: 34933\n", - "Sharpe: 0.7643903649884202\n", - "=================================\n", - "-------------------------------------\n", - "| time/ | |\n", - "| fps | 267 |\n", - "| iterations | 10100 |\n", - "| time_elapsed | 188 |\n", - "| total_timesteps | 50500 |\n", - "| train/ | |\n", - "| entropy_loss | -44 |\n", - "| explained_variance | -6.08e+03 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 10099 |\n", - "| policy_loss | 128 |\n", - "| std | 1.05 |\n", - "| value_loss | 12.8 |\n", - "-------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 267 |\n", - "| iterations | 10200 |\n", - "| time_elapsed | 190 |\n", - "| total_timesteps | 51000 |\n", - "| train/ | |\n", - "| entropy_loss | -44.1 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 10199 |\n", - "| policy_loss | -39.2 |\n", - "| std | 1.05 |\n", - "| value_loss | 10.6 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 268 |\n", - "| iterations | 10300 |\n", - "| time_elapsed | 192 |\n", - "| total_timesteps | 51500 |\n", - "| train/ | |\n", - "| entropy_loss | -44.1 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 10299 |\n", - "| 
policy_loss | 74.1 |\n", - "| std | 1.06 |\n", - "| value_loss | 2.81 |\n", - "------------------------------------\n", - "-------------------------------------\n", - "| time/ | |\n", - "| fps | 268 |\n", - "| iterations | 10400 |\n", - "| time_elapsed | 193 |\n", - "| total_timesteps | 52000 |\n", - "| train/ | |\n", - "| entropy_loss | -44.1 |\n", - "| explained_variance | -1.17e+04 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 10399 |\n", - "| policy_loss | 241 |\n", - "| std | 1.05 |\n", - "| value_loss | 53.4 |\n", - "-------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 268 |\n", - "| iterations | 10500 |\n", - "| time_elapsed | 195 |\n", - "| total_timesteps | 52500 |\n", - "| train/ | |\n", - "| entropy_loss | -44.1 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 10499 |\n", - "| policy_loss | -66.3 |\n", - "| std | 1.06 |\n", - "| value_loss | 6.42 |\n", - "------------------------------------\n", - "begin_total_asset:1000000\n", - "end_total_asset:3196491.408967822\n", - "total_reward:2196491.408967822\n", - "total_cost: 4270.783389629947\n", - "total_trades: 41108\n", - "Sharpe: 0.7902910911867141\n", - "=================================\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 268 |\n", - "| iterations | 10600 |\n", - "| time_elapsed | 197 |\n", - "| total_timesteps | 53000 |\n", - "| train/ | |\n", - "| entropy_loss | -44.1 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 10599 |\n", - "| policy_loss | 22.2 |\n", - "| std | 1.06 |\n", - "| value_loss | 2.71 |\n", - "------------------------------------\n", - "-------------------------------------\n", - "| time/ | |\n", - "| fps | 268 |\n", - "| iterations | 10700 |\n", - "| time_elapsed | 199 |\n", - "| total_timesteps | 53500 |\n", - "| train/ | |\n", - "| entropy_loss | -44.2 |\n", - "| explained_variance 
| -3.64e+03 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 10699 |\n", - "| policy_loss | 246 |\n", - "| std | 1.06 |\n", - "| value_loss | 43.3 |\n", - "-------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 268 |\n", - "| iterations | 10800 |\n", - "| time_elapsed | 201 |\n", - "| total_timesteps | 54000 |\n", - "| train/ | |\n", - "| entropy_loss | -44.2 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 10799 |\n", - "| policy_loss | -146 |\n", - "| std | 1.06 |\n", - "| value_loss | 12 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 268 |\n", - "| iterations | 10900 |\n", - "| time_elapsed | 203 |\n", - "| total_timesteps | 54500 |\n", - "| train/ | |\n", - "| entropy_loss | -44.2 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 10899 |\n", - "| policy_loss | -263 |\n", - "| std | 1.06 |\n", - "| value_loss | 37.1 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 268 |\n", - "| iterations | 11000 |\n", - "| time_elapsed | 205 |\n", - "| total_timesteps | 55000 |\n", - "| train/ | |\n", - "| entropy_loss | -44.2 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 10999 |\n", - "| policy_loss | 114 |\n", - "| std | 1.06 |\n", - "| value_loss | 12.2 |\n", - "------------------------------------\n", - "begin_total_asset:1000000\n", - "end_total_asset:3849179.1372045293\n", - "total_reward:2849179.1372045293\n", - "total_cost: 14247.086195249696\n", - "total_trades: 45210\n", - "Sharpe: 0.9919759691333234\n", - "=================================\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 268 |\n", - "| iterations | 11100 |\n", - "| time_elapsed | 207 |\n", - "| total_timesteps | 55500 
|\n", - "| train/ | |\n", - "| entropy_loss | -44.2 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 11099 |\n", - "| policy_loss | -54.8 |\n", - "| std | 1.06 |\n", - "| value_loss | 3.89 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 268 |\n", - "| iterations | 11200 |\n", - "| time_elapsed | 208 |\n", - "| total_timesteps | 56000 |\n", - "| train/ | |\n", - "| entropy_loss | -44.3 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 11199 |\n", - "| policy_loss | 105 |\n", - "| std | 1.06 |\n", - "| value_loss | 7.82 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 268 |\n", - "| iterations | 11300 |\n", - "| time_elapsed | 210 |\n", - "| total_timesteps | 56500 |\n", - "| train/ | |\n", - "| entropy_loss | -44.2 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 11299 |\n", - "| policy_loss | 51.1 |\n", - "| std | 1.06 |\n", - "| value_loss | 2.34 |\n", - "------------------------------------\n", - "-------------------------------------\n", - "| time/ | |\n", - "| fps | 268 |\n", - "| iterations | 11400 |\n", - "| time_elapsed | 212 |\n", - "| total_timesteps | 57000 |\n", - "| train/ | |\n", - "| entropy_loss | -44.2 |\n", - "| explained_variance | -7.43e+12 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 11399 |\n", - "| policy_loss | 126 |\n", - "| std | 1.06 |\n", - "| value_loss | 15.9 |\n", - "-------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 268 |\n", - "| iterations | 11500 |\n", - "| time_elapsed | 214 |\n", - "| total_timesteps | 57500 |\n", - "| train/ | |\n", - "| entropy_loss | -44.2 |\n", - "| explained_variance | -11.7 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 11499 |\n", - "| 
policy_loss | -122 |\n", - "| std | 1.06 |\n", - "| value_loss | 9.54 |\n", - "------------------------------------\n", - "begin_total_asset:1000000\n", - "end_total_asset:3576028.4597782856\n", - "total_reward:2576028.4597782856\n", - "total_cost: 9016.778400975834\n", - "total_trades: 42915\n", - "Sharpe: 0.8953228502423565\n", - "=================================\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 268 |\n", - "| iterations | 11600 |\n", - "| time_elapsed | 216 |\n", - "| total_timesteps | 58000 |\n", - "| train/ | |\n", - "| entropy_loss | -44.3 |\n", - "| explained_variance | -425 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 11599 |\n", - "| policy_loss | -120 |\n", - "| std | 1.06 |\n", - "| value_loss | 10.6 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 268 |\n", - "| iterations | 11700 |\n", - "| time_elapsed | 218 |\n", - "| total_timesteps | 58500 |\n", - "| train/ | |\n", - "| entropy_loss | -44.3 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 11699 |\n", - "| policy_loss | 46.7 |\n", - "| std | 1.06 |\n", - "| value_loss | 3.25 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 268 |\n", - "| iterations | 11800 |\n", - "| time_elapsed | 219 |\n", - "| total_timesteps | 59000 |\n", - "| train/ | |\n", - "| entropy_loss | -44.4 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 11799 |\n", - "| policy_loss | -16.5 |\n", - "| std | 1.06 |\n", - "| value_loss | 7.51 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 268 |\n", - "| iterations | 11900 |\n", - "| time_elapsed | 221 |\n", - "| total_timesteps | 59500 |\n", - "| train/ | |\n", - "| entropy_loss | -44.4 |\n", - "| explained_variance | 
-62.2 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 11899 |\n", - "| policy_loss | 115 |\n", - "| std | 1.07 |\n", - "| value_loss | 9.24 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 268 |\n", - "| iterations | 12000 |\n", - "| time_elapsed | 223 |\n", - "| total_timesteps | 60000 |\n", - "| train/ | |\n", - "| entropy_loss | -44.4 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 11999 |\n", - "| policy_loss | 0.0658 |\n", - "| std | 1.06 |\n", - "| value_loss | 4.37 |\n", - "------------------------------------\n", - "begin_total_asset:1000000\n", - "end_total_asset:3436426.812452521\n", - "total_reward:2436426.812452521\n", - "total_cost: 6259.129675209552\n", - "total_trades: 41073\n", - "Sharpe: 0.8546131042738302\n", - "=================================\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 268 |\n", - "| iterations | 12100 |\n", - "| time_elapsed | 225 |\n", - "| total_timesteps | 60500 |\n", - "| train/ | |\n", - "| entropy_loss | -44.4 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 12099 |\n", - "| policy_loss | -14.7 |\n", - "| std | 1.07 |\n", - "| value_loss | 0.461 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 268 |\n", - "| iterations | 12200 |\n", - "| time_elapsed | 227 |\n", - "| total_timesteps | 61000 |\n", - "| train/ | |\n", - "| entropy_loss | -44.5 |\n", - "| explained_variance | -32.5 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 12199 |\n", - "| policy_loss | -114 |\n", - "| std | 1.07 |\n", - "| value_loss | 14 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 268 |\n", - "| iterations | 12300 |\n", - "| time_elapsed | 229 |\n", - "| total_timesteps | 61500 |\n", - 
"| train/ | |\n", - "| entropy_loss | -44.5 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 12299 |\n", - "| policy_loss | -42.1 |\n", - "| std | 1.07 |\n", - "| value_loss | 4.82 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 268 |\n", - "| iterations | 12400 |\n", - "| time_elapsed | 231 |\n", - "| total_timesteps | 62000 |\n", - "| train/ | |\n", - "| entropy_loss | -44.5 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 12399 |\n", - "| policy_loss | -34.7 |\n", - "| std | 1.07 |\n", - "| value_loss | 1.68 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 268 |\n", - "| iterations | 12500 |\n", - "| time_elapsed | 232 |\n", - "| total_timesteps | 62500 |\n", - "| train/ | |\n", - "| entropy_loss | -44.6 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 12499 |\n", - "| policy_loss | 76.1 |\n", - "| std | 1.07 |\n", - "| value_loss | 8.46 |\n", - "------------------------------------\n", - "begin_total_asset:1000000\n", - "end_total_asset:3018532.345473118\n", - "total_reward:2018532.3454731181\n", - "total_cost: 6047.126481140976\n", - "total_trades: 42707\n", - "Sharpe: 0.7384948297244762\n", - "=================================\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 268 |\n", - "| iterations | 12600 |\n", - "| time_elapsed | 234 |\n", - "| total_timesteps | 63000 |\n", - "| train/ | |\n", - "| entropy_loss | -44.6 |\n", - "| explained_variance | -553 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 12599 |\n", - "| policy_loss | -18.4 |\n", - "| std | 1.07 |\n", - "| value_loss | 4.33 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 268 |\n", - "| iterations | 
12700 |\n", - "| time_elapsed | 236 |\n", - "| total_timesteps | 63500 |\n", - "| train/ | |\n", - "| entropy_loss | -44.6 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 12699 |\n", - "| policy_loss | -156 |\n", - "| std | 1.07 |\n", - "| value_loss | 16.8 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 268 |\n", - "| iterations | 12800 |\n", - "| time_elapsed | 238 |\n", - "| total_timesteps | 64000 |\n", - "| train/ | |\n", - "| entropy_loss | -44.6 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 12799 |\n", - "| policy_loss | 86 |\n", - "| std | 1.07 |\n", - "| value_loss | 4.19 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 268 |\n", - "| iterations | 12900 |\n", - "| time_elapsed | 240 |\n", - "| total_timesteps | 64500 |\n", - "| train/ | |\n", - "| entropy_loss | -44.6 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 12899 |\n", - "| policy_loss | -77.7 |\n", - "| std | 1.07 |\n", - "| value_loss | 5.54 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 268 |\n", - "| iterations | 13000 |\n", - "| time_elapsed | 241 |\n", - "| total_timesteps | 65000 |\n", - "| train/ | |\n", - "| entropy_loss | -44.6 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 12999 |\n", - "| policy_loss | -48.1 |\n", - "| std | 1.07 |\n", - "| value_loss | 3.39 |\n", - "------------------------------------\n", - "begin_total_asset:1000000\n", - "end_total_asset:3005454.017886528\n", - "total_reward:2005454.0178865278\n", - "total_cost: 5775.348413782655\n", - "total_trades: 37868\n", - "Sharpe: 0.6834871369231124\n", - "=================================\n", - 
"------------------------------------\n", - "| time/ | |\n", - "| fps | 268 |\n", - "| iterations | 13100 |\n", - "| time_elapsed | 243 |\n", - "| total_timesteps | 65500 |\n", - "| train/ | |\n", - "| entropy_loss | -44.6 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 13099 |\n", - "| policy_loss | -41.1 |\n", - "| std | 1.07 |\n", - "| value_loss | 0.966 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 268 |\n", - "| iterations | 13200 |\n", - "| time_elapsed | 245 |\n", - "| total_timesteps | 66000 |\n", - "| train/ | |\n", - "| entropy_loss | -44.6 |\n", - "| explained_variance | -20.2 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 13199 |\n", - "| policy_loss | -51.7 |\n", - "| std | 1.07 |\n", - "| value_loss | 5.59 |\n", - "------------------------------------\n", - "-------------------------------------\n", - "| time/ | |\n", - "| fps | 268 |\n", - "| iterations | 13300 |\n", - "| time_elapsed | 247 |\n", - "| total_timesteps | 66500 |\n", - "| train/ | |\n", - "| entropy_loss | -44.6 |\n", - "| explained_variance | -6.77e+12 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 13299 |\n", - "| policy_loss | -257 |\n", - "| std | 1.07 |\n", - "| value_loss | 43.6 |\n", - "-------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 268 |\n", - "| iterations | 13400 |\n", - "| time_elapsed | 249 |\n", - "| total_timesteps | 67000 |\n", - "| train/ | |\n", - "| entropy_loss | -44.6 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 13399 |\n", - "| policy_loss | 101 |\n", - "| std | 1.07 |\n", - "| value_loss | 5.95 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 268 |\n", - "| iterations | 13500 |\n", - "| time_elapsed | 251 |\n", - "| total_timesteps | 
67500 |\n", - "| train/ | |\n", - "| entropy_loss | -44.6 |\n", - "| explained_variance | -103 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 13499 |\n", - "| policy_loss | -60.1 |\n", - "| std | 1.07 |\n", - "| value_loss | 2.95 |\n", - "------------------------------------\n", - "begin_total_asset:1000000\n", - "end_total_asset:2861607.410381282\n", - "total_reward:1861607.4103812822\n", - "total_cost: 5185.6480773171215\n", - "total_trades: 32918\n", - "Sharpe: 0.627333223770252\n", - "=================================\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 268 |\n", - "| iterations | 13600 |\n", - "| time_elapsed | 252 |\n", - "| total_timesteps | 68000 |\n", - "| train/ | |\n", - "| entropy_loss | -44.6 |\n", - "| explained_variance | -15 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 13599 |\n", - "| policy_loss | 291 |\n", - "| std | 1.07 |\n", - "| value_loss | 51.8 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 268 |\n", - "| iterations | 13700 |\n", - "| time_elapsed | 254 |\n", - "| total_timesteps | 68500 |\n", - "| train/ | |\n", - "| entropy_loss | -44.6 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 13699 |\n", - "| policy_loss | 13.2 |\n", - "| std | 1.07 |\n", - "| value_loss | 0.659 |\n", - "------------------------------------\n", - "-------------------------------------\n", - "| time/ | |\n", - "| fps | 268 |\n", - "| iterations | 13800 |\n", - "| time_elapsed | 256 |\n", - "| total_timesteps | 69000 |\n", - "| train/ | |\n", - "| entropy_loss | -44.6 |\n", - "| explained_variance | -1.15e+12 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 13799 |\n", - "| policy_loss | 11.6 |\n", - "| std | 1.07 |\n", - "| value_loss | 1.12 |\n", - "-------------------------------------\n", - "-------------------------------------\n", - "| time/ | |\n", - "| fps | 268 |\n", 
- "| iterations | 13900 |\n", - "| time_elapsed | 258 |\n", - "| total_timesteps | 69500 |\n", - "| train/ | |\n", - "| entropy_loss | -44.6 |\n", - "| explained_variance | -8.56e+08 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 13899 |\n", - "| policy_loss | 150 |\n", - "| std | 1.07 |\n", - "| value_loss | 13.7 |\n", - "-------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 268 |\n", - "| iterations | 14000 |\n", - "| time_elapsed | 260 |\n", - "| total_timesteps | 70000 |\n", - "| train/ | |\n", - "| entropy_loss | -44.6 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 13999 |\n", - "| policy_loss | -43.5 |\n", - "| std | 1.07 |\n", - "| value_loss | 1.76 |\n", - "------------------------------------\n", - "begin_total_asset:1000000\n", - "end_total_asset:3191285.6374592897\n", - "total_reward:2191285.6374592897\n", - "total_cost: 4185.107238528008\n", - "total_trades: 33416\n", - "Sharpe: 0.715991374478748\n", - "=================================\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 14100 |\n", - "| time_elapsed | 262 |\n", - "| total_timesteps | 70500 |\n", - "| train/ | |\n", - "| entropy_loss | -44.6 |\n", - "| explained_variance | -31.8 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 14099 |\n", - "| policy_loss | 1.39e+03 |\n", - "| std | 1.07 |\n", - "| value_loss | 944 |\n", - "------------------------------------\n", - "-------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 14200 |\n", - "| time_elapsed | 263 |\n", - "| total_timesteps | 71000 |\n", - "| train/ | |\n", - "| entropy_loss | -44.6 |\n", - "| explained_variance | -2.69e+12 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 14199 |\n", - "| policy_loss | -96.5 |\n", - "| std | 1.07 |\n", - "| value_loss | 6.91 |\n", - 
"-------------------------------------\n", - "-------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 14300 |\n", - "| time_elapsed | 265 |\n", - "| total_timesteps | 71500 |\n", - "| train/ | |\n", - "| entropy_loss | -44.6 |\n", - "| explained_variance | -3.11e+12 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 14299 |\n", - "| policy_loss | 94.2 |\n", - "| std | 1.07 |\n", - "| value_loss | 7.25 |\n", - "-------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 14400 |\n", - "| time_elapsed | 267 |\n", - "| total_timesteps | 72000 |\n", - "| train/ | |\n", - "| entropy_loss | -44.7 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 14399 |\n", - "| policy_loss | 21 |\n", - "| std | 1.08 |\n", - "| value_loss | 1.09 |\n", - "------------------------------------\n", - "-------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 14500 |\n", - "| time_elapsed | 269 |\n", - "| total_timesteps | 72500 |\n", - "| train/ | |\n", - "| entropy_loss | -44.7 |\n", - "| explained_variance | -1.56e+12 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 14499 |\n", - "| policy_loss | 114 |\n", - "| std | 1.08 |\n", - "| value_loss | 6.86 |\n", - "-------------------------------------\n", - "begin_total_asset:1000000\n", - "end_total_asset:3276649.777189667\n", - "total_reward:2276649.777189667\n", - "total_cost: 3942.9014864051105\n", - "total_trades: 34694\n", - "Sharpe: 0.7189915467634915\n", - "=================================\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 14600 |\n", - "| time_elapsed | 271 |\n", - "| total_timesteps | 73000 |\n", - "| train/ | |\n", - "| entropy_loss | -44.8 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 14599 
|\n", - "| policy_loss | -80.3 |\n", - "| std | 1.08 |\n", - "| value_loss | 4.13 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 14700 |\n", - "| time_elapsed | 272 |\n", - "| total_timesteps | 73500 |\n", - "| train/ | |\n", - "| entropy_loss | -44.8 |\n", - "| explained_variance | -42.9 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 14699 |\n", - "| policy_loss | 5.46 |\n", - "| std | 1.08 |\n", - "| value_loss | 1.23 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 14800 |\n", - "| time_elapsed | 274 |\n", - "| total_timesteps | 74000 |\n", - "| train/ | |\n", - "| entropy_loss | -44.8 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 14799 |\n", - "| policy_loss | -41.4 |\n", - "| std | 1.08 |\n", - "| value_loss | 1.92 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 14900 |\n", - "| time_elapsed | 276 |\n", - "| total_timesteps | 74500 |\n", - "| train/ | |\n", - "| entropy_loss | -44.9 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 14899 |\n", - "| policy_loss | 69.1 |\n", - "| std | 1.08 |\n", - "| value_loss | 9.59 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 15000 |\n", - "| time_elapsed | 278 |\n", - "| total_timesteps | 75000 |\n", - "| train/ | |\n", - "| entropy_loss | -44.9 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 14999 |\n", - "| policy_loss | -10.7 |\n", - "| std | 1.08 |\n", - "| value_loss | 0.911 |\n", - "------------------------------------\n", - "begin_total_asset:1000000\n", - 
"end_total_asset:3508348.255896097\n", - "total_reward:2508348.255896097\n", - "total_cost: 11208.941549323808\n", - "total_trades: 37043\n", - "Sharpe: 0.8124699557413589\n", - "=================================\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 15100 |\n", - "| time_elapsed | 280 |\n", - "| total_timesteps | 75500 |\n", - "| train/ | |\n", - "| entropy_loss | -44.8 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 15099 |\n", - "| policy_loss | 2.28 |\n", - "| std | 1.08 |\n", - "| value_loss | 0.074 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 15200 |\n", - "| time_elapsed | 281 |\n", - "| total_timesteps | 76000 |\n", - "| train/ | |\n", - "| entropy_loss | -44.8 |\n", - "| explained_variance | -1.87 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 15199 |\n", - "| policy_loss | -80 |\n", - "| std | 1.08 |\n", - "| value_loss | 3.56 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 15300 |\n", - "| time_elapsed | 283 |\n", - "| total_timesteps | 76500 |\n", - "| train/ | |\n", - "| entropy_loss | -44.8 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 15299 |\n", - "| policy_loss | 8.44 |\n", - "| std | 1.08 |\n", - "| value_loss | 1.24 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 15400 |\n", - "| time_elapsed | 285 |\n", - "| total_timesteps | 77000 |\n", - "| train/ | |\n", - "| entropy_loss | -44.8 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 15399 |\n", - "| policy_loss | 276 |\n", - "| std | 1.08 |\n", - "| value_loss | 57.5 |\n", - 
"------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 15500 |\n", - "| time_elapsed | 287 |\n", - "| total_timesteps | 77500 |\n", - "| train/ | |\n", - "| entropy_loss | -44.9 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 15499 |\n", - "| policy_loss | 160 |\n", - "| std | 1.08 |\n", - "| value_loss | 16.4 |\n", - "------------------------------------\n", - "begin_total_asset:1000000\n", - "end_total_asset:4416862.49751315\n", - "total_reward:3416862.49751315\n", - "total_cost: 18937.26260040585\n", - "total_trades: 37061\n", - "Sharpe: 0.9703548552780149\n", - "=================================\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 15600 |\n", - "| time_elapsed | 289 |\n", - "| total_timesteps | 78000 |\n", - "| train/ | |\n", - "| entropy_loss | -45 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 15599 |\n", - "| policy_loss | 577 |\n", - "| std | 1.09 |\n", - "| value_loss | 273 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 15700 |\n", - "| time_elapsed | 290 |\n", - "| total_timesteps | 78500 |\n", - "| train/ | |\n", - "| entropy_loss | -44.9 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 15699 |\n", - "| policy_loss | -72.5 |\n", - "| std | 1.09 |\n", - "| value_loss | 3.3 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 15800 |\n", - "| time_elapsed | 292 |\n", - "| total_timesteps | 79000 |\n", - "| train/ | |\n", - "| entropy_loss | -45 |\n", - "| explained_variance | -271 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 15799 |\n", - "| policy_loss | 
-63.8 |\n", - "| std | 1.09 |\n", - "| value_loss | 3.84 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 15900 |\n", - "| time_elapsed | 294 |\n", - "| total_timesteps | 79500 |\n", - "| train/ | |\n", - "| entropy_loss | -45 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 15899 |\n", - "| policy_loss | -514 |\n", - "| std | 1.09 |\n", - "| value_loss | 170 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 16000 |\n", - "| time_elapsed | 296 |\n", - "| total_timesteps | 80000 |\n", - "| train/ | |\n", - "| entropy_loss | -45 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 15999 |\n", - "| policy_loss | 293 |\n", - "| std | 1.09 |\n", - "| value_loss | 53.8 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 16100 |\n", - "| time_elapsed | 298 |\n", - "| total_timesteps | 80500 |\n", - "| train/ | |\n", - "| entropy_loss | -45 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 16099 |\n", - "| policy_loss | -312 |\n", - "| std | 1.09 |\n", - "| value_loss | 109 |\n", - "------------------------------------\n", - "begin_total_asset:1000000\n", - "end_total_asset:6572073.540279714\n", - "total_reward:5572073.540279714\n", - "total_cost: 25558.900906312338\n", - "total_trades: 38195\n", - "Sharpe: 1.1694339512811986\n", - "=================================\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 16200 |\n", - "| time_elapsed | 299 |\n", - "| total_timesteps | 81000 |\n", - "| train/ | |\n", - "| entropy_loss | -45 |\n", - "| explained_variance | -509 |\n", - "| learning_rate 
| 0.0007 |\n", - "| n_updates | 16199 |\n", - "| policy_loss | 257 |\n", - "| std | 1.09 |\n", - "| value_loss | 32.5 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 16300 |\n", - "| time_elapsed | 301 |\n", - "| total_timesteps | 81500 |\n", - "| train/ | |\n", - "| entropy_loss | -45.1 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 16299 |\n", - "| policy_loss | 117 |\n", - "| std | 1.09 |\n", - "| value_loss | 9.1 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 16400 |\n", - "| time_elapsed | 303 |\n", - "| total_timesteps | 82000 |\n", - "| train/ | |\n", - "| entropy_loss | -45.1 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 16399 |\n", - "| policy_loss | 262 |\n", - "| std | 1.09 |\n", - "| value_loss | 35.9 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 16500 |\n", - "| time_elapsed | 305 |\n", - "| total_timesteps | 82500 |\n", - "| train/ | |\n", - "| entropy_loss | -45.1 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 16499 |\n", - "| policy_loss | -45 |\n", - "| std | 1.09 |\n", - "| value_loss | 2.27 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 16600 |\n", - "| time_elapsed | 307 |\n", - "| total_timesteps | 83000 |\n", - "| train/ | |\n", - "| entropy_loss | -45.1 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 16599 |\n", - "| policy_loss | -561 |\n", - "| std | 1.09 |\n", - "| value_loss | 236 |\n", - "------------------------------------\n", - 
"begin_total_asset:1000000\n", - "end_total_asset:5698994.463846846\n", - "total_reward:4698994.463846846\n", - "total_cost: 17337.4506195575\n", - "total_trades: 36912\n", - "Sharpe: 1.0295608824494007\n", - "=================================\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 16700 |\n", - "| time_elapsed | 308 |\n", - "| total_timesteps | 83500 |\n", - "| train/ | |\n", - "| entropy_loss | -45.1 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 16699 |\n", - "| policy_loss | -54.8 |\n", - "| std | 1.09 |\n", - "| value_loss | 2.36 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 16800 |\n", - "| time_elapsed | 310 |\n", - "| total_timesteps | 84000 |\n", - "| train/ | |\n", - "| entropy_loss | -45.2 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 16799 |\n", - "| policy_loss | 56.3 |\n", - "| std | 1.09 |\n", - "| value_loss | 4.36 |\n", - "------------------------------------\n", - "-------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 16900 |\n", - "| time_elapsed | 312 |\n", - "| total_timesteps | 84500 |\n", - "| train/ | |\n", - "| entropy_loss | -45.2 |\n", - "| explained_variance | -7.42e+03 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 16899 |\n", - "| policy_loss | 20.5 |\n", - "| std | 1.1 |\n", - "| value_loss | 6.59 |\n", - "-------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 17000 |\n", - "| time_elapsed | 314 |\n", - "| total_timesteps | 85000 |\n", - "| train/ | |\n", - "| entropy_loss | -45.2 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 16999 |\n", - "| policy_loss | 306 |\n", - "| std | 1.1 
|\n", - "| value_loss | 66.7 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 17100 |\n", - "| time_elapsed | 316 |\n", - "| total_timesteps | 85500 |\n", - "| train/ | |\n", - "| entropy_loss | -45.3 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 17099 |\n", - "| policy_loss | -195 |\n", - "| std | 1.1 |\n", - "| value_loss | 66.1 |\n", - "------------------------------------\n", - "begin_total_asset:1000000\n", - "end_total_asset:6381904.8543528775\n", - "total_reward:5381904.8543528775\n", - "total_cost: 12508.200039626663\n", - "total_trades: 35689\n", - "Sharpe: 1.1424293800622989\n", - "=================================\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 17200 |\n", - "| time_elapsed | 317 |\n", - "| total_timesteps | 86000 |\n", - "| train/ | |\n", - "| entropy_loss | -45.3 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 17199 |\n", - "| policy_loss | 30 |\n", - "| std | 1.1 |\n", - "| value_loss | 0.588 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 17300 |\n", - "| time_elapsed | 319 |\n", - "| total_timesteps | 86500 |\n", - "| train/ | |\n", - "| entropy_loss | -45.2 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 17299 |\n", - "| policy_loss | -206 |\n", - "| std | 1.1 |\n", - "| value_loss | 21.8 |\n", - "------------------------------------\n", - "-------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 17400 |\n", - "| time_elapsed | 321 |\n", - "| total_timesteps | 87000 |\n", - "| train/ | |\n", - "| entropy_loss | -45.3 |\n", - "| explained_variance | -5.48e+03 |\n", - "| learning_rate | 0.0007 
|\n", - "| n_updates | 17399 |\n", - "| policy_loss | 215 |\n", - "| std | 1.1 |\n", - "| value_loss | 25.9 |\n", - "-------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 17500 |\n", - "| time_elapsed | 323 |\n", - "| total_timesteps | 87500 |\n", - "| train/ | |\n", - "| entropy_loss | -45.3 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 17499 |\n", - "| policy_loss | -28.9 |\n", - "| std | 1.1 |\n", - "| value_loss | 4.87 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 17600 |\n", - "| time_elapsed | 325 |\n", - "| total_timesteps | 88000 |\n", - "| train/ | |\n", - "| entropy_loss | -45.3 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 17599 |\n", - "| policy_loss | -75.1 |\n", - "| std | 1.1 |\n", - "| value_loss | 6.57 |\n", - "------------------------------------\n", - "begin_total_asset:1000000\n", - "end_total_asset:5436034.522246395\n", - "total_reward:4436034.522246395\n", - "total_cost: 15350.251113259093\n", - "total_trades: 38300\n", - "Sharpe: 1.111300596501636\n", - "=================================\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 17700 |\n", - "| time_elapsed | 327 |\n", - "| total_timesteps | 88500 |\n", - "| train/ | |\n", - "| entropy_loss | -45.4 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 17699 |\n", - "| policy_loss | 131 |\n", - "| std | 1.1 |\n", - "| value_loss | 8.69 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 17800 |\n", - "| time_elapsed | 329 |\n", - "| total_timesteps | 89000 |\n", - "| train/ | |\n", - "| entropy_loss | -45.4 
|\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 17799 |\n", - "| policy_loss | 37.7 |\n", - "| std | 1.1 |\n", - "| value_loss | 1.64 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 17900 |\n", - "| time_elapsed | 330 |\n", - "| total_timesteps | 89500 |\n", - "| train/ | |\n", - "| entropy_loss | -45.4 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 17899 |\n", - "| policy_loss | 14.6 |\n", - "| std | 1.1 |\n", - "| value_loss | 2.22 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 18000 |\n", - "| time_elapsed | 332 |\n", - "| total_timesteps | 90000 |\n", - "| train/ | |\n", - "| entropy_loss | -45.4 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 17999 |\n", - "| policy_loss | -304 |\n", - "| std | 1.1 |\n", - "| value_loss | 49.1 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 18100 |\n", - "| time_elapsed | 334 |\n", - "| total_timesteps | 90500 |\n", - "| train/ | |\n", - "| entropy_loss | -45.4 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 18099 |\n", - "| policy_loss | -370 |\n", - "| std | 1.1 |\n", - "| value_loss | 72.5 |\n", - "------------------------------------\n", - "begin_total_asset:1000000\n", - "end_total_asset:5112916.556362064\n", - "total_reward:4112916.5563620636\n", - "total_cost: 15612.707192791122\n", - "total_trades: 37413\n", - "Sharpe: 1.0611073756631733\n", - "=================================\n", - "-------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 18200 |\n", - "| time_elapsed | 336 |\n", - "| 
total_timesteps | 91000 |\n", - "| train/ | |\n", - "| entropy_loss | -45.5 |\n", - "| explained_variance | -6.66e+03 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 18199 |\n", - "| policy_loss | 74.9 |\n", - "| std | 1.11 |\n", - "| value_loss | 3.92 |\n", - "-------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 18300 |\n", - "| time_elapsed | 338 |\n", - "| total_timesteps | 91500 |\n", - "| train/ | |\n", - "| entropy_loss | -45.5 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 18299 |\n", - "| policy_loss | -133 |\n", - "| std | 1.11 |\n", - "| value_loss | 13.7 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 18400 |\n", - "| time_elapsed | 339 |\n", - "| total_timesteps | 92000 |\n", - "| train/ | |\n", - "| entropy_loss | -45.5 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 18399 |\n", - "| policy_loss | 73 |\n", - "| std | 1.11 |\n", - "| value_loss | 3.98 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 18500 |\n", - "| time_elapsed | 341 |\n", - "| total_timesteps | 92500 |\n", - "| train/ | |\n", - "| entropy_loss | -45.6 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 18499 |\n", - "| policy_loss | 4.46 |\n", - "| std | 1.11 |\n", - "| value_loss | 0.844 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 18600 |\n", - "| time_elapsed | 343 |\n", - "| total_timesteps | 93000 |\n", - "| train/ | |\n", - "| entropy_loss | -45.6 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 
18599 |\n", - "| policy_loss | -214 |\n", - "| std | 1.11 |\n", - "| value_loss | 26.6 |\n", - "------------------------------------\n", - "begin_total_asset:1000000\n", - "end_total_asset:4986097.277640037\n", - "total_reward:3986097.2776400372\n", - "total_cost: 13702.647875393004\n", - "total_trades: 35305\n", - "Sharpe: 1.0387271032164815\n", - "=================================\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 18700 |\n", - "| time_elapsed | 345 |\n", - "| total_timesteps | 93500 |\n", - "| train/ | |\n", - "| entropy_loss | -45.5 |\n", - "| explained_variance | -26.8 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 18699 |\n", - "| policy_loss | -40.8 |\n", - "| std | 1.11 |\n", - "| value_loss | 0.888 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 18800 |\n", - "| time_elapsed | 347 |\n", - "| total_timesteps | 94000 |\n", - "| train/ | |\n", - "| entropy_loss | -45.6 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 18799 |\n", - "| policy_loss | -114 |\n", - "| std | 1.11 |\n", - "| value_loss | 9.15 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 18900 |\n", - "| time_elapsed | 348 |\n", - "| total_timesteps | 94500 |\n", - "| train/ | |\n", - "| entropy_loss | -45.6 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 18899 |\n", - "| policy_loss | -360 |\n", - "| std | 1.11 |\n", - "| value_loss | 58.3 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 19000 |\n", - "| time_elapsed | 350 |\n", - "| total_timesteps | 95000 |\n", - "| train/ | |\n", - "| entropy_loss | -45.6 |\n", - "| 
explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 18999 |\n", - "| policy_loss | 94.4 |\n", - "| std | 1.11 |\n", - "| value_loss | 8.57 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 19100 |\n", - "| time_elapsed | 352 |\n", - "| total_timesteps | 95500 |\n", - "| train/ | |\n", - "| entropy_loss | -45.6 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 19099 |\n", - "| policy_loss | -4.65 |\n", - "| std | 1.11 |\n", - "| value_loss | 2.46 |\n", - "------------------------------------\n", - "begin_total_asset:1000000\n", - "end_total_asset:5478501.103530731\n", - "total_reward:4478501.103530731\n", - "total_cost: 10256.280938558313\n", - "total_trades: 37074\n", - "Sharpe: 1.1342798023300105\n", - "=================================\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 19200 |\n", - "| time_elapsed | 354 |\n", - "| total_timesteps | 96000 |\n", - "| train/ | |\n", - "| entropy_loss | -45.6 |\n", - "| explained_variance | -2.2e+03 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 19199 |\n", - "| policy_loss | -52.2 |\n", - "| std | 1.11 |\n", - "| value_loss | 3.13 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 19300 |\n", - "| time_elapsed | 356 |\n", - "| total_timesteps | 96500 |\n", - "| train/ | |\n", - "| entropy_loss | -45.7 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 19299 |\n", - "| policy_loss | -221 |\n", - "| std | 1.11 |\n", - "| value_loss | 29.8 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 19400 |\n", - "| time_elapsed | 358 |\n", - "| 
total_timesteps | 97000 |\n", - "| train/ | |\n", - "| entropy_loss | -45.7 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 19399 |\n", - "| policy_loss | 2.54 |\n", - "| std | 1.11 |\n", - "| value_loss | 0.552 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 19500 |\n", - "| time_elapsed | 360 |\n", - "| total_timesteps | 97500 |\n", - "| train/ | |\n", - "| entropy_loss | -45.7 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 19499 |\n", - "| policy_loss | 324 |\n", - "| std | 1.12 |\n", - "| value_loss | 73.7 |\n", - "------------------------------------\n", - "-------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 19600 |\n", - "| time_elapsed | 361 |\n", - "| total_timesteps | 98000 |\n", - "| train/ | |\n", - "| entropy_loss | -45.7 |\n", - "| explained_variance | -2.23e+04 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 19599 |\n", - "| policy_loss | -546 |\n", - "| std | 1.11 |\n", - "| value_loss | 139 |\n", - "-------------------------------------\n", - "begin_total_asset:1000000\n", - "end_total_asset:4206773.89180218\n", - "total_reward:3206773.8918021796\n", - "total_cost: 5223.3386326608415\n", - "total_trades: 36723\n", - "Sharpe: 0.9776063927933439\n", - "=================================\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 19700 |\n", - "| time_elapsed | 363 |\n", - "| total_timesteps | 98500 |\n", - "| train/ | |\n", - "| entropy_loss | -45.8 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 19699 |\n", - "| policy_loss | -155 |\n", - "| std | 1.12 |\n", - "| value_loss | 12.4 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| 
fps | 270 |\n", - "| iterations | 19800 |\n", - "| time_elapsed | 365 |\n", - "| total_timesteps | 99000 |\n", - "| train/ | |\n", - "| entropy_loss | -45.8 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 19799 |\n", - "| policy_loss | 73.5 |\n", - "| std | 1.12 |\n", - "| value_loss | 4.66 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 19900 |\n", - "| time_elapsed | 367 |\n", - "| total_timesteps | 99500 |\n", - "| train/ | |\n", - "| entropy_loss | -45.8 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 19899 |\n", - "| policy_loss | -24.7 |\n", - "| std | 1.12 |\n", - "| value_loss | 2.18 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 20000 |\n", - "| time_elapsed | 369 |\n", - "| total_timesteps | 100000 |\n", - "| train/ | |\n", - "| entropy_loss | -45.9 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 19999 |\n", - "| policy_loss | 42 |\n", - "| std | 1.12 |\n", - "| value_loss | 1.86 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 20100 |\n", - "| time_elapsed | 371 |\n", - "| total_timesteps | 100500 |\n", - "| train/ | |\n", - "| entropy_loss | -45.9 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 20099 |\n", - "| policy_loss | 279 |\n", - "| std | 1.12 |\n", - "| value_loss | 51.4 |\n", - "------------------------------------\n", - "begin_total_asset:1000000\n", - "end_total_asset:4319570.605313044\n", - "total_reward:3319570.605313044\n", - "total_cost: 6777.852646750923\n", - "total_trades: 38079\n", - "Sharpe: 0.9793624584136245\n", - 
"=================================\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 20200 |\n", - "| time_elapsed | 373 |\n", - "| total_timesteps | 101000 |\n", - "| train/ | |\n", - "| entropy_loss | -46 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 20199 |\n", - "| policy_loss | 94 |\n", - "| std | 1.13 |\n", - "| value_loss | 6.2 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 20300 |\n", - "| time_elapsed | 375 |\n", - "| total_timesteps | 101500 |\n", - "| train/ | |\n", - "| entropy_loss | -46 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 20299 |\n", - "| policy_loss | -23.3 |\n", - "| std | 1.13 |\n", - "| value_loss | 1.69 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 20400 |\n", - "| time_elapsed | 376 |\n", - "| total_timesteps | 102000 |\n", - "| train/ | |\n", - "| entropy_loss | -46.1 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 20399 |\n", - "| policy_loss | 33.9 |\n", - "| std | 1.13 |\n", - "| value_loss | 2.74 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 20500 |\n", - "| time_elapsed | 378 |\n", - "| total_timesteps | 102500 |\n", - "| train/ | |\n", - "| entropy_loss | -46 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 20499 |\n", - "| policy_loss | -137 |\n", - "| std | 1.13 |\n", - "| value_loss | 12 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 20600 |\n", - "| time_elapsed | 380 |\n", - 
"| total_timesteps | 103000 |\n", - "| train/ | |\n", - "| entropy_loss | -45.9 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 20599 |\n", - "| policy_loss | 374 |\n", - "| std | 1.12 |\n", - "| value_loss | 99 |\n", - "------------------------------------\n", - "begin_total_asset:1000000\n", - "end_total_asset:6257628.032702145\n", - "total_reward:5257628.032702145\n", - "total_cost: 15497.552403549977\n", - "total_trades: 41618\n", - "Sharpe: 1.1223670233311491\n", - "=================================\n", - "-------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 20700 |\n", - "| time_elapsed | 382 |\n", - "| total_timesteps | 103500 |\n", - "| train/ | |\n", - "| entropy_loss | -45.9 |\n", - "| explained_variance | -1.38e+04 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 20699 |\n", - "| policy_loss | -30.9 |\n", - "| std | 1.12 |\n", - "| value_loss | 21.9 |\n", - "-------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 20800 |\n", - "| time_elapsed | 384 |\n", - "| total_timesteps | 104000 |\n", - "| train/ | |\n", - "| entropy_loss | -45.9 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 20799 |\n", - "| policy_loss | -34 |\n", - "| std | 1.12 |\n", - "| value_loss | 1.24 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 20900 |\n", - "| time_elapsed | 386 |\n", - "| total_timesteps | 104500 |\n", - "| train/ | |\n", - "| entropy_loss | -46 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 20899 |\n", - "| policy_loss | 72.1 |\n", - "| std | 1.13 |\n", - "| value_loss | 3.54 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - 
"| fps | 270 |\n", - "| iterations | 21000 |\n", - "| time_elapsed | 388 |\n", - "| total_timesteps | 105000 |\n", - "| train/ | |\n", - "| entropy_loss | -46 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 20999 |\n", - "| policy_loss | -385 |\n", - "| std | 1.13 |\n", - "| value_loss | 89.4 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 21100 |\n", - "| time_elapsed | 389 |\n", - "| total_timesteps | 105500 |\n", - "| train/ | |\n", - "| entropy_loss | -46 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 21099 |\n", - "| policy_loss | 115 |\n", - "| std | 1.13 |\n", - "| value_loss | 32.1 |\n", - "------------------------------------\n", - "begin_total_asset:1000000\n", - "end_total_asset:4738471.037828859\n", - "total_reward:3738471.037828859\n", - "total_cost: 7014.150195751989\n", - "total_trades: 41430\n", - "Sharpe: 0.9741579164389573\n", - "=================================\n", - "-------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 21200 |\n", - "| time_elapsed | 391 |\n", - "| total_timesteps | 106000 |\n", - "| train/ | |\n", - "| entropy_loss | -46 |\n", - "| explained_variance | -4.84e+10 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 21199 |\n", - "| policy_loss | -199 |\n", - "| std | 1.13 |\n", - "| value_loss | 19.4 |\n", - "-------------------------------------\n", - "-------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 21300 |\n", - "| time_elapsed | 393 |\n", - "| total_timesteps | 106500 |\n", - "| train/ | |\n", - "| entropy_loss | -46.1 |\n", - "| explained_variance | -2.18e+03 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 21299 |\n", - "| policy_loss | -306 |\n", - "| std | 1.13 |\n", - "| value_loss | 45.8 |\n", - 
"-------------------------------------\n", - "-------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 21400 |\n", - "| time_elapsed | 395 |\n", - "| total_timesteps | 107000 |\n", - "| train/ | |\n", - "| entropy_loss | -46 |\n", - "| explained_variance | -1.53e+05 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 21399 |\n", - "| policy_loss | -210 |\n", - "| std | 1.13 |\n", - "| value_loss | 24.8 |\n", - "-------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 21500 |\n", - "| time_elapsed | 397 |\n", - "| total_timesteps | 107500 |\n", - "| train/ | |\n", - "| entropy_loss | -46 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 21499 |\n", - "| policy_loss | 126 |\n", - "| std | 1.13 |\n", - "| value_loss | 9.59 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 21600 |\n", - "| time_elapsed | 399 |\n", - "| total_timesteps | 108000 |\n", - "| train/ | |\n", - "| entropy_loss | -46 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 21599 |\n", - "| policy_loss | -214 |\n", - "| std | 1.13 |\n", - "| value_loss | 96.2 |\n", - "------------------------------------\n", - "begin_total_asset:1000000\n", - "end_total_asset:4857941.929380179\n", - "total_reward:3857941.9293801794\n", - "total_cost: 4300.517490341594\n", - "total_trades: 39933\n", - "Sharpe: 1.0101593537518043\n", - "=================================\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 21700 |\n", - "| time_elapsed | 401 |\n", - "| total_timesteps | 108500 |\n", - "| train/ | |\n", - "| entropy_loss | -46 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 21699 |\n", - "| 
policy_loss | -26.1 |\n", - "| std | 1.13 |\n", - "| value_loss | 0.598 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 21800 |\n", - "| time_elapsed | 402 |\n", - "| total_timesteps | 109000 |\n", - "| train/ | |\n", - "| entropy_loss | -46 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 21799 |\n", - "| policy_loss | 81.4 |\n", - "| std | 1.13 |\n", - "| value_loss | 6.68 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 21900 |\n", - "| time_elapsed | 404 |\n", - "| total_timesteps | 109500 |\n", - "| train/ | |\n", - "| entropy_loss | -46 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 21899 |\n", - "| policy_loss | -198 |\n", - "| std | 1.12 |\n", - "| value_loss | 18.1 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 22000 |\n", - "| time_elapsed | 406 |\n", - "| total_timesteps | 110000 |\n", - "| train/ | |\n", - "| entropy_loss | -46 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 21999 |\n", - "| policy_loss | -107 |\n", - "| std | 1.13 |\n", - "| value_loss | 6.12 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 22100 |\n", - "| time_elapsed | 408 |\n", - "| total_timesteps | 110500 |\n", - "| train/ | |\n", - "| entropy_loss | -45.9 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 22099 |\n", - "| policy_loss | -209 |\n", - "| std | 1.12 |\n", - "| value_loss | 74 |\n", - "------------------------------------\n", - "begin_total_asset:1000000\n", - 
"end_total_asset:3889237.068636508\n", - "total_reward:2889237.068636508\n", - "total_cost: 2349.804122118537\n", - "total_trades: 40372\n", - "Sharpe: 0.8843985305523498\n", - "=================================\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 22200 |\n", - "| time_elapsed | 410 |\n", - "| total_timesteps | 111000 |\n", - "| train/ | |\n", - "| entropy_loss | -46 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 22199 |\n", - "| policy_loss | 29.7 |\n", - "| std | 1.13 |\n", - "| value_loss | 0.671 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 22300 |\n", - "| time_elapsed | 412 |\n", - "| total_timesteps | 111500 |\n", - "| train/ | |\n", - "| entropy_loss | -46 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 22299 |\n", - "| policy_loss | 78.5 |\n", - "| std | 1.13 |\n", - "| value_loss | 3.36 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 22400 |\n", - "| time_elapsed | 414 |\n", - "| total_timesteps | 112000 |\n", - "| train/ | |\n", - "| entropy_loss | -46.1 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 22399 |\n", - "| policy_loss | 33.8 |\n", - "| std | 1.13 |\n", - "| value_loss | 1.25 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 22500 |\n", - "| time_elapsed | 416 |\n", - "| total_timesteps | 112500 |\n", - "| train/ | |\n", - "| entropy_loss | -46.1 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 22499 |\n", - "| policy_loss | 221 |\n", - "| std | 1.13 |\n", - "| value_loss | 29.2 |\n", - 
"------------------------------------\n", - "-------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 22600 |\n", - "| time_elapsed | 418 |\n", - "| total_timesteps | 113000 |\n", - "| train/ | |\n", - "| entropy_loss | -46.1 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 22599 |\n", - "| policy_loss | -1.03e+03 |\n", - "| std | 1.13 |\n", - "| value_loss | 551 |\n", - "-------------------------------------\n", - "begin_total_asset:1000000\n", - "end_total_asset:4224562.913610662\n", - "total_reward:3224562.9136106623\n", - "total_cost: 7311.709253680451\n", - "total_trades: 39684\n", - "Sharpe: 0.908724330269282\n", - "=================================\n", - "-------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 22700 |\n", - "| time_elapsed | 420 |\n", - "| total_timesteps | 113500 |\n", - "| train/ | |\n", - "| entropy_loss | -46.2 |\n", - "| explained_variance | -2.31e+04 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 22699 |\n", - "| policy_loss | -135 |\n", - "| std | 1.13 |\n", - "| value_loss | 11.1 |\n", - "-------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 22800 |\n", - "| time_elapsed | 422 |\n", - "| total_timesteps | 114000 |\n", - "| train/ | |\n", - "| entropy_loss | -46.2 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 22799 |\n", - "| policy_loss | -74.1 |\n", - "| std | 1.13 |\n", - "| value_loss | 3.66 |\n", - "------------------------------------\n", - "-------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 22900 |\n", - "| time_elapsed | 424 |\n", - "| total_timesteps | 114500 |\n", - "| train/ | |\n", - "| entropy_loss | -46.2 |\n", - "| explained_variance | -1.82e+10 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates 
| 22899 |\n", - "| policy_loss | 44.3 |\n", - "| std | 1.13 |\n", - "| value_loss | 5.06 |\n", - "-------------------------------------\n", - "-------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 23000 |\n", - "| time_elapsed | 425 |\n", - "| total_timesteps | 115000 |\n", - "| train/ | |\n", - "| entropy_loss | -46.1 |\n", - "| explained_variance | -2.81e+05 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 22999 |\n", - "| policy_loss | 98.9 |\n", - "| std | 1.13 |\n", - "| value_loss | 14.7 |\n", - "-------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 23100 |\n", - "| time_elapsed | 427 |\n", - "| total_timesteps | 115500 |\n", - "| train/ | |\n", - "| entropy_loss | -46.1 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 23099 |\n", - "| policy_loss | 252 |\n", - "| std | 1.13 |\n", - "| value_loss | 39.1 |\n", - "------------------------------------\n", - "begin_total_asset:1000000\n", - "end_total_asset:4058599.1541633434\n", - "total_reward:3058599.1541633434\n", - "total_cost: 4712.075511668796\n", - "total_trades: 39992\n", - "Sharpe: 0.9184456466750243\n", - "=================================\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 23200 |\n", - "| time_elapsed | 429 |\n", - "| total_timesteps | 116000 |\n", - "| train/ | |\n", - "| entropy_loss | -46.1 |\n", - "| explained_variance | -19.7 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 23199 |\n", - "| policy_loss | 34.4 |\n", - "| std | 1.13 |\n", - "| value_loss | 1.2 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 23300 |\n", - "| time_elapsed | 431 |\n", - "| total_timesteps | 116500 |\n", - "| train/ | |\n", - "| entropy_loss | -46.1 
|\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 23299 |\n", - "| policy_loss | 79.1 |\n", - "| std | 1.13 |\n", - "| value_loss | 7.28 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 23400 |\n", - "| time_elapsed | 433 |\n", - "| total_timesteps | 117000 |\n", - "| train/ | |\n", - "| entropy_loss | -46.1 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 23399 |\n", - "| policy_loss | -95.2 |\n", - "| std | 1.13 |\n", - "| value_loss | 5.33 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 23500 |\n", - "| time_elapsed | 435 |\n", - "| total_timesteps | 117500 |\n", - "| train/ | |\n", - "| entropy_loss | -46.1 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 23499 |\n", - "| policy_loss | 138 |\n", - "| std | 1.13 |\n", - "| value_loss | 15.2 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 270 |\n", - "| iterations | 23600 |\n", - "| time_elapsed | 436 |\n", - "| total_timesteps | 118000 |\n", - "| train/ | |\n", - "| entropy_loss | -46.1 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 23599 |\n", - "| policy_loss | 211 |\n", - "| std | 1.13 |\n", - "| value_loss | 28.6 |\n", - "------------------------------------\n", - "begin_total_asset:1000000\n", - "end_total_asset:4447977.909194936\n", - "total_reward:3447977.909194936\n", - "total_cost: 4003.027452147933\n", - "total_trades: 41100\n", - "Sharpe: 0.9956972796668654\n", - "=================================\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 23700 |\n", - "| time_elapsed | 438 |\n", - "| 
total_timesteps | 118500 |\n", - "| train/ | |\n", - "| entropy_loss | -46.1 |\n", - "| explained_variance | -6.88 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 23699 |\n", - "| policy_loss | -68.4 |\n", - "| std | 1.13 |\n", - "| value_loss | 2.73 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 23800 |\n", - "| time_elapsed | 440 |\n", - "| total_timesteps | 119000 |\n", - "| train/ | |\n", - "| entropy_loss | -46.1 |\n", - "| explained_variance | -186 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 23799 |\n", - "| policy_loss | -106 |\n", - "| std | 1.13 |\n", - "| value_loss | 6.79 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 23900 |\n", - "| time_elapsed | 442 |\n", - "| total_timesteps | 119500 |\n", - "| train/ | |\n", - "| entropy_loss | -46.1 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 23899 |\n", - "| policy_loss | 30.7 |\n", - "| std | 1.13 |\n", - "| value_loss | 1.59 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 24000 |\n", - "| time_elapsed | 444 |\n", - "| total_timesteps | 120000 |\n", - "| train/ | |\n", - "| entropy_loss | -46.2 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 23999 |\n", - "| policy_loss | 69.7 |\n", - "| std | 1.13 |\n", - "| value_loss | 5.71 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 24100 |\n", - "| time_elapsed | 446 |\n", - "| total_timesteps | 120500 |\n", - "| train/ | |\n", - "| entropy_loss | -46.2 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 
24099 |\n", - "| policy_loss | 224 |\n", - "| std | 1.13 |\n", - "| value_loss | 22.6 |\n", - "------------------------------------\n", - "begin_total_asset:1000000\n", - "end_total_asset:3735086.557625893\n", - "total_reward:2735086.557625893\n", - "total_cost: 2757.089181630181\n", - "total_trades: 40506\n", - "Sharpe: 0.8851253072732341\n", - "=================================\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 24200 |\n", - "| time_elapsed | 448 |\n", - "| total_timesteps | 121000 |\n", - "| train/ | |\n", - "| entropy_loss | -46.2 |\n", - "| explained_variance | -2.36 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 24199 |\n", - "| policy_loss | 5.03 |\n", - "| std | 1.13 |\n", - "| value_loss | 0.398 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 24300 |\n", - "| time_elapsed | 450 |\n", - "| total_timesteps | 121500 |\n", - "| train/ | |\n", - "| entropy_loss | -46.2 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 24299 |\n", - "| policy_loss | -225 |\n", - "| std | 1.14 |\n", - "| value_loss | 28.7 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 24400 |\n", - "| time_elapsed | 452 |\n", - "| total_timesteps | 122000 |\n", - "| train/ | |\n", - "| entropy_loss | -46.2 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 24399 |\n", - "| policy_loss | 65.6 |\n", - "| std | 1.13 |\n", - "| value_loss | 2.32 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 24500 |\n", - "| time_elapsed | 454 |\n", - "| total_timesteps | 122500 |\n", - "| train/ | |\n", - "| entropy_loss | -46.3 |\n", - "| 
explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 24499 |\n", - "| policy_loss | -163 |\n", - "| std | 1.14 |\n", - "| value_loss | 16.1 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 24600 |\n", - "| time_elapsed | 456 |\n", - "| total_timesteps | 123000 |\n", - "| train/ | |\n", - "| entropy_loss | -46.3 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 24599 |\n", - "| policy_loss | 53.5 |\n", - "| std | 1.14 |\n", - "| value_loss | 2.15 |\n", - "------------------------------------\n", - "begin_total_asset:1000000\n", - "end_total_asset:3941900.9207990007\n", - "total_reward:2941900.9207990007\n", - "total_cost: 3208.161901015157\n", - "total_trades: 39655\n", - "Sharpe: 0.916833519860494\n", - "=================================\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 24700 |\n", - "| time_elapsed | 458 |\n", - "| total_timesteps | 123500 |\n", - "| train/ | |\n", - "| entropy_loss | -46.3 |\n", - "| explained_variance | -10.9 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 24699 |\n", - "| policy_loss | -21.7 |\n", - "| std | 1.14 |\n", - "| value_loss | 1.55 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 24800 |\n", - "| time_elapsed | 459 |\n", - "| total_timesteps | 124000 |\n", - "| train/ | |\n", - "| entropy_loss | -46.3 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 24799 |\n", - "| policy_loss | 274 |\n", - "| std | 1.14 |\n", - "| value_loss | 37.5 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 24900 |\n", - "| time_elapsed | 461 |\n", - "| 
total_timesteps | 124500 |\n", - "| train/ | |\n", - "| entropy_loss | -46.4 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 24899 |\n", - "| policy_loss | -99.3 |\n", - "| std | 1.14 |\n", - "| value_loss | 5.44 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 25000 |\n", - "| time_elapsed | 463 |\n", - "| total_timesteps | 125000 |\n", - "| train/ | |\n", - "| entropy_loss | -46.4 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 24999 |\n", - "| policy_loss | 73.4 |\n", - "| std | 1.14 |\n", - "| value_loss | 2.62 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 25100 |\n", - "| time_elapsed | 465 |\n", - "| total_timesteps | 125500 |\n", - "| train/ | |\n", - "| entropy_loss | -46.5 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 25099 |\n", - "| policy_loss | 85.4 |\n", - "| std | 1.14 |\n", - "| value_loss | 4.21 |\n", - "------------------------------------\n", - "begin_total_asset:1000000\n", - "end_total_asset:3918748.3829585924\n", - "total_reward:2918748.3829585924\n", - "total_cost: 7273.962180458869\n", - "total_trades: 40377\n", - "Sharpe: 0.9114365429898307\n", - "=================================\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 25200 |\n", - "| time_elapsed | 467 |\n", - "| total_timesteps | 126000 |\n", - "| train/ | |\n", - "| entropy_loss | -46.4 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 25199 |\n", - "| policy_loss | 78.4 |\n", - "| std | 1.14 |\n", - "| value_loss | 3.83 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| 
fps | 269 |\n", - "| iterations | 25300 |\n", - "| time_elapsed | 469 |\n", - "| total_timesteps | 126500 |\n", - "| train/ | |\n", - "| entropy_loss | -46.5 |\n", - "| explained_variance | -359 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 25299 |\n", - "| policy_loss | 43.3 |\n", - "| std | 1.14 |\n", - "| value_loss | 11.3 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 25400 |\n", - "| time_elapsed | 471 |\n", - "| total_timesteps | 127000 |\n", - "| train/ | |\n", - "| entropy_loss | -46.5 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 25399 |\n", - "| policy_loss | -117 |\n", - "| std | 1.15 |\n", - "| value_loss | 8.74 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 25500 |\n", - "| time_elapsed | 473 |\n", - "| total_timesteps | 127500 |\n", - "| train/ | |\n", - "| entropy_loss | -46.5 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 25499 |\n", - "| policy_loss | -334 |\n", - "| std | 1.15 |\n", - "| value_loss | 55.2 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 25600 |\n", - "| time_elapsed | 475 |\n", - "| total_timesteps | 128000 |\n", - "| train/ | |\n", - "| entropy_loss | -46.5 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 25599 |\n", - "| policy_loss | 80.1 |\n", - "| std | 1.15 |\n", - "| value_loss | 7.16 |\n", - "------------------------------------\n", - "begin_total_asset:1000000\n", - "end_total_asset:3416634.0516581917\n", - "total_reward:2416634.0516581917\n", - "total_cost: 4919.955620021787\n", - "total_trades: 38886\n", - "Sharpe: 0.7925876800612837\n", - 
"=================================\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 25700 |\n", - "| time_elapsed | 476 |\n", - "| total_timesteps | 128500 |\n", - "| train/ | |\n", - "| entropy_loss | -46.6 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 25699 |\n", - "| policy_loss | 74.3 |\n", - "| std | 1.15 |\n", - "| value_loss | 4.39 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 25800 |\n", - "| time_elapsed | 478 |\n", - "| total_timesteps | 129000 |\n", - "| train/ | |\n", - "| entropy_loss | -46.5 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 25799 |\n", - "| policy_loss | -45.1 |\n", - "| std | 1.15 |\n", - "| value_loss | 7.72 |\n", - "------------------------------------\n", - "-------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 25900 |\n", - "| time_elapsed | 480 |\n", - "| total_timesteps | 129500 |\n", - "| train/ | |\n", - "| entropy_loss | -46.6 |\n", - "| explained_variance | -2.03e+08 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 25899 |\n", - "| policy_loss | 237 |\n", - "| std | 1.15 |\n", - "| value_loss | 24.9 |\n", - "-------------------------------------\n", - "-------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 26000 |\n", - "| time_elapsed | 482 |\n", - "| total_timesteps | 130000 |\n", - "| train/ | |\n", - "| entropy_loss | -46.6 |\n", - "| explained_variance | -2.15e+03 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 25999 |\n", - "| policy_loss | -103 |\n", - "| std | 1.15 |\n", - "| value_loss | 9.79 |\n", - "-------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 26100 |\n", - "| 
time_elapsed | 484 |\n", - "| total_timesteps | 130500 |\n", - "| train/ | |\n", - "| entropy_loss | -46.6 |\n", - "| explained_variance | -3.4e+11 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 26099 |\n", - "| policy_loss | 43.2 |\n", - "| std | 1.15 |\n", - "| value_loss | 1.28 |\n", - "------------------------------------\n", - "begin_total_asset:1000000\n", - "end_total_asset:3276619.3638079385\n", - "total_reward:2276619.3638079385\n", - "total_cost: 5264.404229684018\n", - "total_trades: 38979\n", - "Sharpe: 0.7353175977211657\n", - "=================================\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 26200 |\n", - "| time_elapsed | 486 |\n", - "| total_timesteps | 131000 |\n", - "| train/ | |\n", - "| entropy_loss | -46.6 |\n", - "| explained_variance | -908 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 26199 |\n", - "| policy_loss | 60 |\n", - "| std | 1.15 |\n", - "| value_loss | 3.88 |\n", - "------------------------------------\n", - "-------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 26300 |\n", - "| time_elapsed | 488 |\n", - "| total_timesteps | 131500 |\n", - "| train/ | |\n", - "| entropy_loss | -46.6 |\n", - "| explained_variance | -2.84e+12 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 26299 |\n", - "| policy_loss | -556 |\n", - "| std | 1.15 |\n", - "| value_loss | 149 |\n", - "-------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 26400 |\n", - "| time_elapsed | 489 |\n", - "| total_timesteps | 132000 |\n", - "| train/ | |\n", - "| entropy_loss | -46.6 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 26399 |\n", - "| policy_loss | -144 |\n", - "| std | 1.15 |\n", - "| value_loss | 10.9 |\n", - "------------------------------------\n", - 
"------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 26500 |\n", - "| time_elapsed | 491 |\n", - "| total_timesteps | 132500 |\n", - "| train/ | |\n", - "| entropy_loss | -46.6 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 26499 |\n", - "| policy_loss | 68.5 |\n", - "| std | 1.15 |\n", - "| value_loss | 4.74 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 26600 |\n", - "| time_elapsed | 493 |\n", - "| total_timesteps | 133000 |\n", - "| train/ | |\n", - "| entropy_loss | -46.6 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 26599 |\n", - "| policy_loss | 2.66 |\n", - "| std | 1.15 |\n", - "| value_loss | 0.188 |\n", - "------------------------------------\n", - "begin_total_asset:1000000\n", - "end_total_asset:3639991.011414096\n", - "total_reward:2639991.011414096\n", - "total_cost: 5876.438289118703\n", - "total_trades: 39596\n", - "Sharpe: 0.792662828054479\n", - "=================================\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 26700 |\n", - "| time_elapsed | 495 |\n", - "| total_timesteps | 133500 |\n", - "| train/ | |\n", - "| entropy_loss | -46.6 |\n", - "| explained_variance | -22.5 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 26699 |\n", - "| policy_loss | 114 |\n", - "| std | 1.15 |\n", - "| value_loss | 7.19 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 26800 |\n", - "| time_elapsed | 497 |\n", - "| total_timesteps | 134000 |\n", - "| train/ | |\n", - "| entropy_loss | -46.6 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 26799 |\n", - "| policy_loss | -227 |\n", - "| std | 1.15 
|\n", - "| value_loss | 28.2 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 26900 |\n", - "| time_elapsed | 499 |\n", - "| total_timesteps | 134500 |\n", - "| train/ | |\n", - "| entropy_loss | -46.5 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 26899 |\n", - "| policy_loss | -99.1 |\n", - "| std | 1.15 |\n", - "| value_loss | 5.7 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 27000 |\n", - "| time_elapsed | 501 |\n", - "| total_timesteps | 135000 |\n", - "| train/ | |\n", - "| entropy_loss | -46.6 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 26999 |\n", - "| policy_loss | -50.5 |\n", - "| std | 1.15 |\n", - "| value_loss | 1.92 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 27100 |\n", - "| time_elapsed | 503 |\n", - "| total_timesteps | 135500 |\n", - "| train/ | |\n", - "| entropy_loss | -46.6 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 27099 |\n", - "| policy_loss | 86.8 |\n", - "| std | 1.15 |\n", - "| value_loss | 4.17 |\n", - "------------------------------------\n", - "begin_total_asset:1000000\n", - "end_total_asset:3312273.2546917126\n", - "total_reward:2312273.2546917126\n", - "total_cost: 6513.921766223839\n", - "total_trades: 39866\n", - "Sharpe: 0.7669939696087845\n", - "=================================\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 27200 |\n", - "| time_elapsed | 505 |\n", - "| total_timesteps | 136000 |\n", - "| train/ | |\n", - "| entropy_loss | -46.6 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 
|\n", - "| n_updates | 27199 |\n", - "| policy_loss | 83.3 |\n", - "| std | 1.15 |\n", - "| value_loss | 4.52 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 27300 |\n", - "| time_elapsed | 507 |\n", - "| total_timesteps | 136500 |\n", - "| train/ | |\n", - "| entropy_loss | -46.7 |\n", - "| explained_variance | -242 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 27299 |\n", - "| policy_loss | 196 |\n", - "| std | 1.15 |\n", - "| value_loss | 27.6 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 27400 |\n", - "| time_elapsed | 509 |\n", - "| total_timesteps | 137000 |\n", - "| train/ | |\n", - "| entropy_loss | -46.7 |\n", - "| explained_variance | -256 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 27399 |\n", - "| policy_loss | -14.1 |\n", - "| std | 1.15 |\n", - "| value_loss | 0.802 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 27500 |\n", - "| time_elapsed | 510 |\n", - "| total_timesteps | 137500 |\n", - "| train/ | |\n", - "| entropy_loss | -46.7 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 27499 |\n", - "| policy_loss | -133 |\n", - "| std | 1.15 |\n", - "| value_loss | 10.8 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 27600 |\n", - "| time_elapsed | 512 |\n", - "| total_timesteps | 138000 |\n", - "| train/ | |\n", - "| entropy_loss | -46.7 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 27599 |\n", - "| policy_loss | -216 |\n", - "| std | 1.15 |\n", - "| value_loss | 23.3 |\n", - "------------------------------------\n", - 
"begin_total_asset:1000000\n", - "end_total_asset:3537920.924942015\n", - "total_reward:2537920.924942015\n", - "total_cost: 7636.677849389829\n", - "total_trades: 39571\n", - "Sharpe: 0.7721256456339295\n", - "=================================\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 27700 |\n", - "| time_elapsed | 514 |\n", - "| total_timesteps | 138500 |\n", - "| train/ | |\n", - "| entropy_loss | -46.7 |\n", - "| explained_variance | -538 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 27699 |\n", - "| policy_loss | -78.9 |\n", - "| std | 1.15 |\n", - "| value_loss | 5.95 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 27800 |\n", - "| time_elapsed | 516 |\n", - "| total_timesteps | 139000 |\n", - "| train/ | |\n", - "| entropy_loss | -46.8 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 27799 |\n", - "| policy_loss | -135 |\n", - "| std | 1.16 |\n", - "| value_loss | 11.2 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 27900 |\n", - "| time_elapsed | 518 |\n", - "| total_timesteps | 139500 |\n", - "| train/ | |\n", - "| entropy_loss | -46.8 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 27899 |\n", - "| policy_loss | -7.94 |\n", - "| std | 1.16 |\n", - "| value_loss | 2.54 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 28000 |\n", - "| time_elapsed | 520 |\n", - "| total_timesteps | 140000 |\n", - "| train/ | |\n", - "| entropy_loss | -46.9 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 27999 |\n", - "| policy_loss | -118 |\n", - "| std | 1.16 
|\n", - "| value_loss | 7.13 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 28100 |\n", - "| time_elapsed | 522 |\n", - "| total_timesteps | 140500 |\n", - "| train/ | |\n", - "| entropy_loss | -46.9 |\n", - "| explained_variance | -1.4e+12 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 28099 |\n", - "| policy_loss | 33.8 |\n", - "| std | 1.16 |\n", - "| value_loss | 1.74 |\n", - "------------------------------------\n", - "begin_total_asset:1000000\n", - "end_total_asset:3335901.863268089\n", - "total_reward:2335901.863268089\n", - "total_cost: 6148.2616701473435\n", - "total_trades: 38459\n", - "Sharpe: 0.8009972305518047\n", - "=================================\n", - "-------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 28200 |\n", - "| time_elapsed | 523 |\n", - "| total_timesteps | 141000 |\n", - "| train/ | |\n", - "| entropy_loss | -46.9 |\n", - "| explained_variance | -1.72e+07 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 28199 |\n", - "| policy_loss | -75.4 |\n", - "| std | 1.16 |\n", - "| value_loss | 4.2 |\n", - "-------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 28300 |\n", - "| time_elapsed | 525 |\n", - "| total_timesteps | 141500 |\n", - "| train/ | |\n", - "| entropy_loss | -46.9 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 28299 |\n", - "| policy_loss | 13.6 |\n", - "| std | 1.16 |\n", - "| value_loss | 3.07 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 28400 |\n", - "| time_elapsed | 527 |\n", - "| total_timesteps | 142000 |\n", - "| train/ | |\n", - "| entropy_loss | -46.9 |\n", - "| explained_variance | nan |\n", - "| learning_rate 
| 0.0007 |\n", - "| n_updates | 28399 |\n", - "| policy_loss | -38.5 |\n", - "| std | 1.16 |\n", - "| value_loss | 0.936 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 28500 |\n", - "| time_elapsed | 529 |\n", - "| total_timesteps | 142500 |\n", - "| train/ | |\n", - "| entropy_loss | -47 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 28499 |\n", - "| policy_loss | -20.5 |\n", - "| std | 1.16 |\n", - "| value_loss | 1.02 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 28600 |\n", - "| time_elapsed | 531 |\n", - "| total_timesteps | 143000 |\n", - "| train/ | |\n", - "| entropy_loss | -47 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 28599 |\n", - "| policy_loss | -95.6 |\n", - "| std | 1.16 |\n", - "| value_loss | 6.74 |\n", - "------------------------------------\n", - "begin_total_asset:1000000\n", - "end_total_asset:3791388.9622966833\n", - "total_reward:2791388.9622966833\n", - "total_cost: 4739.291239631439\n", - "total_trades: 36786\n", - "Sharpe: 0.8352371557337978\n", - "=================================\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 28700 |\n", - "| time_elapsed | 533 |\n", - "| total_timesteps | 143500 |\n", - "| train/ | |\n", - "| entropy_loss | -47 |\n", - "| explained_variance | -656 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 28699 |\n", - "| policy_loss | 145 |\n", - "| std | 1.17 |\n", - "| value_loss | 10 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 28800 |\n", - "| time_elapsed | 535 |\n", - "| total_timesteps | 144000 |\n", - "| train/ | |\n", - "| 
entropy_loss | -47.1 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 28799 |\n", - "| policy_loss | 195 |\n", - "| std | 1.17 |\n", - "| value_loss | 23 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 28900 |\n", - "| time_elapsed | 536 |\n", - "| total_timesteps | 144500 |\n", - "| train/ | |\n", - "| entropy_loss | -47.1 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 28899 |\n", - "| policy_loss | -26 |\n", - "| std | 1.17 |\n", - "| value_loss | 2.42 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 29000 |\n", - "| time_elapsed | 538 |\n", - "| total_timesteps | 145000 |\n", - "| train/ | |\n", - "| entropy_loss | -47.1 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 28999 |\n", - "| policy_loss | 32.1 |\n", - "| std | 1.17 |\n", - "| value_loss | 3.84 |\n", - "------------------------------------\n", - "-------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 29100 |\n", - "| time_elapsed | 540 |\n", - "| total_timesteps | 145500 |\n", - "| train/ | |\n", - "| entropy_loss | -47.1 |\n", - "| explained_variance | -1.11e+11 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 29099 |\n", - "| policy_loss | -51.3 |\n", - "| std | 1.17 |\n", - "| value_loss | 1.21 |\n", - "-------------------------------------\n", - "begin_total_asset:1000000\n", - "end_total_asset:3722466.511508156\n", - "total_reward:2722466.511508156\n", - "total_cost: 2619.4388887420964\n", - "total_trades: 36838\n", - "Sharpe: 0.8751149961312088\n", - "=================================\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 29200 |\n", - "| 
time_elapsed | 542 |\n", - "| total_timesteps | 146000 |\n", - "| train/ | |\n", - "| entropy_loss | -47 |\n", - "| explained_variance | -37.7 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 29199 |\n", - "| policy_loss | 97.3 |\n", - "| std | 1.17 |\n", - "| value_loss | 5.24 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 29300 |\n", - "| time_elapsed | 544 |\n", - "| total_timesteps | 146500 |\n", - "| train/ | |\n", - "| entropy_loss | -47.1 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 29299 |\n", - "| policy_loss | 63.7 |\n", - "| std | 1.17 |\n", - "| value_loss | 3.25 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 269 |\n", - "| iterations | 29400 |\n", - "| time_elapsed | 546 |\n", - "| total_timesteps | 147000 |\n", - "| train/ | |\n", - "| entropy_loss | -47.2 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 29399 |\n", - "| policy_loss | 76.1 |\n", - "| std | 1.17 |\n", - "| value_loss | 3.03 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 268 |\n", - "| iterations | 29500 |\n", - "| time_elapsed | 548 |\n", - "| total_timesteps | 147500 |\n", - "| train/ | |\n", - "| entropy_loss | -47.2 |\n", - "| explained_variance | -134 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 29499 |\n", - "| policy_loss | -178 |\n", - "| std | 1.17 |\n", - "| value_loss | 15.8 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 268 |\n", - "| iterations | 29600 |\n", - "| time_elapsed | 550 |\n", - "| total_timesteps | 148000 |\n", - "| train/ | |\n", - "| entropy_loss | -47.2 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 
0.0007 |\n", - "| n_updates | 29599 |\n", - "| policy_loss | -202 |\n", - "| std | 1.17 |\n", - "| value_loss | 15.6 |\n", - "------------------------------------\n", - "begin_total_asset:1000000\n", - "end_total_asset:3503288.5069474406\n", - "total_reward:2503288.5069474406\n", - "total_cost: 2306.8302833824664\n", - "total_trades: 38804\n", - "Sharpe: 0.8406587986683967\n", - "=================================\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 268 |\n", - "| iterations | 29700 |\n", - "| time_elapsed | 552 |\n", - "| total_timesteps | 148500 |\n", - "| train/ | |\n", - "| entropy_loss | -47.2 |\n", - "| explained_variance | -338 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 29699 |\n", - "| policy_loss | 174 |\n", - "| std | 1.17 |\n", - "| value_loss | 17.4 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 268 |\n", - "| iterations | 29800 |\n", - "| time_elapsed | 553 |\n", - "| total_timesteps | 149000 |\n", - "| train/ | |\n", - "| entropy_loss | -47.2 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 29799 |\n", - "| policy_loss | -106 |\n", - "| std | 1.17 |\n", - "| value_loss | 7.64 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 268 |\n", - "| iterations | 29900 |\n", - "| time_elapsed | 555 |\n", - "| total_timesteps | 149500 |\n", - "| train/ | |\n", - "| entropy_loss | -47.1 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 29899 |\n", - "| policy_loss | 67.9 |\n", - "| std | 1.17 |\n", - "| value_loss | 2.68 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| time/ | |\n", - "| fps | 268 |\n", - "| iterations | 30000 |\n", - "| time_elapsed | 557 |\n", - "| total_timesteps | 150000 |\n", - "| train/ | |\n", - "| 
entropy_loss | -47.1 |\n", - "| explained_variance | nan |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 29999 |\n", - "| policy_loss | -121 |\n", - "| std | 1.17 |\n", - "| value_loss | 8.47 |\n", - "------------------------------------\n" - ], - "name": "stdout" - } + "text/plain": [ + " date open high ... cci_30 dx_30 turbulence\n", + "0 2009-01-02 3.067143 3.251429 ... 66.666667 100.0 0.0\n", + "1 2009-01-02 18.570000 19.520000 ... 66.666667 100.0 0.0\n", + "2 2009-01-02 42.799999 45.560001 ... 66.666667 100.0 0.0\n", + "3 2009-01-02 44.910000 46.980000 ... 66.666667 100.0 0.0\n", + "4 2009-01-02 16.410000 17.000000 ... 66.666667 100.0 0.0\n", + "5 2009-01-02 74.230003 77.300003 ... 66.666667 100.0 0.0\n", + "6 2009-01-02 21.605234 22.060680 ... 66.666667 100.0 0.0\n", + "7 2009-01-02 22.760000 24.030001 ... 66.666667 100.0 0.0\n", + "8 2009-01-02 84.019997 87.620003 ... 66.666667 100.0 0.0\n", + "9 2009-01-02 23.070000 24.190001 ... 66.666667 100.0 0.0\n", + "\n", + "[10 rows x 12 columns]" ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 6 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-QsYaY0Dh1iw" + }, + "source": [ + "\n", + "# Part 5. Design Environment\n", + "Considering the stochastic and interactive nature of the automated stock trading tasks, a financial task is modeled as a **Markov Decision Process (MDP)** problem. The training process involves observing stock price change, taking an action and reward's calculation to have the agent adjusting its strategy accordingly. By interacting with the environment, the trading agent will derive a trading strategy with the maximized rewards as time proceeds.\n", + "\n", + "Our trading environments, based on OpenAI Gym framework, simulate live stock markets with real market data according to the principle of time-driven simulation.\n", + "\n", + "The action space describes the allowed actions that the agent interacts with the environment. 
Normally, an action a takes one of three values, {-1, 0, 1}, where -1, 0, 1 represent selling, holding, and buying one share. Also, an action can be carried out on multiple shares. We use an action space {-k,…,-1, 0, 1, …, k}, where k denotes the number of shares to buy and -k denotes the number of shares to sell. For example, \"Buy 10 shares of AAPL\" or \"Sell 10 shares of AAPL\" are 10 or -10, respectively. The continuous action space needs to be normalized to [-1, 1], since the policy is defined on a Gaussian distribution, which needs to be normalized and symmetric." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5TOhcryx44bb" + }, + "source": [ + "## Training data split: 2009-01-01 to 2018-12-31\n", + "## Trade data split: 2019-01-01 to 2020-12-31" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "W0qaVGjLtgbI", + "outputId": "c98aeb90-84e3-4b83-9671-d679f3fe148f" + }, + "source": [ + "train = data_split(processed, '2009-01-01','2019-01-01')\n", + "trade = data_split(processed, '2019-01-01','2021-01-01')\n", + "print(len(train))\n", + "print(len(trade))" + ], + "execution_count": null, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "MRiOtrywfAo1" - }, - "source": [ - "### Model 2: DDPG" - ] + "output_type": "stream", + "text": [ + "75480\n", + "15150\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 279 }, + "id": "p52zNCOhTtLR", + "outputId": "c41f9be0-a99f-4108-a427-3112b6bd4129" + }, + "source": [ + "train.head()" + ], + "execution_count": null, + "outputs": [ { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "M2YadjfnLwgt", - "outputId": "3b2a8f89-0561-4083-a015-fbee11693037" - }, - "source": [ - "agent = DRLAgent(env = env_train)\n", - "model_ddpg = agent.get_model(\"ddpg\")" + "output_type":
"execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
dateopenhighlowclosevolumeticmacdrsi_30cci_30dx_30turbulence
02009-01-023.0671433.2514293.0414292.795913746015200.0AAPL0.0100.066.666667100.00.0
02009-01-0218.57000019.52000018.40000015.80062410955700.0AXP0.0100.066.666667100.00.0
02009-01-0242.79999945.56000142.77999933.6809357010200.0BA0.0100.066.666667100.00.0
02009-01-0244.91000046.98000044.70999932.5144007117200.0CAT0.0100.066.666667100.00.0
02009-01-0216.41000017.00000016.25000012.78608740980600.0CSCO0.0100.066.666667100.00.0
\n", + "
" ], - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "text": [ - "{'batch_size': 128, 'buffer_size': 50000, 'learning_rate': 0.001}\n", - "Using cpu device\n" - ], - "name": "stdout" - } + "text/plain": [ + " date open high ... cci_30 dx_30 turbulence\n", + "0 2009-01-02 3.067143 3.251429 ... 66.666667 100.0 0.0\n", + "0 2009-01-02 18.570000 19.520000 ... 66.666667 100.0 0.0\n", + "0 2009-01-02 42.799999 45.560001 ... 66.666667 100.0 0.0\n", + "0 2009-01-02 44.910000 46.980000 ... 66.666667 100.0 0.0\n", + "0 2009-01-02 16.410000 17.000000 ... 66.666667 100.0 0.0\n", + "\n", + "[5 rows x 12 columns]" ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 67 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 299 }, + "id": "k9zU9YaTTvFq", + "outputId": "705f46e4-0529-4ef5-d182-c2a1337397a4" + }, + "source": [ + "trade.head()" + ], + "execution_count": null, + "outputs": [ { - "cell_type": "code", - "metadata": { - "colab": { - "background_save": true, - "base_uri": "https://localhost:8080/" - }, - "id": "tCDa78rqfO_a", - "outputId": "f651f8be-4c93-4b1e-c88a-7e3a09976693" - }, - "source": [ - "trained_ddpg = agent.train_model(model=model_ddpg, \n", - " tb_log_name='ddpg',\n", - " total_timesteps=50000)" + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
dateopenhighlowclosevolumeticmacdrsi_30cci_30dx_30turbulence
02019-01-0238.72250039.71250238.55749938.562561148158800.0AAPL-2.01990337.867349-91.56785242.250808119.879197
02019-01-0293.91000496.26999793.76999792.6433114175400.0AXP-3.42600841.204982-97.74226926.709417119.879197
02019-01-02316.190002323.950012313.709991314.6451423292200.0BA-5.55059247.010000-21.71238213.611972119.879197
02019-01-02124.029999127.879997123.000000119.3025824783200.0CAT-0.68675948.229089-5.0912090.873482119.879197
02019-01-0242.27999943.20000142.20999940.38209923833500.0CSCO-0.96006144.872557-87.49685029.529377119.879197
\n", + "
" ], - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "text": [ - "Logging to tensorboard_log/ddpg/ddpg_1\n", - "begin_total_asset:1000000\n", - "end_total_asset:3761309.8057632465\n", - "total_reward:2761309.8057632465\n", - "total_cost: 6807.077776350557\n", - "total_trades: 39070\n", - "Sharpe: 1.0173492167488003\n", - "=================================\n", - "begin_total_asset:1000000\n", - "end_total_asset:4423657.61673363\n", - "total_reward:3423657.61673363\n", - "total_cost: 1277.392035166502\n", - "total_trades: 32819\n", - "Sharpe: 0.8726982452731067\n", - "=================================\n", - "begin_total_asset:1000000\n", - "end_total_asset:4423657.61673363\n", - "total_reward:3423657.61673363\n", - "total_cost: 1277.392035166502\n", - "total_trades: 32819\n", - "Sharpe: 0.8726982452731067\n", - "=================================\n", - "begin_total_asset:1000000\n", - "end_total_asset:4423657.61673363\n", - "total_reward:3423657.61673363\n", - "total_cost: 1277.392035166502\n", - "total_trades: 32819\n", - "Sharpe: 0.8726982452731067\n", - "=================================\n", - "---------------------------------\n", - "| time/ | |\n", - "| episodes | 4 |\n", - "| fps | 38 |\n", - "| time_elapsed | 258 |\n", - "| total timesteps | 10064 |\n", - "| train/ | |\n", - "| actor_loss | -2.81 |\n", - "| critic_loss | 272 |\n", - "| learning_rate | 0.001 |\n", - "| n_updates | 7548 |\n", - "---------------------------------\n", - "begin_total_asset:1000000\n", - "end_total_asset:4423657.61673363\n", - "total_reward:3423657.61673363\n", - "total_cost: 1277.392035166502\n", - "total_trades: 32819\n", - "Sharpe: 0.8726982452731067\n", - "=================================\n", - "begin_total_asset:1000000\n", - "end_total_asset:4423657.61673363\n", - "total_reward:3423657.61673363\n", - "total_cost: 1277.392035166502\n", - "total_trades: 32819\n", - "Sharpe: 0.8726982452731067\n", - "=================================\n", - 
"begin_total_asset:1000000\n", - "end_total_asset:4423657.61673363\n", - "total_reward:3423657.61673363\n", - "total_cost: 1277.392035166502\n", - "total_trades: 32819\n", - "Sharpe: 0.8726982452731067\n", - "=================================\n", - "begin_total_asset:1000000\n", - "end_total_asset:4423657.61673363\n", - "total_reward:3423657.61673363\n", - "total_cost: 1277.392035166502\n", - "total_trades: 32819\n", - "Sharpe: 0.8726982452731067\n", - "=================================\n", - "---------------------------------\n", - "| time/ | |\n", - "| episodes | 8 |\n", - "| fps | 33 |\n", - "| time_elapsed | 604 |\n", - "| total timesteps | 20128 |\n", - "| train/ | |\n", - "| actor_loss | -8.32 |\n", - "| critic_loss | 12.8 |\n", - "| learning_rate | 0.001 |\n", - "| n_updates | 17612 |\n", - "---------------------------------\n", - "begin_total_asset:1000000\n", - "end_total_asset:4423657.61673363\n", - "total_reward:3423657.61673363\n", - "total_cost: 1277.392035166502\n", - "total_trades: 32819\n", - "Sharpe: 0.8726982452731067\n", - "=================================\n", - "begin_total_asset:1000000\n", - "end_total_asset:4423657.61673363\n", - "total_reward:3423657.61673363\n", - "total_cost: 1277.392035166502\n", - "total_trades: 32819\n", - "Sharpe: 0.8726982452731067\n", - "=================================\n", - "begin_total_asset:1000000\n", - "end_total_asset:4423657.61673363\n", - "total_reward:3423657.61673363\n", - "total_cost: 1277.392035166502\n", - "total_trades: 32819\n", - "Sharpe: 0.8726982452731067\n", - "=================================\n", - "begin_total_asset:1000000\n", - "end_total_asset:4423657.61673363\n", - "total_reward:3423657.61673363\n", - "total_cost: 1277.392035166502\n", - "total_trades: 32819\n", - "Sharpe: 0.8726982452731067\n", - "=================================\n", - "---------------------------------\n", - "| time/ | |\n", - "| episodes | 12 |\n", - "| fps | 31 |\n", - "| time_elapsed | 953 |\n", - "| total timesteps 
| 30192 |\n", - "| train/ | |\n", - "| actor_loss | -9.46 |\n", - "| critic_loss | 4.31 |\n", - "| learning_rate | 0.001 |\n", - "| n_updates | 27676 |\n", - "---------------------------------\n", - "begin_total_asset:1000000\n", - "end_total_asset:4423657.61673363\n", - "total_reward:3423657.61673363\n", - "total_cost: 1277.392035166502\n", - "total_trades: 32819\n", - "Sharpe: 0.8726982452731067\n", - "=================================\n", - "begin_total_asset:1000000\n", - "end_total_asset:4423657.61673363\n", - "total_reward:3423657.61673363\n", - "total_cost: 1277.392035166502\n", - "total_trades: 32819\n", - "Sharpe: 0.8726982452731067\n", - "=================================\n", - "begin_total_asset:1000000\n", - "end_total_asset:4423657.61673363\n", - "total_reward:3423657.61673363\n", - "total_cost: 1277.392035166502\n", - "total_trades: 32819\n", - "Sharpe: 0.8726982452731067\n", - "=================================\n", - "begin_total_asset:1000000\n", - "end_total_asset:4423657.61673363\n", - "total_reward:3423657.61673363\n", - "total_cost: 1277.392035166502\n", - "total_trades: 32819\n", - "Sharpe: 0.8726982452731067\n", - "=================================\n", - "---------------------------------\n", - "| time/ | |\n", - "| episodes | 16 |\n", - "| fps | 30 |\n", - "| time_elapsed | 1309 |\n", - "| total timesteps | 40256 |\n", - "| train/ | |\n", - "| actor_loss | -10.2 |\n", - "| critic_loss | 3.19 |\n", - "| learning_rate | 0.001 |\n", - "| n_updates | 37740 |\n", - "---------------------------------\n", - "begin_total_asset:1000000\n", - "end_total_asset:4423657.61673363\n", - "total_reward:3423657.61673363\n", - "total_cost: 1277.392035166502\n", - "total_trades: 32819\n", - "Sharpe: 0.8726982452731067\n", - "=================================\n", - "begin_total_asset:1000000\n", - "end_total_asset:4423657.61673363\n", - "total_reward:3423657.61673363\n", - "total_cost: 1277.392035166502\n", - "total_trades: 32819\n", - "Sharpe: 
0.8726982452731067\n", - "=================================\n", - "begin_total_asset:1000000\n", - "end_total_asset:4423657.61673363\n", - "total_reward:3423657.61673363\n", - "total_cost: 1277.392035166502\n", - "total_trades: 32819\n", - "Sharpe: 0.8726982452731067\n", - "=================================\n", - "begin_total_asset:1000000\n", - "end_total_asset:4423657.61673363\n", - "total_reward:3423657.61673363\n", - "total_cost: 1277.392035166502\n", - "total_trades: 32819\n", - "Sharpe: 0.8726982452731067\n", - "=================================\n", - "---------------------------------\n", - "| time/ | |\n", - "| episodes | 20 |\n", - "| fps | 30 |\n", - "| time_elapsed | 1675 |\n", - "| total timesteps | 50320 |\n", - "| train/ | |\n", - "| actor_loss | -11.1 |\n", - "| critic_loss | 2.24 |\n", - "| learning_rate | 0.001 |\n", - "| n_updates | 47804 |\n", - "---------------------------------\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "_gDkU-j-fCmZ" - }, - "source": [ - "### Model 3: PPO" + "text/plain": [ + " date open high ... cci_30 dx_30 turbulence\n", + "0 2019-01-02 38.722500 39.712502 ... -91.567852 42.250808 119.879197\n", + "0 2019-01-02 93.910004 96.269997 ... -97.742269 26.709417 119.879197\n", + "0 2019-01-02 316.190002 323.950012 ... -21.712382 13.611972 119.879197\n", + "0 2019-01-02 124.029999 127.879997 ... -5.091209 0.873482 119.879197\n", + "0 2019-01-02 42.279999 43.200001 ... 
-87.496850 29.529377 119.879197\n", + "\n", + "[5 rows x 12 columns]" ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 68 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "zYN573SOHhxG", + "outputId": "187c6d1b-3e91-40f8-dafd-230d787f2ee1" + }, + "source": [ + "config.TECHNICAL_INDICATORS_LIST" + ], + "execution_count": null, + "outputs": [ { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "y5D5PFUhMzSV", - "outputId": "2716af5e-06e5-4eab-b071-a506c60a0475" - }, - "source": [ - "agent = DRLAgent(env = env_train)\n", - "PPO_PARAMS = {\n", - " \"n_steps\": 2048,\n", - " \"ent_coef\": 0.01,\n", - " \"learning_rate\": 0.00025,\n", - " \"batch_size\": 128,\n", - "}\n", - "model_ppo = agent.get_model(\"ppo\",model_kwargs = PPO_PARAMS)" - ], - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "text": [ - "{'n_steps': 2048, 'ent_coef': 0.01, 'learning_rate': 0.00025, 'batch_size': 128}\n", - "Using cpu device\n" - ], - "name": "stdout" - } + "output_type": "execute_result", + "data": { + "text/plain": [ + "['macd', 'rsi_30', 'cci_30', 'dx_30']" ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 9 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "Q2zqII8rMIqn", + "outputId": "8a2c943b-1be4-4b8d-b64f-666e0852b7e6" + }, + "source": [ + "stock_dimension = len(train.tic.unique())\n", + "state_space = 1 + 2*stock_dimension + len(config.TECHNICAL_INDICATORS_LIST)*stock_dimension\n", + "print(f\"Stock Dimension: {stock_dimension}, State Space: {state_space}\")\n" + ], + "execution_count": null, + "outputs": [ { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "Gt8eIQKYM4G3", - "outputId": "1016cc05-58b6-45dc-c871-a322f1c3dc89" - }, - "source": [ - "trained_ppo = 
agent.train_model(model=model_ppo, \n", - " tb_log_name='ppo',\n", - " total_timesteps=100000)" - ], - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "text": [ - "Logging to tensorboard_log/ppo/ppo_2\n", - "-----------------------------\n", - "| time/ | |\n", - "| fps | 104 |\n", - "| iterations | 1 |\n", - "| time_elapsed | 19 |\n", - "| total_timesteps | 2048 |\n", - "-----------------------------\n", - "-----------------------------------------\n", - "| time/ | |\n", - "| fps | 102 |\n", - "| iterations | 2 |\n", - "| time_elapsed | 39 |\n", - "| total_timesteps | 4096 |\n", - "| train/ | |\n", - "| approx_kl | 0.014151055 |\n", - "| clip_fraction | 0.212 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -42.6 |\n", - "| explained_variance | -28.1 |\n", - "| learning_rate | 0.00025 |\n", - "| loss | 5.76 |\n", - "| n_updates | 10 |\n", - "| policy_gradient_loss | -0.0277 |\n", - "| std | 1 |\n", - "| value_loss | 12 |\n", - "-----------------------------------------\n", - "-----------------------------------------\n", - "| time/ | |\n", - "| fps | 101 |\n", - "| iterations | 3 |\n", - "| time_elapsed | 60 |\n", - "| total_timesteps | 6144 |\n", - "| train/ | |\n", - "| approx_kl | 0.016467014 |\n", - "| clip_fraction | 0.186 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -42.6 |\n", - "| explained_variance | -176 |\n", - "| learning_rate | 0.00025 |\n", - "| loss | 9.99 |\n", - "| n_updates | 20 |\n", - "| policy_gradient_loss | -0.0275 |\n", - "| std | 1 |\n", - "| value_loss | 18.9 |\n", - "-----------------------------------------\n", - "-----------------------------------------\n", - "| time/ | |\n", - "| fps | 101 |\n", - "| iterations | 4 |\n", - "| time_elapsed | 80 |\n", - "| total_timesteps | 8192 |\n", - "| train/ | |\n", - "| approx_kl | 0.020772668 |\n", - "| clip_fraction | 0.191 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -42.6 |\n", - "| explained_variance | -87.8 |\n", - "| learning_rate | 0.00025 
|\n", - "| loss | 16.7 |\n", - "| n_updates | 30 |\n", - "| policy_gradient_loss | -0.028 |\n", - "| std | 1 |\n", - "| value_loss | 32.2 |\n", - "-----------------------------------------\n", - "-----------------------------------------\n", - "| time/ | |\n", - "| fps | 101 |\n", - "| iterations | 5 |\n", - "| time_elapsed | 101 |\n", - "| total_timesteps | 10240 |\n", - "| train/ | |\n", - "| approx_kl | 0.019156657 |\n", - "| clip_fraction | 0.225 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -42.7 |\n", - "| explained_variance | -81.3 |\n", - "| learning_rate | 0.00025 |\n", - "| loss | 11 |\n", - "| n_updates | 40 |\n", - "| policy_gradient_loss | -0.0184 |\n", - "| std | 1 |\n", - "| value_loss | 26.6 |\n", - "-----------------------------------------\n", - "----------------------------------------\n", - "| time/ | |\n", - "| fps | 100 |\n", - "| iterations | 6 |\n", - "| time_elapsed | 122 |\n", - "| total_timesteps | 12288 |\n", - "| train/ | |\n", - "| approx_kl | 0.02388929 |\n", - "| clip_fraction | 0.223 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -42.7 |\n", - "| explained_variance | -67 |\n", - "| learning_rate | 0.00025 |\n", - "| loss | 7.86 |\n", - "| n_updates | 50 |\n", - "| policy_gradient_loss | -0.0269 |\n", - "| std | 1.01 |\n", - "| value_loss | 23 |\n", - "----------------------------------------\n", - "-----------------------------------------\n", - "| time/ | |\n", - "| fps | 100 |\n", - "| iterations | 7 |\n", - "| time_elapsed | 142 |\n", - "| total_timesteps | 14336 |\n", - "| train/ | |\n", - "| approx_kl | 0.023960019 |\n", - "| clip_fraction | 0.21 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -42.8 |\n", - "| explained_variance | -58.1 |\n", - "| learning_rate | 0.00025 |\n", - "| loss | 6.32 |\n", - "| n_updates | 60 |\n", - "| policy_gradient_loss | -0.0234 |\n", - "| std | 1.01 |\n", - "| value_loss | 12 |\n", - "-----------------------------------------\n", - 
"-----------------------------------------\n", - "| time/ | |\n", - "| fps | 100 |\n", - "| iterations | 8 |\n", - "| time_elapsed | 163 |\n", - "| total_timesteps | 16384 |\n", - "| train/ | |\n", - "| approx_kl | 0.021991765 |\n", - "| clip_fraction | 0.212 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -42.8 |\n", - "| explained_variance | -36.4 |\n", - "| learning_rate | 0.00025 |\n", - "| loss | 9.39 |\n", - "| n_updates | 70 |\n", - "| policy_gradient_loss | -0.0243 |\n", - "| std | 1.01 |\n", - "| value_loss | 19.9 |\n", - "-----------------------------------------\n", - "----------------------------------------\n", - "| time/ | |\n", - "| fps | 100 |\n", - "| iterations | 9 |\n", - "| time_elapsed | 183 |\n", - "| total_timesteps | 18432 |\n", - "| train/ | |\n", - "| approx_kl | 0.01857267 |\n", - "| clip_fraction | 0.205 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -42.9 |\n", - "| explained_variance | -59.3 |\n", - "| learning_rate | 0.00025 |\n", - "| loss | 8.22 |\n", - "| n_updates | 80 |\n", - "| policy_gradient_loss | -0.0235 |\n", - "| std | 1.01 |\n", - "| value_loss | 20.5 |\n", - "----------------------------------------\n", - "day: 2515, episode: 130\n", - "begin_total_asset:1000000.00\n", - "end_total_asset:3383653.45\n", - "total_reward:2383653.45\n", - "total_cost: 255155.22\n", - "total_trades: 72649\n", - "Sharpe: 0.863\n", - "=================================\n", - "-----------------------------------------\n", - "| time/ | |\n", - "| fps | 100 |\n", - "| iterations | 10 |\n", - "| time_elapsed | 203 |\n", - "| total_timesteps | 20480 |\n", - "| train/ | |\n", - "| approx_kl | 0.022291362 |\n", - "| clip_fraction | 0.213 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -42.9 |\n", - "| explained_variance | -70.1 |\n", - "| learning_rate | 0.00025 |\n", - "| loss | 12.4 |\n", - "| n_updates | 90 |\n", - "| policy_gradient_loss | -0.019 |\n", - "| std | 1.01 |\n", - "| value_loss | 34.1 |\n", - 
"-----------------------------------------\n", - "-----------------------------------------\n", - "| time/ | |\n", - "| fps | 100 |\n", - "| iterations | 11 |\n", - "| time_elapsed | 224 |\n", - "| total_timesteps | 22528 |\n", - "| train/ | |\n", - "| approx_kl | 0.017316487 |\n", - "| clip_fraction | 0.22 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -42.9 |\n", - "| explained_variance | -159 |\n", - "| learning_rate | 0.00025 |\n", - "| loss | 21.4 |\n", - "| n_updates | 100 |\n", - "| policy_gradient_loss | -0.0182 |\n", - "| std | 1.01 |\n", - "| value_loss | 38.8 |\n", - "-----------------------------------------\n", - "-----------------------------------------\n", - "| time/ | |\n", - "| fps | 100 |\n", - "| iterations | 12 |\n", - "| time_elapsed | 244 |\n", - "| total_timesteps | 24576 |\n", - "| train/ | |\n", - "| approx_kl | 0.018951386 |\n", - "| clip_fraction | 0.179 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -43 |\n", - "| explained_variance | -25.3 |\n", - "| learning_rate | 0.00025 |\n", - "| loss | 11.4 |\n", - "| n_updates | 110 |\n", - "| policy_gradient_loss | -0.0135 |\n", - "| std | 1.02 |\n", - "| value_loss | 29.9 |\n", - "-----------------------------------------\n", - "-----------------------------------------\n", - "| time/ | |\n", - "| fps | 100 |\n", - "| iterations | 13 |\n", - "| time_elapsed | 265 |\n", - "| total_timesteps | 26624 |\n", - "| train/ | |\n", - "| approx_kl | 0.033302963 |\n", - "| clip_fraction | 0.298 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -43.1 |\n", - "| explained_variance | -58.1 |\n", - "| learning_rate | 0.00025 |\n", - "| loss | 9.17 |\n", - "| n_updates | 120 |\n", - "| policy_gradient_loss | -0.0236 |\n", - "| std | 1.02 |\n", - "| value_loss | 28.3 |\n", - "-----------------------------------------\n", - "-----------------------------------------\n", - "| time/ | |\n", - "| fps | 100 |\n", - "| iterations | 14 |\n", - "| time_elapsed | 285 |\n", - "| total_timesteps | 28672 
|\n", - "| train/ | |\n", - "| approx_kl | 0.027676268 |\n", - "| clip_fraction | 0.278 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -43.1 |\n", - "| explained_variance | -91.7 |\n", - "| learning_rate | 0.00025 |\n", - "| loss | 12.8 |\n", - "| n_updates | 130 |\n", - "| policy_gradient_loss | -0.0192 |\n", - "| std | 1.02 |\n", - "| value_loss | 32.7 |\n", - "-----------------------------------------\n", - "-----------------------------------------\n", - "| time/ | |\n", - "| fps | 100 |\n", - "| iterations | 15 |\n", - "| time_elapsed | 306 |\n", - "| total_timesteps | 30720 |\n", - "| train/ | |\n", - "| approx_kl | 0.027800845 |\n", - "| clip_fraction | 0.233 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -43.2 |\n", - "| explained_variance | -85.9 |\n", - "| learning_rate | 0.00025 |\n", - "| loss | 26 |\n", - "| n_updates | 140 |\n", - "| policy_gradient_loss | -0.0217 |\n", - "| std | 1.02 |\n", - "| value_loss | 40.1 |\n", - "-----------------------------------------\n", - "-----------------------------------------\n", - "| time/ | |\n", - "| fps | 100 |\n", - "| iterations | 16 |\n", - "| time_elapsed | 326 |\n", - "| total_timesteps | 32768 |\n", - "| train/ | |\n", - "| approx_kl | 0.016968882 |\n", - "| clip_fraction | 0.219 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -43.3 |\n", - "| explained_variance | -71.3 |\n", - "| learning_rate | 0.00025 |\n", - "| loss | 10.2 |\n", - "| n_updates | 150 |\n", - "| policy_gradient_loss | -0.0209 |\n", - "| std | 1.02 |\n", - "| value_loss | 26.9 |\n", - "-----------------------------------------\n", - "-----------------------------------------\n", - "| time/ | |\n", - "| fps | 100 |\n", - "| iterations | 17 |\n", - "| time_elapsed | 347 |\n", - "| total_timesteps | 34816 |\n", - "| train/ | |\n", - "| approx_kl | 0.022131229 |\n", - "| clip_fraction | 0.215 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -43.3 |\n", - "| explained_variance | -15.7 |\n", - "| learning_rate | 
0.00025 |\n", - "| loss | 20.6 |\n", - "| n_updates | 160 |\n", - "| policy_gradient_loss | -0.0153 |\n", - "| std | 1.03 |\n", - "| value_loss | 49.1 |\n", - "-----------------------------------------\n", - "-----------------------------------------\n", - "| time/ | |\n", - "| fps | 100 |\n", - "| iterations | 18 |\n", - "| time_elapsed | 368 |\n", - "| total_timesteps | 36864 |\n", - "| train/ | |\n", - "| approx_kl | 0.029286291 |\n", - "| clip_fraction | 0.266 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -43.4 |\n", - "| explained_variance | -43.9 |\n", - "| learning_rate | 0.00025 |\n", - "| loss | 13.2 |\n", - "| n_updates | 170 |\n", - "| policy_gradient_loss | -0.015 |\n", - "| std | 1.03 |\n", - "| value_loss | 19.9 |\n", - "-----------------------------------------\n", - "-----------------------------------------\n", - "| time/ | |\n", - "| fps | 100 |\n", - "| iterations | 19 |\n", - "| time_elapsed | 388 |\n", - "| total_timesteps | 38912 |\n", - "| train/ | |\n", - "| approx_kl | 0.027719798 |\n", - "| clip_fraction | 0.24 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -43.4 |\n", - "| explained_variance | -131 |\n", - "| learning_rate | 0.00025 |\n", - "| loss | 16.8 |\n", - "| n_updates | 180 |\n", - "| policy_gradient_loss | -0.0183 |\n", - "| std | 1.03 |\n", - "| value_loss | 34 |\n", - "-----------------------------------------\n", - "-----------------------------------------\n", - "| time/ | |\n", - "| fps | 100 |\n", - "| iterations | 20 |\n", - "| time_elapsed | 409 |\n", - "| total_timesteps | 40960 |\n", - "| train/ | |\n", - "| approx_kl | 0.022764063 |\n", - "| clip_fraction | 0.217 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -43.5 |\n", - "| explained_variance | -63.1 |\n", - "| learning_rate | 0.00025 |\n", - "| loss | 22.5 |\n", - "| n_updates | 190 |\n", - "| policy_gradient_loss | -0.0186 |\n", - "| std | 1.03 |\n", - "| value_loss | 37.9 |\n", - "-----------------------------------------\n", - 
"----------------------------------------\n", - "| time/ | |\n", - "| fps | 100 |\n", - "| iterations | 21 |\n", - "| time_elapsed | 429 |\n", - "| total_timesteps | 43008 |\n", - "| train/ | |\n", - "| approx_kl | 0.02734076 |\n", - "| clip_fraction | 0.208 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -43.5 |\n", - "| explained_variance | -113 |\n", - "| learning_rate | 0.00025 |\n", - "| loss | 21 |\n", - "| n_updates | 200 |\n", - "| policy_gradient_loss | -0.0201 |\n", - "| std | 1.03 |\n", - "| value_loss | 60.7 |\n", - "----------------------------------------\n", - "-----------------------------------------\n", - "| time/ | |\n", - "| fps | 100 |\n", - "| iterations | 22 |\n", - "| time_elapsed | 450 |\n", - "| total_timesteps | 45056 |\n", - "| train/ | |\n", - "| approx_kl | 0.023378888 |\n", - "| clip_fraction | 0.277 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -43.6 |\n", - "| explained_variance | -57 |\n", - "| learning_rate | 0.00025 |\n", - "| loss | 19.6 |\n", - "| n_updates | 210 |\n", - "| policy_gradient_loss | -0.0153 |\n", - "| std | 1.03 |\n", - "| value_loss | 38.9 |\n", - "-----------------------------------------\n", - "day: 2515, episode: 140\n", - "begin_total_asset:1000000.00\n", - "end_total_asset:5223199.40\n", - "total_reward:4223199.40\n", - "total_cost: 235269.98\n", - "total_trades: 71552\n", - "Sharpe: 1.128\n", - "=================================\n", - "-----------------------------------------\n", - "| time/ | |\n", - "| fps | 100 |\n", - "| iterations | 23 |\n", - "| time_elapsed | 470 |\n", - "| total_timesteps | 47104 |\n", - "| train/ | |\n", - "| approx_kl | 0.025331508 |\n", - "| clip_fraction | 0.29 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -43.6 |\n", - "| explained_variance | -61.4 |\n", - "| learning_rate | 0.00025 |\n", - "| loss | 19.7 |\n", - "| n_updates | 220 |\n", - "| policy_gradient_loss | -0.0119 |\n", - "| std | 1.04 |\n", - "| value_loss | 34.8 |\n", - 
"-----------------------------------------\n", - "-----------------------------------------\n", - "| time/ | |\n", - "| fps | 100 |\n", - "| iterations | 24 |\n", - "| time_elapsed | 491 |\n", - "| total_timesteps | 49152 |\n", - "| train/ | |\n", - "| approx_kl | 0.025766762 |\n", - "| clip_fraction | 0.231 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -43.7 |\n", - "| explained_variance | -64.7 |\n", - "| learning_rate | 0.00025 |\n", - "| loss | 20.4 |\n", - "| n_updates | 230 |\n", - "| policy_gradient_loss | -0.0187 |\n", - "| std | 1.04 |\n", - "| value_loss | 47.4 |\n", - "-----------------------------------------\n", - "-----------------------------------------\n", - "| time/ | |\n", - "| fps | 100 |\n", - "| iterations | 25 |\n", - "| time_elapsed | 511 |\n", - "| total_timesteps | 51200 |\n", - "| train/ | |\n", - "| approx_kl | 0.041917183 |\n", - "| clip_fraction | 0.278 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -43.8 |\n", - "| explained_variance | -34 |\n", - "| learning_rate | 0.00025 |\n", - "| loss | 22.2 |\n", - "| n_updates | 240 |\n", - "| policy_gradient_loss | -0.0164 |\n", - "| std | 1.04 |\n", - "| value_loss | 48 |\n", - "-----------------------------------------\n", - "---------------------------------------\n", - "| time/ | |\n", - "| fps | 100 |\n", - "| iterations | 26 |\n", - "| time_elapsed | 531 |\n", - "| total_timesteps | 53248 |\n", - "| train/ | |\n", - "| approx_kl | 0.0367468 |\n", - "| clip_fraction | 0.273 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -43.8 |\n", - "| explained_variance | -48.1 |\n", - "| learning_rate | 0.00025 |\n", - "| loss | 21.5 |\n", - "| n_updates | 250 |\n", - "| policy_gradient_loss | -0.00821 |\n", - "| std | 1.04 |\n", - "| value_loss | 39.5 |\n", - "---------------------------------------\n", - "-----------------------------------------\n", - "| time/ | |\n", - "| fps | 100 |\n", - "| iterations | 27 |\n", - "| time_elapsed | 552 |\n", - "| total_timesteps | 55296 
|\n", - "| train/ | |\n", - "| approx_kl | 0.024581099 |\n", - "| clip_fraction | 0.211 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -43.9 |\n", - "| explained_variance | -198 |\n", - "| learning_rate | 0.00025 |\n", - "| loss | 32.1 |\n", - "| n_updates | 260 |\n", - "| policy_gradient_loss | -0.0106 |\n", - "| std | 1.05 |\n", - "| value_loss | 58.2 |\n", - "-----------------------------------------\n", - "----------------------------------------\n", - "| time/ | |\n", - "| fps | 100 |\n", - "| iterations | 28 |\n", - "| time_elapsed | 573 |\n", - "| total_timesteps | 57344 |\n", - "| train/ | |\n", - "| approx_kl | 0.02569989 |\n", - "| clip_fraction | 0.209 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -43.9 |\n", - "| explained_variance | -161 |\n", - "| learning_rate | 0.00025 |\n", - "| loss | 25.1 |\n", - "| n_updates | 270 |\n", - "| policy_gradient_loss | -0.0137 |\n", - "| std | 1.05 |\n", - "| value_loss | 55 |\n", - "----------------------------------------\n", - "-----------------------------------------\n", - "| time/ | |\n", - "| fps | 100 |\n", - "| iterations | 29 |\n", - "| time_elapsed | 593 |\n", - "| total_timesteps | 59392 |\n", - "| train/ | |\n", - "| approx_kl | 0.032340243 |\n", - "| clip_fraction | 0.252 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -44 |\n", - "| explained_variance | -24.1 |\n", - "| learning_rate | 0.00025 |\n", - "| loss | 9.23 |\n", - "| n_updates | 280 |\n", - "| policy_gradient_loss | -0.0167 |\n", - "| std | 1.05 |\n", - "| value_loss | 34 |\n", - "-----------------------------------------\n", - "-----------------------------------------\n", - "| time/ | |\n", - "| fps | 100 |\n", - "| iterations | 30 |\n", - "| time_elapsed | 613 |\n", - "| total_timesteps | 61440 |\n", - "| train/ | |\n", - "| approx_kl | 0.018233867 |\n", - "| clip_fraction | 0.239 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -44 |\n", - "| explained_variance | -34.1 |\n", - "| learning_rate | 0.00025 |\n", - 
"| loss | 28.1 |\n", - "| n_updates | 290 |\n", - "| policy_gradient_loss | -0.0158 |\n", - "| std | 1.05 |\n", - "| value_loss | 41.2 |\n", - "-----------------------------------------\n", - "-----------------------------------------\n", - "| time/ | |\n", - "| fps | 100 |\n", - "| iterations | 31 |\n", - "| time_elapsed | 634 |\n", - "| total_timesteps | 63488 |\n", - "| train/ | |\n", - "| approx_kl | 0.030068567 |\n", - "| clip_fraction | 0.152 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -44 |\n", - "| explained_variance | -26.1 |\n", - "| learning_rate | 0.00025 |\n", - "| loss | 19.2 |\n", - "| n_updates | 300 |\n", - "| policy_gradient_loss | -0.0121 |\n", - "| std | 1.05 |\n", - "| value_loss | 64.9 |\n", - "-----------------------------------------\n", - "-----------------------------------------\n", - "| time/ | |\n", - "| fps | 100 |\n", - "| iterations | 32 |\n", - "| time_elapsed | 654 |\n", - "| total_timesteps | 65536 |\n", - "| train/ | |\n", - "| approx_kl | 0.024889158 |\n", - "| clip_fraction | 0.27 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -44.1 |\n", - "| explained_variance | -31.2 |\n", - "| learning_rate | 0.00025 |\n", - "| loss | 37.3 |\n", - "| n_updates | 310 |\n", - "| policy_gradient_loss | -0.0148 |\n", - "| std | 1.05 |\n", - "| value_loss | 58 |\n", - "-----------------------------------------\n", - "----------------------------------------\n", - "| time/ | |\n", - "| fps | 100 |\n", - "| iterations | 33 |\n", - "| time_elapsed | 674 |\n", - "| total_timesteps | 67584 |\n", - "| train/ | |\n", - "| approx_kl | 0.03883523 |\n", - "| clip_fraction | 0.234 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -44.1 |\n", - "| explained_variance | -39.9 |\n", - "| learning_rate | 0.00025 |\n", - "| loss | 24.5 |\n", - "| n_updates | 320 |\n", - "| policy_gradient_loss | -0.0121 |\n", - "| std | 1.05 |\n", - "| value_loss | 84.4 |\n", - "----------------------------------------\n", - 
"-----------------------------------------\n", - "| time/ | |\n", - "| fps | 100 |\n", - "| iterations | 34 |\n", - "| time_elapsed | 695 |\n", - "| total_timesteps | 69632 |\n", - "| train/ | |\n", - "| approx_kl | 0.024309162 |\n", - "| clip_fraction | 0.225 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -44.2 |\n", - "| explained_variance | -12.9 |\n", - "| learning_rate | 0.00025 |\n", - "| loss | 8.79 |\n", - "| n_updates | 330 |\n", - "| policy_gradient_loss | -0.015 |\n", - "| std | 1.06 |\n", - "| value_loss | 23.8 |\n", - "-----------------------------------------\n", - "day: 2515, episode: 150\n", - "begin_total_asset:1000000.00\n", - "end_total_asset:6320097.75\n", - "total_reward:5320097.75\n", - "total_cost: 222029.44\n", - "total_trades: 69973\n", - "Sharpe: 1.250\n", - "=================================\n", - "-----------------------------------------\n", - "| time/ | |\n", - "| fps | 100 |\n", - "| iterations | 35 |\n", - "| time_elapsed | 715 |\n", - "| total_timesteps | 71680 |\n", - "| train/ | |\n", - "| approx_kl | 0.024664927 |\n", - "| clip_fraction | 0.183 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -44.2 |\n", - "| explained_variance | -17.2 |\n", - "| learning_rate | 0.00025 |\n", - "| loss | 15.3 |\n", - "| n_updates | 340 |\n", - "| policy_gradient_loss | -0.0141 |\n", - "| std | 1.06 |\n", - "| value_loss | 48.7 |\n", - "-----------------------------------------\n", - "----------------------------------------\n", - "| time/ | |\n", - "| fps | 100 |\n", - "| iterations | 36 |\n", - "| time_elapsed | 735 |\n", - "| total_timesteps | 73728 |\n", - "| train/ | |\n", - "| approx_kl | 0.03882557 |\n", - "| clip_fraction | 0.207 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -44.3 |\n", - "| explained_variance | -27.1 |\n", - "| learning_rate | 0.00025 |\n", - "| loss | 50.3 |\n", - "| n_updates | 350 |\n", - "| policy_gradient_loss | -0.0141 |\n", - "| std | 1.06 |\n", - "| value_loss | 93.7 |\n", - 
"----------------------------------------\n", - "-----------------------------------------\n", - "| time/ | |\n", - "| fps | 100 |\n", - "| iterations | 37 |\n", - "| time_elapsed | 756 |\n", - "| total_timesteps | 75776 |\n", - "| train/ | |\n", - "| approx_kl | 0.022156972 |\n", - "| clip_fraction | 0.214 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -44.3 |\n", - "| explained_variance | -23.9 |\n", - "| learning_rate | 0.00025 |\n", - "| loss | 26.5 |\n", - "| n_updates | 360 |\n", - "| policy_gradient_loss | -0.0161 |\n", - "| std | 1.06 |\n", - "| value_loss | 71.7 |\n", - "-----------------------------------------\n", - "-----------------------------------------\n", - "| time/ | |\n", - "| fps | 100 |\n", - "| iterations | 38 |\n", - "| time_elapsed | 776 |\n", - "| total_timesteps | 77824 |\n", - "| train/ | |\n", - "| approx_kl | 0.022767432 |\n", - "| clip_fraction | 0.223 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -44.4 |\n", - "| explained_variance | -17.5 |\n", - "| learning_rate | 0.00025 |\n", - "| loss | 23.8 |\n", - "| n_updates | 370 |\n", - "| policy_gradient_loss | -0.0154 |\n", - "| std | 1.06 |\n", - "| value_loss | 38.7 |\n", - "-----------------------------------------\n", - "-----------------------------------------\n", - "| time/ | |\n", - "| fps | 100 |\n", - "| iterations | 39 |\n", - "| time_elapsed | 797 |\n", - "| total_timesteps | 79872 |\n", - "| train/ | |\n", - "| approx_kl | 0.020827759 |\n", - "| clip_fraction | 0.178 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -44.4 |\n", - "| explained_variance | -56 |\n", - "| learning_rate | 0.00025 |\n", - "| loss | 36.3 |\n", - "| n_updates | 380 |\n", - "| policy_gradient_loss | -0.00964 |\n", - "| std | 1.07 |\n", - "| value_loss | 82.1 |\n", - "-----------------------------------------\n", - "-----------------------------------------\n", - "| time/ | |\n", - "| fps | 100 |\n", - "| iterations | 40 |\n", - "| time_elapsed | 817 |\n", - "| total_timesteps | 
81920 |\n", - "| train/ | |\n", - "| approx_kl | 0.013000591 |\n", - "| clip_fraction | 0.132 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -44.5 |\n", - "| explained_variance | -23 |\n", - "| learning_rate | 0.00025 |\n", - "| loss | 14 |\n", - "| n_updates | 390 |\n", - "| policy_gradient_loss | -0.0162 |\n", - "| std | 1.07 |\n", - "| value_loss | 63.1 |\n", - "-----------------------------------------\n", - "-----------------------------------------\n", - "| time/ | |\n", - "| fps | 100 |\n", - "| iterations | 41 |\n", - "| time_elapsed | 837 |\n", - "| total_timesteps | 83968 |\n", - "| train/ | |\n", - "| approx_kl | 0.021172233 |\n", - "| clip_fraction | 0.19 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -44.5 |\n", - "| explained_variance | -26.6 |\n", - "| learning_rate | 0.00025 |\n", - "| loss | 47.3 |\n", - "| n_updates | 400 |\n", - "| policy_gradient_loss | -0.0191 |\n", - "| std | 1.07 |\n", - "| value_loss | 98 |\n", - "-----------------------------------------\n", - "----------------------------------------\n", - "| time/ | |\n", - "| fps | 100 |\n", - "| iterations | 42 |\n", - "| time_elapsed | 858 |\n", - "| total_timesteps | 86016 |\n", - "| train/ | |\n", - "| approx_kl | 0.02925424 |\n", - "| clip_fraction | 0.16 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -44.6 |\n", - "| explained_variance | -33.8 |\n", - "| learning_rate | 0.00025 |\n", - "| loss | 59.2 |\n", - "| n_updates | 410 |\n", - "| policy_gradient_loss | -0.0117 |\n", - "| std | 1.07 |\n", - "| value_loss | 163 |\n", - "----------------------------------------\n", - "----------------------------------------\n", - "| time/ | |\n", - "| fps | 100 |\n", - "| iterations | 43 |\n", - "| time_elapsed | 878 |\n", - "| total_timesteps | 88064 |\n", - "| train/ | |\n", - "| approx_kl | 0.01635669 |\n", - "| clip_fraction | 0.138 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -44.6 |\n", - "| explained_variance | -28.9 |\n", - "| learning_rate | 0.00025 
|\n", - "| loss | 57.9 |\n", - "| n_updates | 420 |\n", - "| policy_gradient_loss | -0.0135 |\n", - "| std | 1.07 |\n", - "| value_loss | 122 |\n", - "----------------------------------------\n", - "----------------------------------------\n", - "| time/ | |\n", - "| fps | 100 |\n", - "| iterations | 44 |\n", - "| time_elapsed | 898 |\n", - "| total_timesteps | 90112 |\n", - "| train/ | |\n", - "| approx_kl | 0.03150232 |\n", - "| clip_fraction | 0.188 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -44.6 |\n", - "| explained_variance | -20.9 |\n", - "| learning_rate | 0.00025 |\n", - "| loss | 45.1 |\n", - "| n_updates | 430 |\n", - "| policy_gradient_loss | -0.0222 |\n", - "| std | 1.07 |\n", - "| value_loss | 84.9 |\n", - "----------------------------------------\n", - "-----------------------------------------\n", - "| time/ | |\n", - "| fps | 100 |\n", - "| iterations | 45 |\n", - "| time_elapsed | 919 |\n", - "| total_timesteps | 92160 |\n", - "| train/ | |\n", - "| approx_kl | 0.035686597 |\n", - "| clip_fraction | 0.335 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -44.6 |\n", - "| explained_variance | -5.37 |\n", - "| learning_rate | 0.00025 |\n", - "| loss | 4.34 |\n", - "| n_updates | 440 |\n", - "| policy_gradient_loss | -0.0119 |\n", - "| std | 1.07 |\n", - "| value_loss | 14.2 |\n", - "-----------------------------------------\n", - "-----------------------------------------\n", - "| time/ | |\n", - "| fps | 100 |\n", - "| iterations | 46 |\n", - "| time_elapsed | 940 |\n", - "| total_timesteps | 94208 |\n", - "| train/ | |\n", - "| approx_kl | 0.028425248 |\n", - "| clip_fraction | 0.293 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -44.7 |\n", - "| explained_variance | -4.65 |\n", - "| learning_rate | 0.00025 |\n", - "| loss | 22.9 |\n", - "| n_updates | 450 |\n", - "| policy_gradient_loss | -0.0184 |\n", - "| std | 1.07 |\n", - "| value_loss | 26.4 |\n", - "-----------------------------------------\n", - "day: 2515, episode: 
160\n", - "begin_total_asset:1000000.00\n", - "end_total_asset:5044806.56\n", - "total_reward:4044806.56\n", - "total_cost: 237117.70\n", - "total_trades: 70270\n", - "Sharpe: 1.271\n", - "=================================\n", - "-----------------------------------------\n", - "| time/ | |\n", - "| fps | 100 |\n", - "| iterations | 47 |\n", - "| time_elapsed | 960 |\n", - "| total_timesteps | 96256 |\n", - "| train/ | |\n", - "| approx_kl | 0.034343738 |\n", - "| clip_fraction | 0.299 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -44.7 |\n", - "| explained_variance | -3.42 |\n", - "| learning_rate | 0.00025 |\n", - "| loss | 17.6 |\n", - "| n_updates | 460 |\n", - "| policy_gradient_loss | -0.0185 |\n", - "| std | 1.08 |\n", - "| value_loss | 56.9 |\n", - "-----------------------------------------\n", - "-----------------------------------------\n", - "| time/ | |\n", - "| fps | 100 |\n", - "| iterations | 48 |\n", - "| time_elapsed | 981 |\n", - "| total_timesteps | 98304 |\n", - "| train/ | |\n", - "| approx_kl | 0.017608875 |\n", - "| clip_fraction | 0.231 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -44.8 |\n", - "| explained_variance | -17.7 |\n", - "| learning_rate | 0.00025 |\n", - "| loss | 17.6 |\n", - "| n_updates | 470 |\n", - "| policy_gradient_loss | -0.0054 |\n", - "| std | 1.08 |\n", - "| value_loss | 35.9 |\n", - "-----------------------------------------\n", - "-----------------------------------------\n", - "| time/ | |\n", - "| fps | 100 |\n", - "| iterations | 49 |\n", - "| time_elapsed | 1001 |\n", - "| total_timesteps | 100352 |\n", - "| train/ | |\n", - "| approx_kl | 0.024408635 |\n", - "| clip_fraction | 0.168 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -44.8 |\n", - "| explained_variance | -4.44 |\n", - "| learning_rate | 0.00025 |\n", - "| loss | 16.2 |\n", - "| n_updates | 480 |\n", - "| policy_gradient_loss | -0.00922 |\n", - "| std | 1.08 |\n", - "| value_loss | 50.8 |\n", - 
"-----------------------------------------\n" - ], - "name": "stdout" - } - ] + "output_type": "stream", + "text": [ + "Stock Dimension: 30, State Space: 181\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "AWyp84Ltto19" + }, + "source": [ + "env_kwargs = {\n", + " \"hmax\": 100, \n", + " \"initial_amount\": 1000000, \n", + " \"transaction_cost_pct\": 0.001, \n", + " \"state_space\": state_space, \n", + " \"stock_dim\": stock_dimension, \n", + " \"tech_indicator_list\": config.TECHNICAL_INDICATORS_LIST, \n", + " \"action_space\": stock_dimension, \n", + " \"reward_scaling\": 1e-4\n", + " \n", + "}\n", + "\n", + "e_train_gym = StockTradingEnv(df = train, **env_kwargs)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "64EoqOrQjiVf" + }, + "source": [ + "## Environment for Training\n", + "\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "xwSvvPjutpqS", + "outputId": "406e5ec3-28ba-4a72-9b22-0d031f7bf9a6" + }, + "source": [ + "env_train, _ = e_train_gym.get_sb_env()\n", + "print(type(env_train))" + ], + "execution_count": null, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "3Zpv4S0-fDBv" - }, - "source": [ - "### Model 4: TD3" - ] + "output_type": "stream", + "text": [ + "\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HMNR5nHjh1iz" + }, + "source": [ + "\n", + "# Part 6: Implement DRL Algorithms\n", + "* The implementation of the DRL algorithms is based on **OpenAI Baselines** and **Stable Baselines**. Stable Baselines is a fork of OpenAI Baselines, with a major structural refactoring, and code cleanups.\n", + "* FinRL library includes fine-tuned standard DRL algorithms, such as DQN, DDPG,\n", + "Multi-Agent DDPG, PPO, SAC, A2C and TD3.
We also allow users to\n", + "design their own DRL algorithms by adapting these DRL algorithms." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "364PsqckttcQ" + }, + "source": [ + "agent = DRLAgent(env = env_train)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "YDmqOyF9h1iz" + }, + "source": [ + "### Model Training: 5 models, A2C, DDPG, PPO, TD3, SAC\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uijiWgkuh1jB" + }, + "source": [ + "### Model 1: A2C\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "GUCnkn-HIbmj", + "outputId": "2fdb297a-8d35-4c7e-806f-de859d70e19e" + }, + "source": [ + "agent = DRLAgent(env = env_train)\n", + "model_a2c = agent.get_model(\"a2c\")" + ], + "execution_count": null, + "outputs": [ { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "JSAHhV4Xc-bh", - "outputId": "e531db14-aab4-47d1-cc15-02c893ec66c9" - }, - "source": [ - "agent = DRLAgent(env = env_train)\n", - "TD3_PARAMS = {\"batch_size\": 100, \n", - " \"buffer_size\": 1000000, \n", - " \"learning_rate\": 0.001}\n", - "\n", - "model_td3 = agent.get_model(\"td3\",model_kwargs = TD3_PARAMS)" - ], - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "text": [ - "{'batch_size': 100, 'buffer_size': 1000000, 'learning_rate': 0.001}\n", - "Using cpu device\n" - ], - "name": "stdout" - } - ] + "output_type": "stream", + "text": [ + "{'n_steps': 5, 'ent_coef': 0.01, 'learning_rate': 0.0007}\n", + "Using cpu device\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "0GVpkWGqH4-D", + "outputId": "9eb09ba2-fc4b-46a1-ea3d-bd9b3bfefffd" + }, + "source": [ + "trained_a2c = agent.train_model(model=model_a2c, \n", + " tb_log_name='a2c',\n", + " total_timesteps=100000)" 
+ ], + "execution_count": null, + "outputs": [ { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "OSRxNYAxdKpU", - "outputId": "ddc4193c-884b-4a2c-9e49-31397e2cfbec" - }, - "source": [ - "trained_td3 = agent.train_model(model=model_td3, \n", - " tb_log_name='td3',\n", - " total_timesteps=30000)" - ], - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "text": [ - "Logging to tensorboard_log/td3/td3_2\n", - "---------------------------------\n", - "| time/ | |\n", - "| episodes | 4 |\n", - "| fps | 33 |\n", - "| time_elapsed | 296 |\n", - "| total timesteps | 10064 |\n", - "| train/ | |\n", - "| actor_loss | 67.9 |\n", - "| critic_loss | 979 |\n", - "| learning_rate | 0.001 |\n", - "| n_updates | 7548 |\n", - "---------------------------------\n", - "day: 2515, episode: 10\n", - "begin_total_asset:1000000.00\n", - "end_total_asset:4438572.29\n", - "total_reward:3438572.29\n", - "total_cost: 1038.05\n", - "total_trades: 40290\n", - "Sharpe: 1.049\n", - "=================================\n", - "---------------------------------\n", - "| time/ | |\n", - "| episodes | 8 |\n", - "| fps | 30 |\n", - "| time_elapsed | 669 |\n", - "| total timesteps | 20128 |\n", - "| train/ | |\n", - "| actor_loss | 54 |\n", - "| critic_loss | 199 |\n", - "| learning_rate | 0.001 |\n", - "| n_updates | 17612 |\n", - "---------------------------------\n", - "---------------------------------\n", - "| time/ | |\n", - "| episodes | 12 |\n", - "| fps | 28 |\n", - "| time_elapsed | 1052 |\n", - "| total timesteps | 30192 |\n", - "| train/ | |\n", - "| actor_loss | 41.4 |\n", - "| critic_loss | 25.2 |\n", - "| learning_rate | 0.001 |\n", - "| n_updates | 27676 |\n", - "---------------------------------\n" - ], - "name": "stdout" - } - ] + "output_type": "stream", + "text": [ + "Logging to tensorboard_log/a2c/a2c_1\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 131 |\n", + "| iterations 
| 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 500 |\n", + "| train/ | |\n", + "| entropy_loss | -42.6 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 99 |\n", + "| policy_loss | -14.9 |\n", + "| std | 1 |\n", + "| value_loss | 0.362 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 177 |\n", + "| iterations | 200 |\n", + "| time_elapsed | 5 |\n", + "| total_timesteps | 1000 |\n", + "| train/ | |\n", + "| entropy_loss | -42.7 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 199 |\n", + "| policy_loss | -52 |\n", + "| std | 1 |\n", + "| value_loss | 2.03 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 199 |\n", + "| iterations | 300 |\n", + "| time_elapsed | 7 |\n", + "| total_timesteps | 1500 |\n", + "| train/ | |\n", + "| entropy_loss | -42.7 |\n", + "| explained_variance | -754 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 299 |\n", + "| policy_loss | -379 |\n", + "| std | 1.01 |\n", + "| value_loss | 72.5 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 400 |\n", + "| time_elapsed | 9 |\n", + "| total_timesteps | 2000 |\n", + "| train/ | |\n", + "| entropy_loss | -42.8 |\n", + "| explained_variance | -899 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 399 |\n", + "| policy_loss | -50.2 |\n", + "| std | 1.01 |\n", + "| value_loss | 2.23 |\n", + "------------------------------------\n", + "-------------------------------------\n", + "| time/ | |\n", + "| fps | 222 |\n", + "| iterations | 500 |\n", + "| time_elapsed | 11 |\n", + "| total_timesteps | 2500 |\n", + "| train/ | |\n", + "| entropy_loss | -42.8 |\n", + "| explained_variance | -5.49e+03 |\n", + "| learning_rate | 0.0007 |\n", + "| 
n_updates | 499 |\n", + "| policy_loss | 863 |\n", + "| std | 1.01 |\n", + "| value_loss | 470 |\n", + "-------------------------------------\n", + "begin_total_asset:1000000\n", + "end_total_asset:5069607.313605958\n", + "total_reward:4069607.3136059577\n", + "total_cost: 67556.9160195016\n", + "total_trades: 54955\n", + "Sharpe: 1.034955174352521\n", + "=================================\n", + "-------------------------------------\n", + "| time/ | |\n", + "| fps | 225 |\n", + "| iterations | 600 |\n", + "| time_elapsed | 13 |\n", + "| total_timesteps | 3000 |\n", + "| train/ | |\n", + "| entropy_loss | -42.8 |\n", + "| explained_variance | -1.13e+03 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 599 |\n", + "| policy_loss | -56.4 |\n", + "| std | 1.01 |\n", + "| value_loss | 3.43 |\n", + "-------------------------------------\n", + "-------------------------------------\n", + "| time/ | |\n", + "| fps | 229 |\n", + "| iterations | 700 |\n", + "| time_elapsed | 15 |\n", + "| total_timesteps | 3500 |\n", + "| train/ | |\n", + "| entropy_loss | -42.8 |\n", + "| explained_variance | -3.16e+03 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 699 |\n", + "| policy_loss | 93.9 |\n", + "| std | 1.01 |\n", + "| value_loss | 8.12 |\n", + "-------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 232 |\n", + "| iterations | 800 |\n", + "| time_elapsed | 17 |\n", + "| total_timesteps | 4000 |\n", + "| train/ | |\n", + "| entropy_loss | -42.8 |\n", + "| explained_variance | -3.3e+11 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 799 |\n", + "| policy_loss | 65.4 |\n", + "| std | 1.01 |\n", + "| value_loss | 3.13 |\n", + "------------------------------------\n", + "-------------------------------------\n", + "| time/ | |\n", + "| fps | 236 |\n", + "| iterations | 900 |\n", + "| time_elapsed | 19 |\n", + "| total_timesteps | 4500 |\n", + "| train/ | |\n", + "| entropy_loss | -42.8 |\n", + "| 
explained_variance | -1.57e+04 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 899 |\n", + "| policy_loss | 628 |\n", + "| std | 1.01 |\n", + "| value_loss | 222 |\n", + "-------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 239 |\n", + "| iterations | 1000 |\n", + "| time_elapsed | 20 |\n", + "| total_timesteps | 5000 |\n", + "| train/ | |\n", + "| entropy_loss | -42.8 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 999 |\n", + "| policy_loss | 283 |\n", + "| std | 1.01 |\n", + "| value_loss | 51.9 |\n", + "------------------------------------\n", + "begin_total_asset:1000000\n", + "end_total_asset:4806928.073206688\n", + "total_reward:3806928.0732066883\n", + "total_cost: 29371.967713621536\n", + "total_trades: 48579\n", + "Sharpe: 0.9611082492472007\n", + "=================================\n", + "-------------------------------------\n", + "| time/ | |\n", + "| fps | 241 |\n", + "| iterations | 1100 |\n", + "| time_elapsed | 22 |\n", + "| total_timesteps | 5500 |\n", + "| train/ | |\n", + "| entropy_loss | -42.8 |\n", + "| explained_variance | -1.34e+04 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 1099 |\n", + "| policy_loss | -9.16 |\n", + "| std | 1.01 |\n", + "| value_loss | 5.7 |\n", + "-------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 243 |\n", + "| iterations | 1200 |\n", + "| time_elapsed | 24 |\n", + "| total_timesteps | 6000 |\n", + "| train/ | |\n", + "| entropy_loss | -42.9 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 1199 |\n", + "| policy_loss | -169 |\n", + "| std | 1.01 |\n", + "| value_loss | 35 |\n", + "------------------------------------\n", + "-------------------------------------\n", + "| time/ | |\n", + "| fps | 244 |\n", + "| iterations | 1300 |\n", + "| time_elapsed | 26 |\n", + "| total_timesteps | 
6500 |\n", + "| train/ | |\n", + "| entropy_loss | -42.9 |\n", + "| explained_variance | -8.12e+05 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 1299 |\n", + "| policy_loss | 796 |\n", + "| std | 1.01 |\n", + "| value_loss | 360 |\n", + "-------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 246 |\n", + "| iterations | 1400 |\n", + "| time_elapsed | 28 |\n", + "| total_timesteps | 7000 |\n", + "| train/ | |\n", + "| entropy_loss | -42.9 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 1399 |\n", + "| policy_loss | -31.3 |\n", + "| std | 1.01 |\n", + "| value_loss | 0.783 |\n", + "------------------------------------\n", + "-------------------------------------\n", + "| time/ | |\n", + "| fps | 248 |\n", + "| iterations | 1500 |\n", + "| time_elapsed | 30 |\n", + "| total_timesteps | 7500 |\n", + "| train/ | |\n", + "| entropy_loss | -42.9 |\n", + "| explained_variance | -3.62e+14 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 1499 |\n", + "| policy_loss | -693 |\n", + "| std | 1.01 |\n", + "| value_loss | 542 |\n", + "-------------------------------------\n", + "begin_total_asset:1000000\n", + "end_total_asset:5032249.439668636\n", + "total_reward:4032249.439668636\n", + "total_cost: 27369.775673342636\n", + "total_trades: 46757\n", + "Sharpe: 0.9689568826715832\n", + "=================================\n", + "-------------------------------------\n", + "| time/ | |\n", + "| fps | 249 |\n", + "| iterations | 1600 |\n", + "| time_elapsed | 32 |\n", + "| total_timesteps | 8000 |\n", + "| train/ | |\n", + "| entropy_loss | -42.8 |\n", + "| explained_variance | -4.17e+11 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 1599 |\n", + "| policy_loss | -12.2 |\n", + "| std | 1.01 |\n", + "| value_loss | 0.468 |\n", + "-------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 250 |\n", 
+ "| iterations | 1700 |\n", + "| time_elapsed | 33 |\n", + "| total_timesteps | 8500 |\n", + "| train/ | |\n", + "| entropy_loss | -42.8 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 1699 |\n", + "| policy_loss | 87.7 |\n", + "| std | 1.01 |\n", + "| value_loss | 4.56 |\n", + "------------------------------------\n", + "-------------------------------------\n", + "| time/ | |\n", + "| fps | 251 |\n", + "| iterations | 1800 |\n", + "| time_elapsed | 35 |\n", + "| total_timesteps | 9000 |\n", + "| train/ | |\n", + "| entropy_loss | -42.8 |\n", + "| explained_variance | -4.62e+05 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 1799 |\n", + "| policy_loss | -255 |\n", + "| std | 1.01 |\n", + "| value_loss | 40.4 |\n", + "-------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 252 |\n", + "| iterations | 1900 |\n", + "| time_elapsed | 37 |\n", + "| total_timesteps | 9500 |\n", + "| train/ | |\n", + "| entropy_loss | -42.8 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 1899 |\n", + "| policy_loss | -127 |\n", + "| std | 1.01 |\n", + "| value_loss | 16.6 |\n", + "------------------------------------\n", + "-------------------------------------\n", + "| time/ | |\n", + "| fps | 253 |\n", + "| iterations | 2000 |\n", + "| time_elapsed | 39 |\n", + "| total_timesteps | 10000 |\n", + "| train/ | |\n", + "| entropy_loss | -42.8 |\n", + "| explained_variance | -1.97e+13 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 1999 |\n", + "| policy_loss | 406 |\n", + "| std | 1.01 |\n", + "| value_loss | 95.1 |\n", + "-------------------------------------\n", + "begin_total_asset:1000000\n", + "end_total_asset:3904628.721074527\n", + "total_reward:2904628.721074527\n", + "total_cost: 32800.81143295443\n", + "total_trades: 45335\n", + "Sharpe: 0.8354269955192407\n", + "=================================\n", + 
"------------------------------------\n", + "| time/ | |\n", + "| fps | 253 |\n", + "| iterations | 2100 |\n", + "| time_elapsed | 41 |\n", + "| total_timesteps | 10500 |\n", + "| train/ | |\n", + "| entropy_loss | -42.8 |\n", + "| explained_variance | -10.3 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 2099 |\n", + "| policy_loss | 69.7 |\n", + "| std | 1.01 |\n", + "| value_loss | 2.66 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 253 |\n", + "| iterations | 2200 |\n", + "| time_elapsed | 43 |\n", + "| total_timesteps | 11000 |\n", + "| train/ | |\n", + "| entropy_loss | -42.8 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 2199 |\n", + "| policy_loss | -42.8 |\n", + "| std | 1.01 |\n", + "| value_loss | 1.92 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 253 |\n", + "| iterations | 2300 |\n", + "| time_elapsed | 45 |\n", + "| total_timesteps | 11500 |\n", + "| train/ | |\n", + "| entropy_loss | -42.8 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 2299 |\n", + "| policy_loss | 48.1 |\n", + "| std | 1.01 |\n", + "| value_loss | 9.7 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 254 |\n", + "| iterations | 2400 |\n", + "| time_elapsed | 47 |\n", + "| total_timesteps | 12000 |\n", + "| train/ | |\n", + "| entropy_loss | -42.7 |\n", + "| explained_variance | -49.7 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 2399 |\n", + "| policy_loss | 204 |\n", + "| std | 1.01 |\n", + "| value_loss | 24.4 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 255 |\n", + "| iterations | 2500 |\n", + "| time_elapsed | 49 |\n", + "| total_timesteps | 12500 |\n", + "| train/ | 
|\n", + "| entropy_loss | -42.7 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 2499 |\n", + "| policy_loss | 56.3 |\n", + "| std | 1.01 |\n", + "| value_loss | 3.8 |\n", + "------------------------------------\n", + "begin_total_asset:1000000\n", + "end_total_asset:3630490.4667401677\n", + "total_reward:2630490.4667401677\n", + "total_cost: 49957.625875016725\n", + "total_trades: 49675\n", + "Sharpe: 0.7870109277440298\n", + "=================================\n", + "-------------------------------------\n", + "| time/ | |\n", + "| fps | 255 |\n", + "| iterations | 2600 |\n", + "| time_elapsed | 50 |\n", + "| total_timesteps | 13000 |\n", + "| train/ | |\n", + "| entropy_loss | -42.7 |\n", + "| explained_variance | -1.27e+12 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 2599 |\n", + "| policy_loss | -122 |\n", + "| std | 1.01 |\n", + "| value_loss | 9.1 |\n", + "-------------------------------------\n", + "-------------------------------------\n", + "| time/ | |\n", + "| fps | 255 |\n", + "| iterations | 2700 |\n", + "| time_elapsed | 52 |\n", + "| total_timesteps | 13500 |\n", + "| train/ | |\n", + "| entropy_loss | -42.7 |\n", + "| explained_variance | -3.15e+11 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 2699 |\n", + "| policy_loss | 16.4 |\n", + "| std | 1.01 |\n", + "| value_loss | 0.422 |\n", + "-------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 255 |\n", + "| iterations | 2800 |\n", + "| time_elapsed | 54 |\n", + "| total_timesteps | 14000 |\n", + "| train/ | |\n", + "| entropy_loss | -42.8 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 2799 |\n", + "| policy_loss | 119 |\n", + "| std | 1.01 |\n", + "| value_loss | 9.84 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 256 |\n", + "| iterations | 2900 |\n", + 
"| time_elapsed | 56 |\n", + "| total_timesteps | 14500 |\n", + "| train/ | |\n", + "| entropy_loss | -42.7 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 2899 |\n", + "| policy_loss | 230 |\n", + "| std | 1.01 |\n", + "| value_loss | 38.4 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 257 |\n", + "| iterations | 3000 |\n", + "| time_elapsed | 58 |\n", + "| total_timesteps | 15000 |\n", + "| train/ | |\n", + "| entropy_loss | -42.8 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 2999 |\n", + "| policy_loss | 54.7 |\n", + "| std | 1.01 |\n", + "| value_loss | 14.1 |\n", + "------------------------------------\n", + "begin_total_asset:1000000\n", + "end_total_asset:4105857.0455575557\n", + "total_reward:3105857.0455575557\n", + "total_cost: 12537.663790287688\n", + "total_trades: 43652\n", + "Sharpe: 0.8861282120753707\n", + "=================================\n", + "-------------------------------------\n", + "| time/ | |\n", + "| fps | 258 |\n", + "| iterations | 3100 |\n", + "| time_elapsed | 60 |\n", + "| total_timesteps | 15500 |\n", + "| train/ | |\n", + "| entropy_loss | -42.9 |\n", + "| explained_variance | -7.93e+04 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 3099 |\n", + "| policy_loss | 99.6 |\n", + "| std | 1.01 |\n", + "| value_loss | 6.67 |\n", + "-------------------------------------\n", + "-------------------------------------\n", + "| time/ | |\n", + "| fps | 258 |\n", + "| iterations | 3200 |\n", + "| time_elapsed | 61 |\n", + "| total_timesteps | 16000 |\n", + "| train/ | |\n", + "| entropy_loss | -42.9 |\n", + "| explained_variance | -1.05e+05 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 3199 |\n", + "| policy_loss | 190 |\n", + "| std | 1.01 |\n", + "| value_loss | 23.1 |\n", + "-------------------------------------\n", + 
"------------------------------------\n", + "| time/ | |\n", + "| fps | 258 |\n", + "| iterations | 3300 |\n", + "| time_elapsed | 63 |\n", + "| total_timesteps | 16500 |\n", + "| train/ | |\n", + "| entropy_loss | -42.9 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 3299 |\n", + "| policy_loss | 17.2 |\n", + "| std | 1.01 |\n", + "| value_loss | 2.04 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 259 |\n", + "| iterations | 3400 |\n", + "| time_elapsed | 65 |\n", + "| total_timesteps | 17000 |\n", + "| train/ | |\n", + "| entropy_loss | -42.9 |\n", + "| explained_variance | -9.4e+04 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 3399 |\n", + "| policy_loss | -46.1 |\n", + "| std | 1.01 |\n", + "| value_loss | 1.93 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 259 |\n", + "| iterations | 3500 |\n", + "| time_elapsed | 67 |\n", + "| total_timesteps | 17500 |\n", + "| train/ | |\n", + "| entropy_loss | -43 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 3499 |\n", + "| policy_loss | -17.4 |\n", + "| std | 1.01 |\n", + "| value_loss | 5.37 |\n", + "------------------------------------\n", + "begin_total_asset:1000000\n", + "end_total_asset:3307214.1514936504\n", + "total_reward:2307214.1514936504\n", + "total_cost: 23884.956163034414\n", + "total_trades: 42682\n", + "Sharpe: 0.7168631999656054\n", + "=================================\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 259 |\n", + "| iterations | 3600 |\n", + "| time_elapsed | 69 |\n", + "| total_timesteps | 18000 |\n", + "| train/ | |\n", + "| entropy_loss | -42.9 |\n", + "| explained_variance | -4.3e+03 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 3599 |\n", + "| policy_loss | 226 |\n", + "| std | 1.01 |\n", + 
"| value_loss | 28.9 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 260 |\n", + "| iterations | 3700 |\n", + "| time_elapsed | 71 |\n", + "| total_timesteps | 18500 |\n", + "| train/ | |\n", + "| entropy_loss | -42.9 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 3699 |\n", + "| policy_loss | 59.8 |\n", + "| std | 1.01 |\n", + "| value_loss | 8.43 |\n", + "------------------------------------\n", + "-------------------------------------\n", + "| time/ | |\n", + "| fps | 260 |\n", + "| iterations | 3800 |\n", + "| time_elapsed | 72 |\n", + "| total_timesteps | 19000 |\n", + "| train/ | |\n", + "| entropy_loss | -43 |\n", + "| explained_variance | -7.04e+04 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 3799 |\n", + "| policy_loss | 50.8 |\n", + "| std | 1.01 |\n", + "| value_loss | 1.82 |\n", + "-------------------------------------\n", + "-------------------------------------\n", + "| time/ | |\n", + "| fps | 260 |\n", + "| iterations | 3900 |\n", + "| time_elapsed | 74 |\n", + "| total_timesteps | 19500 |\n", + "| train/ | |\n", + "| entropy_loss | -42.9 |\n", + "| explained_variance | -4.89e+08 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 3899 |\n", + "| policy_loss | -457 |\n", + "| std | 1.01 |\n", + "| value_loss | 140 |\n", + "-------------------------------------\n", + "-------------------------------------\n", + "| time/ | |\n", + "| fps | 261 |\n", + "| iterations | 4000 |\n", + "| time_elapsed | 76 |\n", + "| total_timesteps | 20000 |\n", + "| train/ | |\n", + "| entropy_loss | -42.9 |\n", + "| explained_variance | -2.78e+07 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 3999 |\n", + "| policy_loss | -441 |\n", + "| std | 1.01 |\n", + "| value_loss | 143 |\n", + "-------------------------------------\n", + "begin_total_asset:1000000\n", + "end_total_asset:4148540.1545087425\n", + 
"total_reward:3148540.1545087425\n", + "total_cost: 15764.782369253146\n", + "total_trades: 38897\n", + "Sharpe: 0.8610175924981203\n", + "=================================\n", + "-------------------------------------\n", + "| time/ | |\n", + "| fps | 261 |\n", + "| iterations | 4100 |\n", + "| time_elapsed | 78 |\n", + "| total_timesteps | 20500 |\n", + "| train/ | |\n", + "| entropy_loss | -42.9 |\n", + "| explained_variance | -2.42e+04 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 4099 |\n", + "| policy_loss | 76.1 |\n", + "| std | 1.01 |\n", + "| value_loss | 5.77 |\n", + "-------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 261 |\n", + "| iterations | 4200 |\n", + "| time_elapsed | 80 |\n", + "| total_timesteps | 21000 |\n", + "| train/ | |\n", + "| entropy_loss | -42.9 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 4199 |\n", + "| policy_loss | 143 |\n", + "| std | 1.01 |\n", + "| value_loss | 15.3 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 262 |\n", + "| iterations | 4300 |\n", + "| time_elapsed | 81 |\n", + "| total_timesteps | 21500 |\n", + "| train/ | |\n", + "| entropy_loss | -43 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 4299 |\n", + "| policy_loss | 29.3 |\n", + "| std | 1.02 |\n", + "| value_loss | 3.48 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 262 |\n", + "| iterations | 4400 |\n", + "| time_elapsed | 83 |\n", + "| total_timesteps | 22000 |\n", + "| train/ | |\n", + "| entropy_loss | -43 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 4399 |\n", + "| policy_loss | -52.3 |\n", + "| std | 1.02 |\n", + "| value_loss | 3.13 |\n", + "------------------------------------\n", + 
"------------------------------------\n", + "| time/ | |\n", + "| fps | 262 |\n", + "| iterations | 4500 |\n", + "| time_elapsed | 85 |\n", + "| total_timesteps | 22500 |\n", + "| train/ | |\n", + "| entropy_loss | -43 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 4499 |\n", + "| policy_loss | -53.7 |\n", + "| std | 1.02 |\n", + "| value_loss | 14.7 |\n", + "------------------------------------\n", + "begin_total_asset:1000000\n", + "end_total_asset:4751485.433416299\n", + "total_reward:3751485.4334162986\n", + "total_cost: 15499.176757445255\n", + "total_trades: 39836\n", + "Sharpe: 0.9930905921879077\n", + "=================================\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 263 |\n", + "| iterations | 4600 |\n", + "| time_elapsed | 87 |\n", + "| total_timesteps | 23000 |\n", + "| train/ | |\n", + "| entropy_loss | -43.1 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 4599 |\n", + "| policy_loss | -62.3 |\n", + "| std | 1.02 |\n", + "| value_loss | 6.41 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 262 |\n", + "| iterations | 4700 |\n", + "| time_elapsed | 89 |\n", + "| total_timesteps | 23500 |\n", + "| train/ | |\n", + "| entropy_loss | -43.1 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 4699 |\n", + "| policy_loss | -86.6 |\n", + "| std | 1.02 |\n", + "| value_loss | 5.69 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 263 |\n", + "| iterations | 4800 |\n", + "| time_elapsed | 91 |\n", + "| total_timesteps | 24000 |\n", + "| train/ | |\n", + "| entropy_loss | -43.1 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 4799 |\n", + "| policy_loss | -160 |\n", + "| std | 1.02 |\n", + "| 
value_loss | 18.5 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 263 |\n", + "| iterations | 4900 |\n", + "| time_elapsed | 93 |\n", + "| total_timesteps | 24500 |\n", + "| train/ | |\n", + "| entropy_loss | -43.1 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 4899 |\n", + "| policy_loss | -162 |\n", + "| std | 1.02 |\n", + "| value_loss | 20.8 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 263 |\n", + "| iterations | 5000 |\n", + "| time_elapsed | 94 |\n", + "| total_timesteps | 25000 |\n", + "| train/ | |\n", + "| entropy_loss | -43.1 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 4999 |\n", + "| policy_loss | 481 |\n", + "| std | 1.02 |\n", + "| value_loss | 143 |\n", + "------------------------------------\n", + "begin_total_asset:1000000\n", + "end_total_asset:4724903.433106359\n", + "total_reward:3724903.433106359\n", + "total_cost: 8886.69877304687\n", + "total_trades: 38303\n", + "Sharpe: 0.9980131996548207\n", + "=================================\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 263 |\n", + "| iterations | 5100 |\n", + "| time_elapsed | 96 |\n", + "| total_timesteps | 25500 |\n", + "| train/ | |\n", + "| entropy_loss | -43.1 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 5099 |\n", + "| policy_loss | -139 |\n", + "| std | 1.02 |\n", + "| value_loss | 12.7 |\n", + "------------------------------------\n", + "-------------------------------------\n", + "| time/ | |\n", + "| fps | 263 |\n", + "| iterations | 5200 |\n", + "| time_elapsed | 98 |\n", + "| total_timesteps | 26000 |\n", + "| train/ | |\n", + "| entropy_loss | -43.1 |\n", + "| explained_variance | -6.12e+04 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 5199 
|\n", + "| policy_loss | 128 |\n", + "| std | 1.02 |\n", + "| value_loss | 8.81 |\n", + "-------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 263 |\n", + "| iterations | 5300 |\n", + "| time_elapsed | 100 |\n", + "| total_timesteps | 26500 |\n", + "| train/ | |\n", + "| entropy_loss | -43.1 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 5299 |\n", + "| policy_loss | 5.06 |\n", + "| std | 1.02 |\n", + "| value_loss | 1.05 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 263 |\n", + "| iterations | 5400 |\n", + "| time_elapsed | 102 |\n", + "| total_timesteps | 27000 |\n", + "| train/ | |\n", + "| entropy_loss | -43.1 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 5399 |\n", + "| policy_loss | 190 |\n", + "| std | 1.02 |\n", + "| value_loss | 24.3 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 263 |\n", + "| iterations | 5500 |\n", + "| time_elapsed | 104 |\n", + "| total_timesteps | 27500 |\n", + "| train/ | |\n", + "| entropy_loss | -43.1 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 5499 |\n", + "| policy_loss | 42.8 |\n", + "| std | 1.02 |\n", + "| value_loss | 9 |\n", + "------------------------------------\n", + "begin_total_asset:1000000\n", + "end_total_asset:4783015.926924407\n", + "total_reward:3783015.9269244066\n", + "total_cost: 7815.295760473641\n", + "total_trades: 36995\n", + "Sharpe: 0.9898009778895888\n", + "=================================\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 263 |\n", + "| iterations | 5600 |\n", + "| time_elapsed | 106 |\n", + "| total_timesteps | 28000 |\n", + "| train/ | |\n", + "| entropy_loss | -43.1 |\n", + "| explained_variance | nan 
|\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 5599 |\n", + "| policy_loss | -1.76 |\n", + "| std | 1.02 |\n", + "| value_loss | 0.422 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 262 |\n", + "| iterations | 5700 |\n", + "| time_elapsed | 108 |\n", + "| total_timesteps | 28500 |\n", + "| train/ | |\n", + "| entropy_loss | -43.1 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 5699 |\n", + "| policy_loss | -69.8 |\n", + "| std | 1.02 |\n", + "| value_loss | 2.85 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 262 |\n", + "| iterations | 5800 |\n", + "| time_elapsed | 110 |\n", + "| total_timesteps | 29000 |\n", + "| train/ | |\n", + "| entropy_loss | -43.1 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 5799 |\n", + "| policy_loss | 165 |\n", + "| std | 1.02 |\n", + "| value_loss | 15.5 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 263 |\n", + "| iterations | 5900 |\n", + "| time_elapsed | 112 |\n", + "| total_timesteps | 29500 |\n", + "| train/ | |\n", + "| entropy_loss | -43.2 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 5899 |\n", + "| policy_loss | 14.9 |\n", + "| std | 1.02 |\n", + "| value_loss | 2.67 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 263 |\n", + "| iterations | 6000 |\n", + "| time_elapsed | 113 |\n", + "| total_timesteps | 30000 |\n", + "| train/ | |\n", + "| entropy_loss | -43.2 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 5999 |\n", + "| policy_loss | -145 |\n", + "| std | 1.02 |\n", + "| value_loss | 21.6 |\n", + 
"------------------------------------\n", + "begin_total_asset:1000000\n", + "end_total_asset:3767699.432239705\n", + "total_reward:2767699.432239705\n", + "total_cost: 3225.8563617229293\n", + "total_trades: 31503\n", + "Sharpe: 0.8438602812346044\n", + "=================================\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 263 |\n", + "| iterations | 6100 |\n", + "| time_elapsed | 115 |\n", + "| total_timesteps | 30500 |\n", + "| train/ | |\n", + "| entropy_loss | -43.2 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 6099 |\n", + "| policy_loss | 75 |\n", + "| std | 1.02 |\n", + "| value_loss | 3.38 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 263 |\n", + "| iterations | 6200 |\n", + "| time_elapsed | 117 |\n", + "| total_timesteps | 31000 |\n", + "| train/ | |\n", + "| entropy_loss | -43.3 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 6199 |\n", + "| policy_loss | 65.1 |\n", + "| std | 1.02 |\n", + "| value_loss | 4.46 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 264 |\n", + "| iterations | 6300 |\n", + "| time_elapsed | 119 |\n", + "| total_timesteps | 31500 |\n", + "| train/ | |\n", + "| entropy_loss | -43.2 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 6299 |\n", + "| policy_loss | 19.5 |\n", + "| std | 1.02 |\n", + "| value_loss | 4.29 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 264 |\n", + "| iterations | 6400 |\n", + "| time_elapsed | 121 |\n", + "| total_timesteps | 32000 |\n", + "| train/ | |\n", + "| entropy_loss | -43.2 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 6399 |\n", + "| policy_loss | 
131 |\n", + "| std | 1.02 |\n", + "| value_loss | 15.4 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 264 |\n", + "| iterations | 6500 |\n", + "| time_elapsed | 122 |\n", + "| total_timesteps | 32500 |\n", + "| train/ | |\n", + "| entropy_loss | -43.2 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 6499 |\n", + "| policy_loss | 113 |\n", + "| std | 1.02 |\n", + "| value_loss | 38.6 |\n", + "------------------------------------\n", + "begin_total_asset:1000000\n", + "end_total_asset:3966658.0536604635\n", + "total_reward:2966658.0536604635\n", + "total_cost: 7977.4614967514335\n", + "total_trades: 34678\n", + "Sharpe: 0.8831165688078209\n", + "=================================\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 264 |\n", + "| iterations | 6600 |\n", + "| time_elapsed | 124 |\n", + "| total_timesteps | 33000 |\n", + "| train/ | |\n", + "| entropy_loss | -43.2 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 6599 |\n", + "| policy_loss | 5.64 |\n", + "| std | 1.02 |\n", + "| value_loss | 0.305 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 264 |\n", + "| iterations | 6700 |\n", + "| time_elapsed | 126 |\n", + "| total_timesteps | 33500 |\n", + "| train/ | |\n", + "| entropy_loss | -43.2 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 6699 |\n", + "| policy_loss | 5.23 |\n", + "| std | 1.02 |\n", + "| value_loss | 0.54 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 265 |\n", + "| iterations | 6800 |\n", + "| time_elapsed | 128 |\n", + "| total_timesteps | 34000 |\n", + "| train/ | |\n", + "| entropy_loss | -43.2 |\n", + "| explained_variance | nan |\n", + "| 
learning_rate | 0.0007 |\n", + "| n_updates | 6799 |\n", + "| policy_loss | 85.1 |\n", + "| std | 1.02 |\n", + "| value_loss | 6.29 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 265 |\n", + "| iterations | 6900 |\n", + "| time_elapsed | 130 |\n", + "| total_timesteps | 34500 |\n", + "| train/ | |\n", + "| entropy_loss | -43.2 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 6899 |\n", + "| policy_loss | -97.3 |\n", + "| std | 1.02 |\n", + "| value_loss | 9.65 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 265 |\n", + "| iterations | 7000 |\n", + "| time_elapsed | 131 |\n", + "| total_timesteps | 35000 |\n", + "| train/ | |\n", + "| entropy_loss | -43.2 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 6999 |\n", + "| policy_loss | -585 |\n", + "| std | 1.02 |\n", + "| value_loss | 198 |\n", + "------------------------------------\n", + "begin_total_asset:1000000\n", + "end_total_asset:3446294.959740542\n", + "total_reward:2446294.959740542\n", + "total_cost: 3397.7268977155813\n", + "total_trades: 31617\n", + "Sharpe: 0.7885649055566806\n", + "=================================\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 265 |\n", + "| iterations | 7100 |\n", + "| time_elapsed | 133 |\n", + "| total_timesteps | 35500 |\n", + "| train/ | |\n", + "| entropy_loss | -43.2 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 7099 |\n", + "| policy_loss | -23.1 |\n", + "| std | 1.02 |\n", + "| value_loss | 2.04 |\n", + "------------------------------------\n", + "-------------------------------------\n", + "| time/ | |\n", + "| fps | 264 |\n", + "| iterations | 7200 |\n", + "| time_elapsed | 135 |\n", + "| total_timesteps | 36000 |\n", + "| train/ | |\n", + "| 
entropy_loss | -43.2 |\n", + "| explained_variance | -1.25e+03 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 7199 |\n", + "| policy_loss | 25.2 |\n", + "| std | 1.02 |\n", + "| value_loss | 1.22 |\n", + "-------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 264 |\n", + "| iterations | 7300 |\n", + "| time_elapsed | 137 |\n", + "| total_timesteps | 36500 |\n", + "| train/ | |\n", + "| entropy_loss | -43.2 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 7299 |\n", + "| policy_loss | -86.7 |\n", + "| std | 1.02 |\n", + "| value_loss | 6.06 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 265 |\n", + "| iterations | 7400 |\n", + "| time_elapsed | 139 |\n", + "| total_timesteps | 37000 |\n", + "| train/ | |\n", + "| entropy_loss | -43.2 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 7399 |\n", + "| policy_loss | -371 |\n", + "| std | 1.02 |\n", + "| value_loss | 82.5 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 265 |\n", + "| iterations | 7500 |\n", + "| time_elapsed | 141 |\n", + "| total_timesteps | 37500 |\n", + "| train/ | |\n", + "| entropy_loss | -43.2 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 7499 |\n", + "| policy_loss | -34.4 |\n", + "| std | 1.02 |\n", + "| value_loss | 2.71 |\n", + "------------------------------------\n", + "begin_total_asset:1000000\n", + "end_total_asset:3344736.938978183\n", + "total_reward:2344736.938978183\n", + "total_cost: 2206.6413143639265\n", + "total_trades: 31325\n", + "Sharpe: 0.7692258924747282\n", + "=================================\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 265 |\n", + "| iterations | 7600 |\n", + "| 
time_elapsed | 143 |\n", + "| total_timesteps | 38000 |\n", + "| train/ | |\n", + "| entropy_loss | -43.3 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 7599 |\n", + "| policy_loss | 49.6 |\n", + "| std | 1.03 |\n", + "| value_loss | 1.61 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 265 |\n", + "| iterations | 7700 |\n", + "| time_elapsed | 144 |\n", + "| total_timesteps | 38500 |\n", + "| train/ | |\n", + "| entropy_loss | -43.3 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 7699 |\n", + "| policy_loss | -50.2 |\n", + "| std | 1.03 |\n", + "| value_loss | 2.28 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 265 |\n", + "| iterations | 7800 |\n", + "| time_elapsed | 146 |\n", + "| total_timesteps | 39000 |\n", + "| train/ | |\n", + "| entropy_loss | -43.3 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 7799 |\n", + "| policy_loss | 92.3 |\n", + "| std | 1.03 |\n", + "| value_loss | 5.65 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 265 |\n", + "| iterations | 7900 |\n", + "| time_elapsed | 148 |\n", + "| total_timesteps | 39500 |\n", + "| train/ | |\n", + "| entropy_loss | -43.3 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 7899 |\n", + "| policy_loss | -82.3 |\n", + "| std | 1.03 |\n", + "| value_loss | 20.1 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 265 |\n", + "| iterations | 8000 |\n", + "| time_elapsed | 150 |\n", + "| total_timesteps | 40000 |\n", + "| train/ | |\n", + "| entropy_loss | -43.3 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + 
"| n_updates | 7999 |\n", + "| policy_loss | 144 |\n", + "| std | 1.03 |\n", + "| value_loss | 15.5 |\n", + "------------------------------------\n", + "begin_total_asset:1000000\n", + "end_total_asset:3405743.1783298114\n", + "total_reward:2405743.1783298114\n", + "total_cost: 2954.0446352297254\n", + "total_trades: 33773\n", + "Sharpe: 0.8134505006039155\n", + "=================================\n", + "-------------------------------------\n", + "| time/ | |\n", + "| fps | 266 |\n", + "| iterations | 8100 |\n", + "| time_elapsed | 152 |\n", + "| total_timesteps | 40500 |\n", + "| train/ | |\n", + "| entropy_loss | -43.4 |\n", + "| explained_variance | -3.13e+12 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 8099 |\n", + "| policy_loss | 70.7 |\n", + "| std | 1.03 |\n", + "| value_loss | 5.87 |\n", + "-------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 266 |\n", + "| iterations | 8200 |\n", + "| time_elapsed | 154 |\n", + "| total_timesteps | 41000 |\n", + "| train/ | |\n", + "| entropy_loss | -43.4 |\n", + "| explained_variance | -4.64 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 8199 |\n", + "| policy_loss | 171 |\n", + "| std | 1.03 |\n", + "| value_loss | 17 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 266 |\n", + "| iterations | 8300 |\n", + "| time_elapsed | 155 |\n", + "| total_timesteps | 41500 |\n", + "| train/ | |\n", + "| entropy_loss | -43.4 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 8299 |\n", + "| policy_loss | -160 |\n", + "| std | 1.03 |\n", + "| value_loss | 23.3 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 266 |\n", + "| iterations | 8400 |\n", + "| time_elapsed | 157 |\n", + "| total_timesteps | 42000 |\n", + "| train/ | |\n", + "| entropy_loss | -43.5 |\n", 
+ "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 8399 |\n", + "| policy_loss | -85.1 |\n", + "| std | 1.03 |\n", + "| value_loss | 3.98 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 266 |\n", + "| iterations | 8500 |\n", + "| time_elapsed | 159 |\n", + "| total_timesteps | 42500 |\n", + "| train/ | |\n", + "| entropy_loss | -43.5 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 8499 |\n", + "| policy_loss | 63.9 |\n", + "| std | 1.03 |\n", + "| value_loss | 5.08 |\n", + "------------------------------------\n", + "begin_total_asset:1000000\n", + "end_total_asset:3319582.998510127\n", + "total_reward:2319582.998510127\n", + "total_cost: 12366.33568307691\n", + "total_trades: 37206\n", + "Sharpe: 0.7728922919437156\n", + "=================================\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 266 |\n", + "| iterations | 8600 |\n", + "| time_elapsed | 161 |\n", + "| total_timesteps | 43000 |\n", + "| train/ | |\n", + "| entropy_loss | -43.6 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 8599 |\n", + "| policy_loss | -62.1 |\n", + "| std | 1.04 |\n", + "| value_loss | 2.26 |\n", + "------------------------------------\n", + "-------------------------------------\n", + "| time/ | |\n", + "| fps | 266 |\n", + "| iterations | 8700 |\n", + "| time_elapsed | 163 |\n", + "| total_timesteps | 43500 |\n", + "| train/ | |\n", + "| entropy_loss | -43.6 |\n", + "| explained_variance | -2.19e+13 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 8699 |\n", + "| policy_loss | -27.8 |\n", + "| std | 1.04 |\n", + "| value_loss | 5.62 |\n", + "-------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 266 |\n", + "| iterations | 8800 |\n", + "| time_elapsed | 164 |\n", + "| 
total_timesteps | 44000 |\n", + "| train/ | |\n", + "| entropy_loss | -43.6 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 8799 |\n", + "| policy_loss | 59.2 |\n", + "| std | 1.04 |\n", + "| value_loss | 2.79 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 266 |\n", + "| iterations | 8900 |\n", + "| time_elapsed | 166 |\n", + "| total_timesteps | 44500 |\n", + "| train/ | |\n", + "| entropy_loss | -43.7 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 8899 |\n", + "| policy_loss | 40.6 |\n", + "| std | 1.04 |\n", + "| value_loss | 1.43 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 267 |\n", + "| iterations | 9000 |\n", + "| time_elapsed | 168 |\n", + "| total_timesteps | 45000 |\n", + "| train/ | |\n", + "| entropy_loss | -43.7 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 8999 |\n", + "| policy_loss | -86.3 |\n", + "| std | 1.04 |\n", + "| value_loss | 6.33 |\n", + "------------------------------------\n", + "begin_total_asset:1000000\n", + "end_total_asset:2904244.1476431573\n", + "total_reward:1904244.1476431573\n", + "total_cost: 15007.745762967967\n", + "total_trades: 37861\n", + "Sharpe: 0.7277540513736201\n", + "=================================\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 267 |\n", + "| iterations | 9100 |\n", + "| time_elapsed | 170 |\n", + "| total_timesteps | 45500 |\n", + "| train/ | |\n", + "| entropy_loss | -43.7 |\n", + "| explained_variance | -37.3 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 9099 |\n", + "| policy_loss | -252 |\n", + "| std | 1.04 |\n", + "| value_loss | 35.1 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 267 
|\n", + "| iterations | 9200 |\n", + "| time_elapsed | 172 |\n", + "| total_timesteps | 46000 |\n", + "| train/ | |\n", + "| entropy_loss | -43.8 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 9199 |\n", + "| policy_loss | 129 |\n", + "| std | 1.04 |\n", + "| value_loss | 10.8 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 267 |\n", + "| iterations | 9300 |\n", + "| time_elapsed | 173 |\n", + "| total_timesteps | 46500 |\n", + "| train/ | |\n", + "| entropy_loss | -43.8 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 9299 |\n", + "| policy_loss | 57.2 |\n", + "| std | 1.04 |\n", + "| value_loss | 3.01 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 267 |\n", + "| iterations | 9400 |\n", + "| time_elapsed | 175 |\n", + "| total_timesteps | 47000 |\n", + "| train/ | |\n", + "| entropy_loss | -43.8 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 9399 |\n", + "| policy_loss | -63.5 |\n", + "| std | 1.04 |\n", + "| value_loss | 2.74 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 267 |\n", + "| iterations | 9500 |\n", + "| time_elapsed | 177 |\n", + "| total_timesteps | 47500 |\n", + "| train/ | |\n", + "| entropy_loss | -43.8 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 9499 |\n", + "| policy_loss | 17.2 |\n", + "| std | 1.04 |\n", + "| value_loss | 3.6 |\n", + "------------------------------------\n", + "begin_total_asset:1000000\n", + "end_total_asset:3181599.2553931386\n", + "total_reward:2181599.2553931386\n", + "total_cost: 6695.658203102723\n", + "total_trades: 37040\n", + "Sharpe: 0.7662862328769516\n", + "=================================\n", + 
"------------------------------------\n", + "| time/ | |\n", + "| fps | 267 |\n", + "| iterations | 9600 |\n", + "| time_elapsed | 179 |\n", + "| total_timesteps | 48000 |\n", + "| train/ | |\n", + "| entropy_loss | -43.8 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 9599 |\n", + "| policy_loss | 87 |\n", + "| std | 1.04 |\n", + "| value_loss | 5.95 |\n", + "------------------------------------\n", + "-------------------------------------\n", + "| time/ | |\n", + "| fps | 267 |\n", + "| iterations | 9700 |\n", + "| time_elapsed | 181 |\n", + "| total_timesteps | 48500 |\n", + "| train/ | |\n", + "| entropy_loss | -43.9 |\n", + "| explained_variance | -4.02e+12 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 9699 |\n", + "| policy_loss | 65 |\n", + "| std | 1.05 |\n", + "| value_loss | 5.72 |\n", + "-------------------------------------\n", + "-------------------------------------\n", + "| time/ | |\n", + "| fps | 267 |\n", + "| iterations | 9800 |\n", + "| time_elapsed | 183 |\n", + "| total_timesteps | 49000 |\n", + "| train/ | |\n", + "| entropy_loss | -44 |\n", + "| explained_variance | -4.34e+12 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 9799 |\n", + "| policy_loss | -82.4 |\n", + "| std | 1.05 |\n", + "| value_loss | 7.55 |\n", + "-------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 267 |\n", + "| iterations | 9900 |\n", + "| time_elapsed | 184 |\n", + "| total_timesteps | 49500 |\n", + "| train/ | |\n", + "| entropy_loss | -44 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 9899 |\n", + "| policy_loss | -233 |\n", + "| std | 1.05 |\n", + "| value_loss | 34.3 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 267 |\n", + "| iterations | 10000 |\n", + "| time_elapsed | 186 |\n", + "| total_timesteps | 50000 |\n", + 
"| train/ | |\n", + "| entropy_loss | -44 |\n", + "| explained_variance | -212 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 9999 |\n", + "| policy_loss | 125 |\n", + "| std | 1.05 |\n", + "| value_loss | 15.3 |\n", + "------------------------------------\n", + "begin_total_asset:1000000\n", + "end_total_asset:3163155.7293832605\n", + "total_reward:2163155.7293832605\n", + "total_cost: 2870.1664502791505\n", + "total_trades: 34933\n", + "Sharpe: 0.7643903649884202\n", + "=================================\n", + "-------------------------------------\n", + "| time/ | |\n", + "| fps | 267 |\n", + "| iterations | 10100 |\n", + "| time_elapsed | 188 |\n", + "| total_timesteps | 50500 |\n", + "| train/ | |\n", + "| entropy_loss | -44 |\n", + "| explained_variance | -6.08e+03 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 10099 |\n", + "| policy_loss | 128 |\n", + "| std | 1.05 |\n", + "| value_loss | 12.8 |\n", + "-------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 267 |\n", + "| iterations | 10200 |\n", + "| time_elapsed | 190 |\n", + "| total_timesteps | 51000 |\n", + "| train/ | |\n", + "| entropy_loss | -44.1 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 10199 |\n", + "| policy_loss | -39.2 |\n", + "| std | 1.05 |\n", + "| value_loss | 10.6 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 268 |\n", + "| iterations | 10300 |\n", + "| time_elapsed | 192 |\n", + "| total_timesteps | 51500 |\n", + "| train/ | |\n", + "| entropy_loss | -44.1 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 10299 |\n", + "| policy_loss | 74.1 |\n", + "| std | 1.06 |\n", + "| value_loss | 2.81 |\n", + "------------------------------------\n", + "-------------------------------------\n", + "| time/ | |\n", + "| fps | 268 |\n", + "| iterations | 
10400 |\n", + "| time_elapsed | 193 |\n", + "| total_timesteps | 52000 |\n", + "| train/ | |\n", + "| entropy_loss | -44.1 |\n", + "| explained_variance | -1.17e+04 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 10399 |\n", + "| policy_loss | 241 |\n", + "| std | 1.05 |\n", + "| value_loss | 53.4 |\n", + "-------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 268 |\n", + "| iterations | 10500 |\n", + "| time_elapsed | 195 |\n", + "| total_timesteps | 52500 |\n", + "| train/ | |\n", + "| entropy_loss | -44.1 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 10499 |\n", + "| policy_loss | -66.3 |\n", + "| std | 1.06 |\n", + "| value_loss | 6.42 |\n", + "------------------------------------\n", + "begin_total_asset:1000000\n", + "end_total_asset:3196491.408967822\n", + "total_reward:2196491.408967822\n", + "total_cost: 4270.783389629947\n", + "total_trades: 41108\n", + "Sharpe: 0.7902910911867141\n", + "=================================\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 268 |\n", + "| iterations | 10600 |\n", + "| time_elapsed | 197 |\n", + "| total_timesteps | 53000 |\n", + "| train/ | |\n", + "| entropy_loss | -44.1 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 10599 |\n", + "| policy_loss | 22.2 |\n", + "| std | 1.06 |\n", + "| value_loss | 2.71 |\n", + "------------------------------------\n", + "-------------------------------------\n", + "| time/ | |\n", + "| fps | 268 |\n", + "| iterations | 10700 |\n", + "| time_elapsed | 199 |\n", + "| total_timesteps | 53500 |\n", + "| train/ | |\n", + "| entropy_loss | -44.2 |\n", + "| explained_variance | -3.64e+03 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 10699 |\n", + "| policy_loss | 246 |\n", + "| std | 1.06 |\n", + "| value_loss | 43.3 |\n", + "-------------------------------------\n", + 
"------------------------------------\n", + "| time/ | |\n", + "| fps | 268 |\n", + "| iterations | 10800 |\n", + "| time_elapsed | 201 |\n", + "| total_timesteps | 54000 |\n", + "| train/ | |\n", + "| entropy_loss | -44.2 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 10799 |\n", + "| policy_loss | -146 |\n", + "| std | 1.06 |\n", + "| value_loss | 12 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 268 |\n", + "| iterations | 10900 |\n", + "| time_elapsed | 203 |\n", + "| total_timesteps | 54500 |\n", + "| train/ | |\n", + "| entropy_loss | -44.2 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 10899 |\n", + "| policy_loss | -263 |\n", + "| std | 1.06 |\n", + "| value_loss | 37.1 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 268 |\n", + "| iterations | 11000 |\n", + "| time_elapsed | 205 |\n", + "| total_timesteps | 55000 |\n", + "| train/ | |\n", + "| entropy_loss | -44.2 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 10999 |\n", + "| policy_loss | 114 |\n", + "| std | 1.06 |\n", + "| value_loss | 12.2 |\n", + "------------------------------------\n", + "begin_total_asset:1000000\n", + "end_total_asset:3849179.1372045293\n", + "total_reward:2849179.1372045293\n", + "total_cost: 14247.086195249696\n", + "total_trades: 45210\n", + "Sharpe: 0.9919759691333234\n", + "=================================\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 268 |\n", + "| iterations | 11100 |\n", + "| time_elapsed | 207 |\n", + "| total_timesteps | 55500 |\n", + "| train/ | |\n", + "| entropy_loss | -44.2 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 11099 |\n", + "| policy_loss | -54.8 |\n", + "| std | 1.06 |\n", + 
"| value_loss | 3.89 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 268 |\n", + "| iterations | 11200 |\n", + "| time_elapsed | 208 |\n", + "| total_timesteps | 56000 |\n", + "| train/ | |\n", + "| entropy_loss | -44.3 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 11199 |\n", + "| policy_loss | 105 |\n", + "| std | 1.06 |\n", + "| value_loss | 7.82 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 268 |\n", + "| iterations | 11300 |\n", + "| time_elapsed | 210 |\n", + "| total_timesteps | 56500 |\n", + "| train/ | |\n", + "| entropy_loss | -44.2 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 11299 |\n", + "| policy_loss | 51.1 |\n", + "| std | 1.06 |\n", + "| value_loss | 2.34 |\n", + "------------------------------------\n", + "-------------------------------------\n", + "| time/ | |\n", + "| fps | 268 |\n", + "| iterations | 11400 |\n", + "| time_elapsed | 212 |\n", + "| total_timesteps | 57000 |\n", + "| train/ | |\n", + "| entropy_loss | -44.2 |\n", + "| explained_variance | -7.43e+12 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 11399 |\n", + "| policy_loss | 126 |\n", + "| std | 1.06 |\n", + "| value_loss | 15.9 |\n", + "-------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 268 |\n", + "| iterations | 11500 |\n", + "| time_elapsed | 214 |\n", + "| total_timesteps | 57500 |\n", + "| train/ | |\n", + "| entropy_loss | -44.2 |\n", + "| explained_variance | -11.7 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 11499 |\n", + "| policy_loss | -122 |\n", + "| std | 1.06 |\n", + "| value_loss | 9.54 |\n", + "------------------------------------\n", + "begin_total_asset:1000000\n", + "end_total_asset:3576028.4597782856\n", + 
"total_reward:2576028.4597782856\n", + "total_cost: 9016.778400975834\n", + "total_trades: 42915\n", + "Sharpe: 0.8953228502423565\n", + "=================================\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 268 |\n", + "| iterations | 11600 |\n", + "| time_elapsed | 216 |\n", + "| total_timesteps | 58000 |\n", + "| train/ | |\n", + "| entropy_loss | -44.3 |\n", + "| explained_variance | -425 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 11599 |\n", + "| policy_loss | -120 |\n", + "| std | 1.06 |\n", + "| value_loss | 10.6 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 268 |\n", + "| iterations | 11700 |\n", + "| time_elapsed | 218 |\n", + "| total_timesteps | 58500 |\n", + "| train/ | |\n", + "| entropy_loss | -44.3 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 11699 |\n", + "| policy_loss | 46.7 |\n", + "| std | 1.06 |\n", + "| value_loss | 3.25 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 268 |\n", + "| iterations | 11800 |\n", + "| time_elapsed | 219 |\n", + "| total_timesteps | 59000 |\n", + "| train/ | |\n", + "| entropy_loss | -44.4 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 11799 |\n", + "| policy_loss | -16.5 |\n", + "| std | 1.06 |\n", + "| value_loss | 7.51 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 268 |\n", + "| iterations | 11900 |\n", + "| time_elapsed | 221 |\n", + "| total_timesteps | 59500 |\n", + "| train/ | |\n", + "| entropy_loss | -44.4 |\n", + "| explained_variance | -62.2 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 11899 |\n", + "| policy_loss | 115 |\n", + "| std | 1.07 |\n", + "| value_loss | 9.24 |\n", + "------------------------------------\n", 
+ "------------------------------------\n", + "| time/ | |\n", + "| fps | 268 |\n", + "| iterations | 12000 |\n", + "| time_elapsed | 223 |\n", + "| total_timesteps | 60000 |\n", + "| train/ | |\n", + "| entropy_loss | -44.4 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 11999 |\n", + "| policy_loss | 0.0658 |\n", + "| std | 1.06 |\n", + "| value_loss | 4.37 |\n", + "------------------------------------\n", + "begin_total_asset:1000000\n", + "end_total_asset:3436426.812452521\n", + "total_reward:2436426.812452521\n", + "total_cost: 6259.129675209552\n", + "total_trades: 41073\n", + "Sharpe: 0.8546131042738302\n", + "=================================\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 268 |\n", + "| iterations | 12100 |\n", + "| time_elapsed | 225 |\n", + "| total_timesteps | 60500 |\n", + "| train/ | |\n", + "| entropy_loss | -44.4 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 12099 |\n", + "| policy_loss | -14.7 |\n", + "| std | 1.07 |\n", + "| value_loss | 0.461 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 268 |\n", + "| iterations | 12200 |\n", + "| time_elapsed | 227 |\n", + "| total_timesteps | 61000 |\n", + "| train/ | |\n", + "| entropy_loss | -44.5 |\n", + "| explained_variance | -32.5 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 12199 |\n", + "| policy_loss | -114 |\n", + "| std | 1.07 |\n", + "| value_loss | 14 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 268 |\n", + "| iterations | 12300 |\n", + "| time_elapsed | 229 |\n", + "| total_timesteps | 61500 |\n", + "| train/ | |\n", + "| entropy_loss | -44.5 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 12299 |\n", + "| policy_loss | -42.1 |\n", + "| std | 1.07 
|\n", + "| value_loss | 4.82 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 268 |\n", + "| iterations | 12400 |\n", + "| time_elapsed | 231 |\n", + "| total_timesteps | 62000 |\n", + "| train/ | |\n", + "| entropy_loss | -44.5 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 12399 |\n", + "| policy_loss | -34.7 |\n", + "| std | 1.07 |\n", + "| value_loss | 1.68 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 268 |\n", + "| iterations | 12500 |\n", + "| time_elapsed | 232 |\n", + "| total_timesteps | 62500 |\n", + "| train/ | |\n", + "| entropy_loss | -44.6 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 12499 |\n", + "| policy_loss | 76.1 |\n", + "| std | 1.07 |\n", + "| value_loss | 8.46 |\n", + "------------------------------------\n", + "begin_total_asset:1000000\n", + "end_total_asset:3018532.345473118\n", + "total_reward:2018532.3454731181\n", + "total_cost: 6047.126481140976\n", + "total_trades: 42707\n", + "Sharpe: 0.7384948297244762\n", + "=================================\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 268 |\n", + "| iterations | 12600 |\n", + "| time_elapsed | 234 |\n", + "| total_timesteps | 63000 |\n", + "| train/ | |\n", + "| entropy_loss | -44.6 |\n", + "| explained_variance | -553 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 12599 |\n", + "| policy_loss | -18.4 |\n", + "| std | 1.07 |\n", + "| value_loss | 4.33 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 268 |\n", + "| iterations | 12700 |\n", + "| time_elapsed | 236 |\n", + "| total_timesteps | 63500 |\n", + "| train/ | |\n", + "| entropy_loss | -44.6 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", 
+ "| n_updates | 12699 |\n", + "| policy_loss | -156 |\n", + "| std | 1.07 |\n", + "| value_loss | 16.8 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 268 |\n", + "| iterations | 12800 |\n", + "| time_elapsed | 238 |\n", + "| total_timesteps | 64000 |\n", + "| train/ | |\n", + "| entropy_loss | -44.6 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 12799 |\n", + "| policy_loss | 86 |\n", + "| std | 1.07 |\n", + "| value_loss | 4.19 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 268 |\n", + "| iterations | 12900 |\n", + "| time_elapsed | 240 |\n", + "| total_timesteps | 64500 |\n", + "| train/ | |\n", + "| entropy_loss | -44.6 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 12899 |\n", + "| policy_loss | -77.7 |\n", + "| std | 1.07 |\n", + "| value_loss | 5.54 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 268 |\n", + "| iterations | 13000 |\n", + "| time_elapsed | 241 |\n", + "| total_timesteps | 65000 |\n", + "| train/ | |\n", + "| entropy_loss | -44.6 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 12999 |\n", + "| policy_loss | -48.1 |\n", + "| std | 1.07 |\n", + "| value_loss | 3.39 |\n", + "------------------------------------\n", + "begin_total_asset:1000000\n", + "end_total_asset:3005454.017886528\n", + "total_reward:2005454.0178865278\n", + "total_cost: 5775.348413782655\n", + "total_trades: 37868\n", + "Sharpe: 0.6834871369231124\n", + "=================================\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 268 |\n", + "| iterations | 13100 |\n", + "| time_elapsed | 243 |\n", + "| total_timesteps | 65500 |\n", + "| train/ | |\n", + "| entropy_loss | -44.6 
|\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 13099 |\n", + "| policy_loss | -41.1 |\n", + "| std | 1.07 |\n", + "| value_loss | 0.966 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 268 |\n", + "| iterations | 13200 |\n", + "| time_elapsed | 245 |\n", + "| total_timesteps | 66000 |\n", + "| train/ | |\n", + "| entropy_loss | -44.6 |\n", + "| explained_variance | -20.2 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 13199 |\n", + "| policy_loss | -51.7 |\n", + "| std | 1.07 |\n", + "| value_loss | 5.59 |\n", + "------------------------------------\n", + "-------------------------------------\n", + "| time/ | |\n", + "| fps | 268 |\n", + "| iterations | 13300 |\n", + "| time_elapsed | 247 |\n", + "| total_timesteps | 66500 |\n", + "| train/ | |\n", + "| entropy_loss | -44.6 |\n", + "| explained_variance | -6.77e+12 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 13299 |\n", + "| policy_loss | -257 |\n", + "| std | 1.07 |\n", + "| value_loss | 43.6 |\n", + "-------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 268 |\n", + "| iterations | 13400 |\n", + "| time_elapsed | 249 |\n", + "| total_timesteps | 67000 |\n", + "| train/ | |\n", + "| entropy_loss | -44.6 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 13399 |\n", + "| policy_loss | 101 |\n", + "| std | 1.07 |\n", + "| value_loss | 5.95 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 268 |\n", + "| iterations | 13500 |\n", + "| time_elapsed | 251 |\n", + "| total_timesteps | 67500 |\n", + "| train/ | |\n", + "| entropy_loss | -44.6 |\n", + "| explained_variance | -103 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 13499 |\n", + "| policy_loss | -60.1 |\n", + "| std | 1.07 |\n", + "| 
value_loss | 2.95 |\n", + "------------------------------------\n", + "begin_total_asset:1000000\n", + "end_total_asset:2861607.410381282\n", + "total_reward:1861607.4103812822\n", + "total_cost: 5185.6480773171215\n", + "total_trades: 32918\n", + "Sharpe: 0.627333223770252\n", + "=================================\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 268 |\n", + "| iterations | 13600 |\n", + "| time_elapsed | 252 |\n", + "| total_timesteps | 68000 |\n", + "| train/ | |\n", + "| entropy_loss | -44.6 |\n", + "| explained_variance | -15 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 13599 |\n", + "| policy_loss | 291 |\n", + "| std | 1.07 |\n", + "| value_loss | 51.8 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 268 |\n", + "| iterations | 13700 |\n", + "| time_elapsed | 254 |\n", + "| total_timesteps | 68500 |\n", + "| train/ | |\n", + "| entropy_loss | -44.6 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 13699 |\n", + "| policy_loss | 13.2 |\n", + "| std | 1.07 |\n", + "| value_loss | 0.659 |\n", + "------------------------------------\n", + "-------------------------------------\n", + "| time/ | |\n", + "| fps | 268 |\n", + "| iterations | 13800 |\n", + "| time_elapsed | 256 |\n", + "| total_timesteps | 69000 |\n", + "| train/ | |\n", + "| entropy_loss | -44.6 |\n", + "| explained_variance | -1.15e+12 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 13799 |\n", + "| policy_loss | 11.6 |\n", + "| std | 1.07 |\n", + "| value_loss | 1.12 |\n", + "-------------------------------------\n", + "-------------------------------------\n", + "| time/ | |\n", + "| fps | 268 |\n", + "| iterations | 13900 |\n", + "| time_elapsed | 258 |\n", + "| total_timesteps | 69500 |\n", + "| train/ | |\n", + "| entropy_loss | -44.6 |\n", + "| explained_variance | -8.56e+08 |\n", + "| learning_rate | 0.0007 
|\n", + "| n_updates | 13899 |\n", + "| policy_loss | 150 |\n", + "| std | 1.07 |\n", + "| value_loss | 13.7 |\n", + "-------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 268 |\n", + "| iterations | 14000 |\n", + "| time_elapsed | 260 |\n", + "| total_timesteps | 70000 |\n", + "| train/ | |\n", + "| entropy_loss | -44.6 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 13999 |\n", + "| policy_loss | -43.5 |\n", + "| std | 1.07 |\n", + "| value_loss | 1.76 |\n", + "------------------------------------\n", + "begin_total_asset:1000000\n", + "end_total_asset:3191285.6374592897\n", + "total_reward:2191285.6374592897\n", + "total_cost: 4185.107238528008\n", + "total_trades: 33416\n", + "Sharpe: 0.715991374478748\n", + "=================================\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 14100 |\n", + "| time_elapsed | 262 |\n", + "| total_timesteps | 70500 |\n", + "| train/ | |\n", + "| entropy_loss | -44.6 |\n", + "| explained_variance | -31.8 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 14099 |\n", + "| policy_loss | 1.39e+03 |\n", + "| std | 1.07 |\n", + "| value_loss | 944 |\n", + "------------------------------------\n", + "-------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 14200 |\n", + "| time_elapsed | 263 |\n", + "| total_timesteps | 71000 |\n", + "| train/ | |\n", + "| entropy_loss | -44.6 |\n", + "| explained_variance | -2.69e+12 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 14199 |\n", + "| policy_loss | -96.5 |\n", + "| std | 1.07 |\n", + "| value_loss | 6.91 |\n", + "-------------------------------------\n", + "-------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 14300 |\n", + "| time_elapsed | 265 |\n", + "| total_timesteps | 71500 |\n", + "| train/ | |\n", + "| 
entropy_loss | -44.6 |\n", + "| explained_variance | -3.11e+12 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 14299 |\n", + "| policy_loss | 94.2 |\n", + "| std | 1.07 |\n", + "| value_loss | 7.25 |\n", + "-------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 14400 |\n", + "| time_elapsed | 267 |\n", + "| total_timesteps | 72000 |\n", + "| train/ | |\n", + "| entropy_loss | -44.7 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 14399 |\n", + "| policy_loss | 21 |\n", + "| std | 1.08 |\n", + "| value_loss | 1.09 |\n", + "------------------------------------\n", + "-------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 14500 |\n", + "| time_elapsed | 269 |\n", + "| total_timesteps | 72500 |\n", + "| train/ | |\n", + "| entropy_loss | -44.7 |\n", + "| explained_variance | -1.56e+12 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 14499 |\n", + "| policy_loss | 114 |\n", + "| std | 1.08 |\n", + "| value_loss | 6.86 |\n", + "-------------------------------------\n", + "begin_total_asset:1000000\n", + "end_total_asset:3276649.777189667\n", + "total_reward:2276649.777189667\n", + "total_cost: 3942.9014864051105\n", + "total_trades: 34694\n", + "Sharpe: 0.7189915467634915\n", + "=================================\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 14600 |\n", + "| time_elapsed | 271 |\n", + "| total_timesteps | 73000 |\n", + "| train/ | |\n", + "| entropy_loss | -44.8 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 14599 |\n", + "| policy_loss | -80.3 |\n", + "| std | 1.08 |\n", + "| value_loss | 4.13 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 14700 |\n", + "| 
time_elapsed | 272 |\n", + "| total_timesteps | 73500 |\n", + "| train/ | |\n", + "| entropy_loss | -44.8 |\n", + "| explained_variance | -42.9 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 14699 |\n", + "| policy_loss | 5.46 |\n", + "| std | 1.08 |\n", + "| value_loss | 1.23 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 14800 |\n", + "| time_elapsed | 274 |\n", + "| total_timesteps | 74000 |\n", + "| train/ | |\n", + "| entropy_loss | -44.8 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 14799 |\n", + "| policy_loss | -41.4 |\n", + "| std | 1.08 |\n", + "| value_loss | 1.92 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 14900 |\n", + "| time_elapsed | 276 |\n", + "| total_timesteps | 74500 |\n", + "| train/ | |\n", + "| entropy_loss | -44.9 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 14899 |\n", + "| policy_loss | 69.1 |\n", + "| std | 1.08 |\n", + "| value_loss | 9.59 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 15000 |\n", + "| time_elapsed | 278 |\n", + "| total_timesteps | 75000 |\n", + "| train/ | |\n", + "| entropy_loss | -44.9 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 14999 |\n", + "| policy_loss | -10.7 |\n", + "| std | 1.08 |\n", + "| value_loss | 0.911 |\n", + "------------------------------------\n", + "begin_total_asset:1000000\n", + "end_total_asset:3508348.255896097\n", + "total_reward:2508348.255896097\n", + "total_cost: 11208.941549323808\n", + "total_trades: 37043\n", + "Sharpe: 0.8124699557413589\n", + "=================================\n", + 
"------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 15100 |\n", + "| time_elapsed | 280 |\n", + "| total_timesteps | 75500 |\n", + "| train/ | |\n", + "| entropy_loss | -44.8 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 15099 |\n", + "| policy_loss | 2.28 |\n", + "| std | 1.08 |\n", + "| value_loss | 0.074 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 15200 |\n", + "| time_elapsed | 281 |\n", + "| total_timesteps | 76000 |\n", + "| train/ | |\n", + "| entropy_loss | -44.8 |\n", + "| explained_variance | -1.87 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 15199 |\n", + "| policy_loss | -80 |\n", + "| std | 1.08 |\n", + "| value_loss | 3.56 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 15300 |\n", + "| time_elapsed | 283 |\n", + "| total_timesteps | 76500 |\n", + "| train/ | |\n", + "| entropy_loss | -44.8 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 15299 |\n", + "| policy_loss | 8.44 |\n", + "| std | 1.08 |\n", + "| value_loss | 1.24 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 15400 |\n", + "| time_elapsed | 285 |\n", + "| total_timesteps | 77000 |\n", + "| train/ | |\n", + "| entropy_loss | -44.8 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 15399 |\n", + "| policy_loss | 276 |\n", + "| std | 1.08 |\n", + "| value_loss | 57.5 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 15500 |\n", + "| time_elapsed | 287 |\n", + "| total_timesteps | 77500 |\n", + 
"| train/ | |\n", + "| entropy_loss | -44.9 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 15499 |\n", + "| policy_loss | 160 |\n", + "| std | 1.08 |\n", + "| value_loss | 16.4 |\n", + "------------------------------------\n", + "begin_total_asset:1000000\n", + "end_total_asset:4416862.49751315\n", + "total_reward:3416862.49751315\n", + "total_cost: 18937.26260040585\n", + "total_trades: 37061\n", + "Sharpe: 0.9703548552780149\n", + "=================================\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 15600 |\n", + "| time_elapsed | 289 |\n", + "| total_timesteps | 78000 |\n", + "| train/ | |\n", + "| entropy_loss | -45 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 15599 |\n", + "| policy_loss | 577 |\n", + "| std | 1.09 |\n", + "| value_loss | 273 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 15700 |\n", + "| time_elapsed | 290 |\n", + "| total_timesteps | 78500 |\n", + "| train/ | |\n", + "| entropy_loss | -44.9 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 15699 |\n", + "| policy_loss | -72.5 |\n", + "| std | 1.09 |\n", + "| value_loss | 3.3 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 15800 |\n", + "| time_elapsed | 292 |\n", + "| total_timesteps | 79000 |\n", + "| train/ | |\n", + "| entropy_loss | -45 |\n", + "| explained_variance | -271 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 15799 |\n", + "| policy_loss | -63.8 |\n", + "| std | 1.09 |\n", + "| value_loss | 3.84 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 15900 |\n", + 
"| time_elapsed | 294 |\n", + "| total_timesteps | 79500 |\n", + "| train/ | |\n", + "| entropy_loss | -45 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 15899 |\n", + "| policy_loss | -514 |\n", + "| std | 1.09 |\n", + "| value_loss | 170 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 270 |\n", + "| iterations | 16000 |\n", + "| time_elapsed | 296 |\n", + "| total_timesteps | 80000 |\n", + "| train/ | |\n", + "| entropy_loss | -45 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 15999 |\n", + "| policy_loss | 293 |\n", + "| std | 1.09 |\n", + "| value_loss | 53.8 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 270 |\n", + "| iterations | 16100 |\n", + "| time_elapsed | 298 |\n", + "| total_timesteps | 80500 |\n", + "| train/ | |\n", + "| entropy_loss | -45 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 16099 |\n", + "| policy_loss | -312 |\n", + "| std | 1.09 |\n", + "| value_loss | 109 |\n", + "------------------------------------\n", + "begin_total_asset:1000000\n", + "end_total_asset:6572073.540279714\n", + "total_reward:5572073.540279714\n", + "total_cost: 25558.900906312338\n", + "total_trades: 38195\n", + "Sharpe: 1.1694339512811986\n", + "=================================\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 270 |\n", + "| iterations | 16200 |\n", + "| time_elapsed | 299 |\n", + "| total_timesteps | 81000 |\n", + "| train/ | |\n", + "| entropy_loss | -45 |\n", + "| explained_variance | -509 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 16199 |\n", + "| policy_loss | 257 |\n", + "| std | 1.09 |\n", + "| value_loss | 32.5 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ 
| |\n", + "| fps | 270 |\n", + "| iterations | 16300 |\n", + "| time_elapsed | 301 |\n", + "| total_timesteps | 81500 |\n", + "| train/ | |\n", + "| entropy_loss | -45.1 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 16299 |\n", + "| policy_loss | 117 |\n", + "| std | 1.09 |\n", + "| value_loss | 9.1 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 270 |\n", + "| iterations | 16400 |\n", + "| time_elapsed | 303 |\n", + "| total_timesteps | 82000 |\n", + "| train/ | |\n", + "| entropy_loss | -45.1 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 16399 |\n", + "| policy_loss | 262 |\n", + "| std | 1.09 |\n", + "| value_loss | 35.9 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 270 |\n", + "| iterations | 16500 |\n", + "| time_elapsed | 305 |\n", + "| total_timesteps | 82500 |\n", + "| train/ | |\n", + "| entropy_loss | -45.1 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 16499 |\n", + "| policy_loss | -45 |\n", + "| std | 1.09 |\n", + "| value_loss | 2.27 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 270 |\n", + "| iterations | 16600 |\n", + "| time_elapsed | 307 |\n", + "| total_timesteps | 83000 |\n", + "| train/ | |\n", + "| entropy_loss | -45.1 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 16599 |\n", + "| policy_loss | -561 |\n", + "| std | 1.09 |\n", + "| value_loss | 236 |\n", + "------------------------------------\n", + "begin_total_asset:1000000\n", + "end_total_asset:5698994.463846846\n", + "total_reward:4698994.463846846\n", + "total_cost: 17337.4506195575\n", + "total_trades: 36912\n", + "Sharpe: 1.0295608824494007\n", + 
"=================================\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 270 |\n", + "| iterations | 16700 |\n", + "| time_elapsed | 308 |\n", + "| total_timesteps | 83500 |\n", + "| train/ | |\n", + "| entropy_loss | -45.1 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 16699 |\n", + "| policy_loss | -54.8 |\n", + "| std | 1.09 |\n", + "| value_loss | 2.36 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 270 |\n", + "| iterations | 16800 |\n", + "| time_elapsed | 310 |\n", + "| total_timesteps | 84000 |\n", + "| train/ | |\n", + "| entropy_loss | -45.2 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 16799 |\n", + "| policy_loss | 56.3 |\n", + "| std | 1.09 |\n", + "| value_loss | 4.36 |\n", + "------------------------------------\n", + "-------------------------------------\n", + "| time/ | |\n", + "| fps | 270 |\n", + "| iterations | 16900 |\n", + "| time_elapsed | 312 |\n", + "| total_timesteps | 84500 |\n", + "| train/ | |\n", + "| entropy_loss | -45.2 |\n", + "| explained_variance | -7.42e+03 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 16899 |\n", + "| policy_loss | 20.5 |\n", + "| std | 1.1 |\n", + "| value_loss | 6.59 |\n", + "-------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 270 |\n", + "| iterations | 17000 |\n", + "| time_elapsed | 314 |\n", + "| total_timesteps | 85000 |\n", + "| train/ | |\n", + "| entropy_loss | -45.2 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 16999 |\n", + "| policy_loss | 306 |\n", + "| std | 1.1 |\n", + "| value_loss | 66.7 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 270 |\n", + "| iterations | 17100 |\n", + "| time_elapsed | 
316 |\n", + "| total_timesteps | 85500 |\n", + "| train/ | |\n", + "| entropy_loss | -45.3 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 17099 |\n", + "| policy_loss | -195 |\n", + "| std | 1.1 |\n", + "| value_loss | 66.1 |\n", + "------------------------------------\n", + "begin_total_asset:1000000\n", + "end_total_asset:6381904.8543528775\n", + "total_reward:5381904.8543528775\n", + "total_cost: 12508.200039626663\n", + "total_trades: 35689\n", + "Sharpe: 1.1424293800622989\n", + "=================================\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 270 |\n", + "| iterations | 17200 |\n", + "| time_elapsed | 317 |\n", + "| total_timesteps | 86000 |\n", + "| train/ | |\n", + "| entropy_loss | -45.3 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 17199 |\n", + "| policy_loss | 30 |\n", + "| std | 1.1 |\n", + "| value_loss | 0.588 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 270 |\n", + "| iterations | 17300 |\n", + "| time_elapsed | 319 |\n", + "| total_timesteps | 86500 |\n", + "| train/ | |\n", + "| entropy_loss | -45.2 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 17299 |\n", + "| policy_loss | -206 |\n", + "| std | 1.1 |\n", + "| value_loss | 21.8 |\n", + "------------------------------------\n", + "-------------------------------------\n", + "| time/ | |\n", + "| fps | 270 |\n", + "| iterations | 17400 |\n", + "| time_elapsed | 321 |\n", + "| total_timesteps | 87000 |\n", + "| train/ | |\n", + "| entropy_loss | -45.3 |\n", + "| explained_variance | -5.48e+03 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 17399 |\n", + "| policy_loss | 215 |\n", + "| std | 1.1 |\n", + "| value_loss | 25.9 |\n", + "-------------------------------------\n", + "------------------------------------\n", + "| time/ | 
|\n", + "| fps | 270 |\n", + "| iterations | 17500 |\n", + "| time_elapsed | 323 |\n", + "| total_timesteps | 87500 |\n", + "| train/ | |\n", + "| entropy_loss | -45.3 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 17499 |\n", + "| policy_loss | -28.9 |\n", + "| std | 1.1 |\n", + "| value_loss | 4.87 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 270 |\n", + "| iterations | 17600 |\n", + "| time_elapsed | 325 |\n", + "| total_timesteps | 88000 |\n", + "| train/ | |\n", + "| entropy_loss | -45.3 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 17599 |\n", + "| policy_loss | -75.1 |\n", + "| std | 1.1 |\n", + "| value_loss | 6.57 |\n", + "------------------------------------\n", + "begin_total_asset:1000000\n", + "end_total_asset:5436034.522246395\n", + "total_reward:4436034.522246395\n", + "total_cost: 15350.251113259093\n", + "total_trades: 38300\n", + "Sharpe: 1.111300596501636\n", + "=================================\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 270 |\n", + "| iterations | 17700 |\n", + "| time_elapsed | 327 |\n", + "| total_timesteps | 88500 |\n", + "| train/ | |\n", + "| entropy_loss | -45.4 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 17699 |\n", + "| policy_loss | 131 |\n", + "| std | 1.1 |\n", + "| value_loss | 8.69 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 270 |\n", + "| iterations | 17800 |\n", + "| time_elapsed | 329 |\n", + "| total_timesteps | 89000 |\n", + "| train/ | |\n", + "| entropy_loss | -45.4 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 17799 |\n", + "| policy_loss | 37.7 |\n", + "| std | 1.1 |\n", + "| value_loss | 1.64 |\n", + 
"------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 270 |\n", + "| iterations | 17900 |\n", + "| time_elapsed | 330 |\n", + "| total_timesteps | 89500 |\n", + "| train/ | |\n", + "| entropy_loss | -45.4 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 17899 |\n", + "| policy_loss | 14.6 |\n", + "| std | 1.1 |\n", + "| value_loss | 2.22 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 270 |\n", + "| iterations | 18000 |\n", + "| time_elapsed | 332 |\n", + "| total_timesteps | 90000 |\n", + "| train/ | |\n", + "| entropy_loss | -45.4 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 17999 |\n", + "| policy_loss | -304 |\n", + "| std | 1.1 |\n", + "| value_loss | 49.1 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 270 |\n", + "| iterations | 18100 |\n", + "| time_elapsed | 334 |\n", + "| total_timesteps | 90500 |\n", + "| train/ | |\n", + "| entropy_loss | -45.4 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 18099 |\n", + "| policy_loss | -370 |\n", + "| std | 1.1 |\n", + "| value_loss | 72.5 |\n", + "------------------------------------\n", + "begin_total_asset:1000000\n", + "end_total_asset:5112916.556362064\n", + "total_reward:4112916.5563620636\n", + "total_cost: 15612.707192791122\n", + "total_trades: 37413\n", + "Sharpe: 1.0611073756631733\n", + "=================================\n", + "-------------------------------------\n", + "| time/ | |\n", + "| fps | 270 |\n", + "| iterations | 18200 |\n", + "| time_elapsed | 336 |\n", + "| total_timesteps | 91000 |\n", + "| train/ | |\n", + "| entropy_loss | -45.5 |\n", + "| explained_variance | -6.66e+03 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 18199 |\n", + "| 
policy_loss | 74.9 |\n", + "| std | 1.11 |\n", + "| value_loss | 3.92 |\n", + "-------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 270 |\n", + "| iterations | 18300 |\n", + "| time_elapsed | 338 |\n", + "| total_timesteps | 91500 |\n", + "| train/ | |\n", + "| entropy_loss | -45.5 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 18299 |\n", + "| policy_loss | -133 |\n", + "| std | 1.11 |\n", + "| value_loss | 13.7 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 270 |\n", + "| iterations | 18400 |\n", + "| time_elapsed | 339 |\n", + "| total_timesteps | 92000 |\n", + "| train/ | |\n", + "| entropy_loss | -45.5 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 18399 |\n", + "| policy_loss | 73 |\n", + "| std | 1.11 |\n", + "| value_loss | 3.98 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 270 |\n", + "| iterations | 18500 |\n", + "| time_elapsed | 341 |\n", + "| total_timesteps | 92500 |\n", + "| train/ | |\n", + "| entropy_loss | -45.6 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 18499 |\n", + "| policy_loss | 4.46 |\n", + "| std | 1.11 |\n", + "| value_loss | 0.844 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 270 |\n", + "| iterations | 18600 |\n", + "| time_elapsed | 343 |\n", + "| total_timesteps | 93000 |\n", + "| train/ | |\n", + "| entropy_loss | -45.6 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 18599 |\n", + "| policy_loss | -214 |\n", + "| std | 1.11 |\n", + "| value_loss | 26.6 |\n", + "------------------------------------\n", + "begin_total_asset:1000000\n", + 
"end_total_asset:4986097.277640037\n", + "total_reward:3986097.2776400372\n", + "total_cost: 13702.647875393004\n", + "total_trades: 35305\n", + "Sharpe: 1.0387271032164815\n", + "=================================\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 270 |\n", + "| iterations | 18700 |\n", + "| time_elapsed | 345 |\n", + "| total_timesteps | 93500 |\n", + "| train/ | |\n", + "| entropy_loss | -45.5 |\n", + "| explained_variance | -26.8 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 18699 |\n", + "| policy_loss | -40.8 |\n", + "| std | 1.11 |\n", + "| value_loss | 0.888 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 270 |\n", + "| iterations | 18800 |\n", + "| time_elapsed | 347 |\n", + "| total_timesteps | 94000 |\n", + "| train/ | |\n", + "| entropy_loss | -45.6 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 18799 |\n", + "| policy_loss | -114 |\n", + "| std | 1.11 |\n", + "| value_loss | 9.15 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 270 |\n", + "| iterations | 18900 |\n", + "| time_elapsed | 348 |\n", + "| total_timesteps | 94500 |\n", + "| train/ | |\n", + "| entropy_loss | -45.6 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 18899 |\n", + "| policy_loss | -360 |\n", + "| std | 1.11 |\n", + "| value_loss | 58.3 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 270 |\n", + "| iterations | 19000 |\n", + "| time_elapsed | 350 |\n", + "| total_timesteps | 95000 |\n", + "| train/ | |\n", + "| entropy_loss | -45.6 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 18999 |\n", + "| policy_loss | 94.4 |\n", + "| std | 1.11 |\n", + "| value_loss | 8.57 |\n", + 
"------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 270 |\n", + "| iterations | 19100 |\n", + "| time_elapsed | 352 |\n", + "| total_timesteps | 95500 |\n", + "| train/ | |\n", + "| entropy_loss | -45.6 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 19099 |\n", + "| policy_loss | -4.65 |\n", + "| std | 1.11 |\n", + "| value_loss | 2.46 |\n", + "------------------------------------\n", + "begin_total_asset:1000000\n", + "end_total_asset:5478501.103530731\n", + "total_reward:4478501.103530731\n", + "total_cost: 10256.280938558313\n", + "total_trades: 37074\n", + "Sharpe: 1.1342798023300105\n", + "=================================\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 270 |\n", + "| iterations | 19200 |\n", + "| time_elapsed | 354 |\n", + "| total_timesteps | 96000 |\n", + "| train/ | |\n", + "| entropy_loss | -45.6 |\n", + "| explained_variance | -2.2e+03 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 19199 |\n", + "| policy_loss | -52.2 |\n", + "| std | 1.11 |\n", + "| value_loss | 3.13 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 270 |\n", + "| iterations | 19300 |\n", + "| time_elapsed | 356 |\n", + "| total_timesteps | 96500 |\n", + "| train/ | |\n", + "| entropy_loss | -45.7 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 19299 |\n", + "| policy_loss | -221 |\n", + "| std | 1.11 |\n", + "| value_loss | 29.8 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 270 |\n", + "| iterations | 19400 |\n", + "| time_elapsed | 358 |\n", + "| total_timesteps | 97000 |\n", + "| train/ | |\n", + "| entropy_loss | -45.7 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 19399 |\n", + 
"| policy_loss | 2.54 |\n", + "| std | 1.11 |\n", + "| value_loss | 0.552 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 270 |\n", + "| iterations | 19500 |\n", + "| time_elapsed | 360 |\n", + "| total_timesteps | 97500 |\n", + "| train/ | |\n", + "| entropy_loss | -45.7 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 19499 |\n", + "| policy_loss | 324 |\n", + "| std | 1.12 |\n", + "| value_loss | 73.7 |\n", + "------------------------------------\n", + "-------------------------------------\n", + "| time/ | |\n", + "| fps | 270 |\n", + "| iterations | 19600 |\n", + "| time_elapsed | 361 |\n", + "| total_timesteps | 98000 |\n", + "| train/ | |\n", + "| entropy_loss | -45.7 |\n", + "| explained_variance | -2.23e+04 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 19599 |\n", + "| policy_loss | -546 |\n", + "| std | 1.11 |\n", + "| value_loss | 139 |\n", + "-------------------------------------\n", + "begin_total_asset:1000000\n", + "end_total_asset:4206773.89180218\n", + "total_reward:3206773.8918021796\n", + "total_cost: 5223.3386326608415\n", + "total_trades: 36723\n", + "Sharpe: 0.9776063927933439\n", + "=================================\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 270 |\n", + "| iterations | 19700 |\n", + "| time_elapsed | 363 |\n", + "| total_timesteps | 98500 |\n", + "| train/ | |\n", + "| entropy_loss | -45.8 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 19699 |\n", + "| policy_loss | -155 |\n", + "| std | 1.12 |\n", + "| value_loss | 12.4 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 270 |\n", + "| iterations | 19800 |\n", + "| time_elapsed | 365 |\n", + "| total_timesteps | 99000 |\n", + "| train/ | |\n", + "| entropy_loss | -45.8 |\n", + "| 
explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 19799 |\n", + "| policy_loss | 73.5 |\n", + "| std | 1.12 |\n", + "| value_loss | 4.66 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 270 |\n", + "| iterations | 19900 |\n", + "| time_elapsed | 367 |\n", + "| total_timesteps | 99500 |\n", + "| train/ | |\n", + "| entropy_loss | -45.8 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 19899 |\n", + "| policy_loss | -24.7 |\n", + "| std | 1.12 |\n", + "| value_loss | 2.18 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 270 |\n", + "| iterations | 20000 |\n", + "| time_elapsed | 369 |\n", + "| total_timesteps | 100000 |\n", + "| train/ | |\n", + "| entropy_loss | -45.9 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 19999 |\n", + "| policy_loss | 42 |\n", + "| std | 1.12 |\n", + "| value_loss | 1.86 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 270 |\n", + "| iterations | 20100 |\n", + "| time_elapsed | 371 |\n", + "| total_timesteps | 100500 |\n", + "| train/ | |\n", + "| entropy_loss | -45.9 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 20099 |\n", + "| policy_loss | 279 |\n", + "| std | 1.12 |\n", + "| value_loss | 51.4 |\n", + "------------------------------------\n", + "begin_total_asset:1000000\n", + "end_total_asset:4319570.605313044\n", + "total_reward:3319570.605313044\n", + "total_cost: 6777.852646750923\n", + "total_trades: 38079\n", + "Sharpe: 0.9793624584136245\n", + "=================================\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 270 |\n", + "| iterations | 20200 |\n", + "| time_elapsed | 373 |\n", + "| total_timesteps 
| 101000 |\n", + "| train/ | |\n", + "| entropy_loss | -46 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 20199 |\n", + "| policy_loss | 94 |\n", + "| std | 1.13 |\n", + "| value_loss | 6.2 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 270 |\n", + "| iterations | 20300 |\n", + "| time_elapsed | 375 |\n", + "| total_timesteps | 101500 |\n", + "| train/ | |\n", + "| entropy_loss | -46 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 20299 |\n", + "| policy_loss | -23.3 |\n", + "| std | 1.13 |\n", + "| value_loss | 1.69 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 270 |\n", + "| iterations | 20400 |\n", + "| time_elapsed | 376 |\n", + "| total_timesteps | 102000 |\n", + "| train/ | |\n", + "| entropy_loss | -46.1 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 20399 |\n", + "| policy_loss | 33.9 |\n", + "| std | 1.13 |\n", + "| value_loss | 2.74 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 270 |\n", + "| iterations | 20500 |\n", + "| time_elapsed | 378 |\n", + "| total_timesteps | 102500 |\n", + "| train/ | |\n", + "| entropy_loss | -46 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 20499 |\n", + "| policy_loss | -137 |\n", + "| std | 1.13 |\n", + "| value_loss | 12 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 270 |\n", + "| iterations | 20600 |\n", + "| time_elapsed | 380 |\n", + "| total_timesteps | 103000 |\n", + "| train/ | |\n", + "| entropy_loss | -45.9 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 20599 |\n", + "| policy_loss | 
374 |\n", + "| std | 1.12 |\n", + "| value_loss | 99 |\n", + "------------------------------------\n", + "begin_total_asset:1000000\n", + "end_total_asset:6257628.032702145\n", + "total_reward:5257628.032702145\n", + "total_cost: 15497.552403549977\n", + "total_trades: 41618\n", + "Sharpe: 1.1223670233311491\n", + "=================================\n", + "-------------------------------------\n", + "| time/ | |\n", + "| fps | 270 |\n", + "| iterations | 20700 |\n", + "| time_elapsed | 382 |\n", + "| total_timesteps | 103500 |\n", + "| train/ | |\n", + "| entropy_loss | -45.9 |\n", + "| explained_variance | -1.38e+04 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 20699 |\n", + "| policy_loss | -30.9 |\n", + "| std | 1.12 |\n", + "| value_loss | 21.9 |\n", + "-------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 270 |\n", + "| iterations | 20800 |\n", + "| time_elapsed | 384 |\n", + "| total_timesteps | 104000 |\n", + "| train/ | |\n", + "| entropy_loss | -45.9 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 20799 |\n", + "| policy_loss | -34 |\n", + "| std | 1.12 |\n", + "| value_loss | 1.24 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 270 |\n", + "| iterations | 20900 |\n", + "| time_elapsed | 386 |\n", + "| total_timesteps | 104500 |\n", + "| train/ | |\n", + "| entropy_loss | -46 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 20899 |\n", + "| policy_loss | 72.1 |\n", + "| std | 1.13 |\n", + "| value_loss | 3.54 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 270 |\n", + "| iterations | 21000 |\n", + "| time_elapsed | 388 |\n", + "| total_timesteps | 105000 |\n", + "| train/ | |\n", + "| entropy_loss | -46 |\n", + "| explained_variance | nan |\n", + "| 
learning_rate | 0.0007 |\n", + "| n_updates | 20999 |\n", + "| policy_loss | -385 |\n", + "| std | 1.13 |\n", + "| value_loss | 89.4 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 270 |\n", + "| iterations | 21100 |\n", + "| time_elapsed | 389 |\n", + "| total_timesteps | 105500 |\n", + "| train/ | |\n", + "| entropy_loss | -46 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 21099 |\n", + "| policy_loss | 115 |\n", + "| std | 1.13 |\n", + "| value_loss | 32.1 |\n", + "------------------------------------\n", + "begin_total_asset:1000000\n", + "end_total_asset:4738471.037828859\n", + "total_reward:3738471.037828859\n", + "total_cost: 7014.150195751989\n", + "total_trades: 41430\n", + "Sharpe: 0.9741579164389573\n", + "=================================\n", + "-------------------------------------\n", + "| time/ | |\n", + "| fps | 270 |\n", + "| iterations | 21200 |\n", + "| time_elapsed | 391 |\n", + "| total_timesteps | 106000 |\n", + "| train/ | |\n", + "| entropy_loss | -46 |\n", + "| explained_variance | -4.84e+10 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 21199 |\n", + "| policy_loss | -199 |\n", + "| std | 1.13 |\n", + "| value_loss | 19.4 |\n", + "-------------------------------------\n", + "-------------------------------------\n", + "| time/ | |\n", + "| fps | 270 |\n", + "| iterations | 21300 |\n", + "| time_elapsed | 393 |\n", + "| total_timesteps | 106500 |\n", + "| train/ | |\n", + "| entropy_loss | -46.1 |\n", + "| explained_variance | -2.18e+03 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 21299 |\n", + "| policy_loss | -306 |\n", + "| std | 1.13 |\n", + "| value_loss | 45.8 |\n", + "-------------------------------------\n", + "-------------------------------------\n", + "| time/ | |\n", + "| fps | 270 |\n", + "| iterations | 21400 |\n", + "| time_elapsed | 395 |\n", + "| total_timesteps | 107000 |\n", + "| 
train/ | |\n", + "| entropy_loss | -46 |\n", + "| explained_variance | -1.53e+05 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 21399 |\n", + "| policy_loss | -210 |\n", + "| std | 1.13 |\n", + "| value_loss | 24.8 |\n", + "-------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 270 |\n", + "| iterations | 21500 |\n", + "| time_elapsed | 397 |\n", + "| total_timesteps | 107500 |\n", + "| train/ | |\n", + "| entropy_loss | -46 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 21499 |\n", + "| policy_loss | 126 |\n", + "| std | 1.13 |\n", + "| value_loss | 9.59 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 270 |\n", + "| iterations | 21600 |\n", + "| time_elapsed | 399 |\n", + "| total_timesteps | 108000 |\n", + "| train/ | |\n", + "| entropy_loss | -46 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 21599 |\n", + "| policy_loss | -214 |\n", + "| std | 1.13 |\n", + "| value_loss | 96.2 |\n", + "------------------------------------\n", + "begin_total_asset:1000000\n", + "end_total_asset:4857941.929380179\n", + "total_reward:3857941.9293801794\n", + "total_cost: 4300.517490341594\n", + "total_trades: 39933\n", + "Sharpe: 1.0101593537518043\n", + "=================================\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 270 |\n", + "| iterations | 21700 |\n", + "| time_elapsed | 401 |\n", + "| total_timesteps | 108500 |\n", + "| train/ | |\n", + "| entropy_loss | -46 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 21699 |\n", + "| policy_loss | -26.1 |\n", + "| std | 1.13 |\n", + "| value_loss | 0.598 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 270 |\n", + "| iterations | 21800 
|\n", + "| time_elapsed | 402 |\n", + "| total_timesteps | 109000 |\n", + "| train/ | |\n", + "| entropy_loss | -46 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 21799 |\n", + "| policy_loss | 81.4 |\n", + "| std | 1.13 |\n", + "| value_loss | 6.68 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 270 |\n", + "| iterations | 21900 |\n", + "| time_elapsed | 404 |\n", + "| total_timesteps | 109500 |\n", + "| train/ | |\n", + "| entropy_loss | -46 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 21899 |\n", + "| policy_loss | -198 |\n", + "| std | 1.12 |\n", + "| value_loss | 18.1 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 270 |\n", + "| iterations | 22000 |\n", + "| time_elapsed | 406 |\n", + "| total_timesteps | 110000 |\n", + "| train/ | |\n", + "| entropy_loss | -46 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 21999 |\n", + "| policy_loss | -107 |\n", + "| std | 1.13 |\n", + "| value_loss | 6.12 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 270 |\n", + "| iterations | 22100 |\n", + "| time_elapsed | 408 |\n", + "| total_timesteps | 110500 |\n", + "| train/ | |\n", + "| entropy_loss | -45.9 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 22099 |\n", + "| policy_loss | -209 |\n", + "| std | 1.12 |\n", + "| value_loss | 74 |\n", + "------------------------------------\n", + "begin_total_asset:1000000\n", + "end_total_asset:3889237.068636508\n", + "total_reward:2889237.068636508\n", + "total_cost: 2349.804122118537\n", + "total_trades: 40372\n", + "Sharpe: 0.8843985305523498\n", + "=================================\n", + 
"------------------------------------\n", + "| time/ | |\n", + "| fps | 270 |\n", + "| iterations | 22200 |\n", + "| time_elapsed | 410 |\n", + "| total_timesteps | 111000 |\n", + "| train/ | |\n", + "| entropy_loss | -46 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 22199 |\n", + "| policy_loss | 29.7 |\n", + "| std | 1.13 |\n", + "| value_loss | 0.671 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 270 |\n", + "| iterations | 22300 |\n", + "| time_elapsed | 412 |\n", + "| total_timesteps | 111500 |\n", + "| train/ | |\n", + "| entropy_loss | -46 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 22299 |\n", + "| policy_loss | 78.5 |\n", + "| std | 1.13 |\n", + "| value_loss | 3.36 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 270 |\n", + "| iterations | 22400 |\n", + "| time_elapsed | 414 |\n", + "| total_timesteps | 112000 |\n", + "| train/ | |\n", + "| entropy_loss | -46.1 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 22399 |\n", + "| policy_loss | 33.8 |\n", + "| std | 1.13 |\n", + "| value_loss | 1.25 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 270 |\n", + "| iterations | 22500 |\n", + "| time_elapsed | 416 |\n", + "| total_timesteps | 112500 |\n", + "| train/ | |\n", + "| entropy_loss | -46.1 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 22499 |\n", + "| policy_loss | 221 |\n", + "| std | 1.13 |\n", + "| value_loss | 29.2 |\n", + "------------------------------------\n", + "-------------------------------------\n", + "| time/ | |\n", + "| fps | 270 |\n", + "| iterations | 22600 |\n", + "| time_elapsed | 418 |\n", + "| total_timesteps | 113000 |\n", 
+ "| train/ | |\n", + "| entropy_loss | -46.1 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 22599 |\n", + "| policy_loss | -1.03e+03 |\n", + "| std | 1.13 |\n", + "| value_loss | 551 |\n", + "-------------------------------------\n", + "begin_total_asset:1000000\n", + "end_total_asset:4224562.913610662\n", + "total_reward:3224562.9136106623\n", + "total_cost: 7311.709253680451\n", + "total_trades: 39684\n", + "Sharpe: 0.908724330269282\n", + "=================================\n", + "-------------------------------------\n", + "| time/ | |\n", + "| fps | 270 |\n", + "| iterations | 22700 |\n", + "| time_elapsed | 420 |\n", + "| total_timesteps | 113500 |\n", + "| train/ | |\n", + "| entropy_loss | -46.2 |\n", + "| explained_variance | -2.31e+04 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 22699 |\n", + "| policy_loss | -135 |\n", + "| std | 1.13 |\n", + "| value_loss | 11.1 |\n", + "-------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 270 |\n", + "| iterations | 22800 |\n", + "| time_elapsed | 422 |\n", + "| total_timesteps | 114000 |\n", + "| train/ | |\n", + "| entropy_loss | -46.2 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 22799 |\n", + "| policy_loss | -74.1 |\n", + "| std | 1.13 |\n", + "| value_loss | 3.66 |\n", + "------------------------------------\n", + "-------------------------------------\n", + "| time/ | |\n", + "| fps | 270 |\n", + "| iterations | 22900 |\n", + "| time_elapsed | 424 |\n", + "| total_timesteps | 114500 |\n", + "| train/ | |\n", + "| entropy_loss | -46.2 |\n", + "| explained_variance | -1.82e+10 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 22899 |\n", + "| policy_loss | 44.3 |\n", + "| std | 1.13 |\n", + "| value_loss | 5.06 |\n", + "-------------------------------------\n", + "-------------------------------------\n", + "| time/ | |\n", + "| fps | 270 
|\n", + "| iterations | 23000 |\n", + "| time_elapsed | 425 |\n", + "| total_timesteps | 115000 |\n", + "| train/ | |\n", + "| entropy_loss | -46.1 |\n", + "| explained_variance | -2.81e+05 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 22999 |\n", + "| policy_loss | 98.9 |\n", + "| std | 1.13 |\n", + "| value_loss | 14.7 |\n", + "-------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 270 |\n", + "| iterations | 23100 |\n", + "| time_elapsed | 427 |\n", + "| total_timesteps | 115500 |\n", + "| train/ | |\n", + "| entropy_loss | -46.1 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 23099 |\n", + "| policy_loss | 252 |\n", + "| std | 1.13 |\n", + "| value_loss | 39.1 |\n", + "------------------------------------\n", + "begin_total_asset:1000000\n", + "end_total_asset:4058599.1541633434\n", + "total_reward:3058599.1541633434\n", + "total_cost: 4712.075511668796\n", + "total_trades: 39992\n", + "Sharpe: 0.9184456466750243\n", + "=================================\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 270 |\n", + "| iterations | 23200 |\n", + "| time_elapsed | 429 |\n", + "| total_timesteps | 116000 |\n", + "| train/ | |\n", + "| entropy_loss | -46.1 |\n", + "| explained_variance | -19.7 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 23199 |\n", + "| policy_loss | 34.4 |\n", + "| std | 1.13 |\n", + "| value_loss | 1.2 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 270 |\n", + "| iterations | 23300 |\n", + "| time_elapsed | 431 |\n", + "| total_timesteps | 116500 |\n", + "| train/ | |\n", + "| entropy_loss | -46.1 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 23299 |\n", + "| policy_loss | 79.1 |\n", + "| std | 1.13 |\n", + "| value_loss | 7.28 |\n", + 
"------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 270 |\n", + "| iterations | 23400 |\n", + "| time_elapsed | 433 |\n", + "| total_timesteps | 117000 |\n", + "| train/ | |\n", + "| entropy_loss | -46.1 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 23399 |\n", + "| policy_loss | -95.2 |\n", + "| std | 1.13 |\n", + "| value_loss | 5.33 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 270 |\n", + "| iterations | 23500 |\n", + "| time_elapsed | 435 |\n", + "| total_timesteps | 117500 |\n", + "| train/ | |\n", + "| entropy_loss | -46.1 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 23499 |\n", + "| policy_loss | 138 |\n", + "| std | 1.13 |\n", + "| value_loss | 15.2 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 270 |\n", + "| iterations | 23600 |\n", + "| time_elapsed | 436 |\n", + "| total_timesteps | 118000 |\n", + "| train/ | |\n", + "| entropy_loss | -46.1 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 23599 |\n", + "| policy_loss | 211 |\n", + "| std | 1.13 |\n", + "| value_loss | 28.6 |\n", + "------------------------------------\n", + "begin_total_asset:1000000\n", + "end_total_asset:4447977.909194936\n", + "total_reward:3447977.909194936\n", + "total_cost: 4003.027452147933\n", + "total_trades: 41100\n", + "Sharpe: 0.9956972796668654\n", + "=================================\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 23700 |\n", + "| time_elapsed | 438 |\n", + "| total_timesteps | 118500 |\n", + "| train/ | |\n", + "| entropy_loss | -46.1 |\n", + "| explained_variance | -6.88 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 23699 |\n", + "| 
policy_loss | -68.4 |\n", + "| std | 1.13 |\n", + "| value_loss | 2.73 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 23800 |\n", + "| time_elapsed | 440 |\n", + "| total_timesteps | 119000 |\n", + "| train/ | |\n", + "| entropy_loss | -46.1 |\n", + "| explained_variance | -186 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 23799 |\n", + "| policy_loss | -106 |\n", + "| std | 1.13 |\n", + "| value_loss | 6.79 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 23900 |\n", + "| time_elapsed | 442 |\n", + "| total_timesteps | 119500 |\n", + "| train/ | |\n", + "| entropy_loss | -46.1 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 23899 |\n", + "| policy_loss | 30.7 |\n", + "| std | 1.13 |\n", + "| value_loss | 1.59 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 24000 |\n", + "| time_elapsed | 444 |\n", + "| total_timesteps | 120000 |\n", + "| train/ | |\n", + "| entropy_loss | -46.2 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 23999 |\n", + "| policy_loss | 69.7 |\n", + "| std | 1.13 |\n", + "| value_loss | 5.71 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 24100 |\n", + "| time_elapsed | 446 |\n", + "| total_timesteps | 120500 |\n", + "| train/ | |\n", + "| entropy_loss | -46.2 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 24099 |\n", + "| policy_loss | 224 |\n", + "| std | 1.13 |\n", + "| value_loss | 22.6 |\n", + "------------------------------------\n", + "begin_total_asset:1000000\n", + 
"end_total_asset:3735086.557625893\n", + "total_reward:2735086.557625893\n", + "total_cost: 2757.089181630181\n", + "total_trades: 40506\n", + "Sharpe: 0.8851253072732341\n", + "=================================\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 24200 |\n", + "| time_elapsed | 448 |\n", + "| total_timesteps | 121000 |\n", + "| train/ | |\n", + "| entropy_loss | -46.2 |\n", + "| explained_variance | -2.36 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 24199 |\n", + "| policy_loss | 5.03 |\n", + "| std | 1.13 |\n", + "| value_loss | 0.398 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 24300 |\n", + "| time_elapsed | 450 |\n", + "| total_timesteps | 121500 |\n", + "| train/ | |\n", + "| entropy_loss | -46.2 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 24299 |\n", + "| policy_loss | -225 |\n", + "| std | 1.14 |\n", + "| value_loss | 28.7 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 24400 |\n", + "| time_elapsed | 452 |\n", + "| total_timesteps | 122000 |\n", + "| train/ | |\n", + "| entropy_loss | -46.2 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 24399 |\n", + "| policy_loss | 65.6 |\n", + "| std | 1.13 |\n", + "| value_loss | 2.32 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 24500 |\n", + "| time_elapsed | 454 |\n", + "| total_timesteps | 122500 |\n", + "| train/ | |\n", + "| entropy_loss | -46.3 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 24499 |\n", + "| policy_loss | -163 |\n", + "| std | 1.14 |\n", + "| value_loss | 16.1 |\n", 
+ "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 24600 |\n", + "| time_elapsed | 456 |\n", + "| total_timesteps | 123000 |\n", + "| train/ | |\n", + "| entropy_loss | -46.3 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 24599 |\n", + "| policy_loss | 53.5 |\n", + "| std | 1.14 |\n", + "| value_loss | 2.15 |\n", + "------------------------------------\n", + "begin_total_asset:1000000\n", + "end_total_asset:3941900.9207990007\n", + "total_reward:2941900.9207990007\n", + "total_cost: 3208.161901015157\n", + "total_trades: 39655\n", + "Sharpe: 0.916833519860494\n", + "=================================\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 24700 |\n", + "| time_elapsed | 458 |\n", + "| total_timesteps | 123500 |\n", + "| train/ | |\n", + "| entropy_loss | -46.3 |\n", + "| explained_variance | -10.9 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 24699 |\n", + "| policy_loss | -21.7 |\n", + "| std | 1.14 |\n", + "| value_loss | 1.55 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 24800 |\n", + "| time_elapsed | 459 |\n", + "| total_timesteps | 124000 |\n", + "| train/ | |\n", + "| entropy_loss | -46.3 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 24799 |\n", + "| policy_loss | 274 |\n", + "| std | 1.14 |\n", + "| value_loss | 37.5 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 24900 |\n", + "| time_elapsed | 461 |\n", + "| total_timesteps | 124500 |\n", + "| train/ | |\n", + "| entropy_loss | -46.4 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 24899 |\n", + 
"| policy_loss | -99.3 |\n", + "| std | 1.14 |\n", + "| value_loss | 5.44 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 25000 |\n", + "| time_elapsed | 463 |\n", + "| total_timesteps | 125000 |\n", + "| train/ | |\n", + "| entropy_loss | -46.4 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 24999 |\n", + "| policy_loss | 73.4 |\n", + "| std | 1.14 |\n", + "| value_loss | 2.62 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 25100 |\n", + "| time_elapsed | 465 |\n", + "| total_timesteps | 125500 |\n", + "| train/ | |\n", + "| entropy_loss | -46.5 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 25099 |\n", + "| policy_loss | 85.4 |\n", + "| std | 1.14 |\n", + "| value_loss | 4.21 |\n", + "------------------------------------\n", + "begin_total_asset:1000000\n", + "end_total_asset:3918748.3829585924\n", + "total_reward:2918748.3829585924\n", + "total_cost: 7273.962180458869\n", + "total_trades: 40377\n", + "Sharpe: 0.9114365429898307\n", + "=================================\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 25200 |\n", + "| time_elapsed | 467 |\n", + "| total_timesteps | 126000 |\n", + "| train/ | |\n", + "| entropy_loss | -46.4 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 25199 |\n", + "| policy_loss | 78.4 |\n", + "| std | 1.14 |\n", + "| value_loss | 3.83 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 25300 |\n", + "| time_elapsed | 469 |\n", + "| total_timesteps | 126500 |\n", + "| train/ | |\n", + "| entropy_loss | -46.5 |\n", + "| 
explained_variance | -359 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 25299 |\n", + "| policy_loss | 43.3 |\n", + "| std | 1.14 |\n", + "| value_loss | 11.3 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 25400 |\n", + "| time_elapsed | 471 |\n", + "| total_timesteps | 127000 |\n", + "| train/ | |\n", + "| entropy_loss | -46.5 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 25399 |\n", + "| policy_loss | -117 |\n", + "| std | 1.15 |\n", + "| value_loss | 8.74 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 25500 |\n", + "| time_elapsed | 473 |\n", + "| total_timesteps | 127500 |\n", + "| train/ | |\n", + "| entropy_loss | -46.5 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 25499 |\n", + "| policy_loss | -334 |\n", + "| std | 1.15 |\n", + "| value_loss | 55.2 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 25600 |\n", + "| time_elapsed | 475 |\n", + "| total_timesteps | 128000 |\n", + "| train/ | |\n", + "| entropy_loss | -46.5 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 25599 |\n", + "| policy_loss | 80.1 |\n", + "| std | 1.15 |\n", + "| value_loss | 7.16 |\n", + "------------------------------------\n", + "begin_total_asset:1000000\n", + "end_total_asset:3416634.0516581917\n", + "total_reward:2416634.0516581917\n", + "total_cost: 4919.955620021787\n", + "total_trades: 38886\n", + "Sharpe: 0.7925876800612837\n", + "=================================\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 25700 |\n", + "| time_elapsed | 476 |\n", + "| 
total_timesteps | 128500 |\n", + "| train/ | |\n", + "| entropy_loss | -46.6 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 25699 |\n", + "| policy_loss | 74.3 |\n", + "| std | 1.15 |\n", + "| value_loss | 4.39 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 25800 |\n", + "| time_elapsed | 478 |\n", + "| total_timesteps | 129000 |\n", + "| train/ | |\n", + "| entropy_loss | -46.5 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 25799 |\n", + "| policy_loss | -45.1 |\n", + "| std | 1.15 |\n", + "| value_loss | 7.72 |\n", + "------------------------------------\n", + "-------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 25900 |\n", + "| time_elapsed | 480 |\n", + "| total_timesteps | 129500 |\n", + "| train/ | |\n", + "| entropy_loss | -46.6 |\n", + "| explained_variance | -2.03e+08 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 25899 |\n", + "| policy_loss | 237 |\n", + "| std | 1.15 |\n", + "| value_loss | 24.9 |\n", + "-------------------------------------\n", + "-------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 26000 |\n", + "| time_elapsed | 482 |\n", + "| total_timesteps | 130000 |\n", + "| train/ | |\n", + "| entropy_loss | -46.6 |\n", + "| explained_variance | -2.15e+03 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 25999 |\n", + "| policy_loss | -103 |\n", + "| std | 1.15 |\n", + "| value_loss | 9.79 |\n", + "-------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 26100 |\n", + "| time_elapsed | 484 |\n", + "| total_timesteps | 130500 |\n", + "| train/ | |\n", + "| entropy_loss | -46.6 |\n", + "| explained_variance | -3.4e+11 |\n", + "| learning_rate | 0.0007 |\n", + 
"| n_updates | 26099 |\n", + "| policy_loss | 43.2 |\n", + "| std | 1.15 |\n", + "| value_loss | 1.28 |\n", + "------------------------------------\n", + "begin_total_asset:1000000\n", + "end_total_asset:3276619.3638079385\n", + "total_reward:2276619.3638079385\n", + "total_cost: 5264.404229684018\n", + "total_trades: 38979\n", + "Sharpe: 0.7353175977211657\n", + "=================================\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 26200 |\n", + "| time_elapsed | 486 |\n", + "| total_timesteps | 131000 |\n", + "| train/ | |\n", + "| entropy_loss | -46.6 |\n", + "| explained_variance | -908 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 26199 |\n", + "| policy_loss | 60 |\n", + "| std | 1.15 |\n", + "| value_loss | 3.88 |\n", + "------------------------------------\n", + "-------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 26300 |\n", + "| time_elapsed | 488 |\n", + "| total_timesteps | 131500 |\n", + "| train/ | |\n", + "| entropy_loss | -46.6 |\n", + "| explained_variance | -2.84e+12 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 26299 |\n", + "| policy_loss | -556 |\n", + "| std | 1.15 |\n", + "| value_loss | 149 |\n", + "-------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 26400 |\n", + "| time_elapsed | 489 |\n", + "| total_timesteps | 132000 |\n", + "| train/ | |\n", + "| entropy_loss | -46.6 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 26399 |\n", + "| policy_loss | -144 |\n", + "| std | 1.15 |\n", + "| value_loss | 10.9 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 26500 |\n", + "| time_elapsed | 491 |\n", + "| total_timesteps | 132500 |\n", + "| train/ | |\n", + "| entropy_loss | 
-46.6 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 26499 |\n", + "| policy_loss | 68.5 |\n", + "| std | 1.15 |\n", + "| value_loss | 4.74 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 26600 |\n", + "| time_elapsed | 493 |\n", + "| total_timesteps | 133000 |\n", + "| train/ | |\n", + "| entropy_loss | -46.6 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 26599 |\n", + "| policy_loss | 2.66 |\n", + "| std | 1.15 |\n", + "| value_loss | 0.188 |\n", + "------------------------------------\n", + "begin_total_asset:1000000\n", + "end_total_asset:3639991.011414096\n", + "total_reward:2639991.011414096\n", + "total_cost: 5876.438289118703\n", + "total_trades: 39596\n", + "Sharpe: 0.792662828054479\n", + "=================================\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 26700 |\n", + "| time_elapsed | 495 |\n", + "| total_timesteps | 133500 |\n", + "| train/ | |\n", + "| entropy_loss | -46.6 |\n", + "| explained_variance | -22.5 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 26699 |\n", + "| policy_loss | 114 |\n", + "| std | 1.15 |\n", + "| value_loss | 7.19 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 26800 |\n", + "| time_elapsed | 497 |\n", + "| total_timesteps | 134000 |\n", + "| train/ | |\n", + "| entropy_loss | -46.6 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 26799 |\n", + "| policy_loss | -227 |\n", + "| std | 1.15 |\n", + "| value_loss | 28.2 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 26900 |\n", + "| time_elapsed | 499 |\n", 
+ "| total_timesteps | 134500 |\n", + "| train/ | |\n", + "| entropy_loss | -46.5 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 26899 |\n", + "| policy_loss | -99.1 |\n", + "| std | 1.15 |\n", + "| value_loss | 5.7 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 27000 |\n", + "| time_elapsed | 501 |\n", + "| total_timesteps | 135000 |\n", + "| train/ | |\n", + "| entropy_loss | -46.6 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 26999 |\n", + "| policy_loss | -50.5 |\n", + "| std | 1.15 |\n", + "| value_loss | 1.92 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 27100 |\n", + "| time_elapsed | 503 |\n", + "| total_timesteps | 135500 |\n", + "| train/ | |\n", + "| entropy_loss | -46.6 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 27099 |\n", + "| policy_loss | 86.8 |\n", + "| std | 1.15 |\n", + "| value_loss | 4.17 |\n", + "------------------------------------\n", + "begin_total_asset:1000000\n", + "end_total_asset:3312273.2546917126\n", + "total_reward:2312273.2546917126\n", + "total_cost: 6513.921766223839\n", + "total_trades: 39866\n", + "Sharpe: 0.7669939696087845\n", + "=================================\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 27200 |\n", + "| time_elapsed | 505 |\n", + "| total_timesteps | 136000 |\n", + "| train/ | |\n", + "| entropy_loss | -46.6 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 27199 |\n", + "| policy_loss | 83.3 |\n", + "| std | 1.15 |\n", + "| value_loss | 4.52 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + 
"| fps | 269 |\n", + "| iterations | 27300 |\n", + "| time_elapsed | 507 |\n", + "| total_timesteps | 136500 |\n", + "| train/ | |\n", + "| entropy_loss | -46.7 |\n", + "| explained_variance | -242 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 27299 |\n", + "| policy_loss | 196 |\n", + "| std | 1.15 |\n", + "| value_loss | 27.6 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 27400 |\n", + "| time_elapsed | 509 |\n", + "| total_timesteps | 137000 |\n", + "| train/ | |\n", + "| entropy_loss | -46.7 |\n", + "| explained_variance | -256 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 27399 |\n", + "| policy_loss | -14.1 |\n", + "| std | 1.15 |\n", + "| value_loss | 0.802 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 27500 |\n", + "| time_elapsed | 510 |\n", + "| total_timesteps | 137500 |\n", + "| train/ | |\n", + "| entropy_loss | -46.7 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 27499 |\n", + "| policy_loss | -133 |\n", + "| std | 1.15 |\n", + "| value_loss | 10.8 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 27600 |\n", + "| time_elapsed | 512 |\n", + "| total_timesteps | 138000 |\n", + "| train/ | |\n", + "| entropy_loss | -46.7 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 27599 |\n", + "| policy_loss | -216 |\n", + "| std | 1.15 |\n", + "| value_loss | 23.3 |\n", + "------------------------------------\n", + "begin_total_asset:1000000\n", + "end_total_asset:3537920.924942015\n", + "total_reward:2537920.924942015\n", + "total_cost: 7636.677849389829\n", + "total_trades: 39571\n", + "Sharpe: 0.7721256456339295\n", + 
"=================================\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 27700 |\n", + "| time_elapsed | 514 |\n", + "| total_timesteps | 138500 |\n", + "| train/ | |\n", + "| entropy_loss | -46.7 |\n", + "| explained_variance | -538 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 27699 |\n", + "| policy_loss | -78.9 |\n", + "| std | 1.15 |\n", + "| value_loss | 5.95 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 27800 |\n", + "| time_elapsed | 516 |\n", + "| total_timesteps | 139000 |\n", + "| train/ | |\n", + "| entropy_loss | -46.8 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 27799 |\n", + "| policy_loss | -135 |\n", + "| std | 1.16 |\n", + "| value_loss | 11.2 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 27900 |\n", + "| time_elapsed | 518 |\n", + "| total_timesteps | 139500 |\n", + "| train/ | |\n", + "| entropy_loss | -46.8 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 27899 |\n", + "| policy_loss | -7.94 |\n", + "| std | 1.16 |\n", + "| value_loss | 2.54 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 28000 |\n", + "| time_elapsed | 520 |\n", + "| total_timesteps | 140000 |\n", + "| train/ | |\n", + "| entropy_loss | -46.9 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 27999 |\n", + "| policy_loss | -118 |\n", + "| std | 1.16 |\n", + "| value_loss | 7.13 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 28100 |\n", + "| time_elapsed | 
522 |\n", + "| total_timesteps | 140500 |\n", + "| train/ | |\n", + "| entropy_loss | -46.9 |\n", + "| explained_variance | -1.4e+12 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 28099 |\n", + "| policy_loss | 33.8 |\n", + "| std | 1.16 |\n", + "| value_loss | 1.74 |\n", + "------------------------------------\n", + "begin_total_asset:1000000\n", + "end_total_asset:3335901.863268089\n", + "total_reward:2335901.863268089\n", + "total_cost: 6148.2616701473435\n", + "total_trades: 38459\n", + "Sharpe: 0.8009972305518047\n", + "=================================\n", + "-------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 28200 |\n", + "| time_elapsed | 523 |\n", + "| total_timesteps | 141000 |\n", + "| train/ | |\n", + "| entropy_loss | -46.9 |\n", + "| explained_variance | -1.72e+07 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 28199 |\n", + "| policy_loss | -75.4 |\n", + "| std | 1.16 |\n", + "| value_loss | 4.2 |\n", + "-------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 28300 |\n", + "| time_elapsed | 525 |\n", + "| total_timesteps | 141500 |\n", + "| train/ | |\n", + "| entropy_loss | -46.9 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 28299 |\n", + "| policy_loss | 13.6 |\n", + "| std | 1.16 |\n", + "| value_loss | 3.07 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 28400 |\n", + "| time_elapsed | 527 |\n", + "| total_timesteps | 142000 |\n", + "| train/ | |\n", + "| entropy_loss | -46.9 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 28399 |\n", + "| policy_loss | -38.5 |\n", + "| std | 1.16 |\n", + "| value_loss | 0.936 |\n", + "------------------------------------\n", + 
"------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 28500 |\n", + "| time_elapsed | 529 |\n", + "| total_timesteps | 142500 |\n", + "| train/ | |\n", + "| entropy_loss | -47 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 28499 |\n", + "| policy_loss | -20.5 |\n", + "| std | 1.16 |\n", + "| value_loss | 1.02 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 28600 |\n", + "| time_elapsed | 531 |\n", + "| total_timesteps | 143000 |\n", + "| train/ | |\n", + "| entropy_loss | -47 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 28599 |\n", + "| policy_loss | -95.6 |\n", + "| std | 1.16 |\n", + "| value_loss | 6.74 |\n", + "------------------------------------\n", + "begin_total_asset:1000000\n", + "end_total_asset:3791388.9622966833\n", + "total_reward:2791388.9622966833\n", + "total_cost: 4739.291239631439\n", + "total_trades: 36786\n", + "Sharpe: 0.8352371557337978\n", + "=================================\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 28700 |\n", + "| time_elapsed | 533 |\n", + "| total_timesteps | 143500 |\n", + "| train/ | |\n", + "| entropy_loss | -47 |\n", + "| explained_variance | -656 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 28699 |\n", + "| policy_loss | 145 |\n", + "| std | 1.17 |\n", + "| value_loss | 10 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 28800 |\n", + "| time_elapsed | 535 |\n", + "| total_timesteps | 144000 |\n", + "| train/ | |\n", + "| entropy_loss | -47.1 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 28799 |\n", + "| policy_loss | 195 |\n", + "| std | 1.17 |\n", + "| 
value_loss | 23 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 28900 |\n", + "| time_elapsed | 536 |\n", + "| total_timesteps | 144500 |\n", + "| train/ | |\n", + "| entropy_loss | -47.1 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 28899 |\n", + "| policy_loss | -26 |\n", + "| std | 1.17 |\n", + "| value_loss | 2.42 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 29000 |\n", + "| time_elapsed | 538 |\n", + "| total_timesteps | 145000 |\n", + "| train/ | |\n", + "| entropy_loss | -47.1 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 28999 |\n", + "| policy_loss | 32.1 |\n", + "| std | 1.17 |\n", + "| value_loss | 3.84 |\n", + "------------------------------------\n", + "-------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 29100 |\n", + "| time_elapsed | 540 |\n", + "| total_timesteps | 145500 |\n", + "| train/ | |\n", + "| entropy_loss | -47.1 |\n", + "| explained_variance | -1.11e+11 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 29099 |\n", + "| policy_loss | -51.3 |\n", + "| std | 1.17 |\n", + "| value_loss | 1.21 |\n", + "-------------------------------------\n", + "begin_total_asset:1000000\n", + "end_total_asset:3722466.511508156\n", + "total_reward:2722466.511508156\n", + "total_cost: 2619.4388887420964\n", + "total_trades: 36838\n", + "Sharpe: 0.8751149961312088\n", + "=================================\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 29200 |\n", + "| time_elapsed | 542 |\n", + "| total_timesteps | 146000 |\n", + "| train/ | |\n", + "| entropy_loss | -47 |\n", + "| explained_variance | -37.7 |\n", + "| learning_rate | 0.0007 |\n", + 
"| n_updates | 29199 |\n", + "| policy_loss | 97.3 |\n", + "| std | 1.17 |\n", + "| value_loss | 5.24 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 29300 |\n", + "| time_elapsed | 544 |\n", + "| total_timesteps | 146500 |\n", + "| train/ | |\n", + "| entropy_loss | -47.1 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 29299 |\n", + "| policy_loss | 63.7 |\n", + "| std | 1.17 |\n", + "| value_loss | 3.25 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 269 |\n", + "| iterations | 29400 |\n", + "| time_elapsed | 546 |\n", + "| total_timesteps | 147000 |\n", + "| train/ | |\n", + "| entropy_loss | -47.2 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 29399 |\n", + "| policy_loss | 76.1 |\n", + "| std | 1.17 |\n", + "| value_loss | 3.03 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 268 |\n", + "| iterations | 29500 |\n", + "| time_elapsed | 548 |\n", + "| total_timesteps | 147500 |\n", + "| train/ | |\n", + "| entropy_loss | -47.2 |\n", + "| explained_variance | -134 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 29499 |\n", + "| policy_loss | -178 |\n", + "| std | 1.17 |\n", + "| value_loss | 15.8 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 268 |\n", + "| iterations | 29600 |\n", + "| time_elapsed | 550 |\n", + "| total_timesteps | 148000 |\n", + "| train/ | |\n", + "| entropy_loss | -47.2 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 29599 |\n", + "| policy_loss | -202 |\n", + "| std | 1.17 |\n", + "| value_loss | 15.6 |\n", + "------------------------------------\n", + 
"begin_total_asset:1000000\n", + "end_total_asset:3503288.5069474406\n", + "total_reward:2503288.5069474406\n", + "total_cost: 2306.8302833824664\n", + "total_trades: 38804\n", + "Sharpe: 0.8406587986683967\n", + "=================================\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 268 |\n", + "| iterations | 29700 |\n", + "| time_elapsed | 552 |\n", + "| total_timesteps | 148500 |\n", + "| train/ | |\n", + "| entropy_loss | -47.2 |\n", + "| explained_variance | -338 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 29699 |\n", + "| policy_loss | 174 |\n", + "| std | 1.17 |\n", + "| value_loss | 17.4 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 268 |\n", + "| iterations | 29800 |\n", + "| time_elapsed | 553 |\n", + "| total_timesteps | 149000 |\n", + "| train/ | |\n", + "| entropy_loss | -47.2 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 29799 |\n", + "| policy_loss | -106 |\n", + "| std | 1.17 |\n", + "| value_loss | 7.64 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 268 |\n", + "| iterations | 29900 |\n", + "| time_elapsed | 555 |\n", + "| total_timesteps | 149500 |\n", + "| train/ | |\n", + "| entropy_loss | -47.1 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 29899 |\n", + "| policy_loss | 67.9 |\n", + "| std | 1.17 |\n", + "| value_loss | 2.68 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| time/ | |\n", + "| fps | 268 |\n", + "| iterations | 30000 |\n", + "| time_elapsed | 557 |\n", + "| total_timesteps | 150000 |\n", + "| train/ | |\n", + "| entropy_loss | -47.1 |\n", + "| explained_variance | nan |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 29999 |\n", + "| policy_loss | -121 |\n", + "| std | 1.17 
|\n", + "| value_loss | 8.47 |\n", + "------------------------------------\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MRiOtrywfAo1" + }, + "source": [ + "### Model 2: DDPG" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "M2YadjfnLwgt", + "outputId": "3b2a8f89-0561-4083-a015-fbee11693037" + }, + "source": [ + "agent = DRLAgent(env = env_train)\n", + "model_ddpg = agent.get_model(\"ddpg\")" + ], + "execution_count": null, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "Dr49PotrfG01" - }, - "source": [ - "### Model 5: SAC" - ] + "output_type": "stream", + "text": [ + "{'batch_size': 128, 'buffer_size': 50000, 'learning_rate': 0.001}\n", + "Using cpu device\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "background_save": true, + "base_uri": "https://localhost:8080/" }, + "id": "tCDa78rqfO_a", + "outputId": "f651f8be-4c93-4b1e-c88a-7e3a09976693" + }, + "source": [ + "trained_ddpg = agent.train_model(model=model_ddpg, \n", + " tb_log_name='ddpg',\n", + " total_timesteps=50000)" + ], + "execution_count": null, + "outputs": [ { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "xwOhVjqRkCdM", - "outputId": "5ad99882-367d-49ce-d83e-124396074c12" - }, - "source": [ - "agent = DRLAgent(env = env_train)\n", - "SAC_PARAMS = {\n", - " \"batch_size\": 128,\n", - " \"buffer_size\": 1000000,\n", - " \"learning_rate\": 0.0001,\n", - " \"learning_starts\": 100,\n", - " \"ent_coef\": \"auto_0.1\",\n", - "}\n", - "\n", - "model_sac = agent.get_model(\"sac\",model_kwargs = SAC_PARAMS)" - ], - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "text": [ - "{'batch_size': 128, 'buffer_size': 1000000, 'learning_rate': 0.0001, 'learning_starts': 100, 'ent_coef': 'auto_0.1'}\n", - "Using cpu device\n" - ], - "name": "stdout" 
- } - ] + "output_type": "stream", + "text": [ + "Logging to tensorboard_log/ddpg/ddpg_1\n", + "begin_total_asset:1000000\n", + "end_total_asset:3761309.8057632465\n", + "total_reward:2761309.8057632465\n", + "total_cost: 6807.077776350557\n", + "total_trades: 39070\n", + "Sharpe: 1.0173492167488003\n", + "=================================\n", + "begin_total_asset:1000000\n", + "end_total_asset:4423657.61673363\n", + "total_reward:3423657.61673363\n", + "total_cost: 1277.392035166502\n", + "total_trades: 32819\n", + "Sharpe: 0.8726982452731067\n", + "=================================\n", + "begin_total_asset:1000000\n", + "end_total_asset:4423657.61673363\n", + "total_reward:3423657.61673363\n", + "total_cost: 1277.392035166502\n", + "total_trades: 32819\n", + "Sharpe: 0.8726982452731067\n", + "=================================\n", + "begin_total_asset:1000000\n", + "end_total_asset:4423657.61673363\n", + "total_reward:3423657.61673363\n", + "total_cost: 1277.392035166502\n", + "total_trades: 32819\n", + "Sharpe: 0.8726982452731067\n", + "=================================\n", + "---------------------------------\n", + "| time/ | |\n", + "| episodes | 4 |\n", + "| fps | 38 |\n", + "| time_elapsed | 258 |\n", + "| total timesteps | 10064 |\n", + "| train/ | |\n", + "| actor_loss | -2.81 |\n", + "| critic_loss | 272 |\n", + "| learning_rate | 0.001 |\n", + "| n_updates | 7548 |\n", + "---------------------------------\n", + "begin_total_asset:1000000\n", + "end_total_asset:4423657.61673363\n", + "total_reward:3423657.61673363\n", + "total_cost: 1277.392035166502\n", + "total_trades: 32819\n", + "Sharpe: 0.8726982452731067\n", + "=================================\n", + "begin_total_asset:1000000\n", + "end_total_asset:4423657.61673363\n", + "total_reward:3423657.61673363\n", + "total_cost: 1277.392035166502\n", + "total_trades: 32819\n", + "Sharpe: 0.8726982452731067\n", + "=================================\n", + "begin_total_asset:1000000\n", + 
"end_total_asset:4423657.61673363\n", + "total_reward:3423657.61673363\n", + "total_cost: 1277.392035166502\n", + "total_trades: 32819\n", + "Sharpe: 0.8726982452731067\n", + "=================================\n", + "begin_total_asset:1000000\n", + "end_total_asset:4423657.61673363\n", + "total_reward:3423657.61673363\n", + "total_cost: 1277.392035166502\n", + "total_trades: 32819\n", + "Sharpe: 0.8726982452731067\n", + "=================================\n", + "---------------------------------\n", + "| time/ | |\n", + "| episodes | 8 |\n", + "| fps | 33 |\n", + "| time_elapsed | 604 |\n", + "| total timesteps | 20128 |\n", + "| train/ | |\n", + "| actor_loss | -8.32 |\n", + "| critic_loss | 12.8 |\n", + "| learning_rate | 0.001 |\n", + "| n_updates | 17612 |\n", + "---------------------------------\n", + "begin_total_asset:1000000\n", + "end_total_asset:4423657.61673363\n", + "total_reward:3423657.61673363\n", + "total_cost: 1277.392035166502\n", + "total_trades: 32819\n", + "Sharpe: 0.8726982452731067\n", + "=================================\n", + "begin_total_asset:1000000\n", + "end_total_asset:4423657.61673363\n", + "total_reward:3423657.61673363\n", + "total_cost: 1277.392035166502\n", + "total_trades: 32819\n", + "Sharpe: 0.8726982452731067\n", + "=================================\n", + "begin_total_asset:1000000\n", + "end_total_asset:4423657.61673363\n", + "total_reward:3423657.61673363\n", + "total_cost: 1277.392035166502\n", + "total_trades: 32819\n", + "Sharpe: 0.8726982452731067\n", + "=================================\n", + "begin_total_asset:1000000\n", + "end_total_asset:4423657.61673363\n", + "total_reward:3423657.61673363\n", + "total_cost: 1277.392035166502\n", + "total_trades: 32819\n", + "Sharpe: 0.8726982452731067\n", + "=================================\n", + "---------------------------------\n", + "| time/ | |\n", + "| episodes | 12 |\n", + "| fps | 31 |\n", + "| time_elapsed | 953 |\n", + "| total timesteps | 30192 |\n", + "| train/ | 
|\n", + "| actor_loss | -9.46 |\n", + "| critic_loss | 4.31 |\n", + "| learning_rate | 0.001 |\n", + "| n_updates | 27676 |\n", + "---------------------------------\n", + "begin_total_asset:1000000\n", + "end_total_asset:4423657.61673363\n", + "total_reward:3423657.61673363\n", + "total_cost: 1277.392035166502\n", + "total_trades: 32819\n", + "Sharpe: 0.8726982452731067\n", + "=================================\n", + "begin_total_asset:1000000\n", + "end_total_asset:4423657.61673363\n", + "total_reward:3423657.61673363\n", + "total_cost: 1277.392035166502\n", + "total_trades: 32819\n", + "Sharpe: 0.8726982452731067\n", + "=================================\n", + "begin_total_asset:1000000\n", + "end_total_asset:4423657.61673363\n", + "total_reward:3423657.61673363\n", + "total_cost: 1277.392035166502\n", + "total_trades: 32819\n", + "Sharpe: 0.8726982452731067\n", + "=================================\n", + "begin_total_asset:1000000\n", + "end_total_asset:4423657.61673363\n", + "total_reward:3423657.61673363\n", + "total_cost: 1277.392035166502\n", + "total_trades: 32819\n", + "Sharpe: 0.8726982452731067\n", + "=================================\n", + "---------------------------------\n", + "| time/ | |\n", + "| episodes | 16 |\n", + "| fps | 30 |\n", + "| time_elapsed | 1309 |\n", + "| total timesteps | 40256 |\n", + "| train/ | |\n", + "| actor_loss | -10.2 |\n", + "| critic_loss | 3.19 |\n", + "| learning_rate | 0.001 |\n", + "| n_updates | 37740 |\n", + "---------------------------------\n", + "begin_total_asset:1000000\n", + "end_total_asset:4423657.61673363\n", + "total_reward:3423657.61673363\n", + "total_cost: 1277.392035166502\n", + "total_trades: 32819\n", + "Sharpe: 0.8726982452731067\n", + "=================================\n", + "begin_total_asset:1000000\n", + "end_total_asset:4423657.61673363\n", + "total_reward:3423657.61673363\n", + "total_cost: 1277.392035166502\n", + "total_trades: 32819\n", + "Sharpe: 0.8726982452731067\n", + 
"=================================\n", + "begin_total_asset:1000000\n", + "end_total_asset:4423657.61673363\n", + "total_reward:3423657.61673363\n", + "total_cost: 1277.392035166502\n", + "total_trades: 32819\n", + "Sharpe: 0.8726982452731067\n", + "=================================\n", + "begin_total_asset:1000000\n", + "end_total_asset:4423657.61673363\n", + "total_reward:3423657.61673363\n", + "total_cost: 1277.392035166502\n", + "total_trades: 32819\n", + "Sharpe: 0.8726982452731067\n", + "=================================\n", + "---------------------------------\n", + "| time/ | |\n", + "| episodes | 20 |\n", + "| fps | 30 |\n", + "| time_elapsed | 1675 |\n", + "| total timesteps | 50320 |\n", + "| train/ | |\n", + "| actor_loss | -11.1 |\n", + "| critic_loss | 2.24 |\n", + "| learning_rate | 0.001 |\n", + "| n_updates | 47804 |\n", + "---------------------------------\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_gDkU-j-fCmZ" + }, + "source": [ + "### Model 3: PPO" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "y5D5PFUhMzSV", + "outputId": "2716af5e-06e5-4eab-b071-a506c60a0475" + }, + "source": [ + "agent = DRLAgent(env = env_train)\n", + "PPO_PARAMS = {\n", + " \"n_steps\": 2048,\n", + " \"ent_coef\": 0.01,\n", + " \"learning_rate\": 0.00025,\n", + " \"batch_size\": 128,\n", + "}\n", + "model_ppo = agent.get_model(\"ppo\",model_kwargs = PPO_PARAMS)" + ], + "execution_count": null, + "outputs": [ { - "cell_type": "code", - "metadata": { - "colab": { - "background_save": true, - "base_uri": "https://localhost:8080/" - }, - "id": "K8RSdKCckJyH", - "outputId": "8dfca8da-65ea-4e61-f7c7-16094ea00cc0" - }, - "source": [ - "trained_sac = agent.train_model(model=model_sac, \n", - " tb_log_name='sac',\n", - " total_timesteps=80000)" - ], - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "text": [ - "Logging to 
tensorboard_log/sac/sac_8\n", - "---------------------------------\n", - "| time/ | |\n", - "| episodes | 4 |\n", - "| fps | 27 |\n", - "| time_elapsed | 372 |\n", - "| total timesteps | 10064 |\n", - "| train/ | |\n", - "| actor_loss | 1.76e+03 |\n", - "| critic_loss | 1.53e+03 |\n", - "| ent_coef | 0.243 |\n", - "| ent_coef_loss | 169 |\n", - "| learning_rate | 0.0001 |\n", - "| n_updates | 9963 |\n", - "---------------------------------\n", - "day: 2515, episode: 220\n", - "begin_total_asset:1000000.00\n", - "end_total_asset:4405540.86\n", - "total_reward:3405540.86\n", - "total_cost: 74774.48\n", - "total_trades: 56475\n", - "Sharpe: 0.954\n", - "=================================\n", - "---------------------------------\n", - "| time/ | |\n", - "| episodes | 8 |\n", - "| fps | 26 |\n", - "| time_elapsed | 749 |\n", - "| total timesteps | 20128 |\n", - "| train/ | |\n", - "| actor_loss | 975 |\n", - "| critic_loss | 480 |\n", - "| ent_coef | 0.121 |\n", - "| ent_coef_loss | -91.8 |\n", - "| learning_rate | 0.0001 |\n", - "| n_updates | 20027 |\n", - "---------------------------------\n", - "---------------------------------\n", - "| time/ | |\n", - "| episodes | 12 |\n", - "| fps | 26 |\n", - "| time_elapsed | 1132 |\n", - "| total timesteps | 30192 |\n", - "| train/ | |\n", - "| actor_loss | 574 |\n", - "| critic_loss | 4.49e+03 |\n", - "| ent_coef | 0.0453 |\n", - "| ent_coef_loss | -103 |\n", - "| learning_rate | 0.0001 |\n", - "| n_updates | 30091 |\n", - "---------------------------------\n", - "day: 2515, episode: 230\n", - "begin_total_asset:1000000.00\n", - "end_total_asset:4828464.65\n", - "total_reward:3828464.65\n", - "total_cost: 2997.76\n", - "total_trades: 39564\n", - "Sharpe: 0.994\n", - "=================================\n", - "---------------------------------\n", - "| time/ | |\n", - "| episodes | 16 |\n", - "| fps | 26 |\n", - "| time_elapsed | 1517 |\n", - "| total timesteps | 40256 |\n", - "| train/ | |\n", - "| actor_loss | 348 |\n", - "| 
critic_loss | 23.1 |\n", - "| ent_coef | 0.0173 |\n", - "| ent_coef_loss | -87.4 |\n", - "| learning_rate | 0.0001 |\n", - "| n_updates | 40155 |\n", - "---------------------------------\n", - "---------------------------------\n", - "| time/ | |\n", - "| episodes | 20 |\n", - "| fps | 26 |\n", - "| time_elapsed | 1903 |\n", - "| total timesteps | 50320 |\n", - "| train/ | |\n", - "| actor_loss | 205 |\n", - "| critic_loss | 10.6 |\n", - "| ent_coef | 0.00687 |\n", - "| ent_coef_loss | -45.2 |\n", - "| learning_rate | 0.0001 |\n", - "| n_updates | 50219 |\n", - "---------------------------------\n", - "---------------------------------\n", - "| time/ | |\n", - "| episodes | 24 |\n", - "| fps | 26 |\n", - "| time_elapsed | 2291 |\n", - "| total timesteps | 60384 |\n", - "| train/ | |\n", - "| actor_loss | 127 |\n", - "| critic_loss | 9.65 |\n", - "| ent_coef | 0.00328 |\n", - "| ent_coef_loss | -0.401 |\n", - "| learning_rate | 0.0001 |\n", - "| n_updates | 60283 |\n", - "---------------------------------\n", - "day: 2515, episode: 240\n", - "begin_total_asset:1000000.00\n", - "end_total_asset:5207375.51\n", - "total_reward:4207375.51\n", - "total_cost: 1768.49\n", - "total_trades: 38369\n", - "Sharpe: 1.077\n", - "=================================\n", - "---------------------------------\n", - "| time/ | |\n", - "| episodes | 28 |\n", - "| fps | 26 |\n", - "| time_elapsed | 2687 |\n", - "| total timesteps | 70448 |\n", - "| train/ | |\n", - "| actor_loss | 86.5 |\n", - "| critic_loss | 9.16 |\n", - "| ent_coef | 0.00253 |\n", - "| ent_coef_loss | 1.39 |\n", - "| learning_rate | 0.0001 |\n", - "| n_updates | 70347 |\n", - "---------------------------------\n" - ], - "name": "stdout" - } - ] + "output_type": "stream", + "text": [ + "{'n_steps': 2048, 'ent_coef': 0.01, 'learning_rate': 0.00025, 'batch_size': 128}\n", + "Using cpu device\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": 
"https://localhost:8080/" }, + "id": "Gt8eIQKYM4G3", + "outputId": "1016cc05-58b6-45dc-c871-a322f1c3dc89" + }, + "source": [ + "trained_ppo = agent.train_model(model=model_ppo, \n", + " tb_log_name='ppo',\n", + " total_timesteps=100000)" + ], + "execution_count": null, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "f2wZgkQXh1jE" - }, - "source": [ - "## Trading\n", - "Assume that we have $1,000,000 initial capital at 2019-01-01. We use the DDPG model to trade Dow jones 30 stocks." - ] + "output_type": "stream", + "text": [ + "Logging to tensorboard_log/ppo/ppo_2\n", + "-----------------------------\n", + "| time/ | |\n", + "| fps | 104 |\n", + "| iterations | 1 |\n", + "| time_elapsed | 19 |\n", + "| total_timesteps | 2048 |\n", + "-----------------------------\n", + "-----------------------------------------\n", + "| time/ | |\n", + "| fps | 102 |\n", + "| iterations | 2 |\n", + "| time_elapsed | 39 |\n", + "| total_timesteps | 4096 |\n", + "| train/ | |\n", + "| approx_kl | 0.014151055 |\n", + "| clip_fraction | 0.212 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -42.6 |\n", + "| explained_variance | -28.1 |\n", + "| learning_rate | 0.00025 |\n", + "| loss | 5.76 |\n", + "| n_updates | 10 |\n", + "| policy_gradient_loss | -0.0277 |\n", + "| std | 1 |\n", + "| value_loss | 12 |\n", + "-----------------------------------------\n", + "-----------------------------------------\n", + "| time/ | |\n", + "| fps | 101 |\n", + "| iterations | 3 |\n", + "| time_elapsed | 60 |\n", + "| total_timesteps | 6144 |\n", + "| train/ | |\n", + "| approx_kl | 0.016467014 |\n", + "| clip_fraction | 0.186 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -42.6 |\n", + "| explained_variance | -176 |\n", + "| learning_rate | 0.00025 |\n", + "| loss | 9.99 |\n", + "| n_updates | 20 |\n", + "| policy_gradient_loss | -0.0275 |\n", + "| std | 1 |\n", + "| value_loss | 18.9 |\n", + "-----------------------------------------\n", + 
"-----------------------------------------\n", + "| time/ | |\n", + "| fps | 101 |\n", + "| iterations | 4 |\n", + "| time_elapsed | 80 |\n", + "| total_timesteps | 8192 |\n", + "| train/ | |\n", + "| approx_kl | 0.020772668 |\n", + "| clip_fraction | 0.191 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -42.6 |\n", + "| explained_variance | -87.8 |\n", + "| learning_rate | 0.00025 |\n", + "| loss | 16.7 |\n", + "| n_updates | 30 |\n", + "| policy_gradient_loss | -0.028 |\n", + "| std | 1 |\n", + "| value_loss | 32.2 |\n", + "-----------------------------------------\n", + "-----------------------------------------\n", + "| time/ | |\n", + "| fps | 101 |\n", + "| iterations | 5 |\n", + "| time_elapsed | 101 |\n", + "| total_timesteps | 10240 |\n", + "| train/ | |\n", + "| approx_kl | 0.019156657 |\n", + "| clip_fraction | 0.225 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -42.7 |\n", + "| explained_variance | -81.3 |\n", + "| learning_rate | 0.00025 |\n", + "| loss | 11 |\n", + "| n_updates | 40 |\n", + "| policy_gradient_loss | -0.0184 |\n", + "| std | 1 |\n", + "| value_loss | 26.6 |\n", + "-----------------------------------------\n", + "----------------------------------------\n", + "| time/ | |\n", + "| fps | 100 |\n", + "| iterations | 6 |\n", + "| time_elapsed | 122 |\n", + "| total_timesteps | 12288 |\n", + "| train/ | |\n", + "| approx_kl | 0.02388929 |\n", + "| clip_fraction | 0.223 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -42.7 |\n", + "| explained_variance | -67 |\n", + "| learning_rate | 0.00025 |\n", + "| loss | 7.86 |\n", + "| n_updates | 50 |\n", + "| policy_gradient_loss | -0.0269 |\n", + "| std | 1.01 |\n", + "| value_loss | 23 |\n", + "----------------------------------------\n", + "-----------------------------------------\n", + "| time/ | |\n", + "| fps | 100 |\n", + "| iterations | 7 |\n", + "| time_elapsed | 142 |\n", + "| total_timesteps | 14336 |\n", + "| train/ | |\n", + "| approx_kl | 0.023960019 |\n", + "| 
clip_fraction | 0.21 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -42.8 |\n", + "| explained_variance | -58.1 |\n", + "| learning_rate | 0.00025 |\n", + "| loss | 6.32 |\n", + "| n_updates | 60 |\n", + "| policy_gradient_loss | -0.0234 |\n", + "| std | 1.01 |\n", + "| value_loss | 12 |\n", + "-----------------------------------------\n", + "-----------------------------------------\n", + "| time/ | |\n", + "| fps | 100 |\n", + "| iterations | 8 |\n", + "| time_elapsed | 163 |\n", + "| total_timesteps | 16384 |\n", + "| train/ | |\n", + "| approx_kl | 0.021991765 |\n", + "| clip_fraction | 0.212 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -42.8 |\n", + "| explained_variance | -36.4 |\n", + "| learning_rate | 0.00025 |\n", + "| loss | 9.39 |\n", + "| n_updates | 70 |\n", + "| policy_gradient_loss | -0.0243 |\n", + "| std | 1.01 |\n", + "| value_loss | 19.9 |\n", + "-----------------------------------------\n", + "----------------------------------------\n", + "| time/ | |\n", + "| fps | 100 |\n", + "| iterations | 9 |\n", + "| time_elapsed | 183 |\n", + "| total_timesteps | 18432 |\n", + "| train/ | |\n", + "| approx_kl | 0.01857267 |\n", + "| clip_fraction | 0.205 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -42.9 |\n", + "| explained_variance | -59.3 |\n", + "| learning_rate | 0.00025 |\n", + "| loss | 8.22 |\n", + "| n_updates | 80 |\n", + "| policy_gradient_loss | -0.0235 |\n", + "| std | 1.01 |\n", + "| value_loss | 20.5 |\n", + "----------------------------------------\n", + "day: 2515, episode: 130\n", + "begin_total_asset:1000000.00\n", + "end_total_asset:3383653.45\n", + "total_reward:2383653.45\n", + "total_cost: 255155.22\n", + "total_trades: 72649\n", + "Sharpe: 0.863\n", + "=================================\n", + "-----------------------------------------\n", + "| time/ | |\n", + "| fps | 100 |\n", + "| iterations | 10 |\n", + "| time_elapsed | 203 |\n", + "| total_timesteps | 20480 |\n", + "| train/ | |\n", + "| approx_kl | 
0.022291362 |\n", + "| clip_fraction | 0.213 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -42.9 |\n", + "| explained_variance | -70.1 |\n", + "| learning_rate | 0.00025 |\n", + "| loss | 12.4 |\n", + "| n_updates | 90 |\n", + "| policy_gradient_loss | -0.019 |\n", + "| std | 1.01 |\n", + "| value_loss | 34.1 |\n", + "-----------------------------------------\n", + "-----------------------------------------\n", + "| time/ | |\n", + "| fps | 100 |\n", + "| iterations | 11 |\n", + "| time_elapsed | 224 |\n", + "| total_timesteps | 22528 |\n", + "| train/ | |\n", + "| approx_kl | 0.017316487 |\n", + "| clip_fraction | 0.22 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -42.9 |\n", + "| explained_variance | -159 |\n", + "| learning_rate | 0.00025 |\n", + "| loss | 21.4 |\n", + "| n_updates | 100 |\n", + "| policy_gradient_loss | -0.0182 |\n", + "| std | 1.01 |\n", + "| value_loss | 38.8 |\n", + "-----------------------------------------\n", + "-----------------------------------------\n", + "| time/ | |\n", + "| fps | 100 |\n", + "| iterations | 12 |\n", + "| time_elapsed | 244 |\n", + "| total_timesteps | 24576 |\n", + "| train/ | |\n", + "| approx_kl | 0.018951386 |\n", + "| clip_fraction | 0.179 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -43 |\n", + "| explained_variance | -25.3 |\n", + "| learning_rate | 0.00025 |\n", + "| loss | 11.4 |\n", + "| n_updates | 110 |\n", + "| policy_gradient_loss | -0.0135 |\n", + "| std | 1.02 |\n", + "| value_loss | 29.9 |\n", + "-----------------------------------------\n", + "-----------------------------------------\n", + "| time/ | |\n", + "| fps | 100 |\n", + "| iterations | 13 |\n", + "| time_elapsed | 265 |\n", + "| total_timesteps | 26624 |\n", + "| train/ | |\n", + "| approx_kl | 0.033302963 |\n", + "| clip_fraction | 0.298 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -43.1 |\n", + "| explained_variance | -58.1 |\n", + "| learning_rate | 0.00025 |\n", + "| loss | 9.17 |\n", + "| n_updates | 
120 |\n", + "| policy_gradient_loss | -0.0236 |\n", + "| std | 1.02 |\n", + "| value_loss | 28.3 |\n", + "-----------------------------------------\n", + "-----------------------------------------\n", + "| time/ | |\n", + "| fps | 100 |\n", + "| iterations | 14 |\n", + "| time_elapsed | 285 |\n", + "| total_timesteps | 28672 |\n", + "| train/ | |\n", + "| approx_kl | 0.027676268 |\n", + "| clip_fraction | 0.278 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -43.1 |\n", + "| explained_variance | -91.7 |\n", + "| learning_rate | 0.00025 |\n", + "| loss | 12.8 |\n", + "| n_updates | 130 |\n", + "| policy_gradient_loss | -0.0192 |\n", + "| std | 1.02 |\n", + "| value_loss | 32.7 |\n", + "-----------------------------------------\n", + "-----------------------------------------\n", + "| time/ | |\n", + "| fps | 100 |\n", + "| iterations | 15 |\n", + "| time_elapsed | 306 |\n", + "| total_timesteps | 30720 |\n", + "| train/ | |\n", + "| approx_kl | 0.027800845 |\n", + "| clip_fraction | 0.233 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -43.2 |\n", + "| explained_variance | -85.9 |\n", + "| learning_rate | 0.00025 |\n", + "| loss | 26 |\n", + "| n_updates | 140 |\n", + "| policy_gradient_loss | -0.0217 |\n", + "| std | 1.02 |\n", + "| value_loss | 40.1 |\n", + "-----------------------------------------\n", + "-----------------------------------------\n", + "| time/ | |\n", + "| fps | 100 |\n", + "| iterations | 16 |\n", + "| time_elapsed | 326 |\n", + "| total_timesteps | 32768 |\n", + "| train/ | |\n", + "| approx_kl | 0.016968882 |\n", + "| clip_fraction | 0.219 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -43.3 |\n", + "| explained_variance | -71.3 |\n", + "| learning_rate | 0.00025 |\n", + "| loss | 10.2 |\n", + "| n_updates | 150 |\n", + "| policy_gradient_loss | -0.0209 |\n", + "| std | 1.02 |\n", + "| value_loss | 26.9 |\n", + "-----------------------------------------\n", + "-----------------------------------------\n", + "| time/ | |\n", 
+ "| fps | 100 |\n", + "| iterations | 17 |\n", + "| time_elapsed | 347 |\n", + "| total_timesteps | 34816 |\n", + "| train/ | |\n", + "| approx_kl | 0.022131229 |\n", + "| clip_fraction | 0.215 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -43.3 |\n", + "| explained_variance | -15.7 |\n", + "| learning_rate | 0.00025 |\n", + "| loss | 20.6 |\n", + "| n_updates | 160 |\n", + "| policy_gradient_loss | -0.0153 |\n", + "| std | 1.03 |\n", + "| value_loss | 49.1 |\n", + "-----------------------------------------\n", + "-----------------------------------------\n", + "| time/ | |\n", + "| fps | 100 |\n", + "| iterations | 18 |\n", + "| time_elapsed | 368 |\n", + "| total_timesteps | 36864 |\n", + "| train/ | |\n", + "| approx_kl | 0.029286291 |\n", + "| clip_fraction | 0.266 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -43.4 |\n", + "| explained_variance | -43.9 |\n", + "| learning_rate | 0.00025 |\n", + "| loss | 13.2 |\n", + "| n_updates | 170 |\n", + "| policy_gradient_loss | -0.015 |\n", + "| std | 1.03 |\n", + "| value_loss | 19.9 |\n", + "-----------------------------------------\n", + "-----------------------------------------\n", + "| time/ | |\n", + "| fps | 100 |\n", + "| iterations | 19 |\n", + "| time_elapsed | 388 |\n", + "| total_timesteps | 38912 |\n", + "| train/ | |\n", + "| approx_kl | 0.027719798 |\n", + "| clip_fraction | 0.24 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -43.4 |\n", + "| explained_variance | -131 |\n", + "| learning_rate | 0.00025 |\n", + "| loss | 16.8 |\n", + "| n_updates | 180 |\n", + "| policy_gradient_loss | -0.0183 |\n", + "| std | 1.03 |\n", + "| value_loss | 34 |\n", + "-----------------------------------------\n", + "-----------------------------------------\n", + "| time/ | |\n", + "| fps | 100 |\n", + "| iterations | 20 |\n", + "| time_elapsed | 409 |\n", + "| total_timesteps | 40960 |\n", + "| train/ | |\n", + "| approx_kl | 0.022764063 |\n", + "| clip_fraction | 0.217 |\n", + "| clip_range | 0.2 
|\n", + "| entropy_loss | -43.5 |\n", + "| explained_variance | -63.1 |\n", + "| learning_rate | 0.00025 |\n", + "| loss | 22.5 |\n", + "| n_updates | 190 |\n", + "| policy_gradient_loss | -0.0186 |\n", + "| std | 1.03 |\n", + "| value_loss | 37.9 |\n", + "-----------------------------------------\n", + "----------------------------------------\n", + "| time/ | |\n", + "| fps | 100 |\n", + "| iterations | 21 |\n", + "| time_elapsed | 429 |\n", + "| total_timesteps | 43008 |\n", + "| train/ | |\n", + "| approx_kl | 0.02734076 |\n", + "| clip_fraction | 0.208 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -43.5 |\n", + "| explained_variance | -113 |\n", + "| learning_rate | 0.00025 |\n", + "| loss | 21 |\n", + "| n_updates | 200 |\n", + "| policy_gradient_loss | -0.0201 |\n", + "| std | 1.03 |\n", + "| value_loss | 60.7 |\n", + "----------------------------------------\n", + "-----------------------------------------\n", + "| time/ | |\n", + "| fps | 100 |\n", + "| iterations | 22 |\n", + "| time_elapsed | 450 |\n", + "| total_timesteps | 45056 |\n", + "| train/ | |\n", + "| approx_kl | 0.023378888 |\n", + "| clip_fraction | 0.277 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -43.6 |\n", + "| explained_variance | -57 |\n", + "| learning_rate | 0.00025 |\n", + "| loss | 19.6 |\n", + "| n_updates | 210 |\n", + "| policy_gradient_loss | -0.0153 |\n", + "| std | 1.03 |\n", + "| value_loss | 38.9 |\n", + "-----------------------------------------\n", + "day: 2515, episode: 140\n", + "begin_total_asset:1000000.00\n", + "end_total_asset:5223199.40\n", + "total_reward:4223199.40\n", + "total_cost: 235269.98\n", + "total_trades: 71552\n", + "Sharpe: 1.128\n", + "=================================\n", + "-----------------------------------------\n", + "| time/ | |\n", + "| fps | 100 |\n", + "| iterations | 23 |\n", + "| time_elapsed | 470 |\n", + "| total_timesteps | 47104 |\n", + "| train/ | |\n", + "| approx_kl | 0.025331508 |\n", + "| clip_fraction | 0.29 
|\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -43.6 |\n", + "| explained_variance | -61.4 |\n", + "| learning_rate | 0.00025 |\n", + "| loss | 19.7 |\n", + "| n_updates | 220 |\n", + "| policy_gradient_loss | -0.0119 |\n", + "| std | 1.04 |\n", + "| value_loss | 34.8 |\n", + "-----------------------------------------\n", + "-----------------------------------------\n", + "| time/ | |\n", + "| fps | 100 |\n", + "| iterations | 24 |\n", + "| time_elapsed | 491 |\n", + "| total_timesteps | 49152 |\n", + "| train/ | |\n", + "| approx_kl | 0.025766762 |\n", + "| clip_fraction | 0.231 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -43.7 |\n", + "| explained_variance | -64.7 |\n", + "| learning_rate | 0.00025 |\n", + "| loss | 20.4 |\n", + "| n_updates | 230 |\n", + "| policy_gradient_loss | -0.0187 |\n", + "| std | 1.04 |\n", + "| value_loss | 47.4 |\n", + "-----------------------------------------\n", + "-----------------------------------------\n", + "| time/ | |\n", + "| fps | 100 |\n", + "| iterations | 25 |\n", + "| time_elapsed | 511 |\n", + "| total_timesteps | 51200 |\n", + "| train/ | |\n", + "| approx_kl | 0.041917183 |\n", + "| clip_fraction | 0.278 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -43.8 |\n", + "| explained_variance | -34 |\n", + "| learning_rate | 0.00025 |\n", + "| loss | 22.2 |\n", + "| n_updates | 240 |\n", + "| policy_gradient_loss | -0.0164 |\n", + "| std | 1.04 |\n", + "| value_loss | 48 |\n", + "-----------------------------------------\n", + "---------------------------------------\n", + "| time/ | |\n", + "| fps | 100 |\n", + "| iterations | 26 |\n", + "| time_elapsed | 531 |\n", + "| total_timesteps | 53248 |\n", + "| train/ | |\n", + "| approx_kl | 0.0367468 |\n", + "| clip_fraction | 0.273 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -43.8 |\n", + "| explained_variance | -48.1 |\n", + "| learning_rate | 0.00025 |\n", + "| loss | 21.5 |\n", + "| n_updates | 250 |\n", + "| policy_gradient_loss | -0.00821 
|\n", + "| std | 1.04 |\n", + "| value_loss | 39.5 |\n", + "---------------------------------------\n", + "-----------------------------------------\n", + "| time/ | |\n", + "| fps | 100 |\n", + "| iterations | 27 |\n", + "| time_elapsed | 552 |\n", + "| total_timesteps | 55296 |\n", + "| train/ | |\n", + "| approx_kl | 0.024581099 |\n", + "| clip_fraction | 0.211 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -43.9 |\n", + "| explained_variance | -198 |\n", + "| learning_rate | 0.00025 |\n", + "| loss | 32.1 |\n", + "| n_updates | 260 |\n", + "| policy_gradient_loss | -0.0106 |\n", + "| std | 1.05 |\n", + "| value_loss | 58.2 |\n", + "-----------------------------------------\n", + "----------------------------------------\n", + "| time/ | |\n", + "| fps | 100 |\n", + "| iterations | 28 |\n", + "| time_elapsed | 573 |\n", + "| total_timesteps | 57344 |\n", + "| train/ | |\n", + "| approx_kl | 0.02569989 |\n", + "| clip_fraction | 0.209 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -43.9 |\n", + "| explained_variance | -161 |\n", + "| learning_rate | 0.00025 |\n", + "| loss | 25.1 |\n", + "| n_updates | 270 |\n", + "| policy_gradient_loss | -0.0137 |\n", + "| std | 1.05 |\n", + "| value_loss | 55 |\n", + "----------------------------------------\n", + "-----------------------------------------\n", + "| time/ | |\n", + "| fps | 100 |\n", + "| iterations | 29 |\n", + "| time_elapsed | 593 |\n", + "| total_timesteps | 59392 |\n", + "| train/ | |\n", + "| approx_kl | 0.032340243 |\n", + "| clip_fraction | 0.252 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -44 |\n", + "| explained_variance | -24.1 |\n", + "| learning_rate | 0.00025 |\n", + "| loss | 9.23 |\n", + "| n_updates | 280 |\n", + "| policy_gradient_loss | -0.0167 |\n", + "| std | 1.05 |\n", + "| value_loss | 34 |\n", + "-----------------------------------------\n", + "-----------------------------------------\n", + "| time/ | |\n", + "| fps | 100 |\n", + "| iterations | 30 |\n", + "| 
time_elapsed | 613 |\n", + "| total_timesteps | 61440 |\n", + "| train/ | |\n", + "| approx_kl | 0.018233867 |\n", + "| clip_fraction | 0.239 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -44 |\n", + "| explained_variance | -34.1 |\n", + "| learning_rate | 0.00025 |\n", + "| loss | 28.1 |\n", + "| n_updates | 290 |\n", + "| policy_gradient_loss | -0.0158 |\n", + "| std | 1.05 |\n", + "| value_loss | 41.2 |\n", + "-----------------------------------------\n", + "-----------------------------------------\n", + "| time/ | |\n", + "| fps | 100 |\n", + "| iterations | 31 |\n", + "| time_elapsed | 634 |\n", + "| total_timesteps | 63488 |\n", + "| train/ | |\n", + "| approx_kl | 0.030068567 |\n", + "| clip_fraction | 0.152 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -44 |\n", + "| explained_variance | -26.1 |\n", + "| learning_rate | 0.00025 |\n", + "| loss | 19.2 |\n", + "| n_updates | 300 |\n", + "| policy_gradient_loss | -0.0121 |\n", + "| std | 1.05 |\n", + "| value_loss | 64.9 |\n", + "-----------------------------------------\n", + "-----------------------------------------\n", + "| time/ | |\n", + "| fps | 100 |\n", + "| iterations | 32 |\n", + "| time_elapsed | 654 |\n", + "| total_timesteps | 65536 |\n", + "| train/ | |\n", + "| approx_kl | 0.024889158 |\n", + "| clip_fraction | 0.27 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -44.1 |\n", + "| explained_variance | -31.2 |\n", + "| learning_rate | 0.00025 |\n", + "| loss | 37.3 |\n", + "| n_updates | 310 |\n", + "| policy_gradient_loss | -0.0148 |\n", + "| std | 1.05 |\n", + "| value_loss | 58 |\n", + "-----------------------------------------\n", + "----------------------------------------\n", + "| time/ | |\n", + "| fps | 100 |\n", + "| iterations | 33 |\n", + "| time_elapsed | 674 |\n", + "| total_timesteps | 67584 |\n", + "| train/ | |\n", + "| approx_kl | 0.03883523 |\n", + "| clip_fraction | 0.234 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -44.1 |\n", + "| 
explained_variance | -39.9 |\n", + "| learning_rate | 0.00025 |\n", + "| loss | 24.5 |\n", + "| n_updates | 320 |\n", + "| policy_gradient_loss | -0.0121 |\n", + "| std | 1.05 |\n", + "| value_loss | 84.4 |\n", + "----------------------------------------\n", + "-----------------------------------------\n", + "| time/ | |\n", + "| fps | 100 |\n", + "| iterations | 34 |\n", + "| time_elapsed | 695 |\n", + "| total_timesteps | 69632 |\n", + "| train/ | |\n", + "| approx_kl | 0.024309162 |\n", + "| clip_fraction | 0.225 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -44.2 |\n", + "| explained_variance | -12.9 |\n", + "| learning_rate | 0.00025 |\n", + "| loss | 8.79 |\n", + "| n_updates | 330 |\n", + "| policy_gradient_loss | -0.015 |\n", + "| std | 1.06 |\n", + "| value_loss | 23.8 |\n", + "-----------------------------------------\n", + "day: 2515, episode: 150\n", + "begin_total_asset:1000000.00\n", + "end_total_asset:6320097.75\n", + "total_reward:5320097.75\n", + "total_cost: 222029.44\n", + "total_trades: 69973\n", + "Sharpe: 1.250\n", + "=================================\n", + "-----------------------------------------\n", + "| time/ | |\n", + "| fps | 100 |\n", + "| iterations | 35 |\n", + "| time_elapsed | 715 |\n", + "| total_timesteps | 71680 |\n", + "| train/ | |\n", + "| approx_kl | 0.024664927 |\n", + "| clip_fraction | 0.183 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -44.2 |\n", + "| explained_variance | -17.2 |\n", + "| learning_rate | 0.00025 |\n", + "| loss | 15.3 |\n", + "| n_updates | 340 |\n", + "| policy_gradient_loss | -0.0141 |\n", + "| std | 1.06 |\n", + "| value_loss | 48.7 |\n", + "-----------------------------------------\n", + "----------------------------------------\n", + "| time/ | |\n", + "| fps | 100 |\n", + "| iterations | 36 |\n", + "| time_elapsed | 735 |\n", + "| total_timesteps | 73728 |\n", + "| train/ | |\n", + "| approx_kl | 0.03882557 |\n", + "| clip_fraction | 0.207 |\n", + "| clip_range | 0.2 |\n", + "| 
entropy_loss | -44.3 |\n", + "| explained_variance | -27.1 |\n", + "| learning_rate | 0.00025 |\n", + "| loss | 50.3 |\n", + "| n_updates | 350 |\n", + "| policy_gradient_loss | -0.0141 |\n", + "| std | 1.06 |\n", + "| value_loss | 93.7 |\n", + "----------------------------------------\n", + "-----------------------------------------\n", + "| time/ | |\n", + "| fps | 100 |\n", + "| iterations | 37 |\n", + "| time_elapsed | 756 |\n", + "| total_timesteps | 75776 |\n", + "| train/ | |\n", + "| approx_kl | 0.022156972 |\n", + "| clip_fraction | 0.214 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -44.3 |\n", + "| explained_variance | -23.9 |\n", + "| learning_rate | 0.00025 |\n", + "| loss | 26.5 |\n", + "| n_updates | 360 |\n", + "| policy_gradient_loss | -0.0161 |\n", + "| std | 1.06 |\n", + "| value_loss | 71.7 |\n", + "-----------------------------------------\n", + "-----------------------------------------\n", + "| time/ | |\n", + "| fps | 100 |\n", + "| iterations | 38 |\n", + "| time_elapsed | 776 |\n", + "| total_timesteps | 77824 |\n", + "| train/ | |\n", + "| approx_kl | 0.022767432 |\n", + "| clip_fraction | 0.223 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -44.4 |\n", + "| explained_variance | -17.5 |\n", + "| learning_rate | 0.00025 |\n", + "| loss | 23.8 |\n", + "| n_updates | 370 |\n", + "| policy_gradient_loss | -0.0154 |\n", + "| std | 1.06 |\n", + "| value_loss | 38.7 |\n", + "-----------------------------------------\n", + "-----------------------------------------\n", + "| time/ | |\n", + "| fps | 100 |\n", + "| iterations | 39 |\n", + "| time_elapsed | 797 |\n", + "| total_timesteps | 79872 |\n", + "| train/ | |\n", + "| approx_kl | 0.020827759 |\n", + "| clip_fraction | 0.178 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -44.4 |\n", + "| explained_variance | -56 |\n", + "| learning_rate | 0.00025 |\n", + "| loss | 36.3 |\n", + "| n_updates | 380 |\n", + "| policy_gradient_loss | -0.00964 |\n", + "| std | 1.07 |\n", + "| 
value_loss | 82.1 |\n", + "-----------------------------------------\n", + "-----------------------------------------\n", + "| time/ | |\n", + "| fps | 100 |\n", + "| iterations | 40 |\n", + "| time_elapsed | 817 |\n", + "| total_timesteps | 81920 |\n", + "| train/ | |\n", + "| approx_kl | 0.013000591 |\n", + "| clip_fraction | 0.132 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -44.5 |\n", + "| explained_variance | -23 |\n", + "| learning_rate | 0.00025 |\n", + "| loss | 14 |\n", + "| n_updates | 390 |\n", + "| policy_gradient_loss | -0.0162 |\n", + "| std | 1.07 |\n", + "| value_loss | 63.1 |\n", + "-----------------------------------------\n", + "-----------------------------------------\n", + "| time/ | |\n", + "| fps | 100 |\n", + "| iterations | 41 |\n", + "| time_elapsed | 837 |\n", + "| total_timesteps | 83968 |\n", + "| train/ | |\n", + "| approx_kl | 0.021172233 |\n", + "| clip_fraction | 0.19 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -44.5 |\n", + "| explained_variance | -26.6 |\n", + "| learning_rate | 0.00025 |\n", + "| loss | 47.3 |\n", + "| n_updates | 400 |\n", + "| policy_gradient_loss | -0.0191 |\n", + "| std | 1.07 |\n", + "| value_loss | 98 |\n", + "-----------------------------------------\n", + "----------------------------------------\n", + "| time/ | |\n", + "| fps | 100 |\n", + "| iterations | 42 |\n", + "| time_elapsed | 858 |\n", + "| total_timesteps | 86016 |\n", + "| train/ | |\n", + "| approx_kl | 0.02925424 |\n", + "| clip_fraction | 0.16 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -44.6 |\n", + "| explained_variance | -33.8 |\n", + "| learning_rate | 0.00025 |\n", + "| loss | 59.2 |\n", + "| n_updates | 410 |\n", + "| policy_gradient_loss | -0.0117 |\n", + "| std | 1.07 |\n", + "| value_loss | 163 |\n", + "----------------------------------------\n", + "----------------------------------------\n", + "| time/ | |\n", + "| fps | 100 |\n", + "| iterations | 43 |\n", + "| time_elapsed | 878 |\n", + "| 
total_timesteps | 88064 |\n", + "| train/ | |\n", + "| approx_kl | 0.01635669 |\n", + "| clip_fraction | 0.138 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -44.6 |\n", + "| explained_variance | -28.9 |\n", + "| learning_rate | 0.00025 |\n", + "| loss | 57.9 |\n", + "| n_updates | 420 |\n", + "| policy_gradient_loss | -0.0135 |\n", + "| std | 1.07 |\n", + "| value_loss | 122 |\n", + "----------------------------------------\n", + "----------------------------------------\n", + "| time/ | |\n", + "| fps | 100 |\n", + "| iterations | 44 |\n", + "| time_elapsed | 898 |\n", + "| total_timesteps | 90112 |\n", + "| train/ | |\n", + "| approx_kl | 0.03150232 |\n", + "| clip_fraction | 0.188 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -44.6 |\n", + "| explained_variance | -20.9 |\n", + "| learning_rate | 0.00025 |\n", + "| loss | 45.1 |\n", + "| n_updates | 430 |\n", + "| policy_gradient_loss | -0.0222 |\n", + "| std | 1.07 |\n", + "| value_loss | 84.9 |\n", + "----------------------------------------\n", + "-----------------------------------------\n", + "| time/ | |\n", + "| fps | 100 |\n", + "| iterations | 45 |\n", + "| time_elapsed | 919 |\n", + "| total_timesteps | 92160 |\n", + "| train/ | |\n", + "| approx_kl | 0.035686597 |\n", + "| clip_fraction | 0.335 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -44.6 |\n", + "| explained_variance | -5.37 |\n", + "| learning_rate | 0.00025 |\n", + "| loss | 4.34 |\n", + "| n_updates | 440 |\n", + "| policy_gradient_loss | -0.0119 |\n", + "| std | 1.07 |\n", + "| value_loss | 14.2 |\n", + "-----------------------------------------\n", + "-----------------------------------------\n", + "| time/ | |\n", + "| fps | 100 |\n", + "| iterations | 46 |\n", + "| time_elapsed | 940 |\n", + "| total_timesteps | 94208 |\n", + "| train/ | |\n", + "| approx_kl | 0.028425248 |\n", + "| clip_fraction | 0.293 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -44.7 |\n", + "| explained_variance | -4.65 |\n", + "| 
learning_rate | 0.00025 |\n", + "| loss | 22.9 |\n", + "| n_updates | 450 |\n", + "| policy_gradient_loss | -0.0184 |\n", + "| std | 1.07 |\n", + "| value_loss | 26.4 |\n", + "-----------------------------------------\n", + "day: 2515, episode: 160\n", + "begin_total_asset:1000000.00\n", + "end_total_asset:5044806.56\n", + "total_reward:4044806.56\n", + "total_cost: 237117.70\n", + "total_trades: 70270\n", + "Sharpe: 1.271\n", + "=================================\n", + "-----------------------------------------\n", + "| time/ | |\n", + "| fps | 100 |\n", + "| iterations | 47 |\n", + "| time_elapsed | 960 |\n", + "| total_timesteps | 96256 |\n", + "| train/ | |\n", + "| approx_kl | 0.034343738 |\n", + "| clip_fraction | 0.299 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -44.7 |\n", + "| explained_variance | -3.42 |\n", + "| learning_rate | 0.00025 |\n", + "| loss | 17.6 |\n", + "| n_updates | 460 |\n", + "| policy_gradient_loss | -0.0185 |\n", + "| std | 1.08 |\n", + "| value_loss | 56.9 |\n", + "-----------------------------------------\n", + "-----------------------------------------\n", + "| time/ | |\n", + "| fps | 100 |\n", + "| iterations | 48 |\n", + "| time_elapsed | 981 |\n", + "| total_timesteps | 98304 |\n", + "| train/ | |\n", + "| approx_kl | 0.017608875 |\n", + "| clip_fraction | 0.231 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -44.8 |\n", + "| explained_variance | -17.7 |\n", + "| learning_rate | 0.00025 |\n", + "| loss | 17.6 |\n", + "| n_updates | 470 |\n", + "| policy_gradient_loss | -0.0054 |\n", + "| std | 1.08 |\n", + "| value_loss | 35.9 |\n", + "-----------------------------------------\n", + "-----------------------------------------\n", + "| time/ | |\n", + "| fps | 100 |\n", + "| iterations | 49 |\n", + "| time_elapsed | 1001 |\n", + "| total_timesteps | 100352 |\n", + "| train/ | |\n", + "| approx_kl | 0.024408635 |\n", + "| clip_fraction | 0.168 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -44.8 |\n", + "| 
explained_variance | -4.44 |\n", + "| learning_rate | 0.00025 |\n", + "| loss | 16.2 |\n", + "| n_updates | 480 |\n", + "| policy_gradient_loss | -0.00922 |\n", + "| std | 1.08 |\n", + "| value_loss | 50.8 |\n", + "-----------------------------------------\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3Zpv4S0-fDBv" + }, + "source": [ + "### Model 4: TD3" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "JSAHhV4Xc-bh", + "outputId": "e531db14-aab4-47d1-cc15-02c893ec66c9" + }, + "source": [ + "agent = DRLAgent(env = env_train)\n", + "TD3_PARAMS = {\"batch_size\": 100, \n", + " \"buffer_size\": 1000000, \n", + " \"learning_rate\": 0.001}\n", + "\n", + "model_td3 = agent.get_model(\"td3\",model_kwargs = TD3_PARAMS)" + ], + "execution_count": null, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "bEv5KGC8h1jE" - }, - "source": [ - "### Set turbulence threshold\n", - "Set the turbulence threshold to be greater than the maximum of insample turbulence data, if current turbulence index is greater than the threshold, then we assume that the current market is volatile" - ] + "output_type": "stream", + "text": [ + "{'batch_size': 100, 'buffer_size': 1000000, 'learning_rate': 0.001}\n", + "Using cpu device\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "OSRxNYAxdKpU", + "outputId": "ddc4193c-884b-4a2c-9e49-31397e2cfbec" + }, + "source": [ + "trained_td3 = agent.train_model(model=model_td3, \n", + " tb_log_name='td3',\n", + " total_timesteps=30000)" + ], + "execution_count": null, + "outputs": [ { - "cell_type": "code", - "metadata": { - "id": "efwBi84ch1jE" - }, - "source": [ - "data_turbulence = processed[(processed.date<'2019-01-01') & (processed.date>='2009-01-01')]\n", - "insample_turbulence = 
data_turbulence.drop_duplicates(subset=['date'])" - ], - "execution_count": null, - "outputs": [] + "output_type": "stream", + "text": [ + "Logging to tensorboard_log/td3/td3_2\n", + "---------------------------------\n", + "| time/ | |\n", + "| episodes | 4 |\n", + "| fps | 33 |\n", + "| time_elapsed | 296 |\n", + "| total timesteps | 10064 |\n", + "| train/ | |\n", + "| actor_loss | 67.9 |\n", + "| critic_loss | 979 |\n", + "| learning_rate | 0.001 |\n", + "| n_updates | 7548 |\n", + "---------------------------------\n", + "day: 2515, episode: 10\n", + "begin_total_asset:1000000.00\n", + "end_total_asset:4438572.29\n", + "total_reward:3438572.29\n", + "total_cost: 1038.05\n", + "total_trades: 40290\n", + "Sharpe: 1.049\n", + "=================================\n", + "---------------------------------\n", + "| time/ | |\n", + "| episodes | 8 |\n", + "| fps | 30 |\n", + "| time_elapsed | 669 |\n", + "| total timesteps | 20128 |\n", + "| train/ | |\n", + "| actor_loss | 54 |\n", + "| critic_loss | 199 |\n", + "| learning_rate | 0.001 |\n", + "| n_updates | 17612 |\n", + "---------------------------------\n", + "---------------------------------\n", + "| time/ | |\n", + "| episodes | 12 |\n", + "| fps | 28 |\n", + "| time_elapsed | 1052 |\n", + "| total timesteps | 30192 |\n", + "| train/ | |\n", + "| actor_loss | 41.4 |\n", + "| critic_loss | 25.2 |\n", + "| learning_rate | 0.001 |\n", + "| n_updates | 27676 |\n", + "---------------------------------\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Dr49PotrfG01" + }, + "source": [ + "### Model 5: SAC" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "xwOhVjqRkCdM", + "outputId": "5ad99882-367d-49ce-d83e-124396074c12" + }, + "source": [ + "agent = DRLAgent(env = env_train)\n", + "SAC_PARAMS = {\n", + " \"batch_size\": 128,\n", + " \"buffer_size\": 1000000,\n", + " \"learning_rate\": 0.0001,\n", + " 
\"learning_starts\": 100,\n", + " \"ent_coef\": \"auto_0.1\",\n", + "}\n", + "\n", + "model_sac = agent.get_model(\"sac\",model_kwargs = SAC_PARAMS)" + ], + "execution_count": null, + "outputs": [ { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "VHZMBpSqh1jG", - "outputId": "f750f515-9f4f-4adb-846e-ea0bdf15ea6b" - }, - "source": [ - "insample_turbulence.turbulence.describe()" - ], - "execution_count": null, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "count 2516.000000\n", - "mean 33.277069\n", - "std 33.988999\n", - "min 0.000000\n", - "25% 15.233886\n", - "50% 25.180900\n", - "75% 39.290836\n", - "max 332.062743\n", - "Name: turbulence, dtype: float64" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 199 - } - ] + "output_type": "stream", + "text": [ + "{'batch_size': 128, 'buffer_size': 1000000, 'learning_rate': 0.0001, 'learning_starts': 100, 'ent_coef': 'auto_0.1'}\n", + "Using cpu device\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "background_save": true, + "base_uri": "https://localhost:8080/" }, + "id": "K8RSdKCckJyH", + "outputId": "8dfca8da-65ea-4e61-f7c7-16094ea00cc0" + }, + "source": [ + "trained_sac = agent.train_model(model=model_sac, \n", + " tb_log_name='sac',\n", + " total_timesteps=80000)" + ], + "execution_count": null, + "outputs": [ { - "cell_type": "code", - "metadata": { - "id": "yuwDPkV9h1jL" - }, - "source": [ - "turbulence_threshold = np.quantile(insample_turbulence.turbulence.values,1)" - ], - "execution_count": null, - "outputs": [] + "output_type": "stream", + "text": [ + "Logging to tensorboard_log/sac/sac_8\n", + "---------------------------------\n", + "| time/ | |\n", + "| episodes | 4 |\n", + "| fps | 27 |\n", + "| time_elapsed | 372 |\n", + "| total timesteps | 10064 |\n", + "| train/ | |\n", + "| actor_loss | 1.76e+03 |\n", + "| critic_loss | 1.53e+03 |\n", + "| 
ent_coef | 0.243 |\n", + "| ent_coef_loss | 169 |\n", + "| learning_rate | 0.0001 |\n", + "| n_updates | 9963 |\n", + "---------------------------------\n", + "day: 2515, episode: 220\n", + "begin_total_asset:1000000.00\n", + "end_total_asset:4405540.86\n", + "total_reward:3405540.86\n", + "total_cost: 74774.48\n", + "total_trades: 56475\n", + "Sharpe: 0.954\n", + "=================================\n", + "---------------------------------\n", + "| time/ | |\n", + "| episodes | 8 |\n", + "| fps | 26 |\n", + "| time_elapsed | 749 |\n", + "| total timesteps | 20128 |\n", + "| train/ | |\n", + "| actor_loss | 975 |\n", + "| critic_loss | 480 |\n", + "| ent_coef | 0.121 |\n", + "| ent_coef_loss | -91.8 |\n", + "| learning_rate | 0.0001 |\n", + "| n_updates | 20027 |\n", + "---------------------------------\n", + "---------------------------------\n", + "| time/ | |\n", + "| episodes | 12 |\n", + "| fps | 26 |\n", + "| time_elapsed | 1132 |\n", + "| total timesteps | 30192 |\n", + "| train/ | |\n", + "| actor_loss | 574 |\n", + "| critic_loss | 4.49e+03 |\n", + "| ent_coef | 0.0453 |\n", + "| ent_coef_loss | -103 |\n", + "| learning_rate | 0.0001 |\n", + "| n_updates | 30091 |\n", + "---------------------------------\n", + "day: 2515, episode: 230\n", + "begin_total_asset:1000000.00\n", + "end_total_asset:4828464.65\n", + "total_reward:3828464.65\n", + "total_cost: 2997.76\n", + "total_trades: 39564\n", + "Sharpe: 0.994\n", + "=================================\n", + "---------------------------------\n", + "| time/ | |\n", + "| episodes | 16 |\n", + "| fps | 26 |\n", + "| time_elapsed | 1517 |\n", + "| total timesteps | 40256 |\n", + "| train/ | |\n", + "| actor_loss | 348 |\n", + "| critic_loss | 23.1 |\n", + "| ent_coef | 0.0173 |\n", + "| ent_coef_loss | -87.4 |\n", + "| learning_rate | 0.0001 |\n", + "| n_updates | 40155 |\n", + "---------------------------------\n", + "---------------------------------\n", + "| time/ | |\n", + "| episodes | 20 |\n", + "| fps | 26 
|\n", + "| time_elapsed | 1903 |\n", + "| total timesteps | 50320 |\n", + "| train/ | |\n", + "| actor_loss | 205 |\n", + "| critic_loss | 10.6 |\n", + "| ent_coef | 0.00687 |\n", + "| ent_coef_loss | -45.2 |\n", + "| learning_rate | 0.0001 |\n", + "| n_updates | 50219 |\n", + "---------------------------------\n", + "---------------------------------\n", + "| time/ | |\n", + "| episodes | 24 |\n", + "| fps | 26 |\n", + "| time_elapsed | 2291 |\n", + "| total timesteps | 60384 |\n", + "| train/ | |\n", + "| actor_loss | 127 |\n", + "| critic_loss | 9.65 |\n", + "| ent_coef | 0.00328 |\n", + "| ent_coef_loss | -0.401 |\n", + "| learning_rate | 0.0001 |\n", + "| n_updates | 60283 |\n", + "---------------------------------\n", + "day: 2515, episode: 240\n", + "begin_total_asset:1000000.00\n", + "end_total_asset:5207375.51\n", + "total_reward:4207375.51\n", + "total_cost: 1768.49\n", + "total_trades: 38369\n", + "Sharpe: 1.077\n", + "=================================\n", + "---------------------------------\n", + "| time/ | |\n", + "| episodes | 28 |\n", + "| fps | 26 |\n", + "| time_elapsed | 2687 |\n", + "| total timesteps | 70448 |\n", + "| train/ | |\n", + "| actor_loss | 86.5 |\n", + "| critic_loss | 9.16 |\n", + "| ent_coef | 0.00253 |\n", + "| ent_coef_loss | 1.39 |\n", + "| learning_rate | 0.0001 |\n", + "| n_updates | 70347 |\n", + "---------------------------------\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "f2wZgkQXh1jE" + }, + "source": [ + "## Trading\n", + "Assume that we have $1,000,000 in initial capital on 2019-01-01. We use the trained SAC model to trade the Dow Jones 30 stocks."
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bEv5KGC8h1jE" + }, + "source": [ + "### Set turbulence threshold\n", + "Set the turbulence threshold to be greater than the maximum of the in-sample turbulence data. If the current turbulence index is greater than the threshold, we assume that the current market is volatile." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "efwBi84ch1jE" + }, + "source": [ + "data_turbulence = processed[(processed.date<'2019-01-01') & (processed.date>='2009-01-01')]\n", + "insample_turbulence = data_turbulence.drop_duplicates(subset=['date'])" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "VHZMBpSqh1jG", + "outputId": "f750f515-9f4f-4adb-846e-ea0bdf15ea6b" + }, + "source": [ + "insample_turbulence.turbulence.describe()" + ], + "execution_count": null, + "outputs": [ { + "output_type": "execute_result", + "data": { + "text/plain": [ + "count 2516.000000\n", + "mean 33.277069\n", + "std 33.988999\n", + "min 0.000000\n", + "25% 15.233886\n", + "50% 25.180900\n", + "75% 39.290836\n", + "max 332.062743\n", + "Name: turbulence, dtype: float64" ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 199 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "yuwDPkV9h1jL" + }, + "source": [ + "turbulence_threshold = np.quantile(insample_turbulence.turbulence.values,1)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { +
"colab": { + "base_uri": "https://localhost:8080/" }, + "id": "wwoz_7VSh1jO", + "outputId": "37894e93-d22e-4e3f-f23a-d3ca08bf8342" + }, + "source": [ + "turbulence_threshold" + ], + "execution_count": 216, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "U5mmgQF_h1jQ" - }, - "source": [ - "### Trade\n", - "\n", - "DRL model needs to update periodically in order to take full advantage of the data, ideally we need to retrain our model yearly, quarterly, or monthly. We also need to tune the parameters along the way, in this notebook I only use the in-sample data from 2009-01 to 2018-12 to tune the parameters once, so there is some alpha decay here as the length of trade date extends. \n", - "\n", - "Numerous hyperparameters – e.g. the learning rate, the total number of samples to train on – influence the learning process and are usually determined by testing some variations." + "output_type": "execute_result", + "data": { + "text/plain": [ + "332.06274290226577" ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 216 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "U5mmgQF_h1jQ" + }, + "source": [ + "### Trade\n", + "\n", + "DRL model needs to update periodically in order to take full advantage of the data, ideally we need to retrain our model yearly, quarterly, or monthly. We also need to tune the parameters along the way, in this notebook I only use the in-sample data from 2009-01 to 2018-12 to tune the parameters once, so there is some alpha decay here as the length of trade date extends. \n", + "\n", + "Numerous hyperparameters – e.g. the learning rate, the total number of samples to train on – influence the learning process and are usually determined by testing some variations." 
+ ] + }, + { + "cell_type": "code", + "metadata": { + "id": "eLOnL5eYh1jR" + }, + "source": [ + "trade = data_split(processed, '2019-01-01','2021-01-01')\n", + "e_trade_gym = StockTradingEnv(df = trade, turbulence_threshold = 380, **env_kwargs)\n", + "env_trade, obs_trade = e_trade_gym.get_sb_env()\n", + "\n", + "df_account_value, df_actions = DRLAgent.DRL_prediction(model=trained_sac,\n", + " test_data = trade,\n", + " test_env = env_trade,\n", + " test_obs = obs_trade)" + ], + "execution_count": 217, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "ERxw3KqLkcP4", + "outputId": "cbb465c9-38dc-4d88-e79a-6ae29025164b" + }, + "source": [ + "df_account_value.shape" + ], + "execution_count": 218, + "outputs": [ { - "cell_type": "code", - "metadata": { - "id": "eLOnL5eYh1jR" - }, - "source": [ - "trade = data_split(processed, '2019-01-01','2021-01-01')\n", - "e_trade_gym = StockTradingEnv(df = trade, turbulence_threshold = 380, **env_kwargs)\n", - "env_trade, obs_trade = e_trade_gym.get_sb_env()\n", - "\n", - "df_account_value, df_actions = DRLAgent.DRL_prediction(model=trained_sac,\n", - " test_data = trade,\n", - " test_env = env_trade,\n", - " test_obs = obs_trade)" - ], - "execution_count": 217, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "ERxw3KqLkcP4", - "outputId": "cbb465c9-38dc-4d88-e79a-6ae29025164b" - }, - "source": [ - "df_account_value.shape" - ], - "execution_count": 218, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "(505, 2)" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 218 - } + "output_type": "execute_result", + "data": { + "text/plain": [ + "(505, 2)" ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 218 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + 
"height": 194 }, + "id": "2yRkNguY5yvp", + "outputId": "53ec139f-88e7-4291-cf11-8e6766184265" + }, + "source": [ + "df_account_value.head()" + ], + "execution_count": 219, + "outputs": [ { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 194 - }, - "id": "2yRkNguY5yvp", - "outputId": "53ec139f-88e7-4291-cf11-8e6766184265" - }, - "source": [ - "df_account_value.head()" + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
dateaccount_value
02019-01-021.000000e+06
12019-01-039.956678e+05
22019-01-041.004994e+06
32019-01-071.006531e+06
42019-01-081.015034e+06
\n", + "
" ], - "execution_count": 219, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
dateaccount_value
02019-01-021.000000e+06
12019-01-039.956678e+05
22019-01-041.004994e+06
32019-01-071.006531e+06
42019-01-081.015034e+06
\n", - "
" - ], - "text/plain": [ - " date account_value\n", - "0 2019-01-02 1.000000e+06\n", - "1 2019-01-03 9.956678e+05\n", - "2 2019-01-04 1.004994e+06\n", - "3 2019-01-07 1.006531e+06\n", - "4 2019-01-08 1.015034e+06" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 219 - } + "text/plain": [ + " date account_value\n", + "0 2019-01-02 1.000000e+06\n", + "1 2019-01-03 9.956678e+05\n", + "2 2019-01-04 1.004994e+06\n", + "3 2019-01-07 1.006531e+06\n", + "4 2019-01-08 1.015034e+06" ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 219 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 328 }, + "id": "nFlK5hNbWVFk", + "outputId": "06fe8d38-8724-4cea-f6ce-7a2af821ebab" + }, + "source": [ + "df_actions.head()" + ], + "execution_count": 220, + "outputs": [ { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 328 - }, - "id": "nFlK5hNbWVFk", - "outputId": "06fe8d38-8724-4cea-f6ce-7a2af821ebab" - }, - "source": [ - "df_actions.head()" + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AAPLAXPBACATCSCOCVXDDDISGSHDIBMINTCJNJJPMKOMCDMMMMRKMSFTNKEPFEPGRTXTRVUNHVVZWBAWMTXOM
date
2019-01-0295.222878-84.55982297.74998597.763374-87.515053-99.45852796.83572490.078880-93.82910972.82640898.86610438.124157-93.01590092.97244353.967762-96.257362-92.545105-96.774742-91.88494931.69014444.68938189.037033-88.88290493.31162397.080673-88.93804230.129683-57.037388-94.728508-89.945351
2019-01-0384.561661-57.81327492.66774793.314674-95.521545-98.47362596.68027575.064453-78.63674998.58307697.697639-86.169891-64.66523079.48266690.556229-88.396980-97.0128948.799195-84.32333479.341576-44.26525995.109200-99.22021548.52602456.340240-95.00663875.963913-88.629875-95.672897-90.759430
2019-01-0484.307251-81.54399198.30934998.947021-95.404869-97.58718196.81214170.461342-74.65934891.95225598.680473-84.412781-92.70059275.78729279.312424-93.347343-93.87693825.555515-88.04005498.59678687.32090095.363853-89.88440793.89642332.470451-99.46339488.486122-85.763313-98.706146-80.587807
2019-01-0795.804977-98.47568599.25007698.576164-95.005562-93.48647383.13198184.942986-79.09162993.92118899.603210-77.012062-72.24776584.35185294.593575-96.556320-99.38317137.927414-93.10502662.26899780.98009595.859444-89.58195577.66011073.498856-72.81546889.848091-79.640350-95.003166-91.969521
2019-01-0850.825321-86.74216595.23665695.626114-91.030098-79.49691099.35217366.04116114.67651297.16078999.611877-13.648521-94.20978576.99482764.870956-98.375610-95.54666157.064533-87.11482224.46757586.50474569.436409-95.33475517.40769235.462414-97.93735582.119667-79.810303-99.528976-48.367775
\n", + "
" ], - "execution_count": 220, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
AAPLAXPBACATCSCOCVXDDDISGSHDIBMINTCJNJJPMKOMCDMMMMRKMSFTNKEPFEPGRTXTRVUNHVVZWBAWMTXOM
date
2019-01-0295.222878-84.55982297.74998597.763374-87.515053-99.45852796.83572490.078880-93.82910972.82640898.86610438.124157-93.01590092.97244353.967762-96.257362-92.545105-96.774742-91.88494931.69014444.68938189.037033-88.88290493.31162397.080673-88.93804230.129683-57.037388-94.728508-89.945351
2019-01-0384.561661-57.81327492.66774793.314674-95.521545-98.47362596.68027575.064453-78.63674998.58307697.697639-86.169891-64.66523079.48266690.556229-88.396980-97.0128948.799195-84.32333479.341576-44.26525995.109200-99.22021548.52602456.340240-95.00663875.963913-88.629875-95.672897-90.759430
2019-01-0484.307251-81.54399198.30934998.947021-95.404869-97.58718196.81214170.461342-74.65934891.95225598.680473-84.412781-92.70059275.78729279.312424-93.347343-93.87693825.555515-88.04005498.59678687.32090095.363853-89.88440793.89642332.470451-99.46339488.486122-85.763313-98.706146-80.587807
2019-01-0795.804977-98.47568599.25007698.576164-95.005562-93.48647383.13198184.942986-79.09162993.92118899.603210-77.012062-72.24776584.35185294.593575-96.556320-99.38317137.927414-93.10502662.26899780.98009595.859444-89.58195577.66011073.498856-72.81546889.848091-79.640350-95.003166-91.969521
2019-01-0850.825321-86.74216595.23665695.626114-91.030098-79.49691099.35217366.04116114.67651297.16078999.611877-13.648521-94.20978576.99482764.870956-98.375610-95.54666157.064533-87.11482224.46757586.50474569.436409-95.33475517.40769235.462414-97.93735582.119667-79.810303-99.528976-48.367775
\n", - "
" - ], - "text/plain": [ - " AAPL AXP BA ... WBA WMT XOM\n", - "date ... \n", - "2019-01-02 95.222878 -84.559822 97.749985 ... -57.037388 -94.728508 -89.945351\n", - "2019-01-03 84.561661 -57.813274 92.667747 ... -88.629875 -95.672897 -90.759430\n", - "2019-01-04 84.307251 -81.543991 98.309349 ... -85.763313 -98.706146 -80.587807\n", - "2019-01-07 95.804977 -98.475685 99.250076 ... -79.640350 -95.003166 -91.969521\n", - "2019-01-08 50.825321 -86.742165 95.236656 ... -79.810303 -99.528976 -48.367775\n", - "\n", - "[5 rows x 30 columns]" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 220 - } + "text/plain": [ + " AAPL AXP BA ... WBA WMT XOM\n", + "date ... \n", + "2019-01-02 95.222878 -84.559822 97.749985 ... -57.037388 -94.728508 -89.945351\n", + "2019-01-03 84.561661 -57.813274 92.667747 ... -88.629875 -95.672897 -90.759430\n", + "2019-01-04 84.307251 -81.543991 98.309349 ... -85.763313 -98.706146 -80.587807\n", + "2019-01-07 95.804977 -98.475685 99.250076 ... -79.640350 -95.003166 -91.969521\n", + "2019-01-08 50.825321 -86.742165 95.236656 ... -79.810303 -99.528976 -48.367775\n", + "\n", + "[5 rows x 30 columns]" ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 220 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "W6vvNSC6h1jZ" + }, + "source": [ + "\n", + "# Part 7: Backtest Our Strategy\n", + "Backtesting plays a key role in evaluating the performance of a trading strategy. Automated backtesting tool is preferred because it reduces the human error. We usually use the Quantopian pyfolio package to backtest our trading strategies. It is easy to use and consists of various individual plots that provide a comprehensive image of the performance of a trading strategy." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Lr2zX7ZxNyFQ" + }, + "source": [ + "\n", + "## 7.1 BackTestStats\n", + "Pass in df_account_value; this information is stored in the env class.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "Nzkr9yv-AdV_", + "outputId": "1053083a-d74c-48b0-a623-de33282e2fff" + }, + "source": [ + "print(\"==============Get Backtest Results===========\")\n", + "now = datetime.datetime.now().strftime('%Y%m%d-%Hh%M')\n", + "\n", + "perf_stats_all = BackTestStats(account_value=df_account_value)\n", + "perf_stats_all = pd.DataFrame(perf_stats_all)\n", + "perf_stats_all.to_csv(\"./\"+config.RESULTS_DIR+\"/perf_stats_all_\"+now+'.csv')" + ], + "execution_count": 221, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "W6vvNSC6h1jZ" - }, - "source": [ - "\n", - "# Part 7: Backtest Our Strategy\n", - "Backtesting plays a key role in evaluating the performance of a trading strategy. Automated backtesting tool is preferred because it reduces the human error. We usually use the Quantopian pyfolio package to backtest our trading strategies. It is easy to use and consists of various individual plots that provide a comprehensive image of the performance of a trading strategy." 
- ] + "output_type": "stream", + "text": [ + "==============Get Backtest Results===========\n", + "annual return: 23.588340141653006\n", + "sharpe ratio: 1.0074093133078277\n", + "Annual return 0.208323\n", + "Cumulative returns 0.461140\n", + "Annual volatility 0.210317\n", + "Sharpe ratio 1.007409\n", + "Calmar ratio 0.869951\n", + "Stability 0.419595\n", + "Max drawdown -0.239465\n", + "Omega ratio 1.212383\n", + "Sortino ratio 1.426763\n", + "Skew NaN\n", + "Kurtosis NaN\n", + "Tail ratio 1.034439\n", + "Daily value at risk -0.025657\n", + "Alpha 0.000000\n", + "Beta 1.000000\n", + "dtype: float64\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9U6Suru3h1jc" + }, + "source": [ + "\n", + "## 7.2 BackTestPlot" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 }, + "id": "lKRGftSS7pNM", + "outputId": "4f77cef2-3934-444a-cacc-4ed8f94514ae" + }, + "source": [ + "print(\"==============Compare to DJIA===========\")\n", + "%matplotlib inline\n", + "# S&P 500: ^GSPC\n", + "# Dow Jones Index: ^DJI\n", + "# NASDAQ 100: ^NDX\n", + "BackTestPlot(df_account_value, \n", + " baseline_ticker = '^DJI', \n", + " baseline_start = '2019-01-01',\n", + " baseline_end = '2021-01-01')" + ], + "execution_count": 222, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "Lr2zX7ZxNyFQ" - }, - "source": [ - "\n", - "## 7.1 BackTestStats\n", - "pass in df_account_value, this information is stored in env class\n" - ] + "output_type": "stream", + "text": [ + "==============Compare to DJIA===========\n", + "annual return: 23.588340141653006\n", + "sharpe ratio: 1.0074093133078277\n", + "[*********************100%***********************] 1 of 1 completed\n", + "Shape of DataFrame: (505, 7)\n" + ], + "name": "stdout" }, { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "Nzkr9yv-AdV_", - "outputId": 
"1053083a-d74c-48b0-a623-de33282e2fff" - }, - "source": [ - "print(\"==============Get Backtest Results===========\")\n", - "now = datetime.datetime.now().strftime('%Y%m%d-%Hh%M')\n", - "\n", - "perf_stats_all = BackTestStats(account_value=df_account_value)\n", - "perf_stats_all = pd.DataFrame(perf_stats_all)\n", - "perf_stats_all.to_csv(\"./\"+config.RESULTS_DIR+\"/perf_stats_all_\"+now+'.csv')" + "output_type": "display_data", + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Start date2019-01-03
End date2020-12-31
Total months24
Backtest
Annual return20.878%
Cumulative returns46.114%
Annual volatility21.032%
Sharpe ratio1.01
Calmar ratio0.87
Stability0.42
Max drawdown-23.946%
Omega ratio1.21
Sortino ratio1.43
Skew-0.62
Kurtosis7.86
Tail ratio1.03
Daily value at risk-2.566%
Alpha0.12
Beta0.58
" ], - "execution_count": 221, - "outputs": [ - { - "output_type": "stream", - "text": [ - "==============Get Backtest Results===========\n", - "annual return: 23.588340141653006\n", - "sharpe ratio: 1.0074093133078277\n", - "Annual return 0.208323\n", - "Cumulative returns 0.461140\n", - "Annual volatility 0.210317\n", - "Sharpe ratio 1.007409\n", - "Calmar ratio 0.869951\n", - "Stability 0.419595\n", - "Max drawdown -0.239465\n", - "Omega ratio 1.212383\n", - "Sortino ratio 1.426763\n", - "Skew NaN\n", - "Kurtosis NaN\n", - "Tail ratio 1.034439\n", - "Daily value at risk -0.025657\n", - "Alpha 0.000000\n", - "Beta 1.000000\n", - "dtype: float64\n" - ], - "name": "stdout" - } + "text/plain": [ + "" ] + }, + "metadata": { + "tags": [] + } }, { - "cell_type": "markdown", - "metadata": { - "id": "9U6Suru3h1jc" - }, - "source": [ - "\n", - "## 7.2 BackTestPlot" - ] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 1000 - }, - "id": "lKRGftSS7pNM", - "outputId": "4f77cef2-3934-444a-cacc-4ed8f94514ae" - }, - "source": [ - "print(\"==============Compare to DJIA===========\")\n", - "%matplotlib inline\n", - "# S&P 500: ^GSPC\n", - "# Dow Jones Index: ^DJI\n", - "# NASDAQ 100: ^NDX\n", - "BackTestPlot(df_account_value, \n", - " baseline_ticker = '^DJI', \n", - " baseline_start = '2019-01-01',\n", - " baseline_end = '2021-01-01')" + "output_type": "display_data", + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Worst drawdown periodsNet drawdown in %Peak dateValley dateRecovery dateDuration
023.952020-02-122020-04-032020-06-0583
111.302020-06-082020-06-262020-08-1046
28.422019-07-152019-08-142019-09-1143
37.792020-09-022020-09-242020-10-1229
46.932020-10-122020-10-282020-11-0921
" ], - "execution_count": 222, - "outputs": [ - { - "output_type": "stream", - "text": [ - "==============Compare to DJIA===========\n", - "annual return: 23.588340141653006\n", - "sharpe ratio: 1.0074093133078277\n", - "[*********************100%***********************] 1 of 1 completed\n", - "Shape of DataFrame: (505, 7)\n" - ], - "name": "stdout" - }, - { - "output_type": "display_data", - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Start date2019-01-03
End date2020-12-31
Total months24
Backtest
Annual return20.878%
Cumulative returns46.114%
Annual volatility21.032%
Sharpe ratio1.01
Calmar ratio0.87
Stability0.42
Max drawdown-23.946%
Omega ratio1.21
Sortino ratio1.43
Skew-0.62
Kurtosis7.86
Tail ratio1.03
Daily value at risk-2.566%
Alpha0.12
Beta0.58
" - ], - "text/plain": [ - "" - ] - }, - "metadata": { - "tags": [] - } - }, - { - "output_type": "display_data", - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Worst drawdown periodsNet drawdown in %Peak dateValley dateRecovery dateDuration
023.952020-02-122020-04-032020-06-0583
111.302020-06-082020-06-262020-08-1046
28.422019-07-152019-08-142019-09-1143
37.792020-09-022020-09-242020-10-1229
46.932020-10-122020-10-282020-11-0921
" - ], - "text/plain": [ - "" - ] - }, - "metadata": { - "tags": [] - } - }, - { - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.6/dist-packages/pyfolio/tears.py:907: UserWarning: Passed returns do not overlap with anyinteresting times.\n", - " 'interesting times.', UserWarning)\n" - ], - "name": "stderr" - }, - { - "output_type": "display_data", - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "tags": [], - "needs_background": "light" - } - } + "text/plain": [ + "" ] + }, + "metadata": { + "tags": [] + } }, { - "cell_type": "markdown", - "metadata": { - "id": "SlLT9_5WN478" - }, - "source": [ - "\n", - "## 7.3 Baseline Stats" - ] + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.6/dist-packages/pyfolio/tears.py:907: UserWarning: Passed returns do not overlap with anyinteresting times.\n", + " 'interesting times.', UserWarning)\n" + ], + "name": "stderr" }, { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "YktexHcqh1jc", - "outputId": "38566531-a3a0-4705-db30-d437e8f8fc73" - }, - "source": [ - "print(\"==============Get Baseline Stats===========\")\n", - "baesline_perf_stats=BaselineStats('^DJI',\n", - " baseline_start = '2019-01-01',\n", - " baseline_end = '2021-01-01')" - ], - "execution_count": 223, - "outputs": [ - { - "output_type": "stream", - "text": [ - "==============Get Baseline Stats===========\n", - "[*********************100%***********************] 1 of 1 completed\n", - "Shape of DataFrame: (505, 7)\n", - "Annual return 0.144674\n", - "Cumulative returns 0.310981\n", - "Annual volatility 0.274619\n", - "Sharpe ratio 0.631418\n", - "Calmar ratio 0.390102\n", - "Stability 0.116677\n", - "Max drawdown -0.370862\n", - "Omega ratio 1.149365\n", - "Sortino ratio 0.870084\n", - "Skew NaN\n", - "Kurtosis NaN\n", - "Tail ratio 0.860710\n", - "Daily value at risk -0.033911\n", - "Alpha 0.000000\n", - "Beta 1.000000\n", - "dtype: float64\n" - ], - "name": "stdout" - } + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
" ] + }, + "metadata": { + "tags": [], + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SlLT9_5WN478" + }, + "source": [ + "\n", + "## 7.3 Baseline Stats" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "YktexHcqh1jc", + "outputId": "38566531-a3a0-4705-db30-d437e8f8fc73" + }, + "source": [ + "print(\"==============Get Baseline Stats===========\")\n", + "baesline_perf_stats=BaselineStats('^DJI',\n", + " baseline_start = '2019-01-01',\n", + " baseline_end = '2021-01-01')" + ], + "execution_count": 223, + "outputs": [ { - "cell_type": "code", - "metadata": { - "id": "A6W2J57ch1j9" - }, - "source": [ - "" - ], - "execution_count": null, - "outputs": [] + "output_type": "stream", + "text": [ + "==============Get Baseline Stats===========\n", + "[*********************100%***********************] 1 of 1 completed\n", + "Shape of DataFrame: (505, 7)\n", + "Annual return 0.144674\n", + "Cumulative returns 0.310981\n", + "Annual volatility 0.274619\n", + "Sharpe ratio 0.631418\n", + "Calmar ratio 0.390102\n", + "Stability 0.116677\n", + "Max drawdown -0.370862\n", + "Omega ratio 1.149365\n", + "Sortino ratio 0.870084\n", + "Skew NaN\n", + "Kurtosis NaN\n", + "Tail ratio 0.860710\n", + "Daily value at risk -0.033911\n", + "Alpha 0.000000\n", + "Beta 1.000000\n", + "dtype: float64\n" + ], + "name": "stdout" } - ] + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "A6W2J57ch1j9" + }, + "source": [ + "" + ], + "execution_count": null, + "outputs": [] + } + ] } \ No newline at end of file