diff --git a/.gitignore b/.gitignore index eef7ac646..b0743de34 100644 --- a/.gitignore +++ b/.gitignore @@ -6,4 +6,5 @@ reward !reward/reward_sample.py logs !logs/*-sample.log -.ipynb_checkpoints \ No newline at end of file +.ipynb_checkpoints +*.pyc \ No newline at end of file diff --git a/log-analysis/Training_analysis.ipynb b/log-analysis/Training_analysis.ipynb index f255ca768..788bc5b72 100644 --- a/log-analysis/Training_analysis.ipynb +++ b/log-analysis/Training_analysis.ipynb @@ -66,6 +66,7 @@ "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "from datetime import datetime\n", + "import ipywidgets as widgets\n", "%matplotlib inline\n", "\n", "#Shapely Library\n", @@ -75,6 +76,7 @@ "import track_utils as tu\n", "import log_analysis as la\n", "import cw_utils as cw\n", + "import job_utils as ju\n", "\n", "# Make sure your boto version is >= '1.9.133'\n", "cw.boto3.__version__" @@ -109,7 +111,104 @@ "import importlib\n", "importlib.reload(la)\n", "importlib.reload(cw)\n", - "importlib.reload(tu)" + "importlib.reload(tu)\n", + "importlib.reload(ju)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get the logs\n", + "\n", + "Depending on which way you are training your model, you will need a different way to load the data.\n", + "\n", + "**AWS DeepRacer Console**\n", + "The logs are being stored in CloudWatch, in group `/aws/robomaker/SimulationJobs`. You will be using boto3 to download them based on the training ID (stream name prefix). If you wish to bulk export the logs from Amazon Cloudwatch to Amazon S3 :: https://docs.aws.amazon.com/AmazonCloudWatch/latest/logs/S3ExportTasks.html\n", + "\n", + "**DeepRacer for Dummies/ARCC local training**\n", + "Those two setups come with a container that runs Jupyter Notebook (as you noticed if you're using one of them and reading this text). Logs are stored in `/logs/` and you just need to point at the latest file to see the current training. 
The logs are split for long-running training if they exceed 500 MB. The log loading method has been extended to support that.\n", + "\n", + "**Chris Rhodes' repo**\n", + "Chris' repo doesn't come with log storage out of the box. I would normally run `docker logs dr > /path/to/logfile` and then load the file.\n", + "\n", + "Below I have prepared a section for each case. In each case you can analyse the logs as the training is being run, but in the case of the Console you may need to force downloading of the logs, as the `cw.download_log` method has protection against needless downloads.\n", + "\n", + "## AutoConfig Parameters\n", + "\n", + "You can now enable AutoConfig Parameters to pick an available simulation job from AWS and load its logs and parameters (including hyperparameters) automatically.\n", + "Only the AWS DeepRacer Console is supported for now; other setups will follow." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "autoParams = None\n", + "listJobsConsoleWidget = widgets.Checkbox(\n", + " value=True,\n", + " description='AutoConfig Parameters',\n", + " disabled=False,\n", + ")\n", + "display(listJobsConsoleWidget)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if listJobsConsoleWidget.value == True:\n", + " ju.display_job_selection_widget()\n", + " autoParams = ju.get_auto_params()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Log file exists, use force=True to download again\n", + "SIM_TRACE_LOG:799,111,1.7594,4.4353,3.0875,-0.26,2.50,2,1.0000,False,True,71.5802,49,17.67,1555554451.1110387\n", + "SIM_TRACE_LOG:799,112,1.6637,4.4457,3.0636,0.26,2.50,6,1.0000,False,True,72.1366,49,17.67,1555554451.2382815\n", + "SIM_TRACE_LOG:799,113,1.5520,4.4493,3.0941,0.52,5.00,9,1.0000,False,True,72.7582,49,17.67,1555554451.3647106\n" + ] + } 
+ ], + "source": [ + "# AWS DeepRacer Console\n", + "if not autoParams:\n", + " stream_name = 'sim-sample' ## CHANGE This to your simulation application ID\n", + " fname = 'logs/deepracer-%s.log' %stream_name # The log will be downloaded into the specified path\n", + " cw.download_log(fname, stream_prefix=stream_name) # add force=True if you downloaded the file before but want to repeat\n", + "\n", + "else:\n", + " jobID = autoParams['selectedJobID']\n", + " print(\"Using {} from autoParams\".format(jobID))\n", + " fname = \"logs/deepracer-{}.log\".format(jobID) # The log will be downloaded into the specified path\n", + " print(\"Downloading log file...\")\n", + " cw.download_log(fname, stream_prefix=jobID) # add force=True if you downloaded the file before but want to repeat\n", + " print(\"Downloaded log file to {}\".format(fname))\n", + "\n", + "# DeepRacer for Dummies / ARCC repository - comment the above and uncomment\n", + "# the lines below. They rely on a magic command to list log files\n", + "# ordered by time and pick up the most recent one (index zero).\n", + "# If you want an earlier file, change 0 to larger value.\n", + "# !ls -t /workspace/venv/logs/*.log\n", + "# fname = !ls -t /workspace/venv/logs/*.log\n", + "# fname = fname[0]\n", + "\n", + "\n", + "# Chris Rhodes' repository\n", + "# Use a preferred way of saving the logs to a file , then set an fname value to load it\n", + "# fname = /path/to/your/log/file" ] }, { @@ -191,70 +290,17 @@ } ], "source": [ - "l_center_line, l_inner_border, l_outer_border, road_poly = tu.load_track(\"reinvent_base\")\n", + "if not autoParams:\n", + " track = \"reinvent_base\"\n", + " print(\"Track: {}\".format(track))\n", + "else:\n", + " track = autoParams['track']\n", + " print(\"Track: {} (Loaded from autoParams)\".format(track))\n", + "l_center_line, l_inner_border, l_outer_border, road_poly = tu.load_track(track)\n", "\n", "road_poly" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Get 
the logs\n", - "\n", - "Depending on which way you are training your model, you will need a different way to load the data.\n", - "\n", - "**AWS DeepRacer Console**\n", - "The logs are being stored in CloudWatch, in group `/aws/robomaker/SimulationJobs`. You will be using boto3 to download them based on the training ID (stream name prefix). If you wish to bulk export the logs from Amazon Cloudwatch to Amazon S3 :: https://docs.aws.amazon.com/AmazonCloudWatch/latest/logs/S3ExportTasks.html\n", - "\n", - "**DeepRacer for Dummies/ARCC local training**\n", - "Those two setups come with a container that runs Jupyter Notebook (as you noticed if you're using one of them and reading this text). Logs are stored in `/logs/` and you just need to point at the latest file to see the current training. The logs are split for long running training if they exceed 500 MB. The log loading method has been extended to support that.\n", - "\n", - "**Chris Rhodes' repo**\n", - "Chris repo doesn't come with logs storage out of the box. I would normally run `docker logs dr > /path/to/logfile` and then load the file.\n", - "\n", - "Below I have prepared a section for each case. In each case you can analyse the logs as the training is being run, just in case of the Console you may need to force downloading of the logs as the `cw.download_log` method has a protection against needless downloads.\n", - "\n", - "Select your preferred way to get the logs below and you can get rid of the rest." 
- ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Log file exists, use force=True to download again\n", - "SIM_TRACE_LOG:799,111,1.7594,4.4353,3.0875,-0.26,2.50,2,1.0000,False,True,71.5802,49,17.67,1555554451.1110387\n", - "SIM_TRACE_LOG:799,112,1.6637,4.4457,3.0636,0.26,2.50,6,1.0000,False,True,72.1366,49,17.67,1555554451.2382815\n", - "SIM_TRACE_LOG:799,113,1.5520,4.4493,3.0941,0.52,5.00,9,1.0000,False,True,72.7582,49,17.67,1555554451.3647106\n" - ] - } - ], - "source": [ - "# AWS DeepRacer Console\n", - "stream_name = 'sim-sample' ## CHANGE This to your simulation application ID\n", - "fname = 'logs/deepracer-%s.log' %stream_name # The log will be downloaded into the specified path\n", - "cw.download_log(fname, stream_prefix=stream_name) # add force=True if you downloaded the file before but want to repeat\n", - "\n", - "\n", - "# DeepRacer for Dummies / ARCC repository - comment the above and uncomment\n", - "# the lines below. 
They rely on a magic command to list log files\n", - "# ordered by time and pick up the most recent one (index zero).\n", - "# If you want an earlier file, change 0 to larger value.\n", - "# !ls -t /workspace/venv/logs/*.log\n", - "# fname = !ls -t /workspace/venv/logs/*.log\n", - "# fname = fname[0]\n", - "\n", - "\n", - "# Chris Rhodes' repository\n", - "# Use a preferred way of saving the logs to a file , then set an fname value to load it\n", - "# fname = /path/to/your/log/file" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -296,7 +342,10 @@ "outputs": [], "source": [ "EPISODES_PER_ITERATION = 20 # Set to value of your hyperparameter in training\n", - "\n", + "if autoParams:\n", + " EPISODES_PER_ITERATION = autoParams['hyperParameters']['num_episodes_between_training']\n", + " print(\"EPISODES_PER_ITERATION set to {} from autoParams\".format(EPISODES_PER_ITERATION))\n", + " \n", "data = la.load_data(fname)\n", "df = la.convert_to_pandas(data, episodes_per_iteration=EPISODES_PER_ITERATION)\n", "\n", @@ -2601,7 +2650,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.8" + "version": "3.7.2" } }, "nbformat": 4, diff --git a/log-analysis/job_utils.py b/log-analysis/job_utils.py new file mode 100644 index 000000000..5de72cd4b --- /dev/null +++ b/log-analysis/job_utils.py @@ -0,0 +1,152 @@ +#!/usr/bin/env python + +''' +Get details of RoboMaker jobs for DeepRacer + +''' +import boto3 +from operator import itemgetter +from json import loads +import ipywidgets as widgets + +autoParams = {} + +def get_robo_maker_jobs(jobsType='training'): + rmclient = boto3.client('robomaker') + # Get the list of RoboMaker simulation jobs that were used for DeepRacer + deepRacerSimAppId = rmclient.list_simulation_applications()['simulationApplicationSummaries'][0]['name'] + response = rmclient.list_simulation_jobs( + maxResults=100, + filters=[ + { + 'name': 'simulationApplicationName', + 'values': [ + deepRacerSimAppId, + ], 
+ } + ] + ) + rmjobs = response['simulationJobSummaries'] + allJobs={} + allJobs['training'] = [] + allJobs['evaluation'] = [] + # Also get the summaries for each job and add that into the array + for job in rmjobs: + # Get & populate job summary + job['summary'] = rmclient.describe_simulation_job(job=job['arn']) + + if job['summary']['simulationApplications'][0]['launchConfig']['launchFile'] == "distributed_training.launch": + job['type'] = "training" + elif job['summary']['simulationApplications'][0]['launchConfig']['launchFile'] == "evaluation.launch": + job['type'] = "evaluation" + + job['id'] = job['arn'].partition("/")[-1] + if 'lastStartedAt' in job['summary'].keys(): + job['startTime'] = job['summary']['lastStartedAt'] + else: + job['startTime'] = None + job['maxduration'] = job['summary']['maxJobDurationInSeconds'] + job['track'] = job['summary']['simulationApplications'][0]['launchConfig']['environmentVariables']['WORLD_NAME'] + + if job['type'] == "training" and job['status'] != "Failed": + # Get location of metadata (action space) and hyperparams + job['metadataS3bucket'] = job['summary']['simulationApplications'][0]['launchConfig']['environmentVariables']['METRICS_S3_BUCKET'] + job['metadatafilekey'] = job['summary']['simulationApplications'][0]['launchConfig']['environmentVariables']['MODEL_METADATA_FILE_S3_KEY'] + + job['hyperparamsS3bucket'] = job['summary']['simulationApplications'][0]['launchConfig']['environmentVariables']['SAGEMAKER_SHARED_S3_BUCKET'] + job['hyperparamsfilekey'] = "{}/ip/hyperparameters.json".format(job['summary']['simulationApplications'][0]['launchConfig']['environmentVariables']['SAGEMAKER_SHARED_S3_PREFIX']) + + # Download and ingest metadata and hyperparams + s3 = boto3.resource('s3') + job['actionspace'] = loads(s3.Object(job['metadataS3bucket'], job['metadatafilekey']).get()['Body'].read().decode('utf-8'))['action_space'] + job['hyperparams'] = loads(s3.Object(job['hyperparamsS3bucket'], 
job['hyperparamsfilekey']).get()['Body'].read().decode('utf-8')) + + # Create a description for the widget. + job['desc'] = "{} - Type: {} - Track: {} - Duration: {}".format(job['id'],job['type'],job['track'],job['maxduration']/60) + allJobs[job['type']].append(job) + + return allJobs[jobsType] + +def get_job(jobsList,jobID): + return list(filter(lambda job: job['id'] == jobID, jobsList))[0] + +def display_job_selection_widget(): + + output = widgets.Output(layout={'border': '1px solid black','width':'auto'}) + + allJobs = get_robo_maker_jobs() + with output: + output.clear_output() + # Generate and display the dropdown list + dropdownlist=list(map(itemgetter('desc','id'),allJobs)) + simSelectWidget = widgets.Dropdown( + options=dropdownlist, + value=dropdownlist[0][1], + disabled=False, + layout={'width':'auto'}, + ) + + loadButtonText="Load Simulation Job Summary" + print("Select RoboMaker Simulation Job ID and click on '{}' before continuing:".format(loadButtonText)) + display(simSelectWidget) + + button = widgets.Button(description=loadButtonText,layout={'width':'auto'}) + + display(button, output) + def on_button_clicked(b): + global autoParams + selectedJob = list(filter(lambda job: job['id'] == simSelectWidget.value, allJobs))[0] + selectedJobID = selectedJob['id'] + track = selectedJob['track'] + + with output: + output.clear_output() + print("Job ID: {}\t""Track: {}\n" + "Max Run Time: {}\t\tStart Time: {}\n" + "Status: {}\n".format(selectedJobID, + track, + selectedJob['maxduration']/60, + selectedJob['startTime'], + selectedJob['status'] + )) + + if 'actionspace' in selectedJob.keys(): + actionSpace = selectedJob['actionspace'] + autoParams['actionSpace'] = actionSpace + print("Action Space:\nIndex") + print("{: >10} {:>15.3} {: >10}".format("Index","Angle","Speed")) + for action in actionSpace: + print("{: >10} {:>15f} {: >10}".format(action['index'],action['steering_angle'],action['speed'])) + print("\n") + + + if 'hyperparams' in selectedJob.keys(): + 
hyperParameters = selectedJob['hyperparams'] + autoParams['hyperParameters'] = hyperParameters + print("Hyperparameters:\nBatch Size:\t\t\t{}\n" + "Entropy:\t\t\t{}\n" + "Discount Factor:\t\t{}\n" + "Loss Type:\t\t\t{}\n" + "Learning Rate:\t\t\t{}\n" + "Episodes per iteration\t\t{}\n" + "No Epochs\t\t\t{}\n".format(hyperParameters['batch_size'], + hyperParameters['beta_entropy'], + hyperParameters['discount_factor'], + hyperParameters['loss_type'], + hyperParameters['lr'], + hyperParameters['num_episodes_between_training'], + hyperParameters['num_epochs'], + )) + + autoParams['selectedJob'] = selectedJob + autoParams['selectedJobID'] = selectedJobID + autoParams['track'] = track + + + button.on_click(on_button_clicked) + +def get_auto_params(): + global autoParams + return autoParams + +