Skip to content

Commit

Permalink
changes
Browse files Browse the repository at this point in the history
  • Loading branch information
denisyarats committed Sep 23, 2019
1 parent 681e13b commit f2e39f7
Show file tree
Hide file tree
Showing 10 changed files with 56 additions and 730 deletions.
20 changes: 15 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,10 +1,20 @@
# Soft Actor-Critic implementation in PyTorch
# SAC+AE implementation in PyTorch

## Requirements

## Running locally
To train SAC locally, one can use the provided `run_local.sh` script (edit it to change particular arguments):
```
./run_local.sh
## Instructions
To train an SAC+AE agent on the `cheetah run` task from image-based observations, run:
```
python train.py \
--domain_name cheetah \
--task_name run \
--encoder_type pixel \
--decoder_type pixel \
--action_repeat 4 \
--save_video \
--save_tb \
--work_dir ./runs/cheetah_run/sac_ae \
--seed 1
```
This will produce a folder (`./save`) by default, where all the output is going to be stored including train/eval logs, tensorboard blobs, evaluation videos, and model snapshots. It is possible to attach tensorboard to a particular run using the following command:
```
Expand Down
209 changes: 0 additions & 209 deletions ddpg.py

This file was deleted.

40 changes: 4 additions & 36 deletions decoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,45 +62,13 @@ def log(self, L, step, log_freq):
L.log_param('train_decoder/fc', self.fc, step)


class StateDecoder(nn.Module):
    """MLP decoder that maps a feature vector to a flat observation vector.

    Args:
        obs_shape: Shape of the observation to reconstruct. Must have exactly
            one dimension; ``obs_shape[0]`` is the output width.
        feature_dim: Width of the input feature vector.
    """

    def __init__(self, obs_shape, feature_dim):
        super().__init__()

        assert len(obs_shape) == 1

        # NOTE(review): the trailing ReLU makes every output non-negative;
        # confirm this is intended for the observations being reconstructed.
        self.trunk = nn.Sequential(
            nn.Linear(feature_dim, 1024), nn.ReLU(),
            nn.Linear(1024, 1024), nn.ReLU(),
            nn.Linear(1024, obs_shape[0]), nn.ReLU(),
        )

        # Most recent forward output, kept so log() can emit a histogram.
        self.outputs = {}

    def forward(self, obs, detach=False):
        """Run the input through the trunk and return the result.

        Args:
            obs: Input tensor fed to the trunk; its last dimension must equal
                ``feature_dim``.
            detach: When true, the result is detached from the autograd graph
                before being stored and returned.

        Returns:
            The trunk output (detached if ``detach`` is true).
        """
        h = self.trunk(obs)
        if detach:
            h = h.detach()
        self.outputs['h'] = h
        return h

    def log(self, L, step, log_freq):
        """Log trunk parameters and cached outputs every ``log_freq`` steps.

        Args:
            L: Logger object providing ``log_param`` and ``log_histogram``.
            step: Current step, used as the logging index.
            log_freq: Logging happens only when ``step`` is a multiple of this.
        """
        if step % log_freq != 0:
            return

        # Use the decoder namespace so these tags match the other decoder in
        # this module and cannot collide with encoder tags.
        L.log_param('train_decoder/fc1', self.trunk[0], step)
        L.log_param('train_decoder/fc2', self.trunk[2], step)
        for k, v in self.outputs.items():
            L.log_histogram('train_decoder/%s_hist' % k, v, step)


# Registry mapping a decoder_type string to the class that implements it.
# NOTE(review): the name is assigned twice in a row; the second assignment
# replaces the first, so only 'pixel' stays registered. The two lines look
# like the old and new versions of the same statement -- confirm which one
# is meant to remain.
_AVAILABLE_DECODERS = {'pixel': PixelDecoder, 'state': StateDecoder}
_AVAILABLE_DECODERS = {'pixel': PixelDecoder}


def make_decoder(
    decoder_type, obs_shape, feature_dim, num_layers, num_filters
):
    """Instantiate the decoder registered under ``decoder_type``.

    Args:
        decoder_type: Key into ``_AVAILABLE_DECODERS``.
        obs_shape: Observation shape forwarded to the decoder constructor.
        feature_dim: Feature width forwarded to the decoder constructor.
        num_layers: Forwarded only to the 'pixel' decoder.
        num_filters: Forwarded only to the 'pixel' decoder.

    Returns:
        A new decoder instance.

    Raises:
        AssertionError: If ``decoder_type`` is not a registered key.
    """
    assert decoder_type in _AVAILABLE_DECODERS
    decoder_cls = _AVAILABLE_DECODERS[decoder_type]

    if decoder_type == 'pixel':
        return decoder_cls(obs_shape, feature_dim, num_layers, num_filters)

    # Any other registered decoder is built from the observation shape and
    # feature width only. The former extra return after this point could
    # never execute and has been dropped.
    return decoder_cls(obs_shape, feature_dim)
Loading

0 comments on commit f2e39f7

Please sign in to comment.