changes

denisyarats · Sep 23, 2019 · f2e39f7 · f2e39f7
1 parent 681e13b
commit f2e39f7
Show file tree

Hide file tree

Showing 10 changed files with 56 additions and 730 deletions.
diff --git a/README.md b/README.md
@@ -1,10 +1,20 @@
-# Soft Actor-Critic implementaiton in PyTorch
+# SAC+AE implementation in PyTorch
 
+## Requirements
 
-## Running locally
-To train SAC locally one can use provided `run_local.sh` script (change it to modify particular arguments):
-```
-./run_local.sh
+## Instructions
+To train an SAC+AE agent on the `cheetah run` task from image-based observations, run:
+```
+python train.py \
+    --domain_name cheetah \
+    --task_name run \
+    --encoder_type pixel \
+    --decoder_type pixel \
+    --action_repeat 4 \
+    --save_video \
+    --save_tb \
+    --work_dir ./runs/cheetah_run/sac_ae \
+    --seed 1
 ```
 This will produce a folder (`./save` by default) where all output is stored, including train/eval logs, tensorboard blobs, evaluation videos, and model snapshots. To attach tensorboard to a particular run, use the following command:
 ```

diff --git a/ddpg.py b/ddpg.py
diff --git a/decoder.py b/decoder.py
@@ -62,45 +62,13 @@ def log(self, L, step, log_freq):
         L.log_param('train_decoder/fc', self.fc, step)
 
 
-class StateDecoder(nn.Module):
-    def __init__(self, obs_shape, feature_dim):
-        super().__init__()
-
-        assert len(obs_shape) == 1
-
-        self.trunk = nn.Sequential(
-            nn.Linear(feature_dim, 1024), nn.ReLU(), nn.Linear(1024, 1024),
-            nn.ReLU(), nn.Linear(1024, obs_shape[0]), nn.ReLU()
-        )
-
-        self.outputs = dict()
-
-    def forward(self, obs, detach=False):
-        h = self.trunk(obs)
-        if detach:
-            h = h.detach()
-        self.outputs['h'] = h
-        return h
-
-    def log(self, L, step, log_freq):
-        if step % log_freq != 0:
-            return
-
-        L.log_param('train_encoder/fc1', self.trunk[0], step)
-        L.log_param('train_encoder/fc2', self.trunk[2], step)
-        for k, v in self.outputs.items():
-            L.log_histogram('train_encoder/%s_hist' % k, v, step)
-
-
-_AVAILABLE_DECODERS = {'pixel': PixelDecoder, 'state': StateDecoder}
+_AVAILABLE_DECODERS = {'pixel': PixelDecoder}
 
 
 def make_decoder(
     decoder_type, obs_shape, feature_dim, num_layers, num_filters
 ):
     assert decoder_type in _AVAILABLE_DECODERS
-    if decoder_type == 'pixel':
-        return _AVAILABLE_DECODERS[decoder_type](
-            obs_shape, feature_dim, num_layers, num_filters
-        )
-    return _AVAILABLE_DECODERS[decoder_type](obs_shape, feature_dim)
+    return _AVAILABLE_DECODERS[decoder_type](
+        obs_shape, feature_dim, num_layers, num_filters
+    )