From 2b483c47c19987a5786cbb674de870fc12bab636 Mon Sep 17 00:00:00 2001
From: lucaw <lucaw@allenai.org>
Date: Mon, 11 Apr 2022 18:12:03 -0700
Subject: [PATCH 01/11] HM3D and increasing rollouts.

---
 ...ctnav_habitat_rgb_clipresnet50gru_ddppo.py |  34 ++---
 ...lipresnet50gru_ddppo_increasingrollouts.py | 106 ++++++++++++++++
 .../habitat/objectnav_habitat_base.py         | 117 ++++++++++++------
 3 files changed, 204 insertions(+), 53 deletions(-)
 create mode 100644 projects/objectnav_baselines/experiments/habitat/clip/objectnav_habitat_rgb_clipresnet50gru_ddppo_increasingrollouts.py

diff --git a/projects/objectnav_baselines/experiments/habitat/clip/objectnav_habitat_rgb_clipresnet50gru_ddppo.py b/projects/objectnav_baselines/experiments/habitat/clip/objectnav_habitat_rgb_clipresnet50gru_ddppo.py
index c974254ec..ba068e9b2 100644
--- a/projects/objectnav_baselines/experiments/habitat/clip/objectnav_habitat_rgb_clipresnet50gru_ddppo.py
+++ b/projects/objectnav_baselines/experiments/habitat/clip/objectnav_habitat_rgb_clipresnet50gru_ddppo.py
@@ -1,6 +1,7 @@
 from typing import Sequence, Union
 
 import torch.nn as nn
+from torch.distributions.utils import lazy_property
 
 from allenact.base_abstractions.preprocessor import Preprocessor
 from allenact.utils.experiment_utils import Builder, TrainingPipeline
@@ -25,19 +26,6 @@ class ObjectNavHabitatRGBClipResNet50GRUDDPPOExperimentConfig(
 
     CLIP_MODEL_TYPE = "RN50"
 
-    SENSORS = [
-        RGBSensorHabitat(
-            height=ObjectNavHabitatBaseConfig.SCREEN_SIZE,
-            width=ObjectNavHabitatBaseConfig.SCREEN_SIZE,
-            use_resnet_normalization=True,
-            mean=ClipResNetPreprocessor.CLIP_RGB_MEANS,
-            stdev=ClipResNetPreprocessor.CLIP_RGB_STDS,
-        ),
-        TargetObjectSensorHabitat(
-            len(ObjectNavHabitatBaseConfig.DEFAULT_OBJECT_CATEGORIES_TO_IND)
-        ),
-    ]
-
     def __init__(self, lr: float, **kwargs):
         super().__init__(**kwargs)
 
@@ -50,6 +38,21 @@ def __init__(self, lr: float, **kwargs):
             goal_sensor_type=TargetObjectSensorHabitat,
         )
 
+    @lazy_property
+    def SENSORS(self):
+        return [
+            RGBSensorHabitat(
+                height=ObjectNavHabitatBaseConfig.SCREEN_SIZE,
+                width=ObjectNavHabitatBaseConfig.SCREEN_SIZE,
+                use_resnet_normalization=True,
+                mean=ClipResNetPreprocessor.CLIP_RGB_MEANS,
+                stdev=ClipResNetPreprocessor.CLIP_RGB_STDS,
+            ),
+            TargetObjectSensorHabitat(
+                len(self.DEFAULT_OBJECT_CATEGORIES_TO_IND)
+            ),
+        ]
+
     def training_pipeline(self, **kwargs) -> TrainingPipeline:
         return ObjectNavPPOMixin.training_pipeline(
             lr=self.lr,
@@ -67,4 +70,7 @@ def create_model(self, **kwargs) -> nn.Module:
         )
 
     def tag(self):
-        return f"ObjectNav-Habitat-RGB-ClipResNet50GRU-DDPPO-lr{self.lr}"
+        return (
+            f"{super(ObjectNavHabitatRGBClipResNet50GRUDDPPOExperimentConfig, self).tag()}"
+            f"-RGB-ClipResNet50GRU-DDPPO-lr{self.lr}"
+        )
diff --git a/projects/objectnav_baselines/experiments/habitat/clip/objectnav_habitat_rgb_clipresnet50gru_ddppo_increasingrollouts.py b/projects/objectnav_baselines/experiments/habitat/clip/objectnav_habitat_rgb_clipresnet50gru_ddppo_increasingrollouts.py
new file mode 100644
index 000000000..4792f1a55
--- /dev/null
+++ b/projects/objectnav_baselines/experiments/habitat/clip/objectnav_habitat_rgb_clipresnet50gru_ddppo_increasingrollouts.py
@@ -0,0 +1,106 @@
+import torch
+import torch.optim as optim
+
+from allenact.algorithms.onpolicy_sync.losses import PPO
+from allenact.algorithms.onpolicy_sync.losses.ppo import PPOConfig
+from allenact.utils.experiment_utils import (
+    Builder,
+    TrainingPipeline,
+    PipelineStage,
+    TrainingSettings,
+)
+from projects.objectnav_baselines.experiments.habitat.clip.objectnav_habitat_rgb_clipresnet50gru_ddppo import (
+    ObjectNavHabitatRGBClipResNet50GRUDDPPOExperimentConfig,
+)
+from projects.objectnav_baselines.mixins import update_with_auxiliary_losses
+
+
+class ObjectNavHabitatRGBClipResNet50GRUDDPPOIncreasingLengthExpConfig(
+    ObjectNavHabitatRGBClipResNet50GRUDDPPOExperimentConfig
+):
+    def __init__(self, lr=1e-4, **kwargs):
+        super().__init__(lr, **kwargs)
+        self.lr = lr
+
+    def training_pipeline(self, **kwargs) -> TrainingPipeline:
+        auxiliary_uuids = []
+        multiple_beliefs = False
+        normalize_advantage = False
+        advance_scene_rollout_period = self.ADVANCE_SCENE_ROLLOUT_PERIOD
+        log_interval_small = (
+            self.num_train_processes * 32 * 10 if torch.cuda.is_available() else 1
+        )
+        log_interval_med = (
+            self.num_train_processes * 64 * 5 if torch.cuda.is_available() else 1
+        )
+        log_interval_large = (
+            self.num_train_processes * 128 * 5 if torch.cuda.is_available() else 1
+        )
+
+        batch_steps_0 = int(10e6)
+        batch_steps_1 = int(10e6)
+        batch_steps_2 = int(1e9) - batch_steps_0 - batch_steps_1
+
+        lr = self.lr
+        num_mini_batch = 1
+        update_repeats = 4
+        save_interval = 5000000
+        gamma = 0.99
+        use_gae = True
+        gae_lambda = 0.95
+        max_grad_norm = 0.5
+
+        named_losses = {
+            "ppo_loss": (PPO(**PPOConfig, normalize_advantage=normalize_advantage), 1.0)
+        }
+        named_losses = update_with_auxiliary_losses(
+            named_losses=named_losses,
+            auxiliary_uuids=auxiliary_uuids,
+            multiple_beliefs=multiple_beliefs,
+        )
+
+        return TrainingPipeline(
+            save_interval=save_interval,
+            optimizer_builder=Builder(optim.Adam, dict(lr=lr)),
+            num_mini_batch=num_mini_batch,
+            update_repeats=update_repeats,
+            max_grad_norm=max_grad_norm,
+            named_losses={key: val[0] for key, val in named_losses.items()},
+            gamma=gamma,
+            use_gae=use_gae,
+            gae_lambda=gae_lambda,
+            advance_scene_rollout_period=advance_scene_rollout_period,
+            pipeline_stages=[
+                PipelineStage(
+                    loss_names=["ppo_loss"],
+                    max_stage_steps=batch_steps_0,
+                    training_settings=TrainingSettings(
+                        num_steps=32, metric_accumulate_interval=log_interval_small
+                    ),
+                ),
+                PipelineStage(
+                    loss_names=["ppo_loss"],
+                    max_stage_steps=batch_steps_1,
+                    training_settings=TrainingSettings(
+                        num_steps=64, metric_accumulate_interval=log_interval_med,
+                    ),
+                ),
+                PipelineStage(
+                    loss_names=["ppo_loss"],
+                    max_stage_steps=batch_steps_2,
+                    training_settings=TrainingSettings(
+                        num_steps=128, metric_accumulate_interval=log_interval_large,
+                    ),
+                ),
+            ],
+            lr_scheduler_builder=None,
+        )
+
+    def tag(self):
+        return (
+            super(
+                ObjectNavHabitatRGBClipResNet50GRUDDPPOIncreasingLengthExpConfig, self
+            )
+            .tag()
+            .replace("-DDPPO-lr", "-DDPPO-IncRollouts-lr")
+        )
diff --git a/projects/objectnav_baselines/experiments/habitat/objectnav_habitat_base.py b/projects/objectnav_baselines/experiments/habitat/objectnav_habitat_base.py
index 20ab4af2c..7b5a03777 100644
--- a/projects/objectnav_baselines/experiments/habitat/objectnav_habitat_base.py
+++ b/projects/objectnav_baselines/experiments/habitat/objectnav_habitat_base.py
@@ -1,3 +1,4 @@
+import glob
 import math
 import os
 from abc import ABC
@@ -124,13 +125,6 @@ class ObjectNavHabitatBaseConfig(ObjectNavBaseConfig, ABC):
 
     FAILED_END_REWARD = -1.0
 
-    TASK_DATA_DIR_TEMPLATE = os.path.join(
-        HABITAT_DATASETS_DIR, "objectnav/mp3d/v1/{}/{}.json.gz"
-    )
-    BASE_CONFIG_YAML_PATH = os.path.join(
-        HABITAT_CONFIGS_DIR, "tasks/objectnav_mp3d.yaml"
-    )
-
     ACTION_SPACE = gym.spaces.Discrete(len(ObjectNavTask.class_action_names()))
 
     DEFAULT_NUM_TRAIN_PROCESSES = (
@@ -142,32 +136,9 @@ class ObjectNavHabitatBaseConfig(ObjectNavBaseConfig, ABC):
     DEFAULT_VALID_GPU_IDS = [torch.cuda.device_count() - 1]
     DEFAULT_TEST_GPU_IDS = tuple(range(torch.cuda.device_count()))
 
-    DEFAULT_OBJECT_CATEGORIES_TO_IND = {
-        "chair": 0,
-        "table": 1,
-        "picture": 2,
-        "cabinet": 3,
-        "cushion": 4,
-        "sofa": 5,
-        "bed": 6,
-        "chest_of_drawers": 7,
-        "plant": 8,
-        "sink": 9,
-        "toilet": 10,
-        "stool": 11,
-        "towel": 12,
-        "tv_monitor": 13,
-        "shower": 14,
-        "bathtub": 15,
-        "counter": 16,
-        "fireplace": 17,
-        "gym_equipment": 18,
-        "seating": 19,
-        "clothes": 20,
-    }
-
     def __init__(
         self,
+        scene_dataset: str,  # Should be "mp3d" or "hm3d"
         debug: bool = False,
         num_train_processes: Optional[int] = None,
         num_test_processes: Optional[int] = None,
@@ -179,6 +150,8 @@ def __init__(
         **kwargs,
     ):
         super().__init__(**kwargs)
+
+        self.scene_dataset = scene_dataset
         self.debug = debug
 
         def v_or_default(v, default):
@@ -224,6 +197,56 @@ def _create_config(
             num_episode_sample=num_episode_sample,
         )
 
+    @lazy_property
+    def DEFAULT_OBJECT_CATEGORIES_TO_IND(self):
+        if self.scene_dataset == "mp3d":
+            return {
+                "chair": 0,
+                "table": 1,
+                "picture": 2,
+                "cabinet": 3,
+                "cushion": 4,
+                "sofa": 5,
+                "bed": 6,
+                "chest_of_drawers": 7,
+                "plant": 8,
+                "sink": 9,
+                "toilet": 10,
+                "stool": 11,
+                "towel": 12,
+                "tv_monitor": 13,
+                "shower": 14,
+                "bathtub": 15,
+                "counter": 16,
+                "fireplace": 17,
+                "gym_equipment": 18,
+                "seating": 19,
+                "clothes": 20,
+            }
+        elif self.scene_dataset == "hm3d":
+            return {
+                "chair": 0,
+                "bed": 1,
+                "plant": 2,
+                "toilet": 3,
+                "tv_monitor": 4,
+                "sofa": 5,
+            }
+        else:
+            raise NotImplementedError
+
+    @lazy_property
+    def TASK_DATA_DIR_TEMPLATE(self):
+        return os.path.join(
+            HABITAT_DATASETS_DIR, f"objectnav/{self.scene_dataset}/v1/{{}}/{{}}.json.gz"
+        )
+
+    @lazy_property
+    def BASE_CONFIG_YAML_PATH(self):
+        return os.path.join(
+            HABITAT_CONFIGS_DIR, f"tasks/objectnav_{self.scene_dataset}.yaml"
+        )
+
     @lazy_property
     def TRAIN_CONFIG(self):
         return self._create_config(
@@ -265,10 +288,20 @@ def TRAIN_CONFIGS_PER_PROCESS(self):
             for config in configs:
                 assert len(config.DATASET.CONTENT_SCENES) == 1
                 scene_name = config.DATASET.CONTENT_SCENES[0]
-                glb_path = os.path.join(
-                    scenes_dir, "mp3d", scene_name, f"{scene_name}.glb"
+
+                paths = glob.glob(
+                    os.path.join(
+                        scenes_dir, self.scene_dataset, "**", f"{scene_name}.*"
+                    ),
+                    recursive=True,
                 )
-                memory_use_per_config.append(os.path.getsize(glb_path))
+
+                if self.scene_dataset == "mp3d":
+                    assert len(paths) == 4
+                else:
+                    assert len(paths) == 2
+
+                memory_use_per_config.append(sum(os.path.getsize(p) for p in paths))
 
             max_configs_per_device = math.ceil(len(configs) / len(self.train_gpu_ids))
             mem_per_device = np.array([0.0 for _ in range(len(self.train_gpu_ids))])
@@ -290,10 +323,17 @@ def TRAIN_CONFIGS_PER_PROCESS(self):
             configs = sum(configs_per_device, [])
 
         if self.debug:
-            get_logger().warning("IN DEBUG MODE, WILL ONLY USE `1LXtFkjw3qL` SCENE!!!")
+            get_logger().warning(
+                "IN DEBUG MODE, WILL ONLY USE `1LXtFkjw3qL` SCENE IN MP3D OR `1S7LAXRdDqK` scene in HM3D!!!"
+            )
             for config in configs:
                 config.defrost()
-                config.DATASET.CONTENT_SCENES = ["1LXtFkjw3qL"]
+                if self.scene_dataset == "mp3d":
+                    config.DATASET.CONTENT_SCENES = ["1LXtFkjw3qL"]
+                elif self.scene_dataset == "hm3d":
+                    config.DATASET.CONTENT_SCENES = ["1S7LAXRdDqK"]
+                else:
+                    raise NotImplementedError
                 config.freeze()
         return configs
 
@@ -312,9 +352,8 @@ def test_scenes_path(self):
         return self.TASK_DATA_DIR_TEMPLATE.format(*(["val"] * 2))
         # return self.TASK_DATA_DIR_TEMPLATE.format(*(["test"] * 2))
 
-    @classmethod
-    def tag(cls):
-        return "ObjectNav"
+    def tag(self):
+        return f"ObjectNav-Habitat-{self.scene_dataset.upper()}"
 
     def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:
         return tuple()

From bb300987cb0eb2cd1a7002c3a733be5095606eba Mon Sep 17 00:00:00 2001
From: lucaw <lucaw@allenai.org>
Date: Tue, 19 Apr 2022 11:41:26 -0700
Subject: [PATCH 02/11] Minor inference agent / vision sensor / habitat
 experiment improvements.

---
 .../onpolicy_sync/vector_sampled_tasks.py     | 85 +++++++++++++------
 allenact/embodiedai/sensors/vision_sensors.py | 27 ++++--
 allenact/utils/inference.py                   |  4 +
 ...ctnav_habitat_rgb_clipresnet50gru_ddppo.py |  4 +-
 .../habitat/objectnav_habitat_base.py         |  8 +-
 5 files changed, 87 insertions(+), 41 deletions(-)

diff --git a/allenact/algorithms/onpolicy_sync/vector_sampled_tasks.py b/allenact/algorithms/onpolicy_sync/vector_sampled_tasks.py
index f4045f2fc..981a0fff0 100644
--- a/allenact/algorithms/onpolicy_sync/vector_sampled_tasks.py
+++ b/allenact/algorithms/onpolicy_sync/vector_sampled_tasks.py
@@ -144,6 +144,7 @@ class VectorSampledTasks:
     _mp_ctx: BaseContext
     _connection_read_fns: List[Callable[[], Any]]
     _connection_write_fns: List[Callable[[Any], None]]
+    _read_timeout: Optional[float]
 
     def __init__(
         self,
@@ -154,12 +155,16 @@ def __init__(
         mp_ctx: Optional[BaseContext] = None,
         should_log: bool = True,
         max_processes: Optional[int] = None,
+        read_timeout: Optional[
+            float
+        ] = 60,  # Seconds to wait for a task to return a response before timing out
     ) -> None:
 
         self._is_waiting = False
         self._is_closed = True
         self.should_log = should_log
         self.max_processes = max_processes
+        self.read_timeout = read_timeout
 
         assert (
             sampler_fn_args is not None and len(sampler_fn_args) > 0
@@ -195,7 +200,8 @@ def __init__(
         for args in sampler_fn_args:
             args["mp_ctx"] = self._mp_ctx
         (
-            self._connection_read_fns,
+            connection_poll_fns,
+            connection_read_fns,
             self._connection_write_fns,
         ) = self._spawn_workers(  # noqa
             make_sampler_fn=make_sampler_fn,
@@ -204,6 +210,13 @@ def __init__(
             ],
         )
 
+        self._connection_read_fns = [
+            self._create_read_function_with_timeout(
+                read_fn=read_fn, poll_fn=poll_fn, timeout=self.read_timeout
+            )
+            for read_fn, poll_fn in zip(connection_read_fns, connection_poll_fns)
+        ]
+
         self._is_closed = False
 
         for write_fn in self._connection_write_fns:
@@ -234,6 +247,25 @@ def __init__(
             space for read_fn in self._connection_read_fns for space in read_fn()
         ]
 
+    @staticmethod
+    def _create_read_function_with_timeout(
+        *,
+        read_fn: Callable[[], Any],
+        poll_fn: Callable[[float], bool],
+        timeout: Optional[float],
+    ) -> Callable[[], Any]:
+        def read_with_timeout(timeout_to_use: Optional[float] = timeout):
+            if timeout_to_use is not None:
+                # noinspection PyArgumentList
+                if not poll_fn(timeout=timeout_to_use):
+                    raise TimeoutError(
+                        f"Did not recieve output from `VectorSampledTask` worker for {timeout_to_use} seconds."
+                    )
+
+            return read_fn()
+
+        return read_with_timeout
+
     def _reset_sampler_index_to_process_ind_and_subprocess_ind(self):
         self.sampler_index_to_process_ind_and_subprocess_ind = [
             [i, j]
@@ -297,7 +329,7 @@ def _task_sampling_loop_worker(
         """process worker for creating and interacting with the
         Tasks/TaskSampler."""
 
-        ptitle("VectorSampledTask: {}".format(worker_id))
+        ptitle(f"VectorSampledTask: {worker_id}")
 
         sp_vector_sampled_tasks = SingleProcessVectorSampledTasks(
             make_sampler_fn=make_sampler_fn,
@@ -307,7 +339,7 @@ def _task_sampling_loop_worker(
         )
 
         if parent_pipe is not None:
-            parent_pipe.close()
+            parent_pipe.close()  # Means this pipe will close when the calling process closes it
         try:
             while True:
                 read_input = connection_read_fn()
@@ -368,7 +400,9 @@ def _task_sampling_loop_worker(
             if should_log:
                 get_logger().info(f"Worker {worker_id} KeyboardInterrupt")
         except Exception as e:
-            get_logger().error(traceback.format_exc())
+            get_logger().error(
+                f"Worker {worker_id} encountered an exception:\n{traceback.format_exc()}"
+            )
             raise e
         finally:
             if child_pipe is not None:
@@ -380,52 +414,50 @@ def _spawn_workers(
         self,
         make_sampler_fn: Callable[..., TaskSampler],
         sampler_fn_args_list: Sequence[Sequence[Dict[str, Any]]],
-    ) -> Tuple[List[Callable[[], Any]], List[Callable[[Any], None]]]:
+    ) -> Tuple[
+        List[Callable[[], bool]], List[Callable[[], Any]], List[Callable[[Any], None]]
+    ]:
         parent_connections, worker_connections = zip(
             *[self._mp_ctx.Pipe(duplex=True) for _ in range(self._num_processes)]
         )
         self._workers = []
         k = 0
         id: Union[int, str]
-        for id, stuff in enumerate(
+        for id, (worker_conn, parent_conn, current_sampler_fn_args_list) in enumerate(
             zip(worker_connections, parent_connections, sampler_fn_args_list)
         ):
-            worker_conn, parent_conn, current_sampler_fn_args_list = stuff  # type: ignore
-
             if len(current_sampler_fn_args_list) != 1:
-                id = "{}({}-{})".format(
-                    id, k, k + len(current_sampler_fn_args_list) - 1
-                )
+                id = f"{id}({k}-{k + len(current_sampler_fn_args_list) - 1})"
                 k += len(current_sampler_fn_args_list)
 
             if self.should_log:
                 get_logger().info(
-                    "Starting {}-th VectorSampledTask worker with args {}".format(
-                        id, current_sampler_fn_args_list
-                    )
+                    f"Starting {id}-th VectorSampledTask worker with args {current_sampler_fn_args_list}"
                 )
+
             ps = self._mp_ctx.Process(  # type: ignore
                 target=self._task_sampling_loop_worker,
-                args=(
-                    id,
-                    worker_conn.recv,
-                    worker_conn.send,
-                    make_sampler_fn,
-                    current_sampler_fn_args_list,
-                    self._auto_resample_when_done,
-                    self.should_log,
-                    worker_conn,
-                    parent_conn,
+                kwargs=dict(
+                    worker_id=id,
+                    connection_read_fn=worker_conn.recv,
+                    connection_write_fn=worker_conn.send,
+                    make_sampler_fn=make_sampler_fn,
+                    sampler_fn_args_list=current_sampler_fn_args_list,
+                    auto_resample_when_done=self._auto_resample_when_done,
+                    should_log=self.should_log,
+                    child_pipe=worker_conn,
+                    parent_pipe=parent_conn,
                 ),
             )
             self._workers.append(ps)
             ps.daemon = True
             ps.start()
-            worker_conn.close()
+            worker_conn.close()  # Means this pipe will close when the child process closes it
             time.sleep(
                 0.1
             )  # Useful to ensure things don't lock up when spawning many envs
         return (
+            [p.poll for p in parent_connections],
             [p.recv for p in parent_connections],
             [p.send for p in parent_connections],
         )
@@ -593,7 +625,8 @@ def close(self) -> None:
         if self._is_waiting:
             for read_fn in self._connection_read_fns:
                 try:
-                    read_fn()
+                    # noinspection PyArgumentList
+                    read_fn(0)  # Time out immediately
                 except Exception:
                     pass
 
diff --git a/allenact/embodiedai/sensors/vision_sensors.py b/allenact/embodiedai/sensors/vision_sensors.py
index 33a7b2fd5..3ca2a3cb5 100644
--- a/allenact/embodiedai/sensors/vision_sensors.py
+++ b/allenact/embodiedai/sensors/vision_sensors.py
@@ -162,28 +162,37 @@ def frame_from_env(self, env: EnvType, task: Optional[SubTaskType]) -> np.ndarra
 
     def process_img(self, img: np.ndarray):
         assert (
-            img.dtype == np.float32 and (len(img.shape) == 2 or img.shape[-1] == 1)
-        ) or (img.shape[-1] == 3 and img.dtype == np.uint8), (
+            np.issubdtype(img.dtype, np.float32)
+            and (len(img.shape) == 2 or img.shape[-1] == 1)
+        ) or (img.shape[-1] == 3 and np.issubdtype(img.dtype, np.uint8)), (
             "Input frame must either have 3 channels and be of"
             " type np.uint8 or have one channel and be of type np.float32"
         )
 
-        if self._scale_first:
-            if self.scaler is not None and img.shape[:2] != (self._height, self._width):
-                img = np.array(self.scaler(self.to_pil(img)), dtype=img.dtype)  # hwc
+        if (
+            self._scale_first
+            and self.scaler is not None
+            and img.shape[:2] != (self._height, self._width)
+        ):
+            img = np.array(self.scaler(self.to_pil(img)), dtype=img.dtype)  # hwc
+        elif np.issubdtype(img.dtype, np.float32):
+            img = img.copy()
 
         assert img.dtype in [np.uint8, np.float32]
 
-        if img.dtype == np.uint8:
+        if np.issubdtype(img.dtype, np.uint8):
             img = img.astype(np.float32) / 255.0
 
         if self._should_normalize:
             img -= self._norm_means
             img /= self._norm_sds
 
-        if not self._scale_first:
-            if self.scaler is not None and img.shape[:2] != (self._height, self._width):
-                img = np.array(self.scaler(self.to_pil(img)), dtype=np.float32)  # hwc
+        if (
+            (not self._scale_first)
+            and self.scaler is not None
+            and img.shape[:2] != (self._height, self._width)
+        ):
+            img = np.array(self.scaler(self.to_pil(img)), dtype=np.float32)  # hwc
 
         return img
 
diff --git a/allenact/utils/inference.py b/allenact/utils/inference.py
index 0cef51bea..9fef87086 100644
--- a/allenact/utils/inference.py
+++ b/allenact/utils/inference.py
@@ -27,6 +27,7 @@ class InferenceAgent:
     memory: Optional[Memory] = attr.ib(default=None)
     steps_taken_in_task: int = attr.ib(default=0)
     last_action_flat: Optional = attr.ib(default=None)
+    has_initialized: Optional = attr.ib(default=False)
 
     def __attrs_post_init__(self):
         self.actor_critic.eval()
@@ -74,6 +75,8 @@ def from_experiment_config(
         )
 
     def reset(self):
+        if self.has_initialized:
+            self.rollout_storage.after_updates()
         self.steps_taken_in_task = 0
         self.memory = None
 
@@ -84,6 +87,7 @@ def act(self, observations: ObservationType):
             obs_batch = self.sensor_preprocessor_graph.get_observations(obs_batch)
 
         if self.steps_taken_in_task == 0:
+            self.has_initialized = True
             self.rollout_storage.initialize(
                 observations=obs_batch,
                 num_samplers=1,
diff --git a/projects/objectnav_baselines/experiments/habitat/clip/objectnav_habitat_rgb_clipresnet50gru_ddppo.py b/projects/objectnav_baselines/experiments/habitat/clip/objectnav_habitat_rgb_clipresnet50gru_ddppo.py
index ba068e9b2..3826f75d8 100644
--- a/projects/objectnav_baselines/experiments/habitat/clip/objectnav_habitat_rgb_clipresnet50gru_ddppo.py
+++ b/projects/objectnav_baselines/experiments/habitat/clip/objectnav_habitat_rgb_clipresnet50gru_ddppo.py
@@ -48,9 +48,7 @@ def SENSORS(self):
                 mean=ClipResNetPreprocessor.CLIP_RGB_MEANS,
                 stdev=ClipResNetPreprocessor.CLIP_RGB_STDS,
             ),
-            TargetObjectSensorHabitat(
-                len(self.DEFAULT_OBJECT_CATEGORIES_TO_IND)
-            ),
+            TargetObjectSensorHabitat(len(self.DEFAULT_OBJECT_CATEGORIES_TO_IND)),
         ]
 
     def training_pipeline(self, **kwargs) -> TrainingPipeline:
diff --git a/projects/objectnav_baselines/experiments/habitat/objectnav_habitat_base.py b/projects/objectnav_baselines/experiments/habitat/objectnav_habitat_base.py
index 7b5a03777..9b6f0405a 100644
--- a/projects/objectnav_baselines/experiments/habitat/objectnav_habitat_base.py
+++ b/projects/objectnav_baselines/experiments/habitat/objectnav_habitat_base.py
@@ -1,16 +1,18 @@
 import glob
 import math
 import os
+import warnings
 from abc import ABC
 from typing import Dict, Any, List, Optional, Sequence, Union
 
 import gym
+
+# noinspection PyUnresolvedReferences
+import habitat
 import numpy as np
 import torch
 from torch.distributions.utils import lazy_property
 
-# noinspection PyUnresolvedReferences
-import habitat
 from allenact.base_abstractions.experiment_config import MachineParams
 from allenact.base_abstractions.preprocessor import (
     SensorPreprocessorGraph,
@@ -323,7 +325,7 @@ def TRAIN_CONFIGS_PER_PROCESS(self):
             configs = sum(configs_per_device, [])
 
         if self.debug:
-            get_logger().warning(
+            warnings.warning(
                 "IN DEBUG MODE, WILL ONLY USE `1LXtFkjw3qL` SCENE IN MP3D OR `1S7LAXRdDqK` scene in HM3D!!!"
             )
             for config in configs:

From caa61ade7cbaf618a7e6a463b4e42aded486ba82 Mon Sep 17 00:00:00 2001
From: lucaw <lucaw@allenai.org>
Date: Tue, 19 Apr 2022 11:41:47 -0700
Subject: [PATCH 03/11] First attempt at introducing fault tolerance to
 training.

---
 allenact/algorithms/onpolicy_sync/engine.py  | 50 +++++++++++++++++---
 allenact/algorithms/onpolicy_sync/runner.py  | 13 ++++-
 allenact/algorithms/onpolicy_sync/storage.py |  9 ++--
 3 files changed, 61 insertions(+), 11 deletions(-)

diff --git a/allenact/algorithms/onpolicy_sync/engine.py b/allenact/algorithms/onpolicy_sync/engine.py
index a458cf9c9..24501687a 100644
--- a/allenact/algorithms/onpolicy_sync/engine.py
+++ b/allenact/algorithms/onpolicy_sync/engine.py
@@ -113,6 +113,7 @@ def __init__(
         deterministic_agents: bool = False,
         max_sampler_processes_per_worker: Optional[int] = None,
         initial_model_state_dict: Optional[Union[Dict[str, Any], int]] = None,
+        try_restart_after_task_timeout: bool = False,
         **kwargs,
     ):
         """Initializer.
@@ -140,6 +141,7 @@ def __init__(
         self.device = torch.device("cpu") if device == -1 else torch.device(device)  # type: ignore
         self.distributed_ip = distributed_ip
         self.distributed_port = distributed_port
+        self.try_restart_after_task_timeout = try_restart_after_task_timeout
 
         self.mode = mode.lower().strip()
         assert self.mode in [
@@ -1414,11 +1416,47 @@ def run_pipeline(self):
                 for k, v in self.training_pipeline.current_stage_storage.items()
             }
 
-            for step in range(cur_stage_training_settings.num_steps):
-                num_paused = self.collect_step_across_all_task_samplers(
-                    rollout_storage_uuid=self.training_pipeline.rollout_storage_uuid,
-                    uuid_to_storage=uuid_to_storage,
-                )
+            vector_tasks_already_restarted = False
+            step = -1
+            while step < cur_stage_training_settings.num_steps - 1:
+                step += 1
+
+                try:
+                    num_paused = self.collect_step_across_all_task_samplers(
+                        rollout_storage_uuid=self.training_pipeline.rollout_storage_uuid,
+                        uuid_to_storage=uuid_to_storage,
+                    )
+                except TimeoutError:
+                    if (
+                        not self.try_restart_after_task_timeout
+                    ) or self.mode != TRAIN_MODE_STR:
+                        # Apparently you can just call `raise` here and doing so will just raise the exception as though
+                        # it was not caught (so the stacktrace isn't messed up)
+                        raise
+                    elif vector_tasks_already_restarted:
+                        raise RuntimeError(
+                            f"[{self.mode} worker {self.worker_id}] `vector_tasks` has timed out twice in the same"
+                            f" rollout. This suggests that this error was not recoverable. Timeout exception:\n{traceback.format_exc()}"
+                        )
+                    else:
+                        get_logger().warning(
+                            f"[{self.mode} worker {self.worker_id}] `vector_tasks` appears to have crashed during"
+                            f" training as it has timed out. You have set `try_restart_after_task_timeout` to `True` so"
+                            f" we will attempt to restart these tasks from the beginning. USE THIS FEATURE AT YOUR OWN"
+                            f" RISK. Timeout exception:\n{traceback.format_exc()}."
+                        )
+                        self.vector_tasks.close()
+                        self._vector_tasks = None
+
+                        vector_tasks_already_restarted = True
+                        for (
+                            storage
+                        ) in self.training_pipeline.current_stage_storage.values():
+                            storage.after_updates()
+                        self.initialize_storage_and_viz(
+                            storage_to_initialize=list(uuid_to_storage.values())
+                        )
+                        num_paused = 0
 
                 # A more informative error message should already have been thrown in be given in
                 # `collect_step_across_all_task_samplers` if `num_paused != 0` here but this serves
@@ -1595,7 +1633,7 @@ def train(
             get_logger().error(
                 f"[{self.mode} worker {self.worker_id}] Encountered {type(e).__name__}, exiting."
             )
-            get_logger().exception(traceback.format_exc())
+            get_logger().error(traceback.format_exc())
         finally:
             if training_completed_successfully:
                 if self.worker_id == 0:
diff --git a/allenact/algorithms/onpolicy_sync/runner.py b/allenact/algorithms/onpolicy_sync/runner.py
index 872d09ebf..7a3120d72 100644
--- a/allenact/algorithms/onpolicy_sync/runner.py
+++ b/allenact/algorithms/onpolicy_sync/runner.py
@@ -281,9 +281,9 @@ def handler(_signo, _frame):
                     except Exception:
                         get_logger().error(
                             f"Error occurred when closing the RL engine used by work {mode}-{id}."
-                            f" We cannot recover from this and will simply exit. The exception:"
+                            f" We cannot recover from this and will simply exit. The exception:\n"
+                            f"{traceback.format_exc()}"
                         )
-                        get_logger().exception(traceback.format_exc())
                         sys.exit(1)
                     sys.exit(0)
                 else:
@@ -429,6 +429,15 @@ def start_train(
 
         distributed_port = 0 if num_workers == 1 else self.get_port()
 
+        if (
+            num_workers > 1
+            and "NCCL_ASYNC_ERROR_HANDLING" not in os.environ
+            and "NCCL_BLOCKING_WAIT" not in os.environ
+        ):
+            # This ensures the NCCL distributed backend will throw errors
+            # if we timeout at a call to `barrier()`
+            os.environ["NCCL_ASYNC_ERROR_HANDLING"] = "1"
+
         worker_ids = self.local_worker_ids(TRAIN_MODE_STR)
 
         model_hash = None
diff --git a/allenact/algorithms/onpolicy_sync/storage.py b/allenact/algorithms/onpolicy_sync/storage.py
index 504b29a9f..c6b8fc14d 100644
--- a/allenact/algorithms/onpolicy_sync/storage.py
+++ b/allenact/algorithms/onpolicy_sync/storage.py
@@ -199,7 +199,7 @@ def initialize(
                 self.full_size + 1, num_samplers, action_flat_dim, device=self.device
             )
 
-        assert self.step == 0, "Must call `after_update` before calling `initialize`"
+        assert self.step == 0, "Must call `after_updates` before calling `initialize`"
         self.insert_observations(observations=observations, time_step=0)
         self.prev_actions[0].zero_()  # Have to zero previous actions
         self.masks[0].zero_()  # Have to zero masks
@@ -529,8 +529,11 @@ def after_updates(self, **kwargs):
             for key in storage:
                 storage[key][0][0].copy_(storage[key][0][-1])
 
-        self.masks[0].copy_(self.masks[-1])
-        self.prev_actions[0].copy_(self.prev_actions[-1])
+        if self._masks_full is not None:
+            self.masks[0].copy_(self.masks[-1])
+
+        if self._prev_actions_full is not None:
+            self.prev_actions[0].copy_(self.prev_actions[-1])
 
         self._before_update_called = False
         self._advantages = None

From b8f4664cbe028ca826497cbb2695500e9a2021d7 Mon Sep 17 00:00:00 2001
From: lucaw <lucaw@allenai.org>
Date: Wed, 20 Apr 2022 08:50:05 -0700
Subject: [PATCH 04/11] Fixing pointnav experiments.

---
 .../habitat/pointnav_habitat_depth_simpleconvgru_ddppo.py    | 2 +-
 .../habitat/pointnav_habitat_rgb_simpleconvgru_ddppo.py      | 2 +-
 .../habitat/pointnav_habitat_rgbd_simpleconvgru_ddppo.py     | 2 +-
 .../ithor/pointnav_ithor_depth_simpleconvgru_ddppo.py        | 2 +-
 .../ithor/pointnav_ithor_rgb_simpleconvgru_ddppo.py          | 2 +-
 .../ithor/pointnav_ithor_rgbd_simpleconvgru_ddppo.py         | 2 +-
 .../pointnav_baselines/experiments/pointnav_thor_base.py     | 5 ++++-
 .../robothor/pointnav_robothor_depth_simpleconvgru_ddppo.py  | 2 +-
 .../robothor/pointnav_robothor_rgb_simpleconvgru_ddppo.py    | 2 +-
 .../robothor/pointnav_robothor_rgbd_simpleconvgru_ddppo.py   | 2 +-
 10 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/projects/pointnav_baselines/experiments/habitat/pointnav_habitat_depth_simpleconvgru_ddppo.py b/projects/pointnav_baselines/experiments/habitat/pointnav_habitat_depth_simpleconvgru_ddppo.py
index a201ddf16..d0076105d 100644
--- a/projects/pointnav_baselines/experiments/habitat/pointnav_habitat_depth_simpleconvgru_ddppo.py
+++ b/projects/pointnav_baselines/experiments/habitat/pointnav_habitat_depth_simpleconvgru_ddppo.py
@@ -31,7 +31,7 @@ def __init__(self):
         super().__init__()
 
         self.model_creation_handler = PointNavUnfrozenResNetWithGRUActorCriticMixin(
-            backbone="simpleconv",
+            backbone="simple_cnn",
             sensors=self.SENSORS,
             auxiliary_uuids=[],
             add_prev_actions=True,
diff --git a/projects/pointnav_baselines/experiments/habitat/pointnav_habitat_rgb_simpleconvgru_ddppo.py b/projects/pointnav_baselines/experiments/habitat/pointnav_habitat_rgb_simpleconvgru_ddppo.py
index f43e001e7..bf07f6dff 100644
--- a/projects/pointnav_baselines/experiments/habitat/pointnav_habitat_rgb_simpleconvgru_ddppo.py
+++ b/projects/pointnav_baselines/experiments/habitat/pointnav_habitat_rgb_simpleconvgru_ddppo.py
@@ -31,7 +31,7 @@ def __init__(self):
         super().__init__()
 
         self.model_creation_handler = PointNavUnfrozenResNetWithGRUActorCriticMixin(
-            backbone="simpleconv",
+            backbone="simple_cnn",
             sensors=self.SENSORS,
             auxiliary_uuids=[],
             add_prev_actions=True,
diff --git a/projects/pointnav_baselines/experiments/habitat/pointnav_habitat_rgbd_simpleconvgru_ddppo.py b/projects/pointnav_baselines/experiments/habitat/pointnav_habitat_rgbd_simpleconvgru_ddppo.py
index 3da80b140..b9d79a1e7 100644
--- a/projects/pointnav_baselines/experiments/habitat/pointnav_habitat_rgbd_simpleconvgru_ddppo.py
+++ b/projects/pointnav_baselines/experiments/habitat/pointnav_habitat_rgbd_simpleconvgru_ddppo.py
@@ -37,7 +37,7 @@ def __init__(self):
         super().__init__()
 
         self.model_creation_handler = PointNavUnfrozenResNetWithGRUActorCriticMixin(
-            backbone="simpleconv",
+            backbone="simple_cnn",
             sensors=self.SENSORS,
             auxiliary_uuids=[],
             add_prev_actions=True,
diff --git a/projects/pointnav_baselines/experiments/ithor/pointnav_ithor_depth_simpleconvgru_ddppo.py b/projects/pointnav_baselines/experiments/ithor/pointnav_ithor_depth_simpleconvgru_ddppo.py
index 7dd5af097..df7ee39f0 100644
--- a/projects/pointnav_baselines/experiments/ithor/pointnav_ithor_depth_simpleconvgru_ddppo.py
+++ b/projects/pointnav_baselines/experiments/ithor/pointnav_ithor_depth_simpleconvgru_ddppo.py
@@ -30,7 +30,7 @@ def __init__(self):
         super().__init__()
 
         self.model_creation_handler = PointNavUnfrozenResNetWithGRUActorCriticMixin(
-            backbone="simpleconv",
+            backbone="simple_cnn",
             sensors=self.SENSORS,
             auxiliary_uuids=[],
             add_prev_actions=True,
diff --git a/projects/pointnav_baselines/experiments/ithor/pointnav_ithor_rgb_simpleconvgru_ddppo.py b/projects/pointnav_baselines/experiments/ithor/pointnav_ithor_rgb_simpleconvgru_ddppo.py
index 4bd711c5c..e6af23663 100644
--- a/projects/pointnav_baselines/experiments/ithor/pointnav_ithor_rgb_simpleconvgru_ddppo.py
+++ b/projects/pointnav_baselines/experiments/ithor/pointnav_ithor_rgb_simpleconvgru_ddppo.py
@@ -27,7 +27,7 @@ def __init__(self):
         super().__init__()
 
         self.model_creation_handler = PointNavUnfrozenResNetWithGRUActorCriticMixin(
-            backbone="simpleconv",
+            backbone="simple_cnn",
             sensors=self.SENSORS,
             auxiliary_uuids=[],
             add_prev_actions=True,
diff --git a/projects/pointnav_baselines/experiments/ithor/pointnav_ithor_rgbd_simpleconvgru_ddppo.py b/projects/pointnav_baselines/experiments/ithor/pointnav_ithor_rgbd_simpleconvgru_ddppo.py
index fe28eb004..dcfa647d3 100644
--- a/projects/pointnav_baselines/experiments/ithor/pointnav_ithor_rgbd_simpleconvgru_ddppo.py
+++ b/projects/pointnav_baselines/experiments/ithor/pointnav_ithor_rgbd_simpleconvgru_ddppo.py
@@ -35,7 +35,7 @@ def __init__(self):
         super().__init__()
 
         self.model_creation_handler = PointNavUnfrozenResNetWithGRUActorCriticMixin(
-            backbone="simpleconv",
+            backbone="simple_cnn",
             sensors=self.SENSORS,
             auxiliary_uuids=[],
             add_prev_actions=True,
diff --git a/projects/pointnav_baselines/experiments/pointnav_thor_base.py b/projects/pointnav_baselines/experiments/pointnav_thor_base.py
index d98d0e7a5..945761f30 100644
--- a/projects/pointnav_baselines/experiments/pointnav_thor_base.py
+++ b/projects/pointnav_baselines/experiments/pointnav_thor_base.py
@@ -64,6 +64,9 @@ def __init__(self):
             renderDepthImage=any(isinstance(s, DepthSensorThor) for s in self.SENSORS),
         )
 
+    def preprocessors(self):
+        return tuple()
+
     def machine_params(self, mode="train", **kwargs):
         sampler_devices: Sequence[int] = []
         if mode == "train":
@@ -91,7 +94,7 @@ def machine_params(self, mode="train", **kwargs):
         sensor_preprocessor_graph = (
             SensorPreprocessorGraph(
                 source_observation_spaces=SensorSuite(self.SENSORS).observation_spaces,
-                preprocessors=self.PREPROCESSORS,
+                preprocessors=self.preprocessors(),
             )
             if mode == "train"
             or (
diff --git a/projects/pointnav_baselines/experiments/robothor/pointnav_robothor_depth_simpleconvgru_ddppo.py b/projects/pointnav_baselines/experiments/robothor/pointnav_robothor_depth_simpleconvgru_ddppo.py
index db41055aa..a767b2fd4 100644
--- a/projects/pointnav_baselines/experiments/robothor/pointnav_robothor_depth_simpleconvgru_ddppo.py
+++ b/projects/pointnav_baselines/experiments/robothor/pointnav_robothor_depth_simpleconvgru_ddppo.py
@@ -32,7 +32,7 @@ def __init__(self):
         super().__init__()
 
         self.model_creation_handler = PointNavUnfrozenResNetWithGRUActorCriticMixin(
-            backbone="simpleconv",
+            backbone="simple_cnn",
             sensors=self.SENSORS,
             auxiliary_uuids=[],
             add_prev_actions=True,
diff --git a/projects/pointnav_baselines/experiments/robothor/pointnav_robothor_rgb_simpleconvgru_ddppo.py b/projects/pointnav_baselines/experiments/robothor/pointnav_robothor_rgb_simpleconvgru_ddppo.py
index 74d4007f6..27e935318 100644
--- a/projects/pointnav_baselines/experiments/robothor/pointnav_robothor_rgb_simpleconvgru_ddppo.py
+++ b/projects/pointnav_baselines/experiments/robothor/pointnav_robothor_rgb_simpleconvgru_ddppo.py
@@ -28,7 +28,7 @@ def __init__(self):
         super().__init__()
 
         self.model_creation_handler = PointNavUnfrozenResNetWithGRUActorCriticMixin(
-            backbone="simpleconv",
+            backbone="simple_cnn",
             sensors=self.SENSORS,
             auxiliary_uuids=[],
             add_prev_actions=True,
diff --git a/projects/pointnav_baselines/experiments/robothor/pointnav_robothor_rgbd_simpleconvgru_ddppo.py b/projects/pointnav_baselines/experiments/robothor/pointnav_robothor_rgbd_simpleconvgru_ddppo.py
index a900a59b3..ab236fbe6 100644
--- a/projects/pointnav_baselines/experiments/robothor/pointnav_robothor_rgbd_simpleconvgru_ddppo.py
+++ b/projects/pointnav_baselines/experiments/robothor/pointnav_robothor_rgbd_simpleconvgru_ddppo.py
@@ -36,7 +36,7 @@ def __init__(self):
         super().__init__()
 
         self.model_creation_handler = PointNavUnfrozenResNetWithGRUActorCriticMixin(
-            backbone="simpleconv",
+            backbone="simple_cnn",
             sensors=self.SENSORS,
             auxiliary_uuids=[],
             add_prev_actions=True,

From 04b1a2c1a98c382a782b7f7a5bfb0ba1f8f9b303 Mon Sep 17 00:00:00 2001
From: lucaw <lucaw@allenai.org>
Date: Wed, 20 Apr 2022 13:00:20 -0700
Subject: [PATCH 05/11] Habitat experiment warning fix and making timeouts not
 happen when debugging.

---
 allenact/algorithms/onpolicy_sync/engine.py         | 13 ++++++++++++-
 .../experiments/habitat/objectnav_habitat_base.py   |  2 +-
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/allenact/algorithms/onpolicy_sync/engine.py b/allenact/algorithms/onpolicy_sync/engine.py
index 24501687a..a3b5699cd 100644
--- a/allenact/algorithms/onpolicy_sync/engine.py
+++ b/allenact/algorithms/onpolicy_sync/engine.py
@@ -77,6 +77,16 @@
 )
 from allenact.utils.viz_utils import VizSuite
 
+try:
+    # When debugging we don't want to timeout in the VectorSampledTasks
+
+    # noinspection PyPackageRequirements
+    import pydevd
+
+    DEBUGGING = True
+except ImportError:
+    DEBUGGING = False
+
 TRAIN_MODE_STR = "train"
 VALID_MODE_STR = "valid"
 TEST_MODE_STR = "test"
@@ -248,7 +258,7 @@ def __init__(
                 # During testing, we sometimes found that default timeout was too short
                 # resulting in the run terminating surprisingly, we increase it here.
                 timeout=datetime.timedelta(minutes=3000)
-                if self.mode == TEST_MODE_STR
+                if (self.mode == TEST_MODE_STR or DEBUGGING)
                 else dist.default_pg_timeout,
             )
             self.is_distributed = True
@@ -302,6 +312,7 @@ def vector_tasks(
                 else None,
                 mp_ctx=self.mp_ctx,
                 max_processes=self.max_sampler_processes_per_worker,
+                read_timeout=None if DEBUGGING else 60
             )
         return self._vector_tasks
 
diff --git a/projects/objectnav_baselines/experiments/habitat/objectnav_habitat_base.py b/projects/objectnav_baselines/experiments/habitat/objectnav_habitat_base.py
index 9b6f0405a..2ae1b4f52 100644
--- a/projects/objectnav_baselines/experiments/habitat/objectnav_habitat_base.py
+++ b/projects/objectnav_baselines/experiments/habitat/objectnav_habitat_base.py
@@ -325,7 +325,7 @@ def TRAIN_CONFIGS_PER_PROCESS(self):
             configs = sum(configs_per_device, [])
 
         if self.debug:
-            warnings.warning(
+            warnings.warn(
                 "IN DEBUG MODE, WILL ONLY USE `1LXtFkjw3qL` SCENE IN MP3D OR `1S7LAXRdDqK` scene in HM3D!!!"
             )
             for config in configs:

From 21b663a6007c31122fa84b1b8542b24e9e7878ef Mon Sep 17 00:00:00 2001
From: lucaw <lucaw@allenai.org>
Date: Tue, 26 Apr 2022 15:55:47 -0700
Subject: [PATCH 06/11] add_prev_action option and fix to inference agent.

---
 allenact/utils/inference.py                         |  4 ++--
 .../objectnav_baselines/experiments/clip/mixins.py  |  3 ++-
 .../objectnav_habitat_rgb_clipresnet50gru_ddppo.py  |  2 +-
 .../experiments/habitat/objectnav_habitat_base.py   | 13 ++++++++-----
 .../objectnav_robothor_rgb_clipresnet50gru_ddppo.py |  5 +++--
 ...jectnav_robothor_rgb_clipresnet50x16gru_ddppo.py |  5 +++--
 .../pointnav_habitat_rgb_clipresnet50gru_ddppo.py   |  7 +++++--
 7 files changed, 24 insertions(+), 15 deletions(-)

diff --git a/allenact/utils/inference.py b/allenact/utils/inference.py
index 9fef87086..4549ef936 100644
--- a/allenact/utils/inference.py
+++ b/allenact/utils/inference.py
@@ -13,8 +13,8 @@
     DistributionType,
 )
 from allenact.base_abstractions.preprocessor import SensorPreprocessorGraph
-from allenact.utils.tensor_utils import batch_observations
 from allenact.utils import spaces_utils as su
+from allenact.utils.tensor_utils import batch_observations
 
 
 @attr.s(kw_only=True)
@@ -121,7 +121,7 @@ def act(self, observations: ObservationType):
 
         self.steps_taken_in_task += 1
 
-        if self.steps_taken_in_task % self.steps_before_rollout_refresh:
+        if self.steps_taken_in_task % self.steps_before_rollout_refresh == 0:
             self.rollout_storage.after_updates()
 
         return su.action_list(self.actor_critic.action_space, self.last_action_flat)[0]
diff --git a/projects/objectnav_baselines/experiments/clip/mixins.py b/projects/objectnav_baselines/experiments/clip/mixins.py
index 8ef3b59fa..09e7f3b84 100644
--- a/projects/objectnav_baselines/experiments/clip/mixins.py
+++ b/projects/objectnav_baselines/experiments/clip/mixins.py
@@ -67,7 +67,7 @@ def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:
 
         return preprocessors
 
-    def create_model(self, num_actions: int, **kwargs) -> nn.Module:
+    def create_model(self, num_actions: int, add_prev_actions: bool, **kwargs) -> nn.Module:
         has_rgb = any(isinstance(s, RGBSensor) for s in self.sensors)
         has_depth = any(isinstance(s, DepthSensor) for s in self.sensors)
 
@@ -84,4 +84,5 @@ def create_model(self, num_actions: int, **kwargs) -> nn.Module:
             depth_resnet_preprocessor_uuid="depth_clip_resnet" if has_depth else None,
             hidden_size=512,
             goal_dims=32,
+            add_prev_actions=add_prev_actions
         )
diff --git a/projects/objectnav_baselines/experiments/habitat/clip/objectnav_habitat_rgb_clipresnet50gru_ddppo.py b/projects/objectnav_baselines/experiments/habitat/clip/objectnav_habitat_rgb_clipresnet50gru_ddppo.py
index 3826f75d8..7d255d8fc 100644
--- a/projects/objectnav_baselines/experiments/habitat/clip/objectnav_habitat_rgb_clipresnet50gru_ddppo.py
+++ b/projects/objectnav_baselines/experiments/habitat/clip/objectnav_habitat_rgb_clipresnet50gru_ddppo.py
@@ -64,7 +64,7 @@ def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:
 
     def create_model(self, **kwargs) -> nn.Module:
         return self.preprocessing_and_model.create_model(
-            num_actions=self.ACTION_SPACE.n, **kwargs
+            num_actions=self.ACTION_SPACE.n, add_prev_actions=self.add_prev_actions, **kwargs
         )
 
     def tag(self):
diff --git a/projects/objectnav_baselines/experiments/habitat/objectnav_habitat_base.py b/projects/objectnav_baselines/experiments/habitat/objectnav_habitat_base.py
index 2ae1b4f52..7e23353d4 100644
--- a/projects/objectnav_baselines/experiments/habitat/objectnav_habitat_base.py
+++ b/projects/objectnav_baselines/experiments/habitat/objectnav_habitat_base.py
@@ -6,13 +6,12 @@
 from typing import Dict, Any, List, Optional, Sequence, Union
 
 import gym
-
-# noinspection PyUnresolvedReferences
-import habitat
 import numpy as np
 import torch
 from torch.distributions.utils import lazy_property
 
+# noinspection PyUnresolvedReferences
+import habitat
 from allenact.base_abstractions.experiment_config import MachineParams
 from allenact.base_abstractions.preprocessor import (
     SensorPreprocessorGraph,
@@ -116,7 +115,6 @@ class ObjectNavHabitatBaseConfig(ObjectNavBaseConfig, ABC):
         # CPCA8Loss.UUID,
         # CPCA16Loss.UUID,
     ]
-    ADD_PREV_ACTIONS = False
     MULTIPLE_BELIEFS = False
     BELIEF_FUSION = (  # choose one
         None
@@ -149,6 +147,7 @@ def __init__(
         train_gpu_ids: Optional[Sequence[int]] = None,
         val_gpu_ids: Optional[Sequence[int]] = None,
         test_gpu_ids: Optional[Sequence[int]] = None,
+        add_prev_actions: bool = False,
         **kwargs,
     ):
         super().__init__(**kwargs)
@@ -172,6 +171,7 @@ def v_or_default(v, default):
             val_gpu_ids, self.DEFAULT_VALID_GPU_IDS if run_valid else []
         )
         self.test_gpu_ids = v_or_default(test_gpu_ids, self.DEFAULT_TEST_GPU_IDS)
+        self.add_prev_actions = add_prev_actions
 
     def _create_config(
         self,
@@ -355,7 +355,10 @@ def test_scenes_path(self):
         # return self.TASK_DATA_DIR_TEMPLATE.format(*(["test"] * 2))
 
     def tag(self):
-        return f"ObjectNav-Habitat-{self.scene_dataset.upper()}"
+        t = f"ObjectNav-Habitat-{self.scene_dataset.upper()}"
+        if not self.add_prev_actions:
+            return t
+        return f"{t}-PrevActions"
 
     def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:
         return tuple()
diff --git a/projects/objectnav_baselines/experiments/robothor/clip/objectnav_robothor_rgb_clipresnet50gru_ddppo.py b/projects/objectnav_baselines/experiments/robothor/clip/objectnav_robothor_rgb_clipresnet50gru_ddppo.py
index e9db7aae0..b027e27b9 100644
--- a/projects/objectnav_baselines/experiments/robothor/clip/objectnav_robothor_rgb_clipresnet50gru_ddppo.py
+++ b/projects/objectnav_baselines/experiments/robothor/clip/objectnav_robothor_rgb_clipresnet50gru_ddppo.py
@@ -38,7 +38,7 @@ class ObjectNavRoboThorClipRGBPPOExperimentConfig(ObjectNavRoboThorBaseConfig):
         ),
     ]
 
-    def __init__(self, **kwargs):
+    def __init__(self, add_prev_actions: bool = False, **kwargs):
         super().__init__(**kwargs)
 
         self.preprocessing_and_model = ClipResNetPreprocessGRUActorCriticMixin(
@@ -47,6 +47,7 @@ def __init__(self, **kwargs):
             screen_size=self.SCREEN_SIZE,
             goal_sensor_type=GoalObjectTypeThorSensor,
         )
+        self.add_prev_actions = add_prev_actions
 
     def training_pipeline(self, **kwargs) -> TrainingPipeline:
         return ObjectNavPPOMixin.training_pipeline(
@@ -60,7 +61,7 @@ def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:
 
     def create_model(self, **kwargs) -> nn.Module:
         return self.preprocessing_and_model.create_model(
-            num_actions=self.ACTION_SPACE.n, **kwargs
+            num_actions=self.ACTION_SPACE.n, add_prev_actions=self.add_prev_actions, **kwargs
         )
 
     @classmethod
diff --git a/projects/objectnav_baselines/experiments/robothor/clip/objectnav_robothor_rgb_clipresnet50x16gru_ddppo.py b/projects/objectnav_baselines/experiments/robothor/clip/objectnav_robothor_rgb_clipresnet50x16gru_ddppo.py
index 80220d4aa..e772ae9f5 100644
--- a/projects/objectnav_baselines/experiments/robothor/clip/objectnav_robothor_rgb_clipresnet50x16gru_ddppo.py
+++ b/projects/objectnav_baselines/experiments/robothor/clip/objectnav_robothor_rgb_clipresnet50x16gru_ddppo.py
@@ -38,7 +38,7 @@ class ObjectNavRoboThorRGBPPOExperimentConfig(ObjectNavRoboThorBaseConfig):
         ),
     ]
 
-    def __init__(self, **kwargs):
+    def __init__(self, add_prev_actions: bool = False, **kwargs):
         super().__init__(**kwargs)
 
         self.preprocessing_and_model = ClipResNetPreprocessGRUActorCriticMixin(
@@ -47,6 +47,7 @@ def __init__(self, **kwargs):
             screen_size=self.SCREEN_SIZE,
             goal_sensor_type=GoalObjectTypeThorSensor,
         )
+        self.add_prev_actions = add_prev_actions
 
     def training_pipeline(self, **kwargs) -> TrainingPipeline:
         return ObjectNavPPOMixin.training_pipeline(
@@ -60,7 +61,7 @@ def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:
 
     def create_model(self, **kwargs) -> nn.Module:
         return self.preprocessing_and_model.create_model(
-            num_actions=self.ACTION_SPACE.n, **kwargs
+            num_actions=self.ACTION_SPACE.n, add_prev_actions=self.add_prev_actions, **kwargs
         )
 
     @classmethod
diff --git a/projects/pointnav_baselines/experiments/habitat/clip/pointnav_habitat_rgb_clipresnet50gru_ddppo.py b/projects/pointnav_baselines/experiments/habitat/clip/pointnav_habitat_rgb_clipresnet50gru_ddppo.py
index 3375b8c7a..c3271d6d8 100644
--- a/projects/pointnav_baselines/experiments/habitat/clip/pointnav_habitat_rgb_clipresnet50gru_ddppo.py
+++ b/projects/pointnav_baselines/experiments/habitat/clip/pointnav_habitat_rgb_clipresnet50gru_ddppo.py
@@ -35,7 +35,7 @@ class PointNavHabitatRGBClipResNet50GRUDDPPOExperimentConfig(PointNavHabitatBase
         TargetCoordinatesSensorHabitat(coordinate_dims=2),
     ]
 
-    def __init__(self, **kwargs):
+    def __init__(self, add_prev_actions: bool = False, **kwargs):
         super().__init__(**kwargs)
 
         self.preprocessing_and_model = ClipResNetPreprocessGRUActorCriticMixin(
@@ -44,6 +44,7 @@ def __init__(self, **kwargs):
             screen_size=self.SCREEN_SIZE,
             goal_sensor_type=TargetCoordinatesSensorHabitat,
         )
+        self.add_prev_actions = add_prev_actions
 
     def training_pipeline(self, **kwargs) -> TrainingPipeline:
         return PointNavPPOMixin.training_pipeline(
@@ -58,7 +59,9 @@ def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:
 
     def create_model(self, **kwargs) -> nn.Module:
         return self.preprocessing_and_model.create_model(
-            num_actions=self.ACTION_SPACE.n, **kwargs
+            num_actions=self.ACTION_SPACE.n,
+            add_prev_actions=self.add_prev_actions,
+            **kwargs,
         )
 
     @classmethod

From 481c44980fcf6e59ca2693646805e99258aaf2d1 Mon Sep 17 00:00:00 2001
From: lucaw <lucaw@allenai.org>
Date: Thu, 28 Apr 2022 12:46:33 -0700
Subject: [PATCH 07/11] Additional inference agent parameter.

---
 allenact/utils/inference.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/allenact/utils/inference.py b/allenact/utils/inference.py
index 4549ef936..744c45d9e 100644
--- a/allenact/utils/inference.py
+++ b/allenact/utils/inference.py
@@ -46,10 +46,11 @@ def from_experiment_config(
         exp_config: ExperimentConfig,
         device: torch.device,
         checkpoint_path: Optional[str] = None,
+        mode: str = "test",
     ):
         rollout_storage = exp_config.training_pipeline().rollout_storage
 
-        machine_params = exp_config.machine_params("test")
+        machine_params = exp_config.machine_params(mode)
         if not isinstance(machine_params, MachineParams):
             machine_params = MachineParams(**machine_params)
 

From 6aaeb1b4efe5edae0099dbdd09e6333ed03e063a Mon Sep 17 00:00:00 2001
From: lucaw <lucaw@allenai.org>
Date: Thu, 28 Apr 2022 21:44:32 -0700
Subject: [PATCH 08/11] Longer task timeouts.

---
 allenact/algorithms/onpolicy_sync/engine.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/allenact/algorithms/onpolicy_sync/engine.py b/allenact/algorithms/onpolicy_sync/engine.py
index a3b5699cd..612a508e5 100644
--- a/allenact/algorithms/onpolicy_sync/engine.py
+++ b/allenact/algorithms/onpolicy_sync/engine.py
@@ -24,7 +24,6 @@
 import torch.multiprocessing as mp  # type: ignore
 import torch.nn as nn
 import torch.optim as optim
-
 # noinspection PyProtectedMember
 from torch._C._distributed_c10d import ReduceOp
 
@@ -312,7 +311,7 @@ def vector_tasks(
                 else None,
                 mp_ctx=self.mp_ctx,
                 max_processes=self.max_sampler_processes_per_worker,
-                read_timeout=None if DEBUGGING else 60
+                read_timeout=None if DEBUGGING else 5 * 60
             )
         return self._vector_tasks
 

From 18fd558d54451aeaa8fdf529b423b8aed68068a0 Mon Sep 17 00:00:00 2001
From: lucaw <lucaw@allenai.org>
Date: Tue, 10 May 2022 10:29:16 -0700
Subject: [PATCH 09/11] Improvements to the `get_open_x_displays` function.

---
 allenact_plugins/ithor_plugin/ithor_util.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/allenact_plugins/ithor_plugin/ithor_util.py b/allenact_plugins/ithor_plugin/ithor_util.py
index 16627c1d5..0446bc75d 100644
--- a/allenact_plugins/ithor_plugin/ithor_util.py
+++ b/allenact_plugins/ithor_plugin/ithor_util.py
@@ -2,6 +2,8 @@
 import math
 import os
 import platform
+import traceback
+import warnings
 from contextlib import contextmanager
 from typing import Sequence
 
@@ -70,11 +72,14 @@ def get_open_x_displays(throw_error_if_empty: bool = False) -> Sequence[str]:
     for open_display_str in sorted(open_display_strs):
         try:
             open_display_str = str(int(open_display_str))
+            display = Xlib.display.Display(f":{open_display_str}")
         except Exception:
+            warnings.warn(
+                f"Encountered error when attempting to open display :{open_display_str},"
+                f" error message:\n{traceback.format_exc()}"
+            )
             continue
 
-        display = Xlib.display.Display(":{}".format(open_display_str))
-
         displays.extend(
             [f"{open_display_str}.{i}" for i in range(display.screen_count())]
         )

From 7f3da1076c07ba2b671cb0761a7c964d0fe25be4 Mon Sep 17 00:00:00 2001
From: lucaw <lucaw@allenai.org>
Date: Thu, 12 May 2022 16:16:40 -0700
Subject: [PATCH 10/11] Fix to experimental timeout code.

---
 allenact/algorithms/onpolicy_sync/engine.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/allenact/algorithms/onpolicy_sync/engine.py b/allenact/algorithms/onpolicy_sync/engine.py
index 612a508e5..c6bf903d7 100644
--- a/allenact/algorithms/onpolicy_sync/engine.py
+++ b/allenact/algorithms/onpolicy_sync/engine.py
@@ -86,6 +86,10 @@
 except ImportError:
     DEBUGGING = False
 
+DEBUG_VST_TIMEOUT: Optional[int] = (lambda x: int(x) if x is not None else x)(
+    os.getenv("ALLENACT_DEBUG_VST_TIMEOUT", None)
+)
+
 TRAIN_MODE_STR = "train"
 VALID_MODE_STR = "valid"
 TEST_MODE_STR = "test"
@@ -311,7 +315,7 @@ def vector_tasks(
                 else None,
                 mp_ctx=self.mp_ctx,
                 max_processes=self.max_sampler_processes_per_worker,
-                read_timeout=None if DEBUGGING else 5 * 60
+                read_timeout=DEBUG_VST_TIMEOUT if DEBUGGING else 5 * 60,
             )
         return self._vector_tasks
 
@@ -1466,7 +1470,8 @@ def run_pipeline(self):
                         self.initialize_storage_and_viz(
                             storage_to_initialize=list(uuid_to_storage.values())
                         )
-                        num_paused = 0
+                        step = -1
+                        continue
 
                 # A more informative error message should already have been thrown in be given in
                 # `collect_step_across_all_task_samplers` if `num_paused != 0` here but this serves

From 1d3c64ae1f4d9dc44763d7e9c449e01add0464af Mon Sep 17 00:00:00 2001
From: lucaw <lucaw@allenai.org>
Date: Thu, 2 Jun 2022 10:25:14 -0700
Subject: [PATCH 11/11] Updating pytest.yml

---
 .github/workflows/pytest.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml
index 4a291ca6b..822f8800f 100644
--- a/.github/workflows/pytest.yml
+++ b/.github/workflows/pytest.yml
@@ -28,6 +28,7 @@ jobs:
         python -m pip install --editable="./allenact_plugins[all]"
         python -m pip install pip install -r allenact_plugins/babyai_plugin/extra_requirements.txt # Required as babyai is not on PyPI
         python -m pip install compress_pickle # Needed for some mapping tests
+        python -m pip install -U protobuf==3.20.1 # Required until tensorboardX is fixed: https://github.com/lanpa/tensorboardX/issues/663
         pip list
 
     - name: Test with pytest