From 2d741e81b3d2d5b8c099423e5b364ce57eaf8148 Mon Sep 17 00:00:00 2001
From: Ruoyu Gao
Date: Thu, 30 Mar 2023 00:02:16 -0400
Subject: [PATCH] fix typo for clear buffer

---
 ding/reward_model/gail_irl_model.py   | 4 ++--
 ding/reward_model/icm_reward_model.py | 4 ++--
 ding/reward_model/pdeil_irl_model.py  | 4 ++--
 ding/reward_model/pwil_irl_model.py   | 4 ++--
 ding/reward_model/red_irl_model.py    | 4 ++--
 ding/reward_model/rnd_reward_model.py | 4 ----
 6 files changed, 10 insertions(+), 14 deletions(-)

diff --git a/ding/reward_model/gail_irl_model.py b/ding/reward_model/gail_irl_model.py
index c160bfaccb..6533e114dd 100644
--- a/ding/reward_model/gail_irl_model.py
+++ b/ding/reward_model/gail_irl_model.py
@@ -130,7 +130,7 @@ class GailRewardModel(BaseRewardModel):
        8  | ``hidden_size``     int      128          | Linear model hidden size      |
        9  | ``collect_count``   int      100000       | Expert dataset size           | One entry is a (s,a)
           |                                           |                               | tuple
-       10 | ``clear_buffer_``   int      1            | clear buffer per fix iters    | make sure replay
+       10 | ``clear_buffer_``   int      1            | clear buffer per fixed iters  | make sure replay
           | ``per_iters``                             | buffer's data count
           |                                           | isn't too few.
           |                                           | (code work in entry)
@@ -155,7 +155,7 @@ class GailRewardModel(BaseRewardModel):
         hidden_size=128,
         # (int) Expert dataset size.
         collect_count=100000,
-        # (int) Clear buffer per fix iters.
+        # (int) Clear buffer per fixed iters.
         clear_buffer_per_iters=1,
     )

diff --git a/ding/reward_model/icm_reward_model.py b/ding/reward_model/icm_reward_model.py
index 9ff018b750..9cc6e23e9b 100644
--- a/ding/reward_model/icm_reward_model.py
+++ b/ding/reward_model/icm_reward_model.py
@@ -157,7 +157,7 @@ class ICMRewardModel(BaseRewardModel):
           ``reward_norm``                              | extrinsic reward
        12 | ``extrinsic_``      int      1             | the upper bound of the reward
           ``reward_norm_max``                          | normalization
-       13 | ``clear_buffer``    int      1             | clear buffer per fix iters    | make sure replay
+       13 | ``clear_buffer``    int      1             | clear buffer per fixed iters  | make sure replay
           ``_per_iters``                               | buffer's data count
                                                        | isn't too few.
                                                        | (code work in entry)
@@ -192,7 +192,7 @@ class ICMRewardModel(BaseRewardModel):
         extrinsic_reward_norm=True,
         # (int) The upper bound of the reward normalization.
         extrinsic_reward_norm_max=1,
-        # (int) Clear buffer per fix iters.
+        # (int) Clear buffer per fixed iters.
         clear_buffer_per_iters=100,
     )

diff --git a/ding/reward_model/pdeil_irl_model.py b/ding/reward_model/pdeil_irl_model.py
index 6928ee88f3..0bd42c8fdc 100644
--- a/ding/reward_model/pdeil_irl_model.py
+++ b/ding/reward_model/pdeil_irl_model.py
@@ -32,7 +32,7 @@ class PdeilRewardModel(BaseRewardModel):
           | ``action``                                 |                               |
        4  | ``alpha``           float    0.5           | coefficient for Probability   |
           |                                            | Density Estimator             |
-       5  | ``clear_buffer``    int      1             | clear buffer per fix iters    | make sure replay
+       5  | ``clear_buffer``    int      1             | clear buffer per fixed iters  | make sure replay
           ``_per_iters``                               | buffer's data count
                                                        | isn't too few.
                                                        | (code work in entry)
@@ -50,7 +50,7 @@ class PdeilRewardModel(BaseRewardModel):
         # when alpha is close to 0, the estimator has high variance and low bias;
         # when alpha is close to 1, the estimator has high bias and low variance.
         alpha=0.5,
-        # (int) Clear buffer per fix iters.
+        # (int) Clear buffer per fixed iters.
         clear_buffer_per_iters=1,
     )

diff --git a/ding/reward_model/pwil_irl_model.py b/ding/reward_model/pwil_irl_model.py
index 3700285846..5fec46b821 100644
--- a/ding/reward_model/pwil_irl_model.py
+++ b/ding/reward_model/pwil_irl_model.py
@@ -52,7 +52,7 @@ class PwilRewardModel(BaseRewardModel):
        5  | ``beta``            int      5             | factor beta                   |
        6  | ``s_size``          int      4             | state size                    |
        7  | ``a_size``          int      2             | action size                   |
-       8  | ``clear_buffer``    int      1             | clear buffer per fix iters    | make sure replay
+       8  | ``clear_buffer``    int      1             | clear buffer per fixed iters  | make sure replay
           ``_per_iters``                               | buffer's data count
                                                        | isn't too few.
                                                        | (code work in entry)
@@ -78,7 +78,7 @@ class PwilRewardModel(BaseRewardModel):
         # s_size=4,
         # (int) Action size.
         # a_size=2,
-        # (int) Clear buffer per fix iters.
+        # (int) Clear buffer per fixed iters.
         clear_buffer_per_iters=1,
     )

diff --git a/ding/reward_model/red_irl_model.py b/ding/reward_model/red_irl_model.py
index 14fc09f8ee..a7daeeceec 100644
--- a/ding/reward_model/red_irl_model.py
+++ b/ding/reward_model/red_irl_model.py
@@ -50,7 +50,7 @@ class RedRewardModel(BaseRewardModel):
        6  | ``hidden_size``     int      128           | Linear model hidden size      |
        7  | ``update_per_``     int      100           | Number of updates per collect |
           | ``collect``                                |                               |
-       8  | ``clear_buffer``    int      1             | clear buffer per fix iters    | make sure replay
+       8  | ``clear_buffer``    int      1             | clear buffer per fixed iters  | make sure replay
           ``_per_iters``                               | buffer's data count
                                                        | isn't too few.
                                                        | (code work in entry)
@@ -80,7 +80,7 @@ class RedRewardModel(BaseRewardModel):
         # (float) Hyperparameter at estimated score of r(s,a).
         # r(s,a) = exp(-sigma* L(s,a))
         sigma=0.5,
-        # (int) Clear buffer per fix iters.
+        # (int) Clear buffer per fixed iters.
         clear_buffer_per_iters=1,
     )

diff --git a/ding/reward_model/rnd_reward_model.py b/ding/reward_model/rnd_reward_model.py
index d36a76f1ec..00bb1542fd 100644
--- a/ding/reward_model/rnd_reward_model.py
+++ b/ding/reward_model/rnd_reward_model.py
@@ -81,10 +81,6 @@ class RndRewardModel(BaseRewardModel):
           ``reward_norm``
        12 | ``extrinsic_``      int      1             | the upper bound of the reward
           ``reward_norm_max``                          | normalization
-       13 | ``clear_buffer``    int      1             | clear buffer per fix iters    | make sure replay
-          ``_per_iters``                               | buffer's data count
-                                                       | isn't too few.
-                                                       | (code work in entry)
        == ==================== ===== ============= ======================================= =======================
    """
    config = dict(
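
Note (not part of the patch): the ``clear_buffer_per_iters`` option documented above tells the entry code how often to empty the reward model's internal data buffer; the docstrings warn not to clear so aggressively that the buffer's data count gets too small. The sketch below is only an illustration of that mechanism under assumed names: ``DummyRewardModel`` and the driver loop are hypothetical, and only the ``collect_data`` / ``train`` / ``clear_data`` method names and the ``clear_buffer_per_iters`` key mirror the reward-model interface described in the files this patch edits.

# Hypothetical, self-contained sketch (not DI-engine entry code).
from typing import List


class DummyRewardModel:
    """Stand-in reward model that only keeps an internal training buffer."""

    def __init__(self) -> None:
        self.train_data: List[dict] = []

    def collect_data(self, data: List[dict]) -> None:
        # Store newly collected transitions for the next reward-model update.
        self.train_data.extend(data)

    def train(self) -> None:
        # A real model would fit its network on self.train_data here.
        pass

    def clear_data(self) -> None:
        # Drop buffered transitions so later updates use only recent data.
        self.train_data.clear()


if __name__ == "__main__":
    clear_buffer_per_iters = 2  # illustrative; the configs above use 1 (100 for ICM)
    reward_model = DummyRewardModel()
    for train_iter in range(6):
        reward_model.collect_data([{"obs": None, "action": None}])  # fake transition
        reward_model.train()
        # Clear the buffer every ``clear_buffer_per_iters`` training iterations,
        # which the docstrings describe as being done in the entry code.
        if (train_iter + 1) % clear_buffer_per_iters == 0:
            reward_model.clear_data()
        print(f"iter {train_iter}: buffered samples = {len(reward_model.train_data)}")

With ``clear_buffer_per_iters=1`` the buffer is emptied after every training iteration, so each update sees only the most recently collected batch; larger values trade freshness for a bigger training set.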