'td3_agent_x(#2)'

agi-brain · Dec 24, 2023 · 2298018 · 2298018
1 parent 1b19455
commit 2298018
Show file tree

Hide file tree

Showing 4 changed files with 24 additions and 28 deletions.
diff --git a/docs/source/documents/api/agents/drl/sac.rst b/docs/source/documents/api/agents/drl/sac.rst
@@ -126,8 +126,8 @@ SAC_Agent
 
     :param obs: The observation variables.
     :type obs: np.ndarray
-    :return: xxxxxx.
-    :rtype: xxxxxx
+    :return: Selected actions.
+    :rtype: np.ndarray
 
 .. py:function::
     xuance.mindspore.agents.policy_gradient.sac_agent.SAC_Agent.train(train_steps)
@@ -139,11 +139,10 @@ SAC_Agent
     xuance.mindspore.agents.policy_gradient.sac_agent.SAC_Agent.test(env_fn,test_episodes)
 
     :param env_fn: The function of making environments.
-    :type env_fn: xxxxxx
     :param test_episodes: The number of testing episodes.
     :type test_episodes: int
-    :return: xxxxxx.
-    :rtype: xxxxxx
+    :return: **scores** - The accumulated scores of these episodes.
+    :rtype: list
 
 .. raw:: html
 

diff --git a/docs/source/documents/api/agents/drl/sac_dis.rst b/docs/source/documents/api/agents/drl/sac_dis.rst
@@ -126,8 +126,8 @@ SACDIS_Agent
 
     :param obs: The observation variables.
     :type obs: np.ndarray
-    :return: xxxxxx.
-    :rtype: xxxxxx
+    :return: Selected actions.
+    :rtype: np.ndarray
 
 .. py:function::
     xuance.mindspore.agents.policy_gradient.sacdis_agent.SACDIS_Agent.train(train_steps)
@@ -139,11 +139,10 @@ SACDIS_Agent
     xuance.mindspore.agents.policy_gradient.sacdis_agent.SACDIS_Agent.test(env_fn,test_episodes)
 
     :param env_fn: The function of making environments.
-    :type env_fn: xxxxxx
     :param test_episodes: The number of testing episodes.
     :type test_episodes: int
-    :return: xxxxxx.
-    :rtype: xxxxxx
+    :return: **scores** - The accumulated scores of these episodes.
+    :rtype: list
 
 .. raw:: html
 

diff --git a/docs/source/documents/api/agents/drl/spdqn.rst b/docs/source/documents/api/agents/drl/spdqn.rst
@@ -148,18 +148,18 @@ SPDQN_Agent
 
     :param obs: The observation variables.
     :type obs: np.ndarray
-    :return: xxxxxx.
-    :rtype: xxxxxx
+    :return: The discrete action, the continuous action, and the raw continuous actions.
+    :rtype: tuple
 
 .. py:function::
     xuance.mindspore.agents.policy_gradient.spdqn_agent.SPDQN_Agent.pad_action(disaction, conaction)
 
-    :param disaction: xxxxxx.
-    :type disaction: xxxxxx
-    :param conaction: xxxxxx.
-    :type conaction: xxxxxx
-    :return: xxxxxx.
-    :rtype: xxxxxx
+    :param disaction: The discrete action index.
+    :type disaction: int
+    :param conaction: The continuous action.
+    :type conaction: np.ndarray
+    :return: The discrete action and the padded continuous actions.
+    :rtype: tuple
 
 .. py:function::
     xuance.mindspore.agents.policy_gradient.spdqn_agent.SPDQN_Agent.train(train_steps)
@@ -171,17 +171,16 @@ SPDQN_Agent
     xuance.mindspore.agents.policy_gradient.spdqn_agent.SPDQN_Agent.test(env_fn,test_episodes)
 
     :param env_fn: The function of making environments.
-    :type env_fn: xxxxxx
     :param test_episodes: The number of testing episodes.
     :type test_episodes: int
-    :return: xxxxxx.
-    :rtype: xxxxxx
+    :return: **scores** - The accumulated scores of these episodes.
+    :rtype: list
 
 .. py:function::
     xuance.mindspore.agents.policy_gradient.spdqn_agent.SPDQN_Agent.end_episode(episode)
 
-    :param episode: xxxxxx.
-    :type episode: xxxxxx
+    :param episode: The current episode number.
+    :type episode: int
 .. raw:: html
 
     <br><hr>

diff --git a/docs/source/documents/api/agents/drl/td3.rst b/docs/source/documents/api/agents/drl/td3.rst
@@ -134,8 +134,8 @@ TD3_Agent
     :type obs: np.ndarray
     :param noise_scale: The scale value of the Gaussian noise.
     :type noise_scale: float
-    :return: xxxxxx.
-    :rtype: xxxxxx
+    :return: The selected action, clipped to the valid action space range [-1, 1].
+    :rtype: np.ndarray
 
 .. py:function::
     xuance.mindspore.agents.policy_gradient.td3_agent.TD3_Agent.train(train_steps)
@@ -147,11 +147,10 @@ TD3_Agent
     xuance.mindspore.agents.policy_gradient.td3_agent.TD3_Agent.test(env_fn,test_episodes)
 
     :param env_fn: The function of making environments.
-    :type env_fn: xxxxxx
     :param test_episodes: The number of testing episodes.
     :type test_episodes: int
-    :return: xxxxxx.
-    :rtype: xxxxxx
+    :return: **scores** - The accumulated scores of these episodes.
+    :rtype: list
 
 .. raw:: html