From da053b06751f0f9d06fe675da280f3eb39504165 Mon Sep 17 00:00:00 2001 From: baijinqiu <2522827873@qq.com> Date: Mon, 25 Dec 2023 15:11:15 +0800 Subject: [PATCH] 'masac_learner_x(#1)' --- .../documents/api/learners/marl/masac.rst | 29 ++++++++++--------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/docs/source/documents/api/learners/marl/masac.rst b/docs/source/documents/api/learners/marl/masac.rst index d08e6a9d..ebe991f9 100644 --- a/docs/source/documents/api/learners/marl/masac.rst +++ b/docs/source/documents/api/learners/marl/masac.rst @@ -1,7 +1,7 @@ MASAC_Learner ===================================== -xxxxxx. +An implementation of the Multi-Agent Soft Actor-Critic (MASAC) algorithm. .. raw:: html @@ -33,11 +33,12 @@ PyTorch .. py:function:: xuance.torch.learners.multi_agent_rl.masac_learner.MASAC_Learner.update(sample) - xxxxxx. + Update the MASAC agent with a batch of training samples. - :param sample: xxxxxx. - :type sample: xxxxxx - :return: The infomation of the training. + :param sample: A dictionary containing training samples, including observations, actions, next observations, rewards, + terminals, agent masks, and agent IDs. + :type sample: dict + :return: The information of the training. :rtype: dict .. raw:: html @@ -68,11 +69,12 @@ TensorFlow .. py:function:: xuance.tensorflow.learners.multi_agent_rl.masac_learner.MASAC_Learner.update(sample) - xxxxxx. + Update the MASAC agent with a batch of training samples. - :param sample: xxxxxx. - :type sample: xxxxxx - :return: The infomation of the training. + :param sample: A dictionary containing training samples, including observations, actions, next observations, rewards, + terminals, agent masks, and agent IDs. + :type sample: dict + :return: The information of the training. :rtype: dict .. raw:: html @@ -103,11 +105,12 @@ MindSpore .. py:function:: xuance.mindspore.learners.multi_agent_rl.masac_learner.MASAC_Learner.update(sample) - xxxxxx. 
+ Update the MASAC agent with a batch of training samples. - :param sample: xxxxxx. - :type sample: xxxxxx - :return: The infomation of the training. + :param sample: A dictionary containing training samples, including observations, actions, next observations, rewards, + terminals, agent masks, and agent IDs. + :type sample: dict + :return: The information of the training. :rtype: dict .. raw:: html