
Commit 8841026
reset and step env corrected
mahdinobar committed Apr 5, 2024
1 parent bb22ab0 commit 8841026
Showing 4 changed files with 25 additions and 18 deletions.
2 changes: 1 addition & 1 deletion spinup/algos/pytorch/sac/sac.py
@@ -157,7 +157,7 @@ def sac(env_fn, actor_critic=core.MLPActorCritic, ac_kwargs=dict(), seed=0,
obs_dim = env.observation_space.shape
act_dim = env.action_space.shape[0]

-    # Action limit for clamping: critically, assumes all dimensions share the same bound!
+    # Action limit for clamping: critically, assumes all dimensions share the same bound! (Attention: correct this if the action dimensions have different bounds)
act_limit = env.action_space.high[0]

# Create actor-critic module and target networks
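Note: the updated comment flags a real limitation. `act_limit = env.action_space.high[0]` reads only the first dimension, so SAC silently mis-scales any environment whose action bounds differ per dimension or are asymmetric. A minimal sketch of per-dimension rescaling, assuming a tanh-squashed policy output in [-1, 1] (the helper name and signature are illustrative, not part of this commit):

    import numpy as np
    import gym

    def scale_action(env: gym.Env, raw_action: np.ndarray) -> np.ndarray:
        """Map a squashed action in [-1, 1] to the env's true Box bounds."""
        low, high = env.action_space.low, env.action_space.high
        # Affine rescaling works even when low != -high or bounds differ per dim.
        return low + 0.5 * (raw_action + 1.0) * (high - low)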
26 changes: 16 additions & 10 deletions spinup/backup_gym/gym/envs/classic_control/tworr.py
@@ -25,7 +25,10 @@ def __init__(self):
# TODO: reward params
self.lp=100
self.lv=100
-    self.reward_eta=0.75
+    self.ljerk=100
+    self.reward_eta_p=0.7
+    self.reward_eta_v=0.15
+    self.reward_eta_jerk=0.15
# TODO: User defined linear position gain
self.K_p = 10
self.K_i = 5
@@ -43,7 +46,9 @@ def __init__(self):
self.state = None
self.state_buffer= None
self.t = 0
-    self.seed()
+    seed=1
+    np.random.seed(seed)
+    self.seed(seed=seed)

self.xd_init = 1.5
self.yd_init = 0.4
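Note: this change pins every run to a fixed seed. `np.random.seed(seed)` fixes the global NumPy RNG the environment draws from, and `self.seed(seed=seed)` presumably follows the usual gym classic-control idiom sketched below (the standard convention, not this repo's exact code):

    from gym.utils import seeding

    # Standard gym classic-control seeding pattern (assumed, for reference):
    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]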
Expand All @@ -56,11 +61,11 @@ def __init__(self):
self.xd = np.linspace(self.xd_init, self.xd_init + deltax, self.MAX_TIMESTEPS, endpoint=True)
self.yd = np.linspace(self.yd_init, self.yd_init + deltay, self.MAX_TIMESTEPS, endpoint=True)

-    # TODO CHECK
+    # TODO Attention: only the dimensionality of the observation space is enforced; the bounds below are not used. Modify the code if you need to enforce them.
high_s = np.array([0.2, 0.2, 1.5, 1.5, 2, 2, 18, 5, 2, 2])
low_s = np.array([-0.2, -0.2, -1.5, -1.5, -2, -2, -18, -5, -2, -2])
self.observation_space = spaces.Box(low=low_s, high=high_s, dtype=np.float32)
-    high_a = np.array([1, 1])
+    high_a = np.array([1, 1])  # TODO Attention: all action limits must be equal; otherwise modify the SAC code
low_a = np.array([-1, -1])
self.action_space = spaces.Box(low=low_a, high=high_a, dtype=np.float32)
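Note: since the TODO above and the one in sac.py both hinge on SAC reading a single scalar bound, a small guard (not in this commit; the function name is illustrative) can assert the assumption before training:

    import numpy as np

    def check_uniform_symmetric_bounds(action_space) -> None:
        """Verify what sac.py assumes when it reads action_space.high[0]."""
        high, low = action_space.high, action_space.low
        assert np.allclose(high, high[0]), "action bounds differ across dimensions"
        assert np.allclose(low, -high), "action bounds are not symmetric"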

@@ -223,7 +228,6 @@ def seed(self, seed=None):
def reset(self):
# # randomize true model parameter in every episode
# self.LINK_MASS_2_TRUE = 1.1 + np.random.normal(loc=0.0, scale=0.01, size=1)

# at time t=0
self.t = 0
rd_t = np.array([self.xd[self.t], self.yd[self.t]])
@@ -247,12 +251,12 @@ def reset(self):
self.qc = qc_t.reshape(1, 2)
self.dqc = dqc_t.reshape(1, 2)
self.ddqc = ddqc_t.reshape(1, 2)
-    tau1_hat, tau2_hat = self.two_link_inverse_dynamics(q_t, dqc_t, ddqc_t)
+    tau1_hat, tau2_hat = self.two_link_inverse_dynamics(qc_t, dqc_t, ddqc_t)

# s_init=[th1_init,dth1_init,th2_init,dth2_init]
s_init = np.array([q_t[0], dq_t[0], q_t[1], dq_t[1]])
-    tau1 = tau1_hat #+ a[0]
-    tau2 = tau2_hat #+ a[1]
+    tau1 = tau1_hat  # todo check concept here
+    tau2 = tau2_hat
q_FD = self.two_link_forward_dynamics(tau1, tau2,
s_init)
self.q = np.vstack((self.q, np.array([q_FD[0], q_FD[2]])))
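Note: the substantive fix in this hunk (mirrored in `step` below) is that the feedforward torques are now computed from the commanded joint state `qc_t` instead of the measured `q_t`, which is what the commit message refers to. For reference, a sketch of the standard planar two-link inverse dynamics that `two_link_inverse_dynamics` presumably implements; the point-mass model and the parameter names/defaults are assumptions, not this repo's exact dynamics:

    import numpy as np

    def two_link_inverse_dynamics(q, dq, ddq, m1=1.0, m2=1.0, l1=1.0, l2=1.0, g=9.81):
        """tau = M(q) ddq + C(q, dq) + g(q) for a planar two-link arm (point masses)."""
        c2, s2 = np.cos(q[1]), np.sin(q[1])
        M = np.array([[(m1 + m2) * l1**2 + m2 * l2**2 + 2 * m2 * l1 * l2 * c2,
                       m2 * l2**2 + m2 * l1 * l2 * c2],
                      [m2 * l2**2 + m2 * l1 * l2 * c2,
                       m2 * l2**2]])
        C = np.array([-m2 * l1 * l2 * s2 * (2 * dq[0] * dq[1] + dq[1]**2),
                      m2 * l1 * l2 * s2 * dq[0]**2])
        G = np.array([(m1 + m2) * g * l1 * np.cos(q[0]) + m2 * g * l2 * np.cos(q[0] + q[1]),
                      m2 * g * l2 * np.cos(q[0] + q[1])])
        tau = M @ ddq + C + G
        return tau[0], tau[1]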
@@ -299,7 +303,7 @@ def step(self,a):
self.dqc = np.vstack((self.dqc, dqc_t))
self.ddqc = np.vstack((self.ddqc, ddqc_t))

-    tau1_hat, tau2_hat = self.two_link_inverse_dynamics(q_t, dqc_t, ddqc_t)
+    tau1_hat, tau2_hat = self.two_link_inverse_dynamics(qc_t, dqc_t, ddqc_t)

# s_init=[th1_init,dth1_init,th2_init,dth2_init]
s_init = np.array([q_t[0], dq_t[0], q_t[1], dq_t[1]])
@@ -349,9 +353,11 @@ def step(self,a):
error_p_t = sum(abs(obs[0:2]))
v_hat_after = np.array(self.two_link_forward_kinematics(np.array([q_FD[0], q_FD[2]]))) - np.array(r_hat_t) / self.dt
error_v_t = sum(abs(v_hat_after-vd_t))
+    jerk_level_t = np.abs(self.state_buffer[-1,6]-self.state_buffer[-2,6])+np.abs(self.state_buffer[-1,7]-self.state_buffer[-2,7])
+    reward_jerk_t = self.f_logistic(jerk_level_t,self.ljerk)
reward_p_t=self.f_logistic(error_p_t,self.lp)
reward_v_t = self.f_logistic(error_v_t, self.lv)
-    reward_t=self.reward_eta*reward_p_t+(1-self.reward_eta)*reward_v_t
+    reward_t=self.reward_eta_p*reward_p_t+self.reward_eta_v*reward_v_t+self.reward_eta_jerk*reward_jerk_t
# given action it returns 4-tuple (observation, reward, done, info)
return (self._get_ob(), reward_t, terminal, {})
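Note: the new reward adds a jerk term alongside position and velocity. The jerk level is approximated by the step-to-step change in the buffered accelerations (state indices 6 and 7), and the three logistic rewards are mixed with weights 0.7/0.15/0.15, which sum to 1 so the total stays in [0, 1]. A sketch of the combination, assuming `f_logistic` is a bell-shaped kernel equal to 1 at zero error (its exact form is an assumption, not taken from this repo):

    import numpy as np

    def f_logistic(error, l):
        # Assumed form: peaks at 1 for error = 0, decays with sensitivity l.
        return 2.0 / (np.exp(l * error) + np.exp(-l * error))

    def reward(error_p, error_v, jerk_level,
               lp=100, lv=100, ljerk=100,
               eta_p=0.7, eta_v=0.15, eta_jerk=0.15):
        # Weights sum to 1, matching reward_eta_p + reward_eta_v + reward_eta_jerk.
        return (eta_p * f_logistic(error_p, lp)
                + eta_v * f_logistic(error_v, lv)
                + eta_jerk * f_logistic(jerk_level, ljerk))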

9 changes: 5 additions & 4 deletions spinup/examples/pytorch/inverted_pendulum_SAC.py
@@ -8,9 +8,9 @@
from spinup.utils.test_policy import load_policy_and_env, run_policy
import os

-TRAIN=False
+TRAIN=True
env_fn = lambda: gym.make('Tworr-v0')
exp_name = "Tworrv0_25" #"no_SAC_0_9"
exp_name = "Tworrv0_27" #"no_SAC_0_9"
if __name__ == '__main__':
if TRAIN:
# train
@@ -26,5 +26,6 @@
# from gym.wrappers.monitoring.video_recorder import VideoRecorder
# VideoRecorder(env_fn,'/home/mahdi/ETHZ/codes/spinningup/spinup/examples/pytorch/logs/video.mp4', enabled=True)
# visualize output
-    env, get_action = load_policy_and_env('/home/mahdi/ETHZ/codes/spinningup/spinup/examples/pytorch/logs/'+exp_name)
-    run_policy(env, get_action, num_episodes=1)
+    output_dir = '/home/mahdi/ETHZ/codes/spinningup/spinup/examples/pytorch/logs/'+exp_name
+    env, get_action = load_policy_and_env(output_dir)
+    run_policy(env, get_action, num_episodes=1, output_dir=output_dir)
6 changes: 3 additions & 3 deletions spinup/utils/test_policy.py
@@ -108,7 +108,7 @@ def get_action(x):
return get_action


-def run_policy(env, get_action, max_ep_len=None, num_episodes=100, render=True):
+def run_policy(env, get_action, max_ep_len=None, num_episodes=100, render=True, output_dir=""):

assert env is not None, \
"Environment not found!\n\n It looks like the environment wasn't saved, " + \
@@ -144,7 +144,7 @@ def run_policy(env, get_action, max_ep_len=None, num_episodes=100, render=True):
plt.xlabel("x")
plt.ylabel("y")
plt.legend()
plt.savefig("/home/mahdi/ETHZ/codes/spinningup/spinup/examples/pytorch/logs/Tworrv0_25/position.pdf",format="pdf", bbox_inches='tight')
plt.savefig(output_dir+"/position.pdf",format="pdf", bbox_inches='tight')
plt.show()

plt.figure(2)
@@ -158,7 +158,7 @@ def run_policy(env, get_action, max_ep_len=None, num_episodes=100, render=True):
plt.xlabel("vx")
plt.ylabel("vy")
plt.legend()
plt.savefig("/home/mahdi/ETHZ/codes/spinningup/spinup/examples/pytorch/logs/Tworrv0_25/velocity.pdf",format="pdf", bbox_inches='tight')
plt.savefig(output_dir+"/velocity.pdf",format="pdf", bbox_inches='tight')
plt.show()
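Note: one caveat with the new `output_dir=""` default is that a caller who omits the argument would have `savefig` target "/position.pdf" at the filesystem root. A small guard (not part of this commit) could be:

    import os
    import matplotlib.pyplot as plt

    def save_figure(output_dir: str, filename: str) -> None:
        # Only write to disk when a destination was actually supplied,
        # and make sure the directory exists first.
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)
            plt.savefig(os.path.join(output_dir, filename),
                        format="pdf", bbox_inches="tight")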


