From 696fa9d5c8c2ad85c6433cb38f1e7fdfe310c522 Mon Sep 17 00:00:00 2001
From: rickstaa
Date: Mon, 5 Oct 2020 10:37:34 +0200
Subject: [PATCH] :sparkles: Adds Ex3_EKF environment

---
 simzoo/envs/Ex3_EKF/Ex3_EKF.py          | 187 ++++++++++++++++++++++
 simzoo/envs/Ex3_EKF/Ex3_EKF_negative.py | 203 ++++++++++++++++++++++++
 simzoo/envs/Ex3_EKF/__init__.py         |  11 ++
 simzoo/envs/__init__.py                 |   8 +-
 tests/test_envs_register.py             |   3 +-
 5 files changed, 408 insertions(+), 4 deletions(-)
 create mode 100644 simzoo/envs/Ex3_EKF/Ex3_EKF.py
 create mode 100644 simzoo/envs/Ex3_EKF/Ex3_EKF_negative.py
 create mode 100644 simzoo/envs/Ex3_EKF/__init__.py

diff --git a/simzoo/envs/Ex3_EKF/Ex3_EKF.py b/simzoo/envs/Ex3_EKF/Ex3_EKF.py
new file mode 100644
index 00000000..685e4d8d
--- /dev/null
+++ b/simzoo/envs/Ex3_EKF/Ex3_EKF.py
@@ -0,0 +1,187 @@
import math
import gym
from gym import spaces, logger
from gym.utils import seeding
import numpy as np
import csv
import matplotlib.pyplot as plt

# This example implements an RL-based stationary Kalman filter for a noisy pendulum.

# The dynamic system whose state is to be estimated:
# x(k+1)=f(x(k))+w(k)
# x_1: angle
# x_2: angular velocity
# x_1(k+1)=x_1(k)+dt*x_2(k)
# x_2(k+1)=x_2(k)-g*l*sin(x_1(k))*dt
# y(k)=sin(x_1(k))+v(k)
# x(0): x_1, x_2 ~ U(-pi/2, pi/2)
# w(k)~N([0;0],[1/3*(dt)^3*q_1,1/2*(dt)^2*q_1;1/2*(dt)^2*q_1,dt*q_1])
# v(k)~N(0,cov2)

# estimator design
# \hat(y)(k)=sin(\hat(x)_1(k)+dt*\hat(x)_2(k))
# \hat(x)(k+1)=f(\hat(x)(k))+dt*u*(y(k)-\hat(y)(k))
# where u=[u1,u2]'=l(\hat(x)(k),y(k)) comes from the policy network l(.,.)


class Ex3_EKF(gym.Env):
    def __init__(self):

        self.t = 0
        self.dt = 0.1
        self.q1 = 0.01
        self.g = 9.81
        # self.l = max(0.5,1 + np.random.normal(0,0.5))
        self.l = 1
        # self.mean0 = [1.5, 0]
        # self.cov0_1 = 0.1
        # self.cov0_2 = 0.1
        # self.cov0_1 = 0
        # self.cov0_2 = 0
        self.mean1 = [0, 0]
        self.cov1 = np.array(
            [
                [1 / 3 * (self.dt) ** 3 * self.q1, 1 / 2 * (self.dt) ** 2 * self.q1],
                [1 / 2 * (self.dt) ** 2 * self.q1, self.dt * self.q1],
            ]
        )
        # self.cov1 = np.array([[0,0],[0,0]])
        self.mean2 = 0
        self.cov2 = 1e-2
        # self.cov2 = 0
        self.missing_rate = 0
        self.sigma = 0
        # displacement limit set to be [-high, high]
        high = np.array([10000, 10000])

        self.action_space = spaces.Box(
            low=np.array([-10.0, -10.0]), high=np.array([10.0, 10.0]), dtype=np.float32
        )
        self.observation_space = spaces.Box(-high, high, dtype=np.float32)

        self.seed()
        self.viewer = None
        self.state = None
        self.output = None

        self.steps_beyond_done = None

    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def step(self, action):
        u1, u2 = action
        t = self.t
        input = 0 * np.cos(t) * self.dt
        # Slave (estimator state) and Master (true system state)
        hat_x_1, hat_x_2, x_1, x_2 = self.state
        # y_1 = self.output
        # hat_y_1 = np.sin(hat_x_1)
        #
        # hat_x_1 = hat_x_1 + self.dt * hat_x_2 + self.dt * u1*(y_1-hat_y_1)
        # hat_x_2 = hat_x_2 - self.g*np.sin(hat_x_1)*self.dt + self.dt * u2*(y_1-hat_y_1) + input

        # Master (true pendulum dynamics)

        x_1 = x_1 + self.dt * x_2
        x_2 = x_2 - self.g * self.l * np.sin(x_1) * self.dt + input

        state = np.array([x_1, x_2])
        # add process noise
        state = state + np.random.multivariate_normal(self.mean1, self.cov1).flatten()
        x_1, x_2 = state

        y_1 = np.sin(x_1) + np.random.normal(self.mean2, np.sqrt(self.cov2))
        hat_y_1 = np.sin(hat_x_1 + self.dt * hat_x_2)

        # flag=1: received
        # flag=0: dropout
        (flag,) = np.random.binomial(1, 1 - self.missing_rate, 1)
        # drop_rate = 1
        # to construct cost
        if flag == 1:
            hat_x_1 = hat_x_1 + self.dt * hat_x_2 + self.dt * u1 * (y_1 - hat_y_1)
            hat_x_2 = (
                hat_x_2
                - self.g * np.sin(hat_x_1) * self.dt
                + self.dt * u2 * (y_1 - hat_y_1)
                + input
            )
        else:
            hat_x_1 = hat_x_1 + self.dt * hat_x_2
            hat_x_2 = hat_x_2 - self.g * np.sin(hat_x_1) * self.dt + input

        # hat_x_1 = hat_x_1 + self.dt * hat_x_2 + self.dt * u1 * (y_1 - hat_y_1)
        # hat_x_2 = hat_x_2 - self.g * np.sin(hat_x_1) * self.dt + self.dt * u2 * (y_1 - hat_y_1) + input

        cost_u = (
            np.square(u1 * (y_1 - hat_y_1)) * self.dt
            + np.square(u2 * (y_1 - hat_y_1)) * self.dt
        )
        # cost_y = np.abs(hat_y_1 - y_1) * self.dt
        # cost = cost_y
        cost = np.square(hat_x_1 - x_1) + np.square(hat_x_2 - x_2)
        # cost = np.abs(hat_x_1 - x_1)**1 + np.abs(hat_x_2 - x_2)**1
        # print('cost',cost)
        if cost > 100:
            done = True
        else:
            done = False

        # update new for next round
        self.state = np.array([hat_x_1, hat_x_2, x_1, x_2])
        self.output = y_1
        self.t = self.t + self.dt

        # return np.array([hat_x_1,hat_x_2,y_1, y_2]), cost, done, dict(reference=y_1, state_of_interest=np.array([hat_y_1,hat_y_2]))

        return (
            np.array([hat_x_1, hat_x_2]),
            cost,
            done,
            dict(
                reference=y_1,
                state_of_interest=np.array([hat_x_1 - x_1, hat_x_2 - x_2]),
            ),
        )

    def reset(self):
        x_1 = np.random.uniform(-np.pi / 2, np.pi / 2)
        x_2 = np.random.uniform(-np.pi / 2, np.pi / 2)
        hat_x_1 = x_1 + np.random.uniform(-np.pi / 4, np.pi / 4)
        hat_x_2 = x_2 + np.random.uniform(-np.pi / 4, np.pi / 4)
        self.state = np.array([hat_x_1, hat_x_2, x_1, x_2])
        self.output = np.sin(x_1) + np.random.normal(self.mean2, np.sqrt(self.cov2))
        y_1 = self.output
        y_2 = np.sin(x_2) + np.random.normal(self.mean2, np.sqrt(self.cov2))
        return np.array([hat_x_1, hat_x_2])

    def render(self, mode="human"):

        return


if __name__ == "__main__":
    env = Ex3_EKF()
    T = 10
    path = []
    t1 = []
    s = env.reset()
    for i in range(int(T / env.dt)):
        s, r, done, info = env.step(np.array([0, 0]))
        path.append(s)
        t1.append(i * env.dt)

    fig = plt.figure(figsize=(9, 6))
    ax = fig.add_subplot(111)

    ax.plot(t1, np.array(path)[:, 0], color="yellow", label="x1")
    ax.plot(t1, np.array(path)[:, 1], color="green", label="x2")
    # ax.plot(t1, np.array(path)[:, 2], color='black', label='measurement')

    handles, labels = ax.get_legend_handles_labels()
    #
    ax.legend(handles, labels, loc=2, fancybox=False, shadow=False)
    plt.show()
    print("done")
diff --git a/simzoo/envs/Ex3_EKF/Ex3_EKF_negative.py b/simzoo/envs/Ex3_EKF/Ex3_EKF_negative.py
new file mode 100644
index 00000000..5581f950
--- /dev/null
+++ b/simzoo/envs/Ex3_EKF/Ex3_EKF_negative.py
@@ -0,0 +1,203 @@
import math
import gym
from gym import spaces, logger
from gym.utils import seeding
import numpy as np
import csv
import matplotlib.pyplot as plt

# This example implements an RL-based stationary Kalman filter for a noisy pendulum.

# The dynamic system whose state is to be estimated:
# x(k+1)=f(x(k))+w(k)
# x_1: angle
# x_2: angular velocity
# x_1(k+1)=x_1(k)+dt*x_2(k)
# x_2(k+1)=x_2(k)-g*l*sin(x_1(k))*dt
# y(k)=sin(x_1(k))+v(k)
# x(0): x_1, x_2 ~ U(-pi/2, pi/2)
# w(k)~N([0;0],[1/3*(dt)^3*q_1,1/2*(dt)^2*q_1;1/2*(dt)^2*q_1,dt*q_1])
# v(k)~N(0,cov2)

# estimator design
# \hat(y)(k)=sin(\hat(x)_1(k)+dt*\hat(x)_2(k))
# \hat(x)(k+1)=f(\hat(x)(k))+dt*u*(y(k)-\hat(y)(k))
# where u=[u1,u2]'=l(\hat(x)(k),y(k)) comes from the policy network l(.,.)
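# NOTE: This "negative" variant is identical to Ex3_EKF except that the returned
# cost is negated (i.e. it is a reward to be maximized) and the episode terminates
# when the reward drops below -100 instead of when the cost exceeds 100.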
+ + +class Ex3_EKF_negative(gym.Env): + def __init__(self): + + self.t = 0 + self.dt = 0.1 + self.q1 = 0.01 + self.g = 9.81 + # self.l = max(0.5,1 + np.random.normal(0,0.5)) + self.l = 1 + # self.mean0 = [1.5, 0] + # self.cov0_1 = 0.1 + # self.cov0_2 = 0.1 + # self.cov0_1 = 0 + # self.cov0_2 = 0 + self.mean1 = [0, 0] + self.cov1 = np.array( + [ + [1 / 3 * (self.dt) ** 3 * self.q1, 1 / 2 * (self.dt) ** 2 * self.q1], + [1 / 2 * (self.dt) ** 2 * self.q1, self.dt * self.q1], + ] + ) + # self.cov1 = np.array([[0,0],[0,0]]) + self.mean2 = 0 + self.cov2 = 1e-2 + # self.cov2 = 0 + self.missing_rate = 0 + self.sigma = 0 + # displacement limit set to be [-high, high] + high = np.array([10000, 10000]) + + self.action_space = spaces.Box( + low=np.array([-10.0, -10.0]), high=np.array([10.0, 10.0]), dtype=np.float32 + ) + self.observation_space = spaces.Box(-high, high, dtype=np.float32) + + self.seed() + self.viewer = None + self.state = None + self.output = None + + self.steps_beyond_done = None + + def seed(self, seed=None): + self.np_random, seed = seeding.np_random(seed) + return [seed] + + def step(self, action): + u1, u2 = action + t = self.t + input = 0 * np.cos(t) * self.dt + # Slave + hat_x_1, hat_x_2, x_1, x_2 = self.state + # y_1 = self.output + # hat_y_1 = np.sin(hat_x_1) + # + # hat_x_1 = hat_x_1 + self.dt * hat_x_2 + self.dt * u1*(y_1-hat_y_1) + # hat_x_2 = hat_x_2 - self.g*np.sin(hat_x_1)*self.dt + self.dt * u2*(y_1-hat_y_1) + input + + # Master + + x_1 = x_1 + self.dt * x_2 + x_2 = x_2 - self.g * self.l * np.sin(x_1) * self.dt + input + + state = np.array([x_1, x_2]) + # add process noise + state = state + np.random.multivariate_normal(self.mean1, self.cov1).flatten() + x_1, x_2 = state + + y_1 = np.sin(x_1) + np.random.normal(self.mean2, np.sqrt(self.cov2)) + hat_y_1 = np.sin(hat_x_1 + self.dt * hat_x_2) + + # flag=1: received + # flag=0: dropout + (flag,) = np.random.binomial(1, 1 - self.missing_rate, 1) + # drop_rate = 1 + # to construct cost + if flag == 1: + hat_x_1 = hat_x_1 + self.dt * hat_x_2 + self.dt * u1 * (y_1 - hat_y_1) + hat_x_2 = ( + hat_x_2 + - self.g * np.sin(hat_x_1) * self.dt + + self.dt * u2 * (y_1 - hat_y_1) + + input + ) + else: + hat_x_1 = hat_x_1 + self.dt * hat_x_2 + hat_x_2 = hat_x_2 - self.g * np.sin(hat_x_1) * self.dt + input + + # hat_x_1 = hat_x_1 + self.dt * hat_x_2 + self.dt * u1 * (y_1 - hat_y_1) + # hat_x_2 = hat_x_2 - self.g * np.sin(hat_x_1) * self.dt + self.dt * u2 * (y_1 - hat_y_1) + input + + # NOTE: OLD COST - Minimizeing the return + # cost_u = ( + # np.square(u1 * (y_1 - hat_y_1)) * self.dt + # + np.square(u2 * (y_1 - hat_y_1)) * self.dt + # ) + # # cost_y = np.abs(hat_y_1 - y_1) * self.dt + # # cost = cost_y + # cost = np.square(hat_x_1 - x_1) + np.square(hat_x_2 - x_2) + # # cost = np.abs(hat_x_1 - x_1)**1 + np.abs(hat_x_2 - x_2)**1 + # # print('cost',cost) + # if cost > 100: + # done = True + # else: + # done = False + + # NOTE: Cost when maximizing the return + cost_u = -( + np.square(u1 * (y_1 - hat_y_1)) * self.dt + + np.square(u2 * (y_1 - hat_y_1)) * self.dt + ) + # cost_y = np.abs(hat_y_1 - y_1) * self.dt + # cost = cost_y + cost = -1.0 * (np.square(hat_x_1 - x_1) + np.square(hat_x_2 - x_2)) + # cost = np.abs(hat_x_1 - x_1)**1 + np.abs(hat_x_2 - x_2)**1 + # print('cost',cost) + if cost < -100: + done = True + else: + done = False + + # update new for next round + self.state = np.array([hat_x_1, hat_x_2, x_1, x_2]) + self.output = y_1 + self.t = self.t + self.dt + + # return np.array([hat_x_1,hat_x_2,y_1, y_2]), cost, done, 
        #   dict(reference=y_1, state_of_interest=np.array([hat_y_1,hat_y_2]))

        return (
            np.array([hat_x_1, hat_x_2]),
            cost,
            done,
            dict(
                reference=y_1,
                state_of_interest=np.array([hat_x_1 - x_1, hat_x_2 - x_2]),
            ),
        )

    def reset(self):
        x_1 = np.random.uniform(-np.pi / 2, np.pi / 2)
        x_2 = np.random.uniform(-np.pi / 2, np.pi / 2)
        hat_x_1 = x_1 + np.random.uniform(-np.pi / 4, np.pi / 4)
        hat_x_2 = x_2 + np.random.uniform(-np.pi / 4, np.pi / 4)
        self.state = np.array([hat_x_1, hat_x_2, x_1, x_2])
        self.output = np.sin(x_1) + np.random.normal(self.mean2, np.sqrt(self.cov2))
        y_1 = self.output
        y_2 = np.sin(x_2) + np.random.normal(self.mean2, np.sqrt(self.cov2))
        return np.array([hat_x_1, hat_x_2])

    def render(self, mode="human"):

        return


if __name__ == "__main__":
    env = Ex3_EKF_negative()
    T = 10
    path = []
    t1 = []
    s = env.reset()
    for i in range(int(T / env.dt)):
        s, r, done, info = env.step(np.array([0, 0]))
        path.append(s)
        t1.append(i * env.dt)

    fig = plt.figure(figsize=(9, 6))
    ax = fig.add_subplot(111)

    ax.plot(t1, np.array(path)[:, 0], color="yellow", label="x1")
    ax.plot(t1, np.array(path)[:, 1], color="green", label="x2")
    # ax.plot(t1, np.array(path)[:, 2], color='black', label='measurement')

    handles, labels = ax.get_legend_handles_labels()
    #
    ax.legend(handles, labels, loc=2, fancybox=False, shadow=False)
    plt.show()
    print("done")
diff --git a/simzoo/envs/Ex3_EKF/__init__.py b/simzoo/envs/Ex3_EKF/__init__.py
new file mode 100644
index 00000000..74a5fcd3
--- /dev/null
+++ b/simzoo/envs/Ex3_EKF/__init__.py
@@ -0,0 +1,11 @@
import sys

# Check if used as namespace package
if "simzoo" in sys.modules:
    from simzoo.envs.Ex3_EKF.Ex3_EKF import Ex3_EKF
    from simzoo.envs.Ex3_EKF.Ex3_EKF_negative import Ex3_EKF_negative
else:
    from machine_learning_control.simzoo.simzoo.envs.Ex3_EKF.Ex3_EKF import Ex3_EKF
    from machine_learning_control.simzoo.simzoo.envs.Ex3_EKF.Ex3_EKF_negative import (
        Ex3_EKF_negative,
    )
diff --git a/simzoo/envs/__init__.py b/simzoo/envs/__init__.py
index 49359038..0a93d6d7 100644
--- a/simzoo/envs/__init__.py
+++ b/simzoo/envs/__init__.py
@@ -16,8 +16,12 @@
 # Environments
 envs = {
-    "name": ["Oscillator-v0"],
-    "module": ["simzoo.envs.oscillator:Oscillator"],
+    "name": ["Oscillator-v0", "Ex3_EKF-v0", "Ex3_EKF_negative-v0"],
+    "module": [
+        "simzoo.envs.oscillator:Oscillator",
+        "simzoo.envs.Ex3_EKF:Ex3_EKF",
+        "simzoo.envs.Ex3_EKF:Ex3_EKF_negative",
+    ],
 }
 # Register environments
diff --git a/tests/test_envs_register.py b/tests/test_envs_register.py
index dab42c25..d153aadd 100644
--- a/tests/test_envs_register.py
+++ b/tests/test_envs_register.py
@@ -10,8 +10,7 @@
 import machine_learning_control.simzoo.envs
 # Script Parameters
-ENVS = ["Oscillator-v0"]
-
+ENVS = ["Oscillator-v0", "Ex3_EKF-v0"]
 #################################################
 # Test script ###################################
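A minimal usage sketch, assuming the registration added to simzoo/envs/__init__.py above runs when simzoo.envs is imported; the environment IDs are the ones registered in this patch:

    import gym
    import simzoo.envs  # importing the module registers Oscillator-v0, Ex3_EKF-v0 and Ex3_EKF_negative-v0

    env = gym.make("Ex3_EKF-v0")
    obs = env.reset()
    for _ in range(200):
        # Random correction gains sampled from the action space; the returned
        # "reward" is the squared estimation error, i.e. a cost to be minimized
        # (Ex3_EKF_negative-v0 returns the negated value instead).
        obs, cost, done, info = env.step(env.action_space.sample())
        if done:
            obs = env.reset()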