Skip to content

Commit

Permalink
Release 0.12
Browse files Browse the repository at this point in the history
  • Loading branch information
yannbouteiller committed Apr 5, 2023
1 parent aed1e64 commit 213df48
Show file tree
Hide file tree
Showing 2 changed files with 130 additions and 2 deletions.
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,14 @@

setup(name='rtgym',
packages=[package for package in find_packages()],
version='0.11',
version='0.12',
license='MIT',
description='Easily implement custom Gymnasium environments for real-time applications',
long_description=long_description,
long_description_content_type="text/markdown",
author='Yann Bouteiller',
url='https://github.com/yannbouteiller/rtgym',
download_url='https://github.com/yannbouteiller/rtgym/archive/refs/tags/v0.11.tar.gz',
download_url='https://github.com/yannbouteiller/rtgym/archive/refs/tags/v0.12.tar.gz',
keywords=['gymnasium', 'real', 'time', 'custom', 'environment', 'reinforcement', 'learning', 'random', 'delays'],
install_requires=['gymnasium', 'numpy'],
classifiers=[
Expand Down
128 changes: 128 additions & 0 deletions tests/test_all.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,134 @@ def test_timing(self):
print(f"The action applied when obs was captured was {obs1[1]}")
self.assertEqual(obs1[1], np.array([-1.]))

# now let us test changing the configuration on-the-fly

# reset:
print("--- reset ---")
obs1, info = env.reset()

# now let us step the environment
a = 1
print(f"--- Step {a} ---")
act = np.array([float(a)], dtype=np.float64)
obs2, _, terminated, truncated, _ = env.step(act)
print(f"terminated: {terminated}, truncated:{truncated}")

for i in range(3):

obs1 = obs2
a += 1

print(f"--- Step {a} ---")
act = np.array([float(a)], dtype=np.float64)
obs2, _, terminated, truncated, _ = env.step(act)
now = time.time()
print(f"terminated: {terminated}, truncated:{truncated}")

# let us look at the action buffer:
for j in range(act_buf_len):
print(f"The action buffer is {obs2[3 + j]} at index {j}")
self.assertEqual(obs2[-1], a)

# Now, we look at the time elapsed between the two observations:
elapsed = obs2[0] - obs1[0]
print(f"The two last obs are spaced by {elapsed} seconds")
self.assertGreater(time_step_duration + epsilon, elapsed)
self.assertGreater(elapsed, time_step_duration - epsilon)

# the control applied when obs2 was captured should be the previous a
print(f"The action applied when obs was captured was {obs2[1]}")
self.assertEqual(obs2[1], np.array([float(a - 1)]))

# the sending timestamp of the control should be the beginning of the last time-step:
elapsed = now - obs2[2]
print(f"This action was sent {elapsed} seconds ago")
self.assertGreater(time_step_duration + epsilon, elapsed)
self.assertGreater(elapsed, time_step_duration - epsilon)

new_time_step_duration = 0.2
new_ep_max_length = 8
print(f"changing time step duration to {new_time_step_duration} and episode length to {new_ep_max_length}")
# let us change the parameters (this waits for the end of the ongoing time step):
env.set_time_step_duration(time_step_duration=new_time_step_duration)
env.set_start_obs_capture(start_obs_capture=new_time_step_duration)
env.set_ep_max_length(ep_max_length=new_ep_max_length)

# the next time step will still be of the old duration:

obs1 = obs2
a += 1

print(f"--- Step {a} ---")
act = np.array([float(a)], dtype=np.float64)
obs2, _, terminated, truncated, _ = env.step(act)
now = time.time()
print(f"terminated: {terminated}, truncated:{truncated}")

# let us look at the action buffer:
for j in range(act_buf_len):
print(f"The action buffer is {obs2[3 + j]} at index {j}")
self.assertEqual(obs2[-1], a)

# Now, we look at the time elapsed between the two observations:
elapsed = obs2[0] - obs1[0]
print(f"The two last obs are spaced by {elapsed} seconds")
self.assertGreater(time_step_duration + epsilon, elapsed)
self.assertGreater(elapsed, time_step_duration - epsilon)

# the control applied when obs2 was captured should be the previous a
print(f"The action applied when obs was captured was {obs2[1]}")
self.assertEqual(obs2[1], np.array([float(a - 1)]))

# the sending timestamp of the control should be the beginning of the last time-step:
elapsed = now - obs2[2]
print(f"This action was sent {elapsed} seconds ago")
self.assertGreater(time_step_duration + epsilon, elapsed)
self.assertGreater(elapsed, time_step_duration - epsilon)

# but the following time steps will be of the new duration, and episode will end on time step 8

for i in range(5):

obs1 = obs2
a += 1

print(f"--- Step {a} ---")
act = np.array([float(a)], dtype=np.float64)
obs2, _, terminated, truncated, _ = env.step(act)
now = time.time()
print(f"terminated: {terminated}, truncated:{truncated}")

# let us look at the action buffer:
for j in range(act_buf_len):
print(f"The action buffer is {obs2[3 + j]} at index {j}")
self.assertEqual(obs2[-1], a)

# Now, we look at the time elapsed between the two observations:
elapsed = obs2[0] - obs1[0]
print(f"The two last obs are spaced by {elapsed} seconds")
if i > 0: # there is some jitter on first iteration
self.assertGreater(new_time_step_duration + epsilon, elapsed)
self.assertGreater(elapsed, new_time_step_duration - epsilon)

# the control applied when obs2 was captured should be the previous a
print(f"The action applied when obs was captured was {obs2[1]}")
self.assertEqual(obs2[1], np.array([float(a - 1)]))

# the sending timestamp of the control should be the beginning of the last time-step:
elapsed = now - obs2[2]
print(f"This action was sent {elapsed} seconds ago")
if i > 0: # there is some jitter on first iteration
self.assertGreater(new_time_step_duration + epsilon, elapsed)
self.assertGreater(elapsed, new_time_step_duration - epsilon)

# end of episode:
if a == 8:
# the terminated signal should override the truncated signal:
self.assertFalse(terminated)
self.assertTrue(truncated)
break


if __name__ == '__main__':
unittest.main()

0 comments on commit 213df48

Please sign in to comment.