#!/usr/bin/env python # -*- coding: utf-8 -*- import agent as ag import sumoenv as se env_train = se.SumoEnv(gui_f=False) env_test = se.SumoEnv(gui_f=True) agent = ag.Agent() EPS = 2 for ieps in range(EPS): for i in range(20): print "Start Training" state = env_train.reset() done = False while not done: action = agent.policy(state) next_state, reward, done, rewards = env_train.step_d(action) agent.train(state, action, reward, 0.001, [1, 1, done, 1, 1]) state = next_state print "Stop Training" env_train.close() print "Start Testing" state = env_test.reset() done = False while not done: action = agent.policy(state) next_state, reward, done, rewards = env_test.step_d(action) print(state) state = next_state print "Stop Testing" env_test.close()