-
Notifications
You must be signed in to change notification settings - Fork 9
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #16 from gegelati/GridWorld
Creation of GridWorld environment
- Loading branch information
Showing
11 changed files
with
550 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
cmake_minimum_required(VERSION 3.12.4)

# ===========================================
# ============== CMake Content ==============
# ===========================================
# Workspace generated by this file:
#
# Programs
#  - gridworld

# Declaring the project also defines PROJECT_NAME, which the rest of this
# file uses as the name of the executable target.
project(gridworld)

# Make the absolute path of this directory available to the sources as the
# PROJECT_ROOT_PATH preprocessor string.
add_definitions(-DPROJECT_ROOT_PATH="${CMAKE_CURRENT_SOURCE_DIR}")

# Build as strict C++17.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Compiler-specific language extensions are enabled by default. They are
# switched off here because they can hide portability problems in the project
# sources or in third-party headers.
set(CMAKE_C_EXTENSIONS OFF)
set(CMAKE_CXX_EXTENSIONS OFF)
|
||
# Toolchain-specific settings, selected from the name of the CMake generator.
if(CMAKE_GENERATOR MATCHES "Visual Studio.*")
    # Visual Studio: binaries go to the root of the build tree and the CRT
    # "secure" deprecation warnings are silenced.
    add_definitions(-D_CRT_SECURE_NO_WARNINGS)
    set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})
else()
    # Every other generator: compile and link with pthread support.
    string(APPEND CMAKE_CXX_FLAGS " -pthread")

    # Pick the output folder and the extra flags from the build type.
    if(CMAKE_BUILD_TYPE MATCHES "Debug")
        message("Generate Debug project")
        set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/Debug)
        string(APPEND CMAKE_CXX_FLAGS_DEBUG " -g -pg -Wall")
    else()
        message("Generate Release project")
        set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/Release)
        string(APPEND CMAKE_CXX_FLAGS_RELEASE " -Wall")
    endif()

    # Name of the math library for non Visual Studio builds.
    set(CMAKE_EXTRA_LIB m)
endif()
|
||
# Add definitions for testing purposes | ||
# Extra compile definitions used for testing purposes.
# The variable is tested by name: if(${TESTING}) would first expand the value
# and then evaluate the result again, possibly as the name of another variable.
if(TESTING)
    message("Testing mode")
    add_definitions(-DNO_CONSOLE_CONTROL -DNB_GENERATIONS=2)
endif()
|
||
# ******************************************* | ||
# *********** GEGELATI LIBRARY ************** | ||
# ******************************************* | ||
|
||
if(WIN32)
    set(LIBS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/lib")
    # Find the gegelatilib-x.y.z folder in the lib directory.
    file(GLOB GEGELATI_ROOT_DIR "${LIBS_DIR}/gegelatilib-[\\.|0-9]*")
    # Only point GEGELATI_DIR at the bundled library when one was found, so
    # that a value already present in the environment is not erased.
    if(GEGELATI_ROOT_DIR)
        set(ENV{GEGELATI_DIR} "${GEGELATI_ROOT_DIR}")
    endif()
endif()

# Stop the configuration with an explicit error when the library is missing,
# instead of failing later at compile or link time.
find_package(GEGELATI REQUIRED)

# On Windows, copy the DLLs of the bundled library next to the executable.
if(WIN32 AND GEGELATI_ROOT_DIR)
    file(GLOB
        GEGELATI_DLL
        "${GEGELATI_ROOT_DIR}/bin/*.dll"
    )

    # Nothing to copy when the bundled library ships no DLL.
    if(GEGELATI_DLL)
        message("Copy GEGELATI DLLs into ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}")
        file(COPY ${GEGELATI_DLL} DESTINATION "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}")
        # Visual Studio places the binaries in per-configuration sub-folders.
        if(CMAKE_GENERATOR MATCHES "Visual Studio.*")
            file(COPY ${GEGELATI_DLL} DESTINATION "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/Debug")
            file(COPY ${GEGELATI_DLL} DESTINATION "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/Release")
        endif()
    endif()
endif()
|
||
# ******************************************* | ||
# ************** Executable **************** | ||
# ******************************************* | ||
|
||
# Executable to learn the TPG | ||
# Executable to learn the TPG.
# CONFIGURE_DEPENDS makes the build system re-run the glob, so files added to
# or removed from src/ are picked up without a manual re-configuration.
file(GLOB_RECURSE
    gridworld_files
    CONFIGURE_DEPENDS
    "${CMAKE_CURRENT_SOURCE_DIR}/src/*.cpp"
    "${CMAKE_CURRENT_SOURCE_DIR}/src/*.h"
    "${CMAKE_CURRENT_SOURCE_DIR}/params.json"
)

add_executable(${PROJECT_NAME} ${gridworld_files})

# Usage requirements are attached to the target rather than to the directory.
target_include_directories(${PROJECT_NAME} PRIVATE ${GEGELATI_INCLUDE_DIRS})
target_link_libraries(${PROJECT_NAME} PRIVATE ${GEGELATI_LIBRARIES})

# ROOT_DIR is the directory of this CMakeLists.txt; unlike CMAKE_SOURCE_DIR it
# stays correct when this project is included from a parent project.
target_compile_definitions(${PROJECT_NAME} PRIVATE ROOT_DIR="${CMAKE_CURRENT_SOURCE_DIR}")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
# GridWorld | ||
|
||
This application teaches a learning agent built with the [GEGELATI library](https://github.com/gegelati/gegelati) how to get out of a gridWorld | ||
|
||
The gridWorld is a grid composed of 0, 1, 2, and 3. | ||
* 0 is an available tile | ||
* 1 is a good output tile | ||
* 2 is a bad output tile | ||
* 3 is an unavailable tile
|
||
The agent starts at coordinate (0, 0). It can go left, right, up, and down.
|
||
It gets a reward of -1 if it reaches a tile with value 0.
If it reaches a tile with value 1 or 2, the environment terminates and the agent gets a reward of 100 or -100, respectively.
|
||
## How to Build? | ||
The build process of applications relies on [cmake](https://cmake.org) to configure a project for a wide variety of development environments and operating systems. Install [cmake](https://cmake.org/download/) on your system before building the application. | ||
|
||
### Under windows | ||
1. Copy the `gegelatilib-<version>` folder containing the binaries of the [GEGELATI library](https://github.com/gegelati/gegelati) into the `lib` folder. | ||
2. Open a command line interface in the `bin` folder. | ||
3. Enter the following command to create the project for your favorite IDE `cmake ..`. | ||
4. Open the project created in the `bin` folder, or launch the build with the following command: `cmake --build .`. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
This file exists only to force the presence of the lib folder in the git repository. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
This file exists only to force the presence of the lib folder in the git repository. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,113 @@ | ||
{ | ||
// Number of recordings held in the Archive. | ||
// "archiveSize" : 50, // Default value | ||
"archiveSize" : 2000, | ||
// Probability of archiving the result of each Program execution. | ||
// "archivingProbability" : 0.05, // Default value | ||
"archivingProbability" : 0.01, | ||
// Boolean used to activate an evaluation of the surviving roots in validation | ||
// mode after the training at each generation. | ||
// "doValidation" : false, // Default value | ||
"doValidation" : false, | ||
// Maximum number of actions performed on the learning environment during
// each evaluation of a root.
// "maxNbActionsPerEval" : 1000, // Default value | ||
"maxNbActionsPerEval" : 100, | ||
// Maximum number of times a given root is evaluated. After this number is
// reached, possibly after several generations, the score of the root will be | ||
// fixed, and no further evaluation will be done. | ||
// "maxNbEvaluationPerPolicy" : 1000, // Default value | ||
"maxNbEvaluationPerPolicy" : 10, | ||
"mutation" : | ||
{ | ||
"prog" : | ||
{ | ||
// Maximum constant value possible. | ||
// "maxConstValue" : 100, // Default value | ||
"maxConstValue" : 10, | ||
// Maximum number of Line within the Program of the TPG. | ||
// "maxProgramSize" : 96, // Default value | ||
"maxProgramSize" : 20, | ||
// Minimum constant value possible. | ||
// "minConstValue" : -10, // Default value | ||
"minConstValue" : -10, | ||
// Probability of inserting a line in the Program. | ||
// "pAdd" : 0.5, // Default value | ||
"pAdd" : 0.5, | ||
// Probability of each constant to be mutated. | ||
// "pConstantMutation" : 0.5, // Default value | ||
"pConstantMutation" : 0.5, | ||
// Probability of deleting a line of the Program. | ||
// "pDelete" : 0.5, // Default value | ||
"pDelete" : 0.5, | ||
// Probability of altering a line of the Program. | ||
// "pMutate" : 1.0, // Default value | ||
"pMutate" : 1.0, | ||
// Probability of swapping two lines of the Program. | ||
// "pSwap" : 1.0, // Default value | ||
"pSwap" : 1.0 | ||
}, | ||
"tpg" : | ||
{ | ||
// When a Program is mutated, makes sure its behavior is no longer the same. | ||
// "forceProgramBehaviorChangeOnMutation" : false, // Default value | ||
"forceProgramBehaviorChangeOnMutation" : true, | ||
// Maximum number of TPGEdge connected to each TPGTeam of the TPGGraph when | ||
// initialized. | ||
// "maxInitOutgoingEdges" : 3, // Default value | ||
"maxInitOutgoingEdges" : 4, | ||
// Maximum number of outgoing edge during TPGGraph mutations. | ||
// "maxOutgoingEdges" : 5, // Default value | ||
"maxOutgoingEdges" : 10, | ||
// Number of TPGAction vertex of the initialized TPGGraph. | ||
// This parameter is generally automatically set by the LearningEnvironment. | ||
// /* "nbActions" : 0,*/ // Commented by default | ||
/* "nbActions" : 0,*/ | ||
// Number of root TPGTeams to maintain when populating the TPGGraph | ||
// "nbRoots" : 100, // Default value | ||
"nbRoots" : 500, | ||
// Probability of adding an outgoing Edge to a Team. | ||
// "pEdgeAddition" : 0.7, // Default value | ||
"pEdgeAddition" : 0.7, | ||
// Probability of deleting an outgoing Edge of a Team. | ||
// "pEdgeDeletion" : 0.7, // Default value | ||
"pEdgeDeletion" : 0.7, | ||
// Probability of changing the destination of an Edge. | ||
// "pEdgeDestinationChange" : 0.1, // Default value | ||
"pEdgeDestinationChange" : 0.1, | ||
// Probability of the new destination of an Edge to be an Action. | ||
// "pEdgeDestinationIsAction" : 0.5, // Default value | ||
"pEdgeDestinationIsAction" : 0.5, | ||
// Probability of mutating the Program of an outgoing Edge. | ||
// "pProgramMutation" : 0.2, // Default value | ||
"pProgramMutation" : 0.2 | ||
} | ||
}, | ||
// Number of generations of the training. | ||
// "nbGenerations" : 500, // Default value | ||
"nbGenerations" : 10, | ||
// [Only used in AdversarialLearningAgent.] | ||
// Number of times each job is evaluated in the learning process. | ||
// Each root may belong to several jobs, hence this parameter should be lower | ||
// than the nbIterationsPerPolicyEvaluation parameter. | ||
// "nbIterationsPerJob" : 1, // Default value | ||
"nbIterationsPerJob" : 1, | ||
// Number of evaluation of each root per generation. | ||
// "nbIterationsPerPolicyEvaluation" : 5, // Default value | ||
"nbIterationsPerPolicyEvaluation" : 1, | ||
// Number of Constant available in each Program. | ||
// "nbProgramConstant" : 0, // Default value | ||
"nbProgramConstant" : 5, | ||
// Number of registers for the Program execution. | ||
// "nbRegisters" : 8, // Default value | ||
"nbRegisters" : 8, | ||
// [Only used in ParallelLearningAgent and child classes.] | ||
// Number of threads used for the training process. | ||
// When undefined in the json file, this parameter is automatically set to the | ||
// number of cores of the CPU. | ||
// /* "nbThreads" : 0,*/ // Commented by default | ||
"nbThreads" : 1, | ||
// Percentage of deleted (and regenerated) root TPGVertex at each generation. | ||
// "ratioDeletedRoots" : 0.5, // Default value | ||
"ratioDeletedRoots" : 0.5 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
#include"gridworld.h" | ||
|
||
/**
 * Put the environment back into its initial state.
 *
 * The accumulated score is zeroed, the termination flag is cleared and the
 * agent is moved back to tile (0, 0). The agent coordinates are then written
 * into the observable state. None of the parameters is read by this
 * implementation.
 */
void GridWorld::reset(size_t seed, Learn::LearningMode mode, uint16_t iterationNumber, uint64_t generationNumber){
    // Start a fresh episode.
    score = 0.0;
    terminated = false;

    // The agent always starts on the origin tile.
    agentCoord = {0, 0};

    // Publish the coordinates: x at address 0, y at address 1.
    for (size_t axis = 0; axis < 2; ++axis) {
        currentState.setDataAt(typeid(double), axis, agentCoord[axis]);
    }
}
|
||
bool GridWorld::positionAvailable(int pos_x, int pos_y){ | ||
|
||
// position unavailable on axis x | ||
if(pos_x == size[0] || pos_x == -1){ | ||
return false; | ||
} | ||
|
||
// position unavailable on axis x | ||
if(pos_y == size[1] || pos_y == -1){ | ||
return false; | ||
} | ||
|
||
// position unavailable because tile is unavailable | ||
if (grid[pos_y][pos_x] == 3){ | ||
return false; | ||
} | ||
|
||
// Else : position is available | ||
return true; | ||
|
||
} | ||
|
||
void GridWorld::doAction(uint64_t action){ | ||
|
||
switch (action){ | ||
case 0: // left | ||
if (positionAvailable(agentCoord[0] - 1, agentCoord[1])) agentCoord[0]--; | ||
break; | ||
case 1: // Down | ||
if (positionAvailable(agentCoord[0], agentCoord[1] + 1)) agentCoord[1]++; | ||
break; | ||
case 2: // Right | ||
if (positionAvailable(agentCoord[0] + 1, agentCoord[1])) agentCoord[0]++; | ||
break; | ||
case 3: // Up | ||
if (positionAvailable(agentCoord[0], agentCoord[1] - 1)) agentCoord[1]--; | ||
break; | ||
} | ||
|
||
// Reward is always -1 except when an output is reached | ||
double reward = -1; | ||
|
||
if(grid[agentCoord[1]][agentCoord[0]] == 1){ | ||
// good output reached | ||
terminated = true; | ||
reward = 100; | ||
} else if(grid[agentCoord[1]][agentCoord[0]] == 2){ | ||
// Bad output reached | ||
terminated = true; | ||
reward = -100; | ||
} | ||
|
||
// update score | ||
score += reward; | ||
|
||
// Set data | ||
currentState.setDataAt(typeid(double), 0, agentCoord[0]); | ||
currentState.setDataAt(typeid(double), 1, agentCoord[1]); | ||
} | ||
|
||
bool GridWorld::isTerminal() const{ | ||
return terminated; | ||
} | ||
|
||
double GridWorld::getScore() const { | ||
return score; | ||
} | ||
|
||
/// Return the data sources of the environment: a vector holding a single
/// reference to currentState.
std::vector<std::reference_wrapper<const Data::DataHandler>> GridWorld::getDataSources()
{
    std::vector<std::reference_wrapper<const Data::DataHandler>> sources;
    sources.push_back(this->currentState);
    return sources;
}
|
||
/// Return a heap-allocated copy of this environment, built with the copy
/// constructor. The returned pointer is a raw pointer created with new.
Learn::LearningEnvironment* GridWorld::clone() const
{
    GridWorld* copy = new GridWorld(*this);
    return copy;
}
|
||
bool GridWorld::isCopyable() const | ||
{ | ||
return true; | ||
} |
Oops, something went wrong.