Skip to content

Commit

Permalink
Merge pull request #16 from gegelati/GridWorld
Browse files Browse the repository at this point in the history
Creation of GridWorld environment
  • Loading branch information
QuentinVacher-rl authored Aug 27, 2024
2 parents 419c60b + b0b8d0b commit ab71988
Show file tree
Hide file tree
Showing 11 changed files with 550 additions and 1 deletion.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ jobs:
strategy:
matrix:
os: [ubuntu-latest, windows-latest]
app: [pendulum, tic-tac-toe, mnist, stickgame]
app: [pendulum, tic-tac-toe, mnist, stickgame, gridworld]
steps:
- name: Checkout
uses: actions/checkout@v2
Expand Down
101 changes: 101 additions & 0 deletions gridworld/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
cmake_minimum_required(VERSION 3.12.4)

# *******************************************
# ************* CMake Content ***************
# *******************************************
# This CMake file creates a workspace containing the following project:
#
# Programs
#  - gridworld

# project() defines PROJECT_NAME itself; the rest of this file reads it back
# through ${PROJECT_NAME}.
project(gridworld)

# Define the PROJECT_ROOT_PATH preprocessor macro as a string literal holding
# the absolute path of the directory containing this CMakeLists.txt.
add_definitions(-DPROJECT_ROOT_PATH="${CMAKE_CURRENT_SOURCE_DIR}")

# Require C++17.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Turn compiler-specific language extensions off for both C and C++.
# They are ON by default, which lets compilers accept extended dialects of the
# language. That can hide cross-platform bugs in user code or in third-party
# headers, so sticking to the plain standard is strongly suggested.
set(CMAKE_C_EXTENSIONS OFF)
set(CMAKE_CXX_EXTENSIONS OFF)

# Generator-specific settings.
# The generator name is quoted so that if() compares the string itself and
# never tries to dereference it as a variable name.
if(NOT "${CMAKE_GENERATOR}" MATCHES "Visual Studio.*")

    # Link with pthread
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")

    # Debug or release: pick the output directory and the warning/debug flags.
    # Any CMAKE_BUILD_TYPE that does not match "Debug" (including an empty one)
    # is handled as Release.
    if(CMAKE_BUILD_TYPE MATCHES "Debug")
        message("Generate Debug project")
        set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/Debug")
        # -g: debug symbols, -pg: profiling instrumentation, -Wall: warnings
        set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -g -pg -Wall")
    else()
        message("Generate Release project")
        set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/Release")
        set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -Wall")
    endif()

    # Name of the math library for non Visual Studio builds.
    # NOTE(review): this variable is only set here; no visible command in this
    # file passes it to target_link_libraries() - confirm whether it is needed.
    set(CMAKE_EXTRA_LIB m)
else()
    # Visual Studio generators: executables go directly in the build
    # directory, and the CRT "unsafe function" warnings are silenced.
    set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}")
    add_definitions(-D_CRT_SECURE_NO_WARNINGS)
endif()

# Add definitions for testing purposes.
# Enabled by configuring with a true value for TESTING (e.g. -DTESTING=1).
# The variable is tested by name: if(TESTING) is simply false when the
# variable is undefined, whereas expanding ${TESTING} inside if() would leave
# an empty condition or dereference the value a second time.
if(TESTING)
    message("Testing mode")
    # NO_CONSOLE_CONTROL and NB_GENERATIONS=2 are passed to every source file
    # as preprocessor definitions.
    add_definitions(-DNO_CONSOLE_CONTROL -DNB_GENERATIONS=2)
endif()

# *******************************************
# *********** GEGELATI LIBRARY **************
# *******************************************

if(WIN32)
    set(LIBS_DIR ${CMAKE_CURRENT_SOURCE_DIR}/lib)
    # find the gegelatilib-x.y.z folder in the lib directory.
    file(GLOB GEGELATI_ROOT_DIR "${LIBS_DIR}/gegelatilib-[\\.|0-9]*")
    # Export the folder through the GEGELATI_DIR environment variable for the
    # find_package() call below.
    set(ENV{GEGELATI_DIR} ${GEGELATI_ROOT_DIR})
endif()

# GEGELATI is mandatory: stop at configure time with a clear message instead
# of failing later with missing headers or unresolved symbols.
find_package(GEGELATI REQUIRED)


# On Windows, place the GEGELATI DLLs next to the produced executable(s).
if(WIN32)
    file(GLOB GEGELATI_DLL ${GEGELATI_ROOT_DIR}/bin/*.dll)

    message("Copy GEGELATI DLLs into ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}")

    # The runtime output directory always receives a copy; Visual Studio
    # generators additionally get one copy per configuration sub-folder.
    set(gegelati_dll_destinations ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})
    if(${CMAKE_GENERATOR} MATCHES "Visual Studio.*")
        list(APPEND gegelati_dll_destinations
            ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/Debug
            ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/Release
        )
    endif()

    foreach(dll_destination IN LISTS gegelati_dll_destinations)
        file(COPY ${GEGELATI_DLL} DESTINATION ${dll_destination})
    endforeach()
endif()

# *******************************************
# ************** Executable ****************
# *******************************************

# Executable to learn the TPG
# Collect the sources and headers of the application.
# CONFIGURE_DEPENDS makes the build re-run the glob, so files added to or
# removed from src/ are picked up without a manual re-configure.
file(GLOB_RECURSE
    gridworld_files
    CONFIGURE_DEPENDS
    "${CMAKE_CURRENT_SOURCE_DIR}/src/*.cpp"
    "${CMAKE_CURRENT_SOURCE_DIR}/src/*.h"
)

# params.json is listed explicitly (it is not compiled, only attached to the
# target) rather than searched for recursively through the whole source tree,
# which also contains the bin and lib folders.
add_executable(${PROJECT_NAME}
    ${gridworld_files}
    "${CMAKE_CURRENT_SOURCE_DIR}/params.json"
)

# Usage requirements are attached to the target itself instead of the
# directory scope.
target_include_directories(${PROJECT_NAME} PRIVATE ${GEGELATI_INCLUDE_DIRS})
target_link_libraries(${PROJECT_NAME} PRIVATE ${GEGELATI_LIBRARIES})

# ROOT_DIR is the root of this project. PROJECT_SOURCE_DIR stays correct when
# this directory is included from a parent build, unlike CMAKE_SOURCE_DIR.
target_compile_definitions(${PROJECT_NAME} PRIVATE ROOT_DIR="${PROJECT_SOURCE_DIR}")
23 changes: 23 additions & 0 deletions gridworld/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# GridWorld

This application teaches a learning agent built with the [GEGELATI library](https://github.com/gegelati/gegelati) how to get out of a GridWorld.

The gridWorld is a grid composed of 0, 1, 2, and 3.
* 0 is an available tile
* 1 is a good output tile
* 2 is a bad output tile
* 3 is an unavailable tile

The agent starts at coordinate (0, 0). It can go left, right, up and down.

It gets a reward of -1 if it reaches a tile with value 0.
If it reaches a tile with value 1 or 2, the environment terminates and the agent gets a reward of 100 or -100, respectively.

## How to Build?
The build process of applications relies on [cmake](https://cmake.org) to configure a project for a wide variety of development environments and operating systems. Install [cmake](https://cmake.org/download/) on your system before building the application.

### Under Windows
1. Copy the `gegelatilib-<version>` folder containing the binaries of the [GEGELATI library](https://github.com/gegelati/gegelati) into the `lib` folder.
2. Open a command line interface in the `bin` folder.
3. Enter the following command to create the project for your favorite IDE `cmake ..`.
4. Open the project created in the `bin` folder, or launch the build with the following command: `cmake --build .`.
1 change: 1 addition & 0 deletions gridworld/bin/.dummy
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
This file exists only to force the presence of the bin folder in the git repository.
1 change: 1 addition & 0 deletions gridworld/lib/.dummy
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
This file exists only to force the presence of the lib folder in the git repository.
113 changes: 113 additions & 0 deletions gridworld/params.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
{
// Number of recordings held in the Archive.
// "archiveSize" : 50, // Default value
"archiveSize" : 2000,
// Probability of archiving the result of each Program execution.
// "archivingProbability" : 0.05, // Default value
"archivingProbability" : 0.01,
// Boolean used to activate an evaluation of the surviving roots in validation
// mode after the training at each generation.
// "doValidation" : false, // Default value
"doValidation" : false,
// Maximum number of actions performed on the learning environment during
// each evaluation of a root.
// "maxNbActionsPerEval" : 1000, // Default value
"maxNbActionsPerEval" : 100,
// Maximum number of times a given root is evaluated. After this number is
// reached, possibly after several generations, the score of the root will be
// fixed, and no further evaluation will be done.
// "maxNbEvaluationPerPolicy" : 1000, // Default value
"maxNbEvaluationPerPolicy" : 10,
"mutation" :
{
"prog" :
{
// Maximum constant value possible.
// "maxConstValue" : 100, // Default value
"maxConstValue" : 10,
// Maximum number of Lines within each Program of the TPG.
// "maxProgramSize" : 96, // Default value
"maxProgramSize" : 20,
// Minimum constant value possible.
// "minConstValue" : -10, // Default value
"minConstValue" : -10,
// Probability of inserting a line in the Program.
// "pAdd" : 0.5, // Default value
"pAdd" : 0.5,
// Probability of each constant to be mutated.
// "pConstantMutation" : 0.5, // Default value
"pConstantMutation" : 0.5,
// Probability of deleting a line of the Program.
// "pDelete" : 0.5, // Default value
"pDelete" : 0.5,
// Probability of altering a line of the Program.
// "pMutate" : 1.0, // Default value
"pMutate" : 1.0,
// Probability of swapping two lines of the Program.
// "pSwap" : 1.0, // Default value
"pSwap" : 1.0
},
"tpg" :
{
// When a Program is mutated, makes sure its behavior is no longer the same.
// "forceProgramBehaviorChangeOnMutation" : false, // Default value
"forceProgramBehaviorChangeOnMutation" : true,
// Maximum number of TPGEdge connected to each TPGTeam of the TPGGraph when
// initialized.
// "maxInitOutgoingEdges" : 3, // Default value
"maxInitOutgoingEdges" : 4,
// Maximum number of outgoing edge during TPGGraph mutations.
// "maxOutgoingEdges" : 5, // Default value
"maxOutgoingEdges" : 10,
// Number of TPGAction vertex of the initialized TPGGraph.
// This parameter is generally automatically set by the LearningEnvironment.
// /* "nbActions" : 0,*/ // Commented by default
/* "nbActions" : 0,*/
// Number of root TPGTeams to maintain when populating the TPGGraph
// "nbRoots" : 100, // Default value
"nbRoots" : 500,
// Probability of adding an outgoing Edge to a Team.
// "pEdgeAddition" : 0.7, // Default value
"pEdgeAddition" : 0.7,
// Probability of deleting an outgoing Edge of a Team.
// "pEdgeDeletion" : 0.7, // Default value
"pEdgeDeletion" : 0.7,
// Probability of changing the destination of an Edge.
// "pEdgeDestinationChange" : 0.1, // Default value
"pEdgeDestinationChange" : 0.1,
// Probability of the new destination of an Edge to be an Action.
// "pEdgeDestinationIsAction" : 0.5, // Default value
"pEdgeDestinationIsAction" : 0.5,
// Probability of mutating the Program of an outgoing Edge.
// "pProgramMutation" : 0.2, // Default value
"pProgramMutation" : 0.2
}
},
// Number of generations of the training.
// "nbGenerations" : 500, // Default value
"nbGenerations" : 10,
// [Only used in AdversarialLearningAgent.]
// Number of times each job is evaluated in the learning process.
// Each root may belong to several jobs, hence this parameter should be lower
// than the nbIterationsPerPolicyEvaluation parameter.
// "nbIterationsPerJob" : 1, // Default value
"nbIterationsPerJob" : 1,
// Number of evaluation of each root per generation.
// "nbIterationsPerPolicyEvaluation" : 5, // Default value
"nbIterationsPerPolicyEvaluation" : 1,
// Number of Constant available in each Program.
// "nbProgramConstant" : 0, // Default value
"nbProgramConstant" : 5,
// Number of registers for the Program execution.
// "nbRegisters" : 8, // Default value
"nbRegisters" : 8,
// [Only used in ParallelLearningAgent and child classes.]
// Number of threads used for the training process.
// When undefined in the json file, this parameter is automatically set to the
// number of cores of the CPU.
// /* "nbThreads" : 0,*/ // Commented by default
"nbThreads" : 1,
// Percentage of deleted (and regenerated) root TPGVertex at each generation.
// "ratioDeletedRoots" : 0.5, // Default value
"ratioDeletedRoots" : 0.5
}
100 changes: 100 additions & 0 deletions gridworld/src/gridworld.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
#include"gridworld.h"

void GridWorld::reset(size_t seed, Learn::LearningMode mode, uint16_t iterationNumber, uint64_t generationNumber){

// Reset agent coordonate
agentCoord = {0, 0};

// Reset terminated and score
terminated = false;
score = 0.0;

// Set data
currentState.setDataAt(typeid(double), 0, agentCoord[0]);
currentState.setDataAt(typeid(double), 1, agentCoord[1]);
}

// Tell whether the agent is allowed to stand on tile (pos_x, pos_y).
//
// pos_x: column index, checked against size[0].
// pos_y: row index, checked against size[1].
//
// Returns false when the position lies outside the grid or when the tile
// holds the value 3 (unavailable tile); returns true otherwise.
//
// Both coordinates are range-checked before the grid is indexed, so any
// out-of-grid position is rejected, not only the ones exactly one step past
// a border.
bool GridWorld::positionAvailable(int pos_x, int pos_y){

    // position outside the grid on axis x
    if(pos_x < 0 || pos_x >= static_cast<int>(size[0])){
        return false;
    }

    // position outside the grid on axis y
    if(pos_y < 0 || pos_y >= static_cast<int>(size[1])){
        return false;
    }

    // Inside the grid (indexed as grid[row][column]): the position is
    // available unless the tile is marked unavailable.
    return grid[pos_y][pos_x] != 3;

}

// Apply one action to the environment.
//
// action: 0 = left (x - 1), 1 = down (y + 1), 2 = right (x + 1),
//         3 = up (y - 1). Any other value leaves the agent where it is.
//
// The move is performed only if positionAvailable() accepts the destination.
// The tile the agent ends up on then decides the reward: +100 and termination
// on a tile of value 1, -100 and termination on a tile of value 2, -1 in
// every other case. The reward is added to the score and the observable
// state is refreshed with the agent coordinates.
void GridWorld::doAction(uint64_t action){

    // Translate the action into a displacement on each axis
    int deltaX = 0;
    int deltaY = 0;
    switch (action){
        case 0: deltaX = -1; break; // Left
        case 1: deltaY = 1;  break; // Down
        case 2: deltaX = 1;  break; // Right
        case 3: deltaY = -1; break; // Up
        default: break;             // Unknown action: no displacement
    }

    // Move only for a known action whose destination tile can be entered
    const bool wantsToMove = (deltaX != 0) || (deltaY != 0);
    if (wantsToMove && positionAvailable(agentCoord[0] + deltaX, agentCoord[1] + deltaY)){
        agentCoord[0] += deltaX;
        agentCoord[1] += deltaY;
    }

    // Look up the tile under the agent once
    const auto currentTile = grid[agentCoord[1]][agentCoord[0]];

    // Every step costs 1, unless an output tile ends the episode
    double reward = -1;
    if (currentTile == 1){
        // Good output reached
        reward = 100;
        terminated = true;
    } else if (currentTile == 2){
        // Bad output reached
        reward = -100;
        terminated = true;
    }

    // Accumulate the reward
    score += reward;

    // Publish the agent coordinates: x at index 0, y at index 1
    currentState.setDataAt(typeid(double), 0, agentCoord[0]);
    currentState.setDataAt(typeid(double), 1, agentCoord[1]);
}

bool GridWorld::isTerminal() const{
return terminated;
}

double GridWorld::getScore() const {
return score;
}

// Return the data sources exposed by the environment: a vector holding a
// single reference, to currentState (the agent coordinates written by
// reset() and doAction()).
std::vector<std::reference_wrapper<const Data::DataHandler>> GridWorld::getDataSources()
{
    std::vector<std::reference_wrapper<const Data::DataHandler>> sources;
    sources.emplace_back(this->currentState);
    return sources;
}

// Create a copy of this environment with the GridWorld copy constructor.
// The copy is allocated with new; this function does not release it.
Learn::LearningEnvironment* GridWorld::clone() const
{
    Learn::LearningEnvironment* copy = new GridWorld(*this);
    return copy;
}

// Report that this environment can be copied: always returns true.
// clone() provides the matching copy operation.
bool GridWorld::isCopyable() const
{
return true;
}
Loading

0 comments on commit ab71988

Please sign in to comment.