diff --git a/DeepCrazyhouse/src/training/trainer_agent_pytorch.py b/DeepCrazyhouse/src/training/trainer_agent_pytorch.py
index eeb177fc..d5e58e89 100644
--- a/DeepCrazyhouse/src/training/trainer_agent_pytorch.py
+++ b/DeepCrazyhouse/src/training/trainer_agent_pytorch.py
@@ -274,7 +274,8 @@ def delete_previous_weights(self):
         # delete previous weights to save space
         files = glob.glob(self.tc.export_dir + 'weights/*')
         for f in files:
-            os.remove(f)
+            if os.path.isfile(f):
+                os.remove(f)
 
     def _get_train_loader(self, part_id):
         # load one chunk of the dataset from memory
diff --git a/engine/src/nn/neuralnetapiuser.cpp b/engine/src/nn/neuralnetapiuser.cpp
index a2cd63f1..6a09cf0e 100644
--- a/engine/src/nn/neuralnetapiuser.cpp
+++ b/engine/src/nn/neuralnetapiuser.cpp
@@ -108,3 +108,8 @@ void NeuralNetAPIUser::run_inference(uint_fast16_t iterations)
     }
 }
 
+unsigned int NeuralNetAPIUser::get_num_phases() const
+{
+    return numPhases;
+}
+
diff --git a/engine/src/nn/neuralnetapiuser.h b/engine/src/nn/neuralnetapiuser.h
index 692c2c1e..08cbc316 100644
--- a/engine/src/nn/neuralnetapiuser.h
+++ b/engine/src/nn/neuralnetapiuser.h
@@ -60,6 +60,12 @@ class NeuralNetAPIUser
      * @param iterations Number of iterations to run
      */
     void run_inference(uint_fast16_t iterations);
+
+    /**
+     * @brief get_num_phases Returns the number of phases
+     * @return numPhases
+     */
+    unsigned int get_num_phases() const;
 };
 
 #endif // NEURALNETAPIUSER_H
diff --git a/engine/src/rl/rl_loop.py b/engine/src/rl/rl_loop.py
index 56cd58de..8db386f9 100644
--- a/engine/src/rl/rl_loop.py
+++ b/engine/src/rl/rl_loop.py
@@ -49,7 +49,7 @@ def __init__(self, args, rl_config, nb_arena_games=100, lr_reduction=0.0001, k_s
         self.rl_config = rl_config
         self.file_io = FileIO(orig_binary_name=self.rl_config.binary_name, binary_dir=self.rl_config.binary_dir,
-                              uci_variant=self.rl_config.uci_variant, framework=self.tc.framework)
+                              uci_variant=self.rl_config.uci_variant)
         self.binary_io = None
 
         if nb_arena_games % 2 == 1:
@@ -85,7 +85,7 @@ def initialize(self, is_arena=False):
         is_arena: Signals that UCI option should be set for arena comparison
         :return:
         """
-        self.model_name = self.file_io.get_current_model_weight_file()
+        self.model_name = self.file_io.get_current_model_tar_file()
         self.binary_io = BinaryIO(binary_path=self.file_io.binary_dir+self.current_binary_name)
         self.binary_io.set_uci_options(self.rl_config.uci_variant, self.args.context, self.args.device_id,
                                        self.rl_config.precision, self.file_io.model_dir,
@@ -105,7 +105,7 @@ def check_for_new_model(self):
         self.nn_update_index = extract_nn_update_idx_from_binary_name(self.current_binary_name)
 
         # If a new model is available, the binary name has also changed
-        model_name = self.file_io.get_current_model_weight_file()
+        model_name = self.file_io.get_current_model_tar_file()
         if model_name != "" and model_name != self.model_name:
             logging.info("Loading new model: %s" % model_name)
diff --git a/engine/src/rl/rl_training.py b/engine/src/rl/rl_training.py
index 957416be..8a236ee9 100644
--- a/engine/src/rl/rl_training.py
+++ b/engine/src/rl/rl_training.py
@@ -49,7 +49,7 @@ def update_network(queue, nn_update_idx: int, tar_filename: Path, convert_to_onn
         raise Exception('No .zip files for training available. Check the path in main_config["planes_train_dir"]:'
                         ' %s' % main_config["planes_train_dir"])
 
-    val_data, x_val, _ = get_validation_data(train_config)
+    val_data, x_val = get_validation_data(train_config)
     input_shape = x_val[0].shape
     # calculate how many iterations per epoch exist
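Note on the trainer_agent_pytorch.py hunk at the top of this patch: glob.glob() matches directories as well as regular files, and os.remove() raises IsADirectoryError when handed a directory, which would abort the weight cleanup. A minimal, self-contained sketch of the guarded pattern (the function name is illustrative, not part of the patch):

```python
import glob
import os


def remove_files_only(pattern: str) -> None:
    """Delete the regular files matched by a glob pattern, skipping directories.

    glob.glob() matches directories too, and os.remove() raises
    IsADirectoryError on them, hence the os.path.isfile() guard.
    """
    for path in glob.glob(pattern):
        if os.path.isfile(path):
            os.remove(path)


# e.g. remove_files_only(export_dir + 'weights/*')
```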
diff --git a/engine/src/rl/selfplay.cpp b/engine/src/rl/selfplay.cpp
index e4385e70..e0895f23 100644
--- a/engine/src/rl/selfplay.cpp
+++ b/engine/src/rl/selfplay.cpp
@@ -79,9 +79,9 @@ string load_random_fen(string filepath)
 }
 
 
-SelfPlay::SelfPlay(RawNetAgent* rawAgent, MCTSAgent* mctsAgent, SearchLimits* searchLimits, PlaySettings* playSettings,
-                   RLSettings* rlSettings, OptionsMap& options):
-    rawAgent(rawAgent), mctsAgent(mctsAgent), searchLimits(searchLimits), playSettings(playSettings),
+SelfPlay::SelfPlay(RawNetAgent* rawAgent, MCTSAgent* mctsAgent, const SearchSettings* searchSettings, SearchLimits* searchLimits, const PlaySettings* playSettings,
+                   const RLSettings* rlSettings, OptionsMap& options):
+    rawAgent(rawAgent), mctsAgent(mctsAgent), searchSettings(searchSettings), searchLimits(searchLimits), playSettings(playSettings),
     rlSettings(rlSettings), gameIdx(0), gamesPerMin(0), samplesPerMin(0), options(options)
 {
     is960 = options["UCI_Chess960"];
@@ -113,6 +113,8 @@ SelfPlay::SelfPlay(RawNetAgent* rawAgent, MCTSAgent* mctsAgent, SearchLimits* se
     gamePGN.round = "?";
     gamePGN.is960 = is960;
     this->exporter = new TrainDataExporter(string("data_") + mctsAgent->get_device_name() + string(".zarr"),
+                                           mctsAgent->get_num_phases(),
+                                           searchSettings->gamePhaseDefinition,
                                            rlSettings->numberChunks, rlSettings->chunkSize);
     filenamePGNSelfplay = string("games_") + mctsAgent->get_device_name() + string(".pgn");
     filenamePGNArena = string("arena_games_")+ mctsAgent->get_device_name() + string(".pgn");
diff --git a/engine/src/rl/selfplay.h b/engine/src/rl/selfplay.h
index 237c785b..cc3f7eab 100644
--- a/engine/src/rl/selfplay.h
+++ b/engine/src/rl/selfplay.h
@@ -68,9 +68,10 @@ class SelfPlay
 private:
     RawNetAgent* rawAgent;
     MCTSAgent* mctsAgent;
+    const SearchSettings* searchSettings;
     SearchLimits* searchLimits;
-    PlaySettings* playSettings;
-    RLSettings* rlSettings;
+    const PlaySettings* playSettings;
+    const RLSettings* rlSettings;
     OptionsMap& options;
     GamePGN gamePGN;
     TrainDataExporter* exporter;
@@ -90,13 +91,14 @@ class SelfPlay
      * @brief SelfPlay
      * @param rawAgent Raw network agent which uses the raw network policy for e.g. game initiliation
      * @param mctsAgent MCTSAgent which is used during selfplay for game generation
+     * @param searchSettings Search settings configuration struct
      * @param searchLimits Search limit configuration struct
      * @param playSettings Playing setting configuration struct
      * @param RLSettings Additional settings for reinforcement learning usage
      * @param options Object holding all UCI options
      */
-    SelfPlay(RawNetAgent* rawAgent, MCTSAgent* mctsAgent, SearchLimits* searchLimits, PlaySettings* playSettings,
-             RLSettings* rlSettings, OptionsMap& options);
+    SelfPlay(RawNetAgent* rawAgent, MCTSAgent* mctsAgent, const SearchSettings* searchSettings, SearchLimits* searchLimits, const PlaySettings* playSettings,
+             const RLSettings* rlSettings, OptionsMap& options);
     ~SelfPlay();
 
     /**
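The selfplay changes above thread two new pieces of information into TrainDataExporter: the number of phases reported by the network (mctsAgent->get_num_phases()) and the phase definition from the search settings. The actual mapping lives in pos->get_phase(numPhases, gamePhaseDefinition), whose implementation is not part of this diff; the following Python sketch is purely hypothetical and only illustrates the kind of bucketing such a definition can express (names and thresholds invented):

```python
def phase_by_movecount(fullmove_number: int, num_phases: int = 3) -> int:
    """Hypothetical phase id in [0, num_phases), bucketed by move count.

    This is NOT the engine's GamePhaseDefinition; the thresholds are
    invented for illustration only.
    """
    if num_phases <= 1:
        return 0
    if fullmove_number <= 15:   # early-game bucket
        return 0
    if fullmove_number <= 40:   # middle bucket
        return min(1, num_phases - 1)
    return num_phases - 1       # late-game bucket
```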
diff --git a/engine/src/rl/traindataexporter.cpp b/engine/src/rl/traindataexporter.cpp
index c118eeb5..60ded3ce 100644
--- a/engine/src/rl/traindataexporter.cpp
+++ b/engine/src/rl/traindataexporter.cpp
@@ -40,6 +40,7 @@ void TrainDataExporter::save_sample(const StateObj* pos, const EvalInfo& eval)
     save_best_move_q(eval);
     save_side_to_move(Color(pos->side_to_move()));
     save_cur_sample_index();
+    save_cur_phase(pos);
     ++curSampleIdx;
     // value will be set later in export_game_result()
     firstMove = false;
@@ -87,6 +88,20 @@ void TrainDataExporter::save_cur_sample_index()
     }
 }
 
+void TrainDataExporter::save_cur_phase(const StateObj* pos)
+{
+    // curGamePhase, starting from 0
+    xt::xarray<int16_t> phaseArray({ 1 }, pos->get_phase(numPhases, gamePhaseDefinition));
+
+    if (firstMove) {
+        gamePhaseVector = phaseArray;
+    }
+    else {
+        // concatenate the sample to array for the current game
+        gamePhaseVector = xt::concatenate(xtuple(gamePhaseVector, phaseArray));
+    }
+}
+
 void TrainDataExporter::export_game_samples(Result result)
 {
     if (startIdx >= numberSamples) {
         info_string("Extended number of maximum samples");
@@ -106,13 +121,16 @@ void TrainDataExporter::export_game_samples(Result result)
     z5::types::ShapeType offsetPolicy = { startIdx, 0 };
     z5::multiarray::writeSubarray<float>(dPolicy, gamePolicy, offsetPolicy.begin());
     z5::multiarray::writeSubarray<int16_t>(dPlysToEnd, gamePlysToEnd, offset.begin());
+    z5::multiarray::writeSubarray<int16_t>(dPhaseVector, gamePhaseVector, offset.begin());
 
     startIdx += curSampleIdx;
     gameIdx++;
     save_start_idx();
 }
 
-TrainDataExporter::TrainDataExporter(const string& fileName, size_t numberChunks, size_t chunkSize):
+TrainDataExporter::TrainDataExporter(const string& fileName, unsigned int numPhases, GamePhaseDefinition gamePhaseDefinition, size_t numberChunks, size_t chunkSize):
+    numPhases(numPhases),
+    gamePhaseDefinition(gamePhaseDefinition),
     numberChunks(numberChunks),
     chunkSize(chunkSize),
     numberSamples(numberChunks * chunkSize),
@@ -214,6 +232,7 @@ void TrainDataExporter::open_dataset_from_file(const z5::filesystem::handle::Fil
     dPolicy = z5::openDataset(file, "y_policy");
     dbestMoveQ = z5::openDataset(file, "y_best_move_q");
     dPlysToEnd = z5::openDataset(file, "plys_to_end");
+    dPhaseVector = z5::openDataset(file, "phase_vector");
 }
 
 void TrainDataExporter::create_new_dataset_file(const z5::filesystem::handle::File &file)
@@ -231,6 +250,7 @@ void TrainDataExporter::create_new_dataset_file(const z5::filesystem::handle::Fi
     dPolicy = z5::createDataset(file, "y_policy", "float32", { numberSamples, StateConstants::NB_LABELS() }, { chunkSize, StateConstants::NB_LABELS() });
     dbestMoveQ = z5::createDataset(file, "y_best_move_q", "float32", { numberSamples }, { chunkSize });
     dPlysToEnd = z5::createDataset(file, "plys_to_end", "int16", { numberSamples }, { chunkSize });
+    dPhaseVector = z5::createDataset(file, "phase_vector", "int16", { numberSamples }, { chunkSize });
 
     save_start_idx();
 }
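With the hunks above, every exported sample carries a phase id in the new phase_vector dataset, stored in parallel to the existing per-sample datasets (the names come from open_dataset_from_file()/create_new_dataset_file() above). A quick way to sanity-check an exported file from Python, assuming the third-party zarr package and an example file name of the form data_<device>.zarr as produced by SelfPlay:

```python
import zarr  # third-party package; reads the .zarr files written via z5

store = zarr.open("data_gpu_0.zarr", mode="r")  # example file name

phases = store["phase_vector"][:]  # one int16 phase id per sample
plys = store["plys_to_end"][:]     # plys until the game ended

# Count how many samples fell into each phase bucket.
counts = {int(p): int((phases == p).sum()) for p in sorted(set(phases.tolist()))}
print("samples per phase:", counts)
```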
diff --git a/engine/src/rl/traindataexporter.h b/engine/src/rl/traindataexporter.h
index cbdf4b01..aa93936e 100644
--- a/engine/src/rl/traindataexporter.h
+++ b/engine/src/rl/traindataexporter.h
@@ -48,6 +48,8 @@ class TrainDataExporter
 {
 private:
+    unsigned int numPhases;
+    GamePhaseDefinition gamePhaseDefinition;
     size_t numberChunks;
     size_t chunkSize;
     size_t numberSamples;
@@ -57,12 +59,14 @@ class TrainDataExporter
     std::unique_ptr<z5::Dataset> dPolicy;
     std::unique_ptr<z5::Dataset> dbestMoveQ;
     std::unique_ptr<z5::Dataset> dPlysToEnd;
+    std::unique_ptr<z5::Dataset> dPhaseVector;
 
     xt::xarray<int16_t> gameX;
     xt::xarray<int16_t> gameValue;
     xt::xarray<float> gamePolicy;
     xt::xarray<float> gameBestMoveQ;
     xt::xarray<int16_t> gamePlysToEnd;
+    xt::xarray<int16_t> gamePhaseVector;
 
     bool firstMove;
     // current number of games - 1
@@ -106,6 +110,12 @@ class TrainDataExporter
      */
     void save_cur_sample_index();
 
+    /**
+     * @brief save_cur_phase Saves the current phase id for the current position.
+     * @param pos Current position
+     */
+    void save_cur_phase(const StateObj* pos);
+
     /**
      * @brief save_start_idx Saves the current starting index where the next game starts to the game array
      */
@@ -140,11 +150,13 @@ class TrainDataExporter
     /**
      * @brief TrainDataExporter
      * @param fileNameExport File name of the uncompressed data to be exported in (e.g. "data.zarr")
+     * @param numPhases Number of game phases to support for exporting
+     * @param gamePhaseDefinition Game phase definition to use
      * @param numberChunks Defines how many chunks a single file should contain.
      * The product of the number of chunks and its chunk size yields the total number of samples of a file.
      * @param chunkSize Defines the chunk size of a single chunk
      */
-    TrainDataExporter(const string& fileNameExport, size_t numberChunks=200, size_t chunkSize=128);
+    TrainDataExporter(const string& fileNameExport, unsigned int numPhases, GamePhaseDefinition gamePhaseDefinition, size_t numberChunks=200, size_t chunkSize=128);
 
     /**
      * @brief export_pos Saves a given board position, policy and Q-value to the specific game arrays
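For downstream use, the per-sample phase id makes it straightforward to slice a file into per-phase training subsets (e.g. one network per phase). The patch itself only covers the export side; a hedged numpy sketch of such a selection, reusing the zarr reading from the earlier snippet:

```python
import numpy as np
import zarr

store = zarr.open("data_gpu_0.zarr", mode="r")  # example file name
phases = store["phase_vector"][:]

# One index array per phase id; these can drive per-phase mini-batch sampling.
num_phases = int(phases.max()) + 1 if phases.size else 0
phase_indices = {p: np.flatnonzero(phases == p) for p in range(num_phases)}

for p, idx in phase_indices.items():
    print(f"phase {p}: {idx.size} samples")
```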
diff --git a/engine/src/uci/crazyara.cpp b/engine/src/uci/crazyara.cpp
index 7c05a44c..09b49cd6 100644
--- a/engine/src/uci/crazyara.cpp
+++ b/engine/src/uci/crazyara.cpp
@@ -356,7 +356,7 @@ void CrazyAra::activeuci()
 void CrazyAra::selfplay(istringstream &is)
 {
     prepare_search_config_structs();
-    SelfPlay selfPlay(rawAgent.get(), mctsAgent.get(), &searchLimits, &playSettings, &rlSettings, Options);
+    SelfPlay selfPlay(rawAgent.get(), mctsAgent.get(), &searchSettings, &searchLimits, &playSettings, &rlSettings, Options);
     size_t numberOfGames;
     is >> numberOfGames;
     selfPlay.go(numberOfGames, variant);
@@ -366,7 +366,7 @@ void CrazyAra::selfplay(istringstream &is)
 void CrazyAra::arena(istringstream &is)
 {
     prepare_search_config_structs();
-    SelfPlay selfPlay(rawAgent.get(), mctsAgent.get(), &searchLimits, &playSettings, &rlSettings, Options);
+    SelfPlay selfPlay(rawAgent.get(), mctsAgent.get(), &searchSettings, &searchLimits, &playSettings, &rlSettings, Options);
     fill_nn_vectors(Options["Model_Directory_Contender"], netSingleContenderVector, netBatchesContenderVector);
     mctsAgentContender = create_new_mcts_agent(netSingleContenderVector, netBatchesContenderVector, &searchSettings);
     size_t numberOfGames;
@@ -420,7 +420,7 @@ void CrazyAra::multimodel_arena(istringstream &is, const string &modelDirectory1
         mcts2 = create_new_mcts_agent(netSingleContenderVector, netBatchesContenderVector,
                                       &searchSettings, static_cast<MCTSAgentType>(type));
     }
-    SelfPlay selfPlay(rawAgent.get(), mcts1.get(), &searchLimits, &playSettings, &rlSettings, Options);
+    SelfPlay selfPlay(rawAgent.get(), mcts1.get(), &searchSettings, &searchLimits, &playSettings, &rlSettings, Options);
     size_t numberOfGames;
     is >> numberOfGames;
     TournamentResult tournamentResult = selfPlay.go_arena(mcts2.get(), numberOfGames, variant);
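A possible end-to-end smoke test for the whole change: drive the engine binary over UCI, generate a couple of self-play games, and then inspect the resulting .zarr file as sketched earlier. The snippet below assumes the binary is built as ./CrazyAra and that the UCI loop dispatches "selfplay <N>" to CrazyAra::selfplay() as shown above:

```python
import subprocess

# Hypothetical smoke test: after "selfplay 2" finishes, a data_<device>.zarr
# file containing the new phase_vector dataset should exist next to the binary.
proc = subprocess.Popen(["./CrazyAra"], stdin=subprocess.PIPE,
                        stdout=subprocess.PIPE, text=True)
out, _ = proc.communicate("uci\nisready\nselfplay 2\nquit\n")
print(out)
```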