From be142337d843ef3afc675e27628ab8e896c32cce Mon Sep 17 00:00:00 2001 From: mstembera Date: Mon, 29 Apr 2024 20:37:54 -0700 Subject: [PATCH] Accumulator cache bugfix and cleanup STC: https://tests.stockfishchess.org/tests/view/663068913a05f1bf7a511dc2 LLR: 2.98 (-2.94,2.94) <-1.75,0.25> Total: 70304 W: 18211 L: 18026 D: 34067 Ptnml(0-2): 232, 7966, 18582, 8129, 243 1) Fixes a bug introduced in https://github.com/official-stockfish/Stockfish/pull/5194. Only one psqtOnly flag was used for two perspectives which was causing wrong entries to be cleared and marked. 2) The finny caches should be cleared like histories and not at the start of every search. closes https://github.com/official-stockfish/Stockfish/pull/5203 No functional change --- src/nnue/nnue_accumulator.h | 28 ++++++++++++--------------- src/nnue/nnue_feature_transformer.h | 30 ++++++++++++++--------------- src/search.cpp | 5 ++--- 3 files changed, 28 insertions(+), 35 deletions(-) diff --git a/src/nnue/nnue_accumulator.h b/src/nnue/nnue_accumulator.h index a2b3b98988e..179feba553e 100644 --- a/src/nnue/nnue_accumulator.h +++ b/src/nnue/nnue_accumulator.h @@ -59,31 +59,27 @@ struct AccumulatorCaches { struct alignas(CacheLineSize) Cache { struct alignas(CacheLineSize) Entry { - BiasType accumulation[COLOR_NB][Size]; - PSQTWeightType psqtAccumulation[COLOR_NB][PSQTBuckets]; - Bitboard byColorBB[COLOR_NB][COLOR_NB]; - Bitboard byTypeBB[COLOR_NB][PIECE_TYPE_NB]; + BiasType accumulation[Size]; + PSQTWeightType psqtAccumulation[PSQTBuckets]; + Bitboard byColorBB[COLOR_NB]; + Bitboard byTypeBB[PIECE_TYPE_NB]; bool psqtOnly; // To initialize a refresh entry, we set all its bitboards empty, // so we put the biases in the accumulation, without any weights on top void clear(const BiasType* biases) { - std::memset(byColorBB, 0, sizeof(byColorBB)); - std::memset(byTypeBB, 0, sizeof(byTypeBB)); - psqtOnly = false; - - std::memcpy(accumulation[WHITE], biases, Size * sizeof(BiasType)); - std::memcpy(accumulation[BLACK], biases, Size * sizeof(BiasType)); - - std::memset(psqtAccumulation, 0, sizeof(psqtAccumulation)); + std::memcpy(accumulation, biases, sizeof(accumulation)); + std::memset((uint8_t*) this + offsetof(Entry, psqtAccumulation), 0, + sizeof(Entry) - offsetof(Entry, psqtAccumulation)); } }; template void clear(const Network& network) { - for (auto& entry : entries) - entry.clear(network.featureTransformer->biases); + for (auto& entries1D : entries) + for (auto& entry : entries1D) + entry.clear(network.featureTransformer->biases); } void clear(const BiasType* biases) { @@ -91,9 +87,9 @@ struct AccumulatorCaches { entry.clear(biases); } - Entry& operator[](Square sq) { return entries[sq]; } + std::array& operator[](Square sq) { return entries[sq]; } - std::array entries; + std::array, SQUARE_NB> entries; }; template diff --git a/src/nnue/nnue_feature_transformer.h b/src/nnue/nnue_feature_transformer.h index 402a47a815d..4647ecd066d 100644 --- a/src/nnue/nnue_feature_transformer.h +++ b/src/nnue/nnue_feature_transformer.h @@ -652,7 +652,7 @@ class FeatureTransformer { assert(cache != nullptr); Square ksq = pos.square(Perspective); - auto& entry = (*cache)[ksq]; + auto& entry = (*cache)[ksq][Perspective]; FeatureSet::IndexList removed, added; if (entry.psqtOnly && !psqtOnly) @@ -666,9 +666,8 @@ class FeatureTransformer { { for (PieceType pt = PAWN; pt <= KING; ++pt) { - const Piece piece = make_piece(c, pt); - const Bitboard oldBB = - entry.byColorBB[Perspective][c] & entry.byTypeBB[Perspective][pt]; + const Piece piece = make_piece(c, pt); + const Bitboard oldBB = entry.byColorBB[c] & entry.byTypeBB[pt]; const Bitboard newBB = pos.pieces(c, pt); Bitboard toRemove = oldBB & ~newBB; Bitboard toAdd = newBB & ~oldBB; @@ -698,8 +697,7 @@ class FeatureTransformer { if (!psqtOnly) for (IndexType j = 0; j < HalfDimensions / TileHeight; ++j) { - auto entryTile = - reinterpret_cast(&entry.accumulation[Perspective][j * TileHeight]); + auto entryTile = reinterpret_cast(&entry.accumulation[j * TileHeight]); for (IndexType k = 0; k < NumRegs; ++k) acc[k] = entryTile[k]; @@ -741,8 +739,8 @@ class FeatureTransformer { for (IndexType j = 0; j < PSQTBuckets / PsqtTileHeight; ++j) { - auto entryTilePsqt = reinterpret_cast( - &entry.psqtAccumulation[Perspective][j * PsqtTileHeight]); + auto entryTilePsqt = + reinterpret_cast(&entry.psqtAccumulation[j * PsqtTileHeight]); for (std::size_t k = 0; k < NumPsqtRegs; ++k) psqt[k] = entryTilePsqt[k]; @@ -777,11 +775,11 @@ class FeatureTransformer { { const IndexType offset = HalfDimensions * index; for (IndexType j = 0; j < HalfDimensions; ++j) - entry.accumulation[Perspective][j] -= weights[offset + j]; + entry.accumulation[j] -= weights[offset + j]; } for (std::size_t k = 0; k < PSQTBuckets; ++k) - entry.psqtAccumulation[Perspective][k] -= psqtWeights[index * PSQTBuckets + k]; + entry.psqtAccumulation[k] -= psqtWeights[index * PSQTBuckets + k]; } for (const auto index : added) { @@ -789,11 +787,11 @@ class FeatureTransformer { { const IndexType offset = HalfDimensions * index; for (IndexType j = 0; j < HalfDimensions; ++j) - entry.accumulation[Perspective][j] += weights[offset + j]; + entry.accumulation[j] += weights[offset + j]; } for (std::size_t k = 0; k < PSQTBuckets; ++k) - entry.psqtAccumulation[Perspective][k] += psqtWeights[index * PSQTBuckets + k]; + entry.psqtAccumulation[k] += psqtWeights[index * PSQTBuckets + k]; } #endif @@ -802,17 +800,17 @@ class FeatureTransformer { // Now copy its content to the actual accumulator we were refreshing if (!psqtOnly) - std::memcpy(accumulator.accumulation[Perspective], entry.accumulation[Perspective], + std::memcpy(accumulator.accumulation[Perspective], entry.accumulation, sizeof(BiasType) * HalfDimensions); - std::memcpy(accumulator.psqtAccumulation[Perspective], entry.psqtAccumulation[Perspective], + std::memcpy(accumulator.psqtAccumulation[Perspective], entry.psqtAccumulation, sizeof(int32_t) * PSQTBuckets); for (Color c : {WHITE, BLACK}) - entry.byColorBB[Perspective][c] = pos.pieces(c); + entry.byColorBB[c] = pos.pieces(c); for (PieceType pt = PAWN; pt <= KING; ++pt) - entry.byTypeBB[Perspective][pt] = pos.pieces(pt); + entry.byTypeBB[pt] = pos.pieces(pt); entry.psqtOnly = psqtOnly; } diff --git a/src/search.cpp b/src/search.cpp index e4f170be61d..b8e515f0267 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -147,9 +147,6 @@ Search::Worker::Worker(SharedState& sharedState, void Search::Worker::start_searching() { - // Initialize accumulator refresh entries - refreshTable.clear(networks); - // Non-main threads go directly to iterative_deepening() if (!is_mainthread()) { @@ -506,6 +503,8 @@ void Search::Worker::clear() { for (size_t i = 1; i < reductions.size(); ++i) reductions[i] = int((20.14 + std::log(size_t(options["Threads"])) / 2) * std::log(i)); + + refreshTable.clear(networks); }