diff --git a/.github/workflows/SFMnps_ArmWinBinariesUpload.yml b/.github/workflows/SFMnps_ArmWinBinariesUpload.yml
new file mode 100644
index 00000000000..4bd177473da
--- /dev/null
+++ b/.github/workflows/SFMnps_ArmWinBinariesUpload.yml
@@ -0,0 +1,105 @@
+# Manually triggered workflow: builds the medium-net ("M") Stockfish variant
+# for Android armv8 (NDK) and Windows x86-64 (MSYS2), then uploads each binary
+# to the latest GitHub release and as a workflow artifact.
+name: SFMnpsArmWinBinariesUpload
+on:
+  workflow_dispatch:
+permissions:
+  contents: write  # the release-upload steps write assets via GITHUB_TOKEN
+jobs:
+  SFnpsArmWinBuilds:
+    name: ${{ matrix.config.name }}
+    runs-on: ${{ matrix.config.os }}
+    env:
+      COMPILER: ${{ matrix.config.compiler }}
+      COMP: ${{ matrix.config.comp }}
+    strategy:
+      matrix:
+        config:
+          - name: Ubuntu 22.04 NDK armv8
+            os: ubuntu-22.04
+            compiler: aarch64-linux-android21-clang++
+            comp: ndk
+            run_armv8_build: true
+            shell: bash  # plain `bash` keeps fail-fast (-eo pipefail)
+          - name: Windows 2022 Mingw-w64 GCC x86_64
+            os: windows-2022
+            compiler: g++
+            comp: mingw
+            run_win11_build: true
+            msys_sys: mingw64
+            msys_env: x86_64-gcc
+            shell: msys2 {0}
+
+    defaults:
+      run:
+        working-directory: src
+        shell: ${{ matrix.config.shell }}
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Setup msys and install required packages
+        if: runner.os == 'Windows'
+        uses: msys2/setup-msys2@v2
+        with:
+          msystem: ${{ matrix.config.msys_sys }}
+          install: mingw-w64-${{ matrix.config.msys_env }} make git
+
+      - name: Download the MEDIUM network from the fishtest framework
+        run: |
+          # Swap in the "M" variants of the headers, then run the net target.
+          cp evaluateM.h evaluate.h
+          cp nnue/nnue_architectureM.h nnue/nnue_architecture.h
+          make net
+
+      - name: armv8 build
+        if: ${{ matrix.config.run_armv8_build }}
+        run: |
+          export PATH=$ANDROID_NDK_HOME:$PATH
+          export PATH=$ANDROID_NDK_HOME/toolchains/llvm/prebuilt/linux-x86_64/bin:$PATH
+          cp nn-*.nnue ../jni
+          cd ../jni
+          cp Application_v8.mk Application.mk
+          ndk-build
+          # Copy the binary to the repository root for the upload steps.
+          cp ../libs/arm64-v8a/Stockfish ../SFMnps_armv8
+
+      - uses: xresloader/upload-to-github-release@v1
+        if: ${{ matrix.config.run_armv8_build }}
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        with:
+          overwrite: true
+          file: "SFMnps_armv8"
+          update_latest_release: true
+
+      - uses: actions/upload-artifact@v4
+        if: ${{ matrix.config.run_armv8_build }}
+        with:
+          name: SFMnps-armv8
+          path: SFMnps_armv8
+
+      - name: win11 build
+        if: ${{ matrix.config.run_win11_build }}
+        run: |
+          make clean
+          make -j3 profile-build ARCH=x86-64-modern COMP=$COMP
+          make strip ARCH=x86-64-modern COMP=$COMP
+          cp stockfish.exe ../SFMnps_modern.exe
+
+      - uses: xresloader/upload-to-github-release@v1
+        if: ${{ matrix.config.run_win11_build }}
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        with:
+          overwrite: true
+          file: "SFMnps_modern.exe"
+          update_latest_release: true
+
+      - uses: actions/upload-artifact@v4
+        if: ${{ matrix.config.run_win11_build }}
+        with:
+          name: SFMnps-modern
+          path: SFMnps_modern.exe
diff --git a/src/evaluateM.h b/src/evaluateM.h
new file mode 100644
index 00000000000..1ba758e90de
--- /dev/null
+++ b/src/evaluateM.h
@@ -0,0 +1,58 @@
+/*
+ Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+ Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file)
+
+ Stockfish is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ Stockfish is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef EVALUATE_H_INCLUDED
+#define EVALUATE_H_INCLUDED
+
+#include
+#include
+
+#include "types.h"
+
+namespace Stockfish {
+
+class Position;  // forward declaration; only taken by reference in this header
+
+namespace Eval {
+ // Evaluation entry points (declarations only; defined outside this header).
+ std::string trace(Position& pos);
+ Value evaluate(const Position& pos);
+ // Globals declared extern here; their definitions are not in this header.
+ extern bool useNNUE;
+ extern std::string currentEvalFileName;
+
+ // The default net name MUST follow the format nn-[SHA256 first 12 digits].nnue
+ // for the build process (profile-build and fishtest) to work. Do not change the
+ // name of the macro, as it is used in the Makefile.
+ #define EvalFileDefaultName "nn-e1fb1ade4432.nnue"
+
+ namespace NNUE {
+ // NOTE(review): waitms is presumably milliseconds; usage of both globals is not visible here — confirm.
+ extern int RandomEvalPerturb;
+ extern int waitms;
+
+ void init();
+ void verify();
+
+ } // namespace NNUE
+
+} // namespace Eval
+
+} // namespace Stockfish
+
+#endif // #ifndef EVALUATE_H_INCLUDED
diff --git a/src/nnue/nnue_architectureM.h b/src/nnue/nnue_architectureM.h
new file mode 100644
index 00000000000..c43a23c3f69
--- /dev/null
+++ b/src/nnue/nnue_architectureM.h
@@ -0,0 +1,138 @@
+/*
+ Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+ Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file)
+
+ Stockfish is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ Stockfish is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+// Input features and network structure used in NNUE evaluation function
+
+#ifndef NNUE_ARCHITECTURE_H_INCLUDED
+#define NNUE_ARCHITECTURE_H_INCLUDED
+
+#include
+
+#include "nnue_common.h"
+
+#include "features/half_ka_v2_hm.h"
+
+#include "layers/affine_transform.h"
+#include "layers/clipped_relu.h"
+#include "layers/sqr_clipped_relu.h"
+
+#include "../misc.h"
+
+namespace Stockfish::Eval::NNUE {
+
+// Input features used in evaluation function
+using FeatureSet = Features::HalfKAv2_hm;
+
+// Number of input feature dimensions after conversion
+constexpr IndexType TransformedFeatureDimensions = 1024;  // also folded into Network::get_hash_value()
+constexpr IndexType PSQTBuckets = 8;  // NOTE(review): not used in the visible part of this header — confirm role
+constexpr IndexType LayerStacks = 8;  // NOTE(review): not used in the visible part of this header — confirm role
+
+struct Network
+{
+ static constexpr int FC_0_OUTPUTS = 15;
+ static constexpr int FC_1_OUTPUTS = 32;
+
+ Layers::AffineTransform fc_0;
+ Layers::SqrClippedReLU ac_sqr_0;
+ Layers::ClippedReLU ac_0;
+ Layers::AffineTransform fc_1;
+ Layers::ClippedReLU ac_1;
+ Layers::AffineTransform fc_2;
+
+ // Hash value embedded in the evaluation file
+ static constexpr std::uint32_t get_hash_value() {
+ // input slice hash
+ std::uint32_t hashValue = 0xEC42E90Du;
+ hashValue ^= TransformedFeatureDimensions * 2;
+
+ hashValue = decltype(fc_0)::get_hash_value(hashValue);
+ hashValue = decltype(ac_0)::get_hash_value(hashValue);
+ hashValue = decltype(fc_1)::get_hash_value(hashValue);
+ hashValue = decltype(ac_1)::get_hash_value(hashValue);
+ hashValue = decltype(fc_2)::get_hash_value(hashValue);
+
+ return hashValue;
+ }
+
+ // Read network parameters
+ bool read_parameters(std::istream& stream) {
+ if (!fc_0.read_parameters(stream)) return false;
+ if (!ac_0.read_parameters(stream)) return false;
+ if (!fc_1.read_parameters(stream)) return false;
+ if (!ac_1.read_parameters(stream)) return false;
+ if (!fc_2.read_parameters(stream)) return false;
+ return true;
+ }
+
+ // Write network parameters
+ bool write_parameters(std::ostream& stream) const {
+ if (!fc_0.write_parameters(stream)) return false;
+ if (!ac_0.write_parameters(stream)) return false;
+ if (!fc_1.write_parameters(stream)) return false;
+ if (!ac_1.write_parameters(stream)) return false;
+ if (!fc_2.write_parameters(stream)) return false;
+ return true;
+ }
+
+ std::int32_t propagate(const TransformedFeatureType* transformedFeatures)
+ {
+ struct alignas(CacheLineSize) Buffer
+ {
+ alignas(CacheLineSize) decltype(fc_0)::OutputBuffer fc_0_out;
+ alignas(CacheLineSize) decltype(ac_sqr_0)::OutputType ac_sqr_0_out[ceil_to_multiple(FC_0_OUTPUTS * 2, 32)];
+ alignas(CacheLineSize) decltype(ac_0)::OutputBuffer ac_0_out;
+ alignas(CacheLineSize) decltype(fc_1)::OutputBuffer fc_1_out;
+ alignas(CacheLineSize) decltype(ac_1)::OutputBuffer ac_1_out;
+ alignas(CacheLineSize) decltype(fc_2)::OutputBuffer fc_2_out;
+
+ Buffer()
+ {
+ std::memset(this, 0, sizeof(*this));
+ }
+ };
+
+#if defined(__clang__) && (__APPLE__)
+ // workaround for a bug reported with xcode 12
+ static thread_local auto tlsBuffer = std::make_unique();
+ // Access TLS only once, cache result.
+ Buffer& buffer = *tlsBuffer;
+#else
+ alignas(CacheLineSize) static thread_local Buffer buffer;
+#endif
+
+ fc_0.propagate(transformedFeatures, buffer.fc_0_out);
+ ac_sqr_0.propagate(buffer.fc_0_out, buffer.ac_sqr_0_out);
+ ac_0.propagate(buffer.fc_0_out, buffer.ac_0_out);
+ std::memcpy(buffer.ac_sqr_0_out + FC_0_OUTPUTS, buffer.ac_0_out, FC_0_OUTPUTS * sizeof(decltype(ac_0)::OutputType));
+ fc_1.propagate(buffer.ac_sqr_0_out, buffer.fc_1_out);
+ ac_1.propagate(buffer.fc_1_out, buffer.ac_1_out);
+ fc_2.propagate(buffer.ac_1_out, buffer.fc_2_out);
+
+ // buffer.fc_0_out[FC_0_OUTPUTS] is such that 1.0 is equal to 127*(1<