Skip to content

Commit

Permalink
Merge branch 'master' into Solve_problems_in_GBMV,HBMV,SBMV,TBMV
Browse files Browse the repository at this point in the history
  • Loading branch information
AndreiCristianMatraguna committed Aug 28, 2024
2 parents 864cb77 + 2a08197 commit 137b0ad
Show file tree
Hide file tree
Showing 87 changed files with 299 additions and 134 deletions.
4 changes: 2 additions & 2 deletions .appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,8 @@ build_script:

after_build:
- ps: pushd $env:CLBLAST_BUILD
- 7z a CLBlast-1.6.2-Windows-x64.zip .\install_dir\*
- ps: mv CLBlast-1.6.2-Windows-x64.zip $env:APPVEYOR_BUILD_FOLDER
- 7z a CLBlast-1.6.3-Windows-x64.zip .\install_dir\*
- ps: mv CLBlast-1.6.3-Windows-x64.zip $env:APPVEYOR_BUILD_FOLDER

artifacts:
- path: '*.zip'
Expand Down
14 changes: 7 additions & 7 deletions .github/workflows/build_and_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ jobs:
config: [
{os: ubuntu-latest, c_compiler: gcc, cpp_compiler: g++},
{os: ubuntu-latest, c_compiler: clang, cpp_compiler: clang++},
{os: macos-latest, c_compiler: clang, cpp_compiler: clang++},
{os: macos-13, c_compiler: clang, cpp_compiler: clang++},
]
runs-on: ${{ matrix.config.os }}
steps:
Expand All @@ -27,7 +27,7 @@ jobs:

- name: Install requirements for macOS
run: brew install ninja
if: ${{ matrix.config.os == 'macos-latest' }}
if: ${{ matrix.config.os == 'macos-13' }}

- name: Run CMake
run: |
Expand All @@ -40,23 +40,23 @@ jobs:

- name: Get the diagnostics info
run: ./build/clblast_test_diagnostics
if: ${{ matrix.config.os == 'macos-latest' }}
if: ${{ matrix.config.os == 'macos-13' }}

- name: Run an example client
run: ./build/clblast_client_xgemm
if: ${{ matrix.config.os == 'macos-latest' }}
if: ${{ matrix.config.os == 'macos-13' }}

- name: Run an example sample program
run: ./build/clblast_sample_dgemv_c
if: ${{ matrix.config.os == 'macos-latest' }}
if: ${{ matrix.config.os == 'macos-13' }}

- name: Run an example tuner
run: ./build/clblast_tuner_xdot
if: ${{ matrix.config.os == 'macos-latest' }}
if: ${{ matrix.config.os == 'macos-13' }}

- name: Run the unittests
run: ctest --test-dir build
if: ${{ matrix.config.os == 'macos-latest' }}
if: ${{ matrix.config.os == 'macos-13' }}

build_windows:

Expand Down
5 changes: 5 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
Development version (next version)
- (no changes yet since last release)

Version 1.6.3
- Fixed a bug in the GEMMK=1 kernel (with 2D register tiling) when MWG!=NWG
- CMake fixes for older versions and for the CUDA backend
- Added tuned parameters for many devices (see doc/tuning.md)

Version 1.6.2
- Fix a bug in the pre-processor that would cause issues on Arm GPUs
- Fix DLL install directory in mingw
Expand Down
8 changes: 5 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,14 @@ endif()
project("clblast" C CXX)
set(clblast_VERSION_MAJOR 1)
set(clblast_VERSION_MINOR 6)
set(clblast_VERSION_PATCH 2)
set(clblast_VERSION_PATCH 3)
set(clblast_VERSION "${clblast_VERSION_MAJOR}.${clblast_VERSION_MINOR}.${clblast_VERSION_PATCH}")
set(clblast_SOVERSION ${clblast_VERSION_MAJOR})

# Policies
cmake_policy(SET CMP0074 NEW) # to make -DCBLAS_ROOT= work with newer CMake versions as well
# Opt in to policy CMP0074 so that -DCBLAS_ROOT= works with newer CMake versions as well.
# Testing for the policy itself (rather than comparing CMAKE_VERSION) keeps this working on
# old CMake releases too: the VERSION_GREATER_EQUAL operator is only understood by CMake >= 3.7,
# whereas if(POLICY ...) simply evaluates to false when the policy is unknown.
if(POLICY CMP0074)
  cmake_policy(SET CMP0074 NEW)
endif()

# Options and their default values
option(BUILD_SHARED_LIBS "Build a shared (ON) or static library (OFF)" ON)
Expand Down Expand Up @@ -518,7 +520,7 @@ if(CLIENTS OR TESTS)
endif()
endif()
if(CUBLAS_FOUND)
set(REF_INCLUDES ${REF_INCLUDES} ${CUDA_INCLUDE_DIRS})
set(REF_INCLUDES ${REF_INCLUDES} ${CUDA_INCLUDE_DIRS} ${CUBLAS_INCLUDE_DIRS})
set(REF_LIBRARIES ${REF_LIBRARIES} ${CUDA_LIBRARIES} ${CUBLAS_LIBRARIES})
set(WRAPPERS ${WRAPPERS} test/wrapper_cuda.hpp test/wrapper_cublas.hpp)
if(MSVC)
Expand Down
6 changes: 2 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -97,11 +97,9 @@ More detailed documentation is available in separate files:
Known issues
-------------

Known performance related issues:
Known issues:

* Severe performance issues with Beignet v1.3.0 due to missing support for local memory. Please downgrade to v1.2.1 or upgrade to v1.3.1 or newer.

Other known issues:
* Correctness issues on Intel Arc A770 and several other devices with version 1.6.2 or lower (depends on the device). Upgrade to version 1.6.3 or newer.

* Routines returning an integer are currently not properly tested for half-precision FP16: IHAMAX/IHAMIN/IHMAX/IHMIN

Expand Down
3 changes: 3 additions & 0 deletions doc/tuning.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ The CLBlast library is already tuned for the most commonly used OpenCL devices a
- Quadro GV100
- Tesla V100
- SM 7.5:
- GeForce MX450
- GeForce GTX 1650
- GeForce GTX 1650 Ti
- GeForce GTX 1650 Super
Expand All @@ -74,6 +75,7 @@ The CLBlast library is already tuned for the most commonly used OpenCL devices a
- GeForce RTX 3090
- RTX A6000
- SM 8.9:
- GeForce RTX 4050 Laptop
- GeForce RTX 4060
- GeForce RTX 4060 Ti
- GeForce RTX 4070 Laptop
Expand Down Expand Up @@ -166,6 +168,7 @@ The CLBlast library is already tuned for the most commonly used OpenCL devices a
- Core i7-4790K
- Core i7-5930K
- Core i7-6770HQ
- Core i7-12700H
- Core i9-9980HK
- Xeon E5-2630 v3
- Xeon E5-2630 v4
Expand Down
4 changes: 2 additions & 2 deletions include/clblast.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,10 @@
#define PUBLIC_API
#endif

// Version numbering (v1.6.2)
// Version numbering (v1.6.3)
#define CLBLAST_VERSION_MAJOR 1
#define CLBLAST_VERSION_MINOR 6
#define CLBLAST_VERSION_PATCH 2
#define CLBLAST_VERSION_PATCH 3

namespace clblast {
// =================================================================================================
Expand Down
4 changes: 2 additions & 2 deletions include/clblast_c.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,10 @@
#define PUBLIC_API
#endif

// Version numbering (v1.6.2)
// Version numbering (v1.6.3)
#define CLBLAST_VERSION_MAJOR 1
#define CLBLAST_VERSION_MINOR 6
#define CLBLAST_VERSION_PATCH 2
#define CLBLAST_VERSION_PATCH 3

// The C interface
#ifdef __cplusplus
Expand Down
2 changes: 1 addition & 1 deletion scripts/benchmark/plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
PURPLISH = [c / 255.0 for c in [85, 0, 119]] # #550077
GREEN = [c / 255.0 for c in [144, 224, 98]] # #90e062
COLORS = [BLUEISH, REDISH, PURPLISH, GREEN]
MARKERS = ["o-", "x-", ".-"]
MARKERS = ["o-", "x-", ".-", "--"]


def plot_graphs(results, file_name, num_rows, num_cols,
Expand Down
4 changes: 2 additions & 2 deletions src/database/kernels/copy/copy_16.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,15 +109,15 @@ const DatabaseEntry CopyHalf = {
kDeviceTypeGPU, "Intel", {
{ "default", {
{ Name{"Intel(R) Arc(TM) A750 Graphics "}, Params{ 8, 32, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ Name{"Intel(R) Arc(TM) A770 Graphics "}, Params{ 8, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ Name{"Intel(R) Arc(TM) A770 Graphics "}, Params{ 16, 32, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ Name{"Intel(R) HD Graphics 5500 BroadWell U-Processor GT"}, Params{ 8, 16, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ Name{"Intel(R) HD Graphics 620 "}, Params{ 32, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 8, 32, 4, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ Name{"Intel(R) Iris(R) Xe Graphics "}, Params{ 8, 8, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ Name{"Intel(R) RaptorLake-S Mobile Graphics Controller "}, Params{ 16, 8, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ Name{"Intel(R) UHD Graphics 620 "}, Params{ 8, 16, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ Name{"Intel(R) UHD Graphics 770 "}, Params{ 16, 16, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ kDeviceNameDefault , Params{ 8, 8, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ kDeviceNameDefault , Params{ 32, 8, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
}
},
Expand Down
5 changes: 4 additions & 1 deletion src/database/kernels/copy/copy_32.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,7 @@ const DatabaseEntry CopySingle = {
kDeviceTypeCPU, "Intel", {
{ "default", {
{ Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 32, 16, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ Name{"12th Gen Intel(R) Core(TM) i7-12700H "}, Params{ 32, 8, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ Name{"Intel(R) Core(TM) i5-4570 CPU @ 3.20GHz "}, Params{ 32, 32, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ Name{"Intel(R) Core(TM) i5-4590S CPU @ 3.00GHz "}, Params{ 32, 16, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 32, 16, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
Expand All @@ -170,7 +171,7 @@ const DatabaseEntry CopySingle = {
kDeviceTypeGPU, "Intel", {
{ "default", {
{ Name{"Intel(R) Arc(TM) A750 Graphics "}, Params{ 32, 16, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ Name{"Intel(R) Arc(TM) A770 Graphics "}, Params{ 8, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ Name{"Intel(R) Arc(TM) A770 Graphics "}, Params{ 16, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ Name{"Intel(R) Gen9 HD Graphics NEO "}, Params{ 8, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ Name{"Intel(R) HD Graphics 530 "}, Params{ 8, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ Name{"Intel(R) HD Graphics 5500 BroadWell U-Processor GT"}, Params{ 32, 16, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
Expand Down Expand Up @@ -259,6 +260,7 @@ const DatabaseEntry CopySingle = {
{ Name{"GeForce GTX 1650 "}, Params{ 16, 16, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ Name{"NVIDIA GeForce GTX 1650 SUPER "}, Params{ 8, 8, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ Name{"NVIDIA GeForce GTX 1650 Ti "}, Params{ 8, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ Name{"NVIDIA GeForce MX450 "}, Params{ 8, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ Name{"NVIDIA GeForce RTX 2060 "}, Params{ 8, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ Name{"NVIDIA GeForce RTX 2070 SUPER "}, Params{ 8, 8, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ Name{"NVIDIA GeForce RTX 2070 Super "}, Params{ 16, 8, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
Expand Down Expand Up @@ -287,6 +289,7 @@ const DatabaseEntry CopySingle = {
{ kDeviceNameDefault , Params{ 8, 8, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
{ "SM8.9", {
{ Name{"NVIDIA GeForce RTX 4050 Laptop GPU "}, Params{ 32, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ Name{"NVIDIA GeForce RTX 4060 "}, Params{ 8, 8, 2, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ Name{"NVIDIA GeForce RTX 4060 Ti "}, Params{ 8, 16, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ Name{"NVIDIA GeForce RTX 4070 Laptop GPU "}, Params{ 8, 16, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
Expand Down
9 changes: 6 additions & 3 deletions src/database/kernels/copy/copy_3232.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,7 @@ const DatabaseEntry CopyComplexSingle = {
kDeviceTypeCPU, "Intel", {
{ "default", {
{ Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 32, 16, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ Name{"12th Gen Intel(R) Core(TM) i7-12700H "}, Params{ 16, 32, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ Name{"Intel(R) Core(TM) i5-4570 CPU @ 3.20GHz "}, Params{ 32, 16, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ Name{"Intel(R) Core(TM) i5-4590S CPU @ 3.00GHz "}, Params{ 16, 8, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 16, 16, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
Expand All @@ -161,15 +162,15 @@ const DatabaseEntry CopyComplexSingle = {
{ Name{"Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz "}, Params{ 16, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ Name{"Intel(R) Xeon(R) CPU E5-2630 v3 @ 2.40GHz "}, Params{ 32, 16, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ Name{"Intel(R) Xeon(R) CPU E5-2630 v4 @ 2.20GHz "}, Params{ 32, 16, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ kDeviceNameDefault , Params{ 32, 16, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ kDeviceNameDefault , Params{ 16, 16, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "default", {
{ Name{"Intel(R) Arc(TM) A750 Graphics "}, Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ Name{"Intel(R) Arc(TM) A770 Graphics "}, Params{ 16, 32, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ Name{"Intel(R) Arc(TM) A770 Graphics "}, Params{ 8, 32, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ Name{"Intel(R) HD Graphics 530 "}, Params{ 16, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ Name{"Intel(R) HD Graphics 5500 BroadWell U-Processor GT"}, Params{ 16, 16, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ Name{"Intel(R) HD Graphics 620 "}, Params{ 16, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
Expand Down Expand Up @@ -253,6 +254,7 @@ const DatabaseEntry CopyComplexSingle = {
{ Name{"GeForce GTX 1650 "}, Params{ 32, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ Name{"NVIDIA GeForce GTX 1650 SUPER "}, Params{ 8, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ Name{"NVIDIA GeForce GTX 1650 Ti "}, Params{ 8, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ Name{"NVIDIA GeForce MX450 "}, Params{ 16, 16, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ Name{"NVIDIA GeForce RTX 2060 "}, Params{ 8, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ Name{"NVIDIA GeForce RTX 2070 SUPER "}, Params{ 8, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ Name{"NVIDIA GeForce RTX 2070 Super "}, Params{ 8, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
Expand Down Expand Up @@ -281,13 +283,14 @@ const DatabaseEntry CopyComplexSingle = {
{ kDeviceNameDefault , Params{ 8, 8, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
{ "SM8.9", {
{ Name{"NVIDIA GeForce RTX 4050 Laptop GPU "}, Params{ 16, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ Name{"NVIDIA GeForce RTX 4060 "}, Params{ 8, 32, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ Name{"NVIDIA GeForce RTX 4060 Ti "}, Params{ 8, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ Name{"NVIDIA GeForce RTX 4070 Laptop GPU "}, Params{ 8, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ Name{"NVIDIA GeForce RTX 4070 Ti "}, Params{ 16, 32, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ Name{"NVIDIA GeForce RTX 4080 "}, Params{ 16, 16, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ Name{"NVIDIA GeForce RTX 4090 "}, Params{ 8, 8, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ kDeviceNameDefault , Params{ 8, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ kDeviceNameDefault , Params{ 8, 16, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
{ "default", {
{ kDeviceNameDefault , Params{ 8, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
Expand Down
5 changes: 4 additions & 1 deletion src/database/kernels/copy/copy_64.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ const DatabaseEntry CopyDouble = {
kDeviceTypeCPU, "Intel", {
{ "default", {
{ Name{"Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz "}, Params{ 16, 32, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ Name{"12th Gen Intel(R) Core(TM) i7-12700H "}, Params{ 32, 8, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ Name{"Intel(R) Core(TM) i5-4570 CPU @ 3.20GHz "}, Params{ 32, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ Name{"Intel(R) Core(TM) i5-4590S CPU @ 3.00GHz "}, Params{ 32, 8, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ Name{"Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz "}, Params{ 16, 8, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
Expand Down Expand Up @@ -217,6 +218,7 @@ const DatabaseEntry CopyDouble = {
{ Name{"GeForce GTX 1650 "}, Params{ 8, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ Name{"NVIDIA GeForce GTX 1650 SUPER "}, Params{ 8, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ Name{"NVIDIA GeForce GTX 1650 Ti "}, Params{ 16, 32, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ Name{"NVIDIA GeForce MX450 "}, Params{ 32, 16, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ Name{"NVIDIA GeForce RTX 2060 "}, Params{ 8, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ Name{"NVIDIA GeForce RTX 2070 SUPER "}, Params{ 8, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ Name{"NVIDIA GeForce RTX 2070 Super "}, Params{ 8, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
Expand Down Expand Up @@ -245,13 +247,14 @@ const DatabaseEntry CopyDouble = {
{ kDeviceNameDefault , Params{ 8, 16, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
{ "SM8.9", {
{ Name{"NVIDIA GeForce RTX 4050 Laptop GPU "}, Params{ 16, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ Name{"NVIDIA GeForce RTX 4060 "}, Params{ 8, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ Name{"NVIDIA GeForce RTX 4060 Ti "}, Params{ 8, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ Name{"NVIDIA GeForce RTX 4070 Laptop GPU "}, Params{ 8, 32, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ Name{"NVIDIA GeForce RTX 4070 Ti "}, Params{ 8, 16, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ Name{"NVIDIA GeForce RTX 4080 "}, Params{ 8, 32, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ Name{"NVIDIA GeForce RTX 4090 "}, Params{ 8, 32, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ kDeviceNameDefault , Params{ 16, 8, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ kDeviceNameDefault , Params{ 8, 16, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
{ "default", {
{ kDeviceNameDefault , Params{ 8, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
Expand Down
Loading

0 comments on commit 137b0ad

Please sign in to comment.