
Commit

Merge pull request #426 from Chia-Network/develop
Version 3.1.0

- Add CUDA disk-hybrid mode requiring 128G of system DRAM.
- Integrate a plot checker into the CUDA plotter.
- Expose `--no-direct-io` to disable direct I/O to the output plot directory.
- Fix related issues on Windows.
- Fix a bug where some plots overflowed slice buffers.
- Fix build issues and other trivial issues.
- Expose experimental/WIP CUDA 16G disk-hybrid mode on Linux.
- Update README with CUDA and compression information.
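
A hypothetical invocation combining the new features, pieced together from the README examples and debug configurations in this commit (flag placement and the `--check`/`--no-direct-io` combination are assumed, not taken verbatim from the source):

```bash
# 128G disk-hybrid CUDA plot with the integrated plot checker,
# writing to the output directory without direct I/O.
./bladebit_cuda -z <compression_level> \
    -f <farmer_public_key> -c <pool_contract_address> \
    cudaplot --disk-128 -t1 <temp_dir> \
    --check 100 --check-threshold 2 \
    --no-direct-io \
    <output_directory>
```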
haorldbchi authored Oct 3, 2023

2 parents 02a8e68 + 7b25480 commit e9836f8
Showing 79 changed files with 5,724 additions and 2,148 deletions.
8 changes: 0 additions & 8 deletions .idea/vcs.xml

Some generated files are not rendered by default.

43 changes: 0 additions & 43 deletions .vscode/c_cpp_properties.json

This file was deleted.

63 changes: 46 additions & 17 deletions .vscode/launch.json
@@ -131,19 +131,25 @@
"preLaunchTask" : "build_cuda_debug",

"program": "${workspaceFolder}/build/bladebit_cuda",

// "-c", "xch1uf48n3f50xrs7zds0uek9wp9wmyza6crnex6rw8kwm3jnm39y82q5mvps6",
// "-i", "7a709594087cca18cffa37be61bdecf9b6b465de91acb06ecb6dbe0f4a536f73", // Yes overflow
// "--memo", "80a836a74b077cabaca7a76d1c3c9f269f7f3a8f2fa196a65ee8953eb81274eb8b7328d474982617af5a0fe71b47e9b8ade0cc43610ce7540ab96a524d0ab17f5df7866ef13d1221a7203e5d10ad2a4ae37f7b73f6cdfd6ddf4122e8a1c2f8ef207d52406afa2b6d7d92ea778f407205bd9dca40816c1b1cacfca2a6612b93eb",

"args":
"-w -n 1 -z 1 -f ade0cc43610ce7540ab96a524d0ab17f5df7866ef13d1221a7203e5d10ad2a4ae37f7b73f6cdfd6ddf4122e8a1c2f8ef -p 80a836a74b077cabaca7a76d1c3c9f269f7f3a8f2fa196a65ee8953eb81274eb8b7328d474982617af5a0fe71b47e9b8 -i c6b84729c23dc6d60c92f22c17083f47845c1179227c5509f07a5d2804a7b835 cudaplot --check 100 --check-threshold 2 /home/harold/plot",

// "-w -z 1 -f ade0cc43610ce7540ab96a524d0ab17f5df7866ef13d1221a7203e5d10ad2a4ae37f7b73f6cdfd6ddf4122e8a1c2f8ef -p 80a836a74b077cabaca7a76d1c3c9f269f7f3a8f2fa196a65ee8953eb81274eb8b7328d474982617af5a0fe71b47e9b8 -i c6b84729c23dc6d60c92f22c17083f47845c1179227c5509f07a5d2804a7b835 cudaplot /home/harold/plot",
// "-w -z 1 -f ade0cc43610ce7540ab96a524d0ab17f5df7866ef13d1221a7203e5d10ad2a4ae37f7b73f6cdfd6ddf4122e8a1c2f8ef -p 80a836a74b077cabaca7a76d1c3c9f269f7f3a8f2fa196a65ee8953eb81274eb8b7328d474982617af5a0fe71b47e9b8 -i c6b84729c23dc6d60c92f22c17083f47845c1179227c5509f07a5d2804a7b835 cudaplot --disk-128 -t1 /home/harold/plotdisk --no-direct-buffers /home/harold/plot",
// "-w -z 1 -f ade0cc43610ce7540ab96a524d0ab17f5df7866ef13d1221a7203e5d10ad2a4ae37f7b73f6cdfd6ddf4122e8a1c2f8ef -p 80a836a74b077cabaca7a76d1c3c9f269f7f3a8f2fa196a65ee8953eb81274eb8b7328d474982617af5a0fe71b47e9b8 -i c6b84729c23dc6d60c92f22c17083f47845c1179227c5509f07a5d2804a7b835 cudaplot --disk-128 -t1 /home/harold/plotdisk /home/harold/plot",
"-w -z 1 -f ade0cc43610ce7540ab96a524d0ab17f5df7866ef13d1221a7203e5d10ad2a4ae37f7b73f6cdfd6ddf4122e8a1c2f8ef -p 80a836a74b077cabaca7a76d1c3c9f269f7f3a8f2fa196a65ee8953eb81274eb8b7328d474982617af5a0fe71b47e9b8 -i c6b84729c23dc6d60c92f22c17083f47845c1179227c5509f07a5d2804a7b835 cudaplot --disk-64 -t1 /home/harold/plotdisk /home/harold/plot",

"args":
// "-w --compress 3 -f ade0cc43610ce7540ab96a524d0ab17f5df7866ef13d1221a7203e5d10ad2a4ae37f7b73f6cdfd6ddf4122e8a1c2f8ef -p 80a836a74b077cabaca7a76d1c3c9f269f7f3a8f2fa196a65ee8953eb81274eb8b7328d474982617af5a0fe71b47e9b8 -i c6b84729c23dc6d60c92f22c17083f47845c1179227c5509f07a5d2804a7b835 cudaplot ~/plot/tmp",
"-w --compress 1 -f ade0cc43610ce7540ab96a524d0ab17f5df7866ef13d1221a7203e5d10ad2a4ae37f7b73f6cdfd6ddf4122e8a1c2f8ef -p 80a836a74b077cabaca7a76d1c3c9f269f7f3a8f2fa196a65ee8953eb81274eb8b7328d474982617af5a0fe71b47e9b8 -i c6b84729c23dc6d60c92f22c17083f47845c1179227c5509f07a5d2804a7b835 cudaplot ~/plot",

"windows": {
"type": "cppvsdbg",
"program": "${workspaceFolder}/build/Debug/bladebit_cuda.exe",
"args": "--benchmark --compress 1 -f ade0cc43610ce7540ab96a524d0ab17f5df7866ef13d1221a7203e5d10ad2a4ae37f7b73f6cdfd6ddf4122e8a1c2f8ef -p 80a836a74b077cabaca7a76d1c3c9f269f7f3a8f2fa196a65ee8953eb81274eb8b7328d474982617af5a0fe71b47e9b8 -i c6b84729c23dc6d60c92f22c17083f47845c1179227c5509f07a5d2804a7b835 cudaplot D:/"
// "args": "--benchmark -z 1 -f ade0cc43610ce7540ab96a524d0ab17f5df7866ef13d1221a7203e5d10ad2a4ae37f7b73f6cdfd6ddf4122e8a1c2f8ef -p 80a836a74b077cabaca7a76d1c3c9f269f7f3a8f2fa196a65ee8953eb81274eb8b7328d474982617af5a0fe71b47e9b8 -i c6b84729c23dc6d60c92f22c17083f47845c1179227c5509f07a5d2804a7b835 cudaplot D:/"
"args": "-w -z 1 -f ade0cc43610ce7540ab96a524d0ab17f5df7866ef13d1221a7203e5d10ad2a4ae37f7b73f6cdfd6ddf4122e8a1c2f8ef -p 80a836a74b077cabaca7a76d1c3c9f269f7f3a8f2fa196a65ee8953eb81274eb8b7328d474982617af5a0fe71b47e9b8 -i c6b84729c23dc6d60c92f22c17083f47845c1179227c5509f07a5d2804a7b835 cudaplot -t2 D:/chia_test_plots D:/chia_test_plots",
}
},

@@ -236,7 +242,7 @@

{
"name" : "Tests",

"type" : "cppdbg",
"osx": {
"MIMode": "lldb",
@@ -245,7 +251,7 @@
"stopAtEntry" : false,
"cwd" : "${workspaceFolder}",
"preLaunchTask" : "build_tests_debug",
"console" : "internalConsole",
// "console" : "internalConsole",

"program": "${workspaceRoot}/build/tests",

@@ -260,6 +266,8 @@
// { "name": "bb_plot" , "value": "/home/harold/plot/tmp/plot-k32-c06-2023-02-14-21-43-c6b84729c23dc6d60c92f22c17083f47845c1179227c5509f07a5d2804a7b835.plot" },
{ "name": "bb_clevel" , "value": "1" },
{ "name": "bb_end_clevel" , "value": "1" },

{ "name": "bb_queue_path" , "value": "/home/ubuntu/plot" },
],

"args": [
@@ -273,7 +281,10 @@
// "line-point-deltas"
// "compressed-plot-proof"
// "compressed-plot-qualities"
"macos-threads"
// "macos-threads"
// "disk-slices"
// "disk-buckets"
"[disk-queue]"
]
}

@@ -285,10 +296,16 @@
"stopAtEntry" : false,
"cwd" : "${workspaceFolder}",
"preLaunchTask" : "build_debug",
"console" : "internalConsole",

"program": "${workspaceFolder}/build/bladebit",

// "program": "${workspaceFolder}/build/bladebit_cuda",

"linux": {
"MIMode": "gdb",
"miDebuggerPath": "/usr/bin/gdb",
"program": "${workspaceFolder}/build/bladebit"
},

"windows": {
"type" : "cppvsdbg",
"program": "${workspaceFolder}/build/debug/bladebit.exe"
@@ -301,6 +318,11 @@
// "-t", "48",
// "-t", "1",

// "validate", "--f7", "2",
// "/home/harold/plot/jmplot-c01-c6b84729c23dc6d60c92f22c17083f47845c1179227c5509f07a5d2804a7b835.plot"
// "/home/harold/plot/plot-k32-c01-2023-07-19-00-29-c6b84729c23dc6d60c92f22c17083f47845c1179227c5509f07a5d2804a7b835.plot",
// "/home/harold/plot/plot-k32-c01-2023-08-03-04-57-c6b84729c23dc6d60c92f22c17083f47845c1179227c5509f07a5d2804a7b835.plot"

// "-t", "1", "validate", "--f7", "324", "~/plot/tmp/plot-k32-c01-2023-02-13-22-21-c6b84729c23dc6d60c92f22c17083f47845c1179227c5509f07a5d2804a7b835.plot"
// "validate", "--f7", "7", "~/plot/tmp/plot-k32-c01-2023-03-09-14-07-c6b84729c23dc6d60c92f22c17083f47845c1179227c5509f07a5d2804a7b835.plot",
// "validate", "--cuda", "--f7", "4", "~/plot/tmp/plot-k32-c07-2023-04-13-16-08-330fbf677f78641061c93312c1a7ffa28138739b69975f3b874df6acc3e76378.plot",
@@ -322,8 +344,8 @@
// // "/home/harold/plot/tmp/plot-k32-c04-2023-01-31-23-15-5cfc42dfaa5613da0b425994c2427a2ba4a8efcfb49e7844e93c0854baf09863.plot"

// Simulation
"-t", "1", "simulate", "--seed", "b8e9ec6bc179ae6ba5f5c3483f7501db32879efa84b62001d27601a540dca5ff",
"-p", "16", "-n", "1", "--power", "45", "--size", "4PB", "~/plot/tmp/plot-k32-c01-2023-03-09-14-07-c6b84729c23dc6d60c92f22c17083f47845c1179227c5509f07a5d2804a7b835.plot"
// "-t", "1", "simulate", "--seed", "b8e9ec6bc179ae6ba5f5c3483f7501db32879efa84b62001d27601a540dca5ff",
// "-p", "16", "-n", "1", "--power", "45", "--size", "4PB", "~/plot/tmp/plot-k32-c01-2023-03-09-14-07-c6b84729c23dc6d60c92f22c17083f47845c1179227c5509f07a5d2804a7b835.plot"
// "-t", "30", "simulate", "-p", "2", "-n", "600", "~/plot/tmp/plot-k32-c07-2023-03-16-11-49-7732c75d9f3b5ad1fc804bb7429121e334bd4f25f9bbbb76ef0370b5a0e80aae.plot"

// "-m",
@@ -335,11 +357,18 @@
// "--f7", "3983284117", "/home/harito/plot/tmp/gpu_1.plot",

/// Compare
// "plotcmp",
// "/home/harito/plot/tmp/gpu_1.plot.old",
// "/home/harold/plot-tmpfs/gpu_1.plot",
// "/home/harito/plot/tmp/gpu_1.plot",
// "/home/harito/plot/tmp/plot-k32-2022-11-21-05-59-c6b84729c23dc6d60c92f22c17083f47845c1179227c5509f07a5d2804a7b835.plot"
"plotcmp",
"/home/harold/plot/plot-k32-c01-2023-08-22-16-21-c6b84729c23dc6d60c92f22c17083f47845c1179227c5509f07a5d2804a7b835.plot",
"/home/harold/plot/plot-k32-c01-2023-08-22-16-21-c6b84729c23dc6d60c92f22c17083f47845c1179227c5509f07a5d2804a7b835.plot",

// "/home/harold/plot/plot-k32-c01-2023-08-03-22-59-c6b84729c23dc6d60c92f22c17083f47845c1179227c5509f07a5d2804a7b835.plot"
// "/home/harold/plot/jmplot-c01-c6b84729c23dc6d60c92f22c17083f47845c1179227c5509f07a5d2804a7b835.plot"

// Check
// "check",
// "-n", "100", "--seed", "dc471c4d905ba3a65c6cecb46d97b132c0c98f51d416db5ec5cbdbe95ef2832f",
// "/home/harold/plot/plot-k32-c01-2023-07-19-00-29-c6b84729c23dc6d60c92f22c17083f47845c1179227c5509f07a5d2804a7b835.plot"
// "/home/harold/plot/jm.plot"
]
},

27 changes: 22 additions & 5 deletions .vscode/settings.json
@@ -4,16 +4,16 @@
"nominmax"
],
"files.associations": {
"*.sd": "yaml",
"*.userprefs": "xml",
"*.make": "makefile",
"Fastfile": "ruby",
"*.plist": "xml",
"*.sd": "yaml",
"*.json": "jsonc",
"*.ir": "llvm",
"*.qs": "javascript",
"*.ac": "shellscript",
"player": "json",
"*.userprefs": "xml",
"*.make": "makefile",
"memory": "cpp",
"cstddef": "cpp",
"string": "cpp",
@@ -113,7 +113,18 @@
"filesystem": "cpp",
"__bits": "cpp",
"csignal": "cpp",
"cfenv": "cpp"
"cfenv": "cpp",
"ranges": "cpp",
"xhash": "cpp",
"xmemory": "cpp",
"xstddef": "cpp",
"xstring": "cpp",
"xtr1common": "cpp",
"xtree": "cpp",
"xutility": "cpp",
"__assert": "cpp",
"*.inc": "cpp",
"xiosbase": "cpp"
},
"cSpell.words": [
"Ryzen"
@@ -124,7 +135,13 @@
"cmake.preferredGenerators": [
"Unix Makefiles",
"Visual Studio 17 2022"
]
],
// "cmake.buildArgs": [],
"cmake.configureSettings": {
"BB_ENABLE_TESTS": "ON",
"BB_CUDA_USE_NATIVE": "ON"
},
"C_Cpp.dimInactiveRegions": false,
// "cmake.generator": "Unix Makefiles"
// "cmake.generator": "Visual Studio 17 2022"

15 changes: 15 additions & 0 deletions Bladebit.cmake
@@ -227,6 +227,8 @@ set(src_bladebit
src/plotting/PlotWriter.cpp
src/plotting/PlotWriter.h
src/plotting/Tables.h
src/plotting/BufferChain.h
src/plotting/BufferChain.cpp

src/plotting/f1/F1Gen.h
src/plotting/f1/F1Gen.cpp
@@ -258,6 +260,7 @@ set(src_bladebit
src/tools/PlotReader.cpp
src/tools/PlotReader.h
src/tools/PlotValidator.cpp
src/tools/PlotChecker.cpp

src/util/Array.h
src/util/Array.inl
@@ -289,6 +292,18 @@ set(src_bladebit
src/harvesting/GreenReaper.h
src/harvesting/GreenReaperInternal.h
src/harvesting/Thresher.h

src/plotting/DiskQueue.h
src/plotting/DiskQueue.cpp
src/plotting/DiskBuffer.h
src/plotting/DiskBuffer.cpp
src/plotting/DiskBucketBuffer.h
src/plotting/DiskBucketBuffer.cpp
src/plotting/DiskBufferBase.h
src/plotting/DiskBufferBase.cpp

src/util/MPMCQueue.h
src/util/CommandQueue.h
)

target_sources(bladebit_core PUBLIC ${src_bladebit})
5 changes: 4 additions & 1 deletion BladebitCUDA.cmake
@@ -22,6 +22,9 @@ add_executable(bladebit_cuda
cuda/CudaPlotUtil.cu
cuda/GpuStreams.h
cuda/GpuStreams.cu
cuda/GpuDownloadStream.cu
cuda/GpuQueue.h
cuda/GpuQueue.cu

# Harvester
cuda/harvesting/CudaThresher.cu
@@ -42,7 +45,7 @@ target_compile_options(bladebit_cuda PRIVATE
>

$<${is_cuda_debug}:
-G
# -G
>
)

16 changes: 9 additions & 7 deletions CMakeLists.txt
@@ -1,6 +1,7 @@
cmake_minimum_required(VERSION 3.19 FATAL_ERROR)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CUDA_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

set(CMAKE_CONFIGURATION_TYPES Release Debug)
@@ -9,15 +10,15 @@ if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE "Release"
CACHE STRING "Possible values are: Release, Debug"
FORCE
)
)
endif()

# Allows for CMAKE_MSVC_RUNTIME_LIBRARY
if(POLICY CMP0091)
cmake_policy(SET CMP0091 NEW)
endif()

set(CMAKE_OSX_DEPLOYMENT_TARGET "10.14" CACHE STRING "macOS minimum supported version.")
set(CMAKE_OSX_DEPLOYMENT_TARGET "10.16" CACHE STRING "macOS minimum supported version.")
set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>" CACHE STRING "MSVC Runtime Library")

project(bladebit LANGUAGES C CXX ASM)
@@ -83,10 +84,10 @@ endif()
# NOTE: These are mostly sandbox test environment, not proper tests
option(BB_ENABLE_TESTS "Enable tests." OFF)
option(NO_CUDA_HARVESTER "Explicitly disable CUDA in the bladebit_harvester target." OFF)
option(BB_NO_EMBED_VERSION "Disable embedding the version when building locally (non-CI)." ON)
option(BB_NO_EMBED_VERSION "Disable embedding the version when building locally (non-CI)." OFF)
option(BB_HARVESTER_ONLY "Enable only the harvester target." OFF)
option(BB_HARVESTER_STATIC "Build the harvester target as a static library." OFF)

option(BB_CUDA_USE_NATIVE "Only build the native CUDA architecture when in release mode." OFF)

#
# Dependencies
@@ -103,7 +104,7 @@ if(NOT ${BB_HARVESTER_ONLY})
GIT_REPOSITORY https://github.com/Chia-Network/bls-signatures.git
GIT_TAG 2.0.2
EXCLUDE_FROM_ALL ${BB_IS_DEPENDENCY}
)
)

set(BUILD_BLS_PYTHON_BINDINGS "0" CACHE STRING "0")
set(BUILD_BLS_TESTS "0" CACHE STRING "")
@@ -130,6 +131,7 @@ set(is_x86 $<OR:$<STREQUAL:${CMAKE_HOST_SYSTEM_PROCESSOR},AMD64>,$<STREQUAL:${CM
set(is_arm $<OR:$<STREQUAL:${CMAKE_HOST_SYSTEM_PROCESSOR},arm64>,$<STREQUAL:${CMAKE_HOST_SYSTEM_PROCESSOR},aarch64>>)
set(is_msvc_c_cpp $<AND:${is_c_cpp},$<CXX_COMPILER_ID:MSVC>>)


if(CUDAToolkit_FOUND AND NOT ${NO_CUDA_HARVESTER})
set(have_cuda $<BOOL:1>)
else()
@@ -143,7 +145,7 @@ endif()
include(Config.cmake)

if(NOT ${BB_HARVESTER_ONLY})
if(NOT BB_IS_DEPENDENCY AND (NOT BB_NO_EMBED_VERSION))
if((NOT BB_IS_DEPENDENCY) AND (NOT BB_NO_EMBED_VERSION))
include(cmake_modules/EmbedVersion.cmake)
endif()
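
The new `BB_CUDA_USE_NATIVE` option (and `BB_ENABLE_TESTS`, wired into `.vscode/settings.json` above) can also be set from a plain command-line configure; a minimal sketch (build directory name illustrative):

```bash
cmake -B build -DCMAKE_BUILD_TYPE=Release \
      -DBB_ENABLE_TESTS=ON \
      -DBB_CUDA_USE_NATIVE=ON
cmake --build build --target bladebit_cuda --config Release
```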

118 changes: 59 additions & 59 deletions Config.cmake
@@ -1,6 +1,11 @@
# Base interface configuration project
add_library(bladebit_config INTERFACE)

target_include_directories(bladebit_config INTERFACE
${INCLUDE_DIRECTORIES}
${CMAKE_CURRENT_SOURCE_DIR}/src
)

target_compile_definitions(bladebit_config INTERFACE
$<${is_release}:
_NDEBUG=1
@@ -22,32 +27,34 @@ target_compile_definitions(bladebit_config INTERFACE

target_compile_options(bladebit_config INTERFACE

# GCC or Clang
$<$<CXX_COMPILER_ID:GNU,Clang,AppleClang>:
-Wall
-Wno-comment
-Wno-unknown-pragmas
-g

$<${is_release}:
-O3
$<${is_c_cpp}:
# GCC or Clang
$<$<CXX_COMPILER_ID:GNU,Clang,AppleClang>:
-Wall
-Wno-comment
-Wno-unknown-pragmas
-g

$<${is_release}:
-O3
>

$<${is_debug}:
-O0
>
>

$<${is_debug}:
-O0
# GCC
$<$<CXX_COMPILER_ID:GNU>:
-fmax-errors=5
>
>

# GCC
$<$<CXX_COMPILER_ID:GNU>:
-fmax-errors=5
>

# Clang
$<$<CXX_COMPILER_ID:Clang,AppleClang>:
-ferror-limit=5
-fdeclspec
-Wno-empty-body
# Clang
$<$<CXX_COMPILER_ID:Clang,AppleClang>:
-ferror-limit=5
-fdeclspec
-Wno-empty-body
>
>

# MSVC
@@ -129,43 +136,36 @@ cmake_policy(SET CMP0105 NEW)
set(cuda_archs

$<${is_cuda_release}:
## Maxwell
## Tesla/Quadro M series
-gencode=arch=compute_50,code=sm_50
## Quadro M6000 , GeForce 900, GTX-970, GTX-980, GTX Titan X
-gencode=arch=compute_52,code=sm_52
## Tegra (Jetson) TX1 / Tegra X1, Drive CX, Drive PX, Jetson Nano
-gencode=arch=compute_53,code=sm_53
## Pascal
## GeForce 1000 series
-gencode=arch=compute_60,code=sm_60
## GeForce GTX 1050Ti, GTX 1060, GTX 1070, GTX 1080
-gencode=arch=compute_61,code=sm_61
## Drive Xavier, Jetson AGX Xavier, Jetson Xavier NX
-gencode=arch=compute_62,code=sm_62
## Volta
## GV100, Tesla V100, Titan V
-gencode=arch=compute_70,code=sm_70
## Tesla V100
-gencode=arch=compute_72,code=sm_72
## Turing
-gencode=arch=compute_75,code=sm_75
## Ampere
## NVIDIA A100, DGX-A100
-gencode=arch=compute_80,code=sm_80
## GeForce RTX 3000 series, NVIDIA A100
-gencode=arch=compute_86,code=sm_86
## Jetson Orin
-gencode=arch=compute_87,code=sm_87
## Lovelace
## NVIDIA GeForce RTX 4090, RTX 4080, RTX 6000, Tesla L40
-gencode=arch=compute_89,code=sm_89
## Future proofing
-gencode=arch=compute_89,code=compute_89
## Hopper
## NVIDIA H100 (GH100)
# -gencode=arch=compute_90,code=sm_90
# -gencode=arch=compute_90a,code=sm_90a
$<$<BOOL:${BB_CUDA_USE_NATIVE}>:
-arch=native
>

$<$<NOT:$<BOOL:${BB_CUDA_USE_NATIVE}>>:

# Maxwell
-gencode=arch=compute_50,code=sm_50 # Tesla/Quadro M series
-gencode=arch=compute_52,code=sm_52 # Quadro M6000 , GeForce 900, GTX-970, GTX-980, GTX Titan X
-gencode=arch=compute_53,code=sm_53 # Tegra (Jetson) TX1 / Tegra X1, Drive CX, Drive PX, Jetson Nano

# Pascal
-gencode=arch=compute_60,code=sm_60 # GeForce 1000 series
-gencode=arch=compute_61,code=sm_61 # GeForce GTX 1050Ti, GTX 1060, GTX 1070, GTX 1080
-gencode=arch=compute_62,code=sm_62 # Drive Xavier, Jetson AGX Xavier, Jetson Xavier NX

# Volta
-gencode=arch=compute_70,code=sm_70 # GV100, Tesla V100, Titan V
-gencode=arch=compute_72,code=sm_72 # Tesla V100
-gencode=arch=compute_75,code=sm_75 # Turing

# Ampere
-gencode=arch=compute_80,code=sm_80 # NVIDIA A100, DGX-A100
-gencode=arch=compute_86,code=sm_86 # GeForce RTX 3000 series, NVIDIA A100
-gencode=arch=compute_87,code=sm_87 # Jetson Orin

# Lovelace
-gencode=arch=compute_89,code=sm_89 # NVIDIA GeForce RTX 4090, RTX 4080, RTX 6000, Tesla L40
-gencode=arch=compute_89,code=compute_89 # Future proofing
>
>

$<${is_cuda_debug}:
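
With `BB_CUDA_USE_NATIVE=ON`, release builds now pass `-arch=native`, which tells recent CUDA toolkits to compile only for the GPU architectures detected on the build machine instead of the full `-gencode` list above; builds are much faster, at the cost of a non-portable binary. For illustration (file name taken from this diff, flags abridged):

```bash
# Target only the build machine's GPU:
nvcc -arch=native -c cuda/CudaPlotUtil.cu

# Or emit code for an explicit set of architectures (portable binaries):
nvcc -gencode=arch=compute_86,code=sm_86 \
     -gencode=arch=compute_89,code=sm_89 \
     -c cuda/CudaPlotUtil.cu
```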
12 changes: 9 additions & 3 deletions Harvester.cmake
@@ -1,5 +1,5 @@
if(NOT ${BB_HARVESTER_STATIC})
add_library(bladebit_harvester SHARED)
add_library(bladebit_harvester SHARED src/harvesting/HarvesterDummy.cpp)
else()
add_library(bladebit_harvester STATIC)
endif()
@@ -82,9 +82,15 @@ target_sources(bladebit_harvester PRIVATE
cuda/CudaF1.cu
cuda/CudaMatch.cu
cuda/CudaPlotUtil.cu
cuda/GpuQueue.cu

# TODO: Remove this, ought not be needed in harvester
# TODO: Does this have to be here?
cuda/GpuStreams.cu
cuda/GpuDownloadStream.cu
src/plotting/DiskBuffer.cpp
src/plotting/DiskBucketBuffer.cpp
src/plotting/DiskBufferBase.cpp
src/plotting/DiskQueue.cpp
>

$<$<NOT:${have_cuda}>:
@@ -159,7 +165,7 @@ if(CUDAToolkit_FOUND)
CUDA_RUNTIME_LIBRARY Static
CUDA_SEPARABLE_COMPILATION ON
CUDA_RESOLVE_DEVICE_SYMBOLS ON
# CUDA_ARCHITECTURES OFF
CUDA_ARCHITECTURES OFF
)
endif()

103 changes: 94 additions & 9 deletions README.md
@@ -1,8 +1,71 @@
# BladeBit Chia Plotter
# Bladebit Chia Plotter

[![Release Builds](https://github.com/Chia-Network/bladebit/actions/workflows/build-release.yml/badge.svg?branch=master&event=push)](https://github.com/Chia-Network/bladebit/actions/workflows/build-release.yml)

A high-performance **k32-only**, Chia (XCH) plotter supporting in-RAM and disk-based plotting.
A high-performance **k32-only** Chia (XCH) plotter.

Bladebit supports 3 plotting modes:
- Fully in-RAM (no drives required), CPU-based mode.
- GPU (CUDA-based) mode, either fully in-RAM or disk-hybrid.
- Disk-based mode.

## Usage
Run `bladebit --help` to see general help. For command-specific help, use `bladebit help <command_name>`.

## Requirements

**CUDA**

An NVIDIA GPU is required for this mode, which is exposed via the `cudaplot` command in the separate `bladebit_cuda` executable. It has mainly been tested on consumer cards from the **10xx** series and up.

| Mode | OS | DRAM | VRAM | CUDA capability
|--------------------------------|----------------|------|------|----------------
| In-RAM | Linux, Windows | 256G | 8G | 5.2 and up
| Disk-hybrid 128G | Linux, Windows | 128G | 8G | 5.2 and up
| Disk-hybrid 16G (WIP) | Linux | 16G | 8G | 5.2 and up

> *NOTE: The 16G mode is currently a work in progress; at this stage it only works on Linux, and direct I/O is unavailable in this mode.*

**CPU RAM-Only**

Available on Linux, Windows and macOS.
Requires at least **416G** of system DRAM.


**Disk**

Available on Linux, Windows and macOS.

A minimum of **4 GiB of RAM** is required, with lower bucket counts requiring up to 12 GiB of RAM. Roughly **480 GiB of disk space** is required in the default mode, or around **390 GiB of disk space** with `--alternate` mode enabled.

The exact amounts of RAM and disk space required may vary slightly depending on the system's page size and the target disk file system block size (block-alignment is required for direct I/O).

SSDs are highly recommended for disk-based plotting.


## Compressed Plots

Compressed plots are supported in CUDA mode and in RAM-only mode. The CPU disk-based mode does **NOT** currently support compressed plots.

Compressed plots are currently supported for compression levels from **C1** to **C7**. Note that bladebit compression levels are not compatible with other plotters' compression levels. These levels are based on the *number of bits dropped from an entry, excluding the minimum bits required to fully drop a table*. At `k=32` the first table is fully excluded from the plot at 16 bits dropped.

> *NOTE: Although higher compression levels are available, support for farming them has not yet been implemented, so they are disabled. They will be enabled in the future.*

Compression levels are currently roughly equivalent to the following plot sizes.

| Compression Level | Plot Size
|-------------------|-------------
| C1 | 87.5 GiB
| C2 | 86.0 GiB
| C3 | 84.4 GiB
| C4 | 82.8 GiB
| C5 | 81.2 GiB
| C6 | 79.6 GiB
| C7 | 78.0 GiB

These sizes might improve in the future with further compression optimizations.
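
As a quick sanity check of the table, each level saves roughly a further 1.6 GiB; a minimal sketch, assuming the standard ~101.4 GiB size of an uncompressed k32 plot (the baseline is an assumption, not stated in this README):

```cpp
#include <cstdio>

int main()
{
    const double uncompressedGiB = 101.4; // assumed uncompressed k32 baseline
    const double sizesGiB[]      = { 87.5, 86.0, 84.4, 82.8, 81.2, 79.6, 78.0 }; // C1..C7

    for( int level = 1; level <= 7; level++ )
        std::printf( "C%d: %.1f GiB (~%.1f GiB saved)\n",
                     level, sizesGiB[level - 1], uncompressedGiB - sizesGiB[level - 1] );
}
```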


## Requirements

@@ -39,7 +102,7 @@ SSDs are highly recommended for disk-based plotting.


## Prerequisites
Linux, Windows and MacOS (both intel and ARM (Apple Silicon)) are supported.
Linux, Windows and macOS (both Intel and ARM) are supported.


### Linux
@@ -83,8 +146,12 @@ cmake --build . --target bladebit --config Release
The resulting binary will be found under the `build/` directory.
On Windows it will be under `build/Release/`.

For **bladebit_cuda**, the CUDA toolkit must be installed. The target name is `bladebit_cuda`.

For simplicity, the `build.sh` or `build-cuda.sh` scripts can be used to build. On Windows this requires Git Bash or a similar bash-based shell to run.

## Usage
Run **bladebit** with the `-h` for complete usage and command line options:
Run **bladebit** (or **bladebit_cuda**) with the `-h` flag for complete usage and command-line options:

```bash
# Linux & macOS
@@ -93,18 +160,33 @@ build/bladebit -h
# Windows
build/Release/bladebit.exe -h
```
The bladebit CLI uses the format `bladebit <GLOBAL_OPTIONS> <command> <COMMAND_OPTIONS>`.


The bladebit CLI uses the format `bladebit <GLOBAL_OPTIONS> <sub_command> <COMMAND_OPTIONS>`.

Use the aforementioned `-h` parameter to get the full list of sub-commands and `GLOBAL_OPTIONS`.
The `sub_command`-specific `COMMAND_OPTIONS` can be obtained by using the `help` sub command with the desired command as the parameter:
Use the aforementioned `-h` parameter to get the full list of commands and `GLOBAL_OPTIONS`.
The `command`-specific `COMMAND_OPTIONS` can be obtained by using the `help` sub-command with the desired command as the parameter:

```bash
bladebit help cudaplot
bladebit help ramplot
bladebit help diskplot
```

### CUDA
Basic `cudaplot` usage:
```bash
# OG plots
./bladebit_cuda -f <farmer_public_key> -p <pool_public_key> cudaplot <output_directory>

# Portable plots
./bladebit_cuda -f <farmer_public_key> -c <pool_contract_address> cudaplot <output_directory>

# Compressed plots
./bladebit_cuda -z <compression_level> -f <farmer_public_key> -c <pool_contract_address> cudaplot <output_directory>

# 128G disk-hybrid mode
./bladebit_cuda -z <compression_level> -f <farmer_public_key> -c <pool_contract_address> cudaplot --disk-128 -t1 <temp_dir> <output_directory>
```
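
The plot checker integrated in this release can be attached to the same command; a hypothetical example based on the debug configuration in `.vscode/launch.json` above (`--check <n>` and `--check-threshold <t>` semantics assumed: check n proofs in the finished plot against a pass threshold):

```bash
# Plot, then check 100 proofs in the resulting plot.
./bladebit_cuda -z <compression_level> -f <farmer_public_key> -c <pool_contract_address> \
    cudaplot --check 100 --check-threshold 2 <output_directory>
```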

### In-RAM
Basic `ramplot` usage:
```bash
@@ -113,6 +195,9 @@ Basic `ramplot` usage:

# Portable plots
./bladebit -f <farmer_public_key> -c <pool_contract_address> ramplot <output_directory>

# Compressed plots
./bladebit -z <compression_level> -f <farmer_public_key> -c <pool_contract_address> ramplot <output_directory>
```

### Disk-Based
9 changes: 7 additions & 2 deletions Tests.cmake
@@ -1,10 +1,15 @@
include(cmake_modules/FindCatch2.cmake)

add_executable(tests ${src_bladebit})
add_executable(tests ${src_bladebit}
cuda/harvesting/CudaThresherDummy.cpp
tests/TestUtil.h
tests/TestDiskQueue.cpp
)

target_compile_definitions(tests PRIVATE
BB_TEST_MODE=1
)
target_link_libraries(tests PRIVATE bladebit_config Catch2::Catch2WithMain)
target_link_libraries(tests PRIVATE bladebit_config bladebit_core Catch2::Catch2WithMain)

set_target_properties(tests PROPERTIES
EXCLUDE_FROM_ALL ON
3 changes: 2 additions & 1 deletion VERSION
@@ -1 +1,2 @@
3.0.0
3.1.0

11 changes: 11 additions & 0 deletions build-cuda.sh
@@ -0,0 +1,11 @@
#!/usr/bin/env bash
set -e
_dir=$(cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd)
cd $_dir

build_dir=build-release
mkdir -p ${build_dir}
cd ${build_dir}

cmake .. -DCMAKE_BUILD_TYPE=Release
cmake --build . --target bladebit_cuda --config Release --clean-first -j24
27 changes: 18 additions & 9 deletions cmake_modules/EmbedVersion.cmake
@@ -2,18 +2,25 @@
if((NOT DEFINED ENV{CI}) AND (NOT DEFINED CACHE{bb_version_embedded}))
message("Embedding local build version")

set(bb_version_embedded on CACHE BOOL "Version embedding has already happened.")

set(cmd_ver bash)
set(cmd_shell bash)
set(cmd_ext sh)
if(${CMAKE_SYSTEM_NAME} MATCHES "Windows")
set(cmd_ver bash.exe)

find_program(bash_path NAMES bash.exe NO_CACHE)

if(${bash_path} MATCHES "-NOTFOUND")
set(cmd_shell powershell)
set(cmd_ext ps1)
else()
set(cmd_shell "${bash_path}")
endif()
endif()

execute_process(COMMAND ${cmd_ver} ${CMAKE_SOURCE_DIR}/extract-version.sh major OUTPUT_VARIABLE bb_ver_maj WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} COMMAND_ERROR_IS_FATAL ANY)
execute_process(COMMAND ${cmd_ver} ${CMAKE_SOURCE_DIR}/extract-version.sh minor OUTPUT_VARIABLE bb_ver_min WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} COMMAND_ERROR_IS_FATAL ANY)
execute_process(COMMAND ${cmd_ver} ${CMAKE_SOURCE_DIR}/extract-version.sh revision OUTPUT_VARIABLE bb_ver_rev WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} COMMAND_ERROR_IS_FATAL ANY)
execute_process(COMMAND ${cmd_ver} ${CMAKE_SOURCE_DIR}/extract-version.sh suffix OUTPUT_VARIABLE bb_ver_suffix WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} COMMAND_ERROR_IS_FATAL ANY)
execute_process(COMMAND ${cmd_ver} ${CMAKE_SOURCE_DIR}/extract-version.sh commit OUTPUT_VARIABLE bb_ver_commit WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} COMMAND_ERROR_IS_FATAL ANY)
execute_process(COMMAND ${cmd_shell} ${CMAKE_SOURCE_DIR}/extract-version.${cmd_ext} major OUTPUT_VARIABLE bb_ver_maj WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} COMMAND_ERROR_IS_FATAL ANY)
execute_process(COMMAND ${cmd_shell} ${CMAKE_SOURCE_DIR}/extract-version.${cmd_ext} minor OUTPUT_VARIABLE bb_ver_min WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} COMMAND_ERROR_IS_FATAL ANY)
execute_process(COMMAND ${cmd_shell} ${CMAKE_SOURCE_DIR}/extract-version.${cmd_ext} revision OUTPUT_VARIABLE bb_ver_rev WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} COMMAND_ERROR_IS_FATAL ANY)
execute_process(COMMAND ${cmd_shell} ${CMAKE_SOURCE_DIR}/extract-version.${cmd_ext} suffix OUTPUT_VARIABLE bb_ver_suffix WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} COMMAND_ERROR_IS_FATAL ANY)
execute_process(COMMAND ${cmd_shell} ${CMAKE_SOURCE_DIR}/extract-version.${cmd_ext} commit OUTPUT_VARIABLE bb_ver_commit WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} COMMAND_ERROR_IS_FATAL ANY)

# Remove trailing whitespace incurred in Windows Git Bash
string(STRIP "${bb_ver_maj}" bb_ver_maj)
@@ -39,3 +46,5 @@ if(NOT DEFINED ENV{CI})
add_compile_definitions(BLADEBIT_VERSION_SUFFIX="${bb_ver_suffix}")
add_compile_definitions(BLADEBIT_GIT_COMMIT="${bb_ver_commit}")
endif()

set(bb_version_embedded on CACHE BOOL "Version embedding has already happened.")
11 changes: 6 additions & 5 deletions cuda/CudaPlotConfig.h
@@ -19,7 +19,7 @@
#define BBCU_TABLE_ENTRY_COUNT (1ull<<32)
#define BBCU_BUCKET_ENTRY_COUNT (BBCU_TABLE_ENTRY_COUNT/BBCU_BUCKET_COUNT)
//#define BBCU_XTRA_ENTRIES_PER_SLICE (1024u*64u)
#define BBCU_XTRA_ENTRIES_PER_SLICE (4096u*1u)
#define BBCU_XTRA_ENTRIES_PER_SLICE (4096+1024)
#define BBCU_MAX_SLICE_ENTRY_COUNT ((BBCU_BUCKET_ENTRY_COUNT/BBCU_BUCKET_COUNT)+BBCU_XTRA_ENTRIES_PER_SLICE)
#define BBCU_BUCKET_ALLOC_ENTRY_COUNT (BBCU_MAX_SLICE_ENTRY_COUNT*BBCU_BUCKET_COUNT)
#define BBCU_TABLE_ALLOC_ENTRY_COUNT (((uint64)BBCU_BUCKET_ALLOC_ENTRY_COUNT)*BBCU_BUCKET_COUNT)
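
This is the slice-overflow fix called out in the commit notes: per-slice head-room grows from 4096 to 5120 entries. A minimal sketch of the sizing arithmetic, assuming `BBCU_BUCKET_COUNT` is 128 (its value is not shown in this diff):

```cpp
#include <cstdint>

constexpr uint64_t kBucketCount   = 128;                          // assumed BBCU_BUCKET_COUNT
constexpr uint64_t kTableEntries  = 1ull << 32;                   // BBCU_TABLE_ENTRY_COUNT
constexpr uint64_t kBucketEntries = kTableEntries / kBucketCount; // BBCU_BUCKET_ENTRY_COUNT

// A slice is a bucket's share of a bucket, plus slack for uneven distribution.
constexpr uint64_t kMaxSliceOld = kBucketEntries / kBucketCount + 4096;        // 266,240 entries
constexpr uint64_t kMaxSliceNew = kBucketEntries / kBucketCount + 4096 + 1024; // 267,264 entries

static_assert( kMaxSliceNew - kMaxSliceOld == 1024, "the fix adds 1024 entries of slack per slice" );
```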
@@ -42,12 +42,12 @@ static_assert( BBCU_BUCKET_ALLOC_ENTRY_COUNT / BBCU_BUCKET_COUNT == BBCU_MAX_SLI
#ifdef _WIN32
#define DBG_BBCU_DBG_DIR "D:/dbg/cuda/"
#else
// #define DBG_BBCU_DBG_DIR "/home/harold/plot/dbg/cuda/"
#define DBG_BBCU_DBG_DIR "/home/harito/plot/dbg/cuda/"
#define DBG_BBCU_DBG_DIR "/home/harold/plotdisk/dbg/cuda/"
// #define DBG_BBCU_DBG_DIR "/home/harito/plots/dbg/cuda/"
#endif
// #define DBG_BBCU_REF_DIR "/home/harold/plot/ref/"
// #define DBG_BBCU_REF_DIR "/home/harold/plots/ref/"



// #define BBCU_DBG_SKIP_PHASE_1 1 // Skip phase 1 and load pairs from disk
// #define BBCU_DBG_SKIP_PHASE_2 1 // Skip phase 1 and 2 and load pairs and marks from disk

@@ -60,6 +60,7 @@ static_assert( BBCU_BUCKET_ALLOC_ENTRY_COUNT / BBCU_BUCKET_COUNT == BBCU_MAX_SLI
// #define DBG_BBCU_P2_WRITE_MARKS 1

// #define DBG_BBCU_P2_COUNT_PRUNED_ENTRIES 1
// #define DBG_BBCU_KEEP_TEMP_FILES 1


#define _ASSERT_DOES_NOT_OVERLAP( b0, b1, size ) ASSERT( (b1+size) <= b0 || b1 >= (b0+size) )
82 changes: 67 additions & 15 deletions cuda/CudaPlotContext.h
@@ -7,11 +7,16 @@
#include "plotting/PlotTypes.h"
#include "plotting/PlotWriter.h"
#include "GpuStreams.h"
#include "GpuQueue.h"
#include "util/StackAllocator.h"
#include "fse/fse.h"
#include "threading/Fence.h"
#include "plotting/GlobalPlotConfig.h"
#include "threading/ThreadPool.h"
#include "plotting/BufferChain.h"
#include "plotting/DiskBuffer.h"
#include "plotting/DiskBucketBuffer.h"
#include <filesystem>

#include "cub/device/device_radix_sort.cuh"
// #include <cub/device/device_radix_sort.cuh>
@@ -29,7 +34,51 @@ using namespace cooperative_groups;
#endif


struct CudaK32ParkContext
{
Span<byte> table7Memory; // Memory buffer reserved for finalizing table7 and writing C parks
BufferChain* parkBufferChain;
uint32 maxParkBuffers; // Maximum number of park buffers
uint64* hostRetainedLinePoints;
};

struct CudaK32HybridMode
{
// For clarity, these are the file names for the disk buffers
// whose disk space is shared for temp data between phases 1 and 3.
// The names indicate their usage and phase.
// The name indicates their usage and in which phase.
static constexpr std::string_view Y_DISK_BUFFER_FILE_NAME = "p1y-p3index.tmp";
static constexpr std::string_view META_DISK_BUFFER_FILE_NAME = "p1meta-p3rmap.tmp";
static constexpr std::string_view LPAIRS_DISK_BUFFER_FILE_NAME = "p1unsortedx-p1lpairs-p3lp-p3-lmap.tmp";

static constexpr std::string_view P3_RMAP_DISK_BUFFER_FILE_NAME = META_DISK_BUFFER_FILE_NAME;
static constexpr std::string_view P3_INDEX_DISK_BUFFER_FILE_NAME = Y_DISK_BUFFER_FILE_NAME;
static constexpr std::string_view P3_LP_AND_LMAP_DISK_BUFFER_FILE_NAME = LPAIRS_DISK_BUFFER_FILE_NAME;

DiskQueue* temp1Queue; // Tables Queue
DiskQueue* temp2Queue; // Metadata Queue (could be the same as temp1Queue)

DiskBucketBuffer* metaBuffer; // Enabled in < 128G mode
DiskBucketBuffer* yBuffer; // Enabled in < 128G mode
DiskBucketBuffer* unsortedL; // Unsorted Xs (or L pairs in < 128G) are written to disk (uint64 entries)
DiskBucketBuffer* unsortedR; // Unsorted R pairs in < 128G mode

DiskBuffer* tablesL[7];
DiskBuffer* tablesR[7];

GpuDownloadBuffer _tablesL[7];
GpuDownloadBuffer _tablesR[7];

struct
{
// #NOTE: These buffers share the same file-backed storage
// as other buffers in phase 1.
DiskBucketBuffer* rMapBuffer; // Step 1
DiskBucketBuffer* indexBuffer; // X-step/Step 2
DiskBucketBuffer* lpAndLMapBuffer; // X-step/Step 2 (LP) | Step 3 (LMap)

} phase3;
};

struct CudaK32Phase2
{
@@ -64,11 +113,12 @@ struct CudaK32Phase3
};

uint64 pairsLoadOffset;


// Device buffers
uint32* devBucketCounts;
uint32* devPrunedEntryCount;


// Host buffers
union {
RMap* hostRMap;
uint32* hostIndices;
@@ -79,12 +129,6 @@ struct CudaK32Phase3
uint64* hostLinePoints;
};

// #TODO: Remove this when we sort-out all of the buffer usage
// uint64* hostMarkingTables[6]; // Set by Phase 2


// uint32* hostBucketCounts;

uint32 prunedBucketCounts[7][BBCU_BUCKET_COUNT];
uint64 prunedTableEntryCounts[7];

@@ -111,9 +155,10 @@ struct CudaK32Phase3
// Step 2
struct {
GpuUploadBuffer rMapIn; // RMap from step 1
GpuUploadBuffer lMapIn; // Output map (uint64) from the previous table run. Or during L table 1, it is inlined x values
GpuUploadBuffer lMapIn; // Output map (uint64) from the previous table run. Or, when L table is the first stored table, it is inlined x values
GpuDownloadBuffer lpOut; // Output line points (uint64)
GpuDownloadBuffer indexOut; // Output source line point index (uint32) (taken from the rMap source value)
GpuDownloadBuffer parksOut; // Output P7 parks on the last table
uint32* devLTable[2]; // Unpacked L table bucket

uint32 prunedBucketSlices[BBCU_BUCKET_COUNT][BBCU_BUCKET_COUNT];
@@ -123,7 +168,7 @@ struct CudaK32Phase3
struct {
GpuUploadBuffer lpIn; // Line points from step 2
GpuUploadBuffer indexIn; // Indices from step 2
GpuDownloadBuffer mapOut; // lTable for next step 1
GpuDownloadBuffer mapOut; // lTable for next step 2
GpuDownloadBuffer parksOut; // Downloads park buffers to host

uint32* hostParkOverrunCount;
@@ -137,7 +182,6 @@ struct CudaK32Phase3
FSE_CTable* devCTable;
uint32* devParkOverrunCount;

Fence* parkFence;
std::atomic<uint32> parkBucket;

uint32 prunedBucketSlices[BBCU_BUCKET_COUNT][BBCU_BUCKET_COUNT];
@@ -178,8 +222,9 @@ struct CudaK32PlotContext
int32 cudaDevice = -1;
cudaDeviceProp* cudaDevProps = nullptr;
bool downloadDirect = false;
TableId firstStoredTable = TableId::Table2; // First non-dropped table that has back pointers
ThreadPool* threadPool = nullptr;

TableId table = TableId::Table1; // Current table being generated
uint32 bucket = 0; // Current bucket being processed

@@ -192,6 +237,7 @@ struct CudaK32PlotContext
PlotRequest plotRequest;
PlotWriter* plotWriter = nullptr;
Fence* plotFence = nullptr;
Fence* parkFence = nullptr;

// Root allocations
size_t allocAlignment = 0;
@@ -263,8 +309,6 @@ struct CudaK32PlotContext
uint32* hostBucketSlices = nullptr;
uint32* hostTableL = nullptr;
uint16* hostTableR = nullptr;
uint32* hostTableSortedL = nullptr;
uint16* hostTableSortedR = nullptr;

union {
uint32* hostMatchCount = nullptr;
@@ -279,6 +323,14 @@ struct CudaK32PlotContext
CudaK32Phase2* phase2 = nullptr;
CudaK32Phase3* phase3 = nullptr;

CudaK32HybridMode* diskContext = nullptr;
CudaK32ParkContext* parkContext = nullptr;
bool useParkContext = false;

// Used when '--check' is enabled
struct GreenReaperContext* grCheckContext = nullptr;
class PlotChecker* plotChecker = nullptr;

struct
{
Duration uploadTime = Duration::zero(); // Host-to-device wait time
@@ -359,7 +411,7 @@ inline uint32 CudaK32PlotGetOutputIndex( CudaK32PlotContext& cx )
}

//-----------------------------------------------------------
inline bool CudaK32PlotIsOutputInterleaved( CudaK32PlotContext& cx )
inline bool CudaK32PlotIsOutputVertical( CudaK32PlotContext& cx )
{
return CudaK32PlotGetOutputIndex( cx ) == 0;
}
113 changes: 69 additions & 44 deletions cuda/CudaPlotPhase2.cu
@@ -20,8 +20,7 @@
static void CudaK32PlotAllocateBuffersTest( CudaK32PlotContext& cx );

#define MARK_TABLE_BLOCK_THREADS 128
#define P2_BUCKET_COUNT BBCU_BUCKET_COUNT
#define P2_ENTRIES_PER_BUCKET BBCU_BUCKET_ALLOC_ENTRY_COUNT //((1ull<<BBCU_K)/P2_BUCKET_COUNT)
#define P2_ENTRIES_PER_BUCKET BBCU_BUCKET_ALLOC_ENTRY_COUNT //((1ull<<BBCU_K)/BBCU_BUCKET_COUNT)


inline size_t GetMarkingTableByteSize()
@@ -30,7 +29,8 @@ inline size_t GetMarkingTableByteSize()
}

template<bool useRMarks>
__global__ void CudaMarkTables( const uint32 entryCount, const uint32* lPairs, const uint16* rPairs, byte* marks, const uint64* rTableMarks, const uint32 rOffset )
__global__ void CudaMarkTables( const uint32 entryCount, const uint32* lPairs, const uint16* rPairs,
byte* marks, const uint64* rTableMarks, const uint32 rOffset )
{
const uint32 gid = blockIdx.x * blockDim.x + threadIdx.x;

@@ -39,11 +39,11 @@ __global__ void CudaMarkTables( const uint32 entryCount, const uint32* lPairs, c
return;

if constexpr ( useRMarks )
{
{
if( !CuBitFieldGet( rTableMarks, rOffset + gid ) )
return;
}

const uint32 l = lPairs[gid];
const uint32 r = l + rPairs[gid];

@@ -117,12 +117,12 @@ static void BytefieldToBitfield( CudaK32PlotContext& cx, const byte* bytefield,

ASSERT( (uint64)blockCount * blockThreadCount * 64 == tableEntryCount );

#if DBG_BBCU_P2_COUNT_PRUNED_ENTRIES
#if DBG_BBCU_P2_COUNT_PRUNED_ENTRIES
#define G_PRUNED_COUNTS ,cx.phase2->devPrunedCount
CudaErrCheck( cudaMemsetAsync( cx.phase2->devPrunedCount, 0, sizeof( uint32 ), stream ) );
#else
#else
#define G_PRUNED_COUNTS
#endif
#endif

ASSERT_DOES_NOT_OVERLAP2( bitfield, bytefield, GetMarkingTableBitFieldSize(), GetMarkingTableByteSize() );
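
`BytefieldToBitfield` packs one marking byte per entry into a dense bitfield, one output word per thread. A host-side sketch of the same packing (assumed semantics; the actual kernel body is not shown in this hunk):

```cpp
#include <cstdint>

// Every 64 mark bytes (each 0 or 1) collapse into one 64-bit word,
// with bit b of word i set iff byte i*64 + b is set.
void PackBytefieldToBitfield( const uint8_t* bytes, uint64_t* bits, const uint64_t wordCount )
{
    for( uint64_t i = 0; i < wordCount; i++ )
    {
        uint64_t word = 0;
        for( uint32_t b = 0; b < 64; b++ )
            word |= (uint64_t)( bytes[i * 64 + b] & 1 ) << b;

        bits[i] = word;
    }
}
```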

@@ -131,8 +131,11 @@ static void BytefieldToBitfield( CudaK32PlotContext& cx, const byte* bytefield,

void LoadPairs( CudaK32PlotContext& cx, CudaK32Phase2& p2, const TableId rTable, const uint32 bucket )
{
if( bucket >= BBCU_BUCKET_COUNT )
return;

const uint64 tableEntryCount = cx.tableEntryCounts[(int)rTable];
const uint32 entryCount = BBCU_BUCKET_ENTRY_COUNT;//(uint32)std::min( (uint64)BBCU_BUCKET_ENTRY_COUNT, tableEntryCount - p2.pairsLoadOffset );// cx.bucketCounts[(int)rTable][bucket];
const uint32 entryCount = cx.bucketCounts[(int)rTable][bucket];

// uint32* hostPairsL = cx.hostTableSortedL + p2.pairsLoadOffset;
// uint16* hostPairsR = cx.hostTableSortedR + p2.pairsLoadOffset;
@@ -163,42 +166,48 @@ void MarkTable( CudaK32PlotContext& cx, CudaK32Phase2& p2 )

byte* devLMarks = p2.devMarkingTable;

if( cx.cfg.hybrid128Mode )
{
cx.diskContext->tablesL[(int)rTable]->Swap();
cx.diskContext->tablesR[(int)rTable]->Swap();

p2.pairsLIn.AssignDiskBuffer( cx.diskContext->tablesL[(int)rTable] );
p2.pairsRIn.AssignDiskBuffer( cx.diskContext->tablesR[(int)rTable] );
}

// Zero-out marks
CudaErrCheck( cudaMemsetAsync( devLMarks, 0, GetMarkingTableByteSize(), cx.computeStream ) );

// Load first bucket's worth of pairs
LoadPairs( cx, p2, rTable, 0 );

uint32 rOffset = 0;
for( uint32 bucket = 0; bucket < P2_BUCKET_COUNT; bucket++ )
{
const bool isLastBucket = bucket + 1 == P2_BUCKET_COUNT;
// Mark the table, bucket by bucket
uint32 rTableGlobalIndexOffset = 0;

// Load next set of pairs in the background
if( !isLastBucket )
LoadPairs( cx, p2, rTable, bucket + 1 );
for( uint32 bucket = 0; bucket < BBCU_BUCKET_COUNT; bucket++ )
{
// Load next set of pairs in the background (if there is another bucket)
LoadPairs( cx, p2, rTable, bucket + 1 );

const uint64 tableEntryCount = cx.tableEntryCounts[(int)rTable];
const uint32 entryCount = isLastBucket ? tableEntryCount - (BBCU_BUCKET_ENTRY_COUNT * (BBCU_BUCKET_COUNT-1)): BBCU_BUCKET_ENTRY_COUNT;
// const uint32 entryCount = cx.bucketCounts[(int)rTable][bucket];
const uint32 entryCount = cx.bucketCounts[(int)rTable][bucket];

// Wait for pairs to be ready
const uint32* devLPairs = p2.pairsLIn.GetUploadedDeviceBufferT<uint32>( cx.computeStream );
const uint16* devRPairs = p2.pairsRIn.GetUploadedDeviceBufferT<uint16>( cx.computeStream );


// Mark
const uint32 blockCount = (uint32)CDiv( entryCount, MARK_TABLE_BLOCK_THREADS );

if( rTable == TableId::Table7 )
CudaMarkTables<false><<<blockCount, MARK_TABLE_BLOCK_THREADS, 0, cx.computeStream>>>( entryCount, devLPairs, devRPairs, devLMarks, nullptr, 0 );
else
CudaMarkTables<true ><<<blockCount, MARK_TABLE_BLOCK_THREADS, 0, cx.computeStream>>>( entryCount, devLPairs, devRPairs, devLMarks, p2.devRMarks[(int)rTable], rOffset );
CudaMarkTables<true ><<<blockCount, MARK_TABLE_BLOCK_THREADS, 0, cx.computeStream>>>( entryCount, devLPairs, devRPairs, devLMarks, p2.devRMarks[(int)rTable], rTableGlobalIndexOffset );

p2.pairsLIn.ReleaseDeviceBuffer( cx.computeStream );
p2.pairsRIn.ReleaseDeviceBuffer( cx.computeStream );

rOffset += entryCount;
rTableGlobalIndexOffset += entryCount;
}

// Convert the bytefield marking table to a bitfield
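
The refactored loop above is a double-buffered pipeline: while bucket `b` is being marked on the GPU, bucket `b + 1` is uploaded in the background, and the out-of-range prefetch on the last iteration is now absorbed by the new early-return guard in `LoadPairs` rather than an `isLastBucket` special case. Schematically (a sketch with illustrative names, not the actual code):

```cpp
#include <cstdint>

constexpr uint32_t kBucketCount = 128; // assumed BBCU_BUCKET_COUNT

void LoadBucketAsync( const uint32_t bucket )
{
    if( bucket >= kBucketCount )
        return; // absorbs the prefetch past the last bucket
    // ... enqueue the async host-to-device upload of this bucket's pairs ...
}

void MarkAllBuckets()
{
    LoadBucketAsync( 0 ); // prime the pipeline with the first bucket

    for( uint32_t bucket = 0; bucket < kBucketCount; bucket++ )
    {
        LoadBucketAsync( bucket + 1 ); // prefetch; no-op past the last bucket
        // ... wait for this bucket's pairs on the device, launch the marking
        //     kernel, then release the upload buffers for reuse ...
    }
}
```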
@@ -209,14 +218,14 @@ void MarkTable( CudaK32PlotContext& cx, CudaK32Phase2& p2 )
// Download bitfield marks
// uint64* hostBitField = p2.hostBitFieldAllocator->AllocT<uint64>( GetMarkingTableBitFieldSize() );
uint64* hostBitField = cx.hostMarkingTables[(int)lTable];

// #TODO: Do download and copy again, for now just store all of them in this pinned buffer
// cx.phase3->hostMarkingTables[(int)lTable] = hostBitField;
p2.outMarks.Download( hostBitField, GetMarkingTableBitFieldSize(), cx.computeStream );

// p2.outMarks.DownloadAndCopy( hostBitField, cx.hostMarkingTables[(int)lTable], GetMarkingTableBitFieldSize(), cx.computeStream );
// p2.outMarks.Download( cx.hostMarkingTables[(int)lTable], GetMarkingTableBitFieldSize() );


#if DBG_BBCU_P2_COUNT_PRUNED_ENTRIES
{
@@ -370,6 +379,9 @@ void CudaK32PlotPhase2( CudaK32PlotContext& cx )
MarkTable( cx, p2 );
p2.outMarks.WaitForCompletion();
p2.outMarks.Reset();
p2.pairsLIn.Reset();
p2.pairsRIn.Reset();

const auto elapsed = TimerEnd( timer );
Log::Line( "Marked Table %u in %.2lf seconds.", rTable, elapsed );

@@ -380,7 +392,7 @@ void CudaK32PlotPhase2( CudaK32PlotContext& cx )
}

// Wait for everything to complete

// p2.outMarks.WaitForCopyCompletion(); // #TODO: Re-activate this when re-enabling copy
p2.outMarks.WaitForCompletion();
p2.outMarks.Reset();
@@ -392,30 +404,39 @@ void CudaK32PlotPhase2( CudaK32PlotContext& cx )
///
void CudaK32PlotPhase2AllocateBuffers( CudaK32PlotContext& cx, CudaK32AllocContext& acx )
{
const size_t alignment = cx.allocAlignment;
GpuStreamDescriptor desc{};

desc.entriesPerSlice = P2_ENTRIES_PER_BUCKET;
desc.sliceCount = 1;
desc.sliceAlignment = cx.allocAlignment;
desc.bufferCount = BBCU_DEFAULT_GPU_BUFFER_COUNT;
desc.deviceAllocator = acx.devAllocator;
desc.pinnedAllocator = nullptr; // Start in direct mode (no intermediate pinned buffers)

if( cx.cfg.hybrid128Mode )
{
desc.pinnedAllocator = acx.pinnedAllocator;
desc.sliceAlignment = cx.diskContext->temp1Queue->BlockSize();
}

IAllocator& devAllocator = *acx.devAllocator;
IAllocator& pinnedAllocator = *acx.pinnedAllocator;
if( !cx.downloadDirect )
desc.pinnedAllocator = acx.pinnedAllocator;

CudaK32Phase2& p2 = *cx.phase2;

const size_t markingTableByteSize = GetMarkingTableByteSize();
const size_t markingTableBitFieldSize = GetMarkingTableBitFieldSize();

p2.devPrunedCount = devAllocator.CAlloc<uint32>( 1, alignment );
p2.devMarkingTable = devAllocator.AllocT<byte>( markingTableByteSize, alignment );

p2.pairsLIn = cx.gpuUploadStream[0]->CreateUploadBuffer(
sizeof( uint32 ) * P2_ENTRIES_PER_BUCKET, devAllocator, pinnedAllocator, alignment, acx.dryRun );
// Device buffers
p2.devPrunedCount = acx.devAllocator->CAlloc<uint32>( 1, acx.alignment );
p2.devMarkingTable = acx.devAllocator->AllocT<byte>( markingTableByteSize, acx.alignment );

p2.pairsRIn = cx.gpuUploadStream[0]->CreateUploadBuffer(
sizeof( uint16 ) * P2_ENTRIES_PER_BUCKET, devAllocator, pinnedAllocator, alignment, acx.dryRun );
// Upload/Download streams
p2.pairsLIn = cx.gpuUploadStream[0]->CreateUploadBufferT<uint32>( desc, acx.dryRun );
p2.pairsRIn = cx.gpuUploadStream[0]->CreateUploadBufferT<uint16>( desc, acx.dryRun );

p2.outMarks = cx.gpuDownloadStream[0]->CreateDirectDownloadBuffer(
markingTableBitFieldSize, devAllocator, alignment, acx.dryRun );

// These buffers are safe to use at this point
// p2.hostBitFieldAllocator = new StackAllocator( cx.hostTableR, sizeof( uint32 ) * BBCU_TABLE_ALLOC_ENTRY_COUNT );
desc.entriesPerSlice = markingTableBitFieldSize;
p2.outMarks = cx.gpuDownloadStream[0]->CreateDownloadBufferT<byte>( desc, acx.dryRun );
}


@@ -550,7 +571,7 @@ void DbgValidateTable( CudaK32PlotContext& cx )
{
{
uint64 totalCount = 0;
for( uint32 bucket = 0; bucket < P2_BUCKET_COUNT; bucket++ )
for( uint32 bucket = 0; bucket < BBCU_BUCKET_COUNT; bucket++ )
totalCount += cx.bucketCounts[(int)rt][bucket];

ASSERT( totalCount == cx.tableEntryCounts[(int)rt] );
@@ -562,7 +583,7 @@ void DbgValidateTable( CudaK32PlotContext& cx )

Pairs hostRTablePairs = cx.hostBackPointers[(int)rt];

for( uint32 bucket = 0; bucket < P2_BUCKET_COUNT; bucket++ )
for( uint32 bucket = 0; bucket < BBCU_BUCKET_COUNT; bucket++ )
{
const uint32 rTableBucketEntryCount = cx.bucketCounts[(int)rt][bucket];

@@ -638,9 +659,13 @@ void DbgWriteMarks( CudaK32PlotContext& cx, const TableId table )
{
char path[512];

std::string baseUrl = DBG_BBCU_DBG_DIR;
if( cx.cfg.hybrid128Mode )
baseUrl += "disk/";

Log::Line( "[DEBUG] Writing marking table %u to disk...", table+1 );
{
sprintf( path, "%smarks%d.tmp", DBG_BBCU_DBG_DIR, (int)table+1 );
sprintf( path, "%smarks%d.tmp", baseUrl.c_str(), (int)table+1 );

const uint64* marks = cx.hostMarkingTables[(int)table];

375 changes: 285 additions & 90 deletions cuda/CudaPlotPhase3.cu

Large diffs are not rendered by default.

29 changes: 14 additions & 15 deletions cuda/CudaPlotPhase3Internal.h
@@ -10,8 +10,18 @@
#include "plotdisk/jobs/IOJob.h"
#include "algorithm/RadixSort.h"
#include "plotmem/ParkWriter.h"
#include "b3/blake3.h"

void DbgValidateStep2Output( CudaK32PlotContext& cx );

void DbgHashData( const void* data, size_t size, const char* name, uint32 index );

void DbgFinishAndPrintHash( blake3_hasher& hasher, const char* name, uint32 index );
template<typename T>
inline void DbgHashDataT( const T* data, uint64 count, const char* name, uint32 index )
{
DbgHashData( data, (size_t)count * sizeof( T ), name, index );
}
#endif

using LMap = CudaK32Phase3::LMap;
@@ -27,22 +37,11 @@ static_assert( alignof( LMap ) == sizeof( uint32 ) );
#define P3_PRUNED_TABLE_MAX_ENTRIES BBCU_TABLE_ALLOC_ENTRY_COUNT //(P3_PRUNED_BUCKET_MAX*BBCU_BUCKET_COUNT)
#define P3_PRUNED_MAX_PARKS_PER_BUCKET ((P3_PRUNED_BUCKET_MAX/kEntriesPerPark)+2)

static constexpr size_t P3_MAX_CTABLE_SIZE = 38u * 1024u; // Should be more than enough

//static constexpr size_t P3_LP_BUCKET_COUNT = BBCU_BUCKET_COUNT;// << 1;
//static constexpr size_t P3_LP_SLICE_ENTRY_COUNT = BBCU_MAX_SLICE_ENTRY_COUNT;
//static constexpr uint32 P3_LP_BUCKET_BITS = BBC_BUCKET_BITS;

// static constexpr uint32 P3_LP_BUCKET_BITS = (uint32)(CuBBLog2( P3_LP_BUCKET_COUNT ));
//static constexpr size_t P3_LP_SLICE_ENTRY_COUNT = ( CuCDiv( (size_t)( ( BBCU_TABLE_ENTRY_COUNT / P3_LP_BUCKET_COUNT / P3_LP_BUCKET_COUNT ) * P3_LP_BUCKET_MULTIPLER ),
//BBCU_XTRA_ENTRIES_PER_SLICE ) * BBCU_XTRA_ENTRIES_PER_SLICE + BBCU_XTRA_ENTRIES_PER_SLICE );
// static constexpr size_t P3_LP_BUCKET_ENTRY_COUNT = P3_LP_SLICE_ENTRY_COUNT * P3_LP_BUCKET_COUNT;

//static constexpr size_t P3_LP_BUCKET_STRIDE = BBCU_BUCKET_ALLOC_ENTRY_COUNT;

// static constexpr size_t P3_LP_BUCKET_ALLOC_COUNT = ( CuCDiv( (size_t)( ( BBCU_TABLE_ENTRY_COUNT / P3_LP_BUCKET_COUNT / P3_LP_BUCKET_COUNT ) * P3_LP_BUCKET_MULTIPLER ),
// BBCU_XTRA_ENTRIES_PER_SLICE ) * BBCU_XTRA_ENTRIES_PER_SLICE + BBCU_XTRA_ENTRIES_PER_SLICE );
// //static constexpr size_t P3_LP_TABLE_ALLOC_COUNT = P3_LP_BUCKET_STRIDE * BBCU_BUCKET_COUNT;
static constexpr size_t P3_MAX_CTABLE_SIZE = 38u * 1024u; // Should be more than enough
static constexpr size_t P3_MAX_P7_PARKS_PER_BUCKET = CDiv( BBCU_BUCKET_ALLOC_ENTRY_COUNT, kEntriesPerPark ) + 2;
static constexpr size_t P3_PARK_7_SIZE = CalculatePark7Size( BBCU_K );
static_assert( sizeof( uint64 ) * BBCU_BUCKET_ALLOC_ENTRY_COUNT >= P3_MAX_P7_PARKS_PER_BUCKET * P3_PARK_7_SIZE );

static constexpr size_t MAX_PARK_SIZE = CalculateParkSize( TableId::Table1 );
static constexpr size_t DEV_MAX_PARK_SIZE = CuCDiv( MAX_PARK_SIZE, sizeof( uint64 ) ) * sizeof( uint64 ); // Align parks to 64 bits, for easier writing of stubs
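
For reference, the arithmetic behind `P3_PARK_7_SIZE`, assuming the chiapos defaults of `kEntriesPerPark = 2048` and `k + 1`-bit park-7 entries (both assumed; neither is shown in this diff):

```cpp
#include <cstddef>

constexpr size_t CDivS( size_t a, size_t b ) { return ( a + b - 1 ) / b; }

constexpr size_t kEntriesPerPark = 2048; // assumed chiapos default
constexpr size_t kK              = 32;   // BBCU_K

// Park 7 stores (k+1)-bit entries: 2048 * 33 bits = 8448 bytes per park.
constexpr size_t kPark7Size = CDivS( kEntriesPerPark * ( kK + 1 ), 8 );
static_assert( kPark7Size == 8448 );
```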
85 changes: 68 additions & 17 deletions cuda/CudaPlotPhase3Step2.cu
@@ -248,7 +248,7 @@ void CudaK32PlotPhase3Step2( CudaK32PlotContext& cx )

s2.rMapIn.UploadArrayT<RMap>( rmap, BBCU_BUCKET_COUNT, P3_PRUNED_SLICE_MAX, BBCU_BUCKET_COUNT, rSliceCounts );
};


const TableId rTable = cx.table;
const TableId lTable = rTable-1;
@@ -309,15 +309,14 @@ void CudaK32PlotPhase3Step2( CudaK32PlotContext& cx )
const auto* rMap = (RMap*)s2.rMapIn.GetUploadedDeviceBuffer( cx.computeStream );
const uint32 rEntryCount = p3.prunedBucketCounts[(int)rTable][bucket];


uint64* devOutLPs = (uint64*)s2.lpOut .LockDeviceBuffer( cx.computeStream );
uint32* devOutIndices = (uint32*)s2.indexOut.LockDeviceBuffer( cx.computeStream );

ConvertRMapToLinePoints( cx, rEntryCount, rTableOffset, devLTable, rMap, devOutLPs, devOutIndices, cx.computeStream );
s2.rMapIn.ReleaseDeviceBuffer( cx.computeStream );
rTableOffset += rEntryCount;


// Horizontal download (write 1 row)
s2.lpOut .Download2DT<uint64>( p3.hostLinePoints + (size_t)bucket * P3_PRUNED_BUCKET_MAX , P3_PRUNED_SLICE_MAX, BBCU_BUCKET_COUNT, P3_PRUNED_SLICE_MAX , P3_PRUNED_SLICE_MAX, cx.computeStream );
s2.indexOut.Download2DT<uint32>( p3.hostIndices + (size_t)bucket * P3_PRUNED_BUCKET_MAX*3, P3_PRUNED_SLICE_MAX, BBCU_BUCKET_COUNT, P3_PRUNED_SLICE_MAX*3, P3_PRUNED_SLICE_MAX, cx.computeStream );
@@ -354,7 +353,7 @@ void CudaK32PlotPhase3Step2( CudaK32PlotContext& cx )

CudaErrCheck( cudaMemcpyAsync( cx.hostBucketSlices, cx.devSliceCounts, sizeof( uint32 ) * BBCU_BUCKET_COUNT * BBCU_BUCKET_COUNT,
cudaMemcpyDeviceToHost, downloadStream ) );

memset( p3.prunedBucketCounts[(int)rTable], 0, BBCU_BUCKET_COUNT * sizeof( uint32 ) );

CudaErrCheck( cudaStreamSynchronize( downloadStream ) );
@@ -370,8 +369,15 @@ void CudaK32PlotPhase3Step2( CudaK32PlotContext& cx )
ASSERT( p3.prunedBucketCounts[(int)rTable][bucket] <= P3_PRUNED_BUCKET_MAX );
}

if( cx.cfg.hybrid16Mode )
{
cx.diskContext->phase3.rMapBuffer->Swap();
cx.diskContext->phase3.lpAndLMapBuffer->Swap();
cx.diskContext->phase3.indexBuffer->Swap();
}

// #if _DEBUG
// if( cx.table > TableId::Table3 )
// // if( cx.table > TableId::Table3 )
// {
// DbgValidateStep2Output( cx );
// }
@@ -402,23 +408,26 @@ void WritePark7( CudaK32PlotContext& cx )
auto& p3 = *cx.phase3;
auto& s2 = p3.step2;


// Load initial bucket
LoadBucket( cx, 0 );

// Begin park 7 table in plot
cx.plotWriter->BeginTable( PlotTable::Table7 );

constexpr size_t parkSize = CalculatePark7Size( BBCU_K );
constexpr size_t parkSize = P3_PARK_7_SIZE;
constexpr size_t parkFieldCount = parkSize / sizeof( uint64 );
static_assert( parkFieldCount * sizeof( uint64 ) == parkSize );

GpuDownloadBuffer& parkDownloader = cx.useParkContext ? s2.parksOut : s2.lpOut;

GpuDownloadBuffer& parkDownloader = s2.lpOut;

constexpr size_t maxParksPerBucket = CDiv( BBCU_BUCKET_ALLOC_ENTRY_COUNT, kEntriesPerPark ) + 2;
constexpr size_t maxParksPerBucket = P3_MAX_P7_PARKS_PER_BUCKET;
static_assert( sizeof( uint64 ) * BBCU_BUCKET_ALLOC_ENTRY_COUNT >= maxParksPerBucket * parkSize );

if( cx.useParkContext )
{
cx.parkContext->parkBufferChain->Reset();
}

// Host stuff
constexpr size_t hostMetaTableSize = sizeof( RMap ) * BBCU_TABLE_ALLOC_ENTRY_COUNT;
@@ -427,9 +436,10 @@ void WritePark7( CudaK32PlotContext& cx )
const uint64 tableEntryCount = cx.tableEntryCounts[(int)cx.table];
const size_t totalParkCount = CDiv( (size_t)tableEntryCount, kEntriesPerPark );

byte* hostParks = hostAllocator.AllocT<byte>( totalParkCount * parkSize );
byte* hostParkWriter = hostParks;
uint32* hostLastParkEntries = hostAllocator.CAlloc<uint32>( kEntriesPerPark );
byte* hostParks = cx.useParkContext ? nullptr : hostAllocator.AllocT<byte>( totalParkCount * parkSize );
byte* hostParksWriter = cx.useParkContext ? nullptr : hostParks;
uint32* hostLastParkEntries = cx.useParkContext ? (uint32*)cx.parkContext->hostRetainedLinePoints :
hostAllocator.CAlloc<uint32>( kEntriesPerPark );

static_assert( kEntriesPerPark * maxParksPerBucket <= BBCU_BUCKET_ALLOC_ENTRY_COUNT * 2 );
uint32* devIndexBuffer = s2.devLTable[0] + kEntriesPerPark;
@@ -479,14 +489,38 @@ void WritePark7( CudaK32PlotContext& cx )
// Download parks & write to plot
const size_t downloadSize = parkCount * parkSize;

parkDownloader.DownloadWithCallback( hostParkWriter, downloadSize,
if( cx.useParkContext )
{
ASSERT( downloadSize <= cx.parkContext->parkBufferChain->BufferSize() );

// Override the park buffer to be used when using a park context
hostParksWriter = cx.parkContext->parkBufferChain->PeekBuffer( bucket );

// Wait for the next park buffer to be available
parkDownloader.HostCallback([&cx]{
(void)cx.parkContext->parkBufferChain->GetNextBuffer();
});
}

parkDownloader.DownloadWithCallback( hostParksWriter, downloadSize,
[]( void* parksBuffer, size_t size, void* userData ) {

auto& cx = *reinterpret_cast<CudaK32PlotContext*>( userData );
cx.plotWriter->WriteTableData( parksBuffer, size );

// Release the buffer after the plot writer is done with it.
if( cx.useParkContext )
{
cx.plotWriter->CallBack([&cx](){
cx.parkContext->parkBufferChain->ReleaseNextBuffer();
});
}

}, &cx, cx.computeStream );

hostParkWriter += downloadSize;
hostParksWriter += downloadSize;
if( cx.useParkContext )
hostParksWriter = nullptr;
}

// Wait for parks to complete downloading
@@ -499,9 +533,19 @@ void WritePark7( CudaK32PlotContext& cx )
// Was there a left-over park?
if( retainedEntryCount > 0 )
{
if( cx.useParkContext )
hostParksWriter = cx.parkContext->parkBufferChain->GetNextBuffer();

// Submit last park to plot
TableWriter::WriteP7Parks( 1, hostLastParkEntries, hostParkWriter );
cx.plotWriter->WriteTableData( hostParkWriter, parkSize );
TableWriter::WriteP7Parks( 1, hostLastParkEntries, hostParksWriter );
cx.plotWriter->WriteTableData( hostParksWriter, parkSize );

if( cx.useParkContext )
{
cx.plotWriter->CallBack([&cx](){
cx.parkContext->parkBufferChain->ReleaseNextBuffer();
});
}
}
cx.plotWriter->EndTable();
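
This park writer (and the step 3 writer further down) follows the same buffer-chain discipline: `PeekBuffer` addresses a buffer by bucket without consuming it, `GetNextBuffer` blocks until the next buffer in the ring is free, and `ReleaseNextBuffer` returns the oldest one once the plot writer has flushed it. A minimal sketch of such a chain, assuming (not shown in this diff) that it is a fixed ring of reusable host buffers:

```cpp
#include <condition_variable>
#include <cstddef>
#include <mutex>
#include <vector>

// Illustrative stand-in for plotting/BufferChain.h (assumed semantics).
class BufferChainSketch
{
public:
    BufferChainSketch( const size_t bufferCount, const size_t bufferSize )
        : _buffers( bufferCount, std::vector<std::byte>( bufferSize ) )
        , _free   ( bufferCount )
    {}

    size_t BufferSize() const { return _buffers[0].size(); }

    // Address a buffer by bucket without consuming it.
    std::byte* PeekBuffer( const size_t bucket ) { return _buffers[bucket % _buffers.size()].data(); }

    // Block until the next buffer in the ring is free, then consume it.
    std::byte* GetNextBuffer()
    {
        std::unique_lock lock( _mutex );
        _cv.wait( lock, [this]{ return _free > 0; } );
        _free--;
        return _buffers[_next++ % _buffers.size()].data();
    }

    // Return the oldest in-flight buffer to the ring (after its write completed).
    void ReleaseNextBuffer()
    {
        { std::lock_guard lock( _mutex ); _free++; }
        _cv.notify_one();
    }

    void Reset()
    {
        std::lock_guard lock( _mutex );
        _next = 0;
        _free = _buffers.size();
    }

private:
    std::vector<std::vector<std::byte>> _buffers;
    std::mutex                          _mutex;
    std::condition_variable             _cv;
    size_t                              _next = 0;
    size_t                              _free;
};
```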

@@ -534,6 +578,7 @@ void _DbgValidateOutput( CudaK32PlotContext& cx )
auto& s2 = p3.step2;

// Validate line points...
Log::Debug( "[DEBUG] Validating line points..." );
uint64* refLinePoints = bbcvirtallocboundednuma<uint64>( BBCU_TABLE_ALLOC_ENTRY_COUNT );
uint64* tmpLinePoints = bbcvirtallocboundednuma<uint64>( BBCU_TABLE_ALLOC_ENTRY_COUNT );
uint32* indices = bbcvirtallocboundednuma<uint32>( BBCU_TABLE_ALLOC_ENTRY_COUNT );
@@ -614,9 +659,13 @@ void _DbgValidateOutput( CudaK32PlotContext& cx )
}
}

DbgHashDataT( refLinePoints, prunedEntryCount, "line_points", (uint32)cx.table+1 );

bbvirtfreebounded( refLinePoints );
bbvirtfreebounded( tmpLinePoints );
bbvirtfreebounded( indices );

Log::Debug( "[DEBUG] Line point validation OK" );
}

#endif
@@ -659,6 +708,8 @@ void DbgDumpSortedLinePoints( CudaK32PlotContext& cx )
ThreadPool& pool = *cx.threadPool; //DbgGetThreadPool( cx );
RadixSort256::Sort<BB_MAX_JOBS>( pool, sortedLinePoints, tmpLinePoints, prunedEntryCount );

// DbgHashDataT( sortedLinePoints, prunedEntryCount, "sorted_line_points", (uint32)cx.table+1 );

// Write to disk
{
char filePath[1024] = {};
113 changes: 89 additions & 24 deletions cuda/CudaPlotPhase3Step3.cu
@@ -52,12 +52,14 @@ void CudaK32PlotPhase3Step3( CudaK32PlotContext& cx )

// Load CTable
const bool isCompressed = cx.gCfg->compressionLevel > 0 && lTable <= (TableId)cx.gCfg->numDroppedTables;
const uint32 stubBitSize = !isCompressed ? (BBCU_K - kStubMinusBits) : cx.gCfg->compressionInfo.subtSizeBits;
const uint32 stubBitSize = !isCompressed ? (BBCU_K - kStubMinusBits) : cx.gCfg->compressionInfo.stubSizeBits;
const TableId firstTable = TableId::Table2 + (TableId)cx.gCfg->numDroppedTables;


const bool isFirstSerializedTable = firstTable == rTable;

const size_t cTableSize = !isCompressed ? sizeof( CTable_0 ) : cx.gCfg->cTableSize; ASSERT( cTableSize <= P3_MAX_CTABLE_SIZE );
const FSE_CTable* hostCTable = !isCompressed ? CTables[(int)lTable] : cx.gCfg->ctable;

// (must be uploaded before the first bucket, on the same stream)
CudaErrCheck( cudaMemcpyAsync( s3.devCTable, hostCTable, cTableSize, cudaMemcpyHostToDevice,
s3.lpIn.GetQueue()->GetStream() ) );
@@ -75,13 +77,32 @@ void CudaK32PlotPhase3Step3( CudaK32PlotContext& cx )
const size_t hostParkSize = isCompressed ? cx.gCfg->compressionInfo.tableParkSize : CalculateParkSize( lTable );
ASSERT( DEV_MAX_PARK_SIZE >= hostParkSize );

// #TODO: Move this allocation to the beginning
if( s3.parkFence == nullptr )
s3.parkFence = new Fence();

byte* hostParksWriter = (byte*)cx.hostBackPointers[(int)rTable].left; //(byte*)cx.hostTableL;
uint64* hostRetainedEntries = nullptr;

if( cx.cfg.hybrid128Mode )
{
hostParksWriter = (byte*)cx.hostTableL;

if( !isFirstSerializedTable && !cx.useParkContext )
{
// Ensure this buffer is no longer in use (the last table has finished writing to disk).
const bool willWaitForParkFence = cx.parkFence->Value() < BBCU_BUCKET_COUNT;
if( willWaitForParkFence )
Log::Line( " Waiting for parks buffer to become available." );

Duration parkWaitTime;
cx.parkFence->Wait( BBCU_BUCKET_COUNT, parkWaitTime );

if( willWaitForParkFence )
Log::Line( " Waited %.3lf seconds for the park buffer to be released.", TicksToSeconds( parkWaitTime ) );
}
}
if( cx.useParkContext )
{
cx.parkContext->parkBufferChain->Reset();
}

// if( !isCompressed && lTable == TableId::Table1 )
// hostParksWriter = (byte*)cx.hostBackPointers[(int)TableId::Table2].left;
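
The wait above is a counting-fence handshake: the download callback further down signals cx.parkFence once per park bucket, so a value of BBCU_BUCKET_COUNT means every park of the previous table has left this host buffer. A minimal sketch of the assumed Fence semantics (Signal( n ) raises the counter, Wait( n ) blocks until it is reached):

// Hedged sketch of the counting fence used above (assumed semantics only;
// the project's Fence also has a Wait overload that reports elapsed time).
#include <atomic>
#include <condition_variable>
#include <cstdint>
#include <mutex>

class FenceSketch
{
public:
    uint32_t Value() const { return _value.load( std::memory_order_acquire ); }

    void Signal( uint32_t value )
    {
        { std::lock_guard<std::mutex> lock( _mutex ); _value.store( value ); }
        _cv.notify_all();
    }

    void Wait( uint32_t value )
    {
        std::unique_lock<std::mutex> lock( _mutex );
        _cv.wait( lock, [&]{ return _value.load() >= value; } );
    }

    void Reset( uint32_t value ) { _value.store( value ); }

private:
    std::atomic<uint32_t>   _value{ 0 };
    std::mutex              _mutex;
    std::condition_variable _cv;
};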

@@ -101,7 +122,7 @@ void CudaK32PlotPhase3Step3( CudaK32PlotContext& cx )
// Set initial event LP stream event as set.
CudaErrCheck( cudaEventRecord( cx.computeEventA, lpStream ) );

s3.parkFence->Reset( 0 );
cx.parkFence->Reset( 0 );
s3.parkBucket = 0;

for( uint32 bucket = 0; bucket < BBCU_BUCKET_COUNT; bucket++ )
@@ -200,7 +221,8 @@ void CudaK32PlotPhase3Step3( CudaK32PlotContext& cx )
// No more buckets so we have to compress this last park on the CPU
CudaErrCheck( cudaStreamWaitEvent( downloadStream, cx.computeEventC ) );

hostRetainedEntries = (uint64*)( hostParksWriter + hostParkSize * parkCount );
hostRetainedEntries = cx.useParkContext ? cx.parkContext->hostRetainedLinePoints :
(uint64*)( hostParksWriter + hostParkSize * parkCount );
CudaErrCheck( cudaMemcpyAsync( hostRetainedEntries, copySource, copySize, cudaMemcpyDeviceToHost, downloadStream ) );
}
}
@@ -209,18 +231,42 @@ void CudaK32PlotPhase3Step3( CudaK32PlotContext& cx )


// Download parks
if( cx.useParkContext )
{
ASSERT( hostParkSize * parkCount <= cx.parkContext->parkBufferChain->BufferSize() );

// Override the park buffer to be used when using a park context
hostParksWriter = cx.parkContext->parkBufferChain->PeekBuffer( bucket );

// Wait for the next park buffer to be available
s3.parksOut.HostCallback([&cx]{
(void)cx.parkContext->parkBufferChain->GetNextBuffer();
});
}

s3.parksOut.Download2DWithCallback( hostParksWriter, hostParkSize, parkCount, hostParkSize, DEV_MAX_PARK_SIZE,
[]( void* parksBuffer, size_t size, void* userData ) {

auto& cx = *reinterpret_cast<CudaK32PlotContext*>( userData );
auto& s3 = cx.phase3->step3;

cx.plotWriter->WriteTableData( parksBuffer, size );
cx.plotWriter->SignalFence( *s3.parkFence, ++s3.parkBucket );
cx.plotWriter->SignalFence( *cx.parkFence, ++s3.parkBucket );

// Release the buffer after the plot writer is done with it.
if( cx.useParkContext )
{
cx.plotWriter->CallBack([&cx](){
cx.parkContext->parkBufferChain->ReleaseNextBuffer();
});
}

}, &cx, lpStream, cx.downloadDirect );

hostParksWriter += hostParkSize * parkCount;

if( cx.useParkContext )
hostParksWriter = nullptr;
}

// Copy park overrun count
@@ -242,18 +288,24 @@ void CudaK32PlotPhase3Step3( CudaK32PlotContext& cx )
// Was there a left-over park?
if( retainedLPCount > 0 )
{
ASSERT( hostRetainedEntries );

if( cx.useParkContext )
hostParksWriter = cx.parkContext->parkBufferChain->GetNextBuffer();

uint64 lastParkEntries[kEntriesPerPark];
bbmemcpy_t( lastParkEntries, hostRetainedEntries, retainedLPCount );

WritePark( hostParkSize, retainedLPCount, lastParkEntries, hostParksWriter, stubBitSize, hostCTable );
cx.plotWriter->WriteTableData( hostParksWriter, hostParkSize );

if( cx.useParkContext )
{
cx.plotWriter->CallBack([&cx](){
cx.parkContext->parkBufferChain->ReleaseNextBuffer();
});
}
}
cx.plotWriter->EndTable();

// Update buckets counts for L table
// #TODO: These should match Step 1 pruned entry count I believe, so just copy?

memset( p3.prunedBucketCounts[(int)rTable], 0, sizeof( uint32 ) * BBCU_BUCKET_COUNT );
for( uint32 i = 0; i < BBCU_BUCKET_COUNT; i++ )
@@ -266,12 +318,19 @@ void CudaK32PlotPhase3Step3( CudaK32PlotContext& cx )
s3.lpIn .Reset();
s3.indexIn.Reset();

if( cx.cfg.hybrid16Mode )
{
cx.diskContext->phase3.lpAndLMapBuffer->Swap();
cx.diskContext->phase3.indexBuffer->Swap();
}
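
In 16G hybrid mode these tables live in disk-backed double buffers, and Swap() flips which half is written by the current table and read back for the next one. The idea in miniature (assumed semantics; the real DiskBuffer performs asynchronous file I/O underneath):

// Hedged sketch: ping-pong buffer whose write half for table N becomes the
// read half for table N+1 after Swap(). Illustration only.
struct DoubleBufferSketch
{
    void* halves[2] = { nullptr, nullptr };
    int   writeIdx  = 0;

    void* WriteHalf() { return halves[writeIdx];     } // current table fills this
    void* ReadHalf()  { return halves[writeIdx ^ 1]; } // next table drains this

    void Swap() { writeIdx ^= 1; } // called once per table, after writes are submitted
};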


// #if _DEBUG
// //if( cx.table >= TableId::Table6 )
// //{
// DbgValidateLMap( cx );
// DbgValidateLMapData( cx );
// // DbgValidateLMap( cx );
// // DbgValidateLMapData( cx );

// // DbgSaveLMap( cx );
// //}
// #endif
@@ -386,7 +445,7 @@ void DbgSaveLMap( CudaK32PlotContext& cx )

char path[512];
sprintf( path, DBG_BBCU_DBG_DIR "p3.lmap.t%u.tmp", (uint)cx.table+1 );

const size_t writeSize = sizeof( LMap ) * BBCU_TABLE_ALLOC_ENTRY_COUNT;
int err;
FatalIf( !IOJob::WriteToFile( path, p3.hostLMap, writeSize, err ),
@@ -399,7 +458,7 @@ void DbgSaveLMap( CudaK32PlotContext& cx )
sprintf( path, DBG_BBCU_DBG_DIR "p3.lmap.t%u.buckets.tmp", (uint)cx.table+1 );
FatalIf( !IOJob::WriteToFileUnaligned( path, p3.prunedBucketCounts[(int)cx.table], sizeof( uint32 ) * BBCU_BUCKET_COUNT, err ),
"[DEBUG] Failed to write LMap buckets with error: %d", err );

Log::Line( " [DEBUG] OK" );
}

@@ -410,7 +469,7 @@ void DbgLoadLMap( CudaK32PlotContext& cx )

char path[512];
sprintf( path, DBG_BBCU_DBG_DIR "p3.lmap.t%u.tmp", (uint)cx.table+1 );

const size_t writeSize = sizeof( LMap ) * BBCU_TABLE_ALLOC_ENTRY_COUNT;
int err;
FatalIf( !IOJob::ReadFromFile( path, p3.hostLMap, writeSize, err ),
@@ -438,10 +497,12 @@ void DbgValidateLMap( CudaK32PlotContext& cx )
auto& p3 = *cx.phase3;
auto& s3 = p3.step3;

LMap* lMap = bbcvirtallocbounded<LMap>( BBCU_TABLE_ENTRY_COUNT );
LMap* lMap = bbcvirtallocbounded<LMap>( BBCU_BUCKET_ALLOC_ENTRY_COUNT );


{
// blake3_hasher hasher;
// blake3_hasher_init( &hasher );

for( uint32 bucket = 0; bucket < BBCU_BUCKET_COUNT; bucket++ )
{
const LMap* reader = p3.hostLMap + bucket * P3_PRUNED_BUCKET_MAX;
@@ -471,14 +532,18 @@ void DbgValidateLMap( CudaK32PlotContext& cx )
ASSERT( map.sourceIndex || map.sortedIndex );
ASSERT( ( map.sourceIndex >> ( 32 - BBC_BUCKET_BITS ) ) == bucket );
}

// Hash bucket
// blake3_hasher_update( &hasher, lMap, sizeof( LMap ) * entryCount );
}


// Print hash
// DbgFinishAndPrintHash( hasher, "l_map", (uint)cx.table + 1 );
}

bbvirtfreebounded( lMap );

Log::Line( "[DEBUG] OK" );
Log::Line( "[DEBUG] LMap OK" );
}

//-----------------------------------------------------------
@@ -566,7 +631,7 @@ void _DbgValidateLMapData( CudaK32PlotContext& cx )
bbvirtfreebounded( dstIndices );
bbvirtfreebounded( tmpIndices );

Log::Line( "[DEBUG] OK" );
Log::Line( "[DEBUG] LMap uniqueness OK" );
}

#endif
1,010 changes: 853 additions & 157 deletions cuda/CudaPlotter.cu

Large diffs are not rendered by default.

24 changes: 19 additions & 5 deletions cuda/CudaPlotter.h
@@ -9,10 +9,22 @@ struct CudaK32PlotConfig
{
const GlobalPlotConfig* gCfg = nullptr;

uint32 deviceIndex = 0; // Which CUDA device to use when plotting//
bool disableDirectDownloads = false; // Don't allocate host tables using pinned buffers, instead
// download to intermediate pinned buffers then copy to the final host buffer.
// May be necessarry on Windows because of shared memory limitations (usual 50% of system memory)
uint32 deviceIndex = 0; // Which CUDA device to use when plotting.
bool disableDirectDownloads = false; // Don't allocate host tables using pinned buffers, instead
// download to intermediate pinned buffers then copy to the final host buffer.
// May be necessary on Windows because of shared memory limitations (usually 50% of system memory)

bool hybrid128Mode = false; // Enable hybrid disk-offload w/ 128G of RAM.
bool hybrid16Mode = false; // Enable hybrid disk-offload w/ 64G of RAM.

const char* temp1Path = nullptr; // For 128G RAM mode
const char* temp2Path = nullptr; // For 64G RAM mode

bool temp1DirectIO = true; // Use direct I/O for temp1 files
bool temp2DirectIO = true; // Use direct I/O for temp2 files

uint64 plotCheckCount = 0; // For performing plot check command after plotting
double plotCheckThreshhold = 0.6; // Proof/check threshold below which plots will be deleted
};

class CudaK32Plotter : public IPlotter
@@ -28,4 +40,6 @@ class CudaK32Plotter : public IPlotter
private:
CudaK32PlotConfig _cfg = {};
struct CudaK32PlotContext* _cx = nullptr;
};
};

void CudaK32PlotterPrintHelp();
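
Taken together, the new fields map onto the disk-hybrid and plot-check options from this release. A hedged example of filling the config for 128G hybrid mode (only the field names from the header above are real; the surrounding variables and path are placeholders):

// Hedged sketch: configuring the CUDA plotter for 128G disk-hybrid mode.
// globalCfg is a placeholder for the caller's GlobalPlotConfig.
CudaK32PlotConfig cfg = {};
cfg.gCfg                = &globalCfg;
cfg.deviceIndex         = 0;
cfg.hybrid128Mode       = true;            // offload to disk with 128G of RAM
cfg.temp1Path           = "/mnt/fast-ssd"; // hypothetical temp directory
cfg.temp1DirectIO       = true;            // direct I/O for temp1 files
cfg.plotCheckCount      = 100;             // run the plot checker afterwards
cfg.plotCheckThreshhold = 0.8;             // delete plots that score below this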
385 changes: 385 additions & 0 deletions cuda/GpuDownloadStream.cu
@@ -0,0 +1,385 @@
#include "GpuStreams.h"
#include "GpuQueue.h"
#include "plotting/DiskBucketBuffer.h"
#include "plotting/DiskBuffer.h"


///
/// DownloadBuffer
///
void* GpuDownloadBuffer::GetDeviceBuffer()
{
const uint32 index = self->outgoingSequence % self->bufferCount;

CudaErrCheck( cudaEventSynchronize( self->events[index] ) );

return self->deviceBuffer[index];
}

void* GpuDownloadBuffer::LockDeviceBuffer( cudaStream_t stream )
{
ASSERT( self->lockSequence >= self->outgoingSequence );
ASSERT( self->lockSequence - self->outgoingSequence < self->bufferCount );

const uint32 index = self->lockSequence % self->bufferCount;
self->lockSequence++;

// Wait for the device buffer to be free to be used by kernels
CudaErrCheck( cudaStreamWaitEvent( stream, self->events[index] ) );
return self->deviceBuffer[index];
}

void GpuDownloadBuffer::Download( void* hostBuffer, const size_t size )
{
Download2D( hostBuffer, size, 1, size, size );
}

void GpuDownloadBuffer::Download( void* hostBuffer, const size_t size, cudaStream_t workStream, bool directOverride )
{
Download2D( hostBuffer, size, 1, size, size, workStream, directOverride );
}

void GpuDownloadBuffer::DownloadAndCopy( void* hostBuffer, void* finalBuffer, const size_t size, cudaStream_t workStream )
{
Panic( "Unavailable" );
// ASSERT( self->outgoingSequence < BBCU_BUCKET_COUNT );
// ASSERT( hostBuffer );
// ASSERT( workStream );
// ASSERT( self->lockSequence > 0 );
// ASSERT( self->outgoingSequence < self->lockSequence );
// ASSERT( self->lockSequence - self->outgoingSequence <= self->bufferCount );

// auto& cpy = self->copies[self->outgoingSequence];
// cpy.self = self;
// cpy.sequence = self->outgoingSequence;
// cpy.copy.hostBuffer = finalBuffer;
// cpy.copy.srcBuffer = hostBuffer;
// cpy.copy.size = size;


// const uint32 index = self->outgoingSequence % self->bufferCount;
// self->outgoingSequence++;

// void* pinnedBuffer = self->pinnedBuffer[index];
// const void* devBuffer = self->deviceBuffer[index];

// // Signal from the work stream when it has finished doing kernel work with the device buffer
// CudaErrCheck( cudaEventRecord( self->readyEvents[index], workStream ) );


// // Ensure the work stream has completed writing data to the device buffer
// cudaStream_t stream = self->queue->_stream;

// CudaErrCheck( cudaStreamWaitEvent( stream, self->readyEvents[index] ) );

// // Copy
// CudaErrCheck( cudaMemcpyAsync( hostBuffer, devBuffer, size, cudaMemcpyDeviceToHost, stream ) );

// // Signal that the device buffer is free to be re-used
// CudaErrCheck( cudaEventRecord( self->events[index], stream ) );

// // Launch copy command
// CudaErrCheck( cudaLaunchHostFunc( stream, []( void* userData ){

// const CopyInfo& c = *reinterpret_cast<CopyInfo*>( userData );
// IGpuBuffer* self = c.self;

// auto& cmd = self->queue->GetCommand( GpuQueue::CommandType::Copy );
// cmd.copy.info = &c;

// self->queue->SubmitCommands();

// // Signal the download completed
// self->fence.Signal( ++self->completedSequence );
// }, &cpy ) );
}

void GpuDownloadBuffer::DownloadWithCallback( void* hostBuffer, const size_t size, GpuDownloadCallback callback, void* userData, cudaStream_t workStream, bool directOverride )
{
Download2DWithCallback( hostBuffer, size, 1, size, size, callback, userData, workStream, directOverride );
}

void GpuDownloadBuffer::Download2D( void* hostBuffer, size_t width, size_t height, size_t dstStride, size_t srcStride, cudaStream_t workStream, bool directOverride )
{
Download2DWithCallback( hostBuffer, width, height, dstStride, srcStride, nullptr, nullptr, workStream, directOverride );
}

void GpuDownloadBuffer::Download2DWithCallback( void* hostBuffer, size_t width, size_t height, size_t dstStride, size_t srcStride,
GpuDownloadCallback callback, void* userData, cudaStream_t workStream, bool directOverride )
{
PerformDownload2D( hostBuffer, width, height, dstStride, srcStride,
callback, userData,
workStream, directOverride );
}

void GpuDownloadBuffer::PerformDownload2D( void* hostBuffer, size_t width, size_t height, size_t dstStride, size_t srcStride,
GpuDownloadCallback postCallback, void* postUserData,
cudaStream_t workStream, bool directOverride )
{
PanicIf( !(hostBuffer || self->pinnedBuffer[0] ), "" );
ASSERT( workStream );
ASSERT( self->lockSequence > 0 );
ASSERT( self->outgoingSequence < self->lockSequence );
ASSERT( self->lockSequence - self->outgoingSequence <= self->bufferCount );

const uint32 index = self->outgoingSequence++ % self->bufferCount;

void* pinnedBuffer = self->pinnedBuffer[index];
void* finalHostBuffer = hostBuffer;
const void* devBuffer = self->deviceBuffer[index];

const bool isDirect = (directOverride || self->pinnedBuffer[0] == nullptr) && !self->diskBuffer; ASSERT( isDirect || self->pinnedBuffer[0] );
const bool isSequentialCopy = dstStride == srcStride;
const size_t totalSize = height * width;


// Signal from the work stream when it has finished doing kernel work with the device buffer
CudaErrCheck( cudaEventRecord( self->workEvent[index], workStream ) );

// From the download stream, wait for the work stream to finish
cudaStream_t downloadStream = self->queue->_stream;
CudaErrCheck( cudaStreamWaitEvent( downloadStream, self->workEvent[index] ) );


if( self->diskBuffer )
{
// Wait until the next disk buffer is ready for use.
// This also signals that the pinned buffer is ready for re-use
CallHostFunctionOnStream( downloadStream, [this](){
self->diskBuffer->GetNextWriteBuffer();
});

pinnedBuffer = self->diskBuffer->PeekWriteBufferForBucket( self->outgoingSequence-1 );
}

if( !isDirect )
{
// Ensure that the pinned buffer is ready for use
// (we signal pinned buffers are ready when using disks without events)
if( !self->diskBuffer )
CudaErrCheck( cudaStreamWaitEvent( downloadStream, self->pinnedEvent[index] ) );

// Set host buffer as the pinned buffer
hostBuffer = pinnedBuffer;
}


// Copy from device to host buffer
// #NOTE: Since the pinned buffer is simply the same size (a full bucket) as the device buffer
// we also always copy as 1D if we're copying to our pinned buffer.
ASSERT( hostBuffer );
if( isSequentialCopy || hostBuffer == pinnedBuffer )
CudaErrCheck( cudaMemcpyAsync( hostBuffer, devBuffer, totalSize, cudaMemcpyDeviceToHost, downloadStream ) );
else
CudaErrCheck( cudaMemcpy2DAsync( hostBuffer, dstStride, devBuffer, srcStride, width, height, cudaMemcpyDeviceToHost, downloadStream ) );

// Dispatch a host callback if one was set
if( postCallback )
{
CallHostFunctionOnStream( downloadStream, [=](){
(*postCallback)( finalHostBuffer, totalSize, postUserData );
});
}


// Signal that the device buffer is free to be re-used
CudaErrCheck( cudaEventRecord( self->deviceEvents[index], downloadStream ) );

if( self->diskBuffer )
{
// If it's a disk-based copy, then write the pinned buffer to disk
CallHostFunctionOnStream( downloadStream, [=]() {

auto* diskBucketBuffer = dynamic_cast<DiskBucketBuffer*>( self->diskBuffer );
if( diskBucketBuffer != nullptr )
diskBucketBuffer->Submit( srcStride );
else
static_cast<DiskBuffer*>( self->diskBuffer )->Submit( totalSize );
});

// #NOTE: We don't need to signal that the pinned buffer is ready for re-use here as
// we do that implicitly with DiskBuffer::GetNextWriteBuffer (see above).
}
else if( !isDirect )
{
// #TODO: Do this in a different host copy stream, and signal from there.
// #MAYBE: Perhaps use multiple host threads/streams to do host-to-host copies.
// for now do it on the same download stream, but we will be blocking the download stream,
// unless other download streams are used by other buffers.


ASSERT( hostBuffer == pinnedBuffer );
if( isSequentialCopy )
CudaErrCheck( cudaMemcpyAsync( finalHostBuffer, hostBuffer, totalSize, cudaMemcpyHostToHost, downloadStream ) );
else
CudaErrCheck( cudaMemcpy2DAsync( finalHostBuffer, dstStride, hostBuffer, srcStride, width, height, cudaMemcpyHostToHost, downloadStream ) );

// Signal the pinned buffer is free to be re-used
CudaErrCheck( cudaEventRecord( self->pinnedEvent[index], downloadStream ) );
}
}
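
PerformDownload2D therefore resolves to one of three routes: direct (device to final host buffer), staged (device to pinned, then pinned to host on the same stream), or disk-backed (device to pinned, then the pinned buffer is submitted to the DiskBuffer). A hedged caller-side sketch of the callback route (everything except the method and the DEV_MAX_PARK_SIZE stride is a placeholder):

// Hedged usage sketch of the download path above. `parksOut` is a
// GpuDownloadBuffer; the writer type and variable names are placeholders.
parksOut.Download2DWithCallback(
    hostParks,             // final destination (pinned buffer used when staging)
    parkSize, parkCount,   // width in bytes per row, number of rows
    parkSize,              // dstStride: rows packed tightly on the host
    DEV_MAX_PARK_SIZE,     // srcStride: device rows padded to the max park size
    []( void* buf, size_t size, void* user ) {
        // Invoked from the download stream once this bucket's copy (and any
        // disk submit) has been enqueued; hand the data to the plot writer.
        static_cast<PlotWriterLike*>( user )->Write( buf, size );
    },
    &writer,               // userData
    computeStream,         // workStream that produced the device data
    false );               // directOverride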

void GpuDownloadBuffer::CallHostFunctionOnStream( cudaStream_t stream, std::function<void()> func )
{
auto* fnCpy = new std::function<void()>( std::move( func ) );
CudaErrCheck( cudaLaunchHostFunc( stream, []( void* userData ) {

auto& fn = *reinterpret_cast<std::function<void()>*>( userData );
fn();
delete& fn;

}, fnCpy ) );
}

void GpuDownloadBuffer::HostCallback( std::function<void()> func )
{
CallHostFunctionOnStream( self->queue->GetStream(), func );
}
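
cudaLaunchHostFunc only accepts a plain C function pointer plus a void*, which is why CallHostFunctionOnStream heap-copies the std::function and deletes it inside the trampoline. The same pattern in isolation:

// The trampoline pattern above, in isolation. The capturing callable is
// moved to the heap and reclaimed exactly once inside the callback.
#include <cuda_runtime.h>
#include <functional>

static void EnqueueHostWork( cudaStream_t stream, std::function<void()> fn )
{
    auto* heapFn = new std::function<void()>( std::move( fn ) );

    cudaLaunchHostFunc( stream, []( void* userData ) {
        auto* f = static_cast<std::function<void()>*>( userData );
        (*f)();    // runs on CUDA's host-callback thread
        delete f;  // note: CUDA forbids CUDA API calls inside this callback
    }, heapFn );
}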

void GpuDownloadBuffer::GetDownload2DCommand( void* hostBuffer, size_t width, size_t height, size_t dstStride, size_t srcStride,
uint32& outIndex, void*& outPinnedBuffer, const void*& outDevBuffer, GpuDownloadCallback callback, void* userData )
{
ASSERT( width );
ASSERT( height );
ASSERT( hostBuffer );

const uint32 index = self->outgoingSequence % self->bufferCount;

// We need to block until the pinned buffer is available.
if( self->outgoingSequence > self->bufferCount-1 )
self->fence.Wait( self->outgoingSequence - self->bufferCount + 1 );

void* pinnedBuffer = self->pinnedBuffer[index];
const void* devBuffer = self->deviceBuffer[index];

//auto& cmd = self->commands[index];
//cmd.type = GpuQueue::CommandType::Copy2D;
//cmd.sequenceId = self->outgoingSequence++;
//cmd.finishedSignal = &self->fence;
//cmd.dstBuffer = hostBuffer;
//cmd.srcBuffer = pinnedBuffer;
//cmd.copy2d.width = width;
//cmd.copy2d.height = height;
//cmd.copy2d.dstStride = dstStride;
//cmd.copy2d.srcStride = srcStride;
//cmd.copy2d.callback = callback;
//cmd.copy2d.userData = userData;

outIndex = index;
outPinnedBuffer = pinnedBuffer;
outDevBuffer = devBuffer;
}


void GpuDownloadBuffer::DownloadAndPackArray( void* hostBuffer, const uint32 length, size_t srcStride, const uint32* counts, const uint32 elementSize )
{
ASSERT( length );
ASSERT( elementSize );
ASSERT( counts );

uint32 totalElements = 0;
for( uint32 i = 0; i < length; i++ )
totalElements += counts[i];

const size_t totalSize = (size_t)totalElements * elementSize;

uint32 index;
void* pinnedBuffer;
const void* devBuffer;
GetDownload2DCommand( hostBuffer, totalSize, 1, totalSize, totalSize, index, pinnedBuffer, devBuffer );


srcStride *= elementSize;

byte* dst = (byte*)pinnedBuffer;
const byte* src = (byte*)devBuffer;

cudaStream_t stream = self->queue->_stream;

// Copy all buffers from device to pinned buffer
for( uint32 i = 0; i < length; i++ )
{
const size_t copySize = counts[i] * (size_t)elementSize;

// #TODO: Determine if there's a cuda (jagged) array copy
CudaErrCheck( cudaMemcpyAsync( dst, src, copySize, cudaMemcpyDeviceToHost, stream ) );

src += srcStride;
dst += copySize;
}

// Signal that the device buffer is free
CudaErrCheck( cudaEventRecord( self->events[index], stream ) );

// Submit command to do the final copy from pinned to host
CudaErrCheck( cudaLaunchHostFunc( stream, GpuQueue::CopyPendingDownloadStream, self ) );
}
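
DownloadAndPackArray flattens a strided, partially filled device layout (one slice per bucket, each with its own entry count) into one contiguous host run. A worked micro-example of the offset math, assuming three slices:

// Worked micro-example of the packing above (counts in elements).
const uint32 counts[3] = { 3, 1, 2 };
const uint32 srcStride = 4;  // elements between slice starts on the device

uint32 dstOffset = 0;
for( uint32 i = 0; i < 3; i++ )
{
    const uint32 srcOffset = i * srcStride;  // 0, 4, 8
    // copy counts[i] elements from srcOffset to dstOffset...
    dstOffset += counts[i];                  // 3, then 4, then 6 total
}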

void GpuDownloadBuffer::WaitForCompletion()
{
if( self->outgoingSequence > 0 )
{
//const uint32 index = (self->outgoingSequence - 1) % self->bufferCount;

// cudaEvent_t event = self->completedEvents[index];
//const cudaError_t r = cudaEventQuery( event );

//if( r == cudaSuccess )
// return;

//if( r != cudaErrorNotReady )
// CudaErrCheck( r );

//CudaErrCheck( cudaEventSynchronize( event ) );


cudaStream_t downloadStream = self->queue->_stream;
// this->self->fence.Reset( 0 );
CallHostFunctionOnStream( downloadStream, [this](){
this->self->fence.Signal( this->self->outgoingSequence );
});
self->fence.Wait( self->outgoingSequence );

}
}

void GpuDownloadBuffer::WaitForCopyCompletion()
{
if( self->outgoingSequence > 0 )
{
self->copyFence.Wait( self->outgoingSequence );
}
}

void GpuDownloadBuffer::Reset()
{
self->lockSequence = 0;
self->outgoingSequence = 0;
self->completedSequence = 0;
self->copySequence = 0;
self->fence.Reset( 0 );
self->copyFence.Reset( 0 );
}

GpuQueue* GpuDownloadBuffer::GetQueue() const
{
return self->queue;
}

void GpuDownloadBuffer::AssignDiskBuffer( DiskBufferBase* diskBuffer )
{
// ASSERT( self->pinnedBuffer[0] );

void* nullBuffers[2] = { nullptr, nullptr };
if( self->diskBuffer )
self->diskBuffer->AssignWriteBuffers( nullBuffers );

self->diskBuffer = diskBuffer;
if( self->diskBuffer )
self->diskBuffer->AssignWriteBuffers( self->pinnedBuffer );
}

DiskBufferBase* GpuDownloadBuffer::GetDiskBuffer() const
{
return self->diskBuffer;
}
432 changes: 432 additions & 0 deletions cuda/GpuQueue.cu

Large diffs are not rendered by default.

188 changes: 188 additions & 0 deletions cuda/GpuQueue.h
@@ -0,0 +1,188 @@
#pragma once

#include "GpuStreams.h"
#include <functional>

class DiskQueue;

struct GpuStreamDescriptor
{
size_t entrySize;
size_t entriesPerSlice;
uint32 sliceCount;
uint32 sliceAlignment;
uint32 bufferCount;
IAllocator* deviceAllocator;
IAllocator* pinnedAllocator;
DiskQueue* diskQueue; // DiskQueue to use when disk offload mode is enabled.
const char* diskFileName; // File name to use when disk offload mode is enabled. The diskQueue must be set.
bool bucketedDiskBuffer; // If true, a DiskBucketBuffer will be used instead of a DiskBuffer.
bool directIO; // If true, direct I/O will be used when using disk offload mode.
};
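
A GpuStreamDescriptor bundles everything the buffer factories below need; presumably CalculateBufferSizeFromDescriptor derives a buffer's size from entrySize × entriesPerSlice × sliceCount plus slice alignment. A hedged example of filling one for an in-memory (no disk offload) download stream; the allocators and capacity are placeholders:

// Hedged sketch: descriptor for a double-buffered, memory-only download stream.
GpuStreamDescriptor desc = {};
desc.entrySize          = sizeof( uint64 );
desc.entriesPerSlice    = maxEntriesPerSlice; // placeholder capacity
desc.sliceCount         = BBCU_BUCKET_COUNT;  // one slice per bucket
desc.sliceAlignment     = 4096;               // assumed page alignment
desc.bufferCount        = 2;                  // double-buffered
desc.deviceAllocator    = &devAllocator;      // caller's allocators
desc.pinnedAllocator    = &pinnedAllocator;
desc.diskQueue          = nullptr;            // no disk offload
desc.diskFileName       = nullptr;
desc.bucketedDiskBuffer = false;
desc.directIO           = false;

GpuDownloadBuffer lpOut = queue.CreateDownloadBufferT<uint64>( desc );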

typedef std::function<void()> GpuCallbackDispatch;

class GpuQueue
{
friend struct IGpuBuffer;
friend struct GpuDownloadBuffer;
friend struct GpuUploadBuffer;

enum class CommandType
{
None = 0,
Copy,
CopyArray,
Callback,
};

struct Command
{
CommandType type;

union
{
struct CopyInfo* copy;

struct {
GpuDownloadCallback callback;
size_t copySize;
void* dstbuffer;
void* userData;
} callback;
};
};

public:

enum Kind
{
Downloader,
Uploader
};

GpuQueue( Kind kind );
virtual ~GpuQueue();

static size_t CalculateSliceSizeFromDescriptor( const GpuStreamDescriptor& desc );
static size_t CalculateBufferSizeFromDescriptor( const GpuStreamDescriptor& desc );

//GpuDownloadBuffer CreateDownloadBuffer( void* dev0, void* dev1, void* pinned0, void* pinned1, size_t size = 0, bool dryRun = false );
//GpuDownloadBuffer CreateDownloadBuffer( const size_t size, bool dryRun = false );
GpuDownloadBuffer CreateDirectDownloadBuffer( size_t size, IAllocator& devAllocator, size_t alignment, bool dryRun = false );
GpuDownloadBuffer CreateDownloadBuffer( size_t size, IAllocator& devAllocator, IAllocator& pinnedAllocator, size_t alignment, bool dryRun = false );
GpuDownloadBuffer CreateDownloadBuffer( size_t size, uint32 bufferCount, IAllocator& devAllocator, IAllocator& pinnedAllocator, size_t alignment, bool dryRun = false );

GpuDownloadBuffer CreateDownloadBuffer( const GpuStreamDescriptor& desc, bool dryRun = false );

/// Create with descriptor and override entry size
inline GpuDownloadBuffer CreateDownloadBuffer( const GpuStreamDescriptor& desc, size_t entrySize, bool dryRun = false )
{
GpuStreamDescriptor copy = desc;
copy.entrySize = entrySize;

return CreateDownloadBuffer( copy, dryRun );
}

template<typename T>
inline GpuDownloadBuffer CreateDownloadBufferT( const GpuStreamDescriptor& desc, bool dryRun = false )
{
return CreateDownloadBuffer( desc, sizeof( T ), dryRun );
}

/// Create with descriptor and override entry size
GpuUploadBuffer CreateUploadBuffer( const GpuStreamDescriptor& desc, bool dryRun = false );

// inline GpuUploadBuffer CreateUploadBuffer( const GpuStreamDescriptor& desc, bool size_t entrySize, bool dryRun = false )
// {
// GpuStreamDescriptor copy = desc;
// copy.entrySize = entrySize;

// return CreateUploadBuffer( copy, dryRun );
// }

template<typename T>
inline GpuUploadBuffer CreateUploadBufferT( const GpuStreamDescriptor& desc, bool dryRun = false )
{
GpuStreamDescriptor copy = desc;
copy.entrySize = sizeof(T);

return CreateUploadBuffer( copy, dryRun );
// return CreateUploadBuffer( desc, sizeof( T ), dryRun );
}


template<typename T>
inline GpuDownloadBuffer CreateDirectDownloadBuffer( const size_t count, IAllocator& devAllocator, size_t alignment = alignof( T ), bool dryRun = false )
{
return CreateDirectDownloadBuffer( count * sizeof( T ), devAllocator, alignment, dryRun );
}

template<typename T>
inline GpuDownloadBuffer CreateDownloadBufferT( const size_t count, IAllocator& devAllocator, IAllocator& pinnedAllocator, size_t alignment = alignof( T ), bool dryRun = false )
{
return CreateDownloadBuffer( count * sizeof( T ), devAllocator, pinnedAllocator, alignment, dryRun );
}

template<typename T>
inline GpuDownloadBuffer CreateDownloadBufferT( const size_t count, uint32 bufferCount, IAllocator& devAllocator, IAllocator& pinnedAllocator, size_t alignment = alignof( T ), bool dryRun = false )
{
return CreateDownloadBuffer( count * sizeof( T ), bufferCount, devAllocator, pinnedAllocator, alignment, dryRun );
}

//GpuUploadBuffer CreateUploadBuffer( void* dev0, void* dev1, void* pinned0, void* pinned1, size_t size = 0, bool dryRun = false );
//GpuUploadBuffer CreateUploadBuffer( const size_t size, bool dryRun = false );
GpuUploadBuffer CreateUploadBuffer( const size_t size, IAllocator& devAllocator, IAllocator& pinnedAllocator, size_t alignment, bool dryRun = false );

template<typename T>
inline GpuUploadBuffer CreateUploadBufferT( const size_t count, IAllocator& devAllocator, IAllocator& pinnedAllocator, size_t alignment, bool dryRun = false )
{
return CreateUploadBuffer( count * sizeof( T ), devAllocator, pinnedAllocator, alignment, dryRun );
}

inline cudaStream_t GetStream() const { return _stream; }

protected:

struct IGpuBuffer* CreateGpuBuffer( size_t size, IAllocator& devAllocator, IAllocator& pinnedAllocator, size_t alignment, bool dryRun );
struct IGpuBuffer* CreateGpuBuffer( const GpuStreamDescriptor& desc, bool dryRun );

void DispatchHostFunc( GpuCallbackDispatch func, cudaStream_t stream, cudaEvent_t lockEvent, cudaEvent_t completedEvent );

static void CopyPendingDownloadStream( void* userData );

[[nodiscard]]
Command& GetCommand( CommandType type );
void SubmitCommands();

// Copy threads
static void QueueThreadEntryPoint( GpuQueue* self );
void QueueThreadMain();

void ExecuteCommand( const Command& cpy );

bool ShouldExitQueueThread();

protected:
cudaStream_t _stream = nullptr;
cudaStream_t _preloadStream = nullptr;
cudaStream_t _callbackStream = nullptr;


Thread _queueThread;
//Fence _bufferReadySignal;
Semaphore _bufferReadySignal;
Fence _bufferCopiedSignal;
Fence _syncFence;
SPCQueue<Command, BBCU_BUCKET_COUNT*6> _queue;
Kind _kind;

AutoResetSignal _waitForExitSignal;
std::atomic<bool> _exitQueueThread = false;

// Support multiple threads to grab commands
std::atomic<uint64> _cmdTicketOut = 0;
std::atomic<uint64> _cmdTicketIn = 0;
std::atomic<uint64> _commitTicketOut = 0;
std::atomic<uint64> _commitTicketIn = 0;
};
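
The four ticket counters at the bottom suggest a multi-producer commit protocol: each submitter takes an ordered ticket, fills its command slot, then publishes strictly in ticket order so the queue thread never observes gaps. A sketch of that idea, inferred from the counter names only:

// Hedged sketch of ticket-ordered publication (inferred from the
// _cmdTicket*/_commitTicket* names; not the project's actual code).
#include <atomic>
#include <cstdint>
#include <thread>

std::atomic<uint64_t> ticketOut{ 0 }; // next ticket handed to a producer
std::atomic<uint64_t> ticketIn { 0 }; // tickets published so far

uint64_t AcquireTicket()
{
    return ticketOut.fetch_add( 1, std::memory_order_relaxed );
}

void Commit( uint64_t ticket )
{
    // Publish strictly in ticket order so the consumer sees a gapless queue.
    while( ticketIn.load( std::memory_order_acquire ) != ticket )
        std::this_thread::yield();
    ticketIn.store( ticket + 1, std::memory_order_release );
}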
1,044 changes: 173 additions & 871 deletions cuda/GpuStreams.cu

Large diffs are not rendered by default.

285 changes: 147 additions & 138 deletions cuda/GpuStreams.h
@@ -5,22 +5,127 @@
#include "threading/Fence.h"
#include "threading/Semaphore.h"
#include "util/SPCQueue.h"
#include "util/StackAllocator.h"
#include <functional>

//#define GPU_BUFFER_COUNT
class DiskBufferBase;
class DiskBuffer;
class DiskBucketBuffer;
struct GpuDownloadBuffer;
struct GpuUploadBuffer;
struct GpuQueue;

typedef std::function<void()> GpuStreamCallback;
typedef void (*GpuDownloadCallback)( void* hostBuffer, size_t downloadSize, void* userData );

struct PackedCopy
{
struct IGpuBuffer* self;
const byte* src;
uint32 sequence;
uint32 length;
uint32 stride;
uint32 elementSize;
uint32 counts[BBCU_BUCKET_COUNT];
};

struct DiskDataInfo
{
DiskBufferBase* diskBuffer;

union {
struct {
GpuUploadBuffer* self;
uint32 sequence;
} uploadInfo;

struct {
size_t srcStride;
} download2DInfo;

struct {
size_t size;
} downloadSequentialInfo;
};
};

struct CopyInfo
{
struct IGpuBuffer* self;
uint32 sequence;

const void* srcBuffer;
void* dstBuffer;
size_t width;
size_t height;
size_t dstStride;
size_t srcStride;

// Callback data
GpuDownloadCallback callback;
void* userData;
};

// Represents a double-buffered device buffer, which can be used with a GpuQueue to
// make fast transfers (via intermediate pinned memory)

class IAllocator;

enum class GpuStreamKind : uint32
{
Download = 0,
Upload
};

typedef void (*GpuDownloadCallback)( void* hostBuffer, size_t downloadSize, void* userData );
struct IGpuBuffer
{
size_t size;
uint32 bufferCount; // Number of pinned/device buffers this instance contains
void* deviceBuffer[BBCU_GPU_BUFFER_MAX_COUNT];
void* pinnedBuffer[BBCU_GPU_BUFFER_MAX_COUNT]; // Pinned host buffer


cudaEvent_t pinnedEvent[BBCU_GPU_BUFFER_MAX_COUNT]; // Signals that the pinned buffer is ready for use

union {
cudaEvent_t deviceEvents[BBCU_GPU_BUFFER_MAX_COUNT]; // Signals that the device buffer is ready for use
cudaEvent_t events [BBCU_GPU_BUFFER_MAX_COUNT]; // Signals the device buffer is ready for use
};


union {
cudaEvent_t workEvent   [BBCU_GPU_BUFFER_MAX_COUNT]; // Signals that the work stream is done w/ the device buffer, and it's ready for use
cudaEvent_t readyEvents [BBCU_GPU_BUFFER_MAX_COUNT]; // User must signal this event when the device buffer is ready for download
};
cudaEvent_t completedEvents[BBCU_GPU_BUFFER_MAX_COUNT]; // Signals the buffer is ready for consumption by the device or host

// For dispatching host callbacks.
// Each buffer uses its own function?
cudaEvent_t callbackLockEvent;
cudaEvent_t callbackCompletedEvent;

Fence fence; // Signals the pinned buffer is ready for use
Fence copyFence;

cudaEvent_t preloadEvents[BBCU_GPU_BUFFER_MAX_COUNT];


CopyInfo copies[BBCU_BUCKET_COUNT];
// union {
// PackedCopy packedCopeis[BBCU_BUCKET_COUNT]; // For upload buffers
DiskDataInfo diskData[BBCU_BUCKET_COUNT];
// };
// DiskBucketBuffer* diskBucketBuffer = nullptr;

// #TODO: Remove atomic again
uint32 lockSequence; // Index of next buffer to lock
uint32 outgoingSequence; // Index of locked buffer that will be downloaded/uploaded
std::atomic<uint32> completedSequence; // Index of buffer that finished downloading/uploading
std::atomic<uint32> copySequence;

GpuQueue* queue; // Queue associated with this buffer
DiskBufferBase* diskBuffer; // DiskBuffer, if any, used when disk offload mode is enabled.
};
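
Every per-buffer array in IGpuBuffer is indexed the same way: a monotonically increasing sequence counter taken modulo bufferCount, which is what turns a handful of pinned/device buffers into a ring. A worked micro-example with bufferCount = 2:

// Micro-example of the sequence indexing used throughout IGpuBuffer.
const uint32 bufferCount = 2;

for( uint32 seq = 0; seq < 4; seq++ )
{
    const uint32 index = seq % bufferCount; // 0, 1, 0, 1
    // seq 2 maps back to buffer 0, so locking it must first wait on the
    // event recorded when transfer #0 released that buffer.
}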



struct GpuDownloadBuffer
{
@@ -79,7 +184,7 @@ struct GpuDownloadBuffer
}

void DownloadWithCallback( void* hostBuffer, size_t size, GpuDownloadCallback callback, void* userData, cudaStream_t workStream = nullptr, bool directOverride = false );

// Performs a direct device-to-pinned buffer copy,
// and then a 2-dimensional copy from pinned buffer to host buffer
// - width : Size in bytes of each row to copy
@@ -98,6 +203,15 @@ struct GpuDownloadBuffer
Download2D( hostBuffer, width * sizeof( T ), height, dstStride * sizeof( T ), srcStride * sizeof( T ), workStream, directOverride );
}

template<typename T>
inline void Download2DWithCallbackT( T* hostBuffer, size_t width, size_t height, size_t dstStride, size_t srcStride,
GpuDownloadCallback callback, void* userData, cudaStream_t workStream = nullptr, bool directOverride = false )
{
Download2DWithCallback(
hostBuffer, width * sizeof( T ), height, dstStride * sizeof( T ), srcStride * sizeof( T ),
callback, userData, workStream, directOverride );
}

// Performs several gpu-to-pinned downloads, then copies the pinned data as a contiguous buffer
// to the destination host buffer
void DownloadAndPackArray( void* hostBuffer, uint32 length, size_t srcStride, const uint32* counts, uint32 elementSize );
@@ -120,25 +234,37 @@ struct GpuDownloadBuffer

class GpuQueue* GetQueue() const;

DiskBufferBase* GetDiskBuffer() const;
void AssignDiskBuffer( DiskBufferBase* diskBuffer );

void HostCallback( std::function<void()> func );

//private:
struct IGpuBuffer* self;

private:

void PerformDownload2D( void* hostBuffer, size_t width, size_t height, size_t dstStride, size_t srcStride,
GpuDownloadCallback postCallback, void* postUserData,
cudaStream_t workStream, bool directOverride );

void PerformDownload( void* hostBuffer, size_t width, size_t height, size_t dstStride, size_t srcStride,
GpuDownloadCallback callback, void* userData, cudaStream_t workStream, struct CopyInfo* copy = nullptr );

void GetDownload2DCommand( void* hostBuffer, size_t width, size_t height, size_t dstStride, size_t srcStride,
uint32& outIndex, void*& outPinnedBuffer, const void*& outDevBuffer, GpuDownloadCallback callback = nullptr, void* userData = nullptr );

void CallHostFunctionOnStream( cudaStream_t stream, std::function<void()> func );
};

struct GpuUploadBuffer
{
void Upload( const void* hostBuffer, size_t size, cudaStream_t workStream );
void Upload( const void* hostBuffer, size_t size, cudaStream_t workStream, bool directOverride = false );

template<typename T>
inline void UploadT( const T* hostBuffer, size_t count, cudaStream_t workStream )
inline void UploadT( const T* hostBuffer, size_t count, cudaStream_t workStream, bool directOverride = false )
{
Upload( hostBuffer, count * sizeof( T ), workStream );
Upload( hostBuffer, count * sizeof( T ), workStream, directOverride );
}

void Upload( const void* hostBuffer, size_t size );
@@ -152,7 +278,7 @@ struct GpuUploadBuffer
// Upload the host buffer, then copy the copyBufferSrc to the host buffer. Preloading
// data into that hostBuffer (should be pinned) as soon as it is free so that memory is ready for the next upload.
void UploadAndPreLoad( void* hostBuffer, size_t size, const void* copyBufferSrc, size_t copySize );

template<typename T>
inline void UploadAndPreLoadT( T* hostBuffer, const size_t count, const T* copyBufferSrc, const size_t copyCount )
{
@@ -170,25 +296,22 @@ struct GpuUploadBuffer

void UploadArray( const void* hostBuffer, uint32 length, uint32 elementSize, uint32 srcStrideBytes, uint32 countStride, const uint32* counts );

void UploadArrayForIndex( const uint32 index, const void* hostBuffer, uint32 length,
uint32 elementSize, uint32 srcStride, uint32 countStride, const uint32* counts );

// srcStride here is in element count
template<typename T>
inline void UploadArrayT( const T* hostBuffer, uint32 length, uint32 srcStride, uint32 countStride, const uint32* counts )
{
UploadArray( hostBuffer, length, (uint32)sizeof( T ), srcStride * (uint32)sizeof( T ), countStride, counts );
}


void* GetUploadedDeviceBuffer( cudaStream_t workStream );

template<typename T>
inline T* GetUploadedDeviceBufferT( cudaStream_t workStream ) { return (T*)GetUploadedDeviceBuffer( workStream ); }

// Waits until the earliest buffer has been uploaded to the GPU
// and returns the device buffer.
void* GetUploadedDeviceBuffer();
void* GetUploadedDeviceBuffer( cudaStream_t workStream );

template<typename T>
inline T* GetUploadedDeviceBufferT() { return (T*)GetUploadedDeviceBuffer(); }
inline T* GetUploadedDeviceBufferT( cudaStream_t workStream ) { return (T*)GetUploadedDeviceBuffer( workStream ); }

// #TODO: Pass in the buffer used as a reference so that it can be nullified, for safety.
void ReleaseDeviceBuffer( cudaStream_t workStream );
@@ -205,131 +328,17 @@ struct GpuUploadBuffer

class GpuQueue* GetQueue() const;

void AssignDiskBuffer( DiskBufferBase* diskBuffer );
DiskBufferBase* GetDiskBuffer() const;

void CallHostFunctionOnStream( cudaStream_t stream, std::function<void()> func );


//private:
struct IGpuBuffer* self;

private:
uint32 SynchronizeOutgoingSequence();
void* GetNextPinnedBuffer();
};


class GpuQueue
{
friend struct IGpuBuffer;
friend struct GpuDownloadBuffer;
friend struct GpuUploadBuffer;

enum class CommandType
{
None = 0,
Copy,
Callback,
};

struct Command
{
CommandType type;

union
{
struct CopyInfo* copy;

struct {
GpuDownloadCallback callback;
size_t copySize;
void* dstbuffer;
void* userData;
} callback;
};
};

public:

enum Kind
{
Downloader,
Uploader
};

GpuQueue( Kind kind );
virtual ~GpuQueue();

//void Synchronize();

//GpuDownloadBuffer CreateDownloadBuffer( void* dev0, void* dev1, void* pinned0, void* pinned1, size_t size = 0, bool dryRun = false );
//GpuDownloadBuffer CreateDownloadBuffer( const size_t size, bool dryRun = false );
GpuDownloadBuffer CreateDirectDownloadBuffer( size_t size, IAllocator& devAllocator, size_t alignment, bool dryRun = false );
GpuDownloadBuffer CreateDownloadBuffer( size_t size, IAllocator& devAllocator, IAllocator& pinnedAllocator, size_t alignment, bool dryRun = false );
GpuDownloadBuffer CreateDownloadBuffer( size_t size, uint32 bufferCount, IAllocator& devAllocator, IAllocator& pinnedAllocator, size_t alignment, bool dryRun = false );

template<typename T>
inline GpuDownloadBuffer CreateDirectDownloadBuffer( const size_t count, IAllocator& devAllocator, size_t alignment = alignof( T ), bool dryRun = false )
{
return CreateDirectDownloadBuffer( count * sizeof( T ), devAllocator, alignment, dryRun );
}

template<typename T>
inline GpuDownloadBuffer CreateDownloadBufferT( const size_t count, IAllocator& devAllocator, IAllocator& pinnedAllocator, size_t alignment = alignof( T ), bool dryRun = false )
{
return CreateDownloadBuffer( count * sizeof( T ), devAllocator, pinnedAllocator, alignment, dryRun );
}

template<typename T>
inline GpuDownloadBuffer CreateDownloadBufferT( const size_t count, uint32 bufferCount, IAllocator& devAllocator, IAllocator& pinnedAllocator, size_t alignment = alignof( T ), bool dryRun = false )
{
return CreateDownloadBuffer( count * sizeof( T ), bufferCount, devAllocator, pinnedAllocator, alignment, dryRun );
}

//GpuUploadBuffer CreateUploadBuffer( void* dev0, void* dev1, void* pinned0, void* pinned1, size_t size = 0, bool dryRun = false );
//GpuUploadBuffer CreateUploadBuffer( const size_t size, bool dryRun = false );
GpuUploadBuffer CreateUploadBuffer( const size_t size, IAllocator& devAllocator, IAllocator& pinnedAllocator, size_t alignment, bool dryRun = false );

template<typename T>
inline GpuUploadBuffer CreateUploadBufferT( const size_t count, IAllocator& devAllocator, IAllocator& pinnedAllocator, size_t alignment, bool dryRun = false )
{
return CreateUploadBuffer( count * sizeof( T ), devAllocator, pinnedAllocator, alignment, dryRun );
}

inline cudaStream_t GetStream() const { return _stream; }

protected:

struct IGpuBuffer* CreateGpuBuffer( size_t size, IAllocator& devAllocator, IAllocator& pinnedAllocator, size_t alignment, bool dryRun );
struct IGpuBuffer* CreateGpuBuffer( size_t size, uint32 bufferCount, IAllocator* devAllocator, IAllocator* pinnedAllocator, size_t alignment, bool dryRun );
//struct IGpuBuffer* CreateGpuBuffer( const size_t size );
//struct IGpuBuffer* CreateGpuBuffer( void* dev0, void* dev1, void* pinned0, void* pinned1, size_t size );

static void CopyPendingDownloadStream( void* userData );

[[nodiscard]]
Command& GetCommand( CommandType type );
void SubmitCommands();

// Copy threads
static void CopyThreadEntryPoint( GpuQueue* self );
virtual void CopyThreadMain();

void ExecuteCommand( const Command& cpy );

bool ShouldExitCopyThread();

protected:
cudaStream_t _stream;
cudaStream_t _preloadStream;
Thread _copyThread;
//Fence _bufferReadySignal;
Semaphore _bufferReadySignal;
Fence _bufferCopiedSignal;
Fence _syncFence;
SPCQueue<Command, BBCU_BUCKET_COUNT*6> _queue;
Kind _kind;

AutoResetSignal _waitForExitSignal;
std::atomic<bool> _exitCopyThread = false;

// Support multiple threads to grab commands
std::atomic<uint64> _cmdTicketOut = 0;
std::atomic<uint64> _cmdTicketIn = 0;
std::atomic<uint64> _commitTicketOut = 0;
std::atomic<uint64> _commitTicketIn = 0;
};
7 changes: 7 additions & 0 deletions cuda/chacha8.cu
@@ -1,5 +1,6 @@
#include "pos/chacha8.h"
#include "CudaPlotContext.h"
#include "plotting/DiskBucketBuffer.h"

// #TEST
#if _DEBUG
@@ -247,6 +248,12 @@ void GenF1Cuda( CudaK32PlotContext& cx )
cx.metaOut.WaitForCompletion();
cx.yOut .Reset();
cx.metaOut.Reset();

if( cx.cfg.hybrid16Mode )
{
cx.diskContext->yBuffer->Swap();
cx.diskContext->metaBuffer->Swap();
}
}

///
60 changes: 60 additions & 0 deletions extract-version.ps1
@@ -0,0 +1,60 @@
# Navigate to the script's directory
$scriptPath = Split-Path -Path $MyInvocation.MyCommand.Definition -Parent
Set-Location -Path $scriptPath

# Arguments
$ver_component = $args[0] # The user-specified component from the full version

# Read the version from the file
$version_str = (Get-Content 'VERSION' | Select-Object -First 1 | Out-String).Trim()
$bb_version_suffix = (Get-Content 'VERSION' | Select-Object -Last 1 | Out-String).Trim()
$version_header = 'src\Version.h'

if ($version_str -eq $bb_version_suffix) {
$bb_version_suffix = ""
}

# Prepend a '-' to the suffix, if necessary
if (-Not [string]::IsNullOrEmpty($bb_version_suffix) -and $bb_version_suffix[0] -ne '-') {
$bb_version_suffix = "-$bb_version_suffix"
}

# Parse the major, minor, and revision numbers
$bb_ver_maj, $bb_ver_min, $bb_ver_rev = $version_str -split '\.' | ForEach-Object { $_.Trim() }

# Get the Git commit hash
$bb_git_commit = $env:GITHUB_SHA
if ([string]::IsNullOrEmpty($bb_git_commit)) {
$bb_git_commit = & git rev-parse HEAD
}

if ([string]::IsNullOrEmpty($bb_git_commit)) {
$bb_git_commit = "unknown"
}

# Check if the user wants a specific component
if (-Not [string]::IsNullOrEmpty($ver_component)) {
switch ($ver_component) {
"major" {
Write-Host -NoNewline $bb_ver_maj
}
"minor" {
Write-Host -NoNewline $bb_ver_min
}
"revision" {
Write-Host -NoNewline $bb_ver_rev
}
"suffix" {
Write-Host -NoNewline $bb_version_suffix
}
"commit" {
Write-Host -NoNewline $bb_git_commit
}
default {
Write-Error "Invalid version component '$ver_component'"
exit 1
}
}
exit 0
}
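
For example, running ".\extract-version.ps1 major" prints only the major version number, and ".\extract-version.ps1 commit" prints the commit hash ($env:GITHUB_SHA on CI, falling back to "git rev-parse HEAD").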

9 changes: 5 additions & 4 deletions src/PlotContext.h
@@ -8,10 +8,11 @@

struct PlotRequest
{
const byte* plotId; // Id of the plot we want to create
const char* outDir; // Output plot directory
const char* plotFileName; // .plot.tmp file name
const byte* memo; // Plot memo
const byte* plotId; // Id of the plot we want to create
const char* outDir; // Output plot directory
const char* plotFileName; // .plot.tmp file name
const char* plotOutPath; // Full output path for the final .plot.tmp file
const byte* memo; // Plot memo
uint16 memoSize;
bool isFirstPlot;
bool IsFinalPlot;
1 change: 1 addition & 0 deletions src/PlotWriter.h
@@ -3,6 +3,7 @@
#include "threading/Thread.h"
#include "threading/Semaphore.h"


/**
* Handles writing the final plot to disk
*
12 changes: 12 additions & 0 deletions src/Types.h
@@ -1,5 +1,7 @@
#pragma once

#include <memory>

typedef uint8_t byte;
typedef uint8_t uint8;
typedef uint16_t uint16;
@@ -67,3 +69,13 @@ typedef uint128_t uint128;
typedef std::chrono::steady_clock::duration Duration;
typedef std::chrono::steady_clock::time_point TimePoint;
typedef std::chrono::nanoseconds NanoSeconds;


template<typename T>
using ptr = std::unique_ptr<T>;

template<typename T>
using sptr = std::shared_ptr<T>;

template<typename T>
using wptr = std::weak_ptr<T>;
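
The new aliases shorten the std smart-pointer spellings. A trivial usage sketch (Fence is an existing project type):

// Trivial usage of the new aliases.
ptr<Fence>  owned  = std::make_unique<Fence>();
sptr<Fence> shared = std::make_shared<Fence>();
wptr<Fence> weak   = shared;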