Autodesk · massimim · Jun 9, 2023 · Jun 15, 2023 · Jun 15, 2023 · Jun 15, 2023
diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt
@@ -1,4 +1,5 @@
 cmake_minimum_required(VERSION 3.19 FATAL_ERROR)
 
-add_subdirectory("lbm-lid-driven-cavity-flow")
+add_subdirectory(lbm)
+# add_subdirectory("lbm-lid-driven-cavity-flow")
 # add_subdirectory("lbm-flow-over-sphere")
diff --git a/benchmarks/lbm-lid-driven-cavity-flow/lbm-lid-driven-cavity-flow.py b/benchmarks/lbm-lid-driven-cavity-flow/lbm-lid-driven-cavity-flow.py
@@ -4,9 +4,11 @@
 GRID_LIST = "dGrid bGrid eGrid".split()
 STORAGE_FP_LIST = "double float".split()
 COMPUTE_FP_LIST = "double float".split()
-OCC_LIST = "nOCC".split()
+OCC_LIST = "nOCC sOCC".split()
+HU_LIST = "huGrid huLattice".split()
+CURVE_LIST = "sweep morton hilbert".split()
 WARM_UP_ITER = 10
-MAX_ITER = 100
+MAX_ITER = 10000
 REPETITIONS = 5
 
 import subprocess
@@ -38,60 +40,79 @@ def countAll():
                     for COMPUTE_FP in COMPUTE_FP_LIST:
                         for DEVICE_SET in DEVICE_SET_LIST:
                             for GRID in GRID_LIST:
-                                if STORAGE_FP == 'double' and COMPUTE_FP == 'float':
-                                    continue
+                                for HU in HU_LIST:
+                                    for CURVE in CURVE_LIST:
+                                        if STORAGE_FP == 'double' and COMPUTE_FP == 'float':
+                                            continue
+                                        if STORAGE_FP == 'float' and COMPUTE_FP == 'double':
+                                            continue
 
-                                counter += 1
+                                        counter += 1
     return counter
 
 
 SAMPLES = countAll()
 counter = 0
 command = './lbm-lid-driven-cavity-flow'
+# command = 'echo'
 with open(command + '.log', 'w') as fp:
     for DEVICE_TYPE in DEVICE_TYPE_LIST:
         DEVICE_SET_LIST = [DEVICE_ID_LIST[0]]
         if DEVICE_TYPE == 'gpu':
             for DEVICE in DEVICE_ID_LIST[1:]:
                 DEVICE_SET_LIST.append(DEVICE_SET_LIST[-1] + ' ' + DEVICE)
-        for OCC in OCC_LIST:
-            for DOMAIN_SIZE in DOMAIN_SIZE_LIST:
-                for STORAGE_FP in STORAGE_FP_LIST:
-                    for COMPUTE_FP in COMPUTE_FP_LIST:
-                        for DEVICE_SET in DEVICE_SET_LIST:
+        for DEVICE_SET in DEVICE_SET_LIST:
+            for OCC in OCC_LIST:
+                for DOMAIN_SIZE in DOMAIN_SIZE_LIST:
+                    for STORAGE_FP in STORAGE_FP_LIST:
+                        for COMPUTE_FP in COMPUTE_FP_LIST:
                             for GRID in GRID_LIST:
-                                if STORAGE_FP == 'double' and COMPUTE_FP == 'float':
-                                    continue
+                                for HU in HU_LIST:
+                                    for CURVE in CURVE_LIST:
+
+                                        if STORAGE_FP == 'double' and COMPUTE_FP == 'float':
+                                            continue
+                                        if STORAGE_FP == 'float' and COMPUTE_FP == 'double':
+                                            continue
+
+                                        parameters = []
+                                        parameters.append('--deviceType ' + DEVICE_TYPE)
+                                        parameters.append('--deviceIds ' + DEVICE_SET)
+                                        parameters.append('--grid ' + GRID)
+                                        parameters.append('--domain-size ' + DOMAIN_SIZE)
+                                        parameters.append('--warmup-iter ' + str(WARM_UP_ITER))
+                                        parameters.append('--repetitions ' + str(REPETITIONS))
+                                        parameters.append('--max-iter ' + str(MAX_ITER))
+                                        parameters.append(
+                                            '--report-filename ' + 'lbm-lid-driven-cavity-flow___' +
+                                            DEVICE_TYPE + '_' +
+                                            DEVICE_SET.replace(' ', '_') + '-' +
+                                            GRID + '_' +
+                                            DOMAIN_SIZE + '-' +
+                                            STORAGE_FP + '-' + COMPUTE_FP + '-' +
+                                            OCC + '-' +
+                                            HU + '-' +
+                                            CURVE)
+                                        parameters.append('--computeFP ' + COMPUTE_FP)
+                                        parameters.append('--storageFP ' + STORAGE_FP)
+                                        parameters.append('--curve ' + CURVE)
 
-                                parameters = []
-                                parameters.append('--deviceType ' + DEVICE_TYPE)
-                                parameters.append('--deviceIds ' + DEVICE_SET)
-                                parameters.append('--grid ' + GRID)
-                                parameters.append('--domain-size ' + DOMAIN_SIZE)
-                                parameters.append('--warmup-iter ' + str(WARM_UP_ITER))
-                                parameters.append('--repetitions ' + str(REPETITIONS))
-                                parameters.append('--max-iter ' + str(MAX_ITER))
-                                parameters.append(
-                                    '--report-filename ' + 'lbm-lid-driven-cavity-flow___' +
-                                    DEVICE_TYPE + '_' + DOMAIN_SIZE + '_' +
-                                    STORAGE_FP + '_' + COMPUTE_FP + '_' +
-                                    DEVICE_SET.replace(' ', '_') + '_' + OCC)
-                                parameters.append('--computeFP ' + COMPUTE_FP)
-                                parameters.append('--storageFP ' + STORAGE_FP)
-                                parameters.append('--benchmark')
-                                parameters.append('--' + OCC)
+                                        parameters.append('--benchmark')
+                                        parameters.append('--' + OCC)
+                                        parameters.append('--' + HU)
 
-                                commandList = []
-                                commandList.append(command)
-                                for el in parameters:
-                                    for s in el.split():
-                                        commandList.append(s)
+                                        commandList = []
+                                        commandList.append(command)
+                                        for el in parameters:
+                                            for s in el.split():
+                                                commandList.append(s)
 
-                                fp.write("\n-------------------------------------------\n")
-                                fp.write(' '.join(commandList))
-                                fp.write("\n-------------------------------------------\n")
-                                fp.flush()
-                                subprocess.run(commandList, text=True, stdout=fp)
+                                        fp.write("\n-------------------------------------------\n")
+                                        fp.write(' '.join(commandList))
+                                        fp.write("\n-------------------------------------------\n")
+                                        fp.flush()
+                                        print(' '.join(commandList))
+                                        subprocess.run(commandList, text=True, stdout=fp)
 
-                                counter += 1
-                                printProgressBar(counter * 100.0 / SAMPLES, 'Progress')
+                                        counter += 1
+                                        printProgressBar(counter * 100.0 / SAMPLES, 'Progress')
diff --git a/benchmarks/lbm-lid-driven-cavity-flow/src/CellType.h b/benchmarks/lbm-lid-driven-cavity-flow/src/CellType.h
@@ -22,13 +22,28 @@ struct CellType
         classification = c;
         wallNghBitflag = n;
     }
+
     NEON_CUDA_HOST_DEVICE explicit CellType(Classification c)
     {
         classification = c;
         wallNghBitflag = 0;
     }
 
+    // Implicit conversion to the integer value of `classification` (used to exportVti).
+    operator int() const { return static_cast<int>(classification); }
+
+    // Returns true when bit number `fwdRegIdx` of `wallNghBitFlag` is set.
+    template <int fwdRegIdx>
+    static auto isWall(const uint32_t& wallNghBitFlag) -> bool
+    {
+        return (wallNghBitFlag & (uint32_t(1) << fwdRegIdx)) != 0u;
+    }
 
+    // Sets bit number `fwdRegIdx` in `wallNghBitflag`; every other bit is left unchanged.
+    auto setWall(int fwdRegIdx) -> void
+    {
+        wallNghBitflag |= uint32_t(1) << fwdRegIdx;
+    }
 
     uint32_t       wallNghBitflag;
     Classification classification;

diff --git a/benchmarks/lbm-lid-driven-cavity-flow/src/Config.cpp b/benchmarks/lbm-lid-driven-cavity-flow/src/Config.cpp
@@ -41,6 +41,7 @@ auto Config::toString() const -> std::string
 
     s << "......... computeType " << c.computeType << std::endl;
     s << "........... storeType " << c.storeType << std::endl;
+    s << "............... curve " << c.curve << std::endl;
 
     s << ". ............... occ " << Neon::skeleton::OccUtils::toString(c.occ) << std::endl;
     s << "....... transfer Mode " << Neon::set::TransferModeUtils::toString(c.transferMode) << std::endl;
@@ -60,43 +61,58 @@ auto Config::parseArgs(const int argc, char* argv[])
     auto& config = *this;
 
     auto cli =
-        (
-            clipp::required("--deviceType") & clipp::value("deviceType", config.deviceType) % "Device ids to use",
-            clipp::required("--deviceIds") & clipp::integers("gpus", config.devices) % "Device ids to use",
-            clipp::option("--grid") & clipp::value("grid", config.gridType) % "Could be dGrid, eGrid, bGrid",
-            clipp::option("--domain-size") & clipp::integer("domain_size", config.N) % "Voxels along each dimension of the cube domain",
-            clipp::option("--warmup-iter") & clipp::integer("warmup_iter", config.benchIniIter) % "Number of iteration for warm up. max_iter = warmup_iter + timed_iters",
-            clipp::option("--max-iter") & clipp::integer("max_iter", config.benchMaxIter) % "Maximum solver iterations",
-            clipp::option("--repetitions") & clipp::integer("repetitions", config.repetitions) % "Number of times the benchmark is run.",
-            clipp::option("--report-filename ") & clipp::value("keeper_filename", config.reportFile) % "Output perf keeper filename",
-
-            clipp::option("--computeFP") & clipp::value("computeFP", config.computeType) % "Could be double or float",
-            clipp::option("--storageFP") & clipp::value("storageFP", config.storeType) % "Could be double or float",
-
-            (
-                (clipp::option("--sOCC").set(config.occ, Neon::skeleton::Occ::standard) % "Standard OCC") |
-                (clipp::option("--nOCC").set(config.occ, Neon::skeleton::Occ::none) % "No OCC (on by default)")),
-            (
-                (clipp::option("--put").set(config.transferMode, Neon::set::TransferMode::put) % "Set transfer mode to PUT") |
-                (clipp::option("--get").set(config.transferMode, Neon::set::TransferMode::get) % "Set transfer mode to GET (on by default)")),
-            (
-                (clipp::option("--huLattice").set(config.stencilSemantic, Neon::set::StencilSemantic::streaming) % "Halo update with lattice semantic (on by default)") |
-                (clipp::option("--huGrid").set(config.stencilSemantic, Neon::set::StencilSemantic::standard) % "Halo update with grid semantic ")),
-            (
-                (clipp::option("--benchmark").set(config.benchmark, true) % "Run benchmark mode") |
-                (clipp::option("--visual").set(config.benchmark, false) % "Run export partial data")),
-
-            (
-                clipp::option("--vti").set(config.vti, true) % "Standard OCC")
+        (clipp::required("--deviceType") & clipp::value("deviceType", config.deviceType) % "Device type to use (e.g. gpu)",
+         clipp::required("--deviceIds") & clipp::integers("gpus", config.devices) % "Device ids to use",
+         clipp::option("--grid") & clipp::value("grid", config.gridType) % "Could be dGrid, eGrid, bGrid",
+         clipp::option("--domain-size") & clipp::integer("domain_size", config.N) % "Voxels along each dimension of the cube domain",
+         clipp::option("--warmup-iter") & clipp::integer("warmup_iter", config.benchIniIter) % "Number of iteration for warm up. max_iter = warmup_iter + timed_iters",
+         clipp::option("--max-iter") & clipp::integer("max_iter", config.benchMaxIter) % "Maximum solver iterations",
+         clipp::option("--repetitions") & clipp::integer("repetitions", config.repetitions) % "Number of times the benchmark is run.",
+         clipp::option("--report-filename ") & clipp::value("keeper_filename", config.reportFile) % "Output perf keeper filename",
+         // Floating-point types are received as plain strings (config.computeType / config.storeType).
+         clipp::option("--computeFP") & clipp::value("computeFP", config.computeType) % "Could be double or float",
+         clipp::option("--storageFP") & clipp::value("storageFP", config.storeType) % "Could be double or float",
+         // The curve name is stored as a string here and checked by parseArgs once parsing succeeded.
+         clipp::option("--curve") & clipp::value("curve", config.curve) % "Could be sweep (the default), morton, or hilbert",
+         (
+             (clipp::option("--sOCC").set(config.occ, Neon::skeleton::Occ::standard) % "Standard OCC") |
+             (clipp::option("--nOCC").set(config.occ, Neon::skeleton::Occ::none) % "No OCC (on by default)")),
+         (
+             (clipp::option("--put").set(config.transferMode, Neon::set::TransferMode::put) % "Set transfer mode to PUT") |
+             (clipp::option("--get").set(config.transferMode, Neon::set::TransferMode::get) % "Set transfer mode to GET (on by default)")),
+         (
+             (clipp::option("--huLattice").set(config.stencilSemantic, Neon::set::StencilSemantic::streaming) % "Halo update with lattice semantic (on by default)") |
+             (clipp::option("--huGrid").set(config.stencilSemantic, Neon::set::StencilSemantic::standard) % "Halo update with grid semantic ")),
+         (
+             (clipp::option("--benchmark").set(config.benchmark, true) % "Run benchmark mode") |
+             (clipp::option("--visual").set(config.benchmark, false) % "Run export partial data")),
+         // --vti only switches config.vti on; its help text previously duplicated the --sOCC description.
+         (
+             clipp::option("--vti").set(config.vti, true) % "Export vti file")
 
         );
 
+
     if (!clipp::parse(argc, argv, cli)) {
         auto fmt = clipp::doc_formatting{}.doc_column(31);
         std::cout << make_man_page(cli, argv[0], fmt) << '\n';
         return -1;
     }
 
+    // Translate the --curve string into the encoder enum; any other value is rejected.
+    if (config.curve == "sweep") {
+        config.spaceCurve = Neon::domain::tool::spaceCurves::EncoderType::sweep;
+    } else if (config.curve == "morton") {
+        config.spaceCurve = Neon::domain::tool::spaceCurves::EncoderType::morton;
+    } else if (config.curve == "hilbert") {
+        config.spaceCurve = Neon::domain::tool::spaceCurves::EncoderType::hilbert;
+    } else {
+        auto fmt = clipp::doc_formatting{}.doc_column(31);
+        std::cout << config.curve << " is not a supported configuration" << std::endl;
+        std::cout << make_man_page(cli, argv[0], fmt) << '\n';
+        return -1;
+    }
+
     helpSetLbmParameters();
 
     return 0;

diff --git a/benchmarks/lbm-lid-driven-cavity-flow/src/Config.h b/benchmarks/lbm-lid-driven-cavity-flow/src/Config.h
@@ -3,6 +3,7 @@
 #include <string>
 #include <vector>
 #include "Neon/core/tools/clipp.h"
+#include "Neon/domain/tools/SpaceCurves.h"
 #include "Neon/skeleton/Skeleton.h"
 
 template <typename ComputeType>
@@ -16,28 +17,29 @@ struct LbmParameters
 
 struct Config
 {
-    double                     Re = 100.;            // Reynolds number
-    double                     ulb = 0.04;           // Velocity in lattice units
-    int                        N = 160;              // Number of nodes in x-direction
-    bool                       benchmark = false;    // Run in benchmark mode ?
-    double                     max_t = 10.0;         // Non-benchmark mode: Total time in dim.less units
-    int                        outFrequency = 200;   // Non-benchmark mode: Frequency in LU for output of terminal message and profiles (use 0 for no messages)
-    int                        dataFrequency = 0;    // Non-benchmark mode: Frequency in LU of full data dump (use 0 for no data dump)
-    int                        benchIniIter = 1000;  // Benchmark mode: Number of warmup iterations
-    int                        benchMaxIter = 2000;  // Benchmark mode: Total number of iterations
-    int                        repetitions = 1;      // Benchmark mode: number of time the test is run
-    std::string                deviceType = "gpu";
-    std::vector<int>           devices = std::vector<int>(0);                // Devices for the execution
-    std::string                reportFile = "lbm-lid-driven-cavity-flow";    // Report file name
-    std::string                gridType = "dGrid";                           // Neon grid type
-    Neon::skeleton::Occ        occ = Neon::skeleton::Occ::none;              // Neon OCC type
-    Neon::set::TransferMode    transferMode = Neon::set::TransferMode::get;  // Neon transfer mode for halo update
-    Neon::set::StencilSemantic stencilSemantic = Neon::set::StencilSemantic::streaming;
-    bool                       vti = false;  // Export vti file
-    std::string                computeType = "double";
-    std::string                storeType = "double";
-
-    LbmParameters<double> mLbmParameters;
+    double                                       Re = 100.;            // Reynolds number
+    double                                       ulb = 0.04;           // Velocity in lattice units
+    int                                          N = 160;              // Number of nodes in x-direction
+    bool                                         benchmark = false;    // Run in benchmark mode ?
+    double                                       max_t = 10.0;         // Non-benchmark mode: Total time in dim.less units
+    int                                          outFrequency = 200;   // Non-benchmark mode: Frequency in LU for output of terminal message and profiles (use 0 for no messages)
+    int                                          dataFrequency = 0;    // Non-benchmark mode: Frequency in LU of full data dump (use 0 for no data dump)
+    int                                          benchIniIter = 1000;  // Benchmark mode: Number of warmup iterations
+    int                                          benchMaxIter = 2000;  // Benchmark mode: Total number of iterations
+    int                                          repetitions = 1;      // Benchmark mode: number of time the test is run
+    std::string                                  deviceType = "gpu";
+    std::vector<int>                             devices = std::vector<int>(0);                // Devices for the execution
+    std::string                                  reportFile = "lbm-lid-driven-cavity-flow";    // Report file name
+    std::string                                  gridType = "dGrid";                           // Neon grid type
+    Neon::skeleton::Occ                          occ = Neon::skeleton::Occ::none;              // Neon OCC type
+    Neon::set::TransferMode                      transferMode = Neon::set::TransferMode::get;  // Neon transfer mode for halo update
+    Neon::set::StencilSemantic                   stencilSemantic = Neon::set::StencilSemantic::streaming;
+    bool                                         vti = false;  // Export vti file
+    std::string                                  computeType = "double";
+    std::string                                  storeType = "double";
+    std::string                                  curve = "sweep";
+    Neon::domain::tool::spaceCurves::EncoderType spaceCurve = Neon::domain::tool::spaceCurves::EncoderType::sweep;
+    LbmParameters<double>                        mLbmParameters;
 
     auto toString()
         const -> std::string;