Fix bug causing from_variable to not honor fill value for floating po…

…int numbers
TGSAI · Dec 9, 2024 · 549be32 · 549be32
1 parent 838538d
commit 549be32
Show file tree

Hide file tree

Showing 2 changed files with 49 additions and 2 deletions.
diff --git a/mdio/variable.h b/mdio/variable.h
@@ -16,6 +16,7 @@
 #define MDIO_VARIABLE_H_
 
 #include <filesystem>
+#include <limits>
 #include <memory>
 #include <queue>
 #include <set>
@@ -1793,8 +1794,15 @@ struct VariableData {
 };
 
 /**
- * @brief If you already have a variable in memory, allocated a
- * variableDataobject using this variable as the specification.
+ * @brief Allocates a VariableData object with the specified dtype and fill
+ * value. Intended behavior is to fill the array with default fill values and
+ * may overwrite existing data if written to disk.
+ * @tparam T The data type of the variable.
+ * @tparam R The rank of the variable.
+ * @tparam OriginKind The origin kind of the variable.
+ * @param variable The variable to allocate from.
+ * @return mdio::Result<VariableData<T, R, OriginKind>> The allocated
+ * VariableData object.
  */
 template <typename T = void, DimensionIndex R = dynamic_rank,
           ArrayOriginKind OriginKind = offset_origin>
@@ -1809,6 +1817,26 @@ Result<VariableData<T, R, OriginKind>> from_variable(
       variable.get_store().domain().box(), mdio::ContiguousLayoutOrder::c,
       tensorstore::value_init, variable.dtype());
 
+  if (variable.dtype() == constants::kFloat32 ||
+      variable.dtype() == constants::kFloat64) {
+    // Get the size of the array in bytes
+    size_t num_elements = variable.num_samples();
+    size_t element_size = variable.dtype().size();
+
+    // Fill with NaN values using memset
+    if (variable.dtype() == constants::kFloat32) {
+      auto* data = reinterpret_cast<float*>(_array.data());
+      for (size_t i = 0; i < num_elements; ++i) {
+        data[i] = std::numeric_limits<float>::quiet_NaN();
+      }
+    } else {  // double
+      auto* data = reinterpret_cast<double*>(_array.data());
+      for (size_t i = 0; i < num_elements; ++i) {
+        data[i] = std::numeric_limits<double>::quiet_NaN();
+      }
+    }
+  }
+
   // The second step tries to cast the dtype of the array to the supplied
   // templated. this can fail if the types are inconsistent, at which point it
   // will return with a status.

diff --git a/mdio/variable_test.cc b/mdio/variable_test.cc
@@ -17,6 +17,7 @@
 #include <gmock/gmock.h>
 #include <gtest/gtest.h>
 
+#include <cmath>  // For checking NaN values
 #include <filesystem>
 #include <fstream>
 #include <sstream>
@@ -1270,4 +1271,22 @@ TEST(VariableData, writeTest) {
   std::filesystem::remove_all("name");
 }
 
+TEST(VariableData, fromVariableFillTest) {
+  auto goodJson = json_good;
+  // Issue #144 reports double with fill value of NaN filling with 0's
+  goodJson["metadata"]["dtype"] = "<f8";
+  // Copy dataset factory fill value pattern for double.
+  goodJson["metadata"]["fill_value"] = nlohmann::json::value_t::null;
+  auto variable = mdio::Variable<mdio::dtypes::float64_t>::Open(
+                      goodJson, mdio::constants::kCreateClean)
+                      .value();
+  mdio::Result<mdio::VariableData<mdio::dtypes::float64_t>> variableData =
+      mdio::from_variable<mdio::dtypes::float64_t>(variable);
+
+  // Use std::isnan to check for NaN
+  EXPECT_TRUE(std::isnan(variableData.value().get_data_accessor().data()[0]))
+      << "Expected NaN filled array but got "
+      << variableData.value().get_data_accessor().data()[0];
+}
+
 }  // namespace