From d70b718191293e436009ad5779f4e8b066aadab1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=D0=9C=D0=B8=D1=85=D0=B0=D0=B8=D0=BB?=
 <mike.milos@yandex.ru>
Date: Wed, 9 Oct 2024 22:44:46 +0300
Subject: [PATCH] fix windows build (#40)

---
 .github/workflows/build.yml                 |  2 +-
 csrc/faster_eval_api/coco_eval/cocoeval.cpp | 35 ++++++------
 csrc/mask_api/src/mask.cpp                  | 62 ++++++++++++++-------
 csrc/mask_api/src/mask.h                    |  2 +
 csrc/mask_api/src/rle.cpp                   | 12 ++--
 faster_coco_eval/core/mask.py               |  2 +-
 faster_coco_eval/version.py                 |  2 +-
 setup.py                                    | 21 ++++---
 8 files changed, 83 insertions(+), 55 deletions(-)
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index bddbac6..34cb47b 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -24,7 +24,7 @@ jobs:
           CIBW_SKIP: "pp* *musllinux* *cp36* *cp313*"
           CIBW_ARCHS: x86_64
           CIBW_TEST_REQUIRES: pytest
-          CIBW_TEST_COMMAND: "cd {project}/tests && pytest test_basic.py"
+          CIBW_TEST_COMMAND: "cd {project}/tests && pytest"
         with:
           package-dir: ./
 
diff --git a/csrc/faster_eval_api/coco_eval/cocoeval.cpp b/csrc/faster_eval_api/coco_eval/cocoeval.cpp
index 86a67db..cf097e8 100644
--- a/csrc/faster_eval_api/coco_eval/cocoeval.cpp
+++ b/csrc/faster_eval_api/coco_eval/cocoeval.cpp
@@ -12,7 +12,7 @@ namespace coco_eval
   namespace COCOeval
   {
     template <typename T>
-    int v_index(const std::vector<T> &v, const T &key)
+    int64_t v_index(const std::vector<T> &v, const T &key)
     {
       auto itr = std::find(v.begin(), v.end(), key);
 
@@ -90,9 +90,9 @@ namespace coco_eval
         ImageEvaluation *results)
     {
       // Initialize memory to store return data matches and ignore
-      const int num_iou_thresholds = iou_thresholds.size();
-      const int num_ground_truth = ground_truth_sorted_indices.size();
-      const int num_detections = detection_sorted_indices.size();
+      const int num_iou_thresholds = (const int) iou_thresholds.size();
+      const int num_ground_truth = (const int) ground_truth_sorted_indices.size();
+      const int num_detections = (const int) detection_sorted_indices.size();
       // std::vector<uint64_t> ground_truth_matches(
       // num_iou_thresholds * num_ground_truth, 0);
       std::vector<int64_t> &ground_truth_matches = results->ground_truth_matches;
@@ -189,10 +189,9 @@ namespace coco_eval
         const ImageCategoryInstances<InstanceAnnotation> &
             image_category_detection_instances)
     {
-      const int num_area_ranges = area_ranges.size();
-      const int num_images = image_category_ground_truth_instances.size();
-      const int num_categories =
-          image_category_ious.size() > 0 ? image_category_ious[0].size() : 0;
+      const int num_area_ranges = (const int) area_ranges.size();
+      const int num_images = (const int) image_category_ground_truth_instances.size();
+      const int num_categories = (const int) (image_category_ious.size() > 0 ? image_category_ious[0].size() : 0);
       std::vector<uint64_t> detection_sorted_indices;
       std::vector<uint64_t> ground_truth_sorted_indices;
       std::vector<bool> ignores;
@@ -440,14 +439,12 @@ namespace coco_eval
           list_to_vec<double>(params.attr("recThrs"));
       const std::vector<int> max_detections =
           list_to_vec<int>(params.attr("maxDets"));
-      const int num_iou_thresholds = py::len(params.attr("iouThrs"));
-      const int num_recall_thresholds = py::len(params.attr("recThrs"));
-      const int num_categories = params.attr("useCats").cast<int>() == 1
-                                     ? py::len(params.attr("catIds"))
-                                     : 1;
-      const int num_area_ranges = py::len(params.attr("areaRng"));
-      const int num_max_detections = py::len(params.attr("maxDets"));
-      const int num_images = py::len(params.attr("imgIds"));
+      const int num_iou_thresholds = (const int) py::len(params.attr("iouThrs"));
+      const int num_recall_thresholds = (const int) py::len(params.attr("recThrs"));
+      const int num_categories = (const int)(params.attr("useCats").cast<int>() == 1 ? py::len(params.attr("catIds")) : 1);
+      const int num_area_ranges = (const int) py::len(params.attr("areaRng"));
+      const int num_max_detections = (const int) py::len(params.attr("maxDets"));
+      const int num_images = (const int) py::len(params.attr("imgIds"));
 
       std::vector<double> precisions_out(
           num_iou_thresholds * num_recall_thresholds * num_categories *
@@ -793,9 +790,9 @@ namespace coco_eval
 
       std::vector<int> _catIds;
 
-      int iou_ind = v_index(iouThrs, iouThr);
-      int aind = v_index(areaRngLbl, areaRng);
-      int mind = v_index(maxDets, maxDet);
+      int64_t iou_ind = v_index(iouThrs, iouThr);
+      int64_t aind = v_index(areaRngLbl, areaRng);
+      int64_t mind = v_index(maxDets, maxDet);
 
       if (catIds.size() == 0)
       {
diff --git a/csrc/mask_api/src/mask.cpp b/csrc/mask_api/src/mask.cpp
index 516b142..fa98bf0 100644
--- a/csrc/mask_api/src/mask.cpp
+++ b/csrc/mask_api/src/mask.cpp
@@ -7,6 +7,7 @@
 #include <iostream>
 #include <execution>
 #include <future>
+#include <thread>
 
 using namespace pybind11::literals;
 
@@ -141,7 +142,8 @@ namespace mask_api
             if (n > 0)
             {
                 uint64_t h = (R[0].h), w = (R[0].w);
-                py::array_t<uint8_t, py::array::f_style> M({h, w, n});
+                
+                py::array_t<uint8_t> M({h, w, n});
                 auto mask = M.mutable_unchecked();
                 uint64_t s = h * w * n;
 
@@ -535,7 +537,7 @@ namespace mask_api
                 {
                     return frUncompressedRLE(pyobj.cast<std::vector<py::dict>>());
                 }
-                else if (sub_type == "<class 'list'>" or sub_type == "<class 'numpy.ndarray'>")
+                else if ((sub_type == "<class 'list'>") || (sub_type == "<class 'numpy.ndarray'>"))
                 {
                     std::vector<std::vector<double>> numpy_array = pyobj.cast<std::vector<std::vector<double>>>();
                     if (numpy_array[0].size() == 4)
@@ -547,7 +549,7 @@ namespace mask_api
                         return frPoly(numpy_array, h, w);
                     }
                 }
-                else if (sub_type == "<class 'float'>" or sub_type == "<class 'int'>")
+                else if ((sub_type == "<class 'float'>") || (sub_type == "<class 'int'>"))
                 {
                     std::vector<double> array = pyobj.cast<std::vector<double>>();
                     if (array.size() == 4)
@@ -595,27 +597,44 @@ namespace mask_api
             py::gil_scoped_release release;
             std::vector<std::tuple<uint64_t, uint64_t, std::string>> result(rles.size());
 
-            auto process = [&rles, &result](size_t s, size_t e, double d) mutable
-            {
-                for (size_t i = s; i < e; ++i)
+            // The std::async path is disabled on Windows; chunks are processed synchronously there.
+            #ifndef _WIN32
+                auto process = [&rles, &result](size_t s, size_t e, double d) mutable
                 {
-                    result[i] = rles[i].toBoundary(d).toTuple();
-                }
-                std::this_thread::sleep_for(std::chrono::milliseconds(1));
-            };
+                    for (size_t i = s; i < e; ++i)
+                    {
+                        result[i] = rles[i].toBoundary(d).toTuple();
+                    }
+                    std::this_thread::sleep_for(std::chrono::milliseconds(1));
+                };
+            #endif
 
             size_t start = 0;
             size_t step = 1000;
             size_t end = step;
 
+            if (end > rles.size())
+            {
+                end = rles.size();
+            }
+
             while (start < rles.size())
             {
-                std::vector<std::future<void>> rle_futures(cpu_count);
+                #ifndef _WIN32
+                    std::vector<std::future<void>> rle_futures(cpu_count);
+                #endif
 
                 size_t thread = 0;
-                for (thread = 0; thread < rle_futures.size(); thread++)
+                for (thread = 0; thread < cpu_count; thread++)
                 {
-                    rle_futures[thread] = std::async(std::launch::async, process, start, end, dilation_ratio);
+                    #ifdef _WIN32
+                        for (size_t i = start; i < end; ++i)
+                        {
+                            result[i] = rles[i].toBoundary(dilation_ratio).toTuple();
+                        }
+                    #else
+                        rle_futures[thread] = std::async(std::launch::async, process, start, end, dilation_ratio);
+                    #endif
 
                     start += step;
                     end += step;
@@ -630,13 +649,15 @@ namespace mask_api
                         break;
                     }
                 }
-
-                for (size_t i = 0; i < thread; i++)
-                {
-                    rle_futures[i].wait();
-                }
-                rle_futures.clear();
-                rle_futures.shrink_to_fit();
+                
+                #ifndef _WIN32
+                    for (size_t i = 0; i < thread; i++)
+                    {
+                        rle_futures[i].wait();
+                    }
+                    rle_futures.clear();
+                    rle_futures.shrink_to_fit();                
+                #endif
             }
 
             py::gil_scoped_acquire acquire;
@@ -677,6 +698,7 @@ namespace mask_api
 
                 if (compute_boundary)
                 {
+
                     boundary_array = processRleToBoundary(rles, dilation_ratio, cpu_count);
                 }
 
diff --git a/csrc/mask_api/src/mask.h b/csrc/mask_api/src/mask.h
index 8eebf00..c1f15e2 100644
--- a/csrc/mask_api/src/mask.h
+++ b/csrc/mask_api/src/mask.h
@@ -10,6 +10,8 @@
 
 namespace py = pybind11;
 
+typedef unsigned int uint;
+
 namespace mask_api
 {
 
diff --git a/csrc/mask_api/src/rle.cpp b/csrc/mask_api/src/rle.cpp
index 0bc84f3..b964737 100644
--- a/csrc/mask_api/src/rle.cpp
+++ b/csrc/mask_api/src/rle.cpp
@@ -81,7 +81,7 @@ namespace mask_api
                     x += cnts[cnts.size() - 2];
                 }
 
-                cnts.emplace_back(x);
+                cnts.emplace_back((uint)x);
             }
 
             return RLE(h, w, cnts.size(), cnts);
@@ -208,7 +208,7 @@ namespace mask_api
                     if (yd < 0)
                         yd = 0;
                     else if (yd > h)
-                        yd = h;
+                        yd = (double)h;
 
                     yd = ceil(yd);
                     x.emplace_back((int)xd);
@@ -259,7 +259,7 @@ namespace mask_api
         RLE RLE::erode_3x3(int dilation) const
         {
             bool v = false;
-            long max_len = this->w * this->h;
+            long max_len = (long)(this->w * this->h);
             std::vector<bool> _counts(max_len, false);
             std::vector<bool>::iterator ptr = _counts.begin();
             std::for_each(this->cnts.begin(), this->cnts.end(), [&v, &ptr](uint count)
@@ -285,7 +285,7 @@ namespace mask_api
                     }
                     if (i > 0)
                     {
-                        ofsvec.push_back(i * this->h + j);
+                        ofsvec.push_back((int)(i * this->h + j));
                     }
                     else
                     {
@@ -296,7 +296,7 @@ namespace mask_api
 
             for (int i = dilation; i >= -dilation; i--)
             {
-                ofsvec_bottom.push_back(i * this->h + dilation);
+                ofsvec_bottom.push_back((int)(i * this->h + dilation));
             }
 
             long c = 0;
@@ -304,7 +304,7 @@ namespace mask_api
             long ic = 0;
             std::vector<uint> cnts;
             bool _min = false, _prev_min = false;
-            long rle_h = this->h;
+            long rle_h = (long)this->h;
 
             v = true;
             for (uint j : this->cnts)
diff --git a/faster_coco_eval/core/mask.py b/faster_coco_eval/core/mask.py
index 3f2b818..9bf9c25 100644
--- a/faster_coco_eval/core/mask.py
+++ b/faster_coco_eval/core/mask.py
@@ -116,7 +116,7 @@ def calculateRleForAllAnnotations(
         boundary_cpu_count (int): number of CPUs to use for boundary computation
 
     """
-    return _mask.calculateRleForAllAnnotations(
+    _mask.calculateRleForAllAnnotations(
         anns,
         img_sizes,
         compute_rle,
diff --git a/faster_coco_eval/version.py b/faster_coco_eval/version.py
index b26eadd..9f79c48 100644
--- a/faster_coco_eval/version.py
+++ b/faster_coco_eval/version.py
@@ -1,2 +1,2 @@
-__version__ = "1.6.0"
+__version__ = "1.6.1"
 __author__ = "MiXaiLL76"
diff --git a/setup.py b/setup.py
index a4fad00..e352911 100644
--- a/setup.py
+++ b/setup.py
@@ -1,6 +1,7 @@
 #!/usr/bin/python3
 
 import glob
+import os
 
 import setuptools
 from pybind11.setup_helpers import ParallelCompile, Pybind11Extension, build_ext
@@ -107,13 +108,19 @@ def get_extensions(version_info):
     ]
     print("Sources: {}".format(sources))
 
-    extra_compile_args = [
-        "-std=c++17",
-        "-fPIC",
-        "-ffinite-math-only",
-        "-fno-signed-zeros",
-        "-ftree-vectorize",
-    ]
+    if os.name == "nt":
+        extra_compile_args = [
+            "/std:c++17",
+            "/fp:fast",
+        ]
+    else:
+        extra_compile_args = [
+            "-std=c++17",
+            "-fPIC",
+            "-ffinite-math-only",
+            "-fno-signed-zeros",
+            "-ftree-vectorize",
+        ]
 
     ext_modules += [
         Pybind11Extension(