more patching

conda-forge · Feb 12, 2021 · 0a5e606 · 0a5e606
1 parent b0755f7
commit 0a5e606
Show file tree

Hide file tree

Showing 11 changed files with 150 additions and 9 deletions.
diff --git a/recipe/meta.yaml b/recipe/meta.yaml
@@ -88,6 +88,8 @@ source:
     - patches/0008-fix-missing-AVX-overloads-for-MSVC-in-ScalarQuantize.patch
     # add compat for __SSEx__ macros
     - patches/0009-add-windows-compat-for-__SSEx__-macros.patch
+    # replace arithmetic operators on __m128/__m256 (which MSVC doesn't define) with explicit intrinsics
+    - patches/0010-fix-missing-AVX-overloads-for-MSVC-in-distances_simd.patch
 
 build:
   number: {{ number }}

diff --git a/recipe/patches/0001-use-c-14.patch b/recipe/patches/0001-use-c-14.patch
@@ -1,7 +1,7 @@
 From 1b4e6f16de1bc6e6e7a104647625f45956356df5 Mon Sep 17 00:00:00 2001
 From: "H. Vetinari" <[email protected]>
 Date: Mon, 26 Oct 2020 22:44:44 +0100
-Subject: [PATCH 1/9] use c++14
+Subject: [PATCH 01/10] use c++14
 
 ---
  CMakeLists.txt              | 2 +-

diff --git a/recipe/patches/0002-Add-missing-headers-in-faiss-gpu-CMakeLists.txt-1666.patch b/recipe/patches/0002-Add-missing-headers-in-faiss-gpu-CMakeLists.txt-1666.patch
@@ -1,7 +1,8 @@
 From e4b94c8a5ad0fc7632cf05006865dbd8feac2ed4 Mon Sep 17 00:00:00 2001
 From: "H. Vetinari" <[email protected]>
 Date: Thu, 4 Feb 2021 09:21:21 -0800
-Subject: [PATCH 2/9] Add missing headers in faiss/[gpu/]CMakeLists.txt (#1666)
+Subject: [PATCH 02/10] Add missing headers in faiss/[gpu/]CMakeLists.txt
+ (#1666)
 MIME-Version: 1.0
 Content-Type: text/plain; charset=UTF-8
 Content-Transfer-Encoding: 8bit

diff --git a/recipe/patches/0003-make-AVX2-detection-platform-independent-1600.patch b/recipe/patches/0003-make-AVX2-detection-platform-independent-1600.patch
@@ -1,7 +1,7 @@
 From 98aa69a963ff6fc90a3c643cf6da4c598e8c76e8 Mon Sep 17 00:00:00 2001
 From: "H. Vetinari" <[email protected]>
 Date: Wed, 3 Feb 2021 08:00:39 -0800
-Subject: [PATCH 3/9] make AVX2-detection platform-independent (#1600)
+Subject: [PATCH 03/10] make AVX2-detection platform-independent (#1600)
 
 Summary:
 In the context of https://github.com/conda-forge/faiss-split-feedstock/issues/23, I discussed with some of the conda-folks how we should support AVX2 (and potentially other builds) for faiss. In the meantime, we'd like to follow the model that faiss itself is using (i.e. build with AVX2 and without and then load the corresponding library at runtime depending on CPU capabilities).

diff --git a/recipe/patches/0004-set-correct-EXT_SUFFIX-for-swig.patch b/recipe/patches/0004-set-correct-EXT_SUFFIX-for-swig.patch
@@ -1,7 +1,7 @@
 From baeb71758b3aae26cb41c023e2c703c5b6824fc0 Mon Sep 17 00:00:00 2001
 From: "H. Vetinari" <[email protected]>
 Date: Wed, 23 Dec 2020 10:22:30 +0100
-Subject: [PATCH 4/9] set correct EXT_SUFFIX for swig
+Subject: [PATCH 04/10] set correct EXT_SUFFIX for swig
 
 ---
  faiss/python/CMakeLists.txt |  7 +++----

diff --git a/recipe/patches/0005-log-success-messages-and-errors-in-loader.py.patch b/recipe/patches/0005-log-success-messages-and-errors-in-loader.py.patch
@@ -1,7 +1,7 @@
 From 86e41f82945d230e2ef16d178a4d2e442acebc61 Mon Sep 17 00:00:00 2001
 From: "H. Vetinari" <[email protected]>
 Date: Thu, 11 Feb 2021 14:05:12 +0100
-Subject: [PATCH 5/9] log success messages and errors in loader.py
+Subject: [PATCH 05/10] log success messages and errors in loader.py
 
 ---
  faiss/python/loader.py | 17 ++++++++++-------

diff --git a/recipe/patches/0006-add-msvc-compatible-AVX2-switch-in-CMakeLists.txt.patch b/recipe/patches/0006-add-msvc-compatible-AVX2-switch-in-CMakeLists.txt.patch
@@ -1,7 +1,7 @@
 From 6cfb1b2cdbd5b13a96569e643f38bb9920fe3a74 Mon Sep 17 00:00:00 2001
 From: "H. Vetinari" <[email protected]>
 Date: Thu, 11 Feb 2021 16:25:25 +0100
-Subject: [PATCH 6/9] add msvc-compatible AVX2 switch in CMakeLists.txt
+Subject: [PATCH 06/10] add msvc-compatible AVX2 switch in CMakeLists.txt
 
 ---
  faiss/CMakeLists.txt | 7 ++++++-

diff --git a/recipe/patches/0007-msvc-doesn-t-seet-__F16C__.patch b/recipe/patches/0007-msvc-doesn-t-seet-__F16C__.patch
@@ -1,7 +1,7 @@
 From c8412579efd780e4cb578ad80da4e09994201512 Mon Sep 17 00:00:00 2001
 From: "H. Vetinari" <[email protected]>
 Date: Thu, 11 Feb 2021 17:51:01 +0100
-Subject: [PATCH 7/9] msvc doesn't seet __F16C__
+Subject: [PATCH 07/10] msvc doesn't set __F16C__
 
 ---
  faiss/impl/ScalarQuantizer.cpp | 2 +-

diff --git a/recipe/patches/0008-fix-missing-AVX-overloads-for-MSVC-in-ScalarQuantize.patch b/recipe/patches/0008-fix-missing-AVX-overloads-for-MSVC-in-ScalarQuantize.patch
@@ -1,7 +1,8 @@
 From 680a97d38a359303dcf4e70344bc6a4556e23697 Mon Sep 17 00:00:00 2001
 From: "H. Vetinari" <[email protected]>
 Date: Fri, 12 Feb 2021 00:22:16 +0100
-Subject: [PATCH 8/9] fix missing AVX-overloads for MSVC in ScalarQuantizer.cpp
+Subject: [PATCH 08/10] fix missing AVX-overloads for MSVC in
+ ScalarQuantizer.cpp
 
 ---
  faiss/impl/ScalarQuantizer.cpp | 26 +++++++++++++-------------

diff --git a/recipe/patches/0009-add-windows-compat-for-__SSEx__-macros.patch b/recipe/patches/0009-add-windows-compat-for-__SSEx__-macros.patch
@@ -1,7 +1,7 @@
 From a1e9f6a79a6211a1decb891f6b65a85192dac6c7 Mon Sep 17 00:00:00 2001
 From: "H. Vetinari" <[email protected]>
 Date: Fri, 12 Feb 2021 01:19:15 +0100
-Subject: [PATCH 9/9] add windows compat for __SSEx__ macros
+Subject: [PATCH 09/10] add windows compat for __SSEx__ macros
 
 ---
  faiss/impl/ScalarQuantizer.cpp |  1 +

diff --git a/recipe/patches/0010-fix-missing-AVX-overloads-for-MSVC-in-distances_simd.patch b/recipe/patches/0010-fix-missing-AVX-overloads-for-MSVC-in-distances_simd.patch
@@ -0,0 +1,137 @@
+From 1eff714e5c52e106c76a53cafb15f8c583b89ea7 Mon Sep 17 00:00:00 2001
+From: "H. Vetinari" <[email protected]>
+Date: Fri, 12 Feb 2021 01:39:36 +0100
+Subject: [PATCH 10/10] fix missing AVX-overloads for MSVC in
+ distances_simd.cpp
+
+---
+ faiss/utils/distances_simd.cpp | 42 +++++++++++++++++-----------------
+ 1 file changed, 21 insertions(+), 21 deletions(-)
+
+diff --git a/faiss/utils/distances_simd.cpp b/faiss/utils/distances_simd.cpp
+index 49979bd3..f02c68bf 100644
+--- a/faiss/utils/distances_simd.cpp
++++ b/faiss/utils/distances_simd.cpp
+@@ -409,7 +409,7 @@ float fvec_inner_product (const float * x,
+     }
+
+     __m128 msum2 = _mm256_extractf128_ps(msum1, 1);
+-    msum2 +=       _mm256_extractf128_ps(msum1, 0);
++    msum2 = _mm_add_ps(msum2, _mm256_extractf128_ps(msum1, 0));
+
+     if (d >= 4) {
+         __m128 mx = _mm_loadu_ps (x); x += 4;
+@@ -438,27 +438,27 @@ float fvec_L2sqr (const float * x,
+     while (d >= 8) {
+         __m256 mx = _mm256_loadu_ps (x); x += 8;
+         __m256 my = _mm256_loadu_ps (y); y += 8;
+-        const __m256 a_m_b1 = mx - my;
+-        msum1 += a_m_b1 * a_m_b1;
++        const __m256 a_m_b1 = _mm256_sub_ps(mx, my);
++        msum1 = _mm256_add_ps(msum1, _mm256_mul_ps(a_m_b1, a_m_b1));
+         d -= 8;
+     }
+
+     __m128 msum2 = _mm256_extractf128_ps(msum1, 1);
+-    msum2 +=       _mm256_extractf128_ps(msum1, 0);
++    msum2 = _mm_add_ps(msum2, _mm256_extractf128_ps(msum1, 0));
+
+     if (d >= 4) {
+         __m128 mx = _mm_loadu_ps (x); x += 4;
+         __m128 my = _mm_loadu_ps (y); y += 4;
+-        const __m128 a_m_b1 = mx - my;
+-        msum2 += a_m_b1 * a_m_b1;
++        const __m128 a_m_b1 = _mm_sub_ps(mx, my);
++        msum2 = _mm_add_ps(msum2, _mm_mul_ps(a_m_b1, a_m_b1));
+         d -= 4;
+     }
+
+     if (d > 0) {
+         __m128 mx = masked_read (d, x);
+         __m128 my = masked_read (d, y);
+-        __m128 a_m_b1 = mx - my;
+-        msum2 += a_m_b1 * a_m_b1;
++        __m128 a_m_b1 = _mm_sub_ps(mx, my);
++        msum2 = _mm_add_ps(msum2, _mm_mul_ps(a_m_b1, a_m_b1));
+     }
+
+     msum2 = _mm_hadd_ps (msum2, msum2);
+@@ -474,28 +474,28 @@ float fvec_L1 (const float * x, const float * y, size_t d)
+     while (d >= 8) {
+         __m256 mx = _mm256_loadu_ps (x); x += 8;
+         __m256 my = _mm256_loadu_ps (y); y += 8;
+-        const __m256 a_m_b = mx - my;
+-        msum1 += _mm256_and_ps(signmask, a_m_b);
++        const __m256 a_m_b = _mm256_sub_ps(mx, my);
++        msum1 = _mm256_add_ps(msum1, _mm256_and_ps(signmask, a_m_b));
+         d -= 8;
+     }
+
+     __m128 msum2 = _mm256_extractf128_ps(msum1, 1);
+-    msum2 +=       _mm256_extractf128_ps(msum1, 0);
++    msum2 = _mm_add_ps(msum2, _mm256_extractf128_ps(msum1, 0));
+     __m128 signmask2 = __m128(_mm_set1_epi32 (0x7fffffffUL));
+
+     if (d >= 4) {
+         __m128 mx = _mm_loadu_ps (x); x += 4;
+         __m128 my = _mm_loadu_ps (y); y += 4;
+-        const __m128 a_m_b = mx - my;
+-        msum2 += _mm_and_ps(signmask2, a_m_b);
++        const __m128 a_m_b = _mm_sub_ps(mx, my);
++        msum2 = _mm_add_ps(msum2, _mm_and_ps(signmask2, a_m_b));
+         d -= 4;
+     }
+
+     if (d > 0) {
+         __m128 mx = masked_read (d, x);
+         __m128 my = masked_read (d, y);
+-        __m128 a_m_b = mx - my;
+-        msum2 += _mm_and_ps(signmask2, a_m_b);
++        __m128 a_m_b = _mm_sub_ps(mx, my);
++        msum2 = _mm_add_ps(msum2, _mm_and_ps(signmask2, a_m_b));
+     }
+
+     msum2 = _mm_hadd_ps (msum2, msum2);
+@@ -511,7 +511,7 @@ float fvec_Linf (const float * x, const float * y, size_t d)
+     while (d >= 8) {
+         __m256 mx = _mm256_loadu_ps (x); x += 8;
+         __m256 my = _mm256_loadu_ps (y); y += 8;
+-        const __m256 a_m_b = mx - my;
++        const __m256 a_m_b = _mm256_sub_ps(mx, my);
+         msum1 = _mm256_max_ps(msum1, _mm256_and_ps(signmask, a_m_b));
+         d -= 8;
+     }
+@@ -531,7 +531,7 @@ float fvec_Linf (const float * x, const float * y, size_t d)
+     if (d > 0) {
+         __m128 mx = masked_read (d, x);
+         __m128 my = masked_read (d, y);
+-        __m128 a_m_b = mx - my;
++        __m128 a_m_b = _mm_sub_ps(mx, my);
+         msum2 = _mm_max_ps(msum2, _mm_and_ps(signmask2, a_m_b));
+     }
+
+@@ -562,8 +562,8 @@ float fvec_L2sqr (const float * x,
+     while (d >= 4) {
+         __m128 mx = _mm_loadu_ps (x); x += 4;
+         __m128 my = _mm_loadu_ps (y); y += 4;
+-        const __m128 a_m_b1 = mx - my;
+-        msum1 += a_m_b1 * a_m_b1;
++        const __m128 a_m_b1 = _mm_sub_ps(mx, my);
++        msum1 = _mm_add_ps(msum1, _mm_mul_ps(a_m_b1, a_m_b1));
+         d -= 4;
+     }
+
+@@ -571,8 +571,8 @@ float fvec_L2sqr (const float * x,
+         // add the last 1, 2 or 3 values
+         __m128 mx = masked_read (d, x);
+         __m128 my = masked_read (d, y);
+-        __m128 a_m_b1 = mx - my;
+-        msum1 += a_m_b1 * a_m_b1;
++        __m128 a_m_b1 = _mm_sub_ps(mx, my);
++        msum1 = _mm_add_ps(msum1, _mm_mul_ps(a_m_b1, a_m_b1));
+     }
+
+     msum1 = _mm_hadd_ps (msum1, msum1);
+-- 
+2.29.2.windows.3
+