Skip to content

Commit

Permalink
more patching
Browse files Browse the repository at this point in the history
  • Loading branch information
h-vetinari committed Feb 12, 2021
1 parent b0755f7 commit 0a5e606
Show file tree
Hide file tree
Showing 11 changed files with 150 additions and 9 deletions.
2 changes: 2 additions & 0 deletions recipe/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,8 @@ source:
- patches/0008-fix-missing-AVX-overloads-for-MSVC-in-ScalarQuantize.patch
# add compat for __SSEx__ macros
- patches/0009-add-windows-compat-for-__SSEx__-macros.patch
# fix operator overloads for __m128/__m256 that MSVC doesn't define
- patches/0010-fix-missing-AVX-overloads-for-MSVC-in-distances_simd.patch

build:
number: {{ number }}
Expand Down
2 changes: 1 addition & 1 deletion recipe/patches/0001-use-c-14.patch
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
From 1b4e6f16de1bc6e6e7a104647625f45956356df5 Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <[email protected]>
Date: Mon, 26 Oct 2020 22:44:44 +0100
Subject: [PATCH 1/9] use c++14
Subject: [PATCH 01/10] use c++14

---
CMakeLists.txt | 2 +-
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
From e4b94c8a5ad0fc7632cf05006865dbd8feac2ed4 Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <[email protected]>
Date: Thu, 4 Feb 2021 09:21:21 -0800
Subject: [PATCH 2/9] Add missing headers in faiss/[gpu/]CMakeLists.txt (#1666)
Subject: [PATCH 02/10] Add missing headers in faiss/[gpu/]CMakeLists.txt
(#1666)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
From 98aa69a963ff6fc90a3c643cf6da4c598e8c76e8 Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <[email protected]>
Date: Wed, 3 Feb 2021 08:00:39 -0800
Subject: [PATCH 3/9] make AVX2-detection platform-independent (#1600)
Subject: [PATCH 03/10] make AVX2-detection platform-independent (#1600)

Summary:
In the context of https://github.com/conda-forge/faiss-split-feedstock/issues/23, I discussed with some of the conda-folks how we should support AVX2 (and potentially other builds) for faiss. In the meantime, we'd like to follow the model that faiss itself is using (i.e. build with AVX2 and without and then load the corresponding library at runtime depending on CPU capabilities).
Expand Down
2 changes: 1 addition & 1 deletion recipe/patches/0004-set-correct-EXT_SUFFIX-for-swig.patch
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
From baeb71758b3aae26cb41c023e2c703c5b6824fc0 Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <[email protected]>
Date: Wed, 23 Dec 2020 10:22:30 +0100
Subject: [PATCH 4/9] set correct EXT_SUFFIX for swig
Subject: [PATCH 04/10] set correct EXT_SUFFIX for swig

---
faiss/python/CMakeLists.txt | 7 +++----
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
From 86e41f82945d230e2ef16d178a4d2e442acebc61 Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <[email protected]>
Date: Thu, 11 Feb 2021 14:05:12 +0100
Subject: [PATCH 5/9] log success messages and errors in loader.py
Subject: [PATCH 05/10] log success messages and errors in loader.py

---
faiss/python/loader.py | 17 ++++++++++-------
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
From 6cfb1b2cdbd5b13a96569e643f38bb9920fe3a74 Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <[email protected]>
Date: Thu, 11 Feb 2021 16:25:25 +0100
Subject: [PATCH 6/9] add msvc-compatible AVX2 switch in CMakeLists.txt
Subject: [PATCH 06/10] add msvc-compatible AVX2 switch in CMakeLists.txt

---
faiss/CMakeLists.txt | 7 ++++++-
Expand Down
2 changes: 1 addition & 1 deletion recipe/patches/0007-msvc-doesn-t-seet-__F16C__.patch
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
From c8412579efd780e4cb578ad80da4e09994201512 Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <[email protected]>
Date: Thu, 11 Feb 2021 17:51:01 +0100
Subject: [PATCH 7/9] msvc doesn't seet __F16C__
Subject: [PATCH 07/10] msvc doesn't seet __F16C__

---
faiss/impl/ScalarQuantizer.cpp | 2 +-
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
From 680a97d38a359303dcf4e70344bc6a4556e23697 Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <[email protected]>
Date: Fri, 12 Feb 2021 00:22:16 +0100
Subject: [PATCH 8/9] fix missing AVX-overloads for MSVC in ScalarQuantizer.cpp
Subject: [PATCH 08/10] fix missing AVX-overloads for MSVC in
ScalarQuantizer.cpp

---
faiss/impl/ScalarQuantizer.cpp | 26 +++++++++++++-------------
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
From a1e9f6a79a6211a1decb891f6b65a85192dac6c7 Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <[email protected]>
Date: Fri, 12 Feb 2021 01:19:15 +0100
Subject: [PATCH 9/9] add windows compat for __SSEx__ macros
Subject: [PATCH 09/10] add windows compat for __SSEx__ macros

---
faiss/impl/ScalarQuantizer.cpp | 1 +
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
From 1eff714e5c52e106c76a53cafb15f8c583b89ea7 Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <[email protected]>
Date: Fri, 12 Feb 2021 01:39:36 +0100
Subject: [PATCH 10/10] fix missing AVX-overloads for MSVC in
distances_simd.cpp

---
faiss/utils/distances_simd.cpp | 42 +++++++++++++++++-----------------
1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/faiss/utils/distances_simd.cpp b/faiss/utils/distances_simd.cpp
index 49979bd3..f02c68bf 100644
--- a/faiss/utils/distances_simd.cpp
+++ b/faiss/utils/distances_simd.cpp
@@ -409,7 +409,7 @@ float fvec_inner_product (const float * x,
}

__m128 msum2 = _mm256_extractf128_ps(msum1, 1);
- msum2 += _mm256_extractf128_ps(msum1, 0);
+ msum2 = _mm_add_ps(msum2, _mm256_extractf128_ps(msum1, 0));

if (d >= 4) {
__m128 mx = _mm_loadu_ps (x); x += 4;
@@ -438,27 +438,27 @@ float fvec_L2sqr (const float * x,
while (d >= 8) {
__m256 mx = _mm256_loadu_ps (x); x += 8;
__m256 my = _mm256_loadu_ps (y); y += 8;
- const __m256 a_m_b1 = mx - my;
- msum1 += a_m_b1 * a_m_b1;
+ const __m256 a_m_b1 = _mm256_sub_ps(mx, my);
+ msum1 = _mm256_add_ps(msum1, _mm256_mul_ps(a_m_b1, a_m_b1));
d -= 8;
}

__m128 msum2 = _mm256_extractf128_ps(msum1, 1);
- msum2 += _mm256_extractf128_ps(msum1, 0);
+ msum2 = _mm_add_ps(msum2, _mm256_extractf128_ps(msum1, 0));

if (d >= 4) {
__m128 mx = _mm_loadu_ps (x); x += 4;
__m128 my = _mm_loadu_ps (y); y += 4;
- const __m128 a_m_b1 = mx - my;
- msum2 += a_m_b1 * a_m_b1;
+ const __m128 a_m_b1 = _mm_sub_ps(mx, my);
+ msum2 = _mm_add_ps(msum2, _mm_mul_ps(a_m_b1, a_m_b1));
d -= 4;
}

if (d > 0) {
__m128 mx = masked_read (d, x);
__m128 my = masked_read (d, y);
- __m128 a_m_b1 = mx - my;
- msum2 += a_m_b1 * a_m_b1;
+ __m128 a_m_b1 = _mm_sub_ps(mx, my);
+ msum2 = _mm_add_ps(msum2, _mm_mul_ps(a_m_b1, a_m_b1));
}

msum2 = _mm_hadd_ps (msum2, msum2);
@@ -474,28 +474,28 @@ float fvec_L1 (const float * x, const float * y, size_t d)
while (d >= 8) {
__m256 mx = _mm256_loadu_ps (x); x += 8;
__m256 my = _mm256_loadu_ps (y); y += 8;
- const __m256 a_m_b = mx - my;
- msum1 += _mm256_and_ps(signmask, a_m_b);
+ const __m256 a_m_b = _mm256_sub_ps(mx, my);
+ msum1 = _mm256_add_ps(msum1, _mm256_and_ps(signmask, a_m_b));
d -= 8;
}

__m128 msum2 = _mm256_extractf128_ps(msum1, 1);
- msum2 += _mm256_extractf128_ps(msum1, 0);
+ msum2 = _mm_add_ps(msum2, _mm256_extractf128_ps(msum1, 0));
__m128 signmask2 = __m128(_mm_set1_epi32 (0x7fffffffUL));

if (d >= 4) {
__m128 mx = _mm_loadu_ps (x); x += 4;
__m128 my = _mm_loadu_ps (y); y += 4;
- const __m128 a_m_b = mx - my;
- msum2 += _mm_and_ps(signmask2, a_m_b);
+ const __m128 a_m_b = _mm_sub_ps(mx, my);
+ msum2 = _mm_add_ps(msum2, _mm_and_ps(signmask2, a_m_b));
d -= 4;
}

if (d > 0) {
__m128 mx = masked_read (d, x);
__m128 my = masked_read (d, y);
- __m128 a_m_b = mx - my;
- msum2 += _mm_and_ps(signmask2, a_m_b);
+ __m128 a_m_b = _mm_sub_ps(mx, my);
+ msum2 = _mm_add_ps(msum2, _mm_and_ps(signmask2, a_m_b));
}

msum2 = _mm_hadd_ps (msum2, msum2);
@@ -511,7 +511,7 @@ float fvec_Linf (const float * x, const float * y, size_t d)
while (d >= 8) {
__m256 mx = _mm256_loadu_ps (x); x += 8;
__m256 my = _mm256_loadu_ps (y); y += 8;
- const __m256 a_m_b = mx - my;
+ const __m256 a_m_b = _mm256_sub_ps(mx, my);
msum1 = _mm256_max_ps(msum1, _mm256_and_ps(signmask, a_m_b));
d -= 8;
}
@@ -531,7 +531,7 @@ float fvec_Linf (const float * x, const float * y, size_t d)
if (d > 0) {
__m128 mx = masked_read (d, x);
__m128 my = masked_read (d, y);
- __m128 a_m_b = mx - my;
+ __m128 a_m_b = _mm_sub_ps(mx, my);
msum2 = _mm_max_ps(msum2, _mm_and_ps(signmask2, a_m_b));
}

@@ -562,8 +562,8 @@ float fvec_L2sqr (const float * x,
while (d >= 4) {
__m128 mx = _mm_loadu_ps (x); x += 4;
__m128 my = _mm_loadu_ps (y); y += 4;
- const __m128 a_m_b1 = mx - my;
- msum1 += a_m_b1 * a_m_b1;
+ const __m128 a_m_b1 = _mm_sub_ps(mx, my);
+ msum1 = _mm_add_ps(msum1, _mm_mul_ps(a_m_b1, a_m_b1));
d -= 4;
}

@@ -571,8 +571,8 @@ float fvec_L2sqr (const float * x,
// add the last 1, 2 or 3 values
__m128 mx = masked_read (d, x);
__m128 my = masked_read (d, y);
- __m128 a_m_b1 = mx - my;
- msum1 += a_m_b1 * a_m_b1;
+ __m128 a_m_b1 = _mm_sub_ps(mx, my);
+ msum1 = _mm_add_ps(msum1, _mm_mul_ps(a_m_b1, a_m_b1));
}

msum1 = _mm_hadd_ps (msum1, msum1);
--
2.29.2.windows.3

0 comments on commit 0a5e606

Please sign in to comment.