-
-
Notifications
You must be signed in to change notification settings - Fork 16
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
b0755f7
commit 0a5e606
Showing
11 changed files
with
150 additions
and
9 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,7 @@ | ||
From 1b4e6f16de1bc6e6e7a104647625f45956356df5 Mon Sep 17 00:00:00 2001 | ||
From: "H. Vetinari" <[email protected]> | ||
Date: Mon, 26 Oct 2020 22:44:44 +0100 | ||
Subject: [PATCH 1/9] use c++14 | ||
Subject: [PATCH 01/10] use c++14 | ||
|
||
--- | ||
CMakeLists.txt | 2 +- | ||
|
3 changes: 2 additions & 1 deletion
3
recipe/patches/0002-Add-missing-headers-in-faiss-gpu-CMakeLists.txt-1666.patch
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,8 @@ | ||
From e4b94c8a5ad0fc7632cf05006865dbd8feac2ed4 Mon Sep 17 00:00:00 2001 | ||
From: "H. Vetinari" <[email protected]> | ||
Date: Thu, 4 Feb 2021 09:21:21 -0800 | ||
Subject: [PATCH 2/9] Add missing headers in faiss/[gpu/]CMakeLists.txt (#1666) | ||
Subject: [PATCH 02/10] Add missing headers in faiss/[gpu/]CMakeLists.txt | ||
(#1666) | ||
MIME-Version: 1.0 | ||
Content-Type: text/plain; charset=UTF-8 | ||
Content-Transfer-Encoding: 8bit | ||
|
2 changes: 1 addition & 1 deletion
2
recipe/patches/0003-make-AVX2-detection-platform-independent-1600.patch
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,7 @@ | ||
From 98aa69a963ff6fc90a3c643cf6da4c598e8c76e8 Mon Sep 17 00:00:00 2001 | ||
From: "H. Vetinari" <[email protected]> | ||
Date: Wed, 3 Feb 2021 08:00:39 -0800 | ||
Subject: [PATCH 3/9] make AVX2-detection platform-independent (#1600) | ||
Subject: [PATCH 03/10] make AVX2-detection platform-independent (#1600) | ||
|
||
Summary: | ||
In the context of https://github.com/conda-forge/faiss-split-feedstock/issues/23, I discussed with some of the conda-folks how we should support AVX2 (and potentially other builds) for faiss. In the meantime, we'd like to follow the model that faiss itself is using (i.e. build with AVX2 and without and then load the corresponding library at runtime depending on CPU capabilities). | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,7 @@ | ||
From baeb71758b3aae26cb41c023e2c703c5b6824fc0 Mon Sep 17 00:00:00 2001 | ||
From: "H. Vetinari" <[email protected]> | ||
Date: Wed, 23 Dec 2020 10:22:30 +0100 | ||
Subject: [PATCH 4/9] set correct EXT_SUFFIX for swig | ||
Subject: [PATCH 04/10] set correct EXT_SUFFIX for swig | ||
|
||
--- | ||
faiss/python/CMakeLists.txt | 7 +++---- | ||
|
2 changes: 1 addition & 1 deletion
2
recipe/patches/0005-log-success-messages-and-errors-in-loader.py.patch
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,7 @@ | ||
From 86e41f82945d230e2ef16d178a4d2e442acebc61 Mon Sep 17 00:00:00 2001 | ||
From: "H. Vetinari" <[email protected]> | ||
Date: Thu, 11 Feb 2021 14:05:12 +0100 | ||
Subject: [PATCH 5/9] log success messages and errors in loader.py | ||
Subject: [PATCH 05/10] log success messages and errors in loader.py | ||
|
||
--- | ||
faiss/python/loader.py | 17 ++++++++++------- | ||
|
2 changes: 1 addition & 1 deletion
2
recipe/patches/0006-add-msvc-compatible-AVX2-switch-in-CMakeLists.txt.patch
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,7 @@ | ||
From 6cfb1b2cdbd5b13a96569e643f38bb9920fe3a74 Mon Sep 17 00:00:00 2001 | ||
From: "H. Vetinari" <[email protected]> | ||
Date: Thu, 11 Feb 2021 16:25:25 +0100 | ||
Subject: [PATCH 6/9] add msvc-compatible AVX2 switch in CMakeLists.txt | ||
Subject: [PATCH 06/10] add msvc-compatible AVX2 switch in CMakeLists.txt | ||
|
||
--- | ||
faiss/CMakeLists.txt | 7 ++++++- | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,7 @@ | ||
From c8412579efd780e4cb578ad80da4e09994201512 Mon Sep 17 00:00:00 2001 | ||
From: "H. Vetinari" <[email protected]> | ||
Date: Thu, 11 Feb 2021 17:51:01 +0100 | ||
Subject: [PATCH 7/9] msvc doesn't seet __F16C__ | ||
Subject: [PATCH 07/10] msvc doesn't set __F16C__ | ||
|
||
--- | ||
faiss/impl/ScalarQuantizer.cpp | 2 +- | ||
|
3 changes: 2 additions & 1 deletion
3
recipe/patches/0008-fix-missing-AVX-overloads-for-MSVC-in-ScalarQuantize.patch
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,8 @@ | ||
From 680a97d38a359303dcf4e70344bc6a4556e23697 Mon Sep 17 00:00:00 2001 | ||
From: "H. Vetinari" <[email protected]> | ||
Date: Fri, 12 Feb 2021 00:22:16 +0100 | ||
Subject: [PATCH 8/9] fix missing AVX-overloads for MSVC in ScalarQuantizer.cpp | ||
Subject: [PATCH 08/10] fix missing AVX-overloads for MSVC in | ||
ScalarQuantizer.cpp | ||
|
||
--- | ||
faiss/impl/ScalarQuantizer.cpp | 26 +++++++++++++------------- | ||
|
2 changes: 1 addition & 1 deletion
2
recipe/patches/0009-add-windows-compat-for-__SSEx__-macros.patch
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,7 @@ | ||
From a1e9f6a79a6211a1decb891f6b65a85192dac6c7 Mon Sep 17 00:00:00 2001 | ||
From: "H. Vetinari" <[email protected]> | ||
Date: Fri, 12 Feb 2021 01:19:15 +0100 | ||
Subject: [PATCH 9/9] add windows compat for __SSEx__ macros | ||
Subject: [PATCH 09/10] add windows compat for __SSEx__ macros | ||
|
||
--- | ||
faiss/impl/ScalarQuantizer.cpp | 1 + | ||
|
137 changes: 137 additions & 0 deletions
137
recipe/patches/0010-fix-missing-AVX-overloads-for-MSVC-in-distances_simd.patch
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,137 @@ | ||
From 1eff714e5c52e106c76a53cafb15f8c583b89ea7 Mon Sep 17 00:00:00 2001 | ||
From: "H. Vetinari" <[email protected]> | ||
Date: Fri, 12 Feb 2021 01:39:36 +0100 | ||
Subject: [PATCH 10/10] fix missing AVX-overloads for MSVC in | ||
distances_simd.cpp | ||
|
||
--- | ||
faiss/utils/distances_simd.cpp | 42 +++++++++++++++++----------------- | ||
1 file changed, 21 insertions(+), 21 deletions(-) | ||
|
||
diff --git a/faiss/utils/distances_simd.cpp b/faiss/utils/distances_simd.cpp | ||
index 49979bd3..f02c68bf 100644 | ||
--- a/faiss/utils/distances_simd.cpp | ||
+++ b/faiss/utils/distances_simd.cpp | ||
@@ -409,7 +409,7 @@ float fvec_inner_product (const float * x, | ||
} | ||
|
||
__m128 msum2 = _mm256_extractf128_ps(msum1, 1); | ||
- msum2 += _mm256_extractf128_ps(msum1, 0); | ||
+ msum2 = _mm_add_ps(msum2, _mm256_extractf128_ps(msum1, 0)); | ||
|
||
if (d >= 4) { | ||
__m128 mx = _mm_loadu_ps (x); x += 4; | ||
@@ -438,27 +438,27 @@ float fvec_L2sqr (const float * x, | ||
while (d >= 8) { | ||
__m256 mx = _mm256_loadu_ps (x); x += 8; | ||
__m256 my = _mm256_loadu_ps (y); y += 8; | ||
- const __m256 a_m_b1 = mx - my; | ||
- msum1 += a_m_b1 * a_m_b1; | ||
+ const __m256 a_m_b1 = _mm256_sub_ps(mx, my); | ||
+ msum1 = _mm256_add_ps(msum1, _mm256_mul_ps(a_m_b1, a_m_b1)); | ||
d -= 8; | ||
} | ||
|
||
__m128 msum2 = _mm256_extractf128_ps(msum1, 1); | ||
- msum2 += _mm256_extractf128_ps(msum1, 0); | ||
+ msum2 = _mm_add_ps(msum2, _mm256_extractf128_ps(msum1, 0)); | ||
|
||
if (d >= 4) { | ||
__m128 mx = _mm_loadu_ps (x); x += 4; | ||
__m128 my = _mm_loadu_ps (y); y += 4; | ||
- const __m128 a_m_b1 = mx - my; | ||
- msum2 += a_m_b1 * a_m_b1; | ||
+ const __m128 a_m_b1 = _mm_sub_ps(mx, my); | ||
+ msum2 = _mm_add_ps(msum2, _mm_mul_ps(a_m_b1, a_m_b1)); | ||
d -= 4; | ||
} | ||
|
||
if (d > 0) { | ||
__m128 mx = masked_read (d, x); | ||
__m128 my = masked_read (d, y); | ||
- __m128 a_m_b1 = mx - my; | ||
- msum2 += a_m_b1 * a_m_b1; | ||
+ __m128 a_m_b1 = _mm_sub_ps(mx, my); | ||
+ msum2 = _mm_add_ps(msum2, _mm_mul_ps(a_m_b1, a_m_b1)); | ||
} | ||
|
||
msum2 = _mm_hadd_ps (msum2, msum2); | ||
@@ -474,28 +474,28 @@ float fvec_L1 (const float * x, const float * y, size_t d) | ||
while (d >= 8) { | ||
__m256 mx = _mm256_loadu_ps (x); x += 8; | ||
__m256 my = _mm256_loadu_ps (y); y += 8; | ||
- const __m256 a_m_b = mx - my; | ||
- msum1 += _mm256_and_ps(signmask, a_m_b); | ||
+ const __m256 a_m_b = _mm256_sub_ps(mx, my); | ||
+ msum1 = _mm256_add_ps(msum1, _mm256_and_ps(signmask, a_m_b)); | ||
d -= 8; | ||
} | ||
|
||
__m128 msum2 = _mm256_extractf128_ps(msum1, 1); | ||
- msum2 += _mm256_extractf128_ps(msum1, 0); | ||
+ msum2 = _mm_add_ps(msum2, _mm256_extractf128_ps(msum1, 0)); | ||
__m128 signmask2 = __m128(_mm_set1_epi32 (0x7fffffffUL)); | ||
|
||
if (d >= 4) { | ||
__m128 mx = _mm_loadu_ps (x); x += 4; | ||
__m128 my = _mm_loadu_ps (y); y += 4; | ||
- const __m128 a_m_b = mx - my; | ||
- msum2 += _mm_and_ps(signmask2, a_m_b); | ||
+ const __m128 a_m_b = _mm_sub_ps(mx, my); | ||
+ msum2 = _mm_add_ps(msum2, _mm_and_ps(signmask2, a_m_b)); | ||
d -= 4; | ||
} | ||
|
||
if (d > 0) { | ||
__m128 mx = masked_read (d, x); | ||
__m128 my = masked_read (d, y); | ||
- __m128 a_m_b = mx - my; | ||
- msum2 += _mm_and_ps(signmask2, a_m_b); | ||
+ __m128 a_m_b = _mm_sub_ps(mx, my); | ||
+ msum2 = _mm_add_ps(msum2, _mm_and_ps(signmask2, a_m_b)); | ||
} | ||
|
||
msum2 = _mm_hadd_ps (msum2, msum2); | ||
@@ -511,7 +511,7 @@ float fvec_Linf (const float * x, const float * y, size_t d) | ||
while (d >= 8) { | ||
__m256 mx = _mm256_loadu_ps (x); x += 8; | ||
__m256 my = _mm256_loadu_ps (y); y += 8; | ||
- const __m256 a_m_b = mx - my; | ||
+ const __m256 a_m_b = _mm256_sub_ps(mx, my); | ||
msum1 = _mm256_max_ps(msum1, _mm256_and_ps(signmask, a_m_b)); | ||
d -= 8; | ||
} | ||
@@ -531,7 +531,7 @@ float fvec_Linf (const float * x, const float * y, size_t d) | ||
if (d > 0) { | ||
__m128 mx = masked_read (d, x); | ||
__m128 my = masked_read (d, y); | ||
- __m128 a_m_b = mx - my; | ||
+ __m128 a_m_b = _mm_sub_ps(mx, my); | ||
msum2 = _mm_max_ps(msum2, _mm_and_ps(signmask2, a_m_b)); | ||
} | ||
|
||
@@ -562,8 +562,8 @@ float fvec_L2sqr (const float * x, | ||
while (d >= 4) { | ||
__m128 mx = _mm_loadu_ps (x); x += 4; | ||
__m128 my = _mm_loadu_ps (y); y += 4; | ||
- const __m128 a_m_b1 = mx - my; | ||
- msum1 += a_m_b1 * a_m_b1; | ||
+ const __m128 a_m_b1 = _mm_sub_ps(mx, my); | ||
+ msum1 = _mm_add_ps(msum1, _mm_mul_ps(a_m_b1, a_m_b1)); | ||
d -= 4; | ||
} | ||
|
||
@@ -571,8 +571,8 @@ float fvec_L2sqr (const float * x, | ||
// add the last 1, 2 or 3 values | ||
__m128 mx = masked_read (d, x); | ||
__m128 my = masked_read (d, y); | ||
- __m128 a_m_b1 = mx - my; | ||
- msum1 += a_m_b1 * a_m_b1; | ||
+ __m128 a_m_b1 = _mm_sub_ps(mx, my); | ||
+ msum1 = _mm_add_ps(msum1, _mm_mul_ps(a_m_b1, a_m_b1)); | ||
} | ||
|
||
msum1 = _mm_hadd_ps (msum1, msum1); | ||
-- | ||
2.29.2.windows.3 | ||
|