Delete some obsolete cruft

The SSE/AVX code was written in 2012 and mostly hasn't been touched since then. In the time since then, runtime checking for SSE4.2 support has become pointless (CPUs supporting it were first released in 2008, so it was somewhat recent in 2012 and now is not at all recent), and we never actually wrote anything which requires AVX. There was also some SSE code which has been disabled since 2012. set_string_compare_method() and everyhing related to it has never actually been used by any SDK, and is not really the correct solution to the problem anyway. The encryption code used `ssize_t` in a few places because it was originally written to use `pread` and friends, but the functions it now calls use `size_t` and `ssize_t` doesn't exist on Windows. The use of ssize_t in the Windows networking code was simply incorrect; the winsock functions return `int`.
realm · Mar 15, 2023 · 5f2dda1 · 5f2dda1
1 parent 134c9c4
commit 5f2dda1
Show file tree

Hide file tree

Showing 15 changed files with 346 additions and 1,504 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -14,6 +14,12 @@
 ### Compatibility
 * Fileformat: Generates files with format v24. Reads and automatically upgrade from fileformat v10. If you want to upgrade from an earlier file format version you will have to use RealmCore v13.x.y or earlier.
 
+-----------
+
+### Internals
+* Remove `set_string_compare_method()` and everything related to it, which has never actually been used and was a bad solution to the problem it tried to solve.
+* Remove runtime CPUID checking as the most recent CPU features we rely on are now 15 years old.
+
 ----------------------------------------------
 # 13.4.2 Release notes
 

diff --git a/src/realm/array.cpp b/src/realm/array.cpp
@@ -686,23 +686,6 @@ int64_t Array::sum(size_t start, size_t end) const
 
         for (size_t t = 0; t < chunks; t++) {
             if (w == 1) {
-
-#if 0
-#if defined(USE_SSE42) && defined(_MSC_VER) && defined(REALM_PTR_64)
-                s += __popcnt64(data[t]);
-#elif !defined(_MSC_VER) && defined(USE_SSE42) && defined(REALM_PTR_64)
-                s += __builtin_popcountll(data[t]);
-#else
-                uint64_t a = data[t];
-                const uint64_t m1  = 0x5555555555555555ULL;
-                a -= (a >> 1) & m1;
-                a = (a & m2) + ((a >> 2) & m2);
-                a = (a + (a >> 4)) & m4;
-                a = (a * h01) >> 56;
-                s += a;
-#endif
-#endif
-
                 s += fast_popcount64(data[t]);
             }
             else if (w == 2) {
@@ -724,91 +707,89 @@ int64_t Array::sum(size_t start, size_t end) const
     }
 
 #ifdef REALM_COMPILER_SSE
-    if (sseavx<42>()) {
-
-        // 2000 items summed 500000 times, 8/16/32 bits, miliseconds:
-        // Naive, templated get<>: 391 371 374
-        // SSE:                     97 148 282
-
-        if ((w == 8 || w == 16 || w == 32) && end - start > sizeof(__m128i) * 8 / no0(w)) {
-            __m128i* data = reinterpret_cast<__m128i*>(m_data + start * w / 8);
-            __m128i sum_result = {0};
-            __m128i sum2;
-
-            size_t chunks = (end - start) * w / 8 / sizeof(__m128i);
-
-            for (size_t t = 0; t < chunks; t++) {
-                if (w == 8) {
-                    /*
-                    // 469 ms AND disadvantage of handling max 64k elements before overflow
-                    __m128i vl = _mm_cvtepi8_epi16(data[t]);
-                    __m128i vh = data[t];
-                    vh.m128i_i64[0] = vh.m128i_i64[1];
-                    vh = _mm_cvtepi8_epi16(vh);
-                    sum_result = _mm_add_epi16(sum_result, vl);
-                    sum_result = _mm_add_epi16(sum_result, vh);
-                    */
-
-                    /*
-                    // 424 ms
-                    __m128i vl = _mm_unpacklo_epi8(data[t], _mm_set1_epi8(0));
-                    __m128i vh = _mm_unpackhi_epi8(data[t], _mm_set1_epi8(0));
-                    sum_result = _mm_add_epi32(sum_result, _mm_madd_epi16(vl, _mm_set1_epi16(1)));
-                    sum_result = _mm_add_epi32(sum_result, _mm_madd_epi16(vh, _mm_set1_epi16(1)));
-                    */
-
-                    __m128i vl = _mm_cvtepi8_epi16(data[t]); // sign extend lower words 8->16
-                    __m128i vh = data[t];
-                    vh = _mm_srli_si128(vh, 8); // v >>= 64
-                    vh = _mm_cvtepi8_epi16(vh); // sign extend lower words 8->16
-                    __m128i sum1 = _mm_add_epi16(vl, vh);
-                    __m128i sumH = _mm_cvtepi16_epi32(sum1);
-                    __m128i sumL = _mm_srli_si128(sum1, 8); // v >>= 64
-                    sumL = _mm_cvtepi16_epi32(sumL);
-                    sum_result = _mm_add_epi32(sum_result, sumL);
-                    sum_result = _mm_add_epi32(sum_result, sumH);
-                }
-                else if (w == 16) {
-                    // todo, can overflow for array size > 2^32
-                    __m128i vl = _mm_cvtepi16_epi32(data[t]); // sign extend lower words 16->32
-                    __m128i vh = data[t];
-                    vh = _mm_srli_si128(vh, 8);  // v >>= 64
-                    vh = _mm_cvtepi16_epi32(vh); // sign extend lower words 16->32
-                    sum_result = _mm_add_epi32(sum_result, vl);
-                    sum_result = _mm_add_epi32(sum_result, vh);
-                }
-                else if (w == 32) {
-                    __m128i v = data[t];
-                    __m128i v0 = _mm_cvtepi32_epi64(v); // sign extend lower dwords 32->64
-                    v = _mm_srli_si128(v, 8);           // v >>= 64
-                    __m128i v1 = _mm_cvtepi32_epi64(v); // sign extend lower dwords 32->64
-                    sum_result = _mm_add_epi64(sum_result, v0);
-                    sum_result = _mm_add_epi64(sum_result, v1);
-
-                    /*
-                    __m128i m = _mm_set1_epi32(0xc000);             // test if overflow could happen (still need
-                    underflow test).
-                    __m128i mm = _mm_and_si128(data[t], m);
-                    zz = _mm_or_si128(mm, zz);
-                    sum_result = _mm_add_epi32(sum_result, data[t]);
-                    */
-                }
-            }
-            start += sizeof(__m128i) * 8 / no0(w) * chunks;
 
-            // prevent taking address of 'state' to make the compiler keep it in SSE register in above loop
-            // (vc2010/gcc4.6)
-            sum2 = sum_result;
+    // 2000 items summed 500000 times, 8/16/32 bits, miliseconds:
+    // Naive, templated get<>: 391 371 374
+    // SSE:                     97 148 282
+
+    if ((w == 8 || w == 16 || w == 32) && end - start > sizeof(__m128i) * 8 / no0(w)) {
+        __m128i* data = reinterpret_cast<__m128i*>(m_data + start * w / 8);
+        __m128i sum_result = {0};
+        __m128i sum2;
 
-            // Avoid aliasing bug where sum2 might not yet be initialized when accessed by get_universal
-            char sum3[sizeof sum2];
-            memcpy(&sum3, &sum2, sizeof sum2);
+        size_t chunks = (end - start) * w / 8 / sizeof(__m128i);
 
-            // Sum elements of sum
-            for (size_t t = 0; t < sizeof(__m128i) * 8 / ((w == 8 || w == 16) ? 32 : 64); ++t) {
-                int64_t v = get_universal < (w == 8 || w == 16) ? 32 : 64 > (reinterpret_cast<char*>(&sum3), t);
-                s += v;
+        for (size_t t = 0; t < chunks; t++) {
+            if (w == 8) {
+                /*
+                 // 469 ms AND disadvantage of handling max 64k elements before overflow
+                 __m128i vl = _mm_cvtepi8_epi16(data[t]);
+                 __m128i vh = data[t];
+                 vh.m128i_i64[0] = vh.m128i_i64[1];
+                 vh = _mm_cvtepi8_epi16(vh);
+                 sum_result = _mm_add_epi16(sum_result, vl);
+                 sum_result = _mm_add_epi16(sum_result, vh);
+                 */
+
+                /*
+                 // 424 ms
+                 __m128i vl = _mm_unpacklo_epi8(data[t], _mm_set1_epi8(0));
+                 __m128i vh = _mm_unpackhi_epi8(data[t], _mm_set1_epi8(0));
+                 sum_result = _mm_add_epi32(sum_result, _mm_madd_epi16(vl, _mm_set1_epi16(1)));
+                 sum_result = _mm_add_epi32(sum_result, _mm_madd_epi16(vh, _mm_set1_epi16(1)));
+                 */
+
+                __m128i vl = _mm_cvtepi8_epi16(data[t]); // sign extend lower words 8->16
+                __m128i vh = data[t];
+                vh = _mm_srli_si128(vh, 8); // v >>= 64
+                vh = _mm_cvtepi8_epi16(vh); // sign extend lower words 8->16
+                __m128i sum1 = _mm_add_epi16(vl, vh);
+                __m128i sumH = _mm_cvtepi16_epi32(sum1);
+                __m128i sumL = _mm_srli_si128(sum1, 8); // v >>= 64
+                sumL = _mm_cvtepi16_epi32(sumL);
+                sum_result = _mm_add_epi32(sum_result, sumL);
+                sum_result = _mm_add_epi32(sum_result, sumH);
+            }
+            else if (w == 16) {
+                // todo, can overflow for array size > 2^32
+                __m128i vl = _mm_cvtepi16_epi32(data[t]); // sign extend lower words 16->32
+                __m128i vh = data[t];
+                vh = _mm_srli_si128(vh, 8);  // v >>= 64
+                vh = _mm_cvtepi16_epi32(vh); // sign extend lower words 16->32
+                sum_result = _mm_add_epi32(sum_result, vl);
+                sum_result = _mm_add_epi32(sum_result, vh);
             }
+            else if (w == 32) {
+                __m128i v = data[t];
+                __m128i v0 = _mm_cvtepi32_epi64(v); // sign extend lower dwords 32->64
+                v = _mm_srli_si128(v, 8);           // v >>= 64
+                __m128i v1 = _mm_cvtepi32_epi64(v); // sign extend lower dwords 32->64
+                sum_result = _mm_add_epi64(sum_result, v0);
+                sum_result = _mm_add_epi64(sum_result, v1);
+
+                /*
+                 __m128i m = _mm_set1_epi32(0xc000);             // test if overflow could happen (still need
+                 underflow test).
+                 __m128i mm = _mm_and_si128(data[t], m);
+                 zz = _mm_or_si128(mm, zz);
+                 sum_result = _mm_add_epi32(sum_result, data[t]);
+                 */
+            }
+        }
+        start += sizeof(__m128i) * 8 / no0(w) * chunks;
+
+        // prevent taking address of 'state' to make the compiler keep it in SSE register in above loop
+        // (vc2010/gcc4.6)
+        sum2 = sum_result;
+
+        // Avoid aliasing bug where sum2 might not yet be initialized when accessed by get_universal
+        char sum3[sizeof sum2];
+        memcpy(&sum3, &sum2, sizeof sum2);
+
+        // Sum elements of sum
+        for (size_t t = 0; t < sizeof(__m128i) * 8 / ((w == 8 || w == 16) ? 32 : 64); ++t) {
+            int64_t v = get_universal < (w == 8 || w == 16) ? 32 : 64 > (reinterpret_cast<char*>(&sum3), t);
+            s += v;
         }
     }
 #endif

diff --git a/src/realm/array_with_find.cpp b/src/realm/array_with_find.cpp
@@ -79,5 +79,4 @@ size_t ArrayWithFind::first_set_bit64(int64_t v) const
     return first_set_bit(v1) + 32;
 }
 
-
 } // namespace realm
diff --git a/src/realm/array_with_find.hpp b/src/realm/array_with_find.hpp
@@ -369,11 +369,10 @@ bool ArrayWithFind::find_optimized(int64_t value, size_t start, size_t end, size
     REALM_ASSERT_3(m_array.m_width, !=, 0);
 
 #if defined(REALM_COMPILER_SSE)
-    // Only use SSE if payload is at least one SSE chunk (128 bits) in size. Also note taht SSE doesn't support
+    // Only use SSE if payload is at least one SSE chunk (128 bits) in size. Also note that SSE doesn't support
     // Less-than comparison for 64-bit values.
-    if ((!(std::is_same<cond, Less>::value && m_array.m_width == 64)) && end - start2 >= sizeof(__m128i) &&
-        m_array.m_width >= 8 &&
-        (sseavx<42>() || (sseavx<30>() && std::is_same<cond, Equal>::value && m_array.m_width < 64))) {
+    if ((!(std::is_same_v<cond, Less> && m_array.m_width == 64)) && end - start2 >= sizeof(__m128i) &&
+        m_array.m_width >= 8) {
 
         // find_sse() must start2 at 16-byte boundary, so search area before that using compare_equality()
         __m128i* const a =
@@ -388,19 +387,10 @@ bool ArrayWithFind::find_optimized(int64_t value, size_t start, size_t end, size
 
         // Search aligned area with SSE
         if (b > a) {
-            if (sseavx<42>()) {
-                if (!find_sse<cond, bitwidth, Callback>(
-                        value, a, b - a, state,
-                        baseindex + ((reinterpret_cast<char*>(a) - m_array.m_data) * 8 / no0(bitwidth)), callback))
-                    return false;
-            }
-            else if (sseavx<30>()) {
-
-                if (!find_sse<Equal, bitwidth, Callback>(
-                        value, a, b - a, state,
-                        baseindex + ((reinterpret_cast<char*>(a) - m_array.m_data) * 8 / no0(bitwidth)), callback))
-                    return false;
-            }
+            if (!find_sse<cond, bitwidth, Callback>(
+                    value, a, b - a, state,
+                    baseindex + ((reinterpret_cast<char*>(a) - m_array.m_data) * 8 / no0(bitwidth)), callback))
+                return false;
         }
 
         // Search remainder with compare_equality()
@@ -909,7 +899,7 @@ bool ArrayWithFind::compare_leafs_4(const Array* foreign, size_t start, size_t e
 
 
 #if defined(REALM_COMPILER_SSE)
-    if (sseavx<42>() && width == foreign_width && (width == 8 || width == 16 || width == 32)) {
+    if (width == foreign_width && (width == 8 || width == 16 || width == 32)) {
         // We can only use SSE if both bitwidths are equal and above 8 bits and all values are signed
         // and the two arrays are aligned the same way
         if ((reinterpret_cast<size_t>(m_array.m_data) & 0xf) == (reinterpret_cast<size_t>(foreign_m_data) & 0xf)) {

diff --git a/src/realm/group.cpp b/src/realm/group.cpp
@@ -39,20 +39,6 @@
 using namespace realm;
 using namespace realm::util;
 
-namespace {
-
-class Initialization {
-public:
-    Initialization()
-    {
-        realm::cpuid_init();
-    }
-};
-
-Initialization initialization;
-
-} // anonymous namespace
-
 Group::Group()
     : m_local_alloc(new SlabAlloc)
     , m_alloc(*m_local_alloc) // Throws

diff --git a/src/realm/realm_nmmintrin.h b/src/realm/realm_nmmintrin.h
@@ -36,87 +36,6 @@
 
 namespace realm {
 
-#if 0
-#ifdef REALM_COMPILER_AVX
-typedef float __m256 __attribute__((__vector_size__(32), __may_alias__));
-typedef double __m256d __attribute__((__vector_size__(32), __may_alias__));
-
-const int _CMP_EQ_OQ = 0x00; // Equal (ordered, non-signaling)
-const int _CMP_NEQ_OQ = 0x0c; // Not-equal (ordered, non-signaling)
-const int _CMP_LT_OQ = 0x11; // Less-than (ordered, non-signaling)
-const int _CMP_LE_OQ = 0x12; // Less-than-or-equal (ordered, non-signaling)
-const int _CMP_GE_OQ = 0x1d; // Greater-than-or-equal (ordered, non-signaling)
-const int _CMP_GT_OQ = 0x1e; // Greater-than (ordered, non-signaling)
-
-
-template<int op>
-static int movemask_cmp_ps(__m256* y1, __m256* y2)
-{
-    int ret;
-    __asm__("vmovaps %0, %%ymm0"                    :                   : "m"(*y1)                      : "%xmm0"   );
-    __asm__("vmovaps %0, %%ymm1"                    :                   : "m"(*y2)                      : "%xmm1"   );
-    __asm__("vcmpps %0, %%ymm0, %%ymm1, %%ymm0"     :                   : "I"(op)                       : "%xmm0"   );
-    __asm__("vmovmskps %%ymm0, %0"                  : "=r"(ret)         :                               :           );
-    return ret;
-}
-
-template<int op>
-static inline int movemask_cmp_pd(__m256d* y1, __m256d* y2)
-{
-    int ret;
-    __asm__("vmovapd %0, %%ymm0"                    :                   : "m"(*y1)                      : "%xmm0"   );
-    __asm__("vmovapd %0, %%ymm1"                    :                   : "m"(*y2)                      : "%xmm1"   );
-    __asm__("vcmppd %0, %%ymm0, %%ymm1, %%ymm0"     :                   : "I"(op)                       : "%xmm0"   );
-    __asm__("vmovmskpd %%ymm0, %0"                  : "=r"(ret)         :                               :           );
-    return ret;
-}
-
-
-
-static inline int movemask_cmp_ps(__m256* y1, __m256* y2, int op)
-{
-    // todo, use constexpr;
-    if (op == _CMP_EQ_OQ)
-        return movemask_cmp_ps<_CMP_NEQ_OQ>(y1, y2);
-    else if (op == _CMP_NEQ_OQ)
-        return movemask_cmp_ps<_CMP_NEQ_OQ>(y1, y2);
-    else if (op == _CMP_LT_OQ)
-        return movemask_cmp_ps<_CMP_LT_OQ>(y1, y2);
-    else if (op == _CMP_LE_OQ)
-        return movemask_cmp_ps<_CMP_LE_OQ>(y1, y2);
-    else if (op == _CMP_GE_OQ)
-        return movemask_cmp_ps<_CMP_GE_OQ>(y1, y2);
-    else if (op == _CMP_GT_OQ)
-        return movemask_cmp_ps<_CMP_GT_OQ>(y1, y2);
-
-    REALM_ASSERT(false);
-    return 0;
-}
-
-static inline int movemask_cmp_pd(__m256d* y1, __m256d* y2, int op)
-{
-    // todo, use constexpr;
-    if (op == _CMP_EQ_OQ)
-        return movemask_cmp_pd<_CMP_NEQ_OQ>(y1, y2);
-    else if (op == _CMP_NEQ_OQ)
-        return movemask_cmp_pd<_CMP_NEQ_OQ>(y1, y2);
-    else if (op == _CMP_LT_OQ)
-        return movemask_cmp_pd<_CMP_LT_OQ>(y1, y2);
-    else if (op == _CMP_LE_OQ)
-        return movemask_cmp_pd<_CMP_LE_OQ>(y1, y2);
-    else if (op == _CMP_GE_OQ)
-        return movemask_cmp_pd<_CMP_GE_OQ>(y1, y2);
-    else if (op == _CMP_GT_OQ)
-        return movemask_cmp_pd<_CMP_GT_OQ>(y1, y2);
-
-    REALM_ASSERT(false);
-    return 0;
-}
-
-
-#endif
-#endif
-
 // Instructions introduced by SSE 3 and 4.2
 static inline __m128i _mm_cmpgt_epi64(__m128i xmm1, __m128i xmm2)
 {

diff --git a/src/realm/sync/network/network.cpp b/src/realm/sync/network/network.cpp
@@ -1943,7 +1943,7 @@ std::size_t Service::Descriptor::read_some(char* buffer, std::size_t size, std::
     for (;;) {
         int flags = 0;
 #ifdef _WIN32
-        ssize_t ret = ::recv(m_fd, buffer, int(size), flags);
+        int ret = ::recv(m_fd, buffer, int(size), flags);
         if (ret == SOCKET_ERROR) {
             int err = WSAGetLastError();
             // Retry on interruption by system signal
@@ -2028,7 +2028,7 @@ std::size_t Service::Descriptor::write_some(const char* data, std::size_t size,
         flags |= MSG_NOSIGNAL;
 #endif
 #ifdef _WIN32
-        ssize_t ret = ::send(m_fd, data, int(size), flags);
+        int ret = ::send(m_fd, data, int(size), flags);
         if (ret == SOCKET_ERROR) {
             int err = WSAGetLastError();
             // Retry on interruption by system signal
Original file line number	Diff line number	Diff line change
Expand Up		@@ -79,5 +79,4 @@ size_t ArrayWithFind::first_set_bit64(int64_t v) const
		return first_set_bit(v1) + 32;
		}


		} // namespace realm