From 6460c25c9e021be80b3e099d5f7b32e9cc0f0665 Mon Sep 17 00:00:00 2001
From: iphydf <iphydf@users.noreply.github.com>
Date: Sat, 3 Feb 2024 14:38:47 +0000
Subject: [PATCH] refactor: Use `merge_sort` instead of `qsort` for sorting.

---
 .circleci/config.yml                    |   1 +
 CMakeLists.txt                          |   2 +
 other/analysis/gen-file.sh              |   4 +-
 other/docker/modules/check              |  15 +-
 other/docker/modules/modules.Dockerfile |   1 +
 toxcore/BUILD.bazel                     |  55 ++++++-
 toxcore/DHT.c                           | 162 +++++++++++++--------
 toxcore/Makefile.inc                    |   2 +
 toxcore/crypto_core_test.cc             |  29 ++++
 toxcore/group.c                         |  71 +++++++--
 toxcore/onion_announce.c                | 109 +++++++++-----
 toxcore/onion_client.c                  | 107 +++++++++-----
 toxcore/sort.c                          | 182 ++++++++++++++++++++++++
 toxcore/sort.h                          | 116 +++++++++++++++
 toxcore/sort_bench.cc                   | 140 ++++++++++++++++++
 toxcore/sort_test.cc                    |  79 ++++++++++
 toxcore/sort_test_util.cc               |  32 +++++
 toxcore/sort_test_util.hh               |  54 +++++++
 toxcore/util.c                          |   3 +-
 toxcore/util_test.cc                    |  33 +----
 20 files changed, 1024 insertions(+), 173 deletions(-)
 create mode 100644 toxcore/sort.c
 create mode 100644 toxcore/sort.h
 create mode 100644 toxcore/sort_bench.cc
 create mode 100644 toxcore/sort_test.cc
 create mode 100644 toxcore/sort_test_util.cc
 create mode 100644 toxcore/sort_test_util.hh

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 3fae7ff0c6..8692cd0a1b 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -68,6 +68,7 @@ jobs:
             clang
             cmake
             git
+            libbenchmark-dev
             libconfig-dev
             libgmock-dev
             libgtest-dev
diff --git a/CMakeLists.txt b/CMakeLists.txt
index d5cbbbdb0a..4d6cf22fa9 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -323,6 +323,8 @@ set(toxcore_SOURCES
   toxcore/ping.h
   toxcore/shared_key_cache.c
   toxcore/shared_key_cache.h
+  toxcore/sort.c
+  toxcore/sort.h
   toxcore/state.c
   toxcore/state.h
   toxcore/TCP_client.c
diff --git a/other/analysis/gen-file.sh b/other/analysis/gen-file.sh
index 10edd0b531..6af978c887 100644
--- a/other/analysis/gen-file.sh
+++ b/other/analysis/gen-file.sh
@@ -15,7 +15,7 @@ CPPFLAGS+=("-Itoxav")
 CPPFLAGS+=("-Itoxencryptsave")
 CPPFLAGS+=("-Ithird_party/cmp")
 
-LDFLAGS=("-lopus" "-lsodium" "-lvpx" "-lpthread" "-lconfig" "-lgmock" "-lgtest")
+LDFLAGS=("-lopus" "-lsodium" "-lvpx" "-lpthread" "-lconfig" "-lgmock" "-lgtest" "-lbenchmark")
 LDFLAGS+=("-fuse-ld=gold")
 LDFLAGS+=("-Wl,--detect-odr-violations")
 LDFLAGS+=("-Wl,--warn-common")
@@ -27,7 +27,7 @@ put() {
   if [ "$SKIP_LINES" = "" ]; then
     echo "#line 1 \"$1\"" >>amalgamation.cc
   fi
-  cat "$1" >>amalgamation.cc
+  grep -v '^BENCHMARK_MAIN' "$1" >>amalgamation.cc
 }
 
 putmain() {
diff --git a/other/docker/modules/check b/other/docker/modules/check
index 0249efe486..e62be62c8a 100755
--- a/other/docker/modules/check
+++ b/other/docker/modules/check
@@ -50,6 +50,10 @@ module "//c-toxcore/third_party:cmp" {
 module "//c-toxcore/toxencryptsave:defines" {
   header "toxencryptsave/defines.h"
 }
+module "@benchmark" {
+  textual header "/usr/include/benchmark/benchmark.h"
+  use std
+}
 module "@com_google_googletest//:gtest" {
   textual header "/usr/include/gmock/gmock.h"
   textual header "/usr/include/gtest/gtest.h"
@@ -83,9 +87,9 @@ class Context:
         pass
 
     def bzl_exports_files(
-            self,
-            srcs: list[str],
-            visibility: Optional[list[str]] = None,
+        self,
+        srcs: list[str],
+        visibility: Optional[list[str]] = None,
     ) -> None:
         pass
 
@@ -110,7 +114,7 @@ class Context:
             hdrs,
         }
 
-    def bzl_cc_test(
+    def bzl_cc_binary(
             self,
             name: str,
             srcs: Iterable[str] = tuple(),
@@ -161,7 +165,8 @@ def main() -> None:
                     "load": ctx.bzl_load,
                     "exports_files": ctx.bzl_exports_files,
                     "cc_library": ctx.bzl_cc_library,
-                    "cc_test": ctx.bzl_cc_test,
+                    "cc_binary": ctx.bzl_cc_binary,
+                    "cc_test": ctx.bzl_cc_binary,
                     "cc_fuzz_test": ctx.bzl_cc_fuzz_test,
                     "select": ctx.bzl_select,
                     "glob": ctx.bzl_glob,
diff --git a/other/docker/modules/modules.Dockerfile b/other/docker/modules/modules.Dockerfile
index 5fb03a2504..ca4a7059c8 100644
--- a/other/docker/modules/modules.Dockerfile
+++ b/other/docker/modules/modules.Dockerfile
@@ -3,6 +3,7 @@ FROM alpine:3.19.0
 
 RUN ["apk", "add", "--no-cache", \
  "bash", \
+ "benchmark-dev", \
  "clang", \
  "gtest-dev", \
  "libconfig-dev", \
diff --git a/toxcore/BUILD.bazel b/toxcore/BUILD.bazel
index 99a350c20a..63cc08a138 100644
--- a/toxcore/BUILD.bazel
+++ b/toxcore/BUILD.bazel
@@ -1,4 +1,4 @@
-load("@rules_cc//cc:defs.bzl", "cc_library", "cc_test")
+load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_library", "cc_test")
 load("@rules_fuzzing//fuzzing:cc_defs.bzl", "cc_fuzz_test")
 
 exports_files(
@@ -100,14 +100,58 @@ cc_test(
     size = "small",
     srcs = ["util_test.cc"],
     deps = [
-        ":crypto_core",
-        ":crypto_core_test_util",
         ":util",
         "@com_google_googletest//:gtest",
         "@com_google_googletest//:gtest_main",
     ],
 )
 
+cc_library(
+    name = "sort",
+    srcs = ["sort.c"],
+    hdrs = ["sort.h"],
+    deps = [
+        ":attributes",
+        ":ccompat",
+        ":util",
+    ],
+)
+
+cc_library(
+    name = "sort_test_util",
+    testonly = True,
+    srcs = ["sort_test_util.cc"],
+    hdrs = ["sort_test_util.hh"],
+    deps = [
+        ":sort",
+        ":util",
+    ],
+)
+
+cc_test(
+    name = "sort_test",
+    size = "small",
+    srcs = ["sort_test.cc"],
+    deps = [
+        ":sort",
+        ":sort_test_util",
+        "@com_google_googletest//:gtest",
+        "@com_google_googletest//:gtest_main",
+    ],
+)
+
+cc_binary(
+    name = "sort_bench",
+    testonly = True,
+    srcs = ["sort_bench.cc"],
+    deps = [
+        ":mem",
+        ":sort",
+        ":sort_test_util",
+        "@benchmark",
+    ],
+)
+
 cc_library(
     name = "logger",
     srcs = ["logger.c"],
@@ -439,6 +483,7 @@ cc_library(
         ":network",
         ":ping_array",
         ":shared_key_cache",
+        ":sort",
         ":state",
         ":util",
     ],
@@ -718,6 +763,7 @@ cc_library(
         ":network",
         ":onion",
         ":shared_key_cache",
+        ":sort",
         ":timed_auth",
         ":util",
     ],
@@ -821,6 +867,7 @@ cc_library(
         ":onion",
         ":onion_announce",
         ":ping_array",
+        ":sort",
         ":timed_auth",
         ":util",
     ],
@@ -986,9 +1033,11 @@ cc_library(
         ":crypto_core",
         ":friend_connection",
         ":logger",
+        ":mem",
         ":mono_time",
         ":net_crypto",
         ":network",
+        ":sort",
         ":state",
         ":util",
     ],
diff --git a/toxcore/DHT.c b/toxcore/DHT.c
index 0e8710b795..da4ab11f09 100644
--- a/toxcore/DHT.c
+++ b/toxcore/DHT.c
@@ -9,7 +9,6 @@
 #include "DHT.h"
 
 #include <assert.h>
-#include <stdlib.h>
 #include <string.h>
 
 #include "LAN_discovery.h"
@@ -24,7 +23,9 @@
 #include "ping.h"
 #include "ping_array.h"
 #include "shared_key_cache.h"
+#include "sort.h"
 #include "state.h"
+#include "util.h"
 
 /** The timeout after which a node is discarded completely. */
 #define KILL_NODE_TIMEOUT (BAD_NODE_TIMEOUT + PING_INTERVAL)
@@ -755,49 +756,6 @@ int get_close_nodes(
                is_lan, want_announce);
 }
 
-typedef struct DHT_Cmp_Data {
-    uint64_t cur_time;
-    const uint8_t *base_public_key;
-    Client_data entry;
-} DHT_Cmp_Data;
-
-non_null()
-static int dht_cmp_entry(const void *a, const void *b)
-{
-    const DHT_Cmp_Data *cmp1 = (const DHT_Cmp_Data *)a;
-    const DHT_Cmp_Data *cmp2 = (const DHT_Cmp_Data *)b;
-    const Client_data entry1 = cmp1->entry;
-    const Client_data entry2 = cmp2->entry;
-    const uint8_t *cmp_public_key = cmp1->base_public_key;
-
-    const bool t1 = assoc_timeout(cmp1->cur_time, &entry1.assoc4) && assoc_timeout(cmp1->cur_time, &entry1.assoc6);
-    const bool t2 = assoc_timeout(cmp2->cur_time, &entry2.assoc4) && assoc_timeout(cmp2->cur_time, &entry2.assoc6);
-
-    if (t1 && t2) {
-        return 0;
-    }
-
-    if (t1) {
-        return -1;
-    }
-
-    if (t2) {
-        return 1;
-    }
-
-    const int closest = id_closest(cmp_public_key, entry1.public_key, entry2.public_key);
-
-    if (closest == 1) {
-        return 1;
-    }
-
-    if (closest == 2) {
-        return -1;
-    }
-
-    return 0;
-}
-
 #ifdef CHECK_ANNOUNCE_NODE
 non_null()
 static void set_announce_node_in_list(Client_data *list, uint32_t list_len, const uint8_t *public_key)
@@ -914,31 +872,117 @@ static bool store_node_ok(const Client_data *client, uint64_t cur_time, const ui
            || id_closest(comp_public_key, client->public_key, public_key) == 2;
 }
 
+typedef struct Client_data_Cmp {
+    const Memory *mem;
+    uint64_t cur_time;
+    const uint8_t *comp_public_key;
+} Client_data_Cmp;
+
 non_null()
-static void sort_client_list(const Memory *mem, Client_data *list, uint64_t cur_time, unsigned int length,
-                             const uint8_t *comp_public_key)
+static int client_data_cmp(const Client_data_Cmp *cmp, const Client_data *entry1, const Client_data *entry2)
 {
-    // Pass comp_public_key to qsort with each Client_data entry, so the
-    // comparison function can use it as the base of comparison.
-    DHT_Cmp_Data *cmp_list = (DHT_Cmp_Data *)mem_valloc(mem, length, sizeof(DHT_Cmp_Data));
+    const bool t1 = assoc_timeout(cmp->cur_time, &entry1->assoc4) && assoc_timeout(cmp->cur_time, &entry1->assoc6);
+    const bool t2 = assoc_timeout(cmp->cur_time, &entry2->assoc4) && assoc_timeout(cmp->cur_time, &entry2->assoc6);
 
-    if (cmp_list == nullptr) {
-        return;
+    if (t1 && t2) {
+        return 0;
     }
 
-    for (uint32_t i = 0; i < length; ++i) {
-        cmp_list[i].cur_time = cur_time;
-        cmp_list[i].base_public_key = comp_public_key;
-        cmp_list[i].entry = list[i];
+    if (t1) {
+        return -1;
     }
 
-    qsort(cmp_list, length, sizeof(DHT_Cmp_Data), dht_cmp_entry);
+    if (t2) {
+        return 1;
+    }
 
-    for (uint32_t i = 0; i < length; ++i) {
-        list[i] = cmp_list[i].entry;
+    const int closest = id_closest(cmp->comp_public_key, entry1->public_key, entry2->public_key);
+
+    if (closest == 1) {
+        return 1;
+    }
+
+    if (closest == 2) {
+        return -1;
+    }
+
+    return 0;
+}
+
+non_null()
+static bool client_data_less_handler(const void *object, const void *a, const void *b)
+{
+    const Client_data_Cmp *cmp = (const Client_data_Cmp *)object;
+    const Client_data *entry1 = (const Client_data *)a;
+    const Client_data *entry2 = (const Client_data *)b;
+
+    return client_data_cmp(cmp, entry1, entry2) < 0;
+}
+
+non_null()
+static const void *client_data_get_handler(const void *arr, uint32_t index)
+{
+    const Client_data *entries = (const Client_data *)arr;
+    return &entries[index];
+}
+
+non_null()
+static void client_data_set_handler(void *arr, uint32_t index, const void *val)
+{
+    Client_data *entries = (Client_data *)arr;
+    const Client_data *entry = (const Client_data *)val;
+    entries[index] = *entry;
+}
+
+non_null()
+static void *client_data_subarr_handler(void *arr, uint32_t index, uint32_t size)
+{
+    Client_data *entries = (Client_data *)arr;
+    return &entries[index];
+}
+
+non_null()
+static void *client_data_alloc_handler(const void *object, uint32_t size)
+{
+    const Client_data_Cmp *cmp = (const Client_data_Cmp *)object;
+    Client_data *tmp = (Client_data *)mem_valloc(cmp->mem, size, sizeof(Client_data));
+
+    if (tmp == nullptr) {
+        return nullptr;
     }
 
-    mem_delete(mem, cmp_list);
+    return tmp;
+}
+
+non_null()
+static void client_data_delete_handler(const void *object, void *arr, uint32_t size)
+{
+    const Client_data_Cmp *cmp = (const Client_data_Cmp *)object;
+    mem_delete(cmp->mem, arr);
+}
+
+static const Sort_Funcs client_data_cmp_funcs = {
+    client_data_less_handler,
+    client_data_get_handler,
+    client_data_set_handler,
+    client_data_subarr_handler,
+    client_data_alloc_handler,
+    client_data_delete_handler,
+};
+
+non_null()
+static void sort_client_list(const Memory *mem, Client_data *list, uint64_t cur_time, unsigned int length,
+                             const uint8_t *comp_public_key)
+{
+    // Bundle mem, cur_time and comp_public_key into a single context object;
+    // the Sort_Funcs callbacks receive it as `object` (e.g. as comparison base).
+    const Client_data_Cmp cmp = {
+        mem,
+        cur_time,
+        comp_public_key,
+    };
+
+    merge_sort(list, length, &cmp, &client_data_cmp_funcs);
+}
 
 non_null()
diff --git a/toxcore/Makefile.inc b/toxcore/Makefile.inc
index db3e193243..d4d64bcdb8 100644
--- a/toxcore/Makefile.inc
+++ b/toxcore/Makefile.inc
@@ -86,6 +86,8 @@ libtoxcore_la_SOURCES = ../third_party/cmp/cmp.c \
                         ../toxcore/ping.c \
                         ../toxcore/shared_key_cache.h \
                         ../toxcore/shared_key_cache.c \
+                        ../toxcore/sort.h \
+                        ../toxcore/sort.c \
                         ../toxcore/state.h \
                         ../toxcore/state.c \
                         ../toxcore/tox.h \
diff --git a/toxcore/crypto_core_test.cc b/toxcore/crypto_core_test.cc
index 6e67659607..6203afbce5 100644
--- a/toxcore/crypto_core_test.cc
+++ b/toxcore/crypto_core_test.cc
@@ -18,6 +18,35 @@ using SecretKey = std::array<uint8_t, CRYPTO_SECRET_KEY_SIZE>;
 using Signature = std::array<uint8_t, CRYPTO_SIGNATURE_SIZE>;
 using Nonce = std::array<uint8_t, CRYPTO_NONCE_SIZE>;
 
+TEST(PkEqual, TwoRandomIdsAreNotEqual)
+{
+    std::mt19937 rng;
+    std::uniform_int_distribution<unsigned short> dist{0, UINT8_MAX};
+
+    uint8_t pk1[CRYPTO_PUBLIC_KEY_SIZE];
+    uint8_t pk2[CRYPTO_PUBLIC_KEY_SIZE];
+
+    std::generate(std::begin(pk1), std::end(pk1), [&]() { return dist(rng); });
+    std::generate(std::begin(pk2), std::end(pk2), [&]() { return dist(rng); });
+
+    EXPECT_FALSE(pk_equal(pk1, pk2));
+}
+
+TEST(PkEqual, IdCopyMakesKeysEqual)
+{
+    std::mt19937 rng;
+    std::uniform_int_distribution<unsigned short> dist{0, UINT8_MAX};
+
+    uint8_t pk1[CRYPTO_PUBLIC_KEY_SIZE];
+    uint8_t pk2[CRYPTO_PUBLIC_KEY_SIZE] = {0};
+
+    std::generate(std::begin(pk1), std::end(pk1), [&]() { return dist(rng); });
+
+    pk_copy(pk2, pk1);
+
+    EXPECT_TRUE(pk_equal(pk1, pk2));
+}
+
 TEST(CryptoCore, EncryptLargeData)
 {
     Test_Memory mem;
diff --git a/toxcore/group.c b/toxcore/group.c
index 3d96b962de..9d2783dd4a 100644
--- a/toxcore/group.c
+++ b/toxcore/group.c
@@ -9,7 +9,7 @@
 #include "group.h"
 
 #include <assert.h>
-#include <stdlib.h>
+#include <stdlib.h>  // calloc, free
 #include <string.h>
 
 #include "DHT.h"
@@ -20,9 +20,11 @@
 #include "friend_connection.h"
 #include "group_common.h"
 #include "logger.h"
+#include "mem.h"
 #include "mono_time.h"
 #include "net_crypto.h"
 #include "network.h"
+#include "sort.h"
 #include "state.h"
 #include "util.h"
 
@@ -957,24 +959,75 @@ static bool delpeer(Group_Chats *g_c, uint32_t groupnumber, int peer_index, void
 
 /** Order peers with friends first and with more recently active earlier */
 non_null()
-static int cmp_frozen(const void *a, const void *b)
+static bool group_peer_less_handler(const void *object, const void *a, const void *b)
 {
     const Group_Peer *pa = (const Group_Peer *)a;
     const Group_Peer *pb = (const Group_Peer *)b;
 
-    if (pa->is_friend ^ pb->is_friend) {
-        return pa->is_friend ? -1 : 1;
+    if (((pa->is_friend ? 1 : 0) ^ (pb->is_friend ? 1 : 0)) != 0) {
+        return pa->is_friend;
     }
 
-    return cmp_uint(pb->last_active, pa->last_active);
+    return cmp_uint(pb->last_active, pa->last_active) < 0;
 }
 
+non_null()
+static const void *group_peer_get_handler(const void *arr, uint32_t index)
+{
+    const Group_Peer *entries = (const Group_Peer *)arr;
+    return &entries[index];
+}
+
+non_null()
+static void group_peer_set_handler(void *arr, uint32_t index, const void *val)
+{
+    Group_Peer *entries = (Group_Peer *)arr;
+    const Group_Peer *entry = (const Group_Peer *)val;
+    entries[index] = *entry;
+}
+
+non_null()
+static void *group_peer_subarr_handler(void *arr, uint32_t index, uint32_t size)
+{
+    Group_Peer *entries = (Group_Peer *)arr;
+    return &entries[index];
+}
+
+non_null()
+static void *group_peer_alloc_handler(const void *object, uint32_t size)
+{
+    const Memory *mem = (const Memory *)object;
+    Group_Peer *tmp = (Group_Peer *)mem_valloc(mem, size, sizeof(Group_Peer));
+
+    if (tmp == nullptr) {
+        return nullptr;
+    }
+
+    return tmp;
+}
+
+non_null()
+static void group_peer_delete_handler(const void *object, void *arr, uint32_t size)
+{
+    const Memory *mem = (const Memory *)object;
+    mem_delete(mem, arr);
+}
+
+static const Sort_Funcs group_peer_cmp_funcs = {
+    group_peer_less_handler,
+    group_peer_get_handler,
+    group_peer_set_handler,
+    group_peer_subarr_handler,
+    group_peer_alloc_handler,
+    group_peer_delete_handler,
+};
+
 /** @brief Delete frozen peers as necessary to ensure at most `g->maxfrozen` remain.
  *
  * @retval true if any frozen peers are removed.
  */
 non_null()
-static bool delete_old_frozen(Group_c *g)
+static bool delete_old_frozen(Group_c *g, const Memory *mem)
 {
     if (g->numfrozen <= g->maxfrozen) {
         return false;
@@ -987,7 +1040,7 @@ static bool delete_old_frozen(Group_c *g)
         return true;
     }
 
-    qsort(g->frozen, g->numfrozen, sizeof(Group_Peer), cmp_frozen);
+    merge_sort(g->frozen, g->numfrozen, mem, &group_peer_cmp_funcs);
 
     Group_Peer *temp = (Group_Peer *)realloc(g->frozen, g->maxfrozen * sizeof(Group_Peer));
 
@@ -1032,7 +1085,7 @@ static bool freeze_peer(Group_Chats *g_c, uint32_t groupnumber, int peer_index,
 
     ++g->numfrozen;
 
-    delete_old_frozen(g);
+    delete_old_frozen(g, g_c->m->mem);
 
     return true;
 }
@@ -1519,7 +1572,7 @@ int group_set_max_frozen(const Group_Chats *g_c, uint32_t groupnumber, uint32_t
     }
 
     g->maxfrozen = maxfrozen;
-    delete_old_frozen(g);
+    delete_old_frozen(g, g_c->m->mem);
     return 0;
 }
 
diff --git a/toxcore/onion_announce.c b/toxcore/onion_announce.c
index d1585f9f41..c7a6b38ad8 100644
--- a/toxcore/onion_announce.c
+++ b/toxcore/onion_announce.c
@@ -9,7 +9,6 @@
 #include "onion_announce.h"
 
 #include <assert.h>
-#include <stdlib.h>
 #include <string.h>
 
 #include "DHT.h"
@@ -23,6 +22,7 @@
 #include "network.h"
 #include "onion.h"
 #include "shared_key_cache.h"
+#include "sort.h"
 #include "timed_auth.h"
 
 #define PING_ID_TIMEOUT ONION_ANNOUNCE_TIMEOUT
@@ -281,23 +281,17 @@ static int in_entries(const Onion_Announce *onion_a, const uint8_t *public_key)
     return -1;
 }
 
-typedef struct Cmp_Data {
+typedef struct Onion_Announce_Entry_Cmp {
+    const Memory *mem;
     const Mono_Time *mono_time;
-    const uint8_t *base_public_key;
-    Onion_Announce_Entry entry;
-} Cmp_Data;
+    const uint8_t *comp_public_key;
+} Onion_Announce_Entry_Cmp;
 
 non_null()
-static int cmp_entry(const void *a, const void *b)
+static int onion_announce_entry_cmp(const Onion_Announce_Entry_Cmp *cmp, const Onion_Announce_Entry *entry1, const Onion_Announce_Entry *entry2)
 {
-    const Cmp_Data *cmp1 = (const Cmp_Data *)a;
-    const Cmp_Data *cmp2 = (const Cmp_Data *)b;
-    const Onion_Announce_Entry entry1 = cmp1->entry;
-    const Onion_Announce_Entry entry2 = cmp2->entry;
-    const uint8_t *cmp_public_key = cmp1->base_public_key;
-
-    const bool t1 = mono_time_is_timeout(cmp1->mono_time, entry1.announce_time, ONION_ANNOUNCE_TIMEOUT);
-    const bool t2 = mono_time_is_timeout(cmp1->mono_time, entry2.announce_time, ONION_ANNOUNCE_TIMEOUT);
+    const bool t1 = mono_time_is_timeout(cmp->mono_time, entry1->announce_time, ONION_ANNOUNCE_TIMEOUT);
+    const bool t2 = mono_time_is_timeout(cmp->mono_time, entry2->announce_time, ONION_ANNOUNCE_TIMEOUT);
 
     if (t1 && t2) {
         return 0;
@@ -311,7 +305,7 @@ static int cmp_entry(const void *a, const void *b)
         return 1;
     }
 
-    const int closest = id_closest(cmp_public_key, entry1.public_key, entry2.public_key);
+    const int closest = id_closest(cmp->comp_public_key, entry1->public_key, entry2->public_key);
 
     if (closest == 1) {
         return 1;
@@ -325,31 +319,80 @@ static int cmp_entry(const void *a, const void *b)
 }
 
 non_null()
-static void sort_onion_announce_list(const Memory *mem, const Mono_Time *mono_time,
-                                     Onion_Announce_Entry *list, unsigned int length,
-                                     const uint8_t *comp_public_key)
+static bool onion_announce_entry_less_handler(const void *object, const void *a, const void *b)
 {
-    // Pass comp_public_key to qsort with each Client_data entry, so the
-    // comparison function can use it as the base of comparison.
-    Cmp_Data *cmp_list = (Cmp_Data *)mem_valloc(mem, length, sizeof(Cmp_Data));
+    const Onion_Announce_Entry_Cmp *cmp = (const Onion_Announce_Entry_Cmp *)object;
+    const Onion_Announce_Entry *entry1 = (const Onion_Announce_Entry *)a;
+    const Onion_Announce_Entry *entry2 = (const Onion_Announce_Entry *)b;
 
-    if (cmp_list == nullptr) {
-        return;
-    }
+    return onion_announce_entry_cmp(cmp, entry1, entry2) < 0;
+}
 
-    for (uint32_t i = 0; i < length; ++i) {
-        cmp_list[i].mono_time = mono_time;
-        cmp_list[i].base_public_key = comp_public_key;
-        cmp_list[i].entry = list[i];
-    }
+non_null()
+static const void *onion_announce_entry_get_handler(const void *arr, uint32_t index)
+{
+    const Onion_Announce_Entry *entries = (const Onion_Announce_Entry *)arr;
+    return &entries[index];
+}
+
+non_null()
+static void onion_announce_entry_set_handler(void *arr, uint32_t index, const void *val)
+{
+    Onion_Announce_Entry *entries = (Onion_Announce_Entry *)arr;
+    const Onion_Announce_Entry *entry = (const Onion_Announce_Entry *)val;
+    entries[index] = *entry;
+}
 
-    qsort(cmp_list, length, sizeof(Cmp_Data), cmp_entry);
+non_null()
+static void *onion_announce_entry_subarr_handler(void *arr, uint32_t index, uint32_t size)
+{
+    Onion_Announce_Entry *entries = (Onion_Announce_Entry *)arr;
+    return &entries[index];
+}
 
-    for (uint32_t i = 0; i < length; ++i) {
-        list[i] = cmp_list[i].entry;
+non_null()
+static void *onion_announce_entry_alloc_handler(const void *object, uint32_t size)
+{
+    const Onion_Announce_Entry_Cmp *cmp = (const Onion_Announce_Entry_Cmp *)object;
+    Onion_Announce_Entry *tmp = (Onion_Announce_Entry *)mem_valloc(cmp->mem, size, sizeof(Onion_Announce_Entry));
+
+    if (tmp == nullptr) {
+        return nullptr;
     }
 
-    mem_delete(mem, cmp_list);
+    return tmp;
+}
+
+non_null()
+static void onion_announce_entry_delete_handler(const void *object, void *arr, uint32_t size)
+{
+    const Onion_Announce_Entry_Cmp *cmp = (const Onion_Announce_Entry_Cmp *)object;
+    mem_delete(cmp->mem, arr);
+}
+
+static const Sort_Funcs onion_announce_entry_cmp_funcs = {
+    onion_announce_entry_less_handler,
+    onion_announce_entry_get_handler,
+    onion_announce_entry_set_handler,
+    onion_announce_entry_subarr_handler,
+    onion_announce_entry_alloc_handler,
+    onion_announce_entry_delete_handler,
+};
+
+non_null()
+static void sort_onion_announce_list(const Memory *mem, const Mono_Time *mono_time,
+                                     Onion_Announce_Entry *list, unsigned int length,
+                                     const uint8_t *comp_public_key)
+{
+    // Bundle mem, mono_time and comp_public_key into a single context object;
+    // the Sort_Funcs callbacks receive it as `object` (e.g. as comparison base).
+    const Onion_Announce_Entry_Cmp cmp = {
+        mem,
+        mono_time,
+        comp_public_key,
+    };
+
+    merge_sort(list, length, &cmp, &onion_announce_entry_cmp_funcs);
+}
 
 /** @brief add entry to entries list
diff --git a/toxcore/onion_client.c b/toxcore/onion_client.c
index 4eb494a620..45546cbf0b 100644
--- a/toxcore/onion_client.c
+++ b/toxcore/onion_client.c
@@ -10,7 +10,6 @@
 #include "onion_client.h"
 
 #include <assert.h>
-#include <stdlib.h>
 #include <string.h>
 
 #include "DHT.h"
@@ -29,6 +28,7 @@
 #include "onion.h"
 #include "onion_announce.h"
 #include "ping_array.h"
+#include "sort.h"
 #include "timed_auth.h"
 #include "util.h"
 
@@ -694,23 +694,17 @@ static int client_send_announce_request(Onion_Client *onion_c, uint32_t num, con
     return send_onion_packet_tcp_udp(onion_c, &path, dest, request, len);
 }
 
-typedef struct Onion_Client_Cmp_Data {
+typedef struct Onion_Node_Cmp {
+    const Memory *mem;
     const Mono_Time *mono_time;
-    const uint8_t *base_public_key;
-    Onion_Node entry;
-} Onion_Client_Cmp_Data;
+    const uint8_t *comp_public_key;
+} Onion_Node_Cmp;
 
 non_null()
-static int onion_client_cmp_entry(const void *a, const void *b)
+static int onion_node_cmp(const Onion_Node_Cmp *cmp, const Onion_Node *entry1, const Onion_Node *entry2)
 {
-    const Onion_Client_Cmp_Data *cmp1 = (const Onion_Client_Cmp_Data *)a;
-    const Onion_Client_Cmp_Data *cmp2 = (const Onion_Client_Cmp_Data *)b;
-    const Onion_Node entry1 = cmp1->entry;
-    const Onion_Node entry2 = cmp2->entry;
-    const uint8_t *cmp_public_key = cmp1->base_public_key;
-
-    const bool t1 = onion_node_timed_out(&entry1, cmp1->mono_time);
-    const bool t2 = onion_node_timed_out(&entry2, cmp2->mono_time);
+    const bool t1 = onion_node_timed_out(entry1, cmp->mono_time);
+    const bool t2 = onion_node_timed_out(entry2, cmp->mono_time);
 
     if (t1 && t2) {
         return 0;
@@ -724,7 +718,7 @@ static int onion_client_cmp_entry(const void *a, const void *b)
         return 1;
     }
 
-    const int closest = id_closest(cmp_public_key, entry1.public_key, entry2.public_key);
+    const int closest = id_closest(cmp->comp_public_key, entry1->public_key, entry2->public_key);
 
     if (closest == 1) {
         return 1;
@@ -738,30 +732,79 @@ static int onion_client_cmp_entry(const void *a, const void *b)
 }
 
 non_null()
-static void sort_onion_node_list(const Memory *mem, const Mono_Time *mono_time,
-                                 Onion_Node *list, unsigned int length, const uint8_t *comp_public_key)
+static bool onion_node_less_handler(const void *object, const void *a, const void *b)
 {
-    // Pass comp_public_key to qsort with each Client_data entry, so the
-    // comparison function can use it as the base of comparison.
-    Onion_Client_Cmp_Data *cmp_list = (Onion_Client_Cmp_Data *)mem_valloc(mem, length, sizeof(Onion_Client_Cmp_Data));
+    const Onion_Node_Cmp *cmp = (const Onion_Node_Cmp *)object;
+    const Onion_Node *entry1 = (const Onion_Node *)a;
+    const Onion_Node *entry2 = (const Onion_Node *)b;
 
-    if (cmp_list == nullptr) {
-        return;
-    }
+    return onion_node_cmp(cmp, entry1, entry2) < 0;
+}
 
-    for (uint32_t i = 0; i < length; ++i) {
-        cmp_list[i].mono_time = mono_time;
-        cmp_list[i].base_public_key = comp_public_key;
-        cmp_list[i].entry = list[i];
-    }
+non_null()
+static const void *onion_node_get_handler(const void *arr, uint32_t index)
+{
+    const Onion_Node *entries = (const Onion_Node *)arr;
+    return &entries[index];
+}
 
-    qsort(cmp_list, length, sizeof(Onion_Client_Cmp_Data), onion_client_cmp_entry);
+non_null()
+static void onion_node_set_handler(void *arr, uint32_t index, const void *val)
+{
+    Onion_Node *entries = (Onion_Node *)arr;
+    const Onion_Node *entry = (const Onion_Node *)val;
+    entries[index] = *entry;
+}
+
+non_null()
+static void *onion_node_subarr_handler(void *arr, uint32_t index, uint32_t size)
+{
+    Onion_Node *entries = (Onion_Node *)arr;
+    return &entries[index];
+}
 
-    for (uint32_t i = 0; i < length; ++i) {
-        list[i] = cmp_list[i].entry;
+non_null()
+static void *onion_node_alloc_handler(const void *object, uint32_t size)
+{
+    const Onion_Node_Cmp *cmp = (const Onion_Node_Cmp *)object;
+    Onion_Node *tmp = (Onion_Node *)mem_valloc(cmp->mem, size, sizeof(Onion_Node));
+
+    if (tmp == nullptr) {
+        return nullptr;
     }
 
-    mem_delete(mem, cmp_list);
+    return tmp;
+}
+
+non_null()
+static void onion_node_delete_handler(const void *object, void *arr, uint32_t size)
+{
+    const Onion_Node_Cmp *cmp = (const Onion_Node_Cmp *)object;
+    mem_delete(cmp->mem, arr);
+}
+
+static const Sort_Funcs onion_node_cmp_funcs = {
+    onion_node_less_handler,
+    onion_node_get_handler,
+    onion_node_set_handler,
+    onion_node_subarr_handler,
+    onion_node_alloc_handler,
+    onion_node_delete_handler,
+};
+
+non_null()
+static void sort_onion_node_list(const Memory *mem, const Mono_Time *mono_time,
+                                 Onion_Node *list, unsigned int length, const uint8_t *comp_public_key)
+{
+    // Bundle mem, mono_time and comp_public_key into a single context object;
+    // the Sort_Funcs callbacks receive it as `object` (e.g. as comparison base).
+    const Onion_Node_Cmp cmp = {
+        mem,
+        mono_time,
+        comp_public_key,
+    };
+
+    merge_sort(list, length, &cmp, &onion_node_cmp_funcs);
+}
 
 non_null()
diff --git a/toxcore/sort.c b/toxcore/sort.c
new file mode 100644
index 0000000000..45b2ffa3ab
--- /dev/null
+++ b/toxcore/sort.c
@@ -0,0 +1,182 @@
+/* SPDX-License-Identifier: GPL-3.0-or-later
+ * Copyright © 2023-2024 The TokTok team.
+ */
+
+#include "sort.h"
+
+#include <assert.h>
+
+#include "attributes.h"
+#include "ccompat.h"
+#include "util.h"
+
+/**
+ * @brief Threshold for when to switch to insertion sort.
+ *
+ * This is a trade-off between the complexity of insertion sort and the
+ * overhead of merge sort. The threshold is chosen to be the smallest value
+ * that gives a measurable speedup for insertion sort over merge sort. This is
+ * based on measurements done in sort_bench.cc. Starting from 32 elements,
+ * merge sort is faster than insertion sort in all our tests (both unsorted
+ * and mostly-sorted).
+ *
+ * Toxcore has a lot of small arrays it wants to sort, so this optimisation
+ * makes sense.
+ */
+#define SMALL_ARRAY_THRESHOLD 16
+
+non_null()
+static void merge_sort_merge_back(
+    void *arr,
+    const void *l_arr, uint32_t l_arr_size,
+    const void *r_arr, uint32_t r_arr_size,
+    uint32_t left_start,
+    const void *object, const Sort_Funcs *funcs)
+{
+    /* Read cursors into the two sorted runs and the write cursor into `arr`. */
+    uint32_t l_pos = 0;
+    uint32_t r_pos = 0;
+    uint32_t out = left_start;
+
+    while (l_pos < l_arr_size && r_pos < r_arr_size) {
+        const void *lhs = funcs->get_callback(l_arr, l_pos);
+        const void *rhs = funcs->get_callback(r_arr, r_pos);
+
+        /* Take from the right run only when it is strictly smaller, so that
+         * equal elements keep their original relative order. */
+        if (funcs->less_callback(object, rhs, lhs)) {
+            funcs->set_callback(arr, out, rhs);
+            ++r_pos;
+        } else {
+            funcs->set_callback(arr, out, lhs);
+            ++l_pos;
+        }
+        ++out;
+    }
+
+    /* At most one run still has elements left; append them unchanged. */
+    while (l_pos < l_arr_size) {
+        funcs->set_callback(arr, out++, funcs->get_callback(l_arr, l_pos));
+        ++l_pos;
+    }
+
+    while (r_pos < r_arr_size) {
+        funcs->set_callback(arr, out++, funcs->get_callback(r_arr, r_pos));
+        ++r_pos;
+    }
+}
+
+/** Merge the two sorted halves `arr[left_start..mid]` and `arr[mid+1..right_end]` of `arr[]`. */
+non_null()
+static void merge_sort_merge(
+    void *arr, uint32_t left_start, uint32_t mid, uint32_t right_end, void *tmp,
+    const void *object, const Sort_Funcs *funcs)
+{
+    const uint32_t l_len = mid - left_start + 1;
+    const uint32_t r_len = right_end - mid;
+
+    /* Carve both scratch runs out of `tmp`: left at the front, right directly behind it. */
+    void *l_run = funcs->subarr_callback(tmp, 0, l_len);
+    void *r_run = funcs->subarr_callback(tmp, l_len, r_len);
+
+    /* Stash both halves in the scratch runs before `arr` gets overwritten.
+     *
+     * Element-wise `get` + `set` sounds slow, but is only marginally slower
+     * than a dedicated `copy` callback, which would save about 3-4% in
+     * time.
+     */
+    for (uint32_t i = 0; i < l_len; ++i) {
+        funcs->set_callback(l_run, i, funcs->get_callback(arr, left_start + i));
+    }
+    for (uint32_t i = 0; i < r_len; ++i) {
+        funcs->set_callback(r_run, i, funcs->get_callback(arr, mid + 1 + i));
+    }
+
+    /* Interleave the scratch runs back into `arr[left_start..right_end]`. */
+    merge_sort_merge_back(arr, l_run, l_len, r_run, r_len, left_start, object, funcs);
+}
+
+non_null()
+static void insertion_sort_step(void *arr, void *tmp, uint32_t i, const void *object, const Sort_Funcs *funcs)
+{
+    // Park arr[i] in scratch slot 0 and read it back through get_callback, so element
+    // access never assumes that an array pointer is also a pointer to its first element.
+    funcs->set_callback(tmp, 0, funcs->get_callback(arr, i));
+    const void *held = funcs->get_callback(tmp, 0);
+    uint32_t j = i;
+    // Shift each predecessor that compares greater than the held element one slot right.
+    while (j > 0 && funcs->less_callback(object, held, funcs->get_callback(arr, j - 1))) {
+        funcs->set_callback(arr, j, funcs->get_callback(arr, j - 1));
+        --j;
+    }
+    // Drop the held element into the gap that opened up.
+    funcs->set_callback(arr, j, held);
+}
+
+non_null()
+static void insertion_sort_with_buf(void *arr, uint32_t arr_size, void *tmp, uint32_t tmp_size, const void *object, const Sort_Funcs *funcs)
+{
+    for (uint32_t next = 1; next < arr_size; ++next) {  // arr[0..next-1] is already sorted
+        insertion_sort_step(arr, tmp, next, object, funcs);
+    }
+}
+
+non_null()
+static bool insertion_sort(void *arr, uint32_t arr_size, const void *object, const Sort_Funcs *funcs)
+{
+    if (arr_size <= 1) {  // already sorted: don't allocate, and don't fail on OOM, for nothing
+        return true;
+    }
+    void *tmp = funcs->alloc_callback(object, 1);  // scratch slot for the element being inserted
+    if (tmp == nullptr) {
+        return false;
+    }
+    insertion_sort_with_buf(arr, arr_size, tmp, 1, object, funcs);
+    funcs->delete_callback(object, tmp, 1);
+    return true;
+}
+
+void merge_sort_with_buf(void *arr, uint32_t arr_size, void *tmp, uint32_t tmp_size, const void *object, const Sort_Funcs *funcs)
+{
+    assert(tmp_size >= arr_size);
+
+    if (arr_size <= SMALL_ARRAY_THRESHOLD) {
+        // No separate `tmp_size >= 1` assert here: given the check above it could only
+        // fire for an empty array, and sorting an empty array never touches `tmp`.
+        insertion_sort_with_buf(arr, arr_size, tmp, tmp_size, object, funcs);
+        return;
+    }
+
+    // Bottom-up merge sort: merge sorted runs of length 1 into runs of length 2,
+    // then runs of length 2 into runs of length 4, and so on until one run is left.
+    for (uint32_t curr_size = 1; curr_size <= arr_size - 1; curr_size = 2 * curr_size) {
+        // Walk over each pair of adjacent runs of the current length.
+        for (uint32_t left_start = 0; left_start < arr_size - 1; left_start += 2 * curr_size) {
+            // Last index of the left run (the right run starts at mid + 1) and of the
+            // right run; both are clamped because the final runs may be short or absent.
+            const uint32_t mid = min_u32(left_start + curr_size - 1, arr_size - 1);
+            const uint32_t right_end = min_u32(left_start + 2 * curr_size - 1, arr_size - 1);
+
+            // Merge arr[left_start..mid] with arr[mid+1..right_end].
+            merge_sort_merge(arr, left_start, mid, right_end, tmp, object, funcs);
+        }
+    }
+}
+
+bool merge_sort(void *arr, uint32_t arr_size, const void *object, const Sort_Funcs *funcs)
+{
+    // Short arrays go through insertion sort, which manages its own scratch storage.
+    if (arr_size <= SMALL_ARRAY_THRESHOLD) {
+        return insertion_sort(arr, arr_size, object, funcs);
+    }
+
+    // Everything else needs scratch space for as many elements as the input has.
+    void *scratch = funcs->alloc_callback(object, arr_size);
+    if (scratch == nullptr) {
+        return false;
+    }
+
+    merge_sort_with_buf(arr, arr_size, scratch, arr_size, object, funcs);
+    funcs->delete_callback(object, scratch, arr_size);
+    return true;
+}
diff --git a/toxcore/sort.h b/toxcore/sort.h
new file mode 100644
index 0000000000..c84d77694c
--- /dev/null
+++ b/toxcore/sort.h
@@ -0,0 +1,116 @@
+/* SPDX-License-Identifier: GPL-3.0-or-later
+ * Copyright © 2023-2024 The TokTok team.
+ */
+
+#ifndef C_TOXCORE_TOXCORE_SORT_H
+#define C_TOXCORE_TOXCORE_SORT_H
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "attributes.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** @brief Compare elements with a less-than ordering: `a < b`. */
+typedef bool sort_less_cb(const void *object, const void *a, const void *b);
+/** @brief Get element from array at index. */
+typedef const void *sort_get_cb(const void *arr, uint32_t index);
+/** @brief Set element in array at index to new value (perform copy). */
+typedef void sort_set_cb(void *arr, uint32_t index, const void *val);
+/** @brief Get a sub-array at an index of a given size (mutable pointer).
+ *
+ * Used to index in the temporary array allocated by `sort_alloc_cb` and get
+ * a sub-array for working memory.
+ */
+typedef void *sort_subarr_cb(void *arr, uint32_t index, uint32_t size);
+/** @brief Allocate a new array of the element type.
+ *
+ * @param size The array size in elements of type T (not byte size). `merge_sort`
+ *   requests 1 element for small inputs (insertion sort), else the input array size.
+ */
+typedef void *sort_alloc_cb(const void *object, uint32_t size);
+/** @brief Free the element type array. */
+typedef void sort_delete_cb(const void *object, void *arr, uint32_t size);
+
+/** @brief Virtual function table for getting/setting elements in an array and
+ * comparing them.
+ *
+ * Only the `less`, `alloc`, and `delete` functions get a `this`-pointer. We
+ * assume that indexing in an array doesn't need any other information than the
+ * array itself.
+ *
+ * For now, the `this`-pointer is const, because we assume sorting doesn't need
+ * to mutate any state, but if necessary that can be changed in the future.
+ */
+typedef struct Sort_Funcs {
+    sort_less_cb *less_callback;
+    sort_get_cb *get_callback;
+    sort_set_cb *set_callback;
+    sort_subarr_cb *subarr_callback;
+    sort_alloc_cb *alloc_callback;
+    sort_delete_cb *delete_callback;
+} Sort_Funcs;
+
+/** @brief Non-recursive merge sort function to sort `arr[0...arr_size-1]`.
+ *
+ * Avoids `memcpy` and avoids treating elements as byte arrays. Instead, uses
+ * callbacks to index in arrays and copy elements. This makes it quite a bit
+ * slower than `qsort`, but works with elements that require special care when
+ * being copied (e.g. if they are part of a graph or other data structure with
+ * pointers or other invariants).
+ *
+ * This function actually uses insertion sort for small arrays (up to 16
+ * elements), which is faster than merge sort for small arrays, especially
+ * when mostly sorted (a common use case in toxcore).
+ *
+ * Allocates a single temporary array with the provided alloc callback, and
+ * frees it at the end. This is significantly faster than an in-place
+ * implementation.
+ *
+ * Complexity:
+ * - Space: `O(n) where n = arr_size`.
+ * - Time: `O(n * log n) where n = arr_size`.
+ *
+ * Compared to `qsort`, this is about 60-70% slower for large arrays. For small
+ * arrays (up to 16 elements), it's about 50% faster than `qsort`.
+ *
+ * @param[in,out] arr An array of type T.
+ * @param arr_size Number of elements in @p arr (count, not byte size).
+ * @param[in] object Comparator object.
+ * @param[in] funcs Callback struct for elements of type T.
+ */
+non_null()
+bool merge_sort(void *arr, uint32_t arr_size, const void *object, const Sort_Funcs *funcs);
+
+/**
+ * @brief Merge sort like above but with a pre-allocated buffer.
+ *
+ * This function is the same as `merge_sort` but uses a pre-allocated buffer
+ * for temporary storage. This can be useful if the caller wants to avoid
+ * dynamic memory allocation.
+ *
+ * This function is 1-2% faster than `merge_sort` for small arrays up to 1000
+ * elements, and about 5-10% faster for large arrays (2000+ elements).
+ *
+ * The main upside is that `alloc` and `delete` callbacks don't need to be
+ * implemented, and the caller can use a stack-allocated buffer.
+ *
+ * @param[in,out] arr An array of type T.
+ * @param arr_size Number of elements in @p arr (count, not byte size).
+ * @param[in,out] tmp A buffer of size `tmp_size` for temporary storage.
+ * @param tmp_size Number of elements in @p tmp (count, not byte size). Must be
+ *   at least as large as `arr_size`.
+ * @param[in] object Comparator object.
+ * @param[in] funcs Callback struct for elements of type T.
+ */
+non_null()
+void merge_sort_with_buf(void *arr, uint32_t arr_size, void *tmp, uint32_t tmp_size, const void *object, const Sort_Funcs *funcs);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* C_TOXCORE_TOXCORE_SORT_H */
diff --git a/toxcore/sort_bench.cc b/toxcore/sort_bench.cc
new file mode 100644
index 0000000000..0e75bc9559
--- /dev/null
+++ b/toxcore/sort_bench.cc
@@ -0,0 +1,140 @@
+/* SPDX-License-Identifier: GPL-3.0-or-later
+ * Copyright © 2023-2024 The TokTok team.
+ */
+
+#include <benchmark/benchmark.h>
+
+#include <algorithm>
+#include <array>
+#include <cstdint>
+#include <random>
+
+#include "mem.h"
+#include "sort.h"
+#include "sort_test_util.hh"
+
+namespace {
+
+std::pair<std::vector<Some_Type>, std::mt19937> random_vec(benchmark::State &state)
+{
+    std::mt19937 rng;
+    // UINT32_MAX-1 so later we have room to add 1 larger element if needed.
+    std::uniform_int_distribution<uint32_t> dist{
+        std::numeric_limits<uint32_t>::min(), std::numeric_limits<uint32_t>::max() - 1};
+
+    std::vector<Some_Type> vec(state.range(0));
+    std::generate(std::begin(vec), std::end(vec), [&]() {
+        std::array<uint32_t, 8> compare_value;
+        std::generate(
+            std::begin(compare_value), std::end(compare_value), [&]() { return dist(rng); });
+        return Some_Type{nullptr, compare_value, "hello there"};
+    });
+
+    return {vec, rng};
+}
+
+std::vector<Some_Type> mostly_sorted_vec(benchmark::State &state)
+{
+    auto [vec, rng] = random_vec(state);
+    std::sort(vec.begin(), vec.end());
+
+    // Randomly swap 5% of the vector.
+    std::uniform_int_distribution<std::size_t> dist{0, vec.size() - 1};
+    for (std::size_t i = 0; i < vec.size() / 20; ++i) {
+        const auto a = dist(rng);
+        const auto b = dist(rng);
+        std::swap(vec[a], vec[b]);
+    }
+
+    return vec;
+}
+
+void BM_merge_sort(benchmark::State &state)
+{
+    const auto vec = random_vec(state).first;
+
+    for (auto _ : state) {
+        auto unsorted = vec;
+        merge_sort(unsorted.data(), unsorted.size(), &state, &Some_Type::funcs);
+    }
+}
+
+BENCHMARK(BM_merge_sort)->RangeMultiplier(2)->Range(8, 8 << 8);
+
+void BM_merge_sort_with_buf(benchmark::State &state)
+{
+    const auto vec = random_vec(state).first;
+    std::vector<Some_Type> buf(vec.size());
+
+    for (auto _ : state) {
+        auto unsorted = vec;
+        merge_sort_with_buf(
+            unsorted.data(), unsorted.size(), buf.data(), buf.size(), &state, &Some_Type::funcs);
+    }
+}
+
+BENCHMARK(BM_merge_sort_with_buf)->RangeMultiplier(2)->Range(8, 8 << 8);
+
+void BM_merge_sort_mostly_sorted(benchmark::State &state)
+{
+    auto vec = mostly_sorted_vec(state);
+
+    for (auto _ : state) {
+        auto unsorted = vec;
+        merge_sort(unsorted.data(), unsorted.size(), &state, &Some_Type::funcs);
+    }
+}
+
+BENCHMARK(BM_merge_sort_mostly_sorted)->RangeMultiplier(2)->Range(8, 8 << 8);
+
+void BM_qsort(benchmark::State &state)
+{
+    const auto vec = random_vec(state).first;
+
+    for (auto _ : state) {
+        auto unsorted = vec;
+        qsort(unsorted.data(), unsorted.size(), sizeof(unsorted[0]), my_type_cmp);
+    }
+}
+
+BENCHMARK(BM_qsort)->RangeMultiplier(2)->Range(8, 8 << 8);
+
+void BM_qsort_mostly_sorted(benchmark::State &state)
+{
+    auto vec = mostly_sorted_vec(state);
+
+    for (auto _ : state) {
+        auto unsorted = vec;
+        qsort(unsorted.data(), unsorted.size(), sizeof(unsorted[0]), my_type_cmp);
+    }
+}
+
+BENCHMARK(BM_qsort_mostly_sorted)->RangeMultiplier(2)->Range(8, 8 << 8);
+
+void BM_std_sort(benchmark::State &state)
+{
+    const auto vec = random_vec(state).first;
+
+    for (auto _ : state) {
+        auto unsorted = vec;
+        std::sort(unsorted.begin(), unsorted.end());
+    }
+}
+
+BENCHMARK(BM_std_sort)->RangeMultiplier(2)->Range(8, 8 << 8);
+
+void BM_std_sort_mostly_sorted(benchmark::State &state)
+{
+    auto vec = mostly_sorted_vec(state);
+
+    for (auto _ : state) {
+        auto unsorted = vec;
+        std::sort(unsorted.begin(), unsorted.end());
+    }
+}
+
+BENCHMARK(BM_std_sort_mostly_sorted)->RangeMultiplier(2)->Range(8, 8 << 8);
+
+}
+
+BENCHMARK_MAIN();
diff --git a/toxcore/sort_test.cc b/toxcore/sort_test.cc
new file mode 100644
index 0000000000..c8af3346ab
--- /dev/null
+++ b/toxcore/sort_test.cc
@@ -0,0 +1,79 @@
+/* SPDX-License-Identifier: GPL-3.0-or-later
+ * Copyright © 2023-2024 The TokTok team.
+ */
+
+#include "sort.h"
+
+#include <gtest/gtest.h>
+
+#include <limits>
+#include <random>
+
+#include "sort_test_util.hh"
+
+namespace {
+
+TEST(MergeSort, BehavesLikeStdSort)
+{
+    std::mt19937 rng;
+    // INT_MAX-1 so later we have room to add 1 larger element if needed.
+    std::uniform_int_distribution<int> dist{
+        std::numeric_limits<int>::min(), std::numeric_limits<int>::max() - 1};
+
+    constexpr auto int_funcs = sort_funcs<int>();
+
+    // Test with int arrays.
+    for (uint32_t i = 1; i < 500; ++i) {
+        std::vector<int> vec(i);
+        std::generate(std::begin(vec), std::end(vec), [&]() { return dist(rng); });
+
+        auto sorted = vec;
+        std::sort(sorted.begin(), sorted.end(), std::less<int>());
+
+        // If vec was accidentally sorted, add another larger element that almost definitely makes
+        // it not sorted.
+        if (vec == sorted) {
+            int const largest = *std::prev(sorted.end()) + 1;
+            sorted.push_back(largest);
+            vec.insert(vec.begin(), largest);
+        }
+        ASSERT_NE(vec, sorted);
+
+        // Just pass some arbitrary "self" to make sure the callbacks pass it through.
+        ASSERT_TRUE(merge_sort(vec.data(), vec.size(), &i, &int_funcs));
+        ASSERT_EQ(vec, sorted);
+    }
+}
+
+TEST(MergeSort, WorksWithNonTrivialTypes)
+{
+    std::mt19937 rng;
+    std::uniform_int_distribution<int> dist{
+        std::numeric_limits<int>::min(), std::numeric_limits<int>::max()};
+
+    constexpr auto string_funcs = sort_funcs<std::string>();
+
+    // Test with std::string arrays.
+    for (uint32_t i = 1; i < 500; ++i) {
+        std::vector<std::string> vec(i);
+        std::generate(std::begin(vec), std::end(vec), [&]() { return std::to_string(dist(rng)); });
+
+        auto sorted = vec;
+        std::sort(sorted.begin(), sorted.end(), std::less<std::string>());
+
+        // If vec was accidentally sorted, add another larger element that almost definitely makes
+        // it not sorted.
+        if (vec == sorted) {
+            std::string const largest = "larger than largest int";
+            sorted.push_back(largest);
+            vec.insert(vec.begin(), largest);
+        }
+        ASSERT_NE(vec, sorted);
+
+        // Just pass some arbitrary "self" to make sure the callbacks pass it through.
+        ASSERT_TRUE(merge_sort(vec.data(), vec.size(), &i, &string_funcs));
+        ASSERT_EQ(vec, sorted);
+    }
+}
+
+}  // namespace
diff --git a/toxcore/sort_test_util.cc b/toxcore/sort_test_util.cc
new file mode 100644
index 0000000000..ed85127754
--- /dev/null
+++ b/toxcore/sort_test_util.cc
@@ -0,0 +1,32 @@
+#include "sort_test_util.hh"
+
+#include <array>
+#include <cstddef>
+
+#include "sort.h"
+#include "util.h"
+
+namespace {
+template <typename T, std::size_t N>
+int cmp_uint_array(const std::array<T, N> &a, const std::array<T, N> &b)
+{
+    for (std::size_t i = 0; i < a.size(); ++i) {
+        const int cmp = cmp_uint(a[i], b[i]);
+        if (cmp != 0) {
+            return cmp;
+        }
+    }
+    return 0;
+}
+}
+
+const Sort_Funcs Some_Type::funcs = sort_funcs<Some_Type>();
+
+int my_type_cmp(const void *va, const void *vb)
+{
+    const auto *a = static_cast<const Some_Type *>(va);
+    const auto *b = static_cast<const Some_Type *>(vb);
+    return cmp_uint_array(a->compare_value, b->compare_value);
+}
+
+bool operator<(const Some_Type &a, const Some_Type &b) { return a.compare_value < b.compare_value; }
diff --git a/toxcore/sort_test_util.hh b/toxcore/sort_test_util.hh
new file mode 100644
index 0000000000..8013290963
--- /dev/null
+++ b/toxcore/sort_test_util.hh
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: GPL-3.0-or-later
+ * Copyright © 2023-2024 The TokTok team.
+ */
+
+#ifndef C_TOXCORE_TOXCORE_SORT_TEST_UTIL_H
+#define C_TOXCORE_TOXCORE_SORT_TEST_UTIL_H
+
+#include <array>
+
+#include "sort.h"
+
+struct Memory;
+
+template <typename T>
+constexpr Sort_Funcs sort_funcs()
+{
+    return {
+        [](const void *object, const void *va, const void *vb) {
+            const T *a = static_cast<const T *>(va);
+            const T *b = static_cast<const T *>(vb);
+
+            return *a < *b;
+        },
+        [](const void *arr, uint32_t index) -> const void * {
+            const T *vec = static_cast<const T *>(arr);
+            return &vec[index];
+        },
+        [](void *arr, uint32_t index, const void *val) {
+            T *vec = static_cast<T *>(arr);
+            const T *value = static_cast<const T *>(val);
+            vec[index] = *value;
+        },
+        [](void *arr, uint32_t index, uint32_t size) -> void * {
+            T *vec = static_cast<T *>(arr);
+            return &vec[index];
+        },
+        [](const void *object, uint32_t size) -> void * { return new T[size]; },
+        [](const void *object, void *arr, uint32_t size) { delete[] static_cast<T *>(arr); },
+    };
+}
+
+// A realistic test case where we have a struct with some stuff and an expensive value we compare.
+struct Some_Type {
+    const Memory *mem;
+    std::array<uint32_t, 8> compare_value;
+    const char *name;
+
+    static const Sort_Funcs funcs;
+};
+
+int my_type_cmp(const void *va, const void *vb);
+bool operator<(const Some_Type &a, const Some_Type &b);
+
+#endif  // C_TOXCORE_TOXCORE_SORT_TEST_UTIL_H
diff --git a/toxcore/util.c b/toxcore/util.c
index 1851e58a08..85d29a44cd 100644
--- a/toxcore/util.c
+++ b/toxcore/util.c
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-3.0-or-later
- * Copyright © 2016-2018 The TokTok team.
+ * Copyright © 2016-2024 The TokTok team.
  * Copyright © 2013 Tox project.
  * Copyright © 2013 plutooo
  */
@@ -16,6 +16,7 @@
 #include <stdlib.h>
 #include <string.h>
 
+#include "attributes.h"
 #include "ccompat.h"
 #include "mem.h"
 
diff --git a/toxcore/util_test.cc b/toxcore/util_test.cc
index 94e653f218..ccdd9795da 100644
--- a/toxcore/util_test.cc
+++ b/toxcore/util_test.cc
@@ -1,39 +1,14 @@
+/* SPDX-License-Identifier: GPL-3.0-or-later
+ * Copyright © 2016-2024 The TokTok team.
+ */
 #include "util.h"
 
 #include <gtest/gtest.h>
 
-#include "crypto_core.h"
-#include "crypto_core_test_util.hh"
+#include <climits>
 
 namespace {
 
-TEST(Util, TwoRandomIdsAreNotEqual)
-{
-    Test_Random rng;
-    uint8_t pk1[CRYPTO_PUBLIC_KEY_SIZE];
-    uint8_t sk1[CRYPTO_SECRET_KEY_SIZE];
-    uint8_t pk2[CRYPTO_PUBLIC_KEY_SIZE];
-    uint8_t sk2[CRYPTO_SECRET_KEY_SIZE];
-
-    crypto_new_keypair(rng, pk1, sk1);
-    crypto_new_keypair(rng, pk2, sk2);
-
-    EXPECT_FALSE(pk_equal(pk1, pk2));
-}
-
-TEST(Util, IdCopyMakesKeysEqual)
-{
-    Test_Random rng;
-    uint8_t pk1[CRYPTO_PUBLIC_KEY_SIZE];
-    uint8_t sk1[CRYPTO_SECRET_KEY_SIZE];
-    uint8_t pk2[CRYPTO_PUBLIC_KEY_SIZE] = {0};
-
-    crypto_new_keypair(rng, pk1, sk1);
-    pk_copy(pk2, pk1);
-
-    EXPECT_TRUE(pk_equal(pk1, pk2));
-}
-
 TEST(Cmp, OrdersNumbersCorrectly)
 {
     EXPECT_EQ(cmp_uint(1, 2), -1);