From f3ad0b25b8d3d2e3ca27b5ca1af34dafd4e1ace9 Mon Sep 17 00:00:00 2001 From: Reto Achermann Date: Tue, 17 Oct 2023 13:52:32 -0700 Subject: [PATCH 01/44] lib/vibrio: adding a hack that sends some network packets Signed-off-by: Reto Achermann --- lib/vibrio/src/rumprt/crt/mod.rs | 79 ++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/lib/vibrio/src/rumprt/crt/mod.rs b/lib/vibrio/src/rumprt/crt/mod.rs index bb004d9c1..ff7e46a2c 100644 --- a/lib/vibrio/src/rumprt/crt/mod.rs +++ b/lib/vibrio/src/rumprt/crt/mod.rs @@ -238,6 +238,21 @@ pub extern "C" fn main() { ta_root_mode: u32, // mode_t ta_root_mode; } + #[repr(C)] + struct sockaddr_in { + sin_len: u8, + sin_family: u8, //typedef __uint8_t __sa_family_t; + sin_port: u16, // typedef __uint16_t __in_port_t; /* "Internet" port number */ + sin_addr: u32, // typedef __uint32_t __in_addr_t; /* IP(v4) address */ + zero: [u8; 8], + } + + #[repr(C)] + struct timespec_t { + tv_sec: i64, // time_t + tv_nsec: u64, // long + } + extern "C" { static __init_array_start: extern "C" fn(); static __init_array_end: extern "C" fn(); @@ -248,6 +263,18 @@ pub extern "C" fn main() { fn rump_pub_netconfig_dhcp_ipv4_oneshot(iface: *const i8) -> i64; fn _libc_init(); fn mount(typ: *const i8, path: *const i8, n: u64, args: *const tmpfs_args, argsize: usize); + + fn socket(domain: i64, typ: i64, protocol: i64) -> i64; + fn sendto( + fd: i64, + buf: *const i8, + len: usize, + flags: i64, + addr: *const sockaddr_in, + len: usize, + ) -> i64; + fn close(sock: i64) -> i64; + fn nanosleep(rqtp: *const timespec_t, rmtp: *mut timespec_t) -> i64; } unsafe { @@ -371,6 +398,58 @@ pub extern "C" fn main() { "rump_pub_netconfig_dhcp_ipv4_oneshot done in {:?}", start.elapsed() ); + + // HACK: send a message so things get initialized, otherwise we don't have + // connectivity. + + const AF_INET: i64 = 2; + const SOCK_DGRAM: i64 = 2; + const IPPROTO_UDP: i64 = 17; + const MSG_DONTWAIT: i64 = 0x0080; + let sockfd = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); + assert!(sockfd > 0); + info!("socket done in {:?}", start.elapsed()); + + for i in 0..10 { + info!("sending packet {} of 10 ({:?})", i, start.elapsed()); + let addr = sockaddr_in { + sin_len: core::mem::size_of::() as u8, + sin_family: AF_INET as u8, + sin_port: (8889 as u16).to_be(), + sin_addr: (2887712788 as u32).to_be(), // 172.31.0.20 + zero: [0; 8], + }; + + // not sure what this one does here + let _r = lineup::tls2::Environment::thread().relinquish(); + + use alloc::string::String; + let buf = String::from("package content\n\0"); + let cstr = CStr::from_bytes_with_nul(buf.as_str().as_bytes()).unwrap(); + + let r = sendto( + sockfd, + cstr.as_ptr() as *const i8, + buf.len(), + MSG_DONTWAIT, + &addr as *const sockaddr_in, + core::mem::size_of::(), + ); + assert_eq!(r, buf.len() as i64); + core::mem::forget(cstr); + + // Add some sleep time here, as otherwise + // we send the packet too fast and nothing appears on the other side + // it seems after 6s (pkt 6) things start working. + // I suspect it's due to some ARP resolution issue, but unclear. + let sleep_dur = timespec_t { + tv_sec: 1, + tv_nsec: 0, + }; + nanosleep(&sleep_dur as *const timespec_t, ptr::null_mut()); + } + // keep the socket open here... 
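        // A minimal sketch (illustrative only, not part of this patch) of where the
        // hard-coded destination above comes from:
        //   ((172u32 << 24) | (31 << 16) | (0 << 8) | 20) == 2887712788,
        // i.e. 172.31.0.20 in host order; the `.to_be()` calls on sin_port and
        // sin_addr then put both values into the network byte order that
        // sendto() expects.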
+ // close(sockfd); } // Set up a garbage environment From 26a05259c68441ce43ec6ba5fc00b1de978af82c Mon Sep 17 00:00:00 2001 From: Reto Achermann Date: Tue, 17 Oct 2023 13:53:51 -0700 Subject: [PATCH 02/44] tests/dhcp: statically configure all dhcp entries for the VMs Signed-off-by: Reto Achermann --- kernel/tests/dhcpd.conf | 36 +++++++++++++++++++++++++++++++++--- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/kernel/tests/dhcpd.conf b/kernel/tests/dhcpd.conf index 26e798afc..4e46f513c 100644 --- a/kernel/tests/dhcpd.conf +++ b/kernel/tests/dhcpd.conf @@ -5,11 +5,11 @@ option domain-name-servers ns1.example.org, ns2.example.org; ddns-update-style none; subnet 172.31.0.0 netmask 255.255.255.0 { - range 172.31.0.12 172.31.0.16; + range 172.31.0.118 172.31.0.118; option routers 172.31.0.20; option subnet-mask 255.255.255.0; - default-lease-time 1; - max-lease-time 1; + default-lease-time 1000; + max-lease-time 1000; } host nrk1 { @@ -20,4 +20,34 @@ host nrk1 { host nrk2 { hardware ethernet 56:b4:44:e9:62:d1; fixed-address 172.31.0.11; +} + +host nrk3 { + hardware ethernet 56:b4:44:e9:62:d2; + fixed-address 172.31.0.12; +} + +host nrk4 { + hardware ethernet 56:b4:44:e9:62:d3; + fixed-address 172.31.0.13; +} + +host nrk5 { + hardware ethernet 56:b4:44:e9:62:d4; + fixed-address 172.31.0.14; +} + +host nrk6 { + hardware ethernet 56:b4:44:e9:62:d5; + fixed-address 172.31.0.15; +} + +host nrk7 { + hardware ethernet 56:b4:44:e9:62:d6; + fixed-address 172.31.0.16; +} + +host nrk8 { + hardware ethernet 56:b4:44:e9:62:d7; + fixed-address 172.31.0.17; } \ No newline at end of file From 1bfdc000c7ae1c653162b76f6d68d5a07d497982 Mon Sep 17 00:00:00 2001 From: Reto Achermann Date: Tue, 17 Oct 2023 13:55:29 -0700 Subject: [PATCH 03/44] tests: tweak the sleep times in the benchmarks Signed-off-by: Reto Achermann --- kernel/tests/s10_benchmarks.rs | 61 ++++++++++++++++++++++++++++------ 1 file changed, 50 insertions(+), 11 deletions(-) diff --git a/kernel/tests/s10_benchmarks.rs b/kernel/tests/s10_benchmarks.rs index f590ace52..f7682b679 100644 --- a/kernel/tests/s10_benchmarks.rs +++ b/kernel/tests/s10_benchmarks.rs @@ -56,7 +56,7 @@ fn s10_redis_benchmark_virtio() { output += dhcp_server.exp_string(DHCP_ACK_MATCH)?.as_str(); output += p.exp_string(REDIS_START_MATCH)?.as_str(); - std::thread::sleep(std::time::Duration::from_secs(9)); + std::thread::sleep(std::time::Duration::from_secs(20)); let mut redis_client = redis_benchmark("virtio", 2_000_000)?; @@ -96,7 +96,7 @@ fn s10_redis_benchmark_e1000() { output += p.exp_string(REDIS_START_MATCH)?.as_str(); use std::{thread, time}; - thread::sleep(time::Duration::from_secs(9)); + thread::sleep(time::Duration::from_secs(20)); let mut redis_client = redis_benchmark("e1000", 2_000_000)?; @@ -671,8 +671,7 @@ fn memcached_benchmark( #[cfg(not(feature = "baremetal"))] #[test] fn s10_memcached_benchmark() { - let _r = - which::which(MEMASLAP_BINARY).expect("memaslap not installed on host, test will fail!"); + let _r = which::which(MEMASLAP_BINARY).expect("memslap not installed on host, test will fail!"); let max_cores = 4; let threads = if cfg!(feature = "smoke") { @@ -693,6 +692,8 @@ fn s10_memcached_benchmark() { for nic in &["virtio", "e1000"] { for thread in threads.iter() { + println!("\n# Memcached with {} threads over {}", thread, nic); + let kernel_cmdline = format!("init=memcached.bin initargs={}", *thread); let cmdline = RunnerArgs::new_with_build("userspace-smp", &build) .memory(8192) @@ -716,12 +717,50 @@ fn s10_memcached_benchmark() { 
dhcp_server.exp_regex(DHCP_ACK_MATCH)?; - std::thread::sleep(std::time::Duration::from_secs(6)); - let mut memaslap = memcached_benchmark(nic, *thread, 10)?; + use std::{thread, time}; + let timeout = 15; + print!("waiting {timeout} seconds to give the server time to start up. "); + for _ in 0..timeout { + let _ = std::io::stdout().flush(); + thread::sleep(time::Duration::from_secs(1)); + print!(". ") + } + println!("\nstarting benchmark"); + match memcached_benchmark(nic, *thread, 10) { + Ok(mut s) => { + let _ = s.process.kill(SIGTERM)?; + println!("benchmark done."); + } + Err(e) => { + println!("benchmark failed."); + print!("\nnrk: "); + while let Some(c) = p.try_read() { + if c == '\n' { + print!("\nnrk: "); + } else { + print!("{}", c); + } + } + println!(); + match e.kind() { + ErrorKind::EOF(_r, s, _) => { + for l in s.lines() { + println!("memslap: {}", l); + } + } + ErrorKind::Timeout(_r, s, _) => { + for l in s.lines() { + println!("memslap: {}", l); + } + } + e => { + println!("Error: {:?}", e); + } + } + } + } dhcp_server.send_control('c')?; - memaslap.process.kill(SIGTERM)?; - p.process.kill(SIGTERM) }; @@ -858,9 +897,9 @@ fn s10_memcached_benchmark_internal() { (16 * 1024 /* MB */, 16 /* MB */, 2000000, 300_000) } else { ( - 128 * 1024, /* MB */ - 32 * 1024, /* MB */ - 50000000, + 256 * 1024, /* MB */ + 16, // 64 * 1024, /* MB */ + 100_000_000, 600_000, ) }; From 7012327f51f53df140ed46e63f0ccf9891248a88 Mon Sep 17 00:00:00 2001 From: Reto Achermann Date: Tue, 17 Oct 2023 13:56:59 -0700 Subject: [PATCH 04/44] usr/rkapps: bump librettos version Signed-off-by: Reto Achermann --- usr/rkapps/Cargo.toml | 1 + usr/rkapps/build.rs | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/usr/rkapps/Cargo.toml b/usr/rkapps/Cargo.toml index 88da3ff81..9038f5c94 100644 --- a/usr/rkapps/Cargo.toml +++ b/usr/rkapps/Cargo.toml @@ -20,4 +20,5 @@ nginx = [] leveldb-bench = [] memcached-bench = [] monetdb = [] +virtio = ["vibrio/virtio"] diff --git a/usr/rkapps/build.rs b/usr/rkapps/build.rs index 534bbdd80..6a03865e0 100644 --- a/usr/rkapps/build.rs +++ b/usr/rkapps/build.rs @@ -138,11 +138,11 @@ fn main() { .unwrap(); println!( - "CHECKOUT be303d8bfc2c40d63704848bb3acd9e075dd61e4 {:?}", + "CHECKOUT b2a11dee71b5181148830b8869b27742a8ebe96b {:?}", out_dir ); Command::new("git") - .args(&["checkout", "be303d8bfc2c40d63704848bb3acd9e075dd61e4"]) + .args(&["checkout", "b2a11dee71b5181148830b8869b27742a8ebe96b"]) .current_dir(&Path::new(&out_dir)) .status() .unwrap(); From 1189be55c81e215128501225b1ccfe88944e754e Mon Sep 17 00:00:00 2001 From: Reto Achermann Date: Tue, 17 Oct 2023 14:02:45 -0700 Subject: [PATCH 05/44] tests: adding support for multinode test in rackscale runner Signed-off-by: Reto Achermann --- kernel/testutils/src/rackscale_runner.rs | 300 ++++++++++++++++++++++- 1 file changed, 298 insertions(+), 2 deletions(-) diff --git a/kernel/testutils/src/rackscale_runner.rs b/kernel/testutils/src/rackscale_runner.rs index cda022e43..f32c6329b 100644 --- a/kernel/testutils/src/rackscale_runner.rs +++ b/kernel/testutils/src/rackscale_runner.rs @@ -47,6 +47,9 @@ type RackscaleMatchFn = fn( arg: Option, ) -> Result<()>; +type ControllerRunFn = + fn(config: Option<&T>, num_clients: usize, timeout_ms: u64) -> Result; + #[derive(Clone)] pub struct RackscaleRun where @@ -60,6 +63,8 @@ where pub controller_timeout: u64, /// Function that is called after the controller is spawned to match output of the controller process pub controller_match_fn: RackscaleMatchFn, + /// function to 
start the controller + pub controller_run_fn: Option>, /// Timeout for each client process pub client_timeout: u64, /// Amount of non-shmem QEMU memory given to each QEMU instance @@ -92,6 +97,8 @@ where pub use_qemu_huge_pages: bool, /// DCM config pub dcm_config: Option, + /// whether we're running in multi-node mode. + pub is_multi_node: bool, } impl RackscaleRun { @@ -112,6 +119,7 @@ impl RackscaleRun { RackscaleRun { controller_timeout: 60_000, controller_match_fn: blank_match_fn, + controller_run_fn: None, client_timeout: 60_000, client_match_fn: blank_match_fn, memory: 1024, @@ -130,10 +138,16 @@ impl RackscaleRun { run_dhcpd_for_baseline: false, use_qemu_huge_pages: false, dcm_config: None, + is_multi_node: false, } } pub fn run_rackscale(&self) { + if self.is_multi_node { + self.run_multi_node(); + return; + } + // Do not allow over provisioning let machine = Machine::determine(); assert!(self.cores_per_client * self.num_clients + 1 <= machine.max_cores()); @@ -418,6 +432,287 @@ impl RackscaleRun { controller_ret.unwrap(); } + pub fn run_multi_node(&self) { + // Do not allow over provisioning + let machine = Machine::determine(); + assert!(self.cores_per_client * self.num_clients + 1 <= machine.max_cores()); + let controller_cores = self.num_clients + 1; + + let mut vm_cores = vec![self.cores_per_client; self.num_clients + 1]; + vm_cores[0] = controller_cores; + let placement_cores = machine.rackscale_core_affinity(vm_cores); + + setup_network(self.num_clients + 1); + + // start the dhcp server + let mut dhcpd_server = crate::helpers::spawn_dhcpd_with_interface("br0".to_string()) + .expect("could not spawn dhcpd server"); + + let all_outputs = Arc::new(Mutex::new(Vec::new())); + + let (tx, rx) = channel(); + let rx_mut = Arc::new(Mutex::new(rx)); + let tx_mut = Arc::new(Mutex::new(tx)); + + let (tx_build_timer, _rx_build_timer) = channel(); + let tx_build_timer_mut = Arc::new(Mutex::new(tx_build_timer)); + + // Run client in separate thead. 
Wait a bit to make sure controller started + let mut client_procs = Vec::new(); + for i in 0..self.num_clients { + let client_output_array: Arc>> = all_outputs.clone(); + let client_rx = rx_mut.clone(); + let client_tx = tx_mut.clone(); + let client_kernel_test = self.kernel_test.clone(); + let client_file_name = self.file_name.clone(); + let client_cmd = self.cmd.clone(); + let client_placement_cores = placement_cores.clone(); + let state = self.clone(); + let client_tx_build_timer = tx_build_timer_mut.clone(); + let use_large_pages = self.use_qemu_huge_pages; + let client = std::thread::Builder::new() + .name(format!("Client{}", i + 1)) + .spawn(move || { + let mut cmdline_client = + RunnerArgs::new_with_build(&client_kernel_test, &state.built) + .timeout(state.client_timeout) + .use_virtio() + .tap(&format!("tap{}", (i + 1) * 2)) + .no_network_setup() + .cores(state.cores_per_client) + .memory(state.memory) + .nobuild() // Use single build for all for consistency + .cmd(&client_cmd) + .nodes(1) + .node_offset(client_placement_cores[i + 1].0) + .setaffinity(client_placement_cores[i + 1].1.clone()); + + if use_large_pages { + cmdline_client = cmdline_client.large_pages().prealloc(); + } + + let mut output = String::new(); + let qemu_run = || -> Result { + let mut p = spawn_nrk(&cmdline_client)?; + + // output += p.exp_string("CLIENT READY")?.as_str(); + // { + // let tx = client_tx_build_timer + // .lock() + // .expect("Failed to get build timer lock"); + // send_signal(&tx); + // } + + // User-supplied function to check output + (state.client_match_fn)( + &mut p, + &mut output, + state.cores_per_client, + state.num_clients, + &client_file_name, + false, + state.arg, + )?; + + // Wait for controller to terminate + if !state.wait_for_client { + let rx = client_rx.lock().expect("Failed to get rx lock"); + let _ = wait_for_signal::<()>(&rx); + } + + let ret = p.process.kill(SIGTERM); + output += p.exp_eof()?.as_str(); + ret + }; + + // Could exit with 'success' or from sigterm, depending on number of clients. 
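                    // The qemu_run closure above spawns one client VM, drives
                    // client_match_fn against its output, waits for the controller's
                    // shutdown signal unless wait_for_client is set, and finally
                    // terminates the VM with SIGTERM; it is invoked next.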
+ let ret = qemu_run(); + + if ret.is_err() { + let tx = client_tx_build_timer + .lock() + .expect("Failed to get build timer lock"); + send_signal(&tx); + } + + if state.wait_for_client { + let tx = client_tx.lock().expect("Failed to get rx lock"); + send_signal(&tx); + } + + client_output_array + .lock() + .expect("Failed to get mutex to output array") + .push((format!("Client{}", i + 1), output)); + wait_for_sigterm_or_successful_exit_no_log( + &cmdline_client, + ret, + format!("Client{}", i + 1), + ); + }) + .expect("Client thread failed to spawn"); + client_procs.push(client); + } + + // Run controller in separate thread + let controller_output_array: Arc>> = all_outputs.clone(); + let controller_kernel_test = self.kernel_test.clone(); + let controller_rx = rx_mut.clone(); + let controller_tx = tx_mut.clone(); + let controller_file_name = self.file_name.clone(); + let controller_placement_cores = placement_cores.clone(); + let state = self.clone(); + let controller_tx_build_timer = tx_build_timer_mut.clone(); + let controller_run_fn = self.controller_run_fn.clone(); + let use_large_pages = self.use_qemu_huge_pages; + let controller_arg = self.arg.clone(); + let controller = std::thread::Builder::new() + .name("Controller".to_string()) + .spawn(move || { + let mut output = String::new(); + let ret = if let Some(run_fn) = controller_run_fn { + let qemu_run = || -> Result { + let mut p = run_fn( + controller_arg.as_ref(), + state.num_clients, + state.controller_timeout, + )?; + + // User-supplied function to check output + (state.controller_match_fn)( + &mut p, + &mut output, + state.cores_per_client, + state.num_clients, + &controller_file_name, + false, + state.arg, + )?; + + for _ in 0..state.num_clients { + if state.wait_for_client { + // Wait for signal from each client that it is done + let rx = controller_rx.lock().expect("Failed to get rx lock"); + let _ = wait_for_signal::<()>(&rx); + } + } + + let ret = p.process.kill(SIGTERM)?; + output += p.exp_eof()?.as_str(); + Ok(ret) + }; + qemu_run() + } else { + let mut cmdline_controller = + RunnerArgs::new_with_build(&controller_kernel_test, &state.built) + .timeout(state.controller_timeout) + .transport(state.transport) + .mode(RackscaleMode::Controller) + .tap("tap0") + .no_network_setup() + .workers(state.num_clients + 1) + .use_vmxnet3() + .memory(state.memory) + .nodes(1) + .cores(controller_cores) + .node_offset(controller_placement_cores[0].0) + .setaffinity(controller_placement_cores[0].1.clone()); + + if use_large_pages { + cmdline_controller = cmdline_controller.large_pages().prealloc(); + } + + let mut output = String::new(); + let qemu_run = || -> Result { + let mut p = spawn_nrk(&cmdline_controller)?; + + output += p.exp_string("CONTROLLER READY")?.as_str(); + { + let tx = controller_tx_build_timer + .lock() + .expect("Failed to get build timer lock"); + send_signal(&tx); + } + + // User-supplied function to check output + (state.controller_match_fn)( + &mut p, + &mut output, + state.cores_per_client, + state.num_clients, + &controller_file_name, + false, + state.arg, + )?; + + for _ in 0..state.num_clients { + if state.wait_for_client { + // Wait for signal from each client that it is done + let rx = controller_rx.lock().expect("Failed to get rx lock"); + let _ = wait_for_signal::<()>(&rx); + } + } + + let ret = p.process.kill(SIGTERM)?; + output += p.exp_eof()?.as_str(); + Ok(ret) + }; + qemu_run() + }; + + if ret.is_err() { + let tx = controller_tx_build_timer + .lock() + .expect("Failed to get build timer lock"); + 
send_signal(&tx); + } + + if !state.wait_for_client { + let tx = controller_tx.lock().expect("Failed to get tx lock"); + for _ in 0..state.num_clients { + // Notify each client it's okay to shutdown + send_signal(&tx); + } + } + + controller_output_array + .lock() + .expect("Failed to get mutex to output array") + .push((String::from("Controller"), output)); + + // This will only find sigterm, that's okay + wait_for_sigterm_or_successful_exit_no_log( + &RunnerArgs::new_with_build(&controller_kernel_test, &state.built), + ret, + String::from("Controller"), + ); + }) + .expect("Controller thread failed to spawn"); + + let mut client_rets = Vec::new(); + for client in client_procs { + client_rets.push(client.join()); + } + let controller_ret = controller.join(); + + dhcpd_server + .send_control('c') + .expect("could not terminate dhcp"); + + // If there's been an error, print everything + if controller_ret.is_err() || (&client_rets).into_iter().any(|ret| ret.is_err()) { + let outputs = all_outputs.lock().expect("Failed to get output lock"); + for (name, output) in outputs.iter() { + log_qemu_out_with_name(None, name.to_string(), output.to_string()); + } + } + + for client_ret in client_rets { + client_ret.unwrap(); + } + controller_ret.unwrap(); + } + pub fn run_baseline(&self) { // Here we assume run.num_clients == run.num_replicas (num nodes) // And the controller match function, timeout, memory will be used @@ -499,12 +794,13 @@ impl RackscaleBench { // Set rackscale appropriately, rebuild if necessary. if !is_baseline != test_run.built.with_args.rackscale { - eprintln!("\tRebuilding with rackscale={}", !is_baseline,); + let is_rackscale = !is_baseline && !test_run.is_multi_node; + eprintln!("\tRebuilding with rackscale={}", is_rackscale); test_run.built = test_run .built .with_args .clone() - .set_rackscale(!is_baseline) + .set_rackscale(is_rackscale) .build(); } From 5a0826a92b9bdb54a23ed4203923d9cfdea38599 Mon Sep 17 00:00:00 2001 From: Reto Achermann Date: Tue, 17 Oct 2023 14:03:26 -0700 Subject: [PATCH 06/44] tests: adding sharded memcached benchmark Signed-off-by: Reto Achermann --- kernel/tests/s11_rackscale_benchmarks.rs | 724 ++++++++++++++++++++++- 1 file changed, 719 insertions(+), 5 deletions(-) diff --git a/kernel/tests/s11_rackscale_benchmarks.rs b/kernel/tests/s11_rackscale_benchmarks.rs index 349a3326e..830e0d33b 100644 --- a/kernel/tests/s11_rackscale_benchmarks.rs +++ b/kernel/tests/s11_rackscale_benchmarks.rs @@ -8,15 +8,20 @@ //! The naming scheme of the tests ensures a somewhat useful order of test //! execution taking into account the dependency chain: //! 
* `s11_*`: Rackscale (distributed) benchmarks +use std::env; use std::fs::OpenOptions; use std::io::Write; -use std::path::Path; +use std::path::{Path, PathBuf}; +use std::process::Command; +use std::time::Duration; use rexpect::errors::*; +use rexpect::session::spawn_command; use rexpect::session::PtySession; use testutils::builder::{BuildArgs, Machine}; use testutils::helpers::{DCMConfig, DCMSolver}; + use testutils::rackscale_runner::{RackscaleBench, RackscaleRun}; use testutils::runner_args::RackscaleTransport; @@ -459,6 +464,9 @@ fn s11_rackscale_shmem_leveldb_benchmark() { bench.run_bench(false, is_smoke); } +const MEMCACHED_MEM_SIZE_MB: usize = 16; +const MEMCACHED_NUM_QUERIES: usize = 1000_000; + #[derive(Clone)] struct MemcachedInternalConfig { pub num_queries: usize, @@ -599,9 +607,11 @@ fn rackscale_memcached_benchmark(transport: RackscaleTransport) { mem_size: 16, } } else { + // keep in sync with the s10_memcached_benchmark_internal configuration + // and the s11_rackscale_memcached_benchmark_sharded configuration MemcachedInternalConfig { - num_queries: 1_000_000, // TODO(rackscale): should be 100_000_000, - mem_size: 16, // TODO(rackscale): should be 32_000, + num_queries: MEMCACHED_NUM_QUERIES, + mem_size: MEMCACHED_MEM_SIZE_MB, } }; @@ -616,6 +626,13 @@ fn rackscale_memcached_benchmark(transport: RackscaleTransport) { test.arg = Some(config); test.run_dhcpd_for_baseline = true; + if !is_smoke { + test.shmem_size = std::cmp::max( + MEMCACHED_MEM_SIZE_MB * 8, + testutils::helpers::SHMEM_SIZE * 4, + ); + } + fn cmd_fn(num_cores: usize, arg: Option) -> String { let config = arg.expect("missing leveldb config"); format!( @@ -629,7 +646,11 @@ fn rackscale_memcached_benchmark(transport: RackscaleTransport) { } fn rackscale_timeout_fn(num_cores: usize) -> u64 { - 600_000 + 6_000 * num_cores as u64 + if is_smoke { + 60_000 as u64 + } else { + MEMCACHED_MEM_SIZE_MB / 10 * 1000 + MEMCACHED_NUM_QUERIES / 1000 + } } fn mem_fn(num_cores: usize, is_smoke: bool) -> usize { @@ -637,7 +658,13 @@ fn rackscale_memcached_benchmark(transport: RackscaleTransport) { 8192 } else { // Memory must also be divisible by number of nodes, which could be 1, 2, 3, or 4 - core::cmp::max(8192, 1024 * (((((num_cores + 1) / 2) + 3 - 1) / 3) * 3)) + // memory = result of this function / num_clients - shmem_size + (8192 + + std::cmp::max( + MEMCACHED_MEM_SIZE_MB * 8, + testutils::helpers::SHMEM_SIZE * 4, + )) + * (((((num_cores + 1) / 2) + 3 - 1) / 3) * 3) } } @@ -880,6 +907,693 @@ fn rackscale_memcached_dcm(transport: RackscaleTransport, dcm_config: Option Result { + // x_benchmark_mem = 10 MB + let (prev, matched) = p.exp_regex(r#"x_benchmark_mem = (\d+) MB"#)?; + // println!("> {}", matched); + let b_mem = matched.replace("x_benchmark_mem = ", "").replace(" MB", ""); + + *output += prev.as_str(); + *output += matched.as_str(); + + // number of threads: 3 + let (prev, matched) = p.exp_regex(r#"number of threads: (\d+)"#)?; + // println!("> {}", matched); + let b_threads = matched.replace("number of threads: ", ""); + + *output += prev.as_str(); + *output += matched.as_str(); + + // number of keys: 131072 + let (prev, matched) = p.exp_regex(r#"number of keys: (\d+)"#)?; + // println!("> {}", matched); + + *output += prev.as_str(); + *output += matched.as_str(); + + // benchmark took 129 seconds + let (prev, matched) = p.exp_regex(r#"benchmark took (\d+) ms"#)?; + // println!("> {}", matched); + let b_time = matched.replace("benchmark took ", "").replace(" ms", ""); + + *output += prev.as_str(); + *output += 
matched.as_str(); + + // benchmark took 7937984 queries / second + let (prev, matched) = p.exp_regex(r#"benchmark took (\d+) queries / second"#)?; + println!("> {}", matched); + let b_thpt = matched + .replace("benchmark took ", "") + .replace(" queries / second", ""); + + *output += prev.as_str(); + *output += matched.as_str(); + + let (prev, matched) = p.exp_regex(r#"benchmark executed (\d+)"#)?; + println!("> {}", matched); + let b_queries = matched + .replace("benchmark executed ", "") + .split(" ") + .next() + .unwrap() + .to_string(); + + *output += prev.as_str(); + *output += matched.as_str(); + + Ok(MemcachedShardedResult { + b_threads, + b_mem, + b_queries, + b_time, + b_thpt, + }) +} + +#[cfg(not(feature = "baremetal"))] +fn rackscale_memcached_checkout() { + let out_dir_path = PathBuf::from(env!("CARGO_TARGET_TMPDIR")).join("sharded-memcached"); + + let out_dir = out_dir_path.display().to_string(); + + println!("CARGO_TARGET_TMPDIR {:?}", out_dir); + + // clone abd build the benchmark + if !out_dir_path.is_dir() { + println!("RMDIR {:?}", out_dir_path); + Command::new(format!("rm",)) + .args(&["-rf", out_dir.as_str()]) + .status() + .unwrap(); + + println!("MKDIR {:?}", out_dir_path); + Command::new(format!("mkdir",)) + .args(&["-p", out_dir.as_str()]) + .status() + .unwrap(); + + println!("CLONE {:?}", out_dir_path); + let url = "https://github.com/achreto/memcached-bench.git"; + Command::new("git") + .args(&["clone", "--depth=1", url, out_dir.as_str()]) + .output() + .expect("failed to clone"); + } else { + Command::new("git") + .args(&["pull"]) + .current_dir(out_dir_path.as_path()) + .output() + .expect("failed to pull"); + } + + println!( + "CHECKOUT fe0eb024882481717efd6a3f4600e96c99ca77a2 {:?}", + out_dir + ); + + let res = Command::new("git") + .args(&["checkout", "fe0eb024882481717efd6a3f4600e96c99ca77a2"]) + .current_dir(out_dir_path.as_path()) + .output() + .expect("git checkout failed"); + if !res.status.success() { + std::io::stdout().write_all(&res.stdout).unwrap(); + std::io::stderr().write_all(&res.stderr).unwrap(); + panic!("git checkout failed!"); + } + + println!("BUILD {:?}", out_dir_path); + for (key, value) in env::vars() { + println!("{}: {}", key, value); + } + + let build_args = &["-j", "8"]; + + // now build the benchmark + let status = Command::new("make") + .args(build_args) + .current_dir(&out_dir_path) + .output() + .expect("Can't make app dir"); + + if !status.status.success() { + println!("BUILD FAILED"); + std::io::stdout().write_all(&status.stdout).unwrap(); + std::io::stderr().write_all(&status.stderr).unwrap(); + panic!("BUILD FAILED"); + } +} + +#[test] +#[cfg(not(feature = "baremetal"))] +fn s11_rackscale_memcached_benchmark_sharded_linux() { + let machine = Machine::determine(); + let out_dir_path = PathBuf::from(env!("CARGO_TARGET_TMPDIR")).join("sharded-memcached"); + let is_smoke = cfg!(feature = "smoke"); + + rackscale_memcached_checkout(); + + // stuff has been built, now we can run the benchmark + let mut config = if is_smoke { + MemcachedShardedConfig { + num_servers: 1, + num_queries: 100_000, + mem_size: 16, + protocol: "tcp", + is_local_host: true, + num_threads: 4, + path: out_dir_path, + } + } else { + // keep in sync with the s10_memcached_benchmark_internal configuration + MemcachedShardedConfig { + num_servers: 1, + num_queries: MEMCACHED_NUM_QUERIES, + mem_size: MEMCACHED_MEM_SIZE_MB, + protocol: "tcp", + is_local_host: true, + num_threads: 4, + path: out_dir_path, + } + }; + + let timeout_ms = if is_smoke { 60_000 } 
else { 600_000 }; + + fn run_benchmark_internal(config: &MemcachedShardedConfig, timeout_ms: u64) -> PtySession { + Command::new("killall").args(&["memcached"]).status().ok(); + + let mut command = Command::new("taskset"); + command.arg("--cpu-list"); + command.arg(format!("0-{}", config.num_threads - 1).as_str()); + command.arg("./build/bin/memcached"); + command.arg(format!("--x-benchmark-queries={}", config.num_queries).as_str()); + command.arg(format!("--x-benchmark-mem={}", config.mem_size).as_str()); + command.current_dir(config.path.as_path()); + spawn_command(command, Some(timeout_ms)).expect("failed to spawn memcached") + } + + fn spawn_memcached( + id: usize, + config: &MemcachedShardedConfig, + timeout_ms: u64, + ) -> Result { + let con_info = if config.protocol == "tcp" { + format!("tcp://localhost:{}", 11211 + id) + } else { + format!("unix://{}/memcached{}.sock", config.path.display(), id) + }; + + let mut command = Command::new("bash"); + + command.args(&[ + "scripts/spawn-memcached-process.sh", + id.to_string().as_str(), + con_info.as_str(), + config.mem_size.to_string().as_str(), + config.num_threads.to_string().as_str(), + ]); + command.current_dir(config.path.as_path()); + + println!("Spawning memcached:\n $ `{:?}`", command); + + let mut res = spawn_command(command, Some(timeout_ms))?; + std::thread::sleep(Duration::from_secs(1)); + + match res.exp_regex(r#"INTERNAL BENCHMARK CONFIGURE"#) { + Ok((_prev, _matched)) => { + println!(" $ OK."); + Ok(res) + } + Err(e) => { + println!(" $ FAILED. {}", e); + Err(e) + } + } + } + + fn spawn_loadbalancer(config: &MemcachedShardedConfig, timeout_ms: u64) -> Result { + let mut command = Command::new("./loadbalancer/loadbalancer"); + command.args(&["--binary"]); + command.arg(format!("--num-queries={}", config.num_queries).as_str()); + command.arg(format!("--num-threads={}", config.num_threads).as_str()); + command.arg(format!("--max-memory={}", config.mem_size).as_str()); + let mut servers = String::from("--servers="); + for i in 0..config.num_servers { + if i > 0 { + servers.push_str(","); + } + if config.protocol == "tcp" { + if config.is_local_host { + servers.push_str(format!("tcp://localhost:{}", 11211 + i).as_str()); + } else { + // +1 because tap0 is reserved for the controller. 
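                    // Server i then listens on 172.31.0.{11 + i}, which lines up with
                    // the fixed-address entries nrk2..nrk8 added to tests/dhcpd.conf in
                    // this series; 172.31.0.10 stays reserved for tap0 / the controller.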
+ let ip = 10 + i + 1; + servers.push_str(format!("tcp://172.31.0.{}:{}", ip, 11211).as_str()); + } + } else { + servers.push_str( + format!("unix://{}/memcached{}.sock", config.path.display(), i).as_str(), + ); + } + } + command.arg(servers.as_str()); + command.current_dir(config.path.as_path()); + + // give the servers some time to be spawned + std::thread::sleep(Duration::from_secs(5)); + + println!("Spawning Loadbalancer: \n $ `{:?}`", command); + + spawn_command(command, Some(timeout_ms)) + } + + let file_name = "memcached_benchmark_sharded_linux.csv"; + let _r = std::fs::remove_file(file_name); + + let mut csv_file = OpenOptions::new() + .append(true) + .create(true) + .open(file_name) + .expect("Can't open file"); + + let row = "git_rev,benchmark,os,nthreads,protocol,mem,queries,time,thpt\n"; + let r = csv_file.write(row.as_bytes()); + assert!(r.is_ok()); + + let max_threads_per_node = if is_smoke { + 1 + } else { + machine.max_cores() / machine.max_numa_nodes() + }; + println!( + "Nodes: {}, max_threads_per_node: {max_threads_per_node}", + machine.max_numa_nodes() + ); + for num_nodes in 1..=machine.max_numa_nodes() { + config.num_servers = num_nodes; + + for num_threads in 1..=max_threads_per_node { + if (num_threads != 1 || num_threads != max_threads_per_node) && (num_threads % 4 != 0) { + continue; + } + + println!(""); + + config.num_threads = num_threads; + + let _ = Command::new("killall").args(&["memcached"]).status(); + let _ = Command::new("killall").args(&["memcached"]).status(); + let mut pty = run_benchmark_internal(&config, timeout_ms); + let mut output = String::new(); + let res = + parse_memcached_output(&mut pty, &mut output).expect("could not parse output!"); + let r = csv_file.write(format!("{},", env!("GIT_HASH")).as_bytes()); + assert!(r.is_ok()); + let out = format!( + "memcached_sharded,linux,{},{},{},{},{},{}\n", + res.b_threads, "internal", res.b_mem, res.b_queries, res.b_time, res.b_thpt, + ); + let r = csv_file.write(out.as_bytes()); + assert!(r.is_ok()); + + println!("{:?}", res); + + // single node + for protocol in &["tcp", "unix"] { + config.protocol = protocol; + + println!(""); + + println!("Memcached Sharded: {num_threads}x{num_nodes} with {protocol}"); + + // terminate the memcached instance + let _ = Command::new("killall").args(&["memcached"]).status(); + let _ = Command::new("killall").args(&["memcached"]).status(); + + let mut memcached_ctrls = Vec::new(); + for i in 0..num_nodes { + memcached_ctrls.push( + spawn_memcached(i, &config, timeout_ms).expect("could not spawn memcached"), + ); + } + + let mut pty = + spawn_loadbalancer(&config, timeout_ms).expect("failed to spawn load balancer"); + let mut output = String::new(); + use rexpect::errors::ErrorKind::Timeout; + match parse_memcached_output(&mut pty, &mut output) { + Ok(res) => { + let r = csv_file.write(format!("{},", env!("GIT_HASH")).as_bytes()); + assert!(r.is_ok()); + let out = format!( + "memcached_sharded,linux,{},{},{},{},{},{}\n", + res.b_threads, + protocol, + res.b_mem, + res.b_queries, + res.b_time, + res.b_thpt, + ); + let r = csv_file.write(out.as_bytes()); + assert!(r.is_ok()); + + println!("{:?}", res); + } + + Err(e) => { + if let Timeout(expected, got, timeout) = e.0 { + println!("Timeout while waiting for {} ms\n", timeout.as_millis()); + println!("Expected: `{expected}`\n"); + println!("Got:",); + for l in got.lines().take(5) { + println!(" > {l}"); + } + } else { + println!("error: {}", e); + } + + let r = csv_file.write(format!("{},", env!("GIT_HASH")).as_bytes()); + 
assert!(r.is_ok()); + let out = format!( + "memcached_sharded,linux,{},{},failure,failure,failure,failure\n", + config.num_servers, protocol, + ); + let r = csv_file.write(out.as_bytes()); + assert!(r.is_ok()); + + for mc in memcached_ctrls.iter_mut() { + mc.process + .kill(rexpect::process::signal::Signal::SIGKILL) + .expect("couldn't terminate memcached"); + while let Ok(l) = mc.read_line() { + println!("MEMCACHED-OUTPUT: {}", l); + } + } + let _ = Command::new("killall").args(&["memcached"]).status(); + } + }; + } + } + } + + // terminate the memcached instance + let _ = Command::new("killall").args(&["memcached"]).status(); +} + +#[test] +#[cfg(not(feature = "baremetal"))] +fn s11_rackscale_memcached_benchmark_sharded_nros() { + let out_dir_path = PathBuf::from(env!("CARGO_TARGET_TMPDIR")).join("sharded-memcached"); + let is_smoke = cfg!(feature = "smoke"); + + rackscale_memcached_checkout(); + + // stuff has been built, now we can run the benchmark + let mut config = if is_smoke { + MemcachedShardedConfig { + num_servers: 1, + num_queries: 100_000, + mem_size: 16, + protocol: "tcp", + is_local_host: true, + num_threads: 4, + path: out_dir_path, + } + } else { + // keep in sync with the s10_memcached_benchmark_internal configuration + MemcachedShardedConfig { + num_servers: 1, + num_queries: MEMCACHED_NUM_QUERIES, + mem_size: MEMCACHED_MEM_SIZE_MB, + protocol: "tcp", + is_local_host: true, + num_threads: 4, + path: out_dir_path, + } + }; + + fn spawn_loadbalancer(config: &MemcachedShardedConfig, timeout_ms: u64) -> Result { + let mut command = Command::new("./loadbalancer/loadbalancer"); + command.args(&["--binary"]); + command.arg(format!("--num-queries={}", config.num_queries).as_str()); + command.arg(format!("--num-threads={}", config.num_threads).as_str()); + command.arg(format!("--max-memory={}", config.mem_size).as_str()); + let mut servers = String::from("--servers="); + for i in 0..config.num_servers { + if i > 0 { + servers.push_str(","); + } + if config.protocol == "tcp" { + if config.is_local_host { + servers.push_str(format!("tcp://localhost:{}", 11211 + i).as_str()); + } else { + // +1 because tap0 is reserved for the controller. 
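                    // Same addressing scheme as the Linux sharded test above:
                    // server i maps to 172.31.0.{11 + i} from tests/dhcpd.conf.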
+ let ip = 10 + i + 1; + servers.push_str(format!("tcp://172.31.0.{}:{}", ip, 11211).as_str()); + } + } else { + servers.push_str( + format!("unix://{}/memcached{}.sock", config.path.display(), i).as_str(), + ); + } + } + command.arg(servers.as_str()); + command.current_dir(config.path.as_path()); + + // give the servers some time to be spawned + std::thread::sleep(Duration::from_secs(5)); + + println!("Spawning Loadbalancer: \n $ `{:?}`", command); + + spawn_command(command, Some(timeout_ms)) + } + + let file_name = "memcached_benchmark_sharded_nros.csv"; + let _r = std::fs::remove_file(file_name); + + let mut csv_file = OpenOptions::new() + .append(true) + .create(true) + .open(file_name) + .expect("Can't open file"); + + let row = "git_rev,benchmark,os,nthreads,protocol,mem,queries,time,thpt\n"; + let r = csv_file.write(row.as_bytes()); + assert!(r.is_ok()); + + // run with NrOS as host + let built = BuildArgs::default() + .module("rkapps") + .user_feature("rkapps:memcached-bench") + .user_feature("rkapps:virtio") + .user_feature("libvibrio:virtio") + .kernel_feature("pages-4k") + .release() + .set_rackscale(false) + .build(); + + fn controller_run_fun( + config: Option<&MemcachedShardedConfig>, + num_servers: usize, + timeout_ms: u64, + ) -> Result { + // here we should wait + std::thread::sleep(Duration::from_secs(15)); + + let mut config = config.unwrap().clone(); + + config.num_servers = num_servers; + spawn_loadbalancer(&config, timeout_ms) + } + + fn controller_match_fn( + proc: &mut PtySession, + output: &mut String, + _cores_per_client: usize, + _num_clients: usize, + file_name: &str, + _is_baseline: bool, + _arg: Option, + ) -> Result<()> { + let mut csv_file = OpenOptions::new() + .append(true) + .create(true) + .open(file_name) + .expect("Can't open file"); + + use rexpect::errors::Error; + use rexpect::errors::ErrorKind::Timeout; + let res = match parse_memcached_output(proc, output) { + Ok(res) => res, + Err(Error(Timeout(expected, got, timeout), st)) => { + println!("Expected: `{expected}`\n"); + println!("Got:",); + for l in got.lines().take(5) { + println!(" > {l}"); + } + return Err(Error(Timeout(expected, got, timeout), st)); + } + Err(err) => { + println!("Failed: {:?}", err); + return Err(err); + } + }; + + let r = csv_file.write(format!("{},", env!("GIT_HASH")).as_bytes()); + assert!(r.is_ok()); + let out = format!( + "memcached_sharded,nros,{},{},{},{},{},{}\n", + res.b_threads, "tcp", res.b_mem, res.b_queries, res.b_time, res.b_thpt, + ); + let r = csv_file.write(out.as_bytes()); + assert!(r.is_ok()); + + println!("{:?}", res); + Ok(()) + } + + fn client_match_fn( + proc: &mut PtySession, + output: &mut String, + _cores_per_client: usize, + _num_clients: usize, + _file_name: &str, + _is_baseline: bool, + _arg: Option, + ) -> Result<()> { + match proc.exp_regex(r#"\[ INFO\]: bootloader/src/kernel.rs"#) { + Ok(_) => (), + Err(rexpect::errors::Error( + rexpect::errors::ErrorKind::EOF(_expected, _s, _), + _state, + )) => { + // for l in s.lines() { + // println!("MEMCACHED-OUTPUT: {}", l); + // } + } + Err(e) => { + println!("{e:?}"); + panic!("error") + } + } + + match proc.exp_regex(r#"dhcp: vioif0: adding IP address (\d+).(\d+).(\d+).(\d+)/(\d+)"#) { + Ok((_prev, matched)) => { + println!(" > Networking setup succeeded. {matched}"); + } + Err(e) => { + println!(" > Networking setup failed. 
{e}"); + return Err(e); + } + } + + match proc.exp_regex(r#"INTERNAL BENCHMARK CONFIGURE"#) { + Ok((prev, matched)) => { + println!(" > Memcached started."); + *output += prev.as_str(); + *output += matched.as_str(); + } + Err(e) => { + println!(" > Memcached failed to start. {e}"); + return Err(e); + } + } + + let (prev, matched) = proc.exp_regex(r#"x_benchmark_mem = (\d+) MB"#).unwrap(); + println!("C> {}", matched); + // let b_mem = matched.replace("x_benchmark_mem = ", "").replace(" MB", ""); + + *output += prev.as_str(); + *output += matched.as_str(); + Ok(()) + } + + config.is_local_host = false; + config.protocol = "tcp"; + + let mut test = RackscaleRun::new("userspace-smp".to_string(), built); + test.controller_match_fn = controller_match_fn; + test.controller_run_fn = Some(controller_run_fun); + test.client_match_fn = client_match_fn; + test.use_qemu_huge_pages = cfg!(feature = "affinity-shmem"); + test.file_name = file_name.to_string(); + test.arg = Some(config); + test.run_dhcpd_for_baseline = true; + test.is_multi_node = true; + + if !is_smoke { + test.shmem_size = std::cmp::max( + MEMCACHED_MEM_SIZE_MB * 8, + testutils::helpers::SHMEM_SIZE * 4, + ); + } + + fn cmd_fn(num_cores: usize, arg: Option) -> String { + let config = arg.expect("missing configuration"); + format!( + r#"init=memcachedbench.bin initargs={} appcmd='--x-benchmark-no-run --disable-evictions --conn-limit=1024 --threads={} --x-benchmark-mem={} --memory-limit={}'"#, + num_cores, num_cores, config.mem_size, config.mem_size + ) + } + + fn baseline_timeout_fn(num_cores: usize) -> u64 { + 120_000 + 500 * num_cores as u64 + } + + fn rackscale_timeout_fn(num_cores: usize) -> u64 { + 600_000 + 60_000 * num_cores as u64 + } + + fn mem_fn(num_cores: usize, is_smoke: bool) -> usize { + if is_smoke { + 8192 + } else { + // Memory must also be divisible by number of nodes, which could be 1, 2, 3, or 4 + (8192 + + std::cmp::max( + MEMCACHED_MEM_SIZE_MB * 8, + testutils::helpers::SHMEM_SIZE * 4, + )) + * (((((num_cores + 1) / 2) + 3 - 1) / 3) * 3) + } + } + + println!("----------------------------------------------------------"); + + // construct bench and run it! 
+ let bench = RackscaleBench { + test, + cmd_fn, + baseline_timeout_fn, + rackscale_timeout_fn, + mem_fn, + }; + bench.run_bench(false, is_smoke); +} + #[test] #[cfg(not(feature = "baremetal"))] fn s11_rackscale_monetdb_benchmark() { From c37ea631d920589947500836a226afd4f3f8d484 Mon Sep 17 00:00:00 2001 From: Reto Achermann Date: Tue, 17 Oct 2023 14:06:31 -0700 Subject: [PATCH 07/44] run.py: handle features of the usr & libs properly Signed-off-by: Reto Achermann --- kernel/run.py | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/kernel/run.py b/kernel/run.py index a1fa5d4b5..58f93d146 100644 --- a/kernel/run.py +++ b/kernel/run.py @@ -49,6 +49,7 @@ def get_network_config(workers): config['tap{}'.format(2*i)] = { 'mid': i, 'mac': '56:b4:44:e9:62:d{:x}'.format(i), + 'ip' : f"172.31.0.1{i}" } return config @@ -215,8 +216,10 @@ def build_kernel(args): build_args = ['build', '--target', KERNEL_TARGET] if args.no_kfeatures: build_args += ["--no-default-features"] + log(" - enable feature --no-default-features") for feature in args.kfeatures: build_args += ['--features', feature] + log(" - enable feature {}".format(feature)) build_args += CARGO_DEFAULT_ARGS build_args += CARGO_NOSTD_BUILD_ARGS if args.verbose: @@ -233,6 +236,16 @@ def build_user_libraries(args): build_args += ["--features", "rumprt"] if args.nic == "virtio-net-pci": build_args += ["--features", "virtio"] + log(" - enable feature virtio") + + for featurelist in args.ufeatures: + for feature in featurelist.split(',') : + if ':' in feature: + mod_part, feature_part = feature.split(':') + if "libvibrio" == mod_part: + log(" - enable feature {}".format(feature_part)) + build_args += ['--features', feature_part] + # else: use e1000 / wm0 build_args += CARGO_DEFAULT_ARGS build_args += CARGO_NOSTD_BUILD_ARGS @@ -259,18 +272,21 @@ def build_userspace(args): if not (USR_PATH / module).exists(): log("User module {} not found, skipping.".format(module)) continue + log("build user-space module {}".format(module)) with local.cwd(USR_PATH / module): with local.env(RUSTFLAGS=USER_RUSTFLAGS): with local.env(RUST_TARGET_PATH=USR_PATH.absolute()): build_args = build_args_default.copy() - for feature in args.ufeatures: - if ':' in feature: - mod_part, feature_part = feature.split(':') - if module == mod_part: - build_args += ['--features', feature_part] - else: - build_args += ['--features', feature] - log("Build user-module {}".format(module)) + for featurelist in args.ufeatures: + for feature in featurelist.split(',') : + if ':' in feature: + mod_part, feature_part = feature.split(':') + if module == mod_part: + log(" - enable feature {}".format(feature_part)) + build_args += ['--features', feature_part] + else: + log(" - enable feature {}".format(feature)) + build_args += ['--features', feature] if args.verbose: print("cd {}".format(USR_PATH / module)) print("RUSTFLAGS={} RUST_TARGET_PATH={} cargo ".format( From cdc9f738c3ca8d6ca6fc035a375159f268e31565 Mon Sep 17 00:00:00 2001 From: Reto Achermann Date: Tue, 17 Oct 2023 14:07:35 -0700 Subject: [PATCH 08/44] run.py: always create a bridge and run dhcp on the bridge Signed-off-by: Reto Achermann --- kernel/run.py | 34 +++++++++++++++++++-------------- kernel/testutils/src/helpers.rs | 4 ++-- 2 files changed, 22 insertions(+), 16 deletions(-) diff --git a/kernel/run.py b/kernel/run.py index 58f93d146..2ce23e132 100644 --- a/kernel/run.py +++ b/kernel/run.py @@ -749,20 +749,26 @@ def configure_network(args): sudo[ip[['link', 'set', '{}'.format(tap), 
'down']]](retcode=(0, 1)) sudo[ip[['link', 'del', '{}'.format(tap)]]](retcode=(0, 1)) - # Need to find out how to set default=True in case workers are >0 in `args` - if (not 'workers' in args) or ('workers' in args and args.workers <= 1): - sudo[tunctl[['-t', args.tap, '-u', user, '-g', group]]]() - sudo[ifconfig[args.tap, NETWORK_INFRA_IP]]() - sudo[ip[['link', 'set', args.tap, 'up']]](retcode=(0, 1)) - else: - assert args.workers <= MAX_WORKERS, "Too many workers, can't configure network" - sudo[ip[['link', 'add', 'br0', 'type', 'bridge']]]() - sudo[ip[['addr', 'add', NETWORK_INFRA_IP, 'brd', '+', 'dev', 'br0']]]() - for _, ncfg in zip(range(0, args.workers), NETWORK_CONFIG): - sudo[tunctl[['-t', ncfg, '-u', user, '-g', group]]]() - sudo[ip[['link', 'set', ncfg, 'up']]](retcode=(0, 1)) - sudo[brctl[['addif', 'br0', ncfg]]]() - sudo[ip[['link', 'set', 'br0', 'up']]](retcode=(0, 1)) + + # figure out how many workers we have + workers = 1 + if 'workers' in args: + workers = args.workers + + # create the bridge + sudo[ip[['link', 'add', 'br0', 'type', 'bridge']]]() + sudo[ip[['addr', 'add', NETWORK_INFRA_IP, 'brd', '+', 'dev', 'br0']]]() + + # add a network interface for every worker there is + for _, ncfg in zip(range(0, workers), NETWORK_CONFIG): + sudo[tunctl[['-t', ncfg, '-u', user, '-g', group]]]() + sudo[ip[['link', 'set', ncfg, 'up']]](retcode=(0, 1)) + sudo[brctl[['addif', 'br0', ncfg]]]() + + # set the link up + sudo[ip[['link', 'set', 'br0', 'up']]](retcode=(0, 1)) + + sudo[brctl[['setageing', 'br0', 600]]]() def configure_dcm_scheduler(args): diff --git a/kernel/testutils/src/helpers.rs b/kernel/testutils/src/helpers.rs index 536df2c83..66c8cc781 100644 --- a/kernel/testutils/src/helpers.rs +++ b/kernel/testutils/src/helpers.rs @@ -17,7 +17,7 @@ use crate::runner_args::RunnerArgs; /// /// # Depends on /// - `tests/dhcpd.conf`: config file contains match of MAC to IP -pub const DHCP_ACK_MATCH: &'static str = "DHCPACK on 172.31.0.10 to 56:b4:44:e9:62:d0 via tap0"; +pub const DHCP_ACK_MATCH: &'static str = "DHCPACK on 172.31.0.10 to 56:b4:44:e9:62:d0 via br0"; pub const DHCP_ACK_MATCH_NRK2: &'static str = "DHCPACK on 172.31.0.11 to 56:b4:44:e9:62:d1 via br0"; /// Default shmem region size (in MB) @@ -214,7 +214,7 @@ pub fn spawn_dcm(cfg: Option) -> Result /// Spawns a DHCP server on our host using most common interface: tap0 pub fn spawn_dhcpd() -> Result { - spawn_dhcpd_with_interface("tap0".to_string()) + spawn_dhcpd_with_interface("br0".to_string()) } /// Spawns a DHCP server on our host From 04620acd4cfb5a9fe439ac2c2424f7805e4686ea Mon Sep 17 00:00:00 2001 From: Reto Achermann Date: Tue, 17 Oct 2023 14:08:18 -0700 Subject: [PATCH 09/44] tests: adding sharded memcached to the CI pipeline Signed-off-by: Reto Achermann --- scripts/ci.bash | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/scripts/ci.bash b/scripts/ci.bash index a8776d39f..4e1c2406f 100644 --- a/scripts/ci.bash +++ b/scripts/ci.bash @@ -17,6 +17,7 @@ rm -f rackscale_shmem_vmops_benchmark.csv rm -f rackscale_shmem_vmops_latency_benchmark.csv rm -f rackscale_shmem_fxmark_benchmark.csv rm -f rackscale_shmem_memcached_benchmark.csv +rm -f rackscale_sharded_memcached_benchmark_*.csv # For vmops: --features prealloc can improve performance further (at the expense of test duration) RUST_TEST_THREADS=1 cargo test --test s10* -- s10_vmops_benchmark --nocapture @@ -30,6 +31,8 @@ RUST_TEST_THREADS=1 cargo test --test s11* -- s11_rackscale_shmem_vmops_maptput_ RUST_TEST_THREADS=1 cargo test --test s11* -- 
s11_rackscale_shmem_vmops_maplat_benchmark --nocapture RUST_TEST_THREADS=1 cargo test --test s11* -- s11_rackscale_shmem_fxmark_bench --nocapture RUST_TEST_THREADS=1 cargo test --test s11* -- s11_rackscale_memcached_benchmark_internal --nocapture +RUST_TEST_THREADS=1 cargo test --test s11* -- s11_rackscale_memcached_benchmark_sharded_linux --nocapture +RUST_TEST_THREADS=1 cargo test --test s11* -- s11_rackscale_memcached_benchmark_sharded_nros --nocapture # Clone repo rm -rf gh-pages @@ -61,7 +64,10 @@ if [ -d "${DEPLOY_DIR}" ]; then fi mkdir -p ${DEPLOY_DIR} mv memcached_benchmark_internal.csv ${DEPLOY_DIR} +mv memcached_benchmark_sharded_*.csv ${DEPLOY_DIR} gzip ${DEPLOY_DIR}/memcached_benchmark_internal.csv +gzip ${DEPLOY_DIR}/memcached_benchmark_sharded_nros.csv +gzip ${DEPLOY_DIR}/memcached_benchmark_sharded_linux.csv # Copy vmops results DEPLOY_DIR="gh-pages/vmops/${CI_MACHINE_TYPE}/${GIT_REV_CURRENT}/" From 8c1542128d146a26f4a780c21360e3ef51dd1327 Mon Sep 17 00:00:00 2001 From: Reto Achermann Date: Tue, 17 Oct 2023 16:44:09 -0700 Subject: [PATCH 10/44] run.py: adding --mid argument to set the machine id explicitly Signed-off-by: Reto Achermann --- kernel/run.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/kernel/run.py b/kernel/run.py index 2ce23e132..50370e03b 100644 --- a/kernel/run.py +++ b/kernel/run.py @@ -107,6 +107,8 @@ def get_network_config(workers): # DCM Scheduler arguments parser.add_argument("--dcm-path", help='Path of DCM jar to use (defaults to latest release)', required=False, default=None) +parser.add_argument("--mid", + help="Machine id to set for this instance", required=False, default=None) # QEMU related arguments parser.add_argument("--qemu-nodes", type=int, @@ -332,7 +334,10 @@ def deploy(args): # Append globally unique machine id to cmd (for rackscale) # as well as a number of workers (clients) if args.cmd and NETWORK_CONFIG[args.tap]['mid'] != None: - args.cmd += " mid={}".format(NETWORK_CONFIG[args.tap]['mid']) + if args.mid is None : + args.cmd += " mid={}".format(NETWORK_CONFIG[args.tap]['mid']) + else : + args.cmd += f" mid={args.mid}" if is_controller or is_client: args.cmd += " workers={}".format(args.workers) # Write kernel cmd-line file in ESP dir From ff265f07ac559811107c9a311f8676fd7659b80b Mon Sep 17 00:00:00 2001 From: Reto Achermann Date: Tue, 17 Oct 2023 16:45:02 -0700 Subject: [PATCH 11/44] tests: set the machine id to 0 for the sharded tests Signed-off-by: Reto Achermann --- kernel/testutils/src/rackscale_runner.rs | 1 + kernel/testutils/src/runner_args.rs | 13 +++++++++++++ 2 files changed, 14 insertions(+) diff --git a/kernel/testutils/src/rackscale_runner.rs b/kernel/testutils/src/rackscale_runner.rs index f32c6329b..228f951d6 100644 --- a/kernel/testutils/src/rackscale_runner.rs +++ b/kernel/testutils/src/rackscale_runner.rs @@ -483,6 +483,7 @@ impl RackscaleRun { .memory(state.memory) .nobuild() // Use single build for all for consistency .cmd(&client_cmd) + .machine_id(0) // always hardcoded to 0 for the sharded case .nodes(1) .node_offset(client_placement_cores[i + 1].0) .setaffinity(client_placement_cores[i + 1].1.clone()); diff --git a/kernel/testutils/src/runner_args.rs b/kernel/testutils/src/runner_args.rs index c9b3c5562..90b3d901b 100644 --- a/kernel/testutils/src/runner_args.rs +++ b/kernel/testutils/src/runner_args.rs @@ -59,6 +59,8 @@ pub struct RunnerArgs<'a> { nobuild: bool, /// Parameters to add to the QEMU command line qemu_args: Vec<&'a str>, + /// the machine id to set + machine_id: Option, /// 
Timeout in ms pub timeout: Option, /// Default network interface for QEMU @@ -119,6 +121,7 @@ impl<'a> RunnerArgs<'a> { no_network_setup: false, mode: None, transport: None, + machine_id: None, }; if cfg!(feature = "prealloc") { @@ -156,6 +159,7 @@ impl<'a> RunnerArgs<'a> { no_network_setup: false, mode: None, transport: None, + machine_id: None, }; if cfg!(feature = "prealloc") { @@ -283,6 +287,11 @@ impl<'a> RunnerArgs<'a> { self } + pub fn machine_id(mut self, id: usize) -> RunnerArgs<'a> { + self.machine_id = Some(id); + self + } + pub fn shmem_size(mut self, sizes: Vec) -> RunnerArgs<'a> { self.shmem_sizes = sizes; self @@ -456,6 +465,10 @@ impl<'a> RunnerArgs<'a> { cmd.push(String::from("--kgdb")); } + if let Some(mid) = self.machine_id { + cmd.push(format!("--mid={mid}")); + } + // Don't run qemu, just build? if self.norun { cmd.push(String::from("--norun")); From ddd3fbbc0f27a1533a20ed07af6ce51180ee77f4 Mon Sep 17 00:00:00 2001 From: Reto Achermann Date: Tue, 17 Oct 2023 16:45:38 -0700 Subject: [PATCH 12/44] tests: increase the core count for internal memcached benchmark Signed-off-by: Reto Achermann --- kernel/tests/s10_benchmarks.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/tests/s10_benchmarks.rs b/kernel/tests/s10_benchmarks.rs index f7682b679..2161e1d49 100644 --- a/kernel/tests/s10_benchmarks.rs +++ b/kernel/tests/s10_benchmarks.rs @@ -876,6 +876,8 @@ fn s10_leveldb_benchmark() { fn s10_memcached_benchmark_internal() { setup_network(1); + let is_smoke = cfg!(feature = "smoke"); + let machine = Machine::determine(); let build = BuildArgs::default() .module("rkapps") @@ -889,7 +891,7 @@ fn s10_memcached_benchmark_internal() { // Throw out everything above 28 since we have some non-deterministic // bug on larger machines that leads to threads calling sched_yield and // no readrandom is performed... - .filter(|&t| t <= 28) + .filter(|&t| if is_smoke { t <= 10 } else { t <= 128 }) .collect(); // memcached arguments // currently not there. @@ -914,7 +916,7 @@ fn s10_memcached_benchmark_internal() { println!(); for thread in threads.iter() { - println!("Running memcached internal benchmark with {thread} threads, {queries} GETs and {memsize}MB memory. "); + println!("\n\nRunning memcached internal benchmark with {thread} threads, {queries} GETs and {memsize}MB memory. 
"); let kernel_cmdline = format!( r#"init=memcachedbench.bin initargs={} appcmd='--x-benchmark-mem={} --x-benchmark-queries={}'"#, From ee7323d8e5c5c002e5e334bc6e05436edde83aaa Mon Sep 17 00:00:00 2001 From: Reto Achermann Date: Wed, 18 Oct 2023 16:44:05 -0700 Subject: [PATCH 13/44] tests: unify memcached memory/query config Signed-off-by: Reto Achermann --- kernel/tests/s10_benchmarks.rs | 10 ++++-- kernel/tests/s11_rackscale_benchmarks.rs | 40 +++++++++++++----------- kernel/testutils/src/configs.rs | 5 +++ kernel/testutils/src/lib.rs | 1 + kernel/testutils/src/rackscale_runner.rs | 2 +- 5 files changed, 35 insertions(+), 23 deletions(-) create mode 100644 kernel/testutils/src/configs.rs diff --git a/kernel/tests/s10_benchmarks.rs b/kernel/tests/s10_benchmarks.rs index 2161e1d49..f419811ee 100644 --- a/kernel/tests/s10_benchmarks.rs +++ b/kernel/tests/s10_benchmarks.rs @@ -872,6 +872,9 @@ fn s10_leveldb_benchmark() { } } +use testutils::configs::MEMCACHED_MEM_SIZE_MB; +use testutils::configs::MEMCACHED_NUM_QUERIES; + #[test] fn s10_memcached_benchmark_internal() { setup_network(1); @@ -899,9 +902,10 @@ fn s10_memcached_benchmark_internal() { (16 * 1024 /* MB */, 16 /* MB */, 2000000, 300_000) } else { ( - 256 * 1024, /* MB */ - 16, // 64 * 1024, /* MB */ - 100_000_000, + // keep in sync with the s11_ra + 32 * 1024, /* MB */ + MEMCACHED_MEM_SIZE_MB, + MEMCACHED_NUM_QUERIES, 600_000, ) }; diff --git a/kernel/tests/s11_rackscale_benchmarks.rs b/kernel/tests/s11_rackscale_benchmarks.rs index 830e0d33b..1024dd4df 100644 --- a/kernel/tests/s11_rackscale_benchmarks.rs +++ b/kernel/tests/s11_rackscale_benchmarks.rs @@ -25,6 +25,9 @@ use testutils::helpers::{DCMConfig, DCMSolver}; use testutils::rackscale_runner::{RackscaleBench, RackscaleRun}; use testutils::runner_args::RackscaleTransport; +use testutils::configs::MEMCACHED_MEM_SIZE_MB; +use testutils::configs::MEMCACHED_NUM_QUERIES; + #[test] #[cfg(not(feature = "baremetal"))] fn s11_rackscale_shmem_fxmark_benchmark() { @@ -139,7 +142,7 @@ fn rackscale_fxmark_benchmark(transport: RackscaleTransport) { fn timeout_fn(num_cores: usize) -> u64 { 180_000 + 5_000 * num_cores as u64 } - fn mem_fn(num_cores: usize, is_smoke: bool) -> usize { + fn mem_fn(num_cores: usize, _num_clients: usize, is_smoke: bool) -> usize { if is_smoke { 8192 } else { @@ -303,7 +306,7 @@ fn rackscale_vmops_benchmark(transport: RackscaleTransport, benchtype: VMOpsBenc fn rackscale_timeout_fn(num_cores: usize) -> u64 { 240_000 + 1_000 * num_cores as u64 } - fn mem_fn(num_cores: usize, is_smoke: bool) -> usize { + fn mem_fn(num_cores: usize, _num_clients: usize, is_smoke: bool) -> usize { if is_smoke { 8192 } else { @@ -441,7 +444,7 @@ fn s11_rackscale_shmem_leveldb_benchmark() { 240_000 + 500 * num_cores as u64 } - fn mem_fn(num_cores: usize, is_smoke: bool) -> usize { + fn mem_fn(num_cores: usize, _num_clients: usize, is_smoke: bool) -> usize { if is_smoke { 8192 } else { @@ -464,9 +467,6 @@ fn s11_rackscale_shmem_leveldb_benchmark() { bench.run_bench(false, is_smoke); } -const MEMCACHED_MEM_SIZE_MB: usize = 16; -const MEMCACHED_NUM_QUERIES: usize = 1000_000; - #[derive(Clone)] struct MemcachedInternalConfig { pub num_queries: usize, @@ -628,8 +628,8 @@ fn rackscale_memcached_benchmark(transport: RackscaleTransport) { if !is_smoke { test.shmem_size = std::cmp::max( - MEMCACHED_MEM_SIZE_MB * 8, - testutils::helpers::SHMEM_SIZE * 4, + MEMCACHED_MEM_SIZE_MB * 2, + testutils::helpers::SHMEM_SIZE * 2, ); } @@ -653,18 +653,20 @@ fn rackscale_memcached_benchmark(transport: 
RackscaleTransport) { } } - fn mem_fn(num_cores: usize, is_smoke: bool) -> usize { + fn mem_fn(num_cores: usize, num_clients: usize, is_smoke: bool) -> usize { + let base_memory = if num_cores > 64 { 8192 } else { 4096 }; + if is_smoke { - 8192 + base_memory } else { // Memory must also be divisible by number of nodes, which could be 1, 2, 3, or 4 // memory = result of this function / num_clients - shmem_size - (8192 + (base_memory + std::cmp::max( - MEMCACHED_MEM_SIZE_MB * 8, - testutils::helpers::SHMEM_SIZE * 4, + MEMCACHED_MEM_SIZE_MB * 2, + testutils::helpers::SHMEM_SIZE * 2, )) - * (((((num_cores + 1) / 2) + 3 - 1) / 3) * 3) + * num_clients } } @@ -1084,7 +1086,7 @@ fn s11_rackscale_memcached_benchmark_sharded_linux() { mem_size: 16, protocol: "tcp", is_local_host: true, - num_threads: 4, + num_threads: 8, path: out_dir_path, } } else { @@ -1095,7 +1097,7 @@ fn s11_rackscale_memcached_benchmark_sharded_linux() { mem_size: MEMCACHED_MEM_SIZE_MB, protocol: "tcp", is_local_host: true, - num_threads: 4, + num_threads: 8, path: out_dir_path, } }; @@ -1216,7 +1218,7 @@ fn s11_rackscale_memcached_benchmark_sharded_linux() { config.num_servers = num_nodes; for num_threads in 1..=max_threads_per_node { - if (num_threads != 1 || num_threads != max_threads_per_node) && (num_threads % 4 != 0) { + if (num_threads != 1 || num_threads != max_threads_per_node) && (num_threads % 8 != 0) { continue; } @@ -1567,7 +1569,7 @@ fn s11_rackscale_memcached_benchmark_sharded_nros() { 600_000 + 60_000 * num_cores as u64 } - fn mem_fn(num_cores: usize, is_smoke: bool) -> usize { + fn mem_fn(num_cores: usize, _num_clients: usize, is_smoke: bool) -> usize { if is_smoke { 8192 } else { @@ -1655,7 +1657,7 @@ fn rackscale_monetdb_benchmark(transport: RackscaleTransport) { 180_000 + 500 * num_cores as u64 } - fn mem_fn(num_cores: usize, is_smoke: bool) -> usize { + fn mem_fn(num_cores: usize, _num_clients: usize, is_smoke: bool) -> usize { if is_smoke { 8192 } else { diff --git a/kernel/testutils/src/configs.rs b/kernel/testutils/src/configs.rs new file mode 100644 index 000000000..3c544a9bf --- /dev/null +++ b/kernel/testutils/src/configs.rs @@ -0,0 +1,5 @@ +// Copyright © 2021 VMware, Inc. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR MIT + +pub const MEMCACHED_MEM_SIZE_MB: usize = 8 * 1024; +pub const MEMCACHED_NUM_QUERIES: usize = 1_000_000; diff --git a/kernel/testutils/src/lib.rs b/kernel/testutils/src/lib.rs index be8ccbcb0..f6e228498 100644 --- a/kernel/testutils/src/lib.rs +++ b/kernel/testutils/src/lib.rs @@ -9,6 +9,7 @@ extern crate rexpect; extern crate serde; pub mod builder; +pub mod configs; pub mod helpers; pub mod rackscale_runner; pub mod redis; diff --git a/kernel/testutils/src/rackscale_runner.rs b/kernel/testutils/src/rackscale_runner.rs index 228f951d6..3bb2f16c2 100644 --- a/kernel/testutils/src/rackscale_runner.rs +++ b/kernel/testutils/src/rackscale_runner.rs @@ -786,7 +786,7 @@ pub struct RackscaleBench { // Function to calculate the timeout. Takes as argument number of application cores pub baseline_timeout_fn: fn(usize) -> u64, // Function to calculate memory (excpeting controller memory). 
Takes as argument number of application cores and is_smoke - pub mem_fn: fn(usize, bool) -> usize, + pub mem_fn: fn(usize, usize, bool) -> usize, } impl RackscaleBench { From 16c5d8e56e7de3377b175b2220b8388e8f29a13b Mon Sep 17 00:00:00 2001 From: Reto Achermann Date: Wed, 18 Oct 2023 19:41:12 -0700 Subject: [PATCH 14/44] tests: update memcached memory configuration Signed-off-by: Reto Achermann --- kernel/tests/s11_rackscale_benchmarks.rs | 24 +++++++++++++++++------- kernel/testutils/src/configs.rs | 2 +- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/kernel/tests/s11_rackscale_benchmarks.rs b/kernel/tests/s11_rackscale_benchmarks.rs index 1024dd4df..18453ad25 100644 --- a/kernel/tests/s11_rackscale_benchmarks.rs +++ b/kernel/tests/s11_rackscale_benchmarks.rs @@ -1072,6 +1072,8 @@ fn rackscale_memcached_checkout() { #[test] #[cfg(not(feature = "baremetal"))] fn s11_rackscale_memcached_benchmark_sharded_linux() { + use std::fs::remove_file; + let machine = Machine::determine(); let out_dir_path = PathBuf::from(env!("CARGO_TARGET_TMPDIR")).join("sharded-memcached"); let is_smoke = cfg!(feature = "smoke"); @@ -1102,7 +1104,7 @@ fn s11_rackscale_memcached_benchmark_sharded_linux() { } }; - let timeout_ms = if is_smoke { 60_000 } else { 600_000 }; + let timeout_ms = if is_smoke { 60_000 } else { 900_000 }; fn run_benchmark_internal(config: &MemcachedShardedConfig, timeout_ms: u64) -> PtySession { Command::new("killall").args(&["memcached"]).status().ok(); @@ -1125,7 +1127,9 @@ fn s11_rackscale_memcached_benchmark_sharded_linux() { let con_info = if config.protocol == "tcp" { format!("tcp://localhost:{}", 11211 + id) } else { - format!("unix://{}/memcached{}.sock", config.path.display(), id) + let pathname = config.path.join(format!("memcached{id}.sock")); + remove_file(pathname); // make sure the socket file is removed + format!("unix://{}", pathname.display()) }; let mut command = Command::new("bash"); @@ -1226,8 +1230,9 @@ fn s11_rackscale_memcached_benchmark_sharded_linux() { config.num_threads = num_threads; - let _ = Command::new("killall").args(&["memcached"]).status(); - let _ = Command::new("killall").args(&["memcached"]).status(); + let _ = Command::new("killall") + .args(&["memcached", "-s", "SIGKILL"]) + .status(); let mut pty = run_benchmark_internal(&config, timeout_ms); let mut output = String::new(); let res = @@ -1252,8 +1257,9 @@ fn s11_rackscale_memcached_benchmark_sharded_linux() { println!("Memcached Sharded: {num_threads}x{num_nodes} with {protocol}"); // terminate the memcached instance - let _ = Command::new("killall").args(&["memcached"]).status(); - let _ = Command::new("killall").args(&["memcached"]).status(); + let _ = Command::new("killall") + .args(&["memcached", "-s", "SIGKILL"]) + .status(); let mut memcached_ctrls = Vec::new(); for i in 0..num_nodes { @@ -1317,12 +1323,16 @@ fn s11_rackscale_memcached_benchmark_sharded_linux() { let _ = Command::new("killall").args(&["memcached"]).status(); } }; + + let _ = pty.process.kill(rexpect::process::signal::Signal::SIGKILL); } } } // terminate the memcached instance - let _ = Command::new("killall").args(&["memcached"]).status(); + let _ = Command::new("killall") + .args(&["memcached", "-s", "SIGKILL"]) + .status(); } #[test] diff --git a/kernel/testutils/src/configs.rs b/kernel/testutils/src/configs.rs index 3c544a9bf..ca7f237b6 100644 --- a/kernel/testutils/src/configs.rs +++ b/kernel/testutils/src/configs.rs @@ -1,5 +1,5 @@ // Copyright © 2021 VMware, Inc. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0 OR MIT -pub const MEMCACHED_MEM_SIZE_MB: usize = 8 * 1024; +pub const MEMCACHED_MEM_SIZE_MB: usize = 4 * 1024; pub const MEMCACHED_NUM_QUERIES: usize = 1_000_000; From 7cec4eaf0688238b85ebd2abd2a66526affb2fa5 Mon Sep 17 00:00:00 2001 From: Reto Achermann Date: Wed, 18 Oct 2023 22:30:57 -0700 Subject: [PATCH 15/44] tests: fix compilation Signed-off-by: Reto Achermann --- kernel/tests/s11_rackscale_benchmarks.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/tests/s11_rackscale_benchmarks.rs b/kernel/tests/s11_rackscale_benchmarks.rs index 18453ad25..09a7eeec5 100644 --- a/kernel/tests/s11_rackscale_benchmarks.rs +++ b/kernel/tests/s11_rackscale_benchmarks.rs @@ -1104,7 +1104,7 @@ fn s11_rackscale_memcached_benchmark_sharded_linux() { } }; - let timeout_ms = if is_smoke { 60_000 } else { 900_000 }; + let timeout_ms = if is_smoke { 60_000 } else { 1200_000 }; fn run_benchmark_internal(config: &MemcachedShardedConfig, timeout_ms: u64) -> PtySession { Command::new("killall").args(&["memcached"]).status().ok(); @@ -1128,7 +1128,7 @@ fn s11_rackscale_memcached_benchmark_sharded_linux() { format!("tcp://localhost:{}", 11211 + id) } else { let pathname = config.path.join(format!("memcached{id}.sock")); - remove_file(pathname); // make sure the socket file is removed + remove_file(pathname.clone()); // make sure the socket file is removed format!("unix://{}", pathname.display()) }; From c183b398bff82a6cf30795b97dd8f3421c1d66a7 Mon Sep 17 00:00:00 2001 From: Reto Achermann Date: Thu, 19 Oct 2023 09:01:34 -0700 Subject: [PATCH 16/44] tests: increase timeout for sharded nros Signed-off-by: Reto Achermann --- kernel/tests/s11_rackscale_benchmarks.rs | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/kernel/tests/s11_rackscale_benchmarks.rs b/kernel/tests/s11_rackscale_benchmarks.rs index 09a7eeec5..3e9c9dfe8 100644 --- a/kernel/tests/s11_rackscale_benchmarks.rs +++ b/kernel/tests/s11_rackscale_benchmarks.rs @@ -1074,6 +1074,8 @@ fn rackscale_memcached_checkout() { fn s11_rackscale_memcached_benchmark_sharded_linux() { use std::fs::remove_file; + use rexpect::process::signal::Signal::SIGKILL; + let machine = Machine::determine(); let out_dir_path = PathBuf::from(env!("CARGO_TARGET_TMPDIR")).join("sharded-memcached"); let is_smoke = cfg!(feature = "smoke"); @@ -1125,7 +1127,7 @@ fn s11_rackscale_memcached_benchmark_sharded_linux() { timeout_ms: u64, ) -> Result { let con_info = if config.protocol == "tcp" { - format!("tcp://localhost:{}", 11211 + id) + format!("tcp://localhost:{}", 11212 + id) } else { let pathname = config.path.join(format!("memcached{id}.sock")); remove_file(pathname.clone()); // make sure the socket file is removed @@ -1173,7 +1175,7 @@ fn s11_rackscale_memcached_benchmark_sharded_linux() { } if config.protocol == "tcp" { if config.is_local_host { - servers.push_str(format!("tcp://localhost:{}", 11211 + i).as_str()); + servers.push_str(format!("tcp://localhost:{}", 11212 + i).as_str()); } else { // +1 because tap0 is reserved for the controller. 
let ip = 10 + i + 1; @@ -1246,6 +1248,9 @@ fn s11_rackscale_memcached_benchmark_sharded_linux() { let r = csv_file.write(out.as_bytes()); assert!(r.is_ok()); + let r = pty.process.kill(SIGKILL); + + println!("{:?}", res); // single node @@ -1261,6 +1266,9 @@ fn s11_rackscale_memcached_benchmark_sharded_linux() { .args(&["memcached", "-s", "SIGKILL"]) .status(); + // give some time so memcached can be cleaned up + std::thread::sleep(Duration::from_secs(5)); + let mut memcached_ctrls = Vec::new(); for i in 0..num_nodes { memcached_ctrls.push( @@ -1576,7 +1584,7 @@ fn s11_rackscale_memcached_benchmark_sharded_nros() { } fn rackscale_timeout_fn(num_cores: usize) -> u64 { - 600_000 + 60_000 * num_cores as u64 + 1200_000 + 60_000 * num_cores as u64 } fn mem_fn(num_cores: usize, _num_clients: usize, is_smoke: bool) -> usize { From 9d99c701cf3bbaca2cc5f8ddd6d78d1bafbf617d Mon Sep 17 00:00:00 2001 From: Reto Achermann Date: Wed, 1 Nov 2023 11:51:59 -0700 Subject: [PATCH 17/44] lib/lineup: don't assert the lock owner Signed-off-by: Reto Achermann --- lib/lineup/src/mutex.rs | 5 ----- 1 file changed, 5 deletions(-) diff --git a/lib/lineup/src/mutex.rs b/lib/lineup/src/mutex.rs index 6e5bb0fb7..b2d9a86a7 100644 --- a/lib/lineup/src/mutex.rs +++ b/lib/lineup/src/mutex.rs @@ -109,11 +109,6 @@ struct MutexInner { impl MutexInner { fn try_enter(&self) -> bool { let tid = Environment::tid(); - assert!( - self.owner.get() != Some(tid), - "Locking mutex against itself." - ); - let counter = self.counter.load(Ordering::Relaxed); loop { if counter != 0 { From 1d9860da0f4c0c0bdaedb0a1eb22496162484977 Mon Sep 17 00:00:00 2001 From: Reto Achermann Date: Wed, 1 Nov 2023 11:55:38 -0700 Subject: [PATCH 18/44] tests: fixing a few things in the sharded nros case Signed-off-by: Reto Achermann --- kernel/tests/s11_rackscale_benchmarks.rs | 86 +++++++++++++----------- kernel/testutils/src/rackscale_runner.rs | 36 ++++++---- 2 files changed, 69 insertions(+), 53 deletions(-) diff --git a/kernel/tests/s11_rackscale_benchmarks.rs b/kernel/tests/s11_rackscale_benchmarks.rs index 3e9c9dfe8..a66aa07a1 100644 --- a/kernel/tests/s11_rackscale_benchmarks.rs +++ b/kernel/tests/s11_rackscale_benchmarks.rs @@ -130,7 +130,7 @@ fn rackscale_fxmark_benchmark(transport: RackscaleTransport) { test.file_name = file_name.clone(); test.arg = Some(config); - fn cmd_fn(num_cores: usize, arg: Option) -> String { + fn cmd_fn(num_cores: usize, _num_clients: usize, arg: Option) -> String { // TODO: add in arg with formatting. 
//1XmixX0 is - mix benchmark for 0% writes with 1 open file let config = arg.expect("Missing fxmark config"); @@ -297,7 +297,7 @@ fn rackscale_vmops_benchmark(transport: RackscaleTransport, benchtype: VMOpsBenc test.file_name = file_name.clone(); test.arg = Some(benchtype); - fn cmd_fn(num_cores: usize, _arg: Option) -> String { + fn cmd_fn(num_cores: usize, _num_clients: usize, _arg: Option) -> String { format!("initargs={}", num_cores) } fn baseline_timeout_fn(num_cores: usize) -> u64 { @@ -428,7 +428,7 @@ fn s11_rackscale_shmem_leveldb_benchmark() { test.arg = Some(config); test.run_dhcpd_for_baseline = true; - fn cmd_fn(num_cores: usize, arg: Option) -> String { + fn cmd_fn(num_cores: usize, _num_clients: usize, arg: Option) -> String { let config = arg.expect("missing leveldb config"); format!( r#"init=dbbench.bin initargs={} appcmd='--threads={} --benchmarks=fillseq,readrandom --reads={} --num={} --value_size={}'"#, @@ -633,7 +633,11 @@ fn rackscale_memcached_benchmark(transport: RackscaleTransport) { ); } - fn cmd_fn(num_cores: usize, arg: Option) -> String { + fn cmd_fn( + num_cores: usize, + _num_clients: usize, + arg: Option, + ) -> String { let config = arg.expect("missing leveldb config"); format!( r#"init=memcachedbench.bin initargs={} appcmd='--x-benchmark-mem={} --x-benchmark-queries={}'"#, @@ -1167,7 +1171,7 @@ fn s11_rackscale_memcached_benchmark_sharded_linux() { command.args(&["--binary"]); command.arg(format!("--num-queries={}", config.num_queries).as_str()); command.arg(format!("--num-threads={}", config.num_threads).as_str()); - command.arg(format!("--max-memory={}", config.mem_size).as_str()); + command.arg(format!("--max-memory={}", config.mem_size / 8).as_str()); let mut servers = String::from("--servers="); for i in 0..config.num_servers { if i > 0 { @@ -1250,9 +1254,6 @@ fn s11_rackscale_memcached_benchmark_sharded_linux() { let r = pty.process.kill(SIGKILL); - - println!("{:?}", res); - // single node for protocol in &["tcp", "unix"] { config.protocol = protocol; @@ -1346,6 +1347,8 @@ fn s11_rackscale_memcached_benchmark_sharded_linux() { #[test] #[cfg(not(feature = "baremetal"))] fn s11_rackscale_memcached_benchmark_sharded_nros() { + use rexpect::process::signal::Signal::SIGKILL; + let out_dir_path = PathBuf::from(env!("CARGO_TARGET_TMPDIR")).join("sharded-memcached"); let is_smoke = cfg!(feature = "smoke"); @@ -1438,14 +1441,16 @@ fn s11_rackscale_memcached_benchmark_sharded_nros() { fn controller_run_fun( config: Option<&MemcachedShardedConfig>, num_servers: usize, + num_threads: usize, timeout_ms: u64, ) -> Result { // here we should wait - std::thread::sleep(Duration::from_secs(15)); + std::thread::sleep(Duration::from_secs(15 + 2 * num_servers as u64)); let mut config = config.unwrap().clone(); config.num_servers = num_servers; + config.num_threads = num_servers * num_threads; spawn_loadbalancer(&config, timeout_ms) } @@ -1477,7 +1482,7 @@ fn s11_rackscale_memcached_benchmark_sharded_nros() { return Err(Error(Timeout(expected, got, timeout), st)); } Err(err) => { - println!("Failed: {:?}", err); + // println!("Failed: {:?}", err); return Err(err); } }; @@ -1491,7 +1496,6 @@ fn s11_rackscale_memcached_benchmark_sharded_nros() { let r = csv_file.write(out.as_bytes()); assert!(r.is_ok()); - println!("{:?}", res); Ok(()) } @@ -1504,22 +1508,6 @@ fn s11_rackscale_memcached_benchmark_sharded_nros() { _is_baseline: bool, _arg: Option, ) -> Result<()> { - match proc.exp_regex(r#"\[ INFO\]: bootloader/src/kernel.rs"#) { - Ok(_) => (), - Err(rexpect::errors::Error( 
- rexpect::errors::ErrorKind::EOF(_expected, _s, _), - _state, - )) => { - // for l in s.lines() { - // println!("MEMCACHED-OUTPUT: {}", l); - // } - } - Err(e) => { - println!("{e:?}"); - panic!("error") - } - } - match proc.exp_regex(r#"dhcp: vioif0: adding IP address (\d+).(\d+).(\d+).(\d+)/(\d+)"#) { Ok((_prev, matched)) => { println!(" > Networking setup succeeded. {matched}"); @@ -1543,7 +1531,7 @@ fn s11_rackscale_memcached_benchmark_sharded_nros() { } let (prev, matched) = proc.exp_regex(r#"x_benchmark_mem = (\d+) MB"#).unwrap(); - println!("C> {}", matched); + println!("> {}", matched); // let b_mem = matched.replace("x_benchmark_mem = ", "").replace(" MB", ""); *output += prev.as_str(); @@ -1566,16 +1554,21 @@ fn s11_rackscale_memcached_benchmark_sharded_nros() { if !is_smoke { test.shmem_size = std::cmp::max( - MEMCACHED_MEM_SIZE_MB * 8, - testutils::helpers::SHMEM_SIZE * 4, + MEMCACHED_MEM_SIZE_MB * 2, + testutils::helpers::SHMEM_SIZE * 2, ); } - fn cmd_fn(num_cores: usize, arg: Option) -> String { + fn cmd_fn(num_cores: usize, num_clients: usize, arg: Option) -> String { let config = arg.expect("missing configuration"); + let num_threads = num_cores / num_clients; + format!( r#"init=memcachedbench.bin initargs={} appcmd='--x-benchmark-no-run --disable-evictions --conn-limit=1024 --threads={} --x-benchmark-mem={} --memory-limit={}'"#, - num_cores, num_cores, config.mem_size, config.mem_size + num_threads, + num_threads, + config.mem_size, + config.mem_size * 2 ) } @@ -1587,22 +1580,34 @@ fn s11_rackscale_memcached_benchmark_sharded_nros() { 1200_000 + 60_000 * num_cores as u64 } - fn mem_fn(num_cores: usize, _num_clients: usize, is_smoke: bool) -> usize { + fn mem_fn(_num_cores: usize, num_clients: usize, is_smoke: bool) -> usize { if is_smoke { 8192 } else { // Memory must also be divisible by number of nodes, which could be 1, 2, 3, or 4 - (8192 - + std::cmp::max( - MEMCACHED_MEM_SIZE_MB * 8, - testutils::helpers::SHMEM_SIZE * 4, + // mem = result of this function / num_clients - shmem_size + (8092 + + 2 * std::cmp::max( + MEMCACHED_MEM_SIZE_MB * 2, + testutils::helpers::SHMEM_SIZE * 2, )) - * (((((num_cores + 1) / 2) + 3 - 1) / 3) * 3) + * num_clients } } println!("----------------------------------------------------------"); + let machine = Machine::determine(); + + let mut pings = Vec::new(); + for i in 0..machine.max_numa_nodes() { + let mut command = Command::new("ping"); + command.arg(&format!("172.31.0.{}", 10 + i + 1)); + + let proc = spawn_command(command, None).unwrap(); + pings.push(proc); + } + // construct bench and run it! 
let bench = RackscaleBench { test, @@ -1612,6 +1617,9 @@ fn s11_rackscale_memcached_benchmark_sharded_nros() { mem_fn, }; bench.run_bench(false, is_smoke); + for mut ping in pings.into_iter() { + ping.process.kill(SIGKILL); + } } #[test] @@ -1660,7 +1668,7 @@ fn rackscale_monetdb_benchmark(transport: RackscaleTransport) { test.arg = None; test.run_dhcpd_for_baseline = true; - fn cmd_fn(num_cores: usize, _arg: Option<()>) -> String { + fn cmd_fn(num_cores: usize, _num_clients: usize, _arg: Option<()>) -> String { format!( r#"init=monetdbd.bin initargs={} appcmd='create dbfarm'"#, num_cores diff --git a/kernel/testutils/src/rackscale_runner.rs b/kernel/testutils/src/rackscale_runner.rs index 3bb2f16c2..c1b7ff049 100644 --- a/kernel/testutils/src/rackscale_runner.rs +++ b/kernel/testutils/src/rackscale_runner.rs @@ -1,6 +1,7 @@ +use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::mpsc::{Receiver, Sender, TryRecvError}; use std::sync::{mpsc::channel, Arc, Mutex}; -use std::thread; +use std::thread::{self, sleep}; use std::time::Duration; use rexpect::errors::*; @@ -47,8 +48,12 @@ type RackscaleMatchFn = fn( arg: Option, ) -> Result<()>; -type ControllerRunFn = - fn(config: Option<&T>, num_clients: usize, timeout_ms: u64) -> Result; +type ControllerRunFn = fn( + config: Option<&T>, + num_clients: usize, + num_threas: usize, + timeout_ms: u64, +) -> Result; #[derive(Clone)] pub struct RackscaleRun @@ -457,6 +462,8 @@ impl RackscaleRun { let (tx_build_timer, _rx_build_timer) = channel(); let tx_build_timer_mut = Arc::new(Mutex::new(tx_build_timer)); + let boot_counter = Arc::new(AtomicUsize::new(0)); + // Run client in separate thead. Wait a bit to make sure controller started let mut client_procs = Vec::new(); for i in 0..self.num_clients { @@ -467,6 +474,7 @@ impl RackscaleRun { let client_file_name = self.file_name.clone(); let client_cmd = self.cmd.clone(); let client_placement_cores = placement_cores.clone(); + let client_boot_counter = boot_counter.clone(); let state = self.clone(); let client_tx_build_timer = tx_build_timer_mut.clone(); let use_large_pages = self.use_qemu_huge_pages; @@ -495,14 +503,8 @@ impl RackscaleRun { let mut output = String::new(); let qemu_run = || -> Result { let mut p = spawn_nrk(&cmdline_client)?; - - // output += p.exp_string("CLIENT READY")?.as_str(); - // { - // let tx = client_tx_build_timer - // .lock() - // .expect("Failed to get build timer lock"); - // send_signal(&tx); - // } + output += p.exp_string("NRK booting on")?.as_str(); + client_boot_counter.fetch_add(1, Ordering::SeqCst); // User-supplied function to check output (state.client_match_fn)( @@ -552,6 +554,11 @@ impl RackscaleRun { ); }) .expect("Client thread failed to spawn"); + + while i == boot_counter.load(Ordering::Relaxed) { + thread::sleep(Duration::from_millis(500)); + } + client_procs.push(client); } @@ -576,6 +583,7 @@ impl RackscaleRun { let mut p = run_fn( controller_arg.as_ref(), state.num_clients, + state.cores_per_client, state.controller_timeout, )?; @@ -779,8 +787,8 @@ impl RackscaleRun { pub struct RackscaleBench { // Test to run pub test: RackscaleRun, - // Function to calculate the command. Takes as argument number of application cores - pub cmd_fn: fn(usize, Option) -> String, + // Function to calculate the command. Takes as argument number of application cores and the number of clients + pub cmd_fn: fn(usize, usize, Option) -> String, // Function to calculate the timeout. 
Takes as argument number of application cores pub rackscale_timeout_fn: fn(usize) -> u64, // Function to calculate the timeout. Takes as argument number of application cores @@ -868,7 +876,7 @@ impl RackscaleBench { test_run.controller_timeout = test_run.client_timeout; // Calculate command based on the number of cores - test_run.cmd = (self.cmd_fn)(total_cores, test_run.arg.clone()); + test_run.cmd = (self.cmd_fn)(total_cores, num_clients, test_run.arg.clone()); // Caclulate memory and timeouts, and then run test if is_baseline { From 862b52c2bb2dfd94ea22f2870974d3a0500826da Mon Sep 17 00:00:00 2001 From: Reto Achermann Date: Wed, 1 Nov 2023 13:53:16 -0700 Subject: [PATCH 19/44] tests: reduce the memory size for memcached to see if the tests runs Signed-off-by: Reto Achermann --- kernel/testutils/src/configs.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/testutils/src/configs.rs b/kernel/testutils/src/configs.rs index ca7f237b6..a9ca338f4 100644 --- a/kernel/testutils/src/configs.rs +++ b/kernel/testutils/src/configs.rs @@ -1,5 +1,5 @@ // Copyright © 2021 VMware, Inc. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 OR MIT -pub const MEMCACHED_MEM_SIZE_MB: usize = 4 * 1024; -pub const MEMCACHED_NUM_QUERIES: usize = 1_000_000; +pub const MEMCACHED_MEM_SIZE_MB: usize = 1 * 1024; +pub const MEMCACHED_NUM_QUERIES: usize = 500_000; From 1f3f186a04d2448036f10f8f5b79047204e40921 Mon Sep 17 00:00:00 2001 From: Reto Achermann Date: Mon, 13 Nov 2023 10:31:06 -0800 Subject: [PATCH 20/44] bench: disable the sharded nros benchmark Signed-off-by: Reto Achermann --- kernel/tests/s11_rackscale_benchmarks.rs | 20 ++++------ kernel/testutils/src/configs.rs | 4 +- kernel/testutils/src/runner_args.rs | 48 +++++++++++++++++++++++- scripts/ci.bash | 5 ++- usr/rkapps/build.rs | 4 +- 5 files changed, 60 insertions(+), 21 deletions(-) diff --git a/kernel/tests/s11_rackscale_benchmarks.rs b/kernel/tests/s11_rackscale_benchmarks.rs index a66aa07a1..f47b49aba 100644 --- a/kernel/tests/s11_rackscale_benchmarks.rs +++ b/kernel/tests/s11_rackscale_benchmarks.rs @@ -1036,12 +1036,12 @@ fn rackscale_memcached_checkout() { } println!( - "CHECKOUT fe0eb024882481717efd6a3f4600e96c99ca77a2 {:?}", + "CHECKOUT 0d90d53b99c3890b6e47efe08446e5180711ff09 {:?}", out_dir ); let res = Command::new("git") - .args(&["checkout", "fe0eb024882481717efd6a3f4600e96c99ca77a2"]) + .args(&["checkout", "0d90d53b99c3890b6e47efe08446e5180711ff09"]) .current_dir(out_dir_path.as_path()) .output() .expect("git checkout failed"); @@ -1345,6 +1345,7 @@ fn s11_rackscale_memcached_benchmark_sharded_linux() { } #[test] +#[ignore] #[cfg(not(feature = "baremetal"))] fn s11_rackscale_memcached_benchmark_sharded_nros() { use rexpect::process::signal::Signal::SIGKILL; @@ -1551,24 +1552,17 @@ fn s11_rackscale_memcached_benchmark_sharded_nros() { test.arg = Some(config); test.run_dhcpd_for_baseline = true; test.is_multi_node = true; + test.shmem_size = 0; - if !is_smoke { - test.shmem_size = std::cmp::max( - MEMCACHED_MEM_SIZE_MB * 2, - testutils::helpers::SHMEM_SIZE * 2, - ); - } fn cmd_fn(num_cores: usize, num_clients: usize, arg: Option) -> String { let config = arg.expect("missing configuration"); let num_threads = num_cores / num_clients; format!( - r#"init=memcachedbench.bin initargs={} appcmd='--x-benchmark-no-run --disable-evictions --conn-limit=1024 --threads={} --x-benchmark-mem={} --memory-limit={}'"#, - num_threads, - num_threads, - config.mem_size, - config.mem_size * 2 + r#"init=memcachedbench.bin 
initargs={num_threads} appcmd='--x-benchmark-no-run --disable-evictions --conn-limit=1024 --threads={num_threads} --x-benchmark-mem={} --memory-limit={}'"#, + config.mem_size * 2, + config.mem_size * 4 ) } diff --git a/kernel/testutils/src/configs.rs b/kernel/testutils/src/configs.rs index a9ca338f4..ca7f237b6 100644 --- a/kernel/testutils/src/configs.rs +++ b/kernel/testutils/src/configs.rs @@ -1,5 +1,5 @@ // Copyright © 2021 VMware, Inc. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 OR MIT -pub const MEMCACHED_MEM_SIZE_MB: usize = 1 * 1024; -pub const MEMCACHED_NUM_QUERIES: usize = 500_000; +pub const MEMCACHED_MEM_SIZE_MB: usize = 4 * 1024; +pub const MEMCACHED_NUM_QUERIES: usize = 1_000_000; diff --git a/kernel/testutils/src/runner_args.rs b/kernel/testutils/src/runner_args.rs index 90b3d901b..7966a5ceb 100644 --- a/kernel/testutils/src/runner_args.rs +++ b/kernel/testutils/src/runner_args.rs @@ -9,6 +9,9 @@ use rexpect::process::wait::WaitStatus; use crate::builder::{BuildArgs, Built, Machine}; use crate::ExitStatus; +/// defines the threshold on when the output is truncated. +const PRINT_NUM_LINES: usize = 100; + /// Different build modes for rackscale #[derive(Eq, PartialEq, Debug, Clone)] pub enum RackscaleMode { @@ -496,10 +499,26 @@ pub fn log_qemu_out(args: &RunnerArgs, output: String) { log_qemu_out_with_name(Some(args), String::from(""), output) } + + pub fn log_qemu_out_with_name(args: Option<&RunnerArgs>, name: String, output: String) { if !output.is_empty() { println!("\n===== QEMU LOG {}=====", name); - println!("{}", &output); + let num_lines = output.lines().count(); + + if num_lines > PRINT_NUM_LINES { + for l in output.lines().take(PRINT_NUM_LINES / 2) { + println!(" > {}", l); + } + println!(" > ... {} more lines\n", num_lines - PRINT_NUM_LINES); + for l in output.lines().skip(num_lines - PRINT_NUM_LINES / 2) { + println!(" > {}", l); + } + } else { + for l in output.lines() { + println!(" > {l}"); + } + } println!("===== END QEMU LOG {}=====", name); } if let Some(nrk_args) = args { @@ -620,7 +639,32 @@ pub fn wait_for_sigterm_or_successful_exit_no_log( } Err(e) => { log_qemu_args(args); - panic!("Qemu testing failed: {} {}", name, e); + println!("Qemu testing failed: {} ", name); + use rexpect::errors::Error; + use rexpect::errors::ErrorKind::Timeout; + match e { + Error(Timeout(expected, got, timeout), st) => { + println!("Expected: `{expected}`\n"); + println!("Got:",); + let count = got.lines().count(); + if count > PRINT_NUM_LINES { + for l in got.lines().take(PRINT_NUM_LINES / 2) { + println!(" > {l}"); + } + println!(" > ... 
skipping {} more lines...", count - PRINT_NUM_LINES); + for l in got.lines().skip(count - PRINT_NUM_LINES / 2) { + println!(" > {l}"); + } + } else { + for l in got.lines() { + println!(" > {l}"); + } + } + } + _ => println!("{e}") + } + + panic!("Qemu testing failed"); } e => { log_qemu_args(args); diff --git a/scripts/ci.bash b/scripts/ci.bash index 4e1c2406f..d40103f29 100644 --- a/scripts/ci.bash +++ b/scripts/ci.bash @@ -32,7 +32,8 @@ RUST_TEST_THREADS=1 cargo test --test s11* -- s11_rackscale_shmem_vmops_maplat_b RUST_TEST_THREADS=1 cargo test --test s11* -- s11_rackscale_shmem_fxmark_bench --nocapture RUST_TEST_THREADS=1 cargo test --test s11* -- s11_rackscale_memcached_benchmark_internal --nocapture RUST_TEST_THREADS=1 cargo test --test s11* -- s11_rackscale_memcached_benchmark_sharded_linux --nocapture -RUST_TEST_THREADS=1 cargo test --test s11* -- s11_rackscale_memcached_benchmark_sharded_nros --nocapture +# disabled for now as this causes too much issues with running for now +# RUST_TEST_THREADS=1 cargo test --test s11* -- s11_rackscale_memcached_benchmark_sharded_nros --nocapture # Clone repo rm -rf gh-pages @@ -66,7 +67,7 @@ mkdir -p ${DEPLOY_DIR} mv memcached_benchmark_internal.csv ${DEPLOY_DIR} mv memcached_benchmark_sharded_*.csv ${DEPLOY_DIR} gzip ${DEPLOY_DIR}/memcached_benchmark_internal.csv -gzip ${DEPLOY_DIR}/memcached_benchmark_sharded_nros.csv +# gzip ${DEPLOY_DIR}/memcached_benchmark_sharded_nros.csv gzip ${DEPLOY_DIR}/memcached_benchmark_sharded_linux.csv # Copy vmops results diff --git a/usr/rkapps/build.rs b/usr/rkapps/build.rs index 6a03865e0..2e3dd2dbd 100644 --- a/usr/rkapps/build.rs +++ b/usr/rkapps/build.rs @@ -138,11 +138,11 @@ fn main() { .unwrap(); println!( - "CHECKOUT b2a11dee71b5181148830b8869b27742a8ebe96b {:?}", + "CHECKOUT eece690294fbfed418f43034b5dc77290865f8cf {:?}", out_dir ); Command::new("git") - .args(&["checkout", "b2a11dee71b5181148830b8869b27742a8ebe96b"]) + .args(&["checkout", "eece690294fbfed418f43034b5dc77290865f8cf"]) .current_dir(&Path::new(&out_dir)) .status() .unwrap(); From e15d8e7ff4990d946125f4ff29c591cd462af12a Mon Sep 17 00:00:00 2001 From: Erika Hunhoff Date: Sun, 19 Nov 2023 09:39:59 -0800 Subject: [PATCH 21/44] Fixed formatting errors --- kernel/tests/s11_rackscale_benchmarks.rs | 1 - kernel/testutils/src/runner_args.rs | 4 +--- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/kernel/tests/s11_rackscale_benchmarks.rs b/kernel/tests/s11_rackscale_benchmarks.rs index f47b49aba..dcf9f3fd1 100644 --- a/kernel/tests/s11_rackscale_benchmarks.rs +++ b/kernel/tests/s11_rackscale_benchmarks.rs @@ -1554,7 +1554,6 @@ fn s11_rackscale_memcached_benchmark_sharded_nros() { test.is_multi_node = true; test.shmem_size = 0; - fn cmd_fn(num_cores: usize, num_clients: usize, arg: Option) -> String { let config = arg.expect("missing configuration"); let num_threads = num_cores / num_clients; diff --git a/kernel/testutils/src/runner_args.rs b/kernel/testutils/src/runner_args.rs index 7966a5ceb..b3f4c15f3 100644 --- a/kernel/testutils/src/runner_args.rs +++ b/kernel/testutils/src/runner_args.rs @@ -499,8 +499,6 @@ pub fn log_qemu_out(args: &RunnerArgs, output: String) { log_qemu_out_with_name(Some(args), String::from(""), output) } - - pub fn log_qemu_out_with_name(args: Option<&RunnerArgs>, name: String, output: String) { if !output.is_empty() { println!("\n===== QEMU LOG {}=====", name); @@ -661,7 +659,7 @@ pub fn wait_for_sigterm_or_successful_exit_no_log( } } } - _ => println!("{e}") + _ => println!("{e}"), } panic!("Qemu testing 
failed"); From fc31a02b7fed3faee8a84992d10ebb1e363ffdbe Mon Sep 17 00:00:00 2001 From: Erika Hunhoff Date: Sun, 19 Nov 2023 18:54:40 -0800 Subject: [PATCH 22/44] Update memcache-bench git hash, detect memory allocation failures in memcached output, clean up compilation warnings --- kernel/tests/s11_rackscale_benchmarks.rs | 37 +++++++++++++++--------- kernel/testutils/src/rackscale_runner.rs | 2 +- kernel/testutils/src/runner_args.rs | 3 +- 3 files changed, 25 insertions(+), 17 deletions(-) diff --git a/kernel/tests/s11_rackscale_benchmarks.rs b/kernel/tests/s11_rackscale_benchmarks.rs index dcf9f3fd1..ede8937d9 100644 --- a/kernel/tests/s11_rackscale_benchmarks.rs +++ b/kernel/tests/s11_rackscale_benchmarks.rs @@ -990,13 +990,18 @@ fn parse_memcached_output( *output += prev.as_str(); *output += matched.as_str(); - Ok(MemcachedShardedResult { - b_threads, - b_mem, - b_queries, - b_time, - b_thpt, - }) + if output.contains("MEMORY ALLOCATION FAILURE") { + println!("Detected memory allocation error in memcached output"); + Err("Memory allocation failure".into()) + } else { + Ok(MemcachedShardedResult { + b_threads, + b_mem, + b_queries, + b_time, + b_thpt, + }) + } } #[cfg(not(feature = "baremetal"))] @@ -1036,12 +1041,12 @@ fn rackscale_memcached_checkout() { } println!( - "CHECKOUT 0d90d53b99c3890b6e47efe08446e5180711ff09 {:?}", + "CHECKOUT e585c23e578d79b18d703b06f26b6e10a502d129 {:?}", out_dir ); let res = Command::new("git") - .args(&["checkout", "0d90d53b99c3890b6e47efe08446e5180711ff09"]) + .args(&["checkout", "e585c23e578d79b18d703b06f26b6e10a502d129"]) .current_dir(out_dir_path.as_path()) .output() .expect("git checkout failed"); @@ -1134,7 +1139,9 @@ fn s11_rackscale_memcached_benchmark_sharded_linux() { format!("tcp://localhost:{}", 11212 + id) } else { let pathname = config.path.join(format!("memcached{id}.sock")); - remove_file(pathname.clone()); // make sure the socket file is removed + if pathname.is_file() { + remove_file(pathname.clone()).expect("Failed to remove path"); // make sure the socket file is removed + } format!("unix://{}", pathname.display()) }; @@ -1252,7 +1259,7 @@ fn s11_rackscale_memcached_benchmark_sharded_linux() { let r = csv_file.write(out.as_bytes()); assert!(r.is_ok()); - let r = pty.process.kill(SIGKILL); + let _r = pty.process.kill(SIGKILL); // single node for protocol in &["tcp", "unix"] { @@ -1305,11 +1312,11 @@ fn s11_rackscale_memcached_benchmark_sharded_linux() { println!("Timeout while waiting for {} ms\n", timeout.as_millis()); println!("Expected: `{expected}`\n"); println!("Got:",); - for l in got.lines().take(5) { + for l in got.lines().take(20) { println!(" > {l}"); } } else { - println!("error: {}", e); + panic!("error: {}", e); } let r = csv_file.write(format!("{},", env!("GIT_HASH")).as_bytes()); @@ -1611,7 +1618,9 @@ fn s11_rackscale_memcached_benchmark_sharded_nros() { }; bench.run_bench(false, is_smoke); for mut ping in pings.into_iter() { - ping.process.kill(SIGKILL); + if !ping.process.kill(SIGKILL).is_ok() { + println!("Failed to kill ping process"); + } } } diff --git a/kernel/testutils/src/rackscale_runner.rs b/kernel/testutils/src/rackscale_runner.rs index c1b7ff049..30ba5fc43 100644 --- a/kernel/testutils/src/rackscale_runner.rs +++ b/kernel/testutils/src/rackscale_runner.rs @@ -1,7 +1,7 @@ use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::mpsc::{Receiver, Sender, TryRecvError}; use std::sync::{mpsc::channel, Arc, Mutex}; -use std::thread::{self, sleep}; +use std::thread; use std::time::Duration; use 
rexpect::errors::*; diff --git a/kernel/testutils/src/runner_args.rs b/kernel/testutils/src/runner_args.rs index b3f4c15f3..7f935b173 100644 --- a/kernel/testutils/src/runner_args.rs +++ b/kernel/testutils/src/runner_args.rs @@ -638,10 +638,9 @@ pub fn wait_for_sigterm_or_successful_exit_no_log( Err(e) => { log_qemu_args(args); println!("Qemu testing failed: {} ", name); - use rexpect::errors::Error; use rexpect::errors::ErrorKind::Timeout; match e { - Error(Timeout(expected, got, timeout), st) => { + Error(Timeout(expected, got, _timeout), _st) => { println!("Expected: `{expected}`\n"); println!("Got:",); let count = got.lines().count(); From 19f5544f11260e256fb7b6811050d360e0107bdf Mon Sep 17 00:00:00 2001 From: Erika Hunhoff Date: Sun, 19 Nov 2023 19:14:48 -0800 Subject: [PATCH 23/44] Add additional memcached benchmark memory for sharded linux --- kernel/tests/s11_rackscale_benchmarks.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/tests/s11_rackscale_benchmarks.rs b/kernel/tests/s11_rackscale_benchmarks.rs index ede8937d9..efbc44576 100644 --- a/kernel/tests/s11_rackscale_benchmarks.rs +++ b/kernel/tests/s11_rackscale_benchmarks.rs @@ -1041,12 +1041,12 @@ fn rackscale_memcached_checkout() { } println!( - "CHECKOUT e585c23e578d79b18d703b06f26b6e10a502d129 {:?}", + "CHECKOUT a703eedd8032ff1e083e8c5972eacc95738c797b {:?}", out_dir ); let res = Command::new("git") - .args(&["checkout", "e585c23e578d79b18d703b06f26b6e10a502d129"]) + .args(&["checkout", "a703eedd8032ff1e083e8c5972eacc95738c797b"]) .current_dir(out_dir_path.as_path()) .output() .expect("git checkout failed"); @@ -1151,7 +1151,7 @@ fn s11_rackscale_memcached_benchmark_sharded_linux() { "scripts/spawn-memcached-process.sh", id.to_string().as_str(), con_info.as_str(), - config.mem_size.to_string().as_str(), + (2 * config.mem_size).to_string().as_str(), config.num_threads.to_string().as_str(), ]); command.current_dir(config.path.as_path()); From b0c21f8cc54e6a090436f93e16854ffabf035cbb Mon Sep 17 00:00:00 2001 From: Erika Hunhoff Date: Sun, 19 Nov 2023 20:17:00 -0800 Subject: [PATCH 24/44] Start to consolidate memcached helper functions --- kernel/tests/s10_benchmarks.rs | 4 +- kernel/tests/s11_rackscale_benchmarks.rs | 244 ++--------------------- kernel/testutils/src/configs.rs | 5 - kernel/testutils/src/lib.rs | 2 +- kernel/testutils/src/memcached.rs | 175 ++++++++++++++++ 5 files changed, 192 insertions(+), 238 deletions(-) delete mode 100644 kernel/testutils/src/configs.rs create mode 100644 kernel/testutils/src/memcached.rs diff --git a/kernel/tests/s10_benchmarks.rs b/kernel/tests/s10_benchmarks.rs index f419811ee..403a6fa66 100644 --- a/kernel/tests/s10_benchmarks.rs +++ b/kernel/tests/s10_benchmarks.rs @@ -22,6 +22,7 @@ use serde::Serialize; use testutils::builder::{BuildArgs, Machine}; use testutils::helpers::{setup_network, spawn_dhcpd, spawn_nrk, DHCP_ACK_MATCH}; +use testutils::memcached::{MEMCACHED_MEM_SIZE_MB, MEMCACHED_NUM_QUERIES}; use testutils::redis::{redis_benchmark, REDIS_BENCHMARK, REDIS_START_MATCH}; use testutils::runner_args::{check_for_successful_exit, wait_for_sigterm, RunnerArgs}; @@ -872,9 +873,6 @@ fn s10_leveldb_benchmark() { } } -use testutils::configs::MEMCACHED_MEM_SIZE_MB; -use testutils::configs::MEMCACHED_NUM_QUERIES; - #[test] fn s10_memcached_benchmark_internal() { setup_network(1); diff --git a/kernel/tests/s11_rackscale_benchmarks.rs b/kernel/tests/s11_rackscale_benchmarks.rs index efbc44576..743597983 100644 --- a/kernel/tests/s11_rackscale_benchmarks.rs 
+++ b/kernel/tests/s11_rackscale_benchmarks.rs @@ -25,8 +25,10 @@ use testutils::helpers::{DCMConfig, DCMSolver}; use testutils::rackscale_runner::{RackscaleBench, RackscaleRun}; use testutils::runner_args::RackscaleTransport; -use testutils::configs::MEMCACHED_MEM_SIZE_MB; -use testutils::configs::MEMCACHED_NUM_QUERIES; +use testutils::memcached::{ + parse_memcached_output, rackscale_memcached_checkout, MemcachedShardedConfig, + MEMCACHED_MEM_SIZE_MB, MEMCACHED_NUM_QUERIES, +}; #[test] #[cfg(not(feature = "baremetal"))] @@ -513,64 +515,7 @@ fn rackscale_memcached_benchmark(transport: RackscaleTransport) { *output += prev.as_str(); *output += matched.as_str(); - // x_benchmark_mem = 10 MB - let (prev, matched) = proc.exp_regex(r#"x_benchmark_mem = (\d+) MB"#)?; - println!("> {}", matched); - let b_mem = matched.replace("x_benchmark_mem = ", "").replace(" MB", ""); - - *output += prev.as_str(); - *output += matched.as_str(); - - // number of threads: 3 - let (prev, matched) = proc.exp_regex(r#"number of threads: (\d+)"#)?; - println!("> {}", matched); - let b_threads = matched.replace("number of threads: ", ""); - - *output += prev.as_str(); - *output += matched.as_str(); - - // number of keys: 131072 - let (prev, matched) = proc.exp_regex(r#"number of keys: (\d+)"#)?; - println!("> {}", matched); - - *output += prev.as_str(); - *output += matched.as_str(); - - let (prev, matched) = proc.exp_regex(r#"Executing (\d+) queries with (\d+) threads"#)?; - println!("> {}", matched); - - *output += prev.as_str(); - *output += matched.as_str(); - - // benchmark took 129 seconds - let (prev, matched) = proc.exp_regex(r#"benchmark took (\d+) ms"#)?; - println!("> {}", matched); - let b_time = matched.replace("benchmark took ", "").replace(" ms", ""); - - *output += prev.as_str(); - *output += matched.as_str(); - - // benchmark took 7937984 queries / second - let (prev, matched) = proc.exp_regex(r#"benchmark took (\d+) queries / second"#)?; - println!("> {}", matched); - let b_thpt = matched - .replace("benchmark took ", "") - .replace(" queries / second", ""); - - *output += prev.as_str(); - *output += matched.as_str(); - - let (prev, matched) = proc.exp_regex(r#"benchmark executed (\d+)"#)?; - println!("> {}", matched); - let b_queries = matched - .replace("benchmark executed ", "") - .split(" ") - .next() - .unwrap() - .to_string(); - - *output += prev.as_str(); - *output += matched.as_str(); + let ret = parse_memcached_output(proc, output)?; // Append parsed results to a CSV file let write_headers = !Path::new(file_name).exists(); @@ -590,8 +535,14 @@ fn rackscale_memcached_benchmark(transport: RackscaleTransport) { let r = csv_file.write(format!("{},", env!("GIT_HASH")).as_bytes()); assert!(r.is_ok()); let out = format!( - "memcached,{},{},{},{},{},{},{}", - b_threads, b_mem, b_queries, b_time, b_thpt, actual_num_clients, num_clients + "memcached_internal,{},{},{},{},{},{},{}", + ret.b_threads, + ret.b_mem, + ret.b_queries, + ret.b_time, + ret.b_thpt, + actual_num_clients, + num_clients ); let r = csv_file.write(out.as_bytes()); assert!(r.is_ok()); @@ -913,171 +864,6 @@ fn rackscale_memcached_dcm(transport: RackscaleTransport, dcm_config: Option Result { - // x_benchmark_mem = 10 MB - let (prev, matched) = p.exp_regex(r#"x_benchmark_mem = (\d+) MB"#)?; - // println!("> {}", matched); - let b_mem = matched.replace("x_benchmark_mem = ", "").replace(" MB", ""); - - *output += prev.as_str(); - *output += matched.as_str(); - - // number of threads: 3 - let (prev, matched) = p.exp_regex(r#"number of 
threads: (\d+)"#)?; - // println!("> {}", matched); - let b_threads = matched.replace("number of threads: ", ""); - - *output += prev.as_str(); - *output += matched.as_str(); - - // number of keys: 131072 - let (prev, matched) = p.exp_regex(r#"number of keys: (\d+)"#)?; - // println!("> {}", matched); - - *output += prev.as_str(); - *output += matched.as_str(); - - // benchmark took 129 seconds - let (prev, matched) = p.exp_regex(r#"benchmark took (\d+) ms"#)?; - // println!("> {}", matched); - let b_time = matched.replace("benchmark took ", "").replace(" ms", ""); - - *output += prev.as_str(); - *output += matched.as_str(); - - // benchmark took 7937984 queries / second - let (prev, matched) = p.exp_regex(r#"benchmark took (\d+) queries / second"#)?; - println!("> {}", matched); - let b_thpt = matched - .replace("benchmark took ", "") - .replace(" queries / second", ""); - - *output += prev.as_str(); - *output += matched.as_str(); - - let (prev, matched) = p.exp_regex(r#"benchmark executed (\d+)"#)?; - println!("> {}", matched); - let b_queries = matched - .replace("benchmark executed ", "") - .split(" ") - .next() - .unwrap() - .to_string(); - - *output += prev.as_str(); - *output += matched.as_str(); - - if output.contains("MEMORY ALLOCATION FAILURE") { - println!("Detected memory allocation error in memcached output"); - Err("Memory allocation failure".into()) - } else { - Ok(MemcachedShardedResult { - b_threads, - b_mem, - b_queries, - b_time, - b_thpt, - }) - } -} - -#[cfg(not(feature = "baremetal"))] -fn rackscale_memcached_checkout() { - let out_dir_path = PathBuf::from(env!("CARGO_TARGET_TMPDIR")).join("sharded-memcached"); - - let out_dir = out_dir_path.display().to_string(); - - println!("CARGO_TARGET_TMPDIR {:?}", out_dir); - - // clone abd build the benchmark - if !out_dir_path.is_dir() { - println!("RMDIR {:?}", out_dir_path); - Command::new(format!("rm",)) - .args(&["-rf", out_dir.as_str()]) - .status() - .unwrap(); - - println!("MKDIR {:?}", out_dir_path); - Command::new(format!("mkdir",)) - .args(&["-p", out_dir.as_str()]) - .status() - .unwrap(); - - println!("CLONE {:?}", out_dir_path); - let url = "https://github.com/achreto/memcached-bench.git"; - Command::new("git") - .args(&["clone", "--depth=1", url, out_dir.as_str()]) - .output() - .expect("failed to clone"); - } else { - Command::new("git") - .args(&["pull"]) - .current_dir(out_dir_path.as_path()) - .output() - .expect("failed to pull"); - } - - println!( - "CHECKOUT a703eedd8032ff1e083e8c5972eacc95738c797b {:?}", - out_dir - ); - - let res = Command::new("git") - .args(&["checkout", "a703eedd8032ff1e083e8c5972eacc95738c797b"]) - .current_dir(out_dir_path.as_path()) - .output() - .expect("git checkout failed"); - if !res.status.success() { - std::io::stdout().write_all(&res.stdout).unwrap(); - std::io::stderr().write_all(&res.stderr).unwrap(); - panic!("git checkout failed!"); - } - - println!("BUILD {:?}", out_dir_path); - for (key, value) in env::vars() { - println!("{}: {}", key, value); - } - - let build_args = &["-j", "8"]; - - // now build the benchmark - let status = Command::new("make") - .args(build_args) - .current_dir(&out_dir_path) - .output() - .expect("Can't make app dir"); - - if !status.status.success() { - println!("BUILD FAILED"); - std::io::stdout().write_all(&status.stdout).unwrap(); - std::io::stderr().write_all(&status.stderr).unwrap(); - panic!("BUILD FAILED"); - } -} - #[test] #[cfg(not(feature = "baremetal"))] fn s11_rackscale_memcached_benchmark_sharded_linux() { @@ -1089,7 +875,7 
@@ fn s11_rackscale_memcached_benchmark_sharded_linux() { let out_dir_path = PathBuf::from(env!("CARGO_TARGET_TMPDIR")).join("sharded-memcached"); let is_smoke = cfg!(feature = "smoke"); - rackscale_memcached_checkout(); + rackscale_memcached_checkout(env!("CARGO_TARGET_TMPDIR")); // stuff has been built, now we can run the benchmark let mut config = if is_smoke { @@ -1360,7 +1146,7 @@ fn s11_rackscale_memcached_benchmark_sharded_nros() { let out_dir_path = PathBuf::from(env!("CARGO_TARGET_TMPDIR")).join("sharded-memcached"); let is_smoke = cfg!(feature = "smoke"); - rackscale_memcached_checkout(); + rackscale_memcached_checkout(env!("CARGO_TARGET_TMPDIR")); // stuff has been built, now we can run the benchmark let mut config = if is_smoke { diff --git a/kernel/testutils/src/configs.rs b/kernel/testutils/src/configs.rs deleted file mode 100644 index ca7f237b6..000000000 --- a/kernel/testutils/src/configs.rs +++ /dev/null @@ -1,5 +0,0 @@ -// Copyright © 2021 VMware, Inc. All Rights Reserved. -// SPDX-License-Identifier: Apache-2.0 OR MIT - -pub const MEMCACHED_MEM_SIZE_MB: usize = 4 * 1024; -pub const MEMCACHED_NUM_QUERIES: usize = 1_000_000; diff --git a/kernel/testutils/src/lib.rs b/kernel/testutils/src/lib.rs index f6e228498..af1e94825 100644 --- a/kernel/testutils/src/lib.rs +++ b/kernel/testutils/src/lib.rs @@ -9,8 +9,8 @@ extern crate rexpect; extern crate serde; pub mod builder; -pub mod configs; pub mod helpers; +pub mod memcached; pub mod rackscale_runner; pub mod redis; pub mod runner_args; diff --git a/kernel/testutils/src/memcached.rs b/kernel/testutils/src/memcached.rs new file mode 100644 index 000000000..493ddd2ac --- /dev/null +++ b/kernel/testutils/src/memcached.rs @@ -0,0 +1,175 @@ +// Copyright © 2023 VMware, Inc. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 OR MIT + +use std::env; +use std::io::Write; +use std::path::PathBuf; +use std::process::Command; + +use rexpect::errors::*; +use rexpect::session::PtySession; + +pub const MEMCACHED_MEM_SIZE_MB: usize = 4 * 1024; +pub const MEMCACHED_NUM_QUERIES: usize = 1_000_000; + +#[derive(Clone)] +pub struct MemcachedShardedConfig { + pub num_servers: usize, + pub num_queries: usize, + pub is_local_host: bool, + pub mem_size: usize, + pub protocol: &'static str, + pub num_threads: usize, + pub path: PathBuf, +} + +#[derive(Clone, Debug)] +pub struct MemcachedResult { + pub b_threads: String, + pub b_mem: String, + pub b_queries: String, + pub b_time: String, + pub b_thpt: String, +} + +pub fn parse_memcached_output(p: &mut PtySession, output: &mut String) -> Result { + // x_benchmark_mem = 10 MB + let (prev, matched) = p.exp_regex(r#"x_benchmark_mem = (\d+) MB"#)?; + // println!("> {}", matched); + let b_mem = matched.replace("x_benchmark_mem = ", "").replace(" MB", ""); + + *output += prev.as_str(); + *output += matched.as_str(); + + // number of threads: 3 + let (prev, matched) = p.exp_regex(r#"number of threads: (\d+)"#)?; + // println!("> {}", matched); + let b_threads = matched.replace("number of threads: ", ""); + + *output += prev.as_str(); + *output += matched.as_str(); + + // number of keys: 131072 + let (prev, matched) = p.exp_regex(r#"number of keys: (\d+)"#)?; + // println!("> {}", matched); + + *output += prev.as_str(); + *output += matched.as_str(); + + // benchmark took 129 seconds + let (prev, matched) = p.exp_regex(r#"benchmark took (\d+) ms"#)?; + // println!("> {}", matched); + let b_time = matched.replace("benchmark took ", "").replace(" ms", ""); + + *output += prev.as_str(); + *output += matched.as_str(); + + // benchmark took 7937984 queries / second + let (prev, matched) = p.exp_regex(r#"benchmark took (\d+) queries / second"#)?; + println!("> {}", matched); + let b_thpt = matched + .replace("benchmark took ", "") + .replace(" queries / second", ""); + + *output += prev.as_str(); + *output += matched.as_str(); + + let (prev, matched) = p.exp_regex(r#"benchmark executed (\d+)"#)?; + println!("> {}", matched); + let b_queries = matched + .replace("benchmark executed ", "") + .split(" ") + .next() + .unwrap() + .to_string(); + + *output += prev.as_str(); + *output += matched.as_str(); + + if output.contains("MEMORY ALLOCATION FAILURE") { + println!("Detected memory allocation error in memcached output"); + Err("Memory allocation failure".into()) + } else { + Ok(MemcachedResult { + b_threads, + b_mem, + b_queries, + b_time, + b_thpt, + }) + } +} + +#[cfg(not(feature = "baremetal"))] +pub fn rackscale_memcached_checkout(tmpdir: &str) { + let out_dir_path = PathBuf::from(tmpdir).join("sharded-memcached"); + + let out_dir = out_dir_path.display().to_string(); + + println!("CARGO_TARGET_TMPDIR {:?}", out_dir); + + // clone abd build the benchmark + if !out_dir_path.is_dir() { + println!("RMDIR {:?}", out_dir_path); + Command::new(format!("rm",)) + .args(&["-rf", out_dir.as_str()]) + .status() + .unwrap(); + + println!("MKDIR {:?}", out_dir_path); + Command::new(format!("mkdir",)) + .args(&["-p", out_dir.as_str()]) + .status() + .unwrap(); + + println!("CLONE {:?}", out_dir_path); + let url = "https://github.com/achreto/memcached-bench.git"; + Command::new("git") + .args(&["clone", "--depth=1", url, out_dir.as_str()]) + .output() + .expect("failed to clone"); + } else { + Command::new("git") + .args(&["pull"]) + 
.current_dir(out_dir_path.as_path()) + .output() + .expect("failed to pull"); + } + + println!( + "CHECKOUT a703eedd8032ff1e083e8c5972eacc95738c797b {:?}", + out_dir + ); + + let res = Command::new("git") + .args(&["checkout", "a703eedd8032ff1e083e8c5972eacc95738c797b"]) + .current_dir(out_dir_path.as_path()) + .output() + .expect("git checkout failed"); + if !res.status.success() { + std::io::stdout().write_all(&res.stdout).unwrap(); + std::io::stderr().write_all(&res.stderr).unwrap(); + panic!("git checkout failed!"); + } + + println!("BUILD {:?}", out_dir_path); + for (key, value) in env::vars() { + println!("{}: {}", key, value); + } + + let build_args = &["-j", "8"]; + + // now build the benchmark + let status = Command::new("make") + .args(build_args) + .current_dir(&out_dir_path) + .output() + .expect("Can't make app dir"); + + if !status.status.success() { + println!("BUILD FAILED"); + std::io::stdout().write_all(&status.stdout).unwrap(); + std::io::stderr().write_all(&status.stderr).unwrap(); + panic!("BUILD FAILED"); + } +} From 296b78aaf88682e9fe6a3e5615cef9b72610c3fa Mon Sep 17 00:00:00 2001 From: Erika Hunhoff Date: Sun, 19 Nov 2023 21:35:34 -0800 Subject: [PATCH 25/44] Make memcached naming convention and output csv files consistent --- kernel/tests/s11_rackscale_benchmarks.rs | 158 ++++++++--------------- kernel/testutils/src/memcached.rs | 85 +++++++++++- scripts/ci.bash | 8 +- 3 files changed, 139 insertions(+), 112 deletions(-) diff --git a/kernel/tests/s11_rackscale_benchmarks.rs b/kernel/tests/s11_rackscale_benchmarks.rs index 743597983..2053cf7b3 100644 --- a/kernel/tests/s11_rackscale_benchmarks.rs +++ b/kernel/tests/s11_rackscale_benchmarks.rs @@ -26,8 +26,9 @@ use testutils::rackscale_runner::{RackscaleBench, RackscaleRun}; use testutils::runner_args::RackscaleTransport; use testutils::memcached::{ - parse_memcached_output, rackscale_memcached_checkout, MemcachedShardedConfig, - MEMCACHED_MEM_SIZE_MB, MEMCACHED_NUM_QUERIES, + linux_spawn_memcached, parse_memcached_output, rackscale_memcached_checkout, + MemcachedShardedConfig, MEMCACHED_MEM_SIZE_MB, MEMCACHED_NUM_QUERIES, + RACKSCALE_MEMCACHED_CSV_COLUMNS, }; #[test] @@ -477,12 +478,12 @@ struct MemcachedInternalConfig { #[test] #[cfg(not(feature = "baremetal"))] -fn s11_rackscale_memcached_benchmark_internal() { - rackscale_memcached_benchmark(RackscaleTransport::Shmem); +fn s11_rackscale_shmem_memcached_internal_benchmark() { + rackscale_memcached_internal_benchmark(RackscaleTransport::Shmem); } #[cfg(not(feature = "baremetal"))] -fn rackscale_memcached_benchmark(transport: RackscaleTransport) { +fn rackscale_memcached_internal_benchmark(transport: RackscaleTransport) { let is_smoke = cfg!(feature = "smoke"); let file_name = format!( @@ -491,6 +492,11 @@ fn rackscale_memcached_benchmark(transport: RackscaleTransport) { ); let _ignore = std::fs::remove_file(file_name.clone()); + let baseline_file_name = "rackscale_baseline_memcached_benchmark.csv"; + if cfg!(feature = "baseline") { + let _ignore = std::fs::remove_file(baseline_file_name.clone()); + } + let built = BuildArgs::default() .module("rkapps") .user_feature("rkapps:memcached-bench") @@ -525,24 +531,32 @@ fn rackscale_memcached_benchmark(transport: RackscaleTransport) { .open(file_name) .expect("Can't open file"); if write_headers { - let row = "git_rev,benchmark,nthreads,mem,queries,time,thpt,num_clients,num_replicas\n"; - let r = csv_file.write(row.as_bytes()); + let r = csv_file.write(RACKSCALE_MEMCACHED_CSV_COLUMNS.as_bytes()); assert!(r.is_ok()); } - 
let actual_num_clients = if is_baseline { 0 } else { num_clients }; + let os_name = if is_baseline { "nros" } else { "dinos" }; + let protocol = if is_baseline { + "internal" + } else if file_name.contains(&RackscaleTransport::Ethernet.to_string()) { + "tcp" + } else { + "shmem" + }; let r = csv_file.write(format!("{},", env!("GIT_HASH")).as_bytes()); assert!(r.is_ok()); + let out = format!( - "memcached_internal,{},{},{},{},{},{},{}", + "memcached_internal,{},{},{},{},{},{},{},{}", + os_name, + protocol, + num_clients, ret.b_threads, ret.b_mem, ret.b_queries, ret.b_time, - ret.b_thpt, - actual_num_clients, - num_clients + ret.b_thpt ); let r = csv_file.write(out.as_bytes()); assert!(r.is_ok()); @@ -625,7 +639,7 @@ fn rackscale_memcached_benchmark(transport: RackscaleTransport) { } } - let bench = RackscaleBench { + let mut bench = RackscaleBench { test, cmd_fn, baseline_timeout_fn, @@ -633,10 +647,12 @@ fn rackscale_memcached_benchmark(transport: RackscaleTransport) { mem_fn, }; + bench.run_bench(false, is_smoke); + if cfg!(feature = "baseline") { + bench.test.file_name = baseline_file_name.to_string(); bench.run_bench(true, is_smoke); } - bench.run_bench(false, is_smoke); } #[ignore] @@ -866,9 +882,7 @@ fn rackscale_memcached_dcm(transport: RackscaleTransport, dcm_config: Option Result { - let con_info = if config.protocol == "tcp" { - format!("tcp://localhost:{}", 11212 + id) - } else { - let pathname = config.path.join(format!("memcached{id}.sock")); - if pathname.is_file() { - remove_file(pathname.clone()).expect("Failed to remove path"); // make sure the socket file is removed - } - format!("unix://{}", pathname.display()) - }; - - let mut command = Command::new("bash"); - - command.args(&[ - "scripts/spawn-memcached-process.sh", - id.to_string().as_str(), - con_info.as_str(), - (2 * config.mem_size).to_string().as_str(), - config.num_threads.to_string().as_str(), - ]); - command.current_dir(config.path.as_path()); - - println!("Spawning memcached:\n $ `{:?}`", command); - - let mut res = spawn_command(command, Some(timeout_ms))?; - std::thread::sleep(Duration::from_secs(1)); - - match res.exp_regex(r#"INTERNAL BENCHMARK CONFIGURE"#) { - Ok((_prev, _matched)) => { - println!(" $ OK."); - Ok(res) - } - Err(e) => { - println!(" $ FAILED. {}", e); - Err(e) - } - } - } + let file_name = "linux_memcached_sharded_benchmark.csv"; - fn spawn_loadbalancer(config: &MemcachedShardedConfig, timeout_ms: u64) -> Result { - let mut command = Command::new("./loadbalancer/loadbalancer"); - command.args(&["--binary"]); - command.arg(format!("--num-queries={}", config.num_queries).as_str()); - command.arg(format!("--num-threads={}", config.num_threads).as_str()); - command.arg(format!("--max-memory={}", config.mem_size / 8).as_str()); - let mut servers = String::from("--servers="); - for i in 0..config.num_servers { - if i > 0 { - servers.push_str(","); - } - if config.protocol == "tcp" { - if config.is_local_host { - servers.push_str(format!("tcp://localhost:{}", 11212 + i).as_str()); - } else { - // +1 because tap0 is reserved for the controller. 
- let ip = 10 + i + 1; - servers.push_str(format!("tcp://172.31.0.{}:{}", ip, 11211).as_str()); - } - } else { - servers.push_str( - format!("unix://{}/memcached{}.sock", config.path.display(), i).as_str(), - ); - } - } - command.arg(servers.as_str()); - command.current_dir(config.path.as_path()); - - // give the servers some time to be spawned - std::thread::sleep(Duration::from_secs(5)); - - println!("Spawning Loadbalancer: \n $ `{:?}`", command); - - spawn_command(command, Some(timeout_ms)) - } - - let file_name = "memcached_benchmark_sharded_linux.csv"; let _r = std::fs::remove_file(file_name); let mut csv_file = OpenOptions::new() @@ -1004,8 +940,7 @@ fn s11_rackscale_memcached_benchmark_sharded_linux() { .open(file_name) .expect("Can't open file"); - let row = "git_rev,benchmark,os,nthreads,protocol,mem,queries,time,thpt\n"; - let r = csv_file.write(row.as_bytes()); + let r = csv_file.write(RACKSCALE_MEMCACHED_CSV_COLUMNS.as_bytes()); assert!(r.is_ok()); let max_threads_per_node = if is_smoke { @@ -1038,9 +973,11 @@ fn s11_rackscale_memcached_benchmark_sharded_linux() { parse_memcached_output(&mut pty, &mut output).expect("could not parse output!"); let r = csv_file.write(format!("{},", env!("GIT_HASH")).as_bytes()); assert!(r.is_ok()); + + //git_rev,benchmark,os,protocol,npieces,nthreads,mem,queries,time,thpt let out = format!( - "memcached_sharded,linux,{},{},{},{},{},{}\n", - res.b_threads, "internal", res.b_mem, res.b_queries, res.b_time, res.b_thpt, + "memcached_sharded,linux,{},{},{},{},{},{},{}\n", + "internal", 1, res.b_threads, res.b_mem, res.b_queries, res.b_time, res.b_thpt, ); let r = csv_file.write(out.as_bytes()); assert!(r.is_ok()); @@ -1066,12 +1003,13 @@ fn s11_rackscale_memcached_benchmark_sharded_linux() { let mut memcached_ctrls = Vec::new(); for i in 0..num_nodes { memcached_ctrls.push( - spawn_memcached(i, &config, timeout_ms).expect("could not spawn memcached"), + linux_spawn_memcached(i, &config, timeout_ms) + .expect("could not spawn memcached"), ); } - let mut pty = - spawn_loadbalancer(&config, timeout_ms).expect("failed to spawn load balancer"); + let mut pty = testutils::memcached::spawn_loadbalancer(&config, timeout_ms) + .expect("failed to spawn load balancer"); let mut output = String::new(); use rexpect::errors::ErrorKind::Timeout; match parse_memcached_output(&mut pty, &mut output) { @@ -1079,9 +1017,10 @@ fn s11_rackscale_memcached_benchmark_sharded_linux() { let r = csv_file.write(format!("{},", env!("GIT_HASH")).as_bytes()); assert!(r.is_ok()); let out = format!( - "memcached_sharded,linux,{},{},{},{},{},{}\n", - res.b_threads, + "memcached_sharded,linux,{},{},{},{},{},{},{}\n", protocol, + config.num_servers, + res.b_threads, res.b_mem, res.b_queries, res.b_time, @@ -1108,8 +1047,12 @@ fn s11_rackscale_memcached_benchmark_sharded_linux() { let r = csv_file.write(format!("{},", env!("GIT_HASH")).as_bytes()); assert!(r.is_ok()); let out = format!( - "memcached_sharded,linux,{},{},failure,failure,failure,failure\n", - config.num_servers, protocol, + "memcached_sharded,linux,{},{},{},{},{},failure,failure\n", + protocol, + config.num_servers, + config.num_threads * config.num_servers, + config.mem_size, + config.num_queries, ); let r = csv_file.write(out.as_bytes()); assert!(r.is_ok()); @@ -1172,6 +1115,7 @@ fn s11_rackscale_memcached_benchmark_sharded_nros() { } }; + // TODO: consolidate code with testutils::memcached::spawn_loadbalancer fn spawn_loadbalancer(config: &MemcachedShardedConfig, timeout_ms: u64) -> Result { let mut command = 
Command::new("./loadbalancer/loadbalancer"); command.args(&["--binary"]); diff --git a/kernel/testutils/src/memcached.rs b/kernel/testutils/src/memcached.rs index 493ddd2ac..b7197b2cc 100644 --- a/kernel/testutils/src/memcached.rs +++ b/kernel/testutils/src/memcached.rs @@ -2,16 +2,21 @@ // SPDX-License-Identifier: Apache-2.0 OR MIT use std::env; +use std::fs::remove_file; use std::io::Write; use std::path::PathBuf; use std::process::Command; +use std::time::Duration; use rexpect::errors::*; -use rexpect::session::PtySession; +use rexpect::session::{spawn_command, PtySession}; pub const MEMCACHED_MEM_SIZE_MB: usize = 4 * 1024; pub const MEMCACHED_NUM_QUERIES: usize = 1_000_000; +pub const RACKSCALE_MEMCACHED_CSV_COLUMNS: &str = + "git_rev,benchmark,os,protocol,npieces,nthreads,mem,queries,time,thpt\n"; + #[derive(Clone)] pub struct MemcachedShardedConfig { pub num_servers: usize, @@ -173,3 +178,81 @@ pub fn rackscale_memcached_checkout(tmpdir: &str) { panic!("BUILD FAILED"); } } + +pub fn linux_spawn_memcached( + id: usize, + config: &MemcachedShardedConfig, + timeout_ms: u64, +) -> Result { + let con_info = if config.protocol == "tcp" { + format!("tcp://localhost:{}", 11212 + id) + } else { + let pathname = config.path.join(format!("memcached{id}.sock")); + if pathname.is_file() { + remove_file(pathname.clone()).expect("Failed to remove path"); // make sure the socket file is removed + } + format!("unix://{}", pathname.display()) + }; + + let mut command = Command::new("bash"); + + command.args(&[ + "scripts/spawn-memcached-process.sh", + id.to_string().as_str(), + con_info.as_str(), + (2 * config.mem_size).to_string().as_str(), + config.num_threads.to_string().as_str(), + ]); + command.current_dir(config.path.as_path()); + + println!("Spawning memcached:\n $ `{:?}`", command); + + let mut res = spawn_command(command, Some(timeout_ms))?; + std::thread::sleep(Duration::from_secs(1)); + + match res.exp_regex(r#"INTERNAL BENCHMARK CONFIGURE"#) { + Ok((_prev, _matched)) => { + println!(" $ OK."); + Ok(res) + } + Err(e) => { + println!(" $ FAILED. {}", e); + Err(e) + } + } +} + +pub fn spawn_loadbalancer(config: &MemcachedShardedConfig, timeout_ms: u64) -> Result { + let mut command = Command::new("./loadbalancer/loadbalancer"); + command.args(&["--binary"]); + command.arg(format!("--num-queries={}", config.num_queries).as_str()); + command.arg(format!("--num-threads={}", config.num_threads).as_str()); + command.arg(format!("--max-memory={}", config.mem_size).as_str()); + let mut servers = String::from("--servers="); + for i in 0..config.num_servers { + if i > 0 { + servers.push_str(","); + } + if config.protocol == "tcp" { + if config.is_local_host { + servers.push_str(format!("tcp://localhost:{}", 11212 + i).as_str()); + } else { + // +1 because tap0 is reserved for the controller. 
+ let ip = 10 + i + 1; + servers.push_str(format!("tcp://172.31.0.{}:{}", ip, 11211).as_str()); + } + } else { + servers + .push_str(format!("unix://{}/memcached{}.sock", config.path.display(), i).as_str()); + } + } + command.arg(servers.as_str()); + command.current_dir(config.path.as_path()); + + // give the servers some time to be spawned + std::thread::sleep(Duration::from_secs(5)); + + println!("Spawning Loadbalancer: \n $ `{:?}`", command); + + spawn_command(command, Some(timeout_ms)) +} diff --git a/scripts/ci.bash b/scripts/ci.bash index d40103f29..a27d02b6d 100644 --- a/scripts/ci.bash +++ b/scripts/ci.bash @@ -16,8 +16,8 @@ rm -f leveldb_benchmark.csv rm -f rackscale_shmem_vmops_benchmark.csv rm -f rackscale_shmem_vmops_latency_benchmark.csv rm -f rackscale_shmem_fxmark_benchmark.csv -rm -f rackscale_shmem_memcached_benchmark.csv -rm -f rackscale_sharded_memcached_benchmark_*.csv +rm -f rackscale_shmem_memcached_internal_benchmark.csv +rm -f linux_memcached_sharded_benchmark.csv # For vmops: --features prealloc can improve performance further (at the expense of test duration) RUST_TEST_THREADS=1 cargo test --test s10* -- s10_vmops_benchmark --nocapture @@ -30,8 +30,8 @@ RUST_TEST_THREADS=1 cargo test --test s10* -- s10_fxmark_bench --nocapture RUST_TEST_THREADS=1 cargo test --test s11* -- s11_rackscale_shmem_vmops_maptput_benchmark --nocapture RUST_TEST_THREADS=1 cargo test --test s11* -- s11_rackscale_shmem_vmops_maplat_benchmark --nocapture RUST_TEST_THREADS=1 cargo test --test s11* -- s11_rackscale_shmem_fxmark_bench --nocapture -RUST_TEST_THREADS=1 cargo test --test s11* -- s11_rackscale_memcached_benchmark_internal --nocapture -RUST_TEST_THREADS=1 cargo test --test s11* -- s11_rackscale_memcached_benchmark_sharded_linux --nocapture +RUST_TEST_THREADS=1 cargo test --test s11* -- s11_rackscale_shmem_memcached_internal_benchmark --nocapture +RUST_TEST_THREADS=1 cargo test --test s11* -- s11_linux_memcached_sharded_benchmark --nocapture # disabled for now as this causes too much issues with running for now # RUST_TEST_THREADS=1 cargo test --test s11* -- s11_rackscale_memcached_benchmark_sharded_nros --nocapture From 8c59e8e98788a05ad0c5df4e26cdc7971e506a1c Mon Sep 17 00:00:00 2001 From: Erika Hunhoff Date: Sun, 19 Nov 2023 21:39:41 -0800 Subject: [PATCH 26/44] Update ci with new file names --- scripts/ci.bash | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/scripts/ci.bash b/scripts/ci.bash index a27d02b6d..53be5d247 100644 --- a/scripts/ci.bash +++ b/scripts/ci.bash @@ -32,8 +32,6 @@ RUST_TEST_THREADS=1 cargo test --test s11* -- s11_rackscale_shmem_vmops_maplat_b RUST_TEST_THREADS=1 cargo test --test s11* -- s11_rackscale_shmem_fxmark_bench --nocapture RUST_TEST_THREADS=1 cargo test --test s11* -- s11_rackscale_shmem_memcached_internal_benchmark --nocapture RUST_TEST_THREADS=1 cargo test --test s11* -- s11_linux_memcached_sharded_benchmark --nocapture -# disabled for now as this causes too much issues with running for now -# RUST_TEST_THREADS=1 cargo test --test s11* -- s11_rackscale_memcached_benchmark_sharded_nros --nocapture # Clone repo rm -rf gh-pages @@ -65,10 +63,9 @@ if [ -d "${DEPLOY_DIR}" ]; then fi mkdir -p ${DEPLOY_DIR} mv memcached_benchmark_internal.csv ${DEPLOY_DIR} -mv memcached_benchmark_sharded_*.csv ${DEPLOY_DIR} +mv linux_memcached_sharded_benchmark.csv ${DEPLOY_DIR} gzip ${DEPLOY_DIR}/memcached_benchmark_internal.csv -# gzip ${DEPLOY_DIR}/memcached_benchmark_sharded_nros.csv -gzip 
${DEPLOY_DIR}/memcached_benchmark_sharded_linux.csv +gzip ${DEPLOY_DIR}/linux_memcached_sharded_benchmark.csv # Copy vmops results DEPLOY_DIR="gh-pages/vmops/${CI_MACHINE_TYPE}/${GIT_REV_CURRENT}/" @@ -139,8 +136,8 @@ if [ -d "${DEPLOY_DIR}" ]; then DEPLOY_DIR=${DEPLOY_DIR}${DATE_PREFIX} fi mkdir -p ${DEPLOY_DIR} -mv rackscale_shmem_memcached_benchmark.csv ${DEPLOY_DIR} -gzip ${DEPLOY_DIR}/rackscale_shmem_memcached_benchmark.csv +mv rackscale_shmem_memcached_internal_benchmark.csv ${DEPLOY_DIR} +gzip ${DEPLOY_DIR}/rackscale_shmem_memcached_internal_benchmark.csv # Update CI history plots python3 gh-pages/_scripts/ci_history.py --append --machine $CI_MACHINE_TYPE From ac20b6d8e7eca9e56bbad96f53883e28ce574376 Mon Sep 17 00:00:00 2001 From: Reto Achermann Date: Tue, 21 Nov 2023 13:31:00 -0800 Subject: [PATCH 27/44] unify the clients/cores for the nr sharded with the rackscale version Signed-off-by: Reto Achermann --- kernel/tests/s11_rackscale_benchmarks.rs | 236 ++++++++++++----------- 1 file changed, 128 insertions(+), 108 deletions(-) diff --git a/kernel/tests/s11_rackscale_benchmarks.rs b/kernel/tests/s11_rackscale_benchmarks.rs index 2053cf7b3..b8a002c8d 100644 --- a/kernel/tests/s11_rackscale_benchmarks.rs +++ b/kernel/tests/s11_rackscale_benchmarks.rs @@ -943,134 +943,154 @@ fn s11_linux_memcached_sharded_benchmark() { let r = csv_file.write(RACKSCALE_MEMCACHED_CSV_COLUMNS.as_bytes()); assert!(r.is_ok()); - let max_threads_per_node = if is_smoke { - 1 - } else { - machine.max_cores() / machine.max_numa_nodes() - }; - println!( - "Nodes: {}, max_threads_per_node: {max_threads_per_node}", - machine.max_numa_nodes() - ); - for num_nodes in 1..=machine.max_numa_nodes() { - config.num_servers = num_nodes; + let machine = Machine::determine(); + let max_cores = if is_smoke { 2 } else { machine.max_cores() }; + let max_numa = machine.max_numa_nodes(); + let total_cores_per_node = core::cmp::max(1, max_cores / max_numa); + + // Do initial network configuration + let mut num_clients = 1; // num_clients == num_replicas, for baseline + let mut total_cores = 1; + while total_cores < max_cores { + // Round up to get the number of clients + let new_num_clients = (total_cores + (total_cores_per_node - 1)) / total_cores_per_node; + + // Do network setup if number of clients has changed. + if num_clients != new_num_clients { + num_clients = new_num_clients; + + // ensure total cores is divisible by num clients + total_cores = total_cores - (total_cores % num_clients); + } + let cores_per_client = total_cores / num_clients; + + // Break if not enough total cores for the controller, or if we would have to split controller across nodes to make it fit + // We want controller to have it's own socket, so if it's not a 1 socket machine, break when there's equal number of clients + // to numa nodes. 
+ if total_cores + num_clients + 1 > machine.max_cores() + || num_clients == machine.max_numa_nodes() + && cores_per_client + num_clients + 1 > total_cores_per_node + || num_clients == max_numa && max_numa > 1 + { + break; + } - for num_threads in 1..=max_threads_per_node { - if (num_threads != 1 || num_threads != max_threads_per_node) && (num_threads % 8 != 0) { - continue; - } + eprintln!( + "\n\nRunning Sharded Memcached test with {:?} total core(s), {:?} (client|replica)(s) (cores_per_(client|replica)={:?})", + total_cores, num_clients, cores_per_client + ); - println!(""); + // terminate any previous memcached + let _ = Command::new("killall") + .args(&["memcached", "-s", "SIGKILL"]) + .output(); - config.num_threads = num_threads; + // run the internal configuration + config.num_threads = total_cores; - let _ = Command::new("killall") - .args(&["memcached", "-s", "SIGKILL"]) - .status(); - let mut pty = run_benchmark_internal(&config, timeout_ms); - let mut output = String::new(); - let res = - parse_memcached_output(&mut pty, &mut output).expect("could not parse output!"); - let r = csv_file.write(format!("{},", env!("GIT_HASH")).as_bytes()); - assert!(r.is_ok()); + println!("Memcached Internal: {total_cores} cores"); - //git_rev,benchmark,os,protocol,npieces,nthreads,mem,queries,time,thpt - let out = format!( - "memcached_sharded,linux,{},{},{},{},{},{},{}\n", - "internal", 1, res.b_threads, res.b_mem, res.b_queries, res.b_time, res.b_thpt, - ); - let r = csv_file.write(out.as_bytes()); - assert!(r.is_ok()); + let mut pty = run_benchmark_internal(&config, timeout_ms); + let mut output = String::new(); + let res = parse_memcached_output(&mut pty, &mut output).expect("could not parse output!"); + let r = csv_file.write(format!("{},", env!("GIT_HASH")).as_bytes()); + assert!(r.is_ok()); + let out = format!( + "memcached_sharded,linux,{},{},{},{},{},{}\n", + res.b_threads, "internal", res.b_mem, res.b_queries, res.b_time, res.b_thpt, + ); + let r = csv_file.write(out.as_bytes()); + assert!(r.is_ok()); - let _r = pty.process.kill(SIGKILL); + let r = pty + .process + .kill(SIGKILL) + .expect("unable to terminate memcached"); - // single node - for protocol in &["tcp", "unix"] { - config.protocol = protocol; + for protocol in &["tcp", "unix"] { + config.protocol = protocol; + config.num_servers = num_clients; + config.num_threads = cores_per_client; - println!(""); + println!("Memcached Sharded: {cores_per_client}x{num_clients} with {protocol}"); - println!("Memcached Sharded: {num_threads}x{num_nodes} with {protocol}"); + // terminate the memcached instance + let _ = Command::new("killall") + .args(&["memcached", "-s", "SIGKILL"]) + .status(); - // terminate the memcached instance - let _ = Command::new("killall") - .args(&["memcached", "-s", "SIGKILL"]) - .status(); + // give some time so memcached can be cleaned up + std::thread::sleep(Duration::from_secs(5)); - // give some time so memcached can be cleaned up - std::thread::sleep(Duration::from_secs(5)); + let mut memcached_ctrls = Vec::new(); + for i in 0..num_clients { + memcached_ctrls.push( + linux_spawn_memcached(i, &config, timeout_ms).expect("could not spawn memcached"), + ); + } - let mut memcached_ctrls = Vec::new(); - for i in 0..num_nodes { - memcached_ctrls.push( - linux_spawn_memcached(i, &config, timeout_ms) - .expect("could not spawn memcached"), - ); - } + config.num_threads = total_cores; - let mut pty = testutils::memcached::spawn_loadbalancer(&config, timeout_ms) - .expect("failed to spawn load balancer"); - let mut 
output = String::new(); - use rexpect::errors::ErrorKind::Timeout; - match parse_memcached_output(&mut pty, &mut output) { - Ok(res) => { - let r = csv_file.write(format!("{},", env!("GIT_HASH")).as_bytes()); - assert!(r.is_ok()); - let out = format!( - "memcached_sharded,linux,{},{},{},{},{},{},{}\n", - protocol, - config.num_servers, - res.b_threads, - res.b_mem, - res.b_queries, - res.b_time, - res.b_thpt, - ); - let r = csv_file.write(out.as_bytes()); - assert!(r.is_ok()); - - println!("{:?}", res); - } + let mut pty = + testutils::memcached::spawn_loadbalancer(&config, timeout_ms).expect("failed to spawn load balancer"); + let mut output = String::new(); + use rexpect::errors::ErrorKind::Timeout; + match parse_memcached_output(&mut pty, &mut output) { + Ok(res) => { + let r = csv_file.write(format!("{},", env!("GIT_HASH")).as_bytes()); + assert!(r.is_ok()); + let out = format!( + "memcached_sharded,linux,{},{},{},{},{},{}\n", + res.b_threads, protocol, res.b_mem, res.b_queries, res.b_time, res.b_thpt, + ); + let r = csv_file.write(out.as_bytes()); + assert!(r.is_ok()); - Err(e) => { - if let Timeout(expected, got, timeout) = e.0 { - println!("Timeout while waiting for {} ms\n", timeout.as_millis()); - println!("Expected: `{expected}`\n"); - println!("Got:",); - for l in got.lines().take(20) { - println!(" > {l}"); - } - } else { - panic!("error: {}", e); + println!("{:?}", res); + } + Err(e) => { + if let Timeout(expected, got, timeout) = e.0 { + println!("Timeout while waiting for {} ms\n", timeout.as_millis()); + println!("Expected: `{expected}`\n"); + println!("Got:",); + for l in got.lines().take(5) { + println!(" > {l}"); } + } else { + println!("error: {}", e); + } - let r = csv_file.write(format!("{},", env!("GIT_HASH")).as_bytes()); - assert!(r.is_ok()); - let out = format!( - "memcached_sharded,linux,{},{},{},{},{},failure,failure\n", - protocol, - config.num_servers, - config.num_threads * config.num_servers, - config.mem_size, - config.num_queries, - ); - let r = csv_file.write(out.as_bytes()); - assert!(r.is_ok()); - - for mc in memcached_ctrls.iter_mut() { - mc.process - .kill(rexpect::process::signal::Signal::SIGKILL) - .expect("couldn't terminate memcached"); - while let Ok(l) = mc.read_line() { - println!("MEMCACHED-OUTPUT: {}", l); - } + let r = csv_file.write(format!("{},", env!("GIT_HASH")).as_bytes()); + assert!(r.is_ok()); + let out = format!( + "memcached_sharded,linux,{},{},failure,failure,failure,failure\n", + config.num_servers, protocol, + ); + let r = csv_file.write(out.as_bytes()); + assert!(r.is_ok()); + + for mc in memcached_ctrls.iter_mut() { + mc.process + .kill(rexpect::process::signal::Signal::SIGKILL) + .expect("couldn't terminate memcached"); + while let Ok(l) = mc.read_line() { + println!("MEMCACHED-OUTPUT: {}", l); } - let _ = Command::new("killall").args(&["memcached"]).status(); } - }; + } + }; - let _ = pty.process.kill(rexpect::process::signal::Signal::SIGKILL); + if total_cores == 1 { + total_cores = 0; } + + if num_clients == 3 { + total_cores += 3; + } else { + total_cores += 4; + } + + let _ = pty.process.kill(rexpect::process::signal::Signal::SIGKILL); } } From a98a3f80a5a1f45ef103ce10f4c0ef188b92b599 Mon Sep 17 00:00:00 2001 From: Reto Achermann Date: Tue, 21 Nov 2023 16:57:13 -0800 Subject: [PATCH 28/44] benchmarks: apply formatter Signed-off-by: Reto Achermann --- kernel/tests/s11_rackscale_benchmarks.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/kernel/tests/s11_rackscale_benchmarks.rs 
b/kernel/tests/s11_rackscale_benchmarks.rs index b8a002c8d..917c3bc60 100644 --- a/kernel/tests/s11_rackscale_benchmarks.rs +++ b/kernel/tests/s11_rackscale_benchmarks.rs @@ -1025,14 +1025,15 @@ fn s11_linux_memcached_sharded_benchmark() { let mut memcached_ctrls = Vec::new(); for i in 0..num_clients { memcached_ctrls.push( - linux_spawn_memcached(i, &config, timeout_ms).expect("could not spawn memcached"), + linux_spawn_memcached(i, &config, timeout_ms) + .expect("could not spawn memcached"), ); } config.num_threads = total_cores; - let mut pty = - testutils::memcached::spawn_loadbalancer(&config, timeout_ms).expect("failed to spawn load balancer"); + let mut pty = testutils::memcached::spawn_loadbalancer(&config, timeout_ms) + .expect("failed to spawn load balancer"); let mut output = String::new(); use rexpect::errors::ErrorKind::Timeout; match parse_memcached_output(&mut pty, &mut output) { From 3ca169cac05e3d2f97b5823012d2ed23fddfe7a7 Mon Sep 17 00:00:00 2001 From: Reto Achermann Date: Wed, 22 Nov 2023 13:06:28 -0800 Subject: [PATCH 29/44] bench: use 4G/10M configuration for memcached Signed-off-by: Reto Achermann --- kernel/testutils/src/memcached.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/testutils/src/memcached.rs b/kernel/testutils/src/memcached.rs index b7197b2cc..8c4a2cb8c 100644 --- a/kernel/testutils/src/memcached.rs +++ b/kernel/testutils/src/memcached.rs @@ -12,7 +12,7 @@ use rexpect::errors::*; use rexpect::session::{spawn_command, PtySession}; pub const MEMCACHED_MEM_SIZE_MB: usize = 4 * 1024; -pub const MEMCACHED_NUM_QUERIES: usize = 1_000_000; +pub const MEMCACHED_NUM_QUERIES: usize = 10_000_000; pub const RACKSCALE_MEMCACHED_CSV_COLUMNS: &str = "git_rev,benchmark,os,protocol,npieces,nthreads,mem,queries,time,thpt\n"; From f2ed2b1c508eae51ac2cb405a93e3c6e09869650 Mon Sep 17 00:00:00 2001 From: Reto Achermann Date: Sun, 26 Nov 2023 19:48:19 -0800 Subject: [PATCH 30/44] memcached: 64G memory and 10M queries Signed-off-by: Reto Achermann --- kernel/testutils/src/memcached.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/testutils/src/memcached.rs b/kernel/testutils/src/memcached.rs index 8c4a2cb8c..54b83db14 100644 --- a/kernel/testutils/src/memcached.rs +++ b/kernel/testutils/src/memcached.rs @@ -11,7 +11,7 @@ use std::time::Duration; use rexpect::errors::*; use rexpect::session::{spawn_command, PtySession}; -pub const MEMCACHED_MEM_SIZE_MB: usize = 4 * 1024; +pub const MEMCACHED_MEM_SIZE_MB: usize = 64 * 1024; pub const MEMCACHED_NUM_QUERIES: usize = 10_000_000; pub const RACKSCALE_MEMCACHED_CSV_COLUMNS: &str = @@ -248,6 +248,7 @@ pub fn spawn_loadbalancer(config: &MemcachedShardedConfig, timeout_ms: u64) -> R } command.arg(servers.as_str()); command.current_dir(config.path.as_path()); + command.env("LD_LIBRARY_PATH", "build/lib") // give the servers some time to be spawned std::thread::sleep(Duration::from_secs(5)); From 3b5ec980b568381fb3e9d33b0c0884857b07bc6d Mon Sep 17 00:00:00 2001 From: Reto Achermann Date: Sun, 26 Nov 2023 19:50:26 -0800 Subject: [PATCH 31/44] fix compilation Signed-off-by: Reto Achermann --- kernel/testutils/src/memcached.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/testutils/src/memcached.rs b/kernel/testutils/src/memcached.rs index 54b83db14..c4049b656 100644 --- a/kernel/testutils/src/memcached.rs +++ b/kernel/testutils/src/memcached.rs @@ -248,7 +248,7 @@ pub fn spawn_loadbalancer(config: &MemcachedShardedConfig, timeout_ms: u64) -> R } 
command.arg(servers.as_str()); command.current_dir(config.path.as_path()); - command.env("LD_LIBRARY_PATH", "build/lib") + command.env("LD_LIBRARY_PATH", "build/lib"); // give the servers some time to be spawned std::thread::sleep(Duration::from_secs(5)); From f7e3f71a6c06eaf3a12934a056e3265ed790e3fd Mon Sep 17 00:00:00 2001 From: Reto Achermann Date: Sun, 26 Nov 2023 22:11:21 -0800 Subject: [PATCH 32/44] increase timeout Signed-off-by: Reto Achermann --- kernel/tests/s11_rackscale_benchmarks.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/tests/s11_rackscale_benchmarks.rs b/kernel/tests/s11_rackscale_benchmarks.rs index 917c3bc60..f2456781a 100644 --- a/kernel/tests/s11_rackscale_benchmarks.rs +++ b/kernel/tests/s11_rackscale_benchmarks.rs @@ -915,7 +915,7 @@ fn s11_linux_memcached_sharded_benchmark() { } }; - let timeout_ms = if is_smoke { 60_000 } else { 1200_000 }; + let timeout_ms = if is_smoke { 60_000 } else { std::cmp::max(config.mem_size / 10 * 1000, 60_000) + std::cmp::max(60_000, config.num_queries / 1000) } as u64; fn run_benchmark_internal(config: &MemcachedShardedConfig, timeout_ms: u64) -> PtySession { Command::new("killall").args(&["memcached"]).status().ok(); From 6085529e7ddf3e9a94ba0387548562fa25748de6 Mon Sep 17 00:00:00 2001 From: Reto Achermann Date: Mon, 27 Nov 2023 10:57:49 -0800 Subject: [PATCH 33/44] some fixes after rebasing on the large-shmem branch Signed-off-by: Reto Achermann --- kernel/tests/s10_benchmarks.rs | 4 ++-- kernel/tests/s11_rackscale_benchmarks.rs | 4 ++-- kernel/testutils/src/rackscale_runner.rs | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/kernel/tests/s10_benchmarks.rs b/kernel/tests/s10_benchmarks.rs index 403a6fa66..4bac86257 100644 --- a/kernel/tests/s10_benchmarks.rs +++ b/kernel/tests/s10_benchmarks.rs @@ -901,10 +901,10 @@ fn s10_memcached_benchmark_internal() { } else { ( // keep in sync with the s11_ra - 32 * 1024, /* MB */ + 2* MEMCACHED_MEM_SIZE_MB, /* MB */ MEMCACHED_MEM_SIZE_MB, MEMCACHED_NUM_QUERIES, - 600_000, + std::cmp::max(60_000, MEMCACHED_NUM_QUERIES) as u64, ) }; diff --git a/kernel/tests/s11_rackscale_benchmarks.rs b/kernel/tests/s11_rackscale_benchmarks.rs index f2456781a..99f2c98fd 100644 --- a/kernel/tests/s11_rackscale_benchmarks.rs +++ b/kernel/tests/s11_rackscale_benchmarks.rs @@ -615,10 +615,10 @@ fn rackscale_memcached_internal_benchmark(transport: RackscaleTransport) { } fn rackscale_timeout_fn(num_cores: usize) -> u64 { - if is_smoke { + if cfg!(feature = "smoke") { 60_000 as u64 } else { - MEMCACHED_MEM_SIZE_MB / 10 * 1000 + MEMCACHED_NUM_QUERIES / 1000 + (MEMCACHED_MEM_SIZE_MB / 10 * 1000 + MEMCACHED_NUM_QUERIES) as u64 } } diff --git a/kernel/testutils/src/rackscale_runner.rs b/kernel/testutils/src/rackscale_runner.rs index 30ba5fc43..937c016da 100644 --- a/kernel/testutils/src/rackscale_runner.rs +++ b/kernel/testutils/src/rackscale_runner.rs @@ -882,13 +882,13 @@ impl RackscaleBench { if is_baseline { test_run.client_timeout = (self.baseline_timeout_fn)(total_cores); // Total client memory in test is: (mem_based_on_cores) + shmem_size * num_clients - test_run.memory = (self.mem_fn)(total_cores, is_smoke) + test_run.memory = (self.mem_fn)(total_cores, cores_per_client, is_smoke) + test_run.shmem_size * test_run.num_clients; test_run.run_baseline(); } else { test_run.client_timeout = (self.rackscale_timeout_fn)(total_cores); - test_run.memory = (self.mem_fn)(total_cores, is_smoke) / test_run.num_clients; + test_run.memory = (self.mem_fn)(total_cores, 
cores_per_client, is_smoke) / test_run.num_clients; test_run.run_rackscale(); } From 82c757dd0323fe441d974d82ea9356a8e04fba1a Mon Sep 17 00:00:00 2001 From: Reto Achermann Date: Mon, 27 Nov 2023 12:08:23 -0800 Subject: [PATCH 34/44] memcached: increase memory size to avoid running out of memory Signed-off-by: Reto Achermann --- kernel/tests/s10_benchmarks.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/tests/s10_benchmarks.rs b/kernel/tests/s10_benchmarks.rs index 4bac86257..23d109d73 100644 --- a/kernel/tests/s10_benchmarks.rs +++ b/kernel/tests/s10_benchmarks.rs @@ -901,7 +901,7 @@ fn s10_memcached_benchmark_internal() { } else { ( // keep in sync with the s11_ra - 2* MEMCACHED_MEM_SIZE_MB, /* MB */ + 4* MEMCACHED_MEM_SIZE_MB, /* MB */ MEMCACHED_MEM_SIZE_MB, MEMCACHED_NUM_QUERIES, std::cmp::max(60_000, MEMCACHED_NUM_QUERIES) as u64, From bf776d46aee9acb1458992139f5101b35ef4226f Mon Sep 17 00:00:00 2001 From: Reto Achermann Date: Mon, 27 Nov 2023 19:23:17 -0800 Subject: [PATCH 35/44] correctly set timeout in rackscale runner Signed-off-by: Reto Achermann --- kernel/tests/s11_rackscale_benchmarks.rs | 2 +- kernel/testutils/src/memcached.rs | 4 ++-- kernel/testutils/src/rackscale_runner.rs | 9 ++++++--- kernel/testutils/src/runner_args.rs | 1 + 4 files changed, 10 insertions(+), 6 deletions(-) diff --git a/kernel/tests/s11_rackscale_benchmarks.rs b/kernel/tests/s11_rackscale_benchmarks.rs index 99f2c98fd..ff1cc9bde 100644 --- a/kernel/tests/s11_rackscale_benchmarks.rs +++ b/kernel/tests/s11_rackscale_benchmarks.rs @@ -618,7 +618,7 @@ fn rackscale_memcached_internal_benchmark(transport: RackscaleTransport) { if cfg!(feature = "smoke") { 60_000 as u64 } else { - (MEMCACHED_MEM_SIZE_MB / 10 * 1000 + MEMCACHED_NUM_QUERIES) as u64 + ((MEMCACHED_MEM_SIZE_MB / 10 + MEMCACHED_NUM_QUERIES) * 1000) as u64 } } diff --git a/kernel/testutils/src/memcached.rs b/kernel/testutils/src/memcached.rs index c4049b656..9520239df 100644 --- a/kernel/testutils/src/memcached.rs +++ b/kernel/testutils/src/memcached.rs @@ -40,7 +40,7 @@ pub struct MemcachedResult { pub fn parse_memcached_output(p: &mut PtySession, output: &mut String) -> Result { // x_benchmark_mem = 10 MB let (prev, matched) = p.exp_regex(r#"x_benchmark_mem = (\d+) MB"#)?; - // println!("> {}", matched); + println!("> {}", matched); let b_mem = matched.replace("x_benchmark_mem = ", "").replace(" MB", ""); *output += prev.as_str(); @@ -48,7 +48,7 @@ pub fn parse_memcached_output(p: &mut PtySession, output: &mut String) -> Result // number of threads: 3 let (prev, matched) = p.exp_regex(r#"number of threads: (\d+)"#)?; - // println!("> {}", matched); + println!("> {}", matched); let b_threads = matched.replace("number of threads: ", ""); *output += prev.as_str(); diff --git a/kernel/testutils/src/rackscale_runner.rs b/kernel/testutils/src/rackscale_runner.rs index 937c016da..e7bd25a0c 100644 --- a/kernel/testutils/src/rackscale_runner.rs +++ b/kernel/testutils/src/rackscale_runner.rs @@ -872,9 +872,6 @@ impl RackscaleBench { test_run.cores_per_client = cores_per_client; test_run.num_clients = num_clients; - // Set controller timeout for this test - test_run.controller_timeout = test_run.client_timeout; - // Calculate command based on the number of cores test_run.cmd = (self.cmd_fn)(total_cores, num_clients, test_run.arg.clone()); @@ -885,11 +882,17 @@ impl RackscaleBench { test_run.memory = (self.mem_fn)(total_cores, cores_per_client, is_smoke) + test_run.shmem_size * test_run.num_clients; + // Set controller timeout for 
this test + test_run.controller_timeout = test_run.client_timeout; + test_run.run_baseline(); } else { test_run.client_timeout = (self.rackscale_timeout_fn)(total_cores); test_run.memory = (self.mem_fn)(total_cores, cores_per_client, is_smoke) / test_run.num_clients; + // Set controller timeout for this test + test_run.controller_timeout = test_run.client_timeout; + test_run.run_rackscale(); } diff --git a/kernel/testutils/src/runner_args.rs b/kernel/testutils/src/runner_args.rs index 7f935b173..df86d3db7 100644 --- a/kernel/testutils/src/runner_args.rs +++ b/kernel/testutils/src/runner_args.rs @@ -641,6 +641,7 @@ pub fn wait_for_sigterm_or_successful_exit_no_log( use rexpect::errors::ErrorKind::Timeout; match e { Error(Timeout(expected, got, _timeout), _st) => { + println!("Timeout"); println!("Expected: `{expected}`\n"); println!("Got:",); let count = got.lines().count(); From a80b98c2038175a967e2b181eec3d2f184dd74a2 Mon Sep 17 00:00:00 2001 From: Reto Achermann Date: Thu, 30 Nov 2023 08:44:20 -0800 Subject: [PATCH 36/44] committing the working memcached benchmark Signed-off-by: Reto Achermann --- kernel/tests/s11_rackscale_benchmarks.rs | 17 ++++++++++------- kernel/testutils/src/memcached.rs | 17 ++++++++++++++--- kernel/testutils/src/rackscale_runner.rs | 2 +- 3 files changed, 25 insertions(+), 11 deletions(-) diff --git a/kernel/tests/s11_rackscale_benchmarks.rs b/kernel/tests/s11_rackscale_benchmarks.rs index ff1cc9bde..be06627f8 100644 --- a/kernel/tests/s11_rackscale_benchmarks.rs +++ b/kernel/tests/s11_rackscale_benchmarks.rs @@ -518,6 +518,8 @@ fn rackscale_memcached_internal_benchmark(transport: RackscaleTransport) { // match the title let (prev, matched) = proc.exp_regex(r#"INTERNAL BENCHMARK CONFIGURE"#)?; + println!("Configured. Waiting for benchmark to start..."); + *output += prev.as_str(); *output += matched.as_str(); @@ -618,7 +620,7 @@ fn rackscale_memcached_internal_benchmark(transport: RackscaleTransport) { if cfg!(feature = "smoke") { 60_000 as u64 } else { - ((MEMCACHED_MEM_SIZE_MB / 10 + MEMCACHED_NUM_QUERIES) * 1000) as u64 + ((MEMCACHED_MEM_SIZE_MB * 1000 / 10 + MEMCACHED_NUM_QUERIES / 1000)) as u64 } } @@ -630,12 +632,13 @@ fn rackscale_memcached_internal_benchmark(transport: RackscaleTransport) { } else { // Memory must also be divisible by number of nodes, which could be 1, 2, 3, or 4 // memory = result of this function / num_clients - shmem_size - (base_memory - + std::cmp::max( - MEMCACHED_MEM_SIZE_MB * 2, - testutils::helpers::SHMEM_SIZE * 2, - )) - * num_clients + // (base_memory + // + std::cmp::max( + // MEMCACHED_MEM_SIZE_MB * 2, + // testutils::helpers::SHMEM_SIZE * 2, + // )) + // * num_clients + base_memory } } diff --git a/kernel/testutils/src/memcached.rs b/kernel/testutils/src/memcached.rs index 9520239df..160c73120 100644 --- a/kernel/testutils/src/memcached.rs +++ b/kernel/testutils/src/memcached.rs @@ -11,7 +11,7 @@ use std::time::Duration; use rexpect::errors::*; use rexpect::session::{spawn_command, PtySession}; -pub const MEMCACHED_MEM_SIZE_MB: usize = 64 * 1024; +pub const MEMCACHED_MEM_SIZE_MB: usize = 64 * 1024; // 64 * 1024; pub const MEMCACHED_NUM_QUERIES: usize = 10_000_000; pub const RACKSCALE_MEMCACHED_CSV_COLUMNS: &str = @@ -56,14 +56,25 @@ pub fn parse_memcached_output(p: &mut PtySession, output: &mut String) -> Result // number of keys: 131072 let (prev, matched) = p.exp_regex(r#"number of keys: (\d+)"#)?; - // println!("> {}", matched); + println!("> {}", matched); *output += prev.as_str(); *output += matched.as_str(); + + // 
number of keys: 131072 + let (prev, matched) = p.exp_regex(r#"Prefilling slabs"#)?; + println!("> {}", matched); + + *output += prev.as_str(); + *output += matched.as_str(); + + let (prev, matched) = p.exp_regex(r#"Executing (\d+) queries with (\d+) threads."#)?; + println!("> {}", matched); + // benchmark took 129 seconds let (prev, matched) = p.exp_regex(r#"benchmark took (\d+) ms"#)?; - // println!("> {}", matched); + println!("> {}", matched); let b_time = matched.replace("benchmark took ", "").replace(" ms", ""); *output += prev.as_str(); diff --git a/kernel/testutils/src/rackscale_runner.rs b/kernel/testutils/src/rackscale_runner.rs index e7bd25a0c..c98aee880 100644 --- a/kernel/testutils/src/rackscale_runner.rs +++ b/kernel/testutils/src/rackscale_runner.rs @@ -888,7 +888,7 @@ impl RackscaleBench { test_run.run_baseline(); } else { test_run.client_timeout = (self.rackscale_timeout_fn)(total_cores); - test_run.memory = (self.mem_fn)(total_cores, cores_per_client, is_smoke) / test_run.num_clients; + test_run.memory = (self.mem_fn)(total_cores, cores_per_client, is_smoke); // Set controller timeout for this test test_run.controller_timeout = test_run.client_timeout; From 9f03c7deefd83a98154bd415973a60549d9fa864 Mon Sep 17 00:00:00 2001 From: Reto Achermann Date: Sat, 2 Dec 2023 18:10:57 -0800 Subject: [PATCH 37/44] bump memcached versions Signed-off-by: Reto Achermann --- kernel/testutils/src/memcached.rs | 4 ++-- usr/rkapps/build.rs | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/testutils/src/memcached.rs b/kernel/testutils/src/memcached.rs index 160c73120..cb1e13e54 100644 --- a/kernel/testutils/src/memcached.rs +++ b/kernel/testutils/src/memcached.rs @@ -153,12 +153,12 @@ pub fn rackscale_memcached_checkout(tmpdir: &str) { } println!( - "CHECKOUT a703eedd8032ff1e083e8c5972eacc95738c797b {:?}", + "CHECKOUT 2c521ec573da7cad604670dde3c9c369dba01f75 {:?}", out_dir ); let res = Command::new("git") - .args(&["checkout", "a703eedd8032ff1e083e8c5972eacc95738c797b"]) + .args(&["checkout", "2c521ec573da7cad604670dde3c9c369dba01f75"]) .current_dir(out_dir_path.as_path()) .output() .expect("git checkout failed"); diff --git a/usr/rkapps/build.rs b/usr/rkapps/build.rs index 2e3dd2dbd..e2a6832dd 100644 --- a/usr/rkapps/build.rs +++ b/usr/rkapps/build.rs @@ -138,11 +138,11 @@ fn main() { .unwrap(); println!( - "CHECKOUT eece690294fbfed418f43034b5dc77290865f8cf {:?}", + "CHECKOUT d090e547b7bc9a7033551b9c2471993f59c5abd6 {:?}", out_dir ); Command::new("git") - .args(&["checkout", "eece690294fbfed418f43034b5dc77290865f8cf"]) + .args(&["checkout", "d090e547b7bc9a7033551b9c2471993f59c5abd6"]) .current_dir(&Path::new(&out_dir)) .status() .unwrap(); From 1c552bc881988c08fc139a6e8d1ac360b56aaeb9 Mon Sep 17 00:00:00 2001 From: Reto Achermann Date: Mon, 4 Dec 2023 15:17:27 -0800 Subject: [PATCH 38/44] some more tweaks in the memcachd benchmark Signed-off-by: Reto Achermann --- kernel/tests/s10_benchmarks.rs | 72 ++---------------------- kernel/tests/s11_rackscale_benchmarks.rs | 14 ++--- kernel/testutils/src/memcached.rs | 56 +++++++++++++++++- 3 files changed, 67 insertions(+), 75 deletions(-) diff --git a/kernel/tests/s10_benchmarks.rs b/kernel/tests/s10_benchmarks.rs index 23d109d73..9b115e38f 100644 --- a/kernel/tests/s10_benchmarks.rs +++ b/kernel/tests/s10_benchmarks.rs @@ -22,7 +22,7 @@ use serde::Serialize; use testutils::builder::{BuildArgs, Machine}; use testutils::helpers::{setup_network, spawn_dhcpd, spawn_nrk, DHCP_ACK_MATCH}; -use 
testutils::memcached::{MEMCACHED_MEM_SIZE_MB, MEMCACHED_NUM_QUERIES}; +use testutils::memcached::{MEMCACHED_MEM_SIZE_MB, MEMCACHED_NUM_QUERIES, parse_memcached_output}; use testutils::redis::{redis_benchmark, REDIS_BENCHMARK, REDIS_START_MATCH}; use testutils::runner_args::{check_for_successful_exit, wait_for_sigterm, RunnerArgs}; @@ -901,7 +901,7 @@ fn s10_memcached_benchmark_internal() { } else { ( // keep in sync with the s11_ra - 4* MEMCACHED_MEM_SIZE_MB, /* MB */ + std::cmp::max(8192, 4 * MEMCACHED_MEM_SIZE_MB), /* MB */ MEMCACHED_MEM_SIZE_MB, MEMCACHED_NUM_QUERIES, std::cmp::max(60_000, MEMCACHED_NUM_QUERIES) as u64, @@ -927,7 +927,7 @@ fn s10_memcached_benchmark_internal() { let cmdline = RunnerArgs::new_with_build("userspace-smp", &build) .timeout(timeout) - .cores(machine.max_cores()) + .cores(*thread) .nodes(2) .use_virtio() .memory(qemu_mem) @@ -942,70 +942,8 @@ fn s10_memcached_benchmark_internal() { output += dhcp_server.exp_string(DHCP_ACK_MATCH)?.as_str(); - // match the title - let (prev, matched) = p.exp_regex(r#"INTERNAL BENCHMARK CONFIGURE"#)?; - - output += prev.as_str(); - output += matched.as_str(); - - // x_benchmark_mem = 10 MB - let (prev, matched) = p.exp_regex(r#"x_benchmark_mem = (\d+) MB"#)?; - println!("> {}", matched); - let b_mem = matched.replace("x_benchmark_mem = ", "").replace(" MB", ""); - - output += prev.as_str(); - output += matched.as_str(); - - // number of threads: 3 - let (prev, matched) = p.exp_regex(r#"number of threads: (\d+)"#)?; - println!("> {}", matched); - let b_threads = matched.replace("number of threads: ", ""); - - output += prev.as_str(); - output += matched.as_str(); - - // number of keys: 131072 - let (prev, matched) = p.exp_regex(r#"number of keys: (\d+)"#)?; - println!("> {}", matched); - - output += prev.as_str(); - output += matched.as_str(); - let (prev, matched) = p.exp_regex(r#"Executing (\d+) queries with (\d+) threads"#)?; - println!("> {}", matched); - - output += prev.as_str(); - output += matched.as_str(); - - // benchmark took 129 seconds - let (prev, matched) = p.exp_regex(r#"benchmark took (\d+) ms"#)?; - println!("> {}", matched); - let b_time = matched.replace("benchmark took ", "").replace(" ms", ""); - - output += prev.as_str(); - output += matched.as_str(); - - // benchmark took 7937984 queries / second - let (prev, matched) = p.exp_regex(r#"benchmark took (\d+) queries / second"#)?; - println!("> {}", matched); - let b_thpt = matched - .replace("benchmark took ", "") - .replace(" queries / second", ""); - - output += prev.as_str(); - output += matched.as_str(); - - let (prev, matched) = p.exp_regex(r#"benchmark executed (\d+)"#)?; - println!("> {}", matched); - let b_queries = matched - .replace("benchmark executed ", "") - .split(' ') - .next() - .unwrap() - .to_string(); - - output += prev.as_str(); - output += matched.as_str(); + let ret = parse_memcached_output(&mut p ,*thread, &mut output)?; // Append parsed results to a CSV file let write_headers = !Path::new(file_name).exists(); @@ -1024,7 +962,7 @@ fn s10_memcached_benchmark_internal() { assert!(r.is_ok()); let out = format!( "memcached,{},{},{},{},{}", - b_threads, b_mem, b_queries, b_time, b_thpt, + ret.b_threads, ret.b_mem, ret.b_queries, ret.b_time, ret.b_thpt ); let r = csv_file.write(out.as_bytes()); assert!(r.is_ok()); diff --git a/kernel/tests/s11_rackscale_benchmarks.rs b/kernel/tests/s11_rackscale_benchmarks.rs index be06627f8..a2e58fa86 100644 --- a/kernel/tests/s11_rackscale_benchmarks.rs +++ b/kernel/tests/s11_rackscale_benchmarks.rs @@ 
-507,7 +507,7 @@ fn rackscale_memcached_internal_benchmark(transport: RackscaleTransport) { fn controller_match_fn( proc: &mut PtySession, output: &mut String, - _cores_per_client: usize, + cores_per_client: usize, num_clients: usize, file_name: &str, is_baseline: bool, @@ -523,7 +523,7 @@ fn rackscale_memcached_internal_benchmark(transport: RackscaleTransport) { *output += prev.as_str(); *output += matched.as_str(); - let ret = parse_memcached_output(proc, output)?; + let ret = parse_memcached_output(proc, num_clients * cores_per_client, output)?; // Append parsed results to a CSV file let write_headers = !Path::new(file_name).exists(); @@ -995,7 +995,7 @@ fn s11_linux_memcached_sharded_benchmark() { let mut pty = run_benchmark_internal(&config, timeout_ms); let mut output = String::new(); - let res = parse_memcached_output(&mut pty, &mut output).expect("could not parse output!"); + let res = parse_memcached_output(&mut pty, config.num_threads, &mut output).expect("could not parse output!"); let r = csv_file.write(format!("{},", env!("GIT_HASH")).as_bytes()); assert!(r.is_ok()); let out = format!( @@ -1039,7 +1039,7 @@ fn s11_linux_memcached_sharded_benchmark() { .expect("failed to spawn load balancer"); let mut output = String::new(); use rexpect::errors::ErrorKind::Timeout; - match parse_memcached_output(&mut pty, &mut output) { + match parse_memcached_output(&mut pty, config.num_threads, &mut output) { Ok(res) => { let r = csv_file.write(format!("{},", env!("GIT_HASH")).as_bytes()); assert!(r.is_ok()); @@ -1219,8 +1219,8 @@ fn s11_rackscale_memcached_benchmark_sharded_nros() { fn controller_match_fn( proc: &mut PtySession, output: &mut String, - _cores_per_client: usize, - _num_clients: usize, + cores_per_client: usize, + num_clients: usize, file_name: &str, _is_baseline: bool, _arg: Option, @@ -1233,7 +1233,7 @@ fn s11_rackscale_memcached_benchmark_sharded_nros() { use rexpect::errors::Error; use rexpect::errors::ErrorKind::Timeout; - let res = match parse_memcached_output(proc, output) { + let res = match parse_memcached_output(proc, num_clients * cores_per_client, output) { Ok(res) => res, Err(Error(Timeout(expected, got, timeout), st)) => { println!("Expected: `{expected}`\n"); diff --git a/kernel/testutils/src/memcached.rs b/kernel/testutils/src/memcached.rs index cb1e13e54..4982c5343 100644 --- a/kernel/testutils/src/memcached.rs +++ b/kernel/testutils/src/memcached.rs @@ -37,7 +37,7 @@ pub struct MemcachedResult { pub b_thpt: String, } -pub fn parse_memcached_output(p: &mut PtySession, output: &mut String) -> Result { +pub fn parse_memcached_output(p: &mut PtySession, num_threads: usize, output: &mut String) -> Result { // x_benchmark_mem = 10 MB let (prev, matched) = p.exp_regex(r#"x_benchmark_mem = (\d+) MB"#)?; println!("> {}", matched); @@ -69,9 +69,63 @@ pub fn parse_memcached_output(p: &mut PtySession, output: &mut String) -> Result *output += prev.as_str(); *output += matched.as_str(); + + // number of keys: 131072 + let (prev, matched) = p.exp_regex(r#"Prefilling slabs took (\d+) ms"#)?; + println!("> {}", matched); + + *output += prev.as_str(); + *output += matched.as_str(); + + for i in 0..num_threads { + let (prev, matched) = p.exp_regex(r#"starting thread (\d+) / (\d+)"#)?; + println!("> {}", matched); + + *output += prev.as_str(); + *output += matched.as_str(); + } + + // number of keys: 131072 + let (prev, matched) = p.exp_regex(r#"starting all (\d+) threads"#)?; + println!("> {}", matched); + + *output += prev.as_str(); + *output += matched.as_str(); + + + + for i 
in 0..num_threads { + let (prev, matched) = p.exp_regex(r#"thread.(\d+) start running"#)?; + println!("> {}", matched); + *output += prev.as_str(); + *output += matched.as_str(); + } + + for i in 0..num_threads { + let (prev, matched) = p.exp_regex(r#"populate: thread.(\d+) done. added (\d+) elements, (\d+) not added of which (\d+) already existed"#)?; + println!("> {}", matched); + *output += prev.as_str(); + *output += matched.as_str(); + } + let (prev, matched) = p.exp_regex(r#"Executing (\d+) queries with (\d+) threads."#)?; println!("> {}", matched); + for i in 0..num_threads { + let (prev, matched) = p.exp_regex(r#"execute: thread.(\d+) startes executing with connection "#)?; + println!("> {}", matched); + *output += prev.as_str(); + *output += matched.as_str(); + } + + for i in 0..num_threads { + let (prev, matched) = p.exp_regex(r#"execute: thread.(\d+) done. executed (\d+) found (\d+), missed (\d+)"#)?; + println!("> {}", matched); + *output += prev.as_str(); + *output += matched.as_str(); + } + + // benchmark took 129 seconds let (prev, matched) = p.exp_regex(r#"benchmark took (\d+) ms"#)?; println!("> {}", matched); From d06f1718c7dee232b49d4b26a36e29921ab89c48 Mon Sep 17 00:00:00 2001 From: Reto Achermann Date: Mon, 4 Dec 2023 18:43:46 -0800 Subject: [PATCH 39/44] memcached: setting the number of threads propery Signed-off-by: Reto Achermann --- kernel/tests/s10_benchmarks.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/kernel/tests/s10_benchmarks.rs b/kernel/tests/s10_benchmarks.rs index 9b115e38f..9f797a667 100644 --- a/kernel/tests/s10_benchmarks.rs +++ b/kernel/tests/s10_benchmarks.rs @@ -917,9 +917,13 @@ fn s10_memcached_benchmark_internal() { } println!(); + + let total_cores_per_node = core::cmp::max(1, machine.max_cores() / machine.max_numa_nodes()); for thread in threads.iter() { println!("\n\nRunning memcached internal benchmark with {thread} threads, {queries} GETs and {memsize}MB memory. "); + let num_nodes = (thread + (total_cores_per_node - 1)) / total_cores_per_node; + let kernel_cmdline = format!( r#"init=memcachedbench.bin initargs={} appcmd='--x-benchmark-mem={} --x-benchmark-queries={}'"#, *thread, memsize, queries @@ -928,7 +932,7 @@ fn s10_memcached_benchmark_internal() { let cmdline = RunnerArgs::new_with_build("userspace-smp", &build) .timeout(timeout) .cores(*thread) - .nodes(2) + .nodes(num_nodes) .use_virtio() .memory(qemu_mem) .setaffinity(Vec::new()) @@ -942,6 +946,10 @@ fn s10_memcached_benchmark_internal() { output += dhcp_server.exp_string(DHCP_ACK_MATCH)?.as_str(); + // somehow that needs to be here ??? 
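+        // (Guess: this mirrors the INTERNAL BENCHMARK CONFIGURE match done by
+        // controller_match_fn in s11_rackscale_benchmarks.rs, so the banner is
+        // consumed before parse_memcached_output starts matching.)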
+ let (prev, matched) = p.exp_regex(r#"INTERNAL BENCHMARK CONFIGURE"#)?; + output += prev.as_str(); + output += matched.as_str(); let ret = parse_memcached_output(&mut p ,*thread, &mut output)?; From bdcf45ea6d98f1de5ac7028d4c01e8ef8b13aa7a Mon Sep 17 00:00:00 2001 From: Reto Achermann Date: Mon, 4 Dec 2023 18:44:38 -0800 Subject: [PATCH 40/44] memcached: match on multiple things to account for reordering Signed-off-by: Reto Achermann --- kernel/testutils/src/memcached.rs | 21 +++------------------ 1 file changed, 3 insertions(+), 18 deletions(-) diff --git a/kernel/testutils/src/memcached.rs b/kernel/testutils/src/memcached.rs index 4982c5343..b946560db 100644 --- a/kernel/testutils/src/memcached.rs +++ b/kernel/testutils/src/memcached.rs @@ -77,30 +77,15 @@ pub fn parse_memcached_output(p: &mut PtySession, num_threads: usize, output: & *output += prev.as_str(); *output += matched.as_str(); - for i in 0..num_threads { - let (prev, matched) = p.exp_regex(r#"starting thread (\d+) / (\d+)"#)?; + // there could be some reordering happening here with the prints, so we account for all of them. + for i in 0..(2* num_threads + 1) { + let (prev, matched) = p.exp_regex(r#"(thread.(\d+) start running|starting all (\d+) threads|starting thread (\d+) / (\d+))"#)?; println!("> {}", matched); *output += prev.as_str(); *output += matched.as_str(); } - // number of keys: 131072 - let (prev, matched) = p.exp_regex(r#"starting all (\d+) threads"#)?; - println!("> {}", matched); - - *output += prev.as_str(); - *output += matched.as_str(); - - - - for i in 0..num_threads { - let (prev, matched) = p.exp_regex(r#"thread.(\d+) start running"#)?; - println!("> {}", matched); - *output += prev.as_str(); - *output += matched.as_str(); - } - for i in 0..num_threads { let (prev, matched) = p.exp_regex(r#"populate: thread.(\d+) done. 
added (\d+) elements, (\d+) not added of which (\d+) already existed"#)?; println!("> {}", matched); From a486aec307e12d499c1fd7226691b5153b3a2b60 Mon Sep 17 00:00:00 2001 From: Reto Achermann Date: Mon, 4 Dec 2023 18:47:14 -0800 Subject: [PATCH 41/44] update librettors Signed-off-by: Reto Achermann --- usr/rkapps/build.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/usr/rkapps/build.rs b/usr/rkapps/build.rs index e2a6832dd..8c9042b41 100644 --- a/usr/rkapps/build.rs +++ b/usr/rkapps/build.rs @@ -133,16 +133,16 @@ fn main() { println!("CLONE {:?}", out_dir); let url = "https://github.com/gz/librettos-packages.git"; Command::new("git") - .args(&["clone", "--depth=1", url, out_dir.as_str()]) + .args(&["clone", url, out_dir.as_str()]) .status() .unwrap(); println!( - "CHECKOUT d090e547b7bc9a7033551b9c2471993f59c5abd6 {:?}", + "CHECKOUT 1386c46c25d74eabc255c76184aaccd1eb985768 {:?}", out_dir ); Command::new("git") - .args(&["checkout", "d090e547b7bc9a7033551b9c2471993f59c5abd6"]) + .args(&["checkout", "1386c46c25d74eabc255c76184aaccd1eb985768"]) .current_dir(&Path::new(&out_dir)) .status() .unwrap(); From 481ead01f13357d85847743eb94d57f1abee47ae Mon Sep 17 00:00:00 2001 From: zmckevitt Date: Mon, 11 Mar 2024 20:46:42 -0400 Subject: [PATCH 42/44] updated memcached hashes to updated version, updated parse_memcached_output to fit new version --- kernel/testutils/src/memcached.rs | 51 ++----------------------------- usr/rkapps/build.rs | 4 +-- 2 files changed, 4 insertions(+), 51 deletions(-) diff --git a/kernel/testutils/src/memcached.rs b/kernel/testutils/src/memcached.rs index b946560db..33f36afb9 100644 --- a/kernel/testutils/src/memcached.rs +++ b/kernel/testutils/src/memcached.rs @@ -61,56 +61,9 @@ pub fn parse_memcached_output(p: &mut PtySession, num_threads: usize, output: & *output += prev.as_str(); *output += matched.as_str(); - - // number of keys: 131072 - let (prev, matched) = p.exp_regex(r#"Prefilling slabs"#)?; - println!("> {}", matched); - - *output += prev.as_str(); - *output += matched.as_str(); - - - // number of keys: 131072 - let (prev, matched) = p.exp_regex(r#"Prefilling slabs took (\d+) ms"#)?; - println!("> {}", matched); - - *output += prev.as_str(); - *output += matched.as_str(); - - // there could be some reordering happening here with the prints, so we account for all of them. - for i in 0..(2* num_threads + 1) { - let (prev, matched) = p.exp_regex(r#"(thread.(\d+) start running|starting all (\d+) threads|starting thread (\d+) / (\d+))"#)?; - println!("> {}", matched); - - *output += prev.as_str(); - *output += matched.as_str(); - } - - for i in 0..num_threads { - let (prev, matched) = p.exp_regex(r#"populate: thread.(\d+) done. added (\d+) elements, (\d+) not added of which (\d+) already existed"#)?; - println!("> {}", matched); - *output += prev.as_str(); - *output += matched.as_str(); - } - let (prev, matched) = p.exp_regex(r#"Executing (\d+) queries with (\d+) threads."#)?; println!("> {}", matched); - for i in 0..num_threads { - let (prev, matched) = p.exp_regex(r#"execute: thread.(\d+) startes executing with connection "#)?; - println!("> {}", matched); - *output += prev.as_str(); - *output += matched.as_str(); - } - - for i in 0..num_threads { - let (prev, matched) = p.exp_regex(r#"execute: thread.(\d+) done. 
executed (\d+) found (\d+), missed (\d+)"#)?; - println!("> {}", matched); - *output += prev.as_str(); - *output += matched.as_str(); - } - - // benchmark took 129 seconds let (prev, matched) = p.exp_regex(r#"benchmark took (\d+) ms"#)?; println!("> {}", matched); @@ -192,12 +145,12 @@ pub fn rackscale_memcached_checkout(tmpdir: &str) { } println!( - "CHECKOUT 2c521ec573da7cad604670dde3c9c369dba01f75 {:?}", + "CHECKOUT 0a4f217105d994d2ce438464041546ab4f4c4b2c {:?}", out_dir ); let res = Command::new("git") - .args(&["checkout", "2c521ec573da7cad604670dde3c9c369dba01f75"]) + .args(&["checkout", "0a4f217105d994d2ce438464041546ab4f4c4b2c"]) .current_dir(out_dir_path.as_path()) .output() .expect("git checkout failed"); diff --git a/usr/rkapps/build.rs b/usr/rkapps/build.rs index 8c9042b41..16d31f70b 100644 --- a/usr/rkapps/build.rs +++ b/usr/rkapps/build.rs @@ -138,11 +138,11 @@ fn main() { .unwrap(); println!( - "CHECKOUT 1386c46c25d74eabc255c76184aaccd1eb985768 {:?}", + "CHECKOUT 161e05606915fb9a29c8387db8702e16f85b8806 {:?}", out_dir ); Command::new("git") - .args(&["checkout", "1386c46c25d74eabc255c76184aaccd1eb985768"]) + .args(&["checkout", "161e05606915fb9a29c8387db8702e16f85b8806"]) .current_dir(&Path::new(&out_dir)) .status() .unwrap(); From fce5f9a6f2badb3d54c9917393e7c56286b1bd3d Mon Sep 17 00:00:00 2001 From: zmckevitt Date: Tue, 12 Mar 2024 01:12:22 -0400 Subject: [PATCH 43/44] ignoring s10 memcached test --- kernel/tests/s10_benchmarks.rs | 8 ++++---- scripts/ci.bash | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/kernel/tests/s10_benchmarks.rs b/kernel/tests/s10_benchmarks.rs index 9f797a667..f7caa0a0a 100644 --- a/kernel/tests/s10_benchmarks.rs +++ b/kernel/tests/s10_benchmarks.rs @@ -22,7 +22,7 @@ use serde::Serialize; use testutils::builder::{BuildArgs, Machine}; use testutils::helpers::{setup_network, spawn_dhcpd, spawn_nrk, DHCP_ACK_MATCH}; -use testutils::memcached::{MEMCACHED_MEM_SIZE_MB, MEMCACHED_NUM_QUERIES, parse_memcached_output}; +use testutils::memcached::{parse_memcached_output, MEMCACHED_MEM_SIZE_MB, MEMCACHED_NUM_QUERIES}; use testutils::redis::{redis_benchmark, REDIS_BENCHMARK, REDIS_START_MATCH}; use testutils::runner_args::{check_for_successful_exit, wait_for_sigterm, RunnerArgs}; @@ -874,6 +874,7 @@ fn s10_leveldb_benchmark() { } #[test] +#[ignore] fn s10_memcached_benchmark_internal() { setup_network(1); @@ -917,7 +918,6 @@ fn s10_memcached_benchmark_internal() { } println!(); - let total_cores_per_node = core::cmp::max(1, machine.max_cores() / machine.max_numa_nodes()); for thread in threads.iter() { println!("\n\nRunning memcached internal benchmark with {thread} threads, {queries} GETs and {memsize}MB memory. 
"); @@ -951,7 +951,7 @@ fn s10_memcached_benchmark_internal() { output += prev.as_str(); output += matched.as_str(); - let ret = parse_memcached_output(&mut p ,*thread, &mut output)?; + let ret = parse_memcached_output(&mut p, *thread, &mut output)?; // Append parsed results to a CSV file let write_headers = !Path::new(file_name).exists(); @@ -970,7 +970,7 @@ fn s10_memcached_benchmark_internal() { assert!(r.is_ok()); let out = format!( "memcached,{},{},{},{},{}", - ret.b_threads, ret.b_mem, ret.b_queries, ret.b_time, ret.b_thpt + ret.b_threads, ret.b_mem, ret.b_queries, ret.b_time, ret.b_thpt ); let r = csv_file.write(out.as_bytes()); assert!(r.is_ok()); diff --git a/scripts/ci.bash b/scripts/ci.bash index 53be5d247..0881edeb5 100644 --- a/scripts/ci.bash +++ b/scripts/ci.bash @@ -30,7 +30,7 @@ RUST_TEST_THREADS=1 cargo test --test s10* -- s10_fxmark_bench --nocapture RUST_TEST_THREADS=1 cargo test --test s11* -- s11_rackscale_shmem_vmops_maptput_benchmark --nocapture RUST_TEST_THREADS=1 cargo test --test s11* -- s11_rackscale_shmem_vmops_maplat_benchmark --nocapture RUST_TEST_THREADS=1 cargo test --test s11* -- s11_rackscale_shmem_fxmark_bench --nocapture -RUST_TEST_THREADS=1 cargo test --test s11* -- s11_rackscale_shmem_memcached_internal_benchmark --nocapture +RUST_TEST_THREADS=1 cargo test --features baseline --test s11* -- s11_rackscale_shmem_memcached_internal_benchmark --nocapture RUST_TEST_THREADS=1 cargo test --test s11* -- s11_linux_memcached_sharded_benchmark --nocapture # Clone repo From 4cdaace1104c69fe0a72dde6558e611908f6024a Mon Sep 17 00:00:00 2001 From: zmckevitt Date: Tue, 12 Mar 2024 12:36:23 -0500 Subject: [PATCH 44/44] increased timeout for benchmarks on CI runner and ignoring leveldb benchmark for now --- .github/workflows/skylake2x-tests.yml | 1 + .github/workflows/skylake4x-tests.yml | 3 ++- kernel/tests/s10_benchmarks.rs | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/skylake2x-tests.yml b/.github/workflows/skylake2x-tests.yml index 5bc73a452..e75ab2cec 100644 --- a/.github/workflows/skylake2x-tests.yml +++ b/.github/workflows/skylake2x-tests.yml @@ -49,3 +49,4 @@ jobs: bash scripts/ci.bash env: CI_MACHINE_TYPE: "skylake2x" + timeout-minutes: 600 diff --git a/.github/workflows/skylake4x-tests.yml b/.github/workflows/skylake4x-tests.yml index aacca4ab2..7a64cca9b 100644 --- a/.github/workflows/skylake4x-tests.yml +++ b/.github/workflows/skylake4x-tests.yml @@ -48,4 +48,5 @@ jobs: bash setup.sh bash scripts/ci.bash env: - CI_MACHINE_TYPE: "skylake4x" \ No newline at end of file + CI_MACHINE_TYPE: "skylake4x" + timeout-minutes: 600 diff --git a/kernel/tests/s10_benchmarks.rs b/kernel/tests/s10_benchmarks.rs index f7caa0a0a..17cba2dc0 100644 --- a/kernel/tests/s10_benchmarks.rs +++ b/kernel/tests/s10_benchmarks.rs @@ -771,6 +771,7 @@ fn s10_memcached_benchmark() { } #[test] +#[ignore] fn s10_leveldb_benchmark() { setup_network(1);