Skip to content

Commit

Permalink
ipc/msg: mitigate the lock contention with percpu counter
Browse files Browse the repository at this point in the history
The msg_bytes and msg_hdrs atomic counters are frequently updated when the
IPC msg queue is in heavy use, causing heavy cache bouncing and overhead.
Changing them to percpu_counter greatly improves performance.  Since
there is one percpu struct per namespace, the additional memory cost is
minimal.  Reading of the counts is done in the msgctl call, which is
infrequent, so the need to sum up the counts across CPUs is also infrequent.

Applied the patch and tested with pts/stress-ng-1.4.0
-- System V message passing (160 threads).

Score gain: 3.99x

CPU: ICX 8380 x 2 sockets
Core number: 40 x 2 physical cores
Benchmark: pts/stress-ng-1.4.0
-- system v message passing (160 threads)

Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Jiebin Sun <[email protected]>
Reviewed-by: Tim Chen <[email protected]>
Cc: Alexander Mikhalitsyn <[email protected]>
Cc: Alexey Gladkov <[email protected]>
Cc: Christoph Lameter <[email protected]>
Cc: Davidlohr Bueso <[email protected]>
Cc: Dennis Zhou <[email protected]>
Cc: "Eric W . Biederman" <[email protected]>
Cc: Manfred Spraul <[email protected]>
Cc: Shakeel Butt <[email protected]>
Cc: Tejun Heo <[email protected]>
Cc: Vasily Averin <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
  • Loading branch information
jiebinn authored and akpm00 committed Oct 1, 2022
1 parent cdfef1d commit a28f49b
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 19 deletions.
5 changes: 3 additions & 2 deletions include/linux/ipc_namespace.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include <linux/refcount.h>
#include <linux/rhashtable-types.h>
#include <linux/sysctl.h>
#include <linux/percpu_counter.h>

struct user_namespace;

Expand All @@ -36,8 +37,8 @@ struct ipc_namespace {
unsigned int msg_ctlmax;
unsigned int msg_ctlmnb;
unsigned int msg_ctlmni;
atomic_t msg_bytes;
atomic_t msg_hdrs;
struct percpu_counter percpu_msg_bytes;
struct percpu_counter percpu_msg_hdrs;

size_t shm_ctlmax;
size_t shm_ctlall;
Expand Down
44 changes: 30 additions & 14 deletions ipc/msg.c
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
#include <linux/nsproxy.h>
#include <linux/ipc_namespace.h>
#include <linux/rhashtable.h>
#include <linux/percpu_counter.h>

#include <asm/current.h>
#include <linux/uaccess.h>
Expand Down Expand Up @@ -285,10 +286,10 @@ static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
rcu_read_unlock();

list_for_each_entry_safe(msg, t, &msq->q_messages, m_list) {
atomic_dec(&ns->msg_hdrs);
percpu_counter_sub_local(&ns->percpu_msg_hdrs, 1);
free_msg(msg);
}
atomic_sub(msq->q_cbytes, &ns->msg_bytes);
percpu_counter_sub_local(&ns->percpu_msg_bytes, msq->q_cbytes);
ipc_update_pid(&msq->q_lspid, NULL);
ipc_update_pid(&msq->q_lrpid, NULL);
ipc_rcu_putref(&msq->q_perm, msg_rcu_free);
Expand Down Expand Up @@ -495,17 +496,18 @@ static int msgctl_info(struct ipc_namespace *ns, int msqid,
msginfo->msgssz = MSGSSZ;
msginfo->msgseg = MSGSEG;
down_read(&msg_ids(ns).rwsem);
if (cmd == MSG_INFO) {
if (cmd == MSG_INFO)
msginfo->msgpool = msg_ids(ns).in_use;
msginfo->msgmap = atomic_read(&ns->msg_hdrs);
msginfo->msgtql = atomic_read(&ns->msg_bytes);
max_idx = ipc_get_maxidx(&msg_ids(ns));
up_read(&msg_ids(ns).rwsem);
if (cmd == MSG_INFO) {
msginfo->msgmap = percpu_counter_sum(&ns->percpu_msg_hdrs);
msginfo->msgtql = percpu_counter_sum(&ns->percpu_msg_bytes);
} else {
msginfo->msgmap = MSGMAP;
msginfo->msgpool = MSGPOOL;
msginfo->msgtql = MSGTQL;
}
max_idx = ipc_get_maxidx(&msg_ids(ns));
up_read(&msg_ids(ns).rwsem);
return (max_idx < 0) ? 0 : max_idx;
}

Expand Down Expand Up @@ -935,8 +937,8 @@ static long do_msgsnd(int msqid, long mtype, void __user *mtext,
list_add_tail(&msg->m_list, &msq->q_messages);
msq->q_cbytes += msgsz;
msq->q_qnum++;
atomic_add(msgsz, &ns->msg_bytes);
atomic_inc(&ns->msg_hdrs);
percpu_counter_add_local(&ns->percpu_msg_bytes, msgsz);
percpu_counter_add_local(&ns->percpu_msg_hdrs, 1);
}

err = 0;
Expand Down Expand Up @@ -1159,8 +1161,8 @@ static long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, in
msq->q_rtime = ktime_get_real_seconds();
ipc_update_pid(&msq->q_lrpid, task_tgid(current));
msq->q_cbytes -= msg->m_ts;
atomic_sub(msg->m_ts, &ns->msg_bytes);
atomic_dec(&ns->msg_hdrs);
percpu_counter_sub_local(&ns->percpu_msg_bytes, msg->m_ts);
percpu_counter_sub_local(&ns->percpu_msg_hdrs, 1);
ss_wakeup(msq, &wake_q, false);

goto out_unlock0;
Expand Down Expand Up @@ -1297,20 +1299,34 @@ COMPAT_SYSCALL_DEFINE5(msgrcv, int, msqid, compat_uptr_t, msgp,
}
#endif

void msg_init_ns(struct ipc_namespace *ns)
int msg_init_ns(struct ipc_namespace *ns)
{
int ret;

ns->msg_ctlmax = MSGMAX;
ns->msg_ctlmnb = MSGMNB;
ns->msg_ctlmni = MSGMNI;

atomic_set(&ns->msg_bytes, 0);
atomic_set(&ns->msg_hdrs, 0);
ret = percpu_counter_init(&ns->percpu_msg_bytes, 0, GFP_KERNEL);
if (ret)
goto fail_msg_bytes;
ret = percpu_counter_init(&ns->percpu_msg_hdrs, 0, GFP_KERNEL);
if (ret)
goto fail_msg_hdrs;
ipc_init_ids(&ns->ids[IPC_MSG_IDS]);
return 0;

fail_msg_hdrs:
percpu_counter_destroy(&ns->percpu_msg_bytes);
fail_msg_bytes:
return ret;
}

#ifdef CONFIG_IPC_NS
void msg_exit_ns(struct ipc_namespace *ns)
{
percpu_counter_destroy(&ns->percpu_msg_bytes);
percpu_counter_destroy(&ns->percpu_msg_hdrs);
free_ipcs(ns, &msg_ids(ns), freeque);
idr_destroy(&ns->ids[IPC_MSG_IDS].ipcs_idr);
rhashtable_destroy(&ns->ids[IPC_MSG_IDS].key_ht);
Expand Down
5 changes: 4 additions & 1 deletion ipc/namespace.c
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,11 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
if (!setup_ipc_sysctls(ns))
goto fail_mq;

err = msg_init_ns(ns);
if (err)
goto fail_put;

sem_init_ns(ns);
msg_init_ns(ns);
shm_init_ns(ns);

return ns;
Expand Down
4 changes: 2 additions & 2 deletions ipc/util.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,15 +64,15 @@ static inline void mq_put_mnt(struct ipc_namespace *ns) { }

#ifdef CONFIG_SYSVIPC
void sem_init_ns(struct ipc_namespace *ns);
void msg_init_ns(struct ipc_namespace *ns);
int msg_init_ns(struct ipc_namespace *ns);
void shm_init_ns(struct ipc_namespace *ns);

void sem_exit_ns(struct ipc_namespace *ns);
void msg_exit_ns(struct ipc_namespace *ns);
void shm_exit_ns(struct ipc_namespace *ns);
#else
static inline void sem_init_ns(struct ipc_namespace *ns) { }
static inline void msg_init_ns(struct ipc_namespace *ns) { }
static inline int msg_init_ns(struct ipc_namespace *ns) { return 0;}
static inline void shm_init_ns(struct ipc_namespace *ns) { }

static inline void sem_exit_ns(struct ipc_namespace *ns) { }
Expand Down

0 comments on commit a28f49b

Please sign in to comment.