From 66d58f046c9d3a8f996b7138d02e965fd0617de0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 31 Aug 2023 13:52:08 +0000 Subject: [PATCH 1/5] net: use sk_forward_alloc_get() in sk_get_meminfo() inet_sk_diag_fill() has been changed to use sk_forward_alloc_get(), but sk_get_meminfo() was forgotten. Fixes: 292e6077b040 ("net: introduce sk_forward_alloc_get()") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/sock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/sock.c b/net/core/sock.c index b0dd501dabd6a..a61ec97098add 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -3743,7 +3743,7 @@ void sk_get_meminfo(const struct sock *sk, u32 *mem) mem[SK_MEMINFO_RCVBUF] = READ_ONCE(sk->sk_rcvbuf); mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk); mem[SK_MEMINFO_SNDBUF] = READ_ONCE(sk->sk_sndbuf); - mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc; + mem[SK_MEMINFO_FWD_ALLOC] = sk_forward_alloc_get(sk); mem[SK_MEMINFO_WMEM_QUEUED] = READ_ONCE(sk->sk_wmem_queued); mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc); mem[SK_MEMINFO_BACKLOG] = READ_ONCE(sk->sk_backlog.len); From 5e6300e7b3a4ab5b72a82079753868e91fbf9efc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 31 Aug 2023 13:52:09 +0000 Subject: [PATCH 2/5] net: annotate data-races around sk->sk_forward_alloc Every time sk->sk_forward_alloc is read locklessly, add a READ_ONCE(). Add sk_forward_alloc_add() helper to centralize updates, to reduce number of WRITE_ONCE(). Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 12 +++++++++--- net/core/sock.c | 8 ++++---- net/ipv4/tcp_output.c | 2 +- net/ipv4/udp.c | 6 +++--- net/mptcp/protocol.c | 6 +++--- 5 files changed, 20 insertions(+), 14 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 11d503417591e..f04869ac1d922 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1053,6 +1053,12 @@ static inline void sk_wmem_queued_add(struct sock *sk, int val) WRITE_ONCE(sk->sk_wmem_queued, sk->sk_wmem_queued + val); } +static inline void sk_forward_alloc_add(struct sock *sk, int val) +{ + /* Paired with lockless reads of sk->sk_forward_alloc */ + WRITE_ONCE(sk->sk_forward_alloc, sk->sk_forward_alloc + val); +} + void sk_stream_write_space(struct sock *sk); /* OOB backlog add */ @@ -1377,7 +1383,7 @@ static inline int sk_forward_alloc_get(const struct sock *sk) if (sk->sk_prot->forward_alloc_get) return sk->sk_prot->forward_alloc_get(sk); #endif - return sk->sk_forward_alloc; + return READ_ONCE(sk->sk_forward_alloc); } static inline bool __sk_stream_memory_free(const struct sock *sk, int wake) @@ -1673,14 +1679,14 @@ static inline void sk_mem_charge(struct sock *sk, int size) { if (!sk_has_account(sk)) return; - sk->sk_forward_alloc -= size; + sk_forward_alloc_add(sk, -size); } static inline void sk_mem_uncharge(struct sock *sk, int size) { if (!sk_has_account(sk)) return; - sk->sk_forward_alloc += size; + sk_forward_alloc_add(sk, size); sk_mem_reclaim(sk); } diff --git a/net/core/sock.c b/net/core/sock.c index a61ec97098add..40e1bda4bde0c 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1045,7 +1045,7 @@ static int sock_reserve_memory(struct sock *sk, int bytes) mem_cgroup_uncharge_skmem(sk->sk_memcg, pages); return -ENOMEM; } - sk->sk_forward_alloc += pages << PAGE_SHIFT; + sk_forward_alloc_add(sk, pages << PAGE_SHIFT); WRITE_ONCE(sk->sk_reserved_mem, sk->sk_reserved_mem + (pages << PAGE_SHIFT)); @@ -3139,10 +3139,10 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind) { int ret, amt = sk_mem_pages(size); - sk->sk_forward_alloc += amt << PAGE_SHIFT; + sk_forward_alloc_add(sk, amt << PAGE_SHIFT); ret = __sk_mem_raise_allocated(sk, size, amt, kind); if (!ret) - sk->sk_forward_alloc -= amt << PAGE_SHIFT; + sk_forward_alloc_add(sk, -(amt << PAGE_SHIFT)); return ret; } EXPORT_SYMBOL(__sk_mem_schedule); @@ -3174,7 +3174,7 @@ void __sk_mem_reduce_allocated(struct sock *sk, int amount) void __sk_mem_reclaim(struct sock *sk, int amount) { amount >>= PAGE_SHIFT; - sk->sk_forward_alloc -= amount << PAGE_SHIFT; + sk_forward_alloc_add(sk, -(amount << PAGE_SHIFT)); __sk_mem_reduce_allocated(sk, amount); } EXPORT_SYMBOL(__sk_mem_reclaim); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index e6b4fbd642f7e..ccfc8bbf74558 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3474,7 +3474,7 @@ void sk_forced_mem_schedule(struct sock *sk, int size) if (delta <= 0) return; amt = sk_mem_pages(delta); - sk->sk_forward_alloc += amt << PAGE_SHIFT; + sk_forward_alloc_add(sk, amt << PAGE_SHIFT); sk_memory_allocated_add(sk, amt); if (mem_cgroup_sockets_enabled && sk->sk_memcg) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 0794a2c46a568..f39b9c8445808 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1414,9 +1414,9 @@ static void udp_rmem_release(struct sock *sk, int size, int partial, spin_lock(&sk_queue->lock); - sk->sk_forward_alloc += size; + sk_forward_alloc_add(sk, size); amt = (sk->sk_forward_alloc - partial) & ~(PAGE_SIZE - 1); - sk->sk_forward_alloc -= amt; + sk_forward_alloc_add(sk, -amt); if (amt) __sk_mem_reduce_allocated(sk, amt >> PAGE_SHIFT); @@ -1527,7 +1527,7 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb) goto uncharge_drop; } - sk->sk_forward_alloc -= size; + sk_forward_alloc_add(sk, -size); /* no need to setup a destructor, we will explicitly release the * forward allocated memory on dequeue diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 933b257eee024..625df3a36c469 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1800,7 +1800,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) } /* data successfully copied into the write queue */ - sk->sk_forward_alloc -= total_ts; + sk_forward_alloc_add(sk, -total_ts); copied += psize; dfrag->data_len += psize; frag_truesize += psize; @@ -3257,7 +3257,7 @@ void mptcp_destroy_common(struct mptcp_sock *msk, unsigned int flags) /* move all the rx fwd alloc into the sk_mem_reclaim_final in * inet_sock_destruct() will dispose it */ - sk->sk_forward_alloc += msk->rmem_fwd_alloc; + sk_forward_alloc_add(sk, msk->rmem_fwd_alloc); msk->rmem_fwd_alloc = 0; mptcp_token_destroy(msk); mptcp_pm_free_anno_list(msk); @@ -3522,7 +3522,7 @@ static void mptcp_shutdown(struct sock *sk, int how) static int mptcp_forward_alloc_get(const struct sock *sk) { - return sk->sk_forward_alloc + mptcp_sk(sk)->rmem_fwd_alloc; + return READ_ONCE(sk->sk_forward_alloc) + mptcp_sk(sk)->rmem_fwd_alloc; } static int mptcp_ioctl_outq(const struct mptcp_sock *msk, u64 v) From 9531e4a83febc3fb47ac77e24cfb5ea97e50034d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 31 Aug 2023 13:52:10 +0000 Subject: [PATCH 3/5] mptcp: annotate data-races around msk->rmem_fwd_alloc msk->rmem_fwd_alloc can be read locklessly. Add mptcp_rmem_fwd_alloc_add(), similar to sk_forward_alloc_add(), and appropriate READ_ONCE()/WRITE_ONCE() annotations. Fixes: 6511882cdd82 ("mptcp: allocate fwd memory separately on the rx and tx path") Signed-off-by: Eric Dumazet Cc: Paolo Abeni Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 625df3a36c469..a7fc16f5175d2 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -134,9 +134,15 @@ static void mptcp_drop(struct sock *sk, struct sk_buff *skb) __kfree_skb(skb); } +static void mptcp_rmem_fwd_alloc_add(struct sock *sk, int size) +{ + WRITE_ONCE(mptcp_sk(sk)->rmem_fwd_alloc, + mptcp_sk(sk)->rmem_fwd_alloc + size); +} + static void mptcp_rmem_charge(struct sock *sk, int size) { - mptcp_sk(sk)->rmem_fwd_alloc -= size; + mptcp_rmem_fwd_alloc_add(sk, -size); } static bool mptcp_try_coalesce(struct sock *sk, struct sk_buff *to, @@ -177,7 +183,7 @@ static bool mptcp_ooo_try_coalesce(struct mptcp_sock *msk, struct sk_buff *to, static void __mptcp_rmem_reclaim(struct sock *sk, int amount) { amount >>= PAGE_SHIFT; - mptcp_sk(sk)->rmem_fwd_alloc -= amount << PAGE_SHIFT; + mptcp_rmem_charge(sk, amount << PAGE_SHIFT); __sk_mem_reduce_allocated(sk, amount); } @@ -186,7 +192,7 @@ static void mptcp_rmem_uncharge(struct sock *sk, int size) struct mptcp_sock *msk = mptcp_sk(sk); int reclaimable; - msk->rmem_fwd_alloc += size; + mptcp_rmem_fwd_alloc_add(sk, size); reclaimable = msk->rmem_fwd_alloc - sk_unused_reserved_mem(sk); /* see sk_mem_uncharge() for the rationale behind the following schema */ @@ -341,7 +347,7 @@ static bool mptcp_rmem_schedule(struct sock *sk, struct sock *ssk, int size) if (!__sk_mem_raise_allocated(sk, size, amt, SK_MEM_RECV)) return false; - msk->rmem_fwd_alloc += amount; + mptcp_rmem_fwd_alloc_add(sk, amount); return true; } @@ -3258,7 +3264,7 @@ void mptcp_destroy_common(struct mptcp_sock *msk, unsigned int flags) * inet_sock_destruct() will dispose it */ sk_forward_alloc_add(sk, msk->rmem_fwd_alloc); - msk->rmem_fwd_alloc = 0; + WRITE_ONCE(msk->rmem_fwd_alloc, 0); mptcp_token_destroy(msk); mptcp_pm_free_anno_list(msk); mptcp_free_local_addr_list(msk); @@ -3522,7 +3528,8 @@ static void mptcp_shutdown(struct sock *sk, int how) static int mptcp_forward_alloc_get(const struct sock *sk) { - return READ_ONCE(sk->sk_forward_alloc) + mptcp_sk(sk)->rmem_fwd_alloc; + return READ_ONCE(sk->sk_forward_alloc) + + READ_ONCE(mptcp_sk(sk)->rmem_fwd_alloc); } static int mptcp_ioctl_outq(const struct mptcp_sock *msk, u64 v) From e3390b30a5dfb112e8e802a59c0f68f947b638b2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 31 Aug 2023 13:52:11 +0000 Subject: [PATCH 4/5] net: annotate data-races around sk->sk_tsflags sk->sk_tsflags can be read locklessly, add corresponding annotations. Fixes: b9f40e21ef42 ("net-timestamp: move timestamp flags out of sk_flags") Signed-off-by: Eric Dumazet Cc: Willem de Bruijn Signed-off-by: David S. Miller --- include/net/ip.h | 2 +- include/net/sock.h | 17 ++++++++++------- net/can/j1939/socket.c | 10 ++++++---- net/core/skbuff.c | 10 ++++++---- net/core/sock.c | 4 ++-- net/ipv4/ip_output.c | 2 +- net/ipv4/ip_sockglue.c | 2 +- net/ipv4/tcp.c | 4 ++-- net/ipv6/ip6_output.c | 2 +- net/ipv6/ping.c | 2 +- net/ipv6/raw.c | 2 +- net/ipv6/udp.c | 2 +- net/socket.c | 13 +++++++------ 13 files changed, 40 insertions(+), 32 deletions(-) diff --git a/include/net/ip.h b/include/net/ip.h index 19adacd5ece03..9276cea775cc2 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -94,7 +94,7 @@ static inline void ipcm_init_sk(struct ipcm_cookie *ipcm, ipcm_init(ipcm); ipcm->sockc.mark = READ_ONCE(inet->sk.sk_mark); - ipcm->sockc.tsflags = inet->sk.sk_tsflags; + ipcm->sockc.tsflags = READ_ONCE(inet->sk.sk_tsflags); ipcm->oif = READ_ONCE(inet->sk.sk_bound_dev_if); ipcm->addr = inet->inet_saddr; ipcm->protocol = inet->inet_num; diff --git a/include/net/sock.h b/include/net/sock.h index f04869ac1d922..b770261fbdaf5 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1906,7 +1906,9 @@ struct sockcm_cookie { static inline void sockcm_init(struct sockcm_cookie *sockc, const struct sock *sk) { - *sockc = (struct sockcm_cookie) { .tsflags = sk->sk_tsflags }; + *sockc = (struct sockcm_cookie) { + .tsflags = READ_ONCE(sk->sk_tsflags) + }; } int __sock_cmsg_send(struct sock *sk, struct cmsghdr *cmsg, @@ -2701,9 +2703,9 @@ void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk, static inline void sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) { - ktime_t kt = skb->tstamp; struct skb_shared_hwtstamps *hwtstamps = skb_hwtstamps(skb); - + u32 tsflags = READ_ONCE(sk->sk_tsflags); + ktime_t kt = skb->tstamp; /* * generate control messages if * - receive time stamping in software requested @@ -2711,10 +2713,10 @@ sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) * - hardware time stamps available and wanted */ if (sock_flag(sk, SOCK_RCVTSTAMP) || - (sk->sk_tsflags & SOF_TIMESTAMPING_RX_SOFTWARE) || - (kt && sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) || + (tsflags & SOF_TIMESTAMPING_RX_SOFTWARE) || + (kt && tsflags & SOF_TIMESTAMPING_SOFTWARE) || (hwtstamps->hwtstamp && - (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE))) + (tsflags & SOF_TIMESTAMPING_RAW_HARDWARE))) __sock_recv_timestamp(msg, sk, skb); else sock_write_timestamp(sk, kt); @@ -2736,7 +2738,8 @@ static inline void sock_recv_cmsgs(struct msghdr *msg, struct sock *sk, #define TSFLAGS_ANY (SOF_TIMESTAMPING_SOFTWARE | \ SOF_TIMESTAMPING_RAW_HARDWARE) - if (sk->sk_flags & FLAGS_RECV_CMSGS || sk->sk_tsflags & TSFLAGS_ANY) + if (sk->sk_flags & FLAGS_RECV_CMSGS || + READ_ONCE(sk->sk_tsflags) & TSFLAGS_ANY) __sock_recv_cmsgs(msg, sk, skb); else if (unlikely(sock_flag(sk, SOCK_TIMESTAMP))) sock_write_timestamp(sk, skb->tstamp); diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c index feaec4ad6d163..b28c976f52a0a 100644 --- a/net/can/j1939/socket.c +++ b/net/can/j1939/socket.c @@ -974,6 +974,7 @@ static void __j1939_sk_errqueue(struct j1939_session *session, struct sock *sk, struct sock_exterr_skb *serr; struct sk_buff *skb; char *state = "UNK"; + u32 tsflags; int err; jsk = j1939_sk(sk); @@ -981,13 +982,14 @@ static void __j1939_sk_errqueue(struct j1939_session *session, struct sock *sk, if (!(jsk->state & J1939_SOCK_ERRQUEUE)) return; + tsflags = READ_ONCE(sk->sk_tsflags); switch (type) { case J1939_ERRQUEUE_TX_ACK: - if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK)) + if (!(tsflags & SOF_TIMESTAMPING_TX_ACK)) return; break; case J1939_ERRQUEUE_TX_SCHED: - if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_SCHED)) + if (!(tsflags & SOF_TIMESTAMPING_TX_SCHED)) return; break; case J1939_ERRQUEUE_TX_ABORT: @@ -997,7 +999,7 @@ static void __j1939_sk_errqueue(struct j1939_session *session, struct sock *sk, case J1939_ERRQUEUE_RX_DPO: fallthrough; case J1939_ERRQUEUE_RX_ABORT: - if (!(sk->sk_tsflags & SOF_TIMESTAMPING_RX_SOFTWARE)) + if (!(tsflags & SOF_TIMESTAMPING_RX_SOFTWARE)) return; break; default: @@ -1054,7 +1056,7 @@ static void __j1939_sk_errqueue(struct j1939_session *session, struct sock *sk, } serr->opt_stats = true; - if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) + if (tsflags & SOF_TIMESTAMPING_OPT_ID) serr->ee.ee_data = session->tskey; netdev_dbg(session->priv->ndev, "%s: 0x%p tskey: %i, state: %s\n", diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 45707059082f2..24f26e816184e 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -5207,7 +5207,7 @@ static void __skb_complete_tx_timestamp(struct sk_buff *skb, serr->ee.ee_info = tstype; serr->opt_stats = opt_stats; serr->header.h4.iif = skb->dev ? skb->dev->ifindex : 0; - if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) { + if (READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID) { serr->ee.ee_data = skb_shinfo(skb)->tskey; if (sk_is_tcp(sk)) serr->ee.ee_data -= atomic_read(&sk->sk_tskey); @@ -5263,21 +5263,23 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb, { struct sk_buff *skb; bool tsonly, opt_stats = false; + u32 tsflags; if (!sk) return; - if (!hwtstamps && !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TX_SWHW) && + tsflags = READ_ONCE(sk->sk_tsflags); + if (!hwtstamps && !(tsflags & SOF_TIMESTAMPING_OPT_TX_SWHW) && skb_shinfo(orig_skb)->tx_flags & SKBTX_IN_PROGRESS) return; - tsonly = sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TSONLY; + tsonly = tsflags & SOF_TIMESTAMPING_OPT_TSONLY; if (!skb_may_tx_timestamp(sk, tsonly)) return; if (tsonly) { #ifdef CONFIG_INET - if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS) && + if ((tsflags & SOF_TIMESTAMPING_OPT_STATS) && sk_is_tcp(sk)) { skb = tcp_get_timestamping_opt_stats(sk, orig_skb, ack_skb); diff --git a/net/core/sock.c b/net/core/sock.c index 40e1bda4bde0c..d05a290300b6c 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -937,7 +937,7 @@ int sock_set_timestamping(struct sock *sk, int optname, return ret; } - sk->sk_tsflags = val; + WRITE_ONCE(sk->sk_tsflags, val); sock_valbool_flag(sk, SOCK_TSTAMP_NEW, optname == SO_TIMESTAMPING_NEW); if (val & SOF_TIMESTAMPING_RX_SOFTWARE) @@ -1719,7 +1719,7 @@ int sk_getsockopt(struct sock *sk, int level, int optname, case SO_TIMESTAMPING_OLD: lv = sizeof(v.timestamping); - v.timestamping.flags = sk->sk_tsflags; + v.timestamping.flags = READ_ONCE(sk->sk_tsflags); v.timestamping.bind_phc = sk->sk_bind_phc; break; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index b2e0ad3120280..4ab877cf6d35f 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -981,7 +981,7 @@ static int __ip_append_data(struct sock *sk, paged = !!cork->gso_size; if (cork->tx_flags & SKBTX_ANY_TSTAMP && - sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) + READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID) tskey = atomic_inc_return(&sk->sk_tskey) - 1; hh_len = LL_RESERVED_SPACE(rt->dst.dev); diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index d1c73660b8449..cce9cb25f3b31 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -511,7 +511,7 @@ static bool ipv4_datagram_support_cmsg(const struct sock *sk, * or without payload (SOF_TIMESTAMPING_OPT_TSONLY). */ info = PKTINFO_SKB_CB(skb); - if (!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_CMSG) || + if (!(READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_CMSG) || !info->ipi_ifindex) return false; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index cee1e548660cb..cc4b250262c19 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2259,14 +2259,14 @@ void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk, } } - if (sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) + if (READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_SOFTWARE) has_timestamping = true; else tss->ts[0] = (struct timespec64) {0}; } if (tss->ts[2].tv_sec || tss->ts[2].tv_nsec) { - if (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) + if (READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_RAW_HARDWARE) has_timestamping = true; else tss->ts[2] = (struct timespec64) {0}; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 4ab50169a5a9d..54fc4c711f2c5 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1501,7 +1501,7 @@ static int __ip6_append_data(struct sock *sk, orig_mtu = mtu; if (cork->tx_flags & SKBTX_ANY_TSTAMP && - sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) + READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID) tskey = atomic_inc_return(&sk->sk_tskey) - 1; hh_len = LL_RESERVED_SPACE(rt->dst.dev); diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 1b27728349725..5831aaa53d75e 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -119,7 +119,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) return -EINVAL; ipcm6_init_sk(&ipc6, np); - ipc6.sockc.tsflags = sk->sk_tsflags; + ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags); ipc6.sockc.mark = READ_ONCE(sk->sk_mark); fl6.flowi6_oif = oif; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 0eae7661a85c4..42fcec3ecf5e1 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -772,7 +772,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) fl6.flowi6_uid = sk->sk_uid; ipcm6_init(&ipc6); - ipc6.sockc.tsflags = sk->sk_tsflags; + ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags); ipc6.sockc.mark = fl6.flowi6_mark; if (sin6) { diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index ebc6ae47cfead..86b5d509a4688 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1339,7 +1339,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) ipcm6_init(&ipc6); ipc6.gso_size = READ_ONCE(up->gso_size); - ipc6.sockc.tsflags = sk->sk_tsflags; + ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags); ipc6.sockc.mark = READ_ONCE(sk->sk_mark); /* destination address check */ diff --git a/net/socket.c b/net/socket.c index 848116d06b511..98ffffab949e8 100644 --- a/net/socket.c +++ b/net/socket.c @@ -825,7 +825,7 @@ static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp) static ktime_t get_timestamp(struct sock *sk, struct sk_buff *skb, int *if_index) { - bool cycles = sk->sk_tsflags & SOF_TIMESTAMPING_BIND_PHC; + bool cycles = READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_BIND_PHC; struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb); struct net_device *orig_dev; ktime_t hwtstamp; @@ -877,12 +877,12 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP); int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW); struct scm_timestamping_internal tss; - int empty = 1, false_tstamp = 0; struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb); int if_index; ktime_t hwtstamp; + u32 tsflags; /* Race occurred between timestamp enabling and packet receiving. Fill in the current time for now. */ @@ -924,11 +924,12 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, } memset(&tss, 0, sizeof(tss)); - if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) && + tsflags = READ_ONCE(sk->sk_tsflags); + if ((tsflags & SOF_TIMESTAMPING_SOFTWARE) && ktime_to_timespec64_cond(skb->tstamp, tss.ts + 0)) empty = 0; if (shhwtstamps && - (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) && + (tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) && !skb_is_swtx_tstamp(skb, false_tstamp)) { if_index = 0; if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP_NETDEV) @@ -936,14 +937,14 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, else hwtstamp = shhwtstamps->hwtstamp; - if (sk->sk_tsflags & SOF_TIMESTAMPING_BIND_PHC) + if (tsflags & SOF_TIMESTAMPING_BIND_PHC) hwtstamp = ptp_convert_timestamp(&hwtstamp, sk->sk_bind_phc); if (ktime_to_timespec64_cond(hwtstamp, tss.ts + 2)) { empty = 0; - if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) && + if ((tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) && !skb_is_err_queue(skb)) put_ts_pktinfo(msg, skb, if_index); } From 251cd405a9e6e70b92fe5afbdd17fd5caf9d3266 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 31 Aug 2023 13:52:12 +0000 Subject: [PATCH 5/5] net: annotate data-races around sk->sk_bind_phc sk->sk_bind_phc is read locklessly. Add corresponding annotations. Fixes: d463126e23f1 ("net: sock: extend SO_TIMESTAMPING for PHC binding") Signed-off-by: Eric Dumazet Cc: Yangbo Lu Signed-off-by: David S. Miller --- net/core/sock.c | 4 ++-- net/socket.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/net/core/sock.c b/net/core/sock.c index d05a290300b6c..d3c7b53368d22 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -894,7 +894,7 @@ static int sock_timestamping_bind_phc(struct sock *sk, int phc_index) if (!match) return -EINVAL; - sk->sk_bind_phc = phc_index; + WRITE_ONCE(sk->sk_bind_phc, phc_index); return 0; } @@ -1720,7 +1720,7 @@ int sk_getsockopt(struct sock *sk, int level, int optname, case SO_TIMESTAMPING_OLD: lv = sizeof(v.timestamping); v.timestamping.flags = READ_ONCE(sk->sk_tsflags); - v.timestamping.bind_phc = sk->sk_bind_phc; + v.timestamping.bind_phc = READ_ONCE(sk->sk_bind_phc); break; case SO_RCVTIMEO_OLD: diff --git a/net/socket.c b/net/socket.c index 98ffffab949e8..928b05811cfd3 100644 --- a/net/socket.c +++ b/net/socket.c @@ -939,7 +939,7 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, if (tsflags & SOF_TIMESTAMPING_BIND_PHC) hwtstamp = ptp_convert_timestamp(&hwtstamp, - sk->sk_bind_phc); + READ_ONCE(sk->sk_bind_phc)); if (ktime_to_timespec64_cond(hwtstamp, tss.ts + 2)) { empty = 0;