From d7e3f877e974b1c1f3fcf87287dc36e9b2d54d29 Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Tue, 16 Mar 2021 10:19:09 +0900 Subject: [PATCH] gcc_builtin: fix performance regression on x86_64 In order to work around a bug in older gcc versions on x86_64, __atomic_thread_fence (__ATOMIC_ACQUIRE) was replaced with __atomic_thread_fence (__ATOMIC_SEQ_CST) based on the assumption that this did not introduce performance regressions. It was recently found that this did introduce some performance regression, mainly at scale on fat nodes. So simply use an asm memory clobber to both work around older gcc bugs and fix the performance regression. Thanks S. Biplab Raut for bringing this issue to our attention. Refs. open-mpi/ompi#8603 Signed-off-by: Gilles Gouaillardet --- opal/include/opal/sys/gcc_builtin/atomic.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/opal/include/opal/sys/gcc_builtin/atomic.h b/opal/include/opal/sys/gcc_builtin/atomic.h index f42559bc781..7ff295bdc80 100644 --- a/opal/include/opal/sys/gcc_builtin/atomic.h +++ b/opal/include/opal/sys/gcc_builtin/atomic.h @@ -13,8 +13,8 @@ * Copyright (c) 2011 Sandia National Laboratories. All rights reserved. * Copyright (c) 2014-2018 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2016-2017 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2016-2021 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2018 Triad National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ @@ -61,9 +61,8 @@ static inline void opal_atomic_rmb(void) { #if OPAL_ASSEMBLY_ARCH == OPAL_X86_64 /* work around a bug in older gcc versions where ACQUIRE seems to get - * treated as a no-op instead of being equivalent to - * __asm__ __volatile__("": : :"memory") */ - __atomic_thread_fence (__ATOMIC_SEQ_CST); + * treated as a no-op instead */ + __asm__ __volatile__("": : :"memory"); #else __atomic_thread_fence (__ATOMIC_ACQUIRE); #endif