Skip to content

Commit

Permalink
[OpenBLAS] Update to v0.3.26
Browse files Browse the repository at this point in the history
  • Loading branch information
giordano committed Jan 5, 2024
1 parent 03308c6 commit 15f3796
Show file tree
Hide file tree
Showing 11 changed files with 679 additions and 0 deletions.
20 changes: 20 additions & 0 deletions O/OpenBLAS/OpenBLAS32@0.3.26/build_tarballs.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
using BinaryBuilder

include("../common.jl")

# Recipe for the OpenBLAS32 package. The `openblas_*` helpers used below are not
# defined in this file; presumably they come from the `../common.jl` include.
name = "OpenBLAS32"
version = v"0.3.26"

# Inputs handed to `build_tarballs`; `openblas32=true` is forwarded to the
# script generator.
sources = openblas_sources(version)
script = openblas_script(; openblas32=true)
platforms = openblas_platforms()
products = openblas_products()
dependencies = openblas_dependencies(platforms)

# Build the tarballs
build_tarballs(
    ARGS, name, version, sources, script, platforms, products, dependencies;
    preferred_gcc_version=v"6",
    lock_microarchitecture=false,
    julia_compat="1.11",
    preferred_llvm_version=v"13.0.1",
)

# Build trigger: 1
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
diff --git a/kernel/arm64/KERNEL.NEOVERSEN1 b/kernel/arm64/KERNEL.NEOVERSEN1
index ea010db4..074d7215 100644
--- a/kernel/arm64/KERNEL.NEOVERSEN1
+++ b/kernel/arm64/KERNEL.NEOVERSEN1
@@ -91,10 +91,10 @@ IDAMAXKERNEL = iamax_thunderx2t99.c
ICAMAXKERNEL = izamax_thunderx2t99.c
IZAMAXKERNEL = izamax_thunderx2t99.c

-SNRM2KERNEL = scnrm2_thunderx2t99.c
-DNRM2KERNEL = dznrm2_thunderx2t99.c
-CNRM2KERNEL = scnrm2_thunderx2t99.c
-ZNRM2KERNEL = dznrm2_thunderx2t99.c
+SNRM2KERNEL = nrm2.S
+DNRM2KERNEL = nrm2.S
+CNRM2KERNEL = znrm2.S
+ZNRM2KERNEL = znrm2.S

DDOTKERNEL = dot_thunderx2t99.c
SDOTKERNEL = dot_thunderx2t99.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
From d99aad8ee308600832da39105a6511275cfe32ad Mon Sep 17 00:00:00 2001
From: Chip-Kerchner <[email protected]>
Date: Tue, 14 Nov 2023 11:07:08 -0600
Subject: [PATCH] Fix older version of gcc - missing __has_builtin, cpuid and
no support of P10.

---
Makefile.power | 12 ++++++++++++
driver/others/dynamic_power.c | 17 ++++++++++++-----
2 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/Makefile.power b/Makefile.power
index 95bada34f..aa1ca080a 100644
--- a/Makefile.power
+++ b/Makefile.power
@@ -11,7 +11,19 @@ endif

ifeq ($(CORE), POWER10)
ifneq ($(C_COMPILER), PGI)
+ifeq ($(C_COMPILER), GCC))
+ifeq ($(GCCVERSIONGTEQ10), 1)
CCOMMON_OPT += -Ofast -mcpu=power10 -mtune=power10 -mvsx -fno-fast-math
+else ifneq ($(GCCVERSIONGT4), 1)
+$(warning your compiler is too old to fully support POWER9, getting a newer version of gcc is recommended)
+CCOMMON_OPT += -Ofast -mcpu=power8 -mtune=power8 -mvsx -fno-fast-math
+else
+$(warning your compiler is too old to fully support POWER10, getting a newer version of gcc is recommended)
+CCOMMON_OPT += -Ofast -mcpu=power9 -mtune=power9 -mvsx -fno-fast-math
+endif
+else
+CCOMMON_OPT += -Ofast -mcpu=power10 -mtune=power10 -mvsx -fno-fast-math
+endif
ifeq ($(F_COMPILER), IBM)
FCOMMON_OPT += -O2 -qrecur -qnosave -qarch=pwr10 -qtune=pwr10 -qfloat=nomaf -qzerosize
else
diff --git a/driver/others/dynamic_power.c b/driver/others/dynamic_power.c
index f0faf2baf..0454f186c 100644
--- a/driver/others/dynamic_power.c
+++ b/driver/others/dynamic_power.c
@@ -66,8 +66,7 @@ static int cpuid(void)
#endif
return CPU_UNKNOWN;
}
-#else
-#if defined(C_PGI) || defined(__clang__)
+#elif defined(C_PGI) || defined(__clang__)
/*
* NV HPC compilers do not yet implement __builtin_cpu_is().
* Fake a version here for use in the CPU detection code below.
@@ -196,13 +195,21 @@ static int cpuid(void)
cpu_type = pvrPOWER[i].cpu_type;
return (int)(cpu_type);
}
-#endif /* C_PGI */
+#elif !defined(__BUILTIN_CPU_SUPPORTS__)
+static int cpuid(void)
+{
+ return CPU_UNKNOWN;
+}
#endif /* _AIX */

#ifndef __BUILTIN_CPU_SUPPORTS__
#include <string.h>

-#if defined(_AIX) || (defined(__has_builtin) && !__has_builtin(__builtin_cpu_is))
+#ifndef __has_builtin
+#define __has_builtin(x) 0
+#endif
+
+#if defined(_AIX) || !__has_builtin(__builtin_cpu_is)
static int __builtin_cpu_is(const char *arg)
{
static int ipinfo = -1;
@@ -227,7 +234,7 @@ static int __builtin_cpu_is(const char *arg)
}
#endif

-#if defined(_AIX) || (defined(__has_builtin) && !__has_builtin(__builtin_cpu_supports))
+#if defined(_AIX) || !__has_builtin(__builtin_cpu_supports)
static int __builtin_cpu_supports(const char *arg)
{
return 0;
--
2.42.1

Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
diff --git a/Makefile.power b/Makefile.power
index aa1ca080a..42c417a78 100644
--- a/Makefile.power
+++ b/Makefile.power
@@ -13,16 +13,16 @@ ifeq ($(CORE), POWER10)
ifneq ($(C_COMPILER), PGI)
ifeq ($(C_COMPILER), GCC))
ifeq ($(GCCVERSIONGTEQ10), 1)
-CCOMMON_OPT += -Ofast -mcpu=power10 -mtune=power10 -mvsx -fno-fast-math
+CCOMMON_OPT += -mcpu=power10 -mtune=power10 -mvsx -fno-fast-math
else ifneq ($(GCCVERSIONGT4), 1)
$(warning your compiler is too old to fully support POWER9, getting a newer version of gcc is recommended)
-CCOMMON_OPT += -Ofast -mcpu=power8 -mtune=power8 -mvsx -fno-fast-math
+CCOMMON_OPT += -mcpu=power8 -mtune=power8 -mvsx -fno-fast-math
else
$(warning your compiler is too old to fully support POWER10, getting a newer version of gcc is recommended)
-CCOMMON_OPT += -Ofast -mcpu=power9 -mtune=power9 -mvsx -fno-fast-math
+CCOMMON_OPT += -mcpu=power9 -mtune=power9 -mvsx -fno-fast-math
endif
else
-CCOMMON_OPT += -Ofast -mcpu=power10 -mtune=power10 -mvsx -fno-fast-math
+CCOMMON_OPT += -mcpu=power10 -mtune=power10 -mvsx -fno-fast-math
endif
ifeq ($(F_COMPILER), IBM)
FCOMMON_OPT += -O2 -qrecur -qnosave -qarch=pwr10 -qtune=pwr10 -qfloat=nomaf -qzerosize
@@ -34,7 +34,7 @@ endif

ifeq ($(CORE), POWER9)
ifneq ($(C_COMPILER), PGI)
-CCOMMON_OPT += -Ofast -mvsx -fno-fast-math
+CCOMMON_OPT += -mvsx -fno-fast-math
ifeq ($(C_COMPILER), GCC)
ifneq ($(GCCVERSIONGT4), 1)
$(warning your compiler is too old to fully support POWER9, getting a newer version of gcc is recommended)
@@ -70,7 +70,7 @@ endif

ifeq ($(CORE), POWER8)
ifneq ($(C_COMPILER), PGI)
-CCOMMON_OPT += -Ofast -mcpu=power8 -mtune=power8 -mvsx -fno-fast-math
+CCOMMON_OPT += -mcpu=power8 -mtune=power8 -mvsx -fno-fast-math
else
CCOMMON_OPT += -fast -Mvect=simd -Mcache_align
endif
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
diff --git a/driver/others/memory.c b/driver/others/memory.c
index 6e654ccf..1d2f9f12 100644
--- a/driver/others/memory.c
+++ b/driver/others/memory.c
@@ -1534,7 +1534,7 @@ void CONSTRUCTOR gotoblas_init(void) {

}

-void DESTRUCTOR gotoblas_quit(void) {
+void gotoblas_quit(void) {

if (gotoblas_initialized == 0) return;

@@ -1572,75 +1572,11 @@ void DESTRUCTOR gotoblas_quit(void) {
}

#if defined(_MSC_VER) && !defined(__clang__)
-BOOL APIENTRY DllMain(HMODULE hModule, DWORD ul_reason_for_call, LPVOID lpReserved)
-{
- switch (ul_reason_for_call)
- {
- case DLL_PROCESS_ATTACH:
- gotoblas_init();
- break;
- case DLL_THREAD_ATTACH:
- break;
- case DLL_THREAD_DETACH:
-#if defined(SMP)
- blas_thread_memory_cleanup();
-#endif
- break;
- case DLL_PROCESS_DETACH:
- gotoblas_quit();
- break;
- default:
- break;
- }
- return TRUE;
-}
-
-/*
- This is to allow static linking.
- Code adapted from Google performance tools:
- https://gperftools.googlecode.com/git-history/perftools-1.0/src/windows/port.cc
- Reference:
- https://sourceware.org/ml/pthreads-win32/2008/msg00028.html
- http://ci.boost.org/svn-trac/browser/trunk/libs/thread/src/win32/tss_pe.cpp
-*/
-static int on_process_term(void)
-{
- gotoblas_quit();
- return 0;
-}
#ifdef _WIN64
#pragma comment(linker, "/INCLUDE:_tls_used")
#else
#pragma comment(linker, "/INCLUDE:__tls_used")
#endif
-
-#ifdef _WIN64
-#pragma const_seg(".CRT$XLB")
-#else
-#pragma data_seg(".CRT$XLB")
-#endif
-
-#ifdef _WIN64
-static const PIMAGE_TLS_CALLBACK dll_callback(HINSTANCE h, DWORD ul_reason_for_call, PVOID pv) = DllMain;
-#pragma const_seg()
-#else
-static void (APIENTRY *dll_callback)(HINSTANCE h, DWORD ul_reason_for_call, PVOID pv) = DllMain;
-#pragma data_seg()
-#endif
-
-#ifdef _WIN64
-#pragma const_seg(".CRT$XTU")
-#else
-#pragma data_seg(".CRT$XTU")
-#endif
-
-#ifdef _WIN64
-static const int(*p_process_term)(void) = on_process_term;
-#pragma const_seg()
-#else
-static int(*p_process_term)(void) = on_process_term;
-#pragma data_seg()
-#endif
#endif

#if (defined(C_PGI) || (!defined(C_SUN) && defined(F_INTERFACE_SUN))) && (defined(ARCH_X86) || defined(ARCH_X86_64))
@@ -3146,7 +3082,7 @@ void CONSTRUCTOR gotoblas_init(void) {

}

-void DESTRUCTOR gotoblas_quit(void) {
+void gotoblas_quit(void) {

if (gotoblas_initialized == 0) return;

@@ -3175,71 +3111,6 @@ void DESTRUCTOR gotoblas_quit(void) {
#endif
}

-#if defined(_MSC_VER) && !defined(__clang__)
-BOOL APIENTRY DllMain(HMODULE hModule, DWORD ul_reason_for_call, LPVOID lpReserved)
-{
- switch (ul_reason_for_call)
- {
- case DLL_PROCESS_ATTACH:
- gotoblas_init();
- break;
- case DLL_THREAD_ATTACH:
- break;
- case DLL_THREAD_DETACH:
- break;
- case DLL_PROCESS_DETACH:
- gotoblas_quit();
- break;
- default:
- break;
- }
- return TRUE;
-}
-
-/*
- This is to allow static linking.
- Code adapted from Google performance tools:
- https://gperftools.googlecode.com/git-history/perftools-1.0/src/windows/port.cc
- Reference:
- https://sourceware.org/ml/pthreads-win32/2008/msg00028.html
- http://ci.boost.org/svn-trac/browser/trunk/libs/thread/src/win32/tss_pe.cpp
-*/
-static int on_process_term(void)
-{
- gotoblas_quit();
- return 0;
-}
-#ifdef _WIN64
-#pragma comment(linker, "/INCLUDE:_tls_used")
-#else
-#pragma comment(linker, "/INCLUDE:__tls_used")
-#endif
-
-#ifdef _WIN64
-#pragma const_seg(".CRT$XLB")
-#else
-#pragma data_seg(".CRT$XLB")
-#endif
-static void (APIENTRY *dll_callback)(HINSTANCE h, DWORD ul_reason_for_call, PVOID pv) = DllMain;
-#ifdef _WIN64
-#pragma const_seg()
-#else
-#pragma data_seg()
-#endif
-
-#ifdef _WIN64
-#pragma const_seg(".CRT$XTU")
-#else
-#pragma data_seg(".CRT$XTU")
-#endif
-static int(*p_process_term)(void) = on_process_term;
-#ifdef _WIN64
-#pragma const_seg()
-#else
-#pragma data_seg()
-#endif
-#endif
-
#if (defined(C_PGI) || (!defined(C_SUN) && defined(F_INTERFACE_SUN))) && (defined(ARCH_X86) || defined(ARCH_X86_64))
/* Don't call me; this is just work around for PGI / Sun bug */
void gotoblas_dummy_for_PGI(void) {
21 changes: 21 additions & 0 deletions O/OpenBLAS/OpenBLAS@0.3.26/build_tarballs.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
using BinaryBuilder

include("../common.jl")

# Recipe for the OpenBLAS package. The `openblas_*` helpers used below are not
# defined in this file; presumably they come from the `../common.jl` include.
name = "OpenBLAS"
version = v"0.3.26"

# Inputs handed to `build_tarballs`.
sources = openblas_sources(version)
script = openblas_script(; aarch64_ilp64=true, num_64bit_threads=512)
platforms = openblas_platforms(; experimental=true)
# On top of the default platform list, also target x86_64 Linux with
# `sanitize="memory"`.
push!(platforms, Platform("x86_64", "linux"; sanitize="memory"))
products = openblas_products()
dependencies = openblas_dependencies(platforms)

# Build the tarballs
build_tarballs(
    ARGS, name, version, sources, script, platforms, products, dependencies;
    preferred_gcc_version=v"6",
    lock_microarchitecture=false,
    julia_compat="1.11",
    preferred_llvm_version=v"13.0.1",
)

# Build trigger: 1
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
diff --git a/kernel/arm64/KERNEL.NEOVERSEN1 b/kernel/arm64/KERNEL.NEOVERSEN1
index ea010db4..074d7215 100644
--- a/kernel/arm64/KERNEL.NEOVERSEN1
+++ b/kernel/arm64/KERNEL.NEOVERSEN1
@@ -91,10 +91,10 @@ IDAMAXKERNEL = iamax_thunderx2t99.c
ICAMAXKERNEL = izamax_thunderx2t99.c
IZAMAXKERNEL = izamax_thunderx2t99.c

-SNRM2KERNEL = scnrm2_thunderx2t99.c
-DNRM2KERNEL = dznrm2_thunderx2t99.c
-CNRM2KERNEL = scnrm2_thunderx2t99.c
-ZNRM2KERNEL = dznrm2_thunderx2t99.c
+SNRM2KERNEL = nrm2.S
+DNRM2KERNEL = nrm2.S
+CNRM2KERNEL = znrm2.S
+ZNRM2KERNEL = znrm2.S

DDOTKERNEL = dot_thunderx2t99.c
SDOTKERNEL = dot_thunderx2t99.c
Loading

0 comments on commit 15f3796

Please sign in to comment.