From 58795582905e08fa7748846c1971b4ab911d1e16 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Mon, 30 Nov 2020 12:47:45 -0800 Subject: [PATCH] Update to musl 1.2.1. (#222) See the WHATSNEW file for details. WASI libc currently uses a separate malloc, so the new mallocng is not currently used. This includes a few new custom changes to disable code for handling directed rounding modes. Wasm doesn't have directed rounding modes, so disabling this code saves code size, something the WASI libc project cares about! --- expected/wasm32-wasi/defined-symbols.txt | 1 - libc-top-half/musl/COPYRIGHT | 5 +- libc-top-half/musl/Makefile | 3 +- libc-top-half/musl/VERSION | 2 +- libc-top-half/musl/WHATSNEW | 36 ++ libc-top-half/musl/arch/arm/syscall_arch.h | 6 - libc-top-half/musl/arch/mips/bits/signal.h | 2 +- libc-top-half/musl/arch/mips/syscall_arch.h | 80 ++-- libc-top-half/musl/arch/mips64/bits/signal.h | 2 +- libc-top-half/musl/arch/mips64/syscall_arch.h | 70 ++-- libc-top-half/musl/arch/mipsn32/bits/signal.h | 2 +- .../musl/arch/mipsn32/syscall_arch.h | 75 ++-- libc-top-half/musl/arch/powerpc/bits/mman.h | 1 - libc-top-half/musl/arch/powerpc64/bits/mman.h | 1 - libc-top-half/musl/configure | 19 + libc-top-half/musl/include/arpa/inet.h | 5 - libc-top-half/musl/include/netinet/in.h | 2 - libc-top-half/musl/include/pthread.h | 3 + libc-top-half/musl/ldso/dynlink.c | 5 +- libc-top-half/musl/src/errno/__strerror.h | 17 +- libc-top-half/musl/src/errno/strerror.c | 43 +- libc-top-half/musl/src/fenv/sh/fenv.S | 2 + libc-top-half/musl/src/internal/atomic.h | 15 + libc-top-half/musl/src/internal/dynlink.h | 5 + libc-top-half/musl/src/internal/libc.h | 9 +- libc-top-half/musl/src/internal/libm.h | 8 + libc-top-half/musl/src/internal/shgetc.c | 2 +- libc-top-half/musl/src/internal/stdio_impl.h | 2 - libc-top-half/musl/src/ipc/msgctl.c | 10 + libc-top-half/musl/src/ipc/semctl.c | 10 + libc-top-half/musl/src/ipc/shmctl.c | 10 + libc-top-half/musl/src/linux/clock_adjtime.c | 95 +++-- 
libc-top-half/musl/src/malloc/DESIGN | 22 - libc-top-half/musl/src/malloc/aligned_alloc.c | 7 - libc-top-half/musl/src/malloc/calloc.c | 45 ++ libc-top-half/musl/src/malloc/expand_heap.c | 71 ---- libc-top-half/musl/src/malloc/lite_malloc.c | 100 +++-- .../musl/src/malloc/mallocng/aligned_alloc.c | 57 +++ .../musl/src/malloc/mallocng/donate.c | 39 ++ libc-top-half/musl/src/malloc/mallocng/free.c | 143 +++++++ libc-top-half/musl/src/malloc/mallocng/glue.h | 77 ++++ .../musl/src/malloc/mallocng/malloc.c | 387 ++++++++++++++++++ .../src/malloc/mallocng/malloc_usable_size.c | 12 + libc-top-half/musl/src/malloc/mallocng/meta.h | 288 +++++++++++++ .../musl/src/malloc/mallocng/realloc.c | 51 +++ libc-top-half/musl/src/malloc/memalign.c | 53 +-- .../musl/src/malloc/oldmalloc/aligned_alloc.c | 53 +++ .../musl/src/malloc/{ => oldmalloc}/malloc.c | 357 ++++++++-------- .../oldmalloc}/malloc_impl.h | 9 +- .../{ => oldmalloc}/malloc_usable_size.c | 0 .../musl/src/malloc/posix_memalign.c | 3 +- libc-top-half/musl/src/malloc/replaced.c | 4 + libc-top-half/musl/src/math/__expo2.c | 9 + libc-top-half/musl/src/math/__expo2f.c | 9 + libc-top-half/musl/src/math/__rem_pio2.c | 19 +- libc-top-half/musl/src/math/__rem_pio2f.c | 17 +- libc-top-half/musl/src/math/__rem_pio2l.c | 20 +- libc-top-half/musl/src/math/cosh.c | 4 + libc-top-half/musl/src/math/coshf.c | 4 + libc-top-half/musl/src/math/i386/fabs.c | 7 + libc-top-half/musl/src/math/i386/fabs.s | 6 - libc-top-half/musl/src/math/i386/fabsf.c | 7 + libc-top-half/musl/src/math/i386/fabsf.s | 6 - libc-top-half/musl/src/math/i386/fabsl.c | 7 + libc-top-half/musl/src/math/i386/fabsl.s | 6 - libc-top-half/musl/src/math/i386/fmod.c | 10 + libc-top-half/musl/src/math/i386/fmod.s | 11 - libc-top-half/musl/src/math/i386/fmodf.c | 10 + libc-top-half/musl/src/math/i386/fmodf.s | 11 - libc-top-half/musl/src/math/i386/fmodl.c | 9 + libc-top-half/musl/src/math/i386/fmodl.s | 11 - libc-top-half/musl/src/math/i386/llrint.c | 8 + 
libc-top-half/musl/src/math/i386/llrint.s | 8 - libc-top-half/musl/src/math/i386/llrintf.c | 8 + libc-top-half/musl/src/math/i386/llrintf.s | 9 - libc-top-half/musl/src/math/i386/llrintl.c | 8 + libc-top-half/musl/src/math/i386/llrintl.s | 8 - libc-top-half/musl/src/math/i386/lrint.c | 8 + libc-top-half/musl/src/math/i386/lrint.s | 7 - libc-top-half/musl/src/math/i386/lrintf.c | 8 + libc-top-half/musl/src/math/i386/lrintf.s | 7 - libc-top-half/musl/src/math/i386/lrintl.c | 8 + libc-top-half/musl/src/math/i386/lrintl.s | 7 - libc-top-half/musl/src/math/i386/remainder.c | 12 + libc-top-half/musl/src/math/i386/remainder.s | 14 - libc-top-half/musl/src/math/i386/remainderf.c | 12 + libc-top-half/musl/src/math/i386/remainderf.s | 14 - libc-top-half/musl/src/math/i386/remainderl.c | 9 + libc-top-half/musl/src/math/i386/remainderl.s | 11 - libc-top-half/musl/src/math/i386/rint.c | 7 + libc-top-half/musl/src/math/i386/rint.s | 6 - libc-top-half/musl/src/math/i386/rintf.c | 7 + libc-top-half/musl/src/math/i386/rintf.s | 6 - libc-top-half/musl/src/math/i386/rintl.c | 7 + libc-top-half/musl/src/math/i386/rintl.s | 6 - libc-top-half/musl/src/math/i386/sqrt.c | 15 + libc-top-half/musl/src/math/i386/sqrt.s | 21 - libc-top-half/musl/src/math/i386/sqrtf.c | 12 + libc-top-half/musl/src/math/i386/sqrtf.s | 7 - libc-top-half/musl/src/math/i386/sqrtl.c | 7 + libc-top-half/musl/src/math/i386/sqrtl.s | 5 - libc-top-half/musl/src/math/m68k/sqrtl.c | 15 + libc-top-half/musl/src/math/sinh.c | 6 +- libc-top-half/musl/src/math/sinhf.c | 6 +- libc-top-half/musl/src/math/x86_64/fabs.c | 10 + libc-top-half/musl/src/math/x86_64/fabs.s | 9 - libc-top-half/musl/src/math/x86_64/fabsf.c | 10 + libc-top-half/musl/src/math/x86_64/fabsf.s | 7 - libc-top-half/musl/src/math/x86_64/fabsl.c | 7 + libc-top-half/musl/src/math/x86_64/fabsl.s | 6 - libc-top-half/musl/src/math/x86_64/fmodl.c | 9 + libc-top-half/musl/src/math/x86_64/fmodl.s | 11 - libc-top-half/musl/src/math/x86_64/llrint.c | 8 + 
libc-top-half/musl/src/math/x86_64/llrint.s | 5 - libc-top-half/musl/src/math/x86_64/llrintf.c | 8 + libc-top-half/musl/src/math/x86_64/llrintf.s | 5 - libc-top-half/musl/src/math/x86_64/llrintl.c | 8 + libc-top-half/musl/src/math/x86_64/llrintl.s | 7 - libc-top-half/musl/src/math/x86_64/lrint.c | 8 + libc-top-half/musl/src/math/x86_64/lrint.s | 5 - libc-top-half/musl/src/math/x86_64/lrintf.c | 8 + libc-top-half/musl/src/math/x86_64/lrintf.s | 5 - libc-top-half/musl/src/math/x86_64/lrintl.c | 8 + libc-top-half/musl/src/math/x86_64/lrintl.s | 7 - .../musl/src/math/x86_64/remainderl.c | 9 + .../musl/src/math/x86_64/remainderl.s | 11 - libc-top-half/musl/src/math/x86_64/remquol.c | 32 ++ libc-top-half/musl/src/math/x86_64/rintl.c | 7 + libc-top-half/musl/src/math/x86_64/rintl.s | 6 - libc-top-half/musl/src/math/x86_64/sqrt.c | 7 + libc-top-half/musl/src/math/x86_64/sqrt.s | 4 - libc-top-half/musl/src/math/x86_64/sqrtf.c | 7 + libc-top-half/musl/src/math/x86_64/sqrtf.s | 4 - libc-top-half/musl/src/math/x86_64/sqrtl.c | 7 + libc-top-half/musl/src/math/x86_64/sqrtl.s | 5 - libc-top-half/musl/src/misc/getentropy.c | 2 +- libc-top-half/musl/src/misc/nftw.c | 22 +- libc-top-half/musl/src/network/getnameinfo.c | 1 + libc-top-half/musl/src/network/lookup_name.c | 12 +- libc-top-half/musl/src/network/res_mkquery.c | 1 + libc-top-half/musl/src/network/res_send.c | 2 +- libc-top-half/musl/src/network/socket.c | 10 +- libc-top-half/musl/src/process/fork.c | 1 + libc-top-half/musl/src/stdio/__string_read.c | 16 - libc-top-half/musl/src/stdio/fmemopen.c | 6 +- libc-top-half/musl/src/stdio/vdprintf.c | 7 +- libc-top-half/musl/src/stdio/vfscanf.c | 5 +- libc-top-half/musl/src/stdio/vsscanf.c | 18 +- libc-top-half/musl/src/stdlib/wcstod.c | 3 +- libc-top-half/musl/src/stdlib/wcstol.c | 3 +- .../musl/src/string/aarch64/memcpy.S | 186 +++++++++ .../musl/src/string/aarch64/memset.S | 115 ++++++ .../src/string/arm/{memcpy_le.S => memcpy.S} | 101 ++++- 
libc-top-half/musl/src/string/arm/memcpy.c | 3 - libc-top-half/musl/src/string/memccpy.c | 2 +- libc-top-half/musl/src/string/memmem.c | 8 +- libc-top-half/musl/src/string/strsignal.c | 10 + libc-top-half/musl/src/string/strstr.c | 8 +- libc-top-half/musl/src/thread/__lock.c | 4 +- .../musl/src/thread/pthread_create.c | 34 +- .../musl/src/thread/pthread_getschedparam.c | 3 + libc-top-half/musl/src/thread/pthread_kill.c | 6 + .../musl/src/thread/pthread_setschedparam.c | 3 + .../musl/src/thread/pthread_setschedprio.c | 3 + libc-top-half/musl/src/time/__tz.c | 12 +- 165 files changed, 2693 insertions(+), 972 deletions(-) delete mode 100644 libc-top-half/musl/src/malloc/DESIGN delete mode 100644 libc-top-half/musl/src/malloc/aligned_alloc.c create mode 100644 libc-top-half/musl/src/malloc/calloc.c delete mode 100644 libc-top-half/musl/src/malloc/expand_heap.c create mode 100644 libc-top-half/musl/src/malloc/mallocng/aligned_alloc.c create mode 100644 libc-top-half/musl/src/malloc/mallocng/donate.c create mode 100644 libc-top-half/musl/src/malloc/mallocng/free.c create mode 100644 libc-top-half/musl/src/malloc/mallocng/glue.h create mode 100644 libc-top-half/musl/src/malloc/mallocng/malloc.c create mode 100644 libc-top-half/musl/src/malloc/mallocng/malloc_usable_size.c create mode 100644 libc-top-half/musl/src/malloc/mallocng/meta.h create mode 100644 libc-top-half/musl/src/malloc/mallocng/realloc.c create mode 100644 libc-top-half/musl/src/malloc/oldmalloc/aligned_alloc.c rename libc-top-half/musl/src/malloc/{ => oldmalloc}/malloc.c (67%) rename libc-top-half/musl/src/{internal => malloc/oldmalloc}/malloc_impl.h (85%) rename libc-top-half/musl/src/malloc/{ => oldmalloc}/malloc_usable_size.c (100%) create mode 100644 libc-top-half/musl/src/malloc/replaced.c create mode 100644 libc-top-half/musl/src/math/i386/fabs.c delete mode 100644 libc-top-half/musl/src/math/i386/fabs.s create mode 100644 libc-top-half/musl/src/math/i386/fabsf.c delete mode 100644 
libc-top-half/musl/src/math/i386/fabsf.s create mode 100644 libc-top-half/musl/src/math/i386/fabsl.c delete mode 100644 libc-top-half/musl/src/math/i386/fabsl.s create mode 100644 libc-top-half/musl/src/math/i386/fmod.c delete mode 100644 libc-top-half/musl/src/math/i386/fmod.s create mode 100644 libc-top-half/musl/src/math/i386/fmodf.c delete mode 100644 libc-top-half/musl/src/math/i386/fmodf.s create mode 100644 libc-top-half/musl/src/math/i386/fmodl.c delete mode 100644 libc-top-half/musl/src/math/i386/fmodl.s create mode 100644 libc-top-half/musl/src/math/i386/llrint.c delete mode 100644 libc-top-half/musl/src/math/i386/llrint.s create mode 100644 libc-top-half/musl/src/math/i386/llrintf.c delete mode 100644 libc-top-half/musl/src/math/i386/llrintf.s create mode 100644 libc-top-half/musl/src/math/i386/llrintl.c delete mode 100644 libc-top-half/musl/src/math/i386/llrintl.s create mode 100644 libc-top-half/musl/src/math/i386/lrint.c delete mode 100644 libc-top-half/musl/src/math/i386/lrint.s create mode 100644 libc-top-half/musl/src/math/i386/lrintf.c delete mode 100644 libc-top-half/musl/src/math/i386/lrintf.s create mode 100644 libc-top-half/musl/src/math/i386/lrintl.c delete mode 100644 libc-top-half/musl/src/math/i386/lrintl.s create mode 100644 libc-top-half/musl/src/math/i386/remainder.c delete mode 100644 libc-top-half/musl/src/math/i386/remainder.s create mode 100644 libc-top-half/musl/src/math/i386/remainderf.c delete mode 100644 libc-top-half/musl/src/math/i386/remainderf.s create mode 100644 libc-top-half/musl/src/math/i386/remainderl.c delete mode 100644 libc-top-half/musl/src/math/i386/remainderl.s create mode 100644 libc-top-half/musl/src/math/i386/rint.c delete mode 100644 libc-top-half/musl/src/math/i386/rint.s create mode 100644 libc-top-half/musl/src/math/i386/rintf.c delete mode 100644 libc-top-half/musl/src/math/i386/rintf.s create mode 100644 libc-top-half/musl/src/math/i386/rintl.c delete mode 100644 libc-top-half/musl/src/math/i386/rintl.s 
create mode 100644 libc-top-half/musl/src/math/i386/sqrt.c delete mode 100644 libc-top-half/musl/src/math/i386/sqrt.s create mode 100644 libc-top-half/musl/src/math/i386/sqrtf.c delete mode 100644 libc-top-half/musl/src/math/i386/sqrtf.s create mode 100644 libc-top-half/musl/src/math/i386/sqrtl.c delete mode 100644 libc-top-half/musl/src/math/i386/sqrtl.s create mode 100644 libc-top-half/musl/src/math/m68k/sqrtl.c create mode 100644 libc-top-half/musl/src/math/x86_64/fabs.c delete mode 100644 libc-top-half/musl/src/math/x86_64/fabs.s create mode 100644 libc-top-half/musl/src/math/x86_64/fabsf.c delete mode 100644 libc-top-half/musl/src/math/x86_64/fabsf.s create mode 100644 libc-top-half/musl/src/math/x86_64/fabsl.c delete mode 100644 libc-top-half/musl/src/math/x86_64/fabsl.s create mode 100644 libc-top-half/musl/src/math/x86_64/fmodl.c delete mode 100644 libc-top-half/musl/src/math/x86_64/fmodl.s create mode 100644 libc-top-half/musl/src/math/x86_64/llrint.c delete mode 100644 libc-top-half/musl/src/math/x86_64/llrint.s create mode 100644 libc-top-half/musl/src/math/x86_64/llrintf.c delete mode 100644 libc-top-half/musl/src/math/x86_64/llrintf.s create mode 100644 libc-top-half/musl/src/math/x86_64/llrintl.c delete mode 100644 libc-top-half/musl/src/math/x86_64/llrintl.s create mode 100644 libc-top-half/musl/src/math/x86_64/lrint.c delete mode 100644 libc-top-half/musl/src/math/x86_64/lrint.s create mode 100644 libc-top-half/musl/src/math/x86_64/lrintf.c delete mode 100644 libc-top-half/musl/src/math/x86_64/lrintf.s create mode 100644 libc-top-half/musl/src/math/x86_64/lrintl.c delete mode 100644 libc-top-half/musl/src/math/x86_64/lrintl.s create mode 100644 libc-top-half/musl/src/math/x86_64/remainderl.c delete mode 100644 libc-top-half/musl/src/math/x86_64/remainderl.s create mode 100644 libc-top-half/musl/src/math/x86_64/remquol.c create mode 100644 libc-top-half/musl/src/math/x86_64/rintl.c delete mode 100644 libc-top-half/musl/src/math/x86_64/rintl.s create 
mode 100644 libc-top-half/musl/src/math/x86_64/sqrt.c delete mode 100644 libc-top-half/musl/src/math/x86_64/sqrt.s create mode 100644 libc-top-half/musl/src/math/x86_64/sqrtf.c delete mode 100644 libc-top-half/musl/src/math/x86_64/sqrtf.s create mode 100644 libc-top-half/musl/src/math/x86_64/sqrtl.c delete mode 100644 libc-top-half/musl/src/math/x86_64/sqrtl.s delete mode 100644 libc-top-half/musl/src/stdio/__string_read.c create mode 100644 libc-top-half/musl/src/string/aarch64/memcpy.S create mode 100644 libc-top-half/musl/src/string/aarch64/memset.S rename libc-top-half/musl/src/string/arm/{memcpy_le.S => memcpy.S} (82%) delete mode 100644 libc-top-half/musl/src/string/arm/memcpy.c diff --git a/expected/wasm32-wasi/defined-symbols.txt b/expected/wasm32-wasi/defined-symbols.txt index 5208dbbac..c3efc0835 100644 --- a/expected/wasm32-wasi/defined-symbols.txt +++ b/expected/wasm32-wasi/defined-symbols.txt @@ -218,7 +218,6 @@ __strcoll_l __strerror_l __strftime_fmt_1 __strftime_l -__string_read __strncasecmp_l __strtoimax_internal __strtol_internal diff --git a/libc-top-half/musl/COPYRIGHT b/libc-top-half/musl/COPYRIGHT index e64723714..c1628e9ac 100644 --- a/libc-top-half/musl/COPYRIGHT +++ b/libc-top-half/musl/COPYRIGHT @@ -127,10 +127,13 @@ Copyright © 2017-2018 Arm Limited and labelled as such in comments in the individual source files. All have been licensed under extremely permissive terms. -The ARM memcpy code (src/string/arm/memcpy_el.S) is Copyright © 2008 +The ARM memcpy code (src/string/arm/memcpy.S) is Copyright © 2008 The Android Open Source Project and is licensed under a two-clause BSD license. It was taken from Bionic libc, used on Android. +The AArch64 memcpy and memset code (src/string/aarch64/*) are +Copyright © 1999-2019, Arm Limited. + The implementation of DES for crypt (src/crypt/crypt_des.c) is Copyright © 1994 David Burren. It is licensed under a BSD license. 
diff --git a/libc-top-half/musl/Makefile b/libc-top-half/musl/Makefile index bd8f5c389..e8cc44367 100644 --- a/libc-top-half/musl/Makefile +++ b/libc-top-half/musl/Makefile @@ -17,7 +17,8 @@ includedir = $(prefix)/include libdir = $(prefix)/lib syslibdir = /lib -SRC_DIRS = $(addprefix $(srcdir)/,src/* crt ldso $(COMPAT_SRC_DIRS)) +MALLOC_DIR = mallocng +SRC_DIRS = $(addprefix $(srcdir)/,src/* src/malloc/$(MALLOC_DIR) crt ldso $(COMPAT_SRC_DIRS)) BASE_GLOBS = $(addsuffix /*.c,$(SRC_DIRS)) ARCH_GLOBS = $(addsuffix /$(ARCH)/*.[csS],$(SRC_DIRS)) BASE_SRCS = $(sort $(wildcard $(BASE_GLOBS))) diff --git a/libc-top-half/musl/VERSION b/libc-top-half/musl/VERSION index 26aaba0e8..6085e9465 100644 --- a/libc-top-half/musl/VERSION +++ b/libc-top-half/musl/VERSION @@ -1 +1 @@ -1.2.0 +1.2.1 diff --git a/libc-top-half/musl/WHATSNEW b/libc-top-half/musl/WHATSNEW index ecf0cebab..d9826fc0e 100644 --- a/libc-top-half/musl/WHATSNEW +++ b/libc-top-half/musl/WHATSNEW @@ -2200,3 +2200,39 @@ arch-specific bugs fixed: - arm dynamic linker chose wrong tls/atomic variants since 1.1.21 - some math library functions returned excess precision on i386 - unconfirmed regression in fchmodat AT_SYMLINK_NOFOLLOW on mips* + + + +1.2.1 release notes + +major changes: +- new malloc implementation (mallocng & overhauled bump allocator) + +new features: +- DNS queries via res_* now set AD flag, report zone signedness (DNSSEC) +- PTHREAD_NULL macro (POSIX-future) + +performance: +- optimized memcpy and memset for aarch64 +- optimized memcpy for arm now supports big endian +- optimized x86_64 remquol +- improved strerror without linear search + +bugs fixed: +- lock-skipping for processes that returned to single-threaded was wrong +- AF_UNSPEC dns lookups mishandled single failure in paired A+AAAA +- res_send and res_query returned wrong value on errors from nameserver +- corrupted sysvipc timestamps on 32-bit archs with old kernels +- incorrect parsing of timezone offsets after overly-long zone name +- 
clock_adjtime was broken on 32-bit archs (time64) +- pthread_kill was not async-signal-safe +- pthread_cancel was not async-cancel-safe +- large-ulp errors in various math functions in non-default rounding modes + +arch-specific bugs fixed: +- arm clock_gettime was broken on some hw due to bad time64 vdso +- m68k sqrtl lacked long double precision +- mips* syscall mechanism regressions on older kernels +- mips* had negated error codes for some syscalls (kernel bug) +- mips* SIGEMT was wrongly called SIGSTKFLT +- sh fesetround didn't work correctly on sh diff --git a/libc-top-half/musl/arch/arm/syscall_arch.h b/libc-top-half/musl/arch/arm/syscall_arch.h index 4b08762d7..a877b2cff 100644 --- a/libc-top-half/musl/arch/arm/syscall_arch.h +++ b/libc-top-half/musl/arch/arm/syscall_arch.h @@ -98,12 +98,6 @@ static inline long __syscall6(long n, long a, long b, long c, long d, long e, lo __asm_syscall(R7_OPERAND, "0"(r0), "r"(r1), "r"(r2), "r"(r3), "r"(r4), "r"(r5)); } -#define VDSO_USEFUL -#define VDSO_CGT32_SYM "__vdso_clock_gettime" -#define VDSO_CGT32_VER "LINUX_2.6" -#define VDSO_CGT_SYM "__vdso_clock_gettime64" -#define VDSO_CGT_VER "LINUX_2.6" - #define SYSCALL_FADVISE_6_ARG #define SYSCALL_IPC_BROKEN_MODE diff --git a/libc-top-half/musl/arch/mips/bits/signal.h b/libc-top-half/musl/arch/mips/bits/signal.h index e1d97ac78..1b69e7625 100644 --- a/libc-top-half/musl/arch/mips/bits/signal.h +++ b/libc-top-half/musl/arch/mips/bits/signal.h @@ -93,7 +93,7 @@ typedef struct __ucontext { #define SIGTRAP 5 #define SIGABRT 6 #define SIGIOT SIGABRT -#define SIGSTKFLT 7 +#define SIGEMT 7 #define SIGFPE 8 #define SIGKILL 9 #define SIGBUS 10 diff --git a/libc-top-half/musl/arch/mips/syscall_arch.h b/libc-top-half/musl/arch/mips/syscall_arch.h index f821e73fc..380a94b36 100644 --- a/libc-top-half/musl/arch/mips/syscall_arch.h +++ b/libc-top-half/musl/arch/mips/syscall_arch.h @@ -18,26 +18,26 @@ static inline long __syscall0(long n) { register long r7 __asm__("$7"); - register long 
r2 __asm__("$2") = n; + register long r2 __asm__("$2"); __asm__ __volatile__ ( - "syscall" - : "+r"(r2), "=r"(r7) - : + "addu $2,$0,%2 ; syscall" + : "=&r"(r2), "=r"(r7) + : "ir"(n), "0"(r2) : SYSCALL_CLOBBERLIST, "$8", "$9", "$10"); - return r7 ? -r2 : r2; + return r7 && r2>0 ? -r2 : r2; } static inline long __syscall1(long n, long a) { register long r4 __asm__("$4") = a; register long r7 __asm__("$7"); - register long r2 __asm__("$2") = n; + register long r2 __asm__("$2"); __asm__ __volatile__ ( - "syscall" - : "+r"(r2), "=r"(r7) - : "r"(r4) + "addu $2,$0,%2 ; syscall" + : "=&r"(r2), "=r"(r7) + : "ir"(n), "0"(r2), "r"(r4) : SYSCALL_CLOBBERLIST, "$8", "$9", "$10"); - return r7 ? -r2 : r2; + return r7 && r2>0 ? -r2 : r2; } static inline long __syscall2(long n, long a, long b) @@ -45,13 +45,13 @@ static inline long __syscall2(long n, long a, long b) register long r4 __asm__("$4") = a; register long r5 __asm__("$5") = b; register long r7 __asm__("$7"); - register long r2 __asm__("$2") = n; + register long r2 __asm__("$2"); __asm__ __volatile__ ( - "syscall" - : "+r"(r2), "=r"(r7) - : "r"(r4), "r"(r5) + "addu $2,$0,%2 ; syscall" + : "=&r"(r2), "=r"(r7) + : "ir"(n), "0"(r2), "r"(r4), "r"(r5) : SYSCALL_CLOBBERLIST, "$8", "$9", "$10"); - return r7 ? -r2 : r2; + return r7 && r2>0 ? -r2 : r2; } static inline long __syscall3(long n, long a, long b, long c) @@ -60,13 +60,13 @@ static inline long __syscall3(long n, long a, long b, long c) register long r5 __asm__("$5") = b; register long r6 __asm__("$6") = c; register long r7 __asm__("$7"); - register long r2 __asm__("$2") = n; + register long r2 __asm__("$2"); __asm__ __volatile__ ( - "syscall" - : "+r"(r2), "=r"(r7) - : "r"(r4), "r"(r5), "r"(r6) + "addu $2,$0,%2 ; syscall" + : "=&r"(r2), "=r"(r7) + : "ir"(n), "0"(r2), "r"(r4), "r"(r5), "r"(r6) : SYSCALL_CLOBBERLIST, "$8", "$9", "$10"); - return r7 ? -r2 : r2; + return r7 && r2>0 ? 
-r2 : r2; } static inline long __syscall4(long n, long a, long b, long c, long d) @@ -75,13 +75,13 @@ static inline long __syscall4(long n, long a, long b, long c, long d) register long r5 __asm__("$5") = b; register long r6 __asm__("$6") = c; register long r7 __asm__("$7") = d; - register long r2 __asm__("$2") = n; + register long r2 __asm__("$2"); __asm__ __volatile__ ( - "syscall" - : "+r"(r2), "+r"(r7) - : "r"(r4), "r"(r5), "r"(r6) + "addu $2,$0,%2 ; syscall" + : "=&r"(r2), "+r"(r7) + : "ir"(n), "0"(r2), "r"(r4), "r"(r5), "r"(r6) : SYSCALL_CLOBBERLIST, "$8", "$9", "$10"); - return r7 ? -r2 : r2; + return r7 && r2>0 ? -r2 : r2; } static inline long __syscall5(long n, long a, long b, long c, long d, long e) @@ -91,15 +91,15 @@ static inline long __syscall5(long n, long a, long b, long c, long d, long e) register long r6 __asm__("$6") = c; register long r7 __asm__("$7") = d; register long r8 __asm__("$8") = e; - register long r2 __asm__("$2") = n; + register long r2 __asm__("$2"); __asm__ __volatile__ ( "subu $sp,$sp,32 ; sw $8,16($sp) ; " - "syscall ;" + "addu $2,$0,%3 ; syscall ;" "addu $sp,$sp,32" - : "+r"(r2), "+r"(r7), "+r"(r8) - : "r"(r4), "r"(r5), "r"(r6) + : "=&r"(r2), "+r"(r7), "+r"(r8) + : "ir"(n), "0"(r2), "r"(r4), "r"(r5), "r"(r6) : SYSCALL_CLOBBERLIST, "$9", "$10"); - return r7 ? -r2 : r2; + return r7 && r2>0 ? 
-r2 : r2; } static inline long __syscall6(long n, long a, long b, long c, long d, long e, long f) @@ -110,15 +110,15 @@ static inline long __syscall6(long n, long a, long b, long c, long d, long e, lo register long r7 __asm__("$7") = d; register long r8 __asm__("$8") = e; register long r9 __asm__("$9") = f; - register long r2 __asm__("$2") = n; + register long r2 __asm__("$2"); __asm__ __volatile__ ( "subu $sp,$sp,32 ; sw $8,16($sp) ; sw $9,20($sp) ; " - "syscall ;" + "addu $2,$0,%4 ; syscall ;" "addu $sp,$sp,32" - : "+r"(r2), "+r"(r7), "+r"(r8), "+r"(r9) - : "r"(r4), "r"(r5), "r"(r6) + : "=&r"(r2), "+r"(r7), "+r"(r8), "+r"(r9) + : "ir"(n), "0"(r2), "r"(r4), "r"(r5), "r"(r6) : SYSCALL_CLOBBERLIST, "$10"); - return r7 ? -r2 : r2; + return r7 && r2>0 ? -r2 : r2; } static inline long __syscall7(long n, long a, long b, long c, long d, long e, long f, long g) @@ -130,15 +130,15 @@ static inline long __syscall7(long n, long a, long b, long c, long d, long e, lo register long r8 __asm__("$8") = e; register long r9 __asm__("$9") = f; register long r10 __asm__("$10") = g; - register long r2 __asm__("$2") = n; + register long r2 __asm__("$2"); __asm__ __volatile__ ( "subu $sp,$sp,32 ; sw $8,16($sp) ; sw $9,20($sp) ; sw $10,24($sp) ; " - "syscall ;" + "addu $2,$0,%5 ; syscall ;" "addu $sp,$sp,32" - : "+r"(r2), "+r"(r7), "+r"(r8), "+r"(r9), "+r"(r10) - : "r"(r4), "r"(r5), "r"(r6) + : "=&r"(r2), "+r"(r7), "+r"(r8), "+r"(r9), "+r"(r10) + : "ir"(n), "0"(r2), "r"(r4), "r"(r5), "r"(r6) : SYSCALL_CLOBBERLIST); - return r7 ? -r2 : r2; + return r7 && r2>0 ? 
-r2 : r2; } #define VDSO_USEFUL diff --git a/libc-top-half/musl/arch/mips64/bits/signal.h b/libc-top-half/musl/arch/mips64/bits/signal.h index c31ad07ec..4f91c9fc8 100644 --- a/libc-top-half/musl/arch/mips64/bits/signal.h +++ b/libc-top-half/musl/arch/mips64/bits/signal.h @@ -112,7 +112,7 @@ typedef struct __ucontext { #define SIGTRAP 5 #define SIGABRT 6 #define SIGIOT SIGABRT -#define SIGSTKFLT 7 +#define SIGEMT 7 #define SIGFPE 8 #define SIGKILL 9 #define SIGBUS 10 diff --git a/libc-top-half/musl/arch/mips64/syscall_arch.h b/libc-top-half/musl/arch/mips64/syscall_arch.h index 69c429b86..ae6532fcd 100644 --- a/libc-top-half/musl/arch/mips64/syscall_arch.h +++ b/libc-top-half/musl/arch/mips64/syscall_arch.h @@ -16,26 +16,26 @@ static inline long __syscall0(long n) { register long r7 __asm__("$7"); - register long r2 __asm__("$2") = n; + register long r2 __asm__("$2"); __asm__ __volatile__ ( - "syscall" - : "+&r"(r2), "=r"(r7) - : + "daddu $2,$0,%2 ; syscall" + : "=&r"(r2), "=r"(r7) + : "ir"(n), "0"(r2) : SYSCALL_CLOBBERLIST); - return r7 ? -r2 : r2; + return r7 && r2>0 ? -r2 : r2; } static inline long __syscall1(long n, long a) { register long r4 __asm__("$4") = a; register long r7 __asm__("$7"); - register long r2 __asm__("$2") = n; + register long r2 __asm__("$2"); __asm__ __volatile__ ( - "syscall" - : "+&r"(r2), "=r"(r7) - : "r"(r4) + "daddu $2,$0,%2 ; syscall" + : "=&r"(r2), "=r"(r7) + : "ir"(n), "0"(r2), "r"(r4) : SYSCALL_CLOBBERLIST); - return r7 ? -r2 : r2; + return r7 && r2>0 ? 
-r2 : r2; } static inline long __syscall2(long n, long a, long b) @@ -43,14 +43,14 @@ static inline long __syscall2(long n, long a, long b) register long r4 __asm__("$4") = a; register long r5 __asm__("$5") = b; register long r7 __asm__("$7"); - register long r2 __asm__("$2") = n; + register long r2 __asm__("$2"); __asm__ __volatile__ ( - "syscall" - : "+&r"(r2), "=r"(r7) - : "r"(r4), "r"(r5) + "daddu $2,$0,%2 ; syscall" + : "=&r"(r2), "=r"(r7) + : "ir"(n), "0"(r2), "r"(r4), "r"(r5) : SYSCALL_CLOBBERLIST); - return r7 ? -r2 : r2; + return r7 && r2>0 ? -r2 : r2; } static inline long __syscall3(long n, long a, long b, long c) @@ -59,14 +59,14 @@ static inline long __syscall3(long n, long a, long b, long c) register long r5 __asm__("$5") = b; register long r6 __asm__("$6") = c; register long r7 __asm__("$7"); - register long r2 __asm__("$2") = n; + register long r2 __asm__("$2"); __asm__ __volatile__ ( - "syscall" - : "+&r"(r2), "=r"(r7) - : "r"(r4), "r"(r5), "r"(r6) + "daddu $2,$0,%2 ; syscall" + : "=&r"(r2), "=r"(r7) + : "ir"(n), "0"(r2), "r"(r4), "r"(r5), "r"(r6) : SYSCALL_CLOBBERLIST); - return r7 ? -r2 : r2; + return r7 && r2>0 ? -r2 : r2; } static inline long __syscall4(long n, long a, long b, long c, long d) @@ -75,14 +75,14 @@ static inline long __syscall4(long n, long a, long b, long c, long d) register long r5 __asm__("$5") = b; register long r6 __asm__("$6") = c; register long r7 __asm__("$7") = d; - register long r2 __asm__("$2") = n; + register long r2 __asm__("$2"); __asm__ __volatile__ ( - "syscall" - : "+&r"(r2), "+r"(r7) - : "r"(r4), "r"(r5), "r"(r6) + "daddu $2,$0,%2 ; syscall" + : "=&r"(r2), "+r"(r7) + : "ir"(n), "0"(r2), "r"(r4), "r"(r5), "r"(r6) : SYSCALL_CLOBBERLIST); - return r7 ? -r2 : r2; + return r7 && r2>0 ? 
-r2 : r2; } static inline long __syscall5(long n, long a, long b, long c, long d, long e) @@ -92,14 +92,14 @@ static inline long __syscall5(long n, long a, long b, long c, long d, long e) register long r6 __asm__("$6") = c; register long r7 __asm__("$7") = d; register long r8 __asm__("$8") = e; - register long r2 __asm__("$2") = n; + register long r2 __asm__("$2"); __asm__ __volatile__ ( - "syscall" - : "+&r"(r2), "+r"(r7) - : "r"(r4), "r"(r5), "r"(r6), "r"(r8) + "daddu $2,$0,%2 ; syscall" + : "=&r"(r2), "+r"(r7) + : "ir"(n), "0"(r2), "r"(r4), "r"(r5), "r"(r6), "r"(r8) : SYSCALL_CLOBBERLIST); - return r7 ? -r2 : r2; + return r7 && r2>0 ? -r2 : r2; } static inline long __syscall6(long n, long a, long b, long c, long d, long e, long f) @@ -110,14 +110,14 @@ static inline long __syscall6(long n, long a, long b, long c, long d, long e, lo register long r7 __asm__("$7") = d; register long r8 __asm__("$8") = e; register long r9 __asm__("$9") = f; - register long r2 __asm__("$2") = n; + register long r2 __asm__("$2"); __asm__ __volatile__ ( - "syscall" - : "+&r"(r2), "+r"(r7) - : "r"(r4), "r"(r5), "r"(r6), "r"(r8), "r"(r9) + "daddu $2,$0,%2 ; syscall" + : "=&r"(r2), "+r"(r7) + : "ir"(n), "0"(r2), "r"(r4), "r"(r5), "r"(r6), "r"(r8), "r"(r9) : SYSCALL_CLOBBERLIST); - return r7 ? -r2 : r2; + return r7 && r2>0 ? 
-r2 : r2; } #define VDSO_USEFUL diff --git a/libc-top-half/musl/arch/mipsn32/bits/signal.h b/libc-top-half/musl/arch/mipsn32/bits/signal.h index c31ad07ec..4f91c9fc8 100644 --- a/libc-top-half/musl/arch/mipsn32/bits/signal.h +++ b/libc-top-half/musl/arch/mipsn32/bits/signal.h @@ -112,7 +112,7 @@ typedef struct __ucontext { #define SIGTRAP 5 #define SIGABRT 6 #define SIGIOT SIGABRT -#define SIGSTKFLT 7 +#define SIGEMT 7 #define SIGFPE 8 #define SIGKILL 9 #define SIGBUS 10 diff --git a/libc-top-half/musl/arch/mipsn32/syscall_arch.h b/libc-top-half/musl/arch/mipsn32/syscall_arch.h index c1a4b7da3..c681905d0 100644 --- a/libc-top-half/musl/arch/mipsn32/syscall_arch.h +++ b/libc-top-half/musl/arch/mipsn32/syscall_arch.h @@ -16,26 +16,26 @@ static inline long __syscall0(long n) { register long r7 __asm__("$7"); - register long r2 __asm__("$2") = n; + register long r2 __asm__("$2"); __asm__ __volatile__ ( - "syscall" - : "+&r"(r2), "=r"(r7) - : + "daddu $2,$0,%2 ; syscall" + : "=&r"(r2), "=r"(r7) + : "ir"(n), "0"(r2) : SYSCALL_CLOBBERLIST); - return r7 ? -r2 : r2; + return r7 && r2>0 ? -r2 : r2; } static inline long __syscall1(long n, long a) { register long r4 __asm__("$4") = a; register long r7 __asm__("$7"); - register long r2 __asm__("$2") = n; + register long r2 __asm__("$2"); __asm__ __volatile__ ( - "syscall" - : "+&r"(r2), "=r"(r7) - : "r"(r4) + "daddu $2,$0,%2 ; syscall" + : "=&r"(r2), "=r"(r7) + : "ir"(n), "0"(r2), "r"(r4) : SYSCALL_CLOBBERLIST); - return r7 ? -r2 : r2; + return r7 && r2>0 ? 
-r2 : r2; } static inline long __syscall2(long n, long a, long b) @@ -43,13 +43,14 @@ static inline long __syscall2(long n, long a, long b) register long r4 __asm__("$4") = a; register long r5 __asm__("$5") = b; register long r7 __asm__("$7"); - register long r2 __asm__("$2") = n; + register long r2 __asm__("$2"); + __asm__ __volatile__ ( - "syscall" - : "+&r"(r2), "=r"(r7) - : "r"(r4), "r"(r5) + "daddu $2,$0,%2 ; syscall" + : "=&r"(r2), "=r"(r7) + : "ir"(n), "0"(r2), "r"(r4), "r"(r5) : SYSCALL_CLOBBERLIST); - return r7 ? -r2 : r2; + return r7 && r2>0 ? -r2 : r2; } static inline long __syscall3(long n, long a, long b, long c) @@ -58,13 +59,14 @@ static inline long __syscall3(long n, long a, long b, long c) register long r5 __asm__("$5") = b; register long r6 __asm__("$6") = c; register long r7 __asm__("$7"); - register long r2 __asm__("$2") = n; + register long r2 __asm__("$2"); + __asm__ __volatile__ ( - "syscall" - : "+&r"(r2), "=r"(r7) - : "r"(r4), "r"(r5), "r"(r6) + "daddu $2,$0,%2 ; syscall" + : "=&r"(r2), "=r"(r7) + : "ir"(n), "0"(r2), "r"(r4), "r"(r5), "r"(r6) : SYSCALL_CLOBBERLIST); - return r7 ? -r2 : r2; + return r7 && r2>0 ? -r2 : r2; } static inline long __syscall4(long n, long a, long b, long c, long d) @@ -73,13 +75,14 @@ static inline long __syscall4(long n, long a, long b, long c, long d) register long r5 __asm__("$5") = b; register long r6 __asm__("$6") = c; register long r7 __asm__("$7") = d; - register long r2 __asm__("$2") = n; + register long r2 __asm__("$2"); + __asm__ __volatile__ ( - "syscall" - : "+&r"(r2), "+r"(r7) - : "r"(r4), "r"(r5), "r"(r6) + "daddu $2,$0,%2 ; syscall" + : "=&r"(r2), "+r"(r7) + : "ir"(n), "0"(r2), "r"(r4), "r"(r5), "r"(r6) : SYSCALL_CLOBBERLIST); - return r7 ? -r2 : r2; + return r7 && r2>0 ? 
-r2 : r2; } static inline long __syscall5(long n, long a, long b, long c, long d, long e) @@ -89,13 +92,14 @@ static inline long __syscall5(long n, long a, long b, long c, long d, long e) register long r6 __asm__("$6") = c; register long r7 __asm__("$7") = d; register long r8 __asm__("$8") = e; - register long r2 __asm__("$2") = n; + register long r2 __asm__("$2"); + __asm__ __volatile__ ( - "syscall" - : "+&r"(r2), "+r"(r7) - : "r"(r4), "r"(r5), "r"(r6), "r"(r8) + "daddu $2,$0,%2 ; syscall" + : "=&r"(r2), "+r"(r7) + : "ir"(n), "0"(r2), "r"(r4), "r"(r5), "r"(r6), "r"(r8) : SYSCALL_CLOBBERLIST); - return r7 ? -r2 : r2; + return r7 && r2>0 ? -r2 : r2; } static inline long __syscall6(long n, long a, long b, long c, long d, long e, long f) @@ -106,13 +110,14 @@ static inline long __syscall6(long n, long a, long b, long c, long d, long e, lo register long r7 __asm__("$7") = d; register long r8 __asm__("$8") = e; register long r9 __asm__("$9") = f; - register long r2 __asm__("$2") = n; + register long r2 __asm__("$2"); + __asm__ __volatile__ ( - "syscall" - : "+&r"(r2), "+r"(r7) - : "r"(r4), "r"(r5), "r"(r6), "r"(r8), "r"(r9) + "daddu $2,$0,%2 ; syscall" + : "=&r"(r2), "+r"(r7) + : "ir"(n), "0"(r2), "r"(r4), "r"(r5), "r"(r6), "r"(r8), "r"(r9) : SYSCALL_CLOBBERLIST); - return r7 ? -r2 : r2; + return r7 && r2>0 ? 
-r2 : r2; } #define VDSO_USEFUL diff --git a/libc-top-half/musl/arch/powerpc/bits/mman.h b/libc-top-half/musl/arch/powerpc/bits/mman.h index b3a675a82..95ec4358d 100644 --- a/libc-top-half/musl/arch/powerpc/bits/mman.h +++ b/libc-top-half/musl/arch/powerpc/bits/mman.h @@ -4,7 +4,6 @@ #define MAP_NORESERVE 0x40 #undef MAP_LOCKED #define MAP_LOCKED 0x80 -#undef MAP_SYNC #undef MCL_CURRENT #define MCL_CURRENT 0x2000 diff --git a/libc-top-half/musl/arch/powerpc64/bits/mman.h b/libc-top-half/musl/arch/powerpc64/bits/mman.h index b3a675a82..95ec4358d 100644 --- a/libc-top-half/musl/arch/powerpc64/bits/mman.h +++ b/libc-top-half/musl/arch/powerpc64/bits/mman.h @@ -4,7 +4,6 @@ #define MAP_NORESERVE 0x40 #undef MAP_LOCKED #define MAP_LOCKED 0x80 -#undef MAP_SYNC #undef MCL_CURRENT #define MCL_CURRENT 0x2000 diff --git a/libc-top-half/musl/configure b/libc-top-half/musl/configure index 8bacaee9c..c50a66107 100755 --- a/libc-top-half/musl/configure +++ b/libc-top-half/musl/configure @@ -35,6 +35,9 @@ Optional features: --disable-shared inhibit building shared library [enabled] --disable-static inhibit building static library [enabled] +Optional packages: + --with-malloc=... choose malloc implementation [mallocng] + Some influential environment variables: CC C compiler command [detected] CFLAGS C compiler flags [-Os -pipe ...] 
@@ -139,6 +142,7 @@ static=yes wrapper=auto gcc_wrapper=no clang_wrapper=no +malloc_dir=mallocng for arg ; do case "$arg" in @@ -168,6 +172,7 @@ case "$arg" in --disable-wrapper|--enable-wrapper=no) wrapper=no ;; --enable-gcc-wrapper|--enable-gcc-wrapper=yes) wrapper=yes ; gcc_wrapper=yes ;; --disable-gcc-wrapper|--enable-gcc-wrapper=no) wrapper=no ;; +--with-malloc=*) malloc_dir=${arg#*=} ;; --enable-*|--disable-*|--with-*|--without-*|--*dir=*) ;; --host=*|--target=*) target=${arg#*=} ;; --build=*) build=${arg#*=} ;; @@ -214,6 +219,12 @@ done set +C trap 'rm "$tmpc"' EXIT INT QUIT TERM HUP +# +# Check that the requested malloc implementation exists +# +test -d "$srcdir/src/malloc/$malloc_dir" \ +|| fail "$0: error: chosen malloc implementation '$malloc_dir' does not exist" + # # Check whether we are cross-compiling, and set a default # CROSS_COMPILE prefix if none was provided. @@ -496,6 +507,13 @@ fnmatch '-march=*|*\ -march=*' "$CC $CFLAGS" || tryldflag CFLAGS_AUTO -march=i48 fnmatch '-mtune=*|*\ -mtune=*' "$CC $CFLAGS" || tryldflag CFLAGS_AUTO -mtune=generic fi +# +# GCC defines -w as overriding any -W options, regardless of order, but +# clang has a bunch of annoying warnings enabled by default and needs -w +# to start from a clean slate. So use -w if building with clang. +# +test "$cc_family" = clang && tryflag CFLAGS_AUTO -w + # # Even with -std=c99, gcc accepts some constructs which are constraint # violations. 
We want to treat these as errors regardless of whether @@ -774,6 +792,7 @@ OPTIMIZE_GLOBS = $OPTIMIZE_GLOBS ALL_TOOLS = $tools TOOL_LIBS = $tool_libs ADD_CFI = $ADD_CFI +MALLOC_DIR = $malloc_dir EOF test "x$static" = xno && echo "STATIC_LIBS =" test "x$shared" = xno && echo "SHARED_LIBS =" diff --git a/libc-top-half/musl/include/arpa/inet.h b/libc-top-half/musl/include/arpa/inet.h index be038dd3b..f657c30ec 100644 --- a/libc-top-half/musl/include/arpa/inet.h +++ b/libc-top-half/musl/include/arpa/inet.h @@ -28,11 +28,6 @@ in_addr_t inet_lnaof(struct in_addr); in_addr_t inet_netof(struct in_addr); #endif -#undef INET_ADDRSTRLEN -#undef INET6_ADDRSTRLEN -#define INET_ADDRSTRLEN 16 -#define INET6_ADDRSTRLEN 46 - #ifdef __cplusplus } #endif diff --git a/libc-top-half/musl/include/netinet/in.h b/libc-top-half/musl/include/netinet/in.h index 887b926e2..06964893f 100644 --- a/libc-top-half/musl/include/netinet/in.h +++ b/libc-top-half/musl/include/netinet/in.h @@ -74,8 +74,6 @@ struct ipv6_mreq { extern const struct in6_addr in6addr_any, in6addr_loopback; -#undef INET_ADDRSTRLEN -#undef INET6_ADDRSTRLEN #define INET_ADDRSTRLEN 16 #define INET6_ADDRSTRLEN 46 diff --git a/libc-top-half/musl/include/pthread.h b/libc-top-half/musl/include/pthread.h index d4a9b65f4..b11a567d5 100644 --- a/libc-top-half/musl/include/pthread.h +++ b/libc-top-half/musl/include/pthread.h @@ -80,6 +80,9 @@ extern "C" { #define PTHREAD_BARRIER_SERIAL_THREAD (-1) +#define PTHREAD_NULL ((pthread_t)0) + + int pthread_create(pthread_t *__restrict, const pthread_attr_t *__restrict, void *(*)(void *), void *__restrict); int pthread_detach(pthread_t); _Noreturn void pthread_exit(void *); diff --git a/libc-top-half/musl/ldso/dynlink.c b/libc-top-half/musl/ldso/dynlink.c index afec985a3..d3d4ddd28 100644 --- a/libc-top-half/musl/ldso/dynlink.c +++ b/libc-top-half/musl/ldso/dynlink.c @@ -23,7 +23,6 @@ #include "pthread_impl.h" #include "libc.h" #include "dynlink.h" -#include "malloc_impl.h" static void 
error(const char *, ...); @@ -415,8 +414,6 @@ static void do_relocs(struct dso *dso, size_t *rel, size_t rel_size, size_t stri } switch(type) { - case REL_NONE: - break; case REL_OFFSET: addend -= (size_t)reloc_addr; case REL_SYMBOLIC: @@ -1938,6 +1935,8 @@ void __dls3(size_t *sp, size_t *auxv) * possibility of incomplete replacement. */ if (find_sym(head, "malloc", 1).dso != &ldso) __malloc_replaced = 1; + if (find_sym(head, "aligned_alloc", 1).dso != &ldso) + __aligned_alloc_replaced = 1; /* Switch to runtime mode: any further failures in the dynamic * linker are a reportable failure rather than a fatal startup diff --git a/libc-top-half/musl/src/errno/__strerror.h b/libc-top-half/musl/src/errno/__strerror.h index f84bdcc41..db0660d2e 100644 --- a/libc-top-half/musl/src/errno/__strerror.h +++ b/libc-top-half/musl/src/errno/__strerror.h @@ -1,13 +1,14 @@ -/* This file is sorted such that 'errors' which represent exceptional - * conditions under which a correct program may fail come first, followed - * by messages that indicate an incorrect program or system failure. The - * macro E() along with double-inclusion is used to ensure that ordering - * of the strings remains synchronized. */ +/* The first entry is a catch-all for codes not enumerated here. + * This file is included multiple times to declare and define a structure + * with these messages, and then to define a lookup table translating + * error codes to offsets of corresponding fields in the structure. */ #ifdef __wasilibc_unmodified_upstream // Print "Success" for ESUCCESS. +E(0, "No error information") #else E(0, "Success") #endif + E(EILSEQ, "Illegal byte sequence") E(EDOM, "Domain error") E(ERANGE, "Result not representable") @@ -128,9 +129,3 @@ E(EMULTIHOP, "Multihop attempted") // WASI adds this errno code. E(ENOTCAPABLE, "Capabilities insufficient") #endif - -#ifdef __wasilibc_unmodified_upstream // Print "Success" for ESUCCESS. 
-E(0, "No error information") -#else -E(UCHAR_MAX, "No error information") -#endif diff --git a/libc-top-half/musl/src/errno/strerror.c b/libc-top-half/musl/src/errno/strerror.c index 84020962b..7f926432b 100644 --- a/libc-top-half/musl/src/errno/strerror.c +++ b/libc-top-half/musl/src/errno/strerror.c @@ -1,34 +1,41 @@ #include +#include #include #include "locale_impl.h" -#define E(a,b) ((unsigned char)a), -static const unsigned char errid[] = { +/* mips has one error code outside of the 8-bit range due to a + * historical typo, so we just remap it. */ +#if EDQUOT==1133 +#define EDQUOT_ORIG 1133 +#undef EDQUOT +#define EDQUOT 109 +#endif + +static const struct errmsgstr_t { +#define E(n, s) char str##n[sizeof(s)]; +#include "__strerror.h" +#undef E +} errmsgstr = { +#define E(n, s) s, #include "__strerror.h" +#undef E }; -#undef E -#define E(a,b) b "\0" -static const char errmsg[] = +static const unsigned short errmsgidx[] = { +#define E(n, s) [n] = offsetof(struct errmsgstr_t, str##n), #include "__strerror.h" -; +#undef E +}; char *__strerror_l(int e, locale_t loc) { const char *s; - int i; - /* mips has one error code outside of the 8-bit range due to a - * historical typo, so we just remap it. */ - if (EDQUOT==1133) { - if (e==109) e=-1; - else if (e==EDQUOT) e=109; - } -#ifdef __wasilibc_unmodified_upstream // Print "Success" for ESUCCESS. 
- for (i=0; errid[i] && errid[i] != e; i++); -#else - for (i=0; errid[i] != UCHAR_MAX && errid[i] != e; i++); +#ifdef EDQUOT_ORIG + if (e==EDQUOT) e=0; + else if (e==EDQUOT_ORIG) e=EDQUOT; #endif - for (s=errmsg; i; s++, i--) for (; *s; s++); + if (e >= sizeof errmsgidx / sizeof *errmsgidx) e = 0; + s = (char *)&errmsgstr + errmsgidx[e]; return (char *)LCTRANS(s, LC_MESSAGES, loc); } diff --git a/libc-top-half/musl/src/fenv/sh/fenv.S b/libc-top-half/musl/src/fenv/sh/fenv.S index 907aefc0d..b3b7d66ad 100644 --- a/libc-top-half/musl/src/fenv/sh/fenv.S +++ b/libc-top-half/musl/src/fenv/sh/fenv.S @@ -12,6 +12,8 @@ fegetround: .type __fesetround, @function __fesetround: sts fpscr, r0 + mov #-4, r1 + and r1, r0 or r4, r0 lds r0, fpscr rts diff --git a/libc-top-half/musl/src/internal/atomic.h b/libc-top-half/musl/src/internal/atomic.h index f938879b0..96c1552d6 100644 --- a/libc-top-half/musl/src/internal/atomic.h +++ b/libc-top-half/musl/src/internal/atomic.h @@ -315,4 +315,19 @@ static inline int a_clz_64(uint64_t x) } #endif +#ifndef a_clz_32 +#define a_clz_32 a_clz_32 +static inline int a_clz_32(uint32_t x) +{ + x >>= 1; + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; + x++; + return 31-a_ctz_32(x); +} +#endif + #endif diff --git a/libc-top-half/musl/src/internal/dynlink.h b/libc-top-half/musl/src/internal/dynlink.h index 764e3a1a8..51c0639ff 100644 --- a/libc-top-half/musl/src/internal/dynlink.h +++ b/libc-top-half/musl/src/internal/dynlink.h @@ -105,4 +105,9 @@ hidden void __dl_vseterr(const char *, va_list); hidden ptrdiff_t __tlsdesc_static(), __tlsdesc_dynamic(); +hidden extern int __malloc_replaced; +hidden extern int __aligned_alloc_replaced; +hidden void __malloc_donate(char *, char *); +hidden int __malloc_allzerop(void *); + #endif diff --git a/libc-top-half/musl/src/internal/libc.h b/libc-top-half/musl/src/internal/libc.h index 0e95d0f4e..9b3984742 100644 --- a/libc-top-half/musl/src/internal/libc.h +++ 
b/libc-top-half/musl/src/internal/libc.h @@ -19,14 +19,15 @@ struct tls_module { struct __libc { #if defined(__wasilibc_unmodified_upstream) || defined(_REENTRANT) - int can_do_threads; - int threaded; + char can_do_threads; + char threaded; #endif #ifdef __wasilibc_unmodified_upstream // WASI doesn't currently use any code that needs "secure" mode - int secure; + char secure; #endif #if defined(__wasilibc_unmodified_upstream) || defined(_REENTRANT) - volatile int threads_minus_1; + volatile signed char need_locks; + int threads_minus_1; #endif #ifdef __wasilibc_unmodified_upstream // WASI has no auxv size_t *auxv; diff --git a/libc-top-half/musl/src/internal/libm.h b/libc-top-half/musl/src/internal/libm.h index 8983c3e76..a45894b7e 100644 --- a/libc-top-half/musl/src/internal/libm.h +++ b/libc-top-half/musl/src/internal/libm.h @@ -246,13 +246,21 @@ hidden int __rem_pio2(double,double*); hidden double __sin(double,double,int); hidden double __cos(double,double); hidden double __tan(double,double,int); +#ifdef __wasilibc_unmodified_upstream // Wasm doesn't have alternate rounding modes +hidden double __expo2(double,double); +#else hidden double __expo2(double); +#endif hidden int __rem_pio2f(float,double*); hidden float __sindf(double); hidden float __cosdf(double); hidden float __tandf(double,int); +#ifdef __wasilibc_unmodified_upstream // Wasm doesn't have alternate rounding modes +hidden float __expo2f(float,float); +#else hidden float __expo2f(float); +#endif hidden int __rem_pio2l(long double, long double *); hidden long double __sinl(long double, long double, int); diff --git a/libc-top-half/musl/src/internal/shgetc.c b/libc-top-half/musl/src/internal/shgetc.c index a4a9c633d..7455d2f00 100644 --- a/libc-top-half/musl/src/internal/shgetc.c +++ b/libc-top-half/musl/src/internal/shgetc.c @@ -32,6 +32,6 @@ int __shgetc(FILE *f) else f->shend = f->rend; f->shcnt = f->buf - f->rpos + cnt; - if (f->rpos[-1] != c) f->rpos[-1] = c; + if (f->rpos <= f->buf) f->rpos[-1] 
= c; return c; } diff --git a/libc-top-half/musl/src/internal/stdio_impl.h b/libc-top-half/musl/src/internal/stdio_impl.h index 537e7a29e..4167ba72e 100644 --- a/libc-top-half/musl/src/internal/stdio_impl.h +++ b/libc-top-half/musl/src/internal/stdio_impl.h @@ -81,8 +81,6 @@ hidden size_t __stdout_write(FILE *, const unsigned char *, size_t); hidden off_t __stdio_seek(FILE *, off_t, int); hidden int __stdio_close(FILE *); -hidden size_t __string_read(FILE *, unsigned char *, size_t); - hidden int __toread(FILE *); hidden int __towrite(FILE *); diff --git a/libc-top-half/musl/src/ipc/msgctl.c b/libc-top-half/musl/src/ipc/msgctl.c index b043041a5..9c1144064 100644 --- a/libc-top-half/musl/src/ipc/msgctl.c +++ b/libc-top-half/musl/src/ipc/msgctl.c @@ -9,6 +9,14 @@ int msgctl(int q, int cmd, struct msqid_ds *buf) { +#if IPC_TIME64 + struct msqid_ds out, *orig; + if (cmd&IPC_TIME64) { + out = (struct msqid_ds){0}; + orig = buf; + buf = &out; + } +#endif #ifdef SYSCALL_IPC_BROKEN_MODE struct msqid_ds tmp; if (cmd == IPC_SET) { @@ -32,6 +40,8 @@ int msgctl(int q, int cmd, struct msqid_ds *buf) #endif #if IPC_TIME64 if (r >= 0 && (cmd&IPC_TIME64)) { + buf = orig; + *buf = out; IPC_HILO(buf, msg_stime); IPC_HILO(buf, msg_rtime); IPC_HILO(buf, msg_ctime); diff --git a/libc-top-half/musl/src/ipc/semctl.c b/libc-top-half/musl/src/ipc/semctl.c index ed9827477..bbb97d7ae 100644 --- a/libc-top-half/musl/src/ipc/semctl.c +++ b/libc-top-half/musl/src/ipc/semctl.c @@ -28,6 +28,14 @@ int semctl(int id, int num, int cmd, ...) arg = va_arg(ap, union semun); va_end(ap); } +#if IPC_TIME64 + struct semid_ds out, *orig; + if (cmd&IPC_TIME64) { + out = (struct semid_ds){0}; + orig = arg.buf; + arg.buf = &out; + } +#endif #ifdef SYSCALL_IPC_BROKEN_MODE struct semid_ds tmp; if (cmd == IPC_SET) { @@ -51,6 +59,8 @@ int semctl(int id, int num, int cmd, ...) 
#endif #if IPC_TIME64 if (r >= 0 && (cmd&IPC_TIME64)) { + arg.buf = orig; + *arg.buf = out; IPC_HILO(arg.buf, sem_otime); IPC_HILO(arg.buf, sem_ctime); } diff --git a/libc-top-half/musl/src/ipc/shmctl.c b/libc-top-half/musl/src/ipc/shmctl.c index de3ce9d4d..1c9f78c2f 100644 --- a/libc-top-half/musl/src/ipc/shmctl.c +++ b/libc-top-half/musl/src/ipc/shmctl.c @@ -9,6 +9,14 @@ int shmctl(int id, int cmd, struct shmid_ds *buf) { +#if IPC_TIME64 + struct shmid_ds out, *orig; + if (cmd&IPC_TIME64) { + out = (struct shmid_ds){0}; + orig = buf; + buf = &out; + } +#endif #ifdef SYSCALL_IPC_BROKEN_MODE struct shmid_ds tmp; if (cmd == IPC_SET) { @@ -32,6 +40,8 @@ int shmctl(int id, int cmd, struct shmid_ds *buf) #endif #if IPC_TIME64 if (r >= 0 && (cmd&IPC_TIME64)) { + buf = orig; + *buf = out; IPC_HILO(buf, shm_atime); IPC_HILO(buf, shm_dtime); IPC_HILO(buf, shm_ctime); diff --git a/libc-top-half/musl/src/linux/clock_adjtime.c b/libc-top-half/musl/src/linux/clock_adjtime.c index 23eb8729d..d4d03d24d 100644 --- a/libc-top-half/musl/src/linux/clock_adjtime.c +++ b/libc-top-half/musl/src/linux/clock_adjtime.c @@ -38,55 +38,52 @@ int clock_adjtime (clockid_t clock_id, struct timex *utx) { int r = -ENOSYS; #ifdef SYS_clock_adjtime64 - if (SYS_clock_adjtime == SYS_clock_adjtime64 || - (utx->modes & ADJ_SETOFFSET) && !IS32BIT(utx->time.tv_sec)) { - struct ktimex64 ktx = { - .modes = utx->modes, - .offset = utx->offset, - .freq = utx->freq, - .maxerror = utx->maxerror, - .esterror = utx->esterror, - .status = utx->status, - .constant = utx->constant, - .precision = utx->precision, - .tolerance = utx->tolerance, - .time_sec = utx->time.tv_sec, - .time_usec = utx->time.tv_usec, - .tick = utx->tick, - .ppsfreq = utx->ppsfreq, - .jitter = utx->jitter, - .shift = utx->shift, - .stabil = utx->stabil, - .jitcnt = utx->jitcnt, - .calcnt = utx->calcnt, - .errcnt = utx->errcnt, - .stbcnt = utx->stbcnt, - .tai = utx->tai, - }; - r = __syscall(SYS_clock_adjtime, clock_id, &ktx); - if (r>=0) { - 
utx->modes = ktx.modes; - utx->offset = ktx.offset; - utx->freq = ktx.freq; - utx->maxerror = ktx.maxerror; - utx->esterror = ktx.esterror; - utx->status = ktx.status; - utx->constant = ktx.constant; - utx->precision = ktx.precision; - utx->tolerance = ktx.tolerance; - utx->time.tv_sec = ktx.time_sec; - utx->time.tv_usec = ktx.time_usec; - utx->tick = ktx.tick; - utx->ppsfreq = ktx.ppsfreq; - utx->jitter = ktx.jitter; - utx->shift = ktx.shift; - utx->stabil = ktx.stabil; - utx->jitcnt = ktx.jitcnt; - utx->calcnt = ktx.calcnt; - utx->errcnt = ktx.errcnt; - utx->stbcnt = ktx.stbcnt; - utx->tai = ktx.tai; - } + struct ktimex64 ktx = { + .modes = utx->modes, + .offset = utx->offset, + .freq = utx->freq, + .maxerror = utx->maxerror, + .esterror = utx->esterror, + .status = utx->status, + .constant = utx->constant, + .precision = utx->precision, + .tolerance = utx->tolerance, + .time_sec = utx->time.tv_sec, + .time_usec = utx->time.tv_usec, + .tick = utx->tick, + .ppsfreq = utx->ppsfreq, + .jitter = utx->jitter, + .shift = utx->shift, + .stabil = utx->stabil, + .jitcnt = utx->jitcnt, + .calcnt = utx->calcnt, + .errcnt = utx->errcnt, + .stbcnt = utx->stbcnt, + .tai = utx->tai, + }; + r = __syscall(SYS_clock_adjtime64, clock_id, &ktx); + if (r>=0) { + utx->modes = ktx.modes; + utx->offset = ktx.offset; + utx->freq = ktx.freq; + utx->maxerror = ktx.maxerror; + utx->esterror = ktx.esterror; + utx->status = ktx.status; + utx->constant = ktx.constant; + utx->precision = ktx.precision; + utx->tolerance = ktx.tolerance; + utx->time.tv_sec = ktx.time_sec; + utx->time.tv_usec = ktx.time_usec; + utx->tick = ktx.tick; + utx->ppsfreq = ktx.ppsfreq; + utx->jitter = ktx.jitter; + utx->shift = ktx.shift; + utx->stabil = ktx.stabil; + utx->jitcnt = ktx.jitcnt; + utx->calcnt = ktx.calcnt; + utx->errcnt = ktx.errcnt; + utx->stbcnt = ktx.stbcnt; + utx->tai = ktx.tai; } if (SYS_clock_adjtime == SYS_clock_adjtime64 || r!=-ENOSYS) return __syscall_ret(r); diff --git 
a/libc-top-half/musl/src/malloc/DESIGN b/libc-top-half/musl/src/malloc/DESIGN deleted file mode 100644 index 58b0523ff..000000000 --- a/libc-top-half/musl/src/malloc/DESIGN +++ /dev/null @@ -1,22 +0,0 @@ - - -In principle, this memory allocator is roughly equivalent to Doug -Lea's dlmalloc with fine-grained locking. - - - -malloc: - -Uses a freelist binned by chunk size, with a bitmap to optimize -searching for the smallest non-empty bin which can satisfy an -allocation. If no free chunks are available, it creates a new chunk of -the requested size and attempts to merge it with any existing free -chunk immediately below the newly created chunk. - -Whether the chunk was obtained from a bin or newly created, it's -likely to be larger than the requested allocation. malloc always -finishes its work by passing the new chunk to realloc, which will -split it into two chunks and free the tail portion. - - - diff --git a/libc-top-half/musl/src/malloc/aligned_alloc.c b/libc-top-half/musl/src/malloc/aligned_alloc.c deleted file mode 100644 index b6143f303..000000000 --- a/libc-top-half/musl/src/malloc/aligned_alloc.c +++ /dev/null @@ -1,7 +0,0 @@ -#include -#include "malloc_impl.h" - -void *aligned_alloc(size_t align, size_t len) -{ - return __memalign(align, len); -} diff --git a/libc-top-half/musl/src/malloc/calloc.c b/libc-top-half/musl/src/malloc/calloc.c new file mode 100644 index 000000000..bf6bddca3 --- /dev/null +++ b/libc-top-half/musl/src/malloc/calloc.c @@ -0,0 +1,45 @@ +#include +#include +#include +#include +#include "dynlink.h" + +static size_t mal0_clear(char *p, size_t n) +{ + const size_t pagesz = 4096; /* arbitrary */ + if (n < pagesz) return n; +#ifdef __GNUC__ + typedef uint64_t __attribute__((__may_alias__)) T; +#else + typedef unsigned char T; +#endif + char *pp = p + n; + size_t i = (uintptr_t)pp & (pagesz - 1); + for (;;) { + pp = memset(pp - i, 0, i); + if (pp - p < pagesz) return pp - p; + for (i = pagesz; i; i -= 2*sizeof(T), pp -= 2*sizeof(T)) + if 
(((T *)pp)[-1] | ((T *)pp)[-2]) + break; + } +} + +static int allzerop(void *p) +{ + return 0; +} +weak_alias(allzerop, __malloc_allzerop); + +void *calloc(size_t m, size_t n) +{ + if (n && m > (size_t)-1/n) { + errno = ENOMEM; + return 0; + } + n *= m; + void *p = malloc(n); + if (!p || (!__malloc_replaced && __malloc_allzerop(p))) + return p; + n = mal0_clear(p, n); + return memset(p, 0, n); +} diff --git a/libc-top-half/musl/src/malloc/expand_heap.c b/libc-top-half/musl/src/malloc/expand_heap.c deleted file mode 100644 index e6a3d7a00..000000000 --- a/libc-top-half/musl/src/malloc/expand_heap.c +++ /dev/null @@ -1,71 +0,0 @@ -#include -#include -#include -#include -#include "libc.h" -#include "syscall.h" -#include "malloc_impl.h" - -/* This function returns true if the interval [old,new] - * intersects the 'len'-sized interval below &libc.auxv - * (interpreted as the main-thread stack) or below &b - * (the current stack). It is used to defend against - * buggy brk implementations that can cross the stack. */ - -static int traverses_stack_p(uintptr_t old, uintptr_t new) -{ - const uintptr_t len = 8<<20; - uintptr_t a, b; - - b = (uintptr_t)libc.auxv; - a = b > len ? b-len : 0; - if (new>a && old len ? 
b-len : 0; - if (new>a && old SIZE_MAX/2 - PAGE_SIZE) { - errno = ENOMEM; - return 0; - } - n += -n & PAGE_SIZE-1; - - if (!brk) { - brk = __syscall(SYS_brk, 0); - brk += -brk & PAGE_SIZE-1; - } - - if (n < SIZE_MAX-brk && !traverses_stack_p(brk, brk+n) - && __syscall(SYS_brk, brk+n)==brk+n) { - *pn = n; - brk += n; - return (void *)(brk-n); - } - - size_t min = (size_t)PAGE_SIZE << mmap_step/2; - if (n < min) n = min; - void *area = __mmap(0, n, PROT_READ|PROT_WRITE, - MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); - if (area == MAP_FAILED) return 0; - *pn = n; - mmap_step++; - return area; -} diff --git a/libc-top-half/musl/src/malloc/lite_malloc.c b/libc-top-half/musl/src/malloc/lite_malloc.c index 050d84f64..f8931ba59 100644 --- a/libc-top-half/musl/src/malloc/lite_malloc.c +++ b/libc-top-half/musl/src/malloc/lite_malloc.c @@ -2,58 +2,102 @@ #include #include #include +#include +#include "libc.h" #include "lock.h" -#include "malloc_impl.h" +#include "syscall.h" #define ALIGN 16 +/* This function returns true if the interval [old,new] + * intersects the 'len'-sized interval below &libc.auxv + * (interpreted as the main-thread stack) or below &b + * (the current stack). It is used to defend against + * buggy brk implementations that can cross the stack. */ + +static int traverses_stack_p(uintptr_t old, uintptr_t new) +{ + const uintptr_t len = 8<<20; + uintptr_t a, b; + + b = (uintptr_t)libc.auxv; + a = b > len ? b-len : 0; + if (new>a && old len ? 
b-len : 0; + if (new>a && old SIZE_MAX/2) { + errno = ENOMEM; + return 0; + } + if (!n) n++; while (align end-cur) { - size_t m = n; - char *new = __expand_heap(&m); - if (!new) { - UNLOCK(lock); - return 0; + size_t req = n - (end-cur) + PAGE_SIZE-1 & -PAGE_SIZE; + + if (!cur) { + brk = __syscall(SYS_brk, 0); + brk += -brk & PAGE_SIZE-1; + cur = end = brk; } - if (new != end) { - cur = new; - n -= pad; - pad = 0; + + if (brk == end && req < SIZE_MAX-brk + && !traverses_stack_p(brk, brk+req) + && __syscall(SYS_brk, brk+req)==brk+req) { + brk = end += req; + } else { + int new_area = 0; + req = n + PAGE_SIZE-1 & -PAGE_SIZE; + /* Only make a new area rather than individual mmap + * if wasted space would be over 1/8 of the map. */ + if (req-n > req/8) { + /* Geometric area size growth up to 64 pages, + * bounding waste by 1/8 of the area. */ + size_t min = PAGE_SIZE<<(mmap_step/2); + if (min-n > end-cur) { + if (req < min) { + req = min; + if (mmap_step < 12) + mmap_step++; + } + new_area = 1; + } + } + void *mem = __mmap(0, req, PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + if (mem == MAP_FAILED || !new_area) { + UNLOCK(lock); + return mem==MAP_FAILED ? 
0 : mem; + } + cur = (uintptr_t)mem; + end = cur + req; } - end = new + m; } - p = cur + pad; + p = (void *)cur; cur += n; UNLOCK(lock); return p; } weak_alias(__simple_malloc, malloc); - -static void *__simple_calloc(size_t m, size_t n) -{ - if (n && m > (size_t)-1/n) { - errno = ENOMEM; - return 0; - } - return __simple_malloc(n * m); -} - -weak_alias(__simple_calloc, calloc); diff --git a/libc-top-half/musl/src/malloc/mallocng/aligned_alloc.c b/libc-top-half/musl/src/malloc/mallocng/aligned_alloc.c new file mode 100644 index 000000000..341168960 --- /dev/null +++ b/libc-top-half/musl/src/malloc/mallocng/aligned_alloc.c @@ -0,0 +1,57 @@ +#include +#include +#include "meta.h" + +void *aligned_alloc(size_t align, size_t len) +{ + if ((align & -align) != align) { + errno = EINVAL; + return 0; + } + + if (len > SIZE_MAX - align || align >= (1ULL<<31)*UNIT) { + errno = ENOMEM; + return 0; + } + + if (DISABLE_ALIGNED_ALLOC) { + errno = ENOMEM; + return 0; + } + + if (align <= UNIT) align = UNIT; + + unsigned char *p = malloc(len + align - UNIT); + struct meta *g = get_meta(p); + int idx = get_slot_index(p); + size_t stride = get_stride(g); + unsigned char *start = g->mem->storage + stride*idx; + unsigned char *end = g->mem->storage + stride*(idx+1) - IB; + size_t adj = -(uintptr_t)p & (align-1); + + if (!adj) { + set_size(p, end, len); + return p; + } + p += adj; + uint32_t offset = (size_t)(p-g->mem->storage)/UNIT; + if (offset <= 0xffff) { + *(uint16_t *)(p-2) = offset; + p[-4] = 0; + } else { + // use a 32-bit offset if 16-bit doesn't fit. for this, + // 16-bit field must be zero, [-4] byte nonzero. + *(uint16_t *)(p-2) = 0; + *(uint32_t *)(p-8) = offset; + p[-4] = 1; + } + p[-3] = idx; + set_size(p, end, len); + // store offset to aligned enframing. this facilitates cycling + // offset and also iteration of heap for debugging/measurement. + // for extreme overalignment it won't fit but these are classless + // allocations anyway. 
+ *(uint16_t *)(start - 2) = (size_t)(p-start)/UNIT; + start[-3] = 7<<5; + return p; +} diff --git a/libc-top-half/musl/src/malloc/mallocng/donate.c b/libc-top-half/musl/src/malloc/mallocng/donate.c new file mode 100644 index 000000000..41d850f35 --- /dev/null +++ b/libc-top-half/musl/src/malloc/mallocng/donate.c @@ -0,0 +1,39 @@ +#include +#include +#include +#include +#include +#include + +#include "meta.h" + +static void donate(unsigned char *base, size_t len) +{ + uintptr_t a = (uintptr_t)base; + uintptr_t b = a + len; + a += -a & (UNIT-1); + b -= b & (UNIT-1); + memset(base, 0, len); + for (int sc=47; sc>0 && b>a; sc-=4) { + if (b-a < (size_classes[sc]+1)*UNIT) continue; + struct meta *m = alloc_meta(); + m->avail_mask = 0; + m->freed_mask = 1; + m->mem = (void *)a; + m->mem->meta = m; + m->last_idx = 0; + m->freeable = 0; + m->sizeclass = sc; + m->maplen = 0; + *((unsigned char *)m->mem+UNIT-4) = 0; + *((unsigned char *)m->mem+UNIT-3) = 255; + m->mem->storage[size_classes[sc]*UNIT-4] = 0; + queue(&ctx.active[sc], m); + a += (size_classes[sc]+1)*UNIT; + } +} + +void __malloc_donate(char *start, char *end) +{ + donate((void *)start, end-start); +} diff --git a/libc-top-half/musl/src/malloc/mallocng/free.c b/libc-top-half/musl/src/malloc/mallocng/free.c new file mode 100644 index 000000000..40745f97d --- /dev/null +++ b/libc-top-half/musl/src/malloc/mallocng/free.c @@ -0,0 +1,143 @@ +#define _BSD_SOURCE +#include +#include + +#include "meta.h" + +struct mapinfo { + void *base; + size_t len; +}; + +static struct mapinfo nontrivial_free(struct meta *, int); + +static struct mapinfo free_group(struct meta *g) +{ + struct mapinfo mi = { 0 }; + int sc = g->sizeclass; + if (sc < 48) { + ctx.usage_by_class[sc] -= g->last_idx+1; + } + if (g->maplen) { + step_seq(); + record_seq(sc); + mi.base = g->mem; + mi.len = g->maplen*4096UL; + } else { + void *p = g->mem; + struct meta *m = get_meta(p); + int idx = get_slot_index(p); + g->mem->meta = 0; + // not checking 
size/reserved here; it's intentionally invalid + mi = nontrivial_free(m, idx); + } + free_meta(g); + return mi; +} + +static int okay_to_free(struct meta *g) +{ + int sc = g->sizeclass; + + if (!g->freeable) return 0; + + // always free individual mmaps not suitable for reuse + if (sc >= 48 || get_stride(g) < UNIT*size_classes[sc]) + return 1; + + // always free groups allocated inside another group's slot + // since recreating them should not be expensive and they + // might be blocking freeing of a much larger group. + if (!g->maplen) return 1; + + // if there is another non-full group, free this one to + // consolidate future allocations, reduce fragmentation. + if (g->next != g) return 1; + + // free any group in a size class that's not bouncing + if (!is_bouncing(sc)) return 1; + + size_t cnt = g->last_idx+1; + size_t usage = ctx.usage_by_class[sc]; + + // if usage is high enough that a larger count should be + // used, free the low-count group so a new one will be made. + if (9*cnt <= usage && cnt < 20) + return 1; + + // otherwise, keep the last group in a bouncing class. + return 0; +} + +static struct mapinfo nontrivial_free(struct meta *g, int i) +{ + uint32_t self = 1u<sizeclass; + uint32_t mask = g->freed_mask | g->avail_mask; + + if (mask+self == (2u<last_idx)-1 && okay_to_free(g)) { + // any multi-slot group is necessarily on an active list + // here, but single-slot groups might or might not be. + if (g->next) { + assert(sc < 48); + int activate_new = (ctx.active[sc]==g); + dequeue(&ctx.active[sc], g); + if (activate_new && ctx.active[sc]) + activate_group(ctx.active[sc]); + } + return free_group(g); + } else if (!mask) { + assert(sc < 48); + // might still be active if there were no allocations + // after last available slot was taken. 
+ if (ctx.active[sc] != g) { + queue(&ctx.active[sc], g); + } + } + a_or(&g->freed_mask, self); + return (struct mapinfo){ 0 }; +} + +void free(void *p) +{ + if (!p) return; + + struct meta *g = get_meta(p); + int idx = get_slot_index(p); + size_t stride = get_stride(g); + unsigned char *start = g->mem->storage + stride*idx; + unsigned char *end = start + stride - IB; + get_nominal_size(p, end); + uint32_t self = 1u<last_idx)-1; + ((unsigned char *)p)[-3] = 255; + // invalidate offset to group header, and cycle offset of + // used region within slot if current offset is zero. + *(uint16_t *)((char *)p-2) = 0; + + // release any whole pages contained in the slot to be freed + // unless it's a single-slot group that will be unmapped. + if (((uintptr_t)(start-1) ^ (uintptr_t)end) >= 2*PGSZ && g->last_idx) { + unsigned char *base = start + (-(uintptr_t)start & (PGSZ-1)); + size_t len = (end-base) & -PGSZ; + if (len) madvise(base, len, MADV_FREE); + } + + // atomic free without locking if this is neither first or last slot + for (;;) { + uint32_t freed = g->freed_mask; + uint32_t avail = g->avail_mask; + uint32_t mask = freed | avail; + assert(!(mask&self)); + if (!freed || mask+self==all) break; + if (!MT) + g->freed_mask = freed+self; + else if (a_cas(&g->freed_mask, freed, freed+self)!=freed) + continue; + return; + } + + wrlock(); + struct mapinfo mi = nontrivial_free(g, idx); + unlock(); + if (mi.len) munmap(mi.base, mi.len); +} diff --git a/libc-top-half/musl/src/malloc/mallocng/glue.h b/libc-top-half/musl/src/malloc/mallocng/glue.h new file mode 100644 index 000000000..16acd1ea3 --- /dev/null +++ b/libc-top-half/musl/src/malloc/mallocng/glue.h @@ -0,0 +1,77 @@ +#ifndef MALLOC_GLUE_H +#define MALLOC_GLUE_H + +#include +#include +#include +#include +#include +#include +#include "atomic.h" +#include "syscall.h" +#include "libc.h" +#include "lock.h" +#include "dynlink.h" + +// use macros to appropriately namespace these. 
+#define size_classes __malloc_size_classes +#define ctx __malloc_context +#define alloc_meta __malloc_alloc_meta +#define is_allzero __malloc_allzerop +#define dump_heap __dump_heap + +#if USE_REAL_ASSERT +#include +#else +#undef assert +#define assert(x) do { if (!(x)) a_crash(); } while(0) +#endif + +#define brk(p) ((uintptr_t)__syscall(SYS_brk, p)) + +#define mmap __mmap +#define madvise __madvise +#define mremap __mremap + +#define DISABLE_ALIGNED_ALLOC (__malloc_replaced && !__aligned_alloc_replaced) + +static inline uint64_t get_random_secret() +{ + uint64_t secret = (uintptr_t)&secret * 1103515245; + for (size_t i=0; libc.auxv[i]; i+=2) + if (libc.auxv[i]==AT_RANDOM) + memcpy(&secret, (char *)libc.auxv[i+1]+8, sizeof secret); + return secret; +} + +#ifndef PAGESIZE +#define PAGESIZE PAGE_SIZE +#endif + +#define MT (libc.need_locks) + +#define RDLOCK_IS_EXCLUSIVE 1 + +__attribute__((__visibility__("hidden"))) +extern int __malloc_lock[1]; + +#define LOCK_OBJ_DEF \ +int __malloc_lock[1]; + +static inline void rdlock() +{ + if (MT) LOCK(__malloc_lock); +} +static inline void wrlock() +{ + if (MT) LOCK(__malloc_lock); +} +static inline void unlock() +{ + UNLOCK(__malloc_lock); +} +static inline void upgradelock() +{ +} + +#endif diff --git a/libc-top-half/musl/src/malloc/mallocng/malloc.c b/libc-top-half/musl/src/malloc/mallocng/malloc.c new file mode 100644 index 000000000..d695ab8ec --- /dev/null +++ b/libc-top-half/musl/src/malloc/mallocng/malloc.c @@ -0,0 +1,387 @@ +#include +#include +#include +#include +#include +#include + +#include "meta.h" + +LOCK_OBJ_DEF; + +const uint16_t size_classes[] = { + 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 12, 15, + 18, 20, 25, 31, + 36, 42, 50, 63, + 72, 84, 102, 127, + 146, 170, 204, 255, + 292, 340, 409, 511, + 584, 682, 818, 1023, + 1169, 1364, 1637, 2047, + 2340, 2730, 3276, 4095, + 4680, 5460, 6552, 8191, +}; + +static const uint8_t small_cnt_tab[][3] = { + { 30, 30, 30 }, + { 31, 15, 15 }, + { 20, 10, 10 }, + { 31, 15, 7 }, 
+ { 25, 12, 6 }, + { 21, 10, 5 }, + { 18, 8, 4 }, + { 31, 15, 7 }, + { 28, 14, 6 }, +}; + +static const uint8_t med_cnt_tab[4] = { 28, 24, 20, 32 }; + +struct malloc_context ctx = { 0 }; + +struct meta *alloc_meta(void) +{ + struct meta *m; + unsigned char *p; + if (!ctx.init_done) { +#ifndef PAGESIZE + ctx.pagesize = get_page_size(); +#endif + ctx.secret = get_random_secret(); + ctx.init_done = 1; + } + size_t pagesize = PGSZ; + if (pagesize < 4096) pagesize = 4096; + if ((m = dequeue_head(&ctx.free_meta_head))) return m; + if (!ctx.avail_meta_count) { + int need_unprotect = 1; + if (!ctx.avail_meta_area_count && ctx.brk!=-1) { + uintptr_t new = ctx.brk + pagesize; + int need_guard = 0; + if (!ctx.brk) { + need_guard = 1; + ctx.brk = brk(0); + // some ancient kernels returned _ebss + // instead of next page as initial brk. + ctx.brk += -ctx.brk & (pagesize-1); + new = ctx.brk + 2*pagesize; + } + if (brk(new) != new) { + ctx.brk = -1; + } else { + if (need_guard) mmap((void *)ctx.brk, pagesize, + PROT_NONE, MAP_ANON|MAP_PRIVATE|MAP_FIXED, -1, 0); + ctx.brk = new; + ctx.avail_meta_areas = (void *)(new - pagesize); + ctx.avail_meta_area_count = pagesize>>12; + need_unprotect = 0; + } + } + if (!ctx.avail_meta_area_count) { + size_t n = 2UL << ctx.meta_alloc_shift; + p = mmap(0, n*pagesize, PROT_NONE, + MAP_PRIVATE|MAP_ANON, -1, 0); + if (p==MAP_FAILED) return 0; + ctx.avail_meta_areas = p + pagesize; + ctx.avail_meta_area_count = (n-1)*(pagesize>>12); + ctx.meta_alloc_shift++; + } + p = ctx.avail_meta_areas; + if ((uintptr_t)p & (pagesize-1)) need_unprotect = 0; + if (need_unprotect) + if (mprotect(p, pagesize, PROT_READ|PROT_WRITE) + && errno != ENOSYS) + return 0; + ctx.avail_meta_area_count--; + ctx.avail_meta_areas = p + 4096; + if (ctx.meta_area_tail) { + ctx.meta_area_tail->next = (void *)p; + } else { + ctx.meta_area_head = (void *)p; + } + ctx.meta_area_tail = (void *)p; + ctx.meta_area_tail->check = ctx.secret; + ctx.avail_meta_count = 
ctx.meta_area_tail->nslots + = (4096-sizeof(struct meta_area))/sizeof *m; + ctx.avail_meta = ctx.meta_area_tail->slots; + } + ctx.avail_meta_count--; + m = ctx.avail_meta++; + m->prev = m->next = 0; + return m; +} + +static uint32_t try_avail(struct meta **pm) +{ + struct meta *m = *pm; + uint32_t first; + if (!m) return 0; + uint32_t mask = m->avail_mask; + if (!mask) { + if (!m) return 0; + if (!m->freed_mask) { + dequeue(pm, m); + m = *pm; + if (!m) return 0; + } else { + m = m->next; + *pm = m; + } + + mask = m->freed_mask; + + // skip fully-free group unless it's the only one + // or it's a permanently non-freeable group + if (mask == (2u<last_idx)-1 && m->freeable) { + m = m->next; + *pm = m; + mask = m->freed_mask; + } + + // activate more slots in a not-fully-active group + // if needed, but only as a last resort. prefer using + // any other group with free slots. this avoids + // touching & dirtying as-yet-unused pages. + if (!(mask & ((2u<mem->active_idx)-1))) { + if (m->next != m) { + m = m->next; + *pm = m; + } else { + int cnt = m->mem->active_idx + 2; + int size = size_classes[m->sizeclass]*UNIT; + int span = UNIT + size*cnt; + // activate up to next 4k boundary + while ((span^(span+size-1)) < 4096) { + cnt++; + span += size; + } + if (cnt > m->last_idx+1) + cnt = m->last_idx+1; + m->mem->active_idx = cnt-1; + } + } + mask = activate_group(m); + assert(mask); + decay_bounces(m->sizeclass); + } + first = mask&-mask; + m->avail_mask = mask-first; + return first; +} + +static int alloc_slot(int, size_t); + +static struct meta *alloc_group(int sc, size_t req) +{ + size_t size = UNIT*size_classes[sc]; + int i = 0, cnt; + unsigned char *p; + struct meta *m = alloc_meta(); + if (!m) return 0; + size_t usage = ctx.usage_by_class[sc]; + size_t pagesize = PGSZ; + int active_idx; + if (sc < 9) { + while (i<2 && 4*small_cnt_tab[sc][i] > usage) + i++; + cnt = small_cnt_tab[sc][i]; + } else { + // lookup max number of slots fitting in power-of-two size + // from a 
table, along with number of factors of two we + // can divide out without a remainder or reaching 1. + cnt = med_cnt_tab[sc&3]; + + // reduce cnt to avoid excessive eagar allocation. + while (!(cnt&1) && 4*cnt > usage) + cnt >>= 1; + + // data structures don't support groups whose slot offsets + // in units don't fit in 16 bits. + while (size*cnt >= 65536*UNIT) + cnt >>= 1; + } + + // If we selected a count of 1 above but it's not sufficient to use + // mmap, increase to 2. Then it might be; if not it will nest. + if (cnt==1 && size*cnt+UNIT <= pagesize/2) cnt = 2; + + // All choices of size*cnt are "just below" a power of two, so anything + // larger than half the page size should be allocated as whole pages. + if (size*cnt+UNIT > pagesize/2) { + // check/update bounce counter to start/increase retention + // of freed maps, and inhibit use of low-count, odd-size + // small mappings and single-slot groups if activated. + int nosmall = is_bouncing(sc); + account_bounce(sc); + step_seq(); + + // since the following count reduction opportunities have + // an absolute memory usage cost, don't overdo them. count + // coarse usage as part of usage. + if (!(sc&1) && sc<32) usage += ctx.usage_by_class[sc+1]; + + // try to drop to a lower count if the one found above + // increases usage by more than 25%. these reduced counts + // roughly fill an integral number of pages, just not a + // power of two, limiting amount of unusable space. + if (4*cnt > usage && !nosmall) { + if (0); + else if ((sc&3)==1 && size*cnt>8*pagesize) cnt = 2; + else if ((sc&3)==2 && size*cnt>4*pagesize) cnt = 3; + else if ((sc&3)==0 && size*cnt>8*pagesize) cnt = 3; + else if ((sc&3)==0 && size*cnt>2*pagesize) cnt = 5; + } + size_t needed = size*cnt + UNIT; + needed += -needed & (pagesize-1); + + // produce an individually-mmapped allocation if usage is low, + // bounce counter hasn't triggered, and either it saves memory + // or it avoids eagar slot allocation without wasting too much. 
+ if (!nosmall && cnt<=7) { + req += IB + UNIT; + req += -req & (pagesize-1); + if (req=4*pagesize && 2*cnt>usage)) { + cnt = 1; + needed = req; + } + } + + p = mmap(0, needed, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0); + if (p==MAP_FAILED) { + free_meta(m); + return 0; + } + m->maplen = needed>>12; + ctx.mmap_counter++; + active_idx = (4096-UNIT)/size-1; + if (active_idx > cnt-1) active_idx = cnt-1; + if (active_idx < 0) active_idx = 0; + } else { + int j = size_to_class(UNIT+cnt*size-IB); + int idx = alloc_slot(j, UNIT+cnt*size-IB); + if (idx < 0) { + free_meta(m); + return 0; + } + struct meta *g = ctx.active[j]; + p = enframe(g, idx, UNIT*size_classes[j]-IB, ctx.mmap_counter); + m->maplen = 0; + p[-3] = (p[-3]&31) | (6<<5); + for (int i=0; i<=cnt; i++) + p[UNIT+i*size-4] = 0; + active_idx = cnt-1; + } + ctx.usage_by_class[sc] += cnt; + m->avail_mask = (2u<freed_mask = (2u<<(cnt-1))-1 - m->avail_mask; + m->mem = (void *)p; + m->mem->meta = m; + m->mem->active_idx = active_idx; + m->last_idx = cnt-1; + m->freeable = 1; + m->sizeclass = sc; + return m; +} + +static int alloc_slot(int sc, size_t req) +{ + uint32_t first = try_avail(&ctx.active[sc]); + if (first) return a_ctz_32(first); + + struct meta *g = alloc_group(sc, req); + if (!g) return -1; + + g->avail_mask--; + queue(&ctx.active[sc], g); + return 0; +} + +void *malloc(size_t n) +{ + if (size_overflows(n)) return 0; + struct meta *g; + uint32_t mask, first; + int sc; + int idx; + int ctr; + + if (n >= MMAP_THRESHOLD) { + size_t needed = n + IB + UNIT; + void *p = mmap(0, needed, PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANON, -1, 0); + if (p==MAP_FAILED) return 0; + wrlock(); + step_seq(); + g = alloc_meta(); + if (!g) { + unlock(); + munmap(p, needed); + return 0; + } + g->mem = p; + g->mem->meta = g; + g->last_idx = 0; + g->freeable = 1; + g->sizeclass = 63; + g->maplen = (needed+4095)/4096; + g->avail_mask = g->freed_mask = 0; + // use a global counter to cycle offset in + // individually-mmapped 
allocations. + ctx.mmap_counter++; + idx = 0; + goto success; + } + + sc = size_to_class(n); + + rdlock(); + g = ctx.active[sc]; + + // use coarse size classes initially when there are not yet + // any groups of desired size. this allows counts of 2 or 3 + // to be allocated at first rather than having to start with + // 7 or 5, the min counts for even size classes. + if (!g && sc>=4 && sc<32 && sc!=6 && !(sc&1) && !ctx.usage_by_class[sc]) { + size_t usage = ctx.usage_by_class[sc|1]; + // if a new group may be allocated, count it toward + // usage in deciding if we can use coarse class. + if (!ctx.active[sc|1] || (!ctx.active[sc|1]->avail_mask + && !ctx.active[sc|1]->freed_mask)) + usage += 3; + if (usage <= 12) + sc |= 1; + g = ctx.active[sc]; + } + + for (;;) { + mask = g ? g->avail_mask : 0; + first = mask&-mask; + if (!first) break; + if (RDLOCK_IS_EXCLUSIVE || !MT) + g->avail_mask = mask-first; + else if (a_cas(&g->avail_mask, mask, mask-first)!=mask) + continue; + idx = a_ctz_32(first); + goto success; + } + upgradelock(); + + idx = alloc_slot(sc, n); + if (idx < 0) { + unlock(); + return 0; + } + g = ctx.active[sc]; + +success: + ctr = ctx.mmap_counter; + unlock(); + return enframe(g, idx, n, ctr); +} + +int is_allzero(void *p) +{ + struct meta *g = get_meta(p); + return g->sizeclass >= 48 || + get_stride(g) < UNIT*size_classes[g->sizeclass]; +} diff --git a/libc-top-half/musl/src/malloc/mallocng/malloc_usable_size.c b/libc-top-half/musl/src/malloc/mallocng/malloc_usable_size.c new file mode 100644 index 000000000..a440a4eab --- /dev/null +++ b/libc-top-half/musl/src/malloc/mallocng/malloc_usable_size.c @@ -0,0 +1,12 @@ +#include +#include "meta.h" + +size_t malloc_usable_size(void *p) +{ + struct meta *g = get_meta(p); + int idx = get_slot_index(p); + size_t stride = get_stride(g); + unsigned char *start = g->mem->storage + stride*idx; + unsigned char *end = start + stride - IB; + return get_nominal_size(p, end); +} diff --git 
a/libc-top-half/musl/src/malloc/mallocng/meta.h b/libc-top-half/musl/src/malloc/mallocng/meta.h new file mode 100644 index 000000000..61ec53f9a --- /dev/null +++ b/libc-top-half/musl/src/malloc/mallocng/meta.h @@ -0,0 +1,288 @@ +#ifndef MALLOC_META_H +#define MALLOC_META_H + +#include +#include +#include +#include "glue.h" + +__attribute__((__visibility__("hidden"))) +extern const uint16_t size_classes[]; + +#define MMAP_THRESHOLD 131052 + +#define UNIT 16 +#define IB 4 + +struct group { + struct meta *meta; + unsigned char active_idx:5; + char pad[UNIT - sizeof(struct meta *) - 1]; + unsigned char storage[]; +}; + +struct meta { + struct meta *prev, *next; + struct group *mem; + volatile int avail_mask, freed_mask; + uintptr_t last_idx:5; + uintptr_t freeable:1; + uintptr_t sizeclass:6; + uintptr_t maplen:8*sizeof(uintptr_t)-12; +}; + +struct meta_area { + uint64_t check; + struct meta_area *next; + int nslots; + struct meta slots[]; +}; + +struct malloc_context { + uint64_t secret; +#ifndef PAGESIZE + size_t pagesize; +#endif + int init_done; + unsigned mmap_counter; + struct meta *free_meta_head; + struct meta *avail_meta; + size_t avail_meta_count, avail_meta_area_count, meta_alloc_shift; + struct meta_area *meta_area_head, *meta_area_tail; + unsigned char *avail_meta_areas; + struct meta *active[48]; + size_t usage_by_class[48]; + uint8_t unmap_seq[32], bounces[32]; + uint8_t seq; + uintptr_t brk; +}; + +__attribute__((__visibility__("hidden"))) +extern struct malloc_context ctx; + +#ifdef PAGESIZE +#define PGSZ PAGESIZE +#else +#define PGSZ ctx.pagesize +#endif + +__attribute__((__visibility__("hidden"))) +struct meta *alloc_meta(void); + +__attribute__((__visibility__("hidden"))) +int is_allzero(void *); + +static inline void queue(struct meta **phead, struct meta *m) +{ + assert(!m->next); + assert(!m->prev); + if (*phead) { + struct meta *head = *phead; + m->next = head; + m->prev = head->prev; + m->next->prev = m->prev->next = m; + } else { + m->prev = 
m->next = m; + *phead = m; + } +} + +static inline void dequeue(struct meta **phead, struct meta *m) +{ + if (m->next != m) { + m->prev->next = m->next; + m->next->prev = m->prev; + if (*phead == m) *phead = m->next; + } else { + *phead = 0; + } + m->prev = m->next = 0; +} + +static inline struct meta *dequeue_head(struct meta **phead) +{ + struct meta *m = *phead; + if (m) dequeue(phead, m); + return m; +} + +static inline void free_meta(struct meta *m) +{ + *m = (struct meta){0}; + queue(&ctx.free_meta_head, m); +} + +static inline uint32_t activate_group(struct meta *m) +{ + assert(!m->avail_mask); + uint32_t mask, act = (2u<mem->active_idx)-1; + do mask = m->freed_mask; + while (a_cas(&m->freed_mask, mask, mask&~act)!=mask); + return m->avail_mask = mask & act; +} + +static inline int get_slot_index(const unsigned char *p) +{ + return p[-3] & 31; +} + +static inline struct meta *get_meta(const unsigned char *p) +{ + assert(!((uintptr_t)p & 15)); + int offset = *(const uint16_t *)(p - 2); + int index = get_slot_index(p); + if (p[-4]) { + assert(!offset); + offset = *(uint32_t *)(p - 8); + assert(offset > 0xffff); + } + const struct group *base = (const void *)(p - UNIT*offset - UNIT); + const struct meta *meta = base->meta; + assert(meta->mem == base); + assert(index <= meta->last_idx); + assert(!(meta->avail_mask & (1u<freed_mask & (1u<check == ctx.secret); + if (meta->sizeclass < 48) { + assert(offset >= size_classes[meta->sizeclass]*index); + assert(offset < size_classes[meta->sizeclass]*(index+1)); + } else { + assert(meta->sizeclass == 63); + } + if (meta->maplen) { + assert(offset <= meta->maplen*4096UL/UNIT - 1); + } + return (struct meta *)meta; +} + +static inline size_t get_nominal_size(const unsigned char *p, const unsigned char *end) +{ + size_t reserved = p[-3] >> 5; + if (reserved >= 5) { + assert(reserved == 5); + reserved = *(const uint32_t *)(end-4); + assert(reserved >= 5); + assert(!end[-5]); + } + assert(reserved <= end-p); + 
assert(!*(end-reserved)); + // also check the slot's overflow byte + assert(!*end); + return end-reserved-p; +} + +static inline size_t get_stride(const struct meta *g) +{ + if (!g->last_idx && g->maplen) { + return g->maplen*4096UL - UNIT; + } else { + return UNIT*size_classes[g->sizeclass]; + } +} + +static inline void set_size(unsigned char *p, unsigned char *end, size_t n) +{ + int reserved = end-p-n; + if (reserved) end[-reserved] = 0; + if (reserved >= 5) { + *(uint32_t *)(end-4) = reserved; + end[-5] = 0; + reserved = 5; + } + p[-3] = (p[-3]&31) + (reserved<<5); +} + +static inline void *enframe(struct meta *g, int idx, size_t n, int ctr) +{ + size_t stride = get_stride(g); + size_t slack = (stride-IB-n)/UNIT; + unsigned char *p = g->mem->storage + stride*idx; + unsigned char *end = p+stride-IB; + // cycle offset within slot to increase interval to address + // reuse, facilitate trapping double-free. + int off = (p[-3] ? *(uint16_t *)(p-2) + 1 : ctr) & 255; + assert(!p[-4]); + if (off > slack) { + size_t m = slack; + m |= m>>1; m |= m>>2; m |= m>>4; + off &= m; + if (off > slack) off -= slack+1; + assert(off <= slack); + } + if (off) { + // store offset in unused header at offset zero + // if enframing at non-zero offset. + *(uint16_t *)(p-2) = off; + p[-3] = 7<<5; + p += UNIT*off; + // for nonzero offset there is no permanent check + // byte, so make one. 
+ p[-4] = 0; + } + *(uint16_t *)(p-2) = (size_t)(p-g->mem->storage)/UNIT; + p[-3] = idx; + set_size(p, end, n); + return p; +} + +static inline int size_to_class(size_t n) +{ + n = (n+IB-1)>>4; + if (n<10) return n; + n++; + int i = (28-a_clz_32(n))*4 + 8; + if (n>size_classes[i+1]) i+=2; + if (n>size_classes[i]) i++; + return i; +} + +static inline int size_overflows(size_t n) +{ + if (n >= SIZE_MAX/2 - 4096) { + errno = ENOMEM; + return 1; + } + return 0; +} + +static inline void step_seq(void) +{ + if (ctx.seq==255) { + for (int i=0; i<32; i++) ctx.unmap_seq[i] = 0; + ctx.seq = 1; + } else { + ctx.seq++; + } +} + +static inline void record_seq(int sc) +{ + if (sc-7U < 32) ctx.unmap_seq[sc-7] = ctx.seq; +} + +static inline void account_bounce(int sc) +{ + if (sc-7U < 32) { + int seq = ctx.unmap_seq[sc-7]; + if (seq && ctx.seq-seq < 10) { + if (ctx.bounces[sc-7]+1 < 100) + ctx.bounces[sc-7]++; + else + ctx.bounces[sc-7] = 150; + } + } +} + +static inline void decay_bounces(int sc) +{ + if (sc-7U < 32 && ctx.bounces[sc-7]) + ctx.bounces[sc-7]--; +} + +static inline int is_bouncing(int sc) +{ + return (sc-7U < 32 && ctx.bounces[sc-7] >= 100); +} + +#endif diff --git a/libc-top-half/musl/src/malloc/mallocng/realloc.c b/libc-top-half/musl/src/malloc/mallocng/realloc.c new file mode 100644 index 000000000..18769f42d --- /dev/null +++ b/libc-top-half/musl/src/malloc/mallocng/realloc.c @@ -0,0 +1,51 @@ +#define _GNU_SOURCE +#include +#include +#include +#include "meta.h" + +void *realloc(void *p, size_t n) +{ + if (!p) return malloc(n); + if (size_overflows(n)) return 0; + + struct meta *g = get_meta(p); + int idx = get_slot_index(p); + size_t stride = get_stride(g); + unsigned char *start = g->mem->storage + stride*idx; + unsigned char *end = start + stride - IB; + size_t old_size = get_nominal_size(p, end); + size_t avail_size = end-(unsigned char *)p; + void *new; + + // only resize in-place if size class matches + if (n <= avail_size && n= g->sizeclass) { + 
set_size(p, end, n); + return p; + } + + // use mremap if old and new size are both mmap-worthy + if (g->sizeclass>=48 && n>=MMAP_THRESHOLD) { + assert(g->sizeclass==63); + size_t base = (unsigned char *)p-start; + size_t needed = (n + base + UNIT + IB + 4095) & -4096; + new = g->maplen*4096UL == needed ? g->mem : + mremap(g->mem, g->maplen*4096UL, needed, MREMAP_MAYMOVE); + if (new!=MAP_FAILED) { + g->mem = new; + g->maplen = needed/4096; + p = g->mem->storage + base; + end = g->mem->storage + (needed - UNIT) - IB; + *end = 0; + set_size(p, end, n); + return p; + } + } + + new = malloc(n); + if (!new) return 0; + memcpy(new, p, n < old_size ? n : old_size); + free(p); + return new; +} diff --git a/libc-top-half/musl/src/malloc/memalign.c b/libc-top-half/musl/src/malloc/memalign.c index cf9dfbda6..32cd87d81 100644 --- a/libc-top-half/musl/src/malloc/memalign.c +++ b/libc-top-half/musl/src/malloc/memalign.c @@ -1,54 +1,7 @@ +#define _BSD_SOURCE #include -#include -#include -#include "malloc_impl.h" -void *__memalign(size_t align, size_t len) +void *memalign(size_t align, size_t len) { - unsigned char *mem, *new; - - if ((align & -align) != align) { - errno = EINVAL; - return 0; - } - - if (len > SIZE_MAX - align || __malloc_replaced) { - errno = ENOMEM; - return 0; - } - - if (align <= SIZE_ALIGN) - return malloc(len); - - if (!(mem = malloc(len + align-1))) - return 0; - - new = (void *)((uintptr_t)mem + align-1 & -align); - if (new == mem) return mem; - - struct chunk *c = MEM_TO_CHUNK(mem); - struct chunk *n = MEM_TO_CHUNK(new); - - if (IS_MMAPPED(c)) { - /* Apply difference between aligned and original - * address to the "extra" field of mmapped chunk. */ - n->psize = c->psize + (new-mem); - n->csize = c->csize - (new-mem); - return new; - } - - struct chunk *t = NEXT_CHUNK(c); - - /* Split the allocated chunk into two chunks. 
The aligned part - * that will be used has the size in its footer reduced by the - * difference between the aligned and original addresses, and - * the resulting size copied to its header. A new header and - * footer are written for the split-off part to be freed. */ - n->psize = c->csize = C_INUSE | (new-mem); - n->csize = t->psize -= new-mem; - - __bin_chunk(c); - return new; + return aligned_alloc(align, len); } - -weak_alias(__memalign, memalign); diff --git a/libc-top-half/musl/src/malloc/oldmalloc/aligned_alloc.c b/libc-top-half/musl/src/malloc/oldmalloc/aligned_alloc.c new file mode 100644 index 000000000..4adca3b4f --- /dev/null +++ b/libc-top-half/musl/src/malloc/oldmalloc/aligned_alloc.c @@ -0,0 +1,53 @@ +#include +#include +#include +#include "malloc_impl.h" + +void *aligned_alloc(size_t align, size_t len) +{ + unsigned char *mem, *new; + + if ((align & -align) != align) { + errno = EINVAL; + return 0; + } + + if (len > SIZE_MAX - align || + (__malloc_replaced && !__aligned_alloc_replaced)) { + errno = ENOMEM; + return 0; + } + + if (align <= SIZE_ALIGN) + return malloc(len); + + if (!(mem = malloc(len + align-1))) + return 0; + + new = (void *)((uintptr_t)mem + align-1 & -align); + if (new == mem) return mem; + + struct chunk *c = MEM_TO_CHUNK(mem); + struct chunk *n = MEM_TO_CHUNK(new); + + if (IS_MMAPPED(c)) { + /* Apply difference between aligned and original + * address to the "extra" field of mmapped chunk. */ + n->psize = c->psize + (new-mem); + n->csize = c->csize - (new-mem); + return new; + } + + struct chunk *t = NEXT_CHUNK(c); + + /* Split the allocated chunk into two chunks. The aligned part + * that will be used has the size in its footer reduced by the + * difference between the aligned and original addresses, and + * the resulting size copied to its header. A new header and + * footer are written for the split-off part to be freed. 
*/ + n->psize = c->csize = C_INUSE | (new-mem); + n->csize = t->psize -= new-mem; + + __bin_chunk(c); + return new; +} diff --git a/libc-top-half/musl/src/malloc/malloc.c b/libc-top-half/musl/src/malloc/oldmalloc/malloc.c similarity index 67% rename from libc-top-half/musl/src/malloc/malloc.c rename to libc-top-half/musl/src/malloc/oldmalloc/malloc.c index 96982596b..c0997ad85 100644 --- a/libc-top-half/musl/src/malloc/malloc.c +++ b/libc-top-half/musl/src/malloc/oldmalloc/malloc.c @@ -17,17 +17,18 @@ static struct { volatile uint64_t binmap; struct bin bins[64]; - volatile int free_lock[2]; + volatile int split_merge_lock[2]; } mal; -int __malloc_replaced; - /* Synchronization tools */ static inline void lock(volatile int *lk) { - if (libc.threads_minus_1) + int need_locks = libc.need_locks; + if (need_locks) { while(a_swap(lk, 1)) __wait(lk, lk+1, 1, 1); + if (need_locks < 0) libc.need_locks = 0; + } } static inline void unlock(volatile int *lk) @@ -123,9 +124,72 @@ void __dump_heap(int x) } #endif +/* This function returns true if the interval [old,new] + * intersects the 'len'-sized interval below &libc.auxv + * (interpreted as the main-thread stack) or below &b + * (the current stack). It is used to defend against + * buggy brk implementations that can cross the stack. */ + +static int traverses_stack_p(uintptr_t old, uintptr_t new) +{ + const uintptr_t len = 8<<20; + uintptr_t a, b; + + b = (uintptr_t)libc.auxv; + a = b > len ? b-len : 0; + if (new>a && old len ? 
b-len : 0; + if (new>a && old SIZE_MAX/2 - PAGE_SIZE) { + errno = ENOMEM; + return 0; + } + n += -n & PAGE_SIZE-1; + + if (!brk) { + brk = __syscall(SYS_brk, 0); + brk += -brk & PAGE_SIZE-1; + } + + if (n < SIZE_MAX-brk && !traverses_stack_p(brk, brk+n) + && __syscall(SYS_brk, brk+n)==brk+n) { + *pn = n; + brk += n; + return (void *)(brk-n); + } + + size_t min = (size_t)PAGE_SIZE << mmap_step/2; + if (n < min) n = min; + void *area = __mmap(0, n, PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + if (area == MAP_FAILED) return 0; + *pn = n; + mmap_step++; + return area; +} + static struct chunk *expand_heap(size_t n) { - static int heap_lock[2]; static void *end; void *p; struct chunk *w; @@ -135,13 +199,8 @@ static struct chunk *expand_heap(size_t n) * we need room for an extra zero-sized sentinel chunk. */ n += SIZE_ALIGN; - lock(heap_lock); - p = __expand_heap(&n); - if (!p) { - unlock(heap_lock); - return 0; - } + if (!p) return 0; /* If not just expanding existing space, we need to make a * new sentinel chunk below the allocated space. 
*/ @@ -164,8 +223,6 @@ static struct chunk *expand_heap(size_t n) w = MEM_TO_CHUNK(p); w->csize = n | C_INUSE; - unlock(heap_lock); - return w; } @@ -195,96 +252,44 @@ static void unbin(struct chunk *c, int i) NEXT_CHUNK(c)->psize |= C_INUSE; } -static int alloc_fwd(struct chunk *c) +static void bin_chunk(struct chunk *self, int i) { - int i; - size_t k; - while (!((k=c->csize) & C_INUSE)) { - i = bin_index(k); - lock_bin(i); - if (c->csize == k) { - unbin(c, i); - unlock_bin(i); - return 1; - } - unlock_bin(i); - } - return 0; -} - -static int alloc_rev(struct chunk *c) -{ - int i; - size_t k; - while (!((k=c->psize) & C_INUSE)) { - i = bin_index(k); - lock_bin(i); - if (c->psize == k) { - unbin(PREV_CHUNK(c), i); - unlock_bin(i); - return 1; - } - unlock_bin(i); - } - return 0; + self->next = BIN_TO_CHUNK(i); + self->prev = mal.bins[i].tail; + self->next->prev = self; + self->prev->next = self; + if (self->prev == BIN_TO_CHUNK(i)) + a_or_64(&mal.binmap, 1ULL<= n1 - DONTCARE) return; next = NEXT_CHUNK(self); split = (void *)((char *)self + n); - split->prev = self->prev; - split->next = self->next; - split->prev->next = split; - split->next->prev = split; split->psize = n | C_INUSE; split->csize = n1-n; next->psize = n1-n; self->csize = n | C_INUSE; - return 1; -} - -static void trim(struct chunk *self, size_t n) -{ - size_t n1 = CHUNK_SIZE(self); - struct chunk *next, *split; - - if (n >= n1 - DONTCARE) return; - next = NEXT_CHUNK(self); - split = (void *)((char *)self + n); + int i = bin_index(n1-n); + lock_bin(i); - split->psize = n | C_INUSE; - split->csize = n1-n | C_INUSE; - next->psize = n1-n | C_INUSE; - self->csize = n | C_INUSE; + bin_chunk(split, i); - __bin_chunk(split); + unlock_bin(i); } void *malloc(size_t n) { struct chunk *c; int i, j; + uint64_t mask; if (adjust_size(&n) < 0) return 0; @@ -300,70 +305,43 @@ void *malloc(size_t n) } i = bin_index_up(n); - for (;;) { - uint64_t mask = mal.binmap & -(1ULL<psize = c->csize = - x->csize + 
CHUNK_SIZE(c); - } - break; + if (i<63 && (mal.binmap & (1ULL< (size_t)-1/n) { - errno = ENOMEM; - return 0; - } - n *= m; - void *p = malloc(n); - if (!p) return p; - if (!__malloc_replaced) { - if (IS_MMAPPED(MEM_TO_CHUNK(p))) - return p; - if (n >= PAGE_SIZE) - n = mal0_clear(p, PAGE_SIZE, n); - } - return memset(p, 0, n); + return IS_MMAPPED(MEM_TO_CHUNK(p)); } void *realloc(void *p, size_t n) @@ -379,6 +357,8 @@ void *realloc(void *p, size_t n) self = MEM_TO_CHUNK(p); n1 = n0 = CHUNK_SIZE(self); + if (n<=n0 && n0-n<=DONTCARE) return p; + if (IS_MMAPPED(self)) { size_t extra = self->psize; char *base = (char *)self - extra; @@ -405,34 +385,43 @@ void *realloc(void *p, size_t n) /* Crash on corrupted footer (likely from buffer overflow) */ if (next->psize != self->csize) a_crash(); - /* Merge adjacent chunks if we need more space. This is not - * a waste of time even if we fail to get enough space, because our - * subsequent call to free would otherwise have to do the merge. */ - if (n > n1 && alloc_fwd(next)) { - n1 += CHUNK_SIZE(next); - next = NEXT_CHUNK(next); - } - /* FIXME: find what's wrong here and reenable it..? */ - if (0 && n > n1 && alloc_rev(self)) { - self = PREV_CHUNK(self); - n1 += CHUNK_SIZE(self); + if (n < n0) { + int i = bin_index_up(n); + int j = bin_index(n0); + if (icsize = split->psize = n | C_INUSE; + split->csize = next->psize = n0-n | C_INUSE; + __bin_chunk(split); + return CHUNK_TO_MEM(self); } - self->csize = n1 | C_INUSE; - next->psize = n1 | C_INUSE; - /* If we got enough space, split off the excess and return */ - if (n <= n1) { - //memmove(CHUNK_TO_MEM(self), p, n0-OVERHEAD); - trim(self, n); - return CHUNK_TO_MEM(self); + lock(mal.split_merge_lock); + + size_t nsize = next->csize & C_INUSE ? 
0 : CHUNK_SIZE(next); + if (n0+nsize >= n) { + int i = bin_index(nsize); + lock_bin(i); + if (!(next->csize & C_INUSE)) { + unbin(next, i); + unlock_bin(i); + next = NEXT_CHUNK(next); + self->csize = next->psize = n0+nsize | C_INUSE; + trim(self, n); + unlock(mal.split_merge_lock); + return CHUNK_TO_MEM(self); + } + unlock_bin(i); } + unlock(mal.split_merge_lock); copy_realloc: /* As a last resort, allocate a new chunk and copy to it. */ new = malloc(n-OVERHEAD); if (!new) return 0; copy_free_ret: - memcpy(new, p, n0-OVERHEAD); + memcpy(new, p, (npsize != self->csize) a_crash(); - for (;;) { - if (self->psize & next->csize & C_INUSE) { - self->csize = final_size | C_INUSE; - next->psize = final_size | C_INUSE; - i = bin_index(final_size); - lock_bin(i); - lock(mal.free_lock); - if (self->psize & next->csize & C_INUSE) - break; - unlock(mal.free_lock); - unlock_bin(i); - } + lock(mal.split_merge_lock); - if (alloc_rev(self)) { - self = PREV_CHUNK(self); - size = CHUNK_SIZE(self); - final_size += size; - if (new_size+size > RECLAIM && (new_size+size^size) > size) - reclaim = 1; - } + size_t osize = CHUNK_SIZE(self), size = osize; + + /* Since we hold split_merge_lock, only transition from free to + * in-use can race; in-use to free is impossible */ + size_t psize = self->psize & C_INUSE ? 0 : CHUNK_PSIZE(self); + size_t nsize = next->csize & C_INUSE ? 
0 : CHUNK_SIZE(next); - if (alloc_fwd(next)) { - size = CHUNK_SIZE(next); - final_size += size; - if (new_size+size > RECLAIM && (new_size+size^size) > size) - reclaim = 1; + if (psize) { + int i = bin_index(psize); + lock_bin(i); + if (!(self->psize & C_INUSE)) { + struct chunk *prev = PREV_CHUNK(self); + unbin(prev, i); + self = prev; + size += psize; + } + unlock_bin(i); + } + if (nsize) { + int i = bin_index(nsize); + lock_bin(i); + if (!(next->csize & C_INUSE)) { + unbin(next, i); next = NEXT_CHUNK(next); + size += nsize; } + unlock_bin(i); } - if (!(mal.binmap & 1ULL<csize = final_size; - next->psize = final_size; - unlock(mal.free_lock); + int i = bin_index(size); + lock_bin(i); - self->next = BIN_TO_CHUNK(i); - self->prev = mal.bins[i].tail; - self->next->prev = self; - self->prev->next = self; + self->csize = size; + next->psize = size; + bin_chunk(self, i); + unlock(mal.split_merge_lock); /* Replace middle of large chunks with fresh zero pages */ - if (reclaim) { + if (size > RECLAIM && (size^(size-osize)) > size-osize) { uintptr_t a = (uintptr_t)self + SIZE_ALIGN+PAGE_SIZE-1 & -PAGE_SIZE; uintptr_t b = (uintptr_t)next - SIZE_ALIGN & -PAGE_SIZE; #if 1 diff --git a/libc-top-half/musl/src/internal/malloc_impl.h b/libc-top-half/musl/src/malloc/oldmalloc/malloc_impl.h similarity index 85% rename from libc-top-half/musl/src/internal/malloc_impl.h rename to libc-top-half/musl/src/malloc/oldmalloc/malloc_impl.h index 59785a7fe..e1cf4774c 100644 --- a/libc-top-half/musl/src/internal/malloc_impl.h +++ b/libc-top-half/musl/src/malloc/oldmalloc/malloc_impl.h @@ -2,12 +2,7 @@ #define MALLOC_IMPL_H #include - -hidden void *__expand_heap(size_t *); - -hidden void __malloc_donate(char *, char *); - -hidden void *__memalign(size_t, size_t); +#include "dynlink.h" struct chunk { size_t psize, csize; @@ -41,6 +36,4 @@ struct bin { hidden void __bin_chunk(struct chunk *); -hidden extern int __malloc_replaced; - #endif diff --git 
a/libc-top-half/musl/src/malloc/malloc_usable_size.c b/libc-top-half/musl/src/malloc/oldmalloc/malloc_usable_size.c similarity index 100% rename from libc-top-half/musl/src/malloc/malloc_usable_size.c rename to libc-top-half/musl/src/malloc/oldmalloc/malloc_usable_size.c diff --git a/libc-top-half/musl/src/malloc/posix_memalign.c b/libc-top-half/musl/src/malloc/posix_memalign.c index 2ea8bd8a4..ad4d8f473 100644 --- a/libc-top-half/musl/src/malloc/posix_memalign.c +++ b/libc-top-half/musl/src/malloc/posix_memalign.c @@ -1,11 +1,10 @@ #include #include -#include "malloc_impl.h" int posix_memalign(void **res, size_t align, size_t len) { if (align < sizeof(void *)) return EINVAL; - void *mem = __memalign(align, len); + void *mem = aligned_alloc(align, len); if (!mem) return errno; *res = mem; return 0; diff --git a/libc-top-half/musl/src/malloc/replaced.c b/libc-top-half/musl/src/malloc/replaced.c new file mode 100644 index 000000000..07fce61ec --- /dev/null +++ b/libc-top-half/musl/src/malloc/replaced.c @@ -0,0 +1,4 @@ +#include "dynlink.h" + +int __malloc_replaced; +int __aligned_alloc_replaced; diff --git a/libc-top-half/musl/src/math/__expo2.c b/libc-top-half/musl/src/math/__expo2.c index 740ac680e..4ff17df40 100644 --- a/libc-top-half/musl/src/math/__expo2.c +++ b/libc-top-half/musl/src/math/__expo2.c @@ -5,12 +5,21 @@ static const int k = 2043; static const double kln2 = 0x1.62066151add8bp+10; /* exp(x)/2 for x >= log(DBL_MAX), slightly better than 0.5*exp(x/2)*exp(x/2) */ +#ifdef __wasilibc_unmodified_upstream // Wasm doesn't have alternate rounding modes +double __expo2(double x, double sign) +#else double __expo2(double x) +#endif { double scale; /* note that k is odd and scale*scale overflows */ INSERT_WORDS(scale, (uint32_t)(0x3ff + k/2) << 20, 0); /* exp(x - k ln2) * 2**(k-1) */ +#ifdef __wasilibc_unmodified_upstream // Wasm doesn't have alternate rounding modes + /* in directed rounding correct sign before rounding or overflow is important */ + return 
exp(x - kln2) * (sign * scale) * scale; +#else return exp(x - kln2) * scale * scale; +#endif } diff --git a/libc-top-half/musl/src/math/__expo2f.c b/libc-top-half/musl/src/math/__expo2f.c index 5163e4180..acbaeb4ce 100644 --- a/libc-top-half/musl/src/math/__expo2f.c +++ b/libc-top-half/musl/src/math/__expo2f.c @@ -5,12 +5,21 @@ static const int k = 235; static const float kln2 = 0x1.45c778p+7f; /* expf(x)/2 for x >= log(FLT_MAX), slightly better than 0.5f*expf(x/2)*expf(x/2) */ +#ifdef __wasilibc_unmodified_upstream // Wasm doesn't have alternate rounding modes +float __expo2f(float x, float sign) +#else float __expo2f(float x) +#endif { float scale; /* note that k is odd and scale*scale overflows */ SET_FLOAT_WORD(scale, (uint32_t)(0x7f + k/2) << 23); /* exp(x - k ln2) * 2**(k-1) */ +#ifdef __wasilibc_unmodified_upstream // Wasm doesn't have alternate rounding modes + /* in directed rounding correct sign before rounding or overflow is important */ + return expf(x - kln2) * (sign * scale) * scale; +#else return expf(x - kln2) * scale * scale; +#endif } diff --git a/libc-top-half/musl/src/math/__rem_pio2.c b/libc-top-half/musl/src/math/__rem_pio2.c index d403f81c7..326cda758 100644 --- a/libc-top-half/musl/src/math/__rem_pio2.c +++ b/libc-top-half/musl/src/math/__rem_pio2.c @@ -36,6 +36,9 @@ */ static const double toint = 1.5/EPS, +#ifdef __wasilibc_unmodified_upstream // Wasm doesn't have alternate rounding modes +pio4 = 0x1.921fb54442d18p-1, +#endif invpio2 = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */ pio2_1 = 1.57079632673412561417e+00, /* 0x3FF921FB, 0x54400000 */ pio2_1t = 6.07710050650619224932e-11, /* 0x3DD0B461, 0x1A626331 */ @@ -117,11 +120,25 @@ int __rem_pio2(double x, double *y) } if (ix < 0x413921fb) { /* |x| ~< 2^20*(pi/2), medium size */ medium: - /* rint(x/(pi/2)), Assume round-to-nearest. 
*/ + /* rint(x/(pi/2)) */ fn = (double_t)x*invpio2 + toint - toint; n = (int32_t)fn; r = x - fn*pio2_1; w = fn*pio2_1t; /* 1st round, good to 85 bits */ +#ifdef __wasilibc_unmodified_upstream // Wasm doesn't have alternate rounding modes + /* Matters with directed rounding. */ + if (predict_false(r - w < -pio4)) { + n--; + fn--; + r = x - fn*pio2_1; + w = fn*pio2_1t; + } else if (predict_false(r - w > pio4)) { + n++; + fn++; + r = x - fn*pio2_1; + w = fn*pio2_1t; + } +#endif y[0] = r - w; u.f = y[0]; ey = u.i>>52 & 0x7ff; diff --git a/libc-top-half/musl/src/math/__rem_pio2f.c b/libc-top-half/musl/src/math/__rem_pio2f.c index 4473c1c42..034ed8100 100644 --- a/libc-top-half/musl/src/math/__rem_pio2f.c +++ b/libc-top-half/musl/src/math/__rem_pio2f.c @@ -35,6 +35,9 @@ */ static const double toint = 1.5/EPS, +#ifdef __wasilibc_unmodified_upstream // Wasm doesn't have alternate rounding modes +pio4 = 0x1.921fb6p-1, +#endif invpio2 = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */ pio2_1 = 1.57079631090164184570e+00, /* 0x3FF921FB, 0x50000000 */ pio2_1t = 1.58932547735281966916e-08; /* 0x3E5110b4, 0x611A6263 */ @@ -50,10 +53,22 @@ int __rem_pio2f(float x, double *y) ix = u.i & 0x7fffffff; /* 25+53 bit pi is good enough for medium size */ if (ix < 0x4dc90fdb) { /* |x| ~< 2^28*(pi/2), medium size */ - /* Use a specialized rint() to get fn. Assume round-to-nearest. */ + /* Use a specialized rint() to get fn. */ fn = (double_t)x*invpio2 + toint - toint; n = (int32_t)fn; *y = x - fn*pio2_1 - fn*pio2_1t; +#ifdef __wasilibc_unmodified_upstream // Wasm doesn't have alternate rounding modes + /* Matters with directed rounding. 
*/ + if (predict_false(*y < -pio4)) { + n--; + fn--; + *y = x - fn*pio2_1 - fn*pio2_1t; + } else if (predict_false(*y > pio4)) { + n++; + fn++; + *y = x - fn*pio2_1 - fn*pio2_1t; + } +#endif return n; } if(ix>=0x7f800000) { /* x is inf or NaN */ diff --git a/libc-top-half/musl/src/math/__rem_pio2l.c b/libc-top-half/musl/src/math/__rem_pio2l.c index 77255bd80..e73a86bf3 100644 --- a/libc-top-half/musl/src/math/__rem_pio2l.c +++ b/libc-top-half/musl/src/math/__rem_pio2l.c @@ -44,6 +44,7 @@ pio2_1 = 1.57079632679597125389e+00, /* 0x3FF921FB, 0x54444000 */ pio2_2 = -1.07463465549783099519e-12, /* -0x12e7b967674000.0p-92 */ pio2_3 = 6.36831716351370313614e-25; /* 0x18a2e037074000.0p-133 */ static const long double +pio4 = 0x1.921fb54442d1846ap-1L, invpio2 = 6.36619772367581343076e-01L, /* 0xa2f9836e4e44152a.0p-64 */ pio2_1t = -1.07463465549719416346e-12L, /* -0x973dcb3b399d747f.0p-103 */ pio2_2t = 6.36831716351095013979e-25L, /* 0xc51701b839a25205.0p-144 */ @@ -57,6 +58,9 @@ pio2_3t = -2.75299651904407171810e-37L; /* -0xbb5bf6c7ddd660ce.0p-185 */ #define NX 5 #define NY 3 static const long double +#ifdef __wasilibc_unmodified_upstream // Wasm doesn't have alternate rounding modes +pio4 = 0x1.921fb54442d18469898cc51701b8p-1L, +#endif invpio2 = 6.3661977236758134307553505349005747e-01L, /* 0x145f306dc9c882a53f84eafa3ea6a.0p-113 */ pio2_1 = 1.5707963267948966192292994253909555e+00L, /* 0x1921fb54442d18469800000000000.0p-112 */ pio2_1t = 2.0222662487959507323996846200947577e-21L, /* 0x13198a2e03707344a4093822299f3.0p-181 */ @@ -76,11 +80,25 @@ int __rem_pio2l(long double x, long double *y) u.f = x; ex = u.i.se & 0x7fff; if (SMALL(u)) { - /* rint(x/(pi/2)), Assume round-to-nearest. */ + /* rint(x/(pi/2)) */ fn = x*invpio2 + toint - toint; n = QUOBITS(fn); r = x-fn*pio2_1; w = fn*pio2_1t; /* 1st round good to 102/180 bits (ld80/ld128) */ +#ifdef __wasilibc_unmodified_upstream // Wasm doesn't have alternate rounding modes + /* Matters with directed rounding. 
*/ + if (predict_false(r - w < -pio4)) { + n--; + fn--; + r = x - fn*pio2_1; + w = fn*pio2_1t; + } else if (predict_false(r - w > pio4)) { + n++; + fn++; + r = x - fn*pio2_1; + w = fn*pio2_1t; + } +#endif y[0] = r-w; u.f = y[0]; ey = u.i.se & 0x7fff; diff --git a/libc-top-half/musl/src/math/cosh.c b/libc-top-half/musl/src/math/cosh.c index 100f8231d..2cdf0023f 100644 --- a/libc-top-half/musl/src/math/cosh.c +++ b/libc-top-half/musl/src/math/cosh.c @@ -35,6 +35,10 @@ double cosh(double x) /* |x| > log(DBL_MAX) or nan */ /* note: the result is stored to handle overflow */ +#ifdef __wasilibc_unmodified_upstream // Wasm doesn't have alternate rounding modes + t = __expo2(x, 1.0); +#else t = __expo2(x); +#endif return t; } diff --git a/libc-top-half/musl/src/math/coshf.c b/libc-top-half/musl/src/math/coshf.c index b09f2ee57..b946c0b06 100644 --- a/libc-top-half/musl/src/math/coshf.c +++ b/libc-top-half/musl/src/math/coshf.c @@ -28,6 +28,10 @@ float coshf(float x) } /* |x| > log(FLT_MAX) or nan */ +#ifdef __wasilibc_unmodified_upstream // Wasm doesn't have alternate rounding modes + t = __expo2f(x, 1.0f); +#else t = __expo2f(x); +#endif return t; } diff --git a/libc-top-half/musl/src/math/i386/fabs.c b/libc-top-half/musl/src/math/i386/fabs.c new file mode 100644 index 000000000..396727863 --- /dev/null +++ b/libc-top-half/musl/src/math/i386/fabs.c @@ -0,0 +1,7 @@ +#include + +double fabs(double x) +{ + __asm__ ("fabs" : "+t"(x)); + return x; +} diff --git a/libc-top-half/musl/src/math/i386/fabs.s b/libc-top-half/musl/src/math/i386/fabs.s deleted file mode 100644 index d66ea9a19..000000000 --- a/libc-top-half/musl/src/math/i386/fabs.s +++ /dev/null @@ -1,6 +0,0 @@ -.global fabs -.type fabs,@function -fabs: - fldl 4(%esp) - fabs - ret diff --git a/libc-top-half/musl/src/math/i386/fabsf.c b/libc-top-half/musl/src/math/i386/fabsf.c new file mode 100644 index 000000000..d882eee34 --- /dev/null +++ b/libc-top-half/musl/src/math/i386/fabsf.c @@ -0,0 +1,7 @@ +#include + +float 
fabsf(float x) +{ + __asm__ ("fabs" : "+t"(x)); + return x; +} diff --git a/libc-top-half/musl/src/math/i386/fabsf.s b/libc-top-half/musl/src/math/i386/fabsf.s deleted file mode 100644 index a981c4222..000000000 --- a/libc-top-half/musl/src/math/i386/fabsf.s +++ /dev/null @@ -1,6 +0,0 @@ -.global fabsf -.type fabsf,@function -fabsf: - flds 4(%esp) - fabs - ret diff --git a/libc-top-half/musl/src/math/i386/fabsl.c b/libc-top-half/musl/src/math/i386/fabsl.c new file mode 100644 index 000000000..cc1c9ed9c --- /dev/null +++ b/libc-top-half/musl/src/math/i386/fabsl.c @@ -0,0 +1,7 @@ +#include + +long double fabsl(long double x) +{ + __asm__ ("fabs" : "+t"(x)); + return x; +} diff --git a/libc-top-half/musl/src/math/i386/fabsl.s b/libc-top-half/musl/src/math/i386/fabsl.s deleted file mode 100644 index ceef9e4cc..000000000 --- a/libc-top-half/musl/src/math/i386/fabsl.s +++ /dev/null @@ -1,6 +0,0 @@ -.global fabsl -.type fabsl,@function -fabsl: - fldt 4(%esp) - fabs - ret diff --git a/libc-top-half/musl/src/math/i386/fmod.c b/libc-top-half/musl/src/math/i386/fmod.c new file mode 100644 index 000000000..ea0c58d9b --- /dev/null +++ b/libc-top-half/musl/src/math/i386/fmod.c @@ -0,0 +1,10 @@ +#include + +double fmod(double x, double y) +{ + unsigned short fpsr; + // fprem does not introduce excess precision into x + do __asm__ ("fprem; fnstsw %%ax" : "+t"(x), "=a"(fpsr) : "u"(y)); + while (fpsr & 0x400); + return x; +} diff --git a/libc-top-half/musl/src/math/i386/fmod.s b/libc-top-half/musl/src/math/i386/fmod.s deleted file mode 100644 index 2113b3c54..000000000 --- a/libc-top-half/musl/src/math/i386/fmod.s +++ /dev/null @@ -1,11 +0,0 @@ -.global fmod -.type fmod,@function -fmod: - fldl 12(%esp) - fldl 4(%esp) -1: fprem - fnstsw %ax - sahf - jp 1b - fstp %st(1) - ret diff --git a/libc-top-half/musl/src/math/i386/fmodf.c b/libc-top-half/musl/src/math/i386/fmodf.c new file mode 100644 index 000000000..90b56ab0f --- /dev/null +++ b/libc-top-half/musl/src/math/i386/fmodf.c @@ 
-0,0 +1,10 @@ +#include + +float fmodf(float x, float y) +{ + unsigned short fpsr; + // fprem does not introduce excess precision into x + do __asm__ ("fprem; fnstsw %%ax" : "+t"(x), "=a"(fpsr) : "u"(y)); + while (fpsr & 0x400); + return x; +} diff --git a/libc-top-half/musl/src/math/i386/fmodf.s b/libc-top-half/musl/src/math/i386/fmodf.s deleted file mode 100644 index e04e2a567..000000000 --- a/libc-top-half/musl/src/math/i386/fmodf.s +++ /dev/null @@ -1,11 +0,0 @@ -.global fmodf -.type fmodf,@function -fmodf: - flds 8(%esp) - flds 4(%esp) -1: fprem - fnstsw %ax - sahf - jp 1b - fstp %st(1) - ret diff --git a/libc-top-half/musl/src/math/i386/fmodl.c b/libc-top-half/musl/src/math/i386/fmodl.c new file mode 100644 index 000000000..3daeab060 --- /dev/null +++ b/libc-top-half/musl/src/math/i386/fmodl.c @@ -0,0 +1,9 @@ +#include + +long double fmodl(long double x, long double y) +{ + unsigned short fpsr; + do __asm__ ("fprem; fnstsw %%ax" : "+t"(x), "=a"(fpsr) : "u"(y)); + while (fpsr & 0x400); + return x; +} diff --git a/libc-top-half/musl/src/math/i386/fmodl.s b/libc-top-half/musl/src/math/i386/fmodl.s deleted file mode 100644 index 0cb3fe9ba..000000000 --- a/libc-top-half/musl/src/math/i386/fmodl.s +++ /dev/null @@ -1,11 +0,0 @@ -.global fmodl -.type fmodl,@function -fmodl: - fldt 16(%esp) - fldt 4(%esp) -1: fprem - fnstsw %ax - sahf - jp 1b - fstp %st(1) - ret diff --git a/libc-top-half/musl/src/math/i386/llrint.c b/libc-top-half/musl/src/math/i386/llrint.c new file mode 100644 index 000000000..aa4008171 --- /dev/null +++ b/libc-top-half/musl/src/math/i386/llrint.c @@ -0,0 +1,8 @@ +#include + +long long llrint(double x) +{ + long long r; + __asm__ ("fistpll %0" : "=m"(r) : "t"(x) : "st"); + return r; +} diff --git a/libc-top-half/musl/src/math/i386/llrint.s b/libc-top-half/musl/src/math/i386/llrint.s deleted file mode 100644 index 8e89cd913..000000000 --- a/libc-top-half/musl/src/math/i386/llrint.s +++ /dev/null @@ -1,8 +0,0 @@ -.global llrint -.type 
llrint,@function -llrint: - fldl 4(%esp) - fistpll 4(%esp) - mov 4(%esp),%eax - mov 8(%esp),%edx - ret diff --git a/libc-top-half/musl/src/math/i386/llrintf.c b/libc-top-half/musl/src/math/i386/llrintf.c new file mode 100644 index 000000000..c41a317bd --- /dev/null +++ b/libc-top-half/musl/src/math/i386/llrintf.c @@ -0,0 +1,8 @@ +#include + +long long llrintf(float x) +{ + long long r; + __asm__ ("fistpll %0" : "=m"(r) : "t"(x) : "st"); + return r; +} diff --git a/libc-top-half/musl/src/math/i386/llrintf.s b/libc-top-half/musl/src/math/i386/llrintf.s deleted file mode 100644 index aa850c6cd..000000000 --- a/libc-top-half/musl/src/math/i386/llrintf.s +++ /dev/null @@ -1,9 +0,0 @@ -.global llrintf -.type llrintf,@function -llrintf: - sub $8,%esp - flds 12(%esp) - fistpll (%esp) - pop %eax - pop %edx - ret diff --git a/libc-top-half/musl/src/math/i386/llrintl.c b/libc-top-half/musl/src/math/i386/llrintl.c new file mode 100644 index 000000000..c439ef28d --- /dev/null +++ b/libc-top-half/musl/src/math/i386/llrintl.c @@ -0,0 +1,8 @@ +#include + +long long llrintl(long double x) +{ + long long r; + __asm__ ("fistpll %0" : "=m"(r) : "t"(x) : "st"); + return r; +} diff --git a/libc-top-half/musl/src/math/i386/llrintl.s b/libc-top-half/musl/src/math/i386/llrintl.s deleted file mode 100644 index 1cfb56f15..000000000 --- a/libc-top-half/musl/src/math/i386/llrintl.s +++ /dev/null @@ -1,8 +0,0 @@ -.global llrintl -.type llrintl,@function -llrintl: - fldt 4(%esp) - fistpll 4(%esp) - mov 4(%esp),%eax - mov 8(%esp),%edx - ret diff --git a/libc-top-half/musl/src/math/i386/lrint.c b/libc-top-half/musl/src/math/i386/lrint.c new file mode 100644 index 000000000..89563ab26 --- /dev/null +++ b/libc-top-half/musl/src/math/i386/lrint.c @@ -0,0 +1,8 @@ +#include + +long lrint(double x) +{ + long r; + __asm__ ("fistpl %0" : "=m"(r) : "t"(x) : "st"); + return r; +} diff --git a/libc-top-half/musl/src/math/i386/lrint.s b/libc-top-half/musl/src/math/i386/lrint.s deleted file mode 100644 index 
02b83d9ff..000000000 --- a/libc-top-half/musl/src/math/i386/lrint.s +++ /dev/null @@ -1,7 +0,0 @@ -.global lrint -.type lrint,@function -lrint: - fldl 4(%esp) - fistpl 4(%esp) - mov 4(%esp),%eax - ret diff --git a/libc-top-half/musl/src/math/i386/lrintf.c b/libc-top-half/musl/src/math/i386/lrintf.c new file mode 100644 index 000000000..0bbf29de0 --- /dev/null +++ b/libc-top-half/musl/src/math/i386/lrintf.c @@ -0,0 +1,8 @@ +#include + +long lrintf(float x) +{ + long r; + __asm__ ("fistpl %0" : "=m"(r) : "t"(x) : "st"); + return r; +} diff --git a/libc-top-half/musl/src/math/i386/lrintf.s b/libc-top-half/musl/src/math/i386/lrintf.s deleted file mode 100644 index 907aac291..000000000 --- a/libc-top-half/musl/src/math/i386/lrintf.s +++ /dev/null @@ -1,7 +0,0 @@ -.global lrintf -.type lrintf,@function -lrintf: - flds 4(%esp) - fistpl 4(%esp) - mov 4(%esp),%eax - ret diff --git a/libc-top-half/musl/src/math/i386/lrintl.c b/libc-top-half/musl/src/math/i386/lrintl.c new file mode 100644 index 000000000..eb8c09028 --- /dev/null +++ b/libc-top-half/musl/src/math/i386/lrintl.c @@ -0,0 +1,8 @@ +#include + +long lrintl(long double x) +{ + long r; + __asm__ ("fistpl %0" : "=m"(r) : "t"(x) : "st"); + return r; +} diff --git a/libc-top-half/musl/src/math/i386/lrintl.s b/libc-top-half/musl/src/math/i386/lrintl.s deleted file mode 100644 index 3ae05aac2..000000000 --- a/libc-top-half/musl/src/math/i386/lrintl.s +++ /dev/null @@ -1,7 +0,0 @@ -.global lrintl -.type lrintl,@function -lrintl: - fldt 4(%esp) - fistpl 4(%esp) - mov 4(%esp),%eax - ret diff --git a/libc-top-half/musl/src/math/i386/remainder.c b/libc-top-half/musl/src/math/i386/remainder.c new file mode 100644 index 000000000..c083df904 --- /dev/null +++ b/libc-top-half/musl/src/math/i386/remainder.c @@ -0,0 +1,12 @@ +#include + +double remainder(double x, double y) +{ + unsigned short fpsr; + // fprem1 does not introduce excess precision into x + do __asm__ ("fprem1; fnstsw %%ax" : "+t"(x), "=a"(fpsr) : "u"(y)); + while 
(fpsr & 0x400); + return x; +} + +weak_alias(remainder, drem); diff --git a/libc-top-half/musl/src/math/i386/remainder.s b/libc-top-half/musl/src/math/i386/remainder.s deleted file mode 100644 index ab1da95dd..000000000 --- a/libc-top-half/musl/src/math/i386/remainder.s +++ /dev/null @@ -1,14 +0,0 @@ -.global remainder -.type remainder,@function -remainder: -.weak drem -.type drem,@function -drem: - fldl 12(%esp) - fldl 4(%esp) -1: fprem1 - fnstsw %ax - sahf - jp 1b - fstp %st(1) - ret diff --git a/libc-top-half/musl/src/math/i386/remainderf.c b/libc-top-half/musl/src/math/i386/remainderf.c new file mode 100644 index 000000000..280207d26 --- /dev/null +++ b/libc-top-half/musl/src/math/i386/remainderf.c @@ -0,0 +1,12 @@ +#include + +float remainderf(float x, float y) +{ + unsigned short fpsr; + // fprem1 does not introduce excess precision into x + do __asm__ ("fprem1; fnstsw %%ax" : "+t"(x), "=a"(fpsr) : "u"(y)); + while (fpsr & 0x400); + return x; +} + +weak_alias(remainderf, dremf); diff --git a/libc-top-half/musl/src/math/i386/remainderf.s b/libc-top-half/musl/src/math/i386/remainderf.s deleted file mode 100644 index 6a7378a34..000000000 --- a/libc-top-half/musl/src/math/i386/remainderf.s +++ /dev/null @@ -1,14 +0,0 @@ -.global remainderf -.type remainderf,@function -remainderf: -.weak dremf -.type dremf,@function -dremf: - flds 8(%esp) - flds 4(%esp) -1: fprem1 - fnstsw %ax - sahf - jp 1b - fstp %st(1) - ret diff --git a/libc-top-half/musl/src/math/i386/remainderl.c b/libc-top-half/musl/src/math/i386/remainderl.c new file mode 100644 index 000000000..8cf75071e --- /dev/null +++ b/libc-top-half/musl/src/math/i386/remainderl.c @@ -0,0 +1,9 @@ +#include + +long double remainderl(long double x, long double y) +{ + unsigned short fpsr; + do __asm__ ("fprem1; fnstsw %%ax" : "+t"(x), "=a"(fpsr) : "u"(y)); + while (fpsr & 0x400); + return x; +} diff --git a/libc-top-half/musl/src/math/i386/remainderl.s b/libc-top-half/musl/src/math/i386/remainderl.s deleted file mode 
100644 index b41518ede..000000000 --- a/libc-top-half/musl/src/math/i386/remainderl.s +++ /dev/null @@ -1,11 +0,0 @@ -.global remainderl -.type remainderl,@function -remainderl: - fldt 16(%esp) - fldt 4(%esp) -1: fprem1 - fnstsw %ax - sahf - jp 1b - fstp %st(1) - ret diff --git a/libc-top-half/musl/src/math/i386/rint.c b/libc-top-half/musl/src/math/i386/rint.c new file mode 100644 index 000000000..a5276a60d --- /dev/null +++ b/libc-top-half/musl/src/math/i386/rint.c @@ -0,0 +1,7 @@ +#include + +double rint(double x) +{ + __asm__ ("frndint" : "+t"(x)); + return x; +} diff --git a/libc-top-half/musl/src/math/i386/rint.s b/libc-top-half/musl/src/math/i386/rint.s deleted file mode 100644 index bb99a11c3..000000000 --- a/libc-top-half/musl/src/math/i386/rint.s +++ /dev/null @@ -1,6 +0,0 @@ -.global rint -.type rint,@function -rint: - fldl 4(%esp) - frndint - ret diff --git a/libc-top-half/musl/src/math/i386/rintf.c b/libc-top-half/musl/src/math/i386/rintf.c new file mode 100644 index 000000000..bb4121a4e --- /dev/null +++ b/libc-top-half/musl/src/math/i386/rintf.c @@ -0,0 +1,7 @@ +#include + +float rintf(float x) +{ + __asm__ ("frndint" : "+t"(x)); + return x; +} diff --git a/libc-top-half/musl/src/math/i386/rintf.s b/libc-top-half/musl/src/math/i386/rintf.s deleted file mode 100644 index bce4c5a60..000000000 --- a/libc-top-half/musl/src/math/i386/rintf.s +++ /dev/null @@ -1,6 +0,0 @@ -.global rintf -.type rintf,@function -rintf: - flds 4(%esp) - frndint - ret diff --git a/libc-top-half/musl/src/math/i386/rintl.c b/libc-top-half/musl/src/math/i386/rintl.c new file mode 100644 index 000000000..e1a92077f --- /dev/null +++ b/libc-top-half/musl/src/math/i386/rintl.c @@ -0,0 +1,7 @@ +#include + +long double rintl(long double x) +{ + __asm__ ("frndint" : "+t"(x)); + return x; +} diff --git a/libc-top-half/musl/src/math/i386/rintl.s b/libc-top-half/musl/src/math/i386/rintl.s deleted file mode 100644 index cd2bf9a98..000000000 --- a/libc-top-half/musl/src/math/i386/rintl.s +++ 
/dev/null @@ -1,6 +0,0 @@ -.global rintl -.type rintl,@function -rintl: - fldt 4(%esp) - frndint - ret diff --git a/libc-top-half/musl/src/math/i386/sqrt.c b/libc-top-half/musl/src/math/i386/sqrt.c new file mode 100644 index 000000000..934fbccab --- /dev/null +++ b/libc-top-half/musl/src/math/i386/sqrt.c @@ -0,0 +1,15 @@ +#include "libm.h" + +double sqrt(double x) +{ + union ldshape ux; + unsigned fpsr; + __asm__ ("fsqrt; fnstsw %%ax": "=t"(ux.f), "=a"(fpsr) : "0"(x)); + if ((ux.i.m & 0x7ff) != 0x400) + return (double)ux.f; + /* Rounding to double would have encountered an exact halfway case. + Adjust mantissa downwards if fsqrt rounded up, else upwards. + (result of fsqrt could not have been exact) */ + ux.i.m ^= (fpsr & 0x200) + 0x300; + return (double)ux.f; +} diff --git a/libc-top-half/musl/src/math/i386/sqrt.s b/libc-top-half/musl/src/math/i386/sqrt.s deleted file mode 100644 index 57837e256..000000000 --- a/libc-top-half/musl/src/math/i386/sqrt.s +++ /dev/null @@ -1,21 +0,0 @@ -.global sqrt -.type sqrt,@function -sqrt: fldl 4(%esp) - fsqrt - fnstsw %ax - sub $12,%esp - fld %st(0) - fstpt (%esp) - mov (%esp),%ecx - and $0x7ff,%ecx - cmp $0x400,%ecx - jnz 1f - and $0x200,%eax - sub $0x100,%eax - sub %eax,(%esp) - fstp %st(0) - fldt (%esp) -1: add $12,%esp - fstpl 4(%esp) - fldl 4(%esp) - ret diff --git a/libc-top-half/musl/src/math/i386/sqrtf.c b/libc-top-half/musl/src/math/i386/sqrtf.c new file mode 100644 index 000000000..41c65c2bd --- /dev/null +++ b/libc-top-half/musl/src/math/i386/sqrtf.c @@ -0,0 +1,12 @@ +#include + +float sqrtf(float x) +{ + long double t; + /* The long double result has sufficient precision so that + * second rounding to float still keeps the returned value + * correctly rounded, see Pierre Roux, "Innocuous Double + * Rounding of Basic Arithmetic Operations". 
*/ + __asm__ ("fsqrt" : "=t"(t) : "0"(x)); + return (float)t; +} diff --git a/libc-top-half/musl/src/math/i386/sqrtf.s b/libc-top-half/musl/src/math/i386/sqrtf.s deleted file mode 100644 index 9e944f456..000000000 --- a/libc-top-half/musl/src/math/i386/sqrtf.s +++ /dev/null @@ -1,7 +0,0 @@ -.global sqrtf -.type sqrtf,@function -sqrtf: flds 4(%esp) - fsqrt - fstps 4(%esp) - flds 4(%esp) - ret diff --git a/libc-top-half/musl/src/math/i386/sqrtl.c b/libc-top-half/musl/src/math/i386/sqrtl.c new file mode 100644 index 000000000..864cfcc4f --- /dev/null +++ b/libc-top-half/musl/src/math/i386/sqrtl.c @@ -0,0 +1,7 @@ +#include + +long double sqrtl(long double x) +{ + __asm__ ("fsqrt" : "+t"(x)); + return x; +} diff --git a/libc-top-half/musl/src/math/i386/sqrtl.s b/libc-top-half/musl/src/math/i386/sqrtl.s deleted file mode 100644 index e0d426168..000000000 --- a/libc-top-half/musl/src/math/i386/sqrtl.s +++ /dev/null @@ -1,5 +0,0 @@ -.global sqrtl -.type sqrtl,@function -sqrtl: fldt 4(%esp) - fsqrt - ret diff --git a/libc-top-half/musl/src/math/m68k/sqrtl.c b/libc-top-half/musl/src/math/m68k/sqrtl.c new file mode 100644 index 000000000..b1c303c7e --- /dev/null +++ b/libc-top-half/musl/src/math/m68k/sqrtl.c @@ -0,0 +1,15 @@ +#include + +#if __HAVE_68881__ + +long double sqrtl(long double x) +{ + __asm__ ("fsqrt.x %1,%0" : "=f"(x) : "fm"(x)); + return x; +} + +#else + +#include "../sqrtl.c" + +#endif diff --git a/libc-top-half/musl/src/math/sinh.c b/libc-top-half/musl/src/math/sinh.c index 00022c4e6..8463abc7b 100644 --- a/libc-top-half/musl/src/math/sinh.c +++ b/libc-top-half/musl/src/math/sinh.c @@ -34,6 +34,10 @@ double sinh(double x) /* |x| > log(DBL_MAX) or nan */ /* note: the result is stored to handle overflow */ - t = 2*h*__expo2(absx); +#ifdef __wasilibc_unmodified_upstream // Wasm doesn't have alternate rounding modes + t = __expo2(absx, 2*h); +#else + t = 2*h*__expo2(absx); /* keep the 2*h factor: it is the sign argument the upstream branch passes to __expo2 */ +#endif return t; } diff --git a/libc-top-half/musl/src/math/sinhf.c
b/libc-top-half/musl/src/math/sinhf.c index 6ad19ea2b..1fcd27a06 100644 --- a/libc-top-half/musl/src/math/sinhf.c +++ b/libc-top-half/musl/src/math/sinhf.c @@ -26,6 +26,10 @@ float sinhf(float x) } /* |x| > logf(FLT_MAX) or nan */ - t = 2*h*__expo2f(absx); +#ifdef __wasilibc_unmodified_upstream // Wasm doesn't have alternate rounding modes + t = __expo2f(absx, 2*h); +#else + t = 2*h*__expo2f(absx); /* keep the 2*h factor: it is the sign argument the upstream branch passes to __expo2f */ +#endif return t; } diff --git a/libc-top-half/musl/src/math/x86_64/fabs.c b/libc-top-half/musl/src/math/x86_64/fabs.c new file mode 100644 index 000000000..165624777 --- /dev/null +++ b/libc-top-half/musl/src/math/x86_64/fabs.c @@ -0,0 +1,10 @@ +#include + +double fabs(double x) +{ + double t; + __asm__ ("pcmpeqd %0, %0" : "=x"(t)); // t = ~0 + __asm__ ("psrlq $1, %0" : "+x"(t)); // t >>= 1 + __asm__ ("andps %1, %0" : "+x"(x) : "x"(t)); // x &= t + return x; +} diff --git a/libc-top-half/musl/src/math/x86_64/fabs.s b/libc-top-half/musl/src/math/x86_64/fabs.s deleted file mode 100644 index 5715005e3..000000000 --- a/libc-top-half/musl/src/math/x86_64/fabs.s +++ /dev/null @@ -1,9 +0,0 @@ -.global fabs -.type fabs,@function -fabs: - xor %eax,%eax - dec %rax - shr %rax - movq %rax,%xmm1 - andpd %xmm1,%xmm0 - ret diff --git a/libc-top-half/musl/src/math/x86_64/fabsf.c b/libc-top-half/musl/src/math/x86_64/fabsf.c new file mode 100644 index 000000000..36ea7481f --- /dev/null +++ b/libc-top-half/musl/src/math/x86_64/fabsf.c @@ -0,0 +1,10 @@ +#include + +float fabsf(float x) +{ + float t; + __asm__ ("pcmpeqd %0, %0" : "=x"(t)); // t = ~0 + __asm__ ("psrld $1, %0" : "+x"(t)); // t >>= 1 + __asm__ ("andps %1, %0" : "+x"(x) : "x"(t)); // x &= t + return x; +} diff --git a/libc-top-half/musl/src/math/x86_64/fabsf.s b/libc-top-half/musl/src/math/x86_64/fabsf.s deleted file mode 100644 index 501a1f175..000000000 --- a/libc-top-half/musl/src/math/x86_64/fabsf.s +++ /dev/null @@ -1,7 +0,0 @@ -.global fabsf -.type fabsf,@function -fabsf: - mov $0x7fffffff,%eax - movq %rax,%xmm1 - andps
%xmm1,%xmm0 - ret diff --git a/libc-top-half/musl/src/math/x86_64/fabsl.c b/libc-top-half/musl/src/math/x86_64/fabsl.c new file mode 100644 index 000000000..cc1c9ed9c --- /dev/null +++ b/libc-top-half/musl/src/math/x86_64/fabsl.c @@ -0,0 +1,7 @@ +#include + +long double fabsl(long double x) +{ + __asm__ ("fabs" : "+t"(x)); + return x; +} diff --git a/libc-top-half/musl/src/math/x86_64/fabsl.s b/libc-top-half/musl/src/math/x86_64/fabsl.s deleted file mode 100644 index 4e7ab525e..000000000 --- a/libc-top-half/musl/src/math/x86_64/fabsl.s +++ /dev/null @@ -1,6 +0,0 @@ -.global fabsl -.type fabsl,@function -fabsl: - fldt 8(%rsp) - fabs - ret diff --git a/libc-top-half/musl/src/math/x86_64/fmodl.c b/libc-top-half/musl/src/math/x86_64/fmodl.c new file mode 100644 index 000000000..3daeab060 --- /dev/null +++ b/libc-top-half/musl/src/math/x86_64/fmodl.c @@ -0,0 +1,9 @@ +#include + +long double fmodl(long double x, long double y) +{ + unsigned short fpsr; + do __asm__ ("fprem; fnstsw %%ax" : "+t"(x), "=a"(fpsr) : "u"(y)); + while (fpsr & 0x400); + return x; +} diff --git a/libc-top-half/musl/src/math/x86_64/fmodl.s b/libc-top-half/musl/src/math/x86_64/fmodl.s deleted file mode 100644 index ea07b402f..000000000 --- a/libc-top-half/musl/src/math/x86_64/fmodl.s +++ /dev/null @@ -1,11 +0,0 @@ -.global fmodl -.type fmodl,@function -fmodl: - fldt 24(%rsp) - fldt 8(%rsp) -1: fprem - fnstsw %ax - testb $4,%ah - jnz 1b - fstp %st(1) - ret diff --git a/libc-top-half/musl/src/math/x86_64/llrint.c b/libc-top-half/musl/src/math/x86_64/llrint.c new file mode 100644 index 000000000..dd38a7223 --- /dev/null +++ b/libc-top-half/musl/src/math/x86_64/llrint.c @@ -0,0 +1,8 @@ +#include + +long long llrint(double x) +{ + long long r; + __asm__ ("cvtsd2si %1, %0" : "=r"(r) : "x"(x)); + return r; +} diff --git a/libc-top-half/musl/src/math/x86_64/llrint.s b/libc-top-half/musl/src/math/x86_64/llrint.s deleted file mode 100644 index bf4764983..000000000 --- 
a/libc-top-half/musl/src/math/x86_64/llrint.s +++ /dev/null @@ -1,5 +0,0 @@ -.global llrint -.type llrint,@function -llrint: - cvtsd2si %xmm0,%rax - ret diff --git a/libc-top-half/musl/src/math/x86_64/llrintf.c b/libc-top-half/musl/src/math/x86_64/llrintf.c new file mode 100644 index 000000000..fc8625e88 --- /dev/null +++ b/libc-top-half/musl/src/math/x86_64/llrintf.c @@ -0,0 +1,8 @@ +#include + +long long llrintf(float x) +{ + long long r; + __asm__ ("cvtss2si %1, %0" : "=r"(r) : "x"(x)); + return r; +} diff --git a/libc-top-half/musl/src/math/x86_64/llrintf.s b/libc-top-half/musl/src/math/x86_64/llrintf.s deleted file mode 100644 index d7204ac0c..000000000 --- a/libc-top-half/musl/src/math/x86_64/llrintf.s +++ /dev/null @@ -1,5 +0,0 @@ -.global llrintf -.type llrintf,@function -llrintf: - cvtss2si %xmm0,%rax - ret diff --git a/libc-top-half/musl/src/math/x86_64/llrintl.c b/libc-top-half/musl/src/math/x86_64/llrintl.c new file mode 100644 index 000000000..c439ef28d --- /dev/null +++ b/libc-top-half/musl/src/math/x86_64/llrintl.c @@ -0,0 +1,8 @@ +#include + +long long llrintl(long double x) +{ + long long r; + __asm__ ("fistpll %0" : "=m"(r) : "t"(x) : "st"); + return r; +} diff --git a/libc-top-half/musl/src/math/x86_64/llrintl.s b/libc-top-half/musl/src/math/x86_64/llrintl.s deleted file mode 100644 index 1ec0817d3..000000000 --- a/libc-top-half/musl/src/math/x86_64/llrintl.s +++ /dev/null @@ -1,7 +0,0 @@ -.global llrintl -.type llrintl,@function -llrintl: - fldt 8(%rsp) - fistpll 8(%rsp) - mov 8(%rsp),%rax - ret diff --git a/libc-top-half/musl/src/math/x86_64/lrint.c b/libc-top-half/musl/src/math/x86_64/lrint.c new file mode 100644 index 000000000..a742fec64 --- /dev/null +++ b/libc-top-half/musl/src/math/x86_64/lrint.c @@ -0,0 +1,8 @@ +#include + +long lrint(double x) +{ + long r; + __asm__ ("cvtsd2si %1, %0" : "=r"(r) : "x"(x)); + return r; +} diff --git a/libc-top-half/musl/src/math/x86_64/lrint.s b/libc-top-half/musl/src/math/x86_64/lrint.s deleted file mode 
100644 index 15fc2454b..000000000 --- a/libc-top-half/musl/src/math/x86_64/lrint.s +++ /dev/null @@ -1,5 +0,0 @@ -.global lrint -.type lrint,@function -lrint: - cvtsd2si %xmm0,%rax - ret diff --git a/libc-top-half/musl/src/math/x86_64/lrintf.c b/libc-top-half/musl/src/math/x86_64/lrintf.c new file mode 100644 index 000000000..2ba5639dc --- /dev/null +++ b/libc-top-half/musl/src/math/x86_64/lrintf.c @@ -0,0 +1,8 @@ +#include + +long lrintf(float x) +{ + long r; + __asm__ ("cvtss2si %1, %0" : "=r"(r) : "x"(x)); + return r; +} diff --git a/libc-top-half/musl/src/math/x86_64/lrintf.s b/libc-top-half/musl/src/math/x86_64/lrintf.s deleted file mode 100644 index 488423d21..000000000 --- a/libc-top-half/musl/src/math/x86_64/lrintf.s +++ /dev/null @@ -1,5 +0,0 @@ -.global lrintf -.type lrintf,@function -lrintf: - cvtss2si %xmm0,%rax - ret diff --git a/libc-top-half/musl/src/math/x86_64/lrintl.c b/libc-top-half/musl/src/math/x86_64/lrintl.c new file mode 100644 index 000000000..068e2e4d6 --- /dev/null +++ b/libc-top-half/musl/src/math/x86_64/lrintl.c @@ -0,0 +1,8 @@ +#include + +long lrintl(long double x) +{ + long r; + __asm__ ("fistpll %0" : "=m"(r) : "t"(x) : "st"); + return r; +} diff --git a/libc-top-half/musl/src/math/x86_64/lrintl.s b/libc-top-half/musl/src/math/x86_64/lrintl.s deleted file mode 100644 index d587b12b5..000000000 --- a/libc-top-half/musl/src/math/x86_64/lrintl.s +++ /dev/null @@ -1,7 +0,0 @@ -.global lrintl -.type lrintl,@function -lrintl: - fldt 8(%rsp) - fistpll 8(%rsp) - mov 8(%rsp),%rax - ret diff --git a/libc-top-half/musl/src/math/x86_64/remainderl.c b/libc-top-half/musl/src/math/x86_64/remainderl.c new file mode 100644 index 000000000..8cf75071e --- /dev/null +++ b/libc-top-half/musl/src/math/x86_64/remainderl.c @@ -0,0 +1,9 @@ +#include + +long double remainderl(long double x, long double y) +{ + unsigned short fpsr; + do __asm__ ("fprem1; fnstsw %%ax" : "+t"(x), "=a"(fpsr) : "u"(y)); + while (fpsr & 0x400); + return x; +} diff --git 
a/libc-top-half/musl/src/math/x86_64/remainderl.s b/libc-top-half/musl/src/math/x86_64/remainderl.s deleted file mode 100644 index cb3857b48..000000000 --- a/libc-top-half/musl/src/math/x86_64/remainderl.s +++ /dev/null @@ -1,11 +0,0 @@ -.global remainderl -.type remainderl,@function -remainderl: - fldt 24(%rsp) - fldt 8(%rsp) -1: fprem1 - fnstsw %ax - testb $4,%ah - jnz 1b - fstp %st(1) - ret diff --git a/libc-top-half/musl/src/math/x86_64/remquol.c b/libc-top-half/musl/src/math/x86_64/remquol.c new file mode 100644 index 000000000..60eef089f --- /dev/null +++ b/libc-top-half/musl/src/math/x86_64/remquol.c @@ -0,0 +1,32 @@ +#include + +long double remquol(long double x, long double y, int *quo) +{ + signed char *cx = (void *)&x, *cy = (void *)&y; + /* By ensuring that addresses of x and y cannot be discarded, + * this empty asm guides GCC into representing extraction of + * their sign bits as memory loads rather than making x and y + * not-address-taken internally and using bitfield operations, + * which in the end wouldn't work out, as extraction from FPU + * registers needs to go through memory anyway. This way GCC + * should manage to use incoming stack slots without spills. */ + __asm__ ("" :: "X"(cx), "X"(cy)); + + long double t = x; + unsigned fpsr; + do __asm__ ("fprem1; fnstsw %%ax" : "+t"(t), "=a"(fpsr) : "u"(y)); + while (fpsr & 0x400); + /* C0, C1, C3 flags in x87 status word carry low bits of quotient: + * 15 14 13 12 11 10 9 8 + * . C3 . . . C2 C1 C0 + * . b1 . . . 0 b0 b2 */ + unsigned char i = fpsr >> 8; + i = i>>4 | i<<4; + /* i[5:2] is now {b0 b2 ? b1}. Retrieve {0 b2 b1 b0} via + * in-register table lookup. */ + unsigned qbits = 0x7575313164642020 >> (i & 60); + qbits &= 7; + + *quo = (cx[9]^cy[9]) < 0 ? 
-qbits : qbits; + return t; +} diff --git a/libc-top-half/musl/src/math/x86_64/rintl.c b/libc-top-half/musl/src/math/x86_64/rintl.c new file mode 100644 index 000000000..e1a92077f --- /dev/null +++ b/libc-top-half/musl/src/math/x86_64/rintl.c @@ -0,0 +1,7 @@ +#include <math.h> + +long double rintl(long double x) +{ + __asm__ ("frndint" : "+t"(x)); + return x; +} diff --git a/libc-top-half/musl/src/math/x86_64/rintl.s b/libc-top-half/musl/src/math/x86_64/rintl.s deleted file mode 100644 index 64e663cd2..000000000 --- a/libc-top-half/musl/src/math/x86_64/rintl.s +++ /dev/null @@ -1,6 +0,0 @@ -.global rintl -.type rintl,@function -rintl: - fldt 8(%rsp) - frndint - ret diff --git a/libc-top-half/musl/src/math/x86_64/sqrt.c b/libc-top-half/musl/src/math/x86_64/sqrt.c new file mode 100644 index 000000000..657e09e3b --- /dev/null +++ b/libc-top-half/musl/src/math/x86_64/sqrt.c @@ -0,0 +1,7 @@ +#include <math.h> + +double sqrt(double x) +{ + __asm__ ("sqrtsd %1, %0" : "=x"(x) : "x"(x)); + return x; +} diff --git a/libc-top-half/musl/src/math/x86_64/sqrt.s b/libc-top-half/musl/src/math/x86_64/sqrt.s deleted file mode 100644 index d3c609f9f..000000000 --- a/libc-top-half/musl/src/math/x86_64/sqrt.s +++ /dev/null @@ -1,4 +0,0 @@ -.global sqrt -.type sqrt,@function -sqrt: sqrtsd %xmm0, %xmm0 - ret diff --git a/libc-top-half/musl/src/math/x86_64/sqrtf.c b/libc-top-half/musl/src/math/x86_64/sqrtf.c new file mode 100644 index 000000000..720baec60 --- /dev/null +++ b/libc-top-half/musl/src/math/x86_64/sqrtf.c @@ -0,0 +1,7 @@ +#include <math.h> + +float sqrtf(float x) +{ + __asm__ ("sqrtss %1, %0" : "=x"(x) : "x"(x)); + return x; +} diff --git a/libc-top-half/musl/src/math/x86_64/sqrtf.s b/libc-top-half/musl/src/math/x86_64/sqrtf.s deleted file mode 100644 index eec48c609..000000000 --- a/libc-top-half/musl/src/math/x86_64/sqrtf.s +++ /dev/null @@ -1,4 +0,0 @@ -.global sqrtf -.type sqrtf,@function -sqrtf: sqrtss %xmm0, %xmm0 - ret diff --git a/libc-top-half/musl/src/math/x86_64/sqrtl.c
b/libc-top-half/musl/src/math/x86_64/sqrtl.c new file mode 100644 index 000000000..864cfcc4f --- /dev/null +++ b/libc-top-half/musl/src/math/x86_64/sqrtl.c @@ -0,0 +1,7 @@ +#include <math.h> + +long double sqrtl(long double x) +{ + __asm__ ("fsqrt" : "+t"(x)); + return x; +} diff --git a/libc-top-half/musl/src/math/x86_64/sqrtl.s b/libc-top-half/musl/src/math/x86_64/sqrtl.s deleted file mode 100644 index 23cd687d0..000000000 --- a/libc-top-half/musl/src/math/x86_64/sqrtl.s +++ /dev/null @@ -1,5 +0,0 @@ -.global sqrtl -.type sqrtl,@function -sqrtl: fldt 8(%rsp) - fsqrt - ret diff --git a/libc-top-half/musl/src/misc/getentropy.c b/libc-top-half/musl/src/misc/getentropy.c index d2f282ce8..651ea95f1 100644 --- a/libc-top-half/musl/src/misc/getentropy.c +++ b/libc-top-half/musl/src/misc/getentropy.c @@ -6,7 +6,7 @@ int getentropy(void *buffer, size_t len) { - int cs, ret; + int cs, ret = 0; char *pos = buffer; if (len > 256) { diff --git a/libc-top-half/musl/src/misc/nftw.c b/libc-top-half/musl/src/misc/nftw.c index a8c5ba448..ebfd61567 100644 --- a/libc-top-half/musl/src/misc/nftw.c +++ b/libc-top-half/musl/src/misc/nftw.c @@ -1,5 +1,6 @@ #include <ftw.h> #include <dirent.h> +#include <fcntl.h> #include <sys/stat.h> #include <errno.h> #include <unistd.h> @@ -28,6 +29,8 @@ static int do_nftw(char *path, int (*fn)(const char *, const struct stat *, int, struct history new; int type; int r; + int dfd; + int err; struct FTW lev; if ((flags & FTW_PHYS) ?
lstat(path, &st) : stat(path, &st) < 0) { @@ -36,8 +39,7 @@ static int do_nftw(char *path, int (*fn)(const char *, const struct stat *, int, else if (errno != EACCES) return -1; else type = FTW_NS; } else if (S_ISDIR(st.st_mode)) { - if (access(path, R_OK) < 0) type = FTW_DNR; - else if (flags & FTW_DEPTH) type = FTW_DP; + if (flags & FTW_DEPTH) type = FTW_DP; else type = FTW_D; } else if (S_ISLNK(st.st_mode)) { if (flags & FTW_PHYS) type = FTW_SL; @@ -65,6 +67,13 @@ static int do_nftw(char *path, int (*fn)(const char *, const struct stat *, int, lev.base = k; } + if (type == FTW_D || type == FTW_DP) { + dfd = open(path, O_RDONLY); + err = errno; + if (dfd < 0 && err == EACCES) type = FTW_DNR; + if (!fd_limit) close(dfd); + } + if (!(flags & FTW_DEPTH) && (r=fn(path, &st, type, &lev))) return r; @@ -73,7 +82,11 @@ static int do_nftw(char *path, int (*fn)(const char *, const struct stat *, int, return 0; if ((type == FTW_D || type == FTW_DP) && fd_limit) { - DIR *d = opendir(path); + if (dfd < 0) { + errno = err; + return -1; + } + DIR *d = fdopendir(dfd); if (d) { struct dirent *de; while ((de = readdir(d))) { @@ -94,7 +107,8 @@ static int do_nftw(char *path, int (*fn)(const char *, const struct stat *, int, } } closedir(d); - } else if (errno != EACCES) { + } else { + close(dfd); return -1; } } diff --git a/libc-top-half/musl/src/network/getnameinfo.c b/libc-top-half/musl/src/network/getnameinfo.c index f77e73ade..949e18115 100644 --- a/libc-top-half/musl/src/network/getnameinfo.c +++ b/libc-top-half/musl/src/network/getnameinfo.c @@ -158,6 +158,7 @@ int getnameinfo(const struct sockaddr *restrict sa, socklen_t sl, unsigned char query[18+PTR_MAX], reply[512]; int qlen = __res_mkquery(0, ptr, 1, RR_PTR, 0, 0, 0, query, sizeof query); + query[3] = 0; /* don't need AD flag */ int rlen = __res_send(query, qlen, reply, sizeof reply); buf[0] = 0; if (rlen > 0) diff --git a/libc-top-half/musl/src/network/lookup_name.c b/libc-top-half/musl/src/network/lookup_name.c index 
c93263a9d..aae0d95a0 100644 --- a/libc-top-half/musl/src/network/lookup_name.c +++ b/libc-top-half/musl/src/network/lookup_name.c @@ -149,6 +149,7 @@ static int name_from_dns(struct address buf[static MAXADDRS], char canon[static 0, 0, 0, qbuf[nq], sizeof *qbuf); if (qlens[nq] == -1) return EAI_NONAME; + qbuf[nq][3] = 0; /* don't need AD flag */ nq++; } } @@ -156,14 +157,17 @@ static int name_from_dns(struct address buf[static MAXADDRS], char canon[static if (__res_msend_rc(nq, qp, qlens, ap, alens, sizeof *abuf, conf) < 0) return EAI_SYSTEM; + for (i=0; inext = self->prev = self; __thread_list_lock = 0; libc.threads_minus_1 = 0; + if (libc.need_locks) libc.need_locks = -1; } __restore_sigs(&set); __fork_handler(!ret); diff --git a/libc-top-half/musl/src/stdio/__string_read.c b/libc-top-half/musl/src/stdio/__string_read.c deleted file mode 100644 index 7b50a7e11..000000000 --- a/libc-top-half/musl/src/stdio/__string_read.c +++ /dev/null @@ -1,16 +0,0 @@ -#include "stdio_impl.h" -#include - -size_t __string_read(FILE *f, unsigned char *buf, size_t len) -{ - char *src = f->cookie; - size_t k = len+256; - char *end = memchr(src, 0, k); - if (end) k = end-src; - if (k < len) len = k; - memcpy(buf, src, len); - f->rpos = (void *)(src+len); - f->rend = (void *)(src+k); - f->cookie = src+k; - return len; -} diff --git a/libc-top-half/musl/src/stdio/fmemopen.c b/libc-top-half/musl/src/stdio/fmemopen.c index d09c6e26e..3ee57b9ea 100644 --- a/libc-top-half/musl/src/stdio/fmemopen.c +++ b/libc-top-half/musl/src/stdio/fmemopen.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #include "libc.h" @@ -103,18 +104,17 @@ FILE *fmemopen(void *restrict buf, size_t size, const char *restrict mode) f = malloc(sizeof *f + (buf?0:size)); if (!f) return 0; - memset(&f->f, 0, sizeof f->f); + memset(f, 0, offsetof(struct mem_FILE, buf)); f->f.cookie = &f->c; f->f.fd = -1; f->f.lbf = EOF; f->f.buf = f->buf + UNGET; f->f.buf_size = sizeof f->buf - UNGET; if (!buf) { - buf = 
f->buf2;; + buf = f->buf2; memset(buf, 0, size); } - memset(&f->c, 0, sizeof f->c); f->c.buf = buf; f->c.size = size; f->c.mode = *mode; diff --git a/libc-top-half/musl/src/stdio/vdprintf.c b/libc-top-half/musl/src/stdio/vdprintf.c index e366bfa0c..cef0a1af2 100644 --- a/libc-top-half/musl/src/stdio/vdprintf.c +++ b/libc-top-half/musl/src/stdio/vdprintf.c @@ -1,14 +1,9 @@ #include "stdio_impl.h" -static size_t wrap_write(FILE *f, const unsigned char *buf, size_t len) -{ - return __stdio_write(f, buf, len); -} - int vdprintf(int fd, const char *restrict fmt, va_list ap) { FILE f = { - .fd = fd, .lbf = EOF, .write = wrap_write, + .fd = fd, .lbf = EOF, .write = __stdio_write, .buf = (void *)fmt, .buf_size = 0, #if defined(__wasilibc_unmodified_upstream) || defined(_REENTRANT) .lock = -1 diff --git a/libc-top-half/musl/src/stdio/vfscanf.c b/libc-top-half/musl/src/stdio/vfscanf.c index d5be86475..b612bbdd4 100644 --- a/libc-top-half/musl/src/stdio/vfscanf.c +++ b/libc-top-half/musl/src/stdio/vfscanf.c @@ -61,7 +61,7 @@ int vfscanf(FILE *restrict f, const char *restrict fmt, va_list ap) { int width; int size; - int alloc; + int alloc = 0; int base; const unsigned char *p; int c, t; @@ -84,6 +84,9 @@ int vfscanf(FILE *restrict f, const char *restrict fmt, va_list ap) FLOCK(f); + if (!f->rpos) __toread(f); + if (!f->rpos) goto input_fail; + for (p=(const unsigned char *)fmt; *p; p++) { alloc = 0; diff --git a/libc-top-half/musl/src/stdio/vsscanf.c b/libc-top-half/musl/src/stdio/vsscanf.c index 4b0e9e326..0e5b48265 100644 --- a/libc-top-half/musl/src/stdio/vsscanf.c +++ b/libc-top-half/musl/src/stdio/vsscanf.c @@ -1,8 +1,18 @@ #include "stdio_impl.h" +#include <string.h> -static size_t do_read(FILE *f, unsigned char *buf, size_t len) +static size_t string_read(FILE *f, unsigned char *buf, size_t len) { - return __string_read(f, buf, len); + char *src = f->cookie; + size_t k = len+256; + char *end = memchr(src, 0, k); + if (end) k = end-src; + if (k < len) len = k; + memcpy(buf, src,
len); + f->rpos = (void *)(src+len); + f->rend = (void *)(src+k); + f->cookie = src+k; + return len; } int vsscanf(const char *restrict s, const char *restrict fmt, va_list ap) @@ -10,9 +20,9 @@ int vsscanf(const char *restrict s, const char *restrict fmt, va_list ap) FILE f = { .buf = (void *)s, .cookie = (void *)s, #if defined(__wasilibc_unmodified_upstream) || defined(_REENTRANT) - .read = do_read, .lock = -1 + .read = string_read, .lock = -1 #else - .read = do_read + .read = string_read #endif }; return vfscanf(&f, fmt, ap); diff --git a/libc-top-half/musl/src/stdlib/wcstod.c b/libc-top-half/musl/src/stdlib/wcstod.c index bc33e5c9a..97b894eb2 100644 --- a/libc-top-half/musl/src/stdlib/wcstod.c +++ b/libc-top-half/musl/src/stdlib/wcstod.c @@ -41,8 +41,7 @@ static long double wcstox(const wchar_t *s, wchar_t **p, int prec) unsigned char buf[64]; FILE f = {0}; f.flags = 0; - f.rpos = f.rend = 0; - f.buf = buf + 4; + f.rpos = f.rend = f.buf = buf + 4; f.buf_size = sizeof buf - 4; #if defined(__wasilibc_unmodified_upstream) || defined(_REENTRANT) f.lock = -1; diff --git a/libc-top-half/musl/src/stdlib/wcstol.c b/libc-top-half/musl/src/stdlib/wcstol.c index 1aa971b60..3aefd06f4 100644 --- a/libc-top-half/musl/src/stdlib/wcstol.c +++ b/libc-top-half/musl/src/stdlib/wcstol.c @@ -35,8 +35,7 @@ static unsigned long long wcstox(const wchar_t *s, wchar_t **p, int base, unsign unsigned char buf[64]; FILE f = {0}; f.flags = 0; - f.rpos = f.rend = 0; - f.buf = buf + 4; + f.rpos = f.rend = f.buf = buf + 4; f.buf_size = sizeof buf - 4; #if defined(__wasilibc_unmodified_upstream) || defined(_REENTRANT) f.lock = -1; diff --git a/libc-top-half/musl/src/string/aarch64/memcpy.S b/libc-top-half/musl/src/string/aarch64/memcpy.S new file mode 100644 index 000000000..48bb8a8d3 --- /dev/null +++ b/libc-top-half/musl/src/string/aarch64/memcpy.S @@ -0,0 +1,186 @@ +/* + * memcpy - copy memory area + * + * Copyright (c) 2012-2020, Arm Limited. 
+ * SPDX-License-Identifier: MIT + */ + +/* Assumptions: + * + * ARMv8-a, AArch64, unaligned accesses. + * + */ + +#define dstin x0 +#define src x1 +#define count x2 +#define dst x3 +#define srcend x4 +#define dstend x5 +#define A_l x6 +#define A_lw w6 +#define A_h x7 +#define B_l x8 +#define B_lw w8 +#define B_h x9 +#define C_l x10 +#define C_lw w10 +#define C_h x11 +#define D_l x12 +#define D_h x13 +#define E_l x14 +#define E_h x15 +#define F_l x16 +#define F_h x17 +#define G_l count +#define G_h dst +#define H_l src +#define H_h srcend +#define tmp1 x14 + +/* This implementation of memcpy uses unaligned accesses and branchless + sequences to keep the code small, simple and improve performance. + + Copies are split into 3 main cases: small copies of up to 32 bytes, medium + copies of up to 128 bytes, and large copies. The overhead of the overlap + check is negligible since it is only required for large copies. + + Large copies use a software pipelined loop processing 64 bytes per iteration. + The destination pointer is 16-byte aligned to minimize unaligned accesses. + The loop tail is handled by always copying 64 bytes from the end. +*/ + +.global memcpy +.type memcpy,%function +memcpy: + add srcend, src, count + add dstend, dstin, count + cmp count, 128 + b.hi .Lcopy_long + cmp count, 32 + b.hi .Lcopy32_128 + + /* Small copies: 0..32 bytes. */ + cmp count, 16 + b.lo .Lcopy16 + ldp A_l, A_h, [src] + ldp D_l, D_h, [srcend, -16] + stp A_l, A_h, [dstin] + stp D_l, D_h, [dstend, -16] + ret + + /* Copy 8-15 bytes. */ +.Lcopy16: + tbz count, 3, .Lcopy8 + ldr A_l, [src] + ldr A_h, [srcend, -8] + str A_l, [dstin] + str A_h, [dstend, -8] + ret + + .p2align 3 + /* Copy 4-7 bytes. */ +.Lcopy8: + tbz count, 2, .Lcopy4 + ldr A_lw, [src] + ldr B_lw, [srcend, -4] + str A_lw, [dstin] + str B_lw, [dstend, -4] + ret + + /* Copy 0..3 bytes using a branchless sequence. 
*/ +.Lcopy4: + cbz count, .Lcopy0 + lsr tmp1, count, 1 + ldrb A_lw, [src] + ldrb C_lw, [srcend, -1] + ldrb B_lw, [src, tmp1] + strb A_lw, [dstin] + strb B_lw, [dstin, tmp1] + strb C_lw, [dstend, -1] +.Lcopy0: + ret + + .p2align 4 + /* Medium copies: 33..128 bytes. */ +.Lcopy32_128: + ldp A_l, A_h, [src] + ldp B_l, B_h, [src, 16] + ldp C_l, C_h, [srcend, -32] + ldp D_l, D_h, [srcend, -16] + cmp count, 64 + b.hi .Lcopy128 + stp A_l, A_h, [dstin] + stp B_l, B_h, [dstin, 16] + stp C_l, C_h, [dstend, -32] + stp D_l, D_h, [dstend, -16] + ret + + .p2align 4 + /* Copy 65..128 bytes. */ +.Lcopy128: + ldp E_l, E_h, [src, 32] + ldp F_l, F_h, [src, 48] + cmp count, 96 + b.ls .Lcopy96 + ldp G_l, G_h, [srcend, -64] + ldp H_l, H_h, [srcend, -48] + stp G_l, G_h, [dstend, -64] + stp H_l, H_h, [dstend, -48] +.Lcopy96: + stp A_l, A_h, [dstin] + stp B_l, B_h, [dstin, 16] + stp E_l, E_h, [dstin, 32] + stp F_l, F_h, [dstin, 48] + stp C_l, C_h, [dstend, -32] + stp D_l, D_h, [dstend, -16] + ret + + .p2align 4 + /* Copy more than 128 bytes. */ +.Lcopy_long: + + /* Copy 16 bytes and then align dst to 16-byte alignment. */ + + ldp D_l, D_h, [src] + and tmp1, dstin, 15 + bic dst, dstin, 15 + sub src, src, tmp1 + add count, count, tmp1 /* Count is now 16 too large. */ + ldp A_l, A_h, [src, 16] + stp D_l, D_h, [dstin] + ldp B_l, B_h, [src, 32] + ldp C_l, C_h, [src, 48] + ldp D_l, D_h, [src, 64]! + subs count, count, 128 + 16 /* Test and readjust count. */ + b.ls .Lcopy64_from_end + +.Lloop64: + stp A_l, A_h, [dst, 16] + ldp A_l, A_h, [src, 16] + stp B_l, B_h, [dst, 32] + ldp B_l, B_h, [src, 32] + stp C_l, C_h, [dst, 48] + ldp C_l, C_h, [src, 48] + stp D_l, D_h, [dst, 64]! + ldp D_l, D_h, [src, 64]! + subs count, count, 64 + b.hi .Lloop64 + + /* Write the last iteration and copy 64 bytes from the end. 
*/ +.Lcopy64_from_end: + ldp E_l, E_h, [srcend, -64] + stp A_l, A_h, [dst, 16] + ldp A_l, A_h, [srcend, -48] + stp B_l, B_h, [dst, 32] + ldp B_l, B_h, [srcend, -32] + stp C_l, C_h, [dst, 48] + ldp C_l, C_h, [srcend, -16] + stp D_l, D_h, [dst, 64] + stp E_l, E_h, [dstend, -64] + stp A_l, A_h, [dstend, -48] + stp B_l, B_h, [dstend, -32] + stp C_l, C_h, [dstend, -16] + ret + +.size memcpy,.-memcpy diff --git a/libc-top-half/musl/src/string/aarch64/memset.S b/libc-top-half/musl/src/string/aarch64/memset.S new file mode 100644 index 000000000..f0d29b7fa --- /dev/null +++ b/libc-top-half/musl/src/string/aarch64/memset.S @@ -0,0 +1,115 @@ +/* + * memset - fill memory with a constant byte + * + * Copyright (c) 2012-2020, Arm Limited. + * SPDX-License-Identifier: MIT + */ + +/* Assumptions: + * + * ARMv8-a, AArch64, Advanced SIMD, unaligned accesses. + * + */ + +#define dstin x0 +#define val x1 +#define valw w1 +#define count x2 +#define dst x3 +#define dstend x4 +#define zva_val x5 + +.global memset +.type memset,%function +memset: + + dup v0.16B, valw + add dstend, dstin, count + + cmp count, 96 + b.hi .Lset_long + cmp count, 16 + b.hs .Lset_medium + mov val, v0.D[0] + + /* Set 0..15 bytes. */ + tbz count, 3, 1f + str val, [dstin] + str val, [dstend, -8] + ret + nop +1: tbz count, 2, 2f + str valw, [dstin] + str valw, [dstend, -4] + ret +2: cbz count, 3f + strb valw, [dstin] + tbz count, 1, 3f + strh valw, [dstend, -2] +3: ret + + /* Set 17..96 bytes. */ +.Lset_medium: + str q0, [dstin] + tbnz count, 6, .Lset96 + str q0, [dstend, -16] + tbz count, 5, 1f + str q0, [dstin, 16] + str q0, [dstend, -32] +1: ret + + .p2align 4 + /* Set 64..96 bytes. Write 64 bytes from the start and + 32 bytes from the end. 
*/ +.Lset96: + str q0, [dstin, 16] + stp q0, q0, [dstin, 32] + stp q0, q0, [dstend, -32] + ret + + .p2align 4 +.Lset_long: + and valw, valw, 255 + bic dst, dstin, 15 + str q0, [dstin] + cmp count, 160 + ccmp valw, 0, 0, hs + b.ne .Lno_zva + +#ifndef SKIP_ZVA_CHECK + mrs zva_val, dczid_el0 + and zva_val, zva_val, 31 + cmp zva_val, 4 /* ZVA size is 64 bytes. */ + b.ne .Lno_zva +#endif + str q0, [dst, 16] + stp q0, q0, [dst, 32] + bic dst, dst, 63 + sub count, dstend, dst /* Count is now 64 too large. */ + sub count, count, 128 /* Adjust count and bias for loop. */ + + .p2align 4 +.Lzva_loop: + add dst, dst, 64 + dc zva, dst + subs count, count, 64 + b.hi .Lzva_loop + stp q0, q0, [dstend, -64] + stp q0, q0, [dstend, -32] + ret + +.Lno_zva: + sub count, dstend, dst /* Count is 16 too large. */ + sub dst, dst, 16 /* Dst is biased by -32. */ + sub count, count, 64 + 16 /* Adjust count and bias for loop. */ +.Lno_zva_loop: + stp q0, q0, [dst, 32] + stp q0, q0, [dst, 64]! + subs count, count, 64 + b.hi .Lno_zva_loop + stp q0, q0, [dstend, -64] + stp q0, q0, [dstend, -32] + ret + +.size memset,.-memset + diff --git a/libc-top-half/musl/src/string/arm/memcpy_le.S b/libc-top-half/musl/src/string/arm/memcpy.S similarity index 82% rename from libc-top-half/musl/src/string/arm/memcpy_le.S rename to libc-top-half/musl/src/string/arm/memcpy.S index 7b35d305e..869e34481 100644 --- a/libc-top-half/musl/src/string/arm/memcpy_le.S +++ b/libc-top-half/musl/src/string/arm/memcpy.S @@ -1,5 +1,3 @@ -#if !__ARMEB__ - /* * Copyright (C) 2008 The Android Open Source Project * All rights reserved. @@ -42,7 +40,7 @@ * code safely callable from thumb mode, adjusting the return * instructions to be compatible with pre-thumb ARM cpus, removal of * prefetch code that is not compatible with older cpus and support for - * building as thumb 2. + * building as thumb 2 and big-endian. 
*/ .syntax unified @@ -227,24 +225,45 @@ non_congruent: * becomes aligned to 32 bits (r5 = nb of words to copy for alignment) */ movs r5, r5, lsl #31 + +#if __ARMEB__ + movmi r3, r3, ror #24 + strbmi r3, [r0], #1 + movcs r3, r3, ror #24 + strbcs r3, [r0], #1 + movcs r3, r3, ror #24 + strbcs r3, [r0], #1 +#else strbmi r3, [r0], #1 movmi r3, r3, lsr #8 strbcs r3, [r0], #1 movcs r3, r3, lsr #8 strbcs r3, [r0], #1 movcs r3, r3, lsr #8 +#endif cmp r2, #4 blo partial_word_tail +#if __ARMEB__ + mov r3, r3, lsr r12 + mov r3, r3, lsl r12 +#endif + /* Align destination to 32 bytes (cache line boundary) */ 1: tst r0, #0x1c beq 2f ldr r5, [r1], #4 sub r2, r2, #4 +#if __ARMEB__ + mov r4, r5, lsr lr + orr r4, r4, r3 + mov r3, r5, lsl r12 +#else mov r4, r5, lsl lr orr r4, r4, r3 mov r3, r5, lsr r12 +#endif str r4, [r0], #4 cmp r2, #4 bhs 1b @@ -270,6 +289,25 @@ loop16: ldmia r1!, { r5,r6,r7, r8,r9,r10,r11} subs r2, r2, #32 ldrhs r12, [r1], #4 +#if __ARMEB__ + orr r3, r3, r4, lsr #16 + mov r4, r4, lsl #16 + orr r4, r4, r5, lsr #16 + mov r5, r5, lsl #16 + orr r5, r5, r6, lsr #16 + mov r6, r6, lsl #16 + orr r6, r6, r7, lsr #16 + mov r7, r7, lsl #16 + orr r7, r7, r8, lsr #16 + mov r8, r8, lsl #16 + orr r8, r8, r9, lsr #16 + mov r9, r9, lsl #16 + orr r9, r9, r10, lsr #16 + mov r10, r10, lsl #16 + orr r10, r10, r11, lsr #16 + stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10} + mov r3, r11, lsl #16 +#else orr r3, r3, r4, lsl #16 mov r4, r4, lsr #16 orr r4, r4, r5, lsl #16 @@ -287,6 +325,7 @@ loop16: orr r10, r10, r11, lsl #16 stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10} mov r3, r11, lsr #16 +#endif bhs 1b b less_than_thirtytwo @@ -296,6 +335,25 @@ loop8: ldmia r1!, { r5,r6,r7, r8,r9,r10,r11} subs r2, r2, #32 ldrhs r12, [r1], #4 +#if __ARMEB__ + orr r3, r3, r4, lsr #24 + mov r4, r4, lsl #8 + orr r4, r4, r5, lsr #24 + mov r5, r5, lsl #8 + orr r5, r5, r6, lsr #24 + mov r6, r6, lsl #8 + orr r6, r6, r7, lsr #24 + mov r7, r7, lsl #8 + orr r7, r7, r8, lsr #24 + mov r8, r8, lsl #8 + orr r8, r8, r9, lsr #24 + 
mov r9, r9, lsl #8 + orr r9, r9, r10, lsr #24 + mov r10, r10, lsl #8 + orr r10, r10, r11, lsr #24 + stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10} + mov r3, r11, lsl #8 +#else orr r3, r3, r4, lsl #24 mov r4, r4, lsr #8 orr r4, r4, r5, lsl #24 @@ -313,6 +371,7 @@ loop8: orr r10, r10, r11, lsl #24 stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10} mov r3, r11, lsr #8 +#endif bhs 1b b less_than_thirtytwo @@ -322,6 +381,25 @@ loop24: ldmia r1!, { r5,r6,r7, r8,r9,r10,r11} subs r2, r2, #32 ldrhs r12, [r1], #4 +#if __ARMEB__ + orr r3, r3, r4, lsr #8 + mov r4, r4, lsl #24 + orr r4, r4, r5, lsr #8 + mov r5, r5, lsl #24 + orr r5, r5, r6, lsr #8 + mov r6, r6, lsl #24 + orr r6, r6, r7, lsr #8 + mov r7, r7, lsl #24 + orr r7, r7, r8, lsr #8 + mov r8, r8, lsl #24 + orr r8, r8, r9, lsr #8 + mov r9, r9, lsl #24 + orr r9, r9, r10, lsr #8 + mov r10, r10, lsl #24 + orr r10, r10, r11, lsr #8 + stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10} + mov r3, r11, lsl #24 +#else orr r3, r3, r4, lsl #8 mov r4, r4, lsr #24 orr r4, r4, r5, lsl #8 @@ -339,6 +417,7 @@ loop24: orr r10, r10, r11, lsl #8 stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10} mov r3, r11, lsr #24 +#endif bhs 1b less_than_thirtytwo: @@ -350,9 +429,15 @@ less_than_thirtytwo: 1: ldr r5, [r1], #4 sub r2, r2, #4 +#if __ARMEB__ + mov r4, r5, lsr lr + orr r4, r4, r3 + mov r3, r5, lsl r12 +#else mov r4, r5, lsl lr orr r4, r4, r3 mov r3, r5, lsr r12 +#endif str r4, [r0], #4 cmp r2, #4 bhs 1b @@ -360,11 +445,20 @@ less_than_thirtytwo: partial_word_tail: /* we have a partial word in the input buffer */ movs r5, lr, lsl #(31-3) +#if __ARMEB__ + movmi r3, r3, ror #24 + strbmi r3, [r0], #1 + movcs r3, r3, ror #24 + strbcs r3, [r0], #1 + movcs r3, r3, ror #24 + strbcs r3, [r0], #1 +#else strbmi r3, [r0], #1 movmi r3, r3, lsr #8 strbcs r3, [r0], #1 movcs r3, r3, lsr #8 strbcs r3, [r0], #1 +#endif /* Refill spilled registers from the stack. Don't update sp. 
*/ ldmfd sp, {r5-r11} @@ -383,4 +477,3 @@ copy_last_3_and_return: ldmfd sp!, {r0, r4, lr} bx lr -#endif diff --git a/libc-top-half/musl/src/string/arm/memcpy.c b/libc-top-half/musl/src/string/arm/memcpy.c deleted file mode 100644 index 041614f4b..000000000 --- a/libc-top-half/musl/src/string/arm/memcpy.c +++ /dev/null @@ -1,3 +0,0 @@ -#if __ARMEB__ -#include "../memcpy.c" -#endif diff --git a/libc-top-half/musl/src/string/memccpy.c b/libc-top-half/musl/src/string/memccpy.c index 00c18e2b5..3b0a37002 100644 --- a/libc-top-half/musl/src/string/memccpy.c +++ b/libc-top-half/musl/src/string/memccpy.c @@ -29,6 +29,6 @@ void *memccpy(void *restrict dest, const void *restrict src, int c, size_t n) #endif for (; n && (*d=*s)!=c; n--, s++, d++); tail: - if (n && *s==c) return d+1; + if (n) return d+1; return 0; } diff --git a/libc-top-half/musl/src/string/memmem.c b/libc-top-half/musl/src/string/memmem.c index 58a21fcd6..11eff86e4 100644 --- a/libc-top-half/musl/src/string/memmem.c +++ b/libc-top-half/musl/src/string/memmem.c @@ -12,8 +12,8 @@ static char *twobyte_memmem(const unsigned char *h, size_t k, const unsigned cha static char *threebyte_memmem(const unsigned char *h, size_t k, const unsigned char *n) { - uint32_t nw = n[0]<<24 | n[1]<<16 | n[2]<<8; - uint32_t hw = h[0]<<24 | h[1]<<16 | h[2]<<8; + uint32_t nw = (uint32_t)n[0]<<24 | n[1]<<16 | n[2]<<8; + uint32_t hw = (uint32_t)h[0]<<24 | h[1]<<16 | h[2]<<8; for (h+=3, k-=3; k; k--, hw = (hw|*h++)<<8) if (hw == nw) return (char *)h-3; return hw == nw ? 
(char *)h-3 : 0; @@ -21,8 +21,8 @@ static char *threebyte_memmem(const unsigned char *h, size_t k, const unsigned c static char *fourbyte_memmem(const unsigned char *h, size_t k, const unsigned char *n) { - uint32_t nw = n[0]<<24 | n[1]<<16 | n[2]<<8 | n[3]; - uint32_t hw = h[0]<<24 | h[1]<<16 | h[2]<<8 | h[3]; + uint32_t nw = (uint32_t)n[0]<<24 | n[1]<<16 | n[2]<<8 | n[3]; + uint32_t hw = (uint32_t)h[0]<<24 | h[1]<<16 | h[2]<<8 | h[3]; for (h+=4, k-=4; k; k--, hw = hw<<8 | *h++) if (hw == nw) return (char *)h-4; return hw == nw ? (char *)h-4 : 0; diff --git a/libc-top-half/musl/src/string/strsignal.c b/libc-top-half/musl/src/string/strsignal.c index 96bfe841f..5156366e6 100644 --- a/libc-top-half/musl/src/string/strsignal.c +++ b/libc-top-half/musl/src/string/strsignal.c @@ -31,7 +31,11 @@ static const char map[] = { [SIGPIPE] = 13, [SIGALRM] = 14, [SIGTERM] = 15, +#if defined(SIGSTKFLT) [SIGSTKFLT] = 16, +#elif defined(SIGEMT) + [SIGEMT] = 16, +#endif [SIGCHLD] = 17, [SIGCONT] = 18, [SIGSTOP] = 19, @@ -70,7 +74,13 @@ static const char strings[] = "Broken pipe\0" "Alarm clock\0" "Terminated\0" +#if defined(SIGSTKFLT) "Stack fault\0" +#elif defined(SIGEMT) + "Emulator trap\0" +#else + "Unknown signal\0" +#endif "Child process status\0" "Continued\0" "Stopped (signal)\0" diff --git a/libc-top-half/musl/src/string/strstr.c b/libc-top-half/musl/src/string/strstr.c index 55ba1c7b4..43a0207a7 100644 --- a/libc-top-half/musl/src/string/strstr.c +++ b/libc-top-half/musl/src/string/strstr.c @@ -10,16 +10,16 @@ static char *twobyte_strstr(const unsigned char *h, const unsigned char *n) static char *threebyte_strstr(const unsigned char *h, const unsigned char *n) { - uint32_t nw = n[0]<<24 | n[1]<<16 | n[2]<<8; - uint32_t hw = h[0]<<24 | h[1]<<16 | h[2]<<8; + uint32_t nw = (uint32_t)n[0]<<24 | n[1]<<16 | n[2]<<8; + uint32_t hw = (uint32_t)h[0]<<24 | h[1]<<16 | h[2]<<8; for (h+=2; *h && hw != nw; hw = (hw|*++h)<<8); return *h ? 
(char *)h-2 : 0; } static char *fourbyte_strstr(const unsigned char *h, const unsigned char *n) { - uint32_t nw = n[0]<<24 | n[1]<<16 | n[2]<<8 | n[3]; - uint32_t hw = h[0]<<24 | h[1]<<16 | h[2]<<8 | h[3]; + uint32_t nw = (uint32_t)n[0]<<24 | n[1]<<16 | n[2]<<8 | n[3]; + uint32_t hw = (uint32_t)h[0]<<24 | h[1]<<16 | h[2]<<8 | h[3]; for (h+=3; *h && hw != nw; hw = hw<<8 | *++h); return *h ? (char *)h-3 : 0; } diff --git a/libc-top-half/musl/src/thread/__lock.c b/libc-top-half/musl/src/thread/__lock.c index 45557c888..60eece49a 100644 --- a/libc-top-half/musl/src/thread/__lock.c +++ b/libc-top-half/musl/src/thread/__lock.c @@ -18,9 +18,11 @@ void __lock(volatile int *l) { - if (!libc.threads_minus_1) return; + int need_locks = libc.need_locks; + if (!need_locks) return; /* fast path: INT_MIN for the lock, +1 for the congestion */ int current = a_cas(l, 0, INT_MIN + 1); + if (need_locks < 0) libc.need_locks = 0; if (!current) return; /* A first spin loop, for medium congestion. */ for (unsigned i = 0; i < 10; ++i) { diff --git a/libc-top-half/musl/src/thread/pthread_create.c b/libc-top-half/musl/src/thread/pthread_create.c index 5f4910925..10f1b7d8c 100644 --- a/libc-top-half/musl/src/thread/pthread_create.c +++ b/libc-top-half/musl/src/thread/pthread_create.c @@ -72,12 +72,13 @@ _Noreturn void __pthread_exit(void *result) /* Access to target the exiting thread with syscalls that use * its kernel tid is controlled by killlock. For detached threads, * any use past this point would have undefined behavior, but for - * joinable threads it's a valid usage that must be handled. */ + * joinable threads it's a valid usage that must be handled. + * Signals must be blocked since pthread_kill must be AS-safe. */ + __block_app_sigs(&set); LOCK(self->killlock); - /* The thread list lock must be AS-safe, and thus requires - * application signals to be blocked before it can be taken. 
*/ - __block_app_sigs(&set); + /* The thread list lock must be AS-safe, and thus depends on + * application signals being blocked above. */ __tl_lock(); /* If this is the only thread in the list, don't proceed with @@ -85,19 +86,12 @@ _Noreturn void __pthread_exit(void *result) * signal state to prepare for exit to call atexit handlers. */ if (self->next == self) { __tl_unlock(); - __restore_sigs(&set); UNLOCK(self->killlock); + __restore_sigs(&set); exit(0); } - /* At this point we are committed to thread termination. Unlink - * the thread from the list. This change will not be visible - * until the lock is released, which only happens after SYS_exit - * has been called, via the exit futex address pointing at the lock. */ - libc.threads_minus_1--; - self->next->prev = self->prev; - self->prev->next = self->next; - self->prev = self->next = self; + /* At this point we are committed to thread termination. */ /* Process robust list in userspace to handle non-pshared mutexes * and the detached thread case where the robust list head will @@ -121,6 +115,16 @@ _Noreturn void __pthread_exit(void *result) __do_orphaned_stdio_locks(); __dl_thread_cleanup(); + /* Last, unlink thread from the list. This change will not be visible + * until the lock is released, which only happens after SYS_exit + * has been called, via the exit futex address pointing at the lock. + * This needs to happen after any possible calls to LOCK() that might + * skip locking if process appears single-threaded. */ + if (!--libc.threads_minus_1) libc.need_locks = -1; + self->next->prev = self->prev; + self->prev->next = self->next; + self->prev = self->next = self; + /* This atomic potentially competes with a concurrent pthread_detach * call; the loser is responsible for freeing thread resources. 
*/ int state = a_cas(&self->detach_state, DT_JOINABLE, DT_EXITING); @@ -336,7 +340,7 @@ int __pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict att ~(1UL<<((SIGCANCEL-1)%(8*sizeof(long)))); __tl_lock(); - libc.threads_minus_1++; + if (!libc.threads_minus_1++) libc.need_locks = 1; ret = __clone((c11 ? start_c11 : start), stack, flags, args, &new->tid, TP_ADJ(new), &__thread_list_lock); /* All clone failures translate to EAGAIN. If explicit scheduling @@ -360,7 +364,7 @@ int __pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict att new->next->prev = new; new->prev->next = new; } else { - libc.threads_minus_1--; + if (!--libc.threads_minus_1) libc.need_locks = 0; } __tl_unlock(); __restore_sigs(&set); diff --git a/libc-top-half/musl/src/thread/pthread_getschedparam.c b/libc-top-half/musl/src/thread/pthread_getschedparam.c index 1cba073d0..c098befb1 100644 --- a/libc-top-half/musl/src/thread/pthread_getschedparam.c +++ b/libc-top-half/musl/src/thread/pthread_getschedparam.c @@ -4,6 +4,8 @@ int pthread_getschedparam(pthread_t t, int *restrict policy, struct sched_param *restrict param) { int r; + sigset_t set; + __block_app_sigs(&set); LOCK(t->killlock); if (!t->tid) { r = ESRCH; @@ -14,5 +16,6 @@ int pthread_getschedparam(pthread_t t, int *restrict policy, struct sched_param } } UNLOCK(t->killlock); + __restore_sigs(&set); return r; } diff --git a/libc-top-half/musl/src/thread/pthread_kill.c b/libc-top-half/musl/src/thread/pthread_kill.c index 3d9395cb7..79ddb2097 100644 --- a/libc-top-half/musl/src/thread/pthread_kill.c +++ b/libc-top-half/musl/src/thread/pthread_kill.c @@ -4,9 +4,15 @@ int pthread_kill(pthread_t t, int sig) { int r; + sigset_t set; + /* Block not just app signals, but internal ones too, since + * pthread_kill is used to implement pthread_cancel, which + * must be async-cancel-safe. */ + __block_all_sigs(&set); LOCK(t->killlock); r = t->tid ? -__syscall(SYS_tkill, t->tid, sig) : (sig+0U >= _NSIG ? 
EINVAL : 0); UNLOCK(t->killlock); + __restore_sigs(&set); return r; } diff --git a/libc-top-half/musl/src/thread/pthread_setschedparam.c b/libc-top-half/musl/src/thread/pthread_setschedparam.c index 038d13d8a..76d4d45a3 100644 --- a/libc-top-half/musl/src/thread/pthread_setschedparam.c +++ b/libc-top-half/musl/src/thread/pthread_setschedparam.c @@ -4,8 +4,11 @@ int pthread_setschedparam(pthread_t t, int policy, const struct sched_param *param) { int r; + sigset_t set; + __block_app_sigs(&set); LOCK(t->killlock); r = !t->tid ? ESRCH : -__syscall(SYS_sched_setscheduler, t->tid, policy, param); UNLOCK(t->killlock); + __restore_sigs(&set); return r; } diff --git a/libc-top-half/musl/src/thread/pthread_setschedprio.c b/libc-top-half/musl/src/thread/pthread_setschedprio.c index 5bf4a0197..fc2e13ddb 100644 --- a/libc-top-half/musl/src/thread/pthread_setschedprio.c +++ b/libc-top-half/musl/src/thread/pthread_setschedprio.c @@ -4,8 +4,11 @@ int pthread_setschedprio(pthread_t t, int prio) { int r; + sigset_t set; + __block_app_sigs(&set); LOCK(t->killlock); r = !t->tid ? ESRCH : -__syscall(SYS_sched_setparam, t->tid, &prio); UNLOCK(t->killlock); + __restore_sigs(&set); return r; } diff --git a/libc-top-half/musl/src/time/__tz.c b/libc-top-half/musl/src/time/__tz.c index 82b0cd642..383d05ec3 100644 --- a/libc-top-half/musl/src/time/__tz.c +++ b/libc-top-half/musl/src/time/__tz.c @@ -91,15 +91,15 @@ static void getname(char *d, const char **p) int i; if (**p == '<') { ++*p; - for (i=0; (*p)[i]!='>' && i