From d9dc3ee3f244da45c21385fbbada684274b012e5 Mon Sep 17 00:00:00 2001 From: Stefan Kerkmann Date: Fri, 23 Sep 2022 20:02:53 +0200 Subject: [PATCH] rp2040: use built-in integer hardware divider and optimized i64 multiplication The rp2040 cortex m0+ cores do not implement an integer divider itself, but instead the rp2040 provides two on-chip hardware dividers as an extra peripheral. To use those the GCC arm-v6 eabi integer division and modulo built-ins have to be replaced with implementations that utilize them. An optimized i64 multiplication implementation is provided as well. --- platforms/chibios/vendors/RP/RP2040.mk | 251 +++---------------------- 1 file changed, 31 insertions(+), 220 deletions(-) diff --git a/platforms/chibios/vendors/RP/RP2040.mk b/platforms/chibios/vendors/RP/RP2040.mk index efa3fef13f6a..4360512c050d 100644 --- a/platforms/chibios/vendors/RP/RP2040.mk +++ b/platforms/chibios/vendors/RP/RP2040.mk @@ -66,223 +66,34 @@ EXTRAINCDIRS += $(PLATFORM_RP2040_PATH) # RP2040 optimized compiler intrinsics ############################################################################## -# Enables optimized Compiler intrinsics which are located in the RP2040 -# bootrom. This needs startup code and linker script support from ChibiOS, -# which is WIP. Therefore disabled by default for now. -RP2040_INTRINSICS_ENABLED ?= no -ifeq ($(strip $(RP2040_INTRINSICS_ENABLED)), yes) - PICOSDKINTRINSICSSRC = $(PICOSDKROOT)/src/rp2_common/pico_float/float_aeabi.S \ - $(PICOSDKROOT)/src/rp2_common/pico_float/float_math.c \ - $(PICOSDKROOT)/src/rp2_common/pico_float/float_init_rom.c \ - $(PICOSDKROOT)/src/rp2_common/pico_float/float_v1_rom_shim.S \ - $(PICOSDKROOT)/src/rp2_common/pico_double/double_aeabi.S \ - $(PICOSDKROOT)/src/rp2_common/pico_double/double_math.c \ - $(PICOSDKROOT)/src/rp2_common/pico_double/double_init_rom.c \ - $(PICOSDKROOT)/src/rp2_common/pico_double/double_v1_rom_shim.S \ - $(PICOSDKROOT)/src/rp2_common/pico_divider/divider.S \ - $(PICOSDKROOT)/src/rp2_common/pico_int64_ops/pico_int64_ops_aeabi.S \ - $(PICOSDKROOT)/src/rp2_common/pico_mem_ops/mem_ops_aeabi.S \ - $(PICOSDKROOT)/src/rp2_common/pico_malloc/pico_malloc.c \ - $(PICOSDKROOT)/src/rp2_common/pico_bit_ops/bit_ops_aeabi.S - - PICOSDKINTRINSICSINC = $(PICOSDKROOT)/src/common/pico_base/include \ - $(PICOSDKROOT)/src/rp2_common/pico_platfrom/include \ - $(PICOSDKROOT)/src/rp2_common/pico_bootrom/include \ - $(PICOSDKROOT)/src/rp2_common/hardware_divider/include \ - $(PICOSDKROOT)/src/rp2_common/pico_float/include \ - $(PICOSDKROOT)/src/rp2_common/pico_double/include \ - $(PICOSDKROOT)/src/rp2_common/pico_malloc/include - - OPT_DEFS += -DPICO_FLOAT_SUPPORT_ROM_V1=0 -DPICO_DOUBLE_SUPPORT_ROM_V1=0 - - CFLAGS += -Wl,--defsym=__StackLimit=__heap_end__ - CFLAGS += -Wl,--defsym=__unhandled_user_irq=_unhandled_exception - CFLAGS += -Wl,--build-id=none - - # single precision floating point intrinsics - OPT_DEFS += -DPICO_FLOAT_IN_RAM=1 - OPT_DEFS += -DPICO_FLOAT_PROPAGATE_NANS=0 - - CFLAGS += -Wl,--wrap=__aeabi_fdiv - CFLAGS += -Wl,--wrap=__aeabi_fmul - CFLAGS += -Wl,--wrap=__aeabi_frsub - CFLAGS += -Wl,--wrap=__aeabi_fsub - CFLAGS += -Wl,--wrap=__aeabi_cfcmpeq - CFLAGS += -Wl,--wrap=__aeabi_cfrcmple - CFLAGS += -Wl,--wrap=__aeabi_cfcmple - CFLAGS += -Wl,--wrap=__aeabi_fcmpeq - CFLAGS += -Wl,--wrap=__aeabi_fcmplt - CFLAGS += -Wl,--wrap=__aeabi_fcmple - CFLAGS += -Wl,--wrap=__aeabi_fcmpge - CFLAGS += -Wl,--wrap=__aeabi_fcmpgt - CFLAGS += -Wl,--wrap=__aeabi_fcmpun - CFLAGS += -Wl,--wrap=__aeabi_i2f - CFLAGS += -Wl,--wrap=__aeabi_l2f - CFLAGS += -Wl,--wrap=__aeabi_ui2f - CFLAGS += -Wl,--wrap=__aeabi_ul2f - CFLAGS += -Wl,--wrap=__aeabi_i2f - CFLAGS += -Wl,--wrap=__aeabi_f2iz - CFLAGS += -Wl,--wrap=__aeabi_f2lz - CFLAGS += -Wl,--wrap=__aeabi_f2uiz - CFLAGS += -Wl,--wrap=__aeabi_f2ulz - CFLAGS += -Wl,--wrap=__aeabi_f2d - CFLAGS += -Wl,--wrap=sqrtf - CFLAGS += -Wl,--wrap=cosf - CFLAGS += -Wl,--wrap=sinf - CFLAGS += -Wl,--wrap=tanf - CFLAGS += -Wl,--wrap=atan2f - CFLAGS += -Wl,--wrap=expf - CFLAGS += -Wl,--wrap=logf - CFLAGS += -Wl,--wrap=ldexpf - CFLAGS += -Wl,--wrap=copysignf - CFLAGS += -Wl,--wrap=truncf - CFLAGS += -Wl,--wrap=floorf - CFLAGS += -Wl,--wrap=ceilf - CFLAGS += -Wl,--wrap=roundf - CFLAGS += -Wl,--wrap=sincosf - CFLAGS += -Wl,--wrap=asinf - CFLAGS += -Wl,--wrap=acosf - CFLAGS += -Wl,--wrap=atanf - CFLAGS += -Wl,--wrap=sinhf - CFLAGS += -Wl,--wrap=coshf - CFLAGS += -Wl,--wrap=tanhf - CFLAGS += -Wl,--wrap=asinhf - CFLAGS += -Wl,--wrap=acoshf - CFLAGS += -Wl,--wrap=atanhf - CFLAGS += -Wl,--wrap=exp2f - CFLAGS += -Wl,--wrap=log2f - CFLAGS += -Wl,--wrap=exp10f - CFLAGS += -Wl,--wrap=log10f - CFLAGS += -Wl,--wrap=powf - CFLAGS += -Wl,--wrap=powintf - CFLAGS += -Wl,--wrap=hypotf - CFLAGS += -Wl,--wrap=cbrtf - CFLAGS += -Wl,--wrap=fmodf - CFLAGS += -Wl,--wrap=dremf - CFLAGS += -Wl,--wrap=remainderf - CFLAGS += -Wl,--wrap=remquof - CFLAGS += -Wl,--wrap=expm1f - CFLAGS += -Wl,--wrap=log1pf - CFLAGS += -Wl,--wrap=fmaf - - # double precision floating point intrinsics - OPT_DEFS += -DPICO_DOUBLE_IN_RAM=1 - OPT_DEFS += -DPICO_DOUBLE_PROPAGATE_NANS=0 - - CFLAGS += -Wl,--wrap=__aeabi_dadd - CFLAGS += -Wl,--wrap=__aeabi_ddiv - CFLAGS += -Wl,--wrap=__aeabi_dmul - CFLAGS += -Wl,--wrap=__aeabi_drsub - CFLAGS += -Wl,--wrap=__aeabi_dsub - CFLAGS += -Wl,--wrap=__aeabi_cdcmpeq - CFLAGS += -Wl,--wrap=__aeabi_cdrcmple - CFLAGS += -Wl,--wrap=__aeabi_cdcmple - CFLAGS += -Wl,--wrap=__aeabi_dcmpeq - CFLAGS += -Wl,--wrap=__aeabi_dcmplt - CFLAGS += -Wl,--wrap=__aeabi_dcmple - CFLAGS += -Wl,--wrap=__aeabi_dcmpge - CFLAGS += -Wl,--wrap=__aeabi_dcmpgt - CFLAGS += -Wl,--wrap=__aeabi_dcmpun - CFLAGS += -Wl,--wrap=__aeabi_i2d - CFLAGS += -Wl,--wrap=__aeabi_l2d - CFLAGS += -Wl,--wrap=__aeabi_ui2d - CFLAGS += -Wl,--wrap=__aeabi_ul2d - CFLAGS += -Wl,--wrap=__aeabi_d2iz - CFLAGS += -Wl,--wrap=__aeabi_d2lz - CFLAGS += -Wl,--wrap=__aeabi_d2uiz - CFLAGS += -Wl,--wrap=__aeabi_d2ulz - CFLAGS += -Wl,--wrap=__aeabi_d2f - CFLAGS += -Wl,--wrap=sqrt - CFLAGS += -Wl,--wrap=cos - CFLAGS += -Wl,--wrap=sin - CFLAGS += -Wl,--wrap=tan - CFLAGS += -Wl,--wrap=atan2 - CFLAGS += -Wl,--wrap=exp - CFLAGS += -Wl,--wrap=log - CFLAGS += -Wl,--wrap=ldexp - CFLAGS += -Wl,--wrap=copysign - CFLAGS += -Wl,--wrap=trunc - CFLAGS += -Wl,--wrap=floor - CFLAGS += -Wl,--wrap=ceil - CFLAGS += -Wl,--wrap=round - CFLAGS += -Wl,--wrap=sincos - CFLAGS += -Wl,--wrap=asin - CFLAGS += -Wl,--wrap=acos - CFLAGS += -Wl,--wrap=atan - CFLAGS += -Wl,--wrap=sinh - CFLAGS += -Wl,--wrap=cosh - CFLAGS += -Wl,--wrap=tanh - CFLAGS += -Wl,--wrap=asinh - CFLAGS += -Wl,--wrap=acosh - CFLAGS += -Wl,--wrap=atanh - CFLAGS += -Wl,--wrap=exp2 - CFLAGS += -Wl,--wrap=log2 - CFLAGS += -Wl,--wrap=exp10 - CFLAGS += -Wl,--wrap=log10 - CFLAGS += -Wl,--wrap=pow - CFLAGS += -Wl,--wrap=powint - CFLAGS += -Wl,--wrap=hypot - CFLAGS += -Wl,--wrap=cbrt - CFLAGS += -Wl,--wrap=fmod - CFLAGS += -Wl,--wrap=drem - CFLAGS += -Wl,--wrap=remainder - CFLAGS += -Wl,--wrap=remquo - CFLAGS += -Wl,--wrap=expm1 - CFLAGS += -Wl,--wrap=log1p - CFLAGS += -Wl,--wrap=fma - - # bit operation intrinsics - OPT_DEFS += -DPICO_BITS_IN_RAM=1 - - CFLAGS += -Wl,--wrap=__clzsi2 - CFLAGS += -Wl,--wrap=__clzsi2 - CFLAGS += -Wl,--wrap=__clzdi2 - CFLAGS += -Wl,--wrap=__ctzsi2 - CFLAGS += -Wl,--wrap=__ctzdi2 - CFLAGS += -Wl,--wrap=__popcountsi2 - CFLAGS += -Wl,--wrap=__popcountdi2 - CFLAGS += -Wl,--wrap=__clz - CFLAGS += -Wl,--wrap=__clzl - CFLAGS += -Wl,--wrap=__clzsi2 - CFLAGS += -Wl,--wrap=__clzll - - # integer division intrinsics - OPT_DEFS += -DPICO_DIVIDER_IN_RAM=1 - OPT_DEFS += -DPICO_DIVIDER_DISABLE_INTERRUPTS=1 - - CFLAGS += -Wl,--wrap=__aeabi_idiv - CFLAGS += -Wl,--wrap=__aeabi_idivmod - CFLAGS += -Wl,--wrap=__aeabi_ldivmod - CFLAGS += -Wl,--wrap=__aeabi_uidiv - CFLAGS += -Wl,--wrap=__aeabi_uidivmod - CFLAGS += -Wl,--wrap=__aeabi_uldivmod - - # 64bit integer intrinsics - OPT_DEFS += -DPICO_INT64_OPS_IN_RAM=1 - - CFLAGS += -Wl,--wrap=__aeabi_lmul - - # malloc and friends functions - OPT_DEFS += -DPICO_USE_MALLOC_MUTEX=0 - OPT_DEFS += -DPICO_DEBUG_MALLOC=0 - OPT_DEFS ?= -DPICO_MALLOC_PANIC=0 - - CFLAGS += -Wl,--wrap=malloc - CFLAGS += -Wl,--wrap=calloc - CFLAGS += -Wl,--wrap=free - - # memory operation intrinsics - OPT_DEFS += -DPICO_MEM_IN_RAM=1 - - CFLAGS += -Wl,--wrap=memcpy - CFLAGS += -Wl,--wrap=memset - CFLAGS += -Wl,--wrap=__aeabi_memcpy - CFLAGS += -Wl,--wrap=__aeabi_memset - CFLAGS += -Wl,--wrap=__aeabi_memcpy4 - CFLAGS += -Wl,--wrap=__aeabi_memset4 - CFLAGS += -Wl,--wrap=__aeabi_memcpy8 - CFLAGS += -Wl,--wrap=__aeabi_memset8 - - PLATFORM_SRC += $(PICOSDKINTRINSICSSRC) - EXTRAINCDIRS += $(PICOSDKINTRINSICSINC) -endif +# The RP2040 sdk provides optimized compiler intrinsics which override the GCC +# built-ins. Some of these functions are located in the bootrom of the RP2040. +# Execution of these functions is realized via a vtable that is populated on +# bootup. This mechanism needs startup code and linker script support from +# ChibiOS, which is currently not implemented thus these functions are disabled +# ATM. +PICOSDKINTRINSICSSRC = $(PICOSDKROOT)/src/rp2_common/pico_divider/divider.S \ + $(PICOSDKROOT)/src/rp2_common/pico_int64_ops/pico_int64_ops_aeabi.S + +PICOSDKINTRINSICSINC = $(PICOSDKROOT)/src/common/pico_base/include \ + $(PICOSDKROOT)/src/rp2_common/pico_platfrom/include \ + $(PICOSDKROOT)/src/rp2_common/hardware_divider/include + +# integer division intrinsics utilizing the RP2040 hardware divider +OPT_DEFS += -DPICO_DIVIDER_IN_RAM=1 +OPT_DEFS += -DPICO_DIVIDER_DISABLE_INTERRUPTS=1 + +CFLAGS += -Wl,--wrap=__aeabi_idiv +CFLAGS += -Wl,--wrap=__aeabi_idivmod +CFLAGS += -Wl,--wrap=__aeabi_ldivmod +CFLAGS += -Wl,--wrap=__aeabi_uidiv +CFLAGS += -Wl,--wrap=__aeabi_uidivmod +CFLAGS += -Wl,--wrap=__aeabi_uldivmod + +# 64bit integer intrinsics +OPT_DEFS += -DPICO_INT64_OPS_IN_RAM=1 + +CFLAGS += -Wl,--wrap=__aeabi_lmul + +PLATFORM_SRC += $(PICOSDKINTRINSICSSRC) +EXTRAINCDIRS += $(PICOSDKINTRINSICSINC)