Skip to content

Commit

Permalink
rp2040: use built-in integer hardware divider and optimized i64 multi…
Browse files Browse the repository at this point in the history
…plication

The rp2040 cortex m0+ cores do not implement an integer divider itself, but
instead the rp2040 provides two on-chip hardware dividers as an extra
peripheral. To use those the GCC arm-v6 eabi integer division and modulo
built-ins have to be replaced with implementations that utilize them. An
optimized i64 multiplication implementation is provided as well.
  • Loading branch information
KarlK90 committed Oct 4, 2022
1 parent 996a900 commit d9dc3ee
Showing 1 changed file with 31 additions and 220 deletions.
251 changes: 31 additions & 220 deletions platforms/chibios/vendors/RP/RP2040.mk
Original file line number Diff line number Diff line change
Expand Up @@ -66,223 +66,34 @@ EXTRAINCDIRS += $(PLATFORM_RP2040_PATH)
# RP2040 optimized compiler intrinsics
##############################################################################

# Enables optimized Compiler intrinsics which are located in the RP2040
# bootrom. This needs startup code and linker script support from ChibiOS,
# which is WIP. Therefore disabled by default for now.
RP2040_INTRINSICS_ENABLED ?= no
ifeq ($(strip $(RP2040_INTRINSICS_ENABLED)), yes)
PICOSDKINTRINSICSSRC = $(PICOSDKROOT)/src/rp2_common/pico_float/float_aeabi.S \
$(PICOSDKROOT)/src/rp2_common/pico_float/float_math.c \
$(PICOSDKROOT)/src/rp2_common/pico_float/float_init_rom.c \
$(PICOSDKROOT)/src/rp2_common/pico_float/float_v1_rom_shim.S \
$(PICOSDKROOT)/src/rp2_common/pico_double/double_aeabi.S \
$(PICOSDKROOT)/src/rp2_common/pico_double/double_math.c \
$(PICOSDKROOT)/src/rp2_common/pico_double/double_init_rom.c \
$(PICOSDKROOT)/src/rp2_common/pico_double/double_v1_rom_shim.S \
$(PICOSDKROOT)/src/rp2_common/pico_divider/divider.S \
$(PICOSDKROOT)/src/rp2_common/pico_int64_ops/pico_int64_ops_aeabi.S \
$(PICOSDKROOT)/src/rp2_common/pico_mem_ops/mem_ops_aeabi.S \
$(PICOSDKROOT)/src/rp2_common/pico_malloc/pico_malloc.c \
$(PICOSDKROOT)/src/rp2_common/pico_bit_ops/bit_ops_aeabi.S

PICOSDKINTRINSICSINC = $(PICOSDKROOT)/src/common/pico_base/include \
$(PICOSDKROOT)/src/rp2_common/pico_platfrom/include \
$(PICOSDKROOT)/src/rp2_common/pico_bootrom/include \
$(PICOSDKROOT)/src/rp2_common/hardware_divider/include \
$(PICOSDKROOT)/src/rp2_common/pico_float/include \
$(PICOSDKROOT)/src/rp2_common/pico_double/include \
$(PICOSDKROOT)/src/rp2_common/pico_malloc/include

OPT_DEFS += -DPICO_FLOAT_SUPPORT_ROM_V1=0 -DPICO_DOUBLE_SUPPORT_ROM_V1=0

CFLAGS += -Wl,--defsym=__StackLimit=__heap_end__
CFLAGS += -Wl,--defsym=__unhandled_user_irq=_unhandled_exception
CFLAGS += -Wl,--build-id=none

# single precision floating point intrinsics
OPT_DEFS += -DPICO_FLOAT_IN_RAM=1
OPT_DEFS += -DPICO_FLOAT_PROPAGATE_NANS=0

CFLAGS += -Wl,--wrap=__aeabi_fdiv
CFLAGS += -Wl,--wrap=__aeabi_fmul
CFLAGS += -Wl,--wrap=__aeabi_frsub
CFLAGS += -Wl,--wrap=__aeabi_fsub
CFLAGS += -Wl,--wrap=__aeabi_cfcmpeq
CFLAGS += -Wl,--wrap=__aeabi_cfrcmple
CFLAGS += -Wl,--wrap=__aeabi_cfcmple
CFLAGS += -Wl,--wrap=__aeabi_fcmpeq
CFLAGS += -Wl,--wrap=__aeabi_fcmplt
CFLAGS += -Wl,--wrap=__aeabi_fcmple
CFLAGS += -Wl,--wrap=__aeabi_fcmpge
CFLAGS += -Wl,--wrap=__aeabi_fcmpgt
CFLAGS += -Wl,--wrap=__aeabi_fcmpun
CFLAGS += -Wl,--wrap=__aeabi_i2f
CFLAGS += -Wl,--wrap=__aeabi_l2f
CFLAGS += -Wl,--wrap=__aeabi_ui2f
CFLAGS += -Wl,--wrap=__aeabi_ul2f
CFLAGS += -Wl,--wrap=__aeabi_i2f
CFLAGS += -Wl,--wrap=__aeabi_f2iz
CFLAGS += -Wl,--wrap=__aeabi_f2lz
CFLAGS += -Wl,--wrap=__aeabi_f2uiz
CFLAGS += -Wl,--wrap=__aeabi_f2ulz
CFLAGS += -Wl,--wrap=__aeabi_f2d
CFLAGS += -Wl,--wrap=sqrtf
CFLAGS += -Wl,--wrap=cosf
CFLAGS += -Wl,--wrap=sinf
CFLAGS += -Wl,--wrap=tanf
CFLAGS += -Wl,--wrap=atan2f
CFLAGS += -Wl,--wrap=expf
CFLAGS += -Wl,--wrap=logf
CFLAGS += -Wl,--wrap=ldexpf
CFLAGS += -Wl,--wrap=copysignf
CFLAGS += -Wl,--wrap=truncf
CFLAGS += -Wl,--wrap=floorf
CFLAGS += -Wl,--wrap=ceilf
CFLAGS += -Wl,--wrap=roundf
CFLAGS += -Wl,--wrap=sincosf
CFLAGS += -Wl,--wrap=asinf
CFLAGS += -Wl,--wrap=acosf
CFLAGS += -Wl,--wrap=atanf
CFLAGS += -Wl,--wrap=sinhf
CFLAGS += -Wl,--wrap=coshf
CFLAGS += -Wl,--wrap=tanhf
CFLAGS += -Wl,--wrap=asinhf
CFLAGS += -Wl,--wrap=acoshf
CFLAGS += -Wl,--wrap=atanhf
CFLAGS += -Wl,--wrap=exp2f
CFLAGS += -Wl,--wrap=log2f
CFLAGS += -Wl,--wrap=exp10f
CFLAGS += -Wl,--wrap=log10f
CFLAGS += -Wl,--wrap=powf
CFLAGS += -Wl,--wrap=powintf
CFLAGS += -Wl,--wrap=hypotf
CFLAGS += -Wl,--wrap=cbrtf
CFLAGS += -Wl,--wrap=fmodf
CFLAGS += -Wl,--wrap=dremf
CFLAGS += -Wl,--wrap=remainderf
CFLAGS += -Wl,--wrap=remquof
CFLAGS += -Wl,--wrap=expm1f
CFLAGS += -Wl,--wrap=log1pf
CFLAGS += -Wl,--wrap=fmaf

# double precision floating point intrinsics
OPT_DEFS += -DPICO_DOUBLE_IN_RAM=1
OPT_DEFS += -DPICO_DOUBLE_PROPAGATE_NANS=0

CFLAGS += -Wl,--wrap=__aeabi_dadd
CFLAGS += -Wl,--wrap=__aeabi_ddiv
CFLAGS += -Wl,--wrap=__aeabi_dmul
CFLAGS += -Wl,--wrap=__aeabi_drsub
CFLAGS += -Wl,--wrap=__aeabi_dsub
CFLAGS += -Wl,--wrap=__aeabi_cdcmpeq
CFLAGS += -Wl,--wrap=__aeabi_cdrcmple
CFLAGS += -Wl,--wrap=__aeabi_cdcmple
CFLAGS += -Wl,--wrap=__aeabi_dcmpeq
CFLAGS += -Wl,--wrap=__aeabi_dcmplt
CFLAGS += -Wl,--wrap=__aeabi_dcmple
CFLAGS += -Wl,--wrap=__aeabi_dcmpge
CFLAGS += -Wl,--wrap=__aeabi_dcmpgt
CFLAGS += -Wl,--wrap=__aeabi_dcmpun
CFLAGS += -Wl,--wrap=__aeabi_i2d
CFLAGS += -Wl,--wrap=__aeabi_l2d
CFLAGS += -Wl,--wrap=__aeabi_ui2d
CFLAGS += -Wl,--wrap=__aeabi_ul2d
CFLAGS += -Wl,--wrap=__aeabi_d2iz
CFLAGS += -Wl,--wrap=__aeabi_d2lz
CFLAGS += -Wl,--wrap=__aeabi_d2uiz
CFLAGS += -Wl,--wrap=__aeabi_d2ulz
CFLAGS += -Wl,--wrap=__aeabi_d2f
CFLAGS += -Wl,--wrap=sqrt
CFLAGS += -Wl,--wrap=cos
CFLAGS += -Wl,--wrap=sin
CFLAGS += -Wl,--wrap=tan
CFLAGS += -Wl,--wrap=atan2
CFLAGS += -Wl,--wrap=exp
CFLAGS += -Wl,--wrap=log
CFLAGS += -Wl,--wrap=ldexp
CFLAGS += -Wl,--wrap=copysign
CFLAGS += -Wl,--wrap=trunc
CFLAGS += -Wl,--wrap=floor
CFLAGS += -Wl,--wrap=ceil
CFLAGS += -Wl,--wrap=round
CFLAGS += -Wl,--wrap=sincos
CFLAGS += -Wl,--wrap=asin
CFLAGS += -Wl,--wrap=acos
CFLAGS += -Wl,--wrap=atan
CFLAGS += -Wl,--wrap=sinh
CFLAGS += -Wl,--wrap=cosh
CFLAGS += -Wl,--wrap=tanh
CFLAGS += -Wl,--wrap=asinh
CFLAGS += -Wl,--wrap=acosh
CFLAGS += -Wl,--wrap=atanh
CFLAGS += -Wl,--wrap=exp2
CFLAGS += -Wl,--wrap=log2
CFLAGS += -Wl,--wrap=exp10
CFLAGS += -Wl,--wrap=log10
CFLAGS += -Wl,--wrap=pow
CFLAGS += -Wl,--wrap=powint
CFLAGS += -Wl,--wrap=hypot
CFLAGS += -Wl,--wrap=cbrt
CFLAGS += -Wl,--wrap=fmod
CFLAGS += -Wl,--wrap=drem
CFLAGS += -Wl,--wrap=remainder
CFLAGS += -Wl,--wrap=remquo
CFLAGS += -Wl,--wrap=expm1
CFLAGS += -Wl,--wrap=log1p
CFLAGS += -Wl,--wrap=fma

# bit operation intrinsics
OPT_DEFS += -DPICO_BITS_IN_RAM=1

CFLAGS += -Wl,--wrap=__clzsi2
CFLAGS += -Wl,--wrap=__clzsi2
CFLAGS += -Wl,--wrap=__clzdi2
CFLAGS += -Wl,--wrap=__ctzsi2
CFLAGS += -Wl,--wrap=__ctzdi2
CFLAGS += -Wl,--wrap=__popcountsi2
CFLAGS += -Wl,--wrap=__popcountdi2
CFLAGS += -Wl,--wrap=__clz
CFLAGS += -Wl,--wrap=__clzl
CFLAGS += -Wl,--wrap=__clzsi2
CFLAGS += -Wl,--wrap=__clzll

# integer division intrinsics
OPT_DEFS += -DPICO_DIVIDER_IN_RAM=1
OPT_DEFS += -DPICO_DIVIDER_DISABLE_INTERRUPTS=1

CFLAGS += -Wl,--wrap=__aeabi_idiv
CFLAGS += -Wl,--wrap=__aeabi_idivmod
CFLAGS += -Wl,--wrap=__aeabi_ldivmod
CFLAGS += -Wl,--wrap=__aeabi_uidiv
CFLAGS += -Wl,--wrap=__aeabi_uidivmod
CFLAGS += -Wl,--wrap=__aeabi_uldivmod

# 64bit integer intrinsics
OPT_DEFS += -DPICO_INT64_OPS_IN_RAM=1

CFLAGS += -Wl,--wrap=__aeabi_lmul

# malloc and friends functions
OPT_DEFS += -DPICO_USE_MALLOC_MUTEX=0
OPT_DEFS += -DPICO_DEBUG_MALLOC=0
OPT_DEFS ?= -DPICO_MALLOC_PANIC=0

CFLAGS += -Wl,--wrap=malloc
CFLAGS += -Wl,--wrap=calloc
CFLAGS += -Wl,--wrap=free

# memory operation intrinsics
OPT_DEFS += -DPICO_MEM_IN_RAM=1

CFLAGS += -Wl,--wrap=memcpy
CFLAGS += -Wl,--wrap=memset
CFLAGS += -Wl,--wrap=__aeabi_memcpy
CFLAGS += -Wl,--wrap=__aeabi_memset
CFLAGS += -Wl,--wrap=__aeabi_memcpy4
CFLAGS += -Wl,--wrap=__aeabi_memset4
CFLAGS += -Wl,--wrap=__aeabi_memcpy8
CFLAGS += -Wl,--wrap=__aeabi_memset8

PLATFORM_SRC += $(PICOSDKINTRINSICSSRC)
EXTRAINCDIRS += $(PICOSDKINTRINSICSINC)
endif
# The RP2040 sdk provides optimized compiler intrinsics which override the GCC
# built-ins. Some of these functions are located in the bootrom of the RP2040.
# Execution of these functions is realized via a vtable that is populated on
# bootup. This mechanism needs startup code and linker script support from
# ChibiOS, which is currently not implemented thus these functions are disabled
# ATM.
PICOSDKINTRINSICSSRC = $(PICOSDKROOT)/src/rp2_common/pico_divider/divider.S \
$(PICOSDKROOT)/src/rp2_common/pico_int64_ops/pico_int64_ops_aeabi.S

PICOSDKINTRINSICSINC = $(PICOSDKROOT)/src/common/pico_base/include \
$(PICOSDKROOT)/src/rp2_common/pico_platfrom/include \
$(PICOSDKROOT)/src/rp2_common/hardware_divider/include

# integer division intrinsics utilizing the RP2040 hardware divider
OPT_DEFS += -DPICO_DIVIDER_IN_RAM=1
OPT_DEFS += -DPICO_DIVIDER_DISABLE_INTERRUPTS=1

CFLAGS += -Wl,--wrap=__aeabi_idiv
CFLAGS += -Wl,--wrap=__aeabi_idivmod
CFLAGS += -Wl,--wrap=__aeabi_ldivmod
CFLAGS += -Wl,--wrap=__aeabi_uidiv
CFLAGS += -Wl,--wrap=__aeabi_uidivmod
CFLAGS += -Wl,--wrap=__aeabi_uldivmod

# 64bit integer intrinsics
OPT_DEFS += -DPICO_INT64_OPS_IN_RAM=1

CFLAGS += -Wl,--wrap=__aeabi_lmul

PLATFORM_SRC += $(PICOSDKINTRINSICSSRC)
EXTRAINCDIRS += $(PICOSDKINTRINSICSINC)

0 comments on commit d9dc3ee

Please sign in to comment.