Skip to content

Commit

Permalink
Initial support for STM32MP13 HAL
Browse files Browse the repository at this point in the history
This adds support for the STM32MP13 HAL, tested on the STM32MP135F MPU.

Using the HAL this modifies our previous RNG, AES-CBC, AES-GCM, HASH,
ECDSA and DES3 ST HAL acceleration to work with the MPU. It also works
around bugs found in the AES-GCM code of the HAL.

The HAL does not appear to have support for MD5 HASH at the moment, so
this has been given a flag to disable it on this MPU.
  • Loading branch information
LinuxJedi committed Nov 25, 2024
1 parent 40154e1 commit 0bebeec
Show file tree
Hide file tree
Showing 11 changed files with 300 additions and 22 deletions.
1 change: 1 addition & 0 deletions .wolfssl_known_macro_extras
Original file line number Diff line number Diff line change
Expand Up @@ -869,6 +869,7 @@ __ARCH_STRNCPY_NO_REDIRECT
__ARCH_STRSTR_NO_REDIRECT
__ARM_ARCH_7M__
__ARM_FEATURE_CRYPTO
__ASSEMBLER__
__ATOMIC_RELAXED
__AVR__
__BCPLUSPLUS__
Expand Down
36 changes: 29 additions & 7 deletions wolfcrypt/src/aes.c
Original file line number Diff line number Diff line change
Expand Up @@ -8161,8 +8161,18 @@ static WARN_UNUSED_RESULT int wc_AesGcmEncrypt_STM32(

/* Authentication buffer - must be 4-byte multiple zero padded */
authPadSz = authInSz % sizeof(word32);
#ifdef WOLFSSL_STM32MP13
/* STM32MP13 HAL at least v1.2 and lower has a bug with which it needs a
* minimum of 16 bytes for the auth
*/
if ((authInSz > 0) && (authInSz < 16)) {
authPadSz = 16 - authInSz;
}
#endif
if (authPadSz != 0) {
authPadSz = authInSz + sizeof(word32) - authPadSz;
if (authPadSz < authInSz + sizeof(word32)) {
authPadSz = authInSz + sizeof(word32) - authPadSz;
}
if (authPadSz <= sizeof(authhdr)) {
authInPadded = (byte*)authhdr;
}
Expand All @@ -8185,11 +8195,12 @@ static WARN_UNUSED_RESULT int wc_AesGcmEncrypt_STM32(
/* for cases where hardware cannot be used for authTag calculate it */
/* if IV is not 12 calculate GHASH using software */
if (ivSz != GCM_NONCE_MID_SZ
#ifndef CRYP_HEADERWIDTHUNIT_BYTE
#if !defined(CRYP_HEADERWIDTHUNIT_BYTE) || defined(WOLFSSL_STM32MP13)
/* or hardware that does not support partial block */
|| sz == 0 || partial != 0
#endif
#if !defined(CRYP_HEADERWIDTHUNIT_BYTE) && !defined(STM32_AESGCM_PARTIAL)
#if (!defined(CRYP_HEADERWIDTHUNIT_BYTE) || defined(WOLFSSL_STM32MP13)) \
&& !defined(STM32_AESGCM_PARTIAL)
/* or authIn is not a multiple of 4 */
|| authPadSz != authInSz
#endif
Expand All @@ -8204,13 +8215,14 @@ static WARN_UNUSED_RESULT int wc_AesGcmEncrypt_STM32(
if (ret != 0) {
return ret;
}

#ifdef WOLFSSL_STM32_CUBEMX
hcryp.Init.pInitVect = (STM_CRYPT_TYPE*)ctr;
hcryp.Init.Header = (STM_CRYPT_TYPE*)authInPadded;

#if defined(STM32_HAL_V2)
hcryp.Init.Algorithm = CRYP_AES_GCM;
#ifdef CRYP_HEADERWIDTHUNIT_BYTE
#if defined(CRYP_HEADERWIDTHUNIT_BYTE) && !defined(WOLFSSL_STM32MP13)
/* V2 with CRYP_HEADERWIDTHUNIT_BYTE uses byte size for header */
hcryp.Init.HeaderSize = authInSz;
#else
Expand Down Expand Up @@ -8693,14 +8705,24 @@ static WARN_UNUSED_RESULT int wc_AesGcmDecrypt_STM32(
authPadSz = authInSz;
}

#ifdef WOLFSSL_STM32MP13
/* STM32MP13 HAL at least v1.2 and lower has a bug with which it needs a
* minimum of 16 bytes for the auth
*/
if ((authInSz > 0) && (authInSz < 16)) {
authPadSz = 16 - authInSz;
}
#endif

/* for cases where hardware cannot be used for authTag calculate it */
/* if IV is not 12 calculate GHASH using software */
if (ivSz != GCM_NONCE_MID_SZ
#ifndef CRYP_HEADERWIDTHUNIT_BYTE
#if !defined(CRYP_HEADERWIDTHUNIT_BYTE) || defined(WOLFSSL_STM32MP13)
/* or hardware that does not support partial block */
|| sz == 0 || partial != 0
#endif
#if !defined(CRYP_HEADERWIDTHUNIT_BYTE) && !defined(STM32_AESGCM_PARTIAL)
#if (!defined(CRYP_HEADERWIDTHUNIT_BYTE) || defined(WOLFSSL_STM32MP13)) \
&& !defined(STM32_AESGCM_PARTIAL)
/* or authIn is not a multiple of 4 */
|| authPadSz != authInSz
#endif
Expand Down Expand Up @@ -8746,7 +8768,7 @@ static WARN_UNUSED_RESULT int wc_AesGcmDecrypt_STM32(

#if defined(STM32_HAL_V2)
hcryp.Init.Algorithm = CRYP_AES_GCM;
#ifdef CRYP_HEADERWIDTHUNIT_BYTE
#if defined(CRYP_HEADERWIDTHUNIT_BYTE) && !defined(WOLFSSL_STM32MP13)
/* V2 with CRYP_HEADERWIDTHUNIT_BYTE uses byte size for header */
hcryp.Init.HeaderSize = authInSz;
#else
Expand Down
18 changes: 14 additions & 4 deletions wolfcrypt/src/des3.c
Original file line number Diff line number Diff line change
Expand Up @@ -163,8 +163,13 @@
STM32_HAL_TIMEOUT);
}
/* save off IV */
des->reg[0] = hcryp.Instance->IV0LR;
des->reg[1] = hcryp.Instance->IV0RR;
#ifdef WOLFSSL_STM32MP13
des->reg[0] = ((CRYP_TypeDef *)(hcryp.Instance))->IV0LR;
des->reg[1] = ((CRYP_TypeDef *)(hcryp.Instance))->IV0RR;
#else
des->reg[0] = hcryp.Instance->IV0LR;
des->reg[1] = hcryp.Instance->IV0RR;
#endif
#else
while (sz > 0) {
/* if input and output same will overwrite input iv */
Expand Down Expand Up @@ -324,8 +329,13 @@
STM32_HAL_TIMEOUT);
}
/* save off IV */
des->reg[0] = hcryp.Instance->IV0LR;
des->reg[1] = hcryp.Instance->IV0RR;
#ifdef WOLFSSL_STM32MP13
des->reg[0] = ((CRYP_TypeDef *)(hcryp.Instance))->IV0LR;
des->reg[1] = ((CRYP_TypeDef *)(hcryp.Instance))->IV0RR;
#else
des->reg[0] = hcryp.Instance->IV0LR;
des->reg[1] = hcryp.Instance->IV0RR;
#endif
#else
while (sz > 0) {
if (dir == DES_ENCRYPTION) {
Expand Down
1 change: 1 addition & 0 deletions wolfcrypt/src/include.am
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ EXTRA_DIST += wolfcrypt/src/port/ti/ti-aes.c \
wolfcrypt/src/port/st/stm32.c \
wolfcrypt/src/port/st/stsafe.c \
wolfcrypt/src/port/st/README.md \
wolfcrypt/src/port/st/STM32MP13.md \
wolfcrypt/src/port/af_alg/afalg_aes.c \
wolfcrypt/src/port/af_alg/afalg_hash.c \
wolfcrypt/src/port/kcapi/kcapi_aes.c \
Expand Down
2 changes: 1 addition & 1 deletion wolfcrypt/src/md5.c
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@


/* Hardware Acceleration */
#if defined(STM32_HASH)
#if defined(STM32_HASH) && !defined(STM32_NOMD5)

/* Supports CubeMX HAL or Standard Peripheral Library */
#define HAVE_MD5_CUST_API
Expand Down
8 changes: 5 additions & 3 deletions wolfcrypt/src/port/st/README.md
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
# ST Ports

Support for the STM32 L4, F1, F2, F4 and F7 on-board crypto hardware acceleration:
Support for the STM32 L4, F1, F2, F4, F7 and MP13 on-board crypto hardware
acceleration:
- symmetric AES (ECB/CBC/CTR/GCM)
- MD5/SHA1/SHA224/SHA256
- MD5/SHA1/SHA224/SHA256 (MP13 does not have MD5 acceleration)

Support for the STM32 PKA on WB55, H7 and other devices with on-board public-key acceleration:
Support for the STM32 PKA on WB55, H7, MP13 and other devices with on-board
public-key acceleration:
- ECC192/ECC224/ECC256/ECC384

Support for the STSAFE-A100 crypto hardware accelerator co-processor via I2C for ECC supporting NIST or Brainpool 256-bit and 384-bit curves. It requires the ST-Safe SDK including wolf stsafe_interface.c/.h files. Please contact ST for these.
Expand Down
212 changes: 212 additions & 0 deletions wolfcrypt/src/port/st/STM32MP13.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,212 @@
# STM32MP13 Port

The STM32MP13 is unique in that it is an MPU instead of an MCU. The HAL also
behaves a little differently. This document outlines how to use it in bare
metal mode. For Linux, this should be used as a normal ARM Linux device.

## Linux

To cross-compile from a Linux host to the STM32MP13 OpenSTLinux, you need to
install the [SDK](https://www.st.com/en/embedded-software/stm32mp1dev.html#get-software).
In this example, I have extracted it to `/opt/st`.

Your build environment is configured by running:

```sh
source /opt/st/stm32mp1/4.2.4-openstlinux-6.1-yocto-mickledore-mpu-v24.06.26/environment-setup-cortexa7t2hf-neon-vfpv4-ostl-linux-gnueabi
```

If you wish to compile with support for `/dev/crypto` then you will also need to
do the following so that the headers are found by the compiler:

```sh
export CFLAGS="$CFLAGS -I /opt/st/stm32mp1/4.2.4-openstlinux-6.1-yocto-mickledore-mpu-v24.06.26/sysroots/cortexa7t2hf-neon-vfpv4-ostl-linux-gnueabi/usr/src/debug/cryptodev-module/1.12-r0/"
```

When running `./configure`, make sure you add `--host=arm-linux-gnueabi` to the
configure options.

## Bare metal

### Setting up

The board itself has dip switches to set the boot mode. These should be set to
off-off-on-off to set the board into "engineering mode". The MPU's SRAM can
then be flashed via the ST-Link.

#### Device Configuration Tool

In the configuration tool, enable and activate the following:

```
CRYP1
HASH1
PKA
RNG1
```

#### DDR RAM

You need to obtain the [STM32MP13 MPU Firmware Package](https://github.com/STMicroelectronics/STM32CubeMP13),
which contains many examples of how to use the board in bare metal mode. One
of the examples is the [DDR Init](https://github.com/STMicroelectronics/STM32CubeMP13/tree/main/Projects/STM32MP135C-DK/Examples/DDR/DDR_Init),
which you will need to use all the features of wolfSSL. This is because the SRAM
is only 128KB, but the DDR RAM is 512MB. This example initializes the DDR RAM
and also sets the MPU clock to 650MHz.

#### MMU & Cache

Enabling the MMU and cache will increase performance by around 50x, so it is
highly recommended. It may, however, make debugging more difficult.

To enable them, in the preprocessor settings, change:

```
NO_MMU_USE
NO_CACHE_USE
```

to:

```
MMU_USE
CACHE_USE
```

Note that the Cube IDE may break this if you make any changes to the Device
Configuration Tool.

#### printf()

If you are using an STM32MP135F-DK board and want to use the ST-Link UART for
`printf()`, then you need to set PD6 and PD8 as the UART 4 RX/TX pins. You can
then enable UART4 and set it to "Asynchronous" mode.

In the code 0 section of `main.c` add:

```c
/*
 * Character output routine: transmits one character over UART4.
 * Built as __io_putchar() for GCC and as fputc() for other toolchains.
 * Returns the character that was passed in.
 */
#ifdef __GNUC__
int __io_putchar(int ch)
#else
int fputc(int ch, FILE *f)
#endif
{
    /* Copy the character into a real byte so that the value transmitted
     * does not depend on the byte order of the int argument */
    uint8_t byte = (uint8_t)ch;

    HAL_UART_Transmit(&huart4, &byte, 1, 0xFFFF);

    return ch;
}
#ifdef __GNUC__
/*
 * Sends each of the len bytes starting at ptr through __io_putchar().
 * The file argument is not used. Always returns len.
 */
int _write(int file, char *ptr, int len)
{
    int remaining = len;

    while (remaining > 0) {
        __io_putchar(*ptr++);
        remaining--;
    }
    return len;
}
#endif
```

UART4 will now be used for `printf()`.



### wolfSSL in your project

There are a few things you need to do to get wolfSSL to run in your project. The
first is to set the following additional compile options. The first option
allows the ARM ASM optimizations to compile, and the second stops alignment
issues from crashing the board:

```
-fomit-frame-pointer
-mno-unaligned-access
```

The first of these should also be passed as a flag to the assembler.

Then the code needs to be set to use the DDR RAM instead of SRAM. To do this,
edit `STM32MP135FAFX_RAM.ld` and change:

```c
REGION_ALIAS("RAM", SYSRAM_BASE);
```
To this:
```c
REGION_ALIAS("RAM", DDR_BASE);
```

In the Run Configuration menu, make sure that the debugger's startup has the
"monitor reset" command removed. Otherwise the DDR initialization will be reset.

In the `main.c` make sure that `SystemClock_Config();` is not executed. The DDR
Init code will do this, and changing it will likely crash the board. It can
be done like this:

```c
/* USER CODE BEGIN Init */
#if 0
/* USER CODE END Init */

/* Configure the system clock */
SystemClock_Config();

/* USER CODE BEGIN SysInit */
#endif
/* USER CODE END SysInit */
```

### Benchmark

To use the wolfCrypt benchmark, enable and activate the RTC in the Device
Configuration Tool and then add this to your `main.c`:

```c
/*
 * Time source for the wolfCrypt benchmark.
 *
 * Reads the RTC and returns the time of day in seconds, with a
 * millisecond fraction when the RTC time struct provides sub-seconds.
 */
double current_time(void)
{
    RTC_TimeTypeDef time;
    RTC_DateTypeDef date;
    uint32_t subsec = 0;

    /* must get time and date here due to STM32 HW bug */
    HAL_RTC_GetTime(&hrtc, &time, RTC_FORMAT_BIN);
    HAL_RTC_GetDate(&hrtc, &date, RTC_FORMAT_BIN);
    /* Not all STM32 RTCs have subseconds in the struct */
#ifdef RTC_ALARMSUBSECONDMASK_ALL
    subsec = (255 - time.SubSeconds) * 1000 / 255;
#endif

    /* date is only read as part of the time/date read pair above */
    (void) date;

    /* return seconds.milliseconds: an hour is 3600 seconds, a minute 60 */
    return ((double) time.Hours * 3600) + ((double) time.Minutes * 60)
            + (double) time.Seconds + ((double) subsec / 1000);
}
```
### Compiling wolfSSL
In your `user_settings.h` you should include:
```c
#define WOLFSSL_STM32MP13
#define WOLFSSL_STM32_CUBEMX
#define WOLFSSL_USER_CURRTIME
```

If you want ECDSA acceleration, you should also add:

```c
#define WOLFSSL_STM32_PKA
#define WOLFSSL_STM32_PKA_V2
```

### Running

Once you have compiled everything, to run your code you will first need to run
the DDR Init project. This will initialize the DDR RAM and the blue LED on the
board will flash. You can then run the wolfSSL based project. If the board
loses power, the DDR Init project will need to be run again before you are able
to run the wolfSSL project.
9 changes: 7 additions & 2 deletions wolfcrypt/src/port/st/stm32.c
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,9 @@
#elif defined(WOLFSSL_STM32WL)
#include <stm32wlxx_hal_conf.h>
#include <stm32wlxx_hal_pka.h>
#elif defined(WOLFSSL_STM32MP13)
#include <stm32mp13xx_hal_conf.h>
#include <stm32mp13xx_hal_pka.h>
#else
#error Please add the hal_pk.h include
#endif
Expand Down Expand Up @@ -442,8 +445,10 @@ int wc_Stm32_Aes_Init(Aes* aes, CRYP_HandleTypeDef* hcryp)
hcryp->Init.pKey = (STM_CRYPT_TYPE*)aes->key;
#ifdef STM32_HAL_V2
hcryp->Init.DataWidthUnit = CRYP_DATAWIDTHUNIT_BYTE;
#ifdef CRYP_HEADERWIDTHUNIT_BYTE
hcryp->Init.HeaderWidthUnit = CRYP_HEADERWIDTHUNIT_BYTE;
#ifdef WOLFSSL_STM32MP13
hcryp->Init.HeaderWidthUnit = CRYP_HEADERWIDTHUNIT_WORD;
#elif defined(CRYP_HEADERWIDTHUNIT_BYTE)
hcryp->Init.HeaderWidthUnit = CRYP_HEADERWIDTHUNIT_BYTE;
#endif
#endif

Expand Down
Loading

0 comments on commit 0bebeec

Please sign in to comment.