Skip to content

Commit

Permalink
Optimize unlikely overflow check
Browse files Browse the repository at this point in the history
  • Loading branch information
raspiduino committed Nov 13, 2024
1 parent 956f8a8 commit c921598
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 45 deletions.
18 changes: 9 additions & 9 deletions build.bat
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
@echo off
C:\Users\mmb\AppData\Local\Arduino15\packages\arduino\tools\avr-gcc\7.3.0-atmel3.6.1-arduino7\bin\avr-gcc -c -mmcu=atmega328p -I. -DF_CPU=16000000UL -Ofast -funsigned-char -funsigned-bitfields -fpack-struct -fshort-enums -Wall -Wstrict-prototypes -Wa,-adhlns=main.lst -std=gnu99 main.c -o main.o
C:\Users\mmb\AppData\Local\Arduino15\packages\arduino\tools\avr-gcc\7.3.0-atmel3.6.1-arduino7\bin\avr-gcc -c -mmcu=atmega328p -I. -DF_CPU=16000000UL -Ofast -funsigned-char -funsigned-bitfields -fpack-struct -fshort-enums -Wall -Wstrict-prototypes -Wa,-adhlns=sdcard.lst -std=gnu99 sdcard.c -o sdcard.o
C:\Users\mmb\AppData\Local\Arduino15\packages\arduino\tools\avr-gcc\7.3.0-atmel3.6.1-arduino7\bin\avr-gcc -c -mmcu=atmega328p -I. -DF_CPU=16000000UL -Ofast -funsigned-char -funsigned-bitfields -fpack-struct -fshort-enums -Wall -Wstrict-prototypes -Wa,-adhlns=sdprint.lst -std=gnu99 sdprint.c -o sdprint.o
C:\Users\mmb\AppData\Local\Arduino15\packages\arduino\tools\avr-gcc\7.3.0-atmel3.6.1-arduino7\bin\avr-gcc -c -mmcu=atmega328p -I. -DF_CPU=16000000UL -Ofast -funsigned-char -funsigned-bitfields -fpack-struct -fshort-enums -Wall -Wstrict-prototypes -Wa,-adhlns=spi.lst -std=gnu99 spi.c -o spi.o
C:\Users\mmb\AppData\Local\Arduino15\packages\arduino\tools\avr-gcc\7.3.0-atmel3.6.1-arduino7\bin\avr-gcc -c -mmcu=atmega328p -I. -DF_CPU=16000000UL -Ofast -funsigned-char -funsigned-bitfields -fpack-struct -fshort-enums -Wall -Wstrict-prototypes -Wa,-adhlns=uart.lst -std=gnu99 uart.c -o uart.o
C:\Users\mmb\AppData\Local\Arduino15\packages\arduino\tools\avr-gcc\7.3.0-atmel3.6.1-arduino7\bin\avr-gcc -mmcu=atmega328p -I. -DF_CPU=16000000UL -O3 -funsigned-char -funsigned-bitfields -fpack-struct -fshort-enums -Wall -Wstrict-prototypes -std=gnu99 main.o sdcard.o sdprint.o spi.o uart.o --output main.elf -Wl,-Map=main.map,--cref
C:\Users\mmb\AppData\Local\Arduino15\packages\arduino\tools\avr-gcc\7.3.0-atmel3.6.1-arduino7\bin\avr-objcopy -O ihex -R .eeprom main.elf main.hex
C:\Users\mmb\AppData\Local\Arduino15\packages\arduino\tools\avr-gcc\7.3.0-atmel3.6.1-arduino7\bin\avr-size -A -d main.elf
::"C:\Users\mmb\AppData\Local\Arduino15\packages\arduino\tools\avrdude\6.3.0-arduino17/bin/avrdude" "-CC:\Users\mmb\AppData\Local\Arduino15\packages\arduino\tools\avrdude\6.3.0-arduino17/etc/avrdude.conf" -v -V -patmega328p -carduino "-PCOM10" -b115200 -D "-Uflash:w:main.hex:i"
C:\Users\giang\AppData\Local\Arduino15\packages\arduino\tools\avr-gcc\7.3.0-atmel3.6.1-arduino7\bin\avr-gcc -c -mmcu=atmega328p -I. -DF_CPU=16000000UL -Ofast -funsigned-char -funsigned-bitfields -fpack-struct -fshort-enums -Wall -Wstrict-prototypes -Wa,-adhlns=main.lst -std=gnu99 main.c -o main.o
C:\Users\giang\AppData\Local\Arduino15\packages\arduino\tools\avr-gcc\7.3.0-atmel3.6.1-arduino7\bin\avr-gcc -c -mmcu=atmega328p -I. -DF_CPU=16000000UL -Ofast -funsigned-char -funsigned-bitfields -fpack-struct -fshort-enums -Wall -Wstrict-prototypes -Wa,-adhlns=sdcard.lst -std=gnu99 sdcard.c -o sdcard.o
C:\Users\giang\AppData\Local\Arduino15\packages\arduino\tools\avr-gcc\7.3.0-atmel3.6.1-arduino7\bin\avr-gcc -c -mmcu=atmega328p -I. -DF_CPU=16000000UL -Ofast -funsigned-char -funsigned-bitfields -fpack-struct -fshort-enums -Wall -Wstrict-prototypes -Wa,-adhlns=sdprint.lst -std=gnu99 sdprint.c -o sdprint.o
C:\Users\giang\AppData\Local\Arduino15\packages\arduino\tools\avr-gcc\7.3.0-atmel3.6.1-arduino7\bin\avr-gcc -c -mmcu=atmega328p -I. -DF_CPU=16000000UL -Ofast -funsigned-char -funsigned-bitfields -fpack-struct -fshort-enums -Wall -Wstrict-prototypes -Wa,-adhlns=spi.lst -std=gnu99 spi.c -o spi.o
C:\Users\giang\AppData\Local\Arduino15\packages\arduino\tools\avr-gcc\7.3.0-atmel3.6.1-arduino7\bin\avr-gcc -c -mmcu=atmega328p -I. -DF_CPU=16000000UL -Ofast -funsigned-char -funsigned-bitfields -fpack-struct -fshort-enums -Wall -Wstrict-prototypes -Wa,-adhlns=uart.lst -std=gnu99 uart.c -o uart.o
C:\Users\giang\AppData\Local\Arduino15\packages\arduino\tools\avr-gcc\7.3.0-atmel3.6.1-arduino7\bin\avr-gcc -mmcu=atmega328p -I. -DF_CPU=16000000UL -O3 -funsigned-char -funsigned-bitfields -fpack-struct -fshort-enums -Wall -Wstrict-prototypes -std=gnu99 main.o sdcard.o sdprint.o spi.o uart.o --output main.elf -Wl,-Map=main.map,--cref
C:\Users\giang\AppData\Local\Arduino15\packages\arduino\tools\avr-gcc\7.3.0-atmel3.6.1-arduino7\bin\avr-objcopy -O ihex -R .eeprom main.elf main.hex
C:\Users\giang\AppData\Local\Arduino15\packages\arduino\tools\avr-gcc\7.3.0-atmel3.6.1-arduino7\bin\avr-size -A -d main.elf
::"C:\Users\giang\AppData\Local\Arduino15\packages\arduino\tools\avrdude\6.3.0-arduino17/bin/avrdude" "-CC:\Users\giang\AppData\Local\Arduino15\packages\arduino\tools\avrdude\6.3.0-arduino17/etc/avrdude.conf" -v -V -patmega328p -carduino "-PCOM3" -b115200 -D "-Uflash:w:main.hex:i"
73 changes: 37 additions & 36 deletions main.c
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@
#include "spi.h"
#include "sdcard.h"
#include "sdprint.h"
#include "types.h"

// mini-rv32ima variables
int fail_on_all_faults = 0;
Expand Down Expand Up @@ -91,38 +90,38 @@ struct cache pool[3];

// Cache hit / miss stat
#ifdef ENABLE_CACHE_STAT
UInt32 icache_hit = 0;
UInt32 icache_miss = 0;
UInt32 dcache_hit = 0;
UInt32 dcache_miss = 0;
uint32_t icache_hit = 0;
uint32_t icache_miss = 0;
uint32_t dcache_hit = 0;
uint32_t dcache_miss = 0;
#endif

// Function prototypes
static UInt32 HandleException( UInt32 ir, UInt32 retval );
static UInt32 HandleControlStore( UInt32 addy, UInt32 val );
static UInt32 HandleControlLoad( UInt32 addy );
static void HandleOtherCSRWrite( UInt8 * image, UInt16 csrno, UInt32 value );
static Int32 HandleOtherCSRRead( UInt8 * image, UInt16 csrno );
static uint32_t HandleException( uint32_t ir, uint32_t retval );
static uint32_t HandleControlStore( uint32_t addy, uint32_t val );
static uint32_t HandleControlLoad( uint32_t addy );
static void HandleOtherCSRWrite( uint8_t * image, uint16_t csrno, uint32_t value );
static int32_t HandleOtherCSRRead( uint8_t * image, uint16_t csrno );

// Load / store helper
static UInt32 store4(UInt32 ofs, UInt32 val);
static UInt16 store2(UInt32 ofs, UInt16 val);
static UInt8 store1(UInt32 ofs, UInt8 val);
static uint32_t store4(uint32_t ofs, uint32_t val);
static uint16_t store2(uint32_t ofs, uint16_t val);
static uint8_t store1(uint32_t ofs, uint8_t val);

static UInt32 load4(UInt32 ofs);
static UInt16 load2(UInt32 ofs);
static UInt8 load1(UInt32 ofs);
static uint32_t load4(uint32_t ofs);
static uint16_t load2(uint32_t ofs);
static uint8_t load1(uint32_t ofs);

static UInt32 loadi(UInt32 ofs);
static uint32_t loadi(uint32_t ofs);

// Other
extern int __heap_start;
extern int *__brkval;
UInt32 last_cyclel = 0; // Last cyclel value
uint32_t last_cyclel = 0; // Last cyclel value
void dump_state(void);

// Config
const UInt32 RAM_SIZE = 16777216UL; // Minimum RAM amount (in bytes), just tested (may reduce further by custom kernel)
const uint32_t RAM_SIZE = 16777216UL; // Minimum RAM amount (in bytes), just tested (may reduce further by custom kernel)
#define DTB_SIZE 1536 // DTB size (in bytes), must recount manually each time DTB changes
#define INSTRS_PER_FLIP 1024 // Number of instructions executed before checking status. See loop()
#define TIME_DIVISOR 2
Expand All @@ -146,9 +145,9 @@ const UInt32 RAM_SIZE = 16777216UL; // Minimum RAM amount (in bytes), just teste
#define MINIRV32_STORE2( ofs, val ) store2(ofs, val)
#define MINIRV32_STORE1( ofs, val ) store1(ofs, val)
#define MINIRV32_LOAD4( ofs ) load4(ofs)
#define MINIRV32_LOAD2_SIGNED( ofs ) (Int8)load2(ofs)
// Signed halfword load: load2() yields a 16-bit value, so it must be
// sign-extended from bit 15. Casting to int8_t would drop the upper byte
// and sign-extend from bit 7 instead, corrupting every signed 16-bit load.
// The whole expansion is parenthesized so it composes safely in expressions.
#define MINIRV32_LOAD2_SIGNED( ofs ) ((int16_t)load2(ofs))
#define MINIRV32_LOAD2( ofs ) load2(ofs)
#define MINIRV32_LOAD1_SIGNED( ofs ) (Int8)load1(ofs)
#define MINIRV32_LOAD1_SIGNED( ofs ) (int8_t)load1(ofs)
#define MINIRV32_LOAD1( ofs ) load1(ofs)
#define MINIRV32_LOADI( ofs ) loadi(ofs)

Expand All @@ -174,9 +173,9 @@ unsigned long millis(void) {
}

// Init cache helper
void init_cache(UInt8 index, UInt32 tag, uint16_t age) {
UInt8 token;
UInt8 t = 0;
void init_cache(uint8_t index, uint32_t tag, uint16_t age) {
uint8_t token;
uint8_t t = 0;

// Read init sector
read_init_begin:
Expand Down Expand Up @@ -234,7 +233,7 @@ int main(void) {

// Setup cache
// Init cache0 as icache
init_cache(0, 0, 0xFFFF); // buf = 0x0 (code begin at 0x0)
init_cache(0, 0, 0xFF); // buf = 0x0 (code begin at 0x0)

// Init cache1 as dcache
init_cache(1, 6552, 0); // buf = second accessed address (got by dumping address)
Expand Down Expand Up @@ -312,7 +311,7 @@ int main(void) {

// Calculate pseudo time
uint64_t * this_ccount = ((uint64_t*)&core->cyclel);
UInt32 elapsedUs = 0;
uint32_t elapsedUs = 0;
elapsedUs = *this_ccount / TIME_DIVISOR - lastTime;
lastTime += elapsedUs;

Expand Down Expand Up @@ -363,7 +362,7 @@ static uint32_t HandleControlLoad( uint32_t addy )
return 0;
}

static void HandleOtherCSRWrite( UInt8 * image, UInt16 csrno, UInt32 value )
static void HandleOtherCSRWrite( uint8_t * image, uint16_t csrno, uint32_t value )
{
if( csrno == 0x136 )
{
Expand Down Expand Up @@ -396,11 +395,11 @@ static void HandleOtherCSRWrite( UInt8 * image, UInt16 csrno, UInt32 value )
}
else if( csrno == 0x139 )
{
UART_putc((UInt8)value);
UART_putc((uint8_t)value);
}
}

static Int32 HandleOtherCSRRead( UInt8 * image, UInt16 csrno )
static int32_t HandleOtherCSRRead( uint8_t * image, uint16_t csrno )
{
if( csrno == 0x140 )
{
Expand Down Expand Up @@ -435,9 +434,11 @@ uint8_t *read_buf(uint32_t ofs, bool flag, bool write) {
ret = 0;

// Add age
if (pool[0].age <= 0xFFFE) {
// Why is there no longer an overflow check?
// Because it is faster to let the age counter overflow (if it ever does), invalidate, and reload the cache than to check for overflow on every access.
//if (pool[0].age <= 0xFFFE) {
pool[0].age += 1;
}
//}

// Set dirty flag if needed
if (write) {
Expand All @@ -447,9 +448,9 @@ uint8_t *read_buf(uint32_t ofs, bool flag, bool write) {
ret = 1;

// Add age
if (pool[1].age <= 0xFFFE) {
//if (pool[1].age <= 0xFFFE) {
pool[1].age += 1;
}
//}

// Set dirty flag if needed
if (write) {
Expand All @@ -459,9 +460,9 @@ uint8_t *read_buf(uint32_t ofs, bool flag, bool write) {
ret = 2;

// Add age
if (pool[2].age <= 0xFFFE) {
//if (pool[2].age <= 0xFFFE) {
pool[2].age += 1;
}
//}

// Set dirty flag if needed
// If you think this can be optimized to pool[2].flag = write, think again!
Expand Down Expand Up @@ -498,9 +499,9 @@ uint8_t *read_buf(uint32_t ofs, bool flag, bool write) {
lru = 0;
}

uint8_t token;
continue_without_finding_lru:
// Check if the LRU cache is dirty
uint8_t token;
if (!pool[lru].flag) { // false = dirty
// Dirty -> flush to SD
uint8_t t = 0;
Expand Down

0 comments on commit c921598

Please sign in to comment.