Skip to content

Commit

Permalink
⚡️ Optimize speed lookup for AVR
Browse files Browse the repository at this point in the history
Co-Authored-By: tombrazier <[email protected]>
  • Loading branch information
thinkyhead and tombrazier committed Mar 13, 2023
1 parent 3365172 commit 143a870
Show file tree
Hide file tree
Showing 4 changed files with 171 additions and 176 deletions.
34 changes: 14 additions & 20 deletions Marlin/src/HAL/AVR/math.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,32 +84,26 @@ FORCE_INLINE static uint16_t MultiU24X32toH16(uint32_t longIn1, uint32_t longIn2
return intRes;
}

// intRes = intIn1 * intIn2 >> 8
// charRes = (charIn1 * charIn2) >> 8, rounded to nearest
// uses:
// r1, r0 for the result of mul. After the second mul, r0 holds bits 0-7 of the 24 bit result and
// the top bit of r0 is used for rounding.
// [tmp] to store 0.
// [intRes] (A B) is bits 8-15 and is the returned value.
// r1, r0 for the result of mul. After the mul, r0 holds bits 0-7 of the 16 bit result,
// and the top bit of r0 is used for rounding.
// [charRes] is bits 8-15 and is the returned value.
// [charIn1] is an 8 bit parameter.
// [intIn2] (B A) is a 16 bit parameter.
// [charIn2] is an 8 bit parameter.
//
FORCE_INLINE static uint16_t MultiU8X16toH16(uint8_t charIn1, uint16_t intIn2) {
uint8_t tmp;
uint16_t intRes;
FORCE_INLINE static uint8_t MultiU8X8toH8(uint8_t charIn1, uint8_t charIn2) {
uint8_t charRes;
__asm__ __volatile__ (
A("clr %[tmp]")
A("mul %[charIn1], %B[intIn2]")
A("movw %A[intRes], r0")
A("mul %[charIn1], %A[intIn2]")
A("lsl r0")
A("adc %A[intRes], r1")
A("adc %B[intRes], %[tmp]")
A("mul %[charIn1], %[charIn2]")
A("mov %[charRes], r1")
A("clr r1")
: [intRes] "=&r" (intRes),
[tmp] "=&r" (tmp)
A("lsl r0")
A("adc %[charRes], r1")
: [charRes] "=&r" (charRes)
: [charIn1] "d" (charIn1),
[intIn2] "d" (intIn2)
[charIn2] "d" (charIn2)
: "cc"
);
return intRes;
return charRes;
}
27 changes: 13 additions & 14 deletions Marlin/src/module/stepper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2069,22 +2069,21 @@ hal_timer_t Stepper::calc_timer_interval(uint32_t step_rate) {

// AVR is able to keep up with a 30 kHz stepping ISR rate.
constexpr uint32_t min_step_rate = (F_CPU) / 500000U; // i.e., 32 or 40
if (step_rate <= min_step_rate) { // lower step rates
step_rate = 0;
return uint16_t(pgm_read_word(uintptr_t(speed_lookuptable_slow)));
if (step_rate >= 0x0800) { // higher step rate
const uintptr_t table_address = uintptr_t(&speed_lookuptable_fast[uint8_t(step_rate >> 8)]);
const uint16_t base = uint16_t(pgm_read_word(table_address));
const uint8_t gain = uint8_t(pgm_read_byte(table_address + 2));
return base - MultiU8X8toH8(uint8_t(step_rate & 0x00FF), gain);
}
else {
else if (step_rate > min_step_rate) { // lower step rates
step_rate -= min_step_rate; // Correct for minimal speed
if (step_rate >= 0x0800) { // higher step rate
const uintptr_t table_address = uintptr_t(&speed_lookuptable_fast[uint8_t(step_rate >> 8)]);
const uint16_t gain = uint16_t(pgm_read_word(table_address + 2));
return uint16_t(pgm_read_word(table_address)) - MultiU8X16toH16(uint8_t(step_rate & 0x00FF), gain);
}
else { // lower step rates
const uintptr_t table_address = uintptr_t(&speed_lookuptable_slow[uint8_t(step_rate >> 3)]);
return uint16_t(pgm_read_word(table_address))
- ((uint16_t(pgm_read_word(table_address + 2)) * uint8_t(step_rate & 0x0007)) >> 3);
}
const uintptr_t table_address = uintptr_t(&speed_lookuptable_slow[uint8_t(step_rate >> 3)]);
return uint16_t(pgm_read_word(table_address))
- ((uint16_t(pgm_read_word(table_address + 2)) * uint8_t(step_rate & 0x0007)) >> 3);
}
else {
step_rate = 0;
return uint16_t(pgm_read_word(uintptr_t(speed_lookuptable_slow)));
}

#endif // !CPU_32_BIT
Expand Down
Loading

0 comments on commit 143a870

Please sign in to comment.