diff --git a/Marlin/stepper.cpp b/Marlin/stepper.cpp index 57bef81a8..9e11aca4e 100644 --- a/Marlin/stepper.cpp +++ b/Marlin/stepper.cpp @@ -243,59 +243,63 @@ volatile int32_t Stepper::endstops_trigsteps[XYZ]; // intRes = longIn1 * longIn2 >> 24 // uses: -// r26 to store 0 -// r27 to store bits 16-23 of the 48bit result. The top bit is used to round the two byte result. +// A[tmp] to store 0 +// B[tmp] to store bits 16-23 of the 48bit result. The top bit is used to round the two byte result. // note that the lower two bytes and the upper byte of the 48bit result are not calculated. // this can cause the result to be out by one as the lower bytes may cause carries into the upper ones. -// B0 A0 are bits 24-39 and are the returned value -// C1 B1 A1 is longIn1 -// D2 C2 B2 A2 is longIn2 +// B A are bits 24-39 and are the returned value +// C B A is longIn1 +// D C B A is longIn2 // -#define MultiU24X32toH16(intRes, longIn1, longIn2) \ - asm volatile ( \ - A("clr r26") \ - A("mul %A1, %B2") \ - A("mov r27, r1") \ - A("mul %B1, %C2") \ - A("movw %A0, r0") \ - A("mul %C1, %C2") \ - A("add %B0, r0") \ - A("mul %C1, %B2") \ - A("add %A0, r0") \ - A("adc %B0, r1") \ - A("mul %A1, %C2") \ - A("add r27, r0") \ - A("adc %A0, r1") \ - A("adc %B0, r26") \ - A("mul %B1, %B2") \ - A("add r27, r0") \ - A("adc %A0, r1") \ - A("adc %B0, r26") \ - A("mul %C1, %A2") \ - A("add r27, r0") \ - A("adc %A0, r1") \ - A("adc %B0, r26") \ - A("mul %B1, %A2") \ - A("add r27, r1") \ - A("adc %A0, r26") \ - A("adc %B0, r26") \ - A("lsr r27") \ - A("adc %A0, r26") \ - A("adc %B0, r26") \ - A("mul %D2, %A1") \ - A("add %A0, r0") \ - A("adc %B0, r1") \ - A("mul %D2, %B1") \ - A("add %B0, r0") \ - A("clr r1") \ - : \ - "=&r" (intRes) \ - : \ - "d" (longIn1), \ - "d" (longIn2) \ - : \ - "r26" , "r27" \ - ) +static FORCE_INLINE uint16_t MultiU24X32toH16(uint32_t longIn1, uint32_t longIn2) { + register uint8_t tmp1; + register uint8_t tmp2; + register uint16_t intRes; + __asm__ __volatile__( + A("clr %[tmp1]") + A("mul %A[longIn1], %B[longIn2]") + A("mov %[tmp2], r1") + A("mul %B[longIn1], %C[longIn2]") + A("movw %A[intRes], r0") + A("mul %C[longIn1], %C[longIn2]") + A("add %B[intRes], r0") + A("mul %C[longIn1], %B[longIn2]") + A("add %A[intRes], r0") + A("adc %B[intRes], r1") + A("mul %A[longIn1], %C[longIn2]") + A("add %[tmp2], r0") + A("adc %A[intRes], r1") + A("adc %B[intRes], %[tmp1]") + A("mul %B[longIn1], %B[longIn2]") + A("add %[tmp2], r0") + A("adc %A[intRes], r1") + A("adc %B[intRes], %[tmp1]") + A("mul %C[longIn1], %A[longIn2]") + A("add %[tmp2], r0") + A("adc %A[intRes], r1") + A("adc %B[intRes], %[tmp1]") + A("mul %B[longIn1], %A[longIn2]") + A("add %[tmp2], r1") + A("adc %A[intRes], %[tmp1]") + A("adc %B[intRes], %[tmp1]") + A("lsr %[tmp2]") + A("adc %A[intRes], %[tmp1]") + A("adc %B[intRes], %[tmp1]") + A("mul %D[longIn2], %A[longIn1]") + A("add %A[intRes], r0") + A("adc %B[intRes], r1") + A("mul %D[longIn2], %B[longIn1]") + A("add %B[intRes], r0") + A("clr r1") + : [intRes] "=&r" (intRes), + [tmp1] "=&r" (tmp1), + [tmp2] "=&r" (tmp2) + : [longIn1] "d" (longIn1), + [longIn2] "d" (longIn2) + : "cc" + ); + return intRes; +} // Some useful constants @@ -1506,10 +1510,7 @@ void Stepper::isr() { ? _eval_bezier_curve(acceleration_time) : current_block->cruise_rate; #else - MultiU24X32toH16(acc_step_rate, acceleration_time, current_block->acceleration_rate); - acc_step_rate += current_block->initial_rate; - - // upper limit + acc_step_rate = MultiU24X32toH16(acceleration_time, current_block->acceleration_rate) + current_block->initial_rate; NOMORE(acc_step_rate, current_block->nominal_rate); #endif @@ -1540,7 +1541,6 @@ void Stepper::isr() { #if ENABLED(BEZIER_JERK_CONTROL) // If this is the 1st time we process the 2nd half of the trapezoid... if (!bezier_2nd_half) { - // Initialize the Bézier speed curve _calc_bezier_curve_coeffs(current_block->cruise_rate, current_block->final_rate, current_block->deceleration_time_inverse); bezier_2nd_half = true; @@ -1553,14 +1553,14 @@ void Stepper::isr() { #else // Using the old trapezoidal control - MultiU24X32toH16(step_rate, deceleration_time, current_block->acceleration_rate); - + step_rate = MultiU24X32toH16(deceleration_time, current_block->acceleration_rate); if (step_rate < acc_step_rate) { // Still decelerating? step_rate = acc_step_rate - step_rate; NOLESS(step_rate, current_block->final_rate); } else step_rate = current_block->final_rate; + #endif // step_rate to timer interval diff --git a/Marlin/stepper.h b/Marlin/stepper.h index 7b3dd599d..5dec78390 100644 --- a/Marlin/stepper.h +++ b/Marlin/stepper.h @@ -61,26 +61,28 @@ extern Stepper stepper; // uses: // r26 to store 0 // r27 to store the byte 1 of the 24 bit result -#define MultiU16X8toH16(intRes, charIn1, intIn2) \ - asm volatile ( \ - A("clr r26") \ - A("mul %A1, %B2") \ - A("movw %A0, r0") \ - A("mul %A1, %A2") \ - A("add %A0, r1") \ - A("adc %B0, r26") \ - A("lsr r0") \ - A("adc %A0, r26") \ - A("adc %B0, r26") \ - A("clr r1") \ - : \ - "=&r" (intRes) \ - : \ - "d" (charIn1), \ - "d" (intIn2) \ - : \ - "r26" \ - ) +static FORCE_INLINE uint16_t MultiU16X8toH16(uint8_t charIn1, uint16_t intIn2) { + register uint8_t tmp; + register uint16_t intRes; + __asm__ __volatile__ ( + A("clr %[tmp]") + A("mul %[charIn1], %B[intIn2]") + A("movw %A[intRes], r0") + A("mul %[charIn1], %A[intIn2]") + A("add %A[intRes], r1") + A("adc %B[intRes], %[tmp]") + A("lsr r0") + A("adc %A[intRes], %[tmp]") + A("adc %B[intRes], %[tmp]") + A("clr r1") + : [intRes] "=&r" (intRes), + [tmp] "=&r" (tmp) + : [charIn1] "d" (charIn1), + [intIn2] "d" (intIn2) + : "cc" + ); + return intRes; +} class Stepper { @@ -346,17 +348,15 @@ class Stepper { NOLESS(step_rate, F_CPU / 500000); step_rate -= F_CPU / 500000; // Correct for minimal speed if (step_rate >= (8 * 256)) { // higher step rate - unsigned short table_address = (unsigned short)&speed_lookuptable_fast[(unsigned char)(step_rate >> 8)][0]; - unsigned char tmp_step_rate = (step_rate & 0x00FF); - unsigned short gain = (unsigned short)pgm_read_word_near(table_address + 2); - MultiU16X8toH16(timer, tmp_step_rate, gain); - timer = (unsigned short)pgm_read_word_near(table_address) - timer; + uint16_t table_address = (uint16_t)&speed_lookuptable_fast[(uint8_t)(step_rate >> 8)][0], + gain = (uint16_t)pgm_read_word_near(table_address + 2); + timer = (uint16_t)pgm_read_word_near(table_address) - MultiU16X8toH16(step_rate & 0x00FF, gain); } else { // lower step rates - unsigned short table_address = (unsigned short)&speed_lookuptable_slow[0][0]; + uint16_t table_address = (uint16_t)&speed_lookuptable_slow[0][0]; table_address += ((step_rate) >> 1) & 0xFFFC; - timer = (unsigned short)pgm_read_word_near(table_address); - timer -= (((unsigned short)pgm_read_word_near(table_address + 2) * (unsigned char)(step_rate & 0x0007)) >> 3); + timer = (uint16_t)pgm_read_word_near(table_address) + - (((uint16_t)pgm_read_word_near(table_address + 2) * (uint8_t)(step_rate & 0x0007)) >> 3); } if (timer < 100) { // (20kHz - this should never happen) timer = 100;