Update stepper.cpp Bresenham for 2.0.x parity

2018-05-27 01:52:39 -05:00 · 2018-05-27 01:52:39 -05:00 · 33b7488f68
commit 33b7488f68
parent 0d6ef67e48
1 changed files with 36 additions and 30 deletions
--- a/Marlin/stepper.cpp
+++ b/Marlin/stepper.cpp
@ -100,6 +100,13 @@ bool Stepper::abort_current_block;
  bool Stepper::locked_z_motor = false, Stepper::locked_z2_motor = false;
 #endif

+/**
+ * Marlin uses the Bresenham algorithm. For a detailed explanation of theory and
+ * method see https://www.cs.helsinki.fi/group/goa/mallinnus/lines/bresenh.html
+ *
+ * The implementation used here additionally rounds up the starting seed.
+ */
+
 int32_t Stepper::counter_X = 0,
        Stepper::counter_Y = 0,
        Stepper::counter_Z = 0,
@ -369,15 +376,15 @@ void Stepper::set_directions() {

 #if ENABLED(S_CURVE_ACCELERATION)
  /**
-   *   We are using a quintic (fifth-degree) Bézier polynomial for the velocity curve.
-   *  This gives us a "linear pop" velocity curve; with pop being the sixth derivative of position:
+   *  This uses a quintic (fifth-degree) Bézier polynomial for the velocity curve, giving
+   *  a "linear pop" velocity curve; with pop being the sixth derivative of position:
   *  velocity - 1st, acceleration - 2nd, jerk - 3rd, snap - 4th, crackle - 5th, pop - 6th
   *
   *  The Bézier curve takes the form:
   *
   *  V(t) = P_0 * B_0(t) + P_1 * B_1(t) + P_2 * B_2(t) + P_3 * B_3(t) + P_4 * B_4(t) + P_5 * B_5(t)
   *
-   *   Where 0 <= t <= 1, and V(t) is the velocity. P_0 through P_5 are the control points, and B_0(t)
+   *  Where 0 <= t <= 1, and V(t) is the velocity. P_0 through P_5 are the control points, and B_0(t)
   *  through B_5(t) are the Bernstein basis as follows:
   *
   *        B_0(t) =   (1-t)^5        =   -t^5 +  5t^4 - 10t^3 + 10t^2 -  5t   +   1
@ -390,7 +397,7 @@ void Stepper::set_directions() {
   *                                      |       |       |       |       |       |
   *                                      A       B       C       D       E       F
   *
-   *   Unfortunately, we cannot use forward-differencing to calculate each position through
+   *  Unfortunately, we cannot use forward-differencing to calculate each position through
   *  the curve, as Marlin uses variable timer periods. So, we require a formula of the form:
   *
   *        V_f(t) = A*t^5 + B*t^4 + C*t^3 + D*t^2 + E*t + F
@ -405,7 +412,7 @@ void Stepper::set_directions() {
   *        E = - 5*P_0 +  5*P_1
   *        F =     P_0
   *
-   *   Now, since we will (currently) *always* want the initial acceleration and jerk values to be 0,
+   *  Now, since we will (currently) *always* want the initial acceleration and jerk values to be 0,
   *  We set P_i = P_0 = P_1 = P_2 (initial velocity), and P_t = P_3 = P_4 = P_5 (target velocity),
   *  which, after simplification, resolves to:
   *
@ -416,12 +423,12 @@ void Stepper::set_directions() {
   *        E = 0
   *        F = P_i
   *
-   *   As the t is evaluated in non uniform steps here, there is no other way rather than evaluating
+   *  As the t is evaluated in non uniform steps here, there is no other way rather than evaluating
   *  the Bézier curve at each point:
   *
   *        V_f(t) = A*t^5 + B*t^4 + C*t^3 + F          [0 <= t <= 1]
   *
-   *   Floating point arithmetic execution time cost is prohibitive, so we will transform the math to
+   * Floating point arithmetic execution time cost is prohibitive, so we will transform the math to
   * use fixed point values to be able to evaluate it in realtime. Assuming a maximum of 250000 steps
   * per second (driver pulses should at least be 2µS hi/2µS lo), and allocating 2 bits to avoid
   * overflows on the evaluation of the Bézier curve, means we can use
@ -432,7 +439,7 @@ void Stepper::set_directions() {
   *   C:   signed Q24.7 ,            |range = +/- 250000 *10 * 128 = +/- 320000000 = 0x1312D000 | 29 bits + sign
   *   F:   signed Q24.7 ,            |range = +/- 250000     * 128 =      32000000 = 0x01E84800 | 25 bits + sign
   *
-   *  The trapezoid generator state contains the following information, that we will use to create and evaluate
+   * The trapezoid generator state contains the following information, that we will use to create and evaluate
   * the Bézier curve:
   *
   *  blk->step_event_count [TS] = The total count of steps for this movement. (=distance)
@ -444,7 +451,7 @@ void Stepper::set_directions() {
   *
   *  For Any 32bit CPU:
   *
-   *    At the start of each trapezoid, we calculate the coefficients A,B,C,F and Advance [AV], as follows:
+   *    At the start of each trapezoid, calculate the coefficients A,B,C,F and Advance [AV], as follows:
   *
   *      A =  6*128*(VF - VI) =  768*(VF - VI)
   *      B = 15*128*(VI - VF) = 1920*(VI - VF)
@ -452,7 +459,7 @@ void Stepper::set_directions() {
   *      F =    128*VI        =  128*VI
   *     AV = (1<<32)/TS      ~= 0xFFFFFFFF / TS (To use ARM UDIV, that is 32 bits) (this is computed at the planner, to offload expensive calculations from the ISR)
   *
-   *   And for each point, we will evaluate the curve with the following sequence:
+   *    And for each point, evaluate the curve with the following sequence:
   *
   *      void lsrs(uint32_t& d, uint32_t s, int cnt) {
   *        d = s >> cnt;
@ -505,10 +512,10 @@ void Stepper::set_directions() {
   *        return alo;
   *      }
   *
-   *    This will be rewritten in ARM assembly to get peak performance and will take 43 cycles to execute
+   *  This is rewritten in ARM assembly for optimal performance (43 cycles to execute).
   *
-   *  For AVR, we scale precision of coefficients to make it possible to evaluate the Bézier curve in
-   *    realtime: Let's reduce precision as much as possible. After some experimentation we found that:
+   *  For AVR, the precision of coefficients is scaled so the Bézier curve can be evaluated in real-time:
+   *  Let's reduce precision as much as possible. After some experimentation we found that:
   *
   *    Assume t and AV with 24 bits is enough
   *       A =  6*(VF - VI)
@ -517,9 +524,9 @@ void Stepper::set_directions() {
   *       F =     VI
   *      AV = (1<<24)/TS   (this is computed at the planner, to offload expensive calculations from the ISR)
   *
-   *     Instead of storing sign for each coefficient, we will store its absolute value,
+   *    Instead of storing sign for each coefficient, we will store its absolute value,
   *    and flag the sign of the A coefficient, so we can save to store the sign bit.
-   *     It always holds that sign(A) = - sign(B) = sign(C)
+   *    It always holds that sign(A) = - sign(B) = sign(C)
   *
   *     So, the resulting range of the coefficients are:
   *
@ -529,7 +536,7 @@ void Stepper::set_directions() {
   *       C:   signed Q24 , range = 250000 *10 = 2500000 = 0x1312D0 | 21 bits
   *       F:   signed Q24 , range = 250000     =  250000 = 0x0ED090 | 20 bits
   *
-   *    And for each curve, we estimate its coefficients with:
+   *    And for each curve, estimate its coefficients with:
   *
   *      void _calc_bezier_curve_coeffs(int32_t v0, int32_t v1, uint32_t av) {
   *       // Calculate the Bézier coefficients
@ -548,7 +555,7 @@ void Stepper::set_directions() {
   *       bezier_F = v0;
   *      }
   *
-   *    And for each point, we will evaluate the curve with the following sequence:
+   *    And for each point, evaluate the curve with the following sequence:
   *
   *      // unsigned multiplication of 24 bits x 24bits, return upper 16 bits
   *      void umul24x24to16hi(uint16_t& r, uint24_t op1, uint24_t op2) {
@ -598,9 +605,8 @@ void Stepper::set_directions() {
   *        }
   *        return acc;
   *      }
-   *    Those functions will be translated into assembler to get peak performance. coefficient calculations takes 70 cycles,
-   *    Bezier point evaluation takes 150 cycles
-   *
+   *    These functions are translated to assembler for optimal performance.
+   *    Coefficient calculation takes 70 cycles. Bezier point evaluation takes 150 cycles.
   */

  // For AVR we use assembly to maximize speed
@ -1138,7 +1144,7 @@ hal_timer_t Stepper::isr_scheduler() {

  // Limit the amount of iterations
  uint8_t max_loops = 10;
-  
+
  // We need this variable here to be able to use it in the following loop
  hal_timer_t min_ticks;
  do {
@ -1258,12 +1264,12 @@ void Stepper::stepper_pulse_phase_isr() {
    // Advance the Bresenham counter; start a pulse if the axis needs a step
    #define PULSE_START(AXIS) do{ \
      _COUNTER(AXIS) += current_block->steps[_AXIS(AXIS)]; \
-      if (_COUNTER(AXIS) > 0) { _APPLY_STEP(AXIS)(!_INVERT_STEP_PIN(AXIS), 0); } \
+      if (_COUNTER(AXIS) >= 0) { _APPLY_STEP(AXIS)(!_INVERT_STEP_PIN(AXIS), 0); } \
    }while(0)

    // Advance the Bresenham counter; start a pulse if the axis needs a step
    #define STEP_TICK(AXIS) do { \
-      if (_COUNTER(AXIS) > 0) { \
+      if (_COUNTER(AXIS) >= 0) { \
        _COUNTER(AXIS) -= current_block->step_event_count; \
        count_position[_AXIS(AXIS)] += count_direction[_AXIS(AXIS)]; \
      } \
@ -1351,7 +1357,7 @@ void Stepper::stepper_pulse_phase_isr() {

    #if ENABLED(LIN_ADVANCE)
      counter_E += current_block->steps[E_AXIS];
-      if (counter_E > 0) {
+      if (counter_E >= 0) {
        #if DISABLED(MIXING_EXTRUDER)
          // Don't step E here for mixing extruder
          motor_direction(E_AXIS) ? --e_steps : ++e_steps;
@ -1363,7 +1369,7 @@ void Stepper::stepper_pulse_phase_isr() {
        const bool dir = motor_direction(E_AXIS);
        MIXING_STEPPERS_LOOP(j) {
          counter_m[j] += current_block->steps[E_AXIS];
-          if (counter_m[j] > 0) {
+          if (counter_m[j] >= 0) {
            counter_m[j] -= current_block->mix_event_count[j];
            dir ? --e_steps[j] : ++e_steps[j];
          }
@ -1380,7 +1386,7 @@ void Stepper::stepper_pulse_phase_isr() {
          // Step mixing steppers (proportionally)
          counter_m[j] += current_block->steps[E_AXIS];
          // Step when the counter goes over zero
-          if (counter_m[j] > 0) En_STEP_WRITE(j, !INVERT_E_STEP_PIN);
+          if (counter_m[j] >= 0) En_STEP_WRITE(j, !INVERT_E_STEP_PIN);
        }
      #else // !MIXING_EXTRUDER
        PULSE_START(E);
@ -1420,7 +1426,7 @@ void Stepper::stepper_pulse_phase_isr() {
    #if DISABLED(LIN_ADVANCE)
      #if ENABLED(MIXING_EXTRUDER)
        MIXING_STEPPERS_LOOP(j) {
-          if (counter_m[j] > 0) {
+          if (counter_m[j] >= 0) {
            counter_m[j] -= current_block->mix_event_count[j];
            En_STEP_WRITE(j, INVERT_E_STEP_PIN);
          }
@ -1702,11 +1708,11 @@ uint32_t Stepper::stepper_block_phase_isr() {
        bezier_2nd_half = false;
      #endif

-      // Initialize Bresenham counters to 1/2 the ceiling
-      counter_X = counter_Y = counter_Z = counter_E = -((int32_t)(current_block->step_event_count >> 1));
+      // Initialize Bresenham counters to 1/2 the ceiling, with proper roundup (as explained in the article linked above)
+      counter_X = counter_Y = counter_Z = counter_E = -int32_t((current_block->step_event_count >> 1) + (current_block->step_event_count & 1));
      #if ENABLED(MIXING_EXTRUDER)
        MIXING_STEPPERS_LOOP(i)
-          counter_m[i] = -(current_block->mix_event_count[i] >> 1);
+          counter_m[i] = -int32_t((current_block->mix_event_count[i] >> 1) + (current_block->mix_event_count[i] & 1));
      #endif

      #if ENABLED(Z_LATE_ENABLE)