2018-05-12 08:34:04 -05:00
/**
* Marlin 3 D Printer Firmware
2020-02-03 08:00:57 -06:00
* Copyright ( c ) 2020 MarlinFirmware [ https : //github.com/MarlinFirmware/Marlin]
2018-05-12 08:34:04 -05:00
*
* Based on Sprinter and grbl .
2019-06-27 23:57:50 -05:00
* Copyright ( c ) 2011 Camiel Gubbels / Erik van der Zalm
2018-05-12 08:34:04 -05:00
*
* This program is free software : you can redistribute it and / or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation , either version 3 of the License , or
* ( at your option ) any later version .
*
* This program is distributed in the hope that it will be useful ,
* but WITHOUT ANY WARRANTY ; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
* GNU General Public License for more details .
*
* You should have received a copy of the GNU General Public License
2020-07-23 05:20:14 +02:00
* along with this program . If not , see < https : //www.gnu.org/licenses/>.
2018-05-12 08:34:04 -05:00
*
*/
2018-11-01 22:11:51 +01:00
# pragma once
2018-05-12 08:34:04 -05:00
2021-02-06 05:43:51 +01:00
# include "../../inc/MarlinConfigPre.h"
2018-05-12 08:34:04 -05:00
/**
2017-10-08 17:38:10 +01:00
* Busy-wait delay routines:
*
*  DELAY_CYCLES(count): Delay execution in cycles
*  DELAY_NS(count): Delay execution in nanoseconds
*  DELAY_US(count): Delay execution in microseconds
*/
2018-05-12 08:34:04 -05:00
2019-09-17 18:16:28 -05:00
# include "../../core/macros.h"
2018-05-11 02:27:36 -03:00
2021-02-06 05:43:51 +01:00
// Declaration only; the definition is not part of this header.
// NOTE(review): presumably measures/selects the delay routine used for busy-waiting -- confirm against the definition.
void calibrate_delay_loop ( ) ;
2018-05-12 08:34:04 -05:00
2021-02-06 05:43:51 +01:00
# if defined(__arm__) || defined(__thumb__)
2018-05-12 08:34:04 -05:00
2021-02-06 05:43:51 +01:00
// The cycle-delay routine must have the lowest possible overhead, so it is reached through a
// function pointer that is adjusted at runtime to the best implementation for the current CPU.
// DelayImpl: signature of such a routine (one uint32_t argument, no return value).
typedef void ( * DelayImpl ) ( uint32_t ) ;
// Pointer to the currently selected delay routine; declared here, defined elsewhere.
extern DelayImpl DelayCycleFnc ;
2018-05-12 08:34:04 -05:00
2021-02-06 05:43:51 +01:00
// Cycle count from which a compile-time delay calls the delay function instead of being
// expanded into inline NOPs. Calling the cycle-waiting function was measured at 36 cycles on
// the original author's system; a slightly larger value only costs a bit more flash.
# define TRIP_POINT_FOR_CALLING_FUNCTION 40
2018-05-12 08:34:04 -05:00
2021-02-06 05:43:51 +01:00
// Compile-time NOP generator.
// NopWriter<N>::build() emits one 'nop' and then recurses into NopWriter<N - 1>,
// so a fully inlined call expands to exactly N consecutive 'nop' instructions.
template <int N>
struct NopWriter {
  FORCE_INLINE static void build() {
    __asm__ __volatile__(" nop ");
    NopWriter<N - 1>::build();
  }
};

// Recursion terminator: nothing left to emit.
template <>
struct NopWriter<0> {
  FORCE_INLINE static void build() {}
};
namespace Private {

  // The recursion is split across two different class templates (Helper here, NopWriter for
  // the NOPs) so that the maximum template instantiation depth is not reached.

  // General case (belowTP == false): hand the wait over to the delay function.
  template <bool belowTP, int N>
  struct Helper {
    FORCE_INLINE static void build() {
      DelayCycleFnc(N - 2); // Approximate cost of calling the function (might be off by 1 or 2 cycles)
    }
  };

  // belowTP == true: expand into inline NOPs instead of calling the function.
  // Emits N - 1 NOPs.
  template <int N>
  struct Helper<true, N> {
    FORCE_INLINE static void build() {
      NopWriter<N - 1>::build();
    }
  };

  // Zero cycles requested: emit nothing.
  template <>
  struct Helper<true, 0> {
    FORCE_INLINE static void build() {}
  };

}
// SmartDelay selects its behavior from the constexpr-ness of the requested cycle count.
//  - Compile-time count (this primary template): write as many NOPs as needed to reach the
//    requested count; from TRIP_POINT_FOR_CALLING_FUNCTION cycles upward, calling the delay
//    function is more profitable than executing NOPs, so that is done instead.
//  - A count that is not a compile-time constant falls back to a run-time counting version.
// The constructor argument is ignored here; the count is taken from the Cycles parameter.
template <bool compileTime, int Cycles>
struct SmartDelay {
  FORCE_INLINE SmartDelay(int) {
    if (Cycles != 0) {
      Private::Helper<(Cycles < TRIP_POINT_FOR_CALLING_FUNCTION), Cycles>::build();
    }
  }
};
// Run-time variant: the cycle count is only known when the code executes, so it is simply
// forwarded to the delay function. This cannot run in fewer than roughly
// TRIP_POINT_FOR_CALLING_FUNCTION cycles.
template <int T>
struct SmartDelay<false, T> {
  FORCE_INLINE SmartDelay(int cycles) {
    DelayCycleFnc(cycles);
  }
};
2018-11-01 22:11:51 +01:00
2021-02-06 05:43:51 +01:00
// Delay execution for X CPU cycles.
// When IS_CONSTEXPR(X) is true, SmartDelay<true, X> is instantiated with the count as a
// template argument; otherwise SmartDelay<false, 0> is used and X is passed at run time.
// X is parenthesized inside the template argument list: without the parentheses an argument
// containing '>' or '>>' (e.g. DELAY_CYCLES(n >> 1)) would close the list early and fail to compile.
#define DELAY_CYCLES(X) do { SmartDelay<IS_CONSTEXPR(X), IS_CONSTEXPR(X) ? (X) : 0> _smrtdly_X(X); } while(0)
2018-11-01 22:11:51 +01:00
2021-02-06 05:43:51 +01:00
// Delay in microseconds: x is converted to CPU cycles using F_CPU and the delay function is
// called directly, without the compile-time / run-time smart delay selection.
// The Teensy compiler is too old and does not accept that selection correctly.
# define DELAY_US(x) DelayCycleFnc((x) * ((F_CPU) / 1000000UL))
2018-05-12 08:34:04 -05:00
# elif defined(__AVR__)
2021-07-09 04:55:34 +02:00
// Emit a fixed instruction sequence for a delay of 1, 2 or 3 cycles.
// 'RJMP .+0' (a jump to the next instruction) is used as the 2-cycle filler and 'NOP' as the
// 1-cycle filler. Any other value of `cycles` (including 0) emits nothing.
FORCE_INLINE static void __delay_up_to_3c(uint8_t cycles) {
  switch (cycles) {
    case 1:
      __asm__ __volatile__(A(" NOP "));
      break;
    case 2:
      __asm__ __volatile__(A(" RJMP .+0 "));
      break;
    case 3:
      __asm__ __volatile__(A(" RJMP .+0 ") A(" NOP "));
      break;
    default:
      break; // Nothing to emit
  }
}
2017-10-08 17:38:10 +01:00
// Delay in cycles
2021-07-09 04:55:34 +02:00
// Busy-wait for `cycles` CPU cycles.
//
// Argument known to the compiler as a constant (checked with __builtin_constant_p):
//   0..3 cycles -> __delay_up_to_3c(cycles)
//   4 cycles    -> two 2-cycle fillers
//   5 or more   -> a 0..3 cycle filler for the remainder, followed by an LDI/LDI + SBIW/BRCC
//                  loop whose round count is loaded from immediate constants; the cycle cost
//                  of each instruction is annotated on the asm lines below
// Any other argument: a run-time SBIW/BRCC loop that subtracts 4 from `cycles` on every pass
// and leaves the loop once the subtraction sets the carry flag.
FORCE_INLINE static void DELAY_CYCLES ( uint16_t cycles ) {
if ( __builtin_constant_p ( cycles ) ) {
if ( cycles < = 3 ) {
__delay_up_to_3c ( cycles ) ;
}
else if ( cycles = = 4 ) {
__delay_up_to_3c ( 2 ) ;
__delay_up_to_3c ( 2 ) ;
2018-05-12 08:34:04 -05:00
}
else {
2021-07-09 04:55:34 +02:00
cycles - = 1 + 4 ; // Compensate for the first LDI (1) and the first round (4)
// Burn the part that is not a multiple of 4 up front; the loop below works in 4-cycle rounds
__delay_up_to_3c ( cycles % 4 ) ;
cycles / = 4 ;
// The following code burns [1 + 4 * (rounds+1)] cycles
uint16_t dummy ;
__asm__ __volatile__ (
// "manually" load counter from constants, otherwise the compiler may optimize this part away
A ( " LDI %A[rounds], %[l] " ) // 1c
A ( " LDI %B[rounds], %[h] " ) // 1c (compensating the non branching BRCC)
L ( " 1 " )
A ( " SBIW %[rounds], 1 " ) // 2c
A ( " BRCC 1b " ) // 2c when branching, else 1c (end of loop)
: // Outputs ...
[ rounds ] " =w " ( dummy ) // Restrict to a wo (=) 16 bit register pair (w)
: // Inputs ...
[ l ] " M " ( cycles % 256 ) , // Restrict to 0..255 constant (M)
[ h ] " M " ( cycles / 256 ) // Restrict to 0..255 constant (M)
: // Clobbers ...
" cc " // Indicate we are modifying flags like Carry (cc)
) ;
2018-05-12 08:34:04 -05:00
}
}
2021-07-09 04:55:34 +02:00
else {
// Run-time count: `cycles` itself is the loop counter and is decremented in place
__asm__ __volatile__ (
L ( " 1 " )
A ( " SBIW %[cycles], 4 " ) // 2c
A ( " BRCC 1b " ) // 2c when branching, else 1c (end of loop)
: [ cycles ] " +w " ( cycles ) // output: Restrict to a rw (+) 16 bit register pair (w)
: // input: -
: " cc " // clobbers: We are modifying flags like Carry (cc)
) ;
}
2018-05-12 08:34:04 -05:00
}
2021-02-06 05:43:51 +01:00
// Delay in microseconds: x is converted to CPU cycles using F_CPU and passed to DELAY_CYCLES.
// NOTE: DELAY_CYCLES takes a uint16_t here, so a cycle count above 65535 is truncated.
# define DELAY_US(x) DELAY_CYCLES((x) * ((F_CPU) / 1000000UL))
2021-07-22 01:01:23 +01:00
# elif defined(ESP32) || defined(__PLAT_LINUX__) || defined(__PLAT_NATIVE_SIM__)
2019-02-22 19:09:10 -06:00
2021-02-06 05:43:51 +01:00
// DELAY_CYCLES specified inside platform
2019-02-22 19:09:10 -06:00
2021-02-06 05:43:51 +01:00
// Delay in microseconds: x is converted to CPU cycles using F_CPU and passed to DELAY_CYCLES.
// DELAY_CYCLES is not defined in this branch; it is expected to come from the platform code.
# define DELAY_US(x) DELAY_CYCLES((x) * ((F_CPU) / 1000000UL))
2018-05-12 08:34:04 -05:00
# else
2017-10-08 17:38:10 +01:00
2018-05-12 08:34:04 -05:00
# error "Unsupported MCU architecture"
2017-10-08 17:38:10 +01:00
2018-05-12 08:34:04 -05:00
# endif
2021-04-05 16:34:31 -05:00
/**************************************************************
* Delay in nanoseconds. Requires the F_CPU macro.
* These macros follow avr-libc delay conventions.
*
* For AVR there are three possible operation modes, due to its
* slower clock speeds and thus coarser delay resolution. For
* example, when F_CPU = 16000000 the resolution is 62.5ns.
*
* Round up (default)
* Round up the delay according to the CPU clock resolution.
* e.g., 100 will give a delay of 2 cycles (125ns).
*
* Round down (DELAY_NS_ROUND_DOWN)
* Round down the delay according to the CPU clock resolution.
* e.g., 100 will be rounded down to 1 cycle (62.5ns).
*
* Nearest (DELAY_NS_ROUND_CLOSEST)
* Round the delay to the nearest number of clock cycles.
* e.g., 165 will be rounded up to 3 cycles (187.5ns) because
* it's closer to the requested delay than 2 cycles (125ns).
*/
2021-02-06 05:43:51 +01:00
2021-04-05 16:34:31 -05:00
// The rounding options only apply to AVR: on every other architecture both are undefined,
// so the default (round up) variant of DELAY_NS is selected.
# ifndef __AVR__
# undef DELAY_NS_ROUND_DOWN
# undef DELAY_NS_ROUND_CLOSEST
# endif
2021-02-06 05:43:51 +01:00
2021-04-05 16:34:31 -05:00
// DELAY_NS(x): convert x nanoseconds to CPU cycles and pass the result to DELAY_CYCLES.
// cycles = x * (F_CPU in whole MHz) / 1000, rounded according to the selected mode.
// F_CPU is reduced to whole MHz by integer division first, so a fractional-MHz part of the
// clock is dropped.
# if ENABLED(DELAY_NS_ROUND_DOWN)
# define DELAY_NS(x) DELAY_CYCLES((x) * ((F_CPU) / 1000000UL) / 1000UL) // floor
# elif ENABLED(DELAY_NS_ROUND_CLOSEST)
# define DELAY_NS(x) DELAY_CYCLES(((x) * ((F_CPU) / 1000000UL) + 500) / 1000UL) // round
# else
# define DELAY_NS(x) DELAY_CYCLES(((x) * ((F_CPU) / 1000000UL) + 999) / 1000UL) // "ceil"
# endif