SEGGER_RTT_ASM_ARMv7M.S 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242
  1. /*********************************************************************
  2. * (c) SEGGER Microcontroller GmbH *
  3. * The Embedded Experts *
  4. * www.segger.com *
  5. **********************************************************************
  6. -------------------------- END-OF-HEADER -----------------------------
  7. File : SEGGER_RTT_ASM_ARMv7M.S
  8. Purpose : Assembler implementation of RTT functions for ARMv7M
  9. Additional information:
  10. This module is written to be assembler-independent and works with
  11. GCC and clang (Embedded Studio) and IAR.
  12. */
  13. #define SEGGER_RTT_ASM // Used to control processed input from header file
  14. #include "SEGGER_RTT.h"
  15. /*********************************************************************
  16. *
  17. * Defines, fixed
  18. *
  19. **********************************************************************
  20. */
  21. #define _CCIAR 0 // Toolchain id: IAR assembler (__IASMARM__)
  22. #define _CCCLANG 1 // Toolchain id: GNU-style assembler syntax (__SES_ARM, __GNUC__ or __clang__)
  23. #if (defined __SES_ARM) || (defined __GNUC__) || (defined __clang__)
  24. #define _CC_TYPE _CCCLANG
  25. #define _PUB_SYM .global
  26. #define _EXT_SYM .extern
  27. #define _END .end
  28. #define _WEAK .weak
  29. #define _THUMB_FUNC .thumb_func
  30. #define _THUMB_CODE .code 16
  31. #define _WORD .word
  32. #define _SECTION(Sect, Type, AlignExp) .section Sect ##, "ax" // <Type> and <AlignExp> are not used by this variant. Flags are always "ax"
  33. #define _ALIGN(Exp) .align Exp
  34. #define _PLACE_LITS .ltorg
  35. #define _DATA_SECT_START
  36. #define _C_STARTUP _start
  37. #define _STACK_END __stack_end__
  38. #define _RAMFUNC
  39. // Notes on the section names passed to _SECTION() and on _ALIGN() for the definitions above:
  40. // .text => Link to flash
  41. // .fast => Link to RAM
  42. // OtherSect => Usually link to RAM
  43. // Alignment is 2^x (the argument of _ALIGN() is the exponent x)
  44. //
  45. #elif defined (__IASMARM__)
  46. #define _CC_TYPE _CCIAR
  47. #define _PUB_SYM PUBLIC
  48. #define _EXT_SYM EXTERN
  49. #define _END END
  50. #define _WEAK _WEAK // Self-referential: the preprocessor leaves the token _WEAK unchanged. NOTE(review): not referenced in the visible part of this file, confirm this is intended
  51. #define _THUMB_FUNC
  52. #define _THUMB_CODE THUMB
  53. #define _WORD DCD
  54. #define _SECTION(Sect, Type, AlignExp) SECTION Sect ## : ## Type ## :REORDER:NOROOT ## (AlignExp)
  55. #define _ALIGN(Exp) alignrom Exp
  56. #define _PLACE_LITS
  57. #define _DATA_SECT_START DATA
  58. #define _C_STARTUP __iar_program_start
  59. #define _STACK_END sfe(CSTACK)
  60. #define _RAMFUNC SECTION_TYPE SHT_PROGBITS, SHF_WRITE | SHF_EXECINSTR // SHT_PROGBITS is defined below only if RTT_USE_ASM == 1. SHF_WRITE / SHF_EXECINSTR are not defined in the visible part of this file
  61. // Notes on the section names passed to _SECTION() and on _ALIGN() for the definitions above:
  62. // .text => Link to flash
  63. // .textrw => Link to RAM
  64. // OtherSect => Usually link to RAM
  65. // NOROOT => Allows linker to throw away the function, if not referenced
  66. // Alignment is 2^x (the argument of _ALIGN() is the exponent x)
  67. //
  68. #endif
  69. #if (_CC_TYPE == _CCIAR) // NOTE(review): if no toolchain matched above, _CC_TYPE is undefined and evaluates to 0 here, which also selects this branch
  70. NAME SEGGER_RTT_ASM_ARMv7M
  71. #else
  72. .syntax unified
  73. #endif
  74. #if defined (RTT_USE_ASM) && (RTT_USE_ASM == 1) // Everything up to the matching #endif is assembled only if RTT_USE_ASM is defined as 1 (presumably by SEGGER_RTT.h or the build settings)
  75. #define SHT_PROGBITS 0x1 // Used by the IAR variant of _RAMFUNC
  76. /*********************************************************************
  77. *
  78. * Public / external symbols
  79. *
  80. **********************************************************************
  81. */
  82. _EXT_SYM __aeabi_memcpy // Declared, but not referenced by the code visible in this file
  83. _EXT_SYM __aeabi_memcpy4 // Declared, but not referenced by the code visible in this file
  84. _EXT_SYM _SEGGER_RTT // RTT control block. Its address is loaded by SEGGER_RTT_ASM_WriteSkipNoLock
  85. _PUB_SYM SEGGER_RTT_ASM_WriteSkipNoLock // Exported entry point implemented below
  86. /*********************************************************************
  87. *
  88. * SEGGER_RTT_WriteSkipNoLock
  89. *
  90. * Function description
  91. * Stores a specified number of characters in SEGGER RTT
  92. * control block which is then read by the host.
  93. * SEGGER_RTT_WriteSkipNoLock does not lock the application and
  94. * skips all data, if the data does not fit into the buffer.
  95. *
  96. * Parameters
  97. * BufferIndex Index of "Up"-buffer to be used (e.g. 0 for "Terminal").
  98. * pBuffer Pointer to character array. Does not need to point to a \0 terminated string.
  99. * NumBytes Number of bytes to be stored in the SEGGER RTT control block.
  100. * MUST be > 0!!!
  101. * This is done for performance reasons, so no initial check has to be done.
  102. *
  103. * Return value
  104. * 1: Data has been copied
  105. * 0: No space, data has not been copied
  106. *
  107. * Notes
  108. * (1) If there is not enough space in the "Up"-buffer, all data is dropped.
  109. * (2) For performance reasons this function does not call Init()
  110. * and may only be called after RTT has been initialized.
  111. * Either by calling SEGGER_RTT_Init() or calling another RTT API function first.
  112. */
  113. _SECTION(.text, CODE, 2)
  114. _ALIGN(2)
  115. _THUMB_FUNC
  116. SEGGER_RTT_ASM_WriteSkipNoLock: // unsigned SEGGER_RTT_ASM_WriteSkipNoLock(unsigned BufferIndex, const void* pData, unsigned NumBytes) {
  117. // Copies <NumBytes> bytes from <pData> into up-buffer <BufferIndex> if they fit completely and returns 1, otherwise copies nothing and returns 0
  118. // Cases:
  119. // 1) RdOff <= WrOff => Space until wrap-around is sufficient
  120. // 2) RdOff <= WrOff => Space after wrap-around needed (copy in 2 chunks)
  121. // 3) RdOff <= WrOff => No space in buf
  122. // 4) RdOff > WrOff => Space is sufficient
  123. // 5) RdOff > WrOff => No space in buf
  124. //
  125. // 1) is the most common case for large buffers and assuming that J-Link reads the data fast enough
  126. //
  127. // Register usage:
  128. // R0 <BufferIndex> on entry. Temporary needed as RdOff, <Tmp> register later on. Return value on exit
  129. // R1 pData (advanced by the post-indexed loads in the copy loops)
  130. // R2 <NumBytes> (MUST be > 0 because the copy loops store a byte before the counter is tested)
  131. // R3 <Tmp> register. Hold free for subroutine calls
  132. // R4 <Rem>
  133. // R5 pRing->pBuffer
  134. // R6 pRing (Points to active up-buffer descriptor _SEGGER_RTT.aUp[BufferIndex])
  135. // R7 WrOff
  136. // R4-R7 are saved on entry and restored before each of the three returns
  137. PUSH {R4-R7}
  138. ADD R3,R0,R0, LSL #+1 // R3 = BufferIndex * 3
  139. LDR.W R0,=_SEGGER_RTT // pRing = &_SEGGER_RTT.aUp[BufferIndex];
  140. ADD R0,R0,R3, LSL #+3 // R0 += (BufferIndex * 3) << 3 => Stride of 24 bytes per buffer index
  141. ADD R6,R0,#+24 // aUp[] is addressed at offset 24 from _SEGGER_RTT
  142. LDR R0,[R6, #+16] // RdOff = pRing->RdOff;
  143. LDR R7,[R6, #+12] // WrOff = pRing->WrOff;
  144. LDR R5,[R6, #+4] // pRing->pBuffer
  145. CMP R7,R0
  146. BCC.N _CheckCase4 // if (RdOff <= WrOff) { => Case 1), 2) or 3). Branch is taken for WrOff < RdOff (unsigned compare)
  147. //
  148. // Handling for case 1, later on identical to case 4
  149. //
  150. LDR R3,[R6, #+8] // Avail = pRing->SizeOfBuffer - WrOff - 1u; => Space until wrap-around (assume 1 byte not usable for case that RdOff == 0)
  151. SUBS R4,R3,R7 // <Rem> (Used in case we jump into case 2 afterwards)
  152. SUBS R3,R4,#+1 // <Avail>
  153. CMP R3,R2
  154. BCC.N _CheckCase2 // if (Avail >= NumBytes) { => Case 1)?
  155. _Case4:
  156. ADDS R5,R7,R5 // pBuffer += WrOff
  157. ADDS R0,R2,R7 // v = WrOff + NumBytes
  158. //
  159. // 2x unrolling for the copy loop that is used most of the time
  160. // This is a special optimization for small SystemView packets and makes them even faster
  161. //
  162. _ALIGN(2)
  163. _LoopCopyStraight: // memcpy(pRing->pBuffer + WrOff, pData, NumBytes);
  164. LDRB R3,[R1], #+1
  165. STRB R3,[R5], #+1 // *pDest++ = *pSrc++
  166. SUBS R2,R2,#+1
  167. BEQ _CSDone // Exit taken when <NumBytes> is odd
  168. LDRB R3,[R1], #+1
  169. STRB R3,[R5], #+1 // *pDest++ = *pSrc++
  170. SUBS R2,R2,#+1
  171. BNE _LoopCopyStraight // Falls through when <NumBytes> is even
  172. _CSDone:
  173. #if _CORE_NEEDS_DMB // Do not slow down cores that do not need a DMB instruction here. NOTE(review): _CORE_NEEDS_DMB is not defined in this file, presumably SEGGER_RTT.h provides it. If undefined it evaluates to 0 and the DMB is omitted
  174. DMB // Cortex-M7 may delay memory writes and also change the order in which the writes happen. Therefore, make sure that all buffer writes are finished, before updating the <WrOff> in the struct
  175. #endif
  176. STR R0,[R6, #+12] // pRing->WrOff = WrOff + NumBytes;
  177. MOVS R0,#+1
  178. POP {R4-R7}
  179. BX LR // Return 1
  180. _CheckCase2:
  181. ADDS R0,R0,R3 // Avail += RdOff; => Space incl. wrap-around
  182. CMP R0,R2
  183. BCC.N _Case3 // if (Avail >= NumBytes) { => Case 2? => If not, we have case 3) (does not fit)
  184. //
  185. // Handling for case 2
  186. //
  187. ADDS R0,R7,R5 // v = pRing->pBuffer + WrOff => Do not change pRing->pBuffer here because 2nd chunk needs org. value
  188. SUBS R2,R2,R4 // NumBytes -= Rem; (Rem = pRing->SizeOfBuffer - WrOff; => Space until end of buffer)
  189. _LoopCopyBeforeWrapAround: // memcpy(pRing->pBuffer + WrOff, pData, Rem); => Copy 1st chunk
  190. LDRB R3,[R1], #+1
  191. STRB R3,[R0], #+1 // *pDest++ = *pSrc++
  192. SUBS R4,R4,#+1
  193. BNE _LoopCopyBeforeWrapAround
  194. //
  195. // Special case: The 1st check assumed RdOff == 0 and therefore treated the last element before the wrap-around as not usable
  196. // The 2nd check (considering space until wrap-around and until RdOff) revealed that RdOff is not 0, so we can use the last element
  197. // In this case, the data may fit exactly until the buffer end, so that nothing is left over for a 2nd chunk
  198. // Therefore, check if 2nd memcpy is necessary at all
  199. //
  200. ADDS R4,R2,#+0 // Save <NumBytes> (needed as counter in loop but must be written to <WrOff> after the loop). Also use this inst to update the flags to skip 2nd loop if possible
  201. BEQ.N _No2ChunkNeeded // if (NumBytes) {
  202. _LoopCopyAfterWrapAround: // memcpy(pRing->pBuffer, pData + Rem, NumBytes);
  203. LDRB R3,[R1], #+1 // pData already points to the next src byte due to copy loop increment before this loop
  204. STRB R3,[R5], #+1 // *pDest++ = *pSrc++
  205. SUBS R2,R2,#+1
  206. BNE _LoopCopyAfterWrapAround
  207. _No2ChunkNeeded:
  208. #if _CORE_NEEDS_DMB // Do not slow down cores that do not need a DMB instruction here
  209. DMB // Cortex-M7 may delay memory writes and also change the order in which the writes happen. Therefore, make sure that all buffer writes are finished, before updating the <WrOff> in the struct
  210. #endif
  211. STR R4,[R6, #+12] // pRing->WrOff = NumBytes; => Must be written after copying data because J-Link may read control block asynchronously while writing into buffer
  212. MOVS R0,#+1
  213. POP {R4-R7}
  214. BX LR // Return 1
  215. _CheckCase4:
  216. SUBS R0,R0,R7
  217. SUBS R0,R0,#+1 // Avail = RdOff - WrOff - 1u;
  218. CMP R0,R2
  219. BCS.N _Case4 // if (Avail >= NumBytes) { => Case 4) == 1) ? => If not, we have case 5) == 3) (does not fit)
  220. _Case3:
  221. MOVS R0,#+0
  222. POP {R4-R7}
  223. BX LR // Return 0
  224. _PLACE_LITS // Literal pool for LDR R0,=_SEGGER_RTT (.ltorg for GNU-style assemblers, expands to nothing for IAR)
  225. #endif // defined (RTT_USE_ASM) && (RTT_USE_ASM == 1)
  226. _END // End of module (.end for GNU-style assemblers, END for IAR). Emitted regardless of RTT_USE_ASM
  227. /*************************** End of file ****************************/