none.h 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576
  1. /******************************************************************************
  2. * @file none.h
  3. * @brief Intrinsincs when no DSP extension available
  4. * @version V1.9.0
  5. * @date 20. July 2020
  6. ******************************************************************************/
  7. /*
  8. * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
  9. *
  10. * SPDX-License-Identifier: Apache-2.0
  11. *
  12. * Licensed under the Apache License, Version 2.0 (the License); you may
  13. * not use this file except in compliance with the License.
  14. * You may obtain a copy of the License at
  15. *
  16. * www.apache.org/licenses/LICENSE-2.0
  17. *
  18. * Unless required by applicable law or agreed to in writing, software
  19. * distributed under the License is distributed on an AS IS BASIS, WITHOUT
  20. * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  21. * See the License for the specific language governing permissions and
  22. * limitations under the License.
  23. */
  24. /*
  25. Definitions in this file are allowing to reuse some versions of the
  26. CMSIS-DSP to build on a core (M0 for instance) or a host where
  27. DSP extension are not available.
  28. Ideally a pure C version should have been used instead.
  29. But those are not always available or use a restricted set
  30. of intrinsics.
  31. */
  32. #ifndef _NONE_H_
  33. #define _NONE_H_
  34. #include "arm_math_types.h"
  35. #ifdef __cplusplus
  36. extern "C"
  37. {
  38. #endif
  39. /*
  40. Normally those kind of definitions are in a compiler file
  41. in Core or Core_A.
  42. But for MSVC compiler it is a bit special. The goal is very specific
  43. to CMSIS-DSP and only to allow the use of this library from other
  44. systems like Python or Matlab.
  45. MSVC is not going to be used to cross-compile to ARM. So, having a MSVC
  46. compiler file in Core or Core_A would not make sense.
  47. */
  48. #if defined ( _MSC_VER ) || defined(__GNUC_PYTHON__)
  49. __STATIC_FORCEINLINE uint8_t __CLZ(uint32_t data)
  50. {
  51. if (data == 0U) { return 32U; }
  52. uint32_t count = 0U;
  53. uint32_t mask = 0x80000000U;
  54. while ((data & mask) == 0U)
  55. {
  56. count += 1U;
  57. mask = mask >> 1U;
  58. }
  59. return count;
  60. }
  61. __STATIC_FORCEINLINE int32_t __SSAT(int32_t val, uint32_t sat)
  62. {
  63. if ((sat >= 1U) && (sat <= 32U))
  64. {
  65. const int32_t max = (int32_t)((1U << (sat - 1U)) - 1U);
  66. const int32_t min = -1 - max ;
  67. if (val > max)
  68. {
  69. return max;
  70. }
  71. else if (val < min)
  72. {
  73. return min;
  74. }
  75. }
  76. return val;
  77. }
  78. __STATIC_FORCEINLINE uint32_t __USAT(int32_t val, uint32_t sat)
  79. {
  80. if (sat <= 31U)
  81. {
  82. const uint32_t max = ((1U << sat) - 1U);
  83. if (val > (int32_t)max)
  84. {
  85. return max;
  86. }
  87. else if (val < 0)
  88. {
  89. return 0U;
  90. }
  91. }
  92. return (uint32_t)val;
  93. }
  94. /**
  95. \brief Rotate Right in unsigned value (32 bit)
  96. \details Rotate Right (immediate) provides the value of the contents of a register rotated by a variable number of bits.
  97. \param [in] op1 Value to rotate
  98. \param [in] op2 Number of Bits to rotate
  99. \return Rotated value
  100. */
  101. __STATIC_FORCEINLINE uint32_t __ROR(uint32_t op1, uint32_t op2)
  102. {
  103. op2 %= 32U;
  104. if (op2 == 0U)
  105. {
  106. return op1;
  107. }
  108. return (op1 >> op2) | (op1 << (32U - op2));
  109. }
  110. #endif
  111. /**
  112. * @brief Clips Q63 to Q31 values.
  113. */
  114. __STATIC_FORCEINLINE q31_t clip_q63_to_q31(
  115. q63_t x)
  116. {
  117. return ((q31_t) (x >> 32) != ((q31_t) x >> 31)) ?
  118. ((0x7FFFFFFF ^ ((q31_t) (x >> 63)))) : (q31_t) x;
  119. }
  120. /**
  121. * @brief Clips Q63 to Q15 values.
  122. */
  123. __STATIC_FORCEINLINE q15_t clip_q63_to_q15(
  124. q63_t x)
  125. {
  126. return ((q31_t) (x >> 32) != ((q31_t) x >> 31)) ?
  127. ((0x7FFF ^ ((q15_t) (x >> 63)))) : (q15_t) (x >> 15);
  128. }
  129. /**
  130. * @brief Clips Q31 to Q7 values.
  131. */
  132. __STATIC_FORCEINLINE q7_t clip_q31_to_q7(
  133. q31_t x)
  134. {
  135. return ((q31_t) (x >> 24) != ((q31_t) x >> 23)) ?
  136. ((0x7F ^ ((q7_t) (x >> 31)))) : (q7_t) x;
  137. }
  138. /**
  139. * @brief Clips Q31 to Q15 values.
  140. */
  141. __STATIC_FORCEINLINE q15_t clip_q31_to_q15(
  142. q31_t x)
  143. {
  144. return ((q31_t) (x >> 16) != ((q31_t) x >> 15)) ?
  145. ((0x7FFF ^ ((q15_t) (x >> 31)))) : (q15_t) x;
  146. }
  147. /**
  148. * @brief Multiplies 32 X 64 and returns 32 bit result in 2.30 format.
  149. */
  150. __STATIC_FORCEINLINE q63_t mult32x64(
  151. q63_t x,
  152. q31_t y)
  153. {
  154. return ((((q63_t) (x & 0x00000000FFFFFFFF) * y) >> 32) +
  155. (((q63_t) (x >> 32) * y) ) );
  156. }
  157. /* SMMLAR */
  158. #define multAcc_32x32_keep32_R(a, x, y) \
  159. a = (q31_t) (((((q63_t) a) << 32) + ((q63_t) x * y) + 0x80000000LL ) >> 32)
  160. /* SMMLSR */
  161. #define multSub_32x32_keep32_R(a, x, y) \
  162. a = (q31_t) (((((q63_t) a) << 32) - ((q63_t) x * y) + 0x80000000LL ) >> 32)
  163. /* SMMULR */
  164. #define mult_32x32_keep32_R(a, x, y) \
  165. a = (q31_t) (((q63_t) x * y + 0x80000000LL ) >> 32)
  166. /* SMMLA */
  167. #define multAcc_32x32_keep32(a, x, y) \
  168. a += (q31_t) (((q63_t) x * y) >> 32)
  169. /* SMMLS */
  170. #define multSub_32x32_keep32(a, x, y) \
  171. a -= (q31_t) (((q63_t) x * y) >> 32)
  172. /* SMMUL */
  173. #define mult_32x32_keep32(a, x, y) \
  174. a = (q31_t) (((q63_t) x * y ) >> 32)
  175. #ifndef ARM_MATH_DSP
  176. /**
  177. * @brief definition to pack two 16 bit values.
  178. */
  179. #define __PKHBT(ARG1, ARG2, ARG3) ( (((int32_t)(ARG1) << 0) & (int32_t)0x0000FFFF) | \
  180. (((int32_t)(ARG2) << ARG3) & (int32_t)0xFFFF0000) )
  181. #define __PKHTB(ARG1, ARG2, ARG3) ( (((int32_t)(ARG1) << 0) & (int32_t)0xFFFF0000) | \
  182. (((int32_t)(ARG2) >> ARG3) & (int32_t)0x0000FFFF) )
  183. #endif
  184. /**
  185. * @brief definition to pack four 8 bit values.
  186. */
  187. #ifndef ARM_MATH_BIG_ENDIAN
  188. #define __PACKq7(v0,v1,v2,v3) ( (((int32_t)(v0) << 0) & (int32_t)0x000000FF) | \
  189. (((int32_t)(v1) << 8) & (int32_t)0x0000FF00) | \
  190. (((int32_t)(v2) << 16) & (int32_t)0x00FF0000) | \
  191. (((int32_t)(v3) << 24) & (int32_t)0xFF000000) )
  192. #else
  193. #define __PACKq7(v0,v1,v2,v3) ( (((int32_t)(v3) << 0) & (int32_t)0x000000FF) | \
  194. (((int32_t)(v2) << 8) & (int32_t)0x0000FF00) | \
  195. (((int32_t)(v1) << 16) & (int32_t)0x00FF0000) | \
  196. (((int32_t)(v0) << 24) & (int32_t)0xFF000000) )
  197. #endif
  198. /*
  199. * @brief C custom defined intrinsic functions
  200. */
  201. #if !defined (ARM_MATH_DSP)
  202. /*
  203. * @brief C custom defined QADD8
  204. */
  205. __STATIC_FORCEINLINE uint32_t __QADD8(
  206. uint32_t x,
  207. uint32_t y)
  208. {
  209. q31_t r, s, t, u;
  210. r = __SSAT(((((q31_t)x << 24) >> 24) + (((q31_t)y << 24) >> 24)), 8) & (int32_t)0x000000FF;
  211. s = __SSAT(((((q31_t)x << 16) >> 24) + (((q31_t)y << 16) >> 24)), 8) & (int32_t)0x000000FF;
  212. t = __SSAT(((((q31_t)x << 8) >> 24) + (((q31_t)y << 8) >> 24)), 8) & (int32_t)0x000000FF;
  213. u = __SSAT(((((q31_t)x ) >> 24) + (((q31_t)y ) >> 24)), 8) & (int32_t)0x000000FF;
  214. return ((uint32_t)((u << 24) | (t << 16) | (s << 8) | (r )));
  215. }
  216. /*
  217. * @brief C custom defined QSUB8
  218. */
  219. __STATIC_FORCEINLINE uint32_t __QSUB8(
  220. uint32_t x,
  221. uint32_t y)
  222. {
  223. q31_t r, s, t, u;
  224. r = __SSAT(((((q31_t)x << 24) >> 24) - (((q31_t)y << 24) >> 24)), 8) & (int32_t)0x000000FF;
  225. s = __SSAT(((((q31_t)x << 16) >> 24) - (((q31_t)y << 16) >> 24)), 8) & (int32_t)0x000000FF;
  226. t = __SSAT(((((q31_t)x << 8) >> 24) - (((q31_t)y << 8) >> 24)), 8) & (int32_t)0x000000FF;
  227. u = __SSAT(((((q31_t)x ) >> 24) - (((q31_t)y ) >> 24)), 8) & (int32_t)0x000000FF;
  228. return ((uint32_t)((u << 24) | (t << 16) | (s << 8) | (r )));
  229. }
  230. /*
  231. * @brief C custom defined QADD16
  232. */
  233. __STATIC_FORCEINLINE uint32_t __QADD16(
  234. uint32_t x,
  235. uint32_t y)
  236. {
  237. /* q31_t r, s; without initialisation 'arm_offset_q15 test' fails but 'intrinsic' tests pass! for armCC */
  238. q31_t r = 0, s = 0;
  239. r = __SSAT(((((q31_t)x << 16) >> 16) + (((q31_t)y << 16) >> 16)), 16) & (int32_t)0x0000FFFF;
  240. s = __SSAT(((((q31_t)x ) >> 16) + (((q31_t)y ) >> 16)), 16) & (int32_t)0x0000FFFF;
  241. return ((uint32_t)((s << 16) | (r )));
  242. }
  243. /*
  244. * @brief C custom defined SHADD16
  245. */
  246. __STATIC_FORCEINLINE uint32_t __SHADD16(
  247. uint32_t x,
  248. uint32_t y)
  249. {
  250. q31_t r, s;
  251. r = (((((q31_t)x << 16) >> 16) + (((q31_t)y << 16) >> 16)) >> 1) & (int32_t)0x0000FFFF;
  252. s = (((((q31_t)x ) >> 16) + (((q31_t)y ) >> 16)) >> 1) & (int32_t)0x0000FFFF;
  253. return ((uint32_t)((s << 16) | (r )));
  254. }
  255. /*
  256. * @brief C custom defined QSUB16
  257. */
  258. __STATIC_FORCEINLINE uint32_t __QSUB16(
  259. uint32_t x,
  260. uint32_t y)
  261. {
  262. q31_t r, s;
  263. r = __SSAT(((((q31_t)x << 16) >> 16) - (((q31_t)y << 16) >> 16)), 16) & (int32_t)0x0000FFFF;
  264. s = __SSAT(((((q31_t)x ) >> 16) - (((q31_t)y ) >> 16)), 16) & (int32_t)0x0000FFFF;
  265. return ((uint32_t)((s << 16) | (r )));
  266. }
  267. /*
  268. * @brief C custom defined SHSUB16
  269. */
  270. __STATIC_FORCEINLINE uint32_t __SHSUB16(
  271. uint32_t x,
  272. uint32_t y)
  273. {
  274. q31_t r, s;
  275. r = (((((q31_t)x << 16) >> 16) - (((q31_t)y << 16) >> 16)) >> 1) & (int32_t)0x0000FFFF;
  276. s = (((((q31_t)x ) >> 16) - (((q31_t)y ) >> 16)) >> 1) & (int32_t)0x0000FFFF;
  277. return ((uint32_t)((s << 16) | (r )));
  278. }
  279. /*
  280. * @brief C custom defined QASX
  281. */
  282. __STATIC_FORCEINLINE uint32_t __QASX(
  283. uint32_t x,
  284. uint32_t y)
  285. {
  286. q31_t r, s;
  287. r = __SSAT(((((q31_t)x << 16) >> 16) - (((q31_t)y ) >> 16)), 16) & (int32_t)0x0000FFFF;
  288. s = __SSAT(((((q31_t)x ) >> 16) + (((q31_t)y << 16) >> 16)), 16) & (int32_t)0x0000FFFF;
  289. return ((uint32_t)((s << 16) | (r )));
  290. }
  291. /*
  292. * @brief C custom defined SHASX
  293. */
  294. __STATIC_FORCEINLINE uint32_t __SHASX(
  295. uint32_t x,
  296. uint32_t y)
  297. {
  298. q31_t r, s;
  299. r = (((((q31_t)x << 16) >> 16) - (((q31_t)y ) >> 16)) >> 1) & (int32_t)0x0000FFFF;
  300. s = (((((q31_t)x ) >> 16) + (((q31_t)y << 16) >> 16)) >> 1) & (int32_t)0x0000FFFF;
  301. return ((uint32_t)((s << 16) | (r )));
  302. }
  303. /*
  304. * @brief C custom defined QSAX
  305. */
  306. __STATIC_FORCEINLINE uint32_t __QSAX(
  307. uint32_t x,
  308. uint32_t y)
  309. {
  310. q31_t r, s;
  311. r = __SSAT(((((q31_t)x << 16) >> 16) + (((q31_t)y ) >> 16)), 16) & (int32_t)0x0000FFFF;
  312. s = __SSAT(((((q31_t)x ) >> 16) - (((q31_t)y << 16) >> 16)), 16) & (int32_t)0x0000FFFF;
  313. return ((uint32_t)((s << 16) | (r )));
  314. }
  315. /*
  316. * @brief C custom defined SHSAX
  317. */
  318. __STATIC_FORCEINLINE uint32_t __SHSAX(
  319. uint32_t x,
  320. uint32_t y)
  321. {
  322. q31_t r, s;
  323. r = (((((q31_t)x << 16) >> 16) + (((q31_t)y ) >> 16)) >> 1) & (int32_t)0x0000FFFF;
  324. s = (((((q31_t)x ) >> 16) - (((q31_t)y << 16) >> 16)) >> 1) & (int32_t)0x0000FFFF;
  325. return ((uint32_t)((s << 16) | (r )));
  326. }
  327. /*
  328. * @brief C custom defined SMUSDX
  329. */
  330. __STATIC_FORCEINLINE uint32_t __SMUSDX(
  331. uint32_t x,
  332. uint32_t y)
  333. {
  334. return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y ) >> 16)) -
  335. ((((q31_t)x ) >> 16) * (((q31_t)y << 16) >> 16)) ));
  336. }
  337. /*
  338. * @brief C custom defined SMUADX
  339. */
  340. __STATIC_FORCEINLINE uint32_t __SMUADX(
  341. uint32_t x,
  342. uint32_t y)
  343. {
  344. return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y ) >> 16)) +
  345. ((((q31_t)x ) >> 16) * (((q31_t)y << 16) >> 16)) ));
  346. }
  347. /*
  348. * @brief C custom defined QADD
  349. */
  350. __STATIC_FORCEINLINE int32_t __QADD(
  351. int32_t x,
  352. int32_t y)
  353. {
  354. return ((int32_t)(clip_q63_to_q31((q63_t)x + (q31_t)y)));
  355. }
  356. /*
  357. * @brief C custom defined QSUB
  358. */
  359. __STATIC_FORCEINLINE int32_t __QSUB(
  360. int32_t x,
  361. int32_t y)
  362. {
  363. return ((int32_t)(clip_q63_to_q31((q63_t)x - (q31_t)y)));
  364. }
  365. /*
  366. * @brief C custom defined SMLAD
  367. */
  368. __STATIC_FORCEINLINE uint32_t __SMLAD(
  369. uint32_t x,
  370. uint32_t y,
  371. uint32_t sum)
  372. {
  373. return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y << 16) >> 16)) +
  374. ((((q31_t)x ) >> 16) * (((q31_t)y ) >> 16)) +
  375. ( ((q31_t)sum ) ) ));
  376. }
  377. /*
  378. * @brief C custom defined SMLADX
  379. */
  380. __STATIC_FORCEINLINE uint32_t __SMLADX(
  381. uint32_t x,
  382. uint32_t y,
  383. uint32_t sum)
  384. {
  385. return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y ) >> 16)) +
  386. ((((q31_t)x ) >> 16) * (((q31_t)y << 16) >> 16)) +
  387. ( ((q31_t)sum ) ) ));
  388. }
  389. /*
  390. * @brief C custom defined SMLSDX
  391. */
  392. __STATIC_FORCEINLINE uint32_t __SMLSDX(
  393. uint32_t x,
  394. uint32_t y,
  395. uint32_t sum)
  396. {
  397. return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y ) >> 16)) -
  398. ((((q31_t)x ) >> 16) * (((q31_t)y << 16) >> 16)) +
  399. ( ((q31_t)sum ) ) ));
  400. }
  401. /*
  402. * @brief C custom defined SMLALD
  403. */
  404. __STATIC_FORCEINLINE uint64_t __SMLALD(
  405. uint32_t x,
  406. uint32_t y,
  407. uint64_t sum)
  408. {
  409. /* return (sum + ((q15_t) (x >> 16) * (q15_t) (y >> 16)) + ((q15_t) x * (q15_t) y)); */
  410. return ((uint64_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y << 16) >> 16)) +
  411. ((((q31_t)x ) >> 16) * (((q31_t)y ) >> 16)) +
  412. ( ((q63_t)sum ) ) ));
  413. }
  414. /*
  415. * @brief C custom defined SMLALDX
  416. */
  417. __STATIC_FORCEINLINE uint64_t __SMLALDX(
  418. uint32_t x,
  419. uint32_t y,
  420. uint64_t sum)
  421. {
  422. /* return (sum + ((q15_t) (x >> 16) * (q15_t) y)) + ((q15_t) x * (q15_t) (y >> 16)); */
  423. return ((uint64_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y ) >> 16)) +
  424. ((((q31_t)x ) >> 16) * (((q31_t)y << 16) >> 16)) +
  425. ( ((q63_t)sum ) ) ));
  426. }
  427. /*
  428. * @brief C custom defined SMUAD
  429. */
  430. __STATIC_FORCEINLINE uint32_t __SMUAD(
  431. uint32_t x,
  432. uint32_t y)
  433. {
  434. return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y << 16) >> 16)) +
  435. ((((q31_t)x ) >> 16) * (((q31_t)y ) >> 16)) ));
  436. }
  437. /*
  438. * @brief C custom defined SMUSD
  439. */
  440. __STATIC_FORCEINLINE uint32_t __SMUSD(
  441. uint32_t x,
  442. uint32_t y)
  443. {
  444. return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y << 16) >> 16)) -
  445. ((((q31_t)x ) >> 16) * (((q31_t)y ) >> 16)) ));
  446. }
  447. /*
  448. * @brief C custom defined SXTB16
  449. */
  450. __STATIC_FORCEINLINE uint32_t __SXTB16(
  451. uint32_t x)
  452. {
  453. return ((uint32_t)(((((q31_t)x << 24) >> 24) & (q31_t)0x0000FFFF) |
  454. ((((q31_t)x << 8) >> 8) & (q31_t)0xFFFF0000) ));
  455. }
  456. /*
  457. * @brief C custom defined SMMLA
  458. */
  459. __STATIC_FORCEINLINE int32_t __SMMLA(
  460. int32_t x,
  461. int32_t y,
  462. int32_t sum)
  463. {
  464. return (sum + (int32_t) (((int64_t) x * y) >> 32));
  465. }
  466. #endif /* !defined (ARM_MATH_DSP) */
  467. #ifdef __cplusplus
  468. }
  469. #endif
#endif /* ifndef _NONE_H_ */