math_extras_impl.h

/*
 * Copyright (c) 2019 Facebook.
 *
 * SPDX-License-Identifier: Apache-2.0
 */

/**
 * @file
 * @brief Inline implementation of functions declared in math_extras.h.
 */

#ifndef ZEPHYR_INCLUDE_SYS_MATH_EXTRAS_H_
#error "please include <sys/math_extras.h> instead of this file"
#endif

#include <toolchain.h>

/*
 * Force the use of portable C code (no builtins) by defining
 * PORTABLE_MISC_MATH_EXTRAS before including <sys/math_extras.h>.
 * This is primarily for use by tests.
 *
 * We'll #undef use_builtin again at the end of the file.
 */
#ifdef PORTABLE_MISC_MATH_EXTRAS
#define use_builtin(x) 0
#else
#define use_builtin(x) HAS_BUILTIN(x)
#endif
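
/*
 * Illustrative sketch only (hypothetical test code, not part of this
 * file): a test can force the portable fallbacks below by defining the
 * macro before the first include, e.g.:
 *
 *	#define PORTABLE_MISC_MATH_EXTRAS
 *	#include <sys/math_extras.h>
 *
 *	void test_portable_u32_add_overflow(void)
 *	{
 *		uint32_t sum;
 *
 *		zassert_true(u32_add_overflow(UINT32_MAX, 1U, &sum), NULL);
 *		zassert_equal(sum, 0U, NULL);
 *	}
 *
 * zassert_true()/zassert_equal() are assumed here to come from ztest.
 */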

#if use_builtin(__builtin_add_overflow)
static inline bool u16_add_overflow(uint16_t a, uint16_t b, uint16_t *result)
{
	return __builtin_add_overflow(a, b, result);
}

static inline bool u32_add_overflow(uint32_t a, uint32_t b, uint32_t *result)
{
	return __builtin_add_overflow(a, b, result);
}

static inline bool u64_add_overflow(uint64_t a, uint64_t b, uint64_t *result)
{
	return __builtin_add_overflow(a, b, result);
}

static inline bool size_add_overflow(size_t a, size_t b, size_t *result)
{
	return __builtin_add_overflow(a, b, result);
}
#else /* !use_builtin(__builtin_add_overflow) */
/*
 * Portable fallback: unsigned addition wraps modulo 2^N, so the
 * truncated sum is less than either operand exactly when the
 * addition overflowed.
 */
static inline bool u16_add_overflow(uint16_t a, uint16_t b, uint16_t *result)
{
	uint16_t c = a + b;

	*result = c;

	return c < a;
}

static inline bool u32_add_overflow(uint32_t a, uint32_t b, uint32_t *result)
{
	uint32_t c = a + b;

	*result = c;

	return c < a;
}

static inline bool u64_add_overflow(uint64_t a, uint64_t b, uint64_t *result)
{
	uint64_t c = a + b;

	*result = c;

	return c < a;
}

static inline bool size_add_overflow(size_t a, size_t b, size_t *result)
{
	size_t c = a + b;

	*result = c;

	return c < a;
}
#endif /* use_builtin(__builtin_add_overflow) */
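
/*
 * Illustrative usage (hypothetical caller, not part of this API):
 * reject a length computation instead of letting it silently wrap.
 *
 *	size_t total;
 *
 *	if (size_add_overflow(header_len, payload_len, &total)) {
 *		return -EOVERFLOW;
 *	}
 *
 * header_len and payload_len are assumed variables for the sketch.
 */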

#if use_builtin(__builtin_mul_overflow)
static inline bool u16_mul_overflow(uint16_t a, uint16_t b, uint16_t *result)
{
	return __builtin_mul_overflow(a, b, result);
}

static inline bool u32_mul_overflow(uint32_t a, uint32_t b, uint32_t *result)
{
	return __builtin_mul_overflow(a, b, result);
}

static inline bool u64_mul_overflow(uint64_t a, uint64_t b, uint64_t *result)
{
	return __builtin_mul_overflow(a, b, result);
}

static inline bool size_mul_overflow(size_t a, size_t b, size_t *result)
{
	return __builtin_mul_overflow(a, b, result);
}
#else /* !use_builtin(__builtin_mul_overflow) */
/*
 * Portable fallback: if a * b fits in the type, then (a * b) / a == b;
 * after a wrapping multiply the quotient differs, so the division
 * recovers the overflow flag. The a != 0 guard avoids dividing by
 * zero (0 * b never overflows).
 */
static inline bool u16_mul_overflow(uint16_t a, uint16_t b, uint16_t *result)
{
	/*
	 * Widen before multiplying: the uint16_t operands would promote
	 * to int, and 0xffff * 0xffff overflows a 32-bit int, which is
	 * undefined behavior.
	 */
	uint32_t c = (uint32_t)a * (uint32_t)b;

	*result = (uint16_t)c;

	return c > UINT16_MAX;
}

static inline bool u32_mul_overflow(uint32_t a, uint32_t b, uint32_t *result)
{
	uint32_t c = a * b;

	*result = c;

	return a != 0 && (c / a) != b;
}

static inline bool u64_mul_overflow(uint64_t a, uint64_t b, uint64_t *result)
{
	uint64_t c = a * b;

	*result = c;

	return a != 0 && (c / a) != b;
}

static inline bool size_mul_overflow(size_t a, size_t b, size_t *result)
{
	size_t c = a * b;

	*result = c;

	return a != 0 && (c / a) != b;
}
#endif /* use_builtin(__builtin_mul_overflow) */
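
/*
 * Illustrative usage (hypothetical caller, not part of this API):
 * checked element-count-to-byte-count conversion before allocating.
 *
 *	size_t bytes;
 *
 *	if (size_mul_overflow(count, sizeof(struct foo), &bytes)) {
 *		return NULL;
 *	}
 *	return k_malloc(bytes);
 *
 * struct foo and count are assumptions for the sketch; k_malloc() is
 * the kernel heap allocator.
 */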

/*
 * The GCC builtins __builtin_clz(), __builtin_ctz(), and 64-bit
 * variants are described by the GCC documentation as having undefined
 * behavior when the argument is zero. See
 * https://gcc.gnu.org/onlinedocs/gcc/Other-Builtins.html.
 *
 * The undefined behavior applies to all architectures, regardless of
 * the behavior of the instruction used to implement the builtin.
 *
 * We don't want to expose users of this API to the undefined behavior,
 * so we use a conditional to explicitly provide the correct result when
 * x=0.
 *
 * Most instruction set architectures have a CLZ instruction or similar
 * that already computes the correct result for x=0. Both GCC and Clang
 * know this and simply generate a CLZ instruction, optimizing away the
 * conditional.
 *
 * For x86, and for compilers that fail to eliminate the conditional,
 * there is often another opportunity for optimization since code using
 * these functions tends to contain a zero check already. For example,
 * from kernel/sched.c:
 *
 *	struct k_thread *z_priq_mq_best(struct _priq_mq *pq)
 *	{
 *		if (!pq->bitmask) {
 *			return NULL;
 *		}
 *
 *		struct k_thread *thread = NULL;
 *		sys_dlist_t *l =
 *			&pq->queues[u32_count_trailing_zeros(pq->bitmask)];
 *
 *		...
 *
 * The compiler will often be able to eliminate the redundant x == 0
 * check after inlining the call to u32_count_trailing_zeros().
 */
#if use_builtin(__builtin_clz)
static inline int u32_count_leading_zeros(uint32_t x)
{
	return x == 0 ? 32 : __builtin_clz(x);
}
#else /* !use_builtin(__builtin_clz) */
/*
 * Portable fallback: shift left until the most significant bit is
 * set, counting the iterations; all 32 iterations run for x == 0.
 */
static inline int u32_count_leading_zeros(uint32_t x)
{
	int b;

	for (b = 0; b < 32 && (x >> 31) == 0; b++) {
		x <<= 1;
	}

	return b;
}
#endif /* use_builtin(__builtin_clz) */

#if use_builtin(__builtin_clzll)
static inline int u64_count_leading_zeros(uint64_t x)
{
	return x == 0 ? 64 : __builtin_clzll(x);
}
#else /* !use_builtin(__builtin_clzll) */
/*
 * If the high word is zero, the answer is 32 plus the count for the
 * low word; otherwise only the high word matters.
 */
static inline int u64_count_leading_zeros(uint64_t x)
{
	if (x == (uint32_t)x) {
		return 32 + u32_count_leading_zeros((uint32_t)x);
	} else {
		return u32_count_leading_zeros(x >> 32);
	}
}
#endif /* use_builtin(__builtin_clzll) */
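
/*
 * Illustrative usage (hypothetical helper, not part of this API): the
 * index of the highest set bit follows directly from the leading zero
 * count. This sketch yields -1 for x == 0:
 *
 *	static inline int u32_msb_index(uint32_t x)
 *	{
 *		return 31 - u32_count_leading_zeros(x);
 *	}
 */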

#if use_builtin(__builtin_ctz)
static inline int u32_count_trailing_zeros(uint32_t x)
{
	return x == 0 ? 32 : __builtin_ctz(x);
}
#else /* !use_builtin(__builtin_ctz) */
/*
 * Portable fallback: shift right until the least significant bit is
 * set, counting the iterations; all 32 iterations run for x == 0.
 */
static inline int u32_count_trailing_zeros(uint32_t x)
{
	int b;

	for (b = 0; b < 32 && (x & 1) == 0; b++) {
		x >>= 1;
	}

	return b;
}
#endif /* use_builtin(__builtin_ctz) */

#if use_builtin(__builtin_ctzll)
static inline int u64_count_trailing_zeros(uint64_t x)
{
	return x == 0 ? 64 : __builtin_ctzll(x);
}
#else /* !use_builtin(__builtin_ctzll) */
/*
 * If the low word is nonzero, it alone determines the count;
 * otherwise the answer is 32 plus the count for the high word.
 */
static inline int u64_count_trailing_zeros(uint64_t x)
{
	if ((uint32_t)x != 0) {
		return u32_count_trailing_zeros((uint32_t)x);
	} else {
		return 32 + u32_count_trailing_zeros(x >> 32);
	}
}
#endif /* use_builtin(__builtin_ctzll) */
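
/*
 * Illustrative usage (hypothetical caller, not part of this API):
 * visiting every set bit of a mask from least to most significant,
 * clearing the lowest set bit with mask &= mask - 1 after each step:
 *
 *	while (mask != 0) {
 *		int bit = u32_count_trailing_zeros(mask);
 *
 *		handle_bit(bit);
 *		mask &= mask - 1;
 *	}
 *
 * handle_bit() is an assumed callback for the sketch.
 */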

#undef use_builtin