mmu.c

/*
 * Copyright (c) 2020 Intel Corporation
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Routines for managing virtual address spaces
 */
#include <stdint.h>
#include <kernel_arch_interface.h>
#include <spinlock.h>
#include <mmu.h>
#include <init.h>
#include <kernel_internal.h>
#include <syscall_handler.h>
#include <toolchain.h>
#include <linker/linker-defs.h>
#include <sys/bitarray.h>
#include <timing/timing.h>
#include <logging/log.h>
LOG_MODULE_DECLARE(os, CONFIG_KERNEL_LOG_LEVEL);

/*
 * General terminology:
 * - A page frame is a page-sized physical memory region in RAM. It is a
 *   container where a data page may be placed. It is always referred to by
 *   physical address. We have a convention of using uintptr_t for physical
 *   addresses. We instantiate a struct z_page_frame to store metadata for
 *   every page frame.
 *
 * - A data page is a page-sized region of data. It may exist in a page frame,
 *   or be paged out to some backing store. Its location can always be looked
 *   up in the CPU's page tables (or equivalent) by virtual address.
 *   The data type will always be void * or in some cases uint8_t * when we
 *   want to do pointer arithmetic.
 */
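
/* Illustrative sketch (not compiled): how a page frame index, its physical
 * address, and a mapped virtual address are assumed to relate, given that
 * z_page_frames[] covers RAM linearly from Z_PHYS_RAM_START in units of
 * CONFIG_MMU_PAGE_SIZE:
 *
 *	int idx = 3;
 *	uintptr_t phys = Z_PHYS_RAM_START + (uintptr_t)idx * CONFIG_MMU_PAGE_SIZE;
 *	struct z_page_frame *pf = &z_page_frames[idx];
 *	// if z_page_frame_is_mapped(pf), pf->addr is the virtual address of
 *	// the data page currently resident in this frame
 */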
/* Spinlock to protect any globals in this file and serialize page table
 * updates in arch code
 */
struct k_spinlock z_mm_lock;

/*
 * General page frame management
 */

/* Database of all RAM page frames */
struct z_page_frame z_page_frames[Z_NUM_PAGE_FRAMES];

#if __ASSERT_ON
/* Indicator that z_page_frames has been initialized; many of these APIs do
 * not work before POST_KERNEL
 */
static bool page_frames_initialized;
#endif

/* Add colors to page table dumps to indicate mapping type */
#define COLOR_PAGE_FRAMES 1

#if COLOR_PAGE_FRAMES
#define ANSI_DEFAULT "\x1B[0m"
#define ANSI_RED "\x1B[1;31m"
#define ANSI_GREEN "\x1B[1;32m"
#define ANSI_YELLOW "\x1B[1;33m"
#define ANSI_BLUE "\x1B[1;34m"
#define ANSI_MAGENTA "\x1B[1;35m"
#define ANSI_CYAN "\x1B[1;36m"
#define ANSI_GREY "\x1B[1;90m"

#define COLOR(x) printk(_CONCAT(ANSI_, x))
#else
#define COLOR(x) do { } while (0)
#endif

static void page_frame_dump(struct z_page_frame *pf)
{
	if (z_page_frame_is_reserved(pf)) {
		COLOR(CYAN);
		printk("R");
	} else if (z_page_frame_is_busy(pf)) {
		COLOR(MAGENTA);
		printk("B");
	} else if (z_page_frame_is_pinned(pf)) {
		COLOR(YELLOW);
		printk("P");
	} else if (z_page_frame_is_available(pf)) {
		COLOR(GREY);
		printk(".");
	} else if (z_page_frame_is_mapped(pf)) {
		COLOR(DEFAULT);
		printk("M");
	} else {
		COLOR(RED);
		printk("?");
	}
}

void z_page_frames_dump(void)
{
	int column = 0;

	__ASSERT(page_frames_initialized, "%s called too early", __func__);
	printk("Physical memory from 0x%lx to 0x%lx\n",
	       Z_PHYS_RAM_START, Z_PHYS_RAM_END);

	for (int i = 0; i < Z_NUM_PAGE_FRAMES; i++) {
		struct z_page_frame *pf = &z_page_frames[i];

		page_frame_dump(pf);

		column++;
		if (column == 64) {
			column = 0;
			printk("\n");
		}
	}

	COLOR(DEFAULT);
	if (column != 0) {
		printk("\n");
	}
}

#define VIRT_FOREACH(_base, _size, _pos) \
	for (_pos = _base; \
	     _pos < ((uint8_t *)_base + _size); _pos += CONFIG_MMU_PAGE_SIZE)

#define PHYS_FOREACH(_base, _size, _pos) \
	for (_pos = _base; \
	     _pos < ((uintptr_t)_base + _size); _pos += CONFIG_MMU_PAGE_SIZE)

/*
 * Virtual address space management
 *
 * Call all of these functions with z_mm_lock held.
 *
 * Overall virtual memory map: When the kernel starts, it resides in
 * virtual memory in the region Z_KERNEL_VIRT_START to
 * Z_KERNEL_VIRT_END. Unused virtual memory past this, up to the limit
 * noted by CONFIG_KERNEL_VM_SIZE may be used for runtime memory mappings.
 *
 * If CONFIG_ARCH_MAPS_ALL_RAM is set, we do not just map the kernel image,
 * but have a mapping for all RAM in place. This is for special architectural
 * purposes and does not otherwise affect page frame accounting or flags;
 * the only guarantee is that such RAM mapping outside of the Zephyr image
 * won't be disturbed by subsequent memory mapping calls.
 *
 * +--------------+ <- Z_VIRT_RAM_START
 * | Undefined VM | <- May contain ancillary regions like x86_64's locore
 * +--------------+ <- Z_KERNEL_VIRT_START (often == Z_VIRT_RAM_START)
 * | Mapping for  |
 * | main kernel  |
 * | image        |
 * |              |
 * |              |
 * +--------------+ <- Z_FREE_VM_START
 * |              |
 * | Unused,      |
 * | Available VM |
 * |              |
 * |..............| <- mapping_pos (grows downward as more mappings are made)
 * | Mapping      |
 * +--------------+
 * | Mapping      |
 * +--------------+
 * | ...          |
 * +--------------+
 * | Mapping      |
 * +--------------+ <- mappings start here
 * | Reserved     | <- special purpose virtual page(s) of size Z_VM_RESERVED
 * +--------------+ <- Z_VIRT_RAM_END
 */

/* Bitmap of virtual addresses where one bit corresponds to one page.
 * This is used by virt_region_alloc() to figure out which
 * region of virtual addresses can be used for memory mapping.
 *
 * Note that bit #0 corresponds to the highest address, so allocation is
 * done in reverse, starting from the highest address.
 */
SYS_BITARRAY_DEFINE(virt_region_bitmap,
		    CONFIG_KERNEL_VM_SIZE / CONFIG_MMU_PAGE_SIZE);

static bool virt_region_inited;

#define Z_VIRT_REGION_START_ADDR	Z_FREE_VM_START
#define Z_VIRT_REGION_END_ADDR		(Z_VIRT_RAM_END - Z_VM_RESERVED)

static inline uintptr_t virt_from_bitmap_offset(size_t offset, size_t size)
{
	return POINTER_TO_UINT(Z_VIRT_RAM_END)
	       - (offset * CONFIG_MMU_PAGE_SIZE) - size;
}

static inline size_t virt_to_bitmap_offset(void *vaddr, size_t size)
{
	return (POINTER_TO_UINT(Z_VIRT_RAM_END)
		- POINTER_TO_UINT(vaddr) - size) / CONFIG_MMU_PAGE_SIZE;
}
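
/* Worked example of the offset math above, assuming (for illustration only)
 * Z_VIRT_RAM_END == 0xC0100000 and CONFIG_MMU_PAGE_SIZE == 0x1000:
 *
 *	virt_from_bitmap_offset(0, 0x3000) == 0xC00FD000
 *		(bits #0..#2 cover the three highest pages of the VM space)
 *	virt_to_bitmap_offset((void *)0xC00FD000, 0x3000) == 0
 *		(round-trips back to the same bitmap offset)
 */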
static void virt_region_init(void)
{
	size_t offset, num_bits;

	/* There are regions where we should never map via
	 * k_mem_map() and z_phys_map(). Mark them as
	 * already allocated so they will never be used.
	 */

	if (Z_VM_RESERVED > 0) {
		/* Mark reserved region at end of virtual address space */
		num_bits = Z_VM_RESERVED / CONFIG_MMU_PAGE_SIZE;
		(void)sys_bitarray_set_region(&virt_region_bitmap,
					      num_bits, 0);
	}

	/* Mark all bits up to Z_FREE_VM_START as allocated */
	num_bits = POINTER_TO_UINT(Z_FREE_VM_START)
		   - POINTER_TO_UINT(Z_VIRT_RAM_START);
	offset = virt_to_bitmap_offset(Z_VIRT_RAM_START, num_bits);
	num_bits /= CONFIG_MMU_PAGE_SIZE;
	(void)sys_bitarray_set_region(&virt_region_bitmap,
				      num_bits, offset);

	virt_region_inited = true;
}

static void *virt_region_alloc(size_t size)
{
	uintptr_t dest_addr;
	size_t offset;
	size_t num_bits;
	int ret;

	if (unlikely(!virt_region_inited)) {
		virt_region_init();
	}

	num_bits = size / CONFIG_MMU_PAGE_SIZE;
	ret = sys_bitarray_alloc(&virt_region_bitmap, num_bits, &offset);
	if (ret != 0) {
		LOG_ERR("insufficient virtual address space (requested %zu)",
			size);
		return NULL;
	}

	/* Remember that bit #0 in the bitmap corresponds to the highest
	 * virtual address. So here we need to go backwards from the end
	 * to get the starting address of the allocated region.
	 */
	dest_addr = virt_from_bitmap_offset(offset, size);

	/* Need to make sure this does not step into kernel memory */
	if (dest_addr < POINTER_TO_UINT(Z_VIRT_REGION_START_ADDR)) {
		(void)sys_bitarray_free(&virt_region_bitmap, num_bits, offset);
		return NULL;
	}

	return UINT_TO_POINTER(dest_addr);
}

static void virt_region_free(void *vaddr, size_t size)
{
	size_t offset, num_bits;
	uint8_t *vaddr_u8 = (uint8_t *)vaddr;

	if (unlikely(!virt_region_inited)) {
		virt_region_init();
	}

	__ASSERT((vaddr_u8 >= Z_VIRT_REGION_START_ADDR)
		 && ((vaddr_u8 + size) < Z_VIRT_REGION_END_ADDR),
		 "invalid virtual address region %p (%zu)", vaddr_u8, size);
	if (!((vaddr_u8 >= Z_VIRT_REGION_START_ADDR)
	      && ((vaddr_u8 + size) < Z_VIRT_REGION_END_ADDR))) {
		return;
	}

	offset = virt_to_bitmap_offset(vaddr, size);
	num_bits = size / CONFIG_MMU_PAGE_SIZE;
	(void)sys_bitarray_free(&virt_region_bitmap, num_bits, offset);
}

/*
 * Free page frames management
 *
 * Call all of these functions with z_mm_lock held.
 */

/* Linked list of unused and available page frames.
 *
 * TODO: This is very simple and treats all free page frames as being equal.
 * However, there are use-cases to consolidate free pages such that entire
 * SRAM banks can be switched off to save power, and so obtaining free pages
 * may require a more complex ontology which prefers page frames in RAM banks
 * which are still active.
 *
 * This implies in the future there may be multiple slists managing physical
 * pages. Each page frame will still just have one snode link.
 */
static sys_slist_t free_page_frame_list;

/* Number of unused and available free page frames */
size_t z_free_page_count;

#define PF_ASSERT(pf, expr, fmt, ...) \
	__ASSERT(expr, "page frame 0x%lx: " fmt, z_page_frame_to_phys(pf), \
		 ##__VA_ARGS__)

/* Get an unused page frame; we don't care which one. Returns NULL if there
 * are none.
 */
static struct z_page_frame *free_page_frame_list_get(void)
{
	sys_snode_t *node;
	struct z_page_frame *pf = NULL;

	node = sys_slist_get(&free_page_frame_list);
	if (node != NULL) {
		z_free_page_count--;
		pf = CONTAINER_OF(node, struct z_page_frame, node);
		PF_ASSERT(pf, z_page_frame_is_available(pf),
			  "unavailable but somehow on free list");
	}

	return pf;
}

/* Release a page frame back into the list of free pages */
static void free_page_frame_list_put(struct z_page_frame *pf)
{
	PF_ASSERT(pf, z_page_frame_is_available(pf),
		  "unavailable page put on free list");
	sys_slist_append(&free_page_frame_list, &pf->node);
	z_free_page_count++;
}

static void free_page_frame_list_init(void)
{
	sys_slist_init(&free_page_frame_list);
}

static void page_frame_free_locked(struct z_page_frame *pf)
{
	pf->flags = 0;
	free_page_frame_list_put(pf);
}

/*
 * Memory Mapping
 */

/* Called after the frame is mapped in the arch layer, to update our
 * local ontology (and do some assertions while we're at it)
 */
static void frame_mapped_set(struct z_page_frame *pf, void *addr)
{
	PF_ASSERT(pf, !z_page_frame_is_reserved(pf),
		  "attempted to map a reserved page frame");

	/* We do allow multiple mappings for pinned page frames
	 * since we will never need to reverse map them.
	 * This is uncommon; use-cases are for things like the
	 * Zephyr equivalent of VDSOs.
	 */
	PF_ASSERT(pf, !z_page_frame_is_mapped(pf) || z_page_frame_is_pinned(pf),
		  "non-pinned and already mapped to %p", pf->addr);

	pf->flags |= Z_PAGE_FRAME_MAPPED;
	pf->addr = addr;
}

/* Go through page frames to find the physical address mapped
 * by a virtual address.
 *
 * @param[in]  virt Virtual Address
 * @param[out] phys Physical address mapped to the input virtual address
 *                  if such mapping exists.
 *
 * @retval 0 if mapping is found and valid
 * @retval -EFAULT if virtual address is not mapped
 */
static int virt_to_page_frame(void *virt, uintptr_t *phys)
{
	uintptr_t paddr;
	struct z_page_frame *pf;
	int ret = -EFAULT;

	Z_PAGE_FRAME_FOREACH(paddr, pf) {
		if (z_page_frame_is_mapped(pf)) {
			if (virt == pf->addr) {
				ret = 0;
				*phys = z_page_frame_to_phys(pf);
				break;
			}
		}
	}

	return ret;
}
__weak FUNC_ALIAS(virt_to_page_frame, arch_page_phys_get, int);

#ifdef CONFIG_DEMAND_PAGING
static int page_frame_prepare_locked(struct z_page_frame *pf, bool *dirty_ptr,
				     bool page_in, uintptr_t *location_ptr);

static inline void do_backing_store_page_in(uintptr_t location);
static inline void do_backing_store_page_out(uintptr_t location);
#endif /* CONFIG_DEMAND_PAGING */

/* Allocate a free page frame, and map it to a specified virtual address
 *
 * TODO: Add optional support for copy-on-write mappings to a zero page instead
 * of allocating, in which case page frames will be allocated lazily as
 * the mappings to the zero page get touched. This will avoid expensive
 * page-ins as memory is mapped and physical RAM or backing store space will
 * not be used if the mapped memory is unused. The cost is an empty physical
 * page of zeroes.
 */
static int map_anon_page(void *addr, uint32_t flags)
{
	struct z_page_frame *pf;
	uintptr_t phys;
	bool lock = (flags & K_MEM_MAP_LOCK) != 0U;
	bool uninit = (flags & K_MEM_MAP_UNINIT) != 0U;

	pf = free_page_frame_list_get();
	if (pf == NULL) {
#ifdef CONFIG_DEMAND_PAGING
		uintptr_t location;
		bool dirty;
		int ret;

		pf = k_mem_paging_eviction_select(&dirty);
		__ASSERT(pf != NULL, "failed to get a page frame");
		LOG_DBG("evicting %p at 0x%lx", pf->addr,
			z_page_frame_to_phys(pf));
		ret = page_frame_prepare_locked(pf, &dirty, false, &location);
		if (ret != 0) {
			return -ENOMEM;
		}
		if (dirty) {
			do_backing_store_page_out(location);
		}
		pf->flags = 0;
#else
		return -ENOMEM;
#endif /* CONFIG_DEMAND_PAGING */
	}

	phys = z_page_frame_to_phys(pf);
	arch_mem_map(addr, phys, CONFIG_MMU_PAGE_SIZE, flags | K_MEM_CACHE_WB);

	if (lock) {
		pf->flags |= Z_PAGE_FRAME_PINNED;
	}
	frame_mapped_set(pf, addr);

	LOG_DBG("memory mapping anon page %p -> 0x%lx", addr, phys);

	if (!uninit) {
		/* If we later implement mappings to a copy-on-write
		 * zero page, we won't need this step
		 */
		memset(addr, 0, CONFIG_MMU_PAGE_SIZE);
	}

	return 0;
}

void *k_mem_map(size_t size, uint32_t flags)
{
	uint8_t *dst;
	size_t total_size;
	int ret;
	k_spinlock_key_t key;
	uint8_t *pos;

	__ASSERT(!(((flags & K_MEM_PERM_USER) != 0U) &&
		   ((flags & K_MEM_MAP_UNINIT) != 0U)),
		 "user access to anonymous uninitialized pages is forbidden");
	__ASSERT(size % CONFIG_MMU_PAGE_SIZE == 0U,
		 "unaligned size %zu passed to %s", size, __func__);
	__ASSERT(size != 0, "zero sized memory mapping");
	__ASSERT(page_frames_initialized, "%s called too early", __func__);
	__ASSERT((flags & K_MEM_CACHE_MASK) == 0U,
		 "%s does not support explicit cache settings", __func__);

	key = k_spin_lock(&z_mm_lock);

	/* Need extra for the guard pages (before and after) which we
	 * won't map.
	 */
	total_size = size + CONFIG_MMU_PAGE_SIZE * 2;

	dst = virt_region_alloc(total_size);
	if (dst == NULL) {
		/* Address space has no free region */
		goto out;
	}

	/* Unmap both guard pages to make sure accessing them
	 * will generate a fault.
	 */
	arch_mem_unmap(dst, CONFIG_MMU_PAGE_SIZE);
	arch_mem_unmap(dst + CONFIG_MMU_PAGE_SIZE + size,
		       CONFIG_MMU_PAGE_SIZE);

	/* Skip over the "before" guard page in returned address. */
	dst += CONFIG_MMU_PAGE_SIZE;

	VIRT_FOREACH(dst, size, pos) {
		ret = map_anon_page(pos, flags);

		if (ret != 0) {
			/* TODO: call k_mem_unmap(dst, pos - dst) when
			 * implemented in #28990 and release any guard virtual
			 * page as well.
			 */
			dst = NULL;
			goto out;
		}
	}
out:
	k_spin_unlock(&z_mm_lock, key);
	return dst;
}
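
/* Illustrative (non-compiled) usage sketch for the anonymous mapping API;
 * the exact permission flags available depend on the configuration:
 *
 *	size_t sz = 4 * CONFIG_MMU_PAGE_SIZE;
 *	void *buf = k_mem_map(sz, K_MEM_PERM_RW);
 *
 *	if (buf != NULL) {
 *		// buf points at sz bytes of zeroed, page-aligned memory,
 *		// bracketed by unmapped guard pages on either side
 *		k_mem_unmap(buf, sz);
 *	}
 */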
void k_mem_unmap(void *addr, size_t size)
{
	uintptr_t phys;
	uint8_t *pos;
	struct z_page_frame *pf;
	k_spinlock_key_t key;
	size_t total_size;
	int ret;

	/* Need space for the "before" guard page */
	__ASSERT_NO_MSG(POINTER_TO_UINT(addr) >= CONFIG_MMU_PAGE_SIZE);

	/* Make sure address range is still valid after accounting
	 * for two guard pages.
	 */
	pos = (uint8_t *)addr - CONFIG_MMU_PAGE_SIZE;
	z_mem_assert_virtual_region(pos, size + (CONFIG_MMU_PAGE_SIZE * 2));

	key = k_spin_lock(&z_mm_lock);

	/* Check if both guard pages are unmapped.
	 * Bail if not, as this is probably a region not mapped
	 * using k_mem_map().
	 */
	pos = addr;
	ret = arch_page_phys_get(pos - CONFIG_MMU_PAGE_SIZE, NULL);
	if (ret == 0) {
		__ASSERT(ret != 0,
			 "%s: cannot find preceding guard page for (%p, %zu)",
			 __func__, addr, size);
		goto out;
	}

	ret = arch_page_phys_get(pos + size, NULL);
	if (ret == 0) {
		__ASSERT(ret != 0,
			 "%s: cannot find succeeding guard page for (%p, %zu)",
			 __func__, addr, size);
		goto out;
	}

	VIRT_FOREACH(addr, size, pos) {
		ret = arch_page_phys_get(pos, &phys);

		__ASSERT(ret == 0,
			 "%s: cannot unmap an unmapped address %p",
			 __func__, pos);
		if (ret != 0) {
			/* Found an address not mapped. Do not continue. */
			goto out;
		}

		__ASSERT(z_is_page_frame(phys),
			 "%s: 0x%lx is not a page frame", __func__, phys);
		if (!z_is_page_frame(phys)) {
			/* Physical address has no corresponding page frame
			 * description in the page frame array.
			 * This should not happen. Do not continue.
			 */
			goto out;
		}

		/* Grab the corresponding page frame from physical address */
		pf = z_phys_to_page_frame(phys);

		__ASSERT(z_page_frame_is_mapped(pf),
			 "%s: 0x%lx is not a mapped page frame", __func__, phys);
		if (!z_page_frame_is_mapped(pf)) {
			/* Page frame is not marked mapped.
			 * This should not happen. Do not continue.
			 */
			goto out;
		}

		arch_mem_unmap(pos, CONFIG_MMU_PAGE_SIZE);

		/* Put the page frame back into free list */
		page_frame_free_locked(pf);
	}

	/* There are guard pages just before and after the mapped
	 * region. So we also need to free them from the bitmap.
	 */
	pos = (uint8_t *)addr - CONFIG_MMU_PAGE_SIZE;
	total_size = size + CONFIG_MMU_PAGE_SIZE * 2;
	virt_region_free(pos, total_size);

out:
	k_spin_unlock(&z_mm_lock, key);
}

size_t k_mem_free_get(void)
{
	size_t ret;
	k_spinlock_key_t key;

	__ASSERT(page_frames_initialized, "%s called too early", __func__);

	key = k_spin_lock(&z_mm_lock);

#ifdef CONFIG_DEMAND_PAGING
	if (z_free_page_count > CONFIG_DEMAND_PAGING_PAGE_FRAMES_RESERVE) {
		ret = z_free_page_count - CONFIG_DEMAND_PAGING_PAGE_FRAMES_RESERVE;
	} else {
		ret = 0;
	}
#else
	ret = z_free_page_count;
#endif

	k_spin_unlock(&z_mm_lock, key);

	return ret * (size_t)CONFIG_MMU_PAGE_SIZE;
}

/* This may be called from arch early boot code before z_cstart() is invoked.
 * Data will be copied and BSS zeroed, but this must not rely on any
 * initialization functions having been called beforehand in order to work
 * correctly.
 */
void z_phys_map(uint8_t **virt_ptr, uintptr_t phys, size_t size, uint32_t flags)
{
	uintptr_t aligned_phys, addr_offset;
	size_t aligned_size;
	k_spinlock_key_t key;
	uint8_t *dest_addr;

	addr_offset = k_mem_region_align(&aligned_phys, &aligned_size,
					 phys, size,
					 CONFIG_MMU_PAGE_SIZE);
	__ASSERT(aligned_size != 0U, "0-length mapping at 0x%lx", aligned_phys);
	__ASSERT(aligned_phys < (aligned_phys + (aligned_size - 1)),
		 "wraparound for physical address 0x%lx (size %zu)",
		 aligned_phys, aligned_size);

	key = k_spin_lock(&z_mm_lock);

	/* Obtain an appropriately sized chunk of virtual memory */
	dest_addr = virt_region_alloc(aligned_size);
	if (!dest_addr) {
		goto fail;
	}

	/* If this fails there's something amiss with virt_region_alloc() */
	__ASSERT((uintptr_t)dest_addr <
		 ((uintptr_t)dest_addr + (size - 1)),
		 "wraparound for virtual address %p (size %zu)",
		 dest_addr, size);

	LOG_DBG("arch_mem_map(%p, 0x%lx, %zu, %x) offset %lu", dest_addr,
		aligned_phys, aligned_size, flags, addr_offset);

	arch_mem_map(dest_addr, aligned_phys, aligned_size, flags);
	k_spin_unlock(&z_mm_lock, key);

	*virt_ptr = dest_addr + addr_offset;
	return;
fail:
	/* May re-visit this in the future, but for now running out of
	 * virtual address space or failing the arch_mem_map() call is
	 * an unrecoverable situation.
	 *
	 * Other problems not related to resource exhaustion we leave as
	 * assertions since they are clearly programming mistakes.
	 */
	LOG_ERR("memory mapping 0x%lx (size %zu, flags 0x%x) failed",
		phys, size, flags);
	k_panic();
}
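
/* Illustrative (non-compiled) sketch of mapping a device MMIO region into
 * virtual memory with z_phys_map(); the physical address and flags here are
 * made up for the example:
 *
 *	uint8_t *regs;
 *
 *	z_phys_map(&regs, 0xABCD0123, 0x100,
 *		   K_MEM_PERM_RW | K_MEM_CACHE_NONE);
 *	// regs now points at the byte corresponding to physical 0xABCD0123,
 *	// even though the underlying mapping was page-aligned and page-sized
 */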
void z_phys_unmap(uint8_t *virt, size_t size)
{
	uintptr_t aligned_virt, addr_offset;
	size_t aligned_size;
	k_spinlock_key_t key;

	addr_offset = k_mem_region_align(&aligned_virt, &aligned_size,
					 POINTER_TO_UINT(virt), size,
					 CONFIG_MMU_PAGE_SIZE);
	__ASSERT(aligned_size != 0U, "0-length mapping at 0x%lx", aligned_virt);
	__ASSERT(aligned_virt < (aligned_virt + (aligned_size - 1)),
		 "wraparound for virtual address 0x%lx (size %zu)",
		 aligned_virt, aligned_size);

	key = k_spin_lock(&z_mm_lock);
	arch_mem_unmap(UINT_TO_POINTER(aligned_virt), aligned_size);
	virt_region_free(virt, size);
	k_spin_unlock(&z_mm_lock, key);
}

/*
 * Miscellaneous
 */

size_t k_mem_region_align(uintptr_t *aligned_addr, size_t *aligned_size,
			  uintptr_t addr, size_t size, size_t align)
{
	size_t addr_offset;

	/* The actual mapped region must be page-aligned. Round down the
	 * physical address and pad the region size appropriately.
	 */
	*aligned_addr = ROUND_DOWN(addr, align);
	addr_offset = addr - *aligned_addr;
	*aligned_size = ROUND_UP(size + addr_offset, align);

	return addr_offset;
}
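
/* Worked example for k_mem_region_align(), with illustrative values and
 * align == 0x1000:
 *
 *	addr = 0x12345, size = 0x100
 *	-> *aligned_addr  = 0x12000  (ROUND_DOWN)
 *	   addr_offset    = 0x345    (returned)
 *	   *aligned_size  = 0x1000   (ROUND_UP(0x100 + 0x345, 0x1000))
 *
 * The caller maps [0x12000, 0x13000) and adds addr_offset back to reach the
 * originally requested address.
 */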
#if defined(CONFIG_LINKER_USE_BOOT_SECTION) || defined(CONFIG_LINKER_USE_PINNED_SECTION)
static void mark_linker_section_pinned(void *start_addr, void *end_addr,
				       bool pin)
{
	struct z_page_frame *pf;
	uint8_t *addr;

	uintptr_t pinned_start = ROUND_DOWN(POINTER_TO_UINT(start_addr),
					    CONFIG_MMU_PAGE_SIZE);
	uintptr_t pinned_end = ROUND_UP(POINTER_TO_UINT(end_addr),
					CONFIG_MMU_PAGE_SIZE);
	size_t pinned_size = pinned_end - pinned_start;

	VIRT_FOREACH(UINT_TO_POINTER(pinned_start), pinned_size, addr)
	{
		pf = z_phys_to_page_frame(Z_BOOT_VIRT_TO_PHYS(addr));
		frame_mapped_set(pf, addr);

		if (pin) {
			pf->flags |= Z_PAGE_FRAME_PINNED;
		} else {
			pf->flags &= ~Z_PAGE_FRAME_PINNED;
		}
	}
}
#endif /* CONFIG_LINKER_USE_BOOT_SECTION || CONFIG_LINKER_USE_PINNED_SECTION */

void z_mem_manage_init(void)
{
	uintptr_t phys;
	uint8_t *addr;
	struct z_page_frame *pf;
	k_spinlock_key_t key = k_spin_lock(&z_mm_lock);

	free_page_frame_list_init();

	ARG_UNUSED(addr);

#ifdef CONFIG_ARCH_HAS_RESERVED_PAGE_FRAMES
	/* If some page frames are unavailable for use as memory, arch
	 * code will mark Z_PAGE_FRAME_RESERVED in their flags
	 */
	arch_reserved_pages_update();
#endif /* CONFIG_ARCH_HAS_RESERVED_PAGE_FRAMES */

#ifdef CONFIG_LINKER_GENERIC_SECTIONS_PRESENT_AT_BOOT
	/* All pages composing the Zephyr image are mapped at boot in a
	 * predictable way. This can change at runtime.
	 */
	VIRT_FOREACH(Z_KERNEL_VIRT_START, Z_KERNEL_VIRT_SIZE, addr)
	{
		pf = z_phys_to_page_frame(Z_BOOT_VIRT_TO_PHYS(addr));
		frame_mapped_set(pf, addr);

		/* TODO: for now we pin the whole Zephyr image. Demand paging
		 * is currently tested with anonymously-mapped pages which are
		 * not pinned.
		 *
		 * We will need to setup linker regions for a subset of kernel
		 * code/data pages which are pinned in memory and
		 * may not be evicted. This will contain critical CPU data
		 * structures, and any code used to perform page fault
		 * handling, page-ins, etc.
		 */
		pf->flags |= Z_PAGE_FRAME_PINNED;
	}
#endif /* CONFIG_LINKER_GENERIC_SECTIONS_PRESENT_AT_BOOT */

#ifdef CONFIG_LINKER_USE_BOOT_SECTION
	/* Pin the boot section to prevent it from being swapped out during
	 * the boot process. It will be un-pinned once the boot process
	 * completes.
	 */
	mark_linker_section_pinned(lnkr_boot_start, lnkr_boot_end, true);
#endif

#ifdef CONFIG_LINKER_USE_PINNED_SECTION
	/* Pin the page frames corresponding to the pinned symbols */
	mark_linker_section_pinned(lnkr_pinned_start, lnkr_pinned_end, true);
#endif

	/* Any remaining pages that aren't mapped, reserved, or pinned get
	 * added to the free pages list
	 */
	Z_PAGE_FRAME_FOREACH(phys, pf) {
		if (z_page_frame_is_available(pf)) {
			free_page_frame_list_put(pf);
		}
	}
	LOG_DBG("free page frames: %zu", z_free_page_count);

#ifdef CONFIG_DEMAND_PAGING
#ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
	z_paging_histogram_init();
#endif
	k_mem_paging_backing_store_init();
	k_mem_paging_eviction_init();
#endif
#if __ASSERT_ON
	page_frames_initialized = true;
#endif
	k_spin_unlock(&z_mm_lock, key);

#ifndef CONFIG_LINKER_GENERIC_SECTIONS_PRESENT_AT_BOOT
	/* If the BSS section is not present in memory at boot,
	 * it will not have been cleared. This needs to be
	 * done now since the paging mechanism has been initialized
	 * and the BSS pages can be brought into physical
	 * memory to be cleared.
	 */
	z_bss_zero();
#endif
}

void z_mem_manage_boot_finish(void)
{
#ifdef CONFIG_LINKER_USE_BOOT_SECTION
	/* At the end of the boot process, unpin the boot sections
	 * as they don't need to stay in memory all the time anymore.
	 */
	mark_linker_section_pinned(lnkr_boot_start, lnkr_boot_end, false);
#endif
}

#ifdef CONFIG_DEMAND_PAGING
#ifdef CONFIG_DEMAND_PAGING_STATS
struct k_mem_paging_stats_t paging_stats;
extern struct k_mem_paging_histogram_t z_paging_histogram_eviction;
extern struct k_mem_paging_histogram_t z_paging_histogram_backing_store_page_in;
extern struct k_mem_paging_histogram_t z_paging_histogram_backing_store_page_out;
#endif

static inline void do_backing_store_page_in(uintptr_t location)
{
#ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
	uint32_t time_diff;

#ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
	timing_t time_start, time_end;

	time_start = timing_counter_get();
#else
	uint32_t time_start;

	time_start = k_cycle_get_32();
#endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
#endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */

	k_mem_paging_backing_store_page_in(location);

#ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
#ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
	time_end = timing_counter_get();
	time_diff = (uint32_t)timing_cycles_get(&time_start, &time_end);
#else
	time_diff = k_cycle_get_32() - time_start;
#endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */

	z_paging_histogram_inc(&z_paging_histogram_backing_store_page_in,
			       time_diff);
#endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
}

static inline void do_backing_store_page_out(uintptr_t location)
{
#ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
	uint32_t time_diff;

#ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
	timing_t time_start, time_end;

	time_start = timing_counter_get();
#else
	uint32_t time_start;

	time_start = k_cycle_get_32();
#endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
#endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */

	k_mem_paging_backing_store_page_out(location);

#ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
#ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
	time_end = timing_counter_get();
	time_diff = (uint32_t)timing_cycles_get(&time_start, &time_end);
#else
	time_diff = k_cycle_get_32() - time_start;
#endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */

	z_paging_histogram_inc(&z_paging_histogram_backing_store_page_out,
			       time_diff);
#endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
}

/* The current implementation relies on interrupt locking to prevent any page
 * table access, which falls over if other CPUs are active. Addressing this is
 * not as simple as using spinlocks, as regular memory reads/writes constitute
 * "access" in this sense.
 *
 * Current needs for demand paging are on uniprocessor systems.
 */
BUILD_ASSERT(!IS_ENABLED(CONFIG_SMP));

static void virt_region_foreach(void *addr, size_t size,
				void (*func)(void *))
{
	z_mem_assert_virtual_region(addr, size);

	for (size_t offset = 0; offset < size; offset += CONFIG_MMU_PAGE_SIZE) {
		func((uint8_t *)addr + offset);
	}
}

/*
 * Perform some preparatory steps before paging out. The provided page frame
 * must be evicted to the backing store immediately after this is called
 * with a call to k_mem_paging_backing_store_page_out() if it contains
 * a data page.
 *
 * - Map page frame to scratch area if requested. This is always true if we're
 *   handling a page fault, but is only set on manual evictions if the page is
 *   dirty.
 * - If mapped:
 *    - obtain backing store location and populate location parameter
 *    - Update page tables with location
 * - Mark page frame as busy
 *
 * Returns -ENOMEM if the backing store is full
 */
static int page_frame_prepare_locked(struct z_page_frame *pf, bool *dirty_ptr,
				     bool page_fault, uintptr_t *location_ptr)
{
	uintptr_t phys;
	int ret;
	bool dirty = *dirty_ptr;

	phys = z_page_frame_to_phys(pf);
	__ASSERT(!z_page_frame_is_pinned(pf), "page frame 0x%lx is pinned",
		 phys);

	/* If the backing store doesn't have a copy of the page, even if it
	 * wasn't modified, treat as dirty. This can happen for a few
	 * reasons:
	 * 1) Page has never been swapped out before, and the backing store
	 *    wasn't pre-populated with this data page.
	 * 2) Page was swapped out before, but the page contents were not
	 *    preserved after swapping back in.
	 * 3) Page contents were preserved when swapped back in, but were later
	 *    evicted from the backing store to make room for other evicted
	 *    pages.
	 */
	if (z_page_frame_is_mapped(pf)) {
		dirty = dirty || !z_page_frame_is_backed(pf);
	}

	if (dirty || page_fault) {
		arch_mem_scratch(phys);
	}

	if (z_page_frame_is_mapped(pf)) {
		ret = k_mem_paging_backing_store_location_get(pf, location_ptr,
							      page_fault);
		if (ret != 0) {
			LOG_ERR("out of backing store memory");
			return -ENOMEM;
		}
		arch_mem_page_out(pf->addr, *location_ptr);
	} else {
		/* Shouldn't happen unless this function is mis-used */
		__ASSERT(!dirty, "un-mapped page determined to be dirty");
	}
#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
	/* Mark as busy so that z_page_frame_is_evictable() returns false */
	__ASSERT(!z_page_frame_is_busy(pf), "page frame 0x%lx is already busy",
		 phys);
	pf->flags |= Z_PAGE_FRAME_BUSY;
#endif
	/* Update dirty parameter, since we set to true if it wasn't backed
	 * even if otherwise clean
	 */
	*dirty_ptr = dirty;

	return 0;
}

static int do_mem_evict(void *addr)
{
	bool dirty;
	struct z_page_frame *pf;
	uintptr_t location;
	int key, ret;
	uintptr_t flags, phys;

#if CONFIG_DEMAND_PAGING_ALLOW_IRQ
	__ASSERT(!k_is_in_isr(),
		 "%s is unavailable in ISRs with CONFIG_DEMAND_PAGING_ALLOW_IRQ",
		 __func__);
	k_sched_lock();
#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
	key = irq_lock();
	flags = arch_page_info_get(addr, &phys, false);
	__ASSERT((flags & ARCH_DATA_PAGE_NOT_MAPPED) == 0,
		 "address %p isn't mapped", addr);
	if ((flags & ARCH_DATA_PAGE_LOADED) == 0) {
		/* Un-mapped or already evicted. Nothing to do */
		ret = 0;
		goto out;
	}

	dirty = (flags & ARCH_DATA_PAGE_DIRTY) != 0;
	pf = z_phys_to_page_frame(phys);
	__ASSERT(pf->addr == addr, "page frame address mismatch");
	ret = page_frame_prepare_locked(pf, &dirty, false, &location);
	if (ret != 0) {
		goto out;
	}

	__ASSERT(ret == 0, "failed to prepare page frame");
#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
	irq_unlock(key);
#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
	if (dirty) {
		do_backing_store_page_out(location);
	}
#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
	key = irq_lock();
#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
	page_frame_free_locked(pf);
out:
	irq_unlock(key);
#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
	k_sched_unlock();
#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
	return ret;
}

int k_mem_page_out(void *addr, size_t size)
{
	__ASSERT(page_frames_initialized, "%s called on %p too early", __func__,
		 addr);
	z_mem_assert_virtual_region(addr, size);

	for (size_t offset = 0; offset < size; offset += CONFIG_MMU_PAGE_SIZE) {
		void *pos = (uint8_t *)addr + offset;
		int ret;

		ret = do_mem_evict(pos);
		if (ret != 0) {
			return ret;
		}
	}

	return 0;
}
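
/* Illustrative (non-compiled) sketch of proactively evicting a page-aligned
 * buffer that won't be needed for a while; big_buffer is hypothetical:
 *
 *	int ret = k_mem_page_out(big_buffer, 16 * CONFIG_MMU_PAGE_SIZE);
 *
 *	if (ret == 0) {
 *		// the data pages now live only in the backing store; the next
 *		// access to big_buffer will fault and be paged back in, or
 *		// k_mem_page_in() can be used to bring it back up front
 *	}
 */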
int z_page_frame_evict(uintptr_t phys)
{
	int key, ret;
	struct z_page_frame *pf;
	bool dirty;
	uintptr_t flags;
	uintptr_t location;

	__ASSERT(page_frames_initialized, "%s called on 0x%lx too early",
		 __func__, phys);

	/* Implementation is similar to do_page_fault() except there is no
	 * data page to page-in, see comments in that function.
	 */

#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
	__ASSERT(!k_is_in_isr(),
		 "%s is unavailable in ISRs with CONFIG_DEMAND_PAGING_ALLOW_IRQ",
		 __func__);
	k_sched_lock();
#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
	key = irq_lock();
	pf = z_phys_to_page_frame(phys);
	if (!z_page_frame_is_mapped(pf)) {
		/* Nothing to do, free page */
		ret = 0;
		goto out;
	}
	flags = arch_page_info_get(pf->addr, NULL, false);
	/* Shouldn't ever happen */
	__ASSERT((flags & ARCH_DATA_PAGE_LOADED) != 0, "data page not loaded");
	dirty = (flags & ARCH_DATA_PAGE_DIRTY) != 0;
	ret = page_frame_prepare_locked(pf, &dirty, false, &location);
	if (ret != 0) {
		goto out;
	}

#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
	irq_unlock(key);
#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
	if (dirty) {
		do_backing_store_page_out(location);
	}
#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
	key = irq_lock();
#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
	page_frame_free_locked(pf);
out:
	irq_unlock(key);
#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
	k_sched_unlock();
#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
	return ret;
}

static inline void paging_stats_faults_inc(struct k_thread *faulting_thread,
					   int key)
{
#ifdef CONFIG_DEMAND_PAGING_STATS
	bool is_irq_unlocked = arch_irq_unlocked(key);

	paging_stats.pagefaults.cnt++;

	if (is_irq_unlocked) {
		paging_stats.pagefaults.irq_unlocked++;
	} else {
		paging_stats.pagefaults.irq_locked++;
	}

#ifdef CONFIG_DEMAND_PAGING_THREAD_STATS
	faulting_thread->paging_stats.pagefaults.cnt++;

	if (is_irq_unlocked) {
		faulting_thread->paging_stats.pagefaults.irq_unlocked++;
	} else {
		faulting_thread->paging_stats.pagefaults.irq_locked++;
	}
#else
	ARG_UNUSED(faulting_thread);
#endif

#ifndef CONFIG_DEMAND_PAGING_ALLOW_IRQ
	if (k_is_in_isr()) {
		paging_stats.pagefaults.in_isr++;

#ifdef CONFIG_DEMAND_PAGING_THREAD_STATS
		faulting_thread->paging_stats.pagefaults.in_isr++;
#endif
	}
#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
#endif /* CONFIG_DEMAND_PAGING_STATS */
}

static inline void paging_stats_eviction_inc(struct k_thread *faulting_thread,
					     bool dirty)
{
#ifdef CONFIG_DEMAND_PAGING_STATS
	if (dirty) {
		paging_stats.eviction.dirty++;
	} else {
		paging_stats.eviction.clean++;
	}
#ifdef CONFIG_DEMAND_PAGING_THREAD_STATS
	if (dirty) {
		faulting_thread->paging_stats.eviction.dirty++;
	} else {
		faulting_thread->paging_stats.eviction.clean++;
	}
#else
	ARG_UNUSED(faulting_thread);
#endif /* CONFIG_DEMAND_PAGING_THREAD_STATS */
#endif /* CONFIG_DEMAND_PAGING_STATS */
}

static inline struct z_page_frame *do_eviction_select(bool *dirty)
{
	struct z_page_frame *pf;

#ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
	uint32_t time_diff;

#ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
	timing_t time_start, time_end;

	time_start = timing_counter_get();
#else
	uint32_t time_start;

	time_start = k_cycle_get_32();
#endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
#endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */

	pf = k_mem_paging_eviction_select(dirty);

#ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
#ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
	time_end = timing_counter_get();
	time_diff = (uint32_t)timing_cycles_get(&time_start, &time_end);
#else
	time_diff = k_cycle_get_32() - time_start;
#endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */

	z_paging_histogram_inc(&z_paging_histogram_eviction, time_diff);
#endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */

	return pf;
}

static bool do_page_fault(void *addr, bool pin)
{
	struct z_page_frame *pf;
	int key, ret;
	uintptr_t page_in_location, page_out_location;
	enum arch_page_location status;
	bool result;
	bool dirty = false;
	struct k_thread *faulting_thread = _current_cpu->current;

	__ASSERT(page_frames_initialized, "page fault at %p happened too early",
		 addr);

	LOG_DBG("page fault at %p", addr);

	/*
	 * TODO: Add performance accounting:
	 * - k_mem_paging_eviction_select() metrics
	 *   * periodic timer execution time histogram (if implemented)
	 */

#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
	/* We lock the scheduler so that other threads are never scheduled
	 * during the page-in/out operation.
	 *
	 * We do however re-enable interrupts during the page-in/page-out
	 * operation iff interrupts were enabled when the exception was taken;
	 * in this configuration page faults in an ISR are a bug; all their
	 * code/data must be pinned.
	 *
	 * If interrupts were disabled when the exception was taken, the
	 * arch code is responsible for keeping them that way when entering
	 * this function.
	 *
	 * If this is not enabled, then interrupts are always locked for the
	 * entire operation. This is far worse for system interrupt latency,
	 * but requires fewer pinned pages, and ISRs may also take page faults.
	 *
	 * Support for allowing k_mem_paging_backing_store_page_out() and
	 * k_mem_paging_backing_store_page_in() to also sleep and allow
	 * other threads to run (such as in the case where the transfer is
	 * async DMA) is not implemented. Even if limited to thread context,
	 * arbitrary memory access triggering exceptions that put a thread to
	 * sleep on a contended page fault operation will break scheduling
	 * assumptions of cooperative threads or threads that implement
	 * critical sections with spinlocks or disabling IRQs.
	 */
	k_sched_lock();
	__ASSERT(!k_is_in_isr(), "ISR page faults are forbidden");
#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
	key = irq_lock();
	status = arch_page_location_get(addr, &page_in_location);
	if (status == ARCH_PAGE_LOCATION_BAD) {
		/* Return false to treat as a fatal error */
		result = false;
		goto out;
	}
	result = true;

	if (status == ARCH_PAGE_LOCATION_PAGED_IN) {
		if (pin) {
			/* It's a physical memory address */
			uintptr_t phys = page_in_location;

			pf = z_phys_to_page_frame(phys);
			pf->flags |= Z_PAGE_FRAME_PINNED;
		}

		/* This if-block is to pin the page if it is
		 * already present in physical memory. There is
		 * no need to go through the following code to
		 * pull in the data pages. So skip to the end.
		 */
		goto out;
	}
	__ASSERT(status == ARCH_PAGE_LOCATION_PAGED_OUT,
		 "unexpected status value %d", status);

	paging_stats_faults_inc(faulting_thread, key);

	pf = free_page_frame_list_get();
	if (pf == NULL) {
		/* Need to evict a page frame */
		pf = do_eviction_select(&dirty);
		__ASSERT(pf != NULL, "failed to get a page frame");
		LOG_DBG("evicting %p at 0x%lx", pf->addr,
			z_page_frame_to_phys(pf));

		paging_stats_eviction_inc(faulting_thread, dirty);
	}
	ret = page_frame_prepare_locked(pf, &dirty, true, &page_out_location);
	__ASSERT(ret == 0, "failed to prepare page frame");

#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
	irq_unlock(key);
	/* Interrupts are now unlocked if they were not locked when we entered
	 * this function, and we may service ISRs. The scheduler is still
	 * locked.
	 */
#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
	if (dirty) {
		do_backing_store_page_out(page_out_location);
	}
	do_backing_store_page_in(page_in_location);

#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
	key = irq_lock();
	pf->flags &= ~Z_PAGE_FRAME_BUSY;
#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
	if (pin) {
		pf->flags |= Z_PAGE_FRAME_PINNED;
	}
	pf->flags |= Z_PAGE_FRAME_MAPPED;
	pf->addr = UINT_TO_POINTER(POINTER_TO_UINT(addr)
				   & ~(CONFIG_MMU_PAGE_SIZE - 1));

	arch_mem_page_in(addr, z_page_frame_to_phys(pf));
	k_mem_paging_backing_store_page_finalize(pf, page_in_location);
out:
	irq_unlock(key);
#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
	k_sched_unlock();
#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */

	return result;
}

static void do_page_in(void *addr)
{
	bool ret;

	ret = do_page_fault(addr, false);
	__ASSERT(ret, "unmapped memory address %p", addr);
	(void)ret;
}

void k_mem_page_in(void *addr, size_t size)
{
	__ASSERT(!IS_ENABLED(CONFIG_DEMAND_PAGING_ALLOW_IRQ) || !k_is_in_isr(),
		 "%s may not be called in ISRs if CONFIG_DEMAND_PAGING_ALLOW_IRQ is enabled",
		 __func__);
	virt_region_foreach(addr, size, do_page_in);
}

static void do_mem_pin(void *addr)
{
	bool ret;

	ret = do_page_fault(addr, true);
	__ASSERT(ret, "unmapped memory address %p", addr);
	(void)ret;
}

void k_mem_pin(void *addr, size_t size)
{
	__ASSERT(!IS_ENABLED(CONFIG_DEMAND_PAGING_ALLOW_IRQ) || !k_is_in_isr(),
		 "%s may not be called in ISRs if CONFIG_DEMAND_PAGING_ALLOW_IRQ is enabled",
		 __func__);
	virt_region_foreach(addr, size, do_mem_pin);
}

bool z_page_fault(void *addr)
{
	return do_page_fault(addr, false);
}

static void do_mem_unpin(void *addr)
{
	struct z_page_frame *pf;
	int key;
	uintptr_t flags, phys;

	key = irq_lock();
	flags = arch_page_info_get(addr, &phys, false);
	__ASSERT((flags & ARCH_DATA_PAGE_NOT_MAPPED) == 0,
		 "invalid data page at %p", addr);
	if ((flags & ARCH_DATA_PAGE_LOADED) != 0) {
		pf = z_phys_to_page_frame(phys);
		pf->flags &= ~Z_PAGE_FRAME_PINNED;
	}
	irq_unlock(key);
}

void k_mem_unpin(void *addr, size_t size)
{
	__ASSERT(page_frames_initialized, "%s called on %p too early", __func__,
		 addr);
	virt_region_foreach(addr, size, do_mem_unpin);
}
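
/* Illustrative (non-compiled) sketch of temporarily pinning a region so that
 * a latency-sensitive stretch of code never takes page faults on it;
 * dma_descriptors is hypothetical:
 *
 *	k_mem_pin(dma_descriptors, CONFIG_MMU_PAGE_SIZE);
 *	// ...time-critical work touching dma_descriptors...
 *	k_mem_unpin(dma_descriptors, CONFIG_MMU_PAGE_SIZE);
 */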
#endif /* CONFIG_DEMAND_PAGING */