Lines Matching refs:page

14  *  Per cpu hot/cold page lists, bulk allocation, Martin J. Bligh, Sept 2002
70 * Skip free page reporting notification for the (possibly merged) page.
71 * This does not hinder free page reporting from grabbing the page,
73 * the free page reporting infrastructure about a newly freed page. For
74 * example, used when temporarily pulling a page from a freelist and
80 * Place the (possibly merged) page at the tail of the freelist. Will ignore
81 * page shuffling (relevant code - e.g., memory onlining - is expected to
86 * (memory onlining) or untouched pages (page isolation, free page
214 static void __free_pages_ok(struct page *page, unsigned int order,
288 static bool page_contains_unaccepted(struct page *page, unsigned int order);
289 static void accept_page(struct page *page, unsigned int order);
292 static bool __free_unaccepted(struct page *page);
328 static inline unsigned long *get_pageblock_bitmap(const struct page *page,
334 return page_zone(page)->pageblock_flags;
338 static inline int pfn_to_bitidx(const struct page *page, unsigned long pfn)
343 pfn = pfn - pageblock_start_pfn(page_zone(page)->zone_start_pfn);
350 * @page: The page within the block of interest
351 * @pfn: The target page frame number
356 unsigned long get_pfnblock_flags_mask(const struct page *page,
363 bitmap = get_pageblock_bitmap(page, pfn);
364 bitidx = pfn_to_bitidx(page, pfn);
376 static __always_inline int get_pfnblock_migratetype(const struct page *page,
379 return get_pfnblock_flags_mask(page, pfn, MIGRATETYPE_MASK);
384 * @page: The page within the block of interest
386 * @pfn: The target page frame number
389 void set_pfnblock_flags_mask(struct page *page, unsigned long flags,
400 bitmap = get_pageblock_bitmap(page, pfn);
401 bitidx = pfn_to_bitidx(page, pfn);
405 VM_BUG_ON_PAGE(!zone_spans_pfn(page_zone(page), pfn), page);
415 void set_pageblock_migratetype(struct page *page, int migratetype)
421 set_pfnblock_flags_mask(page, (unsigned long)migratetype,
422 page_to_pfn(page), MIGRATETYPE_MASK);
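
The lines above (get_pageblock_bitmap, pfn_to_bitidx, get/set_pfnblock_flags_mask) read and write a per-pageblock migratetype that is packed, a few bits per block, into a zone-wide bitmap. The sketch below is a minimal userspace model of that indexing arithmetic only, assuming 4 bits per pageblock and pageblock_order = 9; it is not the kernel code, and unlike set_pfnblock_flags_mask() it does not update the bitmap word atomically.

/* Minimal userspace model of a packed per-pageblock flags bitmap.
 * Assumptions (not the kernel's real constants): 4 bits per block,
 * pageblock_order = 9, zone starting at zone_start_pfn.
 */
#include <stdio.h>

#define PAGEBLOCK_ORDER   9
#define NR_PAGEBLOCK_BITS 4
#define MIGRATETYPE_MASK  ((1UL << NR_PAGEBLOCK_BITS) - 1)

static unsigned long bitmap[64];            /* covers ~1024 pageblocks on 64-bit */
static const unsigned long zone_start_pfn = 0x10000;

/* Index of the first bit describing the pageblock that contains @pfn. */
static unsigned long pfn_to_bitidx_model(unsigned long pfn)
{
	unsigned long block = (pfn - zone_start_pfn) >> PAGEBLOCK_ORDER;
	return block * NR_PAGEBLOCK_BITS;
}

static unsigned long get_block_flags(unsigned long pfn)
{
	unsigned long bitidx = pfn_to_bitidx_model(pfn);
	unsigned long word = bitmap[bitidx / (8 * sizeof(unsigned long))];

	return (word >> (bitidx % (8 * sizeof(unsigned long)))) & MIGRATETYPE_MASK;
}

static void set_block_flags(unsigned long pfn, unsigned long flags)
{
	unsigned long bitidx = pfn_to_bitidx_model(pfn);
	unsigned long *word = &bitmap[bitidx / (8 * sizeof(unsigned long))];
	unsigned long shift = bitidx % (8 * sizeof(unsigned long));

	/* non-atomic; the kernel does a cmpxchg loop on the word */
	*word = (*word & ~(MIGRATETYPE_MASK << shift)) | (flags << shift);
}

int main(void)
{
	set_block_flags(0x10200, 2);                 /* e.g. a "reclaimable" block */
	printf("%lu\n", get_block_flags(0x102ff));   /* same pageblock -> prints 2 */
	return 0;
}
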
426 static int page_outside_zone_boundaries(struct zone *zone, struct page *page)
430 unsigned long pfn = page_to_pfn(page);
441 pr_err("page 0x%lx outside node %d zone %s [ 0x%lx - 0x%lx ]\n",
451 static bool __maybe_unused bad_range(struct zone *zone, struct page *page)
453 if (page_outside_zone_boundaries(zone, page))
455 if (zone != page_zone(page))
461 static inline bool __maybe_unused bad_range(struct zone *zone, struct page *page)
467 static void bad_page(struct page *page, const char *reason)
484 "BUG: Bad page state: %lu messages suppressed\n",
493 pr_alert("BUG: Bad page state in process %s pfn:%05lx\n",
494 current->comm, page_to_pfn(page));
495 dump_page(page, reason);
501 page_mapcount_reset(page); /* remove PageBuddy */
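
bad_page() (line 467 above) prints a "Bad page state" report but, per the "%lu messages suppressed" format string, throttles repeated reports. Below is a small sketch of that kind of report rate limiting, assuming a simple one-report-per-60-seconds policy rather than the kernel's actual burst accounting; report_bad_page() and its constants are hypothetical.

/* Report-ratelimit model: allow one report per interval and count how
 * many were suppressed in between.  The 60-second interval is an
 * assumption, not the kernel's policy.
 */
#include <stdio.h>
#include <time.h>

static time_t last_report;
static unsigned long suppressed;

static void report_bad_page(unsigned long pfn)
{
	time_t now = time(NULL);

	if (last_report && now - last_report < 60) {
		suppressed++;                    /* stay quiet inside the interval */
		return;
	}
	if (suppressed)
		printf("BUG: Bad page state: %lu messages suppressed\n", suppressed);
	printf("BUG: Bad page state pfn:%05lx\n", pfn);
	last_report = now;
	suppressed = 0;
}

int main(void)
{
	for (unsigned long pfn = 0; pfn < 5; pfn++)
		report_bad_page(0x1000 + pfn);   /* only the first call prints */
	return 0;
}
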
547 * The first PAGE_SIZE page is called the "head page" and has PG_head set.
550 * in bit 0 of page->compound_head. The rest of the bits is a pointer to the head page.
552 * The first tail page's ->compound_order holds the order of allocation.
556 void prep_compound_page(struct page *page, unsigned int order)
561 __SetPageHead(page);
563 prep_compound_tail(page, i);
565 prep_compound_head(page, order);
568 static inline void set_buddy_order(struct page *page, unsigned int order)
570 set_page_private(page, order);
571 __SetPageBuddy(page);
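
prep_compound_page() (line 556) together with the comment above it describes how a compound page is encoded: the head page gets PG_head, and each tail page stores a pointer to the head with bit 0 set, which compound_head() later decodes. A minimal sketch of that tagged-pointer scheme follows, using a hypothetical struct fake_page rather than the kernel's struct page.

/* Tagged-pointer model of compound_head: tail pages store (head | 1),
 * so bit 0 distinguishes tails from heads.  struct fake_page is made
 * up for the sketch; its layout has nothing to do with struct page.
 */
#include <stdio.h>
#include <stdint.h>

struct fake_page {
	uintptr_t compound_head;   /* 0 for a head page, (head | 1) for a tail */
};

static void prep_compound(struct fake_page *pages, unsigned int order)
{
	unsigned long i, nr = 1UL << order;

	pages[0].compound_head = 0;                         /* the head page */
	for (i = 1; i < nr; i++)                            /* the tail pages */
		pages[i].compound_head = (uintptr_t)&pages[0] | 1;
}

static struct fake_page *compound_head_model(struct fake_page *page)
{
	uintptr_t head = page->compound_head;

	if (head & 1)                                       /* tail: follow pointer */
		return (struct fake_page *)(head & ~(uintptr_t)1);
	return page;                                        /* already the head */
}

int main(void)
{
	struct fake_page pages[8];

	prep_compound(pages, 3);
	printf("%d\n", compound_head_model(&pages[5]) == &pages[0]);   /* prints 1 */
	return 0;
}
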
581 !capc->page &&
586 compaction_capture(struct capture_control *capc, struct page *page,
602 * have trouble finding a high-order free page.
608 capc->page = page;
619 compaction_capture(struct capture_control *capc, struct page *page,
639 static inline void __add_to_free_list(struct page *page, struct zone *zone,
645 VM_WARN_ONCE(get_pageblock_migratetype(page) != migratetype,
646 "page type is %lu, passed migratetype is %d (nr=%d)\n",
647 get_pageblock_migratetype(page), migratetype, 1 << order);
650 list_add_tail(&page->buddy_list, &area->free_list[migratetype]);
652 list_add(&page->buddy_list, &area->free_list[migratetype]);
661 static inline void move_to_free_list(struct page *page, struct zone *zone,
666 /* Free page moving can fail, so it happens before the type update */
667 VM_WARN_ONCE(get_pageblock_migratetype(page) != old_mt,
668 "page type is %lu, passed migratetype is %d (nr=%d)\n",
669 get_pageblock_migratetype(page), old_mt, 1 << order);
671 list_move_tail(&page->buddy_list, &area->free_list[new_mt]);
677 static inline void __del_page_from_free_list(struct page *page, struct zone *zone,
680 VM_WARN_ONCE(get_pageblock_migratetype(page) != migratetype,
681 "page type is %lu, passed migratetype is %d (nr=%d)\n",
682 get_pageblock_migratetype(page), migratetype, 1 << order);
684 /* clear reported state and update reported page count */
685 if (page_reported(page))
686 __ClearPageReported(page);
688 list_del(&page->buddy_list);
689 __ClearPageBuddy(page);
690 set_page_private(page, 0);
694 static inline void del_page_from_free_list(struct page *page, struct zone *zone,
697 __del_page_from_free_list(page, zone, order, migratetype);
701 static inline struct page *get_page_from_free_area(struct free_area *area,
705 struct page, buddy_list);
709 * If this is not the largest possible page, check if the buddy
712 * that is happening, add the free page to the tail of the list
714 * as a higher order page
718 struct page *page, unsigned int order)
721 struct page *higher_page;
727 higher_page = page + (higher_page_pfn - pfn);
747 * Page's order is recorded in page_private(page) field.
757 static inline void __free_one_page(struct page *page,
765 struct page *buddy;
769 VM_BUG_ON_PAGE(page->flags & PAGE_FLAGS_CHECK_AT_PREP, page);
772 VM_BUG_ON_PAGE(pfn & ((1 << order) - 1), page);
773 VM_BUG_ON_PAGE(bad_range(zone, page), page);
780 if (compaction_capture(capc, page, order, migratetype)) {
785 buddy = find_buddy_page_pfn(page, pfn, order, &buddy_pfn);
805 * Our buddy is free or it is CONFIG_DEBUG_PAGEALLOC guard page,
823 page = page + (combined_pfn - pfn);
829 set_buddy_order(page, order);
836 to_tail = buddy_merge_likely(pfn, buddy_pfn, page, order);
838 __add_to_free_list(page, zone, order, migratetype, to_tail);
840 /* Notify page reporting subsystem of freed page */
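
__free_one_page() (line 757) merges the freed block with its buddy for as long as the buddy is also free and of the same order. Buddies of a 2^order block differ only in bit 'order' of the PFN, so the buddy is found by an XOR and the merged block starts at the AND of the two PFNs. The sketch below works that arithmetic through for an example PFN and does no freelist bookkeeping at all.

/* Buddy-merge arithmetic only: find the buddy of a 2^order block by
 * flipping bit 'order' of its PFN, and the merged block's start PFN
 * by keeping the common high bits.
 */
#include <stdio.h>

int main(void)
{
	unsigned long pfn = 0x1a4;        /* start of a free order-2 block */
	unsigned int order = 2;

	while (order < 10) {              /* 10 stands in for the max order */
		unsigned long buddy_pfn = pfn ^ (1UL << order);
		unsigned long combined_pfn = pfn & buddy_pfn;

		printf("order %u: block 0x%lx, buddy 0x%lx, merged 0x%lx\n",
		       order, pfn, buddy_pfn, combined_pfn);

		/* The kernel only continues if the buddy really is free and of
		 * the same order; here we just show where each merge would land. */
		pfn = combined_pfn;
		order++;
	}
	return 0;
}
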
846 * A bad page could be due to a number of fields. Instead of multiple branches,
850 static inline bool page_expected_state(struct page *page,
853 if (unlikely(atomic_read(&page->_mapcount) != -1))
856 if (unlikely((unsigned long)page->mapping |
857 page_ref_count(page) |
859 page->memcg_data |
862 ((page->pp_magic & ~0x3UL) == PP_SIGNATURE) |
864 (page->flags & check_flags)))
870 static const char *page_bad_reason(struct page *page, unsigned long flags)
874 if (unlikely(atomic_read(&page->_mapcount) != -1))
876 if (unlikely(page->mapping != NULL))
878 if (unlikely(page_ref_count(page) != 0))
880 if (unlikely(page->flags & flags)) {
887 if (unlikely(page->memcg_data))
888 bad_reason = "page still charged to cgroup";
891 if (unlikely((page->pp_magic & ~0x3UL) == PP_SIGNATURE))
897 static void free_page_is_bad_report(struct page *page)
899 bad_page(page,
900 page_bad_reason(page, PAGE_FLAGS_CHECK_AT_FREE));
903 static inline bool free_page_is_bad(struct page *page)
905 if (likely(page_expected_state(page, PAGE_FLAGS_CHECK_AT_FREE)))
909 free_page_is_bad_report(page);
918 static int free_tail_page_prepare(struct page *head_page, struct page *page)
924 * We rely on page->lru.next never having bit 0 set, unless the page
933 switch (page - head_page) {
935 /* the first tail page: these may be in place of ->mapping */
937 bad_page(page, "nonzero entire_mapcount");
941 bad_page(page, "nonzero large_mapcount");
945 bad_page(page, "nonzero nr_pages_mapped");
949 bad_page(page, "nonzero pincount");
954 /* the second tail page: deferred_list overlaps ->mapping */
956 bad_page(page, "on deferred list");
961 if (page->mapping != TAIL_MAPPING) {
962 bad_page(page, "corrupted mapping in tail page");
967 if (unlikely(!PageTail(page))) {
968 bad_page(page, "PageTail not set");
971 if (unlikely(compound_head(page) != head_page)) {
972 bad_page(page, "compound_head not consistent");
977 page->mapping = NULL;
978 clear_compound_head(page);
987 * using page tags instead (see below).
988 * 2. For tag-based KASAN modes: the page has a match-all KASAN tag, indicating
989 * that error detection is disabled for accesses via the page address.
1006 * KASAN memory tracking as the poison will be properly inserted at page
1011 static inline bool should_skip_kasan_poison(struct page *page)
1016 return page_kasan_tag(page) == KASAN_TAG_KERNEL;
1019 void kernel_init_pages(struct page *page, int numpages)
1026 clear_highpage_kasan_tagged(page + i);
1030 __always_inline bool free_pages_prepare(struct page *page,
1034 bool skip_kasan_poison = should_skip_kasan_poison(page);
1036 bool compound = PageCompound(page);
1038 VM_BUG_ON_PAGE(PageTail(page), page);
1040 trace_mm_page_free(page, order);
1041 kmsan_free_page(page, order);
1043 if (memcg_kmem_online() && PageMemcgKmem(page))
1044 __memcg_kmem_uncharge_page(page, order);
1046 if (unlikely(PageHWPoison(page)) && !order) {
1048 reset_page_owner(page, order);
1049 page_table_check_free(page, order);
1050 pgalloc_tag_sub(page, 1 << order);
1054 VM_BUG_ON_PAGE(compound && compound_order(page) != order, page);
1057 * Check tail pages before head page information is cleared to
1064 page[1].flags &= ~PAGE_FLAGS_SECOND;
1067 bad += free_tail_page_prepare(page, page + i);
1069 if (free_page_is_bad(page + i)) {
1074 (page + i)->flags &= ~PAGE_FLAGS_CHECK_AT_PREP;
1077 if (PageMappingFlags(page))
1078 page->mapping = NULL;
1080 if (free_page_is_bad(page))
1086 page_cpupid_reset_last(page);
1087 page->flags &= ~PAGE_FLAGS_CHECK_AT_PREP;
1088 reset_page_owner(page, order);
1089 page_table_check_free(page, order);
1090 pgalloc_tag_sub(page, 1 << order);
1092 if (!PageHighMem(page)) {
1093 debug_check_no_locks_freed(page_address(page),
1095 debug_check_no_obj_freed(page_address(page),
1099 kernel_poison_pages(page, 1 << order);
1107 * page becomes unavailable via debug_pagealloc or arch_free_page.
1110 kasan_poison_pages(page, order, init);
1117 kernel_init_pages(page, 1 << order);
1120 * arch_free_page() can make the page's contents inaccessible. s390
1121 * does this. So nothing which can access the page's contents should
1124 arch_free_page(page, order);
1126 debug_pagealloc_unmap_pages(page, 1 << order);
1142 struct page *page;
1172 page = list_last_entry(list, struct page, pcp_list);
1173 pfn = page_to_pfn(page);
1174 mt = get_pfnblock_migratetype(page, pfn);
1177 list_del(&page->pcp_list);
1181 __free_one_page(page, pfn, zone, order, mt, FPI_NONE);
1182 trace_mm_page_pcpu_drain(page, order, mt);
1189 static void free_one_page(struct zone *zone, struct page *page,
1197 migratetype = get_pfnblock_migratetype(page, pfn);
1198 __free_one_page(page, pfn, zone, order, migratetype, fpi_flags);
1202 static void __free_pages_ok(struct page *page, unsigned int order,
1205 unsigned long pfn = page_to_pfn(page);
1206 struct zone *zone = page_zone(page);
1208 if (!free_pages_prepare(page, order))
1211 free_one_page(zone, page, pfn, order, fpi_flags);
1216 void __free_pages_core(struct page *page, unsigned int order)
1219 struct page *p = page;
1236 atomic_long_add(nr_pages, &page_zone(page)->managed_pages);
1238 if (page_contains_unaccepted(page, order)) {
1239 if (order == MAX_PAGE_ORDER && __free_unaccepted(page))
1242 accept_page(page, order);
1249 __free_pages_ok(page, order, FPI_TO_TAIL);
1257 * Return struct page pointer of start_pfn, or NULL if checks were not passed.
1264 * the first and last page of a pageblock and avoid checking each individual
1265 * page in a pageblock.
1267 * Note: the function may return non-NULL struct page even for a page block
1276 struct page *__pageblock_pfn_to_page(unsigned long start_pfn,
1279 struct page *start_page;
1280 struct page *end_page;
1318 static inline void expand(struct zone *zone, struct page *page,
1327 VM_BUG_ON_PAGE(bad_range(zone, &page[size]), &page[size]);
1330 * Mark as guard pages (or page), that will allow to
1332 * Corresponding page table entries will not be touched,
1335 if (set_page_guard(zone, &page[size], high))
1338 __add_to_free_list(&page[size], zone, high, migratetype, false);
1339 set_buddy_order(&page[size], high);
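
expand() (line 1318) is the split half of the buddy scheme: when a block larger than requested is pulled off a freelist, the loop repeatedly halves it and returns each unused upper half to the freelist of the next lower order (or turns it into a guard page when debugging is enabled). Below is a toy model of just that halving loop, with a plain counter array standing in for zone->free_area[].

/* Halving model of expand(): take a block of order 'high', keep the low
 * 2^low pages, and return each upper half to the freelist of the next
 * lower order.  free_count[] stands in for the per-order free areas.
 */
#include <stdio.h>

#define MAX_ORDER_MODEL 11

static unsigned long free_count[MAX_ORDER_MODEL];

static void expand_model(unsigned long pfn, unsigned int low, unsigned int high)
{
	unsigned long size = 1UL << high;

	while (high > low) {
		high--;
		size >>= 1;
		free_count[high]++;
		printf("put back pfn 0x%lx as order %u\n", pfn + size, high);
	}
}

int main(void)
{
	expand_model(0x400, 0, 3);   /* want order 0, found an order-3 block */
	return 0;
}
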
1345 static void check_new_page_bad(struct page *page)
1347 if (unlikely(page->flags & __PG_HWPOISON)) {
1349 page_mapcount_reset(page); /* remove PageBuddy */
1353 bad_page(page,
1354 page_bad_reason(page, PAGE_FLAGS_CHECK_AT_PREP));
1358 * This page is about to be returned from the page allocator
1360 static bool check_new_page(struct page *page)
1362 if (likely(page_expected_state(page,
1366 check_new_page_bad(page);
1370 static inline bool check_new_pages(struct page *page, unsigned int order)
1374 struct page *p = page + i;
1412 inline void post_alloc_hook(struct page *page, unsigned int order,
1420 set_page_private(page, 0);
1421 set_page_refcounted(page);
1423 arch_alloc_page(page, order);
1424 debug_pagealloc_map_pages(page, 1 << order);
1429 * allocations and the page unpoisoning code will complain.
1431 kernel_unpoison_pages(page, 1 << order);
1446 tag_clear_highpage(page + i);
1452 kasan_unpoison_pages(page, order, init)) {
1458 * If memory tags have not been set by KASAN, reset the page
1462 page_kasan_tag_reset(page + i);
1466 kernel_init_pages(page, 1 << order);
1468 set_page_owner(page, order, gfp_flags);
1469 page_table_check_alloc(page, order);
1470 pgalloc_tag_add(page, current, 1 << order);
1473 static void prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags,
1476 post_alloc_hook(page, order, gfp_flags);
1479 prep_compound_page(page, order);
1482 * page is set pfmemalloc when ALLOC_NO_WATERMARKS was necessary to
1483 * allocate the page. The expectation is that the caller is taking
1484 * steps that will free more memory. The caller should avoid the page
1488 set_page_pfmemalloc(page);
1490 clear_page_pfmemalloc(page);
1495 * the smallest available page from the freelists
1498 struct page *__rmqueue_smallest(struct zone *zone, unsigned int order,
1503 struct page *page;
1505 /* Find a page of the appropriate size in the preferred list */
1508 page = get_page_from_free_area(area, migratetype);
1509 if (!page)
1511 del_page_from_free_list(page, zone, current_order, migratetype);
1512 expand(zone, page, order, current_order, migratetype);
1513 trace_mm_page_alloc_zone_locked(page, order, migratetype,
1516 return page;
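
__rmqueue_smallest() (line 1498) scans the free areas from the requested order upward, takes the first order that has a free block, and lets expand() hand back the surplus. The sketch below models only the smallest-fit search over a made-up freelists[] array; the split step is the expand model shown earlier.

/* Smallest-fit search: scan orders from 'order' upward and take the
 * first order with a free block.  freelists[] is a toy stand-in for
 * zone->free_area[].
 */
#include <stdio.h>

#define NR_ORDERS_MODEL 11

/* number of free blocks at each order in this toy zone */
static unsigned long freelists[NR_ORDERS_MODEL] = { 0, 0, 0, 5, 2, 0, 1 };

static int rmqueue_smallest_model(unsigned int order)
{
	unsigned int current_order;

	for (current_order = order; current_order < NR_ORDERS_MODEL; current_order++) {
		if (!freelists[current_order])
			continue;
		freelists[current_order]--;
		printf("took an order-%u block for an order-%u request\n",
		       current_order, order);
		/* an expand() step would now give back orders order..current_order-1 */
		return 0;
	}
	return -1;   /* nothing free at or above 'order' */
}

int main(void)
{
	return rmqueue_smallest_model(1);   /* lands on the order-3 list */
}
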
1536 static __always_inline struct page *__rmqueue_cma_fallback(struct zone *zone,
1542 static inline struct page *__rmqueue_cma_fallback(struct zone *zone,
1553 struct page *page;
1562 page = pfn_to_page(pfn);
1563 if (!PageBuddy(page)) {
1569 VM_BUG_ON_PAGE(page_to_nid(page) != zone_to_nid(zone), page);
1570 VM_BUG_ON_PAGE(page_zone(page) != zone, page);
1572 order = buddy_order(page);
1574 move_to_free_list(page, zone, order, old_mt, new_mt);
1585 static bool prep_move_freepages_block(struct zone *zone, struct page *page,
1591 pfn = page_to_pfn(page);
1613 page = pfn_to_page(pfn);
1614 if (PageBuddy(page)) {
1615 int nr = 1 << buddy_order(page);
1626 if (PageLRU(page) || __PageMovable(page))
1635 static int move_freepages_block(struct zone *zone, struct page *page,
1640 if (!prep_move_freepages_block(zone, page, &start_pfn, NULL, NULL))
1651 struct page *page;
1654 while (!PageBuddy(page = pfn_to_page(pfn))) {
1664 if (pfn + (1 << buddy_order(page)) > start_pfn)
1671 /* Split a multi-block free page into its individual pageblocks */
1672 static void split_large_buddy(struct zone *zone, struct page *page,
1680 /* Caller removed page from freelist, buddy info cleared! */
1681 VM_WARN_ON_ONCE(PageBuddy(page));
1684 int mt = get_pfnblock_migratetype(page, pfn);
1686 __free_one_page(page, pfn, zone, pageblock_order, mt, FPI_NONE);
1688 page = pfn_to_page(pfn);
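
split_large_buddy() (line 1672) frees a chunk that spans several pageblocks one pageblock at a time, so that each piece is freed with its own block's migratetype. The loop below models just the PFN stepping, assuming pageblock_order = 9.

/* Pageblock-splitting model: walk a free chunk larger than a pageblock
 * and hand it back one pageblock-sized block at a time.
 */
#include <stdio.h>

#define PAGEBLOCK_ORDER_MODEL 9
#define PAGEBLOCK_NR_PAGES    (1UL << PAGEBLOCK_ORDER_MODEL)

int main(void)
{
	unsigned long pfn = 0x10000;
	unsigned long end = pfn + (1UL << 11);   /* an order-11 chunk: 4 pageblocks */

	while (pfn < end) {
		printf("free pfn 0x%lx as one order-%d block (own migratetype)\n",
		       pfn, PAGEBLOCK_ORDER_MODEL);
		pfn += PAGEBLOCK_NR_PAGES;
	}
	return 0;
}
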
1693 * move_freepages_block_isolate - move free pages in block for page isolation
1695 * @page: the pageblock page
1699 * case encountered in page isolation, where the block of interest
1702 * Unlike the regular page allocator path, which moves pages while
1703 * stealing buddies off the freelist, page isolation is interested in
1711 bool move_freepages_block_isolate(struct zone *zone, struct page *page,
1716 if (!prep_move_freepages_block(zone, page, &start_pfn, NULL, NULL))
1726 struct page *buddy = pfn_to_page(pfn);
1731 set_pageblock_migratetype(page, migratetype);
1737 if (PageBuddy(page) && buddy_order(page) > pageblock_order) {
1738 int order = buddy_order(page);
1740 del_page_from_free_list(page, zone, order,
1741 get_pfnblock_migratetype(page, pfn));
1742 set_pageblock_migratetype(page, migratetype);
1743 split_large_buddy(zone, page, pfn, order);
1748 get_pfnblock_migratetype(page, start_pfn),
1754 static void change_pageblock_range(struct page *pageblock_page,
1770 * If we are stealing a relatively large buddy page, it is likely there will
1772 * reclaimable and unmovable allocations, we steal regardless of page size,
1843 static struct page *
1844 steal_suitable_fallback(struct zone *zone, struct page *page,
1852 block_type = get_pageblock_migratetype(page);
1863 del_page_from_free_list(page, zone, current_order, block_type);
1864 change_pageblock_range(page, current_order, start_type);
1865 expand(zone, page, order, current_order, start_type);
1866 return page;
1882 if (!prep_move_freepages_block(zone, page, &start_pfn, &free_pages,
1918 del_page_from_free_list(page, zone, current_order, block_type);
1919 expand(zone, page, order, current_order, block_type);
1920 return page;
1959 * there are no empty page blocks that contain a page with a suitable order
1961 static void reserve_highatomic_pageblock(struct page *page, struct zone *zone)
1985 mt = get_pageblock_migratetype(page);
1988 if (move_freepages_block(zone, page, mt,
2012 struct page *page;
2031 page = get_page_from_free_area(area, MIGRATE_HIGHATOMIC);
2032 if (!page)
2035 mt = get_pageblock_migratetype(page);
2037 * In page freeing path, migratetype change is racy so
2065 ret = move_freepages_block(zone, page, mt,
2084 * Try finding a free buddy page on the fallback list and put it on the free
2093 static __always_inline struct page *
2100 struct page *page;
2113 * Find the largest available free page in the other list. This roughly
2128 * steal and split the smallest available page instead of the
2129 * largest available page, because even if the next movable
2153 * when looking for the largest page.
2158 page = get_page_from_free_area(area, fallback_mt);
2161 page = steal_suitable_fallback(zone, page, current_order, order,
2164 trace_mm_page_alloc_extfrag(page, order, current_order,
2167 return page;
2174 static __always_inline struct page *
2178 struct page *page;
2189 page = __rmqueue_cma_fallback(zone, order);
2190 if (page)
2191 return page;
2195 page = __rmqueue_smallest(zone, order, migratetype);
2196 if (unlikely(!page)) {
2198 page = __rmqueue_cma_fallback(zone, order);
2200 if (!page)
2201 page = __rmqueue_fallback(zone, order, migratetype,
2204 return page;
2221 struct page *page = __rmqueue(zone, order, migratetype,
2223 if (unlikely(page == NULL))
2228 * physical page order. The page is added to the tail of
2230 * is ordered by page number under some conditions. This is
2232 * head, thus also in the physical page order. This is useful
2236 list_add_tail(&page->pcp_list, list);
2502 struct page *page, int migratetype,
2517 list_add(&page->pcp_list, &pcp->lists[pindex]);
2550 * Free a pcp page
2552 void free_unref_page(struct page *page, unsigned int order)
2557 unsigned long pfn = page_to_pfn(page);
2561 __free_pages_ok(page, order, FPI_NONE);
2565 if (!free_pages_prepare(page, order))
2573 * excessively into the page allocator
2575 migratetype = get_pfnblock_migratetype(page, pfn);
2578 free_one_page(page_zone(page), page, pfn, order, FPI_NONE);
2584 zone = page_zone(page);
2588 free_unref_page_commit(zone, pcp, page, migratetype, order);
2591 free_one_page(zone, page, pfn, order, FPI_NONE);
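
free_unref_page() (line 2552) is the free-side fast path: eligible pages go onto a per-CPU list without taking the zone lock, and free_pcppages_bulk() (the loop around line 1172) later pushes a batch back to the buddy lists once the list grows too long. The toy model below shows that batching pattern with made-up 'high' and 'batch' thresholds, not the kernel's tuning.

/* Per-CPU free-batching model: frees land on a local list and are only
 * flushed to the lock-protected global allocator in batches.
 */
#include <stdio.h>

#define PCP_HIGH  8
#define PCP_BATCH 4

static unsigned long pcp_list[64];
static int pcp_count;

static void flush_to_buddy(int nr)
{
	/* zone->lock would be taken once here, covering the whole batch */
	while (nr-- && pcp_count)
		printf("buddy-free pfn 0x%lx\n", pcp_list[--pcp_count]);
}

static void free_unref_model(unsigned long pfn)
{
	pcp_list[pcp_count++] = pfn;     /* lockless: this CPU's list only */
	if (pcp_count > PCP_HIGH)
		flush_to_buddy(PCP_BATCH);
}

int main(void)
{
	for (unsigned long pfn = 0x100; pfn < 0x110; pfn++)
		free_unref_model(pfn);
	flush_to_buddy(pcp_count);       /* full drain, like free_pcppages_bulk() */
	return 0;
}
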
2614 if (!free_pages_prepare(&folio->page, order))
2621 free_one_page(folio_zone(folio), &folio->page,
2640 migratetype = get_pfnblock_migratetype(&folio->page, pfn);
2657 free_one_page(zone, &folio->page, pfn,
2670 free_one_page(zone, &folio->page, pfn,
2684 trace_mm_page_free_batched(&folio->page);
2685 free_unref_page_commit(zone, pcp, &folio->page, migratetype,
2697 * split_page takes a non-compound higher-order page, and splits it into
2698 * n (1<<order) sub-pages: page[0..n]
2699 * Each sub-page must be freed individually.
2704 void split_page(struct page *page, unsigned int order)
2708 VM_BUG_ON_PAGE(PageCompound(page), page);
2709 VM_BUG_ON_PAGE(!page_count(page), page);
2712 set_page_refcounted(page + i);
2713 split_page_owner(page, order, 0);
2714 pgalloc_tag_split(page, 1 << order);
2715 split_page_memcg(page, order, 0);
2719 int __isolate_free_page(struct page *page, unsigned int order)
2721 struct zone *zone = page_zone(page);
2722 int mt = get_pageblock_migratetype(page);
2727 * Obey watermarks as if the page was being allocated. We can
2729 * watermark, because we already know our high-order page
2737 del_page_from_free_list(page, zone, order, mt);
2740 * Set the pageblock if the isolated page is at least half of a
2744 struct page *endpage = page + (1 << order) - 1;
2745 for (; page < endpage; page += pageblock_nr_pages) {
2746 int mt = get_pageblock_migratetype(page);
2752 move_freepages_block(zone, page, mt,
2761 * __putback_isolated_page - Return a now-isolated page back where we got it
2762 * @page: Page that was isolated
2763 * @order: Order of the isolated page
2764 * @mt: The page's pageblock's migratetype
2766 * This function is meant to return a page pulled from the free lists via
2769 void __putback_isolated_page(struct page *page, unsigned int order, int mt)
2771 struct zone *zone = page_zone(page);
2776 /* Return isolated page to tail of freelist. */
2777 __free_one_page(page, page_to_pfn(page), zone, order, mt,
2808 struct page *rmqueue_buddy(struct zone *preferred_zone, struct zone *zone,
2812 struct page *page;
2816 page = NULL;
2819 page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC);
2820 if (!page) {
2821 page = __rmqueue(zone, order, migratetype, alloc_flags);
2829 if (!page && (alloc_flags & ALLOC_OOM))
2830 page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC);
2832 if (!page) {
2838 } while (check_new_pages(page, order));
2840 __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
2843 return page;
2896 /* Remove page from the per-cpu list, caller must protect the list */
2898 struct page *__rmqueue_pcplist(struct zone *zone, unsigned int order,
2904 struct page *page;
2920 page = list_first_entry(list, struct page, pcp_list);
2921 list_del(&page->pcp_list);
2923 } while (check_new_pages(page, order));
2925 return page;
2928 /* Lock and remove page from the per-cpu list */
2929 static struct page *rmqueue_pcplist(struct zone *preferred_zone,
2935 struct page *page;
2953 page = __rmqueue_pcplist(zone, order, migratetype, alloc_flags, pcp, list);
2956 if (page) {
2957 __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
2960 return page;
2964 * Allocate a page from the given zone.
2976 struct page *rmqueue(struct zone *preferred_zone,
2981 struct page *page;
2985 * allocate greater than order-1 page units with __GFP_NOFAIL.
2990 page = rmqueue_pcplist(preferred_zone, zone, order,
2992 if (likely(page))
2996 page = rmqueue_buddy(preferred_zone, zone, order, alloc_flags,
3007 VM_BUG_ON_PAGE(page && bad_range(zone, page), page);
3008 return page;
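
rmqueue() (line 2976) is the allocation-side counterpart of the per-CPU caching above: small-order requests are served from the per-CPU lists and only fall back to the buddy freelists, under the zone lock, on a miss or for larger orders. The decision sketch below uses an assumed order-3 cutoff and a toy pcp_alloc(); it is a compressed model, not the kernel's exact eligibility rule.

/* Allocation fast-path sketch: small orders come from the per-CPU list,
 * everything else (or a pcp miss) goes to the buddy freelists.
 */
#include <stdio.h>
#include <stdbool.h>

#define PCP_ORDER_CUTOFF 3   /* assumed cutoff for the model */

static bool pcp_alloc(unsigned int order)        /* may miss */
{
	return order == 0;                       /* toy: only order 0 is cached */
}

static void buddy_alloc(unsigned int order)
{
	printf("order %u: zone lock + buddy freelists\n", order);
}

static void rmqueue_model(unsigned int order)
{
	if (order <= PCP_ORDER_CUTOFF && pcp_alloc(order)) {
		printf("order %u: served from the per-CPU list\n", order);
		return;
	}
	buddy_alloc(order);
}

int main(void)
{
	rmqueue_model(0);
	rmqueue_model(2);
	rmqueue_model(9);
	return 0;
}
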
3045 * one free page of a suitable size. Checking now avoids taking the zone lock
3090 * even if a suitable page happened to be free.
3099 /* For a high-order request, check at least one suitable page is free */
3258 * a page.
3260 static struct page *
3279 struct page *page;
3287 * When allocating a page cache page for writing, we
3335 * premature page reclaiming. Detection is done here to
3399 page = rmqueue(ac->preferred_zoneref->zone, zone, order,
3401 if (page) {
3402 prep_new_page(page, order, gfp_mask, alloc_flags);
3409 reserve_highatomic_pageblock(page, zone);
3411 return page;
3484 static inline struct page *
3489 struct page *page;
3491 page = get_page_from_freelist(gfp_mask, order,
3497 if (!page)
3498 page = get_page_from_freelist(gfp_mask, order,
3501 return page;
3504 static inline struct page *
3515 struct page *page;
3536 page = get_page_from_freelist((gfp_mask | __GFP_HARDWALL) &
3539 if (page)
3583 page = __alloc_pages_cpuset_fallback(gfp_mask, order,
3588 return page;
3599 static struct page *
3604 struct page *page = NULL;
3616 prio, &page);
3630 /* Prep a captured page if available */
3631 if (page)
3632 prep_new_page(page, order, gfp_mask, alloc_flags);
3634 /* Try get a page from the freelist if available */
3635 if (!page)
3636 page = get_page_from_freelist(gfp_mask, order, alloc_flags, ac);
3638 if (page) {
3639 struct zone *zone = page_zone(page);
3644 return page;
3686 * Compaction managed to coalesce some page blocks, but the
3724 static inline struct page *
3844 /* Perform direct synchronous page reclaim */
3871 static inline struct page *
3876 struct page *page = NULL;
3886 page = get_page_from_freelist(gfp_mask, order, alloc_flags, ac);
3893 if (!page && !drained) {
3902 return page;
3938 * The caller may dip into page reserves a bit more if the caller
4130 static inline struct page *
4137 struct page *page = NULL;
4193 page = get_page_from_freelist(gfp_mask, order, alloc_flags, ac);
4194 if (page)
4210 page = __alloc_pages_direct_compact(gfp_mask, order,
4214 if (page)
4219 * includes some THP page fault allocations
4274 page = get_page_from_freelist(gfp_mask, order, alloc_flags, ac);
4275 if (page)
4287 page = __alloc_pages_direct_reclaim(gfp_mask, order, alloc_flags, ac,
4289 if (page)
4293 page = __alloc_pages_direct_compact(gfp_mask, order, alloc_flags, ac,
4295 if (page)
4336 page = __alloc_pages_may_oom(gfp_mask, order, ac, &did_some_progress);
4337 if (page)
4395 page = __alloc_pages_cpuset_fallback(gfp_mask, order, ALLOC_MIN_RESERVE, ac);
4396 if (page)
4404 "page allocation failure: order:%u", order);
4406 return page;
4461 * This is a batched version of the page allocator that attempts to
4475 struct page **page_array)
4477 struct page *page;
4507 /* Use the single page allocator for one page. */
4516 * force the caller to allocate one page at a time as it'll have
4523 /* May set ALLOC_NOFRAGMENT, fragmentation will return 1 page. */
4554 * try to allocate a single page and reclaim if necessary.
4575 page = __rmqueue_pcplist(zone, 0, ac.migratetype, alloc_flags,
4577 if (unlikely(!page)) {
4578 /* Try and allocate at least one page */
4587 prep_new_page(page, 0, gfp, 0);
4589 list_add(&page->lru, page_list);
4591 page_array[nr_populated] = page;
4608 page = __alloc_pages_noprof(gfp, 0, preferred_nid, nodemask);
4609 if (page) {
4611 list_add(&page->lru, page_list);
4613 page_array[nr_populated] = page;
4624 struct page *__alloc_pages_noprof(gfp_t gfp, unsigned int order,
4627 struct page *page;
4660 page = get_page_from_freelist(alloc_gfp, order, alloc_flags, &ac);
4661 if (likely(page))
4673 page = __alloc_pages_slowpath(alloc_gfp, order, &ac);
4676 if (memcg_kmem_online() && (gfp & __GFP_ACCOUNT) && page &&
4677 unlikely(__memcg_kmem_charge_page(page, gfp, order) != 0)) {
4678 __free_pages(page, order);
4679 page = NULL;
4682 trace_mm_page_alloc(page, order, alloc_gfp, ac.migratetype);
4683 kmsan_alloc_page(page, order, alloc_gfp);
4685 return page;
4692 struct page *page = __alloc_pages_noprof(gfp | __GFP_COMP, order,
4694 return page_rmappable_folio(page);
4705 struct page *page;
4707 page = alloc_pages_noprof(gfp_mask & ~__GFP_HIGHMEM, order);
4708 if (!page)
4710 return (unsigned long) page_address(page);
4722 * @page: The page pointer returned from alloc_pages().
4725 * This function can free multi-page allocations that are not compound
4730 * If the last reference to this page is speculative, it will be released
4731 * by put_page() which only frees the first page of a non-compound
4733 * the subsequent pages here. If you want to use the page's reference
4735 * compound page, and use put_page() instead of __free_pages().
4740 void __free_pages(struct page *page, unsigned int order)
4743 int head = PageHead(page);
4744 struct alloc_tag *tag = pgalloc_tag_get(page);
4746 if (put_page_testzero(page))
4747 free_unref_page(page, order);
4751 free_unref_page(page + (1 << order), order);
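
The __free_pages() lines above cover the subtle case spelled out in the comment near 4730: if a speculative reference is still held, put_page_testzero() fails and only the first page stays pinned, so the remaining pages of a non-compound high-order allocation are freed here as successively smaller power-of-two blocks (line 4751). The sketch below just enumerates which sub-blocks that gives back for an order-3 allocation.

/* Which sub-blocks get freed when page 0 of a non-compound order-3
 * allocation stays pinned by a speculative reference: every power-of-two
 * tail half, leaving only page 0 to the reference holder.
 */
#include <stdio.h>

int main(void)
{
	unsigned int order = 3;                /* an 8-page, non-compound block */

	while (order-- > 0)
		printf("free pages [%u, %u) as an order-%u block\n",
		       1U << order, 2U << order, order);
	/* prints: [4,8) order 2, [2,4) order 1, [1,2) order 0 */
	return 0;
}
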
4769 * within a 0 or higher order page. Multiple fragments within that page
4770 * are individually refcounted, in the page's reference counter.
4773 * page fragments. This is used by the network stack and network device
4777 static struct page *__page_frag_cache_refill(struct page_frag_cache *nc,
4780 struct page *page = NULL;
4786 page = alloc_pages_node(NUMA_NO_NODE, gfp_mask,
4788 nc->size = page ? PAGE_FRAG_CACHE_MAX_SIZE : PAGE_SIZE;
4790 if (unlikely(!page))
4791 page = alloc_pages_node(NUMA_NO_NODE, gfp, 0);
4793 nc->va = page ? page_address(page) : NULL;
4795 return page;
4808 void __page_frag_cache_drain(struct page *page, unsigned int count)
4810 VM_BUG_ON_PAGE(page_ref_count(page) == 0, page);
4812 if (page_ref_sub_and_test(page, count))
4813 free_unref_page(page, compound_order(page));
4822 struct page *page;
4827 page = __page_frag_cache_refill(nc, gfp_mask);
4828 if (!page)
4835 /* Even if we own the page, we do not use atomic_set().
4838 page_ref_add(page, PAGE_FRAG_CACHE_MAX_SIZE);
4840 /* reset page count bias and offset to start of new frag */
4841 nc->pfmemalloc = page_is_pfmemalloc(page);
4848 page = virt_to_page(nc->va);
4850 if (!page_ref_sub_and_test(page, nc->pagecnt_bias))
4854 free_unref_page(page, compound_order(page));
4862 /* OK, page count is 0, we can safely set it */
4863 set_page_count(page, PAGE_FRAG_CACHE_MAX_SIZE + 1);
4865 /* reset page count bias and offset to start of new frag */
4874 * We don't release the cache page because
4891 * Frees a page fragment allocated out of either a compound or order 0 page.
4895 struct page *page = virt_to_head_page(addr);
4897 if (unlikely(put_page_testzero(page)))
4898 free_unref_page(page, compound_order(page));
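
The page_frag lines above (roughly 4769-4898) implement a fragment allocator: one large page is carved into many small allocations by walking an offset downward, and a refcount bias lets each fragment hold a page reference so the page is only released once the last fragment is freed. The userspace model below keeps the offset-walking idea but replaces the bias trick with a plain outstanding-fragment counter; struct frag_cache and its sizes are made up for the sketch.

/* Userspace model of a page-fragment cache: one big buffer, an offset
 * that walks downward, and a counter standing in for the refcount bias.
 */
#include <stdio.h>
#include <stdlib.h>

#define FRAG_CACHE_SIZE 32768

struct frag_cache {
	char *va;
	int   offset;       /* next free byte, counted down from the top */
	int   outstanding;  /* fragments handed out and not yet freed */
};

static void *frag_alloc(struct frag_cache *nc, int size)
{
	if (!nc->va) {                           /* refill with a fresh buffer */
		nc->va = malloc(FRAG_CACHE_SIZE);
		nc->offset = FRAG_CACHE_SIZE;
		nc->outstanding = 0;
	}
	if (nc->offset - size < 0)
		return NULL;                     /* caller would refill and retry */
	nc->offset -= size;
	nc->outstanding++;
	return nc->va + nc->offset;
}

static void frag_free(struct frag_cache *nc, void *frag)
{
	(void)frag;
	if (--nc->outstanding == 0) {            /* last fragment gone */
		free(nc->va);
		nc->va = NULL;
	}
}

int main(void)
{
	struct frag_cache nc = { 0 };
	void *a = frag_alloc(&nc, 256);
	void *b = frag_alloc(&nc, 512);

	printf("a=%p b=%p\n", a, b);
	frag_free(&nc, a);
	frag_free(&nc, b);                       /* buffer released here */
	return 0;
}
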
4907 struct page *page = virt_to_page((void *)addr);
4908 struct page *last = page + nr;
4910 split_page_owner(page, order, 0);
4911 pgalloc_tag_split(page, 1 << order);
4912 split_page_memcg(page, order, 0);
4913 while (page < --last)
4916 last = page + (1UL << order);
4917 for (page += nr; page < last; page++)
4918 __free_pages_ok(page, 0, FPI_TO_TAIL);
4966 struct page *p;
5382 * F.e. the percpu allocator needs the page allocator which
5455 * of pages of one half of the possible page colors
5467 * of contiguous memory as there's no hardware page translation to
5649 * page high values need to be recalculated.
5738 void adjust_managed_page_count(struct page *page, long count)
5740 atomic_long_add(count, &page_zone(page)->managed_pages);
5743 if (PageHighMem(page))
5757 struct page *page = virt_to_page(pos);
5767 direct_map_addr = page_address(page);
5776 free_reserved_page(page);
5937 * deltas control async page reclaim, and so should
6294 struct page *page;
6297 list_for_each_entry(page, page_list, lru)
6298 dump_page(page, "migration failure");
6321 struct page *page;
6352 list_for_each_entry(page, &cc->migratepages, lru) {
6353 struct folio *folio = page_folio(page);
6430 * have different sizes, and due to the way page allocator
6436 * range back to page allocator as MIGRATE_ISOLATE.
6438 * When this is done, we take the pages in range from page
6440 * page allocator will never consider using them.
6445 * put back to page allocator so that buddy can use them.
6455 * In case of -EBUSY, we'd like to know which page causes problem.
6457 * which will report the busy page.
6472 * more, all pages in [start, end) are free in page allocator.
6474 * [start, end) (that is remove them from page allocator).
6478 * page allocator holds, ie. they can be part of higher order
6525 struct page *page;
6528 page = pfn_to_online_page(i);
6529 if (!page)
6532 if (page_zone(page) != z)
6535 if (PageReserved(page))
6538 if (PageHuge(page))
6564 * The allocated memory is always aligned to a page boundary. If nr_pages is a
6569 * __free_page() on each allocated page.
6573 struct page *alloc_contig_pages_noprof(unsigned long nr_pages, gfp_t gfp_mask,
6616 struct page *page = pfn_to_page(pfn);
6618 count += page_count(page) != 1;
6619 __free_page(page);
6627 * and draining all cpus. A concurrent page freeing on another CPU that's about
6628 * to put the page on pcplist will either finish before the drain and the page
6674 struct page *page;
6683 page = pfn_to_page(pfn);
6685 * The HWPoisoned page may be not in buddy system, and
6688 if (unlikely(!PageBuddy(page) && PageHWPoison(page))) {
6696 if (PageOffline(page)) {
6697 BUG_ON(page_count(page));
6698 BUG_ON(PageBuddy(page));
6703 BUG_ON(page_count(page));
6704 BUG_ON(!PageBuddy(page));
6705 VM_WARN_ON(get_pageblock_migratetype(page) != MIGRATE_ISOLATE);
6706 order = buddy_order(page);
6707 del_page_from_free_list(page, zone, order, MIGRATE_ISOLATE);
6717 bool is_free_buddy_page(const struct page *page)
6719 unsigned long pfn = page_to_pfn(page);
6723 const struct page *head = page - (pfn & ((1 << order) - 1));
6735 static inline void add_to_free_list(struct page *page, struct zone *zone,
6739 __add_to_free_list(page, zone, order, migratetype, tail);
6744 * Break down a higher-order page into sub-pages, and keep our target out of
6747 static void break_down_buddy_pages(struct zone *zone, struct page *page,
6748 struct page *target, int low, int high,
6752 struct page *current_buddy;
6758 if (target >= &page[size]) {
6759 current_buddy = page;
6760 page = page + size;
6762 current_buddy = page + size;
6774 * Take a page that will be marked as poisoned off the buddy allocator.
6776 bool take_page_off_buddy(struct page *page)
6778 struct zone *zone = page_zone(page);
6779 unsigned long pfn = page_to_pfn(page);
6786 struct page *page_head = page - (pfn & ((1 << order) - 1));
6796 break_down_buddy_pages(zone, page_head, page, 0,
6798 SetPageHWPoisonTakenOff(page);
6812 bool put_page_back_buddy(struct page *page)
6814 struct zone *zone = page_zone(page);
6819 if (put_page_testzero(page)) {
6820 unsigned long pfn = page_to_pfn(page);
6821 int migratetype = get_pfnblock_migratetype(page, pfn);
6823 ClearPageHWPoisonTakenOff(page);
6824 __free_one_page(page, pfn, zone, 0, migratetype, FPI_NONE);
6825 if (TestClearPageHWPoison(page)) {
6871 static bool page_contains_unaccepted(struct page *page, unsigned int order)
6873 phys_addr_t start = page_to_phys(page);
6879 static void accept_page(struct page *page, unsigned int order)
6881 phys_addr_t start = page_to_phys(page);
6889 struct page *page;
6896 page = list_first_entry_or_null(&zone->unaccepted_pages,
6897 struct page, lru);
6898 if (!page) {
6903 list_del(&page->lru);
6910 accept_page(page, MAX_PAGE_ORDER);
6912 __free_pages_ok(page, MAX_PAGE_ORDER, FPI_TO_TAIL);
6930 /* Accept at least one page */
6946 static bool __free_unaccepted(struct page *page)
6948 struct zone *zone = page_zone(page);
6957 list_add_tail(&page->lru, &zone->unaccepted_pages);
6970 static bool page_contains_unaccepted(struct page *page, unsigned int order)
6975 static void accept_page(struct page *page, unsigned int order)
6989 static bool __free_unaccepted(struct page *page)