From 0ef546de30c37e30a0236aeb1732dd95ed8bd26e Mon Sep 17 00:00:00 2001
Message-Id: <0ef546de30c37e30a0236aeb1732dd95ed8bd26e.1601675151.git.zanussi@kernel.org>
In-Reply-To: <5b5a156f9808b1acf1205606e03da117214549ea.1601675151.git.zanussi@kernel.org>
References: <5b5a156f9808b1acf1205606e03da117214549ea.1601675151.git.zanussi@kernel.org>
From: Peter Zijlstra
Date: Mon, 28 May 2018 15:24:21 +0200
Subject: [PATCH 073/333] Split IRQ-off and zone->lock while freeing pages
 from PCP list #2
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.19/older/patches-4.19.148-rt64.tar.xz

Split the IRQ-off section from the zone->lock section while freeing
pages from the PCP list. Introduce isolate_pcp_pages(), which moves the
pages from the PCP list onto a temporary list; the temporary list is
then freed via free_pcppages_bulk().

Signed-off-by: Peter Zijlstra
Signed-off-by: Sebastian Andrzej Siewior
---
 mm/page_alloc.c | 60 ++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 50 insertions(+), 10 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 47732cad4a51..e43e374a9415 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1105,8 +1105,8 @@ static inline void prefetch_buddy(struct page *page)
  * And clear the zone's pages_scanned counter, to hold off the "all pages are
  * pinned" detection logic.
  */
-static void free_pcppages_bulk(struct zone *zone, int count,
-					struct list_head *head)
+static void free_pcppages_bulk(struct zone *zone, struct list_head *head,
+			       bool zone_retry)
 {
 	bool isolated_pageblocks;
 	struct page *page, *tmp;
@@ -1121,12 +1121,27 @@ static void free_pcppages_bulk(struct zone *zone, int count,
 	 */
 	list_for_each_entry_safe(page, tmp, head, lru) {
 		int mt = get_pcppage_migratetype(page);
+
+		if (page_zone(page) != zone) {
+			/*
+			 * free_unref_page_list() sorts pages by zone. If we end
+			 * up with pages from different NUMA nodes belonging
+			 * to the same ZONE index then we need to redo with the
+			 * correct ZONE pointer. Skip the page for now, redo it
+			 * on the next iteration.
+			 */
+			WARN_ON_ONCE(zone_retry == false);
+			if (zone_retry)
+				continue;
+		}
+
 		/* MIGRATE_ISOLATE page should not go to pcplists */
 		VM_BUG_ON_PAGE(is_migrate_isolate(mt), page);
 		/* Pageblock could have been isolated meanwhile */
 		if (unlikely(isolated_pageblocks))
 			mt = get_pageblock_migratetype(page);
 
+		list_del(&page->lru);
 		__free_one_page(page, page_to_pfn(page), zone, 0, mt);
 		trace_mm_page_pcpu_drain(page, 0, mt);
 	}
@@ -2563,7 +2578,7 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
 	local_irq_restore(flags);
 
 	if (to_drain > 0)
-		free_pcppages_bulk(zone, to_drain, &dst);
+		free_pcppages_bulk(zone, &dst, false);
 }
 #endif
 
@@ -2593,7 +2608,7 @@ static void drain_pages_zone(unsigned int cpu, struct zone *zone)
 	local_irq_restore(flags);
 
 	if (count)
-		free_pcppages_bulk(zone, count, &dst);
+		free_pcppages_bulk(zone, &dst, false);
 }
 
 /*
@@ -2786,7 +2801,8 @@ static bool free_unref_page_prepare(struct page *page, unsigned long pfn)
 	return true;
 }
 
-static void free_unref_page_commit(struct page *page, unsigned long pfn)
+static void free_unref_page_commit(struct page *page, unsigned long pfn,
+				   struct list_head *dst)
 {
 	struct zone *zone = page_zone(page);
 	struct per_cpu_pages *pcp;
@@ -2815,10 +2831,8 @@ static void free_unref_page_commit(struct page *page, unsigned long pfn)
 	pcp->count++;
 	if (pcp->count >= pcp->high) {
 		unsigned long batch = READ_ONCE(pcp->batch);
-		LIST_HEAD(dst);
 
-		isolate_pcp_pages(batch, pcp, &dst);
-		free_pcppages_bulk(zone, batch, &dst);
+		isolate_pcp_pages(batch, pcp, dst);
 	}
 }
 
@@ -2829,13 +2843,17 @@ void free_unref_page(struct page *page)
 {
 	unsigned long flags;
 	unsigned long pfn = page_to_pfn(page);
+	struct zone *zone = page_zone(page);
+	LIST_HEAD(dst);
 
 	if (!free_unref_page_prepare(page, pfn))
 		return;
 
 	local_irq_save(flags);
-	free_unref_page_commit(page, pfn);
+	free_unref_page_commit(page, pfn, &dst);
 	local_irq_restore(flags);
+	if (!list_empty(&dst))
+		free_pcppages_bulk(zone, &dst, false);
 }
 
 /*
@@ -2846,6 +2864,11 @@ void free_unref_page_list(struct list_head *list)
 	struct page *page, *next;
 	unsigned long flags, pfn;
 	int batch_count = 0;
+	struct list_head dsts[__MAX_NR_ZONES];
+	int i;
+
+	for (i = 0; i < __MAX_NR_ZONES; i++)
+		INIT_LIST_HEAD(&dsts[i]);
 
 	/* Prepare pages for freeing */
 	list_for_each_entry_safe(page, next, list, lru) {
@@ -2858,10 +2881,12 @@ void free_unref_page_list(struct list_head *list)
 	local_irq_save(flags);
 	list_for_each_entry_safe(page, next, list, lru) {
 		unsigned long pfn = page_private(page);
+		enum zone_type type;
 
 		set_page_private(page, 0);
 		trace_mm_page_free_batched(page);
-		free_unref_page_commit(page, pfn);
+		type = page_zonenum(page);
+		free_unref_page_commit(page, pfn, &dsts[type]);
 
 		/*
 		 * Guard against excessive IRQ disabled times when we get
@@ -2874,6 +2899,21 @@ void free_unref_page_list(struct list_head *list)
 		}
 	}
 	local_irq_restore(flags);
+
+	for (i = 0; i < __MAX_NR_ZONES; ) {
+		struct page *page;
+		struct zone *zone;
+
+		if (list_empty(&dsts[i])) {
+			i++;
+			continue;
+		}
+
+		page = list_first_entry(&dsts[i], struct page, lru);
+		zone = page_zone(page);
+
+		free_pcppages_bulk(zone, &dsts[i], true);
+	}
 }
 
 /*
-- 
2.17.1
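The essential ordering the patch establishes is: detach pages from the
per-CPU (PCP) list inside a short IRQ-off section, then take zone->lock
and free the detached pages only after interrupts are enabled again.
Below is a minimal, self-contained userspace sketch of that pattern.
Every name in it (fake_page, fake_pcp, fake_zone, and the pthread
mutexes standing in for local_irq_save()/local_irq_restore() and
zone->lock) is an illustrative stand-in, not the kernel's actual
structures or API.

/*
 * Sketch of the pattern arranged above: pages are detached from the
 * per-CPU list in a short "IRQ-off" section, and the bulk free under
 * the zone lock happens afterwards, outside that section.
 */
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>

struct fake_page {
	int id;
	struct fake_page *next;
};

struct fake_pcp {
	struct fake_page *list;		/* per-CPU cache of free "pages" */
	int count;
	int batch;
};

struct fake_zone {
	pthread_mutex_t lock;		/* stands in for zone->lock */
	long freed;
};

/* Stands in for local_irq_save()/local_irq_restore(). */
static pthread_mutex_t fake_irq_lock = PTHREAD_MUTEX_INITIALIZER;

/* Move up to 'batch' pages from the PCP list onto 'dst' (IRQ-off part). */
static int isolate_batch(struct fake_pcp *pcp, int batch, struct fake_page **dst)
{
	int moved = 0;

	while (pcp->list && moved < batch) {
		struct fake_page *page = pcp->list;

		pcp->list = page->next;
		page->next = *dst;
		*dst = page;
		pcp->count--;
		moved++;
	}
	return moved;
}

/* Give the isolated pages back to the zone; only the zone lock is held. */
static void free_bulk(struct fake_zone *zone, struct fake_page *head)
{
	pthread_mutex_lock(&zone->lock);
	while (head) {
		struct fake_page *next = head->next;

		zone->freed++;
		free(head);
		head = next;
	}
	pthread_mutex_unlock(&zone->lock);
}

static void drain_pcp(struct fake_zone *zone, struct fake_pcp *pcp)
{
	struct fake_page *dst = NULL;
	int moved;

	/* Short "IRQ-off" section: list manipulation only, no zone lock. */
	pthread_mutex_lock(&fake_irq_lock);
	moved = isolate_batch(pcp, pcp->batch, &dst);
	pthread_mutex_unlock(&fake_irq_lock);

	/* The zone lock is taken with "interrupts" enabled again. */
	if (moved)
		free_bulk(zone, dst);
}

int main(void)
{
	struct fake_zone zone = { .lock = PTHREAD_MUTEX_INITIALIZER };
	struct fake_pcp pcp = { .list = NULL, .count = 0, .batch = 8 };
	int i;

	for (i = 0; i < 20; i++) {
		struct fake_page *page = malloc(sizeof(*page));

		page->id = i;
		page->next = pcp.list;
		pcp.list = page;
		pcp.count++;
	}

	while (pcp.count)
		drain_pcp(&zone, &pcp);

	printf("freed %ld fake pages\n", zone.freed);
	return 0;
}

Built with -pthread, the sketch drains 20 fake pages in batches of at
most eight; each round takes the zone lock only after the simulated
IRQ-off section has ended, which mirrors the split the patch makes in
free_unref_page() and free_unref_page_list().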