mm: disable interrupts while initializing deferred pages Vlastimil Babka reported about a window issue during which when deferred pages are initialized, and the current version of on-demand initialization is finished, allocations may fail. While this is highly unlikely scenario, since this kind of allocation request must be large, and must come from interrupt handler, we still want to cover it. We solve this by initializing deferred pages with interrupts disabled, and holding node_size_lock spin lock while pages in the node are being initialized. The on-demand deferred page initialization that comes later will use the same lock, and thus synchronize with deferred_init_memmap(). It is unlikely for threads that initialize deferred pages to be interrupted. They run soon after smp_init(), but before modules are initialized, and long before user space programs. This is why there is no adverse effect of having these threads running with interrupts disabled. [[email protected]: v6] Link: http://lkml.kernel.org/r/[email protected] Link: http://lkml.kernel.org/r/[email protected] Signed-off-by: Pavel Tatashin <[email protected]> Reviewed-by: Andrew Morton <[email protected]> Cc: Steven Sistare <[email protected]> Cc: Daniel Jordan <[email protected]> Cc: Masayoshi Mizuma <[email protected]> Cc: Michal Hocko <[email protected]> Cc: Catalin Marinas <[email protected]> Cc: AKASHI Takahiro <[email protected]> Cc: Gioh Kim <[email protected]> Cc: Heiko Carstens <[email protected]> Cc: Yaowei Bai <[email protected]> Cc: Wei Yang <[email protected]> Cc: Paul Burton <[email protected]> Cc: Miles Chen <[email protected]> Cc: Vlastimil Babka <[email protected]> Cc: Mel Gorman <[email protected]> Cc: Johannes Weiner <[email protected]> Signed-off-by: Andrew Morton <[email protected]> Signed-off-by: Linus Torvalds <[email protected]>

commit: 3a2d7fa8a3d5ae740bd0c21d933acc6220857ed0 [log] [tgz]
author: Pavel Tatashin <[email protected]> Thu Apr 05 16:22:27 2018 -0700
committer: Linus Torvalds <[email protected]> Thu Apr 05 21:36:24 2018 -0700
tree: 30eb570bb5050346cb4f5315743faebcdb6e389c
parent: 8e7a0c9100cade3bdbf851206b892b6f98eb39c9 [diff] [blame]
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 531d6ac..cf5555d 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c

@@ -1506,7 +1506,7 @@ static void __init deferred_free_pages(int nid, int zid, unsigned long pfn,
 		} else if (!(pfn & nr_pgmask)) {
 			deferred_free_range(pfn - nr_free, nr_free);
 			nr_free = 1;
-			cond_resched();
+			touch_nmi_watchdog();
 		} else {
 			nr_free++;
 		}
@@ -1535,7 +1535,7 @@ static unsigned long  __init deferred_init_pages(int nid, int zid,
 			continue;
 		} else if (!page || !(pfn & nr_pgmask)) {
 			page = pfn_to_page(pfn);
-			cond_resched();
+			touch_nmi_watchdog();
 		} else {
 			page++;
 		}
@@ -1552,23 +1552,25 @@ static int __init deferred_init_memmap(void *data)
 	int nid = pgdat->node_id;
 	unsigned long start = jiffies;
 	unsigned long nr_pages = 0;
-	unsigned long spfn, epfn;
+	unsigned long spfn, epfn, first_init_pfn, flags;
 	phys_addr_t spa, epa;
 	int zid;
 	struct zone *zone;
-	unsigned long first_init_pfn = pgdat->first_deferred_pfn;
 	const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);
 	u64 i;
 
-	if (first_init_pfn == ULONG_MAX) {
-		pgdat_init_report_one_done();
-		return 0;
-	}
-
 	/* Bind memory initialisation thread to a local node if possible */
 	if (!cpumask_empty(cpumask))
 		set_cpus_allowed_ptr(current, cpumask);
 
+	pgdat_resize_lock(pgdat, &flags);
+	first_init_pfn = pgdat->first_deferred_pfn;
+	if (first_init_pfn == ULONG_MAX) {
+		pgdat_resize_unlock(pgdat, &flags);
+		pgdat_init_report_one_done();
+		return 0;
+	}
+
 	/* Sanity check boundaries */
 	BUG_ON(pgdat->first_deferred_pfn < pgdat->node_start_pfn);
 	BUG_ON(pgdat->first_deferred_pfn > pgdat_end_pfn(pgdat));
@@ -1598,6 +1600,7 @@ static int __init deferred_init_memmap(void *data)
 		epfn = min_t(unsigned long, zone_end_pfn(zone), PFN_DOWN(epa));
 		deferred_free_pages(nid, zid, spfn, epfn);
 	}
+	pgdat_resize_unlock(pgdat, &flags);
 
 	/* Sanity check that the next zone really is unpopulated */
 	WARN_ON(++zid < MAX_NR_ZONES && populated_zone(++zone));
commit	3a2d7fa8a3d5ae740bd0c21d933acc6220857ed0	[log] [tgz]
author	Pavel Tatashin <[email protected]>	Thu Apr 05 16:22:27 2018 -0700
committer	Linus Torvalds <[email protected]>	Thu Apr 05 21:36:24 2018 -0700
tree	30eb570bb5050346cb4f5315743faebcdb6e389c
parent	8e7a0c9100cade3bdbf851206b892b6f98eb39c9 [diff] [blame]