diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index 8dc8a554f77427..dd0cf4834eaa8a 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c @@ -608,11 +608,11 @@ void __init paging_init(void) zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page)); } -pg_data_t *arch_alloc_nodedata(int nid) +pg_data_t * __init arch_alloc_nodedata(int nid) { unsigned long size = compute_pernodesize(nid); - return kzalloc(size, GFP_KERNEL); + return memblock_alloc(size, SMP_CACHE_BYTES); } void arch_free_nodedata(pg_data_t *pgdat) diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 4355983b364d33..cdd66bfdf855bf 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -44,7 +44,7 @@ extern void arch_refresh_nodedata(int nid, pg_data_t *pgdat); */ #define generic_alloc_nodedata(nid) \ ({ \ - kzalloc(sizeof(pg_data_t), GFP_KERNEL); \ + memblock_alloc(sizeof(*pgdat), SMP_CACHE_BYTES); \ }) /* * This definition is just for error path in node hotadd. diff --git a/mm/internal.h b/mm/internal.h index 827a2e4133c188..9a5674bd0a742e 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -717,4 +717,6 @@ void vunmap_range_noflush(unsigned long start, unsigned long end); int numa_migrate_prep(struct page *page, struct vm_area_struct *vma, unsigned long addr, int page_nid, int *flags); +DECLARE_PER_CPU(struct per_cpu_nodestat, boot_nodestats); + #endif /* __MM_INTERNAL_H */ diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 0139b77c51d5d2..11f39d0e76ec36 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1162,19 +1162,21 @@ static void reset_node_present_pages(pg_data_t *pgdat) } /* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */ -static pg_data_t __ref *hotadd_new_pgdat(int nid) +static pg_data_t __ref *hotadd_init_pgdat(int nid) { struct pglist_data *pgdat; pgdat = NODE_DATA(nid); - if (!pgdat) { - pgdat = arch_alloc_nodedata(nid); - if (!pgdat) - return NULL; + /* + * NODE_DATA is preallocated (free_area_init) but its internal + * state is not allocated completely. Add missing pieces. + * Completely offline nodes stay around and they just need + * reintialization. + */ + if (pgdat->per_cpu_nodestats == &boot_nodestats) { pgdat->per_cpu_nodestats = alloc_percpu(struct per_cpu_nodestat); - arch_refresh_nodedata(nid, pgdat); } else { int cpu; /* @@ -1193,8 +1195,6 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid) } } - /* we can use NODE_DATA(nid) from here */ - pgdat->node_id = nid; pgdat->node_start_pfn = 0; /* init node's zones as empty zones, we don't have any present pages.*/ @@ -1246,7 +1246,7 @@ static int __try_online_node(int nid, bool set_node_online) if (node_online(nid)) return 0; - pgdat = hotadd_new_pgdat(nid); + pgdat = hotadd_init_pgdat(nid); if (!pgdat) { pr_err("Cannot online node %d due to NULL pgdat\n", nid); ret = -ENOMEM; @@ -1445,9 +1445,6 @@ int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags) return ret; error: - /* rollback pgdat allocation and others */ - if (new_node) - rollback_node_hotadd(nid); if (IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK)) memblock_remove(start, size); error_mem_hotplug_end: diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 79d4ad7a4f0513..66243e63a4c469 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -6407,7 +6407,7 @@ static void per_cpu_pages_init(struct per_cpu_pages *pcp, struct per_cpu_zonesta #define BOOT_PAGESET_BATCH 1 static DEFINE_PER_CPU(struct per_cpu_pages, boot_pageset); static DEFINE_PER_CPU(struct per_cpu_zonestat, boot_zonestats); -static DEFINE_PER_CPU(struct per_cpu_nodestat, boot_nodestats); +DEFINE_PER_CPU(struct per_cpu_nodestat, boot_nodestats); static void __build_all_zonelists(void *data) { @@ -6429,7 +6429,11 @@ static void __build_all_zonelists(void *data) if (self && !node_online(self->node_id)) { build_zonelists(self); } else { - for_each_online_node(nid) { + /* + * All possible nodes have pgdat preallocated + * in free_area_init + */ + for_each_node(nid) { pg_data_t *pgdat = NODE_DATA(nid); build_zonelists(pgdat); @@ -8129,8 +8133,32 @@ void __init free_area_init(unsigned long *max_zone_pfn) /* Initialise every node */ mminit_verify_pageflags_layout(); setup_nr_node_ids(); - for_each_online_node(nid) { - pg_data_t *pgdat = NODE_DATA(nid); + for_each_node(nid) { + pg_data_t *pgdat; + + if (!node_online(nid)) { + pr_info("Initializing node %d as memoryless\n", nid); + + /* Allocator not initialized yet */ + pgdat = arch_alloc_nodedata(nid); + if (!pgdat) { + pr_err("Cannot allocate %zuB for node %d.\n", + sizeof(*pgdat), nid); + continue; + } + arch_refresh_nodedata(nid, pgdat); + free_area_init_memoryless_node(nid); + /* + * not marking this node online because we do not want to + * confuse userspace by sysfs files/directories for node + * without any memory attached to it (see topology_init) + * The pgdat will get fully initialized when a memory is + * hotpluged into it by hotadd_init_pgdat + */ + continue; + } + + pgdat = NODE_DATA(nid); free_area_init_node(nid); /* Any memory on that node */