1/*
2 * Generic VM initialization for x86-64 NUMA setups.
3 * Copyright 2002 Andi Kleen, SuSE Labs.
4 * $Id: numa.c,v 1.1.1.1 2008/10/15 03:26:21 james26_jang Exp $
5 */
6#include <linux/kernel.h>
7#include <linux/mm.h>
8#include <linux/string.h>
9#include <linux/init.h>
10#include <linux/bootmem.h>
11#include <linux/mmzone.h>
12#include <linux/blk.h>
13#include <asm/e820.h>
14#include <asm/proto.h>
15#include <asm/dma.h>
16
17#undef Dprintk
18#define Dprintk(...)
19
20plat_pg_data_t *plat_node_data[MAXNODE];
21bootmem_data_t plat_node_bdata[MAX_NUMNODES];
22
23#define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT))
24
25static int numa_off __initdata;
26
27unsigned long nodes_present;
28int maxnode;
29
30/* Initialize bootmem allocator for a node */
31void __init setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
32{
33	unsigned long start_pfn, end_pfn, bootmap_pages, bootmap_size, bootmap_start;
34	unsigned long nodedata_phys;
35	const int pgdat_size = round_up(sizeof(plat_pg_data_t), PAGE_SIZE);
36
37	start = round_up(start, ZONE_ALIGN);
38
39	printk("Bootmem setup node %d %016lx-%016lx\n", nodeid, start, end);
40
41	start_pfn = start >> PAGE_SHIFT;
42	end_pfn = end >> PAGE_SHIFT;
43
44	nodedata_phys = find_e820_area(start, end, pgdat_size);
45	if (nodedata_phys == -1L)
46		panic("Cannot find memory pgdat in node %d\n", nodeid);
47
48	Dprintk("nodedata_phys %lx\n", nodedata_phys);
49
50	PLAT_NODE_DATA(nodeid) = phys_to_virt(nodedata_phys);
51	memset(PLAT_NODE_DATA(nodeid), 0, sizeof(plat_pg_data_t));
52	NODE_DATA(nodeid)->bdata = &plat_node_bdata[nodeid];
53
54	/* Find a place for the bootmem map */
55	bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
56	bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE);
57	bootmap_start = find_e820_area(bootmap_start, end, bootmap_pages<<PAGE_SHIFT);
58	if (bootmap_start == -1L)
59		panic("Not enough continuous space for bootmap on node %d", nodeid);
60	Dprintk("bootmap start %lu pages %lu\n", bootmap_start, bootmap_pages);
61
62	bootmap_size = init_bootmem_node(NODE_DATA(nodeid),
63					 bootmap_start >> PAGE_SHIFT,
64					 start_pfn, end_pfn);
65
66	e820_bootmem_free(NODE_DATA(nodeid), start, end);
67
68	reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, pgdat_size);
69	reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start, bootmap_pages<<PAGE_SHIFT);
70
71	PLAT_NODE_DATA(nodeid)->start_pfn = start_pfn;
72	PLAT_NODE_DATA(nodeid)->end_pfn = end_pfn;
73
74	if (nodeid > maxnode)
75		maxnode = nodeid;
76	nodes_present |= (1UL << nodeid);
77}
78
79/* Initialize final allocator for a zone */
80void __init setup_node_zones(int nodeid)
81{
82	unsigned long start_pfn, end_pfn;
83	unsigned long zones[MAX_NR_ZONES];
84	unsigned long dma_end_pfn;
85	unsigned long lmax_mapnr;
86
87	memset(zones, 0, sizeof(unsigned long) * MAX_NR_ZONES);
88
89	start_pfn = PLAT_NODE_DATA(nodeid)->start_pfn;
90	end_pfn = PLAT_NODE_DATA(nodeid)->end_pfn;
91
92	printk("setting up node %d %lx-%lx\n", nodeid, start_pfn, end_pfn);
93
94	/* All nodes > 0 have a zero length zone DMA */
95	dma_end_pfn = __pa(MAX_DMA_ADDRESS) >> PAGE_SHIFT;
96	if (start_pfn < dma_end_pfn) {
97		zones[ZONE_DMA] = dma_end_pfn - start_pfn;
98		zones[ZONE_NORMAL] = end_pfn - dma_end_pfn;
99	} else {
100		zones[ZONE_NORMAL] = end_pfn - start_pfn;
101	}
102
103	free_area_init_node(nodeid, NODE_DATA(nodeid), NULL, zones,
104			    start_pfn<<PAGE_SHIFT, NULL);
105	lmax_mapnr = PLAT_NODE_DATA_STARTNR(nodeid) + PLAT_NODE_DATA_SIZE(nodeid);
106	if (lmax_mapnr > max_mapnr)
107		max_mapnr = lmax_mapnr;
108}
109
110int fake_node;
111
112int __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
113{
114#ifdef CONFIG_K8_NUMA
115	if (!numa_off && !k8_scan_nodes(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT))
116		return 0;
117#endif
118	printk(KERN_INFO "%s\n",
119	       numa_off ? "NUMA turned off" : "No NUMA configuration found");
120
121	printk(KERN_INFO "Faking a node at %016lx-%016lx\n",
122	       start_pfn << PAGE_SHIFT,
123	       end_pfn << PAGE_SHIFT);
124	/* setup dummy node covering all memory */
125	fake_node = 1;
126	memnode_shift = 63;
127	memnodemap[0] = 0;
128	setup_node_bootmem(0, start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
129	return -1;
130}
131
132#define for_all_nodes(x) for ((x) = 0; (x) <= maxnode; (x)++) \
133				if ((1UL << (x)) & nodes_present)
134
135unsigned long __init numa_free_all_bootmem(void)
136{
137	int i;
138	unsigned long pages = 0;
139	for_all_nodes(i) {
140		pages += free_all_bootmem_node(NODE_DATA(i));
141	}
142	return pages;
143}
144
145void __init paging_init(void)
146{
147	int i;
148	for_all_nodes(i) {
149		setup_node_zones(i);
150	}
151}
152
153void show_mem(void)
154{
155	long i,free = 0,total = 0,reserved = 0;
156	long shared = 0, cached = 0;
157	int nid;
158
159	printk("\nMem-info:\n");
160	show_free_areas();
161	printk("Free swap:       %6dkB\n",nr_swap_pages<<(PAGE_SHIFT-10));
162	for_all_nodes (nid) {
163		mem_map_t * lmem_map = NODE_MEM_MAP(nid);
164		i = PLAT_NODE_DATA_SIZE(nid);
165		while (i-- > 0) {
166			total++;
167			if (PageReserved(lmem_map+i))
168				reserved++;
169			else if (PageSwapCache(lmem_map+i))
170				cached++;
171			else if (!page_count(lmem_map+i))
172				free++;
173			else
174				shared += atomic_read(&lmem_map[i].count) - 1;
175		}
176	}
177	printk("%ld pages of RAM\n",total);
178	printk("%ld free pages\n",free);
179	printk("%ld reserved pages\n",reserved);
180	printk("%ld pages shared\n",shared);
181	printk("%ld pages swap cached\n",cached);
182	show_buffers();
183}
184
185/* [numa=off] */
186static __init int numa_setup(char *opt)
187{
188	if (!strcmp(opt,"off"))
189		numa_off = 1;
190	return 1;
191}
192
193__setup("numa=", numa_setup);
194
195