1/* 2 * Generic VM initialization for x86-64 NUMA setups. 3 * Copyright 2002 Andi Kleen, SuSE Labs. 4 * $Id: numa.c,v 1.1.1.1 2008/10/15 03:26:21 james26_jang Exp $ 5 */ 6#include <linux/kernel.h> 7#include <linux/mm.h> 8#include <linux/string.h> 9#include <linux/init.h> 10#include <linux/bootmem.h> 11#include <linux/mmzone.h> 12#include <linux/blk.h> 13#include <asm/e820.h> 14#include <asm/proto.h> 15#include <asm/dma.h> 16 17#undef Dprintk 18#define Dprintk(...) 19 20plat_pg_data_t *plat_node_data[MAXNODE]; 21bootmem_data_t plat_node_bdata[MAX_NUMNODES]; 22 23#define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT)) 24 25static int numa_off __initdata; 26 27unsigned long nodes_present; 28int maxnode; 29 30/* Initialize bootmem allocator for a node */ 31void __init setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) 32{ 33 unsigned long start_pfn, end_pfn, bootmap_pages, bootmap_size, bootmap_start; 34 unsigned long nodedata_phys; 35 const int pgdat_size = round_up(sizeof(plat_pg_data_t), PAGE_SIZE); 36 37 start = round_up(start, ZONE_ALIGN); 38 39 printk("Bootmem setup node %d %016lx-%016lx\n", nodeid, start, end); 40 41 start_pfn = start >> PAGE_SHIFT; 42 end_pfn = end >> PAGE_SHIFT; 43 44 nodedata_phys = find_e820_area(start, end, pgdat_size); 45 if (nodedata_phys == -1L) 46 panic("Cannot find memory pgdat in node %d\n", nodeid); 47 48 Dprintk("nodedata_phys %lx\n", nodedata_phys); 49 50 PLAT_NODE_DATA(nodeid) = phys_to_virt(nodedata_phys); 51 memset(PLAT_NODE_DATA(nodeid), 0, sizeof(plat_pg_data_t)); 52 NODE_DATA(nodeid)->bdata = &plat_node_bdata[nodeid]; 53 54 /* Find a place for the bootmem map */ 55 bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn); 56 bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE); 57 bootmap_start = find_e820_area(bootmap_start, end, bootmap_pages<<PAGE_SHIFT); 58 if (bootmap_start == -1L) 59 panic("Not enough continuous space for bootmap on node %d", nodeid); 60 Dprintk("bootmap start %lu pages %lu\n", bootmap_start, bootmap_pages); 61 62 bootmap_size = init_bootmem_node(NODE_DATA(nodeid), 63 bootmap_start >> PAGE_SHIFT, 64 start_pfn, end_pfn); 65 66 e820_bootmem_free(NODE_DATA(nodeid), start, end); 67 68 reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, pgdat_size); 69 reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start, bootmap_pages<<PAGE_SHIFT); 70 71 PLAT_NODE_DATA(nodeid)->start_pfn = start_pfn; 72 PLAT_NODE_DATA(nodeid)->end_pfn = end_pfn; 73 74 if (nodeid > maxnode) 75 maxnode = nodeid; 76 nodes_present |= (1UL << nodeid); 77} 78 79/* Initialize final allocator for a zone */ 80void __init setup_node_zones(int nodeid) 81{ 82 unsigned long start_pfn, end_pfn; 83 unsigned long zones[MAX_NR_ZONES]; 84 unsigned long dma_end_pfn; 85 unsigned long lmax_mapnr; 86 87 memset(zones, 0, sizeof(unsigned long) * MAX_NR_ZONES); 88 89 start_pfn = PLAT_NODE_DATA(nodeid)->start_pfn; 90 end_pfn = PLAT_NODE_DATA(nodeid)->end_pfn; 91 92 printk("setting up node %d %lx-%lx\n", nodeid, start_pfn, end_pfn); 93 94 /* All nodes > 0 have a zero length zone DMA */ 95 dma_end_pfn = __pa(MAX_DMA_ADDRESS) >> PAGE_SHIFT; 96 if (start_pfn < dma_end_pfn) { 97 zones[ZONE_DMA] = dma_end_pfn - start_pfn; 98 zones[ZONE_NORMAL] = end_pfn - dma_end_pfn; 99 } else { 100 zones[ZONE_NORMAL] = end_pfn - start_pfn; 101 } 102 103 free_area_init_node(nodeid, NODE_DATA(nodeid), NULL, zones, 104 start_pfn<<PAGE_SHIFT, NULL); 105 lmax_mapnr = PLAT_NODE_DATA_STARTNR(nodeid) + PLAT_NODE_DATA_SIZE(nodeid); 106 if (lmax_mapnr > max_mapnr) 107 max_mapnr = lmax_mapnr; 108} 109 110int fake_node; 111 112int __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn) 113{ 114#ifdef CONFIG_K8_NUMA 115 if (!numa_off && !k8_scan_nodes(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT)) 116 return 0; 117#endif 118 printk(KERN_INFO "%s\n", 119 numa_off ? "NUMA turned off" : "No NUMA configuration found"); 120 121 printk(KERN_INFO "Faking a node at %016lx-%016lx\n", 122 start_pfn << PAGE_SHIFT, 123 end_pfn << PAGE_SHIFT); 124 /* setup dummy node covering all memory */ 125 fake_node = 1; 126 memnode_shift = 63; 127 memnodemap[0] = 0; 128 setup_node_bootmem(0, start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT); 129 return -1; 130} 131 132#define for_all_nodes(x) for ((x) = 0; (x) <= maxnode; (x)++) \ 133 if ((1UL << (x)) & nodes_present) 134 135unsigned long __init numa_free_all_bootmem(void) 136{ 137 int i; 138 unsigned long pages = 0; 139 for_all_nodes(i) { 140 pages += free_all_bootmem_node(NODE_DATA(i)); 141 } 142 return pages; 143} 144 145void __init paging_init(void) 146{ 147 int i; 148 for_all_nodes(i) { 149 setup_node_zones(i); 150 } 151} 152 153void show_mem(void) 154{ 155 long i,free = 0,total = 0,reserved = 0; 156 long shared = 0, cached = 0; 157 int nid; 158 159 printk("\nMem-info:\n"); 160 show_free_areas(); 161 printk("Free swap: %6dkB\n",nr_swap_pages<<(PAGE_SHIFT-10)); 162 for_all_nodes (nid) { 163 mem_map_t * lmem_map = NODE_MEM_MAP(nid); 164 i = PLAT_NODE_DATA_SIZE(nid); 165 while (i-- > 0) { 166 total++; 167 if (PageReserved(lmem_map+i)) 168 reserved++; 169 else if (PageSwapCache(lmem_map+i)) 170 cached++; 171 else if (!page_count(lmem_map+i)) 172 free++; 173 else 174 shared += atomic_read(&lmem_map[i].count) - 1; 175 } 176 } 177 printk("%ld pages of RAM\n",total); 178 printk("%ld free pages\n",free); 179 printk("%ld reserved pages\n",reserved); 180 printk("%ld pages shared\n",shared); 181 printk("%ld pages swap cached\n",cached); 182 show_buffers(); 183} 184 185/* [numa=off] */ 186static __init int numa_setup(char *opt) 187{ 188 if (!strcmp(opt,"off")) 189 numa_off = 1; 190 return 1; 191} 192 193__setup("numa=", numa_setup); 194 195