/*
 * Initialize MMU support.
 *
 * Copyright (C) 1998-2002 Hewlett-Packard Co
 *	David Mosberger-Tang <davidm@hpl.hp.com>
 */
#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/init.h>

#include <linux/bootmem.h>
#include <linux/mm.h>
#include <linux/personality.h>
#include <linux/reboot.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/efi.h>

#include <asm/bitops.h>
#include <asm/dma.h>
#include <asm/ia32.h>
#include <asm/io.h>
#include <asm/machvec.h>
#include <asm/pgalloc.h>
#include <asm/sal.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/tlb.h>

mmu_gather_t mmu_gathers[NR_CPUS];

/* References to section boundaries: */
extern char _stext, _etext, _edata, __init_begin, __init_end;

extern void ia64_tlb_init (void);

unsigned long MAX_DMA_ADDRESS = PAGE_OFFSET + 0x100000000UL;

static unsigned long totalram_pages;

#ifdef CONFIG_VIRTUAL_MEM_MAP
unsigned long vmalloc_end = VMALLOC_END_INIT;

static struct page *vmem_map;
static unsigned long num_dma_physpages;
#endif

int
do_check_pgt_cache (int low, int high)
{
	int freed = 0;

	if (pgtable_cache_size > high) {
		do {
			if (pgd_quicklist)
				free_page((unsigned long)pgd_alloc_one_fast(0)), ++freed;
			if (pmd_quicklist)
				free_page((unsigned long)pmd_alloc_one_fast(0, 0)), ++freed;
			if (pte_quicklist)
				free_page((unsigned long)pte_alloc_one_fast(0, 0)), ++freed;
		} while (pgtable_cache_size > low);
	}
	return freed;
}
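/*
 * Illustrative sketch (not part of the build): callers trim the page-table
 * cache against the global low/high water marks, roughly as below.
 * mem_init() further down raises pgt_cache_water[1] according to memory size.
 */
#if 0	/* example only */
static void
trim_pgt_cache_example (void)
{
	if (pgtable_cache_size > pgt_cache_water[1])
		do_check_pgt_cache(pgt_cache_water[0], pgt_cache_water[1]);
}
#endif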
/*
 * This performs some platform-dependent address space initialization.
 * On IA-64, we want to set up the VM area for the register backing
 * store (which grows upwards) and install the gateway page which is
 * used for signal trampolines, etc.
 */
void
ia64_init_addr_space (void)
{
	struct vm_area_struct *vma;

	/*
	 * If we're out of memory and kmem_cache_alloc() returns NULL, we simply ignore
	 * the problem.  When the process attempts to write to the register backing store
	 * for the first time, it will get a SEGFAULT in this case.
	 */
	vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
	if (vma) {
		vma->vm_mm = current->mm;
		vma->vm_start = IA64_RBS_BOT;
		vma->vm_end = vma->vm_start + PAGE_SIZE;
		vma->vm_page_prot = PAGE_COPY;
		vma->vm_flags = VM_READ|VM_WRITE|VM_MAYREAD|VM_MAYWRITE|VM_GROWSUP;
		vma->vm_ops = NULL;
		vma->vm_pgoff = 0;
		vma->vm_file = NULL;
		vma->vm_private_data = NULL;
		insert_vm_struct(current->mm, vma);
	}

	/* map NaT-page at address zero to speed up speculative dereferencing of NULL: */
	if (!(current->personality & MMAP_PAGE_ZERO)) {
		vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
		if (vma) {
			memset(vma, 0, sizeof(*vma));
			vma->vm_mm = current->mm;
			vma->vm_end = PAGE_SIZE;
			vma->vm_page_prot = __pgprot(pgprot_val(PAGE_READONLY) | _PAGE_MA_NAT);
			vma->vm_flags = VM_READ | VM_MAYREAD | VM_IO | VM_RESERVED;
			insert_vm_struct(current->mm, vma);
		}
	}
}

void
free_initmem (void)
{
	unsigned long addr;

	addr = (unsigned long) &__init_begin;
	for (; addr < (unsigned long) &__init_end; addr += PAGE_SIZE) {
		clear_bit(PG_reserved, &virt_to_page(addr)->flags);
		set_page_count(virt_to_page(addr), 1);
		free_page(addr);
		++totalram_pages;
	}
	printk(KERN_INFO "Freeing unused kernel memory: %ldkB freed\n",
	       (&__init_end - &__init_begin) >> 10);
}

void
free_initrd_mem (unsigned long start, unsigned long end)
{
	/*
	 * EFI uses 4KB pages while the kernel can use 4KB or bigger.
	 * Thus EFI and the kernel may have different page sizes.  It is
	 * therefore possible to have the initrd share the same page as
	 * the end of the kernel (given current setup).
	 *
	 * To avoid freeing/using the wrong page (kernel sized) we:
	 *	- align up the beginning of initrd
	 *	- align down the end of initrd
	 *
	 *  |             |
	 *  |=============| a000
	 *  |             |
	 *  |             |
	 *  |             | 9000
	 *  |/////////////|
	 *  |/////////////|
	 *  |=============| 8000
	 *  |///INITRD////|
	 *  |/////////////|
	 *  |/////////////| 7000
	 *  |             |
	 *  |KKKKKKKKKKKKK|
	 *  |=============| 6000
	 *  |KKKKKKKKKKKKK|
	 *  |KKKKKKKKKKKKK|
	 *  K=kernel using 8KB pages
	 *
	 * In this example, we must free page 8000 ONLY.  So we must align up
	 * initrd_start and keep initrd_end as is.
	 */
	start = PAGE_ALIGN(start);
	end = end & PAGE_MASK;

	if (start < end)
		printk(KERN_INFO "Freeing initrd memory: %ldkB freed\n", (end - start) >> 10);

	for (; start < end; start += PAGE_SIZE) {
		if (!VALID_PAGE(virt_to_page(start)))
			continue;
		clear_bit(PG_reserved, &virt_to_page(start)->flags);
		set_page_count(virt_to_page(start), 1);
		free_page(start);
		++totalram_pages;
	}
}
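/*
 * Worked example for free_initrd_mem() with illustrative numbers (8KB
 * kernel pages, so PAGE_MASK == ~0x1fff): for an initrd at [0x7000, 0xa000),
 *
 *	start = PAGE_ALIGN(0x7000)  = 0x8000
 *	end   = 0xa000 & PAGE_MASK  = 0xa000
 *
 * so the loop frees only the 8KB kernel page at 0x8000.  The 4KB at
 * [0x7000, 0x8000) stays reserved because it shares a kernel page with
 * non-initrd data.
 */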
void
si_meminfo (struct sysinfo *val)
{
	val->totalram = totalram_pages;
	val->sharedram = 0;
	val->freeram = nr_free_pages();
	val->bufferram = atomic_read(&buffermem_pages);
	val->totalhigh = 0;
	val->freehigh = 0;
	val->mem_unit = PAGE_SIZE;
}

void
show_mem (void)
{
	int i, total = 0, reserved = 0;
	int shared = 0, cached = 0;

	printk("Mem-info:\n");
	show_free_areas();

#ifdef CONFIG_DISCONTIGMEM
	{
		pg_data_t *pgdat = pgdat_list;

		printk("Free swap: %6dkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
		do {
			printk("Node ID: %d\n", pgdat->node_id);
			for (i = 0; i < pgdat->node_size; i++) {
				if (PageReserved(pgdat->node_mem_map+i))
					reserved++;
				else if (PageSwapCache(pgdat->node_mem_map+i))
					cached++;
				else if (page_count(pgdat->node_mem_map + i))
					shared += page_count(pgdat->node_mem_map + i) - 1;
			}
			printk("\t%d pages of RAM\n", pgdat->node_size);
			printk("\t%d reserved pages\n", reserved);
			printk("\t%d pages shared\n", shared);
			printk("\t%d pages swap cached\n", cached);
			pgdat = pgdat->node_next;
		} while (pgdat);
		printk("Total of %ld pages in page table cache\n", pgtable_cache_size);
		show_buffers();
		printk("%d free buffer pages\n", nr_free_buffer_pages());
	}
#else /* !CONFIG_DISCONTIGMEM */
	printk("Free swap: %6dkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
	i = max_mapnr;
	while (i-- > 0) {
		if (!VALID_PAGE(mem_map + i))
			continue;
		total++;
		if (PageReserved(mem_map+i))
			reserved++;
		else if (PageSwapCache(mem_map+i))
			cached++;
		else if (page_count(mem_map + i))
			shared += page_count(mem_map + i) - 1;
	}
	printk("%d pages of RAM\n", total);
	printk("%d reserved pages\n", reserved);
	printk("%d pages shared\n", shared);
	printk("%d pages swap cached\n", cached);
	printk("%ld pages in page table cache\n", pgtable_cache_size);
	show_buffers();
#endif /* !CONFIG_DISCONTIGMEM */
}

/*
 * This is like put_dirty_page() but installs a clean page with PAGE_GATE protection
 * (execute-only, typically).
 */
struct page *
put_gate_page (struct page *page, unsigned long address)
{
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *pte;

	if (!PageReserved(page))
		printk("put_gate_page: gate page at 0x%p not in reserved memory\n",
		       page_address(page));

	pgd = pgd_offset_k(address);	/* note: this is NOT pgd_offset()! */

	spin_lock(&init_mm.page_table_lock);
	{
		pmd = pmd_alloc(&init_mm, pgd, address);
		if (!pmd)
			goto out;
		pte = pte_alloc(&init_mm, pmd, address);
		if (!pte)
			goto out;
		if (!pte_none(*pte)) {
			pte_ERROR(*pte);
			goto out;
		}
		flush_page_to_ram(page);
		set_pte(pte, mk_pte(page, PAGE_GATE));
	}
  out:	spin_unlock(&init_mm.page_table_lock);
	/* no need for flush_tlb */
	return page;
}
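/*
 * Illustrative usage: the gate page holding the signal trampolines is
 * installed exactly once at boot, from mem_init() below:
 *
 *	put_gate_page(virt_to_page(__start_gate_section), GATE_ADDR);
 */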
void __init
ia64_mmu_init (void *my_cpu_data)
{
	unsigned long psr, rid, pta, impl_va_bits;
	extern void __init tlb_init (void);
#ifdef CONFIG_DISABLE_VHPT
#	define VHPT_ENABLE_BIT	0
#else
#	define VHPT_ENABLE_BIT	1
#endif

	/*
	 * Set up the kernel identity mapping for regions 6 and 5.  The mapping for region
	 * 7 is set up in _start().
	 */
	psr = ia64_clear_ic();

	rid = ia64_rid(IA64_REGION_ID_KERNEL, __IA64_UNCACHED_OFFSET);
	ia64_set_rr(__IA64_UNCACHED_OFFSET, (rid << 8) | (IA64_GRANULE_SHIFT << 2));

	rid = ia64_rid(IA64_REGION_ID_KERNEL, VMALLOC_START);
	ia64_set_rr(VMALLOC_START, (rid << 8) | (PAGE_SHIFT << 2) | 1);

	/* ensure rr6 is up-to-date before inserting the PERCPU_ADDR translation: */
	ia64_srlz_d();

	ia64_itr(0x2, IA64_TR_PERCPU_DATA, PERCPU_ADDR,
		 pte_val(mk_pte_phys(__pa(my_cpu_data), PAGE_KERNEL)), PAGE_SHIFT);

	ia64_set_psr(psr);
	ia64_srlz_i();

	/*
	 * Check if the virtually mapped linear page table (VMLPT) overlaps with a mapped
	 * address space.  The IA-64 architecture guarantees that at least 50 bits of
	 * virtual address space are implemented but if we pick a large enough page size
	 * (e.g., 64KB), the mapped address space is big enough that it will overlap with
	 * the VMLPT.  I assume that once we run on machines big enough to warrant 64KB
	 * pages, IMPL_VA_MSB will be significantly bigger, so this is unlikely to become
	 * a problem in practice.  Alternatively, we could truncate the top of the mapped
	 * address space to not permit mappings that would overlap with the VMLPT.
	 * --davidm 00/12/06
	 */
#	define pte_bits			3
#	define mapped_space_bits	(3*(PAGE_SHIFT - pte_bits) + PAGE_SHIFT)
	/*
	 * The virtual page table has to cover the entire implemented address space within
	 * a region even though not all of this space may be mappable.  The reason for
	 * this is that the Access bit and Dirty bit fault handlers perform
	 * non-speculative accesses to the virtual page table, so the address range of the
	 * virtual page table itself needs to be covered by the virtual page table.
	 */
#	define vmlpt_bits		(impl_va_bits - PAGE_SHIFT + pte_bits)
#	define POW2(n)			(1ULL << (n))

	impl_va_bits = ffz(~(local_cpu_data->unimpl_va_mask | (7UL << 61)));

	if (impl_va_bits < 51 || impl_va_bits > 61)
		panic("CPU has bogus IMPL_VA_MSB value of %lu!\n", impl_va_bits - 1);

	/* place the VMLPT at the end of each page-table mapped region: */
	pta = POW2(61) - POW2(vmlpt_bits);

	if (POW2(mapped_space_bits) >= pta)
		panic("mm/init: overlap between virtually mapped linear page table and "
		      "mapped kernel space!");
	/*
	 * Set the (virtually mapped linear) page table address.  Bit
	 * 8 selects between the short and long format, bits 2-7 the
	 * size of the table, and bit 0 whether the VHPT walker is
	 * enabled.
	 */
	ia64_set_pta(pta | (0 << 8) | (vmlpt_bits << 2) | VHPT_ENABLE_BIT);

	ia64_tlb_init();
}
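/*
 * Worked example for the VMLPT sizing above (illustrative, assuming 8KB
 * pages, i.e. PAGE_SHIFT == 13, on a CPU with IMPL_VA_MSB == 50, i.e.
 * impl_va_bits == 51):
 *
 *	mapped_space_bits = 3*(13 - 3) + 13 = 43
 *	vmlpt_bits        = 51 - 13 + 3     = 41
 *	pta               = 2^61 - 2^41
 *
 * The overlap check passes since 2^43 < 2^61 - 2^41, and the PTA is
 * programmed for a 2^41-byte short-format table, with the walker enabled
 * unless CONFIG_DISABLE_VHPT is set.
 */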
#ifdef CONFIG_VIRTUAL_MEM_MAP

#include <asm/pgtable.h>

static int
create_mem_map_page_table (u64 start, u64 end, void *arg)
{
	unsigned long address, start_page, end_page;
	struct page *map_start, *map_end;
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *pte;

	/* should we use platform_map_nr here? */

	map_start = vmem_map + MAP_NR_DENSE(start);
	map_end = vmem_map + MAP_NR_DENSE(end);

	start_page = (unsigned long) map_start & PAGE_MASK;
	end_page = PAGE_ALIGN((unsigned long) map_end);

	for (address = start_page; address < end_page; address += PAGE_SIZE) {
		pgd = pgd_offset_k(address);
		if (pgd_none(*pgd))
			pgd_populate(&init_mm, pgd, alloc_bootmem_pages(PAGE_SIZE));
		pmd = pmd_offset(pgd, address);

		if (pmd_none(*pmd))
			pmd_populate(&init_mm, pmd, alloc_bootmem_pages(PAGE_SIZE));
		pte = pte_offset(pmd, address);

		if (pte_none(*pte))
			set_pte(pte, mk_pte_phys(__pa(alloc_bootmem_pages(PAGE_SIZE)),
						 PAGE_KERNEL));
	}
	return 0;
}

struct memmap_init_callback_data {
	memmap_init_callback_t *memmap_init;
	struct page *start;
	struct page *end;
	int zone;
	int highmem;
};

static int
virtual_memmap_init (u64 start, u64 end, void *arg)
{
	struct memmap_init_callback_data *args;
	struct page *map_start, *map_end;

	args = (struct memmap_init_callback_data *) arg;

	/* Should we use platform_map_nr here? */

	map_start = vmem_map + MAP_NR_DENSE(start);
	map_end = vmem_map + MAP_NR_DENSE(end);

	if (map_start < args->start)
		map_start = args->start;
	if (map_end > args->end)
		map_end = args->end;

	/*
	 * We have to initialize "out of bounds" struct page elements
	 * that fit completely on the same pages that were allocated
	 * for the "in bounds" elements because they may be referenced
	 * later (and found to be "reserved").
	 */
	map_start -= ((unsigned long) map_start & (PAGE_SIZE - 1)) / sizeof(struct page);
	map_end += ((PAGE_ALIGN((unsigned long) map_end) - (unsigned long) map_end)
		    / sizeof(struct page));

	if (map_start < map_end)
		(*args->memmap_init)(map_start, map_end, args->zone,
				     page_to_phys(map_start), args->highmem);

	return 0;
}

unsigned long
arch_memmap_init (memmap_init_callback_t *memmap_init, struct page *start,
		  struct page *end, int zone, unsigned long start_paddr, int highmem)
{
	struct memmap_init_callback_data args;

	args.memmap_init = memmap_init;
	args.start = start;
	args.end = end;
	args.zone = zone;
	args.highmem = highmem;

	efi_memmap_walk(virtual_memmap_init, &args);

	return page_to_phys(end);
}

static int
count_dma_pages (u64 start, u64 end, void *arg)
{
	unsigned long *count = arg;

	if (end <= MAX_DMA_ADDRESS)
		*count += (end - start) >> PAGE_SHIFT;
	return 0;
}

int
ia64_page_valid (struct page *page)
{
	char byte;

	return __get_user(byte, (char *) page) == 0;
}

#endif /* CONFIG_VIRTUAL_MEM_MAP */
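/*
 * Illustrative only: with a virtual mem_map, a "struct page" pointer may
 * refer to a hole for which create_mem_map_page_table() never allocated
 * backing store, so validity is probed rather than computed:
 *
 *	if (ia64_page_valid(page))
 *		... safe to dereference page ...
 *
 * __get_user() returns -EFAULT if the kernel access faults, i.e. if no
 * mem_map page is mapped at that address.
 */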
static int
count_pages (u64 start, u64 end, void *arg)
{
	unsigned long *count = arg;

	*count += (end - start) >> PAGE_SHIFT;
	return 0;
}

/*
 * Set up the page tables.
 */
void
paging_init (void)
{
	unsigned long max_dma, zones_size[MAX_NR_ZONES];

	/* initialize mem_map[] */

	memset(zones_size, 0, sizeof(zones_size));

	num_physpages = 0;
	efi_memmap_walk(count_pages, &num_physpages);

	max_dma = virt_to_phys((void *) MAX_DMA_ADDRESS) >> PAGE_SHIFT;

#ifdef CONFIG_VIRTUAL_MEM_MAP
	{
		unsigned long zholes_size[MAX_NR_ZONES];
		unsigned long map_size;

		memset(zholes_size, 0, sizeof(zholes_size));

		num_dma_physpages = 0;
		efi_memmap_walk(count_dma_pages, &num_dma_physpages);

		if (max_low_pfn < max_dma) {
			zones_size[ZONE_DMA] = max_low_pfn;
			zholes_size[ZONE_DMA] = max_low_pfn - num_dma_physpages;
		} else {
			zones_size[ZONE_DMA] = max_dma;
			zholes_size[ZONE_DMA] = max_dma - num_dma_physpages;
			if (num_physpages > num_dma_physpages) {
				zones_size[ZONE_NORMAL] = max_low_pfn - max_dma;
				zholes_size[ZONE_NORMAL] = ((max_low_pfn - max_dma)
							    - (num_physpages - num_dma_physpages));
			}
		}

		/* allocate virtual mem_map: */

		map_size = PAGE_ALIGN(max_low_pfn*sizeof(struct page));
		vmalloc_end -= map_size;
		vmem_map = (struct page *) vmalloc_end;
		efi_memmap_walk(create_mem_map_page_table, NULL);

		free_area_init_node(0, NULL, vmem_map, zones_size, 0, zholes_size);
		printk("Virtual mem_map starts at 0x%p\n", mem_map);
	}
#else /* !CONFIG_VIRTUAL_MEM_MAP */
	if (max_low_pfn < max_dma)
		zones_size[ZONE_DMA] = max_low_pfn;
	else {
		zones_size[ZONE_DMA] = max_dma;
		zones_size[ZONE_NORMAL] = max_low_pfn - max_dma;
	}
	free_area_init(zones_size);
#endif /* !CONFIG_VIRTUAL_MEM_MAP */
}

static int
count_reserved_pages (u64 start, u64 end, void *arg)
{
	unsigned long num_reserved = 0;
	unsigned long *count = arg;
	struct page *pg;

	for (pg = virt_to_page(start); pg < virt_to_page(end); ++pg)
		if (PageReserved(pg))
			++num_reserved;
	*count += num_reserved;
	return 0;
}
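/*
 * Worked example for the zone sizing in paging_init() (illustrative):
 * MAX_DMA_ADDRESS covers the first 4GB, so with 8KB pages
 * max_dma == 2^32 >> 13 == 0x80000.  A machine whose memory ends below
 * 4GB gets a single DMA zone of max_low_pfn pages; otherwise the first
 * 0x80000 page frames go to ZONE_DMA and the remainder to ZONE_NORMAL,
 * with pages EFI never reported recorded in zholes_size[] (in the
 * CONFIG_VIRTUAL_MEM_MAP case).
 */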
void
mem_init (void)
{
	extern char __start_gate_section[];
	long reserved_pages, codesize, datasize, initsize;
	unsigned long num_pgt_pages;

#ifdef CONFIG_PCI
	/*
	 * This needs to be called _after_ the command line has been parsed but _before_
	 * any drivers that may need the PCI DMA interface are initialized or bootmem has
	 * been freed.
	 */
	platform_pci_dma_init();
#endif

	if (!mem_map)
		BUG();

	max_mapnr = max_low_pfn;
	high_memory = __va(max_low_pfn * PAGE_SIZE);

	totalram_pages += free_all_bootmem();

	reserved_pages = 0;
	efi_memmap_walk(count_reserved_pages, &reserved_pages);

	codesize = (unsigned long) &_etext - (unsigned long) &_stext;
	datasize = (unsigned long) &_edata - (unsigned long) &_etext;
	initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;

	printk(KERN_INFO "Memory: %luk/%luk available (%luk code, %luk reserved, %luk data, %luk init)\n",
	       (unsigned long) nr_free_pages() << (PAGE_SHIFT - 10),
	       num_physpages << (PAGE_SHIFT - 10), codesize >> 10,
	       reserved_pages << (PAGE_SHIFT - 10), datasize >> 10, initsize >> 10);

	/*
	 * Allow for enough (cached) page table pages so that we can map the entire memory
	 * at least once.  Each task also needs a couple of page table pages, so add in a
	 * fudge factor for that (don't use "threads-max" here; that would be wrong!).
	 * Don't allow the cache to be more than 10% of total memory, though.
	 */
#	define NUM_TASKS	500	/* typical number of tasks */
	num_pgt_pages = nr_free_pages() / PTRS_PER_PGD + NUM_TASKS;
	if (num_pgt_pages > nr_free_pages() / 10)
		num_pgt_pages = nr_free_pages() / 10;
	if (num_pgt_pages > pgt_cache_water[1])
		pgt_cache_water[1] = num_pgt_pages;

	/* install the gate page in the global page table: */
	put_gate_page(virt_to_page(__start_gate_section), GATE_ADDR);

#ifdef CONFIG_IA32_SUPPORT
	ia32_gdt_init();
#endif
}
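/*
 * Worked example for the water-mark sizing above (illustrative numbers):
 * with 1GB free in 8KB pages, nr_free_pages() is about 131072; with
 * PTRS_PER_PGD == 1024 that yields 131072/1024 + 500 == 628 cached
 * page-table pages, well below the 10% cap of 13107.
 */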