/*
 * linux/arch/x86_64/mm/init.c
 *
 * Copyright (C) 1995 Linus Torvalds
 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
 * Copyright (C) 2002,2003 Andi Kleen <ak@suse.de>
 */

#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/pagemap.h>
#include <linux/bootmem.h>
#include <linux/proc_fs.h>
#include <linux/pci.h>
#include <linux/pfn.h>
#include <linux/poison.h>
#include <linux/dma-mapping.h>
#include <linux/module.h>
#include <linux/memory_hotplug.h>
#include <linux/nmi.h>

#include <asm/processor.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/dma.h>
#include <asm/fixmap.h>
#include <asm/e820.h>
#include <asm/apic.h>
#include <asm/tlb.h>
#include <asm/mmu_context.h>
#include <asm/proto.h>
#include <asm/smp.h>
#include <asm/sections.h>

#ifndef Dprintk
#define Dprintk(x...)
#endif

const struct dma_mapping_ops* dma_ops;
EXPORT_SYMBOL(dma_ops);

static unsigned long dma_reserve __initdata;

DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);

/*
 * NOTE: pagetable_init alloc all the fixmap pagetables contiguous on the
 * physical space so we can cache the place of the first one and move
 * around without checking the pgd every time.
 */

void show_mem(void)
{
	long i, total = 0, reserved = 0;
	long shared = 0, cached = 0;
	pg_data_t *pgdat;
	struct page *page;

	printk(KERN_INFO "Mem-info:\n");
	show_free_areas();
	printk(KERN_INFO "Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));

	for_each_online_pgdat(pgdat) {
		for (i = 0; i < pgdat->node_spanned_pages; ++i) {
			/* this loop can take a while with 256 GB and 4k pages
			   so update the NMI watchdog */
			if (unlikely(i % MAX_ORDER_NR_PAGES == 0)) {
				touch_nmi_watchdog();
			}
			if (!pfn_valid(pgdat->node_start_pfn + i))
				continue;
			page = pfn_to_page(pgdat->node_start_pfn + i);
			total++;
			if (PageReserved(page))
				reserved++;
			else if (PageSwapCache(page))
				cached++;
			else if (page_count(page))
				shared += page_count(page) - 1;
		}
	}
	printk(KERN_INFO "%lu pages of RAM\n", total);
	printk(KERN_INFO "%lu reserved pages\n", reserved);
	printk(KERN_INFO "%lu pages shared\n", shared);
	printk(KERN_INFO "%lu pages swap cached\n", cached);
}

int after_bootmem;

static __init void *spp_getpage(void)
{
	void *ptr;
	if (after_bootmem)
		ptr = (void *) get_zeroed_page(GFP_ATOMIC);
	else
		ptr = alloc_bootmem_pages(PAGE_SIZE);
	if (!ptr || ((unsigned long)ptr & ~PAGE_MASK))
		panic("set_pte_phys: cannot allocate page data %s\n", after_bootmem?"after bootmem":"");

	Dprintk("spp_getpage %p\n", ptr);
	return ptr;
}

static __init void set_pte_phys(unsigned long vaddr,
				unsigned long phys, pgprot_t prot)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte, new_pte;

	Dprintk("set_pte_phys %lx to %lx\n", vaddr, phys);

	pgd = pgd_offset_k(vaddr);
	if (pgd_none(*pgd)) {
		printk("PGD FIXMAP MISSING, it should be setup in head.S!\n");
		return;
	}
	pud = pud_offset(pgd, vaddr);
	if (pud_none(*pud)) {
		pmd = (pmd_t *) spp_getpage();
		set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | _PAGE_USER));
		if (pmd != pmd_offset(pud, 0)) {
			printk("PAGETABLE BUG #01! %p <-> %p\n", pmd, pmd_offset(pud, 0));
			return;
		}
	}
	pmd = pmd_offset(pud, vaddr);
	if (pmd_none(*pmd)) {
		pte = (pte_t *) spp_getpage();
		set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE | _PAGE_USER));
		if (pte != pte_offset_kernel(pmd, 0)) {
			printk("PAGETABLE BUG #02!\n");
			return;
		}
	}
	new_pte = pfn_pte(phys >> PAGE_SHIFT, prot);

	pte = pte_offset_kernel(pmd, vaddr);
	if (!pte_none(*pte) &&
	    pte_val(*pte) != (pte_val(new_pte) & __supported_pte_mask))
		pte_ERROR(*pte);
	set_pte(pte, new_pte);

	/*
	 * It's enough to flush this one mapping.
	 * (PGE mappings get flushed as well)
	 */
	__flush_tlb_one(vaddr);
}

/* NOTE: this is meant to be run only at boot */
void __init
__set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
{
	unsigned long address = __fix_to_virt(idx);

	if (idx >= __end_of_fixed_addresses) {
		printk("Invalid __set_fixmap\n");
		return;
	}
	set_pte_phys(address, phys, prot);
}

unsigned long __meminitdata table_start, table_end;

static __meminit void *alloc_low_page(unsigned long *phys)
{
	unsigned long pfn = table_end++;
	void *adr;

	if (after_bootmem) {
		adr = (void *)get_zeroed_page(GFP_ATOMIC);
		*phys = __pa(adr);
		return adr;
	}

	if (pfn >= end_pfn)
		panic("alloc_low_page: ran out of memory");

	adr = early_ioremap(pfn * PAGE_SIZE, PAGE_SIZE);
	memset(adr, 0, PAGE_SIZE);
	*phys = pfn * PAGE_SIZE;
	return adr;
}

static __meminit void unmap_low_page(void *adr)
{
	if (after_bootmem)
		return;

	early_iounmap(adr, PAGE_SIZE);
}

/* Must run before zap_low_mappings */
__meminit void *early_ioremap(unsigned long addr, unsigned long size)
{
	unsigned long vaddr;
	pmd_t *pmd, *last_pmd;
	int i, pmds;

	pmds = ((addr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE;
	vaddr = __START_KERNEL_map;
	pmd = level2_kernel_pgt;
	last_pmd = level2_kernel_pgt + PTRS_PER_PMD - 1;
	for (; pmd <= last_pmd; pmd++, vaddr += PMD_SIZE) {
		for (i = 0; i < pmds; i++) {
			if (pmd_present(pmd[i]))
				goto next;
		}
		vaddr += addr & ~PMD_MASK;
		addr &= PMD_MASK;
		for (i = 0; i < pmds; i++, addr += PMD_SIZE)
			set_pmd(pmd + i, __pmd(addr | _KERNPG_TABLE | _PAGE_PSE));
		__flush_tlb();
		return (void *)vaddr;
	next:
		;
	}
	printk("early_ioremap(0x%lx, %lu) failed\n", addr, size);
	return NULL;
}

/* To avoid virtual aliases later */
__meminit void early_iounmap(void *addr, unsigned long size)
{
	unsigned long vaddr;
	pmd_t *pmd;
	int i, pmds;

	vaddr = (unsigned long)addr;
	pmds = ((vaddr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE;
	pmd = level2_kernel_pgt + pmd_index(vaddr);
	for (i = 0; i < pmds; i++)
		pmd_clear(pmd + i);
	__flush_tlb();
}

static void __meminit
phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
{
	int i = pmd_index(address);

	for (; i < PTRS_PER_PMD; i++, address += PMD_SIZE) {
		unsigned long entry;
		pmd_t *pmd = pmd_page + pmd_index(address);

		if (address >= end) {
			if (!after_bootmem)
				for (; i < PTRS_PER_PMD; i++, pmd++)
					set_pmd(pmd, __pmd(0));
			break;
		}

		if (pmd_val(*pmd))
			continue;

		entry = _PAGE_NX|_PAGE_PSE|_KERNPG_TABLE|_PAGE_GLOBAL|address;
		entry &= __supported_pte_mask;
		set_pmd(pmd, __pmd(entry));
	}
}

static void __meminit
phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end)
{
	pmd_t *pmd = pmd_offset(pud, 0);
	spin_lock(&init_mm.page_table_lock);
	phys_pmd_init(pmd, address, end);
	spin_unlock(&init_mm.page_table_lock);
	__flush_tlb_all();
}

static void __meminit phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end)
{
	int i = pud_index(addr);

	for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE) {
		unsigned long pmd_phys;
		pud_t *pud = pud_page + pud_index(addr);
		pmd_t *pmd;

		if (addr >= end)
			break;

		if (!after_bootmem && !e820_any_mapped(addr, addr+PUD_SIZE, 0)) {
			set_pud(pud, __pud(0));
			continue;
		}

		if (pud_val(*pud)) {
			phys_pmd_update(pud, addr, end);
			continue;
		}

		pmd = alloc_low_page(&pmd_phys);
		spin_lock(&init_mm.page_table_lock);
		set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE));
		phys_pmd_init(pmd, addr, end);
		spin_unlock(&init_mm.page_table_lock);
		unmap_low_page(pmd);
	}
	__flush_tlb();
}

static void __init find_early_table_space(unsigned long end)
{
	unsigned long puds, pmds, tables, start;

	puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
	pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
	tables = round_up(puds * sizeof(pud_t), PAGE_SIZE) +
		 round_up(pmds * sizeof(pmd_t), PAGE_SIZE);

	/* RED-PEN putting page tables only on node 0 could
	   cause a hotspot and fill up ZONE_DMA. The page tables
	   need roughly 0.5KB per GB. */
	start = 0x8000;
	table_start = find_e820_area(start, end, tables);
	if (table_start == -1UL)
		panic("Cannot find space for the kernel page tables");

	table_start >>= PAGE_SHIFT;
	table_end = table_start;

	early_printk("kernel direct mapping tables up to %lx @ %lx-%lx\n",
		end, table_start << PAGE_SHIFT,
		(table_start << PAGE_SHIFT) + tables);
}

/* Setup the direct mapping of the physical memory at PAGE_OFFSET.
   This runs before bootmem is initialized and gets pages directly from the
   physical memory. To access them they are temporarily mapped. */
void __meminit init_memory_mapping(unsigned long start, unsigned long end)
{
	unsigned long next;

	Dprintk("init_memory_mapping\n");

	/*
	 * Find space for the kernel direct mapping tables.
	 * Later we should allocate these tables in the local node of the memory
	 * mapped. Unfortunately this is done currently before the nodes are
	 * discovered.
	 */
	if (!after_bootmem)
		find_early_table_space(end);

	start = (unsigned long)__va(start);
	end = (unsigned long)__va(end);

	for (; start < end; start = next) {
		unsigned long pud_phys;
		pgd_t *pgd = pgd_offset_k(start);
		pud_t *pud;

		if (after_bootmem)
			pud = pud_offset(pgd, start & PGDIR_MASK);
		else
			pud = alloc_low_page(&pud_phys);

		next = start + PGDIR_SIZE;
		if (next > end)
			next = end;
		phys_pud_init(pud, __pa(start), __pa(next));
		if (!after_bootmem)
			set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys));
		unmap_low_page(pud);
	}

	if (!after_bootmem)
		asm volatile("movq %%cr4,%0" : "=r" (mmu_cr4_features));
	__flush_tlb_all();
}

#ifndef CONFIG_NUMA
void __init paging_init(void)
{
	unsigned long max_zone_pfns[MAX_NR_ZONES];
	memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
	max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;
	max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
	max_zone_pfns[ZONE_NORMAL] = end_pfn;

	memory_present(0, 0, end_pfn);
	sparse_init();
	free_area_init_nodes(max_zone_pfns);
}
#endif

/* Unmap a kernel mapping if it exists. This is useful to avoid prefetches
   from the CPU leading to inconsistent cache lines. address and size
   must be aligned to 2MB boundaries.
   Does nothing when the mapping doesn't exist. */
void __init clear_kernel_mapping(unsigned long address, unsigned long size)
{
	unsigned long end = address + size;

	BUG_ON(address & ~LARGE_PAGE_MASK);
	BUG_ON(size & ~LARGE_PAGE_MASK);

	for (; address < end; address += LARGE_PAGE_SIZE) {
		pgd_t *pgd = pgd_offset_k(address);
		pud_t *pud;
		pmd_t *pmd;
		if (pgd_none(*pgd))
			continue;
		pud = pud_offset(pgd, address);
		if (pud_none(*pud))
			continue;
		pmd = pmd_offset(pud, address);
		if (!pmd || pmd_none(*pmd))
			continue;
		if (0 == (pmd_val(*pmd) & _PAGE_PSE)) {
			/* Could handle this, but it should not happen currently. */
			printk(KERN_ERR
		"clear_kernel_mapping: mapping has been split. will leak memory\n");
			pmd_ERROR(*pmd);
		}
		set_pmd(pmd, __pmd(0));
	}
	__flush_tlb_all();
}

/*
 * Memory hotplug specific functions
 */
void online_page(struct page *page)
{
	ClearPageReserved(page);
	init_page_count(page);
	__free_page(page);
	totalram_pages++;
	num_physpages++;
}

#ifdef CONFIG_MEMORY_HOTPLUG
/*
 * Memory is added always to NORMAL zone. This means you will never get
 * additional DMA/DMA32 memory.
 */
int arch_add_memory(int nid, u64 start, u64 size)
{
	struct pglist_data *pgdat = NODE_DATA(nid);
	struct zone *zone = pgdat->node_zones + ZONE_NORMAL;
	unsigned long start_pfn = start >> PAGE_SHIFT;
	unsigned long nr_pages = size >> PAGE_SHIFT;
	int ret;

	init_memory_mapping(start, (start + size - 1));

	ret = __add_pages(zone, start_pfn, nr_pages);
	if (ret)
		goto error;

	return ret;
error:
	printk("%s: Problem encountered in __add_pages!\n", __func__);
	return ret;
}
EXPORT_SYMBOL_GPL(arch_add_memory);

int remove_memory(u64 start, u64 size)
{
	return -EINVAL;
}
EXPORT_SYMBOL_GPL(remove_memory);

#if !defined(CONFIG_ACPI_NUMA) && defined(CONFIG_NUMA)
int memory_add_physaddr_to_nid(u64 start)
{
	return 0;
}
EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
#endif

#endif /* CONFIG_MEMORY_HOTPLUG */

#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
/*
 * Memory Hotadd without sparsemem. The mem_maps have been allocated in advance,
 * just online the pages.
 */
int __add_pages(struct zone *z, unsigned long start_pfn, unsigned long nr_pages)
{
	int err = -EIO;
	unsigned long pfn;
	unsigned long total = 0, mem = 0;
	for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++) {
		if (pfn_valid(pfn)) {
			online_page(pfn_to_page(pfn));
			err = 0;
			mem++;
		}
		total++;
	}
	if (!err) {
		z->spanned_pages += total;
		z->present_pages += mem;
		z->zone_pgdat->node_spanned_pages += total;
		z->zone_pgdat->node_present_pages += mem;
	}
	return err;
}
#endif

static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules,
			 kcore_vsyscall;

void __init mem_init(void)
{
	long codesize, reservedpages, datasize, initsize;

	pci_iommu_alloc();

	/* clear the zero-page */
	memset(empty_zero_page, 0, PAGE_SIZE);

	reservedpages = 0;

	/* this will put all low memory onto the freelists */
#ifdef CONFIG_NUMA
	totalram_pages = numa_free_all_bootmem();
#else
	totalram_pages = free_all_bootmem();
#endif
	reservedpages = end_pfn - totalram_pages -
					absent_pages_in_range(0, end_pfn);

	after_bootmem = 1;

	codesize = (unsigned long) &_etext - (unsigned long) &_text;
	datasize = (unsigned long) &_edata - (unsigned long) &_etext;
	initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;

	/* Register memory areas for /proc/kcore */
	kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT);
	kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
		   VMALLOC_END-VMALLOC_START);
	kclist_add(&kcore_kernel, &_stext, _end - _stext);
	kclist_add(&kcore_modules, (void *)MODULES_VADDR, MODULES_LEN);
	kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START,
		   VSYSCALL_END - VSYSCALL_START);

	printk("Memory: %luk/%luk available (%ldk kernel code, %ldk reserved, %ldk data, %ldk init)\n",
		(unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
		end_pfn << (PAGE_SHIFT-10),
		codesize >> 10,
		reservedpages << (PAGE_SHIFT-10),
		datasize >> 10,
		initsize >> 10);
}

void free_init_pages(char *what, unsigned long begin, unsigned long end)
{
	unsigned long addr;

	if (begin >= end)
		return;

	printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
	for (addr = begin; addr < end; addr += PAGE_SIZE) {
		ClearPageReserved(virt_to_page(addr));
		init_page_count(virt_to_page(addr));
		memset((void *)(addr & ~(PAGE_SIZE-1)),
			POISON_FREE_INITMEM, PAGE_SIZE);
		if (addr >= __START_KERNEL_map)
			change_page_attr_addr(addr, 1, __pgprot(0));
		free_page(addr);
		totalram_pages++;
	}
	if (addr > __START_KERNEL_map)
		global_flush_tlb();
}

void free_initmem(void)
{
	free_init_pages("unused kernel memory",
			(unsigned long)(&__init_begin),
			(unsigned long)(&__init_end));
}

#ifdef CONFIG_DEBUG_RODATA

void mark_rodata_ro(void)
{
	unsigned long start = (unsigned long)_stext, end;

#ifdef CONFIG_HOTPLUG_CPU
	/* It must still be possible to apply SMP alternatives. */
	if (num_possible_cpus() > 1)
		start = (unsigned long)_etext;
#endif

#ifdef CONFIG_KPROBES
	start = (unsigned long)__start_rodata;
#endif

	end = (unsigned long)__end_rodata;
	start = (start + PAGE_SIZE - 1) & PAGE_MASK;
	end &= PAGE_MASK;
	if (end <= start)
		return;

	change_page_attr_addr(start, (end - start) >> PAGE_SHIFT, PAGE_KERNEL_RO);

	printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
	       (end - start) >> 10);

	/*
	 * change_page_attr_addr() requires a global_flush_tlb() call after it.
	 * We do this after the printk so that if something went wrong in the
	 * change, the printk gets out at least to give a better debug hint
	 * of who is the culprit.
	 */
	global_flush_tlb();
}
#endif

#ifdef CONFIG_BLK_DEV_INITRD
void free_initrd_mem(unsigned long start, unsigned long end)
{
	free_init_pages("initrd memory", start, end);
}
#endif

void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
{
#ifdef CONFIG_NUMA
	int nid = phys_to_nid(phys);
#endif
	unsigned long pfn = phys >> PAGE_SHIFT;
	if (pfn >= end_pfn) {
		/* This can happen with kdump kernels when accessing firmware
		   tables. */
		if (pfn < end_pfn_map)
			return;
		printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %u\n",
				phys, len);
		return;
	}

	/* Should check here against the e820 map to avoid double free */
#ifdef CONFIG_NUMA
	reserve_bootmem_node(NODE_DATA(nid), phys, len);
#else
	reserve_bootmem(phys, len);
#endif
	if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) {
		dma_reserve += len / PAGE_SIZE;
		set_dma_reserve(dma_reserve);
	}
}

int kern_addr_valid(unsigned long addr)
{
	unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

	if (above != 0 && above != -1UL)
		return 0;

	pgd = pgd_offset_k(addr);
	if (pgd_none(*pgd))
		return 0;

	pud = pud_offset(pgd, addr);
	if (pud_none(*pud))
		return 0;

	pmd = pmd_offset(pud, addr);
	if (pmd_none(*pmd))
		return 0;
	if (pmd_large(*pmd))
		return pfn_valid(pmd_pfn(*pmd));

	pte = pte_offset_kernel(pmd, addr);
	if (pte_none(*pte))
		return 0;
	return pfn_valid(pte_pfn(*pte));
}

#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>

extern int exception_trace, page_fault_trace;

static ctl_table debug_table2[] = {
	{
		.ctl_name	= 99,
		.procname	= "exception-trace",
		.data		= &exception_trace,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{}
};

static ctl_table debug_root_table2[] = {
	{
		.ctl_name = CTL_DEBUG,
		.procname = "debug",
		.mode = 0555,
		.child = debug_table2
	},
	{}
};

static __init int x8664_sysctl_init(void)
{
	register_sysctl_table(debug_root_table2);
	return 0;
}
__initcall(x8664_sysctl_init);
#endif

/* A pseudo VMA to allow ptrace access for the vsyscall page. This only
   covers the 64bit vsyscall page now. 32bit has a real VMA now and does
   not need special handling anymore. */

static struct vm_area_struct gate_vma = {
	.vm_start = VSYSCALL_START,
	.vm_end = VSYSCALL_START + (VSYSCALL_MAPPED_PAGES << PAGE_SHIFT),
	.vm_page_prot = PAGE_READONLY_EXEC,
	.vm_flags = VM_READ | VM_EXEC
};

struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
{
#ifdef CONFIG_IA32_EMULATION
	if (test_tsk_thread_flag(tsk, TIF_IA32))
		return NULL;
#endif
	return &gate_vma;
}

int in_gate_area(struct task_struct *task, unsigned long addr)
{
	struct vm_area_struct *vma = get_gate_vma(task);
	if (!vma)
		return 0;
	return (addr >= vma->vm_start) && (addr < vma->vm_end);
}

/* Use this when you have no reliable task/vma, typically from interrupt
 * context. It is less reliable than using the task's vma and may give
 * false positives.
 */
int in_gate_area_no_task(unsigned long addr)
{
	return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END);
}

void *alloc_bootmem_high_node(pg_data_t *pgdat, unsigned long size)
{
	return __alloc_bootmem_core(pgdat->bdata, size,
			SMP_CACHE_BYTES, (4UL*1024*1024*1024), 0);
}