/*
 *  linux/arch/x86_64/mm/init.c
 *
 *  Copyright (C) 1995  Linus Torvalds
 *  Copyright (C) 2000  Pavel Machek <pavel@suse.cz>
 *  Copyright (C) 2002,2003 Andi Kleen <ak@suse.de>
 */

#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/pagemap.h>
#include <linux/bootmem.h>
#include <linux/proc_fs.h>
#include <linux/pci.h>
#include <linux/pfn.h>
#include <linux/poison.h>
#include <linux/dma-mapping.h>
#include <linux/module.h>
#include <linux/memory_hotplug.h>
#include <linux/nmi.h>

#include <asm/processor.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/dma.h>
#include <asm/fixmap.h>
#include <asm/e820.h>
#include <asm/apic.h>
#include <asm/tlb.h>
#include <asm/mmu_context.h>
#include <asm/proto.h>
#include <asm/smp.h>
#include <asm/sections.h>

#ifndef Dprintk
#define Dprintk(x...)
#endif

const struct dma_mapping_ops* dma_ops;
EXPORT_SYMBOL(dma_ops);

static unsigned long dma_reserve __initdata;

DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);

/*
 * NOTE: pagetable_init() allocates all the fixmap pagetables contiguously
 * in physical memory, so we can cache the location of the first one and
 * move around without checking the pgd every time.
 */

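/*
 * Dump a summary of memory usage to the console: free areas, swap usage,
 * and a walk over every online node counting total, reserved, shared and
 * swap-cached pages.
 */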
void show_mem(void)
{
	long i, total = 0, reserved = 0;
	long shared = 0, cached = 0;
	pg_data_t *pgdat;
	struct page *page;

	printk(KERN_INFO "Mem-info:\n");
	show_free_areas();
	printk(KERN_INFO "Free swap:       %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));

	for_each_online_pgdat(pgdat) {
		for (i = 0; i < pgdat->node_spanned_pages; ++i) {
			/* this loop can take a while with 256 GB and 4k pages,
			   so update the NMI watchdog */
			if (unlikely(i % MAX_ORDER_NR_PAGES == 0)) {
				touch_nmi_watchdog();
			}
			if (!pfn_valid(pgdat->node_start_pfn + i))
				continue;
			page = pfn_to_page(pgdat->node_start_pfn + i);
			total++;
			if (PageReserved(page))
				reserved++;
			else if (PageSwapCache(page))
				cached++;
			else if (page_count(page))
				shared += page_count(page) - 1;
		}
	}
	printk(KERN_INFO "%ld pages of RAM\n", total);
	printk(KERN_INFO "%ld reserved pages\n", reserved);
	printk(KERN_INFO "%ld pages shared\n", shared);
	printk(KERN_INFO "%ld pages swap cached\n", cached);
}

int after_bootmem;

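/*
 * Allocate one zeroed, page-aligned page for use as a page table.  Before
 * the bootmem allocator has been torn down (after_bootmem == 0) the page
 * comes from bootmem; afterwards it comes from the page allocator with
 * GFP_ATOMIC.  Panics if no suitable page can be found.
 */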
static __init void *spp_getpage(void)
{
	void *ptr;
	if (after_bootmem)
		ptr = (void *) get_zeroed_page(GFP_ATOMIC);
	else
		ptr = alloc_bootmem_pages(PAGE_SIZE);
	if (!ptr || ((unsigned long)ptr & ~PAGE_MASK))
		panic("set_pte_phys: cannot allocate page data %s\n", after_bootmem?"after bootmem":"");

	Dprintk("spp_getpage %p\n", ptr);
	return ptr;
}

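/*
 * Install a single kernel mapping for 'vaddr' pointing at physical address
 * 'phys' with protection 'prot'.  Missing pmd/pte pages are allocated with
 * spp_getpage(); the pgd entry itself must already have been set up (in
 * head.S for the fixmap range).  Finishes with a single-page TLB flush.
 * __set_fixmap() below is the typical caller, resolving a fixmap slot to
 * its virtual address before handing it to this function.
 */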
static __init void set_pte_phys(unsigned long vaddr,
			 unsigned long phys, pgprot_t prot)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte, new_pte;

	Dprintk("set_pte_phys %lx to %lx\n", vaddr, phys);

	pgd = pgd_offset_k(vaddr);
	if (pgd_none(*pgd)) {
		printk("PGD FIXMAP MISSING, it should be setup in head.S!\n");
		return;
	}
	pud = pud_offset(pgd, vaddr);
	if (pud_none(*pud)) {
		pmd = (pmd_t *) spp_getpage();
		set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | _PAGE_USER));
		if (pmd != pmd_offset(pud, 0)) {
			printk("PAGETABLE BUG #01! %p <-> %p\n", pmd, pmd_offset(pud, 0));
			return;
		}
	}
	pmd = pmd_offset(pud, vaddr);
	if (pmd_none(*pmd)) {
		pte = (pte_t *) spp_getpage();
		set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE | _PAGE_USER));
		if (pte != pte_offset_kernel(pmd, 0)) {
			printk("PAGETABLE BUG #02!\n");
			return;
		}
	}
	new_pte = pfn_pte(phys >> PAGE_SHIFT, prot);

	pte = pte_offset_kernel(pmd, vaddr);
	if (!pte_none(*pte) &&
	    pte_val(*pte) != (pte_val(new_pte) & __supported_pte_mask))
		pte_ERROR(*pte);
	set_pte(pte, new_pte);

	/*
	 * It's enough to flush this one mapping.
	 * (PGE mappings get flushed as well)
	 */
	__flush_tlb_one(vaddr);
}

/* NOTE: this is meant to be run only at boot */
void __init
__set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
{
	unsigned long address = __fix_to_virt(idx);

	if (idx >= __end_of_fixed_addresses) {
		printk("Invalid __set_fixmap\n");
		return;
	}
	set_pte_phys(address, phys, prot);
}

unsigned long __meminitdata table_start, table_end;

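/*
 * Allocate one zeroed page for an early page table and return its virtual
 * address, with the physical address stored in *phys.  After bootmem is up
 * this is just get_zeroed_page(); during early boot the page is carved out
 * of the table_start..table_end window reserved by find_early_table_space()
 * and temporarily mapped with early_ioremap().  Callers pair it with
 * unmap_low_page().
 */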
static __meminit void *alloc_low_page(unsigned long *phys)
{
	unsigned long pfn = table_end++;
	void *adr;

	if (after_bootmem) {
		adr = (void *)get_zeroed_page(GFP_ATOMIC);
		*phys = __pa(adr);
		return adr;
	}

	if (pfn >= end_pfn)
		panic("alloc_low_page: ran out of memory");

	adr = early_ioremap(pfn * PAGE_SIZE, PAGE_SIZE);
	memset(adr, 0, PAGE_SIZE);
	*phys = pfn * PAGE_SIZE;
	return adr;
}

static __meminit void unmap_low_page(void *adr)
{
	if (after_bootmem)
		return;

	early_iounmap(adr, PAGE_SIZE);
}

/* Must run before zap_low_mappings */
__meminit void *early_ioremap(unsigned long addr, unsigned long size)
{
	unsigned long vaddr;
	pmd_t *pmd, *last_pmd;
	int i, pmds;

	pmds = ((addr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE;
	vaddr = __START_KERNEL_map;
	pmd = level2_kernel_pgt;
	last_pmd = level2_kernel_pgt + PTRS_PER_PMD - 1;
	for (; pmd <= last_pmd; pmd++, vaddr += PMD_SIZE) {
		for (i = 0; i < pmds; i++) {
			if (pmd_present(pmd[i]))
				goto next;
		}
		vaddr += addr & ~PMD_MASK;
		addr &= PMD_MASK;
		for (i = 0; i < pmds; i++, addr += PMD_SIZE)
			set_pmd(pmd + i, __pmd(addr | _KERNPG_TABLE | _PAGE_PSE));
		__flush_tlb();
		return (void *)vaddr;
	next:
		;
	}
	printk("early_ioremap(0x%lx, %lu) failed\n", addr, size);
	return NULL;
}

/* To avoid virtual aliases later */
__meminit void early_iounmap(void *addr, unsigned long size)
{
	unsigned long vaddr;
	pmd_t *pmd;
	int i, pmds;

	vaddr = (unsigned long)addr;
	pmds = ((vaddr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE;
	pmd = level2_kernel_pgt + pmd_index(vaddr);
	for (i = 0; i < pmds; i++)
		pmd_clear(pmd + i);
	__flush_tlb();
}

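/*
 * Fill one pmd page with 2MB kernel mappings, starting at 'address' and
 * stopping at 'end'.  Entries that are already populated are left alone;
 * entries beyond 'end' are cleared, but only during the initial boot pass.
 */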
static void __meminit
phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
{
	int i = pmd_index(address);

	for (; i < PTRS_PER_PMD; i++, address += PMD_SIZE) {
		unsigned long entry;
		pmd_t *pmd = pmd_page + pmd_index(address);

		if (address >= end) {
			if (!after_bootmem)
				for (; i < PTRS_PER_PMD; i++, pmd++)
					set_pmd(pmd, __pmd(0));
			break;
		}

		if (pmd_val(*pmd))
			continue;

		entry = _PAGE_NX|_PAGE_PSE|_KERNPG_TABLE|_PAGE_GLOBAL|address;
		entry &= __supported_pte_mask;
		set_pmd(pmd, __pmd(entry));
	}
}

static void __meminit
phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end)
{
	pmd_t *pmd = pmd_offset(pud, 0);
	spin_lock(&init_mm.page_table_lock);
	phys_pmd_init(pmd, address, end);
	spin_unlock(&init_mm.page_table_lock);
	__flush_tlb_all();
}

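/*
 * Populate one pud page covering the range addr..end.  Each pud entry gets
 * a freshly allocated pmd page (filled by phys_pmd_init()); entries that
 * already exist are extended via phys_pmd_update(), and ranges that the
 * e820 map says contain no memory are cleared during the initial boot pass.
 */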
static void __meminit phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end)
{
	int i = pud_index(addr);

	for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE) {
		unsigned long pmd_phys;
		pud_t *pud = pud_page + pud_index(addr);
		pmd_t *pmd;

		if (addr >= end)
			break;

		if (!after_bootmem && !e820_any_mapped(addr, addr+PUD_SIZE, 0)) {
			set_pud(pud, __pud(0));
			continue;
		}

		if (pud_val(*pud)) {
			phys_pmd_update(pud, addr, end);
			continue;
		}

		pmd = alloc_low_page(&pmd_phys);
		spin_lock(&init_mm.page_table_lock);
		set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE));
		phys_pmd_init(pmd, addr, end);
		spin_unlock(&init_mm.page_table_lock);
		unmap_low_page(pmd);
	}
	__flush_tlb();
}

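/*
 * Reserve a physically contiguous window in the e820 map large enough to
 * hold the pud and pmd pages needed to direct-map memory up to 'end'.  The
 * window is recorded in table_start/table_end (in page frames) and is
 * consumed page by page through alloc_low_page().
 */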
static void __init find_early_table_space(unsigned long end)
{
	unsigned long puds, pmds, tables, start;

	puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
	pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
	tables = round_up(puds * sizeof(pud_t), PAGE_SIZE) +
		 round_up(pmds * sizeof(pmd_t), PAGE_SIZE);

	/* RED-PEN putting page tables only on node 0 could
	   cause a hotspot and fill up ZONE_DMA. The page tables
	   need roughly 0.5KB per GB. */
	start = 0x8000;
	table_start = find_e820_area(start, end, tables);
	if (table_start == -1UL)
		panic("Cannot find space for the kernel page tables");

	table_start >>= PAGE_SHIFT;
	table_end = table_start;

	early_printk("kernel direct mapping tables up to %lx @ %lx-%lx\n",
		end, table_start << PAGE_SHIFT,
		(table_start << PAGE_SHIFT) + tables);
}

/* Set up the direct mapping of the physical memory at PAGE_OFFSET.
   This runs before bootmem is initialized and gets pages directly from
   physical memory.  To access them they are temporarily mapped. */
void __meminit init_memory_mapping(unsigned long start, unsigned long end)
{
	unsigned long next;

	Dprintk("init_memory_mapping\n");

	/*
	 * Find space for the kernel direct mapping tables.
	 * Ideally these tables would be allocated on the node that owns the
	 * memory being mapped, but unfortunately this runs before the nodes
	 * are discovered.
	 */
	if (!after_bootmem)
		find_early_table_space(end);

	start = (unsigned long)__va(start);
	end = (unsigned long)__va(end);

	for (; start < end; start = next) {
		unsigned long pud_phys;
		pgd_t *pgd = pgd_offset_k(start);
		pud_t *pud;

		if (after_bootmem)
			pud = pud_offset(pgd, start & PGDIR_MASK);
		else
			pud = alloc_low_page(&pud_phys);

		next = start + PGDIR_SIZE;
		if (next > end)
			next = end;
		phys_pud_init(pud, __pa(start), __pa(next));
		if (!after_bootmem)
			set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys));
		unmap_low_page(pud);
	}

	if (!after_bootmem)
		asm volatile("movq %%cr4,%0" : "=r" (mmu_cr4_features));
	__flush_tlb_all();
}

#ifndef CONFIG_NUMA
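/*
 * Non-NUMA zone setup: record the maximum pfn of each zone (DMA, DMA32,
 * NORMAL), register the single memory range with sparsemem, and hand the
 * per-zone limits to free_area_init_nodes().
 */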
void __init paging_init(void)
{
	unsigned long max_zone_pfns[MAX_NR_ZONES];
	memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
	max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;
	max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
	max_zone_pfns[ZONE_NORMAL] = end_pfn;

	memory_present(0, 0, end_pfn);
	sparse_init();
	free_area_init_nodes(max_zone_pfns);
}
#endif

/* Unmap a kernel mapping if it exists. This is useful to avoid prefetches
   from the CPU leading to inconsistent cache lines. address and size
   must be aligned to 2MB boundaries.
   Does nothing when the mapping doesn't exist. */
void __init clear_kernel_mapping(unsigned long address, unsigned long size)
{
	unsigned long end = address + size;

	BUG_ON(address & ~LARGE_PAGE_MASK);
	BUG_ON(size & ~LARGE_PAGE_MASK);

	for (; address < end; address += LARGE_PAGE_SIZE) {
		pgd_t *pgd = pgd_offset_k(address);
		pud_t *pud;
		pmd_t *pmd;
		if (pgd_none(*pgd))
			continue;
		pud = pud_offset(pgd, address);
		if (pud_none(*pud))
			continue;
		pmd = pmd_offset(pud, address);
		if (!pmd || pmd_none(*pmd))
			continue;
		if (0 == (pmd_val(*pmd) & _PAGE_PSE)) {
			/* Could handle this, but it should not happen currently. */
			printk(KERN_ERR
	       "clear_kernel_mapping: mapping has been split. will leak memory\n");
			pmd_ERROR(*pmd);
		}
		set_pmd(pmd, __pmd(0));
	}
	__flush_tlb_all();
}


/*
 * Memory hotplug specific functions
 */
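/* Hand one hot-added page back to the page allocator and update the counters. */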
void online_page(struct page *page)
{
	ClearPageReserved(page);
	init_page_count(page);
	__free_page(page);
	totalram_pages++;
	num_physpages++;
}

#ifdef CONFIG_MEMORY_HOTPLUG
/*
 * Memory is added always to NORMAL zone. This means you will never get
 * additional DMA/DMA32 memory.
 */
int arch_add_memory(int nid, u64 start, u64 size)
{
	struct pglist_data *pgdat = NODE_DATA(nid);
	struct zone *zone = pgdat->node_zones + ZONE_NORMAL;
	unsigned long start_pfn = start >> PAGE_SHIFT;
	unsigned long nr_pages = size >> PAGE_SHIFT;
	int ret;

	init_memory_mapping(start, (start + size -1));

	ret = __add_pages(zone, start_pfn, nr_pages);
	if (ret)
		goto error;

	return ret;
error:
	printk("%s: Problem encountered in __add_pages!\n", __func__);
	return ret;
}
EXPORT_SYMBOL_GPL(arch_add_memory);

int remove_memory(u64 start, u64 size)
{
	return -EINVAL;
}
EXPORT_SYMBOL_GPL(remove_memory);

#if !defined(CONFIG_ACPI_NUMA) && defined(CONFIG_NUMA)
int memory_add_physaddr_to_nid(u64 start)
{
	return 0;
}
EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
#endif

#endif /* CONFIG_MEMORY_HOTPLUG */

#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
/*
 * Memory hotadd without sparsemem. The mem_maps have been allocated in
 * advance, just online the pages.
 */
int __add_pages(struct zone *z, unsigned long start_pfn, unsigned long nr_pages)
{
	int err = -EIO;
	unsigned long pfn;
	unsigned long total = 0, mem = 0;
	for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++) {
		if (pfn_valid(pfn)) {
			online_page(pfn_to_page(pfn));
			err = 0;
			mem++;
		}
		total++;
	}
	if (!err) {
		z->spanned_pages += total;
		z->present_pages += mem;
		z->zone_pgdat->node_spanned_pages += total;
		z->zone_pgdat->node_present_pages += mem;
	}
	return err;
}
#endif

static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules,
			 kcore_vsyscall;

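/*
 * Final memory accounting at boot: release all bootmem pages to the page
 * allocator, register the /proc/kcore regions and print the usual
 * "Memory: ..." summary line.
 */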
void __init mem_init(void)
{
	long codesize, reservedpages, datasize, initsize;

	pci_iommu_alloc();

	/* clear the zero-page */
	memset(empty_zero_page, 0, PAGE_SIZE);

	reservedpages = 0;

	/* this will put all low memory onto the freelists */
#ifdef CONFIG_NUMA
	totalram_pages = numa_free_all_bootmem();
#else
	totalram_pages = free_all_bootmem();
#endif
	reservedpages = end_pfn - totalram_pages -
					absent_pages_in_range(0, end_pfn);

	after_bootmem = 1;

	codesize =  (unsigned long) &_etext - (unsigned long) &_text;
	datasize =  (unsigned long) &_edata - (unsigned long) &_etext;
	initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;

	/* Register memory areas for /proc/kcore */
	kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT);
	kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
		   VMALLOC_END-VMALLOC_START);
	kclist_add(&kcore_kernel, &_stext, _end - _stext);
	kclist_add(&kcore_modules, (void *)MODULES_VADDR, MODULES_LEN);
	kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START,
				 VSYSCALL_END - VSYSCALL_START);

	printk("Memory: %luk/%luk available (%ldk kernel code, %ldk reserved, %ldk data, %ldk init)\n",
		(unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
		end_pfn << (PAGE_SHIFT-10),
		codesize >> 10,
		reservedpages << (PAGE_SHIFT-10),
		datasize >> 10,
		initsize >> 10);
}

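/*
 * Release the pages between 'begin' and 'end' back to the page allocator,
 * poisoning them first so stale users are caught.  Pages inside the kernel
 * text mapping also get their page attributes reset.  free_initmem() and
 * free_initrd_mem() below are the callers.
 */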
void free_init_pages(char *what, unsigned long begin, unsigned long end)
{
	unsigned long addr;

	if (begin >= end)
		return;

	printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
	for (addr = begin; addr < end; addr += PAGE_SIZE) {
		ClearPageReserved(virt_to_page(addr));
		init_page_count(virt_to_page(addr));
		memset((void *)(addr & ~(PAGE_SIZE-1)),
			POISON_FREE_INITMEM, PAGE_SIZE);
		if (addr >= __START_KERNEL_map)
			change_page_attr_addr(addr, 1, __pgprot(0));
		free_page(addr);
		totalram_pages++;
	}
	if (addr > __START_KERNEL_map)
		global_flush_tlb();
}

void free_initmem(void)
{
	free_init_pages("unused kernel memory",
			(unsigned long)(&__init_begin),
			(unsigned long)(&__init_end));
}

#ifdef CONFIG_DEBUG_RODATA

void mark_rodata_ro(void)
{
	unsigned long start = (unsigned long)_stext, end;

#ifdef CONFIG_HOTPLUG_CPU
	/* It must still be possible to apply SMP alternatives. */
	if (num_possible_cpus() > 1)
		start = (unsigned long)_etext;
#endif

#ifdef CONFIG_KPROBES
	start = (unsigned long)__start_rodata;
#endif

	end = (unsigned long)__end_rodata;
	start = (start + PAGE_SIZE - 1) & PAGE_MASK;
	end &= PAGE_MASK;
	if (end <= start)
		return;

	change_page_attr_addr(start, (end - start) >> PAGE_SHIFT, PAGE_KERNEL_RO);

	printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
	       (end - start) >> 10);

	/*
	 * change_page_attr_addr() requires a global_flush_tlb() call after it.
	 * We do this after the printk so that if something went wrong in the
	 * change, the printk gets out at least to give a better debug hint
	 * of who is the culprit.
	 */
	global_flush_tlb();
}
#endif

#ifdef CONFIG_BLK_DEV_INITRD
void free_initrd_mem(unsigned long start, unsigned long end)
{
	free_init_pages("initrd memory", start, end);
}
#endif

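/*
 * Reserve a physical range with the bootmem allocator, picking the right
 * node on NUMA.  Ranges above end_pfn are ignored (kdump kernels may try
 * to reserve firmware tables outside the loaded kernel's memory map).
 * Reservations below the DMA limit are also accounted in dma_reserve.
 */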
void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
{
#ifdef CONFIG_NUMA
	int nid = phys_to_nid(phys);
#endif
	unsigned long pfn = phys >> PAGE_SHIFT;
	if (pfn >= end_pfn) {
		/* This can happen with kdump kernels when accessing firmware
		   tables. */
		if (pfn < end_pfn_map)
			return;
		printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %u\n",
				phys, len);
		return;
	}

	/* Should check here against the e820 map to avoid double free */
#ifdef CONFIG_NUMA
	reserve_bootmem_node(NODE_DATA(nid), phys, len);
#else
	reserve_bootmem(phys, len);
#endif
	if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) {
		dma_reserve += len / PAGE_SIZE;
		set_dma_reserve(dma_reserve);
	}
}

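/*
 * Return non-zero if 'addr' is a canonical kernel virtual address that is
 * actually mapped.  The page tables are walked by hand, with 2MB pages
 * handled at the pmd level, so callers can probe an address without
 * risking a fault.
 */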
int kern_addr_valid(unsigned long addr)
{
	unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

	if (above != 0 && above != -1UL)
		return 0;

	pgd = pgd_offset_k(addr);
	if (pgd_none(*pgd))
		return 0;

	pud = pud_offset(pgd, addr);
	if (pud_none(*pud))
		return 0;

	pmd = pmd_offset(pud, addr);
	if (pmd_none(*pmd))
		return 0;
	if (pmd_large(*pmd))
		return pfn_valid(pmd_pfn(*pmd));

	pte = pte_offset_kernel(pmd, addr);
	if (pte_none(*pte))
		return 0;
	return pfn_valid(pte_pfn(*pte));
}

#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>

extern int exception_trace, page_fault_trace;

static ctl_table debug_table2[] = {
	{
		.ctl_name	= 99,
		.procname	= "exception-trace",
		.data		= &exception_trace,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{}
};

static ctl_table debug_root_table2[] = {
	{
		.ctl_name = CTL_DEBUG,
		.procname = "debug",
		.mode = 0555,
		.child = debug_table2
	},
	{}
};

static __init int x8664_sysctl_init(void)
{
	register_sysctl_table(debug_root_table2);
	return 0;
}
__initcall(x8664_sysctl_init);
#endif

/* A pseudo VMA to allow ptrace access for the vsyscall page.  This only
   covers the 64bit vsyscall page now. 32bit has a real VMA now and does
   not need special handling anymore. */

static struct vm_area_struct gate_vma = {
	.vm_start = VSYSCALL_START,
	.vm_end = VSYSCALL_START + (VSYSCALL_MAPPED_PAGES << PAGE_SHIFT),
	.vm_page_prot = PAGE_READONLY_EXEC,
	.vm_flags = VM_READ | VM_EXEC
};

struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
{
#ifdef CONFIG_IA32_EMULATION
	if (test_tsk_thread_flag(tsk, TIF_IA32))
		return NULL;
#endif
	return &gate_vma;
}

int in_gate_area(struct task_struct *task, unsigned long addr)
{
	struct vm_area_struct *vma = get_gate_vma(task);
	if (!vma)
		return 0;
	return (addr >= vma->vm_start) && (addr < vma->vm_end);
}

/* Use this when you have no reliable task/vma, typically from interrupt
 * context.  It is less reliable than using the task's vma and may give
 * false positives.
 */
int in_gate_area_no_task(unsigned long addr)
{
	return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END);
}

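/*
 * Bootmem allocation helper for per-node data: allocate 'size' bytes from
 * the given node's bootmem data, cache-line aligned, with an allocation
 * goal of 4GB so the low DMA zones are avoided where possible.
 */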
void *alloc_bootmem_high_node(pg_data_t *pgdat, unsigned long size)
{
	return __alloc_bootmem_core(pgdat->bdata, size,
			SMP_CACHE_BYTES, (4UL*1024*1024*1024), 0);
}
