/*	$NetBSD: x86_xpmap.c,v 1.2 2007/11/22 16:17:05 bouyer Exp $	*/

/*
 * Copyright (c) 2006 Mathieu Ropert <mro@adviseo.fr>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

/*
 * Copyright (c) 2006, 2007 Manuel Bouyer.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Manuel Bouyer.
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

/*
 *
 * Copyright (c) 2004 Christian Limpach.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by Christian Limpach.
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */


#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: x86_xpmap.c,v 1.2 2007/11/22 16:17:05 bouyer Exp $");

#include "opt_xen.h"

#include <sys/param.h>
#include <sys/systm.h>

#include <uvm/uvm.h>

#include <machine/pmap.h>
#include <machine/gdt.h>
#include <xen/xenfunc.h>

#include <dev/isa/isareg.h>
#include <machine/isa_machdep.h>

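/*
 * Glue between the x86 pmap and the Xen hypervisor: a queue used to batch
 * MMU-update hypercalls, helpers for the descriptor tables and the LDT,
 * and (on amd64) construction of the bootstrap page tables that replace
 * the ones handed to us by Xen.
 */
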
#undef	XENDEBUG
/* #define XENDEBUG_SYNC */
/* #define	XENDEBUG_LOW */

#ifdef XENDEBUG
#define	XENPRINTF(x) printf x
#define	XENPRINTK(x) printk x
#define	XENPRINTK2(x) /* printk x */

static char XBUF[256];
#else
#define	XENPRINTF(x)
#define	XENPRINTK(x)
#define	XENPRINTK2(x)
#endif
#define	PRINTF(x) printf x
#define	PRINTK(x) printk x

volatile shared_info_t *HYPERVISOR_shared_info;
union start_info_union start_info_union;

void xen_failsafe_handler(void);

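/*
 * Xen3 added a target-domain argument to the mmu_update hypercall; this
 * wrapper hides the difference so the callers below can stay #ifdef-free.
 */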
#ifdef XEN3
#define HYPERVISOR_mmu_update_self(req, count, success_count) \
	HYPERVISOR_mmu_update((req), (count), (success_count), DOMID_SELF)
#else
#define HYPERVISOR_mmu_update_self(req, count, success_count) \
	HYPERVISOR_mmu_update((req), (count), (success_count))
#endif

void
xen_failsafe_handler(void)
{

	panic("xen_failsafe_handler called!\n");
}


#ifndef __x86_64__
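/*
 * Under Xen the GDT and LDT are mapped read-only in the guest, so a
 * descriptor slot cannot be written directly; instead its address (taken
 * from the PTE that maps the table) is handed to
 * HYPERVISOR_update_descriptor together with the new contents.
 */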
void
xen_update_descriptor(union descriptor *table, union descriptor *entry)
{
	paddr_t pa;
	pt_entry_t *ptp;

	ptp = kvtopte((vaddr_t)table);
	pa = (*ptp & PG_FRAME) | ((vaddr_t)table & ~PG_FRAME);
	if (HYPERVISOR_update_descriptor(pa, entry->raw[0], entry->raw[1]))
		panic("HYPERVISOR_update_descriptor failed\n");
}
#endif

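/*
 * Register an LDT with the hypervisor.  Xen requires the pages backing an
 * LDT to be mapped read-only in the guest, so PG_RW is cleared on each
 * page covering the table before the MMUEXT_SET_LDT request is queued.
 */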
void
xen_set_ldt(vaddr_t base, uint32_t entries)
{
	vaddr_t va;
	vaddr_t end;
	pt_entry_t *ptp, *maptp;
	int s;

#ifdef __x86_64__
	end = base + (entries << 3);
#else
	end = base + entries * sizeof(union descriptor);
#endif

	for (va = base; va < end; va += PAGE_SIZE) {
		KASSERT(va >= VM_MIN_KERNEL_ADDRESS);
		ptp = kvtopte(va);
		maptp = (pt_entry_t *)vtomach((vaddr_t)ptp);
		XENPRINTF(("xen_set_ldt %p %d %p %p\n", (void *)base,
			      entries, ptp, maptp));
		PTE_CLEARBITS(ptp, maptp, PG_RW);
	}
	s = splvm();
	PTE_UPDATES_FLUSH();

	xpq_queue_set_ldt(base, entries);
	xpq_flush_queue();
	splx(s);
}

#ifdef XENDEBUG
void xpq_debug_dump(void);
#endif

#define XPQUEUE_SIZE 2048
static mmu_update_t xpq_queue[XPQUEUE_SIZE];
static int xpq_idx = 0;
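
/*
 * MMU update queue.  The xpq_queue_*() functions below accumulate requests
 * in xpq_queue[]; xpq_flush_queue() hands the whole batch to the hypervisor
 * in a single HYPERVISOR_mmu_update call, and the queue is also flushed
 * automatically when it fills up.  A minimal usage sketch (hypothetical
 * caller; 'ptep' and 'npte' are illustrative names), where the final flush
 * submits both updates in one hypercall:
 *
 *	xpq_queue_pte_update(ptep, npte);
 *	xpq_queue_pte_update(ptep2, npte2);
 *	xpq_flush_queue();
 */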

void
xpq_flush_queue()
{
	int i, ok;

	XENPRINTK2(("flush queue %p entries %d\n", xpq_queue, xpq_idx));
	for (i = 0; i < xpq_idx; i++)
		XENPRINTK2(("%d: %p %08x\n", i, (u_int)xpq_queue[i].ptr,
		    (u_int)xpq_queue[i].val));
	if (xpq_idx != 0 &&
	    HYPERVISOR_mmu_update_self(xpq_queue, xpq_idx, &ok) < 0) {
		printf("xpq_flush_queue: %d entries \n", xpq_idx);
		for (i = 0; i < xpq_idx; i++)
			printf("0x%16lx: 0x%16lx\n",
			   xpq_queue[i].ptr, xpq_queue[i].val);
		panic("HYPERVISOR_mmu_update failed\n");
	}
	xpq_idx = 0;
}

static inline void
xpq_increment_idx(void)
{

	xpq_idx++;
	if (__predict_false(xpq_idx == XPQUEUE_SIZE))
		xpq_flush_queue();
}

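/*
 * Queue an update of the machine->physical translation table so that
 * machine address 'ma' translates back to physical address 'pa'.
 */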
void
xpq_queue_machphys_update(paddr_t ma, paddr_t pa)
{
	XENPRINTK2(("xpq_queue_machphys_update ma=%p pa=%p\n", (void *)ma, (void *)pa));
	xpq_queue[xpq_idx].ptr = ma | MMU_MACHPHYS_UPDATE;
	xpq_queue[xpq_idx].val = (pa - XPMAP_OFFSET) >> PAGE_SHIFT;
	xpq_increment_idx();
#ifdef XENDEBUG_SYNC
	xpq_flush_queue();
#endif
}

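/*
 * Queue an update of a page-directory or page-table entry.  'ptr' is the
 * machine address of the entry (callers in this file obtain it via
 * xpmap_ptom()), 'val' the new contents; Xen validates the update when
 * the queue is flushed.
 */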
void
xpq_queue_pde_update(pd_entry_t *ptr, pd_entry_t val)
{

	KASSERT(((paddr_t)ptr & 3) == 0);
	xpq_queue[xpq_idx].ptr = (paddr_t)ptr | MMU_NORMAL_PT_UPDATE;
	xpq_queue[xpq_idx].val = val;
	xpq_increment_idx();
#ifdef XENDEBUG_SYNC
	xpq_flush_queue();
#endif
}

void
xpq_queue_pte_update(pt_entry_t *ptr, pt_entry_t val)
{

	KASSERT(((paddr_t)ptr & 3) == 0);
	xpq_queue[xpq_idx].ptr = (paddr_t)ptr | MMU_NORMAL_PT_UPDATE;
	xpq_queue[xpq_idx].val = val;
	xpq_increment_idx();
#ifdef XENDEBUG_SYNC
	xpq_flush_queue();
#endif
}

#ifdef XEN3
void
xpq_queue_pt_switch(paddr_t pa)
{
	struct mmuext_op op;
	xpq_flush_queue();

	XENPRINTK2(("xpq_queue_pt_switch: %p %p\n", (void *)pa, (void *)pa));
	op.cmd = MMUEXT_NEW_BASEPTR;
	op.arg1.mfn = pa >> PAGE_SHIFT;
	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
		panic("xpq_queue_pt_switch");
}

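/*
 * Pinning asks Xen to validate and type the page as a top-level page table
 * (L4 on amd64, L2 on i386) so that it can later be installed with
 * MMUEXT_NEW_BASEPTR; unpinning releases that type reference again.
 */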
void
xpq_queue_pin_table(paddr_t pa)
{
	struct mmuext_op op;
	xpq_flush_queue();

	XENPRINTK2(("xpq_queue_pin_table: %p %p\n", (void *)pa, (void *)pa));
	op.arg1.mfn = pa >> PAGE_SHIFT;

#ifdef __x86_64__
	op.cmd = MMUEXT_PIN_L4_TABLE;
#else
	op.cmd = MMUEXT_PIN_L2_TABLE;
#endif
	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
		panic("xpq_queue_pin_table");
}

void
xpq_queue_unpin_table(paddr_t pa)
{
	struct mmuext_op op;
	xpq_flush_queue();

	XENPRINTK2(("xpq_queue_unpin_table: %p %p\n", (void *)pa, (void *)pa));
	op.arg1.mfn = pa >> PAGE_SHIFT;
	op.cmd = MMUEXT_UNPIN_TABLE;
	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
		panic("xpq_queue_unpin_table");
}

void
xpq_queue_set_ldt(vaddr_t va, uint32_t entries)
{
	struct mmuext_op op;
	xpq_flush_queue();

	XENPRINTK2(("xpq_queue_set_ldt\n"));
	KASSERT(va == (va & ~PAGE_MASK));
	op.cmd = MMUEXT_SET_LDT;
	op.arg1.linear_addr = va;
	op.arg2.nr_ents = entries;
	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
		panic("xpq_queue_set_ldt");
}

void
xpq_queue_tlb_flush()
{
	struct mmuext_op op;
	xpq_flush_queue();

	XENPRINTK2(("xpq_queue_tlb_flush\n"));
	op.cmd = MMUEXT_TLB_FLUSH_LOCAL;
	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
		panic("xpq_queue_tlb_flush");
}

void
xpq_flush_cache()
{
	struct mmuext_op op;
	int s = splvm();
	xpq_flush_queue();

	XENPRINTK2(("xpq_queue_flush_cache\n"));
	op.cmd = MMUEXT_FLUSH_CACHE;
	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
		panic("xpq_flush_cache");
	splx(s);
}

void
xpq_queue_invlpg(vaddr_t va)
{
	struct mmuext_op op;
	xpq_flush_queue();

	XENPRINTK2(("xpq_queue_invlpg %p\n", (void *)va));
	op.cmd = MMUEXT_INVLPG_LOCAL;
	op.arg1.linear_addr = (va & ~PAGE_MASK);
	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
		panic("xpq_queue_invlpg");
}

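/*
 * Update a PTE on behalf of another domain ('dom').  This bypasses the
 * batching queue: any pending local updates are flushed first, then a
 * single mmu_update is issued synchronously against the foreign domain.
 */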
int
xpq_update_foreign(pt_entry_t *ptr, pt_entry_t val, int dom)
{
	mmu_update_t op;
	int ok;
	xpq_flush_queue();

	op.ptr = (paddr_t)ptr;
	op.val = val;
	if (HYPERVISOR_mmu_update(&op, 1, &ok, dom) < 0)
		return EFAULT;
	return (0);
}
#else /* XEN3 */
void
xpq_queue_pt_switch(paddr_t pa)
{

	XENPRINTK2(("xpq_queue_pt_switch: %p %p\n", (void *)pa, (void *)pa));
	xpq_queue[xpq_idx].ptr = pa | MMU_EXTENDED_COMMAND;
	xpq_queue[xpq_idx].val = MMUEXT_NEW_BASEPTR;
	xpq_increment_idx();
}

void
xpq_queue_pin_table(paddr_t pa)
{

	XENPRINTK2(("xpq_queue_pin_table: %p %p\n", (void *)pa, (void *)pa));
	xpq_queue[xpq_idx].ptr = pa | MMU_EXTENDED_COMMAND;
	xpq_queue[xpq_idx].val = MMUEXT_PIN_L2_TABLE;
	xpq_increment_idx();
}

void
xpq_queue_unpin_table(paddr_t pa)
{

	XENPRINTK2(("xpq_queue_unpin_table: %p %p\n", (void *)pa, (void *)pa));
	xpq_queue[xpq_idx].ptr = pa | MMU_EXTENDED_COMMAND;
	xpq_queue[xpq_idx].val = MMUEXT_UNPIN_TABLE;
	xpq_increment_idx();
}

void
xpq_queue_set_ldt(vaddr_t va, uint32_t entries)
{

	XENPRINTK2(("xpq_queue_set_ldt\n"));
	KASSERT(va == (va & ~PAGE_MASK));
	xpq_queue[xpq_idx].ptr = MMU_EXTENDED_COMMAND | va;
	xpq_queue[xpq_idx].val = MMUEXT_SET_LDT | (entries << MMUEXT_CMD_SHIFT);
	xpq_increment_idx();
}

void
xpq_queue_tlb_flush()
{

	XENPRINTK2(("xpq_queue_tlb_flush\n"));
	xpq_queue[xpq_idx].ptr = MMU_EXTENDED_COMMAND;
	xpq_queue[xpq_idx].val = MMUEXT_TLB_FLUSH;
	xpq_increment_idx();
}

void
xpq_flush_cache()
{
	int s = splvm();

	XENPRINTK2(("xpq_queue_flush_cache\n"));
	xpq_queue[xpq_idx].ptr = MMU_EXTENDED_COMMAND;
	xpq_queue[xpq_idx].val = MMUEXT_FLUSH_CACHE;
	xpq_increment_idx();
	xpq_flush_queue();
	splx(s);
}

void
xpq_queue_invlpg(vaddr_t va)
{

	XENPRINTK2(("xpq_queue_invlpg %p\n", (void *)va));
	xpq_queue[xpq_idx].ptr = (va & ~PAGE_MASK) | MMU_EXTENDED_COMMAND;
	xpq_queue[xpq_idx].val = MMUEXT_INVLPG;
	xpq_increment_idx();
}

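/*
 * XEN2 variant of xpq_update_foreign(): an MMUEXT_SET_FOREIGNDOM command is
 * prepended so that the update that follows it in the same batch is applied
 * to the foreign domain's page tables.
 */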
int
xpq_update_foreign(pt_entry_t *ptr, pt_entry_t val, int dom)
{
	mmu_update_t xpq_up[3];

	xpq_up[0].ptr = MMU_EXTENDED_COMMAND;
	xpq_up[0].val = MMUEXT_SET_FOREIGNDOM | (dom << 16);
	xpq_up[1].ptr = (paddr_t)ptr;
	xpq_up[1].val = val;
	if (HYPERVISOR_mmu_update_self(xpq_up, 2, NULL) < 0)
		return EFAULT;
	return (0);
}
#endif /* XEN3 */

#ifdef XENDEBUG
void
xpq_debug_dump()
{
	int i;

	XENPRINTK2(("idx: %d\n", xpq_idx));
	for (i = 0; i < xpq_idx; i++) {
		sprintf(XBUF, "%x %08x ", (u_int)xpq_queue[i].ptr,
		    (u_int)xpq_queue[i].val);
		if (++i < xpq_idx)
			sprintf(XBUF + strlen(XBUF), "%x %08x ",
			    (u_int)xpq_queue[i].ptr, (u_int)xpq_queue[i].val);
		if (++i < xpq_idx)
			sprintf(XBUF + strlen(XBUF), "%x %08x ",
			    (u_int)xpq_queue[i].ptr, (u_int)xpq_queue[i].val);
		if (++i < xpq_idx)
			sprintf(XBUF + strlen(XBUF), "%x %08x ",
			    (u_int)xpq_queue[i].ptr, (u_int)xpq_queue[i].val);
		XENPRINTK2(("%d: %s\n", xpq_idx, XBUF));
	}
}
#endif


#ifdef __x86_64__
extern volatile struct xencons_interface *xencons_interface; /* XXX */
extern struct xenstore_domain_interface *xenstore_interface; /* XXX */

static void xen_bt_set_readonly (vaddr_t);
static void xen_bootstrap_tables (vaddr_t, vaddr_t, int, int, int);

/* How many PDEs ? */
#if L2_SLOT_KERNBASE > 0
#define TABLE_L2_ENTRIES (2 * (NKL2_KIMG_ENTRIES + 1))
#else
#define TABLE_L2_ENTRIES (NKL2_KIMG_ENTRIES + 1)
#endif

/*
 * Construct and switch to new page tables.  The value returned is the
 * first vaddr we can use once the Xen-provided page tables have been
 * reclaimed.
 */

vaddr_t xen_pmap_bootstrap (void);

/*
 * Function to get rid of Xen bootstrap tables
 */

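/*
 * The switch is done in two passes: a temporary set of tables is built in
 * free space after the tables provided by Xen and we switch to it, then
 * the final tables are rebuilt over the area the Xen tables occupied and
 * we switch again.  The vaddr just past the final tables is returned.
 */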
vaddr_t
xen_pmap_bootstrap()
{
	int count, iocount = 0;
	vaddr_t bootstrap_tables, init_tables;

	xpmap_phys_to_machine_mapping = (paddr_t *) xen_start_info.mfn_list;
	init_tables = xen_start_info.pt_base;
	__PRINTK(("xen_arch_pmap_bootstrap init_tables=0x%lx\n", init_tables));

	/* Space after Xen bootstrap tables should be free */
	bootstrap_tables = xen_start_info.pt_base +
		(xen_start_info.nr_pt_frames * PAGE_SIZE);

	/* Calculate how many tables we need */
	count = TABLE_L2_ENTRIES;

#ifdef DOM0OPS
	if (xen_start_info.flags & SIF_INITDOMAIN) {
		/* space for ISA I/O mem */
		iocount = IOM_SIZE / PAGE_SIZE;
	}
#endif

	/*
	 * Xen space we'll reclaim may not be enough for our new page tables,
	 * move bootstrap tables if necessary
	 */

	if (bootstrap_tables < init_tables + ((count+3+iocount) * PAGE_SIZE))
		bootstrap_tables = init_tables +
					((count+3+iocount) * PAGE_SIZE);

	/* Create temporary tables */
	xen_bootstrap_tables(xen_start_info.pt_base, bootstrap_tables,
		xen_start_info.nr_pt_frames, count, 0);

	/* get vaddr space for the shared info and the console pages */

	/* Create final tables */
	xen_bootstrap_tables(bootstrap_tables, init_tables,
	    count + 3, count, 1);

	return (init_tables + ((count + 3) * PAGE_SIZE));
}


/*
 * Build a new table and switch to it
 * old_count is # of old tables (including PGD, PDTPE and PDE)
 * new_count is # of new tables (PTE only)
 * we assume areas don't overlap
 */


static void
xen_bootstrap_tables (vaddr_t old_pgd, vaddr_t new_pgd,
	int old_count, int new_count, int final)
{
	pd_entry_t *pdtpe, *pde, *pte;
	pd_entry_t *cur_pgd, *bt_pgd;
	paddr_t addr, page;
	vaddr_t avail, text_end, map_end;
	int i;
	extern char __data_start;

	__PRINTK(("xen_bootstrap_tables(0x%lx, 0x%lx, %d, %d)\n",
	    old_pgd, new_pgd, old_count, new_count));
	text_end = ((vaddr_t)&__data_start) & ~PAGE_MASK;
	/*
	 * size of R/W area after kernel text:
	 *  xencons_interface (if present)
	 *  xenstore_interface (if present)
	 *  table pages (new_count + 3 entries)
	 *  UAREA
	 *  dummy user PGD
	 * extra mappings (only when final is true):
	 *  HYPERVISOR_shared_info
	 *  ISA I/O mem (if needed)
	 */
	map_end = new_pgd + ((new_count + 3 + UPAGES + 1) * NBPG);
	if (final) {
		HYPERVISOR_shared_info = (struct shared_info *)map_end;
		map_end += NBPG;
	}
#ifdef DOM0OPS
	if (final && (xen_start_info.flags & SIF_INITDOMAIN)) {
		/* ISA I/O mem */
		atdevbase = map_end;
		map_end += IOM_SIZE;
	}
#endif /* DOM0OPS */

	__PRINTK(("xen_bootstrap_tables text_end 0x%lx map_end 0x%lx\n",
	    text_end, map_end));

	/*
	 * Create bootstrap page tables
	 * What we need:
	 * - a PGD (level 4)
	 * - a PDTPE (level 3)
	 * - a PDE (level 2)
	 * - some PTEs (level 1)
	 */

	cur_pgd = (pd_entry_t *) old_pgd;
	bt_pgd = (pd_entry_t *) new_pgd;
	memset (bt_pgd, 0, PAGE_SIZE);
	avail = new_pgd + PAGE_SIZE;

	/* Install level 3 */
	pdtpe = (pd_entry_t *) avail;
	memset (pdtpe, 0, PAGE_SIZE);
	avail += PAGE_SIZE;

	addr = ((paddr_t) pdtpe) - KERNBASE;
	bt_pgd[pl4_pi(KERNTEXTOFF)] =
	    xpmap_ptom_masked(addr) | PG_u | PG_RW | PG_V;

	__PRINTK(("L3 va 0x%lx pa 0x%lx entry 0x%lx -> L4[0x%x]\n",
	    pdtpe, addr, bt_pgd[pl4_pi(KERNTEXTOFF)], pl4_pi(KERNTEXTOFF)));

	/* Level 2 */
	pde = (pd_entry_t *) avail;
	memset(pde, 0, PAGE_SIZE);
	avail += PAGE_SIZE;

	addr = ((paddr_t) pde) - KERNBASE;
	pdtpe[pl3_pi(KERNTEXTOFF)] =
	    xpmap_ptom_masked(addr) | PG_u | PG_RW | PG_V;
	__PRINTK(("L2 va 0x%lx pa 0x%lx entry 0x%lx -> L3[0x%x]\n",
	    pde, addr, pdtpe[pl3_pi(KERNTEXTOFF)], pl3_pi(KERNTEXTOFF)));

	/* Level 1 */
	page = KERNTEXTOFF;
	for (i = 0; i < new_count; i ++) {
		paddr_t cur_page = page;

		pte = (pd_entry_t *) avail;
		avail += PAGE_SIZE;

		memset(pte, 0, PAGE_SIZE);
		while (pl2_pi(page) == pl2_pi (cur_page)) {
			if (page >= map_end) {
				/* not mapped at all */
				pte[pl1_pi(page)] = 0;
				page += PAGE_SIZE;
				continue;
			}
			pte[pl1_pi(page)] = xpmap_ptom_masked(page - KERNBASE);
			if (page == (vaddr_t)HYPERVISOR_shared_info) {
				pte[pl1_pi(page)] = xen_start_info.shared_info;
				__PRINTK(("HYPERVISOR_shared_info "
				    "va 0x%lx pte 0x%lx\n",
				    HYPERVISOR_shared_info, pte[pl1_pi(page)]));
			}
			if (xpmap_ptom_masked(page - KERNBASE) ==
			    (xen_start_info.console_mfn << PAGE_SHIFT)) {
				xencons_interface = (void *)page;
				pte[pl1_pi(page)] =
				    (xen_start_info.console_mfn << PAGE_SHIFT);
				__PRINTK(("xencons_interface "
				    "va 0x%lx pte 0x%lx\n",
				    xencons_interface, pte[pl1_pi(page)]));
			}
			if (xpmap_ptom_masked(page - KERNBASE) ==
			    (xen_start_info.store_mfn << PAGE_SHIFT)) {
				xenstore_interface = (void *)page;
				pte[pl1_pi(page)] =
				    (xen_start_info.store_mfn << PAGE_SHIFT);
				__PRINTK(("xenstore_interface "
				    "va 0x%lx pte 0x%lx\n",
				    xenstore_interface, pte[pl1_pi(page)]));
			}
#ifdef DOM0OPS
			if (page >= (vaddr_t)atdevbase &&
			    page < (vaddr_t)atdevbase + IOM_SIZE) {
				pte[pl1_pi(page)] =
				    IOM_BEGIN + (page - (vaddr_t)atdevbase);
			}
#endif
			pte[pl1_pi(page)] |= PG_u | PG_V;
			if (page < text_end) {
				/* map kernel text RO */
				pte[pl1_pi(page)] |= 0;
			} else if (page >= old_pgd
			    && page < old_pgd + (old_count * PAGE_SIZE)) {
				/* map old page tables RO */
				pte[pl1_pi(page)] |= 0;
			} else if (page >= new_pgd &&
			    page < new_pgd + ((new_count + 3) * PAGE_SIZE)) {
				/* map new page tables RO */
				pte[pl1_pi(page)] |= 0;
			} else {
				/* map page RW */
				pte[pl1_pi(page)] |= PG_RW;
			}
			if (page == old_pgd)
				__PRINTK(("va 0x%lx pa 0x%lx "
				    "entry 0x%lx -> L1[0x%x]\n",
				    page, page - KERNBASE,
				    pte[pl1_pi(page)], pl1_pi(page)));
			page += PAGE_SIZE;
		}

		addr = ((paddr_t) pte) - KERNBASE;
		pde[pl2_pi(cur_page)] =
		    xpmap_ptom_masked(addr) | PG_u | PG_RW | PG_V;
		__PRINTK(("L1 va 0x%lx pa 0x%lx entry 0x%lx -> L2[0x%x]\n",
		    pte, addr, pde[pl2_pi(cur_page)], pl2_pi(cur_page)));
		/* Mark readonly */
		xen_bt_set_readonly((vaddr_t) pte);
	}

	/* Install recursive page tables mapping */
	bt_pgd[PDIR_SLOT_PTE] =
	    xpmap_ptom_masked(new_pgd - KERNBASE) | PG_u | PG_V;
	__PRINTK(("bt_pgd[PDIR_SLOT_PTE] va 0x%lx pa 0x%lx entry 0x%lx\n",
	    new_pgd, new_pgd - KERNBASE, bt_pgd[PDIR_SLOT_PTE]));

	/* Mark tables RO */
	xen_bt_set_readonly((vaddr_t) pde);
	xen_bt_set_readonly((vaddr_t) pdtpe);
	xen_bt_set_readonly(new_pgd);
	/* Pin the PGD */
	__PRINTK(("pin PGD\n"));
	xpq_queue_pin_table(xpmap_ptom_masked(new_pgd - KERNBASE));
	/* Switch to new tables */
	__PRINTK(("switch to PGD\n"));
	xpq_queue_pt_switch(xpmap_ptom_masked(new_pgd - KERNBASE));
	__PRINTK(("bt_pgd[PDIR_SLOT_PTE] now entry 0x%lx\n",
	    bt_pgd[PDIR_SLOT_PTE]));
	__PRINTK(("L4_BASE va 0x%lx\n", (long)L4_BASE));
	__PRINTK(("value 0x%lx\n", *L4_BASE));
	__PRINTK(("[PDIR_SLOT_PTE] 0x%lx\n", L4_BASE[PDIR_SLOT_PTE]));

	/* Now we can safely reclaim space taken by old tables */

	__PRINTK(("unpin old PGD\n"));
	/* Unpin old PGD */
	xpq_queue_unpin_table(xpmap_ptom_masked(old_pgd - KERNBASE));
	/* Mark old tables RW */
	page = old_pgd;
	addr = (paddr_t) pde[pl2_pi(page)] & PG_FRAME;
	addr = xpmap_mtop(addr);
	pte = (pd_entry_t *) (addr + KERNBASE);
	pte += pl1_pi(page);
	__PRINTK(("*pde 0x%lx addr 0x%lx pte 0x%lx\n",
	    pde[pl2_pi(page)], addr, pte));
	while (page < old_pgd + (old_count * PAGE_SIZE) && page < map_end) {
		addr = xpmap_ptom(((paddr_t) pte) - KERNBASE);
		xpq_queue_pte_update((pt_entry_t *) addr, *pte | PG_RW);
		page += PAGE_SIZE;
		/*
		 * Our ptes are contiguous
		 * so it's safe to just "++" here
		 */
		pte++;
	}
	xpq_flush_queue();
}


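/*
 * Install a new user-mode L4 page table: amd64 PV guests keep separate
 * kernel and user top-level tables, and MMUEXT_NEW_USER_BASEPTR switches
 * the latter.  'page' is a physical address; it is translated to a machine
 * frame through xpmap_phys_to_machine_mapping before being handed to Xen.
 */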
void
xen_set_user_pgd(paddr_t page)
{
	struct mmuext_op op;
	int s = splvm();

	xpq_flush_queue();
	op.cmd = MMUEXT_NEW_USER_BASEPTR;
	op.arg1.mfn = xpmap_phys_to_machine_mapping[page >> PAGE_SHIFT];
	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
		panic("xen_set_user_pgd: failed to install new user page"
			" directory %lx", page);
	splx(s);
}

/*
 * Bootstrap helper functions
 */

/*
 * Mark a page readonly
 * XXX: assuming vaddr = paddr + KERNBASE
 */

static void
xen_bt_set_readonly (vaddr_t page)
{
	pt_entry_t entry;

	entry = xpmap_ptom_masked(page - KERNBASE);
	entry |= PG_u | PG_V;

	HYPERVISOR_update_va_mapping (page, entry, UVMF_INVLPG);
}
#endif /* x86_64 */
