/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 1997 Jonathan Lemon
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>

#include <machine/md_var.h>
#include <machine/pcb.h>
#include <machine/pcb_ext.h>
#include <machine/psl.h>
#include <machine/specialreg.h>
#include <machine/sysarch.h>

extern int vm86pa;
extern struct pcb *vm86pcb;

static struct mtx vm86_lock;

extern int vm86_bioscall(struct vm86frame *);
extern void vm86_biosret(struct vm86frame *);

void vm86_prepcall(struct vm86frame *);

struct system_map {
	int		type;
	vm_offset_t	start;
	vm_offset_t	end;
};

#define	HLT	0xf4
#define	CLI	0xfa
#define	STI	0xfb
#define	PUSHF	0x9c
#define	POPF	0x9d
#define	INTn	0xcd
#define	IRET	0xcf
#define	CALLm	0xff
#define	OPERAND_SIZE_PREFIX	0x66
#define	ADDRESS_SIZE_PREFIX	0x67
#define	PUSH_MASK	~(PSL_VM | PSL_RF | PSL_I)
#define	POP_MASK	~(PSL_VIP | PSL_VIF | PSL_VM | PSL_RF | PSL_IOPL)

static int
vm86_suword16(volatile void *base, int word)
{

	if (curthread->td_critnest != 0) {
		*(volatile uint16_t *)base = word;
		return (0);
	}
	return (suword16(base, word));
}

static int
vm86_suword(volatile void *base, long word)
{

	if (curthread->td_critnest != 0) {
		*(volatile long *)base = word;
		return (0);
	}
	return (suword(base, word));
}

static int
vm86_fubyte(volatile const void *base)
{

	if (curthread->td_critnest != 0)
		return (*(volatile const u_char *)base);
	return (fubyte(base));
}

static int
vm86_fuword16(volatile const void *base)
{

	if (curthread->td_critnest != 0)
		return (*(volatile const uint16_t *)base);
	return (fuword16(base));
}

static long
vm86_fuword(volatile const void *base)
{

	if (curthread->td_critnest != 0)
		return (*(volatile const long *)base);
	return (fuword(base));
}

static __inline caddr_t
MAKE_ADDR(u_short sel, u_short off)
{
	return ((caddr_t)((sel << 4) + off));
}

static __inline void
GET_VEC(u_int vec, u_short *sel, u_short *off)
{
	*sel = vec >> 16;
	*off = vec & 0xffff;
}

static __inline u_int
MAKE_VEC(u_short sel, u_short off)
{
	return ((sel << 16) | off);
}

static __inline void
PUSH(u_short x, struct vm86frame *vmf)
{
	vmf->vmf_sp -= 2;
	vm86_suword16(MAKE_ADDR(vmf->vmf_ss, vmf->vmf_sp), x);
}

static __inline void
PUSHL(u_int x, struct vm86frame *vmf)
{
	vmf->vmf_sp -= 4;
	vm86_suword(MAKE_ADDR(vmf->vmf_ss, vmf->vmf_sp), x);
}

static __inline u_short
POP(struct vm86frame *vmf)
{
	u_short x = vm86_fuword16(MAKE_ADDR(vmf->vmf_ss, vmf->vmf_sp));

	vmf->vmf_sp += 2;
	return (x);
}

static __inline u_int
POPL(struct vm86frame *vmf)
{
	u_int x = vm86_fuword(MAKE_ADDR(vmf->vmf_ss, vmf->vmf_sp));

	vmf->vmf_sp += 4;
	return (x);
}

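/*
 * Emulate the instructions that trap while the CPU is executing in vm86
 * mode: CLI, STI, PUSHF, POPF, INTn and IRET, plus their operand-size
 * prefixed (32-bit) forms.  Returns 0 (or SIGTRAP when PSL_T is set) if the
 * instruction was handled, otherwise a signal number such as SIGBUS for the
 * caller to deliver.  With VME support most of this work is done by the
 * hardware and only the cases the CPU refuses to handle arrive here.
 */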
int
vm86_emulate(struct vm86frame *vmf)
{
	struct vm86_kernel *vm86;
	caddr_t addr;
	u_char i_byte;
	u_int temp_flags;
	int inc_ip = 1;
	int retcode = 0;

	/*
	 * pcb_ext contains the address of the extension area, or zero if
	 * the extension is not present.  (This check should not be needed,
	 * as we can't enter vm86 mode until we set up an extension area)
	 */
	if (curpcb->pcb_ext == 0)
		return (SIGBUS);
	vm86 = &curpcb->pcb_ext->ext_vm86;

	if (vmf->vmf_eflags & PSL_T)
		retcode = SIGTRAP;

	addr = MAKE_ADDR(vmf->vmf_cs, vmf->vmf_ip);
	i_byte = vm86_fubyte(addr);
	if (i_byte == ADDRESS_SIZE_PREFIX) {
		i_byte = vm86_fubyte(++addr);
		inc_ip++;
	}

	if (vm86->vm86_has_vme) {
		switch (i_byte) {
		case OPERAND_SIZE_PREFIX:
			i_byte = vm86_fubyte(++addr);
			inc_ip++;
			switch (i_byte) {
			case PUSHF:
				if (vmf->vmf_eflags & PSL_VIF)
					PUSHL((vmf->vmf_eflags & PUSH_MASK)
					    | PSL_IOPL | PSL_I, vmf);
				else
					PUSHL((vmf->vmf_eflags & PUSH_MASK)
					    | PSL_IOPL, vmf);
				vmf->vmf_ip += inc_ip;
				return (retcode);

			case POPF:
				temp_flags = POPL(vmf) & POP_MASK;
				vmf->vmf_eflags = (vmf->vmf_eflags & ~POP_MASK)
				    | temp_flags | PSL_VM | PSL_I;
				vmf->vmf_ip += inc_ip;
				if (temp_flags & PSL_I) {
					vmf->vmf_eflags |= PSL_VIF;
					if (vmf->vmf_eflags & PSL_VIP)
						break;
				} else {
					vmf->vmf_eflags &= ~PSL_VIF;
				}
				return (retcode);
			}
			break;

		/* VME faults here if VIP is set, but does not set VIF. */
		case STI:
			vmf->vmf_eflags |= PSL_VIF;
			vmf->vmf_ip += inc_ip;
			if ((vmf->vmf_eflags & PSL_VIP) == 0) {
				uprintf("fatal sti\n");
				return (SIGKILL);
			}
			break;

		/* VME if no redirection support */
		case INTn:
			break;

		/* VME if trying to set PSL_T, or PSL_I when VIP is set */
		case POPF:
			temp_flags = POP(vmf) & POP_MASK;
			vmf->vmf_flags = (vmf->vmf_flags & ~POP_MASK)
			    | temp_flags | PSL_VM | PSL_I;
			vmf->vmf_ip += inc_ip;
			if (temp_flags & PSL_I) {
				vmf->vmf_eflags |= PSL_VIF;
				if (vmf->vmf_eflags & PSL_VIP)
					break;
			} else {
				vmf->vmf_eflags &= ~PSL_VIF;
			}
			return (retcode);

		/* VME if trying to set PSL_T, or PSL_I when VIP is set */
		case IRET:
			vmf->vmf_ip = POP(vmf);
			vmf->vmf_cs = POP(vmf);
			temp_flags = POP(vmf) & POP_MASK;
			vmf->vmf_flags = (vmf->vmf_flags & ~POP_MASK)
			    | temp_flags | PSL_VM | PSL_I;
			if (temp_flags & PSL_I) {
				vmf->vmf_eflags |= PSL_VIF;
				if (vmf->vmf_eflags & PSL_VIP)
					break;
			} else {
				vmf->vmf_eflags &= ~PSL_VIF;
			}
			return (retcode);
		}
		return (SIGBUS);
	}

	switch (i_byte) {
	case OPERAND_SIZE_PREFIX:
		i_byte = vm86_fubyte(++addr);
		inc_ip++;
		switch (i_byte) {
		case PUSHF:
			if (vm86->vm86_eflags & PSL_VIF)
				PUSHL((vmf->vmf_flags & PUSH_MASK)
				    | PSL_IOPL | PSL_I, vmf);
			else
				PUSHL((vmf->vmf_flags & PUSH_MASK)
				    | PSL_IOPL, vmf);
			vmf->vmf_ip += inc_ip;
			return (retcode);

		case POPF:
			temp_flags = POPL(vmf) & POP_MASK;
			vmf->vmf_eflags = (vmf->vmf_eflags & ~POP_MASK)
			    | temp_flags | PSL_VM | PSL_I;
			vmf->vmf_ip += inc_ip;
			if (temp_flags & PSL_I) {
				vm86->vm86_eflags |= PSL_VIF;
				if (vm86->vm86_eflags & PSL_VIP)
					break;
			} else {
				vm86->vm86_eflags &= ~PSL_VIF;
			}
			return (retcode);
		}
		return (SIGBUS);

	case CLI:
		vm86->vm86_eflags &= ~PSL_VIF;
		vmf->vmf_ip += inc_ip;
		return (retcode);

	case STI:
		/* if there is a pending interrupt, go to the emulator */
		vm86->vm86_eflags |= PSL_VIF;
		vmf->vmf_ip += inc_ip;
		if (vm86->vm86_eflags & PSL_VIP)
			break;
		return (retcode);

	case PUSHF:
		if (vm86->vm86_eflags & PSL_VIF)
			PUSH((vmf->vmf_flags & PUSH_MASK)
			    | PSL_IOPL | PSL_I, vmf);
		else
			PUSH((vmf->vmf_flags & PUSH_MASK) | PSL_IOPL, vmf);
		vmf->vmf_ip += inc_ip;
		return (retcode);

	case INTn:
		i_byte = vm86_fubyte(addr + 1);
		if ((vm86->vm86_intmap[i_byte >> 3] & (1 << (i_byte & 7))) != 0)
			break;
		if (vm86->vm86_eflags & PSL_VIF)
			PUSH((vmf->vmf_flags & PUSH_MASK)
			    | PSL_IOPL | PSL_I, vmf);
		else
			PUSH((vmf->vmf_flags & PUSH_MASK) | PSL_IOPL, vmf);
		PUSH(vmf->vmf_cs, vmf);
		PUSH(vmf->vmf_ip + inc_ip + 1, vmf);	/* increment IP */
		GET_VEC(vm86_fuword((caddr_t)(i_byte * 4)),
		     &vmf->vmf_cs, &vmf->vmf_ip);
		vmf->vmf_flags &= ~PSL_T;
		vm86->vm86_eflags &= ~PSL_VIF;
		return (retcode);

	case IRET:
		vmf->vmf_ip = POP(vmf);
		vmf->vmf_cs = POP(vmf);
		temp_flags = POP(vmf) & POP_MASK;
		vmf->vmf_flags = (vmf->vmf_flags & ~POP_MASK)
		    | temp_flags | PSL_VM | PSL_I;
		if (temp_flags & PSL_I) {
			vm86->vm86_eflags |= PSL_VIF;
			if (vm86->vm86_eflags & PSL_VIP)
				break;
		} else {
			vm86->vm86_eflags &= ~PSL_VIF;
		}
		return (retcode);

	case POPF:
		temp_flags = POP(vmf) & POP_MASK;
		vmf->vmf_flags = (vmf->vmf_flags & ~POP_MASK)
		    | temp_flags | PSL_VM | PSL_I;
		vmf->vmf_ip += inc_ip;
		if (temp_flags & PSL_I) {
			vm86->vm86_eflags |= PSL_VIF;
			if (vm86->vm86_eflags & PSL_VIP)
				break;
		} else {
			vm86->vm86_eflags &= ~PSL_VIF;
		}
		return (retcode);
	}
	return (SIGBUS);
}

#define	PGTABLE_SIZE	((1024 + 64) * 1024 / PAGE_SIZE)
#define	INTMAP_SIZE	32
#define	IOMAP_SIZE	ctob(IOPAGES)
#define	TSS_SIZE \
	(sizeof(struct pcb_ext) - sizeof(struct segment_descriptor) + \
	 INTMAP_SIZE + IOMAP_SIZE + 1)

struct vm86_layout_pae {
	uint64_t	vml_pgtbl[PGTABLE_SIZE];
	struct	pcb vml_pcb;
	struct	pcb_ext vml_ext;
	char	vml_intmap[INTMAP_SIZE];
	char	vml_iomap[IOMAP_SIZE];
	char	vml_iomap_trailer;
};

struct vm86_layout_nopae {
	uint32_t	vml_pgtbl[PGTABLE_SIZE];
	struct	pcb vml_pcb;
	struct	pcb_ext vml_ext;
	char	vml_intmap[INTMAP_SIZE];
	char	vml_iomap[IOMAP_SIZE];
	char	vml_iomap_trailer;
};

_Static_assert(sizeof(struct vm86_layout_pae) <= ctob(3),
    "struct vm86_layout_pae exceeds space allocated in locore.s");
_Static_assert(sizeof(struct vm86_layout_nopae) <= ctob(3),
    "struct vm86_layout_nopae exceeds space allocated in locore.s");

static void
vm86_initialize_pae(void)
{
	int i;
	u_int *addr;
	struct vm86_layout_pae *vml;
	struct pcb *pcb;
	struct pcb_ext *ext;
	struct soft_segment_descriptor ssd = {
		0,			/* segment base address (overwritten) */
		0,			/* length (overwritten) */
		SDT_SYS386TSS,		/* segment type */
		0,			/* priority level */
		1,			/* descriptor present */
		0, 0,
		0,			/* default 16 size */
		0			/* granularity */
	};

	/*
	 * Below is the memory layout that we use for the vm86 region.
	 *
	 * +--------+
	 * |        |
	 * |        |
	 * | page 0 |
	 * |        | +--------+
	 * |        | | stack  |
	 * +--------+ +--------+ <--------- vm86paddr
	 * |        | |Page Tbl| 1M + 64K = 272 entries = 1088 bytes
	 * |        | +--------+
	 * |        | |  PCB   | size: ~240 bytes
	 * | page 1 | |PCB Ext | size: ~140 bytes (includes TSS)
	 * |        | +--------+
	 * |        | |int map |
	 * |        | +--------+
	 * +--------+ |        |
	 * | page 2 | |  I/O   |
	 * +--------+ | bitmap |
	 * | page 3 | |        |
	 * |        | +--------+
	 * +--------+
	 */

	/*
	 * A rudimentary PCB must be installed, in order to get to the
	 * PCB extension area.  We use the PCB area as a scratchpad for
	 * data storage, the layout of which is shown below.
	 *
	 * pcb_esi	= new PTD entry 0
	 * pcb_ebp	= pointer to frame on vm86 stack
	 * pcb_esp	=    stack frame pointer at time of switch
	 * pcb_ebx	= va of vm86 page table
	 * pcb_eip	=    argument pointer to initial call
	 * pcb_vm86[0]	=    saved TSS descriptor, word 0
	 * pcb_vm86[1]	=    saved TSS descriptor, word 1
	 */
#define	new_ptd		pcb_esi
#define	vm86_frame	pcb_ebp
#define	pgtable_va	pcb_ebx

	vml = (struct vm86_layout_pae *)vm86paddr;
	pcb = &vml->vml_pcb;
	ext = &vml->vml_ext;

	mtx_init(&vm86_lock, "vm86 lock", NULL, MTX_DEF);

	bzero(pcb, sizeof(struct pcb));
	pcb->new_ptd = vm86pa | PG_V | PG_RW | PG_U;
	pcb->vm86_frame = vm86paddr - sizeof(struct vm86frame);
	pcb->pgtable_va = vm86paddr;
	pcb->pcb_flags = PCB_VM86CALL;
	pcb->pcb_ext = ext;

	bzero(ext, sizeof(struct pcb_ext));
	ext->ext_tss.tss_esp0 = vm86paddr;
	ext->ext_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
	ext->ext_tss.tss_ioopt =
		((u_int)vml->vml_iomap - (u_int)&ext->ext_tss) << 16;
	ext->ext_iomap = vml->vml_iomap;
	ext->ext_vm86.vm86_intmap = vml->vml_intmap;

	if (cpu_feature & CPUID_VME)
		ext->ext_vm86.vm86_has_vme = (rcr4() & CR4_VME ? 1 : 0);

	addr = (u_int *)ext->ext_vm86.vm86_intmap;
	for (i = 0; i < (INTMAP_SIZE + IOMAP_SIZE) / sizeof(u_int); i++)
		*addr++ = 0;
	vml->vml_iomap_trailer = 0xff;

	ssd.ssd_base = (u_int)&ext->ext_tss;
	ssd.ssd_limit = TSS_SIZE - 1;
	ssdtosd(&ssd, &ext->ext_tssd);

	vm86pcb = pcb;

#if 0
        /*
         * use whatever is leftover of the vm86 page layout as a
         * message buffer so we can capture early output.
         */
        msgbufinit((vm_offset_t)vm86paddr + sizeof(struct vm86_layout),
            ctob(3) - sizeof(struct vm86_layout));
#endif
}

static void
vm86_initialize_nopae(void)
{
	int i;
	u_int *addr;
	struct vm86_layout_nopae *vml;
	struct pcb *pcb;
	struct pcb_ext *ext;
	struct soft_segment_descriptor ssd = {
		0,			/* segment base address (overwritten) */
		0,			/* length (overwritten) */
		SDT_SYS386TSS,		/* segment type */
		0,			/* priority level */
		1,			/* descriptor present */
		0, 0,
		0,			/* default 16 size */
		0			/* granularity */
	};

	vml = (struct vm86_layout_nopae *)vm86paddr;
	pcb = &vml->vml_pcb;
	ext = &vml->vml_ext;

	mtx_init(&vm86_lock, "vm86 lock", NULL, MTX_DEF);

	bzero(pcb, sizeof(struct pcb));
	pcb->new_ptd = vm86pa | PG_V | PG_RW | PG_U;
	pcb->vm86_frame = vm86paddr - sizeof(struct vm86frame);
	pcb->pgtable_va = vm86paddr;
	pcb->pcb_flags = PCB_VM86CALL;
	pcb->pcb_ext = ext;

	bzero(ext, sizeof(struct pcb_ext));
	ext->ext_tss.tss_esp0 = vm86paddr;
	ext->ext_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
	ext->ext_tss.tss_ioopt =
		((u_int)vml->vml_iomap - (u_int)&ext->ext_tss) << 16;
	ext->ext_iomap = vml->vml_iomap;
	ext->ext_vm86.vm86_intmap = vml->vml_intmap;

	if (cpu_feature & CPUID_VME)
		ext->ext_vm86.vm86_has_vme = (rcr4() & CR4_VME ? 1 : 0);

	addr = (u_int *)ext->ext_vm86.vm86_intmap;
	for (i = 0; i < (INTMAP_SIZE + IOMAP_SIZE) / sizeof(u_int); i++)
		*addr++ = 0;
	vml->vml_iomap_trailer = 0xff;

	ssd.ssd_base = (u_int)&ext->ext_tss;
	ssd.ssd_limit = TSS_SIZE - 1;
	ssdtosd(&ssd, &ext->ext_tssd);

	vm86pcb = pcb;

#if 0
        /*
         * use whatever is leftover of the vm86 page layout as a
         * message buffer so we can capture early output.
         */
        msgbufinit((vm_offset_t)vm86paddr + sizeof(struct vm86_layout),
            ctob(3) - sizeof(struct vm86_layout));
#endif
}

void
vm86_initialize(void)
{

	if (pae_mode)
		vm86_initialize_pae();
	else
		vm86_initialize_nopae();
}

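/*
 * Return the kernel virtual address registered for the given vm86 page
 * number in this context, or 0 if the page has not been added.
 */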
vm_offset_t
vm86_getpage(struct vm86context *vmc, int pagenum)
{
	int i;

	for (i = 0; i < vmc->npages; i++)
		if (vmc->pmap[i].pte_num == pagenum)
			return (vmc->pmap[i].kva);
	return (0);
}

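/*
 * Register a kernel virtual address to back the given vm86 page number.
 * If kva is 0, a page is allocated with malloc(9) and the mapping is
 * marked VMAP_MALLOC.  Panics if the page number is already mapped or the
 * context is full.
 */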
vm_offset_t
vm86_addpage(struct vm86context *vmc, int pagenum, vm_offset_t kva)
{
	int i, flags = 0;

	for (i = 0; i < vmc->npages; i++)
		if (vmc->pmap[i].pte_num == pagenum)
			goto overlap;

	if (vmc->npages == VM86_PMAPSIZE)
		goto full;			/* XXX grow map? */

	if (kva == 0) {
		kva = (vm_offset_t)malloc(PAGE_SIZE, M_TEMP, M_WAITOK);
		flags = VMAP_MALLOC;
	}

	i = vmc->npages++;
	vmc->pmap[i].flags = flags;
	vmc->pmap[i].kva = kva;
	vmc->pmap[i].pte_num = pagenum;
	return (kva);
overlap:
	panic("vm86_addpage: overlap");
full:
	panic("vm86_addpage: not enough room");
}

/*
 * called from vm86_bioscall, while in vm86 address space, to finalize setup.
 */
void
vm86_prepcall(struct vm86frame *vmf)
{
	struct vm86_kernel *vm86;
	uint32_t *stack;
	uint8_t *code;

	code = (void *)0xa00;
	stack = (void *)(0x1000 - 2);	/* keep aligned */
	if ((vmf->vmf_trapno & PAGE_MASK) <= 0xff) {
		/* interrupt call requested */
		code[0] = INTn;
		code[1] = vmf->vmf_trapno & 0xff;
		code[2] = HLT;
		vmf->vmf_ip = (uintptr_t)code;
		vmf->vmf_cs = 0;
	} else {
		code[0] = HLT;
		stack--;
		stack[0] = MAKE_VEC(0, (uintptr_t)code);
	}
	vmf->vmf_sp = (uintptr_t)stack;
	vmf->vmf_ss = 0;
	vmf->kernel_fs = vmf->kernel_es = vmf->kernel_ds = 0;
	vmf->vmf_eflags = PSL_VIF | PSL_VM | PSL_USER;

	vm86 = &curpcb->pcb_ext->ext_vm86;
	if (!vm86->vm86_has_vme)
		vm86->vm86_eflags = vmf->vmf_eflags;  /* save VIF, VIP */
}

/*
 * vm86 trap handler; determines whether routine succeeded or not.
 * Called while in vm86 space, returns to calling process.
 */
void
vm86_trap(struct vm86frame *vmf)
{
	void (*p)(struct vm86frame *);
	caddr_t addr;

	/* "should not happen" */
	if ((vmf->vmf_eflags & PSL_VM) == 0)
		panic("vm86_trap called, but not in vm86 mode");

	addr = MAKE_ADDR(vmf->vmf_cs, vmf->vmf_ip);
	if (*(u_char *)addr == HLT)
		vmf->vmf_trapno = vmf->vmf_eflags & PSL_C;
	else
		vmf->vmf_trapno = vmf->vmf_trapno << 16;

	p = (void (*)(struct vm86frame *))((uintptr_t)vm86_biosret +
	    setidt_disp);
	p(vmf);
}

int
vm86_intcall(int intnum, struct vm86frame *vmf)
{
	int (*p)(struct vm86frame *);
	int retval;

	if (intnum < 0 || intnum > 0xff)
		return (EINVAL);

	vmf->vmf_trapno = intnum;
	p = (int (*)(struct vm86frame *))((uintptr_t)vm86_bioscall +
	    setidt_disp);
	mtx_lock(&vm86_lock);
	critical_enter();
	retval = p(vmf);
	critical_exit();
	mtx_unlock(&vm86_lock);
	return (retval);
}
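
/*
 * A minimal usage sketch (illustrative only, not part of this file): issue
 * a BIOS interrupt whose arguments and results fit entirely in registers.
 * Int 15h function 88h ("get extended memory size") serves as the example;
 * the vmf_ah/vmf_ax register accessors come from <machine/vm86.h>.
 *
 *	struct vm86frame vmf;
 *	int extmem_kb;
 *
 *	bzero(&vmf, sizeof(vmf));
 *	vmf.vmf_ah = 0x88;
 *	if (vm86_intcall(0x15, &vmf) == 0)
 *		extmem_kb = vmf.vmf_ax;
 */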

/*
 * struct vm86context contains the page table to use when making
 * vm86 calls.  If intnum is a valid interrupt number (0-255), then
 * the "interrupt trampoline" will be used, otherwise we use the
 * caller's cs:ip routine.
 */
int
vm86_datacall(int intnum, struct vm86frame *vmf, struct vm86context *vmc)
{
	uint64_t *pte_pae;
	uint32_t *pte_nopae;
	int (*p)(struct vm86frame *);
	vm_paddr_t page;
	int i, entry, retval;

	mtx_lock(&vm86_lock);
	if (pae_mode) {
		pte_pae = (uint64_t *)vm86paddr;
		for (i = 0; i < vmc->npages; i++) {
			page = vtophys(vmc->pmap[i].kva & PG_FRAME_PAE);
			entry = vmc->pmap[i].pte_num;
			vmc->pmap[i].old_pte = pte_pae[entry];
			pte_pae[entry] = page | PG_V | PG_RW | PG_U;
			pmap_invalidate_page(kernel_pmap, vmc->pmap[i].kva);
		}
	} else {
		pte_nopae = (uint32_t *)vm86paddr;
		for (i = 0; i < vmc->npages; i++) {
			page = vtophys(vmc->pmap[i].kva & PG_FRAME_NOPAE);
			entry = vmc->pmap[i].pte_num;
			vmc->pmap[i].old_pte = pte_nopae[entry];
			pte_nopae[entry] = page | PG_V | PG_RW | PG_U;
			pmap_invalidate_page(kernel_pmap, vmc->pmap[i].kva);
		}
	}

	vmf->vmf_trapno = intnum;
	p = (int (*)(struct vm86frame *))((uintptr_t)vm86_bioscall +
	    setidt_disp);
	critical_enter();
	retval = p(vmf);
	critical_exit();

	if (pae_mode) {
		for (i = 0; i < vmc->npages; i++) {
			entry = vmc->pmap[i].pte_num;
			pte_pae[entry] = vmc->pmap[i].old_pte;
			pmap_invalidate_page(kernel_pmap, vmc->pmap[i].kva);
		}
	} else {
		for (i = 0; i < vmc->npages; i++) {
			entry = vmc->pmap[i].pte_num;
			pte_nopae[entry] = vmc->pmap[i].old_pte;
			pmap_invalidate_page(kernel_pmap, vmc->pmap[i].kva);
		}
	}
	mtx_unlock(&vm86_lock);

	return (retval);
}

vm_offset_t
vm86_getaddr(struct vm86context *vmc, u_short sel, u_short off)
{
	int i, page;
	vm_offset_t addr;

	addr = (vm_offset_t)MAKE_ADDR(sel, off);
	page = addr >> PAGE_SHIFT;
	for (i = 0; i < vmc->npages; i++)
		if (page == vmc->pmap[i].pte_num)
			return (vmc->pmap[i].kva + (addr & PAGE_MASK));
	return (0);
}

int
vm86_getptr(struct vm86context *vmc, vm_offset_t kva, u_short *sel,
     u_short *off)
{
	int i;

	for (i = 0; i < vmc->npages; i++)
		if (kva >= vmc->pmap[i].kva &&
		    kva < vmc->pmap[i].kva + PAGE_SIZE) {
			*off = kva - vmc->pmap[i].kva;
			*sel = vmc->pmap[i].pte_num << 8;
			return (1);
		}
	return (0);
}
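
/*
 * A minimal usage sketch (illustrative only, not part of this file): make a
 * BIOS call that returns data through a memory buffer.  A kernel page is
 * entered into the vm86 page table as vm86 page 1, es:di is derived from
 * its kernel address with vm86_getptr(), and the interrupt is issued with
 * vm86_datacall().  The VBE "get controller information" call (int 10h,
 * ax=4F00h) is used purely as an example; the vmf_* register accessors come
 * from <machine/vm86.h>.
 *
 *	struct vm86context vmc;
 *	struct vm86frame vmf;
 *	char *buf;
 *	int error;
 *
 *	bzero(&vmf, sizeof(vmf));
 *	vmc.npages = 0;
 *	buf = (char *)vm86_addpage(&vmc, 1, 0);
 *	vm86_getptr(&vmc, (vm_offset_t)buf, &vmf.vmf_es, &vmf.vmf_di);
 *	vmf.vmf_eax = 0x4f00;
 *	error = vm86_datacall(0x10, &vmf, &vmc);
 *
 * On success the VBE information block is left in buf and the BIOS status
 * is in the low 16 bits of vmf.vmf_eax.
 */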

int
vm86_sysarch(struct thread *td, char *args)
{
	int error = 0;
	struct i386_vm86_args ua;
	struct vm86_kernel *vm86;

	if ((error = copyin(args, &ua, sizeof(struct i386_vm86_args))) != 0)
		return (error);

	if (td->td_pcb->pcb_ext == 0)
		if ((error = i386_extend_pcb(td)) != 0)
			return (error);
	vm86 = &td->td_pcb->pcb_ext->ext_vm86;

	switch (ua.sub_op) {
	case VM86_INIT: {
		struct vm86_init_args sa;

		if ((error = copyin(ua.sub_args, &sa, sizeof(sa))) != 0)
			return (error);
		if (cpu_feature & CPUID_VME)
			vm86->vm86_has_vme = (rcr4() & CR4_VME ? 1 : 0);
		else
			vm86->vm86_has_vme = 0;
		vm86->vm86_inited = 1;
		vm86->vm86_debug = sa.debug;
		bcopy(&sa.int_map, vm86->vm86_intmap, 32);
		}
		break;

#if 0
	case VM86_SET_VME: {
		struct vm86_vme_args sa;

		if ((cpu_feature & CPUID_VME) == 0)
			return (ENODEV);

		if (error = copyin(ua.sub_args, &sa, sizeof(sa)))
			return (error);
		if (sa.state)
			load_cr4(rcr4() | CR4_VME);
		else
			load_cr4(rcr4() & ~CR4_VME);
		}
		break;
#endif

	case VM86_GET_VME: {
		struct vm86_vme_args sa;

		sa.state = (rcr4() & CR4_VME ? 1 : 0);
		error = copyout(&sa, ua.sub_args, sizeof(sa));
		}
		break;

	case VM86_INTCALL: {
		struct vm86_intcall_args sa;

		if ((error = priv_check(td, PRIV_VM86_INTCALL)))
			return (error);
		if ((error = copyin(ua.sub_args, &sa, sizeof(sa))))
			return (error);
		if ((error = vm86_intcall(sa.intnum, &sa.vmf)))
			return (error);
		error = copyout(&sa, ua.sub_args, sizeof(sa));
		}
		break;

	default:
		error = EINVAL;
	}
	return (error);
}