trap.c revision 113686
1/* $FreeBSD: head/sys/ia64/ia64/trap.c 113686 2003-04-18 20:20:00Z jhb $ */
2/* From: src/sys/alpha/alpha/trap.c,v 1.33 */
3/* $NetBSD: trap.c,v 1.31 1998/03/26 02:21:46 thorpej Exp $ */
4
5/*
6 * Copyright (c) 1994, 1995, 1996 Carnegie-Mellon University.
7 * All rights reserved.
8 *
9 * Author: Chris G. Demetriou
10 *
11 * Permission to use, copy, modify and distribute this software and
12 * its documentation is hereby granted, provided that both the copyright
13 * notice and this permission notice appear in all copies of the
14 * software, derivative works or modified versions, and any portions
15 * thereof, and that both notices appear in supporting documentation.
16 *
17 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
18 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
19 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
20 *
21 * Carnegie Mellon requests users of this software to return to
22 *
23 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
24 *  School of Computer Science
25 *  Carnegie Mellon University
26 *  Pittsburgh PA 15213-3890
27 *
28 * any improvements or extensions that they make and grant Carnegie the
29 * rights to redistribute these changes.
30 */
31
32#include "opt_ddb.h"
33#include "opt_ktrace.h"
34
35#include <sys/param.h>
36#include <sys/systm.h>
37#include <sys/ktr.h>
38#include <sys/sysproto.h>
39#include <sys/kernel.h>
40#include <sys/proc.h>
41#include <sys/exec.h>
42#include <sys/lock.h>
43#include <sys/mutex.h>
44#include <sys/smp.h>
45#include <sys/vmmeter.h>
46#include <sys/sysent.h>
47#include <sys/syscall.h>
48#include <sys/pioctl.h>
49#include <vm/vm.h>
50#include <vm/vm_kern.h>
51#include <vm/vm_page.h>
52#include <vm/vm_map.h>
53#include <vm/vm_extern.h>
54#include <vm/vm_param.h>
55#include <sys/user.h>
56#include <sys/ptrace.h>
57#include <machine/clock.h>
58#include <machine/cpu.h>
59#include <machine/md_var.h>
60#include <machine/reg.h>
61#include <machine/pal.h>
62#include <machine/fpu.h>
63#include <machine/efi.h>
64
65#ifdef KTRACE
66#include <sys/uio.h>
67#include <sys/ktrace.h>
68#endif
69
70#ifdef DDB
71#include <ddb/ddb.h>
72#endif
73
74extern int unaligned_fixup(struct trapframe *framep, struct thread *td);
75static void ia32_syscall(struct trapframe *framep);
76
77/*
78 * EFI-provided FPSWA interface (Floating Point SoftWare Assist).
79 */
80
81/* The function entry address */
82extern FPSWA_INTERFACE *fpswa_interface;
83
84/* Copy of the faulting instruction bundle */
85typedef struct {
86	u_int64_t	bundle_low64;	/* low 64 bits of the 128-bit bundle */
87	u_int64_t	bundle_high64;	/* high 64 bits of the 128-bit bundle */
88} FPSWA_BUNDLE;
89
90/*
91 * The fp state descriptor... tell FPSWA where the "true" copy is.
92 * We save some registers in the trapframe, so we have to point some of
93 * these there.  The rest of the registers are "live"
94 */
typedef-note: passed by reference to fpswa_interface->Fpswa() in trap().
95typedef struct {
96	u_int64_t	bitmask_low64;		/* f63 - f2 */
97	u_int64_t	bitmask_high64;		/* f127 - f64 */
98	struct ia64_fpreg *fp_low_preserved;	/* f2 - f5 */
99	struct ia64_fpreg *fp_low_volatile;	/* f6 - f15 */
100	struct ia64_fpreg *fp_high_preserved;	/* f16 - f31 */
101	struct ia64_fpreg *fp_high_volatile;	/* f32 - f127 */
102} FP_STATE;
103
104#ifdef WITNESS
105extern char *syscallnames[];
106#endif
107
/*
 * Human-readable names for the ia64 interruption vectors, indexed by
 * the `vector' argument passed to trap().  Used only for diagnostic
 * output in printtrap().
 */
108static const char *ia64_vector_names[] = {
109	"VHPT Translation",			/* 0 */
110	"Instruction TLB",			/* 1 */
111	"Data TLB",				/* 2 */
112	"Alternate Instruction TLB",		/* 3 */
113	"Alternate Data TLB",			/* 4 */
114	"Data Nested TLB",			/* 5 */
115	"Instruction Key Miss",			/* 6 */
116	"Data Key Miss",			/* 7 */
117	"Dirty-Bit",				/* 8 */
118	"Instruction Access-Bit",		/* 9 */
119	"Data Access-Bit",			/* 10 */
120	"Break Instruction",			/* 11 */
121	"External Interrupt",			/* 12 */
122	"Reserved 13",				/* 13 */
123	"Reserved 14",				/* 14 */
124	"Reserved 15",				/* 15 */
125	"Reserved 16",				/* 16 */
126	"Reserved 17",				/* 17 */
127	"Reserved 18",				/* 18 */
128	"Reserved 19",				/* 19 */
129	"Page Not Present",			/* 20 */
130	"Key Permission",			/* 21 */
131	"Instruction Access Rights",		/* 22 */
132	"Data Access Rights",			/* 23 */
133	"General Exception",			/* 24 */
134	"Disabled FP-Register",			/* 25 */
135	"NaT Consumption",			/* 26 */
136	"Speculation",				/* 27 */
137	"Reserved 28",				/* 28 */
138	"Debug",				/* 29 */
139	"Unaligned Reference",			/* 30 */
140	"Unsupported Data Reference",		/* 31 */
141	"Floating-point Fault",			/* 32 */
142	"Floating-point Trap",			/* 33 */
143	"Lower-Privilege Transfer Trap",	/* 34 */
144	"Taken Branch Trap",			/* 35 */
145	"Single Step Trap",			/* 36 */
146	"Reserved 37",				/* 37 */
147	"Reserved 38",				/* 38 */
148	"Reserved 39",				/* 39 */
149	"Reserved 40",				/* 40 */
150	"Reserved 41",				/* 41 */
151	"Reserved 42",				/* 42 */
152	"Reserved 43",				/* 43 */
153	"Reserved 44",				/* 44 */
154	"IA-32 Exception",			/* 45 */
155	"IA-32 Intercept",			/* 46 */
156	"IA-32 Interrupt",			/* 47 */
157	"Reserved 48",				/* 48 */
158	"Reserved 49",				/* 49 */
159	"Reserved 50",				/* 50 */
160	"Reserved 51",				/* 51 */
161	"Reserved 52",				/* 52 */
162	"Reserved 53",				/* 53 */
163	"Reserved 54",				/* 54 */
164	"Reserved 55",				/* 55 */
165	"Reserved 56",				/* 56 */
166	"Reserved 57",				/* 57 */
167	"Reserved 58",				/* 58 */
168	"Reserved 59",				/* 59 */
169	"Reserved 60",				/* 60 */
170	"Reserved 61",				/* 61 */
171	"Reserved 62",				/* 62 */
172	"Reserved 63",				/* 63 */
173	"Reserved 64",				/* 64 */
174	"Reserved 65",				/* 65 */
175	"Reserved 66",				/* 66 */
176	"Reserved 67",				/* 67 */
177};
178
/*
 * Maps a (possibly multi-bit) register field to a short display name.
 * printbits() walks arrays of these to decode PSR/ISR images.
 */
179struct bitname {
180	u_int64_t mask;		/* field mask; may cover several contiguous bits */
181	const char* name;	/* short name printed for the field */
182};
183
184static void
185printbits(u_int64_t mask, struct bitname *bn, int count)
186{
187	int i, first = 1;
188	u_int64_t bit;
189
190	for (i = 0; i < count; i++) {
191		/*
192		 * Handle fields wider than one bit.
193		 */
194		bit = bn[i].mask & ~(bn[i].mask - 1);
195		if (bn[i].mask > bit) {
196			if (first)
197				first = 0;
198			else
199				printf(",");
200			printf("%s=%ld", bn[i].name,
201			       (mask & bn[i].mask) / bit);
202		} else if (mask & bit) {
203			if (first)
204				first = 0;
205			else
206				printf(",");
207			printf("%s", bn[i].name);
208		}
209	}
210}
211
/*
 * Decode table for the Processor Status Register image (cr.ipsr),
 * in the order the fields are printed by printpsr().
 */
212struct bitname psr_bits[] = {
213	{IA64_PSR_BE,	"be"},
214	{IA64_PSR_UP,	"up"},
215	{IA64_PSR_AC,	"ac"},
216	{IA64_PSR_MFL,	"mfl"},
217	{IA64_PSR_MFH,	"mfh"},
218	{IA64_PSR_IC,	"ic"},
219	{IA64_PSR_I,	"i"},
220	{IA64_PSR_PK,	"pk"},
221	{IA64_PSR_DT,	"dt"},
222	{IA64_PSR_DFL,	"dfl"},
223	{IA64_PSR_DFH,	"dfh"},
224	{IA64_PSR_SP,	"sp"},
225	{IA64_PSR_PP,	"pp"},
226	{IA64_PSR_DI,	"di"},
227	{IA64_PSR_SI,	"si"},
228	{IA64_PSR_DB,	"db"},
229	{IA64_PSR_LP,	"lp"},
230	{IA64_PSR_TB,	"tb"},
231	{IA64_PSR_RT,	"rt"},
232	{IA64_PSR_CPL,	"cpl"},
233	{IA64_PSR_IS,	"is"},
234	{IA64_PSR_MC,	"mc"},
235	{IA64_PSR_IT,	"it"},
236	{IA64_PSR_ID,	"id"},
237	{IA64_PSR_DA,	"da"},
238	{IA64_PSR_DD,	"dd"},
239	{IA64_PSR_SS,	"ss"},
240	{IA64_PSR_RI,	"ri"},
241	{IA64_PSR_ED,	"ed"},
242	{IA64_PSR_BN,	"bn"},
243	{IA64_PSR_IA,	"ia"},
244};
245
246static void
247printpsr(u_int64_t psr)
248{
249	printbits(psr, psr_bits, sizeof(psr_bits)/sizeof(psr_bits[0]));
250}
251
/*
 * Decode table for the Interruption Status Register image (cr.isr),
 * in the order the fields are printed by printisr().
 */
252struct bitname isr_bits[] = {
253	{IA64_ISR_CODE,	"code"},
254	{IA64_ISR_VECTOR, "vector"},
255	{IA64_ISR_X,	"x"},
256	{IA64_ISR_W,	"w"},
257	{IA64_ISR_R,	"r"},
258	{IA64_ISR_NA,	"na"},
259	{IA64_ISR_SP,	"sp"},
260	{IA64_ISR_RS,	"rs"},
261	{IA64_ISR_IR,	"ir"},
262	{IA64_ISR_NI,	"ni"},
263	{IA64_ISR_SO,	"so"},
264	{IA64_ISR_EI,	"ei"},
265	{IA64_ISR_ED,	"ed"},
266};
267
268static void printisr(u_int64_t isr)
269{
270	printbits(isr, isr_bits, sizeof(isr_bits)/sizeof(isr_bits[0]));
271}
272
/*
 * Dump a human-readable summary of a trap to the console: vector name,
 * the interruption control registers (iip/ipsr/isr/ifa/iim), extra
 * ia32 state when the interrupted context was ia32 (psr.is set), and
 * the current thread/process identity.
 */
273static void
274printtrap(int vector, int imm, struct trapframe *framep, int isfatal, int user)
275{
276	printf("\n");
277	printf("%s %s trap (cpu %d):\n", isfatal? "fatal" : "handled",
278	       user ? "user" : "kernel", PCPU_GET(cpuid));
279	printf("\n");
280	printf("    trap vector = 0x%x (%s)\n",
281	       vector, ia64_vector_names[vector]);
282	printf("    cr.iip      = 0x%lx\n", framep->tf_cr_iip);
283	printf("    cr.ipsr     = 0x%lx (", framep->tf_cr_ipsr);
284	printpsr(framep->tf_cr_ipsr);
285	printf(")\n");
286	printf("    cr.isr      = 0x%lx (", framep->tf_cr_isr);
287	printisr(framep->tf_cr_isr);
288	printf(")\n");
289	printf("    cr.ifa      = 0x%lx\n", framep->tf_cr_ifa);
290	printf("    cr.iim      = 0x%x\n", imm);
	/* psr.is set: the interrupted context was executing ia32 code. */
291	if (framep->tf_cr_ipsr & IA64_PSR_IS) {
292		printf("    ar.cflg     = 0x%lx\n", ia64_get_cflg());
293		printf("    ar.csd      = 0x%lx\n", ia64_get_csd());
294		printf("    ar.ssd      = 0x%lx\n", ia64_get_ssd());
295	}
296	printf("    curthread   = %p\n", curthread);
297	if (curthread != NULL)
298		printf("        pid = %d, comm = %s\n",
299		       curthread->td_proc->p_pid, curthread->td_proc->p_comm);
300	printf("\n");
301}
302
303/*
304 * Trap is called from exception.s to handle most types of processor traps.
305 * System calls are broken out for efficiency and ASTs are broken out
306 * to make the code a bit cleaner and more representative of the
307 * architecture.
 *
 * `vector' is the interruption vector number (indexes ia64_vector_names),
 * `imm' is the cr.iim image at the time of the trap, and `framep' points
 * at the saved machine state.
308 */
309/*ARGSUSED*/
310void
311trap(int vector, int imm, struct trapframe *framep)
312{
313	struct thread *td;
314	struct proc *p;
315	int i;
316	u_int64_t ucode;
317	u_int sticks;
318	int user;
319
320	cnt.v_trap++;
321	td = curthread;
322	p = td->td_proc;
323	ucode = 0;
324
325	/*
326	 * Make sure we have a sane floating-point state in case the
327	 * user has trashed it.
328	 */
329	ia64_set_fpsr(IA64_FPSR_DEFAULT);
330
	/* The cpl field of the interrupted psr tells user from kernel. */
331	user = ((framep->tf_cr_ipsr & IA64_PSR_CPL) == IA64_PSR_CPL_USER);
332	if (user) {
333		sticks = td->td_sticks;
334		td->td_frame = framep;
335		if (td->td_ucred != p->p_ucred)
336			cred_update_thread(td);
337		PROC_LOCK(p);
338		if ((p->p_flag & P_WEXIT) && (p->p_singlethread != td)) {
339			mtx_lock_spin(&sched_lock);
340			thread_exit();
341			/* NOTREACHED */
342		}
343		PROC_UNLOCK(p);
344	} else {
345		sticks = 0;		/* XXX bogus -Wuninitialized warning */
346		KASSERT(cold || td->td_ucred != NULL,
347		    ("kernel trap doesn't have ucred"));
348	}
349
350	switch (vector) {
351	case IA64_VEC_UNALIGNED_REFERENCE:
352		/*
353		 * If user-land, do whatever fixups, printing, and
354		 * signalling is appropriate (based on system-wide
355		 * and per-process unaligned-access-handling flags).
356		 */
357		if (user) {
358			mtx_lock(&Giant);
359			i = unaligned_fixup(framep, td);
360			mtx_unlock(&Giant);
361			if (i == 0)
362				goto out;
363			ucode = framep->tf_cr_ifa;	/* VA */
364			break;
365		}
366
367		/*
368		 * Unaligned access from kernel mode is always an error,
369		 * EVEN IF A COPY FAULT HANDLER IS SET!
370		 *
371		 * It's an error if a copy fault handler is set because
372		 * the various routines which do user-initiated copies
373		 * do so in a bcopy-like manner.  In other words, the
374		 * kernel never assumes that pointers provided by the
375		 * user are properly aligned, and so if the kernel
376		 * does cause an unaligned access it's a kernel bug.
377		 */
378		goto dopanic;
379
380	case IA64_VEC_FLOATING_POINT_FAULT:
381	{
382		FP_STATE fp_state;
383		FPSWA_RET fpswa_ret;
384		FPSWA_BUNDLE bundle;
385
386		/* Always fatal in kernel.  Should never happen. */
387		if (!user)
388			goto dopanic;
389		if (fpswa_interface == NULL) {
390			i = SIGFPE;
391			ucode = 0;
392			break;
393		}
394		mtx_lock(&Giant);
395	        i = copyin((const void *)(framep->tf_cr_iip), &bundle, 16);
396		mtx_unlock(&Giant);
397		if (i) {
398			i = SIGBUS;		/* EFAULT, basically */
399			ucode = /*a0*/ 0;	/* exception summary */
400			break;
401		}
402		/* f6-f15 are saved in exception_save */
403		fp_state.bitmask_low64 = 0xffc0;	/* bits 6 - 15 */
404		fp_state.bitmask_high64 = 0x0;
405		fp_state.fp_low_preserved = NULL;
406		fp_state.fp_low_volatile = framep->tf_f;
407		fp_state.fp_high_preserved = NULL;
408		fp_state.fp_high_volatile = NULL;
409		/* The docs are unclear.  Is Fpswa reentrant? */
410		fpswa_ret = fpswa_interface->Fpswa(1, &bundle,
411		    &framep->tf_cr_ipsr, &framep->tf_ar_fpsr,
412		    &framep->tf_cr_isr, &framep->tf_pr,
413		    &framep->tf_cr_ifs, &fp_state);
414		if (fpswa_ret.status == 0) {
415			/* fixed.  update ipsr and iip to next insn */
416			int ei;
417
			/*
			 * NOTE(review): isr bits 41-42 select the faulting
			 * slot; the IA64_ISR_EI masks are applied to ipsr
			 * below, which presumably aliases the psr.ri bit
			 * positions -- confirm against the architecture
			 * manual.
			 */
418			ei = (framep->tf_cr_isr >> 41) & 0x03;
419			if (ei == 0) {		/* no template for this case */
420				framep->tf_cr_ipsr &= ~IA64_ISR_EI;
421				framep->tf_cr_ipsr |= IA64_ISR_EI_1;
422			} else if (ei == 1) {	/* MFI or MFB */
423				framep->tf_cr_ipsr &= ~IA64_ISR_EI;
424				framep->tf_cr_ipsr |= IA64_ISR_EI_2;
425			} else if (ei == 2) {	/* MMF */
426				framep->tf_cr_ipsr &= ~IA64_ISR_EI;
427				framep->tf_cr_iip += 0x10;
428			}
429			goto out;
430		} else if (fpswa_ret.status == -1) {
431			printf("FATAL: FPSWA err1 %lx, err2 %lx, err3 %lx\n",
432			    fpswa_ret.err1, fpswa_ret.err2, fpswa_ret.err3);
433			panic("fpswa fatal error on fp fault");
434		} else if (fpswa_ret.status > 0) {
435#if 0
436			if (fpswa_ret.status & 1) {
437				/*
438				 * New exception needs to be raised.
439				 * If set then the following bits also apply:
440				 * & 2 -> fault was converted to a trap
441				 * & 4 -> SIMD caused the exception
442				 */
443				i = SIGFPE;
444				ucode = /*a0*/ 0;	/* exception summary */
445				break;
446			}
447#endif
448			i = SIGFPE;
449			ucode = /*a0*/ 0;		/* exception summary */
450			break;
451		} else {
452			panic("bad fpswa return code %lx", fpswa_ret.status);
453		}
454	}
455
456	case IA64_VEC_FLOATING_POINT_TRAP:
457	{
458		FP_STATE fp_state;
459		FPSWA_RET fpswa_ret;
460		FPSWA_BUNDLE bundle;
461
462		/* Always fatal in kernel.  Should never happen. */
463		if (!user)
464			goto dopanic;
465		if (fpswa_interface == NULL) {
466			i = SIGFPE;
467			ucode = 0;
468			break;
469		}
470		mtx_lock(&Giant);
471	        i = copyin((const void *)(framep->tf_cr_iip), &bundle, 16);
472		mtx_unlock(&Giant);
473		if (i) {
474			i = SIGBUS;			/* EFAULT, basically */
475			ucode = /*a0*/ 0;		/* exception summary */
476			break;
477		}
478		/* f6-f15 are saved in exception_save */
479		fp_state.bitmask_low64 = 0xffc0;	/* bits 6 - 15 */
480		fp_state.bitmask_high64 = 0x0;
481		fp_state.fp_low_preserved = NULL;
482		fp_state.fp_low_volatile = framep->tf_f;
483		fp_state.fp_high_preserved = NULL;
484		fp_state.fp_high_volatile = NULL;
485		/* The docs are unclear.  Is Fpswa reentrant? */
486		fpswa_ret = fpswa_interface->Fpswa(0, &bundle,
487		    &framep->tf_cr_ipsr, &framep->tf_ar_fpsr,
488		    &framep->tf_cr_isr, &framep->tf_pr,
489		    &framep->tf_cr_ifs, &fp_state);
490		if (fpswa_ret.status == 0) {
491			/* fixed */
492			/*
493			 * should we increment iip like the fault case?
494			 * or has fpswa done something like normalizing a
495			 * register so that we should just rerun it?
496			 */
497			goto out;
498		} else if (fpswa_ret.status == -1) {
499			printf("FATAL: FPSWA err1 %lx, err2 %lx, err3 %lx\n",
500			    fpswa_ret.err1, fpswa_ret.err2, fpswa_ret.err3);
501			panic("fpswa fatal error on fp trap");
502		} else if (fpswa_ret.status > 0) {
503			i = SIGFPE;
504			ucode = /*a0*/ 0;		/* exception summary */
505			break;
506		} else {
507			panic("bad fpswa return code %lx", fpswa_ret.status);
508		}
509	}
510
511	case IA64_VEC_DISABLED_FP:
512		/*
513		 * on exit from the kernel, if thread == fpcurthread,
514		 * FP is enabled.
515		 */
516		if (PCPU_GET(fpcurthread) == td) {
517			printf("trap: fp disabled for fpcurthread == %p", td);
518			goto dopanic;
519		}
520
521		ia64_fpstate_switch(td);
522		goto out;
523		break;
524
525	case IA64_VEC_PAGE_NOT_PRESENT:
526	case IA64_VEC_INST_ACCESS_RIGHTS:
527	case IA64_VEC_DATA_ACCESS_RIGHTS:
528	{
529		vm_offset_t va;
530		struct vmspace *vm;
531		vm_map_t map;
532		vm_prot_t ftype;
533		int rv;
534
535		rv = 0;
536		va = framep->tf_cr_ifa;
537
538		/*
539		 * If it was caused by fuswintr or suswintr, just punt. Note
540		 * that we check the faulting address against the address
541		 * accessed by [fs]uswintr, in case another fault happens when
542		 * they are running.
543		 */
544		if (!user && td != NULL && td->td_pcb->pcb_accessaddr == va &&
545		    td->td_pcb->pcb_onfault == (unsigned long)fswintrberr) {
546			framep->tf_cr_iip = td->td_pcb->pcb_onfault;
547			framep->tf_cr_ipsr &= ~IA64_PSR_RI;
548			td->td_pcb->pcb_onfault = 0;
549			goto out;
550		}
551
552		va = trunc_page((vm_offset_t)va);
553
554		if (va >= VM_MIN_KERNEL_ADDRESS) {
555			/*
556			 * Don't allow user-mode faults for kernel virtual
557			 * addresses
558			 */
559			if (user)
560				goto no_fault_in;
561			map = kernel_map;
562		} else {
563			vm = (p != NULL) ? p->p_vmspace : NULL;
564			if (vm == NULL)
565				goto no_fault_in;
566			map = &vm->vm_map;
567		}
568
		/* Derive the fault type from the isr execute/write bits. */
569		if (framep->tf_cr_isr & IA64_ISR_X)
570			ftype = VM_PROT_EXECUTE;
571		else if (framep->tf_cr_isr & IA64_ISR_W)
572			ftype = VM_PROT_WRITE;
573		else
574			ftype = VM_PROT_READ;
575
576		if (map != kernel_map) {
577			/*
578			 * Keep swapout from messing with us during this
579			 * critical time.
580			 */
581			PROC_LOCK(p);
582			++p->p_lock;
583			PROC_UNLOCK(p);
584
585			/* Fault in the user page: */
586			rv = vm_fault(map, va, ftype, (ftype & VM_PROT_WRITE)
587			    ? VM_FAULT_DIRTY : VM_FAULT_NORMAL);
588
589			PROC_LOCK(p);
590			--p->p_lock;
591			PROC_UNLOCK(p);
592		} else {
593			/*
594			 * Don't have to worry about process locking or
595			 * stacks in the kernel.
596			 */
597			rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
598		}
599
600		if (rv == KERN_SUCCESS)
601			goto out;
602
603	no_fault_in:
604		if (!user) {
605			/* Check for copyin/copyout fault. */
606			if (td != NULL && td->td_pcb->pcb_onfault != 0) {
607				framep->tf_cr_iip = td->td_pcb->pcb_onfault;
608				framep->tf_cr_ipsr &= ~IA64_PSR_RI;
609				td->td_pcb->pcb_onfault = 0;
610				goto out;
611			}
612			goto dopanic;
613		}
614		ucode = va;
615		i = (rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV;
616		break;
617	}
618
619	case IA64_VEC_SINGLE_STEP_TRAP:
620	case IA64_VEC_DEBUG:
621	case IA64_VEC_TAKEN_BRANCH_TRAP:
622	case IA64_VEC_BREAK:
623		/*
624		 * These are always fatal in kernel, and should never happen.
625		 */
626		if (!user) {
627#ifdef DDB
628			/*
629			 * ...unless, of course, DDB is configured.
630			 */
631			if (kdb_trap(vector, framep))
632				return;
633
634			/*
635			 * If we get here, DDB did _not_ handle the
636			 * trap, and we need to PANIC!
637			 */
638#endif
639			goto dopanic;
640		}
641		i = SIGTRAP;
642		break;
643
644	case IA64_VEC_GENERAL_EXCEPTION:
645		if (user) {
646			ucode = vector;
647			i = SIGILL;
648			break;
649		}
650		goto dopanic;
651
652	case IA64_VEC_UNSUPP_DATA_REFERENCE:
653	case IA64_VEC_LOWER_PRIVILEGE_TRANSFER:
654		if (user) {
655			ucode = vector;
656			i = SIGBUS;
657			break;
658		}
659		goto dopanic;
660
661	case IA64_VEC_IA32_EXCEPTION:
662	{
		/* Map ia32 exception numbers (isr bits 16-31) to signals. */
663		u_int64_t isr = framep->tf_cr_isr;
664
665		switch ((isr >> 16) & 0xffff) {
666		case IA32_EXCEPTION_DIVIDE:
667			ucode = FPE_INTDIV;
668			i = SIGFPE;
669			break;
670
671		case IA32_EXCEPTION_DEBUG:
672		case IA32_EXCEPTION_BREAK:
673			i = SIGTRAP;
674			break;
675
676		case IA32_EXCEPTION_OVERFLOW:
677			ucode = FPE_INTOVF;
678			i = SIGFPE;
679			break;
680
681		case IA32_EXCEPTION_BOUND:
682			ucode = FPE_FLTSUB;
683			i = SIGFPE;
684			break;
685
686		case IA32_EXCEPTION_DNA:
687			ucode = 0;
688			i = SIGFPE;
689			break;
690
691		case IA32_EXCEPTION_NOT_PRESENT:
692		case IA32_EXCEPTION_STACK_FAULT:
693		case IA32_EXCEPTION_GPFAULT:
694			ucode = (isr & 0xffff) + BUS_SEGM_FAULT;
695			i = SIGBUS;
696			break;
697
698		case IA32_EXCEPTION_FPERROR:
699			ucode = 0; /* XXX */
700			i = SIGFPE;
701			break;
702
703		case IA32_EXCEPTION_ALIGNMENT_CHECK:
704			ucode = framep->tf_cr_ifa;	/* VA */
705			i = SIGBUS;
706			break;
707
708		case IA32_EXCEPTION_STREAMING_SIMD:
709			ucode = 0; /* XXX */
710			i = SIGFPE;
711			break;
712
713		default:
714			goto dopanic;
715		}
716		break;
717	}
718
719	case IA64_VEC_IA32_INTERRUPT:
720		/*
721		 * INT n instruction - probably a syscall.
722		 */
723		if (((framep->tf_cr_isr >> 16) & 0xffff) == 0x80) {
724			ia32_syscall(framep);
725			goto out;
726		} else {
727			ucode = (framep->tf_cr_isr >> 16) & 0xffff;
728			i = SIGILL;
729			break;
730		}
731
732	case IA64_VEC_IA32_INTERCEPT:
733		/*
734		 * Maybe need to emulate ia32 instruction.
735		 */
736		goto dopanic;
737
738	default:
739		goto dopanic;
740	}
741
742#ifdef DEBUG
743	printtrap(vector, imm, framep, 1, user);
744#endif
	/* Reached only via `break': post signal `i' with code `ucode'. */
745	trapsignal(td, i, ucode);
/* Common exit path: back out through userret() if we trapped from user mode. */
746out:
747	if (user) {
748		userret(td, framep, sticks);
749		mtx_assert(&Giant, MA_NOTOWNED);
750#ifdef DIAGNOSTIC
751		cred_free_thread(td);
752#endif
753	}
754	return;
755
/* Unrecoverable trap: print the machine state, give DDB a chance, panic. */
756dopanic:
757	printtrap(vector, imm, framep, 1, user);
758
759	/* XXX dump registers */
760
761#ifdef DDB
762	kdb_trap(vector, framep);
763#endif
764
765	panic("trap");
766}
767
768/*
769 * Process a system call.
770 *
771 * System calls are strange beasts.  They are passed the syscall number
772 * in r15, and the arguments in the registers (as normal).  They return
773 * an error flag in r10 (if r10 != 0 on return, the syscall had an error),
774 * and the return value (if any) in r8 and r9.
775 *
776 * The assembly stub takes care of moving the call number into a register
777 * we can get to, and moves all of the argument registers into a stack
778 * buffer.  On return, it restores r8-r10 from the frame before
779 * returning to the user process.
780 */
781void
782syscall(int code, u_int64_t *args, struct trapframe *framep)
783{
784	struct sysent *callp;
785	struct thread *td;
786	struct proc *p;
787	int error = 0;
788	u_int64_t oldip, oldri;
789	u_int sticks;
790
791	cnt.v_syscall++;
792	td = curthread;
793	p = td->td_proc;
794
795	td->td_frame = framep;
796	sticks = td->td_sticks;
797	if (td->td_ucred != p->p_ucred)
798		cred_update_thread(td);
799
800	/*
801	 * Skip past the break instruction. Remember old address in case
802	 * we have to restart.
803	 */
804	oldip = framep->tf_cr_iip;
805	oldri = framep->tf_cr_ipsr & IA64_PSR_RI;
806	framep->tf_cr_ipsr += IA64_PSR_RI_1;
807	if ((framep->tf_cr_ipsr & IA64_PSR_RI) > IA64_PSR_RI_2) {
808		framep->tf_cr_ipsr &= ~IA64_PSR_RI;
809		framep->tf_cr_iip += 16;
810	}
811
	/*
	 * NOTE(review): thread_user_enter() presumably records upcall
	 * state for KSE (threaded) processes -- see its definition.
	 */
812	if (p->p_flag & P_THREADED)
813		thread_user_enter(p, td);
814#ifdef DIAGNOSTIC
815	ia64_fpstate_check(td);
816#endif
817
818	if (p->p_sysent->sv_prepsyscall) {
819		/* (*p->p_sysent->sv_prepsyscall)(framep, args, &code, &params); */
820		panic("prepsyscall");
821	} else {
822		/*
823		 * syscall() and __syscall() are handled the same on
824		 * the ia64, as everything is 64-bit aligned, anyway.
825		 */
826		if (code == SYS_syscall || code == SYS___syscall) {
827			/*
828			 * Code is first argument, followed by actual args.
829			 */
830			code = args[0];
831			args++;
832		}
833	}
834
835 	if (p->p_sysent->sv_mask)
836 		code &= p->p_sysent->sv_mask;
837
	/* Out-of-range codes dispatch to entry 0 (the nosys handler). */
838 	if (code >= p->p_sysent->sv_size)
839 		callp = &p->p_sysent->sv_table[0];
840  	else
841 		callp = &p->p_sysent->sv_table[code];
842
843	/*
844	 * Try to run the syscall without Giant if the syscall is MP safe.
845	 */
846	if ((callp->sy_narg & SYF_MPSAFE) == 0)
847		mtx_lock(&Giant);
848#ifdef KTRACE
849	if (KTRPOINT(td, KTR_SYSCALL))
850		ktrsyscall(code, (callp->sy_narg & SYF_ARGMASK), args);
851#endif
	/* error cannot be non-zero yet (no prepsyscall ran); check kept for symmetry. */
852	if (error == 0) {
853		td->td_retval[0] = 0;
854		td->td_retval[1] = 0;
855
856		STOPEVENT(p, S_SCE, (callp->sy_narg & SYF_ARGMASK));
857
858		error = (*callp->sy_call)(td, args);
859	}
860
861
862	switch (error) {
863	case 0:
864		framep->tf_r[FRAME_R8] = td->td_retval[0];
865		framep->tf_r[FRAME_R9] = td->td_retval[1];
866		framep->tf_r[FRAME_R10] = 0;
867		break;
868	case ERESTART:
		/* Rewind iip/psr.ri so the break instruction re-executes. */
869		framep->tf_cr_iip = oldip;
870		framep->tf_cr_ipsr =
871			(framep->tf_cr_ipsr & ~IA64_PSR_RI) | oldri;
872		break;
873	case EJUSTRETURN:
874		break;
875	default:
876		if (p->p_sysent->sv_errsize) {
877			if (error >= p->p_sysent->sv_errsize)
878				error = -1; /* XXX */
879			else
880				error = p->p_sysent->sv_errtbl[error];
881		}
882		framep->tf_r[FRAME_R8] = error;
883		framep->tf_r[FRAME_R10] = 1;
884		break;
885	}
886
887	/*
888	 * Release Giant if we had to get it.
889	 */
890	if ((callp->sy_narg & SYF_MPSAFE) == 0)
891		mtx_unlock(&Giant);
892
893	userret(td, framep, sticks);
894
895#ifdef KTRACE
896	if (KTRPOINT(td, KTR_SYSRET))
897		ktrsysret(code, error, td->td_retval[0]);
898#endif
899	/*
900	 * This works because errno is findable through the
901	 * register set.  If we ever support an emulation where this
902	 * is not the case, this code will need to be revisited.
903	 */
904	STOPEVENT(p, S_SCX, code);
905
906#ifdef DIAGNOSTIC
907	cred_free_thread(td);
908#endif
909	WITNESS_WARN(WARN_PANIC, NULL, "System call %s returning",
910	    syscallnames[code]);
911	mtx_assert(&sched_lock, MA_NOTOWNED);
912	mtx_assert(&Giant, MA_NOTOWNED);
913}
914
915#include <i386/include/psl.h>
916
/*
 * Handle an ia32 (i386) `int 0x80' system call from an ia32 process.
 * The syscall number arrives in r8 (ia32 eax); 32-bit arguments are
 * copied in from the ia32 stack, zero-extended to 64 bits and
 * dispatched through the process sysent table.  Results go back in
 * r8/r10 (eax/edx) with the ia32 carry flag signalling error.
 */
917static void
918ia32_syscall(struct trapframe *framep)
919{
920	caddr_t params;
921	int i;
922	struct sysent *callp;
923	struct thread *td = curthread;
924	struct proc *p = td->td_proc;
925	register_t orig_eflags;
926	u_int sticks;
927	int error;
928	int narg;
929	u_int32_t args[8];
930	u_int64_t args64[8];
931	u_int code;
932
933	/*
934	 * note: PCPU_LAZY_INC() can only be used if we can afford
935	 * occassional inaccuracy in the count.
936	 */
937	cnt.v_syscall++;
938
939	sticks = td->td_sticks;
940	td->td_frame = framep;
941	if (td->td_ucred != p->p_ucred)
942		cred_update_thread(td);
	/*
	 * NOTE(review): the ia32 stack pointer is truncated to 32 bits;
	 * the +sizeof(u_int32_t) presumably skips the 32-bit return
	 * address pushed on the ia32 stack -- confirm.
	 */
943	params = (caddr_t)(framep->tf_r[FRAME_SP] & ((1L<<32)-1))
944		+ sizeof(u_int32_t);
945	code = framep->tf_r[FRAME_R8]; /* eax */
946	orig_eflags = ia64_get_eflag();
947
948	if (p->p_sysent->sv_prepsyscall) {
949		/*
950		 * The prep code is MP aware.
951		 */
952		(*p->p_sysent->sv_prepsyscall)(framep, args, &code, &params);
953	} else {
954		/*
955		 * Need to check if this is a 32 bit or 64 bit syscall.
956		 * fuword is MP aware.
957		 */
958		if (code == SYS_syscall) {
959			/*
960			 * Code is first argument, followed by actual args.
961			 */
962			code = fuword32(params);
963			params += sizeof(int);
964		} else if (code == SYS___syscall) {
965			/*
966			 * Like syscall, but code is a quad, so as to maintain
967			 * quad alignment for the rest of the arguments.
968			 * We use a 32-bit fetch in case params is not
969			 * aligned.
970			 */
971			code = fuword32(params);
972			params += sizeof(quad_t);
973		}
974	}
975
976 	if (p->p_sysent->sv_mask)
977 		code &= p->p_sysent->sv_mask;
978
	/* Out-of-range codes dispatch to entry 0 (the nosys handler). */
979 	if (code >= p->p_sysent->sv_size)
980 		callp = &p->p_sysent->sv_table[0];
981  	else
982 		callp = &p->p_sysent->sv_table[code];
983
984	narg = callp->sy_narg & SYF_ARGMASK;
985
986	/*
987	 * copyin and the ktrsyscall()/ktrsysret() code is MP-aware
988	 */
989	if (params != NULL && narg != 0)
990		error = copyin(params, (caddr_t)args,
991		    (u_int)(narg * sizeof(int)));
992	else
993		error = 0;
994
	/* Zero-extend the 32-bit arguments for the 64-bit handlers. */
995	for (i = 0; i < narg; i++)
996		args64[i] = args[i];
997
998#ifdef KTRACE
999	if (KTRPOINT(td, KTR_SYSCALL))
1000		ktrsyscall(code, narg, args64);
1001#endif
1002	/*
1003	 * Try to run the syscall without Giant if the syscall
1004	 * is MP safe.
1005	 */
1006	if ((callp->sy_narg & SYF_MPSAFE) == 0)
1007		mtx_lock(&Giant);
1008
1009	if (error == 0) {
1010		td->td_retval[0] = 0;
1011		td->td_retval[1] = framep->tf_r[FRAME_R10]; /* edx */
1012
1013		STOPEVENT(p, S_SCE, narg);
1014
1015		error = (*callp->sy_call)(td, args64);
1016	}
1017
1018	switch (error) {
1019	case 0:
1020		framep->tf_r[FRAME_R8] = td->td_retval[0]; /* eax */
1021		framep->tf_r[FRAME_R10] = td->td_retval[1]; /* edx */
1022		ia64_set_eflag(ia64_get_eflag() & ~PSL_C);
1023		break;
1024
1025	case ERESTART:
1026		/*
1027		 * Reconstruct pc, assuming lcall $X,y is 7 bytes,
1028		 * int 0x80 is 2 bytes. XXX Assume int 0x80.
1029		 */
1030		framep->tf_cr_iip -= 2;
1031		break;
1032
1033	case EJUSTRETURN:
1034		break;
1035
1036	default:
1037 		if (p->p_sysent->sv_errsize) {
1038 			if (error >= p->p_sysent->sv_errsize)
1039  				error = -1;	/* XXX */
1040   			else
1041  				error = p->p_sysent->sv_errtbl[error];
1042		}
1043		framep->tf_r[FRAME_R8] = error;
1044		ia64_set_eflag(ia64_get_eflag() | PSL_C);
1045		break;
1046	}
1047
1048	/*
1049	 * Traced syscall.
1050	 */
1051	if ((orig_eflags & PSL_T) && !(orig_eflags & PSL_VM)) {
1052		ia64_set_eflag(ia64_get_eflag() & ~PSL_T);
1053		trapsignal(td, SIGTRAP, 0);
1054	}
1055
1056	/*
1057	 * Release Giant if we previously set it.
1058	 */
1059	if ((callp->sy_narg & SYF_MPSAFE) == 0)
1060		mtx_unlock(&Giant);
1061
1062	/*
1063	 * Handle reschedule and other end-of-syscall issues
1064	 */
1065	userret(td, framep, sticks);
1066
1067#ifdef KTRACE
1068	if (KTRPOINT(td, KTR_SYSRET))
1069		ktrsysret(code, error, td->td_retval[0]);
1070#endif
1071
1072	/*
1073	 * This works because errno is findable through the
1074	 * register set.  If we ever support an emulation where this
1075	 * is not the case, this code will need to be revisited.
1076	 */
1077	STOPEVENT(p, S_SCX, code);
1078
1079#ifdef DIAGNOSTIC
1080	cred_free_thread(td);
1081#endif
1082	WITNESS_WARN(WARN_PANIC, NULL, "System call %s returning",
1083	    syscallnames[code]);
1084	mtx_assert(&sched_lock, MA_NOTOWNED);
1085	mtx_assert(&Giant, MA_NOTOWNED);
1086}
1087