/*-
 * Copyright (c) 2005 Marcel Moolenaar
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/ia64/ia64/trap.c 158651 2006-05-16 14:37:58Z phk $");

#include "opt_ddb.h"
#include "opt_ktrace.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kdb.h>
#include <sys/ktr.h>
#include <sys/sysproto.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/exec.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/vmmeter.h>
#include <sys/sysent.h>
#include <sys/signalvar.h>
#include <sys/syscall.h>
#include <sys/pioctl.h>
#include <sys/ptrace.h>
#include <sys/sysctl.h>
#include <vm/vm.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>
#include <vm/vm_param.h>
#include <machine/cpu.h>
#include <machine/md_var.h>
#include <machine/reg.h>
#include <machine/pal.h>
#include <machine/fpu.h>
#include <machine/efi.h>
#include <machine/pcb.h>
#ifdef SMP
#include <machine/smp.h>
#endif

#ifdef KTRACE
#include <sys/uio.h>
#include <sys/ktrace.h>
#endif

#include <ia64/disasm/disasm.h>

static int print_usertrap = 0;
SYSCTL_INT(_machdep, OID_AUTO, print_usertrap,
    CTLFLAG_RW, &print_usertrap, 0, "Print information about user traps");

static void break_syscall(struct trapframe *tf);

/*
 * EFI-Provided FPSWA interface (Floating Point SoftWare Assist)
 */
extern struct fpswa_iface *fpswa_iface;

#ifdef WITNESS
extern char *syscallnames[];
#endif

static const char *ia64_vector_names[] = {
	"VHPT Translation",			/* 0 */
	"Instruction TLB",			/* 1 */
	"Data TLB",				/* 2 */
	"Alternate Instruction TLB",		/* 3 */
	"Alternate Data TLB",			/* 4 */
	"Data Nested TLB",			/* 5 */
	"Instruction Key Miss",			/* 6 */
	"Data Key Miss",			/* 7 */
	"Dirty-Bit",				/* 8 */
	"Instruction Access-Bit",		/* 9 */
	"Data Access-Bit",			/* 10 */
	"Break Instruction",			/* 11 */
	"External Interrupt",			/* 12 */
	"Reserved 13",				/* 13 */
	"Reserved 14",				/* 14 */
	"Reserved 15",				/* 15 */
	"Reserved 16",				/* 16 */
	"Reserved 17",				/* 17 */
	"Reserved 18",				/* 18 */
	"Reserved 19",				/* 19 */
	"Page Not Present",			/* 20 */
	"Key Permission",			/* 21 */
	"Instruction Access Rights",		/* 22 */
	"Data Access Rights",			/* 23 */
	"General Exception",			/* 24 */
	"Disabled FP-Register",			/* 25 */
	"NaT Consumption",			/* 26 */
	"Speculation",				/* 27 */
	"Reserved 28",				/* 28 */
	"Debug",				/* 29 */
	"Unaligned Reference",			/* 30 */
	"Unsupported Data Reference",		/* 31 */
	"Floating-point Fault",			/* 32 */
	"Floating-point Trap",			/* 33 */
	"Lower-Privilege Transfer Trap",	/* 34 */
	"Taken Branch Trap",			/* 35 */
	"Single Step Trap",			/* 36 */
	"Reserved 37",				/* 37 */
	"Reserved 38",				/* 38 */
	"Reserved 39",				/* 39 */
	"Reserved 40",				/* 40 */
	"Reserved 41",				/* 41 */
	"Reserved 42",				/* 42 */
	"Reserved 43",				/* 43 */
	"Reserved 44",				/* 44 */
	"IA-32 Exception",			/* 45 */
	"IA-32 Intercept",			/* 46 */
	"IA-32 Interrupt",			/* 47 */
	"Reserved 48",				/* 48 */
	"Reserved 49",				/* 49 */
	"Reserved 50",				/* 50 */
	"Reserved 51",				/* 51 */
	"Reserved 52",				/* 52 */
	"Reserved 53",				/* 53 */
	"Reserved 54",				/* 54 */
	"Reserved 55",				/* 55 */
	"Reserved 56",				/* 56 */
	"Reserved 57",				/* 57 */
	"Reserved 58",				/* 58 */
	"Reserved 59",				/* 59 */
	"Reserved 60",				/* 60 */
	"Reserved 61",				/* 61 */
	"Reserved 62",				/* 62 */
	"Reserved 63",				/* 63 */
	"Reserved 64",				/* 64 */
	"Reserved 65",				/* 65 */
	"Reserved 66",				/* 66 */
	"Reserved 67",				/* 67 */
};

struct bitname {
	uint64_t mask;
	const char *name;
};

static void
printbits(uint64_t mask, struct bitname *bn, int count)
{
	int i, first = 1;
	uint64_t bit;

	for (i = 0; i < count; i++) {
		/*
		 * Handle fields wider than one bit.
		 */
		bit = bn[i].mask & ~(bn[i].mask - 1);
		if (bn[i].mask > bit) {
			if (first)
				first = 0;
			else
				printf(",");
			printf("%s=%ld", bn[i].name,
			       (mask & bn[i].mask) / bit);
		} else if (mask & bit) {
			if (first)
				first = 0;
			else
				printf(",");
			printf("%s", bn[i].name);
		}
	}
}

struct bitname psr_bits[] = {
	{IA64_PSR_BE,	"be"},
	{IA64_PSR_UP,	"up"},
	{IA64_PSR_AC,	"ac"},
	{IA64_PSR_MFL,	"mfl"},
	{IA64_PSR_MFH,	"mfh"},
	{IA64_PSR_IC,	"ic"},
	{IA64_PSR_I,	"i"},
	{IA64_PSR_PK,	"pk"},
	{IA64_PSR_DT,	"dt"},
	{IA64_PSR_DFL,	"dfl"},
	{IA64_PSR_DFH,	"dfh"},
	{IA64_PSR_SP,	"sp"},
	{IA64_PSR_PP,	"pp"},
	{IA64_PSR_DI,	"di"},
	{IA64_PSR_SI,	"si"},
	{IA64_PSR_DB,	"db"},
	{IA64_PSR_LP,	"lp"},
	{IA64_PSR_TB,	"tb"},
	{IA64_PSR_RT,	"rt"},
	{IA64_PSR_CPL,	"cpl"},
	{IA64_PSR_IS,	"is"},
	{IA64_PSR_MC,	"mc"},
	{IA64_PSR_IT,	"it"},
	{IA64_PSR_ID,	"id"},
	{IA64_PSR_DA,	"da"},
	{IA64_PSR_DD,	"dd"},
	{IA64_PSR_SS,	"ss"},
	{IA64_PSR_RI,	"ri"},
	{IA64_PSR_ED,	"ed"},
	{IA64_PSR_BN,	"bn"},
	{IA64_PSR_IA,	"ia"},
};

static void
printpsr(uint64_t psr)
{
	printbits(psr, psr_bits, sizeof(psr_bits)/sizeof(psr_bits[0]));
}
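
/*
 * Usage sketch (illustrative only, not part of the original file;
 * assumes IA64_PSR_CPL occupies bits 32:33):
 *
 *	uint64_t psr = IA64_PSR_IC | IA64_PSR_I | IA64_PSR_DT |
 *	    IA64_PSR_RT | (3UL << 32) | IA64_PSR_IT | IA64_PSR_BN;
 *	printpsr(psr);
 *
 * prints "ic,i,dt,rt,cpl=3,it,ri=0,bn": printbits() walks the table in
 * order, prints single-bit fields by name only when set, and prints
 * multi-bit fields (cpl, ri) unconditionally as name=value.
 */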

struct bitname isr_bits[] = {
	{IA64_ISR_CODE,	"code"},
	{IA64_ISR_VECTOR, "vector"},
	{IA64_ISR_X,	"x"},
	{IA64_ISR_W,	"w"},
	{IA64_ISR_R,	"r"},
	{IA64_ISR_NA,	"na"},
	{IA64_ISR_SP,	"sp"},
	{IA64_ISR_RS,	"rs"},
	{IA64_ISR_IR,	"ir"},
	{IA64_ISR_NI,	"ni"},
	{IA64_ISR_SO,	"so"},
	{IA64_ISR_EI,	"ei"},
	{IA64_ISR_ED,	"ed"},
};

static void
printisr(uint64_t isr)
{
	printbits(isr, isr_bits, sizeof(isr_bits)/sizeof(isr_bits[0]));
}

static void
printtrap(int vector, struct trapframe *tf, int isfatal, int user)
{
	printf("\n");
	printf("%s %s trap (cpu %d):\n", isfatal ? "fatal" : "handled",
	       user ? "user" : "kernel", PCPU_GET(cpuid));
	printf("\n");
	printf("    trap vector = 0x%x (%s)\n",
	       vector, ia64_vector_names[vector]);
	printf("    cr.iip      = 0x%lx\n", tf->tf_special.iip);
	printf("    cr.ipsr     = 0x%lx (", tf->tf_special.psr);
	printpsr(tf->tf_special.psr);
	printf(")\n");
	printf("    cr.isr      = 0x%lx (", tf->tf_special.isr);
	printisr(tf->tf_special.isr);
	printf(")\n");
	printf("    cr.ifa      = 0x%lx\n", tf->tf_special.ifa);
	if (tf->tf_special.psr & IA64_PSR_IS) {
		printf("    ar.cflg     = 0x%lx\n", ia64_get_cflg());
		printf("    ar.csd      = 0x%lx\n", ia64_get_csd());
		printf("    ar.ssd      = 0x%lx\n", ia64_get_ssd());
	}
	printf("    curthread   = %p\n", curthread);
	if (curthread != NULL)
		printf("        pid = %d, comm = %s\n",
		       curthread->td_proc->p_pid, curthread->td_proc->p_comm);
	printf("\n");
}
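
/*
 * Illustrative output (a sketch with made-up register values, not
 * captured from a real machine) for a handled user-mode fault:
 *
 *	handled user trap (cpu 0):
 *
 *	    trap vector = 0x14 (Page Not Present)
 *	    cr.iip      = 0x200000000001e010
 *	    cr.ipsr     = ... (ic,i,dt,rt,cpl=3,it,ri=0,bn)
 *	    ...
 */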

/*
 * We got a trap caused by a break instruction and the immediate was 0.
 * This indicates that we may have a break.b with some non-zero immediate.
 * The break.b doesn't cause the immediate to be put in cr.iim.  Hence,
 * we need to disassemble the bundle and return the immediate found there.
 * This may be a 0 value anyway.  Return 0 for any error condition.  This
 * will result in a SIGILL, which is pretty much the best thing to do.
 */
static uint64_t
trap_decode_break(struct trapframe *tf)
{
	struct asm_bundle bundle;
	struct asm_inst *inst;
	int slot;

	if (!asm_decode(tf->tf_special.iip, &bundle))
		return (0);

	slot = ((tf->tf_special.psr & IA64_PSR_RI) == IA64_PSR_RI_0) ? 0 :
	    ((tf->tf_special.psr & IA64_PSR_RI) == IA64_PSR_RI_1) ? 1 : 2;
	inst = bundle.b_inst + slot;

	/*
	 * Sanity checking: It must be a break instruction and the operand
	 * that has the break value must be an immediate.
	 */
	if (inst->i_op != ASM_OP_BREAK ||
	    inst->i_oper[1].o_type != ASM_OPER_IMM)
		return (0);

	return (inst->i_oper[1].o_value);
}
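
/*
 * Illustrative sketch (editorial, not from the original source):
 * psr.ri records which of the three instruction slots in the bundle at
 * cr.iip raised the trap.  So for a bundle whose slot 1 holds
 * "break.b 0x4d2", a trap taken with psr.ri == IA64_PSR_RI_1 decodes
 * to an asm_inst with i_op == ASM_OP_BREAK and
 * i_oper[1].o_value == 0x4d2, which is the immediate returned above.
 */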

void
trap_panic(int vector, struct trapframe *tf)
{

	printtrap(vector, tf, 1, TRAPF_USERMODE(tf));
#ifdef KDB
	kdb_trap(vector, 0, tf);
#endif
	panic("trap");
}

/*
 * Process pending ASTs and reschedule requests before returning to
 * userland.
 */
int
do_ast(struct trapframe *tf)
{

	disable_intr();
	while (curthread->td_flags & (TDF_ASTPENDING|TDF_NEEDRESCHED)) {
		enable_intr();
		ast(tf);
		disable_intr();
	}
	/*
	 * Keep interrupts disabled. We return r10 as a favor to the EPC
	 * syscall code so that it can quickly determine if the syscall
	 * needs to be restarted or not.
	 */
	return (tf->tf_scratch.gr10);
}
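
/*
 * Sketch of the r10 convention (an assumption based on the comment
 * above, not taken from this file): the EPC syscall return path in the
 * assembly stub compares the value do_ast() leaves in r10 against
 * ERESTART and, when they match, rewinds the syscall so it is issued
 * again; any other value simply continues into userland.
 */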

/*
 * Trap is called from exception.s to handle most types of processor traps.
 */
/*ARGSUSED*/
void
trap(int vector, struct trapframe *tf)
{
	struct proc *p;
	struct thread *td;
	uint64_t ucode;
	int error, sig, user;
	ksiginfo_t ksi;

	user = TRAPF_USERMODE(tf) ? 1 : 0;

	PCPU_LAZY_INC(cnt.v_trap);

	td = curthread;
	p = td->td_proc;
	ucode = 0;

	if (user) {
		ia64_set_fpsr(IA64_FPSR_DEFAULT);
		td->td_pticks = 0;
		td->td_frame = tf;
		if (td->td_ucred != p->p_ucred)
			cred_update_thread(td);
	} else {
		KASSERT(cold || td->td_ucred != NULL,
		    ("kernel trap doesn't have ucred"));
#ifdef KDB
		if (kdb_active)
			kdb_reenter();
#endif
	}

	sig = 0;
	switch (vector) {
	case IA64_VEC_VHPT:
		/*
		 * This one is tricky. We should hardwire the VHPT, but
		 * don't at this time. I think we're mostly lucky that
		 * the VHPT is mapped.
		 */
		trap_panic(vector, tf);
		break;

	case IA64_VEC_ITLB:
	case IA64_VEC_DTLB:
	case IA64_VEC_EXT_INTR:
		/* We never call trap() with these vectors. */
		trap_panic(vector, tf);
		break;

	case IA64_VEC_ALT_ITLB:
	case IA64_VEC_ALT_DTLB:
		/*
		 * These should never happen, because regions 0-4 use the
		 * VHPT. If we get one of these it means we didn't program
		 * the region registers correctly.
		 */
		trap_panic(vector, tf);
		break;

	case IA64_VEC_NESTED_DTLB:
		/*
		 * We never call trap() with this vector. We may want to
		 * do that in the future in case the nested TLB handler
		 * could not find the translation it needs. In that case
		 * we could switch to a special (hardwired) stack and
		 * come here to produce a nice panic().
		 */
		trap_panic(vector, tf);
		break;

	case IA64_VEC_IKEY_MISS:
	case IA64_VEC_DKEY_MISS:
	case IA64_VEC_KEY_PERMISSION:
		/*
		 * We don't use protection keys, so we should never get
		 * these faults.
		 */
		trap_panic(vector, tf);
		break;

	case IA64_VEC_DIRTY_BIT:
	case IA64_VEC_INST_ACCESS:
	case IA64_VEC_DATA_ACCESS:
		/*
		 * We get here if we read from or write to a page whose
		 * PTE lacks the access or dirty bit and we cannot find
		 * the PTE in our data structures. This means we either
		 * have a stale PTE in the TLB or we lost the PTE from
		 * our data structures.
		 */
		trap_panic(vector, tf);
		break;

	case IA64_VEC_BREAK:
		if (user) {
			ucode = (int)tf->tf_special.ifa & 0x1FFFFF;
			if (ucode == 0) {
				/*
				 * A break.b doesn't cause the immediate to be
				 * stored in cr.iim (and saved in the TF in
				 * tf_special.ifa).  We need to decode the
				 * instruction to find out what the immediate
				 * was.  Note that if the break instruction
				 * didn't happen to be a break.b, but any
				 * other break with an immediate of 0, we
				 * will do unnecessary work to get the value
				 * we already had.  Not an issue, because a
				 * break 0 is invalid.
				 */
				ucode = trap_decode_break(tf);
			}
			if (ucode < 0x80000) {
				/* Software interrupts. */
				switch (ucode) {
				case 0:		/* Unknown error. */
					sig = SIGILL;
					break;
				case 1:		/* Integer divide by zero. */
					sig = SIGFPE;
					ucode = FPE_INTDIV;
					break;
				case 2:		/* Integer overflow. */
					sig = SIGFPE;
					ucode = FPE_INTOVF;
					break;
				case 3:		/* Range check/bounds check. */
					sig = SIGFPE;
					ucode = FPE_FLTSUB;
					break;
				case 6: 	/* Decimal overflow. */
				case 7: 	/* Decimal divide by zero. */
				case 8: 	/* Packed decimal error. */
				case 9: 	/* Invalid ASCII digit. */
				case 10:	/* Invalid decimal digit. */
					sig = SIGFPE;
					ucode = FPE_FLTINV;
					break;
				case 4:		/* Null pointer dereference. */
				case 5:		/* Misaligned data. */
				case 11:	/* Paragraph stack overflow. */
					sig = SIGSEGV;
					break;
				default:
					sig = SIGILL;
					break;
				}
			} else if (ucode < 0x100000) {
				/* Debugger breakpoint. */
				tf->tf_special.psr &= ~IA64_PSR_SS;
				sig = SIGTRAP;
			} else if (ucode == 0x100000) {
				break_syscall(tf);
				return;		/* do_ast() already called. */
			} else if (ucode == 0x180000) {
				mcontext_t mc;

				error = copyin((void*)tf->tf_scratch.gr8,
				    &mc, sizeof(mc));
				if (!error) {
					set_mcontext(td, &mc);
					return;	/* Don't call do_ast()!!! */
				}
				sig = SIGSEGV;
				ucode = tf->tf_scratch.gr8;
			} else
				sig = SIGILL;
		} else {
#ifdef KDB
			if (kdb_trap(vector, 0, tf))
				return;
			panic("trap");
#else
			trap_panic(vector, tf);
#endif
		}
		break;

	case IA64_VEC_PAGE_NOT_PRESENT:
	case IA64_VEC_INST_ACCESS_RIGHTS:
	case IA64_VEC_DATA_ACCESS_RIGHTS: {
		vm_offset_t va;
		struct vmspace *vm;
		vm_map_t map;
		vm_prot_t ftype;
		int rv;

		rv = 0;
		va = trunc_page(tf->tf_special.ifa);

		if (va >= VM_MAX_ADDRESS) {
			/*
			 * Don't allow user-mode faults for kernel virtual
			 * addresses, including the gateway page.
			 */
			if (user)
				goto no_fault_in;
			map = kernel_map;
		} else {
			vm = (p != NULL) ? p->p_vmspace : NULL;
			if (vm == NULL)
				goto no_fault_in;
			map = &vm->vm_map;
		}

		if (tf->tf_special.isr & IA64_ISR_X)
			ftype = VM_PROT_EXECUTE;
		else if (tf->tf_special.isr & IA64_ISR_W)
			ftype = VM_PROT_WRITE;
		else
			ftype = VM_PROT_READ;

		if (map != kernel_map) {
			/*
			 * Keep swapout from messing with us during this
			 * critical time.
			 */
			PROC_LOCK(p);
			++p->p_lock;
			PROC_UNLOCK(p);

			/* Fault in the user page: */
			rv = vm_fault(map, va, ftype, (ftype & VM_PROT_WRITE)
			    ? VM_FAULT_DIRTY : VM_FAULT_NORMAL);

			PROC_LOCK(p);
			--p->p_lock;
			PROC_UNLOCK(p);
		} else {
			/*
			 * Don't have to worry about process locking or
			 * stacks in the kernel.
			 */
			rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
		}

		if (rv == KERN_SUCCESS)
			goto out;

	no_fault_in:
		if (!user) {
			/* Check for copyin/copyout fault. */
			if (td != NULL && td->td_pcb->pcb_onfault != 0) {
				tf->tf_special.iip =
				    td->td_pcb->pcb_onfault;
				tf->tf_special.psr &= ~IA64_PSR_RI;
				td->td_pcb->pcb_onfault = 0;
				goto out;
			}
			trap_panic(vector, tf);
		}
		ucode = va;
		sig = (rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV;
		break;
	}

	case IA64_VEC_GENERAL_EXCEPTION:
	case IA64_VEC_NAT_CONSUMPTION:
	case IA64_VEC_SPECULATION:
	case IA64_VEC_UNSUPP_DATA_REFERENCE:
		if (user) {
			ucode = vector;
			sig = SIGILL;
		} else
			trap_panic(vector, tf);
		break;

	case IA64_VEC_DISABLED_FP: {
		struct pcpu *pcpu;
		struct pcb *pcb;
		struct thread *thr;

		/* Always fatal in kernel. Should never happen. */
		if (!user)
			trap_panic(vector, tf);

		sched_pin();
		thr = PCPU_GET(fpcurthread);
		if (thr == td) {
			/*
			 * Short-circuit handling the trap when this CPU
			 * already holds the high FP registers for this
			 * thread.  We really shouldn't get the trap in the
			 * first place, but since it's only a performance
			 * issue and not a correctness issue, we emit a
			 * message for now, enable the high FP registers and
			 * return.
			 */
			printf("XXX: bogusly disabled high FP regs\n");
			tf->tf_special.psr &= ~IA64_PSR_DFH;
			sched_unpin();
			goto out;
		} else if (thr != NULL) {
			mtx_lock_spin(&thr->td_md.md_highfp_mtx);
			pcb = thr->td_pcb;
			save_high_fp(&pcb->pcb_high_fp);
			pcb->pcb_fpcpu = NULL;
			PCPU_SET(fpcurthread, NULL);
			mtx_unlock_spin(&thr->td_md.md_highfp_mtx);
			thr = NULL;
		}

		mtx_lock_spin(&td->td_md.md_highfp_mtx);
		pcb = td->td_pcb;
		pcpu = pcb->pcb_fpcpu;

#ifdef SMP
		if (pcpu != NULL) {
			mtx_unlock_spin(&td->td_md.md_highfp_mtx);
			ipi_send(pcpu, IPI_HIGH_FP);
			while (pcb->pcb_fpcpu == pcpu)
				DELAY(100);
			mtx_lock_spin(&td->td_md.md_highfp_mtx);
			pcpu = pcb->pcb_fpcpu;
			thr = PCPU_GET(fpcurthread);
		}
#endif

		if (thr == NULL && pcpu == NULL) {
			restore_high_fp(&pcb->pcb_high_fp);
			PCPU_SET(fpcurthread, td);
			pcb->pcb_fpcpu = pcpup;
			tf->tf_special.psr &= ~IA64_PSR_MFH;
			tf->tf_special.psr &= ~IA64_PSR_DFH;
		}

		mtx_unlock_spin(&td->td_md.md_highfp_mtx);
		sched_unpin();
		goto out;
	}

	case IA64_VEC_DEBUG:
	case IA64_VEC_SINGLE_STEP_TRAP:
		tf->tf_special.psr &= ~IA64_PSR_SS;
		if (!user) {
#ifdef KDB
			if (kdb_trap(vector, 0, tf))
				return;
			panic("trap");
#else
			trap_panic(vector, tf);
#endif
		}
		sig = SIGTRAP;
		break;

	case IA64_VEC_UNALIGNED_REFERENCE:
		/*
		 * If user-land, do whatever fixups, printing, and
		 * signalling is appropriate (based on system-wide
		 * and per-process unaligned-access-handling flags).
		 */
		if (user) {
			sig = unaligned_fixup(tf, td);
			if (sig == 0)
				goto out;
			ucode = tf->tf_special.ifa;	/* VA */
		} else {
			/* Check for copyin/copyout fault. */
			if (td != NULL && td->td_pcb->pcb_onfault != 0) {
				tf->tf_special.iip =
				    td->td_pcb->pcb_onfault;
				tf->tf_special.psr &= ~IA64_PSR_RI;
				td->td_pcb->pcb_onfault = 0;
				goto out;
			}
			trap_panic(vector, tf);
		}
		break;

	case IA64_VEC_FLOATING_POINT_FAULT:
	case IA64_VEC_FLOATING_POINT_TRAP: {
		struct fpswa_bundle bundle;
		struct fpswa_fpctx fpctx;
		struct fpswa_ret ret;
		char *ip;
		u_long fault;

		/* Always fatal in kernel. Should never happen. */
		if (!user)
			trap_panic(vector, tf);

		if (fpswa_iface == NULL) {
			sig = SIGFPE;
			ucode = 0;
			break;
		}

		ip = (char *)tf->tf_special.iip;
		if (vector == IA64_VEC_FLOATING_POINT_TRAP &&
		    (tf->tf_special.psr & IA64_PSR_RI) == 0)
			ip -= 16;
		error = copyin(ip, &bundle, sizeof(bundle));
		if (error) {
			sig = SIGBUS;	/* EFAULT, basically */
			ucode = 0;	/* exception summary */
			break;
		}

		/* f6-f15 are saved in exception_save */
		fpctx.mask_low = 0xffc0;		/* bits 6 - 15 */
		fpctx.mask_high = 0;
		fpctx.fp_low_preserved = NULL;
		fpctx.fp_low_volatile = &tf->tf_scratch_fp.fr6;
		fpctx.fp_high_preserved = NULL;
		fpctx.fp_high_volatile = NULL;

		fault = (vector == IA64_VEC_FLOATING_POINT_FAULT) ? 1 : 0;

		/*
		 * We have the high FP registers disabled while in the
		 * kernel. Enable them for the FPSWA handler only.
		 */
		ia64_enable_highfp();

		/* The docs are unclear.  Is FPSWA reentrant? */
		ret = fpswa_iface->if_fpswa(fault, &bundle,
		    &tf->tf_special.psr, &tf->tf_special.fpsr,
		    &tf->tf_special.isr, &tf->tf_special.pr,
		    &tf->tf_special.cfm, &fpctx);

		ia64_disable_highfp();

		/*
		 * Update ipsr and iip to next instruction. We only
		 * have to do that for faults.
		 */
		if (fault && (ret.status == 0 || (ret.status & 2))) {
			int ei;

			ei = (tf->tf_special.isr >> 41) & 0x03;
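			/*
			 * Note (editorial): ISR.ei and PSR.ri both occupy
			 * bits 41:42 of their registers, which is why the
			 * IA64_ISR_EI constants can be applied to psr below
			 * to advance psr.ri past the slot that FPSWA
			 * emulated.
			 */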
			if (ei == 0) {		/* no template for this case */
				tf->tf_special.psr &= ~IA64_ISR_EI;
				tf->tf_special.psr |= IA64_ISR_EI_1;
			} else if (ei == 1) {	/* MFI or MFB */
				tf->tf_special.psr &= ~IA64_ISR_EI;
				tf->tf_special.psr |= IA64_ISR_EI_2;
			} else if (ei == 2) {	/* MMF */
				tf->tf_special.psr &= ~IA64_ISR_EI;
				tf->tf_special.iip += 0x10;
			}
		}

		if (ret.status == 0) {
			goto out;
		} else if (ret.status == -1) {
			printf("FATAL: FPSWA err1 %lx, err2 %lx, err3 %lx\n",
			    ret.err1, ret.err2, ret.err3);
			panic("fpswa fatal error on fp fault");
		} else {
			sig = SIGFPE;
			ucode = 0;		/* XXX exception summary */
			break;
		}
	}

	case IA64_VEC_LOWER_PRIVILEGE_TRANSFER:
		/*
		 * The lower-privilege transfer trap is used by the EPC
		 * syscall code to trigger re-entry into the kernel when
		 * the process should be single stepped. The problem is
		 * that there's no way to set single stepping directly
		 * without using the rfi instruction. So instead we enable
		 * the lower-privilege transfer trap and when we get here
		 * we know that the process is about to enter userland
		 * (and has already lowered its privilege).
		 * However, there's another gotcha. When the process has
		 * lowered its privilege, it's still running in the
		 * gateway page. If we enable single stepping, we'll be
		 * stepping the code in the gateway page. In and of itself
		 * this is not a problem, but it's an address debuggers
		 * won't know anything about. Hence, it can only cause
		 * confusion.
		 * We know that we need to branch to get out of the
		 * gateway page, so what we do here is enable the taken
		 * branch trap and just let the process continue. When we
		 * branch out of the gateway page we'll get back into the
		 * kernel and then we enable single stepping.
		 * Since this is a rather roundabout way of enabling
		 * single stepping, don't make things even more
		 * complicated by calling userret() and do_ast(). We do
		 * that later...
		 */
		tf->tf_special.psr &= ~IA64_PSR_LP;
		tf->tf_special.psr |= IA64_PSR_TB;
		return;

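	/*
	 * Illustrative sequence (a sketch, not from the original
	 * comments): a debugger requests a single step -> the EPC
	 * syscall exit path arms psr.lp -> the LP trap above trades
	 * psr.lp for psr.tb -> the branch out of the gateway page
	 * raises the taken branch trap below, where psr.tb is cleared
	 * and SIGTRAP is finally delivered at a user address.
	 */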
	case IA64_VEC_TAKEN_BRANCH_TRAP:
		/*
		 * Don't assume there aren't any branches other than the
		 * branch that takes us out of the gateway page. Check
		 * the iip and raise SIGTRAP only when it's a user
		 * address.
		 */
		if (tf->tf_special.iip >= VM_MAX_ADDRESS)
			return;
		tf->tf_special.psr &= ~IA64_PSR_TB;
		sig = SIGTRAP;
		break;

	case IA64_VEC_IA32_EXCEPTION:
	case IA64_VEC_IA32_INTERCEPT:
	case IA64_VEC_IA32_INTERRUPT:
		sig = SIGEMT;
		ucode = tf->tf_special.iip;
		break;

	default:
		/* Reserved vectors get here. Should never happen of course. */
		trap_panic(vector, tf);
		break;
	}

	KASSERT(sig != 0, ("trap: no signal to deliver"));

	if (print_usertrap)
		printtrap(vector, tf, 1, user);

	ksiginfo_init(&ksi);
	ksi.ksi_signo = sig;
	ksi.ksi_code = ucode;
	trapsignal(td, &ksi);

out:
	if (user) {
		userret(td, tf);
		mtx_assert(&Giant, MA_NOTOWNED);
		do_ast(tf);
	}
	return;
}

/*
 * Handle break instruction based system calls.
 */
void
break_syscall(struct trapframe *tf)
{
	uint64_t *bsp, *tfp;
	uint64_t iip, psr;
	int error, nargs;

	/* Save address of break instruction. */
	iip = tf->tf_special.iip;
	psr = tf->tf_special.psr;

	/* Advance to the next instruction. */
	tf->tf_special.psr += IA64_PSR_RI_1;
	if ((tf->tf_special.psr & IA64_PSR_RI) > IA64_PSR_RI_2) {
		tf->tf_special.iip += 16;
		tf->tf_special.psr &= ~IA64_PSR_RI;
	}

	/*
	 * Copy the arguments on the register stack into the trapframe
	 * to avoid having interleaved NaT collections.
	 */
	tfp = &tf->tf_scratch.gr16;
	nargs = tf->tf_special.cfm & 0x7f;
	bsp = (uint64_t*)(curthread->td_kstack + tf->tf_special.ndirty +
	    (tf->tf_special.bspstore & 0x1ffUL));
	bsp -= (((uintptr_t)bsp & 0x1ff) < (nargs << 3)) ? (nargs + 1) : nargs;
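	/*
	 * Note (editorial): the RSE stores an RNaT collection in every
	 * 64th backing store slot -- whenever address bits 8:3 are all
	 * ones, i.e. at offset 0x1f8 within each 512-byte span.  That is
	 * why the adjustment above reserves one extra slot when the
	 * arguments straddle a collection point, and why the loop below
	 * steps over those slots.
	 */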
	while (nargs--) {
		*tfp++ = *bsp++;
		if (((uintptr_t)bsp & 0x1ff) == 0x1f8)
			bsp++;
	}
	error = syscall(tf);
	if (error == ERESTART) {
		tf->tf_special.iip = iip;
		tf->tf_special.psr = psr;
	}

	do_ast(tf);
}

/*
 * Process a system call.
 *
 * See syscall.s for details as to how we get here. In order to support
 * the ERESTART case, we return the error to our caller. They deal with
 * the hairy details.
 */
int
syscall(struct trapframe *tf)
{
	struct sysent *callp;
	struct proc *p;
	struct thread *td;
	uint64_t *args;
	int code, error;

	ia64_set_fpsr(IA64_FPSR_DEFAULT);

	code = tf->tf_scratch.gr15;
	args = &tf->tf_scratch.gr16;

	PCPU_LAZY_INC(cnt.v_syscall);

	td = curthread;
	td->td_frame = tf;
	p = td->td_proc;

	td->td_pticks = 0;
	if (td->td_ucred != p->p_ucred)
		cred_update_thread(td);
	if (p->p_flag & P_SA)
		thread_user_enter(td);

	if (p->p_sysent->sv_prepsyscall) {
		/* (*p->p_sysent->sv_prepsyscall)(tf, args, &code, &params); */
		panic("prepsyscall");
	} else {
		/*
		 * syscall() and __syscall() are handled the same on
		 * the ia64, as everything is 64-bit aligned, anyway.
		 */
		if (code == SYS_syscall || code == SYS___syscall) {
			/*
			 * Code is first argument, followed by actual args.
			 */
			code = args[0];
			args++;
		}
	}
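
	/*
	 * Example (illustrative, not from the original source): a
	 * userland call such as syscall(SYS_getpid) arrives here with
	 * gr15 == SYS_syscall and args[0] == SYS_getpid; the block above
	 * shifts the real syscall code out of the argument list before
	 * dispatch.
	 */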

	if (p->p_sysent->sv_mask)
		code &= p->p_sysent->sv_mask;

	if (code >= p->p_sysent->sv_size)
		callp = &p->p_sysent->sv_table[0];
	else
		callp = &p->p_sysent->sv_table[code];

#ifdef KTRACE
	if (KTRPOINT(td, KTR_SYSCALL))
		ktrsyscall(code, (callp->sy_narg & SYF_ARGMASK), args);
#endif

	td->td_retval[0] = 0;
	td->td_retval[1] = 0;
	tf->tf_scratch.gr10 = EJUSTRETURN;

	STOPEVENT(p, S_SCE, (callp->sy_narg & SYF_ARGMASK));

	PTRACESTOP_SC(p, td, S_PT_SCE);

	/*
	 * Grab Giant if the syscall is not flagged as MP safe.
	 */
	if ((callp->sy_narg & SYF_MPSAFE) == 0) {
		mtx_lock(&Giant);
		error = (*callp->sy_call)(td, args);
		mtx_unlock(&Giant);
	} else
		error = (*callp->sy_call)(td, args);

	if (error != EJUSTRETURN) {
		/*
		 * Save the "raw" error code in r10. We use this to handle
		 * syscall restarts (see do_ast()).
		 */
		tf->tf_scratch.gr10 = error;
		if (error == 0) {
			tf->tf_scratch.gr8 = td->td_retval[0];
			tf->tf_scratch.gr9 = td->td_retval[1];
		} else if (error != ERESTART) {
			if (error < p->p_sysent->sv_errsize)
				error = p->p_sysent->sv_errtbl[error];
			/*
			 * Translated error codes are returned in r8. User
			 * processes use the translated error code.
			 */
			tf->tf_scratch.gr8 = error;
		}
	}

	userret(td, tf);

#ifdef KTRACE
	if (KTRPOINT(td, KTR_SYSRET))
		ktrsysret(code, error, td->td_retval[0]);
#endif

	/*
	 * This works because errno is findable through the
	 * register set.  If we ever support an emulation where this
	 * is not the case, this code will need to be revisited.
	 */
	STOPEVENT(p, S_SCX, code);

	PTRACESTOP_SC(p, td, S_PT_SCX);

	WITNESS_WARN(WARN_PANIC, NULL, "System call %s returning",
	    (code >= 0 && code < SYS_MAXSYSCALL) ? syscallnames[code] : "???");
	mtx_assert(&sched_lock, MA_NOTOWNED);
	mtx_assert(&Giant, MA_NOTOWNED);

	return (error);
}