/*-
 * Copyright (c) 2005 Marcel Moolenaar
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/ia64/ia64/trap.c 177091 2008-03-12 10:12:01Z jeff $");

#include "opt_ddb.h"
#include "opt_ktrace.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kdb.h>
#include <sys/ktr.h>
#include <sys/sysproto.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/exec.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/vmmeter.h>
#include <sys/sysent.h>
#include <sys/signalvar.h>
#include <sys/syscall.h>
#include <sys/pioctl.h>
#include <sys/ptrace.h>
#include <sys/sysctl.h>
#include <vm/vm.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>
#include <vm/vm_param.h>
#include <machine/cpu.h>
#include <machine/md_var.h>
#include <machine/reg.h>
#include <machine/pal.h>
#include <machine/fpu.h>
#include <machine/efi.h>
#include <machine/pcb.h>
#ifdef SMP
#include <machine/smp.h>
#endif

#ifdef KTRACE
#include <sys/uio.h>
#include <sys/ktrace.h>
#endif

#include <security/audit/audit.h>

#include <ia64/disasm/disasm.h>

static int print_usertrap = 0;
SYSCTL_INT(_machdep, OID_AUTO, print_usertrap,
    CTLFLAG_RW, &print_usertrap, 0, "");

static void break_syscall(struct trapframe *tf);

/*
 * EFI-Provided FPSWA interface (Floating Point SoftWare Assist)
 */
extern struct fpswa_iface *fpswa_iface;

extern char *syscallnames[];

static const char *ia64_vector_names[] = {
	"VHPT Translation",			/* 0 */
	"Instruction TLB",			/* 1 */
	"Data TLB",				/* 2 */
	"Alternate Instruction TLB",		/* 3 */
	"Alternate Data TLB",			/* 4 */
	"Data Nested TLB",			/* 5 */
	"Instruction Key Miss",			/* 6 */
	"Data Key Miss",			/* 7 */
	"Dirty-Bit",				/* 8 */
	"Instruction Access-Bit",		/* 9 */
	"Data Access-Bit",			/* 10 */
	"Break Instruction",			/* 11 */
	"External Interrupt",			/* 12 */
	"Reserved 13",				/* 13 */
	"Reserved 14",				/* 14 */
	"Reserved 15",				/* 15 */
	"Reserved 16",				/* 16 */
	"Reserved 17",				/* 17 */
	"Reserved 18",				/* 18 */
	"Reserved 19",				/* 19 */
	"Page Not Present",			/* 20 */
	"Key Permission",			/* 21 */
	"Instruction Access Rights",		/* 22 */
	"Data Access Rights",			/* 23 */
	"General Exception",			/* 24 */
	"Disabled FP-Register",			/* 25 */
	"NaT Consumption",			/* 26 */
	"Speculation",				/* 27 */
	"Reserved 28",				/* 28 */
	"Debug",				/* 29 */
	"Unaligned Reference",			/* 30 */
	"Unsupported Data Reference",		/* 31 */
	"Floating-point Fault",			/* 32 */
	"Floating-point Trap",			/* 33 */
	"Lower-Privilege Transfer Trap",	/* 34 */
	"Taken Branch Trap",			/* 35 */
	"Single Step Trap",			/* 36 */
	"Reserved 37",				/* 37 */
	"Reserved 38",				/* 38 */
	"Reserved 39",				/* 39 */
	"Reserved 40",				/* 40 */
	"Reserved 41",				/* 41 */
	"Reserved 42",				/* 42 */
	"Reserved 43",				/* 43 */
	"Reserved 44",				/* 44 */
	"IA-32 Exception",			/* 45 */
	"IA-32 Intercept",			/* 46 */
	"IA-32 Interrupt",			/* 47 */
	"Reserved 48",				/* 48 */
	"Reserved 49",				/* 49 */
	"Reserved 50",				/* 50 */
	"Reserved 51",				/* 51 */
	"Reserved 52",				/* 52 */
	"Reserved 53",				/* 53 */
	"Reserved 54",				/* 54 */
	"Reserved 55",				/* 55 */
	"Reserved 56",				/* 56 */
	"Reserved 57",				/* 57 */
	"Reserved 58",				/* 58 */
	"Reserved 59",				/* 59 */
	"Reserved 60",				/* 60 */
	"Reserved 61",				/* 61 */
	"Reserved 62",				/* 62 */
	"Reserved 63",				/* 63 */
	"Reserved 64",				/* 64 */
	"Reserved 65",				/* 65 */
	"Reserved 66",				/* 66 */
	"Reserved 67",				/* 67 */
};

struct bitname {
	uint64_t mask;
	const char *name;
};

static void
printbits(uint64_t mask, struct bitname *bn, int count)
{
	int i, first = 1;
	uint64_t bit;

	for (i = 0; i < count; i++) {
		/*
		 * Isolate the lowest set bit of the mask so that fields
		 * wider than one bit can be handled: those are printed
		 * as name=value rather than as a bare flag name.
		 */
		bit = bn[i].mask & ~(bn[i].mask - 1);
		if (bn[i].mask > bit) {
			if (first)
				first = 0;
			else
				printf(",");
			printf("%s=%ld", bn[i].name,
			       (mask & bn[i].mask) / bit);
		} else if (mask & bit) {
			if (first)
				first = 0;
			else
				printf(",");
			printf("%s", bn[i].name);
		}
	}
}

struct bitname psr_bits[] = {
	{IA64_PSR_BE,	"be"},
	{IA64_PSR_UP,	"up"},
	{IA64_PSR_AC,	"ac"},
	{IA64_PSR_MFL,	"mfl"},
	{IA64_PSR_MFH,	"mfh"},
	{IA64_PSR_IC,	"ic"},
	{IA64_PSR_I,	"i"},
	{IA64_PSR_PK,	"pk"},
	{IA64_PSR_DT,	"dt"},
	{IA64_PSR_DFL,	"dfl"},
	{IA64_PSR_DFH,	"dfh"},
	{IA64_PSR_SP,	"sp"},
	{IA64_PSR_PP,	"pp"},
	{IA64_PSR_DI,	"di"},
	{IA64_PSR_SI,	"si"},
	{IA64_PSR_DB,	"db"},
	{IA64_PSR_LP,	"lp"},
	{IA64_PSR_TB,	"tb"},
	{IA64_PSR_RT,	"rt"},
	{IA64_PSR_CPL,	"cpl"},
	{IA64_PSR_IS,	"is"},
	{IA64_PSR_MC,	"mc"},
	{IA64_PSR_IT,	"it"},
	{IA64_PSR_ID,	"id"},
	{IA64_PSR_DA,	"da"},
	{IA64_PSR_DD,	"dd"},
	{IA64_PSR_SS,	"ss"},
	{IA64_PSR_RI,	"ri"},
	{IA64_PSR_ED,	"ed"},
	{IA64_PSR_BN,	"bn"},
	{IA64_PSR_IA,	"ia"},
};

static void
printpsr(uint64_t psr)
{
	printbits(psr, psr_bits, sizeof(psr_bits)/sizeof(psr_bits[0]));
}

struct bitname isr_bits[] = {
	{IA64_ISR_CODE,	"code"},
	{IA64_ISR_VECTOR, "vector"},
	{IA64_ISR_X,	"x"},
	{IA64_ISR_W,	"w"},
	{IA64_ISR_R,	"r"},
	{IA64_ISR_NA,	"na"},
	{IA64_ISR_SP,	"sp"},
	{IA64_ISR_RS,	"rs"},
	{IA64_ISR_IR,	"ir"},
	{IA64_ISR_NI,	"ni"},
	{IA64_ISR_SO,	"so"},
	{IA64_ISR_EI,	"ei"},
	{IA64_ISR_ED,	"ed"},
};

static void
printisr(uint64_t isr)
{
	printbits(isr, isr_bits, sizeof(isr_bits)/sizeof(isr_bits[0]));
}

static void
printtrap(int vector, struct trapframe *tf, int isfatal, int user)
{
	printf("\n");
	printf("%s %s trap (cpu %d):\n", isfatal ? "fatal" : "handled",
	       user ? "user" : "kernel", PCPU_GET(cpuid));
	printf("\n");
	printf("    trap vector = 0x%x (%s)\n",
	       vector, ia64_vector_names[vector]);
	printf("    cr.iip      = 0x%lx\n", tf->tf_special.iip);
	printf("    cr.ipsr     = 0x%lx (", tf->tf_special.psr);
	printpsr(tf->tf_special.psr);
	printf(")\n");
	printf("    cr.isr      = 0x%lx (", tf->tf_special.isr);
	printisr(tf->tf_special.isr);
	printf(")\n");
	printf("    cr.ifa      = 0x%lx\n", tf->tf_special.ifa);
	if (tf->tf_special.psr & IA64_PSR_IS) {
		printf("    ar.cflg     = 0x%lx\n", ia64_get_cflg());
		printf("    ar.csd      = 0x%lx\n", ia64_get_csd());
		printf("    ar.ssd      = 0x%lx\n", ia64_get_ssd());
	}
	printf("    curthread   = %p\n", curthread);
	if (curthread != NULL)
		printf("        pid = %d, comm = %s\n",
		       curthread->td_proc->p_pid, curthread->td_name);
	printf("\n");
}

/*
 * We got a trap caused by a break instruction and the immediate was 0.
 * This indicates that we may have a break.b with some non-zero immediate.
 * The break.b doesn't cause the immediate to be put in cr.iim.  Hence,
 * we need to disassemble the bundle and return the immediate found there.
 * This may be a 0 value anyway.  Return 0 for any error condition.  This
 * will result in a SIGILL, which is pretty much the best thing to do.
 */
static uint64_t
trap_decode_break(struct trapframe *tf)
{
	struct asm_bundle bundle;
	struct asm_inst *inst;
	int slot;

	if (!asm_decode(tf->tf_special.iip, &bundle))
		return (0);

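	/* cr.ipsr.ri holds the slot (0-2) of the faulting instruction. */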
	slot = ((tf->tf_special.psr & IA64_PSR_RI) == IA64_PSR_RI_0) ? 0 :
	    ((tf->tf_special.psr & IA64_PSR_RI) == IA64_PSR_RI_1) ? 1 : 2;
	inst = bundle.b_inst + slot;

	/*
	 * Sanity checking: It must be a break instruction and the operand
	 * that has the break value must be an immediate.
	 */
	if (inst->i_op != ASM_OP_BREAK ||
	    inst->i_oper[1].o_type != ASM_OPER_IMM)
		return (0);

	return (inst->i_oper[1].o_value);
}

void
trap_panic(int vector, struct trapframe *tf)
{

	printtrap(vector, tf, 1, TRAPF_USERMODE(tf));
#ifdef KDB
	kdb_trap(vector, 0, tf);
#endif
	panic("trap");
}

/*
 * Handle pending ASTs before returning to user mode.
 */
int
do_ast(struct trapframe *tf)
{

	disable_intr();
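	/*
	 * Check the AST flags with interrupts disabled; they are enabled
	 * only while ast() runs, so a flag posted after the final check
	 * cannot be missed before we return.
	 */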
	while (curthread->td_flags & (TDF_ASTPENDING|TDF_NEEDRESCHED)) {
		enable_intr();
		ast(tf);
		disable_intr();
	}
	/*
	 * Keep interrupts disabled. We return r10 as a favor to the EPC
	 * syscall code so that it can quickly determine if the syscall
	 * needs to be restarted or not.
	 */
	return (tf->tf_scratch.gr10);
}

/*
 * Trap is called from exception.s to handle most types of processor traps.
 */
/*ARGSUSED*/
void
trap(int vector, struct trapframe *tf)
{
	struct proc *p;
	struct thread *td;
	uint64_t ucode;
	int error, sig, user;
	ksiginfo_t ksi;

	user = TRAPF_USERMODE(tf) ? 1 : 0;

	PCPU_INC(cnt.v_trap);

	td = curthread;
	p = td->td_proc;
	ucode = 0;

	if (user) {
		ia64_set_fpsr(IA64_FPSR_DEFAULT);
		td->td_pticks = 0;
		td->td_frame = tf;
		if (td->td_ucred != p->p_ucred)
			cred_update_thread(td);
	} else {
		KASSERT(cold || td->td_ucred != NULL,
		    ("kernel trap doesn't have ucred"));
#ifdef KDB
		if (kdb_active)
			kdb_reenter();
#endif
	}

	sig = 0;
	switch (vector) {
	case IA64_VEC_VHPT:
		/*
		 * This one is tricky. We should hardwire the VHPT, but
		 * don't at this time. I think we're mostly lucky that
		 * the VHPT is mapped.
		 */
		trap_panic(vector, tf);
		break;

	case IA64_VEC_ITLB:
	case IA64_VEC_DTLB:
	case IA64_VEC_EXT_INTR:
		/* We never call trap() with these vectors. */
		trap_panic(vector, tf);
		break;

	case IA64_VEC_ALT_ITLB:
	case IA64_VEC_ALT_DTLB:
		/*
		 * These should never happen, because regions 0-4 use the
		 * VHPT. If we get one of these it means we didn't program
		 * the region registers correctly.
		 */
		trap_panic(vector, tf);
		break;

	case IA64_VEC_NESTED_DTLB:
		/*
		 * We never call trap() with this vector. We may want to
		 * do that in the future in case the nested TLB handler
		 * could not find the translation it needs. In that case
		 * we could switch to a special (hardwired) stack and
		 * come here to produce a nice panic().
		 */
		trap_panic(vector, tf);
		break;

	case IA64_VEC_IKEY_MISS:
	case IA64_VEC_DKEY_MISS:
	case IA64_VEC_KEY_PERMISSION:
		/*
		 * We don't use protection keys, so we should never get
		 * these faults.
		 */
		trap_panic(vector, tf);
		break;

	case IA64_VEC_DIRTY_BIT:
	case IA64_VEC_INST_ACCESS:
	case IA64_VEC_DATA_ACCESS:
		/*
		 * We get here if we read or write to a page of which the
		 * PTE does not have the access bit or dirty bit set and
		 * we cannot find the PTE in our data structures. This
		 * either means we have a stale PTE in the TLB, or we lost
		 * the PTE in our data structures.
		 */
		trap_panic(vector, tf);
		break;

	case IA64_VEC_BREAK:
		if (user) {
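			/* The break immediate (cr.iim) is saved in
			 * tf_special.ifa by the low-level exception code. */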
			ucode = (int)tf->tf_special.ifa & 0x1FFFFF;
			if (ucode == 0) {
				/*
				 * A break.b doesn't cause the immediate to be
				 * stored in cr.iim (and saved in the TF in
				 * tf_special.ifa).  We need to decode the
				 * instruction to find out what the immediate
				 * was.  Note that if the break instruction
				 * didn't happen to be a break.b, but any
				 * other break with an immediate of 0, we
				 * will do unnecessary work to get the value
				 * we already had.  Not an issue, because a
				 * break 0 is invalid.
				 */
				ucode = trap_decode_break(tf);
			}
			if (ucode < 0x80000) {
				/* Software interrupts. */
				switch (ucode) {
				case 0:		/* Unknown error. */
					sig = SIGILL;
					break;
				case 1:		/* Integer divide by zero. */
					sig = SIGFPE;
					ucode = FPE_INTDIV;
					break;
				case 2:		/* Integer overflow. */
					sig = SIGFPE;
					ucode = FPE_INTOVF;
					break;
				case 3:		/* Range check/bounds check. */
					sig = SIGFPE;
					ucode = FPE_FLTSUB;
					break;
				case 6:		/* Decimal overflow. */
				case 7:		/* Decimal divide by zero. */
				case 8:		/* Packed decimal error. */
				case 9:		/* Invalid ASCII digit. */
				case 10:	/* Invalid decimal digit. */
					sig = SIGFPE;
					ucode = FPE_FLTINV;
					break;
				case 4:		/* Null pointer dereference. */
				case 5:		/* Misaligned data. */
				case 11:	/* Paragraph stack overflow. */
					sig = SIGSEGV;
					break;
				default:
					sig = SIGILL;
					break;
				}
			} else if (ucode < 0x100000) {
				/* Debugger breakpoint. */
				tf->tf_special.psr &= ~IA64_PSR_SS;
				sig = SIGTRAP;
			} else if (ucode == 0x100000) {
				break_syscall(tf);
				return;		/* do_ast() already called. */
			} else if (ucode == 0x180000) {
				mcontext_t mc;

				error = copyin((void *)tf->tf_scratch.gr8,
				    &mc, sizeof(mc));
				if (!error) {
					set_mcontext(td, &mc);
					return;	/* Don't call do_ast()!!! */
				}
				sig = SIGSEGV;
				ucode = tf->tf_scratch.gr8;
			} else
				sig = SIGILL;
		} else {
#ifdef KDB
			if (kdb_trap(vector, 0, tf))
				return;
			panic("trap");
#else
			trap_panic(vector, tf);
#endif
		}
		break;

	case IA64_VEC_PAGE_NOT_PRESENT:
	case IA64_VEC_INST_ACCESS_RIGHTS:
	case IA64_VEC_DATA_ACCESS_RIGHTS: {
		vm_offset_t va;
		struct vmspace *vm;
		vm_map_t map;
		vm_prot_t ftype;
		int rv;

		rv = 0;
		va = trunc_page(tf->tf_special.ifa);

		if (va >= VM_MAX_ADDRESS) {
			/*
			 * Don't allow user-mode faults for kernel virtual
			 * addresses, including the gateway page.
			 */
			if (user)
				goto no_fault_in;
			map = kernel_map;
		} else {
			vm = (p != NULL) ? p->p_vmspace : NULL;
			if (vm == NULL)
				goto no_fault_in;
			map = &vm->vm_map;
		}

		if (tf->tf_special.isr & IA64_ISR_X)
			ftype = VM_PROT_EXECUTE;
		else if (tf->tf_special.isr & IA64_ISR_W)
			ftype = VM_PROT_WRITE;
		else
			ftype = VM_PROT_READ;

		if (map != kernel_map) {
			/*
			 * Keep swapout from messing with us during this
			 * critical time.
			 */
			PROC_LOCK(p);
			++p->p_lock;
			PROC_UNLOCK(p);

			/* Fault in the user page: */
			rv = vm_fault(map, va, ftype, (ftype & VM_PROT_WRITE)
			    ? VM_FAULT_DIRTY : VM_FAULT_NORMAL);

			PROC_LOCK(p);
			--p->p_lock;
			PROC_UNLOCK(p);
		} else {
			/*
			 * Don't have to worry about process locking or
			 * stacks in the kernel.
			 */
			rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
		}

		if (rv == KERN_SUCCESS)
			goto out;

	no_fault_in:
		if (!user) {
			/* Check for copyin/copyout fault. */
			if (td != NULL && td->td_pcb->pcb_onfault != 0) {
				tf->tf_special.iip =
				    td->td_pcb->pcb_onfault;
				tf->tf_special.psr &= ~IA64_PSR_RI;
				td->td_pcb->pcb_onfault = 0;
				goto out;
			}
			trap_panic(vector, tf);
		}
		ucode = va;
		sig = (rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV;
		break;
	}

	case IA64_VEC_GENERAL_EXCEPTION: {
		int code;

		if (!user)
			trap_panic(vector, tf);

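		/* Bits 4-7 of isr.code select the general-exception class. */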
		code = tf->tf_special.isr & (IA64_ISR_CODE & 0xf0ull);
		switch (code) {
		case 0x0:	/* Illegal Operation Fault. */
			sig = ia64_emulate(tf, td);
			break;
		default:
			sig = SIGILL;
			break;
		}
		if (sig == 0)
			goto out;
		ucode = vector;
		break;
	}

	case IA64_VEC_SPECULATION:
		/*
		 * The branching behaviour of the chk instruction is not
		 * implemented by the processor. All we need to do is
		 * compute the target address of the branch and make sure
		 * that control is transferred to that address.
		 * We should do this in the IVT table and not by entering
		 * the kernel...
		 */
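		/*
		 * The displacement (from cr.iim, saved in tf_special.ifa)
		 * counts 16-byte bundles, hence the shift by 4.
		 */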
		tf->tf_special.iip += tf->tf_special.ifa << 4;
		tf->tf_special.psr &= ~IA64_PSR_RI;
		goto out;

	case IA64_VEC_NAT_CONSUMPTION:
	case IA64_VEC_UNSUPP_DATA_REFERENCE:
		if (user) {
			ucode = vector;
			sig = SIGILL;
		} else
			trap_panic(vector, tf);
		break;

	case IA64_VEC_DISABLED_FP: {
		struct pcpu *pcpu;
		struct pcb *pcb;
		struct thread *thr;

		/* Always fatal in kernel. Should never happen. */
		if (!user)
			trap_panic(vector, tf);

		sched_pin();
		thr = PCPU_GET(fpcurthread);
		if (thr == td) {
			/*
			 * Short-circuit handling the trap when this CPU
			 * already holds the high FP registers for this
			 * thread.  We really shouldn't get the trap in the
			 * first place, but since it's only a performance
			 * issue and not a correctness issue, we emit a
			 * message for now, enable the high FP registers and
			 * return.
			 */
			printf("XXX: bogusly disabled high FP regs\n");
			tf->tf_special.psr &= ~IA64_PSR_DFH;
			sched_unpin();
			goto out;
		} else if (thr != NULL) {
			mtx_lock_spin(&thr->td_md.md_highfp_mtx);
			pcb = thr->td_pcb;
			save_high_fp(&pcb->pcb_high_fp);
			pcb->pcb_fpcpu = NULL;
			PCPU_SET(fpcurthread, NULL);
			mtx_unlock_spin(&thr->td_md.md_highfp_mtx);
			thr = NULL;
		}

		mtx_lock_spin(&td->td_md.md_highfp_mtx);
		pcb = td->td_pcb;
		pcpu = pcb->pcb_fpcpu;

#ifdef SMP
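		/*
		 * The thread's high FP registers are still live on another
		 * CPU. Ask that CPU with an IPI to save them and spin until
		 * it has done so.
		 */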
		if (pcpu != NULL) {
			mtx_unlock_spin(&td->td_md.md_highfp_mtx);
			ipi_send(pcpu, IPI_HIGH_FP);
			while (pcb->pcb_fpcpu == pcpu)
				DELAY(100);
			mtx_lock_spin(&td->td_md.md_highfp_mtx);
			pcpu = pcb->pcb_fpcpu;
			thr = PCPU_GET(fpcurthread);
		}
#endif

		if (thr == NULL && pcpu == NULL) {
			restore_high_fp(&pcb->pcb_high_fp);
			PCPU_SET(fpcurthread, td);
			pcb->pcb_fpcpu = pcpup;
			tf->tf_special.psr &= ~IA64_PSR_MFH;
			tf->tf_special.psr &= ~IA64_PSR_DFH;
		}

		mtx_unlock_spin(&td->td_md.md_highfp_mtx);
		sched_unpin();
		goto out;
	}

	case IA64_VEC_DEBUG:
	case IA64_VEC_SINGLE_STEP_TRAP:
		tf->tf_special.psr &= ~IA64_PSR_SS;
		if (!user) {
#ifdef KDB
			if (kdb_trap(vector, 0, tf))
				return;
			panic("trap");
#else
			trap_panic(vector, tf);
#endif
		}
		sig = SIGTRAP;
		break;

	case IA64_VEC_UNALIGNED_REFERENCE:
		/*
		 * If user-land, do whatever fixups, printing, and
		 * signalling is appropriate (based on system-wide
		 * and per-process unaligned-access-handling flags).
		 */
		if (user) {
			sig = unaligned_fixup(tf, td);
			if (sig == 0)
				goto out;
			ucode = tf->tf_special.ifa;	/* VA */
		} else {
			/* Check for copyin/copyout fault. */
			if (td != NULL && td->td_pcb->pcb_onfault != 0) {
				tf->tf_special.iip =
				    td->td_pcb->pcb_onfault;
				tf->tf_special.psr &= ~IA64_PSR_RI;
				td->td_pcb->pcb_onfault = 0;
				goto out;
			}
			trap_panic(vector, tf);
		}
		break;

	case IA64_VEC_FLOATING_POINT_FAULT:
	case IA64_VEC_FLOATING_POINT_TRAP: {
		struct fpswa_bundle bundle;
		struct fpswa_fpctx fpctx;
		struct fpswa_ret ret;
		char *ip;
		u_long fault;

		/* Always fatal in kernel. Should never happen. */
		if (!user)
			trap_panic(vector, tf);

		if (fpswa_iface == NULL) {
			sig = SIGFPE;
			ucode = 0;
			break;
		}

		ip = (char *)tf->tf_special.iip;
		if (vector == IA64_VEC_FLOATING_POINT_TRAP &&
		    (tf->tf_special.psr & IA64_PSR_RI) == 0)
			ip -= 16;
		error = copyin(ip, &bundle, sizeof(bundle));
		if (error) {
			sig = SIGBUS;	/* EFAULT, basically */
			ucode = 0;	/* exception summary */
			break;
		}

		/* f6-f15 are saved in exception_save */
		fpctx.mask_low = 0xffc0;		/* bits 6 - 15 */
		fpctx.mask_high = 0;
		fpctx.fp_low_preserved = NULL;
		fpctx.fp_low_volatile = &tf->tf_scratch_fp.fr6;
		fpctx.fp_high_preserved = NULL;
		fpctx.fp_high_volatile = NULL;

		fault = (vector == IA64_VEC_FLOATING_POINT_FAULT) ? 1 : 0;

		/*
		 * We have the high FP registers disabled while in the
		 * kernel. Enable them for the FPSWA handler only.
		 */
		ia64_enable_highfp();

		/* The docs are unclear.  Is Fpswa reentrant? */
		ret = fpswa_iface->if_fpswa(fault, &bundle,
		    &tf->tf_special.psr, &tf->tf_special.fpsr,
		    &tf->tf_special.isr, &tf->tf_special.pr,
		    &tf->tf_special.cfm, &fpctx);

		ia64_disable_highfp();

		/*
		 * Update ipsr and iip to next instruction. We only
		 * have to do that for faults.
		 */
		if (fault && (ret.status == 0 || (ret.status & 2))) {
			int ei;

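			/* isr.ei (bits 41:42) is the excepting instruction slot. */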
			ei = (tf->tf_special.isr >> 41) & 0x03;
			if (ei == 0) {		/* no template for this case */
				tf->tf_special.psr &= ~IA64_ISR_EI;
				tf->tf_special.psr |= IA64_ISR_EI_1;
			} else if (ei == 1) {	/* MFI or MFB */
				tf->tf_special.psr &= ~IA64_ISR_EI;
				tf->tf_special.psr |= IA64_ISR_EI_2;
			} else if (ei == 2) {	/* MMF */
				tf->tf_special.psr &= ~IA64_ISR_EI;
				tf->tf_special.iip += 0x10;
			}
		}

		if (ret.status == 0) {
			goto out;
		} else if (ret.status == -1) {
			printf("FATAL: FPSWA err1 %lx, err2 %lx, err3 %lx\n",
			    ret.err1, ret.err2, ret.err3);
			panic("fpswa fatal error on fp fault");
		} else {
			sig = SIGFPE;
			ucode = 0;		/* XXX exception summary */
			break;
		}
	}

	case IA64_VEC_LOWER_PRIVILEGE_TRANSFER:
		/*
		 * The lower-privilege transfer trap is used by the EPC
		 * syscall code to trigger re-entry into the kernel when the
		 * process should be single stepped. The problem is that
		 * there's no way to set single stepping directly without
		 * using the rfi instruction. So instead we enable the
		 * lower-privilege transfer trap and when we get here we
		 * know that the process is about to enter userland (and
		 * has already lowered its privilege).
		 * However, there's another gotcha. When the process has
		 * lowered its privilege, it's still running in the gateway
		 * page. If we enable single stepping, we'll be stepping
		 * the code in the gateway page. In and by itself this is
		 * not a problem, but it's an address debuggers won't know
		 * anything about. Hence, it can only cause confusion.
		 * We know that we need to branch to get out of the gateway
		 * page, so what we do here is enable the taken branch
		 * trap and just let the process continue. When we branch
		 * out of the gateway page we'll get back into the kernel
		 * and then we enable single stepping.
		 * Since this is a rather round-about way of enabling single
		 * stepping, don't make things complicated even more by
		 * calling userret() and do_ast(). We do that later...
		 */
		tf->tf_special.psr &= ~IA64_PSR_LP;
		tf->tf_special.psr |= IA64_PSR_TB;
		return;

	case IA64_VEC_TAKEN_BRANCH_TRAP:
		/*
		 * Don't assume there aren't any branches other than the
		 * branch that takes us out of the gateway page. Check the
		 * iip and raise SIGTRAP only when it's a user address.
		 */
		if (tf->tf_special.iip >= VM_MAX_ADDRESS)
			return;
		tf->tf_special.psr &= ~IA64_PSR_TB;
		sig = SIGTRAP;
		break;

	case IA64_VEC_IA32_EXCEPTION:
	case IA64_VEC_IA32_INTERCEPT:
	case IA64_VEC_IA32_INTERRUPT:
		sig = SIGEMT;
		ucode = tf->tf_special.iip;
		break;

	default:
		/* Reserved vectors get here. Should never happen of course. */
		trap_panic(vector, tf);
		break;
	}

	KASSERT(sig != 0, ("no signal to deliver"));

	if (print_usertrap)
		printtrap(vector, tf, 1, user);

	ksiginfo_init(&ksi);
	ksi.ksi_signo = sig;
	ksi.ksi_code = ucode;
	trapsignal(td, &ksi);

out:
	if (user) {
		userret(td, tf);
		mtx_assert(&Giant, MA_NOTOWNED);
		do_ast(tf);
	}
	return;
}

/*
 * Handle break instruction based system calls.
 */
void
break_syscall(struct trapframe *tf)
{
	uint64_t *bsp, *tfp;
	uint64_t iip, psr;
	int error, nargs;

	/* Save address of break instruction. */
	iip = tf->tf_special.iip;
	psr = tf->tf_special.psr;

	/* Advance to the next instruction. */
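	/*
	 * psr.ri selects one of the three instruction slots in the
	 * current 16-byte bundle; past slot 2 we move iip to the next
	 * bundle and reset the slot index.
	 */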
	tf->tf_special.psr += IA64_PSR_RI_1;
	if ((tf->tf_special.psr & IA64_PSR_RI) > IA64_PSR_RI_2) {
		tf->tf_special.iip += 16;
		tf->tf_special.psr &= ~IA64_PSR_RI;
	}

	/*
	 * Copy the arguments on the register stack into the trapframe
	 * to avoid having interleaved NaT collections.
	 */
	tfp = &tf->tf_scratch.gr16;
	nargs = tf->tf_special.cfm & 0x7f;
	bsp = (uint64_t *)(curthread->td_kstack + tf->tf_special.ndirty +
	    (tf->tf_special.bspstore & 0x1ffUL));
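	/* Step back over the arguments, allowing for a NaT collection. */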
	bsp -= (((uintptr_t)bsp & 0x1ff) < (nargs << 3)) ? (nargs + 1) : nargs;
	while (nargs--) {
		*tfp++ = *bsp++;
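		/* Skip the NaT collection slot at each 0x1f8 boundary. */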
		if (((uintptr_t)bsp & 0x1ff) == 0x1f8)
			bsp++;
	}
	error = syscall(tf);
	if (error == ERESTART) {
		tf->tf_special.iip = iip;
		tf->tf_special.psr = psr;
	}

	do_ast(tf);
}

/*
 * Process a system call.
 *
 * See syscall.s for details as to how we get here. In order to support
 * the ERESTART case, we return the error to our caller. They deal with
 * the hairy details.
 */
int
syscall(struct trapframe *tf)
{
	struct sysent *callp;
	struct proc *p;
	struct thread *td;
	uint64_t *args;
	int code, error;

	ia64_set_fpsr(IA64_FPSR_DEFAULT);

	code = tf->tf_scratch.gr15;
	args = &tf->tf_scratch.gr16;

	PCPU_INC(cnt.v_syscall);

	td = curthread;
	td->td_frame = tf;
	p = td->td_proc;

	td->td_pticks = 0;
	if (td->td_ucred != p->p_ucred)
		cred_update_thread(td);

	if (p->p_sysent->sv_prepsyscall) {
		/* (*p->p_sysent->sv_prepsyscall)(tf, args, &code, &params); */
		panic("prepsyscall");
	} else {
		/*
		 * syscall() and __syscall() are handled the same on
		 * the ia64, as everything is 64-bit aligned, anyway.
		 */
		if (code == SYS_syscall || code == SYS___syscall) {
			/*
			 * Code is first argument, followed by actual args.
			 */
			code = args[0];
			args++;
		}
	}

	if (p->p_sysent->sv_mask)
		code &= p->p_sysent->sv_mask;

	if (code >= p->p_sysent->sv_size)
		callp = &p->p_sysent->sv_table[0];
	else
		callp = &p->p_sysent->sv_table[code];

#ifdef KTRACE
	if (KTRPOINT(td, KTR_SYSCALL))
		ktrsyscall(code, callp->sy_narg, args);
#endif
	CTR4(KTR_SYSC, "syscall enter thread %p pid %d proc %s code %d", td,
	    td->td_proc->p_pid, td->td_name, code);

	td->td_retval[0] = 0;
	td->td_retval[1] = 0;
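	/*
	 * Preset r10 to EJUSTRETURN; do_ast() hands r10 back to the EPC
	 * syscall code so it can tell whether a restart is needed.
	 */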
	tf->tf_scratch.gr10 = EJUSTRETURN;

	STOPEVENT(p, S_SCE, callp->sy_narg);

	PTRACESTOP_SC(p, td, S_PT_SCE);

	AUDIT_SYSCALL_ENTER(code, td);
	error = (*callp->sy_call)(td, args);
	AUDIT_SYSCALL_EXIT(error, td);

	if (error != EJUSTRETURN) {
		/*
		 * Save the "raw" error code in r10. We use this to handle
		 * syscall restarts (see do_ast()).
		 */
		tf->tf_scratch.gr10 = error;
		if (error == 0) {
			tf->tf_scratch.gr8 = td->td_retval[0];
			tf->tf_scratch.gr9 = td->td_retval[1];
		} else if (error != ERESTART) {
			if (error < p->p_sysent->sv_errsize)
				error = p->p_sysent->sv_errtbl[error];
			/*
			 * Translated error codes are returned in r8. User
			 * processes use the translated error code.
			 */
			tf->tf_scratch.gr8 = error;
		}
	}

	td->td_syscalls++;

	/*
	 * Check for misbehavior.
	 */
	WITNESS_WARN(WARN_PANIC, NULL, "System call %s returning",
	    (code >= 0 && code < SYS_MAXSYSCALL) ? syscallnames[code] : "???");
	KASSERT(td->td_critnest == 0,
	    ("System call %s returning in a critical section",
	    (code >= 0 && code < SYS_MAXSYSCALL) ? syscallnames[code] : "???"));
	KASSERT(td->td_locks == 0,
	    ("System call %s returning with %d locks held",
	    (code >= 0 && code < SYS_MAXSYSCALL) ? syscallnames[code] : "???",
	    td->td_locks));

	/*
	 * Handle reschedule and other end-of-syscall issues
	 */
	userret(td, tf);

	CTR4(KTR_SYSC, "syscall exit thread %p pid %d proc %s code %d", td,
	    td->td_proc->p_pid, td->td_name, code);
#ifdef KTRACE
	if (KTRPOINT(td, KTR_SYSRET))
		ktrsysret(code, error, td->td_retval[0]);
#endif

	/*
	 * This works because errno is findable through the
	 * register set.  If we ever support an emulation where this
	 * is not the case, this code will need to be revisited.
	 */
	STOPEVENT(p, S_SCX, code);

	PTRACESTOP_SC(p, td, S_PT_SCX);

	return (error);
}