syscall.c revision 190844
1/*	$NetBSD: fault.c,v 1.45 2003/11/20 14:44:36 scw Exp $	*/
2
3/*-
4 * Copyright 2004 Olivier Houchard
5 * Copyright 2003 Wasabi Systems, Inc.
6 * All rights reserved.
7 *
8 * Written by Steve C. Woodford for Wasabi Systems, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 *    must display the following acknowledgement:
20 *      This product includes software developed for the NetBSD Project by
21 *      Wasabi Systems, Inc.
22 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
23 *    or promote products derived from this software without specific prior
24 *    written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 * POSSIBILITY OF SUCH DAMAGE.
37 */
38/*-
39 * Copyright (c) 1994-1997 Mark Brinicombe.
40 * Copyright (c) 1994 Brini.
41 * All rights reserved.
42 *
43 * This code is derived from software written for Brini by Mark Brinicombe
44 *
45 * Redistribution and use in source and binary forms, with or without
46 * modification, are permitted provided that the following conditions
47 * are met:
48 * 1. Redistributions of source code must retain the above copyright
49 *    notice, this list of conditions and the following disclaimer.
50 * 2. Redistributions in binary form must reproduce the above copyright
51 *    notice, this list of conditions and the following disclaimer in the
52 *    documentation and/or other materials provided with the distribution.
53 * 3. All advertising materials mentioning features or use of this software
54 *    must display the following acknowledgement:
55 *	This product includes software developed by Brini.
56 * 4. The name of the company nor the name of the author may be used to
57 *    endorse or promote products derived from this software without specific
58 *    prior written permission.
59 *
60 * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR IMPLIED
61 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
62 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
63 * IN NO EVENT SHALL BRINI OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
64 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
65 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
66 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
67 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
68 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
69 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
70 * SUCH DAMAGE.
71 *
72 * RiscBSD kernel project
73 *
74 * fault.c
75 *
76 * Fault handlers
77 *
78 * Created      : 28/11/94
79 */
80
81
82#include "opt_ktrace.h"
83
84#include <sys/cdefs.h>
85__FBSDID("$FreeBSD: head/sys/arm/arm/trap.c 190844 2009-04-08 12:54:32Z raj $");
86
87#include <sys/param.h>
88#include <sys/systm.h>
89#include <sys/proc.h>
90#include <sys/kernel.h>
91#include <sys/lock.h>
92#include <sys/mutex.h>
93#include <sys/syscall.h>
94#include <sys/sysent.h>
95#include <sys/signalvar.h>
96#include <sys/ktr.h>
97#ifdef KTRACE
98#include <sys/uio.h>
99#include <sys/ktrace.h>
100#endif
101#include <sys/ptrace.h>
102#include <sys/pioctl.h>
103
104#include <vm/vm.h>
105#include <vm/pmap.h>
106#include <vm/vm_kern.h>
107#include <vm/vm_map.h>
108#include <vm/vm_extern.h>
109
110#include <machine/cpuconf.h>
111#include <machine/vmparam.h>
112#include <machine/frame.h>
113#include <machine/cpu.h>
114#include <machine/intr.h>
115#include <machine/pcb.h>
116#include <machine/proc.h>
117#include <machine/swi.h>
118
119#include <security/audit/audit.h>
120
121#ifdef KDB
122#include <sys/kdb.h>
123#endif
124
125
126void swi_handler(trapframe_t *);
127void undefinedinstruction(trapframe_t *);
128
129#include <machine/disassem.h>
130#include <machine/machdep.h>
131
132extern char fusubailout[];
133extern char *syscallnames[];
134
135#ifdef DEBUG
136int last_fault_code;	/* For the benefit of pmap_fault_fixup() */
137#endif
138
139#if defined(CPU_ARM7TDMI)
140/* These CPUs may need data/prefetch abort fixups */
141#define	CPU_ABORT_FIXUP_REQUIRED
142#endif
143
144struct ksig {
145	int signb;
146	u_long code;
147};
148struct data_abort {
149	int (*func)(trapframe_t *, u_int, u_int, struct thread *, struct ksig *);
150	const char *desc;
151};
152
153static int dab_fatal(trapframe_t *, u_int, u_int, struct thread *, struct ksig *);
154static int dab_align(trapframe_t *, u_int, u_int, struct thread *, struct ksig *);
155static int dab_buserr(trapframe_t *, u_int, u_int, struct thread *, struct ksig *);
156
157static const struct data_abort data_aborts[] = {
158	{dab_fatal,	"Vector Exception"},
159	{dab_align,	"Alignment Fault 1"},
160	{dab_fatal,	"Terminal Exception"},
161	{dab_align,	"Alignment Fault 3"},
162	{dab_buserr,	"External Linefetch Abort (S)"},
163	{NULL,		"Translation Fault (S)"},
164	{dab_buserr,	"External Linefetch Abort (P)"},
165	{NULL,		"Translation Fault (P)"},
166	{dab_buserr,	"External Non-Linefetch Abort (S)"},
167	{NULL,		"Domain Fault (S)"},
168	{dab_buserr,	"External Non-Linefetch Abort (P)"},
169	{NULL,		"Domain Fault (P)"},
170	{dab_buserr,	"External Translation Abort (L1)"},
171	{NULL,		"Permission Fault (S)"},
172	{dab_buserr,	"External Translation Abort (L2)"},
173	{NULL,		"Permission Fault (P)"}
174};
175
/*
 * Determine if a fault came from user mode: true when the mode bits of the
 * PSR saved in trapframe 'tf' equal PSR_USR32_MODE.  The argument is
 * parenthesized so that any pointer-valued expression may be passed.
 */
#define	TRAP_USERMODE(tf)	(((tf)->tf_spsr & PSR_MODE) == PSR_USR32_MODE)
178
/*
 * True when the fault type encoded in 'x' (its FAULT_TYPE_MASK bits) is
 * FAULT_PERM_S or FAULT_PERM_P.  'x' is evaluated exactly once.
 */
#define	IS_PERMISSION_FAULT(x)					\
	((((1 << FAULT_PERM_S) | (1 << FAULT_PERM_P)) &		\
	  (1 << ((x) & FAULT_TYPE_MASK))) != 0)
183
184static __inline void
185call_trapsignal(struct thread *td, int sig, u_long code)
186{
187	ksiginfo_t ksi;
188
189	ksiginfo_init_trap(&ksi);
190	ksi.ksi_signo = sig;
191	ksi.ksi_code = (int)code;
192	trapsignal(td, &ksi);
193}
194
195static __inline int
196data_abort_fixup(trapframe_t *tf, u_int fsr, u_int far, struct thread *td, struct ksig *ksig)
197{
198#ifdef CPU_ABORT_FIXUP_REQUIRED
199	int error;
200
201	/* Call the cpu specific data abort fixup routine */
202	error = cpu_dataabt_fixup(tf);
203	if (__predict_true(error != ABORT_FIXUP_FAILED))
204		return (error);
205
206	/*
207	 * Oops, couldn't fix up the instruction
208	 */
209	printf("data_abort_fixup: fixup for %s mode data abort failed.\n",
210	    TRAP_USERMODE(tf) ? "user" : "kernel");
211	printf("pc = 0x%08x, opcode 0x%08x, insn = ", tf->tf_pc,
212	    *((u_int *)tf->tf_pc));
213	disassemble(tf->tf_pc);
214
215	/* Die now if this happened in kernel mode */
216	if (!TRAP_USERMODE(tf))
217		dab_fatal(tf, fsr, far, td, NULL, ksig);
218
219	return (error);
220#else
221	return (ABORT_FIXUP_OK);
222#endif /* CPU_ABORT_FIXUP_REQUIRED */
223}
224
225void
226data_abort_handler(trapframe_t *tf)
227{
228	struct vm_map *map;
229	struct pcb *pcb;
230	struct thread *td;
231	u_int user, far, fsr;
232	vm_prot_t ftype;
233	void *onfault;
234	vm_offset_t va;
235	int error = 0;
236	struct ksig ksig;
237	struct proc *p;
238
239
240	/* Grab FAR/FSR before enabling interrupts */
241	far = cpu_faultaddress();
242	fsr = cpu_faultstatus();
243#if 0
244	printf("data abort: %p (from %p %p)\n", (void*)far, (void*)tf->tf_pc,
245	    (void*)tf->tf_svc_lr);
246#endif
247
248	/* Update vmmeter statistics */
249#if 0
250	vmexp.traps++;
251#endif
252
253	td = curthread;
254	p = td->td_proc;
255
256	PCPU_INC(cnt.v_trap);
257	/* Data abort came from user mode? */
258	user = TRAP_USERMODE(tf);
259
260	if (user) {
261		td->td_pticks = 0;
262		td->td_frame = tf;
263		if (td->td_ucred != td->td_proc->p_ucred)
264			cred_update_thread(td);
265
266	}
267	/* Grab the current pcb */
268	pcb = td->td_pcb;
269	/* Re-enable interrupts if they were enabled previously */
270	if (td->td_md.md_spinlock_count == 0) {
271		if (__predict_true(tf->tf_spsr & I32_bit) == 0)
272			enable_interrupts(I32_bit);
273		if (__predict_true(tf->tf_spsr & F32_bit) == 0)
274			enable_interrupts(F32_bit);
275	}
276
277
278	/* Invoke the appropriate handler, if necessary */
279	if (__predict_false(data_aborts[fsr & FAULT_TYPE_MASK].func != NULL)) {
280		if ((data_aborts[fsr & FAULT_TYPE_MASK].func)(tf, fsr, far,
281		    td, &ksig)) {
282			goto do_trapsignal;
283		}
284		goto out;
285	}
286
287	/*
288	 * At this point, we're dealing with one of the following data aborts:
289	 *
290	 *  FAULT_TRANS_S  - Translation -- Section
291	 *  FAULT_TRANS_P  - Translation -- Page
292	 *  FAULT_DOMAIN_S - Domain -- Section
293	 *  FAULT_DOMAIN_P - Domain -- Page
294	 *  FAULT_PERM_S   - Permission -- Section
295	 *  FAULT_PERM_P   - Permission -- Page
296	 *
297	 * These are the main virtual memory-related faults signalled by
298	 * the MMU.
299	 */
300
301	/* fusubailout is used by [fs]uswintr to avoid page faulting */
302	if (__predict_false(pcb->pcb_onfault == fusubailout)) {
303		tf->tf_r0 = EFAULT;
304		tf->tf_pc = (register_t)(intptr_t) pcb->pcb_onfault;
305		return;
306	}
307
308	/*
309	 * Make sure the Program Counter is sane. We could fall foul of
310	 * someone executing Thumb code, in which case the PC might not
311	 * be word-aligned. This would cause a kernel alignment fault
312	 * further down if we have to decode the current instruction.
313	 * XXX: It would be nice to be able to support Thumb at some point.
314	 */
315	if (__predict_false((tf->tf_pc & 3) != 0)) {
316		if (user) {
317			/*
318			 * Give the user an illegal instruction signal.
319			 */
320			/* Deliver a SIGILL to the process */
321			ksig.signb = SIGILL;
322			ksig.code = 0;
323			goto do_trapsignal;
324		}
325
326		/*
327		 * The kernel never executes Thumb code.
328		 */
329		printf("\ndata_abort_fault: Misaligned Kernel-mode "
330		    "Program Counter\n");
331		dab_fatal(tf, fsr, far, td, &ksig);
332	}
333
334	/* See if the cpu state needs to be fixed up */
335	switch (data_abort_fixup(tf, fsr, far, td, &ksig)) {
336	case ABORT_FIXUP_RETURN:
337		return;
338	case ABORT_FIXUP_FAILED:
339		/* Deliver a SIGILL to the process */
340		ksig.signb = SIGILL;
341		ksig.code = 0;
342		goto do_trapsignal;
343	default:
344		break;
345	}
346
347	va = trunc_page((vm_offset_t)far);
348
349	/*
350	 * It is only a kernel address space fault iff:
351	 *	1. user == 0  and
352	 *	2. pcb_onfault not set or
353	 *	3. pcb_onfault set and not LDRT/LDRBT/STRT/STRBT instruction.
354	 */
355	if (user == 0 && (va >= VM_MIN_KERNEL_ADDRESS ||
356	    (va < VM_MIN_ADDRESS && vector_page == ARM_VECTORS_LOW)) &&
357	    __predict_true((pcb->pcb_onfault == NULL ||
358	     (ReadWord(tf->tf_pc) & 0x05200000) != 0x04200000))) {
359		map = kernel_map;
360
361		/* Was the fault due to the FPE/IPKDB ? */
362		if (__predict_false((tf->tf_spsr & PSR_MODE)==PSR_UND32_MODE)) {
363
364			/*
365			 * Force exit via userret()
366			 * This is necessary as the FPE is an extension to
367			 * userland that actually runs in a priveledged mode
368			 * but uses USR mode permissions for its accesses.
369			 */
370			user = 1;
371			ksig.signb = SIGSEGV;
372			ksig.code = 0;
373			goto do_trapsignal;
374		}
375	} else {
376		map = &td->td_proc->p_vmspace->vm_map;
377	}
378
379	/*
380	 * We need to know whether the page should be mapped
381	 * as R or R/W. The MMU does not give us the info as
382	 * to whether the fault was caused by a read or a write.
383	 *
384	 * However, we know that a permission fault can only be
385	 * the result of a write to a read-only location, so
386	 * we can deal with those quickly.
387	 *
388	 * Otherwise we need to disassemble the instruction
389	 * responsible to determine if it was a write.
390	 */
391	if (IS_PERMISSION_FAULT(fsr)) {
392		ftype = VM_PROT_WRITE;
393	} else {
394		u_int insn = ReadWord(tf->tf_pc);
395
396		if (((insn & 0x0c100000) == 0x04000000) ||	/* STR/STRB */
397		    ((insn & 0x0e1000b0) == 0x000000b0) ||	/* STRH/STRD */
398		    ((insn & 0x0a100000) == 0x08000000))	/* STM/CDT */
399		{
400			ftype = VM_PROT_WRITE;
401	}
402		else
403		if ((insn & 0x0fb00ff0) == 0x01000090)		/* SWP */
404			ftype = VM_PROT_READ | VM_PROT_WRITE;
405		else
406			ftype = VM_PROT_READ;
407	}
408
409	/*
410	 * See if the fault is as a result of ref/mod emulation,
411	 * or domain mismatch.
412	 */
413#ifdef DEBUG
414	last_fault_code = fsr;
415#endif
416	if (pmap_fault_fixup(vmspace_pmap(td->td_proc->p_vmspace), va, ftype,
417	    user)) {
418		goto out;
419	}
420
421	onfault = pcb->pcb_onfault;
422	pcb->pcb_onfault = NULL;
423	if (map != kernel_map) {
424		PROC_LOCK(p);
425		p->p_lock++;
426		PROC_UNLOCK(p);
427	}
428	error = vm_fault(map, va, ftype, (ftype & VM_PROT_WRITE) ?
429	    VM_FAULT_DIRTY : VM_FAULT_NORMAL);
430	pcb->pcb_onfault = onfault;
431
432	if (map != kernel_map) {
433		PROC_LOCK(p);
434		p->p_lock--;
435		PROC_UNLOCK(p);
436	}
437	if (__predict_true(error == 0))
438		goto out;
439	if (user == 0) {
440		if (pcb->pcb_onfault) {
441			tf->tf_r0 = error;
442			tf->tf_pc = (register_t)(intptr_t) pcb->pcb_onfault;
443			return;
444		}
445
446		printf("\nvm_fault(%p, %x, %x, 0) -> %x\n", map, va, ftype,
447		    error);
448		dab_fatal(tf, fsr, far, td, &ksig);
449	}
450
451
452	if (error == ENOMEM) {
453		printf("VM: pid %d (%s), uid %d killed: "
454		    "out of swap\n", td->td_proc->p_pid, td->td_name,
455		    (td->td_proc->p_ucred) ?
456		     td->td_proc->p_ucred->cr_uid : -1);
457		ksig.signb = SIGKILL;
458	} else {
459		ksig.signb = SIGSEGV;
460	}
461	ksig.code = 0;
462do_trapsignal:
463	call_trapsignal(td, ksig.signb, ksig.code);
464out:
465	/* If returning to user mode, make sure to invoke userret() */
466	if (user)
467		userret(td, tf);
468}
469
470/*
471 * dab_fatal() handles the following data aborts:
472 *
473 *  FAULT_WRTBUF_0 - Vector Exception
474 *  FAULT_WRTBUF_1 - Terminal Exception
475 *
476 * We should never see these on a properly functioning system.
477 *
478 * This function is also called by the other handlers if they
479 * detect a fatal problem.
480 *
481 * Note: If 'l' is NULL, we assume we're dealing with a prefetch abort.
482 */
483static int
484dab_fatal(trapframe_t *tf, u_int fsr, u_int far, struct thread *td, struct ksig *ksig)
485{
486	const char *mode;
487
488	mode = TRAP_USERMODE(tf) ? "user" : "kernel";
489
490	disable_interrupts(I32_bit|F32_bit);
491	if (td != NULL) {
492		printf("Fatal %s mode data abort: '%s'\n", mode,
493		    data_aborts[fsr & FAULT_TYPE_MASK].desc);
494		printf("trapframe: %p\nFSR=%08x, FAR=", tf, fsr);
495		if ((fsr & FAULT_IMPRECISE) == 0)
496			printf("%08x, ", far);
497		else
498			printf("Invalid,  ");
499		printf("spsr=%08x\n", tf->tf_spsr);
500	} else {
501		printf("Fatal %s mode prefetch abort at 0x%08x\n",
502		    mode, tf->tf_pc);
503		printf("trapframe: %p, spsr=%08x\n", tf, tf->tf_spsr);
504	}
505
506	printf("r0 =%08x, r1 =%08x, r2 =%08x, r3 =%08x\n",
507	    tf->tf_r0, tf->tf_r1, tf->tf_r2, tf->tf_r3);
508	printf("r4 =%08x, r5 =%08x, r6 =%08x, r7 =%08x\n",
509	    tf->tf_r4, tf->tf_r5, tf->tf_r6, tf->tf_r7);
510	printf("r8 =%08x, r9 =%08x, r10=%08x, r11=%08x\n",
511	    tf->tf_r8, tf->tf_r9, tf->tf_r10, tf->tf_r11);
512	printf("r12=%08x, ", tf->tf_r12);
513
514	if (TRAP_USERMODE(tf))
515		printf("usp=%08x, ulr=%08x",
516		    tf->tf_usr_sp, tf->tf_usr_lr);
517	else
518		printf("ssp=%08x, slr=%08x",
519		    tf->tf_svc_sp, tf->tf_svc_lr);
520	printf(", pc =%08x\n\n", tf->tf_pc);
521
522#ifdef KDB
523	if (debugger_on_panic || kdb_active)
524		kdb_trap(fsr, 0, tf);
525#endif
526	panic("Fatal abort");
527	/*NOTREACHED*/
528}
529
530/*
531 * dab_align() handles the following data aborts:
532 *
533 *  FAULT_ALIGN_0 - Alignment fault
534 *  FAULT_ALIGN_0 - Alignment fault
535 *
536 * These faults are fatal if they happen in kernel mode. Otherwise, we
537 * deliver a bus error to the process.
538 */
539static int
540dab_align(trapframe_t *tf, u_int fsr, u_int far, struct thread *td, struct ksig *ksig)
541{
542
543	/* Alignment faults are always fatal if they occur in kernel mode */
544	if (!TRAP_USERMODE(tf)) {
545		if (!td || !td->td_pcb->pcb_onfault)
546			dab_fatal(tf, fsr, far, td, ksig);
547		tf->tf_r0 = EFAULT;
548		tf->tf_pc = (int)td->td_pcb->pcb_onfault;
549		return (0);
550	}
551
552	/* pcb_onfault *must* be NULL at this point */
553
554	/* See if the cpu state needs to be fixed up */
555	(void) data_abort_fixup(tf, fsr, far, td, ksig);
556
557	/* Deliver a bus error signal to the process */
558	ksig->code = 0;
559	ksig->signb = SIGBUS;
560	td->td_frame = tf;
561
562	return (1);
563}
564
565/*
566 * dab_buserr() handles the following data aborts:
567 *
568 *  FAULT_BUSERR_0 - External Abort on Linefetch -- Section
569 *  FAULT_BUSERR_1 - External Abort on Linefetch -- Page
570 *  FAULT_BUSERR_2 - External Abort on Non-linefetch -- Section
571 *  FAULT_BUSERR_3 - External Abort on Non-linefetch -- Page
572 *  FAULT_BUSTRNL1 - External abort on Translation -- Level 1
573 *  FAULT_BUSTRNL2 - External abort on Translation -- Level 2
574 *
575 * If pcb_onfault is set, flag the fault and return to the handler.
576 * If the fault occurred in user mode, give the process a SIGBUS.
577 *
578 * Note: On XScale, FAULT_BUSERR_0, FAULT_BUSERR_1, and FAULT_BUSERR_2
579 * can be flagged as imprecise in the FSR. This causes a real headache
580 * since some of the machine state is lost. In this case, tf->tf_pc
581 * may not actually point to the offending instruction. In fact, if
582 * we've taken a double abort fault, it generally points somewhere near
583 * the top of "data_abort_entry" in exception.S.
584 *
585 * In all other cases, these data aborts are considered fatal.
586 */
587static int
588dab_buserr(trapframe_t *tf, u_int fsr, u_int far, struct thread *td, struct ksig *ksig)
589{
590	struct pcb *pcb = td->td_pcb;
591
592#ifdef __XSCALE__
593	if ((fsr & FAULT_IMPRECISE) != 0 &&
594	    (tf->tf_spsr & PSR_MODE) == PSR_ABT32_MODE) {
595		/*
596		 * Oops, an imprecise, double abort fault. We've lost the
597		 * r14_abt/spsr_abt values corresponding to the original
598		 * abort, and the spsr saved in the trapframe indicates
599		 * ABT mode.
600		 */
601		tf->tf_spsr &= ~PSR_MODE;
602
603		/*
604		 * We use a simple heuristic to determine if the double abort
605		 * happened as a result of a kernel or user mode access.
606		 * If the current trapframe is at the top of the kernel stack,
607		 * the fault _must_ have come from user mode.
608		 */
609		if (tf != ((trapframe_t *)pcb->un_32.pcb32_sp) - 1) {
610			/*
611			 * Kernel mode. We're either about to die a
612			 * spectacular death, or pcb_onfault will come
613			 * to our rescue. Either way, the current value
614			 * of tf->tf_pc is irrelevant.
615			 */
616			tf->tf_spsr |= PSR_SVC32_MODE;
617			if (pcb->pcb_onfault == NULL)
618				printf("\nKernel mode double abort!\n");
619		} else {
620			/*
621			 * User mode. We've lost the program counter at the
622			 * time of the fault (not that it was accurate anyway;
623			 * it's not called an imprecise fault for nothing).
624			 * About all we can do is copy r14_usr to tf_pc and
625			 * hope for the best. The process is about to get a
626			 * SIGBUS, so it's probably history anyway.
627			 */
628			tf->tf_spsr |= PSR_USR32_MODE;
629			tf->tf_pc = tf->tf_usr_lr;
630		}
631	}
632
633	/* FAR is invalid for imprecise exceptions */
634	if ((fsr & FAULT_IMPRECISE) != 0)
635		far = 0;
636#endif /* __XSCALE__ */
637
638	if (pcb->pcb_onfault) {
639		tf->tf_r0 = EFAULT;
640		tf->tf_pc = (register_t)(intptr_t) pcb->pcb_onfault;
641		return (0);
642	}
643
644	/* See if the cpu state needs to be fixed up */
645	(void) data_abort_fixup(tf, fsr, far, td, ksig);
646
647	/*
648	 * At this point, if the fault happened in kernel mode, we're toast
649	 */
650	if (!TRAP_USERMODE(tf))
651		dab_fatal(tf, fsr, far, td, ksig);
652
653	/* Deliver a bus error signal to the process */
654	ksig->signb = SIGBUS;
655	ksig->code = 0;
656	td->td_frame = tf;
657
658	return (1);
659}
660
661static __inline int
662prefetch_abort_fixup(trapframe_t *tf, struct ksig *ksig)
663{
664#ifdef CPU_ABORT_FIXUP_REQUIRED
665	int error;
666
667	/* Call the cpu specific prefetch abort fixup routine */
668	error = cpu_prefetchabt_fixup(tf);
669	if (__predict_true(error != ABORT_FIXUP_FAILED))
670		return (error);
671
672	/*
673	 * Oops, couldn't fix up the instruction
674	 */
675	printf(
676	    "prefetch_abort_fixup: fixup for %s mode prefetch abort failed.\n",
677	    TRAP_USERMODE(tf) ? "user" : "kernel");
678	printf("pc = 0x%08x, opcode 0x%08x, insn = ", tf->tf_pc,
679	    *((u_int *)tf->tf_pc));
680	disassemble(tf->tf_pc);
681
682	/* Die now if this happened in kernel mode */
683	if (!TRAP_USERMODE(tf))
684		dab_fatal(tf, 0, tf->tf_pc, NULL, ksig);
685
686	return (error);
687#else
688	return (ABORT_FIXUP_OK);
689#endif /* CPU_ABORT_FIXUP_REQUIRED */
690}
691
692/*
693 * void prefetch_abort_handler(trapframe_t *tf)
694 *
695 * Abort handler called when instruction execution occurs at
696 * a non existent or restricted (access permissions) memory page.
697 * If the address is invalid and we were in SVC mode then panic as
698 * the kernel should never prefetch abort.
699 * If the address is invalid and the page is mapped then the user process
700 * does no have read permission so send it a signal.
701 * Otherwise fault the page in and try again.
702 */
703void
704prefetch_abort_handler(trapframe_t *tf)
705{
706	struct thread *td;
707	struct proc * p;
708	struct vm_map *map;
709	vm_offset_t fault_pc, va;
710	int error = 0;
711	struct ksig ksig;
712
713
714#if 0
715	/* Update vmmeter statistics */
716	uvmexp.traps++;
717#endif
718#if 0
719	printf("prefetch abort handler: %p %p\n", (void*)tf->tf_pc,
720	    (void*)tf->tf_usr_lr);
721#endif
722
723 	td = curthread;
724	p = td->td_proc;
725	PCPU_INC(cnt.v_trap);
726
727	if (TRAP_USERMODE(tf)) {
728		td->td_frame = tf;
729		if (td->td_ucred != td->td_proc->p_ucred)
730			cred_update_thread(td);
731	}
732	fault_pc = tf->tf_pc;
733	if (td->td_md.md_spinlock_count == 0) {
734		if (__predict_true(tf->tf_spsr & I32_bit) == 0)
735			enable_interrupts(I32_bit);
736		if (__predict_true(tf->tf_spsr & F32_bit) == 0)
737			enable_interrupts(F32_bit);
738	}
739
740
741
742	/* See if the cpu state needs to be fixed up */
743	switch (prefetch_abort_fixup(tf, &ksig)) {
744	case ABORT_FIXUP_RETURN:
745		return;
746	case ABORT_FIXUP_FAILED:
747		/* Deliver a SIGILL to the process */
748		ksig.signb = SIGILL;
749		ksig.code = 0;
750		td->td_frame = tf;
751		goto do_trapsignal;
752	default:
753		break;
754	}
755
756	/* Prefetch aborts cannot happen in kernel mode */
757	if (__predict_false(!TRAP_USERMODE(tf)))
758		dab_fatal(tf, 0, tf->tf_pc, NULL, &ksig);
759	td->td_pticks = 0;
760
761
762	/* Ok validate the address, can only execute in USER space */
763	if (__predict_false(fault_pc >= VM_MAXUSER_ADDRESS ||
764	    (fault_pc < VM_MIN_ADDRESS && vector_page == ARM_VECTORS_LOW))) {
765		ksig.signb = SIGSEGV;
766		ksig.code = 0;
767		goto do_trapsignal;
768	}
769
770	map = &td->td_proc->p_vmspace->vm_map;
771	va = trunc_page(fault_pc);
772
773	/*
774	 * See if the pmap can handle this fault on its own...
775	 */
776#ifdef DEBUG
777	last_fault_code = -1;
778#endif
779	if (pmap_fault_fixup(map->pmap, va, VM_PROT_READ, 1))
780		goto out;
781
782	if (map != kernel_map) {
783		PROC_LOCK(p);
784		p->p_lock++;
785		PROC_UNLOCK(p);
786	}
787
788	error = vm_fault(map, va, VM_PROT_READ | VM_PROT_EXECUTE,
789	    VM_FAULT_NORMAL);
790	if (map != kernel_map) {
791		PROC_LOCK(p);
792		p->p_lock--;
793		PROC_UNLOCK(p);
794	}
795
796	if (__predict_true(error == 0))
797		goto out;
798
799	if (error == ENOMEM) {
800		printf("VM: pid %d (%s), uid %d killed: "
801		    "out of swap\n", td->td_proc->p_pid, td->td_name,
802		    (td->td_proc->p_ucred) ?
803		     td->td_proc->p_ucred->cr_uid : -1);
804		ksig.signb = SIGKILL;
805	} else {
806		ksig.signb = SIGSEGV;
807	}
808	ksig.code = 0;
809
810do_trapsignal:
811	call_trapsignal(td, ksig.signb, ksig.code);
812
813out:
814	userret(td, tf);
815
816}
817
818extern int badaddr_read_1(const uint8_t *, uint8_t *);
819extern int badaddr_read_2(const uint16_t *, uint16_t *);
820extern int badaddr_read_4(const uint32_t *, uint32_t *);
821/*
822 * Tentatively read an 8, 16, or 32-bit value from 'addr'.
823 * If the read succeeds, the value is written to 'rptr' and zero is returned.
824 * Else, return EFAULT.
825 */
826int
827badaddr_read(void *addr, size_t size, void *rptr)
828{
829	union {
830		uint8_t v1;
831		uint16_t v2;
832		uint32_t v4;
833	} u;
834	int rv;
835
836	cpu_drain_writebuf();
837
838	/* Read from the test address. */
839	switch (size) {
840	case sizeof(uint8_t):
841		rv = badaddr_read_1(addr, &u.v1);
842		if (rv == 0 && rptr)
843			*(uint8_t *) rptr = u.v1;
844		break;
845
846	case sizeof(uint16_t):
847		rv = badaddr_read_2(addr, &u.v2);
848		if (rv == 0 && rptr)
849			*(uint16_t *) rptr = u.v2;
850		break;
851
852	case sizeof(uint32_t):
853		rv = badaddr_read_4(addr, &u.v4);
854		if (rv == 0 && rptr)
855			*(uint32_t *) rptr = u.v4;
856		break;
857
858	default:
859		panic("badaddr: invalid size (%lu)", (u_long) size);
860	}
861
862	/* Return EFAULT if the address was invalid, else zero */
863	return (rv);
864}
865
866#define MAXARGS	8
867static void
868syscall(struct thread *td, trapframe_t *frame, u_int32_t insn)
869{
870	struct proc *p = td->td_proc;
871	int code, error;
872	u_int nap, nargs;
873	register_t *ap, *args, copyargs[MAXARGS];
874	struct sysent *callp;
875
876	PCPU_INC(cnt.v_syscall);
877	td->td_pticks = 0;
878	if (td->td_ucred != td->td_proc->p_ucred)
879		cred_update_thread(td);
880	switch (insn & SWI_OS_MASK) {
881	case 0: /* XXX: we need our own one. */
882		nap = 4;
883		break;
884	default:
885		call_trapsignal(td, SIGILL, 0);
886		userret(td, frame);
887		return;
888	}
889	code = insn & 0x000fffff;
890	td->td_pticks = 0;
891	ap = &frame->tf_r0;
892	if (code == SYS_syscall) {
893		code = *ap++;
894
895		nap--;
896	} else if (code == SYS___syscall) {
897		code = ap[_QUAD_LOWWORD];
898		nap -= 2;
899		ap += 2;
900	}
901	if (p->p_sysent->sv_mask)
902		code &= p->p_sysent->sv_mask;
903	if (code >= p->p_sysent->sv_size)
904		callp = &p->p_sysent->sv_table[0];
905	else
906		callp = &p->p_sysent->sv_table[code];
907	nargs = callp->sy_narg;
908	memcpy(copyargs, ap, nap * sizeof(register_t));
909	if (nargs > nap) {
910		error = copyin((void *)frame->tf_usr_sp, copyargs + nap,
911		    (nargs - nap) * sizeof(register_t));
912		if (error)
913			goto bad;
914	}
915	args = copyargs;
916	error = 0;
917#ifdef KTRACE
918	if (KTRPOINT(td, KTR_SYSCALL))
919		ktrsyscall(code, nargs, args);
920#endif
921
922	CTR4(KTR_SYSC, "syscall enter thread %p pid %d proc %s code %d", td,
923	    td->td_proc->p_pid, td->td_name, code);
924	if (error == 0) {
925		td->td_retval[0] = 0;
926		td->td_retval[1] = 0;
927		STOPEVENT(p, S_SCE, callp->sy_narg);
928		PTRACESTOP_SC(p, td, S_PT_SCE);
929		AUDIT_SYSCALL_ENTER(code, td);
930		error = (*callp->sy_call)(td, args);
931		AUDIT_SYSCALL_EXIT(error, td);
932		KASSERT(td->td_ar == NULL,
933		    ("returning from syscall with td_ar set!"));
934	}
935	switch (error) {
936	case 0:
937#ifdef __ARMEB__
938		if ((insn & 0x000fffff) == SYS___syscall &&
939		    code != SYS_freebsd6_lseek && code != SYS_lseek) {
940			/*
941			 * 64-bit return, 32-bit syscall. Fixup byte order
942			 */
943			frame->tf_r0 = 0;
944			frame->tf_r1 = td->td_retval[0];
945		} else {
946			frame->tf_r0 = td->td_retval[0];
947			frame->tf_r1 = td->td_retval[1];
948		}
949#else
950		frame->tf_r0 = td->td_retval[0];
951	  	frame->tf_r1 = td->td_retval[1];
952#endif
953
954		frame->tf_spsr &= ~PSR_C_bit;   /* carry bit */
955		break;
956
957	case ERESTART:
958		/*
959		 * Reconstruct the pc to point at the swi.
960		 */
961		frame->tf_pc -= INSN_SIZE;
962		break;
963	case EJUSTRETURN:
964		/* nothing to do */
965		break;
966	default:
967bad:
968		frame->tf_r0 = error;
969		frame->tf_spsr |= PSR_C_bit;    /* carry bit */
970		break;
971	}
972
973	WITNESS_WARN(WARN_PANIC, NULL, "System call %s returning",
974	    (code >= 0 && code < SYS_MAXSYSCALL) ? syscallnames[code] : "???");
975	KASSERT(td->td_critnest == 0,
976	    ("System call %s returning in a critical section",
977	    (code >= 0 && code < SYS_MAXSYSCALL) ? syscallnames[code] : "???"));
978	KASSERT(td->td_locks == 0,
979	    ("System call %s returning with %d locks held",
980	    (code >= 0 && code < SYS_MAXSYSCALL) ? syscallnames[code] : "???",
981	    td->td_locks));
982
983	userret(td, frame);
984	CTR4(KTR_SYSC, "syscall exit thread %p pid %d proc %s code %d", td,
985	    td->td_proc->p_pid, td->td_name, code);
986
987	STOPEVENT(p, S_SCX, code);
988	PTRACESTOP_SC(p, td, S_PT_SCX);
989#ifdef KTRACE
990      	if (KTRPOINT(td, KTR_SYSRET))
991		ktrsysret(code, error, td->td_retval[0]);
992#endif
993}
994
995void
996swi_handler(trapframe_t *frame)
997{
998	struct thread *td = curthread;
999	uint32_t insn;
1000
1001	td->td_frame = frame;
1002
1003	td->td_pticks = 0;
1004	/*
1005      	 * Make sure the program counter is correctly aligned so we
1006	 * don't take an alignment fault trying to read the opcode.
1007	 */
1008	if (__predict_false(((frame->tf_pc - INSN_SIZE) & 3) != 0)) {
1009		call_trapsignal(td, SIGILL, 0);
1010		userret(td, frame);
1011		return;
1012	}
1013	insn = *(u_int32_t *)(frame->tf_pc - INSN_SIZE);
1014	/*
1015	 * Enable interrupts if they were enabled before the exception.
1016	 * Since all syscalls *should* come from user mode it will always
1017	 * be safe to enable them, but check anyway.
1018	 */
1019	if (td->td_md.md_spinlock_count == 0) {
1020		if (__predict_true(frame->tf_spsr & I32_bit) == 0)
1021			enable_interrupts(I32_bit);
1022		if (__predict_true(frame->tf_spsr & F32_bit) == 0)
1023			enable_interrupts(F32_bit);
1024	}
1025
1026	syscall(td, frame, insn);
1027}
1028
1029