Deleted Added
full compact
subr_syscall.c (46129) subr_syscall.c (46568)
1/*-
2 * Copyright (C) 1994, David Greenman
3 * Copyright (c) 1990, 1993
4 * The Regents of the University of California. All rights reserved.
5 *
6 * This code is derived from software contributed to Berkeley by
7 * the University of Utah, and William Jolitz.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the University of
20 * California, Berkeley and its contributors.
21 * 4. Neither the name of the University nor the names of its contributors
22 * may be used to endorse or promote products derived from this software
23 * without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 *
37 * from: @(#)trap.c 7.4 (Berkeley) 5/13/91
1/*-
2 * Copyright (C) 1994, David Greenman
3 * Copyright (c) 1990, 1993
4 * The Regents of the University of California. All rights reserved.
5 *
6 * This code is derived from software contributed to Berkeley by
7 * the University of Utah, and William Jolitz.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the University of
20 * California, Berkeley and its contributors.
21 * 4. Neither the name of the University nor the names of its contributors
22 * may be used to endorse or promote products derived from this software
23 * without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 *
37 * from: @(#)trap.c 7.4 (Berkeley) 5/13/91
38 * $Id: trap.c,v 1.135 1999/04/19 14:14:13 peter Exp $
38 * $Id: trap.c,v 1.136 1999/04/28 01:03:26 luoqi Exp $
39 */
40
41/*
42 * 386 Trap and System call handling
43 */
44
45#include "opt_cpu.h"
46#include "opt_ddb.h"
47#include "opt_ktrace.h"
48#include "opt_clock.h"
49#include "opt_trap.h"
50#include "opt_vm86.h"
51
52#include <sys/param.h>
53#include <sys/systm.h>
54#include <sys/proc.h>
55#include <sys/pioctl.h>
56#include <sys/kernel.h>
57#include <sys/resourcevar.h>
58#include <sys/signalvar.h>
59#include <sys/syscall.h>
60#include <sys/sysent.h>
61#include <sys/uio.h>
62#include <sys/vmmeter.h>
63#ifdef KTRACE
64#include <sys/ktrace.h>
65#endif
66
67#include <vm/vm.h>
68#include <vm/vm_param.h>
69#include <vm/vm_prot.h>
70#include <sys/lock.h>
71#include <vm/pmap.h>
72#include <vm/vm_kern.h>
73#include <vm/vm_map.h>
74#include <vm/vm_page.h>
75#include <vm/vm_extern.h>
76
77#include <machine/cpu.h>
78#include <machine/ipl.h>
79#include <machine/md_var.h>
80#include <machine/pcb.h>
81#ifdef SMP
82#include <machine/smp.h>
83#endif
84#include <machine/tss.h>
85
86#include <i386/isa/intr_machdep.h>
87
88#ifdef POWERFAIL_NMI
89#include <sys/syslog.h>
90#include <machine/clock.h>
91#endif
92
93#ifdef VM86
94#include <machine/vm86.h>
95#endif
96
97#ifdef DDB
98 extern int in_Debugger, debugger_on_panic;
99#endif
100
101#include "isa.h"
102#include "npx.h"
103
104int (*pmath_emulate) __P((struct trapframe *));
105
106extern void trap __P((struct trapframe frame));
107extern int trapwrite __P((unsigned addr));
108extern void syscall __P((struct trapframe frame));
109
110static int trap_pfault __P((struct trapframe *, int, vm_offset_t));
111static void trap_fatal __P((struct trapframe *, vm_offset_t));
112void dblfault_handler __P((void));
113
114extern inthand_t IDTVEC(syscall);
115
/*
 * Human-readable names for the trap types, indexed by trap number.
 * MAX_TRAP_MSG is the highest valid index; code in this file compares
 * the trap number against it before indexing trap_msg[].
 */
116#define MAX_TRAP_MSG 28
117static char *trap_msg[] = {
118 "", /* 0 unused */
119 "privileged instruction fault", /* 1 T_PRIVINFLT */
120 "", /* 2 unused */
121 "breakpoint instruction fault", /* 3 T_BPTFLT */
122 "", /* 4 unused */
123 "", /* 5 unused */
124 "arithmetic trap", /* 6 T_ARITHTRAP */
125 "system forced exception", /* 7 T_ASTFLT */
126 "", /* 8 unused */
127 "general protection fault", /* 9 T_PROTFLT */
128 "trace trap", /* 10 T_TRCTRAP */
129 "", /* 11 unused */
130 "page fault", /* 12 T_PAGEFLT */
131 "", /* 13 unused */
132 "alignment fault", /* 14 T_ALIGNFLT */
133 "", /* 15 unused */
134 "", /* 16 unused */
135 "", /* 17 unused */
136 "integer divide fault", /* 18 T_DIVIDE */
137 "non-maskable interrupt trap", /* 19 T_NMI */
138 "overflow trap", /* 20 T_OFLOW */
139 "FPU bounds check fault", /* 21 T_BOUND */
140 "FPU device not available", /* 22 T_DNA */
141 "double fault", /* 23 T_DOUBLEFLT */
142 "FPU operand fetch fault", /* 24 T_FPOPFLT */
143 "invalid TSS fault", /* 25 T_TSSFLT */
144 "segment not present fault", /* 26 T_SEGNPFLT */
145 "stack fault", /* 27 T_STKFLT */
146 "machine check trap", /* 28 T_MCHK */
147};
148
149static __inline void userret __P((struct proc *p, struct trapframe *frame,
150 u_quad_t oticks));
151
152#if defined(I586_CPU) && !defined(NO_F00F_HACK)
153extern struct gate_descriptor *t_idt;
154extern int has_f00f_bug;
155#endif
156
/*
 * Common tail run before a trap handler returns for a process.
 *
 * In order: posts every pending signal, resets p_priority to p_usrpri,
 * gives up the CPU through mi_switch() when want_resched is set (then
 * posts signals again), charges profiling time when P_PROFIL is set,
 * and finally records the process priority in curpriority.
 *
 * p:      the process being returned to.
 * frame:  its trap frame; only tf_eip is read here, for profiling.
 * oticks: value of p->p_sticks sampled by the caller on entry; the
 *         difference from the current p_sticks is what gets charged.
 */
157static __inline void
158userret(p, frame, oticks)
159 struct proc *p;
160 struct trapframe *frame;
161 u_quad_t oticks;
162{
163 int sig, s;
164
165 while ((sig = CURSIG(p)) != 0)
166 postsig(sig);
167
168#if 0
169 if (!want_resched &&
170 (p->p_priority <= p->p_usrpri) &&
171 (p->p_rtprio.type == RTP_PRIO_NORMAL)) {
172 int newpriority;
173 p->p_estcpu += 1;
174 newpriority = PUSER + p->p_estcpu / 4 + 2 * p->p_nice;
175 newpriority = min(newpriority, MAXPRI);
176 p->p_usrpri = newpriority;
177 }
178#endif
179
180 p->p_priority = p->p_usrpri;
181 if (want_resched) {
182 /*
183 * Since we are curproc, clock will normally just change
184 * our priority without moving us from one queue to another
185 * (since the running process is not on a queue.)
186 * If that happened after we setrunqueue ourselves but before we
187 * mi_switch()'ed, we might not be on the queue indicated by
188 * our priority.
189 */
190 s = splhigh();
191 setrunqueue(p);
192 p->p_stats->p_ru.ru_nivcsw++;
193 mi_switch();
194 splx(s);
 /* Signals may have arrived while we were switched out. */
195 while ((sig = CURSIG(p)) != 0)
196 postsig(sig);
197 }
198 /*
199 * Charge system time if profiling.
200 */
201 if (p->p_flag & P_PROFIL)
202 addupc_task(p, frame->tf_eip,
203 (u_int)(p->p_sticks - oticks) * psratio);
204
205 curpriority = p->p_priority;
206}
207
208/*
209 * Exception, fault, and trap interface to the FreeBSD kernel.
210 * This common code is called from assembly language IDT gate entry
211 * routines that prepare a suitable stack frame, and restore this
212 * frame after the exception has been processed.
 *
 * Traps taken from user mode (or vm86 mode) are translated into a
 * signal number (i) and code (ucode), posted with trapsignal(), and
 * finished through userret(). Traps taken in kernel mode are either
 * recovered from in place (by adjusting the frame and returning) or
 * handed to trap_fatal().
 *
 * NOTE(review): frame is declared by value, yet the handler edits it
 * (tf_eip, tf_eflags) and returns; this presumably relies on the entry
 * code restoring registers from the same stack slot -- confirm against
 * the assembly entry routines.
213 */
214
215void
216trap(frame)
217 struct trapframe frame;
218{
219 struct proc *p = curproc;
220 u_quad_t sticks = 0;
221 int i = 0, ucode = 0, type, code;
222 vm_offset_t eva;
223
224 if (!(frame.tf_eflags & PSL_I)) {
225 /*
226 * Buggy application or kernel code has disabled interrupts
227 * and then trapped. Enabling interrupts now is wrong, but
228 * it is better than running with interrupts disabled until
229 * they are accidentally enabled later.
230 */
231 type = frame.tf_trapno;
232 if (ISPL(frame.tf_cs) == SEL_UPL || (frame.tf_eflags & PSL_VM))
233 printf(
234 "pid %ld (%s): trap %d with interrupts disabled\n",
235 (long)curproc->p_pid, curproc->p_comm, type);
236 else if (type != T_BPTFLT && type != T_TRCTRAP)
237 /*
238 * XXX not quite right, since this may be for a
239 * multiple fault in user mode.
240 */
241 printf("kernel trap %d with interrupts disabled\n",
242 type);
243 enable_intr();
244 }
245
246 eva = 0;
247 if (frame.tf_trapno == T_PAGEFLT) {
248 /*
249 * For some Cyrix CPUs, %cr2 is clobbered by interrupts.
250 * This problem is worked around by using an interrupt
251 * gate for the pagefault handler. We are finally ready
252 * to read %cr2 and then must reenable interrupts.
253 *
254 * XXX this should be in the switch statement, but the
255 * NO_F00F_HACK and VM86 goto and ifdefs obfuscate the
256 * flow of control too much for this to be obviously
257 * correct.
258 */
259 eva = rcr2();
260 enable_intr();
261 }
262
263#if defined(I586_CPU) && !defined(NO_F00F_HACK)
264restart:
265#endif
266 type = frame.tf_trapno;
267 code = frame.tf_err;
268
269#ifdef VM86
270 if (in_vm86call) {
271 if (frame.tf_eflags & PSL_VM &&
272 (type == T_PROTFLT || type == T_STKFLT)) {
273 i = vm86_emulate((struct vm86frame *)&frame);
274 if (i != 0)
275 /*
276 * returns to original process
277 */
278 vm86_trap((struct vm86frame *)&frame);
279 return;
280 }
281 switch (type) {
282 /*
283 * these traps want either a process context, or
284 * assume a normal userspace trap.
285 */
286 case T_PROTFLT:
287 case T_SEGNPFLT:
288 trap_fatal(&frame, eva);
289 return;
290 case T_TRCTRAP:
291 type = T_BPTFLT; /* kernel breakpoint */
292 /* FALL THROUGH */
293 }
294 goto kernel_trap; /* normal kernel trap handling */
295 }
296#endif
297
298 if ((ISPL(frame.tf_cs) == SEL_UPL) || (frame.tf_eflags & PSL_VM)) {
299 /* user trap */
300
301 sticks = p->p_sticks;
302 p->p_md.md_regs = &frame;
303
304 switch (type) {
305 case T_PRIVINFLT: /* privileged instruction fault */
306 ucode = type;
307 i = SIGILL;
308 break;
309
310 case T_BPTFLT: /* bpt instruction fault */
311 case T_TRCTRAP: /* trace trap */
312 frame.tf_eflags &= ~PSL_T;
313 i = SIGTRAP;
314 break;
315
316 case T_ARITHTRAP: /* arithmetic trap */
317 ucode = code;
318 i = SIGFPE;
319 break;
320
321 case T_ASTFLT: /* Allow process switch */
322 astoff();
323 cnt.v_soft++;
324 if (p->p_flag & P_OWEUPC) {
325 p->p_flag &= ~P_OWEUPC;
326 addupc_task(p, p->p_stats->p_prof.pr_addr,
327 p->p_stats->p_prof.pr_ticks);
328 }
329 goto out;
330
331 /*
332 * The following two traps can happen in
333 * vm86 mode, and, if so, we want to handle
334 * them specially.
335 */
336 case T_PROTFLT: /* general protection fault */
337 case T_STKFLT: /* stack fault */
338#ifdef VM86
339 if (frame.tf_eflags & PSL_VM) {
340 i = vm86_emulate((struct vm86frame *)&frame);
341 if (i == 0)
342 goto out;
343 break;
344 }
345#endif /* VM86 */
346 /* FALL THROUGH */
347
348 case T_SEGNPFLT: /* segment not present fault */
349 case T_TSSFLT: /* invalid TSS fault */
350 case T_DOUBLEFLT: /* double fault */
351 default:
352 ucode = code + BUS_SEGM_FAULT ;
353 i = SIGBUS;
354 break;
355
356 case T_PAGEFLT: /* page fault */
 /*
 * trap_pfault() results: -1 fatal path already taken,
 * -2 restart with the rewritten trap number, 0 handled,
 * anything else is the signal to post.
 */
357 i = trap_pfault(&frame, TRUE, eva);
358 if (i == -1)
359 return;
360#if defined(I586_CPU) && !defined(NO_F00F_HACK)
361 if (i == -2)
362 goto restart;
363#endif
364 if (i == 0)
365 goto out;
366
367 ucode = T_PAGEFLT;
368 break;
369
370 case T_DIVIDE: /* integer divide fault */
371 ucode = FPE_INTDIV_TRAP;
372 i = SIGFPE;
373 break;
374
375#if NISA > 0
376 case T_NMI:
377#ifdef POWERFAIL_NMI
378 goto handle_powerfail;
379#else /* !POWERFAIL_NMI */
380#ifdef DDB
381 /* NMI can be hooked up to a pushbutton for debugging */
382 printf ("NMI ... going to debugger\n");
383 if (kdb_trap (type, 0, &frame))
384 return;
385#endif /* DDB */
386 /* machine/parity/power fail/"kitchen sink" faults */
387 if (isa_nmi(code) == 0) return;
388 panic("NMI indicates hardware failure");
389#endif /* POWERFAIL_NMI */
390#endif /* NISA > 0 */
391
392 case T_OFLOW: /* integer overflow fault */
393 ucode = FPE_INTOVF_TRAP;
394 i = SIGFPE;
395 break;
396
397 case T_BOUND: /* bounds check fault */
398 ucode = FPE_SUBRNG_TRAP;
399 i = SIGFPE;
400 break;
401
402 case T_DNA:
403#if NNPX > 0
404 /* if a transparent fault (due to context switch "late") */
405 if (npxdna())
406 return;
407#endif
408 if (!pmath_emulate) {
409 i = SIGFPE;
410 ucode = FPE_FPU_NP_TRAP;
411 break;
412 }
413 i = (*pmath_emulate)(&frame);
414 if (i == 0) {
415 if (!(frame.tf_eflags & PSL_T))
416 return;
417 frame.tf_eflags &= ~PSL_T;
418 i = SIGTRAP;
419 }
420 /* else ucode = emulator_only_knows() XXX */
421 break;
422
423 case T_FPOPFLT: /* FPU operand fetch fault */
424 ucode = T_FPOPFLT;
425 i = SIGILL;
426 break;
427 }
428 } else {
429#ifdef VM86
430kernel_trap:
431#endif
432 /* kernel trap */
433
434 switch (type) {
435 case T_PAGEFLT: /* page fault */
436 (void) trap_pfault(&frame, FALSE, eva);
437 return;
438
439 case T_DNA:
440#if NNPX > 0
441 /*
442 * The kernel is apparently using npx for copying.
443 * XXX this should be fatal unless the kernel has
444 * registered such use.
445 */
446 if (npxdna())
447 return;
448#endif
449 break;
450
451 case T_PROTFLT: /* general protection fault */
452 case T_SEGNPFLT: /* segment not present fault */
453 /*
454 * Invalid segment selectors and out of bounds
455 * %eip's and %esp's can be set up in user mode.
456 * This causes a fault in kernel mode when the
457 * kernel tries to return to user mode. We want
458 * to get this fault so that we can fix the
459 * problem here and not have to check all the
460 * selectors and pointers when the user changes
461 * them.
462 */
463#define MAYBE_DORETI_FAULT(where, whereto) \
464 do { \
465 if (frame.tf_eip == (int)where) { \
466 frame.tf_eip = (int)whereto; \
467 return; \
468 } \
469 } while (0)
470
471 if (intr_nesting_level == 0) {
472 /*
473 * Invalid %fs's and %gs's can be created using
474 * procfs or PT_SETREGS or by invalidating the
475 * underlying LDT entry. This causes a fault
476 * in kernel mode when the kernel attempts to
477 * switch contexts. Lose the bad context
478 * (XXX) so that we can continue, and generate
479 * a signal.
480 */
481 if (frame.tf_eip == (int)cpu_switch_load_gs) {
482 curpcb->pcb_gs = 0;
483 psignal(p, SIGBUS);
484 return;
485 }
486 MAYBE_DORETI_FAULT(doreti_iret,
487 doreti_iret_fault);
488 MAYBE_DORETI_FAULT(doreti_popl_ds,
489 doreti_popl_ds_fault);
490 MAYBE_DORETI_FAULT(doreti_popl_es,
491 doreti_popl_es_fault);
492 MAYBE_DORETI_FAULT(doreti_popl_fs,
493 doreti_popl_fs_fault);
494 if (curpcb && curpcb->pcb_onfault) {
495 frame.tf_eip = (int)curpcb->pcb_onfault;
496 return;
497 }
498 }
499 break;
500
501 case T_TSSFLT:
502 /*
503 * PSL_NT can be set in user mode and isn't cleared
504 * automatically when the kernel is entered. This
505 * causes a TSS fault when the kernel attempts to
506 * `iret' because the TSS link is uninitialized. We
507 * want to get this fault so that we can fix the
508 * problem here and not every time the kernel is
509 * entered.
510 */
511 if (frame.tf_eflags & PSL_NT) {
512 frame.tf_eflags &= ~PSL_NT;
513 return;
514 }
515 break;
516
517 case T_TRCTRAP: /* trace trap */
518 if (frame.tf_eip == (int)IDTVEC(syscall)) {
519 /*
520 * We've just entered system mode via the
521 * syscall lcall. Continue single stepping
522 * silently until the syscall handler has
523 * saved the flags.
524 */
525 return;
526 }
527 if (frame.tf_eip == (int)IDTVEC(syscall) + 1) {
528 /*
529 * The syscall handler has now saved the
530 * flags. Stop single stepping it.
531 */
532 frame.tf_eflags &= ~PSL_T;
533 return;
534 }
535 /*
536 * Fall through.
537 */
538 case T_BPTFLT:
539 /*
540 * If DDB is enabled, let it handle the debugger trap.
541 * Otherwise, debugger traps "can't happen".
542 */
543#ifdef DDB
544 if (kdb_trap (type, 0, &frame))
545 return;
546#endif
547 break;
548
549#if NISA > 0
550 case T_NMI:
551#ifdef POWERFAIL_NMI
552#ifndef TIMER_FREQ
553# define TIMER_FREQ 1193182
554#endif
555 handle_powerfail:
556 {
557 static unsigned lastalert = 0;
558
559 if(time_second - lastalert > 10)
560 {
561 log(LOG_WARNING, "NMI: power fail\n");
562 sysbeep(TIMER_FREQ/880, hz);
563 lastalert = time_second;
564 }
565 return;
566 }
567#else /* !POWERFAIL_NMI */
568#ifdef DDB
569 /* NMI can be hooked up to a pushbutton for debugging */
570 printf ("NMI ... going to debugger\n");
571 if (kdb_trap (type, 0, &frame))
572 return;
573#endif /* DDB */
574 /* machine/parity/power fail/"kitchen sink" faults */
575 if (isa_nmi(code) == 0) return;
576 /* FALL THROUGH */
577#endif /* POWERFAIL_NMI */
578#endif /* NISA > 0 */
579 }
580
 /* No kernel-mode case above recovered: report and panic. */
581 trap_fatal(&frame, eva);
582 return;
583 }
584
585 /* Translate fault for emulators (e.g. Linux) */
586 if (*p->p_sysent->sv_transtrap)
587 i = (*p->p_sysent->sv_transtrap)(i, type);
588
589 trapsignal(p, i, ucode);
590
591#ifdef DEBUG
592 if (type <= MAX_TRAP_MSG) {
593 uprintf("fatal process exception: %s",
594 trap_msg[type]);
595 if ((type == T_PAGEFLT) || (type == T_PROTFLT))
596 uprintf(", fault VA = 0x%lx", (u_long)eva);
597 uprintf("\n");
598 }
599#endif
600
601out:
602 userret(p, &frame, sticks);
603}
604
605#ifdef notyet
606/*
607 * This version doesn't allow a page fault to user space while
608 * in the kernel. The rest of the kernel needs to be made "safe"
609 * before this can be used. I think the only things remaining
610 * to be made safe are the iBCS2 code and the process tracing/
611 * debugging code.
 *
 * Compiled only when `notyet' is defined.
 *
 * Returns 0 when the fault was resolved (or a kernel fault was
 * redirected to curpcb->pcb_onfault), -1 after trap_fatal() has been
 * called, otherwise SIGBUS or SIGSEGV for a failed user-mode fault.
612 */
613static int
614trap_pfault(frame, usermode, eva)
615 struct trapframe *frame;
616 int usermode;
617 vm_offset_t eva;
618{
619 vm_offset_t va;
620 struct vmspace *vm = NULL;
621 vm_map_t map = 0;
622 int rv = 0;
623 vm_prot_t ftype;
624 struct proc *p = curproc;
625
626 if (frame->tf_err & PGEX_W)
627 ftype = VM_PROT_READ | VM_PROT_WRITE;
628 else
629 ftype = VM_PROT_READ;
630
631 va = trunc_page(eva);
632 if (va < VM_MIN_KERNEL_ADDRESS) {
633 vm_offset_t v;
634 vm_page_t mpte;
635
 /*
 * Fatal if there is no process, or if kernel code touched a
 * user address without a pcb_onfault handler armed (or from
 * nested interrupt context).
 */
636 if (p == NULL ||
637 (!usermode && va < VM_MAXUSER_ADDRESS &&
638 (intr_nesting_level != 0 || curpcb == NULL ||
639 curpcb->pcb_onfault == NULL))) {
640 trap_fatal(frame, eva);
641 return (-1);
642 }
643
644 /*
645 * This is a fault on non-kernel virtual memory.
646 * vm is initialized above to NULL. If curproc is NULL
647 * or curproc->p_vmspace is NULL the fault is fatal.
648 */
649 vm = p->p_vmspace;
650 if (vm == NULL)
651 goto nogo;
652
653 map = &vm->vm_map;
654
655 /*
656 * Keep swapout from messing with us during this
657 * critical time.
658 */
659 ++p->p_lock;
660
661 /*
662 * Grow the stack if necessary
663 */
664 /* grow_stack returns false only if va falls into
665 * a growable stack region and the stack growth
666 * fails. It returns true if va was not within
667 * a growable stack region, or if the stack
668 * growth succeeded.
669 */
670 if (!grow_stack (p, va)) {
671 rv = KERN_FAILURE;
672 --p->p_lock;
673 goto nogo;
674 }
675
676 /* Fault in the user page: */
677 rv = vm_fault(map, va, ftype,
678 (ftype & VM_PROT_WRITE) ? VM_FAULT_DIRTY : 0);
679
680 --p->p_lock;
681 } else {
682 /*
683 * Don't allow user-mode faults in kernel address space.
684 */
685 if (usermode)
686 goto nogo;
687
688 /*
689 * Since we know that kernel virtual addresses
690 * always have pte pages mapped, we just have to fault
691 * the page.
692 */
693 rv = vm_fault(kernel_map, va, ftype, FALSE);
694 }
695
696 if (rv == KERN_SUCCESS)
697 return (0);
698nogo:
699 if (!usermode) {
700 if (intr_nesting_level == 0 && curpcb && curpcb->pcb_onfault) {
701 frame->tf_eip = (int)curpcb->pcb_onfault;
702 return (0);
703 }
704 trap_fatal(frame, eva);
705 return (-1);
706 }
707
708 /* kludge to pass faulting virtual address to sendsig */
709 frame->tf_err = eva;
710
711 return((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
712}
713#endif
714
715int
716trap_pfault(frame, usermode, eva)
717 struct trapframe *frame;
718 int usermode;
719 vm_offset_t eva;
720{
721 vm_offset_t va;
722 struct vmspace *vm = NULL;
723 vm_map_t map = 0;
724 int rv = 0;
725 vm_prot_t ftype;
726 struct proc *p = curproc;
727
728 va = trunc_page(eva);
729 if (va >= KERNBASE) {
730 /*
731 * Don't allow user-mode faults in kernel address space.
732 * An exception: if the faulting address is the invalid
733 * instruction entry in the IDT, then the Intel Pentium
734 * F00F bug workaround was triggered, and we need to
735 * treat it is as an illegal instruction, and not a page
736 * fault.
737 */
738#if defined(I586_CPU) && !defined(NO_F00F_HACK)
739 if ((eva == (unsigned int)&t_idt[6]) && has_f00f_bug) {
740 frame->tf_trapno = T_PRIVINFLT;
741 return -2;
742 }
743#endif
744 if (usermode)
745 goto nogo;
746
747 map = kernel_map;
748 } else {
749 /*
750 * This is a fault on non-kernel virtual memory.
751 * vm is initialized above to NULL. If curproc is NULL
752 * or curproc->p_vmspace is NULL the fault is fatal.
753 */
754 if (p != NULL)
755 vm = p->p_vmspace;
756
757 if (vm == NULL)
758 goto nogo;
759
760 map = &vm->vm_map;
761 }
762
763 if (frame->tf_err & PGEX_W)
764 ftype = VM_PROT_READ | VM_PROT_WRITE;
765 else
766 ftype = VM_PROT_READ;
767
768 if (map != kernel_map) {
769 /*
770 * Keep swapout from messing with us during this
771 * critical time.
772 */
773 ++p->p_lock;
774
775 /*
776 * Grow the stack if necessary
777 */
778 /* grow_stack returns false only if va falls into
779 * a growable stack region and the stack growth
780 * fails. It returns true if va was not within
781 * a growable stack region, or if the stack
782 * growth succeeded.
783 */
784 if (!grow_stack (p, va)) {
785 rv = KERN_FAILURE;
786 --p->p_lock;
787 goto nogo;
788 }
789
790 /* Fault in the user page: */
791 rv = vm_fault(map, va, ftype,
792 (ftype & VM_PROT_WRITE) ? VM_FAULT_DIRTY : 0);
793
794 --p->p_lock;
795 } else {
796 /*
797 * Don't have to worry about process locking or stacks in the kernel.
798 */
799 rv = vm_fault(map, va, ftype, FALSE);
800 }
801
802 if (rv == KERN_SUCCESS)
803 return (0);
804nogo:
805 if (!usermode) {
806 if (intr_nesting_level == 0 && curpcb && curpcb->pcb_onfault) {
807 frame->tf_eip = (int)curpcb->pcb_onfault;
808 return (0);
809 }
810 trap_fatal(frame, eva);
811 return (-1);
812 }
813
814 /* kludge to pass faulting virtual address to sendsig */
815 frame->tf_err = eva;
816
817 return((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
818}
819
/*
 * Print a diagnostic dump for an unrecoverable trap, then panic.
 *
 * frame: trap frame of the faulting context.
 * eva:   faulting virtual address; printed only for T_PAGEFLT.
 *
 * When DDB is configured and (debugger_on_panic || in_Debugger), the
 * trap is first offered to kdb_trap(); if that returns non-zero this
 * function returns to its caller instead of panicking.
 */
820static void
821trap_fatal(frame, eva)
822 struct trapframe *frame;
823 vm_offset_t eva;
824{
825 int code, type, ss, esp;
826 struct soft_segment_descriptor softseg;
827
828 code = frame->tf_err;
829 type = frame->tf_trapno;
830 sdtossd(&gdt[IDXSEL(frame->tf_cs & 0xffff)].sd, &softseg);
831
832 if (type <= MAX_TRAP_MSG)
833 printf("\n\nFatal trap %d: %s while in %s mode\n",
834 type, trap_msg[type],
835 frame->tf_eflags & PSL_VM ? "vm86" :
836 ISPL(frame->tf_cs) == SEL_UPL ? "user" : "kernel");
837#ifdef SMP
838 /* three separate prints in case of a trap on an unmapped page */
839 printf("mp_lock = %08x; ", mp_lock);
840 printf("cpuid = %d; ", cpuid);
841 printf("lapic.id = %08x\n", lapic.id);
842#endif
843 if (type == T_PAGEFLT) {
844 printf("fault virtual address = 0x%x\n", eva);
845 printf("fault code = %s %s, %s\n",
846 code & PGEX_U ? "user" : "supervisor",
847 code & PGEX_W ? "write" : "read",
848 code & PGEX_P ? "protection violation" : "page not present");
849 }
850 printf("instruction pointer = 0x%x:0x%x\n",
851 frame->tf_cs & 0xffff, frame->tf_eip);
 /*
 * The frame's tf_ss/tf_esp are used only for user/vm86 traps; for a
 * kernel trap the kernel data selector and the address of the
 * frame's tf_esp slot are reported instead.
 */
852 if ((ISPL(frame->tf_cs) == SEL_UPL) || (frame->tf_eflags & PSL_VM)) {
853 ss = frame->tf_ss & 0xffff;
854 esp = frame->tf_esp;
855 } else {
856 ss = GSEL(GDATA_SEL, SEL_KPL);
857 esp = (int)&frame->tf_esp;
858 }
859 printf("stack pointer = 0x%x:0x%x\n", ss, esp);
860 printf("frame pointer = 0x%x:0x%x\n", ss, frame->tf_ebp);
861 printf("code segment = base 0x%x, limit 0x%x, type 0x%x\n",
862 softseg.ssd_base, softseg.ssd_limit, softseg.ssd_type);
863 printf(" = DPL %d, pres %d, def32 %d, gran %d\n",
864 softseg.ssd_dpl, softseg.ssd_p, softseg.ssd_def32,
865 softseg.ssd_gran);
866 printf("processor eflags = ");
867 if (frame->tf_eflags & PSL_T)
868 printf("trace trap, ");
869 if (frame->tf_eflags & PSL_I)
870 printf("interrupt enabled, ");
871 if (frame->tf_eflags & PSL_NT)
872 printf("nested task, ");
873 if (frame->tf_eflags & PSL_RF)
874 printf("resume, ");
875 if (frame->tf_eflags & PSL_VM)
876 printf("vm86, ");
877 printf("IOPL = %d\n", (frame->tf_eflags & PSL_IOPL) >> 12);
878 printf("current process = ");
879 if (curproc) {
880 printf("%lu (%s)\n",
881 (u_long)curproc->p_pid, curproc->p_comm ?
882 curproc->p_comm : "");
883 } else {
884 printf("Idle\n");
885 }
886 printf("interrupt mask = ");
887 if ((cpl & net_imask) == net_imask)
888 printf("net ");
889 if ((cpl & tty_imask) == tty_imask)
890 printf("tty ");
891 if ((cpl & bio_imask) == bio_imask)
892 printf("bio ");
893 if ((cpl & cam_imask) == cam_imask)
894 printf("cam ");
895 if (cpl == 0)
896 printf("none");
897#ifdef SMP
898/**
899 * XXX FIXME:
900 * we probably SHOULD have stopped the other CPUs before now!
901 * another CPU COULD have been touching cpl at this moment...
902 */
903 printf(" <- SMP: XXX");
904#endif
905 printf("\n");
906
 /*
 * NOTE(review): `psl' is not declared in this function, so this KDB
 * branch looks stale -- confirm before enabling KDB.
 */
907#ifdef KDB
908 if (kdb_trap(&psl))
909 return;
910#endif
911#ifdef DDB
912 if ((debugger_on_panic || in_Debugger) && kdb_trap(type, 0, frame))
913 return;
914#endif
915 printf("trap number = %d\n", type);
916 if (type <= MAX_TRAP_MSG)
 /* Pass the message as an argument, never as the format string. */
917 panic("%s", trap_msg[type]);
918 else
919 panic("unknown/reserved trap");
920}
921
922/*
923 * Double fault handler. Called when a fault occurs while writing
924 * a frame for a trap/exception onto the stack. This usually occurs
925 * when the stack overflows (such is the case with infinite recursion,
926 * for example).
927 *
928 * XXX Note that the current PTD gets replaced by IdlePTD when the
929 * task switch occurs. This means that the stack that was active at
930 * the time of the double fault is not available at <kstack> unless
931 * the machine was idle when the double fault occurred. The downside
932 * of this is that "trace <ebp>" in ddb won't work.
 *
 * Prints the eip/esp/ebp values held in common_tss (plus SMP state
 * when SMP is configured) and then panics.
933 */
934void
935dblfault_handler()
936{
937 printf("\nFatal double fault:\n");
938 printf("eip = 0x%x\n", common_tss.tss_eip);
939 printf("esp = 0x%x\n", common_tss.tss_esp);
940 printf("ebp = 0x%x\n", common_tss.tss_ebp);
941#ifdef SMP
942 /* three separate prints in case of a trap on an unmapped page */
943 printf("mp_lock = %08x; ", mp_lock);
944 printf("cpuid = %d; ", cpuid);
945 printf("lapic.id = %08x\n", lapic.id);
946#endif
947 panic("double fault");
948}
949
950/*
951 * Compensate for 386 brain damage (missing URKR).
952 * This is a little simpler than the pagefault handler in trap() because
953 * the page tables have already been faulted in and high addresses
954 * are thrown out early for other reasons.
 *
 * addr: address about to be written; it is truncated to its page.
 *
 * Returns 0 when the page was faulted in for writing, 1 when the
 * address is at or above VM_MAXUSER_ADDRESS, the stack could not be
 * grown, or vm_fault() failed.
955 */
956int trapwrite(addr)
957 unsigned addr;
958{
959 struct proc *p;
960 vm_offset_t va;
961 struct vmspace *vm;
962 int rv;
963
964 va = trunc_page((vm_offset_t)addr);
965 /*
966 * XXX - MAX is END. Changed > to >= for temp. fix.
967 */
968 if (va >= VM_MAXUSER_ADDRESS)
969 return (1);
970
971 p = curproc;
972 vm = p->p_vmspace;
973
 /* Hold p_lock across the fault; it is dropped on every exit path. */
974 ++p->p_lock;
975
976 if (!grow_stack (p, va)) {
977 --p->p_lock;
978 return (1);
979 }
980
981 /*
982 * fault the data page
983 */
984 rv = vm_fault(&vm->vm_map, va, VM_PROT_READ|VM_PROT_WRITE, VM_FAULT_DIRTY);
985
986 --p->p_lock;
987
988 if (rv != KERN_SUCCESS)
989 return 1;
990
991 return (0);
992}
993
994/*
995 * System call request from POSIX system call gate interface to kernel.
996 * Like trap(), argument is call by reference.
997 */
998void
999syscall(frame)
1000 struct trapframe frame;
1001{
1002 caddr_t params;
1003 int i;
1004 struct sysent *callp;
1005 struct proc *p = curproc;
1006 u_quad_t sticks;
1007 int error;
1008 int args[8];
1009 u_int code;
1010
1011#ifdef DIAGNOSTIC
1012 if (ISPL(frame.tf_cs) != SEL_UPL)
1013 panic("syscall");
1014#endif
1015 sticks = p->p_sticks;
1016 p->p_md.md_regs = &frame;
1017 params = (caddr_t)frame.tf_esp + sizeof(int);
1018 code = frame.tf_eax;
1019 if (p->p_sysent->sv_prepsyscall) {
1020 (*p->p_sysent->sv_prepsyscall)(&frame, args, &code, &params);
1021 } else {
1022 /*
1023 * Need to check if this is a 32 bit or 64 bit syscall.
1024 */
1025 if (code == SYS_syscall) {
1026 /*
1027 * Code is first argument, followed by actual args.
1028 */
1029 code = fuword(params);
1030 params += sizeof(int);
1031 } else if (code == SYS___syscall) {
1032 /*
1033 * Like syscall, but code is a quad, so as to maintain
1034 * quad alignment for the rest of the arguments.
1035 */
1036 code = fuword(params);
1037 params += sizeof(quad_t);
1038 }
1039 }
1040
1041 if (p->p_sysent->sv_mask)
1042 code &= p->p_sysent->sv_mask;
1043
1044 if (code >= p->p_sysent->sv_size)
1045 callp = &p->p_sysent->sv_table[0];
1046 else
1047 callp = &p->p_sysent->sv_table[code];
1048
1049 if (params && (i = callp->sy_narg * sizeof(int)) &&
1050 (error = copyin(params, (caddr_t)args, (u_int)i))) {
1051#ifdef KTRACE
1052 if (KTRPOINT(p, KTR_SYSCALL))
1053 ktrsyscall(p->p_tracep, code, callp->sy_narg, args);
1054#endif
1055 goto bad;
1056 }
1057#ifdef KTRACE
1058 if (KTRPOINT(p, KTR_SYSCALL))
1059 ktrsyscall(p->p_tracep, code, callp->sy_narg, args);
1060#endif
1061 p->p_retval[0] = 0;
1062 p->p_retval[1] = frame.tf_edx;
1063
1064 STOPEVENT(p, S_SCE, callp->sy_narg);
1065
1066 error = (*callp->sy_call)(p, args);
1067
1068 switch (error) {
1069
1070 case 0:
1071 /*
1072 * Reinitialize proc pointer `p' as it may be different
1073 * if this is a child returning from fork syscall.
1074 */
1075 p = curproc;
1076 frame.tf_eax = p->p_retval[0];
1077 frame.tf_edx = p->p_retval[1];
1078 frame.tf_eflags &= ~PSL_C;
1079 break;
1080
1081 case ERESTART:
1082 /*
1083 * Reconstruct pc, assuming lcall $X,y is 7 bytes,
1084 * int 0x80 is 2 bytes. We saved this in tf_err.
1085 */
1086 frame.tf_eip -= frame.tf_err;
1087 break;
1088
1089 case EJUSTRETURN:
1090 break;
1091
1092 default:
1093bad:
39 */
40
41/*
42 * 386 Trap and System call handling
43 */
44
45#include "opt_cpu.h"
46#include "opt_ddb.h"
47#include "opt_ktrace.h"
48#include "opt_clock.h"
49#include "opt_trap.h"
50#include "opt_vm86.h"
51
52#include <sys/param.h>
53#include <sys/systm.h>
54#include <sys/proc.h>
55#include <sys/pioctl.h>
56#include <sys/kernel.h>
57#include <sys/resourcevar.h>
58#include <sys/signalvar.h>
59#include <sys/syscall.h>
60#include <sys/sysent.h>
61#include <sys/uio.h>
62#include <sys/vmmeter.h>
63#ifdef KTRACE
64#include <sys/ktrace.h>
65#endif
66
67#include <vm/vm.h>
68#include <vm/vm_param.h>
69#include <vm/vm_prot.h>
70#include <sys/lock.h>
71#include <vm/pmap.h>
72#include <vm/vm_kern.h>
73#include <vm/vm_map.h>
74#include <vm/vm_page.h>
75#include <vm/vm_extern.h>
76
77#include <machine/cpu.h>
78#include <machine/ipl.h>
79#include <machine/md_var.h>
80#include <machine/pcb.h>
81#ifdef SMP
82#include <machine/smp.h>
83#endif
84#include <machine/tss.h>
85
86#include <i386/isa/intr_machdep.h>
87
88#ifdef POWERFAIL_NMI
89#include <sys/syslog.h>
90#include <machine/clock.h>
91#endif
92
93#ifdef VM86
94#include <machine/vm86.h>
95#endif
96
97#ifdef DDB
98 extern int in_Debugger, debugger_on_panic;
99#endif
100
101#include "isa.h"
102#include "npx.h"
103
104int (*pmath_emulate) __P((struct trapframe *));
105
106extern void trap __P((struct trapframe frame));
107extern int trapwrite __P((unsigned addr));
108extern void syscall __P((struct trapframe frame));
109
110static int trap_pfault __P((struct trapframe *, int, vm_offset_t));
111static void trap_fatal __P((struct trapframe *, vm_offset_t));
112void dblfault_handler __P((void));
113
114extern inthand_t IDTVEC(syscall);
115
/*
 * Human-readable trap names, indexed by trap type (T_*).
 * MAX_TRAP_MSG is the highest valid index into trap_msg[]; users of the
 * table in this file test `type <= MAX_TRAP_MSG' before indexing it.
 * Keep the two in sync when adding entries.
 */
#define	MAX_TRAP_MSG		28
static char *trap_msg[] = {
	"",					/*  0 unused */
	"privileged instruction fault",		/*  1 T_PRIVINFLT */
	"",					/*  2 unused */
	"breakpoint instruction fault",		/*  3 T_BPTFLT */
	"",					/*  4 unused */
	"",					/*  5 unused */
	"arithmetic trap",			/*  6 T_ARITHTRAP */
	"system forced exception",		/*  7 T_ASTFLT */
	"",					/*  8 unused */
	"general protection fault",		/*  9 T_PROTFLT */
	"trace trap",				/* 10 T_TRCTRAP */
	"",					/* 11 unused */
	"page fault",				/* 12 T_PAGEFLT */
	"",					/* 13 unused */
	"alignment fault",			/* 14 T_ALIGNFLT */
	"",					/* 15 unused */
	"",					/* 16 unused */
	"",					/* 17 unused */
	"integer divide fault",			/* 18 T_DIVIDE */
	"non-maskable interrupt trap",		/* 19 T_NMI */
	"overflow trap",			/* 20 T_OFLOW */
	"FPU bounds check fault",		/* 21 T_BOUND */
	"FPU device not available",		/* 22 T_DNA */
	"double fault",				/* 23 T_DOUBLEFLT */
	"FPU operand fetch fault",		/* 24 T_FPOPFLT */
	"invalid TSS fault",			/* 25 T_TSSFLT */
	"segment not present fault",		/* 26 T_SEGNPFLT */
	"stack fault",				/* 27 T_STKFLT */
	"machine check trap",			/* 28 T_MCHK */
};
148
149static __inline void userret __P((struct proc *p, struct trapframe *frame,
150 u_quad_t oticks));
151
152#if defined(I586_CPU) && !defined(NO_F00F_HACK)
153extern struct gate_descriptor *t_idt;
154extern int has_f00f_bug;
155#endif
156
157static __inline void
158userret(p, frame, oticks)
159 struct proc *p;
160 struct trapframe *frame;
161 u_quad_t oticks;
162{
163 int sig, s;
164
165 while ((sig = CURSIG(p)) != 0)
166 postsig(sig);
167
168#if 0
169 if (!want_resched &&
170 (p->p_priority <= p->p_usrpri) &&
171 (p->p_rtprio.type == RTP_PRIO_NORMAL)) {
172 int newpriority;
173 p->p_estcpu += 1;
174 newpriority = PUSER + p->p_estcpu / 4 + 2 * p->p_nice;
175 newpriority = min(newpriority, MAXPRI);
176 p->p_usrpri = newpriority;
177 }
178#endif
179
180 p->p_priority = p->p_usrpri;
181 if (want_resched) {
182 /*
183 * Since we are curproc, clock will normally just change
184 * our priority without moving us from one queue to another
185 * (since the running process is not on a queue.)
186 * If that happened after we setrunqueue ourselves but before we
187 * mi_switch()'ed, we might not be on the queue indicated by
188 * our priority.
189 */
190 s = splhigh();
191 setrunqueue(p);
192 p->p_stats->p_ru.ru_nivcsw++;
193 mi_switch();
194 splx(s);
195 while ((sig = CURSIG(p)) != 0)
196 postsig(sig);
197 }
198 /*
199 * Charge system time if profiling.
200 */
201 if (p->p_flag & P_PROFIL)
202 addupc_task(p, frame->tf_eip,
203 (u_int)(p->p_sticks - oticks) * psratio);
204
205 curpriority = p->p_priority;
206}
207
208/*
209 * Exception, fault, and trap interface to the FreeBSD kernel.
210 * This common code is called from assembly language IDT gate entry
211 * routines that prepare a suitable stack frame, and restore this
212 * frame after the exception has been processed.
213 */
214
215void
216trap(frame)
217 struct trapframe frame;
218{
219 struct proc *p = curproc;
220 u_quad_t sticks = 0;
221 int i = 0, ucode = 0, type, code;
222 vm_offset_t eva;
223
224 if (!(frame.tf_eflags & PSL_I)) {
225 /*
226 * Buggy application or kernel code has disabled interrupts
227 * and then trapped. Enabling interrupts now is wrong, but
228 * it is better than running with interrupts disabled until
229 * they are accidentally enabled later.
230 */
231 type = frame.tf_trapno;
232 if (ISPL(frame.tf_cs) == SEL_UPL || (frame.tf_eflags & PSL_VM))
233 printf(
234 "pid %ld (%s): trap %d with interrupts disabled\n",
235 (long)curproc->p_pid, curproc->p_comm, type);
236 else if (type != T_BPTFLT && type != T_TRCTRAP)
237 /*
238 * XXX not quite right, since this may be for a
239 * multiple fault in user mode.
240 */
241 printf("kernel trap %d with interrupts disabled\n",
242 type);
243 enable_intr();
244 }
245
246 eva = 0;
247 if (frame.tf_trapno == T_PAGEFLT) {
248 /*
249 * For some Cyrix CPUs, %cr2 is clobbered by interrupts.
250 * This problem is worked around by using an interrupt
251 * gate for the pagefault handler. We are finally ready
252 * to read %cr2 and then must reenable interrupts.
253 *
254 * XXX this should be in the switch statement, but the
255 * NO_FOOF_HACK and VM86 goto and ifdefs obfuscate the
256 * flow of control too much for this to be obviously
257 * correct.
258 */
259 eva = rcr2();
260 enable_intr();
261 }
262
263#if defined(I586_CPU) && !defined(NO_F00F_HACK)
264restart:
265#endif
266 type = frame.tf_trapno;
267 code = frame.tf_err;
268
269#ifdef VM86
270 if (in_vm86call) {
271 if (frame.tf_eflags & PSL_VM &&
272 (type == T_PROTFLT || type == T_STKFLT)) {
273 i = vm86_emulate((struct vm86frame *)&frame);
274 if (i != 0)
275 /*
276 * returns to original process
277 */
278 vm86_trap((struct vm86frame *)&frame);
279 return;
280 }
281 switch (type) {
282 /*
283 * these traps want either a process context, or
284 * assume a normal userspace trap.
285 */
286 case T_PROTFLT:
287 case T_SEGNPFLT:
288 trap_fatal(&frame, eva);
289 return;
290 case T_TRCTRAP:
291 type = T_BPTFLT; /* kernel breakpoint */
292 /* FALL THROUGH */
293 }
294 goto kernel_trap; /* normal kernel trap handling */
295 }
296#endif
297
298 if ((ISPL(frame.tf_cs) == SEL_UPL) || (frame.tf_eflags & PSL_VM)) {
299 /* user trap */
300
301 sticks = p->p_sticks;
302 p->p_md.md_regs = &frame;
303
304 switch (type) {
305 case T_PRIVINFLT: /* privileged instruction fault */
306 ucode = type;
307 i = SIGILL;
308 break;
309
310 case T_BPTFLT: /* bpt instruction fault */
311 case T_TRCTRAP: /* trace trap */
312 frame.tf_eflags &= ~PSL_T;
313 i = SIGTRAP;
314 break;
315
316 case T_ARITHTRAP: /* arithmetic trap */
317 ucode = code;
318 i = SIGFPE;
319 break;
320
321 case T_ASTFLT: /* Allow process switch */
322 astoff();
323 cnt.v_soft++;
324 if (p->p_flag & P_OWEUPC) {
325 p->p_flag &= ~P_OWEUPC;
326 addupc_task(p, p->p_stats->p_prof.pr_addr,
327 p->p_stats->p_prof.pr_ticks);
328 }
329 goto out;
330
331 /*
332 * The following two traps can happen in
333 * vm86 mode, and, if so, we want to handle
334 * them specially.
335 */
336 case T_PROTFLT: /* general protection fault */
337 case T_STKFLT: /* stack fault */
338#ifdef VM86
339 if (frame.tf_eflags & PSL_VM) {
340 i = vm86_emulate((struct vm86frame *)&frame);
341 if (i == 0)
342 goto out;
343 break;
344 }
345#endif /* VM86 */
346 /* FALL THROUGH */
347
348 case T_SEGNPFLT: /* segment not present fault */
349 case T_TSSFLT: /* invalid TSS fault */
350 case T_DOUBLEFLT: /* double fault */
351 default:
352 ucode = code + BUS_SEGM_FAULT ;
353 i = SIGBUS;
354 break;
355
356 case T_PAGEFLT: /* page fault */
357 i = trap_pfault(&frame, TRUE, eva);
358 if (i == -1)
359 return;
360#if defined(I586_CPU) && !defined(NO_F00F_HACK)
361 if (i == -2)
362 goto restart;
363#endif
364 if (i == 0)
365 goto out;
366
367 ucode = T_PAGEFLT;
368 break;
369
370 case T_DIVIDE: /* integer divide fault */
371 ucode = FPE_INTDIV_TRAP;
372 i = SIGFPE;
373 break;
374
375#if NISA > 0
376 case T_NMI:
377#ifdef POWERFAIL_NMI
378 goto handle_powerfail;
379#else /* !POWERFAIL_NMI */
380#ifdef DDB
381 /* NMI can be hooked up to a pushbutton for debugging */
382 printf ("NMI ... going to debugger\n");
383 if (kdb_trap (type, 0, &frame))
384 return;
385#endif /* DDB */
386 /* machine/parity/power fail/"kitchen sink" faults */
387 if (isa_nmi(code) == 0) return;
388 panic("NMI indicates hardware failure");
389#endif /* POWERFAIL_NMI */
390#endif /* NISA > 0 */
391
392 case T_OFLOW: /* integer overflow fault */
393 ucode = FPE_INTOVF_TRAP;
394 i = SIGFPE;
395 break;
396
397 case T_BOUND: /* bounds check fault */
398 ucode = FPE_SUBRNG_TRAP;
399 i = SIGFPE;
400 break;
401
402 case T_DNA:
403#if NNPX > 0
404 /* if a transparent fault (due to context switch "late") */
405 if (npxdna())
406 return;
407#endif
408 if (!pmath_emulate) {
409 i = SIGFPE;
410 ucode = FPE_FPU_NP_TRAP;
411 break;
412 }
413 i = (*pmath_emulate)(&frame);
414 if (i == 0) {
415 if (!(frame.tf_eflags & PSL_T))
416 return;
417 frame.tf_eflags &= ~PSL_T;
418 i = SIGTRAP;
419 }
420 /* else ucode = emulator_only_knows() XXX */
421 break;
422
423 case T_FPOPFLT: /* FPU operand fetch fault */
424 ucode = T_FPOPFLT;
425 i = SIGILL;
426 break;
427 }
428 } else {
429#ifdef VM86
430kernel_trap:
431#endif
432 /* kernel trap */
433
434 switch (type) {
435 case T_PAGEFLT: /* page fault */
436 (void) trap_pfault(&frame, FALSE, eva);
437 return;
438
439 case T_DNA:
440#if NNPX > 0
441 /*
442 * The kernel is apparently using npx for copying.
443 * XXX this should be fatal unless the kernel has
444 * registered such use.
445 */
446 if (npxdna())
447 return;
448#endif
449 break;
450
451 case T_PROTFLT: /* general protection fault */
452 case T_SEGNPFLT: /* segment not present fault */
453 /*
454 * Invalid segment selectors and out of bounds
455 * %eip's and %esp's can be set up in user mode.
456 * This causes a fault in kernel mode when the
457 * kernel tries to return to user mode. We want
458 * to get this fault so that we can fix the
459 * problem here and not have to check all the
460 * selectors and pointers when the user changes
461 * them.
462 */
463#define MAYBE_DORETI_FAULT(where, whereto) \
464 do { \
465 if (frame.tf_eip == (int)where) { \
466 frame.tf_eip = (int)whereto; \
467 return; \
468 } \
469 } while (0)
470
471 if (intr_nesting_level == 0) {
472 /*
473 * Invalid %fs's and %gs's can be created using
474 * procfs or PT_SETREGS or by invalidating the
475 * underlying LDT entry. This causes a fault
476 * in kernel mode when the kernel attempts to
477 * switch contexts. Lose the bad context
478 * (XXX) so that we can continue, and generate
479 * a signal.
480 */
481 if (frame.tf_eip == (int)cpu_switch_load_gs) {
482 curpcb->pcb_gs = 0;
483 psignal(p, SIGBUS);
484 return;
485 }
486 MAYBE_DORETI_FAULT(doreti_iret,
487 doreti_iret_fault);
488 MAYBE_DORETI_FAULT(doreti_popl_ds,
489 doreti_popl_ds_fault);
490 MAYBE_DORETI_FAULT(doreti_popl_es,
491 doreti_popl_es_fault);
492 MAYBE_DORETI_FAULT(doreti_popl_fs,
493 doreti_popl_fs_fault);
494 if (curpcb && curpcb->pcb_onfault) {
495 frame.tf_eip = (int)curpcb->pcb_onfault;
496 return;
497 }
498 }
499 break;
500
501 case T_TSSFLT:
502 /*
503 * PSL_NT can be set in user mode and isn't cleared
504 * automatically when the kernel is entered. This
505 * causes a TSS fault when the kernel attempts to
506 * `iret' because the TSS link is uninitialized. We
507 * want to get this fault so that we can fix the
508 * problem here and not every time the kernel is
509 * entered.
510 */
511 if (frame.tf_eflags & PSL_NT) {
512 frame.tf_eflags &= ~PSL_NT;
513 return;
514 }
515 break;
516
517 case T_TRCTRAP: /* trace trap */
518 if (frame.tf_eip == (int)IDTVEC(syscall)) {
519 /*
520 * We've just entered system mode via the
521 * syscall lcall. Continue single stepping
522 * silently until the syscall handler has
523 * saved the flags.
524 */
525 return;
526 }
527 if (frame.tf_eip == (int)IDTVEC(syscall) + 1) {
528 /*
529 * The syscall handler has now saved the
530 * flags. Stop single stepping it.
531 */
532 frame.tf_eflags &= ~PSL_T;
533 return;
534 }
535 /*
536 * Fall through.
537 */
538 case T_BPTFLT:
539 /*
540 * If DDB is enabled, let it handle the debugger trap.
541 * Otherwise, debugger traps "can't happen".
542 */
543#ifdef DDB
544 if (kdb_trap (type, 0, &frame))
545 return;
546#endif
547 break;
548
549#if NISA > 0
550 case T_NMI:
551#ifdef POWERFAIL_NMI
552#ifndef TIMER_FREQ
553# define TIMER_FREQ 1193182
554#endif
555 handle_powerfail:
556 {
557 static unsigned lastalert = 0;
558
559 if(time_second - lastalert > 10)
560 {
561 log(LOG_WARNING, "NMI: power fail\n");
562 sysbeep(TIMER_FREQ/880, hz);
563 lastalert = time_second;
564 }
565 return;
566 }
567#else /* !POWERFAIL_NMI */
568#ifdef DDB
569 /* NMI can be hooked up to a pushbutton for debugging */
570 printf ("NMI ... going to debugger\n");
571 if (kdb_trap (type, 0, &frame))
572 return;
573#endif /* DDB */
574 /* machine/parity/power fail/"kitchen sink" faults */
575 if (isa_nmi(code) == 0) return;
576 /* FALL THROUGH */
577#endif /* POWERFAIL_NMI */
578#endif /* NISA > 0 */
579 }
580
581 trap_fatal(&frame, eva);
582 return;
583 }
584
585 /* Translate fault for emulators (e.g. Linux) */
586 if (*p->p_sysent->sv_transtrap)
587 i = (*p->p_sysent->sv_transtrap)(i, type);
588
589 trapsignal(p, i, ucode);
590
591#ifdef DEBUG
592 if (type <= MAX_TRAP_MSG) {
593 uprintf("fatal process exception: %s",
594 trap_msg[type]);
595 if ((type == T_PAGEFLT) || (type == T_PROTFLT))
596 uprintf(", fault VA = 0x%lx", (u_long)eva);
597 uprintf("\n");
598 }
599#endif
600
601out:
602 userret(p, &frame, sticks);
603}
604
#ifdef notyet
/*
 * This version doesn't allow a page fault to user space while
 * in the kernel. The rest of the kernel needs to be made "safe"
 * before this can be used. I think the only things remaining
 * to be made safe are the iBCS2 code and the process tracing/
 * debugging code.
 *
 * Return values match the active trap_pfault(): 0 when the fault was
 * resolved (or redirected to pcb_onfault), -1 after trap_fatal(), and
 * otherwise SIGBUS or SIGSEGV for the caller to deliver.
 */
static int
trap_pfault(frame, usermode, eva)
	struct trapframe *frame;
	int usermode;
	vm_offset_t eva;
{
	vm_offset_t va;
	struct vmspace *vm = NULL;
	vm_map_t map = 0;
	int rv = 0;
	vm_prot_t ftype;
	struct proc *p = curproc;

	if (frame->tf_err & PGEX_W)
		ftype = VM_PROT_READ | VM_PROT_WRITE;
	else
		ftype = VM_PROT_READ;

	va = trunc_page(eva);
	if (va < VM_MIN_KERNEL_ADDRESS) {
		/*
		 * A kernel-mode fault on a user address is only tolerated
		 * outside interrupt context and with a pcb_onfault handler
		 * registered; anything else is fatal.
		 */
		if (p == NULL ||
		    (!usermode && va < VM_MAXUSER_ADDRESS &&
		     (intr_nesting_level != 0 || curpcb == NULL ||
		      curpcb->pcb_onfault == NULL))) {
			trap_fatal(frame, eva);
			return (-1);
		}

		/*
		 * This is a fault on non-kernel virtual memory.
		 * vm is initialized above to NULL. If curproc is NULL
		 * or curproc->p_vmspace is NULL the fault is fatal.
		 */
		vm = p->p_vmspace;
		if (vm == NULL)
			goto nogo;

		map = &vm->vm_map;

		/*
		 * Keep swapout from messing with us during this
		 *	critical time.
		 */
		++p->p_lock;

		/*
		 * Grow the stack if necessary
		 */
		/* grow_stack returns false only if va falls into
		 * a growable stack region and the stack growth
		 * fails.  It returns true if va was not within
		 * a growable stack region, or if the stack
		 * growth succeeded.
		 */
		if (!grow_stack (p, va)) {
			rv = KERN_FAILURE;
			--p->p_lock;
			goto nogo;
		}

		/* Fault in the user page: */
		rv = vm_fault(map, va, ftype,
			(ftype & VM_PROT_WRITE) ? VM_FAULT_DIRTY : 0);

		--p->p_lock;
	} else {
		/*
		 * Don't allow user-mode faults in kernel address space.
		 */
		if (usermode)
			goto nogo;

		/*
		 * Since we know that kernel virtual address addresses
		 * always have pte pages mapped, we just have to fault
		 * the page.
		 */
		rv = vm_fault(kernel_map, va, ftype, FALSE);
	}

	if (rv == KERN_SUCCESS)
		return (0);
nogo:
	if (!usermode) {
		if (intr_nesting_level == 0 && curpcb && curpcb->pcb_onfault) {
			frame->tf_eip = (int)curpcb->pcb_onfault;
			return (0);
		}
		trap_fatal(frame, eva);
		return (-1);
	}

	/* kludge to pass faulting virtual address to sendsig */
	frame->tf_err = eva;

	return((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
}
#endif
714
715int
716trap_pfault(frame, usermode, eva)
717 struct trapframe *frame;
718 int usermode;
719 vm_offset_t eva;
720{
721 vm_offset_t va;
722 struct vmspace *vm = NULL;
723 vm_map_t map = 0;
724 int rv = 0;
725 vm_prot_t ftype;
726 struct proc *p = curproc;
727
728 va = trunc_page(eva);
729 if (va >= KERNBASE) {
730 /*
731 * Don't allow user-mode faults in kernel address space.
732 * An exception: if the faulting address is the invalid
733 * instruction entry in the IDT, then the Intel Pentium
734 * F00F bug workaround was triggered, and we need to
735 * treat it is as an illegal instruction, and not a page
736 * fault.
737 */
738#if defined(I586_CPU) && !defined(NO_F00F_HACK)
739 if ((eva == (unsigned int)&t_idt[6]) && has_f00f_bug) {
740 frame->tf_trapno = T_PRIVINFLT;
741 return -2;
742 }
743#endif
744 if (usermode)
745 goto nogo;
746
747 map = kernel_map;
748 } else {
749 /*
750 * This is a fault on non-kernel virtual memory.
751 * vm is initialized above to NULL. If curproc is NULL
752 * or curproc->p_vmspace is NULL the fault is fatal.
753 */
754 if (p != NULL)
755 vm = p->p_vmspace;
756
757 if (vm == NULL)
758 goto nogo;
759
760 map = &vm->vm_map;
761 }
762
763 if (frame->tf_err & PGEX_W)
764 ftype = VM_PROT_READ | VM_PROT_WRITE;
765 else
766 ftype = VM_PROT_READ;
767
768 if (map != kernel_map) {
769 /*
770 * Keep swapout from messing with us during this
771 * critical time.
772 */
773 ++p->p_lock;
774
775 /*
776 * Grow the stack if necessary
777 */
778 /* grow_stack returns false only if va falls into
779 * a growable stack region and the stack growth
780 * fails. It returns true if va was not within
781 * a growable stack region, or if the stack
782 * growth succeeded.
783 */
784 if (!grow_stack (p, va)) {
785 rv = KERN_FAILURE;
786 --p->p_lock;
787 goto nogo;
788 }
789
790 /* Fault in the user page: */
791 rv = vm_fault(map, va, ftype,
792 (ftype & VM_PROT_WRITE) ? VM_FAULT_DIRTY : 0);
793
794 --p->p_lock;
795 } else {
796 /*
797 * Don't have to worry about process locking or stacks in the kernel.
798 */
799 rv = vm_fault(map, va, ftype, FALSE);
800 }
801
802 if (rv == KERN_SUCCESS)
803 return (0);
804nogo:
805 if (!usermode) {
806 if (intr_nesting_level == 0 && curpcb && curpcb->pcb_onfault) {
807 frame->tf_eip = (int)curpcb->pcb_onfault;
808 return (0);
809 }
810 trap_fatal(frame, eva);
811 return (-1);
812 }
813
814 /* kludge to pass faulting virtual address to sendsig */
815 frame->tf_err = eva;
816
817 return((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
818}
819
820static void
821trap_fatal(frame, eva)
822 struct trapframe *frame;
823 vm_offset_t eva;
824{
825 int code, type, ss, esp;
826 struct soft_segment_descriptor softseg;
827
828 code = frame->tf_err;
829 type = frame->tf_trapno;
830 sdtossd(&gdt[IDXSEL(frame->tf_cs & 0xffff)].sd, &softseg);
831
832 if (type <= MAX_TRAP_MSG)
833 printf("\n\nFatal trap %d: %s while in %s mode\n",
834 type, trap_msg[type],
835 frame->tf_eflags & PSL_VM ? "vm86" :
836 ISPL(frame->tf_cs) == SEL_UPL ? "user" : "kernel");
837#ifdef SMP
838 /* three seperate prints in case of a trap on an unmapped page */
839 printf("mp_lock = %08x; ", mp_lock);
840 printf("cpuid = %d; ", cpuid);
841 printf("lapic.id = %08x\n", lapic.id);
842#endif
843 if (type == T_PAGEFLT) {
844 printf("fault virtual address = 0x%x\n", eva);
845 printf("fault code = %s %s, %s\n",
846 code & PGEX_U ? "user" : "supervisor",
847 code & PGEX_W ? "write" : "read",
848 code & PGEX_P ? "protection violation" : "page not present");
849 }
850 printf("instruction pointer = 0x%x:0x%x\n",
851 frame->tf_cs & 0xffff, frame->tf_eip);
852 if ((ISPL(frame->tf_cs) == SEL_UPL) || (frame->tf_eflags & PSL_VM)) {
853 ss = frame->tf_ss & 0xffff;
854 esp = frame->tf_esp;
855 } else {
856 ss = GSEL(GDATA_SEL, SEL_KPL);
857 esp = (int)&frame->tf_esp;
858 }
859 printf("stack pointer = 0x%x:0x%x\n", ss, esp);
860 printf("frame pointer = 0x%x:0x%x\n", ss, frame->tf_ebp);
861 printf("code segment = base 0x%x, limit 0x%x, type 0x%x\n",
862 softseg.ssd_base, softseg.ssd_limit, softseg.ssd_type);
863 printf(" = DPL %d, pres %d, def32 %d, gran %d\n",
864 softseg.ssd_dpl, softseg.ssd_p, softseg.ssd_def32,
865 softseg.ssd_gran);
866 printf("processor eflags = ");
867 if (frame->tf_eflags & PSL_T)
868 printf("trace trap, ");
869 if (frame->tf_eflags & PSL_I)
870 printf("interrupt enabled, ");
871 if (frame->tf_eflags & PSL_NT)
872 printf("nested task, ");
873 if (frame->tf_eflags & PSL_RF)
874 printf("resume, ");
875 if (frame->tf_eflags & PSL_VM)
876 printf("vm86, ");
877 printf("IOPL = %d\n", (frame->tf_eflags & PSL_IOPL) >> 12);
878 printf("current process = ");
879 if (curproc) {
880 printf("%lu (%s)\n",
881 (u_long)curproc->p_pid, curproc->p_comm ?
882 curproc->p_comm : "");
883 } else {
884 printf("Idle\n");
885 }
886 printf("interrupt mask = ");
887 if ((cpl & net_imask) == net_imask)
888 printf("net ");
889 if ((cpl & tty_imask) == tty_imask)
890 printf("tty ");
891 if ((cpl & bio_imask) == bio_imask)
892 printf("bio ");
893 if ((cpl & cam_imask) == cam_imask)
894 printf("cam ");
895 if (cpl == 0)
896 printf("none");
897#ifdef SMP
898/**
899 * XXX FIXME:
900 * we probably SHOULD have stopped the other CPUs before now!
901 * another CPU COULD have been touching cpl at this moment...
902 */
903 printf(" <- SMP: XXX");
904#endif
905 printf("\n");
906
907#ifdef KDB
908 if (kdb_trap(&psl))
909 return;
910#endif
911#ifdef DDB
912 if ((debugger_on_panic || in_Debugger) && kdb_trap(type, 0, frame))
913 return;
914#endif
915 printf("trap number = %d\n", type);
916 if (type <= MAX_TRAP_MSG)
917 panic(trap_msg[type]);
918 else
919 panic("unknown/reserved trap");
920}
921
922/*
923 * Double fault handler. Called when a fault occurs while writing
924 * a frame for a trap/exception onto the stack. This usually occurs
925 * when the stack overflows (such is the case with infinite recursion,
926 * for example).
927 *
928 * XXX Note that the current PTD gets replaced by IdlePTD when the
929 * task switch occurs. This means that the stack that was active at
930 * the time of the double fault is not available at <kstack> unless
931 * the machine was idle when the double fault occurred. The downside
932 * of this is that "trace <ebp>" in ddb won't work.
933 */
934void
935dblfault_handler()
936{
937 printf("\nFatal double fault:\n");
938 printf("eip = 0x%x\n", common_tss.tss_eip);
939 printf("esp = 0x%x\n", common_tss.tss_esp);
940 printf("ebp = 0x%x\n", common_tss.tss_ebp);
941#ifdef SMP
942 /* three seperate prints in case of a trap on an unmapped page */
943 printf("mp_lock = %08x; ", mp_lock);
944 printf("cpuid = %d; ", cpuid);
945 printf("lapic.id = %08x\n", lapic.id);
946#endif
947 panic("double fault");
948}
949
950/*
951 * Compensate for 386 brain damage (missing URKR).
952 * This is a little simpler than the pagefault handler in trap() because
953 * it the page tables have already been faulted in and high addresses
954 * are thrown out early for other reasons.
955 */
956int trapwrite(addr)
957 unsigned addr;
958{
959 struct proc *p;
960 vm_offset_t va;
961 struct vmspace *vm;
962 int rv;
963
964 va = trunc_page((vm_offset_t)addr);
965 /*
966 * XXX - MAX is END. Changed > to >= for temp. fix.
967 */
968 if (va >= VM_MAXUSER_ADDRESS)
969 return (1);
970
971 p = curproc;
972 vm = p->p_vmspace;
973
974 ++p->p_lock;
975
976 if (!grow_stack (p, va)) {
977 --p->p_lock;
978 return (1);
979 }
980
981 /*
982 * fault the data page
983 */
984 rv = vm_fault(&vm->vm_map, va, VM_PROT_READ|VM_PROT_WRITE, VM_FAULT_DIRTY);
985
986 --p->p_lock;
987
988 if (rv != KERN_SUCCESS)
989 return 1;
990
991 return (0);
992}
993
994/*
995 * System call request from POSIX system call gate interface to kernel.
996 * Like trap(), argument is call by reference.
997 */
998void
999syscall(frame)
1000 struct trapframe frame;
1001{
1002 caddr_t params;
1003 int i;
1004 struct sysent *callp;
1005 struct proc *p = curproc;
1006 u_quad_t sticks;
1007 int error;
1008 int args[8];
1009 u_int code;
1010
1011#ifdef DIAGNOSTIC
1012 if (ISPL(frame.tf_cs) != SEL_UPL)
1013 panic("syscall");
1014#endif
1015 sticks = p->p_sticks;
1016 p->p_md.md_regs = &frame;
1017 params = (caddr_t)frame.tf_esp + sizeof(int);
1018 code = frame.tf_eax;
1019 if (p->p_sysent->sv_prepsyscall) {
1020 (*p->p_sysent->sv_prepsyscall)(&frame, args, &code, &params);
1021 } else {
1022 /*
1023 * Need to check if this is a 32 bit or 64 bit syscall.
1024 */
1025 if (code == SYS_syscall) {
1026 /*
1027 * Code is first argument, followed by actual args.
1028 */
1029 code = fuword(params);
1030 params += sizeof(int);
1031 } else if (code == SYS___syscall) {
1032 /*
1033 * Like syscall, but code is a quad, so as to maintain
1034 * quad alignment for the rest of the arguments.
1035 */
1036 code = fuword(params);
1037 params += sizeof(quad_t);
1038 }
1039 }
1040
1041 if (p->p_sysent->sv_mask)
1042 code &= p->p_sysent->sv_mask;
1043
1044 if (code >= p->p_sysent->sv_size)
1045 callp = &p->p_sysent->sv_table[0];
1046 else
1047 callp = &p->p_sysent->sv_table[code];
1048
1049 if (params && (i = callp->sy_narg * sizeof(int)) &&
1050 (error = copyin(params, (caddr_t)args, (u_int)i))) {
1051#ifdef KTRACE
1052 if (KTRPOINT(p, KTR_SYSCALL))
1053 ktrsyscall(p->p_tracep, code, callp->sy_narg, args);
1054#endif
1055 goto bad;
1056 }
1057#ifdef KTRACE
1058 if (KTRPOINT(p, KTR_SYSCALL))
1059 ktrsyscall(p->p_tracep, code, callp->sy_narg, args);
1060#endif
1061 p->p_retval[0] = 0;
1062 p->p_retval[1] = frame.tf_edx;
1063
1064 STOPEVENT(p, S_SCE, callp->sy_narg);
1065
1066 error = (*callp->sy_call)(p, args);
1067
1068 switch (error) {
1069
1070 case 0:
1071 /*
1072 * Reinitialize proc pointer `p' as it may be different
1073 * if this is a child returning from fork syscall.
1074 */
1075 p = curproc;
1076 frame.tf_eax = p->p_retval[0];
1077 frame.tf_edx = p->p_retval[1];
1078 frame.tf_eflags &= ~PSL_C;
1079 break;
1080
1081 case ERESTART:
1082 /*
1083 * Reconstruct pc, assuming lcall $X,y is 7 bytes,
1084 * int 0x80 is 2 bytes. We saved this in tf_err.
1085 */
1086 frame.tf_eip -= frame.tf_err;
1087 break;
1088
1089 case EJUSTRETURN:
1090 break;
1091
1092 default:
1093bad:
1094 if (p->p_sysent->sv_errsize)
1094 if (p->p_sysent->sv_errsize) {
1095 if (error >= p->p_sysent->sv_errsize)
1096 error = -1; /* XXX */
1097 else
1098 error = p->p_sysent->sv_errtbl[error];
1095 if (error >= p->p_sysent->sv_errsize)
1096 error = -1; /* XXX */
1097 else
1098 error = p->p_sysent->sv_errtbl[error];
1099 }
1099 frame.tf_eax = error;
1100 frame.tf_eflags |= PSL_C;
1101 break;
1102 }
1103
1104 if ((frame.tf_eflags & PSL_T) && !(frame.tf_eflags & PSL_VM)) {
1105 /* Traced syscall. */
1106 frame.tf_eflags &= ~PSL_T;
1107 trapsignal(p, SIGTRAP, 0);
1108 }
1109
1110 userret(p, &frame, sticks);
1111
1112#ifdef KTRACE
1113 if (KTRPOINT(p, KTR_SYSRET))
1114 ktrsysret(p->p_tracep, code, error, p->p_retval[0]);
1115#endif
1116
1117 /*
1118 * This works because errno is findable through the
1119 * register set. If we ever support an emulation where this
1120 * is not the case, this code will need to be revisited.
1121 */
1122 STOPEVENT(p, S_SCX, code);
1123
1124}
1125
1126/*
1127 * Simplified back end of syscall(), used when returning from fork()
1128 * directly into user mode.
1129 */
1130void
1131fork_return(p, frame)
1132 struct proc *p;
1133 struct trapframe frame;
1134{
1135 frame.tf_eax = 0; /* Child returns zero */
1136 frame.tf_eflags &= ~PSL_C; /* success */
1137 frame.tf_edx = 1;
1138
1139 userret(p, &frame, 0);
1140#ifdef KTRACE
1141 if (KTRPOINT(p, KTR_SYSRET))
1142 ktrsysret(p->p_tracep, SYS_fork, 0, 0);
1143#endif
1144}
1100 frame.tf_eax = error;
1101 frame.tf_eflags |= PSL_C;
1102 break;
1103 }
1104
1105 if ((frame.tf_eflags & PSL_T) && !(frame.tf_eflags & PSL_VM)) {
1106 /* Traced syscall. */
1107 frame.tf_eflags &= ~PSL_T;
1108 trapsignal(p, SIGTRAP, 0);
1109 }
1110
1111 userret(p, &frame, sticks);
1112
1113#ifdef KTRACE
1114 if (KTRPOINT(p, KTR_SYSRET))
1115 ktrsysret(p->p_tracep, code, error, p->p_retval[0]);
1116#endif
1117
1118 /*
1119 * This works because errno is findable through the
1120 * register set. If we ever support an emulation where this
1121 * is not the case, this code will need to be revisited.
1122 */
1123 STOPEVENT(p, S_SCX, code);
1124
1125}
1126
1127/*
1128 * Simplified back end of syscall(), used when returning from fork()
1129 * directly into user mode.
1130 */
1131void
1132fork_return(p, frame)
1133 struct proc *p;
1134 struct trapframe frame;
1135{
1136 frame.tf_eax = 0; /* Child returns zero */
1137 frame.tf_eflags &= ~PSL_C; /* success */
1138 frame.tf_edx = 1;
1139
1140 userret(p, &frame, 0);
1141#ifdef KTRACE
1142 if (KTRPOINT(p, KTR_SYSRET))
1143 ktrsysret(p->p_tracep, SYS_fork, 0, 0);
1144#endif
1145}