/* ia32_syscall.c revision 160764 */
/*-
 * Copyright (C) 1994, David Greenman
 * Copyright (c) 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the University of Utah, and William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/amd64/ia32/ia32_syscall.c 160764 2006-07-27 19:50:16Z jhb $");

/*
 * 386 Trap and System call handling
 */

#include "opt_clock.h"
#include "opt_cpu.h"
#include "opt_isa.h"
#include "opt_ktrace.h"

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/pioctl.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/ptrace.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/syscall.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/uio.h>
#include <sys/vmmeter.h>
#ifdef KTRACE
#include <sys/ktrace.h>
#endif
#include <security/audit/audit.h>

72290650Shselasky#include <vm/vm.h> 73290650Shselasky#include <vm/vm_param.h> 74290650Shselasky#include <vm/pmap.h> 75290650Shselasky#include <vm/vm_kern.h> 76329209Shselasky#include <vm/vm_map.h> 77329209Shselasky#include <vm/vm_page.h> 78329209Shselasky#include <vm/vm_extern.h> 79329209Shselasky 80329209Shselasky#include <machine/cpu.h> 81290650Shselasky#include <machine/intr_machdep.h> 82290650Shselasky#include <machine/md_var.h> 83290650Shselasky 84290650Shselasky#define IDTVEC(name) __CONCAT(X,name) 85290650Shselasky 86290650Shselaskyextern inthand_t IDTVEC(int0x80_syscall), IDTVEC(rsvd); 87290650Shselaskyextern const char *freebsd32_syscallnames[]; 88290650Shselasky 89290650Shselaskyvoid ia32_syscall(struct trapframe frame); /* Called from asm code */ 90290650Shselasky 91290650Shselaskyvoid 92290650Shselaskyia32_syscall(struct trapframe frame) 93290650Shselasky{ 94290650Shselasky caddr_t params; 95290650Shselasky int i; 96290650Shselasky struct sysent *callp; 97290650Shselasky struct thread *td = curthread; 98290650Shselasky struct proc *p = td->td_proc; 99290650Shselasky register_t orig_tf_rflags; 100290650Shselasky int error; 101290650Shselasky int narg; 102290650Shselasky u_int32_t args[8]; 103290650Shselasky u_int64_t args64[8]; 104290650Shselasky u_int code; 105290650Shselasky ksiginfo_t ksi; 106290650Shselasky 107290650Shselasky /* 108290650Shselasky * note: PCPU_LAZY_INC() can only be used if we can afford 109290650Shselasky * occassional inaccuracy in the count. 
110290650Shselasky */ 111290650Shselasky PCPU_LAZY_INC(cnt.v_syscall); 112290650Shselasky 113290650Shselasky td->td_pticks = 0; 114290650Shselasky td->td_frame = &frame; 115290650Shselasky if (td->td_ucred != p->p_ucred) 116290650Shselasky cred_update_thread(td); 117290650Shselasky params = (caddr_t)frame.tf_rsp + sizeof(u_int32_t); 118290650Shselasky code = frame.tf_rax; 119290650Shselasky orig_tf_rflags = frame.tf_rflags; 120290650Shselasky 121290650Shselasky if (p->p_sysent->sv_prepsyscall) { 122290650Shselasky /* 123290650Shselasky * The prep code is MP aware. 124290650Shselasky */ 125290650Shselasky (*p->p_sysent->sv_prepsyscall)(&frame, args, &code, ¶ms); 126290650Shselasky } else { 127290650Shselasky /* 128290650Shselasky * Need to check if this is a 32 bit or 64 bit syscall. 129290650Shselasky * fuword is MP aware. 130290650Shselasky */ 131290650Shselasky if (code == SYS_syscall) { 132290650Shselasky /* 133290650Shselasky * Code is first argument, followed by actual args. 134290650Shselasky */ 135290650Shselasky code = fuword32(params); 136290650Shselasky params += sizeof(int); 137290650Shselasky } else if (code == SYS___syscall) { 138290650Shselasky /* 139290650Shselasky * Like syscall, but code is a quad, so as to maintain 140290650Shselasky * quad alignment for the rest of the arguments. 141290650Shselasky * We use a 32-bit fetch in case params is not 142290650Shselasky * aligned. 
143290650Shselasky */ 144290650Shselasky code = fuword32(params); 145290650Shselasky params += sizeof(quad_t); 146290650Shselasky } 147290650Shselasky } 148290650Shselasky 149290650Shselasky if (p->p_sysent->sv_mask) 150290650Shselasky code &= p->p_sysent->sv_mask; 151290650Shselasky 152290650Shselasky if (code >= p->p_sysent->sv_size) 153290650Shselasky callp = &p->p_sysent->sv_table[0]; 154290650Shselasky else 155290650Shselasky callp = &p->p_sysent->sv_table[code]; 156290650Shselasky 157290650Shselasky narg = callp->sy_narg & SYF_ARGMASK; 158290650Shselasky 159290650Shselasky /* 160290650Shselasky * copyin and the ktrsyscall()/ktrsysret() code is MP-aware 161290650Shselasky */ 162290650Shselasky if (params != NULL && narg != 0) 163290650Shselasky error = copyin(params, (caddr_t)args, 164290650Shselasky (u_int)(narg * sizeof(int))); 165290650Shselasky else 166290650Shselasky error = 0; 167290650Shselasky 168290650Shselasky for (i = 0; i < narg; i++) 169290650Shselasky args64[i] = args[i]; 170290650Shselasky 171290650Shselasky#ifdef KTRACE 172290650Shselasky if (KTRPOINT(td, KTR_SYSCALL)) 173290650Shselasky ktrsyscall(code, narg, args64); 174290650Shselasky#endif 175290650Shselasky /* 176290650Shselasky * Try to run the syscall without Giant if the syscall 177290650Shselasky * is MP safe. 
178290650Shselasky */ 179290650Shselasky if ((callp->sy_narg & SYF_MPSAFE) == 0) 180290650Shselasky mtx_lock(&Giant); 181290650Shselasky 182290650Shselasky if (error == 0) { 183290650Shselasky td->td_retval[0] = 0; 184290650Shselasky td->td_retval[1] = frame.tf_rdx; 185290650Shselasky 186290650Shselasky STOPEVENT(p, S_SCE, narg); 187290650Shselasky 188331580Shselasky PTRACESTOP_SC(p, td, S_PT_SCE); 189331580Shselasky 190331580Shselasky AUDIT_SYSCALL_ENTER(code, td); 191331580Shselasky error = (*callp->sy_call)(td, args64); 192331580Shselasky AUDIT_SYSCALL_EXIT(error, td); 193331580Shselasky } 194331580Shselasky 195331580Shselasky switch (error) { 196331580Shselasky case 0: 197331580Shselasky frame.tf_rax = td->td_retval[0]; 198331580Shselasky frame.tf_rdx = td->td_retval[1]; 199331580Shselasky frame.tf_rflags &= ~PSL_C; 200331580Shselasky break; 201331580Shselasky 202331580Shselasky case ERESTART: 203331580Shselasky /* 204331580Shselasky * Reconstruct pc, assuming lcall $X,y is 7 bytes, 205331580Shselasky * int 0x80 is 2 bytes. We saved this in tf_err. 206331580Shselasky */ 207331580Shselasky frame.tf_rip -= frame.tf_err; 208331580Shselasky break; 209331580Shselasky 210331580Shselasky case EJUSTRETURN: 211331580Shselasky break; 212331580Shselasky 213331580Shselasky default: 214331580Shselasky if (p->p_sysent->sv_errsize) { 215331580Shselasky if (error >= p->p_sysent->sv_errsize) 216290650Shselasky error = -1; /* XXX */ 217290650Shselasky else 218290650Shselasky error = p->p_sysent->sv_errtbl[error]; 219290650Shselasky } 220290650Shselasky frame.tf_rax = error; 221290650Shselasky frame.tf_rflags |= PSL_C; 222290650Shselasky break; 223290650Shselasky } 224290650Shselasky 225290650Shselasky /* 226290650Shselasky * Release Giant if we previously set it. 227290650Shselasky */ 228290650Shselasky if ((callp->sy_narg & SYF_MPSAFE) == 0) 229290650Shselasky mtx_unlock(&Giant); 230290650Shselasky 231290650Shselasky /* 232290650Shselasky * Traced syscall. 
233290650Shselasky */ 234290650Shselasky if (orig_tf_rflags & PSL_T) { 235290650Shselasky frame.tf_rflags &= ~PSL_T; 236290650Shselasky ksiginfo_init_trap(&ksi); 237290650Shselasky ksi.ksi_signo = SIGTRAP; 238290650Shselasky ksi.ksi_code = TRAP_TRACE; 239290650Shselasky ksi.ksi_addr = (void *)frame.tf_rip; 240290650Shselasky trapsignal(td, &ksi); 241290650Shselasky } 242290650Shselasky 243290650Shselasky /* 244290650Shselasky * Handle reschedule and other end-of-syscall issues 245290650Shselasky */ 246290650Shselasky userret(td, &frame); 247290650Shselasky 248290650Shselasky#ifdef KTRACE 249290650Shselasky if (KTRPOINT(td, KTR_SYSRET)) 250290650Shselasky ktrsysret(code, error, td->td_retval[0]); 251290650Shselasky#endif 252290650Shselasky 253290650Shselasky /* 254290650Shselasky * This works because errno is findable through the 255290650Shselasky * register set. If we ever support an emulation where this 256290650Shselasky * is not the case, this code will need to be revisited. 257290650Shselasky */ 258290650Shselasky STOPEVENT(p, S_SCX, code); 259290650Shselasky 260290650Shselasky PTRACESTOP_SC(p, td, S_PT_SCX); 261290650Shselasky 262290650Shselasky WITNESS_WARN(WARN_PANIC, NULL, "System call %s returning", 263290650Shselasky (code >= 0 && code < SYS_MAXSYSCALL) ? 
freebsd32_syscallnames[code] : "???"); 264290650Shselasky mtx_assert(&sched_lock, MA_NOTOWNED); 265290650Shselasky mtx_assert(&Giant, MA_NOTOWNED); 266290650Shselasky} 267290650Shselasky 268290650Shselasky 269290650Shselaskystatic void 270290650Shselaskyia32_syscall_enable(void *dummy) 271290650Shselasky{ 272290650Shselasky 273290650Shselasky setidt(IDT_SYSCALL, &IDTVEC(int0x80_syscall), SDT_SYSIGT, SEL_UPL, 0); 274290650Shselasky} 275290650Shselasky 276290650Shselaskystatic void 277290650Shselaskyia32_syscall_disable(void *dummy) 278290650Shselasky{ 279290650Shselasky 280290650Shselasky setidt(IDT_SYSCALL, &IDTVEC(rsvd), SDT_SYSIGT, SEL_KPL, 0); 281290650Shselasky} 282290650Shselasky 283290650ShselaskySYSINIT(ia32_syscall, SI_SUB_EXEC, SI_ORDER_ANY, ia32_syscall_enable, NULL); 284290650ShselaskySYSUNINIT(ia32_syscall, SI_SUB_EXEC, SI_ORDER_ANY, ia32_syscall_disable, NULL); 285290650Shselasky