linux32_machdep.c revision 147588
1133819Stjr/*- 2133819Stjr * Copyright (c) 2004 Tim J. Robbins 3133819Stjr * Copyright (c) 2002 Doug Rabson 4133819Stjr * Copyright (c) 2000 Marcel Moolenaar 5133819Stjr * All rights reserved. 6133819Stjr * 7133819Stjr * Redistribution and use in source and binary forms, with or without 8133819Stjr * modification, are permitted provided that the following conditions 9133819Stjr * are met: 10133819Stjr * 1. Redistributions of source code must retain the above copyright 11133819Stjr * notice, this list of conditions and the following disclaimer 12133819Stjr * in this position and unchanged. 13133819Stjr * 2. Redistributions in binary form must reproduce the above copyright 14133819Stjr * notice, this list of conditions and the following disclaimer in the 15133819Stjr * documentation and/or other materials provided with the distribution. 16133819Stjr * 3. The name of the author may not be used to endorse or promote products 17133819Stjr * derived from this software without specific prior written permission. 18133819Stjr * 19133819Stjr * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 20133819Stjr * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 21133819Stjr * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 22133819Stjr * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 23133819Stjr * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 24133819Stjr * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25133819Stjr * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26133819Stjr * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27133819Stjr * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 28133819Stjr * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29133819Stjr */ 30133819Stjr 31133819Stjr#include <sys/cdefs.h> 32133819Stjr__FBSDID("$FreeBSD: head/sys/amd64/linux32/linux32_machdep.c 147588 2005-06-24 17:41:28Z jhb $"); 33133819Stjr 34133819Stjr#include <sys/param.h> 35133819Stjr#include <sys/kernel.h> 36133819Stjr#include <sys/systm.h> 37142057Sjhb#include <sys/imgact.h> 38133819Stjr#include <sys/lock.h> 39133819Stjr#include <sys/malloc.h> 40133819Stjr#include <sys/mman.h> 41133819Stjr#include <sys/mutex.h> 42133819Stjr#include <sys/proc.h> 43133819Stjr#include <sys/resource.h> 44133819Stjr#include <sys/resourcevar.h> 45133819Stjr#include <sys/syscallsubr.h> 46133819Stjr#include <sys/sysproto.h> 47133819Stjr#include <sys/unistd.h> 48133819Stjr 49133819Stjr#include <machine/frame.h> 50133819Stjr 51133819Stjr#include <vm/vm.h> 52133819Stjr#include <vm/pmap.h> 53142057Sjhb#include <vm/vm_extern.h> 54142057Sjhb#include <vm/vm_kern.h> 55133819Stjr#include <vm/vm_map.h> 56133819Stjr 57133819Stjr#include <amd64/linux32/linux.h> 58133819Stjr#include <amd64/linux32/linux32_proto.h> 59133819Stjr#include <compat/linux/linux_ipc.h> 60133819Stjr#include <compat/linux/linux_signal.h> 61133819Stjr#include <compat/linux/linux_util.h> 62133819Stjr 63133819Stjrstruct l_old_select_argv { 64133819Stjr l_int nfds; 65133819Stjr l_uintptr_t readfds; 66133819Stjr l_uintptr_t writefds; 67133819Stjr l_uintptr_t exceptfds; 68133819Stjr l_uintptr_t timeout; 69133819Stjr} __packed; 70133819Stjr 71133819Stjrint 72133819Stjrlinux_to_bsd_sigaltstack(int lsa) 73133819Stjr{ 74133819Stjr int bsa = 0; 75133819Stjr 76133819Stjr if (lsa & LINUX_SS_DISABLE) 77133819Stjr bsa |= SS_DISABLE; 78133819Stjr if (lsa & LINUX_SS_ONSTACK) 79133819Stjr bsa |= SS_ONSTACK; 80133819Stjr return (bsa); 81133819Stjr} 82133819Stjr 83133819Stjrint 84133819Stjrbsd_to_linux_sigaltstack(int bsa) 85133819Stjr{ 86133819Stjr int lsa = 0; 87133819Stjr 88133819Stjr if (bsa & SS_DISABLE) 89133819Stjr lsa |= LINUX_SS_DISABLE; 90133819Stjr if (bsa & SS_ONSTACK) 91133819Stjr lsa |= LINUX_SS_ONSTACK; 92133819Stjr return (lsa); 93133819Stjr} 94133819Stjr 95142057Sjhb/* 96142057Sjhb * Custom version of exec_copyin_args() so that we can translate 97142057Sjhb * the pointers. 98142057Sjhb */ 99142057Sjhbstatic int 100142057Sjhblinux_exec_copyin_args(struct image_args *args, char *fname, 101142057Sjhb enum uio_seg segflg, char **argv, char **envv) 102133819Stjr{ 103142057Sjhb char *argp, *envp; 104142057Sjhb u_int32_t *p32, arg; 105142057Sjhb size_t length; 106133819Stjr int error; 107133819Stjr 108142057Sjhb bzero(args, sizeof(*args)); 109142057Sjhb if (argv == NULL) 110142057Sjhb return (EFAULT); 111133819Stjr 112142057Sjhb /* 113142057Sjhb * Allocate temporary demand zeroed space for argument and 114142057Sjhb * environment strings 115142057Sjhb */ 116147588Sjhb args->buf = (char *) kmem_alloc_wait(exec_map, 117147588Sjhb PATH_MAX + ARG_MAX + MAXSHELLCMDLEN); 118142057Sjhb if (args->buf == NULL) 119142057Sjhb return (ENOMEM); 120142057Sjhb args->begin_argv = args->buf; 121142057Sjhb args->endp = args->begin_argv; 122142057Sjhb args->stringspace = ARG_MAX; 123133819Stjr 124142057Sjhb args->fname = args->buf + ARG_MAX; 125133819Stjr 126142057Sjhb /* 127142057Sjhb * Copy the file name. 128142057Sjhb */ 129142057Sjhb error = (segflg == UIO_SYSSPACE) ? 130142057Sjhb copystr(fname, args->fname, PATH_MAX, &length) : 131142057Sjhb copyinstr(fname, args->fname, PATH_MAX, &length); 132142057Sjhb if (error != 0) 133142057Sjhb return (error); 134142057Sjhb 135142057Sjhb /* 136142057Sjhb * extract arguments first 137142057Sjhb */ 138142057Sjhb p32 = (u_int32_t *)argv; 139142057Sjhb for (;;) { 140142057Sjhb error = copyin(p32++, &arg, sizeof(arg)); 141142057Sjhb if (error) 142142057Sjhb return (error); 143142057Sjhb if (arg == 0) 144142057Sjhb break; 145142057Sjhb argp = PTRIN(arg); 146142057Sjhb error = copyinstr(argp, args->endp, args->stringspace, &length); 147142057Sjhb if (error) { 148142057Sjhb if (error == ENAMETOOLONG) 149142057Sjhb return (E2BIG); 150142057Sjhb else 151142057Sjhb return (error); 152142057Sjhb } 153142057Sjhb args->stringspace -= length; 154142057Sjhb args->endp += length; 155142057Sjhb args->argc++; 156133819Stjr } 157142057Sjhb 158142057Sjhb args->begin_envv = args->endp; 159142057Sjhb 160142057Sjhb /* 161142057Sjhb * extract environment strings 162142057Sjhb */ 163142057Sjhb if (envv) { 164142057Sjhb p32 = (u_int32_t *)envv; 165142057Sjhb for (;;) { 166133819Stjr error = copyin(p32++, &arg, sizeof(arg)); 167133819Stjr if (error) 168142057Sjhb return (error); 169142057Sjhb if (arg == 0) 170142057Sjhb break; 171142057Sjhb envp = PTRIN(arg); 172142057Sjhb error = copyinstr(envp, args->endp, args->stringspace, 173142057Sjhb &length); 174142057Sjhb if (error) { 175142057Sjhb if (error == ENAMETOOLONG) 176142057Sjhb return (E2BIG); 177142057Sjhb else 178142057Sjhb return (error); 179142057Sjhb } 180142057Sjhb args->stringspace -= length; 181142057Sjhb args->endp += length; 182142057Sjhb args->envc++; 183142057Sjhb } 184133819Stjr } 185133819Stjr 186142057Sjhb return (0); 187133819Stjr} 188133819Stjr 189142057Sjhbint 190142057Sjhblinux_execve(struct thread *td, struct linux_execve_args *args) 191142057Sjhb{ 192142057Sjhb struct image_args eargs; 193142057Sjhb char *path; 194142057Sjhb int error; 195142057Sjhb 196142057Sjhb LCONVPATHEXIST(td, args->path, &path); 197142057Sjhb 198142057Sjhb#ifdef DEBUG 199142057Sjhb if (ldebug(execve)) 200142057Sjhb printf(ARGS(execve, "%s"), path); 201142057Sjhb#endif 202142057Sjhb 203142057Sjhb error = linux_exec_copyin_args(&eargs, path, UIO_SYSSPACE, args->argp, 204142057Sjhb args->envp); 205142057Sjhb free(path, M_TEMP); 206142057Sjhb if (error == 0) 207142057Sjhb error = kern_execve(td, &eargs, NULL); 208142057Sjhb exec_free_args(&eargs); 209142057Sjhb return (error); 210142057Sjhb} 211142057Sjhb 212133819Stjrstruct iovec32 { 213133819Stjr u_int32_t iov_base; 214133819Stjr int iov_len; 215133819Stjr}; 216133819Stjr 217133819StjrCTASSERT(sizeof(struct iovec32) == 8); 218133819Stjr 219144449Sjhbstatic int 220144449Sjhblinux32_copyinuio(struct iovec32 *iovp, u_int iovcnt, struct uio **uiop) 221133819Stjr{ 222144449Sjhb struct iovec32 iov32; 223144449Sjhb struct iovec *iov; 224144449Sjhb struct uio *uio; 225144449Sjhb u_int iovlen; 226144449Sjhb int error, i; 227133819Stjr 228144449Sjhb *uiop = NULL; 229144449Sjhb if (iovcnt > UIO_MAXIOV) 230133819Stjr return (EINVAL); 231144449Sjhb iovlen = iovcnt * sizeof(struct iovec); 232144449Sjhb uio = malloc(iovlen + sizeof *uio, M_IOV, M_WAITOK); 233144449Sjhb iov = (struct iovec *)(uio + 1); 234144449Sjhb for (i = 0; i < iovcnt; i++) { 235144449Sjhb error = copyin(&iovp[i], &iov32, sizeof(struct iovec32)); 236144449Sjhb if (error) { 237144449Sjhb free(uio, M_IOV); 238144449Sjhb return (error); 239144449Sjhb } 240144449Sjhb iov[i].iov_base = PTRIN(iov32.iov_base); 241144449Sjhb iov[i].iov_len = iov32.iov_len; 242133819Stjr } 243144449Sjhb uio->uio_iov = iov; 244144449Sjhb uio->uio_iovcnt = iovcnt; 245144449Sjhb uio->uio_segflg = UIO_USERSPACE; 246144449Sjhb uio->uio_offset = -1; 247144449Sjhb uio->uio_resid = 0; 248144449Sjhb for (i = 0; i < iovcnt; i++) { 249144449Sjhb if (iov->iov_len > INT_MAX - uio->uio_resid) { 250144449Sjhb free(uio, M_IOV); 251144449Sjhb return (EINVAL); 252144449Sjhb } 253144449Sjhb uio->uio_resid += iov->iov_len; 254144449Sjhb iov++; 255144449Sjhb } 256144449Sjhb *uiop = uio; 257144449Sjhb return (0); 258144449Sjhb} 259133819Stjr 260144449Sjhbint 261144449Sjhblinux_readv(struct thread *td, struct linux_readv_args *uap) 262144449Sjhb{ 263144449Sjhb struct uio *auio; 264144449Sjhb int error; 265133819Stjr 266144449Sjhb error = linux32_copyinuio(uap->iovp, uap->iovcnt, &auio); 267144449Sjhb if (error) 268144449Sjhb return (error); 269144449Sjhb error = kern_readv(td, uap->fd, auio); 270144449Sjhb free(auio, M_IOV); 271133819Stjr return (error); 272133819Stjr} 273133819Stjr 274133819Stjrint 275133819Stjrlinux_writev(struct thread *td, struct linux_writev_args *uap) 276133819Stjr{ 277144449Sjhb struct uio *auio; 278144449Sjhb int error; 279133819Stjr 280144449Sjhb error = linux32_copyinuio(uap->iovp, uap->iovcnt, &auio); 281144449Sjhb if (error) 282144449Sjhb return (error); 283144449Sjhb error = kern_writev(td, uap->fd, auio); 284144449Sjhb free(auio, M_IOV); 285133819Stjr return (error); 286133819Stjr} 287133819Stjr 288133819Stjrstruct l_ipc_kludge { 289133819Stjr l_uintptr_t msgp; 290133819Stjr l_long msgtyp; 291133819Stjr} __packed; 292133819Stjr 293133819Stjrint 294133819Stjrlinux_ipc(struct thread *td, struct linux_ipc_args *args) 295133819Stjr{ 296133819Stjr 297133819Stjr switch (args->what & 0xFFFF) { 298133819Stjr case LINUX_SEMOP: { 299133819Stjr struct linux_semop_args a; 300133819Stjr 301133819Stjr a.semid = args->arg1; 302133819Stjr a.tsops = args->ptr; 303133819Stjr a.nsops = args->arg2; 304133819Stjr return (linux_semop(td, &a)); 305133819Stjr } 306133819Stjr case LINUX_SEMGET: { 307133819Stjr struct linux_semget_args a; 308133819Stjr 309133819Stjr a.key = args->arg1; 310133819Stjr a.nsems = args->arg2; 311133819Stjr a.semflg = args->arg3; 312133819Stjr return (linux_semget(td, &a)); 313133819Stjr } 314133819Stjr case LINUX_SEMCTL: { 315133819Stjr struct linux_semctl_args a; 316133819Stjr int error; 317133819Stjr 318133819Stjr a.semid = args->arg1; 319133819Stjr a.semnum = args->arg2; 320133819Stjr a.cmd = args->arg3; 321133819Stjr error = copyin(args->ptr, &a.arg, sizeof(a.arg)); 322133819Stjr if (error) 323133819Stjr return (error); 324133819Stjr return (linux_semctl(td, &a)); 325133819Stjr } 326133819Stjr case LINUX_MSGSND: { 327133819Stjr struct linux_msgsnd_args a; 328133819Stjr 329133819Stjr a.msqid = args->arg1; 330133819Stjr a.msgp = args->ptr; 331133819Stjr a.msgsz = args->arg2; 332133819Stjr a.msgflg = args->arg3; 333133819Stjr return (linux_msgsnd(td, &a)); 334133819Stjr } 335133819Stjr case LINUX_MSGRCV: { 336133819Stjr struct linux_msgrcv_args a; 337133819Stjr 338133819Stjr a.msqid = args->arg1; 339133819Stjr a.msgsz = args->arg2; 340133819Stjr a.msgflg = args->arg3; 341133819Stjr if ((args->what >> 16) == 0) { 342133819Stjr struct l_ipc_kludge tmp; 343133819Stjr int error; 344133819Stjr 345133819Stjr if (args->ptr == 0) 346133819Stjr return (EINVAL); 347133819Stjr error = copyin(args->ptr, &tmp, sizeof(tmp)); 348133819Stjr if (error) 349133819Stjr return (error); 350133819Stjr a.msgp = PTRIN(tmp.msgp); 351133819Stjr a.msgtyp = tmp.msgtyp; 352133819Stjr } else { 353133819Stjr a.msgp = args->ptr; 354133819Stjr a.msgtyp = args->arg5; 355133819Stjr } 356133819Stjr return (linux_msgrcv(td, &a)); 357133819Stjr } 358133819Stjr case LINUX_MSGGET: { 359133819Stjr struct linux_msgget_args a; 360133819Stjr 361133819Stjr a.key = args->arg1; 362133819Stjr a.msgflg = args->arg2; 363133819Stjr return (linux_msgget(td, &a)); 364133819Stjr } 365133819Stjr case LINUX_MSGCTL: { 366133819Stjr struct linux_msgctl_args a; 367133819Stjr 368133819Stjr a.msqid = args->arg1; 369133819Stjr a.cmd = args->arg2; 370133819Stjr a.buf = args->ptr; 371133819Stjr return (linux_msgctl(td, &a)); 372133819Stjr } 373133819Stjr case LINUX_SHMAT: { 374133819Stjr struct linux_shmat_args a; 375133819Stjr 376133819Stjr a.shmid = args->arg1; 377133819Stjr a.shmaddr = args->ptr; 378133819Stjr a.shmflg = args->arg2; 379144441Sjhb a.raddr = PTRIN((l_uint)args->arg3); 380133819Stjr return (linux_shmat(td, &a)); 381133819Stjr } 382133819Stjr case LINUX_SHMDT: { 383133819Stjr struct linux_shmdt_args a; 384133819Stjr 385133819Stjr a.shmaddr = args->ptr; 386133819Stjr return (linux_shmdt(td, &a)); 387133819Stjr } 388133819Stjr case LINUX_SHMGET: { 389133819Stjr struct linux_shmget_args a; 390133819Stjr 391133819Stjr a.key = args->arg1; 392133819Stjr a.size = args->arg2; 393133819Stjr a.shmflg = args->arg3; 394133819Stjr return (linux_shmget(td, &a)); 395133819Stjr } 396133819Stjr case LINUX_SHMCTL: { 397133819Stjr struct linux_shmctl_args a; 398133819Stjr 399133819Stjr a.shmid = args->arg1; 400133819Stjr a.cmd = args->arg2; 401133819Stjr a.buf = args->ptr; 402133819Stjr return (linux_shmctl(td, &a)); 403133819Stjr } 404133819Stjr default: 405133819Stjr break; 406133819Stjr } 407133819Stjr 408133819Stjr return (EINVAL); 409133819Stjr} 410133819Stjr 411133819Stjrint 412133819Stjrlinux_old_select(struct thread *td, struct linux_old_select_args *args) 413133819Stjr{ 414133819Stjr struct l_old_select_argv linux_args; 415133819Stjr struct linux_select_args newsel; 416133819Stjr int error; 417133819Stjr 418133819Stjr#ifdef DEBUG 419133819Stjr if (ldebug(old_select)) 420133819Stjr printf(ARGS(old_select, "%p"), args->ptr); 421133819Stjr#endif 422133819Stjr 423133819Stjr error = copyin(args->ptr, &linux_args, sizeof(linux_args)); 424133819Stjr if (error) 425133819Stjr return (error); 426133819Stjr 427133819Stjr newsel.nfds = linux_args.nfds; 428133819Stjr newsel.readfds = PTRIN(linux_args.readfds); 429133819Stjr newsel.writefds = PTRIN(linux_args.writefds); 430133819Stjr newsel.exceptfds = PTRIN(linux_args.exceptfds); 431133819Stjr newsel.timeout = PTRIN(linux_args.timeout); 432133819Stjr return (linux_select(td, &newsel)); 433133819Stjr} 434133819Stjr 435133819Stjrint 436133819Stjrlinux_fork(struct thread *td, struct linux_fork_args *args) 437133819Stjr{ 438133819Stjr int error; 439133819Stjr 440133819Stjr#ifdef DEBUG 441133819Stjr if (ldebug(fork)) 442133819Stjr printf(ARGS(fork, "")); 443133819Stjr#endif 444133819Stjr 445133819Stjr if ((error = fork(td, (struct fork_args *)args)) != 0) 446133819Stjr return (error); 447133819Stjr 448133819Stjr if (td->td_retval[1] == 1) 449133819Stjr td->td_retval[0] = 0; 450133819Stjr return (0); 451133819Stjr} 452133819Stjr 453133819Stjrint 454133819Stjrlinux_vfork(struct thread *td, struct linux_vfork_args *args) 455133819Stjr{ 456133819Stjr int error; 457133819Stjr 458133819Stjr#ifdef DEBUG 459133819Stjr if (ldebug(vfork)) 460133819Stjr printf(ARGS(vfork, "")); 461133819Stjr#endif 462133819Stjr 463133819Stjr if ((error = vfork(td, (struct vfork_args *)args)) != 0) 464133819Stjr return (error); 465133819Stjr /* Are we the child? */ 466133819Stjr if (td->td_retval[1] == 1) 467133819Stjr td->td_retval[0] = 0; 468133819Stjr return (0); 469133819Stjr} 470133819Stjr 471133819Stjr#define CLONE_VM 0x100 472133819Stjr#define CLONE_FS 0x200 473133819Stjr#define CLONE_FILES 0x400 474133819Stjr#define CLONE_SIGHAND 0x800 475133819Stjr#define CLONE_PID 0x1000 476133819Stjr 477133819Stjrint 478133819Stjrlinux_clone(struct thread *td, struct linux_clone_args *args) 479133819Stjr{ 480133819Stjr int error, ff = RFPROC | RFSTOPPED; 481133819Stjr struct proc *p2; 482133819Stjr struct thread *td2; 483133819Stjr int exit_signal; 484133819Stjr 485133819Stjr#ifdef DEBUG 486133819Stjr if (ldebug(clone)) { 487133819Stjr printf(ARGS(clone, "flags %x, stack %x"), 488133843Sobrien (unsigned int)(uintptr_t)args->flags, 489133843Sobrien (unsigned int)(uintptr_t)args->stack); 490133819Stjr if (args->flags & CLONE_PID) 491133819Stjr printf(LMSG("CLONE_PID not yet supported")); 492133819Stjr } 493133819Stjr#endif 494133819Stjr 495133819Stjr if (!args->stack) 496133819Stjr return (EINVAL); 497133819Stjr 498133819Stjr exit_signal = args->flags & 0x000000ff; 499133819Stjr if (exit_signal >= LINUX_NSIG) 500133819Stjr return (EINVAL); 501133819Stjr 502133819Stjr if (exit_signal <= LINUX_SIGTBLSZ) 503133819Stjr exit_signal = linux_to_bsd_signal[_SIG_IDX(exit_signal)]; 504133819Stjr 505133819Stjr if (args->flags & CLONE_VM) 506133819Stjr ff |= RFMEM; 507133819Stjr if (args->flags & CLONE_SIGHAND) 508133819Stjr ff |= RFSIGSHARE; 509133819Stjr if (!(args->flags & CLONE_FILES)) 510133819Stjr ff |= RFFDG; 511133819Stjr 512133819Stjr error = fork1(td, ff, 0, &p2); 513133819Stjr if (error) 514133819Stjr return (error); 515133819Stjr 516133819Stjr 517133819Stjr PROC_LOCK(p2); 518133819Stjr p2->p_sigparent = exit_signal; 519133819Stjr PROC_UNLOCK(p2); 520133819Stjr td2 = FIRST_THREAD_IN_PROC(p2); 521133819Stjr td2->td_frame->tf_rsp = PTROUT(args->stack); 522133819Stjr 523133819Stjr#ifdef DEBUG 524133819Stjr if (ldebug(clone)) 525133819Stjr printf(LMSG("clone: successful rfork to %ld, stack %p sig = %d"), 526133819Stjr (long)p2->p_pid, args->stack, exit_signal); 527133819Stjr#endif 528133819Stjr 529133819Stjr /* 530133819Stjr * Make this runnable after we are finished with it. 531133819Stjr */ 532133819Stjr mtx_lock_spin(&sched_lock); 533133819Stjr TD_SET_CAN_RUN(td2); 534134586Sjulian setrunqueue(td2, SRQ_BORING); 535133819Stjr mtx_unlock_spin(&sched_lock); 536133819Stjr 537133819Stjr td->td_retval[0] = p2->p_pid; 538133819Stjr td->td_retval[1] = 0; 539133819Stjr return (0); 540133819Stjr} 541133819Stjr 542133819Stjr/* XXX move */ 543133819Stjrstruct l_mmap_argv { 544133819Stjr l_ulong addr; 545144441Sjhb l_ulong len; 546144441Sjhb l_ulong prot; 547144441Sjhb l_ulong flags; 548144441Sjhb l_ulong fd; 549144441Sjhb l_ulong pgoff; 550133819Stjr}; 551133819Stjr 552133819Stjr#define STACK_SIZE (2 * 1024 * 1024) 553133819Stjr#define GUARD_SIZE (4 * PAGE_SIZE) 554133819Stjr 555133819Stjrstatic int linux_mmap_common(struct thread *, struct l_mmap_argv *); 556133819Stjr 557133819Stjrint 558133819Stjrlinux_mmap2(struct thread *td, struct linux_mmap2_args *args) 559133819Stjr{ 560133819Stjr struct l_mmap_argv linux_args; 561133819Stjr 562133819Stjr#ifdef DEBUG 563133819Stjr if (ldebug(mmap2)) 564133819Stjr printf(ARGS(mmap2, "%p, %d, %d, 0x%08x, %d, %d"), 565133843Sobrien (void *)(intptr_t)args->addr, args->len, args->prot, 566133819Stjr args->flags, args->fd, args->pgoff); 567133819Stjr#endif 568133819Stjr 569133819Stjr linux_args.addr = PTROUT(args->addr); 570133819Stjr linux_args.len = args->len; 571133819Stjr linux_args.prot = args->prot; 572133819Stjr linux_args.flags = args->flags; 573133819Stjr linux_args.fd = args->fd; 574144441Sjhb linux_args.pgoff = args->pgoff; 575133819Stjr 576133819Stjr return (linux_mmap_common(td, &linux_args)); 577133819Stjr} 578133819Stjr 579133819Stjrint 580133819Stjrlinux_mmap(struct thread *td, struct linux_mmap_args *args) 581133819Stjr{ 582133819Stjr int error; 583133819Stjr struct l_mmap_argv linux_args; 584133819Stjr 585133819Stjr error = copyin(args->ptr, &linux_args, sizeof(linux_args)); 586133819Stjr if (error) 587133819Stjr return (error); 588133819Stjr 589133819Stjr#ifdef DEBUG 590133819Stjr if (ldebug(mmap)) 591133819Stjr printf(ARGS(mmap, "%p, %d, %d, 0x%08x, %d, %d"), 592133843Sobrien (void *)(intptr_t)linux_args.addr, linux_args.len, 593133843Sobrien linux_args.prot, linux_args.flags, linux_args.fd, 594144670Sjhb linux_args.pgoff); 595133819Stjr#endif 596144441Sjhb if ((linux_args.pgoff % PAGE_SIZE) != 0) 597144441Sjhb return (EINVAL); 598144441Sjhb linux_args.pgoff /= PAGE_SIZE; 599133819Stjr 600133819Stjr return (linux_mmap_common(td, &linux_args)); 601133819Stjr} 602133819Stjr 603133819Stjrstatic int 604133819Stjrlinux_mmap_common(struct thread *td, struct l_mmap_argv *linux_args) 605133819Stjr{ 606133819Stjr struct proc *p = td->td_proc; 607133819Stjr struct mmap_args /* { 608133819Stjr caddr_t addr; 609133819Stjr size_t len; 610133819Stjr int prot; 611133819Stjr int flags; 612133819Stjr int fd; 613133819Stjr long pad; 614133819Stjr off_t pos; 615133819Stjr } */ bsd_args; 616133819Stjr int error; 617133819Stjr 618133819Stjr error = 0; 619133819Stjr bsd_args.flags = 0; 620133819Stjr if (linux_args->flags & LINUX_MAP_SHARED) 621133819Stjr bsd_args.flags |= MAP_SHARED; 622133819Stjr if (linux_args->flags & LINUX_MAP_PRIVATE) 623133819Stjr bsd_args.flags |= MAP_PRIVATE; 624133819Stjr if (linux_args->flags & LINUX_MAP_FIXED) 625133819Stjr bsd_args.flags |= MAP_FIXED; 626133819Stjr if (linux_args->flags & LINUX_MAP_ANON) 627133819Stjr bsd_args.flags |= MAP_ANON; 628133819Stjr else 629133819Stjr bsd_args.flags |= MAP_NOSYNC; 630133819Stjr if (linux_args->flags & LINUX_MAP_GROWSDOWN) { 631133819Stjr bsd_args.flags |= MAP_STACK; 632133819Stjr 633133819Stjr /* The linux MAP_GROWSDOWN option does not limit auto 634133819Stjr * growth of the region. Linux mmap with this option 635133819Stjr * takes as addr the inital BOS, and as len, the initial 636133819Stjr * region size. It can then grow down from addr without 637133819Stjr * limit. However, linux threads has an implicit internal 638133819Stjr * limit to stack size of STACK_SIZE. Its just not 639133819Stjr * enforced explicitly in linux. But, here we impose 640133819Stjr * a limit of (STACK_SIZE - GUARD_SIZE) on the stack 641133819Stjr * region, since we can do this with our mmap. 642133819Stjr * 643133819Stjr * Our mmap with MAP_STACK takes addr as the maximum 644133819Stjr * downsize limit on BOS, and as len the max size of 645133819Stjr * the region. It them maps the top SGROWSIZ bytes, 646133819Stjr * and autgrows the region down, up to the limit 647133819Stjr * in addr. 648133819Stjr * 649133819Stjr * If we don't use the MAP_STACK option, the effect 650133819Stjr * of this code is to allocate a stack region of a 651133819Stjr * fixed size of (STACK_SIZE - GUARD_SIZE). 652133819Stjr */ 653133819Stjr 654133819Stjr /* This gives us TOS */ 655133819Stjr bsd_args.addr = (caddr_t)PTRIN(linux_args->addr) + 656133819Stjr linux_args->len; 657133819Stjr 658133819Stjr if ((caddr_t)PTRIN(bsd_args.addr) > 659133819Stjr p->p_vmspace->vm_maxsaddr) { 660133819Stjr /* Some linux apps will attempt to mmap 661133819Stjr * thread stacks near the top of their 662133819Stjr * address space. If their TOS is greater 663133819Stjr * than vm_maxsaddr, vm_map_growstack() 664133819Stjr * will confuse the thread stack with the 665133819Stjr * process stack and deliver a SEGV if they 666133819Stjr * attempt to grow the thread stack past their 667133819Stjr * current stacksize rlimit. To avoid this, 668133819Stjr * adjust vm_maxsaddr upwards to reflect 669133819Stjr * the current stacksize rlimit rather 670133819Stjr * than the maximum possible stacksize. 671133819Stjr * It would be better to adjust the 672133819Stjr * mmap'ed region, but some apps do not check 673133819Stjr * mmap's return value. 674133819Stjr */ 675133819Stjr PROC_LOCK(p); 676133819Stjr p->p_vmspace->vm_maxsaddr = 677133819Stjr (char *)LINUX32_USRSTACK - 678133819Stjr lim_cur(p, RLIMIT_STACK); 679133819Stjr PROC_UNLOCK(p); 680133819Stjr } 681133819Stjr 682133819Stjr /* This gives us our maximum stack size */ 683133819Stjr if (linux_args->len > STACK_SIZE - GUARD_SIZE) 684133819Stjr bsd_args.len = linux_args->len; 685133819Stjr else 686133819Stjr bsd_args.len = STACK_SIZE - GUARD_SIZE; 687133819Stjr 688133819Stjr /* This gives us a new BOS. If we're using VM_STACK, then 689133819Stjr * mmap will just map the top SGROWSIZ bytes, and let 690133819Stjr * the stack grow down to the limit at BOS. If we're 691133819Stjr * not using VM_STACK we map the full stack, since we 692133819Stjr * don't have a way to autogrow it. 693133819Stjr */ 694133819Stjr bsd_args.addr -= bsd_args.len; 695133819Stjr } else { 696133819Stjr bsd_args.addr = (caddr_t)PTRIN(linux_args->addr); 697133819Stjr bsd_args.len = linux_args->len; 698133819Stjr } 699133819Stjr /* 700133819Stjr * XXX i386 Linux always emulator forces PROT_READ on (why?) 701133819Stjr * so we do the same. We add PROT_EXEC to work around buggy 702133819Stjr * applications (e.g. Java) that take advantage of the fact 703133819Stjr * that execute permissions are not enforced by x86 CPUs. 704133819Stjr */ 705133819Stjr bsd_args.prot = linux_args->prot | PROT_EXEC | PROT_READ; 706133819Stjr if (linux_args->flags & LINUX_MAP_ANON) 707133819Stjr bsd_args.fd = -1; 708133819Stjr else 709133819Stjr bsd_args.fd = linux_args->fd; 710144441Sjhb bsd_args.pos = (off_t)linux_args->pgoff * PAGE_SIZE; 711133819Stjr bsd_args.pad = 0; 712133819Stjr 713133819Stjr#ifdef DEBUG 714133819Stjr if (ldebug(mmap)) 715133819Stjr printf("-> %s(%p, %d, %d, 0x%08x, %d, 0x%x)\n", 716133819Stjr __func__, 717133843Sobrien (void *)bsd_args.addr, (int)bsd_args.len, bsd_args.prot, 718133819Stjr bsd_args.flags, bsd_args.fd, (int)bsd_args.pos); 719133819Stjr#endif 720133819Stjr error = mmap(td, &bsd_args); 721133819Stjr#ifdef DEBUG 722133819Stjr if (ldebug(mmap)) 723133819Stjr printf("-> %s() return: 0x%x (0x%08x)\n", 724133819Stjr __func__, error, (u_int)td->td_retval[0]); 725133819Stjr#endif 726133819Stjr return (error); 727133819Stjr} 728133819Stjr 729133819Stjrint 730133819Stjrlinux_pipe(struct thread *td, struct linux_pipe_args *args) 731133819Stjr{ 732133819Stjr int pip[2]; 733133819Stjr int error; 734133819Stjr register_t reg_rdx; 735133819Stjr 736133819Stjr#ifdef DEBUG 737133819Stjr if (ldebug(pipe)) 738133819Stjr printf(ARGS(pipe, "*")); 739133819Stjr#endif 740133819Stjr 741133819Stjr reg_rdx = td->td_retval[1]; 742133819Stjr error = pipe(td, 0); 743133819Stjr if (error) { 744133819Stjr td->td_retval[1] = reg_rdx; 745133819Stjr return (error); 746133819Stjr } 747133819Stjr 748133819Stjr pip[0] = td->td_retval[0]; 749133819Stjr pip[1] = td->td_retval[1]; 750133819Stjr error = copyout(pip, args->pipefds, 2 * sizeof(int)); 751133819Stjr if (error) { 752133819Stjr td->td_retval[1] = reg_rdx; 753133819Stjr return (error); 754133819Stjr } 755133819Stjr 756133819Stjr td->td_retval[1] = reg_rdx; 757133819Stjr td->td_retval[0] = 0; 758133819Stjr return (0); 759133819Stjr} 760133819Stjr 761133819Stjrint 762133819Stjrlinux_sigaction(struct thread *td, struct linux_sigaction_args *args) 763133819Stjr{ 764133819Stjr l_osigaction_t osa; 765133819Stjr l_sigaction_t act, oact; 766133819Stjr int error; 767133819Stjr 768133819Stjr#ifdef DEBUG 769133819Stjr if (ldebug(sigaction)) 770133819Stjr printf(ARGS(sigaction, "%d, %p, %p"), 771133819Stjr args->sig, (void *)args->nsa, (void *)args->osa); 772133819Stjr#endif 773133819Stjr 774133819Stjr if (args->nsa != NULL) { 775133819Stjr error = copyin(args->nsa, &osa, sizeof(l_osigaction_t)); 776133819Stjr if (error) 777133819Stjr return (error); 778133819Stjr act.lsa_handler = osa.lsa_handler; 779133819Stjr act.lsa_flags = osa.lsa_flags; 780133819Stjr act.lsa_restorer = osa.lsa_restorer; 781133819Stjr LINUX_SIGEMPTYSET(act.lsa_mask); 782133819Stjr act.lsa_mask.__bits[0] = osa.lsa_mask; 783133819Stjr } 784133819Stjr 785133819Stjr error = linux_do_sigaction(td, args->sig, args->nsa ? &act : NULL, 786133819Stjr args->osa ? &oact : NULL); 787133819Stjr 788133819Stjr if (args->osa != NULL && !error) { 789133819Stjr osa.lsa_handler = oact.lsa_handler; 790133819Stjr osa.lsa_flags = oact.lsa_flags; 791133819Stjr osa.lsa_restorer = oact.lsa_restorer; 792133819Stjr osa.lsa_mask = oact.lsa_mask.__bits[0]; 793133819Stjr error = copyout(&osa, args->osa, sizeof(l_osigaction_t)); 794133819Stjr } 795133819Stjr 796133819Stjr return (error); 797133819Stjr} 798133819Stjr 799133819Stjr/* 800133819Stjr * Linux has two extra args, restart and oldmask. We dont use these, 801133819Stjr * but it seems that "restart" is actually a context pointer that 802133819Stjr * enables the signal to happen with a different register set. 803133819Stjr */ 804133819Stjrint 805133819Stjrlinux_sigsuspend(struct thread *td, struct linux_sigsuspend_args *args) 806133819Stjr{ 807133819Stjr sigset_t sigmask; 808133819Stjr l_sigset_t mask; 809133819Stjr 810133819Stjr#ifdef DEBUG 811133819Stjr if (ldebug(sigsuspend)) 812133819Stjr printf(ARGS(sigsuspend, "%08lx"), (unsigned long)args->mask); 813133819Stjr#endif 814133819Stjr 815133819Stjr LINUX_SIGEMPTYSET(mask); 816133819Stjr mask.__bits[0] = args->mask; 817133819Stjr linux_to_bsd_sigset(&mask, &sigmask); 818133819Stjr return (kern_sigsuspend(td, sigmask)); 819133819Stjr} 820133819Stjr 821133819Stjrint 822133819Stjrlinux_rt_sigsuspend(struct thread *td, struct linux_rt_sigsuspend_args *uap) 823133819Stjr{ 824133819Stjr l_sigset_t lmask; 825133819Stjr sigset_t sigmask; 826133819Stjr int error; 827133819Stjr 828133819Stjr#ifdef DEBUG 829133819Stjr if (ldebug(rt_sigsuspend)) 830133819Stjr printf(ARGS(rt_sigsuspend, "%p, %d"), 831133819Stjr (void *)uap->newset, uap->sigsetsize); 832133819Stjr#endif 833133819Stjr 834133819Stjr if (uap->sigsetsize != sizeof(l_sigset_t)) 835133819Stjr return (EINVAL); 836133819Stjr 837133819Stjr error = copyin(uap->newset, &lmask, sizeof(l_sigset_t)); 838133819Stjr if (error) 839133819Stjr return (error); 840133819Stjr 841133819Stjr linux_to_bsd_sigset(&lmask, &sigmask); 842133819Stjr return (kern_sigsuspend(td, sigmask)); 843133819Stjr} 844133819Stjr 845133819Stjrint 846133819Stjrlinux_pause(struct thread *td, struct linux_pause_args *args) 847133819Stjr{ 848133819Stjr struct proc *p = td->td_proc; 849133819Stjr sigset_t sigmask; 850133819Stjr 851133819Stjr#ifdef DEBUG 852133819Stjr if (ldebug(pause)) 853133819Stjr printf(ARGS(pause, "")); 854133819Stjr#endif 855133819Stjr 856133819Stjr PROC_LOCK(p); 857133819Stjr sigmask = td->td_sigmask; 858133819Stjr PROC_UNLOCK(p); 859133819Stjr return (kern_sigsuspend(td, sigmask)); 860133819Stjr} 861133819Stjr 862133819Stjrint 863133819Stjrlinux_sigaltstack(struct thread *td, struct linux_sigaltstack_args *uap) 864133819Stjr{ 865133819Stjr stack_t ss, oss; 866133819Stjr l_stack_t lss; 867133819Stjr int error; 868133819Stjr 869133819Stjr#ifdef DEBUG 870133819Stjr if (ldebug(sigaltstack)) 871133819Stjr printf(ARGS(sigaltstack, "%p, %p"), uap->uss, uap->uoss); 872133819Stjr#endif 873133819Stjr 874133819Stjr if (uap->uss != NULL) { 875133819Stjr error = copyin(uap->uss, &lss, sizeof(l_stack_t)); 876133819Stjr if (error) 877133819Stjr return (error); 878133819Stjr 879133819Stjr ss.ss_sp = PTRIN(lss.ss_sp); 880133819Stjr ss.ss_size = lss.ss_size; 881133819Stjr ss.ss_flags = linux_to_bsd_sigaltstack(lss.ss_flags); 882133819Stjr } 883134269Sjhb error = kern_sigaltstack(td, (uap->uss != NULL) ? &ss : NULL, 884134269Sjhb (uap->uoss != NULL) ? &oss : NULL); 885133819Stjr if (!error && uap->uoss != NULL) { 886133819Stjr lss.ss_sp = PTROUT(oss.ss_sp); 887133819Stjr lss.ss_size = oss.ss_size; 888133819Stjr lss.ss_flags = bsd_to_linux_sigaltstack(oss.ss_flags); 889133819Stjr error = copyout(&lss, uap->uoss, sizeof(l_stack_t)); 890133819Stjr } 891133819Stjr 892133819Stjr return (error); 893133819Stjr} 894133819Stjr 895133819Stjrint 896133819Stjrlinux_ftruncate64(struct thread *td, struct linux_ftruncate64_args *args) 897133819Stjr{ 898133819Stjr struct ftruncate_args sa; 899133819Stjr 900133819Stjr#ifdef DEBUG 901133819Stjr if (ldebug(ftruncate64)) 902133819Stjr printf(ARGS(ftruncate64, "%u, %jd"), args->fd, 903133819Stjr (intmax_t)args->length); 904133819Stjr#endif 905133819Stjr 906133819Stjr sa.fd = args->fd; 907133819Stjr sa.pad = 0; 908133819Stjr sa.length = args->length; 909133819Stjr return ftruncate(td, &sa); 910133819Stjr} 911133819Stjr 912133819Stjrint 913133819Stjrlinux_gettimeofday(struct thread *td, struct linux_gettimeofday_args *uap) 914133819Stjr{ 915133819Stjr struct timeval atv; 916133819Stjr l_timeval atv32; 917133819Stjr struct timezone rtz; 918133819Stjr int error = 0; 919133819Stjr 920133819Stjr if (uap->tp) { 921133819Stjr microtime(&atv); 922133819Stjr atv32.tv_sec = atv.tv_sec; 923133819Stjr atv32.tv_usec = atv.tv_usec; 924133819Stjr error = copyout(&atv32, uap->tp, sizeof (atv32)); 925133819Stjr } 926133819Stjr if (error == 0 && uap->tzp != NULL) { 927133819Stjr rtz.tz_minuteswest = tz_minuteswest; 928133819Stjr rtz.tz_dsttime = tz_dsttime; 929133819Stjr error = copyout(&rtz, uap->tzp, sizeof (rtz)); 930133819Stjr } 931133819Stjr return (error); 932133819Stjr} 933133819Stjr 934133819Stjrint 935133819Stjrlinux_nanosleep(struct thread *td, struct linux_nanosleep_args *uap) 936133819Stjr{ 937142057Sjhb struct timespec rqt, rmt; 938133819Stjr struct l_timespec ats32; 939133819Stjr int error; 940133819Stjr 941133819Stjr error = copyin(uap->rqtp, &ats32, sizeof(ats32)); 942133819Stjr if (error != 0) 943133819Stjr return (error); 944142057Sjhb rqt.tv_sec = ats32.tv_sec; 945142057Sjhb rqt.tv_nsec = ats32.tv_nsec; 946142057Sjhb error = kern_nanosleep(td, &rqt, &rmt); 947133819Stjr if (uap->rmtp != NULL) { 948142057Sjhb ats32.tv_sec = rmt.tv_sec; 949142057Sjhb ats32.tv_nsec = rmt.tv_nsec; 950133819Stjr error = copyout(&ats32, uap->rmtp, sizeof(ats32)); 951133819Stjr } 952133819Stjr return (error); 953133819Stjr} 954133819Stjr 955133819Stjrint 956133819Stjrlinux_getrusage(struct thread *td, struct linux_getrusage_args *uap) 957133819Stjr{ 958136152Sjhb struct l_rusage s32; 959136152Sjhb struct rusage s; 960133819Stjr int error; 961133819Stjr 962136152Sjhb error = kern_getrusage(td, uap->who, &s); 963133819Stjr if (error != 0) 964133819Stjr return (error); 965136152Sjhb if (uap->rusage != NULL) { 966133819Stjr s32.ru_utime.tv_sec = s.ru_utime.tv_sec; 967133819Stjr s32.ru_utime.tv_usec = s.ru_utime.tv_usec; 968133819Stjr s32.ru_stime.tv_sec = s.ru_stime.tv_sec; 969133819Stjr s32.ru_stime.tv_usec = s.ru_stime.tv_usec; 970133819Stjr s32.ru_maxrss = s.ru_maxrss; 971133819Stjr s32.ru_ixrss = s.ru_ixrss; 972133819Stjr s32.ru_idrss = s.ru_idrss; 973133819Stjr s32.ru_isrss = s.ru_isrss; 974133819Stjr s32.ru_minflt = s.ru_minflt; 975133819Stjr s32.ru_majflt = s.ru_majflt; 976133819Stjr s32.ru_nswap = s.ru_nswap; 977133819Stjr s32.ru_inblock = s.ru_inblock; 978133819Stjr s32.ru_oublock = s.ru_oublock; 979133819Stjr s32.ru_msgsnd = s.ru_msgsnd; 980133819Stjr s32.ru_msgrcv = s.ru_msgrcv; 981133819Stjr s32.ru_nsignals = s.ru_nsignals; 982133819Stjr s32.ru_nvcsw = s.ru_nvcsw; 983133819Stjr s32.ru_nivcsw = s.ru_nivcsw; 984136152Sjhb error = copyout(&s32, uap->rusage, sizeof(s32)); 985133819Stjr } 986133819Stjr return (error); 987133819Stjr} 988133819Stjr 989133819Stjrint 990133819Stjrlinux_sched_rr_get_interval(struct thread *td, 991133819Stjr struct linux_sched_rr_get_interval_args *uap) 992133819Stjr{ 993133819Stjr struct timespec ts; 994133819Stjr struct l_timespec ts32; 995133819Stjr int error; 996133819Stjr 997144449Sjhb error = kern_sched_rr_get_interval(td, uap->pid, &ts); 998133819Stjr if (error != 0) 999133819Stjr return (error); 1000133819Stjr ts32.tv_sec = ts.tv_sec; 1001133819Stjr ts32.tv_nsec = ts.tv_nsec; 1002133819Stjr return (copyout(&ts32, uap->interval, sizeof(ts32))); 1003133819Stjr} 1004133819Stjr 1005133819Stjrint 1006133819Stjrlinux_mprotect(struct thread *td, struct linux_mprotect_args *uap) 1007133819Stjr{ 1008133819Stjr struct mprotect_args bsd_args; 1009133819Stjr 1010133819Stjr bsd_args.addr = uap->addr; 1011133819Stjr bsd_args.len = uap->len; 1012133819Stjr bsd_args.prot = uap->prot; 1013133819Stjr /* XXX PROT_READ implies PROT_EXEC; see linux_mmap_common(). */ 1014133819Stjr if ((bsd_args.prot & PROT_READ) != 0) 1015133819Stjr bsd_args.prot |= PROT_EXEC; 1016133819Stjr return (mprotect(td, &bsd_args)); 1017133819Stjr} 1018