/* linux32_machdep.c revision 198554 */
1133819Stjr/*- 2133819Stjr * Copyright (c) 2004 Tim J. Robbins 3133819Stjr * Copyright (c) 2002 Doug Rabson 4133819Stjr * Copyright (c) 2000 Marcel Moolenaar 5133819Stjr * All rights reserved. 6133819Stjr * 7133819Stjr * Redistribution and use in source and binary forms, with or without 8133819Stjr * modification, are permitted provided that the following conditions 9133819Stjr * are met: 10133819Stjr * 1. Redistributions of source code must retain the above copyright 11133819Stjr * notice, this list of conditions and the following disclaimer 12133819Stjr * in this position and unchanged. 13133819Stjr * 2. Redistributions in binary form must reproduce the above copyright 14133819Stjr * notice, this list of conditions and the following disclaimer in the 15133819Stjr * documentation and/or other materials provided with the distribution. 16133819Stjr * 3. The name of the author may not be used to endorse or promote products 17133819Stjr * derived from this software without specific prior written permission. 18133819Stjr * 19133819Stjr * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 20133819Stjr * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 21133819Stjr * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 22133819Stjr * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 23133819Stjr * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 24133819Stjr * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25133819Stjr * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26133819Stjr * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27133819Stjr * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 28133819Stjr * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
29133819Stjr */ 30133819Stjr 31133819Stjr#include <sys/cdefs.h> 32133819Stjr__FBSDID("$FreeBSD: head/sys/amd64/linux32/linux32_machdep.c 198554 2009-10-28 20:17:54Z jhb $"); 33133819Stjr 34133819Stjr#include <sys/param.h> 35133819Stjr#include <sys/kernel.h> 36133819Stjr#include <sys/systm.h> 37165832Snetchild#include <sys/file.h> 38165832Snetchild#include <sys/fcntl.h> 39162954Sphk#include <sys/clock.h> 40142057Sjhb#include <sys/imgact.h> 41161310Snetchild#include <sys/limits.h> 42133819Stjr#include <sys/lock.h> 43133819Stjr#include <sys/malloc.h> 44133819Stjr#include <sys/mman.h> 45133819Stjr#include <sys/mutex.h> 46166729Sjkim#include <sys/priv.h> 47133819Stjr#include <sys/proc.h> 48133819Stjr#include <sys/resource.h> 49133819Stjr#include <sys/resourcevar.h> 50166188Sjeff#include <sys/sched.h> 51133819Stjr#include <sys/syscallsubr.h> 52133819Stjr#include <sys/sysproto.h> 53133819Stjr#include <sys/unistd.h> 54133819Stjr 55133819Stjr#include <machine/frame.h> 56168035Sjkim#include <machine/pcb.h> 57166729Sjkim#include <machine/psl.h> 58168035Sjkim#include <machine/segments.h> 59168035Sjkim#include <machine/specialreg.h> 60133819Stjr 61133819Stjr#include <vm/vm.h> 62133819Stjr#include <vm/pmap.h> 63142057Sjhb#include <vm/vm_extern.h> 64142057Sjhb#include <vm/vm_kern.h> 65133819Stjr#include <vm/vm_map.h> 66133819Stjr 67133819Stjr#include <amd64/linux32/linux.h> 68133819Stjr#include <amd64/linux32/linux32_proto.h> 69133819Stjr#include <compat/linux/linux_ipc.h> 70133819Stjr#include <compat/linux/linux_signal.h> 71133819Stjr#include <compat/linux/linux_util.h> 72161474Snetchild#include <compat/linux/linux_emul.h> 73133819Stjr 74133819Stjrstruct l_old_select_argv { 75133819Stjr l_int nfds; 76133819Stjr l_uintptr_t readfds; 77133819Stjr l_uintptr_t writefds; 78133819Stjr l_uintptr_t exceptfds; 79133819Stjr l_uintptr_t timeout; 80133819Stjr} __packed; 81133819Stjr 82133819Stjrint 83133819Stjrlinux_to_bsd_sigaltstack(int lsa) 84133819Stjr{ 85133819Stjr int bsa = 0; 
86133819Stjr 87133819Stjr if (lsa & LINUX_SS_DISABLE) 88133819Stjr bsa |= SS_DISABLE; 89133819Stjr if (lsa & LINUX_SS_ONSTACK) 90133819Stjr bsa |= SS_ONSTACK; 91133819Stjr return (bsa); 92133819Stjr} 93133819Stjr 94198554Sjhbstatic int linux_mmap_common(struct thread *td, l_uintptr_t addr, 95198554Sjhb l_size_t len, l_int prot, l_int flags, l_int fd, 96198554Sjhb l_loff_t pos); 97198554Sjhb 98133819Stjrint 99133819Stjrbsd_to_linux_sigaltstack(int bsa) 100133819Stjr{ 101133819Stjr int lsa = 0; 102133819Stjr 103133819Stjr if (bsa & SS_DISABLE) 104133819Stjr lsa |= LINUX_SS_DISABLE; 105133819Stjr if (bsa & SS_ONSTACK) 106133819Stjr lsa |= LINUX_SS_ONSTACK; 107133819Stjr return (lsa); 108133819Stjr} 109133819Stjr 110142057Sjhb/* 111142057Sjhb * Custom version of exec_copyin_args() so that we can translate 112142057Sjhb * the pointers. 113142057Sjhb */ 114142057Sjhbstatic int 115142057Sjhblinux_exec_copyin_args(struct image_args *args, char *fname, 116142057Sjhb enum uio_seg segflg, char **argv, char **envv) 117133819Stjr{ 118142057Sjhb char *argp, *envp; 119142057Sjhb u_int32_t *p32, arg; 120142057Sjhb size_t length; 121133819Stjr int error; 122133819Stjr 123142057Sjhb bzero(args, sizeof(*args)); 124142057Sjhb if (argv == NULL) 125142057Sjhb return (EFAULT); 126133819Stjr 127142057Sjhb /* 128142057Sjhb * Allocate temporary demand zeroed space for argument and 129142057Sjhb * environment strings 130142057Sjhb */ 131168063Sjkim args->buf = (char *)kmem_alloc_wait(exec_map, 132147588Sjhb PATH_MAX + ARG_MAX + MAXSHELLCMDLEN); 133142057Sjhb if (args->buf == NULL) 134142057Sjhb return (ENOMEM); 135142057Sjhb args->begin_argv = args->buf; 136142057Sjhb args->endp = args->begin_argv; 137142057Sjhb args->stringspace = ARG_MAX; 138133819Stjr 139142057Sjhb args->fname = args->buf + ARG_MAX; 140133819Stjr 141142057Sjhb /* 142142057Sjhb * Copy the file name. 143142057Sjhb */ 144142057Sjhb error = (segflg == UIO_SYSSPACE) ? 
145142057Sjhb copystr(fname, args->fname, PATH_MAX, &length) : 146142057Sjhb copyinstr(fname, args->fname, PATH_MAX, &length); 147142057Sjhb if (error != 0) 148156440Sups goto err_exit; 149142057Sjhb 150142057Sjhb /* 151142057Sjhb * extract arguments first 152142057Sjhb */ 153142057Sjhb p32 = (u_int32_t *)argv; 154142057Sjhb for (;;) { 155142057Sjhb error = copyin(p32++, &arg, sizeof(arg)); 156142057Sjhb if (error) 157156440Sups goto err_exit; 158142057Sjhb if (arg == 0) 159142057Sjhb break; 160142057Sjhb argp = PTRIN(arg); 161142057Sjhb error = copyinstr(argp, args->endp, args->stringspace, &length); 162142057Sjhb if (error) { 163142057Sjhb if (error == ENAMETOOLONG) 164156440Sups error = E2BIG; 165168063Sjkim 166156440Sups goto err_exit; 167142057Sjhb } 168142057Sjhb args->stringspace -= length; 169142057Sjhb args->endp += length; 170142057Sjhb args->argc++; 171133819Stjr } 172168063Sjkim 173142057Sjhb args->begin_envv = args->endp; 174142057Sjhb 175142057Sjhb /* 176142057Sjhb * extract environment strings 177142057Sjhb */ 178142057Sjhb if (envv) { 179142057Sjhb p32 = (u_int32_t *)envv; 180142057Sjhb for (;;) { 181133819Stjr error = copyin(p32++, &arg, sizeof(arg)); 182133819Stjr if (error) 183156440Sups goto err_exit; 184142057Sjhb if (arg == 0) 185142057Sjhb break; 186142057Sjhb envp = PTRIN(arg); 187142057Sjhb error = copyinstr(envp, args->endp, args->stringspace, 188142057Sjhb &length); 189142057Sjhb if (error) { 190142057Sjhb if (error == ENAMETOOLONG) 191156440Sups error = E2BIG; 192156440Sups goto err_exit; 193142057Sjhb } 194142057Sjhb args->stringspace -= length; 195142057Sjhb args->endp += length; 196142057Sjhb args->envc++; 197142057Sjhb } 198133819Stjr } 199133819Stjr 200142057Sjhb return (0); 201156440Sups 202156440Supserr_exit: 203156440Sups kmem_free_wakeup(exec_map, (vm_offset_t)args->buf, 204156440Sups PATH_MAX + ARG_MAX + MAXSHELLCMDLEN); 205156440Sups args->buf = NULL; 206156440Sups return (error); 207133819Stjr} 208133819Stjr 209142057Sjhbint 
210142057Sjhblinux_execve(struct thread *td, struct linux_execve_args *args) 211142057Sjhb{ 212142057Sjhb struct image_args eargs; 213142057Sjhb char *path; 214142057Sjhb int error; 215142057Sjhb 216142057Sjhb LCONVPATHEXIST(td, args->path, &path); 217142057Sjhb 218142057Sjhb#ifdef DEBUG 219142057Sjhb if (ldebug(execve)) 220142057Sjhb printf(ARGS(execve, "%s"), path); 221142057Sjhb#endif 222142057Sjhb 223142057Sjhb error = linux_exec_copyin_args(&eargs, path, UIO_SYSSPACE, args->argp, 224142057Sjhb args->envp); 225142057Sjhb free(path, M_TEMP); 226142057Sjhb if (error == 0) 227142057Sjhb error = kern_execve(td, &eargs, NULL); 228161474Snetchild if (error == 0) 229168063Sjkim /* Linux process can execute FreeBSD one, do not attempt 230161474Snetchild * to create emuldata for such process using 231161474Snetchild * linux_proc_init, this leads to a panic on KASSERT 232168063Sjkim * because such process has p->p_emuldata == NULL. 233161474Snetchild */ 234161474Snetchild if (td->td_proc->p_sysent == &elf_linux_sysvec) 235168063Sjkim error = linux_proc_init(td, 0, 0); 236142057Sjhb return (error); 237142057Sjhb} 238142057Sjhb 239185438SkibCTASSERT(sizeof(struct l_iovec32) == 8); 240133819Stjr 241144449Sjhbstatic int 242185438Skiblinux32_copyinuio(struct l_iovec32 *iovp, l_ulong iovcnt, struct uio **uiop) 243133819Stjr{ 244185438Skib struct l_iovec32 iov32; 245144449Sjhb struct iovec *iov; 246144449Sjhb struct uio *uio; 247185438Skib uint32_t iovlen; 248144449Sjhb int error, i; 249133819Stjr 250144449Sjhb *uiop = NULL; 251144449Sjhb if (iovcnt > UIO_MAXIOV) 252133819Stjr return (EINVAL); 253144449Sjhb iovlen = iovcnt * sizeof(struct iovec); 254168844Sjkim uio = malloc(iovlen + sizeof(*uio), M_IOV, M_WAITOK); 255144449Sjhb iov = (struct iovec *)(uio + 1); 256144449Sjhb for (i = 0; i < iovcnt; i++) { 257185438Skib error = copyin(&iovp[i], &iov32, sizeof(struct l_iovec32)); 258144449Sjhb if (error) { 259144449Sjhb free(uio, M_IOV); 260144449Sjhb return (error); 261144449Sjhb 
} 262144449Sjhb iov[i].iov_base = PTRIN(iov32.iov_base); 263144449Sjhb iov[i].iov_len = iov32.iov_len; 264133819Stjr } 265144449Sjhb uio->uio_iov = iov; 266144449Sjhb uio->uio_iovcnt = iovcnt; 267144449Sjhb uio->uio_segflg = UIO_USERSPACE; 268144449Sjhb uio->uio_offset = -1; 269144449Sjhb uio->uio_resid = 0; 270144449Sjhb for (i = 0; i < iovcnt; i++) { 271144449Sjhb if (iov->iov_len > INT_MAX - uio->uio_resid) { 272144449Sjhb free(uio, M_IOV); 273144449Sjhb return (EINVAL); 274144449Sjhb } 275144449Sjhb uio->uio_resid += iov->iov_len; 276144449Sjhb iov++; 277144449Sjhb } 278144449Sjhb *uiop = uio; 279144449Sjhb return (0); 280144449Sjhb} 281133819Stjr 282144449Sjhbint 283185438Skiblinux32_copyiniov(struct l_iovec32 *iovp32, l_ulong iovcnt, struct iovec **iovp, 284185438Skib int error) 285185438Skib{ 286185438Skib struct l_iovec32 iov32; 287185438Skib struct iovec *iov; 288185438Skib uint32_t iovlen; 289185438Skib int i; 290185438Skib 291185438Skib *iovp = NULL; 292185438Skib if (iovcnt > UIO_MAXIOV) 293185438Skib return (error); 294185438Skib iovlen = iovcnt * sizeof(struct iovec); 295185438Skib iov = malloc(iovlen, M_IOV, M_WAITOK); 296185438Skib for (i = 0; i < iovcnt; i++) { 297185438Skib error = copyin(&iovp32[i], &iov32, sizeof(struct l_iovec32)); 298185438Skib if (error) { 299185438Skib free(iov, M_IOV); 300185438Skib return (error); 301185438Skib } 302185438Skib iov[i].iov_base = PTRIN(iov32.iov_base); 303185438Skib iov[i].iov_len = iov32.iov_len; 304185438Skib } 305185438Skib *iovp = iov; 306185438Skib return(0); 307185438Skib 308185438Skib} 309185438Skib 310185438Skibint 311144449Sjhblinux_readv(struct thread *td, struct linux_readv_args *uap) 312144449Sjhb{ 313144449Sjhb struct uio *auio; 314144449Sjhb int error; 315133819Stjr 316144449Sjhb error = linux32_copyinuio(uap->iovp, uap->iovcnt, &auio); 317144449Sjhb if (error) 318144449Sjhb return (error); 319144449Sjhb error = kern_readv(td, uap->fd, auio); 320144449Sjhb free(auio, M_IOV); 321133819Stjr 
return (error); 322133819Stjr} 323133819Stjr 324133819Stjrint 325133819Stjrlinux_writev(struct thread *td, struct linux_writev_args *uap) 326133819Stjr{ 327144449Sjhb struct uio *auio; 328144449Sjhb int error; 329133819Stjr 330144449Sjhb error = linux32_copyinuio(uap->iovp, uap->iovcnt, &auio); 331144449Sjhb if (error) 332144449Sjhb return (error); 333144449Sjhb error = kern_writev(td, uap->fd, auio); 334144449Sjhb free(auio, M_IOV); 335133819Stjr return (error); 336133819Stjr} 337133819Stjr 338133819Stjrstruct l_ipc_kludge { 339133819Stjr l_uintptr_t msgp; 340133819Stjr l_long msgtyp; 341133819Stjr} __packed; 342133819Stjr 343133819Stjrint 344133819Stjrlinux_ipc(struct thread *td, struct linux_ipc_args *args) 345133819Stjr{ 346133819Stjr 347133819Stjr switch (args->what & 0xFFFF) { 348133819Stjr case LINUX_SEMOP: { 349133819Stjr struct linux_semop_args a; 350133819Stjr 351133819Stjr a.semid = args->arg1; 352133819Stjr a.tsops = args->ptr; 353133819Stjr a.nsops = args->arg2; 354133819Stjr return (linux_semop(td, &a)); 355133819Stjr } 356133819Stjr case LINUX_SEMGET: { 357133819Stjr struct linux_semget_args a; 358133819Stjr 359133819Stjr a.key = args->arg1; 360133819Stjr a.nsems = args->arg2; 361133819Stjr a.semflg = args->arg3; 362133819Stjr return (linux_semget(td, &a)); 363133819Stjr } 364133819Stjr case LINUX_SEMCTL: { 365133819Stjr struct linux_semctl_args a; 366133819Stjr int error; 367133819Stjr 368133819Stjr a.semid = args->arg1; 369133819Stjr a.semnum = args->arg2; 370133819Stjr a.cmd = args->arg3; 371133819Stjr error = copyin(args->ptr, &a.arg, sizeof(a.arg)); 372133819Stjr if (error) 373133819Stjr return (error); 374133819Stjr return (linux_semctl(td, &a)); 375133819Stjr } 376133819Stjr case LINUX_MSGSND: { 377133819Stjr struct linux_msgsnd_args a; 378133819Stjr 379133819Stjr a.msqid = args->arg1; 380133819Stjr a.msgp = args->ptr; 381133819Stjr a.msgsz = args->arg2; 382133819Stjr a.msgflg = args->arg3; 383133819Stjr return (linux_msgsnd(td, &a)); 
384133819Stjr } 385133819Stjr case LINUX_MSGRCV: { 386133819Stjr struct linux_msgrcv_args a; 387133819Stjr 388133819Stjr a.msqid = args->arg1; 389133819Stjr a.msgsz = args->arg2; 390133819Stjr a.msgflg = args->arg3; 391133819Stjr if ((args->what >> 16) == 0) { 392133819Stjr struct l_ipc_kludge tmp; 393133819Stjr int error; 394133819Stjr 395133819Stjr if (args->ptr == 0) 396133819Stjr return (EINVAL); 397133819Stjr error = copyin(args->ptr, &tmp, sizeof(tmp)); 398133819Stjr if (error) 399133819Stjr return (error); 400133819Stjr a.msgp = PTRIN(tmp.msgp); 401133819Stjr a.msgtyp = tmp.msgtyp; 402133819Stjr } else { 403133819Stjr a.msgp = args->ptr; 404133819Stjr a.msgtyp = args->arg5; 405133819Stjr } 406133819Stjr return (linux_msgrcv(td, &a)); 407133819Stjr } 408133819Stjr case LINUX_MSGGET: { 409133819Stjr struct linux_msgget_args a; 410133819Stjr 411133819Stjr a.key = args->arg1; 412133819Stjr a.msgflg = args->arg2; 413133819Stjr return (linux_msgget(td, &a)); 414133819Stjr } 415133819Stjr case LINUX_MSGCTL: { 416133819Stjr struct linux_msgctl_args a; 417133819Stjr 418133819Stjr a.msqid = args->arg1; 419133819Stjr a.cmd = args->arg2; 420133819Stjr a.buf = args->ptr; 421133819Stjr return (linux_msgctl(td, &a)); 422133819Stjr } 423133819Stjr case LINUX_SHMAT: { 424133819Stjr struct linux_shmat_args a; 425133819Stjr 426133819Stjr a.shmid = args->arg1; 427133819Stjr a.shmaddr = args->ptr; 428133819Stjr a.shmflg = args->arg2; 429144441Sjhb a.raddr = PTRIN((l_uint)args->arg3); 430133819Stjr return (linux_shmat(td, &a)); 431133819Stjr } 432133819Stjr case LINUX_SHMDT: { 433133819Stjr struct linux_shmdt_args a; 434133819Stjr 435133819Stjr a.shmaddr = args->ptr; 436133819Stjr return (linux_shmdt(td, &a)); 437133819Stjr } 438133819Stjr case LINUX_SHMGET: { 439133819Stjr struct linux_shmget_args a; 440133819Stjr 441133819Stjr a.key = args->arg1; 442133819Stjr a.size = args->arg2; 443133819Stjr a.shmflg = args->arg3; 444133819Stjr return (linux_shmget(td, &a)); 445133819Stjr } 
446133819Stjr case LINUX_SHMCTL: { 447133819Stjr struct linux_shmctl_args a; 448133819Stjr 449133819Stjr a.shmid = args->arg1; 450133819Stjr a.cmd = args->arg2; 451133819Stjr a.buf = args->ptr; 452133819Stjr return (linux_shmctl(td, &a)); 453133819Stjr } 454133819Stjr default: 455133819Stjr break; 456133819Stjr } 457133819Stjr 458133819Stjr return (EINVAL); 459133819Stjr} 460133819Stjr 461133819Stjrint 462133819Stjrlinux_old_select(struct thread *td, struct linux_old_select_args *args) 463133819Stjr{ 464133819Stjr struct l_old_select_argv linux_args; 465133819Stjr struct linux_select_args newsel; 466133819Stjr int error; 467133819Stjr 468133819Stjr#ifdef DEBUG 469133819Stjr if (ldebug(old_select)) 470133819Stjr printf(ARGS(old_select, "%p"), args->ptr); 471133819Stjr#endif 472133819Stjr 473133819Stjr error = copyin(args->ptr, &linux_args, sizeof(linux_args)); 474133819Stjr if (error) 475133819Stjr return (error); 476133819Stjr 477133819Stjr newsel.nfds = linux_args.nfds; 478133819Stjr newsel.readfds = PTRIN(linux_args.readfds); 479133819Stjr newsel.writefds = PTRIN(linux_args.writefds); 480133819Stjr newsel.exceptfds = PTRIN(linux_args.exceptfds); 481133819Stjr newsel.timeout = PTRIN(linux_args.timeout); 482133819Stjr return (linux_select(td, &newsel)); 483133819Stjr} 484133819Stjr 485133819Stjrint 486133819Stjrlinux_fork(struct thread *td, struct linux_fork_args *args) 487133819Stjr{ 488133819Stjr int error; 489166150Snetchild struct proc *p2; 490166150Snetchild struct thread *td2; 491133819Stjr 492133819Stjr#ifdef DEBUG 493133819Stjr if (ldebug(fork)) 494133819Stjr printf(ARGS(fork, "")); 495133819Stjr#endif 496133819Stjr 497166150Snetchild if ((error = fork1(td, RFFDG | RFPROC | RFSTOPPED, 0, &p2)) != 0) 498133819Stjr return (error); 499168063Sjkim 500166150Snetchild if (error == 0) { 501166150Snetchild td->td_retval[0] = p2->p_pid; 502166150Snetchild td->td_retval[1] = 0; 503166150Snetchild } 504133819Stjr 505133819Stjr if (td->td_retval[1] == 1) 506133819Stjr 
td->td_retval[0] = 0; 507161474Snetchild error = linux_proc_init(td, td->td_retval[0], 0); 508161474Snetchild if (error) 509161474Snetchild return (error); 510161474Snetchild 511166150Snetchild td2 = FIRST_THREAD_IN_PROC(p2); 512166150Snetchild 513168063Sjkim /* 514168063Sjkim * Make this runnable after we are finished with it. 515168063Sjkim */ 516170307Sjeff thread_lock(td2); 517166150Snetchild TD_SET_CAN_RUN(td2); 518166188Sjeff sched_add(td2, SRQ_BORING); 519170307Sjeff thread_unlock(td2); 520166150Snetchild 521133819Stjr return (0); 522133819Stjr} 523133819Stjr 524133819Stjrint 525133819Stjrlinux_vfork(struct thread *td, struct linux_vfork_args *args) 526133819Stjr{ 527133819Stjr int error; 528161611Snetchild struct proc *p2; 529166150Snetchild struct thread *td2; 530133819Stjr 531133819Stjr#ifdef DEBUG 532133819Stjr if (ldebug(vfork)) 533133819Stjr printf(ARGS(vfork, "")); 534133819Stjr#endif 535133819Stjr 536168063Sjkim /* Exclude RFPPWAIT */ 537166150Snetchild if ((error = fork1(td, RFFDG | RFPROC | RFMEM | RFSTOPPED, 0, &p2)) != 0) 538133819Stjr return (error); 539161611Snetchild if (error == 0) { 540161611Snetchild td->td_retval[0] = p2->p_pid; 541161611Snetchild td->td_retval[1] = 0; 542161611Snetchild } 543133819Stjr /* Are we the child? */ 544133819Stjr if (td->td_retval[1] == 1) 545133819Stjr td->td_retval[0] = 0; 546161474Snetchild error = linux_proc_init(td, td->td_retval[0], 0); 547161474Snetchild if (error) 548161474Snetchild return (error); 549166150Snetchild 550166150Snetchild PROC_LOCK(p2); 551166150Snetchild p2->p_flag |= P_PPWAIT; 552166150Snetchild PROC_UNLOCK(p2); 553166150Snetchild 554166150Snetchild td2 = FIRST_THREAD_IN_PROC(p2); 555168063Sjkim 556168848Sjkim /* 557168848Sjkim * Make this runnable after we are finished with it. 
558168848Sjkim */ 559170307Sjeff thread_lock(td2); 560166150Snetchild TD_SET_CAN_RUN(td2); 561166188Sjeff sched_add(td2, SRQ_BORING); 562170307Sjeff thread_unlock(td2); 563166150Snetchild 564161611Snetchild /* wait for the children to exit, ie. emulate vfork */ 565161611Snetchild PROC_LOCK(p2); 566161611Snetchild while (p2->p_flag & P_PPWAIT) 567188750Skib cv_wait(&p2->p_pwait, &p2->p_mtx); 568161611Snetchild PROC_UNLOCK(p2); 569168063Sjkim 570133819Stjr return (0); 571133819Stjr} 572133819Stjr 573133819Stjrint 574133819Stjrlinux_clone(struct thread *td, struct linux_clone_args *args) 575133819Stjr{ 576133819Stjr int error, ff = RFPROC | RFSTOPPED; 577133819Stjr struct proc *p2; 578133819Stjr struct thread *td2; 579133819Stjr int exit_signal; 580161474Snetchild struct linux_emuldata *em; 581133819Stjr 582133819Stjr#ifdef DEBUG 583133819Stjr if (ldebug(clone)) { 584168063Sjkim printf(ARGS(clone, "flags %x, stack %p, parent tid: %p, " 585168063Sjkim "child tid: %p"), (unsigned)args->flags, 586168063Sjkim args->stack, args->parent_tidptr, args->child_tidptr); 587133819Stjr } 588133819Stjr#endif 589133819Stjr 590133819Stjr exit_signal = args->flags & 0x000000ff; 591169458Skan if (LINUX_SIG_VALID(exit_signal)) { 592169458Skan if (exit_signal <= LINUX_SIGTBLSZ) 593169458Skan exit_signal = 594169458Skan linux_to_bsd_signal[_SIG_IDX(exit_signal)]; 595169458Skan } else if (exit_signal != 0) 596133819Stjr return (EINVAL); 597133819Stjr 598167157Sjkim if (args->flags & LINUX_CLONE_VM) 599133819Stjr ff |= RFMEM; 600167157Sjkim if (args->flags & LINUX_CLONE_SIGHAND) 601133819Stjr ff |= RFSIGSHARE; 602168063Sjkim /* 603168063Sjkim * XXX: In Linux, sharing of fs info (chroot/cwd/umask) 604168063Sjkim * and open files is independant. In FreeBSD, its in one 605168848Sjkim * structure but in reality it does not cause any problems 606168848Sjkim * because both of these flags are usually set together. 
607163371Snetchild */ 608167157Sjkim if (!(args->flags & (LINUX_CLONE_FILES | LINUX_CLONE_FS))) 609133819Stjr ff |= RFFDG; 610133819Stjr 611161474Snetchild /* 612161474Snetchild * Attempt to detect when linux_clone(2) is used for creating 613161474Snetchild * kernel threads. Unfortunately despite the existence of the 614161474Snetchild * CLONE_THREAD flag, version of linuxthreads package used in 615161474Snetchild * most popular distros as of beginning of 2005 doesn't make 616166944Snetchild * any use of it. Therefore, this detection relies on 617161474Snetchild * empirical observation that linuxthreads sets certain 618161474Snetchild * combination of flags, so that we can make more or less 619161474Snetchild * precise detection and notify the FreeBSD kernel that several 620161474Snetchild * processes are in fact part of the same threading group, so 621161474Snetchild * that special treatment is necessary for signal delivery 622161474Snetchild * between those processes and fd locking. 623161474Snetchild */ 624167157Sjkim if ((args->flags & 0xffffff00) == LINUX_THREADING_FLAGS) 625161474Snetchild ff |= RFTHREAD; 626161474Snetchild 627168056Sjkim if (args->flags & LINUX_CLONE_PARENT_SETTID) 628168056Sjkim if (args->parent_tidptr == NULL) 629168056Sjkim return (EINVAL); 630168056Sjkim 631133819Stjr error = fork1(td, ff, 0, &p2); 632133819Stjr if (error) 633133819Stjr return (error); 634166395Skib 635167157Sjkim if (args->flags & (LINUX_CLONE_PARENT | LINUX_CLONE_THREAD)) { 636166395Skib sx_xlock(&proctree_lock); 637166395Skib PROC_LOCK(p2); 638166395Skib proc_reparent(p2, td->td_proc->p_pptr); 639166395Skib PROC_UNLOCK(p2); 640166395Skib sx_xunlock(&proctree_lock); 641166395Skib } 642168063Sjkim 643161474Snetchild /* create the emuldata */ 644161474Snetchild error = linux_proc_init(td, p2->p_pid, args->flags); 645161474Snetchild /* reference it - no need to check this */ 646165867Snetchild em = em_find(p2, EMUL_DOLOCK); 647161474Snetchild KASSERT(em != NULL, ("clone: 
emuldata not found.\n")); 648161474Snetchild /* and adjust it */ 649133819Stjr 650167157Sjkim if (args->flags & LINUX_CLONE_THREAD) { 651161474Snetchild#ifdef notyet 652161696Snetchild PROC_LOCK(p2); 653161474Snetchild p2->p_pgrp = td->td_proc->p_pgrp; 654161696Snetchild PROC_UNLOCK(p2); 655161474Snetchild#endif 656168063Sjkim exit_signal = 0; 657161474Snetchild } 658161474Snetchild 659167157Sjkim if (args->flags & LINUX_CLONE_CHILD_SETTID) 660161474Snetchild em->child_set_tid = args->child_tidptr; 661161474Snetchild else 662161474Snetchild em->child_set_tid = NULL; 663161474Snetchild 664167157Sjkim if (args->flags & LINUX_CLONE_CHILD_CLEARTID) 665161474Snetchild em->child_clear_tid = args->child_tidptr; 666161474Snetchild else 667161474Snetchild em->child_clear_tid = NULL; 668161696Snetchild 669161474Snetchild EMUL_UNLOCK(&emul_lock); 670161474Snetchild 671168056Sjkim if (args->flags & LINUX_CLONE_PARENT_SETTID) { 672168056Sjkim error = copyout(&p2->p_pid, args->parent_tidptr, 673168056Sjkim sizeof(p2->p_pid)); 674168056Sjkim if (error) 675168056Sjkim printf(LMSG("copyout failed!")); 676168056Sjkim } 677168056Sjkim 678133819Stjr PROC_LOCK(p2); 679133819Stjr p2->p_sigparent = exit_signal; 680133819Stjr PROC_UNLOCK(p2); 681133819Stjr td2 = FIRST_THREAD_IN_PROC(p2); 682168063Sjkim /* 683168063Sjkim * In a case of stack = NULL, we are supposed to COW calling process 684168063Sjkim * stack. This is what normal fork() does, so we just keep tf_rsp arg 685168063Sjkim * intact. 
686161310Snetchild */ 687161310Snetchild if (args->stack) 688168063Sjkim td2->td_frame->tf_rsp = PTROUT(args->stack); 689133819Stjr 690167157Sjkim if (args->flags & LINUX_CLONE_SETTLS) { 691168035Sjkim struct user_segment_descriptor sd; 692168035Sjkim struct l_user_desc info; 693168848Sjkim int a[2]; 694168035Sjkim 695168848Sjkim error = copyin((void *)td->td_frame->tf_rsi, &info, 696168035Sjkim sizeof(struct l_user_desc)); 697168035Sjkim if (error) { 698168035Sjkim printf(LMSG("copyin failed!")); 699168035Sjkim } else { 700168035Sjkim /* We might copy out the entry_number as GUGS32_SEL. */ 701168848Sjkim info.entry_number = GUGS32_SEL; 702168035Sjkim error = copyout(&info, (void *)td->td_frame->tf_rsi, 703168035Sjkim sizeof(struct l_user_desc)); 704168035Sjkim if (error) 705168035Sjkim printf(LMSG("copyout failed!")); 706168035Sjkim 707168035Sjkim a[0] = LINUX_LDT_entry_a(&info); 708168035Sjkim a[1] = LINUX_LDT_entry_b(&info); 709168035Sjkim 710168035Sjkim memcpy(&sd, &a, sizeof(a)); 711168035Sjkim#ifdef DEBUG 712168035Sjkim if (ldebug(clone)) 713168035Sjkim printf("Segment created in clone with " 714168035Sjkim "CLONE_SETTLS: lobase: %x, hibase: %x, " 715168035Sjkim "lolimit: %x, hilimit: %x, type: %i, " 716168035Sjkim "dpl: %i, p: %i, xx: %i, long: %i, " 717168035Sjkim "def32: %i, gran: %i\n", sd.sd_lobase, 718168035Sjkim sd.sd_hibase, sd.sd_lolimit, sd.sd_hilimit, 719168035Sjkim sd.sd_type, sd.sd_dpl, sd.sd_p, sd.sd_xx, 720168035Sjkim sd.sd_long, sd.sd_def32, sd.sd_gran); 721168035Sjkim#endif 722168035Sjkim td2->td_pcb->pcb_gsbase = (register_t)info.base_addr; 723190620Skib/* XXXKIB td2->td_pcb->pcb_gs32sd = sd; */ 724190620Skib td2->td_frame->tf_gs = GSEL(GUGS32_SEL, SEL_UPL); 725180992Skib td2->td_pcb->pcb_flags |= PCB_GS32BIT | PCB_32BIT; 726168035Sjkim } 727161474Snetchild } 728161474Snetchild 729133819Stjr#ifdef DEBUG 730133819Stjr if (ldebug(clone)) 731168063Sjkim printf(LMSG("clone: successful rfork to %d, " 732168063Sjkim "stack %p sig = %d"), 
(int)p2->p_pid, args->stack, 733168063Sjkim exit_signal); 734133819Stjr#endif 735167157Sjkim if (args->flags & LINUX_CLONE_VFORK) { 736166150Snetchild PROC_LOCK(p2); 737166150Snetchild p2->p_flag |= P_PPWAIT; 738166150Snetchild PROC_UNLOCK(p2); 739166150Snetchild } 740133819Stjr 741133819Stjr /* 742133819Stjr * Make this runnable after we are finished with it. 743133819Stjr */ 744170307Sjeff thread_lock(td2); 745133819Stjr TD_SET_CAN_RUN(td2); 746166188Sjeff sched_add(td2, SRQ_BORING); 747170307Sjeff thread_unlock(td2); 748133819Stjr 749133819Stjr td->td_retval[0] = p2->p_pid; 750133819Stjr td->td_retval[1] = 0; 751168063Sjkim 752167157Sjkim if (args->flags & LINUX_CLONE_VFORK) { 753168063Sjkim /* wait for the children to exit, ie. emulate vfork */ 754168063Sjkim PROC_LOCK(p2); 755163374Snetchild while (p2->p_flag & P_PPWAIT) 756188750Skib cv_wait(&p2->p_pwait, &p2->p_mtx); 757163374Snetchild PROC_UNLOCK(p2); 758163374Snetchild } 759163374Snetchild 760133819Stjr return (0); 761133819Stjr} 762133819Stjr 763133819Stjr#define STACK_SIZE (2 * 1024 * 1024) 764133819Stjr#define GUARD_SIZE (4 * PAGE_SIZE) 765133819Stjr 766133819Stjrint 767133819Stjrlinux_mmap2(struct thread *td, struct linux_mmap2_args *args) 768133819Stjr{ 769133819Stjr 770133819Stjr#ifdef DEBUG 771133819Stjr if (ldebug(mmap2)) 772168063Sjkim printf(ARGS(mmap2, "0x%08x, %d, %d, 0x%08x, %d, %d"), 773168063Sjkim args->addr, args->len, args->prot, 774133819Stjr args->flags, args->fd, args->pgoff); 775133819Stjr#endif 776133819Stjr 777198554Sjhb return (linux_mmap_common(td, PTROUT(args->addr), args->len, args->prot, 778198554Sjhb args->flags, args->fd, (uint64_t)(uint32_t)args->pgoff * 779198554Sjhb PAGE_SIZE)); 780133819Stjr} 781133819Stjr 782133819Stjrint 783133819Stjrlinux_mmap(struct thread *td, struct linux_mmap_args *args) 784133819Stjr{ 785133819Stjr int error; 786133819Stjr struct l_mmap_argv linux_args; 787133819Stjr 788133819Stjr error = copyin(args->ptr, &linux_args, sizeof(linux_args)); 
789133819Stjr if (error) 790133819Stjr return (error); 791133819Stjr 792133819Stjr#ifdef DEBUG 793133819Stjr if (ldebug(mmap)) 794168063Sjkim printf(ARGS(mmap, "0x%08x, %d, %d, 0x%08x, %d, %d"), 795168063Sjkim linux_args.addr, linux_args.len, linux_args.prot, 796168063Sjkim linux_args.flags, linux_args.fd, linux_args.pgoff); 797133819Stjr#endif 798133819Stjr 799198554Sjhb return (linux_mmap_common(td, linux_args.addr, linux_args.len, 800198554Sjhb linux_args.prot, linux_args.flags, linux_args.fd, 801198554Sjhb (uint32_t)linux_args.pgoff)); 802133819Stjr} 803133819Stjr 804133819Stjrstatic int 805198554Sjhblinux_mmap_common(struct thread *td, l_uintptr_t addr, l_size_t len, l_int prot, 806198554Sjhb l_int flags, l_int fd, l_loff_t pos) 807133819Stjr{ 808133819Stjr struct proc *p = td->td_proc; 809133819Stjr struct mmap_args /* { 810133819Stjr caddr_t addr; 811133819Stjr size_t len; 812133819Stjr int prot; 813133819Stjr int flags; 814133819Stjr int fd; 815133819Stjr long pad; 816133819Stjr off_t pos; 817133819Stjr } */ bsd_args; 818133819Stjr int error; 819165832Snetchild struct file *fp; 820133819Stjr 821133819Stjr error = 0; 822133819Stjr bsd_args.flags = 0; 823165832Snetchild fp = NULL; 824165832Snetchild 825165832Snetchild /* 826165832Snetchild * Linux mmap(2): 827165832Snetchild * You must specify exactly one of MAP_SHARED and MAP_PRIVATE 828165832Snetchild */ 829198554Sjhb if (!((flags & LINUX_MAP_SHARED) ^ (flags & LINUX_MAP_PRIVATE))) 830165832Snetchild return (EINVAL); 831165832Snetchild 832198554Sjhb if (flags & LINUX_MAP_SHARED) 833133819Stjr bsd_args.flags |= MAP_SHARED; 834198554Sjhb if (flags & LINUX_MAP_PRIVATE) 835133819Stjr bsd_args.flags |= MAP_PRIVATE; 836198554Sjhb if (flags & LINUX_MAP_FIXED) 837133819Stjr bsd_args.flags |= MAP_FIXED; 838198554Sjhb if (flags & LINUX_MAP_ANON) 839133819Stjr bsd_args.flags |= MAP_ANON; 840133819Stjr else 841133819Stjr bsd_args.flags |= MAP_NOSYNC; 842198554Sjhb if (flags & LINUX_MAP_GROWSDOWN) 843133819Stjr 
bsd_args.flags |= MAP_STACK; 844133819Stjr 845166727Sjkim /* 846166727Sjkim * PROT_READ, PROT_WRITE, or PROT_EXEC implies PROT_READ and PROT_EXEC 847166727Sjkim * on Linux/i386. We do this to ensure maximum compatibility. 848166727Sjkim * Linux/ia64 does the same in i386 emulation mode. 849166727Sjkim */ 850198554Sjhb bsd_args.prot = prot; 851166727Sjkim if (bsd_args.prot & (PROT_READ | PROT_WRITE | PROT_EXEC)) 852166727Sjkim bsd_args.prot |= PROT_READ | PROT_EXEC; 853166727Sjkim 854167048Sjkim /* Linux does not check file descriptor when MAP_ANONYMOUS is set. */ 855198554Sjhb bsd_args.fd = (bsd_args.flags & MAP_ANON) ? -1 : fd; 856167048Sjkim if (bsd_args.fd != -1) { 857166727Sjkim /* 858166727Sjkim * Linux follows Solaris mmap(2) description: 859166727Sjkim * The file descriptor fildes is opened with 860166727Sjkim * read permission, regardless of the 861166727Sjkim * protection options specified. 862166727Sjkim */ 863166727Sjkim 864167048Sjkim if ((error = fget(td, bsd_args.fd, &fp)) != 0) 865166727Sjkim return (error); 866166727Sjkim if (fp->f_type != DTYPE_VNODE) { 867166727Sjkim fdrop(fp, td); 868166727Sjkim return (EINVAL); 869166727Sjkim } 870166727Sjkim 871166727Sjkim /* Linux mmap() just fails for O_WRONLY files */ 872166727Sjkim if (!(fp->f_flag & FREAD)) { 873166727Sjkim fdrop(fp, td); 874166727Sjkim return (EACCES); 875166727Sjkim } 876166727Sjkim 877166727Sjkim fdrop(fp, td); 878166727Sjkim } 879166727Sjkim 880198554Sjhb if (flags & LINUX_MAP_GROWSDOWN) { 881168063Sjkim /* 882168063Sjkim * The Linux MAP_GROWSDOWN option does not limit auto 883133819Stjr * growth of the region. Linux mmap with this option 884133819Stjr * takes as addr the inital BOS, and as len, the initial 885133819Stjr * region size. It can then grow down from addr without 886168063Sjkim * limit. However, Linux threads has an implicit internal 887133819Stjr * limit to stack size of STACK_SIZE. Its just not 888168063Sjkim * enforced explicitly in Linux. 
But, here we impose 889133819Stjr * a limit of (STACK_SIZE - GUARD_SIZE) on the stack 890133819Stjr * region, since we can do this with our mmap. 891133819Stjr * 892133819Stjr * Our mmap with MAP_STACK takes addr as the maximum 893133819Stjr * downsize limit on BOS, and as len the max size of 894168848Sjkim * the region. It then maps the top SGROWSIZ bytes, 895166944Snetchild * and auto grows the region down, up to the limit 896133819Stjr * in addr. 897133819Stjr * 898133819Stjr * If we don't use the MAP_STACK option, the effect 899133819Stjr * of this code is to allocate a stack region of a 900133819Stjr * fixed size of (STACK_SIZE - GUARD_SIZE). 901133819Stjr */ 902133819Stjr 903198554Sjhb if ((caddr_t)PTRIN(addr) + len > p->p_vmspace->vm_maxsaddr) { 904168063Sjkim /* 905168063Sjkim * Some Linux apps will attempt to mmap 906133819Stjr * thread stacks near the top of their 907133819Stjr * address space. If their TOS is greater 908133819Stjr * than vm_maxsaddr, vm_map_growstack() 909133819Stjr * will confuse the thread stack with the 910133819Stjr * process stack and deliver a SEGV if they 911133819Stjr * attempt to grow the thread stack past their 912133819Stjr * current stacksize rlimit. To avoid this, 913133819Stjr * adjust vm_maxsaddr upwards to reflect 914133819Stjr * the current stacksize rlimit rather 915133819Stjr * than the maximum possible stacksize. 916133819Stjr * It would be better to adjust the 917133819Stjr * mmap'ed region, but some apps do not check 918133819Stjr * mmap's return value. 919133819Stjr */ 920133819Stjr PROC_LOCK(p); 921166727Sjkim p->p_vmspace->vm_maxsaddr = (char *)LINUX32_USRSTACK - 922133819Stjr lim_cur(p, RLIMIT_STACK); 923133819Stjr PROC_UNLOCK(p); 924133819Stjr } 925133819Stjr 926168063Sjkim /* 927176193Sjkim * This gives us our maximum stack size and a new BOS. 
928176193Sjkim * If we're using VM_STACK, then mmap will just map 929176193Sjkim * the top SGROWSIZ bytes, and let the stack grow down 930176193Sjkim * to the limit at BOS. If we're not using VM_STACK 931176193Sjkim * we map the full stack, since we don't have a way 932176193Sjkim * to autogrow it. 933133819Stjr */ 934198554Sjhb if (len > STACK_SIZE - GUARD_SIZE) { 935198554Sjhb bsd_args.addr = (caddr_t)PTRIN(addr); 936198554Sjhb bsd_args.len = len; 937176193Sjkim } else { 938198554Sjhb bsd_args.addr = (caddr_t)PTRIN(addr) - 939198554Sjhb (STACK_SIZE - GUARD_SIZE - len); 940176193Sjkim bsd_args.len = STACK_SIZE - GUARD_SIZE; 941176193Sjkim } 942133819Stjr } else { 943198554Sjhb bsd_args.addr = (caddr_t)PTRIN(addr); 944198554Sjhb bsd_args.len = len; 945133819Stjr } 946198554Sjhb bsd_args.pos = pos; 947133819Stjr 948133819Stjr#ifdef DEBUG 949133819Stjr if (ldebug(mmap)) 950133819Stjr printf("-> %s(%p, %d, %d, 0x%08x, %d, 0x%x)\n", 951133819Stjr __func__, 952133843Sobrien (void *)bsd_args.addr, (int)bsd_args.len, bsd_args.prot, 953133819Stjr bsd_args.flags, bsd_args.fd, (int)bsd_args.pos); 954133819Stjr#endif 955133819Stjr error = mmap(td, &bsd_args); 956133819Stjr#ifdef DEBUG 957133819Stjr if (ldebug(mmap)) 958133819Stjr printf("-> %s() return: 0x%x (0x%08x)\n", 959133819Stjr __func__, error, (u_int)td->td_retval[0]); 960133819Stjr#endif 961133819Stjr return (error); 962133819Stjr} 963133819Stjr 964133819Stjrint 965168035Sjkimlinux_mprotect(struct thread *td, struct linux_mprotect_args *uap) 966168035Sjkim{ 967168035Sjkim struct mprotect_args bsd_args; 968168035Sjkim 969168035Sjkim bsd_args.addr = uap->addr; 970168035Sjkim bsd_args.len = uap->len; 971168035Sjkim bsd_args.prot = uap->prot; 972168035Sjkim if (bsd_args.prot & (PROT_READ | PROT_WRITE | PROT_EXEC)) 973168035Sjkim bsd_args.prot |= PROT_READ | PROT_EXEC; 974168035Sjkim return (mprotect(td, &bsd_args)); 975168035Sjkim} 976168035Sjkim 977168035Sjkimint 978166729Sjkimlinux_iopl(struct thread *td, struct 
linux_iopl_args *args) 979166729Sjkim{ 980166729Sjkim int error; 981166729Sjkim 982166729Sjkim if (args->level < 0 || args->level > 3) 983166729Sjkim return (EINVAL); 984166729Sjkim if ((error = priv_check(td, PRIV_IO)) != 0) 985166729Sjkim return (error); 986166729Sjkim if ((error = securelevel_gt(td->td_ucred, 0)) != 0) 987166729Sjkim return (error); 988166729Sjkim td->td_frame->tf_rflags = (td->td_frame->tf_rflags & ~PSL_IOPL) | 989166729Sjkim (args->level * (PSL_IOPL / 3)); 990166729Sjkim 991166729Sjkim return (0); 992166729Sjkim} 993166729Sjkim 994166729Sjkimint 995133819Stjrlinux_pipe(struct thread *td, struct linux_pipe_args *args) 996133819Stjr{ 997133819Stjr int error; 998184849Sed int fildes[2]; 999133819Stjr 1000133819Stjr#ifdef DEBUG 1001133819Stjr if (ldebug(pipe)) 1002133819Stjr printf(ARGS(pipe, "*")); 1003133819Stjr#endif 1004133819Stjr 1005184849Sed error = kern_pipe(td, fildes); 1006184849Sed if (error) 1007133819Stjr return (error); 1008133819Stjr 1009184849Sed /* XXX: Close descriptors on error. 
*/ 1010184849Sed return (copyout(fildes, args->pipefds, sizeof fildes)); 1011133819Stjr} 1012166731Sjkim 1013133819Stjrint 1014133819Stjrlinux_sigaction(struct thread *td, struct linux_sigaction_args *args) 1015133819Stjr{ 1016133819Stjr l_osigaction_t osa; 1017133819Stjr l_sigaction_t act, oact; 1018133819Stjr int error; 1019133819Stjr 1020133819Stjr#ifdef DEBUG 1021133819Stjr if (ldebug(sigaction)) 1022133819Stjr printf(ARGS(sigaction, "%d, %p, %p"), 1023133819Stjr args->sig, (void *)args->nsa, (void *)args->osa); 1024133819Stjr#endif 1025133819Stjr 1026133819Stjr if (args->nsa != NULL) { 1027133819Stjr error = copyin(args->nsa, &osa, sizeof(l_osigaction_t)); 1028133819Stjr if (error) 1029133819Stjr return (error); 1030133819Stjr act.lsa_handler = osa.lsa_handler; 1031133819Stjr act.lsa_flags = osa.lsa_flags; 1032133819Stjr act.lsa_restorer = osa.lsa_restorer; 1033133819Stjr LINUX_SIGEMPTYSET(act.lsa_mask); 1034133819Stjr act.lsa_mask.__bits[0] = osa.lsa_mask; 1035133819Stjr } 1036133819Stjr 1037133819Stjr error = linux_do_sigaction(td, args->sig, args->nsa ? &act : NULL, 1038133819Stjr args->osa ? &oact : NULL); 1039133819Stjr 1040133819Stjr if (args->osa != NULL && !error) { 1041133819Stjr osa.lsa_handler = oact.lsa_handler; 1042133819Stjr osa.lsa_flags = oact.lsa_flags; 1043133819Stjr osa.lsa_restorer = oact.lsa_restorer; 1044133819Stjr osa.lsa_mask = oact.lsa_mask.__bits[0]; 1045133819Stjr error = copyout(&osa, args->osa, sizeof(l_osigaction_t)); 1046133819Stjr } 1047133819Stjr 1048133819Stjr return (error); 1049133819Stjr} 1050133819Stjr 1051133819Stjr/* 1052168063Sjkim * Linux has two extra args, restart and oldmask. We don't use these, 1053133819Stjr * but it seems that "restart" is actually a context pointer that 1054133819Stjr * enables the signal to happen with a different register set. 
1055133819Stjr */ 1056133819Stjrint 1057133819Stjrlinux_sigsuspend(struct thread *td, struct linux_sigsuspend_args *args) 1058133819Stjr{ 1059133819Stjr sigset_t sigmask; 1060133819Stjr l_sigset_t mask; 1061133819Stjr 1062133819Stjr#ifdef DEBUG 1063133819Stjr if (ldebug(sigsuspend)) 1064133819Stjr printf(ARGS(sigsuspend, "%08lx"), (unsigned long)args->mask); 1065133819Stjr#endif 1066133819Stjr 1067133819Stjr LINUX_SIGEMPTYSET(mask); 1068133819Stjr mask.__bits[0] = args->mask; 1069133819Stjr linux_to_bsd_sigset(&mask, &sigmask); 1070133819Stjr return (kern_sigsuspend(td, sigmask)); 1071133819Stjr} 1072133819Stjr 1073133819Stjrint 1074133819Stjrlinux_rt_sigsuspend(struct thread *td, struct linux_rt_sigsuspend_args *uap) 1075133819Stjr{ 1076133819Stjr l_sigset_t lmask; 1077133819Stjr sigset_t sigmask; 1078133819Stjr int error; 1079133819Stjr 1080133819Stjr#ifdef DEBUG 1081133819Stjr if (ldebug(rt_sigsuspend)) 1082133819Stjr printf(ARGS(rt_sigsuspend, "%p, %d"), 1083133819Stjr (void *)uap->newset, uap->sigsetsize); 1084133819Stjr#endif 1085133819Stjr 1086133819Stjr if (uap->sigsetsize != sizeof(l_sigset_t)) 1087133819Stjr return (EINVAL); 1088133819Stjr 1089133819Stjr error = copyin(uap->newset, &lmask, sizeof(l_sigset_t)); 1090133819Stjr if (error) 1091133819Stjr return (error); 1092133819Stjr 1093133819Stjr linux_to_bsd_sigset(&lmask, &sigmask); 1094133819Stjr return (kern_sigsuspend(td, sigmask)); 1095133819Stjr} 1096133819Stjr 1097133819Stjrint 1098133819Stjrlinux_pause(struct thread *td, struct linux_pause_args *args) 1099133819Stjr{ 1100133819Stjr struct proc *p = td->td_proc; 1101133819Stjr sigset_t sigmask; 1102133819Stjr 1103133819Stjr#ifdef DEBUG 1104133819Stjr if (ldebug(pause)) 1105133819Stjr printf(ARGS(pause, "")); 1106133819Stjr#endif 1107133819Stjr 1108133819Stjr PROC_LOCK(p); 1109133819Stjr sigmask = td->td_sigmask; 1110133819Stjr PROC_UNLOCK(p); 1111133819Stjr return (kern_sigsuspend(td, sigmask)); 1112133819Stjr} 1113133819Stjr 1114133819Stjrint 
1115133819Stjrlinux_sigaltstack(struct thread *td, struct linux_sigaltstack_args *uap) 1116133819Stjr{ 1117133819Stjr stack_t ss, oss; 1118133819Stjr l_stack_t lss; 1119133819Stjr int error; 1120133819Stjr 1121133819Stjr#ifdef DEBUG 1122133819Stjr if (ldebug(sigaltstack)) 1123133819Stjr printf(ARGS(sigaltstack, "%p, %p"), uap->uss, uap->uoss); 1124133819Stjr#endif 1125133819Stjr 1126133819Stjr if (uap->uss != NULL) { 1127133819Stjr error = copyin(uap->uss, &lss, sizeof(l_stack_t)); 1128133819Stjr if (error) 1129133819Stjr return (error); 1130133819Stjr 1131133819Stjr ss.ss_sp = PTRIN(lss.ss_sp); 1132133819Stjr ss.ss_size = lss.ss_size; 1133133819Stjr ss.ss_flags = linux_to_bsd_sigaltstack(lss.ss_flags); 1134133819Stjr } 1135134269Sjhb error = kern_sigaltstack(td, (uap->uss != NULL) ? &ss : NULL, 1136134269Sjhb (uap->uoss != NULL) ? &oss : NULL); 1137133819Stjr if (!error && uap->uoss != NULL) { 1138133819Stjr lss.ss_sp = PTROUT(oss.ss_sp); 1139133819Stjr lss.ss_size = oss.ss_size; 1140133819Stjr lss.ss_flags = bsd_to_linux_sigaltstack(oss.ss_flags); 1141133819Stjr error = copyout(&lss, uap->uoss, sizeof(l_stack_t)); 1142133819Stjr } 1143133819Stjr 1144133819Stjr return (error); 1145133819Stjr} 1146133819Stjr 1147133819Stjrint 1148133819Stjrlinux_ftruncate64(struct thread *td, struct linux_ftruncate64_args *args) 1149133819Stjr{ 1150133819Stjr struct ftruncate_args sa; 1151133819Stjr 1152133819Stjr#ifdef DEBUG 1153133819Stjr if (ldebug(ftruncate64)) 1154133819Stjr printf(ARGS(ftruncate64, "%u, %jd"), args->fd, 1155133819Stjr (intmax_t)args->length); 1156133819Stjr#endif 1157133819Stjr 1158133819Stjr sa.fd = args->fd; 1159133819Stjr sa.length = args->length; 1160133819Stjr return ftruncate(td, &sa); 1161133819Stjr} 1162133819Stjr 1163133819Stjrint 1164133819Stjrlinux_gettimeofday(struct thread *td, struct linux_gettimeofday_args *uap) 1165133819Stjr{ 1166133819Stjr struct timeval atv; 1167133819Stjr l_timeval atv32; 1168133819Stjr struct timezone rtz; 1169133819Stjr 
int error = 0; 1170133819Stjr 1171133819Stjr if (uap->tp) { 1172133819Stjr microtime(&atv); 1173133819Stjr atv32.tv_sec = atv.tv_sec; 1174133819Stjr atv32.tv_usec = atv.tv_usec; 1175168844Sjkim error = copyout(&atv32, uap->tp, sizeof(atv32)); 1176133819Stjr } 1177133819Stjr if (error == 0 && uap->tzp != NULL) { 1178133819Stjr rtz.tz_minuteswest = tz_minuteswest; 1179133819Stjr rtz.tz_dsttime = tz_dsttime; 1180168844Sjkim error = copyout(&rtz, uap->tzp, sizeof(rtz)); 1181133819Stjr } 1182133819Stjr return (error); 1183133819Stjr} 1184133819Stjr 1185133819Stjrint 1186168843Sjkimlinux_settimeofday(struct thread *td, struct linux_settimeofday_args *uap) 1187168843Sjkim{ 1188168843Sjkim l_timeval atv32; 1189168843Sjkim struct timeval atv, *tvp; 1190168843Sjkim struct timezone atz, *tzp; 1191168843Sjkim int error; 1192168843Sjkim 1193168843Sjkim if (uap->tp) { 1194168843Sjkim error = copyin(uap->tp, &atv32, sizeof(atv32)); 1195168843Sjkim if (error) 1196168843Sjkim return (error); 1197168843Sjkim atv.tv_sec = atv32.tv_sec; 1198168843Sjkim atv.tv_usec = atv32.tv_usec; 1199168843Sjkim tvp = &atv; 1200168843Sjkim } else 1201168843Sjkim tvp = NULL; 1202168843Sjkim if (uap->tzp) { 1203168843Sjkim error = copyin(uap->tzp, &atz, sizeof(atz)); 1204168843Sjkim if (error) 1205168843Sjkim return (error); 1206168843Sjkim tzp = &atz; 1207168843Sjkim } else 1208168843Sjkim tzp = NULL; 1209168843Sjkim return (kern_settimeofday(td, tvp, tzp)); 1210168843Sjkim} 1211168843Sjkim 1212168843Sjkimint 1213133819Stjrlinux_getrusage(struct thread *td, struct linux_getrusage_args *uap) 1214133819Stjr{ 1215136152Sjhb struct l_rusage s32; 1216136152Sjhb struct rusage s; 1217133819Stjr int error; 1218133819Stjr 1219136152Sjhb error = kern_getrusage(td, uap->who, &s); 1220133819Stjr if (error != 0) 1221133819Stjr return (error); 1222136152Sjhb if (uap->rusage != NULL) { 1223133819Stjr s32.ru_utime.tv_sec = s.ru_utime.tv_sec; 1224133819Stjr s32.ru_utime.tv_usec = s.ru_utime.tv_usec; 1225133819Stjr 
s32.ru_stime.tv_sec = s.ru_stime.tv_sec; 1226133819Stjr s32.ru_stime.tv_usec = s.ru_stime.tv_usec; 1227133819Stjr s32.ru_maxrss = s.ru_maxrss; 1228133819Stjr s32.ru_ixrss = s.ru_ixrss; 1229133819Stjr s32.ru_idrss = s.ru_idrss; 1230133819Stjr s32.ru_isrss = s.ru_isrss; 1231133819Stjr s32.ru_minflt = s.ru_minflt; 1232133819Stjr s32.ru_majflt = s.ru_majflt; 1233133819Stjr s32.ru_nswap = s.ru_nswap; 1234133819Stjr s32.ru_inblock = s.ru_inblock; 1235133819Stjr s32.ru_oublock = s.ru_oublock; 1236133819Stjr s32.ru_msgsnd = s.ru_msgsnd; 1237133819Stjr s32.ru_msgrcv = s.ru_msgrcv; 1238133819Stjr s32.ru_nsignals = s.ru_nsignals; 1239133819Stjr s32.ru_nvcsw = s.ru_nvcsw; 1240133819Stjr s32.ru_nivcsw = s.ru_nivcsw; 1241136152Sjhb error = copyout(&s32, uap->rusage, sizeof(s32)); 1242133819Stjr } 1243133819Stjr return (error); 1244133819Stjr} 1245133819Stjr 1246133819Stjrint 1247133819Stjrlinux_sched_rr_get_interval(struct thread *td, 1248133819Stjr struct linux_sched_rr_get_interval_args *uap) 1249133819Stjr{ 1250133819Stjr struct timespec ts; 1251133819Stjr struct l_timespec ts32; 1252133819Stjr int error; 1253133819Stjr 1254144449Sjhb error = kern_sched_rr_get_interval(td, uap->pid, &ts); 1255133819Stjr if (error != 0) 1256133819Stjr return (error); 1257133819Stjr ts32.tv_sec = ts.tv_sec; 1258133819Stjr ts32.tv_nsec = ts.tv_nsec; 1259133819Stjr return (copyout(&ts32, uap->interval, sizeof(ts32))); 1260133819Stjr} 1261133819Stjr 1262133819Stjrint 1263168035Sjkimlinux_set_thread_area(struct thread *td, 1264168035Sjkim struct linux_set_thread_area_args *args) 1265133819Stjr{ 1266168035Sjkim struct l_user_desc info; 1267168035Sjkim struct user_segment_descriptor sd; 1268168035Sjkim int a[2]; 1269168035Sjkim int error; 1270133819Stjr 1271168035Sjkim error = copyin(args->desc, &info, sizeof(struct l_user_desc)); 1272168035Sjkim if (error) 1273168035Sjkim return (error); 1274168035Sjkim 1275168035Sjkim#ifdef DEBUG 1276168035Sjkim if (ldebug(set_thread_area)) 1277168848Sjkim 
printf(ARGS(set_thread_area, "%i, %x, %x, %i, %i, %i, " 1278168035Sjkim "%i, %i, %i"), info.entry_number, info.base_addr, 1279168035Sjkim info.limit, info.seg_32bit, info.contents, 1280168035Sjkim info.read_exec_only, info.limit_in_pages, 1281168035Sjkim info.seg_not_present, info.useable); 1282168035Sjkim#endif 1283168035Sjkim 1284168035Sjkim /* 1285168035Sjkim * Semantics of Linux version: every thread in the system has array 1286168035Sjkim * of three TLS descriptors. 1st is GLIBC TLS, 2nd is WINE, 3rd unknown. 1287168035Sjkim * This syscall loads one of the selected TLS decriptors with a value 1288168035Sjkim * and also loads GDT descriptors 6, 7 and 8 with the content of 1289168035Sjkim * the per-thread descriptors. 1290168035Sjkim * 1291168035Sjkim * Semantics of FreeBSD version: I think we can ignore that Linux has 1292168035Sjkim * three per-thread descriptors and use just the first one. 1293168035Sjkim * The tls_array[] is used only in [gs]et_thread_area() syscalls and 1294168035Sjkim * for loading the GDT descriptors. We use just one GDT descriptor 1295168035Sjkim * for TLS, so we will load just one. 1296168848Sjkim * 1297168848Sjkim * XXX: This doesn't work when a user space process tries to use more 1298168035Sjkim * than one TLS segment. Comment in the Linux source says wine might 1299168848Sjkim * do this. 1300168035Sjkim */ 1301168035Sjkim 1302168035Sjkim /* 1303168035Sjkim * GLIBC reads current %gs and call set_thread_area() with it. 1304168035Sjkim * We should let GUDATA_SEL and GUGS32_SEL proceed as well because 1305168035Sjkim * we use these segments. 
1306168035Sjkim */ 1307168035Sjkim switch (info.entry_number) { 1308168035Sjkim case GUGS32_SEL: 1309168035Sjkim case GUDATA_SEL: 1310168035Sjkim case 6: 1311168035Sjkim case -1: 1312168035Sjkim info.entry_number = GUGS32_SEL; 1313168035Sjkim break; 1314168035Sjkim default: 1315168035Sjkim return (EINVAL); 1316168035Sjkim } 1317168035Sjkim 1318168035Sjkim /* 1319168035Sjkim * We have to copy out the GDT entry we use. 1320168848Sjkim * 1321168848Sjkim * XXX: What if a user space program does not check the return value 1322168848Sjkim * and tries to use 6, 7 or 8? 1323168035Sjkim */ 1324168035Sjkim error = copyout(&info, args->desc, sizeof(struct l_user_desc)); 1325168035Sjkim if (error) 1326168035Sjkim return (error); 1327168035Sjkim 1328168035Sjkim if (LINUX_LDT_empty(&info)) { 1329168035Sjkim a[0] = 0; 1330168035Sjkim a[1] = 0; 1331168035Sjkim } else { 1332168035Sjkim a[0] = LINUX_LDT_entry_a(&info); 1333168035Sjkim a[1] = LINUX_LDT_entry_b(&info); 1334168035Sjkim } 1335168035Sjkim 1336168035Sjkim memcpy(&sd, &a, sizeof(a)); 1337168035Sjkim#ifdef DEBUG 1338168035Sjkim if (ldebug(set_thread_area)) 1339168035Sjkim printf("Segment created in set_thread_area: " 1340168035Sjkim "lobase: %x, hibase: %x, lolimit: %x, hilimit: %x, " 1341168035Sjkim "type: %i, dpl: %i, p: %i, xx: %i, long: %i, " 1342168035Sjkim "def32: %i, gran: %i\n", 1343168035Sjkim sd.sd_lobase, 1344168035Sjkim sd.sd_hibase, 1345168035Sjkim sd.sd_lolimit, 1346168035Sjkim sd.sd_hilimit, 1347168035Sjkim sd.sd_type, 1348168035Sjkim sd.sd_dpl, 1349168035Sjkim sd.sd_p, 1350168035Sjkim sd.sd_xx, 1351168035Sjkim sd.sd_long, 1352168035Sjkim sd.sd_def32, 1353168035Sjkim sd.sd_gran); 1354168035Sjkim#endif 1355168035Sjkim 1356168035Sjkim td->td_pcb->pcb_gsbase = (register_t)info.base_addr; 1357182866Skib td->td_pcb->pcb_flags |= PCB_32BIT | PCB_GS32BIT; 1358190620Skib update_gdt_gsbase(td, info.base_addr); 1359168035Sjkim 1360168035Sjkim return (0); 1361133819Stjr} 1362