linux32_machdep.c revision 218030
1133819Stjr/*- 2133819Stjr * Copyright (c) 2004 Tim J. Robbins 3133819Stjr * Copyright (c) 2002 Doug Rabson 4133819Stjr * Copyright (c) 2000 Marcel Moolenaar 5133819Stjr * All rights reserved. 6133819Stjr * 7133819Stjr * Redistribution and use in source and binary forms, with or without 8133819Stjr * modification, are permitted provided that the following conditions 9133819Stjr * are met: 10133819Stjr * 1. Redistributions of source code must retain the above copyright 11133819Stjr * notice, this list of conditions and the following disclaimer 12133819Stjr * in this position and unchanged. 13133819Stjr * 2. Redistributions in binary form must reproduce the above copyright 14133819Stjr * notice, this list of conditions and the following disclaimer in the 15133819Stjr * documentation and/or other materials provided with the distribution. 16133819Stjr * 3. The name of the author may not be used to endorse or promote products 17133819Stjr * derived from this software without specific prior written permission. 18133819Stjr * 19133819Stjr * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 20133819Stjr * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 21133819Stjr * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 22133819Stjr * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 23133819Stjr * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 24133819Stjr * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25133819Stjr * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26133819Stjr * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27133819Stjr * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 28133819Stjr * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29133819Stjr */ 30133819Stjr 31133819Stjr#include <sys/cdefs.h> 32133819Stjr__FBSDID("$FreeBSD: head/sys/amd64/linux32/linux32_machdep.c 218030 2011-01-28 18:47:07Z dchagin $"); 33133819Stjr 34133819Stjr#include <sys/param.h> 35133819Stjr#include <sys/kernel.h> 36133819Stjr#include <sys/systm.h> 37165832Snetchild#include <sys/file.h> 38165832Snetchild#include <sys/fcntl.h> 39162954Sphk#include <sys/clock.h> 40142057Sjhb#include <sys/imgact.h> 41161310Snetchild#include <sys/limits.h> 42133819Stjr#include <sys/lock.h> 43133819Stjr#include <sys/malloc.h> 44133819Stjr#include <sys/mman.h> 45133819Stjr#include <sys/mutex.h> 46166729Sjkim#include <sys/priv.h> 47133819Stjr#include <sys/proc.h> 48133819Stjr#include <sys/resource.h> 49133819Stjr#include <sys/resourcevar.h> 50166188Sjeff#include <sys/sched.h> 51133819Stjr#include <sys/syscallsubr.h> 52133819Stjr#include <sys/sysproto.h> 53133819Stjr#include <sys/unistd.h> 54218030Sdchagin#include <sys/wait.h> 55133819Stjr 56133819Stjr#include <machine/frame.h> 57168035Sjkim#include <machine/pcb.h> 58166729Sjkim#include <machine/psl.h> 59168035Sjkim#include <machine/segments.h> 60168035Sjkim#include <machine/specialreg.h> 61133819Stjr 62133819Stjr#include <vm/vm.h> 63133819Stjr#include <vm/pmap.h> 64133819Stjr#include <vm/vm_map.h> 65133819Stjr 66210431Skib#include <compat/freebsd32/freebsd32_util.h> 67133819Stjr#include <amd64/linux32/linux.h> 68133819Stjr#include <amd64/linux32/linux32_proto.h> 69133819Stjr#include <compat/linux/linux_ipc.h> 70218030Sdchagin#include <compat/linux/linux_misc.h> 71133819Stjr#include <compat/linux/linux_signal.h> 72133819Stjr#include <compat/linux/linux_util.h> 73161474Snetchild#include <compat/linux/linux_emul.h> 74133819Stjr 75133819Stjrstruct l_old_select_argv { 76133819Stjr l_int nfds; 77133819Stjr l_uintptr_t readfds; 78133819Stjr l_uintptr_t writefds; 79133819Stjr l_uintptr_t exceptfds; 80133819Stjr l_uintptr_t timeout; 81133819Stjr} __packed; 82133819Stjr 83133819Stjrint 84133819Stjrlinux_to_bsd_sigaltstack(int lsa) 85133819Stjr{ 86133819Stjr int bsa = 0; 87133819Stjr 88133819Stjr if (lsa & LINUX_SS_DISABLE) 89133819Stjr bsa |= SS_DISABLE; 90133819Stjr if (lsa & LINUX_SS_ONSTACK) 91133819Stjr bsa |= SS_ONSTACK; 92133819Stjr return (bsa); 93133819Stjr} 94133819Stjr 95198554Sjhbstatic int linux_mmap_common(struct thread *td, l_uintptr_t addr, 96198554Sjhb l_size_t len, l_int prot, l_int flags, l_int fd, 97198554Sjhb l_loff_t pos); 98198554Sjhb 99133819Stjrint 100133819Stjrbsd_to_linux_sigaltstack(int bsa) 101133819Stjr{ 102133819Stjr int lsa = 0; 103133819Stjr 104133819Stjr if (bsa & SS_DISABLE) 105133819Stjr lsa |= LINUX_SS_DISABLE; 106133819Stjr if (bsa & SS_ONSTACK) 107133819Stjr lsa |= LINUX_SS_ONSTACK; 108133819Stjr return (lsa); 109133819Stjr} 110133819Stjr 111218028Sdchaginstatic void bsd_to_linux_rusage(struct rusage *ru, struct l_rusage *lru) 112218028Sdchagin{ 113218028Sdchagin lru->ru_utime.tv_sec = ru->ru_utime.tv_sec; 114218028Sdchagin lru->ru_utime.tv_usec = ru->ru_utime.tv_usec; 115218028Sdchagin lru->ru_stime.tv_sec = ru->ru_stime.tv_sec; 116218028Sdchagin lru->ru_stime.tv_usec = ru->ru_stime.tv_usec; 117218028Sdchagin lru->ru_maxrss = ru->ru_maxrss; 118218028Sdchagin lru->ru_ixrss = ru->ru_ixrss; 119218028Sdchagin lru->ru_idrss = ru->ru_idrss; 120218028Sdchagin lru->ru_isrss = ru->ru_isrss; 121218028Sdchagin lru->ru_minflt = ru->ru_minflt; 122218028Sdchagin lru->ru_majflt = ru->ru_majflt; 123218028Sdchagin lru->ru_nswap = ru->ru_nswap; 124218028Sdchagin lru->ru_inblock = ru->ru_inblock; 125218028Sdchagin lru->ru_oublock = ru->ru_oublock; 126218028Sdchagin lru->ru_msgsnd = ru->ru_msgsnd; 127218028Sdchagin lru->ru_msgrcv = ru->ru_msgrcv; 128218028Sdchagin lru->ru_nsignals = ru->ru_nsignals; 129218028Sdchagin lru->ru_nvcsw = ru->ru_nvcsw; 130218028Sdchagin lru->ru_nivcsw = ru->ru_nivcsw; 131218028Sdchagin} 132218028Sdchagin 133142057Sjhbint 134142057Sjhblinux_execve(struct thread *td, struct linux_execve_args *args) 135142057Sjhb{ 136142057Sjhb struct image_args eargs; 137142057Sjhb char *path; 138142057Sjhb int error; 139142057Sjhb 140142057Sjhb LCONVPATHEXIST(td, args->path, &path); 141142057Sjhb 142142057Sjhb#ifdef DEBUG 143142057Sjhb if (ldebug(execve)) 144142057Sjhb printf(ARGS(execve, "%s"), path); 145142057Sjhb#endif 146142057Sjhb 147210431Skib error = freebsd32_exec_copyin_args(&eargs, path, UIO_SYSSPACE, 148210431Skib args->argp, args->envp); 149142057Sjhb free(path, M_TEMP); 150142057Sjhb if (error == 0) 151142057Sjhb error = kern_execve(td, &eargs, NULL); 152161474Snetchild if (error == 0) 153168063Sjkim /* Linux process can execute FreeBSD one, do not attempt 154161474Snetchild * to create emuldata for such process using 155161474Snetchild * linux_proc_init, this leads to a panic on KASSERT 156168063Sjkim * because such process has p->p_emuldata == NULL. 157161474Snetchild */ 158217896Sdchagin if (SV_PROC_ABI(td->td_proc) == SV_ABI_LINUX) 159168063Sjkim error = linux_proc_init(td, 0, 0); 160142057Sjhb return (error); 161142057Sjhb} 162142057Sjhb 163185438SkibCTASSERT(sizeof(struct l_iovec32) == 8); 164133819Stjr 165144449Sjhbstatic int 166185438Skiblinux32_copyinuio(struct l_iovec32 *iovp, l_ulong iovcnt, struct uio **uiop) 167133819Stjr{ 168185438Skib struct l_iovec32 iov32; 169144449Sjhb struct iovec *iov; 170144449Sjhb struct uio *uio; 171185438Skib uint32_t iovlen; 172144449Sjhb int error, i; 173133819Stjr 174144449Sjhb *uiop = NULL; 175144449Sjhb if (iovcnt > UIO_MAXIOV) 176133819Stjr return (EINVAL); 177144449Sjhb iovlen = iovcnt * sizeof(struct iovec); 178168844Sjkim uio = malloc(iovlen + sizeof(*uio), M_IOV, M_WAITOK); 179144449Sjhb iov = (struct iovec *)(uio + 1); 180144449Sjhb for (i = 0; i < iovcnt; i++) { 181185438Skib error = copyin(&iovp[i], &iov32, sizeof(struct l_iovec32)); 182144449Sjhb if (error) { 183144449Sjhb free(uio, M_IOV); 184144449Sjhb return (error); 185144449Sjhb } 186144449Sjhb iov[i].iov_base = PTRIN(iov32.iov_base); 187144449Sjhb iov[i].iov_len = iov32.iov_len; 188133819Stjr } 189144449Sjhb uio->uio_iov = iov; 190144449Sjhb uio->uio_iovcnt = iovcnt; 191144449Sjhb uio->uio_segflg = UIO_USERSPACE; 192144449Sjhb uio->uio_offset = -1; 193144449Sjhb uio->uio_resid = 0; 194144449Sjhb for (i = 0; i < iovcnt; i++) { 195144449Sjhb if (iov->iov_len > INT_MAX - uio->uio_resid) { 196144449Sjhb free(uio, M_IOV); 197144449Sjhb return (EINVAL); 198144449Sjhb } 199144449Sjhb uio->uio_resid += iov->iov_len; 200144449Sjhb iov++; 201144449Sjhb } 202144449Sjhb *uiop = uio; 203144449Sjhb return (0); 204144449Sjhb} 205133819Stjr 206144449Sjhbint 207185438Skiblinux32_copyiniov(struct l_iovec32 *iovp32, l_ulong iovcnt, struct iovec **iovp, 208185438Skib int error) 209185438Skib{ 210185438Skib struct l_iovec32 iov32; 211185438Skib struct iovec *iov; 212185438Skib uint32_t iovlen; 213185438Skib int i; 214185438Skib 215185438Skib *iovp = NULL; 216185438Skib if (iovcnt > UIO_MAXIOV) 217185438Skib return (error); 218185438Skib iovlen = iovcnt * sizeof(struct iovec); 219185438Skib iov = malloc(iovlen, M_IOV, M_WAITOK); 220185438Skib for (i = 0; i < iovcnt; i++) { 221185438Skib error = copyin(&iovp32[i], &iov32, sizeof(struct l_iovec32)); 222185438Skib if (error) { 223185438Skib free(iov, M_IOV); 224185438Skib return (error); 225185438Skib } 226185438Skib iov[i].iov_base = PTRIN(iov32.iov_base); 227185438Skib iov[i].iov_len = iov32.iov_len; 228185438Skib } 229185438Skib *iovp = iov; 230185438Skib return(0); 231185438Skib 232185438Skib} 233185438Skib 234185438Skibint 235144449Sjhblinux_readv(struct thread *td, struct linux_readv_args *uap) 236144449Sjhb{ 237144449Sjhb struct uio *auio; 238144449Sjhb int error; 239133819Stjr 240144449Sjhb error = linux32_copyinuio(uap->iovp, uap->iovcnt, &auio); 241144449Sjhb if (error) 242144449Sjhb return (error); 243144449Sjhb error = kern_readv(td, uap->fd, auio); 244144449Sjhb free(auio, M_IOV); 245133819Stjr return (error); 246133819Stjr} 247133819Stjr 248133819Stjrint 249133819Stjrlinux_writev(struct thread *td, struct linux_writev_args *uap) 250133819Stjr{ 251144449Sjhb struct uio *auio; 252144449Sjhb int error; 253133819Stjr 254144449Sjhb error = linux32_copyinuio(uap->iovp, uap->iovcnt, &auio); 255144449Sjhb if (error) 256144449Sjhb return (error); 257144449Sjhb error = kern_writev(td, uap->fd, auio); 258144449Sjhb free(auio, M_IOV); 259133819Stjr return (error); 260133819Stjr} 261133819Stjr 262133819Stjrstruct l_ipc_kludge { 263133819Stjr l_uintptr_t msgp; 264133819Stjr l_long msgtyp; 265133819Stjr} __packed; 266133819Stjr 267133819Stjrint 268133819Stjrlinux_ipc(struct thread *td, struct linux_ipc_args *args) 269133819Stjr{ 270133819Stjr 271133819Stjr switch (args->what & 0xFFFF) { 272133819Stjr case LINUX_SEMOP: { 273133819Stjr struct linux_semop_args a; 274133819Stjr 275133819Stjr a.semid = args->arg1; 276133819Stjr a.tsops = args->ptr; 277133819Stjr a.nsops = args->arg2; 278133819Stjr return (linux_semop(td, &a)); 279133819Stjr } 280133819Stjr case LINUX_SEMGET: { 281133819Stjr struct linux_semget_args a; 282133819Stjr 283133819Stjr a.key = args->arg1; 284133819Stjr a.nsems = args->arg2; 285133819Stjr a.semflg = args->arg3; 286133819Stjr return (linux_semget(td, &a)); 287133819Stjr } 288133819Stjr case LINUX_SEMCTL: { 289133819Stjr struct linux_semctl_args a; 290133819Stjr int error; 291133819Stjr 292133819Stjr a.semid = args->arg1; 293133819Stjr a.semnum = args->arg2; 294133819Stjr a.cmd = args->arg3; 295133819Stjr error = copyin(args->ptr, &a.arg, sizeof(a.arg)); 296133819Stjr if (error) 297133819Stjr return (error); 298133819Stjr return (linux_semctl(td, &a)); 299133819Stjr } 300133819Stjr case LINUX_MSGSND: { 301133819Stjr struct linux_msgsnd_args a; 302133819Stjr 303133819Stjr a.msqid = args->arg1; 304133819Stjr a.msgp = args->ptr; 305133819Stjr a.msgsz = args->arg2; 306133819Stjr a.msgflg = args->arg3; 307133819Stjr return (linux_msgsnd(td, &a)); 308133819Stjr } 309133819Stjr case LINUX_MSGRCV: { 310133819Stjr struct linux_msgrcv_args a; 311133819Stjr 312133819Stjr a.msqid = args->arg1; 313133819Stjr a.msgsz = args->arg2; 314133819Stjr a.msgflg = args->arg3; 315133819Stjr if ((args->what >> 16) == 0) { 316133819Stjr struct l_ipc_kludge tmp; 317133819Stjr int error; 318133819Stjr 319133819Stjr if (args->ptr == 0) 320133819Stjr return (EINVAL); 321133819Stjr error = copyin(args->ptr, &tmp, sizeof(tmp)); 322133819Stjr if (error) 323133819Stjr return (error); 324133819Stjr a.msgp = PTRIN(tmp.msgp); 325133819Stjr a.msgtyp = tmp.msgtyp; 326133819Stjr } else { 327133819Stjr a.msgp = args->ptr; 328133819Stjr a.msgtyp = args->arg5; 329133819Stjr } 330133819Stjr return (linux_msgrcv(td, &a)); 331133819Stjr } 332133819Stjr case LINUX_MSGGET: { 333133819Stjr struct linux_msgget_args a; 334133819Stjr 335133819Stjr a.key = args->arg1; 336133819Stjr a.msgflg = args->arg2; 337133819Stjr return (linux_msgget(td, &a)); 338133819Stjr } 339133819Stjr case LINUX_MSGCTL: { 340133819Stjr struct linux_msgctl_args a; 341133819Stjr 342133819Stjr a.msqid = args->arg1; 343133819Stjr a.cmd = args->arg2; 344133819Stjr a.buf = args->ptr; 345133819Stjr return (linux_msgctl(td, &a)); 346133819Stjr } 347133819Stjr case LINUX_SHMAT: { 348133819Stjr struct linux_shmat_args a; 349133819Stjr 350133819Stjr a.shmid = args->arg1; 351133819Stjr a.shmaddr = args->ptr; 352133819Stjr a.shmflg = args->arg2; 353144441Sjhb a.raddr = PTRIN((l_uint)args->arg3); 354133819Stjr return (linux_shmat(td, &a)); 355133819Stjr } 356133819Stjr case LINUX_SHMDT: { 357133819Stjr struct linux_shmdt_args a; 358133819Stjr 359133819Stjr a.shmaddr = args->ptr; 360133819Stjr return (linux_shmdt(td, &a)); 361133819Stjr } 362133819Stjr case LINUX_SHMGET: { 363133819Stjr struct linux_shmget_args a; 364133819Stjr 365133819Stjr a.key = args->arg1; 366133819Stjr a.size = args->arg2; 367133819Stjr a.shmflg = args->arg3; 368133819Stjr return (linux_shmget(td, &a)); 369133819Stjr } 370133819Stjr case LINUX_SHMCTL: { 371133819Stjr struct linux_shmctl_args a; 372133819Stjr 373133819Stjr a.shmid = args->arg1; 374133819Stjr a.cmd = args->arg2; 375133819Stjr a.buf = args->ptr; 376133819Stjr return (linux_shmctl(td, &a)); 377133819Stjr } 378133819Stjr default: 379133819Stjr break; 380133819Stjr } 381133819Stjr 382133819Stjr return (EINVAL); 383133819Stjr} 384133819Stjr 385133819Stjrint 386133819Stjrlinux_old_select(struct thread *td, struct linux_old_select_args *args) 387133819Stjr{ 388133819Stjr struct l_old_select_argv linux_args; 389133819Stjr struct linux_select_args newsel; 390133819Stjr int error; 391133819Stjr 392133819Stjr#ifdef DEBUG 393133819Stjr if (ldebug(old_select)) 394133819Stjr printf(ARGS(old_select, "%p"), args->ptr); 395133819Stjr#endif 396133819Stjr 397133819Stjr error = copyin(args->ptr, &linux_args, sizeof(linux_args)); 398133819Stjr if (error) 399133819Stjr return (error); 400133819Stjr 401133819Stjr newsel.nfds = linux_args.nfds; 402133819Stjr newsel.readfds = PTRIN(linux_args.readfds); 403133819Stjr newsel.writefds = PTRIN(linux_args.writefds); 404133819Stjr newsel.exceptfds = PTRIN(linux_args.exceptfds); 405133819Stjr newsel.timeout = PTRIN(linux_args.timeout); 406133819Stjr return (linux_select(td, &newsel)); 407133819Stjr} 408133819Stjr 409133819Stjrint 410133819Stjrlinux_fork(struct thread *td, struct linux_fork_args *args) 411133819Stjr{ 412133819Stjr int error; 413166150Snetchild struct proc *p2; 414166150Snetchild struct thread *td2; 415133819Stjr 416133819Stjr#ifdef DEBUG 417133819Stjr if (ldebug(fork)) 418133819Stjr printf(ARGS(fork, "")); 419133819Stjr#endif 420133819Stjr 421166150Snetchild if ((error = fork1(td, RFFDG | RFPROC | RFSTOPPED, 0, &p2)) != 0) 422133819Stjr return (error); 423168063Sjkim 424166150Snetchild if (error == 0) { 425166150Snetchild td->td_retval[0] = p2->p_pid; 426166150Snetchild td->td_retval[1] = 0; 427166150Snetchild } 428133819Stjr 429133819Stjr if (td->td_retval[1] == 1) 430133819Stjr td->td_retval[0] = 0; 431161474Snetchild error = linux_proc_init(td, td->td_retval[0], 0); 432161474Snetchild if (error) 433161474Snetchild return (error); 434161474Snetchild 435166150Snetchild td2 = FIRST_THREAD_IN_PROC(p2); 436166150Snetchild 437168063Sjkim /* 438168063Sjkim * Make this runnable after we are finished with it. 439168063Sjkim */ 440170307Sjeff thread_lock(td2); 441166150Snetchild TD_SET_CAN_RUN(td2); 442166188Sjeff sched_add(td2, SRQ_BORING); 443170307Sjeff thread_unlock(td2); 444166150Snetchild 445133819Stjr return (0); 446133819Stjr} 447133819Stjr 448133819Stjrint 449133819Stjrlinux_vfork(struct thread *td, struct linux_vfork_args *args) 450133819Stjr{ 451133819Stjr int error; 452161611Snetchild struct proc *p2; 453166150Snetchild struct thread *td2; 454133819Stjr 455133819Stjr#ifdef DEBUG 456133819Stjr if (ldebug(vfork)) 457133819Stjr printf(ARGS(vfork, "")); 458133819Stjr#endif 459133819Stjr 460168063Sjkim /* Exclude RFPPWAIT */ 461166150Snetchild if ((error = fork1(td, RFFDG | RFPROC | RFMEM | RFSTOPPED, 0, &p2)) != 0) 462133819Stjr return (error); 463161611Snetchild if (error == 0) { 464161611Snetchild td->td_retval[0] = p2->p_pid; 465161611Snetchild td->td_retval[1] = 0; 466161611Snetchild } 467133819Stjr /* Are we the child? */ 468133819Stjr if (td->td_retval[1] == 1) 469133819Stjr td->td_retval[0] = 0; 470161474Snetchild error = linux_proc_init(td, td->td_retval[0], 0); 471161474Snetchild if (error) 472161474Snetchild return (error); 473166150Snetchild 474166150Snetchild PROC_LOCK(p2); 475166150Snetchild p2->p_flag |= P_PPWAIT; 476166150Snetchild PROC_UNLOCK(p2); 477166150Snetchild 478166150Snetchild td2 = FIRST_THREAD_IN_PROC(p2); 479168063Sjkim 480168848Sjkim /* 481168848Sjkim * Make this runnable after we are finished with it. 482168848Sjkim */ 483170307Sjeff thread_lock(td2); 484166150Snetchild TD_SET_CAN_RUN(td2); 485166188Sjeff sched_add(td2, SRQ_BORING); 486170307Sjeff thread_unlock(td2); 487166150Snetchild 488161611Snetchild /* wait for the children to exit, ie. emulate vfork */ 489161611Snetchild PROC_LOCK(p2); 490161611Snetchild while (p2->p_flag & P_PPWAIT) 491188750Skib cv_wait(&p2->p_pwait, &p2->p_mtx); 492161611Snetchild PROC_UNLOCK(p2); 493168063Sjkim 494133819Stjr return (0); 495133819Stjr} 496133819Stjr 497133819Stjrint 498133819Stjrlinux_clone(struct thread *td, struct linux_clone_args *args) 499133819Stjr{ 500133819Stjr int error, ff = RFPROC | RFSTOPPED; 501133819Stjr struct proc *p2; 502133819Stjr struct thread *td2; 503133819Stjr int exit_signal; 504161474Snetchild struct linux_emuldata *em; 505133819Stjr 506133819Stjr#ifdef DEBUG 507133819Stjr if (ldebug(clone)) { 508168063Sjkim printf(ARGS(clone, "flags %x, stack %p, parent tid: %p, " 509168063Sjkim "child tid: %p"), (unsigned)args->flags, 510168063Sjkim args->stack, args->parent_tidptr, args->child_tidptr); 511133819Stjr } 512133819Stjr#endif 513133819Stjr 514133819Stjr exit_signal = args->flags & 0x000000ff; 515169458Skan if (LINUX_SIG_VALID(exit_signal)) { 516169458Skan if (exit_signal <= LINUX_SIGTBLSZ) 517169458Skan exit_signal = 518169458Skan linux_to_bsd_signal[_SIG_IDX(exit_signal)]; 519169458Skan } else if (exit_signal != 0) 520133819Stjr return (EINVAL); 521133819Stjr 522167157Sjkim if (args->flags & LINUX_CLONE_VM) 523133819Stjr ff |= RFMEM; 524167157Sjkim if (args->flags & LINUX_CLONE_SIGHAND) 525133819Stjr ff |= RFSIGSHARE; 526168063Sjkim /* 527168063Sjkim * XXX: In Linux, sharing of fs info (chroot/cwd/umask) 528168063Sjkim * and open files is independant. In FreeBSD, its in one 529168848Sjkim * structure but in reality it does not cause any problems 530168848Sjkim * because both of these flags are usually set together. 531163371Snetchild */ 532167157Sjkim if (!(args->flags & (LINUX_CLONE_FILES | LINUX_CLONE_FS))) 533133819Stjr ff |= RFFDG; 534133819Stjr 535161474Snetchild /* 536161474Snetchild * Attempt to detect when linux_clone(2) is used for creating 537161474Snetchild * kernel threads. Unfortunately despite the existence of the 538161474Snetchild * CLONE_THREAD flag, version of linuxthreads package used in 539161474Snetchild * most popular distros as of beginning of 2005 doesn't make 540166944Snetchild * any use of it. Therefore, this detection relies on 541161474Snetchild * empirical observation that linuxthreads sets certain 542161474Snetchild * combination of flags, so that we can make more or less 543161474Snetchild * precise detection and notify the FreeBSD kernel that several 544161474Snetchild * processes are in fact part of the same threading group, so 545161474Snetchild * that special treatment is necessary for signal delivery 546161474Snetchild * between those processes and fd locking. 547161474Snetchild */ 548167157Sjkim if ((args->flags & 0xffffff00) == LINUX_THREADING_FLAGS) 549161474Snetchild ff |= RFTHREAD; 550161474Snetchild 551168056Sjkim if (args->flags & LINUX_CLONE_PARENT_SETTID) 552168056Sjkim if (args->parent_tidptr == NULL) 553168056Sjkim return (EINVAL); 554168056Sjkim 555133819Stjr error = fork1(td, ff, 0, &p2); 556133819Stjr if (error) 557133819Stjr return (error); 558166395Skib 559167157Sjkim if (args->flags & (LINUX_CLONE_PARENT | LINUX_CLONE_THREAD)) { 560166395Skib sx_xlock(&proctree_lock); 561166395Skib PROC_LOCK(p2); 562166395Skib proc_reparent(p2, td->td_proc->p_pptr); 563166395Skib PROC_UNLOCK(p2); 564166395Skib sx_xunlock(&proctree_lock); 565166395Skib } 566168063Sjkim 567161474Snetchild /* create the emuldata */ 568161474Snetchild error = linux_proc_init(td, p2->p_pid, args->flags); 569161474Snetchild /* reference it - no need to check this */ 570165867Snetchild em = em_find(p2, EMUL_DOLOCK); 571161474Snetchild KASSERT(em != NULL, ("clone: emuldata not found.\n")); 572161474Snetchild /* and adjust it */ 573133819Stjr 574167157Sjkim if (args->flags & LINUX_CLONE_THREAD) { 575161474Snetchild#ifdef notyet 576161696Snetchild PROC_LOCK(p2); 577161474Snetchild p2->p_pgrp = td->td_proc->p_pgrp; 578161696Snetchild PROC_UNLOCK(p2); 579161474Snetchild#endif 580168063Sjkim exit_signal = 0; 581161474Snetchild } 582161474Snetchild 583167157Sjkim if (args->flags & LINUX_CLONE_CHILD_SETTID) 584161474Snetchild em->child_set_tid = args->child_tidptr; 585161474Snetchild else 586161474Snetchild em->child_set_tid = NULL; 587161474Snetchild 588167157Sjkim if (args->flags & LINUX_CLONE_CHILD_CLEARTID) 589161474Snetchild em->child_clear_tid = args->child_tidptr; 590161474Snetchild else 591161474Snetchild em->child_clear_tid = NULL; 592161696Snetchild 593161474Snetchild EMUL_UNLOCK(&emul_lock); 594161474Snetchild 595168056Sjkim if (args->flags & LINUX_CLONE_PARENT_SETTID) { 596168056Sjkim error = copyout(&p2->p_pid, args->parent_tidptr, 597168056Sjkim sizeof(p2->p_pid)); 598168056Sjkim if (error) 599168056Sjkim printf(LMSG("copyout failed!")); 600168056Sjkim } 601168056Sjkim 602133819Stjr PROC_LOCK(p2); 603133819Stjr p2->p_sigparent = exit_signal; 604133819Stjr PROC_UNLOCK(p2); 605133819Stjr td2 = FIRST_THREAD_IN_PROC(p2); 606168063Sjkim /* 607168063Sjkim * In a case of stack = NULL, we are supposed to COW calling process 608168063Sjkim * stack. This is what normal fork() does, so we just keep tf_rsp arg 609168063Sjkim * intact. 610161310Snetchild */ 611161310Snetchild if (args->stack) 612168063Sjkim td2->td_frame->tf_rsp = PTROUT(args->stack); 613133819Stjr 614167157Sjkim if (args->flags & LINUX_CLONE_SETTLS) { 615168035Sjkim struct user_segment_descriptor sd; 616168035Sjkim struct l_user_desc info; 617216634Sjkim struct pcb *pcb; 618168848Sjkim int a[2]; 619168035Sjkim 620168848Sjkim error = copyin((void *)td->td_frame->tf_rsi, &info, 621168035Sjkim sizeof(struct l_user_desc)); 622168035Sjkim if (error) { 623168035Sjkim printf(LMSG("copyin failed!")); 624168035Sjkim } else { 625168035Sjkim /* We might copy out the entry_number as GUGS32_SEL. */ 626168848Sjkim info.entry_number = GUGS32_SEL; 627168035Sjkim error = copyout(&info, (void *)td->td_frame->tf_rsi, 628168035Sjkim sizeof(struct l_user_desc)); 629168035Sjkim if (error) 630168035Sjkim printf(LMSG("copyout failed!")); 631168035Sjkim 632168035Sjkim a[0] = LINUX_LDT_entry_a(&info); 633168035Sjkim a[1] = LINUX_LDT_entry_b(&info); 634168035Sjkim 635168035Sjkim memcpy(&sd, &a, sizeof(a)); 636168035Sjkim#ifdef DEBUG 637168035Sjkim if (ldebug(clone)) 638168035Sjkim printf("Segment created in clone with " 639168035Sjkim "CLONE_SETTLS: lobase: %x, hibase: %x, " 640168035Sjkim "lolimit: %x, hilimit: %x, type: %i, " 641168035Sjkim "dpl: %i, p: %i, xx: %i, long: %i, " 642168035Sjkim "def32: %i, gran: %i\n", sd.sd_lobase, 643168035Sjkim sd.sd_hibase, sd.sd_lolimit, sd.sd_hilimit, 644168035Sjkim sd.sd_type, sd.sd_dpl, sd.sd_p, sd.sd_xx, 645168035Sjkim sd.sd_long, sd.sd_def32, sd.sd_gran); 646168035Sjkim#endif 647216634Sjkim pcb = td2->td_pcb; 648216634Sjkim pcb->pcb_gsbase = (register_t)info.base_addr; 649216634Sjkim/* XXXKIB pcb->pcb_gs32sd = sd; */ 650190620Skib td2->td_frame->tf_gs = GSEL(GUGS32_SEL, SEL_UPL); 651216634Sjkim set_pcb_flags(pcb, PCB_GS32BIT | PCB_32BIT); 652168035Sjkim } 653161474Snetchild } 654161474Snetchild 655133819Stjr#ifdef DEBUG 656133819Stjr if (ldebug(clone)) 657168063Sjkim printf(LMSG("clone: successful rfork to %d, " 658168063Sjkim "stack %p sig = %d"), (int)p2->p_pid, args->stack, 659168063Sjkim exit_signal); 660133819Stjr#endif 661167157Sjkim if (args->flags & LINUX_CLONE_VFORK) { 662166150Snetchild PROC_LOCK(p2); 663166150Snetchild p2->p_flag |= P_PPWAIT; 664166150Snetchild PROC_UNLOCK(p2); 665166150Snetchild } 666133819Stjr 667133819Stjr /* 668133819Stjr * Make this runnable after we are finished with it. 669133819Stjr */ 670170307Sjeff thread_lock(td2); 671133819Stjr TD_SET_CAN_RUN(td2); 672166188Sjeff sched_add(td2, SRQ_BORING); 673170307Sjeff thread_unlock(td2); 674133819Stjr 675133819Stjr td->td_retval[0] = p2->p_pid; 676133819Stjr td->td_retval[1] = 0; 677168063Sjkim 678167157Sjkim if (args->flags & LINUX_CLONE_VFORK) { 679168063Sjkim /* wait for the children to exit, ie. emulate vfork */ 680168063Sjkim PROC_LOCK(p2); 681163374Snetchild while (p2->p_flag & P_PPWAIT) 682188750Skib cv_wait(&p2->p_pwait, &p2->p_mtx); 683163374Snetchild PROC_UNLOCK(p2); 684163374Snetchild } 685163374Snetchild 686133819Stjr return (0); 687133819Stjr} 688133819Stjr 689133819Stjr#define STACK_SIZE (2 * 1024 * 1024) 690133819Stjr#define GUARD_SIZE (4 * PAGE_SIZE) 691133819Stjr 692133819Stjrint 693133819Stjrlinux_mmap2(struct thread *td, struct linux_mmap2_args *args) 694133819Stjr{ 695133819Stjr 696133819Stjr#ifdef DEBUG 697133819Stjr if (ldebug(mmap2)) 698168063Sjkim printf(ARGS(mmap2, "0x%08x, %d, %d, 0x%08x, %d, %d"), 699168063Sjkim args->addr, args->len, args->prot, 700133819Stjr args->flags, args->fd, args->pgoff); 701133819Stjr#endif 702133819Stjr 703198554Sjhb return (linux_mmap_common(td, PTROUT(args->addr), args->len, args->prot, 704198554Sjhb args->flags, args->fd, (uint64_t)(uint32_t)args->pgoff * 705198554Sjhb PAGE_SIZE)); 706133819Stjr} 707133819Stjr 708133819Stjrint 709133819Stjrlinux_mmap(struct thread *td, struct linux_mmap_args *args) 710133819Stjr{ 711133819Stjr int error; 712133819Stjr struct l_mmap_argv linux_args; 713133819Stjr 714133819Stjr error = copyin(args->ptr, &linux_args, sizeof(linux_args)); 715133819Stjr if (error) 716133819Stjr return (error); 717133819Stjr 718133819Stjr#ifdef DEBUG 719133819Stjr if (ldebug(mmap)) 720168063Sjkim printf(ARGS(mmap, "0x%08x, %d, %d, 0x%08x, %d, %d"), 721168063Sjkim linux_args.addr, linux_args.len, linux_args.prot, 722168063Sjkim linux_args.flags, linux_args.fd, linux_args.pgoff); 723133819Stjr#endif 724133819Stjr 725198554Sjhb return (linux_mmap_common(td, linux_args.addr, linux_args.len, 726198554Sjhb linux_args.prot, linux_args.flags, linux_args.fd, 727198554Sjhb (uint32_t)linux_args.pgoff)); 728133819Stjr} 729133819Stjr 730133819Stjrstatic int 731198554Sjhblinux_mmap_common(struct thread *td, l_uintptr_t addr, l_size_t len, l_int prot, 732198554Sjhb l_int flags, l_int fd, l_loff_t pos) 733133819Stjr{ 734133819Stjr struct proc *p = td->td_proc; 735133819Stjr struct mmap_args /* { 736133819Stjr caddr_t addr; 737133819Stjr size_t len; 738133819Stjr int prot; 739133819Stjr int flags; 740133819Stjr int fd; 741133819Stjr long pad; 742133819Stjr off_t pos; 743133819Stjr } */ bsd_args; 744133819Stjr int error; 745165832Snetchild struct file *fp; 746133819Stjr 747133819Stjr error = 0; 748133819Stjr bsd_args.flags = 0; 749165832Snetchild fp = NULL; 750165832Snetchild 751165832Snetchild /* 752165832Snetchild * Linux mmap(2): 753165832Snetchild * You must specify exactly one of MAP_SHARED and MAP_PRIVATE 754165832Snetchild */ 755198554Sjhb if (!((flags & LINUX_MAP_SHARED) ^ (flags & LINUX_MAP_PRIVATE))) 756165832Snetchild return (EINVAL); 757165832Snetchild 758198554Sjhb if (flags & LINUX_MAP_SHARED) 759133819Stjr bsd_args.flags |= MAP_SHARED; 760198554Sjhb if (flags & LINUX_MAP_PRIVATE) 761133819Stjr bsd_args.flags |= MAP_PRIVATE; 762198554Sjhb if (flags & LINUX_MAP_FIXED) 763133819Stjr bsd_args.flags |= MAP_FIXED; 764208994Skan if (flags & LINUX_MAP_ANON) { 765208994Skan /* Enforce pos to be on page boundary, then ignore. */ 766208994Skan if ((pos & PAGE_MASK) != 0) 767208994Skan return (EINVAL); 768208994Skan pos = 0; 769133819Stjr bsd_args.flags |= MAP_ANON; 770208994Skan } else 771133819Stjr bsd_args.flags |= MAP_NOSYNC; 772198554Sjhb if (flags & LINUX_MAP_GROWSDOWN) 773133819Stjr bsd_args.flags |= MAP_STACK; 774133819Stjr 775166727Sjkim /* 776166727Sjkim * PROT_READ, PROT_WRITE, or PROT_EXEC implies PROT_READ and PROT_EXEC 777166727Sjkim * on Linux/i386. We do this to ensure maximum compatibility. 778166727Sjkim * Linux/ia64 does the same in i386 emulation mode. 779166727Sjkim */ 780198554Sjhb bsd_args.prot = prot; 781166727Sjkim if (bsd_args.prot & (PROT_READ | PROT_WRITE | PROT_EXEC)) 782166727Sjkim bsd_args.prot |= PROT_READ | PROT_EXEC; 783166727Sjkim 784167048Sjkim /* Linux does not check file descriptor when MAP_ANONYMOUS is set. */ 785198554Sjhb bsd_args.fd = (bsd_args.flags & MAP_ANON) ? -1 : fd; 786167048Sjkim if (bsd_args.fd != -1) { 787166727Sjkim /* 788166727Sjkim * Linux follows Solaris mmap(2) description: 789166727Sjkim * The file descriptor fildes is opened with 790166727Sjkim * read permission, regardless of the 791166727Sjkim * protection options specified. 792166727Sjkim */ 793166727Sjkim 794167048Sjkim if ((error = fget(td, bsd_args.fd, &fp)) != 0) 795166727Sjkim return (error); 796166727Sjkim if (fp->f_type != DTYPE_VNODE) { 797166727Sjkim fdrop(fp, td); 798166727Sjkim return (EINVAL); 799166727Sjkim } 800166727Sjkim 801166727Sjkim /* Linux mmap() just fails for O_WRONLY files */ 802166727Sjkim if (!(fp->f_flag & FREAD)) { 803166727Sjkim fdrop(fp, td); 804166727Sjkim return (EACCES); 805166727Sjkim } 806166727Sjkim 807166727Sjkim fdrop(fp, td); 808166727Sjkim } 809166727Sjkim 810198554Sjhb if (flags & LINUX_MAP_GROWSDOWN) { 811168063Sjkim /* 812168063Sjkim * The Linux MAP_GROWSDOWN option does not limit auto 813133819Stjr * growth of the region. Linux mmap with this option 814133819Stjr * takes as addr the inital BOS, and as len, the initial 815133819Stjr * region size. It can then grow down from addr without 816168063Sjkim * limit. However, Linux threads has an implicit internal 817133819Stjr * limit to stack size of STACK_SIZE. Its just not 818168063Sjkim * enforced explicitly in Linux. But, here we impose 819133819Stjr * a limit of (STACK_SIZE - GUARD_SIZE) on the stack 820133819Stjr * region, since we can do this with our mmap. 821133819Stjr * 822133819Stjr * Our mmap with MAP_STACK takes addr as the maximum 823133819Stjr * downsize limit on BOS, and as len the max size of 824168848Sjkim * the region. It then maps the top SGROWSIZ bytes, 825166944Snetchild * and auto grows the region down, up to the limit 826133819Stjr * in addr. 827133819Stjr * 828133819Stjr * If we don't use the MAP_STACK option, the effect 829133819Stjr * of this code is to allocate a stack region of a 830133819Stjr * fixed size of (STACK_SIZE - GUARD_SIZE). 831133819Stjr */ 832133819Stjr 833198554Sjhb if ((caddr_t)PTRIN(addr) + len > p->p_vmspace->vm_maxsaddr) { 834168063Sjkim /* 835168063Sjkim * Some Linux apps will attempt to mmap 836133819Stjr * thread stacks near the top of their 837133819Stjr * address space. If their TOS is greater 838133819Stjr * than vm_maxsaddr, vm_map_growstack() 839133819Stjr * will confuse the thread stack with the 840133819Stjr * process stack and deliver a SEGV if they 841133819Stjr * attempt to grow the thread stack past their 842133819Stjr * current stacksize rlimit. To avoid this, 843133819Stjr * adjust vm_maxsaddr upwards to reflect 844133819Stjr * the current stacksize rlimit rather 845133819Stjr * than the maximum possible stacksize. 846133819Stjr * It would be better to adjust the 847133819Stjr * mmap'ed region, but some apps do not check 848133819Stjr * mmap's return value. 849133819Stjr */ 850133819Stjr PROC_LOCK(p); 851166727Sjkim p->p_vmspace->vm_maxsaddr = (char *)LINUX32_USRSTACK - 852133819Stjr lim_cur(p, RLIMIT_STACK); 853133819Stjr PROC_UNLOCK(p); 854133819Stjr } 855133819Stjr 856168063Sjkim /* 857176193Sjkim * This gives us our maximum stack size and a new BOS. 858176193Sjkim * If we're using VM_STACK, then mmap will just map 859176193Sjkim * the top SGROWSIZ bytes, and let the stack grow down 860176193Sjkim * to the limit at BOS. If we're not using VM_STACK 861176193Sjkim * we map the full stack, since we don't have a way 862176193Sjkim * to autogrow it. 863133819Stjr */ 864198554Sjhb if (len > STACK_SIZE - GUARD_SIZE) { 865198554Sjhb bsd_args.addr = (caddr_t)PTRIN(addr); 866198554Sjhb bsd_args.len = len; 867176193Sjkim } else { 868198554Sjhb bsd_args.addr = (caddr_t)PTRIN(addr) - 869198554Sjhb (STACK_SIZE - GUARD_SIZE - len); 870176193Sjkim bsd_args.len = STACK_SIZE - GUARD_SIZE; 871176193Sjkim } 872133819Stjr } else { 873198554Sjhb bsd_args.addr = (caddr_t)PTRIN(addr); 874198554Sjhb bsd_args.len = len; 875133819Stjr } 876198554Sjhb bsd_args.pos = pos; 877133819Stjr 878133819Stjr#ifdef DEBUG 879133819Stjr if (ldebug(mmap)) 880133819Stjr printf("-> %s(%p, %d, %d, 0x%08x, %d, 0x%x)\n", 881133819Stjr __func__, 882133843Sobrien (void *)bsd_args.addr, (int)bsd_args.len, bsd_args.prot, 883133819Stjr bsd_args.flags, bsd_args.fd, (int)bsd_args.pos); 884133819Stjr#endif 885133819Stjr error = mmap(td, &bsd_args); 886133819Stjr#ifdef DEBUG 887133819Stjr if (ldebug(mmap)) 888133819Stjr printf("-> %s() return: 0x%x (0x%08x)\n", 889133819Stjr __func__, error, (u_int)td->td_retval[0]); 890133819Stjr#endif 891133819Stjr return (error); 892133819Stjr} 893133819Stjr 894133819Stjrint 895168035Sjkimlinux_mprotect(struct thread *td, struct linux_mprotect_args *uap) 896168035Sjkim{ 897168035Sjkim struct mprotect_args bsd_args; 898168035Sjkim 899168035Sjkim bsd_args.addr = uap->addr; 900168035Sjkim bsd_args.len = uap->len; 901168035Sjkim bsd_args.prot = uap->prot; 902168035Sjkim if (bsd_args.prot & (PROT_READ | PROT_WRITE | PROT_EXEC)) 903168035Sjkim bsd_args.prot |= PROT_READ | PROT_EXEC; 904168035Sjkim return (mprotect(td, &bsd_args)); 905168035Sjkim} 906168035Sjkim 907168035Sjkimint 908166729Sjkimlinux_iopl(struct thread *td, struct linux_iopl_args *args) 909166729Sjkim{ 910166729Sjkim int error; 911166729Sjkim 912166729Sjkim if (args->level < 0 || args->level > 3) 913166729Sjkim return (EINVAL); 914166729Sjkim if ((error = priv_check(td, PRIV_IO)) != 0) 915166729Sjkim return (error); 916166729Sjkim if ((error = securelevel_gt(td->td_ucred, 0)) != 0) 917166729Sjkim return (error); 918166729Sjkim td->td_frame->tf_rflags = (td->td_frame->tf_rflags & ~PSL_IOPL) | 919166729Sjkim (args->level * (PSL_IOPL / 3)); 920166729Sjkim 921166729Sjkim return (0); 922166729Sjkim} 923166729Sjkim 924166729Sjkimint 925133819Stjrlinux_pipe(struct thread *td, struct linux_pipe_args *args) 926133819Stjr{ 927133819Stjr int error; 928184849Sed int fildes[2]; 929133819Stjr 930133819Stjr#ifdef DEBUG 931133819Stjr if (ldebug(pipe)) 932133819Stjr printf(ARGS(pipe, "*")); 933133819Stjr#endif 934133819Stjr 935184849Sed error = kern_pipe(td, fildes); 936184849Sed if (error) 937133819Stjr return (error); 938133819Stjr 939184849Sed /* XXX: Close descriptors on error. */ 940184849Sed return (copyout(fildes, args->pipefds, sizeof fildes)); 941133819Stjr} 942166731Sjkim 943133819Stjrint 944133819Stjrlinux_sigaction(struct thread *td, struct linux_sigaction_args *args) 945133819Stjr{ 946133819Stjr l_osigaction_t osa; 947133819Stjr l_sigaction_t act, oact; 948133819Stjr int error; 949133819Stjr 950133819Stjr#ifdef DEBUG 951133819Stjr if (ldebug(sigaction)) 952133819Stjr printf(ARGS(sigaction, "%d, %p, %p"), 953133819Stjr args->sig, (void *)args->nsa, (void *)args->osa); 954133819Stjr#endif 955133819Stjr 956133819Stjr if (args->nsa != NULL) { 957133819Stjr error = copyin(args->nsa, &osa, sizeof(l_osigaction_t)); 958133819Stjr if (error) 959133819Stjr return (error); 960133819Stjr act.lsa_handler = osa.lsa_handler; 961133819Stjr act.lsa_flags = osa.lsa_flags; 962133819Stjr act.lsa_restorer = osa.lsa_restorer; 963133819Stjr LINUX_SIGEMPTYSET(act.lsa_mask); 964133819Stjr act.lsa_mask.__bits[0] = osa.lsa_mask; 965133819Stjr } 966133819Stjr 967133819Stjr error = linux_do_sigaction(td, args->sig, args->nsa ? &act : NULL, 968133819Stjr args->osa ? &oact : NULL); 969133819Stjr 970133819Stjr if (args->osa != NULL && !error) { 971133819Stjr osa.lsa_handler = oact.lsa_handler; 972133819Stjr osa.lsa_flags = oact.lsa_flags; 973133819Stjr osa.lsa_restorer = oact.lsa_restorer; 974133819Stjr osa.lsa_mask = oact.lsa_mask.__bits[0]; 975133819Stjr error = copyout(&osa, args->osa, sizeof(l_osigaction_t)); 976133819Stjr } 977133819Stjr 978133819Stjr return (error); 979133819Stjr} 980133819Stjr 981133819Stjr/* 982168063Sjkim * Linux has two extra args, restart and oldmask. We don't use these, 983133819Stjr * but it seems that "restart" is actually a context pointer that 984133819Stjr * enables the signal to happen with a different register set. 985133819Stjr */ 986133819Stjrint 987133819Stjrlinux_sigsuspend(struct thread *td, struct linux_sigsuspend_args *args) 988133819Stjr{ 989133819Stjr sigset_t sigmask; 990133819Stjr l_sigset_t mask; 991133819Stjr 992133819Stjr#ifdef DEBUG 993133819Stjr if (ldebug(sigsuspend)) 994133819Stjr printf(ARGS(sigsuspend, "%08lx"), (unsigned long)args->mask); 995133819Stjr#endif 996133819Stjr 997133819Stjr LINUX_SIGEMPTYSET(mask); 998133819Stjr mask.__bits[0] = args->mask; 999133819Stjr linux_to_bsd_sigset(&mask, &sigmask); 1000133819Stjr return (kern_sigsuspend(td, sigmask)); 1001133819Stjr} 1002133819Stjr 1003133819Stjrint 1004133819Stjrlinux_rt_sigsuspend(struct thread *td, struct linux_rt_sigsuspend_args *uap) 1005133819Stjr{ 1006133819Stjr l_sigset_t lmask; 1007133819Stjr sigset_t sigmask; 1008133819Stjr int error; 1009133819Stjr 1010133819Stjr#ifdef DEBUG 1011133819Stjr if (ldebug(rt_sigsuspend)) 1012133819Stjr printf(ARGS(rt_sigsuspend, "%p, %d"), 1013133819Stjr (void *)uap->newset, uap->sigsetsize); 1014133819Stjr#endif 1015133819Stjr 1016133819Stjr if (uap->sigsetsize != sizeof(l_sigset_t)) 1017133819Stjr return (EINVAL); 1018133819Stjr 1019133819Stjr error = copyin(uap->newset, &lmask, sizeof(l_sigset_t)); 1020133819Stjr if (error) 1021133819Stjr return (error); 1022133819Stjr 1023133819Stjr linux_to_bsd_sigset(&lmask, &sigmask); 1024133819Stjr return (kern_sigsuspend(td, sigmask)); 1025133819Stjr} 1026133819Stjr 1027133819Stjrint 1028133819Stjrlinux_pause(struct thread *td, struct linux_pause_args *args) 1029133819Stjr{ 1030133819Stjr struct proc *p = td->td_proc; 1031133819Stjr sigset_t sigmask; 1032133819Stjr 1033133819Stjr#ifdef DEBUG 1034133819Stjr if (ldebug(pause)) 1035133819Stjr printf(ARGS(pause, "")); 1036133819Stjr#endif 1037133819Stjr 1038133819Stjr PROC_LOCK(p); 1039133819Stjr sigmask = td->td_sigmask; 1040133819Stjr PROC_UNLOCK(p); 1041133819Stjr return (kern_sigsuspend(td, sigmask)); 1042133819Stjr} 1043133819Stjr 1044133819Stjrint 1045133819Stjrlinux_sigaltstack(struct thread *td, struct linux_sigaltstack_args *uap) 1046133819Stjr{ 1047133819Stjr stack_t ss, oss; 1048133819Stjr l_stack_t lss; 1049133819Stjr int error; 1050133819Stjr 1051133819Stjr#ifdef DEBUG 1052133819Stjr if (ldebug(sigaltstack)) 1053133819Stjr printf(ARGS(sigaltstack, "%p, %p"), uap->uss, uap->uoss); 1054133819Stjr#endif 1055133819Stjr 1056133819Stjr if (uap->uss != NULL) { 1057133819Stjr error = copyin(uap->uss, &lss, sizeof(l_stack_t)); 1058133819Stjr if (error) 1059133819Stjr return (error); 1060133819Stjr 1061133819Stjr ss.ss_sp = PTRIN(lss.ss_sp); 1062133819Stjr ss.ss_size = lss.ss_size; 1063133819Stjr ss.ss_flags = linux_to_bsd_sigaltstack(lss.ss_flags); 1064133819Stjr } 1065134269Sjhb error = kern_sigaltstack(td, (uap->uss != NULL) ? &ss : NULL, 1066134269Sjhb (uap->uoss != NULL) ? &oss : NULL); 1067133819Stjr if (!error && uap->uoss != NULL) { 1068133819Stjr lss.ss_sp = PTROUT(oss.ss_sp); 1069133819Stjr lss.ss_size = oss.ss_size; 1070133819Stjr lss.ss_flags = bsd_to_linux_sigaltstack(oss.ss_flags); 1071133819Stjr error = copyout(&lss, uap->uoss, sizeof(l_stack_t)); 1072133819Stjr } 1073133819Stjr 1074133819Stjr return (error); 1075133819Stjr} 1076133819Stjr 1077133819Stjrint 1078133819Stjrlinux_ftruncate64(struct thread *td, struct linux_ftruncate64_args *args) 1079133819Stjr{ 1080133819Stjr struct ftruncate_args sa; 1081133819Stjr 1082133819Stjr#ifdef DEBUG 1083133819Stjr if (ldebug(ftruncate64)) 1084133819Stjr printf(ARGS(ftruncate64, "%u, %jd"), args->fd, 1085133819Stjr (intmax_t)args->length); 1086133819Stjr#endif 1087133819Stjr 1088133819Stjr sa.fd = args->fd; 1089133819Stjr sa.length = args->length; 1090133819Stjr return ftruncate(td, &sa); 1091133819Stjr} 1092133819Stjr 1093133819Stjrint 1094133819Stjrlinux_gettimeofday(struct thread *td, struct linux_gettimeofday_args *uap) 1095133819Stjr{ 1096133819Stjr struct timeval atv; 1097133819Stjr l_timeval atv32; 1098133819Stjr struct timezone rtz; 1099133819Stjr int error = 0; 1100133819Stjr 1101133819Stjr if (uap->tp) { 1102133819Stjr microtime(&atv); 1103133819Stjr atv32.tv_sec = atv.tv_sec; 1104133819Stjr atv32.tv_usec = atv.tv_usec; 1105168844Sjkim error = copyout(&atv32, uap->tp, sizeof(atv32)); 1106133819Stjr } 1107133819Stjr if (error == 0 && uap->tzp != NULL) { 1108133819Stjr rtz.tz_minuteswest = tz_minuteswest; 1109133819Stjr rtz.tz_dsttime = tz_dsttime; 1110168844Sjkim error = copyout(&rtz, uap->tzp, sizeof(rtz)); 1111133819Stjr } 1112133819Stjr return (error); 1113133819Stjr} 1114133819Stjr 1115133819Stjrint 1116168843Sjkimlinux_settimeofday(struct thread *td, struct linux_settimeofday_args *uap) 1117168843Sjkim{ 1118168843Sjkim l_timeval atv32; 1119168843Sjkim struct timeval atv, *tvp; 1120168843Sjkim struct timezone atz, *tzp; 1121168843Sjkim int error; 1122168843Sjkim 1123168843Sjkim if (uap->tp) { 1124168843Sjkim error = copyin(uap->tp, &atv32, sizeof(atv32)); 1125168843Sjkim if (error) 1126168843Sjkim return (error); 1127168843Sjkim atv.tv_sec = atv32.tv_sec; 1128168843Sjkim atv.tv_usec = atv32.tv_usec; 1129168843Sjkim tvp = &atv; 1130168843Sjkim } else 1131168843Sjkim tvp = NULL; 1132168843Sjkim if (uap->tzp) { 1133168843Sjkim error = copyin(uap->tzp, &atz, sizeof(atz)); 1134168843Sjkim if (error) 1135168843Sjkim return (error); 1136168843Sjkim tzp = &atz; 1137168843Sjkim } else 1138168843Sjkim tzp = NULL; 1139168843Sjkim return (kern_settimeofday(td, tvp, tzp)); 1140168843Sjkim} 1141168843Sjkim 1142168843Sjkimint 1143133819Stjrlinux_getrusage(struct thread *td, struct linux_getrusage_args *uap) 1144133819Stjr{ 1145136152Sjhb struct l_rusage s32; 1146136152Sjhb struct rusage s; 1147133819Stjr int error; 1148133819Stjr 1149136152Sjhb error = kern_getrusage(td, uap->who, &s); 1150133819Stjr if (error != 0) 1151133819Stjr return (error); 1152136152Sjhb if (uap->rusage != NULL) { 1153218028Sdchagin bsd_to_linux_rusage(&s, &s32); 1154136152Sjhb error = copyout(&s32, uap->rusage, sizeof(s32)); 1155133819Stjr } 1156133819Stjr return (error); 1157133819Stjr} 1158133819Stjr 1159133819Stjrint 1160133819Stjrlinux_sched_rr_get_interval(struct thread *td, 1161133819Stjr struct linux_sched_rr_get_interval_args *uap) 1162133819Stjr{ 1163133819Stjr struct timespec ts; 1164133819Stjr struct l_timespec ts32; 1165133819Stjr int error; 1166133819Stjr 1167144449Sjhb error = kern_sched_rr_get_interval(td, uap->pid, &ts); 1168133819Stjr if (error != 0) 1169133819Stjr return (error); 1170133819Stjr ts32.tv_sec = ts.tv_sec; 1171133819Stjr ts32.tv_nsec = ts.tv_nsec; 1172133819Stjr return (copyout(&ts32, uap->interval, sizeof(ts32))); 1173133819Stjr} 1174133819Stjr 1175133819Stjrint 1176168035Sjkimlinux_set_thread_area(struct thread *td, 1177168035Sjkim struct linux_set_thread_area_args *args) 1178133819Stjr{ 1179168035Sjkim struct l_user_desc info; 1180168035Sjkim struct user_segment_descriptor sd; 1181216634Sjkim struct pcb *pcb; 1182168035Sjkim int a[2]; 1183168035Sjkim int error; 1184133819Stjr 1185168035Sjkim error = copyin(args->desc, &info, sizeof(struct l_user_desc)); 1186168035Sjkim if (error) 1187168035Sjkim return (error); 1188168035Sjkim 1189168035Sjkim#ifdef DEBUG 1190168035Sjkim if (ldebug(set_thread_area)) 1191168848Sjkim printf(ARGS(set_thread_area, "%i, %x, %x, %i, %i, %i, " 1192168035Sjkim "%i, %i, %i"), info.entry_number, info.base_addr, 1193168035Sjkim info.limit, info.seg_32bit, info.contents, 1194168035Sjkim info.read_exec_only, info.limit_in_pages, 1195168035Sjkim info.seg_not_present, info.useable); 1196168035Sjkim#endif 1197168035Sjkim 1198168035Sjkim /* 1199168035Sjkim * Semantics of Linux version: every thread in the system has array 1200168035Sjkim * of three TLS descriptors. 1st is GLIBC TLS, 2nd is WINE, 3rd unknown. 1201168035Sjkim * This syscall loads one of the selected TLS decriptors with a value 1202168035Sjkim * and also loads GDT descriptors 6, 7 and 8 with the content of 1203168035Sjkim * the per-thread descriptors. 1204168035Sjkim * 1205168035Sjkim * Semantics of FreeBSD version: I think we can ignore that Linux has 1206168035Sjkim * three per-thread descriptors and use just the first one. 1207168035Sjkim * The tls_array[] is used only in [gs]et_thread_area() syscalls and 1208168035Sjkim * for loading the GDT descriptors. We use just one GDT descriptor 1209168035Sjkim * for TLS, so we will load just one. 1210168848Sjkim * 1211168848Sjkim * XXX: This doesn't work when a user space process tries to use more 1212168035Sjkim * than one TLS segment. Comment in the Linux source says wine might 1213168848Sjkim * do this. 1214168035Sjkim */ 1215168035Sjkim 1216168035Sjkim /* 1217168035Sjkim * GLIBC reads current %gs and call set_thread_area() with it. 1218168035Sjkim * We should let GUDATA_SEL and GUGS32_SEL proceed as well because 1219168035Sjkim * we use these segments. 1220168035Sjkim */ 1221168035Sjkim switch (info.entry_number) { 1222168035Sjkim case GUGS32_SEL: 1223168035Sjkim case GUDATA_SEL: 1224168035Sjkim case 6: 1225168035Sjkim case -1: 1226168035Sjkim info.entry_number = GUGS32_SEL; 1227168035Sjkim break; 1228168035Sjkim default: 1229168035Sjkim return (EINVAL); 1230168035Sjkim } 1231168035Sjkim 1232168035Sjkim /* 1233168035Sjkim * We have to copy out the GDT entry we use. 1234168848Sjkim * 1235168848Sjkim * XXX: What if a user space program does not check the return value 1236168848Sjkim * and tries to use 6, 7 or 8? 1237168035Sjkim */ 1238168035Sjkim error = copyout(&info, args->desc, sizeof(struct l_user_desc)); 1239168035Sjkim if (error) 1240168035Sjkim return (error); 1241168035Sjkim 1242168035Sjkim if (LINUX_LDT_empty(&info)) { 1243168035Sjkim a[0] = 0; 1244168035Sjkim a[1] = 0; 1245168035Sjkim } else { 1246168035Sjkim a[0] = LINUX_LDT_entry_a(&info); 1247168035Sjkim a[1] = LINUX_LDT_entry_b(&info); 1248168035Sjkim } 1249168035Sjkim 1250168035Sjkim memcpy(&sd, &a, sizeof(a)); 1251168035Sjkim#ifdef DEBUG 1252168035Sjkim if (ldebug(set_thread_area)) 1253168035Sjkim printf("Segment created in set_thread_area: " 1254168035Sjkim "lobase: %x, hibase: %x, lolimit: %x, hilimit: %x, " 1255168035Sjkim "type: %i, dpl: %i, p: %i, xx: %i, long: %i, " 1256168035Sjkim "def32: %i, gran: %i\n", 1257168035Sjkim sd.sd_lobase, 1258168035Sjkim sd.sd_hibase, 1259168035Sjkim sd.sd_lolimit, 1260168035Sjkim sd.sd_hilimit, 1261168035Sjkim sd.sd_type, 1262168035Sjkim sd.sd_dpl, 1263168035Sjkim sd.sd_p, 1264168035Sjkim sd.sd_xx, 1265168035Sjkim sd.sd_long, 1266168035Sjkim sd.sd_def32, 1267168035Sjkim sd.sd_gran); 1268168035Sjkim#endif 1269168035Sjkim 1270216634Sjkim pcb = td->td_pcb; 1271216634Sjkim pcb->pcb_gsbase = (register_t)info.base_addr; 1272216634Sjkim set_pcb_flags(pcb, PCB_32BIT | PCB_GS32BIT); 1273190620Skib update_gdt_gsbase(td, info.base_addr); 1274168035Sjkim 1275168035Sjkim return (0); 1276133819Stjr} 1277218030Sdchagin 1278218030Sdchaginint 1279218030Sdchaginlinux_wait4(struct thread *td, struct linux_wait4_args *args) 1280218030Sdchagin{ 1281218030Sdchagin int error, options; 1282218030Sdchagin struct rusage ru, *rup; 1283218030Sdchagin struct l_rusage lru; 1284218030Sdchagin struct proc *p; 1285218030Sdchagin 1286218030Sdchagin#ifdef DEBUG 1287218030Sdchagin if (ldebug(wait4)) 1288218030Sdchagin printf(ARGS(wait4, "%d, %p, %d, %p"), 1289218030Sdchagin args->pid, (void *)args->status, args->options, 1290218030Sdchagin (void *)args->rusage); 1291218030Sdchagin#endif 1292218030Sdchagin 1293218030Sdchagin options = (args->options & (WNOHANG | WUNTRACED)); 1294218030Sdchagin /* WLINUXCLONE should be equal to __WCLONE, but we make sure */ 1295218030Sdchagin if (args->options & __WCLONE) 1296218030Sdchagin options |= WLINUXCLONE; 1297218030Sdchagin 1298218030Sdchagin if (args->rusage != NULL) 1299218030Sdchagin rup = &ru; 1300218030Sdchagin else 1301218030Sdchagin rup = NULL; 1302218030Sdchagin error = linux_common_wait(td, args->pid, args->status, options, rup); 1303218030Sdchagin if (error) 1304218030Sdchagin return (error); 1305218030Sdchagin 1306218030Sdchagin p = td->td_proc; 1307218030Sdchagin PROC_LOCK(p); 1308218030Sdchagin sigqueue_delete(&p->p_sigqueue, SIGCHLD); 1309218030Sdchagin PROC_UNLOCK(p); 1310218030Sdchagin 1311218030Sdchagin if (args->rusage != NULL) { 1312218030Sdchagin bsd_to_linux_rusage(rup, &lru); 1313218030Sdchagin error = copyout(&lru, args->rusage, sizeof(lru)); 1314218030Sdchagin } 1315218030Sdchagin 1316218030Sdchagin return (error); 1317218030Sdchagin} 1318