linux32_machdep.c revision 218028
1133819Stjr/*- 2133819Stjr * Copyright (c) 2004 Tim J. Robbins 3133819Stjr * Copyright (c) 2002 Doug Rabson 4133819Stjr * Copyright (c) 2000 Marcel Moolenaar 5133819Stjr * All rights reserved. 6133819Stjr * 7133819Stjr * Redistribution and use in source and binary forms, with or without 8133819Stjr * modification, are permitted provided that the following conditions 9133819Stjr * are met: 10133819Stjr * 1. Redistributions of source code must retain the above copyright 11133819Stjr * notice, this list of conditions and the following disclaimer 12133819Stjr * in this position and unchanged. 13133819Stjr * 2. Redistributions in binary form must reproduce the above copyright 14133819Stjr * notice, this list of conditions and the following disclaimer in the 15133819Stjr * documentation and/or other materials provided with the distribution. 16133819Stjr * 3. The name of the author may not be used to endorse or promote products 17133819Stjr * derived from this software without specific prior written permission. 18133819Stjr * 19133819Stjr * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 20133819Stjr * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 21133819Stjr * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 22133819Stjr * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 23133819Stjr * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 24133819Stjr * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25133819Stjr * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26133819Stjr * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27133819Stjr * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 28133819Stjr * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29133819Stjr */ 30133819Stjr 31133819Stjr#include <sys/cdefs.h> 32133819Stjr__FBSDID("$FreeBSD: head/sys/amd64/linux32/linux32_machdep.c 218028 2011-01-28 18:28:06Z dchagin $"); 33133819Stjr 34133819Stjr#include <sys/param.h> 35133819Stjr#include <sys/kernel.h> 36133819Stjr#include <sys/systm.h> 37165832Snetchild#include <sys/file.h> 38165832Snetchild#include <sys/fcntl.h> 39162954Sphk#include <sys/clock.h> 40142057Sjhb#include <sys/imgact.h> 41161310Snetchild#include <sys/limits.h> 42133819Stjr#include <sys/lock.h> 43133819Stjr#include <sys/malloc.h> 44133819Stjr#include <sys/mman.h> 45133819Stjr#include <sys/mutex.h> 46166729Sjkim#include <sys/priv.h> 47133819Stjr#include <sys/proc.h> 48133819Stjr#include <sys/resource.h> 49133819Stjr#include <sys/resourcevar.h> 50166188Sjeff#include <sys/sched.h> 51133819Stjr#include <sys/syscallsubr.h> 52133819Stjr#include <sys/sysproto.h> 53133819Stjr#include <sys/unistd.h> 54133819Stjr 55133819Stjr#include <machine/frame.h> 56168035Sjkim#include <machine/pcb.h> 57166729Sjkim#include <machine/psl.h> 58168035Sjkim#include <machine/segments.h> 59168035Sjkim#include <machine/specialreg.h> 60133819Stjr 61133819Stjr#include <vm/vm.h> 62133819Stjr#include <vm/pmap.h> 63133819Stjr#include <vm/vm_map.h> 64133819Stjr 65210431Skib#include <compat/freebsd32/freebsd32_util.h> 66133819Stjr#include <amd64/linux32/linux.h> 67133819Stjr#include <amd64/linux32/linux32_proto.h> 68133819Stjr#include <compat/linux/linux_ipc.h> 69133819Stjr#include <compat/linux/linux_signal.h> 70133819Stjr#include <compat/linux/linux_util.h> 71161474Snetchild#include <compat/linux/linux_emul.h> 72133819Stjr 73133819Stjrstruct l_old_select_argv { 74133819Stjr l_int nfds; 75133819Stjr l_uintptr_t readfds; 76133819Stjr l_uintptr_t writefds; 77133819Stjr l_uintptr_t exceptfds; 78133819Stjr l_uintptr_t timeout; 79133819Stjr} __packed; 80133819Stjr 81133819Stjrint 82133819Stjrlinux_to_bsd_sigaltstack(int lsa) 83133819Stjr{ 84133819Stjr int bsa = 0; 85133819Stjr 86133819Stjr if (lsa & LINUX_SS_DISABLE) 87133819Stjr bsa |= SS_DISABLE; 88133819Stjr if (lsa & LINUX_SS_ONSTACK) 89133819Stjr bsa |= SS_ONSTACK; 90133819Stjr return (bsa); 91133819Stjr} 92133819Stjr 93198554Sjhbstatic int linux_mmap_common(struct thread *td, l_uintptr_t addr, 94198554Sjhb l_size_t len, l_int prot, l_int flags, l_int fd, 95198554Sjhb l_loff_t pos); 96198554Sjhb 97133819Stjrint 98133819Stjrbsd_to_linux_sigaltstack(int bsa) 99133819Stjr{ 100133819Stjr int lsa = 0; 101133819Stjr 102133819Stjr if (bsa & SS_DISABLE) 103133819Stjr lsa |= LINUX_SS_DISABLE; 104133819Stjr if (bsa & SS_ONSTACK) 105133819Stjr lsa |= LINUX_SS_ONSTACK; 106133819Stjr return (lsa); 107133819Stjr} 108133819Stjr 109218028Sdchaginstatic void bsd_to_linux_rusage(struct rusage *ru, struct l_rusage *lru) 110218028Sdchagin{ 111218028Sdchagin lru->ru_utime.tv_sec = ru->ru_utime.tv_sec; 112218028Sdchagin lru->ru_utime.tv_usec = ru->ru_utime.tv_usec; 113218028Sdchagin lru->ru_stime.tv_sec = ru->ru_stime.tv_sec; 114218028Sdchagin lru->ru_stime.tv_usec = ru->ru_stime.tv_usec; 115218028Sdchagin lru->ru_maxrss = ru->ru_maxrss; 116218028Sdchagin lru->ru_ixrss = ru->ru_ixrss; 117218028Sdchagin lru->ru_idrss = ru->ru_idrss; 118218028Sdchagin lru->ru_isrss = ru->ru_isrss; 119218028Sdchagin lru->ru_minflt = ru->ru_minflt; 120218028Sdchagin lru->ru_majflt = ru->ru_majflt; 121218028Sdchagin lru->ru_nswap = ru->ru_nswap; 122218028Sdchagin lru->ru_inblock = ru->ru_inblock; 123218028Sdchagin lru->ru_oublock = ru->ru_oublock; 124218028Sdchagin lru->ru_msgsnd = ru->ru_msgsnd; 125218028Sdchagin lru->ru_msgrcv = ru->ru_msgrcv; 126218028Sdchagin lru->ru_nsignals = ru->ru_nsignals; 127218028Sdchagin lru->ru_nvcsw = ru->ru_nvcsw; 128218028Sdchagin lru->ru_nivcsw = ru->ru_nivcsw; 129218028Sdchagin} 130218028Sdchagin 131142057Sjhbint 132142057Sjhblinux_execve(struct thread *td, struct linux_execve_args *args) 133142057Sjhb{ 134142057Sjhb struct image_args eargs; 135142057Sjhb char *path; 136142057Sjhb int error; 137142057Sjhb 138142057Sjhb LCONVPATHEXIST(td, args->path, &path); 139142057Sjhb 140142057Sjhb#ifdef DEBUG 141142057Sjhb if (ldebug(execve)) 142142057Sjhb printf(ARGS(execve, "%s"), path); 143142057Sjhb#endif 144142057Sjhb 145210431Skib error = freebsd32_exec_copyin_args(&eargs, path, UIO_SYSSPACE, 146210431Skib args->argp, args->envp); 147142057Sjhb free(path, M_TEMP); 148142057Sjhb if (error == 0) 149142057Sjhb error = kern_execve(td, &eargs, NULL); 150161474Snetchild if (error == 0) 151168063Sjkim /* Linux process can execute FreeBSD one, do not attempt 152161474Snetchild * to create emuldata for such process using 153161474Snetchild * linux_proc_init, this leads to a panic on KASSERT 154168063Sjkim * because such process has p->p_emuldata == NULL. 155161474Snetchild */ 156217896Sdchagin if (SV_PROC_ABI(td->td_proc) == SV_ABI_LINUX) 157168063Sjkim error = linux_proc_init(td, 0, 0); 158142057Sjhb return (error); 159142057Sjhb} 160142057Sjhb 161185438SkibCTASSERT(sizeof(struct l_iovec32) == 8); 162133819Stjr 163144449Sjhbstatic int 164185438Skiblinux32_copyinuio(struct l_iovec32 *iovp, l_ulong iovcnt, struct uio **uiop) 165133819Stjr{ 166185438Skib struct l_iovec32 iov32; 167144449Sjhb struct iovec *iov; 168144449Sjhb struct uio *uio; 169185438Skib uint32_t iovlen; 170144449Sjhb int error, i; 171133819Stjr 172144449Sjhb *uiop = NULL; 173144449Sjhb if (iovcnt > UIO_MAXIOV) 174133819Stjr return (EINVAL); 175144449Sjhb iovlen = iovcnt * sizeof(struct iovec); 176168844Sjkim uio = malloc(iovlen + sizeof(*uio), M_IOV, M_WAITOK); 177144449Sjhb iov = (struct iovec *)(uio + 1); 178144449Sjhb for (i = 0; i < iovcnt; i++) { 179185438Skib error = copyin(&iovp[i], &iov32, sizeof(struct l_iovec32)); 180144449Sjhb if (error) { 181144449Sjhb free(uio, M_IOV); 182144449Sjhb return (error); 183144449Sjhb } 184144449Sjhb iov[i].iov_base = PTRIN(iov32.iov_base); 185144449Sjhb iov[i].iov_len = iov32.iov_len; 186133819Stjr } 187144449Sjhb uio->uio_iov = iov; 188144449Sjhb uio->uio_iovcnt = iovcnt; 189144449Sjhb uio->uio_segflg = UIO_USERSPACE; 190144449Sjhb uio->uio_offset = -1; 191144449Sjhb uio->uio_resid = 0; 192144449Sjhb for (i = 0; i < iovcnt; i++) { 193144449Sjhb if (iov->iov_len > INT_MAX - uio->uio_resid) { 194144449Sjhb free(uio, M_IOV); 195144449Sjhb return (EINVAL); 196144449Sjhb } 197144449Sjhb uio->uio_resid += iov->iov_len; 198144449Sjhb iov++; 199144449Sjhb } 200144449Sjhb *uiop = uio; 201144449Sjhb return (0); 202144449Sjhb} 203133819Stjr 204144449Sjhbint 205185438Skiblinux32_copyiniov(struct l_iovec32 *iovp32, l_ulong iovcnt, struct iovec **iovp, 206185438Skib int error) 207185438Skib{ 208185438Skib struct l_iovec32 iov32; 209185438Skib struct iovec *iov; 210185438Skib uint32_t iovlen; 211185438Skib int i; 212185438Skib 213185438Skib *iovp = NULL; 214185438Skib if (iovcnt > UIO_MAXIOV) 215185438Skib return (error); 216185438Skib iovlen = iovcnt * sizeof(struct iovec); 217185438Skib iov = malloc(iovlen, M_IOV, M_WAITOK); 218185438Skib for (i = 0; i < iovcnt; i++) { 219185438Skib error = copyin(&iovp32[i], &iov32, sizeof(struct l_iovec32)); 220185438Skib if (error) { 221185438Skib free(iov, M_IOV); 222185438Skib return (error); 223185438Skib } 224185438Skib iov[i].iov_base = PTRIN(iov32.iov_base); 225185438Skib iov[i].iov_len = iov32.iov_len; 226185438Skib } 227185438Skib *iovp = iov; 228185438Skib return(0); 229185438Skib 230185438Skib} 231185438Skib 232185438Skibint 233144449Sjhblinux_readv(struct thread *td, struct linux_readv_args *uap) 234144449Sjhb{ 235144449Sjhb struct uio *auio; 236144449Sjhb int error; 237133819Stjr 238144449Sjhb error = linux32_copyinuio(uap->iovp, uap->iovcnt, &auio); 239144449Sjhb if (error) 240144449Sjhb return (error); 241144449Sjhb error = kern_readv(td, uap->fd, auio); 242144449Sjhb free(auio, M_IOV); 243133819Stjr return (error); 244133819Stjr} 245133819Stjr 246133819Stjrint 247133819Stjrlinux_writev(struct thread *td, struct linux_writev_args *uap) 248133819Stjr{ 249144449Sjhb struct uio *auio; 250144449Sjhb int error; 251133819Stjr 252144449Sjhb error = linux32_copyinuio(uap->iovp, uap->iovcnt, &auio); 253144449Sjhb if (error) 254144449Sjhb return (error); 255144449Sjhb error = kern_writev(td, uap->fd, auio); 256144449Sjhb free(auio, M_IOV); 257133819Stjr return (error); 258133819Stjr} 259133819Stjr 260133819Stjrstruct l_ipc_kludge { 261133819Stjr l_uintptr_t msgp; 262133819Stjr l_long msgtyp; 263133819Stjr} __packed; 264133819Stjr 265133819Stjrint 266133819Stjrlinux_ipc(struct thread *td, struct linux_ipc_args *args) 267133819Stjr{ 268133819Stjr 269133819Stjr switch (args->what & 0xFFFF) { 270133819Stjr case LINUX_SEMOP: { 271133819Stjr struct linux_semop_args a; 272133819Stjr 273133819Stjr a.semid = args->arg1; 274133819Stjr a.tsops = args->ptr; 275133819Stjr a.nsops = args->arg2; 276133819Stjr return (linux_semop(td, &a)); 277133819Stjr } 278133819Stjr case LINUX_SEMGET: { 279133819Stjr struct linux_semget_args a; 280133819Stjr 281133819Stjr a.key = args->arg1; 282133819Stjr a.nsems = args->arg2; 283133819Stjr a.semflg = args->arg3; 284133819Stjr return (linux_semget(td, &a)); 285133819Stjr } 286133819Stjr case LINUX_SEMCTL: { 287133819Stjr struct linux_semctl_args a; 288133819Stjr int error; 289133819Stjr 290133819Stjr a.semid = args->arg1; 291133819Stjr a.semnum = args->arg2; 292133819Stjr a.cmd = args->arg3; 293133819Stjr error = copyin(args->ptr, &a.arg, sizeof(a.arg)); 294133819Stjr if (error) 295133819Stjr return (error); 296133819Stjr return (linux_semctl(td, &a)); 297133819Stjr } 298133819Stjr case LINUX_MSGSND: { 299133819Stjr struct linux_msgsnd_args a; 300133819Stjr 301133819Stjr a.msqid = args->arg1; 302133819Stjr a.msgp = args->ptr; 303133819Stjr a.msgsz = args->arg2; 304133819Stjr a.msgflg = args->arg3; 305133819Stjr return (linux_msgsnd(td, &a)); 306133819Stjr } 307133819Stjr case LINUX_MSGRCV: { 308133819Stjr struct linux_msgrcv_args a; 309133819Stjr 310133819Stjr a.msqid = args->arg1; 311133819Stjr a.msgsz = args->arg2; 312133819Stjr a.msgflg = args->arg3; 313133819Stjr if ((args->what >> 16) == 0) { 314133819Stjr struct l_ipc_kludge tmp; 315133819Stjr int error; 316133819Stjr 317133819Stjr if (args->ptr == 0) 318133819Stjr return (EINVAL); 319133819Stjr error = copyin(args->ptr, &tmp, sizeof(tmp)); 320133819Stjr if (error) 321133819Stjr return (error); 322133819Stjr a.msgp = PTRIN(tmp.msgp); 323133819Stjr a.msgtyp = tmp.msgtyp; 324133819Stjr } else { 325133819Stjr a.msgp = args->ptr; 326133819Stjr a.msgtyp = args->arg5; 327133819Stjr } 328133819Stjr return (linux_msgrcv(td, &a)); 329133819Stjr } 330133819Stjr case LINUX_MSGGET: { 331133819Stjr struct linux_msgget_args a; 332133819Stjr 333133819Stjr a.key = args->arg1; 334133819Stjr a.msgflg = args->arg2; 335133819Stjr return (linux_msgget(td, &a)); 336133819Stjr } 337133819Stjr case LINUX_MSGCTL: { 338133819Stjr struct linux_msgctl_args a; 339133819Stjr 340133819Stjr a.msqid = args->arg1; 341133819Stjr a.cmd = args->arg2; 342133819Stjr a.buf = args->ptr; 343133819Stjr return (linux_msgctl(td, &a)); 344133819Stjr } 345133819Stjr case LINUX_SHMAT: { 346133819Stjr struct linux_shmat_args a; 347133819Stjr 348133819Stjr a.shmid = args->arg1; 349133819Stjr a.shmaddr = args->ptr; 350133819Stjr a.shmflg = args->arg2; 351144441Sjhb a.raddr = PTRIN((l_uint)args->arg3); 352133819Stjr return (linux_shmat(td, &a)); 353133819Stjr } 354133819Stjr case LINUX_SHMDT: { 355133819Stjr struct linux_shmdt_args a; 356133819Stjr 357133819Stjr a.shmaddr = args->ptr; 358133819Stjr return (linux_shmdt(td, &a)); 359133819Stjr } 360133819Stjr case LINUX_SHMGET: { 361133819Stjr struct linux_shmget_args a; 362133819Stjr 363133819Stjr a.key = args->arg1; 364133819Stjr a.size = args->arg2; 365133819Stjr a.shmflg = args->arg3; 366133819Stjr return (linux_shmget(td, &a)); 367133819Stjr } 368133819Stjr case LINUX_SHMCTL: { 369133819Stjr struct linux_shmctl_args a; 370133819Stjr 371133819Stjr a.shmid = args->arg1; 372133819Stjr a.cmd = args->arg2; 373133819Stjr a.buf = args->ptr; 374133819Stjr return (linux_shmctl(td, &a)); 375133819Stjr } 376133819Stjr default: 377133819Stjr break; 378133819Stjr } 379133819Stjr 380133819Stjr return (EINVAL); 381133819Stjr} 382133819Stjr 383133819Stjrint 384133819Stjrlinux_old_select(struct thread *td, struct linux_old_select_args *args) 385133819Stjr{ 386133819Stjr struct l_old_select_argv linux_args; 387133819Stjr struct linux_select_args newsel; 388133819Stjr int error; 389133819Stjr 390133819Stjr#ifdef DEBUG 391133819Stjr if (ldebug(old_select)) 392133819Stjr printf(ARGS(old_select, "%p"), args->ptr); 393133819Stjr#endif 394133819Stjr 395133819Stjr error = copyin(args->ptr, &linux_args, sizeof(linux_args)); 396133819Stjr if (error) 397133819Stjr return (error); 398133819Stjr 399133819Stjr newsel.nfds = linux_args.nfds; 400133819Stjr newsel.readfds = PTRIN(linux_args.readfds); 401133819Stjr newsel.writefds = PTRIN(linux_args.writefds); 402133819Stjr newsel.exceptfds = PTRIN(linux_args.exceptfds); 403133819Stjr newsel.timeout = PTRIN(linux_args.timeout); 404133819Stjr return (linux_select(td, &newsel)); 405133819Stjr} 406133819Stjr 407133819Stjrint 408133819Stjrlinux_fork(struct thread *td, struct linux_fork_args *args) 409133819Stjr{ 410133819Stjr int error; 411166150Snetchild struct proc *p2; 412166150Snetchild struct thread *td2; 413133819Stjr 414133819Stjr#ifdef DEBUG 415133819Stjr if (ldebug(fork)) 416133819Stjr printf(ARGS(fork, "")); 417133819Stjr#endif 418133819Stjr 419166150Snetchild if ((error = fork1(td, RFFDG | RFPROC | RFSTOPPED, 0, &p2)) != 0) 420133819Stjr return (error); 421168063Sjkim 422166150Snetchild if (error == 0) { 423166150Snetchild td->td_retval[0] = p2->p_pid; 424166150Snetchild td->td_retval[1] = 0; 425166150Snetchild } 426133819Stjr 427133819Stjr if (td->td_retval[1] == 1) 428133819Stjr td->td_retval[0] = 0; 429161474Snetchild error = linux_proc_init(td, td->td_retval[0], 0); 430161474Snetchild if (error) 431161474Snetchild return (error); 432161474Snetchild 433166150Snetchild td2 = FIRST_THREAD_IN_PROC(p2); 434166150Snetchild 435168063Sjkim /* 436168063Sjkim * Make this runnable after we are finished with it. 437168063Sjkim */ 438170307Sjeff thread_lock(td2); 439166150Snetchild TD_SET_CAN_RUN(td2); 440166188Sjeff sched_add(td2, SRQ_BORING); 441170307Sjeff thread_unlock(td2); 442166150Snetchild 443133819Stjr return (0); 444133819Stjr} 445133819Stjr 446133819Stjrint 447133819Stjrlinux_vfork(struct thread *td, struct linux_vfork_args *args) 448133819Stjr{ 449133819Stjr int error; 450161611Snetchild struct proc *p2; 451166150Snetchild struct thread *td2; 452133819Stjr 453133819Stjr#ifdef DEBUG 454133819Stjr if (ldebug(vfork)) 455133819Stjr printf(ARGS(vfork, "")); 456133819Stjr#endif 457133819Stjr 458168063Sjkim /* Exclude RFPPWAIT */ 459166150Snetchild if ((error = fork1(td, RFFDG | RFPROC | RFMEM | RFSTOPPED, 0, &p2)) != 0) 460133819Stjr return (error); 461161611Snetchild if (error == 0) { 462161611Snetchild td->td_retval[0] = p2->p_pid; 463161611Snetchild td->td_retval[1] = 0; 464161611Snetchild } 465133819Stjr /* Are we the child? */ 466133819Stjr if (td->td_retval[1] == 1) 467133819Stjr td->td_retval[0] = 0; 468161474Snetchild error = linux_proc_init(td, td->td_retval[0], 0); 469161474Snetchild if (error) 470161474Snetchild return (error); 471166150Snetchild 472166150Snetchild PROC_LOCK(p2); 473166150Snetchild p2->p_flag |= P_PPWAIT; 474166150Snetchild PROC_UNLOCK(p2); 475166150Snetchild 476166150Snetchild td2 = FIRST_THREAD_IN_PROC(p2); 477168063Sjkim 478168848Sjkim /* 479168848Sjkim * Make this runnable after we are finished with it. 480168848Sjkim */ 481170307Sjeff thread_lock(td2); 482166150Snetchild TD_SET_CAN_RUN(td2); 483166188Sjeff sched_add(td2, SRQ_BORING); 484170307Sjeff thread_unlock(td2); 485166150Snetchild 486161611Snetchild /* wait for the children to exit, ie. emulate vfork */ 487161611Snetchild PROC_LOCK(p2); 488161611Snetchild while (p2->p_flag & P_PPWAIT) 489188750Skib cv_wait(&p2->p_pwait, &p2->p_mtx); 490161611Snetchild PROC_UNLOCK(p2); 491168063Sjkim 492133819Stjr return (0); 493133819Stjr} 494133819Stjr 495133819Stjrint 496133819Stjrlinux_clone(struct thread *td, struct linux_clone_args *args) 497133819Stjr{ 498133819Stjr int error, ff = RFPROC | RFSTOPPED; 499133819Stjr struct proc *p2; 500133819Stjr struct thread *td2; 501133819Stjr int exit_signal; 502161474Snetchild struct linux_emuldata *em; 503133819Stjr 504133819Stjr#ifdef DEBUG 505133819Stjr if (ldebug(clone)) { 506168063Sjkim printf(ARGS(clone, "flags %x, stack %p, parent tid: %p, " 507168063Sjkim "child tid: %p"), (unsigned)args->flags, 508168063Sjkim args->stack, args->parent_tidptr, args->child_tidptr); 509133819Stjr } 510133819Stjr#endif 511133819Stjr 512133819Stjr exit_signal = args->flags & 0x000000ff; 513169458Skan if (LINUX_SIG_VALID(exit_signal)) { 514169458Skan if (exit_signal <= LINUX_SIGTBLSZ) 515169458Skan exit_signal = 516169458Skan linux_to_bsd_signal[_SIG_IDX(exit_signal)]; 517169458Skan } else if (exit_signal != 0) 518133819Stjr return (EINVAL); 519133819Stjr 520167157Sjkim if (args->flags & LINUX_CLONE_VM) 521133819Stjr ff |= RFMEM; 522167157Sjkim if (args->flags & LINUX_CLONE_SIGHAND) 523133819Stjr ff |= RFSIGSHARE; 524168063Sjkim /* 525168063Sjkim * XXX: In Linux, sharing of fs info (chroot/cwd/umask) 526168063Sjkim * and open files is independant. In FreeBSD, its in one 527168848Sjkim * structure but in reality it does not cause any problems 528168848Sjkim * because both of these flags are usually set together. 529163371Snetchild */ 530167157Sjkim if (!(args->flags & (LINUX_CLONE_FILES | LINUX_CLONE_FS))) 531133819Stjr ff |= RFFDG; 532133819Stjr 533161474Snetchild /* 534161474Snetchild * Attempt to detect when linux_clone(2) is used for creating 535161474Snetchild * kernel threads. Unfortunately despite the existence of the 536161474Snetchild * CLONE_THREAD flag, version of linuxthreads package used in 537161474Snetchild * most popular distros as of beginning of 2005 doesn't make 538166944Snetchild * any use of it. Therefore, this detection relies on 539161474Snetchild * empirical observation that linuxthreads sets certain 540161474Snetchild * combination of flags, so that we can make more or less 541161474Snetchild * precise detection and notify the FreeBSD kernel that several 542161474Snetchild * processes are in fact part of the same threading group, so 543161474Snetchild * that special treatment is necessary for signal delivery 544161474Snetchild * between those processes and fd locking. 545161474Snetchild */ 546167157Sjkim if ((args->flags & 0xffffff00) == LINUX_THREADING_FLAGS) 547161474Snetchild ff |= RFTHREAD; 548161474Snetchild 549168056Sjkim if (args->flags & LINUX_CLONE_PARENT_SETTID) 550168056Sjkim if (args->parent_tidptr == NULL) 551168056Sjkim return (EINVAL); 552168056Sjkim 553133819Stjr error = fork1(td, ff, 0, &p2); 554133819Stjr if (error) 555133819Stjr return (error); 556166395Skib 557167157Sjkim if (args->flags & (LINUX_CLONE_PARENT | LINUX_CLONE_THREAD)) { 558166395Skib sx_xlock(&proctree_lock); 559166395Skib PROC_LOCK(p2); 560166395Skib proc_reparent(p2, td->td_proc->p_pptr); 561166395Skib PROC_UNLOCK(p2); 562166395Skib sx_xunlock(&proctree_lock); 563166395Skib } 564168063Sjkim 565161474Snetchild /* create the emuldata */ 566161474Snetchild error = linux_proc_init(td, p2->p_pid, args->flags); 567161474Snetchild /* reference it - no need to check this */ 568165867Snetchild em = em_find(p2, EMUL_DOLOCK); 569161474Snetchild KASSERT(em != NULL, ("clone: emuldata not found.\n")); 570161474Snetchild /* and adjust it */ 571133819Stjr 572167157Sjkim if (args->flags & LINUX_CLONE_THREAD) { 573161474Snetchild#ifdef notyet 574161696Snetchild PROC_LOCK(p2); 575161474Snetchild p2->p_pgrp = td->td_proc->p_pgrp; 576161696Snetchild PROC_UNLOCK(p2); 577161474Snetchild#endif 578168063Sjkim exit_signal = 0; 579161474Snetchild } 580161474Snetchild 581167157Sjkim if (args->flags & LINUX_CLONE_CHILD_SETTID) 582161474Snetchild em->child_set_tid = args->child_tidptr; 583161474Snetchild else 584161474Snetchild em->child_set_tid = NULL; 585161474Snetchild 586167157Sjkim if (args->flags & LINUX_CLONE_CHILD_CLEARTID) 587161474Snetchild em->child_clear_tid = args->child_tidptr; 588161474Snetchild else 589161474Snetchild em->child_clear_tid = NULL; 590161696Snetchild 591161474Snetchild EMUL_UNLOCK(&emul_lock); 592161474Snetchild 593168056Sjkim if (args->flags & LINUX_CLONE_PARENT_SETTID) { 594168056Sjkim error = copyout(&p2->p_pid, args->parent_tidptr, 595168056Sjkim sizeof(p2->p_pid)); 596168056Sjkim if (error) 597168056Sjkim printf(LMSG("copyout failed!")); 598168056Sjkim } 599168056Sjkim 600133819Stjr PROC_LOCK(p2); 601133819Stjr p2->p_sigparent = exit_signal; 602133819Stjr PROC_UNLOCK(p2); 603133819Stjr td2 = FIRST_THREAD_IN_PROC(p2); 604168063Sjkim /* 605168063Sjkim * In a case of stack = NULL, we are supposed to COW calling process 606168063Sjkim * stack. This is what normal fork() does, so we just keep tf_rsp arg 607168063Sjkim * intact. 608161310Snetchild */ 609161310Snetchild if (args->stack) 610168063Sjkim td2->td_frame->tf_rsp = PTROUT(args->stack); 611133819Stjr 612167157Sjkim if (args->flags & LINUX_CLONE_SETTLS) { 613168035Sjkim struct user_segment_descriptor sd; 614168035Sjkim struct l_user_desc info; 615216634Sjkim struct pcb *pcb; 616168848Sjkim int a[2]; 617168035Sjkim 618168848Sjkim error = copyin((void *)td->td_frame->tf_rsi, &info, 619168035Sjkim sizeof(struct l_user_desc)); 620168035Sjkim if (error) { 621168035Sjkim printf(LMSG("copyin failed!")); 622168035Sjkim } else { 623168035Sjkim /* We might copy out the entry_number as GUGS32_SEL. */ 624168848Sjkim info.entry_number = GUGS32_SEL; 625168035Sjkim error = copyout(&info, (void *)td->td_frame->tf_rsi, 626168035Sjkim sizeof(struct l_user_desc)); 627168035Sjkim if (error) 628168035Sjkim printf(LMSG("copyout failed!")); 629168035Sjkim 630168035Sjkim a[0] = LINUX_LDT_entry_a(&info); 631168035Sjkim a[1] = LINUX_LDT_entry_b(&info); 632168035Sjkim 633168035Sjkim memcpy(&sd, &a, sizeof(a)); 634168035Sjkim#ifdef DEBUG 635168035Sjkim if (ldebug(clone)) 636168035Sjkim printf("Segment created in clone with " 637168035Sjkim "CLONE_SETTLS: lobase: %x, hibase: %x, " 638168035Sjkim "lolimit: %x, hilimit: %x, type: %i, " 639168035Sjkim "dpl: %i, p: %i, xx: %i, long: %i, " 640168035Sjkim "def32: %i, gran: %i\n", sd.sd_lobase, 641168035Sjkim sd.sd_hibase, sd.sd_lolimit, sd.sd_hilimit, 642168035Sjkim sd.sd_type, sd.sd_dpl, sd.sd_p, sd.sd_xx, 643168035Sjkim sd.sd_long, sd.sd_def32, sd.sd_gran); 644168035Sjkim#endif 645216634Sjkim pcb = td2->td_pcb; 646216634Sjkim pcb->pcb_gsbase = (register_t)info.base_addr; 647216634Sjkim/* XXXKIB pcb->pcb_gs32sd = sd; */ 648190620Skib td2->td_frame->tf_gs = GSEL(GUGS32_SEL, SEL_UPL); 649216634Sjkim set_pcb_flags(pcb, PCB_GS32BIT | PCB_32BIT); 650168035Sjkim } 651161474Snetchild } 652161474Snetchild 653133819Stjr#ifdef DEBUG 654133819Stjr if (ldebug(clone)) 655168063Sjkim printf(LMSG("clone: successful rfork to %d, " 656168063Sjkim "stack %p sig = %d"), (int)p2->p_pid, args->stack, 657168063Sjkim exit_signal); 658133819Stjr#endif 659167157Sjkim if (args->flags & LINUX_CLONE_VFORK) { 660166150Snetchild PROC_LOCK(p2); 661166150Snetchild p2->p_flag |= P_PPWAIT; 662166150Snetchild PROC_UNLOCK(p2); 663166150Snetchild } 664133819Stjr 665133819Stjr /* 666133819Stjr * Make this runnable after we are finished with it. 667133819Stjr */ 668170307Sjeff thread_lock(td2); 669133819Stjr TD_SET_CAN_RUN(td2); 670166188Sjeff sched_add(td2, SRQ_BORING); 671170307Sjeff thread_unlock(td2); 672133819Stjr 673133819Stjr td->td_retval[0] = p2->p_pid; 674133819Stjr td->td_retval[1] = 0; 675168063Sjkim 676167157Sjkim if (args->flags & LINUX_CLONE_VFORK) { 677168063Sjkim /* wait for the children to exit, ie. emulate vfork */ 678168063Sjkim PROC_LOCK(p2); 679163374Snetchild while (p2->p_flag & P_PPWAIT) 680188750Skib cv_wait(&p2->p_pwait, &p2->p_mtx); 681163374Snetchild PROC_UNLOCK(p2); 682163374Snetchild } 683163374Snetchild 684133819Stjr return (0); 685133819Stjr} 686133819Stjr 687133819Stjr#define STACK_SIZE (2 * 1024 * 1024) 688133819Stjr#define GUARD_SIZE (4 * PAGE_SIZE) 689133819Stjr 690133819Stjrint 691133819Stjrlinux_mmap2(struct thread *td, struct linux_mmap2_args *args) 692133819Stjr{ 693133819Stjr 694133819Stjr#ifdef DEBUG 695133819Stjr if (ldebug(mmap2)) 696168063Sjkim printf(ARGS(mmap2, "0x%08x, %d, %d, 0x%08x, %d, %d"), 697168063Sjkim args->addr, args->len, args->prot, 698133819Stjr args->flags, args->fd, args->pgoff); 699133819Stjr#endif 700133819Stjr 701198554Sjhb return (linux_mmap_common(td, PTROUT(args->addr), args->len, args->prot, 702198554Sjhb args->flags, args->fd, (uint64_t)(uint32_t)args->pgoff * 703198554Sjhb PAGE_SIZE)); 704133819Stjr} 705133819Stjr 706133819Stjrint 707133819Stjrlinux_mmap(struct thread *td, struct linux_mmap_args *args) 708133819Stjr{ 709133819Stjr int error; 710133819Stjr struct l_mmap_argv linux_args; 711133819Stjr 712133819Stjr error = copyin(args->ptr, &linux_args, sizeof(linux_args)); 713133819Stjr if (error) 714133819Stjr return (error); 715133819Stjr 716133819Stjr#ifdef DEBUG 717133819Stjr if (ldebug(mmap)) 718168063Sjkim printf(ARGS(mmap, "0x%08x, %d, %d, 0x%08x, %d, %d"), 719168063Sjkim linux_args.addr, linux_args.len, linux_args.prot, 720168063Sjkim linux_args.flags, linux_args.fd, linux_args.pgoff); 721133819Stjr#endif 722133819Stjr 723198554Sjhb return (linux_mmap_common(td, linux_args.addr, linux_args.len, 724198554Sjhb linux_args.prot, linux_args.flags, linux_args.fd, 725198554Sjhb (uint32_t)linux_args.pgoff)); 726133819Stjr} 727133819Stjr 728133819Stjrstatic int 729198554Sjhblinux_mmap_common(struct thread *td, l_uintptr_t addr, l_size_t len, l_int prot, 730198554Sjhb l_int flags, l_int fd, l_loff_t pos) 731133819Stjr{ 732133819Stjr struct proc *p = td->td_proc; 733133819Stjr struct mmap_args /* { 734133819Stjr caddr_t addr; 735133819Stjr size_t len; 736133819Stjr int prot; 737133819Stjr int flags; 738133819Stjr int fd; 739133819Stjr long pad; 740133819Stjr off_t pos; 741133819Stjr } */ bsd_args; 742133819Stjr int error; 743165832Snetchild struct file *fp; 744133819Stjr 745133819Stjr error = 0; 746133819Stjr bsd_args.flags = 0; 747165832Snetchild fp = NULL; 748165832Snetchild 749165832Snetchild /* 750165832Snetchild * Linux mmap(2): 751165832Snetchild * You must specify exactly one of MAP_SHARED and MAP_PRIVATE 752165832Snetchild */ 753198554Sjhb if (!((flags & LINUX_MAP_SHARED) ^ (flags & LINUX_MAP_PRIVATE))) 754165832Snetchild return (EINVAL); 755165832Snetchild 756198554Sjhb if (flags & LINUX_MAP_SHARED) 757133819Stjr bsd_args.flags |= MAP_SHARED; 758198554Sjhb if (flags & LINUX_MAP_PRIVATE) 759133819Stjr bsd_args.flags |= MAP_PRIVATE; 760198554Sjhb if (flags & LINUX_MAP_FIXED) 761133819Stjr bsd_args.flags |= MAP_FIXED; 762208994Skan if (flags & LINUX_MAP_ANON) { 763208994Skan /* Enforce pos to be on page boundary, then ignore. */ 764208994Skan if ((pos & PAGE_MASK) != 0) 765208994Skan return (EINVAL); 766208994Skan pos = 0; 767133819Stjr bsd_args.flags |= MAP_ANON; 768208994Skan } else 769133819Stjr bsd_args.flags |= MAP_NOSYNC; 770198554Sjhb if (flags & LINUX_MAP_GROWSDOWN) 771133819Stjr bsd_args.flags |= MAP_STACK; 772133819Stjr 773166727Sjkim /* 774166727Sjkim * PROT_READ, PROT_WRITE, or PROT_EXEC implies PROT_READ and PROT_EXEC 775166727Sjkim * on Linux/i386. We do this to ensure maximum compatibility. 776166727Sjkim * Linux/ia64 does the same in i386 emulation mode. 777166727Sjkim */ 778198554Sjhb bsd_args.prot = prot; 779166727Sjkim if (bsd_args.prot & (PROT_READ | PROT_WRITE | PROT_EXEC)) 780166727Sjkim bsd_args.prot |= PROT_READ | PROT_EXEC; 781166727Sjkim 782167048Sjkim /* Linux does not check file descriptor when MAP_ANONYMOUS is set. */ 783198554Sjhb bsd_args.fd = (bsd_args.flags & MAP_ANON) ? -1 : fd; 784167048Sjkim if (bsd_args.fd != -1) { 785166727Sjkim /* 786166727Sjkim * Linux follows Solaris mmap(2) description: 787166727Sjkim * The file descriptor fildes is opened with 788166727Sjkim * read permission, regardless of the 789166727Sjkim * protection options specified. 790166727Sjkim */ 791166727Sjkim 792167048Sjkim if ((error = fget(td, bsd_args.fd, &fp)) != 0) 793166727Sjkim return (error); 794166727Sjkim if (fp->f_type != DTYPE_VNODE) { 795166727Sjkim fdrop(fp, td); 796166727Sjkim return (EINVAL); 797166727Sjkim } 798166727Sjkim 799166727Sjkim /* Linux mmap() just fails for O_WRONLY files */ 800166727Sjkim if (!(fp->f_flag & FREAD)) { 801166727Sjkim fdrop(fp, td); 802166727Sjkim return (EACCES); 803166727Sjkim } 804166727Sjkim 805166727Sjkim fdrop(fp, td); 806166727Sjkim } 807166727Sjkim 808198554Sjhb if (flags & LINUX_MAP_GROWSDOWN) { 809168063Sjkim /* 810168063Sjkim * The Linux MAP_GROWSDOWN option does not limit auto 811133819Stjr * growth of the region. Linux mmap with this option 812133819Stjr * takes as addr the inital BOS, and as len, the initial 813133819Stjr * region size. It can then grow down from addr without 814168063Sjkim * limit. However, Linux threads has an implicit internal 815133819Stjr * limit to stack size of STACK_SIZE. Its just not 816168063Sjkim * enforced explicitly in Linux. But, here we impose 817133819Stjr * a limit of (STACK_SIZE - GUARD_SIZE) on the stack 818133819Stjr * region, since we can do this with our mmap. 819133819Stjr * 820133819Stjr * Our mmap with MAP_STACK takes addr as the maximum 821133819Stjr * downsize limit on BOS, and as len the max size of 822168848Sjkim * the region. It then maps the top SGROWSIZ bytes, 823166944Snetchild * and auto grows the region down, up to the limit 824133819Stjr * in addr. 825133819Stjr * 826133819Stjr * If we don't use the MAP_STACK option, the effect 827133819Stjr * of this code is to allocate a stack region of a 828133819Stjr * fixed size of (STACK_SIZE - GUARD_SIZE). 829133819Stjr */ 830133819Stjr 831198554Sjhb if ((caddr_t)PTRIN(addr) + len > p->p_vmspace->vm_maxsaddr) { 832168063Sjkim /* 833168063Sjkim * Some Linux apps will attempt to mmap 834133819Stjr * thread stacks near the top of their 835133819Stjr * address space. If their TOS is greater 836133819Stjr * than vm_maxsaddr, vm_map_growstack() 837133819Stjr * will confuse the thread stack with the 838133819Stjr * process stack and deliver a SEGV if they 839133819Stjr * attempt to grow the thread stack past their 840133819Stjr * current stacksize rlimit. To avoid this, 841133819Stjr * adjust vm_maxsaddr upwards to reflect 842133819Stjr * the current stacksize rlimit rather 843133819Stjr * than the maximum possible stacksize. 844133819Stjr * It would be better to adjust the 845133819Stjr * mmap'ed region, but some apps do not check 846133819Stjr * mmap's return value. 847133819Stjr */ 848133819Stjr PROC_LOCK(p); 849166727Sjkim p->p_vmspace->vm_maxsaddr = (char *)LINUX32_USRSTACK - 850133819Stjr lim_cur(p, RLIMIT_STACK); 851133819Stjr PROC_UNLOCK(p); 852133819Stjr } 853133819Stjr 854168063Sjkim /* 855176193Sjkim * This gives us our maximum stack size and a new BOS. 856176193Sjkim * If we're using VM_STACK, then mmap will just map 857176193Sjkim * the top SGROWSIZ bytes, and let the stack grow down 858176193Sjkim * to the limit at BOS. If we're not using VM_STACK 859176193Sjkim * we map the full stack, since we don't have a way 860176193Sjkim * to autogrow it. 861133819Stjr */ 862198554Sjhb if (len > STACK_SIZE - GUARD_SIZE) { 863198554Sjhb bsd_args.addr = (caddr_t)PTRIN(addr); 864198554Sjhb bsd_args.len = len; 865176193Sjkim } else { 866198554Sjhb bsd_args.addr = (caddr_t)PTRIN(addr) - 867198554Sjhb (STACK_SIZE - GUARD_SIZE - len); 868176193Sjkim bsd_args.len = STACK_SIZE - GUARD_SIZE; 869176193Sjkim } 870133819Stjr } else { 871198554Sjhb bsd_args.addr = (caddr_t)PTRIN(addr); 872198554Sjhb bsd_args.len = len; 873133819Stjr } 874198554Sjhb bsd_args.pos = pos; 875133819Stjr 876133819Stjr#ifdef DEBUG 877133819Stjr if (ldebug(mmap)) 878133819Stjr printf("-> %s(%p, %d, %d, 0x%08x, %d, 0x%x)\n", 879133819Stjr __func__, 880133843Sobrien (void *)bsd_args.addr, (int)bsd_args.len, bsd_args.prot, 881133819Stjr bsd_args.flags, bsd_args.fd, (int)bsd_args.pos); 882133819Stjr#endif 883133819Stjr error = mmap(td, &bsd_args); 884133819Stjr#ifdef DEBUG 885133819Stjr if (ldebug(mmap)) 886133819Stjr printf("-> %s() return: 0x%x (0x%08x)\n", 887133819Stjr __func__, error, (u_int)td->td_retval[0]); 888133819Stjr#endif 889133819Stjr return (error); 890133819Stjr} 891133819Stjr 892133819Stjrint 893168035Sjkimlinux_mprotect(struct thread *td, struct linux_mprotect_args *uap) 894168035Sjkim{ 895168035Sjkim struct mprotect_args bsd_args; 896168035Sjkim 897168035Sjkim bsd_args.addr = uap->addr; 898168035Sjkim bsd_args.len = uap->len; 899168035Sjkim bsd_args.prot = uap->prot; 900168035Sjkim if (bsd_args.prot & (PROT_READ | PROT_WRITE | PROT_EXEC)) 901168035Sjkim bsd_args.prot |= PROT_READ | PROT_EXEC; 902168035Sjkim return (mprotect(td, &bsd_args)); 903168035Sjkim} 904168035Sjkim 905168035Sjkimint 906166729Sjkimlinux_iopl(struct thread *td, struct linux_iopl_args *args) 907166729Sjkim{ 908166729Sjkim int error; 909166729Sjkim 910166729Sjkim if (args->level < 0 || args->level > 3) 911166729Sjkim return (EINVAL); 912166729Sjkim if ((error = priv_check(td, PRIV_IO)) != 0) 913166729Sjkim return (error); 914166729Sjkim if ((error = securelevel_gt(td->td_ucred, 0)) != 0) 915166729Sjkim return (error); 916166729Sjkim td->td_frame->tf_rflags = (td->td_frame->tf_rflags & ~PSL_IOPL) | 917166729Sjkim (args->level * (PSL_IOPL / 3)); 918166729Sjkim 919166729Sjkim return (0); 920166729Sjkim} 921166729Sjkim 922166729Sjkimint 923133819Stjrlinux_pipe(struct thread *td, struct linux_pipe_args *args) 924133819Stjr{ 925133819Stjr int error; 926184849Sed int fildes[2]; 927133819Stjr 928133819Stjr#ifdef DEBUG 929133819Stjr if (ldebug(pipe)) 930133819Stjr printf(ARGS(pipe, "*")); 931133819Stjr#endif 932133819Stjr 933184849Sed error = kern_pipe(td, fildes); 934184849Sed if (error) 935133819Stjr return (error); 936133819Stjr 937184849Sed /* XXX: Close descriptors on error. */ 938184849Sed return (copyout(fildes, args->pipefds, sizeof fildes)); 939133819Stjr} 940166731Sjkim 941133819Stjrint 942133819Stjrlinux_sigaction(struct thread *td, struct linux_sigaction_args *args) 943133819Stjr{ 944133819Stjr l_osigaction_t osa; 945133819Stjr l_sigaction_t act, oact; 946133819Stjr int error; 947133819Stjr 948133819Stjr#ifdef DEBUG 949133819Stjr if (ldebug(sigaction)) 950133819Stjr printf(ARGS(sigaction, "%d, %p, %p"), 951133819Stjr args->sig, (void *)args->nsa, (void *)args->osa); 952133819Stjr#endif 953133819Stjr 954133819Stjr if (args->nsa != NULL) { 955133819Stjr error = copyin(args->nsa, &osa, sizeof(l_osigaction_t)); 956133819Stjr if (error) 957133819Stjr return (error); 958133819Stjr act.lsa_handler = osa.lsa_handler; 959133819Stjr act.lsa_flags = osa.lsa_flags; 960133819Stjr act.lsa_restorer = osa.lsa_restorer; 961133819Stjr LINUX_SIGEMPTYSET(act.lsa_mask); 962133819Stjr act.lsa_mask.__bits[0] = osa.lsa_mask; 963133819Stjr } 964133819Stjr 965133819Stjr error = linux_do_sigaction(td, args->sig, args->nsa ? &act : NULL, 966133819Stjr args->osa ? &oact : NULL); 967133819Stjr 968133819Stjr if (args->osa != NULL && !error) { 969133819Stjr osa.lsa_handler = oact.lsa_handler; 970133819Stjr osa.lsa_flags = oact.lsa_flags; 971133819Stjr osa.lsa_restorer = oact.lsa_restorer; 972133819Stjr osa.lsa_mask = oact.lsa_mask.__bits[0]; 973133819Stjr error = copyout(&osa, args->osa, sizeof(l_osigaction_t)); 974133819Stjr } 975133819Stjr 976133819Stjr return (error); 977133819Stjr} 978133819Stjr 979133819Stjr/* 980168063Sjkim * Linux has two extra args, restart and oldmask. We don't use these, 981133819Stjr * but it seems that "restart" is actually a context pointer that 982133819Stjr * enables the signal to happen with a different register set. 983133819Stjr */ 984133819Stjrint 985133819Stjrlinux_sigsuspend(struct thread *td, struct linux_sigsuspend_args *args) 986133819Stjr{ 987133819Stjr sigset_t sigmask; 988133819Stjr l_sigset_t mask; 989133819Stjr 990133819Stjr#ifdef DEBUG 991133819Stjr if (ldebug(sigsuspend)) 992133819Stjr printf(ARGS(sigsuspend, "%08lx"), (unsigned long)args->mask); 993133819Stjr#endif 994133819Stjr 995133819Stjr LINUX_SIGEMPTYSET(mask); 996133819Stjr mask.__bits[0] = args->mask; 997133819Stjr linux_to_bsd_sigset(&mask, &sigmask); 998133819Stjr return (kern_sigsuspend(td, sigmask)); 999133819Stjr} 1000133819Stjr 1001133819Stjrint 1002133819Stjrlinux_rt_sigsuspend(struct thread *td, struct linux_rt_sigsuspend_args *uap) 1003133819Stjr{ 1004133819Stjr l_sigset_t lmask; 1005133819Stjr sigset_t sigmask; 1006133819Stjr int error; 1007133819Stjr 1008133819Stjr#ifdef DEBUG 1009133819Stjr if (ldebug(rt_sigsuspend)) 1010133819Stjr printf(ARGS(rt_sigsuspend, "%p, %d"), 1011133819Stjr (void *)uap->newset, uap->sigsetsize); 1012133819Stjr#endif 1013133819Stjr 1014133819Stjr if (uap->sigsetsize != sizeof(l_sigset_t)) 1015133819Stjr return (EINVAL); 1016133819Stjr 1017133819Stjr error = copyin(uap->newset, &lmask, sizeof(l_sigset_t)); 1018133819Stjr if (error) 1019133819Stjr return (error); 1020133819Stjr 1021133819Stjr linux_to_bsd_sigset(&lmask, &sigmask); 1022133819Stjr return (kern_sigsuspend(td, sigmask)); 1023133819Stjr} 1024133819Stjr 1025133819Stjrint 1026133819Stjrlinux_pause(struct thread *td, struct linux_pause_args *args) 1027133819Stjr{ 1028133819Stjr struct proc *p = td->td_proc; 1029133819Stjr sigset_t sigmask; 1030133819Stjr 1031133819Stjr#ifdef DEBUG 1032133819Stjr if (ldebug(pause)) 1033133819Stjr printf(ARGS(pause, "")); 1034133819Stjr#endif 1035133819Stjr 1036133819Stjr PROC_LOCK(p); 1037133819Stjr sigmask = td->td_sigmask; 1038133819Stjr PROC_UNLOCK(p); 1039133819Stjr return (kern_sigsuspend(td, sigmask)); 1040133819Stjr} 1041133819Stjr 1042133819Stjrint 1043133819Stjrlinux_sigaltstack(struct thread *td, struct linux_sigaltstack_args *uap) 1044133819Stjr{ 1045133819Stjr stack_t ss, oss; 1046133819Stjr l_stack_t lss; 1047133819Stjr int error; 1048133819Stjr 1049133819Stjr#ifdef DEBUG 1050133819Stjr if (ldebug(sigaltstack)) 1051133819Stjr printf(ARGS(sigaltstack, "%p, %p"), uap->uss, uap->uoss); 1052133819Stjr#endif 1053133819Stjr 1054133819Stjr if (uap->uss != NULL) { 1055133819Stjr error = copyin(uap->uss, &lss, sizeof(l_stack_t)); 1056133819Stjr if (error) 1057133819Stjr return (error); 1058133819Stjr 1059133819Stjr ss.ss_sp = PTRIN(lss.ss_sp); 1060133819Stjr ss.ss_size = lss.ss_size; 1061133819Stjr ss.ss_flags = linux_to_bsd_sigaltstack(lss.ss_flags); 1062133819Stjr } 1063134269Sjhb error = kern_sigaltstack(td, (uap->uss != NULL) ? &ss : NULL, 1064134269Sjhb (uap->uoss != NULL) ? &oss : NULL); 1065133819Stjr if (!error && uap->uoss != NULL) { 1066133819Stjr lss.ss_sp = PTROUT(oss.ss_sp); 1067133819Stjr lss.ss_size = oss.ss_size; 1068133819Stjr lss.ss_flags = bsd_to_linux_sigaltstack(oss.ss_flags); 1069133819Stjr error = copyout(&lss, uap->uoss, sizeof(l_stack_t)); 1070133819Stjr } 1071133819Stjr 1072133819Stjr return (error); 1073133819Stjr} 1074133819Stjr 1075133819Stjrint 1076133819Stjrlinux_ftruncate64(struct thread *td, struct linux_ftruncate64_args *args) 1077133819Stjr{ 1078133819Stjr struct ftruncate_args sa; 1079133819Stjr 1080133819Stjr#ifdef DEBUG 1081133819Stjr if (ldebug(ftruncate64)) 1082133819Stjr printf(ARGS(ftruncate64, "%u, %jd"), args->fd, 1083133819Stjr (intmax_t)args->length); 1084133819Stjr#endif 1085133819Stjr 1086133819Stjr sa.fd = args->fd; 1087133819Stjr sa.length = args->length; 1088133819Stjr return ftruncate(td, &sa); 1089133819Stjr} 1090133819Stjr 1091133819Stjrint 1092133819Stjrlinux_gettimeofday(struct thread *td, struct linux_gettimeofday_args *uap) 1093133819Stjr{ 1094133819Stjr struct timeval atv; 1095133819Stjr l_timeval atv32; 1096133819Stjr struct timezone rtz; 1097133819Stjr int error = 0; 1098133819Stjr 1099133819Stjr if (uap->tp) { 1100133819Stjr microtime(&atv); 1101133819Stjr atv32.tv_sec = atv.tv_sec; 1102133819Stjr atv32.tv_usec = atv.tv_usec; 1103168844Sjkim error = copyout(&atv32, uap->tp, sizeof(atv32)); 1104133819Stjr } 1105133819Stjr if (error == 0 && uap->tzp != NULL) { 1106133819Stjr rtz.tz_minuteswest = tz_minuteswest; 1107133819Stjr rtz.tz_dsttime = tz_dsttime; 1108168844Sjkim error = copyout(&rtz, uap->tzp, sizeof(rtz)); 1109133819Stjr } 1110133819Stjr return (error); 1111133819Stjr} 1112133819Stjr 1113133819Stjrint 1114168843Sjkimlinux_settimeofday(struct thread *td, struct linux_settimeofday_args *uap) 1115168843Sjkim{ 1116168843Sjkim l_timeval atv32; 1117168843Sjkim struct timeval atv, *tvp; 1118168843Sjkim struct timezone atz, *tzp; 1119168843Sjkim int error; 1120168843Sjkim 1121168843Sjkim if (uap->tp) { 1122168843Sjkim error = copyin(uap->tp, &atv32, sizeof(atv32)); 1123168843Sjkim if (error) 1124168843Sjkim return (error); 1125168843Sjkim atv.tv_sec = atv32.tv_sec; 1126168843Sjkim atv.tv_usec = atv32.tv_usec; 1127168843Sjkim tvp = &atv; 1128168843Sjkim } else 1129168843Sjkim tvp = NULL; 1130168843Sjkim if (uap->tzp) { 1131168843Sjkim error = copyin(uap->tzp, &atz, sizeof(atz)); 1132168843Sjkim if (error) 1133168843Sjkim return (error); 1134168843Sjkim tzp = &atz; 1135168843Sjkim } else 1136168843Sjkim tzp = NULL; 1137168843Sjkim return (kern_settimeofday(td, tvp, tzp)); 1138168843Sjkim} 1139168843Sjkim 1140168843Sjkimint 1141133819Stjrlinux_getrusage(struct thread *td, struct linux_getrusage_args *uap) 1142133819Stjr{ 1143136152Sjhb struct l_rusage s32; 1144136152Sjhb struct rusage s; 1145133819Stjr int error; 1146133819Stjr 1147136152Sjhb error = kern_getrusage(td, uap->who, &s); 1148133819Stjr if (error != 0) 1149133819Stjr return (error); 1150136152Sjhb if (uap->rusage != NULL) { 1151218028Sdchagin bsd_to_linux_rusage(&s, &s32); 1152136152Sjhb error = copyout(&s32, uap->rusage, sizeof(s32)); 1153133819Stjr } 1154133819Stjr return (error); 1155133819Stjr} 1156133819Stjr 1157133819Stjrint 1158133819Stjrlinux_sched_rr_get_interval(struct thread *td, 1159133819Stjr struct linux_sched_rr_get_interval_args *uap) 1160133819Stjr{ 1161133819Stjr struct timespec ts; 1162133819Stjr struct l_timespec ts32; 1163133819Stjr int error; 1164133819Stjr 1165144449Sjhb error = kern_sched_rr_get_interval(td, uap->pid, &ts); 1166133819Stjr if (error != 0) 1167133819Stjr return (error); 1168133819Stjr ts32.tv_sec = ts.tv_sec; 1169133819Stjr ts32.tv_nsec = ts.tv_nsec; 1170133819Stjr return (copyout(&ts32, uap->interval, sizeof(ts32))); 1171133819Stjr} 1172133819Stjr 1173133819Stjrint 1174168035Sjkimlinux_set_thread_area(struct thread *td, 1175168035Sjkim struct linux_set_thread_area_args *args) 1176133819Stjr{ 1177168035Sjkim struct l_user_desc info; 1178168035Sjkim struct user_segment_descriptor sd; 1179216634Sjkim struct pcb *pcb; 1180168035Sjkim int a[2]; 1181168035Sjkim int error; 1182133819Stjr 1183168035Sjkim error = copyin(args->desc, &info, sizeof(struct l_user_desc)); 1184168035Sjkim if (error) 1185168035Sjkim return (error); 1186168035Sjkim 1187168035Sjkim#ifdef DEBUG 1188168035Sjkim if (ldebug(set_thread_area)) 1189168848Sjkim printf(ARGS(set_thread_area, "%i, %x, %x, %i, %i, %i, " 1190168035Sjkim "%i, %i, %i"), info.entry_number, info.base_addr, 1191168035Sjkim info.limit, info.seg_32bit, info.contents, 1192168035Sjkim info.read_exec_only, info.limit_in_pages, 1193168035Sjkim info.seg_not_present, info.useable); 1194168035Sjkim#endif 1195168035Sjkim 1196168035Sjkim /* 1197168035Sjkim * Semantics of Linux version: every thread in the system has array 1198168035Sjkim * of three TLS descriptors. 1st is GLIBC TLS, 2nd is WINE, 3rd unknown. 1199168035Sjkim * This syscall loads one of the selected TLS decriptors with a value 1200168035Sjkim * and also loads GDT descriptors 6, 7 and 8 with the content of 1201168035Sjkim * the per-thread descriptors. 1202168035Sjkim * 1203168035Sjkim * Semantics of FreeBSD version: I think we can ignore that Linux has 1204168035Sjkim * three per-thread descriptors and use just the first one. 1205168035Sjkim * The tls_array[] is used only in [gs]et_thread_area() syscalls and 1206168035Sjkim * for loading the GDT descriptors. We use just one GDT descriptor 1207168035Sjkim * for TLS, so we will load just one. 1208168848Sjkim * 1209168848Sjkim * XXX: This doesn't work when a user space process tries to use more 1210168035Sjkim * than one TLS segment. Comment in the Linux source says wine might 1211168848Sjkim * do this. 1212168035Sjkim */ 1213168035Sjkim 1214168035Sjkim /* 1215168035Sjkim * GLIBC reads current %gs and call set_thread_area() with it. 1216168035Sjkim * We should let GUDATA_SEL and GUGS32_SEL proceed as well because 1217168035Sjkim * we use these segments. 1218168035Sjkim */ 1219168035Sjkim switch (info.entry_number) { 1220168035Sjkim case GUGS32_SEL: 1221168035Sjkim case GUDATA_SEL: 1222168035Sjkim case 6: 1223168035Sjkim case -1: 1224168035Sjkim info.entry_number = GUGS32_SEL; 1225168035Sjkim break; 1226168035Sjkim default: 1227168035Sjkim return (EINVAL); 1228168035Sjkim } 1229168035Sjkim 1230168035Sjkim /* 1231168035Sjkim * We have to copy out the GDT entry we use. 1232168848Sjkim * 1233168848Sjkim * XXX: What if a user space program does not check the return value 1234168848Sjkim * and tries to use 6, 7 or 8? 1235168035Sjkim */ 1236168035Sjkim error = copyout(&info, args->desc, sizeof(struct l_user_desc)); 1237168035Sjkim if (error) 1238168035Sjkim return (error); 1239168035Sjkim 1240168035Sjkim if (LINUX_LDT_empty(&info)) { 1241168035Sjkim a[0] = 0; 1242168035Sjkim a[1] = 0; 1243168035Sjkim } else { 1244168035Sjkim a[0] = LINUX_LDT_entry_a(&info); 1245168035Sjkim a[1] = LINUX_LDT_entry_b(&info); 1246168035Sjkim } 1247168035Sjkim 1248168035Sjkim memcpy(&sd, &a, sizeof(a)); 1249168035Sjkim#ifdef DEBUG 1250168035Sjkim if (ldebug(set_thread_area)) 1251168035Sjkim printf("Segment created in set_thread_area: " 1252168035Sjkim "lobase: %x, hibase: %x, lolimit: %x, hilimit: %x, " 1253168035Sjkim "type: %i, dpl: %i, p: %i, xx: %i, long: %i, " 1254168035Sjkim "def32: %i, gran: %i\n", 1255168035Sjkim sd.sd_lobase, 1256168035Sjkim sd.sd_hibase, 1257168035Sjkim sd.sd_lolimit, 1258168035Sjkim sd.sd_hilimit, 1259168035Sjkim sd.sd_type, 1260168035Sjkim sd.sd_dpl, 1261168035Sjkim sd.sd_p, 1262168035Sjkim sd.sd_xx, 1263168035Sjkim sd.sd_long, 1264168035Sjkim sd.sd_def32, 1265168035Sjkim sd.sd_gran); 1266168035Sjkim#endif 1267168035Sjkim 1268216634Sjkim pcb = td->td_pcb; 1269216634Sjkim pcb->pcb_gsbase = (register_t)info.base_addr; 1270216634Sjkim set_pcb_flags(pcb, PCB_32BIT | PCB_GS32BIT); 1271190620Skib update_gdt_gsbase(td, info.base_addr); 1272168035Sjkim 1273168035Sjkim return (0); 1274133819Stjr} 1275