linux32_machdep.c revision 218612
1133819Stjr/*- 2133819Stjr * Copyright (c) 2004 Tim J. Robbins 3133819Stjr * Copyright (c) 2002 Doug Rabson 4133819Stjr * Copyright (c) 2000 Marcel Moolenaar 5133819Stjr * All rights reserved. 6133819Stjr * 7133819Stjr * Redistribution and use in source and binary forms, with or without 8133819Stjr * modification, are permitted provided that the following conditions 9133819Stjr * are met: 10133819Stjr * 1. Redistributions of source code must retain the above copyright 11133819Stjr * notice, this list of conditions and the following disclaimer 12133819Stjr * in this position and unchanged. 13133819Stjr * 2. Redistributions in binary form must reproduce the above copyright 14133819Stjr * notice, this list of conditions and the following disclaimer in the 15133819Stjr * documentation and/or other materials provided with the distribution. 16133819Stjr * 3. The name of the author may not be used to endorse or promote products 17133819Stjr * derived from this software without specific prior written permission. 18133819Stjr * 19133819Stjr * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 20133819Stjr * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 21133819Stjr * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 22133819Stjr * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 23133819Stjr * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 24133819Stjr * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25133819Stjr * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26133819Stjr * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27133819Stjr * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 28133819Stjr * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29133819Stjr */ 30133819Stjr 31133819Stjr#include <sys/cdefs.h> 32133819Stjr__FBSDID("$FreeBSD: head/sys/amd64/linux32/linux32_machdep.c 218612 2011-02-12 15:50:21Z dchagin $"); 33133819Stjr 34133819Stjr#include <sys/param.h> 35133819Stjr#include <sys/kernel.h> 36133819Stjr#include <sys/systm.h> 37165832Snetchild#include <sys/file.h> 38165832Snetchild#include <sys/fcntl.h> 39162954Sphk#include <sys/clock.h> 40142057Sjhb#include <sys/imgact.h> 41161310Snetchild#include <sys/limits.h> 42133819Stjr#include <sys/lock.h> 43133819Stjr#include <sys/malloc.h> 44133819Stjr#include <sys/mman.h> 45133819Stjr#include <sys/mutex.h> 46166729Sjkim#include <sys/priv.h> 47133819Stjr#include <sys/proc.h> 48133819Stjr#include <sys/resource.h> 49133819Stjr#include <sys/resourcevar.h> 50166188Sjeff#include <sys/sched.h> 51133819Stjr#include <sys/syscallsubr.h> 52133819Stjr#include <sys/sysproto.h> 53133819Stjr#include <sys/unistd.h> 54218030Sdchagin#include <sys/wait.h> 55133819Stjr 56133819Stjr#include <machine/frame.h> 57168035Sjkim#include <machine/pcb.h> 58166729Sjkim#include <machine/psl.h> 59168035Sjkim#include <machine/segments.h> 60168035Sjkim#include <machine/specialreg.h> 61133819Stjr 62133819Stjr#include <vm/vm.h> 63133819Stjr#include <vm/pmap.h> 64133819Stjr#include <vm/vm_map.h> 65133819Stjr 66210431Skib#include <compat/freebsd32/freebsd32_util.h> 67133819Stjr#include <amd64/linux32/linux.h> 68133819Stjr#include <amd64/linux32/linux32_proto.h> 69133819Stjr#include <compat/linux/linux_ipc.h> 70218030Sdchagin#include <compat/linux/linux_misc.h> 71133819Stjr#include <compat/linux/linux_signal.h> 72133819Stjr#include <compat/linux/linux_util.h> 73161474Snetchild#include <compat/linux/linux_emul.h> 74133819Stjr 75133819Stjrstruct l_old_select_argv { 76133819Stjr l_int nfds; 77133819Stjr l_uintptr_t readfds; 78133819Stjr l_uintptr_t writefds; 79133819Stjr l_uintptr_t exceptfds; 80133819Stjr l_uintptr_t timeout; 81133819Stjr} __packed; 82133819Stjr 83133819Stjrint 84133819Stjrlinux_to_bsd_sigaltstack(int lsa) 85133819Stjr{ 86133819Stjr int bsa = 0; 87133819Stjr 88133819Stjr if (lsa & LINUX_SS_DISABLE) 89133819Stjr bsa |= SS_DISABLE; 90133819Stjr if (lsa & LINUX_SS_ONSTACK) 91133819Stjr bsa |= SS_ONSTACK; 92133819Stjr return (bsa); 93133819Stjr} 94133819Stjr 95198554Sjhbstatic int linux_mmap_common(struct thread *td, l_uintptr_t addr, 96198554Sjhb l_size_t len, l_int prot, l_int flags, l_int fd, 97198554Sjhb l_loff_t pos); 98198554Sjhb 99133819Stjrint 100133819Stjrbsd_to_linux_sigaltstack(int bsa) 101133819Stjr{ 102133819Stjr int lsa = 0; 103133819Stjr 104133819Stjr if (bsa & SS_DISABLE) 105133819Stjr lsa |= LINUX_SS_DISABLE; 106133819Stjr if (bsa & SS_ONSTACK) 107133819Stjr lsa |= LINUX_SS_ONSTACK; 108133819Stjr return (lsa); 109133819Stjr} 110133819Stjr 111218059Sdchaginstatic void 112218059Sdchaginbsd_to_linux_rusage(struct rusage *ru, struct l_rusage *lru) 113218028Sdchagin{ 114218059Sdchagin 115218028Sdchagin lru->ru_utime.tv_sec = ru->ru_utime.tv_sec; 116218028Sdchagin lru->ru_utime.tv_usec = ru->ru_utime.tv_usec; 117218028Sdchagin lru->ru_stime.tv_sec = ru->ru_stime.tv_sec; 118218028Sdchagin lru->ru_stime.tv_usec = ru->ru_stime.tv_usec; 119218028Sdchagin lru->ru_maxrss = ru->ru_maxrss; 120218028Sdchagin lru->ru_ixrss = ru->ru_ixrss; 121218028Sdchagin lru->ru_idrss = ru->ru_idrss; 122218028Sdchagin lru->ru_isrss = ru->ru_isrss; 123218028Sdchagin lru->ru_minflt = ru->ru_minflt; 124218028Sdchagin lru->ru_majflt = ru->ru_majflt; 125218028Sdchagin lru->ru_nswap = ru->ru_nswap; 126218028Sdchagin lru->ru_inblock = ru->ru_inblock; 127218028Sdchagin lru->ru_oublock = ru->ru_oublock; 128218028Sdchagin lru->ru_msgsnd = ru->ru_msgsnd; 129218028Sdchagin lru->ru_msgrcv = ru->ru_msgrcv; 130218028Sdchagin lru->ru_nsignals = ru->ru_nsignals; 131218028Sdchagin lru->ru_nvcsw = ru->ru_nvcsw; 132218028Sdchagin lru->ru_nivcsw = ru->ru_nivcsw; 133218028Sdchagin} 134218028Sdchagin 135142057Sjhbint 136142057Sjhblinux_execve(struct thread *td, struct linux_execve_args *args) 137142057Sjhb{ 138142057Sjhb struct image_args eargs; 139142057Sjhb char *path; 140142057Sjhb int error; 141142057Sjhb 142142057Sjhb LCONVPATHEXIST(td, args->path, &path); 143142057Sjhb 144142057Sjhb#ifdef DEBUG 145142057Sjhb if (ldebug(execve)) 146142057Sjhb printf(ARGS(execve, "%s"), path); 147142057Sjhb#endif 148142057Sjhb 149210431Skib error = freebsd32_exec_copyin_args(&eargs, path, UIO_SYSSPACE, 150210431Skib args->argp, args->envp); 151142057Sjhb free(path, M_TEMP); 152142057Sjhb if (error == 0) 153142057Sjhb error = kern_execve(td, &eargs, NULL); 154161474Snetchild if (error == 0) 155168063Sjkim /* Linux process can execute FreeBSD one, do not attempt 156161474Snetchild * to create emuldata for such process using 157161474Snetchild * linux_proc_init, this leads to a panic on KASSERT 158168063Sjkim * because such process has p->p_emuldata == NULL. 159161474Snetchild */ 160217896Sdchagin if (SV_PROC_ABI(td->td_proc) == SV_ABI_LINUX) 161168063Sjkim error = linux_proc_init(td, 0, 0); 162142057Sjhb return (error); 163142057Sjhb} 164142057Sjhb 165185438SkibCTASSERT(sizeof(struct l_iovec32) == 8); 166133819Stjr 167144449Sjhbstatic int 168185438Skiblinux32_copyinuio(struct l_iovec32 *iovp, l_ulong iovcnt, struct uio **uiop) 169133819Stjr{ 170185438Skib struct l_iovec32 iov32; 171144449Sjhb struct iovec *iov; 172144449Sjhb struct uio *uio; 173185438Skib uint32_t iovlen; 174144449Sjhb int error, i; 175133819Stjr 176144449Sjhb *uiop = NULL; 177144449Sjhb if (iovcnt > UIO_MAXIOV) 178133819Stjr return (EINVAL); 179144449Sjhb iovlen = iovcnt * sizeof(struct iovec); 180168844Sjkim uio = malloc(iovlen + sizeof(*uio), M_IOV, M_WAITOK); 181144449Sjhb iov = (struct iovec *)(uio + 1); 182144449Sjhb for (i = 0; i < iovcnt; i++) { 183185438Skib error = copyin(&iovp[i], &iov32, sizeof(struct l_iovec32)); 184144449Sjhb if (error) { 185144449Sjhb free(uio, M_IOV); 186144449Sjhb return (error); 187144449Sjhb } 188144449Sjhb iov[i].iov_base = PTRIN(iov32.iov_base); 189144449Sjhb iov[i].iov_len = iov32.iov_len; 190133819Stjr } 191144449Sjhb uio->uio_iov = iov; 192144449Sjhb uio->uio_iovcnt = iovcnt; 193144449Sjhb uio->uio_segflg = UIO_USERSPACE; 194144449Sjhb uio->uio_offset = -1; 195144449Sjhb uio->uio_resid = 0; 196144449Sjhb for (i = 0; i < iovcnt; i++) { 197144449Sjhb if (iov->iov_len > INT_MAX - uio->uio_resid) { 198144449Sjhb free(uio, M_IOV); 199144449Sjhb return (EINVAL); 200144449Sjhb } 201144449Sjhb uio->uio_resid += iov->iov_len; 202144449Sjhb iov++; 203144449Sjhb } 204144449Sjhb *uiop = uio; 205144449Sjhb return (0); 206144449Sjhb} 207133819Stjr 208144449Sjhbint 209185438Skiblinux32_copyiniov(struct l_iovec32 *iovp32, l_ulong iovcnt, struct iovec **iovp, 210185438Skib int error) 211185438Skib{ 212185438Skib struct l_iovec32 iov32; 213185438Skib struct iovec *iov; 214185438Skib uint32_t iovlen; 215185438Skib int i; 216185438Skib 217185438Skib *iovp = NULL; 218185438Skib if (iovcnt > UIO_MAXIOV) 219185438Skib return (error); 220185438Skib iovlen = iovcnt * sizeof(struct iovec); 221185438Skib iov = malloc(iovlen, M_IOV, M_WAITOK); 222185438Skib for (i = 0; i < iovcnt; i++) { 223185438Skib error = copyin(&iovp32[i], &iov32, sizeof(struct l_iovec32)); 224185438Skib if (error) { 225185438Skib free(iov, M_IOV); 226185438Skib return (error); 227185438Skib } 228185438Skib iov[i].iov_base = PTRIN(iov32.iov_base); 229185438Skib iov[i].iov_len = iov32.iov_len; 230185438Skib } 231185438Skib *iovp = iov; 232185438Skib return(0); 233185438Skib 234185438Skib} 235185438Skib 236185438Skibint 237144449Sjhblinux_readv(struct thread *td, struct linux_readv_args *uap) 238144449Sjhb{ 239144449Sjhb struct uio *auio; 240144449Sjhb int error; 241133819Stjr 242144449Sjhb error = linux32_copyinuio(uap->iovp, uap->iovcnt, &auio); 243144449Sjhb if (error) 244144449Sjhb return (error); 245144449Sjhb error = kern_readv(td, uap->fd, auio); 246144449Sjhb free(auio, M_IOV); 247133819Stjr return (error); 248133819Stjr} 249133819Stjr 250133819Stjrint 251133819Stjrlinux_writev(struct thread *td, struct linux_writev_args *uap) 252133819Stjr{ 253144449Sjhb struct uio *auio; 254144449Sjhb int error; 255133819Stjr 256144449Sjhb error = linux32_copyinuio(uap->iovp, uap->iovcnt, &auio); 257144449Sjhb if (error) 258144449Sjhb return (error); 259144449Sjhb error = kern_writev(td, uap->fd, auio); 260144449Sjhb free(auio, M_IOV); 261133819Stjr return (error); 262133819Stjr} 263133819Stjr 264133819Stjrstruct l_ipc_kludge { 265133819Stjr l_uintptr_t msgp; 266133819Stjr l_long msgtyp; 267133819Stjr} __packed; 268133819Stjr 269133819Stjrint 270133819Stjrlinux_ipc(struct thread *td, struct linux_ipc_args *args) 271133819Stjr{ 272133819Stjr 273133819Stjr switch (args->what & 0xFFFF) { 274133819Stjr case LINUX_SEMOP: { 275133819Stjr struct linux_semop_args a; 276133819Stjr 277133819Stjr a.semid = args->arg1; 278133819Stjr a.tsops = args->ptr; 279133819Stjr a.nsops = args->arg2; 280133819Stjr return (linux_semop(td, &a)); 281133819Stjr } 282133819Stjr case LINUX_SEMGET: { 283133819Stjr struct linux_semget_args a; 284133819Stjr 285133819Stjr a.key = args->arg1; 286133819Stjr a.nsems = args->arg2; 287133819Stjr a.semflg = args->arg3; 288133819Stjr return (linux_semget(td, &a)); 289133819Stjr } 290133819Stjr case LINUX_SEMCTL: { 291133819Stjr struct linux_semctl_args a; 292133819Stjr int error; 293133819Stjr 294133819Stjr a.semid = args->arg1; 295133819Stjr a.semnum = args->arg2; 296133819Stjr a.cmd = args->arg3; 297133819Stjr error = copyin(args->ptr, &a.arg, sizeof(a.arg)); 298133819Stjr if (error) 299133819Stjr return (error); 300133819Stjr return (linux_semctl(td, &a)); 301133819Stjr } 302133819Stjr case LINUX_MSGSND: { 303133819Stjr struct linux_msgsnd_args a; 304133819Stjr 305133819Stjr a.msqid = args->arg1; 306133819Stjr a.msgp = args->ptr; 307133819Stjr a.msgsz = args->arg2; 308133819Stjr a.msgflg = args->arg3; 309133819Stjr return (linux_msgsnd(td, &a)); 310133819Stjr } 311133819Stjr case LINUX_MSGRCV: { 312133819Stjr struct linux_msgrcv_args a; 313133819Stjr 314133819Stjr a.msqid = args->arg1; 315133819Stjr a.msgsz = args->arg2; 316133819Stjr a.msgflg = args->arg3; 317133819Stjr if ((args->what >> 16) == 0) { 318133819Stjr struct l_ipc_kludge tmp; 319133819Stjr int error; 320133819Stjr 321133819Stjr if (args->ptr == 0) 322133819Stjr return (EINVAL); 323133819Stjr error = copyin(args->ptr, &tmp, sizeof(tmp)); 324133819Stjr if (error) 325133819Stjr return (error); 326133819Stjr a.msgp = PTRIN(tmp.msgp); 327133819Stjr a.msgtyp = tmp.msgtyp; 328133819Stjr } else { 329133819Stjr a.msgp = args->ptr; 330133819Stjr a.msgtyp = args->arg5; 331133819Stjr } 332133819Stjr return (linux_msgrcv(td, &a)); 333133819Stjr } 334133819Stjr case LINUX_MSGGET: { 335133819Stjr struct linux_msgget_args a; 336133819Stjr 337133819Stjr a.key = args->arg1; 338133819Stjr a.msgflg = args->arg2; 339133819Stjr return (linux_msgget(td, &a)); 340133819Stjr } 341133819Stjr case LINUX_MSGCTL: { 342133819Stjr struct linux_msgctl_args a; 343133819Stjr 344133819Stjr a.msqid = args->arg1; 345133819Stjr a.cmd = args->arg2; 346133819Stjr a.buf = args->ptr; 347133819Stjr return (linux_msgctl(td, &a)); 348133819Stjr } 349133819Stjr case LINUX_SHMAT: { 350133819Stjr struct linux_shmat_args a; 351133819Stjr 352133819Stjr a.shmid = args->arg1; 353133819Stjr a.shmaddr = args->ptr; 354133819Stjr a.shmflg = args->arg2; 355144441Sjhb a.raddr = PTRIN((l_uint)args->arg3); 356133819Stjr return (linux_shmat(td, &a)); 357133819Stjr } 358133819Stjr case LINUX_SHMDT: { 359133819Stjr struct linux_shmdt_args a; 360133819Stjr 361133819Stjr a.shmaddr = args->ptr; 362133819Stjr return (linux_shmdt(td, &a)); 363133819Stjr } 364133819Stjr case LINUX_SHMGET: { 365133819Stjr struct linux_shmget_args a; 366133819Stjr 367133819Stjr a.key = args->arg1; 368133819Stjr a.size = args->arg2; 369133819Stjr a.shmflg = args->arg3; 370133819Stjr return (linux_shmget(td, &a)); 371133819Stjr } 372133819Stjr case LINUX_SHMCTL: { 373133819Stjr struct linux_shmctl_args a; 374133819Stjr 375133819Stjr a.shmid = args->arg1; 376133819Stjr a.cmd = args->arg2; 377133819Stjr a.buf = args->ptr; 378133819Stjr return (linux_shmctl(td, &a)); 379133819Stjr } 380133819Stjr default: 381133819Stjr break; 382133819Stjr } 383133819Stjr 384133819Stjr return (EINVAL); 385133819Stjr} 386133819Stjr 387133819Stjrint 388133819Stjrlinux_old_select(struct thread *td, struct linux_old_select_args *args) 389133819Stjr{ 390133819Stjr struct l_old_select_argv linux_args; 391133819Stjr struct linux_select_args newsel; 392133819Stjr int error; 393133819Stjr 394133819Stjr#ifdef DEBUG 395133819Stjr if (ldebug(old_select)) 396133819Stjr printf(ARGS(old_select, "%p"), args->ptr); 397133819Stjr#endif 398133819Stjr 399133819Stjr error = copyin(args->ptr, &linux_args, sizeof(linux_args)); 400133819Stjr if (error) 401133819Stjr return (error); 402133819Stjr 403133819Stjr newsel.nfds = linux_args.nfds; 404133819Stjr newsel.readfds = PTRIN(linux_args.readfds); 405133819Stjr newsel.writefds = PTRIN(linux_args.writefds); 406133819Stjr newsel.exceptfds = PTRIN(linux_args.exceptfds); 407133819Stjr newsel.timeout = PTRIN(linux_args.timeout); 408133819Stjr return (linux_select(td, &newsel)); 409133819Stjr} 410133819Stjr 411133819Stjrint 412133819Stjrlinux_fork(struct thread *td, struct linux_fork_args *args) 413133819Stjr{ 414133819Stjr int error; 415166150Snetchild struct proc *p2; 416166150Snetchild struct thread *td2; 417133819Stjr 418133819Stjr#ifdef DEBUG 419133819Stjr if (ldebug(fork)) 420133819Stjr printf(ARGS(fork, "")); 421133819Stjr#endif 422133819Stjr 423166150Snetchild if ((error = fork1(td, RFFDG | RFPROC | RFSTOPPED, 0, &p2)) != 0) 424133819Stjr return (error); 425168063Sjkim 426166150Snetchild if (error == 0) { 427166150Snetchild td->td_retval[0] = p2->p_pid; 428166150Snetchild td->td_retval[1] = 0; 429166150Snetchild } 430133819Stjr 431133819Stjr if (td->td_retval[1] == 1) 432133819Stjr td->td_retval[0] = 0; 433161474Snetchild error = linux_proc_init(td, td->td_retval[0], 0); 434161474Snetchild if (error) 435161474Snetchild return (error); 436161474Snetchild 437166150Snetchild td2 = FIRST_THREAD_IN_PROC(p2); 438166150Snetchild 439168063Sjkim /* 440168063Sjkim * Make this runnable after we are finished with it. 441168063Sjkim */ 442170307Sjeff thread_lock(td2); 443166150Snetchild TD_SET_CAN_RUN(td2); 444166188Sjeff sched_add(td2, SRQ_BORING); 445170307Sjeff thread_unlock(td2); 446166150Snetchild 447133819Stjr return (0); 448133819Stjr} 449133819Stjr 450133819Stjrint 451133819Stjrlinux_vfork(struct thread *td, struct linux_vfork_args *args) 452133819Stjr{ 453133819Stjr int error; 454161611Snetchild struct proc *p2; 455166150Snetchild struct thread *td2; 456133819Stjr 457133819Stjr#ifdef DEBUG 458133819Stjr if (ldebug(vfork)) 459133819Stjr printf(ARGS(vfork, "")); 460133819Stjr#endif 461133819Stjr 462168063Sjkim /* Exclude RFPPWAIT */ 463166150Snetchild if ((error = fork1(td, RFFDG | RFPROC | RFMEM | RFSTOPPED, 0, &p2)) != 0) 464133819Stjr return (error); 465161611Snetchild if (error == 0) { 466161611Snetchild td->td_retval[0] = p2->p_pid; 467161611Snetchild td->td_retval[1] = 0; 468161611Snetchild } 469133819Stjr /* Are we the child? */ 470133819Stjr if (td->td_retval[1] == 1) 471133819Stjr td->td_retval[0] = 0; 472161474Snetchild error = linux_proc_init(td, td->td_retval[0], 0); 473161474Snetchild if (error) 474161474Snetchild return (error); 475166150Snetchild 476166150Snetchild PROC_LOCK(p2); 477166150Snetchild p2->p_flag |= P_PPWAIT; 478166150Snetchild PROC_UNLOCK(p2); 479166150Snetchild 480166150Snetchild td2 = FIRST_THREAD_IN_PROC(p2); 481168063Sjkim 482168848Sjkim /* 483168848Sjkim * Make this runnable after we are finished with it. 484168848Sjkim */ 485170307Sjeff thread_lock(td2); 486166150Snetchild TD_SET_CAN_RUN(td2); 487166188Sjeff sched_add(td2, SRQ_BORING); 488170307Sjeff thread_unlock(td2); 489166150Snetchild 490161611Snetchild /* wait for the children to exit, ie. emulate vfork */ 491161611Snetchild PROC_LOCK(p2); 492161611Snetchild while (p2->p_flag & P_PPWAIT) 493188750Skib cv_wait(&p2->p_pwait, &p2->p_mtx); 494161611Snetchild PROC_UNLOCK(p2); 495168063Sjkim 496133819Stjr return (0); 497133819Stjr} 498133819Stjr 499218612Sdchaginstatic int 500218612Sdchaginlinux_set_cloned_tls(struct thread *td, void *desc) 501218612Sdchagin{ 502218612Sdchagin struct user_segment_descriptor sd; 503218612Sdchagin struct l_user_desc info; 504218612Sdchagin struct pcb *pcb; 505218612Sdchagin int error; 506218612Sdchagin int a[2]; 507218612Sdchagin 508218612Sdchagin error = copyin(desc, &info, sizeof(struct l_user_desc)); 509218612Sdchagin if (error) { 510218612Sdchagin printf(LMSG("copyin failed!")); 511218612Sdchagin } else { 512218612Sdchagin /* We might copy out the entry_number as GUGS32_SEL. */ 513218612Sdchagin info.entry_number = GUGS32_SEL; 514218612Sdchagin error = copyout(&info, desc, sizeof(struct l_user_desc)); 515218612Sdchagin if (error) 516218612Sdchagin printf(LMSG("copyout failed!")); 517218612Sdchagin 518218612Sdchagin a[0] = LINUX_LDT_entry_a(&info); 519218612Sdchagin a[1] = LINUX_LDT_entry_b(&info); 520218612Sdchagin 521218612Sdchagin memcpy(&sd, &a, sizeof(a)); 522218612Sdchagin#ifdef DEBUG 523218612Sdchagin if (ldebug(clone)) 524218612Sdchagin printf("Segment created in clone with " 525218612Sdchagin "CLONE_SETTLS: lobase: %x, hibase: %x, " 526218612Sdchagin "lolimit: %x, hilimit: %x, type: %i, " 527218612Sdchagin "dpl: %i, p: %i, xx: %i, long: %i, " 528218612Sdchagin "def32: %i, gran: %i\n", sd.sd_lobase, 529218612Sdchagin sd.sd_hibase, sd.sd_lolimit, sd.sd_hilimit, 530218612Sdchagin sd.sd_type, sd.sd_dpl, sd.sd_p, sd.sd_xx, 531218612Sdchagin sd.sd_long, sd.sd_def32, sd.sd_gran); 532218612Sdchagin#endif 533218612Sdchagin pcb = td->td_pcb; 534218612Sdchagin pcb->pcb_gsbase = (register_t)info.base_addr; 535218612Sdchagin/* XXXKIB pcb->pcb_gs32sd = sd; */ 536218612Sdchagin td->td_frame->tf_gs = GSEL(GUGS32_SEL, SEL_UPL); 537218612Sdchagin set_pcb_flags(pcb, PCB_GS32BIT | PCB_32BIT); 538218612Sdchagin } 539218612Sdchagin 540218612Sdchagin return (error); 541218612Sdchagin} 542218612Sdchagin 543133819Stjrint 544133819Stjrlinux_clone(struct thread *td, struct linux_clone_args *args) 545133819Stjr{ 546133819Stjr int error, ff = RFPROC | RFSTOPPED; 547133819Stjr struct proc *p2; 548133819Stjr struct thread *td2; 549133819Stjr int exit_signal; 550161474Snetchild struct linux_emuldata *em; 551133819Stjr 552133819Stjr#ifdef DEBUG 553133819Stjr if (ldebug(clone)) { 554168063Sjkim printf(ARGS(clone, "flags %x, stack %p, parent tid: %p, " 555168063Sjkim "child tid: %p"), (unsigned)args->flags, 556168063Sjkim args->stack, args->parent_tidptr, args->child_tidptr); 557133819Stjr } 558133819Stjr#endif 559133819Stjr 560133819Stjr exit_signal = args->flags & 0x000000ff; 561169458Skan if (LINUX_SIG_VALID(exit_signal)) { 562169458Skan if (exit_signal <= LINUX_SIGTBLSZ) 563169458Skan exit_signal = 564169458Skan linux_to_bsd_signal[_SIG_IDX(exit_signal)]; 565169458Skan } else if (exit_signal != 0) 566133819Stjr return (EINVAL); 567133819Stjr 568167157Sjkim if (args->flags & LINUX_CLONE_VM) 569133819Stjr ff |= RFMEM; 570167157Sjkim if (args->flags & LINUX_CLONE_SIGHAND) 571133819Stjr ff |= RFSIGSHARE; 572168063Sjkim /* 573168063Sjkim * XXX: In Linux, sharing of fs info (chroot/cwd/umask) 574168063Sjkim * and open files is independant. In FreeBSD, its in one 575168848Sjkim * structure but in reality it does not cause any problems 576168848Sjkim * because both of these flags are usually set together. 577163371Snetchild */ 578167157Sjkim if (!(args->flags & (LINUX_CLONE_FILES | LINUX_CLONE_FS))) 579133819Stjr ff |= RFFDG; 580133819Stjr 581161474Snetchild /* 582161474Snetchild * Attempt to detect when linux_clone(2) is used for creating 583161474Snetchild * kernel threads. Unfortunately despite the existence of the 584161474Snetchild * CLONE_THREAD flag, version of linuxthreads package used in 585161474Snetchild * most popular distros as of beginning of 2005 doesn't make 586166944Snetchild * any use of it. Therefore, this detection relies on 587161474Snetchild * empirical observation that linuxthreads sets certain 588161474Snetchild * combination of flags, so that we can make more or less 589161474Snetchild * precise detection and notify the FreeBSD kernel that several 590161474Snetchild * processes are in fact part of the same threading group, so 591161474Snetchild * that special treatment is necessary for signal delivery 592161474Snetchild * between those processes and fd locking. 593161474Snetchild */ 594167157Sjkim if ((args->flags & 0xffffff00) == LINUX_THREADING_FLAGS) 595161474Snetchild ff |= RFTHREAD; 596161474Snetchild 597168056Sjkim if (args->flags & LINUX_CLONE_PARENT_SETTID) 598168056Sjkim if (args->parent_tidptr == NULL) 599168056Sjkim return (EINVAL); 600168056Sjkim 601133819Stjr error = fork1(td, ff, 0, &p2); 602133819Stjr if (error) 603133819Stjr return (error); 604166395Skib 605167157Sjkim if (args->flags & (LINUX_CLONE_PARENT | LINUX_CLONE_THREAD)) { 606166395Skib sx_xlock(&proctree_lock); 607166395Skib PROC_LOCK(p2); 608166395Skib proc_reparent(p2, td->td_proc->p_pptr); 609166395Skib PROC_UNLOCK(p2); 610166395Skib sx_xunlock(&proctree_lock); 611166395Skib } 612168063Sjkim 613161474Snetchild /* create the emuldata */ 614161474Snetchild error = linux_proc_init(td, p2->p_pid, args->flags); 615161474Snetchild /* reference it - no need to check this */ 616165867Snetchild em = em_find(p2, EMUL_DOLOCK); 617161474Snetchild KASSERT(em != NULL, ("clone: emuldata not found.\n")); 618161474Snetchild /* and adjust it */ 619133819Stjr 620167157Sjkim if (args->flags & LINUX_CLONE_THREAD) { 621161474Snetchild#ifdef notyet 622161696Snetchild PROC_LOCK(p2); 623161474Snetchild p2->p_pgrp = td->td_proc->p_pgrp; 624161696Snetchild PROC_UNLOCK(p2); 625161474Snetchild#endif 626168063Sjkim exit_signal = 0; 627161474Snetchild } 628161474Snetchild 629167157Sjkim if (args->flags & LINUX_CLONE_CHILD_SETTID) 630161474Snetchild em->child_set_tid = args->child_tidptr; 631161474Snetchild else 632161474Snetchild em->child_set_tid = NULL; 633161474Snetchild 634167157Sjkim if (args->flags & LINUX_CLONE_CHILD_CLEARTID) 635161474Snetchild em->child_clear_tid = args->child_tidptr; 636161474Snetchild else 637161474Snetchild em->child_clear_tid = NULL; 638161696Snetchild 639161474Snetchild EMUL_UNLOCK(&emul_lock); 640161474Snetchild 641168056Sjkim if (args->flags & LINUX_CLONE_PARENT_SETTID) { 642168056Sjkim error = copyout(&p2->p_pid, args->parent_tidptr, 643168056Sjkim sizeof(p2->p_pid)); 644168056Sjkim if (error) 645168056Sjkim printf(LMSG("copyout failed!")); 646168056Sjkim } 647168056Sjkim 648133819Stjr PROC_LOCK(p2); 649133819Stjr p2->p_sigparent = exit_signal; 650133819Stjr PROC_UNLOCK(p2); 651133819Stjr td2 = FIRST_THREAD_IN_PROC(p2); 652168063Sjkim /* 653168063Sjkim * In a case of stack = NULL, we are supposed to COW calling process 654168063Sjkim * stack. This is what normal fork() does, so we just keep tf_rsp arg 655168063Sjkim * intact. 656161310Snetchild */ 657161310Snetchild if (args->stack) 658168063Sjkim td2->td_frame->tf_rsp = PTROUT(args->stack); 659133819Stjr 660218612Sdchagin if (args->flags & LINUX_CLONE_SETTLS) 661218612Sdchagin linux_set_cloned_tls(td2, args->tls); 662168035Sjkim 663168035Sjkim#ifdef DEBUG 664133819Stjr if (ldebug(clone)) 665168063Sjkim printf(LMSG("clone: successful rfork to %d, " 666168063Sjkim "stack %p sig = %d"), (int)p2->p_pid, args->stack, 667168063Sjkim exit_signal); 668133819Stjr#endif 669167157Sjkim if (args->flags & LINUX_CLONE_VFORK) { 670166150Snetchild PROC_LOCK(p2); 671166150Snetchild p2->p_flag |= P_PPWAIT; 672166150Snetchild PROC_UNLOCK(p2); 673166150Snetchild } 674133819Stjr 675133819Stjr /* 676133819Stjr * Make this runnable after we are finished with it. 677133819Stjr */ 678170307Sjeff thread_lock(td2); 679133819Stjr TD_SET_CAN_RUN(td2); 680166188Sjeff sched_add(td2, SRQ_BORING); 681170307Sjeff thread_unlock(td2); 682133819Stjr 683133819Stjr td->td_retval[0] = p2->p_pid; 684133819Stjr td->td_retval[1] = 0; 685168063Sjkim 686167157Sjkim if (args->flags & LINUX_CLONE_VFORK) { 687168063Sjkim /* wait for the children to exit, ie. emulate vfork */ 688168063Sjkim PROC_LOCK(p2); 689163374Snetchild while (p2->p_flag & P_PPWAIT) 690188750Skib cv_wait(&p2->p_pwait, &p2->p_mtx); 691163374Snetchild PROC_UNLOCK(p2); 692163374Snetchild } 693163374Snetchild 694133819Stjr return (0); 695133819Stjr} 696133819Stjr 697133819Stjr#define STACK_SIZE (2 * 1024 * 1024) 698133819Stjr#define GUARD_SIZE (4 * PAGE_SIZE) 699133819Stjr 700133819Stjrint 701133819Stjrlinux_mmap2(struct thread *td, struct linux_mmap2_args *args) 702133819Stjr{ 703133819Stjr 704133819Stjr#ifdef DEBUG 705133819Stjr if (ldebug(mmap2)) 706168063Sjkim printf(ARGS(mmap2, "0x%08x, %d, %d, 0x%08x, %d, %d"), 707168063Sjkim args->addr, args->len, args->prot, 708133819Stjr args->flags, args->fd, args->pgoff); 709133819Stjr#endif 710133819Stjr 711198554Sjhb return (linux_mmap_common(td, PTROUT(args->addr), args->len, args->prot, 712198554Sjhb args->flags, args->fd, (uint64_t)(uint32_t)args->pgoff * 713198554Sjhb PAGE_SIZE)); 714133819Stjr} 715133819Stjr 716133819Stjrint 717133819Stjrlinux_mmap(struct thread *td, struct linux_mmap_args *args) 718133819Stjr{ 719133819Stjr int error; 720133819Stjr struct l_mmap_argv linux_args; 721133819Stjr 722133819Stjr error = copyin(args->ptr, &linux_args, sizeof(linux_args)); 723133819Stjr if (error) 724133819Stjr return (error); 725133819Stjr 726133819Stjr#ifdef DEBUG 727133819Stjr if (ldebug(mmap)) 728168063Sjkim printf(ARGS(mmap, "0x%08x, %d, %d, 0x%08x, %d, %d"), 729168063Sjkim linux_args.addr, linux_args.len, linux_args.prot, 730168063Sjkim linux_args.flags, linux_args.fd, linux_args.pgoff); 731133819Stjr#endif 732133819Stjr 733198554Sjhb return (linux_mmap_common(td, linux_args.addr, linux_args.len, 734198554Sjhb linux_args.prot, linux_args.flags, linux_args.fd, 735198554Sjhb (uint32_t)linux_args.pgoff)); 736133819Stjr} 737133819Stjr 738133819Stjrstatic int 739198554Sjhblinux_mmap_common(struct thread *td, l_uintptr_t addr, l_size_t len, l_int prot, 740198554Sjhb l_int flags, l_int fd, l_loff_t pos) 741133819Stjr{ 742133819Stjr struct proc *p = td->td_proc; 743133819Stjr struct mmap_args /* { 744133819Stjr caddr_t addr; 745133819Stjr size_t len; 746133819Stjr int prot; 747133819Stjr int flags; 748133819Stjr int fd; 749133819Stjr long pad; 750133819Stjr off_t pos; 751133819Stjr } */ bsd_args; 752133819Stjr int error; 753165832Snetchild struct file *fp; 754133819Stjr 755133819Stjr error = 0; 756133819Stjr bsd_args.flags = 0; 757165832Snetchild fp = NULL; 758165832Snetchild 759165832Snetchild /* 760165832Snetchild * Linux mmap(2): 761165832Snetchild * You must specify exactly one of MAP_SHARED and MAP_PRIVATE 762165832Snetchild */ 763198554Sjhb if (!((flags & LINUX_MAP_SHARED) ^ (flags & LINUX_MAP_PRIVATE))) 764165832Snetchild return (EINVAL); 765165832Snetchild 766198554Sjhb if (flags & LINUX_MAP_SHARED) 767133819Stjr bsd_args.flags |= MAP_SHARED; 768198554Sjhb if (flags & LINUX_MAP_PRIVATE) 769133819Stjr bsd_args.flags |= MAP_PRIVATE; 770198554Sjhb if (flags & LINUX_MAP_FIXED) 771133819Stjr bsd_args.flags |= MAP_FIXED; 772208994Skan if (flags & LINUX_MAP_ANON) { 773208994Skan /* Enforce pos to be on page boundary, then ignore. */ 774208994Skan if ((pos & PAGE_MASK) != 0) 775208994Skan return (EINVAL); 776208994Skan pos = 0; 777133819Stjr bsd_args.flags |= MAP_ANON; 778208994Skan } else 779133819Stjr bsd_args.flags |= MAP_NOSYNC; 780198554Sjhb if (flags & LINUX_MAP_GROWSDOWN) 781133819Stjr bsd_args.flags |= MAP_STACK; 782133819Stjr 783166727Sjkim /* 784166727Sjkim * PROT_READ, PROT_WRITE, or PROT_EXEC implies PROT_READ and PROT_EXEC 785166727Sjkim * on Linux/i386. We do this to ensure maximum compatibility. 786166727Sjkim * Linux/ia64 does the same in i386 emulation mode. 787166727Sjkim */ 788198554Sjhb bsd_args.prot = prot; 789166727Sjkim if (bsd_args.prot & (PROT_READ | PROT_WRITE | PROT_EXEC)) 790166727Sjkim bsd_args.prot |= PROT_READ | PROT_EXEC; 791166727Sjkim 792167048Sjkim /* Linux does not check file descriptor when MAP_ANONYMOUS is set. */ 793198554Sjhb bsd_args.fd = (bsd_args.flags & MAP_ANON) ? -1 : fd; 794167048Sjkim if (bsd_args.fd != -1) { 795166727Sjkim /* 796166727Sjkim * Linux follows Solaris mmap(2) description: 797166727Sjkim * The file descriptor fildes is opened with 798166727Sjkim * read permission, regardless of the 799166727Sjkim * protection options specified. 800166727Sjkim */ 801166727Sjkim 802167048Sjkim if ((error = fget(td, bsd_args.fd, &fp)) != 0) 803166727Sjkim return (error); 804166727Sjkim if (fp->f_type != DTYPE_VNODE) { 805166727Sjkim fdrop(fp, td); 806166727Sjkim return (EINVAL); 807166727Sjkim } 808166727Sjkim 809166727Sjkim /* Linux mmap() just fails for O_WRONLY files */ 810166727Sjkim if (!(fp->f_flag & FREAD)) { 811166727Sjkim fdrop(fp, td); 812166727Sjkim return (EACCES); 813166727Sjkim } 814166727Sjkim 815166727Sjkim fdrop(fp, td); 816166727Sjkim } 817166727Sjkim 818198554Sjhb if (flags & LINUX_MAP_GROWSDOWN) { 819168063Sjkim /* 820168063Sjkim * The Linux MAP_GROWSDOWN option does not limit auto 821133819Stjr * growth of the region. Linux mmap with this option 822133819Stjr * takes as addr the inital BOS, and as len, the initial 823133819Stjr * region size. It can then grow down from addr without 824168063Sjkim * limit. However, Linux threads has an implicit internal 825133819Stjr * limit to stack size of STACK_SIZE. Its just not 826168063Sjkim * enforced explicitly in Linux. But, here we impose 827133819Stjr * a limit of (STACK_SIZE - GUARD_SIZE) on the stack 828133819Stjr * region, since we can do this with our mmap. 829133819Stjr * 830133819Stjr * Our mmap with MAP_STACK takes addr as the maximum 831133819Stjr * downsize limit on BOS, and as len the max size of 832168848Sjkim * the region. It then maps the top SGROWSIZ bytes, 833166944Snetchild * and auto grows the region down, up to the limit 834133819Stjr * in addr. 835133819Stjr * 836133819Stjr * If we don't use the MAP_STACK option, the effect 837133819Stjr * of this code is to allocate a stack region of a 838133819Stjr * fixed size of (STACK_SIZE - GUARD_SIZE). 839133819Stjr */ 840133819Stjr 841198554Sjhb if ((caddr_t)PTRIN(addr) + len > p->p_vmspace->vm_maxsaddr) { 842168063Sjkim /* 843168063Sjkim * Some Linux apps will attempt to mmap 844133819Stjr * thread stacks near the top of their 845133819Stjr * address space. If their TOS is greater 846133819Stjr * than vm_maxsaddr, vm_map_growstack() 847133819Stjr * will confuse the thread stack with the 848133819Stjr * process stack and deliver a SEGV if they 849133819Stjr * attempt to grow the thread stack past their 850133819Stjr * current stacksize rlimit. To avoid this, 851133819Stjr * adjust vm_maxsaddr upwards to reflect 852133819Stjr * the current stacksize rlimit rather 853133819Stjr * than the maximum possible stacksize. 854133819Stjr * It would be better to adjust the 855133819Stjr * mmap'ed region, but some apps do not check 856133819Stjr * mmap's return value. 857133819Stjr */ 858133819Stjr PROC_LOCK(p); 859166727Sjkim p->p_vmspace->vm_maxsaddr = (char *)LINUX32_USRSTACK - 860133819Stjr lim_cur(p, RLIMIT_STACK); 861133819Stjr PROC_UNLOCK(p); 862133819Stjr } 863133819Stjr 864168063Sjkim /* 865176193Sjkim * This gives us our maximum stack size and a new BOS. 866176193Sjkim * If we're using VM_STACK, then mmap will just map 867176193Sjkim * the top SGROWSIZ bytes, and let the stack grow down 868176193Sjkim * to the limit at BOS. If we're not using VM_STACK 869176193Sjkim * we map the full stack, since we don't have a way 870176193Sjkim * to autogrow it. 871133819Stjr */ 872198554Sjhb if (len > STACK_SIZE - GUARD_SIZE) { 873198554Sjhb bsd_args.addr = (caddr_t)PTRIN(addr); 874198554Sjhb bsd_args.len = len; 875176193Sjkim } else { 876198554Sjhb bsd_args.addr = (caddr_t)PTRIN(addr) - 877198554Sjhb (STACK_SIZE - GUARD_SIZE - len); 878176193Sjkim bsd_args.len = STACK_SIZE - GUARD_SIZE; 879176193Sjkim } 880133819Stjr } else { 881198554Sjhb bsd_args.addr = (caddr_t)PTRIN(addr); 882198554Sjhb bsd_args.len = len; 883133819Stjr } 884198554Sjhb bsd_args.pos = pos; 885133819Stjr 886133819Stjr#ifdef DEBUG 887133819Stjr if (ldebug(mmap)) 888133819Stjr printf("-> %s(%p, %d, %d, 0x%08x, %d, 0x%x)\n", 889133819Stjr __func__, 890133843Sobrien (void *)bsd_args.addr, (int)bsd_args.len, bsd_args.prot, 891133819Stjr bsd_args.flags, bsd_args.fd, (int)bsd_args.pos); 892133819Stjr#endif 893133819Stjr error = mmap(td, &bsd_args); 894133819Stjr#ifdef DEBUG 895133819Stjr if (ldebug(mmap)) 896133819Stjr printf("-> %s() return: 0x%x (0x%08x)\n", 897133819Stjr __func__, error, (u_int)td->td_retval[0]); 898133819Stjr#endif 899133819Stjr return (error); 900133819Stjr} 901133819Stjr 902133819Stjrint 903168035Sjkimlinux_mprotect(struct thread *td, struct linux_mprotect_args *uap) 904168035Sjkim{ 905168035Sjkim struct mprotect_args bsd_args; 906168035Sjkim 907168035Sjkim bsd_args.addr = uap->addr; 908168035Sjkim bsd_args.len = uap->len; 909168035Sjkim bsd_args.prot = uap->prot; 910168035Sjkim if (bsd_args.prot & (PROT_READ | PROT_WRITE | PROT_EXEC)) 911168035Sjkim bsd_args.prot |= PROT_READ | PROT_EXEC; 912168035Sjkim return (mprotect(td, &bsd_args)); 913168035Sjkim} 914168035Sjkim 915168035Sjkimint 916166729Sjkimlinux_iopl(struct thread *td, struct linux_iopl_args *args) 917166729Sjkim{ 918166729Sjkim int error; 919166729Sjkim 920166729Sjkim if (args->level < 0 || args->level > 3) 921166729Sjkim return (EINVAL); 922166729Sjkim if ((error = priv_check(td, PRIV_IO)) != 0) 923166729Sjkim return (error); 924166729Sjkim if ((error = securelevel_gt(td->td_ucred, 0)) != 0) 925166729Sjkim return (error); 926166729Sjkim td->td_frame->tf_rflags = (td->td_frame->tf_rflags & ~PSL_IOPL) | 927166729Sjkim (args->level * (PSL_IOPL / 3)); 928166729Sjkim 929166729Sjkim return (0); 930166729Sjkim} 931166729Sjkim 932166729Sjkimint 933133819Stjrlinux_pipe(struct thread *td, struct linux_pipe_args *args) 934133819Stjr{ 935133819Stjr int error; 936184849Sed int fildes[2]; 937133819Stjr 938133819Stjr#ifdef DEBUG 939133819Stjr if (ldebug(pipe)) 940133819Stjr printf(ARGS(pipe, "*")); 941133819Stjr#endif 942133819Stjr 943184849Sed error = kern_pipe(td, fildes); 944184849Sed if (error) 945133819Stjr return (error); 946133819Stjr 947184849Sed /* XXX: Close descriptors on error. */ 948184849Sed return (copyout(fildes, args->pipefds, sizeof fildes)); 949133819Stjr} 950166731Sjkim 951133819Stjrint 952133819Stjrlinux_sigaction(struct thread *td, struct linux_sigaction_args *args) 953133819Stjr{ 954133819Stjr l_osigaction_t osa; 955133819Stjr l_sigaction_t act, oact; 956133819Stjr int error; 957133819Stjr 958133819Stjr#ifdef DEBUG 959133819Stjr if (ldebug(sigaction)) 960133819Stjr printf(ARGS(sigaction, "%d, %p, %p"), 961133819Stjr args->sig, (void *)args->nsa, (void *)args->osa); 962133819Stjr#endif 963133819Stjr 964133819Stjr if (args->nsa != NULL) { 965133819Stjr error = copyin(args->nsa, &osa, sizeof(l_osigaction_t)); 966133819Stjr if (error) 967133819Stjr return (error); 968133819Stjr act.lsa_handler = osa.lsa_handler; 969133819Stjr act.lsa_flags = osa.lsa_flags; 970133819Stjr act.lsa_restorer = osa.lsa_restorer; 971133819Stjr LINUX_SIGEMPTYSET(act.lsa_mask); 972133819Stjr act.lsa_mask.__bits[0] = osa.lsa_mask; 973133819Stjr } 974133819Stjr 975133819Stjr error = linux_do_sigaction(td, args->sig, args->nsa ? &act : NULL, 976133819Stjr args->osa ? &oact : NULL); 977133819Stjr 978133819Stjr if (args->osa != NULL && !error) { 979133819Stjr osa.lsa_handler = oact.lsa_handler; 980133819Stjr osa.lsa_flags = oact.lsa_flags; 981133819Stjr osa.lsa_restorer = oact.lsa_restorer; 982133819Stjr osa.lsa_mask = oact.lsa_mask.__bits[0]; 983133819Stjr error = copyout(&osa, args->osa, sizeof(l_osigaction_t)); 984133819Stjr } 985133819Stjr 986133819Stjr return (error); 987133819Stjr} 988133819Stjr 989133819Stjr/* 990168063Sjkim * Linux has two extra args, restart and oldmask. We don't use these, 991133819Stjr * but it seems that "restart" is actually a context pointer that 992133819Stjr * enables the signal to happen with a different register set. 993133819Stjr */ 994133819Stjrint 995133819Stjrlinux_sigsuspend(struct thread *td, struct linux_sigsuspend_args *args) 996133819Stjr{ 997133819Stjr sigset_t sigmask; 998133819Stjr l_sigset_t mask; 999133819Stjr 1000133819Stjr#ifdef DEBUG 1001133819Stjr if (ldebug(sigsuspend)) 1002133819Stjr printf(ARGS(sigsuspend, "%08lx"), (unsigned long)args->mask); 1003133819Stjr#endif 1004133819Stjr 1005133819Stjr LINUX_SIGEMPTYSET(mask); 1006133819Stjr mask.__bits[0] = args->mask; 1007133819Stjr linux_to_bsd_sigset(&mask, &sigmask); 1008133819Stjr return (kern_sigsuspend(td, sigmask)); 1009133819Stjr} 1010133819Stjr 1011133819Stjrint 1012133819Stjrlinux_rt_sigsuspend(struct thread *td, struct linux_rt_sigsuspend_args *uap) 1013133819Stjr{ 1014133819Stjr l_sigset_t lmask; 1015133819Stjr sigset_t sigmask; 1016133819Stjr int error; 1017133819Stjr 1018133819Stjr#ifdef DEBUG 1019133819Stjr if (ldebug(rt_sigsuspend)) 1020133819Stjr printf(ARGS(rt_sigsuspend, "%p, %d"), 1021133819Stjr (void *)uap->newset, uap->sigsetsize); 1022133819Stjr#endif 1023133819Stjr 1024133819Stjr if (uap->sigsetsize != sizeof(l_sigset_t)) 1025133819Stjr return (EINVAL); 1026133819Stjr 1027133819Stjr error = copyin(uap->newset, &lmask, sizeof(l_sigset_t)); 1028133819Stjr if (error) 1029133819Stjr return (error); 1030133819Stjr 1031133819Stjr linux_to_bsd_sigset(&lmask, &sigmask); 1032133819Stjr return (kern_sigsuspend(td, sigmask)); 1033133819Stjr} 1034133819Stjr 1035133819Stjrint 1036133819Stjrlinux_pause(struct thread *td, struct linux_pause_args *args) 1037133819Stjr{ 1038133819Stjr struct proc *p = td->td_proc; 1039133819Stjr sigset_t sigmask; 1040133819Stjr 1041133819Stjr#ifdef DEBUG 1042133819Stjr if (ldebug(pause)) 1043133819Stjr printf(ARGS(pause, "")); 1044133819Stjr#endif 1045133819Stjr 1046133819Stjr PROC_LOCK(p); 1047133819Stjr sigmask = td->td_sigmask; 1048133819Stjr PROC_UNLOCK(p); 1049133819Stjr return (kern_sigsuspend(td, sigmask)); 1050133819Stjr} 1051133819Stjr 1052133819Stjrint 1053133819Stjrlinux_sigaltstack(struct thread *td, struct linux_sigaltstack_args *uap) 1054133819Stjr{ 1055133819Stjr stack_t ss, oss; 1056133819Stjr l_stack_t lss; 1057133819Stjr int error; 1058133819Stjr 1059133819Stjr#ifdef DEBUG 1060133819Stjr if (ldebug(sigaltstack)) 1061133819Stjr printf(ARGS(sigaltstack, "%p, %p"), uap->uss, uap->uoss); 1062133819Stjr#endif 1063133819Stjr 1064133819Stjr if (uap->uss != NULL) { 1065133819Stjr error = copyin(uap->uss, &lss, sizeof(l_stack_t)); 1066133819Stjr if (error) 1067133819Stjr return (error); 1068133819Stjr 1069133819Stjr ss.ss_sp = PTRIN(lss.ss_sp); 1070133819Stjr ss.ss_size = lss.ss_size; 1071133819Stjr ss.ss_flags = linux_to_bsd_sigaltstack(lss.ss_flags); 1072133819Stjr } 1073134269Sjhb error = kern_sigaltstack(td, (uap->uss != NULL) ? &ss : NULL, 1074134269Sjhb (uap->uoss != NULL) ? &oss : NULL); 1075133819Stjr if (!error && uap->uoss != NULL) { 1076133819Stjr lss.ss_sp = PTROUT(oss.ss_sp); 1077133819Stjr lss.ss_size = oss.ss_size; 1078133819Stjr lss.ss_flags = bsd_to_linux_sigaltstack(oss.ss_flags); 1079133819Stjr error = copyout(&lss, uap->uoss, sizeof(l_stack_t)); 1080133819Stjr } 1081133819Stjr 1082133819Stjr return (error); 1083133819Stjr} 1084133819Stjr 1085133819Stjrint 1086133819Stjrlinux_ftruncate64(struct thread *td, struct linux_ftruncate64_args *args) 1087133819Stjr{ 1088133819Stjr struct ftruncate_args sa; 1089133819Stjr 1090133819Stjr#ifdef DEBUG 1091133819Stjr if (ldebug(ftruncate64)) 1092133819Stjr printf(ARGS(ftruncate64, "%u, %jd"), args->fd, 1093133819Stjr (intmax_t)args->length); 1094133819Stjr#endif 1095133819Stjr 1096133819Stjr sa.fd = args->fd; 1097133819Stjr sa.length = args->length; 1098133819Stjr return ftruncate(td, &sa); 1099133819Stjr} 1100133819Stjr 1101133819Stjrint 1102133819Stjrlinux_gettimeofday(struct thread *td, struct linux_gettimeofday_args *uap) 1103133819Stjr{ 1104133819Stjr struct timeval atv; 1105133819Stjr l_timeval atv32; 1106133819Stjr struct timezone rtz; 1107133819Stjr int error = 0; 1108133819Stjr 1109133819Stjr if (uap->tp) { 1110133819Stjr microtime(&atv); 1111133819Stjr atv32.tv_sec = atv.tv_sec; 1112133819Stjr atv32.tv_usec = atv.tv_usec; 1113168844Sjkim error = copyout(&atv32, uap->tp, sizeof(atv32)); 1114133819Stjr } 1115133819Stjr if (error == 0 && uap->tzp != NULL) { 1116133819Stjr rtz.tz_minuteswest = tz_minuteswest; 1117133819Stjr rtz.tz_dsttime = tz_dsttime; 1118168844Sjkim error = copyout(&rtz, uap->tzp, sizeof(rtz)); 1119133819Stjr } 1120133819Stjr return (error); 1121133819Stjr} 1122133819Stjr 1123133819Stjrint 1124168843Sjkimlinux_settimeofday(struct thread *td, struct linux_settimeofday_args *uap) 1125168843Sjkim{ 1126168843Sjkim l_timeval atv32; 1127168843Sjkim struct timeval atv, *tvp; 1128168843Sjkim struct timezone atz, *tzp; 1129168843Sjkim int error; 1130168843Sjkim 1131168843Sjkim if (uap->tp) { 1132168843Sjkim error = copyin(uap->tp, &atv32, sizeof(atv32)); 1133168843Sjkim if (error) 1134168843Sjkim return (error); 1135168843Sjkim atv.tv_sec = atv32.tv_sec; 1136168843Sjkim atv.tv_usec = atv32.tv_usec; 1137168843Sjkim tvp = &atv; 1138168843Sjkim } else 1139168843Sjkim tvp = NULL; 1140168843Sjkim if (uap->tzp) { 1141168843Sjkim error = copyin(uap->tzp, &atz, sizeof(atz)); 1142168843Sjkim if (error) 1143168843Sjkim return (error); 1144168843Sjkim tzp = &atz; 1145168843Sjkim } else 1146168843Sjkim tzp = NULL; 1147168843Sjkim return (kern_settimeofday(td, tvp, tzp)); 1148168843Sjkim} 1149168843Sjkim 1150168843Sjkimint 1151133819Stjrlinux_getrusage(struct thread *td, struct linux_getrusage_args *uap) 1152133819Stjr{ 1153136152Sjhb struct l_rusage s32; 1154136152Sjhb struct rusage s; 1155133819Stjr int error; 1156133819Stjr 1157136152Sjhb error = kern_getrusage(td, uap->who, &s); 1158133819Stjr if (error != 0) 1159133819Stjr return (error); 1160136152Sjhb if (uap->rusage != NULL) { 1161218028Sdchagin bsd_to_linux_rusage(&s, &s32); 1162136152Sjhb error = copyout(&s32, uap->rusage, sizeof(s32)); 1163133819Stjr } 1164133819Stjr return (error); 1165133819Stjr} 1166133819Stjr 1167133819Stjrint 1168133819Stjrlinux_sched_rr_get_interval(struct thread *td, 1169133819Stjr struct linux_sched_rr_get_interval_args *uap) 1170133819Stjr{ 1171133819Stjr struct timespec ts; 1172133819Stjr struct l_timespec ts32; 1173133819Stjr int error; 1174133819Stjr 1175144449Sjhb error = kern_sched_rr_get_interval(td, uap->pid, &ts); 1176133819Stjr if (error != 0) 1177133819Stjr return (error); 1178133819Stjr ts32.tv_sec = ts.tv_sec; 1179133819Stjr ts32.tv_nsec = ts.tv_nsec; 1180133819Stjr return (copyout(&ts32, uap->interval, sizeof(ts32))); 1181133819Stjr} 1182133819Stjr 1183133819Stjrint 1184168035Sjkimlinux_set_thread_area(struct thread *td, 1185168035Sjkim struct linux_set_thread_area_args *args) 1186133819Stjr{ 1187168035Sjkim struct l_user_desc info; 1188168035Sjkim struct user_segment_descriptor sd; 1189216634Sjkim struct pcb *pcb; 1190168035Sjkim int a[2]; 1191168035Sjkim int error; 1192133819Stjr 1193168035Sjkim error = copyin(args->desc, &info, sizeof(struct l_user_desc)); 1194168035Sjkim if (error) 1195168035Sjkim return (error); 1196168035Sjkim 1197168035Sjkim#ifdef DEBUG 1198168035Sjkim if (ldebug(set_thread_area)) 1199168848Sjkim printf(ARGS(set_thread_area, "%i, %x, %x, %i, %i, %i, " 1200168035Sjkim "%i, %i, %i"), info.entry_number, info.base_addr, 1201168035Sjkim info.limit, info.seg_32bit, info.contents, 1202168035Sjkim info.read_exec_only, info.limit_in_pages, 1203168035Sjkim info.seg_not_present, info.useable); 1204168035Sjkim#endif 1205168035Sjkim 1206168035Sjkim /* 1207168035Sjkim * Semantics of Linux version: every thread in the system has array 1208168035Sjkim * of three TLS descriptors. 1st is GLIBC TLS, 2nd is WINE, 3rd unknown. 1209168035Sjkim * This syscall loads one of the selected TLS decriptors with a value 1210168035Sjkim * and also loads GDT descriptors 6, 7 and 8 with the content of 1211168035Sjkim * the per-thread descriptors. 1212168035Sjkim * 1213168035Sjkim * Semantics of FreeBSD version: I think we can ignore that Linux has 1214168035Sjkim * three per-thread descriptors and use just the first one. 1215168035Sjkim * The tls_array[] is used only in [gs]et_thread_area() syscalls and 1216168035Sjkim * for loading the GDT descriptors. We use just one GDT descriptor 1217168035Sjkim * for TLS, so we will load just one. 1218168848Sjkim * 1219168848Sjkim * XXX: This doesn't work when a user space process tries to use more 1220168035Sjkim * than one TLS segment. Comment in the Linux source says wine might 1221168848Sjkim * do this. 1222168035Sjkim */ 1223168035Sjkim 1224168035Sjkim /* 1225168035Sjkim * GLIBC reads current %gs and call set_thread_area() with it. 1226168035Sjkim * We should let GUDATA_SEL and GUGS32_SEL proceed as well because 1227168035Sjkim * we use these segments. 1228168035Sjkim */ 1229168035Sjkim switch (info.entry_number) { 1230168035Sjkim case GUGS32_SEL: 1231168035Sjkim case GUDATA_SEL: 1232168035Sjkim case 6: 1233168035Sjkim case -1: 1234168035Sjkim info.entry_number = GUGS32_SEL; 1235168035Sjkim break; 1236168035Sjkim default: 1237168035Sjkim return (EINVAL); 1238168035Sjkim } 1239168035Sjkim 1240168035Sjkim /* 1241168035Sjkim * We have to copy out the GDT entry we use. 1242168848Sjkim * 1243168848Sjkim * XXX: What if a user space program does not check the return value 1244168848Sjkim * and tries to use 6, 7 or 8? 1245168035Sjkim */ 1246168035Sjkim error = copyout(&info, args->desc, sizeof(struct l_user_desc)); 1247168035Sjkim if (error) 1248168035Sjkim return (error); 1249168035Sjkim 1250168035Sjkim if (LINUX_LDT_empty(&info)) { 1251168035Sjkim a[0] = 0; 1252168035Sjkim a[1] = 0; 1253168035Sjkim } else { 1254168035Sjkim a[0] = LINUX_LDT_entry_a(&info); 1255168035Sjkim a[1] = LINUX_LDT_entry_b(&info); 1256168035Sjkim } 1257168035Sjkim 1258168035Sjkim memcpy(&sd, &a, sizeof(a)); 1259168035Sjkim#ifdef DEBUG 1260168035Sjkim if (ldebug(set_thread_area)) 1261168035Sjkim printf("Segment created in set_thread_area: " 1262168035Sjkim "lobase: %x, hibase: %x, lolimit: %x, hilimit: %x, " 1263168035Sjkim "type: %i, dpl: %i, p: %i, xx: %i, long: %i, " 1264168035Sjkim "def32: %i, gran: %i\n", 1265168035Sjkim sd.sd_lobase, 1266168035Sjkim sd.sd_hibase, 1267168035Sjkim sd.sd_lolimit, 1268168035Sjkim sd.sd_hilimit, 1269168035Sjkim sd.sd_type, 1270168035Sjkim sd.sd_dpl, 1271168035Sjkim sd.sd_p, 1272168035Sjkim sd.sd_xx, 1273168035Sjkim sd.sd_long, 1274168035Sjkim sd.sd_def32, 1275168035Sjkim sd.sd_gran); 1276168035Sjkim#endif 1277168035Sjkim 1278216634Sjkim pcb = td->td_pcb; 1279216634Sjkim pcb->pcb_gsbase = (register_t)info.base_addr; 1280216634Sjkim set_pcb_flags(pcb, PCB_32BIT | PCB_GS32BIT); 1281190620Skib update_gdt_gsbase(td, info.base_addr); 1282168035Sjkim 1283168035Sjkim return (0); 1284133819Stjr} 1285218030Sdchagin 1286218030Sdchaginint 1287218030Sdchaginlinux_wait4(struct thread *td, struct linux_wait4_args *args) 1288218030Sdchagin{ 1289218030Sdchagin int error, options; 1290218030Sdchagin struct rusage ru, *rup; 1291218030Sdchagin struct l_rusage lru; 1292218030Sdchagin 1293218030Sdchagin#ifdef DEBUG 1294218030Sdchagin if (ldebug(wait4)) 1295218030Sdchagin printf(ARGS(wait4, "%d, %p, %d, %p"), 1296218030Sdchagin args->pid, (void *)args->status, args->options, 1297218030Sdchagin (void *)args->rusage); 1298218030Sdchagin#endif 1299218030Sdchagin 1300218030Sdchagin options = (args->options & (WNOHANG | WUNTRACED)); 1301218030Sdchagin /* WLINUXCLONE should be equal to __WCLONE, but we make sure */ 1302218030Sdchagin if (args->options & __WCLONE) 1303218030Sdchagin options |= WLINUXCLONE; 1304218030Sdchagin 1305218030Sdchagin if (args->rusage != NULL) 1306218030Sdchagin rup = &ru; 1307218030Sdchagin else 1308218030Sdchagin rup = NULL; 1309218030Sdchagin error = linux_common_wait(td, args->pid, args->status, options, rup); 1310218030Sdchagin if (error) 1311218030Sdchagin return (error); 1312218030Sdchagin if (args->rusage != NULL) { 1313218030Sdchagin bsd_to_linux_rusage(rup, &lru); 1314218030Sdchagin error = copyout(&lru, args->rusage, sizeof(lru)); 1315218030Sdchagin } 1316218030Sdchagin 1317218030Sdchagin return (error); 1318218030Sdchagin} 1319