linux_machdep.c revision 166188
164921Smarcel/*- 264921Smarcel * Copyright (c) 2000 Marcel Moolenaar 364921Smarcel * All rights reserved. 464921Smarcel * 564921Smarcel * Redistribution and use in source and binary forms, with or without 664921Smarcel * modification, are permitted provided that the following conditions 764921Smarcel * are met: 864921Smarcel * 1. Redistributions of source code must retain the above copyright 9111798Sdes * notice, this list of conditions and the following disclaimer 1064921Smarcel * in this position and unchanged. 1164921Smarcel * 2. Redistributions in binary form must reproduce the above copyright 1264921Smarcel * notice, this list of conditions and the following disclaimer in the 1364921Smarcel * documentation and/or other materials provided with the distribution. 1464921Smarcel * 3. The name of the author may not be used to endorse or promote products 1565067Smarcel * derived from this software without specific prior written permission. 1664921Smarcel * 1764921Smarcel * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 1864921Smarcel * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 1964921Smarcel * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 2064921Smarcel * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 2164921Smarcel * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 2264921Smarcel * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 2364921Smarcel * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 2464921Smarcel * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 2564921Smarcel * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 2664921Smarcel * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 2764921Smarcel */ 2864921Smarcel 29115705Sobrien#include <sys/cdefs.h> 30115705Sobrien__FBSDID("$FreeBSD: head/sys/i386/linux/linux_machdep.c 166188 2007-01-23 08:46:51Z jeff $"); 31115705Sobrien 3264921Smarcel#include <sys/param.h> 3376166Smarkm#include <sys/systm.h> 34162472Snetchild#include <sys/file.h> 35162472Snetchild#include <sys/fcntl.h> 36140992Ssobomax#include <sys/imgact.h> 3784811Sjhb#include <sys/lock.h> 38140992Ssobomax#include <sys/malloc.h> 3964921Smarcel#include <sys/mman.h> 4076166Smarkm#include <sys/mutex.h> 41161310Snetchild#include <sys/sx.h> 42164033Srwatson#include <sys/priv.h> 4364921Smarcel#include <sys/proc.h> 44161310Snetchild#include <sys/queue.h> 4576166Smarkm#include <sys/resource.h> 4676166Smarkm#include <sys/resourcevar.h> 47134838Sdfr#include <sys/signalvar.h> 48102814Siedowse#include <sys/syscallsubr.h> 4964921Smarcel#include <sys/sysproto.h> 5064921Smarcel#include <sys/unistd.h> 51161310Snetchild#include <sys/wait.h> 52166188Sjeff#include <sys/sched.h> 5364921Smarcel 5464921Smarcel#include <machine/frame.h> 5564921Smarcel#include <machine/psl.h> 5664921Smarcel#include <machine/segments.h> 5764921Smarcel#include <machine/sysarch.h> 5864921Smarcel 5967238Sgallatin#include <vm/vm.h> 6067238Sgallatin#include <vm/pmap.h> 6167238Sgallatin#include <vm/vm_map.h> 6267238Sgallatin 6364921Smarcel#include <i386/linux/linux.h> 6468583Smarcel#include <i386/linux/linux_proto.h> 6564921Smarcel#include <compat/linux/linux_ipc.h> 6664921Smarcel#include <compat/linux/linux_signal.h> 6764921Smarcel#include <compat/linux/linux_util.h> 68161310Snetchild#include <compat/linux/linux_emul.h> 6964921Smarcel 70161310Snetchild#include <i386/include/pcb.h> /* needed for pcb definition in linux_set_thread_area */ 71161310Snetchild 72161310Snetchild#include "opt_posix.h" 73161310Snetchild 74161310Snetchildextern struct sysentvec elf32_freebsd_sysvec; /* defined in i386/i386/elf_machdep.c */ 75161310Snetchild 7683221Smarcelstruct l_descriptor { 7783221Smarcel l_uint entry_number; 7883221Smarcel l_ulong base_addr; 7983221Smarcel l_uint limit; 8083221Smarcel l_uint seg_32bit:1; 8183221Smarcel l_uint contents:2; 8283221Smarcel l_uint read_exec_only:1; 8383221Smarcel l_uint limit_in_pages:1; 8483221Smarcel l_uint seg_not_present:1; 8583221Smarcel l_uint useable:1; 8664921Smarcel}; 8764921Smarcel 8883221Smarcelstruct l_old_select_argv { 8983221Smarcel l_int nfds; 9083221Smarcel l_fd_set *readfds; 9183221Smarcel l_fd_set *writefds; 9283221Smarcel l_fd_set *exceptfds; 9383221Smarcel struct l_timeval *timeout; 9464921Smarcel}; 9564921Smarcel 9664921Smarcelint 9767051Sgallatinlinux_to_bsd_sigaltstack(int lsa) 9867051Sgallatin{ 9967051Sgallatin int bsa = 0; 10067051Sgallatin 10167051Sgallatin if (lsa & LINUX_SS_DISABLE) 10267051Sgallatin bsa |= SS_DISABLE; 10367051Sgallatin if (lsa & LINUX_SS_ONSTACK) 10467051Sgallatin bsa |= SS_ONSTACK; 10567051Sgallatin return (bsa); 10667051Sgallatin} 10767051Sgallatin 10867051Sgallatinint 10967051Sgallatinbsd_to_linux_sigaltstack(int bsa) 11067051Sgallatin{ 11167051Sgallatin int lsa = 0; 11267051Sgallatin 11367051Sgallatin if (bsa & SS_DISABLE) 11467051Sgallatin lsa |= LINUX_SS_DISABLE; 11567051Sgallatin if (bsa & SS_ONSTACK) 11667051Sgallatin lsa |= LINUX_SS_ONSTACK; 11767051Sgallatin return (lsa); 11867051Sgallatin} 11967051Sgallatin 12067051Sgallatinint 12183366Sjulianlinux_execve(struct thread *td, struct linux_execve_args *args) 12264921Smarcel{ 123140992Ssobomax int error; 124140992Ssobomax char *newpath; 125140992Ssobomax struct image_args eargs; 12664921Smarcel 127141468Sjhb LCONVPATHEXIST(td, args->path, &newpath); 12864921Smarcel 12964921Smarcel#ifdef DEBUG 13072543Sjlemon if (ldebug(execve)) 131140992Ssobomax printf(ARGS(execve, "%s"), newpath); 13264921Smarcel#endif 13364921Smarcel 134140992Ssobomax error = exec_copyin_args(&eargs, newpath, UIO_SYSSPACE, 135140992Ssobomax args->argp, args->envp); 136140992Ssobomax free(newpath, M_TEMP); 137140992Ssobomax if (error == 0) 138148623Ssobomax error = kern_execve(td, &eargs, NULL); 139161310Snetchild if (error == 0) 140161310Snetchild /* linux process can exec fbsd one, dont attempt 141161310Snetchild * to create emuldata for such process using 142161310Snetchild * linux_proc_init, this leads to a panic on KASSERT 143161310Snetchild * because such process has p->p_emuldata == NULL 144161310Snetchild */ 145161310Snetchild if (td->td_proc->p_sysent == &elf_linux_sysvec) 146161310Snetchild error = linux_proc_init(td, 0, 0); 147140992Ssobomax return (error); 14864921Smarcel} 14964921Smarcel 15083221Smarcelstruct l_ipc_kludge { 15183221Smarcel struct l_msgbuf *msgp; 15283221Smarcel l_long msgtyp; 15383221Smarcel}; 15483221Smarcel 15564921Smarcelint 15683366Sjulianlinux_ipc(struct thread *td, struct linux_ipc_args *args) 15764921Smarcel{ 15883221Smarcel 15983221Smarcel switch (args->what & 0xFFFF) { 16083221Smarcel case LINUX_SEMOP: { 16183221Smarcel struct linux_semop_args a; 16283221Smarcel 16383221Smarcel a.semid = args->arg1; 16483221Smarcel a.tsops = args->ptr; 16583221Smarcel a.nsops = args->arg2; 16683366Sjulian return (linux_semop(td, &a)); 16764921Smarcel } 16883221Smarcel case LINUX_SEMGET: { 16983221Smarcel struct linux_semget_args a; 17064921Smarcel 17183221Smarcel a.key = args->arg1; 17283221Smarcel a.nsems = args->arg2; 17383221Smarcel a.semflg = args->arg3; 17483366Sjulian return (linux_semget(td, &a)); 17583221Smarcel } 17683221Smarcel case LINUX_SEMCTL: { 17783221Smarcel struct linux_semctl_args a; 17883221Smarcel int error; 17983221Smarcel 18083221Smarcel a.semid = args->arg1; 18183221Smarcel a.semnum = args->arg2; 18283221Smarcel a.cmd = args->arg3; 183111797Sdes error = copyin(args->ptr, &a.arg, sizeof(a.arg)); 18483221Smarcel if (error) 18583221Smarcel return (error); 18683366Sjulian return (linux_semctl(td, &a)); 18783221Smarcel } 18883221Smarcel case LINUX_MSGSND: { 18983221Smarcel struct linux_msgsnd_args a; 19083221Smarcel 19183221Smarcel a.msqid = args->arg1; 19283221Smarcel a.msgp = args->ptr; 19383221Smarcel a.msgsz = args->arg2; 19483221Smarcel a.msgflg = args->arg3; 19583366Sjulian return (linux_msgsnd(td, &a)); 19683221Smarcel } 19783221Smarcel case LINUX_MSGRCV: { 19883221Smarcel struct linux_msgrcv_args a; 19983221Smarcel 20083221Smarcel a.msqid = args->arg1; 20183221Smarcel a.msgsz = args->arg2; 20283221Smarcel a.msgflg = args->arg3; 20383221Smarcel if ((args->what >> 16) == 0) { 20483221Smarcel struct l_ipc_kludge tmp; 20583221Smarcel int error; 20683221Smarcel 20783221Smarcel if (args->ptr == NULL) 20883221Smarcel return (EINVAL); 209111797Sdes error = copyin(args->ptr, &tmp, sizeof(tmp)); 21083221Smarcel if (error) 21183221Smarcel return (error); 21283221Smarcel a.msgp = tmp.msgp; 21383221Smarcel a.msgtyp = tmp.msgtyp; 21483221Smarcel } else { 21583221Smarcel a.msgp = args->ptr; 21683221Smarcel a.msgtyp = args->arg5; 21783221Smarcel } 21883366Sjulian return (linux_msgrcv(td, &a)); 21983221Smarcel } 22083221Smarcel case LINUX_MSGGET: { 22183221Smarcel struct linux_msgget_args a; 22283221Smarcel 22383221Smarcel a.key = args->arg1; 22483221Smarcel a.msgflg = args->arg2; 22583366Sjulian return (linux_msgget(td, &a)); 22683221Smarcel } 22783221Smarcel case LINUX_MSGCTL: { 22883221Smarcel struct linux_msgctl_args a; 22983221Smarcel 23083221Smarcel a.msqid = args->arg1; 23183221Smarcel a.cmd = args->arg2; 23283221Smarcel a.buf = args->ptr; 23383366Sjulian return (linux_msgctl(td, &a)); 23483221Smarcel } 23583221Smarcel case LINUX_SHMAT: { 23683221Smarcel struct linux_shmat_args a; 23783221Smarcel 23883221Smarcel a.shmid = args->arg1; 23983221Smarcel a.shmaddr = args->ptr; 24083221Smarcel a.shmflg = args->arg2; 24183221Smarcel a.raddr = (l_ulong *)args->arg3; 24283366Sjulian return (linux_shmat(td, &a)); 24383221Smarcel } 24483221Smarcel case LINUX_SHMDT: { 24583221Smarcel struct linux_shmdt_args a; 24683221Smarcel 24783221Smarcel a.shmaddr = args->ptr; 24883366Sjulian return (linux_shmdt(td, &a)); 24983221Smarcel } 25083221Smarcel case LINUX_SHMGET: { 25183221Smarcel struct linux_shmget_args a; 25283221Smarcel 25383221Smarcel a.key = args->arg1; 25483221Smarcel a.size = args->arg2; 25583221Smarcel a.shmflg = args->arg3; 25683366Sjulian return (linux_shmget(td, &a)); 25783221Smarcel } 25883221Smarcel case LINUX_SHMCTL: { 25983221Smarcel struct linux_shmctl_args a; 26083221Smarcel 26183221Smarcel a.shmid = args->arg1; 26283221Smarcel a.cmd = args->arg2; 26383221Smarcel a.buf = args->ptr; 26483366Sjulian return (linux_shmctl(td, &a)); 26583221Smarcel } 26683221Smarcel default: 26783221Smarcel break; 26883221Smarcel } 26983221Smarcel 27083221Smarcel return (EINVAL); 27164921Smarcel} 27264921Smarcel 27364921Smarcelint 27483366Sjulianlinux_old_select(struct thread *td, struct linux_old_select_args *args) 27564921Smarcel{ 27683221Smarcel struct l_old_select_argv linux_args; 27783221Smarcel struct linux_select_args newsel; 27864921Smarcel int error; 27964921Smarcel 28083221Smarcel#ifdef DEBUG 28183221Smarcel if (ldebug(old_select)) 28291437Speter printf(ARGS(old_select, "%p"), args->ptr); 28364921Smarcel#endif 28464921Smarcel 285111797Sdes error = copyin(args->ptr, &linux_args, sizeof(linux_args)); 28664921Smarcel if (error) 28764921Smarcel return (error); 28864921Smarcel 28964921Smarcel newsel.nfds = linux_args.nfds; 29064921Smarcel newsel.readfds = linux_args.readfds; 29164921Smarcel newsel.writefds = linux_args.writefds; 29264921Smarcel newsel.exceptfds = linux_args.exceptfds; 29364921Smarcel newsel.timeout = linux_args.timeout; 29483366Sjulian return (linux_select(td, &newsel)); 29564921Smarcel} 29664921Smarcel 29764921Smarcelint 29883366Sjulianlinux_fork(struct thread *td, struct linux_fork_args *args) 29964921Smarcel{ 30064921Smarcel int error; 301166150Snetchild struct proc *p2; 302166150Snetchild struct thread *td2; 30364921Smarcel 30464921Smarcel#ifdef DEBUG 30572543Sjlemon if (ldebug(fork)) 30672543Sjlemon printf(ARGS(fork, "")); 30764921Smarcel#endif 30864921Smarcel 309166150Snetchild if ((error = fork1(td, RFFDG | RFPROC | RFSTOPPED, 0, &p2)) != 0) 31064921Smarcel return (error); 311166150Snetchild 312166150Snetchild if (error == 0) { 313166150Snetchild td->td_retval[0] = p2->p_pid; 314166150Snetchild td->td_retval[1] = 0; 315166150Snetchild } 31664921Smarcel 31783366Sjulian if (td->td_retval[1] == 1) 31883366Sjulian td->td_retval[0] = 0; 319161310Snetchild error = linux_proc_init(td, td->td_retval[0], 0); 320161310Snetchild if (error) 321161310Snetchild return (error); 322161310Snetchild 323166150Snetchild td2 = FIRST_THREAD_IN_PROC(p2); 324166150Snetchild 325166150Snetchild /* 326166150Snetchild * Make this runnable after we are finished with it. 327166150Snetchild */ 328166150Snetchild mtx_lock_spin(&sched_lock); 329166150Snetchild TD_SET_CAN_RUN(td2); 330166188Sjeff sched_add(td2, SRQ_BORING); 331166150Snetchild mtx_unlock_spin(&sched_lock); 332166150Snetchild 33364921Smarcel return (0); 33464921Smarcel} 33564921Smarcel 33664921Smarcelint 33783366Sjulianlinux_vfork(struct thread *td, struct linux_vfork_args *args) 33864921Smarcel{ 33964921Smarcel int error; 340161611Snetchild struct proc *p2; 341166150Snetchild struct thread *td2; 34264921Smarcel 34364921Smarcel#ifdef DEBUG 34472543Sjlemon if (ldebug(vfork)) 34572543Sjlemon printf(ARGS(vfork, "")); 34664921Smarcel#endif 34764921Smarcel 348161611Snetchild /* exclude RFPPWAIT */ 349166150Snetchild if ((error = fork1(td, RFFDG | RFPROC | RFMEM | RFSTOPPED, 0, &p2)) != 0) 35064921Smarcel return (error); 351161611Snetchild if (error == 0) { 352166150Snetchild td->td_retval[0] = p2->p_pid; 353161611Snetchild td->td_retval[1] = 0; 354161611Snetchild } 35564921Smarcel /* Are we the child? */ 35683366Sjulian if (td->td_retval[1] == 1) 35783366Sjulian td->td_retval[0] = 0; 358161310Snetchild error = linux_proc_init(td, td->td_retval[0], 0); 359161310Snetchild if (error) 360161310Snetchild return (error); 361166150Snetchild 362166150Snetchild PROC_LOCK(p2); 363166150Snetchild p2->p_flag |= P_PPWAIT; 364166150Snetchild PROC_UNLOCK(p2); 365166150Snetchild 366166150Snetchild td2 = FIRST_THREAD_IN_PROC(p2); 367166150Snetchild 368166150Snetchild /* 369166150Snetchild * Make this runnable after we are finished with it. 370166150Snetchild */ 371166150Snetchild mtx_lock_spin(&sched_lock); 372166150Snetchild TD_SET_CAN_RUN(td2); 373166188Sjeff sched_add(td2, SRQ_BORING); 374166150Snetchild mtx_unlock_spin(&sched_lock); 375166150Snetchild 376161611Snetchild /* wait for the children to exit, ie. emulate vfork */ 377161611Snetchild PROC_LOCK(p2); 378161611Snetchild while (p2->p_flag & P_PPWAIT) 379161611Snetchild msleep(td->td_proc, &p2->p_mtx, PWAIT, "ppwait", 0); 380161611Snetchild PROC_UNLOCK(p2); 381161611Snetchild 38264921Smarcel return (0); 38364921Smarcel} 38464921Smarcel 38564921Smarcelint 38683366Sjulianlinux_clone(struct thread *td, struct linux_clone_args *args) 38764921Smarcel{ 38873856Sjhb int error, ff = RFPROC | RFSTOPPED; 38964921Smarcel struct proc *p2; 390113689Sjhb struct thread *td2; 39164921Smarcel int exit_signal; 392161310Snetchild struct linux_emuldata *em; 39364921Smarcel 39464921Smarcel#ifdef DEBUG 39572543Sjlemon if (ldebug(clone)) { 396161310Snetchild printf(ARGS(clone, "flags %x, stack %x, parent tid: %x, child tid: %x"), 397161310Snetchild (unsigned int)args->flags, (unsigned int)args->stack, 398161310Snetchild (unsigned int)args->parent_tidptr, (unsigned int)args->child_tidptr); 39972543Sjlemon } 40064921Smarcel#endif 40164921Smarcel 40264921Smarcel exit_signal = args->flags & 0x000000ff; 403163536Snetchild if (!LINUX_SIG_VALID(exit_signal) && exit_signal != 0) 40464921Smarcel return (EINVAL); 40564921Smarcel 40664921Smarcel if (exit_signal <= LINUX_SIGTBLSZ) 40764921Smarcel exit_signal = linux_to_bsd_signal[_SIG_IDX(exit_signal)]; 40864921Smarcel 40964921Smarcel if (args->flags & CLONE_VM) 41064921Smarcel ff |= RFMEM; 41164921Smarcel if (args->flags & CLONE_SIGHAND) 41264921Smarcel ff |= RFSIGSHARE; 413163371Snetchild /* 414163371Snetchild * XXX: in linux sharing of fs info (chroot/cwd/umask) 415163371Snetchild * and open files is independant. in fbsd its in one 416163371Snetchild * structure but in reality it doesnt make any problems 417163371Snetchild * because both this flags are set at once usually. 418163371Snetchild */ 419163371Snetchild if (!(args->flags & (CLONE_FILES | CLONE_FS))) 42064921Smarcel ff |= RFFDG; 42164921Smarcel 422143108Ssobomax /* 423143108Ssobomax * Attempt to detect when linux_clone(2) is used for creating 424143108Ssobomax * kernel threads. Unfortunately despite the existence of the 425143108Ssobomax * CLONE_THREAD flag, version of linuxthreads package used in 426143108Ssobomax * most popular distros as of beginning of 2005 doesn't make 427143108Ssobomax * any use of it. Therefore, this detection relay fully on 428143108Ssobomax * empirical observation that linuxthreads sets certain 429143108Ssobomax * combination of flags, so that we can make more or less 430143108Ssobomax * precise detection and notify the FreeBSD kernel that several 431143108Ssobomax * processes are in fact part of the same threading group, so 432143108Ssobomax * that special treatment is necessary for signal delivery 433143108Ssobomax * between those processes and fd locking. 434143108Ssobomax */ 435143108Ssobomax if ((args->flags & 0xffffff00) == THREADING_FLAGS) 436143108Ssobomax ff |= RFTHREAD; 437143108Ssobomax 438104354Sscottl error = fork1(td, ff, 0, &p2); 439113689Sjhb if (error) 440113689Sjhb return (error); 441113689Sjhb 442161310Snetchild /* create the emuldata */ 443161310Snetchild error = linux_proc_init(td, p2->p_pid, args->flags); 444161310Snetchild /* reference it - no need to check this */ 445165867Snetchild em = em_find(p2, EMUL_DOLOCK); 446161310Snetchild KASSERT(em != NULL, ("clone: emuldata not found.\n")); 447161310Snetchild /* and adjust it */ 448161310Snetchild if (args->flags & CLONE_PARENT_SETTID) { 449161310Snetchild if (args->parent_tidptr == NULL) { 450161310Snetchild EMUL_UNLOCK(&emul_lock); 451161310Snetchild return (EINVAL); 452161310Snetchild } 453161310Snetchild error = copyout(&p2->p_pid, args->parent_tidptr, sizeof(p2->p_pid)); 454161310Snetchild if (error) { 455161310Snetchild EMUL_UNLOCK(&emul_lock); 456161310Snetchild return (error); 457161310Snetchild } 458161310Snetchild } 45964921Smarcel 460161673Snetchild if (args->flags & (CLONE_PARENT|CLONE_THREAD)) { 461161673Snetchild sx_xlock(&proctree_lock); 462161673Snetchild PROC_LOCK(p2); 463161673Snetchild proc_reparent(p2, td->td_proc->p_pptr); 464161673Snetchild PROC_UNLOCK(p2); 465161673Snetchild sx_xunlock(&proctree_lock); 466161310Snetchild } 467161673Snetchild 468161310Snetchild if (args->flags & CLONE_THREAD) { 469161310Snetchild /* XXX: linux mangles pgrp and pptr somehow 470161310Snetchild * I think it might be this but I am not sure. 471161310Snetchild */ 472161310Snetchild#ifdef notyet 473161673Snetchild PROC_LOCK(p2); 474161310Snetchild p2->p_pgrp = td->td_proc->p_pgrp; 475161673Snetchild PROC_UNLOCK(p2); 476161310Snetchild#endif 477161310Snetchild exit_signal = 0; 478161310Snetchild } 479161310Snetchild 480161310Snetchild if (args->flags & CLONE_CHILD_SETTID) 481161310Snetchild em->child_set_tid = args->child_tidptr; 482161310Snetchild else 483161310Snetchild em->child_set_tid = NULL; 484161310Snetchild 485161310Snetchild if (args->flags & CLONE_CHILD_CLEARTID) 486161310Snetchild em->child_clear_tid = args->child_tidptr; 487161310Snetchild else 488161310Snetchild em->child_clear_tid = NULL; 489161673Snetchild 490161310Snetchild EMUL_UNLOCK(&emul_lock); 491161310Snetchild 492113689Sjhb PROC_LOCK(p2); 493113689Sjhb p2->p_sigparent = exit_signal; 494113689Sjhb PROC_UNLOCK(p2); 495113689Sjhb td2 = FIRST_THREAD_IN_PROC(p2); 496161365Snetchild /* 497161365Snetchild * in a case of stack = NULL we are supposed to COW calling process stack 498161310Snetchild * this is what normal fork() does so we just keep the tf_esp arg intact 499161310Snetchild */ 500161310Snetchild if (args->stack) 501161310Snetchild td2->td_frame->tf_esp = (unsigned int)args->stack; 50264921Smarcel 503161310Snetchild if (args->flags & CLONE_SETTLS) { 504161310Snetchild struct l_user_desc info; 505161310Snetchild int idx; 506161310Snetchild int a[2]; 507161310Snetchild struct segment_descriptor sd; 508161310Snetchild 509161310Snetchild error = copyin((void *)td->td_frame->tf_esi, &info, sizeof(struct l_user_desc)); 510161310Snetchild if (error) 511161310Snetchild return (error); 512161310Snetchild 513161310Snetchild idx = info.entry_number; 514161310Snetchild 515161365Snetchild /* 516161365Snetchild * looks like we're getting the idx we returned 517161310Snetchild * in the set_thread_area() syscall 518161310Snetchild */ 519161310Snetchild if (idx != 6 && idx != 3) 520161310Snetchild return (EINVAL); 521161310Snetchild 522161310Snetchild /* this doesnt happen in practice */ 523161310Snetchild if (idx == 6) { 524161310Snetchild /* we might copy out the entry_number as 3 */ 525161310Snetchild info.entry_number = 3; 526161310Snetchild error = copyout(&info, (void *) td->td_frame->tf_esi, sizeof(struct l_user_desc)); 527161310Snetchild if (error) 528161310Snetchild return (error); 529161310Snetchild } 530161310Snetchild 531161310Snetchild a[0] = LDT_entry_a(&info); 532161310Snetchild a[1] = LDT_entry_b(&info); 533161310Snetchild 534161310Snetchild memcpy(&sd, &a, sizeof(a)); 53564921Smarcel#ifdef DEBUG 536113689Sjhb if (ldebug(clone)) 537161310Snetchild printf("Segment created in clone with CLONE_SETTLS: lobase: %x, hibase: %x, lolimit: %x, hilimit: %x, type: %i, dpl: %i, p: %i, xx: %i, def32: %i, gran: %i\n", sd.sd_lobase, 538161310Snetchild sd.sd_hibase, 539161310Snetchild sd.sd_lolimit, 540161310Snetchild sd.sd_hilimit, 541161310Snetchild sd.sd_type, 542161310Snetchild sd.sd_dpl, 543161310Snetchild sd.sd_p, 544161310Snetchild sd.sd_xx, 545161310Snetchild sd.sd_def32, 546161310Snetchild sd.sd_gran); 547161310Snetchild#endif 548161310Snetchild 549161310Snetchild /* set %gs */ 550161310Snetchild td2->td_pcb->pcb_gsd = sd; 551161673Snetchild td2->td_pcb->pcb_gs = GSEL(GUGS_SEL, SEL_UPL); 552161310Snetchild } 553161310Snetchild 554161310Snetchild#ifdef DEBUG 555161310Snetchild if (ldebug(clone)) 556113689Sjhb printf(LMSG("clone: successful rfork to %ld, stack %p sig = %d"), 557113689Sjhb (long)p2->p_pid, args->stack, exit_signal); 55864921Smarcel#endif 559166150Snetchild if (args->flags & CLONE_VFORK) { 560166150Snetchild PROC_LOCK(p2); 561166150Snetchild p2->p_flag |= P_PPWAIT; 562166150Snetchild PROC_UNLOCK(p2); 563166150Snetchild } 56464921Smarcel 565113689Sjhb /* 566113689Sjhb * Make this runnable after we are finished with it. 567113689Sjhb */ 568113689Sjhb mtx_lock_spin(&sched_lock); 569113689Sjhb TD_SET_CAN_RUN(td2); 570166188Sjeff sched_add(td2, SRQ_BORING); 571113689Sjhb mtx_unlock_spin(&sched_lock); 57273856Sjhb 573113689Sjhb td->td_retval[0] = p2->p_pid; 574113689Sjhb td->td_retval[1] = 0; 575163374Snetchild 576163374Snetchild if (args->flags & CLONE_VFORK) { 577163374Snetchild /* wait for the children to exit, ie. emulate vfork */ 578163374Snetchild PROC_LOCK(p2); 579163374Snetchild while (p2->p_flag & P_PPWAIT) 580163374Snetchild msleep(td->td_proc, &p2->p_mtx, PWAIT, "ppwait", 0); 581163374Snetchild PROC_UNLOCK(p2); 582163374Snetchild } 583163374Snetchild 584113689Sjhb return (0); 58564921Smarcel} 58664921Smarcel 58764921Smarcel/* XXX move */ 58883221Smarcelstruct l_mmap_argv { 58983221Smarcel l_caddr_t addr; 59083221Smarcel l_int len; 59183221Smarcel l_int prot; 59283221Smarcel l_int flags; 59383221Smarcel l_int fd; 59483221Smarcel l_int pos; 59564921Smarcel}; 59664921Smarcel 59764921Smarcel#define STACK_SIZE (2 * 1024 * 1024) 59864921Smarcel#define GUARD_SIZE (4 * PAGE_SIZE) 59964921Smarcel 600104893Ssobomaxstatic int linux_mmap_common(struct thread *, struct l_mmap_argv *); 601104893Ssobomax 60264921Smarcelint 603104893Ssobomaxlinux_mmap2(struct thread *td, struct linux_mmap2_args *args) 604104893Ssobomax{ 605104893Ssobomax struct l_mmap_argv linux_args; 606104893Ssobomax 607104893Ssobomax#ifdef DEBUG 608104893Ssobomax if (ldebug(mmap2)) 609111798Sdes printf(ARGS(mmap2, "%p, %d, %d, 0x%08x, %d, %d"), 610111798Sdes (void *)args->addr, args->len, args->prot, 611111798Sdes args->flags, args->fd, args->pgoff); 612104893Ssobomax#endif 613104893Ssobomax 614104893Ssobomax linux_args.addr = (l_caddr_t)args->addr; 615104893Ssobomax linux_args.len = args->len; 616104893Ssobomax linux_args.prot = args->prot; 617104893Ssobomax linux_args.flags = args->flags; 618104893Ssobomax linux_args.fd = args->fd; 619104893Ssobomax linux_args.pos = args->pgoff * PAGE_SIZE; 620104893Ssobomax 621104893Ssobomax return (linux_mmap_common(td, &linux_args)); 622104893Ssobomax} 623104893Ssobomax 624104893Ssobomaxint 62583366Sjulianlinux_mmap(struct thread *td, struct linux_mmap_args *args) 62664921Smarcel{ 62764921Smarcel int error; 62883221Smarcel struct l_mmap_argv linux_args; 62964921Smarcel 630111797Sdes error = copyin(args->ptr, &linux_args, sizeof(linux_args)); 63164921Smarcel if (error) 63264921Smarcel return (error); 63364921Smarcel 63464921Smarcel#ifdef DEBUG 63572543Sjlemon if (ldebug(mmap)) 63672543Sjlemon printf(ARGS(mmap, "%p, %d, %d, 0x%08x, %d, %d"), 637104984Sbde (void *)linux_args.addr, linux_args.len, linux_args.prot, 638104984Sbde linux_args.flags, linux_args.fd, linux_args.pos); 63964921Smarcel#endif 64064921Smarcel 641104893Ssobomax return (linux_mmap_common(td, &linux_args)); 642104893Ssobomax} 643104893Ssobomax 644104893Ssobomaxstatic int 645104893Ssobomaxlinux_mmap_common(struct thread *td, struct l_mmap_argv *linux_args) 646104893Ssobomax{ 647104893Ssobomax struct proc *p = td->td_proc; 648104893Ssobomax struct mmap_args /* { 649104893Ssobomax caddr_t addr; 650104893Ssobomax size_t len; 651104893Ssobomax int prot; 652104893Ssobomax int flags; 653104893Ssobomax int fd; 654104893Ssobomax long pad; 655104893Ssobomax off_t pos; 656104893Ssobomax } */ bsd_args; 657112630Smdodd int error; 658162472Snetchild struct file *fp; 659104893Ssobomax 660112630Smdodd error = 0; 66164921Smarcel bsd_args.flags = 0; 662162472Snetchild fp = NULL; 663162472Snetchild 664162472Snetchild /* 665162472Snetchild * Linux mmap(2): 666162472Snetchild * You must specify exactly one of MAP_SHARED and MAP_PRIVATE 667162472Snetchild */ 668162472Snetchild if (! ((linux_args->flags & LINUX_MAP_SHARED) ^ 669162472Snetchild (linux_args->flags & LINUX_MAP_PRIVATE))) 670162479Snetchild return (EINVAL); 671162472Snetchild 672104893Ssobomax if (linux_args->flags & LINUX_MAP_SHARED) 67364921Smarcel bsd_args.flags |= MAP_SHARED; 674104893Ssobomax if (linux_args->flags & LINUX_MAP_PRIVATE) 67564921Smarcel bsd_args.flags |= MAP_PRIVATE; 676104893Ssobomax if (linux_args->flags & LINUX_MAP_FIXED) 67764921Smarcel bsd_args.flags |= MAP_FIXED; 678104893Ssobomax if (linux_args->flags & LINUX_MAP_ANON) 67964921Smarcel bsd_args.flags |= MAP_ANON; 68073213Sdillon else 68173213Sdillon bsd_args.flags |= MAP_NOSYNC; 682104893Ssobomax if (linux_args->flags & LINUX_MAP_GROWSDOWN) { 68364921Smarcel bsd_args.flags |= MAP_STACK; 68464921Smarcel 685161365Snetchild /* 686161365Snetchild * The linux MAP_GROWSDOWN option does not limit auto 68764921Smarcel * growth of the region. Linux mmap with this option 68864921Smarcel * takes as addr the inital BOS, and as len, the initial 68964921Smarcel * region size. It can then grow down from addr without 69064921Smarcel * limit. However, linux threads has an implicit internal 69164921Smarcel * limit to stack size of STACK_SIZE. Its just not 69264921Smarcel * enforced explicitly in linux. But, here we impose 69364921Smarcel * a limit of (STACK_SIZE - GUARD_SIZE) on the stack 69464921Smarcel * region, since we can do this with our mmap. 69564921Smarcel * 69664921Smarcel * Our mmap with MAP_STACK takes addr as the maximum 69764921Smarcel * downsize limit on BOS, and as len the max size of 69864921Smarcel * the region. It them maps the top SGROWSIZ bytes, 69964921Smarcel * and autgrows the region down, up to the limit 70064921Smarcel * in addr. 70164921Smarcel * 70264921Smarcel * If we don't use the MAP_STACK option, the effect 70364921Smarcel * of this code is to allocate a stack region of a 70464921Smarcel * fixed size of (STACK_SIZE - GUARD_SIZE). 70564921Smarcel */ 70664921Smarcel 70764921Smarcel /* This gives us TOS */ 708104893Ssobomax bsd_args.addr = linux_args->addr + linux_args->len; 70964921Smarcel 71067238Sgallatin if (bsd_args.addr > p->p_vmspace->vm_maxsaddr) { 711161365Snetchild /* 712161365Snetchild * Some linux apps will attempt to mmap 71367238Sgallatin * thread stacks near the top of their 71467238Sgallatin * address space. If their TOS is greater 71567238Sgallatin * than vm_maxsaddr, vm_map_growstack() 71667238Sgallatin * will confuse the thread stack with the 71767238Sgallatin * process stack and deliver a SEGV if they 71867238Sgallatin * attempt to grow the thread stack past their 71967238Sgallatin * current stacksize rlimit. To avoid this, 72067238Sgallatin * adjust vm_maxsaddr upwards to reflect 72167238Sgallatin * the current stacksize rlimit rather 72267238Sgallatin * than the maximum possible stacksize. 72367238Sgallatin * It would be better to adjust the 72467238Sgallatin * mmap'ed region, but some apps do not check 72567238Sgallatin * mmap's return value. 72667238Sgallatin */ 727125454Sjhb PROC_LOCK(p); 72867238Sgallatin p->p_vmspace->vm_maxsaddr = (char *)USRSTACK - 729125454Sjhb lim_cur(p, RLIMIT_STACK); 730125454Sjhb PROC_UNLOCK(p); 73167238Sgallatin } 73267238Sgallatin 73364921Smarcel /* This gives us our maximum stack size */ 734104893Ssobomax if (linux_args->len > STACK_SIZE - GUARD_SIZE) 735104893Ssobomax bsd_args.len = linux_args->len; 73664921Smarcel else 73764921Smarcel bsd_args.len = STACK_SIZE - GUARD_SIZE; 73864921Smarcel 739161365Snetchild /* 740161365Snetchild * This gives us a new BOS. If we're using VM_STACK, then 74164921Smarcel * mmap will just map the top SGROWSIZ bytes, and let 74264921Smarcel * the stack grow down to the limit at BOS. If we're 74364921Smarcel * not using VM_STACK we map the full stack, since we 74464921Smarcel * don't have a way to autogrow it. 74564921Smarcel */ 74664921Smarcel bsd_args.addr -= bsd_args.len; 74764921Smarcel } else { 748104893Ssobomax bsd_args.addr = linux_args->addr; 749104893Ssobomax bsd_args.len = linux_args->len; 75064921Smarcel } 75164921Smarcel 752162472Snetchild bsd_args.prot = linux_args->prot; 753104893Ssobomax if (linux_args->flags & LINUX_MAP_ANON) 75464921Smarcel bsd_args.fd = -1; 755162472Snetchild else { 756162472Snetchild /* 757162472Snetchild * Linux follows Solaris mmap(2) description: 758162472Snetchild * The file descriptor fildes is opened with 759162472Snetchild * read permission, regardless of the 760162472Snetchild * protection options specified. 761162472Snetchild * If PROT_WRITE is specified, the application 762162472Snetchild * must have opened the file descriptor 763162472Snetchild * fildes with write permission unless 764162472Snetchild * MAP_PRIVATE is specified in the flag 765162472Snetchild * argument as described below. 766162472Snetchild */ 767162472Snetchild 768162472Snetchild if ((error = fget(td, linux_args->fd, &fp)) != 0) 769162479Snetchild return (error); 770162472Snetchild if (fp->f_type != DTYPE_VNODE) { 771162472Snetchild fdrop(fp, td); 772162479Snetchild return (EINVAL); 773162472Snetchild } 774162472Snetchild 775162472Snetchild /* Linux mmap() just fails for O_WRONLY files */ 776162472Snetchild if (! (fp->f_flag & FREAD)) { 777162472Snetchild fdrop(fp, td); 778162479Snetchild return (EACCES); 779162472Snetchild } 780162472Snetchild 781104893Ssobomax bsd_args.fd = linux_args->fd; 782162472Snetchild fdrop(fp, td); 783162472Snetchild } 784104893Ssobomax bsd_args.pos = linux_args->pos; 78564921Smarcel bsd_args.pad = 0; 78664921Smarcel 78764921Smarcel#ifdef DEBUG 78872543Sjlemon if (ldebug(mmap)) 789112630Smdodd printf("-> %s(%p, %d, %d, 0x%08x, %d, 0x%x)\n", 790112630Smdodd __func__, 79172543Sjlemon (void *)bsd_args.addr, bsd_args.len, bsd_args.prot, 79272543Sjlemon bsd_args.flags, bsd_args.fd, (int)bsd_args.pos); 79364921Smarcel#endif 794112630Smdodd error = mmap(td, &bsd_args); 795112630Smdodd#ifdef DEBUG 796112630Smdodd if (ldebug(mmap)) 797112630Smdodd printf("-> %s() return: 0x%x (0x%08x)\n", 798112630Smdodd __func__, error, (u_int)td->td_retval[0]); 799112630Smdodd#endif 800112630Smdodd return (error); 80164921Smarcel} 80264921Smarcel 80364921Smarcelint 80483366Sjulianlinux_pipe(struct thread *td, struct linux_pipe_args *args) 80564921Smarcel{ 80664921Smarcel int error; 80764921Smarcel int reg_edx; 80864921Smarcel 80964921Smarcel#ifdef DEBUG 81072543Sjlemon if (ldebug(pipe)) 81172543Sjlemon printf(ARGS(pipe, "*")); 81264921Smarcel#endif 81364921Smarcel 81483366Sjulian reg_edx = td->td_retval[1]; 81583366Sjulian error = pipe(td, 0); 81664921Smarcel if (error) { 81783366Sjulian td->td_retval[1] = reg_edx; 81864921Smarcel return (error); 81964921Smarcel } 82064921Smarcel 82183366Sjulian error = copyout(td->td_retval, args->pipefds, 2*sizeof(int)); 82264921Smarcel if (error) { 82383366Sjulian td->td_retval[1] = reg_edx; 82464921Smarcel return (error); 82564921Smarcel } 82664921Smarcel 82783366Sjulian td->td_retval[1] = reg_edx; 82883366Sjulian td->td_retval[0] = 0; 82964921Smarcel return (0); 83064921Smarcel} 83164921Smarcel 83264921Smarcelint 83383366Sjulianlinux_ioperm(struct thread *td, struct linux_ioperm_args *args) 83464921Smarcel{ 835140862Ssobomax int error; 836140862Ssobomax struct i386_ioperm_args iia; 83764921Smarcel 838140862Ssobomax iia.start = args->start; 839140862Ssobomax iia.length = args->length; 840140862Ssobomax iia.enable = args->enable; 841140862Ssobomax mtx_lock(&Giant); 842140862Ssobomax error = i386_set_ioperm(td, &iia); 843140862Ssobomax mtx_unlock(&Giant); 844140862Ssobomax return (error); 84564921Smarcel} 84664921Smarcel 84764921Smarcelint 84883366Sjulianlinux_iopl(struct thread *td, struct linux_iopl_args *args) 84964921Smarcel{ 85064921Smarcel int error; 85164921Smarcel 85264921Smarcel if (args->level < 0 || args->level > 3) 85364921Smarcel return (EINVAL); 854164033Srwatson if ((error = priv_check(td, PRIV_IO)) != 0) 85564921Smarcel return (error); 85691406Sjhb if ((error = securelevel_gt(td->td_ucred, 0)) != 0) 85783981Srwatson return (error); 85883366Sjulian td->td_frame->tf_eflags = (td->td_frame->tf_eflags & ~PSL_IOPL) | 85964921Smarcel (args->level * (PSL_IOPL / 3)); 86064921Smarcel return (0); 86164921Smarcel} 86264921Smarcel 86364921Smarcelint 864105441Smarkmlinux_modify_ldt(struct thread *td, struct linux_modify_ldt_args *uap) 86564921Smarcel{ 86664921Smarcel int error; 867140862Ssobomax struct i386_ldt_args ldt; 86883221Smarcel struct l_descriptor ld; 869140862Ssobomax union descriptor desc; 87064921Smarcel 87164921Smarcel if (uap->ptr == NULL) 87264921Smarcel return (EINVAL); 87364921Smarcel 87464921Smarcel switch (uap->func) { 87564921Smarcel case 0x00: /* read_ldt */ 876140862Ssobomax ldt.start = 0; 877140862Ssobomax ldt.descs = uap->ptr; 878140862Ssobomax ldt.num = uap->bytecount / sizeof(union descriptor); 879140862Ssobomax mtx_lock(&Giant); 880140862Ssobomax error = i386_get_ldt(td, &ldt); 88183366Sjulian td->td_retval[0] *= sizeof(union descriptor); 882140862Ssobomax mtx_unlock(&Giant); 88364921Smarcel break; 88464921Smarcel case 0x01: /* write_ldt */ 88564921Smarcel case 0x11: /* write_ldt */ 88664921Smarcel if (uap->bytecount != sizeof(ld)) 88764921Smarcel return (EINVAL); 88864921Smarcel 88964921Smarcel error = copyin(uap->ptr, &ld, sizeof(ld)); 89064921Smarcel if (error) 89164921Smarcel return (error); 89264921Smarcel 893140862Ssobomax ldt.start = ld.entry_number; 894140862Ssobomax ldt.descs = &desc; 895140862Ssobomax ldt.num = 1; 896140862Ssobomax desc.sd.sd_lolimit = (ld.limit & 0x0000ffff); 897140862Ssobomax desc.sd.sd_hilimit = (ld.limit & 0x000f0000) >> 16; 898140862Ssobomax desc.sd.sd_lobase = (ld.base_addr & 0x00ffffff); 899140862Ssobomax desc.sd.sd_hibase = (ld.base_addr & 0xff000000) >> 24; 900140862Ssobomax desc.sd.sd_type = SDT_MEMRO | ((ld.read_exec_only ^ 1) << 1) | 90164921Smarcel (ld.contents << 2); 902140862Ssobomax desc.sd.sd_dpl = 3; 903140862Ssobomax desc.sd.sd_p = (ld.seg_not_present ^ 1); 904140862Ssobomax desc.sd.sd_xx = 0; 905140862Ssobomax desc.sd.sd_def32 = ld.seg_32bit; 906140862Ssobomax desc.sd.sd_gran = ld.limit_in_pages; 907140862Ssobomax mtx_lock(&Giant); 908140862Ssobomax error = i386_set_ldt(td, &ldt, &desc); 909140862Ssobomax mtx_unlock(&Giant); 91064921Smarcel break; 91164921Smarcel default: 91264921Smarcel error = EINVAL; 91364921Smarcel break; 91464921Smarcel } 91564921Smarcel 91664921Smarcel if (error == EOPNOTSUPP) { 91764921Smarcel printf("linux: modify_ldt needs kernel option USER_LDT\n"); 91864921Smarcel error = ENOSYS; 91964921Smarcel } 92064921Smarcel 92164921Smarcel return (error); 92264921Smarcel} 92364921Smarcel 92464921Smarcelint 92583366Sjulianlinux_sigaction(struct thread *td, struct linux_sigaction_args *args) 92664921Smarcel{ 92783221Smarcel l_osigaction_t osa; 92883221Smarcel l_sigaction_t act, oact; 92964921Smarcel int error; 93064921Smarcel 93164921Smarcel#ifdef DEBUG 93272543Sjlemon if (ldebug(sigaction)) 93372543Sjlemon printf(ARGS(sigaction, "%d, %p, %p"), 93472543Sjlemon args->sig, (void *)args->nsa, (void *)args->osa); 93564921Smarcel#endif 93664921Smarcel 93764921Smarcel if (args->nsa != NULL) { 938111797Sdes error = copyin(args->nsa, &osa, sizeof(l_osigaction_t)); 93964921Smarcel if (error) 94064921Smarcel return (error); 94164921Smarcel act.lsa_handler = osa.lsa_handler; 94264921Smarcel act.lsa_flags = osa.lsa_flags; 94364921Smarcel act.lsa_restorer = osa.lsa_restorer; 94464921Smarcel LINUX_SIGEMPTYSET(act.lsa_mask); 94564921Smarcel act.lsa_mask.__bits[0] = osa.lsa_mask; 94664921Smarcel } 94764921Smarcel 94883366Sjulian error = linux_do_sigaction(td, args->sig, args->nsa ? &act : NULL, 94964921Smarcel args->osa ? &oact : NULL); 95064921Smarcel 95164921Smarcel if (args->osa != NULL && !error) { 95264921Smarcel osa.lsa_handler = oact.lsa_handler; 95364921Smarcel osa.lsa_flags = oact.lsa_flags; 95464921Smarcel osa.lsa_restorer = oact.lsa_restorer; 95564921Smarcel osa.lsa_mask = oact.lsa_mask.__bits[0]; 956111797Sdes error = copyout(&osa, args->osa, sizeof(l_osigaction_t)); 95764921Smarcel } 95864921Smarcel 95964921Smarcel return (error); 96064921Smarcel} 96164921Smarcel 96264921Smarcel/* 96364921Smarcel * Linux has two extra args, restart and oldmask. We dont use these, 96464921Smarcel * but it seems that "restart" is actually a context pointer that 96564921Smarcel * enables the signal to happen with a different register set. 96664921Smarcel */ 96764921Smarcelint 96883366Sjulianlinux_sigsuspend(struct thread *td, struct linux_sigsuspend_args *args) 96964921Smarcel{ 970102814Siedowse sigset_t sigmask; 97183221Smarcel l_sigset_t mask; 97264921Smarcel 97364921Smarcel#ifdef DEBUG 97472543Sjlemon if (ldebug(sigsuspend)) 97572543Sjlemon printf(ARGS(sigsuspend, "%08lx"), (unsigned long)args->mask); 97664921Smarcel#endif 97764921Smarcel 97864921Smarcel LINUX_SIGEMPTYSET(mask); 97964921Smarcel mask.__bits[0] = args->mask; 980102814Siedowse linux_to_bsd_sigset(&mask, &sigmask); 981102814Siedowse return (kern_sigsuspend(td, sigmask)); 98264921Smarcel} 98364921Smarcel 98464921Smarcelint 985105441Smarkmlinux_rt_sigsuspend(struct thread *td, struct linux_rt_sigsuspend_args *uap) 98664921Smarcel{ 98783221Smarcel l_sigset_t lmask; 988102814Siedowse sigset_t sigmask; 98964921Smarcel int error; 99064921Smarcel 99164921Smarcel#ifdef DEBUG 99272543Sjlemon if (ldebug(rt_sigsuspend)) 99372543Sjlemon printf(ARGS(rt_sigsuspend, "%p, %d"), 99472543Sjlemon (void *)uap->newset, uap->sigsetsize); 99564921Smarcel#endif 99664921Smarcel 99783221Smarcel if (uap->sigsetsize != sizeof(l_sigset_t)) 99864921Smarcel return (EINVAL); 99964921Smarcel 100083221Smarcel error = copyin(uap->newset, &lmask, sizeof(l_sigset_t)); 100164921Smarcel if (error) 100264921Smarcel return (error); 100364921Smarcel 1004102814Siedowse linux_to_bsd_sigset(&lmask, &sigmask); 1005102814Siedowse return (kern_sigsuspend(td, sigmask)); 100664921Smarcel} 100764921Smarcel 100864921Smarcelint 100983366Sjulianlinux_pause(struct thread *td, struct linux_pause_args *args) 101064921Smarcel{ 101183366Sjulian struct proc *p = td->td_proc; 1012102814Siedowse sigset_t sigmask; 101364921Smarcel 101464921Smarcel#ifdef DEBUG 101572543Sjlemon if (ldebug(pause)) 101672543Sjlemon printf(ARGS(pause, "")); 101764921Smarcel#endif 101864921Smarcel 101971494Sjhb PROC_LOCK(p); 1020112888Sjeff sigmask = td->td_sigmask; 102171494Sjhb PROC_UNLOCK(p); 1022102814Siedowse return (kern_sigsuspend(td, sigmask)); 102364921Smarcel} 102464921Smarcel 102564921Smarcelint 102683366Sjulianlinux_sigaltstack(struct thread *td, struct linux_sigaltstack_args *uap) 102764921Smarcel{ 1028102814Siedowse stack_t ss, oss; 102983221Smarcel l_stack_t lss; 103064921Smarcel int error; 103164921Smarcel 103264921Smarcel#ifdef DEBUG 103372543Sjlemon if (ldebug(sigaltstack)) 103472543Sjlemon printf(ARGS(sigaltstack, "%p, %p"), uap->uss, uap->uoss); 103564921Smarcel#endif 103664921Smarcel 1037102814Siedowse if (uap->uss != NULL) { 103883221Smarcel error = copyin(uap->uss, &lss, sizeof(l_stack_t)); 103967051Sgallatin if (error) 104067051Sgallatin return (error); 104164921Smarcel 1042102814Siedowse ss.ss_sp = lss.ss_sp; 1043102814Siedowse ss.ss_size = lss.ss_size; 1044102814Siedowse ss.ss_flags = linux_to_bsd_sigaltstack(lss.ss_flags); 104567051Sgallatin } 1046134269Sjhb error = kern_sigaltstack(td, (uap->uss != NULL) ? &ss : NULL, 1047134269Sjhb (uap->uoss != NULL) ? &oss : NULL); 1048102814Siedowse if (!error && uap->uoss != NULL) { 1049102814Siedowse lss.ss_sp = oss.ss_sp; 1050102814Siedowse lss.ss_size = oss.ss_size; 1051102814Siedowse lss.ss_flags = bsd_to_linux_sigaltstack(oss.ss_flags); 105283221Smarcel error = copyout(&lss, uap->uoss, sizeof(l_stack_t)); 105364921Smarcel } 105464921Smarcel 105564921Smarcel return (error); 105664921Smarcel} 1057104893Ssobomax 1058104893Ssobomaxint 1059104893Ssobomaxlinux_ftruncate64(struct thread *td, struct linux_ftruncate64_args *args) 1060104893Ssobomax{ 1061104893Ssobomax struct ftruncate_args sa; 1062104893Ssobomax 1063104893Ssobomax#ifdef DEBUG 1064104893Ssobomax if (ldebug(ftruncate64)) 1065104984Sbde printf(ARGS(ftruncate64, "%u, %jd"), args->fd, 1066104984Sbde (intmax_t)args->length); 1067104893Ssobomax#endif 1068104893Ssobomax 1069104893Ssobomax sa.fd = args->fd; 1070104893Ssobomax sa.pad = 0; 1071104893Ssobomax sa.length = args->length; 1072104893Ssobomax return ftruncate(td, &sa); 1073104893Ssobomax} 1074134838Sdfr 1075134838Sdfrint 1076134838Sdfrlinux_set_thread_area(struct thread *td, struct linux_set_thread_area_args *args) 1077134838Sdfr{ 1078161310Snetchild struct l_user_desc info; 1079161310Snetchild int error; 1080161310Snetchild int idx; 1081161310Snetchild int a[2]; 1082161310Snetchild struct segment_descriptor sd; 1083161310Snetchild 1084161310Snetchild error = copyin(args->desc, &info, sizeof(struct l_user_desc)); 1085161310Snetchild if (error) 1086161310Snetchild return (error); 1087161310Snetchild 1088161310Snetchild#ifdef DEBUG 1089161310Snetchild if (ldebug(set_thread_area)) 1090161310Snetchild printf(ARGS(set_thread_area, "%i, %x, %x, %i, %i, %i, %i, %i, %i\n"), 1091161310Snetchild info.entry_number, 1092161310Snetchild info.base_addr, 1093161310Snetchild info.limit, 1094161310Snetchild info.seg_32bit, 1095161310Snetchild info.contents, 1096161310Snetchild info.read_exec_only, 1097161310Snetchild info.limit_in_pages, 1098161310Snetchild info.seg_not_present, 1099161310Snetchild info.useable); 1100161310Snetchild#endif 1101161310Snetchild 1102161310Snetchild idx = info.entry_number; 1103161365Snetchild /* 1104161365Snetchild * Semantics of linux version: every thread in the system has array 1105161310Snetchild * of 3 tls descriptors. 1st is GLIBC TLS, 2nd is WINE, 3rd unknown. This 1106161310Snetchild * syscall loads one of the selected tls decriptors with a value 1107161310Snetchild * and also loads GDT descriptors 6, 7 and 8 with the content of the per-thread 1108161310Snetchild * descriptors. 1109161310Snetchild * 1110161310Snetchild * Semantics of fbsd version: I think we can ignore that linux has 3 per-thread 1111161310Snetchild * descriptors and use just the 1st one. The tls_array[] is used only in 1112161310Snetchild * set/get-thread_area() syscalls and for loading the GDT descriptors. In fbsd 1113161310Snetchild * we use just one GDT descriptor for TLS so we will load just one. 1114161310Snetchild * XXX: this doesnt work when user-space process tries to use more then 1 TLS segment 1115161310Snetchild * comment in the linux sources says wine might do that. 1116134838Sdfr */ 1117161310Snetchild 1118161365Snetchild /* 1119161365Snetchild * we support just GLIBC TLS now 1120161310Snetchild * we should let 3 proceed as well because we use this segment so 1121161310Snetchild * if code does two subsequent calls it should succeed 1122161310Snetchild */ 1123161310Snetchild if (idx != 6 && idx != -1 && idx != 3) 1124161310Snetchild return (EINVAL); 1125161310Snetchild 1126161365Snetchild /* 1127161365Snetchild * we have to copy out the GDT entry we use 1128161310Snetchild * FreeBSD uses GDT entry #3 for storing %gs so load that 1129161310Snetchild * XXX: what if userspace program doesnt check this value and tries 1130161310Snetchild * to use 6, 7 or 8? 1131161310Snetchild */ 1132161310Snetchild idx = info.entry_number = 3; 1133161310Snetchild error = copyout(&info, args->desc, sizeof(struct l_user_desc)); 1134161310Snetchild if (error) 1135161310Snetchild return (error); 1136161310Snetchild 1137161310Snetchild if (LDT_empty(&info)) { 1138161310Snetchild a[0] = 0; 1139161310Snetchild a[1] = 0; 1140161310Snetchild } else { 1141161310Snetchild a[0] = LDT_entry_a(&info); 1142161310Snetchild a[1] = LDT_entry_b(&info); 1143161310Snetchild } 1144161310Snetchild 1145161310Snetchild memcpy(&sd, &a, sizeof(a)); 1146161310Snetchild#ifdef DEBUG 1147161310Snetchild if (ldebug(set_thread_area)) 1148161310Snetchild printf("Segment created in set_thread_area: lobase: %x, hibase: %x, lolimit: %x, hilimit: %x, type: %i, dpl: %i, p: %i, xx: %i, def32: %i, gran: %i\n", sd.sd_lobase, 1149161310Snetchild sd.sd_hibase, 1150161310Snetchild sd.sd_lolimit, 1151161310Snetchild sd.sd_hilimit, 1152161310Snetchild sd.sd_type, 1153161310Snetchild sd.sd_dpl, 1154161310Snetchild sd.sd_p, 1155161310Snetchild sd.sd_xx, 1156161310Snetchild sd.sd_def32, 1157161310Snetchild sd.sd_gran); 1158161310Snetchild#endif 1159161310Snetchild 1160161310Snetchild /* this is taken from i386 version of cpu_set_user_tls() */ 1161161310Snetchild critical_enter(); 1162161310Snetchild /* set %gs */ 1163161310Snetchild td->td_pcb->pcb_gsd = sd; 1164161310Snetchild PCPU_GET(fsgs_gdt)[1] = sd; 1165161310Snetchild load_gs(GSEL(GUGS_SEL, SEL_UPL)); 1166161310Snetchild critical_exit(); 1167161310Snetchild 1168161310Snetchild return (0); 1169134838Sdfr} 1170134838Sdfr 1171134838Sdfrint 1172161310Snetchildlinux_get_thread_area(struct thread *td, struct linux_get_thread_area_args *args) 1173134838Sdfr{ 1174161310Snetchild 1175161310Snetchild struct l_user_desc info; 1176161310Snetchild int error; 1177161310Snetchild int idx; 1178161310Snetchild struct l_desc_struct desc; 1179161310Snetchild struct segment_descriptor sd; 1180134838Sdfr 1181161310Snetchild#ifdef DEBUG 1182161310Snetchild if (ldebug(get_thread_area)) 1183161310Snetchild printf(ARGS(get_thread_area, "%p"), args->desc); 1184161310Snetchild#endif 1185161310Snetchild 1186161310Snetchild error = copyin(args->desc, &info, sizeof(struct l_user_desc)); 1187161310Snetchild if (error) 1188161310Snetchild return (error); 1189161310Snetchild 1190161310Snetchild idx = info.entry_number; 1191161310Snetchild /* XXX: I am not sure if we want 3 to be allowed too. */ 1192161310Snetchild if (idx != 6 && idx != 3) 1193161310Snetchild return (EINVAL); 1194161310Snetchild 1195161310Snetchild idx = 3; 1196161310Snetchild 1197161310Snetchild memset(&info, 0, sizeof(info)); 1198161310Snetchild 1199161310Snetchild sd = PCPU_GET(fsgs_gdt)[1]; 1200161310Snetchild 1201161310Snetchild memcpy(&desc, &sd, sizeof(desc)); 1202161310Snetchild 1203161310Snetchild info.entry_number = idx; 1204161310Snetchild info.base_addr = GET_BASE(&desc); 1205161310Snetchild info.limit = GET_LIMIT(&desc); 1206161310Snetchild info.seg_32bit = GET_32BIT(&desc); 1207161310Snetchild info.contents = GET_CONTENTS(&desc); 1208161310Snetchild info.read_exec_only = !GET_WRITABLE(&desc); 1209161310Snetchild info.limit_in_pages = GET_LIMIT_PAGES(&desc); 1210161310Snetchild info.seg_not_present = !GET_PRESENT(&desc); 1211161310Snetchild info.useable = GET_USEABLE(&desc); 1212161310Snetchild 1213161310Snetchild error = copyout(&info, args->desc, sizeof(struct l_user_desc)); 1214161310Snetchild if (error) 1215161310Snetchild return (EFAULT); 1216161310Snetchild 1217134838Sdfr return (0); 1218134838Sdfr} 1219134838Sdfr 1220161310Snetchild/* copied from kern/kern_time.c */ 1221134838Sdfrint 1222161310Snetchildlinux_timer_create(struct thread *td, struct linux_timer_create_args *args) 1223134838Sdfr{ 1224161310Snetchild return ktimer_create(td, (struct ktimer_create_args *) args); 1225161310Snetchild} 1226134838Sdfr 1227161310Snetchildint 1228161310Snetchildlinux_timer_settime(struct thread *td, struct linux_timer_settime_args *args) 1229161310Snetchild{ 1230161310Snetchild return ktimer_settime(td, (struct ktimer_settime_args *) args); 1231134838Sdfr} 1232134838Sdfr 1233161310Snetchildint 1234161310Snetchildlinux_timer_gettime(struct thread *td, struct linux_timer_gettime_args *args) 1235161310Snetchild{ 1236161310Snetchild return ktimer_gettime(td, (struct ktimer_gettime_args *) args); 1237161310Snetchild} 1238161310Snetchild 1239161310Snetchildint 1240161310Snetchildlinux_timer_getoverrun(struct thread *td, struct linux_timer_getoverrun_args *args) 1241161310Snetchild{ 1242161310Snetchild return ktimer_getoverrun(td, (struct ktimer_getoverrun_args *) args); 1243161310Snetchild} 1244161310Snetchild 1245161310Snetchildint 1246161310Snetchildlinux_timer_delete(struct thread *td, struct linux_timer_delete_args *args) 1247161310Snetchild{ 1248161310Snetchild return ktimer_delete(td, (struct ktimer_delete_args *) args); 1249161310Snetchild} 1250161310Snetchild 1251161310Snetchild/* XXX: this wont work with module - convert it */ 1252161310Snetchildint 1253161310Snetchildlinux_mq_open(struct thread *td, struct linux_mq_open_args *args) 1254161310Snetchild{ 1255161310Snetchild#ifdef P1003_1B_MQUEUE 1256161310Snetchild return kmq_open(td, (struct kmq_open_args *) args); 1257161310Snetchild#else 1258161310Snetchild return (ENOSYS); 1259161310Snetchild#endif 1260161310Snetchild} 1261161310Snetchild 1262161310Snetchildint 1263161310Snetchildlinux_mq_unlink(struct thread *td, struct linux_mq_unlink_args *args) 1264161310Snetchild{ 1265161310Snetchild#ifdef P1003_1B_MQUEUE 1266161310Snetchild return kmq_unlink(td, (struct kmq_unlink_args *) args); 1267161310Snetchild#else 1268161310Snetchild return (ENOSYS); 1269161310Snetchild#endif 1270161310Snetchild} 1271161310Snetchild 1272161310Snetchildint 1273161310Snetchildlinux_mq_timedsend(struct thread *td, struct linux_mq_timedsend_args *args) 1274161310Snetchild{ 1275161310Snetchild#ifdef P1003_1B_MQUEUE 1276161310Snetchild return kmq_timedsend(td, (struct kmq_timedsend_args *) args); 1277161310Snetchild#else 1278161310Snetchild return (ENOSYS); 1279161310Snetchild#endif 1280161310Snetchild} 1281161310Snetchild 1282161310Snetchildint 1283161310Snetchildlinux_mq_timedreceive(struct thread *td, struct linux_mq_timedreceive_args *args) 1284161310Snetchild{ 1285161310Snetchild#ifdef P1003_1B_MQUEUE 1286161310Snetchild return kmq_timedreceive(td, (struct kmq_timedreceive_args *) args); 1287161310Snetchild#else 1288161310Snetchild return (ENOSYS); 1289161310Snetchild#endif 1290161310Snetchild} 1291161310Snetchild 1292161310Snetchildint 1293161310Snetchildlinux_mq_notify(struct thread *td, struct linux_mq_notify_args *args) 1294161310Snetchild{ 1295161310Snetchild#ifdef P1003_1B_MQUEUE 1296161310Snetchild return kmq_notify(td, (struct kmq_notify_args *) args); 1297161310Snetchild#else 1298161310Snetchild return (ENOSYS); 1299161310Snetchild#endif 1300161310Snetchild} 1301161310Snetchild 1302161310Snetchildint 1303161310Snetchildlinux_mq_getsetattr(struct thread *td, struct linux_mq_getsetattr_args *args) 1304161310Snetchild{ 1305161310Snetchild#ifdef P1003_1B_MQUEUE 1306161310Snetchild return kmq_setattr(td, (struct kmq_setattr_args *) args); 1307161310Snetchild#else 1308161310Snetchild return (ENOSYS); 1309161310Snetchild#endif 1310161310Snetchild} 1311161310Snetchild 1312