linux_machdep.c revision 166188
164921Smarcel/*-
264921Smarcel * Copyright (c) 2000 Marcel Moolenaar
364921Smarcel * All rights reserved.
464921Smarcel *
564921Smarcel * Redistribution and use in source and binary forms, with or without
664921Smarcel * modification, are permitted provided that the following conditions
764921Smarcel * are met:
864921Smarcel * 1. Redistributions of source code must retain the above copyright
9111798Sdes *    notice, this list of conditions and the following disclaimer
1064921Smarcel *    in this position and unchanged.
1164921Smarcel * 2. Redistributions in binary form must reproduce the above copyright
1264921Smarcel *    notice, this list of conditions and the following disclaimer in the
1364921Smarcel *    documentation and/or other materials provided with the distribution.
1464921Smarcel * 3. The name of the author may not be used to endorse or promote products
1565067Smarcel *    derived from this software without specific prior written permission.
1664921Smarcel *
1764921Smarcel * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
1864921Smarcel * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
1964921Smarcel * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
2064921Smarcel * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
2164921Smarcel * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
2264921Smarcel * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
2364921Smarcel * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
2464921Smarcel * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
2564921Smarcel * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
2664921Smarcel * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2764921Smarcel */
2864921Smarcel
29115705Sobrien#include <sys/cdefs.h>
30115705Sobrien__FBSDID("$FreeBSD: head/sys/i386/linux/linux_machdep.c 166188 2007-01-23 08:46:51Z jeff $");
31115705Sobrien
3264921Smarcel#include <sys/param.h>
3376166Smarkm#include <sys/systm.h>
34162472Snetchild#include <sys/file.h>
35162472Snetchild#include <sys/fcntl.h>
36140992Ssobomax#include <sys/imgact.h>
3784811Sjhb#include <sys/lock.h>
38140992Ssobomax#include <sys/malloc.h>
3964921Smarcel#include <sys/mman.h>
4076166Smarkm#include <sys/mutex.h>
41161310Snetchild#include <sys/sx.h>
42164033Srwatson#include <sys/priv.h>
4364921Smarcel#include <sys/proc.h>
44161310Snetchild#include <sys/queue.h>
4576166Smarkm#include <sys/resource.h>
4676166Smarkm#include <sys/resourcevar.h>
47134838Sdfr#include <sys/signalvar.h>
48102814Siedowse#include <sys/syscallsubr.h>
4964921Smarcel#include <sys/sysproto.h>
5064921Smarcel#include <sys/unistd.h>
51161310Snetchild#include <sys/wait.h>
52166188Sjeff#include <sys/sched.h>
5364921Smarcel
5464921Smarcel#include <machine/frame.h>
5564921Smarcel#include <machine/psl.h>
5664921Smarcel#include <machine/segments.h>
5764921Smarcel#include <machine/sysarch.h>
5864921Smarcel
5967238Sgallatin#include <vm/vm.h>
6067238Sgallatin#include <vm/pmap.h>
6167238Sgallatin#include <vm/vm_map.h>
6267238Sgallatin
6364921Smarcel#include <i386/linux/linux.h>
6468583Smarcel#include <i386/linux/linux_proto.h>
6564921Smarcel#include <compat/linux/linux_ipc.h>
6664921Smarcel#include <compat/linux/linux_signal.h>
6764921Smarcel#include <compat/linux/linux_util.h>
68161310Snetchild#include <compat/linux/linux_emul.h>
6964921Smarcel
70161310Snetchild#include <i386/include/pcb.h>			/* needed for pcb definition in linux_set_thread_area */
71161310Snetchild
72161310Snetchild#include "opt_posix.h"
73161310Snetchild
74161310Snetchildextern struct sysentvec elf32_freebsd_sysvec;	/* defined in i386/i386/elf_machdep.c */
75161310Snetchild
7683221Smarcelstruct l_descriptor {
7783221Smarcel	l_uint		entry_number;
7883221Smarcel	l_ulong		base_addr;
7983221Smarcel	l_uint		limit;
8083221Smarcel	l_uint		seg_32bit:1;
8183221Smarcel	l_uint		contents:2;
8283221Smarcel	l_uint		read_exec_only:1;
8383221Smarcel	l_uint		limit_in_pages:1;
8483221Smarcel	l_uint		seg_not_present:1;
8583221Smarcel	l_uint		useable:1;
8664921Smarcel};
8764921Smarcel
8883221Smarcelstruct l_old_select_argv {
8983221Smarcel	l_int		nfds;
9083221Smarcel	l_fd_set	*readfds;
9183221Smarcel	l_fd_set	*writefds;
9283221Smarcel	l_fd_set	*exceptfds;
9383221Smarcel	struct l_timeval	*timeout;
9464921Smarcel};
9564921Smarcel
9664921Smarcelint
9767051Sgallatinlinux_to_bsd_sigaltstack(int lsa)
9867051Sgallatin{
9967051Sgallatin	int bsa = 0;
10067051Sgallatin
10167051Sgallatin	if (lsa & LINUX_SS_DISABLE)
10267051Sgallatin		bsa |= SS_DISABLE;
10367051Sgallatin	if (lsa & LINUX_SS_ONSTACK)
10467051Sgallatin		bsa |= SS_ONSTACK;
10567051Sgallatin	return (bsa);
10667051Sgallatin}
10767051Sgallatin
10867051Sgallatinint
10967051Sgallatinbsd_to_linux_sigaltstack(int bsa)
11067051Sgallatin{
11167051Sgallatin	int lsa = 0;
11267051Sgallatin
11367051Sgallatin	if (bsa & SS_DISABLE)
11467051Sgallatin		lsa |= LINUX_SS_DISABLE;
11567051Sgallatin	if (bsa & SS_ONSTACK)
11667051Sgallatin		lsa |= LINUX_SS_ONSTACK;
11767051Sgallatin	return (lsa);
11867051Sgallatin}
11967051Sgallatin
12067051Sgallatinint
12183366Sjulianlinux_execve(struct thread *td, struct linux_execve_args *args)
12264921Smarcel{
123140992Ssobomax	int error;
124140992Ssobomax	char *newpath;
125140992Ssobomax	struct image_args eargs;
12664921Smarcel
127141468Sjhb	LCONVPATHEXIST(td, args->path, &newpath);
12864921Smarcel
12964921Smarcel#ifdef DEBUG
13072543Sjlemon	if (ldebug(execve))
131140992Ssobomax		printf(ARGS(execve, "%s"), newpath);
13264921Smarcel#endif
13364921Smarcel
134140992Ssobomax	error = exec_copyin_args(&eargs, newpath, UIO_SYSSPACE,
135140992Ssobomax	    args->argp, args->envp);
136140992Ssobomax	free(newpath, M_TEMP);
137140992Ssobomax	if (error == 0)
138148623Ssobomax		error = kern_execve(td, &eargs, NULL);
139161310Snetchild	if (error == 0)
140161310Snetchild	   	/* linux process can exec fbsd one, dont attempt
141161310Snetchild		 * to create emuldata for such process using
142161310Snetchild		 * linux_proc_init, this leads to a panic on KASSERT
143161310Snetchild		 * because such process has p->p_emuldata == NULL
144161310Snetchild		 */
145161310Snetchild	   	if (td->td_proc->p_sysent == &elf_linux_sysvec)
146161310Snetchild   		   	error = linux_proc_init(td, 0, 0);
147140992Ssobomax	return (error);
14864921Smarcel}
14964921Smarcel
15083221Smarcelstruct l_ipc_kludge {
15183221Smarcel	struct l_msgbuf *msgp;
15283221Smarcel	l_long msgtyp;
15383221Smarcel};
15483221Smarcel
15564921Smarcelint
15683366Sjulianlinux_ipc(struct thread *td, struct linux_ipc_args *args)
15764921Smarcel{
15883221Smarcel
15983221Smarcel	switch (args->what & 0xFFFF) {
16083221Smarcel	case LINUX_SEMOP: {
16183221Smarcel		struct linux_semop_args a;
16283221Smarcel
16383221Smarcel		a.semid = args->arg1;
16483221Smarcel		a.tsops = args->ptr;
16583221Smarcel		a.nsops = args->arg2;
16683366Sjulian		return (linux_semop(td, &a));
16764921Smarcel	}
16883221Smarcel	case LINUX_SEMGET: {
16983221Smarcel		struct linux_semget_args a;
17064921Smarcel
17183221Smarcel		a.key = args->arg1;
17283221Smarcel		a.nsems = args->arg2;
17383221Smarcel		a.semflg = args->arg3;
17483366Sjulian		return (linux_semget(td, &a));
17583221Smarcel	}
17683221Smarcel	case LINUX_SEMCTL: {
17783221Smarcel		struct linux_semctl_args a;
17883221Smarcel		int error;
17983221Smarcel
18083221Smarcel		a.semid = args->arg1;
18183221Smarcel		a.semnum = args->arg2;
18283221Smarcel		a.cmd = args->arg3;
183111797Sdes		error = copyin(args->ptr, &a.arg, sizeof(a.arg));
18483221Smarcel		if (error)
18583221Smarcel			return (error);
18683366Sjulian		return (linux_semctl(td, &a));
18783221Smarcel	}
18883221Smarcel	case LINUX_MSGSND: {
18983221Smarcel		struct linux_msgsnd_args a;
19083221Smarcel
19183221Smarcel		a.msqid = args->arg1;
19283221Smarcel		a.msgp = args->ptr;
19383221Smarcel		a.msgsz = args->arg2;
19483221Smarcel		a.msgflg = args->arg3;
19583366Sjulian		return (linux_msgsnd(td, &a));
19683221Smarcel	}
19783221Smarcel	case LINUX_MSGRCV: {
19883221Smarcel		struct linux_msgrcv_args a;
19983221Smarcel
20083221Smarcel		a.msqid = args->arg1;
20183221Smarcel		a.msgsz = args->arg2;
20283221Smarcel		a.msgflg = args->arg3;
20383221Smarcel		if ((args->what >> 16) == 0) {
20483221Smarcel			struct l_ipc_kludge tmp;
20583221Smarcel			int error;
20683221Smarcel
20783221Smarcel			if (args->ptr == NULL)
20883221Smarcel				return (EINVAL);
209111797Sdes			error = copyin(args->ptr, &tmp, sizeof(tmp));
21083221Smarcel			if (error)
21183221Smarcel				return (error);
21283221Smarcel			a.msgp = tmp.msgp;
21383221Smarcel			a.msgtyp = tmp.msgtyp;
21483221Smarcel		} else {
21583221Smarcel			a.msgp = args->ptr;
21683221Smarcel			a.msgtyp = args->arg5;
21783221Smarcel		}
21883366Sjulian		return (linux_msgrcv(td, &a));
21983221Smarcel	}
22083221Smarcel	case LINUX_MSGGET: {
22183221Smarcel		struct linux_msgget_args a;
22283221Smarcel
22383221Smarcel		a.key = args->arg1;
22483221Smarcel		a.msgflg = args->arg2;
22583366Sjulian		return (linux_msgget(td, &a));
22683221Smarcel	}
22783221Smarcel	case LINUX_MSGCTL: {
22883221Smarcel		struct linux_msgctl_args a;
22983221Smarcel
23083221Smarcel		a.msqid = args->arg1;
23183221Smarcel		a.cmd = args->arg2;
23283221Smarcel		a.buf = args->ptr;
23383366Sjulian		return (linux_msgctl(td, &a));
23483221Smarcel	}
23583221Smarcel	case LINUX_SHMAT: {
23683221Smarcel		struct linux_shmat_args a;
23783221Smarcel
23883221Smarcel		a.shmid = args->arg1;
23983221Smarcel		a.shmaddr = args->ptr;
24083221Smarcel		a.shmflg = args->arg2;
24183221Smarcel		a.raddr = (l_ulong *)args->arg3;
24283366Sjulian		return (linux_shmat(td, &a));
24383221Smarcel	}
24483221Smarcel	case LINUX_SHMDT: {
24583221Smarcel		struct linux_shmdt_args a;
24683221Smarcel
24783221Smarcel		a.shmaddr = args->ptr;
24883366Sjulian		return (linux_shmdt(td, &a));
24983221Smarcel	}
25083221Smarcel	case LINUX_SHMGET: {
25183221Smarcel		struct linux_shmget_args a;
25283221Smarcel
25383221Smarcel		a.key = args->arg1;
25483221Smarcel		a.size = args->arg2;
25583221Smarcel		a.shmflg = args->arg3;
25683366Sjulian		return (linux_shmget(td, &a));
25783221Smarcel	}
25883221Smarcel	case LINUX_SHMCTL: {
25983221Smarcel		struct linux_shmctl_args a;
26083221Smarcel
26183221Smarcel		a.shmid = args->arg1;
26283221Smarcel		a.cmd = args->arg2;
26383221Smarcel		a.buf = args->ptr;
26483366Sjulian		return (linux_shmctl(td, &a));
26583221Smarcel	}
26683221Smarcel	default:
26783221Smarcel		break;
26883221Smarcel	}
26983221Smarcel
27083221Smarcel	return (EINVAL);
27164921Smarcel}
27264921Smarcel
27364921Smarcelint
27483366Sjulianlinux_old_select(struct thread *td, struct linux_old_select_args *args)
27564921Smarcel{
27683221Smarcel	struct l_old_select_argv linux_args;
27783221Smarcel	struct linux_select_args newsel;
27864921Smarcel	int error;
27964921Smarcel
28083221Smarcel#ifdef DEBUG
28183221Smarcel	if (ldebug(old_select))
28291437Speter		printf(ARGS(old_select, "%p"), args->ptr);
28364921Smarcel#endif
28464921Smarcel
285111797Sdes	error = copyin(args->ptr, &linux_args, sizeof(linux_args));
28664921Smarcel	if (error)
28764921Smarcel		return (error);
28864921Smarcel
28964921Smarcel	newsel.nfds = linux_args.nfds;
29064921Smarcel	newsel.readfds = linux_args.readfds;
29164921Smarcel	newsel.writefds = linux_args.writefds;
29264921Smarcel	newsel.exceptfds = linux_args.exceptfds;
29364921Smarcel	newsel.timeout = linux_args.timeout;
29483366Sjulian	return (linux_select(td, &newsel));
29564921Smarcel}
29664921Smarcel
29764921Smarcelint
29883366Sjulianlinux_fork(struct thread *td, struct linux_fork_args *args)
29964921Smarcel{
30064921Smarcel	int error;
301166150Snetchild	struct proc *p2;
302166150Snetchild	struct thread *td2;
30364921Smarcel
30464921Smarcel#ifdef DEBUG
30572543Sjlemon	if (ldebug(fork))
30672543Sjlemon		printf(ARGS(fork, ""));
30764921Smarcel#endif
30864921Smarcel
309166150Snetchild	if ((error = fork1(td, RFFDG | RFPROC | RFSTOPPED, 0, &p2)) != 0)
31064921Smarcel		return (error);
311166150Snetchild
312166150Snetchild	if (error == 0) {
313166150Snetchild		td->td_retval[0] = p2->p_pid;
314166150Snetchild		td->td_retval[1] = 0;
315166150Snetchild	}
31664921Smarcel
31783366Sjulian	if (td->td_retval[1] == 1)
31883366Sjulian		td->td_retval[0] = 0;
319161310Snetchild	error = linux_proc_init(td, td->td_retval[0], 0);
320161310Snetchild	if (error)
321161310Snetchild		return (error);
322161310Snetchild
323166150Snetchild	td2 = FIRST_THREAD_IN_PROC(p2);
324166150Snetchild
325166150Snetchild	/*
326166150Snetchild	 * Make this runnable after we are finished with it.
327166150Snetchild	 */
328166150Snetchild	mtx_lock_spin(&sched_lock);
329166150Snetchild	TD_SET_CAN_RUN(td2);
330166188Sjeff	sched_add(td2, SRQ_BORING);
331166150Snetchild	mtx_unlock_spin(&sched_lock);
332166150Snetchild
33364921Smarcel	return (0);
33464921Smarcel}
33564921Smarcel
33664921Smarcelint
33783366Sjulianlinux_vfork(struct thread *td, struct linux_vfork_args *args)
33864921Smarcel{
33964921Smarcel	int error;
340161611Snetchild	struct proc *p2;
341166150Snetchild	struct thread *td2;
34264921Smarcel
34364921Smarcel#ifdef DEBUG
34472543Sjlemon	if (ldebug(vfork))
34572543Sjlemon		printf(ARGS(vfork, ""));
34664921Smarcel#endif
34764921Smarcel
348161611Snetchild	/* exclude RFPPWAIT */
349166150Snetchild	if ((error = fork1(td, RFFDG | RFPROC | RFMEM | RFSTOPPED, 0, &p2)) != 0)
35064921Smarcel		return (error);
351161611Snetchild	if (error == 0) {
352166150Snetchild		td->td_retval[0] = p2->p_pid;
353161611Snetchild		td->td_retval[1] = 0;
354161611Snetchild	}
35564921Smarcel	/* Are we the child? */
35683366Sjulian	if (td->td_retval[1] == 1)
35783366Sjulian		td->td_retval[0] = 0;
358161310Snetchild	error = linux_proc_init(td, td->td_retval[0], 0);
359161310Snetchild	if (error)
360161310Snetchild		return (error);
361166150Snetchild
362166150Snetchild	PROC_LOCK(p2);
363166150Snetchild	p2->p_flag |= P_PPWAIT;
364166150Snetchild	PROC_UNLOCK(p2);
365166150Snetchild
366166150Snetchild	td2 = FIRST_THREAD_IN_PROC(p2);
367166150Snetchild
368166150Snetchild	/*
369166150Snetchild	 * Make this runnable after we are finished with it.
370166150Snetchild	 */
371166150Snetchild	mtx_lock_spin(&sched_lock);
372166150Snetchild	TD_SET_CAN_RUN(td2);
373166188Sjeff	sched_add(td2, SRQ_BORING);
374166150Snetchild	mtx_unlock_spin(&sched_lock);
375166150Snetchild
376161611Snetchild	/* wait for the children to exit, ie. emulate vfork */
377161611Snetchild	PROC_LOCK(p2);
378161611Snetchild	while (p2->p_flag & P_PPWAIT)
379161611Snetchild	   	msleep(td->td_proc, &p2->p_mtx, PWAIT, "ppwait", 0);
380161611Snetchild	PROC_UNLOCK(p2);
381161611Snetchild
38264921Smarcel	return (0);
38364921Smarcel}
38464921Smarcel
38564921Smarcelint
38683366Sjulianlinux_clone(struct thread *td, struct linux_clone_args *args)
38764921Smarcel{
38873856Sjhb	int error, ff = RFPROC | RFSTOPPED;
38964921Smarcel	struct proc *p2;
390113689Sjhb	struct thread *td2;
39164921Smarcel	int exit_signal;
392161310Snetchild	struct linux_emuldata *em;
39364921Smarcel
39464921Smarcel#ifdef DEBUG
39572543Sjlemon	if (ldebug(clone)) {
396161310Snetchild   	   	printf(ARGS(clone, "flags %x, stack %x, parent tid: %x, child tid: %x"),
397161310Snetchild		    (unsigned int)args->flags, (unsigned int)args->stack,
398161310Snetchild		    (unsigned int)args->parent_tidptr, (unsigned int)args->child_tidptr);
39972543Sjlemon	}
40064921Smarcel#endif
40164921Smarcel
40264921Smarcel	exit_signal = args->flags & 0x000000ff;
403163536Snetchild	if (!LINUX_SIG_VALID(exit_signal) && exit_signal != 0)
40464921Smarcel		return (EINVAL);
40564921Smarcel
40664921Smarcel	if (exit_signal <= LINUX_SIGTBLSZ)
40764921Smarcel		exit_signal = linux_to_bsd_signal[_SIG_IDX(exit_signal)];
40864921Smarcel
40964921Smarcel	if (args->flags & CLONE_VM)
41064921Smarcel		ff |= RFMEM;
41164921Smarcel	if (args->flags & CLONE_SIGHAND)
41264921Smarcel		ff |= RFSIGSHARE;
413163371Snetchild	/*
414163371Snetchild	 * XXX: in linux sharing of fs info (chroot/cwd/umask)
415163371Snetchild	 * and open files is independant. in fbsd its in one
416163371Snetchild	 * structure but in reality it doesnt make any problems
417163371Snetchild	 * because both this flags are set at once usually.
418163371Snetchild	 */
419163371Snetchild	if (!(args->flags & (CLONE_FILES | CLONE_FS)))
42064921Smarcel		ff |= RFFDG;
42164921Smarcel
422143108Ssobomax	/*
423143108Ssobomax	 * Attempt to detect when linux_clone(2) is used for creating
424143108Ssobomax	 * kernel threads. Unfortunately despite the existence of the
425143108Ssobomax	 * CLONE_THREAD flag, version of linuxthreads package used in
426143108Ssobomax	 * most popular distros as of beginning of 2005 doesn't make
427143108Ssobomax	 * any use of it. Therefore, this detection relay fully on
428143108Ssobomax	 * empirical observation that linuxthreads sets certain
429143108Ssobomax	 * combination of flags, so that we can make more or less
430143108Ssobomax	 * precise detection and notify the FreeBSD kernel that several
431143108Ssobomax	 * processes are in fact part of the same threading group, so
432143108Ssobomax	 * that special treatment is necessary for signal delivery
433143108Ssobomax	 * between those processes and fd locking.
434143108Ssobomax	 */
435143108Ssobomax	if ((args->flags & 0xffffff00) == THREADING_FLAGS)
436143108Ssobomax		ff |= RFTHREAD;
437143108Ssobomax
438104354Sscottl	error = fork1(td, ff, 0, &p2);
439113689Sjhb	if (error)
440113689Sjhb		return (error);
441113689Sjhb
442161310Snetchild	/* create the emuldata */
443161310Snetchild	error = linux_proc_init(td, p2->p_pid, args->flags);
444161310Snetchild	/* reference it - no need to check this */
445165867Snetchild	em = em_find(p2, EMUL_DOLOCK);
446161310Snetchild	KASSERT(em != NULL, ("clone: emuldata not found.\n"));
447161310Snetchild	/* and adjust it */
448161310Snetchild	if (args->flags & CLONE_PARENT_SETTID) {
449161310Snetchild	   	if (args->parent_tidptr == NULL) {
450161310Snetchild		   	EMUL_UNLOCK(&emul_lock);
451161310Snetchild			return (EINVAL);
452161310Snetchild		}
453161310Snetchild		error = copyout(&p2->p_pid, args->parent_tidptr, sizeof(p2->p_pid));
454161310Snetchild		if (error) {
455161310Snetchild		   	EMUL_UNLOCK(&emul_lock);
456161310Snetchild			return (error);
457161310Snetchild		}
458161310Snetchild	}
45964921Smarcel
460161673Snetchild	if (args->flags & (CLONE_PARENT|CLONE_THREAD)) {
461161673Snetchild	   	sx_xlock(&proctree_lock);
462161673Snetchild		PROC_LOCK(p2);
463161673Snetchild		proc_reparent(p2, td->td_proc->p_pptr);
464161673Snetchild		PROC_UNLOCK(p2);
465161673Snetchild		sx_xunlock(&proctree_lock);
466161310Snetchild	}
467161673Snetchild
468161310Snetchild	if (args->flags & CLONE_THREAD) {
469161310Snetchild	   	/* XXX: linux mangles pgrp and pptr somehow
470161310Snetchild		 * I think it might be this but I am not sure.
471161310Snetchild		 */
472161310Snetchild#ifdef notyet
473161673Snetchild	   	PROC_LOCK(p2);
474161310Snetchild	   	p2->p_pgrp = td->td_proc->p_pgrp;
475161673Snetchild	   	PROC_UNLOCK(p2);
476161310Snetchild#endif
477161310Snetchild	 	exit_signal = 0;
478161310Snetchild	}
479161310Snetchild
480161310Snetchild	if (args->flags & CLONE_CHILD_SETTID)
481161310Snetchild		em->child_set_tid = args->child_tidptr;
482161310Snetchild	else
483161310Snetchild	   	em->child_set_tid = NULL;
484161310Snetchild
485161310Snetchild	if (args->flags & CLONE_CHILD_CLEARTID)
486161310Snetchild		em->child_clear_tid = args->child_tidptr;
487161310Snetchild	else
488161310Snetchild	   	em->child_clear_tid = NULL;
489161673Snetchild
490161310Snetchild	EMUL_UNLOCK(&emul_lock);
491161310Snetchild
492113689Sjhb	PROC_LOCK(p2);
493113689Sjhb	p2->p_sigparent = exit_signal;
494113689Sjhb	PROC_UNLOCK(p2);
495113689Sjhb	td2 = FIRST_THREAD_IN_PROC(p2);
496161365Snetchild	/*
497161365Snetchild	 * in a case of stack = NULL we are supposed to COW calling process stack
498161310Snetchild	 * this is what normal fork() does so we just keep the tf_esp arg intact
499161310Snetchild	 */
500161310Snetchild	if (args->stack)
501161310Snetchild   	   	td2->td_frame->tf_esp = (unsigned int)args->stack;
50264921Smarcel
503161310Snetchild	if (args->flags & CLONE_SETTLS) {
504161310Snetchild   	   	struct l_user_desc info;
505161310Snetchild   	   	int idx;
506161310Snetchild	   	int a[2];
507161310Snetchild		struct segment_descriptor sd;
508161310Snetchild
509161310Snetchild	   	error = copyin((void *)td->td_frame->tf_esi, &info, sizeof(struct l_user_desc));
510161310Snetchild		if (error)
511161310Snetchild   		   	return (error);
512161310Snetchild
513161310Snetchild		idx = info.entry_number;
514161310Snetchild
515161365Snetchild		/*
516161365Snetchild		 * looks like we're getting the idx we returned
517161310Snetchild		 * in the set_thread_area() syscall
518161310Snetchild		 */
519161310Snetchild		if (idx != 6 && idx != 3)
520161310Snetchild			return (EINVAL);
521161310Snetchild
522161310Snetchild		/* this doesnt happen in practice */
523161310Snetchild		if (idx == 6) {
524161310Snetchild		   	/* we might copy out the entry_number as 3 */
525161310Snetchild		   	info.entry_number = 3;
526161310Snetchild			error = copyout(&info, (void *) td->td_frame->tf_esi, sizeof(struct l_user_desc));
527161310Snetchild			if (error)
528161310Snetchild	   		   	return (error);
529161310Snetchild		}
530161310Snetchild
531161310Snetchild		a[0] = LDT_entry_a(&info);
532161310Snetchild		a[1] = LDT_entry_b(&info);
533161310Snetchild
534161310Snetchild		memcpy(&sd, &a, sizeof(a));
53564921Smarcel#ifdef DEBUG
536113689Sjhb	if (ldebug(clone))
537161310Snetchild	   	printf("Segment created in clone with CLONE_SETTLS: lobase: %x, hibase: %x, lolimit: %x, hilimit: %x, type: %i, dpl: %i, p: %i, xx: %i, def32: %i, gran: %i\n", sd.sd_lobase,
538161310Snetchild			sd.sd_hibase,
539161310Snetchild			sd.sd_lolimit,
540161310Snetchild			sd.sd_hilimit,
541161310Snetchild			sd.sd_type,
542161310Snetchild			sd.sd_dpl,
543161310Snetchild			sd.sd_p,
544161310Snetchild			sd.sd_xx,
545161310Snetchild			sd.sd_def32,
546161310Snetchild			sd.sd_gran);
547161310Snetchild#endif
548161310Snetchild
549161310Snetchild		/* set %gs */
550161310Snetchild		td2->td_pcb->pcb_gsd = sd;
551161673Snetchild		td2->td_pcb->pcb_gs = GSEL(GUGS_SEL, SEL_UPL);
552161310Snetchild	}
553161310Snetchild
554161310Snetchild#ifdef DEBUG
555161310Snetchild	if (ldebug(clone))
556113689Sjhb		printf(LMSG("clone: successful rfork to %ld, stack %p sig = %d"),
557113689Sjhb		    (long)p2->p_pid, args->stack, exit_signal);
55864921Smarcel#endif
559166150Snetchild	if (args->flags & CLONE_VFORK) {
560166150Snetchild	   	PROC_LOCK(p2);
561166150Snetchild		p2->p_flag |= P_PPWAIT;
562166150Snetchild	   	PROC_UNLOCK(p2);
563166150Snetchild	}
56464921Smarcel
565113689Sjhb	/*
566113689Sjhb	 * Make this runnable after we are finished with it.
567113689Sjhb	 */
568113689Sjhb	mtx_lock_spin(&sched_lock);
569113689Sjhb	TD_SET_CAN_RUN(td2);
570166188Sjeff	sched_add(td2, SRQ_BORING);
571113689Sjhb	mtx_unlock_spin(&sched_lock);
57273856Sjhb
573113689Sjhb	td->td_retval[0] = p2->p_pid;
574113689Sjhb	td->td_retval[1] = 0;
575163374Snetchild
576163374Snetchild	if (args->flags & CLONE_VFORK) {
577163374Snetchild   	   	/* wait for the children to exit, ie. emulate vfork */
578163374Snetchild   	   	PROC_LOCK(p2);
579163374Snetchild		while (p2->p_flag & P_PPWAIT)
580163374Snetchild   		   	msleep(td->td_proc, &p2->p_mtx, PWAIT, "ppwait", 0);
581163374Snetchild		PROC_UNLOCK(p2);
582163374Snetchild	}
583163374Snetchild
584113689Sjhb	return (0);
58564921Smarcel}
58664921Smarcel
58764921Smarcel/* XXX move */
58883221Smarcelstruct l_mmap_argv {
58983221Smarcel	l_caddr_t	addr;
59083221Smarcel	l_int		len;
59183221Smarcel	l_int		prot;
59283221Smarcel	l_int		flags;
59383221Smarcel	l_int		fd;
59483221Smarcel	l_int		pos;
59564921Smarcel};
59664921Smarcel
/*
 * Limits applied by linux_mmap_common() to LINUX_MAP_GROWSDOWN
 * mappings: the stack region is sized to at least
 * (STACK_SIZE - GUARD_SIZE) bytes.
 */
#define	STACK_SIZE	(2 * 1024 * 1024)
#define	GUARD_SIZE	(4 * PAGE_SIZE)

/* Shared back end of linux_mmap() and linux_mmap2(). */
static int	linux_mmap_common(struct thread *, struct l_mmap_argv *);
601104893Ssobomax
60264921Smarcelint
603104893Ssobomaxlinux_mmap2(struct thread *td, struct linux_mmap2_args *args)
604104893Ssobomax{
605104893Ssobomax	struct l_mmap_argv linux_args;
606104893Ssobomax
607104893Ssobomax#ifdef DEBUG
608104893Ssobomax	if (ldebug(mmap2))
609111798Sdes		printf(ARGS(mmap2, "%p, %d, %d, 0x%08x, %d, %d"),
610111798Sdes		    (void *)args->addr, args->len, args->prot,
611111798Sdes		    args->flags, args->fd, args->pgoff);
612104893Ssobomax#endif
613104893Ssobomax
614104893Ssobomax	linux_args.addr = (l_caddr_t)args->addr;
615104893Ssobomax	linux_args.len = args->len;
616104893Ssobomax	linux_args.prot = args->prot;
617104893Ssobomax	linux_args.flags = args->flags;
618104893Ssobomax	linux_args.fd = args->fd;
619104893Ssobomax	linux_args.pos = args->pgoff * PAGE_SIZE;
620104893Ssobomax
621104893Ssobomax	return (linux_mmap_common(td, &linux_args));
622104893Ssobomax}
623104893Ssobomax
624104893Ssobomaxint
62583366Sjulianlinux_mmap(struct thread *td, struct linux_mmap_args *args)
62664921Smarcel{
62764921Smarcel	int error;
62883221Smarcel	struct l_mmap_argv linux_args;
62964921Smarcel
630111797Sdes	error = copyin(args->ptr, &linux_args, sizeof(linux_args));
63164921Smarcel	if (error)
63264921Smarcel		return (error);
63364921Smarcel
63464921Smarcel#ifdef DEBUG
63572543Sjlemon	if (ldebug(mmap))
63672543Sjlemon		printf(ARGS(mmap, "%p, %d, %d, 0x%08x, %d, %d"),
637104984Sbde		    (void *)linux_args.addr, linux_args.len, linux_args.prot,
638104984Sbde		    linux_args.flags, linux_args.fd, linux_args.pos);
63964921Smarcel#endif
64064921Smarcel
641104893Ssobomax	return (linux_mmap_common(td, &linux_args));
642104893Ssobomax}
643104893Ssobomax
644104893Ssobomaxstatic int
645104893Ssobomaxlinux_mmap_common(struct thread *td, struct l_mmap_argv *linux_args)
646104893Ssobomax{
647104893Ssobomax	struct proc *p = td->td_proc;
648104893Ssobomax	struct mmap_args /* {
649104893Ssobomax		caddr_t addr;
650104893Ssobomax		size_t len;
651104893Ssobomax		int prot;
652104893Ssobomax		int flags;
653104893Ssobomax		int fd;
654104893Ssobomax		long pad;
655104893Ssobomax		off_t pos;
656104893Ssobomax	} */ bsd_args;
657112630Smdodd	int error;
658162472Snetchild	struct file *fp;
659104893Ssobomax
660112630Smdodd	error = 0;
66164921Smarcel	bsd_args.flags = 0;
662162472Snetchild	fp = NULL;
663162472Snetchild
664162472Snetchild	/*
665162472Snetchild	 * Linux mmap(2):
666162472Snetchild	 * You must specify exactly one of MAP_SHARED and MAP_PRIVATE
667162472Snetchild	 */
668162472Snetchild	if (! ((linux_args->flags & LINUX_MAP_SHARED) ^
669162472Snetchild	    (linux_args->flags & LINUX_MAP_PRIVATE)))
670162479Snetchild		return (EINVAL);
671162472Snetchild
672104893Ssobomax	if (linux_args->flags & LINUX_MAP_SHARED)
67364921Smarcel		bsd_args.flags |= MAP_SHARED;
674104893Ssobomax	if (linux_args->flags & LINUX_MAP_PRIVATE)
67564921Smarcel		bsd_args.flags |= MAP_PRIVATE;
676104893Ssobomax	if (linux_args->flags & LINUX_MAP_FIXED)
67764921Smarcel		bsd_args.flags |= MAP_FIXED;
678104893Ssobomax	if (linux_args->flags & LINUX_MAP_ANON)
67964921Smarcel		bsd_args.flags |= MAP_ANON;
68073213Sdillon	else
68173213Sdillon		bsd_args.flags |= MAP_NOSYNC;
682104893Ssobomax	if (linux_args->flags & LINUX_MAP_GROWSDOWN) {
68364921Smarcel		bsd_args.flags |= MAP_STACK;
68464921Smarcel
685161365Snetchild		/*
686161365Snetchild		 * The linux MAP_GROWSDOWN option does not limit auto
68764921Smarcel		 * growth of the region.  Linux mmap with this option
68864921Smarcel		 * takes as addr the inital BOS, and as len, the initial
68964921Smarcel		 * region size.  It can then grow down from addr without
69064921Smarcel		 * limit.  However, linux threads has an implicit internal
69164921Smarcel		 * limit to stack size of STACK_SIZE.  Its just not
69264921Smarcel		 * enforced explicitly in linux.  But, here we impose
69364921Smarcel		 * a limit of (STACK_SIZE - GUARD_SIZE) on the stack
69464921Smarcel		 * region, since we can do this with our mmap.
69564921Smarcel		 *
69664921Smarcel		 * Our mmap with MAP_STACK takes addr as the maximum
69764921Smarcel		 * downsize limit on BOS, and as len the max size of
69864921Smarcel		 * the region.  It them maps the top SGROWSIZ bytes,
69964921Smarcel		 * and autgrows the region down, up to the limit
70064921Smarcel		 * in addr.
70164921Smarcel		 *
70264921Smarcel		 * If we don't use the MAP_STACK option, the effect
70364921Smarcel		 * of this code is to allocate a stack region of a
70464921Smarcel		 * fixed size of (STACK_SIZE - GUARD_SIZE).
70564921Smarcel		 */
70664921Smarcel
70764921Smarcel		/* This gives us TOS */
708104893Ssobomax		bsd_args.addr = linux_args->addr + linux_args->len;
70964921Smarcel
71067238Sgallatin		if (bsd_args.addr > p->p_vmspace->vm_maxsaddr) {
711161365Snetchild			/*
712161365Snetchild			 * Some linux apps will attempt to mmap
71367238Sgallatin			 * thread stacks near the top of their
71467238Sgallatin			 * address space.  If their TOS is greater
71567238Sgallatin			 * than vm_maxsaddr, vm_map_growstack()
71667238Sgallatin			 * will confuse the thread stack with the
71767238Sgallatin			 * process stack and deliver a SEGV if they
71867238Sgallatin			 * attempt to grow the thread stack past their
71967238Sgallatin			 * current stacksize rlimit.  To avoid this,
72067238Sgallatin			 * adjust vm_maxsaddr upwards to reflect
72167238Sgallatin			 * the current stacksize rlimit rather
72267238Sgallatin			 * than the maximum possible stacksize.
72367238Sgallatin			 * It would be better to adjust the
72467238Sgallatin			 * mmap'ed region, but some apps do not check
72567238Sgallatin			 * mmap's return value.
72667238Sgallatin			 */
727125454Sjhb			PROC_LOCK(p);
72867238Sgallatin			p->p_vmspace->vm_maxsaddr = (char *)USRSTACK -
729125454Sjhb			    lim_cur(p, RLIMIT_STACK);
730125454Sjhb			PROC_UNLOCK(p);
73167238Sgallatin		}
73267238Sgallatin
73364921Smarcel		/* This gives us our maximum stack size */
734104893Ssobomax		if (linux_args->len > STACK_SIZE - GUARD_SIZE)
735104893Ssobomax			bsd_args.len = linux_args->len;
73664921Smarcel		else
73764921Smarcel			bsd_args.len  = STACK_SIZE - GUARD_SIZE;
73864921Smarcel
739161365Snetchild		/*
740161365Snetchild		 * This gives us a new BOS.  If we're using VM_STACK, then
74164921Smarcel		 * mmap will just map the top SGROWSIZ bytes, and let
74264921Smarcel		 * the stack grow down to the limit at BOS.  If we're
74364921Smarcel		 * not using VM_STACK we map the full stack, since we
74464921Smarcel		 * don't have a way to autogrow it.
74564921Smarcel		 */
74664921Smarcel		bsd_args.addr -= bsd_args.len;
74764921Smarcel	} else {
748104893Ssobomax		bsd_args.addr = linux_args->addr;
749104893Ssobomax		bsd_args.len  = linux_args->len;
75064921Smarcel	}
75164921Smarcel
752162472Snetchild	bsd_args.prot = linux_args->prot;
753104893Ssobomax	if (linux_args->flags & LINUX_MAP_ANON)
75464921Smarcel		bsd_args.fd = -1;
755162472Snetchild	else {
756162472Snetchild		/*
757162472Snetchild		 * Linux follows Solaris mmap(2) description:
758162472Snetchild		 * The file descriptor fildes is opened with
759162472Snetchild		 * read permission, regardless of the
760162472Snetchild		 * protection options specified.
761162472Snetchild		 * If PROT_WRITE is specified, the application
762162472Snetchild		 * must have opened the file descriptor
763162472Snetchild		 * fildes with write permission unless
764162472Snetchild		 * MAP_PRIVATE is specified in the flag
765162472Snetchild		 * argument as described below.
766162472Snetchild		 */
767162472Snetchild
768162472Snetchild		if ((error = fget(td, linux_args->fd, &fp)) != 0)
769162479Snetchild			return (error);
770162472Snetchild		if (fp->f_type != DTYPE_VNODE) {
771162472Snetchild			fdrop(fp, td);
772162479Snetchild			return (EINVAL);
773162472Snetchild		}
774162472Snetchild
775162472Snetchild		/* Linux mmap() just fails for O_WRONLY files */
776162472Snetchild		if (! (fp->f_flag & FREAD)) {
777162472Snetchild			fdrop(fp, td);
778162479Snetchild			return (EACCES);
779162472Snetchild		}
780162472Snetchild
781104893Ssobomax		bsd_args.fd = linux_args->fd;
782162472Snetchild		fdrop(fp, td);
783162472Snetchild	}
784104893Ssobomax	bsd_args.pos = linux_args->pos;
78564921Smarcel	bsd_args.pad = 0;
78664921Smarcel
78764921Smarcel#ifdef DEBUG
78872543Sjlemon	if (ldebug(mmap))
789112630Smdodd		printf("-> %s(%p, %d, %d, 0x%08x, %d, 0x%x)\n",
790112630Smdodd		    __func__,
79172543Sjlemon		    (void *)bsd_args.addr, bsd_args.len, bsd_args.prot,
79272543Sjlemon		    bsd_args.flags, bsd_args.fd, (int)bsd_args.pos);
79364921Smarcel#endif
794112630Smdodd	error = mmap(td, &bsd_args);
795112630Smdodd#ifdef DEBUG
796112630Smdodd	if (ldebug(mmap))
797112630Smdodd		printf("-> %s() return: 0x%x (0x%08x)\n",
798112630Smdodd			__func__, error, (u_int)td->td_retval[0]);
799112630Smdodd#endif
800112630Smdodd	return (error);
80164921Smarcel}
80264921Smarcel
80364921Smarcelint
80483366Sjulianlinux_pipe(struct thread *td, struct linux_pipe_args *args)
80564921Smarcel{
80664921Smarcel	int error;
80764921Smarcel	int reg_edx;
80864921Smarcel
80964921Smarcel#ifdef DEBUG
81072543Sjlemon	if (ldebug(pipe))
81172543Sjlemon		printf(ARGS(pipe, "*"));
81264921Smarcel#endif
81364921Smarcel
81483366Sjulian	reg_edx = td->td_retval[1];
81583366Sjulian	error = pipe(td, 0);
81664921Smarcel	if (error) {
81783366Sjulian		td->td_retval[1] = reg_edx;
81864921Smarcel		return (error);
81964921Smarcel	}
82064921Smarcel
82183366Sjulian	error = copyout(td->td_retval, args->pipefds, 2*sizeof(int));
82264921Smarcel	if (error) {
82383366Sjulian		td->td_retval[1] = reg_edx;
82464921Smarcel		return (error);
82564921Smarcel	}
82664921Smarcel
82783366Sjulian	td->td_retval[1] = reg_edx;
82883366Sjulian	td->td_retval[0] = 0;
82964921Smarcel	return (0);
83064921Smarcel}
83164921Smarcel
83264921Smarcelint
83383366Sjulianlinux_ioperm(struct thread *td, struct linux_ioperm_args *args)
83464921Smarcel{
835140862Ssobomax	int error;
836140862Ssobomax	struct i386_ioperm_args iia;
83764921Smarcel
838140862Ssobomax	iia.start = args->start;
839140862Ssobomax	iia.length = args->length;
840140862Ssobomax	iia.enable = args->enable;
841140862Ssobomax	mtx_lock(&Giant);
842140862Ssobomax	error = i386_set_ioperm(td, &iia);
843140862Ssobomax	mtx_unlock(&Giant);
844140862Ssobomax	return (error);
84564921Smarcel}
84664921Smarcel
84764921Smarcelint
84883366Sjulianlinux_iopl(struct thread *td, struct linux_iopl_args *args)
84964921Smarcel{
85064921Smarcel	int error;
85164921Smarcel
85264921Smarcel	if (args->level < 0 || args->level > 3)
85364921Smarcel		return (EINVAL);
854164033Srwatson	if ((error = priv_check(td, PRIV_IO)) != 0)
85564921Smarcel		return (error);
85691406Sjhb	if ((error = securelevel_gt(td->td_ucred, 0)) != 0)
85783981Srwatson		return (error);
85883366Sjulian	td->td_frame->tf_eflags = (td->td_frame->tf_eflags & ~PSL_IOPL) |
85964921Smarcel	    (args->level * (PSL_IOPL / 3));
86064921Smarcel	return (0);
86164921Smarcel}
86264921Smarcel
86364921Smarcelint
864105441Smarkmlinux_modify_ldt(struct thread *td, struct linux_modify_ldt_args *uap)
86564921Smarcel{
86664921Smarcel	int error;
867140862Ssobomax	struct i386_ldt_args ldt;
86883221Smarcel	struct l_descriptor ld;
869140862Ssobomax	union descriptor desc;
87064921Smarcel
87164921Smarcel	if (uap->ptr == NULL)
87264921Smarcel		return (EINVAL);
87364921Smarcel
87464921Smarcel	switch (uap->func) {
87564921Smarcel	case 0x00: /* read_ldt */
876140862Ssobomax		ldt.start = 0;
877140862Ssobomax		ldt.descs = uap->ptr;
878140862Ssobomax		ldt.num = uap->bytecount / sizeof(union descriptor);
879140862Ssobomax		mtx_lock(&Giant);
880140862Ssobomax		error = i386_get_ldt(td, &ldt);
88183366Sjulian		td->td_retval[0] *= sizeof(union descriptor);
882140862Ssobomax		mtx_unlock(&Giant);
88364921Smarcel		break;
88464921Smarcel	case 0x01: /* write_ldt */
88564921Smarcel	case 0x11: /* write_ldt */
88664921Smarcel		if (uap->bytecount != sizeof(ld))
88764921Smarcel			return (EINVAL);
88864921Smarcel
88964921Smarcel		error = copyin(uap->ptr, &ld, sizeof(ld));
89064921Smarcel		if (error)
89164921Smarcel			return (error);
89264921Smarcel
893140862Ssobomax		ldt.start = ld.entry_number;
894140862Ssobomax		ldt.descs = &desc;
895140862Ssobomax		ldt.num = 1;
896140862Ssobomax		desc.sd.sd_lolimit = (ld.limit & 0x0000ffff);
897140862Ssobomax		desc.sd.sd_hilimit = (ld.limit & 0x000f0000) >> 16;
898140862Ssobomax		desc.sd.sd_lobase = (ld.base_addr & 0x00ffffff);
899140862Ssobomax		desc.sd.sd_hibase = (ld.base_addr & 0xff000000) >> 24;
900140862Ssobomax		desc.sd.sd_type = SDT_MEMRO | ((ld.read_exec_only ^ 1) << 1) |
90164921Smarcel			(ld.contents << 2);
902140862Ssobomax		desc.sd.sd_dpl = 3;
903140862Ssobomax		desc.sd.sd_p = (ld.seg_not_present ^ 1);
904140862Ssobomax		desc.sd.sd_xx = 0;
905140862Ssobomax		desc.sd.sd_def32 = ld.seg_32bit;
906140862Ssobomax		desc.sd.sd_gran = ld.limit_in_pages;
907140862Ssobomax		mtx_lock(&Giant);
908140862Ssobomax		error = i386_set_ldt(td, &ldt, &desc);
909140862Ssobomax		mtx_unlock(&Giant);
91064921Smarcel		break;
91164921Smarcel	default:
91264921Smarcel		error = EINVAL;
91364921Smarcel		break;
91464921Smarcel	}
91564921Smarcel
91664921Smarcel	if (error == EOPNOTSUPP) {
91764921Smarcel		printf("linux: modify_ldt needs kernel option USER_LDT\n");
91864921Smarcel		error = ENOSYS;
91964921Smarcel	}
92064921Smarcel
92164921Smarcel	return (error);
92264921Smarcel}
92364921Smarcel
92464921Smarcelint
92583366Sjulianlinux_sigaction(struct thread *td, struct linux_sigaction_args *args)
92664921Smarcel{
92783221Smarcel	l_osigaction_t osa;
92883221Smarcel	l_sigaction_t act, oact;
92964921Smarcel	int error;
93064921Smarcel
93164921Smarcel#ifdef DEBUG
93272543Sjlemon	if (ldebug(sigaction))
93372543Sjlemon		printf(ARGS(sigaction, "%d, %p, %p"),
93472543Sjlemon		    args->sig, (void *)args->nsa, (void *)args->osa);
93564921Smarcel#endif
93664921Smarcel
93764921Smarcel	if (args->nsa != NULL) {
938111797Sdes		error = copyin(args->nsa, &osa, sizeof(l_osigaction_t));
93964921Smarcel		if (error)
94064921Smarcel			return (error);
94164921Smarcel		act.lsa_handler = osa.lsa_handler;
94264921Smarcel		act.lsa_flags = osa.lsa_flags;
94364921Smarcel		act.lsa_restorer = osa.lsa_restorer;
94464921Smarcel		LINUX_SIGEMPTYSET(act.lsa_mask);
94564921Smarcel		act.lsa_mask.__bits[0] = osa.lsa_mask;
94664921Smarcel	}
94764921Smarcel
94883366Sjulian	error = linux_do_sigaction(td, args->sig, args->nsa ? &act : NULL,
94964921Smarcel	    args->osa ? &oact : NULL);
95064921Smarcel
95164921Smarcel	if (args->osa != NULL && !error) {
95264921Smarcel		osa.lsa_handler = oact.lsa_handler;
95364921Smarcel		osa.lsa_flags = oact.lsa_flags;
95464921Smarcel		osa.lsa_restorer = oact.lsa_restorer;
95564921Smarcel		osa.lsa_mask = oact.lsa_mask.__bits[0];
956111797Sdes		error = copyout(&osa, args->osa, sizeof(l_osigaction_t));
95764921Smarcel	}
95864921Smarcel
95964921Smarcel	return (error);
96064921Smarcel}
96164921Smarcel
96264921Smarcel/*
96364921Smarcel * Linux has two extra args, restart and oldmask.  We dont use these,
96464921Smarcel * but it seems that "restart" is actually a context pointer that
96564921Smarcel * enables the signal to happen with a different register set.
96664921Smarcel */
96764921Smarcelint
96883366Sjulianlinux_sigsuspend(struct thread *td, struct linux_sigsuspend_args *args)
96964921Smarcel{
970102814Siedowse	sigset_t sigmask;
97183221Smarcel	l_sigset_t mask;
97264921Smarcel
97364921Smarcel#ifdef DEBUG
97472543Sjlemon	if (ldebug(sigsuspend))
97572543Sjlemon		printf(ARGS(sigsuspend, "%08lx"), (unsigned long)args->mask);
97664921Smarcel#endif
97764921Smarcel
97864921Smarcel	LINUX_SIGEMPTYSET(mask);
97964921Smarcel	mask.__bits[0] = args->mask;
980102814Siedowse	linux_to_bsd_sigset(&mask, &sigmask);
981102814Siedowse	return (kern_sigsuspend(td, sigmask));
98264921Smarcel}
98364921Smarcel
98464921Smarcelint
985105441Smarkmlinux_rt_sigsuspend(struct thread *td, struct linux_rt_sigsuspend_args *uap)
98664921Smarcel{
98783221Smarcel	l_sigset_t lmask;
988102814Siedowse	sigset_t sigmask;
98964921Smarcel	int error;
99064921Smarcel
99164921Smarcel#ifdef DEBUG
99272543Sjlemon	if (ldebug(rt_sigsuspend))
99372543Sjlemon		printf(ARGS(rt_sigsuspend, "%p, %d"),
99472543Sjlemon		    (void *)uap->newset, uap->sigsetsize);
99564921Smarcel#endif
99664921Smarcel
99783221Smarcel	if (uap->sigsetsize != sizeof(l_sigset_t))
99864921Smarcel		return (EINVAL);
99964921Smarcel
100083221Smarcel	error = copyin(uap->newset, &lmask, sizeof(l_sigset_t));
100164921Smarcel	if (error)
100264921Smarcel		return (error);
100364921Smarcel
1004102814Siedowse	linux_to_bsd_sigset(&lmask, &sigmask);
1005102814Siedowse	return (kern_sigsuspend(td, sigmask));
100664921Smarcel}
100764921Smarcel
100864921Smarcelint
100983366Sjulianlinux_pause(struct thread *td, struct linux_pause_args *args)
101064921Smarcel{
101183366Sjulian	struct proc *p = td->td_proc;
1012102814Siedowse	sigset_t sigmask;
101364921Smarcel
101464921Smarcel#ifdef DEBUG
101572543Sjlemon	if (ldebug(pause))
101672543Sjlemon		printf(ARGS(pause, ""));
101764921Smarcel#endif
101864921Smarcel
101971494Sjhb	PROC_LOCK(p);
1020112888Sjeff	sigmask = td->td_sigmask;
102171494Sjhb	PROC_UNLOCK(p);
1022102814Siedowse	return (kern_sigsuspend(td, sigmask));
102364921Smarcel}
102464921Smarcel
102564921Smarcelint
102683366Sjulianlinux_sigaltstack(struct thread *td, struct linux_sigaltstack_args *uap)
102764921Smarcel{
1028102814Siedowse	stack_t ss, oss;
102983221Smarcel	l_stack_t lss;
103064921Smarcel	int error;
103164921Smarcel
103264921Smarcel#ifdef DEBUG
103372543Sjlemon	if (ldebug(sigaltstack))
103472543Sjlemon		printf(ARGS(sigaltstack, "%p, %p"), uap->uss, uap->uoss);
103564921Smarcel#endif
103664921Smarcel
1037102814Siedowse	if (uap->uss != NULL) {
103883221Smarcel		error = copyin(uap->uss, &lss, sizeof(l_stack_t));
103967051Sgallatin		if (error)
104067051Sgallatin			return (error);
104164921Smarcel
1042102814Siedowse		ss.ss_sp = lss.ss_sp;
1043102814Siedowse		ss.ss_size = lss.ss_size;
1044102814Siedowse		ss.ss_flags = linux_to_bsd_sigaltstack(lss.ss_flags);
104567051Sgallatin	}
1046134269Sjhb	error = kern_sigaltstack(td, (uap->uss != NULL) ? &ss : NULL,
1047134269Sjhb	    (uap->uoss != NULL) ? &oss : NULL);
1048102814Siedowse	if (!error && uap->uoss != NULL) {
1049102814Siedowse		lss.ss_sp = oss.ss_sp;
1050102814Siedowse		lss.ss_size = oss.ss_size;
1051102814Siedowse		lss.ss_flags = bsd_to_linux_sigaltstack(oss.ss_flags);
105283221Smarcel		error = copyout(&lss, uap->uoss, sizeof(l_stack_t));
105364921Smarcel	}
105464921Smarcel
105564921Smarcel	return (error);
105664921Smarcel}
1057104893Ssobomax
1058104893Ssobomaxint
1059104893Ssobomaxlinux_ftruncate64(struct thread *td, struct linux_ftruncate64_args *args)
1060104893Ssobomax{
1061104893Ssobomax	struct ftruncate_args sa;
1062104893Ssobomax
1063104893Ssobomax#ifdef DEBUG
1064104893Ssobomax	if (ldebug(ftruncate64))
1065104984Sbde		printf(ARGS(ftruncate64, "%u, %jd"), args->fd,
1066104984Sbde		    (intmax_t)args->length);
1067104893Ssobomax#endif
1068104893Ssobomax
1069104893Ssobomax	sa.fd = args->fd;
1070104893Ssobomax	sa.pad = 0;
1071104893Ssobomax	sa.length = args->length;
1072104893Ssobomax	return ftruncate(td, &sa);
1073104893Ssobomax}
1074134838Sdfr
1075134838Sdfrint
1076134838Sdfrlinux_set_thread_area(struct thread *td, struct linux_set_thread_area_args *args)
1077134838Sdfr{
1078161310Snetchild	struct l_user_desc info;
1079161310Snetchild	int error;
1080161310Snetchild	int idx;
1081161310Snetchild	int a[2];
1082161310Snetchild	struct segment_descriptor sd;
1083161310Snetchild
1084161310Snetchild	error = copyin(args->desc, &info, sizeof(struct l_user_desc));
1085161310Snetchild	if (error)
1086161310Snetchild		return (error);
1087161310Snetchild
1088161310Snetchild#ifdef DEBUG
1089161310Snetchild	if (ldebug(set_thread_area))
1090161310Snetchild	   	printf(ARGS(set_thread_area, "%i, %x, %x, %i, %i, %i, %i, %i, %i\n"),
1091161310Snetchild		      info.entry_number,
1092161310Snetchild      		      info.base_addr,
1093161310Snetchild      		      info.limit,
1094161310Snetchild      		      info.seg_32bit,
1095161310Snetchild		      info.contents,
1096161310Snetchild      		      info.read_exec_only,
1097161310Snetchild      		      info.limit_in_pages,
1098161310Snetchild      		      info.seg_not_present,
1099161310Snetchild      		      info.useable);
1100161310Snetchild#endif
1101161310Snetchild
1102161310Snetchild	idx = info.entry_number;
1103161365Snetchild	/*
1104161365Snetchild	 * Semantics of linux version: every thread in the system has array
1105161310Snetchild	 * of 3 tls descriptors. 1st is GLIBC TLS, 2nd is WINE, 3rd unknown. This
1106161310Snetchild	 * syscall loads one of the selected tls decriptors with a value
1107161310Snetchild	 * and also loads GDT descriptors 6, 7 and 8 with the content of the per-thread
1108161310Snetchild	 * descriptors.
1109161310Snetchild	 *
1110161310Snetchild	 * Semantics of fbsd version: I think we can ignore that linux has 3 per-thread
1111161310Snetchild	 * descriptors and use just the 1st one. The tls_array[] is used only in
1112161310Snetchild	 * set/get-thread_area() syscalls and for loading the GDT descriptors. In fbsd
1113161310Snetchild	 * we use just one GDT descriptor for TLS so we will load just one.
1114161310Snetchild	 * XXX: this doesnt work when user-space process tries to use more then 1 TLS segment
1115161310Snetchild	 * comment in the linux sources says wine might do that.
1116134838Sdfr	 */
1117161310Snetchild
1118161365Snetchild	/*
1119161365Snetchild	 * we support just GLIBC TLS now
1120161310Snetchild	 * we should let 3 proceed as well because we use this segment so
1121161310Snetchild	 * if code does two subsequent calls it should succeed
1122161310Snetchild	 */
1123161310Snetchild	if (idx != 6 && idx != -1 && idx != 3)
1124161310Snetchild		return (EINVAL);
1125161310Snetchild
1126161365Snetchild	/*
1127161365Snetchild	 * we have to copy out the GDT entry we use
1128161310Snetchild	 * FreeBSD uses GDT entry #3 for storing %gs so load that
1129161310Snetchild	 * XXX: what if userspace program doesnt check this value and tries
1130161310Snetchild	 * to use 6, 7 or 8?
1131161310Snetchild	 */
1132161310Snetchild	idx = info.entry_number = 3;
1133161310Snetchild	error = copyout(&info, args->desc, sizeof(struct l_user_desc));
1134161310Snetchild	if (error)
1135161310Snetchild		return (error);
1136161310Snetchild
1137161310Snetchild	if (LDT_empty(&info)) {
1138161310Snetchild		a[0] = 0;
1139161310Snetchild		a[1] = 0;
1140161310Snetchild	} else {
1141161310Snetchild		a[0] = LDT_entry_a(&info);
1142161310Snetchild		a[1] = LDT_entry_b(&info);
1143161310Snetchild	}
1144161310Snetchild
1145161310Snetchild	memcpy(&sd, &a, sizeof(a));
1146161310Snetchild#ifdef DEBUG
1147161310Snetchild	if (ldebug(set_thread_area))
1148161310Snetchild	   	printf("Segment created in set_thread_area: lobase: %x, hibase: %x, lolimit: %x, hilimit: %x, type: %i, dpl: %i, p: %i, xx: %i, def32: %i, gran: %i\n", sd.sd_lobase,
1149161310Snetchild			sd.sd_hibase,
1150161310Snetchild			sd.sd_lolimit,
1151161310Snetchild			sd.sd_hilimit,
1152161310Snetchild			sd.sd_type,
1153161310Snetchild			sd.sd_dpl,
1154161310Snetchild			sd.sd_p,
1155161310Snetchild			sd.sd_xx,
1156161310Snetchild			sd.sd_def32,
1157161310Snetchild			sd.sd_gran);
1158161310Snetchild#endif
1159161310Snetchild
1160161310Snetchild	/* this is taken from i386 version of cpu_set_user_tls() */
1161161310Snetchild	critical_enter();
1162161310Snetchild	/* set %gs */
1163161310Snetchild	td->td_pcb->pcb_gsd = sd;
1164161310Snetchild	PCPU_GET(fsgs_gdt)[1] = sd;
1165161310Snetchild	load_gs(GSEL(GUGS_SEL, SEL_UPL));
1166161310Snetchild	critical_exit();
1167161310Snetchild
1168161310Snetchild	return (0);
1169134838Sdfr}
1170134838Sdfr
1171134838Sdfrint
1172161310Snetchildlinux_get_thread_area(struct thread *td, struct linux_get_thread_area_args *args)
1173134838Sdfr{
1174161310Snetchild
1175161310Snetchild	struct l_user_desc info;
1176161310Snetchild	int error;
1177161310Snetchild	int idx;
1178161310Snetchild	struct l_desc_struct desc;
1179161310Snetchild	struct segment_descriptor sd;
1180134838Sdfr
1181161310Snetchild#ifdef DEBUG
1182161310Snetchild	if (ldebug(get_thread_area))
1183161310Snetchild		printf(ARGS(get_thread_area, "%p"), args->desc);
1184161310Snetchild#endif
1185161310Snetchild
1186161310Snetchild	error = copyin(args->desc, &info, sizeof(struct l_user_desc));
1187161310Snetchild	if (error)
1188161310Snetchild		return (error);
1189161310Snetchild
1190161310Snetchild	idx = info.entry_number;
1191161310Snetchild	/* XXX: I am not sure if we want 3 to be allowed too. */
1192161310Snetchild	if (idx != 6 && idx != 3)
1193161310Snetchild		return (EINVAL);
1194161310Snetchild
1195161310Snetchild	idx = 3;
1196161310Snetchild
1197161310Snetchild	memset(&info, 0, sizeof(info));
1198161310Snetchild
1199161310Snetchild	sd = PCPU_GET(fsgs_gdt)[1];
1200161310Snetchild
1201161310Snetchild	memcpy(&desc, &sd, sizeof(desc));
1202161310Snetchild
1203161310Snetchild	info.entry_number = idx;
1204161310Snetchild	info.base_addr = GET_BASE(&desc);
1205161310Snetchild	info.limit = GET_LIMIT(&desc);
1206161310Snetchild	info.seg_32bit = GET_32BIT(&desc);
1207161310Snetchild	info.contents = GET_CONTENTS(&desc);
1208161310Snetchild	info.read_exec_only = !GET_WRITABLE(&desc);
1209161310Snetchild	info.limit_in_pages = GET_LIMIT_PAGES(&desc);
1210161310Snetchild	info.seg_not_present = !GET_PRESENT(&desc);
1211161310Snetchild	info.useable = GET_USEABLE(&desc);
1212161310Snetchild
1213161310Snetchild	error = copyout(&info, args->desc, sizeof(struct l_user_desc));
1214161310Snetchild	if (error)
1215161310Snetchild	   	return (EFAULT);
1216161310Snetchild
1217134838Sdfr	return (0);
1218134838Sdfr}
1219134838Sdfr
/*
 * Linux timer_create(2): forwarded straight to ktimer_create().
 * (Noted in the original as copied from kern/kern_time.c.)
 *
 * The Linux argument structure is reinterpreted as the native one by a
 * pointer cast, so the two are assumed to share a layout.
 */
int
linux_timer_create(struct thread *td, struct linux_timer_create_args *args)
{
	struct ktimer_create_args *bsd_args;

	bsd_args = (struct ktimer_create_args *)args;
	return (ktimer_create(td, bsd_args));
}
1226134838Sdfr
/*
 * Linux timer_settime(2): forwarded straight to ktimer_settime().
 *
 * The Linux argument structure is reinterpreted as the native one by a
 * pointer cast, so the two are assumed to share a layout.
 */
int
linux_timer_settime(struct thread *td, struct linux_timer_settime_args *args)
{
	struct ktimer_settime_args *bsd_args;

	bsd_args = (struct ktimer_settime_args *)args;
	return (ktimer_settime(td, bsd_args));
}
1232134838Sdfr
/*
 * Linux timer_gettime(2): forwarded straight to ktimer_gettime().
 *
 * The Linux argument structure is reinterpreted as the native one by a
 * pointer cast, so the two are assumed to share a layout.
 */
int
linux_timer_gettime(struct thread *td, struct linux_timer_gettime_args *args)
{
	struct ktimer_gettime_args *bsd_args;

	bsd_args = (struct ktimer_gettime_args *)args;
	return (ktimer_gettime(td, bsd_args));
}
1238161310Snetchild
/*
 * Linux timer_getoverrun(2): forwarded straight to ktimer_getoverrun().
 *
 * The Linux argument structure is reinterpreted as the native one by a
 * pointer cast, so the two are assumed to share a layout.
 */
int
linux_timer_getoverrun(struct thread *td, struct linux_timer_getoverrun_args *args)
{
	struct ktimer_getoverrun_args *bsd_args;

	bsd_args = (struct ktimer_getoverrun_args *)args;
	return (ktimer_getoverrun(td, bsd_args));
}
1244161310Snetchild
/*
 * Linux timer_delete(2): forwarded straight to ktimer_delete().
 *
 * The Linux argument structure is reinterpreted as the native one by a
 * pointer cast, so the two are assumed to share a layout.
 */
int
linux_timer_delete(struct thread *td, struct linux_timer_delete_args *args)
{
	struct ktimer_delete_args *bsd_args;

	bsd_args = (struct ktimer_delete_args *)args;
	return (ktimer_delete(td, bsd_args));
}
1250161310Snetchild
1251161310Snetchild/* XXX: this wont work with module - convert it */
1252161310Snetchildint
1253161310Snetchildlinux_mq_open(struct thread *td, struct linux_mq_open_args *args)
1254161310Snetchild{
1255161310Snetchild#ifdef P1003_1B_MQUEUE
1256161310Snetchild   	return kmq_open(td, (struct kmq_open_args *) args);
1257161310Snetchild#else
1258161310Snetchild	return (ENOSYS);
1259161310Snetchild#endif
1260161310Snetchild}
1261161310Snetchild
1262161310Snetchildint
1263161310Snetchildlinux_mq_unlink(struct thread *td, struct linux_mq_unlink_args *args)
1264161310Snetchild{
1265161310Snetchild#ifdef P1003_1B_MQUEUE
1266161310Snetchild   	return kmq_unlink(td, (struct kmq_unlink_args *) args);
1267161310Snetchild#else
1268161310Snetchild	return (ENOSYS);
1269161310Snetchild#endif
1270161310Snetchild}
1271161310Snetchild
1272161310Snetchildint
1273161310Snetchildlinux_mq_timedsend(struct thread *td, struct linux_mq_timedsend_args *args)
1274161310Snetchild{
1275161310Snetchild#ifdef P1003_1B_MQUEUE
1276161310Snetchild   	return kmq_timedsend(td, (struct kmq_timedsend_args *) args);
1277161310Snetchild#else
1278161310Snetchild	return (ENOSYS);
1279161310Snetchild#endif
1280161310Snetchild}
1281161310Snetchild
1282161310Snetchildint
1283161310Snetchildlinux_mq_timedreceive(struct thread *td, struct linux_mq_timedreceive_args *args)
1284161310Snetchild{
1285161310Snetchild#ifdef P1003_1B_MQUEUE
1286161310Snetchild   	return kmq_timedreceive(td, (struct kmq_timedreceive_args *) args);
1287161310Snetchild#else
1288161310Snetchild	return (ENOSYS);
1289161310Snetchild#endif
1290161310Snetchild}
1291161310Snetchild
1292161310Snetchildint
1293161310Snetchildlinux_mq_notify(struct thread *td, struct linux_mq_notify_args *args)
1294161310Snetchild{
1295161310Snetchild#ifdef P1003_1B_MQUEUE
1296161310Snetchild	return kmq_notify(td, (struct kmq_notify_args *) args);
1297161310Snetchild#else
1298161310Snetchild	return (ENOSYS);
1299161310Snetchild#endif
1300161310Snetchild}
1301161310Snetchild
1302161310Snetchildint
1303161310Snetchildlinux_mq_getsetattr(struct thread *td, struct linux_mq_getsetattr_args *args)
1304161310Snetchild{
1305161310Snetchild#ifdef P1003_1B_MQUEUE
1306161310Snetchild   	return kmq_setattr(td, (struct kmq_setattr_args *) args);
1307161310Snetchild#else
1308161310Snetchild	return (ENOSYS);
1309161310Snetchild#endif
1310161310Snetchild}
1311161310Snetchild
1312