linux_machdep.c revision 164033
164921Smarcel/*-
264921Smarcel * Copyright (c) 2000 Marcel Moolenaar
364921Smarcel * All rights reserved.
464921Smarcel *
564921Smarcel * Redistribution and use in source and binary forms, with or without
664921Smarcel * modification, are permitted provided that the following conditions
764921Smarcel * are met:
864921Smarcel * 1. Redistributions of source code must retain the above copyright
9111798Sdes *    notice, this list of conditions and the following disclaimer
1064921Smarcel *    in this position and unchanged.
1164921Smarcel * 2. Redistributions in binary form must reproduce the above copyright
1264921Smarcel *    notice, this list of conditions and the following disclaimer in the
1364921Smarcel *    documentation and/or other materials provided with the distribution.
1464921Smarcel * 3. The name of the author may not be used to endorse or promote products
1565067Smarcel *    derived from this software without specific prior written permission.
1664921Smarcel *
1764921Smarcel * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
1864921Smarcel * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
1964921Smarcel * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
2064921Smarcel * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
2164921Smarcel * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
2264921Smarcel * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
2364921Smarcel * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
2464921Smarcel * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
2564921Smarcel * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
2664921Smarcel * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2764921Smarcel */
2864921Smarcel
29115705Sobrien#include <sys/cdefs.h>
30115705Sobrien__FBSDID("$FreeBSD: head/sys/i386/linux/linux_machdep.c 164033 2006-11-06 13:42:10Z rwatson $");
31115705Sobrien
3264921Smarcel#include <sys/param.h>
3376166Smarkm#include <sys/systm.h>
34162472Snetchild#include <sys/file.h>
35162472Snetchild#include <sys/fcntl.h>
36140992Ssobomax#include <sys/imgact.h>
3784811Sjhb#include <sys/lock.h>
38140992Ssobomax#include <sys/malloc.h>
3964921Smarcel#include <sys/mman.h>
4076166Smarkm#include <sys/mutex.h>
41161310Snetchild#include <sys/sx.h>
42164033Srwatson#include <sys/priv.h>
4364921Smarcel#include <sys/proc.h>
44161310Snetchild#include <sys/queue.h>
4576166Smarkm#include <sys/resource.h>
4676166Smarkm#include <sys/resourcevar.h>
47134838Sdfr#include <sys/signalvar.h>
48102814Siedowse#include <sys/syscallsubr.h>
4964921Smarcel#include <sys/sysproto.h>
5064921Smarcel#include <sys/unistd.h>
51161310Snetchild#include <sys/wait.h>
5264921Smarcel
5364921Smarcel#include <machine/frame.h>
5464921Smarcel#include <machine/psl.h>
5564921Smarcel#include <machine/segments.h>
5664921Smarcel#include <machine/sysarch.h>
5764921Smarcel
5867238Sgallatin#include <vm/vm.h>
5967238Sgallatin#include <vm/pmap.h>
6067238Sgallatin#include <vm/vm_map.h>
6167238Sgallatin
6264921Smarcel#include <i386/linux/linux.h>
6368583Smarcel#include <i386/linux/linux_proto.h>
6464921Smarcel#include <compat/linux/linux_ipc.h>
6564921Smarcel#include <compat/linux/linux_signal.h>
6664921Smarcel#include <compat/linux/linux_util.h>
67161310Snetchild#include <compat/linux/linux_emul.h>
6864921Smarcel
69161310Snetchild#include <i386/include/pcb.h>			/* needed for pcb definition in linux_set_thread_area */
70161310Snetchild
71161310Snetchild#include "opt_posix.h"
72161310Snetchild
73161310Snetchildextern struct sysentvec elf32_freebsd_sysvec;	/* defined in i386/i386/elf_machdep.c */
74161310Snetchild
7583221Smarcelstruct l_descriptor {
7683221Smarcel	l_uint		entry_number;
7783221Smarcel	l_ulong		base_addr;
7883221Smarcel	l_uint		limit;
7983221Smarcel	l_uint		seg_32bit:1;
8083221Smarcel	l_uint		contents:2;
8183221Smarcel	l_uint		read_exec_only:1;
8283221Smarcel	l_uint		limit_in_pages:1;
8383221Smarcel	l_uint		seg_not_present:1;
8483221Smarcel	l_uint		useable:1;
8564921Smarcel};
8664921Smarcel
8783221Smarcelstruct l_old_select_argv {
8883221Smarcel	l_int		nfds;
8983221Smarcel	l_fd_set	*readfds;
9083221Smarcel	l_fd_set	*writefds;
9183221Smarcel	l_fd_set	*exceptfds;
9283221Smarcel	struct l_timeval	*timeout;
9364921Smarcel};
9464921Smarcel
9564921Smarcelint
9667051Sgallatinlinux_to_bsd_sigaltstack(int lsa)
9767051Sgallatin{
9867051Sgallatin	int bsa = 0;
9967051Sgallatin
10067051Sgallatin	if (lsa & LINUX_SS_DISABLE)
10167051Sgallatin		bsa |= SS_DISABLE;
10267051Sgallatin	if (lsa & LINUX_SS_ONSTACK)
10367051Sgallatin		bsa |= SS_ONSTACK;
10467051Sgallatin	return (bsa);
10567051Sgallatin}
10667051Sgallatin
10767051Sgallatinint
10867051Sgallatinbsd_to_linux_sigaltstack(int bsa)
10967051Sgallatin{
11067051Sgallatin	int lsa = 0;
11167051Sgallatin
11267051Sgallatin	if (bsa & SS_DISABLE)
11367051Sgallatin		lsa |= LINUX_SS_DISABLE;
11467051Sgallatin	if (bsa & SS_ONSTACK)
11567051Sgallatin		lsa |= LINUX_SS_ONSTACK;
11667051Sgallatin	return (lsa);
11767051Sgallatin}
11867051Sgallatin
11967051Sgallatinint
12083366Sjulianlinux_execve(struct thread *td, struct linux_execve_args *args)
12164921Smarcel{
122140992Ssobomax	int error;
123140992Ssobomax	char *newpath;
124140992Ssobomax	struct image_args eargs;
12564921Smarcel
126141468Sjhb	LCONVPATHEXIST(td, args->path, &newpath);
12764921Smarcel
12864921Smarcel#ifdef DEBUG
12972543Sjlemon	if (ldebug(execve))
130140992Ssobomax		printf(ARGS(execve, "%s"), newpath);
13164921Smarcel#endif
13264921Smarcel
133140992Ssobomax	error = exec_copyin_args(&eargs, newpath, UIO_SYSSPACE,
134140992Ssobomax	    args->argp, args->envp);
135140992Ssobomax	free(newpath, M_TEMP);
136140992Ssobomax	if (error == 0)
137148623Ssobomax		error = kern_execve(td, &eargs, NULL);
138161310Snetchild	if (error == 0)
139161310Snetchild	   	/* linux process can exec fbsd one, dont attempt
140161310Snetchild		 * to create emuldata for such process using
141161310Snetchild		 * linux_proc_init, this leads to a panic on KASSERT
142161310Snetchild		 * because such process has p->p_emuldata == NULL
143161310Snetchild		 */
144161310Snetchild	   	if (td->td_proc->p_sysent == &elf_linux_sysvec)
145161310Snetchild   		   	error = linux_proc_init(td, 0, 0);
146140992Ssobomax	return (error);
14764921Smarcel}
14864921Smarcel
14983221Smarcelstruct l_ipc_kludge {
15083221Smarcel	struct l_msgbuf *msgp;
15183221Smarcel	l_long msgtyp;
15283221Smarcel};
15383221Smarcel
15464921Smarcelint
15583366Sjulianlinux_ipc(struct thread *td, struct linux_ipc_args *args)
15664921Smarcel{
15783221Smarcel
15883221Smarcel	switch (args->what & 0xFFFF) {
15983221Smarcel	case LINUX_SEMOP: {
16083221Smarcel		struct linux_semop_args a;
16183221Smarcel
16283221Smarcel		a.semid = args->arg1;
16383221Smarcel		a.tsops = args->ptr;
16483221Smarcel		a.nsops = args->arg2;
16583366Sjulian		return (linux_semop(td, &a));
16664921Smarcel	}
16783221Smarcel	case LINUX_SEMGET: {
16883221Smarcel		struct linux_semget_args a;
16964921Smarcel
17083221Smarcel		a.key = args->arg1;
17183221Smarcel		a.nsems = args->arg2;
17283221Smarcel		a.semflg = args->arg3;
17383366Sjulian		return (linux_semget(td, &a));
17483221Smarcel	}
17583221Smarcel	case LINUX_SEMCTL: {
17683221Smarcel		struct linux_semctl_args a;
17783221Smarcel		int error;
17883221Smarcel
17983221Smarcel		a.semid = args->arg1;
18083221Smarcel		a.semnum = args->arg2;
18183221Smarcel		a.cmd = args->arg3;
182111797Sdes		error = copyin(args->ptr, &a.arg, sizeof(a.arg));
18383221Smarcel		if (error)
18483221Smarcel			return (error);
18583366Sjulian		return (linux_semctl(td, &a));
18683221Smarcel	}
18783221Smarcel	case LINUX_MSGSND: {
18883221Smarcel		struct linux_msgsnd_args a;
18983221Smarcel
19083221Smarcel		a.msqid = args->arg1;
19183221Smarcel		a.msgp = args->ptr;
19283221Smarcel		a.msgsz = args->arg2;
19383221Smarcel		a.msgflg = args->arg3;
19483366Sjulian		return (linux_msgsnd(td, &a));
19583221Smarcel	}
19683221Smarcel	case LINUX_MSGRCV: {
19783221Smarcel		struct linux_msgrcv_args a;
19883221Smarcel
19983221Smarcel		a.msqid = args->arg1;
20083221Smarcel		a.msgsz = args->arg2;
20183221Smarcel		a.msgflg = args->arg3;
20283221Smarcel		if ((args->what >> 16) == 0) {
20383221Smarcel			struct l_ipc_kludge tmp;
20483221Smarcel			int error;
20583221Smarcel
20683221Smarcel			if (args->ptr == NULL)
20783221Smarcel				return (EINVAL);
208111797Sdes			error = copyin(args->ptr, &tmp, sizeof(tmp));
20983221Smarcel			if (error)
21083221Smarcel				return (error);
21183221Smarcel			a.msgp = tmp.msgp;
21283221Smarcel			a.msgtyp = tmp.msgtyp;
21383221Smarcel		} else {
21483221Smarcel			a.msgp = args->ptr;
21583221Smarcel			a.msgtyp = args->arg5;
21683221Smarcel		}
21783366Sjulian		return (linux_msgrcv(td, &a));
21883221Smarcel	}
21983221Smarcel	case LINUX_MSGGET: {
22083221Smarcel		struct linux_msgget_args a;
22183221Smarcel
22283221Smarcel		a.key = args->arg1;
22383221Smarcel		a.msgflg = args->arg2;
22483366Sjulian		return (linux_msgget(td, &a));
22583221Smarcel	}
22683221Smarcel	case LINUX_MSGCTL: {
22783221Smarcel		struct linux_msgctl_args a;
22883221Smarcel
22983221Smarcel		a.msqid = args->arg1;
23083221Smarcel		a.cmd = args->arg2;
23183221Smarcel		a.buf = args->ptr;
23283366Sjulian		return (linux_msgctl(td, &a));
23383221Smarcel	}
23483221Smarcel	case LINUX_SHMAT: {
23583221Smarcel		struct linux_shmat_args a;
23683221Smarcel
23783221Smarcel		a.shmid = args->arg1;
23883221Smarcel		a.shmaddr = args->ptr;
23983221Smarcel		a.shmflg = args->arg2;
24083221Smarcel		a.raddr = (l_ulong *)args->arg3;
24183366Sjulian		return (linux_shmat(td, &a));
24283221Smarcel	}
24383221Smarcel	case LINUX_SHMDT: {
24483221Smarcel		struct linux_shmdt_args a;
24583221Smarcel
24683221Smarcel		a.shmaddr = args->ptr;
24783366Sjulian		return (linux_shmdt(td, &a));
24883221Smarcel	}
24983221Smarcel	case LINUX_SHMGET: {
25083221Smarcel		struct linux_shmget_args a;
25183221Smarcel
25283221Smarcel		a.key = args->arg1;
25383221Smarcel		a.size = args->arg2;
25483221Smarcel		a.shmflg = args->arg3;
25583366Sjulian		return (linux_shmget(td, &a));
25683221Smarcel	}
25783221Smarcel	case LINUX_SHMCTL: {
25883221Smarcel		struct linux_shmctl_args a;
25983221Smarcel
26083221Smarcel		a.shmid = args->arg1;
26183221Smarcel		a.cmd = args->arg2;
26283221Smarcel		a.buf = args->ptr;
26383366Sjulian		return (linux_shmctl(td, &a));
26483221Smarcel	}
26583221Smarcel	default:
26683221Smarcel		break;
26783221Smarcel	}
26883221Smarcel
26983221Smarcel	return (EINVAL);
27064921Smarcel}
27164921Smarcel
27264921Smarcelint
27383366Sjulianlinux_old_select(struct thread *td, struct linux_old_select_args *args)
27464921Smarcel{
27583221Smarcel	struct l_old_select_argv linux_args;
27683221Smarcel	struct linux_select_args newsel;
27764921Smarcel	int error;
27864921Smarcel
27983221Smarcel#ifdef DEBUG
28083221Smarcel	if (ldebug(old_select))
28191437Speter		printf(ARGS(old_select, "%p"), args->ptr);
28264921Smarcel#endif
28364921Smarcel
284111797Sdes	error = copyin(args->ptr, &linux_args, sizeof(linux_args));
28564921Smarcel	if (error)
28664921Smarcel		return (error);
28764921Smarcel
28864921Smarcel	newsel.nfds = linux_args.nfds;
28964921Smarcel	newsel.readfds = linux_args.readfds;
29064921Smarcel	newsel.writefds = linux_args.writefds;
29164921Smarcel	newsel.exceptfds = linux_args.exceptfds;
29264921Smarcel	newsel.timeout = linux_args.timeout;
29383366Sjulian	return (linux_select(td, &newsel));
29464921Smarcel}
29564921Smarcel
29664921Smarcelint
29783366Sjulianlinux_fork(struct thread *td, struct linux_fork_args *args)
29864921Smarcel{
29964921Smarcel	int error;
30064921Smarcel
30164921Smarcel#ifdef DEBUG
30272543Sjlemon	if (ldebug(fork))
30372543Sjlemon		printf(ARGS(fork, ""));
30464921Smarcel#endif
30564921Smarcel
30683366Sjulian	if ((error = fork(td, (struct fork_args *)args)) != 0)
30764921Smarcel		return (error);
30864921Smarcel
30983366Sjulian	if (td->td_retval[1] == 1)
31083366Sjulian		td->td_retval[0] = 0;
311161310Snetchild	error = linux_proc_init(td, td->td_retval[0], 0);
312161310Snetchild	if (error)
313161310Snetchild		return (error);
314161310Snetchild
31564921Smarcel	return (0);
31664921Smarcel}
31764921Smarcel
31864921Smarcelint
31983366Sjulianlinux_vfork(struct thread *td, struct linux_vfork_args *args)
32064921Smarcel{
32164921Smarcel	int error;
322161611Snetchild	struct proc *p2;
32364921Smarcel
32464921Smarcel#ifdef DEBUG
32572543Sjlemon	if (ldebug(vfork))
32672543Sjlemon		printf(ARGS(vfork, ""));
32764921Smarcel#endif
32864921Smarcel
329161611Snetchild	/* exclude RFPPWAIT */
330161611Snetchild	if ((error = fork1(td, RFFDG | RFPROC | RFMEM, 0, &p2)) != 0)
33164921Smarcel		return (error);
332161611Snetchild	if (error == 0) {
333161611Snetchild	   	td->td_retval[0] = p2->p_pid;
334161611Snetchild		td->td_retval[1] = 0;
335161611Snetchild	}
33664921Smarcel	/* Are we the child? */
33783366Sjulian	if (td->td_retval[1] == 1)
33883366Sjulian		td->td_retval[0] = 0;
339161310Snetchild	error = linux_proc_init(td, td->td_retval[0], 0);
340161310Snetchild	if (error)
341161310Snetchild		return (error);
342161611Snetchild	/* wait for the children to exit, ie. emulate vfork */
343161611Snetchild	PROC_LOCK(p2);
344161611Snetchild	while (p2->p_flag & P_PPWAIT)
345161611Snetchild	   	msleep(td->td_proc, &p2->p_mtx, PWAIT, "ppwait", 0);
346161611Snetchild	PROC_UNLOCK(p2);
347161611Snetchild
34864921Smarcel	return (0);
34964921Smarcel}
35064921Smarcel
35164921Smarcelint
35283366Sjulianlinux_clone(struct thread *td, struct linux_clone_args *args)
35364921Smarcel{
35473856Sjhb	int error, ff = RFPROC | RFSTOPPED;
35564921Smarcel	struct proc *p2;
356113689Sjhb	struct thread *td2;
35764921Smarcel	int exit_signal;
358161310Snetchild	struct linux_emuldata *em;
35964921Smarcel
36064921Smarcel#ifdef DEBUG
36172543Sjlemon	if (ldebug(clone)) {
362161310Snetchild   	   	printf(ARGS(clone, "flags %x, stack %x, parent tid: %x, child tid: %x"),
363161310Snetchild		    (unsigned int)args->flags, (unsigned int)args->stack,
364161310Snetchild		    (unsigned int)args->parent_tidptr, (unsigned int)args->child_tidptr);
36572543Sjlemon	}
36664921Smarcel#endif
36764921Smarcel
36864921Smarcel	exit_signal = args->flags & 0x000000ff;
369163536Snetchild	if (!LINUX_SIG_VALID(exit_signal) && exit_signal != 0)
37064921Smarcel		return (EINVAL);
37164921Smarcel
37264921Smarcel	if (exit_signal <= LINUX_SIGTBLSZ)
37364921Smarcel		exit_signal = linux_to_bsd_signal[_SIG_IDX(exit_signal)];
37464921Smarcel
37564921Smarcel	if (args->flags & CLONE_VM)
37664921Smarcel		ff |= RFMEM;
37764921Smarcel	if (args->flags & CLONE_SIGHAND)
37864921Smarcel		ff |= RFSIGSHARE;
379163371Snetchild	/*
380163371Snetchild	 * XXX: in linux sharing of fs info (chroot/cwd/umask)
381163371Snetchild	 * and open files is independant. in fbsd its in one
382163371Snetchild	 * structure but in reality it doesnt make any problems
383163371Snetchild	 * because both this flags are set at once usually.
384163371Snetchild	 */
385163371Snetchild	if (!(args->flags & (CLONE_FILES | CLONE_FS)))
38664921Smarcel		ff |= RFFDG;
38764921Smarcel
388143108Ssobomax	/*
389143108Ssobomax	 * Attempt to detect when linux_clone(2) is used for creating
390143108Ssobomax	 * kernel threads. Unfortunately despite the existence of the
391143108Ssobomax	 * CLONE_THREAD flag, version of linuxthreads package used in
392143108Ssobomax	 * most popular distros as of beginning of 2005 doesn't make
393143108Ssobomax	 * any use of it. Therefore, this detection relay fully on
394143108Ssobomax	 * empirical observation that linuxthreads sets certain
395143108Ssobomax	 * combination of flags, so that we can make more or less
396143108Ssobomax	 * precise detection and notify the FreeBSD kernel that several
397143108Ssobomax	 * processes are in fact part of the same threading group, so
398143108Ssobomax	 * that special treatment is necessary for signal delivery
399143108Ssobomax	 * between those processes and fd locking.
400143108Ssobomax	 */
401143108Ssobomax	if ((args->flags & 0xffffff00) == THREADING_FLAGS)
402143108Ssobomax		ff |= RFTHREAD;
403143108Ssobomax
404104354Sscottl	error = fork1(td, ff, 0, &p2);
405113689Sjhb	if (error)
406113689Sjhb		return (error);
407113689Sjhb
408161310Snetchild	/* create the emuldata */
409161310Snetchild	error = linux_proc_init(td, p2->p_pid, args->flags);
410161310Snetchild	/* reference it - no need to check this */
411161310Snetchild	em = em_find(p2, EMUL_UNLOCKED);
412161310Snetchild	KASSERT(em != NULL, ("clone: emuldata not found.\n"));
413161310Snetchild	/* and adjust it */
414161310Snetchild	if (args->flags & CLONE_PARENT_SETTID) {
415161310Snetchild	   	if (args->parent_tidptr == NULL) {
416161310Snetchild		   	EMUL_UNLOCK(&emul_lock);
417161310Snetchild			return (EINVAL);
418161310Snetchild		}
419161310Snetchild		error = copyout(&p2->p_pid, args->parent_tidptr, sizeof(p2->p_pid));
420161310Snetchild		if (error) {
421161310Snetchild		   	EMUL_UNLOCK(&emul_lock);
422161310Snetchild			return (error);
423161310Snetchild		}
424161310Snetchild	}
42564921Smarcel
426161673Snetchild	if (args->flags & (CLONE_PARENT|CLONE_THREAD)) {
427161673Snetchild	   	sx_xlock(&proctree_lock);
428161673Snetchild		PROC_LOCK(p2);
429161673Snetchild		proc_reparent(p2, td->td_proc->p_pptr);
430161673Snetchild		PROC_UNLOCK(p2);
431161673Snetchild		sx_xunlock(&proctree_lock);
432161310Snetchild	}
433161673Snetchild
434161310Snetchild	if (args->flags & CLONE_THREAD) {
435161310Snetchild	   	/* XXX: linux mangles pgrp and pptr somehow
436161310Snetchild		 * I think it might be this but I am not sure.
437161310Snetchild		 */
438161310Snetchild#ifdef notyet
439161673Snetchild	   	PROC_LOCK(p2);
440161310Snetchild	   	p2->p_pgrp = td->td_proc->p_pgrp;
441161673Snetchild	   	PROC_UNLOCK(p2);
442161310Snetchild#endif
443161310Snetchild	 	exit_signal = 0;
444161310Snetchild	}
445161310Snetchild
446161310Snetchild	if (args->flags & CLONE_CHILD_SETTID)
447161310Snetchild		em->child_set_tid = args->child_tidptr;
448161310Snetchild	else
449161310Snetchild	   	em->child_set_tid = NULL;
450161310Snetchild
451161310Snetchild	if (args->flags & CLONE_CHILD_CLEARTID)
452161310Snetchild		em->child_clear_tid = args->child_tidptr;
453161310Snetchild	else
454161310Snetchild	   	em->child_clear_tid = NULL;
455161673Snetchild
456161310Snetchild	EMUL_UNLOCK(&emul_lock);
457161310Snetchild
458113689Sjhb	PROC_LOCK(p2);
459113689Sjhb	p2->p_sigparent = exit_signal;
460113689Sjhb	PROC_UNLOCK(p2);
461113689Sjhb	td2 = FIRST_THREAD_IN_PROC(p2);
462161365Snetchild	/*
463161365Snetchild	 * in a case of stack = NULL we are supposed to COW calling process stack
464161310Snetchild	 * this is what normal fork() does so we just keep the tf_esp arg intact
465161310Snetchild	 */
466161310Snetchild	if (args->stack)
467161310Snetchild   	   	td2->td_frame->tf_esp = (unsigned int)args->stack;
46864921Smarcel
469161310Snetchild	if (args->flags & CLONE_SETTLS) {
470161310Snetchild   	   	struct l_user_desc info;
471161310Snetchild   	   	int idx;
472161310Snetchild	   	int a[2];
473161310Snetchild		struct segment_descriptor sd;
474161310Snetchild
475161310Snetchild	   	error = copyin((void *)td->td_frame->tf_esi, &info, sizeof(struct l_user_desc));
476161310Snetchild		if (error)
477161310Snetchild   		   	return (error);
478161310Snetchild
479161310Snetchild		idx = info.entry_number;
480161310Snetchild
481161365Snetchild		/*
482161365Snetchild		 * looks like we're getting the idx we returned
483161310Snetchild		 * in the set_thread_area() syscall
484161310Snetchild		 */
485161310Snetchild		if (idx != 6 && idx != 3)
486161310Snetchild			return (EINVAL);
487161310Snetchild
488161310Snetchild		/* this doesnt happen in practice */
489161310Snetchild		if (idx == 6) {
490161310Snetchild		   	/* we might copy out the entry_number as 3 */
491161310Snetchild		   	info.entry_number = 3;
492161310Snetchild			error = copyout(&info, (void *) td->td_frame->tf_esi, sizeof(struct l_user_desc));
493161310Snetchild			if (error)
494161310Snetchild	   		   	return (error);
495161310Snetchild		}
496161310Snetchild
497161310Snetchild		a[0] = LDT_entry_a(&info);
498161310Snetchild		a[1] = LDT_entry_b(&info);
499161310Snetchild
500161310Snetchild		memcpy(&sd, &a, sizeof(a));
50164921Smarcel#ifdef DEBUG
502113689Sjhb	if (ldebug(clone))
503161310Snetchild	   	printf("Segment created in clone with CLONE_SETTLS: lobase: %x, hibase: %x, lolimit: %x, hilimit: %x, type: %i, dpl: %i, p: %i, xx: %i, def32: %i, gran: %i\n", sd.sd_lobase,
504161310Snetchild			sd.sd_hibase,
505161310Snetchild			sd.sd_lolimit,
506161310Snetchild			sd.sd_hilimit,
507161310Snetchild			sd.sd_type,
508161310Snetchild			sd.sd_dpl,
509161310Snetchild			sd.sd_p,
510161310Snetchild			sd.sd_xx,
511161310Snetchild			sd.sd_def32,
512161310Snetchild			sd.sd_gran);
513161310Snetchild#endif
514161310Snetchild
515161310Snetchild		/* set %gs */
516161310Snetchild		td2->td_pcb->pcb_gsd = sd;
517161673Snetchild		td2->td_pcb->pcb_gs = GSEL(GUGS_SEL, SEL_UPL);
518161310Snetchild	}
519161310Snetchild
520161310Snetchild#ifdef DEBUG
521161310Snetchild	if (ldebug(clone))
522113689Sjhb		printf(LMSG("clone: successful rfork to %ld, stack %p sig = %d"),
523113689Sjhb		    (long)p2->p_pid, args->stack, exit_signal);
52464921Smarcel#endif
52564921Smarcel
526113689Sjhb	/*
527113689Sjhb	 * Make this runnable after we are finished with it.
528113689Sjhb	 */
529113689Sjhb	mtx_lock_spin(&sched_lock);
530113689Sjhb	TD_SET_CAN_RUN(td2);
531134586Sjulian	setrunqueue(td2, SRQ_BORING);
532113689Sjhb	mtx_unlock_spin(&sched_lock);
53373856Sjhb
534113689Sjhb	td->td_retval[0] = p2->p_pid;
535113689Sjhb	td->td_retval[1] = 0;
536163374Snetchild
537163374Snetchild	if (args->flags & CLONE_VFORK) {
538163374Snetchild   	   	/* wait for the children to exit, ie. emulate vfork */
539163374Snetchild   	   	PROC_LOCK(p2);
540163374Snetchild		p2->p_flag |= P_PPWAIT;
541163374Snetchild		while (p2->p_flag & P_PPWAIT)
542163374Snetchild   		   	msleep(td->td_proc, &p2->p_mtx, PWAIT, "ppwait", 0);
543163374Snetchild		PROC_UNLOCK(p2);
544163374Snetchild	}
545163374Snetchild
546113689Sjhb	return (0);
54764921Smarcel}
54864921Smarcel
54964921Smarcel/* XXX move */
55083221Smarcelstruct l_mmap_argv {
55183221Smarcel	l_caddr_t	addr;
55283221Smarcel	l_int		len;
55383221Smarcel	l_int		prot;
55483221Smarcel	l_int		flags;
55583221Smarcel	l_int		fd;
55683221Smarcel	l_int		pos;
55764921Smarcel};
55864921Smarcel
/*
 * Stack sizing used by linux_mmap_common() for LINUX_MAP_GROWSDOWN
 * mappings: the region is sized to at least STACK_SIZE - GUARD_SIZE.
 */
#define STACK_SIZE  (2 * 1024 * 1024)
#define GUARD_SIZE  (4 * PAGE_SIZE)

/* Common back end of linux_mmap() and linux_mmap2(). */
static int linux_mmap_common(struct thread *td,
    struct l_mmap_argv *linux_args);
563104893Ssobomax
56464921Smarcelint
565104893Ssobomaxlinux_mmap2(struct thread *td, struct linux_mmap2_args *args)
566104893Ssobomax{
567104893Ssobomax	struct l_mmap_argv linux_args;
568104893Ssobomax
569104893Ssobomax#ifdef DEBUG
570104893Ssobomax	if (ldebug(mmap2))
571111798Sdes		printf(ARGS(mmap2, "%p, %d, %d, 0x%08x, %d, %d"),
572111798Sdes		    (void *)args->addr, args->len, args->prot,
573111798Sdes		    args->flags, args->fd, args->pgoff);
574104893Ssobomax#endif
575104893Ssobomax
576104893Ssobomax	linux_args.addr = (l_caddr_t)args->addr;
577104893Ssobomax	linux_args.len = args->len;
578104893Ssobomax	linux_args.prot = args->prot;
579104893Ssobomax	linux_args.flags = args->flags;
580104893Ssobomax	linux_args.fd = args->fd;
581104893Ssobomax	linux_args.pos = args->pgoff * PAGE_SIZE;
582104893Ssobomax
583104893Ssobomax	return (linux_mmap_common(td, &linux_args));
584104893Ssobomax}
585104893Ssobomax
586104893Ssobomaxint
58783366Sjulianlinux_mmap(struct thread *td, struct linux_mmap_args *args)
58864921Smarcel{
58964921Smarcel	int error;
59083221Smarcel	struct l_mmap_argv linux_args;
59164921Smarcel
592111797Sdes	error = copyin(args->ptr, &linux_args, sizeof(linux_args));
59364921Smarcel	if (error)
59464921Smarcel		return (error);
59564921Smarcel
59664921Smarcel#ifdef DEBUG
59772543Sjlemon	if (ldebug(mmap))
59872543Sjlemon		printf(ARGS(mmap, "%p, %d, %d, 0x%08x, %d, %d"),
599104984Sbde		    (void *)linux_args.addr, linux_args.len, linux_args.prot,
600104984Sbde		    linux_args.flags, linux_args.fd, linux_args.pos);
60164921Smarcel#endif
60264921Smarcel
603104893Ssobomax	return (linux_mmap_common(td, &linux_args));
604104893Ssobomax}
605104893Ssobomax
606104893Ssobomaxstatic int
607104893Ssobomaxlinux_mmap_common(struct thread *td, struct l_mmap_argv *linux_args)
608104893Ssobomax{
609104893Ssobomax	struct proc *p = td->td_proc;
610104893Ssobomax	struct mmap_args /* {
611104893Ssobomax		caddr_t addr;
612104893Ssobomax		size_t len;
613104893Ssobomax		int prot;
614104893Ssobomax		int flags;
615104893Ssobomax		int fd;
616104893Ssobomax		long pad;
617104893Ssobomax		off_t pos;
618104893Ssobomax	} */ bsd_args;
619112630Smdodd	int error;
620162472Snetchild	struct file *fp;
621104893Ssobomax
622112630Smdodd	error = 0;
62364921Smarcel	bsd_args.flags = 0;
624162472Snetchild	fp = NULL;
625162472Snetchild
626162472Snetchild	/*
627162472Snetchild	 * Linux mmap(2):
628162472Snetchild	 * You must specify exactly one of MAP_SHARED and MAP_PRIVATE
629162472Snetchild	 */
630162472Snetchild	if (! ((linux_args->flags & LINUX_MAP_SHARED) ^
631162472Snetchild	    (linux_args->flags & LINUX_MAP_PRIVATE)))
632162479Snetchild		return (EINVAL);
633162472Snetchild
634104893Ssobomax	if (linux_args->flags & LINUX_MAP_SHARED)
63564921Smarcel		bsd_args.flags |= MAP_SHARED;
636104893Ssobomax	if (linux_args->flags & LINUX_MAP_PRIVATE)
63764921Smarcel		bsd_args.flags |= MAP_PRIVATE;
638104893Ssobomax	if (linux_args->flags & LINUX_MAP_FIXED)
63964921Smarcel		bsd_args.flags |= MAP_FIXED;
640104893Ssobomax	if (linux_args->flags & LINUX_MAP_ANON)
64164921Smarcel		bsd_args.flags |= MAP_ANON;
64273213Sdillon	else
64373213Sdillon		bsd_args.flags |= MAP_NOSYNC;
644104893Ssobomax	if (linux_args->flags & LINUX_MAP_GROWSDOWN) {
64564921Smarcel		bsd_args.flags |= MAP_STACK;
64664921Smarcel
647161365Snetchild		/*
648161365Snetchild		 * The linux MAP_GROWSDOWN option does not limit auto
64964921Smarcel		 * growth of the region.  Linux mmap with this option
65064921Smarcel		 * takes as addr the inital BOS, and as len, the initial
65164921Smarcel		 * region size.  It can then grow down from addr without
65264921Smarcel		 * limit.  However, linux threads has an implicit internal
65364921Smarcel		 * limit to stack size of STACK_SIZE.  Its just not
65464921Smarcel		 * enforced explicitly in linux.  But, here we impose
65564921Smarcel		 * a limit of (STACK_SIZE - GUARD_SIZE) on the stack
65664921Smarcel		 * region, since we can do this with our mmap.
65764921Smarcel		 *
65864921Smarcel		 * Our mmap with MAP_STACK takes addr as the maximum
65964921Smarcel		 * downsize limit on BOS, and as len the max size of
66064921Smarcel		 * the region.  It them maps the top SGROWSIZ bytes,
66164921Smarcel		 * and autgrows the region down, up to the limit
66264921Smarcel		 * in addr.
66364921Smarcel		 *
66464921Smarcel		 * If we don't use the MAP_STACK option, the effect
66564921Smarcel		 * of this code is to allocate a stack region of a
66664921Smarcel		 * fixed size of (STACK_SIZE - GUARD_SIZE).
66764921Smarcel		 */
66864921Smarcel
66964921Smarcel		/* This gives us TOS */
670104893Ssobomax		bsd_args.addr = linux_args->addr + linux_args->len;
67164921Smarcel
67267238Sgallatin		if (bsd_args.addr > p->p_vmspace->vm_maxsaddr) {
673161365Snetchild			/*
674161365Snetchild			 * Some linux apps will attempt to mmap
67567238Sgallatin			 * thread stacks near the top of their
67667238Sgallatin			 * address space.  If their TOS is greater
67767238Sgallatin			 * than vm_maxsaddr, vm_map_growstack()
67867238Sgallatin			 * will confuse the thread stack with the
67967238Sgallatin			 * process stack and deliver a SEGV if they
68067238Sgallatin			 * attempt to grow the thread stack past their
68167238Sgallatin			 * current stacksize rlimit.  To avoid this,
68267238Sgallatin			 * adjust vm_maxsaddr upwards to reflect
68367238Sgallatin			 * the current stacksize rlimit rather
68467238Sgallatin			 * than the maximum possible stacksize.
68567238Sgallatin			 * It would be better to adjust the
68667238Sgallatin			 * mmap'ed region, but some apps do not check
68767238Sgallatin			 * mmap's return value.
68867238Sgallatin			 */
689125454Sjhb			PROC_LOCK(p);
69067238Sgallatin			p->p_vmspace->vm_maxsaddr = (char *)USRSTACK -
691125454Sjhb			    lim_cur(p, RLIMIT_STACK);
692125454Sjhb			PROC_UNLOCK(p);
69367238Sgallatin		}
69467238Sgallatin
69564921Smarcel		/* This gives us our maximum stack size */
696104893Ssobomax		if (linux_args->len > STACK_SIZE - GUARD_SIZE)
697104893Ssobomax			bsd_args.len = linux_args->len;
69864921Smarcel		else
69964921Smarcel			bsd_args.len  = STACK_SIZE - GUARD_SIZE;
70064921Smarcel
701161365Snetchild		/*
702161365Snetchild		 * This gives us a new BOS.  If we're using VM_STACK, then
70364921Smarcel		 * mmap will just map the top SGROWSIZ bytes, and let
70464921Smarcel		 * the stack grow down to the limit at BOS.  If we're
70564921Smarcel		 * not using VM_STACK we map the full stack, since we
70664921Smarcel		 * don't have a way to autogrow it.
70764921Smarcel		 */
70864921Smarcel		bsd_args.addr -= bsd_args.len;
70964921Smarcel	} else {
710104893Ssobomax		bsd_args.addr = linux_args->addr;
711104893Ssobomax		bsd_args.len  = linux_args->len;
71264921Smarcel	}
71364921Smarcel
714162472Snetchild	bsd_args.prot = linux_args->prot;
715104893Ssobomax	if (linux_args->flags & LINUX_MAP_ANON)
71664921Smarcel		bsd_args.fd = -1;
717162472Snetchild	else {
718162472Snetchild		/*
719162472Snetchild		 * Linux follows Solaris mmap(2) description:
720162472Snetchild		 * The file descriptor fildes is opened with
721162472Snetchild		 * read permission, regardless of the
722162472Snetchild		 * protection options specified.
723162472Snetchild		 * If PROT_WRITE is specified, the application
724162472Snetchild		 * must have opened the file descriptor
725162472Snetchild		 * fildes with write permission unless
726162472Snetchild		 * MAP_PRIVATE is specified in the flag
727162472Snetchild		 * argument as described below.
728162472Snetchild		 */
729162472Snetchild
730162472Snetchild		if ((error = fget(td, linux_args->fd, &fp)) != 0)
731162479Snetchild			return (error);
732162472Snetchild		if (fp->f_type != DTYPE_VNODE) {
733162472Snetchild			fdrop(fp, td);
734162479Snetchild			return (EINVAL);
735162472Snetchild		}
736162472Snetchild
737162472Snetchild		/* Linux mmap() just fails for O_WRONLY files */
738162472Snetchild		if (! (fp->f_flag & FREAD)) {
739162472Snetchild			fdrop(fp, td);
740162479Snetchild			return (EACCES);
741162472Snetchild		}
742162472Snetchild
743104893Ssobomax		bsd_args.fd = linux_args->fd;
744162472Snetchild		fdrop(fp, td);
745162472Snetchild	}
746104893Ssobomax	bsd_args.pos = linux_args->pos;
74764921Smarcel	bsd_args.pad = 0;
74864921Smarcel
74964921Smarcel#ifdef DEBUG
75072543Sjlemon	if (ldebug(mmap))
751112630Smdodd		printf("-> %s(%p, %d, %d, 0x%08x, %d, 0x%x)\n",
752112630Smdodd		    __func__,
75372543Sjlemon		    (void *)bsd_args.addr, bsd_args.len, bsd_args.prot,
75472543Sjlemon		    bsd_args.flags, bsd_args.fd, (int)bsd_args.pos);
75564921Smarcel#endif
756112630Smdodd	error = mmap(td, &bsd_args);
757112630Smdodd#ifdef DEBUG
758112630Smdodd	if (ldebug(mmap))
759112630Smdodd		printf("-> %s() return: 0x%x (0x%08x)\n",
760112630Smdodd			__func__, error, (u_int)td->td_retval[0]);
761112630Smdodd#endif
762112630Smdodd	return (error);
76364921Smarcel}
76464921Smarcel
76564921Smarcelint
76683366Sjulianlinux_pipe(struct thread *td, struct linux_pipe_args *args)
76764921Smarcel{
76864921Smarcel	int error;
76964921Smarcel	int reg_edx;
77064921Smarcel
77164921Smarcel#ifdef DEBUG
77272543Sjlemon	if (ldebug(pipe))
77372543Sjlemon		printf(ARGS(pipe, "*"));
77464921Smarcel#endif
77564921Smarcel
77683366Sjulian	reg_edx = td->td_retval[1];
77783366Sjulian	error = pipe(td, 0);
77864921Smarcel	if (error) {
77983366Sjulian		td->td_retval[1] = reg_edx;
78064921Smarcel		return (error);
78164921Smarcel	}
78264921Smarcel
78383366Sjulian	error = copyout(td->td_retval, args->pipefds, 2*sizeof(int));
78464921Smarcel	if (error) {
78583366Sjulian		td->td_retval[1] = reg_edx;
78664921Smarcel		return (error);
78764921Smarcel	}
78864921Smarcel
78983366Sjulian	td->td_retval[1] = reg_edx;
79083366Sjulian	td->td_retval[0] = 0;
79164921Smarcel	return (0);
79264921Smarcel}
79364921Smarcel
79464921Smarcelint
79583366Sjulianlinux_ioperm(struct thread *td, struct linux_ioperm_args *args)
79664921Smarcel{
797140862Ssobomax	int error;
798140862Ssobomax	struct i386_ioperm_args iia;
79964921Smarcel
800140862Ssobomax	iia.start = args->start;
801140862Ssobomax	iia.length = args->length;
802140862Ssobomax	iia.enable = args->enable;
803140862Ssobomax	mtx_lock(&Giant);
804140862Ssobomax	error = i386_set_ioperm(td, &iia);
805140862Ssobomax	mtx_unlock(&Giant);
806140862Ssobomax	return (error);
80764921Smarcel}
80864921Smarcel
80964921Smarcelint
81083366Sjulianlinux_iopl(struct thread *td, struct linux_iopl_args *args)
81164921Smarcel{
81264921Smarcel	int error;
81364921Smarcel
81464921Smarcel	if (args->level < 0 || args->level > 3)
81564921Smarcel		return (EINVAL);
816164033Srwatson	if ((error = priv_check(td, PRIV_IO)) != 0)
81764921Smarcel		return (error);
81891406Sjhb	if ((error = securelevel_gt(td->td_ucred, 0)) != 0)
81983981Srwatson		return (error);
82083366Sjulian	td->td_frame->tf_eflags = (td->td_frame->tf_eflags & ~PSL_IOPL) |
82164921Smarcel	    (args->level * (PSL_IOPL / 3));
82264921Smarcel	return (0);
82364921Smarcel}
82464921Smarcel
82564921Smarcelint
826105441Smarkmlinux_modify_ldt(struct thread *td, struct linux_modify_ldt_args *uap)
82764921Smarcel{
82864921Smarcel	int error;
829140862Ssobomax	struct i386_ldt_args ldt;
83083221Smarcel	struct l_descriptor ld;
831140862Ssobomax	union descriptor desc;
83264921Smarcel
83364921Smarcel	if (uap->ptr == NULL)
83464921Smarcel		return (EINVAL);
83564921Smarcel
83664921Smarcel	switch (uap->func) {
83764921Smarcel	case 0x00: /* read_ldt */
838140862Ssobomax		ldt.start = 0;
839140862Ssobomax		ldt.descs = uap->ptr;
840140862Ssobomax		ldt.num = uap->bytecount / sizeof(union descriptor);
841140862Ssobomax		mtx_lock(&Giant);
842140862Ssobomax		error = i386_get_ldt(td, &ldt);
84383366Sjulian		td->td_retval[0] *= sizeof(union descriptor);
844140862Ssobomax		mtx_unlock(&Giant);
84564921Smarcel		break;
84664921Smarcel	case 0x01: /* write_ldt */
84764921Smarcel	case 0x11: /* write_ldt */
84864921Smarcel		if (uap->bytecount != sizeof(ld))
84964921Smarcel			return (EINVAL);
85064921Smarcel
85164921Smarcel		error = copyin(uap->ptr, &ld, sizeof(ld));
85264921Smarcel		if (error)
85364921Smarcel			return (error);
85464921Smarcel
855140862Ssobomax		ldt.start = ld.entry_number;
856140862Ssobomax		ldt.descs = &desc;
857140862Ssobomax		ldt.num = 1;
858140862Ssobomax		desc.sd.sd_lolimit = (ld.limit & 0x0000ffff);
859140862Ssobomax		desc.sd.sd_hilimit = (ld.limit & 0x000f0000) >> 16;
860140862Ssobomax		desc.sd.sd_lobase = (ld.base_addr & 0x00ffffff);
861140862Ssobomax		desc.sd.sd_hibase = (ld.base_addr & 0xff000000) >> 24;
862140862Ssobomax		desc.sd.sd_type = SDT_MEMRO | ((ld.read_exec_only ^ 1) << 1) |
86364921Smarcel			(ld.contents << 2);
864140862Ssobomax		desc.sd.sd_dpl = 3;
865140862Ssobomax		desc.sd.sd_p = (ld.seg_not_present ^ 1);
866140862Ssobomax		desc.sd.sd_xx = 0;
867140862Ssobomax		desc.sd.sd_def32 = ld.seg_32bit;
868140862Ssobomax		desc.sd.sd_gran = ld.limit_in_pages;
869140862Ssobomax		mtx_lock(&Giant);
870140862Ssobomax		error = i386_set_ldt(td, &ldt, &desc);
871140862Ssobomax		mtx_unlock(&Giant);
87264921Smarcel		break;
87364921Smarcel	default:
87464921Smarcel		error = EINVAL;
87564921Smarcel		break;
87664921Smarcel	}
87764921Smarcel
87864921Smarcel	if (error == EOPNOTSUPP) {
87964921Smarcel		printf("linux: modify_ldt needs kernel option USER_LDT\n");
88064921Smarcel		error = ENOSYS;
88164921Smarcel	}
88264921Smarcel
88364921Smarcel	return (error);
88464921Smarcel}
88564921Smarcel
88664921Smarcelint
88783366Sjulianlinux_sigaction(struct thread *td, struct linux_sigaction_args *args)
88864921Smarcel{
88983221Smarcel	l_osigaction_t osa;
89083221Smarcel	l_sigaction_t act, oact;
89164921Smarcel	int error;
89264921Smarcel
89364921Smarcel#ifdef DEBUG
89472543Sjlemon	if (ldebug(sigaction))
89572543Sjlemon		printf(ARGS(sigaction, "%d, %p, %p"),
89672543Sjlemon		    args->sig, (void *)args->nsa, (void *)args->osa);
89764921Smarcel#endif
89864921Smarcel
89964921Smarcel	if (args->nsa != NULL) {
900111797Sdes		error = copyin(args->nsa, &osa, sizeof(l_osigaction_t));
90164921Smarcel		if (error)
90264921Smarcel			return (error);
90364921Smarcel		act.lsa_handler = osa.lsa_handler;
90464921Smarcel		act.lsa_flags = osa.lsa_flags;
90564921Smarcel		act.lsa_restorer = osa.lsa_restorer;
90664921Smarcel		LINUX_SIGEMPTYSET(act.lsa_mask);
90764921Smarcel		act.lsa_mask.__bits[0] = osa.lsa_mask;
90864921Smarcel	}
90964921Smarcel
91083366Sjulian	error = linux_do_sigaction(td, args->sig, args->nsa ? &act : NULL,
91164921Smarcel	    args->osa ? &oact : NULL);
91264921Smarcel
91364921Smarcel	if (args->osa != NULL && !error) {
91464921Smarcel		osa.lsa_handler = oact.lsa_handler;
91564921Smarcel		osa.lsa_flags = oact.lsa_flags;
91664921Smarcel		osa.lsa_restorer = oact.lsa_restorer;
91764921Smarcel		osa.lsa_mask = oact.lsa_mask.__bits[0];
918111797Sdes		error = copyout(&osa, args->osa, sizeof(l_osigaction_t));
91964921Smarcel	}
92064921Smarcel
92164921Smarcel	return (error);
92264921Smarcel}
92364921Smarcel
92464921Smarcel/*
92564921Smarcel * Linux has two extra args, restart and oldmask.  We dont use these,
92664921Smarcel * but it seems that "restart" is actually a context pointer that
92764921Smarcel * enables the signal to happen with a different register set.
92864921Smarcel */
92964921Smarcelint
93083366Sjulianlinux_sigsuspend(struct thread *td, struct linux_sigsuspend_args *args)
93164921Smarcel{
932102814Siedowse	sigset_t sigmask;
93383221Smarcel	l_sigset_t mask;
93464921Smarcel
93564921Smarcel#ifdef DEBUG
93672543Sjlemon	if (ldebug(sigsuspend))
93772543Sjlemon		printf(ARGS(sigsuspend, "%08lx"), (unsigned long)args->mask);
93864921Smarcel#endif
93964921Smarcel
94064921Smarcel	LINUX_SIGEMPTYSET(mask);
94164921Smarcel	mask.__bits[0] = args->mask;
942102814Siedowse	linux_to_bsd_sigset(&mask, &sigmask);
943102814Siedowse	return (kern_sigsuspend(td, sigmask));
94464921Smarcel}
94564921Smarcel
94664921Smarcelint
947105441Smarkmlinux_rt_sigsuspend(struct thread *td, struct linux_rt_sigsuspend_args *uap)
94864921Smarcel{
94983221Smarcel	l_sigset_t lmask;
950102814Siedowse	sigset_t sigmask;
95164921Smarcel	int error;
95264921Smarcel
95364921Smarcel#ifdef DEBUG
95472543Sjlemon	if (ldebug(rt_sigsuspend))
95572543Sjlemon		printf(ARGS(rt_sigsuspend, "%p, %d"),
95672543Sjlemon		    (void *)uap->newset, uap->sigsetsize);
95764921Smarcel#endif
95864921Smarcel
95983221Smarcel	if (uap->sigsetsize != sizeof(l_sigset_t))
96064921Smarcel		return (EINVAL);
96164921Smarcel
96283221Smarcel	error = copyin(uap->newset, &lmask, sizeof(l_sigset_t));
96364921Smarcel	if (error)
96464921Smarcel		return (error);
96564921Smarcel
966102814Siedowse	linux_to_bsd_sigset(&lmask, &sigmask);
967102814Siedowse	return (kern_sigsuspend(td, sigmask));
96864921Smarcel}
96964921Smarcel
97064921Smarcelint
97183366Sjulianlinux_pause(struct thread *td, struct linux_pause_args *args)
97264921Smarcel{
97383366Sjulian	struct proc *p = td->td_proc;
974102814Siedowse	sigset_t sigmask;
97564921Smarcel
97664921Smarcel#ifdef DEBUG
97772543Sjlemon	if (ldebug(pause))
97872543Sjlemon		printf(ARGS(pause, ""));
97964921Smarcel#endif
98064921Smarcel
98171494Sjhb	PROC_LOCK(p);
982112888Sjeff	sigmask = td->td_sigmask;
98371494Sjhb	PROC_UNLOCK(p);
984102814Siedowse	return (kern_sigsuspend(td, sigmask));
98564921Smarcel}
98664921Smarcel
98764921Smarcelint
98883366Sjulianlinux_sigaltstack(struct thread *td, struct linux_sigaltstack_args *uap)
98964921Smarcel{
990102814Siedowse	stack_t ss, oss;
99183221Smarcel	l_stack_t lss;
99264921Smarcel	int error;
99364921Smarcel
99464921Smarcel#ifdef DEBUG
99572543Sjlemon	if (ldebug(sigaltstack))
99672543Sjlemon		printf(ARGS(sigaltstack, "%p, %p"), uap->uss, uap->uoss);
99764921Smarcel#endif
99864921Smarcel
999102814Siedowse	if (uap->uss != NULL) {
100083221Smarcel		error = copyin(uap->uss, &lss, sizeof(l_stack_t));
100167051Sgallatin		if (error)
100267051Sgallatin			return (error);
100364921Smarcel
1004102814Siedowse		ss.ss_sp = lss.ss_sp;
1005102814Siedowse		ss.ss_size = lss.ss_size;
1006102814Siedowse		ss.ss_flags = linux_to_bsd_sigaltstack(lss.ss_flags);
100767051Sgallatin	}
1008134269Sjhb	error = kern_sigaltstack(td, (uap->uss != NULL) ? &ss : NULL,
1009134269Sjhb	    (uap->uoss != NULL) ? &oss : NULL);
1010102814Siedowse	if (!error && uap->uoss != NULL) {
1011102814Siedowse		lss.ss_sp = oss.ss_sp;
1012102814Siedowse		lss.ss_size = oss.ss_size;
1013102814Siedowse		lss.ss_flags = bsd_to_linux_sigaltstack(oss.ss_flags);
101483221Smarcel		error = copyout(&lss, uap->uoss, sizeof(l_stack_t));
101564921Smarcel	}
101664921Smarcel
101764921Smarcel	return (error);
101864921Smarcel}
1019104893Ssobomax
1020104893Ssobomaxint
1021104893Ssobomaxlinux_ftruncate64(struct thread *td, struct linux_ftruncate64_args *args)
1022104893Ssobomax{
1023104893Ssobomax	struct ftruncate_args sa;
1024104893Ssobomax
1025104893Ssobomax#ifdef DEBUG
1026104893Ssobomax	if (ldebug(ftruncate64))
1027104984Sbde		printf(ARGS(ftruncate64, "%u, %jd"), args->fd,
1028104984Sbde		    (intmax_t)args->length);
1029104893Ssobomax#endif
1030104893Ssobomax
1031104893Ssobomax	sa.fd = args->fd;
1032104893Ssobomax	sa.pad = 0;
1033104893Ssobomax	sa.length = args->length;
1034104893Ssobomax	return ftruncate(td, &sa);
1035104893Ssobomax}
1036134838Sdfr
1037134838Sdfrint
1038134838Sdfrlinux_set_thread_area(struct thread *td, struct linux_set_thread_area_args *args)
1039134838Sdfr{
1040161310Snetchild	struct l_user_desc info;
1041161310Snetchild	int error;
1042161310Snetchild	int idx;
1043161310Snetchild	int a[2];
1044161310Snetchild	struct segment_descriptor sd;
1045161310Snetchild
1046161310Snetchild	error = copyin(args->desc, &info, sizeof(struct l_user_desc));
1047161310Snetchild	if (error)
1048161310Snetchild		return (error);
1049161310Snetchild
1050161310Snetchild#ifdef DEBUG
1051161310Snetchild	if (ldebug(set_thread_area))
1052161310Snetchild	   	printf(ARGS(set_thread_area, "%i, %x, %x, %i, %i, %i, %i, %i, %i\n"),
1053161310Snetchild		      info.entry_number,
1054161310Snetchild      		      info.base_addr,
1055161310Snetchild      		      info.limit,
1056161310Snetchild      		      info.seg_32bit,
1057161310Snetchild		      info.contents,
1058161310Snetchild      		      info.read_exec_only,
1059161310Snetchild      		      info.limit_in_pages,
1060161310Snetchild      		      info.seg_not_present,
1061161310Snetchild      		      info.useable);
1062161310Snetchild#endif
1063161310Snetchild
1064161310Snetchild	idx = info.entry_number;
1065161365Snetchild	/*
1066161365Snetchild	 * Semantics of linux version: every thread in the system has array
1067161310Snetchild	 * of 3 tls descriptors. 1st is GLIBC TLS, 2nd is WINE, 3rd unknown. This
1068161310Snetchild	 * syscall loads one of the selected tls decriptors with a value
1069161310Snetchild	 * and also loads GDT descriptors 6, 7 and 8 with the content of the per-thread
1070161310Snetchild	 * descriptors.
1071161310Snetchild	 *
1072161310Snetchild	 * Semantics of fbsd version: I think we can ignore that linux has 3 per-thread
1073161310Snetchild	 * descriptors and use just the 1st one. The tls_array[] is used only in
1074161310Snetchild	 * set/get-thread_area() syscalls and for loading the GDT descriptors. In fbsd
1075161310Snetchild	 * we use just one GDT descriptor for TLS so we will load just one.
1076161310Snetchild	 * XXX: this doesnt work when user-space process tries to use more then 1 TLS segment
1077161310Snetchild	 * comment in the linux sources says wine might do that.
1078134838Sdfr	 */
1079161310Snetchild
1080161365Snetchild	/*
1081161365Snetchild	 * we support just GLIBC TLS now
1082161310Snetchild	 * we should let 3 proceed as well because we use this segment so
1083161310Snetchild	 * if code does two subsequent calls it should succeed
1084161310Snetchild	 */
1085161310Snetchild	if (idx != 6 && idx != -1 && idx != 3)
1086161310Snetchild		return (EINVAL);
1087161310Snetchild
1088161365Snetchild	/*
1089161365Snetchild	 * we have to copy out the GDT entry we use
1090161310Snetchild	 * FreeBSD uses GDT entry #3 for storing %gs so load that
1091161310Snetchild	 * XXX: what if userspace program doesnt check this value and tries
1092161310Snetchild	 * to use 6, 7 or 8?
1093161310Snetchild	 */
1094161310Snetchild	idx = info.entry_number = 3;
1095161310Snetchild	error = copyout(&info, args->desc, sizeof(struct l_user_desc));
1096161310Snetchild	if (error)
1097161310Snetchild		return (error);
1098161310Snetchild
1099161310Snetchild	if (LDT_empty(&info)) {
1100161310Snetchild		a[0] = 0;
1101161310Snetchild		a[1] = 0;
1102161310Snetchild	} else {
1103161310Snetchild		a[0] = LDT_entry_a(&info);
1104161310Snetchild		a[1] = LDT_entry_b(&info);
1105161310Snetchild	}
1106161310Snetchild
1107161310Snetchild	memcpy(&sd, &a, sizeof(a));
1108161310Snetchild#ifdef DEBUG
1109161310Snetchild	if (ldebug(set_thread_area))
1110161310Snetchild	   	printf("Segment created in set_thread_area: lobase: %x, hibase: %x, lolimit: %x, hilimit: %x, type: %i, dpl: %i, p: %i, xx: %i, def32: %i, gran: %i\n", sd.sd_lobase,
1111161310Snetchild			sd.sd_hibase,
1112161310Snetchild			sd.sd_lolimit,
1113161310Snetchild			sd.sd_hilimit,
1114161310Snetchild			sd.sd_type,
1115161310Snetchild			sd.sd_dpl,
1116161310Snetchild			sd.sd_p,
1117161310Snetchild			sd.sd_xx,
1118161310Snetchild			sd.sd_def32,
1119161310Snetchild			sd.sd_gran);
1120161310Snetchild#endif
1121161310Snetchild
1122161310Snetchild	/* this is taken from i386 version of cpu_set_user_tls() */
1123161310Snetchild	critical_enter();
1124161310Snetchild	/* set %gs */
1125161310Snetchild	td->td_pcb->pcb_gsd = sd;
1126161310Snetchild	PCPU_GET(fsgs_gdt)[1] = sd;
1127161310Snetchild	load_gs(GSEL(GUGS_SEL, SEL_UPL));
1128161310Snetchild	critical_exit();
1129161310Snetchild
1130161310Snetchild	return (0);
1131134838Sdfr}
1132134838Sdfr
1133134838Sdfrint
1134161310Snetchildlinux_get_thread_area(struct thread *td, struct linux_get_thread_area_args *args)
1135134838Sdfr{
1136161310Snetchild
1137161310Snetchild	struct l_user_desc info;
1138161310Snetchild	int error;
1139161310Snetchild	int idx;
1140161310Snetchild	struct l_desc_struct desc;
1141161310Snetchild	struct segment_descriptor sd;
1142134838Sdfr
1143161310Snetchild#ifdef DEBUG
1144161310Snetchild	if (ldebug(get_thread_area))
1145161310Snetchild		printf(ARGS(get_thread_area, "%p"), args->desc);
1146161310Snetchild#endif
1147161310Snetchild
1148161310Snetchild	error = copyin(args->desc, &info, sizeof(struct l_user_desc));
1149161310Snetchild	if (error)
1150161310Snetchild		return (error);
1151161310Snetchild
1152161310Snetchild	idx = info.entry_number;
1153161310Snetchild	/* XXX: I am not sure if we want 3 to be allowed too. */
1154161310Snetchild	if (idx != 6 && idx != 3)
1155161310Snetchild		return (EINVAL);
1156161310Snetchild
1157161310Snetchild	idx = 3;
1158161310Snetchild
1159161310Snetchild	memset(&info, 0, sizeof(info));
1160161310Snetchild
1161161310Snetchild	sd = PCPU_GET(fsgs_gdt)[1];
1162161310Snetchild
1163161310Snetchild	memcpy(&desc, &sd, sizeof(desc));
1164161310Snetchild
1165161310Snetchild	info.entry_number = idx;
1166161310Snetchild	info.base_addr = GET_BASE(&desc);
1167161310Snetchild	info.limit = GET_LIMIT(&desc);
1168161310Snetchild	info.seg_32bit = GET_32BIT(&desc);
1169161310Snetchild	info.contents = GET_CONTENTS(&desc);
1170161310Snetchild	info.read_exec_only = !GET_WRITABLE(&desc);
1171161310Snetchild	info.limit_in_pages = GET_LIMIT_PAGES(&desc);
1172161310Snetchild	info.seg_not_present = !GET_PRESENT(&desc);
1173161310Snetchild	info.useable = GET_USEABLE(&desc);
1174161310Snetchild
1175161310Snetchild	error = copyout(&info, args->desc, sizeof(struct l_user_desc));
1176161310Snetchild	if (error)
1177161310Snetchild	   	return (EFAULT);
1178161310Snetchild
1179134838Sdfr	return (0);
1180134838Sdfr}
1181134838Sdfr
/*
 * Linux timer_create(): passed straight through to the native
 * ktimer_create().  The Linux argument structure is simply reinterpreted
 * as struct ktimer_create_args.
 * NOTE(review): this relies on both argument layouts matching -- confirm.
 */
int
linux_timer_create(struct thread *td, struct linux_timer_create_args *args)
{
	struct ktimer_create_args *bsd_args;

	bsd_args = (struct ktimer_create_args *)args;
	return (ktimer_create(td, bsd_args));
}
1188134838Sdfr
/*
 * Linux timer_settime(): passed straight through to the native
 * ktimer_settime(), with the argument structure reinterpreted as
 * struct ktimer_settime_args.
 * NOTE(review): this relies on both argument layouts matching -- confirm.
 */
int
linux_timer_settime(struct thread *td, struct linux_timer_settime_args *args)
{
	struct ktimer_settime_args *bsd_args;

	bsd_args = (struct ktimer_settime_args *)args;
	return (ktimer_settime(td, bsd_args));
}
1194134838Sdfr
/*
 * Linux timer_gettime(): passed straight through to the native
 * ktimer_gettime(), with the argument structure reinterpreted as
 * struct ktimer_gettime_args.
 * NOTE(review): this relies on both argument layouts matching -- confirm.
 */
int
linux_timer_gettime(struct thread *td, struct linux_timer_gettime_args *args)
{
	struct ktimer_gettime_args *bsd_args;

	bsd_args = (struct ktimer_gettime_args *)args;
	return (ktimer_gettime(td, bsd_args));
}
1200161310Snetchild
/*
 * Linux timer_getoverrun(): passed straight through to the native
 * ktimer_getoverrun(), with the argument structure reinterpreted as
 * struct ktimer_getoverrun_args.
 * NOTE(review): this relies on both argument layouts matching -- confirm.
 */
int
linux_timer_getoverrun(struct thread *td, struct linux_timer_getoverrun_args *args)
{
	struct ktimer_getoverrun_args *bsd_args;

	bsd_args = (struct ktimer_getoverrun_args *)args;
	return (ktimer_getoverrun(td, bsd_args));
}
1206161310Snetchild
/*
 * Linux timer_delete(): passed straight through to the native
 * ktimer_delete(), with the argument structure reinterpreted as
 * struct ktimer_delete_args.
 * NOTE(review): this relies on both argument layouts matching -- confirm.
 */
int
linux_timer_delete(struct thread *td, struct linux_timer_delete_args *args)
{
	struct ktimer_delete_args *bsd_args;

	bsd_args = (struct ktimer_delete_args *)args;
	return (ktimer_delete(td, bsd_args));
}
1212161310Snetchild
1213161310Snetchild/* XXX: this wont work with module - convert it */
1214161310Snetchildint
1215161310Snetchildlinux_mq_open(struct thread *td, struct linux_mq_open_args *args)
1216161310Snetchild{
1217161310Snetchild#ifdef P1003_1B_MQUEUE
1218161310Snetchild   	return kmq_open(td, (struct kmq_open_args *) args);
1219161310Snetchild#else
1220161310Snetchild	return (ENOSYS);
1221161310Snetchild#endif
1222161310Snetchild}
1223161310Snetchild
1224161310Snetchildint
1225161310Snetchildlinux_mq_unlink(struct thread *td, struct linux_mq_unlink_args *args)
1226161310Snetchild{
1227161310Snetchild#ifdef P1003_1B_MQUEUE
1228161310Snetchild   	return kmq_unlink(td, (struct kmq_unlink_args *) args);
1229161310Snetchild#else
1230161310Snetchild	return (ENOSYS);
1231161310Snetchild#endif
1232161310Snetchild}
1233161310Snetchild
1234161310Snetchildint
1235161310Snetchildlinux_mq_timedsend(struct thread *td, struct linux_mq_timedsend_args *args)
1236161310Snetchild{
1237161310Snetchild#ifdef P1003_1B_MQUEUE
1238161310Snetchild   	return kmq_timedsend(td, (struct kmq_timedsend_args *) args);
1239161310Snetchild#else
1240161310Snetchild	return (ENOSYS);
1241161310Snetchild#endif
1242161310Snetchild}
1243161310Snetchild
1244161310Snetchildint
1245161310Snetchildlinux_mq_timedreceive(struct thread *td, struct linux_mq_timedreceive_args *args)
1246161310Snetchild{
1247161310Snetchild#ifdef P1003_1B_MQUEUE
1248161310Snetchild   	return kmq_timedreceive(td, (struct kmq_timedreceive_args *) args);
1249161310Snetchild#else
1250161310Snetchild	return (ENOSYS);
1251161310Snetchild#endif
1252161310Snetchild}
1253161310Snetchild
1254161310Snetchildint
1255161310Snetchildlinux_mq_notify(struct thread *td, struct linux_mq_notify_args *args)
1256161310Snetchild{
1257161310Snetchild#ifdef P1003_1B_MQUEUE
1258161310Snetchild	return kmq_notify(td, (struct kmq_notify_args *) args);
1259161310Snetchild#else
1260161310Snetchild	return (ENOSYS);
1261161310Snetchild#endif
1262161310Snetchild}
1263161310Snetchild
1264161310Snetchildint
1265161310Snetchildlinux_mq_getsetattr(struct thread *td, struct linux_mq_getsetattr_args *args)
1266161310Snetchild{
1267161310Snetchild#ifdef P1003_1B_MQUEUE
1268161310Snetchild   	return kmq_setattr(td, (struct kmq_setattr_args *) args);
1269161310Snetchild#else
1270161310Snetchild	return (ENOSYS);
1271161310Snetchild#endif
1272161310Snetchild}
1273161310Snetchild
1274