linux_machdep.c revision 162472
164921Smarcel/*-
264921Smarcel * Copyright (c) 2000 Marcel Moolenaar
364921Smarcel * All rights reserved.
464921Smarcel *
564921Smarcel * Redistribution and use in source and binary forms, with or without
664921Smarcel * modification, are permitted provided that the following conditions
764921Smarcel * are met:
864921Smarcel * 1. Redistributions of source code must retain the above copyright
9111798Sdes *    notice, this list of conditions and the following disclaimer
1064921Smarcel *    in this position and unchanged.
1164921Smarcel * 2. Redistributions in binary form must reproduce the above copyright
1264921Smarcel *    notice, this list of conditions and the following disclaimer in the
1364921Smarcel *    documentation and/or other materials provided with the distribution.
1464921Smarcel * 3. The name of the author may not be used to endorse or promote products
1565067Smarcel *    derived from this software without specific prior written permission.
1664921Smarcel *
1764921Smarcel * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
1864921Smarcel * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
1964921Smarcel * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
2064921Smarcel * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
2164921Smarcel * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
2264921Smarcel * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
2364921Smarcel * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
2464921Smarcel * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
2564921Smarcel * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
2664921Smarcel * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2764921Smarcel */
2864921Smarcel
29115705Sobrien#include <sys/cdefs.h>
30115705Sobrien__FBSDID("$FreeBSD: head/sys/i386/linux/linux_machdep.c 162472 2006-09-20 17:24:20Z netchild $");
31115705Sobrien
3264921Smarcel#include <sys/param.h>
3376166Smarkm#include <sys/systm.h>
34162472Snetchild#include <sys/file.h>
35162472Snetchild#include <sys/fcntl.h>
36140992Ssobomax#include <sys/imgact.h>
3784811Sjhb#include <sys/lock.h>
38140992Ssobomax#include <sys/malloc.h>
3964921Smarcel#include <sys/mman.h>
4076166Smarkm#include <sys/mutex.h>
41161310Snetchild#include <sys/sx.h>
4264921Smarcel#include <sys/proc.h>
43161310Snetchild#include <sys/queue.h>
4476166Smarkm#include <sys/resource.h>
4576166Smarkm#include <sys/resourcevar.h>
46134838Sdfr#include <sys/signalvar.h>
47102814Siedowse#include <sys/syscallsubr.h>
4864921Smarcel#include <sys/sysproto.h>
4964921Smarcel#include <sys/unistd.h>
50161310Snetchild#include <sys/wait.h>
5164921Smarcel
5264921Smarcel#include <machine/frame.h>
5364921Smarcel#include <machine/psl.h>
5464921Smarcel#include <machine/segments.h>
5564921Smarcel#include <machine/sysarch.h>
5664921Smarcel
5767238Sgallatin#include <vm/vm.h>
5867238Sgallatin#include <vm/pmap.h>
5967238Sgallatin#include <vm/vm_map.h>
6067238Sgallatin
6164921Smarcel#include <i386/linux/linux.h>
6268583Smarcel#include <i386/linux/linux_proto.h>
6364921Smarcel#include <compat/linux/linux_ipc.h>
6464921Smarcel#include <compat/linux/linux_signal.h>
6564921Smarcel#include <compat/linux/linux_util.h>
66161310Snetchild#include <compat/linux/linux_emul.h>
6764921Smarcel
68161310Snetchild#include <i386/include/pcb.h>			/* needed for pcb definition in linux_set_thread_area */
69161310Snetchild
70161310Snetchild#include "opt_posix.h"
71161310Snetchild
72161310Snetchildextern struct sysentvec elf32_freebsd_sysvec;	/* defined in i386/i386/elf_machdep.c */
73161310Snetchild
7483221Smarcelstruct l_descriptor {
7583221Smarcel	l_uint		entry_number;
7683221Smarcel	l_ulong		base_addr;
7783221Smarcel	l_uint		limit;
7883221Smarcel	l_uint		seg_32bit:1;
7983221Smarcel	l_uint		contents:2;
8083221Smarcel	l_uint		read_exec_only:1;
8183221Smarcel	l_uint		limit_in_pages:1;
8283221Smarcel	l_uint		seg_not_present:1;
8383221Smarcel	l_uint		useable:1;
8464921Smarcel};
8564921Smarcel
8683221Smarcelstruct l_old_select_argv {
8783221Smarcel	l_int		nfds;
8883221Smarcel	l_fd_set	*readfds;
8983221Smarcel	l_fd_set	*writefds;
9083221Smarcel	l_fd_set	*exceptfds;
9183221Smarcel	struct l_timeval	*timeout;
9264921Smarcel};
9364921Smarcel
9464921Smarcelint
9567051Sgallatinlinux_to_bsd_sigaltstack(int lsa)
9667051Sgallatin{
9767051Sgallatin	int bsa = 0;
9867051Sgallatin
9967051Sgallatin	if (lsa & LINUX_SS_DISABLE)
10067051Sgallatin		bsa |= SS_DISABLE;
10167051Sgallatin	if (lsa & LINUX_SS_ONSTACK)
10267051Sgallatin		bsa |= SS_ONSTACK;
10367051Sgallatin	return (bsa);
10467051Sgallatin}
10567051Sgallatin
10667051Sgallatinint
10767051Sgallatinbsd_to_linux_sigaltstack(int bsa)
10867051Sgallatin{
10967051Sgallatin	int lsa = 0;
11067051Sgallatin
11167051Sgallatin	if (bsa & SS_DISABLE)
11267051Sgallatin		lsa |= LINUX_SS_DISABLE;
11367051Sgallatin	if (bsa & SS_ONSTACK)
11467051Sgallatin		lsa |= LINUX_SS_ONSTACK;
11567051Sgallatin	return (lsa);
11667051Sgallatin}
11767051Sgallatin
11867051Sgallatinint
11983366Sjulianlinux_execve(struct thread *td, struct linux_execve_args *args)
12064921Smarcel{
121140992Ssobomax	int error;
122140992Ssobomax	char *newpath;
123140992Ssobomax	struct image_args eargs;
12464921Smarcel
125141468Sjhb	LCONVPATHEXIST(td, args->path, &newpath);
12664921Smarcel
12764921Smarcel#ifdef DEBUG
12872543Sjlemon	if (ldebug(execve))
129140992Ssobomax		printf(ARGS(execve, "%s"), newpath);
13064921Smarcel#endif
13164921Smarcel
132140992Ssobomax	error = exec_copyin_args(&eargs, newpath, UIO_SYSSPACE,
133140992Ssobomax	    args->argp, args->envp);
134140992Ssobomax	free(newpath, M_TEMP);
135140992Ssobomax	if (error == 0)
136148623Ssobomax		error = kern_execve(td, &eargs, NULL);
137161310Snetchild	if (error == 0)
138161310Snetchild	   	/* linux process can exec fbsd one, dont attempt
139161310Snetchild		 * to create emuldata for such process using
140161310Snetchild		 * linux_proc_init, this leads to a panic on KASSERT
141161310Snetchild		 * because such process has p->p_emuldata == NULL
142161310Snetchild		 */
143161310Snetchild	   	if (td->td_proc->p_sysent == &elf_linux_sysvec)
144161310Snetchild   		   	error = linux_proc_init(td, 0, 0);
145140992Ssobomax	return (error);
14664921Smarcel}
14764921Smarcel
14883221Smarcelstruct l_ipc_kludge {
14983221Smarcel	struct l_msgbuf *msgp;
15083221Smarcel	l_long msgtyp;
15183221Smarcel};
15283221Smarcel
15364921Smarcelint
15483366Sjulianlinux_ipc(struct thread *td, struct linux_ipc_args *args)
15564921Smarcel{
15683221Smarcel
15783221Smarcel	switch (args->what & 0xFFFF) {
15883221Smarcel	case LINUX_SEMOP: {
15983221Smarcel		struct linux_semop_args a;
16083221Smarcel
16183221Smarcel		a.semid = args->arg1;
16283221Smarcel		a.tsops = args->ptr;
16383221Smarcel		a.nsops = args->arg2;
16483366Sjulian		return (linux_semop(td, &a));
16564921Smarcel	}
16683221Smarcel	case LINUX_SEMGET: {
16783221Smarcel		struct linux_semget_args a;
16864921Smarcel
16983221Smarcel		a.key = args->arg1;
17083221Smarcel		a.nsems = args->arg2;
17183221Smarcel		a.semflg = args->arg3;
17283366Sjulian		return (linux_semget(td, &a));
17383221Smarcel	}
17483221Smarcel	case LINUX_SEMCTL: {
17583221Smarcel		struct linux_semctl_args a;
17683221Smarcel		int error;
17783221Smarcel
17883221Smarcel		a.semid = args->arg1;
17983221Smarcel		a.semnum = args->arg2;
18083221Smarcel		a.cmd = args->arg3;
181111797Sdes		error = copyin(args->ptr, &a.arg, sizeof(a.arg));
18283221Smarcel		if (error)
18383221Smarcel			return (error);
18483366Sjulian		return (linux_semctl(td, &a));
18583221Smarcel	}
18683221Smarcel	case LINUX_MSGSND: {
18783221Smarcel		struct linux_msgsnd_args a;
18883221Smarcel
18983221Smarcel		a.msqid = args->arg1;
19083221Smarcel		a.msgp = args->ptr;
19183221Smarcel		a.msgsz = args->arg2;
19283221Smarcel		a.msgflg = args->arg3;
19383366Sjulian		return (linux_msgsnd(td, &a));
19483221Smarcel	}
19583221Smarcel	case LINUX_MSGRCV: {
19683221Smarcel		struct linux_msgrcv_args a;
19783221Smarcel
19883221Smarcel		a.msqid = args->arg1;
19983221Smarcel		a.msgsz = args->arg2;
20083221Smarcel		a.msgflg = args->arg3;
20183221Smarcel		if ((args->what >> 16) == 0) {
20283221Smarcel			struct l_ipc_kludge tmp;
20383221Smarcel			int error;
20483221Smarcel
20583221Smarcel			if (args->ptr == NULL)
20683221Smarcel				return (EINVAL);
207111797Sdes			error = copyin(args->ptr, &tmp, sizeof(tmp));
20883221Smarcel			if (error)
20983221Smarcel				return (error);
21083221Smarcel			a.msgp = tmp.msgp;
21183221Smarcel			a.msgtyp = tmp.msgtyp;
21283221Smarcel		} else {
21383221Smarcel			a.msgp = args->ptr;
21483221Smarcel			a.msgtyp = args->arg5;
21583221Smarcel		}
21683366Sjulian		return (linux_msgrcv(td, &a));
21783221Smarcel	}
21883221Smarcel	case LINUX_MSGGET: {
21983221Smarcel		struct linux_msgget_args a;
22083221Smarcel
22183221Smarcel		a.key = args->arg1;
22283221Smarcel		a.msgflg = args->arg2;
22383366Sjulian		return (linux_msgget(td, &a));
22483221Smarcel	}
22583221Smarcel	case LINUX_MSGCTL: {
22683221Smarcel		struct linux_msgctl_args a;
22783221Smarcel
22883221Smarcel		a.msqid = args->arg1;
22983221Smarcel		a.cmd = args->arg2;
23083221Smarcel		a.buf = args->ptr;
23183366Sjulian		return (linux_msgctl(td, &a));
23283221Smarcel	}
23383221Smarcel	case LINUX_SHMAT: {
23483221Smarcel		struct linux_shmat_args a;
23583221Smarcel
23683221Smarcel		a.shmid = args->arg1;
23783221Smarcel		a.shmaddr = args->ptr;
23883221Smarcel		a.shmflg = args->arg2;
23983221Smarcel		a.raddr = (l_ulong *)args->arg3;
24083366Sjulian		return (linux_shmat(td, &a));
24183221Smarcel	}
24283221Smarcel	case LINUX_SHMDT: {
24383221Smarcel		struct linux_shmdt_args a;
24483221Smarcel
24583221Smarcel		a.shmaddr = args->ptr;
24683366Sjulian		return (linux_shmdt(td, &a));
24783221Smarcel	}
24883221Smarcel	case LINUX_SHMGET: {
24983221Smarcel		struct linux_shmget_args a;
25083221Smarcel
25183221Smarcel		a.key = args->arg1;
25283221Smarcel		a.size = args->arg2;
25383221Smarcel		a.shmflg = args->arg3;
25483366Sjulian		return (linux_shmget(td, &a));
25583221Smarcel	}
25683221Smarcel	case LINUX_SHMCTL: {
25783221Smarcel		struct linux_shmctl_args a;
25883221Smarcel
25983221Smarcel		a.shmid = args->arg1;
26083221Smarcel		a.cmd = args->arg2;
26183221Smarcel		a.buf = args->ptr;
26283366Sjulian		return (linux_shmctl(td, &a));
26383221Smarcel	}
26483221Smarcel	default:
26583221Smarcel		break;
26683221Smarcel	}
26783221Smarcel
26883221Smarcel	return (EINVAL);
26964921Smarcel}
27064921Smarcel
27164921Smarcelint
27283366Sjulianlinux_old_select(struct thread *td, struct linux_old_select_args *args)
27364921Smarcel{
27483221Smarcel	struct l_old_select_argv linux_args;
27583221Smarcel	struct linux_select_args newsel;
27664921Smarcel	int error;
27764921Smarcel
27883221Smarcel#ifdef DEBUG
27983221Smarcel	if (ldebug(old_select))
28091437Speter		printf(ARGS(old_select, "%p"), args->ptr);
28164921Smarcel#endif
28264921Smarcel
283111797Sdes	error = copyin(args->ptr, &linux_args, sizeof(linux_args));
28464921Smarcel	if (error)
28564921Smarcel		return (error);
28664921Smarcel
28764921Smarcel	newsel.nfds = linux_args.nfds;
28864921Smarcel	newsel.readfds = linux_args.readfds;
28964921Smarcel	newsel.writefds = linux_args.writefds;
29064921Smarcel	newsel.exceptfds = linux_args.exceptfds;
29164921Smarcel	newsel.timeout = linux_args.timeout;
29283366Sjulian	return (linux_select(td, &newsel));
29364921Smarcel}
29464921Smarcel
29564921Smarcelint
29683366Sjulianlinux_fork(struct thread *td, struct linux_fork_args *args)
29764921Smarcel{
29864921Smarcel	int error;
29964921Smarcel
30064921Smarcel#ifdef DEBUG
30172543Sjlemon	if (ldebug(fork))
30272543Sjlemon		printf(ARGS(fork, ""));
30364921Smarcel#endif
30464921Smarcel
30583366Sjulian	if ((error = fork(td, (struct fork_args *)args)) != 0)
30664921Smarcel		return (error);
30764921Smarcel
30883366Sjulian	if (td->td_retval[1] == 1)
30983366Sjulian		td->td_retval[0] = 0;
310161310Snetchild	error = linux_proc_init(td, td->td_retval[0], 0);
311161310Snetchild	if (error)
312161310Snetchild		return (error);
313161310Snetchild
31464921Smarcel	return (0);
31564921Smarcel}
31664921Smarcel
31764921Smarcelint
31883366Sjulianlinux_vfork(struct thread *td, struct linux_vfork_args *args)
31964921Smarcel{
32064921Smarcel	int error;
321161611Snetchild	struct proc *p2;
32264921Smarcel
32364921Smarcel#ifdef DEBUG
32472543Sjlemon	if (ldebug(vfork))
32572543Sjlemon		printf(ARGS(vfork, ""));
32664921Smarcel#endif
32764921Smarcel
328161611Snetchild	/* exclude RFPPWAIT */
329161611Snetchild	if ((error = fork1(td, RFFDG | RFPROC | RFMEM, 0, &p2)) != 0)
33064921Smarcel		return (error);
331161611Snetchild	if (error == 0) {
332161611Snetchild	   	td->td_retval[0] = p2->p_pid;
333161611Snetchild		td->td_retval[1] = 0;
334161611Snetchild	}
33564921Smarcel	/* Are we the child? */
33683366Sjulian	if (td->td_retval[1] == 1)
33783366Sjulian		td->td_retval[0] = 0;
338161310Snetchild	error = linux_proc_init(td, td->td_retval[0], 0);
339161310Snetchild	if (error)
340161310Snetchild		return (error);
341161611Snetchild	/* wait for the children to exit, ie. emulate vfork */
342161611Snetchild	PROC_LOCK(p2);
343161611Snetchild	while (p2->p_flag & P_PPWAIT)
344161611Snetchild	   	msleep(td->td_proc, &p2->p_mtx, PWAIT, "ppwait", 0);
345161611Snetchild	PROC_UNLOCK(p2);
346161611Snetchild
34764921Smarcel	return (0);
34864921Smarcel}
34964921Smarcel
35064921Smarcelint
35183366Sjulianlinux_clone(struct thread *td, struct linux_clone_args *args)
35264921Smarcel{
35373856Sjhb	int error, ff = RFPROC | RFSTOPPED;
35464921Smarcel	struct proc *p2;
355113689Sjhb	struct thread *td2;
35664921Smarcel	int exit_signal;
357161310Snetchild	struct linux_emuldata *em;
35864921Smarcel
35964921Smarcel#ifdef DEBUG
36072543Sjlemon	if (ldebug(clone)) {
361161310Snetchild   	   	printf(ARGS(clone, "flags %x, stack %x, parent tid: %x, child tid: %x"),
362161310Snetchild		    (unsigned int)args->flags, (unsigned int)args->stack,
363161310Snetchild		    (unsigned int)args->parent_tidptr, (unsigned int)args->child_tidptr);
36472543Sjlemon	}
36564921Smarcel#endif
36664921Smarcel
36764921Smarcel	exit_signal = args->flags & 0x000000ff;
36864921Smarcel	if (exit_signal >= LINUX_NSIG)
36964921Smarcel		return (EINVAL);
37064921Smarcel
37164921Smarcel	if (exit_signal <= LINUX_SIGTBLSZ)
37264921Smarcel		exit_signal = linux_to_bsd_signal[_SIG_IDX(exit_signal)];
37364921Smarcel
37464921Smarcel	if (args->flags & CLONE_VM)
37564921Smarcel		ff |= RFMEM;
37664921Smarcel	if (args->flags & CLONE_SIGHAND)
37764921Smarcel		ff |= RFSIGSHARE;
37864921Smarcel	if (!(args->flags & CLONE_FILES))
37964921Smarcel		ff |= RFFDG;
38064921Smarcel
381143108Ssobomax	/*
382143108Ssobomax	 * Attempt to detect when linux_clone(2) is used for creating
383143108Ssobomax	 * kernel threads. Unfortunately despite the existence of the
384143108Ssobomax	 * CLONE_THREAD flag, version of linuxthreads package used in
385143108Ssobomax	 * most popular distros as of beginning of 2005 doesn't make
386143108Ssobomax	 * any use of it. Therefore, this detection relay fully on
387143108Ssobomax	 * empirical observation that linuxthreads sets certain
388143108Ssobomax	 * combination of flags, so that we can make more or less
389143108Ssobomax	 * precise detection and notify the FreeBSD kernel that several
390143108Ssobomax	 * processes are in fact part of the same threading group, so
391143108Ssobomax	 * that special treatment is necessary for signal delivery
392143108Ssobomax	 * between those processes and fd locking.
393143108Ssobomax	 */
394143108Ssobomax	if ((args->flags & 0xffffff00) == THREADING_FLAGS)
395143108Ssobomax		ff |= RFTHREAD;
396143108Ssobomax
397104354Sscottl	error = fork1(td, ff, 0, &p2);
398113689Sjhb	if (error)
399113689Sjhb		return (error);
400113689Sjhb
401161310Snetchild	/* create the emuldata */
402161310Snetchild	error = linux_proc_init(td, p2->p_pid, args->flags);
403161310Snetchild	/* reference it - no need to check this */
404161310Snetchild	em = em_find(p2, EMUL_UNLOCKED);
405161310Snetchild	KASSERT(em != NULL, ("clone: emuldata not found.\n"));
406161310Snetchild	/* and adjust it */
407161310Snetchild	if (args->flags & CLONE_PARENT_SETTID) {
408161310Snetchild	   	if (args->parent_tidptr == NULL) {
409161310Snetchild		   	EMUL_UNLOCK(&emul_lock);
410161310Snetchild			return (EINVAL);
411161310Snetchild		}
412161310Snetchild		error = copyout(&p2->p_pid, args->parent_tidptr, sizeof(p2->p_pid));
413161310Snetchild		if (error) {
414161310Snetchild		   	EMUL_UNLOCK(&emul_lock);
415161310Snetchild			return (error);
416161310Snetchild		}
417161310Snetchild	}
41864921Smarcel
419161673Snetchild	if (args->flags & (CLONE_PARENT|CLONE_THREAD)) {
420161673Snetchild	   	sx_xlock(&proctree_lock);
421161673Snetchild		PROC_LOCK(p2);
422161673Snetchild		proc_reparent(p2, td->td_proc->p_pptr);
423161673Snetchild		PROC_UNLOCK(p2);
424161673Snetchild		sx_xunlock(&proctree_lock);
425161310Snetchild	}
426161673Snetchild
427161310Snetchild	if (args->flags & CLONE_THREAD) {
428161310Snetchild	   	/* XXX: linux mangles pgrp and pptr somehow
429161310Snetchild		 * I think it might be this but I am not sure.
430161310Snetchild		 */
431161310Snetchild#ifdef notyet
432161673Snetchild	   	PROC_LOCK(p2);
433161310Snetchild	   	p2->p_pgrp = td->td_proc->p_pgrp;
434161673Snetchild	   	PROC_UNLOCK(p2);
435161310Snetchild#endif
436161310Snetchild	 	exit_signal = 0;
437161310Snetchild	}
438161310Snetchild
439161310Snetchild	if (args->flags & CLONE_CHILD_SETTID)
440161310Snetchild		em->child_set_tid = args->child_tidptr;
441161310Snetchild	else
442161310Snetchild	   	em->child_set_tid = NULL;
443161310Snetchild
444161310Snetchild	if (args->flags & CLONE_CHILD_CLEARTID)
445161310Snetchild		em->child_clear_tid = args->child_tidptr;
446161310Snetchild	else
447161310Snetchild	   	em->child_clear_tid = NULL;
448161673Snetchild
449161310Snetchild	EMUL_UNLOCK(&emul_lock);
450161310Snetchild
451113689Sjhb	PROC_LOCK(p2);
452113689Sjhb	p2->p_sigparent = exit_signal;
453113689Sjhb	PROC_UNLOCK(p2);
454113689Sjhb	td2 = FIRST_THREAD_IN_PROC(p2);
455161365Snetchild	/*
456161365Snetchild	 * in a case of stack = NULL we are supposed to COW calling process stack
457161310Snetchild	 * this is what normal fork() does so we just keep the tf_esp arg intact
458161310Snetchild	 */
459161310Snetchild	if (args->stack)
460161310Snetchild   	   	td2->td_frame->tf_esp = (unsigned int)args->stack;
46164921Smarcel
462161310Snetchild	if (args->flags & CLONE_SETTLS) {
463161310Snetchild   	   	struct l_user_desc info;
464161310Snetchild   	   	int idx;
465161310Snetchild	   	int a[2];
466161310Snetchild		struct segment_descriptor sd;
467161310Snetchild
468161310Snetchild	   	error = copyin((void *)td->td_frame->tf_esi, &info, sizeof(struct l_user_desc));
469161310Snetchild		if (error)
470161310Snetchild   		   	return (error);
471161310Snetchild
472161310Snetchild		idx = info.entry_number;
473161310Snetchild
474161365Snetchild		/*
475161365Snetchild		 * looks like we're getting the idx we returned
476161310Snetchild		 * in the set_thread_area() syscall
477161310Snetchild		 */
478161310Snetchild		if (idx != 6 && idx != 3)
479161310Snetchild			return (EINVAL);
480161310Snetchild
481161310Snetchild		/* this doesnt happen in practice */
482161310Snetchild		if (idx == 6) {
483161310Snetchild		   	/* we might copy out the entry_number as 3 */
484161310Snetchild		   	info.entry_number = 3;
485161310Snetchild			error = copyout(&info, (void *) td->td_frame->tf_esi, sizeof(struct l_user_desc));
486161310Snetchild			if (error)
487161310Snetchild	   		   	return (error);
488161310Snetchild		}
489161310Snetchild
490161310Snetchild		a[0] = LDT_entry_a(&info);
491161310Snetchild		a[1] = LDT_entry_b(&info);
492161310Snetchild
493161310Snetchild		memcpy(&sd, &a, sizeof(a));
49464921Smarcel#ifdef DEBUG
495113689Sjhb	if (ldebug(clone))
496161310Snetchild	   	printf("Segment created in clone with CLONE_SETTLS: lobase: %x, hibase: %x, lolimit: %x, hilimit: %x, type: %i, dpl: %i, p: %i, xx: %i, def32: %i, gran: %i\n", sd.sd_lobase,
497161310Snetchild			sd.sd_hibase,
498161310Snetchild			sd.sd_lolimit,
499161310Snetchild			sd.sd_hilimit,
500161310Snetchild			sd.sd_type,
501161310Snetchild			sd.sd_dpl,
502161310Snetchild			sd.sd_p,
503161310Snetchild			sd.sd_xx,
504161310Snetchild			sd.sd_def32,
505161310Snetchild			sd.sd_gran);
506161310Snetchild#endif
507161310Snetchild
508161310Snetchild		/* set %gs */
509161310Snetchild		td2->td_pcb->pcb_gsd = sd;
510161673Snetchild		td2->td_pcb->pcb_gs = GSEL(GUGS_SEL, SEL_UPL);
511161310Snetchild	}
512161310Snetchild
513161310Snetchild#ifdef DEBUG
514161310Snetchild	if (ldebug(clone))
515113689Sjhb		printf(LMSG("clone: successful rfork to %ld, stack %p sig = %d"),
516113689Sjhb		    (long)p2->p_pid, args->stack, exit_signal);
51764921Smarcel#endif
51864921Smarcel
519113689Sjhb	/*
520113689Sjhb	 * Make this runnable after we are finished with it.
521113689Sjhb	 */
522113689Sjhb	mtx_lock_spin(&sched_lock);
523113689Sjhb	TD_SET_CAN_RUN(td2);
524134586Sjulian	setrunqueue(td2, SRQ_BORING);
525113689Sjhb	mtx_unlock_spin(&sched_lock);
52673856Sjhb
527113689Sjhb	td->td_retval[0] = p2->p_pid;
528113689Sjhb	td->td_retval[1] = 0;
529113689Sjhb	return (0);
53064921Smarcel}
53164921Smarcel
53264921Smarcel/* XXX move */
53383221Smarcelstruct l_mmap_argv {
53483221Smarcel	l_caddr_t	addr;
53583221Smarcel	l_int		len;
53683221Smarcel	l_int		prot;
53783221Smarcel	l_int		flags;
53883221Smarcel	l_int		fd;
53983221Smarcel	l_int		pos;
54064921Smarcel};
54164921Smarcel
/*
 * Sizing used by linux_mmap_common() for MAP_GROWSDOWN mappings: the
 * region is made at least (STACK_SIZE - GUARD_SIZE) bytes long.
 */
#define	STACK_SIZE	(2 * 1024 * 1024)
#define	GUARD_SIZE	(4 * PAGE_SIZE)

/* Shared back end of linux_mmap() and linux_mmap2(). */
static int	linux_mmap_common(struct thread *, struct l_mmap_argv *);
546104893Ssobomax
54764921Smarcelint
548104893Ssobomaxlinux_mmap2(struct thread *td, struct linux_mmap2_args *args)
549104893Ssobomax{
550104893Ssobomax	struct l_mmap_argv linux_args;
551104893Ssobomax
552104893Ssobomax#ifdef DEBUG
553104893Ssobomax	if (ldebug(mmap2))
554111798Sdes		printf(ARGS(mmap2, "%p, %d, %d, 0x%08x, %d, %d"),
555111798Sdes		    (void *)args->addr, args->len, args->prot,
556111798Sdes		    args->flags, args->fd, args->pgoff);
557104893Ssobomax#endif
558104893Ssobomax
559104893Ssobomax	linux_args.addr = (l_caddr_t)args->addr;
560104893Ssobomax	linux_args.len = args->len;
561104893Ssobomax	linux_args.prot = args->prot;
562104893Ssobomax	linux_args.flags = args->flags;
563104893Ssobomax	linux_args.fd = args->fd;
564104893Ssobomax	linux_args.pos = args->pgoff * PAGE_SIZE;
565104893Ssobomax
566104893Ssobomax	return (linux_mmap_common(td, &linux_args));
567104893Ssobomax}
568104893Ssobomax
569104893Ssobomaxint
57083366Sjulianlinux_mmap(struct thread *td, struct linux_mmap_args *args)
57164921Smarcel{
57264921Smarcel	int error;
57383221Smarcel	struct l_mmap_argv linux_args;
57464921Smarcel
575111797Sdes	error = copyin(args->ptr, &linux_args, sizeof(linux_args));
57664921Smarcel	if (error)
57764921Smarcel		return (error);
57864921Smarcel
57964921Smarcel#ifdef DEBUG
58072543Sjlemon	if (ldebug(mmap))
58172543Sjlemon		printf(ARGS(mmap, "%p, %d, %d, 0x%08x, %d, %d"),
582104984Sbde		    (void *)linux_args.addr, linux_args.len, linux_args.prot,
583104984Sbde		    linux_args.flags, linux_args.fd, linux_args.pos);
58464921Smarcel#endif
58564921Smarcel
586104893Ssobomax	return (linux_mmap_common(td, &linux_args));
587104893Ssobomax}
588104893Ssobomax
589104893Ssobomaxstatic int
590104893Ssobomaxlinux_mmap_common(struct thread *td, struct l_mmap_argv *linux_args)
591104893Ssobomax{
592104893Ssobomax	struct proc *p = td->td_proc;
593104893Ssobomax	struct mmap_args /* {
594104893Ssobomax		caddr_t addr;
595104893Ssobomax		size_t len;
596104893Ssobomax		int prot;
597104893Ssobomax		int flags;
598104893Ssobomax		int fd;
599104893Ssobomax		long pad;
600104893Ssobomax		off_t pos;
601104893Ssobomax	} */ bsd_args;
602112630Smdodd	int error;
603162472Snetchild	struct file *fp;
604104893Ssobomax
605112630Smdodd	error = 0;
60664921Smarcel	bsd_args.flags = 0;
607162472Snetchild	fp = NULL;
608162472Snetchild
609162472Snetchild	/*
610162472Snetchild	 * Linux mmap(2):
611162472Snetchild	 * You must specify exactly one of MAP_SHARED and MAP_PRIVATE
612162472Snetchild	 */
613162472Snetchild	if (! ((linux_args->flags & LINUX_MAP_SHARED) ^
614162472Snetchild	    (linux_args->flags & LINUX_MAP_PRIVATE)))
615162472Snetchild		return EINVAL;
616162472Snetchild
617104893Ssobomax	if (linux_args->flags & LINUX_MAP_SHARED)
61864921Smarcel		bsd_args.flags |= MAP_SHARED;
619104893Ssobomax	if (linux_args->flags & LINUX_MAP_PRIVATE)
62064921Smarcel		bsd_args.flags |= MAP_PRIVATE;
621104893Ssobomax	if (linux_args->flags & LINUX_MAP_FIXED)
62264921Smarcel		bsd_args.flags |= MAP_FIXED;
623104893Ssobomax	if (linux_args->flags & LINUX_MAP_ANON)
62464921Smarcel		bsd_args.flags |= MAP_ANON;
62573213Sdillon	else
62673213Sdillon		bsd_args.flags |= MAP_NOSYNC;
627104893Ssobomax	if (linux_args->flags & LINUX_MAP_GROWSDOWN) {
62864921Smarcel		bsd_args.flags |= MAP_STACK;
62964921Smarcel
630161365Snetchild		/*
631161365Snetchild		 * The linux MAP_GROWSDOWN option does not limit auto
63264921Smarcel		 * growth of the region.  Linux mmap with this option
63364921Smarcel		 * takes as addr the inital BOS, and as len, the initial
63464921Smarcel		 * region size.  It can then grow down from addr without
63564921Smarcel		 * limit.  However, linux threads has an implicit internal
63664921Smarcel		 * limit to stack size of STACK_SIZE.  Its just not
63764921Smarcel		 * enforced explicitly in linux.  But, here we impose
63864921Smarcel		 * a limit of (STACK_SIZE - GUARD_SIZE) on the stack
63964921Smarcel		 * region, since we can do this with our mmap.
64064921Smarcel		 *
64164921Smarcel		 * Our mmap with MAP_STACK takes addr as the maximum
64264921Smarcel		 * downsize limit on BOS, and as len the max size of
64364921Smarcel		 * the region.  It them maps the top SGROWSIZ bytes,
64464921Smarcel		 * and autgrows the region down, up to the limit
64564921Smarcel		 * in addr.
64664921Smarcel		 *
64764921Smarcel		 * If we don't use the MAP_STACK option, the effect
64864921Smarcel		 * of this code is to allocate a stack region of a
64964921Smarcel		 * fixed size of (STACK_SIZE - GUARD_SIZE).
65064921Smarcel		 */
65164921Smarcel
65264921Smarcel		/* This gives us TOS */
653104893Ssobomax		bsd_args.addr = linux_args->addr + linux_args->len;
65464921Smarcel
65567238Sgallatin		if (bsd_args.addr > p->p_vmspace->vm_maxsaddr) {
656161365Snetchild			/*
657161365Snetchild			 * Some linux apps will attempt to mmap
65867238Sgallatin			 * thread stacks near the top of their
65967238Sgallatin			 * address space.  If their TOS is greater
66067238Sgallatin			 * than vm_maxsaddr, vm_map_growstack()
66167238Sgallatin			 * will confuse the thread stack with the
66267238Sgallatin			 * process stack and deliver a SEGV if they
66367238Sgallatin			 * attempt to grow the thread stack past their
66467238Sgallatin			 * current stacksize rlimit.  To avoid this,
66567238Sgallatin			 * adjust vm_maxsaddr upwards to reflect
66667238Sgallatin			 * the current stacksize rlimit rather
66767238Sgallatin			 * than the maximum possible stacksize.
66867238Sgallatin			 * It would be better to adjust the
66967238Sgallatin			 * mmap'ed region, but some apps do not check
67067238Sgallatin			 * mmap's return value.
67167238Sgallatin			 */
672125454Sjhb			PROC_LOCK(p);
67367238Sgallatin			p->p_vmspace->vm_maxsaddr = (char *)USRSTACK -
674125454Sjhb			    lim_cur(p, RLIMIT_STACK);
675125454Sjhb			PROC_UNLOCK(p);
67667238Sgallatin		}
67767238Sgallatin
67864921Smarcel		/* This gives us our maximum stack size */
679104893Ssobomax		if (linux_args->len > STACK_SIZE - GUARD_SIZE)
680104893Ssobomax			bsd_args.len = linux_args->len;
68164921Smarcel		else
68264921Smarcel			bsd_args.len  = STACK_SIZE - GUARD_SIZE;
68364921Smarcel
684161365Snetchild		/*
685161365Snetchild		 * This gives us a new BOS.  If we're using VM_STACK, then
68664921Smarcel		 * mmap will just map the top SGROWSIZ bytes, and let
68764921Smarcel		 * the stack grow down to the limit at BOS.  If we're
68864921Smarcel		 * not using VM_STACK we map the full stack, since we
68964921Smarcel		 * don't have a way to autogrow it.
69064921Smarcel		 */
69164921Smarcel		bsd_args.addr -= bsd_args.len;
69264921Smarcel	} else {
693104893Ssobomax		bsd_args.addr = linux_args->addr;
694104893Ssobomax		bsd_args.len  = linux_args->len;
69564921Smarcel	}
69664921Smarcel
697162472Snetchild	bsd_args.prot = linux_args->prot;
698104893Ssobomax	if (linux_args->flags & LINUX_MAP_ANON)
69964921Smarcel		bsd_args.fd = -1;
700162472Snetchild	else {
701162472Snetchild		/*
702162472Snetchild		 * Linux follows Solaris mmap(2) description:
703162472Snetchild		 * The file descriptor fildes is opened with
704162472Snetchild		 * read permission, regardless of the
705162472Snetchild		 * protection options specified.
706162472Snetchild		 * If PROT_WRITE is specified, the application
707162472Snetchild		 * must have opened the file descriptor
708162472Snetchild		 * fildes with write permission unless
709162472Snetchild		 * MAP_PRIVATE is specified in the flag
710162472Snetchild		 * argument as described below.
711162472Snetchild		 */
712162472Snetchild
713162472Snetchild		if ((error = fget(td, linux_args->fd, &fp)) != 0)
714162472Snetchild			return error;
715162472Snetchild		if (fp->f_type != DTYPE_VNODE) {
716162472Snetchild			fdrop(fp, td);
717162472Snetchild			return EINVAL;
718162472Snetchild		}
719162472Snetchild
720162472Snetchild		/* Linux mmap() just fails for O_WRONLY files */
721162472Snetchild		if (! (fp->f_flag & FREAD)) {
722162472Snetchild			fdrop(fp, td);
723162472Snetchild			return EACCES;
724162472Snetchild		}
725162472Snetchild
726104893Ssobomax		bsd_args.fd = linux_args->fd;
727162472Snetchild		fdrop(fp, td);
728162472Snetchild	}
729104893Ssobomax	bsd_args.pos = linux_args->pos;
73064921Smarcel	bsd_args.pad = 0;
73164921Smarcel
73264921Smarcel#ifdef DEBUG
73372543Sjlemon	if (ldebug(mmap))
734112630Smdodd		printf("-> %s(%p, %d, %d, 0x%08x, %d, 0x%x)\n",
735112630Smdodd		    __func__,
73672543Sjlemon		    (void *)bsd_args.addr, bsd_args.len, bsd_args.prot,
73772543Sjlemon		    bsd_args.flags, bsd_args.fd, (int)bsd_args.pos);
73864921Smarcel#endif
739112630Smdodd	error = mmap(td, &bsd_args);
740112630Smdodd#ifdef DEBUG
741112630Smdodd	if (ldebug(mmap))
742112630Smdodd		printf("-> %s() return: 0x%x (0x%08x)\n",
743112630Smdodd			__func__, error, (u_int)td->td_retval[0]);
744112630Smdodd#endif
745112630Smdodd	return (error);
74664921Smarcel}
74764921Smarcel
74864921Smarcelint
74983366Sjulianlinux_pipe(struct thread *td, struct linux_pipe_args *args)
75064921Smarcel{
75164921Smarcel	int error;
75264921Smarcel	int reg_edx;
75364921Smarcel
75464921Smarcel#ifdef DEBUG
75572543Sjlemon	if (ldebug(pipe))
75672543Sjlemon		printf(ARGS(pipe, "*"));
75764921Smarcel#endif
75864921Smarcel
75983366Sjulian	reg_edx = td->td_retval[1];
76083366Sjulian	error = pipe(td, 0);
76164921Smarcel	if (error) {
76283366Sjulian		td->td_retval[1] = reg_edx;
76364921Smarcel		return (error);
76464921Smarcel	}
76564921Smarcel
76683366Sjulian	error = copyout(td->td_retval, args->pipefds, 2*sizeof(int));
76764921Smarcel	if (error) {
76883366Sjulian		td->td_retval[1] = reg_edx;
76964921Smarcel		return (error);
77064921Smarcel	}
77164921Smarcel
77283366Sjulian	td->td_retval[1] = reg_edx;
77383366Sjulian	td->td_retval[0] = 0;
77464921Smarcel	return (0);
77564921Smarcel}
77664921Smarcel
77764921Smarcelint
77883366Sjulianlinux_ioperm(struct thread *td, struct linux_ioperm_args *args)
77964921Smarcel{
780140862Ssobomax	int error;
781140862Ssobomax	struct i386_ioperm_args iia;
78264921Smarcel
783140862Ssobomax	iia.start = args->start;
784140862Ssobomax	iia.length = args->length;
785140862Ssobomax	iia.enable = args->enable;
786140862Ssobomax	mtx_lock(&Giant);
787140862Ssobomax	error = i386_set_ioperm(td, &iia);
788140862Ssobomax	mtx_unlock(&Giant);
789140862Ssobomax	return (error);
79064921Smarcel}
79164921Smarcel
79264921Smarcelint
79383366Sjulianlinux_iopl(struct thread *td, struct linux_iopl_args *args)
79464921Smarcel{
79564921Smarcel	int error;
79664921Smarcel
79764921Smarcel	if (args->level < 0 || args->level > 3)
79864921Smarcel		return (EINVAL);
79993593Sjhb	if ((error = suser(td)) != 0)
80064921Smarcel		return (error);
80191406Sjhb	if ((error = securelevel_gt(td->td_ucred, 0)) != 0)
80283981Srwatson		return (error);
80383366Sjulian	td->td_frame->tf_eflags = (td->td_frame->tf_eflags & ~PSL_IOPL) |
80464921Smarcel	    (args->level * (PSL_IOPL / 3));
80564921Smarcel	return (0);
80664921Smarcel}
80764921Smarcel
80864921Smarcelint
809105441Smarkmlinux_modify_ldt(struct thread *td, struct linux_modify_ldt_args *uap)
81064921Smarcel{
81164921Smarcel	int error;
812140862Ssobomax	struct i386_ldt_args ldt;
81383221Smarcel	struct l_descriptor ld;
814140862Ssobomax	union descriptor desc;
81564921Smarcel
81664921Smarcel	if (uap->ptr == NULL)
81764921Smarcel		return (EINVAL);
81864921Smarcel
81964921Smarcel	switch (uap->func) {
82064921Smarcel	case 0x00: /* read_ldt */
821140862Ssobomax		ldt.start = 0;
822140862Ssobomax		ldt.descs = uap->ptr;
823140862Ssobomax		ldt.num = uap->bytecount / sizeof(union descriptor);
824140862Ssobomax		mtx_lock(&Giant);
825140862Ssobomax		error = i386_get_ldt(td, &ldt);
82683366Sjulian		td->td_retval[0] *= sizeof(union descriptor);
827140862Ssobomax		mtx_unlock(&Giant);
82864921Smarcel		break;
82964921Smarcel	case 0x01: /* write_ldt */
83064921Smarcel	case 0x11: /* write_ldt */
83164921Smarcel		if (uap->bytecount != sizeof(ld))
83264921Smarcel			return (EINVAL);
83364921Smarcel
83464921Smarcel		error = copyin(uap->ptr, &ld, sizeof(ld));
83564921Smarcel		if (error)
83664921Smarcel			return (error);
83764921Smarcel
838140862Ssobomax		ldt.start = ld.entry_number;
839140862Ssobomax		ldt.descs = &desc;
840140862Ssobomax		ldt.num = 1;
841140862Ssobomax		desc.sd.sd_lolimit = (ld.limit & 0x0000ffff);
842140862Ssobomax		desc.sd.sd_hilimit = (ld.limit & 0x000f0000) >> 16;
843140862Ssobomax		desc.sd.sd_lobase = (ld.base_addr & 0x00ffffff);
844140862Ssobomax		desc.sd.sd_hibase = (ld.base_addr & 0xff000000) >> 24;
845140862Ssobomax		desc.sd.sd_type = SDT_MEMRO | ((ld.read_exec_only ^ 1) << 1) |
84664921Smarcel			(ld.contents << 2);
847140862Ssobomax		desc.sd.sd_dpl = 3;
848140862Ssobomax		desc.sd.sd_p = (ld.seg_not_present ^ 1);
849140862Ssobomax		desc.sd.sd_xx = 0;
850140862Ssobomax		desc.sd.sd_def32 = ld.seg_32bit;
851140862Ssobomax		desc.sd.sd_gran = ld.limit_in_pages;
852140862Ssobomax		mtx_lock(&Giant);
853140862Ssobomax		error = i386_set_ldt(td, &ldt, &desc);
854140862Ssobomax		mtx_unlock(&Giant);
85564921Smarcel		break;
85664921Smarcel	default:
85764921Smarcel		error = EINVAL;
85864921Smarcel		break;
85964921Smarcel	}
86064921Smarcel
86164921Smarcel	if (error == EOPNOTSUPP) {
86264921Smarcel		printf("linux: modify_ldt needs kernel option USER_LDT\n");
86364921Smarcel		error = ENOSYS;
86464921Smarcel	}
86564921Smarcel
86664921Smarcel	return (error);
86764921Smarcel}
86864921Smarcel
86964921Smarcelint
87083366Sjulianlinux_sigaction(struct thread *td, struct linux_sigaction_args *args)
87164921Smarcel{
87283221Smarcel	l_osigaction_t osa;
87383221Smarcel	l_sigaction_t act, oact;
87464921Smarcel	int error;
87564921Smarcel
87664921Smarcel#ifdef DEBUG
87772543Sjlemon	if (ldebug(sigaction))
87872543Sjlemon		printf(ARGS(sigaction, "%d, %p, %p"),
87972543Sjlemon		    args->sig, (void *)args->nsa, (void *)args->osa);
88064921Smarcel#endif
88164921Smarcel
88264921Smarcel	if (args->nsa != NULL) {
883111797Sdes		error = copyin(args->nsa, &osa, sizeof(l_osigaction_t));
88464921Smarcel		if (error)
88564921Smarcel			return (error);
88664921Smarcel		act.lsa_handler = osa.lsa_handler;
88764921Smarcel		act.lsa_flags = osa.lsa_flags;
88864921Smarcel		act.lsa_restorer = osa.lsa_restorer;
88964921Smarcel		LINUX_SIGEMPTYSET(act.lsa_mask);
89064921Smarcel		act.lsa_mask.__bits[0] = osa.lsa_mask;
89164921Smarcel	}
89264921Smarcel
89383366Sjulian	error = linux_do_sigaction(td, args->sig, args->nsa ? &act : NULL,
89464921Smarcel	    args->osa ? &oact : NULL);
89564921Smarcel
89664921Smarcel	if (args->osa != NULL && !error) {
89764921Smarcel		osa.lsa_handler = oact.lsa_handler;
89864921Smarcel		osa.lsa_flags = oact.lsa_flags;
89964921Smarcel		osa.lsa_restorer = oact.lsa_restorer;
90064921Smarcel		osa.lsa_mask = oact.lsa_mask.__bits[0];
901111797Sdes		error = copyout(&osa, args->osa, sizeof(l_osigaction_t));
90264921Smarcel	}
90364921Smarcel
90464921Smarcel	return (error);
90564921Smarcel}
90664921Smarcel
90764921Smarcel/*
90864921Smarcel * Linux has two extra args, restart and oldmask.  We dont use these,
90964921Smarcel * but it seems that "restart" is actually a context pointer that
91064921Smarcel * enables the signal to happen with a different register set.
91164921Smarcel */
91264921Smarcelint
91383366Sjulianlinux_sigsuspend(struct thread *td, struct linux_sigsuspend_args *args)
91464921Smarcel{
915102814Siedowse	sigset_t sigmask;
91683221Smarcel	l_sigset_t mask;
91764921Smarcel
91864921Smarcel#ifdef DEBUG
91972543Sjlemon	if (ldebug(sigsuspend))
92072543Sjlemon		printf(ARGS(sigsuspend, "%08lx"), (unsigned long)args->mask);
92164921Smarcel#endif
92264921Smarcel
92364921Smarcel	LINUX_SIGEMPTYSET(mask);
92464921Smarcel	mask.__bits[0] = args->mask;
925102814Siedowse	linux_to_bsd_sigset(&mask, &sigmask);
926102814Siedowse	return (kern_sigsuspend(td, sigmask));
92764921Smarcel}
92864921Smarcel
92964921Smarcelint
930105441Smarkmlinux_rt_sigsuspend(struct thread *td, struct linux_rt_sigsuspend_args *uap)
93164921Smarcel{
93283221Smarcel	l_sigset_t lmask;
933102814Siedowse	sigset_t sigmask;
93464921Smarcel	int error;
93564921Smarcel
93664921Smarcel#ifdef DEBUG
93772543Sjlemon	if (ldebug(rt_sigsuspend))
93872543Sjlemon		printf(ARGS(rt_sigsuspend, "%p, %d"),
93972543Sjlemon		    (void *)uap->newset, uap->sigsetsize);
94064921Smarcel#endif
94164921Smarcel
94283221Smarcel	if (uap->sigsetsize != sizeof(l_sigset_t))
94364921Smarcel		return (EINVAL);
94464921Smarcel
94583221Smarcel	error = copyin(uap->newset, &lmask, sizeof(l_sigset_t));
94664921Smarcel	if (error)
94764921Smarcel		return (error);
94864921Smarcel
949102814Siedowse	linux_to_bsd_sigset(&lmask, &sigmask);
950102814Siedowse	return (kern_sigsuspend(td, sigmask));
95164921Smarcel}
95264921Smarcel
95364921Smarcelint
95483366Sjulianlinux_pause(struct thread *td, struct linux_pause_args *args)
95564921Smarcel{
95683366Sjulian	struct proc *p = td->td_proc;
957102814Siedowse	sigset_t sigmask;
95864921Smarcel
95964921Smarcel#ifdef DEBUG
96072543Sjlemon	if (ldebug(pause))
96172543Sjlemon		printf(ARGS(pause, ""));
96264921Smarcel#endif
96364921Smarcel
96471494Sjhb	PROC_LOCK(p);
965112888Sjeff	sigmask = td->td_sigmask;
96671494Sjhb	PROC_UNLOCK(p);
967102814Siedowse	return (kern_sigsuspend(td, sigmask));
96864921Smarcel}
96964921Smarcel
97064921Smarcelint
97183366Sjulianlinux_sigaltstack(struct thread *td, struct linux_sigaltstack_args *uap)
97264921Smarcel{
973102814Siedowse	stack_t ss, oss;
97483221Smarcel	l_stack_t lss;
97564921Smarcel	int error;
97664921Smarcel
97764921Smarcel#ifdef DEBUG
97872543Sjlemon	if (ldebug(sigaltstack))
97972543Sjlemon		printf(ARGS(sigaltstack, "%p, %p"), uap->uss, uap->uoss);
98064921Smarcel#endif
98164921Smarcel
982102814Siedowse	if (uap->uss != NULL) {
98383221Smarcel		error = copyin(uap->uss, &lss, sizeof(l_stack_t));
98467051Sgallatin		if (error)
98567051Sgallatin			return (error);
98664921Smarcel
987102814Siedowse		ss.ss_sp = lss.ss_sp;
988102814Siedowse		ss.ss_size = lss.ss_size;
989102814Siedowse		ss.ss_flags = linux_to_bsd_sigaltstack(lss.ss_flags);
99067051Sgallatin	}
991134269Sjhb	error = kern_sigaltstack(td, (uap->uss != NULL) ? &ss : NULL,
992134269Sjhb	    (uap->uoss != NULL) ? &oss : NULL);
993102814Siedowse	if (!error && uap->uoss != NULL) {
994102814Siedowse		lss.ss_sp = oss.ss_sp;
995102814Siedowse		lss.ss_size = oss.ss_size;
996102814Siedowse		lss.ss_flags = bsd_to_linux_sigaltstack(oss.ss_flags);
99783221Smarcel		error = copyout(&lss, uap->uoss, sizeof(l_stack_t));
99864921Smarcel	}
99964921Smarcel
100064921Smarcel	return (error);
100164921Smarcel}
1002104893Ssobomax
1003104893Ssobomaxint
1004104893Ssobomaxlinux_ftruncate64(struct thread *td, struct linux_ftruncate64_args *args)
1005104893Ssobomax{
1006104893Ssobomax	struct ftruncate_args sa;
1007104893Ssobomax
1008104893Ssobomax#ifdef DEBUG
1009104893Ssobomax	if (ldebug(ftruncate64))
1010104984Sbde		printf(ARGS(ftruncate64, "%u, %jd"), args->fd,
1011104984Sbde		    (intmax_t)args->length);
1012104893Ssobomax#endif
1013104893Ssobomax
1014104893Ssobomax	sa.fd = args->fd;
1015104893Ssobomax	sa.pad = 0;
1016104893Ssobomax	sa.length = args->length;
1017104893Ssobomax	return ftruncate(td, &sa);
1018104893Ssobomax}
1019134838Sdfr
1020134838Sdfrint
1021134838Sdfrlinux_set_thread_area(struct thread *td, struct linux_set_thread_area_args *args)
1022134838Sdfr{
1023161310Snetchild	struct l_user_desc info;
1024161310Snetchild	int error;
1025161310Snetchild	int idx;
1026161310Snetchild	int a[2];
1027161310Snetchild	struct segment_descriptor sd;
1028161310Snetchild
1029161310Snetchild	error = copyin(args->desc, &info, sizeof(struct l_user_desc));
1030161310Snetchild	if (error)
1031161310Snetchild		return (error);
1032161310Snetchild
1033161310Snetchild#ifdef DEBUG
1034161310Snetchild	if (ldebug(set_thread_area))
1035161310Snetchild	   	printf(ARGS(set_thread_area, "%i, %x, %x, %i, %i, %i, %i, %i, %i\n"),
1036161310Snetchild		      info.entry_number,
1037161310Snetchild      		      info.base_addr,
1038161310Snetchild      		      info.limit,
1039161310Snetchild      		      info.seg_32bit,
1040161310Snetchild		      info.contents,
1041161310Snetchild      		      info.read_exec_only,
1042161310Snetchild      		      info.limit_in_pages,
1043161310Snetchild      		      info.seg_not_present,
1044161310Snetchild      		      info.useable);
1045161310Snetchild#endif
1046161310Snetchild
1047161310Snetchild	idx = info.entry_number;
1048161365Snetchild	/*
1049161365Snetchild	 * Semantics of linux version: every thread in the system has array
1050161310Snetchild	 * of 3 tls descriptors. 1st is GLIBC TLS, 2nd is WINE, 3rd unknown. This
1051161310Snetchild	 * syscall loads one of the selected tls decriptors with a value
1052161310Snetchild	 * and also loads GDT descriptors 6, 7 and 8 with the content of the per-thread
1053161310Snetchild	 * descriptors.
1054161310Snetchild	 *
1055161310Snetchild	 * Semantics of fbsd version: I think we can ignore that linux has 3 per-thread
1056161310Snetchild	 * descriptors and use just the 1st one. The tls_array[] is used only in
1057161310Snetchild	 * set/get-thread_area() syscalls and for loading the GDT descriptors. In fbsd
1058161310Snetchild	 * we use just one GDT descriptor for TLS so we will load just one.
1059161310Snetchild	 * XXX: this doesnt work when user-space process tries to use more then 1 TLS segment
1060161310Snetchild	 * comment in the linux sources says wine might do that.
1061134838Sdfr	 */
1062161310Snetchild
1063161365Snetchild	/*
1064161365Snetchild	 * we support just GLIBC TLS now
1065161310Snetchild	 * we should let 3 proceed as well because we use this segment so
1066161310Snetchild	 * if code does two subsequent calls it should succeed
1067161310Snetchild	 */
1068161310Snetchild	if (idx != 6 && idx != -1 && idx != 3)
1069161310Snetchild		return (EINVAL);
1070161310Snetchild
1071161365Snetchild	/*
1072161365Snetchild	 * we have to copy out the GDT entry we use
1073161310Snetchild	 * FreeBSD uses GDT entry #3 for storing %gs so load that
1074161310Snetchild	 * XXX: what if userspace program doesnt check this value and tries
1075161310Snetchild	 * to use 6, 7 or 8?
1076161310Snetchild	 */
1077161310Snetchild	idx = info.entry_number = 3;
1078161310Snetchild	error = copyout(&info, args->desc, sizeof(struct l_user_desc));
1079161310Snetchild	if (error)
1080161310Snetchild		return (error);
1081161310Snetchild
1082161310Snetchild	if (LDT_empty(&info)) {
1083161310Snetchild		a[0] = 0;
1084161310Snetchild		a[1] = 0;
1085161310Snetchild	} else {
1086161310Snetchild		a[0] = LDT_entry_a(&info);
1087161310Snetchild		a[1] = LDT_entry_b(&info);
1088161310Snetchild	}
1089161310Snetchild
1090161310Snetchild	memcpy(&sd, &a, sizeof(a));
1091161310Snetchild#ifdef DEBUG
1092161310Snetchild	if (ldebug(set_thread_area))
1093161310Snetchild	   	printf("Segment created in set_thread_area: lobase: %x, hibase: %x, lolimit: %x, hilimit: %x, type: %i, dpl: %i, p: %i, xx: %i, def32: %i, gran: %i\n", sd.sd_lobase,
1094161310Snetchild			sd.sd_hibase,
1095161310Snetchild			sd.sd_lolimit,
1096161310Snetchild			sd.sd_hilimit,
1097161310Snetchild			sd.sd_type,
1098161310Snetchild			sd.sd_dpl,
1099161310Snetchild			sd.sd_p,
1100161310Snetchild			sd.sd_xx,
1101161310Snetchild			sd.sd_def32,
1102161310Snetchild			sd.sd_gran);
1103161310Snetchild#endif
1104161310Snetchild
1105161310Snetchild	/* this is taken from i386 version of cpu_set_user_tls() */
1106161310Snetchild	critical_enter();
1107161310Snetchild	/* set %gs */
1108161310Snetchild	td->td_pcb->pcb_gsd = sd;
1109161310Snetchild	PCPU_GET(fsgs_gdt)[1] = sd;
1110161310Snetchild	load_gs(GSEL(GUGS_SEL, SEL_UPL));
1111161310Snetchild	critical_exit();
1112161310Snetchild
1113161310Snetchild	return (0);
1114134838Sdfr}
1115134838Sdfr
1116134838Sdfrint
1117161310Snetchildlinux_get_thread_area(struct thread *td, struct linux_get_thread_area_args *args)
1118134838Sdfr{
1119161310Snetchild
1120161310Snetchild	struct l_user_desc info;
1121161310Snetchild	int error;
1122161310Snetchild	int idx;
1123161310Snetchild	struct l_desc_struct desc;
1124161310Snetchild	struct segment_descriptor sd;
1125134838Sdfr
1126161310Snetchild#ifdef DEBUG
1127161310Snetchild	if (ldebug(get_thread_area))
1128161310Snetchild		printf(ARGS(get_thread_area, "%p"), args->desc);
1129161310Snetchild#endif
1130161310Snetchild
1131161310Snetchild	error = copyin(args->desc, &info, sizeof(struct l_user_desc));
1132161310Snetchild	if (error)
1133161310Snetchild		return (error);
1134161310Snetchild
1135161310Snetchild	idx = info.entry_number;
1136161310Snetchild	/* XXX: I am not sure if we want 3 to be allowed too. */
1137161310Snetchild	if (idx != 6 && idx != 3)
1138161310Snetchild		return (EINVAL);
1139161310Snetchild
1140161310Snetchild	idx = 3;
1141161310Snetchild
1142161310Snetchild	memset(&info, 0, sizeof(info));
1143161310Snetchild
1144161310Snetchild	sd = PCPU_GET(fsgs_gdt)[1];
1145161310Snetchild
1146161310Snetchild	memcpy(&desc, &sd, sizeof(desc));
1147161310Snetchild
1148161310Snetchild	info.entry_number = idx;
1149161310Snetchild	info.base_addr = GET_BASE(&desc);
1150161310Snetchild	info.limit = GET_LIMIT(&desc);
1151161310Snetchild	info.seg_32bit = GET_32BIT(&desc);
1152161310Snetchild	info.contents = GET_CONTENTS(&desc);
1153161310Snetchild	info.read_exec_only = !GET_WRITABLE(&desc);
1154161310Snetchild	info.limit_in_pages = GET_LIMIT_PAGES(&desc);
1155161310Snetchild	info.seg_not_present = !GET_PRESENT(&desc);
1156161310Snetchild	info.useable = GET_USEABLE(&desc);
1157161310Snetchild
1158161310Snetchild	error = copyout(&info, args->desc, sizeof(struct l_user_desc));
1159161310Snetchild	if (error)
1160161310Snetchild	   	return (EFAULT);
1161161310Snetchild
1162134838Sdfr	return (0);
1163134838Sdfr}
1164134838Sdfr
1165161310Snetchild/* copied from kern/kern_time.c */
/*
 * Linux timer_create(): forward to ktimer_create(), passing the Linux
 * argument structure through a pointer cast.
 * NOTE(review): presumably relies on both argument structures sharing a
 * layout -- confirm.
 */
int
linux_timer_create(struct thread *td, struct linux_timer_create_args *args)
{
	struct ktimer_create_args *bsd_args;

	bsd_args = (struct ktimer_create_args *)args;
	return (ktimer_create(td, bsd_args));
}
1171134838Sdfr
/*
 * Linux timer_settime(): forward to ktimer_settime(), passing the Linux
 * argument structure through a pointer cast.
 * NOTE(review): presumably relies on both argument structures sharing a
 * layout -- confirm.
 */
int
linux_timer_settime(struct thread *td, struct linux_timer_settime_args *args)
{
	struct ktimer_settime_args *bsd_args;

	bsd_args = (struct ktimer_settime_args *)args;
	return (ktimer_settime(td, bsd_args));
}
1177134838Sdfr
/*
 * Linux timer_gettime(): forward to ktimer_gettime(), passing the Linux
 * argument structure through a pointer cast.
 * NOTE(review): presumably relies on both argument structures sharing a
 * layout -- confirm.
 */
int
linux_timer_gettime(struct thread *td, struct linux_timer_gettime_args *args)
{
	struct ktimer_gettime_args *bsd_args;

	bsd_args = (struct ktimer_gettime_args *)args;
	return (ktimer_gettime(td, bsd_args));
}
1183161310Snetchild
/*
 * Linux timer_getoverrun(): forward to ktimer_getoverrun(), passing the
 * Linux argument structure through a pointer cast.
 * NOTE(review): presumably relies on both argument structures sharing a
 * layout -- confirm.
 */
int
linux_timer_getoverrun(struct thread *td, struct linux_timer_getoverrun_args *args)
{
	struct ktimer_getoverrun_args *bsd_args;

	bsd_args = (struct ktimer_getoverrun_args *)args;
	return (ktimer_getoverrun(td, bsd_args));
}
1189161310Snetchild
/*
 * Linux timer_delete(): forward to ktimer_delete(), passing the Linux
 * argument structure through a pointer cast.
 * NOTE(review): presumably relies on both argument structures sharing a
 * layout -- confirm.
 */
int
linux_timer_delete(struct thread *td, struct linux_timer_delete_args *args)
{
	struct ktimer_delete_args *bsd_args;

	bsd_args = (struct ktimer_delete_args *)args;
	return (ktimer_delete(td, bsd_args));
}
1195161310Snetchild
/* XXX: this won't work with module - convert it */
1197161310Snetchildint
1198161310Snetchildlinux_mq_open(struct thread *td, struct linux_mq_open_args *args)
1199161310Snetchild{
1200161310Snetchild#ifdef P1003_1B_MQUEUE
1201161310Snetchild   	return kmq_open(td, (struct kmq_open_args *) args);
1202161310Snetchild#else
1203161310Snetchild	return (ENOSYS);
1204161310Snetchild#endif
1205161310Snetchild}
1206161310Snetchild
1207161310Snetchildint
1208161310Snetchildlinux_mq_unlink(struct thread *td, struct linux_mq_unlink_args *args)
1209161310Snetchild{
1210161310Snetchild#ifdef P1003_1B_MQUEUE
1211161310Snetchild   	return kmq_unlink(td, (struct kmq_unlink_args *) args);
1212161310Snetchild#else
1213161310Snetchild	return (ENOSYS);
1214161310Snetchild#endif
1215161310Snetchild}
1216161310Snetchild
1217161310Snetchildint
1218161310Snetchildlinux_mq_timedsend(struct thread *td, struct linux_mq_timedsend_args *args)
1219161310Snetchild{
1220161310Snetchild#ifdef P1003_1B_MQUEUE
1221161310Snetchild   	return kmq_timedsend(td, (struct kmq_timedsend_args *) args);
1222161310Snetchild#else
1223161310Snetchild	return (ENOSYS);
1224161310Snetchild#endif
1225161310Snetchild}
1226161310Snetchild
1227161310Snetchildint
1228161310Snetchildlinux_mq_timedreceive(struct thread *td, struct linux_mq_timedreceive_args *args)
1229161310Snetchild{
1230161310Snetchild#ifdef P1003_1B_MQUEUE
1231161310Snetchild   	return kmq_timedreceive(td, (struct kmq_timedreceive_args *) args);
1232161310Snetchild#else
1233161310Snetchild	return (ENOSYS);
1234161310Snetchild#endif
1235161310Snetchild}
1236161310Snetchild
1237161310Snetchildint
1238161310Snetchildlinux_mq_notify(struct thread *td, struct linux_mq_notify_args *args)
1239161310Snetchild{
1240161310Snetchild#ifdef P1003_1B_MQUEUE
1241161310Snetchild	return kmq_notify(td, (struct kmq_notify_args *) args);
1242161310Snetchild#else
1243161310Snetchild	return (ENOSYS);
1244161310Snetchild#endif
1245161310Snetchild}
1246161310Snetchild
1247161310Snetchildint
1248161310Snetchildlinux_mq_getsetattr(struct thread *td, struct linux_mq_getsetattr_args *args)
1249161310Snetchild{
1250161310Snetchild#ifdef P1003_1B_MQUEUE
1251161310Snetchild   	return kmq_setattr(td, (struct kmq_setattr_args *) args);
1252161310Snetchild#else
1253161310Snetchild	return (ENOSYS);
1254161310Snetchild#endif
1255161310Snetchild}
1256161310Snetchild
1257