linux_machdep.c revision 91437
164921Smarcel/*-
264921Smarcel * Copyright (c) 2000 Marcel Moolenaar
364921Smarcel * All rights reserved.
464921Smarcel *
564921Smarcel * Redistribution and use in source and binary forms, with or without
664921Smarcel * modification, are permitted provided that the following conditions
764921Smarcel * are met:
864921Smarcel * 1. Redistributions of source code must retain the above copyright
964921Smarcel *    notice, this list of conditions and the following disclaimer
1064921Smarcel *    in this position and unchanged.
1164921Smarcel * 2. Redistributions in binary form must reproduce the above copyright
1264921Smarcel *    notice, this list of conditions and the following disclaimer in the
1364921Smarcel *    documentation and/or other materials provided with the distribution.
1464921Smarcel * 3. The name of the author may not be used to endorse or promote products
1565067Smarcel *    derived from this software without specific prior written permission.
1664921Smarcel *
1764921Smarcel * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
1864921Smarcel * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
1964921Smarcel * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
2064921Smarcel * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
2164921Smarcel * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
2264921Smarcel * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
2364921Smarcel * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
2464921Smarcel * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
2564921Smarcel * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
2664921Smarcel * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2764921Smarcel *
2864921Smarcel * $FreeBSD: head/sys/i386/linux/linux_machdep.c 91437 2002-02-27 23:21:46Z peter $
2964921Smarcel */
3064921Smarcel
3164921Smarcel#include <sys/param.h>
3276166Smarkm#include <sys/systm.h>
3384811Sjhb#include <sys/lock.h>
3464921Smarcel#include <sys/mman.h>
3576166Smarkm#include <sys/mutex.h>
3664921Smarcel#include <sys/proc.h>
3776166Smarkm#include <sys/resource.h>
3876166Smarkm#include <sys/resourcevar.h>
3964921Smarcel#include <sys/sysproto.h>
4064921Smarcel#include <sys/unistd.h>
4164921Smarcel
4264921Smarcel#include <machine/frame.h>
4364921Smarcel#include <machine/psl.h>
4464921Smarcel#include <machine/segments.h>
4564921Smarcel#include <machine/sysarch.h>
4664921Smarcel
4767238Sgallatin#include <vm/vm.h>
4867238Sgallatin#include <vm/pmap.h>
4967238Sgallatin#include <vm/vm_map.h>
5067238Sgallatin
5164921Smarcel#include <i386/linux/linux.h>
5268583Smarcel#include <i386/linux/linux_proto.h>
5364921Smarcel#include <compat/linux/linux_ipc.h>
5464921Smarcel#include <compat/linux/linux_signal.h>
5564921Smarcel#include <compat/linux/linux_util.h>
5664921Smarcel
5783221Smarcelstruct l_descriptor {
5883221Smarcel	l_uint		entry_number;
5983221Smarcel	l_ulong		base_addr;
6083221Smarcel	l_uint		limit;
6183221Smarcel	l_uint		seg_32bit:1;
6283221Smarcel	l_uint		contents:2;
6383221Smarcel	l_uint		read_exec_only:1;
6483221Smarcel	l_uint		limit_in_pages:1;
6583221Smarcel	l_uint		seg_not_present:1;
6683221Smarcel	l_uint		useable:1;
6764921Smarcel};
6864921Smarcel
6983221Smarcelstruct l_old_select_argv {
7083221Smarcel	l_int		nfds;
7183221Smarcel	l_fd_set	*readfds;
7283221Smarcel	l_fd_set	*writefds;
7383221Smarcel	l_fd_set	*exceptfds;
7483221Smarcel	struct l_timeval	*timeout;
7564921Smarcel};
7664921Smarcel
7764921Smarcelint
7867051Sgallatinlinux_to_bsd_sigaltstack(int lsa)
7967051Sgallatin{
8067051Sgallatin	int bsa = 0;
8167051Sgallatin
8267051Sgallatin	if (lsa & LINUX_SS_DISABLE)
8367051Sgallatin		bsa |= SS_DISABLE;
8467051Sgallatin	if (lsa & LINUX_SS_ONSTACK)
8567051Sgallatin		bsa |= SS_ONSTACK;
8667051Sgallatin	return (bsa);
8767051Sgallatin}
8867051Sgallatin
8967051Sgallatinint
9067051Sgallatinbsd_to_linux_sigaltstack(int bsa)
9167051Sgallatin{
9267051Sgallatin	int lsa = 0;
9367051Sgallatin
9467051Sgallatin	if (bsa & SS_DISABLE)
9567051Sgallatin		lsa |= LINUX_SS_DISABLE;
9667051Sgallatin	if (bsa & SS_ONSTACK)
9767051Sgallatin		lsa |= LINUX_SS_ONSTACK;
9867051Sgallatin	return (lsa);
9967051Sgallatin}
10067051Sgallatin
10167051Sgallatinint
10283366Sjulianlinux_execve(struct thread *td, struct linux_execve_args *args)
10364921Smarcel{
10464921Smarcel	struct execve_args bsd;
10564921Smarcel	caddr_t sg;
10664921Smarcel
10764921Smarcel	sg = stackgap_init();
10883366Sjulian	CHECKALTEXIST(td, &sg, args->path);
10964921Smarcel
11064921Smarcel#ifdef DEBUG
11172543Sjlemon	if (ldebug(execve))
11272543Sjlemon		printf(ARGS(execve, "%s"), args->path);
11364921Smarcel#endif
11464921Smarcel
11564921Smarcel	bsd.fname = args->path;
11664921Smarcel	bsd.argv = args->argp;
11764921Smarcel	bsd.envv = args->envp;
11883366Sjulian	return (execve(td, &bsd));
11964921Smarcel}
12064921Smarcel
12183221Smarcelstruct l_ipc_kludge {
12283221Smarcel	struct l_msgbuf *msgp;
12383221Smarcel	l_long msgtyp;
12483221Smarcel};
12583221Smarcel
12664921Smarcelint
12783366Sjulianlinux_ipc(struct thread *td, struct linux_ipc_args *args)
12864921Smarcel{
12983221Smarcel
13083221Smarcel	switch (args->what & 0xFFFF) {
13183221Smarcel	case LINUX_SEMOP: {
13283221Smarcel		struct linux_semop_args a;
13383221Smarcel
13483221Smarcel		a.semid = args->arg1;
13583221Smarcel		a.tsops = args->ptr;
13683221Smarcel		a.nsops = args->arg2;
13783366Sjulian		return (linux_semop(td, &a));
13864921Smarcel	}
13983221Smarcel	case LINUX_SEMGET: {
14083221Smarcel		struct linux_semget_args a;
14164921Smarcel
14283221Smarcel		a.key = args->arg1;
14383221Smarcel		a.nsems = args->arg2;
14483221Smarcel		a.semflg = args->arg3;
14583366Sjulian		return (linux_semget(td, &a));
14683221Smarcel	}
14783221Smarcel	case LINUX_SEMCTL: {
14883221Smarcel		struct linux_semctl_args a;
14983221Smarcel		int error;
15083221Smarcel
15183221Smarcel		a.semid = args->arg1;
15283221Smarcel		a.semnum = args->arg2;
15383221Smarcel		a.cmd = args->arg3;
15483221Smarcel		error = copyin((caddr_t)args->ptr, &a.arg, sizeof(a.arg));
15583221Smarcel		if (error)
15683221Smarcel			return (error);
15783366Sjulian		return (linux_semctl(td, &a));
15883221Smarcel	}
15983221Smarcel	case LINUX_MSGSND: {
16083221Smarcel		struct linux_msgsnd_args a;
16183221Smarcel
16283221Smarcel		a.msqid = args->arg1;
16383221Smarcel		a.msgp = args->ptr;
16483221Smarcel		a.msgsz = args->arg2;
16583221Smarcel		a.msgflg = args->arg3;
16683366Sjulian		return (linux_msgsnd(td, &a));
16783221Smarcel	}
16883221Smarcel	case LINUX_MSGRCV: {
16983221Smarcel		struct linux_msgrcv_args a;
17083221Smarcel
17183221Smarcel		a.msqid = args->arg1;
17283221Smarcel		a.msgsz = args->arg2;
17383221Smarcel		a.msgflg = args->arg3;
17483221Smarcel		if ((args->what >> 16) == 0) {
17583221Smarcel			struct l_ipc_kludge tmp;
17683221Smarcel			int error;
17783221Smarcel
17883221Smarcel			if (args->ptr == NULL)
17983221Smarcel				return (EINVAL);
18083221Smarcel			error = copyin((caddr_t)args->ptr, &tmp, sizeof(tmp));
18183221Smarcel			if (error)
18283221Smarcel				return (error);
18383221Smarcel			a.msgp = tmp.msgp;
18483221Smarcel			a.msgtyp = tmp.msgtyp;
18583221Smarcel		} else {
18683221Smarcel			a.msgp = args->ptr;
18783221Smarcel			a.msgtyp = args->arg5;
18883221Smarcel		}
18983366Sjulian		return (linux_msgrcv(td, &a));
19083221Smarcel	}
19183221Smarcel	case LINUX_MSGGET: {
19283221Smarcel		struct linux_msgget_args a;
19383221Smarcel
19483221Smarcel		a.key = args->arg1;
19583221Smarcel		a.msgflg = args->arg2;
19683366Sjulian		return (linux_msgget(td, &a));
19783221Smarcel	}
19883221Smarcel	case LINUX_MSGCTL: {
19983221Smarcel		struct linux_msgctl_args a;
20083221Smarcel
20183221Smarcel		a.msqid = args->arg1;
20283221Smarcel		a.cmd = args->arg2;
20383221Smarcel		a.buf = args->ptr;
20483366Sjulian		return (linux_msgctl(td, &a));
20583221Smarcel	}
20683221Smarcel	case LINUX_SHMAT: {
20783221Smarcel		struct linux_shmat_args a;
20883221Smarcel
20983221Smarcel		a.shmid = args->arg1;
21083221Smarcel		a.shmaddr = args->ptr;
21183221Smarcel		a.shmflg = args->arg2;
21283221Smarcel		a.raddr = (l_ulong *)args->arg3;
21383366Sjulian		return (linux_shmat(td, &a));
21483221Smarcel	}
21583221Smarcel	case LINUX_SHMDT: {
21683221Smarcel		struct linux_shmdt_args a;
21783221Smarcel
21883221Smarcel		a.shmaddr = args->ptr;
21983366Sjulian		return (linux_shmdt(td, &a));
22083221Smarcel	}
22183221Smarcel	case LINUX_SHMGET: {
22283221Smarcel		struct linux_shmget_args a;
22383221Smarcel
22483221Smarcel		a.key = args->arg1;
22583221Smarcel		a.size = args->arg2;
22683221Smarcel		a.shmflg = args->arg3;
22783366Sjulian		return (linux_shmget(td, &a));
22883221Smarcel	}
22983221Smarcel	case LINUX_SHMCTL: {
23083221Smarcel		struct linux_shmctl_args a;
23183221Smarcel
23283221Smarcel		a.shmid = args->arg1;
23383221Smarcel		a.cmd = args->arg2;
23483221Smarcel		a.buf = args->ptr;
23583366Sjulian		return (linux_shmctl(td, &a));
23683221Smarcel	}
23783221Smarcel	default:
23883221Smarcel		break;
23983221Smarcel	}
24083221Smarcel
24183221Smarcel	return (EINVAL);
24264921Smarcel}
24364921Smarcel
24464921Smarcelint
24583366Sjulianlinux_old_select(struct thread *td, struct linux_old_select_args *args)
24664921Smarcel{
24783221Smarcel	struct l_old_select_argv linux_args;
24883221Smarcel	struct linux_select_args newsel;
24964921Smarcel	int error;
25064921Smarcel
25183221Smarcel#ifdef DEBUG
25283221Smarcel	if (ldebug(old_select))
25391437Speter		printf(ARGS(old_select, "%p"), args->ptr);
25464921Smarcel#endif
25564921Smarcel
25683221Smarcel	error = copyin((caddr_t)args->ptr, &linux_args, sizeof(linux_args));
25764921Smarcel	if (error)
25864921Smarcel		return (error);
25964921Smarcel
26064921Smarcel	newsel.nfds = linux_args.nfds;
26164921Smarcel	newsel.readfds = linux_args.readfds;
26264921Smarcel	newsel.writefds = linux_args.writefds;
26364921Smarcel	newsel.exceptfds = linux_args.exceptfds;
26464921Smarcel	newsel.timeout = linux_args.timeout;
26583366Sjulian	return (linux_select(td, &newsel));
26664921Smarcel}
26764921Smarcel
26864921Smarcelint
26983366Sjulianlinux_fork(struct thread *td, struct linux_fork_args *args)
27064921Smarcel{
27164921Smarcel	int error;
27264921Smarcel
27364921Smarcel#ifdef DEBUG
27472543Sjlemon	if (ldebug(fork))
27572543Sjlemon		printf(ARGS(fork, ""));
27664921Smarcel#endif
27764921Smarcel
27883366Sjulian	if ((error = fork(td, (struct fork_args *)args)) != 0)
27964921Smarcel		return (error);
28064921Smarcel
28183366Sjulian	if (td->td_retval[1] == 1)
28283366Sjulian		td->td_retval[0] = 0;
28364921Smarcel	return (0);
28464921Smarcel}
28564921Smarcel
28664921Smarcelint
28783366Sjulianlinux_vfork(struct thread *td, struct linux_vfork_args *args)
28864921Smarcel{
28964921Smarcel	int error;
29064921Smarcel
29164921Smarcel#ifdef DEBUG
29272543Sjlemon	if (ldebug(vfork))
29372543Sjlemon		printf(ARGS(vfork, ""));
29464921Smarcel#endif
29564921Smarcel
29683366Sjulian	if ((error = vfork(td, (struct vfork_args *)args)) != 0)
29764921Smarcel		return (error);
29864921Smarcel	/* Are we the child? */
29983366Sjulian	if (td->td_retval[1] == 1)
30083366Sjulian		td->td_retval[0] = 0;
30164921Smarcel	return (0);
30264921Smarcel}
30364921Smarcel
/* Linux clone flag bits examined (or merely named) by linux_clone(). */
#define CLONE_VM	0x100	/* mapped to RFMEM */
#define CLONE_FS	0x200	/* not consulted by linux_clone() */
#define CLONE_FILES	0x400	/* when absent, RFFDG is requested */
#define CLONE_SIGHAND	0x800	/* mapped to RFSIGSHARE */
#define CLONE_PID	0x1000	/* unsupported; only reported under DEBUG */
30964921Smarcel
31064921Smarcelint
31183366Sjulianlinux_clone(struct thread *td, struct linux_clone_args *args)
31264921Smarcel{
31373856Sjhb	int error, ff = RFPROC | RFSTOPPED;
31464921Smarcel	struct proc *p2;
31564921Smarcel	int exit_signal;
31664921Smarcel
31764921Smarcel#ifdef DEBUG
31872543Sjlemon	if (ldebug(clone)) {
31972543Sjlemon		printf(ARGS(clone, "flags %x, stack %x"),
32072543Sjlemon		    (unsigned int)args->flags, (unsigned int)args->stack);
32172543Sjlemon		if (args->flags & CLONE_PID)
32272543Sjlemon			printf(LMSG("CLONE_PID not yet supported"));
32372543Sjlemon	}
32464921Smarcel#endif
32564921Smarcel
32664921Smarcel	if (!args->stack)
32764921Smarcel		return (EINVAL);
32864921Smarcel
32964921Smarcel	exit_signal = args->flags & 0x000000ff;
33064921Smarcel	if (exit_signal >= LINUX_NSIG)
33164921Smarcel		return (EINVAL);
33264921Smarcel
33364921Smarcel	if (exit_signal <= LINUX_SIGTBLSZ)
33464921Smarcel		exit_signal = linux_to_bsd_signal[_SIG_IDX(exit_signal)];
33564921Smarcel
33664921Smarcel	if (args->flags & CLONE_VM)
33764921Smarcel		ff |= RFMEM;
33864921Smarcel	if (args->flags & CLONE_SIGHAND)
33964921Smarcel		ff |= RFSIGSHARE;
34064921Smarcel	if (!(args->flags & CLONE_FILES))
34164921Smarcel		ff |= RFFDG;
34264921Smarcel
34383221Smarcel	mtx_lock(&Giant);
34483366Sjulian	error = fork1(td, ff, &p2);
34583221Smarcel	if (error == 0) {
34683366Sjulian		td->td_retval[0] = p2->p_pid;
34783366Sjulian		td->td_retval[1] = 0;
34864921Smarcel
34983221Smarcel		PROC_LOCK(p2);
35083221Smarcel		p2->p_sigparent = exit_signal;
35190361Sjulian		FIRST_THREAD_IN_PROC(p2)->td_frame->tf_esp =
35290361Sjulian					(unsigned int)args->stack;
35364921Smarcel
35464921Smarcel#ifdef DEBUG
35583221Smarcel		if (ldebug(clone))
35683221Smarcel			printf(LMSG("clone: successful rfork to %ld"),
35783221Smarcel			    (long)p2->p_pid);
35864921Smarcel#endif
35964921Smarcel
36083221Smarcel		/*
36183221Smarcel		 * Make this runnable after we are finished with it.
36283221Smarcel		 */
36383221Smarcel		mtx_lock_spin(&sched_lock);
36483221Smarcel		p2->p_stat = SRUN;
36590361Sjulian		setrunqueue(FIRST_THREAD_IN_PROC(p2));
36683221Smarcel		mtx_unlock_spin(&sched_lock);
36783221Smarcel		PROC_UNLOCK(p2);
36883221Smarcel	}
36983221Smarcel	mtx_unlock(&Giant);
37073856Sjhb
37183221Smarcel	return (error);
37264921Smarcel}
37364921Smarcel
37464921Smarcel/* XXX move */
37583221Smarcelstruct l_mmap_argv {
37683221Smarcel	l_caddr_t	addr;
37783221Smarcel	l_int		len;
37883221Smarcel	l_int		prot;
37983221Smarcel	l_int		flags;
38083221Smarcel	l_int		fd;
38183221Smarcel	l_int		pos;
38264921Smarcel};
38364921Smarcel
/*
 * linux_mmap() gives LINUX_MAP_GROWSDOWN regions a maximum size of at
 * least (STACK_SIZE - GUARD_SIZE) bytes.
 */
#define STACK_SIZE  (2 * 1024 * 1024)
#define GUARD_SIZE  (4 * PAGE_SIZE)
38664921Smarcel
38764921Smarcelint
38883366Sjulianlinux_mmap(struct thread *td, struct linux_mmap_args *args)
38964921Smarcel{
39083366Sjulian	struct proc *p = td->td_proc;
39164921Smarcel	struct mmap_args /* {
39264921Smarcel		caddr_t addr;
39364921Smarcel		size_t len;
39464921Smarcel		int prot;
39564921Smarcel		int flags;
39664921Smarcel		int fd;
39764921Smarcel		long pad;
39864921Smarcel		off_t pos;
39964921Smarcel	} */ bsd_args;
40064921Smarcel	int error;
40183221Smarcel	struct l_mmap_argv linux_args;
40264921Smarcel
40383221Smarcel	error = copyin((caddr_t)args->ptr, &linux_args, sizeof(linux_args));
40464921Smarcel	if (error)
40564921Smarcel		return (error);
40664921Smarcel
40764921Smarcel#ifdef DEBUG
40872543Sjlemon	if (ldebug(mmap))
40972543Sjlemon		printf(ARGS(mmap, "%p, %d, %d, 0x%08x, %d, %d"),
41072543Sjlemon		    (void *)linux_args.addr, linux_args.len, linux_args.prot,
41172543Sjlemon		    linux_args.flags, linux_args.fd, linux_args.pos);
41264921Smarcel#endif
41364921Smarcel
41464921Smarcel	bsd_args.flags = 0;
41564921Smarcel	if (linux_args.flags & LINUX_MAP_SHARED)
41664921Smarcel		bsd_args.flags |= MAP_SHARED;
41764921Smarcel	if (linux_args.flags & LINUX_MAP_PRIVATE)
41864921Smarcel		bsd_args.flags |= MAP_PRIVATE;
41964921Smarcel	if (linux_args.flags & LINUX_MAP_FIXED)
42064921Smarcel		bsd_args.flags |= MAP_FIXED;
42164921Smarcel	if (linux_args.flags & LINUX_MAP_ANON)
42264921Smarcel		bsd_args.flags |= MAP_ANON;
42373213Sdillon	else
42473213Sdillon		bsd_args.flags |= MAP_NOSYNC;
42564921Smarcel	if (linux_args.flags & LINUX_MAP_GROWSDOWN) {
42664921Smarcel		bsd_args.flags |= MAP_STACK;
42764921Smarcel
42864921Smarcel		/* The linux MAP_GROWSDOWN option does not limit auto
42964921Smarcel		 * growth of the region.  Linux mmap with this option
43064921Smarcel		 * takes as addr the inital BOS, and as len, the initial
43164921Smarcel		 * region size.  It can then grow down from addr without
43264921Smarcel		 * limit.  However, linux threads has an implicit internal
43364921Smarcel		 * limit to stack size of STACK_SIZE.  Its just not
43464921Smarcel		 * enforced explicitly in linux.  But, here we impose
43564921Smarcel		 * a limit of (STACK_SIZE - GUARD_SIZE) on the stack
43664921Smarcel		 * region, since we can do this with our mmap.
43764921Smarcel		 *
43864921Smarcel		 * Our mmap with MAP_STACK takes addr as the maximum
43964921Smarcel		 * downsize limit on BOS, and as len the max size of
44064921Smarcel		 * the region.  It them maps the top SGROWSIZ bytes,
44164921Smarcel		 * and autgrows the region down, up to the limit
44264921Smarcel		 * in addr.
44364921Smarcel		 *
44464921Smarcel		 * If we don't use the MAP_STACK option, the effect
44564921Smarcel		 * of this code is to allocate a stack region of a
44664921Smarcel		 * fixed size of (STACK_SIZE - GUARD_SIZE).
44764921Smarcel		 */
44864921Smarcel
44964921Smarcel		/* This gives us TOS */
45064921Smarcel		bsd_args.addr = linux_args.addr + linux_args.len;
45164921Smarcel
45267238Sgallatin		if (bsd_args.addr > p->p_vmspace->vm_maxsaddr) {
45367238Sgallatin			/* Some linux apps will attempt to mmap
45467238Sgallatin			 * thread stacks near the top of their
45567238Sgallatin			 * address space.  If their TOS is greater
45667238Sgallatin			 * than vm_maxsaddr, vm_map_growstack()
45767238Sgallatin			 * will confuse the thread stack with the
45867238Sgallatin			 * process stack and deliver a SEGV if they
45967238Sgallatin			 * attempt to grow the thread stack past their
46067238Sgallatin			 * current stacksize rlimit.  To avoid this,
46167238Sgallatin			 * adjust vm_maxsaddr upwards to reflect
46267238Sgallatin			 * the current stacksize rlimit rather
46367238Sgallatin			 * than the maximum possible stacksize.
46467238Sgallatin			 * It would be better to adjust the
46567238Sgallatin			 * mmap'ed region, but some apps do not check
46667238Sgallatin			 * mmap's return value.
46767238Sgallatin			 */
46871494Sjhb			mtx_assert(&Giant, MA_OWNED);
46967238Sgallatin			p->p_vmspace->vm_maxsaddr = (char *)USRSTACK -
47067238Sgallatin			    p->p_rlimit[RLIMIT_STACK].rlim_cur;
47167238Sgallatin		}
47267238Sgallatin
47364921Smarcel		/* This gives us our maximum stack size */
47464921Smarcel		if (linux_args.len > STACK_SIZE - GUARD_SIZE)
47564921Smarcel			bsd_args.len = linux_args.len;
47664921Smarcel		else
47764921Smarcel			bsd_args.len  = STACK_SIZE - GUARD_SIZE;
47864921Smarcel
47964921Smarcel		/* This gives us a new BOS.  If we're using VM_STACK, then
48064921Smarcel		 * mmap will just map the top SGROWSIZ bytes, and let
48164921Smarcel		 * the stack grow down to the limit at BOS.  If we're
48264921Smarcel		 * not using VM_STACK we map the full stack, since we
48364921Smarcel		 * don't have a way to autogrow it.
48464921Smarcel		 */
48564921Smarcel		bsd_args.addr -= bsd_args.len;
48664921Smarcel	} else {
48764921Smarcel		bsd_args.addr = linux_args.addr;
48864921Smarcel		bsd_args.len  = linux_args.len;
48964921Smarcel	}
49064921Smarcel
49164921Smarcel	bsd_args.prot = linux_args.prot | PROT_READ;	/* always required */
49264921Smarcel	if (linux_args.flags & LINUX_MAP_ANON)
49364921Smarcel		bsd_args.fd = -1;
49464921Smarcel	else
49564921Smarcel		bsd_args.fd = linux_args.fd;
49664921Smarcel	bsd_args.pos = linux_args.pos;
49764921Smarcel	bsd_args.pad = 0;
49864921Smarcel
49964921Smarcel#ifdef DEBUG
50072543Sjlemon	if (ldebug(mmap))
50172543Sjlemon		printf("-> (%p, %d, %d, 0x%08x, %d, %d)\n",
50272543Sjlemon		    (void *)bsd_args.addr, bsd_args.len, bsd_args.prot,
50372543Sjlemon		    bsd_args.flags, bsd_args.fd, (int)bsd_args.pos);
50464921Smarcel#endif
50564921Smarcel
50683366Sjulian	return (mmap(td, &bsd_args));
50764921Smarcel}
50864921Smarcel
50964921Smarcelint
51083366Sjulianlinux_pipe(struct thread *td, struct linux_pipe_args *args)
51164921Smarcel{
51264921Smarcel	int error;
51364921Smarcel	int reg_edx;
51464921Smarcel
51564921Smarcel#ifdef DEBUG
51672543Sjlemon	if (ldebug(pipe))
51772543Sjlemon		printf(ARGS(pipe, "*"));
51864921Smarcel#endif
51964921Smarcel
52083366Sjulian	reg_edx = td->td_retval[1];
52183366Sjulian	error = pipe(td, 0);
52264921Smarcel	if (error) {
52383366Sjulian		td->td_retval[1] = reg_edx;
52464921Smarcel		return (error);
52564921Smarcel	}
52664921Smarcel
52783366Sjulian	error = copyout(td->td_retval, args->pipefds, 2*sizeof(int));
52864921Smarcel	if (error) {
52983366Sjulian		td->td_retval[1] = reg_edx;
53064921Smarcel		return (error);
53164921Smarcel	}
53264921Smarcel
53383366Sjulian	td->td_retval[1] = reg_edx;
53483366Sjulian	td->td_retval[0] = 0;
53564921Smarcel	return (0);
53664921Smarcel}
53764921Smarcel
53864921Smarcelint
53983366Sjulianlinux_ioperm(struct thread *td, struct linux_ioperm_args *args)
54064921Smarcel{
54164921Smarcel	struct sysarch_args sa;
54264921Smarcel	struct i386_ioperm_args *iia;
54364921Smarcel	caddr_t sg;
54464921Smarcel
54564921Smarcel	sg = stackgap_init();
54664921Smarcel	iia = stackgap_alloc(&sg, sizeof(struct i386_ioperm_args));
54764921Smarcel	iia->start = args->start;
54864921Smarcel	iia->length = args->length;
54964921Smarcel	iia->enable = args->enable;
55064921Smarcel	sa.op = I386_SET_IOPERM;
55164921Smarcel	sa.parms = (char *)iia;
55283366Sjulian	return (sysarch(td, &sa));
55364921Smarcel}
55464921Smarcel
55564921Smarcelint
55683366Sjulianlinux_iopl(struct thread *td, struct linux_iopl_args *args)
55764921Smarcel{
55864921Smarcel	int error;
55964921Smarcel
56064921Smarcel	if (args->level < 0 || args->level > 3)
56164921Smarcel		return (EINVAL);
56283366Sjulian	if ((error = suser_td(td)) != 0)
56364921Smarcel		return (error);
56491406Sjhb	if ((error = securelevel_gt(td->td_ucred, 0)) != 0)
56583981Srwatson		return (error);
56683366Sjulian	td->td_frame->tf_eflags = (td->td_frame->tf_eflags & ~PSL_IOPL) |
56764921Smarcel	    (args->level * (PSL_IOPL / 3));
56864921Smarcel	return (0);
56964921Smarcel}
57064921Smarcel
57164921Smarcelint
57283366Sjulianlinux_modify_ldt(td, uap)
57383366Sjulian	struct thread *td;
57464921Smarcel	struct linux_modify_ldt_args *uap;
57564921Smarcel{
57664921Smarcel	int error;
57764921Smarcel	caddr_t sg;
57864921Smarcel	struct sysarch_args args;
57964921Smarcel	struct i386_ldt_args *ldt;
58083221Smarcel	struct l_descriptor ld;
58164921Smarcel	union descriptor *desc;
58264921Smarcel
58364921Smarcel	sg = stackgap_init();
58464921Smarcel
58564921Smarcel	if (uap->ptr == NULL)
58664921Smarcel		return (EINVAL);
58764921Smarcel
58864921Smarcel	switch (uap->func) {
58964921Smarcel	case 0x00: /* read_ldt */
59064921Smarcel		ldt = stackgap_alloc(&sg, sizeof(*ldt));
59164921Smarcel		ldt->start = 0;
59264921Smarcel		ldt->descs = uap->ptr;
59364921Smarcel		ldt->num = uap->bytecount / sizeof(union descriptor);
59464921Smarcel		args.op = I386_GET_LDT;
59564921Smarcel		args.parms = (char*)ldt;
59683366Sjulian		error = sysarch(td, &args);
59783366Sjulian		td->td_retval[0] *= sizeof(union descriptor);
59864921Smarcel		break;
59964921Smarcel	case 0x01: /* write_ldt */
60064921Smarcel	case 0x11: /* write_ldt */
60164921Smarcel		if (uap->bytecount != sizeof(ld))
60264921Smarcel			return (EINVAL);
60364921Smarcel
60464921Smarcel		error = copyin(uap->ptr, &ld, sizeof(ld));
60564921Smarcel		if (error)
60664921Smarcel			return (error);
60764921Smarcel
60864921Smarcel		ldt = stackgap_alloc(&sg, sizeof(*ldt));
60964921Smarcel		desc = stackgap_alloc(&sg, sizeof(*desc));
61064921Smarcel		ldt->start = ld.entry_number;
61164921Smarcel		ldt->descs = desc;
61264921Smarcel		ldt->num = 1;
61364921Smarcel		desc->sd.sd_lolimit = (ld.limit & 0x0000ffff);
61464921Smarcel		desc->sd.sd_hilimit = (ld.limit & 0x000f0000) >> 16;
61564921Smarcel		desc->sd.sd_lobase = (ld.base_addr & 0x00ffffff);
61664921Smarcel		desc->sd.sd_hibase = (ld.base_addr & 0xff000000) >> 24;
61764921Smarcel		desc->sd.sd_type = SDT_MEMRO | ((ld.read_exec_only ^ 1) << 1) |
61864921Smarcel			(ld.contents << 2);
61964921Smarcel		desc->sd.sd_dpl = 3;
62064921Smarcel		desc->sd.sd_p = (ld.seg_not_present ^ 1);
62164921Smarcel		desc->sd.sd_xx = 0;
62264921Smarcel		desc->sd.sd_def32 = ld.seg_32bit;
62364921Smarcel		desc->sd.sd_gran = ld.limit_in_pages;
62464921Smarcel		args.op = I386_SET_LDT;
62564921Smarcel		args.parms = (char*)ldt;
62683366Sjulian		error = sysarch(td, &args);
62764921Smarcel		break;
62864921Smarcel	default:
62964921Smarcel		error = EINVAL;
63064921Smarcel		break;
63164921Smarcel	}
63264921Smarcel
63364921Smarcel	if (error == EOPNOTSUPP) {
63464921Smarcel		printf("linux: modify_ldt needs kernel option USER_LDT\n");
63564921Smarcel		error = ENOSYS;
63664921Smarcel	}
63764921Smarcel
63864921Smarcel	return (error);
63964921Smarcel}
64064921Smarcel
64164921Smarcelint
64283366Sjulianlinux_sigaction(struct thread *td, struct linux_sigaction_args *args)
64364921Smarcel{
64483221Smarcel	l_osigaction_t osa;
64583221Smarcel	l_sigaction_t act, oact;
64664921Smarcel	int error;
64764921Smarcel
64864921Smarcel#ifdef DEBUG
64972543Sjlemon	if (ldebug(sigaction))
65072543Sjlemon		printf(ARGS(sigaction, "%d, %p, %p"),
65172543Sjlemon		    args->sig, (void *)args->nsa, (void *)args->osa);
65264921Smarcel#endif
65364921Smarcel
65464921Smarcel	if (args->nsa != NULL) {
65583221Smarcel		error = copyin((caddr_t)args->nsa, &osa,
65683221Smarcel		    sizeof(l_osigaction_t));
65764921Smarcel		if (error)
65864921Smarcel			return (error);
65964921Smarcel		act.lsa_handler = osa.lsa_handler;
66064921Smarcel		act.lsa_flags = osa.lsa_flags;
66164921Smarcel		act.lsa_restorer = osa.lsa_restorer;
66264921Smarcel		LINUX_SIGEMPTYSET(act.lsa_mask);
66364921Smarcel		act.lsa_mask.__bits[0] = osa.lsa_mask;
66464921Smarcel	}
66564921Smarcel
66683366Sjulian	error = linux_do_sigaction(td, args->sig, args->nsa ? &act : NULL,
66764921Smarcel	    args->osa ? &oact : NULL);
66864921Smarcel
66964921Smarcel	if (args->osa != NULL && !error) {
67064921Smarcel		osa.lsa_handler = oact.lsa_handler;
67164921Smarcel		osa.lsa_flags = oact.lsa_flags;
67264921Smarcel		osa.lsa_restorer = oact.lsa_restorer;
67364921Smarcel		osa.lsa_mask = oact.lsa_mask.__bits[0];
67483221Smarcel		error = copyout(&osa, (caddr_t)args->osa,
67583221Smarcel		    sizeof(l_osigaction_t));
67664921Smarcel	}
67764921Smarcel
67864921Smarcel	return (error);
67964921Smarcel}
68064921Smarcel
68164921Smarcel/*
68264921Smarcel * Linux has two extra args, restart and oldmask.  We dont use these,
68364921Smarcel * but it seems that "restart" is actually a context pointer that
68464921Smarcel * enables the signal to happen with a different register set.
68564921Smarcel */
68664921Smarcelint
68783366Sjulianlinux_sigsuspend(struct thread *td, struct linux_sigsuspend_args *args)
68864921Smarcel{
68964921Smarcel	struct sigsuspend_args bsd;
69064921Smarcel	sigset_t *sigmask;
69183221Smarcel	l_sigset_t mask;
69264921Smarcel	caddr_t sg = stackgap_init();
69364921Smarcel
69464921Smarcel#ifdef DEBUG
69572543Sjlemon	if (ldebug(sigsuspend))
69672543Sjlemon		printf(ARGS(sigsuspend, "%08lx"), (unsigned long)args->mask);
69764921Smarcel#endif
69864921Smarcel
69964921Smarcel	sigmask = stackgap_alloc(&sg, sizeof(sigset_t));
70064921Smarcel	LINUX_SIGEMPTYSET(mask);
70164921Smarcel	mask.__bits[0] = args->mask;
70264921Smarcel	linux_to_bsd_sigset(&mask, sigmask);
70364921Smarcel	bsd.sigmask = sigmask;
70483366Sjulian	return (sigsuspend(td, &bsd));
70564921Smarcel}
70664921Smarcel
70764921Smarcelint
70883366Sjulianlinux_rt_sigsuspend(td, uap)
70983366Sjulian	struct thread *td;
71064921Smarcel	struct linux_rt_sigsuspend_args *uap;
71164921Smarcel{
71283221Smarcel	l_sigset_t lmask;
71364921Smarcel	sigset_t *bmask;
71464921Smarcel	struct sigsuspend_args bsd;
71564921Smarcel	caddr_t sg = stackgap_init();
71664921Smarcel	int error;
71764921Smarcel
71864921Smarcel#ifdef DEBUG
71972543Sjlemon	if (ldebug(rt_sigsuspend))
72072543Sjlemon		printf(ARGS(rt_sigsuspend, "%p, %d"),
72172543Sjlemon		    (void *)uap->newset, uap->sigsetsize);
72264921Smarcel#endif
72364921Smarcel
72483221Smarcel	if (uap->sigsetsize != sizeof(l_sigset_t))
72564921Smarcel		return (EINVAL);
72664921Smarcel
72783221Smarcel	error = copyin(uap->newset, &lmask, sizeof(l_sigset_t));
72864921Smarcel	if (error)
72964921Smarcel		return (error);
73064921Smarcel
73164921Smarcel	bmask = stackgap_alloc(&sg, sizeof(sigset_t));
73264921Smarcel	linux_to_bsd_sigset(&lmask, bmask);
73364921Smarcel	bsd.sigmask = bmask;
73483366Sjulian	return (sigsuspend(td, &bsd));
73564921Smarcel}
73664921Smarcel
73764921Smarcelint
73883366Sjulianlinux_pause(struct thread *td, struct linux_pause_args *args)
73964921Smarcel{
74083366Sjulian	struct proc *p = td->td_proc;
74164921Smarcel	struct sigsuspend_args bsd;
74264921Smarcel	sigset_t *sigmask;
74364921Smarcel	caddr_t sg = stackgap_init();
74464921Smarcel
74564921Smarcel#ifdef DEBUG
74672543Sjlemon	if (ldebug(pause))
74772543Sjlemon		printf(ARGS(pause, ""));
74864921Smarcel#endif
74964921Smarcel
75064921Smarcel	sigmask = stackgap_alloc(&sg, sizeof(sigset_t));
75171494Sjhb	PROC_LOCK(p);
75264921Smarcel	*sigmask = p->p_sigmask;
75371494Sjhb	PROC_UNLOCK(p);
75464921Smarcel	bsd.sigmask = sigmask;
75583366Sjulian	return (sigsuspend(td, &bsd));
75664921Smarcel}
75764921Smarcel
75864921Smarcelint
75983366Sjulianlinux_sigaltstack(struct thread *td, struct linux_sigaltstack_args *uap)
76064921Smarcel{
76164921Smarcel	struct sigaltstack_args bsd;
76264921Smarcel	stack_t *ss, *oss;
76383221Smarcel	l_stack_t lss;
76464921Smarcel	int error;
76564921Smarcel	caddr_t sg = stackgap_init();
76664921Smarcel
76764921Smarcel#ifdef DEBUG
76872543Sjlemon	if (ldebug(sigaltstack))
76972543Sjlemon		printf(ARGS(sigaltstack, "%p, %p"), uap->uss, uap->uoss);
77064921Smarcel#endif
77164921Smarcel
77267051Sgallatin	if (uap->uss == NULL) {
77367051Sgallatin		ss = NULL;
77467051Sgallatin	} else {
77583221Smarcel		error = copyin(uap->uss, &lss, sizeof(l_stack_t));
77667051Sgallatin		if (error)
77767051Sgallatin			return (error);
77864921Smarcel
77967051Sgallatin		ss = stackgap_alloc(&sg, sizeof(stack_t));
78067051Sgallatin		ss->ss_sp = lss.ss_sp;
78168520Smarcel		ss->ss_size = lss.ss_size;
78267051Sgallatin		ss->ss_flags = linux_to_bsd_sigaltstack(lss.ss_flags);
78367051Sgallatin	}
78464921Smarcel	oss = (uap->uoss != NULL)
78564921Smarcel	    ? stackgap_alloc(&sg, sizeof(stack_t))
78664921Smarcel	    : NULL;
78764921Smarcel
78864921Smarcel	bsd.ss = ss;
78964921Smarcel	bsd.oss = oss;
79083366Sjulian	error = sigaltstack(td, &bsd);
79164921Smarcel
79264921Smarcel	if (!error && oss != NULL) {
79364921Smarcel		lss.ss_sp = oss->ss_sp;
79464921Smarcel		lss.ss_size = oss->ss_size;
79567051Sgallatin		lss.ss_flags = bsd_to_linux_sigaltstack(oss->ss_flags);
79683221Smarcel		error = copyout(&lss, uap->uoss, sizeof(l_stack_t));
79764921Smarcel	}
79864921Smarcel
79964921Smarcel	return (error);
80064921Smarcel}
801