linux_machdep.c revision 84811
164921Smarcel/*-
264921Smarcel * Copyright (c) 2000 Marcel Moolenaar
364921Smarcel * All rights reserved.
464921Smarcel *
564921Smarcel * Redistribution and use in source and binary forms, with or without
664921Smarcel * modification, are permitted provided that the following conditions
764921Smarcel * are met:
864921Smarcel * 1. Redistributions of source code must retain the above copyright
964921Smarcel *    notice, this list of conditions and the following disclaimer
1064921Smarcel *    in this position and unchanged.
1164921Smarcel * 2. Redistributions in binary form must reproduce the above copyright
1264921Smarcel *    notice, this list of conditions and the following disclaimer in the
1364921Smarcel *    documentation and/or other materials provided with the distribution.
1464921Smarcel * 3. The name of the author may not be used to endorse or promote products
1565067Smarcel *    derived from this software without specific prior written permission.
1664921Smarcel *
1764921Smarcel * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
1864921Smarcel * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
1964921Smarcel * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
2064921Smarcel * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
2164921Smarcel * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
2264921Smarcel * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
2364921Smarcel * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
2464921Smarcel * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
2564921Smarcel * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
2664921Smarcel * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2764921Smarcel *
2864921Smarcel * $FreeBSD: head/sys/i386/linux/linux_machdep.c 84811 2001-10-11 17:52:20Z jhb $
2964921Smarcel */
3064921Smarcel
3164921Smarcel#include <sys/param.h>
3276166Smarkm#include <sys/systm.h>
3384811Sjhb#include <sys/lock.h>
3464921Smarcel#include <sys/mman.h>
3576166Smarkm#include <sys/mutex.h>
3664921Smarcel#include <sys/proc.h>
3776166Smarkm#include <sys/resource.h>
3876166Smarkm#include <sys/resourcevar.h>
3964921Smarcel#include <sys/sysproto.h>
4064921Smarcel#include <sys/unistd.h>
4164921Smarcel
4264921Smarcel#include <machine/frame.h>
4364921Smarcel#include <machine/psl.h>
4464921Smarcel#include <machine/segments.h>
4564921Smarcel#include <machine/sysarch.h>
4664921Smarcel
4767238Sgallatin#include <vm/vm.h>
4867238Sgallatin#include <sys/lock.h>
4967238Sgallatin#include <vm/pmap.h>
5067238Sgallatin#include <vm/vm_map.h>
5167238Sgallatin
5264921Smarcel#include <i386/linux/linux.h>
5368583Smarcel#include <i386/linux/linux_proto.h>
5464921Smarcel#include <compat/linux/linux_ipc.h>
5564921Smarcel#include <compat/linux/linux_signal.h>
5664921Smarcel#include <compat/linux/linux_util.h>
5764921Smarcel
5883221Smarcelstruct l_descriptor {
5983221Smarcel	l_uint		entry_number;
6083221Smarcel	l_ulong		base_addr;
6183221Smarcel	l_uint		limit;
6283221Smarcel	l_uint		seg_32bit:1;
6383221Smarcel	l_uint		contents:2;
6483221Smarcel	l_uint		read_exec_only:1;
6583221Smarcel	l_uint		limit_in_pages:1;
6683221Smarcel	l_uint		seg_not_present:1;
6783221Smarcel	l_uint		useable:1;
6864921Smarcel};
6964921Smarcel
7083221Smarcelstruct l_old_select_argv {
7183221Smarcel	l_int		nfds;
7283221Smarcel	l_fd_set	*readfds;
7383221Smarcel	l_fd_set	*writefds;
7483221Smarcel	l_fd_set	*exceptfds;
7583221Smarcel	struct l_timeval	*timeout;
7664921Smarcel};
7764921Smarcel
7864921Smarcelint
7967051Sgallatinlinux_to_bsd_sigaltstack(int lsa)
8067051Sgallatin{
8167051Sgallatin	int bsa = 0;
8267051Sgallatin
8367051Sgallatin	if (lsa & LINUX_SS_DISABLE)
8467051Sgallatin		bsa |= SS_DISABLE;
8567051Sgallatin	if (lsa & LINUX_SS_ONSTACK)
8667051Sgallatin		bsa |= SS_ONSTACK;
8767051Sgallatin	return (bsa);
8867051Sgallatin}
8967051Sgallatin
9067051Sgallatinint
9167051Sgallatinbsd_to_linux_sigaltstack(int bsa)
9267051Sgallatin{
9367051Sgallatin	int lsa = 0;
9467051Sgallatin
9567051Sgallatin	if (bsa & SS_DISABLE)
9667051Sgallatin		lsa |= LINUX_SS_DISABLE;
9767051Sgallatin	if (bsa & SS_ONSTACK)
9867051Sgallatin		lsa |= LINUX_SS_ONSTACK;
9967051Sgallatin	return (lsa);
10067051Sgallatin}
10167051Sgallatin
10267051Sgallatinint
10383366Sjulianlinux_execve(struct thread *td, struct linux_execve_args *args)
10464921Smarcel{
10564921Smarcel	struct execve_args bsd;
10664921Smarcel	caddr_t sg;
10764921Smarcel
10864921Smarcel	sg = stackgap_init();
10983366Sjulian	CHECKALTEXIST(td, &sg, args->path);
11064921Smarcel
11164921Smarcel#ifdef DEBUG
11272543Sjlemon	if (ldebug(execve))
11372543Sjlemon		printf(ARGS(execve, "%s"), args->path);
11464921Smarcel#endif
11564921Smarcel
11664921Smarcel	bsd.fname = args->path;
11764921Smarcel	bsd.argv = args->argp;
11864921Smarcel	bsd.envv = args->envp;
11983366Sjulian	return (execve(td, &bsd));
12064921Smarcel}
12164921Smarcel
12283221Smarcelstruct l_ipc_kludge {
12383221Smarcel	struct l_msgbuf *msgp;
12483221Smarcel	l_long msgtyp;
12583221Smarcel};
12683221Smarcel
12764921Smarcelint
12883366Sjulianlinux_ipc(struct thread *td, struct linux_ipc_args *args)
12964921Smarcel{
13083221Smarcel
13183221Smarcel	switch (args->what & 0xFFFF) {
13283221Smarcel	case LINUX_SEMOP: {
13383221Smarcel		struct linux_semop_args a;
13483221Smarcel
13583221Smarcel		a.semid = args->arg1;
13683221Smarcel		a.tsops = args->ptr;
13783221Smarcel		a.nsops = args->arg2;
13883366Sjulian		return (linux_semop(td, &a));
13964921Smarcel	}
14083221Smarcel	case LINUX_SEMGET: {
14183221Smarcel		struct linux_semget_args a;
14264921Smarcel
14383221Smarcel		a.key = args->arg1;
14483221Smarcel		a.nsems = args->arg2;
14583221Smarcel		a.semflg = args->arg3;
14683366Sjulian		return (linux_semget(td, &a));
14783221Smarcel	}
14883221Smarcel	case LINUX_SEMCTL: {
14983221Smarcel		struct linux_semctl_args a;
15083221Smarcel		int error;
15183221Smarcel
15283221Smarcel		a.semid = args->arg1;
15383221Smarcel		a.semnum = args->arg2;
15483221Smarcel		a.cmd = args->arg3;
15583221Smarcel		error = copyin((caddr_t)args->ptr, &a.arg, sizeof(a.arg));
15683221Smarcel		if (error)
15783221Smarcel			return (error);
15883366Sjulian		return (linux_semctl(td, &a));
15983221Smarcel	}
16083221Smarcel	case LINUX_MSGSND: {
16183221Smarcel		struct linux_msgsnd_args a;
16283221Smarcel
16383221Smarcel		a.msqid = args->arg1;
16483221Smarcel		a.msgp = args->ptr;
16583221Smarcel		a.msgsz = args->arg2;
16683221Smarcel		a.msgflg = args->arg3;
16783366Sjulian		return (linux_msgsnd(td, &a));
16883221Smarcel	}
16983221Smarcel	case LINUX_MSGRCV: {
17083221Smarcel		struct linux_msgrcv_args a;
17183221Smarcel
17283221Smarcel		a.msqid = args->arg1;
17383221Smarcel		a.msgsz = args->arg2;
17483221Smarcel		a.msgflg = args->arg3;
17583221Smarcel		if ((args->what >> 16) == 0) {
17683221Smarcel			struct l_ipc_kludge tmp;
17783221Smarcel			int error;
17883221Smarcel
17983221Smarcel			if (args->ptr == NULL)
18083221Smarcel				return (EINVAL);
18183221Smarcel			error = copyin((caddr_t)args->ptr, &tmp, sizeof(tmp));
18283221Smarcel			if (error)
18383221Smarcel				return (error);
18483221Smarcel			a.msgp = tmp.msgp;
18583221Smarcel			a.msgtyp = tmp.msgtyp;
18683221Smarcel		} else {
18783221Smarcel			a.msgp = args->ptr;
18883221Smarcel			a.msgtyp = args->arg5;
18983221Smarcel		}
19083366Sjulian		return (linux_msgrcv(td, &a));
19183221Smarcel	}
19283221Smarcel	case LINUX_MSGGET: {
19383221Smarcel		struct linux_msgget_args a;
19483221Smarcel
19583221Smarcel		a.key = args->arg1;
19683221Smarcel		a.msgflg = args->arg2;
19783366Sjulian		return (linux_msgget(td, &a));
19883221Smarcel	}
19983221Smarcel	case LINUX_MSGCTL: {
20083221Smarcel		struct linux_msgctl_args a;
20183221Smarcel
20283221Smarcel		a.msqid = args->arg1;
20383221Smarcel		a.cmd = args->arg2;
20483221Smarcel		a.buf = args->ptr;
20583366Sjulian		return (linux_msgctl(td, &a));
20683221Smarcel	}
20783221Smarcel	case LINUX_SHMAT: {
20883221Smarcel		struct linux_shmat_args a;
20983221Smarcel
21083221Smarcel		a.shmid = args->arg1;
21183221Smarcel		a.shmaddr = args->ptr;
21283221Smarcel		a.shmflg = args->arg2;
21383221Smarcel		a.raddr = (l_ulong *)args->arg3;
21483366Sjulian		return (linux_shmat(td, &a));
21583221Smarcel	}
21683221Smarcel	case LINUX_SHMDT: {
21783221Smarcel		struct linux_shmdt_args a;
21883221Smarcel
21983221Smarcel		a.shmaddr = args->ptr;
22083366Sjulian		return (linux_shmdt(td, &a));
22183221Smarcel	}
22283221Smarcel	case LINUX_SHMGET: {
22383221Smarcel		struct linux_shmget_args a;
22483221Smarcel
22583221Smarcel		a.key = args->arg1;
22683221Smarcel		a.size = args->arg2;
22783221Smarcel		a.shmflg = args->arg3;
22883366Sjulian		return (linux_shmget(td, &a));
22983221Smarcel	}
23083221Smarcel	case LINUX_SHMCTL: {
23183221Smarcel		struct linux_shmctl_args a;
23283221Smarcel
23383221Smarcel		a.shmid = args->arg1;
23483221Smarcel		a.cmd = args->arg2;
23583221Smarcel		a.buf = args->ptr;
23683366Sjulian		return (linux_shmctl(td, &a));
23783221Smarcel	}
23883221Smarcel	default:
23983221Smarcel		break;
24083221Smarcel	}
24183221Smarcel
24283221Smarcel	return (EINVAL);
24364921Smarcel}
24464921Smarcel
24564921Smarcelint
24683366Sjulianlinux_old_select(struct thread *td, struct linux_old_select_args *args)
24764921Smarcel{
24883221Smarcel	struct l_old_select_argv linux_args;
24983221Smarcel	struct linux_select_args newsel;
25064921Smarcel	int error;
25164921Smarcel
25283221Smarcel#ifdef DEBUG
25383221Smarcel	if (ldebug(old_select))
25483221Smarcel		printf(ARGS(old_select, "%x"), args->ptr);
25564921Smarcel#endif
25664921Smarcel
25783221Smarcel	error = copyin((caddr_t)args->ptr, &linux_args, sizeof(linux_args));
25864921Smarcel	if (error)
25964921Smarcel		return (error);
26064921Smarcel
26164921Smarcel	newsel.nfds = linux_args.nfds;
26264921Smarcel	newsel.readfds = linux_args.readfds;
26364921Smarcel	newsel.writefds = linux_args.writefds;
26464921Smarcel	newsel.exceptfds = linux_args.exceptfds;
26564921Smarcel	newsel.timeout = linux_args.timeout;
26683366Sjulian	return (linux_select(td, &newsel));
26764921Smarcel}
26864921Smarcel
26964921Smarcelint
27083366Sjulianlinux_fork(struct thread *td, struct linux_fork_args *args)
27164921Smarcel{
27264921Smarcel	int error;
27364921Smarcel
27464921Smarcel#ifdef DEBUG
27572543Sjlemon	if (ldebug(fork))
27672543Sjlemon		printf(ARGS(fork, ""));
27764921Smarcel#endif
27864921Smarcel
27983366Sjulian	if ((error = fork(td, (struct fork_args *)args)) != 0)
28064921Smarcel		return (error);
28164921Smarcel
28283366Sjulian	if (td->td_retval[1] == 1)
28383366Sjulian		td->td_retval[0] = 0;
28464921Smarcel	return (0);
28564921Smarcel}
28664921Smarcel
28764921Smarcelint
28883366Sjulianlinux_vfork(struct thread *td, struct linux_vfork_args *args)
28964921Smarcel{
29064921Smarcel	int error;
29164921Smarcel
29264921Smarcel#ifdef DEBUG
29372543Sjlemon	if (ldebug(vfork))
29472543Sjlemon		printf(ARGS(vfork, ""));
29564921Smarcel#endif
29664921Smarcel
29783366Sjulian	if ((error = vfork(td, (struct vfork_args *)args)) != 0)
29864921Smarcel		return (error);
29964921Smarcel	/* Are we the child? */
30083366Sjulian	if (td->td_retval[1] == 1)
30183366Sjulian		td->td_retval[0] = 0;
30264921Smarcel	return (0);
30364921Smarcel}
30464921Smarcel
30564921Smarcel#define CLONE_VM	0x100
30664921Smarcel#define CLONE_FS	0x200
30764921Smarcel#define CLONE_FILES	0x400
30864921Smarcel#define CLONE_SIGHAND	0x800
30964921Smarcel#define CLONE_PID	0x1000
31064921Smarcel
31164921Smarcelint
31283366Sjulianlinux_clone(struct thread *td, struct linux_clone_args *args)
31364921Smarcel{
31473856Sjhb	int error, ff = RFPROC | RFSTOPPED;
31564921Smarcel	struct proc *p2;
31664921Smarcel	int exit_signal;
31764921Smarcel
31864921Smarcel#ifdef DEBUG
31972543Sjlemon	if (ldebug(clone)) {
32072543Sjlemon		printf(ARGS(clone, "flags %x, stack %x"),
32172543Sjlemon		    (unsigned int)args->flags, (unsigned int)args->stack);
32272543Sjlemon		if (args->flags & CLONE_PID)
32372543Sjlemon			printf(LMSG("CLONE_PID not yet supported"));
32472543Sjlemon	}
32564921Smarcel#endif
32664921Smarcel
32764921Smarcel	if (!args->stack)
32864921Smarcel		return (EINVAL);
32964921Smarcel
33064921Smarcel	exit_signal = args->flags & 0x000000ff;
33164921Smarcel	if (exit_signal >= LINUX_NSIG)
33264921Smarcel		return (EINVAL);
33364921Smarcel
33464921Smarcel	if (exit_signal <= LINUX_SIGTBLSZ)
33564921Smarcel		exit_signal = linux_to_bsd_signal[_SIG_IDX(exit_signal)];
33664921Smarcel
33764921Smarcel	if (args->flags & CLONE_VM)
33864921Smarcel		ff |= RFMEM;
33964921Smarcel	if (args->flags & CLONE_SIGHAND)
34064921Smarcel		ff |= RFSIGSHARE;
34164921Smarcel	if (!(args->flags & CLONE_FILES))
34264921Smarcel		ff |= RFFDG;
34364921Smarcel
34483221Smarcel	mtx_lock(&Giant);
34583366Sjulian	error = fork1(td, ff, &p2);
34683221Smarcel	if (error == 0) {
34783366Sjulian		td->td_retval[0] = p2->p_pid;
34883366Sjulian		td->td_retval[1] = 0;
34964921Smarcel
35083221Smarcel		PROC_LOCK(p2);
35183221Smarcel		p2->p_sigparent = exit_signal;
35283366Sjulian		p2->p_thread.td_frame->tf_esp = (unsigned int)args->stack;
35364921Smarcel
35464921Smarcel#ifdef DEBUG
35583221Smarcel		if (ldebug(clone))
35683221Smarcel			printf(LMSG("clone: successful rfork to %ld"),
35783221Smarcel			    (long)p2->p_pid);
35864921Smarcel#endif
35964921Smarcel
36083221Smarcel		/*
36183221Smarcel		 * Make this runnable after we are finished with it.
36283221Smarcel		 */
36383221Smarcel		mtx_lock_spin(&sched_lock);
36483221Smarcel		p2->p_stat = SRUN;
36583366Sjulian		setrunqueue(&p2->p_thread);
36683221Smarcel		mtx_unlock_spin(&sched_lock);
36783221Smarcel		PROC_UNLOCK(p2);
36883221Smarcel	}
36983221Smarcel	mtx_unlock(&Giant);
37073856Sjhb
37183221Smarcel	return (error);
37264921Smarcel}
37364921Smarcel
37464921Smarcel/* XXX move */
37583221Smarcelstruct l_mmap_argv {
37683221Smarcel	l_caddr_t	addr;
37783221Smarcel	l_int		len;
37883221Smarcel	l_int		prot;
37983221Smarcel	l_int		flags;
38083221Smarcel	l_int		fd;
38183221Smarcel	l_int		pos;
38264921Smarcel};
38364921Smarcel
38464921Smarcel#define STACK_SIZE  (2 * 1024 * 1024)
38564921Smarcel#define GUARD_SIZE  (4 * PAGE_SIZE)
38664921Smarcel
38764921Smarcelint
38883366Sjulianlinux_mmap(struct thread *td, struct linux_mmap_args *args)
38964921Smarcel{
39083366Sjulian	struct proc *p = td->td_proc;
39164921Smarcel	struct mmap_args /* {
39264921Smarcel		caddr_t addr;
39364921Smarcel		size_t len;
39464921Smarcel		int prot;
39564921Smarcel		int flags;
39664921Smarcel		int fd;
39764921Smarcel		long pad;
39864921Smarcel		off_t pos;
39964921Smarcel	} */ bsd_args;
40064921Smarcel	int error;
40183221Smarcel	struct l_mmap_argv linux_args;
40264921Smarcel
40383221Smarcel	error = copyin((caddr_t)args->ptr, &linux_args, sizeof(linux_args));
40464921Smarcel	if (error)
40564921Smarcel		return (error);
40664921Smarcel
40764921Smarcel#ifdef DEBUG
40872543Sjlemon	if (ldebug(mmap))
40972543Sjlemon		printf(ARGS(mmap, "%p, %d, %d, 0x%08x, %d, %d"),
41072543Sjlemon		    (void *)linux_args.addr, linux_args.len, linux_args.prot,
41172543Sjlemon		    linux_args.flags, linux_args.fd, linux_args.pos);
41264921Smarcel#endif
41364921Smarcel
41464921Smarcel	bsd_args.flags = 0;
41564921Smarcel	if (linux_args.flags & LINUX_MAP_SHARED)
41664921Smarcel		bsd_args.flags |= MAP_SHARED;
41764921Smarcel	if (linux_args.flags & LINUX_MAP_PRIVATE)
41864921Smarcel		bsd_args.flags |= MAP_PRIVATE;
41964921Smarcel	if (linux_args.flags & LINUX_MAP_FIXED)
42064921Smarcel		bsd_args.flags |= MAP_FIXED;
42164921Smarcel	if (linux_args.flags & LINUX_MAP_ANON)
42264921Smarcel		bsd_args.flags |= MAP_ANON;
42373213Sdillon	else
42473213Sdillon		bsd_args.flags |= MAP_NOSYNC;
42564921Smarcel	if (linux_args.flags & LINUX_MAP_GROWSDOWN) {
42664921Smarcel		bsd_args.flags |= MAP_STACK;
42764921Smarcel
42864921Smarcel		/* The linux MAP_GROWSDOWN option does not limit auto
42964921Smarcel		 * growth of the region.  Linux mmap with this option
43064921Smarcel		 * takes as addr the inital BOS, and as len, the initial
43164921Smarcel		 * region size.  It can then grow down from addr without
43264921Smarcel		 * limit.  However, linux threads has an implicit internal
43364921Smarcel		 * limit to stack size of STACK_SIZE.  Its just not
43464921Smarcel		 * enforced explicitly in linux.  But, here we impose
43564921Smarcel		 * a limit of (STACK_SIZE - GUARD_SIZE) on the stack
43664921Smarcel		 * region, since we can do this with our mmap.
43764921Smarcel		 *
43864921Smarcel		 * Our mmap with MAP_STACK takes addr as the maximum
43964921Smarcel		 * downsize limit on BOS, and as len the max size of
44064921Smarcel		 * the region.  It them maps the top SGROWSIZ bytes,
44164921Smarcel		 * and autgrows the region down, up to the limit
44264921Smarcel		 * in addr.
44364921Smarcel		 *
44464921Smarcel		 * If we don't use the MAP_STACK option, the effect
44564921Smarcel		 * of this code is to allocate a stack region of a
44664921Smarcel		 * fixed size of (STACK_SIZE - GUARD_SIZE).
44764921Smarcel		 */
44864921Smarcel
44964921Smarcel		/* This gives us TOS */
45064921Smarcel		bsd_args.addr = linux_args.addr + linux_args.len;
45164921Smarcel
45267238Sgallatin		if (bsd_args.addr > p->p_vmspace->vm_maxsaddr) {
45367238Sgallatin			/* Some linux apps will attempt to mmap
45467238Sgallatin			 * thread stacks near the top of their
45567238Sgallatin			 * address space.  If their TOS is greater
45667238Sgallatin			 * than vm_maxsaddr, vm_map_growstack()
45767238Sgallatin			 * will confuse the thread stack with the
45867238Sgallatin			 * process stack and deliver a SEGV if they
45967238Sgallatin			 * attempt to grow the thread stack past their
46067238Sgallatin			 * current stacksize rlimit.  To avoid this,
46167238Sgallatin			 * adjust vm_maxsaddr upwards to reflect
46267238Sgallatin			 * the current stacksize rlimit rather
46367238Sgallatin			 * than the maximum possible stacksize.
46467238Sgallatin			 * It would be better to adjust the
46567238Sgallatin			 * mmap'ed region, but some apps do not check
46667238Sgallatin			 * mmap's return value.
46767238Sgallatin			 */
46871494Sjhb			mtx_assert(&Giant, MA_OWNED);
46967238Sgallatin			p->p_vmspace->vm_maxsaddr = (char *)USRSTACK -
47067238Sgallatin			    p->p_rlimit[RLIMIT_STACK].rlim_cur;
47167238Sgallatin		}
47267238Sgallatin
47364921Smarcel		/* This gives us our maximum stack size */
47464921Smarcel		if (linux_args.len > STACK_SIZE - GUARD_SIZE)
47564921Smarcel			bsd_args.len = linux_args.len;
47664921Smarcel		else
47764921Smarcel			bsd_args.len  = STACK_SIZE - GUARD_SIZE;
47864921Smarcel
47964921Smarcel		/* This gives us a new BOS.  If we're using VM_STACK, then
48064921Smarcel		 * mmap will just map the top SGROWSIZ bytes, and let
48164921Smarcel		 * the stack grow down to the limit at BOS.  If we're
48264921Smarcel		 * not using VM_STACK we map the full stack, since we
48364921Smarcel		 * don't have a way to autogrow it.
48464921Smarcel		 */
48564921Smarcel		bsd_args.addr -= bsd_args.len;
48664921Smarcel	} else {
48764921Smarcel		bsd_args.addr = linux_args.addr;
48864921Smarcel		bsd_args.len  = linux_args.len;
48964921Smarcel	}
49064921Smarcel
49164921Smarcel	bsd_args.prot = linux_args.prot | PROT_READ;	/* always required */
49264921Smarcel	if (linux_args.flags & LINUX_MAP_ANON)
49364921Smarcel		bsd_args.fd = -1;
49464921Smarcel	else
49564921Smarcel		bsd_args.fd = linux_args.fd;
49664921Smarcel	bsd_args.pos = linux_args.pos;
49764921Smarcel	bsd_args.pad = 0;
49864921Smarcel
49964921Smarcel#ifdef DEBUG
50072543Sjlemon	if (ldebug(mmap))
50172543Sjlemon		printf("-> (%p, %d, %d, 0x%08x, %d, %d)\n",
50272543Sjlemon		    (void *)bsd_args.addr, bsd_args.len, bsd_args.prot,
50372543Sjlemon		    bsd_args.flags, bsd_args.fd, (int)bsd_args.pos);
50464921Smarcel#endif
50564921Smarcel
50683366Sjulian	return (mmap(td, &bsd_args));
50764921Smarcel}
50864921Smarcel
50964921Smarcelint
51083366Sjulianlinux_pipe(struct thread *td, struct linux_pipe_args *args)
51164921Smarcel{
51264921Smarcel	int error;
51364921Smarcel	int reg_edx;
51464921Smarcel
51564921Smarcel#ifdef DEBUG
51672543Sjlemon	if (ldebug(pipe))
51772543Sjlemon		printf(ARGS(pipe, "*"));
51864921Smarcel#endif
51964921Smarcel
52083366Sjulian	reg_edx = td->td_retval[1];
52183366Sjulian	error = pipe(td, 0);
52264921Smarcel	if (error) {
52383366Sjulian		td->td_retval[1] = reg_edx;
52464921Smarcel		return (error);
52564921Smarcel	}
52664921Smarcel
52783366Sjulian	error = copyout(td->td_retval, args->pipefds, 2*sizeof(int));
52864921Smarcel	if (error) {
52983366Sjulian		td->td_retval[1] = reg_edx;
53064921Smarcel		return (error);
53164921Smarcel	}
53264921Smarcel
53383366Sjulian	td->td_retval[1] = reg_edx;
53483366Sjulian	td->td_retval[0] = 0;
53564921Smarcel	return (0);
53664921Smarcel}
53764921Smarcel
53864921Smarcelint
53983366Sjulianlinux_ioperm(struct thread *td, struct linux_ioperm_args *args)
54064921Smarcel{
54164921Smarcel	struct sysarch_args sa;
54264921Smarcel	struct i386_ioperm_args *iia;
54364921Smarcel	caddr_t sg;
54464921Smarcel
54564921Smarcel	sg = stackgap_init();
54664921Smarcel	iia = stackgap_alloc(&sg, sizeof(struct i386_ioperm_args));
54764921Smarcel	iia->start = args->start;
54864921Smarcel	iia->length = args->length;
54964921Smarcel	iia->enable = args->enable;
55064921Smarcel	sa.op = I386_SET_IOPERM;
55164921Smarcel	sa.parms = (char *)iia;
55283366Sjulian	return (sysarch(td, &sa));
55364921Smarcel}
55464921Smarcel
55564921Smarcelint
55683366Sjulianlinux_iopl(struct thread *td, struct linux_iopl_args *args)
55764921Smarcel{
55864921Smarcel	int error;
55964921Smarcel
56064921Smarcel	if (args->level < 0 || args->level > 3)
56164921Smarcel		return (EINVAL);
56283366Sjulian	if ((error = suser_td(td)) != 0)
56364921Smarcel		return (error);
56483981Srwatson	if ((error = securelevel_gt(td->td_proc->p_ucred, 0)) != 0)
56583981Srwatson		return (error);
56683366Sjulian	td->td_frame->tf_eflags = (td->td_frame->tf_eflags & ~PSL_IOPL) |
56764921Smarcel	    (args->level * (PSL_IOPL / 3));
56864921Smarcel	return (0);
56964921Smarcel}
57064921Smarcel
57164921Smarcelint
57283366Sjulianlinux_modify_ldt(td, uap)
57383366Sjulian	struct thread *td;
57464921Smarcel	struct linux_modify_ldt_args *uap;
57564921Smarcel{
57664921Smarcel	int error;
57764921Smarcel	caddr_t sg;
57864921Smarcel	struct sysarch_args args;
57964921Smarcel	struct i386_ldt_args *ldt;
58083221Smarcel	struct l_descriptor ld;
58164921Smarcel	union descriptor *desc;
58264921Smarcel
58364921Smarcel	sg = stackgap_init();
58464921Smarcel
58564921Smarcel	if (uap->ptr == NULL)
58664921Smarcel		return (EINVAL);
58764921Smarcel
58864921Smarcel	switch (uap->func) {
58964921Smarcel	case 0x00: /* read_ldt */
59064921Smarcel		ldt = stackgap_alloc(&sg, sizeof(*ldt));
59164921Smarcel		ldt->start = 0;
59264921Smarcel		ldt->descs = uap->ptr;
59364921Smarcel		ldt->num = uap->bytecount / sizeof(union descriptor);
59464921Smarcel		args.op = I386_GET_LDT;
59564921Smarcel		args.parms = (char*)ldt;
59683366Sjulian		error = sysarch(td, &args);
59783366Sjulian		td->td_retval[0] *= sizeof(union descriptor);
59864921Smarcel		break;
59964921Smarcel	case 0x01: /* write_ldt */
60064921Smarcel	case 0x11: /* write_ldt */
60164921Smarcel		if (uap->bytecount != sizeof(ld))
60264921Smarcel			return (EINVAL);
60364921Smarcel
60464921Smarcel		error = copyin(uap->ptr, &ld, sizeof(ld));
60564921Smarcel		if (error)
60664921Smarcel			return (error);
60764921Smarcel
60864921Smarcel		ldt = stackgap_alloc(&sg, sizeof(*ldt));
60964921Smarcel		desc = stackgap_alloc(&sg, sizeof(*desc));
61064921Smarcel		ldt->start = ld.entry_number;
61164921Smarcel		ldt->descs = desc;
61264921Smarcel		ldt->num = 1;
61364921Smarcel		desc->sd.sd_lolimit = (ld.limit & 0x0000ffff);
61464921Smarcel		desc->sd.sd_hilimit = (ld.limit & 0x000f0000) >> 16;
61564921Smarcel		desc->sd.sd_lobase = (ld.base_addr & 0x00ffffff);
61664921Smarcel		desc->sd.sd_hibase = (ld.base_addr & 0xff000000) >> 24;
61764921Smarcel		desc->sd.sd_type = SDT_MEMRO | ((ld.read_exec_only ^ 1) << 1) |
61864921Smarcel			(ld.contents << 2);
61964921Smarcel		desc->sd.sd_dpl = 3;
62064921Smarcel		desc->sd.sd_p = (ld.seg_not_present ^ 1);
62164921Smarcel		desc->sd.sd_xx = 0;
62264921Smarcel		desc->sd.sd_def32 = ld.seg_32bit;
62364921Smarcel		desc->sd.sd_gran = ld.limit_in_pages;
62464921Smarcel		args.op = I386_SET_LDT;
62564921Smarcel		args.parms = (char*)ldt;
62683366Sjulian		error = sysarch(td, &args);
62764921Smarcel		break;
62864921Smarcel	default:
62964921Smarcel		error = EINVAL;
63064921Smarcel		break;
63164921Smarcel	}
63264921Smarcel
63364921Smarcel	if (error == EOPNOTSUPP) {
63464921Smarcel		printf("linux: modify_ldt needs kernel option USER_LDT\n");
63564921Smarcel		error = ENOSYS;
63664921Smarcel	}
63764921Smarcel
63864921Smarcel	return (error);
63964921Smarcel}
64064921Smarcel
64164921Smarcelint
64283366Sjulianlinux_sigaction(struct thread *td, struct linux_sigaction_args *args)
64364921Smarcel{
64483221Smarcel	l_osigaction_t osa;
64583221Smarcel	l_sigaction_t act, oact;
64664921Smarcel	int error;
64764921Smarcel
64864921Smarcel#ifdef DEBUG
64972543Sjlemon	if (ldebug(sigaction))
65072543Sjlemon		printf(ARGS(sigaction, "%d, %p, %p"),
65172543Sjlemon		    args->sig, (void *)args->nsa, (void *)args->osa);
65264921Smarcel#endif
65364921Smarcel
65464921Smarcel	if (args->nsa != NULL) {
65583221Smarcel		error = copyin((caddr_t)args->nsa, &osa,
65683221Smarcel		    sizeof(l_osigaction_t));
65764921Smarcel		if (error)
65864921Smarcel			return (error);
65964921Smarcel		act.lsa_handler = osa.lsa_handler;
66064921Smarcel		act.lsa_flags = osa.lsa_flags;
66164921Smarcel		act.lsa_restorer = osa.lsa_restorer;
66264921Smarcel		LINUX_SIGEMPTYSET(act.lsa_mask);
66364921Smarcel		act.lsa_mask.__bits[0] = osa.lsa_mask;
66464921Smarcel	}
66564921Smarcel
66683366Sjulian	error = linux_do_sigaction(td, args->sig, args->nsa ? &act : NULL,
66764921Smarcel	    args->osa ? &oact : NULL);
66864921Smarcel
66964921Smarcel	if (args->osa != NULL && !error) {
67064921Smarcel		osa.lsa_handler = oact.lsa_handler;
67164921Smarcel		osa.lsa_flags = oact.lsa_flags;
67264921Smarcel		osa.lsa_restorer = oact.lsa_restorer;
67364921Smarcel		osa.lsa_mask = oact.lsa_mask.__bits[0];
67483221Smarcel		error = copyout(&osa, (caddr_t)args->osa,
67583221Smarcel		    sizeof(l_osigaction_t));
67664921Smarcel	}
67764921Smarcel
67864921Smarcel	return (error);
67964921Smarcel}
68064921Smarcel
68164921Smarcel/*
68264921Smarcel * Linux has two extra args, restart and oldmask.  We dont use these,
68364921Smarcel * but it seems that "restart" is actually a context pointer that
68464921Smarcel * enables the signal to happen with a different register set.
68564921Smarcel */
68664921Smarcelint
68783366Sjulianlinux_sigsuspend(struct thread *td, struct linux_sigsuspend_args *args)
68864921Smarcel{
68964921Smarcel	struct sigsuspend_args bsd;
69064921Smarcel	sigset_t *sigmask;
69183221Smarcel	l_sigset_t mask;
69264921Smarcel	caddr_t sg = stackgap_init();
69364921Smarcel
69464921Smarcel#ifdef DEBUG
69572543Sjlemon	if (ldebug(sigsuspend))
69672543Sjlemon		printf(ARGS(sigsuspend, "%08lx"), (unsigned long)args->mask);
69764921Smarcel#endif
69864921Smarcel
69964921Smarcel	sigmask = stackgap_alloc(&sg, sizeof(sigset_t));
70064921Smarcel	LINUX_SIGEMPTYSET(mask);
70164921Smarcel	mask.__bits[0] = args->mask;
70264921Smarcel	linux_to_bsd_sigset(&mask, sigmask);
70364921Smarcel	bsd.sigmask = sigmask;
70483366Sjulian	return (sigsuspend(td, &bsd));
70564921Smarcel}
70664921Smarcel
70764921Smarcelint
70883366Sjulianlinux_rt_sigsuspend(td, uap)
70983366Sjulian	struct thread *td;
71064921Smarcel	struct linux_rt_sigsuspend_args *uap;
71164921Smarcel{
71283221Smarcel	l_sigset_t lmask;
71364921Smarcel	sigset_t *bmask;
71464921Smarcel	struct sigsuspend_args bsd;
71564921Smarcel	caddr_t sg = stackgap_init();
71664921Smarcel	int error;
71764921Smarcel
71864921Smarcel#ifdef DEBUG
71972543Sjlemon	if (ldebug(rt_sigsuspend))
72072543Sjlemon		printf(ARGS(rt_sigsuspend, "%p, %d"),
72172543Sjlemon		    (void *)uap->newset, uap->sigsetsize);
72264921Smarcel#endif
72364921Smarcel
72483221Smarcel	if (uap->sigsetsize != sizeof(l_sigset_t))
72564921Smarcel		return (EINVAL);
72664921Smarcel
72783221Smarcel	error = copyin(uap->newset, &lmask, sizeof(l_sigset_t));
72864921Smarcel	if (error)
72964921Smarcel		return (error);
73064921Smarcel
73164921Smarcel	bmask = stackgap_alloc(&sg, sizeof(sigset_t));
73264921Smarcel	linux_to_bsd_sigset(&lmask, bmask);
73364921Smarcel	bsd.sigmask = bmask;
73483366Sjulian	return (sigsuspend(td, &bsd));
73564921Smarcel}
73664921Smarcel
73764921Smarcelint
73883366Sjulianlinux_pause(struct thread *td, struct linux_pause_args *args)
73964921Smarcel{
74083366Sjulian	struct proc *p = td->td_proc;
74164921Smarcel	struct sigsuspend_args bsd;
74264921Smarcel	sigset_t *sigmask;
74364921Smarcel	caddr_t sg = stackgap_init();
74464921Smarcel
74564921Smarcel#ifdef DEBUG
74672543Sjlemon	if (ldebug(pause))
74772543Sjlemon		printf(ARGS(pause, ""));
74864921Smarcel#endif
74964921Smarcel
75064921Smarcel	sigmask = stackgap_alloc(&sg, sizeof(sigset_t));
75171494Sjhb	PROC_LOCK(p);
75264921Smarcel	*sigmask = p->p_sigmask;
75371494Sjhb	PROC_UNLOCK(p);
75464921Smarcel	bsd.sigmask = sigmask;
75583366Sjulian	return (sigsuspend(td, &bsd));
75664921Smarcel}
75764921Smarcel
75864921Smarcelint
75983366Sjulianlinux_sigaltstack(struct thread *td, struct linux_sigaltstack_args *uap)
76064921Smarcel{
76164921Smarcel	struct sigaltstack_args bsd;
76264921Smarcel	stack_t *ss, *oss;
76383221Smarcel	l_stack_t lss;
76464921Smarcel	int error;
76564921Smarcel	caddr_t sg = stackgap_init();
76664921Smarcel
76764921Smarcel#ifdef DEBUG
76872543Sjlemon	if (ldebug(sigaltstack))
76972543Sjlemon		printf(ARGS(sigaltstack, "%p, %p"), uap->uss, uap->uoss);
77064921Smarcel#endif
77164921Smarcel
77267051Sgallatin	if (uap->uss == NULL) {
77367051Sgallatin		ss = NULL;
77467051Sgallatin	} else {
77583221Smarcel		error = copyin(uap->uss, &lss, sizeof(l_stack_t));
77667051Sgallatin		if (error)
77767051Sgallatin			return (error);
77864921Smarcel
77967051Sgallatin		ss = stackgap_alloc(&sg, sizeof(stack_t));
78067051Sgallatin		ss->ss_sp = lss.ss_sp;
78168520Smarcel		ss->ss_size = lss.ss_size;
78267051Sgallatin		ss->ss_flags = linux_to_bsd_sigaltstack(lss.ss_flags);
78367051Sgallatin	}
78464921Smarcel	oss = (uap->uoss != NULL)
78564921Smarcel	    ? stackgap_alloc(&sg, sizeof(stack_t))
78664921Smarcel	    : NULL;
78764921Smarcel
78864921Smarcel	bsd.ss = ss;
78964921Smarcel	bsd.oss = oss;
79083366Sjulian	error = sigaltstack(td, &bsd);
79164921Smarcel
79264921Smarcel	if (!error && oss != NULL) {
79364921Smarcel		lss.ss_sp = oss->ss_sp;
79464921Smarcel		lss.ss_size = oss->ss_size;
79567051Sgallatin		lss.ss_flags = bsd_to_linux_sigaltstack(oss->ss_flags);
79683221Smarcel		error = copyout(&lss, uap->uoss, sizeof(l_stack_t));
79764921Smarcel	}
79864921Smarcel
79964921Smarcel	return (error);
80064921Smarcel}
801