linux_machdep.c revision 104984
164921Smarcel/*-
264921Smarcel * Copyright (c) 2000 Marcel Moolenaar
364921Smarcel * All rights reserved.
464921Smarcel *
564921Smarcel * Redistribution and use in source and binary forms, with or without
664921Smarcel * modification, are permitted provided that the following conditions
764921Smarcel * are met:
864921Smarcel * 1. Redistributions of source code must retain the above copyright
964921Smarcel *    notice, this list of conditions and the following disclaimer
1064921Smarcel *    in this position and unchanged.
1164921Smarcel * 2. Redistributions in binary form must reproduce the above copyright
1264921Smarcel *    notice, this list of conditions and the following disclaimer in the
1364921Smarcel *    documentation and/or other materials provided with the distribution.
1464921Smarcel * 3. The name of the author may not be used to endorse or promote products
1565067Smarcel *    derived from this software without specific prior written permission.
1664921Smarcel *
1764921Smarcel * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
1864921Smarcel * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
1964921Smarcel * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
2064921Smarcel * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
2164921Smarcel * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
2264921Smarcel * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
2364921Smarcel * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
2464921Smarcel * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
2564921Smarcel * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
2664921Smarcel * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2764921Smarcel *
2864921Smarcel * $FreeBSD: head/sys/i386/linux/linux_machdep.c 104984 2002-10-12 13:48:21Z bde $
2964921Smarcel */
3064921Smarcel
3164921Smarcel#include <sys/param.h>
3276166Smarkm#include <sys/systm.h>
3384811Sjhb#include <sys/lock.h>
3464921Smarcel#include <sys/mman.h>
3576166Smarkm#include <sys/mutex.h>
3664921Smarcel#include <sys/proc.h>
3776166Smarkm#include <sys/resource.h>
3876166Smarkm#include <sys/resourcevar.h>
39104984Sbde#include <sys/stdint.h>
40102814Siedowse#include <sys/syscallsubr.h>
4164921Smarcel#include <sys/sysproto.h>
4264921Smarcel#include <sys/unistd.h>
4364921Smarcel
4464921Smarcel#include <machine/frame.h>
4564921Smarcel#include <machine/psl.h>
4664921Smarcel#include <machine/segments.h>
4764921Smarcel#include <machine/sysarch.h>
4864921Smarcel
4967238Sgallatin#include <vm/vm.h>
5067238Sgallatin#include <vm/pmap.h>
5167238Sgallatin#include <vm/vm_map.h>
5267238Sgallatin
5364921Smarcel#include <i386/linux/linux.h>
5468583Smarcel#include <i386/linux/linux_proto.h>
5564921Smarcel#include <compat/linux/linux_ipc.h>
5664921Smarcel#include <compat/linux/linux_signal.h>
5764921Smarcel#include <compat/linux/linux_util.h>
5864921Smarcel
5983221Smarcelstruct l_descriptor {
6083221Smarcel	l_uint		entry_number;
6183221Smarcel	l_ulong		base_addr;
6283221Smarcel	l_uint		limit;
6383221Smarcel	l_uint		seg_32bit:1;
6483221Smarcel	l_uint		contents:2;
6583221Smarcel	l_uint		read_exec_only:1;
6683221Smarcel	l_uint		limit_in_pages:1;
6783221Smarcel	l_uint		seg_not_present:1;
6883221Smarcel	l_uint		useable:1;
6964921Smarcel};
7064921Smarcel
7183221Smarcelstruct l_old_select_argv {
7283221Smarcel	l_int		nfds;
7383221Smarcel	l_fd_set	*readfds;
7483221Smarcel	l_fd_set	*writefds;
7583221Smarcel	l_fd_set	*exceptfds;
7683221Smarcel	struct l_timeval	*timeout;
7764921Smarcel};
7864921Smarcel
7964921Smarcelint
8067051Sgallatinlinux_to_bsd_sigaltstack(int lsa)
8167051Sgallatin{
8267051Sgallatin	int bsa = 0;
8367051Sgallatin
8467051Sgallatin	if (lsa & LINUX_SS_DISABLE)
8567051Sgallatin		bsa |= SS_DISABLE;
8667051Sgallatin	if (lsa & LINUX_SS_ONSTACK)
8767051Sgallatin		bsa |= SS_ONSTACK;
8867051Sgallatin	return (bsa);
8967051Sgallatin}
9067051Sgallatin
9167051Sgallatinint
9267051Sgallatinbsd_to_linux_sigaltstack(int bsa)
9367051Sgallatin{
9467051Sgallatin	int lsa = 0;
9567051Sgallatin
9667051Sgallatin	if (bsa & SS_DISABLE)
9767051Sgallatin		lsa |= LINUX_SS_DISABLE;
9867051Sgallatin	if (bsa & SS_ONSTACK)
9967051Sgallatin		lsa |= LINUX_SS_ONSTACK;
10067051Sgallatin	return (lsa);
10167051Sgallatin}
10267051Sgallatin
10367051Sgallatinint
10483366Sjulianlinux_execve(struct thread *td, struct linux_execve_args *args)
10564921Smarcel{
10664921Smarcel	struct execve_args bsd;
10764921Smarcel	caddr_t sg;
10864921Smarcel
10964921Smarcel	sg = stackgap_init();
11083366Sjulian	CHECKALTEXIST(td, &sg, args->path);
11164921Smarcel
11264921Smarcel#ifdef DEBUG
11372543Sjlemon	if (ldebug(execve))
11472543Sjlemon		printf(ARGS(execve, "%s"), args->path);
11564921Smarcel#endif
11664921Smarcel
11764921Smarcel	bsd.fname = args->path;
11864921Smarcel	bsd.argv = args->argp;
11964921Smarcel	bsd.envv = args->envp;
12083366Sjulian	return (execve(td, &bsd));
12164921Smarcel}
12264921Smarcel
12383221Smarcelstruct l_ipc_kludge {
12483221Smarcel	struct l_msgbuf *msgp;
12583221Smarcel	l_long msgtyp;
12683221Smarcel};
12783221Smarcel
12864921Smarcelint
12983366Sjulianlinux_ipc(struct thread *td, struct linux_ipc_args *args)
13064921Smarcel{
13183221Smarcel
13283221Smarcel	switch (args->what & 0xFFFF) {
13383221Smarcel	case LINUX_SEMOP: {
13483221Smarcel		struct linux_semop_args a;
13583221Smarcel
13683221Smarcel		a.semid = args->arg1;
13783221Smarcel		a.tsops = args->ptr;
13883221Smarcel		a.nsops = args->arg2;
13983366Sjulian		return (linux_semop(td, &a));
14064921Smarcel	}
14183221Smarcel	case LINUX_SEMGET: {
14283221Smarcel		struct linux_semget_args a;
14364921Smarcel
14483221Smarcel		a.key = args->arg1;
14583221Smarcel		a.nsems = args->arg2;
14683221Smarcel		a.semflg = args->arg3;
14783366Sjulian		return (linux_semget(td, &a));
14883221Smarcel	}
14983221Smarcel	case LINUX_SEMCTL: {
15083221Smarcel		struct linux_semctl_args a;
15183221Smarcel		int error;
15283221Smarcel
15383221Smarcel		a.semid = args->arg1;
15483221Smarcel		a.semnum = args->arg2;
15583221Smarcel		a.cmd = args->arg3;
15683221Smarcel		error = copyin((caddr_t)args->ptr, &a.arg, sizeof(a.arg));
15783221Smarcel		if (error)
15883221Smarcel			return (error);
15983366Sjulian		return (linux_semctl(td, &a));
16083221Smarcel	}
16183221Smarcel	case LINUX_MSGSND: {
16283221Smarcel		struct linux_msgsnd_args a;
16383221Smarcel
16483221Smarcel		a.msqid = args->arg1;
16583221Smarcel		a.msgp = args->ptr;
16683221Smarcel		a.msgsz = args->arg2;
16783221Smarcel		a.msgflg = args->arg3;
16883366Sjulian		return (linux_msgsnd(td, &a));
16983221Smarcel	}
17083221Smarcel	case LINUX_MSGRCV: {
17183221Smarcel		struct linux_msgrcv_args a;
17283221Smarcel
17383221Smarcel		a.msqid = args->arg1;
17483221Smarcel		a.msgsz = args->arg2;
17583221Smarcel		a.msgflg = args->arg3;
17683221Smarcel		if ((args->what >> 16) == 0) {
17783221Smarcel			struct l_ipc_kludge tmp;
17883221Smarcel			int error;
17983221Smarcel
18083221Smarcel			if (args->ptr == NULL)
18183221Smarcel				return (EINVAL);
18283221Smarcel			error = copyin((caddr_t)args->ptr, &tmp, sizeof(tmp));
18383221Smarcel			if (error)
18483221Smarcel				return (error);
18583221Smarcel			a.msgp = tmp.msgp;
18683221Smarcel			a.msgtyp = tmp.msgtyp;
18783221Smarcel		} else {
18883221Smarcel			a.msgp = args->ptr;
18983221Smarcel			a.msgtyp = args->arg5;
19083221Smarcel		}
19183366Sjulian		return (linux_msgrcv(td, &a));
19283221Smarcel	}
19383221Smarcel	case LINUX_MSGGET: {
19483221Smarcel		struct linux_msgget_args a;
19583221Smarcel
19683221Smarcel		a.key = args->arg1;
19783221Smarcel		a.msgflg = args->arg2;
19883366Sjulian		return (linux_msgget(td, &a));
19983221Smarcel	}
20083221Smarcel	case LINUX_MSGCTL: {
20183221Smarcel		struct linux_msgctl_args a;
20283221Smarcel
20383221Smarcel		a.msqid = args->arg1;
20483221Smarcel		a.cmd = args->arg2;
20583221Smarcel		a.buf = args->ptr;
20683366Sjulian		return (linux_msgctl(td, &a));
20783221Smarcel	}
20883221Smarcel	case LINUX_SHMAT: {
20983221Smarcel		struct linux_shmat_args a;
21083221Smarcel
21183221Smarcel		a.shmid = args->arg1;
21283221Smarcel		a.shmaddr = args->ptr;
21383221Smarcel		a.shmflg = args->arg2;
21483221Smarcel		a.raddr = (l_ulong *)args->arg3;
21583366Sjulian		return (linux_shmat(td, &a));
21683221Smarcel	}
21783221Smarcel	case LINUX_SHMDT: {
21883221Smarcel		struct linux_shmdt_args a;
21983221Smarcel
22083221Smarcel		a.shmaddr = args->ptr;
22183366Sjulian		return (linux_shmdt(td, &a));
22283221Smarcel	}
22383221Smarcel	case LINUX_SHMGET: {
22483221Smarcel		struct linux_shmget_args a;
22583221Smarcel
22683221Smarcel		a.key = args->arg1;
22783221Smarcel		a.size = args->arg2;
22883221Smarcel		a.shmflg = args->arg3;
22983366Sjulian		return (linux_shmget(td, &a));
23083221Smarcel	}
23183221Smarcel	case LINUX_SHMCTL: {
23283221Smarcel		struct linux_shmctl_args a;
23383221Smarcel
23483221Smarcel		a.shmid = args->arg1;
23583221Smarcel		a.cmd = args->arg2;
23683221Smarcel		a.buf = args->ptr;
23783366Sjulian		return (linux_shmctl(td, &a));
23883221Smarcel	}
23983221Smarcel	default:
24083221Smarcel		break;
24183221Smarcel	}
24283221Smarcel
24383221Smarcel	return (EINVAL);
24464921Smarcel}
24564921Smarcel
24664921Smarcelint
24783366Sjulianlinux_old_select(struct thread *td, struct linux_old_select_args *args)
24864921Smarcel{
24983221Smarcel	struct l_old_select_argv linux_args;
25083221Smarcel	struct linux_select_args newsel;
25164921Smarcel	int error;
25264921Smarcel
25383221Smarcel#ifdef DEBUG
25483221Smarcel	if (ldebug(old_select))
25591437Speter		printf(ARGS(old_select, "%p"), args->ptr);
25664921Smarcel#endif
25764921Smarcel
25883221Smarcel	error = copyin((caddr_t)args->ptr, &linux_args, sizeof(linux_args));
25964921Smarcel	if (error)
26064921Smarcel		return (error);
26164921Smarcel
26264921Smarcel	newsel.nfds = linux_args.nfds;
26364921Smarcel	newsel.readfds = linux_args.readfds;
26464921Smarcel	newsel.writefds = linux_args.writefds;
26564921Smarcel	newsel.exceptfds = linux_args.exceptfds;
26664921Smarcel	newsel.timeout = linux_args.timeout;
26783366Sjulian	return (linux_select(td, &newsel));
26864921Smarcel}
26964921Smarcel
27064921Smarcelint
27183366Sjulianlinux_fork(struct thread *td, struct linux_fork_args *args)
27264921Smarcel{
27364921Smarcel	int error;
27464921Smarcel
27564921Smarcel#ifdef DEBUG
27672543Sjlemon	if (ldebug(fork))
27772543Sjlemon		printf(ARGS(fork, ""));
27864921Smarcel#endif
27964921Smarcel
28083366Sjulian	if ((error = fork(td, (struct fork_args *)args)) != 0)
28164921Smarcel		return (error);
28264921Smarcel
28383366Sjulian	if (td->td_retval[1] == 1)
28483366Sjulian		td->td_retval[0] = 0;
28564921Smarcel	return (0);
28664921Smarcel}
28764921Smarcel
28864921Smarcelint
28983366Sjulianlinux_vfork(struct thread *td, struct linux_vfork_args *args)
29064921Smarcel{
29164921Smarcel	int error;
29264921Smarcel
29364921Smarcel#ifdef DEBUG
29472543Sjlemon	if (ldebug(vfork))
29572543Sjlemon		printf(ARGS(vfork, ""));
29664921Smarcel#endif
29764921Smarcel
29883366Sjulian	if ((error = vfork(td, (struct vfork_args *)args)) != 0)
29964921Smarcel		return (error);
30064921Smarcel	/* Are we the child? */
30183366Sjulian	if (td->td_retval[1] == 1)
30283366Sjulian		td->td_retval[0] = 0;
30364921Smarcel	return (0);
30464921Smarcel}
30564921Smarcel
/*
 * Linux clone() flag bits examined by linux_clone().  The low byte of the
 * flags word is not a flag; it carries the child's exit signal.
 */
#define CLONE_VM	0x100	/* mapped to RFMEM */
#define CLONE_FS	0x200	/* not acted upon in this file */
#define CLONE_FILES	0x400	/* when clear, RFFDG is requested */
#define CLONE_SIGHAND	0x800	/* mapped to RFSIGSHARE */
#define CLONE_PID	0x1000	/* unsupported; only reported under DEBUG */
31164921Smarcel
31264921Smarcelint
31383366Sjulianlinux_clone(struct thread *td, struct linux_clone_args *args)
31464921Smarcel{
31573856Sjhb	int error, ff = RFPROC | RFSTOPPED;
31664921Smarcel	struct proc *p2;
31764921Smarcel	int exit_signal;
31864921Smarcel
31964921Smarcel#ifdef DEBUG
32072543Sjlemon	if (ldebug(clone)) {
32172543Sjlemon		printf(ARGS(clone, "flags %x, stack %x"),
32272543Sjlemon		    (unsigned int)args->flags, (unsigned int)args->stack);
32372543Sjlemon		if (args->flags & CLONE_PID)
32472543Sjlemon			printf(LMSG("CLONE_PID not yet supported"));
32572543Sjlemon	}
32664921Smarcel#endif
32764921Smarcel
32864921Smarcel	if (!args->stack)
32964921Smarcel		return (EINVAL);
33064921Smarcel
33164921Smarcel	exit_signal = args->flags & 0x000000ff;
33264921Smarcel	if (exit_signal >= LINUX_NSIG)
33364921Smarcel		return (EINVAL);
33464921Smarcel
33564921Smarcel	if (exit_signal <= LINUX_SIGTBLSZ)
33664921Smarcel		exit_signal = linux_to_bsd_signal[_SIG_IDX(exit_signal)];
33764921Smarcel
33864921Smarcel	if (args->flags & CLONE_VM)
33964921Smarcel		ff |= RFMEM;
34064921Smarcel	if (args->flags & CLONE_SIGHAND)
34164921Smarcel		ff |= RFSIGSHARE;
34264921Smarcel	if (!(args->flags & CLONE_FILES))
34364921Smarcel		ff |= RFFDG;
34464921Smarcel
34583221Smarcel	mtx_lock(&Giant);
346104354Sscottl	error = fork1(td, ff, 0, &p2);
34783221Smarcel	if (error == 0) {
34883366Sjulian		td->td_retval[0] = p2->p_pid;
34983366Sjulian		td->td_retval[1] = 0;
35064921Smarcel
35183221Smarcel		PROC_LOCK(p2);
35283221Smarcel		p2->p_sigparent = exit_signal;
35390361Sjulian		FIRST_THREAD_IN_PROC(p2)->td_frame->tf_esp =
35490361Sjulian					(unsigned int)args->stack;
35564921Smarcel
35664921Smarcel#ifdef DEBUG
35783221Smarcel		if (ldebug(clone))
35883221Smarcel			printf(LMSG("clone: successful rfork to %ld"),
35983221Smarcel			    (long)p2->p_pid);
36064921Smarcel#endif
36164921Smarcel
36283221Smarcel		/*
36383221Smarcel		 * Make this runnable after we are finished with it.
36483221Smarcel		 */
36583221Smarcel		mtx_lock_spin(&sched_lock);
366103216Sjulian		TD_SET_CAN_RUN(FIRST_THREAD_IN_PROC(p2));
36790361Sjulian		setrunqueue(FIRST_THREAD_IN_PROC(p2));
36883221Smarcel		mtx_unlock_spin(&sched_lock);
36983221Smarcel		PROC_UNLOCK(p2);
37083221Smarcel	}
37183221Smarcel	mtx_unlock(&Giant);
37273856Sjhb
37383221Smarcel	return (error);
37464921Smarcel}
37564921Smarcel
37664921Smarcel/* XXX move */
37783221Smarcelstruct l_mmap_argv {
37883221Smarcel	l_caddr_t	addr;
37983221Smarcel	l_int		len;
38083221Smarcel	l_int		prot;
38183221Smarcel	l_int		flags;
38283221Smarcel	l_int		fd;
38383221Smarcel	l_int		pos;
38464921Smarcel};
38564921Smarcel
/*
 * Sizes used by linux_mmap_common() for LINUX_MAP_GROWSDOWN mappings: the
 * stack region is made at least (STACK_SIZE - GUARD_SIZE) bytes long.
 */
#define STACK_SIZE  (2 * 1024 * 1024)
#define GUARD_SIZE  (4 * PAGE_SIZE)

/* Shared back end of linux_mmap() and linux_mmap2(). */
static int linux_mmap_common(struct thread *, struct l_mmap_argv *);
390104893Ssobomax
39164921Smarcelint
392104893Ssobomaxlinux_mmap2(struct thread *td, struct linux_mmap2_args *args)
393104893Ssobomax{
394104893Ssobomax	struct l_mmap_argv linux_args;
395104893Ssobomax
396104893Ssobomax#ifdef DEBUG
397104893Ssobomax	if (ldebug(mmap2))
398104893Ssobomax		printf(ARGS(mmap2, "%p, %d, %d, 0x%08x, %d, %d"),
399104893Ssobomax		    (void *)args->addr, args->len, args->prot,
400104893Ssobomax		    args->flags, args->fd, args->pgoff);
401104893Ssobomax#endif
402104893Ssobomax
403104893Ssobomax	linux_args.addr = (l_caddr_t)args->addr;
404104893Ssobomax	linux_args.len = args->len;
405104893Ssobomax	linux_args.prot = args->prot;
406104893Ssobomax	linux_args.flags = args->flags;
407104893Ssobomax	linux_args.fd = args->fd;
408104893Ssobomax	linux_args.pos = args->pgoff * PAGE_SIZE;
409104893Ssobomax
410104893Ssobomax	return (linux_mmap_common(td, &linux_args));
411104893Ssobomax}
412104893Ssobomax
413104893Ssobomaxint
41483366Sjulianlinux_mmap(struct thread *td, struct linux_mmap_args *args)
41564921Smarcel{
41664921Smarcel	int error;
41783221Smarcel	struct l_mmap_argv linux_args;
41864921Smarcel
41983221Smarcel	error = copyin((caddr_t)args->ptr, &linux_args, sizeof(linux_args));
42064921Smarcel	if (error)
42164921Smarcel		return (error);
42264921Smarcel
42364921Smarcel#ifdef DEBUG
42472543Sjlemon	if (ldebug(mmap))
42572543Sjlemon		printf(ARGS(mmap, "%p, %d, %d, 0x%08x, %d, %d"),
426104984Sbde		    (void *)linux_args.addr, linux_args.len, linux_args.prot,
427104984Sbde		    linux_args.flags, linux_args.fd, linux_args.pos);
42864921Smarcel#endif
42964921Smarcel
430104893Ssobomax	return (linux_mmap_common(td, &linux_args));
431104893Ssobomax}
432104893Ssobomax
433104893Ssobomaxstatic int
434104893Ssobomaxlinux_mmap_common(struct thread *td, struct l_mmap_argv *linux_args)
435104893Ssobomax{
436104893Ssobomax	struct proc *p = td->td_proc;
437104893Ssobomax	struct mmap_args /* {
438104893Ssobomax		caddr_t addr;
439104893Ssobomax		size_t len;
440104893Ssobomax		int prot;
441104893Ssobomax		int flags;
442104893Ssobomax		int fd;
443104893Ssobomax		long pad;
444104893Ssobomax		off_t pos;
445104893Ssobomax	} */ bsd_args;
446104893Ssobomax
44764921Smarcel	bsd_args.flags = 0;
448104893Ssobomax	if (linux_args->flags & LINUX_MAP_SHARED)
44964921Smarcel		bsd_args.flags |= MAP_SHARED;
450104893Ssobomax	if (linux_args->flags & LINUX_MAP_PRIVATE)
45164921Smarcel		bsd_args.flags |= MAP_PRIVATE;
452104893Ssobomax	if (linux_args->flags & LINUX_MAP_FIXED)
45364921Smarcel		bsd_args.flags |= MAP_FIXED;
454104893Ssobomax	if (linux_args->flags & LINUX_MAP_ANON)
45564921Smarcel		bsd_args.flags |= MAP_ANON;
45673213Sdillon	else
45773213Sdillon		bsd_args.flags |= MAP_NOSYNC;
458104893Ssobomax	if (linux_args->flags & LINUX_MAP_GROWSDOWN) {
45964921Smarcel		bsd_args.flags |= MAP_STACK;
46064921Smarcel
46164921Smarcel		/* The linux MAP_GROWSDOWN option does not limit auto
46264921Smarcel		 * growth of the region.  Linux mmap with this option
46364921Smarcel		 * takes as addr the inital BOS, and as len, the initial
46464921Smarcel		 * region size.  It can then grow down from addr without
46564921Smarcel		 * limit.  However, linux threads has an implicit internal
46664921Smarcel		 * limit to stack size of STACK_SIZE.  Its just not
46764921Smarcel		 * enforced explicitly in linux.  But, here we impose
46864921Smarcel		 * a limit of (STACK_SIZE - GUARD_SIZE) on the stack
46964921Smarcel		 * region, since we can do this with our mmap.
47064921Smarcel		 *
47164921Smarcel		 * Our mmap with MAP_STACK takes addr as the maximum
47264921Smarcel		 * downsize limit on BOS, and as len the max size of
47364921Smarcel		 * the region.  It them maps the top SGROWSIZ bytes,
47464921Smarcel		 * and autgrows the region down, up to the limit
47564921Smarcel		 * in addr.
47664921Smarcel		 *
47764921Smarcel		 * If we don't use the MAP_STACK option, the effect
47864921Smarcel		 * of this code is to allocate a stack region of a
47964921Smarcel		 * fixed size of (STACK_SIZE - GUARD_SIZE).
48064921Smarcel		 */
48164921Smarcel
48264921Smarcel		/* This gives us TOS */
483104893Ssobomax		bsd_args.addr = linux_args->addr + linux_args->len;
48464921Smarcel
48567238Sgallatin		if (bsd_args.addr > p->p_vmspace->vm_maxsaddr) {
48667238Sgallatin			/* Some linux apps will attempt to mmap
48767238Sgallatin			 * thread stacks near the top of their
48867238Sgallatin			 * address space.  If their TOS is greater
48967238Sgallatin			 * than vm_maxsaddr, vm_map_growstack()
49067238Sgallatin			 * will confuse the thread stack with the
49167238Sgallatin			 * process stack and deliver a SEGV if they
49267238Sgallatin			 * attempt to grow the thread stack past their
49367238Sgallatin			 * current stacksize rlimit.  To avoid this,
49467238Sgallatin			 * adjust vm_maxsaddr upwards to reflect
49567238Sgallatin			 * the current stacksize rlimit rather
49667238Sgallatin			 * than the maximum possible stacksize.
49767238Sgallatin			 * It would be better to adjust the
49867238Sgallatin			 * mmap'ed region, but some apps do not check
49967238Sgallatin			 * mmap's return value.
50067238Sgallatin			 */
50171494Sjhb			mtx_assert(&Giant, MA_OWNED);
50267238Sgallatin			p->p_vmspace->vm_maxsaddr = (char *)USRSTACK -
50367238Sgallatin			    p->p_rlimit[RLIMIT_STACK].rlim_cur;
50467238Sgallatin		}
50567238Sgallatin
50664921Smarcel		/* This gives us our maximum stack size */
507104893Ssobomax		if (linux_args->len > STACK_SIZE - GUARD_SIZE)
508104893Ssobomax			bsd_args.len = linux_args->len;
50964921Smarcel		else
51064921Smarcel			bsd_args.len  = STACK_SIZE - GUARD_SIZE;
51164921Smarcel
51264921Smarcel		/* This gives us a new BOS.  If we're using VM_STACK, then
51364921Smarcel		 * mmap will just map the top SGROWSIZ bytes, and let
51464921Smarcel		 * the stack grow down to the limit at BOS.  If we're
51564921Smarcel		 * not using VM_STACK we map the full stack, since we
51664921Smarcel		 * don't have a way to autogrow it.
51764921Smarcel		 */
51864921Smarcel		bsd_args.addr -= bsd_args.len;
51964921Smarcel	} else {
520104893Ssobomax		bsd_args.addr = linux_args->addr;
521104893Ssobomax		bsd_args.len  = linux_args->len;
52264921Smarcel	}
52364921Smarcel
524104893Ssobomax	bsd_args.prot = linux_args->prot | PROT_READ;	/* always required */
525104893Ssobomax	if (linux_args->flags & LINUX_MAP_ANON)
52664921Smarcel		bsd_args.fd = -1;
52764921Smarcel	else
528104893Ssobomax		bsd_args.fd = linux_args->fd;
529104893Ssobomax	bsd_args.pos = linux_args->pos;
53064921Smarcel	bsd_args.pad = 0;
53164921Smarcel
53264921Smarcel#ifdef DEBUG
53372543Sjlemon	if (ldebug(mmap))
53472543Sjlemon		printf("-> (%p, %d, %d, 0x%08x, %d, %d)\n",
53572543Sjlemon		    (void *)bsd_args.addr, bsd_args.len, bsd_args.prot,
53672543Sjlemon		    bsd_args.flags, bsd_args.fd, (int)bsd_args.pos);
53764921Smarcel#endif
53864921Smarcel
53983366Sjulian	return (mmap(td, &bsd_args));
54064921Smarcel}
54164921Smarcel
54264921Smarcelint
54383366Sjulianlinux_pipe(struct thread *td, struct linux_pipe_args *args)
54464921Smarcel{
54564921Smarcel	int error;
54664921Smarcel	int reg_edx;
54764921Smarcel
54864921Smarcel#ifdef DEBUG
54972543Sjlemon	if (ldebug(pipe))
55072543Sjlemon		printf(ARGS(pipe, "*"));
55164921Smarcel#endif
55264921Smarcel
55383366Sjulian	reg_edx = td->td_retval[1];
55483366Sjulian	error = pipe(td, 0);
55564921Smarcel	if (error) {
55683366Sjulian		td->td_retval[1] = reg_edx;
55764921Smarcel		return (error);
55864921Smarcel	}
55964921Smarcel
56083366Sjulian	error = copyout(td->td_retval, args->pipefds, 2*sizeof(int));
56164921Smarcel	if (error) {
56283366Sjulian		td->td_retval[1] = reg_edx;
56364921Smarcel		return (error);
56464921Smarcel	}
56564921Smarcel
56683366Sjulian	td->td_retval[1] = reg_edx;
56783366Sjulian	td->td_retval[0] = 0;
56864921Smarcel	return (0);
56964921Smarcel}
57064921Smarcel
57164921Smarcelint
57283366Sjulianlinux_ioperm(struct thread *td, struct linux_ioperm_args *args)
57364921Smarcel{
57464921Smarcel	struct sysarch_args sa;
57564921Smarcel	struct i386_ioperm_args *iia;
57664921Smarcel	caddr_t sg;
57764921Smarcel
57864921Smarcel	sg = stackgap_init();
57964921Smarcel	iia = stackgap_alloc(&sg, sizeof(struct i386_ioperm_args));
58064921Smarcel	iia->start = args->start;
58164921Smarcel	iia->length = args->length;
58264921Smarcel	iia->enable = args->enable;
58364921Smarcel	sa.op = I386_SET_IOPERM;
58464921Smarcel	sa.parms = (char *)iia;
58583366Sjulian	return (sysarch(td, &sa));
58664921Smarcel}
58764921Smarcel
58864921Smarcelint
58983366Sjulianlinux_iopl(struct thread *td, struct linux_iopl_args *args)
59064921Smarcel{
59164921Smarcel	int error;
59264921Smarcel
59364921Smarcel	if (args->level < 0 || args->level > 3)
59464921Smarcel		return (EINVAL);
59593593Sjhb	if ((error = suser(td)) != 0)
59664921Smarcel		return (error);
59791406Sjhb	if ((error = securelevel_gt(td->td_ucred, 0)) != 0)
59883981Srwatson		return (error);
59983366Sjulian	td->td_frame->tf_eflags = (td->td_frame->tf_eflags & ~PSL_IOPL) |
60064921Smarcel	    (args->level * (PSL_IOPL / 3));
60164921Smarcel	return (0);
60264921Smarcel}
60364921Smarcel
60464921Smarcelint
60583366Sjulianlinux_modify_ldt(td, uap)
60683366Sjulian	struct thread *td;
60764921Smarcel	struct linux_modify_ldt_args *uap;
60864921Smarcel{
60964921Smarcel	int error;
61064921Smarcel	caddr_t sg;
61164921Smarcel	struct sysarch_args args;
61264921Smarcel	struct i386_ldt_args *ldt;
61383221Smarcel	struct l_descriptor ld;
61464921Smarcel	union descriptor *desc;
61564921Smarcel
61664921Smarcel	sg = stackgap_init();
61764921Smarcel
61864921Smarcel	if (uap->ptr == NULL)
61964921Smarcel		return (EINVAL);
62064921Smarcel
62164921Smarcel	switch (uap->func) {
62264921Smarcel	case 0x00: /* read_ldt */
62364921Smarcel		ldt = stackgap_alloc(&sg, sizeof(*ldt));
62464921Smarcel		ldt->start = 0;
62564921Smarcel		ldt->descs = uap->ptr;
62664921Smarcel		ldt->num = uap->bytecount / sizeof(union descriptor);
62764921Smarcel		args.op = I386_GET_LDT;
62864921Smarcel		args.parms = (char*)ldt;
62983366Sjulian		error = sysarch(td, &args);
63083366Sjulian		td->td_retval[0] *= sizeof(union descriptor);
63164921Smarcel		break;
63264921Smarcel	case 0x01: /* write_ldt */
63364921Smarcel	case 0x11: /* write_ldt */
63464921Smarcel		if (uap->bytecount != sizeof(ld))
63564921Smarcel			return (EINVAL);
63664921Smarcel
63764921Smarcel		error = copyin(uap->ptr, &ld, sizeof(ld));
63864921Smarcel		if (error)
63964921Smarcel			return (error);
64064921Smarcel
64164921Smarcel		ldt = stackgap_alloc(&sg, sizeof(*ldt));
64264921Smarcel		desc = stackgap_alloc(&sg, sizeof(*desc));
64364921Smarcel		ldt->start = ld.entry_number;
64464921Smarcel		ldt->descs = desc;
64564921Smarcel		ldt->num = 1;
64664921Smarcel		desc->sd.sd_lolimit = (ld.limit & 0x0000ffff);
64764921Smarcel		desc->sd.sd_hilimit = (ld.limit & 0x000f0000) >> 16;
64864921Smarcel		desc->sd.sd_lobase = (ld.base_addr & 0x00ffffff);
64964921Smarcel		desc->sd.sd_hibase = (ld.base_addr & 0xff000000) >> 24;
65064921Smarcel		desc->sd.sd_type = SDT_MEMRO | ((ld.read_exec_only ^ 1) << 1) |
65164921Smarcel			(ld.contents << 2);
65264921Smarcel		desc->sd.sd_dpl = 3;
65364921Smarcel		desc->sd.sd_p = (ld.seg_not_present ^ 1);
65464921Smarcel		desc->sd.sd_xx = 0;
65564921Smarcel		desc->sd.sd_def32 = ld.seg_32bit;
65664921Smarcel		desc->sd.sd_gran = ld.limit_in_pages;
65764921Smarcel		args.op = I386_SET_LDT;
65864921Smarcel		args.parms = (char*)ldt;
65983366Sjulian		error = sysarch(td, &args);
66064921Smarcel		break;
66164921Smarcel	default:
66264921Smarcel		error = EINVAL;
66364921Smarcel		break;
66464921Smarcel	}
66564921Smarcel
66664921Smarcel	if (error == EOPNOTSUPP) {
66764921Smarcel		printf("linux: modify_ldt needs kernel option USER_LDT\n");
66864921Smarcel		error = ENOSYS;
66964921Smarcel	}
67064921Smarcel
67164921Smarcel	return (error);
67264921Smarcel}
67364921Smarcel
67464921Smarcelint
67583366Sjulianlinux_sigaction(struct thread *td, struct linux_sigaction_args *args)
67664921Smarcel{
67783221Smarcel	l_osigaction_t osa;
67883221Smarcel	l_sigaction_t act, oact;
67964921Smarcel	int error;
68064921Smarcel
68164921Smarcel#ifdef DEBUG
68272543Sjlemon	if (ldebug(sigaction))
68372543Sjlemon		printf(ARGS(sigaction, "%d, %p, %p"),
68472543Sjlemon		    args->sig, (void *)args->nsa, (void *)args->osa);
68564921Smarcel#endif
68664921Smarcel
68764921Smarcel	if (args->nsa != NULL) {
68883221Smarcel		error = copyin((caddr_t)args->nsa, &osa,
68983221Smarcel		    sizeof(l_osigaction_t));
69064921Smarcel		if (error)
69164921Smarcel			return (error);
69264921Smarcel		act.lsa_handler = osa.lsa_handler;
69364921Smarcel		act.lsa_flags = osa.lsa_flags;
69464921Smarcel		act.lsa_restorer = osa.lsa_restorer;
69564921Smarcel		LINUX_SIGEMPTYSET(act.lsa_mask);
69664921Smarcel		act.lsa_mask.__bits[0] = osa.lsa_mask;
69764921Smarcel	}
69864921Smarcel
69983366Sjulian	error = linux_do_sigaction(td, args->sig, args->nsa ? &act : NULL,
70064921Smarcel	    args->osa ? &oact : NULL);
70164921Smarcel
70264921Smarcel	if (args->osa != NULL && !error) {
70364921Smarcel		osa.lsa_handler = oact.lsa_handler;
70464921Smarcel		osa.lsa_flags = oact.lsa_flags;
70564921Smarcel		osa.lsa_restorer = oact.lsa_restorer;
70664921Smarcel		osa.lsa_mask = oact.lsa_mask.__bits[0];
70783221Smarcel		error = copyout(&osa, (caddr_t)args->osa,
70883221Smarcel		    sizeof(l_osigaction_t));
70964921Smarcel	}
71064921Smarcel
71164921Smarcel	return (error);
71264921Smarcel}
71364921Smarcel
71464921Smarcel/*
71564921Smarcel * Linux has two extra args, restart and oldmask.  We dont use these,
71664921Smarcel * but it seems that "restart" is actually a context pointer that
71764921Smarcel * enables the signal to happen with a different register set.
71864921Smarcel */
71964921Smarcelint
72083366Sjulianlinux_sigsuspend(struct thread *td, struct linux_sigsuspend_args *args)
72164921Smarcel{
722102814Siedowse	sigset_t sigmask;
72383221Smarcel	l_sigset_t mask;
72464921Smarcel
72564921Smarcel#ifdef DEBUG
72672543Sjlemon	if (ldebug(sigsuspend))
72772543Sjlemon		printf(ARGS(sigsuspend, "%08lx"), (unsigned long)args->mask);
72864921Smarcel#endif
72964921Smarcel
73064921Smarcel	LINUX_SIGEMPTYSET(mask);
73164921Smarcel	mask.__bits[0] = args->mask;
732102814Siedowse	linux_to_bsd_sigset(&mask, &sigmask);
733102814Siedowse	return (kern_sigsuspend(td, sigmask));
73464921Smarcel}
73564921Smarcel
73664921Smarcelint
73783366Sjulianlinux_rt_sigsuspend(td, uap)
73883366Sjulian	struct thread *td;
73964921Smarcel	struct linux_rt_sigsuspend_args *uap;
74064921Smarcel{
74183221Smarcel	l_sigset_t lmask;
742102814Siedowse	sigset_t sigmask;
74364921Smarcel	int error;
74464921Smarcel
74564921Smarcel#ifdef DEBUG
74672543Sjlemon	if (ldebug(rt_sigsuspend))
74772543Sjlemon		printf(ARGS(rt_sigsuspend, "%p, %d"),
74872543Sjlemon		    (void *)uap->newset, uap->sigsetsize);
74964921Smarcel#endif
75064921Smarcel
75183221Smarcel	if (uap->sigsetsize != sizeof(l_sigset_t))
75264921Smarcel		return (EINVAL);
75364921Smarcel
75483221Smarcel	error = copyin(uap->newset, &lmask, sizeof(l_sigset_t));
75564921Smarcel	if (error)
75664921Smarcel		return (error);
75764921Smarcel
758102814Siedowse	linux_to_bsd_sigset(&lmask, &sigmask);
759102814Siedowse	return (kern_sigsuspend(td, sigmask));
76064921Smarcel}
76164921Smarcel
76264921Smarcelint
76383366Sjulianlinux_pause(struct thread *td, struct linux_pause_args *args)
76464921Smarcel{
76583366Sjulian	struct proc *p = td->td_proc;
766102814Siedowse	sigset_t sigmask;
76764921Smarcel
76864921Smarcel#ifdef DEBUG
76972543Sjlemon	if (ldebug(pause))
77072543Sjlemon		printf(ARGS(pause, ""));
77164921Smarcel#endif
77264921Smarcel
77371494Sjhb	PROC_LOCK(p);
774102814Siedowse	sigmask = p->p_sigmask;
77571494Sjhb	PROC_UNLOCK(p);
776102814Siedowse	return (kern_sigsuspend(td, sigmask));
77764921Smarcel}
77864921Smarcel
77964921Smarcelint
78083366Sjulianlinux_sigaltstack(struct thread *td, struct linux_sigaltstack_args *uap)
78164921Smarcel{
782102814Siedowse	stack_t ss, oss;
78383221Smarcel	l_stack_t lss;
78464921Smarcel	int error;
78564921Smarcel
78664921Smarcel#ifdef DEBUG
78772543Sjlemon	if (ldebug(sigaltstack))
78872543Sjlemon		printf(ARGS(sigaltstack, "%p, %p"), uap->uss, uap->uoss);
78964921Smarcel#endif
79064921Smarcel
791102814Siedowse	if (uap->uss != NULL) {
79283221Smarcel		error = copyin(uap->uss, &lss, sizeof(l_stack_t));
79367051Sgallatin		if (error)
79467051Sgallatin			return (error);
79564921Smarcel
796102814Siedowse		ss.ss_sp = lss.ss_sp;
797102814Siedowse		ss.ss_size = lss.ss_size;
798102814Siedowse		ss.ss_flags = linux_to_bsd_sigaltstack(lss.ss_flags);
79967051Sgallatin	}
800102814Siedowse	error = kern_sigaltstack(td, (uap->uoss != NULL) ? &oss : NULL,
801102814Siedowse	    (uap->uss != NULL) ? &ss : NULL);
802102814Siedowse	if (!error && uap->uoss != NULL) {
803102814Siedowse		lss.ss_sp = oss.ss_sp;
804102814Siedowse		lss.ss_size = oss.ss_size;
805102814Siedowse		lss.ss_flags = bsd_to_linux_sigaltstack(oss.ss_flags);
80683221Smarcel		error = copyout(&lss, uap->uoss, sizeof(l_stack_t));
80764921Smarcel	}
80864921Smarcel
80964921Smarcel	return (error);
81064921Smarcel}
811104893Ssobomax
812104893Ssobomaxint
813104893Ssobomaxlinux_ftruncate64(struct thread *td, struct linux_ftruncate64_args *args)
814104893Ssobomax{
815104893Ssobomax	struct ftruncate_args sa;
816104893Ssobomax
817104893Ssobomax#ifdef DEBUG
818104893Ssobomax	if (ldebug(ftruncate64))
819104984Sbde		printf(ARGS(ftruncate64, "%u, %jd"), args->fd,
820104984Sbde		    (intmax_t)args->length);
821104893Ssobomax#endif
822104893Ssobomax
823104893Ssobomax	sa.fd = args->fd;
824104893Ssobomax	sa.pad = 0;
825104893Ssobomax	sa.length = args->length;
826104893Ssobomax	return ftruncate(td, &sa);
827104893Ssobomax}
828