linux_machdep.c revision 125454
164921Smarcel/*-
264921Smarcel * Copyright (c) 2000 Marcel Moolenaar
364921Smarcel * All rights reserved.
464921Smarcel *
564921Smarcel * Redistribution and use in source and binary forms, with or without
664921Smarcel * modification, are permitted provided that the following conditions
764921Smarcel * are met:
864921Smarcel * 1. Redistributions of source code must retain the above copyright
9111798Sdes *    notice, this list of conditions and the following disclaimer
1064921Smarcel *    in this position and unchanged.
1164921Smarcel * 2. Redistributions in binary form must reproduce the above copyright
1264921Smarcel *    notice, this list of conditions and the following disclaimer in the
1364921Smarcel *    documentation and/or other materials provided with the distribution.
1464921Smarcel * 3. The name of the author may not be used to endorse or promote products
1565067Smarcel *    derived from this software without specific prior written permission.
1664921Smarcel *
1764921Smarcel * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
1864921Smarcel * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
1964921Smarcel * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
2064921Smarcel * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
2164921Smarcel * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
2264921Smarcel * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
2364921Smarcel * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
2464921Smarcel * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
2564921Smarcel * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
2664921Smarcel * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2764921Smarcel */
2864921Smarcel
29115705Sobrien#include <sys/cdefs.h>
30115705Sobrien__FBSDID("$FreeBSD: head/sys/i386/linux/linux_machdep.c 125454 2004-02-04 21:52:57Z jhb $");
31115705Sobrien
3264921Smarcel#include <sys/param.h>
3376166Smarkm#include <sys/systm.h>
3484811Sjhb#include <sys/lock.h>
3564921Smarcel#include <sys/mman.h>
3676166Smarkm#include <sys/mutex.h>
3764921Smarcel#include <sys/proc.h>
3876166Smarkm#include <sys/resource.h>
3976166Smarkm#include <sys/resourcevar.h>
40102814Siedowse#include <sys/syscallsubr.h>
4164921Smarcel#include <sys/sysproto.h>
4264921Smarcel#include <sys/unistd.h>
4364921Smarcel
4464921Smarcel#include <machine/frame.h>
4564921Smarcel#include <machine/psl.h>
4664921Smarcel#include <machine/segments.h>
4764921Smarcel#include <machine/sysarch.h>
4864921Smarcel
4967238Sgallatin#include <vm/vm.h>
5067238Sgallatin#include <vm/pmap.h>
5167238Sgallatin#include <vm/vm_map.h>
5267238Sgallatin
5364921Smarcel#include <i386/linux/linux.h>
5468583Smarcel#include <i386/linux/linux_proto.h>
5564921Smarcel#include <compat/linux/linux_ipc.h>
5664921Smarcel#include <compat/linux/linux_signal.h>
5764921Smarcel#include <compat/linux/linux_util.h>
5864921Smarcel
5983221Smarcelstruct l_descriptor {
6083221Smarcel	l_uint		entry_number;
6183221Smarcel	l_ulong		base_addr;
6283221Smarcel	l_uint		limit;
6383221Smarcel	l_uint		seg_32bit:1;
6483221Smarcel	l_uint		contents:2;
6583221Smarcel	l_uint		read_exec_only:1;
6683221Smarcel	l_uint		limit_in_pages:1;
6783221Smarcel	l_uint		seg_not_present:1;
6883221Smarcel	l_uint		useable:1;
6964921Smarcel};
7064921Smarcel
7183221Smarcelstruct l_old_select_argv {
7283221Smarcel	l_int		nfds;
7383221Smarcel	l_fd_set	*readfds;
7483221Smarcel	l_fd_set	*writefds;
7583221Smarcel	l_fd_set	*exceptfds;
7683221Smarcel	struct l_timeval	*timeout;
7764921Smarcel};
7864921Smarcel
7964921Smarcelint
8067051Sgallatinlinux_to_bsd_sigaltstack(int lsa)
8167051Sgallatin{
8267051Sgallatin	int bsa = 0;
8367051Sgallatin
8467051Sgallatin	if (lsa & LINUX_SS_DISABLE)
8567051Sgallatin		bsa |= SS_DISABLE;
8667051Sgallatin	if (lsa & LINUX_SS_ONSTACK)
8767051Sgallatin		bsa |= SS_ONSTACK;
8867051Sgallatin	return (bsa);
8967051Sgallatin}
9067051Sgallatin
9167051Sgallatinint
9267051Sgallatinbsd_to_linux_sigaltstack(int bsa)
9367051Sgallatin{
9467051Sgallatin	int lsa = 0;
9567051Sgallatin
9667051Sgallatin	if (bsa & SS_DISABLE)
9767051Sgallatin		lsa |= LINUX_SS_DISABLE;
9867051Sgallatin	if (bsa & SS_ONSTACK)
9967051Sgallatin		lsa |= LINUX_SS_ONSTACK;
10067051Sgallatin	return (lsa);
10167051Sgallatin}
10267051Sgallatin
10367051Sgallatinint
10483366Sjulianlinux_execve(struct thread *td, struct linux_execve_args *args)
10564921Smarcel{
10664921Smarcel	struct execve_args bsd;
10764921Smarcel	caddr_t sg;
10864921Smarcel
10964921Smarcel	sg = stackgap_init();
11083366Sjulian	CHECKALTEXIST(td, &sg, args->path);
11164921Smarcel
11264921Smarcel#ifdef DEBUG
11372543Sjlemon	if (ldebug(execve))
11472543Sjlemon		printf(ARGS(execve, "%s"), args->path);
11564921Smarcel#endif
11664921Smarcel
11764921Smarcel	bsd.fname = args->path;
11864921Smarcel	bsd.argv = args->argp;
11964921Smarcel	bsd.envv = args->envp;
12083366Sjulian	return (execve(td, &bsd));
12164921Smarcel}
12264921Smarcel
12383221Smarcelstruct l_ipc_kludge {
12483221Smarcel	struct l_msgbuf *msgp;
12583221Smarcel	l_long msgtyp;
12683221Smarcel};
12783221Smarcel
12864921Smarcelint
12983366Sjulianlinux_ipc(struct thread *td, struct linux_ipc_args *args)
13064921Smarcel{
13183221Smarcel
13283221Smarcel	switch (args->what & 0xFFFF) {
13383221Smarcel	case LINUX_SEMOP: {
13483221Smarcel		struct linux_semop_args a;
13583221Smarcel
13683221Smarcel		a.semid = args->arg1;
13783221Smarcel		a.tsops = args->ptr;
13883221Smarcel		a.nsops = args->arg2;
13983366Sjulian		return (linux_semop(td, &a));
14064921Smarcel	}
14183221Smarcel	case LINUX_SEMGET: {
14283221Smarcel		struct linux_semget_args a;
14364921Smarcel
14483221Smarcel		a.key = args->arg1;
14583221Smarcel		a.nsems = args->arg2;
14683221Smarcel		a.semflg = args->arg3;
14783366Sjulian		return (linux_semget(td, &a));
14883221Smarcel	}
14983221Smarcel	case LINUX_SEMCTL: {
15083221Smarcel		struct linux_semctl_args a;
15183221Smarcel		int error;
15283221Smarcel
15383221Smarcel		a.semid = args->arg1;
15483221Smarcel		a.semnum = args->arg2;
15583221Smarcel		a.cmd = args->arg3;
156111797Sdes		error = copyin(args->ptr, &a.arg, sizeof(a.arg));
15783221Smarcel		if (error)
15883221Smarcel			return (error);
15983366Sjulian		return (linux_semctl(td, &a));
16083221Smarcel	}
16183221Smarcel	case LINUX_MSGSND: {
16283221Smarcel		struct linux_msgsnd_args a;
16383221Smarcel
16483221Smarcel		a.msqid = args->arg1;
16583221Smarcel		a.msgp = args->ptr;
16683221Smarcel		a.msgsz = args->arg2;
16783221Smarcel		a.msgflg = args->arg3;
16883366Sjulian		return (linux_msgsnd(td, &a));
16983221Smarcel	}
17083221Smarcel	case LINUX_MSGRCV: {
17183221Smarcel		struct linux_msgrcv_args a;
17283221Smarcel
17383221Smarcel		a.msqid = args->arg1;
17483221Smarcel		a.msgsz = args->arg2;
17583221Smarcel		a.msgflg = args->arg3;
17683221Smarcel		if ((args->what >> 16) == 0) {
17783221Smarcel			struct l_ipc_kludge tmp;
17883221Smarcel			int error;
17983221Smarcel
18083221Smarcel			if (args->ptr == NULL)
18183221Smarcel				return (EINVAL);
182111797Sdes			error = copyin(args->ptr, &tmp, sizeof(tmp));
18383221Smarcel			if (error)
18483221Smarcel				return (error);
18583221Smarcel			a.msgp = tmp.msgp;
18683221Smarcel			a.msgtyp = tmp.msgtyp;
18783221Smarcel		} else {
18883221Smarcel			a.msgp = args->ptr;
18983221Smarcel			a.msgtyp = args->arg5;
19083221Smarcel		}
19183366Sjulian		return (linux_msgrcv(td, &a));
19283221Smarcel	}
19383221Smarcel	case LINUX_MSGGET: {
19483221Smarcel		struct linux_msgget_args a;
19583221Smarcel
19683221Smarcel		a.key = args->arg1;
19783221Smarcel		a.msgflg = args->arg2;
19883366Sjulian		return (linux_msgget(td, &a));
19983221Smarcel	}
20083221Smarcel	case LINUX_MSGCTL: {
20183221Smarcel		struct linux_msgctl_args a;
20283221Smarcel
20383221Smarcel		a.msqid = args->arg1;
20483221Smarcel		a.cmd = args->arg2;
20583221Smarcel		a.buf = args->ptr;
20683366Sjulian		return (linux_msgctl(td, &a));
20783221Smarcel	}
20883221Smarcel	case LINUX_SHMAT: {
20983221Smarcel		struct linux_shmat_args a;
21083221Smarcel
21183221Smarcel		a.shmid = args->arg1;
21283221Smarcel		a.shmaddr = args->ptr;
21383221Smarcel		a.shmflg = args->arg2;
21483221Smarcel		a.raddr = (l_ulong *)args->arg3;
21583366Sjulian		return (linux_shmat(td, &a));
21683221Smarcel	}
21783221Smarcel	case LINUX_SHMDT: {
21883221Smarcel		struct linux_shmdt_args a;
21983221Smarcel
22083221Smarcel		a.shmaddr = args->ptr;
22183366Sjulian		return (linux_shmdt(td, &a));
22283221Smarcel	}
22383221Smarcel	case LINUX_SHMGET: {
22483221Smarcel		struct linux_shmget_args a;
22583221Smarcel
22683221Smarcel		a.key = args->arg1;
22783221Smarcel		a.size = args->arg2;
22883221Smarcel		a.shmflg = args->arg3;
22983366Sjulian		return (linux_shmget(td, &a));
23083221Smarcel	}
23183221Smarcel	case LINUX_SHMCTL: {
23283221Smarcel		struct linux_shmctl_args a;
23383221Smarcel
23483221Smarcel		a.shmid = args->arg1;
23583221Smarcel		a.cmd = args->arg2;
23683221Smarcel		a.buf = args->ptr;
23783366Sjulian		return (linux_shmctl(td, &a));
23883221Smarcel	}
23983221Smarcel	default:
24083221Smarcel		break;
24183221Smarcel	}
24283221Smarcel
24383221Smarcel	return (EINVAL);
24464921Smarcel}
24564921Smarcel
24664921Smarcelint
24783366Sjulianlinux_old_select(struct thread *td, struct linux_old_select_args *args)
24864921Smarcel{
24983221Smarcel	struct l_old_select_argv linux_args;
25083221Smarcel	struct linux_select_args newsel;
25164921Smarcel	int error;
25264921Smarcel
25383221Smarcel#ifdef DEBUG
25483221Smarcel	if (ldebug(old_select))
25591437Speter		printf(ARGS(old_select, "%p"), args->ptr);
25664921Smarcel#endif
25764921Smarcel
258111797Sdes	error = copyin(args->ptr, &linux_args, sizeof(linux_args));
25964921Smarcel	if (error)
26064921Smarcel		return (error);
26164921Smarcel
26264921Smarcel	newsel.nfds = linux_args.nfds;
26364921Smarcel	newsel.readfds = linux_args.readfds;
26464921Smarcel	newsel.writefds = linux_args.writefds;
26564921Smarcel	newsel.exceptfds = linux_args.exceptfds;
26664921Smarcel	newsel.timeout = linux_args.timeout;
26783366Sjulian	return (linux_select(td, &newsel));
26864921Smarcel}
26964921Smarcel
27064921Smarcelint
27183366Sjulianlinux_fork(struct thread *td, struct linux_fork_args *args)
27264921Smarcel{
27364921Smarcel	int error;
27464921Smarcel
27564921Smarcel#ifdef DEBUG
27672543Sjlemon	if (ldebug(fork))
27772543Sjlemon		printf(ARGS(fork, ""));
27864921Smarcel#endif
27964921Smarcel
28083366Sjulian	if ((error = fork(td, (struct fork_args *)args)) != 0)
28164921Smarcel		return (error);
28264921Smarcel
28383366Sjulian	if (td->td_retval[1] == 1)
28483366Sjulian		td->td_retval[0] = 0;
28564921Smarcel	return (0);
28664921Smarcel}
28764921Smarcel
28864921Smarcelint
28983366Sjulianlinux_vfork(struct thread *td, struct linux_vfork_args *args)
29064921Smarcel{
29164921Smarcel	int error;
29264921Smarcel
29364921Smarcel#ifdef DEBUG
29472543Sjlemon	if (ldebug(vfork))
29572543Sjlemon		printf(ARGS(vfork, ""));
29664921Smarcel#endif
29764921Smarcel
29883366Sjulian	if ((error = vfork(td, (struct vfork_args *)args)) != 0)
29964921Smarcel		return (error);
30064921Smarcel	/* Are we the child? */
30183366Sjulian	if (td->td_retval[1] == 1)
30283366Sjulian		td->td_retval[0] = 0;
30364921Smarcel	return (0);
30464921Smarcel}
30564921Smarcel
/* Linux clone flag bits inspected by linux_clone(). */
#define CLONE_VM	0x00000100	/* selects RFMEM */
#define CLONE_FS	0x00000200	/* not acted upon in this file */
#define CLONE_FILES	0x00000400	/* when clear, selects RFFDG */
#define CLONE_SIGHAND	0x00000800	/* selects RFSIGSHARE */
#define CLONE_PID	0x00001000	/* only reported by debug output */
31164921Smarcel
31264921Smarcelint
31383366Sjulianlinux_clone(struct thread *td, struct linux_clone_args *args)
31464921Smarcel{
31573856Sjhb	int error, ff = RFPROC | RFSTOPPED;
31664921Smarcel	struct proc *p2;
317113689Sjhb	struct thread *td2;
31864921Smarcel	int exit_signal;
31964921Smarcel
32064921Smarcel#ifdef DEBUG
32172543Sjlemon	if (ldebug(clone)) {
322111798Sdes		printf(ARGS(clone, "flags %x, stack %x"),
32372543Sjlemon		    (unsigned int)args->flags, (unsigned int)args->stack);
32472543Sjlemon		if (args->flags & CLONE_PID)
32572543Sjlemon			printf(LMSG("CLONE_PID not yet supported"));
32672543Sjlemon	}
32764921Smarcel#endif
32864921Smarcel
32964921Smarcel	if (!args->stack)
33064921Smarcel		return (EINVAL);
33164921Smarcel
33264921Smarcel	exit_signal = args->flags & 0x000000ff;
33364921Smarcel	if (exit_signal >= LINUX_NSIG)
33464921Smarcel		return (EINVAL);
33564921Smarcel
33664921Smarcel	if (exit_signal <= LINUX_SIGTBLSZ)
33764921Smarcel		exit_signal = linux_to_bsd_signal[_SIG_IDX(exit_signal)];
33864921Smarcel
33964921Smarcel	if (args->flags & CLONE_VM)
34064921Smarcel		ff |= RFMEM;
34164921Smarcel	if (args->flags & CLONE_SIGHAND)
34264921Smarcel		ff |= RFSIGSHARE;
34364921Smarcel	if (!(args->flags & CLONE_FILES))
34464921Smarcel		ff |= RFFDG;
34564921Smarcel
346104354Sscottl	error = fork1(td, ff, 0, &p2);
347113689Sjhb	if (error)
348113689Sjhb		return (error);
349113689Sjhb
35064921Smarcel
351113689Sjhb	PROC_LOCK(p2);
352113689Sjhb	p2->p_sigparent = exit_signal;
353113689Sjhb	PROC_UNLOCK(p2);
354113689Sjhb	td2 = FIRST_THREAD_IN_PROC(p2);
355113689Sjhb	td2->td_frame->tf_esp = (unsigned int)args->stack;
35664921Smarcel
35764921Smarcel#ifdef DEBUG
358113689Sjhb	if (ldebug(clone))
359113689Sjhb		printf(LMSG("clone: successful rfork to %ld, stack %p sig = %d"),
360113689Sjhb		    (long)p2->p_pid, args->stack, exit_signal);
36164921Smarcel#endif
36264921Smarcel
363113689Sjhb	/*
364113689Sjhb	 * Make this runnable after we are finished with it.
365113689Sjhb	 */
366113689Sjhb	mtx_lock_spin(&sched_lock);
367113689Sjhb	TD_SET_CAN_RUN(td2);
368113689Sjhb	setrunqueue(td2);
369113689Sjhb	mtx_unlock_spin(&sched_lock);
37073856Sjhb
371113689Sjhb	td->td_retval[0] = p2->p_pid;
372113689Sjhb	td->td_retval[1] = 0;
373113689Sjhb	return (0);
37464921Smarcel}
37564921Smarcel
37664921Smarcel/* XXX move */
37783221Smarcelstruct l_mmap_argv {
37883221Smarcel	l_caddr_t	addr;
37983221Smarcel	l_int		len;
38083221Smarcel	l_int		prot;
38183221Smarcel	l_int		flags;
38283221Smarcel	l_int		fd;
38383221Smarcel	l_int		pos;
38464921Smarcel};
38564921Smarcel
/*
 * Sizes used by linux_mmap_common() for LINUX_MAP_GROWSDOWN mappings:
 * the stack region is made at least (STACK_SIZE - GUARD_SIZE) bytes.
 */
#define STACK_SIZE  (2 * 1024 * 1024)
#define GUARD_SIZE  (4 * PAGE_SIZE)

/* Shared back end of linux_mmap() and linux_mmap2(). */
static int linux_mmap_common(struct thread *td, struct l_mmap_argv *linux_args);
390104893Ssobomax
39164921Smarcelint
392104893Ssobomaxlinux_mmap2(struct thread *td, struct linux_mmap2_args *args)
393104893Ssobomax{
394104893Ssobomax	struct l_mmap_argv linux_args;
395104893Ssobomax
396104893Ssobomax#ifdef DEBUG
397104893Ssobomax	if (ldebug(mmap2))
398111798Sdes		printf(ARGS(mmap2, "%p, %d, %d, 0x%08x, %d, %d"),
399111798Sdes		    (void *)args->addr, args->len, args->prot,
400111798Sdes		    args->flags, args->fd, args->pgoff);
401104893Ssobomax#endif
402104893Ssobomax
403104893Ssobomax	linux_args.addr = (l_caddr_t)args->addr;
404104893Ssobomax	linux_args.len = args->len;
405104893Ssobomax	linux_args.prot = args->prot;
406104893Ssobomax	linux_args.flags = args->flags;
407104893Ssobomax	linux_args.fd = args->fd;
408104893Ssobomax	linux_args.pos = args->pgoff * PAGE_SIZE;
409104893Ssobomax
410104893Ssobomax	return (linux_mmap_common(td, &linux_args));
411104893Ssobomax}
412104893Ssobomax
413104893Ssobomaxint
41483366Sjulianlinux_mmap(struct thread *td, struct linux_mmap_args *args)
41564921Smarcel{
41664921Smarcel	int error;
41783221Smarcel	struct l_mmap_argv linux_args;
41864921Smarcel
419111797Sdes	error = copyin(args->ptr, &linux_args, sizeof(linux_args));
42064921Smarcel	if (error)
42164921Smarcel		return (error);
42264921Smarcel
42364921Smarcel#ifdef DEBUG
42472543Sjlemon	if (ldebug(mmap))
42572543Sjlemon		printf(ARGS(mmap, "%p, %d, %d, 0x%08x, %d, %d"),
426104984Sbde		    (void *)linux_args.addr, linux_args.len, linux_args.prot,
427104984Sbde		    linux_args.flags, linux_args.fd, linux_args.pos);
42864921Smarcel#endif
42964921Smarcel
430104893Ssobomax	return (linux_mmap_common(td, &linux_args));
431104893Ssobomax}
432104893Ssobomax
433104893Ssobomaxstatic int
434104893Ssobomaxlinux_mmap_common(struct thread *td, struct l_mmap_argv *linux_args)
435104893Ssobomax{
436104893Ssobomax	struct proc *p = td->td_proc;
437104893Ssobomax	struct mmap_args /* {
438104893Ssobomax		caddr_t addr;
439104893Ssobomax		size_t len;
440104893Ssobomax		int prot;
441104893Ssobomax		int flags;
442104893Ssobomax		int fd;
443104893Ssobomax		long pad;
444104893Ssobomax		off_t pos;
445104893Ssobomax	} */ bsd_args;
446112630Smdodd	int error;
447104893Ssobomax
448112630Smdodd	error = 0;
44964921Smarcel	bsd_args.flags = 0;
450104893Ssobomax	if (linux_args->flags & LINUX_MAP_SHARED)
45164921Smarcel		bsd_args.flags |= MAP_SHARED;
452104893Ssobomax	if (linux_args->flags & LINUX_MAP_PRIVATE)
45364921Smarcel		bsd_args.flags |= MAP_PRIVATE;
454104893Ssobomax	if (linux_args->flags & LINUX_MAP_FIXED)
45564921Smarcel		bsd_args.flags |= MAP_FIXED;
456104893Ssobomax	if (linux_args->flags & LINUX_MAP_ANON)
45764921Smarcel		bsd_args.flags |= MAP_ANON;
45873213Sdillon	else
45973213Sdillon		bsd_args.flags |= MAP_NOSYNC;
460104893Ssobomax	if (linux_args->flags & LINUX_MAP_GROWSDOWN) {
46164921Smarcel		bsd_args.flags |= MAP_STACK;
46264921Smarcel
46364921Smarcel		/* The linux MAP_GROWSDOWN option does not limit auto
46464921Smarcel		 * growth of the region.  Linux mmap with this option
46564921Smarcel		 * takes as addr the inital BOS, and as len, the initial
46664921Smarcel		 * region size.  It can then grow down from addr without
46764921Smarcel		 * limit.  However, linux threads has an implicit internal
46864921Smarcel		 * limit to stack size of STACK_SIZE.  Its just not
46964921Smarcel		 * enforced explicitly in linux.  But, here we impose
47064921Smarcel		 * a limit of (STACK_SIZE - GUARD_SIZE) on the stack
47164921Smarcel		 * region, since we can do this with our mmap.
47264921Smarcel		 *
47364921Smarcel		 * Our mmap with MAP_STACK takes addr as the maximum
47464921Smarcel		 * downsize limit on BOS, and as len the max size of
47564921Smarcel		 * the region.  It them maps the top SGROWSIZ bytes,
47664921Smarcel		 * and autgrows the region down, up to the limit
47764921Smarcel		 * in addr.
47864921Smarcel		 *
47964921Smarcel		 * If we don't use the MAP_STACK option, the effect
48064921Smarcel		 * of this code is to allocate a stack region of a
48164921Smarcel		 * fixed size of (STACK_SIZE - GUARD_SIZE).
48264921Smarcel		 */
48364921Smarcel
48464921Smarcel		/* This gives us TOS */
485104893Ssobomax		bsd_args.addr = linux_args->addr + linux_args->len;
48664921Smarcel
48767238Sgallatin		if (bsd_args.addr > p->p_vmspace->vm_maxsaddr) {
48867238Sgallatin			/* Some linux apps will attempt to mmap
48967238Sgallatin			 * thread stacks near the top of their
49067238Sgallatin			 * address space.  If their TOS is greater
49167238Sgallatin			 * than vm_maxsaddr, vm_map_growstack()
49267238Sgallatin			 * will confuse the thread stack with the
49367238Sgallatin			 * process stack and deliver a SEGV if they
49467238Sgallatin			 * attempt to grow the thread stack past their
49567238Sgallatin			 * current stacksize rlimit.  To avoid this,
49667238Sgallatin			 * adjust vm_maxsaddr upwards to reflect
49767238Sgallatin			 * the current stacksize rlimit rather
49867238Sgallatin			 * than the maximum possible stacksize.
49967238Sgallatin			 * It would be better to adjust the
50067238Sgallatin			 * mmap'ed region, but some apps do not check
50167238Sgallatin			 * mmap's return value.
50267238Sgallatin			 */
503125454Sjhb			PROC_LOCK(p);
50467238Sgallatin			p->p_vmspace->vm_maxsaddr = (char *)USRSTACK -
505125454Sjhb			    lim_cur(p, RLIMIT_STACK);
506125454Sjhb			PROC_UNLOCK(p);
50767238Sgallatin		}
50867238Sgallatin
50964921Smarcel		/* This gives us our maximum stack size */
510104893Ssobomax		if (linux_args->len > STACK_SIZE - GUARD_SIZE)
511104893Ssobomax			bsd_args.len = linux_args->len;
51264921Smarcel		else
51364921Smarcel			bsd_args.len  = STACK_SIZE - GUARD_SIZE;
51464921Smarcel
51564921Smarcel		/* This gives us a new BOS.  If we're using VM_STACK, then
51664921Smarcel		 * mmap will just map the top SGROWSIZ bytes, and let
51764921Smarcel		 * the stack grow down to the limit at BOS.  If we're
51864921Smarcel		 * not using VM_STACK we map the full stack, since we
51964921Smarcel		 * don't have a way to autogrow it.
52064921Smarcel		 */
52164921Smarcel		bsd_args.addr -= bsd_args.len;
52264921Smarcel	} else {
523104893Ssobomax		bsd_args.addr = linux_args->addr;
524104893Ssobomax		bsd_args.len  = linux_args->len;
52564921Smarcel	}
52664921Smarcel
527104893Ssobomax	bsd_args.prot = linux_args->prot | PROT_READ;	/* always required */
528104893Ssobomax	if (linux_args->flags & LINUX_MAP_ANON)
52964921Smarcel		bsd_args.fd = -1;
53064921Smarcel	else
531104893Ssobomax		bsd_args.fd = linux_args->fd;
532104893Ssobomax	bsd_args.pos = linux_args->pos;
53364921Smarcel	bsd_args.pad = 0;
53464921Smarcel
53564921Smarcel#ifdef DEBUG
53672543Sjlemon	if (ldebug(mmap))
537112630Smdodd		printf("-> %s(%p, %d, %d, 0x%08x, %d, 0x%x)\n",
538112630Smdodd		    __func__,
53972543Sjlemon		    (void *)bsd_args.addr, bsd_args.len, bsd_args.prot,
54072543Sjlemon		    bsd_args.flags, bsd_args.fd, (int)bsd_args.pos);
54164921Smarcel#endif
542112630Smdodd	error = mmap(td, &bsd_args);
543112630Smdodd#ifdef DEBUG
544112630Smdodd	if (ldebug(mmap))
545112630Smdodd		printf("-> %s() return: 0x%x (0x%08x)\n",
546112630Smdodd			__func__, error, (u_int)td->td_retval[0]);
547112630Smdodd#endif
548112630Smdodd	return (error);
54964921Smarcel}
55064921Smarcel
55164921Smarcelint
55283366Sjulianlinux_pipe(struct thread *td, struct linux_pipe_args *args)
55364921Smarcel{
55464921Smarcel	int error;
55564921Smarcel	int reg_edx;
55664921Smarcel
55764921Smarcel#ifdef DEBUG
55872543Sjlemon	if (ldebug(pipe))
55972543Sjlemon		printf(ARGS(pipe, "*"));
56064921Smarcel#endif
56164921Smarcel
56283366Sjulian	reg_edx = td->td_retval[1];
56383366Sjulian	error = pipe(td, 0);
56464921Smarcel	if (error) {
56583366Sjulian		td->td_retval[1] = reg_edx;
56664921Smarcel		return (error);
56764921Smarcel	}
56864921Smarcel
56983366Sjulian	error = copyout(td->td_retval, args->pipefds, 2*sizeof(int));
57064921Smarcel	if (error) {
57183366Sjulian		td->td_retval[1] = reg_edx;
57264921Smarcel		return (error);
57364921Smarcel	}
57464921Smarcel
57583366Sjulian	td->td_retval[1] = reg_edx;
57683366Sjulian	td->td_retval[0] = 0;
57764921Smarcel	return (0);
57864921Smarcel}
57964921Smarcel
58064921Smarcelint
58183366Sjulianlinux_ioperm(struct thread *td, struct linux_ioperm_args *args)
58264921Smarcel{
58364921Smarcel	struct sysarch_args sa;
58464921Smarcel	struct i386_ioperm_args *iia;
58564921Smarcel	caddr_t sg;
58664921Smarcel
58764921Smarcel	sg = stackgap_init();
58864921Smarcel	iia = stackgap_alloc(&sg, sizeof(struct i386_ioperm_args));
58964921Smarcel	iia->start = args->start;
59064921Smarcel	iia->length = args->length;
59164921Smarcel	iia->enable = args->enable;
59264921Smarcel	sa.op = I386_SET_IOPERM;
59364921Smarcel	sa.parms = (char *)iia;
59483366Sjulian	return (sysarch(td, &sa));
59564921Smarcel}
59664921Smarcel
59764921Smarcelint
59883366Sjulianlinux_iopl(struct thread *td, struct linux_iopl_args *args)
59964921Smarcel{
60064921Smarcel	int error;
60164921Smarcel
60264921Smarcel	if (args->level < 0 || args->level > 3)
60364921Smarcel		return (EINVAL);
60493593Sjhb	if ((error = suser(td)) != 0)
60564921Smarcel		return (error);
60691406Sjhb	if ((error = securelevel_gt(td->td_ucred, 0)) != 0)
60783981Srwatson		return (error);
60883366Sjulian	td->td_frame->tf_eflags = (td->td_frame->tf_eflags & ~PSL_IOPL) |
60964921Smarcel	    (args->level * (PSL_IOPL / 3));
61064921Smarcel	return (0);
61164921Smarcel}
61264921Smarcel
61364921Smarcelint
614105441Smarkmlinux_modify_ldt(struct thread *td, struct linux_modify_ldt_args *uap)
61564921Smarcel{
61664921Smarcel	int error;
61764921Smarcel	caddr_t sg;
61864921Smarcel	struct sysarch_args args;
61964921Smarcel	struct i386_ldt_args *ldt;
62083221Smarcel	struct l_descriptor ld;
62164921Smarcel	union descriptor *desc;
62264921Smarcel
62364921Smarcel	sg = stackgap_init();
62464921Smarcel
62564921Smarcel	if (uap->ptr == NULL)
62664921Smarcel		return (EINVAL);
62764921Smarcel
62864921Smarcel	switch (uap->func) {
62964921Smarcel	case 0x00: /* read_ldt */
63064921Smarcel		ldt = stackgap_alloc(&sg, sizeof(*ldt));
63164921Smarcel		ldt->start = 0;
63264921Smarcel		ldt->descs = uap->ptr;
63364921Smarcel		ldt->num = uap->bytecount / sizeof(union descriptor);
63464921Smarcel		args.op = I386_GET_LDT;
63564921Smarcel		args.parms = (char*)ldt;
63683366Sjulian		error = sysarch(td, &args);
63783366Sjulian		td->td_retval[0] *= sizeof(union descriptor);
63864921Smarcel		break;
63964921Smarcel	case 0x01: /* write_ldt */
64064921Smarcel	case 0x11: /* write_ldt */
64164921Smarcel		if (uap->bytecount != sizeof(ld))
64264921Smarcel			return (EINVAL);
64364921Smarcel
64464921Smarcel		error = copyin(uap->ptr, &ld, sizeof(ld));
64564921Smarcel		if (error)
64664921Smarcel			return (error);
64764921Smarcel
64864921Smarcel		ldt = stackgap_alloc(&sg, sizeof(*ldt));
64964921Smarcel		desc = stackgap_alloc(&sg, sizeof(*desc));
65064921Smarcel		ldt->start = ld.entry_number;
65164921Smarcel		ldt->descs = desc;
65264921Smarcel		ldt->num = 1;
65364921Smarcel		desc->sd.sd_lolimit = (ld.limit & 0x0000ffff);
65464921Smarcel		desc->sd.sd_hilimit = (ld.limit & 0x000f0000) >> 16;
65564921Smarcel		desc->sd.sd_lobase = (ld.base_addr & 0x00ffffff);
65664921Smarcel		desc->sd.sd_hibase = (ld.base_addr & 0xff000000) >> 24;
65764921Smarcel		desc->sd.sd_type = SDT_MEMRO | ((ld.read_exec_only ^ 1) << 1) |
65864921Smarcel			(ld.contents << 2);
65964921Smarcel		desc->sd.sd_dpl = 3;
66064921Smarcel		desc->sd.sd_p = (ld.seg_not_present ^ 1);
66164921Smarcel		desc->sd.sd_xx = 0;
66264921Smarcel		desc->sd.sd_def32 = ld.seg_32bit;
66364921Smarcel		desc->sd.sd_gran = ld.limit_in_pages;
66464921Smarcel		args.op = I386_SET_LDT;
66564921Smarcel		args.parms = (char*)ldt;
66683366Sjulian		error = sysarch(td, &args);
66764921Smarcel		break;
66864921Smarcel	default:
66964921Smarcel		error = EINVAL;
67064921Smarcel		break;
67164921Smarcel	}
67264921Smarcel
67364921Smarcel	if (error == EOPNOTSUPP) {
67464921Smarcel		printf("linux: modify_ldt needs kernel option USER_LDT\n");
67564921Smarcel		error = ENOSYS;
67664921Smarcel	}
67764921Smarcel
67864921Smarcel	return (error);
67964921Smarcel}
68064921Smarcel
68164921Smarcelint
68283366Sjulianlinux_sigaction(struct thread *td, struct linux_sigaction_args *args)
68364921Smarcel{
68483221Smarcel	l_osigaction_t osa;
68583221Smarcel	l_sigaction_t act, oact;
68664921Smarcel	int error;
68764921Smarcel
68864921Smarcel#ifdef DEBUG
68972543Sjlemon	if (ldebug(sigaction))
69072543Sjlemon		printf(ARGS(sigaction, "%d, %p, %p"),
69172543Sjlemon		    args->sig, (void *)args->nsa, (void *)args->osa);
69264921Smarcel#endif
69364921Smarcel
69464921Smarcel	if (args->nsa != NULL) {
695111797Sdes		error = copyin(args->nsa, &osa, sizeof(l_osigaction_t));
69664921Smarcel		if (error)
69764921Smarcel			return (error);
69864921Smarcel		act.lsa_handler = osa.lsa_handler;
69964921Smarcel		act.lsa_flags = osa.lsa_flags;
70064921Smarcel		act.lsa_restorer = osa.lsa_restorer;
70164921Smarcel		LINUX_SIGEMPTYSET(act.lsa_mask);
70264921Smarcel		act.lsa_mask.__bits[0] = osa.lsa_mask;
70364921Smarcel	}
70464921Smarcel
70583366Sjulian	error = linux_do_sigaction(td, args->sig, args->nsa ? &act : NULL,
70664921Smarcel	    args->osa ? &oact : NULL);
70764921Smarcel
70864921Smarcel	if (args->osa != NULL && !error) {
70964921Smarcel		osa.lsa_handler = oact.lsa_handler;
71064921Smarcel		osa.lsa_flags = oact.lsa_flags;
71164921Smarcel		osa.lsa_restorer = oact.lsa_restorer;
71264921Smarcel		osa.lsa_mask = oact.lsa_mask.__bits[0];
713111797Sdes		error = copyout(&osa, args->osa, sizeof(l_osigaction_t));
71464921Smarcel	}
71564921Smarcel
71664921Smarcel	return (error);
71764921Smarcel}
71864921Smarcel
71964921Smarcel/*
72064921Smarcel * Linux has two extra args, restart and oldmask.  We dont use these,
72164921Smarcel * but it seems that "restart" is actually a context pointer that
72264921Smarcel * enables the signal to happen with a different register set.
72364921Smarcel */
72464921Smarcelint
72583366Sjulianlinux_sigsuspend(struct thread *td, struct linux_sigsuspend_args *args)
72664921Smarcel{
727102814Siedowse	sigset_t sigmask;
72883221Smarcel	l_sigset_t mask;
72964921Smarcel
73064921Smarcel#ifdef DEBUG
73172543Sjlemon	if (ldebug(sigsuspend))
73272543Sjlemon		printf(ARGS(sigsuspend, "%08lx"), (unsigned long)args->mask);
73364921Smarcel#endif
73464921Smarcel
73564921Smarcel	LINUX_SIGEMPTYSET(mask);
73664921Smarcel	mask.__bits[0] = args->mask;
737102814Siedowse	linux_to_bsd_sigset(&mask, &sigmask);
738102814Siedowse	return (kern_sigsuspend(td, sigmask));
73964921Smarcel}
74064921Smarcel
74164921Smarcelint
742105441Smarkmlinux_rt_sigsuspend(struct thread *td, struct linux_rt_sigsuspend_args *uap)
74364921Smarcel{
74483221Smarcel	l_sigset_t lmask;
745102814Siedowse	sigset_t sigmask;
74664921Smarcel	int error;
74764921Smarcel
74864921Smarcel#ifdef DEBUG
74972543Sjlemon	if (ldebug(rt_sigsuspend))
75072543Sjlemon		printf(ARGS(rt_sigsuspend, "%p, %d"),
75172543Sjlemon		    (void *)uap->newset, uap->sigsetsize);
75264921Smarcel#endif
75364921Smarcel
75483221Smarcel	if (uap->sigsetsize != sizeof(l_sigset_t))
75564921Smarcel		return (EINVAL);
75664921Smarcel
75783221Smarcel	error = copyin(uap->newset, &lmask, sizeof(l_sigset_t));
75864921Smarcel	if (error)
75964921Smarcel		return (error);
76064921Smarcel
761102814Siedowse	linux_to_bsd_sigset(&lmask, &sigmask);
762102814Siedowse	return (kern_sigsuspend(td, sigmask));
76364921Smarcel}
76464921Smarcel
76564921Smarcelint
76683366Sjulianlinux_pause(struct thread *td, struct linux_pause_args *args)
76764921Smarcel{
76883366Sjulian	struct proc *p = td->td_proc;
769102814Siedowse	sigset_t sigmask;
77064921Smarcel
77164921Smarcel#ifdef DEBUG
77272543Sjlemon	if (ldebug(pause))
77372543Sjlemon		printf(ARGS(pause, ""));
77464921Smarcel#endif
77564921Smarcel
77671494Sjhb	PROC_LOCK(p);
777112888Sjeff	sigmask = td->td_sigmask;
77871494Sjhb	PROC_UNLOCK(p);
779102814Siedowse	return (kern_sigsuspend(td, sigmask));
78064921Smarcel}
78164921Smarcel
78264921Smarcelint
78383366Sjulianlinux_sigaltstack(struct thread *td, struct linux_sigaltstack_args *uap)
78464921Smarcel{
785102814Siedowse	stack_t ss, oss;
78683221Smarcel	l_stack_t lss;
78764921Smarcel	int error;
78864921Smarcel
78964921Smarcel#ifdef DEBUG
79072543Sjlemon	if (ldebug(sigaltstack))
79172543Sjlemon		printf(ARGS(sigaltstack, "%p, %p"), uap->uss, uap->uoss);
79264921Smarcel#endif
79364921Smarcel
794102814Siedowse	if (uap->uss != NULL) {
79583221Smarcel		error = copyin(uap->uss, &lss, sizeof(l_stack_t));
79667051Sgallatin		if (error)
79767051Sgallatin			return (error);
79864921Smarcel
799102814Siedowse		ss.ss_sp = lss.ss_sp;
800102814Siedowse		ss.ss_size = lss.ss_size;
801102814Siedowse		ss.ss_flags = linux_to_bsd_sigaltstack(lss.ss_flags);
80267051Sgallatin	}
803102814Siedowse	error = kern_sigaltstack(td, (uap->uoss != NULL) ? &oss : NULL,
804102814Siedowse	    (uap->uss != NULL) ? &ss : NULL);
805102814Siedowse	if (!error && uap->uoss != NULL) {
806102814Siedowse		lss.ss_sp = oss.ss_sp;
807102814Siedowse		lss.ss_size = oss.ss_size;
808102814Siedowse		lss.ss_flags = bsd_to_linux_sigaltstack(oss.ss_flags);
80983221Smarcel		error = copyout(&lss, uap->uoss, sizeof(l_stack_t));
81064921Smarcel	}
81164921Smarcel
81264921Smarcel	return (error);
81364921Smarcel}
814104893Ssobomax
815104893Ssobomaxint
816104893Ssobomaxlinux_ftruncate64(struct thread *td, struct linux_ftruncate64_args *args)
817104893Ssobomax{
818104893Ssobomax	struct ftruncate_args sa;
819104893Ssobomax
820104893Ssobomax#ifdef DEBUG
821104893Ssobomax	if (ldebug(ftruncate64))
822104984Sbde		printf(ARGS(ftruncate64, "%u, %jd"), args->fd,
823104984Sbde		    (intmax_t)args->length);
824104893Ssobomax#endif
825104893Ssobomax
826104893Ssobomax	sa.fd = args->fd;
827104893Ssobomax	sa.pad = 0;
828104893Ssobomax	sa.length = args->length;
829104893Ssobomax	return ftruncate(td, &sa);
830104893Ssobomax}
831