linux_machdep.c revision 83221
/*-
 * Copyright (c) 2000 Marcel Moolenaar
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD: head/sys/i386/linux/linux_machdep.c 83221 2001-09-08 19:07:04Z marcel $
 */
301590Srgrimes
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mman.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/sysproto.h>
#include <sys/unistd.h>

#include <machine/frame.h>
#include <machine/psl.h>
#include <machine/segments.h>
#include <machine/sysarch.h>

#include <vm/vm.h>
#include <sys/lock.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>

#include <i386/linux/linux.h>
#include <i386/linux/linux_proto.h>
#include <compat/linux/linux_ipc.h>
#include <compat/linux/linux_signal.h>
#include <compat/linux/linux_util.h>
561590Srgrimes
571590Srgrimesstruct l_descriptor {
581590Srgrimes	l_uint		entry_number;
591590Srgrimes	l_ulong		base_addr;
601590Srgrimes	l_uint		limit;
611590Srgrimes	l_uint		seg_32bit:1;
621590Srgrimes	l_uint		contents:2;
631590Srgrimes	l_uint		read_exec_only:1;
641590Srgrimes	l_uint		limit_in_pages:1;
651590Srgrimes	l_uint		seg_not_present:1;
661590Srgrimes	l_uint		useable:1;
671590Srgrimes};
681590Srgrimes
691590Srgrimesstruct l_old_select_argv {
701590Srgrimes	l_int		nfds;
711590Srgrimes	l_fd_set	*readfds;
721590Srgrimes	l_fd_set	*writefds;
731590Srgrimes	l_fd_set	*exceptfds;
741590Srgrimes	struct l_timeval	*timeout;
751590Srgrimes};
761590Srgrimes
771590Srgrimesint
781590Srgrimeslinux_to_bsd_sigaltstack(int lsa)
791590Srgrimes{
801590Srgrimes	int bsa = 0;
811590Srgrimes
821590Srgrimes	if (lsa & LINUX_SS_DISABLE)
831590Srgrimes		bsa |= SS_DISABLE;
841590Srgrimes	if (lsa & LINUX_SS_ONSTACK)
851590Srgrimes		bsa |= SS_ONSTACK;
861590Srgrimes	return (bsa);
871590Srgrimes}
881590Srgrimes
891590Srgrimesint
901590Srgrimesbsd_to_linux_sigaltstack(int bsa)
911590Srgrimes{
921590Srgrimes	int lsa = 0;
939336Sdfr
941590Srgrimes	if (bsa & SS_DISABLE)
951590Srgrimes		lsa |= LINUX_SS_DISABLE;
961590Srgrimes	if (bsa & SS_ONSTACK)
971590Srgrimes		lsa |= LINUX_SS_ONSTACK;
981590Srgrimes	return (lsa);
991590Srgrimes}
1001590Srgrimes
1011590Srgrimesint
1021590Srgrimeslinux_execve(struct proc *p, struct linux_execve_args *args)
1031590Srgrimes{
1041590Srgrimes	struct execve_args bsd;
1059336Sdfr	caddr_t sg;
1061590Srgrimes
1071590Srgrimes	sg = stackgap_init();
1081590Srgrimes	CHECKALTEXIST(p, &sg, args->path);
1091590Srgrimes
1109336Sdfr#ifdef DEBUG
1111590Srgrimes	if (ldebug(execve))
1121590Srgrimes		printf(ARGS(execve, "%s"), args->path);
1131590Srgrimes#endif
1141590Srgrimes
1151590Srgrimes	bsd.fname = args->path;
1161590Srgrimes	bsd.argv = args->argp;
1171590Srgrimes	bsd.envv = args->envp;
1181590Srgrimes	return (execve(p, &bsd));
1191590Srgrimes}
1201590Srgrimes
1211590Srgrimesstruct l_ipc_kludge {
1221590Srgrimes	struct l_msgbuf *msgp;
1231590Srgrimes	l_long msgtyp;
1241590Srgrimes};
1251590Srgrimes
1261590Srgrimesint
1271590Srgrimeslinux_ipc(struct proc *p, struct linux_ipc_args *args)
1281590Srgrimes{
1299336Sdfr
1309336Sdfr	switch (args->what & 0xFFFF) {
1319336Sdfr	case LINUX_SEMOP: {
1321590Srgrimes		struct linux_semop_args a;
1331590Srgrimes
1341590Srgrimes		a.semid = args->arg1;
1351590Srgrimes		a.tsops = args->ptr;
1361590Srgrimes		a.nsops = args->arg2;
1371590Srgrimes		return (linux_semop(p, &a));
1381590Srgrimes	}
1391590Srgrimes	case LINUX_SEMGET: {
1401590Srgrimes		struct linux_semget_args a;
1411590Srgrimes
1421590Srgrimes		a.key = args->arg1;
1431590Srgrimes		a.nsems = args->arg2;
1441590Srgrimes		a.semflg = args->arg3;
1451590Srgrimes		return (linux_semget(p, &a));
1461590Srgrimes	}
1471590Srgrimes	case LINUX_SEMCTL: {
1489336Sdfr		struct linux_semctl_args a;
1491590Srgrimes		int error;
1501590Srgrimes
1511590Srgrimes		a.semid = args->arg1;
1521590Srgrimes		a.semnum = args->arg2;
1531590Srgrimes		a.cmd = args->arg3;
1541590Srgrimes		error = copyin((caddr_t)args->ptr, &a.arg, sizeof(a.arg));
1551590Srgrimes		if (error)
1569336Sdfr			return (error);
1571590Srgrimes		return (linux_semctl(p, &a));
1581590Srgrimes	}
1591590Srgrimes	case LINUX_MSGSND: {
1601590Srgrimes		struct linux_msgsnd_args a;
1611590Srgrimes
1621590Srgrimes		a.msqid = args->arg1;
1631590Srgrimes		a.msgp = args->ptr;
1641590Srgrimes		a.msgsz = args->arg2;
1651590Srgrimes		a.msgflg = args->arg3;
1661590Srgrimes		return (linux_msgsnd(p, &a));
1671590Srgrimes	}
1681590Srgrimes	case LINUX_MSGRCV: {
1691590Srgrimes		struct linux_msgrcv_args a;
1701590Srgrimes
1711590Srgrimes		a.msqid = args->arg1;
1721590Srgrimes		a.msgsz = args->arg2;
1731590Srgrimes		a.msgflg = args->arg3;
1741590Srgrimes		if ((args->what >> 16) == 0) {
1751590Srgrimes			struct l_ipc_kludge tmp;
1761590Srgrimes			int error;
1771590Srgrimes
1781590Srgrimes			if (args->ptr == NULL)
1791590Srgrimes				return (EINVAL);
1801590Srgrimes			error = copyin((caddr_t)args->ptr, &tmp, sizeof(tmp));
1811590Srgrimes			if (error)
1821590Srgrimes				return (error);
1831590Srgrimes			a.msgp = tmp.msgp;
1841590Srgrimes			a.msgtyp = tmp.msgtyp;
1851590Srgrimes		} else {
1861590Srgrimes			a.msgp = args->ptr;
1871590Srgrimes			a.msgtyp = args->arg5;
1881590Srgrimes		}
1891590Srgrimes		return (linux_msgrcv(p, &a));
1901590Srgrimes	}
1911590Srgrimes	case LINUX_MSGGET: {
1921590Srgrimes		struct linux_msgget_args a;
1931590Srgrimes
1941590Srgrimes		a.key = args->arg1;
1951590Srgrimes		a.msgflg = args->arg2;
1961590Srgrimes		return (linux_msgget(p, &a));
1971590Srgrimes	}
1981590Srgrimes	case LINUX_MSGCTL: {
1991590Srgrimes		struct linux_msgctl_args a;
2001590Srgrimes
2011590Srgrimes		a.msqid = args->arg1;
2021590Srgrimes		a.cmd = args->arg2;
2031590Srgrimes		a.buf = args->ptr;
2041590Srgrimes		return (linux_msgctl(p, &a));
2051590Srgrimes	}
2061590Srgrimes	case LINUX_SHMAT: {
2071590Srgrimes		struct linux_shmat_args a;
2081590Srgrimes
2091590Srgrimes		a.shmid = args->arg1;
2101590Srgrimes		a.shmaddr = args->ptr;
2111590Srgrimes		a.shmflg = args->arg2;
2121590Srgrimes		a.raddr = (l_ulong *)args->arg3;
2131590Srgrimes		return (linux_shmat(p, &a));
2141590Srgrimes	}
2151590Srgrimes	case LINUX_SHMDT: {
2161590Srgrimes		struct linux_shmdt_args a;
2171590Srgrimes
2181590Srgrimes		a.shmaddr = args->ptr;
2191590Srgrimes		return (linux_shmdt(p, &a));
2201590Srgrimes	}
2211590Srgrimes	case LINUX_SHMGET: {
2221590Srgrimes		struct linux_shmget_args a;
2231590Srgrimes
2241590Srgrimes		a.key = args->arg1;
2251590Srgrimes		a.size = args->arg2;
2261590Srgrimes		a.shmflg = args->arg3;
2271590Srgrimes		return (linux_shmget(p, &a));
2281590Srgrimes	}
2291590Srgrimes	case LINUX_SHMCTL: {
2301590Srgrimes		struct linux_shmctl_args a;
2311590Srgrimes
2321590Srgrimes		a.shmid = args->arg1;
2331590Srgrimes		a.cmd = args->arg2;
2341590Srgrimes		a.buf = args->ptr;
2351590Srgrimes		return (linux_shmctl(p, &a));
2361590Srgrimes	}
2371590Srgrimes	default:
2381590Srgrimes		break;
2391590Srgrimes	}
2401590Srgrimes
2411590Srgrimes	return (EINVAL);
2421590Srgrimes}
2431590Srgrimes
2441590Srgrimesint
2451590Srgrimeslinux_old_select(struct proc *p, struct linux_old_select_args *args)
2461590Srgrimes{
2471590Srgrimes	struct l_old_select_argv linux_args;
2481590Srgrimes	struct linux_select_args newsel;
2491590Srgrimes	int error;
2501590Srgrimes
2511590Srgrimes#ifdef DEBUG
2521590Srgrimes	if (ldebug(old_select))
2531590Srgrimes		printf(ARGS(old_select, "%x"), args->ptr);
2541590Srgrimes#endif
2551590Srgrimes
2561590Srgrimes	error = copyin((caddr_t)args->ptr, &linux_args, sizeof(linux_args));
2571590Srgrimes	if (error)
2581590Srgrimes		return (error);
2591590Srgrimes
2601590Srgrimes	newsel.nfds = linux_args.nfds;
2611590Srgrimes	newsel.readfds = linux_args.readfds;
2621590Srgrimes	newsel.writefds = linux_args.writefds;
2631590Srgrimes	newsel.exceptfds = linux_args.exceptfds;
2641590Srgrimes	newsel.timeout = linux_args.timeout;
2651590Srgrimes	return (linux_select(p, &newsel));
2661590Srgrimes}
2671590Srgrimes
2681590Srgrimesint
2691590Srgrimeslinux_fork(struct proc *p, struct linux_fork_args *args)
2701590Srgrimes{
2711590Srgrimes	int error;
2721590Srgrimes
2731590Srgrimes#ifdef DEBUG
2741590Srgrimes	if (ldebug(fork))
2751590Srgrimes		printf(ARGS(fork, ""));
2761590Srgrimes#endif
2771590Srgrimes
2781590Srgrimes	if ((error = fork(p, (struct fork_args *)args)) != 0)
2791590Srgrimes		return (error);
2801590Srgrimes
2811590Srgrimes	if (p->p_retval[1] == 1)
2821590Srgrimes		p->p_retval[0] = 0;
2831590Srgrimes	return (0);
2841590Srgrimes}
2851590Srgrimes
2861590Srgrimesint
2871590Srgrimeslinux_vfork(struct proc *p, struct linux_vfork_args *args)
2881590Srgrimes{
2891590Srgrimes	int error;
2901590Srgrimes
2911590Srgrimes#ifdef DEBUG
2921590Srgrimes	if (ldebug(vfork))
2931590Srgrimes		printf(ARGS(vfork, ""));
2941590Srgrimes#endif
2951590Srgrimes
2961590Srgrimes	if ((error = vfork(p, (struct vfork_args *)args)) != 0)
2971590Srgrimes		return (error);
2981590Srgrimes	/* Are we the child? */
2991590Srgrimes	if (p->p_retval[1] == 1)
3001590Srgrimes		p->p_retval[0] = 0;
3011590Srgrimes	return (0);
3021590Srgrimes}
3031590Srgrimes
/*
 * Linux clone flag bits.  linux_clone() acts on CLONE_VM, CLONE_FILES and
 * CLONE_SIGHAND, and only reports CLONE_PID (under DEBUG) as unsupported;
 * CLONE_FS is not examined in this file.
 */
#define CLONE_VM	0x100
#define CLONE_FS	0x200
#define CLONE_FILES	0x400
#define CLONE_SIGHAND	0x800
#define CLONE_PID	0x1000
3091590Srgrimes
3101590Srgrimesint
3111590Srgrimeslinux_clone(struct proc *p, struct linux_clone_args *args)
3121590Srgrimes{
3131590Srgrimes	int error, ff = RFPROC | RFSTOPPED;
3141590Srgrimes	struct proc *p2;
3151590Srgrimes	int exit_signal;
3161590Srgrimes
3171590Srgrimes#ifdef DEBUG
3181590Srgrimes	if (ldebug(clone)) {
3191590Srgrimes		printf(ARGS(clone, "flags %x, stack %x"),
3201590Srgrimes		    (unsigned int)args->flags, (unsigned int)args->stack);
3211590Srgrimes		if (args->flags & CLONE_PID)
3221590Srgrimes			printf(LMSG("CLONE_PID not yet supported"));
3231590Srgrimes	}
3241590Srgrimes#endif
3251590Srgrimes
3261590Srgrimes	if (!args->stack)
3271590Srgrimes		return (EINVAL);
3281590Srgrimes
3291590Srgrimes	exit_signal = args->flags & 0x000000ff;
3301590Srgrimes	if (exit_signal >= LINUX_NSIG)
3311590Srgrimes		return (EINVAL);
3321590Srgrimes
3331590Srgrimes	if (exit_signal <= LINUX_SIGTBLSZ)
3341590Srgrimes		exit_signal = linux_to_bsd_signal[_SIG_IDX(exit_signal)];
3351590Srgrimes
3361590Srgrimes	if (args->flags & CLONE_VM)
3371590Srgrimes		ff |= RFMEM;
3381590Srgrimes	if (args->flags & CLONE_SIGHAND)
3391590Srgrimes		ff |= RFSIGSHARE;
3401590Srgrimes	if (!(args->flags & CLONE_FILES))
3411590Srgrimes		ff |= RFFDG;
3421590Srgrimes
3431590Srgrimes	mtx_lock(&Giant);
3441590Srgrimes	error = fork1(p, ff, &p2);
3451590Srgrimes	if (error == 0) {
3461590Srgrimes		p->p_retval[0] = p2->p_pid;
3471590Srgrimes		p->p_retval[1] = 0;
3481590Srgrimes
3491590Srgrimes		PROC_LOCK(p2);
3501590Srgrimes		p2->p_sigparent = exit_signal;
3511590Srgrimes		p2->p_frame->tf_esp = (unsigned int)args->stack;
3521590Srgrimes
3531590Srgrimes#ifdef DEBUG
3541590Srgrimes		if (ldebug(clone))
3551590Srgrimes			printf(LMSG("clone: successful rfork to %ld"),
356			    (long)p2->p_pid);
357#endif
358
359		/*
360		 * Make this runnable after we are finished with it.
361		 */
362		mtx_lock_spin(&sched_lock);
363		p2->p_stat = SRUN;
364		setrunqueue(p2);
365		mtx_unlock_spin(&sched_lock);
366		PROC_UNLOCK(p2);
367	}
368	mtx_unlock(&Giant);
369
370	return (error);
371}
372
373/* XXX move */
374struct l_mmap_argv {
375	l_caddr_t	addr;
376	l_int		len;
377	l_int		prot;
378	l_int		flags;
379	l_int		fd;
380	l_int		pos;
381};
382
/*
 * Sizes used by linux_mmap() for LINUX_MAP_GROWSDOWN mappings: the region
 * is made at least (STACK_SIZE - GUARD_SIZE) bytes long.
 */
#define STACK_SIZE  (2 * 1024 * 1024)
#define GUARD_SIZE  (4 * PAGE_SIZE)
385
386int
387linux_mmap(struct proc *p, struct linux_mmap_args *args)
388{
389	struct mmap_args /* {
390		caddr_t addr;
391		size_t len;
392		int prot;
393		int flags;
394		int fd;
395		long pad;
396		off_t pos;
397	} */ bsd_args;
398	int error;
399	struct l_mmap_argv linux_args;
400
401	error = copyin((caddr_t)args->ptr, &linux_args, sizeof(linux_args));
402	if (error)
403		return (error);
404
405#ifdef DEBUG
406	if (ldebug(mmap))
407		printf(ARGS(mmap, "%p, %d, %d, 0x%08x, %d, %d"),
408		    (void *)linux_args.addr, linux_args.len, linux_args.prot,
409		    linux_args.flags, linux_args.fd, linux_args.pos);
410#endif
411
412	bsd_args.flags = 0;
413	if (linux_args.flags & LINUX_MAP_SHARED)
414		bsd_args.flags |= MAP_SHARED;
415	if (linux_args.flags & LINUX_MAP_PRIVATE)
416		bsd_args.flags |= MAP_PRIVATE;
417	if (linux_args.flags & LINUX_MAP_FIXED)
418		bsd_args.flags |= MAP_FIXED;
419	if (linux_args.flags & LINUX_MAP_ANON)
420		bsd_args.flags |= MAP_ANON;
421	else
422		bsd_args.flags |= MAP_NOSYNC;
423	if (linux_args.flags & LINUX_MAP_GROWSDOWN) {
424		bsd_args.flags |= MAP_STACK;
425
426		/* The linux MAP_GROWSDOWN option does not limit auto
427		 * growth of the region.  Linux mmap with this option
428		 * takes as addr the inital BOS, and as len, the initial
429		 * region size.  It can then grow down from addr without
430		 * limit.  However, linux threads has an implicit internal
431		 * limit to stack size of STACK_SIZE.  Its just not
432		 * enforced explicitly in linux.  But, here we impose
433		 * a limit of (STACK_SIZE - GUARD_SIZE) on the stack
434		 * region, since we can do this with our mmap.
435		 *
436		 * Our mmap with MAP_STACK takes addr as the maximum
437		 * downsize limit on BOS, and as len the max size of
438		 * the region.  It them maps the top SGROWSIZ bytes,
439		 * and autgrows the region down, up to the limit
440		 * in addr.
441		 *
442		 * If we don't use the MAP_STACK option, the effect
443		 * of this code is to allocate a stack region of a
444		 * fixed size of (STACK_SIZE - GUARD_SIZE).
445		 */
446
447		/* This gives us TOS */
448		bsd_args.addr = linux_args.addr + linux_args.len;
449
450		if (bsd_args.addr > p->p_vmspace->vm_maxsaddr) {
451			/* Some linux apps will attempt to mmap
452			 * thread stacks near the top of their
453			 * address space.  If their TOS is greater
454			 * than vm_maxsaddr, vm_map_growstack()
455			 * will confuse the thread stack with the
456			 * process stack and deliver a SEGV if they
457			 * attempt to grow the thread stack past their
458			 * current stacksize rlimit.  To avoid this,
459			 * adjust vm_maxsaddr upwards to reflect
460			 * the current stacksize rlimit rather
461			 * than the maximum possible stacksize.
462			 * It would be better to adjust the
463			 * mmap'ed region, but some apps do not check
464			 * mmap's return value.
465			 */
466			mtx_assert(&Giant, MA_OWNED);
467			p->p_vmspace->vm_maxsaddr = (char *)USRSTACK -
468			    p->p_rlimit[RLIMIT_STACK].rlim_cur;
469		}
470
471		/* This gives us our maximum stack size */
472		if (linux_args.len > STACK_SIZE - GUARD_SIZE)
473			bsd_args.len = linux_args.len;
474		else
475			bsd_args.len  = STACK_SIZE - GUARD_SIZE;
476
477		/* This gives us a new BOS.  If we're using VM_STACK, then
478		 * mmap will just map the top SGROWSIZ bytes, and let
479		 * the stack grow down to the limit at BOS.  If we're
480		 * not using VM_STACK we map the full stack, since we
481		 * don't have a way to autogrow it.
482		 */
483		bsd_args.addr -= bsd_args.len;
484	} else {
485		bsd_args.addr = linux_args.addr;
486		bsd_args.len  = linux_args.len;
487	}
488
489	bsd_args.prot = linux_args.prot | PROT_READ;	/* always required */
490	if (linux_args.flags & LINUX_MAP_ANON)
491		bsd_args.fd = -1;
492	else
493		bsd_args.fd = linux_args.fd;
494	bsd_args.pos = linux_args.pos;
495	bsd_args.pad = 0;
496
497#ifdef DEBUG
498	if (ldebug(mmap))
499		printf("-> (%p, %d, %d, 0x%08x, %d, %d)\n",
500		    (void *)bsd_args.addr, bsd_args.len, bsd_args.prot,
501		    bsd_args.flags, bsd_args.fd, (int)bsd_args.pos);
502#endif
503
504	return (mmap(p, &bsd_args));
505}
506
507int
508linux_pipe(struct proc *p, struct linux_pipe_args *args)
509{
510	int error;
511	int reg_edx;
512
513#ifdef DEBUG
514	if (ldebug(pipe))
515		printf(ARGS(pipe, "*"));
516#endif
517
518	reg_edx = p->p_retval[1];
519	error = pipe(p, 0);
520	if (error) {
521		p->p_retval[1] = reg_edx;
522		return (error);
523	}
524
525	error = copyout(p->p_retval, args->pipefds, 2*sizeof(int));
526	if (error) {
527		p->p_retval[1] = reg_edx;
528		return (error);
529	}
530
531	p->p_retval[1] = reg_edx;
532	p->p_retval[0] = 0;
533	return (0);
534}
535
536int
537linux_ioperm(struct proc *p, struct linux_ioperm_args *args)
538{
539	struct sysarch_args sa;
540	struct i386_ioperm_args *iia;
541	caddr_t sg;
542
543	sg = stackgap_init();
544	iia = stackgap_alloc(&sg, sizeof(struct i386_ioperm_args));
545	iia->start = args->start;
546	iia->length = args->length;
547	iia->enable = args->enable;
548	sa.op = I386_SET_IOPERM;
549	sa.parms = (char *)iia;
550	return (sysarch(p, &sa));
551}
552
553int
554linux_iopl(struct proc *p, struct linux_iopl_args *args)
555{
556	int error;
557
558	if (args->level < 0 || args->level > 3)
559		return (EINVAL);
560	if ((error = suser(p)) != 0)
561		return (error);
562	if (securelevel > 0)
563		return (EPERM);
564	p->p_frame->tf_eflags = (p->p_frame->tf_eflags & ~PSL_IOPL) |
565	    (args->level * (PSL_IOPL / 3));
566	return (0);
567}
568
569int
570linux_modify_ldt(p, uap)
571	struct proc *p;
572	struct linux_modify_ldt_args *uap;
573{
574	int error;
575	caddr_t sg;
576	struct sysarch_args args;
577	struct i386_ldt_args *ldt;
578	struct l_descriptor ld;
579	union descriptor *desc;
580
581	sg = stackgap_init();
582
583	if (uap->ptr == NULL)
584		return (EINVAL);
585
586	switch (uap->func) {
587	case 0x00: /* read_ldt */
588		ldt = stackgap_alloc(&sg, sizeof(*ldt));
589		ldt->start = 0;
590		ldt->descs = uap->ptr;
591		ldt->num = uap->bytecount / sizeof(union descriptor);
592		args.op = I386_GET_LDT;
593		args.parms = (char*)ldt;
594		error = sysarch(p, &args);
595		p->p_retval[0] *= sizeof(union descriptor);
596		break;
597	case 0x01: /* write_ldt */
598	case 0x11: /* write_ldt */
599		if (uap->bytecount != sizeof(ld))
600			return (EINVAL);
601
602		error = copyin(uap->ptr, &ld, sizeof(ld));
603		if (error)
604			return (error);
605
606		ldt = stackgap_alloc(&sg, sizeof(*ldt));
607		desc = stackgap_alloc(&sg, sizeof(*desc));
608		ldt->start = ld.entry_number;
609		ldt->descs = desc;
610		ldt->num = 1;
611		desc->sd.sd_lolimit = (ld.limit & 0x0000ffff);
612		desc->sd.sd_hilimit = (ld.limit & 0x000f0000) >> 16;
613		desc->sd.sd_lobase = (ld.base_addr & 0x00ffffff);
614		desc->sd.sd_hibase = (ld.base_addr & 0xff000000) >> 24;
615		desc->sd.sd_type = SDT_MEMRO | ((ld.read_exec_only ^ 1) << 1) |
616			(ld.contents << 2);
617		desc->sd.sd_dpl = 3;
618		desc->sd.sd_p = (ld.seg_not_present ^ 1);
619		desc->sd.sd_xx = 0;
620		desc->sd.sd_def32 = ld.seg_32bit;
621		desc->sd.sd_gran = ld.limit_in_pages;
622		args.op = I386_SET_LDT;
623		args.parms = (char*)ldt;
624		error = sysarch(p, &args);
625		break;
626	default:
627		error = EINVAL;
628		break;
629	}
630
631	if (error == EOPNOTSUPP) {
632		printf("linux: modify_ldt needs kernel option USER_LDT\n");
633		error = ENOSYS;
634	}
635
636	return (error);
637}
638
639int
640linux_sigaction(struct proc *p, struct linux_sigaction_args *args)
641{
642	l_osigaction_t osa;
643	l_sigaction_t act, oact;
644	int error;
645
646#ifdef DEBUG
647	if (ldebug(sigaction))
648		printf(ARGS(sigaction, "%d, %p, %p"),
649		    args->sig, (void *)args->nsa, (void *)args->osa);
650#endif
651
652	if (args->nsa != NULL) {
653		error = copyin((caddr_t)args->nsa, &osa,
654		    sizeof(l_osigaction_t));
655		if (error)
656			return (error);
657		act.lsa_handler = osa.lsa_handler;
658		act.lsa_flags = osa.lsa_flags;
659		act.lsa_restorer = osa.lsa_restorer;
660		LINUX_SIGEMPTYSET(act.lsa_mask);
661		act.lsa_mask.__bits[0] = osa.lsa_mask;
662	}
663
664	error = linux_do_sigaction(p, args->sig, args->nsa ? &act : NULL,
665	    args->osa ? &oact : NULL);
666
667	if (args->osa != NULL && !error) {
668		osa.lsa_handler = oact.lsa_handler;
669		osa.lsa_flags = oact.lsa_flags;
670		osa.lsa_restorer = oact.lsa_restorer;
671		osa.lsa_mask = oact.lsa_mask.__bits[0];
672		error = copyout(&osa, (caddr_t)args->osa,
673		    sizeof(l_osigaction_t));
674	}
675
676	return (error);
677}
678
679/*
680 * Linux has two extra args, restart and oldmask.  We dont use these,
681 * but it seems that "restart" is actually a context pointer that
682 * enables the signal to happen with a different register set.
683 */
684int
685linux_sigsuspend(struct proc *p, struct linux_sigsuspend_args *args)
686{
687	struct sigsuspend_args bsd;
688	sigset_t *sigmask;
689	l_sigset_t mask;
690	caddr_t sg = stackgap_init();
691
692#ifdef DEBUG
693	if (ldebug(sigsuspend))
694		printf(ARGS(sigsuspend, "%08lx"), (unsigned long)args->mask);
695#endif
696
697	sigmask = stackgap_alloc(&sg, sizeof(sigset_t));
698	LINUX_SIGEMPTYSET(mask);
699	mask.__bits[0] = args->mask;
700	linux_to_bsd_sigset(&mask, sigmask);
701	bsd.sigmask = sigmask;
702	return (sigsuspend(p, &bsd));
703}
704
705int
706linux_rt_sigsuspend(p, uap)
707	struct proc *p;
708	struct linux_rt_sigsuspend_args *uap;
709{
710	l_sigset_t lmask;
711	sigset_t *bmask;
712	struct sigsuspend_args bsd;
713	caddr_t sg = stackgap_init();
714	int error;
715
716#ifdef DEBUG
717	if (ldebug(rt_sigsuspend))
718		printf(ARGS(rt_sigsuspend, "%p, %d"),
719		    (void *)uap->newset, uap->sigsetsize);
720#endif
721
722	if (uap->sigsetsize != sizeof(l_sigset_t))
723		return (EINVAL);
724
725	error = copyin(uap->newset, &lmask, sizeof(l_sigset_t));
726	if (error)
727		return (error);
728
729	bmask = stackgap_alloc(&sg, sizeof(sigset_t));
730	linux_to_bsd_sigset(&lmask, bmask);
731	bsd.sigmask = bmask;
732	return (sigsuspend(p, &bsd));
733}
734
735int
736linux_pause(struct proc *p, struct linux_pause_args *args)
737{
738	struct sigsuspend_args bsd;
739	sigset_t *sigmask;
740	caddr_t sg = stackgap_init();
741
742#ifdef DEBUG
743	if (ldebug(pause))
744		printf(ARGS(pause, ""));
745#endif
746
747	sigmask = stackgap_alloc(&sg, sizeof(sigset_t));
748	PROC_LOCK(p);
749	*sigmask = p->p_sigmask;
750	PROC_UNLOCK(p);
751	bsd.sigmask = sigmask;
752	return (sigsuspend(p, &bsd));
753}
754
755int
756linux_sigaltstack(p, uap)
757	struct proc *p;
758	struct linux_sigaltstack_args *uap;
759{
760	struct sigaltstack_args bsd;
761	stack_t *ss, *oss;
762	l_stack_t lss;
763	int error;
764	caddr_t sg = stackgap_init();
765
766#ifdef DEBUG
767	if (ldebug(sigaltstack))
768		printf(ARGS(sigaltstack, "%p, %p"), uap->uss, uap->uoss);
769#endif
770
771	if (uap->uss == NULL) {
772		ss = NULL;
773	} else {
774		error = copyin(uap->uss, &lss, sizeof(l_stack_t));
775		if (error)
776			return (error);
777
778		ss = stackgap_alloc(&sg, sizeof(stack_t));
779		ss->ss_sp = lss.ss_sp;
780		ss->ss_size = lss.ss_size;
781		ss->ss_flags = linux_to_bsd_sigaltstack(lss.ss_flags);
782	}
783	oss = (uap->uoss != NULL)
784	    ? stackgap_alloc(&sg, sizeof(stack_t))
785	    : NULL;
786
787	bsd.ss = ss;
788	bsd.oss = oss;
789	error = sigaltstack(p, &bsd);
790
791	if (!error && oss != NULL) {
792		lss.ss_sp = oss->ss_sp;
793		lss.ss_size = oss->ss_size;
794		lss.ss_flags = bsd_to_linux_sigaltstack(oss->ss_flags);
795		error = copyout(&lss, uap->uoss, sizeof(l_stack_t));
796	}
797
798	return (error);
799}
800