linux32_machdep.c revision 142057
1/*-
2 * Copyright (c) 2004 Tim J. Robbins
3 * Copyright (c) 2002 Doug Rabson
4 * Copyright (c) 2000 Marcel Moolenaar
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer
12 *    in this position and unchanged.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. The name of the author may not be used to endorse or promote products
17 *    derived from this software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31#include <sys/cdefs.h>
32__FBSDID("$FreeBSD: head/sys/amd64/linux32/linux32_machdep.c 142057 2005-02-18 18:51:59Z jhb $");
33
34#include <sys/param.h>
35#include <sys/kernel.h>
36#include <sys/systm.h>
37#include <sys/imgact.h>
38#include <sys/lock.h>
39#include <sys/malloc.h>
40#include <sys/mman.h>
41#include <sys/mutex.h>
42#include <sys/proc.h>
43#include <sys/resource.h>
44#include <sys/resourcevar.h>
45#include <sys/syscallsubr.h>
46#include <sys/sysproto.h>
47#include <sys/unistd.h>
48
49#include <machine/frame.h>
50
51#include <vm/vm.h>
52#include <vm/pmap.h>
53#include <vm/vm_extern.h>
54#include <vm/vm_kern.h>
55#include <vm/vm_map.h>
56
57#include <amd64/linux32/linux.h>
58#include <amd64/linux32/linux32_proto.h>
59#include <compat/linux/linux_ipc.h>
60#include <compat/linux/linux_signal.h>
61#include <compat/linux/linux_util.h>
62
/*
 * Argument block for the old-style Linux select call.  linux_old_select()
 * copies one of these in from user space and forwards the members to
 * linux_select(); the four l_uintptr_t members are user addresses that
 * are converted with PTRIN() before use.
 */
struct l_old_select_argv {
	l_int		nfds;
	l_uintptr_t	readfds;
	l_uintptr_t	writefds;
	l_uintptr_t	exceptfds;
	l_uintptr_t	timeout;
} __packed;
70
71int
72linux_to_bsd_sigaltstack(int lsa)
73{
74	int bsa = 0;
75
76	if (lsa & LINUX_SS_DISABLE)
77		bsa |= SS_DISABLE;
78	if (lsa & LINUX_SS_ONSTACK)
79		bsa |= SS_ONSTACK;
80	return (bsa);
81}
82
83int
84bsd_to_linux_sigaltstack(int bsa)
85{
86	int lsa = 0;
87
88	if (bsa & SS_DISABLE)
89		lsa |= LINUX_SS_DISABLE;
90	if (bsa & SS_ONSTACK)
91		lsa |= LINUX_SS_ONSTACK;
92	return (lsa);
93}
94
95/*
96 * Custom version of exec_copyin_args() so that we can translate
97 * the pointers.
98 */
99static int
100linux_exec_copyin_args(struct image_args *args, char *fname,
101    enum uio_seg segflg, char **argv, char **envv)
102{
103	char *argp, *envp;
104	u_int32_t *p32, arg;
105	size_t length;
106	int error;
107
108	bzero(args, sizeof(*args));
109	if (argv == NULL)
110		return (EFAULT);
111
112	/*
113	 * Allocate temporary demand zeroed space for argument and
114	 *	environment strings
115	 */
116	args->buf = (char *) kmem_alloc_wait(exec_map, PATH_MAX + ARG_MAX);
117	if (args->buf == NULL)
118		return (ENOMEM);
119	args->begin_argv = args->buf;
120	args->endp = args->begin_argv;
121	args->stringspace = ARG_MAX;
122
123	args->fname = args->buf + ARG_MAX;
124
125	/*
126	 * Copy the file name.
127	 */
128	error = (segflg == UIO_SYSSPACE) ?
129	    copystr(fname, args->fname, PATH_MAX, &length) :
130	    copyinstr(fname, args->fname, PATH_MAX, &length);
131	if (error != 0)
132		return (error);
133
134	/*
135	 * extract arguments first
136	 */
137	p32 = (u_int32_t *)argv;
138	for (;;) {
139		error = copyin(p32++, &arg, sizeof(arg));
140		if (error)
141			return (error);
142		if (arg == 0)
143			break;
144		argp = PTRIN(arg);
145		error = copyinstr(argp, args->endp, args->stringspace, &length);
146		if (error) {
147			if (error == ENAMETOOLONG)
148				return (E2BIG);
149			else
150				return (error);
151		}
152		args->stringspace -= length;
153		args->endp += length;
154		args->argc++;
155	}
156
157	args->begin_envv = args->endp;
158
159	/*
160	 * extract environment strings
161	 */
162	if (envv) {
163		p32 = (u_int32_t *)envv;
164		for (;;) {
165			error = copyin(p32++, &arg, sizeof(arg));
166			if (error)
167				return (error);
168			if (arg == 0)
169				break;
170			envp = PTRIN(arg);
171			error = copyinstr(envp, args->endp, args->stringspace,
172			    &length);
173			if (error) {
174				if (error == ENAMETOOLONG)
175					return (E2BIG);
176				else
177					return (error);
178			}
179			args->stringspace -= length;
180			args->endp += length;
181			args->envc++;
182		}
183	}
184
185	return (0);
186}
187
188int
189linux_execve(struct thread *td, struct linux_execve_args *args)
190{
191	struct image_args eargs;
192	char *path;
193	int error;
194
195	LCONVPATHEXIST(td, args->path, &path);
196
197#ifdef DEBUG
198	if (ldebug(execve))
199		printf(ARGS(execve, "%s"), path);
200#endif
201
202	error = linux_exec_copyin_args(&eargs, path, UIO_SYSSPACE, args->argp,
203	    args->envp);
204	free(path, M_TEMP);
205	if (error == 0)
206		error = kern_execve(td, &eargs, NULL);
207	exec_free_args(&eargs);
208	return (error);
209}
210
/*
 * I/O vector element as copied in from the calling process by
 * linux_readv() and linux_writev().  iov_base is a 32-bit user address
 * that those functions widen with PTRIN().
 */
struct iovec32 {
	u_int32_t iov_base;
	int	iov_len;
};
/*
 * linux_readv() and linux_writev() reject requests whose iovcnt exceeds
 * STACKGAPLEN / sizeof(struct iovec), which bounds the native iovec
 * array they place in the stack gap.
 */
#define	STACKGAPLEN	400

/* The element must be exactly 8 bytes. */
CTASSERT(sizeof(struct iovec32) == 8);
218
219int
220linux_readv(struct thread *td, struct linux_readv_args *uap)
221{
222	int error, osize, nsize, i;
223	caddr_t sg;
224	struct readv_args /* {
225		syscallarg(int) fd;
226		syscallarg(struct iovec *) iovp;
227		syscallarg(u_int) iovcnt;
228	} */ a;
229	struct iovec32 *oio;
230	struct iovec *nio;
231
232	sg = stackgap_init();
233
234	if (uap->iovcnt > (STACKGAPLEN / sizeof (struct iovec)))
235		return (EINVAL);
236
237	osize = uap->iovcnt * sizeof (struct iovec32);
238	nsize = uap->iovcnt * sizeof (struct iovec);
239
240	oio = malloc(osize, M_TEMP, M_WAITOK);
241	nio = malloc(nsize, M_TEMP, M_WAITOK);
242
243	error = 0;
244	if ((error = copyin(uap->iovp, oio, osize)))
245		goto punt;
246	for (i = 0; i < uap->iovcnt; i++) {
247		nio[i].iov_base = PTRIN(oio[i].iov_base);
248		nio[i].iov_len = oio[i].iov_len;
249	}
250
251	a.fd = uap->fd;
252	a.iovp = stackgap_alloc(&sg, nsize);
253	a.iovcnt = uap->iovcnt;
254
255	if ((error = copyout(nio, (caddr_t)a.iovp, nsize)))
256		goto punt;
257	error = readv(td, &a);
258
259punt:
260	free(oio, M_TEMP);
261	free(nio, M_TEMP);
262	return (error);
263}
264
265int
266linux_writev(struct thread *td, struct linux_writev_args *uap)
267{
268	int error, i, nsize, osize;
269	caddr_t sg;
270	struct writev_args /* {
271		syscallarg(int) fd;
272		syscallarg(struct iovec *) iovp;
273		syscallarg(u_int) iovcnt;
274	} */ a;
275	struct iovec32 *oio;
276	struct iovec *nio;
277
278	sg = stackgap_init();
279
280	if (uap->iovcnt > (STACKGAPLEN / sizeof (struct iovec)))
281		return (EINVAL);
282
283	osize = uap->iovcnt * sizeof (struct iovec32);
284	nsize = uap->iovcnt * sizeof (struct iovec);
285
286	oio = malloc(osize, M_TEMP, M_WAITOK);
287	nio = malloc(nsize, M_TEMP, M_WAITOK);
288
289	error = 0;
290	if ((error = copyin(uap->iovp, oio, osize)))
291		goto punt;
292	for (i = 0; i < uap->iovcnt; i++) {
293		nio[i].iov_base = PTRIN(oio[i].iov_base);
294		nio[i].iov_len = oio[i].iov_len;
295	}
296
297	a.fd = uap->fd;
298	a.iovp = stackgap_alloc(&sg, nsize);
299	a.iovcnt = uap->iovcnt;
300
301	if ((error = copyout(nio, (caddr_t)a.iovp, nsize)))
302		goto punt;
303	error = writev(td, &a);
304
305punt:
306	free(oio, M_TEMP);
307	free(nio, M_TEMP);
308	return (error);
309}
310
/*
 * Indirect argument block read by linux_ipc() for LINUX_MSGRCV when the
 * upper 16 bits of the "what" argument are zero: msgp is the user
 * address of the message buffer and msgtyp the requested message type.
 */
struct l_ipc_kludge {
	l_uintptr_t msgp;
	l_long msgtyp;
} __packed;
315
/*
 * Linux ipc(2) multiplexer.  The low 16 bits of args->what select the
 * System V IPC operation; the generic arg1/arg2/arg3/arg5/ptr members
 * are repacked into the argument structure of the matching linux_*()
 * handler, whose return value is passed back unchanged.  An operation
 * code that matches no case yields EINVAL.
 */
int
linux_ipc(struct thread *td, struct linux_ipc_args *args)
{

	switch (args->what & 0xFFFF) {
	case LINUX_SEMOP: {
		struct linux_semop_args a;

		a.semid = args->arg1;
		a.tsops = args->ptr;
		a.nsops = args->arg2;
		return (linux_semop(td, &a));
	}
	case LINUX_SEMGET: {
		struct linux_semget_args a;

		a.key = args->arg1;
		a.nsems = args->arg2;
		a.semflg = args->arg3;
		return (linux_semget(td, &a));
	}
	case LINUX_SEMCTL: {
		struct linux_semctl_args a;
		int error;

		a.semid = args->arg1;
		a.semnum = args->arg2;
		a.cmd = args->arg3;
		/* The semctl argument is passed indirectly through ptr. */
		error = copyin(args->ptr, &a.arg, sizeof(a.arg));
		if (error)
			return (error);
		return (linux_semctl(td, &a));
	}
	case LINUX_MSGSND: {
		struct linux_msgsnd_args a;

		a.msqid = args->arg1;
		a.msgp = args->ptr;
		a.msgsz = args->arg2;
		a.msgflg = args->arg3;
		return (linux_msgsnd(td, &a));
	}
	case LINUX_MSGRCV: {
		struct linux_msgrcv_args a;

		a.msqid = args->arg1;
		a.msgsz = args->arg2;
		a.msgflg = args->arg3;
		/*
		 * The upper 16 bits of "what" choose the calling
		 * convention: when zero, ptr refers to an l_ipc_kludge
		 * holding the buffer address and message type; otherwise
		 * ptr is the buffer itself and arg5 the message type.
		 */
		if ((args->what >> 16) == 0) {
			struct l_ipc_kludge tmp;
			int error;

			if (args->ptr == 0)
				return (EINVAL);
			error = copyin(args->ptr, &tmp, sizeof(tmp));
			if (error)
				return (error);
			a.msgp = PTRIN(tmp.msgp);
			a.msgtyp = tmp.msgtyp;
		} else {
			a.msgp = args->ptr;
			a.msgtyp = args->arg5;
		}
		return (linux_msgrcv(td, &a));
	}
	case LINUX_MSGGET: {
		struct linux_msgget_args a;

		a.key = args->arg1;
		a.msgflg = args->arg2;
		return (linux_msgget(td, &a));
	}
	case LINUX_MSGCTL: {
		struct linux_msgctl_args a;

		a.msqid = args->arg1;
		a.cmd = args->arg2;
		a.buf = args->ptr;
		return (linux_msgctl(td, &a));
	}
	case LINUX_SHMAT: {
		struct linux_shmat_args a;

		a.shmid = args->arg1;
		a.shmaddr = args->ptr;
		a.shmflg = args->arg2;
		/* arg3 carries a user address here, hence PTRIN(). */
		a.raddr = PTRIN(args->arg3);
		return (linux_shmat(td, &a));
	}
	case LINUX_SHMDT: {
		struct linux_shmdt_args a;

		a.shmaddr = args->ptr;
		return (linux_shmdt(td, &a));
	}
	case LINUX_SHMGET: {
		struct linux_shmget_args a;

		a.key = args->arg1;
		a.size = args->arg2;
		a.shmflg = args->arg3;
		return (linux_shmget(td, &a));
	}
	case LINUX_SHMCTL: {
		struct linux_shmctl_args a;

		a.shmid = args->arg1;
		a.cmd = args->arg2;
		a.buf = args->ptr;
		return (linux_shmctl(td, &a));
	}
	default:
		break;
	}

	/* Unknown operation code. */
	return (EINVAL);
}
433
434int
435linux_old_select(struct thread *td, struct linux_old_select_args *args)
436{
437	struct l_old_select_argv linux_args;
438	struct linux_select_args newsel;
439	int error;
440
441#ifdef DEBUG
442	if (ldebug(old_select))
443		printf(ARGS(old_select, "%p"), args->ptr);
444#endif
445
446	error = copyin(args->ptr, &linux_args, sizeof(linux_args));
447	if (error)
448		return (error);
449
450	newsel.nfds = linux_args.nfds;
451	newsel.readfds = PTRIN(linux_args.readfds);
452	newsel.writefds = PTRIN(linux_args.writefds);
453	newsel.exceptfds = PTRIN(linux_args.exceptfds);
454	newsel.timeout = PTRIN(linux_args.timeout);
455	return (linux_select(td, &newsel));
456}
457
458int
459linux_fork(struct thread *td, struct linux_fork_args *args)
460{
461	int error;
462
463#ifdef DEBUG
464	if (ldebug(fork))
465		printf(ARGS(fork, ""));
466#endif
467
468	if ((error = fork(td, (struct fork_args *)args)) != 0)
469		return (error);
470
471	if (td->td_retval[1] == 1)
472		td->td_retval[0] = 0;
473	return (0);
474}
475
476int
477linux_vfork(struct thread *td, struct linux_vfork_args *args)
478{
479	int error;
480
481#ifdef DEBUG
482	if (ldebug(vfork))
483		printf(ARGS(vfork, ""));
484#endif
485
486	if ((error = vfork(td, (struct vfork_args *)args)) != 0)
487		return (error);
488	/* Are we the child? */
489	if (td->td_retval[1] == 1)
490		td->td_retval[0] = 0;
491	return (0);
492}
493
/*
 * Flag bits of the Linux clone(2) "flags" argument.  linux_clone()
 * tests CLONE_VM, CLONE_SIGHAND and CLONE_FILES to build its fork1()
 * flags and only reports CLONE_PID in a DEBUG message.
 */
#define CLONE_VM	0x100
#define CLONE_FS	0x200
#define CLONE_FILES	0x400
#define CLONE_SIGHAND	0x800
#define CLONE_PID	0x1000
499
500int
501linux_clone(struct thread *td, struct linux_clone_args *args)
502{
503	int error, ff = RFPROC | RFSTOPPED;
504	struct proc *p2;
505	struct thread *td2;
506	int exit_signal;
507
508#ifdef DEBUG
509	if (ldebug(clone)) {
510		printf(ARGS(clone, "flags %x, stack %x"),
511		    (unsigned int)(uintptr_t)args->flags,
512		    (unsigned int)(uintptr_t)args->stack);
513		if (args->flags & CLONE_PID)
514			printf(LMSG("CLONE_PID not yet supported"));
515	}
516#endif
517
518	if (!args->stack)
519		return (EINVAL);
520
521	exit_signal = args->flags & 0x000000ff;
522	if (exit_signal >= LINUX_NSIG)
523		return (EINVAL);
524
525	if (exit_signal <= LINUX_SIGTBLSZ)
526		exit_signal = linux_to_bsd_signal[_SIG_IDX(exit_signal)];
527
528	if (args->flags & CLONE_VM)
529		ff |= RFMEM;
530	if (args->flags & CLONE_SIGHAND)
531		ff |= RFSIGSHARE;
532	if (!(args->flags & CLONE_FILES))
533		ff |= RFFDG;
534
535	error = fork1(td, ff, 0, &p2);
536	if (error)
537		return (error);
538
539
540	PROC_LOCK(p2);
541	p2->p_sigparent = exit_signal;
542	PROC_UNLOCK(p2);
543	td2 = FIRST_THREAD_IN_PROC(p2);
544	td2->td_frame->tf_rsp = PTROUT(args->stack);
545
546#ifdef DEBUG
547	if (ldebug(clone))
548		printf(LMSG("clone: successful rfork to %ld, stack %p sig = %d"),
549		    (long)p2->p_pid, args->stack, exit_signal);
550#endif
551
552	/*
553	 * Make this runnable after we are finished with it.
554	 */
555	mtx_lock_spin(&sched_lock);
556	TD_SET_CAN_RUN(td2);
557	setrunqueue(td2, SRQ_BORING);
558	mtx_unlock_spin(&sched_lock);
559
560	td->td_retval[0] = p2->p_pid;
561	td->td_retval[1] = 0;
562	return (0);
563}
564
/* XXX move */
/*
 * Linux mmap argument block.  linux_mmap() copies one in from user
 * space; linux_mmap2() fills one in from its own arguments.  Both pass
 * it to linux_mmap_common().
 */
struct l_mmap_argv {
	l_ulong		addr;
	l_int		len;
	l_int		prot;
	l_int		flags;
	l_int		fd;
	l_int		pos;
};

/*
 * For LINUX_MAP_GROWSDOWN mappings linux_mmap_common() reserves at
 * least (STACK_SIZE - GUARD_SIZE) bytes.
 */
#define STACK_SIZE  (2 * 1024 * 1024)
#define GUARD_SIZE  (4 * PAGE_SIZE)

static int linux_mmap_common(struct thread *, struct l_mmap_argv *);
579
580int
581linux_mmap2(struct thread *td, struct linux_mmap2_args *args)
582{
583	struct l_mmap_argv linux_args;
584
585#ifdef DEBUG
586	if (ldebug(mmap2))
587		printf(ARGS(mmap2, "%p, %d, %d, 0x%08x, %d, %d"),
588		    (void *)(intptr_t)args->addr, args->len, args->prot,
589		    args->flags, args->fd, args->pgoff);
590#endif
591
592	linux_args.addr = PTROUT(args->addr);
593	linux_args.len = args->len;
594	linux_args.prot = args->prot;
595	linux_args.flags = args->flags;
596	linux_args.fd = args->fd;
597	linux_args.pos = args->pgoff * PAGE_SIZE;
598
599	return (linux_mmap_common(td, &linux_args));
600}
601
602int
603linux_mmap(struct thread *td, struct linux_mmap_args *args)
604{
605	int error;
606	struct l_mmap_argv linux_args;
607
608	error = copyin(args->ptr, &linux_args, sizeof(linux_args));
609	if (error)
610		return (error);
611
612#ifdef DEBUG
613	if (ldebug(mmap))
614		printf(ARGS(mmap, "%p, %d, %d, 0x%08x, %d, %d"),
615		    (void *)(intptr_t)linux_args.addr, linux_args.len,
616		    linux_args.prot, linux_args.flags, linux_args.fd,
617		    linux_args.pos);
618#endif
619
620	return (linux_mmap_common(td, &linux_args));
621}
622
/*
 * Shared back end of linux_mmap() and linux_mmap2().
 *
 * Translates the Linux mapping flags in linux_args into native mmap
 * flags, rewrites address and length for LINUX_MAP_GROWSDOWN requests
 * so that they can be served with MAP_STACK, forces PROT_READ and
 * PROT_EXEC on, and calls mmap().  Returns the result of mmap().
 */
static int
linux_mmap_common(struct thread *td, struct l_mmap_argv *linux_args)
{
	struct proc *p = td->td_proc;
	struct mmap_args /* {
		caddr_t addr;
		size_t len;
		int prot;
		int flags;
		int fd;
		long pad;
		off_t pos;
	} */ bsd_args;
	int error;

	error = 0;
	bsd_args.flags = 0;
	if (linux_args->flags & LINUX_MAP_SHARED)
		bsd_args.flags |= MAP_SHARED;
	if (linux_args->flags & LINUX_MAP_PRIVATE)
		bsd_args.flags |= MAP_PRIVATE;
	if (linux_args->flags & LINUX_MAP_FIXED)
		bsd_args.flags |= MAP_FIXED;
	/* Every mapping that is not anonymous is also marked MAP_NOSYNC. */
	if (linux_args->flags & LINUX_MAP_ANON)
		bsd_args.flags |= MAP_ANON;
	else
		bsd_args.flags |= MAP_NOSYNC;
	if (linux_args->flags & LINUX_MAP_GROWSDOWN) {
		bsd_args.flags |= MAP_STACK;

		/* The linux MAP_GROWSDOWN option does not limit auto
		 * growth of the region.  Linux mmap with this option
		 * takes as addr the initial BOS, and as len, the initial
		 * region size.  It can then grow down from addr without
		 * limit.  However, linux threads has an implicit internal
		 * limit to stack size of STACK_SIZE.  It's just not
		 * enforced explicitly in linux.  But, here we impose
		 * a limit of (STACK_SIZE - GUARD_SIZE) on the stack
		 * region, since we can do this with our mmap.
		 *
		 * Our mmap with MAP_STACK takes addr as the maximum
		 * downsize limit on BOS, and as len the max size of
		 * the region.  It then maps the top SGROWSIZ bytes,
		 * and autogrows the region down, up to the limit
		 * in addr.
		 *
		 * If we don't use the MAP_STACK option, the effect
		 * of this code is to allocate a stack region of a
		 * fixed size of (STACK_SIZE - GUARD_SIZE).
		 */

		/* This gives us TOS */
		bsd_args.addr = (caddr_t)PTRIN(linux_args->addr) +
		    linux_args->len;

		if ((caddr_t)PTRIN(bsd_args.addr) >
		    p->p_vmspace->vm_maxsaddr) {
			/* Some linux apps will attempt to mmap
			 * thread stacks near the top of their
			 * address space.  If their TOS is greater
			 * than vm_maxsaddr, vm_map_growstack()
			 * will confuse the thread stack with the
			 * process stack and deliver a SEGV if they
			 * attempt to grow the thread stack past their
			 * current stacksize rlimit.  To avoid this,
			 * adjust vm_maxsaddr upwards to reflect
			 * the current stacksize rlimit rather
			 * than the maximum possible stacksize.
			 * It would be better to adjust the
			 * mmap'ed region, but some apps do not check
			 * mmap's return value.
			 */
			PROC_LOCK(p);
			p->p_vmspace->vm_maxsaddr =
			    (char *)LINUX32_USRSTACK -
			    lim_cur(p, RLIMIT_STACK);
			PROC_UNLOCK(p);
		}

		/* This gives us our maximum stack size */
		if (linux_args->len > STACK_SIZE - GUARD_SIZE)
			bsd_args.len = linux_args->len;
		else
			bsd_args.len  = STACK_SIZE - GUARD_SIZE;

		/* This gives us a new BOS.  If we're using VM_STACK, then
		 * mmap will just map the top SGROWSIZ bytes, and let
		 * the stack grow down to the limit at BOS.  If we're
		 * not using VM_STACK we map the full stack, since we
		 * don't have a way to autogrow it.
		 */
		bsd_args.addr -= bsd_args.len;
	} else {
		bsd_args.addr = (caddr_t)PTRIN(linux_args->addr);
		bsd_args.len  = linux_args->len;
	}
	/*
	 * XXX The i386 Linux emulator always forces PROT_READ on (why?)
	 * so we do the same. We add PROT_EXEC to work around buggy
	 * applications (e.g. Java) that take advantage of the fact
	 * that execute permissions are not enforced by x86 CPUs.
	 */
	bsd_args.prot = linux_args->prot | PROT_EXEC | PROT_READ;
	/* The caller's descriptor is ignored for anonymous mappings. */
	if (linux_args->flags & LINUX_MAP_ANON)
		bsd_args.fd = -1;
	else
		bsd_args.fd = linux_args->fd;
	bsd_args.pos = linux_args->pos;
	bsd_args.pad = 0;

#ifdef DEBUG
	if (ldebug(mmap))
		printf("-> %s(%p, %d, %d, 0x%08x, %d, 0x%x)\n",
		    __func__,
		    (void *)bsd_args.addr, (int)bsd_args.len, bsd_args.prot,
		    bsd_args.flags, bsd_args.fd, (int)bsd_args.pos);
#endif
	error = mmap(td, &bsd_args);
#ifdef DEBUG
	if (ldebug(mmap))
		printf("-> %s() return: 0x%x (0x%08x)\n",
			__func__, error, (u_int)td->td_retval[0]);
#endif
	return (error);
}
748
749int
750linux_pipe(struct thread *td, struct linux_pipe_args *args)
751{
752	int pip[2];
753	int error;
754	register_t reg_rdx;
755
756#ifdef DEBUG
757	if (ldebug(pipe))
758		printf(ARGS(pipe, "*"));
759#endif
760
761	reg_rdx = td->td_retval[1];
762	error = pipe(td, 0);
763	if (error) {
764		td->td_retval[1] = reg_rdx;
765		return (error);
766	}
767
768	pip[0] = td->td_retval[0];
769	pip[1] = td->td_retval[1];
770	error = copyout(pip, args->pipefds, 2 * sizeof(int));
771	if (error) {
772		td->td_retval[1] = reg_rdx;
773		return (error);
774	}
775
776	td->td_retval[1] = reg_rdx;
777	td->td_retval[0] = 0;
778	return (0);
779}
780
781int
782linux_sigaction(struct thread *td, struct linux_sigaction_args *args)
783{
784	l_osigaction_t osa;
785	l_sigaction_t act, oact;
786	int error;
787
788#ifdef DEBUG
789	if (ldebug(sigaction))
790		printf(ARGS(sigaction, "%d, %p, %p"),
791		    args->sig, (void *)args->nsa, (void *)args->osa);
792#endif
793
794	if (args->nsa != NULL) {
795		error = copyin(args->nsa, &osa, sizeof(l_osigaction_t));
796		if (error)
797			return (error);
798		act.lsa_handler = osa.lsa_handler;
799		act.lsa_flags = osa.lsa_flags;
800		act.lsa_restorer = osa.lsa_restorer;
801		LINUX_SIGEMPTYSET(act.lsa_mask);
802		act.lsa_mask.__bits[0] = osa.lsa_mask;
803	}
804
805	error = linux_do_sigaction(td, args->sig, args->nsa ? &act : NULL,
806	    args->osa ? &oact : NULL);
807
808	if (args->osa != NULL && !error) {
809		osa.lsa_handler = oact.lsa_handler;
810		osa.lsa_flags = oact.lsa_flags;
811		osa.lsa_restorer = oact.lsa_restorer;
812		osa.lsa_mask = oact.lsa_mask.__bits[0];
813		error = copyout(&osa, args->osa, sizeof(l_osigaction_t));
814	}
815
816	return (error);
817}
818
819/*
820 * Linux has two extra args, restart and oldmask.  We dont use these,
821 * but it seems that "restart" is actually a context pointer that
822 * enables the signal to happen with a different register set.
823 */
824int
825linux_sigsuspend(struct thread *td, struct linux_sigsuspend_args *args)
826{
827	sigset_t sigmask;
828	l_sigset_t mask;
829
830#ifdef DEBUG
831	if (ldebug(sigsuspend))
832		printf(ARGS(sigsuspend, "%08lx"), (unsigned long)args->mask);
833#endif
834
835	LINUX_SIGEMPTYSET(mask);
836	mask.__bits[0] = args->mask;
837	linux_to_bsd_sigset(&mask, &sigmask);
838	return (kern_sigsuspend(td, sigmask));
839}
840
841int
842linux_rt_sigsuspend(struct thread *td, struct linux_rt_sigsuspend_args *uap)
843{
844	l_sigset_t lmask;
845	sigset_t sigmask;
846	int error;
847
848#ifdef DEBUG
849	if (ldebug(rt_sigsuspend))
850		printf(ARGS(rt_sigsuspend, "%p, %d"),
851		    (void *)uap->newset, uap->sigsetsize);
852#endif
853
854	if (uap->sigsetsize != sizeof(l_sigset_t))
855		return (EINVAL);
856
857	error = copyin(uap->newset, &lmask, sizeof(l_sigset_t));
858	if (error)
859		return (error);
860
861	linux_to_bsd_sigset(&lmask, &sigmask);
862	return (kern_sigsuspend(td, sigmask));
863}
864
865int
866linux_pause(struct thread *td, struct linux_pause_args *args)
867{
868	struct proc *p = td->td_proc;
869	sigset_t sigmask;
870
871#ifdef DEBUG
872	if (ldebug(pause))
873		printf(ARGS(pause, ""));
874#endif
875
876	PROC_LOCK(p);
877	sigmask = td->td_sigmask;
878	PROC_UNLOCK(p);
879	return (kern_sigsuspend(td, sigmask));
880}
881
882int
883linux_sigaltstack(struct thread *td, struct linux_sigaltstack_args *uap)
884{
885	stack_t ss, oss;
886	l_stack_t lss;
887	int error;
888
889#ifdef DEBUG
890	if (ldebug(sigaltstack))
891		printf(ARGS(sigaltstack, "%p, %p"), uap->uss, uap->uoss);
892#endif
893
894	if (uap->uss != NULL) {
895		error = copyin(uap->uss, &lss, sizeof(l_stack_t));
896		if (error)
897			return (error);
898
899		ss.ss_sp = PTRIN(lss.ss_sp);
900		ss.ss_size = lss.ss_size;
901		ss.ss_flags = linux_to_bsd_sigaltstack(lss.ss_flags);
902	}
903	error = kern_sigaltstack(td, (uap->uss != NULL) ? &ss : NULL,
904	    (uap->uoss != NULL) ? &oss : NULL);
905	if (!error && uap->uoss != NULL) {
906		lss.ss_sp = PTROUT(oss.ss_sp);
907		lss.ss_size = oss.ss_size;
908		lss.ss_flags = bsd_to_linux_sigaltstack(oss.ss_flags);
909		error = copyout(&lss, uap->uoss, sizeof(l_stack_t));
910	}
911
912	return (error);
913}
914
915int
916linux_ftruncate64(struct thread *td, struct linux_ftruncate64_args *args)
917{
918	struct ftruncate_args sa;
919
920#ifdef DEBUG
921	if (ldebug(ftruncate64))
922		printf(ARGS(ftruncate64, "%u, %jd"), args->fd,
923		    (intmax_t)args->length);
924#endif
925
926	sa.fd = args->fd;
927	sa.pad = 0;
928	sa.length = args->length;
929	return ftruncate(td, &sa);
930}
931
932int
933linux_gettimeofday(struct thread *td, struct linux_gettimeofday_args *uap)
934{
935	struct timeval atv;
936	l_timeval atv32;
937	struct timezone rtz;
938	int error = 0;
939
940	if (uap->tp) {
941		microtime(&atv);
942		atv32.tv_sec = atv.tv_sec;
943		atv32.tv_usec = atv.tv_usec;
944		error = copyout(&atv32, uap->tp, sizeof (atv32));
945	}
946	if (error == 0 && uap->tzp != NULL) {
947		rtz.tz_minuteswest = tz_minuteswest;
948		rtz.tz_dsttime = tz_dsttime;
949		error = copyout(&rtz, uap->tzp, sizeof (rtz));
950	}
951	return (error);
952}
953
954int
955linux_nanosleep(struct thread *td, struct linux_nanosleep_args *uap)
956{
957	struct timespec rqt, rmt;
958	struct l_timespec ats32;
959	int error;
960
961	error = copyin(uap->rqtp, &ats32, sizeof(ats32));
962	if (error != 0)
963		return (error);
964	rqt.tv_sec = ats32.tv_sec;
965	rqt.tv_nsec = ats32.tv_nsec;
966	error = kern_nanosleep(td, &rqt, &rmt);
967	if (uap->rmtp != NULL) {
968		ats32.tv_sec = rmt.tv_sec;
969		ats32.tv_nsec = rmt.tv_nsec;
970		error = copyout(&ats32, uap->rmtp, sizeof(ats32));
971	}
972	return (error);
973}
974
975int
976linux_getrusage(struct thread *td, struct linux_getrusage_args *uap)
977{
978	struct l_rusage s32;
979	struct rusage s;
980	int error;
981
982	error = kern_getrusage(td, uap->who, &s);
983	if (error != 0)
984		return (error);
985	if (uap->rusage != NULL) {
986		s32.ru_utime.tv_sec = s.ru_utime.tv_sec;
987		s32.ru_utime.tv_usec = s.ru_utime.tv_usec;
988		s32.ru_stime.tv_sec = s.ru_stime.tv_sec;
989		s32.ru_stime.tv_usec = s.ru_stime.tv_usec;
990		s32.ru_maxrss = s.ru_maxrss;
991		s32.ru_ixrss = s.ru_ixrss;
992		s32.ru_idrss = s.ru_idrss;
993		s32.ru_isrss = s.ru_isrss;
994		s32.ru_minflt = s.ru_minflt;
995		s32.ru_majflt = s.ru_majflt;
996		s32.ru_nswap = s.ru_nswap;
997		s32.ru_inblock = s.ru_inblock;
998		s32.ru_oublock = s.ru_oublock;
999		s32.ru_msgsnd = s.ru_msgsnd;
1000		s32.ru_msgrcv = s.ru_msgrcv;
1001		s32.ru_nsignals = s.ru_nsignals;
1002		s32.ru_nvcsw = s.ru_nvcsw;
1003		s32.ru_nivcsw = s.ru_nivcsw;
1004		error = copyout(&s32, uap->rusage, sizeof(s32));
1005	}
1006	return (error);
1007}
1008
1009int
1010linux_sched_rr_get_interval(struct thread *td,
1011    struct linux_sched_rr_get_interval_args *uap)
1012{
1013	struct sched_rr_get_interval_args bsd_args;
1014	caddr_t sg, psgts;
1015	struct timespec ts;
1016	struct l_timespec ts32;
1017	int error;
1018
1019	sg = stackgap_init();
1020	psgts = stackgap_alloc(&sg, sizeof(struct timespec));
1021	bsd_args.pid = uap->pid;
1022	bsd_args.interval = (void *)psgts;
1023	error = sched_rr_get_interval(td, &bsd_args);
1024	if (error != 0)
1025		return (error);
1026	error = copyin(psgts, &ts, sizeof(ts));
1027	if (error != 0)
1028		return (error);
1029	ts32.tv_sec = ts.tv_sec;
1030	ts32.tv_nsec = ts.tv_nsec;
1031	return (copyout(&ts32, uap->interval, sizeof(ts32)));
1032}
1033
1034int
1035linux_mprotect(struct thread *td, struct linux_mprotect_args *uap)
1036{
1037	struct mprotect_args bsd_args;
1038
1039	bsd_args.addr = uap->addr;
1040	bsd_args.len = uap->len;
1041	bsd_args.prot = uap->prot;
1042	/* XXX PROT_READ implies PROT_EXEC; see linux_mmap_common(). */
1043	if ((bsd_args.prot & PROT_READ) != 0)
1044		bsd_args.prot |= PROT_EXEC;
1045	return (mprotect(td, &bsd_args));
1046}
1047