linux32_machdep.c revision 147588
1/*-
2 * Copyright (c) 2004 Tim J. Robbins
3 * Copyright (c) 2002 Doug Rabson
4 * Copyright (c) 2000 Marcel Moolenaar
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer
12 *    in this position and unchanged.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. The name of the author may not be used to endorse or promote products
17 *    derived from this software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31#include <sys/cdefs.h>
32__FBSDID("$FreeBSD: head/sys/amd64/linux32/linux32_machdep.c 147588 2005-06-24 17:41:28Z jhb $");
33
34#include <sys/param.h>
35#include <sys/kernel.h>
36#include <sys/systm.h>
37#include <sys/imgact.h>
38#include <sys/lock.h>
39#include <sys/malloc.h>
40#include <sys/mman.h>
41#include <sys/mutex.h>
42#include <sys/proc.h>
43#include <sys/resource.h>
44#include <sys/resourcevar.h>
45#include <sys/syscallsubr.h>
46#include <sys/sysproto.h>
47#include <sys/unistd.h>
48
49#include <machine/frame.h>
50
51#include <vm/vm.h>
52#include <vm/pmap.h>
53#include <vm/vm_extern.h>
54#include <vm/vm_kern.h>
55#include <vm/vm_map.h>
56
57#include <amd64/linux32/linux.h>
58#include <amd64/linux32/linux32_proto.h>
59#include <compat/linux/linux_ipc.h>
60#include <compat/linux/linux_signal.h>
61#include <compat/linux/linux_util.h>
62
/*
 * In-memory argument block for the old-style Linux select call.
 * linux_old_select() copies one of these in from the user pointer it is
 * given and widens the 32-bit pointer fields with PTRIN() before calling
 * linux_select().
 */
struct l_old_select_argv {
	l_int		nfds;		/* passed through as linux_select nfds */
	l_uintptr_t	readfds;	/* 32-bit user pointer */
	l_uintptr_t	writefds;	/* 32-bit user pointer */
	l_uintptr_t	exceptfds;	/* 32-bit user pointer */
	l_uintptr_t	timeout;	/* 32-bit user pointer */
} __packed;
70
71int
72linux_to_bsd_sigaltstack(int lsa)
73{
74	int bsa = 0;
75
76	if (lsa & LINUX_SS_DISABLE)
77		bsa |= SS_DISABLE;
78	if (lsa & LINUX_SS_ONSTACK)
79		bsa |= SS_ONSTACK;
80	return (bsa);
81}
82
83int
84bsd_to_linux_sigaltstack(int bsa)
85{
86	int lsa = 0;
87
88	if (bsa & SS_DISABLE)
89		lsa |= LINUX_SS_DISABLE;
90	if (bsa & SS_ONSTACK)
91		lsa |= LINUX_SS_ONSTACK;
92	return (lsa);
93}
94
95/*
96 * Custom version of exec_copyin_args() so that we can translate
97 * the pointers.
98 */
99static int
100linux_exec_copyin_args(struct image_args *args, char *fname,
101    enum uio_seg segflg, char **argv, char **envv)
102{
103	char *argp, *envp;
104	u_int32_t *p32, arg;
105	size_t length;
106	int error;
107
108	bzero(args, sizeof(*args));
109	if (argv == NULL)
110		return (EFAULT);
111
112	/*
113	 * Allocate temporary demand zeroed space for argument and
114	 *	environment strings
115	 */
116	args->buf = (char *) kmem_alloc_wait(exec_map,
117	    PATH_MAX + ARG_MAX + MAXSHELLCMDLEN);
118	if (args->buf == NULL)
119		return (ENOMEM);
120	args->begin_argv = args->buf;
121	args->endp = args->begin_argv;
122	args->stringspace = ARG_MAX;
123
124	args->fname = args->buf + ARG_MAX;
125
126	/*
127	 * Copy the file name.
128	 */
129	error = (segflg == UIO_SYSSPACE) ?
130	    copystr(fname, args->fname, PATH_MAX, &length) :
131	    copyinstr(fname, args->fname, PATH_MAX, &length);
132	if (error != 0)
133		return (error);
134
135	/*
136	 * extract arguments first
137	 */
138	p32 = (u_int32_t *)argv;
139	for (;;) {
140		error = copyin(p32++, &arg, sizeof(arg));
141		if (error)
142			return (error);
143		if (arg == 0)
144			break;
145		argp = PTRIN(arg);
146		error = copyinstr(argp, args->endp, args->stringspace, &length);
147		if (error) {
148			if (error == ENAMETOOLONG)
149				return (E2BIG);
150			else
151				return (error);
152		}
153		args->stringspace -= length;
154		args->endp += length;
155		args->argc++;
156	}
157
158	args->begin_envv = args->endp;
159
160	/*
161	 * extract environment strings
162	 */
163	if (envv) {
164		p32 = (u_int32_t *)envv;
165		for (;;) {
166			error = copyin(p32++, &arg, sizeof(arg));
167			if (error)
168				return (error);
169			if (arg == 0)
170				break;
171			envp = PTRIN(arg);
172			error = copyinstr(envp, args->endp, args->stringspace,
173			    &length);
174			if (error) {
175				if (error == ENAMETOOLONG)
176					return (E2BIG);
177				else
178					return (error);
179			}
180			args->stringspace -= length;
181			args->endp += length;
182			args->envc++;
183		}
184	}
185
186	return (0);
187}
188
189int
190linux_execve(struct thread *td, struct linux_execve_args *args)
191{
192	struct image_args eargs;
193	char *path;
194	int error;
195
196	LCONVPATHEXIST(td, args->path, &path);
197
198#ifdef DEBUG
199	if (ldebug(execve))
200		printf(ARGS(execve, "%s"), path);
201#endif
202
203	error = linux_exec_copyin_args(&eargs, path, UIO_SYSSPACE, args->argp,
204	    args->envp);
205	free(path, M_TEMP);
206	if (error == 0)
207		error = kern_execve(td, &eargs, NULL);
208	exec_free_args(&eargs);
209	return (error);
210}
211
/*
 * 32-bit layout of an I/O vector entry as copied in from user space by
 * linux32_copyinuio(), which converts each entry to a native struct iovec.
 */
struct iovec32 {
	u_int32_t iov_base;	/* 32-bit user address, widened with PTRIN() */
	int	iov_len;	/* length of the segment */
};

/* The copyin in linux32_copyinuio() relies on this exact 8-byte layout. */
CTASSERT(sizeof(struct iovec32) == 8);
218
219static int
220linux32_copyinuio(struct iovec32 *iovp, u_int iovcnt, struct uio **uiop)
221{
222	struct iovec32 iov32;
223	struct iovec *iov;
224	struct uio *uio;
225	u_int iovlen;
226	int error, i;
227
228	*uiop = NULL;
229	if (iovcnt > UIO_MAXIOV)
230		return (EINVAL);
231	iovlen = iovcnt * sizeof(struct iovec);
232	uio = malloc(iovlen + sizeof *uio, M_IOV, M_WAITOK);
233	iov = (struct iovec *)(uio + 1);
234	for (i = 0; i < iovcnt; i++) {
235		error = copyin(&iovp[i], &iov32, sizeof(struct iovec32));
236		if (error) {
237			free(uio, M_IOV);
238			return (error);
239		}
240		iov[i].iov_base = PTRIN(iov32.iov_base);
241		iov[i].iov_len = iov32.iov_len;
242	}
243	uio->uio_iov = iov;
244	uio->uio_iovcnt = iovcnt;
245	uio->uio_segflg = UIO_USERSPACE;
246	uio->uio_offset = -1;
247	uio->uio_resid = 0;
248	for (i = 0; i < iovcnt; i++) {
249		if (iov->iov_len > INT_MAX - uio->uio_resid) {
250			free(uio, M_IOV);
251			return (EINVAL);
252		}
253		uio->uio_resid += iov->iov_len;
254		iov++;
255	}
256	*uiop = uio;
257	return (0);
258}
259
260int
261linux_readv(struct thread *td, struct linux_readv_args *uap)
262{
263	struct uio *auio;
264	int error;
265
266	error = linux32_copyinuio(uap->iovp, uap->iovcnt, &auio);
267	if (error)
268		return (error);
269	error = kern_readv(td, uap->fd, auio);
270	free(auio, M_IOV);
271	return (error);
272}
273
274int
275linux_writev(struct thread *td, struct linux_writev_args *uap)
276{
277	struct uio *auio;
278	int error;
279
280	error = linux32_copyinuio(uap->iovp, uap->iovcnt, &auio);
281	if (error)
282		return (error);
283	error = kern_writev(td, uap->fd, auio);
284	free(auio, M_IOV);
285	return (error);
286}
287
/*
 * Indirect argument pair for LINUX_MSGRCV.  linux_ipc() copies one of
 * these in from args->ptr when the upper 16 bits of args->what are zero.
 */
struct l_ipc_kludge {
	l_uintptr_t msgp;	/* 32-bit user pointer to the message buffer */
	l_long msgtyp;		/* message type passed on to linux_msgrcv() */
} __packed;
292
293int
294linux_ipc(struct thread *td, struct linux_ipc_args *args)
295{
296
297	switch (args->what & 0xFFFF) {
298	case LINUX_SEMOP: {
299		struct linux_semop_args a;
300
301		a.semid = args->arg1;
302		a.tsops = args->ptr;
303		a.nsops = args->arg2;
304		return (linux_semop(td, &a));
305	}
306	case LINUX_SEMGET: {
307		struct linux_semget_args a;
308
309		a.key = args->arg1;
310		a.nsems = args->arg2;
311		a.semflg = args->arg3;
312		return (linux_semget(td, &a));
313	}
314	case LINUX_SEMCTL: {
315		struct linux_semctl_args a;
316		int error;
317
318		a.semid = args->arg1;
319		a.semnum = args->arg2;
320		a.cmd = args->arg3;
321		error = copyin(args->ptr, &a.arg, sizeof(a.arg));
322		if (error)
323			return (error);
324		return (linux_semctl(td, &a));
325	}
326	case LINUX_MSGSND: {
327		struct linux_msgsnd_args a;
328
329		a.msqid = args->arg1;
330		a.msgp = args->ptr;
331		a.msgsz = args->arg2;
332		a.msgflg = args->arg3;
333		return (linux_msgsnd(td, &a));
334	}
335	case LINUX_MSGRCV: {
336		struct linux_msgrcv_args a;
337
338		a.msqid = args->arg1;
339		a.msgsz = args->arg2;
340		a.msgflg = args->arg3;
341		if ((args->what >> 16) == 0) {
342			struct l_ipc_kludge tmp;
343			int error;
344
345			if (args->ptr == 0)
346				return (EINVAL);
347			error = copyin(args->ptr, &tmp, sizeof(tmp));
348			if (error)
349				return (error);
350			a.msgp = PTRIN(tmp.msgp);
351			a.msgtyp = tmp.msgtyp;
352		} else {
353			a.msgp = args->ptr;
354			a.msgtyp = args->arg5;
355		}
356		return (linux_msgrcv(td, &a));
357	}
358	case LINUX_MSGGET: {
359		struct linux_msgget_args a;
360
361		a.key = args->arg1;
362		a.msgflg = args->arg2;
363		return (linux_msgget(td, &a));
364	}
365	case LINUX_MSGCTL: {
366		struct linux_msgctl_args a;
367
368		a.msqid = args->arg1;
369		a.cmd = args->arg2;
370		a.buf = args->ptr;
371		return (linux_msgctl(td, &a));
372	}
373	case LINUX_SHMAT: {
374		struct linux_shmat_args a;
375
376		a.shmid = args->arg1;
377		a.shmaddr = args->ptr;
378		a.shmflg = args->arg2;
379		a.raddr = PTRIN((l_uint)args->arg3);
380		return (linux_shmat(td, &a));
381	}
382	case LINUX_SHMDT: {
383		struct linux_shmdt_args a;
384
385		a.shmaddr = args->ptr;
386		return (linux_shmdt(td, &a));
387	}
388	case LINUX_SHMGET: {
389		struct linux_shmget_args a;
390
391		a.key = args->arg1;
392		a.size = args->arg2;
393		a.shmflg = args->arg3;
394		return (linux_shmget(td, &a));
395	}
396	case LINUX_SHMCTL: {
397		struct linux_shmctl_args a;
398
399		a.shmid = args->arg1;
400		a.cmd = args->arg2;
401		a.buf = args->ptr;
402		return (linux_shmctl(td, &a));
403	}
404	default:
405		break;
406	}
407
408	return (EINVAL);
409}
410
411int
412linux_old_select(struct thread *td, struct linux_old_select_args *args)
413{
414	struct l_old_select_argv linux_args;
415	struct linux_select_args newsel;
416	int error;
417
418#ifdef DEBUG
419	if (ldebug(old_select))
420		printf(ARGS(old_select, "%p"), args->ptr);
421#endif
422
423	error = copyin(args->ptr, &linux_args, sizeof(linux_args));
424	if (error)
425		return (error);
426
427	newsel.nfds = linux_args.nfds;
428	newsel.readfds = PTRIN(linux_args.readfds);
429	newsel.writefds = PTRIN(linux_args.writefds);
430	newsel.exceptfds = PTRIN(linux_args.exceptfds);
431	newsel.timeout = PTRIN(linux_args.timeout);
432	return (linux_select(td, &newsel));
433}
434
435int
436linux_fork(struct thread *td, struct linux_fork_args *args)
437{
438	int error;
439
440#ifdef DEBUG
441	if (ldebug(fork))
442		printf(ARGS(fork, ""));
443#endif
444
445	if ((error = fork(td, (struct fork_args *)args)) != 0)
446		return (error);
447
448	if (td->td_retval[1] == 1)
449		td->td_retval[0] = 0;
450	return (0);
451}
452
453int
454linux_vfork(struct thread *td, struct linux_vfork_args *args)
455{
456	int error;
457
458#ifdef DEBUG
459	if (ldebug(vfork))
460		printf(ARGS(vfork, ""));
461#endif
462
463	if ((error = vfork(td, (struct vfork_args *)args)) != 0)
464		return (error);
465	/* Are we the child? */
466	if (td->td_retval[1] == 1)
467		td->td_retval[0] = 0;
468	return (0);
469}
470
/*
 * Linux clone() flag bits examined by linux_clone().  The low byte of
 * the flags word (mask 0xff) is not a flag; linux_clone() reads it as
 * the exit signal.
 */
#define CLONE_VM	0x100
#define CLONE_FS	0x200
#define CLONE_FILES	0x400
#define CLONE_SIGHAND	0x800
#define CLONE_PID	0x1000
476
477int
478linux_clone(struct thread *td, struct linux_clone_args *args)
479{
480	int error, ff = RFPROC | RFSTOPPED;
481	struct proc *p2;
482	struct thread *td2;
483	int exit_signal;
484
485#ifdef DEBUG
486	if (ldebug(clone)) {
487		printf(ARGS(clone, "flags %x, stack %x"),
488		    (unsigned int)(uintptr_t)args->flags,
489		    (unsigned int)(uintptr_t)args->stack);
490		if (args->flags & CLONE_PID)
491			printf(LMSG("CLONE_PID not yet supported"));
492	}
493#endif
494
495	if (!args->stack)
496		return (EINVAL);
497
498	exit_signal = args->flags & 0x000000ff;
499	if (exit_signal >= LINUX_NSIG)
500		return (EINVAL);
501
502	if (exit_signal <= LINUX_SIGTBLSZ)
503		exit_signal = linux_to_bsd_signal[_SIG_IDX(exit_signal)];
504
505	if (args->flags & CLONE_VM)
506		ff |= RFMEM;
507	if (args->flags & CLONE_SIGHAND)
508		ff |= RFSIGSHARE;
509	if (!(args->flags & CLONE_FILES))
510		ff |= RFFDG;
511
512	error = fork1(td, ff, 0, &p2);
513	if (error)
514		return (error);
515
516
517	PROC_LOCK(p2);
518	p2->p_sigparent = exit_signal;
519	PROC_UNLOCK(p2);
520	td2 = FIRST_THREAD_IN_PROC(p2);
521	td2->td_frame->tf_rsp = PTROUT(args->stack);
522
523#ifdef DEBUG
524	if (ldebug(clone))
525		printf(LMSG("clone: successful rfork to %ld, stack %p sig = %d"),
526		    (long)p2->p_pid, args->stack, exit_signal);
527#endif
528
529	/*
530	 * Make this runnable after we are finished with it.
531	 */
532	mtx_lock_spin(&sched_lock);
533	TD_SET_CAN_RUN(td2);
534	setrunqueue(td2, SRQ_BORING);
535	mtx_unlock_spin(&sched_lock);
536
537	td->td_retval[0] = p2->p_pid;
538	td->td_retval[1] = 0;
539	return (0);
540}
541
/* XXX move */
/*
 * Argument block shared by linux_mmap() and linux_mmap2().  linux_mmap()
 * copies it in from user space with a byte offset in pgoff and divides
 * that by PAGE_SIZE; linux_mmap2() fills it from its own arguments.
 * linux_mmap_common() therefore always sees pgoff in units of PAGE_SIZE.
 */
struct l_mmap_argv {
	l_ulong		addr;	/* requested address */
	l_ulong		len;	/* length of the mapping */
	l_ulong		prot;	/* protection bits */
	l_ulong		flags;	/* LINUX_MAP_* flags */
	l_ulong		fd;	/* file descriptor; unused for LINUX_MAP_ANON */
	l_ulong		pgoff;	/* file offset (see above for units) */
};
551
/*
 * Stack sizing used by linux_mmap_common() for LINUX_MAP_GROWSDOWN
 * mappings: the region length is raised to at least
 * (STACK_SIZE - GUARD_SIZE).
 */
#define STACK_SIZE  (2 * 1024 * 1024)
#define GUARD_SIZE  (4 * PAGE_SIZE)
554
555static int linux_mmap_common(struct thread *, struct l_mmap_argv *);
556
557int
558linux_mmap2(struct thread *td, struct linux_mmap2_args *args)
559{
560	struct l_mmap_argv linux_args;
561
562#ifdef DEBUG
563	if (ldebug(mmap2))
564		printf(ARGS(mmap2, "%p, %d, %d, 0x%08x, %d, %d"),
565		    (void *)(intptr_t)args->addr, args->len, args->prot,
566		    args->flags, args->fd, args->pgoff);
567#endif
568
569	linux_args.addr = PTROUT(args->addr);
570	linux_args.len = args->len;
571	linux_args.prot = args->prot;
572	linux_args.flags = args->flags;
573	linux_args.fd = args->fd;
574	linux_args.pgoff = args->pgoff;
575
576	return (linux_mmap_common(td, &linux_args));
577}
578
579int
580linux_mmap(struct thread *td, struct linux_mmap_args *args)
581{
582	int error;
583	struct l_mmap_argv linux_args;
584
585	error = copyin(args->ptr, &linux_args, sizeof(linux_args));
586	if (error)
587		return (error);
588
589#ifdef DEBUG
590	if (ldebug(mmap))
591		printf(ARGS(mmap, "%p, %d, %d, 0x%08x, %d, %d"),
592		    (void *)(intptr_t)linux_args.addr, linux_args.len,
593		    linux_args.prot, linux_args.flags, linux_args.fd,
594		    linux_args.pgoff);
595#endif
596	if ((linux_args.pgoff % PAGE_SIZE) != 0)
597		return (EINVAL);
598	linux_args.pgoff /= PAGE_SIZE;
599
600	return (linux_mmap_common(td, &linux_args));
601}
602
603static int
604linux_mmap_common(struct thread *td, struct l_mmap_argv *linux_args)
605{
606	struct proc *p = td->td_proc;
607	struct mmap_args /* {
608		caddr_t addr;
609		size_t len;
610		int prot;
611		int flags;
612		int fd;
613		long pad;
614		off_t pos;
615	} */ bsd_args;
616	int error;
617
618	error = 0;
619	bsd_args.flags = 0;
620	if (linux_args->flags & LINUX_MAP_SHARED)
621		bsd_args.flags |= MAP_SHARED;
622	if (linux_args->flags & LINUX_MAP_PRIVATE)
623		bsd_args.flags |= MAP_PRIVATE;
624	if (linux_args->flags & LINUX_MAP_FIXED)
625		bsd_args.flags |= MAP_FIXED;
626	if (linux_args->flags & LINUX_MAP_ANON)
627		bsd_args.flags |= MAP_ANON;
628	else
629		bsd_args.flags |= MAP_NOSYNC;
630	if (linux_args->flags & LINUX_MAP_GROWSDOWN) {
631		bsd_args.flags |= MAP_STACK;
632
633		/* The linux MAP_GROWSDOWN option does not limit auto
634		 * growth of the region.  Linux mmap with this option
635		 * takes as addr the inital BOS, and as len, the initial
636		 * region size.  It can then grow down from addr without
637		 * limit.  However, linux threads has an implicit internal
638		 * limit to stack size of STACK_SIZE.  Its just not
639		 * enforced explicitly in linux.  But, here we impose
640		 * a limit of (STACK_SIZE - GUARD_SIZE) on the stack
641		 * region, since we can do this with our mmap.
642		 *
643		 * Our mmap with MAP_STACK takes addr as the maximum
644		 * downsize limit on BOS, and as len the max size of
645		 * the region.  It them maps the top SGROWSIZ bytes,
646		 * and autgrows the region down, up to the limit
647		 * in addr.
648		 *
649		 * If we don't use the MAP_STACK option, the effect
650		 * of this code is to allocate a stack region of a
651		 * fixed size of (STACK_SIZE - GUARD_SIZE).
652		 */
653
654		/* This gives us TOS */
655		bsd_args.addr = (caddr_t)PTRIN(linux_args->addr) +
656		    linux_args->len;
657
658		if ((caddr_t)PTRIN(bsd_args.addr) >
659		    p->p_vmspace->vm_maxsaddr) {
660			/* Some linux apps will attempt to mmap
661			 * thread stacks near the top of their
662			 * address space.  If their TOS is greater
663			 * than vm_maxsaddr, vm_map_growstack()
664			 * will confuse the thread stack with the
665			 * process stack and deliver a SEGV if they
666			 * attempt to grow the thread stack past their
667			 * current stacksize rlimit.  To avoid this,
668			 * adjust vm_maxsaddr upwards to reflect
669			 * the current stacksize rlimit rather
670			 * than the maximum possible stacksize.
671			 * It would be better to adjust the
672			 * mmap'ed region, but some apps do not check
673			 * mmap's return value.
674			 */
675			PROC_LOCK(p);
676			p->p_vmspace->vm_maxsaddr =
677			    (char *)LINUX32_USRSTACK -
678			    lim_cur(p, RLIMIT_STACK);
679			PROC_UNLOCK(p);
680		}
681
682		/* This gives us our maximum stack size */
683		if (linux_args->len > STACK_SIZE - GUARD_SIZE)
684			bsd_args.len = linux_args->len;
685		else
686			bsd_args.len  = STACK_SIZE - GUARD_SIZE;
687
688		/* This gives us a new BOS.  If we're using VM_STACK, then
689		 * mmap will just map the top SGROWSIZ bytes, and let
690		 * the stack grow down to the limit at BOS.  If we're
691		 * not using VM_STACK we map the full stack, since we
692		 * don't have a way to autogrow it.
693		 */
694		bsd_args.addr -= bsd_args.len;
695	} else {
696		bsd_args.addr = (caddr_t)PTRIN(linux_args->addr);
697		bsd_args.len  = linux_args->len;
698	}
699	/*
700	 * XXX i386 Linux always emulator forces PROT_READ on (why?)
701	 * so we do the same. We add PROT_EXEC to work around buggy
702	 * applications (e.g. Java) that take advantage of the fact
703	 * that execute permissions are not enforced by x86 CPUs.
704	 */
705	bsd_args.prot = linux_args->prot | PROT_EXEC | PROT_READ;
706	if (linux_args->flags & LINUX_MAP_ANON)
707		bsd_args.fd = -1;
708	else
709		bsd_args.fd = linux_args->fd;
710	bsd_args.pos = (off_t)linux_args->pgoff * PAGE_SIZE;
711	bsd_args.pad = 0;
712
713#ifdef DEBUG
714	if (ldebug(mmap))
715		printf("-> %s(%p, %d, %d, 0x%08x, %d, 0x%x)\n",
716		    __func__,
717		    (void *)bsd_args.addr, (int)bsd_args.len, bsd_args.prot,
718		    bsd_args.flags, bsd_args.fd, (int)bsd_args.pos);
719#endif
720	error = mmap(td, &bsd_args);
721#ifdef DEBUG
722	if (ldebug(mmap))
723		printf("-> %s() return: 0x%x (0x%08x)\n",
724			__func__, error, (u_int)td->td_retval[0]);
725#endif
726	return (error);
727}
728
729int
730linux_pipe(struct thread *td, struct linux_pipe_args *args)
731{
732	int pip[2];
733	int error;
734	register_t reg_rdx;
735
736#ifdef DEBUG
737	if (ldebug(pipe))
738		printf(ARGS(pipe, "*"));
739#endif
740
741	reg_rdx = td->td_retval[1];
742	error = pipe(td, 0);
743	if (error) {
744		td->td_retval[1] = reg_rdx;
745		return (error);
746	}
747
748	pip[0] = td->td_retval[0];
749	pip[1] = td->td_retval[1];
750	error = copyout(pip, args->pipefds, 2 * sizeof(int));
751	if (error) {
752		td->td_retval[1] = reg_rdx;
753		return (error);
754	}
755
756	td->td_retval[1] = reg_rdx;
757	td->td_retval[0] = 0;
758	return (0);
759}
760
761int
762linux_sigaction(struct thread *td, struct linux_sigaction_args *args)
763{
764	l_osigaction_t osa;
765	l_sigaction_t act, oact;
766	int error;
767
768#ifdef DEBUG
769	if (ldebug(sigaction))
770		printf(ARGS(sigaction, "%d, %p, %p"),
771		    args->sig, (void *)args->nsa, (void *)args->osa);
772#endif
773
774	if (args->nsa != NULL) {
775		error = copyin(args->nsa, &osa, sizeof(l_osigaction_t));
776		if (error)
777			return (error);
778		act.lsa_handler = osa.lsa_handler;
779		act.lsa_flags = osa.lsa_flags;
780		act.lsa_restorer = osa.lsa_restorer;
781		LINUX_SIGEMPTYSET(act.lsa_mask);
782		act.lsa_mask.__bits[0] = osa.lsa_mask;
783	}
784
785	error = linux_do_sigaction(td, args->sig, args->nsa ? &act : NULL,
786	    args->osa ? &oact : NULL);
787
788	if (args->osa != NULL && !error) {
789		osa.lsa_handler = oact.lsa_handler;
790		osa.lsa_flags = oact.lsa_flags;
791		osa.lsa_restorer = oact.lsa_restorer;
792		osa.lsa_mask = oact.lsa_mask.__bits[0];
793		error = copyout(&osa, args->osa, sizeof(l_osigaction_t));
794	}
795
796	return (error);
797}
798
799/*
800 * Linux has two extra args, restart and oldmask.  We dont use these,
801 * but it seems that "restart" is actually a context pointer that
802 * enables the signal to happen with a different register set.
803 */
804int
805linux_sigsuspend(struct thread *td, struct linux_sigsuspend_args *args)
806{
807	sigset_t sigmask;
808	l_sigset_t mask;
809
810#ifdef DEBUG
811	if (ldebug(sigsuspend))
812		printf(ARGS(sigsuspend, "%08lx"), (unsigned long)args->mask);
813#endif
814
815	LINUX_SIGEMPTYSET(mask);
816	mask.__bits[0] = args->mask;
817	linux_to_bsd_sigset(&mask, &sigmask);
818	return (kern_sigsuspend(td, sigmask));
819}
820
821int
822linux_rt_sigsuspend(struct thread *td, struct linux_rt_sigsuspend_args *uap)
823{
824	l_sigset_t lmask;
825	sigset_t sigmask;
826	int error;
827
828#ifdef DEBUG
829	if (ldebug(rt_sigsuspend))
830		printf(ARGS(rt_sigsuspend, "%p, %d"),
831		    (void *)uap->newset, uap->sigsetsize);
832#endif
833
834	if (uap->sigsetsize != sizeof(l_sigset_t))
835		return (EINVAL);
836
837	error = copyin(uap->newset, &lmask, sizeof(l_sigset_t));
838	if (error)
839		return (error);
840
841	linux_to_bsd_sigset(&lmask, &sigmask);
842	return (kern_sigsuspend(td, sigmask));
843}
844
845int
846linux_pause(struct thread *td, struct linux_pause_args *args)
847{
848	struct proc *p = td->td_proc;
849	sigset_t sigmask;
850
851#ifdef DEBUG
852	if (ldebug(pause))
853		printf(ARGS(pause, ""));
854#endif
855
856	PROC_LOCK(p);
857	sigmask = td->td_sigmask;
858	PROC_UNLOCK(p);
859	return (kern_sigsuspend(td, sigmask));
860}
861
862int
863linux_sigaltstack(struct thread *td, struct linux_sigaltstack_args *uap)
864{
865	stack_t ss, oss;
866	l_stack_t lss;
867	int error;
868
869#ifdef DEBUG
870	if (ldebug(sigaltstack))
871		printf(ARGS(sigaltstack, "%p, %p"), uap->uss, uap->uoss);
872#endif
873
874	if (uap->uss != NULL) {
875		error = copyin(uap->uss, &lss, sizeof(l_stack_t));
876		if (error)
877			return (error);
878
879		ss.ss_sp = PTRIN(lss.ss_sp);
880		ss.ss_size = lss.ss_size;
881		ss.ss_flags = linux_to_bsd_sigaltstack(lss.ss_flags);
882	}
883	error = kern_sigaltstack(td, (uap->uss != NULL) ? &ss : NULL,
884	    (uap->uoss != NULL) ? &oss : NULL);
885	if (!error && uap->uoss != NULL) {
886		lss.ss_sp = PTROUT(oss.ss_sp);
887		lss.ss_size = oss.ss_size;
888		lss.ss_flags = bsd_to_linux_sigaltstack(oss.ss_flags);
889		error = copyout(&lss, uap->uoss, sizeof(l_stack_t));
890	}
891
892	return (error);
893}
894
895int
896linux_ftruncate64(struct thread *td, struct linux_ftruncate64_args *args)
897{
898	struct ftruncate_args sa;
899
900#ifdef DEBUG
901	if (ldebug(ftruncate64))
902		printf(ARGS(ftruncate64, "%u, %jd"), args->fd,
903		    (intmax_t)args->length);
904#endif
905
906	sa.fd = args->fd;
907	sa.pad = 0;
908	sa.length = args->length;
909	return ftruncate(td, &sa);
910}
911
912int
913linux_gettimeofday(struct thread *td, struct linux_gettimeofday_args *uap)
914{
915	struct timeval atv;
916	l_timeval atv32;
917	struct timezone rtz;
918	int error = 0;
919
920	if (uap->tp) {
921		microtime(&atv);
922		atv32.tv_sec = atv.tv_sec;
923		atv32.tv_usec = atv.tv_usec;
924		error = copyout(&atv32, uap->tp, sizeof (atv32));
925	}
926	if (error == 0 && uap->tzp != NULL) {
927		rtz.tz_minuteswest = tz_minuteswest;
928		rtz.tz_dsttime = tz_dsttime;
929		error = copyout(&rtz, uap->tzp, sizeof (rtz));
930	}
931	return (error);
932}
933
934int
935linux_nanosleep(struct thread *td, struct linux_nanosleep_args *uap)
936{
937	struct timespec rqt, rmt;
938	struct l_timespec ats32;
939	int error;
940
941	error = copyin(uap->rqtp, &ats32, sizeof(ats32));
942	if (error != 0)
943		return (error);
944	rqt.tv_sec = ats32.tv_sec;
945	rqt.tv_nsec = ats32.tv_nsec;
946	error = kern_nanosleep(td, &rqt, &rmt);
947	if (uap->rmtp != NULL) {
948		ats32.tv_sec = rmt.tv_sec;
949		ats32.tv_nsec = rmt.tv_nsec;
950		error = copyout(&ats32, uap->rmtp, sizeof(ats32));
951	}
952	return (error);
953}
954
955int
956linux_getrusage(struct thread *td, struct linux_getrusage_args *uap)
957{
958	struct l_rusage s32;
959	struct rusage s;
960	int error;
961
962	error = kern_getrusage(td, uap->who, &s);
963	if (error != 0)
964		return (error);
965	if (uap->rusage != NULL) {
966		s32.ru_utime.tv_sec = s.ru_utime.tv_sec;
967		s32.ru_utime.tv_usec = s.ru_utime.tv_usec;
968		s32.ru_stime.tv_sec = s.ru_stime.tv_sec;
969		s32.ru_stime.tv_usec = s.ru_stime.tv_usec;
970		s32.ru_maxrss = s.ru_maxrss;
971		s32.ru_ixrss = s.ru_ixrss;
972		s32.ru_idrss = s.ru_idrss;
973		s32.ru_isrss = s.ru_isrss;
974		s32.ru_minflt = s.ru_minflt;
975		s32.ru_majflt = s.ru_majflt;
976		s32.ru_nswap = s.ru_nswap;
977		s32.ru_inblock = s.ru_inblock;
978		s32.ru_oublock = s.ru_oublock;
979		s32.ru_msgsnd = s.ru_msgsnd;
980		s32.ru_msgrcv = s.ru_msgrcv;
981		s32.ru_nsignals = s.ru_nsignals;
982		s32.ru_nvcsw = s.ru_nvcsw;
983		s32.ru_nivcsw = s.ru_nivcsw;
984		error = copyout(&s32, uap->rusage, sizeof(s32));
985	}
986	return (error);
987}
988
989int
990linux_sched_rr_get_interval(struct thread *td,
991    struct linux_sched_rr_get_interval_args *uap)
992{
993	struct timespec ts;
994	struct l_timespec ts32;
995	int error;
996
997	error = kern_sched_rr_get_interval(td, uap->pid, &ts);
998	if (error != 0)
999		return (error);
1000	ts32.tv_sec = ts.tv_sec;
1001	ts32.tv_nsec = ts.tv_nsec;
1002	return (copyout(&ts32, uap->interval, sizeof(ts32)));
1003}
1004
1005int
1006linux_mprotect(struct thread *td, struct linux_mprotect_args *uap)
1007{
1008	struct mprotect_args bsd_args;
1009
1010	bsd_args.addr = uap->addr;
1011	bsd_args.len = uap->len;
1012	bsd_args.prot = uap->prot;
1013	/* XXX PROT_READ implies PROT_EXEC; see linux_mmap_common(). */
1014	if ((bsd_args.prot & PROT_READ) != 0)
1015		bsd_args.prot |= PROT_EXEC;
1016	return (mprotect(td, &bsd_args));
1017}
1018