linux_misc.c revision 70061
1/*-
2 * Copyright (c) 1994-1995 Søren Schmidt
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer
10 *    in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 *    derived from this software withough specific prior written permission
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 * $FreeBSD: head/sys/compat/linux/linux_misc.c 70061 2000-12-15 19:41:27Z jhb $
29 */
30
31#include "opt_compat.h"
32
33#include <sys/param.h>
34#include <sys/systm.h>
35#include <sys/sysproto.h>
36#include <sys/kernel.h>
37#include <sys/mman.h>
38#include <sys/proc.h>
39#include <sys/fcntl.h>
40#include <sys/imgact_aout.h>
41#include <sys/mount.h>
42#include <sys/namei.h>
43#include <sys/resourcevar.h>
44#include <sys/stat.h>
45#include <sys/sysctl.h>
46#include <sys/unistd.h>
47#include <sys/vnode.h>
48#include <sys/wait.h>
49#include <sys/time.h>
50#include <sys/signalvar.h>
51
52#include <vm/vm.h>
53#include <vm/pmap.h>
54#include <vm/vm_kern.h>
55#include <vm/vm_map.h>
56#include <vm/vm_extern.h>
57
58#include <machine/frame.h>
59#include <machine/limits.h>
60#include <machine/psl.h>
61#include <machine/sysarch.h>
62#ifdef __i386__
63#include <machine/segments.h>
64#endif
65
66#include <posix4/sched.h>
67
68#include <machine/../linux/linux.h>
69#include <machine/../linux/linux_proto.h>
70#include <compat/linux/linux_mib.h>
71#include <compat/linux/linux_util.h>
72
/*
 * Translate a BSD signal number into the number Linux uses.  On alpha the
 * value is passed through untouched; elsewhere values up to LINUX_SIGTBLSZ
 * are looked up in bsd_to_linux_signal[] and larger values pass through.
 * Every use of the argument is parenthesized so that any expression may be
 * handed to the macro.
 */
#ifdef __alpha__
#define BSD_TO_LINUX_SIGNAL(sig)	(sig)
#else
#define BSD_TO_LINUX_SIGNAL(sig)	\
	(((sig) <= LINUX_SIGTBLSZ) ? bsd_to_linux_signal[_SIG_IDX(sig)] : (sig))
#endif
79
/*
 * Limit pair exchanged with the Linux process by linux_setrlimit() and
 * linux_getrlimit(); both members are plain unsigned longs.
 */
struct linux_rlimit {
	unsigned long	rlim_cur;	/* copied to/from the BSD rlim_cur */
	unsigned long	rlim_max;	/* copied to/from the BSD rlim_max */
};
84
85#ifndef __alpha__
86static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] =
87{ RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK,
88  RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE,
89  RLIMIT_MEMLOCK, -1
90};
91#endif /*!__alpha__*/
92
93#ifndef __alpha__
94int
95linux_alarm(struct proc *p, struct linux_alarm_args *args)
96{
97    struct itimerval it, old_it;
98    struct timeval tv;
99    int s;
100
101#ifdef DEBUG
102    printf("Linux-emul(%ld): alarm(%u)\n", (long)p->p_pid, args->secs);
103#endif
104    if (args->secs > 100000000)
105	return EINVAL;
106    it.it_value.tv_sec = (long)args->secs;
107    it.it_value.tv_usec = 0;
108    it.it_interval.tv_sec = 0;
109    it.it_interval.tv_usec = 0;
110    s = splsoftclock();
111    old_it = p->p_realtimer;
112    getmicrouptime(&tv);
113    if (timevalisset(&old_it.it_value))
114	callout_stop(&p->p_itcallout);
115    if (it.it_value.tv_sec != 0) {
116	callout_reset(&p->p_itcallout, tvtohz(&it.it_value), realitexpire, p);
117	timevaladd(&it.it_value, &tv);
118    }
119    p->p_realtimer = it;
120    splx(s);
121    if (timevalcmp(&old_it.it_value, &tv, >)) {
122	timevalsub(&old_it.it_value, &tv);
123	if (old_it.it_value.tv_usec != 0)
124	    old_it.it_value.tv_sec++;
125	p->p_retval[0] = old_it.it_value.tv_sec;
126    }
127    return 0;
128}
129#endif /*!__alpha__*/
130
131int
132linux_brk(struct proc *p, struct linux_brk_args *args)
133{
134#if 0
135    struct vmspace *vm = p->p_vmspace;
136    vm_offset_t new, old;
137    int error;
138
139    if ((vm_offset_t)args->dsend < (vm_offset_t)vm->vm_daddr)
140	return EINVAL;
141    if (((caddr_t)args->dsend - (caddr_t)vm->vm_daddr)
142	> p->p_rlimit[RLIMIT_DATA].rlim_cur)
143	return ENOMEM;
144
145    old = round_page((vm_offset_t)vm->vm_daddr) + ctob(vm->vm_dsize);
146    new = round_page((vm_offset_t)args->dsend);
147    p->p_retval[0] = old;
148    if ((new-old) > 0) {
149	if (swap_pager_full)
150	    return ENOMEM;
151	error = vm_map_find(&vm->vm_map, NULL, 0, &old, (new-old), FALSE,
152			VM_PROT_ALL, VM_PROT_ALL, 0);
153	if (error)
154	    return error;
155	vm->vm_dsize += btoc((new-old));
156	p->p_retval[0] = (int)(vm->vm_daddr + ctob(vm->vm_dsize));
157    }
158    return 0;
159#else
160    struct vmspace *vm = p->p_vmspace;
161    vm_offset_t new, old;
162    struct obreak_args /* {
163	char * nsize;
164    } */ tmp;
165
166#ifdef DEBUG
167    printf("Linux-emul(%ld): brk(%p)\n", (long)p->p_pid, (void *)args->dsend);
168#endif
169    old = (vm_offset_t)vm->vm_daddr + ctob(vm->vm_dsize);
170    new = (vm_offset_t)args->dsend;
171    tmp.nsize = (char *) new;
172    if (((caddr_t)new > vm->vm_daddr) && !obreak(p, &tmp))
173	p->p_retval[0] = (long)new;
174    else
175	p->p_retval[0] = (long)old;
176
177    return 0;
178#endif
179}
180
181int
182linux_uselib(struct proc *p, struct linux_uselib_args *args)
183{
184    struct nameidata ni;
185    struct vnode *vp;
186    struct exec *a_out;
187    struct vattr attr;
188    struct ucred *uc;
189    vm_offset_t vmaddr;
190    unsigned long file_offset;
191    vm_offset_t buffer;
192    unsigned long bss_size;
193    int error;
194    caddr_t sg;
195    int locked;
196
197    sg = stackgap_init();
198    CHECKALTEXIST(p, &sg, args->library);
199
200#ifdef DEBUG
201    printf("Linux-emul(%ld): uselib(%s)\n", (long)p->p_pid, args->library);
202#endif
203
204    a_out = NULL;
205    locked = 0;
206    vp = NULL;
207
208    NDINIT(&ni, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, args->library, p);
209    error = namei(&ni);
210    if (error)
211	goto cleanup;
212
213    vp = ni.ni_vp;
214    /*
215     * XXX This looks like a bogus check - a LOCKLEAF namei should not succeed
216     * without returning a vnode.
217     */
218    if (vp == NULL) {
219	error = ENOEXEC;	/* ?? */
220	goto cleanup;
221    }
222    NDFREE(&ni, NDF_ONLY_PNBUF);
223
224    /*
225     * From here on down, we have a locked vnode that must be unlocked.
226     */
227    locked++;
228
229    /*
230     * Writable?
231     */
232    if (vp->v_writecount) {
233	error = ETXTBSY;
234	goto cleanup;
235    }
236
237    /*
238     * Executable?
239     */
240    PROC_LOCK(p);
241    uc = p->p_ucred;
242    crhold(uc);
243    PROC_UNLOCK(p);
244    error = VOP_GETATTR(vp, &attr, uc, p);
245    if (error) {
246	crfree(uc);
247	goto cleanup;
248    }
249
250    if ((vp->v_mount->mnt_flag & MNT_NOEXEC) ||
251	((attr.va_mode & 0111) == 0) ||
252	(attr.va_type != VREG)) {
253	    error = ENOEXEC;
254	    crfree(uc);
255	    goto cleanup;
256    }
257
258    /*
259     * Sensible size?
260     */
261    if (attr.va_size == 0) {
262	error = ENOEXEC;
263	crfree(uc);
264	goto cleanup;
265    }
266
267    /*
268     * Can we access it?
269     */
270    error = VOP_ACCESS(vp, VEXEC, uc, p);
271    if (error) {
272	crfree(uc);
273	goto cleanup;
274    }
275
276    error = VOP_OPEN(vp, FREAD, uc, p);
277    crfree(uc);
278    if (error)
279	goto cleanup;
280
281    /*
282     * Lock no longer needed
283     */
284    VOP_UNLOCK(vp, 0, p);
285    locked = 0;
286
287    /*
288     * Pull in executable header into kernel_map
289     */
290    error = vm_mmap(kernel_map, (vm_offset_t *)&a_out, PAGE_SIZE,
291	    	    VM_PROT_READ, VM_PROT_READ, 0, (caddr_t)vp, 0);
292    if (error)
293	goto cleanup;
294
295    /*
296     * Is it a Linux binary ?
297     */
298    if (((a_out->a_magic >> 16) & 0xff) != 0x64) {
299	error = ENOEXEC;
300	goto cleanup;
301    }
302
303    /* While we are here, we should REALLY do some more checks */
304
305    /*
306     * Set file/virtual offset based on a.out variant.
307     */
308    switch ((int)(a_out->a_magic & 0xffff)) {
309    case 0413:	/* ZMAGIC */
310	file_offset = 1024;
311	break;
312    case 0314:	/* QMAGIC */
313	file_offset = 0;
314	break;
315    default:
316	error = ENOEXEC;
317	goto cleanup;
318    }
319
320    bss_size = round_page(a_out->a_bss);
321
322    /*
323     * Check various fields in header for validity/bounds.
324     */
325    if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) {
326	error = ENOEXEC;
327	goto cleanup;
328    }
329
330    /* text + data can't exceed file size */
331    if (a_out->a_data + a_out->a_text > attr.va_size) {
332	error = EFAULT;
333	goto cleanup;
334    }
335
336    /* To protect p->p_rlimit in the if condition. */
337    mtx_assert(&Giant, MA_OWNED);
338
339    /*
340     * text/data/bss must not exceed limits
341     * XXX: this is not complete. it should check current usage PLUS
342     * the resources needed by this library.
343     */
344    if (a_out->a_text > MAXTSIZ ||
345	a_out->a_data + bss_size > p->p_rlimit[RLIMIT_DATA].rlim_cur) {
346	error = ENOMEM;
347	goto cleanup;
348    }
349
350    /*
351     * prevent more writers
352     */
353    vp->v_flag |= VTEXT;
354
355    /*
356     * Check if file_offset page aligned,.
357     * Currently we cannot handle misalinged file offsets,
358     * and so we read in the entire image (what a waste).
359     */
360    if (file_offset & PAGE_MASK) {
361#ifdef DEBUG
362printf("uselib: Non page aligned binary %lu\n", file_offset);
363#endif
364	/*
365	 * Map text+data read/write/execute
366	 */
367
368	/* a_entry is the load address and is page aligned */
369	vmaddr = trunc_page(a_out->a_entry);
370
371	/* get anon user mapping, read+write+execute */
372	error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &vmaddr,
373		    	    a_out->a_text + a_out->a_data, FALSE,
374			    VM_PROT_ALL, VM_PROT_ALL, 0);
375	if (error)
376	    goto cleanup;
377
378	/* map file into kernel_map */
379	error = vm_mmap(kernel_map, &buffer,
380			round_page(a_out->a_text + a_out->a_data + file_offset),
381		   	VM_PROT_READ, VM_PROT_READ, 0,
382			(caddr_t)vp, trunc_page(file_offset));
383	if (error)
384	    goto cleanup;
385
386	/* copy from kernel VM space to user space */
387	error = copyout((caddr_t)(void *)(uintptr_t)(buffer + file_offset),
388			(caddr_t)vmaddr, a_out->a_text + a_out->a_data);
389
390	/* release temporary kernel space */
391	vm_map_remove(kernel_map, buffer,
392		      buffer + round_page(a_out->a_text + a_out->a_data + file_offset));
393
394	if (error)
395	    goto cleanup;
396    }
397    else {
398#ifdef DEBUG
399printf("uselib: Page aligned binary %lu\n", file_offset);
400#endif
401	/*
402	 * for QMAGIC, a_entry is 20 bytes beyond the load address
403	 * to skip the executable header
404	 */
405	vmaddr = trunc_page(a_out->a_entry);
406
407	/*
408	 * Map it all into the process's space as a single copy-on-write
409	 * "data" segment.
410	 */
411	error = vm_mmap(&p->p_vmspace->vm_map, &vmaddr,
412		   	a_out->a_text + a_out->a_data,
413			VM_PROT_ALL, VM_PROT_ALL, MAP_PRIVATE | MAP_FIXED,
414			(caddr_t)vp, file_offset);
415	if (error)
416	    goto cleanup;
417    }
418#ifdef DEBUG
419printf("mem=%08lx = %08lx %08lx\n", vmaddr, ((long*)vmaddr)[0], ((long*)vmaddr)[1]);
420#endif
421    if (bss_size != 0) {
422        /*
423	 * Calculate BSS start address
424	 */
425	vmaddr = trunc_page(a_out->a_entry) + a_out->a_text + a_out->a_data;
426
427	/*
428	 * allocate some 'anon' space
429	 */
430	error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &vmaddr,
431			    bss_size, FALSE,
432			    VM_PROT_ALL, VM_PROT_ALL, 0);
433	if (error)
434	    goto cleanup;
435    }
436
437cleanup:
438    /*
439     * Unlock vnode if needed
440     */
441    if (locked)
442	VOP_UNLOCK(vp, 0, p);
443
444    /*
445     * Release the kernel mapping.
446     */
447    if (a_out)
448	vm_map_remove(kernel_map, (vm_offset_t)a_out, (vm_offset_t)a_out + PAGE_SIZE);
449
450    return error;
451}
452
453int
454linux_newselect(struct proc *p, struct linux_newselect_args *args)
455{
456    struct select_args bsa;
457    struct timeval tv0, tv1, utv, *tvp;
458    caddr_t sg;
459    int error;
460
461#ifdef DEBUG
462    printf("Linux-emul(%ld): newselect(%d, %p, %p, %p, %p)\n",
463  	(long)p->p_pid, args->nfds, (void *)args->readfds,
464	(void *)args->writefds, (void *)args->exceptfds,
465	(void *)args->timeout);
466#endif
467    error = 0;
468    bsa.nd = args->nfds;
469    bsa.in = args->readfds;
470    bsa.ou = args->writefds;
471    bsa.ex = args->exceptfds;
472    bsa.tv = args->timeout;
473
474    /*
475     * Store current time for computation of the amount of
476     * time left.
477     */
478    if (args->timeout) {
479	if ((error = copyin(args->timeout, &utv, sizeof(utv))))
480	    goto select_out;
481#ifdef DEBUG
482	printf("Linux-emul(%ld): incoming timeout (%ld/%ld)\n",
483	    (long)p->p_pid, utv.tv_sec, utv.tv_usec);
484#endif
485	if (itimerfix(&utv)) {
486	    /*
487	     * The timeval was invalid.  Convert it to something
488	     * valid that will act as it does under Linux.
489	     */
490	    sg = stackgap_init();
491	    tvp = stackgap_alloc(&sg, sizeof(utv));
492	    utv.tv_sec += utv.tv_usec / 1000000;
493	    utv.tv_usec %= 1000000;
494	    if (utv.tv_usec < 0) {
495		utv.tv_sec -= 1;
496		utv.tv_usec += 1000000;
497	    }
498	    if (utv.tv_sec < 0)
499		timevalclear(&utv);
500	    if ((error = copyout(&utv, tvp, sizeof(utv))))
501		goto select_out;
502	    bsa.tv = tvp;
503	}
504	microtime(&tv0);
505    }
506
507    error = select(p, &bsa);
508#ifdef DEBUG
509    printf("Linux-emul(%ld): real select returns %d\n", (long)p->p_pid, error);
510#endif
511
512    if (error) {
513	/*
514	 * See fs/select.c in the Linux kernel.  Without this,
515	 * Maelstrom doesn't work.
516	 */
517	if (error == ERESTART)
518	    error = EINTR;
519	goto select_out;
520    }
521
522    if (args->timeout) {
523	if (p->p_retval[0]) {
524	    /*
525	     * Compute how much time was left of the timeout,
526	     * by subtracting the current time and the time
527	     * before we started the call, and subtracting
528	     * that result from the user-supplied value.
529	     */
530	    microtime(&tv1);
531	    timevalsub(&tv1, &tv0);
532	    timevalsub(&utv, &tv1);
533	    if (utv.tv_sec < 0)
534		timevalclear(&utv);
535	} else
536	    timevalclear(&utv);
537#ifdef DEBUG
538	printf("Linux-emul(%ld): outgoing timeout (%ld/%ld)\n",
539	    (long)p->p_pid, utv.tv_sec, utv.tv_usec);
540#endif
541	if ((error = copyout(&utv, args->timeout, sizeof(utv))))
542	    goto select_out;
543    }
544
545select_out:
546#ifdef DEBUG
547    printf("Linux-emul(%ld): newselect_out -> %d\n", (long)p->p_pid, error);
548#endif
549    return error;
550}
551
552int
553linux_getpgid(struct proc *p, struct linux_getpgid_args *args)
554{
555    struct proc *curp;
556
557#ifdef DEBUG
558    printf("Linux-emul(%ld): getpgid(%d)\n", (long)p->p_pid, args->pid);
559#endif
560    if (args->pid != p->p_pid) {
561	if (!(curp = pfind(args->pid)))
562	    return ESRCH;
563    }
564    else
565	curp = p;
566    p->p_retval[0] = curp->p_pgid;
567    return 0;
568}
569
570int
571linux_mremap(struct proc *p, struct linux_mremap_args *args)
572{
573	struct munmap_args /* {
574		void *addr;
575		size_t len;
576	} */ bsd_args;
577	int error = 0;
578
579#ifdef DEBUG
580	printf("Linux-emul(%ld): mremap(%p, %08lx, %08lx, %08lx)\n",
581	    (long)p->p_pid, (void *)args->addr,
582	    (unsigned long)args->old_len,
583	    (unsigned long)args->new_len,
584	    (unsigned long)args->flags);
585#endif
586	args->new_len = round_page(args->new_len);
587	args->old_len = round_page(args->old_len);
588
589	if (args->new_len > args->old_len) {
590		p->p_retval[0] = 0;
591		return ENOMEM;
592	}
593
594	if (args->new_len < args->old_len) {
595		bsd_args.addr = args->addr + args->new_len;
596		bsd_args.len = args->old_len - args->new_len;
597		error = munmap(p, &bsd_args);
598	}
599
600	p->p_retval[0] = error ? 0 : (u_long)args->addr;
601	return error;
602}
603
604int
605linux_msync(struct proc *p, struct linux_msync_args *args)
606{
607	struct msync_args bsd_args;
608
609	bsd_args.addr = args->addr;
610	bsd_args.len = args->len;
611	bsd_args.flags = 0;	/* XXX ignore */
612
613	return msync(p, &bsd_args);
614}
615
616#ifndef __alpha__
617int
618linux_time(struct proc *p, struct linux_time_args *args)
619{
620    struct timeval tv;
621    linux_time_t tm;
622    int error;
623
624#ifdef DEBUG
625    printf("Linux-emul(%ld): time(*)\n", (long)p->p_pid);
626#endif
627    microtime(&tv);
628    tm = tv.tv_sec;
629    if (args->tm && (error = copyout(&tm, args->tm, sizeof(linux_time_t))))
630	return error;
631    p->p_retval[0] = tm;
632    return 0;
633}
634#endif	/*!__alpha__*/
635
/*
 * Buffer copied out to the user by linux_times(): four tick counts
 * produced with CONVTCK().
 */
struct linux_times_argv {
	long	tms_utime;	/* from the process's ru_utime */
	long	tms_stime;	/* from the process's ru_stime */
	long	tms_cutime;	/* from p_stats->p_cru.ru_utime */
	long	tms_cstime;	/* from p_stats->p_cru.ru_stime */
};
642
#define CLK_TCK		100	/* Linux uses 100 */
/*
 * Convert a struct timeval (anything with tv_sec/tv_usec members) into
 * CLK_TCK ticks.  The argument is parenthesized so that expressions such
 * as a dereferenced pointer can be passed.
 */
#define CONVTCK(r)	\
	((r).tv_sec * CLK_TCK + (r).tv_usec / (1000000 / CLK_TCK))
645
646int
647linux_times(struct proc *p, struct linux_times_args *args)
648{
649    struct timeval tv;
650    struct linux_times_argv tms;
651    struct rusage ru;
652    int error;
653
654#ifdef DEBUG
655    printf("Linux-emul(%ld): times(*)\n", (long)p->p_pid);
656#endif
657    calcru(p, &ru.ru_utime, &ru.ru_stime, NULL);
658
659    tms.tms_utime = CONVTCK(ru.ru_utime);
660    tms.tms_stime = CONVTCK(ru.ru_stime);
661
662    tms.tms_cutime = CONVTCK(p->p_stats->p_cru.ru_utime);
663    tms.tms_cstime = CONVTCK(p->p_stats->p_cru.ru_stime);
664
665    if ((error = copyout((caddr_t)&tms, (caddr_t)args->buf,
666	    	    sizeof(struct linux_times_argv))))
667	return error;
668
669    microuptime(&tv);
670    p->p_retval[0] = (int)CONVTCK(tv);
671    return 0;
672}
673
674int
675linux_newuname(struct proc *p, struct linux_newuname_args *args)
676{
677	struct linux_new_utsname utsname;
678	char *osrelease, *osname;
679
680#ifdef DEBUG
681	printf("Linux-emul(%ld): newuname(*)\n", (long)p->p_pid);
682#endif
683
684	osname = linux_get_osname(p);
685	osrelease = linux_get_osrelease(p);
686
687	bzero(&utsname, sizeof(struct linux_new_utsname));
688	strncpy(utsname.sysname, osname, LINUX_MAX_UTSNAME-1);
689	strncpy(utsname.nodename, hostname, LINUX_MAX_UTSNAME-1);
690	strncpy(utsname.release, osrelease, LINUX_MAX_UTSNAME-1);
691	strncpy(utsname.version, version, LINUX_MAX_UTSNAME-1);
692	strncpy(utsname.machine, machine, LINUX_MAX_UTSNAME-1);
693	strncpy(utsname.domainname, domainname, LINUX_MAX_UTSNAME-1);
694
695	return (copyout((caddr_t)&utsname, (caddr_t)args->buf,
696			sizeof(struct linux_new_utsname)));
697}
698
699struct linux_utimbuf {
700	linux_time_t l_actime;
701	linux_time_t l_modtime;
702};
703
704int
705linux_utime(struct proc *p, struct linux_utime_args *args)
706{
707    struct utimes_args /* {
708	char	*path;
709	struct	timeval *tptr;
710    } */ bsdutimes;
711    struct timeval tv[2], *tvp;
712    struct linux_utimbuf lut;
713    int error;
714    caddr_t sg;
715
716    sg = stackgap_init();
717    CHECKALTEXIST(p, &sg, args->fname);
718
719#ifdef DEBUG
720    printf("Linux-emul(%ld): utime(%s, *)\n", (long)p->p_pid, args->fname);
721#endif
722    if (args->times) {
723	if ((error = copyin(args->times, &lut, sizeof lut)))
724	    return error;
725	tv[0].tv_sec = lut.l_actime;
726	tv[0].tv_usec = 0;
727	tv[1].tv_sec = lut.l_modtime;
728	tv[1].tv_usec = 0;
729	/* so that utimes can copyin */
730	tvp = (struct timeval *)stackgap_alloc(&sg, sizeof(tv));
731	if (tvp == NULL)
732		return (ENAMETOOLONG);
733	if ((error = copyout(tv, tvp, sizeof(tv))))
734	    return error;
735	bsdutimes.tptr = tvp;
736    } else
737	bsdutimes.tptr = NULL;
738
739    bsdutimes.path = args->fname;
740    return utimes(p, &bsdutimes);
741}
742
/*
 * Linux wait option bit; linux_waitpid() and linux_wait4() turn it into
 * WLINUXCLONE.
 */
#define __WCLONE	0x80000000
744
745#ifndef __alpha__
746int
747linux_waitpid(struct proc *p, struct linux_waitpid_args *args)
748{
749    struct wait_args /* {
750	int pid;
751	int *status;
752	int options;
753	struct	rusage *rusage;
754    } */ tmp;
755    int error, tmpstat;
756
757#ifdef DEBUG
758    printf("Linux-emul(%ld): waitpid(%d, %p, %d)\n",
759	(long)p->p_pid, args->pid, (void *)args->status, args->options);
760#endif
761    tmp.pid = args->pid;
762    tmp.status = args->status;
763    tmp.options = (args->options & (WNOHANG | WUNTRACED));
764    /* WLINUXCLONE should be equal to __WCLONE, but we make sure */
765    if (args->options & __WCLONE)
766	tmp.options |= WLINUXCLONE;
767    tmp.rusage = NULL;
768
769    if ((error = wait4(p, &tmp)) != 0)
770	return error;
771
772    if (args->status) {
773	if ((error = copyin(args->status, &tmpstat, sizeof(int))) != 0)
774	    return error;
775	tmpstat &= 0xffff;
776	if (WIFSIGNALED(tmpstat))
777	    tmpstat = (tmpstat & 0xffffff80) |
778		      BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat));
779	else if (WIFSTOPPED(tmpstat))
780	    tmpstat = (tmpstat & 0xffff00ff) |
781		      (BSD_TO_LINUX_SIGNAL(WSTOPSIG(tmpstat)) << 8);
782	return copyout(&tmpstat, args->status, sizeof(int));
783    } else
784	return 0;
785}
786#endif	/*!__alpha__*/
787
788int
789linux_wait4(struct proc *p, struct linux_wait4_args *args)
790{
791    struct wait_args /* {
792	int pid;
793	int *status;
794	int options;
795	struct	rusage *rusage;
796    } */ tmp;
797    int error, tmpstat;
798
799#ifdef DEBUG
800    printf("Linux-emul(%ld): wait4(%d, %p, %d, %p)\n",
801	(long)p->p_pid, args->pid, (void *)args->status, args->options,
802	(void *)args->rusage);
803#endif
804    tmp.pid = args->pid;
805    tmp.status = args->status;
806    tmp.options = (args->options & (WNOHANG | WUNTRACED));
807    /* WLINUXCLONE should be equal to __WCLONE, but we make sure */
808    if (args->options & __WCLONE)
809	tmp.options |= WLINUXCLONE;
810    tmp.rusage = args->rusage;
811
812    if ((error = wait4(p, &tmp)) != 0)
813	return error;
814
815    SIGDELSET(p->p_siglist, SIGCHLD);
816
817    if (args->status) {
818	if ((error = copyin(args->status, &tmpstat, sizeof(int))) != 0)
819	    return error;
820	tmpstat &= 0xffff;
821	if (WIFSIGNALED(tmpstat))
822	    tmpstat = (tmpstat & 0xffffff80) |
823		  BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat));
824	else if (WIFSTOPPED(tmpstat))
825	    tmpstat = (tmpstat & 0xffff00ff) |
826		  (BSD_TO_LINUX_SIGNAL(WSTOPSIG(tmpstat)) << 8);
827	return copyout(&tmpstat, args->status, sizeof(int));
828    } else
829	return 0;
830}
831
832int
833linux_mknod(struct proc *p, struct linux_mknod_args *args)
834{
835	caddr_t sg;
836	struct mknod_args bsd_mknod;
837	struct mkfifo_args bsd_mkfifo;
838
839	sg = stackgap_init();
840
841	CHECKALTCREAT(p, &sg, args->path);
842
843#ifdef DEBUG
844	printf("Linux-emul(%ld): mknod(%s, %d, %d)\n",
845	   (long)p->p_pid, args->path, args->mode, args->dev);
846#endif
847
848	if (args->mode & S_IFIFO) {
849		bsd_mkfifo.path = args->path;
850		bsd_mkfifo.mode = args->mode;
851		return mkfifo(p, &bsd_mkfifo);
852	} else {
853		bsd_mknod.path = args->path;
854		bsd_mknod.mode = args->mode;
855		bsd_mknod.dev = args->dev;
856		return mknod(p, &bsd_mknod);
857	}
858}
859
860/*
861 * UGH! This is just about the dumbest idea I've ever heard!!
862 */
863int
864linux_personality(struct proc *p, struct linux_personality_args *args)
865{
866#ifdef DEBUG
867	printf("Linux-emul(%ld): personality(%d)\n",
868	   (long)p->p_pid, args->per);
869#endif
870#ifndef __alpha__
871	if (args->per != 0)
872		return EINVAL;
873#endif
874
875	/* Yes Jim, it's still a Linux... */
876	p->p_retval[0] = 0;
877	return 0;
878}
879
880/*
881 * Wrappers for get/setitimer for debugging..
882 */
883int
884linux_setitimer(struct proc *p, struct linux_setitimer_args *args)
885{
886	struct setitimer_args bsa;
887	struct itimerval foo;
888	int error;
889
890#ifdef DEBUG
891	printf("Linux-emul(%ld): setitimer(%p, %p)\n",
892	    (long)p->p_pid, (void *)args->itv, (void *)args->oitv);
893#endif
894	bsa.which = args->which;
895	bsa.itv = args->itv;
896	bsa.oitv = args->oitv;
897	if (args->itv) {
898	    if ((error = copyin((caddr_t)args->itv, (caddr_t)&foo,
899			sizeof(foo))))
900		return error;
901#ifdef DEBUG
902	    printf("setitimer: value: sec: %ld, usec: %ld\n",
903		foo.it_value.tv_sec, foo.it_value.tv_usec);
904	    printf("setitimer: interval: sec: %ld, usec: %ld\n",
905		foo.it_interval.tv_sec, foo.it_interval.tv_usec);
906#endif
907	}
908	return setitimer(p, &bsa);
909}
910
911int
912linux_getitimer(struct proc *p, struct linux_getitimer_args *args)
913{
914	struct getitimer_args bsa;
915#ifdef DEBUG
916	printf("Linux-emul(%ld): getitimer(%p)\n",
917	    (long)p->p_pid, (void *)args->itv);
918#endif
919	bsa.which = args->which;
920	bsa.itv = args->itv;
921	return getitimer(p, &bsa);
922}
923
924#ifndef __alpha__
925int
926linux_nice(struct proc *p, struct linux_nice_args *args)
927{
928	struct setpriority_args	bsd_args;
929
930	bsd_args.which = PRIO_PROCESS;
931	bsd_args.who = 0;	/* current process */
932	bsd_args.prio = args->inc;
933	return setpriority(p, &bsd_args);
934}
935#endif	/*!__alpha__*/
936
937int
938linux_setgroups(p, uap)
939	struct proc *p;
940	struct linux_setgroups_args *uap;
941{
942	struct pcred *pc;
943	linux_gid_t linux_gidset[NGROUPS];
944	gid_t *bsd_gidset;
945	int ngrp, error;
946
947	pc = p->p_cred;
948	ngrp = uap->gidsetsize;
949
950	/*
951	 * cr_groups[0] holds egid. Setting the whole set from
952	 * the supplied set will cause egid to be changed too.
953	 * Keep cr_groups[0] unchanged to prevent that.
954	 */
955
956	if ((error = suser(p)) != 0)
957		return (error);
958
959	if (ngrp >= NGROUPS)
960		return (EINVAL);
961
962	pc->pc_ucred = crcopy(pc->pc_ucred);
963	if (ngrp > 0) {
964		error = copyin((caddr_t)uap->gidset, (caddr_t)linux_gidset,
965			       ngrp * sizeof(linux_gid_t));
966		if (error)
967			return (error);
968
969		pc->pc_ucred->cr_ngroups = ngrp + 1;
970
971		bsd_gidset = pc->pc_ucred->cr_groups;
972		ngrp--;
973		while (ngrp >= 0) {
974			bsd_gidset[ngrp + 1] = linux_gidset[ngrp];
975			ngrp--;
976		}
977	}
978	else
979		pc->pc_ucred->cr_ngroups = 1;
980
981	setsugid(p);
982	return (0);
983}
984
985int
986linux_getgroups(p, uap)
987	struct proc *p;
988	struct linux_getgroups_args *uap;
989{
990	struct pcred *pc;
991	linux_gid_t linux_gidset[NGROUPS];
992	gid_t *bsd_gidset;
993	int bsd_gidsetsz, ngrp, error;
994
995	pc = p->p_cred;
996	bsd_gidset = pc->pc_ucred->cr_groups;
997	bsd_gidsetsz = pc->pc_ucred->cr_ngroups - 1;
998
999	/*
1000	 * cr_groups[0] holds egid. Returning the whole set
1001	 * here will cause a duplicate. Exclude cr_groups[0]
1002	 * to prevent that.
1003	 */
1004
1005	if ((ngrp = uap->gidsetsize) == 0) {
1006		p->p_retval[0] = bsd_gidsetsz;
1007		return (0);
1008	}
1009
1010	if (ngrp < bsd_gidsetsz)
1011		return (EINVAL);
1012
1013	ngrp = 0;
1014	while (ngrp < bsd_gidsetsz) {
1015		linux_gidset[ngrp] = bsd_gidset[ngrp + 1];
1016		ngrp++;
1017	}
1018
1019	if ((error = copyout((caddr_t)linux_gidset, (caddr_t)uap->gidset,
1020	    ngrp * sizeof(linux_gid_t))))
1021		return (error);
1022
1023	p->p_retval[0] = ngrp;
1024	return (0);
1025}
1026
1027#ifndef __alpha__
1028int
1029linux_setrlimit(p, uap)
1030	struct proc *p;
1031	struct linux_setrlimit_args *uap;
1032{
1033	struct __setrlimit_args bsd;
1034	struct linux_rlimit rlim;
1035	int error;
1036	caddr_t sg = stackgap_init();
1037
1038#ifdef DEBUG
1039	printf("Linux-emul(%ld): setrlimit(%d, %p)\n", (long)p->p_pid,
1040	    uap->resource, (void *)uap->rlim);
1041#endif
1042
1043	if (uap->resource >= LINUX_RLIM_NLIMITS)
1044		return (EINVAL);
1045
1046	bsd.which = linux_to_bsd_resource[uap->resource];
1047	if (bsd.which == -1)
1048		return (EINVAL);
1049
1050	error = copyin(uap->rlim, &rlim, sizeof(rlim));
1051	if (error)
1052		return (error);
1053
1054	bsd.rlp = stackgap_alloc(&sg, sizeof(struct rlimit));
1055	bsd.rlp->rlim_cur = (rlim_t)rlim.rlim_cur;
1056	bsd.rlp->rlim_max = (rlim_t)rlim.rlim_max;
1057	return (setrlimit(p, &bsd));
1058}
1059
1060int
1061linux_getrlimit(p, uap)
1062	struct proc *p;
1063	struct linux_getrlimit_args *uap;
1064{
1065	struct __getrlimit_args bsd;
1066	struct linux_rlimit rlim;
1067	int error;
1068	caddr_t sg = stackgap_init();
1069
1070#ifdef DEBUG
1071	printf("Linux-emul(%ld): getrlimit(%d, %p)\n", (long)p->p_pid,
1072	    uap->resource, (void *)uap->rlim);
1073#endif
1074
1075	if (uap->resource >= LINUX_RLIM_NLIMITS)
1076		return (EINVAL);
1077
1078	bsd.which = linux_to_bsd_resource[uap->resource];
1079	if (bsd.which == -1)
1080		return (EINVAL);
1081
1082	bsd.rlp = stackgap_alloc(&sg, sizeof(struct rlimit));
1083	error = getrlimit(p, &bsd);
1084	if (error)
1085		return (error);
1086
1087	rlim.rlim_cur = (unsigned long)bsd.rlp->rlim_cur;
1088	if (rlim.rlim_cur == ULONG_MAX)
1089		rlim.rlim_cur = LONG_MAX;
1090	rlim.rlim_max = (unsigned long)bsd.rlp->rlim_max;
1091	if (rlim.rlim_max == ULONG_MAX)
1092		rlim.rlim_max = LONG_MAX;
1093	return (copyout(&rlim, uap->rlim, sizeof(rlim)));
1094}
1095#endif /*!__alpha__*/
1096
1097int
1098linux_sched_setscheduler(p, uap)
1099	struct proc *p;
1100	struct linux_sched_setscheduler_args *uap;
1101{
1102	struct sched_setscheduler_args bsd;
1103
1104#ifdef DEBUG
1105	printf("Linux-emul(%ld): sched_setscheduler(%d, %d, %p)\n",
1106	    (long)p->p_pid, uap->pid, uap->policy, (const void *)uap->param);
1107#endif
1108
1109	switch (uap->policy) {
1110	case LINUX_SCHED_OTHER:
1111		bsd.policy = SCHED_OTHER;
1112		break;
1113	case LINUX_SCHED_FIFO:
1114		bsd.policy = SCHED_FIFO;
1115		break;
1116	case LINUX_SCHED_RR:
1117		bsd.policy = SCHED_RR;
1118		break;
1119	default:
1120		return EINVAL;
1121	}
1122
1123	bsd.pid = uap->pid;
1124	bsd.param = uap->param;
1125	return sched_setscheduler(p, &bsd);
1126}
1127
1128int
1129linux_sched_getscheduler(p, uap)
1130	struct proc *p;
1131	struct linux_sched_getscheduler_args *uap;
1132{
1133	struct sched_getscheduler_args bsd;
1134	int error;
1135
1136#ifdef DEBUG
1137	printf("Linux-emul(%ld): sched_getscheduler(%d)\n",
1138	       (long)p->p_pid, uap->pid);
1139#endif
1140
1141	bsd.pid = uap->pid;
1142	error = sched_getscheduler(p, &bsd);
1143
1144	switch (p->p_retval[0]) {
1145	case SCHED_OTHER:
1146		p->p_retval[0] = LINUX_SCHED_OTHER;
1147		break;
1148	case SCHED_FIFO:
1149		p->p_retval[0] = LINUX_SCHED_FIFO;
1150		break;
1151	case SCHED_RR:
1152		p->p_retval[0] = LINUX_SCHED_RR;
1153		break;
1154	}
1155
1156	return error;
1157}
1158