linux_misc.c revision 41931
1/*-
2 * Copyright (c) 1994-1995 Søren Schmidt
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer
10 *    in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 *    derived from this software withough specific prior written permission
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 *  $Id: linux_misc.c,v 1.47 1998/12/10 13:47:18 jkh Exp $
29 */
30
31#include <sys/param.h>
32#include <sys/systm.h>
33#include <sys/sysproto.h>
34#include <sys/kernel.h>
35#include <sys/mman.h>
36#include <sys/proc.h>
37#include <sys/fcntl.h>
38#include <sys/imgact_aout.h>
39#include <sys/mount.h>
40#include <sys/namei.h>
41#include <sys/resourcevar.h>
42#include <sys/stat.h>
43#include <sys/sysctl.h>
44#ifdef COMPAT_LINUX_THREADS
45#include <sys/unistd.h>
46#endif /* COMPAT_LINUX_THREADS */
47#include <sys/vnode.h>
48#include <sys/wait.h>
49#include <sys/time.h>
50
51#include <vm/vm.h>
52#include <vm/pmap.h>
53#include <vm/vm_kern.h>
54#include <vm/vm_prot.h>
55#include <vm/vm_map.h>
56#include <vm/vm_extern.h>
57
58#include <machine/frame.h>
59#include <machine/psl.h>
60
61#include <i386/linux/linux.h>
62#include <i386/linux/linux_proto.h>
63#include <i386/linux/linux_util.h>
64
65int
66linux_alarm(struct proc *p, struct linux_alarm_args *args)
67{
68    struct itimerval it, old_it;
69    struct timeval tv;
70    int s;
71
72#ifdef DEBUG
73    printf("Linux-emul(%ld): alarm(%u)\n", (long)p->p_pid, args->secs);
74#endif
75    if (args->secs > 100000000)
76	return EINVAL;
77    it.it_value.tv_sec = (long)args->secs;
78    it.it_value.tv_usec = 0;
79    it.it_interval.tv_sec = 0;
80    it.it_interval.tv_usec = 0;
81    s = splsoftclock();
82    old_it = p->p_realtimer;
83    getmicrouptime(&tv);
84    if (timevalisset(&old_it.it_value))
85	untimeout(realitexpire, (caddr_t)p, p->p_ithandle);
86    if (it.it_value.tv_sec != 0) {
87	p->p_ithandle = timeout(realitexpire, (caddr_t)p, tvtohz(&it.it_value));
88	timevaladd(&it.it_value, &tv);
89    }
90    p->p_realtimer = it;
91    splx(s);
92    if (timevalcmp(&old_it.it_value, &tv, >)) {
93	timevalsub(&old_it.it_value, &tv);
94	if (old_it.it_value.tv_usec != 0)
95	    old_it.it_value.tv_sec++;
96	p->p_retval[0] = old_it.it_value.tv_sec;
97    }
98    return 0;
99}
100
101int
102linux_brk(struct proc *p, struct linux_brk_args *args)
103{
104#if 0
105    struct vmspace *vm = p->p_vmspace;
106    vm_offset_t new, old;
107    int error;
108
109    if ((vm_offset_t)args->dsend < (vm_offset_t)vm->vm_daddr)
110	return EINVAL;
111    if (((caddr_t)args->dsend - (caddr_t)vm->vm_daddr)
112	> p->p_rlimit[RLIMIT_DATA].rlim_cur)
113	return ENOMEM;
114
115    old = round_page((vm_offset_t)vm->vm_daddr) + ctob(vm->vm_dsize);
116    new = round_page((vm_offset_t)args->dsend);
117    p->p_retval[0] = old;
118    if ((new-old) > 0) {
119	if (swap_pager_full)
120	    return ENOMEM;
121	error = vm_map_find(&vm->vm_map, NULL, 0, &old, (new-old), FALSE,
122			VM_PROT_ALL, VM_PROT_ALL, 0);
123	if (error)
124	    return error;
125	vm->vm_dsize += btoc((new-old));
126	p->p_retval[0] = (int)(vm->vm_daddr + ctob(vm->vm_dsize));
127    }
128    return 0;
129#else
130    struct vmspace *vm = p->p_vmspace;
131    vm_offset_t new, old;
132    struct obreak_args /* {
133	char * nsize;
134    } */ tmp;
135
136#ifdef DEBUG
137    printf("Linux-emul(%ld): brk(%p)\n", (long)p->p_pid, (void *)args->dsend);
138#endif
139    old = (vm_offset_t)vm->vm_daddr + ctob(vm->vm_dsize);
140    new = (vm_offset_t)args->dsend;
141    tmp.nsize = (char *) new;
142    if (((caddr_t)new > vm->vm_daddr) && !obreak(p, &tmp))
143	p->p_retval[0] = (int)new;
144    else
145	p->p_retval[0] = (int)old;
146
147    return 0;
148#endif
149}
150
151int
152linux_uselib(struct proc *p, struct linux_uselib_args *args)
153{
154    struct nameidata ni;
155    struct vnode *vp;
156    struct exec *a_out;
157    struct vattr attr;
158    vm_offset_t vmaddr;
159    unsigned long file_offset;
160    vm_offset_t buffer;
161    unsigned long bss_size;
162    int error;
163    caddr_t sg;
164    int locked;
165
166    sg = stackgap_init();
167    CHECKALTEXIST(p, &sg, args->library);
168
169#ifdef DEBUG
170    printf("Linux-emul(%d): uselib(%s)\n", p->p_pid, args->library);
171#endif
172
173    a_out = NULL;
174    locked = 0;
175    vp = NULL;
176
177    NDINIT(&ni, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, args->library, p);
178    if (error = namei(&ni))
179	goto cleanup;
180
181    vp = ni.ni_vp;
182    if (vp == NULL) {
183	error = ENOEXEC;	/* ?? */
184	goto cleanup;
185    }
186
187    /*
188     * From here on down, we have a locked vnode that must be unlocked.
189     */
190    locked++;
191
192    /*
193     * Writable?
194     */
195    if (vp->v_writecount) {
196	error = ETXTBSY;
197	goto cleanup;
198    }
199
200    /*
201     * Executable?
202     */
203    if (error = VOP_GETATTR(vp, &attr, p->p_ucred, p))
204	goto cleanup;
205
206    if ((vp->v_mount->mnt_flag & MNT_NOEXEC) ||
207	((attr.va_mode & 0111) == 0) ||
208	(attr.va_type != VREG)) {
209	    error = ENOEXEC;
210	    goto cleanup;
211    }
212
213    /*
214     * Sensible size?
215     */
216    if (attr.va_size == 0) {
217	error = ENOEXEC;
218	goto cleanup;
219    }
220
221    /*
222     * Can we access it?
223     */
224    if (error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p))
225	goto cleanup;
226
227    if (error = VOP_OPEN(vp, FREAD, p->p_ucred, p))
228	goto cleanup;
229
230    /*
231     * Lock no longer needed
232     */
233    VOP_UNLOCK(vp, 0, p);
234    locked = 0;
235
236    /*
237     * Pull in executable header into kernel_map
238     */
239    error = vm_mmap(kernel_map, (vm_offset_t *)&a_out, PAGE_SIZE,
240	    	    VM_PROT_READ, VM_PROT_READ, 0, (caddr_t)vp, 0);
241    if (error)
242	goto cleanup;
243
244    /*
245     * Is it a Linux binary ?
246     */
247    if (((a_out->a_magic >> 16) & 0xff) != 0x64) {
248	error = ENOEXEC;
249	goto cleanup;
250    }
251
252    /* While we are here, we should REALLY do some more checks */
253
254    /*
255     * Set file/virtual offset based on a.out variant.
256     */
257    switch ((int)(a_out->a_magic & 0xffff)) {
258    case 0413:	/* ZMAGIC */
259	file_offset = 1024;
260	break;
261    case 0314:	/* QMAGIC */
262	file_offset = 0;
263	break;
264    default:
265	error = ENOEXEC;
266	goto cleanup;
267    }
268
269    bss_size = round_page(a_out->a_bss);
270
271    /*
272     * Check various fields in header for validity/bounds.
273     */
274    if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) {
275	error = ENOEXEC;
276	goto cleanup;
277    }
278
279    /* text + data can't exceed file size */
280    if (a_out->a_data + a_out->a_text > attr.va_size) {
281	error = EFAULT;
282	goto cleanup;
283    }
284
285    /*
286     * text/data/bss must not exceed limits
287     * XXX: this is not complete. it should check current usage PLUS
288     * the resources needed by this library.
289     */
290    if (a_out->a_text > MAXTSIZ ||
291	a_out->a_data + bss_size > p->p_rlimit[RLIMIT_DATA].rlim_cur) {
292	error = ENOMEM;
293	goto cleanup;
294    }
295
296    /*
297     * prevent more writers
298     */
299    vp->v_flag |= VTEXT;
300
301    /*
302     * Check if file_offset page aligned,.
303     * Currently we cannot handle misalinged file offsets,
304     * and so we read in the entire image (what a waste).
305     */
306    if (file_offset & PAGE_MASK) {
307#ifdef DEBUG
308printf("uselib: Non page aligned binary %lu\n", file_offset);
309#endif
310	/*
311	 * Map text+data read/write/execute
312	 */
313
314	/* a_entry is the load address and is page aligned */
315	vmaddr = trunc_page(a_out->a_entry);
316
317	/* get anon user mapping, read+write+execute */
318	error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &vmaddr,
319		    	    a_out->a_text + a_out->a_data, FALSE,
320			    VM_PROT_ALL, VM_PROT_ALL, 0);
321	if (error)
322	    goto cleanup;
323
324	/* map file into kernel_map */
325	error = vm_mmap(kernel_map, &buffer,
326			round_page(a_out->a_text + a_out->a_data + file_offset),
327		   	VM_PROT_READ, VM_PROT_READ, 0,
328			(caddr_t)vp, trunc_page(file_offset));
329	if (error)
330	    goto cleanup;
331
332	/* copy from kernel VM space to user space */
333	error = copyout((caddr_t)(void *)(uintptr_t)(buffer + file_offset),
334			(caddr_t)vmaddr, a_out->a_text + a_out->a_data);
335
336	/* release temporary kernel space */
337	vm_map_remove(kernel_map, buffer,
338		      buffer + round_page(a_out->a_text + a_out->a_data + file_offset));
339
340	if (error)
341	    goto cleanup;
342    }
343    else {
344#ifdef DEBUG
345printf("uselib: Page aligned binary %lu\n", file_offset);
346#endif
347	/*
348	 * for QMAGIC, a_entry is 20 bytes beyond the load address
349	 * to skip the executable header
350	 */
351	vmaddr = trunc_page(a_out->a_entry);
352
353	/*
354	 * Map it all into the process's space as a single copy-on-write
355	 * "data" segment.
356	 */
357	error = vm_mmap(&p->p_vmspace->vm_map, &vmaddr,
358		   	a_out->a_text + a_out->a_data,
359			VM_PROT_ALL, VM_PROT_ALL, MAP_PRIVATE | MAP_FIXED,
360			(caddr_t)vp, file_offset);
361	if (error)
362	    goto cleanup;
363    }
364#ifdef DEBUG
365printf("mem=%08x = %08x %08x\n", vmaddr, ((int*)vmaddr)[0], ((int*)vmaddr)[1]);
366#endif
367    if (bss_size != 0) {
368        /*
369	 * Calculate BSS start address
370	 */
371	vmaddr = trunc_page(a_out->a_entry) + a_out->a_text + a_out->a_data;
372
373	/*
374	 * allocate some 'anon' space
375	 */
376	error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &vmaddr,
377			    bss_size, FALSE,
378			    VM_PROT_ALL, VM_PROT_ALL, 0);
379	if (error)
380	    goto cleanup;
381    }
382
383cleanup:
384    /*
385     * Unlock vnode if needed
386     */
387    if (locked)
388	VOP_UNLOCK(vp, 0, p);
389
390    /*
391     * Release the kernel mapping.
392     */
393    if (a_out)
394	vm_map_remove(kernel_map, (vm_offset_t)a_out, (vm_offset_t)a_out + PAGE_SIZE);
395
396    return error;
397}
398
399/* XXX move */
400struct linux_select_argv {
401	int nfds;
402	fd_set *readfds;
403	fd_set *writefds;
404	fd_set *exceptfds;
405	struct timeval *timeout;
406};
407
408int
409linux_select(struct proc *p, struct linux_select_args *args)
410{
411    struct linux_select_argv linux_args;
412    struct linux_newselect_args newsel;
413    int error;
414
415#ifdef SELECT_DEBUG
416    printf("Linux-emul(%d): select(%x)\n",
417	   p->p_pid, args->ptr);
418#endif
419    if ((error = copyin((caddr_t)args->ptr, (caddr_t)&linux_args,
420			sizeof(linux_args))))
421	return error;
422
423    newsel.nfds = linux_args.nfds;
424    newsel.readfds = linux_args.readfds;
425    newsel.writefds = linux_args.writefds;
426    newsel.exceptfds = linux_args.exceptfds;
427    newsel.timeout = linux_args.timeout;
428
429    return linux_newselect(p, &newsel);
430}
431
432int
433linux_newselect(struct proc *p, struct linux_newselect_args *args)
434{
435    struct select_args bsa;
436    struct timeval tv0, tv1, utv, *tvp;
437    caddr_t sg;
438    int error;
439
440#ifdef DEBUG
441    printf("Linux-emul(%ld): newselect(%d, %p, %p, %p, %p)\n",
442  	(long)p->p_pid, args->nfds, (void *)args->readfds,
443	(void *)args->writefds, (void *)args->exceptfds,
444	(void *)args->timeout);
445#endif
446    error = 0;
447    bsa.nd = args->nfds;
448    bsa.in = args->readfds;
449    bsa.ou = args->writefds;
450    bsa.ex = args->exceptfds;
451    bsa.tv = args->timeout;
452
453    /*
454     * Store current time for computation of the amount of
455     * time left.
456     */
457    if (args->timeout) {
458	if ((error = copyin(args->timeout, &utv, sizeof(utv))))
459	    goto select_out;
460#ifdef DEBUG
461	printf("Linux-emul(%ld): incoming timeout (%ld/%ld)\n",
462	    (long)p->p_pid, utv.tv_sec, utv.tv_usec);
463#endif
464	if (itimerfix(&utv)) {
465	    /*
466	     * The timeval was invalid.  Convert it to something
467	     * valid that will act as it does under Linux.
468	     */
469	    sg = stackgap_init();
470	    tvp = stackgap_alloc(&sg, sizeof(utv));
471	    utv.tv_sec += utv.tv_usec / 1000000;
472	    utv.tv_usec %= 1000000;
473	    if (utv.tv_usec < 0) {
474		utv.tv_sec -= 1;
475		utv.tv_usec += 1000000;
476	    }
477	    if (utv.tv_sec < 0)
478		timevalclear(&utv);
479	    if ((error = copyout(&utv, tvp, sizeof(utv))))
480		goto select_out;
481	    bsa.tv = tvp;
482	}
483	microtime(&tv0);
484    }
485
486    error = select(p, &bsa);
487#ifdef DEBUG
488    printf("Linux-emul(%d): real select returns %d\n",
489	       p->p_pid, error);
490#endif
491
492    if (error) {
493	/*
494	 * See fs/select.c in the Linux kernel.  Without this,
495	 * Maelstrom doesn't work.
496	 */
497	if (error == ERESTART)
498	    error = EINTR;
499	goto select_out;
500    }
501
502    if (args->timeout) {
503	if (p->p_retval[0]) {
504	    /*
505	     * Compute how much time was left of the timeout,
506	     * by subtracting the current time and the time
507	     * before we started the call, and subtracting
508	     * that result from the user-supplied value.
509	     */
510	    microtime(&tv1);
511	    timevalsub(&tv1, &tv0);
512	    timevalsub(&utv, &tv1);
513	    if (utv.tv_sec < 0)
514		timevalclear(&utv);
515	} else
516	    timevalclear(&utv);
517#ifdef DEBUG
518	printf("Linux-emul(%ld): outgoing timeout (%ld/%ld)\n",
519	    (long)p->p_pid, utv.tv_sec, utv.tv_usec);
520#endif
521	if ((error = copyout(&utv, args->timeout, sizeof(utv))))
522	    goto select_out;
523    }
524
525select_out:
526#ifdef DEBUG
527    printf("Linux-emul(%d): newselect_out -> %d\n",
528	       p->p_pid, error);
529#endif
530    return error;
531}
532
533int
534linux_getpgid(struct proc *p, struct linux_getpgid_args *args)
535{
536    struct proc *curproc;
537
538#ifdef DEBUG
539    printf("Linux-emul(%d): getpgid(%d)\n", p->p_pid, args->pid);
540#endif
541    if (args->pid != p->p_pid) {
542	if (!(curproc = pfind(args->pid)))
543	    return ESRCH;
544    }
545    else
546	curproc = p;
547    p->p_retval[0] = curproc->p_pgid;
548    return 0;
549}
550
551int
552linux_fork(struct proc *p, struct linux_fork_args *args)
553{
554    int error;
555
556#ifdef DEBUG
557    printf("Linux-emul(%d): fork()\n", p->p_pid);
558#endif
559    if (error = fork(p, (struct fork_args *)args))
560	return error;
561    if (p->p_retval[1] == 1)
562	p->p_retval[0] = 0;
563    return 0;
564}
565
566#ifndef COMPAT_LINUX_THREADS
567int
568linux_clone(struct proc *p, struct linux_clone_args *args)
569{
570    printf("linux_clone(%d): Not enabled\n", p->p_pid);
571    return (EOPNOTSUPP);
572}
573
574#else
575#define CLONE_VM	0x100
576#define CLONE_FS	0x200
577#define CLONE_FILES	0x400
578#define CLONE_SIGHAND	0x800
579#define CLONE_PID	0x1000
580
581int
582linux_clone(struct proc *p, struct linux_clone_args *args)
583{
584    int error, ff = RFPROC;
585    struct proc *p2;
586    int            growable;
587    int            initstacksize;
588    int            maxstacksize;
589    int            exit_signal;
590    vm_map_entry_t entry;
591    vm_map_t       map;
592    vm_offset_t    start;
593    struct rfork_args rf_args;
594
595#ifdef SMP
596    printf("linux_clone(%d): does not work with SMP yet\n", p->p_pid);
597    return (EOPNOTSUPP);
598#endif
599#ifdef DEBUG
600    if (args->flags & CLONE_PID)
601	printf("linux_clone(%d): CLONE_PID not yet supported\n", p->p_pid);
602    printf ("linux_clone(%d): invoked with flags %x and stack %x\n", p->p_pid,
603	     (unsigned int)args->flags, (unsigned int)args->stack);
604#endif
605
606    if (!args->stack)
607        return (EINVAL);
608    exit_signal = args->flags & 0x000000ff;
609    if (exit_signal >= LINUX_NSIG)
610	return EINVAL;
611    exit_signal = linux_to_bsd_signal[exit_signal];
612
613    /* RFTHREAD probably not necessary here, but it shouldn't hurt either */
614    ff |= RFTHREAD;
615
616    if (args->flags & CLONE_VM)
617	ff |= RFMEM;
618    if (args->flags & CLONE_SIGHAND)
619	ff |= RFSIGSHARE;
620    if (!(args->flags & CLONE_FILES))
621	ff |= RFFDG;
622
623    error = 0;
624    start = 0;
625
626    rf_args.flags = ff;
627    if (error = rfork(p, &rf_args))
628	return error;
629
630    p2 = pfind(p->p_retval[0]);
631    if (p2 == 0)
632 	return ESRCH;
633
634    p2->p_sigparent = exit_signal;
635    p2->p_md.md_regs->tf_esp = (unsigned int)args->stack;
636
637#ifdef DEBUG
638    printf ("linux_clone(%d): successful rfork to %d\n", p->p_pid, p2->p_pid);
639#endif
640    return 0;
641}
642
643#endif /* COMPAT_LINUX_THREADS */
644/* XXX move */
645struct linux_mmap_argv {
646	linux_caddr_t addr;
647	int len;
648	int prot;
649	int flags;
650	int fd;
651	int pos;
652};
653
654#ifdef COMPAT_LINUX_THREADS
655#define STACK_SIZE  (2 * 1024 * 1024)
656#define GUARD_SIZE  (4 * PAGE_SIZE)
657
658#endif /* COMPAT_LINUX_THREADS */
659int
660linux_mmap(struct proc *p, struct linux_mmap_args *args)
661{
662    struct mmap_args /* {
663	caddr_t addr;
664	size_t len;
665	int prot;
666	int flags;
667	int fd;
668	long pad;
669	off_t pos;
670    } */ bsd_args;
671    int error;
672    struct linux_mmap_argv linux_args;
673
674    if ((error = copyin((caddr_t)args->ptr, (caddr_t)&linux_args,
675			sizeof(linux_args))))
676	return error;
677#ifdef DEBUG
678    printf("Linux-emul(%ld): mmap(%p, %d, %d, %08x, %d, %d)\n",
679	(long)p->p_pid, (void *)linux_args.addr, linux_args.len,
680	linux_args.prot, linux_args.flags, linux_args.fd, linux_args.pos);
681#endif
682    bsd_args.flags = 0;
683    if (linux_args.flags & LINUX_MAP_SHARED)
684	bsd_args.flags |= MAP_SHARED;
685    if (linux_args.flags & LINUX_MAP_PRIVATE)
686	bsd_args.flags |= MAP_PRIVATE;
687    if (linux_args.flags & LINUX_MAP_FIXED)
688	bsd_args.flags |= MAP_FIXED;
689    if (linux_args.flags & LINUX_MAP_ANON)
690	bsd_args.flags |= MAP_ANON;
691#ifndef COMPAT_LINUX_THREADS
692    bsd_args.addr = linux_args.addr;
693    bsd_args.len = linux_args.len;
694#else
695
696#if !defined(USE_VM_STACK) && !defined(USE_VM_STACK_FOR_EXEC)
697    /* Linux Threads will map into the proc stack space, unless
698       we prevent it.  This causes problems if we're not using
699       our VM_STACK options.
700    */
701    if ((unsigned int)linux_args.addr + linux_args.len > (USRSTACK - MAXSSIZ))
702        return (EINVAL);
703#endif
704
705    if (linux_args.flags & LINUX_MAP_GROWSDOWN) {
706
707#ifdef USE_VM_STACK
708        /* USE_VM_STACK is defined (or not) in vm/vm_map.h */
709        bsd_args.flags |= MAP_STACK;
710#endif
711
712	/* The linux MAP_GROWSDOWN option does not limit auto
713	   growth of the region.  Linux mmap with this option
714	   takes as addr the inital BOS, and as len, the initial
715	   region size.  It can then grow down from addr without
716	   limit.  However, linux threads has an implicit internal
717	   limit to stack size of STACK_SIZE.  Its just not
718	   enforced explicitly in linux.  But, here we impose
719	   a limit of (STACK_SIZE - GUARD_SIZE) on the stack
720	   region, since we can do this with our mmap.
721
722	   Our mmap with MAP_STACK takes addr as the maximum
723	   downsize limit on BOS, and as len the max size of
724	   the region.  It them maps the top SGROWSIZ bytes,
725	   and autgrows the region down, up to the limit
726	   in addr.
727
728	   If we don't use the MAP_STACK option, the effect
729	   of this code is to allocate a stack region of a
730	   fixed size of (STACK_SIZE - GUARD_SIZE).
731	*/
732
733	/* This gives us TOS */
734        bsd_args.addr = linux_args.addr + linux_args.len;
735
736	/* This gives us our maximum stack size */
737	if (linux_args.len > STACK_SIZE - GUARD_SIZE)
738	    bsd_args.len = linux_args.len;
739	else
740	    bsd_args.len  = STACK_SIZE - GUARD_SIZE;
741
742	/* This gives us a new BOS.  If we're using VM_STACK, then
743	   mmap will just map the top SGROWSIZ bytes, and let
744	   the stack grow down to the limit at BOS.  If we're
745	   not using VM_STACK we map the full stack, since we
746	   don't have a way to autogrow it.
747	*/
748	bsd_args.addr -= bsd_args.len;
749
750    } else {
751        bsd_args.addr = linux_args.addr;
752	bsd_args.len  = linux_args.len;
753    }
754#endif /* COMPAT_LINUX_THREADS */
755    bsd_args.prot = linux_args.prot | PROT_READ;	/* always required */
756    bsd_args.fd = linux_args.fd;
757    bsd_args.pos = linux_args.pos;
758    bsd_args.pad = 0;
759    return mmap(p, &bsd_args);
760}
761
762int
763linux_mremap(struct proc *p, struct linux_mremap_args *args)
764{
765	struct munmap_args /* {
766		void *addr;
767		size_t len;
768	} */ bsd_args;
769	int error = 0;
770
771#ifdef DEBUG
772	printf("Linux-emul(%ld): mremap(%p, %08x, %08x, %08x)\n",
773	    (long)p->p_pid, (void *)args->addr, args->old_len, args->new_len,
774	    args->flags);
775#endif
776	args->new_len = round_page(args->new_len);
777	args->old_len = round_page(args->old_len);
778
779	if (args->new_len > args->old_len) {
780		p->p_retval[0] = 0;
781		return ENOMEM;
782	}
783
784	if (args->new_len < args->old_len) {
785		bsd_args.addr = args->addr + args->new_len;
786		bsd_args.len = args->old_len - args->new_len;
787		error = munmap(p, &bsd_args);
788	}
789
790	p->p_retval[0] = error ? 0 : (int)args->addr;
791	return error;
792}
793
794int
795linux_msync(struct proc *p, struct linux_msync_args *args)
796{
797	struct msync_args bsd_args;
798
799	bsd_args.addr = args->addr;
800	bsd_args.len = args->len;
801	bsd_args.flags = 0;	/* XXX ignore */
802
803	return msync(p, &bsd_args);
804}
805
806int
807linux_pipe(struct proc *p, struct linux_pipe_args *args)
808{
809    int error;
810    int reg_edx;
811
812#ifdef DEBUG
813    printf("Linux-emul(%d): pipe(*)\n", p->p_pid);
814#endif
815    reg_edx = p->p_retval[1];
816    if (error = pipe(p, 0)) {
817	p->p_retval[1] = reg_edx;
818	return error;
819    }
820
821    if (error = copyout(p->p_retval, args->pipefds, 2*sizeof(int))) {
822	p->p_retval[1] = reg_edx;
823	return error;
824    }
825
826    p->p_retval[1] = reg_edx;
827    p->p_retval[0] = 0;
828    return 0;
829}
830
831int
832linux_time(struct proc *p, struct linux_time_args *args)
833{
834    struct timeval tv;
835    linux_time_t tm;
836    int error;
837
838#ifdef DEBUG
839    printf("Linux-emul(%d): time(*)\n", p->p_pid);
840#endif
841    microtime(&tv);
842    tm = tv.tv_sec;
843    if (args->tm && (error = copyout(&tm, args->tm, sizeof(linux_time_t))))
844	return error;
845    p->p_retval[0] = tm;
846    return 0;
847}
848
849struct linux_times_argv {
850    long    tms_utime;
851    long    tms_stime;
852    long    tms_cutime;
853    long    tms_cstime;
854};
855
856#define CLK_TCK 100	/* Linux uses 100 */
857#define CONVTCK(r)	(r.tv_sec * CLK_TCK + r.tv_usec / (1000000 / CLK_TCK))
858
859int
860linux_times(struct proc *p, struct linux_times_args *args)
861{
862    struct timeval tv;
863    struct linux_times_argv tms;
864    struct rusage ru;
865    int error;
866
867#ifdef DEBUG
868    printf("Linux-emul(%d): times(*)\n", p->p_pid);
869#endif
870    calcru(p, &ru.ru_utime, &ru.ru_stime, NULL);
871
872    tms.tms_utime = CONVTCK(ru.ru_utime);
873    tms.tms_stime = CONVTCK(ru.ru_stime);
874
875    tms.tms_cutime = CONVTCK(p->p_stats->p_cru.ru_utime);
876    tms.tms_cstime = CONVTCK(p->p_stats->p_cru.ru_stime);
877
878    if ((error = copyout((caddr_t)&tms, (caddr_t)args->buf,
879	    	    sizeof(struct linux_times_argv))))
880	return error;
881
882    microuptime(&tv);
883    p->p_retval[0] = (int)CONVTCK(tv);
884    return 0;
885}
886
887/* XXX move */
888struct linux_newuname_t {
889    char sysname[65];
890    char nodename[65];
891    char release[65];
892    char version[65];
893    char machine[65];
894    char domainname[65];
895};
896
897int
898linux_newuname(struct proc *p, struct linux_newuname_args *args)
899{
900    struct linux_newuname_t linux_newuname;
901
902#ifdef DEBUG
903    printf("Linux-emul(%d): newuname(*)\n", p->p_pid);
904#endif
905    bzero(&linux_newuname, sizeof(struct linux_newuname_t));
906    strncpy(linux_newuname.sysname, ostype,
907	sizeof(linux_newuname.sysname) - 1);
908    strncpy(linux_newuname.nodename, hostname,
909	sizeof(linux_newuname.nodename) - 1);
910    strncpy(linux_newuname.release, osrelease,
911	sizeof(linux_newuname.release) - 1);
912    strncpy(linux_newuname.version, version,
913	sizeof(linux_newuname.version) - 1);
914    strncpy(linux_newuname.machine, machine,
915	sizeof(linux_newuname.machine) - 1);
916    strncpy(linux_newuname.domainname, domainname,
917	sizeof(linux_newuname.domainname) - 1);
918    return (copyout((caddr_t)&linux_newuname, (caddr_t)args->buf,
919	    	    sizeof(struct linux_newuname_t)));
920}
921
922struct linux_utimbuf {
923	linux_time_t l_actime;
924	linux_time_t l_modtime;
925};
926
927int
928linux_utime(struct proc *p, struct linux_utime_args *args)
929{
930    struct utimes_args /* {
931	char	*path;
932	struct	timeval *tptr;
933    } */ bsdutimes;
934    struct timeval tv[2], *tvp;
935    struct linux_utimbuf lut;
936    int error;
937    caddr_t sg;
938
939    sg = stackgap_init();
940    CHECKALTEXIST(p, &sg, args->fname);
941
942#ifdef DEBUG
943    printf("Linux-emul(%d): utime(%s, *)\n", p->p_pid, args->fname);
944#endif
945    if (args->times) {
946	if ((error = copyin(args->times, &lut, sizeof lut)))
947	    return error;
948	tv[0].tv_sec = lut.l_actime;
949	tv[0].tv_usec = 0;
950	tv[1].tv_sec = lut.l_modtime;
951	tv[1].tv_usec = 0;
952	/* so that utimes can copyin */
953	tvp = (struct timeval *)stackgap_alloc(&sg, sizeof(tv));
954	if ((error = copyout(tv, tvp, sizeof(tv))))
955	    return error;
956	bsdutimes.tptr = tvp;
957    } else
958	bsdutimes.tptr = NULL;
959
960    bsdutimes.path = args->fname;
961    return utimes(p, &bsdutimes);
962}
963
964int
965linux_waitpid(struct proc *p, struct linux_waitpid_args *args)
966{
967    struct wait_args /* {
968	int pid;
969	int *status;
970	int options;
971	struct	rusage *rusage;
972    } */ tmp;
973    int error, tmpstat;
974
975#ifdef DEBUG
976    printf("Linux-emul(%ld): waitpid(%d, %p, %d)\n",
977	(long)p->p_pid, args->pid, (void *)args->status, args->options);
978#endif
979    tmp.pid = args->pid;
980    tmp.status = args->status;
981#ifndef COMPAT_LINUX_THREADS
982    tmp.options = args->options;
983#else
984    /* This filters out the linux option _WCLONE.  I don't
985       think we need it, but I could be wrong.  If we need
986       it, we need to fix wait4, since it will give us an
987       error return of EINVAL if we pass in _WCLONE, and
988       of course, it won't do anything with it.
989    */
990    tmp.options = (args->options & (WNOHANG | WUNTRACED));
991#endif /* COMPAT_LINUX_THREADS */
992    tmp.rusage = NULL;
993
994    if (error = wait4(p, &tmp))
995#ifndef COMPAT_LINUX_THREADS
996	return error;
997#else
998  	return error;
999#endif /* COMPAT_LINUX_THREADS */
1000    if (args->status) {
1001	if (error = copyin(args->status, &tmpstat, sizeof(int)))
1002	    return error;
1003	if (WIFSIGNALED(tmpstat))
1004	    tmpstat = (tmpstat & 0xffffff80) |
1005		      bsd_to_linux_signal[WTERMSIG(tmpstat)];
1006	else if (WIFSTOPPED(tmpstat))
1007	    tmpstat = (tmpstat & 0xffff00ff) |
1008		      (bsd_to_linux_signal[WSTOPSIG(tmpstat)]<<8);
1009	return copyout(&tmpstat, args->status, sizeof(int));
1010    } else
1011	return 0;
1012}
1013
1014int
1015linux_wait4(struct proc *p, struct linux_wait4_args *args)
1016{
1017    struct wait_args /* {
1018	int pid;
1019	int *status;
1020	int options;
1021	struct	rusage *rusage;
1022    } */ tmp;
1023    int error, tmpstat;
1024
1025#ifdef DEBUG
1026    printf("Linux-emul(%ld): wait4(%d, %p, %d, %p)\n",
1027	(long)p->p_pid, args->pid, (void *)args->status, args->options,
1028	(void *)args->rusage);
1029#endif
1030    tmp.pid = args->pid;
1031    tmp.status = args->status;
1032#ifndef COMPAT_LINUX_THREADS
1033    tmp.options = args->options;
1034#else
1035    /* This filters out the linux option _WCLONE.  I don't
1036       think we need it, but I could be wrong.  If we need
1037       it, we need to fix wait4, since it will give us an
1038       error return of EINVAL if we pass in _WCLONE, and
1039       of course, it won't do anything with it.
1040    */
1041    tmp.options = (args->options & (WNOHANG | WUNTRACED));
1042#endif /* COMPAT_LINUX_THREADS */
1043    tmp.rusage = args->rusage;
1044
1045    if (error = wait4(p, &tmp))
1046	return error;
1047
1048    p->p_siglist &= ~sigmask(SIGCHLD);
1049
1050    if (args->status) {
1051	if (error = copyin(args->status, &tmpstat, sizeof(int)))
1052	    return error;
1053	if (WIFSIGNALED(tmpstat))
1054	    tmpstat = (tmpstat & 0xffffff80) |
1055		  bsd_to_linux_signal[WTERMSIG(tmpstat)];
1056	else if (WIFSTOPPED(tmpstat))
1057	    tmpstat = (tmpstat & 0xffff00ff) |
1058		  (bsd_to_linux_signal[WSTOPSIG(tmpstat)]<<8);
1059	return copyout(&tmpstat, args->status, sizeof(int));
1060    } else
1061	return 0;
1062}
1063
1064int
1065linux_mknod(struct proc *p, struct linux_mknod_args *args)
1066{
1067	caddr_t sg;
1068	struct mknod_args bsd_mknod;
1069	struct mkfifo_args bsd_mkfifo;
1070
1071	sg = stackgap_init();
1072
1073	CHECKALTCREAT(p, &sg, args->path);
1074
1075#ifdef DEBUG
1076	printf("Linux-emul(%d): mknod(%s, %d, %d)\n",
1077	   p->p_pid, args->path, args->mode, args->dev);
1078#endif
1079
1080	if (args->mode & S_IFIFO) {
1081		bsd_mkfifo.path = args->path;
1082		bsd_mkfifo.mode = args->mode;
1083		return mkfifo(p, &bsd_mkfifo);
1084	} else {
1085		bsd_mknod.path = args->path;
1086		bsd_mknod.mode = args->mode;
1087		bsd_mknod.dev = args->dev;
1088		return mknod(p, &bsd_mknod);
1089	}
1090}
1091
1092/*
1093 * UGH! This is just about the dumbest idea I've ever heard!!
1094 */
1095int
1096linux_personality(struct proc *p, struct linux_personality_args *args)
1097{
1098#ifdef DEBUG
1099	printf("Linux-emul(%d): personality(%d)\n",
1100	   p->p_pid, args->per);
1101#endif
1102	if (args->per != 0)
1103		return EINVAL;
1104
1105	/* Yes Jim, it's still a Linux... */
1106	p->p_retval[0] = 0;
1107	return 0;
1108}
1109
1110/*
1111 * Wrappers for get/setitimer for debugging..
1112 */
1113int
1114linux_setitimer(struct proc *p, struct linux_setitimer_args *args)
1115{
1116	struct setitimer_args bsa;
1117	struct itimerval foo;
1118	int error;
1119
1120#ifdef DEBUG
1121	printf("Linux-emul(%ld): setitimer(%p, %p)\n",
1122	    (long)p->p_pid, (void *)args->itv, (void *)args->oitv);
1123#endif
1124	bsa.which = args->which;
1125	bsa.itv = args->itv;
1126	bsa.oitv = args->oitv;
1127	if (args->itv) {
1128	    if ((error = copyin((caddr_t)args->itv, (caddr_t)&foo,
1129			sizeof(foo))))
1130		return error;
1131#ifdef DEBUG
1132	    printf("setitimer: value: sec: %ld, usec: %ld\n",
1133		foo.it_value.tv_sec, foo.it_value.tv_usec);
1134	    printf("setitimer: interval: sec: %ld, usec: %ld\n",
1135		foo.it_interval.tv_sec, foo.it_interval.tv_usec);
1136#endif
1137	}
1138	return setitimer(p, &bsa);
1139}
1140
1141int
1142linux_getitimer(struct proc *p, struct linux_getitimer_args *args)
1143{
1144	struct getitimer_args bsa;
1145#ifdef DEBUG
1146	printf("Linux-emul(%ld): getitimer(%p)\n",
1147	    (long)p->p_pid, (void *)args->itv);
1148#endif
1149	bsa.which = args->which;
1150	bsa.itv = args->itv;
1151	return getitimer(p, &bsa);
1152}
1153
1154int
1155linux_iopl(struct proc *p, struct linux_iopl_args *args)
1156{
1157	int error;
1158
1159	error = suser(p->p_ucred, &p->p_acflag);
1160	if (error != 0)
1161		return error;
1162	if (securelevel > 0)
1163		return EPERM;
1164	p->p_md.md_regs->tf_eflags |= PSL_IOPL;
1165	return 0;
1166}
1167
1168int
1169linux_nice(struct proc *p, struct linux_nice_args *args)
1170{
1171	struct setpriority_args	bsd_args;
1172
1173	bsd_args.which = PRIO_PROCESS;
1174	bsd_args.who = 0;	/* current process */
1175	bsd_args.prio = args->inc;
1176	return setpriority(p, &bsd_args);
1177}
1178
1179