Deleted Added
full compact
linux_misc.c (63605) linux_misc.c (63778)
1/*-
2 * Copyright (c) 1994-1995 Søren Schmidt
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer
10 * in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 * derived from this software withough specific prior written permission
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
1/*-
2 * Copyright (c) 1994-1995 Søren Schmidt
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer
10 * in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 * derived from this software withough specific prior written permission
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 * $FreeBSD: head/sys/compat/linux/linux_misc.c 63605 2000-07-20 05:37:41Z marcel $
28 * $FreeBSD: head/sys/compat/linux/linux_misc.c 63778 2000-07-23 16:54:18Z marcel $
29 */
30
31#include "opt_compat.h"
32
33#include <sys/param.h>
34#include <sys/systm.h>
35#include <sys/sysproto.h>
36#include <sys/kernel.h>
37#include <sys/mman.h>
38#include <sys/proc.h>
39#include <sys/fcntl.h>
40#include <sys/imgact_aout.h>
41#include <sys/mount.h>
42#include <sys/namei.h>
43#include <sys/resourcevar.h>
44#include <sys/stat.h>
45#include <sys/sysctl.h>
46#include <sys/unistd.h>
47#include <sys/vnode.h>
48#include <sys/wait.h>
49#include <sys/time.h>
50#include <sys/signalvar.h>
51
52#include <vm/vm.h>
53#include <vm/pmap.h>
54#include <vm/vm_kern.h>
55#include <vm/vm_map.h>
56#include <vm/vm_extern.h>
57
58#include <machine/frame.h>
59#include <machine/psl.h>
60#include <machine/sysarch.h>
61#include <machine/segments.h>
62
63#include <i386/linux/linux.h>
64#include <i386/linux/linux_proto.h>
65#include <i386/linux/linux_util.h>
66#include <i386/linux/linux_mib.h>
67
68#include <posix4/sched.h>
69
/*
 * Translate a BSD signal number into its Linux counterpart.  Numbers up to
 * LINUX_SIGTBLSZ are looked up in bsd_to_linux_signal[]; larger numbers are
 * passed through unchanged.  The argument is expanded more than once, so it
 * must not have side effects that matter when repeated.
 */
#define BSD_TO_LINUX_SIGNAL(sig)	\
	(((sig) <= LINUX_SIGTBLSZ) ? bsd_to_linux_signal[_SIG_IDX(sig)] : (sig))
72
73static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] =
74{ RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK,
75 RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE,
76 RLIMIT_MEMLOCK, -1
77};
78
79int
80linux_alarm(struct proc *p, struct linux_alarm_args *args)
81{
82 struct itimerval it, old_it;
83 struct timeval tv;
84 int s;
85
86#ifdef DEBUG
87 printf("Linux-emul(%ld): alarm(%u)\n", (long)p->p_pid, args->secs);
88#endif
89 if (args->secs > 100000000)
90 return EINVAL;
91 it.it_value.tv_sec = (long)args->secs;
92 it.it_value.tv_usec = 0;
93 it.it_interval.tv_sec = 0;
94 it.it_interval.tv_usec = 0;
95 s = splsoftclock();
96 old_it = p->p_realtimer;
97 getmicrouptime(&tv);
98 if (timevalisset(&old_it.it_value))
99 untimeout(realitexpire, (caddr_t)p, p->p_ithandle);
100 if (it.it_value.tv_sec != 0) {
101 p->p_ithandle = timeout(realitexpire, (caddr_t)p, tvtohz(&it.it_value));
102 timevaladd(&it.it_value, &tv);
103 }
104 p->p_realtimer = it;
105 splx(s);
106 if (timevalcmp(&old_it.it_value, &tv, >)) {
107 timevalsub(&old_it.it_value, &tv);
108 if (old_it.it_value.tv_usec != 0)
109 old_it.it_value.tv_sec++;
110 p->p_retval[0] = old_it.it_value.tv_sec;
111 }
112 return 0;
113}
114
115int
116linux_brk(struct proc *p, struct linux_brk_args *args)
117{
118#if 0
119 struct vmspace *vm = p->p_vmspace;
120 vm_offset_t new, old;
121 int error;
122
123 if ((vm_offset_t)args->dsend < (vm_offset_t)vm->vm_daddr)
124 return EINVAL;
125 if (((caddr_t)args->dsend - (caddr_t)vm->vm_daddr)
126 > p->p_rlimit[RLIMIT_DATA].rlim_cur)
127 return ENOMEM;
128
129 old = round_page((vm_offset_t)vm->vm_daddr) + ctob(vm->vm_dsize);
130 new = round_page((vm_offset_t)args->dsend);
131 p->p_retval[0] = old;
132 if ((new-old) > 0) {
133 if (swap_pager_full)
134 return ENOMEM;
135 error = vm_map_find(&vm->vm_map, NULL, 0, &old, (new-old), FALSE,
136 VM_PROT_ALL, VM_PROT_ALL, 0);
137 if (error)
138 return error;
139 vm->vm_dsize += btoc((new-old));
140 p->p_retval[0] = (int)(vm->vm_daddr + ctob(vm->vm_dsize));
141 }
142 return 0;
143#else
144 struct vmspace *vm = p->p_vmspace;
145 vm_offset_t new, old;
146 struct obreak_args /* {
147 char * nsize;
148 } */ tmp;
149
150#ifdef DEBUG
151 printf("Linux-emul(%ld): brk(%p)\n", (long)p->p_pid, (void *)args->dsend);
152#endif
153 old = (vm_offset_t)vm->vm_daddr + ctob(vm->vm_dsize);
154 new = (vm_offset_t)args->dsend;
155 tmp.nsize = (char *) new;
156 if (((caddr_t)new > vm->vm_daddr) && !obreak(p, &tmp))
157 p->p_retval[0] = (int)new;
158 else
159 p->p_retval[0] = (int)old;
160
161 return 0;
162#endif
163}
164
165int
166linux_uselib(struct proc *p, struct linux_uselib_args *args)
167{
168 struct nameidata ni;
169 struct vnode *vp;
170 struct exec *a_out;
171 struct vattr attr;
172 vm_offset_t vmaddr;
173 unsigned long file_offset;
174 vm_offset_t buffer;
175 unsigned long bss_size;
176 int error;
177 caddr_t sg;
178 int locked;
179
180 sg = stackgap_init();
181 CHECKALTEXIST(p, &sg, args->library);
182
183#ifdef DEBUG
184 printf("Linux-emul(%ld): uselib(%s)\n", (long)p->p_pid, args->library);
185#endif
186
187 a_out = NULL;
188 locked = 0;
189 vp = NULL;
190
191 NDINIT(&ni, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, args->library, p);
192 error = namei(&ni);
193 if (error)
194 goto cleanup;
195
196 vp = ni.ni_vp;
197 /*
198 * XXX This looks like a bogus check - a LOCKLEAF namei should not succeed
199 * without returning a vnode.
200 */
201 if (vp == NULL) {
202 error = ENOEXEC; /* ?? */
203 goto cleanup;
204 }
205 NDFREE(&ni, NDF_ONLY_PNBUF);
206
207 /*
208 * From here on down, we have a locked vnode that must be unlocked.
209 */
210 locked++;
211
212 /*
213 * Writable?
214 */
215 if (vp->v_writecount) {
216 error = ETXTBSY;
217 goto cleanup;
218 }
219
220 /*
221 * Executable?
222 */
223 error = VOP_GETATTR(vp, &attr, p->p_ucred, p);
224 if (error)
225 goto cleanup;
226
227 if ((vp->v_mount->mnt_flag & MNT_NOEXEC) ||
228 ((attr.va_mode & 0111) == 0) ||
229 (attr.va_type != VREG)) {
230 error = ENOEXEC;
231 goto cleanup;
232 }
233
234 /*
235 * Sensible size?
236 */
237 if (attr.va_size == 0) {
238 error = ENOEXEC;
239 goto cleanup;
240 }
241
242 /*
243 * Can we access it?
244 */
245 error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
246 if (error)
247 goto cleanup;
248
249 error = VOP_OPEN(vp, FREAD, p->p_ucred, p);
250 if (error)
251 goto cleanup;
252
253 /*
254 * Lock no longer needed
255 */
256 VOP_UNLOCK(vp, 0, p);
257 locked = 0;
258
259 /*
260 * Pull in executable header into kernel_map
261 */
262 error = vm_mmap(kernel_map, (vm_offset_t *)&a_out, PAGE_SIZE,
263 VM_PROT_READ, VM_PROT_READ, 0, (caddr_t)vp, 0);
264 if (error)
265 goto cleanup;
266
267 /*
268 * Is it a Linux binary ?
269 */
270 if (((a_out->a_magic >> 16) & 0xff) != 0x64) {
271 error = ENOEXEC;
272 goto cleanup;
273 }
274
275 /* While we are here, we should REALLY do some more checks */
276
277 /*
278 * Set file/virtual offset based on a.out variant.
279 */
280 switch ((int)(a_out->a_magic & 0xffff)) {
281 case 0413: /* ZMAGIC */
282 file_offset = 1024;
283 break;
284 case 0314: /* QMAGIC */
285 file_offset = 0;
286 break;
287 default:
288 error = ENOEXEC;
289 goto cleanup;
290 }
291
292 bss_size = round_page(a_out->a_bss);
293
294 /*
295 * Check various fields in header for validity/bounds.
296 */
297 if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) {
298 error = ENOEXEC;
299 goto cleanup;
300 }
301
302 /* text + data can't exceed file size */
303 if (a_out->a_data + a_out->a_text > attr.va_size) {
304 error = EFAULT;
305 goto cleanup;
306 }
307
308 /*
309 * text/data/bss must not exceed limits
310 * XXX: this is not complete. it should check current usage PLUS
311 * the resources needed by this library.
312 */
313 if (a_out->a_text > MAXTSIZ ||
314 a_out->a_data + bss_size > p->p_rlimit[RLIMIT_DATA].rlim_cur) {
315 error = ENOMEM;
316 goto cleanup;
317 }
318
319 /*
320 * prevent more writers
321 */
322 vp->v_flag |= VTEXT;
323
324 /*
325 * Check if file_offset page aligned,.
326 * Currently we cannot handle misalinged file offsets,
327 * and so we read in the entire image (what a waste).
328 */
329 if (file_offset & PAGE_MASK) {
330#ifdef DEBUG
331printf("uselib: Non page aligned binary %lu\n", file_offset);
332#endif
333 /*
334 * Map text+data read/write/execute
335 */
336
337 /* a_entry is the load address and is page aligned */
338 vmaddr = trunc_page(a_out->a_entry);
339
340 /* get anon user mapping, read+write+execute */
341 error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &vmaddr,
342 a_out->a_text + a_out->a_data, FALSE,
343 VM_PROT_ALL, VM_PROT_ALL, 0);
344 if (error)
345 goto cleanup;
346
347 /* map file into kernel_map */
348 error = vm_mmap(kernel_map, &buffer,
349 round_page(a_out->a_text + a_out->a_data + file_offset),
350 VM_PROT_READ, VM_PROT_READ, 0,
351 (caddr_t)vp, trunc_page(file_offset));
352 if (error)
353 goto cleanup;
354
355 /* copy from kernel VM space to user space */
356 error = copyout((caddr_t)(void *)(uintptr_t)(buffer + file_offset),
357 (caddr_t)vmaddr, a_out->a_text + a_out->a_data);
358
359 /* release temporary kernel space */
360 vm_map_remove(kernel_map, buffer,
361 buffer + round_page(a_out->a_text + a_out->a_data + file_offset));
362
363 if (error)
364 goto cleanup;
365 }
366 else {
367#ifdef DEBUG
368printf("uselib: Page aligned binary %lu\n", file_offset);
369#endif
370 /*
371 * for QMAGIC, a_entry is 20 bytes beyond the load address
372 * to skip the executable header
373 */
374 vmaddr = trunc_page(a_out->a_entry);
375
376 /*
377 * Map it all into the process's space as a single copy-on-write
378 * "data" segment.
379 */
380 error = vm_mmap(&p->p_vmspace->vm_map, &vmaddr,
381 a_out->a_text + a_out->a_data,
382 VM_PROT_ALL, VM_PROT_ALL, MAP_PRIVATE | MAP_FIXED,
383 (caddr_t)vp, file_offset);
384 if (error)
385 goto cleanup;
386 }
387#ifdef DEBUG
388printf("mem=%08x = %08x %08x\n", vmaddr, ((int*)vmaddr)[0], ((int*)vmaddr)[1]);
389#endif
390 if (bss_size != 0) {
391 /*
392 * Calculate BSS start address
393 */
394 vmaddr = trunc_page(a_out->a_entry) + a_out->a_text + a_out->a_data;
395
396 /*
397 * allocate some 'anon' space
398 */
399 error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &vmaddr,
400 bss_size, FALSE,
401 VM_PROT_ALL, VM_PROT_ALL, 0);
402 if (error)
403 goto cleanup;
404 }
405
406cleanup:
407 /*
408 * Unlock vnode if needed
409 */
410 if (locked)
411 VOP_UNLOCK(vp, 0, p);
412
413 /*
414 * Release the kernel mapping.
415 */
416 if (a_out)
417 vm_map_remove(kernel_map, (vm_offset_t)a_out, (vm_offset_t)a_out + PAGE_SIZE);
418
419 return error;
420}
421
422/* XXX move */
/*
 * Argument block for the old-style Linux select(): linux_select() copies
 * one of these in from user space and forwards the fields, one for one,
 * to linux_newselect().
 */
struct linux_select_argv {
	int		nfds;		/* forwarded as newselect nfds */
	fd_set		*readfds;	/* forwarded as newselect readfds */
	fd_set		*writefds;	/* forwarded as newselect writefds */
	fd_set		*exceptfds;	/* forwarded as newselect exceptfds */
	struct timeval	*timeout;	/* forwarded as newselect timeout */
};
430
431int
432linux_select(struct proc *p, struct linux_select_args *args)
433{
434 struct linux_select_argv linux_args;
435 struct linux_newselect_args newsel;
436 int error;
437
438#ifdef SELECT_DEBUG
439 printf("Linux-emul(%ld): select(%x)\n", (long)p->p_pid, args->ptr);
440#endif
441 if ((error = copyin((caddr_t)args->ptr, (caddr_t)&linux_args,
442 sizeof(linux_args))))
443 return error;
444
445 newsel.nfds = linux_args.nfds;
446 newsel.readfds = linux_args.readfds;
447 newsel.writefds = linux_args.writefds;
448 newsel.exceptfds = linux_args.exceptfds;
449 newsel.timeout = linux_args.timeout;
450
451 return linux_newselect(p, &newsel);
452}
453
454int
455linux_newselect(struct proc *p, struct linux_newselect_args *args)
456{
457 struct select_args bsa;
458 struct timeval tv0, tv1, utv, *tvp;
459 caddr_t sg;
460 int error;
461
462#ifdef DEBUG
463 printf("Linux-emul(%ld): newselect(%d, %p, %p, %p, %p)\n",
464 (long)p->p_pid, args->nfds, (void *)args->readfds,
465 (void *)args->writefds, (void *)args->exceptfds,
466 (void *)args->timeout);
467#endif
468 error = 0;
469 bsa.nd = args->nfds;
470 bsa.in = args->readfds;
471 bsa.ou = args->writefds;
472 bsa.ex = args->exceptfds;
473 bsa.tv = args->timeout;
474
475 /*
476 * Store current time for computation of the amount of
477 * time left.
478 */
479 if (args->timeout) {
480 if ((error = copyin(args->timeout, &utv, sizeof(utv))))
481 goto select_out;
482#ifdef DEBUG
483 printf("Linux-emul(%ld): incoming timeout (%ld/%ld)\n",
484 (long)p->p_pid, utv.tv_sec, utv.tv_usec);
485#endif
486 if (itimerfix(&utv)) {
487 /*
488 * The timeval was invalid. Convert it to something
489 * valid that will act as it does under Linux.
490 */
491 sg = stackgap_init();
492 tvp = stackgap_alloc(&sg, sizeof(utv));
493 utv.tv_sec += utv.tv_usec / 1000000;
494 utv.tv_usec %= 1000000;
495 if (utv.tv_usec < 0) {
496 utv.tv_sec -= 1;
497 utv.tv_usec += 1000000;
498 }
499 if (utv.tv_sec < 0)
500 timevalclear(&utv);
501 if ((error = copyout(&utv, tvp, sizeof(utv))))
502 goto select_out;
503 bsa.tv = tvp;
504 }
505 microtime(&tv0);
506 }
507
508 error = select(p, &bsa);
509#ifdef DEBUG
510 printf("Linux-emul(%ld): real select returns %d\n", (long)p->p_pid, error);
511#endif
512
513 if (error) {
514 /*
515 * See fs/select.c in the Linux kernel. Without this,
516 * Maelstrom doesn't work.
517 */
518 if (error == ERESTART)
519 error = EINTR;
520 goto select_out;
521 }
522
523 if (args->timeout) {
524 if (p->p_retval[0]) {
525 /*
526 * Compute how much time was left of the timeout,
527 * by subtracting the current time and the time
528 * before we started the call, and subtracting
529 * that result from the user-supplied value.
530 */
531 microtime(&tv1);
532 timevalsub(&tv1, &tv0);
533 timevalsub(&utv, &tv1);
534 if (utv.tv_sec < 0)
535 timevalclear(&utv);
536 } else
537 timevalclear(&utv);
538#ifdef DEBUG
539 printf("Linux-emul(%ld): outgoing timeout (%ld/%ld)\n",
540 (long)p->p_pid, utv.tv_sec, utv.tv_usec);
541#endif
542 if ((error = copyout(&utv, args->timeout, sizeof(utv))))
543 goto select_out;
544 }
545
546select_out:
547#ifdef DEBUG
548 printf("Linux-emul(%ld): newselect_out -> %d\n", (long)p->p_pid, error);
549#endif
550 return error;
551}
552
553int
554linux_getpgid(struct proc *p, struct linux_getpgid_args *args)
555{
556 struct proc *curp;
557
558#ifdef DEBUG
559 printf("Linux-emul(%ld): getpgid(%d)\n", (long)p->p_pid, args->pid);
560#endif
561 if (args->pid != p->p_pid) {
562 if (!(curp = pfind(args->pid)))
563 return ESRCH;
564 }
565 else
566 curp = p;
567 p->p_retval[0] = curp->p_pgid;
568 return 0;
569}
570
571int
572linux_fork(struct proc *p, struct linux_fork_args *args)
573{
574 int error;
575
576#ifdef DEBUG
577 printf("Linux-emul(%ld): fork()\n", (long)p->p_pid);
578#endif
579 if ((error = fork(p, (struct fork_args *)args)) != 0)
580 return error;
581 if (p->p_retval[1] == 1)
582 p->p_retval[0] = 0;
583 return 0;
584}
585
586int
587linux_vfork(struct proc *p, struct linux_vfork_args *args)
588{
589 int error;
590
591#ifdef DEBUG
592 printf("Linux-emul(%ld): vfork()\n", (long)p->p_pid);
593#endif
594
595 if ((error = vfork(p, (struct vfork_args *)args)) != 0)
596 return error;
597 /* Are we the child? */
598 if (p->p_retval[1] == 1)
599 p->p_retval[0] = 0;
600 return 0;
601}
602
603#define CLONE_VM 0x100
604#define CLONE_FS 0x200
605#define CLONE_FILES 0x400
606#define CLONE_SIGHAND 0x800
607#define CLONE_PID 0x1000
608
609int
610linux_clone(struct proc *p, struct linux_clone_args *args)
611{
612 int error, ff = RFPROC;
613 struct proc *p2;
614 int exit_signal;
615 vm_offset_t start;
616 struct rfork_args rf_args;
617
618#ifdef DEBUG
619 if (args->flags & CLONE_PID)
620 printf("linux_clone(%ld): CLONE_PID not yet supported\n",
621 (long)p->p_pid);
622 printf("linux_clone(%ld): invoked with flags %x and stack %x\n",
623 (long)p->p_pid, (unsigned int)args->flags,
624 (unsigned int)args->stack);
625#endif
626
627 if (!args->stack)
628 return (EINVAL);
629
630 exit_signal = args->flags & 0x000000ff;
631 if (exit_signal >= LINUX_NSIG)
632 return EINVAL;
633
634 if (exit_signal <= LINUX_SIGTBLSZ)
635 exit_signal = linux_to_bsd_signal[_SIG_IDX(exit_signal)];
636
637 /* RFTHREAD probably not necessary here, but it shouldn't hurt either */
638 ff |= RFTHREAD;
639
640 if (args->flags & CLONE_VM)
641 ff |= RFMEM;
642 if (args->flags & CLONE_SIGHAND)
643 ff |= RFSIGSHARE;
644 if (!(args->flags & CLONE_FILES))
645 ff |= RFFDG;
646
647 error = 0;
648 start = 0;
649
650 rf_args.flags = ff;
651 if ((error = rfork(p, &rf_args)) != 0)
652 return error;
653
654 p2 = pfind(p->p_retval[0]);
655 if (p2 == 0)
656 return ESRCH;
657
658 p2->p_sigparent = exit_signal;
659 p2->p_md.md_regs->tf_esp = (unsigned int)args->stack;
660
661#ifdef DEBUG
662 printf ("linux_clone(%ld): successful rfork to %ld\n",
663 (long)p->p_pid, (long)p2->p_pid);
664#endif
665 return 0;
666}
667
668/* XXX move */
669struct linux_mmap_argv {
670 linux_caddr_t addr;
671 int len;
672 int prot;
673 int flags;
674 int fd;
675 int pos;
676};
677
678#define STACK_SIZE (2 * 1024 * 1024)
679#define GUARD_SIZE (4 * PAGE_SIZE)
680int
681linux_mmap(struct proc *p, struct linux_mmap_args *args)
682{
683 struct mmap_args /* {
684 caddr_t addr;
685 size_t len;
686 int prot;
687 int flags;
688 int fd;
689 long pad;
690 off_t pos;
691 } */ bsd_args;
692 int error;
693 struct linux_mmap_argv linux_args;
694
695 if ((error = copyin((caddr_t)args->ptr, (caddr_t)&linux_args,
696 sizeof(linux_args))))
697 return error;
698#ifdef DEBUG
699 printf("Linux-emul(%ld): mmap(%p, %d, %d, 0x%08x, %d, %d)",
700 (long)p->p_pid, (void *)linux_args.addr, linux_args.len,
701 linux_args.prot, linux_args.flags, linux_args.fd, linux_args.pos);
702#endif
703 bsd_args.flags = 0;
704 if (linux_args.flags & LINUX_MAP_SHARED)
705 bsd_args.flags |= MAP_SHARED;
706 if (linux_args.flags & LINUX_MAP_PRIVATE)
707 bsd_args.flags |= MAP_PRIVATE;
708 if (linux_args.flags & LINUX_MAP_FIXED)
709 bsd_args.flags |= MAP_FIXED;
710 if (linux_args.flags & LINUX_MAP_ANON)
711 bsd_args.flags |= MAP_ANON;
712 if (linux_args.flags & LINUX_MAP_GROWSDOWN) {
713 bsd_args.flags |= MAP_STACK;
714
715 /* The linux MAP_GROWSDOWN option does not limit auto
716 * growth of the region. Linux mmap with this option
717 * takes as addr the inital BOS, and as len, the initial
718 * region size. It can then grow down from addr without
719 * limit. However, linux threads has an implicit internal
720 * limit to stack size of STACK_SIZE. Its just not
721 * enforced explicitly in linux. But, here we impose
722 * a limit of (STACK_SIZE - GUARD_SIZE) on the stack
723 * region, since we can do this with our mmap.
724 *
725 * Our mmap with MAP_STACK takes addr as the maximum
726 * downsize limit on BOS, and as len the max size of
727 * the region. It them maps the top SGROWSIZ bytes,
728 * and autgrows the region down, up to the limit
729 * in addr.
730 *
731 * If we don't use the MAP_STACK option, the effect
732 * of this code is to allocate a stack region of a
733 * fixed size of (STACK_SIZE - GUARD_SIZE).
734 */
735
736 /* This gives us TOS */
737 bsd_args.addr = linux_args.addr + linux_args.len;
738
739 /* This gives us our maximum stack size */
740 if (linux_args.len > STACK_SIZE - GUARD_SIZE)
741 bsd_args.len = linux_args.len;
742 else
743 bsd_args.len = STACK_SIZE - GUARD_SIZE;
744
745 /* This gives us a new BOS. If we're using VM_STACK, then
746 * mmap will just map the top SGROWSIZ bytes, and let
747 * the stack grow down to the limit at BOS. If we're
748 * not using VM_STACK we map the full stack, since we
749 * don't have a way to autogrow it.
750 */
751 bsd_args.addr -= bsd_args.len;
752
753 } else {
754 bsd_args.addr = linux_args.addr;
755 bsd_args.len = linux_args.len;
756 }
757
758 bsd_args.prot = linux_args.prot | PROT_READ; /* always required */
759 if (linux_args.flags & LINUX_MAP_ANON)
760 bsd_args.fd = -1;
761 else
762 bsd_args.fd = linux_args.fd;
763 bsd_args.pos = linux_args.pos;
764 bsd_args.pad = 0;
765#ifdef DEBUG
766 printf("-> (%p, %d, %d, 0x%08x, %d, %d)\n",
767 (void *)bsd_args.addr, bsd_args.len,
768 bsd_args.prot, bsd_args.flags, bsd_args.fd, (int)bsd_args.pos);
769#endif
770 return mmap(p, &bsd_args);
771}
772
773int
774linux_mremap(struct proc *p, struct linux_mremap_args *args)
775{
776 struct munmap_args /* {
777 void *addr;
778 size_t len;
779 } */ bsd_args;
780 int error = 0;
781
782#ifdef DEBUG
783 printf("Linux-emul(%ld): mremap(%p, %08x, %08x, %08x)\n",
784 (long)p->p_pid, (void *)args->addr, args->old_len, args->new_len,
785 args->flags);
786#endif
787 args->new_len = round_page(args->new_len);
788 args->old_len = round_page(args->old_len);
789
790 if (args->new_len > args->old_len) {
791 p->p_retval[0] = 0;
792 return ENOMEM;
793 }
794
795 if (args->new_len < args->old_len) {
796 bsd_args.addr = args->addr + args->new_len;
797 bsd_args.len = args->old_len - args->new_len;
798 error = munmap(p, &bsd_args);
799 }
800
801 p->p_retval[0] = error ? 0 : (int)args->addr;
802 return error;
803}
804
805int
806linux_msync(struct proc *p, struct linux_msync_args *args)
807{
808 struct msync_args bsd_args;
809
810 bsd_args.addr = args->addr;
811 bsd_args.len = args->len;
812 bsd_args.flags = 0; /* XXX ignore */
813
814 return msync(p, &bsd_args);
815}
816
817int
818linux_pipe(struct proc *p, struct linux_pipe_args *args)
819{
820 int error;
821 int reg_edx;
822
823#ifdef DEBUG
824 printf("Linux-emul(%ld): pipe(*)\n", (long)p->p_pid);
825#endif
826 reg_edx = p->p_retval[1];
827 error = pipe(p, 0);
828 if (error) {
829 p->p_retval[1] = reg_edx;
830 return error;
831 }
832
833 error = copyout(p->p_retval, args->pipefds, 2*sizeof(int));
834 if (error) {
835 p->p_retval[1] = reg_edx;
836 return error;
837 }
838
839 p->p_retval[1] = reg_edx;
840 p->p_retval[0] = 0;
841 return 0;
842}
843
844int
845linux_time(struct proc *p, struct linux_time_args *args)
846{
847 struct timeval tv;
848 linux_time_t tm;
849 int error;
850
851#ifdef DEBUG
852 printf("Linux-emul(%ld): time(*)\n", (long)p->p_pid);
853#endif
854 microtime(&tv);
855 tm = tv.tv_sec;
856 if (args->tm && (error = copyout(&tm, args->tm, sizeof(linux_time_t))))
857 return error;
858 p->p_retval[0] = tm;
859 return 0;
860}
861
/*
 * Buffer that linux_times() copies out to user space.  Every field is
 * filled from a timeval converted with CONVTCK().
 */
struct linux_times_argv {
	long	tms_utime;	/* from calcru() user time */
	long	tms_stime;	/* from calcru() system time */
	long	tms_cutime;	/* from p_stats->p_cru.ru_utime */
	long	tms_cstime;	/* from p_stats->p_cru.ru_stime */
};
868
#define CLK_TCK 100	/* Linux uses 100 */

/*
 * Convert a struct timeval (passed by value, not by pointer) into a count
 * of 1/CLK_TCK-second ticks.  The argument is parenthesised so that any
 * expression yielding a timeval, such as *ptr, can be passed; it is
 * evaluated twice.
 */
#define CONVTCK(r)	((r).tv_sec * CLK_TCK + (r).tv_usec / (1000000 / CLK_TCK))
871
872int
873linux_times(struct proc *p, struct linux_times_args *args)
874{
875 struct timeval tv;
876 struct linux_times_argv tms;
877 struct rusage ru;
878 int error;
879
880#ifdef DEBUG
881 printf("Linux-emul(%ld): times(*)\n", (long)p->p_pid);
882#endif
883 calcru(p, &ru.ru_utime, &ru.ru_stime, NULL);
884
885 tms.tms_utime = CONVTCK(ru.ru_utime);
886 tms.tms_stime = CONVTCK(ru.ru_stime);
887
888 tms.tms_cutime = CONVTCK(p->p_stats->p_cru.ru_utime);
889 tms.tms_cstime = CONVTCK(p->p_stats->p_cru.ru_stime);
890
891 if ((error = copyout((caddr_t)&tms, (caddr_t)args->buf,
892 sizeof(struct linux_times_argv))))
893 return error;
894
895 microuptime(&tv);
896 p->p_retval[0] = (int)CONVTCK(tv);
897 return 0;
898}
899
900int
901linux_newuname(struct proc *p, struct linux_newuname_args *args)
902{
903 struct linux_new_utsname utsname;
904 char *osrelease, *osname;
905
906#ifdef DEBUG
907 printf("Linux-emul(%ld): newuname(*)\n", (long)p->p_pid);
908#endif
909
910 osname = linux_get_osname(p);
911 osrelease = linux_get_osrelease(p);
912
913 bzero(&utsname, sizeof(struct linux_new_utsname));
914 strncpy(utsname.sysname, osname, LINUX_MAX_UTSNAME-1);
915 strncpy(utsname.nodename, hostname, LINUX_MAX_UTSNAME-1);
916 strncpy(utsname.release, osrelease, LINUX_MAX_UTSNAME-1);
917 strncpy(utsname.version, version, LINUX_MAX_UTSNAME-1);
918 strncpy(utsname.machine, machine, LINUX_MAX_UTSNAME-1);
919 strncpy(utsname.domainname, domainname, LINUX_MAX_UTSNAME-1);
920
921 return (copyout((caddr_t)&utsname, (caddr_t)args->buf,
922 sizeof(struct linux_new_utsname)));
923}
924
925struct linux_utimbuf {
926 linux_time_t l_actime;
927 linux_time_t l_modtime;
928};
929
930int
931linux_utime(struct proc *p, struct linux_utime_args *args)
932{
933 struct utimes_args /* {
934 char *path;
935 struct timeval *tptr;
936 } */ bsdutimes;
937 struct timeval tv[2], *tvp;
938 struct linux_utimbuf lut;
939 int error;
940 caddr_t sg;
941
942 sg = stackgap_init();
943 CHECKALTEXIST(p, &sg, args->fname);
944
945#ifdef DEBUG
946 printf("Linux-emul(%ld): utime(%s, *)\n", (long)p->p_pid, args->fname);
947#endif
948 if (args->times) {
949 if ((error = copyin(args->times, &lut, sizeof lut)))
950 return error;
951 tv[0].tv_sec = lut.l_actime;
952 tv[0].tv_usec = 0;
953 tv[1].tv_sec = lut.l_modtime;
954 tv[1].tv_usec = 0;
955 /* so that utimes can copyin */
956 tvp = (struct timeval *)stackgap_alloc(&sg, sizeof(tv));
29 */
30
31#include "opt_compat.h"
32
33#include <sys/param.h>
34#include <sys/systm.h>
35#include <sys/sysproto.h>
36#include <sys/kernel.h>
37#include <sys/mman.h>
38#include <sys/proc.h>
39#include <sys/fcntl.h>
40#include <sys/imgact_aout.h>
41#include <sys/mount.h>
42#include <sys/namei.h>
43#include <sys/resourcevar.h>
44#include <sys/stat.h>
45#include <sys/sysctl.h>
46#include <sys/unistd.h>
47#include <sys/vnode.h>
48#include <sys/wait.h>
49#include <sys/time.h>
50#include <sys/signalvar.h>
51
52#include <vm/vm.h>
53#include <vm/pmap.h>
54#include <vm/vm_kern.h>
55#include <vm/vm_map.h>
56#include <vm/vm_extern.h>
57
58#include <machine/frame.h>
59#include <machine/psl.h>
60#include <machine/sysarch.h>
61#include <machine/segments.h>
62
63#include <i386/linux/linux.h>
64#include <i386/linux/linux_proto.h>
65#include <i386/linux/linux_util.h>
66#include <i386/linux/linux_mib.h>
67
68#include <posix4/sched.h>
69
/*
 * Translate a BSD signal number into its Linux counterpart.  Numbers up to
 * LINUX_SIGTBLSZ are looked up in bsd_to_linux_signal[]; larger numbers are
 * passed through unchanged.  The argument is expanded more than once, so it
 * must not have side effects that matter when repeated.
 */
#define BSD_TO_LINUX_SIGNAL(sig)	\
	(((sig) <= LINUX_SIGTBLSZ) ? bsd_to_linux_signal[_SIG_IDX(sig)] : (sig))
72
73static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] =
74{ RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK,
75 RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE,
76 RLIMIT_MEMLOCK, -1
77};
78
79int
80linux_alarm(struct proc *p, struct linux_alarm_args *args)
81{
82 struct itimerval it, old_it;
83 struct timeval tv;
84 int s;
85
86#ifdef DEBUG
87 printf("Linux-emul(%ld): alarm(%u)\n", (long)p->p_pid, args->secs);
88#endif
89 if (args->secs > 100000000)
90 return EINVAL;
91 it.it_value.tv_sec = (long)args->secs;
92 it.it_value.tv_usec = 0;
93 it.it_interval.tv_sec = 0;
94 it.it_interval.tv_usec = 0;
95 s = splsoftclock();
96 old_it = p->p_realtimer;
97 getmicrouptime(&tv);
98 if (timevalisset(&old_it.it_value))
99 untimeout(realitexpire, (caddr_t)p, p->p_ithandle);
100 if (it.it_value.tv_sec != 0) {
101 p->p_ithandle = timeout(realitexpire, (caddr_t)p, tvtohz(&it.it_value));
102 timevaladd(&it.it_value, &tv);
103 }
104 p->p_realtimer = it;
105 splx(s);
106 if (timevalcmp(&old_it.it_value, &tv, >)) {
107 timevalsub(&old_it.it_value, &tv);
108 if (old_it.it_value.tv_usec != 0)
109 old_it.it_value.tv_sec++;
110 p->p_retval[0] = old_it.it_value.tv_sec;
111 }
112 return 0;
113}
114
115int
116linux_brk(struct proc *p, struct linux_brk_args *args)
117{
118#if 0
119 struct vmspace *vm = p->p_vmspace;
120 vm_offset_t new, old;
121 int error;
122
123 if ((vm_offset_t)args->dsend < (vm_offset_t)vm->vm_daddr)
124 return EINVAL;
125 if (((caddr_t)args->dsend - (caddr_t)vm->vm_daddr)
126 > p->p_rlimit[RLIMIT_DATA].rlim_cur)
127 return ENOMEM;
128
129 old = round_page((vm_offset_t)vm->vm_daddr) + ctob(vm->vm_dsize);
130 new = round_page((vm_offset_t)args->dsend);
131 p->p_retval[0] = old;
132 if ((new-old) > 0) {
133 if (swap_pager_full)
134 return ENOMEM;
135 error = vm_map_find(&vm->vm_map, NULL, 0, &old, (new-old), FALSE,
136 VM_PROT_ALL, VM_PROT_ALL, 0);
137 if (error)
138 return error;
139 vm->vm_dsize += btoc((new-old));
140 p->p_retval[0] = (int)(vm->vm_daddr + ctob(vm->vm_dsize));
141 }
142 return 0;
143#else
144 struct vmspace *vm = p->p_vmspace;
145 vm_offset_t new, old;
146 struct obreak_args /* {
147 char * nsize;
148 } */ tmp;
149
150#ifdef DEBUG
151 printf("Linux-emul(%ld): brk(%p)\n", (long)p->p_pid, (void *)args->dsend);
152#endif
153 old = (vm_offset_t)vm->vm_daddr + ctob(vm->vm_dsize);
154 new = (vm_offset_t)args->dsend;
155 tmp.nsize = (char *) new;
156 if (((caddr_t)new > vm->vm_daddr) && !obreak(p, &tmp))
157 p->p_retval[0] = (int)new;
158 else
159 p->p_retval[0] = (int)old;
160
161 return 0;
162#endif
163}
164
165int
166linux_uselib(struct proc *p, struct linux_uselib_args *args)
167{
168 struct nameidata ni;
169 struct vnode *vp;
170 struct exec *a_out;
171 struct vattr attr;
172 vm_offset_t vmaddr;
173 unsigned long file_offset;
174 vm_offset_t buffer;
175 unsigned long bss_size;
176 int error;
177 caddr_t sg;
178 int locked;
179
180 sg = stackgap_init();
181 CHECKALTEXIST(p, &sg, args->library);
182
183#ifdef DEBUG
184 printf("Linux-emul(%ld): uselib(%s)\n", (long)p->p_pid, args->library);
185#endif
186
187 a_out = NULL;
188 locked = 0;
189 vp = NULL;
190
191 NDINIT(&ni, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, args->library, p);
192 error = namei(&ni);
193 if (error)
194 goto cleanup;
195
196 vp = ni.ni_vp;
197 /*
198 * XXX This looks like a bogus check - a LOCKLEAF namei should not succeed
199 * without returning a vnode.
200 */
201 if (vp == NULL) {
202 error = ENOEXEC; /* ?? */
203 goto cleanup;
204 }
205 NDFREE(&ni, NDF_ONLY_PNBUF);
206
207 /*
208 * From here on down, we have a locked vnode that must be unlocked.
209 */
210 locked++;
211
212 /*
213 * Writable?
214 */
215 if (vp->v_writecount) {
216 error = ETXTBSY;
217 goto cleanup;
218 }
219
220 /*
221 * Executable?
222 */
223 error = VOP_GETATTR(vp, &attr, p->p_ucred, p);
224 if (error)
225 goto cleanup;
226
227 if ((vp->v_mount->mnt_flag & MNT_NOEXEC) ||
228 ((attr.va_mode & 0111) == 0) ||
229 (attr.va_type != VREG)) {
230 error = ENOEXEC;
231 goto cleanup;
232 }
233
234 /*
235 * Sensible size?
236 */
237 if (attr.va_size == 0) {
238 error = ENOEXEC;
239 goto cleanup;
240 }
241
242 /*
243 * Can we access it?
244 */
245 error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
246 if (error)
247 goto cleanup;
248
249 error = VOP_OPEN(vp, FREAD, p->p_ucred, p);
250 if (error)
251 goto cleanup;
252
253 /*
254 * Lock no longer needed
255 */
256 VOP_UNLOCK(vp, 0, p);
257 locked = 0;
258
259 /*
260 * Pull in executable header into kernel_map
261 */
262 error = vm_mmap(kernel_map, (vm_offset_t *)&a_out, PAGE_SIZE,
263 VM_PROT_READ, VM_PROT_READ, 0, (caddr_t)vp, 0);
264 if (error)
265 goto cleanup;
266
267 /*
268 * Is it a Linux binary ?
269 */
270 if (((a_out->a_magic >> 16) & 0xff) != 0x64) {
271 error = ENOEXEC;
272 goto cleanup;
273 }
274
275 /* While we are here, we should REALLY do some more checks */
276
277 /*
278 * Set file/virtual offset based on a.out variant.
279 */
280 switch ((int)(a_out->a_magic & 0xffff)) {
281 case 0413: /* ZMAGIC */
282 file_offset = 1024;
283 break;
284 case 0314: /* QMAGIC */
285 file_offset = 0;
286 break;
287 default:
288 error = ENOEXEC;
289 goto cleanup;
290 }
291
292 bss_size = round_page(a_out->a_bss);
293
294 /*
295 * Check various fields in header for validity/bounds.
296 */
297 if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) {
298 error = ENOEXEC;
299 goto cleanup;
300 }
301
302 /* text + data can't exceed file size */
303 if (a_out->a_data + a_out->a_text > attr.va_size) {
304 error = EFAULT;
305 goto cleanup;
306 }
307
308 /*
309 * text/data/bss must not exceed limits
310 * XXX: this is not complete. it should check current usage PLUS
311 * the resources needed by this library.
312 */
313 if (a_out->a_text > MAXTSIZ ||
314 a_out->a_data + bss_size > p->p_rlimit[RLIMIT_DATA].rlim_cur) {
315 error = ENOMEM;
316 goto cleanup;
317 }
318
319 /*
320 * prevent more writers
321 */
322 vp->v_flag |= VTEXT;
323
324 /*
325 * Check if file_offset page aligned,.
326 * Currently we cannot handle misalinged file offsets,
327 * and so we read in the entire image (what a waste).
328 */
329 if (file_offset & PAGE_MASK) {
330#ifdef DEBUG
331printf("uselib: Non page aligned binary %lu\n", file_offset);
332#endif
333 /*
334 * Map text+data read/write/execute
335 */
336
337 /* a_entry is the load address and is page aligned */
338 vmaddr = trunc_page(a_out->a_entry);
339
340 /* get anon user mapping, read+write+execute */
341 error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &vmaddr,
342 a_out->a_text + a_out->a_data, FALSE,
343 VM_PROT_ALL, VM_PROT_ALL, 0);
344 if (error)
345 goto cleanup;
346
347 /* map file into kernel_map */
348 error = vm_mmap(kernel_map, &buffer,
349 round_page(a_out->a_text + a_out->a_data + file_offset),
350 VM_PROT_READ, VM_PROT_READ, 0,
351 (caddr_t)vp, trunc_page(file_offset));
352 if (error)
353 goto cleanup;
354
355 /* copy from kernel VM space to user space */
356 error = copyout((caddr_t)(void *)(uintptr_t)(buffer + file_offset),
357 (caddr_t)vmaddr, a_out->a_text + a_out->a_data);
358
359 /* release temporary kernel space */
360 vm_map_remove(kernel_map, buffer,
361 buffer + round_page(a_out->a_text + a_out->a_data + file_offset));
362
363 if (error)
364 goto cleanup;
365 }
366 else {
367#ifdef DEBUG
368printf("uselib: Page aligned binary %lu\n", file_offset);
369#endif
370 /*
371 * for QMAGIC, a_entry is 20 bytes beyond the load address
372 * to skip the executable header
373 */
374 vmaddr = trunc_page(a_out->a_entry);
375
376 /*
377 * Map it all into the process's space as a single copy-on-write
378 * "data" segment.
379 */
380 error = vm_mmap(&p->p_vmspace->vm_map, &vmaddr,
381 a_out->a_text + a_out->a_data,
382 VM_PROT_ALL, VM_PROT_ALL, MAP_PRIVATE | MAP_FIXED,
383 (caddr_t)vp, file_offset);
384 if (error)
385 goto cleanup;
386 }
387#ifdef DEBUG
388printf("mem=%08x = %08x %08x\n", vmaddr, ((int*)vmaddr)[0], ((int*)vmaddr)[1]);
389#endif
390 if (bss_size != 0) {
391 /*
392 * Calculate BSS start address
393 */
394 vmaddr = trunc_page(a_out->a_entry) + a_out->a_text + a_out->a_data;
395
396 /*
397 * allocate some 'anon' space
398 */
399 error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &vmaddr,
400 bss_size, FALSE,
401 VM_PROT_ALL, VM_PROT_ALL, 0);
402 if (error)
403 goto cleanup;
404 }
405
406cleanup:
407 /*
408 * Unlock vnode if needed
409 */
410 if (locked)
411 VOP_UNLOCK(vp, 0, p);
412
413 /*
414 * Release the kernel mapping.
415 */
416 if (a_out)
417 vm_map_remove(kernel_map, (vm_offset_t)a_out, (vm_offset_t)a_out + PAGE_SIZE);
418
419 return error;
420}
421
422/* XXX move */
/*
 * Argument block of the old-style Linux select call.  linux_select()
 * copies it in from args->ptr and forwards each field unchanged to
 * linux_newselect().
 */
struct linux_select_argv {
	int nfds;			/* forwarded as newselect's nfds */
	fd_set *readfds;		/* forwarded as readfds */
	fd_set *writefds;		/* forwarded as writefds */
	fd_set *exceptfds;		/* forwarded as exceptfds */
	struct timeval *timeout;	/* forwarded as timeout */
};
430
431int
432linux_select(struct proc *p, struct linux_select_args *args)
433{
434 struct linux_select_argv linux_args;
435 struct linux_newselect_args newsel;
436 int error;
437
438#ifdef SELECT_DEBUG
439 printf("Linux-emul(%ld): select(%x)\n", (long)p->p_pid, args->ptr);
440#endif
441 if ((error = copyin((caddr_t)args->ptr, (caddr_t)&linux_args,
442 sizeof(linux_args))))
443 return error;
444
445 newsel.nfds = linux_args.nfds;
446 newsel.readfds = linux_args.readfds;
447 newsel.writefds = linux_args.writefds;
448 newsel.exceptfds = linux_args.exceptfds;
449 newsel.timeout = linux_args.timeout;
450
451 return linux_newselect(p, &newsel);
452}
453
454int
455linux_newselect(struct proc *p, struct linux_newselect_args *args)
456{
457 struct select_args bsa;
458 struct timeval tv0, tv1, utv, *tvp;
459 caddr_t sg;
460 int error;
461
462#ifdef DEBUG
463 printf("Linux-emul(%ld): newselect(%d, %p, %p, %p, %p)\n",
464 (long)p->p_pid, args->nfds, (void *)args->readfds,
465 (void *)args->writefds, (void *)args->exceptfds,
466 (void *)args->timeout);
467#endif
468 error = 0;
469 bsa.nd = args->nfds;
470 bsa.in = args->readfds;
471 bsa.ou = args->writefds;
472 bsa.ex = args->exceptfds;
473 bsa.tv = args->timeout;
474
475 /*
476 * Store current time for computation of the amount of
477 * time left.
478 */
479 if (args->timeout) {
480 if ((error = copyin(args->timeout, &utv, sizeof(utv))))
481 goto select_out;
482#ifdef DEBUG
483 printf("Linux-emul(%ld): incoming timeout (%ld/%ld)\n",
484 (long)p->p_pid, utv.tv_sec, utv.tv_usec);
485#endif
486 if (itimerfix(&utv)) {
487 /*
488 * The timeval was invalid. Convert it to something
489 * valid that will act as it does under Linux.
490 */
491 sg = stackgap_init();
492 tvp = stackgap_alloc(&sg, sizeof(utv));
493 utv.tv_sec += utv.tv_usec / 1000000;
494 utv.tv_usec %= 1000000;
495 if (utv.tv_usec < 0) {
496 utv.tv_sec -= 1;
497 utv.tv_usec += 1000000;
498 }
499 if (utv.tv_sec < 0)
500 timevalclear(&utv);
501 if ((error = copyout(&utv, tvp, sizeof(utv))))
502 goto select_out;
503 bsa.tv = tvp;
504 }
505 microtime(&tv0);
506 }
507
508 error = select(p, &bsa);
509#ifdef DEBUG
510 printf("Linux-emul(%ld): real select returns %d\n", (long)p->p_pid, error);
511#endif
512
513 if (error) {
514 /*
515 * See fs/select.c in the Linux kernel. Without this,
516 * Maelstrom doesn't work.
517 */
518 if (error == ERESTART)
519 error = EINTR;
520 goto select_out;
521 }
522
523 if (args->timeout) {
524 if (p->p_retval[0]) {
525 /*
526 * Compute how much time was left of the timeout,
527 * by subtracting the current time and the time
528 * before we started the call, and subtracting
529 * that result from the user-supplied value.
530 */
531 microtime(&tv1);
532 timevalsub(&tv1, &tv0);
533 timevalsub(&utv, &tv1);
534 if (utv.tv_sec < 0)
535 timevalclear(&utv);
536 } else
537 timevalclear(&utv);
538#ifdef DEBUG
539 printf("Linux-emul(%ld): outgoing timeout (%ld/%ld)\n",
540 (long)p->p_pid, utv.tv_sec, utv.tv_usec);
541#endif
542 if ((error = copyout(&utv, args->timeout, sizeof(utv))))
543 goto select_out;
544 }
545
546select_out:
547#ifdef DEBUG
548 printf("Linux-emul(%ld): newselect_out -> %d\n", (long)p->p_pid, error);
549#endif
550 return error;
551}
552
553int
554linux_getpgid(struct proc *p, struct linux_getpgid_args *args)
555{
556 struct proc *curp;
557
558#ifdef DEBUG
559 printf("Linux-emul(%ld): getpgid(%d)\n", (long)p->p_pid, args->pid);
560#endif
561 if (args->pid != p->p_pid) {
562 if (!(curp = pfind(args->pid)))
563 return ESRCH;
564 }
565 else
566 curp = p;
567 p->p_retval[0] = curp->p_pgid;
568 return 0;
569}
570
571int
572linux_fork(struct proc *p, struct linux_fork_args *args)
573{
574 int error;
575
576#ifdef DEBUG
577 printf("Linux-emul(%ld): fork()\n", (long)p->p_pid);
578#endif
579 if ((error = fork(p, (struct fork_args *)args)) != 0)
580 return error;
581 if (p->p_retval[1] == 1)
582 p->p_retval[0] = 0;
583 return 0;
584}
585
586int
587linux_vfork(struct proc *p, struct linux_vfork_args *args)
588{
589 int error;
590
591#ifdef DEBUG
592 printf("Linux-emul(%ld): vfork()\n", (long)p->p_pid);
593#endif
594
595 if ((error = vfork(p, (struct vfork_args *)args)) != 0)
596 return error;
597 /* Are we the child? */
598 if (p->p_retval[1] == 1)
599 p->p_retval[0] = 0;
600 return 0;
601}
602
/*
 * Flag bits examined in linux_clone()'s args->flags.  Only CLONE_VM,
 * CLONE_FILES and CLONE_SIGHAND change the rfork() flags there;
 * CLONE_PID is only mentioned in a debug message.
 */
#define CLONE_VM	0x100
#define CLONE_FS	0x200
#define CLONE_FILES	0x400
#define CLONE_SIGHAND	0x800
#define CLONE_PID	0x1000
608
609int
610linux_clone(struct proc *p, struct linux_clone_args *args)
611{
612 int error, ff = RFPROC;
613 struct proc *p2;
614 int exit_signal;
615 vm_offset_t start;
616 struct rfork_args rf_args;
617
618#ifdef DEBUG
619 if (args->flags & CLONE_PID)
620 printf("linux_clone(%ld): CLONE_PID not yet supported\n",
621 (long)p->p_pid);
622 printf("linux_clone(%ld): invoked with flags %x and stack %x\n",
623 (long)p->p_pid, (unsigned int)args->flags,
624 (unsigned int)args->stack);
625#endif
626
627 if (!args->stack)
628 return (EINVAL);
629
630 exit_signal = args->flags & 0x000000ff;
631 if (exit_signal >= LINUX_NSIG)
632 return EINVAL;
633
634 if (exit_signal <= LINUX_SIGTBLSZ)
635 exit_signal = linux_to_bsd_signal[_SIG_IDX(exit_signal)];
636
637 /* RFTHREAD probably not necessary here, but it shouldn't hurt either */
638 ff |= RFTHREAD;
639
640 if (args->flags & CLONE_VM)
641 ff |= RFMEM;
642 if (args->flags & CLONE_SIGHAND)
643 ff |= RFSIGSHARE;
644 if (!(args->flags & CLONE_FILES))
645 ff |= RFFDG;
646
647 error = 0;
648 start = 0;
649
650 rf_args.flags = ff;
651 if ((error = rfork(p, &rf_args)) != 0)
652 return error;
653
654 p2 = pfind(p->p_retval[0]);
655 if (p2 == 0)
656 return ESRCH;
657
658 p2->p_sigparent = exit_signal;
659 p2->p_md.md_regs->tf_esp = (unsigned int)args->stack;
660
661#ifdef DEBUG
662 printf ("linux_clone(%ld): successful rfork to %ld\n",
663 (long)p->p_pid, (long)p2->p_pid);
664#endif
665 return 0;
666}
667
668/* XXX move */
/*
 * Argument block of the Linux mmap call handled by linux_mmap(), which
 * copies it in from args->ptr and translates it into native mmap_args.
 */
struct linux_mmap_argv {
	linux_caddr_t addr;	/* requested address */
	int len;		/* length of the mapping */
	int prot;		/* protection bits; PROT_READ is always added */
	int flags;		/* LINUX_MAP_* flags */
	int fd;			/* descriptor; replaced by -1 for LINUX_MAP_ANON */
	int pos;		/* file offset, copied to mmap_args.pos */
};
677
/*
 * linux_mmap() uses (STACK_SIZE - GUARD_SIZE) as the minimum length of a
 * LINUX_MAP_GROWSDOWN region.
 */
#define STACK_SIZE	(2 * 1024 * 1024)
#define GUARD_SIZE	(4 * PAGE_SIZE)
680int
681linux_mmap(struct proc *p, struct linux_mmap_args *args)
682{
683 struct mmap_args /* {
684 caddr_t addr;
685 size_t len;
686 int prot;
687 int flags;
688 int fd;
689 long pad;
690 off_t pos;
691 } */ bsd_args;
692 int error;
693 struct linux_mmap_argv linux_args;
694
695 if ((error = copyin((caddr_t)args->ptr, (caddr_t)&linux_args,
696 sizeof(linux_args))))
697 return error;
698#ifdef DEBUG
699 printf("Linux-emul(%ld): mmap(%p, %d, %d, 0x%08x, %d, %d)",
700 (long)p->p_pid, (void *)linux_args.addr, linux_args.len,
701 linux_args.prot, linux_args.flags, linux_args.fd, linux_args.pos);
702#endif
703 bsd_args.flags = 0;
704 if (linux_args.flags & LINUX_MAP_SHARED)
705 bsd_args.flags |= MAP_SHARED;
706 if (linux_args.flags & LINUX_MAP_PRIVATE)
707 bsd_args.flags |= MAP_PRIVATE;
708 if (linux_args.flags & LINUX_MAP_FIXED)
709 bsd_args.flags |= MAP_FIXED;
710 if (linux_args.flags & LINUX_MAP_ANON)
711 bsd_args.flags |= MAP_ANON;
712 if (linux_args.flags & LINUX_MAP_GROWSDOWN) {
713 bsd_args.flags |= MAP_STACK;
714
715 /* The linux MAP_GROWSDOWN option does not limit auto
716 * growth of the region. Linux mmap with this option
717 * takes as addr the inital BOS, and as len, the initial
718 * region size. It can then grow down from addr without
719 * limit. However, linux threads has an implicit internal
720 * limit to stack size of STACK_SIZE. Its just not
721 * enforced explicitly in linux. But, here we impose
722 * a limit of (STACK_SIZE - GUARD_SIZE) on the stack
723 * region, since we can do this with our mmap.
724 *
725 * Our mmap with MAP_STACK takes addr as the maximum
726 * downsize limit on BOS, and as len the max size of
727 * the region. It them maps the top SGROWSIZ bytes,
728 * and autgrows the region down, up to the limit
729 * in addr.
730 *
731 * If we don't use the MAP_STACK option, the effect
732 * of this code is to allocate a stack region of a
733 * fixed size of (STACK_SIZE - GUARD_SIZE).
734 */
735
736 /* This gives us TOS */
737 bsd_args.addr = linux_args.addr + linux_args.len;
738
739 /* This gives us our maximum stack size */
740 if (linux_args.len > STACK_SIZE - GUARD_SIZE)
741 bsd_args.len = linux_args.len;
742 else
743 bsd_args.len = STACK_SIZE - GUARD_SIZE;
744
745 /* This gives us a new BOS. If we're using VM_STACK, then
746 * mmap will just map the top SGROWSIZ bytes, and let
747 * the stack grow down to the limit at BOS. If we're
748 * not using VM_STACK we map the full stack, since we
749 * don't have a way to autogrow it.
750 */
751 bsd_args.addr -= bsd_args.len;
752
753 } else {
754 bsd_args.addr = linux_args.addr;
755 bsd_args.len = linux_args.len;
756 }
757
758 bsd_args.prot = linux_args.prot | PROT_READ; /* always required */
759 if (linux_args.flags & LINUX_MAP_ANON)
760 bsd_args.fd = -1;
761 else
762 bsd_args.fd = linux_args.fd;
763 bsd_args.pos = linux_args.pos;
764 bsd_args.pad = 0;
765#ifdef DEBUG
766 printf("-> (%p, %d, %d, 0x%08x, %d, %d)\n",
767 (void *)bsd_args.addr, bsd_args.len,
768 bsd_args.prot, bsd_args.flags, bsd_args.fd, (int)bsd_args.pos);
769#endif
770 return mmap(p, &bsd_args);
771}
772
773int
774linux_mremap(struct proc *p, struct linux_mremap_args *args)
775{
776 struct munmap_args /* {
777 void *addr;
778 size_t len;
779 } */ bsd_args;
780 int error = 0;
781
782#ifdef DEBUG
783 printf("Linux-emul(%ld): mremap(%p, %08x, %08x, %08x)\n",
784 (long)p->p_pid, (void *)args->addr, args->old_len, args->new_len,
785 args->flags);
786#endif
787 args->new_len = round_page(args->new_len);
788 args->old_len = round_page(args->old_len);
789
790 if (args->new_len > args->old_len) {
791 p->p_retval[0] = 0;
792 return ENOMEM;
793 }
794
795 if (args->new_len < args->old_len) {
796 bsd_args.addr = args->addr + args->new_len;
797 bsd_args.len = args->old_len - args->new_len;
798 error = munmap(p, &bsd_args);
799 }
800
801 p->p_retval[0] = error ? 0 : (int)args->addr;
802 return error;
803}
804
805int
806linux_msync(struct proc *p, struct linux_msync_args *args)
807{
808 struct msync_args bsd_args;
809
810 bsd_args.addr = args->addr;
811 bsd_args.len = args->len;
812 bsd_args.flags = 0; /* XXX ignore */
813
814 return msync(p, &bsd_args);
815}
816
817int
818linux_pipe(struct proc *p, struct linux_pipe_args *args)
819{
820 int error;
821 int reg_edx;
822
823#ifdef DEBUG
824 printf("Linux-emul(%ld): pipe(*)\n", (long)p->p_pid);
825#endif
826 reg_edx = p->p_retval[1];
827 error = pipe(p, 0);
828 if (error) {
829 p->p_retval[1] = reg_edx;
830 return error;
831 }
832
833 error = copyout(p->p_retval, args->pipefds, 2*sizeof(int));
834 if (error) {
835 p->p_retval[1] = reg_edx;
836 return error;
837 }
838
839 p->p_retval[1] = reg_edx;
840 p->p_retval[0] = 0;
841 return 0;
842}
843
844int
845linux_time(struct proc *p, struct linux_time_args *args)
846{
847 struct timeval tv;
848 linux_time_t tm;
849 int error;
850
851#ifdef DEBUG
852 printf("Linux-emul(%ld): time(*)\n", (long)p->p_pid);
853#endif
854 microtime(&tv);
855 tm = tv.tv_sec;
856 if (args->tm && (error = copyout(&tm, args->tm, sizeof(linux_time_t))))
857 return error;
858 p->p_retval[0] = tm;
859 return 0;
860}
861
/*
 * Result buffer of linux_times(), copied out to args->buf.  Every field
 * is produced with CONVTCK(), i.e. expressed in CLK_TCK ticks.
 */
struct linux_times_argv {
	long tms_utime;		/* from calcru()'s user time */
	long tms_stime;		/* from calcru()'s system time */
	long tms_cutime;	/* from p_stats->p_cru.ru_utime */
	long tms_cstime;	/* from p_stats->p_cru.ru_stime */
};
868
#define CLK_TCK 100	/* Linux uses 100 */
/*
 * Convert a struct timeval (anything with tv_sec/tv_usec members) to
 * CLK_TCK ticks.  The argument is parenthesized so that expressions
 * such as *ptr can be passed as well as plain names.
 */
#define CONVTCK(r)	((r).tv_sec * CLK_TCK + (r).tv_usec / (1000000 / CLK_TCK))
871
872int
873linux_times(struct proc *p, struct linux_times_args *args)
874{
875 struct timeval tv;
876 struct linux_times_argv tms;
877 struct rusage ru;
878 int error;
879
880#ifdef DEBUG
881 printf("Linux-emul(%ld): times(*)\n", (long)p->p_pid);
882#endif
883 calcru(p, &ru.ru_utime, &ru.ru_stime, NULL);
884
885 tms.tms_utime = CONVTCK(ru.ru_utime);
886 tms.tms_stime = CONVTCK(ru.ru_stime);
887
888 tms.tms_cutime = CONVTCK(p->p_stats->p_cru.ru_utime);
889 tms.tms_cstime = CONVTCK(p->p_stats->p_cru.ru_stime);
890
891 if ((error = copyout((caddr_t)&tms, (caddr_t)args->buf,
892 sizeof(struct linux_times_argv))))
893 return error;
894
895 microuptime(&tv);
896 p->p_retval[0] = (int)CONVTCK(tv);
897 return 0;
898}
899
900int
901linux_newuname(struct proc *p, struct linux_newuname_args *args)
902{
903 struct linux_new_utsname utsname;
904 char *osrelease, *osname;
905
906#ifdef DEBUG
907 printf("Linux-emul(%ld): newuname(*)\n", (long)p->p_pid);
908#endif
909
910 osname = linux_get_osname(p);
911 osrelease = linux_get_osrelease(p);
912
913 bzero(&utsname, sizeof(struct linux_new_utsname));
914 strncpy(utsname.sysname, osname, LINUX_MAX_UTSNAME-1);
915 strncpy(utsname.nodename, hostname, LINUX_MAX_UTSNAME-1);
916 strncpy(utsname.release, osrelease, LINUX_MAX_UTSNAME-1);
917 strncpy(utsname.version, version, LINUX_MAX_UTSNAME-1);
918 strncpy(utsname.machine, machine, LINUX_MAX_UTSNAME-1);
919 strncpy(utsname.domainname, domainname, LINUX_MAX_UTSNAME-1);
920
921 return (copyout((caddr_t)&utsname, (caddr_t)args->buf,
922 sizeof(struct linux_new_utsname)));
923}
924
/*
 * Time pair that linux_utime() copies in from args->times.  Each value
 * becomes the tv_sec of a struct timeval whose tv_usec is 0.
 */
struct linux_utimbuf {
	linux_time_t l_actime;	/* becomes tv[0].tv_sec */
	linux_time_t l_modtime;	/* becomes tv[1].tv_sec */
};
929
930int
931linux_utime(struct proc *p, struct linux_utime_args *args)
932{
933 struct utimes_args /* {
934 char *path;
935 struct timeval *tptr;
936 } */ bsdutimes;
937 struct timeval tv[2], *tvp;
938 struct linux_utimbuf lut;
939 int error;
940 caddr_t sg;
941
942 sg = stackgap_init();
943 CHECKALTEXIST(p, &sg, args->fname);
944
945#ifdef DEBUG
946 printf("Linux-emul(%ld): utime(%s, *)\n", (long)p->p_pid, args->fname);
947#endif
948 if (args->times) {
949 if ((error = copyin(args->times, &lut, sizeof lut)))
950 return error;
951 tv[0].tv_sec = lut.l_actime;
952 tv[0].tv_usec = 0;
953 tv[1].tv_sec = lut.l_modtime;
954 tv[1].tv_usec = 0;
955 /* so that utimes can copyin */
956 tvp = (struct timeval *)stackgap_alloc(&sg, sizeof(tv));
957 if (tvp == NULL)
958 return (ENAMETOOLONG);
957 if ((error = copyout(tv, tvp, sizeof(tv))))
958 return error;
959 bsdutimes.tptr = tvp;
960 } else
961 bsdutimes.tptr = NULL;
962
963 bsdutimes.path = args->fname;
964 return utimes(p, &bsdutimes);
965}
966
/*
 * Linux wait option bit; linux_waitpid() and linux_wait4() translate it
 * to WLINUXCLONE.
 */
#define __WCLONE 0x80000000
968
969int
970linux_waitpid(struct proc *p, struct linux_waitpid_args *args)
971{
972 struct wait_args /* {
973 int pid;
974 int *status;
975 int options;
976 struct rusage *rusage;
977 } */ tmp;
978 int error, tmpstat;
979
980#ifdef DEBUG
981 printf("Linux-emul(%ld): waitpid(%d, %p, %d)\n",
982 (long)p->p_pid, args->pid, (void *)args->status, args->options);
983#endif
984 tmp.pid = args->pid;
985 tmp.status = args->status;
986 tmp.options = (args->options & (WNOHANG | WUNTRACED));
987 /* WLINUXCLONE should be equal to __WCLONE, but we make sure */
988 if (args->options & __WCLONE)
989 tmp.options |= WLINUXCLONE;
990 tmp.rusage = NULL;
991
992 if ((error = wait4(p, &tmp)) != 0)
993 return error;
994
995 if (args->status) {
996 if ((error = copyin(args->status, &tmpstat, sizeof(int))) != 0)
997 return error;
998 tmpstat &= 0xffff;
999 if (WIFSIGNALED(tmpstat))
1000 tmpstat = (tmpstat & 0xffffff80) |
1001 BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat));
1002 else if (WIFSTOPPED(tmpstat))
1003 tmpstat = (tmpstat & 0xffff00ff) |
1004 (BSD_TO_LINUX_SIGNAL(WSTOPSIG(tmpstat)) << 8);
1005 return copyout(&tmpstat, args->status, sizeof(int));
1006 } else
1007 return 0;
1008}
1009
1010int
1011linux_wait4(struct proc *p, struct linux_wait4_args *args)
1012{
1013 struct wait_args /* {
1014 int pid;
1015 int *status;
1016 int options;
1017 struct rusage *rusage;
1018 } */ tmp;
1019 int error, tmpstat;
1020
1021#ifdef DEBUG
1022 printf("Linux-emul(%ld): wait4(%d, %p, %d, %p)\n",
1023 (long)p->p_pid, args->pid, (void *)args->status, args->options,
1024 (void *)args->rusage);
1025#endif
1026 tmp.pid = args->pid;
1027 tmp.status = args->status;
1028 tmp.options = (args->options & (WNOHANG | WUNTRACED));
1029 /* WLINUXCLONE should be equal to __WCLONE, but we make sure */
1030 if (args->options & __WCLONE)
1031 tmp.options |= WLINUXCLONE;
1032 tmp.rusage = args->rusage;
1033
1034 if ((error = wait4(p, &tmp)) != 0)
1035 return error;
1036
1037 SIGDELSET(p->p_siglist, SIGCHLD);
1038
1039 if (args->status) {
1040 if ((error = copyin(args->status, &tmpstat, sizeof(int))) != 0)
1041 return error;
1042 tmpstat &= 0xffff;
1043 if (WIFSIGNALED(tmpstat))
1044 tmpstat = (tmpstat & 0xffffff80) |
1045 BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat));
1046 else if (WIFSTOPPED(tmpstat))
1047 tmpstat = (tmpstat & 0xffff00ff) |
1048 (BSD_TO_LINUX_SIGNAL(WSTOPSIG(tmpstat)) << 8);
1049 return copyout(&tmpstat, args->status, sizeof(int));
1050 } else
1051 return 0;
1052}
1053
1054int
1055linux_mknod(struct proc *p, struct linux_mknod_args *args)
1056{
1057 caddr_t sg;
1058 struct mknod_args bsd_mknod;
1059 struct mkfifo_args bsd_mkfifo;
1060
1061 sg = stackgap_init();
1062
1063 CHECKALTCREAT(p, &sg, args->path);
1064
1065#ifdef DEBUG
1066 printf("Linux-emul(%ld): mknod(%s, %d, %d)\n",
1067 (long)p->p_pid, args->path, args->mode, args->dev);
1068#endif
1069
1070 if (args->mode & S_IFIFO) {
1071 bsd_mkfifo.path = args->path;
1072 bsd_mkfifo.mode = args->mode;
1073 return mkfifo(p, &bsd_mkfifo);
1074 } else {
1075 bsd_mknod.path = args->path;
1076 bsd_mknod.mode = args->mode;
1077 bsd_mknod.dev = args->dev;
1078 return mknod(p, &bsd_mknod);
1079 }
1080}
1081
1082/*
1083 * UGH! This is just about the dumbest idea I've ever heard!!
1084 */
1085int
1086linux_personality(struct proc *p, struct linux_personality_args *args)
1087{
1088#ifdef DEBUG
1089 printf("Linux-emul(%ld): personality(%d)\n",
1090 (long)p->p_pid, args->per);
1091#endif
1092 if (args->per != 0)
1093 return EINVAL;
1094
1095 /* Yes Jim, it's still a Linux... */
1096 p->p_retval[0] = 0;
1097 return 0;
1098}
1099
1100/*
1101 * Wrappers for get/setitimer for debugging..
1102 */
1103int
1104linux_setitimer(struct proc *p, struct linux_setitimer_args *args)
1105{
1106 struct setitimer_args bsa;
1107 struct itimerval foo;
1108 int error;
1109
1110#ifdef DEBUG
1111 printf("Linux-emul(%ld): setitimer(%p, %p)\n",
1112 (long)p->p_pid, (void *)args->itv, (void *)args->oitv);
1113#endif
1114 bsa.which = args->which;
1115 bsa.itv = args->itv;
1116 bsa.oitv = args->oitv;
1117 if (args->itv) {
1118 if ((error = copyin((caddr_t)args->itv, (caddr_t)&foo,
1119 sizeof(foo))))
1120 return error;
1121#ifdef DEBUG
1122 printf("setitimer: value: sec: %ld, usec: %ld\n",
1123 foo.it_value.tv_sec, foo.it_value.tv_usec);
1124 printf("setitimer: interval: sec: %ld, usec: %ld\n",
1125 foo.it_interval.tv_sec, foo.it_interval.tv_usec);
1126#endif
1127 }
1128 return setitimer(p, &bsa);
1129}
1130
1131int
1132linux_getitimer(struct proc *p, struct linux_getitimer_args *args)
1133{
1134 struct getitimer_args bsa;
1135#ifdef DEBUG
1136 printf("Linux-emul(%ld): getitimer(%p)\n",
1137 (long)p->p_pid, (void *)args->itv);
1138#endif
1139 bsa.which = args->which;
1140 bsa.itv = args->itv;
1141 return getitimer(p, &bsa);
1142}
1143
1144int
1145linux_ioperm(struct proc *p, struct linux_ioperm_args *args)
1146{
1147 struct sysarch_args sa;
1148 struct i386_ioperm_args *iia;
1149 caddr_t sg;
1150
1151 sg = stackgap_init();
1152 iia = stackgap_alloc(&sg, sizeof(struct i386_ioperm_args));
1153 iia->start = args->start;
1154 iia->length = args->length;
1155 iia->enable = args->enable;
1156 sa.op = I386_SET_IOPERM;
1157 sa.parms = (char *)iia;
1158 return sysarch(p, &sa);
1159}
1160
1161int
1162linux_iopl(struct proc *p, struct linux_iopl_args *args)
1163{
1164 int error;
1165
1166 if (args->level < 0 || args->level > 3)
1167 return (EINVAL);
1168 if ((error = suser(p)) != 0)
1169 return (error);
1170 if (securelevel > 0)
1171 return (EPERM);
1172 p->p_md.md_regs->tf_eflags = (p->p_md.md_regs->tf_eflags & ~PSL_IOPL) |
1173 (args->level * (PSL_IOPL / 3));
1174 return (0);
1175}
1176
1177int
1178linux_nice(struct proc *p, struct linux_nice_args *args)
1179{
1180 struct setpriority_args bsd_args;
1181
1182 bsd_args.which = PRIO_PROCESS;
1183 bsd_args.who = 0; /* current process */
1184 bsd_args.prio = args->inc;
1185 return setpriority(p, &bsd_args);
1186}
1187
1188int
1189linux_setgroups(p, uap)
1190 struct proc *p;
1191 struct linux_setgroups_args *uap;
1192{
1193 struct pcred *pc;
1194 linux_gid_t linux_gidset[NGROUPS];
1195 gid_t *bsd_gidset;
1196 int ngrp, error;
1197
1198 pc = p->p_cred;
1199 ngrp = uap->gidsetsize;
1200
1201 /*
1202 * cr_groups[0] holds egid. Setting the whole set from
1203 * the supplied set will cause egid to be changed too.
1204 * Keep cr_groups[0] unchanged to prevent that.
1205 */
1206
1207 if ((error = suser(p)) != 0)
1208 return (error);
1209
1210 if (ngrp >= NGROUPS)
1211 return (EINVAL);
1212
1213 pc->pc_ucred = crcopy(pc->pc_ucred);
1214 if (ngrp > 0) {
1215 error = copyin((caddr_t)uap->gidset, (caddr_t)linux_gidset,
1216 ngrp * sizeof(linux_gid_t));
1217 if (error)
1218 return (error);
1219
1220 pc->pc_ucred->cr_ngroups = ngrp + 1;
1221
1222 bsd_gidset = pc->pc_ucred->cr_groups;
1223 ngrp--;
1224 while (ngrp >= 0) {
1225 bsd_gidset[ngrp + 1] = linux_gidset[ngrp];
1226 ngrp--;
1227 }
1228 }
1229 else
1230 pc->pc_ucred->cr_ngroups = 1;
1231
1232 setsugid(p);
1233 return (0);
1234}
1235
1236int
1237linux_getgroups(p, uap)
1238 struct proc *p;
1239 struct linux_getgroups_args *uap;
1240{
1241 struct pcred *pc;
1242 linux_gid_t linux_gidset[NGROUPS];
1243 gid_t *bsd_gidset;
1244 int bsd_gidsetsz, ngrp, error;
1245
1246 pc = p->p_cred;
1247 bsd_gidset = pc->pc_ucred->cr_groups;
1248 bsd_gidsetsz = pc->pc_ucred->cr_ngroups - 1;
1249
1250 /*
1251 * cr_groups[0] holds egid. Returning the whole set
1252 * here will cause a duplicate. Exclude cr_groups[0]
1253 * to prevent that.
1254 */
1255
1256 if ((ngrp = uap->gidsetsize) == 0) {
1257 p->p_retval[0] = bsd_gidsetsz;
1258 return (0);
1259 }
1260
1261 if (ngrp < bsd_gidsetsz)
1262 return (EINVAL);
1263
1264 ngrp = 0;
1265 while (ngrp < bsd_gidsetsz) {
1266 linux_gidset[ngrp] = bsd_gidset[ngrp + 1];
1267 ngrp++;
1268 }
1269
1270 if ((error = copyout((caddr_t)linux_gidset, (caddr_t)uap->gidset,
1271 ngrp * sizeof(linux_gid_t))))
1272 return (error);
1273
1274 p->p_retval[0] = ngrp;
1275 return (0);
1276}
1277
1278int
1279linux_setrlimit(p, uap)
1280 struct proc *p;
1281 struct linux_setrlimit_args *uap;
1282{
1283 struct osetrlimit_args bsd;
1284
1285#ifdef DEBUG
1286 printf("Linux-emul(%ld): setrlimit(%d, %p)\n",
1287 (long)p->p_pid, uap->resource, (void *)uap->rlim);
1288#endif
1289
1290 if (uap->resource >= LINUX_RLIM_NLIMITS)
1291 return EINVAL;
1292
1293 bsd.which = linux_to_bsd_resource[uap->resource];
1294
1295 if (bsd.which == -1)
1296 return EINVAL;
1297
1298 bsd.rlp = uap->rlim;
1299 return osetrlimit(p, &bsd);
1300}
1301
1302int
1303linux_getrlimit(p, uap)
1304 struct proc *p;
1305 struct linux_getrlimit_args *uap;
1306{
1307 struct ogetrlimit_args bsd;
1308
1309#ifdef DEBUG
1310 printf("Linux-emul(%ld): getrlimit(%d, %p)\n",
1311 (long)p->p_pid, uap->resource, (void *)uap->rlim);
1312#endif
1313
1314 if (uap->resource >= LINUX_RLIM_NLIMITS)
1315 return EINVAL;
1316
1317 bsd.which = linux_to_bsd_resource[uap->resource];
1318
1319 if (bsd.which == -1)
1320 return EINVAL;
1321
1322 bsd.rlp = uap->rlim;
1323 return ogetrlimit(p, &bsd);
1324}
1325
1326int
1327linux_sched_setscheduler(p, uap)
1328 struct proc *p;
1329 struct linux_sched_setscheduler_args *uap;
1330{
1331 struct sched_setscheduler_args bsd;
1332
1333#ifdef DEBUG
1334 printf("Linux-emul(%ld): sched_setscheduler(%d, %d, %p)\n",
1335 (long)p->p_pid, uap->pid, uap->policy, (const void *)uap->param);
1336#endif
1337
1338 switch (uap->policy) {
1339 case LINUX_SCHED_OTHER:
1340 bsd.policy = SCHED_OTHER;
1341 break;
1342 case LINUX_SCHED_FIFO:
1343 bsd.policy = SCHED_FIFO;
1344 break;
1345 case LINUX_SCHED_RR:
1346 bsd.policy = SCHED_RR;
1347 break;
1348 default:
1349 return EINVAL;
1350 }
1351
1352 bsd.pid = uap->pid;
1353 bsd.param = uap->param;
1354 return sched_setscheduler(p, &bsd);
1355}
1356
1357int
1358linux_sched_getscheduler(p, uap)
1359 struct proc *p;
1360 struct linux_sched_getscheduler_args *uap;
1361{
1362 struct sched_getscheduler_args bsd;
1363 int error;
1364
1365#ifdef DEBUG
1366 printf("Linux-emul(%ld): sched_getscheduler(%d)\n",
1367 (long)p->p_pid, uap->pid);
1368#endif
1369
1370 bsd.pid = uap->pid;
1371 error = sched_getscheduler(p, &bsd);
1372
1373 switch (p->p_retval[0]) {
1374 case SCHED_OTHER:
1375 p->p_retval[0] = LINUX_SCHED_OTHER;
1376 break;
1377 case SCHED_FIFO:
1378 p->p_retval[0] = LINUX_SCHED_FIFO;
1379 break;
1380 case SCHED_RR:
1381 p->p_retval[0] = LINUX_SCHED_RR;
1382 break;
1383 }
1384
1385 return error;
1386}
1387
/*
 * LDT entry description that the write_ldt path of linux_modify_ldt()
 * copies in from uap->ptr and converts into a native segment descriptor.
 */
struct linux_descriptor {
	unsigned int entry_number;	/* LDT slot; becomes ldt->start */
	unsigned long base_addr;	/* split into sd_lobase/sd_hibase */
	unsigned int limit;		/* split into sd_lolimit/sd_hilimit */
	unsigned int seg_32bit:1;	/* becomes sd_def32 */
	unsigned int contents:2;	/* shifted into sd_type */
	unsigned int read_exec_only:1;	/* inverted, shifted into sd_type */
	unsigned int limit_in_pages:1;	/* becomes sd_gran */
	unsigned int seg_not_present:1;	/* inverted, becomes sd_p */
	unsigned int useable:1;		/* not read by linux_modify_ldt() */
};
1399
1400int
1401linux_modify_ldt(p, uap)
1402 struct proc *p;
1403 struct linux_modify_ldt_args *uap;
1404{
1405 int error;
1406 caddr_t sg;
1407 struct sysarch_args args;
1408 struct i386_ldt_args *ldt;
1409 struct linux_descriptor ld;
1410 union descriptor *desc;
1411
1412 sg = stackgap_init();
1413
1414 if (uap->ptr == NULL)
1415 return (EINVAL);
1416
1417 switch (uap->func) {
1418 case 0x00: /* read_ldt */
1419 ldt = stackgap_alloc(&sg, sizeof(*ldt));
1420 ldt->start = 0;
1421 ldt->descs = uap->ptr;
1422 ldt->num = uap->bytecount / sizeof(union descriptor);
1423 args.op = I386_GET_LDT;
1424 args.parms = (char*)ldt;
1425 error = sysarch(p, &args);
1426 p->p_retval[0] *= sizeof(union descriptor);
1427 break;
1428 case 0x01: /* write_ldt */
1429 case 0x11: /* write_ldt */
1430 if (uap->bytecount != sizeof(ld))
1431 return (EINVAL);
1432
1433 error = copyin(uap->ptr, &ld, sizeof(ld));
1434 if (error)
1435 return (error);
1436
1437 ldt = stackgap_alloc(&sg, sizeof(*ldt));
1438 desc = stackgap_alloc(&sg, sizeof(*desc));
1439 ldt->start = ld.entry_number;
1440 ldt->descs = desc;
1441 ldt->num = 1;
1442 desc->sd.sd_lolimit = (ld.limit & 0x0000ffff);
1443 desc->sd.sd_hilimit = (ld.limit & 0x000f0000) >> 16;
1444 desc->sd.sd_lobase = (ld.base_addr & 0x00ffffff);
1445 desc->sd.sd_hibase = (ld.base_addr & 0xff000000) >> 24;
1446 desc->sd.sd_type = SDT_MEMRO | ((ld.read_exec_only ^ 1) << 1) |
1447 (ld.contents << 2);
1448 desc->sd.sd_dpl = 3;
1449 desc->sd.sd_p = (ld.seg_not_present ^ 1);
1450 desc->sd.sd_xx = 0;
1451 desc->sd.sd_def32 = ld.seg_32bit;
1452 desc->sd.sd_gran = ld.limit_in_pages;
1453 args.op = I386_SET_LDT;
1454 args.parms = (char*)ldt;
1455 error = sysarch(p, &args);
1456 break;
1457 default:
1458 error = EINVAL;
1459 break;
1460 }
1461
1462 if (error == EOPNOTSUPP) {
1463 printf("linux: modify_ldt needs kernel option USER_LDT\n");
1464 error = ENOSYS;
1465 }
1466
1467 return (error);
1468}
959 if ((error = copyout(tv, tvp, sizeof(tv))))
960 return error;
961 bsdutimes.tptr = tvp;
962 } else
963 bsdutimes.tptr = NULL;
964
965 bsdutimes.path = args->fname;
966 return utimes(p, &bsdutimes);
967}
968
/*
 * Linux wait option bit; linux_waitpid() and linux_wait4() translate it
 * to WLINUXCLONE.
 */
#define __WCLONE 0x80000000
970
971int
972linux_waitpid(struct proc *p, struct linux_waitpid_args *args)
973{
974 struct wait_args /* {
975 int pid;
976 int *status;
977 int options;
978 struct rusage *rusage;
979 } */ tmp;
980 int error, tmpstat;
981
982#ifdef DEBUG
983 printf("Linux-emul(%ld): waitpid(%d, %p, %d)\n",
984 (long)p->p_pid, args->pid, (void *)args->status, args->options);
985#endif
986 tmp.pid = args->pid;
987 tmp.status = args->status;
988 tmp.options = (args->options & (WNOHANG | WUNTRACED));
989 /* WLINUXCLONE should be equal to __WCLONE, but we make sure */
990 if (args->options & __WCLONE)
991 tmp.options |= WLINUXCLONE;
992 tmp.rusage = NULL;
993
994 if ((error = wait4(p, &tmp)) != 0)
995 return error;
996
997 if (args->status) {
998 if ((error = copyin(args->status, &tmpstat, sizeof(int))) != 0)
999 return error;
1000 tmpstat &= 0xffff;
1001 if (WIFSIGNALED(tmpstat))
1002 tmpstat = (tmpstat & 0xffffff80) |
1003 BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat));
1004 else if (WIFSTOPPED(tmpstat))
1005 tmpstat = (tmpstat & 0xffff00ff) |
1006 (BSD_TO_LINUX_SIGNAL(WSTOPSIG(tmpstat)) << 8);
1007 return copyout(&tmpstat, args->status, sizeof(int));
1008 } else
1009 return 0;
1010}
1011
1012int
1013linux_wait4(struct proc *p, struct linux_wait4_args *args)
1014{
1015 struct wait_args /* {
1016 int pid;
1017 int *status;
1018 int options;
1019 struct rusage *rusage;
1020 } */ tmp;
1021 int error, tmpstat;
1022
1023#ifdef DEBUG
1024 printf("Linux-emul(%ld): wait4(%d, %p, %d, %p)\n",
1025 (long)p->p_pid, args->pid, (void *)args->status, args->options,
1026 (void *)args->rusage);
1027#endif
1028 tmp.pid = args->pid;
1029 tmp.status = args->status;
1030 tmp.options = (args->options & (WNOHANG | WUNTRACED));
1031 /* WLINUXCLONE should be equal to __WCLONE, but we make sure */
1032 if (args->options & __WCLONE)
1033 tmp.options |= WLINUXCLONE;
1034 tmp.rusage = args->rusage;
1035
1036 if ((error = wait4(p, &tmp)) != 0)
1037 return error;
1038
1039 SIGDELSET(p->p_siglist, SIGCHLD);
1040
1041 if (args->status) {
1042 if ((error = copyin(args->status, &tmpstat, sizeof(int))) != 0)
1043 return error;
1044 tmpstat &= 0xffff;
1045 if (WIFSIGNALED(tmpstat))
1046 tmpstat = (tmpstat & 0xffffff80) |
1047 BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat));
1048 else if (WIFSTOPPED(tmpstat))
1049 tmpstat = (tmpstat & 0xffff00ff) |
1050 (BSD_TO_LINUX_SIGNAL(WSTOPSIG(tmpstat)) << 8);
1051 return copyout(&tmpstat, args->status, sizeof(int));
1052 } else
1053 return 0;
1054}
1055
1056int
1057linux_mknod(struct proc *p, struct linux_mknod_args *args)
1058{
1059 caddr_t sg;
1060 struct mknod_args bsd_mknod;
1061 struct mkfifo_args bsd_mkfifo;
1062
1063 sg = stackgap_init();
1064
1065 CHECKALTCREAT(p, &sg, args->path);
1066
1067#ifdef DEBUG
1068 printf("Linux-emul(%ld): mknod(%s, %d, %d)\n",
1069 (long)p->p_pid, args->path, args->mode, args->dev);
1070#endif
1071
1072 if (args->mode & S_IFIFO) {
1073 bsd_mkfifo.path = args->path;
1074 bsd_mkfifo.mode = args->mode;
1075 return mkfifo(p, &bsd_mkfifo);
1076 } else {
1077 bsd_mknod.path = args->path;
1078 bsd_mknod.mode = args->mode;
1079 bsd_mknod.dev = args->dev;
1080 return mknod(p, &bsd_mknod);
1081 }
1082}
1083
1084/*
1085 * UGH! This is just about the dumbest idea I've ever heard!!
1086 */
1087int
1088linux_personality(struct proc *p, struct linux_personality_args *args)
1089{
1090#ifdef DEBUG
1091 printf("Linux-emul(%ld): personality(%d)\n",
1092 (long)p->p_pid, args->per);
1093#endif
1094 if (args->per != 0)
1095 return EINVAL;
1096
1097 /* Yes Jim, it's still a Linux... */
1098 p->p_retval[0] = 0;
1099 return 0;
1100}
1101
/*
 * Wrappers for getitimer/setitimer, kept mainly for debugging.
 */
1105int
1106linux_setitimer(struct proc *p, struct linux_setitimer_args *args)
1107{
1108 struct setitimer_args bsa;
1109 struct itimerval foo;
1110 int error;
1111
1112#ifdef DEBUG
1113 printf("Linux-emul(%ld): setitimer(%p, %p)\n",
1114 (long)p->p_pid, (void *)args->itv, (void *)args->oitv);
1115#endif
1116 bsa.which = args->which;
1117 bsa.itv = args->itv;
1118 bsa.oitv = args->oitv;
1119 if (args->itv) {
1120 if ((error = copyin((caddr_t)args->itv, (caddr_t)&foo,
1121 sizeof(foo))))
1122 return error;
1123#ifdef DEBUG
1124 printf("setitimer: value: sec: %ld, usec: %ld\n",
1125 foo.it_value.tv_sec, foo.it_value.tv_usec);
1126 printf("setitimer: interval: sec: %ld, usec: %ld\n",
1127 foo.it_interval.tv_sec, foo.it_interval.tv_usec);
1128#endif
1129 }
1130 return setitimer(p, &bsa);
1131}
1132
1133int
1134linux_getitimer(struct proc *p, struct linux_getitimer_args *args)
1135{
1136 struct getitimer_args bsa;
1137#ifdef DEBUG
1138 printf("Linux-emul(%ld): getitimer(%p)\n",
1139 (long)p->p_pid, (void *)args->itv);
1140#endif
1141 bsa.which = args->which;
1142 bsa.itv = args->itv;
1143 return getitimer(p, &bsa);
1144}
1145
1146int
1147linux_ioperm(struct proc *p, struct linux_ioperm_args *args)
1148{
1149 struct sysarch_args sa;
1150 struct i386_ioperm_args *iia;
1151 caddr_t sg;
1152
1153 sg = stackgap_init();
1154 iia = stackgap_alloc(&sg, sizeof(struct i386_ioperm_args));
1155 iia->start = args->start;
1156 iia->length = args->length;
1157 iia->enable = args->enable;
1158 sa.op = I386_SET_IOPERM;
1159 sa.parms = (char *)iia;
1160 return sysarch(p, &sa);
1161}
1162
1163int
1164linux_iopl(struct proc *p, struct linux_iopl_args *args)
1165{
1166 int error;
1167
1168 if (args->level < 0 || args->level > 3)
1169 return (EINVAL);
1170 if ((error = suser(p)) != 0)
1171 return (error);
1172 if (securelevel > 0)
1173 return (EPERM);
1174 p->p_md.md_regs->tf_eflags = (p->p_md.md_regs->tf_eflags & ~PSL_IOPL) |
1175 (args->level * (PSL_IOPL / 3));
1176 return (0);
1177}
1178
1179int
1180linux_nice(struct proc *p, struct linux_nice_args *args)
1181{
1182 struct setpriority_args bsd_args;
1183
1184 bsd_args.which = PRIO_PROCESS;
1185 bsd_args.who = 0; /* current process */
1186 bsd_args.prio = args->inc;
1187 return setpriority(p, &bsd_args);
1188}
1189
1190int
1191linux_setgroups(p, uap)
1192 struct proc *p;
1193 struct linux_setgroups_args *uap;
1194{
1195 struct pcred *pc;
1196 linux_gid_t linux_gidset[NGROUPS];
1197 gid_t *bsd_gidset;
1198 int ngrp, error;
1199
1200 pc = p->p_cred;
1201 ngrp = uap->gidsetsize;
1202
1203 /*
1204 * cr_groups[0] holds egid. Setting the whole set from
1205 * the supplied set will cause egid to be changed too.
1206 * Keep cr_groups[0] unchanged to prevent that.
1207 */
1208
1209 if ((error = suser(p)) != 0)
1210 return (error);
1211
1212 if (ngrp >= NGROUPS)
1213 return (EINVAL);
1214
1215 pc->pc_ucred = crcopy(pc->pc_ucred);
1216 if (ngrp > 0) {
1217 error = copyin((caddr_t)uap->gidset, (caddr_t)linux_gidset,
1218 ngrp * sizeof(linux_gid_t));
1219 if (error)
1220 return (error);
1221
1222 pc->pc_ucred->cr_ngroups = ngrp + 1;
1223
1224 bsd_gidset = pc->pc_ucred->cr_groups;
1225 ngrp--;
1226 while (ngrp >= 0) {
1227 bsd_gidset[ngrp + 1] = linux_gidset[ngrp];
1228 ngrp--;
1229 }
1230 }
1231 else
1232 pc->pc_ucred->cr_ngroups = 1;
1233
1234 setsugid(p);
1235 return (0);
1236}
1237
1238int
1239linux_getgroups(p, uap)
1240 struct proc *p;
1241 struct linux_getgroups_args *uap;
1242{
1243 struct pcred *pc;
1244 linux_gid_t linux_gidset[NGROUPS];
1245 gid_t *bsd_gidset;
1246 int bsd_gidsetsz, ngrp, error;
1247
1248 pc = p->p_cred;
1249 bsd_gidset = pc->pc_ucred->cr_groups;
1250 bsd_gidsetsz = pc->pc_ucred->cr_ngroups - 1;
1251
1252 /*
1253 * cr_groups[0] holds egid. Returning the whole set
1254 * here will cause a duplicate. Exclude cr_groups[0]
1255 * to prevent that.
1256 */
1257
1258 if ((ngrp = uap->gidsetsize) == 0) {
1259 p->p_retval[0] = bsd_gidsetsz;
1260 return (0);
1261 }
1262
1263 if (ngrp < bsd_gidsetsz)
1264 return (EINVAL);
1265
1266 ngrp = 0;
1267 while (ngrp < bsd_gidsetsz) {
1268 linux_gidset[ngrp] = bsd_gidset[ngrp + 1];
1269 ngrp++;
1270 }
1271
1272 if ((error = copyout((caddr_t)linux_gidset, (caddr_t)uap->gidset,
1273 ngrp * sizeof(linux_gid_t))))
1274 return (error);
1275
1276 p->p_retval[0] = ngrp;
1277 return (0);
1278}
1279
1280int
1281linux_setrlimit(p, uap)
1282 struct proc *p;
1283 struct linux_setrlimit_args *uap;
1284{
1285 struct osetrlimit_args bsd;
1286
1287#ifdef DEBUG
1288 printf("Linux-emul(%ld): setrlimit(%d, %p)\n",
1289 (long)p->p_pid, uap->resource, (void *)uap->rlim);
1290#endif
1291
1292 if (uap->resource >= LINUX_RLIM_NLIMITS)
1293 return EINVAL;
1294
1295 bsd.which = linux_to_bsd_resource[uap->resource];
1296
1297 if (bsd.which == -1)
1298 return EINVAL;
1299
1300 bsd.rlp = uap->rlim;
1301 return osetrlimit(p, &bsd);
1302}
1303
1304int
1305linux_getrlimit(p, uap)
1306 struct proc *p;
1307 struct linux_getrlimit_args *uap;
1308{
1309 struct ogetrlimit_args bsd;
1310
1311#ifdef DEBUG
1312 printf("Linux-emul(%ld): getrlimit(%d, %p)\n",
1313 (long)p->p_pid, uap->resource, (void *)uap->rlim);
1314#endif
1315
1316 if (uap->resource >= LINUX_RLIM_NLIMITS)
1317 return EINVAL;
1318
1319 bsd.which = linux_to_bsd_resource[uap->resource];
1320
1321 if (bsd.which == -1)
1322 return EINVAL;
1323
1324 bsd.rlp = uap->rlim;
1325 return ogetrlimit(p, &bsd);
1326}
1327
1328int
1329linux_sched_setscheduler(p, uap)
1330 struct proc *p;
1331 struct linux_sched_setscheduler_args *uap;
1332{
1333 struct sched_setscheduler_args bsd;
1334
1335#ifdef DEBUG
1336 printf("Linux-emul(%ld): sched_setscheduler(%d, %d, %p)\n",
1337 (long)p->p_pid, uap->pid, uap->policy, (const void *)uap->param);
1338#endif
1339
1340 switch (uap->policy) {
1341 case LINUX_SCHED_OTHER:
1342 bsd.policy = SCHED_OTHER;
1343 break;
1344 case LINUX_SCHED_FIFO:
1345 bsd.policy = SCHED_FIFO;
1346 break;
1347 case LINUX_SCHED_RR:
1348 bsd.policy = SCHED_RR;
1349 break;
1350 default:
1351 return EINVAL;
1352 }
1353
1354 bsd.pid = uap->pid;
1355 bsd.param = uap->param;
1356 return sched_setscheduler(p, &bsd);
1357}
1358
1359int
1360linux_sched_getscheduler(p, uap)
1361 struct proc *p;
1362 struct linux_sched_getscheduler_args *uap;
1363{
1364 struct sched_getscheduler_args bsd;
1365 int error;
1366
1367#ifdef DEBUG
1368 printf("Linux-emul(%ld): sched_getscheduler(%d)\n",
1369 (long)p->p_pid, uap->pid);
1370#endif
1371
1372 bsd.pid = uap->pid;
1373 error = sched_getscheduler(p, &bsd);
1374
1375 switch (p->p_retval[0]) {
1376 case SCHED_OTHER:
1377 p->p_retval[0] = LINUX_SCHED_OTHER;
1378 break;
1379 case SCHED_FIFO:
1380 p->p_retval[0] = LINUX_SCHED_FIFO;
1381 break;
1382 case SCHED_RR:
1383 p->p_retval[0] = LINUX_SCHED_RR;
1384 break;
1385 }
1386
1387 return error;
1388}
1389
/*
 * Description of one LDT entry as copied in from the Linux process by
 * linux_modify_ldt() for its write_ldt functions.  The field comments
 * describe how linux_modify_ldt() consumes each member.
 */
struct linux_descriptor {
	unsigned	entry_number;		/* LDT slot to set */
	unsigned long	base_addr;		/* split into sd_lobase/sd_hibase */
	unsigned	limit;			/* low 20 bits: sd_lolimit/sd_hilimit */
	unsigned	seg_32bit : 1;		/* copied to sd_def32 */
	unsigned	contents : 2;		/* placed in bits 2-3 of sd_type */
	unsigned	read_exec_only : 1;	/* inverted into bit 1 of sd_type */
	unsigned	limit_in_pages : 1;	/* copied to sd_gran */
	unsigned	seg_not_present : 1;	/* inverted into sd_p */
	unsigned	useable : 1;		/* not read by linux_modify_ldt() */
};
1401
1402int
1403linux_modify_ldt(p, uap)
1404 struct proc *p;
1405 struct linux_modify_ldt_args *uap;
1406{
1407 int error;
1408 caddr_t sg;
1409 struct sysarch_args args;
1410 struct i386_ldt_args *ldt;
1411 struct linux_descriptor ld;
1412 union descriptor *desc;
1413
1414 sg = stackgap_init();
1415
1416 if (uap->ptr == NULL)
1417 return (EINVAL);
1418
1419 switch (uap->func) {
1420 case 0x00: /* read_ldt */
1421 ldt = stackgap_alloc(&sg, sizeof(*ldt));
1422 ldt->start = 0;
1423 ldt->descs = uap->ptr;
1424 ldt->num = uap->bytecount / sizeof(union descriptor);
1425 args.op = I386_GET_LDT;
1426 args.parms = (char*)ldt;
1427 error = sysarch(p, &args);
1428 p->p_retval[0] *= sizeof(union descriptor);
1429 break;
1430 case 0x01: /* write_ldt */
1431 case 0x11: /* write_ldt */
1432 if (uap->bytecount != sizeof(ld))
1433 return (EINVAL);
1434
1435 error = copyin(uap->ptr, &ld, sizeof(ld));
1436 if (error)
1437 return (error);
1438
1439 ldt = stackgap_alloc(&sg, sizeof(*ldt));
1440 desc = stackgap_alloc(&sg, sizeof(*desc));
1441 ldt->start = ld.entry_number;
1442 ldt->descs = desc;
1443 ldt->num = 1;
1444 desc->sd.sd_lolimit = (ld.limit & 0x0000ffff);
1445 desc->sd.sd_hilimit = (ld.limit & 0x000f0000) >> 16;
1446 desc->sd.sd_lobase = (ld.base_addr & 0x00ffffff);
1447 desc->sd.sd_hibase = (ld.base_addr & 0xff000000) >> 24;
1448 desc->sd.sd_type = SDT_MEMRO | ((ld.read_exec_only ^ 1) << 1) |
1449 (ld.contents << 2);
1450 desc->sd.sd_dpl = 3;
1451 desc->sd.sd_p = (ld.seg_not_present ^ 1);
1452 desc->sd.sd_xx = 0;
1453 desc->sd.sd_def32 = ld.seg_32bit;
1454 desc->sd.sd_gran = ld.limit_in_pages;
1455 args.op = I386_SET_LDT;
1456 args.parms = (char*)ldt;
1457 error = sysarch(p, &args);
1458 break;
1459 default:
1460 error = EINVAL;
1461 break;
1462 }
1463
1464 if (error == EOPNOTSUPP) {
1465 printf("linux: modify_ldt needs kernel option USER_LDT\n");
1466 error = ENOSYS;
1467 }
1468
1469 return (error);
1470}