29 */ 30 31#include "opt_compat.h" 32 33#include <sys/param.h> 34#include <sys/systm.h> 35#include <sys/sysproto.h> 36#include <sys/kernel.h> 37#include <sys/mman.h> 38#include <sys/proc.h> 39#include <sys/fcntl.h> 40#include <sys/imgact_aout.h> 41#include <sys/mount.h> 42#include <sys/namei.h> 43#include <sys/resourcevar.h> 44#include <sys/stat.h> 45#include <sys/sysctl.h> 46#include <sys/unistd.h> 47#include <sys/vnode.h> 48#include <sys/wait.h> 49#include <sys/time.h> 50#include <sys/signalvar.h> 51 52#include <vm/vm.h> 53#include <vm/pmap.h> 54#include <vm/vm_kern.h> 55#include <vm/vm_map.h> 56#include <vm/vm_extern.h> 57 58#include <machine/frame.h> 59#include <machine/psl.h> 60#include <machine/sysarch.h> 61#include <machine/segments.h> 62 63#include <i386/linux/linux.h> 64#include <i386/linux/linux_proto.h> 65#include <i386/linux/linux_util.h> 66#include <i386/linux/linux_mib.h> 67 68#include <posix4/sched.h> 69 70#define BSD_TO_LINUX_SIGNAL(sig) \ 71 (((sig) <= LINUX_SIGTBLSZ) ? bsd_to_linux_signal[_SIG_IDX(sig)] : sig) 72 73static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] = 74{ RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK, 75 RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE, 76 RLIMIT_MEMLOCK, -1 77}; 78 79int 80linux_alarm(struct proc *p, struct linux_alarm_args *args) 81{ 82 struct itimerval it, old_it; 83 struct timeval tv; 84 int s; 85 86#ifdef DEBUG 87 printf("Linux-emul(%ld): alarm(%u)\n", (long)p->p_pid, args->secs); 88#endif 89 if (args->secs > 100000000) 90 return EINVAL; 91 it.it_value.tv_sec = (long)args->secs; 92 it.it_value.tv_usec = 0; 93 it.it_interval.tv_sec = 0; 94 it.it_interval.tv_usec = 0; 95 s = splsoftclock(); 96 old_it = p->p_realtimer; 97 getmicrouptime(&tv); 98 if (timevalisset(&old_it.it_value)) 99 untimeout(realitexpire, (caddr_t)p, p->p_ithandle); 100 if (it.it_value.tv_sec != 0) { 101 p->p_ithandle = timeout(realitexpire, (caddr_t)p, tvtohz(&it.it_value)); 102 timevaladd(&it.it_value, &tv); 103 } 104 p->p_realtimer = it; 105 splx(s); 106 if (timevalcmp(&old_it.it_value, &tv, >)) { 107 timevalsub(&old_it.it_value, &tv); 108 if (old_it.it_value.tv_usec != 0) 109 old_it.it_value.tv_sec++; 110 p->p_retval[0] = old_it.it_value.tv_sec; 111 } 112 return 0; 113} 114 115int 116linux_brk(struct proc *p, struct linux_brk_args *args) 117{ 118#if 0 119 struct vmspace *vm = p->p_vmspace; 120 vm_offset_t new, old; 121 int error; 122 123 if ((vm_offset_t)args->dsend < (vm_offset_t)vm->vm_daddr) 124 return EINVAL; 125 if (((caddr_t)args->dsend - (caddr_t)vm->vm_daddr) 126 > p->p_rlimit[RLIMIT_DATA].rlim_cur) 127 return ENOMEM; 128 129 old = round_page((vm_offset_t)vm->vm_daddr) + ctob(vm->vm_dsize); 130 new = round_page((vm_offset_t)args->dsend); 131 p->p_retval[0] = old; 132 if ((new-old) > 0) { 133 if (swap_pager_full) 134 return ENOMEM; 135 error = vm_map_find(&vm->vm_map, NULL, 0, &old, (new-old), FALSE, 136 VM_PROT_ALL, VM_PROT_ALL, 0); 137 if (error) 138 return error; 139 vm->vm_dsize += btoc((new-old)); 140 p->p_retval[0] = (int)(vm->vm_daddr + ctob(vm->vm_dsize)); 141 } 142 return 0; 143#else 144 struct vmspace *vm = p->p_vmspace; 145 vm_offset_t new, old; 146 struct obreak_args /* { 147 char * nsize; 148 } */ tmp; 149 150#ifdef DEBUG 151 printf("Linux-emul(%ld): brk(%p)\n", (long)p->p_pid, (void *)args->dsend); 152#endif 153 old = (vm_offset_t)vm->vm_daddr + ctob(vm->vm_dsize); 154 new = (vm_offset_t)args->dsend; 155 tmp.nsize = (char *) new; 156 if (((caddr_t)new > vm->vm_daddr) && !obreak(p, &tmp)) 157 p->p_retval[0] = (int)new; 158 else 159 p->p_retval[0] = (int)old; 160 161 return 0; 162#endif 163} 164 165int 166linux_uselib(struct proc *p, struct linux_uselib_args *args) 167{ 168 struct nameidata ni; 169 struct vnode *vp; 170 struct exec *a_out; 171 struct vattr attr; 172 vm_offset_t vmaddr; 173 unsigned long file_offset; 174 vm_offset_t buffer; 175 unsigned long bss_size; 176 int error; 177 caddr_t sg; 178 int locked; 179 180 sg = stackgap_init(); 181 CHECKALTEXIST(p, &sg, args->library); 182 183#ifdef DEBUG 184 printf("Linux-emul(%ld): uselib(%s)\n", (long)p->p_pid, args->library); 185#endif 186 187 a_out = NULL; 188 locked = 0; 189 vp = NULL; 190 191 NDINIT(&ni, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, args->library, p); 192 error = namei(&ni); 193 if (error) 194 goto cleanup; 195 196 vp = ni.ni_vp; 197 /* 198 * XXX This looks like a bogus check - a LOCKLEAF namei should not succeed 199 * without returning a vnode. 200 */ 201 if (vp == NULL) { 202 error = ENOEXEC; /* ?? */ 203 goto cleanup; 204 } 205 NDFREE(&ni, NDF_ONLY_PNBUF); 206 207 /* 208 * From here on down, we have a locked vnode that must be unlocked. 209 */ 210 locked++; 211 212 /* 213 * Writable? 214 */ 215 if (vp->v_writecount) { 216 error = ETXTBSY; 217 goto cleanup; 218 } 219 220 /* 221 * Executable? 222 */ 223 error = VOP_GETATTR(vp, &attr, p->p_ucred, p); 224 if (error) 225 goto cleanup; 226 227 if ((vp->v_mount->mnt_flag & MNT_NOEXEC) || 228 ((attr.va_mode & 0111) == 0) || 229 (attr.va_type != VREG)) { 230 error = ENOEXEC; 231 goto cleanup; 232 } 233 234 /* 235 * Sensible size? 236 */ 237 if (attr.va_size == 0) { 238 error = ENOEXEC; 239 goto cleanup; 240 } 241 242 /* 243 * Can we access it? 244 */ 245 error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p); 246 if (error) 247 goto cleanup; 248 249 error = VOP_OPEN(vp, FREAD, p->p_ucred, p); 250 if (error) 251 goto cleanup; 252 253 /* 254 * Lock no longer needed 255 */ 256 VOP_UNLOCK(vp, 0, p); 257 locked = 0; 258 259 /* 260 * Pull in executable header into kernel_map 261 */ 262 error = vm_mmap(kernel_map, (vm_offset_t *)&a_out, PAGE_SIZE, 263 VM_PROT_READ, VM_PROT_READ, 0, (caddr_t)vp, 0); 264 if (error) 265 goto cleanup; 266 267 /* 268 * Is it a Linux binary ? 269 */ 270 if (((a_out->a_magic >> 16) & 0xff) != 0x64) { 271 error = ENOEXEC; 272 goto cleanup; 273 } 274 275 /* While we are here, we should REALLY do some more checks */ 276 277 /* 278 * Set file/virtual offset based on a.out variant. 279 */ 280 switch ((int)(a_out->a_magic & 0xffff)) { 281 case 0413: /* ZMAGIC */ 282 file_offset = 1024; 283 break; 284 case 0314: /* QMAGIC */ 285 file_offset = 0; 286 break; 287 default: 288 error = ENOEXEC; 289 goto cleanup; 290 } 291 292 bss_size = round_page(a_out->a_bss); 293 294 /* 295 * Check various fields in header for validity/bounds. 296 */ 297 if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) { 298 error = ENOEXEC; 299 goto cleanup; 300 } 301 302 /* text + data can't exceed file size */ 303 if (a_out->a_data + a_out->a_text > attr.va_size) { 304 error = EFAULT; 305 goto cleanup; 306 } 307 308 /* 309 * text/data/bss must not exceed limits 310 * XXX: this is not complete. it should check current usage PLUS 311 * the resources needed by this library. 312 */ 313 if (a_out->a_text > MAXTSIZ || 314 a_out->a_data + bss_size > p->p_rlimit[RLIMIT_DATA].rlim_cur) { 315 error = ENOMEM; 316 goto cleanup; 317 } 318 319 /* 320 * prevent more writers 321 */ 322 vp->v_flag |= VTEXT; 323 324 /* 325 * Check if file_offset page aligned,. 326 * Currently we cannot handle misalinged file offsets, 327 * and so we read in the entire image (what a waste). 328 */ 329 if (file_offset & PAGE_MASK) { 330#ifdef DEBUG 331printf("uselib: Non page aligned binary %lu\n", file_offset); 332#endif 333 /* 334 * Map text+data read/write/execute 335 */ 336 337 /* a_entry is the load address and is page aligned */ 338 vmaddr = trunc_page(a_out->a_entry); 339 340 /* get anon user mapping, read+write+execute */ 341 error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &vmaddr, 342 a_out->a_text + a_out->a_data, FALSE, 343 VM_PROT_ALL, VM_PROT_ALL, 0); 344 if (error) 345 goto cleanup; 346 347 /* map file into kernel_map */ 348 error = vm_mmap(kernel_map, &buffer, 349 round_page(a_out->a_text + a_out->a_data + file_offset), 350 VM_PROT_READ, VM_PROT_READ, 0, 351 (caddr_t)vp, trunc_page(file_offset)); 352 if (error) 353 goto cleanup; 354 355 /* copy from kernel VM space to user space */ 356 error = copyout((caddr_t)(void *)(uintptr_t)(buffer + file_offset), 357 (caddr_t)vmaddr, a_out->a_text + a_out->a_data); 358 359 /* release temporary kernel space */ 360 vm_map_remove(kernel_map, buffer, 361 buffer + round_page(a_out->a_text + a_out->a_data + file_offset)); 362 363 if (error) 364 goto cleanup; 365 } 366 else { 367#ifdef DEBUG 368printf("uselib: Page aligned binary %lu\n", file_offset); 369#endif 370 /* 371 * for QMAGIC, a_entry is 20 bytes beyond the load address 372 * to skip the executable header 373 */ 374 vmaddr = trunc_page(a_out->a_entry); 375 376 /* 377 * Map it all into the process's space as a single copy-on-write 378 * "data" segment. 379 */ 380 error = vm_mmap(&p->p_vmspace->vm_map, &vmaddr, 381 a_out->a_text + a_out->a_data, 382 VM_PROT_ALL, VM_PROT_ALL, MAP_PRIVATE | MAP_FIXED, 383 (caddr_t)vp, file_offset); 384 if (error) 385 goto cleanup; 386 } 387#ifdef DEBUG 388printf("mem=%08x = %08x %08x\n", vmaddr, ((int*)vmaddr)[0], ((int*)vmaddr)[1]); 389#endif 390 if (bss_size != 0) { 391 /* 392 * Calculate BSS start address 393 */ 394 vmaddr = trunc_page(a_out->a_entry) + a_out->a_text + a_out->a_data; 395 396 /* 397 * allocate some 'anon' space 398 */ 399 error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &vmaddr, 400 bss_size, FALSE, 401 VM_PROT_ALL, VM_PROT_ALL, 0); 402 if (error) 403 goto cleanup; 404 } 405 406cleanup: 407 /* 408 * Unlock vnode if needed 409 */ 410 if (locked) 411 VOP_UNLOCK(vp, 0, p); 412 413 /* 414 * Release the kernel mapping. 415 */ 416 if (a_out) 417 vm_map_remove(kernel_map, (vm_offset_t)a_out, (vm_offset_t)a_out + PAGE_SIZE); 418 419 return error; 420} 421 422/* XXX move */ 423struct linux_select_argv { 424 int nfds; 425 fd_set *readfds; 426 fd_set *writefds; 427 fd_set *exceptfds; 428 struct timeval *timeout; 429}; 430 431int 432linux_select(struct proc *p, struct linux_select_args *args) 433{ 434 struct linux_select_argv linux_args; 435 struct linux_newselect_args newsel; 436 int error; 437 438#ifdef SELECT_DEBUG 439 printf("Linux-emul(%ld): select(%x)\n", (long)p->p_pid, args->ptr); 440#endif 441 if ((error = copyin((caddr_t)args->ptr, (caddr_t)&linux_args, 442 sizeof(linux_args)))) 443 return error; 444 445 newsel.nfds = linux_args.nfds; 446 newsel.readfds = linux_args.readfds; 447 newsel.writefds = linux_args.writefds; 448 newsel.exceptfds = linux_args.exceptfds; 449 newsel.timeout = linux_args.timeout; 450 451 return linux_newselect(p, &newsel); 452} 453 454int 455linux_newselect(struct proc *p, struct linux_newselect_args *args) 456{ 457 struct select_args bsa; 458 struct timeval tv0, tv1, utv, *tvp; 459 caddr_t sg; 460 int error; 461 462#ifdef DEBUG 463 printf("Linux-emul(%ld): newselect(%d, %p, %p, %p, %p)\n", 464 (long)p->p_pid, args->nfds, (void *)args->readfds, 465 (void *)args->writefds, (void *)args->exceptfds, 466 (void *)args->timeout); 467#endif 468 error = 0; 469 bsa.nd = args->nfds; 470 bsa.in = args->readfds; 471 bsa.ou = args->writefds; 472 bsa.ex = args->exceptfds; 473 bsa.tv = args->timeout; 474 475 /* 476 * Store current time for computation of the amount of 477 * time left. 478 */ 479 if (args->timeout) { 480 if ((error = copyin(args->timeout, &utv, sizeof(utv)))) 481 goto select_out; 482#ifdef DEBUG 483 printf("Linux-emul(%ld): incoming timeout (%ld/%ld)\n", 484 (long)p->p_pid, utv.tv_sec, utv.tv_usec); 485#endif 486 if (itimerfix(&utv)) { 487 /* 488 * The timeval was invalid. Convert it to something 489 * valid that will act as it does under Linux. 490 */ 491 sg = stackgap_init(); 492 tvp = stackgap_alloc(&sg, sizeof(utv)); 493 utv.tv_sec += utv.tv_usec / 1000000; 494 utv.tv_usec %= 1000000; 495 if (utv.tv_usec < 0) { 496 utv.tv_sec -= 1; 497 utv.tv_usec += 1000000; 498 } 499 if (utv.tv_sec < 0) 500 timevalclear(&utv); 501 if ((error = copyout(&utv, tvp, sizeof(utv)))) 502 goto select_out; 503 bsa.tv = tvp; 504 } 505 microtime(&tv0); 506 } 507 508 error = select(p, &bsa); 509#ifdef DEBUG 510 printf("Linux-emul(%ld): real select returns %d\n", (long)p->p_pid, error); 511#endif 512 513 if (error) { 514 /* 515 * See fs/select.c in the Linux kernel. Without this, 516 * Maelstrom doesn't work. 517 */ 518 if (error == ERESTART) 519 error = EINTR; 520 goto select_out; 521 } 522 523 if (args->timeout) { 524 if (p->p_retval[0]) { 525 /* 526 * Compute how much time was left of the timeout, 527 * by subtracting the current time and the time 528 * before we started the call, and subtracting 529 * that result from the user-supplied value. 530 */ 531 microtime(&tv1); 532 timevalsub(&tv1, &tv0); 533 timevalsub(&utv, &tv1); 534 if (utv.tv_sec < 0) 535 timevalclear(&utv); 536 } else 537 timevalclear(&utv); 538#ifdef DEBUG 539 printf("Linux-emul(%ld): outgoing timeout (%ld/%ld)\n", 540 (long)p->p_pid, utv.tv_sec, utv.tv_usec); 541#endif 542 if ((error = copyout(&utv, args->timeout, sizeof(utv)))) 543 goto select_out; 544 } 545 546select_out: 547#ifdef DEBUG 548 printf("Linux-emul(%ld): newselect_out -> %d\n", (long)p->p_pid, error); 549#endif 550 return error; 551} 552 553int 554linux_getpgid(struct proc *p, struct linux_getpgid_args *args) 555{ 556 struct proc *curp; 557 558#ifdef DEBUG 559 printf("Linux-emul(%ld): getpgid(%d)\n", (long)p->p_pid, args->pid); 560#endif 561 if (args->pid != p->p_pid) { 562 if (!(curp = pfind(args->pid))) 563 return ESRCH; 564 } 565 else 566 curp = p; 567 p->p_retval[0] = curp->p_pgid; 568 return 0; 569} 570 571int 572linux_fork(struct proc *p, struct linux_fork_args *args) 573{ 574 int error; 575 576#ifdef DEBUG 577 printf("Linux-emul(%ld): fork()\n", (long)p->p_pid); 578#endif 579 if ((error = fork(p, (struct fork_args *)args)) != 0) 580 return error; 581 if (p->p_retval[1] == 1) 582 p->p_retval[0] = 0; 583 return 0; 584} 585 586int 587linux_vfork(struct proc *p, struct linux_vfork_args *args) 588{ 589 int error; 590 591#ifdef DEBUG 592 printf("Linux-emul(%ld): vfork()\n", (long)p->p_pid); 593#endif 594 595 if ((error = vfork(p, (struct vfork_args *)args)) != 0) 596 return error; 597 /* Are we the child? */ 598 if (p->p_retval[1] == 1) 599 p->p_retval[0] = 0; 600 return 0; 601} 602 603#define CLONE_VM 0x100 604#define CLONE_FS 0x200 605#define CLONE_FILES 0x400 606#define CLONE_SIGHAND 0x800 607#define CLONE_PID 0x1000 608 609int 610linux_clone(struct proc *p, struct linux_clone_args *args) 611{ 612 int error, ff = RFPROC; 613 struct proc *p2; 614 int exit_signal; 615 vm_offset_t start; 616 struct rfork_args rf_args; 617 618#ifdef DEBUG 619 if (args->flags & CLONE_PID) 620 printf("linux_clone(%ld): CLONE_PID not yet supported\n", 621 (long)p->p_pid); 622 printf("linux_clone(%ld): invoked with flags %x and stack %x\n", 623 (long)p->p_pid, (unsigned int)args->flags, 624 (unsigned int)args->stack); 625#endif 626 627 if (!args->stack) 628 return (EINVAL); 629 630 exit_signal = args->flags & 0x000000ff; 631 if (exit_signal >= LINUX_NSIG) 632 return EINVAL; 633 634 if (exit_signal <= LINUX_SIGTBLSZ) 635 exit_signal = linux_to_bsd_signal[_SIG_IDX(exit_signal)]; 636 637 /* RFTHREAD probably not necessary here, but it shouldn't hurt either */ 638 ff |= RFTHREAD; 639 640 if (args->flags & CLONE_VM) 641 ff |= RFMEM; 642 if (args->flags & CLONE_SIGHAND) 643 ff |= RFSIGSHARE; 644 if (!(args->flags & CLONE_FILES)) 645 ff |= RFFDG; 646 647 error = 0; 648 start = 0; 649 650 rf_args.flags = ff; 651 if ((error = rfork(p, &rf_args)) != 0) 652 return error; 653 654 p2 = pfind(p->p_retval[0]); 655 if (p2 == 0) 656 return ESRCH; 657 658 p2->p_sigparent = exit_signal; 659 p2->p_md.md_regs->tf_esp = (unsigned int)args->stack; 660 661#ifdef DEBUG 662 printf ("linux_clone(%ld): successful rfork to %ld\n", 663 (long)p->p_pid, (long)p2->p_pid); 664#endif 665 return 0; 666} 667 668/* XXX move */ 669struct linux_mmap_argv { 670 linux_caddr_t addr; 671 int len; 672 int prot; 673 int flags; 674 int fd; 675 int pos; 676}; 677 678#define STACK_SIZE (2 * 1024 * 1024) 679#define GUARD_SIZE (4 * PAGE_SIZE) 680int 681linux_mmap(struct proc *p, struct linux_mmap_args *args) 682{ 683 struct mmap_args /* { 684 caddr_t addr; 685 size_t len; 686 int prot; 687 int flags; 688 int fd; 689 long pad; 690 off_t pos; 691 } */ bsd_args; 692 int error; 693 struct linux_mmap_argv linux_args; 694 695 if ((error = copyin((caddr_t)args->ptr, (caddr_t)&linux_args, 696 sizeof(linux_args)))) 697 return error; 698#ifdef DEBUG 699 printf("Linux-emul(%ld): mmap(%p, %d, %d, 0x%08x, %d, %d)", 700 (long)p->p_pid, (void *)linux_args.addr, linux_args.len, 701 linux_args.prot, linux_args.flags, linux_args.fd, linux_args.pos); 702#endif 703 bsd_args.flags = 0; 704 if (linux_args.flags & LINUX_MAP_SHARED) 705 bsd_args.flags |= MAP_SHARED; 706 if (linux_args.flags & LINUX_MAP_PRIVATE) 707 bsd_args.flags |= MAP_PRIVATE; 708 if (linux_args.flags & LINUX_MAP_FIXED) 709 bsd_args.flags |= MAP_FIXED; 710 if (linux_args.flags & LINUX_MAP_ANON) 711 bsd_args.flags |= MAP_ANON; 712 if (linux_args.flags & LINUX_MAP_GROWSDOWN) { 713 bsd_args.flags |= MAP_STACK; 714 715 /* The linux MAP_GROWSDOWN option does not limit auto 716 * growth of the region. Linux mmap with this option 717 * takes as addr the inital BOS, and as len, the initial 718 * region size. It can then grow down from addr without 719 * limit. However, linux threads has an implicit internal 720 * limit to stack size of STACK_SIZE. Its just not 721 * enforced explicitly in linux. But, here we impose 722 * a limit of (STACK_SIZE - GUARD_SIZE) on the stack 723 * region, since we can do this with our mmap. 724 * 725 * Our mmap with MAP_STACK takes addr as the maximum 726 * downsize limit on BOS, and as len the max size of 727 * the region. It them maps the top SGROWSIZ bytes, 728 * and autgrows the region down, up to the limit 729 * in addr. 730 * 731 * If we don't use the MAP_STACK option, the effect 732 * of this code is to allocate a stack region of a 733 * fixed size of (STACK_SIZE - GUARD_SIZE). 734 */ 735 736 /* This gives us TOS */ 737 bsd_args.addr = linux_args.addr + linux_args.len; 738 739 /* This gives us our maximum stack size */ 740 if (linux_args.len > STACK_SIZE - GUARD_SIZE) 741 bsd_args.len = linux_args.len; 742 else 743 bsd_args.len = STACK_SIZE - GUARD_SIZE; 744 745 /* This gives us a new BOS. If we're using VM_STACK, then 746 * mmap will just map the top SGROWSIZ bytes, and let 747 * the stack grow down to the limit at BOS. If we're 748 * not using VM_STACK we map the full stack, since we 749 * don't have a way to autogrow it. 750 */ 751 bsd_args.addr -= bsd_args.len; 752 753 } else { 754 bsd_args.addr = linux_args.addr; 755 bsd_args.len = linux_args.len; 756 } 757 758 bsd_args.prot = linux_args.prot | PROT_READ; /* always required */ 759 if (linux_args.flags & LINUX_MAP_ANON) 760 bsd_args.fd = -1; 761 else 762 bsd_args.fd = linux_args.fd; 763 bsd_args.pos = linux_args.pos; 764 bsd_args.pad = 0; 765#ifdef DEBUG 766 printf("-> (%p, %d, %d, 0x%08x, %d, %d)\n", 767 (void *)bsd_args.addr, bsd_args.len, 768 bsd_args.prot, bsd_args.flags, bsd_args.fd, (int)bsd_args.pos); 769#endif 770 return mmap(p, &bsd_args); 771} 772 773int 774linux_mremap(struct proc *p, struct linux_mremap_args *args) 775{ 776 struct munmap_args /* { 777 void *addr; 778 size_t len; 779 } */ bsd_args; 780 int error = 0; 781 782#ifdef DEBUG 783 printf("Linux-emul(%ld): mremap(%p, %08x, %08x, %08x)\n", 784 (long)p->p_pid, (void *)args->addr, args->old_len, args->new_len, 785 args->flags); 786#endif 787 args->new_len = round_page(args->new_len); 788 args->old_len = round_page(args->old_len); 789 790 if (args->new_len > args->old_len) { 791 p->p_retval[0] = 0; 792 return ENOMEM; 793 } 794 795 if (args->new_len < args->old_len) { 796 bsd_args.addr = args->addr + args->new_len; 797 bsd_args.len = args->old_len - args->new_len; 798 error = munmap(p, &bsd_args); 799 } 800 801 p->p_retval[0] = error ? 0 : (int)args->addr; 802 return error; 803} 804 805int 806linux_msync(struct proc *p, struct linux_msync_args *args) 807{ 808 struct msync_args bsd_args; 809 810 bsd_args.addr = args->addr; 811 bsd_args.len = args->len; 812 bsd_args.flags = 0; /* XXX ignore */ 813 814 return msync(p, &bsd_args); 815} 816 817int 818linux_pipe(struct proc *p, struct linux_pipe_args *args) 819{ 820 int error; 821 int reg_edx; 822 823#ifdef DEBUG 824 printf("Linux-emul(%ld): pipe(*)\n", (long)p->p_pid); 825#endif 826 reg_edx = p->p_retval[1]; 827 error = pipe(p, 0); 828 if (error) { 829 p->p_retval[1] = reg_edx; 830 return error; 831 } 832 833 error = copyout(p->p_retval, args->pipefds, 2*sizeof(int)); 834 if (error) { 835 p->p_retval[1] = reg_edx; 836 return error; 837 } 838 839 p->p_retval[1] = reg_edx; 840 p->p_retval[0] = 0; 841 return 0; 842} 843 844int 845linux_time(struct proc *p, struct linux_time_args *args) 846{ 847 struct timeval tv; 848 linux_time_t tm; 849 int error; 850 851#ifdef DEBUG 852 printf("Linux-emul(%ld): time(*)\n", (long)p->p_pid); 853#endif 854 microtime(&tv); 855 tm = tv.tv_sec; 856 if (args->tm && (error = copyout(&tm, args->tm, sizeof(linux_time_t)))) 857 return error; 858 p->p_retval[0] = tm; 859 return 0; 860} 861 862struct linux_times_argv { 863 long tms_utime; 864 long tms_stime; 865 long tms_cutime; 866 long tms_cstime; 867}; 868 869#define CLK_TCK 100 /* Linux uses 100 */ 870#define CONVTCK(r) (r.tv_sec * CLK_TCK + r.tv_usec / (1000000 / CLK_TCK)) 871 872int 873linux_times(struct proc *p, struct linux_times_args *args) 874{ 875 struct timeval tv; 876 struct linux_times_argv tms; 877 struct rusage ru; 878 int error; 879 880#ifdef DEBUG 881 printf("Linux-emul(%ld): times(*)\n", (long)p->p_pid); 882#endif 883 calcru(p, &ru.ru_utime, &ru.ru_stime, NULL); 884 885 tms.tms_utime = CONVTCK(ru.ru_utime); 886 tms.tms_stime = CONVTCK(ru.ru_stime); 887 888 tms.tms_cutime = CONVTCK(p->p_stats->p_cru.ru_utime); 889 tms.tms_cstime = CONVTCK(p->p_stats->p_cru.ru_stime); 890 891 if ((error = copyout((caddr_t)&tms, (caddr_t)args->buf, 892 sizeof(struct linux_times_argv)))) 893 return error; 894 895 microuptime(&tv); 896 p->p_retval[0] = (int)CONVTCK(tv); 897 return 0; 898} 899 900int 901linux_newuname(struct proc *p, struct linux_newuname_args *args) 902{ 903 struct linux_new_utsname utsname; 904 char *osrelease, *osname; 905 906#ifdef DEBUG 907 printf("Linux-emul(%ld): newuname(*)\n", (long)p->p_pid); 908#endif 909 910 osname = linux_get_osname(p); 911 osrelease = linux_get_osrelease(p); 912 913 bzero(&utsname, sizeof(struct linux_new_utsname)); 914 strncpy(utsname.sysname, osname, LINUX_MAX_UTSNAME-1); 915 strncpy(utsname.nodename, hostname, LINUX_MAX_UTSNAME-1); 916 strncpy(utsname.release, osrelease, LINUX_MAX_UTSNAME-1); 917 strncpy(utsname.version, version, LINUX_MAX_UTSNAME-1); 918 strncpy(utsname.machine, machine, LINUX_MAX_UTSNAME-1); 919 strncpy(utsname.domainname, domainname, LINUX_MAX_UTSNAME-1); 920 921 return (copyout((caddr_t)&utsname, (caddr_t)args->buf, 922 sizeof(struct linux_new_utsname))); 923} 924 925struct linux_utimbuf { 926 linux_time_t l_actime; 927 linux_time_t l_modtime; 928}; 929 930int 931linux_utime(struct proc *p, struct linux_utime_args *args) 932{ 933 struct utimes_args /* { 934 char *path; 935 struct timeval *tptr; 936 } */ bsdutimes; 937 struct timeval tv[2], *tvp; 938 struct linux_utimbuf lut; 939 int error; 940 caddr_t sg; 941 942 sg = stackgap_init(); 943 CHECKALTEXIST(p, &sg, args->fname); 944 945#ifdef DEBUG 946 printf("Linux-emul(%ld): utime(%s, *)\n", (long)p->p_pid, args->fname); 947#endif 948 if (args->times) { 949 if ((error = copyin(args->times, &lut, sizeof lut))) 950 return error; 951 tv[0].tv_sec = lut.l_actime; 952 tv[0].tv_usec = 0; 953 tv[1].tv_sec = lut.l_modtime; 954 tv[1].tv_usec = 0; 955 /* so that utimes can copyin */ 956 tvp = (struct timeval *)stackgap_alloc(&sg, sizeof(tv));
| 29 */ 30 31#include "opt_compat.h" 32 33#include <sys/param.h> 34#include <sys/systm.h> 35#include <sys/sysproto.h> 36#include <sys/kernel.h> 37#include <sys/mman.h> 38#include <sys/proc.h> 39#include <sys/fcntl.h> 40#include <sys/imgact_aout.h> 41#include <sys/mount.h> 42#include <sys/namei.h> 43#include <sys/resourcevar.h> 44#include <sys/stat.h> 45#include <sys/sysctl.h> 46#include <sys/unistd.h> 47#include <sys/vnode.h> 48#include <sys/wait.h> 49#include <sys/time.h> 50#include <sys/signalvar.h> 51 52#include <vm/vm.h> 53#include <vm/pmap.h> 54#include <vm/vm_kern.h> 55#include <vm/vm_map.h> 56#include <vm/vm_extern.h> 57 58#include <machine/frame.h> 59#include <machine/psl.h> 60#include <machine/sysarch.h> 61#include <machine/segments.h> 62 63#include <i386/linux/linux.h> 64#include <i386/linux/linux_proto.h> 65#include <i386/linux/linux_util.h> 66#include <i386/linux/linux_mib.h> 67 68#include <posix4/sched.h> 69 70#define BSD_TO_LINUX_SIGNAL(sig) \ 71 (((sig) <= LINUX_SIGTBLSZ) ? bsd_to_linux_signal[_SIG_IDX(sig)] : sig) 72 73static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] = 74{ RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK, 75 RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE, 76 RLIMIT_MEMLOCK, -1 77}; 78 79int 80linux_alarm(struct proc *p, struct linux_alarm_args *args) 81{ 82 struct itimerval it, old_it; 83 struct timeval tv; 84 int s; 85 86#ifdef DEBUG 87 printf("Linux-emul(%ld): alarm(%u)\n", (long)p->p_pid, args->secs); 88#endif 89 if (args->secs > 100000000) 90 return EINVAL; 91 it.it_value.tv_sec = (long)args->secs; 92 it.it_value.tv_usec = 0; 93 it.it_interval.tv_sec = 0; 94 it.it_interval.tv_usec = 0; 95 s = splsoftclock(); 96 old_it = p->p_realtimer; 97 getmicrouptime(&tv); 98 if (timevalisset(&old_it.it_value)) 99 untimeout(realitexpire, (caddr_t)p, p->p_ithandle); 100 if (it.it_value.tv_sec != 0) { 101 p->p_ithandle = timeout(realitexpire, (caddr_t)p, tvtohz(&it.it_value)); 102 timevaladd(&it.it_value, &tv); 103 } 104 p->p_realtimer = it; 105 splx(s); 106 if (timevalcmp(&old_it.it_value, &tv, >)) { 107 timevalsub(&old_it.it_value, &tv); 108 if (old_it.it_value.tv_usec != 0) 109 old_it.it_value.tv_sec++; 110 p->p_retval[0] = old_it.it_value.tv_sec; 111 } 112 return 0; 113} 114 115int 116linux_brk(struct proc *p, struct linux_brk_args *args) 117{ 118#if 0 119 struct vmspace *vm = p->p_vmspace; 120 vm_offset_t new, old; 121 int error; 122 123 if ((vm_offset_t)args->dsend < (vm_offset_t)vm->vm_daddr) 124 return EINVAL; 125 if (((caddr_t)args->dsend - (caddr_t)vm->vm_daddr) 126 > p->p_rlimit[RLIMIT_DATA].rlim_cur) 127 return ENOMEM; 128 129 old = round_page((vm_offset_t)vm->vm_daddr) + ctob(vm->vm_dsize); 130 new = round_page((vm_offset_t)args->dsend); 131 p->p_retval[0] = old; 132 if ((new-old) > 0) { 133 if (swap_pager_full) 134 return ENOMEM; 135 error = vm_map_find(&vm->vm_map, NULL, 0, &old, (new-old), FALSE, 136 VM_PROT_ALL, VM_PROT_ALL, 0); 137 if (error) 138 return error; 139 vm->vm_dsize += btoc((new-old)); 140 p->p_retval[0] = (int)(vm->vm_daddr + ctob(vm->vm_dsize)); 141 } 142 return 0; 143#else 144 struct vmspace *vm = p->p_vmspace; 145 vm_offset_t new, old; 146 struct obreak_args /* { 147 char * nsize; 148 } */ tmp; 149 150#ifdef DEBUG 151 printf("Linux-emul(%ld): brk(%p)\n", (long)p->p_pid, (void *)args->dsend); 152#endif 153 old = (vm_offset_t)vm->vm_daddr + ctob(vm->vm_dsize); 154 new = (vm_offset_t)args->dsend; 155 tmp.nsize = (char *) new; 156 if (((caddr_t)new > vm->vm_daddr) && !obreak(p, &tmp)) 157 p->p_retval[0] = (int)new; 158 else 159 p->p_retval[0] = (int)old; 160 161 return 0; 162#endif 163} 164 165int 166linux_uselib(struct proc *p, struct linux_uselib_args *args) 167{ 168 struct nameidata ni; 169 struct vnode *vp; 170 struct exec *a_out; 171 struct vattr attr; 172 vm_offset_t vmaddr; 173 unsigned long file_offset; 174 vm_offset_t buffer; 175 unsigned long bss_size; 176 int error; 177 caddr_t sg; 178 int locked; 179 180 sg = stackgap_init(); 181 CHECKALTEXIST(p, &sg, args->library); 182 183#ifdef DEBUG 184 printf("Linux-emul(%ld): uselib(%s)\n", (long)p->p_pid, args->library); 185#endif 186 187 a_out = NULL; 188 locked = 0; 189 vp = NULL; 190 191 NDINIT(&ni, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, args->library, p); 192 error = namei(&ni); 193 if (error) 194 goto cleanup; 195 196 vp = ni.ni_vp; 197 /* 198 * XXX This looks like a bogus check - a LOCKLEAF namei should not succeed 199 * without returning a vnode. 200 */ 201 if (vp == NULL) { 202 error = ENOEXEC; /* ?? */ 203 goto cleanup; 204 } 205 NDFREE(&ni, NDF_ONLY_PNBUF); 206 207 /* 208 * From here on down, we have a locked vnode that must be unlocked. 209 */ 210 locked++; 211 212 /* 213 * Writable? 214 */ 215 if (vp->v_writecount) { 216 error = ETXTBSY; 217 goto cleanup; 218 } 219 220 /* 221 * Executable? 222 */ 223 error = VOP_GETATTR(vp, &attr, p->p_ucred, p); 224 if (error) 225 goto cleanup; 226 227 if ((vp->v_mount->mnt_flag & MNT_NOEXEC) || 228 ((attr.va_mode & 0111) == 0) || 229 (attr.va_type != VREG)) { 230 error = ENOEXEC; 231 goto cleanup; 232 } 233 234 /* 235 * Sensible size? 236 */ 237 if (attr.va_size == 0) { 238 error = ENOEXEC; 239 goto cleanup; 240 } 241 242 /* 243 * Can we access it? 244 */ 245 error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p); 246 if (error) 247 goto cleanup; 248 249 error = VOP_OPEN(vp, FREAD, p->p_ucred, p); 250 if (error) 251 goto cleanup; 252 253 /* 254 * Lock no longer needed 255 */ 256 VOP_UNLOCK(vp, 0, p); 257 locked = 0; 258 259 /* 260 * Pull in executable header into kernel_map 261 */ 262 error = vm_mmap(kernel_map, (vm_offset_t *)&a_out, PAGE_SIZE, 263 VM_PROT_READ, VM_PROT_READ, 0, (caddr_t)vp, 0); 264 if (error) 265 goto cleanup; 266 267 /* 268 * Is it a Linux binary ? 269 */ 270 if (((a_out->a_magic >> 16) & 0xff) != 0x64) { 271 error = ENOEXEC; 272 goto cleanup; 273 } 274 275 /* While we are here, we should REALLY do some more checks */ 276 277 /* 278 * Set file/virtual offset based on a.out variant. 279 */ 280 switch ((int)(a_out->a_magic & 0xffff)) { 281 case 0413: /* ZMAGIC */ 282 file_offset = 1024; 283 break; 284 case 0314: /* QMAGIC */ 285 file_offset = 0; 286 break; 287 default: 288 error = ENOEXEC; 289 goto cleanup; 290 } 291 292 bss_size = round_page(a_out->a_bss); 293 294 /* 295 * Check various fields in header for validity/bounds. 296 */ 297 if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) { 298 error = ENOEXEC; 299 goto cleanup; 300 } 301 302 /* text + data can't exceed file size */ 303 if (a_out->a_data + a_out->a_text > attr.va_size) { 304 error = EFAULT; 305 goto cleanup; 306 } 307 308 /* 309 * text/data/bss must not exceed limits 310 * XXX: this is not complete. it should check current usage PLUS 311 * the resources needed by this library. 312 */ 313 if (a_out->a_text > MAXTSIZ || 314 a_out->a_data + bss_size > p->p_rlimit[RLIMIT_DATA].rlim_cur) { 315 error = ENOMEM; 316 goto cleanup; 317 } 318 319 /* 320 * prevent more writers 321 */ 322 vp->v_flag |= VTEXT; 323 324 /* 325 * Check if file_offset page aligned,. 326 * Currently we cannot handle misalinged file offsets, 327 * and so we read in the entire image (what a waste). 328 */ 329 if (file_offset & PAGE_MASK) { 330#ifdef DEBUG 331printf("uselib: Non page aligned binary %lu\n", file_offset); 332#endif 333 /* 334 * Map text+data read/write/execute 335 */ 336 337 /* a_entry is the load address and is page aligned */ 338 vmaddr = trunc_page(a_out->a_entry); 339 340 /* get anon user mapping, read+write+execute */ 341 error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &vmaddr, 342 a_out->a_text + a_out->a_data, FALSE, 343 VM_PROT_ALL, VM_PROT_ALL, 0); 344 if (error) 345 goto cleanup; 346 347 /* map file into kernel_map */ 348 error = vm_mmap(kernel_map, &buffer, 349 round_page(a_out->a_text + a_out->a_data + file_offset), 350 VM_PROT_READ, VM_PROT_READ, 0, 351 (caddr_t)vp, trunc_page(file_offset)); 352 if (error) 353 goto cleanup; 354 355 /* copy from kernel VM space to user space */ 356 error = copyout((caddr_t)(void *)(uintptr_t)(buffer + file_offset), 357 (caddr_t)vmaddr, a_out->a_text + a_out->a_data); 358 359 /* release temporary kernel space */ 360 vm_map_remove(kernel_map, buffer, 361 buffer + round_page(a_out->a_text + a_out->a_data + file_offset)); 362 363 if (error) 364 goto cleanup; 365 } 366 else { 367#ifdef DEBUG 368printf("uselib: Page aligned binary %lu\n", file_offset); 369#endif 370 /* 371 * for QMAGIC, a_entry is 20 bytes beyond the load address 372 * to skip the executable header 373 */ 374 vmaddr = trunc_page(a_out->a_entry); 375 376 /* 377 * Map it all into the process's space as a single copy-on-write 378 * "data" segment. 379 */ 380 error = vm_mmap(&p->p_vmspace->vm_map, &vmaddr, 381 a_out->a_text + a_out->a_data, 382 VM_PROT_ALL, VM_PROT_ALL, MAP_PRIVATE | MAP_FIXED, 383 (caddr_t)vp, file_offset); 384 if (error) 385 goto cleanup; 386 } 387#ifdef DEBUG 388printf("mem=%08x = %08x %08x\n", vmaddr, ((int*)vmaddr)[0], ((int*)vmaddr)[1]); 389#endif 390 if (bss_size != 0) { 391 /* 392 * Calculate BSS start address 393 */ 394 vmaddr = trunc_page(a_out->a_entry) + a_out->a_text + a_out->a_data; 395 396 /* 397 * allocate some 'anon' space 398 */ 399 error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &vmaddr, 400 bss_size, FALSE, 401 VM_PROT_ALL, VM_PROT_ALL, 0); 402 if (error) 403 goto cleanup; 404 } 405 406cleanup: 407 /* 408 * Unlock vnode if needed 409 */ 410 if (locked) 411 VOP_UNLOCK(vp, 0, p); 412 413 /* 414 * Release the kernel mapping. 415 */ 416 if (a_out) 417 vm_map_remove(kernel_map, (vm_offset_t)a_out, (vm_offset_t)a_out + PAGE_SIZE); 418 419 return error; 420} 421 422/* XXX move */ 423struct linux_select_argv { 424 int nfds; 425 fd_set *readfds; 426 fd_set *writefds; 427 fd_set *exceptfds; 428 struct timeval *timeout; 429}; 430 431int 432linux_select(struct proc *p, struct linux_select_args *args) 433{ 434 struct linux_select_argv linux_args; 435 struct linux_newselect_args newsel; 436 int error; 437 438#ifdef SELECT_DEBUG 439 printf("Linux-emul(%ld): select(%x)\n", (long)p->p_pid, args->ptr); 440#endif 441 if ((error = copyin((caddr_t)args->ptr, (caddr_t)&linux_args, 442 sizeof(linux_args)))) 443 return error; 444 445 newsel.nfds = linux_args.nfds; 446 newsel.readfds = linux_args.readfds; 447 newsel.writefds = linux_args.writefds; 448 newsel.exceptfds = linux_args.exceptfds; 449 newsel.timeout = linux_args.timeout; 450 451 return linux_newselect(p, &newsel); 452} 453 454int 455linux_newselect(struct proc *p, struct linux_newselect_args *args) 456{ 457 struct select_args bsa; 458 struct timeval tv0, tv1, utv, *tvp; 459 caddr_t sg; 460 int error; 461 462#ifdef DEBUG 463 printf("Linux-emul(%ld): newselect(%d, %p, %p, %p, %p)\n", 464 (long)p->p_pid, args->nfds, (void *)args->readfds, 465 (void *)args->writefds, (void *)args->exceptfds, 466 (void *)args->timeout); 467#endif 468 error = 0; 469 bsa.nd = args->nfds; 470 bsa.in = args->readfds; 471 bsa.ou = args->writefds; 472 bsa.ex = args->exceptfds; 473 bsa.tv = args->timeout; 474 475 /* 476 * Store current time for computation of the amount of 477 * time left. 478 */ 479 if (args->timeout) { 480 if ((error = copyin(args->timeout, &utv, sizeof(utv)))) 481 goto select_out; 482#ifdef DEBUG 483 printf("Linux-emul(%ld): incoming timeout (%ld/%ld)\n", 484 (long)p->p_pid, utv.tv_sec, utv.tv_usec); 485#endif 486 if (itimerfix(&utv)) { 487 /* 488 * The timeval was invalid. Convert it to something 489 * valid that will act as it does under Linux. 490 */ 491 sg = stackgap_init(); 492 tvp = stackgap_alloc(&sg, sizeof(utv)); 493 utv.tv_sec += utv.tv_usec / 1000000; 494 utv.tv_usec %= 1000000; 495 if (utv.tv_usec < 0) { 496 utv.tv_sec -= 1; 497 utv.tv_usec += 1000000; 498 } 499 if (utv.tv_sec < 0) 500 timevalclear(&utv); 501 if ((error = copyout(&utv, tvp, sizeof(utv)))) 502 goto select_out; 503 bsa.tv = tvp; 504 } 505 microtime(&tv0); 506 } 507 508 error = select(p, &bsa); 509#ifdef DEBUG 510 printf("Linux-emul(%ld): real select returns %d\n", (long)p->p_pid, error); 511#endif 512 513 if (error) { 514 /* 515 * See fs/select.c in the Linux kernel. Without this, 516 * Maelstrom doesn't work. 517 */ 518 if (error == ERESTART) 519 error = EINTR; 520 goto select_out; 521 } 522 523 if (args->timeout) { 524 if (p->p_retval[0]) { 525 /* 526 * Compute how much time was left of the timeout, 527 * by subtracting the current time and the time 528 * before we started the call, and subtracting 529 * that result from the user-supplied value. 530 */ 531 microtime(&tv1); 532 timevalsub(&tv1, &tv0); 533 timevalsub(&utv, &tv1); 534 if (utv.tv_sec < 0) 535 timevalclear(&utv); 536 } else 537 timevalclear(&utv); 538#ifdef DEBUG 539 printf("Linux-emul(%ld): outgoing timeout (%ld/%ld)\n", 540 (long)p->p_pid, utv.tv_sec, utv.tv_usec); 541#endif 542 if ((error = copyout(&utv, args->timeout, sizeof(utv)))) 543 goto select_out; 544 } 545 546select_out: 547#ifdef DEBUG 548 printf("Linux-emul(%ld): newselect_out -> %d\n", (long)p->p_pid, error); 549#endif 550 return error; 551} 552 553int 554linux_getpgid(struct proc *p, struct linux_getpgid_args *args) 555{ 556 struct proc *curp; 557 558#ifdef DEBUG 559 printf("Linux-emul(%ld): getpgid(%d)\n", (long)p->p_pid, args->pid); 560#endif 561 if (args->pid != p->p_pid) { 562 if (!(curp = pfind(args->pid))) 563 return ESRCH; 564 } 565 else 566 curp = p; 567 p->p_retval[0] = curp->p_pgid; 568 return 0; 569} 570 571int 572linux_fork(struct proc *p, struct linux_fork_args *args) 573{ 574 int error; 575 576#ifdef DEBUG 577 printf("Linux-emul(%ld): fork()\n", (long)p->p_pid); 578#endif 579 if ((error = fork(p, (struct fork_args *)args)) != 0) 580 return error; 581 if (p->p_retval[1] == 1) 582 p->p_retval[0] = 0; 583 return 0; 584} 585 586int 587linux_vfork(struct proc *p, struct linux_vfork_args *args) 588{ 589 int error; 590 591#ifdef DEBUG 592 printf("Linux-emul(%ld): vfork()\n", (long)p->p_pid); 593#endif 594 595 if ((error = vfork(p, (struct vfork_args *)args)) != 0) 596 return error; 597 /* Are we the child? */ 598 if (p->p_retval[1] == 1) 599 p->p_retval[0] = 0; 600 return 0; 601} 602 603#define CLONE_VM 0x100 604#define CLONE_FS 0x200 605#define CLONE_FILES 0x400 606#define CLONE_SIGHAND 0x800 607#define CLONE_PID 0x1000 608 609int 610linux_clone(struct proc *p, struct linux_clone_args *args) 611{ 612 int error, ff = RFPROC; 613 struct proc *p2; 614 int exit_signal; 615 vm_offset_t start; 616 struct rfork_args rf_args; 617 618#ifdef DEBUG 619 if (args->flags & CLONE_PID) 620 printf("linux_clone(%ld): CLONE_PID not yet supported\n", 621 (long)p->p_pid); 622 printf("linux_clone(%ld): invoked with flags %x and stack %x\n", 623 (long)p->p_pid, (unsigned int)args->flags, 624 (unsigned int)args->stack); 625#endif 626 627 if (!args->stack) 628 return (EINVAL); 629 630 exit_signal = args->flags & 0x000000ff; 631 if (exit_signal >= LINUX_NSIG) 632 return EINVAL; 633 634 if (exit_signal <= LINUX_SIGTBLSZ) 635 exit_signal = linux_to_bsd_signal[_SIG_IDX(exit_signal)]; 636 637 /* RFTHREAD probably not necessary here, but it shouldn't hurt either */ 638 ff |= RFTHREAD; 639 640 if (args->flags & CLONE_VM) 641 ff |= RFMEM; 642 if (args->flags & CLONE_SIGHAND) 643 ff |= RFSIGSHARE; 644 if (!(args->flags & CLONE_FILES)) 645 ff |= RFFDG; 646 647 error = 0; 648 start = 0; 649 650 rf_args.flags = ff; 651 if ((error = rfork(p, &rf_args)) != 0) 652 return error; 653 654 p2 = pfind(p->p_retval[0]); 655 if (p2 == 0) 656 return ESRCH; 657 658 p2->p_sigparent = exit_signal; 659 p2->p_md.md_regs->tf_esp = (unsigned int)args->stack; 660 661#ifdef DEBUG 662 printf ("linux_clone(%ld): successful rfork to %ld\n", 663 (long)p->p_pid, (long)p2->p_pid); 664#endif 665 return 0; 666} 667 668/* XXX move */ 669struct linux_mmap_argv { 670 linux_caddr_t addr; 671 int len; 672 int prot; 673 int flags; 674 int fd; 675 int pos; 676}; 677 678#define STACK_SIZE (2 * 1024 * 1024) 679#define GUARD_SIZE (4 * PAGE_SIZE) 680int 681linux_mmap(struct proc *p, struct linux_mmap_args *args) 682{ 683 struct mmap_args /* { 684 caddr_t addr; 685 size_t len; 686 int prot; 687 int flags; 688 int fd; 689 long pad; 690 off_t pos; 691 } */ bsd_args; 692 int error; 693 struct linux_mmap_argv linux_args; 694 695 if ((error = copyin((caddr_t)args->ptr, (caddr_t)&linux_args, 696 sizeof(linux_args)))) 697 return error; 698#ifdef DEBUG 699 printf("Linux-emul(%ld): mmap(%p, %d, %d, 0x%08x, %d, %d)", 700 (long)p->p_pid, (void *)linux_args.addr, linux_args.len, 701 linux_args.prot, linux_args.flags, linux_args.fd, linux_args.pos); 702#endif 703 bsd_args.flags = 0; 704 if (linux_args.flags & LINUX_MAP_SHARED) 705 bsd_args.flags |= MAP_SHARED; 706 if (linux_args.flags & LINUX_MAP_PRIVATE) 707 bsd_args.flags |= MAP_PRIVATE; 708 if (linux_args.flags & LINUX_MAP_FIXED) 709 bsd_args.flags |= MAP_FIXED; 710 if (linux_args.flags & LINUX_MAP_ANON) 711 bsd_args.flags |= MAP_ANON; 712 if (linux_args.flags & LINUX_MAP_GROWSDOWN) { 713 bsd_args.flags |= MAP_STACK; 714 715 /* The linux MAP_GROWSDOWN option does not limit auto 716 * growth of the region. Linux mmap with this option 717 * takes as addr the inital BOS, and as len, the initial 718 * region size. It can then grow down from addr without 719 * limit. However, linux threads has an implicit internal 720 * limit to stack size of STACK_SIZE. Its just not 721 * enforced explicitly in linux. But, here we impose 722 * a limit of (STACK_SIZE - GUARD_SIZE) on the stack 723 * region, since we can do this with our mmap. 724 * 725 * Our mmap with MAP_STACK takes addr as the maximum 726 * downsize limit on BOS, and as len the max size of 727 * the region. It them maps the top SGROWSIZ bytes, 728 * and autgrows the region down, up to the limit 729 * in addr. 730 * 731 * If we don't use the MAP_STACK option, the effect 732 * of this code is to allocate a stack region of a 733 * fixed size of (STACK_SIZE - GUARD_SIZE). 734 */ 735 736 /* This gives us TOS */ 737 bsd_args.addr = linux_args.addr + linux_args.len; 738 739 /* This gives us our maximum stack size */ 740 if (linux_args.len > STACK_SIZE - GUARD_SIZE) 741 bsd_args.len = linux_args.len; 742 else 743 bsd_args.len = STACK_SIZE - GUARD_SIZE; 744 745 /* This gives us a new BOS. If we're using VM_STACK, then 746 * mmap will just map the top SGROWSIZ bytes, and let 747 * the stack grow down to the limit at BOS. If we're 748 * not using VM_STACK we map the full stack, since we 749 * don't have a way to autogrow it. 750 */ 751 bsd_args.addr -= bsd_args.len; 752 753 } else { 754 bsd_args.addr = linux_args.addr; 755 bsd_args.len = linux_args.len; 756 } 757 758 bsd_args.prot = linux_args.prot | PROT_READ; /* always required */ 759 if (linux_args.flags & LINUX_MAP_ANON) 760 bsd_args.fd = -1; 761 else 762 bsd_args.fd = linux_args.fd; 763 bsd_args.pos = linux_args.pos; 764 bsd_args.pad = 0; 765#ifdef DEBUG 766 printf("-> (%p, %d, %d, 0x%08x, %d, %d)\n", 767 (void *)bsd_args.addr, bsd_args.len, 768 bsd_args.prot, bsd_args.flags, bsd_args.fd, (int)bsd_args.pos); 769#endif 770 return mmap(p, &bsd_args); 771} 772 773int 774linux_mremap(struct proc *p, struct linux_mremap_args *args) 775{ 776 struct munmap_args /* { 777 void *addr; 778 size_t len; 779 } */ bsd_args; 780 int error = 0; 781 782#ifdef DEBUG 783 printf("Linux-emul(%ld): mremap(%p, %08x, %08x, %08x)\n", 784 (long)p->p_pid, (void *)args->addr, args->old_len, args->new_len, 785 args->flags); 786#endif 787 args->new_len = round_page(args->new_len); 788 args->old_len = round_page(args->old_len); 789 790 if (args->new_len > args->old_len) { 791 p->p_retval[0] = 0; 792 return ENOMEM; 793 } 794 795 if (args->new_len < args->old_len) { 796 bsd_args.addr = args->addr + args->new_len; 797 bsd_args.len = args->old_len - args->new_len; 798 error = munmap(p, &bsd_args); 799 } 800 801 p->p_retval[0] = error ? 0 : (int)args->addr; 802 return error; 803} 804 805int 806linux_msync(struct proc *p, struct linux_msync_args *args) 807{ 808 struct msync_args bsd_args; 809 810 bsd_args.addr = args->addr; 811 bsd_args.len = args->len; 812 bsd_args.flags = 0; /* XXX ignore */ 813 814 return msync(p, &bsd_args); 815} 816 817int 818linux_pipe(struct proc *p, struct linux_pipe_args *args) 819{ 820 int error; 821 int reg_edx; 822 823#ifdef DEBUG 824 printf("Linux-emul(%ld): pipe(*)\n", (long)p->p_pid); 825#endif 826 reg_edx = p->p_retval[1]; 827 error = pipe(p, 0); 828 if (error) { 829 p->p_retval[1] = reg_edx; 830 return error; 831 } 832 833 error = copyout(p->p_retval, args->pipefds, 2*sizeof(int)); 834 if (error) { 835 p->p_retval[1] = reg_edx; 836 return error; 837 } 838 839 p->p_retval[1] = reg_edx; 840 p->p_retval[0] = 0; 841 return 0; 842} 843 844int 845linux_time(struct proc *p, struct linux_time_args *args) 846{ 847 struct timeval tv; 848 linux_time_t tm; 849 int error; 850 851#ifdef DEBUG 852 printf("Linux-emul(%ld): time(*)\n", (long)p->p_pid); 853#endif 854 microtime(&tv); 855 tm = tv.tv_sec; 856 if (args->tm && (error = copyout(&tm, args->tm, sizeof(linux_time_t)))) 857 return error; 858 p->p_retval[0] = tm; 859 return 0; 860} 861 862struct linux_times_argv { 863 long tms_utime; 864 long tms_stime; 865 long tms_cutime; 866 long tms_cstime; 867}; 868 869#define CLK_TCK 100 /* Linux uses 100 */ 870#define CONVTCK(r) (r.tv_sec * CLK_TCK + r.tv_usec / (1000000 / CLK_TCK)) 871 872int 873linux_times(struct proc *p, struct linux_times_args *args) 874{ 875 struct timeval tv; 876 struct linux_times_argv tms; 877 struct rusage ru; 878 int error; 879 880#ifdef DEBUG 881 printf("Linux-emul(%ld): times(*)\n", (long)p->p_pid); 882#endif 883 calcru(p, &ru.ru_utime, &ru.ru_stime, NULL); 884 885 tms.tms_utime = CONVTCK(ru.ru_utime); 886 tms.tms_stime = CONVTCK(ru.ru_stime); 887 888 tms.tms_cutime = CONVTCK(p->p_stats->p_cru.ru_utime); 889 tms.tms_cstime = CONVTCK(p->p_stats->p_cru.ru_stime); 890 891 if ((error = copyout((caddr_t)&tms, (caddr_t)args->buf, 892 sizeof(struct linux_times_argv)))) 893 return error; 894 895 microuptime(&tv); 896 p->p_retval[0] = (int)CONVTCK(tv); 897 return 0; 898} 899 900int 901linux_newuname(struct proc *p, struct linux_newuname_args *args) 902{ 903 struct linux_new_utsname utsname; 904 char *osrelease, *osname; 905 906#ifdef DEBUG 907 printf("Linux-emul(%ld): newuname(*)\n", (long)p->p_pid); 908#endif 909 910 osname = linux_get_osname(p); 911 osrelease = linux_get_osrelease(p); 912 913 bzero(&utsname, sizeof(struct linux_new_utsname)); 914 strncpy(utsname.sysname, osname, LINUX_MAX_UTSNAME-1); 915 strncpy(utsname.nodename, hostname, LINUX_MAX_UTSNAME-1); 916 strncpy(utsname.release, osrelease, LINUX_MAX_UTSNAME-1); 917 strncpy(utsname.version, version, LINUX_MAX_UTSNAME-1); 918 strncpy(utsname.machine, machine, LINUX_MAX_UTSNAME-1); 919 strncpy(utsname.domainname, domainname, LINUX_MAX_UTSNAME-1); 920 921 return (copyout((caddr_t)&utsname, (caddr_t)args->buf, 922 sizeof(struct linux_new_utsname))); 923} 924 925struct linux_utimbuf { 926 linux_time_t l_actime; 927 linux_time_t l_modtime; 928}; 929 930int 931linux_utime(struct proc *p, struct linux_utime_args *args) 932{ 933 struct utimes_args /* { 934 char *path; 935 struct timeval *tptr; 936 } */ bsdutimes; 937 struct timeval tv[2], *tvp; 938 struct linux_utimbuf lut; 939 int error; 940 caddr_t sg; 941 942 sg = stackgap_init(); 943 CHECKALTEXIST(p, &sg, args->fname); 944 945#ifdef DEBUG 946 printf("Linux-emul(%ld): utime(%s, *)\n", (long)p->p_pid, args->fname); 947#endif 948 if (args->times) { 949 if ((error = copyin(args->times, &lut, sizeof lut))) 950 return error; 951 tv[0].tv_sec = lut.l_actime; 952 tv[0].tv_usec = 0; 953 tv[1].tv_sec = lut.l_modtime; 954 tv[1].tv_usec = 0; 955 /* so that utimes can copyin */ 956 tvp = (struct timeval *)stackgap_alloc(&sg, sizeof(tv));
|
957 if ((error = copyout(tv, tvp, sizeof(tv)))) 958 return error; 959 bsdutimes.tptr = tvp; 960 } else 961 bsdutimes.tptr = NULL; 962 963 bsdutimes.path = args->fname; 964 return utimes(p, &bsdutimes); 965} 966 967#define __WCLONE 0x80000000 968 969int 970linux_waitpid(struct proc *p, struct linux_waitpid_args *args) 971{ 972 struct wait_args /* { 973 int pid; 974 int *status; 975 int options; 976 struct rusage *rusage; 977 } */ tmp; 978 int error, tmpstat; 979 980#ifdef DEBUG 981 printf("Linux-emul(%ld): waitpid(%d, %p, %d)\n", 982 (long)p->p_pid, args->pid, (void *)args->status, args->options); 983#endif 984 tmp.pid = args->pid; 985 tmp.status = args->status; 986 tmp.options = (args->options & (WNOHANG | WUNTRACED)); 987 /* WLINUXCLONE should be equal to __WCLONE, but we make sure */ 988 if (args->options & __WCLONE) 989 tmp.options |= WLINUXCLONE; 990 tmp.rusage = NULL; 991 992 if ((error = wait4(p, &tmp)) != 0) 993 return error; 994 995 if (args->status) { 996 if ((error = copyin(args->status, &tmpstat, sizeof(int))) != 0) 997 return error; 998 tmpstat &= 0xffff; 999 if (WIFSIGNALED(tmpstat)) 1000 tmpstat = (tmpstat & 0xffffff80) | 1001 BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat)); 1002 else if (WIFSTOPPED(tmpstat)) 1003 tmpstat = (tmpstat & 0xffff00ff) | 1004 (BSD_TO_LINUX_SIGNAL(WSTOPSIG(tmpstat)) << 8); 1005 return copyout(&tmpstat, args->status, sizeof(int)); 1006 } else 1007 return 0; 1008} 1009 1010int 1011linux_wait4(struct proc *p, struct linux_wait4_args *args) 1012{ 1013 struct wait_args /* { 1014 int pid; 1015 int *status; 1016 int options; 1017 struct rusage *rusage; 1018 } */ tmp; 1019 int error, tmpstat; 1020 1021#ifdef DEBUG 1022 printf("Linux-emul(%ld): wait4(%d, %p, %d, %p)\n", 1023 (long)p->p_pid, args->pid, (void *)args->status, args->options, 1024 (void *)args->rusage); 1025#endif 1026 tmp.pid = args->pid; 1027 tmp.status = args->status; 1028 tmp.options = (args->options & (WNOHANG | WUNTRACED)); 1029 /* WLINUXCLONE should be equal to __WCLONE, but we make sure */ 1030 if (args->options & __WCLONE) 1031 tmp.options |= WLINUXCLONE; 1032 tmp.rusage = args->rusage; 1033 1034 if ((error = wait4(p, &tmp)) != 0) 1035 return error; 1036 1037 SIGDELSET(p->p_siglist, SIGCHLD); 1038 1039 if (args->status) { 1040 if ((error = copyin(args->status, &tmpstat, sizeof(int))) != 0) 1041 return error; 1042 tmpstat &= 0xffff; 1043 if (WIFSIGNALED(tmpstat)) 1044 tmpstat = (tmpstat & 0xffffff80) | 1045 BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat)); 1046 else if (WIFSTOPPED(tmpstat)) 1047 tmpstat = (tmpstat & 0xffff00ff) | 1048 (BSD_TO_LINUX_SIGNAL(WSTOPSIG(tmpstat)) << 8); 1049 return copyout(&tmpstat, args->status, sizeof(int)); 1050 } else 1051 return 0; 1052} 1053 1054int 1055linux_mknod(struct proc *p, struct linux_mknod_args *args) 1056{ 1057 caddr_t sg; 1058 struct mknod_args bsd_mknod; 1059 struct mkfifo_args bsd_mkfifo; 1060 1061 sg = stackgap_init(); 1062 1063 CHECKALTCREAT(p, &sg, args->path); 1064 1065#ifdef DEBUG 1066 printf("Linux-emul(%ld): mknod(%s, %d, %d)\n", 1067 (long)p->p_pid, args->path, args->mode, args->dev); 1068#endif 1069 1070 if (args->mode & S_IFIFO) { 1071 bsd_mkfifo.path = args->path; 1072 bsd_mkfifo.mode = args->mode; 1073 return mkfifo(p, &bsd_mkfifo); 1074 } else { 1075 bsd_mknod.path = args->path; 1076 bsd_mknod.mode = args->mode; 1077 bsd_mknod.dev = args->dev; 1078 return mknod(p, &bsd_mknod); 1079 } 1080} 1081 1082/* 1083 * UGH! This is just about the dumbest idea I've ever heard!! 1084 */ 1085int 1086linux_personality(struct proc *p, struct linux_personality_args *args) 1087{ 1088#ifdef DEBUG 1089 printf("Linux-emul(%ld): personality(%d)\n", 1090 (long)p->p_pid, args->per); 1091#endif 1092 if (args->per != 0) 1093 return EINVAL; 1094 1095 /* Yes Jim, it's still a Linux... */ 1096 p->p_retval[0] = 0; 1097 return 0; 1098} 1099 1100/* 1101 * Wrappers for get/setitimer for debugging.. 1102 */ 1103int 1104linux_setitimer(struct proc *p, struct linux_setitimer_args *args) 1105{ 1106 struct setitimer_args bsa; 1107 struct itimerval foo; 1108 int error; 1109 1110#ifdef DEBUG 1111 printf("Linux-emul(%ld): setitimer(%p, %p)\n", 1112 (long)p->p_pid, (void *)args->itv, (void *)args->oitv); 1113#endif 1114 bsa.which = args->which; 1115 bsa.itv = args->itv; 1116 bsa.oitv = args->oitv; 1117 if (args->itv) { 1118 if ((error = copyin((caddr_t)args->itv, (caddr_t)&foo, 1119 sizeof(foo)))) 1120 return error; 1121#ifdef DEBUG 1122 printf("setitimer: value: sec: %ld, usec: %ld\n", 1123 foo.it_value.tv_sec, foo.it_value.tv_usec); 1124 printf("setitimer: interval: sec: %ld, usec: %ld\n", 1125 foo.it_interval.tv_sec, foo.it_interval.tv_usec); 1126#endif 1127 } 1128 return setitimer(p, &bsa); 1129} 1130 1131int 1132linux_getitimer(struct proc *p, struct linux_getitimer_args *args) 1133{ 1134 struct getitimer_args bsa; 1135#ifdef DEBUG 1136 printf("Linux-emul(%ld): getitimer(%p)\n", 1137 (long)p->p_pid, (void *)args->itv); 1138#endif 1139 bsa.which = args->which; 1140 bsa.itv = args->itv; 1141 return getitimer(p, &bsa); 1142} 1143 1144int 1145linux_ioperm(struct proc *p, struct linux_ioperm_args *args) 1146{ 1147 struct sysarch_args sa; 1148 struct i386_ioperm_args *iia; 1149 caddr_t sg; 1150 1151 sg = stackgap_init(); 1152 iia = stackgap_alloc(&sg, sizeof(struct i386_ioperm_args)); 1153 iia->start = args->start; 1154 iia->length = args->length; 1155 iia->enable = args->enable; 1156 sa.op = I386_SET_IOPERM; 1157 sa.parms = (char *)iia; 1158 return sysarch(p, &sa); 1159} 1160 1161int 1162linux_iopl(struct proc *p, struct linux_iopl_args *args) 1163{ 1164 int error; 1165 1166 if (args->level < 0 || args->level > 3) 1167 return (EINVAL); 1168 if ((error = suser(p)) != 0) 1169 return (error); 1170 if (securelevel > 0) 1171 return (EPERM); 1172 p->p_md.md_regs->tf_eflags = (p->p_md.md_regs->tf_eflags & ~PSL_IOPL) | 1173 (args->level * (PSL_IOPL / 3)); 1174 return (0); 1175} 1176 1177int 1178linux_nice(struct proc *p, struct linux_nice_args *args) 1179{ 1180 struct setpriority_args bsd_args; 1181 1182 bsd_args.which = PRIO_PROCESS; 1183 bsd_args.who = 0; /* current process */ 1184 bsd_args.prio = args->inc; 1185 return setpriority(p, &bsd_args); 1186} 1187 1188int 1189linux_setgroups(p, uap) 1190 struct proc *p; 1191 struct linux_setgroups_args *uap; 1192{ 1193 struct pcred *pc; 1194 linux_gid_t linux_gidset[NGROUPS]; 1195 gid_t *bsd_gidset; 1196 int ngrp, error; 1197 1198 pc = p->p_cred; 1199 ngrp = uap->gidsetsize; 1200 1201 /* 1202 * cr_groups[0] holds egid. Setting the whole set from 1203 * the supplied set will cause egid to be changed too. 1204 * Keep cr_groups[0] unchanged to prevent that. 1205 */ 1206 1207 if ((error = suser(p)) != 0) 1208 return (error); 1209 1210 if (ngrp >= NGROUPS) 1211 return (EINVAL); 1212 1213 pc->pc_ucred = crcopy(pc->pc_ucred); 1214 if (ngrp > 0) { 1215 error = copyin((caddr_t)uap->gidset, (caddr_t)linux_gidset, 1216 ngrp * sizeof(linux_gid_t)); 1217 if (error) 1218 return (error); 1219 1220 pc->pc_ucred->cr_ngroups = ngrp + 1; 1221 1222 bsd_gidset = pc->pc_ucred->cr_groups; 1223 ngrp--; 1224 while (ngrp >= 0) { 1225 bsd_gidset[ngrp + 1] = linux_gidset[ngrp]; 1226 ngrp--; 1227 } 1228 } 1229 else 1230 pc->pc_ucred->cr_ngroups = 1; 1231 1232 setsugid(p); 1233 return (0); 1234} 1235 1236int 1237linux_getgroups(p, uap) 1238 struct proc *p; 1239 struct linux_getgroups_args *uap; 1240{ 1241 struct pcred *pc; 1242 linux_gid_t linux_gidset[NGROUPS]; 1243 gid_t *bsd_gidset; 1244 int bsd_gidsetsz, ngrp, error; 1245 1246 pc = p->p_cred; 1247 bsd_gidset = pc->pc_ucred->cr_groups; 1248 bsd_gidsetsz = pc->pc_ucred->cr_ngroups - 1; 1249 1250 /* 1251 * cr_groups[0] holds egid. Returning the whole set 1252 * here will cause a duplicate. Exclude cr_groups[0] 1253 * to prevent that. 1254 */ 1255 1256 if ((ngrp = uap->gidsetsize) == 0) { 1257 p->p_retval[0] = bsd_gidsetsz; 1258 return (0); 1259 } 1260 1261 if (ngrp < bsd_gidsetsz) 1262 return (EINVAL); 1263 1264 ngrp = 0; 1265 while (ngrp < bsd_gidsetsz) { 1266 linux_gidset[ngrp] = bsd_gidset[ngrp + 1]; 1267 ngrp++; 1268 } 1269 1270 if ((error = copyout((caddr_t)linux_gidset, (caddr_t)uap->gidset, 1271 ngrp * sizeof(linux_gid_t)))) 1272 return (error); 1273 1274 p->p_retval[0] = ngrp; 1275 return (0); 1276} 1277 1278int 1279linux_setrlimit(p, uap) 1280 struct proc *p; 1281 struct linux_setrlimit_args *uap; 1282{ 1283 struct osetrlimit_args bsd; 1284 1285#ifdef DEBUG 1286 printf("Linux-emul(%ld): setrlimit(%d, %p)\n", 1287 (long)p->p_pid, uap->resource, (void *)uap->rlim); 1288#endif 1289 1290 if (uap->resource >= LINUX_RLIM_NLIMITS) 1291 return EINVAL; 1292 1293 bsd.which = linux_to_bsd_resource[uap->resource]; 1294 1295 if (bsd.which == -1) 1296 return EINVAL; 1297 1298 bsd.rlp = uap->rlim; 1299 return osetrlimit(p, &bsd); 1300} 1301 1302int 1303linux_getrlimit(p, uap) 1304 struct proc *p; 1305 struct linux_getrlimit_args *uap; 1306{ 1307 struct ogetrlimit_args bsd; 1308 1309#ifdef DEBUG 1310 printf("Linux-emul(%ld): getrlimit(%d, %p)\n", 1311 (long)p->p_pid, uap->resource, (void *)uap->rlim); 1312#endif 1313 1314 if (uap->resource >= LINUX_RLIM_NLIMITS) 1315 return EINVAL; 1316 1317 bsd.which = linux_to_bsd_resource[uap->resource]; 1318 1319 if (bsd.which == -1) 1320 return EINVAL; 1321 1322 bsd.rlp = uap->rlim; 1323 return ogetrlimit(p, &bsd); 1324} 1325 1326int 1327linux_sched_setscheduler(p, uap) 1328 struct proc *p; 1329 struct linux_sched_setscheduler_args *uap; 1330{ 1331 struct sched_setscheduler_args bsd; 1332 1333#ifdef DEBUG 1334 printf("Linux-emul(%ld): sched_setscheduler(%d, %d, %p)\n", 1335 (long)p->p_pid, uap->pid, uap->policy, (const void *)uap->param); 1336#endif 1337 1338 switch (uap->policy) { 1339 case LINUX_SCHED_OTHER: 1340 bsd.policy = SCHED_OTHER; 1341 break; 1342 case LINUX_SCHED_FIFO: 1343 bsd.policy = SCHED_FIFO; 1344 break; 1345 case LINUX_SCHED_RR: 1346 bsd.policy = SCHED_RR; 1347 break; 1348 default: 1349 return EINVAL; 1350 } 1351 1352 bsd.pid = uap->pid; 1353 bsd.param = uap->param; 1354 return sched_setscheduler(p, &bsd); 1355} 1356 1357int 1358linux_sched_getscheduler(p, uap) 1359 struct proc *p; 1360 struct linux_sched_getscheduler_args *uap; 1361{ 1362 struct sched_getscheduler_args bsd; 1363 int error; 1364 1365#ifdef DEBUG 1366 printf("Linux-emul(%ld): sched_getscheduler(%d)\n", 1367 (long)p->p_pid, uap->pid); 1368#endif 1369 1370 bsd.pid = uap->pid; 1371 error = sched_getscheduler(p, &bsd); 1372 1373 switch (p->p_retval[0]) { 1374 case SCHED_OTHER: 1375 p->p_retval[0] = LINUX_SCHED_OTHER; 1376 break; 1377 case SCHED_FIFO: 1378 p->p_retval[0] = LINUX_SCHED_FIFO; 1379 break; 1380 case SCHED_RR: 1381 p->p_retval[0] = LINUX_SCHED_RR; 1382 break; 1383 } 1384 1385 return error; 1386} 1387 1388struct linux_descriptor { 1389 unsigned int entry_number; 1390 unsigned long base_addr; 1391 unsigned int limit; 1392 unsigned int seg_32bit:1; 1393 unsigned int contents:2; 1394 unsigned int read_exec_only:1; 1395 unsigned int limit_in_pages:1; 1396 unsigned int seg_not_present:1; 1397 unsigned int useable:1; 1398}; 1399 1400int 1401linux_modify_ldt(p, uap) 1402 struct proc *p; 1403 struct linux_modify_ldt_args *uap; 1404{ 1405 int error; 1406 caddr_t sg; 1407 struct sysarch_args args; 1408 struct i386_ldt_args *ldt; 1409 struct linux_descriptor ld; 1410 union descriptor *desc; 1411 1412 sg = stackgap_init(); 1413 1414 if (uap->ptr == NULL) 1415 return (EINVAL); 1416 1417 switch (uap->func) { 1418 case 0x00: /* read_ldt */ 1419 ldt = stackgap_alloc(&sg, sizeof(*ldt)); 1420 ldt->start = 0; 1421 ldt->descs = uap->ptr; 1422 ldt->num = uap->bytecount / sizeof(union descriptor); 1423 args.op = I386_GET_LDT; 1424 args.parms = (char*)ldt; 1425 error = sysarch(p, &args); 1426 p->p_retval[0] *= sizeof(union descriptor); 1427 break; 1428 case 0x01: /* write_ldt */ 1429 case 0x11: /* write_ldt */ 1430 if (uap->bytecount != sizeof(ld)) 1431 return (EINVAL); 1432 1433 error = copyin(uap->ptr, &ld, sizeof(ld)); 1434 if (error) 1435 return (error); 1436 1437 ldt = stackgap_alloc(&sg, sizeof(*ldt)); 1438 desc = stackgap_alloc(&sg, sizeof(*desc)); 1439 ldt->start = ld.entry_number; 1440 ldt->descs = desc; 1441 ldt->num = 1; 1442 desc->sd.sd_lolimit = (ld.limit & 0x0000ffff); 1443 desc->sd.sd_hilimit = (ld.limit & 0x000f0000) >> 16; 1444 desc->sd.sd_lobase = (ld.base_addr & 0x00ffffff); 1445 desc->sd.sd_hibase = (ld.base_addr & 0xff000000) >> 24; 1446 desc->sd.sd_type = SDT_MEMRO | ((ld.read_exec_only ^ 1) << 1) | 1447 (ld.contents << 2); 1448 desc->sd.sd_dpl = 3; 1449 desc->sd.sd_p = (ld.seg_not_present ^ 1); 1450 desc->sd.sd_xx = 0; 1451 desc->sd.sd_def32 = ld.seg_32bit; 1452 desc->sd.sd_gran = ld.limit_in_pages; 1453 args.op = I386_SET_LDT; 1454 args.parms = (char*)ldt; 1455 error = sysarch(p, &args); 1456 break; 1457 default: 1458 error = EINVAL; 1459 break; 1460 } 1461 1462 if (error == EOPNOTSUPP) { 1463 printf("linux: modify_ldt needs kernel option USER_LDT\n"); 1464 error = ENOSYS; 1465 } 1466 1467 return (error); 1468}
| 959 if ((error = copyout(tv, tvp, sizeof(tv)))) 960 return error; 961 bsdutimes.tptr = tvp; 962 } else 963 bsdutimes.tptr = NULL; 964 965 bsdutimes.path = args->fname; 966 return utimes(p, &bsdutimes); 967} 968 969#define __WCLONE 0x80000000 970 971int 972linux_waitpid(struct proc *p, struct linux_waitpid_args *args) 973{ 974 struct wait_args /* { 975 int pid; 976 int *status; 977 int options; 978 struct rusage *rusage; 979 } */ tmp; 980 int error, tmpstat; 981 982#ifdef DEBUG 983 printf("Linux-emul(%ld): waitpid(%d, %p, %d)\n", 984 (long)p->p_pid, args->pid, (void *)args->status, args->options); 985#endif 986 tmp.pid = args->pid; 987 tmp.status = args->status; 988 tmp.options = (args->options & (WNOHANG | WUNTRACED)); 989 /* WLINUXCLONE should be equal to __WCLONE, but we make sure */ 990 if (args->options & __WCLONE) 991 tmp.options |= WLINUXCLONE; 992 tmp.rusage = NULL; 993 994 if ((error = wait4(p, &tmp)) != 0) 995 return error; 996 997 if (args->status) { 998 if ((error = copyin(args->status, &tmpstat, sizeof(int))) != 0) 999 return error; 1000 tmpstat &= 0xffff; 1001 if (WIFSIGNALED(tmpstat)) 1002 tmpstat = (tmpstat & 0xffffff80) | 1003 BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat)); 1004 else if (WIFSTOPPED(tmpstat)) 1005 tmpstat = (tmpstat & 0xffff00ff) | 1006 (BSD_TO_LINUX_SIGNAL(WSTOPSIG(tmpstat)) << 8); 1007 return copyout(&tmpstat, args->status, sizeof(int)); 1008 } else 1009 return 0; 1010} 1011 1012int 1013linux_wait4(struct proc *p, struct linux_wait4_args *args) 1014{ 1015 struct wait_args /* { 1016 int pid; 1017 int *status; 1018 int options; 1019 struct rusage *rusage; 1020 } */ tmp; 1021 int error, tmpstat; 1022 1023#ifdef DEBUG 1024 printf("Linux-emul(%ld): wait4(%d, %p, %d, %p)\n", 1025 (long)p->p_pid, args->pid, (void *)args->status, args->options, 1026 (void *)args->rusage); 1027#endif 1028 tmp.pid = args->pid; 1029 tmp.status = args->status; 1030 tmp.options = (args->options & (WNOHANG | WUNTRACED)); 1031 /* WLINUXCLONE should be equal to __WCLONE, but we make sure */ 1032 if (args->options & __WCLONE) 1033 tmp.options |= WLINUXCLONE; 1034 tmp.rusage = args->rusage; 1035 1036 if ((error = wait4(p, &tmp)) != 0) 1037 return error; 1038 1039 SIGDELSET(p->p_siglist, SIGCHLD); 1040 1041 if (args->status) { 1042 if ((error = copyin(args->status, &tmpstat, sizeof(int))) != 0) 1043 return error; 1044 tmpstat &= 0xffff; 1045 if (WIFSIGNALED(tmpstat)) 1046 tmpstat = (tmpstat & 0xffffff80) | 1047 BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat)); 1048 else if (WIFSTOPPED(tmpstat)) 1049 tmpstat = (tmpstat & 0xffff00ff) | 1050 (BSD_TO_LINUX_SIGNAL(WSTOPSIG(tmpstat)) << 8); 1051 return copyout(&tmpstat, args->status, sizeof(int)); 1052 } else 1053 return 0; 1054} 1055 1056int 1057linux_mknod(struct proc *p, struct linux_mknod_args *args) 1058{ 1059 caddr_t sg; 1060 struct mknod_args bsd_mknod; 1061 struct mkfifo_args bsd_mkfifo; 1062 1063 sg = stackgap_init(); 1064 1065 CHECKALTCREAT(p, &sg, args->path); 1066 1067#ifdef DEBUG 1068 printf("Linux-emul(%ld): mknod(%s, %d, %d)\n", 1069 (long)p->p_pid, args->path, args->mode, args->dev); 1070#endif 1071 1072 if (args->mode & S_IFIFO) { 1073 bsd_mkfifo.path = args->path; 1074 bsd_mkfifo.mode = args->mode; 1075 return mkfifo(p, &bsd_mkfifo); 1076 } else { 1077 bsd_mknod.path = args->path; 1078 bsd_mknod.mode = args->mode; 1079 bsd_mknod.dev = args->dev; 1080 return mknod(p, &bsd_mknod); 1081 } 1082} 1083 1084/* 1085 * UGH! This is just about the dumbest idea I've ever heard!! 1086 */ 1087int 1088linux_personality(struct proc *p, struct linux_personality_args *args) 1089{ 1090#ifdef DEBUG 1091 printf("Linux-emul(%ld): personality(%d)\n", 1092 (long)p->p_pid, args->per); 1093#endif 1094 if (args->per != 0) 1095 return EINVAL; 1096 1097 /* Yes Jim, it's still a Linux... */ 1098 p->p_retval[0] = 0; 1099 return 0; 1100} 1101 1102/* 1103 * Wrappers for get/setitimer for debugging.. 1104 */ 1105int 1106linux_setitimer(struct proc *p, struct linux_setitimer_args *args) 1107{ 1108 struct setitimer_args bsa; 1109 struct itimerval foo; 1110 int error; 1111 1112#ifdef DEBUG 1113 printf("Linux-emul(%ld): setitimer(%p, %p)\n", 1114 (long)p->p_pid, (void *)args->itv, (void *)args->oitv); 1115#endif 1116 bsa.which = args->which; 1117 bsa.itv = args->itv; 1118 bsa.oitv = args->oitv; 1119 if (args->itv) { 1120 if ((error = copyin((caddr_t)args->itv, (caddr_t)&foo, 1121 sizeof(foo)))) 1122 return error; 1123#ifdef DEBUG 1124 printf("setitimer: value: sec: %ld, usec: %ld\n", 1125 foo.it_value.tv_sec, foo.it_value.tv_usec); 1126 printf("setitimer: interval: sec: %ld, usec: %ld\n", 1127 foo.it_interval.tv_sec, foo.it_interval.tv_usec); 1128#endif 1129 } 1130 return setitimer(p, &bsa); 1131} 1132 1133int 1134linux_getitimer(struct proc *p, struct linux_getitimer_args *args) 1135{ 1136 struct getitimer_args bsa; 1137#ifdef DEBUG 1138 printf("Linux-emul(%ld): getitimer(%p)\n", 1139 (long)p->p_pid, (void *)args->itv); 1140#endif 1141 bsa.which = args->which; 1142 bsa.itv = args->itv; 1143 return getitimer(p, &bsa); 1144} 1145 1146int 1147linux_ioperm(struct proc *p, struct linux_ioperm_args *args) 1148{ 1149 struct sysarch_args sa; 1150 struct i386_ioperm_args *iia; 1151 caddr_t sg; 1152 1153 sg = stackgap_init(); 1154 iia = stackgap_alloc(&sg, sizeof(struct i386_ioperm_args)); 1155 iia->start = args->start; 1156 iia->length = args->length; 1157 iia->enable = args->enable; 1158 sa.op = I386_SET_IOPERM; 1159 sa.parms = (char *)iia; 1160 return sysarch(p, &sa); 1161} 1162 1163int 1164linux_iopl(struct proc *p, struct linux_iopl_args *args) 1165{ 1166 int error; 1167 1168 if (args->level < 0 || args->level > 3) 1169 return (EINVAL); 1170 if ((error = suser(p)) != 0) 1171 return (error); 1172 if (securelevel > 0) 1173 return (EPERM); 1174 p->p_md.md_regs->tf_eflags = (p->p_md.md_regs->tf_eflags & ~PSL_IOPL) | 1175 (args->level * (PSL_IOPL / 3)); 1176 return (0); 1177} 1178 1179int 1180linux_nice(struct proc *p, struct linux_nice_args *args) 1181{ 1182 struct setpriority_args bsd_args; 1183 1184 bsd_args.which = PRIO_PROCESS; 1185 bsd_args.who = 0; /* current process */ 1186 bsd_args.prio = args->inc; 1187 return setpriority(p, &bsd_args); 1188} 1189 1190int 1191linux_setgroups(p, uap) 1192 struct proc *p; 1193 struct linux_setgroups_args *uap; 1194{ 1195 struct pcred *pc; 1196 linux_gid_t linux_gidset[NGROUPS]; 1197 gid_t *bsd_gidset; 1198 int ngrp, error; 1199 1200 pc = p->p_cred; 1201 ngrp = uap->gidsetsize; 1202 1203 /* 1204 * cr_groups[0] holds egid. Setting the whole set from 1205 * the supplied set will cause egid to be changed too. 1206 * Keep cr_groups[0] unchanged to prevent that. 1207 */ 1208 1209 if ((error = suser(p)) != 0) 1210 return (error); 1211 1212 if (ngrp >= NGROUPS) 1213 return (EINVAL); 1214 1215 pc->pc_ucred = crcopy(pc->pc_ucred); 1216 if (ngrp > 0) { 1217 error = copyin((caddr_t)uap->gidset, (caddr_t)linux_gidset, 1218 ngrp * sizeof(linux_gid_t)); 1219 if (error) 1220 return (error); 1221 1222 pc->pc_ucred->cr_ngroups = ngrp + 1; 1223 1224 bsd_gidset = pc->pc_ucred->cr_groups; 1225 ngrp--; 1226 while (ngrp >= 0) { 1227 bsd_gidset[ngrp + 1] = linux_gidset[ngrp]; 1228 ngrp--; 1229 } 1230 } 1231 else 1232 pc->pc_ucred->cr_ngroups = 1; 1233 1234 setsugid(p); 1235 return (0); 1236} 1237 1238int 1239linux_getgroups(p, uap) 1240 struct proc *p; 1241 struct linux_getgroups_args *uap; 1242{ 1243 struct pcred *pc; 1244 linux_gid_t linux_gidset[NGROUPS]; 1245 gid_t *bsd_gidset; 1246 int bsd_gidsetsz, ngrp, error; 1247 1248 pc = p->p_cred; 1249 bsd_gidset = pc->pc_ucred->cr_groups; 1250 bsd_gidsetsz = pc->pc_ucred->cr_ngroups - 1; 1251 1252 /* 1253 * cr_groups[0] holds egid. Returning the whole set 1254 * here will cause a duplicate. Exclude cr_groups[0] 1255 * to prevent that. 1256 */ 1257 1258 if ((ngrp = uap->gidsetsize) == 0) { 1259 p->p_retval[0] = bsd_gidsetsz; 1260 return (0); 1261 } 1262 1263 if (ngrp < bsd_gidsetsz) 1264 return (EINVAL); 1265 1266 ngrp = 0; 1267 while (ngrp < bsd_gidsetsz) { 1268 linux_gidset[ngrp] = bsd_gidset[ngrp + 1]; 1269 ngrp++; 1270 } 1271 1272 if ((error = copyout((caddr_t)linux_gidset, (caddr_t)uap->gidset, 1273 ngrp * sizeof(linux_gid_t)))) 1274 return (error); 1275 1276 p->p_retval[0] = ngrp; 1277 return (0); 1278} 1279 1280int 1281linux_setrlimit(p, uap) 1282 struct proc *p; 1283 struct linux_setrlimit_args *uap; 1284{ 1285 struct osetrlimit_args bsd; 1286 1287#ifdef DEBUG 1288 printf("Linux-emul(%ld): setrlimit(%d, %p)\n", 1289 (long)p->p_pid, uap->resource, (void *)uap->rlim); 1290#endif 1291 1292 if (uap->resource >= LINUX_RLIM_NLIMITS) 1293 return EINVAL; 1294 1295 bsd.which = linux_to_bsd_resource[uap->resource]; 1296 1297 if (bsd.which == -1) 1298 return EINVAL; 1299 1300 bsd.rlp = uap->rlim; 1301 return osetrlimit(p, &bsd); 1302} 1303 1304int 1305linux_getrlimit(p, uap) 1306 struct proc *p; 1307 struct linux_getrlimit_args *uap; 1308{ 1309 struct ogetrlimit_args bsd; 1310 1311#ifdef DEBUG 1312 printf("Linux-emul(%ld): getrlimit(%d, %p)\n", 1313 (long)p->p_pid, uap->resource, (void *)uap->rlim); 1314#endif 1315 1316 if (uap->resource >= LINUX_RLIM_NLIMITS) 1317 return EINVAL; 1318 1319 bsd.which = linux_to_bsd_resource[uap->resource]; 1320 1321 if (bsd.which == -1) 1322 return EINVAL; 1323 1324 bsd.rlp = uap->rlim; 1325 return ogetrlimit(p, &bsd); 1326} 1327 1328int 1329linux_sched_setscheduler(p, uap) 1330 struct proc *p; 1331 struct linux_sched_setscheduler_args *uap; 1332{ 1333 struct sched_setscheduler_args bsd; 1334 1335#ifdef DEBUG 1336 printf("Linux-emul(%ld): sched_setscheduler(%d, %d, %p)\n", 1337 (long)p->p_pid, uap->pid, uap->policy, (const void *)uap->param); 1338#endif 1339 1340 switch (uap->policy) { 1341 case LINUX_SCHED_OTHER: 1342 bsd.policy = SCHED_OTHER; 1343 break; 1344 case LINUX_SCHED_FIFO: 1345 bsd.policy = SCHED_FIFO; 1346 break; 1347 case LINUX_SCHED_RR: 1348 bsd.policy = SCHED_RR; 1349 break; 1350 default: 1351 return EINVAL; 1352 } 1353 1354 bsd.pid = uap->pid; 1355 bsd.param = uap->param; 1356 return sched_setscheduler(p, &bsd); 1357} 1358 1359int 1360linux_sched_getscheduler(p, uap) 1361 struct proc *p; 1362 struct linux_sched_getscheduler_args *uap; 1363{ 1364 struct sched_getscheduler_args bsd; 1365 int error; 1366 1367#ifdef DEBUG 1368 printf("Linux-emul(%ld): sched_getscheduler(%d)\n", 1369 (long)p->p_pid, uap->pid); 1370#endif 1371 1372 bsd.pid = uap->pid; 1373 error = sched_getscheduler(p, &bsd); 1374 1375 switch (p->p_retval[0]) { 1376 case SCHED_OTHER: 1377 p->p_retval[0] = LINUX_SCHED_OTHER; 1378 break; 1379 case SCHED_FIFO: 1380 p->p_retval[0] = LINUX_SCHED_FIFO; 1381 break; 1382 case SCHED_RR: 1383 p->p_retval[0] = LINUX_SCHED_RR; 1384 break; 1385 } 1386 1387 return error; 1388} 1389 1390struct linux_descriptor { 1391 unsigned int entry_number; 1392 unsigned long base_addr; 1393 unsigned int limit; 1394 unsigned int seg_32bit:1; 1395 unsigned int contents:2; 1396 unsigned int read_exec_only:1; 1397 unsigned int limit_in_pages:1; 1398 unsigned int seg_not_present:1; 1399 unsigned int useable:1; 1400}; 1401 1402int 1403linux_modify_ldt(p, uap) 1404 struct proc *p; 1405 struct linux_modify_ldt_args *uap; 1406{ 1407 int error; 1408 caddr_t sg; 1409 struct sysarch_args args; 1410 struct i386_ldt_args *ldt; 1411 struct linux_descriptor ld; 1412 union descriptor *desc; 1413 1414 sg = stackgap_init(); 1415 1416 if (uap->ptr == NULL) 1417 return (EINVAL); 1418 1419 switch (uap->func) { 1420 case 0x00: /* read_ldt */ 1421 ldt = stackgap_alloc(&sg, sizeof(*ldt)); 1422 ldt->start = 0; 1423 ldt->descs = uap->ptr; 1424 ldt->num = uap->bytecount / sizeof(union descriptor); 1425 args.op = I386_GET_LDT; 1426 args.parms = (char*)ldt; 1427 error = sysarch(p, &args); 1428 p->p_retval[0] *= sizeof(union descriptor); 1429 break; 1430 case 0x01: /* write_ldt */ 1431 case 0x11: /* write_ldt */ 1432 if (uap->bytecount != sizeof(ld)) 1433 return (EINVAL); 1434 1435 error = copyin(uap->ptr, &ld, sizeof(ld)); 1436 if (error) 1437 return (error); 1438 1439 ldt = stackgap_alloc(&sg, sizeof(*ldt)); 1440 desc = stackgap_alloc(&sg, sizeof(*desc)); 1441 ldt->start = ld.entry_number; 1442 ldt->descs = desc; 1443 ldt->num = 1; 1444 desc->sd.sd_lolimit = (ld.limit & 0x0000ffff); 1445 desc->sd.sd_hilimit = (ld.limit & 0x000f0000) >> 16; 1446 desc->sd.sd_lobase = (ld.base_addr & 0x00ffffff); 1447 desc->sd.sd_hibase = (ld.base_addr & 0xff000000) >> 24; 1448 desc->sd.sd_type = SDT_MEMRO | ((ld.read_exec_only ^ 1) << 1) | 1449 (ld.contents << 2); 1450 desc->sd.sd_dpl = 3; 1451 desc->sd.sd_p = (ld.seg_not_present ^ 1); 1452 desc->sd.sd_xx = 0; 1453 desc->sd.sd_def32 = ld.seg_32bit; 1454 desc->sd.sd_gran = ld.limit_in_pages; 1455 args.op = I386_SET_LDT; 1456 args.parms = (char*)ldt; 1457 error = sysarch(p, &args); 1458 break; 1459 default: 1460 error = EINVAL; 1461 break; 1462 } 1463 1464 if (error == EOPNOTSUPP) { 1465 printf("linux: modify_ldt needs kernel option USER_LDT\n"); 1466 error = ENOSYS; 1467 } 1468 1469 return (error); 1470}
|