/* linux_misc.c revision 41931 */
1/*- 2 * Copyright (c) 1994-1995 S�ren Schmidt 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer 10 * in this position and unchanged. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. The name of the author may not be used to endorse or promote products 15 * derived from this software withough specific prior written permission 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
27 * 28 * $Id: linux_misc.c,v 1.47 1998/12/10 13:47:18 jkh Exp $ 29 */ 30 31#include <sys/param.h> 32#include <sys/systm.h> 33#include <sys/sysproto.h> 34#include <sys/kernel.h> 35#include <sys/mman.h> 36#include <sys/proc.h> 37#include <sys/fcntl.h> 38#include <sys/imgact_aout.h> 39#include <sys/mount.h> 40#include <sys/namei.h> 41#include <sys/resourcevar.h> 42#include <sys/stat.h> 43#include <sys/sysctl.h> 44#ifdef COMPAT_LINUX_THREADS 45#include <sys/unistd.h> 46#endif /* COMPAT_LINUX_THREADS */ 47#include <sys/vnode.h> 48#include <sys/wait.h> 49#include <sys/time.h> 50 51#include <vm/vm.h> 52#include <vm/pmap.h> 53#include <vm/vm_kern.h> 54#include <vm/vm_prot.h> 55#include <vm/vm_map.h> 56#include <vm/vm_extern.h> 57 58#include <machine/frame.h> 59#include <machine/psl.h> 60 61#include <i386/linux/linux.h> 62#include <i386/linux/linux_proto.h> 63#include <i386/linux/linux_util.h> 64 65int 66linux_alarm(struct proc *p, struct linux_alarm_args *args) 67{ 68 struct itimerval it, old_it; 69 struct timeval tv; 70 int s; 71 72#ifdef DEBUG 73 printf("Linux-emul(%ld): alarm(%u)\n", (long)p->p_pid, args->secs); 74#endif 75 if (args->secs > 100000000) 76 return EINVAL; 77 it.it_value.tv_sec = (long)args->secs; 78 it.it_value.tv_usec = 0; 79 it.it_interval.tv_sec = 0; 80 it.it_interval.tv_usec = 0; 81 s = splsoftclock(); 82 old_it = p->p_realtimer; 83 getmicrouptime(&tv); 84 if (timevalisset(&old_it.it_value)) 85 untimeout(realitexpire, (caddr_t)p, p->p_ithandle); 86 if (it.it_value.tv_sec != 0) { 87 p->p_ithandle = timeout(realitexpire, (caddr_t)p, tvtohz(&it.it_value)); 88 timevaladd(&it.it_value, &tv); 89 } 90 p->p_realtimer = it; 91 splx(s); 92 if (timevalcmp(&old_it.it_value, &tv, >)) { 93 timevalsub(&old_it.it_value, &tv); 94 if (old_it.it_value.tv_usec != 0) 95 old_it.it_value.tv_sec++; 96 p->p_retval[0] = old_it.it_value.tv_sec; 97 } 98 return 0; 99} 100 101int 102linux_brk(struct proc *p, struct linux_brk_args *args) 103{ 104#if 0 105 struct vmspace *vm = 
p->p_vmspace; 106 vm_offset_t new, old; 107 int error; 108 109 if ((vm_offset_t)args->dsend < (vm_offset_t)vm->vm_daddr) 110 return EINVAL; 111 if (((caddr_t)args->dsend - (caddr_t)vm->vm_daddr) 112 > p->p_rlimit[RLIMIT_DATA].rlim_cur) 113 return ENOMEM; 114 115 old = round_page((vm_offset_t)vm->vm_daddr) + ctob(vm->vm_dsize); 116 new = round_page((vm_offset_t)args->dsend); 117 p->p_retval[0] = old; 118 if ((new-old) > 0) { 119 if (swap_pager_full) 120 return ENOMEM; 121 error = vm_map_find(&vm->vm_map, NULL, 0, &old, (new-old), FALSE, 122 VM_PROT_ALL, VM_PROT_ALL, 0); 123 if (error) 124 return error; 125 vm->vm_dsize += btoc((new-old)); 126 p->p_retval[0] = (int)(vm->vm_daddr + ctob(vm->vm_dsize)); 127 } 128 return 0; 129#else 130 struct vmspace *vm = p->p_vmspace; 131 vm_offset_t new, old; 132 struct obreak_args /* { 133 char * nsize; 134 } */ tmp; 135 136#ifdef DEBUG 137 printf("Linux-emul(%ld): brk(%p)\n", (long)p->p_pid, (void *)args->dsend); 138#endif 139 old = (vm_offset_t)vm->vm_daddr + ctob(vm->vm_dsize); 140 new = (vm_offset_t)args->dsend; 141 tmp.nsize = (char *) new; 142 if (((caddr_t)new > vm->vm_daddr) && !obreak(p, &tmp)) 143 p->p_retval[0] = (int)new; 144 else 145 p->p_retval[0] = (int)old; 146 147 return 0; 148#endif 149} 150 151int 152linux_uselib(struct proc *p, struct linux_uselib_args *args) 153{ 154 struct nameidata ni; 155 struct vnode *vp; 156 struct exec *a_out; 157 struct vattr attr; 158 vm_offset_t vmaddr; 159 unsigned long file_offset; 160 vm_offset_t buffer; 161 unsigned long bss_size; 162 int error; 163 caddr_t sg; 164 int locked; 165 166 sg = stackgap_init(); 167 CHECKALTEXIST(p, &sg, args->library); 168 169#ifdef DEBUG 170 printf("Linux-emul(%d): uselib(%s)\n", p->p_pid, args->library); 171#endif 172 173 a_out = NULL; 174 locked = 0; 175 vp = NULL; 176 177 NDINIT(&ni, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, args->library, p); 178 if (error = namei(&ni)) 179 goto cleanup; 180 181 vp = ni.ni_vp; 182 if (vp == NULL) { 183 error = 
ENOEXEC; /* ?? */ 184 goto cleanup; 185 } 186 187 /* 188 * From here on down, we have a locked vnode that must be unlocked. 189 */ 190 locked++; 191 192 /* 193 * Writable? 194 */ 195 if (vp->v_writecount) { 196 error = ETXTBSY; 197 goto cleanup; 198 } 199 200 /* 201 * Executable? 202 */ 203 if (error = VOP_GETATTR(vp, &attr, p->p_ucred, p)) 204 goto cleanup; 205 206 if ((vp->v_mount->mnt_flag & MNT_NOEXEC) || 207 ((attr.va_mode & 0111) == 0) || 208 (attr.va_type != VREG)) { 209 error = ENOEXEC; 210 goto cleanup; 211 } 212 213 /* 214 * Sensible size? 215 */ 216 if (attr.va_size == 0) { 217 error = ENOEXEC; 218 goto cleanup; 219 } 220 221 /* 222 * Can we access it? 223 */ 224 if (error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p)) 225 goto cleanup; 226 227 if (error = VOP_OPEN(vp, FREAD, p->p_ucred, p)) 228 goto cleanup; 229 230 /* 231 * Lock no longer needed 232 */ 233 VOP_UNLOCK(vp, 0, p); 234 locked = 0; 235 236 /* 237 * Pull in executable header into kernel_map 238 */ 239 error = vm_mmap(kernel_map, (vm_offset_t *)&a_out, PAGE_SIZE, 240 VM_PROT_READ, VM_PROT_READ, 0, (caddr_t)vp, 0); 241 if (error) 242 goto cleanup; 243 244 /* 245 * Is it a Linux binary ? 246 */ 247 if (((a_out->a_magic >> 16) & 0xff) != 0x64) { 248 error = ENOEXEC; 249 goto cleanup; 250 } 251 252 /* While we are here, we should REALLY do some more checks */ 253 254 /* 255 * Set file/virtual offset based on a.out variant. 256 */ 257 switch ((int)(a_out->a_magic & 0xffff)) { 258 case 0413: /* ZMAGIC */ 259 file_offset = 1024; 260 break; 261 case 0314: /* QMAGIC */ 262 file_offset = 0; 263 break; 264 default: 265 error = ENOEXEC; 266 goto cleanup; 267 } 268 269 bss_size = round_page(a_out->a_bss); 270 271 /* 272 * Check various fields in header for validity/bounds. 
273 */ 274 if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) { 275 error = ENOEXEC; 276 goto cleanup; 277 } 278 279 /* text + data can't exceed file size */ 280 if (a_out->a_data + a_out->a_text > attr.va_size) { 281 error = EFAULT; 282 goto cleanup; 283 } 284 285 /* 286 * text/data/bss must not exceed limits 287 * XXX: this is not complete. it should check current usage PLUS 288 * the resources needed by this library. 289 */ 290 if (a_out->a_text > MAXTSIZ || 291 a_out->a_data + bss_size > p->p_rlimit[RLIMIT_DATA].rlim_cur) { 292 error = ENOMEM; 293 goto cleanup; 294 } 295 296 /* 297 * prevent more writers 298 */ 299 vp->v_flag |= VTEXT; 300 301 /* 302 * Check if file_offset page aligned,. 303 * Currently we cannot handle misalinged file offsets, 304 * and so we read in the entire image (what a waste). 305 */ 306 if (file_offset & PAGE_MASK) { 307#ifdef DEBUG 308printf("uselib: Non page aligned binary %lu\n", file_offset); 309#endif 310 /* 311 * Map text+data read/write/execute 312 */ 313 314 /* a_entry is the load address and is page aligned */ 315 vmaddr = trunc_page(a_out->a_entry); 316 317 /* get anon user mapping, read+write+execute */ 318 error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &vmaddr, 319 a_out->a_text + a_out->a_data, FALSE, 320 VM_PROT_ALL, VM_PROT_ALL, 0); 321 if (error) 322 goto cleanup; 323 324 /* map file into kernel_map */ 325 error = vm_mmap(kernel_map, &buffer, 326 round_page(a_out->a_text + a_out->a_data + file_offset), 327 VM_PROT_READ, VM_PROT_READ, 0, 328 (caddr_t)vp, trunc_page(file_offset)); 329 if (error) 330 goto cleanup; 331 332 /* copy from kernel VM space to user space */ 333 error = copyout((caddr_t)(void *)(uintptr_t)(buffer + file_offset), 334 (caddr_t)vmaddr, a_out->a_text + a_out->a_data); 335 336 /* release temporary kernel space */ 337 vm_map_remove(kernel_map, buffer, 338 buffer + round_page(a_out->a_text + a_out->a_data + file_offset)); 339 340 if (error) 341 goto cleanup; 342 } 343 else { 344#ifdef DEBUG 
345printf("uselib: Page aligned binary %lu\n", file_offset); 346#endif 347 /* 348 * for QMAGIC, a_entry is 20 bytes beyond the load address 349 * to skip the executable header 350 */ 351 vmaddr = trunc_page(a_out->a_entry); 352 353 /* 354 * Map it all into the process's space as a single copy-on-write 355 * "data" segment. 356 */ 357 error = vm_mmap(&p->p_vmspace->vm_map, &vmaddr, 358 a_out->a_text + a_out->a_data, 359 VM_PROT_ALL, VM_PROT_ALL, MAP_PRIVATE | MAP_FIXED, 360 (caddr_t)vp, file_offset); 361 if (error) 362 goto cleanup; 363 } 364#ifdef DEBUG 365printf("mem=%08x = %08x %08x\n", vmaddr, ((int*)vmaddr)[0], ((int*)vmaddr)[1]); 366#endif 367 if (bss_size != 0) { 368 /* 369 * Calculate BSS start address 370 */ 371 vmaddr = trunc_page(a_out->a_entry) + a_out->a_text + a_out->a_data; 372 373 /* 374 * allocate some 'anon' space 375 */ 376 error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &vmaddr, 377 bss_size, FALSE, 378 VM_PROT_ALL, VM_PROT_ALL, 0); 379 if (error) 380 goto cleanup; 381 } 382 383cleanup: 384 /* 385 * Unlock vnode if needed 386 */ 387 if (locked) 388 VOP_UNLOCK(vp, 0, p); 389 390 /* 391 * Release the kernel mapping. 
392 */ 393 if (a_out) 394 vm_map_remove(kernel_map, (vm_offset_t)a_out, (vm_offset_t)a_out + PAGE_SIZE); 395 396 return error; 397} 398 399/* XXX move */ 400struct linux_select_argv { 401 int nfds; 402 fd_set *readfds; 403 fd_set *writefds; 404 fd_set *exceptfds; 405 struct timeval *timeout; 406}; 407 408int 409linux_select(struct proc *p, struct linux_select_args *args) 410{ 411 struct linux_select_argv linux_args; 412 struct linux_newselect_args newsel; 413 int error; 414 415#ifdef SELECT_DEBUG 416 printf("Linux-emul(%d): select(%x)\n", 417 p->p_pid, args->ptr); 418#endif 419 if ((error = copyin((caddr_t)args->ptr, (caddr_t)&linux_args, 420 sizeof(linux_args)))) 421 return error; 422 423 newsel.nfds = linux_args.nfds; 424 newsel.readfds = linux_args.readfds; 425 newsel.writefds = linux_args.writefds; 426 newsel.exceptfds = linux_args.exceptfds; 427 newsel.timeout = linux_args.timeout; 428 429 return linux_newselect(p, &newsel); 430} 431 432int 433linux_newselect(struct proc *p, struct linux_newselect_args *args) 434{ 435 struct select_args bsa; 436 struct timeval tv0, tv1, utv, *tvp; 437 caddr_t sg; 438 int error; 439 440#ifdef DEBUG 441 printf("Linux-emul(%ld): newselect(%d, %p, %p, %p, %p)\n", 442 (long)p->p_pid, args->nfds, (void *)args->readfds, 443 (void *)args->writefds, (void *)args->exceptfds, 444 (void *)args->timeout); 445#endif 446 error = 0; 447 bsa.nd = args->nfds; 448 bsa.in = args->readfds; 449 bsa.ou = args->writefds; 450 bsa.ex = args->exceptfds; 451 bsa.tv = args->timeout; 452 453 /* 454 * Store current time for computation of the amount of 455 * time left. 456 */ 457 if (args->timeout) { 458 if ((error = copyin(args->timeout, &utv, sizeof(utv)))) 459 goto select_out; 460#ifdef DEBUG 461 printf("Linux-emul(%ld): incoming timeout (%ld/%ld)\n", 462 (long)p->p_pid, utv.tv_sec, utv.tv_usec); 463#endif 464 if (itimerfix(&utv)) { 465 /* 466 * The timeval was invalid. Convert it to something 467 * valid that will act as it does under Linux. 
468 */ 469 sg = stackgap_init(); 470 tvp = stackgap_alloc(&sg, sizeof(utv)); 471 utv.tv_sec += utv.tv_usec / 1000000; 472 utv.tv_usec %= 1000000; 473 if (utv.tv_usec < 0) { 474 utv.tv_sec -= 1; 475 utv.tv_usec += 1000000; 476 } 477 if (utv.tv_sec < 0) 478 timevalclear(&utv); 479 if ((error = copyout(&utv, tvp, sizeof(utv)))) 480 goto select_out; 481 bsa.tv = tvp; 482 } 483 microtime(&tv0); 484 } 485 486 error = select(p, &bsa); 487#ifdef DEBUG 488 printf("Linux-emul(%d): real select returns %d\n", 489 p->p_pid, error); 490#endif 491 492 if (error) { 493 /* 494 * See fs/select.c in the Linux kernel. Without this, 495 * Maelstrom doesn't work. 496 */ 497 if (error == ERESTART) 498 error = EINTR; 499 goto select_out; 500 } 501 502 if (args->timeout) { 503 if (p->p_retval[0]) { 504 /* 505 * Compute how much time was left of the timeout, 506 * by subtracting the current time and the time 507 * before we started the call, and subtracting 508 * that result from the user-supplied value. 509 */ 510 microtime(&tv1); 511 timevalsub(&tv1, &tv0); 512 timevalsub(&utv, &tv1); 513 if (utv.tv_sec < 0) 514 timevalclear(&utv); 515 } else 516 timevalclear(&utv); 517#ifdef DEBUG 518 printf("Linux-emul(%ld): outgoing timeout (%ld/%ld)\n", 519 (long)p->p_pid, utv.tv_sec, utv.tv_usec); 520#endif 521 if ((error = copyout(&utv, args->timeout, sizeof(utv)))) 522 goto select_out; 523 } 524 525select_out: 526#ifdef DEBUG 527 printf("Linux-emul(%d): newselect_out -> %d\n", 528 p->p_pid, error); 529#endif 530 return error; 531} 532 533int 534linux_getpgid(struct proc *p, struct linux_getpgid_args *args) 535{ 536 struct proc *curproc; 537 538#ifdef DEBUG 539 printf("Linux-emul(%d): getpgid(%d)\n", p->p_pid, args->pid); 540#endif 541 if (args->pid != p->p_pid) { 542 if (!(curproc = pfind(args->pid))) 543 return ESRCH; 544 } 545 else 546 curproc = p; 547 p->p_retval[0] = curproc->p_pgid; 548 return 0; 549} 550 551int 552linux_fork(struct proc *p, struct linux_fork_args *args) 553{ 554 int error; 
555 556#ifdef DEBUG 557 printf("Linux-emul(%d): fork()\n", p->p_pid); 558#endif 559 if (error = fork(p, (struct fork_args *)args)) 560 return error; 561 if (p->p_retval[1] == 1) 562 p->p_retval[0] = 0; 563 return 0; 564} 565 566#ifndef COMPAT_LINUX_THREADS 567int 568linux_clone(struct proc *p, struct linux_clone_args *args) 569{ 570 printf("linux_clone(%d): Not enabled\n", p->p_pid); 571 return (EOPNOTSUPP); 572} 573 574#else 575#define CLONE_VM 0x100 576#define CLONE_FS 0x200 577#define CLONE_FILES 0x400 578#define CLONE_SIGHAND 0x800 579#define CLONE_PID 0x1000 580 581int 582linux_clone(struct proc *p, struct linux_clone_args *args) 583{ 584 int error, ff = RFPROC; 585 struct proc *p2; 586 int growable; 587 int initstacksize; 588 int maxstacksize; 589 int exit_signal; 590 vm_map_entry_t entry; 591 vm_map_t map; 592 vm_offset_t start; 593 struct rfork_args rf_args; 594 595#ifdef SMP 596 printf("linux_clone(%d): does not work with SMP yet\n", p->p_pid); 597 return (EOPNOTSUPP); 598#endif 599#ifdef DEBUG 600 if (args->flags & CLONE_PID) 601 printf("linux_clone(%d): CLONE_PID not yet supported\n", p->p_pid); 602 printf ("linux_clone(%d): invoked with flags %x and stack %x\n", p->p_pid, 603 (unsigned int)args->flags, (unsigned int)args->stack); 604#endif 605 606 if (!args->stack) 607 return (EINVAL); 608 exit_signal = args->flags & 0x000000ff; 609 if (exit_signal >= LINUX_NSIG) 610 return EINVAL; 611 exit_signal = linux_to_bsd_signal[exit_signal]; 612 613 /* RFTHREAD probably not necessary here, but it shouldn't hurt either */ 614 ff |= RFTHREAD; 615 616 if (args->flags & CLONE_VM) 617 ff |= RFMEM; 618 if (args->flags & CLONE_SIGHAND) 619 ff |= RFSIGSHARE; 620 if (!(args->flags & CLONE_FILES)) 621 ff |= RFFDG; 622 623 error = 0; 624 start = 0; 625 626 rf_args.flags = ff; 627 if (error = rfork(p, &rf_args)) 628 return error; 629 630 p2 = pfind(p->p_retval[0]); 631 if (p2 == 0) 632 return ESRCH; 633 634 p2->p_sigparent = exit_signal; 635 p2->p_md.md_regs->tf_esp = 
(unsigned int)args->stack; 636 637#ifdef DEBUG 638 printf ("linux_clone(%d): successful rfork to %d\n", p->p_pid, p2->p_pid); 639#endif 640 return 0; 641} 642 643#endif /* COMPAT_LINUX_THREADS */ 644/* XXX move */ 645struct linux_mmap_argv { 646 linux_caddr_t addr; 647 int len; 648 int prot; 649 int flags; 650 int fd; 651 int pos; 652}; 653 654#ifdef COMPAT_LINUX_THREADS 655#define STACK_SIZE (2 * 1024 * 1024) 656#define GUARD_SIZE (4 * PAGE_SIZE) 657 658#endif /* COMPAT_LINUX_THREADS */ 659int 660linux_mmap(struct proc *p, struct linux_mmap_args *args) 661{ 662 struct mmap_args /* { 663 caddr_t addr; 664 size_t len; 665 int prot; 666 int flags; 667 int fd; 668 long pad; 669 off_t pos; 670 } */ bsd_args; 671 int error; 672 struct linux_mmap_argv linux_args; 673 674 if ((error = copyin((caddr_t)args->ptr, (caddr_t)&linux_args, 675 sizeof(linux_args)))) 676 return error; 677#ifdef DEBUG 678 printf("Linux-emul(%ld): mmap(%p, %d, %d, %08x, %d, %d)\n", 679 (long)p->p_pid, (void *)linux_args.addr, linux_args.len, 680 linux_args.prot, linux_args.flags, linux_args.fd, linux_args.pos); 681#endif 682 bsd_args.flags = 0; 683 if (linux_args.flags & LINUX_MAP_SHARED) 684 bsd_args.flags |= MAP_SHARED; 685 if (linux_args.flags & LINUX_MAP_PRIVATE) 686 bsd_args.flags |= MAP_PRIVATE; 687 if (linux_args.flags & LINUX_MAP_FIXED) 688 bsd_args.flags |= MAP_FIXED; 689 if (linux_args.flags & LINUX_MAP_ANON) 690 bsd_args.flags |= MAP_ANON; 691#ifndef COMPAT_LINUX_THREADS 692 bsd_args.addr = linux_args.addr; 693 bsd_args.len = linux_args.len; 694#else 695 696#if !defined(USE_VM_STACK) && !defined(USE_VM_STACK_FOR_EXEC) 697 /* Linux Threads will map into the proc stack space, unless 698 we prevent it. This causes problems if we're not using 699 our VM_STACK options. 
700 */ 701 if ((unsigned int)linux_args.addr + linux_args.len > (USRSTACK - MAXSSIZ)) 702 return (EINVAL); 703#endif 704 705 if (linux_args.flags & LINUX_MAP_GROWSDOWN) { 706 707#ifdef USE_VM_STACK 708 /* USE_VM_STACK is defined (or not) in vm/vm_map.h */ 709 bsd_args.flags |= MAP_STACK; 710#endif 711 712 /* The linux MAP_GROWSDOWN option does not limit auto 713 growth of the region. Linux mmap with this option 714 takes as addr the inital BOS, and as len, the initial 715 region size. It can then grow down from addr without 716 limit. However, linux threads has an implicit internal 717 limit to stack size of STACK_SIZE. Its just not 718 enforced explicitly in linux. But, here we impose 719 a limit of (STACK_SIZE - GUARD_SIZE) on the stack 720 region, since we can do this with our mmap. 721 722 Our mmap with MAP_STACK takes addr as the maximum 723 downsize limit on BOS, and as len the max size of 724 the region. It them maps the top SGROWSIZ bytes, 725 and autgrows the region down, up to the limit 726 in addr. 727 728 If we don't use the MAP_STACK option, the effect 729 of this code is to allocate a stack region of a 730 fixed size of (STACK_SIZE - GUARD_SIZE). 731 */ 732 733 /* This gives us TOS */ 734 bsd_args.addr = linux_args.addr + linux_args.len; 735 736 /* This gives us our maximum stack size */ 737 if (linux_args.len > STACK_SIZE - GUARD_SIZE) 738 bsd_args.len = linux_args.len; 739 else 740 bsd_args.len = STACK_SIZE - GUARD_SIZE; 741 742 /* This gives us a new BOS. If we're using VM_STACK, then 743 mmap will just map the top SGROWSIZ bytes, and let 744 the stack grow down to the limit at BOS. If we're 745 not using VM_STACK we map the full stack, since we 746 don't have a way to autogrow it. 
747 */ 748 bsd_args.addr -= bsd_args.len; 749 750 } else { 751 bsd_args.addr = linux_args.addr; 752 bsd_args.len = linux_args.len; 753 } 754#endif /* COMPAT_LINUX_THREADS */ 755 bsd_args.prot = linux_args.prot | PROT_READ; /* always required */ 756 bsd_args.fd = linux_args.fd; 757 bsd_args.pos = linux_args.pos; 758 bsd_args.pad = 0; 759 return mmap(p, &bsd_args); 760} 761 762int 763linux_mremap(struct proc *p, struct linux_mremap_args *args) 764{ 765 struct munmap_args /* { 766 void *addr; 767 size_t len; 768 } */ bsd_args; 769 int error = 0; 770 771#ifdef DEBUG 772 printf("Linux-emul(%ld): mremap(%p, %08x, %08x, %08x)\n", 773 (long)p->p_pid, (void *)args->addr, args->old_len, args->new_len, 774 args->flags); 775#endif 776 args->new_len = round_page(args->new_len); 777 args->old_len = round_page(args->old_len); 778 779 if (args->new_len > args->old_len) { 780 p->p_retval[0] = 0; 781 return ENOMEM; 782 } 783 784 if (args->new_len < args->old_len) { 785 bsd_args.addr = args->addr + args->new_len; 786 bsd_args.len = args->old_len - args->new_len; 787 error = munmap(p, &bsd_args); 788 } 789 790 p->p_retval[0] = error ? 
0 : (int)args->addr; 791 return error; 792} 793 794int 795linux_msync(struct proc *p, struct linux_msync_args *args) 796{ 797 struct msync_args bsd_args; 798 799 bsd_args.addr = args->addr; 800 bsd_args.len = args->len; 801 bsd_args.flags = 0; /* XXX ignore */ 802 803 return msync(p, &bsd_args); 804} 805 806int 807linux_pipe(struct proc *p, struct linux_pipe_args *args) 808{ 809 int error; 810 int reg_edx; 811 812#ifdef DEBUG 813 printf("Linux-emul(%d): pipe(*)\n", p->p_pid); 814#endif 815 reg_edx = p->p_retval[1]; 816 if (error = pipe(p, 0)) { 817 p->p_retval[1] = reg_edx; 818 return error; 819 } 820 821 if (error = copyout(p->p_retval, args->pipefds, 2*sizeof(int))) { 822 p->p_retval[1] = reg_edx; 823 return error; 824 } 825 826 p->p_retval[1] = reg_edx; 827 p->p_retval[0] = 0; 828 return 0; 829} 830 831int 832linux_time(struct proc *p, struct linux_time_args *args) 833{ 834 struct timeval tv; 835 linux_time_t tm; 836 int error; 837 838#ifdef DEBUG 839 printf("Linux-emul(%d): time(*)\n", p->p_pid); 840#endif 841 microtime(&tv); 842 tm = tv.tv_sec; 843 if (args->tm && (error = copyout(&tm, args->tm, sizeof(linux_time_t)))) 844 return error; 845 p->p_retval[0] = tm; 846 return 0; 847} 848 849struct linux_times_argv { 850 long tms_utime; 851 long tms_stime; 852 long tms_cutime; 853 long tms_cstime; 854}; 855 856#define CLK_TCK 100 /* Linux uses 100 */ 857#define CONVTCK(r) (r.tv_sec * CLK_TCK + r.tv_usec / (1000000 / CLK_TCK)) 858 859int 860linux_times(struct proc *p, struct linux_times_args *args) 861{ 862 struct timeval tv; 863 struct linux_times_argv tms; 864 struct rusage ru; 865 int error; 866 867#ifdef DEBUG 868 printf("Linux-emul(%d): times(*)\n", p->p_pid); 869#endif 870 calcru(p, &ru.ru_utime, &ru.ru_stime, NULL); 871 872 tms.tms_utime = CONVTCK(ru.ru_utime); 873 tms.tms_stime = CONVTCK(ru.ru_stime); 874 875 tms.tms_cutime = CONVTCK(p->p_stats->p_cru.ru_utime); 876 tms.tms_cstime = CONVTCK(p->p_stats->p_cru.ru_stime); 877 878 if ((error = 
copyout((caddr_t)&tms, (caddr_t)args->buf, 879 sizeof(struct linux_times_argv)))) 880 return error; 881 882 microuptime(&tv); 883 p->p_retval[0] = (int)CONVTCK(tv); 884 return 0; 885} 886 887/* XXX move */ 888struct linux_newuname_t { 889 char sysname[65]; 890 char nodename[65]; 891 char release[65]; 892 char version[65]; 893 char machine[65]; 894 char domainname[65]; 895}; 896 897int 898linux_newuname(struct proc *p, struct linux_newuname_args *args) 899{ 900 struct linux_newuname_t linux_newuname; 901 902#ifdef DEBUG 903 printf("Linux-emul(%d): newuname(*)\n", p->p_pid); 904#endif 905 bzero(&linux_newuname, sizeof(struct linux_newuname_t)); 906 strncpy(linux_newuname.sysname, ostype, 907 sizeof(linux_newuname.sysname) - 1); 908 strncpy(linux_newuname.nodename, hostname, 909 sizeof(linux_newuname.nodename) - 1); 910 strncpy(linux_newuname.release, osrelease, 911 sizeof(linux_newuname.release) - 1); 912 strncpy(linux_newuname.version, version, 913 sizeof(linux_newuname.version) - 1); 914 strncpy(linux_newuname.machine, machine, 915 sizeof(linux_newuname.machine) - 1); 916 strncpy(linux_newuname.domainname, domainname, 917 sizeof(linux_newuname.domainname) - 1); 918 return (copyout((caddr_t)&linux_newuname, (caddr_t)args->buf, 919 sizeof(struct linux_newuname_t))); 920} 921 922struct linux_utimbuf { 923 linux_time_t l_actime; 924 linux_time_t l_modtime; 925}; 926 927int 928linux_utime(struct proc *p, struct linux_utime_args *args) 929{ 930 struct utimes_args /* { 931 char *path; 932 struct timeval *tptr; 933 } */ bsdutimes; 934 struct timeval tv[2], *tvp; 935 struct linux_utimbuf lut; 936 int error; 937 caddr_t sg; 938 939 sg = stackgap_init(); 940 CHECKALTEXIST(p, &sg, args->fname); 941 942#ifdef DEBUG 943 printf("Linux-emul(%d): utime(%s, *)\n", p->p_pid, args->fname); 944#endif 945 if (args->times) { 946 if ((error = copyin(args->times, &lut, sizeof lut))) 947 return error; 948 tv[0].tv_sec = lut.l_actime; 949 tv[0].tv_usec = 0; 950 tv[1].tv_sec = lut.l_modtime; 
951 tv[1].tv_usec = 0; 952 /* so that utimes can copyin */ 953 tvp = (struct timeval *)stackgap_alloc(&sg, sizeof(tv)); 954 if ((error = copyout(tv, tvp, sizeof(tv)))) 955 return error; 956 bsdutimes.tptr = tvp; 957 } else 958 bsdutimes.tptr = NULL; 959 960 bsdutimes.path = args->fname; 961 return utimes(p, &bsdutimes); 962} 963 964int 965linux_waitpid(struct proc *p, struct linux_waitpid_args *args) 966{ 967 struct wait_args /* { 968 int pid; 969 int *status; 970 int options; 971 struct rusage *rusage; 972 } */ tmp; 973 int error, tmpstat; 974 975#ifdef DEBUG 976 printf("Linux-emul(%ld): waitpid(%d, %p, %d)\n", 977 (long)p->p_pid, args->pid, (void *)args->status, args->options); 978#endif 979 tmp.pid = args->pid; 980 tmp.status = args->status; 981#ifndef COMPAT_LINUX_THREADS 982 tmp.options = args->options; 983#else 984 /* This filters out the linux option _WCLONE. I don't 985 think we need it, but I could be wrong. If we need 986 it, we need to fix wait4, since it will give us an 987 error return of EINVAL if we pass in _WCLONE, and 988 of course, it won't do anything with it. 
989 */ 990 tmp.options = (args->options & (WNOHANG | WUNTRACED)); 991#endif /* COMPAT_LINUX_THREADS */ 992 tmp.rusage = NULL; 993 994 if (error = wait4(p, &tmp)) 995#ifndef COMPAT_LINUX_THREADS 996 return error; 997#else 998 return error; 999#endif /* COMPAT_LINUX_THREADS */ 1000 if (args->status) { 1001 if (error = copyin(args->status, &tmpstat, sizeof(int))) 1002 return error; 1003 if (WIFSIGNALED(tmpstat)) 1004 tmpstat = (tmpstat & 0xffffff80) | 1005 bsd_to_linux_signal[WTERMSIG(tmpstat)]; 1006 else if (WIFSTOPPED(tmpstat)) 1007 tmpstat = (tmpstat & 0xffff00ff) | 1008 (bsd_to_linux_signal[WSTOPSIG(tmpstat)]<<8); 1009 return copyout(&tmpstat, args->status, sizeof(int)); 1010 } else 1011 return 0; 1012} 1013 1014int 1015linux_wait4(struct proc *p, struct linux_wait4_args *args) 1016{ 1017 struct wait_args /* { 1018 int pid; 1019 int *status; 1020 int options; 1021 struct rusage *rusage; 1022 } */ tmp; 1023 int error, tmpstat; 1024 1025#ifdef DEBUG 1026 printf("Linux-emul(%ld): wait4(%d, %p, %d, %p)\n", 1027 (long)p->p_pid, args->pid, (void *)args->status, args->options, 1028 (void *)args->rusage); 1029#endif 1030 tmp.pid = args->pid; 1031 tmp.status = args->status; 1032#ifndef COMPAT_LINUX_THREADS 1033 tmp.options = args->options; 1034#else 1035 /* This filters out the linux option _WCLONE. I don't 1036 think we need it, but I could be wrong. If we need 1037 it, we need to fix wait4, since it will give us an 1038 error return of EINVAL if we pass in _WCLONE, and 1039 of course, it won't do anything with it. 
1040 */ 1041 tmp.options = (args->options & (WNOHANG | WUNTRACED)); 1042#endif /* COMPAT_LINUX_THREADS */ 1043 tmp.rusage = args->rusage; 1044 1045 if (error = wait4(p, &tmp)) 1046 return error; 1047 1048 p->p_siglist &= ~sigmask(SIGCHLD); 1049 1050 if (args->status) { 1051 if (error = copyin(args->status, &tmpstat, sizeof(int))) 1052 return error; 1053 if (WIFSIGNALED(tmpstat)) 1054 tmpstat = (tmpstat & 0xffffff80) | 1055 bsd_to_linux_signal[WTERMSIG(tmpstat)]; 1056 else if (WIFSTOPPED(tmpstat)) 1057 tmpstat = (tmpstat & 0xffff00ff) | 1058 (bsd_to_linux_signal[WSTOPSIG(tmpstat)]<<8); 1059 return copyout(&tmpstat, args->status, sizeof(int)); 1060 } else 1061 return 0; 1062} 1063 1064int 1065linux_mknod(struct proc *p, struct linux_mknod_args *args) 1066{ 1067 caddr_t sg; 1068 struct mknod_args bsd_mknod; 1069 struct mkfifo_args bsd_mkfifo; 1070 1071 sg = stackgap_init(); 1072 1073 CHECKALTCREAT(p, &sg, args->path); 1074 1075#ifdef DEBUG 1076 printf("Linux-emul(%d): mknod(%s, %d, %d)\n", 1077 p->p_pid, args->path, args->mode, args->dev); 1078#endif 1079 1080 if (args->mode & S_IFIFO) { 1081 bsd_mkfifo.path = args->path; 1082 bsd_mkfifo.mode = args->mode; 1083 return mkfifo(p, &bsd_mkfifo); 1084 } else { 1085 bsd_mknod.path = args->path; 1086 bsd_mknod.mode = args->mode; 1087 bsd_mknod.dev = args->dev; 1088 return mknod(p, &bsd_mknod); 1089 } 1090} 1091 1092/* 1093 * UGH! This is just about the dumbest idea I've ever heard!! 1094 */ 1095int 1096linux_personality(struct proc *p, struct linux_personality_args *args) 1097{ 1098#ifdef DEBUG 1099 printf("Linux-emul(%d): personality(%d)\n", 1100 p->p_pid, args->per); 1101#endif 1102 if (args->per != 0) 1103 return EINVAL; 1104 1105 /* Yes Jim, it's still a Linux... */ 1106 p->p_retval[0] = 0; 1107 return 0; 1108} 1109 1110/* 1111 * Wrappers for get/setitimer for debugging.. 
1112 */ 1113int 1114linux_setitimer(struct proc *p, struct linux_setitimer_args *args) 1115{ 1116 struct setitimer_args bsa; 1117 struct itimerval foo; 1118 int error; 1119 1120#ifdef DEBUG 1121 printf("Linux-emul(%ld): setitimer(%p, %p)\n", 1122 (long)p->p_pid, (void *)args->itv, (void *)args->oitv); 1123#endif 1124 bsa.which = args->which; 1125 bsa.itv = args->itv; 1126 bsa.oitv = args->oitv; 1127 if (args->itv) { 1128 if ((error = copyin((caddr_t)args->itv, (caddr_t)&foo, 1129 sizeof(foo)))) 1130 return error; 1131#ifdef DEBUG 1132 printf("setitimer: value: sec: %ld, usec: %ld\n", 1133 foo.it_value.tv_sec, foo.it_value.tv_usec); 1134 printf("setitimer: interval: sec: %ld, usec: %ld\n", 1135 foo.it_interval.tv_sec, foo.it_interval.tv_usec); 1136#endif 1137 } 1138 return setitimer(p, &bsa); 1139} 1140 1141int 1142linux_getitimer(struct proc *p, struct linux_getitimer_args *args) 1143{ 1144 struct getitimer_args bsa; 1145#ifdef DEBUG 1146 printf("Linux-emul(%ld): getitimer(%p)\n", 1147 (long)p->p_pid, (void *)args->itv); 1148#endif 1149 bsa.which = args->which; 1150 bsa.itv = args->itv; 1151 return getitimer(p, &bsa); 1152} 1153 1154int 1155linux_iopl(struct proc *p, struct linux_iopl_args *args) 1156{ 1157 int error; 1158 1159 error = suser(p->p_ucred, &p->p_acflag); 1160 if (error != 0) 1161 return error; 1162 if (securelevel > 0) 1163 return EPERM; 1164 p->p_md.md_regs->tf_eflags |= PSL_IOPL; 1165 return 0; 1166} 1167 1168int 1169linux_nice(struct proc *p, struct linux_nice_args *args) 1170{ 1171 struct setpriority_args bsd_args; 1172 1173 bsd_args.which = PRIO_PROCESS; 1174 bsd_args.who = 0; /* current process */ 1175 bsd_args.prio = args->inc; 1176 return setpriority(p, &bsd_args); 1177} 1178 1179