/* linux_pipe.c revision 1.19 */
/*	$NetBSD: linux_pipe.c,v 1.19 1995/09/13 21:51:14 fvdl Exp $	*/

/*
 * Copyright (c) 1995 Frank van der Linden
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed for the NetBSD Project
 *      by Frank van der Linden
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Linux compatibility module. Try to deal with various Linux system calls.
36 */ 37 38#include <sys/param.h> 39#include <sys/systm.h> 40#include <sys/namei.h> 41#include <sys/proc.h> 42#include <sys/dir.h> 43#include <sys/file.h> 44#include <sys/stat.h> 45#include <sys/filedesc.h> 46#include <sys/ioctl.h> 47#include <sys/kernel.h> 48#include <sys/malloc.h> 49#include <sys/mbuf.h> 50#include <sys/mman.h> 51#include <sys/mount.h> 52#include <sys/ptrace.h> 53#include <sys/resource.h> 54#include <sys/resourcevar.h> 55#include <sys/signal.h> 56#include <sys/signalvar.h> 57#include <sys/socket.h> 58#include <sys/time.h> 59#include <sys/times.h> 60#include <sys/vnode.h> 61#include <sys/uio.h> 62#include <sys/wait.h> 63#include <sys/utsname.h> 64#include <sys/unistd.h> 65 66#include <sys/syscallargs.h> 67 68#include <vm/vm.h> 69#include <vm/vm_param.h> 70 71#include <compat/linux/linux_types.h> 72#include <compat/linux/linux_fcntl.h> 73#include <compat/linux/linux_mmap.h> 74#include <compat/linux/linux_signal.h> 75#include <compat/linux/linux_syscallargs.h> 76#include <compat/linux/linux_util.h> 77#include <compat/linux/linux_dirent.h> 78 79/* 80 * The information on a terminated (or stopped) process needs 81 * to be converted in order for Linux binaries to get a valid signal 82 * number out of it. 83 */ 84static int 85bsd_to_linux_wstat(status) 86 int *status; 87{ 88 if (WIFSIGNALED(*status)) 89 *status = (*status & ~0177) | 90 bsd_to_linux_sig[WTERMSIG(*status)]; 91 else if (WIFSTOPPED(*status)) 92 *status = (*status & ~0xff00) | 93 (bsd_to_linux_sig[WSTOPSIG(*status)] << 8); 94} 95 96/* 97 * waitpid(2). Passed on to the NetBSD call, surrounded by code to 98 * reserve some space for a NetBSD-style wait status, and converting 99 * it to what Linux wants. 
100 */ 101int 102linux_waitpid(p, uap, retval) 103 struct proc *p; 104 struct linux_waitpid_args /* { 105 syscallarg(int) pid; 106 syscallarg(int *) status; 107 syscallarg(int) options; 108 } */ *uap; 109 register_t *retval; 110{ 111 struct wait4_args w4a; 112 int error, *status, tstat; 113 caddr_t sg; 114 115 if (SCARG(uap, status) != NULL) { 116 sg = stackgap_init(p->p_emul); 117 status = (int *) stackgap_alloc(&sg, sizeof status); 118 } else 119 status = NULL; 120 121 SCARG(&w4a, pid) = SCARG(uap, pid); 122 SCARG(&w4a, status) = status; 123 SCARG(&w4a, options) = SCARG(uap, options); 124 SCARG(&w4a, rusage) = NULL; 125 126 if ((error = wait4(p, &w4a, retval))) 127 return error; 128 129 p->p_siglist &= ~sigmask(SIGCHLD); 130 131 if (status != NULL) { 132 if ((error = copyin(status, &tstat, sizeof tstat))) 133 return error; 134 135 bsd_to_linux_wstat(&tstat); 136 137 return copyout(&tstat, SCARG(uap, status), sizeof tstat); 138 } 139 140 return 0; 141} 142 143/* 144 * This is very much the same as waitpid() 145 */ 146int 147linux_wait4(p, uap, retval) 148 struct proc *p; 149 struct linux_wait4_args /* { 150 syscallarg(int) pid; 151 syscallarg(int *) status; 152 syscallarg(int) options; 153 syscallarg(struct rusage *) rusage; 154 } */ *uap; 155 register_t *retval; 156{ 157 struct wait4_args w4a; 158 int error, *status, tstat; 159 caddr_t sg; 160 161 if (SCARG(uap, status) != NULL) { 162 sg = stackgap_init(p->p_emul); 163 status = (int *) stackgap_alloc(&sg, sizeof status); 164 } else 165 status = NULL; 166 167 SCARG(&w4a, pid) = SCARG(uap, pid); 168 SCARG(&w4a, status) = status; 169 SCARG(&w4a, options) = SCARG(uap, options); 170 SCARG(&w4a, rusage) = SCARG(uap, rusage); 171 172 if ((error = wait4(p, &w4a, retval))) 173 return error; 174 175 p->p_siglist &= ~sigmask(SIGCHLD); 176 177 if (status != NULL) { 178 if ((error = copyin(status, &tstat, sizeof tstat))) 179 return error; 180 181 bsd_to_linux_wstat(&tstat); 182 183 return copyout(&tstat, SCARG(uap, status), 
sizeof tstat); 184 } 185 186 return 0; 187} 188 189/* 190 * This is the old brk(2) call. I don't think anything in the Linux 191 * world uses this anymore 192 */ 193int 194linux_break(p, uap, retval) 195 struct proc *p; 196 struct linux_brk_args /* { 197 syscallarg(char *) nsize; 198 } */ *uap; 199 register_t *retval; 200{ 201 return ENOSYS; 202} 203 204/* 205 * Linux brk(2). The check if the new address is >= the old one is 206 * done in the kernel in Linux. NetBSD does it in the library. 207 */ 208int 209linux_brk(p, uap, retval) 210 struct proc *p; 211 struct linux_brk_args /* { 212 syscallarg(char *) nsize; 213 } */ *uap; 214 register_t *retval; 215{ 216 char *nbrk = SCARG(uap, nsize); 217 struct obreak_args oba; 218 struct vmspace *vm = p->p_vmspace; 219 int error = 0; 220 caddr_t oldbrk, newbrk; 221 222 oldbrk = vm->vm_daddr + ctob(vm->vm_dsize); 223 /* 224 * XXX inconsistent.. Linux always returns at least the old 225 * brk value, but it will be page-aligned if this fails, 226 * and possibly not page aligned if it succeeds (the user 227 * supplied pointer is returned). 228 */ 229 SCARG(&oba, nsize) = nbrk; 230 231 if ((caddr_t) nbrk > vm->vm_daddr && obreak(p, &oba, retval) == 0) 232 retval[0] = (register_t) nbrk; 233 else 234 retval[0] = (register_t) oldbrk; 235 236 return 0; 237} 238 239/* 240 * I wonder why Linux has gettimeofday() _and_ time().. Still, we 241 * need to deal with it. 242 */ 243int 244linux_time(p, uap, retval) 245 struct proc *p; 246 struct linux_time_args /* { 247 linux_time_t *t; 248 } */ *uap; 249 register_t *retval; 250{ 251 struct timeval atv; 252 linux_time_t tt; 253 int error; 254 255 microtime(&atv); 256 257 tt = atv.tv_sec; 258 if (SCARG(uap, t) && (error = copyout(&tt, SCARG(uap, t), sizeof tt))) 259 return error; 260 261 retval[0] = tt; 262 return 0; 263} 264 265/* 266 * Convert BSD statfs structure to Linux statfs structure. 
267 * The Linux structure has less fields, and it also wants 268 * the length of a name in a dir entry in a field, which 269 * we fake (probably the wrong way). 270 */ 271static void 272bsd_to_linux_statfs(bsp, lsp) 273 struct statfs *bsp; 274 struct linux_statfs *lsp; 275{ 276 lsp->l_ftype = bsp->f_type; 277 lsp->l_fbsize = bsp->f_bsize; 278 lsp->l_fblocks = bsp->f_blocks; 279 lsp->l_fbfree = bsp->f_bfree; 280 lsp->l_fbavail = bsp->f_bavail; 281 lsp->l_ffiles = bsp->f_files; 282 lsp->l_fffree = bsp->f_ffree; 283 lsp->l_ffsid.val[0] = bsp->f_fsid.val[0]; 284 lsp->l_ffsid.val[1] = bsp->f_fsid.val[1]; 285 lsp->l_fnamelen = MAXNAMLEN; /* XXX */ 286} 287 288/* 289 * Implement the fs stat functions. Straightforward. 290 */ 291int 292linux_statfs(p, uap, retval) 293 struct proc *p; 294 struct linux_statfs_args /* { 295 syscallarg(char *) path; 296 syscallarg(struct linux_statfs *) sp; 297 } */ *uap; 298 register_t *retval; 299{ 300 struct statfs btmp, *bsp; 301 struct linux_statfs ltmp; 302 struct statfs_args bsa; 303 caddr_t sg; 304 int error; 305 306 sg = stackgap_init(p->p_emul); 307 bsp = (struct statfs *) stackgap_alloc(&sg, sizeof (struct statfs)); 308 309 LINUX_CHECK_ALT_EXIST(p, &sg, SCARG(uap, path)); 310 311 SCARG(&bsa, path) = SCARG(uap, path); 312 SCARG(&bsa, buf) = bsp; 313 314 if ((error = statfs(p, &bsa, retval))) 315 return error; 316 317 if ((error = copyin((caddr_t) bsp, (caddr_t) &btmp, sizeof btmp))) 318 return error; 319 320 bsd_to_linux_statfs(&btmp, <mp); 321 322 return copyout((caddr_t) <mp, (caddr_t) SCARG(uap, sp), sizeof ltmp); 323} 324 325int 326linux_fstatfs(p, uap, retval) 327 struct proc *p; 328 struct linux_fstatfs_args /* { 329 syscallarg(int) fd; 330 syscallarg(struct linux_statfs *) sp; 331 } */ *uap; 332 register_t *retval; 333{ 334 struct statfs btmp, *bsp; 335 struct linux_statfs ltmp; 336 struct fstatfs_args bsa; 337 caddr_t sg; 338 int error; 339 340 sg = stackgap_init(p->p_emul); 341 bsp = (struct statfs *) stackgap_alloc(&sg, 
sizeof (struct statfs)); 342 343 SCARG(&bsa, fd) = SCARG(uap, fd); 344 SCARG(&bsa, buf) = bsp; 345 346 if ((error = statfs(p, &bsa, retval))) 347 return error; 348 349 if ((error = copyin((caddr_t) bsp, (caddr_t) &btmp, sizeof btmp))) 350 return error; 351 352 bsd_to_linux_statfs(&btmp, <mp); 353 354 return copyout((caddr_t) <mp, (caddr_t) SCARG(uap, sp), sizeof ltmp); 355} 356 357/* 358 * uname(). Just copy the info from the various strings stored in the 359 * kernel, and put it in the Linux utsname structure. That structure 360 * is almost the same as the NetBSD one, only it has fields 65 characters 361 * long, and an extra domainname field. 362 */ 363int 364linux_uname(p, uap, retval) 365 struct proc *p; 366 struct linux_uname_args /* { 367 syscallarg(struct linux_utsname *) up; 368 } */ *uap; 369 register_t *retval; 370{ 371 extern char ostype[], hostname[], osrelease[], version[], machine[], 372 domainname[]; 373 struct linux_utsname luts; 374 int len; 375 char *cp; 376 377 strncpy(luts.l_sysname, ostype, sizeof(luts.l_sysname)); 378 strncpy(luts.l_nodename, hostname, sizeof(luts.l_nodename)); 379 strncpy(luts.l_release, osrelease, sizeof(luts.l_release)); 380 strncpy(luts.l_version, version, sizeof(luts.l_version)); 381 strncpy(luts.l_machine, machine, sizeof(luts.l_machine)); 382 strncpy(luts.l_domainname, domainname, sizeof(luts.l_domainname)); 383 384 /* This part taken from the the uname() in libc */ 385 len = sizeof(luts.l_version); 386 for (cp = luts.l_version; len--; ++cp) 387 if (*cp == '\n' || *cp == '\t') 388 if (len > 1) 389 *cp = ' '; 390 else 391 *cp = '\0'; 392 393 return copyout(&luts, SCARG(uap, up), sizeof(luts)); 394} 395 396int 397linux_olduname(p, uap, retval) 398 struct proc *p; 399 struct linux_uname_args /* { 400 syscallarg(struct linux_oldutsname *) up; 401 } */ *uap; 402 register_t *retval; 403{ 404 extern char ostype[], hostname[], osrelease[], version[], machine[]; 405 struct linux_oldutsname luts; 406 int len; 407 char *cp; 408 409 
strncpy(luts.l_sysname, ostype, sizeof(luts.l_sysname)); 410 strncpy(luts.l_nodename, hostname, sizeof(luts.l_nodename)); 411 strncpy(luts.l_release, osrelease, sizeof(luts.l_release)); 412 strncpy(luts.l_version, version, sizeof(luts.l_version)); 413 strncpy(luts.l_machine, machine, sizeof(luts.l_machine)); 414 415 /* This part taken from the the uname() in libc */ 416 len = sizeof(luts.l_version); 417 for (cp = luts.l_version; len--; ++cp) 418 if (*cp == '\n' || *cp == '\t') 419 if (len > 1) 420 *cp = ' '; 421 else 422 *cp = '\0'; 423 424 return copyout(&luts, SCARG(uap, up), sizeof(luts)); 425} 426 427int 428linux_oldolduname(p, uap, retval) 429 struct proc *p; 430 struct linux_uname_args /* { 431 syscallarg(struct linux_oldoldutsname *) up; 432 } */ *uap; 433 register_t *retval; 434{ 435 extern char ostype[], hostname[], osrelease[], version[], machine[]; 436 struct linux_oldoldutsname luts; 437 int len; 438 char *cp; 439 440 strncpy(luts.l_sysname, ostype, sizeof(luts.l_sysname)); 441 strncpy(luts.l_nodename, hostname, sizeof(luts.l_nodename)); 442 strncpy(luts.l_release, osrelease, sizeof(luts.l_release)); 443 strncpy(luts.l_version, version, sizeof(luts.l_version)); 444 strncpy(luts.l_machine, machine, sizeof(luts.l_machine)); 445 446 /* This part taken from the the uname() in libc */ 447 len = sizeof(luts.l_version); 448 for (cp = luts.l_version; len--; ++cp) 449 if (*cp == '\n' || *cp == '\t') 450 if (len > 1) 451 *cp = ' '; 452 else 453 *cp = '\0'; 454 455 return copyout(&luts, SCARG(uap, up), sizeof(luts)); 456} 457 458/* 459 * Linux wants to pass everything to a syscall in registers. However, 460 * mmap() has 6 of them. Oops: out of register error. They just pass 461 * everything in a structure. 
462 */ 463int 464linux_mmap(p, uap, retval) 465 struct proc *p; 466 struct linux_mmap_args /* { 467 syscallarg(struct linux_mmap *) lmp; 468 } */ *uap; 469 register_t *retval; 470{ 471 struct linux_mmap lmap; 472 struct mmap_args cma; 473 int error, flags; 474 475 if ((error = copyin(SCARG(uap, lmp), &lmap, sizeof lmap))) 476 return error; 477 478 flags = 0; 479 flags |= cvtto_bsd_mask(lmap.lm_flags, LINUX_MAP_SHARED, MAP_SHARED); 480 flags |= cvtto_bsd_mask(lmap.lm_flags, LINUX_MAP_PRIVATE, MAP_PRIVATE); 481 flags |= cvtto_bsd_mask(lmap.lm_flags, LINUX_MAP_FIXED, MAP_FIXED); 482 flags |= cvtto_bsd_mask(lmap.lm_flags, LINUX_MAP_ANON, MAP_ANON); 483 484 SCARG(&cma,addr) = lmap.lm_addr; 485 SCARG(&cma,len) = lmap.lm_len; 486 SCARG(&cma,prot) = lmap.lm_prot; 487 SCARG(&cma,flags) = flags; 488 SCARG(&cma,fd) = lmap.lm_fd; 489 SCARG(&cma,pad) = 0; 490 SCARG(&cma,pos) = lmap.lm_pos; 491 492 return mmap(p, &cma, retval); 493} 494 495/* 496 * Linux doesn't use the retval[1] value to determine whether 497 * we are the child or parent. 
498 */ 499int 500linux_fork(p, uap, retval) 501 struct proc *p; 502 void *uap; 503 register_t *retval; 504{ 505 int error; 506 507 if ((error = fork(p, uap, retval))) 508 return error; 509 510 if (retval[1] == 1) 511 retval[0] = 0; 512 513 return 0; 514} 515 516/* 517 * This code is partly stolen from src/lib/libc/compat-43/times.c 518 * XXX - CLK_TCK isn't declared in /sys, just in <time.h>, done here 519 */ 520 521#define CLK_TCK 100 522#define CONVTCK(r) (r.tv_sec * CLK_TCK + r.tv_usec / (1000000 / CLK_TCK)) 523 524int 525linux_times(p, uap, retval) 526 struct proc *p; 527 struct linux_times_args /* { 528 syscallarg(struct times *) tms; 529 } */ *uap; 530 register_t *retval; 531{ 532 struct timeval t; 533 struct linux_tms ltms; 534 struct rusage ru; 535 int error, s; 536 537 calcru(p, &ru.ru_utime, &ru.ru_stime, NULL); 538 ltms.ltms_utime = CONVTCK(ru.ru_utime); 539 ltms.ltms_stime = CONVTCK(ru.ru_stime); 540 541 ltms.ltms_cutime = CONVTCK(p->p_stats->p_cru.ru_utime); 542 ltms.ltms_cstime = CONVTCK(p->p_stats->p_cru.ru_stime); 543 544 if ((error = copyout(<ms, SCARG(uap, tms), sizeof ltms))) 545 return error; 546 547 s = splclock(); 548 timersub(&time, &boottime, &t); 549 splx(s); 550 551 retval[0] = ((linux_clock_t)(CONVTCK(t))); 552 return 0; 553} 554 555/* 556 * NetBSD passes fd[0] in retval[0], and fd[1] in retval[1]. 557 * Linux directly passes the pointer. 558 */ 559int 560linux_pipe(p, uap, retval) 561 struct proc *p; 562 struct linux_pipe_args /* { 563 syscallarg(int *) pfds; 564 } */ *uap; 565 register_t *retval; 566{ 567 int error; 568 569 if ((error = pipe(p, 0, retval))) 570 return error; 571 572 /* Assumes register_t is an int */ 573 574 if ((error = copyout(retval, SCARG(uap, pfds), 2 * sizeof (int)))) 575 return error; 576 577 retval[0] = 0; 578 return 0; 579} 580 581/* 582 * Alarm. This is a libc call which used setitimer(2) in NetBSD. 583 * Fiddle with the timers to make it work. 
584 */ 585int 586linux_alarm(p, uap, retval) 587 struct proc *p; 588 struct linux_alarm_args /* { 589 syscallarg(unsigned int) secs; 590 } */ *uap; 591 register_t *retval; 592{ 593 int error, s; 594 struct itimerval *itp, it; 595 596 itp = &p->p_realtimer; 597 s = splclock(); 598 /* 599 * Clear any pending timer alarms. 600 */ 601 untimeout(realitexpire, p); 602 timerclear(&itp->it_interval); 603 if (timerisset(&itp->it_value) && 604 timercmp(&itp->it_value, &time, >)) 605 timersub(&itp->it_value, &time, &itp->it_value); 606 /* 607 * Return how many seconds were left (rounded up) 608 */ 609 retval[0] = itp->it_value.tv_sec; 610 if (itp->it_value.tv_usec) 611 retval[0]++; 612 613 /* 614 * alarm(0) just resets the timer. 615 */ 616 if (SCARG(uap, secs) == 0) { 617 timerclear(&itp->it_value); 618 splx(s); 619 return 0; 620 } 621 622 /* 623 * Check the new alarm time for sanity, and set it. 624 */ 625 timerclear(&it.it_interval); 626 it.it_value.tv_sec = SCARG(uap, secs); 627 it.it_value.tv_usec = 0; 628 if (itimerfix(&it.it_value) || itimerfix(&it.it_interval)) { 629 splx(s); 630 return (EINVAL); 631 } 632 633 if (timerisset(&it.it_value)) { 634 timeradd(&it.it_value, &time, &it.it_value); 635 timeout(realitexpire, p, hzto(&it.it_value)); 636 } 637 p->p_realtimer = it; 638 splx(s); 639 640 return 0; 641} 642 643/* 644 * utime(). Do conversion to things that utimes() understands, 645 * and pass it on. 
646 */ 647int 648linux_utime(p, uap, retval) 649 struct proc *p; 650 struct linux_utime_args /* { 651 syscallarg(char *) path; 652 syscallarg(struct linux_utimbuf *)times; 653 } */ *uap; 654 register_t *retval; 655{ 656 caddr_t sg; 657 int error; 658 struct utimes_args ua; 659 struct timeval tv[2], *tvp; 660 struct linux_utimbuf lut; 661 662 sg = stackgap_init(p->p_emul); 663 LINUX_CHECK_ALT_EXIST(p, &sg, SCARG(uap, path)); 664 665 SCARG(&ua, path) = SCARG(uap, path); 666 667 if (SCARG(uap, times) != NULL) { 668 if ((error = copyin(SCARG(uap, times), &lut, sizeof lut))) 669 return error; 670 tv[0].tv_usec = tv[1].tv_usec = 0; 671 tv[0].tv_sec = lut.l_actime; 672 tv[1].tv_sec = lut.l_modtime; 673 tvp = (struct timeval *) stackgap_alloc(&sg, sizeof(tv)); 674 if ((error = copyout(tv, tvp, sizeof tv))) 675 return error; 676 SCARG(&ua, tptr) = tvp; 677 } 678 else 679 SCARG(&ua, tptr) = NULL; 680 681 return utimes(p, uap, retval); 682} 683 684/* 685 * The old Linux readdir was only able to read one entry at a time, 686 * even though it had a 'count' argument. In fact, the emulation 687 * of the old call was better than the original, because it did handle 688 * the count arg properly. Don't bother with it anymore now, and use 689 * it to distinguish between old and new. The difference is that the 690 * newer one actually does multiple entries, and the reclen field 691 * really is the reclen, not the namelength. 692 */ 693int 694linux_readdir(p, uap, retval) 695 struct proc *p; 696 struct linux_readdir_args /* { 697 syscallarg(int) fd; 698 syscallarg(struct linux_dirent *) dent; 699 syscallarg(unsigned int) count; 700 } */ *uap; 701 register_t *retval; 702{ 703 SCARG(uap, count) = 1; 704 return linux_getdents(p, uap, retval); 705} 706 707/* 708 * Linux 'readdir' call. 
This code is mostly taken from the 709 * SunOS getdents call (see compat/sunos/sunos_misc.c), though 710 * an attempt has been made to keep it a little cleaner (failing 711 * miserably, because of the cruft needed if count 1 is passed). 712 * 713 * The d_off field should contain the offset of the next valid entry, 714 * but in Linux it has the offset of the entry itself. We emulate 715 * that bug here. 716 * 717 * Read in BSD-style entries, convert them, and copy them out. 718 * 719 * Note that this doesn't handle union-mounted filesystems. 720 */ 721int 722linux_getdents(p, uap, retval) 723 struct proc *p; 724 struct linux_readdir_args /* { 725 syscallarg(int) fd; 726 syscallarg(struct linux_dirent *) dent; 727 syscallarg(unsigned int) count; 728 } */ *uap; 729 register_t *retval; 730{ 731 register struct dirent *bdp; 732 struct vnode *vp; 733 caddr_t inp, buf; /* BSD-format */ 734 int len, reclen; /* BSD-format */ 735 caddr_t outp; /* Linux-format */ 736 int resid, linuxreclen; /* Linux-format */ 737 struct file *fp; 738 struct uio auio; 739 struct iovec aiov; 740 struct linux_dirent idb; 741 off_t off; /* true file offset */ 742 linux_off_t soff; /* Linux file offset */ 743 int buflen, error, eofflag, nbytes, oldcall; 744 struct vattr va; 745 746 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0) 747 return (error); 748 749 if ((fp->f_flag & FREAD) == 0) 750 return (EBADF); 751 752 vp = (struct vnode *)fp->f_data; 753 754 if (vp->v_type != VDIR) /* XXX vnode readdir op should do this */ 755 return (EINVAL); 756 757 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p))) 758 return error; 759 760 nbytes = SCARG(uap, count); 761 if (nbytes == 1) { /* emulating old, broken behaviour */ 762 nbytes = sizeof (struct linux_dirent); 763 buflen = max(va.va_blocksize, nbytes); 764 oldcall = 1; 765 } else { 766 buflen = min(MAXBSIZE, nbytes); 767 oldcall = 0; 768 } 769 buf = malloc(buflen, M_TEMP, M_WAITOK); 770 VOP_LOCK(vp); 771 off = fp->f_offset; 772again: 773 
aiov.iov_base = buf; 774 aiov.iov_len = buflen; 775 auio.uio_iov = &aiov; 776 auio.uio_iovcnt = 1; 777 auio.uio_rw = UIO_READ; 778 auio.uio_segflg = UIO_SYSSPACE; 779 auio.uio_procp = p; 780 auio.uio_resid = buflen; 781 auio.uio_offset = off; 782 /* 783 * First we read into the malloc'ed buffer, then 784 * we massage it into user space, one record at a time. 785 */ 786 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, (u_long *)0, 0); 787 if (error) 788 goto out; 789 790 inp = buf; 791 outp = (caddr_t) SCARG(uap, dent); 792 resid = nbytes; 793 if ((len = buflen - auio.uio_resid) == 0) 794 goto eof; 795 796 for (; len > 0; len -= reclen) { 797 bdp = (struct dirent *)inp; 798 reclen = bdp->d_reclen; 799 if (reclen & 3) 800 panic("linux_readdir"); 801 off += reclen; 802 if (bdp->d_fileno == 0) { 803 inp += reclen; /* it is a hole; squish it out */ 804 continue; 805 } 806 linuxreclen = LINUX_RECLEN(&idb, bdp->d_namlen); 807 if (reclen > len || resid < linuxreclen) { 808 /* entry too big for buffer, so just stop */ 809 outp++; 810 break; 811 } 812 /* 813 * Massage in place to make a Linux-shaped dirent (otherwise 814 * we have to worry about touching user memory outside of 815 * the copyout() call). 816 */ 817 idb.d_ino = (long)bdp->d_fileno; 818 idb.d_off = off - reclen; 819 /* 820 * The old readdir() call used the reclen field as namlen. 821 */ 822 idb.d_reclen = oldcall ? 
(u_short)bdp->d_namlen : linuxreclen; 823 strcpy(idb.d_name, bdp->d_name); 824 if ((error = copyout((caddr_t)&idb, outp, linuxreclen))) 825 goto out; 826 /* advance past this real entry */ 827 inp += reclen; 828 /* advance output past Linux-shaped entry */ 829 outp += linuxreclen; 830 resid -= linuxreclen; 831 if (oldcall) 832 break; 833 } 834 835 /* if we squished out the whole block, try again */ 836 if (outp == (caddr_t) SCARG(uap, dent)) 837 goto again; 838 fp->f_offset = off; /* update the vnode offset */ 839 840 if (oldcall) 841 nbytes = resid + linuxreclen; 842 843eof: 844 *retval = nbytes - resid; 845out: 846 VOP_UNLOCK(vp); 847 free(buf, M_TEMP); 848 return error; 849} 850 851/* 852 * Not sure why the arguments to this older version of select() were put 853 * into a structure, because there are 5, and that can all be handled 854 * in registers on the i386 like Linux wants to. 855 */ 856int 857linux_oldselect(p, uap, retval) 858 struct proc *p; 859 struct linux_oldselect_args /* { 860 syscallarg(struct linux_select *) lsp; 861 } */ *uap; 862 register_t *retval; 863{ 864 struct linux_select ls; 865 int error; 866 867 if ((error = copyin(SCARG(uap, lsp), &ls, sizeof(ls)))) 868 return error; 869 870 return linux_select1(p, retval, ls.nfds, ls.readfds, ls.writefds, 871 ls.exceptfds, ls.timeout); 872} 873 874/* 875 * Even when just using registers to pass arguments to syscalls you can 876 * have 5 of them on the i386. So this newer version of select() does 877 * this. 
878 */ 879int 880linux_select(p, uap, retval) 881 struct proc *p; 882 struct linux_select_args /* { 883 syscallarg(int) nfds; 884 syscallarg(fd_set *) readfds; 885 syscallarg(fd_set *) writefds; 886 syscallarg(fd_set *) exceptfds; 887 syscallarg(struct timeval *) timeout; 888 } */ *uap; 889 register_t *retval; 890{ 891 return linux_select1(p, retval, SCARG(uap, nfds), SCARG(uap, readfds), 892 SCARG(uap, writefds), SCARG(uap, exceptfds), SCARG(uap, timeout)); 893} 894 895/* 896 * Common code for the old and new versions of select(). A couple of 897 * things are important: 898 * 1) return the amount of time left in the 'timeout' parameter 899 * 2) select never returns ERESTART on Linux, always return EINTR 900 */ 901int 902linux_select1(p, retval, nfds, readfds, writefds, exceptfds, timeout) 903 struct proc *p; 904 register_t *retval; 905 int nfds; 906 fd_set *readfds, *writefds, *exceptfds; 907 struct timeval *timeout; 908{ 909 struct select_args bsa; 910 struct timeval tv0, tv1, utv, *tvp; 911 caddr_t sg; 912 int error; 913 914 SCARG(&bsa, nd) = nfds; 915 SCARG(&bsa, in) = readfds; 916 SCARG(&bsa, ou) = writefds; 917 SCARG(&bsa, ex) = exceptfds; 918 SCARG(&bsa, tv) = timeout; 919 920 /* 921 * Store current time for computation of the amount of 922 * time left. 923 */ 924 if (timeout) { 925 if ((error = copyin(timeout, &utv, sizeof(utv)))) 926 return error; 927 if (itimerfix(&utv)) { 928 /* 929 * The timeval was invalid. Convert it to something 930 * valid that will act as it does under Linux. 
931 */ 932 sg = stackgap_init(p->p_emul); 933 tvp = stackgap_alloc(&sg, sizeof(utv)); 934 utv.tv_sec += utv.tv_usec / 1000000; 935 utv.tv_usec %= 1000000; 936 if (utv.tv_usec < 0) { 937 utv.tv_sec -= 1; 938 utv.tv_usec += 1000000; 939 } 940 if (utv.tv_sec < 0) 941 timerclear(&utv); 942 if ((error = copyout(&utv, tvp, sizeof(utv)))) 943 return error; 944 SCARG(&bsa, tv) = tvp; 945 } 946 microtime(&tv0); 947 } 948 949 error = select(p, &bsa, retval); 950 if (error) { 951 /* 952 * See fs/select.c in the Linux kernel. Without this, 953 * Maelstrom doesn't work. 954 */ 955 if (error == ERESTART) 956 error = EINTR; 957 return error; 958 } 959 960 if (timeout) { 961 if (*retval) { 962 /* 963 * Compute how much time was left of the timeout, 964 * by subtracting the current time and the time 965 * before we started the call, and subtracting 966 * that result from the user-supplied value. 967 */ 968 microtime(&tv1); 969 timersub(&tv1, &tv0, &tv1); 970 timersub(&utv, &tv1, &utv); 971 if (utv.tv_sec < 0) 972 timerclear(&utv); 973 } else 974 timerclear(&utv); 975 if ((error = copyout(&utv, timeout, sizeof(utv)))) 976 return error; 977 } 978 979 return 0; 980} 981 982/* 983 * Get the process group of a certain process. Look it up 984 * and return the value. 985 */ 986int 987linux_getpgid(p, uap, retval) 988 struct proc *p; 989 struct linux_getpgid_args /* { 990 syscallarg(int) pid; 991 } */ *uap; 992 register_t *retval; 993{ 994 struct proc *targp; 995 996 if (SCARG(uap, pid) != 0 && SCARG(uap, pid) != p->p_pid) 997 if ((targp = pfind(SCARG(uap, pid))) == 0) 998 return ESRCH; 999 else 1000 targp = p; 1001 1002 retval[0] = targp->p_pgid; 1003 return 0; 1004} 1005 1006/* 1007 * Set the 'personality' (emulation mode) for the current process. Only 1008 * accept the Linux personality here (0). This call is needed because 1009 * the Linux ELF crt0 issues it in an ugly kludge to make sure that 1010 * ELF binaries run in Linux mode, not SVR4 mode. 
1011 */ 1012int 1013linux_personality(p, uap, retval) 1014 struct proc *p; 1015 struct linux_personality_args /* { 1016 syscallarg(int) per; 1017 } */ *uap; 1018 register_t *retval; 1019{ 1020 if (SCARG(uap, per) != 0) 1021 return EINVAL; 1022 retval[0] = 0; 1023 return 0; 1024} 1025 1026/* 1027 * The calls are here because of type conversions. 1028 */ 1029int 1030linux_setreuid(p, uap, retval) 1031 struct proc *p; 1032 struct linux_setreuid_args /* { 1033 syscallarg(int) ruid; 1034 syscallarg(int) euid; 1035 } */ *uap; 1036 register_t *retval; 1037{ 1038 struct compat_43_setreuid_args bsa; 1039 1040 SCARG(&bsa, ruid) = ((linux_uid_t)SCARG(uap, ruid) == (linux_uid_t)-1) ? 1041 (uid_t)-1 : SCARG(uap, ruid); 1042 SCARG(&bsa, euid) = ((linux_uid_t)SCARG(uap, euid) == (linux_uid_t)-1) ? 1043 (uid_t)-1 : SCARG(uap, euid); 1044 1045 return compat_43_setreuid(p, &bsa, retval); 1046} 1047 1048int 1049linux_setregid(p, uap, retval) 1050 struct proc *p; 1051 struct linux_setregid_args /* { 1052 syscallarg(int) rgid; 1053 syscallarg(int) egid; 1054 } */ *uap; 1055 register_t *retval; 1056{ 1057 struct compat_43_setregid_args bsa; 1058 1059 SCARG(&bsa, rgid) = ((linux_gid_t)SCARG(uap, rgid) == (linux_gid_t)-1) ? 1060 (uid_t)-1 : SCARG(uap, rgid); 1061 SCARG(&bsa, egid) = ((linux_gid_t)SCARG(uap, egid) == (linux_gid_t)-1) ? 1062 (uid_t)-1 : SCARG(uap, egid); 1063 1064 return compat_43_setregid(p, &bsa, retval); 1065} 1066