linux_pipe.c revision 1.1
/* $NetBSD: linux_pipe.c,v 1.1 1995/02/28 23:25:07 fvdl Exp $ */

/*
 * Copyright (c) 1995 Frank van der Linden
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed for the NetBSD Project
 *      by Frank van der Linden
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Linux compatibility module. Try to deal with various Linux system calls.
36 */ 37 38#include <sys/param.h> 39#include <sys/systm.h> 40#include <sys/namei.h> 41#include <sys/proc.h> 42#include <sys/dir.h> 43#include <sys/file.h> 44#include <sys/stat.h> 45#include <sys/filedesc.h> 46#include <sys/ioctl.h> 47#include <sys/kernel.h> 48#include <sys/malloc.h> 49#include <sys/mbuf.h> 50#include <sys/mman.h> 51#include <sys/mount.h> 52#include <sys/ptrace.h> 53#include <sys/resource.h> 54#include <sys/resourcevar.h> 55#include <sys/signal.h> 56#include <sys/signalvar.h> 57#include <sys/socket.h> 58#include <sys/time.h> 59#include <sys/times.h> 60#include <sys/vnode.h> 61#include <sys/uio.h> 62#include <sys/wait.h> 63#include <sys/utsname.h> 64#include <sys/unistd.h> 65 66#include <sys/syscallargs.h> 67 68#include <vm/vm.h> 69#include <vm/vm_param.h> 70 71#include <compat/linux/linux_types.h> 72#include <compat/linux/linux_fcntl.h> 73#include <compat/linux/linux_mmap.h> 74#include <compat/linux/linux_syscallargs.h> 75#include <compat/linux/linux_util.h> 76#include <compat/linux/linux_dirent.h> 77 78/* 79 * The information on a terminated (or stopped) process needs 80 * to be converted in order for Linux binaries to get a valid signal 81 * number out of it. 82 */ 83static int 84bsd_to_linux_wstat(status) 85 int *status; 86{ 87 if (WIFSIGNALED(*status)) 88 *status = (*status & ~0177) | 89 bsd_to_linux_sig(WTERMSIG(*status)); 90 else if (WIFSTOPPED(*status)) 91 *status = (*status & ~0xff00) | 92 (bsd_to_linux_sig(WSTOPSIG(*status)) << 8); 93} 94 95/* 96 * waitpid(2). Passed on to the NetBSD call, surrounded by code to 97 * reserve some space for a NetBSD-style wait status, and converting 98 * it to what Linux wants. 
99 */ 100int 101linux_waitpid(p, uap, retval) 102 struct proc *p; 103 struct linux_waitpid_args /* { 104 syscallarg(int) pid; 105 syscallarg(int *) status; 106 syscallarg(int) options; 107 } */ *uap; 108 register_t *retval; 109{ 110 struct wait4_args w4a; 111 int error, *status, tstat; 112 caddr_t sg; 113 114 sg = stackgap_init(); 115 status = (int *) stackgap_alloc(&sg, sizeof status); 116 117 SCARG(&w4a, pid) = SCARG(uap, pid); 118 SCARG(&w4a, status) = status; 119 SCARG(&w4a, options) = SCARG(uap, options); 120 SCARG(&w4a, rusage) = NULL; 121 122 if ((error = wait4(p, &w4a, retval))) 123 return error; 124 125 if ((error = copyin(status, &tstat, sizeof tstat))) 126 return error; 127 128 bsd_to_linux_wstat(&tstat); 129 130 return copyout(&tstat, SCARG(uap, status), sizeof tstat); 131} 132 133/* 134 * This is very much the same as waitpid() 135 */ 136int 137linux_wait4(p, uap, retval) 138 struct proc *p; 139 struct linux_wait4_args /* { 140 syscallarg(int) pid; 141 syscallarg(int *) status; 142 syscallarg(int) options; 143 syscallarg(struct rusage *) rusage; 144 } */ *uap; 145 register_t *retval; 146{ 147 struct wait4_args w4a; 148 int error, *status, tstat; 149 caddr_t sg; 150 151 sg = stackgap_init(); 152 status = (int *) stackgap_alloc(&sg, sizeof status); 153 154 SCARG(&w4a, pid) = SCARG(uap, pid); 155 SCARG(&w4a, status) = status; 156 SCARG(&w4a, options) = SCARG(uap, options); 157 SCARG(&w4a, rusage) = SCARG(uap, rusage); 158 159 if ((error = wait4(p, &w4a, retval))) 160 return error; 161 162 if ((error = copyin(status, &tstat, sizeof tstat))) 163 return error; 164 165 bsd_to_linux_wstat(&tstat); 166 167 return copyout(&tstat, SCARG(uap, status), sizeof tstat); 168} 169 170/* 171 * This is the old brk(2) call. 
I don't think anything in the Linux 172 * world uses this anymore 173 */ 174int 175linux_break(p, uap, retval) 176 struct proc *p; 177 struct linux_brk_args /* { 178 syscallarg(char *) nsize; 179 } */ *uap; 180 register_t *retval; 181{ 182 return ENOSYS; 183} 184 185/* 186 * Linux brk(2). The check if the new address is >= the old one is 187 * done in the kernel in Linux. NetBSD does it in the library. 188 */ 189int 190linux_brk(p, uap, retval) 191 struct proc *p; 192 struct linux_brk_args /* { 193 syscallarg(char *) nsize; 194 } */ *uap; 195 register_t *retval; 196{ 197 char *nbrk = SCARG(uap, nsize); 198 struct obreak_args oba; 199 struct vmspace *vm = p->p_vmspace; 200 int error = 0; 201 caddr_t oldbrk, newbrk; 202 203 oldbrk = vm->vm_daddr + ctob(vm->vm_dsize); 204 /* 205 * XXX inconsistent.. Linux always returns at least the old 206 * brk value, but it will be page-aligned if this fails, 207 * and possibly not page aligned if it succeeds (the user 208 * supplied pointer is returned). 209 */ 210 SCARG(&oba, nsize) = nbrk; 211 212 if ((caddr_t) nbrk > vm->vm_daddr && obreak(p, &oba, retval) == 0) 213 retval[0] = (register_t) nbrk; 214 else 215 retval[0] = (register_t) oldbrk; 216 217 return 0; 218} 219 220/* 221 * I wonder why Linux has gettimeofday() _and_ time().. Still, we 222 * need to deal with it. 223 */ 224int 225linux_time(p, uap, retval) 226 struct proc *p; 227 struct linux_time_args /* { 228 linux_time_t *t; 229 } */ *uap; 230 register_t *retval; 231{ 232 struct timeval atv; 233 linux_time_t tt; 234 int error; 235 236 microtime(&atv); 237 238 tt = atv.tv_sec; 239 if (SCARG(uap, t) && (error = copyout(&tt, SCARG(uap, t), sizeof tt))) 240 return error; 241 242 retval[0] = tt; 243 return 0; 244} 245 246/* 247 * The statfs and fstatfs called are not implemented yet. They're 248 * easy, but just not important for the binaries I wanted to get 249 * running. 
250 */ 251int 252linux_statfs(p, uap, retval) 253 struct proc *p; 254 struct linux_statfs_args /* { 255 syscallarg(char *) path; 256 syscallarg(struct linux_statfs *) sp; 257 } */ *uap; 258 register_t *retval; 259{ 260 return ENOSYS; 261} 262 263int 264linux_fstatfs(p, uap, retval) 265 struct proc *p; 266 struct linux_fstatfs_args /* { 267 syscallarg(char *) path; 268 syscallarg(struct linux_statfs *) sp; 269 } */ *uap; 270 register_t *retval; 271{ 272 return ENOSYS; 273} 274 275/* 276 * uname(). Just copy the info from the various strings stored in the 277 * kernel, and put it in the Linux utsname structure. That structure 278 * is almost the same as the NetBSD one, only it has fields 65 characters 279 * long, and an extra domainname field. 280 */ 281int 282linux_uname(p, uap, retval) 283 struct proc *p; 284 struct linux_uname_args /* { 285 syscallarg(struct linux_utsname *) up; 286 } */ *uap; 287 register_t *retval; 288{ 289 extern char ostype[], osrelease[], version[], hostname[], domainname[]; 290 extern char machine[]; 291 struct linux_utsname tluts; 292 int len; 293 char *cp; 294 295 strncpy(tluts.l_sysname, ostype, sizeof (tluts.l_sysname)); 296 strncpy(tluts.l_nodename, hostname, sizeof (tluts.l_nodename)); 297 strncpy(tluts.l_release, osrelease, sizeof (tluts.l_release)); 298 strncpy(tluts.l_machine, machine, sizeof (tluts.l_machine)); 299 strncpy(tluts.l_domainname, domainname, sizeof (tluts.l_domainname)); 300 strncpy(tluts.l_version, version, sizeof (tluts.l_version)); 301 302 /* This part taken from the the uname() in libc */ 303 len = sizeof (tluts.l_version); 304 for (cp = tluts.l_version; len--; ++cp) 305 if (*cp == '\n' || *cp == '\t') 306 if (len > 1) 307 *cp = ' '; 308 else 309 *cp = '\0'; 310 311 return copyout(&tluts, SCARG(uap, up), sizeof tluts); 312} 313 314/* 315 * Linux wants to pass everything to a syscall in registers. However, 316 * mmap() has 6 of them. Oops: out of register error. They just pass 317 * everything in a structure. 
318 */ 319int 320linux_mmap(p, uap, retval) 321 struct proc *p; 322 struct linux_mmap_args /* { 323 syscallarg(struct linux_mmap *) lmp; 324 } */ *uap; 325 register_t *retval; 326{ 327 struct linux_mmap lmap; 328 struct mmap_args cma; 329 int error, flags; 330 331 if ((error = copyin(SCARG(uap, lmp), &lmap, sizeof lmap))) 332 return error; 333 334 flags = 0; 335 flags |= cvtto_bsd_mask(lmap.lm_flags, LINUX_MAP_SHARED, MAP_SHARED); 336 flags |= cvtto_bsd_mask(lmap.lm_flags, LINUX_MAP_PRIVATE, MAP_PRIVATE); 337 flags |= cvtto_bsd_mask(lmap.lm_flags, LINUX_MAP_FIXED, MAP_FIXED); 338 flags |= cvtto_bsd_mask(lmap.lm_flags, LINUX_MAP_ANON, MAP_ANON); 339 340 SCARG(&cma,addr) = lmap.lm_addr; 341 SCARG(&cma,len) = lmap.lm_len; 342 SCARG(&cma,prot) = lmap.lm_prot; 343 SCARG(&cma,flags) = flags; 344 SCARG(&cma,fd) = lmap.lm_fd; 345 SCARG(&cma,pad) = 0; 346 SCARG(&cma,pos) = lmap.lm_pos; 347 348 return mmap(p, &cma, retval); 349} 350 351/* 352 * Linux doesn't use the retval[1] value to determine whether 353 * we are the child or parent. 
354 */ 355int 356linux_fork(p, uap, retval) 357 struct proc *p; 358 void *uap; 359 register_t *retval; 360{ 361 int error; 362 363 if ((error = fork(p, uap, retval))) 364 return error; 365 366 if (retval[1] == 1) 367 retval[0] = 0; 368 369 return 0; 370} 371 372/* 373 * This code is partly stolen from src/lib/libc/compat-43/times.c 374 * XXX - CLK_TCK isn't declared in /sys, just in <time.h>, done here 375 */ 376 377#define CLK_TCK 100 378#define CONVTCK(r) (r.tv_sec * CLK_TCK + r.tv_usec / (1000000 / CLK_TCK)) 379 380int 381linux_times(p, uap, retval) 382 struct proc *p; 383 struct linux_times_args /* { 384 syscallarg(struct times *) tms; 385 } */ *uap; 386 register_t *retval; 387{ 388 struct timeval t; 389 struct linux_tms ltms; 390 struct rusage ru; 391 int error; 392 393 calcru(p, &ru.ru_utime, &ru.ru_stime, NULL); 394 ltms.ltms_utime = CONVTCK(ru.ru_utime); 395 ltms.ltms_stime = CONVTCK(ru.ru_stime); 396 397 ltms.ltms_cutime = CONVTCK(p->p_stats->p_cru.ru_utime); 398 ltms.ltms_cstime = CONVTCK(p->p_stats->p_cru.ru_stime); 399 400 if ((error = copyout(<ms, SCARG(uap, tms), sizeof ltms))) 401 return error; 402 403 microtime(&t); 404 405 retval[0] = ((linux_clock_t)(CONVTCK(t))); 406 return 0; 407} 408 409/* 410 * NetBSD passes fd[0] in retval[0], and fd[1] in retval[1]. 411 * Linux directly passes the pointer. 412 */ 413int 414linux_pipe(p, uap, retval) 415 struct proc *p; 416 struct linux_pipe_args /* { 417 syscallarg(int *) pfds; 418 } */ *uap; 419 register_t *retval; 420{ 421 int error; 422 423 if ((error = pipe(p, 0, retval))) 424 return error; 425 426 /* Assumes register_t is an int */ 427 428 if ((error = copyout(retval, SCARG(uap, pfds), 2 * sizeof (int)))) 429 return error; 430 431 retval[0] = 0; 432 return 0; 433} 434 435/* 436 * Alarm. This is a libc call which used setitimer(2) in NetBSD. 437 * Fiddle with the timers to make it work. 
438 */ 439int 440linux_alarm(p, uap, retval) 441 struct proc *p; 442 struct linux_alarm_args /* { 443 syscallarg(unsigned int) secs; 444 } */ *uap; 445 register_t *retval; 446{ 447 int error, s; 448 struct itimerval *itp, it; 449 450 itp = &p->p_realtimer; 451 s = splclock(); 452 /* 453 * Clear any pending timer alarms. 454 */ 455 untimeout(realitexpire, p); 456 timerclear(&itp->it_interval); 457 if (timerisset(&itp->it_value) && 458 timercmp(&itp->it_value, &time, >)) 459 __timersub(&itp->it_value, &time); 460 /* 461 * Return how many seconds were left (rounded up) 462 */ 463 retval[0] = itp->it_value.tv_sec; 464 if (itp->it_value.tv_usec) 465 retval[0]++; 466 467 /* 468 * alarm(0) just resets the timer. 469 */ 470 if (SCARG(uap, secs) == 0) { 471 timerclear(&itp->it_value); 472 splx(s); 473 return 0; 474 } 475 476 /* 477 * Check the new alarm time for sanity, and set it. 478 */ 479 timerclear(&it.it_interval); 480 it.it_value.tv_sec = SCARG(uap, secs); 481 it.it_value.tv_usec = 0; 482 if (itimerfix(&it.it_value) || itimerfix(&it.it_interval)) { 483 splx(s); 484 return (EINVAL); 485 } 486 487 if (timerisset(&it.it_value)) { 488 __timeradd(&it.it_value, &time); 489 timeout(realitexpire, p, hzto(&it.it_value)); 490 } 491 p->p_realtimer = it; 492 splx(s); 493 494 return 0; 495} 496 497/* 498 * utime(). Do conversion to things that utimes() understands, 499 * and pass it on. 
500 */ 501int 502linux_utime(p, uap, retval) 503 struct proc *p; 504 struct linux_utime_args /* { 505 syscallarg(char *) path; 506 syscallarg(struct linux_utimbuf *)times; 507 } */ *uap; 508 register_t *retval; 509{ 510 caddr_t sg; 511 int error; 512 struct utimes_args ua; 513 struct timeval tv[2], *tvp; 514 struct linux_utimbuf lut; 515 516 sg = stackgap_init(); 517 CHECK_ALT(p, &sg, SCARG(uap, path)); 518 519 SCARG(&ua, path) = SCARG(uap, path); 520 521 if (SCARG(uap, times) != NULL) { 522 if ((error = copyin(SCARG(uap, times), &lut, sizeof lut))) 523 return error; 524 tv[0].tv_usec = tv[1].tv_usec = 0; 525 tv[0].tv_sec = lut.l_actime; 526 tv[1].tv_sec = lut.l_modtime; 527 tvp = (struct timeval *) stackgap_alloc(sizeof tv); 528 if ((error = copyout(tv, tvp, sizeof tv))) 529 return error; 530 SCARG(&ua, tptr) = tvp; 531 } 532 else 533 SCARG(&ua, tptr) = NULL; 534 535 return utimes(p, uap, retval); 536} 537 538/* 539 * Linux 'readdir' call. This code is mostly taken from the 540 * SunOS getdents call (see compat/sunos/sunos_misc.c), though 541 * an attempt has been made to keep it a little cleaner (failing 542 * miserably, because of the cruft needed if count 1 is passed). 543 * 544 * Read in BSD-style entries, convert them, and copy them out. 545 * Note that the Linux d_reclen is actually the name length, 546 * and d_off is the reclen. 547 * 548 * Note that this doesn't handle union-mounted filesystems. 
549 */ 550int 551linux_readdir(p, uap, retval) 552 struct proc *p; 553 struct linux_readdir_args /* { 554 syscallarg(int) fd; 555 syscallarg(struct linux_dirent *) dent; 556 syscallarg(unsigned int) count; 557 } */ *uap; 558 register_t *retval; 559{ 560 register struct dirent *bdp; 561 struct vnode *vp; 562 caddr_t inp, buf; /* BSD-format */ 563 int len, reclen; /* BSD-format */ 564 caddr_t outp; /* Linux-format */ 565 int resid, linuxreclen; /* Linux-format */ 566 struct file *fp; 567 struct uio auio; 568 struct iovec aiov; 569 struct linux_dirent idb; 570 off_t off; /* true file offset */ 571 linux_off_t soff; /* Linux file offset */ 572 int buflen, error, eofflag, nbytes, justone; 573 struct vattr va; 574 575 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0) 576 return (error); 577 578 if ((fp->f_flag & FREAD) == 0) 579 return (EBADF); 580 581 vp = (struct vnode *) fp->f_data; 582 583 if (vp->v_type != VDIR) /* XXX vnode readdir op should do this */ 584 return (EINVAL); 585 586 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p))) 587 return error; 588 589 nbytes = SCARG(uap, count); 590 if (nbytes == 1) { /* Need this for older Linux libs, apparently */ 591 nbytes = sizeof (struct linux_dirent); 592 justone = 1; 593 } 594 else 595 justone = 0; 596 597 buflen = max(va.va_blocksize, nbytes); 598 buf = malloc(buflen, M_TEMP, M_WAITOK); 599 VOP_LOCK(vp); 600 off = fp->f_offset; 601again: 602 aiov.iov_base = buf; 603 aiov.iov_len = buflen; 604 auio.uio_iov = &aiov; 605 auio.uio_iovcnt = 1; 606 auio.uio_rw = UIO_READ; 607 auio.uio_segflg = UIO_SYSSPACE; 608 auio.uio_procp = p; 609 auio.uio_resid = buflen; 610 auio.uio_offset = off; 611 /* 612 * First we read into the malloc'ed buffer, then 613 * we massage it into user space, one record at a time. 
614 */ 615 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, (u_long *) 0, 0); 616 if (error) 617 goto out; 618 619 inp = buf; 620 outp = (caddr_t) SCARG(uap, dent); 621 resid = nbytes; 622 if ((len = buflen - auio.uio_resid) == 0) 623 goto eof; 624 625 for (; len > 0; len -= reclen) { 626 reclen = ((struct dirent *) inp)->d_reclen; 627 if (reclen & 3) 628 panic("linux_readdir"); 629 off += reclen; /* each entry points to next */ 630 bdp = (struct dirent *) inp; 631 if (bdp->d_fileno == 0) { 632 inp += reclen; /* it is a hole; squish it out */ 633 continue; 634 } 635 linuxreclen = LINUX_RECLEN(&idb, bdp->d_namlen); 636 if (reclen > len || resid < linuxreclen) { 637 /* entry too big for buffer, so just stop */ 638 outp++; 639 break; 640 } 641 /* 642 * Massage in place to make a Linux-shaped dirent (otherwise 643 * we have to worry about touching user memory outside of 644 * the copyout() call). 645 */ 646 idb.l_dino = (long) bdp->d_fileno; 647 idb.l_doff = (linux_off_t) linuxreclen; 648 idb.l_dreclen = (u_short) bdp->d_namlen; /* sigh */ 649 strcpy(idb.l_dname, bdp->d_name); 650 if ((error = copyout((caddr_t)&idb, outp, linuxreclen))) 651 goto out; 652 /* advance past this real entry */ 653 inp += reclen; 654 /* advance output past Linux-shaped entry */ 655 outp += linuxreclen; 656 resid -= linuxreclen; 657 if (justone) 658 break; 659 } 660 661 /* if we squished out the whole block, try again */ 662 if (outp == (caddr_t) SCARG(uap, dent)) 663 goto again; 664 fp->f_offset = off; /* update the vnode offset */ 665 666 if (justone) 667 nbytes = resid + linuxreclen; 668 669eof: 670 *retval = nbytes - resid; 671out: 672 VOP_UNLOCK(vp); 673 free(buf, M_TEMP); 674 return error; 675} 676 677/* 678 * Out of register error once more.. Apart from that, no difference. 
679 */ 680int 681linux_select(p, uap, retval) 682 struct proc *p; 683 struct linux_select_args /* { 684 syscallarg(struct linux_select *) lsp; 685 } */ *uap; 686 register_t *retval; 687{ 688 struct linux_select ls; 689 struct select_args bsa; 690 int error; 691 692 if ((error = copyin(SCARG(uap, lsp), (caddr_t) &ls, sizeof ls))) 693 return error; 694 695 SCARG(&bsa, nd) = ls.nfds; 696 SCARG(&bsa, in) = ls.readfds; 697 SCARG(&bsa, ou) = ls.writefds; 698 SCARG(&bsa, ex) = ls.exceptfds; 699 SCARG(&bsa, tv) = ls.timeout; 700 701 return select(p, &bsa, retval); 702} 703 704/* 705 * Get the process group of a certain process. Look it up 706 * and return the value. 707 */ 708int 709linux_getpgid(p, uap, retval) 710 struct proc *p; 711 struct linux_getpgid_args /* { 712 syscallarg(int) pid; 713 } */ *uap; 714 register_t *retval; 715{ 716 struct proc *targp; 717 718 if (SCARG(uap, pid) != 0 && SCARG(uap, pid) != p->p_pid) 719 if ((targp = pfind(SCARG(uap, pid))) == 0) 720 return ESRCH; 721 else 722 targp = p; 723 724 retval[0] = targp->p_pgid; 725 return 0; 726} 727