1/* $NetBSD$ */ 2 3/*- 4 * Copyright (c) 2011 Antti Kantee. All Rights Reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28/* Disable namespace mangling, Fortification is useless here anyway. 
*/ 29#undef _FORTIFY_SOURCE 30 31#include <sys/cdefs.h> 32__RCSID("$NetBSD$"); 33 34#include <sys/param.h> 35#include <sys/types.h> 36#include <sys/event.h> 37#include <sys/ioctl.h> 38#include <sys/mman.h> 39#include <sys/mount.h> 40#include <sys/poll.h> 41#include <sys/socket.h> 42#include <sys/statvfs.h> 43#include <sys/quotactl.h> 44 45#include <rump/rumpclient.h> 46#include <rump/rump_syscalls.h> 47 48#include <assert.h> 49#include <dlfcn.h> 50#include <err.h> 51#include <errno.h> 52#include <fcntl.h> 53#include <poll.h> 54#include <pthread.h> 55#include <signal.h> 56#include <stdarg.h> 57#include <stdbool.h> 58#include <stdio.h> 59#include <stdlib.h> 60#include <string.h> 61#include <time.h> 62#include <unistd.h> 63 64#include "hijack.h" 65 66enum dualcall { 67 DUALCALL_WRITE, DUALCALL_WRITEV, DUALCALL_PWRITE, DUALCALL_PWRITEV, 68 DUALCALL_IOCTL, DUALCALL_FCNTL, 69 DUALCALL_SOCKET, DUALCALL_ACCEPT, DUALCALL_BIND, DUALCALL_CONNECT, 70 DUALCALL_GETPEERNAME, DUALCALL_GETSOCKNAME, DUALCALL_LISTEN, 71 DUALCALL_RECVFROM, DUALCALL_RECVMSG, 72 DUALCALL_SENDTO, DUALCALL_SENDMSG, 73 DUALCALL_GETSOCKOPT, DUALCALL_SETSOCKOPT, 74 DUALCALL_SHUTDOWN, 75 DUALCALL_READ, DUALCALL_READV, DUALCALL_PREAD, DUALCALL_PREADV, 76 DUALCALL_DUP2, 77 DUALCALL_CLOSE, 78 DUALCALL_POLLTS, 79 DUALCALL_KEVENT, 80 DUALCALL_STAT, DUALCALL_LSTAT, DUALCALL_FSTAT, 81 DUALCALL_CHMOD, DUALCALL_LCHMOD, DUALCALL_FCHMOD, 82 DUALCALL_CHOWN, DUALCALL_LCHOWN, DUALCALL_FCHOWN, 83 DUALCALL_OPEN, 84 DUALCALL_STATVFS1, DUALCALL_FSTATVFS1, 85 DUALCALL_CHDIR, DUALCALL_FCHDIR, 86 DUALCALL_LSEEK, 87 DUALCALL_GETDENTS, 88 DUALCALL_UNLINK, DUALCALL_SYMLINK, DUALCALL_READLINK, 89 DUALCALL_RENAME, 90 DUALCALL_MKDIR, DUALCALL_RMDIR, 91 DUALCALL_UTIMES, DUALCALL_LUTIMES, DUALCALL_FUTIMES, 92 DUALCALL_TRUNCATE, DUALCALL_FTRUNCATE, 93 DUALCALL_FSYNC, DUALCALL_FSYNC_RANGE, 94 DUALCALL_MOUNT, DUALCALL_UNMOUNT, 95 DUALCALL___GETCWD, 96 DUALCALL_CHFLAGS, DUALCALL_LCHFLAGS, DUALCALL_FCHFLAGS, 97 DUALCALL_ACCESS, 98 
DUALCALL_MKNOD, 99 DUALCALL___SYSCTL, 100 DUALCALL_GETVFSSTAT, DUALCALL_NFSSVC, 101 DUALCALL_GETFH, DUALCALL_FHOPEN, DUALCALL_FHSTAT, DUALCALL_FHSTATVFS1, 102#if __NetBSD_Prereq__(5,99,48) 103 DUALCALL_QUOTACTL, 104#endif 105 DUALCALL__NUM 106}; 107 108#define RSYS_STRING(a) __STRING(a) 109#define RSYS_NAME(a) RSYS_STRING(__CONCAT(RUMP_SYS_RENAME_,a)) 110 111/* 112 * Would be nice to get this automatically in sync with libc. 113 * Also, this does not work for compat-using binaries! 114 */ 115#if !__NetBSD_Prereq__(5,99,7) 116#define REALSELECT select 117#define REALPOLLTS pollts 118#define REALKEVENT kevent 119#define REALSTAT __stat30 120#define REALLSTAT __lstat30 121#define REALFSTAT __fstat30 122#define REALUTIMES utimes 123#define REALLUTIMES lutimes 124#define REALFUTIMES futimes 125#define REALMKNOD mknod 126#define REALFHSTAT __fhstat40 127#else 128#define REALSELECT _sys___select50 129#define REALPOLLTS _sys___pollts50 130#define REALKEVENT _sys___kevent50 131#define REALSTAT __stat50 132#define REALLSTAT __lstat50 133#define REALFSTAT __fstat50 134#define REALUTIMES __utimes50 135#define REALLUTIMES __lutimes50 136#define REALFUTIMES __futimes50 137#define REALMKNOD __mknod50 138#define REALFHSTAT __fhstat50 139#endif 140#define REALREAD _sys_read 141#define REALPREAD _sys_pread 142#define REALPWRITE _sys_pwrite 143#define REALGETDENTS __getdents30 144#define REALMOUNT __mount50 145#define REALGETFH __getfh30 146#define REALFHOPEN __fhopen40 147#define REALFHSTATVFS1 __fhstatvfs140 148#define OLDREALQUOTACTL __quotactl50 /* 5.99.48-62 only */ 149 150int REALSELECT(int, fd_set *, fd_set *, fd_set *, struct timeval *); 151int REALPOLLTS(struct pollfd *, nfds_t, 152 const struct timespec *, const sigset_t *); 153int REALKEVENT(int, const struct kevent *, size_t, struct kevent *, size_t, 154 const struct timespec *); 155ssize_t REALREAD(int, void *, size_t); 156ssize_t REALPREAD(int, void *, size_t, off_t); 157ssize_t REALPWRITE(int, const void *, size_t, 
off_t); 158int REALSTAT(const char *, struct stat *); 159int REALLSTAT(const char *, struct stat *); 160int REALFSTAT(int, struct stat *); 161int REALGETDENTS(int, char *, size_t); 162int REALUTIMES(const char *, const struct timeval [2]); 163int REALLUTIMES(const char *, const struct timeval [2]); 164int REALFUTIMES(int, const struct timeval [2]); 165int REALMOUNT(const char *, const char *, int, void *, size_t); 166int __getcwd(char *, size_t); 167int REALMKNOD(const char *, mode_t, dev_t); 168int REALGETFH(const char *, void *, size_t *); 169int REALFHOPEN(const void *, size_t, int); 170int REALFHSTAT(const void *, size_t, struct stat *); 171int REALFHSTATVFS1(const void *, size_t, struct statvfs *, int); 172int OLDREALQUOTACTL(const char *, struct plistref *); 173 174#define S(a) __STRING(a) 175struct sysnames { 176 enum dualcall scm_callnum; 177 const char *scm_hostname; 178 const char *scm_rumpname; 179} syscnames[] = { 180 { DUALCALL_SOCKET, "__socket30", RSYS_NAME(SOCKET) }, 181 { DUALCALL_ACCEPT, "accept", RSYS_NAME(ACCEPT) }, 182 { DUALCALL_BIND, "bind", RSYS_NAME(BIND) }, 183 { DUALCALL_CONNECT, "connect", RSYS_NAME(CONNECT) }, 184 { DUALCALL_GETPEERNAME, "getpeername", RSYS_NAME(GETPEERNAME) }, 185 { DUALCALL_GETSOCKNAME, "getsockname", RSYS_NAME(GETSOCKNAME) }, 186 { DUALCALL_LISTEN, "listen", RSYS_NAME(LISTEN) }, 187 { DUALCALL_RECVFROM, "recvfrom", RSYS_NAME(RECVFROM) }, 188 { DUALCALL_RECVMSG, "recvmsg", RSYS_NAME(RECVMSG) }, 189 { DUALCALL_SENDTO, "sendto", RSYS_NAME(SENDTO) }, 190 { DUALCALL_SENDMSG, "sendmsg", RSYS_NAME(SENDMSG) }, 191 { DUALCALL_GETSOCKOPT, "getsockopt", RSYS_NAME(GETSOCKOPT) }, 192 { DUALCALL_SETSOCKOPT, "setsockopt", RSYS_NAME(SETSOCKOPT) }, 193 { DUALCALL_SHUTDOWN, "shutdown", RSYS_NAME(SHUTDOWN) }, 194 { DUALCALL_READ, S(REALREAD), RSYS_NAME(READ) }, 195 { DUALCALL_READV, "readv", RSYS_NAME(READV) }, 196 { DUALCALL_PREAD, S(REALPREAD), RSYS_NAME(PREAD) }, 197 { DUALCALL_PREADV, "preadv", RSYS_NAME(PREADV) }, 198 { 
DUALCALL_WRITE, "write", RSYS_NAME(WRITE) }, 199 { DUALCALL_WRITEV, "writev", RSYS_NAME(WRITEV) }, 200 { DUALCALL_PWRITE, S(REALPWRITE), RSYS_NAME(PWRITE) }, 201 { DUALCALL_PWRITEV, "pwritev", RSYS_NAME(PWRITEV) }, 202 { DUALCALL_IOCTL, "ioctl", RSYS_NAME(IOCTL) }, 203 { DUALCALL_FCNTL, "fcntl", RSYS_NAME(FCNTL) }, 204 { DUALCALL_DUP2, "dup2", RSYS_NAME(DUP2) }, 205 { DUALCALL_CLOSE, "close", RSYS_NAME(CLOSE) }, 206 { DUALCALL_POLLTS, S(REALPOLLTS), RSYS_NAME(POLLTS) }, 207 { DUALCALL_KEVENT, S(REALKEVENT), RSYS_NAME(KEVENT) }, 208 { DUALCALL_STAT, S(REALSTAT), RSYS_NAME(STAT) }, 209 { DUALCALL_LSTAT, S(REALLSTAT), RSYS_NAME(LSTAT) }, 210 { DUALCALL_FSTAT, S(REALFSTAT), RSYS_NAME(FSTAT) }, 211 { DUALCALL_CHOWN, "chown", RSYS_NAME(CHOWN) }, 212 { DUALCALL_LCHOWN, "lchown", RSYS_NAME(LCHOWN) }, 213 { DUALCALL_FCHOWN, "fchown", RSYS_NAME(FCHOWN) }, 214 { DUALCALL_CHMOD, "chmod", RSYS_NAME(CHMOD) }, 215 { DUALCALL_LCHMOD, "lchmod", RSYS_NAME(LCHMOD) }, 216 { DUALCALL_FCHMOD, "fchmod", RSYS_NAME(FCHMOD) }, 217 { DUALCALL_UTIMES, S(REALUTIMES), RSYS_NAME(UTIMES) }, 218 { DUALCALL_LUTIMES, S(REALLUTIMES), RSYS_NAME(LUTIMES) }, 219 { DUALCALL_FUTIMES, S(REALFUTIMES), RSYS_NAME(FUTIMES) }, 220 { DUALCALL_OPEN, "open", RSYS_NAME(OPEN) }, 221 { DUALCALL_STATVFS1, "statvfs1", RSYS_NAME(STATVFS1) }, 222 { DUALCALL_FSTATVFS1, "fstatvfs1", RSYS_NAME(FSTATVFS1) }, 223 { DUALCALL_CHDIR, "chdir", RSYS_NAME(CHDIR) }, 224 { DUALCALL_FCHDIR, "fchdir", RSYS_NAME(FCHDIR) }, 225 { DUALCALL_LSEEK, "lseek", RSYS_NAME(LSEEK) }, 226 { DUALCALL_GETDENTS, "__getdents30", RSYS_NAME(GETDENTS) }, 227 { DUALCALL_UNLINK, "unlink", RSYS_NAME(UNLINK) }, 228 { DUALCALL_SYMLINK, "symlink", RSYS_NAME(SYMLINK) }, 229 { DUALCALL_READLINK, "readlink", RSYS_NAME(READLINK) }, 230 { DUALCALL_RENAME, "rename", RSYS_NAME(RENAME) }, 231 { DUALCALL_MKDIR, "mkdir", RSYS_NAME(MKDIR) }, 232 { DUALCALL_RMDIR, "rmdir", RSYS_NAME(RMDIR) }, 233 { DUALCALL_TRUNCATE, "truncate", RSYS_NAME(TRUNCATE) }, 234 { 
DUALCALL_FTRUNCATE, "ftruncate", RSYS_NAME(FTRUNCATE) }, 235 { DUALCALL_FSYNC, "fsync", RSYS_NAME(FSYNC) }, 236 { DUALCALL_FSYNC_RANGE, "fsync_range", RSYS_NAME(FSYNC_RANGE) }, 237 { DUALCALL_MOUNT, S(REALMOUNT), RSYS_NAME(MOUNT) }, 238 { DUALCALL_UNMOUNT, "unmount", RSYS_NAME(UNMOUNT) }, 239 { DUALCALL___GETCWD, "__getcwd", RSYS_NAME(__GETCWD) }, 240 { DUALCALL_CHFLAGS, "chflags", RSYS_NAME(CHFLAGS) }, 241 { DUALCALL_LCHFLAGS, "lchflags", RSYS_NAME(LCHFLAGS) }, 242 { DUALCALL_FCHFLAGS, "fchflags", RSYS_NAME(FCHFLAGS) }, 243 { DUALCALL_ACCESS, "access", RSYS_NAME(ACCESS) }, 244 { DUALCALL_MKNOD, S(REALMKNOD), RSYS_NAME(MKNOD) }, 245 { DUALCALL___SYSCTL, "__sysctl", RSYS_NAME(__SYSCTL) }, 246 { DUALCALL_GETVFSSTAT, "getvfsstat", RSYS_NAME(GETVFSSTAT) }, 247 { DUALCALL_NFSSVC, "nfssvc", RSYS_NAME(NFSSVC) }, 248 { DUALCALL_GETFH, S(REALGETFH), RSYS_NAME(GETFH) }, 249 { DUALCALL_FHOPEN, S(REALFHOPEN),RSYS_NAME(FHOPEN) }, 250 { DUALCALL_FHSTAT, S(REALFHSTAT),RSYS_NAME(FHSTAT) }, 251 { DUALCALL_FHSTATVFS1, S(REALFHSTATVFS1),RSYS_NAME(FHSTATVFS1) }, 252#if __NetBSD_Prereq__(5,99,63) 253 { DUALCALL_QUOTACTL, "__quotactl", RSYS_NAME(__QUOTACTL) }, 254#elif __NetBSD_Prereq__(5,99,48) 255 { DUALCALL_QUOTACTL, S(OLDREALQUOTACTL),RSYS_NAME(QUOTACTL) }, 256#endif 257}; 258#undef S 259 260struct bothsys { 261 void *bs_host; 262 void *bs_rump; 263} syscalls[DUALCALL__NUM]; 264#define GETSYSCALL(which, name) syscalls[DUALCALL_##name].bs_##which 265 266static pid_t (*host_fork)(void); 267static int (*host_daemon)(int, int); 268static void * (*host_mmap)(void *, size_t, int, int, int, off_t); 269 270/* 271 * This tracks if our process is in a subdirectory of /rump. 272 * It's preserved over exec. 
273 */ 274static bool pwdinrump; 275 276enum pathtype { PATH_HOST, PATH_RUMP, PATH_RUMPBLANKET }; 277 278static bool fd_isrump(int); 279static enum pathtype path_isrump(const char *); 280 281/* default FD_SETSIZE is 256 ==> default fdoff is 128 */ 282static int hijack_fdoff = FD_SETSIZE/2; 283 284/* 285 * Maintain a mapping table for the usual dup2 suspects. 286 * Could use atomic ops to operate on dup2vec, but an application 287 * racing there is not well-defined, so don't bother. 288 */ 289/* note: you cannot change this without editing the env-passing code */ 290#define DUP2HIGH 2 291static uint32_t dup2vec[DUP2HIGH+1]; 292#define DUP2BIT (1<<31) 293#define DUP2ALIAS (1<<30) 294#define DUP2FDMASK ((1<<30)-1) 295 296static bool 297isdup2d(int fd) 298{ 299 300 return fd <= DUP2HIGH && fd >= 0 && dup2vec[fd] & DUP2BIT; 301} 302 303static int 304mapdup2(int hostfd) 305{ 306 307 _DIAGASSERT(isdup2d(hostfd)); 308 return dup2vec[hostfd] & DUP2FDMASK; 309} 310 311static int 312unmapdup2(int rumpfd) 313{ 314 int i; 315 316 for (i = 0; i <= DUP2HIGH; i++) { 317 if (dup2vec[i] & DUP2BIT && 318 (dup2vec[i] & DUP2FDMASK) == (unsigned)rumpfd) 319 return i; 320 } 321 return -1; 322} 323 324static void 325setdup2(int hostfd, int rumpfd) 326{ 327 328 if (hostfd > DUP2HIGH) { 329 _DIAGASSERT(0); 330 return; 331 } 332 333 dup2vec[hostfd] = DUP2BIT | DUP2ALIAS | rumpfd; 334} 335 336static void 337clrdup2(int hostfd) 338{ 339 340 if (hostfd > DUP2HIGH) { 341 _DIAGASSERT(0); 342 return; 343 } 344 345 dup2vec[hostfd] = 0; 346} 347 348static bool 349killdup2alias(int rumpfd) 350{ 351 int hostfd; 352 353 if ((hostfd = unmapdup2(rumpfd)) == -1) 354 return false; 355 356 if (dup2vec[hostfd] & DUP2ALIAS) { 357 dup2vec[hostfd] &= ~DUP2ALIAS; 358 return true; 359 } 360 return false; 361} 362 363//#define DEBUGJACK 364#ifdef DEBUGJACK 365#define DPRINTF(x) mydprintf x 366static void 367mydprintf(const char *fmt, ...) 
368{ 369 va_list ap; 370 371 if (isdup2d(STDERR_FILENO)) 372 return; 373 374 va_start(ap, fmt); 375 vfprintf(stderr, fmt, ap); 376 va_end(ap); 377} 378 379static const char * 380whichfd(int fd) 381{ 382 383 if (fd == -1) 384 return "-1"; 385 else if (fd_isrump(fd)) 386 return "rump"; 387 else 388 return "host"; 389} 390 391static const char * 392whichpath(const char *path) 393{ 394 395 if (path_isrump(path)) 396 return "rump"; 397 else 398 return "host"; 399} 400 401#else 402#define DPRINTF(x) 403#endif 404 405#define FDCALL(type, name, rcname, args, proto, vars) \ 406type name args \ 407{ \ 408 type (*fun) proto; \ 409 \ 410 DPRINTF(("%s -> %d (%s)\n", __STRING(name), fd, whichfd(fd))); \ 411 if (fd_isrump(fd)) { \ 412 fun = syscalls[rcname].bs_rump; \ 413 fd = fd_host2rump(fd); \ 414 } else { \ 415 fun = syscalls[rcname].bs_host; \ 416 } \ 417 \ 418 return fun vars; \ 419} 420 421#define PATHCALL(type, name, rcname, args, proto, vars) \ 422type name args \ 423{ \ 424 type (*fun) proto; \ 425 enum pathtype pt; \ 426 \ 427 DPRINTF(("%s -> %s (%s)\n", __STRING(name), path, \ 428 whichpath(path))); \ 429 if ((pt = path_isrump(path)) != PATH_HOST) { \ 430 fun = syscalls[rcname].bs_rump; \ 431 if (pt == PATH_RUMP) \ 432 path = path_host2rump(path); \ 433 } else { \ 434 fun = syscalls[rcname].bs_host; \ 435 } \ 436 \ 437 return fun vars; \ 438} 439 440#define VFSCALL(bit, type, name, rcname, args, proto, vars) \ 441type name args \ 442{ \ 443 type (*fun) proto; \ 444 \ 445 DPRINTF(("%s (0x%x, 0x%x)\n", __STRING(name), bit, vfsbits)); \ 446 if (vfsbits & bit) { \ 447 fun = syscalls[rcname].bs_rump; \ 448 } else { \ 449 fun = syscalls[rcname].bs_host; \ 450 } \ 451 \ 452 return fun vars; \ 453} 454 455/* 456 * These variables are set from the RUMPHIJACK string and control 457 * which operations can product rump kernel file descriptors. 458 * This should be easily extendable for future needs. 
459 */ 460#define RUMPHIJACK_DEFAULT "path=/rump,socket=all:nolocal" 461static bool rumpsockets[PF_MAX]; 462static const char *rumpprefix; 463static size_t rumpprefixlen; 464 465static struct { 466 int pf; 467 const char *name; 468} socketmap[] = { 469 { PF_LOCAL, "local" }, 470 { PF_INET, "inet" }, 471 { PF_LINK, "link" }, 472#ifdef PF_OROUTE 473 { PF_OROUTE, "oroute" }, 474#endif 475 { PF_ROUTE, "route" }, 476 { PF_INET6, "inet6" }, 477#ifdef PF_MPLS 478 { PF_MPLS, "mpls" }, 479#endif 480 { -1, NULL } 481}; 482 483static void 484sockparser(char *buf) 485{ 486 char *p, *l; 487 bool value; 488 int i; 489 490 /* if "all" is present, it must be specified first */ 491 if (strncmp(buf, "all", strlen("all")) == 0) { 492 for (i = 0; i < (int)__arraycount(rumpsockets); i++) { 493 rumpsockets[i] = true; 494 } 495 buf += strlen("all"); 496 if (*buf == ':') 497 buf++; 498 } 499 500 for (p = strtok_r(buf, ":", &l); p; p = strtok_r(NULL, ":", &l)) { 501 value = true; 502 if (strncmp(p, "no", strlen("no")) == 0) { 503 value = false; 504 p += strlen("no"); 505 } 506 507 for (i = 0; socketmap[i].name; i++) { 508 if (strcmp(p, socketmap[i].name) == 0) { 509 rumpsockets[socketmap[i].pf] = value; 510 break; 511 } 512 } 513 if (socketmap[i].name == NULL) { 514 errx(1, "invalid socket specifier %s", p); 515 } 516 } 517} 518 519static void 520pathparser(char *buf) 521{ 522 523 /* sanity-check */ 524 if (*buf != '/') 525 errx(1, "hijack path specifier must begin with ``/''"); 526 rumpprefixlen = strlen(buf); 527 if (rumpprefixlen < 2) 528 errx(1, "invalid hijack prefix: %s", buf); 529 if (buf[rumpprefixlen-1] == '/' && strspn(buf, "/") != rumpprefixlen) 530 errx(1, "hijack prefix may end in slash only if pure " 531 "slash, gave %s", buf); 532 533 if ((rumpprefix = strdup(buf)) == NULL) 534 err(1, "strdup"); 535 rumpprefixlen = strlen(rumpprefix); 536} 537 538static struct blanket { 539 const char *pfx; 540 size_t len; 541} *blanket; 542static int nblanket; 543 544static void 
545blanketparser(char *buf) 546{ 547 char *p, *l; 548 int i; 549 550 for (nblanket = 0, p = buf; p; p = strchr(p+1, ':'), nblanket++) 551 continue; 552 553 blanket = malloc(nblanket * sizeof(*blanket)); 554 if (blanket == NULL) 555 err(1, "alloc blanket %d", nblanket); 556 557 for (p = strtok_r(buf, ":", &l), i = 0; p; 558 p = strtok_r(NULL, ":", &l), i++) { 559 blanket[i].pfx = strdup(p); 560 if (blanket[i].pfx == NULL) 561 err(1, "strdup blanket"); 562 blanket[i].len = strlen(p); 563 564 if (blanket[i].len == 0 || *blanket[i].pfx != '/') 565 errx(1, "invalid blanket specifier %s", p); 566 if (*(blanket[i].pfx + blanket[i].len-1) == '/') 567 errx(1, "invalid blanket specifier %s", p); 568 } 569} 570 571#define VFSBIT_NFSSVC 0x01 572#define VFSBIT_GETVFSSTAT 0x02 573#define VFSBIT_FHCALLS 0x04 574static unsigned vfsbits; 575 576static struct { 577 int bit; 578 const char *name; 579} vfscalls[] = { 580 { VFSBIT_NFSSVC, "nfssvc" }, 581 { VFSBIT_GETVFSSTAT, "getvfsstat" }, 582 { VFSBIT_FHCALLS, "fhcalls" }, 583 { -1, NULL } 584}; 585 586static void 587vfsparser(char *buf) 588{ 589 char *p, *l; 590 bool turnon; 591 unsigned int fullmask; 592 int i; 593 594 /* build the full mask and sanity-check while we're at it */ 595 fullmask = 0; 596 for (i = 0; vfscalls[i].name != NULL; i++) { 597 if (fullmask & vfscalls[i].bit) 598 errx(1, "problem exists between vi and chair"); 599 fullmask |= vfscalls[i].bit; 600 } 601 602 603 /* if "all" is present, it must be specified first */ 604 if (strncmp(buf, "all", strlen("all")) == 0) { 605 vfsbits = fullmask; 606 buf += strlen("all"); 607 if (*buf == ':') 608 buf++; 609 } 610 611 for (p = strtok_r(buf, ":", &l); p; p = strtok_r(NULL, ":", &l)) { 612 turnon = true; 613 if (strncmp(p, "no", strlen("no")) == 0) { 614 turnon = false; 615 p += strlen("no"); 616 } 617 618 for (i = 0; vfscalls[i].name; i++) { 619 if (strcmp(p, vfscalls[i].name) == 0) { 620 if (turnon) 621 vfsbits |= vfscalls[i].bit; 622 else 623 vfsbits &= ~vfscalls[i].bit; 
624 break; 625 } 626 } 627 if (vfscalls[i].name == NULL) { 628 errx(1, "invalid vfscall specifier %s", p); 629 } 630 } 631} 632 633static bool rumpsysctl = false; 634 635static void 636sysctlparser(char *buf) 637{ 638 639 if (buf == NULL) { 640 rumpsysctl = true; 641 return; 642 } 643 644 if (strcasecmp(buf, "y") == 0 || strcasecmp(buf, "yes") == 0 || 645 strcasecmp(buf, "yep") == 0 || strcasecmp(buf, "tottakai") == 0) { 646 rumpsysctl = true; 647 return; 648 } 649 if (strcasecmp(buf, "n") == 0 || strcasecmp(buf, "no") == 0) { 650 rumpsysctl = false; 651 return; 652 } 653 654 errx(1, "sysctl value should be y(es)/n(o), gave: %s", buf); 655} 656 657static void 658fdoffparser(char *buf) 659{ 660 unsigned long fdoff; 661 char *ep; 662 663 if (*buf == '-') { 664 errx(1, "fdoff must not be negative"); 665 } 666 fdoff = strtoul(buf, &ep, 10); 667 if (*ep != '\0') 668 errx(1, "invalid fdoff specifier \"%s\"", buf); 669 if (fdoff >= INT_MAX/2 || fdoff < 3) 670 errx(1, "fdoff out of range"); 671 hijack_fdoff = fdoff; 672} 673 674static struct { 675 void (*parsefn)(char *); 676 const char *name; 677 bool needvalues; 678} hijackparse[] = { 679 { sockparser, "socket", true }, 680 { pathparser, "path", true }, 681 { blanketparser, "blanket", true }, 682 { vfsparser, "vfs", true }, 683 { sysctlparser, "sysctl", false }, 684 { fdoffparser, "fdoff", true }, 685 { NULL, NULL, false }, 686}; 687 688static void 689parsehijack(char *hijack) 690{ 691 char *p, *p2, *l; 692 const char *hijackcopy; 693 bool nop2; 694 int i; 695 696 if ((hijackcopy = strdup(hijack)) == NULL) 697 err(1, "strdup"); 698 699 /* disable everything explicitly */ 700 for (i = 0; i < PF_MAX; i++) 701 rumpsockets[i] = false; 702 703 for (p = strtok_r(hijack, ",", &l); p; p = strtok_r(NULL, ",", &l)) { 704 nop2 = false; 705 p2 = strchr(p, '='); 706 if (!p2) { 707 nop2 = true; 708 p2 = p + strlen(p); 709 } 710 711 for (i = 0; hijackparse[i].parsefn; i++) { 712 if (strncmp(hijackparse[i].name, p, 713 (size_t)(p2-p)) 
== 0) { 714 if (nop2 && hijackparse[i].needvalues) 715 errx(1, "invalid hijack specifier: %s", 716 hijackcopy); 717 hijackparse[i].parsefn(nop2 ? NULL : p2+1); 718 break; 719 } 720 } 721 722 if (hijackparse[i].parsefn == NULL) 723 errx(1, "invalid hijack specifier name in %s", p); 724 } 725 726} 727 728static void __attribute__((constructor)) 729rcinit(void) 730{ 731 char buf[1024]; 732 unsigned i, j; 733 734 host_fork = dlsym(RTLD_NEXT, "fork"); 735 host_daemon = dlsym(RTLD_NEXT, "daemon"); 736 host_mmap = dlsym(RTLD_NEXT, "mmap"); 737 738 /* 739 * In theory cannot print anything during lookups because 740 * we might not have the call vector set up. so, the errx() 741 * is a bit of a strech, but it might work. 742 */ 743 744 for (i = 0; i < DUALCALL__NUM; i++) { 745 /* build runtime O(1) access */ 746 for (j = 0; j < __arraycount(syscnames); j++) { 747 if (syscnames[j].scm_callnum == i) 748 break; 749 } 750 751 if (j == __arraycount(syscnames)) 752 errx(1, "rumphijack error: syscall pos %d missing", i); 753 754 syscalls[i].bs_host = dlsym(RTLD_NEXT, 755 syscnames[j].scm_hostname); 756 if (syscalls[i].bs_host == NULL) 757 errx(1, "hostcall %s not found!", 758 syscnames[j].scm_hostname); 759 760 syscalls[i].bs_rump = dlsym(RTLD_NEXT, 761 syscnames[j].scm_rumpname); 762 if (syscalls[i].bs_rump == NULL) 763 errx(1, "rumpcall %s not found!", 764 syscnames[j].scm_rumpname); 765 } 766 767 if (rumpclient_init() == -1) 768 err(1, "rumpclient init"); 769 770 /* check which syscalls we're supposed to hijack */ 771 if (getenv_r("RUMPHIJACK", buf, sizeof(buf)) == -1) { 772 strcpy(buf, RUMPHIJACK_DEFAULT); 773 } 774 parsehijack(buf); 775 776 /* set client persistence level */ 777 if (getenv_r("RUMPHIJACK_RETRYCONNECT", buf, sizeof(buf)) != -1) { 778 if (strcmp(buf, "die") == 0) 779 rumpclient_setconnretry(RUMPCLIENT_RETRYCONN_DIE); 780 else if (strcmp(buf, "inftime") == 0) 781 rumpclient_setconnretry(RUMPCLIENT_RETRYCONN_INFTIME); 782 else if (strcmp(buf, "once") == 0) 783 
rumpclient_setconnretry(RUMPCLIENT_RETRYCONN_ONCE); 784 else { 785 time_t timeout; 786 char *ep; 787 788 timeout = (time_t)strtoll(buf, &ep, 10); 789 if (timeout <= 0 || ep != buf + strlen(buf)) 790 errx(1, "RUMPHIJACK_RETRYCONNECT must be " 791 "keyword or integer, got: %s", buf); 792 793 rumpclient_setconnretry(timeout); 794 } 795 } 796 797 if (getenv_r("RUMPHIJACK__DUP2INFO", buf, sizeof(buf)) == 0) { 798 if (sscanf(buf, "%u,%u,%u", 799 &dup2vec[0], &dup2vec[1], &dup2vec[2]) != 3) { 800 warnx("invalid dup2mask: %s", buf); 801 memset(dup2vec, 0, sizeof(dup2vec)); 802 } 803 unsetenv("RUMPHIJACK__DUP2INFO"); 804 } 805 if (getenv_r("RUMPHIJACK__PWDINRUMP", buf, sizeof(buf)) == 0) { 806 pwdinrump = true; 807 unsetenv("RUMPHIJACK__PWDINRUMP"); 808 } 809} 810 811static int 812fd_rump2host(int fd) 813{ 814 815 if (fd == -1) 816 return fd; 817 return fd + hijack_fdoff; 818} 819 820static int 821fd_rump2host_withdup(int fd) 822{ 823 int hfd; 824 825 _DIAGASSERT(fd != -1); 826 hfd = unmapdup2(fd); 827 if (hfd != -1) { 828 _DIAGASSERT(hfd <= DUP2HIGH); 829 return hfd; 830 } 831 return fd_rump2host(fd); 832} 833 834static int 835fd_host2rump(int fd) 836{ 837 838 if (!isdup2d(fd)) 839 return fd - hijack_fdoff; 840 else 841 return mapdup2(fd); 842} 843 844static bool 845fd_isrump(int fd) 846{ 847 848 return isdup2d(fd) || fd >= hijack_fdoff; 849} 850 851#define assertfd(_fd_) assert(ISDUP2D(_fd_) || (_fd_) >= hijack_fdoff) 852 853static enum pathtype 854path_isrump(const char *path) 855{ 856 size_t plen; 857 int i; 858 859 if (rumpprefix == NULL && nblanket == 0) 860 return PATH_HOST; 861 862 if (*path == '/') { 863 plen = strlen(path); 864 if (rumpprefix && plen >= rumpprefixlen) { 865 if (strncmp(path, rumpprefix, rumpprefixlen) == 0 866 && (plen == rumpprefixlen 867 || *(path + rumpprefixlen) == '/')) { 868 return PATH_RUMP; 869 } 870 } 871 for (i = 0; i < nblanket; i++) { 872 if (strncmp(path, blanket[i].pfx, blanket[i].len) == 0) 873 return PATH_RUMPBLANKET; 874 } 875 876 
return PATH_HOST; 877 } else { 878 return pwdinrump ? PATH_RUMP : PATH_HOST; 879 } 880} 881 882static const char *rootpath = "/"; 883static const char * 884path_host2rump(const char *path) 885{ 886 const char *rv; 887 888 if (*path == '/') { 889 rv = path + rumpprefixlen; 890 if (*rv == '\0') 891 rv = rootpath; 892 } else { 893 rv = path; 894 } 895 896 return rv; 897} 898 899static int 900dodup(int oldd, int minfd) 901{ 902 int (*op_fcntl)(int, int, ...); 903 int newd; 904 int isrump; 905 906 DPRINTF(("dup -> %d (minfd %d)\n", oldd, minfd)); 907 if (fd_isrump(oldd)) { 908 op_fcntl = GETSYSCALL(rump, FCNTL); 909 oldd = fd_host2rump(oldd); 910 if (minfd >= hijack_fdoff) 911 minfd -= hijack_fdoff; 912 isrump = 1; 913 } else { 914 op_fcntl = GETSYSCALL(host, FCNTL); 915 isrump = 0; 916 } 917 918 newd = op_fcntl(oldd, F_DUPFD, minfd); 919 920 if (isrump) 921 newd = fd_rump2host(newd); 922 DPRINTF(("dup <- %d\n", newd)); 923 924 return newd; 925} 926 927/* 928 * Check that host fd value does not exceed fdoffset and if necessary 929 * dup the file descriptor so that it doesn't collide with the dup2mask. 930 */ 931static int 932fd_host2host(int fd) 933{ 934 int (*op_fcntl)(int, int, ...) = GETSYSCALL(host, FCNTL); 935 int (*op_close)(int) = GETSYSCALL(host, CLOSE); 936 int ofd, i; 937 938 if (fd >= hijack_fdoff) { 939 op_close(fd); 940 errno = ENFILE; 941 return -1; 942 } 943 944 for (i = 1; isdup2d(fd); i++) { 945 ofd = fd; 946 fd = op_fcntl(ofd, F_DUPFD, i); 947 op_close(ofd); 948 } 949 950 return fd; 951} 952 953int 954open(const char *path, int flags, ...) 
955{ 956 int (*op_open)(const char *, int, ...); 957 bool isrump; 958 va_list ap; 959 enum pathtype pt; 960 int fd; 961 962 DPRINTF(("open -> %s (%s)\n", path, whichpath(path))); 963 964 if ((pt = path_isrump(path)) != PATH_HOST) { 965 if (pt == PATH_RUMP) 966 path = path_host2rump(path); 967 op_open = GETSYSCALL(rump, OPEN); 968 isrump = true; 969 } else { 970 op_open = GETSYSCALL(host, OPEN); 971 isrump = false; 972 } 973 974 va_start(ap, flags); 975 fd = op_open(path, flags, va_arg(ap, mode_t)); 976 va_end(ap); 977 978 if (isrump) 979 fd = fd_rump2host(fd); 980 else 981 fd = fd_host2host(fd); 982 983 DPRINTF(("open <- %d (%s)\n", fd, whichfd(fd))); 984 return fd; 985} 986 987int 988chdir(const char *path) 989{ 990 int (*op_chdir)(const char *); 991 enum pathtype pt; 992 int rv; 993 994 if ((pt = path_isrump(path)) != PATH_HOST) { 995 op_chdir = GETSYSCALL(rump, CHDIR); 996 if (pt == PATH_RUMP) 997 path = path_host2rump(path); 998 } else { 999 op_chdir = GETSYSCALL(host, CHDIR); 1000 } 1001 1002 rv = op_chdir(path); 1003 if (rv == 0) 1004 pwdinrump = pt != PATH_HOST; 1005 1006 return rv; 1007} 1008 1009int 1010fchdir(int fd) 1011{ 1012 int (*op_fchdir)(int); 1013 bool isrump; 1014 int rv; 1015 1016 if (fd_isrump(fd)) { 1017 op_fchdir = GETSYSCALL(rump, FCHDIR); 1018 isrump = true; 1019 fd = fd_host2rump(fd); 1020 } else { 1021 op_fchdir = GETSYSCALL(host, FCHDIR); 1022 isrump = false; 1023 } 1024 1025 rv = op_fchdir(fd); 1026 if (rv == 0) { 1027 pwdinrump = isrump; 1028 } 1029 1030 return rv; 1031} 1032 1033int 1034__getcwd(char *bufp, size_t len) 1035{ 1036 int (*op___getcwd)(char *, size_t); 1037 size_t prefixgap; 1038 bool iamslash; 1039 int rv; 1040 1041 if (pwdinrump && rumpprefix) { 1042 if (rumpprefix[rumpprefixlen-1] == '/') 1043 iamslash = true; 1044 else 1045 iamslash = false; 1046 1047 if (iamslash) 1048 prefixgap = rumpprefixlen - 1; /* ``//+path'' */ 1049 else 1050 prefixgap = rumpprefixlen; /* ``/pfx+/path'' */ 1051 if (len <= prefixgap) { 1052 
errno = ERANGE; 1053 return -1; 1054 } 1055 1056 op___getcwd = GETSYSCALL(rump, __GETCWD); 1057 rv = op___getcwd(bufp + prefixgap, len - prefixgap); 1058 if (rv == -1) 1059 return rv; 1060 1061 /* augment the "/" part only for a non-root path */ 1062 memcpy(bufp, rumpprefix, rumpprefixlen); 1063 1064 /* append / only to non-root cwd */ 1065 if (rv != 2) 1066 bufp[prefixgap] = '/'; 1067 1068 /* don't append extra slash in the purely-slash case */ 1069 if (rv == 2 && !iamslash) 1070 bufp[rumpprefixlen] = '\0'; 1071 } else if (pwdinrump) { 1072 /* assume blanket. we can't provide a prefix here */ 1073 op___getcwd = GETSYSCALL(rump, __GETCWD); 1074 rv = op___getcwd(bufp, len); 1075 } else { 1076 op___getcwd = GETSYSCALL(host, __GETCWD); 1077 rv = op___getcwd(bufp, len); 1078 } 1079 1080 return rv; 1081} 1082 1083int 1084rename(const char *from, const char *to) 1085{ 1086 int (*op_rename)(const char *, const char *); 1087 enum pathtype ptf, ptt; 1088 1089 if ((ptf = path_isrump(from)) != PATH_HOST) { 1090 if ((ptt = path_isrump(to)) == PATH_HOST) { 1091 errno = EXDEV; 1092 return -1; 1093 } 1094 1095 if (ptf == PATH_RUMP) 1096 from = path_host2rump(from); 1097 if (ptt == PATH_RUMP) 1098 to = path_host2rump(to); 1099 op_rename = GETSYSCALL(rump, RENAME); 1100 } else { 1101 if (path_isrump(to) != PATH_HOST) { 1102 errno = EXDEV; 1103 return -1; 1104 } 1105 1106 op_rename = GETSYSCALL(host, RENAME); 1107 } 1108 1109 return op_rename(from, to); 1110} 1111 1112int __socket30(int, int, int); 1113int 1114__socket30(int domain, int type, int protocol) 1115{ 1116 int (*op_socket)(int, int, int); 1117 int fd; 1118 bool isrump; 1119 1120 isrump = domain < PF_MAX && rumpsockets[domain]; 1121 1122 if (isrump) 1123 op_socket = GETSYSCALL(rump, SOCKET); 1124 else 1125 op_socket = GETSYSCALL(host, SOCKET); 1126 fd = op_socket(domain, type, protocol); 1127 1128 if (isrump) 1129 fd = fd_rump2host(fd); 1130 else 1131 fd = fd_host2host(fd); 1132 DPRINTF(("socket <- %d\n", fd)); 1133 1134 
return fd; 1135} 1136 1137int 1138accept(int s, struct sockaddr *addr, socklen_t *addrlen) 1139{ 1140 int (*op_accept)(int, struct sockaddr *, socklen_t *); 1141 int fd; 1142 bool isrump; 1143 1144 isrump = fd_isrump(s); 1145 1146 DPRINTF(("accept -> %d", s)); 1147 if (isrump) { 1148 op_accept = GETSYSCALL(rump, ACCEPT); 1149 s = fd_host2rump(s); 1150 } else { 1151 op_accept = GETSYSCALL(host, ACCEPT); 1152 } 1153 fd = op_accept(s, addr, addrlen); 1154 if (fd != -1 && isrump) 1155 fd = fd_rump2host(fd); 1156 else 1157 fd = fd_host2host(fd); 1158 1159 DPRINTF((" <- %d\n", fd)); 1160 1161 return fd; 1162} 1163 1164/* 1165 * ioctl and fcntl are varargs calls and need special treatment 1166 */ 1167int 1168ioctl(int fd, unsigned long cmd, ...) 1169{ 1170 int (*op_ioctl)(int, unsigned long cmd, ...); 1171 va_list ap; 1172 int rv; 1173 1174 DPRINTF(("ioctl -> %d\n", fd)); 1175 if (fd_isrump(fd)) { 1176 fd = fd_host2rump(fd); 1177 op_ioctl = GETSYSCALL(rump, IOCTL); 1178 } else { 1179 op_ioctl = GETSYSCALL(host, IOCTL); 1180 } 1181 1182 va_start(ap, cmd); 1183 rv = op_ioctl(fd, cmd, va_arg(ap, void *)); 1184 va_end(ap); 1185 return rv; 1186} 1187 1188int 1189fcntl(int fd, int cmd, ...) 1190{ 1191 int (*op_fcntl)(int, int, ...); 1192 va_list ap; 1193 int rv, minfd, i, maxdup2; 1194 1195 DPRINTF(("fcntl -> %d (cmd %d)\n", fd, cmd)); 1196 1197 switch (cmd) { 1198 case F_DUPFD: 1199 va_start(ap, cmd); 1200 minfd = va_arg(ap, int); 1201 va_end(ap); 1202 return dodup(fd, minfd); 1203 1204 case F_CLOSEM: 1205 /* 1206 * So, if fd < HIJACKOFF, we want to do a host closem. 1207 */ 1208 1209 if (fd < hijack_fdoff) { 1210 int closemfd = fd; 1211 1212 if (rumpclient__closenotify(&closemfd, 1213 RUMPCLIENT_CLOSE_FCLOSEM) == -1) 1214 return -1; 1215 op_fcntl = GETSYSCALL(host, FCNTL); 1216 rv = op_fcntl(closemfd, cmd); 1217 if (rv) 1218 return rv; 1219 } 1220 1221 /* 1222 * Additionally, we want to do a rump closem, but only 1223 * for the file descriptors not dup2'd. 
1224 */ 1225 1226 for (i = 0, maxdup2 = 0; i <= DUP2HIGH; i++) { 1227 if (dup2vec[i] & DUP2BIT) { 1228 int val; 1229 1230 val = dup2vec[i] & DUP2FDMASK; 1231 maxdup2 = MAX(val, maxdup2); 1232 } 1233 } 1234 1235 if (fd >= hijack_fdoff) 1236 fd -= hijack_fdoff; 1237 else 1238 fd = 0; 1239 fd = MAX(maxdup2+1, fd); 1240 1241 /* hmm, maybe we should close rump fd's not within dup2mask? */ 1242 return rump_sys_fcntl(fd, F_CLOSEM); 1243 1244 case F_MAXFD: 1245 /* 1246 * For maxfd, if there's a rump kernel fd, return 1247 * it hostified. Otherwise, return host's MAXFD 1248 * return value. 1249 */ 1250 if ((rv = rump_sys_fcntl(fd, F_MAXFD)) != -1) { 1251 /* 1252 * This might go a little wrong in case 1253 * of dup2 to [012], but I'm not sure if 1254 * there's a justification for tracking 1255 * that info. Consider e.g. 1256 * dup2(rumpfd, 2) followed by rump_sys_open() 1257 * returning 1. We should return 1+HIJACKOFF, 1258 * not 2+HIJACKOFF. However, if [01] is not 1259 * open, the correct return value is 2. 
1260 */ 1261 return fd_rump2host(fd); 1262 } else { 1263 op_fcntl = GETSYSCALL(host, FCNTL); 1264 return op_fcntl(fd, F_MAXFD); 1265 } 1266 /*NOTREACHED*/ 1267 1268 default: 1269 if (fd_isrump(fd)) { 1270 fd = fd_host2rump(fd); 1271 op_fcntl = GETSYSCALL(rump, FCNTL); 1272 } else { 1273 op_fcntl = GETSYSCALL(host, FCNTL); 1274 } 1275 1276 va_start(ap, cmd); 1277 rv = op_fcntl(fd, cmd, va_arg(ap, void *)); 1278 va_end(ap); 1279 return rv; 1280 } 1281 /*NOTREACHED*/ 1282} 1283 1284int 1285close(int fd) 1286{ 1287 int (*op_close)(int); 1288 int rv; 1289 1290 DPRINTF(("close -> %d\n", fd)); 1291 if (fd_isrump(fd)) { 1292 bool undup2 = false; 1293 int ofd; 1294 1295 if (isdup2d(ofd = fd)) { 1296 undup2 = true; 1297 } 1298 1299 fd = fd_host2rump(fd); 1300 if (!undup2 && killdup2alias(fd)) { 1301 return 0; 1302 } 1303 1304 op_close = GETSYSCALL(rump, CLOSE); 1305 rv = op_close(fd); 1306 if (rv == 0 && undup2) { 1307 clrdup2(ofd); 1308 } 1309 } else { 1310 if (rumpclient__closenotify(&fd, RUMPCLIENT_CLOSE_CLOSE) == -1) 1311 return -1; 1312 op_close = GETSYSCALL(host, CLOSE); 1313 rv = op_close(fd); 1314 } 1315 1316 return rv; 1317} 1318 1319/* 1320 * write cannot issue a standard debug printf due to recursion 1321 */ 1322ssize_t 1323write(int fd, const void *buf, size_t blen) 1324{ 1325 ssize_t (*op_write)(int, const void *, size_t); 1326 1327 if (fd_isrump(fd)) { 1328 fd = fd_host2rump(fd); 1329 op_write = GETSYSCALL(rump, WRITE); 1330 } else { 1331 op_write = GETSYSCALL(host, WRITE); 1332 } 1333 1334 return op_write(fd, buf, blen); 1335} 1336 1337/* 1338 * dup2 is special. we allow dup2 of a rump kernel fd to 0-2 since 1339 * many programs do that. dup2 of a rump kernel fd to another value 1340 * not >= fdoff is an error. 1341 * 1342 * Note: cannot rump2host newd, because it is often hardcoded. 
1343 */ 1344int 1345dup2(int oldd, int newd) 1346{ 1347 int (*host_dup2)(int, int); 1348 int rv; 1349 1350 DPRINTF(("dup2 -> %d (o) -> %d (n)\n", oldd, newd)); 1351 1352 if (fd_isrump(oldd)) { 1353 int (*op_close)(int) = GETSYSCALL(host, CLOSE); 1354 1355 /* only allow fd 0-2 for cross-kernel dup */ 1356 if (!(newd >= 0 && newd <= 2 && !fd_isrump(newd))) { 1357 errno = EBADF; 1358 return -1; 1359 } 1360 1361 /* regular dup2? */ 1362 if (fd_isrump(newd)) { 1363 newd = fd_host2rump(newd); 1364 rv = rump_sys_dup2(oldd, newd); 1365 return fd_rump2host(rv); 1366 } 1367 1368 /* 1369 * dup2 rump => host? just establish an 1370 * entry in the mapping table. 1371 */ 1372 op_close(newd); 1373 setdup2(newd, fd_host2rump(oldd)); 1374 rv = 0; 1375 } else { 1376 host_dup2 = syscalls[DUALCALL_DUP2].bs_host; 1377 if (rumpclient__closenotify(&newd, RUMPCLIENT_CLOSE_DUP2) == -1) 1378 return -1; 1379 rv = host_dup2(oldd, newd); 1380 } 1381 1382 return rv; 1383} 1384 1385int 1386dup(int oldd) 1387{ 1388 1389 return dodup(oldd, 0); 1390} 1391 1392pid_t 1393fork() 1394{ 1395 pid_t rv; 1396 1397 DPRINTF(("fork\n")); 1398 1399 rv = rumpclient__dofork(host_fork); 1400 1401 DPRINTF(("fork returns %d\n", rv)); 1402 return rv; 1403} 1404/* we do not have the luxury of not requiring a stackframe */ 1405__strong_alias(__vfork14,fork); 1406 1407int 1408daemon(int nochdir, int noclose) 1409{ 1410 struct rumpclient_fork *rf; 1411 1412 if ((rf = rumpclient_prefork()) == NULL) 1413 return -1; 1414 1415 if (host_daemon(nochdir, noclose) == -1) 1416 return -1; 1417 1418 if (rumpclient_fork_init(rf) == -1) 1419 return -1; 1420 1421 return 0; 1422} 1423 1424int 1425execve(const char *path, char *const argv[], char *const envp[]) 1426{ 1427 char buf[128]; 1428 char *dup2str; 1429 const char *pwdinrumpstr; 1430 char **newenv; 1431 size_t nelem; 1432 int rv, sverrno; 1433 int bonus = 2, i = 0; 1434 1435 snprintf(buf, sizeof(buf), "RUMPHIJACK__DUP2INFO=%u,%u,%u", 1436 dup2vec[0], dup2vec[1], dup2vec[2]); 
1437 dup2str = strdup(buf); 1438 if (dup2str == NULL) { 1439 errno = ENOMEM; 1440 return -1; 1441 } 1442 1443 if (pwdinrump) { 1444 pwdinrumpstr = "RUMPHIJACK__PWDINRUMP=true"; 1445 bonus++; 1446 } else { 1447 pwdinrumpstr = NULL; 1448 } 1449 1450 for (nelem = 0; envp && envp[nelem]; nelem++) 1451 continue; 1452 newenv = malloc(sizeof(*newenv) * (nelem+bonus)); 1453 if (newenv == NULL) { 1454 free(dup2str); 1455 errno = ENOMEM; 1456 return -1; 1457 } 1458 memcpy(newenv, envp, nelem*sizeof(*newenv)); 1459 newenv[nelem+i] = dup2str; 1460 i++; 1461 1462 if (pwdinrumpstr) { 1463 newenv[nelem+i] = __UNCONST(pwdinrumpstr); 1464 i++; 1465 } 1466 newenv[nelem+i] = NULL; 1467 _DIAGASSERT(i < bonus); 1468 1469 rv = rumpclient_exec(path, argv, newenv); 1470 1471 _DIAGASSERT(rv != 0); 1472 sverrno = errno; 1473 free(newenv); 1474 free(dup2str); 1475 errno = sverrno; 1476 return rv; 1477} 1478 1479/* 1480 * select is done by calling poll. 1481 */ 1482int 1483REALSELECT(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, 1484 struct timeval *timeout) 1485{ 1486 struct pollfd *pfds; 1487 struct timespec ts, *tsp = NULL; 1488 nfds_t realnfds; 1489 int i, j; 1490 int rv, incr; 1491 1492 DPRINTF(("select\n")); 1493 1494 /* 1495 * Well, first we must scan the fds to figure out how many 1496 * fds there really are. This is because up to and including 1497 * nb5 poll() silently refuses nfds > process_maxopen_fds. 1498 * Seems to be fixed in current, thank the maker. 1499 * god damn cluster...bomb. 
1500 */ 1501 1502 for (i = 0, realnfds = 0; i < nfds; i++) { 1503 if (readfds && FD_ISSET(i, readfds)) { 1504 realnfds++; 1505 continue; 1506 } 1507 if (writefds && FD_ISSET(i, writefds)) { 1508 realnfds++; 1509 continue; 1510 } 1511 if (exceptfds && FD_ISSET(i, exceptfds)) { 1512 realnfds++; 1513 continue; 1514 } 1515 } 1516 1517 if (realnfds) { 1518 pfds = calloc(realnfds, sizeof(*pfds)); 1519 if (!pfds) 1520 return -1; 1521 } else { 1522 pfds = NULL; 1523 } 1524 1525 for (i = 0, j = 0; i < nfds; i++) { 1526 incr = 0; 1527 if (readfds && FD_ISSET(i, readfds)) { 1528 pfds[j].fd = i; 1529 pfds[j].events |= POLLIN; 1530 incr=1; 1531 } 1532 if (writefds && FD_ISSET(i, writefds)) { 1533 pfds[j].fd = i; 1534 pfds[j].events |= POLLOUT; 1535 incr=1; 1536 } 1537 if (exceptfds && FD_ISSET(i, exceptfds)) { 1538 pfds[j].fd = i; 1539 pfds[j].events |= POLLHUP|POLLERR; 1540 incr=1; 1541 } 1542 if (incr) 1543 j++; 1544 } 1545 assert(j == (int)realnfds); 1546 1547 if (timeout) { 1548 TIMEVAL_TO_TIMESPEC(timeout, &ts); 1549 tsp = &ts; 1550 } 1551 rv = REALPOLLTS(pfds, realnfds, tsp, NULL); 1552 /* 1553 * "If select() returns with an error the descriptor sets 1554 * will be unmodified" 1555 */ 1556 if (rv < 0) 1557 goto out; 1558 1559 /* 1560 * zero out results (can't use FD_ZERO for the 1561 * obvious select-me-not reason). whee. 1562 * 1563 * We do this here since some software ignores the return 1564 * value of select, and hence if the timeout expires, it may 1565 * assume all input descriptors have activity. 1566 */ 1567 for (i = 0; i < nfds; i++) { 1568 if (readfds) 1569 FD_CLR(i, readfds); 1570 if (writefds) 1571 FD_CLR(i, writefds); 1572 if (exceptfds) 1573 FD_CLR(i, exceptfds); 1574 } 1575 if (rv == 0) 1576 goto out; 1577 1578 /* 1579 * We have >0 fds with activity. Harvest the results. 
1580 */ 1581 for (i = 0; i < (int)realnfds; i++) { 1582 if (readfds) { 1583 if (pfds[i].revents & POLLIN) { 1584 FD_SET(pfds[i].fd, readfds); 1585 } 1586 } 1587 if (writefds) { 1588 if (pfds[i].revents & POLLOUT) { 1589 FD_SET(pfds[i].fd, writefds); 1590 } 1591 } 1592 if (exceptfds) { 1593 if (pfds[i].revents & (POLLHUP|POLLERR)) { 1594 FD_SET(pfds[i].fd, exceptfds); 1595 } 1596 } 1597 } 1598 1599 out: 1600 free(pfds); 1601 return rv; 1602} 1603 1604static void 1605checkpoll(struct pollfd *fds, nfds_t nfds, int *hostcall, int *rumpcall) 1606{ 1607 nfds_t i; 1608 1609 for (i = 0; i < nfds; i++) { 1610 if (fds[i].fd == -1) 1611 continue; 1612 1613 if (fd_isrump(fds[i].fd)) 1614 (*rumpcall)++; 1615 else 1616 (*hostcall)++; 1617 } 1618} 1619 1620static void 1621adjustpoll(struct pollfd *fds, nfds_t nfds, int (*fdadj)(int)) 1622{ 1623 nfds_t i; 1624 1625 for (i = 0; i < nfds; i++) { 1626 fds[i].fd = fdadj(fds[i].fd); 1627 } 1628} 1629 1630/* 1631 * poll is easy as long as the call comes in the fds only in one 1632 * kernel. otherwise its quite tricky... 
1633 */ 1634struct pollarg { 1635 struct pollfd *pfds; 1636 nfds_t nfds; 1637 const struct timespec *ts; 1638 const sigset_t *sigmask; 1639 int pipefd; 1640 int errnum; 1641}; 1642 1643static void * 1644hostpoll(void *arg) 1645{ 1646 int (*op_pollts)(struct pollfd *, nfds_t, const struct timespec *, 1647 const sigset_t *); 1648 struct pollarg *parg = arg; 1649 intptr_t rv; 1650 1651 op_pollts = GETSYSCALL(host, POLLTS); 1652 rv = op_pollts(parg->pfds, parg->nfds, parg->ts, parg->sigmask); 1653 if (rv == -1) 1654 parg->errnum = errno; 1655 rump_sys_write(parg->pipefd, &rv, sizeof(rv)); 1656 1657 return (void *)rv; 1658} 1659 1660int 1661REALPOLLTS(struct pollfd *fds, nfds_t nfds, const struct timespec *ts, 1662 const sigset_t *sigmask) 1663{ 1664 int (*op_pollts)(struct pollfd *, nfds_t, const struct timespec *, 1665 const sigset_t *); 1666 int (*host_close)(int); 1667 int hostcall = 0, rumpcall = 0; 1668 pthread_t pt; 1669 nfds_t i; 1670 int rv; 1671 1672 DPRINTF(("poll\n")); 1673 checkpoll(fds, nfds, &hostcall, &rumpcall); 1674 1675 if (hostcall && rumpcall) { 1676 struct pollfd *pfd_host = NULL, *pfd_rump = NULL; 1677 int rpipe[2] = {-1,-1}, hpipe[2] = {-1,-1}; 1678 struct pollarg parg; 1679 void *trv_val; 1680 int sverrno = 0, lrv, trv; 1681 1682 /* 1683 * ok, this is where it gets tricky. We must support 1684 * this since it's a very common operation in certain 1685 * types of software (telnet, netcat, etc). We allocate 1686 * two vectors and run two poll commands in separate 1687 * threads. Whichever returns first "wins" and the 1688 * other kernel's fds won't show activity. 1689 */ 1690 rv = -1; 1691 1692 /* allocate full vector for O(n) joining after call */ 1693 pfd_host = malloc(sizeof(*pfd_host)*(nfds+1)); 1694 if (!pfd_host) 1695 goto out; 1696 pfd_rump = malloc(sizeof(*pfd_rump)*(nfds+1)); 1697 if (!pfd_rump) { 1698 goto out; 1699 } 1700 1701 /* 1702 * then, open two pipes, one for notifications 1703 * to each kernel. 
1704 * 1705 * At least the rump pipe should probably be 1706 * cached, along with the helper threads. This 1707 * should give a microbenchmark improvement (haven't 1708 * experienced a macro-level problem yet, though). 1709 */ 1710 if ((rv = rump_sys_pipe(rpipe)) == -1) { 1711 sverrno = errno; 1712 } 1713 if (rv == 0 && (rv = pipe(hpipe)) == -1) { 1714 sverrno = errno; 1715 } 1716 1717 /* split vectors (or signal errors) */ 1718 for (i = 0; i < nfds; i++) { 1719 int fd; 1720 1721 fds[i].revents = 0; 1722 if (fds[i].fd == -1) { 1723 pfd_host[i].fd = -1; 1724 pfd_rump[i].fd = -1; 1725 } else if (fd_isrump(fds[i].fd)) { 1726 pfd_host[i].fd = -1; 1727 fd = fd_host2rump(fds[i].fd); 1728 if (fd == rpipe[0] || fd == rpipe[1]) { 1729 fds[i].revents = POLLNVAL; 1730 if (rv != -1) 1731 rv++; 1732 } 1733 pfd_rump[i].fd = fd; 1734 pfd_rump[i].events = fds[i].events; 1735 } else { 1736 pfd_rump[i].fd = -1; 1737 fd = fds[i].fd; 1738 if (fd == hpipe[0] || fd == hpipe[1]) { 1739 fds[i].revents = POLLNVAL; 1740 if (rv != -1) 1741 rv++; 1742 } 1743 pfd_host[i].fd = fd; 1744 pfd_host[i].events = fds[i].events; 1745 } 1746 pfd_rump[i].revents = pfd_host[i].revents = 0; 1747 } 1748 if (rv) { 1749 goto out; 1750 } 1751 1752 pfd_host[nfds].fd = hpipe[0]; 1753 pfd_host[nfds].events = POLLIN; 1754 pfd_rump[nfds].fd = rpipe[0]; 1755 pfd_rump[nfds].events = POLLIN; 1756 1757 /* 1758 * then, create a thread to do host part and meanwhile 1759 * do rump kernel part right here 1760 */ 1761 1762 parg.pfds = pfd_host; 1763 parg.nfds = nfds+1; 1764 parg.ts = ts; 1765 parg.sigmask = sigmask; 1766 parg.pipefd = rpipe[1]; 1767 pthread_create(&pt, NULL, hostpoll, &parg); 1768 1769 op_pollts = GETSYSCALL(rump, POLLTS); 1770 lrv = op_pollts(pfd_rump, nfds+1, ts, NULL); 1771 sverrno = errno; 1772 write(hpipe[1], &rv, sizeof(rv)); 1773 pthread_join(pt, &trv_val); 1774 trv = (int)(intptr_t)trv_val; 1775 1776 /* check who "won" and merge results */ 1777 if (lrv != 0 && pfd_host[nfds].revents & POLLIN) { 1778 
rv = trv; 1779 1780 for (i = 0; i < nfds; i++) { 1781 if (pfd_rump[i].fd != -1) 1782 fds[i].revents = pfd_rump[i].revents; 1783 } 1784 sverrno = parg.errnum; 1785 } else if (trv != 0 && pfd_rump[nfds].revents & POLLIN) { 1786 rv = trv; 1787 1788 for (i = 0; i < nfds; i++) { 1789 if (pfd_host[i].fd != -1) 1790 fds[i].revents = pfd_host[i].revents; 1791 } 1792 } else { 1793 rv = 0; 1794 } 1795 1796 out: 1797 host_close = GETSYSCALL(host, CLOSE); 1798 if (rpipe[0] != -1) 1799 rump_sys_close(rpipe[0]); 1800 if (rpipe[1] != -1) 1801 rump_sys_close(rpipe[1]); 1802 if (hpipe[0] != -1) 1803 host_close(hpipe[0]); 1804 if (hpipe[1] != -1) 1805 host_close(hpipe[1]); 1806 free(pfd_host); 1807 free(pfd_rump); 1808 errno = sverrno; 1809 } else { 1810 if (hostcall) { 1811 op_pollts = GETSYSCALL(host, POLLTS); 1812 } else { 1813 op_pollts = GETSYSCALL(rump, POLLTS); 1814 adjustpoll(fds, nfds, fd_host2rump); 1815 } 1816 1817 rv = op_pollts(fds, nfds, ts, sigmask); 1818 if (rumpcall) 1819 adjustpoll(fds, nfds, fd_rump2host_withdup); 1820 } 1821 1822 return rv; 1823} 1824 1825int 1826poll(struct pollfd *fds, nfds_t nfds, int timeout) 1827{ 1828 struct timespec ts; 1829 struct timespec *tsp = NULL; 1830 1831 if (timeout != INFTIM) { 1832 ts.tv_sec = timeout / 1000; 1833 ts.tv_nsec = (timeout % 1000) * 1000*1000; 1834 1835 tsp = &ts; 1836 } 1837 1838 return REALPOLLTS(fds, nfds, tsp, NULL); 1839} 1840 1841int 1842REALKEVENT(int kq, const struct kevent *changelist, size_t nchanges, 1843 struct kevent *eventlist, size_t nevents, 1844 const struct timespec *timeout) 1845{ 1846 int (*op_kevent)(int, const struct kevent *, size_t, 1847 struct kevent *, size_t, const struct timespec *); 1848 const struct kevent *ev; 1849 size_t i; 1850 1851 /* 1852 * Check that we don't attempt to kevent rump kernel fd's. 1853 * That needs similar treatment to select/poll, but is slightly 1854 * trickier since we need to manage to different kq descriptors. 1855 * (TODO, in case you're wondering). 
1856 */ 1857 for (i = 0; i < nchanges; i++) { 1858 ev = &changelist[i]; 1859 if (ev->filter == EVFILT_READ || ev->filter == EVFILT_WRITE || 1860 ev->filter == EVFILT_VNODE) { 1861 if (fd_isrump((int)ev->ident)) { 1862 errno = ENOTSUP; 1863 return -1; 1864 } 1865 } 1866 } 1867 1868 op_kevent = GETSYSCALL(host, KEVENT); 1869 return op_kevent(kq, changelist, nchanges, eventlist, nevents, timeout); 1870} 1871 1872/* 1873 * mmapping from a rump kernel is not supported, so disallow it. 1874 */ 1875void * 1876mmap(void *addr, size_t len, int prot, int flags, int fd, off_t offset) 1877{ 1878 1879 if (flags & MAP_FILE && fd_isrump(fd)) { 1880 errno = ENOSYS; 1881 return MAP_FAILED; 1882 } 1883 return host_mmap(addr, len, prot, flags, fd, offset); 1884} 1885 1886/* 1887 * these go to one or the other on a per-process configuration 1888 */ 1889int __sysctl(const int *, unsigned int, void *, size_t *, const void *, size_t); 1890int 1891__sysctl(const int *name, unsigned int namelen, void *old, size_t *oldlenp, 1892 const void *new, size_t newlen) 1893{ 1894 int (*op___sysctl)(const int *, unsigned int, void *, size_t *, 1895 const void *, size_t); 1896 1897 if (rumpsysctl) { 1898 op___sysctl = GETSYSCALL(rump, __SYSCTL); 1899 } else { 1900 op___sysctl = GETSYSCALL(host, __SYSCTL); 1901 /* we haven't inited yet */ 1902 if (__predict_false(op___sysctl == NULL)) { 1903 op___sysctl = rumphijack_dlsym(RTLD_NEXT, "__sysctl"); 1904 } 1905 } 1906 1907 return op___sysctl(name, namelen, old, oldlenp, new, newlen); 1908} 1909 1910/* 1911 * Rest are std type calls. 
 */

/*
 * descriptor-based selectors
 *
 * Each FDCALL() names the return type, the function, its DUALCALL_
 * slot, the parameter list, the matching prototype and the argument
 * list.  The macro itself is defined elsewhere in this file;
 * presumably it picks the kernel based on the first (descriptor)
 * argument -- verify against the macro definition.
 */

FDCALL(int, bind, DUALCALL_BIND,					\
	(int fd, const struct sockaddr *name, socklen_t namelen),	\
	(int, const struct sockaddr *, socklen_t),			\
	(fd, name, namelen))

FDCALL(int, connect, DUALCALL_CONNECT,					\
	(int fd, const struct sockaddr *name, socklen_t namelen),	\
	(int, const struct sockaddr *, socklen_t),			\
	(fd, name, namelen))

FDCALL(int, getpeername, DUALCALL_GETPEERNAME,				\
	(int fd, struct sockaddr *name, socklen_t *namelen),		\
	(int, struct sockaddr *, socklen_t *),				\
	(fd, name, namelen))

FDCALL(int, getsockname, DUALCALL_GETSOCKNAME,				\
	(int fd, struct sockaddr *name, socklen_t *namelen),		\
	(int, struct sockaddr *, socklen_t *),				\
	(fd, name, namelen))

FDCALL(int, listen, DUALCALL_LISTEN,					\
	(int fd, int backlog),						\
	(int, int),							\
	(fd, backlog))

FDCALL(ssize_t, recvfrom, DUALCALL_RECVFROM,				\
	(int fd, void *buf, size_t len, int flags,			\
	    struct sockaddr *from, socklen_t *fromlen),			\
	(int, void *, size_t, int, struct sockaddr *, socklen_t *),	\
	(fd, buf, len, flags, from, fromlen))

FDCALL(ssize_t, sendto, DUALCALL_SENDTO,				\
	(int fd, const void *buf, size_t len, int flags,		\
	    const struct sockaddr *to, socklen_t tolen),		\
	(int, const void *, size_t, int,				\
	    const struct sockaddr *, socklen_t),			\
	(fd, buf, len, flags, to, tolen))

FDCALL(ssize_t, recvmsg, DUALCALL_RECVMSG,				\
	(int fd, struct msghdr *msg, int flags),			\
	(int, struct msghdr *, int),					\
	(fd, msg, flags))

FDCALL(ssize_t, sendmsg, DUALCALL_SENDMSG,				\
	(int fd, const struct msghdr *msg, int flags),			\
	(int, const struct msghdr *, int),				\
	(fd, msg, flags))

FDCALL(int, getsockopt, DUALCALL_GETSOCKOPT,				\
	(int fd, int level, int optn, void *optval, socklen_t *optlen),	\
	(int, int, int, void *, socklen_t *),				\
	(fd, level, optn, optval, optlen))

FDCALL(int, setsockopt, DUALCALL_SETSOCKOPT,				\
	(int fd, int level, int optn,					\
	    const void *optval, socklen_t optlen),			\
	(int, int, int, const void *, socklen_t),			\
	(fd, level, optn, optval, optlen))

FDCALL(int, shutdown, DUALCALL_SHUTDOWN,				\
	(int fd, int how),						\
	(int, int),							\
	(fd, how))

FDCALL(ssize_t, REALREAD, DUALCALL_READ,				\
	(int fd, void *buf, size_t buflen),				\
	(int, void *, size_t),						\
	(fd, buf, buflen))

FDCALL(ssize_t, readv, DUALCALL_READV,					\
	(int fd, const struct iovec *iov, int iovcnt),			\
	(int, const struct iovec *, int),				\
	(fd, iov, iovcnt))

FDCALL(ssize_t, REALPREAD, DUALCALL_PREAD,				\
	(int fd, void *buf, size_t nbytes, off_t offset),		\
	(int, void *, size_t, off_t),					\
	(fd, buf, nbytes, offset))

FDCALL(ssize_t, preadv, DUALCALL_PREADV,				\
	(int fd, const struct iovec *iov, int iovcnt, off_t offset),	\
	(int, const struct iovec *, int, off_t),			\
	(fd, iov, iovcnt, offset))

FDCALL(ssize_t, writev, DUALCALL_WRITEV,				\
	(int fd, const struct iovec *iov, int iovcnt),			\
	(int, const struct iovec *, int),				\
	(fd, iov, iovcnt))

FDCALL(ssize_t, REALPWRITE, DUALCALL_PWRITE,				\
	(int fd, const void *buf, size_t nbytes, off_t offset),	\
	(int, const void *, size_t, off_t),				\
	(fd, buf, nbytes, offset))

FDCALL(ssize_t, pwritev, DUALCALL_PWRITEV,				\
	(int fd, const struct iovec *iov, int iovcnt, off_t offset),	\
	(int, const struct iovec *, int, off_t),			\
	(fd, iov, iovcnt, offset))

FDCALL(int, REALFSTAT, DUALCALL_FSTAT,					\
	(int fd, struct stat *sb),					\
	(int, struct stat *),						\
	(fd, sb))

FDCALL(int, fstatvfs1, DUALCALL_FSTATVFS1,				\
	(int fd, struct statvfs *buf, int flags),			\
	(int, struct statvfs *, int),					\
	(fd, buf, flags))

FDCALL(off_t, lseek, DUALCALL_LSEEK,					\
	(int fd, off_t offset, int whence),				\
	(int, off_t, int),						\
	(fd, offset, whence))
/* _lseek is exported as a strong alias of the lseek wrapper above */
__strong_alias(_lseek,lseek);

FDCALL(int, REALGETDENTS, DUALCALL_GETDENTS,				\
	(int fd, char *buf, size_t nbytes),				\
	(int, char *, size_t),						\
	(fd, buf, nbytes))

FDCALL(int, fchown, DUALCALL_FCHOWN,					\
	(int fd, uid_t owner, gid_t group),				\
	(int, uid_t, gid_t),						\
	(fd, owner, group))

FDCALL(int, fchmod, DUALCALL_FCHMOD,					\
	(int fd, mode_t mode),						\
	(int, mode_t),							\
	(fd, mode))

FDCALL(int, ftruncate, DUALCALL_FTRUNCATE,				\
	(int fd, off_t length),						\
	(int, off_t),							\
	(fd, length))

FDCALL(int, fsync, DUALCALL_FSYNC,					\
	(int fd),							\
	(int),								\
	(fd))

FDCALL(int, fsync_range, DUALCALL_FSYNC_RANGE,				\
	(int fd, int how, off_t start, off_t length),			\
	(int, int, off_t, off_t),					\
	(fd, how, start, length))

FDCALL(int, futimes, DUALCALL_FUTIMES,					\
	(int fd, const struct timeval *tv),				\
	(int, const struct timeval *),					\
	(fd, tv))

FDCALL(int, fchflags, DUALCALL_FCHFLAGS,				\
	(int fd, u_long flags),						\
	(int, u_long),							\
	(fd, flags))

/*
 * path-based selectors
 *
 * Same argument layout as FDCALL().  PATHCALL() is defined elsewhere
 * in this file; which parameter it inspects to choose the kernel is
 * not visible here (all instantiations below name one parameter
 * "path") -- verify against the macro definition.
 */

PATHCALL(int, REALSTAT, DUALCALL_STAT,					\
	(const char *path, struct stat *sb),				\
	(const char *, struct stat *),					\
	(path, sb))

PATHCALL(int, REALLSTAT, DUALCALL_LSTAT,				\
	(const char *path, struct stat *sb),				\
	(const char *, struct stat *),					\
	(path, sb))

PATHCALL(int, chown, DUALCALL_CHOWN,					\
	(const char *path, uid_t owner, gid_t group),			\
	(const char *, uid_t, gid_t),					\
	(path, owner, group))

PATHCALL(int, lchown, DUALCALL_LCHOWN,					\
	(const char *path, uid_t owner, gid_t group),			\
	(const char *, uid_t, gid_t),					\
	(path, owner, group))

PATHCALL(int, chmod, DUALCALL_CHMOD,					\
	(const char *path, mode_t mode),				\
	(const char *, mode_t),						\
	(path, mode))

PATHCALL(int, lchmod, DUALCALL_LCHMOD,					\
	(const char *path, mode_t mode),				\
	(const char *, mode_t),						\
	(path, mode))

PATHCALL(int, statvfs1, DUALCALL_STATVFS1,				\
	(const char *path, struct statvfs *buf, int flags),		\
	(const char *, struct statvfs *, int),				\
	(path, buf, flags))

PATHCALL(int, unlink, DUALCALL_UNLINK,					\
	(const char *path),						\
	(const char *),							\
	(path))

PATHCALL(int, symlink, DUALCALL_SYMLINK,				\
	(const char *target, const char *path),				\
	(const char *, const char *),					\
	(target, path))

PATHCALL(ssize_t, readlink, DUALCALL_READLINK,				\
	(const char *path, char *buf, size_t bufsiz),			\
	(const char *, char *, size_t),					\
	(path, buf, bufsiz))

PATHCALL(int, mkdir, DUALCALL_MKDIR,					\
	(const char *path, mode_t mode),				\
	(const char *, mode_t),						\
	(path, mode))

PATHCALL(int, rmdir, DUALCALL_RMDIR,					\
	(const char *path),						\
	(const char *),							\
	(path))

PATHCALL(int, utimes, DUALCALL_UTIMES,					\
	(const char *path, const struct timeval *tv),			\
	(const char *, const struct timeval *),				\
	(path, tv))

PATHCALL(int, lutimes, DUALCALL_LUTIMES,				\
	(const char *path, const struct timeval *tv),			\
	(const char *, const struct timeval *),				\
	(path, tv))

PATHCALL(int, chflags, DUALCALL_CHFLAGS,				\
	(const char *path, u_long flags),				\
	(const char *, u_long),						\
	(path, flags))

PATHCALL(int, lchflags, DUALCALL_LCHFLAGS,				\
	(const char *path, u_long flags),				\
	(const char *, u_long),						\
	(path, flags))

PATHCALL(int, truncate, DUALCALL_TRUNCATE,				\
	(const char *path, off_t length),				\
	(const char *, off_t),						\
	(path, length))

PATHCALL(int, access, DUALCALL_ACCESS,					\
	(const char *path, int mode),					\
	(const char *, int),						\
	(path, mode))

PATHCALL(int, REALMKNOD, DUALCALL_MKNOD,				\
	(const char *path, mode_t mode, dev_t dev),			\
	(const char *, mode_t, dev_t),					\
	(path, mode, dev))

/*
 * Note: with mount the decisive parameter is the mount
 * destination directory.  This is because we don't really know
 * about the "source" directory in a generic call (and besides,
 * it might not even exist, cf. nfs).
 */
PATHCALL(int, REALMOUNT, DUALCALL_MOUNT,				\
	(const char *type, const char *path, int flags,			\
	    void *data, size_t dlen),					\
	(const char *, const char *, int, void *, size_t),		\
	(type, path, flags, data, dlen))

PATHCALL(int, unmount, DUALCALL_UNMOUNT,				\
	(const char *path, int flags),					\
	(const char *, int),						\
	(path, flags))

/*
 * The quotactl wrapper depends on the NetBSD version: 5.99.63 and
 * later wrap __quotactl with struct quotactl_args, 5.99.48 and later
 * wrap the plistref variant, older versions get no wrapper.
 */
#if __NetBSD_Prereq__(5,99,63)
PATHCALL(int, __quotactl, DUALCALL_QUOTACTL,				\
	(const char *path, struct quotactl_args *args),			\
	(const char *, struct quotactl_args *),				\
	(path, args))
#elif __NetBSD_Prereq__(5,99,48)
PATHCALL(int, OLDREALQUOTACTL, DUALCALL_QUOTACTL,			\
	(const char *path, struct plistref *p),				\
	(const char *, struct plistref *),				\
	(path, p))
#endif

PATHCALL(int, REALGETFH, DUALCALL_GETFH,				\
	(const char *path, void *fhp, size_t *fh_size),			\
	(const char *, void *, size_t *),				\
	(path, fhp, fh_size))

/*
 * These act different on a per-process vfs configuration
 *
 * VFSCALL() takes a VFSBIT_ flag ahead of the usual arguments.  The
 * macro is defined elsewhere in this file; presumably the flag is
 * tested against vfsbits the way nfssvc() below does by hand --
 * verify against the macro definition.
 */

VFSCALL(VFSBIT_GETVFSSTAT, int, getvfsstat, DUALCALL_GETVFSSTAT,	\
	(struct statvfs *buf, size_t buflen, int flags),		\
	(struct statvfs *, size_t, int),				\
	(buf, buflen, flags))

VFSCALL(VFSBIT_FHCALLS, int, REALFHOPEN, DUALCALL_FHOPEN,		\
	(const void *fhp, size_t fh_size, int flags),			\
	(const char *, size_t, int),					\
	(fhp, fh_size, flags))

VFSCALL(VFSBIT_FHCALLS, int, REALFHSTAT, DUALCALL_FHSTAT,		\
	(const void *fhp, size_t fh_size, struct stat *sb),		\
	(const char *, size_t, struct stat *),				\
	(fhp, fh_size, sb))

VFSCALL(VFSBIT_FHCALLS, int, REALFHSTATVFS1, DUALCALL_FHSTATVFS1,	\
	(const void *fhp, size_t fh_size, struct statvfs *sb, int flgs),\
	(const char *, size_t, struct statvfs *, int),			\
	(fhp, fh_size, sb, flgs))

/* finally, put nfssvc here.  "keep the namespace clean" */

#include <nfs/rpcv2.h>
#include <nfs/nfs.h>

/*
 * nfssvc: goes to the rump kernel when VFSBIT_NFSSVC is set in
 * vfsbits, otherwise to the host.  For the rump kernel, an
 * NFSSVC_ADDSOCK request has the socket descriptor in the caller's
 * struct nfsd_args rewritten in place with fd_host2rump().
 */
int
nfssvc(int flags, void *argstructp)
{
	int (*op_nfssvc)(int, void *);

	if (vfsbits & VFSBIT_NFSSVC){
		struct nfsd_args *nfsdargs;

		/* massage the socket descriptor if necessary */
		if (flags == NFSSVC_ADDSOCK) {
			nfsdargs = argstructp;
			nfsdargs->sock = fd_host2rump(nfsdargs->sock);
		}
		op_nfssvc = GETSYSCALL(rump, NFSSVC);
	} else
		op_nfssvc = GETSYSCALL(host, NFSSVC);

	return op_nfssvc(flags, argstructp);
}