/* * Copyright (c) 2017 Antonio Russo * Copyright (c) 2020 InsanePrawn * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define STRCMP ((int(*)(const void *, const void *))&strcmp) #define PID_T_CMP ((int(*)(const void *, const void *))&pid_t_cmp) static int pid_t_cmp(const pid_t *lhs, const pid_t *rhs) { /* * This is always valid, quoth sys_types.h(7posix): * > blksize_t, pid_t, and ssize_t shall be signed integer types. */ return (*lhs - *rhs); } #define EXIT_ENOMEM() \ do { \ fprintf(stderr, PROGNAME "[%d]: " \ "not enough memory (L%d)!\n", getpid(), __LINE__); \ _exit(1); \ } while (0) #define PROGNAME "zfs-mount-generator" #define FSLIST SYSCONFDIR "/zfs/zfs-list.cache" #define ZFS SBINDIR "/zfs" #define OUTPUT_HEADER \ "# Automatically generated by " PROGNAME "\n" \ "\n" /* * Starts like the one in libzfs_util.c but also matches "//" * and captures until the end, since we actually use it for path extraxion */ #define URI_REGEX_S "^\\([A-Za-z][A-Za-z0-9+.\\-]*\\):\\/\\/\\(.*\\)$" static regex_t uri_regex; static char *argv0; static const char *destdir = "/tmp"; static int destdir_fd = -1; static void *known_pools = NULL; /* tsearch() of C strings */ static struct { sem_t noauto_not_on_sem; sem_t noauto_names_sem; size_t noauto_names_len; size_t noauto_names_max; char noauto_names[][NAME_MAX]; } *noauto_files; static char * systemd_escape(const char *input, const char *prepend, const char *append) { size_t len = strlen(input); size_t applen = strlen(append); size_t prelen = strlen(prepend); char *ret = malloc(4 * len + prelen + applen + 1); if (!ret) EXIT_ENOMEM(); memcpy(ret, prepend, prelen); char *out = ret + prelen; const char *cur = input; if (*cur == '.') { memcpy(out, "\\x2e", 4); out += 4; ++cur; } for (; *cur; ++cur) { if (*cur == '/') *(out++) = '-'; else if (strchr( "0123456789" "abcdefghijklmnopqrstuvwxyz" "ABCDEFGHIJKLMNOPQRSTUVWXYZ" ":_.", *cur)) *(out++) = *cur; else { sprintf(out, "\\x%02x", (int)*cur); out += 4; } } memcpy(out, append, applen + 1); return (ret); } static void simplify_path(char *path) { char *out = path; for (char *cur = path; *cur; ++cur) { if (*cur == '/') { while (*(cur + 1) == '/') ++cur; *(out++) = '/'; } else *(out++) = *cur; } *(out++) = '\0'; } static bool strendswith(const char *what, const char *suff) { size_t what_l = strlen(what); size_t suff_l = strlen(suff); return ((what_l >= suff_l) && (strcmp(what + what_l - suff_l, suff) == 0)); } /* Assumes already-simplified path, doesn't modify input */ static char * systemd_escape_path(char *input, const char *prepend, const char *append) { if (strcmp(input, "/") == 0) { char *ret; if (asprintf(&ret, "%s-%s", prepend, append) == -1) EXIT_ENOMEM(); return (ret); } else { /* * path_is_normalized() (flattened for absolute paths here), * required for proper escaping */ if (strstr(input, "/./") || strstr(input, "/../") || strendswith(input, "/.") || strendswith(input, "/..")) return (NULL); if (input[0] == '/') ++input; char *back = &input[strlen(input) - 1]; bool deslash = *back == '/'; if (deslash) *back = '\0'; char *ret = systemd_escape(input, prepend, append); if (deslash) *back = '/'; return (ret); } } static FILE * fopenat(int dirfd, const char *pathname, int flags, const char *stream_mode, mode_t mode) { int fd = openat(dirfd, pathname, flags, mode); if (fd < 0) return (NULL); return (fdopen(fd, stream_mode)); } static int line_worker(char *line, const char *cachefile) { char *toktmp; /* BEGIN CSTYLED */ const char *dataset = strtok_r(line, "\t", &toktmp); char *p_mountpoint = strtok_r(NULL, "\t", &toktmp); const char *p_canmount = strtok_r(NULL, "\t", &toktmp); const char *p_atime = strtok_r(NULL, "\t", &toktmp); const char *p_relatime = strtok_r(NULL, "\t", &toktmp); const char *p_devices = strtok_r(NULL, "\t", &toktmp); const char *p_exec = strtok_r(NULL, "\t", &toktmp); const char *p_readonly = strtok_r(NULL, "\t", &toktmp); const char *p_setuid = strtok_r(NULL, "\t", &toktmp); const char *p_nbmand = strtok_r(NULL, "\t", &toktmp); const char *p_encroot = strtok_r(NULL, "\t", &toktmp) ?: "-"; char *p_keyloc = strtok_r(NULL, "\t", &toktmp) ?: strdupa("none"); const char *p_systemd_requires = strtok_r(NULL, "\t", &toktmp) ?: "-"; const char *p_systemd_requiresmountsfor = strtok_r(NULL, "\t", &toktmp) ?: "-"; const char *p_systemd_before = strtok_r(NULL, "\t", &toktmp) ?: "-"; const char *p_systemd_after = strtok_r(NULL, "\t", &toktmp) ?: "-"; char *p_systemd_wantedby = strtok_r(NULL, "\t", &toktmp) ?: strdupa("-"); char *p_systemd_requiredby = strtok_r(NULL, "\t", &toktmp) ?: strdupa("-"); const char *p_systemd_nofail = strtok_r(NULL, "\t", &toktmp) ?: "-"; const char *p_systemd_ignore = strtok_r(NULL, "\t", &toktmp) ?: "-"; /* END CSTYLED */ const char *pool = dataset; if ((toktmp = strchr(pool, '/')) != NULL) pool = strndupa(pool, toktmp - pool); if (p_nbmand == NULL) { fprintf(stderr, PROGNAME "[%d]: %s: not enough tokens!\n", getpid(), dataset); return (1); } strncpy(argv0, dataset, strlen(argv0)); /* Minimal pre-requisites to mount a ZFS dataset */ const char *after = "zfs-import.target"; const char *wants = "zfs-import.target"; const char *bindsto = NULL; char *wantedby = NULL; char *requiredby = NULL; bool noauto = false; bool wantedby_append = true; /* * zfs-import.target is not needed if the pool is already imported. * This avoids a dependency loop on root-on-ZFS systems: * systemd-random-seed.service After (via RequiresMountsFor) * var-lib.mount After * zfs-import.target After * zfs-import-{cache,scan}.service After * cryptsetup.service After * systemd-random-seed.service */ if (tfind(pool, &known_pools, STRCMP)) { after = ""; wants = ""; } if (strcmp(p_systemd_after, "-") == 0) p_systemd_after = NULL; if (strcmp(p_systemd_before, "-") == 0) p_systemd_before = NULL; if (strcmp(p_systemd_requires, "-") == 0) p_systemd_requires = NULL; if (strcmp(p_systemd_requiresmountsfor, "-") == 0) p_systemd_requiresmountsfor = NULL; if (strcmp(p_encroot, "-") != 0) { char *keyloadunit = systemd_escape(p_encroot, "zfs-load-key@", ".service"); if (strcmp(dataset, p_encroot) == 0) { const char *keymountdep = NULL; bool is_prompt = false; regmatch_t uri_matches[3]; if (regexec(&uri_regex, p_keyloc, sizeof (uri_matches) / sizeof (*uri_matches), uri_matches, 0) == 0) { p_keyloc[uri_matches[2].rm_eo] = '\0'; const char *path = &p_keyloc[uri_matches[2].rm_so]; /* * Assumes all URI keylocations need * the mount for their path; * http://, for example, wouldn't * (but it'd need network-online.target et al.) */ keymountdep = path; } else { if (strcmp(p_keyloc, "prompt") != 0) fprintf(stderr, PROGNAME "[%d]: %s: " "unknown non-URI keylocation=%s\n", getpid(), dataset, p_keyloc); is_prompt = true; } /* Generate the key-load .service unit */ FILE *keyloadunit_f = fopenat(destdir_fd, keyloadunit, O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, "w", 0644); if (!keyloadunit_f) { fprintf(stderr, PROGNAME "[%d]: %s: " "couldn't open %s under %s: %s\n", getpid(), dataset, keyloadunit, destdir, strerror(errno)); return (1); } fprintf(keyloadunit_f, OUTPUT_HEADER "[Unit]\n" "Description=Load ZFS key for %s\n" "SourcePath=" FSLIST "/%s\n" "Documentation=man:zfs-mount-generator(8)\n" "DefaultDependencies=no\n" "Wants=%s\n" "After=%s\n", dataset, cachefile, wants, after); if (p_systemd_requires) fprintf(keyloadunit_f, "Requires=%s\n", p_systemd_requires); if (p_systemd_requiresmountsfor || keymountdep) { fprintf(keyloadunit_f, "RequiresMountsFor="); if (p_systemd_requiresmountsfor) fprintf(keyloadunit_f, "%s ", p_systemd_requiresmountsfor); if (keymountdep) fprintf(keyloadunit_f, "'%s'", keymountdep); fprintf(keyloadunit_f, "\n"); } /* BEGIN CSTYLED */ fprintf(keyloadunit_f, "\n" "[Service]\n" "Type=oneshot\n" "RemainAfterExit=yes\n" "# This avoids a dependency loop involving systemd-journald.socket if this\n" "# dataset is a parent of the root filesystem.\n" "StandardOutput=null\n" "StandardError=null\n" "ExecStart=/bin/sh -euc '" "[ \"$$(" ZFS " get -H -o value keystatus \"%s\")\" = \"unavailable\" ] || exit 0;", dataset); if (is_prompt) fprintf(keyloadunit_f, "for i in 1 2 3; do " "systemd-ask-password --id=\"zfs:%s\" \"Enter passphrase for %s:\" |" "" ZFS " load-key \"%s\" && exit 0;" "done;" "exit 1", dataset, dataset, dataset); else fprintf(keyloadunit_f, "exec " ZFS " load-key \"%s\"", dataset); fprintf(keyloadunit_f, "'\n" "ExecStop=/bin/sh -euc '" "[ \"$$(" ZFS " get -H -o value keystatus \"%s\")\" = \"available\" ] || exit 0;" "exec " ZFS " unload-key \"%s\"" "'\n", dataset, dataset); /* END CSTYLED */ (void) fclose(keyloadunit_f); } /* Update dependencies for the mount file to want this */ bindsto = keyloadunit; if (after[0] == '\0') after = keyloadunit; else if (asprintf(&toktmp, "%s %s", after, keyloadunit) != -1) after = toktmp; else EXIT_ENOMEM(); } /* Skip generation of the mount unit if org.openzfs.systemd:ignore=on */ if (strcmp(p_systemd_ignore, "-") == 0 || strcmp(p_systemd_ignore, "off") == 0) { /* ok */ } else if (strcmp(p_systemd_ignore, "on") == 0) return (0); else { fprintf(stderr, PROGNAME "[%d]: %s: " "invalid org.openzfs.systemd:ignore=%s\n", getpid(), dataset, p_systemd_ignore); return (1); } /* Check for canmount */ if (strcmp(p_canmount, "on") == 0) { /* ok */ } else if (strcmp(p_canmount, "noauto") == 0) noauto = true; else if (strcmp(p_canmount, "off") == 0) return (0); else { fprintf(stderr, PROGNAME "[%d]: %s: invalid canmount=%s\n", getpid(), dataset, p_canmount); return (1); } /* Check for legacy and blank mountpoints */ if (strcmp(p_mountpoint, "legacy") == 0 || strcmp(p_mountpoint, "none") == 0) return (0); else if (p_mountpoint[0] != '/') { fprintf(stderr, PROGNAME "[%d]: %s: invalid mountpoint=%s\n", getpid(), dataset, p_mountpoint); return (1); } /* Escape the mountpoint per systemd policy */ simplify_path(p_mountpoint); const char *mountfile = systemd_escape_path(p_mountpoint, "", ".mount"); if (mountfile == NULL) { fprintf(stderr, PROGNAME "[%d]: %s: abnormal simplified mountpoint: %s\n", getpid(), dataset, p_mountpoint); return (1); } /* * Parse options, cf. lib/libzfs/libzfs_mount.c:zfs_add_options * * The longest string achievable here is * ",atime,strictatime,nodev,noexec,rw,nosuid,nomand". */ char opts[64] = ""; /* atime */ if (strcmp(p_atime, "on") == 0) { /* relatime */ if (strcmp(p_relatime, "on") == 0) strcat(opts, ",atime,relatime"); else if (strcmp(p_relatime, "off") == 0) strcat(opts, ",atime,strictatime"); else fprintf(stderr, PROGNAME "[%d]: %s: invalid relatime=%s\n", getpid(), dataset, p_relatime); } else if (strcmp(p_atime, "off") == 0) { strcat(opts, ",noatime"); } else fprintf(stderr, PROGNAME "[%d]: %s: invalid atime=%s\n", getpid(), dataset, p_atime); /* devices */ if (strcmp(p_devices, "on") == 0) strcat(opts, ",dev"); else if (strcmp(p_devices, "off") == 0) strcat(opts, ",nodev"); else fprintf(stderr, PROGNAME "[%d]: %s: invalid devices=%s\n", getpid(), dataset, p_devices); /* exec */ if (strcmp(p_exec, "on") == 0) strcat(opts, ",exec"); else if (strcmp(p_exec, "off") == 0) strcat(opts, ",noexec"); else fprintf(stderr, PROGNAME "[%d]: %s: invalid exec=%s\n", getpid(), dataset, p_exec); /* readonly */ if (strcmp(p_readonly, "on") == 0) strcat(opts, ",ro"); else if (strcmp(p_readonly, "off") == 0) strcat(opts, ",rw"); else fprintf(stderr, PROGNAME "[%d]: %s: invalid readonly=%s\n", getpid(), dataset, p_readonly); /* setuid */ if (strcmp(p_setuid, "on") == 0) strcat(opts, ",suid"); else if (strcmp(p_setuid, "off") == 0) strcat(opts, ",nosuid"); else fprintf(stderr, PROGNAME "[%d]: %s: invalid setuid=%s\n", getpid(), dataset, p_setuid); /* nbmand */ if (strcmp(p_nbmand, "on") == 0) strcat(opts, ",mand"); else if (strcmp(p_nbmand, "off") == 0) strcat(opts, ",nomand"); else fprintf(stderr, PROGNAME "[%d]: %s: invalid nbmand=%s\n", getpid(), dataset, p_setuid); if (strcmp(p_systemd_wantedby, "-") != 0) { noauto = true; if (strcmp(p_systemd_wantedby, "none") != 0) wantedby = p_systemd_wantedby; } if (strcmp(p_systemd_requiredby, "-") != 0) { noauto = true; if (strcmp(p_systemd_requiredby, "none") != 0) requiredby = p_systemd_requiredby; } /* * For datasets with canmount=on, a dependency is created for * local-fs.target by default. To avoid regressions, this dependency * is reduced to "wants" rather than "requires" when nofail!=off. * **THIS MAY CHANGE** * noauto=on disables this behavior completely. */ if (!noauto) { if (strcmp(p_systemd_nofail, "off") == 0) requiredby = strdupa("local-fs.target"); else { wantedby = strdupa("local-fs.target"); wantedby_append = strcmp(p_systemd_nofail, "on") != 0; } } /* * Handle existing files: * 1. We never overwrite existing files, although we may delete * files if we're sure they were created by us. (see 5.) * 2. We handle files differently based on canmount. * Units with canmount=on always have precedence over noauto. * This is enforced by the noauto_not_on_sem semaphore, * which is only unlocked when the last canmount=on process exits. * It is important to use p_canmount and not noauto here, * since we categorise by canmount while other properties, * e.g. org.openzfs.systemd:wanted-by, also modify noauto. * 3. If no unit file exists for a noauto dataset, we create one. * Additionally, we use noauto_files to track the unit file names * (which are the systemd-escaped mountpoints) of all (exclusively) * noauto datasets that had a file created. * 4. If the file to be created is found in the tracking array, * we do NOT create it. * 5. If a file exists for a noauto dataset, * we check whether the file name is in the array. * If it is, we have multiple noauto datasets for the same * mountpoint. In such cases, we remove the file for safety. * We leave the file name in the tracking array to avoid * further noauto datasets creating a file for this path again. */ { sem_t *our_sem = (strcmp(p_canmount, "on") == 0) ? &noauto_files->noauto_names_sem : &noauto_files->noauto_not_on_sem; while (sem_wait(our_sem) == -1 && errno == EINTR) ; } struct stat stbuf; bool already_exists = fstatat(destdir_fd, mountfile, &stbuf, 0) == 0; bool is_known = false; for (size_t i = 0; i < noauto_files->noauto_names_len; ++i) { if (strncmp( noauto_files->noauto_names[i], mountfile, NAME_MAX) == 0) { is_known = true; break; } } if (already_exists) { if (is_known) { /* If it's in $noauto_files, we must be noauto too */ /* See 5 */ errno = 0; (void) unlinkat(destdir_fd, mountfile, 0); /* See 2 */ fprintf(stderr, PROGNAME "[%d]: %s: " "removing duplicate noauto unit %s%s%s\n", getpid(), dataset, mountfile, errno ? "" : " failed: ", errno ? "" : strerror(errno)); } else { /* Don't log for canmount=noauto */ if (strcmp(p_canmount, "on") == 0) fprintf(stderr, PROGNAME "[%d]: %s: " "%s already exists. Skipping.\n", getpid(), dataset, mountfile); } /* File exists: skip current dataset */ if (strcmp(p_canmount, "on") == 0) sem_post(&noauto_files->noauto_names_sem); return (0); } else { if (is_known) { /* See 4 */ if (strcmp(p_canmount, "on") == 0) sem_post(&noauto_files->noauto_names_sem); return (0); } else if (strcmp(p_canmount, "noauto") == 0) { if (noauto_files->noauto_names_len == noauto_files->noauto_names_max) fprintf(stderr, PROGNAME "[%d]: %s: " "noauto dataset limit (%zu) reached! " "Not tracking %s. Please report this to " "https://github.com/openzfs/zfs\n", getpid(), dataset, noauto_files->noauto_names_max, mountfile); else { strncpy(noauto_files->noauto_names[ noauto_files->noauto_names_len], mountfile, NAME_MAX); ++noauto_files->noauto_names_len; } } } FILE *mountfile_f = fopenat(destdir_fd, mountfile, O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, "w", 0644); if (strcmp(p_canmount, "on") == 0) sem_post(&noauto_files->noauto_names_sem); if (!mountfile_f) { fprintf(stderr, PROGNAME "[%d]: %s: couldn't open %s under %s: %s\n", getpid(), dataset, mountfile, destdir, strerror(errno)); return (1); } fprintf(mountfile_f, OUTPUT_HEADER "[Unit]\n" "SourcePath=" FSLIST "/%s\n" "Documentation=man:zfs-mount-generator(8)\n" "\n" "Before=", cachefile); if (p_systemd_before) fprintf(mountfile_f, "%s ", p_systemd_before); fprintf(mountfile_f, "zfs-mount.service"); /* Ensures we don't race */ if (requiredby) fprintf(mountfile_f, " %s", requiredby); if (wantedby && wantedby_append) fprintf(mountfile_f, " %s", wantedby); fprintf(mountfile_f, "\n" "After="); if (p_systemd_after) fprintf(mountfile_f, "%s ", p_systemd_after); fprintf(mountfile_f, "%s\n", after); fprintf(mountfile_f, "Wants=%s\n", wants); if (bindsto) fprintf(mountfile_f, "BindsTo=%s\n", bindsto); if (p_systemd_requires) fprintf(mountfile_f, "Requires=%s\n", p_systemd_requires); if (p_systemd_requiresmountsfor) fprintf(mountfile_f, "RequiresMountsFor=%s\n", p_systemd_requiresmountsfor); fprintf(mountfile_f, "\n" "[Mount]\n" "Where=%s\n" "What=%s\n" "Type=zfs\n" "Options=defaults%s,zfsutil\n", p_mountpoint, dataset, opts); (void) fclose(mountfile_f); if (!requiredby && !wantedby) return (0); /* Finally, create the appropriate dependencies */ char *linktgt; if (asprintf(&linktgt, "../%s", mountfile) == -1) EXIT_ENOMEM(); char *dependencies[][2] = { {"wants", wantedby}, {"requires", requiredby}, {} }; for (__typeof__(&*dependencies) dep = &*dependencies; **dep; ++dep) { if (!(*dep)[1]) continue; for (char *reqby = strtok_r((*dep)[1], " ", &toktmp); reqby; reqby = strtok_r(NULL, " ", &toktmp)) { char *depdir; if (asprintf(&depdir, "%s.%s", reqby, (*dep)[0]) == -1) EXIT_ENOMEM(); (void) mkdirat(destdir_fd, depdir, 0755); int depdir_fd = openat(destdir_fd, depdir, O_PATH | O_DIRECTORY | O_CLOEXEC); if (depdir_fd < 0) { fprintf(stderr, PROGNAME "[%d]: %s: " "couldn't open %s under %s: %s\n", getpid(), dataset, depdir, destdir, strerror(errno)); free(depdir); continue; } if (symlinkat(linktgt, depdir_fd, mountfile) == -1) fprintf(stderr, PROGNAME "[%d]: %s: " "couldn't symlink at " "%s under %s under %s: %s\n", getpid(), dataset, mountfile, depdir, destdir, strerror(errno)); (void) close(depdir_fd); free(depdir); } } return (0); } static int pool_enumerator(zpool_handle_t *pool, void *data __attribute__((unused))) { int ret = 0; /* * Pools are guaranteed-unique by the kernel, * no risk of leaking dupes here */ char *name = strdup(zpool_get_name(pool)); if (!name || !tsearch(name, &known_pools, STRCMP)) { free(name); ret = ENOMEM; } zpool_close(pool); return (ret); } int main(int argc, char **argv) { struct timespec time_init = {}; clock_gettime(CLOCK_MONOTONIC_RAW, &time_init); { int kmfd = open("/dev/kmsg", O_WRONLY | O_CLOEXEC); if (kmfd >= 0) { (void) dup2(kmfd, STDERR_FILENO); (void) close(kmfd); } } uint8_t debug = 0; argv0 = argv[0]; switch (argc) { case 1: /* Use default */ break; case 2: case 4: destdir = argv[1]; break; default: fprintf(stderr, PROGNAME "[%d]: wrong argument count: %d\n", getpid(), argc - 1); _exit(1); } { destdir_fd = open(destdir, O_PATH | O_DIRECTORY | O_CLOEXEC); if (destdir_fd < 0) { fprintf(stderr, PROGNAME "[%d]: " "can't open destination directory %s: %s\n", getpid(), destdir, strerror(errno)); _exit(1); } } DIR *fslist_dir = opendir(FSLIST); if (!fslist_dir) { if (errno != ENOENT) fprintf(stderr, PROGNAME "[%d]: couldn't open " FSLIST ": %s\n", getpid(), strerror(errno)); _exit(0); } { libzfs_handle_t *libzfs = libzfs_init(); if (libzfs) { if (zpool_iter(libzfs, pool_enumerator, NULL) != 0) fprintf(stderr, PROGNAME "[%d]: " "error listing pools, ignoring\n", getpid()); libzfs_fini(libzfs); } else fprintf(stderr, PROGNAME "[%d]: " "couldn't start libzfs, ignoring\n", getpid()); } { int regerr = regcomp(&uri_regex, URI_REGEX_S, 0); if (regerr != 0) { fprintf(stderr, PROGNAME "[%d]: invalid regex: %d\n", getpid(), regerr); _exit(1); } } { /* * We could just get a gigabyte here and Not Care, * but if vm.overcommit_memory=2, then MAP_NORESERVE is ignored * and we'd try (and likely fail) to rip it out of swap */ noauto_files = mmap(NULL, 4 * 1024 * 1024, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0); if (noauto_files == MAP_FAILED) { fprintf(stderr, PROGNAME "[%d]: couldn't allocate IPC region: %s\n", getpid(), strerror(errno)); _exit(1); } sem_init(&noauto_files->noauto_not_on_sem, true, 0); sem_init(&noauto_files->noauto_names_sem, true, 1); noauto_files->noauto_names_len = 0; /* Works out to 16447ish, *well* enough */ noauto_files->noauto_names_max = (4 * 1024 * 1024 - sizeof (*noauto_files)) / NAME_MAX; } char *line = NULL; size_t linelen = 0; struct timespec time_start = {}; { const char *dbgenv = getenv("ZFS_DEBUG"); if (dbgenv) debug = atoi(dbgenv); else { FILE *cmdline = fopen("/proc/cmdline", "re"); if (cmdline != NULL) { if (getline(&line, &linelen, cmdline) >= 0) debug = strstr(line, "debug") ? 2 : 0; (void) fclose(cmdline); } } if (debug && !isatty(STDOUT_FILENO)) dup2(STDERR_FILENO, STDOUT_FILENO); } size_t forked_canmount_on = 0; size_t forked_canmount_not_on = 0; size_t canmount_on_pids_len = 128; pid_t *canmount_on_pids = malloc(canmount_on_pids_len * sizeof (*canmount_on_pids)); if (canmount_on_pids == NULL) canmount_on_pids_len = 0; if (debug) clock_gettime(CLOCK_MONOTONIC_RAW, &time_start); ssize_t read; pid_t pid; struct dirent *cachent; while ((cachent = readdir(fslist_dir)) != NULL) { if (strcmp(cachent->d_name, ".") == 0 || strcmp(cachent->d_name, "..") == 0) continue; FILE *cachefile = fopenat(dirfd(fslist_dir), cachent->d_name, O_RDONLY | O_CLOEXEC, "r", 0); if (!cachefile) { fprintf(stderr, PROGNAME "[%d]: " "couldn't open %s under " FSLIST ": %s\n", getpid(), cachent->d_name, strerror(errno)); continue; } while ((read = getline(&line, &linelen, cachefile)) >= 0) { line[read - 1] = '\0'; /* newline */ switch (pid = fork()) { case -1: fprintf(stderr, PROGNAME "[%d]: couldn't fork for %s: %s\n", getpid(), line, strerror(errno)); break; case 0: /* child */ _exit(line_worker(line, cachent->d_name)); default: { /* parent */ char *tmp; char *dset = strtok_r(line, "\t", &tmp); strtok_r(NULL, "\t", &tmp); char *canmount = strtok_r(NULL, "\t", &tmp); bool canmount_on = canmount && strncmp(canmount, "on", 2) == 0; if (debug >= 2) printf(PROGNAME ": forked %d, " "canmount_on=%d, dataset=%s\n", (int)pid, canmount_on, dset); if (canmount_on && forked_canmount_on == canmount_on_pids_len) { size_t new_len = (canmount_on_pids_len ?: 16) * 2; void *new_pidlist = realloc(canmount_on_pids, new_len * sizeof (*canmount_on_pids)); if (!new_pidlist) { fprintf(stderr, PROGNAME "[%d]: " "out of memory! " "Mount ordering may be " "affected.\n", getpid()); continue; } canmount_on_pids = new_pidlist; canmount_on_pids_len = new_len; } if (canmount_on) { canmount_on_pids[forked_canmount_on] = pid; ++forked_canmount_on; } else ++forked_canmount_not_on; break; } } } (void) fclose(cachefile); } free(line); if (forked_canmount_on == 0) { /* No canmount=on processes to finish, so don't deadlock here */ for (size_t i = 0; i < forked_canmount_not_on; ++i) sem_post(&noauto_files->noauto_not_on_sem); } else { /* Likely a no-op, since we got these from a narrow fork loop */ qsort(canmount_on_pids, forked_canmount_on, sizeof (*canmount_on_pids), PID_T_CMP); } int status, ret = 0; struct rusage usage; size_t forked_canmount_on_max = forked_canmount_on; while ((pid = wait4(-1, &status, 0, &usage)) != -1) { ret |= WEXITSTATUS(status) | WTERMSIG(status); if (forked_canmount_on != 0) { if (bsearch(&pid, canmount_on_pids, forked_canmount_on_max, sizeof (*canmount_on_pids), PID_T_CMP)) --forked_canmount_on; if (forked_canmount_on == 0) { /* * All canmount=on processes have finished, * let all the lower-priority ones finish now */ for (size_t i = 0; i < forked_canmount_not_on; ++i) sem_post( &noauto_files->noauto_not_on_sem); } } if (debug >= 2) printf(PROGNAME ": %d done, user=%llu.%06us, " "system=%llu.%06us, maxrss=%ldB, ex=0x%x\n", (int)pid, (unsigned long long) usage.ru_utime.tv_sec, (unsigned int) usage.ru_utime.tv_usec, (unsigned long long) usage.ru_stime.tv_sec, (unsigned int) usage.ru_stime.tv_usec, usage.ru_maxrss * 1024, status); } if (debug) { struct timespec time_end = {}; clock_gettime(CLOCK_MONOTONIC_RAW, &time_end); getrusage(RUSAGE_SELF, &usage); printf( "\n" PROGNAME ": self : " "user=%llu.%06us, system=%llu.%06us, maxrss=%ldB\n", (unsigned long long) usage.ru_utime.tv_sec, (unsigned int) usage.ru_utime.tv_usec, (unsigned long long) usage.ru_stime.tv_sec, (unsigned int) usage.ru_stime.tv_usec, usage.ru_maxrss * 1024); getrusage(RUSAGE_CHILDREN, &usage); printf(PROGNAME ": children: " "user=%llu.%06us, system=%llu.%06us, maxrss=%ldB\n", (unsigned long long) usage.ru_utime.tv_sec, (unsigned int) usage.ru_utime.tv_usec, (unsigned long long) usage.ru_stime.tv_sec, (unsigned int) usage.ru_stime.tv_usec, usage.ru_maxrss * 1024); if (time_start.tv_nsec > time_end.tv_nsec) { time_end.tv_nsec = 1000000000 + time_end.tv_nsec - time_start.tv_nsec; time_end.tv_sec -= 1; } else time_end.tv_nsec -= time_start.tv_nsec; time_end.tv_sec -= time_start.tv_sec; if (time_init.tv_nsec > time_start.tv_nsec) { time_start.tv_nsec = 1000000000 + time_start.tv_nsec - time_init.tv_nsec; time_start.tv_sec -= 1; } else time_start.tv_nsec -= time_init.tv_nsec; time_start.tv_sec -= time_init.tv_sec; time_init.tv_nsec = time_start.tv_nsec + time_end.tv_nsec; time_init.tv_sec = time_start.tv_sec + time_end.tv_sec + time_init.tv_nsec / 1000000000; time_init.tv_nsec %= 1000000000; printf(PROGNAME ": wall : " "total=%llu.%09llus = " "init=%llu.%09llus + real=%llu.%09llus\n", (unsigned long long) time_init.tv_sec, (unsigned long long) time_init.tv_nsec, (unsigned long long) time_start.tv_sec, (unsigned long long) time_start.tv_nsec, (unsigned long long) time_end.tv_sec, (unsigned long long) time_end.tv_nsec); } _exit(ret); }