1219089Spjd/* 2219089Spjd * CDDL HEADER START 3219089Spjd * 4219089Spjd * The contents of this file are subject to the terms of the 5219089Spjd * Common Development and Distribution License (the "License"). 6219089Spjd * You may not use this file except in compliance with the License. 7219089Spjd * 8219089Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9219089Spjd * or http://www.opensolaris.org/os/licensing. 10219089Spjd * See the License for the specific language governing permissions 11219089Spjd * and limitations under the License. 12219089Spjd * 13219089Spjd * When distributing Covered Code, include this CDDL HEADER in each 14219089Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15219089Spjd * If applicable, add the following below this CDDL HEADER, with the 16219089Spjd * fields enclosed by brackets "[]" replaced with your own identifying 17219089Spjd * information: Portions Copyright [yyyy] [name of copyright owner] 18219089Spjd * 19219089Spjd * CDDL HEADER END 20219089Spjd */ 21219089Spjd/* 22219089Spjd * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. 23249195Smm * Copyright (c) 2013 by Delphix. All rights reserved. 24219089Spjd */ 25219089Spjd 26219089Spjd#include <sys/types.h> 27219089Spjd#include <sys/param.h> 28219089Spjd#include <sys/errno.h> 29219089Spjd#include <sys/kmem.h> 30219089Spjd#include <sys/conf.h> 31219089Spjd#include <sys/sunddi.h> 32219089Spjd#include <sys/zfs_ioctl.h> 33219089Spjd#include <sys/zfs_onexit.h> 34219089Spjd#include <sys/zvol.h> 35219089Spjd 36219089Spjd/* 37219089Spjd * ZFS kernel routines may add/delete callback routines to be invoked 38219089Spjd * upon process exit (triggered via the close operation from the /dev/zfs 39219089Spjd * driver). 40219089Spjd * 41219089Spjd * These cleanup callbacks are intended to allow for the accumulation 42219089Spjd * of kernel state across multiple ioctls. User processes participate 43219089Spjd * by opening ZFS_DEV with O_EXCL. This causes the ZFS driver to do a 44219089Spjd * clone-open, generating a unique minor number. The process then passes 45219089Spjd * along that file descriptor to each ioctl that might have a cleanup operation. 46219089Spjd * 47219089Spjd * Consumers of the onexit routines should call zfs_onexit_fd_hold() early 48219089Spjd * on to validate the given fd and add a reference to its file table entry. 49219089Spjd * This allows the consumer to do its work and then add a callback, knowing 50219089Spjd * that zfs_onexit_add_cb() won't fail with EBADF. When finished, consumers 51219089Spjd * should call zfs_onexit_fd_rele(). 52219089Spjd * 53219089Spjd * A simple example is zfs_ioc_recv(), where we might create an AVL tree 54219089Spjd * with dataset/GUID mappings and then reuse that tree on subsequent 55219089Spjd * zfs_ioc_recv() calls. 56219089Spjd * 57219089Spjd * On the first zfs_ioc_recv() call, dmu_recv_stream() will kmem_alloc() 58219089Spjd * the AVL tree and pass it along with a callback function to 59219089Spjd * zfs_onexit_add_cb(). The zfs_onexit_add_cb() routine will register the 60219089Spjd * callback and return an action handle. 61219089Spjd * 62219089Spjd * The action handle is then passed from user space to subsequent 63219089Spjd * zfs_ioc_recv() calls, so that dmu_recv_stream() can fetch its AVL tree 64219089Spjd * by calling zfs_onexit_cb_data() with the device minor number and 65219089Spjd * action handle. 66219089Spjd * 67219089Spjd * If the user process exits abnormally, the callback is invoked implicitly 68219089Spjd * as part of the driver close operation. Once the user space process is 69219089Spjd * finished with the accumulated kernel state, it can also just call close(2) 70219089Spjd * on the cleanup fd to trigger the cleanup callback. 71219089Spjd */ 72219089Spjd 73219089Spjdvoid 74219089Spjdzfs_onexit_init(zfs_onexit_t **zop) 75219089Spjd{ 76219089Spjd zfs_onexit_t *zo; 77219089Spjd 78219089Spjd zo = *zop = kmem_zalloc(sizeof (zfs_onexit_t), KM_SLEEP); 79219089Spjd mutex_init(&zo->zo_lock, NULL, MUTEX_DEFAULT, NULL); 80219089Spjd list_create(&zo->zo_actions, sizeof (zfs_onexit_action_node_t), 81219089Spjd offsetof(zfs_onexit_action_node_t, za_link)); 82219089Spjd} 83219089Spjd 84219089Spjdvoid 85219089Spjdzfs_onexit_destroy(zfs_onexit_t *zo) 86219089Spjd{ 87219089Spjd zfs_onexit_action_node_t *ap; 88219089Spjd 89219089Spjd mutex_enter(&zo->zo_lock); 90219089Spjd while ((ap = list_head(&zo->zo_actions)) != NULL) { 91219089Spjd list_remove(&zo->zo_actions, ap); 92219089Spjd mutex_exit(&zo->zo_lock); 93219089Spjd ap->za_func(ap->za_data); 94219089Spjd kmem_free(ap, sizeof (zfs_onexit_action_node_t)); 95219089Spjd mutex_enter(&zo->zo_lock); 96219089Spjd } 97219089Spjd mutex_exit(&zo->zo_lock); 98219089Spjd 99219089Spjd list_destroy(&zo->zo_actions); 100219089Spjd mutex_destroy(&zo->zo_lock); 101219089Spjd kmem_free(zo, sizeof (zfs_onexit_t)); 102219089Spjd} 103219089Spjd 104219089Spjdstatic int 105219089Spjdzfs_onexit_minor_to_state(minor_t minor, zfs_onexit_t **zo) 106219089Spjd{ 107219089Spjd *zo = zfsdev_get_soft_state(minor, ZSST_CTLDEV); 108219089Spjd if (*zo == NULL) 109249195Smm return (SET_ERROR(EBADF)); 110219089Spjd 111219089Spjd return (0); 112219089Spjd} 113219089Spjd 114219089Spjd/* 115219089Spjd * Consumers might need to operate by minor number instead of fd, since 116219089Spjd * they might be running in another thread (e.g. txg_sync_thread). Callers 117219089Spjd * of this function must call zfs_onexit_fd_rele() when they're finished 118219089Spjd * using the minor number. 119219089Spjd */ 120219089Spjdint 121219089Spjdzfs_onexit_fd_hold(int fd, minor_t *minorp) 122219089Spjd{ 123219089Spjd file_t *fp, *tmpfp; 124219089Spjd zfs_onexit_t *zo; 125255219Spjd cap_rights_t rights; 126219089Spjd void *data; 127219089Spjd int error; 128219089Spjd 129255219Spjd fp = getf(fd, cap_rights_init(&rights)); 130219089Spjd if (fp == NULL) 131249195Smm return (SET_ERROR(EBADF)); 132219089Spjd 133219089Spjd tmpfp = curthread->td_fpop; 134219089Spjd curthread->td_fpop = fp; 135219089Spjd error = devfs_get_cdevpriv(&data); 136219089Spjd if (error == 0) 137219089Spjd *minorp = (minor_t)(uintptr_t)data; 138219089Spjd curthread->td_fpop = tmpfp; 139219089Spjd if (error != 0) 140282130Savg return (SET_ERROR(EBADF)); 141219089Spjd return (zfs_onexit_minor_to_state(*minorp, &zo)); 142219089Spjd} 143219089Spjd 144219089Spjdvoid 145219089Spjdzfs_onexit_fd_rele(int fd) 146219089Spjd{ 147219089Spjd releasef(fd); 148219089Spjd} 149219089Spjd 150219089Spjd/* 151219089Spjd * Add a callback to be invoked when the calling process exits. 152219089Spjd */ 153219089Spjdint 154219089Spjdzfs_onexit_add_cb(minor_t minor, void (*func)(void *), void *data, 155219089Spjd uint64_t *action_handle) 156219089Spjd{ 157219089Spjd zfs_onexit_t *zo; 158219089Spjd zfs_onexit_action_node_t *ap; 159219089Spjd int error; 160219089Spjd 161219089Spjd error = zfs_onexit_minor_to_state(minor, &zo); 162219089Spjd if (error) 163219089Spjd return (error); 164219089Spjd 165219089Spjd ap = kmem_alloc(sizeof (zfs_onexit_action_node_t), KM_SLEEP); 166219089Spjd list_link_init(&ap->za_link); 167219089Spjd ap->za_func = func; 168219089Spjd ap->za_data = data; 169219089Spjd 170219089Spjd mutex_enter(&zo->zo_lock); 171219089Spjd list_insert_tail(&zo->zo_actions, ap); 172219089Spjd mutex_exit(&zo->zo_lock); 173219089Spjd if (action_handle) 174219089Spjd *action_handle = (uint64_t)(uintptr_t)ap; 175219089Spjd 176219089Spjd return (0); 177219089Spjd} 178219089Spjd 179219089Spjdstatic zfs_onexit_action_node_t * 180219089Spjdzfs_onexit_find_cb(zfs_onexit_t *zo, uint64_t action_handle) 181219089Spjd{ 182219089Spjd zfs_onexit_action_node_t *match; 183219089Spjd zfs_onexit_action_node_t *ap; 184219089Spjd list_t *l; 185219089Spjd 186219089Spjd ASSERT(MUTEX_HELD(&zo->zo_lock)); 187219089Spjd 188219089Spjd match = (zfs_onexit_action_node_t *)(uintptr_t)action_handle; 189219089Spjd l = &zo->zo_actions; 190219089Spjd for (ap = list_head(l); ap != NULL; ap = list_next(l, ap)) { 191219089Spjd if (match == ap) 192219089Spjd break; 193219089Spjd } 194219089Spjd return (ap); 195219089Spjd} 196219089Spjd 197219089Spjd/* 198219089Spjd * Delete the callback, triggering it first if 'fire' is set. 199219089Spjd */ 200219089Spjdint 201219089Spjdzfs_onexit_del_cb(minor_t minor, uint64_t action_handle, boolean_t fire) 202219089Spjd{ 203219089Spjd zfs_onexit_t *zo; 204219089Spjd zfs_onexit_action_node_t *ap; 205219089Spjd int error; 206219089Spjd 207219089Spjd error = zfs_onexit_minor_to_state(minor, &zo); 208219089Spjd if (error) 209219089Spjd return (error); 210219089Spjd 211219089Spjd mutex_enter(&zo->zo_lock); 212219089Spjd ap = zfs_onexit_find_cb(zo, action_handle); 213219089Spjd if (ap != NULL) { 214219089Spjd list_remove(&zo->zo_actions, ap); 215219089Spjd mutex_exit(&zo->zo_lock); 216219089Spjd if (fire) 217219089Spjd ap->za_func(ap->za_data); 218219089Spjd kmem_free(ap, sizeof (zfs_onexit_action_node_t)); 219219089Spjd } else { 220219089Spjd mutex_exit(&zo->zo_lock); 221249195Smm error = SET_ERROR(ENOENT); 222219089Spjd } 223219089Spjd 224219089Spjd return (error); 225219089Spjd} 226219089Spjd 227219089Spjd/* 228219089Spjd * Return the data associated with this callback. This allows consumers 229219089Spjd * of the cleanup-on-exit interfaces to stash kernel data across system 230219089Spjd * calls, knowing that it will be cleaned up if the calling process exits. 231219089Spjd */ 232219089Spjdint 233219089Spjdzfs_onexit_cb_data(minor_t minor, uint64_t action_handle, void **data) 234219089Spjd{ 235219089Spjd zfs_onexit_t *zo; 236219089Spjd zfs_onexit_action_node_t *ap; 237219089Spjd int error; 238219089Spjd 239219089Spjd *data = NULL; 240219089Spjd 241219089Spjd error = zfs_onexit_minor_to_state(minor, &zo); 242219089Spjd if (error) 243219089Spjd return (error); 244219089Spjd 245219089Spjd mutex_enter(&zo->zo_lock); 246219089Spjd ap = zfs_onexit_find_cb(zo, action_handle); 247219089Spjd if (ap != NULL) 248219089Spjd *data = ap->za_data; 249219089Spjd else 250249195Smm error = SET_ERROR(ENOENT); 251219089Spjd mutex_exit(&zo->zo_lock); 252219089Spjd 253219089Spjd return (error); 254219089Spjd} 255