1219089Spjd/*
2219089Spjd * CDDL HEADER START
3219089Spjd *
4219089Spjd * The contents of this file are subject to the terms of the
5219089Spjd * Common Development and Distribution License (the "License").
6219089Spjd * You may not use this file except in compliance with the License.
7219089Spjd *
8219089Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9219089Spjd * or http://www.opensolaris.org/os/licensing.
10219089Spjd * See the License for the specific language governing permissions
11219089Spjd * and limitations under the License.
12219089Spjd *
13219089Spjd * When distributing Covered Code, include this CDDL HEADER in each
14219089Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15219089Spjd * If applicable, add the following below this CDDL HEADER, with the
16219089Spjd * fields enclosed by brackets "[]" replaced with your own identifying
17219089Spjd * information: Portions Copyright [yyyy] [name of copyright owner]
18219089Spjd *
19219089Spjd * CDDL HEADER END
20219089Spjd */
21219089Spjd/*
22219089Spjd * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
23249195Smm * Copyright (c) 2013 by Delphix. All rights reserved.
24219089Spjd */
25219089Spjd
26219089Spjd#include <sys/types.h>
27219089Spjd#include <sys/param.h>
28219089Spjd#include <sys/errno.h>
29219089Spjd#include <sys/kmem.h>
30219089Spjd#include <sys/conf.h>
31219089Spjd#include <sys/sunddi.h>
32219089Spjd#include <sys/zfs_ioctl.h>
33219089Spjd#include <sys/zfs_onexit.h>
34219089Spjd#include <sys/zvol.h>
35219089Spjd
36219089Spjd/*
37219089Spjd * ZFS kernel routines may add/delete callback routines to be invoked
38219089Spjd * upon process exit (triggered via the close operation from the /dev/zfs
39219089Spjd * driver).
40219089Spjd *
41219089Spjd * These cleanup callbacks are intended to allow for the accumulation
42219089Spjd * of kernel state across multiple ioctls.  User processes participate
43219089Spjd * by opening ZFS_DEV with O_EXCL. This causes the ZFS driver to do a
44219089Spjd * clone-open, generating a unique minor number. The process then passes
45219089Spjd * along that file descriptor to each ioctl that might have a cleanup operation.
46219089Spjd *
47219089Spjd * Consumers of the onexit routines should call zfs_onexit_fd_hold() early
48219089Spjd * on to validate the given fd and add a reference to its file table entry.
49219089Spjd * This allows the consumer to do its work and then add a callback, knowing
50219089Spjd * that zfs_onexit_add_cb() won't fail with EBADF.  When finished, consumers
51219089Spjd * should call zfs_onexit_fd_rele().
52219089Spjd *
53219089Spjd * A simple example is zfs_ioc_recv(), where we might create an AVL tree
54219089Spjd * with dataset/GUID mappings and then reuse that tree on subsequent
55219089Spjd * zfs_ioc_recv() calls.
56219089Spjd *
57219089Spjd * On the first zfs_ioc_recv() call, dmu_recv_stream() will kmem_alloc()
58219089Spjd * the AVL tree and pass it along with a callback function to
59219089Spjd * zfs_onexit_add_cb(). The zfs_onexit_add_cb() routine will register the
60219089Spjd * callback and return an action handle.
61219089Spjd *
62219089Spjd * The action handle is then passed from user space to subsequent
63219089Spjd * zfs_ioc_recv() calls, so that dmu_recv_stream() can fetch its AVL tree
64219089Spjd * by calling zfs_onexit_cb_data() with the device minor number and
65219089Spjd * action handle.
66219089Spjd *
67219089Spjd * If the user process exits abnormally, the callback is invoked implicitly
68219089Spjd * as part of the driver close operation.  Once the user space process is
69219089Spjd * finished with the accumulated kernel state, it can also just call close(2)
70219089Spjd * on the cleanup fd to trigger the cleanup callback.
71219089Spjd */
72219089Spjd
73219089Spjdvoid
74219089Spjdzfs_onexit_init(zfs_onexit_t **zop)
75219089Spjd{
76219089Spjd	zfs_onexit_t *zo;
77219089Spjd
78219089Spjd	zo = *zop = kmem_zalloc(sizeof (zfs_onexit_t), KM_SLEEP);
79219089Spjd	mutex_init(&zo->zo_lock, NULL, MUTEX_DEFAULT, NULL);
80219089Spjd	list_create(&zo->zo_actions, sizeof (zfs_onexit_action_node_t),
81219089Spjd	    offsetof(zfs_onexit_action_node_t, za_link));
82219089Spjd}
83219089Spjd
84219089Spjdvoid
85219089Spjdzfs_onexit_destroy(zfs_onexit_t *zo)
86219089Spjd{
87219089Spjd	zfs_onexit_action_node_t *ap;
88219089Spjd
89219089Spjd	mutex_enter(&zo->zo_lock);
90219089Spjd	while ((ap = list_head(&zo->zo_actions)) != NULL) {
91219089Spjd		list_remove(&zo->zo_actions, ap);
92219089Spjd		mutex_exit(&zo->zo_lock);
93219089Spjd		ap->za_func(ap->za_data);
94219089Spjd		kmem_free(ap, sizeof (zfs_onexit_action_node_t));
95219089Spjd		mutex_enter(&zo->zo_lock);
96219089Spjd	}
97219089Spjd	mutex_exit(&zo->zo_lock);
98219089Spjd
99219089Spjd	list_destroy(&zo->zo_actions);
100219089Spjd	mutex_destroy(&zo->zo_lock);
101219089Spjd	kmem_free(zo, sizeof (zfs_onexit_t));
102219089Spjd}
103219089Spjd
104219089Spjdstatic int
105219089Spjdzfs_onexit_minor_to_state(minor_t minor, zfs_onexit_t **zo)
106219089Spjd{
107219089Spjd	*zo = zfsdev_get_soft_state(minor, ZSST_CTLDEV);
108219089Spjd	if (*zo == NULL)
109249195Smm		return (SET_ERROR(EBADF));
110219089Spjd
111219089Spjd	return (0);
112219089Spjd}
113219089Spjd
114219089Spjd/*
115219089Spjd * Consumers might need to operate by minor number instead of fd, since
116219089Spjd * they might be running in another thread (e.g. txg_sync_thread). Callers
117219089Spjd * of this function must call zfs_onexit_fd_rele() when they're finished
118219089Spjd * using the minor number.
119219089Spjd */
120219089Spjdint
121219089Spjdzfs_onexit_fd_hold(int fd, minor_t *minorp)
122219089Spjd{
123219089Spjd	file_t *fp, *tmpfp;
124219089Spjd	zfs_onexit_t *zo;
125255219Spjd	cap_rights_t rights;
126219089Spjd	void *data;
127219089Spjd	int error;
128219089Spjd
129255219Spjd	fp = getf(fd, cap_rights_init(&rights));
130219089Spjd	if (fp == NULL)
131249195Smm		return (SET_ERROR(EBADF));
132219089Spjd
133219089Spjd	tmpfp = curthread->td_fpop;
134219089Spjd	curthread->td_fpop = fp;
135219089Spjd	error = devfs_get_cdevpriv(&data);
136219089Spjd	if (error == 0)
137219089Spjd		*minorp = (minor_t)(uintptr_t)data;
138219089Spjd	curthread->td_fpop = tmpfp;
139219089Spjd	if (error != 0)
140282130Savg		return (SET_ERROR(EBADF));
141219089Spjd	return (zfs_onexit_minor_to_state(*minorp, &zo));
142219089Spjd}
143219089Spjd
/*
 * Drop the file hold on 'fd' by handing it to releasef().  This is the
 * counterpart to a hold taken with zfs_onexit_fd_hold().
 */
void
zfs_onexit_fd_rele(int fd)
{
	releasef(fd);
}
149219089Spjd
150219089Spjd/*
151219089Spjd * Add a callback to be invoked when the calling process exits.
152219089Spjd */
153219089Spjdint
154219089Spjdzfs_onexit_add_cb(minor_t minor, void (*func)(void *), void *data,
155219089Spjd    uint64_t *action_handle)
156219089Spjd{
157219089Spjd	zfs_onexit_t *zo;
158219089Spjd	zfs_onexit_action_node_t *ap;
159219089Spjd	int error;
160219089Spjd
161219089Spjd	error = zfs_onexit_minor_to_state(minor, &zo);
162219089Spjd	if (error)
163219089Spjd		return (error);
164219089Spjd
165219089Spjd	ap = kmem_alloc(sizeof (zfs_onexit_action_node_t), KM_SLEEP);
166219089Spjd	list_link_init(&ap->za_link);
167219089Spjd	ap->za_func = func;
168219089Spjd	ap->za_data = data;
169219089Spjd
170219089Spjd	mutex_enter(&zo->zo_lock);
171219089Spjd	list_insert_tail(&zo->zo_actions, ap);
172219089Spjd	mutex_exit(&zo->zo_lock);
173219089Spjd	if (action_handle)
174219089Spjd		*action_handle = (uint64_t)(uintptr_t)ap;
175219089Spjd
176219089Spjd	return (0);
177219089Spjd}
178219089Spjd
179219089Spjdstatic zfs_onexit_action_node_t *
180219089Spjdzfs_onexit_find_cb(zfs_onexit_t *zo, uint64_t action_handle)
181219089Spjd{
182219089Spjd	zfs_onexit_action_node_t *match;
183219089Spjd	zfs_onexit_action_node_t *ap;
184219089Spjd	list_t *l;
185219089Spjd
186219089Spjd	ASSERT(MUTEX_HELD(&zo->zo_lock));
187219089Spjd
188219089Spjd	match = (zfs_onexit_action_node_t *)(uintptr_t)action_handle;
189219089Spjd	l = &zo->zo_actions;
190219089Spjd	for (ap = list_head(l); ap != NULL; ap = list_next(l, ap)) {
191219089Spjd		if (match == ap)
192219089Spjd			break;
193219089Spjd	}
194219089Spjd	return (ap);
195219089Spjd}
196219089Spjd
197219089Spjd/*
198219089Spjd * Delete the callback, triggering it first if 'fire' is set.
199219089Spjd */
200219089Spjdint
201219089Spjdzfs_onexit_del_cb(minor_t minor, uint64_t action_handle, boolean_t fire)
202219089Spjd{
203219089Spjd	zfs_onexit_t *zo;
204219089Spjd	zfs_onexit_action_node_t *ap;
205219089Spjd	int error;
206219089Spjd
207219089Spjd	error = zfs_onexit_minor_to_state(minor, &zo);
208219089Spjd	if (error)
209219089Spjd		return (error);
210219089Spjd
211219089Spjd	mutex_enter(&zo->zo_lock);
212219089Spjd	ap = zfs_onexit_find_cb(zo, action_handle);
213219089Spjd	if (ap != NULL) {
214219089Spjd		list_remove(&zo->zo_actions, ap);
215219089Spjd		mutex_exit(&zo->zo_lock);
216219089Spjd		if (fire)
217219089Spjd			ap->za_func(ap->za_data);
218219089Spjd		kmem_free(ap, sizeof (zfs_onexit_action_node_t));
219219089Spjd	} else {
220219089Spjd		mutex_exit(&zo->zo_lock);
221249195Smm		error = SET_ERROR(ENOENT);
222219089Spjd	}
223219089Spjd
224219089Spjd	return (error);
225219089Spjd}
226219089Spjd
227219089Spjd/*
228219089Spjd * Return the data associated with this callback.  This allows consumers
229219089Spjd * of the cleanup-on-exit interfaces to stash kernel data across system
230219089Spjd * calls, knowing that it will be cleaned up if the calling process exits.
231219089Spjd */
232219089Spjdint
233219089Spjdzfs_onexit_cb_data(minor_t minor, uint64_t action_handle, void **data)
234219089Spjd{
235219089Spjd	zfs_onexit_t *zo;
236219089Spjd	zfs_onexit_action_node_t *ap;
237219089Spjd	int error;
238219089Spjd
239219089Spjd	*data = NULL;
240219089Spjd
241219089Spjd	error = zfs_onexit_minor_to_state(minor, &zo);
242219089Spjd	if (error)
243219089Spjd		return (error);
244219089Spjd
245219089Spjd	mutex_enter(&zo->zo_lock);
246219089Spjd	ap = zfs_onexit_find_cb(zo, action_handle);
247219089Spjd	if (ap != NULL)
248219089Spjd		*data = ap->za_data;
249219089Spjd	else
250249195Smm		error = SET_ERROR(ENOENT);
251219089Spjd	mutex_exit(&zo->zo_lock);
252219089Spjd
253219089Spjd	return (error);
254219089Spjd}
255