1329800Smav/*
2329800Smav * CDDL HEADER START
3329800Smav *
4329800Smav * This file and its contents are supplied under the terms of the
5329800Smav * Common Development and Distribution License ("CDDL"), version 1.0.
6329800Smav * You may only use this file in accordance with the terms of version
7329800Smav * 1.0 of the CDDL.
8329800Smav *
9329800Smav * A full copy of the text of the CDDL should have accompanied this
10329800Smav * source. A copy of the CDDL is also available via the Internet at
11329800Smav * http://www.illumos.org/license/CDDL.
12329800Smav *
13329800Smav * CDDL HEADER END
14329800Smav */
15329800Smav
16329800Smav/*
17329800Smav * Copyright (c) 2017 by Delphix. All rights reserved.
18329800Smav */
19329800Smav
20329800Smav/*
21329800Smav * ZTHR Infrastructure
22329800Smav * ===================
23329800Smav *
24329800Smav * ZTHR threads are used for isolated operations that span multiple txgs
25329800Smav * within a SPA. They generally exist from SPA creation/loading and until
26329800Smav * the SPA is exported/destroyed. The ideal requirements for an operation
27329800Smav * to be modeled with a zthr are the following:
28329800Smav *
29329800Smav * 1] The operation needs to run over multiple txgs.
 * 2] There should be a single point of reference in memory or on disk that
31329800Smav *    indicates whether the operation should run/is running or is
32329800Smav *    stopped.
33329800Smav *
34329800Smav * If the operation satisfies the above then the following rules guarantee
35329800Smav * a certain level of correctness:
36329800Smav *
37329800Smav * 1] Any thread EXCEPT the zthr changes the work indicator from stopped
38329800Smav *    to running but not the opposite.
39329800Smav * 2] Only the zthr can change the work indicator from running to stopped
40329800Smav *    (e.g. when it is done) but not the opposite.
41329800Smav *
42329800Smav * This way a normal zthr cycle should go like this:
43329800Smav *
44329800Smav * 1] An external thread changes the work indicator from stopped to
45329800Smav *    running and wakes up the zthr.
46329800Smav * 2] The zthr wakes up, checks the indicator and starts working.
47329800Smav * 3] When the zthr is done, it changes the indicator to stopped, allowing
48329800Smav *    a new cycle to start.
49329800Smav *
50346686Smav * Besides being awakened by other threads, a zthr can be configured
 * during creation to wake up on its own after a specified interval
52346686Smav * [see zthr_create_timer()].
53346686Smav *
54329800Smav * == ZTHR creation
55329800Smav *
56329800Smav * Every zthr needs three inputs to start running:
57329800Smav *
58329800Smav * 1] A user-defined checker function (checkfunc) that decides whether
59329800Smav *    the zthr should start working or go to sleep. The function should
60329800Smav *    return TRUE when the zthr needs to work or FALSE to let it sleep,
61329800Smav *    and should adhere to the following signature:
62329800Smav *    boolean_t checkfunc_name(void *args, zthr_t *t);
63329800Smav *
64329800Smav * 2] A user-defined ZTHR function (func) which the zthr executes when
65329800Smav *    it is not sleeping. The function should adhere to the following
66329800Smav *    signature type:
67329800Smav *    int func_name(void *args, zthr_t *t);
68329800Smav *
69329800Smav * 3] A void args pointer that will be passed to checkfunc and func
70329800Smav *    implicitly by the infrastructure.
71329800Smav *
72329800Smav * The reason why the above API needs two different functions,
73329800Smav * instead of one that both checks and does the work, has to do with
74329800Smav * the zthr's internal lock (zthr_lock) and the allowed cancellation
75329800Smav * windows. We want to hold the zthr_lock while running checkfunc
76329800Smav * but not while running func. This way the zthr can be cancelled
77329800Smav * while doing work and not while checking for work.
78329800Smav *
79329800Smav * To start a zthr:
80329800Smav *     zthr_t *zthr_pointer = zthr_create(checkfunc, func, args);
81346686Smav * or
82346686Smav *     zthr_t *zthr_pointer = zthr_create_timer(checkfunc, func,
83346686Smav *         args, max_sleep);
84329800Smav *
85329800Smav * After that you should be able to wakeup, cancel, and resume the
86329800Smav * zthr from another thread using zthr_pointer.
87329800Smav *
88329800Smav * NOTE: ZTHR threads could potentially wake up spuriously and the
89329800Smav * user should take this into account when writing a checkfunc.
90329800Smav * [see ZTHR state transitions]
91329800Smav *
92329800Smav * == ZTHR cancellation
93329800Smav *
94329800Smav * ZTHR threads must be cancelled when their SPA is being exported
95329800Smav * or when they need to be paused so they don't interfere with other
96329800Smav * operations.
97329800Smav *
98329800Smav * To cancel a zthr:
99329800Smav *     zthr_cancel(zthr_pointer);
100329800Smav *
101329800Smav * To resume it:
102329800Smav *     zthr_resume(zthr_pointer);
103329800Smav *
104329800Smav * A zthr will implicitly check if it has received a cancellation
 * signal every time func returns and every time it wakes up [see ZTHR
106329800Smav * state transitions below].
107329800Smav *
108329800Smav * At times, waiting for the zthr's func to finish its job may take
109329800Smav * time. This may be very time-consuming for some operations that
 * need to cancel the SPA's zthrs (e.g. spa_export). For this scenario
111329800Smav * the user can explicitly make their ZTHR function aware of incoming
112329800Smav * cancellation signals using zthr_iscancelled(). A common pattern for
113329800Smav * that looks like this:
114329800Smav *
115329800Smav * int
116329800Smav * func_name(void *args, zthr_t *t)
117329800Smav * {
118329800Smav *     ... <unpack args> ...
119329800Smav *     while (!work_done && !zthr_iscancelled(t)) {
120329800Smav *         ... <do more work> ...
121329800Smav *     }
122329800Smav *     return (0);
123329800Smav * }
124329800Smav *
125329800Smav * == ZTHR exit
126329800Smav *
127329800Smav * For the rare cases where the zthr wants to stop running voluntarily
128329800Smav * while running its ZTHR function (func), we provide zthr_exit().
129329800Smav * When a zthr has voluntarily stopped running, it can be resumed with
130329800Smav * zthr_resume(), just like it would if it was cancelled by some other
131329800Smav * thread.
132329800Smav *
133329800Smav * == ZTHR cleanup
134329800Smav *
135329800Smav * Cancelling a zthr doesn't clean up its metadata (internal locks,
 * function pointers to func and checkfunc, etc.). This is because
137329800Smav * we want to keep them around in case we want to resume the execution
138329800Smav * of the zthr later. Similarly for zthrs that exit themselves.
139329800Smav *
140329800Smav * To completely cleanup a zthr, cancel it first to ensure that it
141329800Smav * is not running and then use zthr_destroy().
142329800Smav *
143329800Smav * == ZTHR state transitions
144329800Smav *
145329800Smav *    zthr creation
146329800Smav *      +
147329800Smav *      |
148329800Smav *      |      woke up
149329800Smav *      |   +--------------+ sleep
150329800Smav *      |   |                  ^
151329800Smav *      |   |                  |
152329800Smav *      |   |                  | FALSE
153329800Smav *      |   |                  |
154329800Smav *      v   v     FALSE        +
155329800Smav *   cancelled? +---------> checkfunc?
156329800Smav *      +   ^                  +
157329800Smav *      |   |                  |
158329800Smav *      |   |                  | TRUE
159329800Smav *      |   |                  |
160329800Smav *      |   |  func returned   v
161329800Smav *      |   +---------------+ func
162329800Smav *      |
163329800Smav *      | TRUE
164329800Smav *      |
165329800Smav *      v
166329800Smav *   zthr stopped running
167329800Smav *
168329800Smav */
169329800Smav
170329800Smav#include <sys/zfs_context.h>
171329800Smav#include <sys/zthr.h>
172329800Smav
173329800Smavvoid
174329800Smavzthr_exit(zthr_t *t, int rc)
175329800Smav{
176329800Smav	ASSERT3P(t->zthr_thread, ==, curthread);
177329800Smav	mutex_enter(&t->zthr_lock);
178329800Smav	t->zthr_thread = NULL;
179329800Smav	t->zthr_rc = rc;
180329800Smav	cv_broadcast(&t->zthr_cv);
181329800Smav	mutex_exit(&t->zthr_lock);
182329800Smav	thread_exit();
183329800Smav}
184329800Smav
185329800Smavstatic void
186329800Smavzthr_procedure(void *arg)
187329800Smav{
188329800Smav	zthr_t *t = arg;
189329800Smav	int rc = 0;
190329800Smav
191329800Smav	mutex_enter(&t->zthr_lock);
192329800Smav	while (!t->zthr_cancel) {
193329800Smav		if (t->zthr_checkfunc(t->zthr_arg, t)) {
194329800Smav			mutex_exit(&t->zthr_lock);
195329800Smav			rc = t->zthr_func(t->zthr_arg, t);
196329800Smav			mutex_enter(&t->zthr_lock);
197329800Smav		} else {
198329800Smav			/* go to sleep */
199346686Smav			if (t->zthr_wait_time == 0) {
200346686Smav				cv_wait(&t->zthr_cv, &t->zthr_lock);
201346686Smav			} else {
202346686Smav				(void) cv_timedwait_hires(&t->zthr_cv,
203346686Smav				    &t->zthr_lock, t->zthr_wait_time,
204346686Smav				    MSEC2NSEC(1), 0);
205346686Smav			}
206329800Smav		}
207329800Smav	}
208329800Smav	mutex_exit(&t->zthr_lock);
209329800Smav
210329800Smav	zthr_exit(t, rc);
211329800Smav}
212329800Smav
213329800Smavzthr_t *
214329800Smavzthr_create(zthr_checkfunc_t *checkfunc, zthr_func_t *func, void *arg)
215329800Smav{
216346686Smav	return (zthr_create_timer(checkfunc, func, arg, (hrtime_t)0));
217346686Smav}
218346686Smav
219346686Smav/*
220346686Smav * Create a zthr with specified maximum sleep time.  If the time
221346686Smav * in sleeping state exceeds max_sleep, a wakeup(do the check and
222346686Smav * start working if required) will be triggered.
223346686Smav */
224346686Smavzthr_t *
225346686Smavzthr_create_timer(zthr_checkfunc_t *checkfunc, zthr_func_t *func,
226346686Smav    void *arg, hrtime_t max_sleep)
227346686Smav{
228329800Smav	zthr_t *t = kmem_zalloc(sizeof (*t), KM_SLEEP);
229329800Smav	mutex_init(&t->zthr_lock, NULL, MUTEX_DEFAULT, NULL);
230329800Smav	cv_init(&t->zthr_cv, NULL, CV_DEFAULT, NULL);
231329800Smav
232329800Smav	mutex_enter(&t->zthr_lock);
233329800Smav	t->zthr_checkfunc = checkfunc;
234329800Smav	t->zthr_func = func;
235329800Smav	t->zthr_arg = arg;
236346686Smav	t->zthr_wait_time = max_sleep;
237329800Smav
238329800Smav	t->zthr_thread = thread_create(NULL, 0, zthr_procedure, t,
239329800Smav	    0, &p0, TS_RUN, minclsyspri);
240329800Smav	mutex_exit(&t->zthr_lock);
241329800Smav
242329800Smav	return (t);
243329800Smav}
244329800Smav
245329800Smavvoid
246329800Smavzthr_destroy(zthr_t *t)
247329800Smav{
248329800Smav	VERIFY3P(t->zthr_thread, ==, NULL);
249329800Smav	mutex_destroy(&t->zthr_lock);
250329800Smav	cv_destroy(&t->zthr_cv);
251329800Smav	kmem_free(t, sizeof (*t));
252329800Smav}
253329800Smav
254329800Smav/*
255329800Smav * Note: If the zthr is not sleeping and misses the wakeup
256329800Smav * (e.g it is running its ZTHR function), it will check if
257329800Smav * there is work to do before going to sleep using its checker
258329800Smav * function [see ZTHR state transition in ZTHR block comment].
259329800Smav * Thus, missing the wakeup still yields the expected behavior.
260329800Smav */
261329800Smavvoid
262329800Smavzthr_wakeup(zthr_t *t)
263329800Smav{
264329800Smav	mutex_enter(&t->zthr_lock);
265329800Smav	cv_broadcast(&t->zthr_cv);
266329800Smav	mutex_exit(&t->zthr_lock);
267329800Smav}
268329800Smav
269329800Smav/*
270329800Smav * Note: If the zthr is not running (e.g. has been cancelled
271329800Smav * already), this is a no-op.
272329800Smav */
273329800Smavint
274329800Smavzthr_cancel(zthr_t *t)
275329800Smav{
276329800Smav	int rc = 0;
277329800Smav
278329800Smav	mutex_enter(&t->zthr_lock);
279329800Smav
280329800Smav	/* broadcast in case the zthr is sleeping */
281329800Smav	cv_broadcast(&t->zthr_cv);
282329800Smav
283329800Smav	t->zthr_cancel = B_TRUE;
284329800Smav	while (t->zthr_thread != NULL)
285329800Smav		cv_wait(&t->zthr_cv, &t->zthr_lock);
286329800Smav	t->zthr_cancel = B_FALSE;
287329800Smav	rc = t->zthr_rc;
288329800Smav	mutex_exit(&t->zthr_lock);
289329800Smav
290329800Smav	return (rc);
291329800Smav}
292329800Smav
293329800Smavvoid
294329800Smavzthr_resume(zthr_t *t)
295329800Smav{
296329800Smav	ASSERT3P(t->zthr_thread, ==, NULL);
297329800Smav
298329800Smav	mutex_enter(&t->zthr_lock);
299329800Smav
300329800Smav	ASSERT3P(&t->zthr_checkfunc, !=, NULL);
301329800Smav	ASSERT3P(&t->zthr_func, !=, NULL);
302329800Smav	ASSERT(!t->zthr_cancel);
303329800Smav
304329800Smav	t->zthr_thread = thread_create(NULL, 0, zthr_procedure, t,
305329800Smav	    0, &p0, TS_RUN, minclsyspri);
306329800Smav
307329800Smav	mutex_exit(&t->zthr_lock);
308329800Smav}
309329800Smav
310329800Smav/*
311329800Smav * This function is intended to be used by the zthr itself
312329800Smav * to check if another thread has signal it to stop running.
313329800Smav *
314329800Smav * returns TRUE if we are in the middle of trying to cancel
315329800Smav *     this thread.
316329800Smav *
317329800Smav * returns FALSE otherwise.
318329800Smav */
319329800Smavboolean_t
320329800Smavzthr_iscancelled(zthr_t *t)
321329800Smav{
322329800Smav	boolean_t cancelled;
323329800Smav
324329800Smav	ASSERT3P(t->zthr_thread, ==, curthread);
325329800Smav
326329800Smav	mutex_enter(&t->zthr_lock);
327329800Smav	cancelled = t->zthr_cancel;
328329800Smav	mutex_exit(&t->zthr_lock);
329329800Smav
330329800Smav	return (cancelled);
331329800Smav}
332329800Smav
333329800Smavboolean_t
334329800Smavzthr_isrunning(zthr_t *t)
335329800Smav{
336329800Smav	boolean_t running;
337329800Smav
338329800Smav	mutex_enter(&t->zthr_lock);
339329800Smav	running = (t->zthr_thread != NULL);
340329800Smav	mutex_exit(&t->zthr_lock);
341329800Smav
342329800Smav	return (running);
343329800Smav}
344