/*
 * CDDL HEADER START
 *
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2017 by Delphix. All rights reserved.
 */

/*
 * ZTHR Infrastructure
 * ===================
 *
 * ZTHR threads are used for isolated operations that span multiple txgs
 * within a SPA. They generally exist from SPA creation/loading and until
 * the SPA is exported/destroyed. The ideal requirements for an operation
 * to be modeled with a zthr are the following:
 *
 * 1] The operation needs to run over multiple txgs.
 * 2] There is a single point of reference in memory or on disk that
 *    indicates whether the operation should run/is running or is
 *    stopped.
 *
 * If the operation satisfies the above then the following rules guarantee
 * a certain level of correctness:
 *
 * 1] Any thread EXCEPT the zthr changes the work indicator from stopped
 *    to running but not the opposite.
 * 2] Only the zthr can change the work indicator from running to stopped
 *    (e.g. when it is done) but not the opposite.
 *
 * This way a normal zthr cycle should go like this:
 *
 * 1] An external thread changes the work indicator from stopped to
 *    running and wakes up the zthr.
 * 2] The zthr wakes up, checks the indicator and starts working.
 * 3] When the zthr is done, it changes the indicator to stopped, allowing
 *    a new cycle to start.
 *
 * Besides being awakened by other threads, a zthr can be configured
 * during creation to wake up on its own after a specified interval
 * [see zthr_create_timer()].
 *
 * == ZTHR creation
 *
 * Every zthr needs three inputs to start running:
 *
 * 1] A user-defined checker function (checkfunc) that decides whether
 *    the zthr should start working or go to sleep. The function should
 *    return TRUE when the zthr needs to work or FALSE to let it sleep,
 *    and should adhere to the following signature:
 *    boolean_t checkfunc_name(void *args, zthr_t *t);
 *
 * 2] A user-defined ZTHR function (func) which the zthr executes when
 *    it is not sleeping. The function should adhere to the following
 *    signature type:
 *    int func_name(void *args, zthr_t *t);
 *
 * 3] A void args pointer that will be passed to checkfunc and func
 *    implicitly by the infrastructure.
 *
 * The reason why the above API needs two different functions,
 * instead of one that both checks and does the work, has to do with
 * the zthr's internal lock (zthr_lock) and the allowed cancellation
 * windows. We want to hold the zthr_lock while running checkfunc
 * but not while running func. This way the zthr can be cancelled
 * while doing work and not while checking for work.
 *
 * To start a zthr:
 *     zthr_t *zthr_pointer = zthr_create(checkfunc, func, args);
 * or
 *     zthr_t *zthr_pointer = zthr_create_timer(checkfunc, func,
 *         args, max_sleep);
 *
 * After that you should be able to wakeup, cancel, and resume the
 * zthr from another thread using zthr_pointer.
 *
 * NOTE: ZTHR threads could potentially wake up spuriously and the
 * user should take this into account when writing a checkfunc.
 * [see ZTHR state transitions]
 *
 * == ZTHR cancellation
 *
 * ZTHR threads must be cancelled when their SPA is being exported
 * or when they need to be paused so they don't interfere with other
 * operations.
 *
 * To cancel a zthr:
 *     zthr_cancel(zthr_pointer);
 *
 * To resume it:
 *     zthr_resume(zthr_pointer);
 *
 * A zthr will implicitly check if it has received a cancellation
 * signal every time func returns and every time it wakes up [see ZTHR
 * state transitions below].
 *
 * At times, waiting for the zthr's func to finish its job may take
 * time. This may be very time-consuming for some operations that
 * need to cancel the SPA's zthrs (e.g. spa_export). For this scenario
 * the user can explicitly make their ZTHR function aware of incoming
 * cancellation signals using zthr_iscancelled(). A common pattern for
 * that looks like this:
 *
 * int
 * func_name(void *args, zthr_t *t)
 * {
 *     ... <unpack args> ...
 *     while (!work_done && !zthr_iscancelled(t)) {
 *         ... <do more work> ...
 *     }
 *     return (0);
 * }
 *
 * == ZTHR exit
 *
 * For the rare cases where the zthr wants to stop running voluntarily
 * while running its ZTHR function (func), we provide zthr_exit().
 * When a zthr has voluntarily stopped running, it can be resumed with
 * zthr_resume(), just like it would if it was cancelled by some other
 * thread.
 *
 * == ZTHR cleanup
 *
 * Cancelling a zthr doesn't clean up its metadata (internal locks,
 * function pointers to func and checkfunc, etc.). This is because
 * we want to keep them around in case we want to resume the execution
 * of the zthr later. Similarly for zthrs that exit themselves.
 *
 * To completely cleanup a zthr, cancel it first to ensure that it
 * is not running and then use zthr_destroy().
 *
 * == ZTHR state transitions
 *
 *    zthr creation
 *        +
 *        |
 *        |      woke up
 *        |   +--------------+ sleep
 *        |   |                  ^
 *        |   |                  |
 *        |   |                  | FALSE
 *        |   |                  |
 *        v   v     FALSE        +
 *   cancelled? +---------> checkfunc?
156329800Smav * + ^ + 157329800Smav * | | | 158329800Smav * | | | TRUE 159329800Smav * | | | 160329800Smav * | | func returned v 161329800Smav * | +---------------+ func 162329800Smav * | 163329800Smav * | TRUE 164329800Smav * | 165329800Smav * v 166329800Smav * zthr stopped running 167329800Smav * 168329800Smav */ 169329800Smav 170329800Smav#include <sys/zfs_context.h> 171329800Smav#include <sys/zthr.h> 172329800Smav 173329800Smavvoid 174329800Smavzthr_exit(zthr_t *t, int rc) 175329800Smav{ 176329800Smav ASSERT3P(t->zthr_thread, ==, curthread); 177329800Smav mutex_enter(&t->zthr_lock); 178329800Smav t->zthr_thread = NULL; 179329800Smav t->zthr_rc = rc; 180329800Smav cv_broadcast(&t->zthr_cv); 181329800Smav mutex_exit(&t->zthr_lock); 182329800Smav thread_exit(); 183329800Smav} 184329800Smav 185329800Smavstatic void 186329800Smavzthr_procedure(void *arg) 187329800Smav{ 188329800Smav zthr_t *t = arg; 189329800Smav int rc = 0; 190329800Smav 191329800Smav mutex_enter(&t->zthr_lock); 192329800Smav while (!t->zthr_cancel) { 193329800Smav if (t->zthr_checkfunc(t->zthr_arg, t)) { 194329800Smav mutex_exit(&t->zthr_lock); 195329800Smav rc = t->zthr_func(t->zthr_arg, t); 196329800Smav mutex_enter(&t->zthr_lock); 197329800Smav } else { 198329800Smav /* go to sleep */ 199346686Smav if (t->zthr_wait_time == 0) { 200346686Smav cv_wait(&t->zthr_cv, &t->zthr_lock); 201346686Smav } else { 202346686Smav (void) cv_timedwait_hires(&t->zthr_cv, 203346686Smav &t->zthr_lock, t->zthr_wait_time, 204346686Smav MSEC2NSEC(1), 0); 205346686Smav } 206329800Smav } 207329800Smav } 208329800Smav mutex_exit(&t->zthr_lock); 209329800Smav 210329800Smav zthr_exit(t, rc); 211329800Smav} 212329800Smav 213329800Smavzthr_t * 214329800Smavzthr_create(zthr_checkfunc_t *checkfunc, zthr_func_t *func, void *arg) 215329800Smav{ 216346686Smav return (zthr_create_timer(checkfunc, func, arg, (hrtime_t)0)); 217346686Smav} 218346686Smav 219346686Smav/* 220346686Smav * Create a zthr with specified maximum sleep time. 
If the time 221346686Smav * in sleeping state exceeds max_sleep, a wakeup(do the check and 222346686Smav * start working if required) will be triggered. 223346686Smav */ 224346686Smavzthr_t * 225346686Smavzthr_create_timer(zthr_checkfunc_t *checkfunc, zthr_func_t *func, 226346686Smav void *arg, hrtime_t max_sleep) 227346686Smav{ 228329800Smav zthr_t *t = kmem_zalloc(sizeof (*t), KM_SLEEP); 229329800Smav mutex_init(&t->zthr_lock, NULL, MUTEX_DEFAULT, NULL); 230329800Smav cv_init(&t->zthr_cv, NULL, CV_DEFAULT, NULL); 231329800Smav 232329800Smav mutex_enter(&t->zthr_lock); 233329800Smav t->zthr_checkfunc = checkfunc; 234329800Smav t->zthr_func = func; 235329800Smav t->zthr_arg = arg; 236346686Smav t->zthr_wait_time = max_sleep; 237329800Smav 238329800Smav t->zthr_thread = thread_create(NULL, 0, zthr_procedure, t, 239329800Smav 0, &p0, TS_RUN, minclsyspri); 240329800Smav mutex_exit(&t->zthr_lock); 241329800Smav 242329800Smav return (t); 243329800Smav} 244329800Smav 245329800Smavvoid 246329800Smavzthr_destroy(zthr_t *t) 247329800Smav{ 248329800Smav VERIFY3P(t->zthr_thread, ==, NULL); 249329800Smav mutex_destroy(&t->zthr_lock); 250329800Smav cv_destroy(&t->zthr_cv); 251329800Smav kmem_free(t, sizeof (*t)); 252329800Smav} 253329800Smav 254329800Smav/* 255329800Smav * Note: If the zthr is not sleeping and misses the wakeup 256329800Smav * (e.g it is running its ZTHR function), it will check if 257329800Smav * there is work to do before going to sleep using its checker 258329800Smav * function [see ZTHR state transition in ZTHR block comment]. 259329800Smav * Thus, missing the wakeup still yields the expected behavior. 260329800Smav */ 261329800Smavvoid 262329800Smavzthr_wakeup(zthr_t *t) 263329800Smav{ 264329800Smav mutex_enter(&t->zthr_lock); 265329800Smav cv_broadcast(&t->zthr_cv); 266329800Smav mutex_exit(&t->zthr_lock); 267329800Smav} 268329800Smav 269329800Smav/* 270329800Smav * Note: If the zthr is not running (e.g. 
has been cancelled 271329800Smav * already), this is a no-op. 272329800Smav */ 273329800Smavint 274329800Smavzthr_cancel(zthr_t *t) 275329800Smav{ 276329800Smav int rc = 0; 277329800Smav 278329800Smav mutex_enter(&t->zthr_lock); 279329800Smav 280329800Smav /* broadcast in case the zthr is sleeping */ 281329800Smav cv_broadcast(&t->zthr_cv); 282329800Smav 283329800Smav t->zthr_cancel = B_TRUE; 284329800Smav while (t->zthr_thread != NULL) 285329800Smav cv_wait(&t->zthr_cv, &t->zthr_lock); 286329800Smav t->zthr_cancel = B_FALSE; 287329800Smav rc = t->zthr_rc; 288329800Smav mutex_exit(&t->zthr_lock); 289329800Smav 290329800Smav return (rc); 291329800Smav} 292329800Smav 293329800Smavvoid 294329800Smavzthr_resume(zthr_t *t) 295329800Smav{ 296329800Smav ASSERT3P(t->zthr_thread, ==, NULL); 297329800Smav 298329800Smav mutex_enter(&t->zthr_lock); 299329800Smav 300329800Smav ASSERT3P(&t->zthr_checkfunc, !=, NULL); 301329800Smav ASSERT3P(&t->zthr_func, !=, NULL); 302329800Smav ASSERT(!t->zthr_cancel); 303329800Smav 304329800Smav t->zthr_thread = thread_create(NULL, 0, zthr_procedure, t, 305329800Smav 0, &p0, TS_RUN, minclsyspri); 306329800Smav 307329800Smav mutex_exit(&t->zthr_lock); 308329800Smav} 309329800Smav 310329800Smav/* 311329800Smav * This function is intended to be used by the zthr itself 312329800Smav * to check if another thread has signal it to stop running. 313329800Smav * 314329800Smav * returns TRUE if we are in the middle of trying to cancel 315329800Smav * this thread. 316329800Smav * 317329800Smav * returns FALSE otherwise. 
318329800Smav */ 319329800Smavboolean_t 320329800Smavzthr_iscancelled(zthr_t *t) 321329800Smav{ 322329800Smav boolean_t cancelled; 323329800Smav 324329800Smav ASSERT3P(t->zthr_thread, ==, curthread); 325329800Smav 326329800Smav mutex_enter(&t->zthr_lock); 327329800Smav cancelled = t->zthr_cancel; 328329800Smav mutex_exit(&t->zthr_lock); 329329800Smav 330329800Smav return (cancelled); 331329800Smav} 332329800Smav 333329800Smavboolean_t 334329800Smavzthr_isrunning(zthr_t *t) 335329800Smav{ 336329800Smav boolean_t running; 337329800Smav 338329800Smav mutex_enter(&t->zthr_lock); 339329800Smav running = (t->zthr_thread != NULL); 340329800Smav mutex_exit(&t->zthr_lock); 341329800Smav 342329800Smav return (running); 343329800Smav} 344