1168404Spjd/* 2168404Spjd * CDDL HEADER START 3168404Spjd * 4168404Spjd * The contents of this file are subject to the terms of the 5185029Spjd * Common Development and Distribution License (the "License"). 6185029Spjd * You may not use this file except in compliance with the License. 7168404Spjd * 8168404Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9168404Spjd * or http://www.opensolaris.org/os/licensing. 10168404Spjd * See the License for the specific language governing permissions 11168404Spjd * and limitations under the License. 12168404Spjd * 13168404Spjd * When distributing Covered Code, include this CDDL HEADER in each 14168404Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15168404Spjd * If applicable, add the following below this CDDL HEADER, with the 16168404Spjd * fields enclosed by brackets "[]" replaced with your own identifying 17168404Spjd * information: Portions Copyright [yyyy] [name of copyright owner] 18168404Spjd * 19168404Spjd * CDDL HEADER END 20168404Spjd */ 21168404Spjd/* 22219089Spjd * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 23168404Spjd * Use is subject to license terms. 24168404Spjd */ 25260742Savg/* 26260742Savg * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 27275486Sdelphij * Copyright 2012 Garrett D'Amore <garrett@damore.org>. All rights reserved. 28297078Smav * Copyright (c) 2014 by Delphix. All rights reserved. 29260742Savg */ 30168404Spjd 31168404Spjd#include <sys/zfs_context.h> 32168404Spjd 33168404Spjdint taskq_now; 34208047Smmtaskq_t *system_taskq; 35168404Spjd 36168404Spjd#define TASKQ_ACTIVE 0x00010000 37297078Smav#define TASKQ_NAMELEN 31 38168404Spjd 39168404Spjdstruct taskq { 40297078Smav char tq_name[TASKQ_NAMELEN + 1]; 41168404Spjd kmutex_t tq_lock; 42168404Spjd krwlock_t tq_threadlock; 43168404Spjd kcondvar_t tq_dispatch_cv; 44168404Spjd kcondvar_t tq_wait_cv; 45168404Spjd thread_t *tq_threadlist; 46168404Spjd int tq_flags; 47168404Spjd int tq_active; 48168404Spjd int tq_nthreads; 49168404Spjd int tq_nalloc; 50168404Spjd int tq_minalloc; 51168404Spjd int tq_maxalloc; 52219089Spjd kcondvar_t tq_maxalloc_cv; 53219089Spjd int tq_maxalloc_wait; 54260742Savg taskq_ent_t *tq_freelist; 55260742Savg taskq_ent_t tq_task; 56168404Spjd}; 57168404Spjd 58260742Savgstatic taskq_ent_t * 59168404Spjdtask_alloc(taskq_t *tq, int tqflags) 60168404Spjd{ 61260742Savg taskq_ent_t *t; 62219089Spjd int rv; 63168404Spjd 64219089Spjdagain: if ((t = tq->tq_freelist) != NULL && tq->tq_nalloc >= tq->tq_minalloc) { 65260742Savg tq->tq_freelist = t->tqent_next; 66168404Spjd } else { 67168404Spjd if (tq->tq_nalloc >= tq->tq_maxalloc) { 68219089Spjd if (!(tqflags & KM_SLEEP)) 69168404Spjd return (NULL); 70219089Spjd 71168404Spjd /* 72168404Spjd * We don't want to exceed tq_maxalloc, but we can't 73168404Spjd * wait for other tasks to complete (and thus free up 74168404Spjd * task structures) without risking deadlock with 75168404Spjd * the caller. So, we just delay for one second 76219089Spjd * to throttle the allocation rate. If we have tasks 77219089Spjd * complete before one second timeout expires then 78219089Spjd * taskq_ent_free will signal us and we will 79219089Spjd * immediately retry the allocation. 80168404Spjd */ 81219089Spjd tq->tq_maxalloc_wait++; 82219089Spjd rv = cv_timedwait(&tq->tq_maxalloc_cv, 83219089Spjd &tq->tq_lock, ddi_get_lbolt() + hz); 84219089Spjd tq->tq_maxalloc_wait--; 85219089Spjd if (rv > 0) 86219089Spjd goto again; /* signaled */ 87168404Spjd } 88219089Spjd mutex_exit(&tq->tq_lock); 89219089Spjd 90260742Savg t = kmem_alloc(sizeof (taskq_ent_t), tqflags & KM_SLEEP); 91219089Spjd 92168404Spjd mutex_enter(&tq->tq_lock); 93168404Spjd if (t != NULL) 94168404Spjd tq->tq_nalloc++; 95168404Spjd } 96168404Spjd return (t); 97168404Spjd} 98168404Spjd 99168404Spjdstatic void 100260742Savgtask_free(taskq_t *tq, taskq_ent_t *t) 101168404Spjd{ 102168404Spjd if (tq->tq_nalloc <= tq->tq_minalloc) { 103260742Savg t->tqent_next = tq->tq_freelist; 104168404Spjd tq->tq_freelist = t; 105168404Spjd } else { 106168404Spjd tq->tq_nalloc--; 107168404Spjd mutex_exit(&tq->tq_lock); 108260742Savg kmem_free(t, sizeof (taskq_ent_t)); 109168404Spjd mutex_enter(&tq->tq_lock); 110168404Spjd } 111219089Spjd 112219089Spjd if (tq->tq_maxalloc_wait) 113219089Spjd cv_signal(&tq->tq_maxalloc_cv); 114168404Spjd} 115168404Spjd 116168404Spjdtaskqid_t 117168404Spjdtaskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t tqflags) 118168404Spjd{ 119260742Savg taskq_ent_t *t; 120168404Spjd 121168404Spjd if (taskq_now) { 122168404Spjd func(arg); 123168404Spjd return (1); 124168404Spjd } 125168404Spjd 126168404Spjd mutex_enter(&tq->tq_lock); 127168404Spjd ASSERT(tq->tq_flags & TASKQ_ACTIVE); 128168404Spjd if ((t = task_alloc(tq, tqflags)) == NULL) { 129168404Spjd mutex_exit(&tq->tq_lock); 130168404Spjd return (0); 131168404Spjd } 132219089Spjd if (tqflags & TQ_FRONT) { 133260742Savg t->tqent_next = tq->tq_task.tqent_next; 134260742Savg t->tqent_prev = &tq->tq_task; 135219089Spjd } else { 136260742Savg t->tqent_next = &tq->tq_task; 137260742Savg t->tqent_prev = tq->tq_task.tqent_prev; 138219089Spjd } 139260742Savg t->tqent_next->tqent_prev = t; 140260742Savg t->tqent_prev->tqent_next = t; 141260742Savg t->tqent_func = func; 142260742Savg t->tqent_arg = arg; 143275486Sdelphij t->tqent_flags = 0; 144168404Spjd cv_signal(&tq->tq_dispatch_cv); 145168404Spjd mutex_exit(&tq->tq_lock); 146168404Spjd return (1); 147168404Spjd} 148168404Spjd 149168404Spjdvoid 150260742Savgtaskq_dispatch_ent(taskq_t *tq, task_func_t func, void *arg, uint_t flags, 151260742Savg taskq_ent_t *t) 152260742Savg{ 153260742Savg ASSERT(func != NULL); 154260742Savg ASSERT(!(tq->tq_flags & TASKQ_DYNAMIC)); 155260742Savg 156260742Savg /* 157260742Savg * Mark it as a prealloc'd task. This is important 158260742Savg * to ensure that we don't free it later. 159260742Savg */ 160260742Savg t->tqent_flags |= TQENT_FLAG_PREALLOC; 161260742Savg /* 162260742Savg * Enqueue the task to the underlying queue. 163260742Savg */ 164260742Savg mutex_enter(&tq->tq_lock); 165260742Savg 166260742Savg if (flags & TQ_FRONT) { 167260742Savg t->tqent_next = tq->tq_task.tqent_next; 168260742Savg t->tqent_prev = &tq->tq_task; 169260742Savg } else { 170260742Savg t->tqent_next = &tq->tq_task; 171260742Savg t->tqent_prev = tq->tq_task.tqent_prev; 172260742Savg } 173260742Savg t->tqent_next->tqent_prev = t; 174260742Savg t->tqent_prev->tqent_next = t; 175260742Savg t->tqent_func = func; 176260742Savg t->tqent_arg = arg; 177260742Savg cv_signal(&tq->tq_dispatch_cv); 178260742Savg mutex_exit(&tq->tq_lock); 179260742Savg} 180260742Savg 181260742Savgvoid 182168404Spjdtaskq_wait(taskq_t *tq) 183168404Spjd{ 184168404Spjd mutex_enter(&tq->tq_lock); 185260742Savg while (tq->tq_task.tqent_next != &tq->tq_task || tq->tq_active != 0) 186168404Spjd cv_wait(&tq->tq_wait_cv, &tq->tq_lock); 187168404Spjd mutex_exit(&tq->tq_lock); 188168404Spjd} 189168404Spjd 190168404Spjdstatic void * 191168404Spjdtaskq_thread(void *arg) 192168404Spjd{ 193168404Spjd taskq_t *tq = arg; 194260742Savg taskq_ent_t *t; 195260742Savg boolean_t prealloc; 196168404Spjd 197168404Spjd mutex_enter(&tq->tq_lock); 198168404Spjd while (tq->tq_flags & TASKQ_ACTIVE) { 199260742Savg if ((t = tq->tq_task.tqent_next) == &tq->tq_task) { 200168404Spjd if (--tq->tq_active == 0) 201168404Spjd cv_broadcast(&tq->tq_wait_cv); 202168404Spjd cv_wait(&tq->tq_dispatch_cv, &tq->tq_lock); 203168404Spjd tq->tq_active++; 204168404Spjd continue; 205168404Spjd } 206260742Savg t->tqent_prev->tqent_next = t->tqent_next; 207260742Savg t->tqent_next->tqent_prev = t->tqent_prev; 208260742Savg t->tqent_next = NULL; 209260742Savg t->tqent_prev = NULL; 210260742Savg prealloc = t->tqent_flags & TQENT_FLAG_PREALLOC; 211168404Spjd mutex_exit(&tq->tq_lock); 212168404Spjd 213168404Spjd rw_enter(&tq->tq_threadlock, RW_READER); 214260742Savg t->tqent_func(t->tqent_arg); 215168404Spjd rw_exit(&tq->tq_threadlock); 216168404Spjd 217168404Spjd mutex_enter(&tq->tq_lock); 218260742Savg if (!prealloc) 219260742Savg task_free(tq, t); 220168404Spjd } 221168404Spjd tq->tq_nthreads--; 222168404Spjd cv_broadcast(&tq->tq_wait_cv); 223168404Spjd mutex_exit(&tq->tq_lock); 224168404Spjd return (NULL); 225168404Spjd} 226168404Spjd 227168404Spjd/*ARGSUSED*/ 228168404Spjdtaskq_t * 229168404Spjdtaskq_create(const char *name, int nthreads, pri_t pri, 230168404Spjd int minalloc, int maxalloc, uint_t flags) 231168404Spjd{ 232168404Spjd taskq_t *tq = kmem_zalloc(sizeof (taskq_t), KM_SLEEP); 233168404Spjd int t; 234168404Spjd 235209962Smm if (flags & TASKQ_THREADS_CPU_PCT) { 236209962Smm int pct; 237209962Smm ASSERT3S(nthreads, >=, 0); 238209962Smm ASSERT3S(nthreads, <=, 100); 239209962Smm pct = MIN(nthreads, 100); 240209962Smm pct = MAX(pct, 0); 241209962Smm 242209962Smm nthreads = (sysconf(_SC_NPROCESSORS_ONLN) * pct) / 100; 243209962Smm nthreads = MAX(nthreads, 1); /* need at least 1 thread */ 244209962Smm } else { 245209962Smm ASSERT3S(nthreads, >=, 1); 246209962Smm } 247209962Smm 248168404Spjd rw_init(&tq->tq_threadlock, NULL, RW_DEFAULT, NULL); 249185029Spjd mutex_init(&tq->tq_lock, NULL, MUTEX_DEFAULT, NULL); 250185029Spjd cv_init(&tq->tq_dispatch_cv, NULL, CV_DEFAULT, NULL); 251185029Spjd cv_init(&tq->tq_wait_cv, NULL, CV_DEFAULT, NULL); 252219089Spjd cv_init(&tq->tq_maxalloc_cv, NULL, CV_DEFAULT, NULL); 253297078Smav (void) strncpy(tq->tq_name, name, TASKQ_NAMELEN + 1); 254168404Spjd tq->tq_flags = flags | TASKQ_ACTIVE; 255168404Spjd tq->tq_active = nthreads; 256168404Spjd tq->tq_nthreads = nthreads; 257168404Spjd tq->tq_minalloc = minalloc; 258168404Spjd tq->tq_maxalloc = maxalloc; 259260742Savg tq->tq_task.tqent_next = &tq->tq_task; 260260742Savg tq->tq_task.tqent_prev = &tq->tq_task; 261168404Spjd tq->tq_threadlist = kmem_alloc(nthreads * sizeof (thread_t), KM_SLEEP); 262168404Spjd 263168404Spjd if (flags & TASKQ_PREPOPULATE) { 264168404Spjd mutex_enter(&tq->tq_lock); 265168404Spjd while (minalloc-- > 0) 266168404Spjd task_free(tq, task_alloc(tq, KM_SLEEP)); 267168404Spjd mutex_exit(&tq->tq_lock); 268168404Spjd } 269168404Spjd 270168404Spjd for (t = 0; t < nthreads; t++) 271168404Spjd (void) thr_create(0, 0, taskq_thread, 272168404Spjd tq, THR_BOUND, &tq->tq_threadlist[t]); 273168404Spjd 274168404Spjd return (tq); 275168404Spjd} 276168404Spjd 277168404Spjdvoid 278168404Spjdtaskq_destroy(taskq_t *tq) 279168404Spjd{ 280168404Spjd int t; 281168404Spjd int nthreads = tq->tq_nthreads; 282168404Spjd 283168404Spjd taskq_wait(tq); 284168404Spjd 285168404Spjd mutex_enter(&tq->tq_lock); 286168404Spjd 287168404Spjd tq->tq_flags &= ~TASKQ_ACTIVE; 288168404Spjd cv_broadcast(&tq->tq_dispatch_cv); 289168404Spjd 290168404Spjd while (tq->tq_nthreads != 0) 291168404Spjd cv_wait(&tq->tq_wait_cv, &tq->tq_lock); 292168404Spjd 293168404Spjd tq->tq_minalloc = 0; 294168404Spjd while (tq->tq_nalloc != 0) { 295168404Spjd ASSERT(tq->tq_freelist != NULL); 296168404Spjd task_free(tq, task_alloc(tq, KM_SLEEP)); 297168404Spjd } 298168404Spjd 299168404Spjd mutex_exit(&tq->tq_lock); 300168404Spjd 301168404Spjd for (t = 0; t < nthreads; t++) 302168404Spjd (void) thr_join(tq->tq_threadlist[t], NULL, NULL); 303168404Spjd 304168404Spjd kmem_free(tq->tq_threadlist, nthreads * sizeof (thread_t)); 305168404Spjd 306168404Spjd rw_destroy(&tq->tq_threadlock); 307185029Spjd mutex_destroy(&tq->tq_lock); 308185029Spjd cv_destroy(&tq->tq_dispatch_cv); 309185029Spjd cv_destroy(&tq->tq_wait_cv); 310219089Spjd cv_destroy(&tq->tq_maxalloc_cv); 311168404Spjd 312168404Spjd kmem_free(tq, sizeof (taskq_t)); 313168404Spjd} 314168404Spjd 315168404Spjdint 316168404Spjdtaskq_member(taskq_t *tq, void *t) 317168404Spjd{ 318168404Spjd int i; 319168404Spjd 320168404Spjd if (taskq_now) 321168404Spjd return (1); 322168404Spjd 323168404Spjd for (i = 0; i < tq->tq_nthreads; i++) 324168404Spjd if (tq->tq_threadlist[i] == (thread_t)(uintptr_t)t) 325168404Spjd return (1); 326168404Spjd 327168404Spjd return (0); 328168404Spjd} 329208047Smm 330208047Smmvoid 331208047Smmsystem_taskq_init(void) 332208047Smm{ 333208047Smm system_taskq = taskq_create("system_taskq", 64, minclsyspri, 4, 512, 334208047Smm TASKQ_DYNAMIC | TASKQ_PREPOPULATE); 335208047Smm} 336219089Spjd 337219089Spjdvoid 338219089Spjdsystem_taskq_fini(void) 339219089Spjd{ 340219089Spjd taskq_destroy(system_taskq); 341219089Spjd system_taskq = NULL; /* defensive */ 342219089Spjd} 343