/*	$NetBSD: kern_lock.c,v 1.188 2024/01/14 11:46:05 andvar Exp $	*/

/*-
 * Copyright (c) 2002, 2006, 2007, 2008, 2009, 2020, 2023
 *     The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center, and by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_lock.c,v 1.188 2024/01/14 11:46:05 andvar Exp $");

#ifdef _KERNEL_OPT
#include "opt_lockdebug.h"
#endif

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/lock.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lockdebug.h>
#include <sys/cpu.h>
#include <sys/syslog.h>
#include <sys/atomic.h>
#include <sys/lwp.h>
#include <sys/pserialize.h>

#if defined(DIAGNOSTIC) && !defined(LOCKDEBUG)
#include <sys/ksyms.h>
#endif

#include <machine/lock.h>

#include <dev/lockstat.h>

#define	RETURN_ADDRESS	(uintptr_t)__builtin_return_address(0)

bool	kernel_lock_dodebug;

__cpu_simple_lock_t kernel_lock[CACHE_LINE_SIZE / sizeof(__cpu_simple_lock_t)]
    __cacheline_aligned;

void
assert_sleepable(void)
{
	const char *reason;
	long pctr;
	bool idle;

	if (__predict_false(panicstr != NULL)) {
		return;
	}

	LOCKDEBUG_BARRIER(kernel_lock, 1);

	/*
	 * Avoid disabling/re-enabling preemption here since this
	 * routine may be called in delicate situations.
	 */
	do {
		pctr = lwp_pctr();
		idle = CURCPU_IDLE_P();
	} while (__predict_false(pctr != lwp_pctr()));

	reason = NULL;
	if (__predict_false(idle) && !cold) {
		reason = "idle";
		goto panic;
	}
	if (__predict_false(cpu_intr_p())) {
		reason = "interrupt";
		goto panic;
	}
	if (__predict_false(cpu_softintr_p())) {
		reason = "softint";
		goto panic;
	}
	if (__predict_false(!pserialize_not_in_read_section())) {
		reason = "pserialize";
		goto panic;
	}
	return;

panic:	panic("%s: %s caller=%p", __func__, reason, (void *)RETURN_ADDRESS);
}
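
/*
 * Illustrative sketch: assert_sleepable() is intended to be called at
 * the head of a code path that may block, so that calls from interrupt,
 * softint, or idle context, or from within a pserialize read section,
 * are caught early.  A hypothetical caller might look like:
 *
 *	void *
 *	frob_alloc(size_t len)		// hypothetical function
 *	{
 *		assert_sleepable();	// we may sleep in kmem_alloc()
 *		return kmem_alloc(len, KM_SLEEP);
 *	}
 */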
/*
 * Functions for manipulating the kernel_lock.  We put them here
 * so that they show up in profiles.
 */

#define	_KERNEL_LOCK_ABORT(msg)						\
	LOCKDEBUG_ABORT(__func__, __LINE__, kernel_lock, &_kernel_lock_ops, msg)

#ifdef LOCKDEBUG
#define	_KERNEL_LOCK_ASSERT(cond)					\
do {									\
	if (!(cond))							\
		_KERNEL_LOCK_ABORT("assertion failed: " #cond);		\
} while (/* CONSTCOND */ 0)
#else
#define	_KERNEL_LOCK_ASSERT(cond)	/* nothing */
#endif

static void	_kernel_lock_dump(const volatile void *, lockop_printer_t);

lockops_t _kernel_lock_ops = {
	.lo_name = "Kernel lock",
	.lo_type = LOCKOPS_SPIN,
	.lo_dump = _kernel_lock_dump,
};

#ifdef LOCKDEBUG

#ifdef DDB
#include <ddb/ddb.h>
#endif

static void
kernel_lock_trace_ipi(void *cookie)
{

	printf("%s[%d %s]: hogging kernel lock\n", cpu_name(curcpu()),
	    curlwp->l_lid,
	    curlwp->l_name ? curlwp->l_name : curproc->p_comm);
#ifdef DDB
	db_stacktrace();
#endif
}

#endif

/*
 * Initialize the kernel lock.
 */
void
kernel_lock_init(void)
{

	__cpu_simple_lock_init(kernel_lock);
	kernel_lock_dodebug = LOCKDEBUG_ALLOC(kernel_lock, &_kernel_lock_ops,
	    RETURN_ADDRESS);
}
CTASSERT(CACHE_LINE_SIZE >= sizeof(__cpu_simple_lock_t));

/*
 * Print debugging information about the kernel lock.
 */
static void
_kernel_lock_dump(const volatile void *junk, lockop_printer_t pr)
{
	struct cpu_info *ci = curcpu();

	(void)junk;

	pr("curcpu holds : %18d wanted by: %#018lx\n",
	    ci->ci_biglock_count, (long)ci->ci_biglock_wanted);
}
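
/*
 * A minimal usage sketch, assuming the wrapper macros from
 * <sys/systm.h> (KERNEL_LOCK(), KERNEL_UNLOCK_ONE()).  The lock
 * recurses per-CPU, so a holder may take additional holds freely;
 * only the first hold can spin:
 *
 *	KERNEL_LOCK(1, NULL);		// first hold: may spin if contended
 *	KERNEL_LOCK(1, NULL);		// recursive hold: just counts up
 *	KERNEL_UNLOCK_ONE(NULL);
 *	KERNEL_UNLOCK_ONE(NULL);	// last hold released here
 */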
/*
 * Acquire 'nlocks' holds on the kernel lock.
 *
 * Although it may not look it, this is one of the most central, intricate
 * routines in the kernel, and tons of code elsewhere depends on its exact
 * behaviour.  If you change something in here, expect it to bite you in the
 * rear.
 */
void
_kernel_lock(int nlocks)
{
	struct cpu_info *ci;
	LOCKSTAT_TIMER(spintime);
	LOCKSTAT_FLAG(lsflag);
	struct lwp *owant;
#ifdef LOCKDEBUG
	static struct cpu_info *kernel_lock_holder;
	u_int spins = 0;
	u_int starttime = getticks();
#endif
	int s;
	struct lwp *l = curlwp;

	_KERNEL_LOCK_ASSERT(nlocks > 0);

	s = splvm();
	ci = curcpu();
	if (ci->ci_biglock_count != 0) {
		_KERNEL_LOCK_ASSERT(__SIMPLELOCK_LOCKED_P(kernel_lock));
		ci->ci_biglock_count += nlocks;
		l->l_blcnt += nlocks;
		splx(s);
		return;
	}

	_KERNEL_LOCK_ASSERT(l->l_blcnt == 0);
	LOCKDEBUG_WANTLOCK(kernel_lock_dodebug, kernel_lock, RETURN_ADDRESS,
	    0);

	if (__predict_true(__cpu_simple_lock_try(kernel_lock))) {
#ifdef LOCKDEBUG
		kernel_lock_holder = curcpu();
#endif
		ci->ci_biglock_count = nlocks;
		l->l_blcnt = nlocks;
		LOCKDEBUG_LOCKED(kernel_lock_dodebug, kernel_lock, NULL,
		    RETURN_ADDRESS, 0);
		splx(s);
		return;
	}

	/*
	 * To remove the ordering constraint between adaptive mutexes
	 * and kernel_lock we must make it appear as if this thread is
	 * blocking.  For non-interlocked mutex release, a store fence
	 * is required to ensure that the result of any mutex_exit()
	 * by the current LWP becomes visible on the bus before the set
	 * of ci->ci_biglock_wanted becomes visible.
	 *
	 * This membar_producer matches the membar_consumer in
	 * mutex_vector_enter.
	 *
	 * That way, if l has just released a mutex, mutex_vector_enter
	 * can't see this store ci->ci_biglock_wanted := l until it
	 * also sees the mutex_exit store mtx->mtx_owner := 0 which
	 * clears the has-waiters bit.
	 */
	membar_producer();
	owant = ci->ci_biglock_wanted;
	atomic_store_relaxed(&ci->ci_biglock_wanted, l);
#if defined(DIAGNOSTIC) && !defined(LOCKDEBUG)
	l->l_ld_wanted = __builtin_return_address(0);
#endif

	/*
	 * Spin until we acquire the lock.  Once we have it, record the
	 * time spent with lockstat.
	 */
	LOCKSTAT_ENTER(lsflag);
	LOCKSTAT_START_TIMER(lsflag, spintime);

	do {
		splx(s);
		while (__SIMPLELOCK_LOCKED_P(kernel_lock)) {
#ifdef LOCKDEBUG
			if (SPINLOCK_SPINOUT(spins) && start_init_exec &&
			    (getticks() - starttime) > 10*hz) {
				ipi_msg_t msg = {
					.func = kernel_lock_trace_ipi,
				};
				kpreempt_disable();
				ipi_unicast(&msg, kernel_lock_holder);
				ipi_wait(&msg);
				kpreempt_enable();
				_KERNEL_LOCK_ABORT("spinout");
			}
#endif
			SPINLOCK_BACKOFF_HOOK;
			SPINLOCK_SPIN_HOOK;
		}
		s = splvm();
	} while (!__cpu_simple_lock_try(kernel_lock));

	ci->ci_biglock_count = nlocks;
	l->l_blcnt = nlocks;
	LOCKSTAT_STOP_TIMER(lsflag, spintime);
	LOCKDEBUG_LOCKED(kernel_lock_dodebug, kernel_lock, NULL,
	    RETURN_ADDRESS, 0);
	if (owant == NULL) {
		LOCKSTAT_EVENT_RA(lsflag, kernel_lock,
		    LB_KERNEL_LOCK | LB_SPIN, 1, spintime, RETURN_ADDRESS);
	}
	LOCKSTAT_EXIT(lsflag);
	splx(s);

	/*
	 * Now that we have kernel_lock, reset ci_biglock_wanted.  This
	 * store must be visible on other CPUs before a mutex_exit() on
	 * this CPU can test the has-waiters bit.
	 *
	 * This membar_enter matches the membar_enter in
	 * mutex_vector_enter.  (Yes, not membar_exit -- the legacy
	 * naming is confusing, but store-before-load usually pairs
	 * with store-before-load, in the extremely rare cases where it
	 * is used at all.)
	 *
	 * That way, mutex_vector_enter can't see this store
	 * ci->ci_biglock_wanted := owant until it has set the
	 * has-waiters bit.
	 */
	(void)atomic_swap_ptr(&ci->ci_biglock_wanted, owant);
#ifndef __HAVE_ATOMIC_AS_MEMBAR
	membar_enter();
#endif

#ifdef LOCKDEBUG
	kernel_lock_holder = curcpu();
#endif
}
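
/*
 * For reference, a sketch of how the wrapper macros in <sys/systm.h>
 * map onto _kernel_unlock()'s 'nlocks' argument (see the header for
 * the authoritative definitions):
 *
 *	KERNEL_UNLOCK_ONE(l)	->	_kernel_unlock(1, NULL)
 *	KERNEL_UNLOCK_ALL(l, p)	->	_kernel_unlock(0, p)
 *	KERNEL_UNLOCK_LAST(l)	->	_kernel_unlock(-1, NULL)
 */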
/*
 * Release 'nlocks' holds on the kernel lock.  If 'nlocks' is zero,
 * release all holds; if it is -1, release a single hold, which must
 * be the last one.  If 'countp' is non-NULL, the number of holds
 * held on entry is returned through it.
 */
void
_kernel_unlock(int nlocks, int *countp)
{
	struct cpu_info *ci;
	u_int olocks;
	int s;
	struct lwp *l = curlwp;

	_KERNEL_LOCK_ASSERT(nlocks < 2);

	olocks = l->l_blcnt;

	if (olocks == 0) {
		_KERNEL_LOCK_ASSERT(nlocks <= 0);
		if (countp != NULL)
			*countp = 0;
		return;
	}

	_KERNEL_LOCK_ASSERT(__SIMPLELOCK_LOCKED_P(kernel_lock));

	if (nlocks == 0)
		nlocks = olocks;
	else if (nlocks == -1) {
		nlocks = 1;
		_KERNEL_LOCK_ASSERT(olocks == 1);
	}
	s = splvm();
	ci = curcpu();
	_KERNEL_LOCK_ASSERT(ci->ci_biglock_count >= l->l_blcnt);
	if (ci->ci_biglock_count == nlocks) {
		LOCKDEBUG_UNLOCKED(kernel_lock_dodebug, kernel_lock,
		    RETURN_ADDRESS, 0);
		ci->ci_biglock_count = 0;
		__cpu_simple_unlock(kernel_lock);
		l->l_blcnt -= nlocks;
		splx(s);
		if (l->l_dopreempt)
			kpreempt(0);
	} else {
		ci->ci_biglock_count -= nlocks;
		l->l_blcnt -= nlocks;
		splx(s);
	}

	if (countp != NULL)
		*countp = olocks;
}

bool
_kernel_locked_p(void)
{
	return __SIMPLELOCK_LOCKED_P(kernel_lock);
}
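
/*
 * Usage notes, as a sketch (the wrapper macros live in <sys/systm.h>):
 *
 * The 'countp' out-parameter supports the common idiom of dropping all
 * kernel-lock holds across a blocking operation and restoring them
 * afterwards:
 *
 *	int nlocks;
 *
 *	KERNEL_UNLOCK_ALL(curlwp, &nlocks);	// remember the hold count
 *	error = do_something_blocking();	// hypothetical call
 *	KERNEL_LOCK(nlocks, curlwp);		// restore the holds
 *
 * _kernel_locked_p() backs the KERNEL_LOCKED_P() macro, typically used
 * in assertions such as KASSERT(KERNEL_LOCKED_P()).
 */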