/*
 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1995-2005 Apple Computer, Inc. All Rights Reserved */
/*
 * pthread_support.c
 */

#include <sys/param.h>
#include <sys/queue.h>
#include <sys/resourcevar.h>
//#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/systm.h>
#include <sys/timeb.h>
#include <sys/times.h>
#include <sys/time.h>
#include <sys/acct.h>
#include <sys/kernel.h>
#include <sys/wait.h>
#include <sys/signalvar.h>
#include <sys/syslog.h>
#include <sys/stat.h>
#include <sys/lock.h>
#include <sys/kdebug.h>
//#include <sys/sysproto.h>
//#include <sys/pthread_internal.h>
#include <sys/vm.h>
#include <sys/user.h>

#include <mach/mach_types.h>
#include <mach/vm_prot.h>
#include <mach/semaphore.h>
#include <mach/sync_policy.h>
#include <mach/task.h>
#include <kern/kern_types.h>
#include <kern/task.h>
#include <kern/clock.h>
#include <mach/kern_return.h>
#include <kern/thread.h>
#include <kern/sched_prim.h>
#include <kern/thread_call.h>
#include <kern/kalloc.h>
#include <kern/zalloc.h>
#include <kern/sched_prim.h>
#include <kern/processor.h>
#include <kern/wait_queue.h>
//#include <kern/mach_param.h>
#include <mach/mach_vm.h>
#include <mach/mach_param.h>
#include <mach/thread_policy.h>
#include <mach/message.h>
#include <mach/port.h>
//#include <vm/vm_protos.h>
#include <vm/vm_map.h>
#include <mach/vm_region.h>

#include <libkern/OSAtomic.h>

#include <pexpert/pexpert.h>
#include <sys/pthread_shims.h>

#include "kern_internal.h"
#include "synch_internal.h"
#include "kern_trace.h"

typedef struct uthread *uthread_t;

//#define __FAILEDUSERTEST__(s) do { panic(s); } while (0)
#define __FAILEDUSERTEST__(s) do { printf("PSYNCH: pid[%d]: %s\n", proc_pid(current_proc()), s); } while (0)

#define ECVCERORR	256
#define ECVPERORR	512

lck_mtx_t *pthread_list_mlock;

#define PTH_HASHSIZE 100

static LIST_HEAD(pthhashhead, ksyn_wait_queue) *pth_glob_hashtbl;
static unsigned long pthhash;

static LIST_HEAD(, ksyn_wait_queue) pth_free_list;

static zone_t kwq_zone;	/* zone for allocation of ksyn_wait_queue */
static zone_t kwe_zone;	/* zone for allocation of ksyn_waitq_element */

#define SEQFIT 0
#define FIRSTFIT 1

struct ksyn_queue {
	TAILQ_HEAD(ksynq_kwelist_head, ksyn_waitq_element) ksynq_kwelist;
	uint32_t	ksynq_count;	/* number of entries in queue */
	uint32_t	ksynq_firstnum;	/* lowest seq in queue */
	uint32_t	ksynq_lastnum;	/* highest seq in queue */
};
typedef struct ksyn_queue *ksyn_queue_t;

enum {
	KSYN_QUEUE_READ = 0,
	KSYN_QUEUE_WRITER,
	KSYN_QUEUE_MAX,
};

struct ksyn_wait_queue {
	LIST_ENTRY(ksyn_wait_queue) kw_hash;
	LIST_ENTRY(ksyn_wait_queue) kw_list;
	user_addr_t kw_addr;
	uint64_t kw_owner;
	uint64_t kw_object;		/* object backing in shared mode */
	uint64_t kw_offset;		/* offset inside the object in shared mode */
	int	kw_pflags;		/* flags under listlock protection */
	struct timeval kw_ts;		/* timeval needed for upkeep before free */
	int	kw_iocount;		/* inuse reference */
	int	kw_dropcount;		/* current users unlocking... */

	int	kw_type;		/* queue type like mutex, cvar, etc */
	uint32_t kw_inqueue;		/* num of waiters held */
	uint32_t kw_fakecount;		/* number of error/prepost fakes */
	uint32_t kw_highseq;		/* highest seq in the queue */
	uint32_t kw_lowseq;		/* lowest seq in the queue */
	uint32_t kw_lword;		/* L value from userland */
	uint32_t kw_uword;		/* U word value from userland */
	uint32_t kw_sword;		/* S word value from userland */
	uint32_t kw_lastunlockseq;	/* the last seq that unlocked */
	/* for CV to be used as the seq kernel has seen so far */
#define kw_cvkernelseq kw_lastunlockseq
	uint32_t kw_lastseqword;	/* the last seq that unlocked */
	/* for mutex and cvar we need to track I bit values */
	uint32_t kw_nextseqword;	/* the last seq that unlocked; with num of waiters */
	uint32_t kw_overlapwatch;	/* chance for overlaps */
	uint32_t kw_pre_rwwc;		/* prepost count */
	uint32_t kw_pre_lockseq;	/* prepost target seq */
	uint32_t kw_pre_sseq;		/* prepost target sword, in cvar used for mutexowned */
	uint32_t kw_pre_intrcount;	/* prepost of missed wakeup due to intrs */
	uint32_t kw_pre_intrseq;	/* prepost of missed wakeup limit seq */
	uint32_t kw_pre_intrretbits;	/* return bits value for missed wakeup threads */
	uint32_t kw_pre_intrtype;	/* type of failed wakeups */

	int	kw_kflags;
	int	kw_qos_override;	/* QoS of max waiter during contention period */
	struct ksyn_queue kw_ksynqueues[KSYN_QUEUE_MAX];	/* queues to hold threads */
	lck_mtx_t kw_lock;		/* mutex lock protecting this structure */
};
typedef struct ksyn_wait_queue *ksyn_wait_queue_t;

#define TID_ZERO (uint64_t)0

/* bits needed in handling the rwlock unlock */
#define PTH_RW_TYPE_READ	0x01
#define PTH_RW_TYPE_WRITE	0x04
#define PTH_RW_TYPE_MASK	0xff
#define PTH_RW_TYPE_SHIFT	8

#define PTH_RWSHFT_TYPE_READ	0x0100
#define PTH_RWSHFT_TYPE_WRITE	0x0400
#define PTH_RWSHFT_TYPE_MASK	0xff00

/*
 * Mutex pshared attributes
 */
#define PTHREAD_PROCESS_SHARED		_PTHREAD_MTX_OPT_PSHARED
#define PTHREAD_PROCESS_PRIVATE		0x20
#define PTHREAD_PSHARED_FLAGS_MASK	0x30

/*
 * Mutex policy attributes
 */
#define _PTHREAD_MUTEX_POLICY_NONE		0
#define _PTHREAD_MUTEX_POLICY_FAIRSHARE		0x040	/* 1 */
#define _PTHREAD_MUTEX_POLICY_FIRSTFIT		0x080	/* 2 */
#define _PTHREAD_MUTEX_POLICY_REALTIME		0x0c0	/* 3 */
#define _PTHREAD_MUTEX_POLICY_ADAPTIVE		0x100	/* 4 */
#define _PTHREAD_MUTEX_POLICY_PRIPROTECT	0x140	/* 5 */
#define _PTHREAD_MUTEX_POLICY_PRIINHERIT	0x180	/* 6 */
#define PTHREAD_POLICY_FLAGS_MASK		0x1c0
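
/*
 * Illustration (not part of the original sources): the flags word passed in by
 * userspace carries the pshared and policy attributes in the bit fields masked
 * above, which the handlers below test with, for example:
 *
 *	int shared   = (flags & PTHREAD_PSHARED_FLAGS_MASK) == PTHREAD_PROCESS_SHARED;
 *	int firstfit = (flags & PTHREAD_POLICY_FLAGS_MASK) == _PTHREAD_MUTEX_POLICY_FIRSTFIT;
 *
 * Both tests appear verbatim in ksyn_wqfind() and _psynch_mutexwait() below.
 */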

/* pflags */
#define KSYN_WQ_INHASH	2
#define KSYN_WQ_SHARED	4
#define KSYN_WQ_WAITING	8	/* threads waiting for this wq to be available */
#define KSYN_WQ_FLIST	0x10	/* in free list to be freed after a short delay */

/* kflags */
#define KSYN_KWF_INITCLEARED	1	/* the init status found and preposts cleared */
#define KSYN_KWF_ZEROEDOUT	2	/* the lword, etc are inited to 0 */
#define KSYN_KWF_QOS_APPLIED	4	/* QoS override applied to owner */

#define KSYN_CLEANUP_DEADLINE 10
static int psynch_cleanupset;
thread_call_t psynch_thcall;

#define KSYN_WQTYPE_INWAIT	0x1000
#define KSYN_WQTYPE_INDROP	0x2000
#define KSYN_WQTYPE_MTX		0x01
#define KSYN_WQTYPE_CVAR	0x02
#define KSYN_WQTYPE_RWLOCK	0x04
#define KSYN_WQTYPE_SEMA	0x08
#define KSYN_WQTYPE_MASK	0xff

#define KSYN_WQTYPE_MUTEXDROP	(KSYN_WQTYPE_INDROP | KSYN_WQTYPE_MTX)

#define KW_UNLOCK_PREPOST		0x01
#define KW_UNLOCK_PREPOST_READLOCK	0x08
#define KW_UNLOCK_PREPOST_WRLOCK	0x20

static void
CLEAR_PREPOST_BITS(ksyn_wait_queue_t kwq)
{
	kwq->kw_pre_lockseq = 0;
	kwq->kw_pre_sseq = PTHRW_RWS_INIT;
	kwq->kw_pre_rwwc = 0;
}

static void
CLEAR_INTR_PREPOST_BITS(ksyn_wait_queue_t kwq)
{
	kwq->kw_pre_intrcount = 0;
	kwq->kw_pre_intrseq = 0;
	kwq->kw_pre_intrretbits = 0;
	kwq->kw_pre_intrtype = 0;
}

static void
CLEAR_REINIT_BITS(ksyn_wait_queue_t kwq)
{
	if ((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_CVAR) {
		if (kwq->kw_inqueue != 0 && kwq->kw_inqueue != kwq->kw_fakecount) {
			panic("CV: entries in queue during reinit %d:%d\n", kwq->kw_inqueue, kwq->kw_fakecount);
		}
	}
	if ((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_RWLOCK) {
		kwq->kw_nextseqword = PTHRW_RWS_INIT;
		kwq->kw_overlapwatch = 0;
	}
	CLEAR_PREPOST_BITS(kwq);
	kwq->kw_lastunlockseq = PTHRW_RWL_INIT;
	kwq->kw_lastseqword = PTHRW_RWS_INIT;
	CLEAR_INTR_PREPOST_BITS(kwq);
	kwq->kw_lword = 0;
	kwq->kw_uword = 0;
	kwq->kw_sword = PTHRW_RWS_INIT;
}

static int ksyn_wq_hash_lookup(user_addr_t uaddr, proc_t p, int flags, ksyn_wait_queue_t *kwq, struct pthhashhead **hashptr, uint64_t *object, uint64_t *offset);
static int ksyn_wqfind(user_addr_t mutex, uint32_t mgen, uint32_t ugen, uint32_t rw_wc, int flags, int wqtype, ksyn_wait_queue_t *wq);
static void ksyn_wqrelease(ksyn_wait_queue_t mkwq, int qfreenow, int wqtype);
static int ksyn_findobj(user_addr_t uaddr, uint64_t *objectp, uint64_t *offsetp);

static int _wait_result_to_errno(wait_result_t result);

static int ksyn_wait(ksyn_wait_queue_t, int, uint32_t, int, uint64_t, thread_continue_t);
static kern_return_t ksyn_signal(ksyn_wait_queue_t, int, ksyn_waitq_element_t, uint32_t);
static void ksyn_freeallkwe(ksyn_queue_t kq);

static kern_return_t ksyn_mtxsignal(ksyn_wait_queue_t, ksyn_waitq_element_t kwe, uint32_t);
static void ksyn_mtx_update_owner_qos_override(ksyn_wait_queue_t, uint64_t tid, boolean_t prepost);
static void ksyn_mtx_transfer_qos_override(ksyn_wait_queue_t, ksyn_waitq_element_t);
static void ksyn_mtx_drop_qos_override(ksyn_wait_queue_t);

static int kwq_handle_unlock(ksyn_wait_queue_t, uint32_t mgen, uint32_t rw_wc, uint32_t *updatep, int flags, int *blockp, uint32_t premgen);
static void ksyn_queue_init(ksyn_queue_t kq);
static int ksyn_queue_insert(ksyn_wait_queue_t kwq, int kqi, ksyn_waitq_element_t kwe, uint32_t mgen, int firstfit);
static void ksyn_queue_remove_item(ksyn_wait_queue_t kwq, ksyn_queue_t kq, ksyn_waitq_element_t kwe);
static void ksyn_queue_free_items(ksyn_wait_queue_t kwq, int kqi, uint32_t upto, int all);

static void update_low_high(ksyn_wait_queue_t kwq, uint32_t lockseq);
static uint32_t find_nextlowseq(ksyn_wait_queue_t kwq);
static uint32_t find_nexthighseq(ksyn_wait_queue_t kwq);
static int find_seq_till(ksyn_wait_queue_t kwq, uint32_t upto, uint32_t nwaiters, uint32_t *countp);

static uint32_t ksyn_queue_count_tolowest(ksyn_queue_t kq, uint32_t upto);

static ksyn_waitq_element_t ksyn_queue_find_cvpreposeq(ksyn_queue_t kq, uint32_t cgen);
static void ksyn_handle_cvbroad(ksyn_wait_queue_t ckwq, uint32_t upto, uint32_t *updatep);
static void ksyn_cvupdate_fixup(ksyn_wait_queue_t ckwq, uint32_t *updatep);
static ksyn_waitq_element_t ksyn_queue_find_signalseq(ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t toseq, uint32_t lockseq);

static void psynch_cvcontinue(void *, wait_result_t);
static void psynch_mtxcontinue(void *, wait_result_t);

static int ksyn_wakeupreaders(ksyn_wait_queue_t kwq, uint32_t limitread, int allreaders, uint32_t updatebits, int *wokenp);
static int kwq_find_rw_lowest(ksyn_wait_queue_t kwq, int flags, uint32_t premgen, int *type, uint32_t lowest[]);
static ksyn_waitq_element_t ksyn_queue_find_seq(ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t seq);

static void
UPDATE_CVKWQ(ksyn_wait_queue_t kwq, uint32_t mgen, uint32_t ugen, uint32_t rw_wc)
{
	int sinit = ((rw_wc & PTH_RWS_CV_CBIT) != 0);

	// assert((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_CVAR);

	if ((kwq->kw_kflags & KSYN_KWF_ZEROEDOUT) != 0) {
		/* the values of L, U and S are cleared out due to L == S in previous transition */
		kwq->kw_lword = mgen;
		kwq->kw_uword = ugen;
		kwq->kw_sword = rw_wc;
		kwq->kw_kflags &= ~KSYN_KWF_ZEROEDOUT;
	} else {
		if (is_seqhigher(mgen, kwq->kw_lword)) {
			kwq->kw_lword = mgen;
		}
		if (is_seqhigher(ugen, kwq->kw_uword)) {
			kwq->kw_uword = ugen;
		}
		if (sinit && is_seqhigher(rw_wc, kwq->kw_sword)) {
			kwq->kw_sword = rw_wc;
		}
	}
	if (sinit && is_seqlower(kwq->kw_cvkernelseq, rw_wc)) {
		kwq->kw_cvkernelseq = (rw_wc & PTHRW_COUNT_MASK);
	}
}

static void
pthread_list_lock(void)
{
	lck_mtx_lock(pthread_list_mlock);
}

static void
pthread_list_unlock(void)
{
	lck_mtx_unlock(pthread_list_mlock);
}

static void
ksyn_wqlock(ksyn_wait_queue_t kwq)
{
	lck_mtx_lock(&kwq->kw_lock);
}

static void
ksyn_wqunlock(ksyn_wait_queue_t kwq)
{
	lck_mtx_unlock(&kwq->kw_lock);
}
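
/*
 * Locking summary (editor's note, inferred from the code in this file; not
 * part of the original sources): pthread_list_mlock serializes the global and
 * per-process hash tables and the delayed-free list (the fields documented as
 * "under listlock protection", e.g. kw_pflags and kw_iocount), while each
 * ksyn_wait_queue has its own kw_lock protecting its queues, sequence words
 * and prepost state. Neither lock is held across blocking: ksyn_wqfind() uses
 * msleep(..., PDROP, ...) on the list lock, and ksyn_wait() drops kw_lock
 * before calling thread_block().
 */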

/* routine to drop the mutex unlock; used both for the mutexunlock system call and the drop during cond wait */
static uint32_t
_psynch_mutexdrop_internal(ksyn_wait_queue_t kwq, uint32_t mgen, uint32_t ugen, int flags)
{
	kern_return_t ret;
	uint32_t returnbits = 0;
	int firstfit = (flags & PTHREAD_POLICY_FLAGS_MASK) == _PTHREAD_MUTEX_POLICY_FIRSTFIT;
	uint32_t nextgen = (ugen + PTHRW_INC);

	ksyn_wqlock(kwq);
	kwq->kw_lastunlockseq = (ugen & PTHRW_COUNT_MASK);
	uint32_t updatebits = (kwq->kw_highseq & PTHRW_COUNT_MASK) | (PTH_RWL_EBIT | PTH_RWL_KBIT);

redrive:
	if (firstfit) {
		if (kwq->kw_inqueue == 0) {
			// not set or the new lock sequence is higher
			if (kwq->kw_pre_rwwc == 0 || is_seqhigher(mgen, kwq->kw_pre_lockseq)) {
				kwq->kw_pre_lockseq = (mgen & PTHRW_COUNT_MASK);
			}
			kwq->kw_pre_rwwc = 1;
			ksyn_mtx_drop_qos_override(kwq);
			kwq->kw_owner = 0;
			// indicate prepost content in kernel
			returnbits = mgen | PTH_RWL_PBIT;
		} else {
			// signal first waiter
			ret = ksyn_mtxsignal(kwq, NULL, updatebits);
			if (ret == KERN_NOT_WAITING) {
				goto redrive;
			}
		}
	} else {
		int prepost = 0;
		if (kwq->kw_inqueue == 0) {
			// No waiters in the queue.
			prepost = 1;
		} else {
			uint32_t low_writer = (kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_firstnum & PTHRW_COUNT_MASK);
			if (low_writer == nextgen) {
				/* next seq to be granted found */
				/* since the grant could be cv, make sure mutex wait is set in case the thread interrupted out */
				ret = ksyn_mtxsignal(kwq, NULL, updatebits | PTH_RWL_MTX_WAIT);
				if (ret == KERN_NOT_WAITING) {
					/* interrupt post */
					kwq->kw_pre_intrcount = 1;
					kwq->kw_pre_intrseq = nextgen;
					kwq->kw_pre_intrretbits = updatebits;
					kwq->kw_pre_intrtype = PTH_RW_TYPE_WRITE;
				}

			} else if (is_seqhigher(low_writer, nextgen)) {
				prepost = 1;
			} else {
				//__FAILEDUSERTEST__("psynch_mutexdrop_internal: FS mutex unlock sequence higher than the lowest one in queue\n");
				ksyn_waitq_element_t kwe;
				kwe = ksyn_queue_find_seq(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_WRITER], nextgen);
				if (kwe != NULL) {
					/* next seq to be granted found */
					/* since the grant could be cv, make sure mutex wait is set in case the thread interrupted out */
					ret = ksyn_mtxsignal(kwq, kwe, updatebits | PTH_RWL_MTX_WAIT);
					if (ret == KERN_NOT_WAITING) {
						goto redrive;
					}
				} else {
					prepost = 1;
				}
			}
		}
		if (prepost) {
			ksyn_mtx_drop_qos_override(kwq);
			kwq->kw_owner = 0;
			if (++kwq->kw_pre_rwwc > 1) {
				__FAILEDUSERTEST__("_psynch_mutexdrop_internal: multiple preposts\n");
			} else {
				kwq->kw_pre_lockseq = (nextgen & PTHRW_COUNT_MASK);
			}
		}
	}

	ksyn_wqunlock(kwq);
	ksyn_wqrelease(kwq, 1, KSYN_WQTYPE_MUTEXDROP);
	return returnbits;
}

static int
_ksyn_check_init(ksyn_wait_queue_t kwq, uint32_t lgenval)
{
	int res = (lgenval & PTHRW_RWL_INIT) != 0;
	if (res) {
		if ((kwq->kw_kflags & KSYN_KWF_INITCLEARED) == 0) {
			/* first to notice the reset of the lock, clear preposts */
			CLEAR_REINIT_BITS(kwq);
			kwq->kw_kflags |= KSYN_KWF_INITCLEARED;
		}
	}
	return res;
}

static int
_ksyn_handle_missed_wakeups(ksyn_wait_queue_t kwq,
			    uint32_t type,
			    uint32_t lockseq,
			    uint32_t *retval)
{
	int res = 0;
	if (kwq->kw_pre_intrcount != 0 &&
	    kwq->kw_pre_intrtype == type &&
	    is_seqlower_eq(lockseq, kwq->kw_pre_intrseq)) {
		kwq->kw_pre_intrcount--;
		*retval = kwq->kw_pre_intrretbits;
		if (kwq->kw_pre_intrcount == 0) {
			CLEAR_INTR_PREPOST_BITS(kwq);
		}
		res = 1;
	}
	return res;
}
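
/*
 * Note on sequence comparisons (editor's illustration; the real macros live in
 * synch_internal.h and may differ in detail): lock/unlock sequence numbers are
 * 32-bit counters that wrap around, so ordering is decided by signed distance
 * rather than a plain '<'. A minimal sketch of the assumed semantics:
 *
 *	static inline int sketch_is_seqhigher(uint32_t a, uint32_t b)
 *	{
 *		return (int32_t)(a - b) > 0;	// true if a is "after" b modulo 2^32
 *	}
 *
 * is_seqlower(), is_seqlower_eq() and is_seqhigher_eq() are the corresponding
 * variants used throughout this file.
 */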

static int
_ksyn_handle_overlap(ksyn_wait_queue_t kwq,
		     uint32_t lgenval,
		     uint32_t rw_wc,
		     uint32_t *retval)
{
	int res = 0;

	// check for overlap and no pending W bit (indicates writers)
	if (kwq->kw_overlapwatch != 0 &&
	    (rw_wc & PTHRW_RWS_SAVEMASK) == 0 &&
	    (lgenval & PTH_RWL_WBIT) == 0) {
		/* overlap is set, so no need to check for valid state for overlap */

		if (is_seqlower_eq(rw_wc, kwq->kw_nextseqword) || is_seqhigher_eq(kwq->kw_lastseqword, rw_wc)) {
			/* increase the next expected seq by one */
			kwq->kw_nextseqword += PTHRW_INC;
			/* set count by one & bits from the nextseq and add M bit */
			*retval = PTHRW_INC | ((kwq->kw_nextseqword & PTHRW_BIT_MASK) | PTH_RWL_MBIT);
			res = 1;
		}
	}
	return res;
}

static int
_ksyn_handle_prepost(ksyn_wait_queue_t kwq,
		     uint32_t type,
		     uint32_t lockseq,
		     uint32_t *retval)
{
	int res = 0;
	if (kwq->kw_pre_rwwc != 0 && is_seqlower_eq(lockseq, kwq->kw_pre_lockseq)) {
		kwq->kw_pre_rwwc--;
		if (kwq->kw_pre_rwwc == 0) {
			uint32_t preseq = kwq->kw_pre_lockseq;
			uint32_t prerw_wc = kwq->kw_pre_sseq;
			CLEAR_PREPOST_BITS(kwq);
			if ((kwq->kw_kflags & KSYN_KWF_INITCLEARED) != 0) {
				kwq->kw_kflags &= ~KSYN_KWF_INITCLEARED;
			}

			int error, block;
			uint32_t updatebits;
			error = kwq_handle_unlock(kwq, preseq, prerw_wc, &updatebits, (type | KW_UNLOCK_PREPOST), &block, lockseq);
			if (error != 0) {
				panic("kwq_handle_unlock failed %d\n", error);
			}

			if (block == 0) {
				*retval = updatebits;
				res = 1;
			}
		}
	}
	return res;
}
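
/*
 * Note on preposts (editor's summary, inferred from the handlers above): when
 * an unlock or signal reaches the kernel before the matching waiter does, the
 * wakeup cannot be delivered and is instead recorded on the wait queue
 * (kw_pre_rwwc / kw_pre_lockseq / kw_pre_sseq, or the kw_pre_intr* fields for
 * wakeups lost to an interrupted waiter). A later arrival whose sequence is at
 * or below the recorded target consumes the prepost instead of blocking, as in
 * _ksyn_handle_prepost() and _ksyn_handle_missed_wakeups() above.
 */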

/* Helpers for QoS override management. Only applies to mutexes */
static void ksyn_mtx_update_owner_qos_override(ksyn_wait_queue_t kwq, uint64_t tid, boolean_t prepost)
{
	if (!(kwq->kw_pflags & KSYN_WQ_SHARED)) {
		boolean_t wasboosted = (kwq->kw_kflags & KSYN_KWF_QOS_APPLIED) ? TRUE : FALSE;
		int waiter_qos = pthread_kern->proc_usynch_get_requested_thread_qos(current_uthread());

		kwq->kw_qos_override = MAX(waiter_qos, kwq->kw_qos_override);

		if (prepost && kwq->kw_inqueue == 0) {
			// If there are no more waiters in the queue after the new (prepost-receiving) owner, we do not
			// set an override, because the receiving owner may not re-enter the kernel to signal someone
			// else if it is the last one to unlock. If other waiters end up entering the kernel, they will
			// boost the owner.
			tid = 0;
		}

		if (tid != 0) {
			if ((tid == kwq->kw_owner) && (kwq->kw_kflags & KSYN_KWF_QOS_APPLIED)) {
				// hint continues to be accurate, and a boost was already applied
				pthread_kern->proc_usynch_thread_qos_add_override(NULL, tid, kwq->kw_qos_override, FALSE);
			} else {
				// either the hint did not match the previous owner, or the hint was accurate but the
				// mutex was not contended enough for a boost previously
				boolean_t boostsucceeded;

				boostsucceeded = pthread_kern->proc_usynch_thread_qos_add_override(NULL, tid, kwq->kw_qos_override, TRUE);

				if (boostsucceeded) {
					kwq->kw_kflags |= KSYN_KWF_QOS_APPLIED;
				}

				if (wasboosted && (tid != kwq->kw_owner) && (kwq->kw_owner != 0)) {
					// the hint did not match the previous owner, so drop overrides
					PTHREAD_TRACE(TRACE_psynch_ksyn_incorrect_owner, kwq->kw_owner, 0, 0, 0, 0);
					pthread_kern->proc_usynch_thread_qos_remove_override(NULL, kwq->kw_owner);
				}
			}
		} else {
			// new hint tells us that we don't know the owner, so drop any existing overrides
			kwq->kw_kflags &= ~KSYN_KWF_QOS_APPLIED;
			kwq->kw_qos_override = THREAD_QOS_UNSPECIFIED;

			if (wasboosted && (kwq->kw_owner != 0)) {
				// the hint did not match the previous owner, so drop overrides
				PTHREAD_TRACE(TRACE_psynch_ksyn_incorrect_owner, kwq->kw_owner, 0, 0, 0, 0);
				pthread_kern->proc_usynch_thread_qos_remove_override(NULL, kwq->kw_owner);
			}
		}
	}
}

static void ksyn_mtx_transfer_qos_override(ksyn_wait_queue_t kwq, ksyn_waitq_element_t kwe)
{
	if (!(kwq->kw_pflags & KSYN_WQ_SHARED)) {
		boolean_t wasboosted = (kwq->kw_kflags & KSYN_KWF_QOS_APPLIED) ? TRUE : FALSE;

		if (kwq->kw_inqueue > 1) {
			boolean_t boostsucceeded;

			// More than one waiter, so the resource will still be contended after handing off ownership
			boostsucceeded = pthread_kern->proc_usynch_thread_qos_add_override(kwe->kwe_uth, 0, kwq->kw_qos_override, TRUE);

			if (boostsucceeded) {
				kwq->kw_kflags |= KSYN_KWF_QOS_APPLIED;
			}
		} else {
			// kw_inqueue == 1 to get to this point, which means there will be no contention after this point
			kwq->kw_kflags &= ~KSYN_KWF_QOS_APPLIED;
			kwq->kw_qos_override = THREAD_QOS_UNSPECIFIED;
		}

		// Remove the override that was applied to kw_owner. There may have been a race,
		// in which case it may not match the current thread.
		if (wasboosted) {
			if (kwq->kw_owner == 0) {
				PTHREAD_TRACE(TRACE_psynch_ksyn_incorrect_owner, 0, 0, 0, 0, 0);
			} else if (thread_tid(current_thread()) != kwq->kw_owner) {
				PTHREAD_TRACE(TRACE_psynch_ksyn_incorrect_owner, kwq->kw_owner, 0, 0, 0, 0);
				pthread_kern->proc_usynch_thread_qos_remove_override(NULL, kwq->kw_owner);
			} else {
				pthread_kern->proc_usynch_thread_qos_remove_override(current_uthread(), 0);
			}
		}
	}
}

static void ksyn_mtx_drop_qos_override(ksyn_wait_queue_t kwq)
{
	if (!(kwq->kw_pflags & KSYN_WQ_SHARED)) {
		boolean_t wasboosted = (kwq->kw_kflags & KSYN_KWF_QOS_APPLIED) ? TRUE : FALSE;

		// assume nobody else in queue if this routine was called
		kwq->kw_kflags &= ~KSYN_KWF_QOS_APPLIED;
		kwq->kw_qos_override = THREAD_QOS_UNSPECIFIED;

		// Remove the override that was applied to kw_owner. There may have been a race,
		// in which case it may not match the current thread.
		if (wasboosted) {
			if (kwq->kw_owner == 0) {
				PTHREAD_TRACE(TRACE_psynch_ksyn_incorrect_owner, 0, 0, 0, 0, 0);
			} else if (thread_tid(current_thread()) != kwq->kw_owner) {
				PTHREAD_TRACE(TRACE_psynch_ksyn_incorrect_owner, kwq->kw_owner, 0, 0, 0, 0);
				pthread_kern->proc_usynch_thread_qos_remove_override(NULL, kwq->kw_owner);
			} else {
				pthread_kern->proc_usynch_thread_qos_remove_override(current_uthread(), 0);
			}
		}
	}
}

/*
 * psynch_mutexwait: This system call is used for contended psynch mutexes to block.
 */
int
_psynch_mutexwait(__unused proc_t p,
		  user_addr_t mutex,
		  uint32_t mgen,
		  uint32_t ugen,
		  uint64_t tid,
		  uint32_t flags,
		  uint32_t *retval)
{
	ksyn_wait_queue_t kwq;
	int error = 0;
	int ins_flags;

	int firstfit = (flags & PTHREAD_POLICY_FLAGS_MASK) == _PTHREAD_MUTEX_POLICY_FIRSTFIT;
	uint32_t updatebits = 0;

	uint32_t lockseq = (mgen & PTHRW_COUNT_MASK);

	if (firstfit == 0) {
		ins_flags = SEQFIT;
	} else {
		/* first fit */
		ins_flags = FIRSTFIT;
	}

	error = ksyn_wqfind(mutex, mgen, ugen, 0, flags, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_MTX), &kwq);
	if (error != 0) {
		return(error);
	}

	ksyn_wqlock(kwq);

	// mutexwait passes in an owner hint at the time userspace contended for the mutex; however, the
	// owner tid in the userspace data structure may be unset or SWITCHING (-1), or it may correspond
	// to a stale snapshot after the lock has subsequently been unlocked by another thread.
	if (tid == 0) {
		// contender came in before owner could write TID
		tid = 0;
	} else if (kwq->kw_lastunlockseq != PTHRW_RWL_INIT && is_seqlower(ugen, kwq->kw_lastunlockseq)) {
		// owner is stale, someone has come in and unlocked since this contender read the TID, so
		// assume what is known in the kernel is accurate
		tid = kwq->kw_owner;
	} else if (tid == PTHREAD_MTX_TID_SWITCHING) {
		// userspace didn't know the owner because it was being unlocked, but that unlocker hasn't
		// reached the kernel yet. So assume what is known in the kernel is accurate
		tid = kwq->kw_owner;
	} else {
		// hint is being passed in for a specific thread, and we have no reason not to trust
		// it (like the kernel unlock sequence being higher)
	}

	if (_ksyn_handle_missed_wakeups(kwq, PTH_RW_TYPE_WRITE, lockseq, retval)) {
		ksyn_mtx_update_owner_qos_override(kwq, thread_tid(current_thread()), TRUE);
		kwq->kw_owner = thread_tid(current_thread());

		ksyn_wqunlock(kwq);
		goto out;
	}

	if ((kwq->kw_pre_rwwc != 0) && ((ins_flags == FIRSTFIT) || ((lockseq & PTHRW_COUNT_MASK) == (kwq->kw_pre_lockseq & PTHRW_COUNT_MASK)))) {
		/* got preposted lock */
		kwq->kw_pre_rwwc--;
		if (kwq->kw_pre_rwwc == 0) {
			CLEAR_PREPOST_BITS(kwq);
			if (kwq->kw_inqueue == 0) {
				updatebits = lockseq | (PTH_RWL_KBIT | PTH_RWL_EBIT);
			} else {
				updatebits = (kwq->kw_highseq & PTHRW_COUNT_MASK) | (PTH_RWL_KBIT | PTH_RWL_EBIT);
			}
			updatebits &= ~PTH_RWL_MTX_WAIT;

			if (updatebits == 0) {
				__FAILEDUSERTEST__("psynch_mutexwait(prepost): returning 0 lseq in mutexwait with no EBIT \n");
			}

			ksyn_mtx_update_owner_qos_override(kwq, thread_tid(current_thread()), TRUE);
			kwq->kw_owner = thread_tid(current_thread());

			ksyn_wqunlock(kwq);
			*retval = updatebits;
			goto out;
		} else {
			__FAILEDUSERTEST__("psynch_mutexwait: more than one prepost\n");
			kwq->kw_pre_lockseq += PTHRW_INC; /* look for next one */
			ksyn_wqunlock(kwq);
			error = EINVAL;
			goto out;
		}
	}

	ksyn_mtx_update_owner_qos_override(kwq, tid, FALSE);
	kwq->kw_owner = tid;

	error = ksyn_wait(kwq, KSYN_QUEUE_WRITER, mgen, ins_flags, 0, psynch_mtxcontinue);
	// ksyn_wait drops wait queue lock
out:
	ksyn_wqrelease(kwq, 1, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_MTX));
	return error;
}

void
psynch_mtxcontinue(void *parameter, wait_result_t result)
{
	uthread_t uth = current_uthread();
	ksyn_wait_queue_t kwq = parameter;
	ksyn_waitq_element_t kwe = pthread_kern->uthread_get_uukwe(uth);

	int error = _wait_result_to_errno(result);
	if (error != 0) {
		ksyn_wqlock(kwq);
		if (kwe->kwe_kwqqueue) {
			ksyn_queue_remove_item(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_WRITER], kwe);
		}
		ksyn_wqunlock(kwq);
	} else {
		uint32_t updatebits = kwe->kwe_psynchretval & ~PTH_RWL_MTX_WAIT;
		pthread_kern->uthread_set_returnval(uth, updatebits);

		if (updatebits == 0)
			__FAILEDUSERTEST__("psynch_mutexwait: returning 0 lseq in mutexwait with no EBIT \n");
	}
	ksyn_wqrelease(kwq, 1, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_MTX));
	pthread_kern->unix_syscall_return(error);
}

/*
 * psynch_mutexdrop: This system call is used for unlock postings on contended psynch mutexes.
783 */ 784int 785_psynch_mutexdrop(__unused proc_t p, 786 user_addr_t mutex, 787 uint32_t mgen, 788 uint32_t ugen, 789 uint64_t tid __unused, 790 uint32_t flags, 791 uint32_t *retval) 792{ 793 int res; 794 ksyn_wait_queue_t kwq; 795 796 res = ksyn_wqfind(mutex, mgen, ugen, 0, flags, KSYN_WQTYPE_MUTEXDROP, &kwq); 797 if (res == 0) { 798 uint32_t updateval = _psynch_mutexdrop_internal(kwq, mgen, ugen, flags); 799 /* drops the kwq reference */ 800 if (retval) { 801 *retval = updateval; 802 } 803 } 804 805 return res; 806} 807 808static kern_return_t 809ksyn_mtxsignal(ksyn_wait_queue_t kwq, ksyn_waitq_element_t kwe, uint32_t updateval) 810{ 811 kern_return_t ret; 812 813 if (!kwe) { 814 kwe = TAILQ_FIRST(&kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_kwelist); 815 if (!kwe) { 816 panic("ksyn_mtxsignal: panic signaling empty queue"); 817 } 818 } 819 820 ksyn_mtx_transfer_qos_override(kwq, kwe); 821 kwq->kw_owner = kwe->kwe_tid; 822 823 ret = ksyn_signal(kwq, KSYN_QUEUE_WRITER, kwe, updateval); 824 825 // if waking the new owner failed, remove any overrides 826 if (ret != KERN_SUCCESS) { 827 ksyn_mtx_drop_qos_override(kwq); 828 kwq->kw_owner = 0; 829 } 830 831 return ret; 832} 833 834 835static void 836ksyn_prepost(ksyn_wait_queue_t kwq, 837 ksyn_waitq_element_t kwe, 838 uint32_t state, 839 uint32_t lockseq) 840{ 841 bzero(kwe, sizeof(*kwe)); 842 kwe->kwe_state = state; 843 kwe->kwe_lockseq = lockseq; 844 kwe->kwe_count = 1; 845 846 (void)ksyn_queue_insert(kwq, KSYN_QUEUE_WRITER, kwe, lockseq, SEQFIT); 847 kwq->kw_fakecount++; 848} 849 850static void 851ksyn_cvsignal(ksyn_wait_queue_t ckwq, 852 thread_t th, 853 uint32_t uptoseq, 854 uint32_t signalseq, 855 uint32_t *updatebits, 856 int *broadcast, 857 ksyn_waitq_element_t *nkwep) 858{ 859 ksyn_waitq_element_t kwe = NULL; 860 ksyn_waitq_element_t nkwe = NULL; 861 ksyn_queue_t kq = &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER]; 862 863 uptoseq &= PTHRW_COUNT_MASK; 864 865 // Find the specified thread to wake. 866 if (th != THREAD_NULL) { 867 uthread_t uth = pthread_kern->get_bsdthread_info(th); 868 kwe = pthread_kern->uthread_get_uukwe(uth); 869 if (kwe->kwe_kwqqueue != ckwq || 870 is_seqhigher(kwe->kwe_lockseq, uptoseq)) { 871 // Unless it's no longer waiting on this CV... 872 kwe = NULL; 873 // ...in which case we post a broadcast instead. 874 *broadcast = 1; 875 return; 876 } 877 } 878 879 // If no thread was specified, find any thread to wake (with the right 880 // sequence number). 881 while (th == THREAD_NULL) { 882 if (kwe == NULL) { 883 kwe = ksyn_queue_find_signalseq(ckwq, kq, uptoseq, signalseq); 884 } 885 if (kwe == NULL && nkwe == NULL) { 886 // No eligible entries; need to allocate a new 887 // entry to prepost. Loop to rescan after 888 // reacquiring the lock after allocation in 889 // case anything new shows up. 890 ksyn_wqunlock(ckwq); 891 nkwe = (ksyn_waitq_element_t)pthread_kern->zalloc(kwe_zone); 892 ksyn_wqlock(ckwq); 893 } else { 894 break; 895 } 896 } 897 898 if (kwe != NULL) { 899 // If we found a thread to wake... 900 if (kwe->kwe_state == KWE_THREAD_INWAIT) { 901 if (is_seqlower(kwe->kwe_lockseq, signalseq)) { 902 /* 903 * A valid thread in our range, but lower than our signal. 904 * Matching it may leave our match with nobody to wake it if/when 905 * it arrives (the signal originally meant for this thread might 906 * not successfully wake it). 907 * 908 * Convert to broadcast - may cause some spurious wakeups 909 * (allowed by spec), but avoids starvation (better choice). 
910 */ 911 *broadcast = 1; 912 } else { 913 (void)ksyn_signal(ckwq, KSYN_QUEUE_WRITER, kwe, PTH_RWL_MTX_WAIT); 914 *updatebits += PTHRW_INC; 915 } 916 } else if (kwe->kwe_state == KWE_THREAD_PREPOST) { 917 // Merge with existing prepost at same uptoseq. 918 kwe->kwe_count += 1; 919 } else if (kwe->kwe_state == KWE_THREAD_BROADCAST) { 920 // Existing broadcasts subsume this signal. 921 } else { 922 panic("unknown kwe state\n"); 923 } 924 if (nkwe) { 925 /* 926 * If we allocated a new kwe above but then found a different kwe to 927 * use then we need to deallocate the spare one. 928 */ 929 pthread_kern->zfree(kwe_zone, nkwe); 930 nkwe = NULL; 931 } 932 } else if (nkwe != NULL) { 933 // ... otherwise, insert the newly allocated prepost. 934 ksyn_prepost(ckwq, nkwe, KWE_THREAD_PREPOST, uptoseq); 935 nkwe = NULL; 936 } else { 937 panic("failed to allocate kwe\n"); 938 } 939 940 *nkwep = nkwe; 941} 942 943static int 944__psynch_cvsignal(user_addr_t cv, 945 uint32_t cgen, 946 uint32_t cugen, 947 uint32_t csgen, 948 uint32_t flags, 949 int broadcast, 950 mach_port_name_t threadport, 951 uint32_t *retval) 952{ 953 int error = 0; 954 thread_t th = THREAD_NULL; 955 ksyn_wait_queue_t kwq; 956 957 uint32_t uptoseq = cgen & PTHRW_COUNT_MASK; 958 uint32_t fromseq = (cugen & PTHRW_COUNT_MASK) + PTHRW_INC; 959 960 // validate sane L, U, and S values 961 if ((threadport == 0 && is_seqhigher(fromseq, uptoseq)) || is_seqhigher(csgen, uptoseq)) { 962 __FAILEDUSERTEST__("cvbroad: invalid L, U and S values\n"); 963 return EINVAL; 964 } 965 966 if (threadport != 0) { 967 th = port_name_to_thread((mach_port_name_t)threadport); 968 if (th == THREAD_NULL) { 969 return ESRCH; 970 } 971 } 972 973 error = ksyn_wqfind(cv, cgen, cugen, csgen, flags, (KSYN_WQTYPE_CVAR | KSYN_WQTYPE_INDROP), &kwq); 974 if (error == 0) { 975 uint32_t updatebits = 0; 976 ksyn_waitq_element_t nkwe = NULL; 977 978 ksyn_wqlock(kwq); 979 980 // update L, U and S... 981 UPDATE_CVKWQ(kwq, cgen, cugen, csgen); 982 983 if (!broadcast) { 984 // No need to signal if the CV is already balanced. 985 if (diff_genseq(kwq->kw_lword, kwq->kw_sword)) { 986 ksyn_cvsignal(kwq, th, uptoseq, fromseq, &updatebits, &broadcast, &nkwe); 987 } 988 } 989 990 if (broadcast) { 991 ksyn_handle_cvbroad(kwq, uptoseq, &updatebits); 992 } 993 994 kwq->kw_sword += (updatebits & PTHRW_COUNT_MASK); 995 // set C or P bits and free if needed 996 ksyn_cvupdate_fixup(kwq, &updatebits); 997 *retval = updatebits; 998 999 ksyn_wqunlock(kwq); 1000 1001 if (nkwe != NULL) { 1002 pthread_kern->zfree(kwe_zone, nkwe); 1003 } 1004 1005 ksyn_wqrelease(kwq, 1, (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_CVAR)); 1006 } 1007 1008 if (th != NULL) { 1009 thread_deallocate(th); 1010 } 1011 1012 return error; 1013} 1014 1015/* 1016 * psynch_cvbroad: This system call is used for broadcast posting on blocked waiters of psynch cvars. 
1017 */ 1018int 1019_psynch_cvbroad(__unused proc_t p, 1020 user_addr_t cv, 1021 uint64_t cvlsgen, 1022 uint64_t cvudgen, 1023 uint32_t flags, 1024 __unused user_addr_t mutex, 1025 __unused uint64_t mugen, 1026 __unused uint64_t tid, 1027 uint32_t *retval) 1028{ 1029 uint32_t diffgen = cvudgen & 0xffffffff; 1030 uint32_t count = diffgen >> PTHRW_COUNT_SHIFT; 1031 if (count > pthread_kern->get_task_threadmax()) { 1032 __FAILEDUSERTEST__("cvbroad: difference greater than maximum possible thread count\n"); 1033 return EBUSY; 1034 } 1035 1036 uint32_t csgen = (cvlsgen >> 32) & 0xffffffff; 1037 uint32_t cgen = cvlsgen & 0xffffffff; 1038 uint32_t cugen = (cvudgen >> 32) & 0xffffffff; 1039 1040 return __psynch_cvsignal(cv, cgen, cugen, csgen, flags, 1, 0, retval); 1041} 1042 1043/* 1044 * psynch_cvsignal: This system call is used for signalling the blocked waiters of psynch cvars. 1045 */ 1046int 1047_psynch_cvsignal(__unused proc_t p, 1048 user_addr_t cv, 1049 uint64_t cvlsgen, 1050 uint32_t cvugen, 1051 int threadport, 1052 __unused user_addr_t mutex, 1053 __unused uint64_t mugen, 1054 __unused uint64_t tid, 1055 uint32_t flags, 1056 uint32_t *retval) 1057{ 1058 uint32_t csgen = (cvlsgen >> 32) & 0xffffffff; 1059 uint32_t cgen = cvlsgen & 0xffffffff; 1060 1061 return __psynch_cvsignal(cv, cgen, cvugen, csgen, flags, 0, threadport, retval); 1062} 1063 1064/* 1065 * psynch_cvwait: This system call is used for psynch cvar waiters to block in kernel. 1066 */ 1067int 1068_psynch_cvwait(__unused proc_t p, 1069 user_addr_t cv, 1070 uint64_t cvlsgen, 1071 uint32_t cvugen, 1072 user_addr_t mutex, 1073 uint64_t mugen, 1074 uint32_t flags, 1075 int64_t sec, 1076 uint32_t nsec, 1077 uint32_t *retval) 1078{ 1079 int error = 0; 1080 uint32_t updatebits = 0; 1081 ksyn_wait_queue_t ckwq = NULL; 1082 ksyn_waitq_element_t kwe, nkwe = NULL; 1083 1084 /* for conformance reasons */ 1085 pthread_kern->__pthread_testcancel(0); 1086 1087 uint32_t csgen = (cvlsgen >> 32) & 0xffffffff; 1088 uint32_t cgen = cvlsgen & 0xffffffff; 1089 uint32_t ugen = (mugen >> 32) & 0xffffffff; 1090 uint32_t mgen = mugen & 0xffffffff; 1091 1092 uint32_t lockseq = (cgen & PTHRW_COUNT_MASK); 1093 1094 /* 1095 * In cvwait U word can be out of range as cv could be used only for 1096 * timeouts. However S word needs to be within bounds and validated at 1097 * user level as well. 1098 */ 1099 if (is_seqhigher_eq(csgen, lockseq) != 0) { 1100 __FAILEDUSERTEST__("psync_cvwait; invalid sequence numbers\n"); 1101 return EINVAL; 1102 } 1103 1104 error = ksyn_wqfind(cv, cgen, cvugen, csgen, flags, KSYN_WQTYPE_CVAR | KSYN_WQTYPE_INWAIT, &ckwq); 1105 if (error != 0) { 1106 return error; 1107 } 1108 1109 if (mutex != 0) { 1110 error = _psynch_mutexdrop(NULL, mutex, mgen, ugen, 0, flags, NULL); 1111 if (error != 0) { 1112 goto out; 1113 } 1114 } 1115 1116 ksyn_wqlock(ckwq); 1117 1118 // update L, U and S... 
1119 UPDATE_CVKWQ(ckwq, cgen, cvugen, csgen); 1120 1121 /* Look for the sequence for prepost (or conflicting thread */ 1122 ksyn_queue_t kq = &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER]; 1123 kwe = ksyn_queue_find_cvpreposeq(kq, lockseq); 1124 if (kwe != NULL) { 1125 if (kwe->kwe_state == KWE_THREAD_PREPOST) { 1126 if ((kwe->kwe_lockseq & PTHRW_COUNT_MASK) == lockseq) { 1127 /* we can safely consume a reference, so do so */ 1128 if (--kwe->kwe_count == 0) { 1129 ksyn_queue_remove_item(ckwq, kq, kwe); 1130 ckwq->kw_fakecount--; 1131 nkwe = kwe; 1132 } 1133 } else { 1134 /* 1135 * consuming a prepost higher than our lock sequence is valid, but 1136 * can leave the higher thread without a match. Convert the entry 1137 * to a broadcast to compensate for this. 1138 */ 1139 ksyn_handle_cvbroad(ckwq, kwe->kwe_lockseq, &updatebits); 1140#if __TESTPANICS__ 1141 if (updatebits != 0) 1142 panic("psync_cvwait: convert pre-post to broadcast: woke up %d threads that shouldn't be there\n", updatebits); 1143#endif /* __TESTPANICS__ */ 1144 } 1145 } else if (kwe->kwe_state == KWE_THREAD_BROADCAST) { 1146 // XXX 1147 // Nothing to do. 1148 } else if (kwe->kwe_state == KWE_THREAD_INWAIT) { 1149 __FAILEDUSERTEST__("cvwait: thread entry with same sequence already present\n"); 1150 error = EBUSY; 1151 } else { 1152 panic("psync_cvwait: unexpected wait queue element type\n"); 1153 } 1154 1155 if (error == 0) { 1156 updatebits = PTHRW_INC; 1157 ckwq->kw_sword += PTHRW_INC; 1158 1159 /* set C or P bits and free if needed */ 1160 ksyn_cvupdate_fixup(ckwq, &updatebits); 1161 *retval = updatebits; 1162 } 1163 } else { 1164 uint64_t abstime = 0; 1165 1166 if (sec != 0 || (nsec & 0x3fffffff) != 0) { 1167 struct timespec ts; 1168 ts.tv_sec = (__darwin_time_t)sec; 1169 ts.tv_nsec = (nsec & 0x3fffffff); 1170 nanoseconds_to_absolutetime((uint64_t)ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec, &abstime); 1171 clock_absolutetime_interval_to_deadline(abstime, &abstime); 1172 } 1173 1174 error = ksyn_wait(ckwq, KSYN_QUEUE_WRITER, cgen, SEQFIT, abstime, psynch_cvcontinue); 1175 // ksyn_wait drops wait queue lock 1176 } 1177 1178 ksyn_wqunlock(ckwq); 1179 1180 if (nkwe != NULL) { 1181 pthread_kern->zfree(kwe_zone, nkwe); 1182 } 1183out: 1184 ksyn_wqrelease(ckwq, 1, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_CVAR)); 1185 return error; 1186} 1187 1188 1189void 1190psynch_cvcontinue(void *parameter, wait_result_t result) 1191{ 1192 uthread_t uth = current_uthread(); 1193 ksyn_wait_queue_t ckwq = parameter; 1194 ksyn_waitq_element_t kwe = pthread_kern->uthread_get_uukwe(uth); 1195 1196 int error = _wait_result_to_errno(result); 1197 if (error != 0) { 1198 ksyn_wqlock(ckwq); 1199 /* just in case it got woken up as we were granting */ 1200 pthread_kern->uthread_set_returnval(uth, kwe->kwe_psynchretval); 1201 1202 if (kwe->kwe_kwqqueue) { 1203 ksyn_queue_remove_item(ckwq, &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER], kwe); 1204 } 1205 if ((kwe->kwe_psynchretval & PTH_RWL_MTX_WAIT) != 0) { 1206 /* the condition var granted. 1207 * reset the error so that the thread returns back. 
1208 */ 1209 error = 0; 1210 /* no need to set any bits just return as cvsig/broad covers this */ 1211 } else { 1212 ckwq->kw_sword += PTHRW_INC; 1213 1214 /* set C and P bits, in the local error */ 1215 if ((ckwq->kw_lword & PTHRW_COUNT_MASK) == (ckwq->kw_sword & PTHRW_COUNT_MASK)) { 1216 error |= ECVCERORR; 1217 if (ckwq->kw_inqueue != 0) { 1218 ksyn_queue_free_items(ckwq, KSYN_QUEUE_WRITER, ckwq->kw_lword, 1); 1219 } 1220 ckwq->kw_lword = ckwq->kw_uword = ckwq->kw_sword = 0; 1221 ckwq->kw_kflags |= KSYN_KWF_ZEROEDOUT; 1222 } else { 1223 /* everythig in the queue is a fake entry ? */ 1224 if (ckwq->kw_inqueue != 0 && ckwq->kw_fakecount == ckwq->kw_inqueue) { 1225 error |= ECVPERORR; 1226 } 1227 } 1228 } 1229 ksyn_wqunlock(ckwq); 1230 } else { 1231 int val = 0; 1232 // PTH_RWL_MTX_WAIT is removed 1233 if ((kwe->kwe_psynchretval & PTH_RWS_CV_MBIT) != 0) { 1234 val = PTHRW_INC | PTH_RWS_CV_CBIT; 1235 } 1236 pthread_kern->uthread_set_returnval(uth, val); 1237 } 1238 1239 ksyn_wqrelease(ckwq, 1, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_CVAR)); 1240 pthread_kern->unix_syscall_return(error); 1241} 1242 1243/* 1244 * psynch_cvclrprepost: This system call clears pending prepost if present. 1245 */ 1246int 1247_psynch_cvclrprepost(__unused proc_t p, 1248 user_addr_t cv, 1249 uint32_t cvgen, 1250 uint32_t cvugen, 1251 uint32_t cvsgen, 1252 __unused uint32_t prepocnt, 1253 uint32_t preposeq, 1254 uint32_t flags, 1255 int *retval) 1256{ 1257 int error = 0; 1258 int mutex = (flags & _PTHREAD_MTX_OPT_MUTEX); 1259 int wqtype = (mutex ? KSYN_WQTYPE_MTX : KSYN_WQTYPE_CVAR) | KSYN_WQTYPE_INDROP; 1260 ksyn_wait_queue_t kwq = NULL; 1261 1262 *retval = 0; 1263 1264 error = ksyn_wqfind(cv, cvgen, cvugen, mutex ? 0 : cvsgen, flags, wqtype, &kwq); 1265 if (error != 0) { 1266 return error; 1267 } 1268 1269 ksyn_wqlock(kwq); 1270 1271 if (mutex) { 1272 int firstfit = (flags & PTHREAD_POLICY_FLAGS_MASK) == _PTHREAD_MUTEX_POLICY_FIRSTFIT; 1273 if (firstfit && kwq->kw_pre_rwwc != 0) { 1274 if (is_seqlower_eq(kwq->kw_pre_lockseq, cvgen)) { 1275 // clear prepost 1276 kwq->kw_pre_rwwc = 0; 1277 kwq->kw_pre_lockseq = 0; 1278 } 1279 } 1280 } else { 1281 ksyn_queue_free_items(kwq, KSYN_QUEUE_WRITER, preposeq, 0); 1282 } 1283 1284 ksyn_wqunlock(kwq); 1285 ksyn_wqrelease(kwq, 1, wqtype); 1286 return error; 1287} 1288 1289/* ***************** pthread_rwlock ************************ */ 1290 1291static int 1292__psynch_rw_lock(int type, 1293 user_addr_t rwlock, 1294 uint32_t lgenval, 1295 uint32_t ugenval, 1296 uint32_t rw_wc, 1297 int flags, 1298 uint32_t *retval) 1299{ 1300 int prepost_type, kqi; 1301 1302 if (type == PTH_RW_TYPE_READ) { 1303 prepost_type = KW_UNLOCK_PREPOST_READLOCK; 1304 kqi = KSYN_QUEUE_READ; 1305 } else { 1306 prepost_type = KW_UNLOCK_PREPOST_WRLOCK; 1307 kqi = KSYN_QUEUE_WRITER; 1308 } 1309 1310 uint32_t lockseq = lgenval & PTHRW_COUNT_MASK; 1311 1312 int error; 1313 ksyn_wait_queue_t kwq; 1314 error = ksyn_wqfind(rwlock, lgenval, ugenval, rw_wc, flags, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_RWLOCK), &kwq); 1315 if (error == 0) { 1316 ksyn_wqlock(kwq); 1317 _ksyn_check_init(kwq, lgenval); 1318 if (_ksyn_handle_missed_wakeups(kwq, type, lockseq, retval) || 1319 // handle overlap first as they are not counted against pre_rwwc 1320 (type == PTH_RW_TYPE_READ && _ksyn_handle_overlap(kwq, lgenval, rw_wc, retval)) || 1321 _ksyn_handle_prepost(kwq, prepost_type, lockseq, retval)) { 1322 ksyn_wqunlock(kwq); 1323 } else { 1324 error = ksyn_wait(kwq, kqi, lgenval, SEQFIT, 0, THREAD_CONTINUE_NULL); 1325 // ksyn_wait drops wait 
queue lock 1326 if (error == 0) { 1327 uthread_t uth = current_uthread(); 1328 ksyn_waitq_element_t kwe = pthread_kern->uthread_get_uukwe(uth); 1329 *retval = kwe->kwe_psynchretval; 1330 } 1331 } 1332 ksyn_wqrelease(kwq, 0, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_RWLOCK)); 1333 } 1334 return error; 1335} 1336 1337/* 1338 * psynch_rw_rdlock: This system call is used for psync rwlock readers to block. 1339 */ 1340int 1341_psynch_rw_rdlock(__unused proc_t p, 1342 user_addr_t rwlock, 1343 uint32_t lgenval, 1344 uint32_t ugenval, 1345 uint32_t rw_wc, 1346 int flags, 1347 uint32_t *retval) 1348{ 1349 return __psynch_rw_lock(PTH_RW_TYPE_READ, rwlock, lgenval, ugenval, rw_wc, flags, retval); 1350} 1351 1352/* 1353 * psynch_rw_longrdlock: This system call is used for psync rwlock long readers to block. 1354 */ 1355int 1356_psynch_rw_longrdlock(__unused proc_t p, 1357 __unused user_addr_t rwlock, 1358 __unused uint32_t lgenval, 1359 __unused uint32_t ugenval, 1360 __unused uint32_t rw_wc, 1361 __unused int flags, 1362 __unused uint32_t *retval) 1363{ 1364 return ESRCH; 1365} 1366 1367 1368/* 1369 * psynch_rw_wrlock: This system call is used for psync rwlock writers to block. 1370 */ 1371int 1372_psynch_rw_wrlock(__unused proc_t p, 1373 user_addr_t rwlock, 1374 uint32_t lgenval, 1375 uint32_t ugenval, 1376 uint32_t rw_wc, 1377 int flags, 1378 uint32_t *retval) 1379{ 1380 return __psynch_rw_lock(PTH_RW_TYPE_WRITE, rwlock, lgenval, ugenval, rw_wc, flags, retval); 1381} 1382 1383/* 1384 * psynch_rw_yieldwrlock: This system call is used for psync rwlock yielding writers to block. 1385 */ 1386int 1387_psynch_rw_yieldwrlock(__unused proc_t p, 1388 __unused user_addr_t rwlock, 1389 __unused uint32_t lgenval, 1390 __unused uint32_t ugenval, 1391 __unused uint32_t rw_wc, 1392 __unused int flags, 1393 __unused uint32_t *retval) 1394{ 1395 return ESRCH; 1396} 1397 1398/* 1399 * psynch_rw_unlock: This system call is used for unlock state postings. This will grant appropriate 1400 * reader/writer variety lock. 
1401 */ 1402int 1403_psynch_rw_unlock(__unused proc_t p, 1404 user_addr_t rwlock, 1405 uint32_t lgenval, 1406 uint32_t ugenval, 1407 uint32_t rw_wc, 1408 int flags, 1409 uint32_t *retval) 1410{ 1411 int error = 0; 1412 ksyn_wait_queue_t kwq; 1413 uint32_t updatebits = 0; 1414 int diff; 1415 uint32_t count = 0; 1416 uint32_t curgen = lgenval & PTHRW_COUNT_MASK; 1417 1418 error = ksyn_wqfind(rwlock, lgenval, ugenval, rw_wc, flags, (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_RWLOCK), &kwq); 1419 if (error != 0) { 1420 return(error); 1421 } 1422 1423 ksyn_wqlock(kwq); 1424 int isinit = _ksyn_check_init(kwq, lgenval); 1425 1426 /* if lastunlock seq is set, ensure the current one is not lower than that, as it would be spurious */ 1427 if ((kwq->kw_lastunlockseq != PTHRW_RWL_INIT) && (is_seqlower(ugenval, kwq->kw_lastunlockseq)!= 0)) { 1428 error = 0; 1429 goto out; 1430 } 1431 1432 /* If L-U != num of waiters, then it needs to be preposted or spr */ 1433 diff = find_diff(lgenval, ugenval); 1434 1435 if (find_seq_till(kwq, curgen, diff, &count) == 0) { 1436 if ((count == 0) || (count < (uint32_t)diff)) 1437 goto prepost; 1438 } 1439 1440 /* no prepost and all threads are in place, reset the bit */ 1441 if ((isinit != 0) && ((kwq->kw_kflags & KSYN_KWF_INITCLEARED) != 0)){ 1442 kwq->kw_kflags &= ~KSYN_KWF_INITCLEARED; 1443 } 1444 1445 /* can handle unlock now */ 1446 1447 CLEAR_PREPOST_BITS(kwq); 1448 1449 error = kwq_handle_unlock(kwq, lgenval, rw_wc, &updatebits, 0, NULL, 0); 1450#if __TESTPANICS__ 1451 if (error != 0) 1452 panic("psynch_rw_unlock: kwq_handle_unlock failed %d\n",error); 1453#endif /* __TESTPANICS__ */ 1454out: 1455 if (error == 0) { 1456 /* update bits?? */ 1457 *retval = updatebits; 1458 } 1459 1460 1461 ksyn_wqunlock(kwq); 1462 ksyn_wqrelease(kwq, 0, (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_RWLOCK)); 1463 1464 return(error); 1465 1466prepost: 1467 /* update if the new seq is higher than prev prepost, or first set */ 1468 if (is_rws_setseq(kwq->kw_pre_sseq) || 1469 is_seqhigher_eq(rw_wc, kwq->kw_pre_sseq)) { 1470 kwq->kw_pre_rwwc = (diff - count); 1471 kwq->kw_pre_lockseq = curgen; 1472 kwq->kw_pre_sseq = rw_wc; 1473 updatebits = lgenval; /* let this not do unlock handling */ 1474 } 1475 error = 0; 1476 goto out; 1477} 1478 1479 1480/* ************************************************************************** */ 1481void 1482pth_global_hashinit(void) 1483{ 1484 pth_glob_hashtbl = hashinit(PTH_HASHSIZE * 4, M_PROC, &pthhash); 1485} 1486 1487void 1488_pth_proc_hashinit(proc_t p) 1489{ 1490 void *ptr = hashinit(PTH_HASHSIZE, M_PCB, &pthhash); 1491 if (ptr == NULL) { 1492 panic("pth_proc_hashinit: hash init returned 0\n"); 1493 } 1494 1495 pthread_kern->proc_set_pthhash(p, ptr); 1496} 1497 1498 1499static int 1500ksyn_wq_hash_lookup(user_addr_t uaddr, 1501 proc_t p, 1502 int flags, 1503 ksyn_wait_queue_t *out_kwq, 1504 struct pthhashhead **out_hashptr, 1505 uint64_t *out_object, 1506 uint64_t *out_offset) 1507{ 1508 int res = 0; 1509 ksyn_wait_queue_t kwq; 1510 uint64_t object = 0, offset = 0; 1511 struct pthhashhead *hashptr; 1512 if ((flags & PTHREAD_PSHARED_FLAGS_MASK) == PTHREAD_PROCESS_SHARED) { 1513 hashptr = pth_glob_hashtbl; 1514 res = ksyn_findobj(uaddr, &object, &offset); 1515 if (res == 0) { 1516 LIST_FOREACH(kwq, &hashptr[object & pthhash], kw_hash) { 1517 if (kwq->kw_object == object && kwq->kw_offset == offset) { 1518 break; 1519 } 1520 } 1521 } else { 1522 kwq = NULL; 1523 } 1524 } else { 1525 hashptr = pthread_kern->proc_get_pthhash(p); 1526 LIST_FOREACH(kwq, &hashptr[uaddr & 
pthhash], kw_hash) { 1527 if (kwq->kw_addr == uaddr) { 1528 break; 1529 } 1530 } 1531 } 1532 *out_kwq = kwq; 1533 *out_object = object; 1534 *out_offset = offset; 1535 *out_hashptr = hashptr; 1536 return res; 1537} 1538 1539void 1540_pth_proc_hashdelete(proc_t p) 1541{ 1542 struct pthhashhead * hashptr; 1543 ksyn_wait_queue_t kwq; 1544 unsigned long hashsize = pthhash + 1; 1545 unsigned long i; 1546 1547 hashptr = pthread_kern->proc_get_pthhash(p); 1548 pthread_kern->proc_set_pthhash(p, NULL); 1549 if (hashptr == NULL) { 1550 return; 1551 } 1552 1553 pthread_list_lock(); 1554 for(i= 0; i < hashsize; i++) { 1555 while ((kwq = LIST_FIRST(&hashptr[i])) != NULL) { 1556 if ((kwq->kw_pflags & KSYN_WQ_INHASH) != 0) { 1557 kwq->kw_pflags &= ~KSYN_WQ_INHASH; 1558 LIST_REMOVE(kwq, kw_hash); 1559 } 1560 if ((kwq->kw_pflags & KSYN_WQ_FLIST) != 0) { 1561 kwq->kw_pflags &= ~KSYN_WQ_FLIST; 1562 LIST_REMOVE(kwq, kw_list); 1563 } 1564 pthread_list_unlock(); 1565 /* release fake entries if present for cvars */ 1566 if (((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_CVAR) && (kwq->kw_inqueue != 0)) 1567 ksyn_freeallkwe(&kwq->kw_ksynqueues[KSYN_QUEUE_WRITER]); 1568 lck_mtx_destroy(&kwq->kw_lock, pthread_lck_grp); 1569 pthread_kern->zfree(kwq_zone, kwq); 1570 pthread_list_lock(); 1571 } 1572 } 1573 pthread_list_unlock(); 1574 FREE(hashptr, M_PROC); 1575} 1576 1577/* no lock held for this as the waitqueue is getting freed */ 1578void 1579ksyn_freeallkwe(ksyn_queue_t kq) 1580{ 1581 ksyn_waitq_element_t kwe; 1582 while ((kwe = TAILQ_FIRST(&kq->ksynq_kwelist)) != NULL) { 1583 TAILQ_REMOVE(&kq->ksynq_kwelist, kwe, kwe_list); 1584 if (kwe->kwe_state != KWE_THREAD_INWAIT) { 1585 pthread_kern->zfree(kwe_zone, kwe); 1586 } 1587 } 1588} 1589 1590/* find kernel waitqueue, if not present create one. Grants a reference */ 1591int 1592ksyn_wqfind(user_addr_t uaddr, uint32_t mgen, uint32_t ugen, uint32_t sgen, int flags, int wqtype, ksyn_wait_queue_t *kwqp) 1593{ 1594 int res = 0; 1595 ksyn_wait_queue_t kwq = NULL; 1596 ksyn_wait_queue_t nkwq = NULL; 1597 struct pthhashhead *hashptr; 1598 proc_t p = current_proc(); 1599 1600 uint64_t object = 0, offset = 0; 1601 if ((flags & PTHREAD_PSHARED_FLAGS_MASK) == PTHREAD_PROCESS_SHARED) { 1602 res = ksyn_findobj(uaddr, &object, &offset); 1603 hashptr = pth_glob_hashtbl; 1604 } else { 1605 hashptr = pthread_kern->proc_get_pthhash(p); 1606 } 1607 1608 while (res == 0) { 1609 pthread_list_lock(); 1610 res = ksyn_wq_hash_lookup(uaddr, current_proc(), flags, &kwq, &hashptr, &object, &offset); 1611 if (res != 0) { 1612 break; 1613 } 1614 if (kwq == NULL && nkwq == NULL) { 1615 // Drop the lock to allocate a new kwq and retry. 1616 pthread_list_unlock(); 1617 1618 nkwq = (ksyn_wait_queue_t)pthread_kern->zalloc(kwq_zone); 1619 bzero(nkwq, sizeof(struct ksyn_wait_queue)); 1620 int i; 1621 for (i = 0; i < KSYN_QUEUE_MAX; i++) { 1622 ksyn_queue_init(&nkwq->kw_ksynqueues[i]); 1623 } 1624 lck_mtx_init(&nkwq->kw_lock, pthread_lck_grp, pthread_lck_attr); 1625 continue; 1626 } else if (kwq == NULL && nkwq != NULL) { 1627 // Still not found, add the new kwq to the hash. 1628 kwq = nkwq; 1629 nkwq = NULL; // Don't free. 1630 if ((flags & PTHREAD_PSHARED_FLAGS_MASK) == PTHREAD_PROCESS_SHARED) { 1631 kwq->kw_pflags |= KSYN_WQ_SHARED; 1632 LIST_INSERT_HEAD(&hashptr[object & pthhash], kwq, kw_hash); 1633 } else { 1634 LIST_INSERT_HEAD(&hashptr[uaddr & pthhash], kwq, kw_hash); 1635 } 1636 kwq->kw_pflags |= KSYN_WQ_INHASH; 1637 } else if (kwq != NULL) { 1638 // Found an existing kwq, use it. 
1639 if ((kwq->kw_pflags & KSYN_WQ_FLIST) != 0) { 1640 LIST_REMOVE(kwq, kw_list); 1641 kwq->kw_pflags &= ~KSYN_WQ_FLIST; 1642 } 1643 if ((kwq->kw_type & KSYN_WQTYPE_MASK) != (wqtype & KSYN_WQTYPE_MASK)) { 1644 if (kwq->kw_inqueue == 0 && kwq->kw_pre_rwwc == 0 && kwq->kw_pre_intrcount == 0) { 1645 if (kwq->kw_iocount == 0) { 1646 kwq->kw_type = 0; // mark for reinitialization 1647 } else if (kwq->kw_iocount == 1 && kwq->kw_dropcount == kwq->kw_iocount) { 1648 /* if all users are unlockers then wait for it to finish */ 1649 kwq->kw_pflags |= KSYN_WQ_WAITING; 1650 // Drop the lock and wait for the kwq to be free. 1651 (void)msleep(&kwq->kw_pflags, pthread_list_mlock, PDROP, "ksyn_wqfind", 0); 1652 continue; 1653 } else { 1654 __FAILEDUSERTEST__("address already known to kernel for another [busy] synchronizer type\n"); 1655 res = EINVAL; 1656 } 1657 } else { 1658 __FAILEDUSERTEST__("address already known to kernel for another [busy] synchronizer type\n"); 1659 res = EINVAL; 1660 } 1661 } 1662 } 1663 if (res == 0) { 1664 if (kwq->kw_type == 0) { 1665 kwq->kw_addr = uaddr; 1666 kwq->kw_object = object; 1667 kwq->kw_offset = offset; 1668 kwq->kw_type = (wqtype & KSYN_WQTYPE_MASK); 1669 CLEAR_REINIT_BITS(kwq); 1670 kwq->kw_lword = mgen; 1671 kwq->kw_uword = ugen; 1672 kwq->kw_sword = sgen; 1673 kwq->kw_owner = 0; 1674 kwq->kw_kflags = 0; 1675 kwq->kw_qos_override = THREAD_QOS_UNSPECIFIED; 1676 } 1677 kwq->kw_iocount++; 1678 if (wqtype == KSYN_WQTYPE_MUTEXDROP) { 1679 kwq->kw_dropcount++; 1680 } 1681 } 1682 break; 1683 } 1684 pthread_list_unlock(); 1685 if (kwqp != NULL) { 1686 *kwqp = kwq; 1687 } 1688 if (nkwq) { 1689 lck_mtx_destroy(&nkwq->kw_lock, pthread_lck_grp); 1690 pthread_kern->zfree(kwq_zone, nkwq); 1691 } 1692 return res; 1693} 1694 1695/* Reference from find is dropped here. 
Starts the free process if needed */ 1696void 1697ksyn_wqrelease(ksyn_wait_queue_t kwq, int qfreenow, int wqtype) 1698{ 1699 uint64_t deadline; 1700 ksyn_wait_queue_t free_elem = NULL; 1701 1702 pthread_list_lock(); 1703 if (wqtype == KSYN_WQTYPE_MUTEXDROP) { 1704 kwq->kw_dropcount--; 1705 } 1706 if (--kwq->kw_iocount == 0) { 1707 if ((kwq->kw_pflags & KSYN_WQ_WAITING) != 0) { 1708 /* some one is waiting for the waitqueue, wake them up */ 1709 kwq->kw_pflags &= ~KSYN_WQ_WAITING; 1710 wakeup(&kwq->kw_pflags); 1711 } 1712 1713 if (kwq->kw_pre_rwwc == 0 && kwq->kw_inqueue == 0 && kwq->kw_pre_intrcount == 0) { 1714 if (qfreenow == 0) { 1715 microuptime(&kwq->kw_ts); 1716 LIST_INSERT_HEAD(&pth_free_list, kwq, kw_list); 1717 kwq->kw_pflags |= KSYN_WQ_FLIST; 1718 if (psynch_cleanupset == 0) { 1719 struct timeval t; 1720 microuptime(&t); 1721 t.tv_sec += KSYN_CLEANUP_DEADLINE; 1722 deadline = tvtoabstime(&t); 1723 thread_call_enter_delayed(psynch_thcall, deadline); 1724 psynch_cleanupset = 1; 1725 } 1726 } else { 1727 kwq->kw_pflags &= ~KSYN_WQ_INHASH; 1728 LIST_REMOVE(kwq, kw_hash); 1729 free_elem = kwq; 1730 } 1731 } 1732 } 1733 pthread_list_unlock(); 1734 if (free_elem != NULL) { 1735 lck_mtx_destroy(&free_elem->kw_lock, pthread_lck_grp); 1736 pthread_kern->zfree(kwq_zone, free_elem); 1737 } 1738} 1739 1740/* responsible to free the waitqueues */ 1741void 1742psynch_wq_cleanup(__unused void *param, __unused void * param1) 1743{ 1744 ksyn_wait_queue_t kwq; 1745 struct timeval t; 1746 int reschedule = 0; 1747 uint64_t deadline = 0; 1748 LIST_HEAD(, ksyn_wait_queue) freelist; 1749 LIST_INIT(&freelist); 1750 1751 pthread_list_lock(); 1752 1753 microuptime(&t); 1754 1755 LIST_FOREACH(kwq, &pth_free_list, kw_list) { 1756 if (kwq->kw_iocount != 0 || kwq->kw_pre_rwwc != 0 || kwq->kw_inqueue != 0 || kwq->kw_pre_intrcount != 0) { 1757 // still in use 1758 continue; 1759 } 1760 __darwin_time_t diff = t.tv_sec - kwq->kw_ts.tv_sec; 1761 if (diff < 0) 1762 diff *= -1; 1763 if (diff >= KSYN_CLEANUP_DEADLINE) { 1764 kwq->kw_pflags &= ~(KSYN_WQ_FLIST | KSYN_WQ_INHASH); 1765 LIST_REMOVE(kwq, kw_hash); 1766 LIST_REMOVE(kwq, kw_list); 1767 LIST_INSERT_HEAD(&freelist, kwq, kw_list); 1768 } else { 1769 reschedule = 1; 1770 } 1771 1772 } 1773 if (reschedule != 0) { 1774 t.tv_sec += KSYN_CLEANUP_DEADLINE; 1775 deadline = tvtoabstime(&t); 1776 thread_call_enter_delayed(psynch_thcall, deadline); 1777 psynch_cleanupset = 1; 1778 } else { 1779 psynch_cleanupset = 0; 1780 } 1781 pthread_list_unlock(); 1782 1783 while ((kwq = LIST_FIRST(&freelist)) != NULL) { 1784 LIST_REMOVE(kwq, kw_list); 1785 lck_mtx_destroy(&kwq->kw_lock, pthread_lck_grp); 1786 pthread_kern->zfree(kwq_zone, kwq); 1787 } 1788} 1789 1790static int 1791_wait_result_to_errno(wait_result_t result) 1792{ 1793 int res = 0; 1794 switch (result) { 1795 case THREAD_TIMED_OUT: 1796 res = ETIMEDOUT; 1797 break; 1798 case THREAD_INTERRUPTED: 1799 res = EINTR; 1800 break; 1801 } 1802 return res; 1803} 1804 1805int 1806ksyn_wait(ksyn_wait_queue_t kwq, 1807 int kqi, 1808 uint32_t lockseq, 1809 int fit, 1810 uint64_t abstime, 1811 thread_continue_t continuation) 1812{ 1813 int res; 1814 1815 thread_t th = current_thread(); 1816 uthread_t uth = pthread_kern->get_bsdthread_info(th); 1817 ksyn_waitq_element_t kwe = pthread_kern->uthread_get_uukwe(uth); 1818 bzero(kwe, sizeof(*kwe)); 1819 kwe->kwe_count = 1; 1820 kwe->kwe_lockseq = lockseq & PTHRW_COUNT_MASK; 1821 kwe->kwe_state = KWE_THREAD_INWAIT; 1822 kwe->kwe_uth = uth; 1823 kwe->kwe_tid = thread_tid(th); 1824 1825 res 
int
ksyn_wait(ksyn_wait_queue_t kwq,
          int kqi,
          uint32_t lockseq,
          int fit,
          uint64_t abstime,
          thread_continue_t continuation)
{
    int res;

    thread_t th = current_thread();
    uthread_t uth = pthread_kern->get_bsdthread_info(th);
    ksyn_waitq_element_t kwe = pthread_kern->uthread_get_uukwe(uth);
    bzero(kwe, sizeof(*kwe));
    kwe->kwe_count = 1;
    kwe->kwe_lockseq = lockseq & PTHRW_COUNT_MASK;
    kwe->kwe_state = KWE_THREAD_INWAIT;
    kwe->kwe_uth = uth;
    kwe->kwe_tid = thread_tid(th);

    res = ksyn_queue_insert(kwq, kqi, kwe, lockseq, fit);
    if (res != 0) {
        //panic("psynch_rw_wrlock: failed to enqueue\n"); // XXX
        ksyn_wqunlock(kwq);
        return res;
    }

    assert_wait_deadline_with_leeway(&kwe->kwe_psynchretval, THREAD_ABORTSAFE, TIMEOUT_URGENCY_USER_NORMAL, abstime, 0);
    ksyn_wqunlock(kwq);

    kern_return_t ret;
    if (continuation == THREAD_CONTINUE_NULL) {
        ret = thread_block(NULL);
    } else {
        ret = thread_block_parameter(continuation, kwq);

        // If thread_block_parameter returns (interrupted) call the
        // continuation manually to clean up.
        continuation(kwq, ret);

        // NOT REACHED
        panic("ksyn_wait continuation returned");
    }

    res = _wait_result_to_errno(ret);
    if (res != 0) {
        ksyn_wqlock(kwq);
        if (kwe->kwe_kwqqueue) {
            ksyn_queue_remove_item(kwq, &kwq->kw_ksynqueues[kqi], kwe);
        }
        ksyn_wqunlock(kwq);
    }
    return res;
}

kern_return_t
ksyn_signal(ksyn_wait_queue_t kwq,
            int kqi,
            ksyn_waitq_element_t kwe,
            uint32_t updateval)
{
    kern_return_t ret;

    // If no wait element was specified, wake the first.
    if (!kwe) {
        kwe = TAILQ_FIRST(&kwq->kw_ksynqueues[kqi].ksynq_kwelist);
        if (!kwe) {
            panic("ksyn_signal: panic signaling empty queue");
        }
    }

    if (kwe->kwe_state != KWE_THREAD_INWAIT) {
        panic("ksyn_signal: panic signaling non-waiting element");
    }

    ksyn_queue_remove_item(kwq, &kwq->kw_ksynqueues[kqi], kwe);
    kwe->kwe_psynchretval = updateval;

    ret = thread_wakeup_one((caddr_t)&kwe->kwe_psynchretval);
    if (ret != KERN_SUCCESS && ret != KERN_NOT_WAITING) {
        panic("ksyn_signal: panic waking up thread %x\n", ret);
    }
    return ret;
}

int
ksyn_findobj(user_addr_t uaddr, uint64_t *objectp, uint64_t *offsetp)
{
    kern_return_t ret;
    vm_page_info_basic_data_t info;
    mach_msg_type_number_t count = VM_PAGE_INFO_BASIC_COUNT;
    ret = pthread_kern->vm_map_page_info(pthread_kern->current_map(), uaddr, VM_PAGE_INFO_BASIC, (vm_page_info_t)&info, &count);
    if (ret != KERN_SUCCESS) {
        return EINVAL;
    }

    if (objectp != NULL) {
        *objectp = (uint64_t)info.object_id;
    }
    if (offsetp != NULL) {
        *offsetp = (uint64_t)info.offset;
    }

    return(0);
}
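/*
 * Editorial note on ksyn_findobj() above (summary only): it resolves the
 * VM object id and offset backing a user address via vm_map_page_info().
 * That (object, offset) pair is what identifies a process-shared sync
 * object (kw_object/kw_offset), so two processes mapping the same shared
 * page hash to the same waitqueue.  Illustrative (not compiled) call,
 * with hypothetical locals:
 *
 *    uint64_t object = 0, offset = 0;
 *    if (ksyn_findobj(uaddr, &object, &offset) != 0) {
 *        // EINVAL: the address could not be resolved
 *    }
 */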
/* find the lowest sequence number pending across the read and write queues (kw_fr, kw_fwr) */
int
kwq_find_rw_lowest(ksyn_wait_queue_t kwq, int flags, uint32_t premgen, int *typep, uint32_t lowest[])
{
    uint32_t kw_fr, kw_fwr, low;
    int type = 0, lowtype, typenum[2] = { 0 };
    uint32_t numbers[2] = { 0 };
    int count = 0, i;

    if ((kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_count != 0) || ((flags & KW_UNLOCK_PREPOST_READLOCK) != 0)) {
        type |= PTH_RWSHFT_TYPE_READ;
        /* read entries are present */
        if (kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_count != 0) {
            kw_fr = kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_firstnum;
            if (((flags & KW_UNLOCK_PREPOST_READLOCK) != 0) && (is_seqlower(premgen, kw_fr) != 0))
                kw_fr = premgen;
        } else
            kw_fr = premgen;

        lowest[KSYN_QUEUE_READ] = kw_fr;
        numbers[count] = kw_fr;
        typenum[count] = PTH_RW_TYPE_READ;
        count++;
    } else
        lowest[KSYN_QUEUE_READ] = 0;

    if ((kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count != 0) || ((flags & KW_UNLOCK_PREPOST_WRLOCK) != 0)) {
        type |= PTH_RWSHFT_TYPE_WRITE;
        /* write entries are present */
        if (kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count != 0) {
            kw_fwr = kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_firstnum;
            if (((flags & KW_UNLOCK_PREPOST_WRLOCK) != 0) && (is_seqlower(premgen, kw_fwr) != 0))
                kw_fwr = premgen;
        } else
            kw_fwr = premgen;

        lowest[KSYN_QUEUE_WRITER] = kw_fwr;
        numbers[count] = kw_fwr;
        typenum[count] = PTH_RW_TYPE_WRITE;
        count++;
    } else
        lowest[KSYN_QUEUE_WRITER] = 0;

#if __TESTPANICS__
    if (count == 0)
        panic("nothing in the queue???\n");
#endif /* __TESTPANICS__ */

    low = numbers[0];
    lowtype = typenum[0];
    if (count > 1) {
        for (i = 1; i < count; i++) {
            if (is_seqlower(numbers[i], low) != 0) {
                low = numbers[i];
                lowtype = typenum[i];
            }
        }
    }
    type |= lowtype;

    if (typep != 0)
        *typep = type;
    return(0);
}

/* wake up readers up to the writer limit */
int
ksyn_wakeupreaders(ksyn_wait_queue_t kwq, uint32_t limitread, int allreaders, uint32_t updatebits, int *wokenp)
{
    ksyn_queue_t kq;
    int failedwakeup = 0;
    int numwoken = 0;
    kern_return_t kret = KERN_SUCCESS;
    uint32_t lbits = 0;

    lbits = updatebits;

    kq = &kwq->kw_ksynqueues[KSYN_QUEUE_READ];
    while ((kq->ksynq_count != 0) && (allreaders || (is_seqlower(kq->ksynq_firstnum, limitread) != 0))) {
        kret = ksyn_signal(kwq, KSYN_QUEUE_READ, NULL, lbits);
        if (kret == KERN_NOT_WAITING) {
            failedwakeup++;
        }
        numwoken++;
    }

    if (wokenp != NULL)
        *wokenp = numwoken;
    return(failedwakeup);
}
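/*
 * Editorial note on kwq_handle_unlock() below (summary only): it is
 * called on rw_unlock, or once all preposted waiters have arrived, to
 * pick the next set of grants.  Depending on the lowest pending sequence
 * it either wakes readers up to (but not including) the first writer --
 * or all readers when no writer is queued -- or grants the lock to a
 * single writer.  The bits handed back through *updatep carry the grant
 * count in the upper bits plus PTH_RWL_* flags; for a single writer
 * grant the composition is roughly:
 *
 *    updatebits = PTHRW_INC | PTH_RWL_KBIT | PTH_RWL_EBIT
 *        | (more writers queued ? PTH_RWL_WBIT : 0);
 *
 * *blockp tells a preposting caller whether it still has to block, and
 * wakeups that raced with an interrupted waiter are recorded in the
 * kw_pre_intr* fields so they can be replayed later.
 */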
/* This handles the unlock grants for next set on rw_unlock() or on arrival of all preposted waiters */
int
kwq_handle_unlock(ksyn_wait_queue_t kwq,
                  __unused uint32_t mgen,
                  uint32_t rw_wc,
                  uint32_t *updatep,
                  int flags,
                  int *blockp,
                  uint32_t premgen)
{
    uint32_t low_writer, limitrdnum;
    int rwtype, error = 0;
    int allreaders, failed;
    uint32_t updatebits = 0, numneeded = 0;
    int prepost = flags & KW_UNLOCK_PREPOST;
    thread_t preth = THREAD_NULL;
    ksyn_waitq_element_t kwe;
    uthread_t uth;
    thread_t th;
    int woken = 0;
    int block = 1;
    uint32_t lowest[KSYN_QUEUE_MAX];	/* no need for upgrade as it is handled separately */
    kern_return_t kret = KERN_SUCCESS;
    ksyn_queue_t kq;
    int curthreturns = 0;

    if (prepost != 0) {
        preth = current_thread();
    }

    kq = &kwq->kw_ksynqueues[KSYN_QUEUE_READ];
    kwq->kw_lastseqword = rw_wc;
    kwq->kw_lastunlockseq = (rw_wc & PTHRW_COUNT_MASK);
    kwq->kw_overlapwatch = 0;

    error = kwq_find_rw_lowest(kwq, flags, premgen, &rwtype, lowest);
#if __TESTPANICS__
    if (error != 0)
        panic("rwunlock: failed to slot next round of threads");
#endif /* __TESTPANICS__ */

    low_writer = lowest[KSYN_QUEUE_WRITER];

    allreaders = 0;
    updatebits = 0;

    switch (rwtype & PTH_RW_TYPE_MASK) {
    case PTH_RW_TYPE_READ: {
        // XXX
        /* what about the preflight which is LREAD or READ ?? */
        if ((rwtype & PTH_RWSHFT_TYPE_MASK) != 0) {
            if (rwtype & PTH_RWSHFT_TYPE_WRITE) {
                updatebits |= (PTH_RWL_WBIT | PTH_RWL_KBIT);
            }
        }
        limitrdnum = 0;
        if ((rwtype & PTH_RWSHFT_TYPE_WRITE) != 0) {
            limitrdnum = low_writer;
        } else {
            allreaders = 1;
        }

        numneeded = 0;

        if ((rwtype & PTH_RWSHFT_TYPE_WRITE) != 0) {
            limitrdnum = low_writer;
            numneeded = ksyn_queue_count_tolowest(kq, limitrdnum);
            if (((flags & KW_UNLOCK_PREPOST_READLOCK) != 0) && (is_seqlower(premgen, limitrdnum) != 0)) {
                curthreturns = 1;
                numneeded += 1;
            }
        } else {
            // no writers at all
            // no other waiters only readers
            kwq->kw_overlapwatch = 1;
            numneeded += kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_count;
            if ((flags & KW_UNLOCK_PREPOST_READLOCK) != 0) {
                curthreturns = 1;
                numneeded += 1;
            }
        }

        updatebits += (numneeded << PTHRW_COUNT_SHIFT);

        kwq->kw_nextseqword = (rw_wc & PTHRW_COUNT_MASK) + updatebits;

        if (curthreturns != 0) {
            block = 0;
            uth = current_uthread();
            kwe = pthread_kern->uthread_get_uukwe(uth);
            kwe->kwe_psynchretval = updatebits;
        }

        failed = ksyn_wakeupreaders(kwq, limitrdnum, allreaders, updatebits, &woken);
        if (failed != 0) {
            kwq->kw_pre_intrcount = failed;	/* actually a count */
            kwq->kw_pre_intrseq = limitrdnum;
            kwq->kw_pre_intrretbits = updatebits;
            kwq->kw_pre_intrtype = PTH_RW_TYPE_READ;
        }

        error = 0;

        if ((kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count != 0) && ((updatebits & PTH_RWL_WBIT) == 0))
            panic("kwq_handle_unlock: writer pending but no writebit set %x\n", updatebits);
    }
        break;

    case PTH_RW_TYPE_WRITE: {

        /* only one thread is going to be granted */
        updatebits |= (PTHRW_INC);
        updatebits |= PTH_RWL_KBIT | PTH_RWL_EBIT;

        if (((flags & KW_UNLOCK_PREPOST_WRLOCK) != 0) && (low_writer == premgen)) {
            block = 0;
            if (kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count != 0) {
                updatebits |= PTH_RWL_WBIT;
            }
            th = preth;
            uth = pthread_kern->get_bsdthread_info(th);
            kwe = pthread_kern->uthread_get_uukwe(uth);
            kwe->kwe_psynchretval = updatebits;
        } else {
            /* we are not granting writelock to the preposting thread */
            /* if there are writers present or the preposting write thread then W bit is to be set */
            if (kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count > 1 ||
                (flags & KW_UNLOCK_PREPOST_WRLOCK) != 0) {
                updatebits |= PTH_RWL_WBIT;
            }
            /* setup next in the queue */
            kret = ksyn_signal(kwq, KSYN_QUEUE_WRITER, NULL, updatebits);
            if (kret == KERN_NOT_WAITING) {
                kwq->kw_pre_intrcount = 1;	/* actually a count */
                kwq->kw_pre_intrseq = low_writer;
                kwq->kw_pre_intrretbits = updatebits;
                kwq->kw_pre_intrtype = PTH_RW_TYPE_WRITE;
            }
            error = 0;
        }
        kwq->kw_nextseqword = (rw_wc & PTHRW_COUNT_MASK) + updatebits;
        if ((updatebits & (PTH_RWL_KBIT | PTH_RWL_EBIT)) != (PTH_RWL_KBIT | PTH_RWL_EBIT))
            panic("kwq_handle_unlock: writer lock granted but KBIT/EBIT not set %x\n", updatebits);
    }
        break;

    default:
        panic("rwunlock: invalid type for lock grants");

    };

    if (updatep != NULL)
        *updatep = updatebits;
    if (blockp != NULL)
        *blockp = block;
    return(error);
}

/************* Indiv queue support routines ************************/
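/*
 * Editorial note: each ksyn_queue is a TAILQ of wait elements with the
 * lowest and highest queued sequence numbers cached in ksynq_firstnum /
 * ksynq_lastnum.  With SEQFIT the list is kept sorted by lock sequence
 * (modulo wrap, via is_seqlower()/is_seqhigher()); with FIRSTFIT new
 * elements are appended in arrival order and only the cached bounds are
 * updated.  Illustrative example (hypothetical sequence values): a SEQFIT
 * queue holding 0x100 and 0x300 places an insert of 0x200 between them,
 * while a FIRSTFIT queue appends it at the tail but still reports
 * 0x100/0x300 as its bounds.
 */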
void
ksyn_queue_init(ksyn_queue_t kq)
{
    TAILQ_INIT(&kq->ksynq_kwelist);
    kq->ksynq_count = 0;
    kq->ksynq_firstnum = 0;
    kq->ksynq_lastnum = 0;
}

int
ksyn_queue_insert(ksyn_wait_queue_t kwq, int kqi, ksyn_waitq_element_t kwe, uint32_t mgen, int fit)
{
    ksyn_queue_t kq = &kwq->kw_ksynqueues[kqi];
    uint32_t lockseq = mgen & PTHRW_COUNT_MASK;
    int res = 0;

    if (kwe->kwe_kwqqueue != NULL) {
        panic("adding enqueued item to another queue");
    }

    if (kq->ksynq_count == 0) {
        TAILQ_INSERT_HEAD(&kq->ksynq_kwelist, kwe, kwe_list);
        kq->ksynq_firstnum = lockseq;
        kq->ksynq_lastnum = lockseq;
    } else if (fit == FIRSTFIT) {
        /* TBD: if retry bit is set for mutex, add it to the head */
        /* firstfit, arriving order */
        TAILQ_INSERT_TAIL(&kq->ksynq_kwelist, kwe, kwe_list);
        if (is_seqlower(lockseq, kq->ksynq_firstnum)) {
            kq->ksynq_firstnum = lockseq;
        }
        if (is_seqhigher(lockseq, kq->ksynq_lastnum)) {
            kq->ksynq_lastnum = lockseq;
        }
    } else if (lockseq == kq->ksynq_firstnum || lockseq == kq->ksynq_lastnum) {
        /* During prepost when a thread is getting cancelled, we could have two with same seq */
        res = EBUSY;
        if (kwe->kwe_state == KWE_THREAD_PREPOST) {
            ksyn_waitq_element_t tmp = ksyn_queue_find_seq(kwq, kq, lockseq);
            if (tmp != NULL && tmp->kwe_uth != NULL && pthread_kern->uthread_is_cancelled(tmp->kwe_uth)) {
                TAILQ_INSERT_TAIL(&kq->ksynq_kwelist, kwe, kwe_list);
                res = 0;
            }
        }
    } else if (is_seqlower(kq->ksynq_lastnum, lockseq)) { // XXX is_seqhigher
        TAILQ_INSERT_TAIL(&kq->ksynq_kwelist, kwe, kwe_list);
        kq->ksynq_lastnum = lockseq;
    } else if (is_seqlower(lockseq, kq->ksynq_firstnum)) {
        TAILQ_INSERT_HEAD(&kq->ksynq_kwelist, kwe, kwe_list);
        kq->ksynq_firstnum = lockseq;
    } else {
        ksyn_waitq_element_t q_kwe, r_kwe;

        res = ESRCH;
        TAILQ_FOREACH_SAFE(q_kwe, &kq->ksynq_kwelist, kwe_list, r_kwe) {
            if (is_seqhigher(q_kwe->kwe_lockseq, lockseq)) {
                TAILQ_INSERT_BEFORE(q_kwe, kwe, kwe_list);
                res = 0;
                break;
            }
        }
    }

    if (res == 0) {
        kwe->kwe_kwqqueue = kwq;
        kq->ksynq_count++;
        kwq->kw_inqueue++;
        update_low_high(kwq, lockseq);
    }
    return res;
}

void
ksyn_queue_remove_item(ksyn_wait_queue_t kwq, ksyn_queue_t kq, ksyn_waitq_element_t kwe)
{
    if (kq->ksynq_count == 0) {
        panic("removing item from empty queue");
    }

    if (kwe->kwe_kwqqueue != kwq) {
        panic("removing item from wrong queue");
    }

    TAILQ_REMOVE(&kq->ksynq_kwelist, kwe, kwe_list);
    kwe->kwe_list.tqe_next = NULL;
    kwe->kwe_list.tqe_prev = NULL;
    kwe->kwe_kwqqueue = NULL;

    if (--kq->ksynq_count > 0) {
        ksyn_waitq_element_t tmp;
        tmp = TAILQ_FIRST(&kq->ksynq_kwelist);
        kq->ksynq_firstnum = tmp->kwe_lockseq & PTHRW_COUNT_MASK;
        tmp = TAILQ_LAST(&kq->ksynq_kwelist, ksynq_kwelist_head);
        kq->ksynq_lastnum = tmp->kwe_lockseq & PTHRW_COUNT_MASK;
    } else {
        kq->ksynq_firstnum = 0;
        kq->ksynq_lastnum = 0;
    }

    if (--kwq->kw_inqueue > 0) {
        uint32_t curseq = kwe->kwe_lockseq & PTHRW_COUNT_MASK;
        if (kwq->kw_lowseq == curseq) {
            kwq->kw_lowseq = find_nextlowseq(kwq);
        }
        if (kwq->kw_highseq == curseq) {
            kwq->kw_highseq = find_nexthighseq(kwq);
        }
    } else {
        kwq->kw_lowseq = 0;
        kwq->kw_highseq = 0;
    }
}

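/*
 * Editorial note: insertion and removal keep two levels of bookkeeping in
 * sync -- the per-queue ksynq_firstnum/ksynq_lastnum, and the kwq-wide
 * kw_lowseq/kw_highseq maintained by update_low_high() and the
 * find_next*seq() scans further below.  Illustrative example with
 * hypothetical sequences: if the read queue holds 0x100 and the writer
 * queue holds 0x180, kw_lowseq is 0x100 and kw_highseq is 0x180; removing
 * the 0x100 element zeroes the read queue bounds and rescans both queues,
 * so kw_lowseq becomes 0x180 as well.
 */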
ksyn_waitq_element_t
ksyn_queue_find_seq(__unused ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t seq)
{
    ksyn_waitq_element_t kwe;

    // XXX: should stop searching when higher sequence number is seen
    TAILQ_FOREACH(kwe, &kq->ksynq_kwelist, kwe_list) {
        if ((kwe->kwe_lockseq & PTHRW_COUNT_MASK) == seq) {
            return kwe;
        }
    }
    return NULL;
}

/* find the thread at the target sequence (or a broadcast/prepost at or above) */
ksyn_waitq_element_t
ksyn_queue_find_cvpreposeq(ksyn_queue_t kq, uint32_t cgen)
{
    ksyn_waitq_element_t result = NULL;
    ksyn_waitq_element_t kwe;
    uint32_t lgen = (cgen & PTHRW_COUNT_MASK);

    TAILQ_FOREACH(kwe, &kq->ksynq_kwelist, kwe_list) {
        if (is_seqhigher_eq(kwe->kwe_lockseq, cgen)) {
            result = kwe;

            // KWE_THREAD_INWAIT must be strictly equal
            if (kwe->kwe_state == KWE_THREAD_INWAIT && (kwe->kwe_lockseq & PTHRW_COUNT_MASK) != lgen) {
                result = NULL;
            }
            break;
        }
    }
    return result;
}

/* look for a thread at the signal sequence, or the first eligible waiter at or below uptoseq */
ksyn_waitq_element_t
ksyn_queue_find_signalseq(__unused ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t uptoseq, uint32_t signalseq)
{
    ksyn_waitq_element_t result = NULL;
    ksyn_waitq_element_t q_kwe, r_kwe;

    // XXX
    /* case where wrap in the tail of the queue exists */
    TAILQ_FOREACH_SAFE(q_kwe, &kq->ksynq_kwelist, kwe_list, r_kwe) {
        if (q_kwe->kwe_state == KWE_THREAD_PREPOST) {
            if (is_seqhigher(q_kwe->kwe_lockseq, uptoseq)) {
                return result;
            }
        }
        if (q_kwe->kwe_state == KWE_THREAD_PREPOST || q_kwe->kwe_state == KWE_THREAD_BROADCAST) {
            /* match any prepost at our same uptoseq or any broadcast above */
            if (is_seqlower(q_kwe->kwe_lockseq, uptoseq)) {
                continue;
            }
            return q_kwe;
        } else if (q_kwe->kwe_state == KWE_THREAD_INWAIT) {
            /*
             * Match any (non-cancelled) thread at or below our upto sequence -
             * but prefer an exact match to our signal sequence (if present) to
             * keep exact matches happening.
             */
            if (is_seqhigher(q_kwe->kwe_lockseq, uptoseq)) {
                return result;
            }
            if (q_kwe->kwe_kwqqueue == kwq) {
                if (!pthread_kern->uthread_is_cancelled(q_kwe->kwe_uth)) {
                    /* if equal or higher than our signal sequence, return this one */
                    if (is_seqhigher_eq(q_kwe->kwe_lockseq, signalseq)) {
                        return q_kwe;
                    }

                    /* otherwise, just remember this eligible thread and move on */
                    if (result == NULL) {
                        result = q_kwe;
                    }
                }
            }
        } else {
            panic("ksyn_queue_find_signalseq(): unknown wait queue element type (%d)\n", q_kwe->kwe_state);
        }
    }
    return result;
}
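/*
 * Editorial note on the three lookup helpers above: ksyn_queue_find_seq()
 * returns only an exact sequence match; ksyn_queue_find_cvpreposeq()
 * accepts a prepost/broadcast element at or above the target generation
 * but requires an in-wait thread to match exactly; and
 * ksyn_queue_find_signalseq() prefers an exact (or higher) match to the
 * signal sequence while remembering the first non-cancelled waiter at or
 * below uptoseq as a fallback.
 */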
void
ksyn_queue_free_items(ksyn_wait_queue_t kwq, int kqi, uint32_t upto, int all)
{
    ksyn_waitq_element_t kwe;
    uint32_t tseq = upto & PTHRW_COUNT_MASK;
    ksyn_queue_t kq = &kwq->kw_ksynqueues[kqi];

    while ((kwe = TAILQ_FIRST(&kq->ksynq_kwelist)) != NULL) {
        if (all == 0 && is_seqhigher(kwe->kwe_lockseq, tseq)) {
            break;
        }
        if (kwe->kwe_state == KWE_THREAD_INWAIT) {
            /*
             * This scenario is typically noticed when the cvar is
             * reinited and the new waiters are waiting. We can
             * return them as spurious wait so the cvar state gets
             * reset correctly.
             */

            /* skip canceled ones */
            /* wake the rest */
            /* set M bit to indicate to waking CV to return Inc val */
            (void)ksyn_signal(kwq, kqi, kwe, PTHRW_INC | PTH_RWS_CV_MBIT | PTH_RWL_MTX_WAIT);
        } else {
            ksyn_queue_remove_item(kwq, kq, kwe);
            pthread_kern->zfree(kwe_zone, kwe);
            kwq->kw_fakecount--;
        }
    }
}

/*************************************************************************/

void
update_low_high(ksyn_wait_queue_t kwq, uint32_t lockseq)
{
    if (kwq->kw_inqueue == 1) {
        kwq->kw_lowseq = lockseq;
        kwq->kw_highseq = lockseq;
    } else {
        if (is_seqlower(lockseq, kwq->kw_lowseq)) {
            kwq->kw_lowseq = lockseq;
        }
        if (is_seqhigher(lockseq, kwq->kw_highseq)) {
            kwq->kw_highseq = lockseq;
        }
    }
}

uint32_t
find_nextlowseq(ksyn_wait_queue_t kwq)
{
    uint32_t lowest = 0;
    int first = 1;
    int i;

    for (i = 0; i < KSYN_QUEUE_MAX; i++) {
        if (kwq->kw_ksynqueues[i].ksynq_count > 0) {
            uint32_t current = kwq->kw_ksynqueues[i].ksynq_firstnum;
            if (first || is_seqlower(current, lowest)) {
                lowest = current;
                first = 0;
            }
        }
    }

    return lowest;
}

uint32_t
find_nexthighseq(ksyn_wait_queue_t kwq)
{
    uint32_t highest = 0;
    int first = 1;
    int i;

    for (i = 0; i < KSYN_QUEUE_MAX; i++) {
        if (kwq->kw_ksynqueues[i].ksynq_count > 0) {
            uint32_t current = kwq->kw_ksynqueues[i].ksynq_lastnum;
            if (first || is_seqhigher(current, highest)) {
                highest = current;
                first = 0;
            }
        }
    }

    return highest;
}

int
find_seq_till(ksyn_wait_queue_t kwq, uint32_t upto, uint32_t nwaiters, uint32_t *countp)
{
    int i;
    uint32_t count = 0;

    for (i = 0; i < KSYN_QUEUE_MAX; i++) {
        count += ksyn_queue_count_tolowest(&kwq->kw_ksynqueues[i], upto);
        if (count >= nwaiters) {
            break;
        }
    }

    if (countp != NULL) {
        *countp = count;
    }

    if (count == 0) {
        return 0;
    } else if (count >= nwaiters) {
        return 1;
    } else {
        return 0;
    }
}

uint32_t
ksyn_queue_count_tolowest(ksyn_queue_t kq, uint32_t upto)
{
    uint32_t i = 0;
    ksyn_waitq_element_t kwe, newkwe;

    if (kq->ksynq_count == 0 || is_seqhigher(kq->ksynq_firstnum, upto)) {
        return 0;
    }
    if (upto == kq->ksynq_firstnum) {
        return 1;
    }
    TAILQ_FOREACH_SAFE(kwe, &kq->ksynq_kwelist, kwe_list, newkwe) {
        uint32_t curval = (kwe->kwe_lockseq & PTHRW_COUNT_MASK);
        if (is_seqhigher(curval, upto)) {
            break;
        }
        ++i;
        if (upto == curval) {
            break;
        }
    }
    return i;
}
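/*
 * Editorial note on ksyn_handle_cvbroad() below (summary only): it is
 * entered with the ckwq lock held.  It wakes every non-cancelled in-wait
 * element up to 'upto', moves fake (prepost/broadcast) entries onto a
 * local free queue, and -- if userland's L and S generations still
 * differ -- records a broadcast prepost so late arrivals are covered.
 * Because the zone allocation may block, the allocation path drops the
 * ckwq lock, allocates, re-takes the lock and retries the scan from the
 * top; the local kfreeq both recycles such an allocation and collects
 * elements to be freed once the scan is done.
 */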
/* handles the cond broadcast of cvar and returns number of woken threads and bits for syscall return */
void
ksyn_handle_cvbroad(ksyn_wait_queue_t ckwq, uint32_t upto, uint32_t *updatep)
{
    ksyn_waitq_element_t kwe, newkwe;
    uint32_t updatebits = 0;
    ksyn_queue_t kq = &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER];

    struct ksyn_queue kfreeq;
    ksyn_queue_init(&kfreeq);

retry:
    TAILQ_FOREACH_SAFE(kwe, &kq->ksynq_kwelist, kwe_list, newkwe) {
        if (is_seqhigher(kwe->kwe_lockseq, upto)) {
            // outside our range
            break;
        }

        if (kwe->kwe_state == KWE_THREAD_INWAIT) {
            // Wake only non-canceled threads waiting on this CV.
            if (!pthread_kern->uthread_is_cancelled(kwe->kwe_uth)) {
                (void)ksyn_signal(ckwq, KSYN_QUEUE_WRITER, kwe, PTH_RWL_MTX_WAIT);
                updatebits += PTHRW_INC;
            }
        } else if (kwe->kwe_state == KWE_THREAD_BROADCAST ||
                   kwe->kwe_state == KWE_THREAD_PREPOST) {
            ksyn_queue_remove_item(ckwq, kq, kwe);
            TAILQ_INSERT_TAIL(&kfreeq.ksynq_kwelist, kwe, kwe_list);
            ckwq->kw_fakecount--;
        } else {
            panic("unknown kwe state\n");
        }
    }

    /* Need to enter a broadcast in the queue (if not already at L == S) */

    if (diff_genseq(ckwq->kw_lword, ckwq->kw_sword)) {
        newkwe = TAILQ_FIRST(&kfreeq.ksynq_kwelist);
        if (newkwe == NULL) {
            ksyn_wqunlock(ckwq);
            newkwe = (ksyn_waitq_element_t)pthread_kern->zalloc(kwe_zone);
            TAILQ_INSERT_TAIL(&kfreeq.ksynq_kwelist, newkwe, kwe_list);
            ksyn_wqlock(ckwq);
            goto retry;
        } else {
            TAILQ_REMOVE(&kfreeq.ksynq_kwelist, newkwe, kwe_list);
            ksyn_prepost(ckwq, newkwe, KWE_THREAD_BROADCAST, upto);
        }
    }

    // free up any remaining things stumbled across above
    while ((kwe = TAILQ_FIRST(&kfreeq.ksynq_kwelist)) != NULL) {
        TAILQ_REMOVE(&kfreeq.ksynq_kwelist, kwe, kwe_list);
        pthread_kern->zfree(kwe_zone, kwe);
    }

    if (updatep != NULL) {
        *updatep = updatebits;
    }
}

void
ksyn_cvupdate_fixup(ksyn_wait_queue_t ckwq, uint32_t *updatebits)
{
    if ((ckwq->kw_lword & PTHRW_COUNT_MASK) == (ckwq->kw_sword & PTHRW_COUNT_MASK)) {
        if (ckwq->kw_inqueue != 0) {
            /* FREE THE QUEUE */
            ksyn_queue_free_items(ckwq, KSYN_QUEUE_WRITER, ckwq->kw_lword, 0);
#if __TESTPANICS__
            if (ckwq->kw_inqueue != 0)
                panic("ksyn_cvupdate_fixup: L == S, but entries in queue beyond S");
#endif /* __TESTPANICS__ */
        }
        ckwq->kw_lword = ckwq->kw_uword = ckwq->kw_sword = 0;
        ckwq->kw_kflags |= KSYN_KWF_ZEROEDOUT;
        *updatebits |= PTH_RWS_CV_CBIT;
    } else if (ckwq->kw_inqueue != 0 && ckwq->kw_fakecount == ckwq->kw_inqueue) {
        // only fake entries are present in the queue
        *updatebits |= PTH_RWS_CV_PBIT;
    }
}

void
psynch_zoneinit(void)
{
    kwq_zone = (zone_t)pthread_kern->zinit(sizeof(struct ksyn_wait_queue), 8192 * sizeof(struct ksyn_wait_queue), 4096, "ksyn_wait_queue");
    kwe_zone = (zone_t)pthread_kern->zinit(sizeof(struct ksyn_waitq_element), 8192 * sizeof(struct ksyn_waitq_element), 4096, "ksyn_waitq_element");
}