/*
 * Copyright (c) 2008-2013 Apple Inc. All rights reserved.
 *
 * @APPLE_APACHE_LICENSE_HEADER_START@
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * @APPLE_APACHE_LICENSE_HEADER_END@
 */

// Implementation of dispatch_once()/dispatch_once_f(): execute an
// initializer exactly once, process-wide, without taking a lock on the
// fast path.  Late arrivals park on an intrusive stack of stack-allocated
// waiter records threaded through the once predicate word itself.

#include "internal.h"

// The public headers may shadow these symbols with wrapper macros; make
// sure we define the real functions here.
#undef dispatch_once
#undef dispatch_once_f


typedef struct _dispatch_once_waiter_s {
	// Next waiter on the intrusive LIFO list (the previous value of *val
	// at the time this waiter pushed itself).  Written by the pusher,
	// read by the thread that ran the initializer; hence doubly volatile.
	volatile struct _dispatch_once_waiter_s *volatile dow_next;
	// Per-thread semaphore this waiter blocks on until signaled.
	_dispatch_thread_semaphore_t dow_sema;
	// Thread port of the thread running the initializer (set via
	// _dispatch_thread_port() on the winner; propagated from the previous
	// list head on waiters).  Used as the target of a priority override.
	mach_port_t dow_thread;
} *_dispatch_once_waiter_t;

// Sentinel stored into *val once the initializer has completed; all
// subsequent callers observe it and return immediately.
#define DISPATCH_ONCE_DONE ((_dispatch_once_waiter_t)~0l)

#ifdef __BLOCKS__
// Blocks wrapper: forwards to dispatch_once_f() with the block as the
// context and its invoke function pointer as the callout.
void
dispatch_once(dispatch_once_t *val, dispatch_block_t block)
{
	dispatch_once_f(val, block, _dispatch_Block_invoke(block));
}
#endif

// Execute func(ctxt) exactly once for a given predicate *val.
// The thread that wins the initial CAS (NULL -> &dow) runs the
// initializer; every other concurrent caller pushes a waiter record onto
// the list rooted at *val and blocks until the winner signals it.
DISPATCH_NOINLINE
void
dispatch_once_f(dispatch_once_t *val, void *ctxt, dispatch_function_t func)
{
	_dispatch_once_waiter_t volatile *vval = (_dispatch_once_waiter_t*)val;
	// Stack-allocated waiter record; also serves as the winner's list tail.
	struct _dispatch_once_waiter_s dow = { NULL, 0, MACH_PORT_NULL };
	_dispatch_once_waiter_t tail = &dow, next, tmp;
	_dispatch_thread_semaphore_t sema;

	if (dispatch_atomic_cmpxchg(vval, NULL, tail, acquire)) {
		// Winner path: we own the initialization.  Record our thread port
		// so waiters can apply a priority override against us.
		dow.dow_thread = _dispatch_thread_port();
		_dispatch_client_callout(ctxt, func);

		// The next barrier must be long and strong.
		//
		// The scenario: SMP systems with weakly ordered memory models
		// and aggressive out-of-order instruction execution.
		//
		// The problem:
		//
		// The dispatch_once*() wrapper macro causes the callee's
		// instruction stream to look like this (pseudo-RISC):
		//
		//      load r5, pred-addr
		//      cmpi r5, -1
		//      beq  1f
		//      call dispatch_once*()
		//      1f:
		//      load r6, data-addr
		//
		// May be re-ordered like so:
		//
		//      load r6, data-addr
		//      load r5, pred-addr
		//      cmpi r5, -1
		//      beq  1f
		//      call dispatch_once*()
		//      1f:
		//
		// Normally, a barrier on the read side is used to workaround
		// the weakly ordered memory model. But barriers are expensive
		// and we only need to synchronize once! After func(ctxt)
		// completes, the predicate will be marked as "done" and the
		// branch predictor will correctly skip the call to
		// dispatch_once*().
		//
		// A far faster alternative solution: Defeat the speculative
		// read-ahead of peer CPUs.
		//
		// Modern architectures will throw away speculative results
		// once a branch mis-prediction occurs. Therefore, if we can
		// ensure that the predicate is not marked as being complete
		// until long after the last store by func(ctxt), then we have
		// defeated the read-ahead of peer CPUs.
		//
		// In other words, the last "store" by func(ctxt) must complete
		// and then N cycles must elapse before ~0l is stored to *val.
		// The value of N is whatever is sufficient to defeat the
		// read-ahead mechanism of peer CPUs.
		//
		// On some CPUs, the most fully synchronizing instruction might
		// need to be issued.

		dispatch_atomic_maximally_synchronizing_barrier();
		// above assumed to contain release barrier
		next = dispatch_atomic_xchg(vval, DISPATCH_ONCE_DONE, relaxed);
		// Walk the waiter list we just detached (everything pushed while
		// the initializer was running) and wake each blocked thread.
		// A waiter's dow_next may not be published yet at the instant we
		// reach it, so spin until it becomes non-NULL.
		while (next != tail) {
			_dispatch_wait_until(tmp = (_dispatch_once_waiter_t)next->dow_next);
			sema = next->dow_sema;
			next = tmp;
			_dispatch_thread_semaphore_signal(sema);
		}
	} else {
		// Waiter path: someone else is (or was) running the initializer.
		dow.dow_sema = _dispatch_get_thread_semaphore();
		next = *vval;
		for (;;) {
			if (next == DISPATCH_ONCE_DONE) {
				// Initialization already complete; nothing to wait for.
				break;
			}
			// Try to push our waiter record onto the list head; on
			// failure, cmpxchgvw reloads the current head into 'next'
			// and we retry (it may have become DISPATCH_ONCE_DONE).
			if (dispatch_atomic_cmpxchgvw(vval, next, tail, &next, release)) {
				dow.dow_thread = next->dow_thread;
				dow.dow_next = next;
				if (dow.dow_thread) {
					// Boost the initializing thread to our priority so a
					// low-priority initializer cannot priority-invert us
					// while we block.
					pthread_priority_t pp = _dispatch_get_priority();
					_dispatch_thread_override_start(dow.dow_thread, pp);
				}
				_dispatch_thread_semaphore_wait(dow.dow_sema);
				if (dow.dow_thread) {
					_dispatch_thread_override_end(dow.dow_thread);
				}
				break;
			}
		}
		// Return the cached per-thread semaphore whether or not we waited.
		_dispatch_put_thread_semaphore(dow.dow_sema);
	}
}