1/*
2 * Copyright (c) 2008-2013 Apple Inc. All rights reserved.
3 *
4 * @APPLE_APACHE_LICENSE_HEADER_START@
5 *
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
9 *
10 *     http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 *
18 * @APPLE_APACHE_LICENSE_HEADER_END@
19 */
20
21#include "internal.h"
22
23#undef dispatch_once
24#undef dispatch_once_f
25
26
// Waiter record, allocated on the stack of every thread that calls into
// dispatch_once_f() while the initializer is (or may be) running. Records
// form an intrusive LIFO list whose head is stored in the dispatch_once_t
// word itself; the record is safe on the stack because the owning frame
// cannot return until the record has been consumed (see dispatch_once_f).
typedef struct _dispatch_once_waiter_s {
	// Next (earlier-pushed) waiter in the chain. Published only AFTER the
	// CAS that links this record into the head word, so the thread walking
	// the chain must spin until it becomes non-NULL.
	volatile struct _dispatch_once_waiter_s *volatile dow_next;
	// Per-thread semaphore the initializing thread signals to wake this
	// waiter (0 in the initializer's own record).
	_dispatch_thread_semaphore_t dow_sema;
	// Thread port of the thread running the initializer, propagated down
	// the chain from the record observed at push time; used by waiters to
	// apply a priority override while blocked.
	mach_port_t dow_thread;
} *_dispatch_once_waiter_t;

// Sentinel stored into *val once the initializer has completed; matches the
// ~0l fast-path check performed by the dispatch_once*() wrapper macros.
#define DISPATCH_ONCE_DONE ((_dispatch_once_waiter_t)~0l)
34
#ifdef __BLOCKS__
/*
 * Block-based entry point: unwrap the Block into its plain function
 * pointer and forward to the function-based implementation, passing the
 * Block itself as the context argument.
 */
void
dispatch_once(dispatch_once_t *val, dispatch_block_t block)
{
	dispatch_function_t invoke = _dispatch_Block_invoke(block);

	dispatch_once_f(val, block, invoke);
}
#endif
42
/*
 * Execute (ctxt, func) exactly once for the lifetime of *val.
 *
 * The predicate word doubles as the head of an intrusive stack of
 * on-stack waiter records:
 *   NULL               -> initializer has not started
 *   DISPATCH_ONCE_DONE -> initializer has completed
 *   anything else      -> pointer to the most recently pushed waiter
 *                         (the initializer's own record is the tail)
 *
 * Winner path: CAS NULL -> &dow, run the callout, then atomically swap
 * in DISPATCH_ONCE_DONE and wake every waiter that queued meanwhile.
 * Loser path: push own record with CAS, block on a cached per-thread
 * semaphore until signaled.
 */
DISPATCH_NOINLINE
void
dispatch_once_f(dispatch_once_t *val, void *ctxt, dispatch_function_t func)
{
	_dispatch_once_waiter_t volatile *vval = (_dispatch_once_waiter_t*)val;
	// On-stack record; it never outlives this frame because the winner
	// unlinks it (it is the chain tail) and a loser is not released from
	// its semaphore wait until after its record has been read.
	struct _dispatch_once_waiter_s dow = { NULL, 0, MACH_PORT_NULL };
	_dispatch_once_waiter_t tail = &dow, next, tmp;
	_dispatch_thread_semaphore_t sema;

	if (dispatch_atomic_cmpxchg(vval, NULL, tail, acquire)) {
		// Won the race: this thread runs the initializer exactly once.
		// Record our thread port so waiters can priority-boost us.
		dow.dow_thread = _dispatch_thread_port();
		_dispatch_client_callout(ctxt, func);

		// The next barrier must be long and strong.
		//
		// The scenario: SMP systems with weakly ordered memory models
		// and aggressive out-of-order instruction execution.
		//
		// The problem:
		//
		// The dispatch_once*() wrapper macro causes the callee's
		// instruction stream to look like this (pseudo-RISC):
		//
		//      load r5, pred-addr
		//      cmpi r5, -1
		//      beq  1f
		//      call dispatch_once*()
		//      1f:
		//      load r6, data-addr
		//
		// May be re-ordered like so:
		//
		//      load r6, data-addr
		//      load r5, pred-addr
		//      cmpi r5, -1
		//      beq  1f
		//      call dispatch_once*()
		//      1f:
		//
		// Normally, a barrier on the read side is used to workaround
		// the weakly ordered memory model. But barriers are expensive
		// and we only need to synchronize once! After func(ctxt)
		// completes, the predicate will be marked as "done" and the
		// branch predictor will correctly skip the call to
		// dispatch_once*().
		//
		// A far faster alternative solution: Defeat the speculative
		// read-ahead of peer CPUs.
		//
		// Modern architectures will throw away speculative results
		// once a branch mis-prediction occurs. Therefore, if we can
		// ensure that the predicate is not marked as being complete
		// until long after the last store by func(ctxt), then we have
		// defeated the read-ahead of peer CPUs.
		//
		// In other words, the last "store" by func(ctxt) must complete
		// and then N cycles must elapse before ~0l is stored to *val.
		// The value of N is whatever is sufficient to defeat the
		// read-ahead mechanism of peer CPUs.
		//
		// On some CPUs, the most fully synchronizing instruction might
		// need to be issued.

		dispatch_atomic_maximally_synchronizing_barrier();
		// above assumed to contain release barrier, hence the relaxed
		// ordering on the exchange below
		next = dispatch_atomic_xchg(vval, DISPATCH_ONCE_DONE, relaxed);
		// Walk the chain of waiters that queued while func ran; our
		// own record (== tail) terminates it. Read dow_next/dow_sema
		// BEFORE signaling: the waiter's record lives on its stack and
		// may be destroyed the instant it is woken. dow_next may not
		// be published yet (the waiter sets it after its CAS), so spin
		// until it is non-NULL.
		while (next != tail) {
			_dispatch_wait_until(tmp = (_dispatch_once_waiter_t)next->dow_next);
			sema = next->dow_sema;
			next = tmp;
			_dispatch_thread_semaphore_signal(sema);
		}
	} else {
		// Lost the race (or it is already done): prepare to wait on a
		// cached per-thread semaphore.
		dow.dow_sema = _dispatch_get_thread_semaphore();
		next = *vval;
		for (;;) {
			if (next == DISPATCH_ONCE_DONE) {
				// Initializer finished between the fast-path check and
				// here; nothing to wait for.
				break;
			}
			// Try to push our record as the new head; on failure `next`
			// is reloaded with the current head and we retry.
			if (dispatch_atomic_cmpxchgvw(vval, next, tail, &next, release)) {
				// Propagate the initializing thread's port down the
				// chain, then publish dow_next — the winner spins on
				// this field when unwinding the chain.
				dow.dow_thread = next->dow_thread;
				dow.dow_next = next;
				if (dow.dow_thread) {
					// Boost the initializer to our priority while we
					// block, to avoid priority inversion.
					pthread_priority_t pp = _dispatch_get_priority();
					_dispatch_thread_override_start(dow.dow_thread, pp);
				}
				_dispatch_thread_semaphore_wait(dow.dow_sema);
				if (dow.dow_thread) {
					_dispatch_thread_override_end(dow.dow_thread);
				}
				break;
			}
		}
		// Return the semaphore to the per-thread cache (whether or not
		// we actually waited on it).
		_dispatch_put_thread_semaphore(dow.dow_sema);
	}
}
139