#include <ck_ec.h>
#include <ck_limits.h>

#include "ck_ec_timeutil.h"

#define DEFAULT_BUSY_LOOP_ITER 100U

/*
 * The default parameters, a 2 ms initial wait growing 8x per
 * iteration, reach 1.024 seconds after 3 iterations.
 */
#define DEFAULT_INITIAL_WAIT_NS 2000000L /* Start at 2 ms */
/* Grow the wait time 8x/iteration. */
#define DEFAULT_WAIT_SCALE_FACTOR 8
#define DEFAULT_WAIT_SHIFT_COUNT 0

struct ck_ec32_slow_path_state {
	struct ck_ec32 *ec;
	uint32_t flagged_word;
};

#ifdef CK_F_EC64
struct ck_ec64_slow_path_state {
	struct ck_ec64 *ec;
	uint64_t flagged_word;
};
#endif

/* Once we've waited for >= 1 sec, go for the full deadline. */
static const struct timespec final_wait_time = {
	.tv_sec = 1
};

void
ck_ec32_wake(struct ck_ec32 *ec, const struct ck_ec_ops *ops)
{
	/*
	 * Spurious wake-ups are OK. Clear the waiters flag (bit 31)
	 * before futexing.
	 */
	ck_pr_and_32(&ec->counter, (1U << 31) - 1);
	ops->wake32(ops, &ec->counter);
	return;
}

int
ck_ec32_wait_slow(struct ck_ec32 *ec,
    const struct ck_ec_ops *ops,
    uint32_t old_value,
    const struct timespec *deadline)
{
	return ck_ec32_wait_pred_slow(ec, ops, old_value,
				      NULL, NULL, deadline);
}
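
/*
 * Typical usage (sketch, not part of this file): a waiter snapshots
 * the counter with ck_ec32_value(), re-checks its condition, and
 * only then blocks via the wait entry points in ck_ec.h, which fall
 * back to ck_ec32_wait_slow() above; a producer increments the
 * counter and calls ck_ec32_wake() when waiters may be present.
 */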

#ifdef CK_F_EC64
void
ck_ec64_wake(struct ck_ec64 *ec, const struct ck_ec_ops *ops)
{
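	/*
	 * The 64-bit counter stores the value shifted left by one,
	 * with bit 0 as the waiters flag (see ck_ec64_wait_pred_slow
	 * below). Clear that flag before futexing; spurious wake-ups
	 * are OK.
	 */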
	ck_pr_and_64(&ec->counter, ~1);
	ops->wake64(ops, &ec->counter);
	return;
}

int
ck_ec64_wait_slow(struct ck_ec64 *ec,
    const struct ck_ec_ops *ops,
    uint64_t old_value,
    const struct timespec *deadline)
{
	return ck_ec64_wait_pred_slow(ec, ops, old_value,
				      NULL, NULL, deadline);
}
#endif

int
ck_ec_deadline_impl(struct timespec *new_deadline,
    const struct ck_ec_ops *ops,
    const struct timespec *timeout)
{
	struct timespec now;
	int r;

	if (timeout == NULL) {
		new_deadline->tv_sec = TIME_MAX;
		new_deadline->tv_nsec = NSEC_MAX;
		return 0;
	}

	r = ops->gettime(ops, &now);
	if (r != 0) {
		return -1;
	}

	*new_deadline = timespec_add(now, *timeout);
	return 0;
}
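
/*
 * Example (sketch): a relative timeout of { .tv_sec = 0, .tv_nsec =
 * 500000000 } yields a deadline of now + 0.5 s on ops->gettime's
 * clock, while a NULL timeout produces the far-future sentinel
 * (TIME_MAX seconds) that the slow path below treats as "no
 * deadline".
 */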

/* The rest of the file implements wait_pred_slow. */

/*
 * Returns a timespec value for deadline_ptr. If deadline_ptr is NULL,
 * returns a timespec far in the future.
 */
static struct timespec
canonical_deadline(const struct timespec *deadline_ptr)
{

	if (deadline_ptr == NULL) {
		return (struct timespec) { .tv_sec = TIME_MAX };
	}

	return *deadline_ptr;
}

/*
 * Really slow (sleeping) path for ck_ec_wait. Drives the exponential
 * backoff scheme, sleeping for longer and longer periods, until
 * either the sleep function returns true (the eventcount's value has
 * changed), or the predicate returns non-zero (something else has
 * changed).
 *
 * If the deadline is ever reached, returns -1 (timeout).
 *
 * TODO: add some form of randomisation to the intermediate timeout
 * values.
 */
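/*
 * With the default parameters, the partial deadlines relative to the
 * first iteration's start time are begin + 2 ms, + 16 ms, + 128 ms,
 * + 1.024 s (any partial deadline already in the past is skipped);
 * once a full second has elapsed, we sleep straight until the
 * caller's deadline.
 */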
static int
exponential_backoff(struct ck_ec_wait_state *wait_state,
    bool (*sleep)(const void *sleep_state,
	const struct ck_ec_wait_state *wait_state,
	const struct timespec *partial_deadline),
    const void *sleep_state,
    int (*pred)(const struct ck_ec_wait_state *state,
	struct timespec *deadline),
    const struct timespec *deadline)
{
	struct timespec begin;
	struct timespec stop_backoff;
	const struct ck_ec_ops *ops = wait_state->ops;
	const uint32_t scale_factor = (ops->wait_scale_factor != 0)
	    ? ops->wait_scale_factor
	    : DEFAULT_WAIT_SCALE_FACTOR;
	const uint32_t shift_count = (ops->wait_shift_count != 0)
	    ? ops->wait_shift_count
	    : DEFAULT_WAIT_SHIFT_COUNT;
	uint32_t wait_ns = (ops->initial_wait_ns != 0)
	    ? ops->initial_wait_ns
	    : DEFAULT_INITIAL_WAIT_NS;
	bool first = true;

	for (;;) {
		struct timespec now;
		struct timespec partial_deadline;

		if (check_deadline(&now, ops, *deadline) == true) {
			/* Timeout. Bail out. */
			return -1;
		}

		if (first) {
			begin = now;
			wait_state->start = begin;
			stop_backoff = timespec_add(begin, final_wait_time);
			first = false;
		}

		wait_state->now = now;
		if (timespec_cmp(now, stop_backoff) >= 0) {
			partial_deadline = *deadline;
		} else {
			do {
				partial_deadline =
				    timespec_add_ns(begin, wait_ns);
				wait_ns =
				    wait_time_scale(wait_ns,
						    scale_factor,
						    shift_count);
			} while (timespec_cmp(partial_deadline, now) <= 0);
		}

		if (pred != NULL) {
			int r = pred(wait_state, &partial_deadline);
			if (r != 0) {
				return r;
			}
		}

		/* Canonicalize deadlines in the far future to NULL. */
		if (sleep(sleep_state, wait_state,
			  ((partial_deadline.tv_sec == TIME_MAX)
			   ? NULL : &partial_deadline)) == true) {
			return 0;
		}
	}
}

/*
 * Loops up to ops->busy_loop_iter times (or DEFAULT_BUSY_LOOP_ITER
 * when that's 0), or until ec's counter value (including the flag)
 * differs from expected.
 *
 * Returns the new value in ec.
 */
#define DEF_WAIT_EASY(W)						\
	static uint##W##_t ck_ec##W##_wait_easy(struct ck_ec##W* ec,	\
						const struct ck_ec_ops *ops, \
						uint##W##_t expected)	\
	{								\
		uint##W##_t current = ck_pr_load_##W(&ec->counter);	\
		size_t n = (ops->busy_loop_iter != 0)			\
		    ? ops->busy_loop_iter				\
		    : DEFAULT_BUSY_LOOP_ITER;				\
									\
		for (size_t i = 0;					\
		     i < n && current == expected;			\
		     i++) {						\
			ck_pr_stall();					\
			current = ck_pr_load_##W(&ec->counter);		\
		}							\
									\
		return current;						\
	}

DEF_WAIT_EASY(32)
#ifdef CK_F_EC64
DEF_WAIT_EASY(64)
#endif
#undef DEF_WAIT_EASY

/*
 * Attempts to upgrade ec->counter from unflagged to flagged.
 *
 * Returns true if the event count has changed. Otherwise, ec's
 * counter word is equal to flagged on return, or has been at some
 * time before the return.
 */
#define DEF_UPGRADE(W)							\
	static bool ck_ec##W##_upgrade(struct ck_ec##W* ec,		\
				       uint##W##_t current,		\
				       uint##W##_t unflagged,		\
				       uint##W##_t flagged)		\
	{								\
		uint##W##_t old_word;					\
									\
		if (current == flagged) {				\
			/* Nothing to do, no change. */			\
			return false;					\
		}							\
									\
		if (current != unflagged) {				\
			/* We have a different counter value! */	\
			return true;					\
		}							\
									\
		/*							\
		 * Flag the counter value. The CAS only fails if the	\
		 * counter is already flagged, or has a new value.	\
		 */							\
		return (ck_pr_cas_##W##_value(&ec->counter,		\
					      unflagged, flagged,	\
					      &old_word) == false &&	\
			old_word != flagged);				\
	}

DEF_UPGRADE(32)
#ifdef CK_F_EC64
DEF_UPGRADE(64)
#endif
#undef DEF_UPGRADE
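
/*
 * Worked example (sketch, hypothetical values): in the 32-bit case
 * with old_value == 42, the caller below passes unflagged == 42 and
 * flagged == 42 | (1U << 31). If another thread has already bumped
 * the counter to 43, upgrade returns true and the waiter is done;
 * otherwise the CAS publishes the flag so that subsequent increments
 * know a wake-up is needed.
 */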

/*
 * Blocks on the event count until partial_deadline. Returns true if
 * the eventcount's value has changed. If partial_deadline is NULL,
 * waits forever.
 */
static bool
ck_ec32_wait_slow_once(const void *vstate,
    const struct ck_ec_wait_state *wait_state,
    const struct timespec *partial_deadline)
{
	const struct ck_ec32_slow_path_state *state = vstate;
	const struct ck_ec32 *ec = state->ec;
	const uint32_t flagged_word = state->flagged_word;

	wait_state->ops->wait32(wait_state, &ec->counter,
				flagged_word, partial_deadline);
	return ck_pr_load_32(&ec->counter) != flagged_word;
}

#ifdef CK_F_EC64
static bool
ck_ec64_wait_slow_once(const void *vstate,
    const struct ck_ec_wait_state *wait_state,
    const struct timespec *partial_deadline)
{
	const struct ck_ec64_slow_path_state *state = vstate;
	const struct ck_ec64 *ec = state->ec;
	const uint64_t flagged_word = state->flagged_word;

	/*
	 * futex_wait will only compare the low 32 bits. Perform a
	 * full comparison here to maximise the chances of catching an
	 * ABA in the low 32 bits.
	 */
	if (ck_pr_load_64(&ec->counter) != flagged_word) {
		return true;
	}

	wait_state->ops->wait64(wait_state, &ec->counter,
				flagged_word, partial_deadline);
	return ck_pr_load_64(&ec->counter) != flagged_word;
}
#endif

/*
 * The full wait logic is a lot of code (> 1KB). Encourage the
 * compiler to lay this all out linearly with LIKELY annotations on
 * every early exit.
 */
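/*
 * The body is shared by the 32- and 64-bit implementations via token
 * pasting; only the counter width and the flag encoding supplied by
 * the callers differ.
 */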
#define WAIT_SLOW_BODY(W, ec, ops, pred, data, deadline_ptr,		\
		       old_value, unflagged, flagged)			\
	do {								\
		struct ck_ec_wait_state wait_state = {			\
			.ops = ops,					\
			.data = data					\
		};							\
		const struct ck_ec##W##_slow_path_state state = {	\
			.ec = ec,					\
			.flagged_word = flagged				\
		};							\
		const struct timespec deadline =			\
			canonical_deadline(deadline_ptr);		\
									\
		/* Detect infinite past deadlines. */			\
		if (CK_CC_LIKELY(deadline.tv_sec <= 0)) {		\
			return -1;					\
		}							\
									\
		for (;;) {						\
			uint##W##_t current;				\
			int r;						\
									\
			current = ck_ec##W##_wait_easy(ec, ops, unflagged); \
									\
			/*						\
			 * We're about to wait harder (i.e.,		\
			 * potentially with futex). Make sure the	\
			 * counter word is flagged.			\
			 */						\
			if (CK_CC_LIKELY(				\
				ck_ec##W##_upgrade(ec, current,		\
					unflagged, flagged) == true)) { \
				ck_pr_fence_acquire();			\
				return 0;				\
			}						\
									\
			/*						\
			 * By now, ec->counter == flagged_word (at	\
			 * some point in the past). Spin some more to	\
			 * heuristically let any in-flight SP inc/add	\
			 * retire. This does not affect correctness,	\
			 * but practically eliminates lost wake-ups.	\
			 */						\
			current = ck_ec##W##_wait_easy(ec, ops, flagged); \
			if (CK_CC_LIKELY(current != flagged_word)) {	\
				ck_pr_fence_acquire();			\
				return 0;				\
			}						\
									\
			r = exponential_backoff(&wait_state,		\
						ck_ec##W##_wait_slow_once, \
						&state,			\
						pred, &deadline);	\
			if (r != 0) {					\
				return r;				\
			}						\
									\
			if (ck_ec##W##_value(ec) != old_value) {	\
				ck_pr_fence_acquire();			\
				return 0;				\
			}						\
									\
			/* Spurious wake-up. Redo the slow path. */	\
		}							\
	} while (0)

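/*
 * Note on flag encodings (derived from the callers below): the
 * 32-bit event count keeps the waiters flag in bit 31, so flagged ==
 * old_value | (1UL << 31); the 64-bit event count stores the value
 * shifted left by one and keeps the flag in bit 0, so flagged ==
 * (old_value << 1) | 1.
 */
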
int
ck_ec32_wait_pred_slow(struct ck_ec32 *ec,
    const struct ck_ec_ops *ops,
    uint32_t old_value,
    int (*pred)(const struct ck_ec_wait_state *state,
	struct timespec *deadline),
    void *data,
    const struct timespec *deadline_ptr)
{
	const uint32_t unflagged_word = old_value;
	const uint32_t flagged_word = old_value | (1UL << 31);

	if (CK_CC_UNLIKELY(ck_ec32_value(ec) != old_value)) {
		return 0;
	}

	WAIT_SLOW_BODY(32, ec, ops, pred, data, deadline_ptr,
		       old_value, unflagged_word, flagged_word);
}

#ifdef CK_F_EC64
int
ck_ec64_wait_pred_slow(struct ck_ec64 *ec,
    const struct ck_ec_ops *ops,
    uint64_t old_value,
    int (*pred)(const struct ck_ec_wait_state *state,
	struct timespec *deadline),
    void *data,
    const struct timespec *deadline_ptr)
{
	const uint64_t unflagged_word = old_value << 1;
	const uint64_t flagged_word = unflagged_word | 1;

	if (CK_CC_UNLIKELY(ck_ec64_value(ec) != old_value)) {
		return 0;
	}

	WAIT_SLOW_BODY(64, ec, ops, pred, data, deadline_ptr,
		       old_value, unflagged_word, flagged_word);
}
#endif

#undef WAIT_SLOW_BODY