/*
 * Copyright 2010-2015 Samy Al Bahra.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#ifndef CK_BYTELOCK_H
#define CK_BYTELOCK_H

/*
 * The implementations here are derived from the work described in:
 *   Dice, D. and Shavit, N. 2010. TLRW: return of the read-write lock.
 *   In Proceedings of the 22nd ACM Symposium on Parallelism in Algorithms
 *   and Architectures (Thira, Santorini, Greece, June 13 - 15, 2010).
 *   SPAA '10. ACM, New York, NY, 284-293.
 */

#include <ck_cc.h>
#include <ck_md.h>
#include <ck_pr.h>
#include <ck_stdbool.h>
#include <ck_stddef.h>
#include <ck_stdint.h>
#include <ck_limits.h>
struct ck_bytelock {
	unsigned int owner;
	unsigned int n_readers;
	uint8_t readers[CK_MD_CACHELINE - sizeof(unsigned int) * 2] CK_CC_ALIGN(8);
};
typedef struct ck_bytelock ck_bytelock_t;

#define CK_BYTELOCK_INITIALIZER { 0, 0, {0} }
#define CK_BYTELOCK_UNSLOTTED   UINT_MAX
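
/*
 * Usage sketch (illustrative only, not part of the interface): a thread
 * whose identifier lies between 1 and the size of the readers array may
 * pass that identifier as its slot and is then tracked through a private
 * byte; any other thread must pass CK_BYTELOCK_UNSLOTTED.
 *
 *	static ck_bytelock_t lock = CK_BYTELOCK_INITIALIZER;
 *
 *	ck_bytelock_read_lock(&lock, 1);	// slotted reader, slot 1
 *	...					// read-side critical section
 *	ck_bytelock_read_unlock(&lock, 1);
 */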

CK_CC_INLINE static void
ck_bytelock_init(struct ck_bytelock *bytelock)
{
	unsigned int i;

	bytelock->owner = 0;
	bytelock->n_readers = 0;
	for (i = 0; i < sizeof bytelock->readers; i++)
		bytelock->readers[i] = false;

	ck_pr_barrier();
	return;
}

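/*
 * Select the widest atomic load available so that a writer can scan
 * the readers array a word at a time rather than byte by byte.
 */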
#ifdef CK_F_PR_LOAD_64
#define CK_BYTELOCK_LENGTH sizeof(uint64_t)
#define CK_BYTELOCK_LOAD ck_pr_load_64
#define CK_BYTELOCK_TYPE uint64_t
#elif defined(CK_F_PR_LOAD_32)
#define CK_BYTELOCK_LENGTH sizeof(uint32_t)
#define CK_BYTELOCK_LOAD ck_pr_load_32
#define CK_BYTELOCK_TYPE uint32_t
#else
#error Unsupported platform.
#endif

CK_CC_INLINE static void
ck_bytelock_write_lock(struct ck_bytelock *bytelock, unsigned int slot)
{
	CK_BYTELOCK_TYPE *readers = (void *)bytelock->readers;
	unsigned int i;

	/* Announce upcoming writer acquisition. */
	while (ck_pr_cas_uint(&bytelock->owner, 0, slot) == false)
		ck_pr_stall();

	/* If we are slotted, we might be upgrading from a read lock. */
	if (slot <= sizeof bytelock->readers)
		ck_pr_store_8(&bytelock->readers[slot - 1], false);

	/*
	 * Wait for slotted readers to drain out. This fence also provides
	 * acquire semantics for the lock.
	 */
	ck_pr_fence_atomic_load();

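	/*
	 * Scan the readers array a word at a time; a non-zero word means
	 * at least one slotted reader is still in its critical section.
	 */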
	for (i = 0; i < sizeof(bytelock->readers) / CK_BYTELOCK_LENGTH; i++) {
		while (CK_BYTELOCK_LOAD(&readers[i]) != false)
			ck_pr_stall();
	}

	/* Wait for unslotted readers to drain out. */
	while (ck_pr_load_uint(&bytelock->n_readers) != 0)
		ck_pr_stall();

	ck_pr_fence_lock();
	return;
}

#undef CK_BYTELOCK_LENGTH
#undef CK_BYTELOCK_LOAD
#undef CK_BYTELOCK_TYPE

CK_CC_INLINE static void
ck_bytelock_write_unlock(struct ck_bytelock *bytelock)
{

	ck_pr_fence_unlock();
	ck_pr_store_uint(&bytelock->owner, 0);
	return;
}
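
/*
 * Writer-side sketch (illustrative only; "update_shared_state" is a
 * hypothetical function standing in for the write-side critical section):
 *
 *	ck_bytelock_write_lock(&lock, slot);
 *	update_shared_state();
 *	ck_bytelock_write_unlock(&lock);
 */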

CK_CC_INLINE static void
ck_bytelock_read_lock(struct ck_bytelock *bytelock, unsigned int slot)
{

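	/*
	 * If we already own the lock as a writer, downgrade to a read
	 * lock: publish our reader byte before releasing ownership so
	 * that no writer can acquire the lock in between.
	 */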
	if (ck_pr_load_uint(&bytelock->owner) == slot) {
		ck_pr_store_8(&bytelock->readers[slot - 1], true);
		ck_pr_fence_strict_store();
		ck_pr_store_uint(&bytelock->owner, 0);
		return;
	}

	/* Unslotted threads will have to use the n_readers counter. */
	if (slot > sizeof bytelock->readers) {
		for (;;) {
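			/*
			 * Optimistically announce ourselves as a reader; if a
			 * writer has already claimed the lock, retract the
			 * announcement and wait for the writer to drain.
			 */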
			ck_pr_inc_uint(&bytelock->n_readers);
			ck_pr_fence_atomic_load();
			if (ck_pr_load_uint(&bytelock->owner) == 0)
				break;
			ck_pr_dec_uint(&bytelock->n_readers);

			while (ck_pr_load_uint(&bytelock->owner) != 0)
				ck_pr_stall();
		}

		ck_pr_fence_lock();
		return;
	}

	slot -= 1;
	for (;;) {
#ifdef CK_F_PR_FAS_8
		ck_pr_fas_8(&bytelock->readers[slot], true);
		ck_pr_fence_atomic_load();
#else
		ck_pr_store_8(&bytelock->readers[slot], true);
		ck_pr_fence_store_load();
#endif

		/*
		 * If there is no owner at this point, our slot has
		 * already been published and it is guaranteed no
		 * write acquisition will succeed until we drain out.
		 */
		if (ck_pr_load_uint(&bytelock->owner) == 0)
			break;

		ck_pr_store_8(&bytelock->readers[slot], false);
		while (ck_pr_load_uint(&bytelock->owner) != 0)
			ck_pr_stall();
	}

	ck_pr_fence_lock();
	return;
}

CK_CC_INLINE static void
ck_bytelock_read_unlock(struct ck_bytelock *bytelock, unsigned int slot)
{

	ck_pr_fence_unlock();

	if (slot > sizeof bytelock->readers)
		ck_pr_dec_uint(&bytelock->n_readers);
	else
		ck_pr_store_8(&bytelock->readers[slot - 1], false);

	return;
}
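
/*
 * Reader-side sketch for unslotted threads (illustrative only): such
 * threads are serialized through the shared n_readers counter rather
 * than a private byte.
 *
 *	ck_bytelock_read_lock(&lock, CK_BYTELOCK_UNSLOTTED);
 *	...	// read-side critical section
 *	ck_bytelock_read_unlock(&lock, CK_BYTELOCK_UNSLOTTED);
 */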

#endif /* CK_BYTELOCK_H */