/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2019 Kyle Evans <kevans@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */
#ifndef _SYS__ATOMIC_SUBWORD_H_
#define	_SYS__ATOMIC_SUBWORD_H_

/*
 * This header is specifically for platforms that either do not have ways to or
 * simply do not do sub-word atomic operations.  These are not ideal as they
 * require a little more effort to make sure our atomic operations are failing
 * because of the bits of the word we're trying to write rather than the rest
 * of the word.
 */
#ifndef _MACHINE_ATOMIC_H_
#error do not include this header, use machine/atomic.h
#endif

#include <machine/endian.h>
#ifndef _KERNEL
#include <stdbool.h>
#endif

#ifndef NBBY
#define	NBBY	8
#endif

#define	_ATOMIC_WORD_ALIGNED(p)		\
    (uint32_t *)((__uintptr_t)(p) - ((__uintptr_t)(p) % 4))

#if _BYTE_ORDER == _BIG_ENDIAN
#define	_ATOMIC_BYTE_SHIFT(p)		\
    ((3 - ((__uintptr_t)(p) % 4)) * NBBY)

#define	_ATOMIC_HWORD_SHIFT(p)		\
    ((2 - ((__uintptr_t)(p) % 4)) * NBBY)
#else
#define	_ATOMIC_BYTE_SHIFT(p)		\
    ((((__uintptr_t)(p) % 4)) * NBBY)

#define	_ATOMIC_HWORD_SHIFT(p)		\
    ((((__uintptr_t)(p) % 4)) * NBBY)
#endif
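
/*
 * Worked example (illustrative only, not used by the code below): on a
 * little-endian machine, a uint8_t whose address satisfies
 * (__uintptr_t)p % 4 == 1 occupies bits 8..15 of its containing 32-bit
 * word, so _ATOMIC_WORD_ALIGNED(p) points at p - 1 and
 * _ATOMIC_BYTE_SHIFT(p) evaluates to 8.  On a big-endian machine the
 * same byte occupies bits 16..23 and the shift evaluates to 16.
 */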

#ifndef	_atomic_cmpset_masked_word
/*
 * Pass these bad boys a couple words and a mask of the bits you care about,
 * they'll loop until we either succeed or fail because of those bits rather
 * than the ones we're not masking.  old and val should already be preshifted to
 * the proper position.
 */
static __inline int
_atomic_cmpset_masked_word(uint32_t *addr, uint32_t old, uint32_t val,
    uint32_t mask)
{
	int ret;
	uint32_t wcomp;

	wcomp = old;

	/*
	 * We'll attempt the cmpset on the entire word.  Loop here in case the
	 * operation fails due to the other half-word resident in that word,
	 * rather than the half-word we're trying to operate on.  Ideally we
	 * only take one trip through here.  We'll have to recalculate the old
	 * value since it's the other part of the word changing.
	 */
	do {
		old = (*addr & ~mask) | wcomp;
		ret = atomic_fcmpset_32(addr, &old, (old & ~mask) | val);
	} while (ret == 0 && (old & mask) == wcomp);

	return (ret);
}
#endif

#ifndef	_atomic_fcmpset_masked_word
static __inline int
_atomic_fcmpset_masked_word(uint32_t *addr, uint32_t *old, uint32_t val,
    uint32_t mask)
{

	/*
	 * fcmpset_* is documented in atomic(9) to allow spurious failures where
	 * *old == val on ll/sc architectures because the sc may fail due to
	 * parallel writes or other reasons.  We take advantage of that here
	 * and only attempt once, because the caller should be compensating for
	 * that possibility.
	 */
	*old = (*addr & ~mask) | *old;
	return (atomic_fcmpset_32(addr, old, (*old & ~mask) | val));
}
#endif
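
/*
 * Illustrative sketch of the masked-word contract (this simply mirrors
 * what the sub-word wrappers below do; it is not an additional
 * interface): to cmpset the byte occupying bits 8..15 of a word, a
 * caller passes old << 8 and val << 8 with mask 0xff << 8.  On failure,
 * _atomic_fcmpset_masked_word() hands back the full word it observed in
 * *old, so the caller must shift and mask that back down before
 * reporting it.
 */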

#ifndef atomic_cmpset_8
static __inline int
atomic_cmpset_8(__volatile uint8_t *addr, uint8_t old, uint8_t val)
{
	int shift;

	shift = _ATOMIC_BYTE_SHIFT(addr);

	return (_atomic_cmpset_masked_word(_ATOMIC_WORD_ALIGNED(addr),
	    old << shift, val << shift, 0xff << shift));
}
#endif

#ifndef atomic_fcmpset_8
static __inline int
atomic_fcmpset_8(__volatile uint8_t *addr, uint8_t *old, uint8_t val)
{
	int ret, shift;
	uint32_t wold;

	shift = _ATOMIC_BYTE_SHIFT(addr);
	wold = *old << shift;
	ret = _atomic_fcmpset_masked_word(_ATOMIC_WORD_ALIGNED(addr),
	    &wold, val << shift, 0xff << shift);
	if (ret == 0)
		*old = (wold >> shift) & 0xff;
	return (ret);
}
#endif
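
/*
 * Example caller pattern (an illustrative sketch following the atomic(9)
 * fcmpset contract; "flagp", "old" and "new" are hypothetical, not part
 * of this header): because fcmpset may fail spuriously and refreshes
 * *old on failure, callers normally retry in a loop, e.g. to atomically
 * set the low bit of a byte-sized flag:
 *
 *	old = *flagp;
 *	do {
 *		new = old | 0x01;
 *	} while (atomic_fcmpset_8(flagp, &old, new) == 0);
 */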

#ifndef atomic_cmpset_16
static __inline int
atomic_cmpset_16(__volatile uint16_t *addr, uint16_t old, uint16_t val)
{
	int shift;

	shift = _ATOMIC_HWORD_SHIFT(addr);

	return (_atomic_cmpset_masked_word(_ATOMIC_WORD_ALIGNED(addr),
	    old << shift, val << shift, 0xffff << shift));
}
#endif

#ifndef atomic_fcmpset_16
static __inline int
atomic_fcmpset_16(__volatile uint16_t *addr, uint16_t *old, uint16_t val)
{
	int ret, shift;
	uint32_t wold;

	shift = _ATOMIC_HWORD_SHIFT(addr);
	wold = *old << shift;
	ret = _atomic_fcmpset_masked_word(_ATOMIC_WORD_ALIGNED(addr),
	    &wold, val << shift, 0xffff << shift);
	if (ret == 0)
		*old = (wold >> shift) & 0xffff;
	return (ret);
}
#endif

#ifndef atomic_load_acq_8
static __inline uint8_t
atomic_load_acq_8(volatile uint8_t *p)
{
	int shift;
	uint8_t ret;

	shift = _ATOMIC_BYTE_SHIFT(p);
	ret = (atomic_load_acq_32(_ATOMIC_WORD_ALIGNED(p)) >> shift) & 0xff;
	return (ret);
}
#endif

#ifndef atomic_load_acq_16
static __inline uint16_t
atomic_load_acq_16(volatile uint16_t *p)
{
	int shift;
	uint16_t ret;

	shift = _ATOMIC_HWORD_SHIFT(p);
	ret = (atomic_load_acq_32(_ATOMIC_WORD_ALIGNED(p)) >> shift) &
	    0xffff;
	return (ret);
}
#endif

#undef _ATOMIC_WORD_ALIGNED
#undef _ATOMIC_BYTE_SHIFT
#undef _ATOMIC_HWORD_SHIFT

/*
 * Provide generic testandset_long implementation based on fcmpset long
 * primitive.  It may not be ideal for any given arch, so machine/atomic.h
 * should define the macro atomic_testandset_long to override with an
 * MD-specific version.
 *
 * (Organizationally, this isn't really subword atomics.  But atomic_common is
 * included too early in machine/atomic.h, so it isn't a good place for derived
 * primitives like this.)
 */
#ifndef atomic_testandset_acq_long
static __inline int
atomic_testandset_acq_long(volatile u_long *p, u_int v)
{
	u_long bit, old;
	bool ret;

	bit = (1ul << (v % (sizeof(*p) * NBBY)));

	old = atomic_load_acq_long(p);
	ret = false;
	while (!ret && (old & bit) == 0)
		ret = atomic_fcmpset_acq_long(p, &old, old | bit);

	return (!ret);
}
#endif

#ifndef atomic_testandset_long
static __inline int
atomic_testandset_long(volatile u_long *p, u_int v)
{
	u_long bit, old;
	bool ret;

	bit = (1ul << (v % (sizeof(*p) * NBBY)));

	old = atomic_load_long(p);
	ret = false;
	while (!ret && (old & bit) == 0)
		ret = atomic_fcmpset_long(p, &old, old | bit);

	return (!ret);
}
#endif

#ifndef atomic_testandclear_long
static __inline int
atomic_testandclear_long(volatile u_long *p, u_int v)
{
	u_long bit, old;
	bool ret;

	bit = (1ul << (v % (sizeof(*p) * NBBY)));

	old = atomic_load_long(p);
	ret = false;
	while (!ret && (old & bit) != 0)
		ret = atomic_fcmpset_long(p, &old, old & ~bit);

	return (ret);
}
#endif
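
/*
 * Example use (an illustrative sketch; "flags" and BUSY_BIT are
 * hypothetical, not part of this header): these routines return the
 * previous state of the selected bit, so a caller can claim and later
 * release a one-shot flag bit in a u_long with:
 *
 *	if (atomic_testandset_long(&flags, BUSY_BIT) == 0) {
 *		... the bit was clear and we set it; do the work ...
 *		atomic_testandclear_long(&flags, BUSY_BIT);
 *	}
 */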

#endif	/* _SYS__ATOMIC_SUBWORD_H_ */