/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2019, 2020 Jeffrey Roberson <jeff@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD$
 *
 */

#ifndef _SYS_SMR_H_
#define	_SYS_SMR_H_

#include <sys/_smr.h>

/*
 * Safe memory reclamation.  See subr_smr.c for a description of the
 * algorithm, and smr_types.h for macros to define and access SMR-protected
 * data structures.
 *
 * Readers synchronize with smr_enter()/smr_exit() and writers may either
 * free directly to an SMR UMA zone or use smr_synchronize() or smr_wait().
 */
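
/*
 * Example (a sketch, not part of this API): a reader walks an
 * SMR-protected pointer inside an enter/exit pair while a writer
 * unlinks the object and waits for readers before freeing it.  The
 * smr_serialized_load()/smr_serialized_store()/smr_entered_load()
 * accessors come from smr_types.h; "foo", foo_smr, and M_FOO are
 * illustrative.
 *
 *	SMR_POINTER(struct foo *) foo_head;
 *
 *	// Reader:
 *	smr_enter(foo_smr);
 *	foo = smr_entered_load(&foo_head, foo_smr);
 *	// ... read-only access to *foo ...
 *	smr_exit(foo_smr);
 *
 *	// Writer (serialized by a lock):
 *	old = smr_serialized_load(&foo_head, true);
 *	smr_serialized_store(&foo_head, NULL, true);
 *	smr_synchronize(foo_smr);	// all readers have moved on
 *	free(old, M_FOO);
 */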

/*
 * Modular arithmetic for comparing sequence numbers that have
 * potentially wrapped.  Copied from tcp_seq.h.
 */
#define	SMR_SEQ_LT(a, b)	((smr_delta_t)((a)-(b)) < 0)
#define	SMR_SEQ_LEQ(a, b)	((smr_delta_t)((a)-(b)) <= 0)
#define	SMR_SEQ_GT(a, b)	((smr_delta_t)((a)-(b)) > 0)
#define	SMR_SEQ_GEQ(a, b)	((smr_delta_t)((a)-(b)) >= 0)
#define	SMR_SEQ_DELTA(a, b)	((smr_delta_t)((a)-(b)))
#define	SMR_SEQ_MIN(a, b)	(SMR_SEQ_LT((a), (b)) ? (a) : (b))
#define	SMR_SEQ_MAX(a, b)	(SMR_SEQ_GT((a), (b)) ? (a) : (b))
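
/*
 * Worked example of the wraparound behavior: with 32-bit sequence
 * numbers, SMR_SEQ_GT(2, 0xfffffffe) is true because the signed
 * delta (smr_delta_t)(2 - 0xfffffffe) == 4 is positive, so a
 * sequence that has wrapped past zero still compares as later.
 */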

#define	SMR_SEQ_INVALID		0

/* Shared SMR state. */
union s_wr {
	struct {
		smr_seq_t	seq;	/* Current write sequence #. */
		int		ticks;	/* tick of last update (LAZY) */
	};
	uint64_t	_pair;
};
struct smr_shared {
	const char	*s_name;	/* Name for debugging/reporting. */
	union s_wr	s_wr;		/* Write sequence */
	smr_seq_t	s_rd_seq;	/* Minimum observed read sequence. */
};
typedef struct smr_shared *smr_shared_t;

/* Per-cpu SMR state. */
struct smr {
	smr_seq_t	c_seq;		/* Current observed sequence. */
	smr_shared_t	c_shared;	/* Shared SMR state. */
	int		c_deferred;	/* Deferred advance counter. */
	int		c_limit;	/* Deferred advance limit. */
	int		c_flags;	/* SMR Configuration */
};

#define	SMR_LAZY	0x0001		/* Higher latency write, fast read. */
#define	SMR_DEFERRED	0x0002		/* Aggregate updates to wr_seq. */

/*
 * Return the current write sequence number.  This is not the same as the
 * current goal, which may be in the future.
 */
static inline smr_seq_t
smr_shared_current(smr_shared_t s)
{

	return (atomic_load_int(&s->s_wr.seq));
}

static inline smr_seq_t
smr_current(smr_t smr)
{

	return (smr_shared_current(zpcpu_get(smr)->c_shared));
}

/*
 * Enter a read section.
 */
static inline void
smr_enter(smr_t smr)
{

	critical_enter();
	smr = zpcpu_get(smr);
	KASSERT((smr->c_flags & SMR_LAZY) == 0,
	    ("smr_enter(%s) lazy smr.", smr->c_shared->s_name));
	KASSERT(smr->c_seq == 0,
	    ("smr_enter(%s) does not support recursion.",
	    smr->c_shared->s_name));

	/*
	 * Store the current observed write sequence number in our
	 * per-cpu state so that it can be queried via smr_poll().
	 * Frees that are newer than this stored value will be
	 * deferred until we call smr_exit().
	 *
	 * An acquire barrier is used to synchronize with smr_exit()
	 * and smr_poll().
	 *
	 * It is possible that a long delay between loading the wr_seq
	 * and storing the c_seq could create a situation where the
	 * rd_seq advances beyond our stored c_seq.  In this situation
	 * only the observed wr_seq is stale; the fence still orders
	 * the load.  See smr_poll() for details on how this condition
	 * is detected and handled there.
	 */
	/* This is an add because we do not have atomic_store_acq_int(). */
	atomic_add_acq_int(&smr->c_seq, smr_shared_current(smr->c_shared));
}

/*
 * Exit a read section.
 */
static inline void
smr_exit(smr_t smr)
{

	smr = zpcpu_get(smr);
	CRITICAL_ASSERT(curthread);
	KASSERT((smr->c_flags & SMR_LAZY) == 0,
	    ("smr_exit(%s) lazy smr.", smr->c_shared->s_name));
	KASSERT(smr->c_seq != SMR_SEQ_INVALID,
	    ("smr_exit(%s) not in a smr section.", smr->c_shared->s_name));

	/*
	 * Clear the recorded sequence number.  This allows poll() to
	 * detect CPUs not in read sections.
	 *
	 * Use release semantics to retire any stores before the sequence
	 * number is cleared.
	 */
	atomic_store_rel_int(&smr->c_seq, SMR_SEQ_INVALID);
	critical_exit();
}

/*
 * Enter a lazy smr section.  This is used for read-mostly state that
 * can tolerate a high free latency.
 */
static inline void
smr_lazy_enter(smr_t smr)
{

	critical_enter();
	smr = zpcpu_get(smr);
	KASSERT((smr->c_flags & SMR_LAZY) != 0,
	    ("smr_lazy_enter(%s) non-lazy smr.", smr->c_shared->s_name));
	KASSERT(smr->c_seq == 0,
	    ("smr_lazy_enter(%s) does not support recursion.",
	    smr->c_shared->s_name));

	/*
	 * This needs no serialization.  If an interrupt occurs before we
	 * assign wr_seq to c_seq any speculative loads will be discarded.
	 * If we assign a stale wr_seq value due to an interrupt we use the
	 * same algorithm that renders smr_enter() safe.
	 */
	atomic_store_int(&smr->c_seq, smr_shared_current(smr->c_shared));
}

/*
 * Exit a lazy smr section.  This is used for read-mostly state that
 * can tolerate a high free latency.
 */
static inline void
smr_lazy_exit(smr_t smr)
{

	smr = zpcpu_get(smr);
	CRITICAL_ASSERT(curthread);
	KASSERT((smr->c_flags & SMR_LAZY) != 0,
	    ("smr_lazy_exit(%s) non-lazy smr.", smr->c_shared->s_name));
	KASSERT(smr->c_seq != SMR_SEQ_INVALID,
	    ("smr_lazy_exit(%s) not in a smr section.", smr->c_shared->s_name));

	/*
	 * All loads/stores must be retired before the sequence becomes
	 * visible.  The fence compiles away on amd64.  An alternative
	 * would be to omit the fence but store the exit time and wait
	 * 1 tick longer.
	 */
	atomic_thread_fence_rel();
	atomic_store_int(&smr->c_seq, SMR_SEQ_INVALID);
	critical_exit();
}

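/*
 * Example (a sketch): lazy sections pair smr_lazy_enter() with
 * smr_lazy_exit() on an SMR created with SMR_LAZY, trading higher
 * write/free latency for a cheaper read side.  "vfs_smr" is
 * illustrative.
 *
 *	smr_lazy_enter(vfs_smr);
 *	// ... read-mostly lookups ...
 *	smr_lazy_exit(vfs_smr);
 */
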
/*
 * Advances the write sequence number.  Returns the sequence number
 * required to ensure that all modifications are visible to readers.
 */
smr_seq_t smr_advance(smr_t smr);

/*
 * Returns true if a goal sequence has been reached.  If
 * wait is true this will busy loop until success.
 */
bool smr_poll(smr_t smr, smr_seq_t goal, bool wait);
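
/*
 * Example (a sketch): a writer can batch reclamation by caching the
 * goal returned from smr_advance() and polling later instead of
 * waiting synchronously.  The per-item bookkeeping and names here
 * are hypothetical.
 *
 *	// After unlinking 'item' from the data structure:
 *	item->goal = smr_advance(my_smr);
 *	// ...
 *	// Later, e.g. from a periodic cleanup task:
 *	if (smr_poll(my_smr, item->goal, false))
 *		free(item, M_ITEM);
 */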

/* Create a new SMR context. */
smr_t smr_create(const char *name, int limit, int flags);

/* Destroy the context. */
void smr_destroy(smr_t smr);
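
/*
 * Example (a sketch): creating and destroying a context.  The name is
 * illustrative; passing 0 for limit and flags selects the default
 * (non-lazy, non-deferred) behavior.
 *
 *	smr_t my_smr;
 *
 *	my_smr = smr_create("my subsystem", 0, 0);
 *	// ... use with smr_enter()/smr_exit(), smr_advance(), ...
 *	smr_destroy(my_smr);
 */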

/*
 * Blocking wait for all readers to observe 'goal'.
 */
static inline bool
smr_wait(smr_t smr, smr_seq_t goal)
{

	return (smr_poll(smr, goal, true));
}

/*
 * Synchronize advances the write sequence and returns when all
 * readers have observed it.
 *
 * If your application can cache a sequence number returned from
 * smr_advance() and poll or wait at a later time, there will
 * be less chance of busy looping while waiting for readers.
 */
static inline void
smr_synchronize(smr_t smr)
{

	smr_wait(smr, smr_advance(smr));
}
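
/*
 * Example (a sketch): smr_synchronize() is simply
 * smr_wait(smr, smr_advance(smr)).  Prefer caching the goal and
 * polling later (as sketched after smr_poll() above) when frees can
 * be deferred.  The helper names here are hypothetical.
 *
 *	unlink_object(obj);		// remove from readers' view
 *	smr_synchronize(my_smr);	// every reader has observed this
 *	free_object(obj);		// now safe to reclaim
 */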

/* Only at startup. */
void smr_init(void);

#endif	/* _SYS_SMR_H_ */