1/*
2 * Copyright (c) 2006-2014 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28#ifndef _SYS_MCACHE_H
29#define	_SYS_MCACHE_H
30
31#ifdef KERNEL_PRIVATE
32
33#ifdef  __cplusplus
34extern "C" {
35#endif
36
37#include <sys/types.h>
38#include <sys/queue.h>
39#include <mach/boolean.h>
40#include <kern/locks.h>
41#include <libkern/OSAtomic.h>
42
/*
 * Discard any ASSERT()/VERIFY() inherited from previously included headers
 * so that the definitions below are the ones in effect.  #undef of a name
 * that is not defined is a no-op, hence no #ifdef guard.
 */
#undef ASSERT
#undef VERIFY

/*
 * VERIFY(expr) is checked in every build: if expr evaluates to false,
 * assfail() is called with the stringified expression, the file name and
 * the line number.  ASSERT(expr) performs the same check in DEBUG builds
 * only; otherwise it expands to a no-op and expr is never evaluated.
 */
#define	VERIFY(expr)	((void)((expr) || assfail(#expr, __FILE__, __LINE__)))
#if !DEBUG
#define	ASSERT(expr)	((void)0)
#else
#define	ASSERT(expr)	VERIFY(expr)
#endif
60
/*
 * Compile-time assertion, usable wherever a statement is allowed; this
 * should live in a header of its own someday.  When x is a constant
 * expression equal to zero, the two case labels collide and compilation
 * fails; otherwise the switch has no effect.
 */
#define	_CASSERT(x)	\
	switch (0) { case 0: case (x): break; }
66
/*
 * Atomic macros; these should be on their own someday.
 *
 * atomic_add_N_ov(a, n)  atomically adds n to the N-bit integer at address
 *                        a and yields the result of the OSAddAtomic*()
 *                        primitive cast to the unsigned N-bit type (libkern
 *                        documents that result as the value held before the
 *                        addition).
 * atomic_add_N(a, n)     same operation with the result discarded.
 *
 * Both arguments are parenthesized in the expansion so that an address
 * expression such as "base + off" is cast as a whole rather than having the
 * cast bind to "base" alone.
 */
#define	atomic_add_16_ov(a, n)						\
	((u_int16_t) OSAddAtomic16((n), (volatile SInt16 *)(a)))

#define	atomic_add_16(a, n)						\
	((void) atomic_add_16_ov((a), (n)))

#define	atomic_add_32_ov(a, n)						\
	((u_int32_t) OSAddAtomic((n), (volatile SInt32 *)(a)))

#define	atomic_add_32(a, n)						\
	((void) atomic_add_32_ov((a), (n)))

#define	atomic_add_64_ov(a, n)						\
	((u_int64_t) OSAddAtomic64((n), (volatile SInt64 *)(a)))

#define	atomic_add_64(a, n)						\
	((void) atomic_add_64_ov((a), (n)))
87
/*
 * atomic_set_64(a, n) stores n into the 64-bit integer at address a by
 * retrying OSCompareAndSwap64() with the currently observed value until
 * the swap succeeds.
 *
 * "a" is expanded twice, so it must not have side effects.  Both arguments
 * are parenthesized so that an address expression such as "p + 1" is
 * dereferenced and cast as a whole.
 */
#define	atomic_set_64(a, n) do {					\
	while (!OSCompareAndSwap64(*(a), (n), (volatile UInt64 *)(a)))	\
		;							\
} while (0)
92
/*
 * atomic_get_64(n, a) loads the 64-bit integer at address a into n.
 * Without __LP64__ the value is obtained by atomically adding zero through
 * atomic_add_64_ov(); with __LP64__ a plain dereference is used.
 */
#if !defined(__LP64__)
#define	atomic_get_64(n, a) do {					\
	(n) = atomic_add_64_ov(a, 0);					\
} while (0)
#else /* __LP64__ */
#define	atomic_get_64(n, a) do {					\
	(n) = *(a);							\
} while (0)
#endif /* !__LP64__ */
102
/*
 * Atomic bitwise OR on 8-, 16- and 32-bit integers.
 *
 * atomic_or_N_ov(a, n)    ORs mask n into the integer at address a and
 *                         yields the OSBitOrAtomic*() result cast to the
 *                         unsigned N-bit type (libkern documents that
 *                         result as the value before the operation).
 * atomic_or_N(a, n)       same operation with the result discarded.
 * atomic_bitset_N(a, n)   alias of atomic_or_N().
 *
 * Arguments are parenthesized in the expansion so that an address
 * expression such as "base + off" is cast as a whole.
 */
#define	atomic_or_8_ov(a, n)						\
	((u_int8_t) OSBitOrAtomic8((n), (volatile UInt8 *)(a)))

#define	atomic_or_8(a, n)						\
	((void) atomic_or_8_ov((a), (n)))

#define	atomic_bitset_8(a, n)						\
	atomic_or_8((a), (n))

#define	atomic_or_16_ov(a, n)						\
	((u_int16_t) OSBitOrAtomic16((n), (volatile UInt16 *)(a)))

#define	atomic_or_16(a, n)						\
	((void) atomic_or_16_ov((a), (n)))

#define	atomic_bitset_16(a, n)						\
	atomic_or_16((a), (n))

#define	atomic_or_32_ov(a, n)						\
	((u_int32_t) OSBitOrAtomic((n), (volatile UInt32 *)(a)))

#define	atomic_or_32(a, n)						\
	((void) atomic_or_32_ov((a), (n)))

#define	atomic_bitset_32(a, n)						\
	atomic_or_32((a), (n))
129
/*
 * Atomic bitwise AND on 8-, 16- and 32-bit integers.
 *
 * atomic_and_N_ov(a, n)     ANDs mask n into the integer at address a and
 *                           yields the OSBitAndAtomic*() result cast to the
 *                           unsigned N-bit type (libkern documents that
 *                           result as the value before the operation).
 * atomic_and_N(a, n)        same operation with the result discarded.
 * atomic_bitclear_N(a, n)   clears the bits set in n by ANDing with ~(n).
 *
 * Arguments are parenthesized in the expansion so that an address
 * expression such as "base + off" is cast as a whole.
 */
#define	atomic_and_8_ov(a, n)						\
	((u_int8_t) OSBitAndAtomic8((n), (volatile UInt8 *)(a)))

#define	atomic_and_8(a, n)						\
	((void) atomic_and_8_ov((a), (n)))

#define	atomic_bitclear_8(a, n)						\
	atomic_and_8((a), ~(n))

#define	atomic_and_16_ov(a, n)						\
	((u_int16_t) OSBitAndAtomic16((n), (volatile UInt16 *)(a)))

#define	atomic_and_16(a, n)						\
	((void) atomic_and_16_ov((a), (n)))

#define	atomic_bitclear_16(a, n)					\
	atomic_and_16((a), ~(n))

#define	atomic_and_32_ov(a, n)						\
	((u_int32_t) OSBitAndAtomic((n), (volatile UInt32 *)(a)))

#define	atomic_and_32(a, n)						\
	((void) atomic_and_32_ov((a), (n)))

#define	atomic_bitclear_32(a, n)					\
	atomic_and_32((a), ~(n))
156
/*
 * CPU_CACHE_LINE_SIZE expands to a call to mcache_cache_line_size(), while
 * MAX_CPU_CACHE_LINE_SIZE is the fixed constant 64.  Prefer the former;
 * use MAX_CPU_CACHE_LINE_SIZE only where wasting space is of no concern.
 */
#define	CPU_CACHE_LINE_SIZE	mcache_cache_line_size()
#define	MAX_CPU_CACHE_LINE_SIZE	64
163
/*
 * Power-of-two alignment helpers.  Each is defined only if the including
 * translation unit has not already provided it.  The alignment argument
 * is expected to be a power of two; all arithmetic is done in uintptr_t.
 *
 * IS_P2ALIGNED(v, a)     non-zero when v is a multiple of a.
 * P2ROUNDUP(x, align)    x rounded up to the next multiple of align.
 * P2ROUNDDOWN(x, align)  x rounded down to a multiple of align.
 *
 * Every macro argument is parenthesized before being cast so that the
 * cast applies to the whole argument expression.
 */
#ifndef IS_P2ALIGNED
#define	IS_P2ALIGNED(v, a) \
	((((uintptr_t)(v)) & ((uintptr_t)(a) - 1)) == 0)
#endif /* IS_P2ALIGNED */

#ifndef P2ROUNDUP
#define	P2ROUNDUP(x, align) \
	(-(-((uintptr_t)(x)) & -((uintptr_t)(align))))
#endif /* P2ROUNDUP */

#ifndef P2ROUNDDOWN
#define	P2ROUNDDOWN(x, align) \
	(((uintptr_t)(x)) & ~((uintptr_t)(align) - 1))
#endif /* P2ROUNDDOWN */
178
/*
 * 64-bit fill patterns.  Judging by their names they mark freed and
 * not-yet-initialized buffers respectively -- NOTE(review): confirm against
 * the mcache_*_pattern() callers.
 */
#define	MCACHE_UNINITIALIZED_PATTERN	0xbaddcafebaddcafeULL
#define	MCACHE_FREE_PATTERN		0xdeadbeefdeadbeefULL
181
182/*
183 * mcache allocation request flags.
184 *
185 * MCR_NOSLEEP and MCR_FAILOK are mutually exclusive.  The latter is used
186 * by the mbuf allocator to handle the implementation of several caches that
187 * involve multiple layers of mcache.  It implies a best effort blocking
188 * allocation request; if the request cannot be satisfied, the caller will
189 * be blocked until further notice, similar to MCR_SLEEP, except that upon
190 * a wake up it will return immediately to the caller regardless of whether
 * the request can be fulfilled.
192 *
193 * MCR_TRYHARD implies a non-blocking allocation request, regardless of
194 * whether MCR_NOSLEEP is set.  It informs the allocator that the request
195 * should not cause the calling thread to block, and that it must have
196 * exhausted all possible schemes to fulfill the request, including doing
197 * reclaims and/or purges, before returning to the caller.
198 *
199 * Regular mcache clients should only use MCR_SLEEP or MCR_NOSLEEP.
200 */
/* Flags for regular mcache clients */
#define	MCR_SLEEP	0x0000		/* same as M_WAITOK */
#define	MCR_NOSLEEP	0x0001		/* same as M_NOWAIT */

/* Private flags, for internal use only */
#define	MCR_FAILOK	0x0100
#define	MCR_TRYHARD	0x0200
#define	MCR_USR1	0x1000

/* Union of MCR_NOSLEEP, MCR_FAILOK and MCR_TRYHARD */
#define	MCR_NONBLOCKING	(MCR_TRYHARD | MCR_FAILOK | MCR_NOSLEEP)
208
/*
 * Generic singly-linked list element.  mcache_alloc_ext() requests use it
 * to chain the allocated objects together before handing them back to the
 * caller.
 */
struct mcache_obj {
	struct mcache_obj	*obj_next;	/* next element in the chain */
};

typedef struct mcache_obj mcache_obj_t;
217
/*
 * Bucket: a link to the next bucket followed by an array of object
 * pointers.  The array is declared with a single element; per the member
 * comment it stands for "one or more objects".
 */
struct mcache_bkt {
	void		*bkt_next;	/* next bucket in list */
	void		*bkt_obj[1];	/* one or more objects */
};

typedef struct mcache_bkt mcache_bkt_t;
222
223typedef struct mcache_bktlist {
224	mcache_bkt_t	*bl_list;	/* bucket list */
225	u_int32_t	bl_total;	/* number of buckets */
226	u_int32_t	bl_min;		/* min since last update */
227	u_int32_t	bl_reaplimit;	/* max reapable buckets */
228	u_int64_t	bl_alloc;	/* allocations from this list */
229} mcache_bktlist_t;
230
/*
 * Bucket type: ties a bucket size to the range of buffer sizes it serves
 * and to the cache that buckets of this type come from.
 */
struct mcache_bkttype {
	int		bt_bktsize;	/* bucket size (number of elements) */
	size_t		bt_minbuf;	/* all smaller buffers qualify */
	size_t		bt_maxbuf;	/* no larger buffers qualify */
	struct mcache	*bt_cache;	/* bucket cache */
};

typedef struct mcache_bkttype mcache_bkttype_t;
237
238typedef struct mcache_cpu {
239	decl_lck_mtx_data(, cc_lock);
240	mcache_bkt_t	*cc_filled;	/* the currently filled bucket */
241	mcache_bkt_t	*cc_pfilled;	/* the previously filled bucket */
242	u_int64_t	cc_alloc;	/* allocations from this cpu */
243	u_int64_t	cc_free;	/* frees to this cpu */
244	int		cc_objs;	/* number of objects in filled bkt */
245	int		cc_pobjs;	/* number of objects in previous bkt */
246	int		cc_bktsize;	/* number of elements in a full bkt */
247} __attribute__((aligned(MAX_CPU_CACHE_LINE_SIZE), packed)) mcache_cpu_t;
248
249typedef unsigned int (*mcache_allocfn_t)(void *, mcache_obj_t ***,
250    unsigned int, int);
251typedef void (*mcache_freefn_t)(void *, mcache_obj_t *, boolean_t);
252typedef void (*mcache_auditfn_t)(void *, mcache_obj_t *, boolean_t);
253typedef void (*mcache_logfn_t)(u_int32_t, mcache_obj_t *, boolean_t);
254typedef void (*mcache_notifyfn_t)(void *, u_int32_t);
255
256typedef struct mcache {
257	/*
258	 * Cache properties
259	 */
260	LIST_ENTRY(mcache) mc_list;	/* cache linkage */
261	char		mc_name[32];	/* cache name */
262	struct zone	*mc_slab_zone;	/* backend zone allocator */
263	mcache_allocfn_t mc_slab_alloc;	/* slab layer allocate callback */
264	mcache_freefn_t	mc_slab_free;	/* slab layer free callback */
265	mcache_auditfn_t mc_slab_audit;	/* slab layer audit callback */
266	mcache_logfn_t mc_slab_log;	/* slab layer log callback */
267	mcache_notifyfn_t mc_slab_notify; /* slab layer notify callback */
268	void		*mc_private;	/* opaque arg to callbacks */
269	size_t		mc_bufsize;	/* object size */
270	size_t		mc_align;	/* object alignment */
271	u_int32_t	mc_flags;	/* cache creation flags */
272	u_int32_t	mc_purge_cnt;	/* # of purges requested by slab */
273	u_int32_t	mc_enable_cnt;	/* # of reenables due to purges */
274	u_int32_t	mc_waiter_cnt;	/* # of slab layer waiters */
275	u_int32_t	mc_wretry_cnt;	/* # of wait retries */
276	u_int32_t	mc_nwretry_cnt;	/* # of no-wait retry attempts */
277	u_int32_t	mc_nwfail_cnt;	/* # of no-wait retries that failed */
278	decl_lck_mtx_data(, mc_sync_lock); /* protects purges and reenables */
279	lck_attr_t	*mc_sync_lock_attr;
280	lck_grp_t	*mc_sync_lock_grp;
281	lck_grp_attr_t	*mc_sync_lock_grp_attr;
282	/*
283	 * Keep CPU and buckets layers lock statistics separate.
284	 */
285	lck_attr_t	*mc_cpu_lock_attr;
286	lck_grp_t	*mc_cpu_lock_grp;
287	lck_grp_attr_t	*mc_cpu_lock_grp_attr;
288
289	/*
290	 * Bucket layer common to all CPUs
291	 */
292	decl_lck_mtx_data(, mc_bkt_lock);
293	lck_attr_t	*mc_bkt_lock_attr;
294	lck_grp_t	*mc_bkt_lock_grp;
295	lck_grp_attr_t  *mc_bkt_lock_grp_attr;
296	mcache_bkttype_t *cache_bkttype;	/* bucket type */
297	mcache_bktlist_t mc_full;		/* full buckets */
298	mcache_bktlist_t mc_empty;		/* empty buckets */
299	size_t		mc_chunksize;		/* bufsize + alignment */
300	u_int32_t	mc_bkt_contention;	/* lock contention count */
301	u_int32_t	mc_bkt_contention_prev;	/* previous snapshot */
302
303	/*
304	 * Per-CPU layer, aligned at cache line boundary
305	 */
306	mcache_cpu_t	mc_cpu[1];
307} mcache_t;
308
/* Default guaranteed alignment */
#define	MCACHE_ALIGN	8
310
/*
 * Valid values for mc_flags
 */
/* debugging aids */
#define	MCF_VERIFY	0x00000001	/* enable verification */
#define	MCF_TRACE	0x00000002	/* enable transaction auditing */
/* caching behaviour */
#define	MCF_NOCPUCACHE	0x00000010	/* disable CPU layer caching */
/* leak logging */
#define	MCF_NOLEAKLOG	0x00000100	/* disable leak logging */
#define	MCF_EXPLEAKLOG	0x00000200	/* expose leak info to user space */

/* both debugging flags at once */
#define	MCF_DEBUG	(MCF_TRACE | MCF_VERIFY)

/* every flag defined above */
#define	MCF_FLAGS_MASK	\
	(MCF_EXPLEAKLOG | MCF_NOLEAKLOG | MCF_NOCPUCACHE | MCF_DEBUG)
321
/*
 * Valid values for notify callback
 */
#define	MCN_RETRYALLOC	0x00000001	/* allocation should be retried */
324
/*
 * Sizing constants for the audit structure: the number of transactions
 * to record, and the number of entries in each transaction's mca_stack[].
 */
#define	MCA_TRN_MAX		2
#define	MCACHE_STACK_DEPTH	16
328
329typedef struct mcache_audit {
330	struct mcache_audit *mca_next;	/* next audit struct */
331	void		*mca_addr;	/* address of buffer */
332	mcache_t	*mca_cache;	/* parent cache of the buffer */
333	size_t		mca_contents_size; /* size of saved contents */
334	void		*mca_contents;	/* user-specific saved contents */
335	void		*mca_uptr;	/* user-specific pointer */
336	uint32_t	mca_uflags;	/* user-specific flags */
337	uint32_t	mca_next_trn;
338	struct mca_trn {
339		struct thread	*mca_thread;	/* thread doing transaction */
340		uint32_t	mca_tstamp;
341		uint16_t	mca_depth;
342		void		*mca_stack[MCACHE_STACK_DEPTH];
343	} mca_trns[MCA_TRN_MAX];
344} mcache_audit_t;
345
346__private_extern__ int assfail(const char *, const char *, int);
347__private_extern__ void mcache_init(void);
348__private_extern__ unsigned int mcache_getflags(void);
349__private_extern__ unsigned int mcache_cache_line_size(void);
350__private_extern__ mcache_t *mcache_create(const char *, size_t,
351    size_t, u_int32_t, int);
352__private_extern__ void *mcache_alloc(mcache_t *, int);
353__private_extern__ void mcache_free(mcache_t *, void *);
354__private_extern__ mcache_t *mcache_create_ext(const char *, size_t,
355    mcache_allocfn_t, mcache_freefn_t, mcache_auditfn_t, mcache_logfn_t,
356    mcache_notifyfn_t, void *, u_int32_t, int);
357__private_extern__ void mcache_destroy(mcache_t *);
358__private_extern__ unsigned int mcache_alloc_ext(mcache_t *, mcache_obj_t **,
359    unsigned int, int);
360__private_extern__ void mcache_free_ext(mcache_t *, mcache_obj_t *);
361__private_extern__ void mcache_reap(void);
362__private_extern__ boolean_t mcache_purge_cache(mcache_t *, boolean_t);
363__private_extern__ void mcache_waiter_inc(mcache_t *);
364__private_extern__ void mcache_waiter_dec(mcache_t *);
365__private_extern__ boolean_t mcache_bkt_isempty(mcache_t *);
366
367__private_extern__ void mcache_buffer_log(mcache_audit_t *, void *, mcache_t *,
368    struct timeval *);
369__private_extern__ void mcache_set_pattern(u_int64_t, void *, size_t);
370__private_extern__ void *mcache_verify_pattern(u_int64_t, void *, size_t);
371__private_extern__ void *mcache_verify_set_pattern(u_int64_t, u_int64_t,
372    void *, size_t);
373__private_extern__ void mcache_audit_free_verify(mcache_audit_t *,
374    void *, size_t, size_t);
375__private_extern__ void mcache_audit_free_verify_set(mcache_audit_t *,
376    void *, size_t, size_t);
377__private_extern__ char *mcache_dump_mca(mcache_audit_t *);
378__private_extern__ void mcache_audit_panic(mcache_audit_t *, void *, size_t,
379    int64_t, int64_t);
380
381extern int32_t total_sbmb_cnt;
382extern int32_t total_sbmb_cnt_peak;
383extern int64_t sbmb_limreached;
384extern mcache_t *mcache_audit_cache;
385
386#ifdef  __cplusplus
387}
388#endif
389
390#endif /* KERNEL_PRIVATE */
391
392#endif /* _SYS_MCACHE_H */
393