1/* $OpenBSD: kern_intrmap.c,v 1.3 2020/06/23 01:40:03 dlg Exp $ */
2
3/*
4 * Copyright (c) 1980, 1986, 1993
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 *    may be used to endorse or promote products derived from this software
17 *    without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 *
31 *	@(#)if.c	8.3 (Berkeley) 1/4/94
32 * $FreeBSD: src/sys/net/if.c,v 1.185 2004/03/13 02:35:03 brooks Exp $
33 */
34
35/*
36 * This code is adapted from the if_ringmap code in DragonflyBSD,
37 * but generalised for use by all types of devices, not just network
38 * cards.
39 */
40
41#include <sys/param.h>
42#include <sys/systm.h>
43#include <sys/device.h>
44#include <sys/malloc.h>
45#include <sys/rwlock.h>
46
47#include <sys/intrmap.h>
48
49struct intrmap_cpus {
50	struct refcnt	  ic_refs;
51	unsigned int	  ic_count;
52	struct cpu_info **ic_cpumap;
53};
54
/*
 * The assignment of a device's interrupts (rings) to entries in a set
 * of interrupt CPUs.
 */
struct intrmap {
	/* number of interrupts in the map, and entries in im_cpumap */
	unsigned int		 im_count;
	/* grid the map was most recently laid out with */
	unsigned int		 im_grid;
	/* the set of CPUs that im_cpumap indexes into */
	struct intrmap_cpus	*im_cpus;
	/* per interrupt index into im_cpus->ic_cpumap */
	unsigned int		*im_cpumap;
};
62
63/*
64 * The CPUs that should be used for interrupts may be a subset of all CPUs.
65 */
66
67struct rwlock		 intrmap_lock = RWLOCK_INITIALIZER("intrcpus");
68struct intrmap_cpus	*intrmap_cpus = NULL;
69int			 intrmap_ncpu = 0;
70
71static void
72intrmap_cpus_put(struct intrmap_cpus *ic)
73{
74	if (ic == NULL)
75		return;
76
77	if (refcnt_rele(&ic->ic_refs)) {
78		free(ic->ic_cpumap, M_DEVBUF,
79		    ic->ic_count * sizeof(*ic->ic_cpumap));
80		free(ic, M_DEVBUF, sizeof(*ic));
81	}
82}
83
84static struct intrmap_cpus *
85intrmap_cpus_get(void)
86{
87	struct intrmap_cpus *oic = NULL;
88	struct intrmap_cpus *ic;
89
90	rw_enter_write(&intrmap_lock);
91	if (intrmap_ncpu != ncpus) {
92		unsigned int icpus = 0;
93		struct cpu_info **cpumap;
94		CPU_INFO_ITERATOR cii;
95		struct cpu_info *ci;
96
97		/*
98		 * there's a new "version" of the set of CPUs available, so
99		 * we need to figure out which ones we can use for interrupts.
100		 */
101
102		cpumap = mallocarray(ncpus, sizeof(*cpumap),
103		    M_DEVBUF, M_WAITOK);
104
105		CPU_INFO_FOREACH(cii, ci) {
106#ifdef __HAVE_CPU_TOPOLOGY
107			if (ci->ci_smt_id > 0)
108				continue;
109#endif
110			cpumap[icpus++] = ci;
111		}
112
113		if (icpus < ncpus) {
114			/* this is mostly about free(9) needing a size */
115			struct cpu_info **icpumap = mallocarray(icpus,
116			    sizeof(*icpumap), M_DEVBUF, M_WAITOK);
117			memcpy(icpumap, cpumap, icpus * sizeof(*icpumap));
118			free(cpumap, M_DEVBUF, ncpus * sizeof(*cpumap));
119			cpumap = icpumap;
120		}
121
122		ic = malloc(sizeof(*ic), M_DEVBUF, M_WAITOK);
123		refcnt_init(&ic->ic_refs);
124		ic->ic_count = icpus;
125		ic->ic_cpumap = cpumap;
126
127		oic = intrmap_cpus;
128		intrmap_cpus = ic; /* give this ref to the global. */
129	} else
130		ic = intrmap_cpus;
131
132	refcnt_take(&ic->ic_refs); /* take a ref for the caller */
133	rw_exit_write(&intrmap_lock);
134
135	intrmap_cpus_put(oic);
136
137	return (ic);
138}
139
140static int
141intrmap_nintrs(const struct intrmap_cpus *ic, unsigned int nintrs,
142    unsigned int maxintrs)
143{
144	KASSERTMSG(maxintrs > 0, "invalid maximum interrupt count %u",
145	    maxintrs);
146
147	if (nintrs == 0 || nintrs > maxintrs)
148		nintrs = maxintrs;
149	if (nintrs > ic->ic_count)
150		nintrs = ic->ic_count;
151	return (nintrs);
152}
153
154static void
155intrmap_set_grid(struct intrmap *im, unsigned int unit, unsigned int grid)
156{
157	unsigned int i, offset;
158	unsigned int *cpumap = im->im_cpumap;
159	const struct intrmap_cpus *ic = im->im_cpus;
160
161	KASSERTMSG(grid > 0, "invalid if_ringmap grid %u", grid);
162	KASSERTMSG(grid >= im->im_count, "invalid intrmap grid %u, count %u",
163	    grid, im->im_count);
164	im->im_grid = grid;
165
166	offset = (grid * unit) % ic->ic_count;
167	for (i = 0; i < im->im_count; i++) {
168		cpumap[i] = offset + i;
169		KASSERTMSG(cpumap[i] < ic->ic_count,
170		    "invalid cpumap[%u] = %u, offset %u (ncpu %d)", i,
171		    cpumap[i], offset, ic->ic_count);
172	}
173}
174
175struct intrmap *
176intrmap_create(const struct device *dv,
177    unsigned int nintrs, unsigned int maxintrs, unsigned int flags)
178{
179	struct intrmap *im;
180	unsigned int unit = dv->dv_unit;
181	unsigned int i, grid = 0, prev_grid;
182	struct intrmap_cpus *ic;
183
184	ic = intrmap_cpus_get();
185
186	nintrs = intrmap_nintrs(ic, nintrs, maxintrs);
187	if (ISSET(flags, INTRMAP_POWEROF2))
188		nintrs = 1 << (fls(nintrs) - 1);
189	im = malloc(sizeof(*im), M_DEVBUF, M_WAITOK | M_ZERO);
190	im->im_count = nintrs;
191	im->im_cpus = ic;
192	im->im_cpumap = mallocarray(nintrs, sizeof(*im->im_cpumap), M_DEVBUF,
193	    M_WAITOK | M_ZERO);
194
195	prev_grid = ic->ic_count;
196	for (i = 0; i < ic->ic_count; i++) {
197		if (ic->ic_count % (i + 1) != 0)
198			continue;
199
200		grid = ic->ic_count / (i + 1);
201		if (nintrs > grid) {
202			grid = prev_grid;
203			break;
204		}
205
206		if (nintrs > ic->ic_count / (i + 2))
207			break;
208		prev_grid = grid;
209	}
210	intrmap_set_grid(im, unit, grid);
211
212	return (im);
213}
214
215void
216intrmap_destroy(struct intrmap *im)
217{
218	free(im->im_cpumap, M_DEVBUF, im->im_count * sizeof(*im->im_cpumap));
219	intrmap_cpus_put(im->im_cpus);
220	free(im, M_DEVBUF, sizeof(*im));
221}
222
223/*
224 * Align the two ringmaps.
225 *
226 * e.g. 8 netisrs, rm0 contains 4 rings, rm1 contains 2 rings.
227 *
228 * Before:
229 *
230 * CPU      0  1  2  3   4  5  6  7
231 * NIC_RX               n0 n1 n2 n3
232 * NIC_TX        N0 N1
233 *
234 * After:
235 *
236 * CPU      0  1  2  3   4  5  6  7
237 * NIC_RX               n0 n1 n2 n3
238 * NIC_TX               N0 N1
239 */
240void
241intrmap_align(const struct device *dv,
242    struct intrmap *im0, struct intrmap *im1)
243{
244	unsigned int unit = dv->dv_unit;
245
246	KASSERT(im0->im_cpus == im1->im_cpus);
247
248	if (im0->im_grid > im1->im_grid)
249		intrmap_set_grid(im1, unit, im0->im_grid);
250	else if (im0->im_grid < im1->im_grid)
251		intrmap_set_grid(im0, unit, im1->im_grid);
252}
253
254void
255intrmap_match(const struct device *dv,
256    struct intrmap *im0, struct intrmap *im1)
257{
258	unsigned int unit = dv->dv_unit;
259	const struct intrmap_cpus *ic;
260	unsigned int subset_grid, cnt, divisor, mod, offset, i;
261	struct intrmap *subset_im, *im;
262	unsigned int old_im0_grid, old_im1_grid;
263
264	KASSERT(im0->im_cpus == im1->im_cpus);
265	if (im0->im_grid == im1->im_grid)
266		return;
267
268	/* Save grid for later use */
269	old_im0_grid = im0->im_grid;
270	old_im1_grid = im1->im_grid;
271
272	intrmap_align(dv, im0, im1);
273
274	/*
275	 * Re-shuffle rings to get more even distribution.
276	 *
277	 * e.g. 12 netisrs, rm0 contains 4 rings, rm1 contains 2 rings.
278	 *
279	 * CPU       0  1  2  3   4  5  6  7   8  9 10 11
280	 *
281	 * NIC_RX   a0 a1 a2 a3  b0 b1 b2 b3  c0 c1 c2 c3
282	 * NIC_TX   A0 A1        B0 B1        C0 C1
283	 *
284	 * NIC_RX   d0 d1 d2 d3  e0 e1 e2 e3  f0 f1 f2 f3
285	 * NIC_TX         D0 D1        E0 E1        F0 F1
286	 */
287
288	if (im0->im_count >= (2 * old_im1_grid)) {
289		cnt = im0->im_count;
290		subset_grid = old_im1_grid;
291		subset_im = im1;
292		im = im0;
293	} else if (im1->im_count > (2 * old_im0_grid)) {
294		cnt = im1->im_count;
295		subset_grid = old_im0_grid;
296		subset_im = im0;
297		im = im1;
298	} else {
299		/* No space to shuffle. */
300		return;
301	}
302
303	ic = im0->im_cpus;
304
305	mod = cnt / subset_grid;
306	KASSERT(mod >= 2);
307	divisor = ic->ic_count / im->im_grid;
308	offset = ((unit / divisor) % mod) * subset_grid;
309
310	for (i = 0; i < subset_im->im_count; i++) {
311		subset_im->im_cpumap[i] += offset;
312		KASSERTMSG(subset_im->im_cpumap[i] < ic->ic_count,
313		    "match: invalid cpumap[%d] = %d, offset %d",
314		     i, subset_im->im_cpumap[i], offset);
315	}
316#ifdef DIAGNOSTIC
317	for (i = 0; i < subset_im->im_count; i++) {
318		unsigned int j;
319
320		for (j = 0; j < im->im_count; j++) {
321			if (im->im_cpumap[j] == subset_im->im_cpumap[i])
322				break;
323		}
324		KASSERTMSG(j < im->im_count,
325		    "subset cpumap[%u] = %u not found in superset",
326		     i, subset_im->im_cpumap[i]);
327	}
328#endif
329}
330
331unsigned int
332intrmap_count(const struct intrmap *im)
333{
334	return (im->im_count);
335}
336
337struct cpu_info *
338intrmap_cpu(const struct intrmap *im, unsigned int ring)
339{
340	const struct intrmap_cpus *ic = im->im_cpus;
341	unsigned int icpu;
342	KASSERTMSG(ring < im->im_count, "invalid ring %u", ring);
343	icpu = im->im_cpumap[ring];
344	KASSERTMSG(icpu < ic->ic_count, "invalid interrupt cpu %u for ring %u"
345	    " (intrmap %p)", icpu, ring, im);
346	return (ic->ic_cpumap[icpu]);
347}
348
349struct cpu_info *
350intrmap_one(const struct device *dv)
351{
352	unsigned int unit = dv->dv_unit;
353	struct intrmap_cpus *ic;
354	struct cpu_info *ci;
355
356	ic = intrmap_cpus_get();
357	ci = ic->ic_cpumap[unit % ic->ic_count];
358	intrmap_cpus_put(ic);
359
360	return (ci);
361}
362